libStatGen Software  1
GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex > Class Template Reference

Public Member Functions

 GreedyTupleAligner (Weight &wt)
 
int MatchTuple (const QueryType query, const int queryLength, const ReferenceType reference, const ReferenceIndex searchStartIndex, const int searchSize, int &matchedLength, int &mismatch)
 Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'. More...
 
void Align (QueryType query, int queryLength, ReferenceType reference, ReferenceIndex searchStartIndex, int searchSize, CigarRoller &cigarRoller, ReferenceIndex &matchPosition)
 Core local alignment algorithm. More...
 

Detailed Description

template<typename QueryType, typename ReferenceType, typename ReferenceIndex>
class GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >

Definition at line 60 of file GreedyTupleAligner.h.

Member Function Documentation

◆ Align()

template<typename QueryType , typename ReferenceType , typename ReferenceIndex >
void GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align ( QueryType  query,
int  queryLength,
ReferenceType  reference,
ReferenceIndex  searchStartIndex,
int  searchSize,
CigarRoller cigarRoller,
ReferenceIndex &  matchPosition 
)
inline

Core local alignment algorithm.

Parameters
queryinput query
queryLengthlength of query
referencereference genome
searchStartIndexmatching starts here
searchSizehow far we will search
cigarRollerstore alignment results here
matchPositionstore match position

Definition at line 159 of file GreedyTupleAligner.h.

167  {
168  // Algorithm:
169  // finished align? (should we try different align position?)
170  // if not, try next tuple
171  // is the tuple aligned?
172  // yes, extend to previous, mark unmatched part mismatch or gap
173  // extend to next matched part
174  int r1 = 0; // a start index: reference starting from r1 (inclusive) will be used
175  int queryMatchCount = 0; // query matched # of bases
176  int q1 = 0; // to align
177  int pos = -1;
178  int lastR1 = -1; // index: record last
179 
180  cigarRoller.clear();
181  matchPosition = -1;
182 
183  while (queryMatchCount < queryLength)
184  {
185  if (r1 == searchSize - 1) // touched ref right boundary
186  {
187  cigarRoller.Add(CigarRoller::softClip, queryLength-queryMatchCount);
188  break;
189  }
190  if (queryLength - q1 < tupleSize)
191  {
192  // XXX this needs to do something more sane
193  // printf("some bases left!\n");
194  // a simple fix: treat all left-over bases as mismatches/matches
195  cigarRoller.Add(CigarRoller::mismatch, queryLength - queryMatchCount);
196  break;
197  }
198  int mismatch = 0;
199  int matchedLen = 0;
200  if ((pos = MatchTuple(query+q1, queryLength-q1, reference, searchStartIndex + r1, searchSize - r1, matchedLen, mismatch)) // found match position for tuple
201 
202  >= 0)
203  {
204  // found match position for tuple
205 
206  if (lastR1<0)
207  matchPosition = pos;
208 
209  //
210  // deal with left
211  //
212  if (lastR1>=0) // have previously aligned part of the query to the reference genome yet
213  {
214  if (pos > 0)
215  {
216  cigarRoller.Add(CigarRoller::del, pos);
217  }
218  }
219  else
220  {
221  lastR1 = pos;
222  }
223 
224  r1 += pos;
225  r1 += matchedLen;
226  q1 += matchedLen;
227 
228  //
229  // deal with right
230  //
231  cigarRoller.Add(CigarRoller::match, matchedLen);
232  queryMatchCount = q1;
233  lastR1 = r1;
234 
235  continue;
236  } // end if
237 
238  //
239  // try insertion
240  // maximum insert ? say 2
241  //
242  for (int i = 1; i < queryLength - q1 - tupleSize; i++)
243  {
244  int mismatch = 0;
245  int matchedLen = 0;
246  // check reference genome broundary
247  if (searchStartIndex + r1 >= reference.getNumberBases())
248  return;
249  if ((pos = MatchTuple(query+q1 + i ,
250  queryLength - q1 -i ,
251  reference,
252  searchStartIndex + r1,
253  searchSize - r1,
254  matchedLen,
255  mismatch)) // found match position for tuple
256  >= 0)
257  {
258  if (matchPosition < 0)
259  matchPosition = pos + q1 + i ;
260  }
261  queryMatchCount += i;
262  q1 += i;
263  cigarRoller.Add(CigarRoller::insert, i);
264 
265  lastR1 = r1 + pos;
266  r1 += pos + tupleSize;
267  q1 += tupleSize;
268 
269  // deal with right
270  while (searchStartIndex + r1 < reference.getNumberBases()
271  && query[q1]==reference[searchStartIndex + r1]
272  && q1 < queryLength)
273  {
274  r1++;
275  q1++;
276  }
277  if (q1 < queryLength)
278  {
279  cigarRoller.Add(CigarRoller::match, q1 - queryMatchCount);
280  queryMatchCount = q1;
281  }
282  else
283  {
284  cigarRoller.Add(CigarRoller::match, queryLength - queryMatchCount);
285  queryMatchCount = queryLength ;
286  break ;
287  }
288  }
289 
290  r1++;
291  q1++;
292 
293  // try next
294  } // end while (queryMatchCount < queryLength)
295  }
void Add(Operation operation, int count)
Append the specified operation with the specified count to this object.
Definition: CigarRoller.cpp:77
void clear()
Clear this object so that it has no Cigar Operations.
@ del
deletion from the reference (the reference contains bases that have no corresponding base in the quer...
Definition: Cigar.h:92
@ mismatch
mismatch operation. Associated with CIGAR Operation "M"
Definition: Cigar.h:90
@ match
match/mismatch operation. Associated with CIGAR Operation "M"
Definition: Cigar.h:89
@ insert
insertion to the reference (the query sequence contains bases that have no corresponding base in the ...
Definition: Cigar.h:91
@ softClip
Soft clip on the read (clipped sequence present in the query sequence, but not in reference)....
Definition: Cigar.h:94
int MatchTuple(const QueryType query, const int queryLength, const ReferenceType reference, const ReferenceIndex searchStartIndex, const int searchSize, int &matchedLength, int &mismatch)
Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to ...

References CigarRoller::Add(), CigarRoller::clear(), Cigar::del, Cigar::insert, Cigar::match, GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple(), Cigar::mismatch, and Cigar::softClip.

◆ MatchTuple()

template<typename QueryType , typename ReferenceType , typename ReferenceIndex >
int GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple ( const QueryType  query,
const int  queryLength,
const ReferenceType  reference,
const ReferenceIndex  searchStartIndex,
const int  searchSize,
int &  matchedLength,
int &  mismatch 
)
inline

Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'.

Parameters
queryinput query
queryLengthlength of query
referencereference sequence
searchStartIndexthe positino where search starts
searchSizethe total length in reference sequence that will be examine
matchedLengthstore how many bases are matched
mismatchstore how many bases are mismatched
Returns
-1 for unsuccess return

Definition at line 79 of file GreedyTupleAligner.h.

87  {
88  // now use naive search,
89  // TODO: will incorportate KMP serach later
90  // TODO: adjust tolerance of mismatches
91  const int MAX_MISMATCH=2;
92  int bestPos = 0, bestMismatch = queryLength, bestMatchedLength = 0, bestScore=-1;
93 
94 #if defined(DEBUG_GREEDY_ALIGNER)
95  cout << "searchStartIndex == " << searchStartIndex << ", searchSize == " << searchSize << std::endl;
96 #endif
97  // here i is the matching position (inclusive)
98  // j is the matched length
99  for (int i = 0; i <= searchSize - tupleSize; i++)
100  {
101  int j = 0;
102  mismatch = 0;
103  while (j < queryLength)
104  {
105  if (searchStartIndex + i + j >= reference.getNumberBases())
106  break;
107  if (query[j] != reference[searchStartIndex + i + j])
108  {
109  mismatch++;
110  if (mismatch >= MAX_MISMATCH)
111  break;
112  }
113  j++;
114  }
115 
116  if (j>0 && (j==queryLength)) j--;
117 
118  while (searchStartIndex +i +j < reference.getNumberBases()
119  && ((j+1) > mismatch)
120  && mismatch>0
121  && query[j] != reference[searchStartIndex + i+j])
122  {
123  // if pattern matching goes beyong the preset mismatch cutoff,
124  // we will have to go backwards
125  j--;
126  mismatch--;
127  }
128 
129  int score = j - mismatch;
130 
131  if (score > bestScore)
132  {
133  bestPos = i;
134  bestScore = score;
135  bestMismatch = mismatch;
136  bestMatchedLength = j+1;
137  }
138  }
139 
140  if (bestScore > 0)
141  {
142  mismatch = bestMismatch;
143  matchedLength = bestMatchedLength;
144  return bestPos;
145  }
146  return -1;
147  }

Referenced by GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align().


The documentation for this class was generated from the following file: