21 #include "SamFilter.h"
23 #include "SamQuerySeqWithRefHelper.h"
24 #include "BaseUtilities.h"
29 double mismatchThreshold)
39 const int32_t initialLastFrontClipPos = -1;
40 int32_t lastFrontClipPos = initialLastFrontClipPos;
42 int32_t firstBackClipPos = readLength;
44 bool fromFrontComplete =
false;
45 bool fromBackComplete =
false;
46 int32_t numBasesFromFront = 0;
47 int32_t numBasesFromBack = 0;
48 int32_t numMismatchFromFront = 0;
49 int32_t numMismatchFromBack = 0;
53 while(!fromFrontComplete || !fromBackComplete)
57 while(!fromFrontComplete &&
58 ((numBasesFromFront <= numBasesFromBack) ||
64 fromFrontComplete =
true;
72 fromFrontComplete =
true;
79 if(baseMatchInfo.
getType() == SamSingleBaseMatchInfo::MISMATCH)
82 ++numMismatchFromFront;
84 double mismatchPercent =
85 (double)numMismatchFromFront / numBasesFromFront;
86 if(mismatchPercent > mismatchThreshold)
91 numBasesFromFront = 0;
92 numMismatchFromFront = 0;
99 while(!fromBackComplete &&
100 ((numBasesFromBack <= numBasesFromFront) ||
101 (fromFrontComplete)))
106 fromBackComplete =
true;
114 fromBackComplete =
true;
121 if(baseMatchInfo.
getType() == SamSingleBaseMatchInfo::MISMATCH)
124 ++numMismatchFromBack;
126 double mismatchPercent =
127 (double)numMismatchFromBack / numBasesFromBack;
128 if(mismatchPercent > mismatchThreshold)
133 numBasesFromBack = 0;
134 numMismatchFromBack = 0;
150 return(
softClip(record, lastFrontClipPos + 1, readLength - firstBackClipPos));
156 int32_t numFrontClips,
157 int32_t numBackClips)
165 status =
softClip(*cigar, numFrontClips, numBackClips,
166 startPos, updatedCigar);
192 int32_t numFrontClips,
193 int32_t numBackClips,
198 int32_t endClipPos = readLength - numBackClips;
201 if((numFrontClips != 0) || (numBackClips != 0))
206 int32_t totalClips = numFrontClips + numBackClips;
207 if(totalClips >= readLength)
219 int origCigarOpIndex = 0;
224 int32_t numPositions = 0;
227 bool onlyClips =
true;
233 while((origCigarOpIndex < oldCigar.
size()) &&
234 (numPositions < numFrontClips))
237 switch(op->operation)
256 numPositions += op->count;
268 if(numFrontClips != 0)
275 int32_t newCount = numPositions - numFrontClips;
283 if(numPositions > endClipPos)
285 newCount -= (numPositions - endClipPos);
289 updatedCigar.
Add(op->operation, newCount);
307 while((origCigarOpIndex < oldCigar.
size()) &&
308 (numPositions <= endClipPos))
337 uint32_t numPosTilClip = endClipPos - numPositions;
339 if(numPosTilClip < op->count)
343 if(numPosTilClip != 0)
345 updatedCigar.
Add(op->operation,
364 numPositions += op->count;
373 if(numBackClips != 0)
381 while(origCigarOpIndex < oldCigar.
size())
407 if(numFrontClips > 0)
415 int32_t lastFrontClipPos = numFrontClips - 1;
421 startPos = newStartPos + 1;
432 uint32_t qualityThreshold,
433 uint8_t defaultQualityInt)
435 uint32_t totalMismatchQuality =
440 if(totalMismatchQuality > qualityThreshold)
453 uint8_t defaultQualityInt)
456 int mismatchQual = 0;
464 if(baseMatchInfo.
getType() == SamSingleBaseMatchInfo::MISMATCH)
467 char readQualityChar =
469 uint8_t readQualityInt =
475 readQualityInt = defaultQualityInt;
477 mismatchQual += readQualityInt;
482 return(mismatchQual);
489 uint16_t flag = record.
getFlag();
493 flag &= ~
SamFlag::SECONDARY_ALIGNMENT;
494 flag &= ~
SamFlag::SUPPLEMENTARY_ALIGNMENT;
static const uint8_t UNKNOWN_QUALITY_INT
Int value used when the quality is unknown.
static uint8_t getPhredBaseQuality(char charQuality)
Get phred base quality from the specified ascii quality.
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
void Add(Operation operation, int count)
Append the specified operation with the specified count to this object.
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
int size() const
Return the number of cigar operations.
int getExpectedQueryBaseCount() const
Return the length of the read that corresponds to the current CIGAR string.
static const int32_t INDEX_NA
Value associated with an index that is not applicable/does not exist, used for converting between que...
static bool foundInQuery(Operation op)
Return true if the specified operation is found in the query sequence, false if not.
int32_t getRefPosition(int32_t queryIndex, int32_t queryStartPos)
Return the reference position associated with the specified query index or INDEX_NA based on this cig...
@ del
deletion from the reference (the reference contains bases that have no corresponding base in the quer...
@ mismatch
mismatch operation. Associated with CIGAR Operation "M"
@ hardClip
Hard clip on the read (clipped sequence not present in the query sequence or reference)....
@ match
match/mismatch operation. Associated with CIGAR Operation "M"
@ insert
insertion to the reference (the query sequence contains bases that have no corresponding base in the ...
@ skip
skipped region from the reference (the reference contains bases that have no corresponding base in th...
@ softClip
Soft clip on the read (clipped sequence present in the query sequence, but not in reference)....
@ none
no operation has been set.
const CigarOperator & getOperator(int i) const
Return the Cigar Operation at the specified index (starting at 0).
static bool isClip(Operation op)
Return true if the specified operation is a clipping operation, false if not.
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
FilterStatus
Enum describing what sort of filtering was done.
@ NONE
The filter did not affect the read.
@ FILTERED
Filtering caused the read to be modified to unmapped.
@ CLIPPED
Filtering clipped the read.
static FilterStatus softClip(SamRecord &record, int32_t numFrontClips, int32_t numBackClips)
Soft clip the record from the front and/or the back.
static uint32_t sumMismatchQuality(SamRecord &record, GenomeSequence &refSequence, uint8_t defaultQualityInt)
Get the sum of the qualities of all mismatches in the record.
static FilterStatus clipOnMismatchThreshold(SamRecord &record, GenomeSequence &refSequence, double mismatchThreshold)
Clip the read based on the specified mismatch threshold.
static void filterRead(SamRecord &record)
Filter the read by marking it as unmapped.
static FilterStatus filterOnMismatchQuality(SamRecord &record, GenomeSequence &refSequence, uint32_t qualityThreshold, uint8_t defaultQualityInt)
Filter the read based on the specified quality threshold.
Class for extracting information from a SAM Flag.
static void setUnmapped(uint16_t &flag)
Mark the passed in flag as unmapped.
Iterates through the query and compare with reference.
bool getNextMatchMismatch(SamSingleBaseMatchInfo &matchMismatchInfo)
Returns information for the next position where the query and the reference match or mismatch.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
bool setMapQuality(uint8_t mapQuality)
Set the mapping quality (MAPQ).
bool setFlag(uint16_t flag)
Set the bitwise FLAG to the specified value.
uint16_t getFlag()
Get the flag (FLAG).
bool setCigar(const char *cigar)
Set the CIGAR to the specified SAM formatted cigar string.
int32_t getReadLength()
Get the length of the read.
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
bool set0BasedPosition(int32_t position)
Set the leftmost position using the specified 0-based (BAM format) value.
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
This class contains the match/mismatch information between the reference and a read for a single base...
int32_t getQueryIndex()
Get the query index for this object.
Type getType()
Get the type (match/mismatch/unknown) for this object.