17 #include "GreedyTupleAligner.h"
21 #include <gtest/gtest.h>
27 sequence(sequence) {};
// Read-only element access: returns sequence[index] by value.
// An index that is negative or >= sequence.size() triggers a diagnostic on
// std::cerr carrying this file's name and line number.
// NOTE(review): listing lines 31-32 are absent from this extract, so what the
// out-of-range branch does after printing cannot be confirmed from here.
28 char operator[] (
int index)
const {
29 if (index < 0 || index >= (
int)sequence.size()) {
30 std::cerr <<
"exceeds boundary! at " << __FILE__ <<
":" <<__LINE__ << std::endl;
33 return sequence[index];
// Mutable element access: returns a reference to sequence[index], so callers
// can assign through it.
// Performs the same range test as the const overload and prints the same
// diagnostic to std::cerr when index is negative or >= sequence.size().
// NOTE(review): listing lines 38-39 are absent from this extract, so the rest
// of the out-of-range branch is not visible.
35 char& operator[] (
int index) {
36 if (index < 0 || index >= (
int)sequence.size()) {
37 std::cerr <<
"exceeds boundary! at " << __FILE__ <<
":" <<__LINE__ << std::endl;
40 return sequence[index];
// Returns the number of characters held in `sequence`.
// The value of sequence.size() is converted implicitly to int on return.
43 int getNumberBases()
const{
44 return sequence.size();
// Writes the two pretty-print buffers to `out`.
// NOTE(review): listing lines 52-54, 56-57 and 59 onward are absent from this
// extract; the remaining parameters (call sites pass a CigarRoller and an
// output stream) and anything printed between or after the two loops are not
// visible here.
50 void printRefQueryCigar(
const std::vector<char>& prettyPrintReference,
51 const std::vector<char>& prettyPrintQuery,
// Stream every character of the reference buffer, in order.
55 for(std::vector<char>::const_iterator i=prettyPrintReference.begin(); i<prettyPrintReference.end(); i++) out << *i;
// Stream every character of the query buffer, in order.
58 for(std::vector<char>::const_iterator i=prettyPrintQuery.begin(); i<prettyPrintQuery.end(); i++) out << *i;
// Runs one alignment of `query` against `ref` through ga.Align.
// `cs` and `matchPosition` are taken by non-const reference and forwarded to
// Align; presumably they receive the resulting CIGAR and match position
// (verify against GreedyTupleAligner::Align).
// NOTE(review): `ga` and `reference` are declared on listing lines 66-68,
// which are absent from this extract.
65 void runAlign(
const char *query,
const char *ref,
CigarRoller& cs,
int& matchPosition) {
// Arguments: query and its length, the reference object, the literal 0,
// strlen(ref), then the two reference parameters.
69 ga.Align(query, strlen(query), reference, 0, strlen(ref), cs, matchPosition);
// Builds two character buffers, one for the reference and one for the query,
// by walking `ref` and `query` with cursors and padding with spaces.
// NOTE(review): listing lines 73, 75, 77-78 (the remaining parameters,
// including `ref`, `query` and `expectedCigar`), the case labels of the switch
// and the closing braces are absent from this extract.
72 void computePrettyOutput (std::vector<char>& prettyPrintReference,
74 std::vector<char>& prettyPrintQuery,
76 const int matchPosition,
// Cursors that are advanced as characters are consumed from each input.
79 const char* pRef = ref;
80 const char* pQuery = query;
// Leading part: copy the first matchPosition reference characters and pad the
// query buffer with one space for each.
81 for (
int index = 0; index < matchPosition; index++){
82 prettyPrintReference.push_back(*pRef++);
83 prettyPrintQuery.push_back(
' ');
// Walk the CIGAR elements; each visible branch below repeats its action
// expectedCigar[i].count times.
85 for (
int i = 0; i< expectedCigar.
size(); i++) {
86 switch( expectedCigar[i].operation) {
// Branch 1: consume one character from both inputs per step.
// Presumably the match/mismatch case — label not visible, confirm.
89 for (
unsigned int j = 0; j < expectedCigar[i].count; j++){
90 prettyPrintReference.push_back(*pRef++);
91 prettyPrintQuery.push_back(*pQuery++);
// Branch 2: consume a reference character and pad the query with a space.
// Presumably the deletion case — label not visible, confirm.
95 for (
unsigned int j = 0; j < expectedCigar[i].count; j++){
96 prettyPrintReference.push_back(*pRef++);
97 prettyPrintQuery.push_back(
' ');
// Branch 3: pad the reference with a space and consume a query character.
// Presumably the insertion case — label not visible, confirm.
101 for (
unsigned int j = 0; j < expectedCigar[i].count; j++){
102 prettyPrintReference.push_back(
' ');
103 prettyPrintQuery.push_back(*pQuery++);
// Copies a reference character; the statement controlling this line is on a
// listing line that is absent from this extract.
111 prettyPrintReference.push_back(*pRef++);
// Drain whatever is left of the query string.
// NOTE(review): the leftover query characters are appended to
// prettyPrintReference; prettyPrintQuery looks like the intended target —
// confirm and fix.
112 while (*pQuery !=
'\0')
113 prettyPrintReference.push_back(*pQuery++);
// Aligns `query` against `ref` via runAlign and compares the resulting CIGAR
// text with `expectedCigarString`; diagnostics are written to `out`, which
// defaults to std::cout.
// NOTE(review): many listing lines (118-122, 124-125, 128-130, 134-139, 141,
// 143-145, 147, 149-150, 152-154, 157 onward) are absent from this extract,
// including the declarations of `cs`, `matchPosition`, `expectedCigar` and
// `str`, the conditions guarding the two length checks, and every return
// statement.
116 bool verifyAlign(
const char *query,
const char *ref,
const char *expectedCigarString, std::ostream& out = std::cout) {
// Rewind the output stream's put position.
// NOTE(review): std::ios_base::beg is a seek direction, while the
// one-argument seekp takes an absolute position; out.seekp(0,
// std::ios_base::beg) would state the intent unambiguously — confirm.
117 out.seekp(std::ios_base::beg);
123 runAlign(query, ref, cs, matchPosition);
// A negative matchPosition is reported on stderr as "No match".
126 if (matchPosition < 0) {
127 fprintf(stderr,
"No match in %s, %d \n", __FILE__, __LINE__);
// Build the two display buffers used by every diagnostic below.
131 std::vector<char> prettyPrintReference,prettyPrintQuery;
132 computePrettyOutput( prettyPrintReference, ref,
133 prettyPrintQuery, query,
// Diagnostic for a test-case error: the expected CIGAR's query base count
// differs from strlen(query).
140 printRefQueryCigar(prettyPrintReference, prettyPrintQuery, cs, out);
142 out <<
"Expected Cigar string length " << expectedCigar.
getExpectedQueryBaseCount() <<
" does not match the length of the query " << strlen(query) <<
". Please fix test case." << std::endl;
// Diagnostic for an aligner error: the computed CIGAR's query base count
// differs from strlen(query).
146 printRefQueryCigar(prettyPrintReference, prettyPrintQuery, cs, out);
148 out <<
"Query Length of " << strlen(query) <<
" does not match computed cigar string length of " << cs.
getExpectedQueryBaseCount() << std::endl;
// Exact text comparison between the expected CIGAR and `str`.
151 if (strcmp(expectedCigarString, str) == 0) {
// Mismatch report: print the alignment followed by the expected answer.
155 printRefQueryCigar(prettyPrintReference, prettyPrintQuery, cs, out);
156 out <<
"[Correct Answer = " << expectedCigarString <<
"] --------------------- Wrong!" << std::endl;
// Short-reference cases: each assertion calls verifyAlign(query, reference,
// expectedCigar) and expects it to return true.
// NOTE(review): in the lines visible here `ss` is never passed to verifyAlign,
// whose `out` parameter defaults to std::cout, so the ss.str() streamed into
// each failure message stays empty — passing `ss` as the fourth argument looks
// intended; confirm.
162 TEST(GreedyTupleAlignerTest, AlignToShortReference) {
163 std::stringstream ss(std::stringstream::out);
// Exact 5-base matches at the start of the reference and one base in.
166 EXPECT_TRUE( verifyAlign(
"12345",
"123456789",
"5M") ) << ss.str().c_str();
167 EXPECT_TRUE( verifyAlign(
"23456",
"123456789",
"5M") ) << ss.str().c_str();
// A single differing base in the middle is still expected to be reported as
// 7M.
169 EXPECT_TRUE( verifyAlign(
"123B567",
"123456789",
"7M") ) << ss.str().c_str();
170 EXPECT_TRUE( verifyAlign(
"234D678",
"123456789",
"7M") ) << ss.str().c_str();
// Query lacks the '5' present in the reference: one-base deletion expected,
// with and without extra flanking characters on the reference.
172 EXPECT_TRUE( verifyAlign(
"123467890",
"1234567890",
"4M1D5M") ) << ss.str().c_str();
173 EXPECT_TRUE( verifyAlign(
"123467890",
"B1234567890B",
"4M1D5M") ) << ss.str().c_str();
// Query carries an extra '0' after "12345": one-base insertion expected, with
// and without extra flanking characters on the reference.
175 EXPECT_TRUE( verifyAlign(
"12345067890",
"1234567890",
"5M1I5M") ) << ss.str().c_str();
176 EXPECT_TRUE( verifyAlign(
"12345067890",
"BBBB1234567890BBBB",
"5M1I5M") ) << ss.str().c_str();
// Final differing base expected as a soft clip (3M1S).
178 EXPECT_TRUE( verifyAlign(
"1234",
"1235",
"3M1S") ) << ss.str().c_str();
// Insertion near the start combined with a two-base soft clip at the end.
182 EXPECT_TRUE( verifyAlign(
"1023456700",
"123456789",
"1I7M2S") ) << ss.str().c_str();
// Long-reference cases: nucleotide queries aligned against references several
// hundred bases long, each with an expected CIGAR string.
// NOTE(review): the suite name here is GreedyTupleTestAligner, whereas the
// short-reference test uses GreedyTupleAlignerTest — likely meant to be the
// same suite; confirm.
// NOTE(review): in the lines visible here `ss` is never passed to verifyAlign,
// whose `out` parameter defaults to std::cout, so the ss.str() streamed into
// each failure message stays empty — confirm.
185 TEST(GreedyTupleTestAligner, AlignToLongReference) {
186 std::stringstream ss(std::stringstream::out);
// Expected: 88 aligned bases, a 200-base deletion, then 20 aligned bases.
188 EXPECT_TRUE( verifyAlign(
"TTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACGTCATCGGTCAGAAATTGGG",
"CCGAGATTGTGCCATTGCACTCCTGCCTGGGTAACAGAGTCAGACCCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAGATTAGGTTTTATAGATGGAAAATTCACAGCTCTCTCCAGATCAGAAATCTCCAAGAGTAAATTAGTGTCTTAAAGGGGTTGTAATAACTTTCCTATGTGACTAAGTGCATTATTAATCAATTTTTCTATGATCAAGTACTCCTTTACATACCTGCTAATACAATTTTTGATATGAAATCAGTCCTAGAGGGAATCAATGTAAGATACAGACTTGATGAGTGCTTGCAGTTTTTTATTGACAATCTGAAGAATGACTTGACTCTAAATTGCAGCTCAAGGCTTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACAGGAATCAAATCTGTCTAGCCTCTCTTTTTGGCAAGGTTAAGAACAATTCCACTTCATCCTAATCCCAATGATTCCTGCCGACCCTCTTCCAAAAACTATTTAAAGACATGTTCTTCAAAGTTATATTTGTCTTTCCTTCAGGGAGAAAAAGAATACCAATCACTTATAATATGGAAACTAGCAGAAATGGGTCACATAAGTCATCTGTCAGAAATTGGGAAAATAGAGTAGGTCAGTCTTTCCAGTCATGGTACTTTTACCTTCAATCA",
"88M200D20M") ) << ss.str().c_str();
// Expected: 63 aligned bases, a 3-base deletion, then 56 aligned bases.
194 EXPECT_TRUE( verifyAlign(
"GTGAAACTCCATCTCAAAAATAAGTAAATAAATAAATACATACATAGGCACAGTGCAGTTGTTAGTCAGAATTAGGTCACACTGGATTAGGGTGAGTACTTAATGCAACAGGTCTGGGG",
"GTGCCAGAGTTTAATTAATAGGATAAGGTTATGAGTCAGACTGTGTACCCCAAAAAAGATATGTTGAACTCCTAAGCCCCTGAACCACAGAATGGGATCCTATTCAGAAATAGGCACAGTGTCCGGGCACCATGGCTCACACTGGTAATCCCAGCACTCTGGGAGGCTGAGGTGGGTGCATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAATACAAACAGAACAGTTAGCCAGGTGTGGTGGTGGGCACCTGTAATCCCAGCTACTTGGGAGGCTGAGACAGGAGAATGGCTTGAACCCAGGAGGTGGAGGTTGCAGTGAGCCGAGATCGTGCCATTGCACTTCAGCCTGGGCCACAAGAGTGAAACTCCATCTCAAAAATAAGTAAATAAATAAATACATACGTAGGCACAGTGCAGTTGTTGTTAGTTAGAATTAGGTCACACTGGATTAGGGTGAGTCCTTAATCCAACAGGTCTGGTGTCCTTACAAATAGACAAATACACAGAAGGAACATGGCCACATGGAGATACAGACACACCAAAACATCATATTGAGATGTGGGCAAAGATTGGAGAGACACTTCTCCAAGTCAAGGAACATCTGGGACTACCCAGAAACTGTAAGAGGCAGAGAAAGGTCCTTCCCTGTAGGCTTTAGAGGAACATGGCCCTGCCAACATCTTGATCTTGGATTTCCAGCCTCCAGCATGTGAGACAAGTTTCTGGGTTTTTTTGGAGACAGAGTCTCACTCTTGTCACCCAGGCTGGAGTGCAGTGGCATGAACTTGGCTCACTGCAACCTCCTCCCAGGATCAAGGTATTGTCCTGCCTCAGCCTCCCGAGTAGCTGGGATGACAGGGGCCCGCCACCACGCCAGCTCATTTTTGTATTTTTTACTAGAGAAGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACCTCAAGTGATCCACCCGCCTTGGCCTCCCAAAGTGCTAGGATTACAGGTGTGAGCCACTGCGCCTGGCAAGTTTCTGTTGCCTTAAGCCACTCTTTCTGTGGTAATTTGTTATCATGGCCCTAAGAAATGACTAGAGAGAGAAAGCAAATCCCTTTGTTTCTGCATTTACTGAAACAGATGAATAGATTTCTAGCTCCCTTGGGGTCTGAACTTTTAAAAGAGAGATTTCTTATACATATGATAATCATGATATTGT",
"63M3D56M") ) << ss.str().c_str();
// Expected: 109 aligned bases, a 249-base deletion, then 10 aligned bases.
196 EXPECT_TRUE( verifyAlign(
"ATATTGTTTTTTTCAATGCATATCAAAACAATGTTTACAATATACTACAGCCTAAGTGTGCAATAGCATTATGTGTAGAAATGCACATACCATAATTAGTTTTTTTTTTTGAAAAAACT",
"GTTCCAAAAGATTATATTTGTTAGGTTAGAGAATTTTAACTTATTTATATAATGGAGATTTTCTAATACTGAGAATACCTTAATTCTTATTGTAAGCCTACTTAACAGTGACAAAATGTTATTATAACGTGGTATTGAAATTAATATGATAGTATTTTATATGGATATTTGCATATGCAATTGACATATATTGTATATACAATATATAACTGTGTATTATATATTATATTTATATAATGTTATATTGTATATGAATATATTTGAATTATATGTATATACATATATATAGGCATTCATCAGAAATATTGCAGGTTTGGTTTCAGACGACTATAATAAAGTGAATATTGCAATAAAGCGAATCACAAGAAATTATTGTTTTTTTCAATGCATATCAAAACAATGTTTACAATATACTACAGCCTAAGTGTGCAATAGCATTATGTGTAGAAATGCACATACCATAATTAGTTTTTTTTTTGAAAAAACTGTTAATGATTATCTGAGCCTTCAGTGAGTTGTAATCTTTTCATGGTGGAGGATCATACCTCTACGTTGATGTCAGCTGACTGATCAGGGTAGTAGTTGCTGAAGGCTTGGGTGGCTGTGGCAATTTCTTAAAATAGGATAACAATGGCATTTACCACATTAATTGACTCCTTCTTTCACAAAAGATTTCTCTGTCTCATGCAATGCTGTTTGACAGCATTTTCCCCACAGTAGAATTTCTTTAAAAATTGG",
"109M249D10M") ) << ss.str().c_str();
// Expected: 99 aligned bases, a 200-base deletion, then 9 aligned bases.
198 EXPECT_TRUE( verifyAlign(
"CCAGACTATCTCAAGCAATCAACAGATTTAATGTAAGGAGTGTCAAAATCTGAATGATGCTTTTTGCAGAAATAGAAAATCCCTTTCTAATATTTTTATATTTTTGAC",
"TTATCGAGGCTGGCGGATTTTGTGAGGCCAGGAGTTCAAGACCAGCCTGGCCAACATGGCAAAACTCTGTCTCTACTAAAAATACAAAAGTTAGCTGGGCATAGTGGCACATGTCTATAGTTCTAGCTATGTGGGAGGCTGAGACACGAGAATCGCTTGAACCCAGGAGGTGGAGGTTGTGGTGAGCCGAGCTCACGCCATTGCTCTCCAGCCTGGGCAACAGAGCAAGACTGTCTCAAAAACAAAAACAAAAAACACAAAAACTACAAGACTTTTATGAAATAACTTAAGGAAGATATAAATAAATGGAAAGATATCCCATGTTCCTGACTTGGAAGACTTAATTTTGTTAAGATGTCCATACTATCTCAAGCAATCAACAGATTTAATGTAAGGAGTGTCAAAATCTGAATGATGCTTTTTGCAGAAATAGAAAATCCCTTTCTAATATTTTTATGTAATCTCAAGGGACCCCAAATAGCCAAAAGAATCCTGAAAAAGTAGAATAAAGCTGGAGGACTCATGATTCCTGATTTGAAAACTTACTACCAGATACAATAATCAAAACAGTTCCGTGCTTGTCATAAAGACAAACATATAGACCAATGGAACAGAATAGAGATTACAGGGACAAATCCTCATATATATGGTCAAATGATTTTTGACCAGTGCCAAGATCATTCATGGGTGAAAAGACAATCTTTTCAATAAAAGAT",
"99M200D9M") ) << ss.str().c_str();
// This case asserts that verifyAlign returns false for the stated CIGAR
// (54M200D54M), i.e. the aligner is not expected to reproduce it.
202 EXPECT_FALSE( verifyAlign(
"ATGAGGTCAGGAGATGGAGACCATCCTGGCTAACATGGTGAAACCCCATCTCTAAAAAAAGTGTAACAGAGGTGCATACTCAAAACTACAAAAGTCTCGTGAAAGGAA",
"CAAGAAAAAGAAATAAAATACATTTTAGTAGGAAAGGAAGAAGTTAAATTGTCTCCATTTGGTGACAACATGAGCTTATATGCAGAAAACCTAAAGACTCTACCAAAAAAACTGCTGGAACTGATAAATGAATTCGGTGAAGTCCTAGGGTATAAAATCAATGTACAAAATAAGTGGTGTTTCTATATTCTAATAAATTATTCAAAAGGGAAATTAAGAAATCAATCCCATTTTCAATAGCAACAACAAAAAAAATGACAATGCCAAAGTATAAATTTAACCAAGAAGCTACAAGAGTCTGGGCGCAGTGGCTTATGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATCATGAGGTCAGGAGATGGAGACCATCCTGGCTAACATGGTGAAACCCCATCTCTACTAAAAAAAAATAATAATAATAATAATAATAATAATAATTAGCCGGGCGTGGTGGTAGGCATCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTAGGAGAATGGCATGAACCTGGGAGGTGGAGCTTGCAGTGAGCAGAGATCACACCCCTGCACTCCAGCCTGGGTGACAGGGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAAGTGTAACAGAGTTGCATACTGAAAACTATAAAATTCTGATGAAAGAAAATGAAGCAACAAATAATTAATATAATAAAAAAGTCCATACTATCCAAAAT",
"54M200D54M") ) << ss.str().c_str();
// Expected: the whole 108-base query aligned with no gaps.
213 EXPECT_TRUE( verifyAlign(
"GAATCAATACGCTCGGGATGCAGCGCCTAGCCGTTGGTTTGAGAATGGTTCTCTAGAGTTATCTTCACCCTCTACCTTGTGTGGCACTATTTCTTCTATGACCTTGAC",
"TGGCTCAAGACCTGACCTTGTGCACGTCTTGGATGCCAGTTCTATTCCCCTCACAGGCCATATGAATCCTGTCCTTTCTGCCTCAAAATGCCCATCCAGAGCCTCTACATTGATTAGCTTTTCCCTCCCTTCCAGAAAAGTTCAAAGGCTACCTCCTCCTTGAAGCCTTCACAAATACCTTAATCTAACTGTTTATAACCCTCTGCCATCTTAGCACTGTGGAAAATACATAAACTTGGGGTTAGAACATCATCAGTTTTAATGTAAGCACCATCCTTTCTAGCTGTACAGCCCTCCTGAGCCTTAGTTCTTACATCTTGAAGATGGAACCAGCTCAACAAGCATAGGGATGTAGCAAGAATCAAGACACTGTAGATGCAGCACCTGGCCGGTGGTAAGAGCTTGGTTATCACAAGTTATCTTCACCCTCTACCTTGTGTGGCACTATTTCTTCTATGACCTTGACTGCTCTCTGCTCTGATCTGGAAGTTCGCTGGGAAAAGGTGTCCCCTTTTTTATTACCTACCGGGAGAACCATGAGTGATGCTCTACTTGTAGTATATATACCCTGAGATGATTATTCTTAAAGACTAGTTCTCATGACTTGAGAGTTTGCTCTGTGTTAGGTACCATTCTAACACTGGATGTTGACTATGTATGTTATTTAATACTTCCATCAACCCCATAATGTAGGGAGAATCATTATGCCCATTTTA",
"108M") ) << ss.str().c_str();
// Both assertions here expect verifyAlign to return false, i.e. the CIGAR
// produced by the aligner is not expected to equal the string given.
// Judging by the test name, other alignments may be equally valid for these
// inputs — the listing lines that might explain this (219-228, 230-231) are
// absent from this extract.
// NOTE(review): in the lines visible here `ss` is never passed to verifyAlign,
// whose `out` parameter defaults to std::cout, so the ss.str() streamed into
// each failure message stays empty — confirm.
217 TEST(GreedyTupleTestAligner, EquivalentAlignment) {
218 std::stringstream ss(std::stringstream::out);
// Stated CIGAR: 87 aligned bases, a 200-base deletion, then 21 aligned bases.
229 EXPECT_FALSE( verifyAlign(
"TTTTCTTTTCAAAAATTTAAAAGTGACATACAAAATTATATGTGTATGTACAACAAAAGCTTAACTATAACACCTTGTTACATACTTTGGAATTGAAAGGCAGGAATG",
"CAGCACCCTAATTCACTATGCCCTAAGCTTCAAGGGCTTCAGAGTAAGCTCTCAGTGGAGTCTGATTGGAATCCCTCTTCGCCAGCTTGTGAGGTATGGGGCTAGGTTCCACAATATTCCCTTTGAGGGAGTAGATCTTCCAGCCTTCTGGGGCATGCTCTGAAAGTCCTCTTTGCAGAAGTAGCTCTTTAAAATCATATTCTCTTTCCAATTTGACCTCTTTTTTTATCCTTGTTCTGTCCATGCTGTCCAAAGCATCTTGGACTAAGTTTTGACTTTTTTTTTAAGTGCTGCATTTCCATTTGACATTTTACCTTTGTAAATTTCTATTTTTTTACCTTTGTGACTTATTAAAATATTTTCTTTTCAAAAATTTAAAAGTGACATACAAAATTATATGTGTATGTACAACAAAAGCTTAACTATAACACCTTGTTACATACTTTGCTATCCAGGCCACTGATCCTTTCTTACATAGTAAGTCAGCTATAGTTCATTAGCTTACAGTTTTTAGATACAAGTCTTAATCCATCCCTTCTCCTTTTGTATTCTTTACTTTCTGCAATATTTAAGACTTTTTGCGTTCTGACTAAAAGAAACCACCTGAAATTGGCATATGCAACTGTTCATGAATGAGAACTCGCATGGAATTGAAAGGCAGGAATGCAGCTTGACCTTAGAATGGATTTGATCCAGGAACTAGAAGGTGGGTAGGA",
"87M200D21M") ) << ss.str().c_str();
// Stated CIGAR: 80 aligned bases, a 200-base deletion, then 28 aligned bases.
232 EXPECT_FALSE( verifyAlign(
"AACAGTTGAGAGGTACTAAAATTGAGTTTTCTTGAAAAATATATTTAATCTAAAGTACTGAAAATTTGGGGGAAAATGCTTAAGGTCATATTCCTTTTTTGAAAAGAT",
"TCATCTTTCTCCCATACTGGCTGTTTCCTGCCCTCAAACACTGGACTCCAAGTTCTTCAGCTTGTGGACTCTTGGACCTACAACCAGTGGTCTGCCAGGGCCCTTTGGGCCTTCGGCCACAGACTGATGGCTACACTGTCGGCTCCCCTACTTTTGAGGTTTTGTGTCTTGGACTGGCTTTCTTGCTCCTCAGCTTGCAGACAGCCTACTGTAGGACTTCACTTTGTGACTATTTGAGTCAATACTCCTTAATAAACACCCTTTCATATATACATATATCCTATTAGTCCTGTTCCTCTAGAGAACCCTAATACAGTGTTGTACATTGAAATAAATATAATTATTCTGGTTTTGGTTGAACAGTTGAGAGGTACTAAAATTGAGTTTTCTTGAAAAATATATTTAATCTAAAGTACTGAAAATTTGGGGGAAAATGCTTCTGTAAATCCTAAGTTATTATTTCTTCAACTATATTCTGTAGTTAATTTCTCCAGCAATTCTTAATTTCAGCACAAATTAGCCACTGTTTGAATTAGGAATACTGAATCGTCTCCATTGCAGTGCAGTTAATAAGTCATTTCTTGATGAAGTAGTCCATGTAGGACTTGAAATCTTGTCTTTTTCATGATACATTATCATAAGGTCATATTCCTTTTTTGAAAAGATTGATGATACTATTCTGAAAGACACTAGTAGAGTTAGGCTTGGTTTTATGA",
"80M200D28M") ) << ss.str().c_str();
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
const char * getString()
Get the string representation of the Cigar operations in this object, caller must delete the returned v...
int size() const
Return the number of cigar operations.
int getExpectedQueryBaseCount() const
Return the length of the read that corresponds to the current CIGAR string.
@ del
deletion from the reference (the reference contains bases that have no corresponding base in the quer...
@ mismatch
mismatch operation. Associated with CIGAR Operation "M"
@ match
match/mismatch operation. Associated with CIGAR Operation "M"
@ insert
insertion to the reference (the query sequence contains bases that have no corresponding base in the ...
Weight includes various penalties (e.g.