From db638be7e3f7c3b1f630200589657f70ac1d928c Mon Sep 17 00:00:00 2001 From: arq5x <arq5x@virginia.edu> Date: Mon, 9 Dec 2013 17:14:39 -0500 Subject: [PATCH] [TST] cleanup sample test --- src/bamToBed/bamToBed.cpp | 87 +++++++++++++++++++++----------- test/bamtobed/test-bamtobed.sh | 63 +++++++++++++++++++++++ test/bamtobed/two_blocks_w_D.sam | 6 +++ test/sample/test-sample.sh | 6 +-- 4 files changed, 129 insertions(+), 33 deletions(-) create mode 100644 test/bamtobed/two_blocks_w_D.sam diff --git a/src/bamToBed/bamToBed.cpp b/src/bamToBed/bamToBed.cpp index c89441a0..16d61644 100644 --- a/src/bamToBed/bamToBed.cpp +++ b/src/bamToBed/bamToBed.cpp @@ -38,9 +38,9 @@ void bamtobed_help(void); void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string &bamTag, bool writeBed12, - bool obeySplits, const string &color, - bool useCigar, bool useNovoalign, - bool useBWA); + bool obeySplits, bool splitOnDeletions, + const string &color, bool useCigar, + bool useNovoalign, bool useBWA); void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance, bool mate1First); @@ -49,11 +49,13 @@ string PrintTag(const BamAlignment &bam, const string &tag); void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, - bool obeySplits, bool useCigar, - bool useNovoalign, bool useBWA); + bool obeySplits, bool splitOnDeletions, + bool useCigar, bool useNovoalign, + bool useBWA); void PrintBed12(const BamAlignment &bam, const RefVector &refs, - bool useEditDistance, const string &bamTag, + bool useEditDistance, const string &bamTag, + bool obeySplits, bool splitOnDeletions, string color = "255,0,0"); void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, @@ -86,6 +88,7 @@ int bamtobed_main(int argc, char* argv[]) { bool useAlignmentScore = false; bool useCigar = false; bool obeySplits = false; + bool splitOnDeletions = false; bool mate1First = false; bool useNovoalign = false; // custom for Quinlan/Hall research bool useBWA = false; // custom for Quinlan/Hall research @@ -115,32 +118,36 @@ int bamtobed_main(int argc, char* argv[]) { } } else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) { - writeBedPE = true; + writeBedPE = true; } else if(PARAMETER_CHECK("-bed12", 6, parameterLength)) { - writeBed12 = true; + writeBed12 = true; } else if(PARAMETER_CHECK("-split", 6, parameterLength)) { - obeySplits = true; + obeySplits = true; + } + else if(PARAMETER_CHECK("-splitD", 7, parameterLength)) { + obeySplits = true; + splitOnDeletions = true; } else if(PARAMETER_CHECK("-ed", 3, parameterLength)) { - useEditDistance = true; - tag = "NM"; + useEditDistance = true; + tag = "NM"; } else if(PARAMETER_CHECK("-cigar", 6, parameterLength)) { - useCigar = true; + useCigar = true; } else if(PARAMETER_CHECK("-as", 3, parameterLength)) { - useAlignmentScore = true; + useAlignmentScore = true; } else if(PARAMETER_CHECK("-novo", 5, parameterLength)) { - useNovoalign = true; + useNovoalign = true; } else if(PARAMETER_CHECK("-bwa", 4, parameterLength)) { - useBWA = true; + useBWA = true; } else if(PARAMETER_CHECK("-mate1", 6, parameterLength)) { - mate1First = true; + mate1First = true; } else if(PARAMETER_CHECK("-color", 6, parameterLength)) { if ((i+1) < argc) { @@ -210,8 +217,8 @@ int bamtobed_main(int argc, char* argv[]) { if (writeBedPE == false) ConvertBamToBed(bamFile, useEditDistance, tag, writeBed12, - obeySplits, color, - useCigar, + obeySplits, splitOnDeletions, + color, useCigar, useNovoalign, useBWA); else ConvertBamToBedpe(bamFile, useEditDistance, mate1First); @@ -239,10 +246,14 @@ void bamtobed_help(void) { cerr << "\t-mate1\t" << "When writing BEDPE (-bedpe) format, " << endl; cerr << "\t\talways report mate one as the first BEDPE \"block\"." << endl << endl; - cerr << "\t-bed12\t" << "Write \"blocked\" BED format (aka \"BED12\")." << endl << endl; + cerr << "\t-bed12\t" << "Write \"blocked\" BED format (aka \"BED12\"). Forces -split." << endl << endl; cerr << "\t\thttp://genome-test.cse.ucsc.edu/FAQ/FAQformat#format1" << endl << endl; - cerr << "\t-split\t" << "Report \"split\" BAM alignments as separate BED entries." << endl << endl; + cerr << "\t-split\t" << "Report \"split\" BAM alignments as separate BED entries." << endl ; + cerr << "\t\tSplits only on N CIGAR operations." << endl << endl; + + cerr << "\t-splitD\t" << "Split alignments based on N and D CIGAR operators." << endl; + cerr << "\t\tForces -split." << endl << endl; cerr << "\t-ed\t" << "Use BAM edit distance (NM tag) for BED score." << endl; cerr << "\t\t- Default for BED is to use mapping quality." << endl; @@ -267,8 +278,10 @@ void bamtobed_help(void) { void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string &bamTag, - bool writeBed12, bool obeySplits, const string &color, - bool useCigar, bool useNovoalign, bool useBWA) { + bool writeBed12, bool obeySplits, bool splitOnDeletions, + const string &color, bool useCigar, bool useNovoalign, bool useBWA) +{ + // open the BAM file BamReader reader; if (!reader.Open(bamFile)) { @@ -285,9 +298,11 @@ void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string & while (reader.GetNextAlignment(bam)) { if (bam.IsMapped() == true) { if (writeBed12 == false) // BED - PrintBed(bam, refs, useEditDistance, bamTag, obeySplits, useCigar, useNovoalign, useBWA); + PrintBed(bam, refs, useEditDistance, bamTag, obeySplits, splitOnDeletions, + useCigar, useNovoalign, useBWA); else //"blocked" BED - PrintBed12(bam, refs, useEditDistance, bamTag, color); + PrintBed12(bam, refs, useEditDistance, bamTag, obeySplits, splitOnDeletions, + color); } } reader.Close(); @@ -379,8 +394,9 @@ string PrintTag(const BamAlignment &bam, const string &tag) void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, - bool obeySplits, bool useCigar, - bool useNovoalign, bool useBWA) + bool obeySplits, bool splitOnDeletions, + bool useCigar, bool useNovoalign, + bool useBWA) { // set the strand string strand = "+"; @@ -474,14 +490,18 @@ void PrintBed(const BamAlignment &bam, const RefVector &refs, vector<BED> bedBlocks; string chrom = refs.at(bam.RefID).RefName; // extract the block starts and lengths from the CIGAR string - GetBamBlocks(bam, chrom, bedBlocks, false, true); + if (!splitOnDeletions) + GetBamBlocks(bam, chrom, bedBlocks, false, true); + else + GetBamBlocks(bam, chrom, bedBlocks, true, true); + unsigned int i; for (i = 0; i < bedBlocks.size(); ++i) { BED curr = bedBlocks[i]; if (bamTag == "") { - printf("%s\t%d\t%d\t\%s\t%d\t%s\n", + printf("%s\t%d\t%d\t%s\t%d\t%s\n", chrom.c_str(), curr.start, curr.end, @@ -504,7 +524,11 @@ void PrintBed(const BamAlignment &bam, const RefVector &refs, } -void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color) { +void PrintBed12(const BamAlignment &bam, const RefVector &refs, + bool useEditDistance, const string &bamTag, + bool obeySplits, bool splitOnDeletions, + string color) +{ // set the strand string strand = "+"; @@ -520,7 +544,10 @@ void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDist string chrom = refs.at(bam.RefID).RefName; CHRPOS alignmentEnd = bam.GetEndPosition(); // extract the block starts and lengths from the CIGAR string - GetBamBlocks(bam, chrom, bedBlocks, false, true); + if (!splitOnDeletions) + GetBamBlocks(bam, chrom, bedBlocks, false, true); + else + GetBamBlocks(bam, chrom, bedBlocks, true, true); if (bamTag == "") { cout << refs.at(bam.RefID).RefName << "\t" diff --git a/test/bamtobed/test-bamtobed.sh b/test/bamtobed/test-bamtobed.sh index 358d8a7e..193a3c1e 100644 --- a/test/bamtobed/test-bamtobed.sh +++ b/test/bamtobed/test-bamtobed.sh @@ -18,6 +18,7 @@ samtools view -Sb one_block.sam > one_block.bam 2>/dev/null samtools view -Sb two_blocks.sam > two_blocks.bam 2>/dev/null samtools view -Sb three_blocks.sam > three_blocks.bam 2>/dev/null samtools view -Sb sam-w-del.sam > sam-w-del.bam 2>/dev/null +samtools view -Sb two_blocks_w_D.sam > two_blocks_w_D.bam 2>/dev/null ################################################################## @@ -110,4 +111,66 @@ $BT bamtobed -i three_blocks.bam -bed12 | $BT bed12tobed6 > bed12 check split bed12 rm split bed12 +################################################################## +# Test an alignment with a D operator and N operator -split option +################################################################## +echo " bamtobed.t9...\c" +echo \ +"chr1 0 15 two_blocks_1_1/2 40 + +chr1 25 40 two_blocks_1_1/2 40 + +chr1 99 129 two_blocks_1_2/1 40 + +chr1 0 15 two_blocks_2_1/2 40 + +chr1 25 42 two_blocks_2_1/2 40 + +chr1 99 129 two_blocks_2_2/1 40 +" > exp +$BT bamtobed -i two_blocks_w_D.bam -split > obs +check exp obs +rm exp obs + + +################################################################## +# Test an alignment with a D operator and N operator -splitD option +################################################################## +echo " bamtobed.t10...\c" +echo \ +"chr1 0 15 two_blocks_1_1/2 40 + +chr1 25 40 two_blocks_1_1/2 40 + +chr1 99 129 two_blocks_1_2/1 40 + +chr1 0 15 two_blocks_2_1/2 40 + +chr1 25 35 two_blocks_2_1/2 40 + +chr1 37 42 two_blocks_2_1/2 40 + +chr1 99 129 two_blocks_2_2/1 40 +" > exp +$BT bamtobed -i two_blocks_w_D.bam -splitD > obs +check exp obs +rm exp obs + + +################################################################## +# Test an alignment with a D operator and N operator -bed12 option +################################################################## +echo " bamtobed.t9...\c" +echo \ +"chr1 0 40 two_blocks_1_1/2 40 + 0 40 255,0,0 2 15,15 0,25 +chr1 99 129 two_blocks_1_2/1 40 + 99 129 255,0,0 1 30 0 +chr1 0 42 two_blocks_2_1/2 40 + 0 42 255,0,0 2 15,17 0,25 +chr1 99 129 two_blocks_2_2/1 40 + 99 129 255,0,0 1 30 0" > exp +$BT bamtobed -i two_blocks_w_D.bam -bed12 > obs +check exp obs +rm exp obs + + +################################################################## +# Test an alignment with a D operator and N operator -bed12 option +################################################################## +echo " bamtobed.t11...\c" +echo \ +"chr1 0 40 two_blocks_1_1/2 40 + 0 40 255,0,0 2 15,15 0,25 +chr1 99 129 two_blocks_1_2/1 40 + 99 129 255,0,0 1 30 0 +chr1 0 42 two_blocks_2_1/2 40 + 0 42 255,0,0 3 15,10,5 0,25,37 +chr1 99 129 two_blocks_2_2/1 40 + 99 129 255,0,0 1 30 0" > exp +$BT bamtobed -i two_blocks_w_D.bam -bed12 -splitD > obs + +check exp obs +rm exp obs + + rm *.bam diff --git a/test/bamtobed/two_blocks_w_D.sam b/test/bamtobed/two_blocks_w_D.sam new file mode 100644 index 00000000..00ddff2b --- /dev/null +++ b/test/bamtobed/two_blocks_w_D.sam @@ -0,0 +1,6 @@ +@HD VN:1.0 GO:none SO:coordinate +@SQ SN:chr1 LN:249250621 +two_blocks_1_1 163 chr1 1 40 15M10N15M * 0 0 GAAGGCCACCGCCGCGGTTATTTTCCTTCA CCCDDB?=FJIIJIGFJIJHIJJJJJJJJI MD:Z:50 XA:i:2 +two_blocks_1_2 99 chr1 100 40 30M * 0 0 AGGCGATGCTAACGAAAAATTCGGAAATTT CCCDDB?=FJIIJIGFJIJHIJJJJJJJJI MD:Z:50 XA:i:2 +two_blocks_2_1 163 chr1 1 40 15M10N10M2D5M * 0 0 GAAGGCCACCGCCGCGGTTATTTTCCTTCA CCCDDB?=FJIIJIGFJIJHIJJJJJJJJI MD:Z:50 XA:i:2 +two_blocks_2_2 99 chr1 100 40 30M * 0 0 AGGCGATGCTAACGAAAAATTCGGAAATTT CCCDDB?=FJIIJIGFJIJHIJJJJJJJJI MD:Z:50 XA:i:2 diff --git a/test/sample/test-sample.sh b/test/sample/test-sample.sh index 56254dce..9cae9bc0 100644 --- a/test/sample/test-sample.sh +++ b/test/sample/test-sample.sh @@ -91,10 +91,10 @@ rm obs exp # Test that we get the requested number of records ############################################################ echo " sample.new.t07...\c" -echo 10 > exp -$BT sample -i mainFile.bed -n 10 | wc -l | cut -f1 -d ' '> obs +echo " 10" > exp +$BT sample -i mainFile.bed -n 10 | wc -l > obs check obs exp -rm obs exp +#rm obs exp ########################################################### -- GitLab