From db638be7e3f7c3b1f630200589657f70ac1d928c Mon Sep 17 00:00:00 2001
From: arq5x <arq5x@virginia.edu>
Date: Mon, 9 Dec 2013 17:14:39 -0500
Subject: [PATCH] [TST] cleanup sample test

---
 src/bamToBed/bamToBed.cpp        | 87 +++++++++++++++++++++-----------
 test/bamtobed/test-bamtobed.sh   | 63 +++++++++++++++++++++++
 test/bamtobed/two_blocks_w_D.sam |  6 +++
 test/sample/test-sample.sh       |  6 +--
 4 files changed, 129 insertions(+), 33 deletions(-)
 create mode 100644 test/bamtobed/two_blocks_w_D.sam

diff --git a/src/bamToBed/bamToBed.cpp b/src/bamToBed/bamToBed.cpp
index c89441a0..16d61644 100644
--- a/src/bamToBed/bamToBed.cpp
+++ b/src/bamToBed/bamToBed.cpp
@@ -38,9 +38,9 @@ void bamtobed_help(void);
 
 void ConvertBamToBed(const string &bamFile, bool useEditDistance, 
                      const string &bamTag, bool writeBed12, 
-                     bool obeySplits, const string &color, 
-                     bool useCigar,   bool useNovoalign, 
-                     bool useBWA);
+                     bool obeySplits, bool splitOnDeletions, 
+                     const string &color, bool useCigar,
+                     bool useNovoalign, bool useBWA);
                      
 void ConvertBamToBedpe(const string &bamFile, 
                        const bool &useEditDistance, bool mate1First);
@@ -49,11 +49,13 @@ string PrintTag(const BamAlignment &bam, const string &tag);
 
 void PrintBed(const BamAlignment &bam, const RefVector &refs, 
               bool useEditDistance, const string &bamTag, 
-              bool obeySplits, bool useCigar, 
-              bool useNovoalign, bool useBWA);
+              bool obeySplits, bool splitOnDeletions, 
+              bool useCigar, bool useNovoalign, 
+              bool useBWA);
               
 void PrintBed12(const BamAlignment &bam, const RefVector &refs, 
-                bool useEditDistance, const string &bamTag, 
+                bool useEditDistance, const string &bamTag,
+                bool obeySplits, bool splitOnDeletions, 
                 string color = "255,0,0");
 
 void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2,
@@ -86,6 +88,7 @@ int bamtobed_main(int argc, char* argv[]) {
     bool useAlignmentScore = false;
     bool useCigar          = false;
     bool obeySplits        = false;
+    bool splitOnDeletions  = false;
     bool mate1First        = false;
     bool useNovoalign      = false;  // custom for Quinlan/Hall research
     bool useBWA            = false;  // custom for Quinlan/Hall research
@@ -115,32 +118,36 @@ int bamtobed_main(int argc, char* argv[]) {
             }
         }
         else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) {
-                writeBedPE = true;
+            writeBedPE = true;
         }
         else if(PARAMETER_CHECK("-bed12", 6, parameterLength)) {
-                writeBed12 = true;
+            writeBed12 = true;
         }
         else if(PARAMETER_CHECK("-split", 6, parameterLength)) {
-                obeySplits = true;
+            obeySplits = true;
+        }
+        else if(PARAMETER_CHECK("-splitD", 7, parameterLength)) {
+            obeySplits = true;
+            splitOnDeletions = true;
         }
         else if(PARAMETER_CHECK("-ed", 3, parameterLength)) {
-                useEditDistance = true;
-                tag = "NM";
+            useEditDistance = true;
+            tag = "NM";
         }
         else if(PARAMETER_CHECK("-cigar", 6, parameterLength)) {
-                useCigar = true;
+            useCigar = true;
         }
         else if(PARAMETER_CHECK("-as", 3, parameterLength)) {
-                useAlignmentScore = true;
+            useAlignmentScore = true;
         }
         else if(PARAMETER_CHECK("-novo", 5, parameterLength)) {
-                useNovoalign = true;
+            useNovoalign = true;
         }
         else if(PARAMETER_CHECK("-bwa", 4, parameterLength)) {
-                useBWA = true;
+            useBWA = true;
         }
         else if(PARAMETER_CHECK("-mate1", 6, parameterLength)) {
-                mate1First = true;
+            mate1First = true;
         }
         else if(PARAMETER_CHECK("-color", 6, parameterLength)) {
             if ((i+1) < argc) {
@@ -210,8 +217,8 @@ int bamtobed_main(int argc, char* argv[]) {
         if (writeBedPE == false)
             ConvertBamToBed(bamFile, useEditDistance, 
                             tag, writeBed12, 
-                            obeySplits, color, 
-                            useCigar,
+                            obeySplits, splitOnDeletions, 
+                            color, useCigar,
                             useNovoalign, useBWA);
         else
             ConvertBamToBedpe(bamFile, useEditDistance, mate1First);
@@ -239,10 +246,14 @@ void bamtobed_help(void) {
     cerr << "\t-mate1\t"      << "When writing BEDPE (-bedpe) format, " << endl;
     cerr                      << "\t\talways report mate one as the first BEDPE \"block\"." << endl << endl;
   
-    cerr << "\t-bed12\t"      << "Write \"blocked\" BED format (aka \"BED12\")." << endl << endl;
+    cerr << "\t-bed12\t"      << "Write \"blocked\" BED format (aka \"BED12\"). Forces -split." << endl << endl;
     cerr                      << "\t\thttp://genome-test.cse.ucsc.edu/FAQ/FAQformat#format1" << endl << endl;
   
-    cerr << "\t-split\t"      << "Report \"split\" BAM alignments as separate BED entries." << endl << endl;
+    cerr << "\t-split\t"      << "Report \"split\" BAM alignments as separate BED entries." << endl ;
+    cerr                      << "\t\tSplits only on N CIGAR operations." << endl << endl;
+
+    cerr << "\t-splitD\t"   << "Split alignments based on N and D CIGAR operators." << endl;
+    cerr                      << "\t\tForces -split." << endl << endl;
   
     cerr << "\t-ed\t"         << "Use BAM edit distance (NM tag) for BED score." << endl;
     cerr                      << "\t\t- Default for BED is to use mapping quality." << endl;
@@ -267,8 +278,10 @@ void bamtobed_help(void) {
 
 
 void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string &bamTag,
-                     bool writeBed12, bool obeySplits, const string &color, 
-                     bool useCigar, bool useNovoalign, bool useBWA) {
+                     bool writeBed12, bool obeySplits, bool splitOnDeletions, 
+                     const string &color, bool useCigar, bool useNovoalign, bool useBWA) 
+{
+    
     // open the BAM file
     BamReader reader;
     if (!reader.Open(bamFile)) {
@@ -285,9 +298,11 @@ void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string &
     while (reader.GetNextAlignment(bam)) {
         if (bam.IsMapped() == true) {
             if (writeBed12 == false)                 // BED
-                PrintBed(bam, refs, useEditDistance, bamTag, obeySplits, useCigar, useNovoalign, useBWA);
+                PrintBed(bam, refs, useEditDistance, bamTag, obeySplits, splitOnDeletions,
+                         useCigar, useNovoalign, useBWA);
             else                                     //"blocked" BED
-                PrintBed12(bam, refs, useEditDistance, bamTag, color);
+                PrintBed12(bam, refs, useEditDistance, bamTag, obeySplits, splitOnDeletions,
+                           color);
         }
     }
     reader.Close();
@@ -379,8 +394,9 @@ string PrintTag(const BamAlignment &bam, const string &tag)
 
 void PrintBed(const BamAlignment &bam,  const RefVector &refs, 
               bool useEditDistance, const string &bamTag, 
-              bool obeySplits, bool useCigar, 
-              bool useNovoalign, bool useBWA) 
+              bool obeySplits, bool splitOnDeletions,
+              bool useCigar, bool useNovoalign, 
+              bool useBWA) 
 {
     // set the strand
     string strand = "+";
@@ -474,14 +490,18 @@ void PrintBed(const BamAlignment &bam,  const RefVector &refs,
         vector<BED> bedBlocks;
         string chrom = refs.at(bam.RefID).RefName;
         // extract the block starts and lengths from the CIGAR string
-        GetBamBlocks(bam, chrom, bedBlocks, false, true);
+        if (!splitOnDeletions)
+            GetBamBlocks(bam, chrom, bedBlocks, false, true);
+        else
+            GetBamBlocks(bam, chrom, bedBlocks, true, true);
+
 
         unsigned int i;
         for (i = 0; i < bedBlocks.size(); ++i) {
             BED curr = bedBlocks[i];
 
             if (bamTag == "") {
-                printf("%s\t%d\t%d\t\%s\t%d\t%s\n", 
+                printf("%s\t%d\t%d\t%s\t%d\t%s\n", 
                        chrom.c_str(), 
                        curr.start,
                        curr.end,
@@ -504,7 +524,11 @@ void PrintBed(const BamAlignment &bam,  const RefVector &refs,
 }
 
 
-void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color) {
+void PrintBed12(const BamAlignment &bam, const RefVector &refs, 
+                bool useEditDistance, const string &bamTag, 
+                bool obeySplits, bool splitOnDeletions,
+                string color) 
+{
 
     // set the strand
     string strand = "+";
@@ -520,7 +544,10 @@ void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDist
     string chrom = refs.at(bam.RefID).RefName;
     CHRPOS alignmentEnd = bam.GetEndPosition();
     // extract the block starts and lengths from the CIGAR string
-    GetBamBlocks(bam, chrom, bedBlocks, false, true);
+    if (!splitOnDeletions)
+        GetBamBlocks(bam, chrom, bedBlocks, false, true);
+    else
+        GetBamBlocks(bam, chrom, bedBlocks, true, true);
 
     if (bamTag == "") {
         cout << refs.at(bam.RefID).RefName << "\t"
diff --git a/test/bamtobed/test-bamtobed.sh b/test/bamtobed/test-bamtobed.sh
index 358d8a7e..193a3c1e 100644
--- a/test/bamtobed/test-bamtobed.sh
+++ b/test/bamtobed/test-bamtobed.sh
@@ -18,6 +18,7 @@ samtools view -Sb one_block.sam > one_block.bam 2>/dev/null
 samtools view -Sb two_blocks.sam > two_blocks.bam 2>/dev/null
 samtools view -Sb three_blocks.sam > three_blocks.bam 2>/dev/null
 samtools view -Sb sam-w-del.sam > sam-w-del.bam 2>/dev/null
+samtools view -Sb two_blocks_w_D.sam > two_blocks_w_D.bam 2>/dev/null
 
 
 ##################################################################
@@ -110,4 +111,66 @@ $BT bamtobed -i three_blocks.bam -bed12 |  $BT bed12tobed6 > bed12
 check split bed12
 rm split bed12
 
+##################################################################
+# Test an alignment with a D operator and N operator -split option
+##################################################################
+echo "    bamtobed.t9...\c"
+echo \
+"chr1	0	15	two_blocks_1_1/2	40	+
+chr1	25	40	two_blocks_1_1/2	40	+
+chr1	99	129	two_blocks_1_2/1	40	+
+chr1	0	15	two_blocks_2_1/2	40	+
+chr1	25	42	two_blocks_2_1/2	40	+
+chr1	99	129	two_blocks_2_2/1	40	+" > exp
+$BT bamtobed -i two_blocks_w_D.bam -split > obs
+check exp obs
+rm exp obs
+
+
+##################################################################
+# Test an alignment with a D operator and N operator -splitD option
+##################################################################
+echo "    bamtobed.t10...\c"
+echo \
+"chr1	0	15	two_blocks_1_1/2	40	+
+chr1	25	40	two_blocks_1_1/2	40	+
+chr1	99	129	two_blocks_1_2/1	40	+
+chr1	0	15	two_blocks_2_1/2	40	+
+chr1	25	35	two_blocks_2_1/2	40	+
+chr1	37	42	two_blocks_2_1/2	40	+
+chr1	99	129	two_blocks_2_2/1	40	+" > exp
+$BT bamtobed -i two_blocks_w_D.bam -splitD > obs
+check exp obs
+rm exp obs
+
+
+##################################################################
+# Test an alignment with a D operator and N operator -bed12 option
+##################################################################
+echo "    bamtobed.t9...\c"
+echo \
+"chr1	0	40	two_blocks_1_1/2	40	+	0	40	255,0,0	2	15,15	0,25
+chr1	99	129	two_blocks_1_2/1	40	+	99	129	255,0,0	1	30	0
+chr1	0	42	two_blocks_2_1/2	40	+	0	42	255,0,0	2	15,17	0,25
+chr1	99	129	two_blocks_2_2/1	40	+	99	129	255,0,0	1	30	0" > exp
+$BT bamtobed -i two_blocks_w_D.bam -bed12 > obs
+check exp obs
+rm exp obs
+
+
+##################################################################
+# Test an alignment with a D operator and N operator -bed12 option
+##################################################################
+echo "    bamtobed.t11...\c"
+echo \
+"chr1	0	40	two_blocks_1_1/2	40	+	0	40	255,0,0	2	15,15	0,25
+chr1	99	129	two_blocks_1_2/1	40	+	99	129	255,0,0	1	30	0
+chr1	0	42	two_blocks_2_1/2	40	+	0	42	255,0,0	3	15,10,5	0,25,37
+chr1	99	129	two_blocks_2_2/1	40	+	99	129	255,0,0	1	30	0" > exp
+$BT bamtobed -i two_blocks_w_D.bam -bed12 -splitD > obs
+
+check exp obs
+rm exp obs
+
+
 rm *.bam
diff --git a/test/bamtobed/two_blocks_w_D.sam b/test/bamtobed/two_blocks_w_D.sam
new file mode 100644
index 00000000..00ddff2b
--- /dev/null
+++ b/test/bamtobed/two_blocks_w_D.sam
@@ -0,0 +1,6 @@
+@HD	VN:1.0	GO:none	SO:coordinate
+@SQ	SN:chr1	LN:249250621
+two_blocks_1_1	163	chr1	1	40	15M10N15M	*	0	0	GAAGGCCACCGCCGCGGTTATTTTCCTTCA	CCCDDB?=FJIIJIGFJIJHIJJJJJJJJI	MD:Z:50	XA:i:2
+two_blocks_1_2	99	chr1	100	40	30M	*	0	0	AGGCGATGCTAACGAAAAATTCGGAAATTT	CCCDDB?=FJIIJIGFJIJHIJJJJJJJJI	MD:Z:50	XA:i:2
+two_blocks_2_1	163	chr1	1	40	15M10N10M2D5M	*	0	0	GAAGGCCACCGCCGCGGTTATTTTCCTTCA	CCCDDB?=FJIIJIGFJIJHIJJJJJJJJI	MD:Z:50	XA:i:2
+two_blocks_2_2	99	chr1	100	40	30M	*	0	0	AGGCGATGCTAACGAAAAATTCGGAAATTT	CCCDDB?=FJIIJIGFJIJHIJJJJJJJJI	MD:Z:50	XA:i:2
diff --git a/test/sample/test-sample.sh b/test/sample/test-sample.sh
index 56254dce..9cae9bc0 100644
--- a/test/sample/test-sample.sh
+++ b/test/sample/test-sample.sh
@@ -91,10 +91,10 @@ rm obs exp
 #  Test that we get the requested number of records
 ############################################################
 echo "    sample.new.t07...\c"
-echo 10 > exp
-$BT sample -i mainFile.bed -n 10 | wc -l | cut -f1 -d ' '> obs
+echo "      10" > exp
+$BT sample -i mainFile.bed -n 10 | wc -l > obs
 check obs exp
-rm obs exp
+#rm obs exp
 
 
 ###########################################################
-- 
GitLab