From 86d41f165d0b00e233128d548eb4995b9e101574 Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Thu, 23 Jun 2011 08:47:54 -0400
Subject: [PATCH] Added -counts option to coverageBed to only collect the count
 of overlaps (no pct., etc.)

---
 src/coverageBed/coverageBed.cpp  | 48 +++++++++++++++++++++++++++-----
 src/coverageBed/coverageBed.h    |  8 +++++-
 src/coverageBed/coverageMain.cpp | 10 +++++--
 src/utils/bedFile/bedFile.cpp    | 46 ++++++++++++++++--------------
 src/utils/bedFile/bedFile.h      |  4 +--
 5 files changed, 83 insertions(+), 33 deletions(-)

diff --git a/src/coverageBed/coverageBed.cpp b/src/coverageBed/coverageBed.cpp
index d1ed82c0..312c6051 100644
--- a/src/coverageBed/coverageBed.cpp
+++ b/src/coverageBed/coverageBed.cpp
@@ -14,7 +14,8 @@
 
 // build
 BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool forceStrand,
-                         bool writeHistogram, bool bamInput, bool obeySplits, bool eachBase) {
+                         bool writeHistogram, bool bamInput, bool obeySplits, 
+                         bool eachBase, bool countsOnly) {
 
     _bedAFile       = bedAFile;
     _bedBFile       = bedBFile;
@@ -27,6 +28,7 @@ BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool forceStrand,
     _eachBase       = eachBase;
     _writeHistogram = writeHistogram;
     _bamInput       = bamInput;
+    _countsOnly     = countsOnly;
 
 
     if (_bamInput == false)
@@ -58,7 +60,7 @@ void BedCoverage::CollectCoverageBed() {
         if (bedStatus == BED_VALID) {
             // process the BED entry as a single block
             if (_obeySplits == false)
-                _bedB->countHits(a, _forceStrand);
+                _bedB->countHits(a, _forceStrand, _countsOnly);
             // split the BED into discrete blocksand process each independently.
             else {
                 bedVector bedBlocks;
@@ -66,7 +68,7 @@ void BedCoverage::CollectCoverageBed() {
 
                 // use countSplitHits to avoid over-counting each split chunk
                 // as distinct read coverage.
-                _bedB->countSplitHits(bedBlocks, _forceStrand);
+                _bedB->countSplitHits(bedBlocks, _forceStrand, _countsOnly);
             }
             a = nullBed;
         }
@@ -74,7 +76,10 @@ void BedCoverage::CollectCoverageBed() {
     _bedA->Close();
 
     // report the coverage (summary or histogram) for BED B.
-    ReportCoverage();
+    if (_countsOnly == true)
+        ReportCounts();
+    else 
+        ReportCoverage();
 }
 
 
@@ -107,7 +112,7 @@ void BedCoverage::CollectCoverageBam(string bamFile) {
                 a.strand = "+";
                 if (bam.IsReverseStrand()) a.strand = "-";
 
-                _bedB->countHits(a, _forceStrand);
+                _bedB->countHits(a, _forceStrand, _countsOnly);
             }
             // split the BAM alignment into discrete blocks and
             // look for overlaps only within each block.
@@ -119,17 +124,46 @@ void BedCoverage::CollectCoverageBam(string bamFile) {
                 getBamBlocks(bam, refs, bedBlocks, true);
                 // use countSplitHits to avoid over-counting each split chunk
                 // as distinct read coverage.
-                _bedB->countSplitHits(bedBlocks, _forceStrand);
+                _bedB->countSplitHits(bedBlocks, _forceStrand, _countsOnly);
             }
         }
     }
     // report the coverage (summary or histogram) for BED B.
-    ReportCoverage();
+    if (_countsOnly == true)
+        ReportCounts();
+    else 
+        ReportCoverage();
     // close the BAM file
     reader.Close();
 }
 
 
+void BedCoverage::ReportCounts() {
+
+
+    // process each chromosome
+    masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin();
+    masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end();
+    for (; chromItr != chromEnd; ++chromItr)
+    {
+        // for each chrom, process each bin
+        binsToBedCovs::const_iterator binItr = chromItr->second.begin();
+        binsToBedCovs::const_iterator binEnd = chromItr->second.end();
+        for (; binItr != binEnd; ++binItr)
+        {
+            // for each chrom & bin, compute and report
+            // the observed coverage for each feature
+            vector<BEDCOV>::const_iterator bedItr = binItr->second.begin();
+            vector<BEDCOV>::const_iterator bedEnd = binItr->second.end();
+            for (; bedItr != bedEnd; ++bedItr)
+            {
+                _bedB->reportBedTab(*bedItr);
+                printf("%d\n", bedItr->count);
+            }
+        }
+    }
+}
+
 void BedCoverage::ReportCoverage() {
 
     map<unsigned int, unsigned int> allDepthHist;
diff --git a/src/coverageBed/coverageBed.h b/src/coverageBed/coverageBed.h
index 217a88c1..a21f95fd 100644
--- a/src/coverageBed/coverageBed.h
+++ b/src/coverageBed/coverageBed.h
@@ -37,7 +37,7 @@ public:
 
     // constructor
     BedCoverage(string &bedAFile, string &bedBFile, bool forceStrand, bool writeHistogram,
-                bool bamInput, bool obeySplits, bool eachBase);
+                bool bamInput, bool obeySplits, bool eachBase, bool countsOnly);
 
     // destructor
     ~BedCoverage(void);
@@ -65,9 +65,15 @@ private:
 
     // should discrete coverage be reported for each base in each feature?
     bool _eachBase;
+    
+    // should we just count overlaps and not try to describe the breadth?
+    bool _countsOnly;
 
     // private function for reporting coverage information
     void ReportCoverage();
+    
+    // private function for reporting overlap counts
+    void ReportCounts();
 
     void CollectCoverageBed();
 
diff --git a/src/coverageBed/coverageMain.cpp b/src/coverageBed/coverageMain.cpp
index 7b687af5..30939681 100644
--- a/src/coverageBed/coverageMain.cpp
+++ b/src/coverageBed/coverageMain.cpp
@@ -35,11 +35,12 @@ int main(int argc, char* argv[]) {
     // parm flags
     bool forceStrand    = false;
     bool writeHistogram = false;
-    bool eachBase          = false;
+    bool eachBase       = false;
     bool obeySplits     = false;
     bool bamInput       = false;
     bool haveBedA       = false;
     bool haveBedB       = false;
+    bool countsOnly     = false;
 
     // check to see if we should print out some help
     if(argc <= 1) showHelp = true;
@@ -94,6 +95,9 @@ int main(int argc, char* argv[]) {
         else if (PARAMETER_CHECK("-split", 6, parameterLength)) {
             obeySplits = true;
         }
+        else if (PARAMETER_CHECK("-counts", 7, parameterLength)) {
+            countsOnly = true;
+        }
         else {
             cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
             showHelp = true;
@@ -107,7 +111,7 @@ int main(int argc, char* argv[]) {
     }
 
     if (!showHelp) {
-        BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram, bamInput, obeySplits, eachBase);
+        BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram, bamInput, obeySplits, eachBase, countsOnly);
         delete bg;
         return 0;
     }
@@ -143,6 +147,8 @@ void ShowHelp(void) {
     cerr << "\t-d\t"            << "Report the depth at each position in each B feature." << endl;
     cerr                        << "\t\tPositions reported are one based.  Each position" << endl;
     cerr                        << "\t\tand depth follow the complete B feature." << endl << endl;
+    
+    cerr << "\t-counts\t"       << "Only report the count of overlaps, don't compute fraction, etc." << endl << endl;
 
     cerr << "\t-split\t"        << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl;
     cerr                        << "\t\twhen computing coverage." << endl;
diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp
index 914a431a..c2fad794 100644
--- a/src/utils/bedFile/bedFile.cpp
+++ b/src/utils/bedFile/bedFile.cpp
@@ -327,7 +327,7 @@ bool BedFile::FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start,
 }
 
 
-void BedFile::countHits(const BED &a, bool forceStrand) {
+void BedFile::countHits(const BED &a, bool forceStrand, bool countsOnly) {
 
     BIN startBin, endBin;
     startBin = (a.start >> _binFirstShift);
@@ -354,18 +354,20 @@ void BedFile::countHits(const BED &a, bool forceStrand) {
                 else if (overlaps(bedItr->start, bedItr->end, a.start, a.end) > 0) {
 
                     bedItr->count++;
-                    if (a.zeroLength == false) {
-                        bedItr->depthMap[a.start+1].starts++;
-                        bedItr->depthMap[a.end].ends++;
-                    }
-                    else {
-                        // correct for the fact that we artificially expanded the zeroLength feature
-                        bedItr->depthMap[a.start+2].starts++;
-                        bedItr->depthMap[a.end-1].ends++;                        
-                    }
+                    if (countsOnly == false) {
+                        if (a.zeroLength == false) {
+                            bedItr->depthMap[a.start+1].starts++;
+                            bedItr->depthMap[a.end].ends++;
+                        }
+                        else {
+                            // correct for the fact that we artificially expanded the zeroLength feature
+                            bedItr->depthMap[a.start+2].starts++;
+                            bedItr->depthMap[a.end-1].ends++;                        
+                        }
 
-                    if (a.start < bedItr->minOverlapStart) {
-                        bedItr->minOverlapStart = a.start;
+                        if (a.start < bedItr->minOverlapStart) {
+                            bedItr->minOverlapStart = a.start;
+                        }
                     }
                 }
             }
@@ -376,7 +378,7 @@ void BedFile::countHits(const BED &a, bool forceStrand) {
 }
 
 
-void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool forceStrand) {
+void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool forceStrand, bool countsOnly) {
 
     // set to track the distinct B features that had coverage.
     // we'll update the counts of coverage for these features by one
@@ -410,14 +412,16 @@ void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool forceStrand) {
                         continue;
                     }
                     else if (overlaps(bedItr->start, bedItr->end, blockItr->start, blockItr->end) > 0) {
-                        if (blockItr->zeroLength == false) {
-                            bedItr->depthMap[blockItr->start+1].starts++;
-                            bedItr->depthMap[blockItr->end].ends++;
-                        }
-                        else {
-                            // correct for the fact that we artificially expanded the zeroLength feature
-                            bedItr->depthMap[blockItr->start+2].starts++;
-                            bedItr->depthMap[blockItr->end-1].ends++;
+                        if (countsOnly == false) {
+                            if (blockItr->zeroLength == false) {
+                                bedItr->depthMap[blockItr->start+1].starts++;
+                                bedItr->depthMap[blockItr->end].ends++;
+                            }
+                            else {
+                                // correct for the fact that we artificially expanded the zeroLength feature
+                                bedItr->depthMap[blockItr->start+2].starts++;
+                                bedItr->depthMap[blockItr->end-1].ends++;
+                            }
                         }
 
                         validHits.insert(bedItr);
diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h
index 8d80113f..73913152 100644
--- a/src/utils/bedFile/bedFile.h
+++ b/src/utils/bedFile/bedFile.h
@@ -437,14 +437,14 @@ public:
     // Given a chrom, start, end and strand for a single feature,
     // increment a the number of hits for each feature in B file
     // that the feature overlaps
-    void countHits(const BED &a, bool forceStrand);
+    void countHits(const BED &a, bool forceStrand = false, bool countsOnly = false);
 
     // same as above, but has special logic that processes a set of
     // BED "blocks" from a single entry so as to avoid over-counting
     // each "block" of a single BAM/BED12 as distinct coverage.  That is,
     // if one read has four block, we only want to count the coverage as
     // coming from one read, not four.
-    void countSplitHits(const vector<BED> &bedBlock, bool forceStrand);
+    void countSplitHits(const vector<BED> &bedBlock, bool forceStrand = false, bool countsOnly = false);
 
     // Given a chrom, start, end and strand for a single feature,
     // increment a the number of hits for each feature in B file
-- 
GitLab