From 86d41f165d0b00e233128d548eb4995b9e101574 Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Thu, 23 Jun 2011 08:47:54 -0400 Subject: [PATCH] Added -counts option to coverageBed to only collect the count of overlaps (no pct., etc.) --- src/coverageBed/coverageBed.cpp | 48 +++++++++++++++++++++++++++----- src/coverageBed/coverageBed.h | 8 +++++- src/coverageBed/coverageMain.cpp | 10 +++++-- src/utils/bedFile/bedFile.cpp | 46 ++++++++++++++++-------------- src/utils/bedFile/bedFile.h | 4 +-- 5 files changed, 83 insertions(+), 33 deletions(-) diff --git a/src/coverageBed/coverageBed.cpp b/src/coverageBed/coverageBed.cpp index d1ed82c0..312c6051 100644 --- a/src/coverageBed/coverageBed.cpp +++ b/src/coverageBed/coverageBed.cpp @@ -14,7 +14,8 @@ // build BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool forceStrand, - bool writeHistogram, bool bamInput, bool obeySplits, bool eachBase) { + bool writeHistogram, bool bamInput, bool obeySplits, + bool eachBase, bool countsOnly) { _bedAFile = bedAFile; _bedBFile = bedBFile; @@ -27,6 +28,7 @@ BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool forceStrand, _eachBase = eachBase; _writeHistogram = writeHistogram; _bamInput = bamInput; + _countsOnly = countsOnly; if (_bamInput == false) @@ -58,7 +60,7 @@ void BedCoverage::CollectCoverageBed() { if (bedStatus == BED_VALID) { // process the BED entry as a single block if (_obeySplits == false) - _bedB->countHits(a, _forceStrand); + _bedB->countHits(a, _forceStrand, _countsOnly); // split the BED into discrete blocksand process each independently. else { bedVector bedBlocks; @@ -66,7 +68,7 @@ void BedCoverage::CollectCoverageBed() { // use countSplitHits to avoid over-counting each split chunk // as distinct read coverage. - _bedB->countSplitHits(bedBlocks, _forceStrand); + _bedB->countSplitHits(bedBlocks, _forceStrand, _countsOnly); } a = nullBed; } @@ -74,7 +76,10 @@ void BedCoverage::CollectCoverageBed() { _bedA->Close(); // report the coverage (summary or histogram) for BED B. - ReportCoverage(); + if (_countsOnly == true) + ReportCounts(); + else + ReportCoverage(); } @@ -107,7 +112,7 @@ void BedCoverage::CollectCoverageBam(string bamFile) { a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-"; - _bedB->countHits(a, _forceStrand); + _bedB->countHits(a, _forceStrand, _countsOnly); } // split the BAM alignment into discrete blocks and // look for overlaps only within each block. @@ -119,17 +124,46 @@ void BedCoverage::CollectCoverageBam(string bamFile) { getBamBlocks(bam, refs, bedBlocks, true); // use countSplitHits to avoid over-counting each split chunk // as distinct read coverage. - _bedB->countSplitHits(bedBlocks, _forceStrand); + _bedB->countSplitHits(bedBlocks, _forceStrand, _countsOnly); } } } // report the coverage (summary or histogram) for BED B. - ReportCoverage(); + if (_countsOnly == true) + ReportCounts(); + else + ReportCoverage(); // close the BAM file reader.Close(); } +void BedCoverage::ReportCounts() { + + + // process each chromosome + masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin(); + masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end(); + for (; chromItr != chromEnd; ++chromItr) + { + // for each chrom, process each bin + binsToBedCovs::const_iterator binItr = chromItr->second.begin(); + binsToBedCovs::const_iterator binEnd = chromItr->second.end(); + for (; binItr != binEnd; ++binItr) + { + // for each chrom & bin, compute and report + // the observed coverage for each feature + vector<BEDCOV>::const_iterator bedItr = binItr->second.begin(); + vector<BEDCOV>::const_iterator bedEnd = binItr->second.end(); + for (; bedItr != bedEnd; ++bedItr) + { + _bedB->reportBedTab(*bedItr); + printf("%d\n", bedItr->count); + } + } + } +} + void BedCoverage::ReportCoverage() { map<unsigned int, unsigned int> allDepthHist; diff --git a/src/coverageBed/coverageBed.h b/src/coverageBed/coverageBed.h index 217a88c1..a21f95fd 100644 --- a/src/coverageBed/coverageBed.h +++ b/src/coverageBed/coverageBed.h @@ -37,7 +37,7 @@ public: // constructor BedCoverage(string &bedAFile, string &bedBFile, bool forceStrand, bool writeHistogram, - bool bamInput, bool obeySplits, bool eachBase); + bool bamInput, bool obeySplits, bool eachBase, bool countsOnly); // destructor ~BedCoverage(void); @@ -65,9 +65,15 @@ private: // should discrete coverage be reported for each base in each feature? bool _eachBase; + + // should we just count overlaps and not try to describe the breadth? + bool _countsOnly; // private function for reporting coverage information void ReportCoverage(); + + // private function for reporting overlap counts + void ReportCounts(); void CollectCoverageBed(); diff --git a/src/coverageBed/coverageMain.cpp b/src/coverageBed/coverageMain.cpp index 7b687af5..30939681 100644 --- a/src/coverageBed/coverageMain.cpp +++ b/src/coverageBed/coverageMain.cpp @@ -35,11 +35,12 @@ int main(int argc, char* argv[]) { // parm flags bool forceStrand = false; bool writeHistogram = false; - bool eachBase = false; + bool eachBase = false; bool obeySplits = false; bool bamInput = false; bool haveBedA = false; bool haveBedB = false; + bool countsOnly = false; // check to see if we should print out some help if(argc <= 1) showHelp = true; @@ -94,6 +95,9 @@ int main(int argc, char* argv[]) { else if (PARAMETER_CHECK("-split", 6, parameterLength)) { obeySplits = true; } + else if (PARAMETER_CHECK("-counts", 7, parameterLength)) { + countsOnly = true; + } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; showHelp = true; @@ -107,7 +111,7 @@ int main(int argc, char* argv[]) { } if (!showHelp) { - BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram, bamInput, obeySplits, eachBase); + BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram, bamInput, obeySplits, eachBase, countsOnly); delete bg; return 0; } @@ -143,6 +147,8 @@ void ShowHelp(void) { cerr << "\t-d\t" << "Report the depth at each position in each B feature." << endl; cerr << "\t\tPositions reported are one based. Each position" << endl; cerr << "\t\tand depth follow the complete B feature." << endl << endl; + + cerr << "\t-counts\t" << "Only report the count of overlaps, don't compute fraction, etc." << endl << endl; cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl; cerr << "\t\twhen computing coverage." << endl; diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp index 914a431a..c2fad794 100644 --- a/src/utils/bedFile/bedFile.cpp +++ b/src/utils/bedFile/bedFile.cpp @@ -327,7 +327,7 @@ bool BedFile::FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, } -void BedFile::countHits(const BED &a, bool forceStrand) { +void BedFile::countHits(const BED &a, bool forceStrand, bool countsOnly) { BIN startBin, endBin; startBin = (a.start >> _binFirstShift); @@ -354,18 +354,20 @@ void BedFile::countHits(const BED &a, bool forceStrand) { else if (overlaps(bedItr->start, bedItr->end, a.start, a.end) > 0) { bedItr->count++; - if (a.zeroLength == false) { - bedItr->depthMap[a.start+1].starts++; - bedItr->depthMap[a.end].ends++; - } - else { - // correct for the fact that we artificially expanded the zeroLength feature - bedItr->depthMap[a.start+2].starts++; - bedItr->depthMap[a.end-1].ends++; - } + if (countsOnly == false) { + if (a.zeroLength == false) { + bedItr->depthMap[a.start+1].starts++; + bedItr->depthMap[a.end].ends++; + } + else { + // correct for the fact that we artificially expanded the zeroLength feature + bedItr->depthMap[a.start+2].starts++; + bedItr->depthMap[a.end-1].ends++; + } - if (a.start < bedItr->minOverlapStart) { - bedItr->minOverlapStart = a.start; + if (a.start < bedItr->minOverlapStart) { + bedItr->minOverlapStart = a.start; + } } } } @@ -376,7 +378,7 @@ void BedFile::countHits(const BED &a, bool forceStrand) { } -void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool forceStrand) { +void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool forceStrand, bool countsOnly) { // set to track the distinct B features that had coverage. // we'll update the counts of coverage for these features by one @@ -410,14 +412,16 @@ void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool forceStrand) { continue; } else if (overlaps(bedItr->start, bedItr->end, blockItr->start, blockItr->end) > 0) { - if (blockItr->zeroLength == false) { - bedItr->depthMap[blockItr->start+1].starts++; - bedItr->depthMap[blockItr->end].ends++; - } - else { - // correct for the fact that we artificially expanded the zeroLength feature - bedItr->depthMap[blockItr->start+2].starts++; - bedItr->depthMap[blockItr->end-1].ends++; + if (countsOnly == false) { + if (blockItr->zeroLength == false) { + bedItr->depthMap[blockItr->start+1].starts++; + bedItr->depthMap[blockItr->end].ends++; + } + else { + // correct for the fact that we artificially expanded the zeroLength feature + bedItr->depthMap[blockItr->start+2].starts++; + bedItr->depthMap[blockItr->end-1].ends++; + } } validHits.insert(bedItr); diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h index 8d80113f..73913152 100644 --- a/src/utils/bedFile/bedFile.h +++ b/src/utils/bedFile/bedFile.h @@ -437,14 +437,14 @@ public: // Given a chrom, start, end and strand for a single feature, // increment a the number of hits for each feature in B file // that the feature overlaps - void countHits(const BED &a, bool forceStrand); + void countHits(const BED &a, bool forceStrand = false, bool countsOnly = false); // same as above, but has special logic that processes a set of // BED "blocks" from a single entry so as to avoid over-counting // each "block" of a single BAM/BED12 as distinct coverage. That is, // if one read has four block, we only want to count the coverage as // coming from one read, not four. - void countSplitHits(const vector<BED> &bedBlock, bool forceStrand); + void countSplitHits(const vector<BED> &bedBlock, bool forceStrand = false, bool countsOnly = false); // Given a chrom, start, end and strand for a single feature, // increment a the number of hits for each feature in B file -- GitLab