From 841c5596b76aa68226c5b8c0bb06d97617427788 Mon Sep 17 00:00:00 2001 From: Aaron Quinlan <aaronquinlan@gmail.com> Date: Wed, 22 Apr 2009 22:04:21 -0400 Subject: [PATCH] Added writeA (-wa) parameter to intersectBed -wa allows the original A entry to be written for each overlap with B. --- src/intersectBed/intersectBed.cpp | 69 ++++++++++++++++++++++-------- src/intersectBed/intersectBed.h | 3 +- src/intersectBed/intersectMain.cpp | 35 ++++++++++----- 3 files changed, 77 insertions(+), 30 deletions(-) diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp index c6b7dafe..87f470d7 100755 --- a/src/intersectBed/intersectBed.cpp +++ b/src/intersectBed/intersectBed.cpp @@ -18,12 +18,14 @@ Constructor */ BedIntersect::BedIntersect(string &bedAFile, string &bedBFile, bool &anyHit, -bool &writeB, float &overlapFraction, bool &noHit, bool &writeCount) { + bool &writeA, bool &writeB, float &overlapFraction, + bool &noHit, bool &writeCount) { this->bedAFile = bedAFile; this->bedBFile = bedBFile; this->anyHit = anyHit; this->noHit = noHit; + this->writeA = writeA; this->writeB = writeB; this->writeCount = writeCount; this->overlapFraction = overlapFraction; @@ -152,22 +154,37 @@ void BedIntersect::IntersectBed() { if (bedA->parseBedLine(a, bedFields, lineNum)) { vector<BED> hits; + bedB->binKeeperFind(bedB->bedMap[a.chrom], a.start, a.end, hits); int numOverlaps = 0; for (vector<BED>::iterator h = hits.begin(); h != hits.end(); ++h) { + int s = max(a.start, h->start); int e = min(a.end, h->end); + if (s < e) { + // is there enough overlap (default ~ 1bp) if ( ((float)(e-s) / (float)(a.end - a.start)) > this->overlapFraction ) { numOverlaps++; if (!anyHit && !noHit && !writeCount) { if (!writeB) { - reportAIntersect(a,s,e); cout << endl; + if (writeA) { + reportA(a); cout << endl; + } + else { + reportAIntersect(a,s,e); cout << endl; + } } else { - reportAIntersect(a,s,e); cout << "\t"; - reportB(*h); cout << endl; + if (writeA) { + reportA(a); cout << "\t"; + reportB(*h); cout << endl; + } + else { + reportAIntersect(a,s,e); cout << "\t"; + reportB(*h); cout << endl; + } } } } @@ -198,39 +215,53 @@ void BedIntersect::IntersectBed() { if (bedA->parseBedLine(a, bedFields, lineNum)) { vector<BED> hits; + bedB->binKeeperFind(bedB->bedMap[a.chrom], a.start, a.end, hits); int numOverlaps = 0; for (vector<BED>::iterator h = hits.begin(); h != hits.end(); ++h) { - + int s = max(a.start, h->start); int e = min(a.end, h->end); + if (s < e) { + // is there enough overlap (default ~ 1bp) if ( ((float)(e-s) / (float)(a.end - a.start)) > this->overlapFraction ) { numOverlaps++; if (!anyHit && !noHit && !writeCount) { if (!writeB) { - reportAIntersect(a,s,e); cout << endl; + if (writeA) { + reportA(a); cout << endl; + } + else { + reportAIntersect(a,s,e); cout << endl; + } } else { - reportAIntersect(a,s,e); cout << "\t"; - reportB(*h); cout << endl; + if (writeA) { + reportA(a); cout << "\t"; + reportB(*h); cout << endl; + } + else { + reportAIntersect(a,s,e); cout << "\t"; + reportB(*h); cout << endl; + } } } - } - } - if (anyHit && (numOverlaps >= 1)) { - reportA(a); cout << endl; - } - else if (writeCount) { - reportA(a); cout << "\t" << numOverlaps << endl; - } - else if (noHit && (numOverlaps == 0)) { - reportA(a); cout << endl; + } } } + if (anyHit && (numOverlaps >= 1)) { + reportA(a); cout << endl; + } + else if (writeCount) { + reportA(a); cout << "\t" << numOverlaps << endl; + } + else if (noHit && (numOverlaps == 0)) { + reportA(a); cout << endl; + } } - } + } } } // END Intersect BED3 diff --git a/src/intersectBed/intersectBed.h b/src/intersectBed/intersectBed.h index 376634ab..5aaf600c 100755 --- a/src/intersectBed/intersectBed.h +++ b/src/intersectBed/intersectBed.h @@ -25,7 +25,7 @@ class BedIntersect { public: // constructor - BedIntersect(string &, string &, bool &, bool &, float &, bool &, bool &); + BedIntersect(string &, string &, bool &, bool &, bool &, float &, bool &, bool &); // destructor ~BedIntersect(void); @@ -44,6 +44,7 @@ private: string bedBFile; string notInBFile; bool anyHit; + bool writeA; bool writeB; bool writeCount; float overlapFraction; diff --git a/src/intersectBed/intersectMain.cpp b/src/intersectBed/intersectMain.cpp index 0e1d3a6b..400dc7ee 100755 --- a/src/intersectBed/intersectMain.cpp +++ b/src/intersectBed/intersectMain.cpp @@ -25,13 +25,17 @@ int main(int argc, char* argv[]) { // input arguments float overlapFraction = 1E-9; + int slop = 0; bool haveBedA = false; bool haveBedB = false; bool noHit = false; bool anyHit = false; + bool writeA = false; bool writeB = false; bool writeCount = false; + bool haveFraction = false; + bool haveSlop = false; // check to see if we should print out some help if(argc <= 1) showHelp = true; @@ -53,34 +57,38 @@ int main(int argc, char* argv[]) { int parameterLength = (int)strlen(argv[i]); if(PARAMETER_CHECK("-a", 2, parameterLength)) { - haveBedA = true; - bedAFile = argv[i + 1]; + if ((i+1) < argc) { + haveBedA = true; + bedAFile = argv[i + 1]; + } i++; } else if(PARAMETER_CHECK("-b", 2, parameterLength)) { - haveBedB = true; - bedBFile = argv[i + 1]; + if ((i+1) < argc) { + haveBedB = true; + bedBFile = argv[i + 1]; + } i++; } else if(PARAMETER_CHECK("-u", 2, parameterLength)) { anyHit = true; - i++; } else if(PARAMETER_CHECK("-f", 2, parameterLength)) { + haveFraction = true; overlapFraction = atof(argv[i + 1]); i++; } + else if(PARAMETER_CHECK("-wa", 3, parameterLength)) { + writeA = true; + } else if(PARAMETER_CHECK("-wb", 3, parameterLength)) { writeB = true; - i++; } else if(PARAMETER_CHECK("-c", 2, parameterLength)) { writeCount = true; - i++; } else if (PARAMETER_CHECK("-v", 2, parameterLength)) { noHit = true; - i++; } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; @@ -103,10 +111,15 @@ int main(int argc, char* argv[]) { cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -c, not both." << endl << "*****" << endl; showHelp = true; } + + if (anyHit && writeCount) { + cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -c, not both." << endl << "*****" << endl; + showHelp = true; + } if (!showHelp) { - BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeB, overlapFraction, noHit, writeCount); + BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB, overlapFraction, noHit, writeCount); bi->IntersectBed(); return 0; } @@ -133,9 +146,11 @@ void ShowHelp(void) { cerr << "OPTIONS: " << endl; cerr << "\t" << "-u\t\t\t" << "Write ORIGINAL a.bed entry ONCE if ANY overlap bed." << endl << "\t\t\t\tIn other words, ignore multiple overlaps." << endl << endl; cerr << "\t" << "-v \t\t\t" << "Only report those entries in A that have NO OVERLAP in B." << endl << "\t\t\t\tSimilar to grep -v." << endl << endl; + cerr << "\t" << "-s (100000)\t\t" << "Slop added before and after each entry in A" << endl << "\t\t\t\tUseful for finding overlaps within N bases upstream and downstream." << endl << endl; cerr << "\t" << "-f (e.g. 0.05)\t\t" << "Minimum overlap req'd as fraction of a.bed." << endl << "\t\t\t\tDefault is 1E-9 (effectively 1bp)." << endl << endl; cerr << "\t" << "-c \t\t\t" << "For each entry in A, report the number of hits in B while restricting to -f." << endl << "\t\t\t\tReports 0 for A entries that have no overlap with B." << endl << endl; - cerr << "\t" << "-wb \t\t\t" << "Write the entry in B for each overlap." << endl << "\t\t\t\tUseful for knowing _what_ A overlaps. Restricted by -f." << endl << endl; + cerr << "\t" << "-wa \t\t\t" << "Write the original entry in A for each overlap." << endl << endl; + cerr << "\t" << "-wb \t\t\t" << "Write the original entry in B for each overlap." << endl << "\t\t\t\tUseful for knowing _what_ A overlaps. Restricted by -f." << endl << endl; cerr << "NOTES: " << endl; cerr << "\t" << "-i stdin\t\t" << "Allows intersectBed to read BED from stdin. E.g.: cat a.bed | intersectBed -a stdin -b B.bed" << endl << endl; -- GitLab