diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp index c6b7dafe218d6d40766727cc610f7ce937683c10..87f470d747052bc87e873f43f83cbaff575fd2da 100755 --- a/src/intersectBed/intersectBed.cpp +++ b/src/intersectBed/intersectBed.cpp @@ -18,12 +18,14 @@ Constructor */ BedIntersect::BedIntersect(string &bedAFile, string &bedBFile, bool &anyHit, -bool &writeB, float &overlapFraction, bool &noHit, bool &writeCount) { + bool &writeA, bool &writeB, float &overlapFraction, + bool &noHit, bool &writeCount) { this->bedAFile = bedAFile; this->bedBFile = bedBFile; this->anyHit = anyHit; this->noHit = noHit; + this->writeA = writeA; this->writeB = writeB; this->writeCount = writeCount; this->overlapFraction = overlapFraction; @@ -152,22 +154,37 @@ void BedIntersect::IntersectBed() { if (bedA->parseBedLine(a, bedFields, lineNum)) { vector<BED> hits; + bedB->binKeeperFind(bedB->bedMap[a.chrom], a.start, a.end, hits); int numOverlaps = 0; for (vector<BED>::iterator h = hits.begin(); h != hits.end(); ++h) { + int s = max(a.start, h->start); int e = min(a.end, h->end); + if (s < e) { + // is there enough overlap (default ~ 1bp) if ( ((float)(e-s) / (float)(a.end - a.start)) > this->overlapFraction ) { numOverlaps++; if (!anyHit && !noHit && !writeCount) { if (!writeB) { - reportAIntersect(a,s,e); cout << endl; + if (writeA) { + reportA(a); cout << endl; + } + else { + reportAIntersect(a,s,e); cout << endl; + } } else { - reportAIntersect(a,s,e); cout << "\t"; - reportB(*h); cout << endl; + if (writeA) { + reportA(a); cout << "\t"; + reportB(*h); cout << endl; + } + else { + reportAIntersect(a,s,e); cout << "\t"; + reportB(*h); cout << endl; + } } } } @@ -198,39 +215,53 @@ void BedIntersect::IntersectBed() { if (bedA->parseBedLine(a, bedFields, lineNum)) { vector<BED> hits; + bedB->binKeeperFind(bedB->bedMap[a.chrom], a.start, a.end, hits); int numOverlaps = 0; for (vector<BED>::iterator h = hits.begin(); h != hits.end(); ++h) { - + int s = max(a.start, h->start); int e = min(a.end, h->end); + if (s < e) { + // is there enough overlap (default ~ 1bp) if ( ((float)(e-s) / (float)(a.end - a.start)) > this->overlapFraction ) { numOverlaps++; if (!anyHit && !noHit && !writeCount) { if (!writeB) { - reportAIntersect(a,s,e); cout << endl; + if (writeA) { + reportA(a); cout << endl; + } + else { + reportAIntersect(a,s,e); cout << endl; + } } else { - reportAIntersect(a,s,e); cout << "\t"; - reportB(*h); cout << endl; + if (writeA) { + reportA(a); cout << "\t"; + reportB(*h); cout << endl; + } + else { + reportAIntersect(a,s,e); cout << "\t"; + reportB(*h); cout << endl; + } } } - } - } - if (anyHit && (numOverlaps >= 1)) { - reportA(a); cout << endl; - } - else if (writeCount) { - reportA(a); cout << "\t" << numOverlaps << endl; - } - else if (noHit && (numOverlaps == 0)) { - reportA(a); cout << endl; + } } } + if (anyHit && (numOverlaps >= 1)) { + reportA(a); cout << endl; + } + else if (writeCount) { + reportA(a); cout << "\t" << numOverlaps << endl; + } + else if (noHit && (numOverlaps == 0)) { + reportA(a); cout << endl; + } } - } + } } } // END Intersect BED3 diff --git a/src/intersectBed/intersectBed.h b/src/intersectBed/intersectBed.h index 376634abfb90d56f804c66b40d4060137d1cc20f..5aaf600c389382eb1f6fcb6c82078a99c3d6d1c0 100755 --- a/src/intersectBed/intersectBed.h +++ b/src/intersectBed/intersectBed.h @@ -25,7 +25,7 @@ class BedIntersect { public: // constructor - BedIntersect(string &, string &, bool &, bool &, float &, bool &, bool &); + BedIntersect(string &, string &, bool &, bool &, bool &, float &, bool &, bool &); // destructor ~BedIntersect(void); @@ -44,6 +44,7 @@ private: string bedBFile; string notInBFile; bool anyHit; + bool writeA; bool writeB; bool writeCount; float overlapFraction; diff --git a/src/intersectBed/intersectMain.cpp b/src/intersectBed/intersectMain.cpp index 0e1d3a6b637d6fcf86d533831f0db14bd295459c..400dc7ee531c6846ce5c4e61f96f6e48a3372561 100755 --- a/src/intersectBed/intersectMain.cpp +++ b/src/intersectBed/intersectMain.cpp @@ -25,13 +25,17 @@ int main(int argc, char* argv[]) { // input arguments float overlapFraction = 1E-9; + int slop = 0; bool haveBedA = false; bool haveBedB = false; bool noHit = false; bool anyHit = false; + bool writeA = false; bool writeB = false; bool writeCount = false; + bool haveFraction = false; + bool haveSlop = false; // check to see if we should print out some help if(argc <= 1) showHelp = true; @@ -53,34 +57,38 @@ int main(int argc, char* argv[]) { int parameterLength = (int)strlen(argv[i]); if(PARAMETER_CHECK("-a", 2, parameterLength)) { - haveBedA = true; - bedAFile = argv[i + 1]; + if ((i+1) < argc) { + haveBedA = true; + bedAFile = argv[i + 1]; + } i++; } else if(PARAMETER_CHECK("-b", 2, parameterLength)) { - haveBedB = true; - bedBFile = argv[i + 1]; + if ((i+1) < argc) { + haveBedB = true; + bedBFile = argv[i + 1]; + } i++; } else if(PARAMETER_CHECK("-u", 2, parameterLength)) { anyHit = true; - i++; } else if(PARAMETER_CHECK("-f", 2, parameterLength)) { + haveFraction = true; overlapFraction = atof(argv[i + 1]); i++; } + else if(PARAMETER_CHECK("-wa", 3, parameterLength)) { + writeA = true; + } else if(PARAMETER_CHECK("-wb", 3, parameterLength)) { writeB = true; - i++; } else if(PARAMETER_CHECK("-c", 2, parameterLength)) { writeCount = true; - i++; } else if (PARAMETER_CHECK("-v", 2, parameterLength)) { noHit = true; - i++; } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; @@ -103,10 +111,15 @@ int main(int argc, char* argv[]) { cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -c, not both." << endl << "*****" << endl; showHelp = true; } + + if (anyHit && writeCount) { + cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -c, not both." << endl << "*****" << endl; + showHelp = true; + } if (!showHelp) { - BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeB, overlapFraction, noHit, writeCount); + BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB, overlapFraction, noHit, writeCount); bi->IntersectBed(); return 0; } @@ -133,9 +146,11 @@ void ShowHelp(void) { cerr << "OPTIONS: " << endl; cerr << "\t" << "-u\t\t\t" << "Write ORIGINAL a.bed entry ONCE if ANY overlap bed." << endl << "\t\t\t\tIn other words, ignore multiple overlaps." << endl << endl; cerr << "\t" << "-v \t\t\t" << "Only report those entries in A that have NO OVERLAP in B." << endl << "\t\t\t\tSimilar to grep -v." << endl << endl; + cerr << "\t" << "-s (100000)\t\t" << "Slop added before and after each entry in A" << endl << "\t\t\t\tUseful for finding overlaps within N bases upstream and downstream." << endl << endl; cerr << "\t" << "-f (e.g. 0.05)\t\t" << "Minimum overlap req'd as fraction of a.bed." << endl << "\t\t\t\tDefault is 1E-9 (effectively 1bp)." << endl << endl; cerr << "\t" << "-c \t\t\t" << "For each entry in A, report the number of hits in B while restricting to -f." << endl << "\t\t\t\tReports 0 for A entries that have no overlap with B." << endl << endl; - cerr << "\t" << "-wb \t\t\t" << "Write the entry in B for each overlap." << endl << "\t\t\t\tUseful for knowing _what_ A overlaps. Restricted by -f." << endl << endl; + cerr << "\t" << "-wa \t\t\t" << "Write the original entry in A for each overlap." << endl << endl; + cerr << "\t" << "-wb \t\t\t" << "Write the original entry in B for each overlap." << endl << "\t\t\t\tUseful for knowing _what_ A overlaps. Restricted by -f." << endl << endl; cerr << "NOTES: " << endl; cerr << "\t" << "-i stdin\t\t" << "Allows intersectBed to read BED from stdin. E.g.: cat a.bed | intersectBed -a stdin -b B.bed" << endl << endl;