diff --git a/bin/closestBed b/bin/closestBed index 7d70fa4c9c24981be21fa2db2743e4b3d8fdc37a..bfea0660a86324e852bdeaa3e77e9ca450659810 100755 Binary files a/bin/closestBed and b/bin/closestBed differ diff --git a/bin/intersectBed b/bin/intersectBed index d1728c9b3a0bc452860325bf8f93bdeab2ccf251..7da0bb606c63bb04bda4aef4c76d767edb703465 100755 Binary files a/bin/intersectBed and b/bin/intersectBed differ diff --git a/bin/subtractBed b/bin/subtractBed index b9aef6a5a1511ae1277fbd8060e929853dcfef5c..f9923e440b76272f5ed743ffdb388b6660f5ec37 100755 Binary files a/bin/subtractBed and b/bin/subtractBed differ diff --git a/bin/windowBed b/bin/windowBed index 981f606a0c2da844f596fef988433f1d5be6d788..85ce50d8d4374f475a00e5830a4908ce3e64e6ff 100755 Binary files a/bin/windowBed and b/bin/windowBed differ diff --git a/obj/closestMain.o b/obj/closestMain.o index 149f4e12e9000ffb85b92e1f27d1e56373009284..70cb6534b8929c342a92681fab835fcea1285dd3 100644 Binary files a/obj/closestMain.o and b/obj/closestMain.o differ diff --git a/obj/intersectMain.o b/obj/intersectMain.o index 77731f5f8f7989950a3d5202a0d1c62706ee1f02..a383639beb1a4c921d62be67f72febb439425144 100644 Binary files a/obj/intersectMain.o and b/obj/intersectMain.o differ diff --git a/obj/subtractBed.o b/obj/subtractBed.o index a0f04779c12993910a5b9419bad86d770d96ef28..825a85f1a5471f636a7d587a3caaa3f3a899204f 100644 Binary files a/obj/subtractBed.o and b/obj/subtractBed.o differ diff --git a/obj/subtractMain.o b/obj/subtractMain.o index 97cb5e145fb7f9caa9af450d0fcd181b5594c595..b68c1657d85bbdc57e3560bcd028ba937598bd69 100644 Binary files a/obj/subtractMain.o and b/obj/subtractMain.o differ diff --git a/obj/windowMain.o b/obj/windowMain.o index 215e18c15eff7a59d1fcef4bb36c7ef9df18e722..db72bf73134d0d182c9208316a97f7c81ce7a292 100644 Binary files a/obj/windowMain.o and b/obj/windowMain.o differ diff --git a/src/closestBed/closestMain.cpp b/src/closestBed/closestMain.cpp index bb30410d2ba62f6506d4e68a567d1eeed32efb1c..e8698eca5cbf89f337c212d2c1444d23dcc09465 100755 --- a/src/closestBed/closestMain.cpp +++ b/src/closestBed/closestMain.cpp @@ -108,22 +108,22 @@ void ShowHelp(void) { cerr << " Hall Laboratory, University of Virginia" << endl; cerr << "===============================================" << endl << endl; cerr << "SUMMARY: For each feature in BED A, finds the closest " << endl; - cerr << " feature (upstream or downstream) in BED B." << endl << endl; + cerr << "\t feature (upstream or downstream) in BED B." << endl << endl; - cerr << "USAGE: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; + cerr << "USAGE: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; cerr << "OPTIONS: " << endl; cerr << " " << "-s\t" << "Force strandedness. That is, find the closest feature in B that" << endl; cerr << "\toverlaps A on the same strand." << endl; - cerr << "\t - By default, overlaps are reported without respect to strand." << endl << endl; + cerr << "\t- By default, overlaps are reported without respect to strand." << endl << endl; cerr << " " << "-t\t" << "How ties for closest feature are handled. This occurs when two" << endl; cerr << "\tfeatures in B have exactly the same overlap with a feature in A." << endl; cerr << "\tBy default, all such features in B are reported." << endl; cerr << "\tHere are all the options:" << endl; - cerr << "\t - \"all\" Report all ties (default)." << endl; - cerr << "\t - \"first\" Report the first tie that occurred in the B file." << endl; - cerr << "\t - \"last\" Report the last tie that occurred in the B file." << endl << endl; + cerr << "\t- \"all\" Report all ties (default)." << endl; + cerr << "\t- \"first\" Report the first tie that occurred in the B file." << endl; + cerr << "\t- \"last\" Report the last tie that occurred in the B file." << endl << endl; cerr << "NOTES: " << endl; diff --git a/src/intersectBed/intersectMain.cpp b/src/intersectBed/intersectMain.cpp index e0c657dbcd5ed366e389bd82f59b53c35289a583..64e5caef33e05dcea733bbed6cad819017307095 100755 --- a/src/intersectBed/intersectMain.cpp +++ b/src/intersectBed/intersectMain.cpp @@ -158,34 +158,34 @@ void ShowHelp(void) { cerr << "===============================================" << endl << endl; cerr << "SUMMARY: Report overlaps between a.bed and b.bed." << endl << endl; - cerr << "USAGE: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; + cerr << "USAGE: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; cerr << "OPTIONS: " << endl; cerr << " " << "-wa\t" << "Write the original entry in A for each overlap." << endl << endl; cerr << " " << "-wb\t" << "Write the original entry in B for each overlap." << endl; - cerr << "\t - Useful for knowing _what_ A overlaps. Restricted by -f." << endl << endl; + cerr << "\t- Useful for knowing _what_ A overlaps. Restricted by -f." << endl << endl; cerr << " " << "-u\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl; - cerr << "\t - In other words, just report the fact >=1 hit was found." << endl << endl; + cerr << "\t- In other words, just report the fact >=1 hit was found." << endl << endl; cerr << " " << "-c\t" << "For each entry in A, report the number of overlaps with B." << endl; - cerr << "\t - Reports 0 for A entries that have no overlap with B." << endl; - cerr << "\t - Overlaps restricted by -f." << endl << endl; + cerr << "\t- Reports 0 for A entries that have no overlap with B." << endl; + cerr << "\t- Overlaps restricted by -f." << endl << endl; cerr << " " << "-v\t" << "Only report those entries in A that have _no overlaps_ with B." << endl; - cerr << "\t - Similar to \"grep -v.\"" << endl << endl; + cerr << "\t- Similar to \"grep -v.\"" << endl << endl; cerr << " " << "-f\t" << "Minimum overlap required as a fraction of A." << endl; - cerr << "\t - Default is 1E-9 (i.e., 1bp)." << endl << endl; + cerr << "\t- Default is 1E-9 (i.e., 1bp)." << endl << endl; cerr << " " << "-r\t" << "Require that the fraction overlap be reciprocal for A and B." << endl; - cerr << "\t - In other words, if -f is 0.90 and -r is used, this requires" << endl; - cerr << "\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl; + cerr << "\t- In other words, if -f is 0.90 and -r is used, this requires" << endl; + cerr << "\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl; cerr << " " << "-s\t" << "Force strandedness. That is, only report hits in B that" << endl; cerr << "\toverlap A on the same strand." << endl; - cerr << "\t - By default, overlaps are reported without respect to strand." << endl << endl; + cerr << "\t- By default, overlaps are reported without respect to strand." << endl << endl; // end the program here exit(1); diff --git a/src/subtractBed/b.bed b/src/subtractBed/b.bed index 68a8ea6259d8e67e231439fc47ae525dc1cbf064..bfd157e3360d38f823f639f075a7c4aef6481b9c 100644 --- a/src/subtractBed/b.bed +++ b/src/subtractBed/b.bed @@ -1,2 +1,2 @@ -chr1 150 250 +chr1 180 300 diff --git a/src/subtractBed/subtractBed.cpp b/src/subtractBed/subtractBed.cpp index 186c12ddf7c17a8e5685a480ab04889a75f700f7..7f22f6738021f4a9aab4360dad95280f9371c5ca 100755 --- a/src/subtractBed/subtractBed.cpp +++ b/src/subtractBed/subtractBed.cpp @@ -1,12 +1,14 @@ -// -// subtractBed.cpp -// BEDTools -// -// Created by Aaron Quinlan Spring 2009. -// Copyright 2009 Aaron Quinlan. All rights reserved. -// -// Summary: Removes overlapping segments from a BED entry. -// +/***************************************************************************** + subtractBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0+ license. +******************************************************************************/ #include "lineFileUtilities.h" #include "subtractBed.h" @@ -60,7 +62,7 @@ void BedSubtract::FindOverlaps(BED &a, vector<BED> &hits) { // is there enough overlap (default ~ 1bp) float overlap = ((float)(e-s) / (float)(a.end - a.start)); - + if (overlap >= 1.0) { numOverlaps++; numConsumedByB++; @@ -147,74 +149,52 @@ void BedSubtract::FindOverlaps(BED &a, vector<BED> &hits) { -void BedSubtract::SubtractBed() { +void BedSubtract::SubtractBed(istream &bedInput) { // load the "B" bed file into a map so // that we can easily compare "A" to it for overlaps bedB->loadBedFileIntoMap(); - string bedLine; - BED bedEntry; - int lineNum = 0; - - // are we dealing with a file? - if (bedA->bedFile != "stdin") { - - // open the BED file for reading - ifstream bed(bedA->bedFile.c_str(), ios::in); - if ( !bed ) { - cerr << "Error: The requested bed file (" <<bedA->bedFile << ") could not be opened. Exiting!" << endl; - exit (1); - } + string bedLine; + int lineNum = 0; // current input line number + vector<BED> hits; // vector of potential hits + vector<string> bedFields; // vector for a BED entry + + // reserve some space + hits.reserve(100); + bedFields.reserve(12); - BED a; - // process each entry in A - while (getline(bed, bedLine)) { - - if ((bedLine.find("track") != string::npos) || (bedLine.find("browser") != string::npos)) { - continue; - } - else { - // split the current line into ditinct fields - vector<string> bedFields; - Tokenize(bedLine,bedFields); + BED a; + // process each entry in A + while (getline(bedInput, bedLine)) { - lineNum++; - - // find the overlaps with B if it's a valid BED entry. - if (bedA->parseBedLine(a, bedFields, lineNum)) { - vector<BED> hits; - FindOverlaps(a, hits); - } - } + lineNum++; + Tokenize(bedLine,bedFields); + + // find the overlaps with B if it's a valid BED entry. + if (bedA->parseLine(a, bedFields, lineNum)) { + FindOverlaps(a, hits); + hits.clear(); } - } - // "A" is being passed via STDIN. - else { - BED a; - // process each entry in A - while (getline(cin, bedLine)) { + // reset for the next input line + bedFields.clear(); + } +} +// END Intersect - if ((bedLine.find("track") != string::npos) || (bedLine.find("browser") != string::npos)) { - continue; - } - else { - // split the current line into ditinct fields - vector<string> bedFields; - Tokenize(bedLine,bedFields); - lineNum++; - - // find the overlaps with B if it's a valid BED entry. - if (bedA->parseBedLine(a, bedFields, lineNum)) { - vector<BED> hits; - FindOverlaps(a, hits); - } - } +void BedSubtract::DetermineBedInput() { + if (bedA->bedFile != "stdin") { // process a file + ifstream beds(bedA->bedFile.c_str(), ios::in); + if ( !beds ) { + cerr << "Error: The requested bed file (" << bedA->bedFile << ") could not be opened. Exiting!" << endl; + exit (1); } + SubtractBed(beds); + } + else { // process stdin + SubtractBed(cin); } } -// END Intersect - diff --git a/src/subtractBed/subtractBed.h b/src/subtractBed/subtractBed.h index a7eb54ebccdfcbb4f9df5b11034de51fe9c92570..b11d86198798d993c861c2a1cacac2d802b8223e 100755 --- a/src/subtractBed/subtractBed.h +++ b/src/subtractBed/subtractBed.h @@ -1,3 +1,14 @@ +/***************************************************************************** + subtractBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0+ license. +******************************************************************************/ #ifndef SUBTRACTBED_H #define SUBTRACTBED_H @@ -25,8 +36,8 @@ public: void reportA(BED &); void FindOverlaps(BED &, vector<BED> &); - void SubtractBed(); - + void SubtractBed(istream &bedInput); + void DetermineBedInput(); private: diff --git a/src/subtractBed/subtractMain.cpp b/src/subtractBed/subtractMain.cpp index 6e95537b86c281bf11904bc278324d8077b249d1..592a78c8b158bf58c5c33d136eb7039b3a8a5d72 100755 --- a/src/subtractBed/subtractMain.cpp +++ b/src/subtractBed/subtractMain.cpp @@ -1,3 +1,14 @@ +/***************************************************************************** + subtractMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0+ license. +******************************************************************************/ #include "subtractBed.h" #include "version.h" @@ -86,7 +97,7 @@ int main(int argc, char* argv[]) { if (!showHelp) { BedSubtract *bs = new BedSubtract(bedAFile, bedBFile, overlapFraction, forceStrand); - bs->SubtractBed(); + bs->DetermineBedInput(); return 0; } else { @@ -101,17 +112,19 @@ void ShowHelp(void) { cerr << " Aaron Quinlan, Ph.D. (aaronquinlan@gmail.com) " << endl ; cerr << " Hall Laboratory, University of Virginia" << endl; cerr << "===============================================" << endl << endl; - cerr << "Description: Removes the portion(s) of an interval that is overlapped by another feature(s). " << endl << endl; + cerr << "SUMMARY: Removes the portion(s) of an interval that is overlapped" << endl; + cerr << "\t by another feature(s)." << endl << endl; - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; + cerr << "USAGE: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; cerr << "OPTIONS: " << endl; - cerr << "\t" << "-s\t\t\t" << "Force strandedness. Only report hits in B that overlap A on the same strand." << endl << "\t\t\t\tBy default, overlaps are reported without respect to strand." << endl << endl; - cerr << "\t" << "-f (e.g. 0.05)\t\t" << "Minimum overlap req'd as fraction of a.bed." << endl << "\t\t\t\tDefault is 1E-9 (effectively 1bp)." << endl << endl; + cerr << " " << "-f\t" << "Minimum overlap required as a fraction of A." << endl; + cerr << "\t- Default is 1E-9 (i.e., 1bp)." << endl << endl; + + cerr << " " << "-s\t" << "Force strandedness. That is, only report hits in B that" << endl; + cerr << "\toverlap A on the same strand." << endl; + cerr << "\t- By default, overlaps are reported without respect to strand." << endl << endl; - cerr << "NOTES: " << endl; - cerr << "\t" << "-i stdin\t\t" << "Allows BED file A to be read from stdin. E.g.: cat a.bed | subtractBed -a stdin -b B.bed" << endl << endl; - cerr << "\t***Only tab-delimited BED3 - BED6 formats allowed.***"<< endl << endl; // end the program here exit(1); diff --git a/src/windowBed/windowMain.cpp b/src/windowBed/windowMain.cpp index 4e5a7502fba5321c00b5c0c6ff47cff866b70fc8..fd2641d2e5141e6a06b0d4a2bf4210ff6f1dfa64 100755 --- a/src/windowBed/windowMain.cpp +++ b/src/windowBed/windowMain.cpp @@ -168,44 +168,44 @@ void ShowHelp(void) { cerr << " Hall Laboratory, University of Virginia" << endl; cerr << "===============================================" << endl << endl; cerr << "SUMMARY: Examines a \"window\" around each feature in A and" << endl; - cerr << " reports all features in B that overlap the window. For each" << endl; - cerr << " overlap the entire entry in A and B are reported." << endl << endl; + cerr << "\t reports all features in B that overlap the window. For each" << endl; + cerr << "\t overlap the entire entry in A and B are reported." << endl << endl; - cerr << "USAGE: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; + cerr << "USAGE: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; cerr << "OPTIONS: " << endl; cerr << " " << "-w\t" << "Base pairs added upstream and downstream of each entry" << endl; cerr << "\tin A when searching for overlaps in B." << endl; - cerr << "\t - Creates symterical \"windows\" around A." << endl; - cerr << "\t - Default is 1000 bp." << endl << endl; + cerr << "\t- Creates symterical \"windows\" around A." << endl; + cerr << "\t- Default is 1000 bp." << endl << endl; cerr << " " << "-l\t" << "Base pairs added upstream (left of) of each entry" << endl; cerr << "\tin A when searching for overlaps in B." << endl; - cerr << "\t - Allows one to define assymterical \"windows\"." << endl; - cerr << "\t - Default is 1000 bp." << endl << endl; + cerr << "\t- Allows one to define assymterical \"windows\"." << endl; + cerr << "\t- Default is 1000 bp." << endl << endl; cerr << " " << "-r\t" << "Base pairs added downstream (right of) of each entry" << endl; cerr << "\tin A when searching for overlaps in B." << endl; - cerr << "\t - Allows one to define assymterical \"windows\"." << endl; - cerr << "\t - Default is 1000 bp." << endl << endl; + cerr << "\t- Allows one to define assymterical \"windows\"." << endl; + cerr << "\t- Default is 1000 bp." << endl << endl; cerr << " " << "-sw\t" << "Define -l and -r based on strand. For example if used, -l 500" << endl; cerr << "\tfor a negative-stranded feature will add 500 bp downstream." << endl; - cerr << "\t - Default = disabled." << endl << endl; + cerr << "\t- Default = disabled." << endl << endl; cerr << " " << "-sm\t" << "Only report hits in B that overlap A on the same strand." << endl; - cerr << "\t - By default, overlaps are reported without respect to strand." << endl << endl; + cerr << "\t- By default, overlaps are reported without respect to strand." << endl << endl; cerr << " " << "-u\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl; - cerr << "\t - In other words, just report the fact >=1 hit was found." << endl << endl; + cerr << "\t- In other words, just report the fact >=1 hit was found." << endl << endl; cerr << " " << "-c\t" << "For each entry in A, report the number of overlaps with B." << endl; - cerr << "\t - Reports 0 for A entries that have no overlap with B." << endl; - cerr << "\t - Overlaps restricted by -f." << endl << endl; + cerr << "\t- Reports 0 for A entries that have no overlap with B." << endl; + cerr << "\t- Overlaps restricted by -f." << endl << endl; cerr << " " << "-v\t" << "Only report those entries in A that have _no overlaps_ with B." << endl; - cerr << "\t - Similar to \"grep -v.\"" << endl << endl; + cerr << "\t- Similar to \"grep -v.\"" << endl << endl; // end the program here exit(1);