diff --git a/bin/closestBed b/bin/closestBed index 32eabe225ce040d0039862be7e70573b1911c189..7d70fa4c9c24981be21fa2db2743e4b3d8fdc37a 100755 Binary files a/bin/closestBed and b/bin/closestBed differ diff --git a/bin/intersectBed b/bin/intersectBed index daa7c51268dea86631638952fabb00c43d7f79cd..d1728c9b3a0bc452860325bf8f93bdeab2ccf251 100755 Binary files a/bin/intersectBed and b/bin/intersectBed differ diff --git a/obj/closestBed.o b/obj/closestBed.o index ff1fb98b41bfdcd2c32932a672a6c174912c0ceb..6ed2759ffc860f57d3d247fd5aad9cd699521061 100644 Binary files a/obj/closestBed.o and b/obj/closestBed.o differ diff --git a/obj/closestMain.o b/obj/closestMain.o index c1f70ff628ca8b38f56f1830309f1114de46adaf..149f4e12e9000ffb85b92e1f27d1e56373009284 100644 Binary files a/obj/closestMain.o and b/obj/closestMain.o differ diff --git a/obj/intersectBed.o b/obj/intersectBed.o index 2d4b72b244825c2465a9a69641fb8ab226bee7f0..aaecf35d53e628f6bc125e363cbd2b1ca8e2ee87 100644 Binary files a/obj/intersectBed.o and b/obj/intersectBed.o differ diff --git a/obj/intersectMain.o b/obj/intersectMain.o index 04c7b914c474a327935be62c3c645aab7aadf8ef..77731f5f8f7989950a3d5202a0d1c62706ee1f02 100644 Binary files a/obj/intersectMain.o and b/obj/intersectMain.o differ diff --git a/src/closestBed/a.bed b/src/closestBed/a.bed new file mode 100644 index 0000000000000000000000000000000000000000..5b1e447c6ff93e0490cfb9bc26d761a0677b9714 --- /dev/null +++ b/src/closestBed/a.bed @@ -0,0 +1,3 @@ +chr1 100 101 rs1234 + + diff --git a/src/closestBed/b.bed b/src/closestBed/b.bed new file mode 100644 index 0000000000000000000000000000000000000000..807c0cd01f2c9c9841060c2adca918f19ab7c102 --- /dev/null +++ b/src/closestBed/b.bed @@ -0,0 +1,3 @@ +chr1 20000 30000 geneA 100 + +chr1 20000 30000 geneB 100 - + diff --git a/src/closestBed/closestBed.cpp b/src/closestBed/closestBed.cpp index 0bf1326753b0f1b8aa34de8775ba81c9bdc0a498..05c7657afe63f9a8437ec959accf2f03fec64738 100755 --- a/src/closestBed/closestBed.cpp +++ b/src/closestBed/closestBed.cpp @@ -1,12 +1,14 @@ -// -// closestBed.cpp -// BEDTools -// -// Created by Aaron Quinlan Spring 2009. -// Copyright 2009 Aaron Quinlan. All rights reserved. -// -// Summary: Looks for the closest features in two BED files. -// +/***************************************************************************** + closestBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0+ license. +******************************************************************************/ #include "lineFileUtilities.h" #include "closestBed.h" @@ -188,66 +190,53 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { } -void BedClosest::ClosestBed() { +void BedClosest::ClosestBed(istream &bedInput) { // load the "B" bed file into a map so // that we can easily compare "A" to it for overlaps bedB->loadBedFileIntoMap(); - - string bedLine; - BED bedEntry; - int lineNum = 0; - - // open the BED file for reading - if (bedA->bedFile != "stdin") { - - ifstream bed(bedA->bedFile.c_str(), ios::in); - if ( !bed ) { - cerr << "Error: The requested bed file (" <<bedA->bedFile << ") could not be opened. Exiting!" << endl; - exit (1); - } + string bedLine; + int lineNum = 0; // current input line number + vector<BED> hits; // vector of potential hits + vector<string> bedFields; // vector for a BED entry + + // reserve some space + hits.reserve(100); + bedFields.reserve(12); - BED a; - while (getline(bed, bedLine)) { - - if ((bedLine.find("track") != string::npos) || (bedLine.find("browser") != string::npos)) { - continue; - } - else { - vector<string> bedFields; - Tokenize(bedLine,bedFields); + BED a; + // process each entry in A + while (getline(bedInput, bedLine)) { - lineNum++; - if (bedA->parseBedLine(a, bedFields, lineNum)) { - vector<BED> hits; - FindWindowOverlaps(a, hits); - } - } + lineNum++; + Tokenize(bedLine,bedFields); + + // find the overlaps with B if it's a valid BED entry. + if (bedA->parseLine(a, bedFields, lineNum)) { + FindWindowOverlaps(a, hits); + hits.clear(); } - } - // "A" is being passed via STDIN. - else { - BED a; - while (getline(cin, bedLine)) { + // reset for the next input line + bedFields.clear(); + } +} +// END ClosestBed - if ((bedLine.find("track") != string::npos) || (bedLine.find("browser") != string::npos)) { - continue; - } - else { - vector<string> bedFields; - Tokenize(bedLine,bedFields); - lineNum++; - if (bedA->parseBedLine(a, bedFields, lineNum)) { - vector<BED> hits; - FindWindowOverlaps(a, hits); - } - } + +void BedClosest::DetermineBedInput() { + if (bedA->bedFile != "stdin") { // process a file + ifstream beds(bedA->bedFile.c_str(), ios::in); + if ( !beds ) { + cerr << "Error: The requested bed file (" << bedA->bedFile << ") could not be opened. Exiting!" << endl; + exit (1); } + ClosestBed(beds); + } + else { // process stdin + ClosestBed(cin); } } -// END ClosestBed - diff --git a/src/closestBed/closestBed.h b/src/closestBed/closestBed.h index 720406d09f2fd032bbe0f93f1fdcf3a44147c11f..0e3804bc49956f32f3b42a6db7cc2534dc7b4046 100755 --- a/src/closestBed/closestBed.h +++ b/src/closestBed/closestBed.h @@ -1,3 +1,14 @@ +/***************************************************************************** + closestBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0+ license. +******************************************************************************/ #ifndef CLOSESTBED_H #define CLOSESTBED_H @@ -25,8 +36,9 @@ public: void reportB(const BED &); void reportNullB(); - void ClosestBed(); + void ClosestBed(istream &bedInput); void FindWindowOverlaps(BED &, vector<BED> &); + void DetermineBedInput(); private: diff --git a/src/closestBed/closestMain.cpp b/src/closestBed/closestMain.cpp index 7ec0c26949240187f0b04b4714e602fa826c93fa..bb30410d2ba62f6506d4e68a567d1eeed32efb1c 100755 --- a/src/closestBed/closestMain.cpp +++ b/src/closestBed/closestMain.cpp @@ -1,3 +1,14 @@ +/***************************************************************************** + closestMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0+ license. +******************************************************************************/ #include "closestBed.h" #include "version.h" @@ -81,7 +92,7 @@ int main(int argc, char* argv[]) { if (!showHelp) { BedClosest *bc = new BedClosest(bedAFile, bedBFile, forceStrand, tieMode); - bc->ClosestBed(); + bc->DetermineBedInput(); return 0; } else { @@ -96,24 +107,29 @@ void ShowHelp(void) { cerr << " Aaron Quinlan, Ph.D. (aaronquinlan@gmail.com) " << endl ; cerr << " Hall Laboratory, University of Virginia" << endl; cerr << "===============================================" << endl << endl; - cerr << "Description: For each feature in BED A, finds the closest feature (upstream or downstream) in BED B" << endl; + cerr << "SUMMARY: For each feature in BED A, finds the closest " << endl; + cerr << " feature (upstream or downstream) in BED B." << endl << endl; - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; + cerr << "USAGE: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; cerr << "OPTIONS: " << endl; - cerr << "\t" << "-s\t\t\t" << "Force strandedness. Only report hits in B that overlap A on the same strand." << endl << "\t\t\t\tBy default, overlaps are reported without respect to strand." << endl << endl; - cerr << "\t" << "-t\t\t\t" << "How to handle ties for closest features. This occurs when two features in B have exactly the" << endl; - cerr << "\t\t\t\t" << "same overlap with a feature in A. By default, all such features in B are reported. Here are all the options:" << endl; - cerr << "\t\t\t\t\t" << "all\t\tReport all ties (default)." << endl; - cerr << "\t\t\t\t\t" << "first\t\tReport the first tie that occurred in the B file." << endl; - cerr << "\t\t\t\t\t" << "last\t\tReport the last tie that occurred in the B file." << endl; + cerr << " " << "-s\t" << "Force strandedness. That is, find the closest feature in B that" << endl; + cerr << "\toverlaps A on the same strand." << endl; + cerr << "\t - By default, overlaps are reported without respect to strand." << endl << endl; + + cerr << " " << "-t\t" << "How ties for closest feature are handled. This occurs when two" << endl; + cerr << "\tfeatures in B have exactly the same overlap with a feature in A." << endl; + cerr << "\tBy default, all such features in B are reported." << endl; + cerr << "\tHere are all the options:" << endl; + cerr << "\t - \"all\" Report all ties (default)." << endl; + cerr << "\t - \"first\" Report the first tie that occurred in the B file." << endl; + cerr << "\t - \"last\" Report the last tie that occurred in the B file." << endl << endl; cerr << "NOTES: " << endl; - cerr << "\t" << "-i stdin\t\t" << "Allows BED file A to be read from stdin. E.g.: cat a.bed | closestBed -a stdin -b B.bed" << endl << endl; - cerr << "\t" << "Reports \"none\" for chrom and \"-1\" for all other fields when a feature is not found in B on the same chromosome" << endl; - cerr << "\t" << "as the feature in A. E.g. none\t-1\t-1" << endl << endl; - cerr << "\t***Only tab-delimited BED3 - BED6 formats allowed.***" << endl << endl; + cerr << " Reports \"none\" for chrom and \"-1\" for all other fields when a feature" << endl; + cerr << " is not found in B on the same chromosome as the feature in A." << endl; + cerr << " E.g. none\t-1\t-1" << endl << endl; // end the program here exit(1); diff --git a/src/intersectBed/intersectMain.cpp b/src/intersectBed/intersectMain.cpp index c4fdd9f2845ab3e27589abc381a87022f3f2179a..e0c657dbcd5ed366e389bd82f59b53c35289a583 100755 --- a/src/intersectBed/intersectMain.cpp +++ b/src/intersectBed/intersectMain.cpp @@ -156,11 +156,11 @@ void ShowHelp(void) { cerr << " Aaron Quinlan, Ph.D. (aaronquinlan@gmail.com) " << endl ; cerr << " Hall Laboratory, University of Virginia" << endl; cerr << "===============================================" << endl << endl; - cerr << "Description: Report overlaps between a.bed and b.bed." << endl << endl; + cerr << "SUMMARY: Report overlaps between a.bed and b.bed." << endl << endl; - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; + cerr << "USAGE: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; - cerr << "Options: " << endl; + cerr << "OPTIONS: " << endl; cerr << " " << "-wa\t" << "Write the original entry in A for each overlap." << endl << endl; cerr << " " << "-wb\t" << "Write the original entry in B for each overlap." << endl; cerr << "\t - Useful for knowing _what_ A overlaps. Restricted by -f." << endl << endl; diff --git a/src/windowBed/windowMain.cpp b/src/windowBed/windowMain.cpp index d24289993aaaadf5771da4714fffd070c138704d..4e5a7502fba5321c00b5c0c6ff47cff866b70fc8 100755 --- a/src/windowBed/windowMain.cpp +++ b/src/windowBed/windowMain.cpp @@ -167,11 +167,11 @@ void ShowHelp(void) { cerr << " Aaron Quinlan, Ph.D. (aaronquinlan@gmail.com) " << endl ; cerr << " Hall Laboratory, University of Virginia" << endl; cerr << "===============================================" << endl << endl; - cerr << "Description: Examines a \"window\" around each feature in A and" << endl; - cerr << "reports all features in B that overlap the window. For each" << endl; - cerr << "overlap the entire entry in A and B are reported." << endl << endl; + cerr << "SUMMARY: Examines a \"window\" around each feature in A and" << endl; + cerr << " reports all features in B that overlap the window. For each" << endl; + cerr << " overlap the entire entry in A and B are reported." << endl << endl; - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; + cerr << "USAGE: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; cerr << "OPTIONS: " << endl; cerr << " " << "-w\t" << "Base pairs added upstream and downstream of each entry" << endl;