From a1af7c6a31e829fb8fbd70530580d030bbd9c1e0 Mon Sep 17 00:00:00 2001 From: Aaron Quinlan <aaronquinlan@gmail.com> Date: Mon, 27 Apr 2009 12:16:05 -0400 Subject: [PATCH] Version 1.2. Added closestBed, linksBed and subtractBed 1. Added subtractBed. A. Fixed bug that prevented "split" overlaps from being reported. B. Prevented A from being reported if >=1 feature in B completely spans it. 2. Added linksBed. 3. Added the ability to define separate windows for upstream and downstream to windowBed. --- Makefile | 2 +- README | 18 +++-- TODO | 14 ++++ USAGE_EXAMPLES | 29 ++++++- VERSION | 2 +- src/closestBed/closestMain.cpp | 2 +- src/complementBed/complementMain.cpp | 2 +- src/coverageBed/coverageMain.cpp | 2 +- src/fastaFromBed/fastaFromBedMain.cpp | 2 +- src/genomeCoverageBed/genomeCoverageMain.cpp | 2 +- src/intersectBed/intersectMain.cpp | 2 +- src/linksBed/linksBed.cpp | 29 ++++++- src/linksBed/linksMain.cpp | 36 ++++++--- src/mergeBed/mergeMain.cpp | 2 +- src/sortBed/sortMain.cpp | 2 +- src/subtractBed/a | 1 - src/subtractBed/b | 1 - src/subtractBed/subtractBed.cpp | 83 ++++++++++++++------ src/subtractBed/subtractMain.cpp | 2 +- src/windowBed/windowBed.cpp | 12 +-- src/windowBed/windowBed.h | 5 +- src/windowBed/windowMain.cpp | 47 +++++++++-- 22 files changed, 222 insertions(+), 75 deletions(-) create mode 100644 TODO delete mode 100644 src/subtractBed/a delete mode 100644 src/subtractBed/b diff --git a/Makefile b/Makefile index 06de28cc..ed2a44b1 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ export CXX ?= g++ #include includes/$(BLD_PLATFORM).inc # define our source subdirectories -SUBDIRS = $(SRC_DIR)/closestBed $(SRC_DIR)/complementBed $(SRC_DIR)/coverageBed $(SRC_DIR)/intersectBed $(SRC_DIR)/mergeBed $(SRC_DIR)/genomeCoverageBed $(SRC_DIR)/fastaFromBed $(SRC_DIR)/sortBed $(SRC_DIR)/windowBed $(SRC_DIR)/subtractBed $(SRC_DIR)/linlsBed +SUBDIRS = $(SRC_DIR)/closestBed $(SRC_DIR)/complementBed $(SRC_DIR)/coverageBed $(SRC_DIR)/intersectBed $(SRC_DIR)/mergeBed $(SRC_DIR)/genomeCoverageBed $(SRC_DIR)/fastaFromBed $(SRC_DIR)/sortBed $(SRC_DIR)/windowBed $(SRC_DIR)/subtractBed $(SRC_DIR)/linksBed UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile $(SRC_DIR)/utils/sequenceUtilities all: diff --git a/README b/README index 129a7ebd..296da6fe 100644 --- a/README +++ b/README @@ -1,5 +1,5 @@ ============================ -=== BEDTools Version 1.1 === +=== BEDTools Version 1.2 === ============================ Created by Aaron Quinlan Spring 2009. @@ -27,7 +27,8 @@ I am grateful for his elegant genome binning algorithm and therefore use it exte 2. Michael Stromberg, Boston College. I learned much of what I know about C++ style from Michael Stromberg. I was formerly a C and Perl programmer and struggled to switch to C++. The structure of BEDTools and much of the coding style -emulates Michael's. Thanks Mike. +emulates Michael's. Quite honestly, this package would never exist were it not for looking through Michael's +code base. Thanks Mike! 3. Galaxy. I had been using Galaxy for months prior to writing these tools. Much of the functionality is based on what is @@ -36,10 +37,17 @@ especially with large sequencing datasets. Regardless, I am grateful to the Gal useful site that is rapidly approaching ubiquity. 4. Ira Hall, UVa. -Discussion between Ira and I are largely the motivation for these tools. We are frequently tracking -down impromptu ideas and these tools were largely designed to facilitate tangents. +Discussions between Ira and I are largely the motivation for these tools. We are frequently tracking +down impromptu ideas and these tools were originally designed to facilitate such tangents. -5. Loyal Goff, CSAIL, Broad. +5. Royden Clark, UVa +Royden is the resident UCSC browser expert. He has helped me immensely by teaching me how to squeeze every last bit of functionality from the +browser. Also, the algorithmic concept behind the genomeCoverageBed program is Royden's. + +6. Fitz Elliot, UVa +Fitz kindly taught me the ways of version control with git. For those of who that are constantly confused by CVS and SVN, try git. I think you'll be happy. + +7. Loyal Goff, CSAIL, Broad. Many thanks to Loyal for his assistance in making BED Tools "compilable" on the SUSE platform. I appreciate your help and patience. diff --git a/TODO b/TODO new file mode 100644 index 00000000..5868d03c --- /dev/null +++ b/TODO @@ -0,0 +1,14 @@ +1. statsBed. + Total bases. + Total bases per strand + Max size. + Min size. + Median size. + Mean size. + Stdev. + MAD size. + Create histogram. Breaks. + + NOTE: uses UCSC browser coordinates. Thus, size = (end-start), not (end-start)+1 + +2. add the ability to force strandedness if desired. \ No newline at end of file diff --git a/USAGE_EXAMPLES b/USAGE_EXAMPLES index 59fe2fb1..67316002 100644 --- a/USAGE_EXAMPLES +++ b/USAGE_EXAMPLES @@ -1,5 +1,5 @@ ============================ -=== BEDTools Version 1.1 === +=== BEDTools Version 1.2 === ============================ Created by Aaron Quinlan Spring 2009. @@ -40,6 +40,24 @@ http://people.virginia.edu/~arq5x/bedtools.html $ intersectBed -a genes.bed -b LINES.bed | intersectBed -a stdin -b SINEs.bed -v + closestBed. Very similar to intersectBed, but finds the closest (not necessarily overlapping) feature in B to each feature in A. + If multiple features in B overlap a feature in A, the one with the highest overlap with respect to A is reported. + + 1. Find the closest ALU to each gene. + $ closestBed -a genes.bed -b ALUs.bed + + Allows input from stdin in the same manner as intersectBed (see #7). + + + subtractBed. For each feature in A, it returns the fraction of that feature that is not overlapped by B. If a feature in A is entirely "spanned" by + any feature in B, it will not be reported. + + 1. Remove introns from genes. + $ subtractBed -a genes.bed -b introns.bed + + Allows input from stdin in the same manner as intersectBed (see #7). + + windowBed. Very similar to intersectBed, but allows one to look for overlaps within a window flanking each A entry 1. Report all genes that are within 10000 bp UPSTREAM and DOWNSTREAM of CNVs. @@ -76,10 +94,15 @@ http://people.virginia.edu/~arq5x/bedtools.html Accepts input from stdin: - $ cat scrambled.bed | sortBed > sorted.bed - + $ cat scrambled.bed | sortBed -i stdin > sorted.bed + + linksBed. Creates HTML links from a BED file. + + $ linksBed myRegions.bed > myRegions.html + + coverageBed. Reports the number of entries in A that overlap with each feature in B. For example, one could use BED B as say 10Kb "windows" across a genome. If BED A contained the coordinates of sequence alignments, one could use coverageBed to count the number of reads that map to each window. This would be a preliminary step diff --git a/VERSION b/VERSION index b123147e..ea710abb 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.1 \ No newline at end of file +1.2 \ No newline at end of file diff --git a/src/closestBed/closestMain.cpp b/src/closestBed/closestMain.cpp index 539c8faa..5c860657 100644 --- a/src/closestBed/closestMain.cpp +++ b/src/closestBed/closestMain.cpp @@ -6,7 +6,7 @@ using namespace std; #define PROGRAM_NAME "closestBed" // define the version -#define VERSION "1.1.0" +#define VERSION "1.2.0" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) diff --git a/src/complementBed/complementMain.cpp b/src/complementBed/complementMain.cpp index 7be948ad..e62871eb 100755 --- a/src/complementBed/complementMain.cpp +++ b/src/complementBed/complementMain.cpp @@ -7,7 +7,7 @@ using namespace std; #define PROGRAM_NAME "complementBed" // define the version -#define VERSION "1.1.0" +#define VERSION "1.2.0" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) diff --git a/src/coverageBed/coverageMain.cpp b/src/coverageBed/coverageMain.cpp index d303742e..65c99835 100755 --- a/src/coverageBed/coverageMain.cpp +++ b/src/coverageBed/coverageMain.cpp @@ -6,7 +6,7 @@ using namespace std; #define PROGRAM_NAME "coverageBed" // define the version -#define VERSION "1.1.0" +#define VERSION "1.2.0" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) diff --git a/src/fastaFromBed/fastaFromBedMain.cpp b/src/fastaFromBed/fastaFromBedMain.cpp index 79eb381a..3ee017e3 100755 --- a/src/fastaFromBed/fastaFromBedMain.cpp +++ b/src/fastaFromBed/fastaFromBedMain.cpp @@ -6,7 +6,7 @@ using namespace std; #define PROGRAM_NAME "fastaFromBed" // define the version -#define VERSION "1.1.0" +#define VERSION "1.2.0" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) diff --git a/src/genomeCoverageBed/genomeCoverageMain.cpp b/src/genomeCoverageBed/genomeCoverageMain.cpp index 863dda71..48f7efee 100755 --- a/src/genomeCoverageBed/genomeCoverageMain.cpp +++ b/src/genomeCoverageBed/genomeCoverageMain.cpp @@ -7,7 +7,7 @@ using namespace std; #define PROGRAM_NAME "coverageBed" // define the version -#define VERSION "1.1.0" +#define VERSION "1.2.0" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) diff --git a/src/intersectBed/intersectMain.cpp b/src/intersectBed/intersectMain.cpp index 6732170a..71329254 100755 --- a/src/intersectBed/intersectMain.cpp +++ b/src/intersectBed/intersectMain.cpp @@ -6,7 +6,7 @@ using namespace std; #define PROGRAM_NAME "intersectBed" // define the version -#define VERSION "1.1.0" +#define VERSION "1.2.0" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) diff --git a/src/linksBed/linksBed.cpp b/src/linksBed/linksBed.cpp index ad637bb8..b54d9c3a 100755 --- a/src/linksBed/linksBed.cpp +++ b/src/linksBed/linksBed.cpp @@ -43,10 +43,32 @@ void BedLinks::WriteURL(BED &bed, string &base) { cout << "</a>" << endl; cout << "\t</td>" << endl; - if (this->bed->bedType >= 4) { + if (this->bed->bedType == 4) { cout << "\t<td>" << endl; cout << bed.name << endl; cout << "\t</td>" << endl; + } + else if (this->bed->bedType == 5) { + cout << "\t<td>" << endl; + cout << bed.name << endl; + cout << "\t</td>" << endl; + + cout << "\t<td>" << endl; + cout << bed.score << endl; + cout << "\t</td>" << endl; + } + else if (this->bed->bedType == 6) { + cout << "\t<td>" << endl; + cout << bed.name << endl; + cout << "\t</td>" << endl; + + cout << "\t<td>" << endl; + cout << bed.score << endl; + cout << "\t</td>" << endl; + + cout << "\t<td>" << endl; + cout << bed.strand << endl; + cout << "\t</td>" << endl; } cout << "</tr>" << endl; } @@ -68,11 +90,12 @@ void BedLinks::LinksBed() { // create the HTML header cout << "<html>" << endl <<"\t<body>" << endl; cout << "<title>" << this->bedFile << "</title>" << endl; + // start the table of entries cout << "<br>Firefox users: Press and hold the \"apple\" or \"alt\" key and click link to open in new tab." << endl; cout << "<p style=\"font-family:courier\">" << endl; - cout << "<table border=\"0\"" << endl; - + cout << "<table border=\"0\" align=\"justify\"" << endl; + // Are we dealing with a BED file or a BED passed via stdin? diff --git a/src/linksBed/linksMain.cpp b/src/linksBed/linksMain.cpp index ce116329..50056773 100755 --- a/src/linksBed/linksMain.cpp +++ b/src/linksBed/linksMain.cpp @@ -7,7 +7,7 @@ using namespace std; #define PROGRAM_NAME "linksBed" // define the version -#define VERSION "1.1.0" +#define VERSION "1.2.0" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) @@ -24,6 +24,13 @@ int main(int argc, char* argv[]) { string bedFile; bool haveBed = false; + /* Defaults for Quinlan + //string org = "mouse"; + //string db = "mm9"; + //string base = "http://mirror.bioch.virginia.edu"; + */ + + /* Defaults for everyone else */ string org = "human"; string db = "hg18"; string base = "http://genome.ucsc.edu"; @@ -91,19 +98,24 @@ void ShowHelp(void) { cerr << " Hall Laboratory, University of Virginia" << endl; cerr << "===============================================" << endl << endl; cerr << "Description: Creates HTML links from a BED file." << endl << endl; - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <input.bed>" << endl << endl; - - // cerr << "OPTIONS: " << endl; - // cerr << "\t" << "-sizeA\t\t" << "Sort the BED file by feature size in ascending order. Sorts across all chromosomes." << endl << endl; - // cerr << "\t" << "-sizeD\t\t" << "Sort the BED file by feature size in descending order. Sorts across all chromosomes." << endl << endl; - // cerr << "\t" << "-chrThenSizeA\t" << "Sort the BED file by chrom (ascending), then feature size in ascending order." << endl << endl; - // cerr << "\t" << "-chrThenSizeD\t" << "Sort the BED file by chrom (ascending), then feature size in descending order." << endl << endl; - // cerr << "\t" << "-chrThenScoreA\t" << "Sort the BED file by chrom (ascending), then score in ascending order." << endl << endl; - // cerr << "\t" << "-chrThenScoreD\t" << "Sort the BED file by chrom (ascending), then scor size in descending order." << endl << endl; - // + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <input.bed> > out.html" << endl << endl; + + cerr << "OPTIONS: " << endl; + cerr << "\t" << "-base\t\t" << "The browser basename. Default: http://genome.ucsc.edu " << endl << endl; + cerr << "\t" << "-org\t\t" << "The organism. Default: human" << endl << endl; + cerr << "\t" << "-db\t\t" << "The build. Default: hg18" << endl << endl; + cerr << "NOTES: " << endl; - cerr << "\t" << "-i stdin\t\t" << "Allows BED file A to be read from stdin. E.g.: cat a.bed | sortBed -i stdin" << endl << endl; + cerr << "\t" << "-i stdin\t" << "Allows BED file A to be read from stdin. E.g.: cat a.bed | linksBed -i stdin > out.html" << endl << endl; cerr << "\t***Only BED3 - BED6 formats allowed.***"<< endl << endl; + + cerr << "EXAMPLE: " << endl; + cerr << "\t" << "By default, the links created will point to the human (hg18) UCSC browser. If you have a local mirror, you can override this behavior"; + cerr << "by supplying the -base, -org, and -db options." << endl; + cerr << "\t" << "For example, if the main URL of your local mirror for mouse MM9 is called: http://mymirror.myuniversity.edu, then you would use the following parameters:" << endl; + cerr << "\t\t" << "-base http://mymirror.myuniversity.edu" << endl; + cerr << "\t\t" << "-org mouse" << endl; + cerr << "\t\t" << "-db mm9" << endl; // end the program here exit(1); diff --git a/src/mergeBed/mergeMain.cpp b/src/mergeBed/mergeMain.cpp index 0a2f6946..62cbc10c 100755 --- a/src/mergeBed/mergeMain.cpp +++ b/src/mergeBed/mergeMain.cpp @@ -7,7 +7,7 @@ using namespace std; #define PROGRAM_NAME "mergeBed" // define the version -#define VERSION "1.1.0" +#define VERSION "1.2.0" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) diff --git a/src/sortBed/sortMain.cpp b/src/sortBed/sortMain.cpp index 48631367..f280aed0 100755 --- a/src/sortBed/sortMain.cpp +++ b/src/sortBed/sortMain.cpp @@ -7,7 +7,7 @@ using namespace std; #define PROGRAM_NAME "sortBed" // define the version -#define VERSION "1.1.0" +#define VERSION "1.2.0" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) diff --git a/src/subtractBed/a b/src/subtractBed/a deleted file mode 100644 index d6c80bc6..00000000 --- a/src/subtractBed/a +++ /dev/null @@ -1 +0,0 @@ -chr1 10 20 diff --git a/src/subtractBed/b b/src/subtractBed/b deleted file mode 100644 index c03d074e..00000000 --- a/src/subtractBed/b +++ /dev/null @@ -1 +0,0 @@ -chr1 0 100 diff --git a/src/subtractBed/subtractBed.cpp b/src/subtractBed/subtractBed.cpp index c7ef2178..cb9565d4 100755 --- a/src/subtractBed/subtractBed.cpp +++ b/src/subtractBed/subtractBed.cpp @@ -97,7 +97,10 @@ void BedSubtract::FindOverlaps(BED &a, vector<BED> &hits) { // find all of the overlaps between a and B. bedB->binKeeperFind(bedB->bedMap[a.chrom], a.start, a.end, hits); - int numOverlaps = 0; + // is A completely spanned by an entry in B? + // if so, A should not be reported. + int numConsumedByB = 0; + for (vector<BED>::iterator h = hits.begin(); h != hits.end(); ++h) { int s = max(a.start, h->start); @@ -105,36 +108,68 @@ void BedSubtract::FindOverlaps(BED &a, vector<BED> &hits) { if (s < e) { - numOverlaps++; - // is there enough overlap (default ~ 1bp) float overlap = ((float)(e-s) / (float)(a.end - a.start)); + if (overlap >= 1.0) { + numConsumedByB++; + } + } + } + + // report the subtraction with each entry in B + // if A was not "consumed" by any entry in B + if (numConsumedByB == 0) { + int numOverlaps = 0; + for (vector<BED>::iterator h = hits.begin(); h != hits.end(); ++h) { + + int s = max(a.start, h->start); + int e = min(a.end, h->end); + + if (s < e) { + + numOverlaps++; - if ( overlap > this->overlapFraction ) { + // is there enough overlap (default ~ 1bp) + float overlap = ((float)(e-s) / (float)(a.end - a.start)); + + if ( overlap > this->overlapFraction ) { - if (overlap < 1.0) { // if overlap = 1, then B consumes A and therefore, - // we won't report A. - // A ++++++++++++ - // B ---------- - // Res. ======== - if (h->start > a.start) { - int end = h->start + 1; - reportARemainder(a,a.start,end); cout << "\n"; - } - // A ++++++++++++ - // B ---------- - // Res. ======== - else { - reportARemainder(a,a.start,h->end); cout << "\n"; - } + if (overlap < 1.0) { // if overlap = 1, then B consumes A and therefore, + // we won't report A. + // A ++++++++++++ + // B ---- + // Res. ==== ==== + if ( (h->start > a.start) && (h->end < a.end) ) { + reportARemainder(a,a.start,h->start); cout << "\n"; + reportARemainder(a,h->end,a.end); cout << "\n"; + } + // A ++++++++++++ + // B ---------- + // Res. == + else if (h->start == a.start) { + reportARemainder(a,h->end,a.end); cout << "\n"; + } + // A ++++++++++++ + // B ---------- + // Res. ==== + else if (h->start < a.start) { + reportARemainder(a,h->end,a.end); cout << "\n"; + } + // A ++++++++++++ + // B ---------- + // Res. ======= + else if (h->start > a.start) { + reportARemainder(a,a.start,h->start); cout << "\n"; + } + } } } - } - } - if (numOverlaps == 0) { - // no overlap found, so just report A as-is. - reportA(a); cout << "\n"; + } + if (numOverlaps == 0) { + // no overlap found, so just report A as-is. + reportA(a); cout << "\n"; + } } } diff --git a/src/subtractBed/subtractMain.cpp b/src/subtractBed/subtractMain.cpp index 83962e0f..1c356622 100755 --- a/src/subtractBed/subtractMain.cpp +++ b/src/subtractBed/subtractMain.cpp @@ -6,7 +6,7 @@ using namespace std; #define PROGRAM_NAME "subtractBed" // define the version -#define VERSION "1.1.0" +#define VERSION "1.2.0" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) diff --git a/src/windowBed/windowBed.cpp b/src/windowBed/windowBed.cpp index 7b1636d6..33183457 100755 --- a/src/windowBed/windowBed.cpp +++ b/src/windowBed/windowBed.cpp @@ -17,12 +17,14 @@ /* Constructor */ -BedWindow::BedWindow(string &bedAFile, string &bedBFile, int &slop, bool &anyHit, bool &noHit, bool &writeCount) { +BedWindow::BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rightSlop, bool &anyHit, bool &noHit, bool &writeCount) { this->bedAFile = bedAFile; this->bedBFile = bedBFile; - this->slop = slop; + this->leftSlop = leftSlop; + this->rightSlop = rightSlop; + this->anyHit = anyHit; this->noHit = noHit; this->writeCount = writeCount; @@ -99,13 +101,13 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) { int aFudgeStart = 0; int aFudgeEnd; - if ((a.start - this->slop) > 0) { - aFudgeStart = a.start - this->slop; + if ((a.start - this->leftSlop) > 0) { + aFudgeStart = a.start - this->leftSlop; } else { aFudgeStart = 0; } - aFudgeEnd = a.end + this->slop; + aFudgeEnd = a.end + this->rightSlop; bedB->binKeeperFind(bedB->bedMap[a.chrom], aFudgeStart, aFudgeEnd, hits); diff --git a/src/windowBed/windowBed.h b/src/windowBed/windowBed.h index ccb2cb90..d8ef2b80 100755 --- a/src/windowBed/windowBed.h +++ b/src/windowBed/windowBed.h @@ -25,7 +25,7 @@ class BedWindow { public: // constructor - BedWindow(string &, string &, int &, bool &, bool &, bool &); + BedWindow(string &, string &, int &, int &, bool &, bool &, bool &); // destructor ~BedWindow(void); @@ -42,7 +42,8 @@ private: string bedBFile; bool anyHit; bool writeCount; - int slop; + int leftSlop; + int rightSlop; bool noHit; // instance of a bed file class. diff --git a/src/windowBed/windowMain.cpp b/src/windowBed/windowMain.cpp index f58f161a..21f77f91 100755 --- a/src/windowBed/windowMain.cpp +++ b/src/windowBed/windowMain.cpp @@ -6,7 +6,7 @@ using namespace std; #define PROGRAM_NAME "windowBed" // define the version -#define VERSION "1.1.0" +#define VERSION "1.2.0" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) @@ -24,7 +24,8 @@ int main(int argc, char* argv[]) { string bedBFile; // input arguments - int slop = 1000; + int leftSlop = 1000; + int rightSlop = 1000; bool haveBedA = false; bool haveBedB = false; @@ -32,6 +33,8 @@ int main(int argc, char* argv[]) { bool anyHit = false; bool writeCount = false; bool haveSlop = false; + bool haveLeft = false; + bool haveRight = false; // check to see if we should print out some help if(argc <= 1) showHelp = true; @@ -73,9 +76,20 @@ int main(int argc, char* argv[]) { } else if (PARAMETER_CHECK("-w", 2, parameterLength)) { haveSlop = true; - slop = atoi(argv[i + 1]); + leftSlop = atoi(argv[i + 1]); + rightSlop = leftSlop; i++; } + else if (PARAMETER_CHECK("-l", 2, parameterLength)) { + haveLeft = true; + leftSlop = atoi(argv[i + 1]); + i++; + } + else if (PARAMETER_CHECK("-r", 2, parameterLength)) { + haveRight = true; + rightSlop = atoi(argv[i + 1]); + i++; + } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; showHelp = true; @@ -97,14 +111,29 @@ int main(int argc, char* argv[]) { cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -c, not both." << endl << "*****" << endl; showHelp = true; } + + if (haveLeft && (leftSlop < 0)) { + cerr << endl << "*****" << endl << "*****ERROR: Upstream window (-l) must be positive." << endl << "*****" << endl; + showHelp = true; + } - if (haveSlop && (slop < 0)) { - cerr << endl << "*****" << endl << "*****ERROR: Slop (-s) must be positive." << endl << "*****" << endl; + if (haveRight && (rightSlop < 0)) { + cerr << endl << "*****" << endl << "*****ERROR: Downstream window (-r) must be positive." << endl << "*****" << endl; showHelp = true; } - + + if (haveSlop && (haveLeft || haveRight)) { + cerr << endl << "*****" << endl << "*****ERROR: Cannot choose -w with -l or -r. Either specify -l and -r or specify solely -w" << endl << "*****" << endl; + showHelp = true; + } + + if ((haveLeft && !haveRight) || (haveRight && !haveLeft)) { + cerr << endl << "*****" << endl << "*****ERROR: Please specify both -l and -r." << endl << "*****" << endl; + showHelp = true; + } + if (!showHelp) { - BedWindow *bi = new BedWindow(bedAFile, bedBFile, slop, anyHit, noHit, writeCount); + BedWindow *bi = new BedWindow(bedAFile, bedBFile, leftSlop, rightSlop, anyHit, noHit, writeCount); bi->WindowIntersectBed(); return 0; } @@ -127,7 +156,9 @@ void ShowHelp(void) { cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; cerr << "OPTIONS: " << endl; - cerr << "\t" << "-w (def. 1000)\t\t" << "Base pairs added before and after each entry in A when searching for overlaps in B." << endl << endl; + cerr << "\t" << "-w (def. 1000)\t\t" << "Base pairs added upstream and downstream of each entry in A when searching for overlaps in B." << endl << endl; + cerr << "\t" << "-l (def. 1000)\t\t" << "Base pairs added upstream (left of) of each entry in A when searching for overlaps in B." << endl << endl; + cerr << "\t" << "-r (def. 1000)\t\t" << "Base pairs added downstream (right of) of each entry in A when searching for overlaps in B." << endl << endl; cerr << "\t" << "-u\t\t\t" << "Write ORIGINAL a.bed entry ONCE if ANY overlap with B.bed." << endl << "\t\t\t\tIn other words, just report the fact >=1 hit was found." << endl << endl; cerr << "\t" << "-v \t\t\t" << "Only report those entries in A that have NO OVERLAP in B within the requested window." << endl << "\t\t\t\tSimilar to grep -v." << endl << endl; cerr << "\t" << "-c \t\t\t" << "For each entry in A, report the number of hits in B within the requested window." << endl << "\t\t\t\tReports 0 for A entries that have no overlap with B." << endl << endl; -- GitLab