diff --git a/RELEASE_HISTORY b/RELEASE_HISTORY index 12c62202d8d875017f0bbfa3fbaad3bf703427e6..4b5becb1cd227cb338e4c7a73a88951fa61b9423 100644 --- a/RELEASE_HISTORY +++ b/RELEASE_HISTORY @@ -1,17 +1,29 @@ +Version 2.2.3 (10/23/2009) +1. Changed windowBed to optionally define "left" and "right" windows based on strand. For example, by default, -l 100 and -r 500 will +add 100 bases to the left (lower coordinates) of a feature in A when scanning for hits in B and 500 bases to the right (higher coordinates). + +However, if one chooses the -sw option (windows based on strandedness), the behavior changes. Assume the above example except that a feature in A +is on the negative strand ("-"). In this case, -l 100, -r 500 and -sw will add 100 bases to the right (higher coordinates) and 500 bases to the left (lower coordinates). + +In addition, there is a separate option (-sm) that can optionally force hits in B to only be tracked if they are on the same strand as A. + +***NOTE: This replaces the previous -s option and may affect existing pipelines***. + + Version 2.2.2 (10/20/2009) 1. Improved the speed of genomeCoverageBed by roughly 100 fold. The memory usage is now less than 2.0 Gb. + Version 2.2.1 1. Fixed a very obvious bug in subtractBed that caused improper behavior when a feature in A was overlapped by more than one feature in B. Many thanks to folks in the Hannon lab at CSHL for pointing this out. Version 2.2.0 - === Notable changes in this release === 1. coverageBed will optionally only count features in BED file A (e.g. sequencing reads) that overlap with - the intervals/windows in BED file B. This has been requested several times recently and facilitates CHiP-Seq and - RNA-Seq experiments. + the intervals/windows in BED file B on the same strand. This has been requested several times recently + and facilitates CHiP-Seq and RNA-Seq experiments. 2. intersectBed can now require a minimum __reciprocal__ overlap between intervals in BED A and BED B. 
For example, previously, if one used -f 0.90, it required that a feature in B overlap 90% of the feature in A for the "hit" @@ -65,18 +77,22 @@ VERSION 2.1.2 2. Change the "score" column (i.e. column 5) to b stored as a string. While this deviates from the UCSC convention, it allows significantly more information to be packed into the column. + VERSION 2.1.1 1. Added limits.h to bedFile.h to fix compilation issues on some systems. 2. Fixed bug in testing for "track" or "browser" lines. + VERSION 2.1.0 1. Fixed a bug in peIntersectBed that prevented -a from being correctly handled when passed via stdin. 2. Added new functionality to coverageBed that calculates the density of coverage. 3. Fixed bug in geneomCoverageBed. + VERSION 2.0.1 1. Added the ability to retain UCSC browser track/browser headers in BED files. + VERSION 2.0 1. Sped up the file parsing. ~10-20% increase in speed. 2. Created reportBed() as a common method in the bedFile class. Cleans up the code quite nicely. @@ -85,6 +101,7 @@ VERSION 2.0 5. Fixed bug that prevented overlaps from being reported when the overlap fraction requested is 1.0 + VERSION 1.2, 04/27/2009. (1eb06115bdf3c49e75793f764a70c3501bb53f33) 1. Added subtractBed. A. Fixed bug that prevented "split" overlaps from being reported. diff --git a/src/utils/version/version.h b/src/utils/version/version.h index 9d4887614377b8de2af8678e1e8f6ea48a8b0772..173345d9a1a47991747e376734962df75b789cc3 100644 --- a/src/utils/version/version.h +++ b/src/utils/version/version.h @@ -1,7 +1,8 @@ #ifndef VERSION_H #define VERSION_H -// define the version -#define VERSION "2.2.2" +// define the version. All tools in the +// suite carry the same version number. 
+#define VERSION "2.2.3" #endif /* VERSION_H */ diff --git a/src/windowBed/windowBed.cpp b/src/windowBed/windowBed.cpp index 5cfe8e90385681f0d308455a7b0dd8b6077b8207..7c6a84a1bb74674038078ff47867a0ad87fd9746 100755 --- a/src/windowBed/windowBed.cpp +++ b/src/windowBed/windowBed.cpp @@ -14,7 +14,8 @@ /* Constructor */ -BedWindow::BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rightSlop, bool &anyHit, bool &noHit, bool &writeCount, bool &forceStrand) { +BedWindow::BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rightSlop, bool &anyHit, bool &noHit, + bool &writeCount, bool &strandWindows, bool &matchOnStrand) { this->bedAFile = bedAFile; this->bedBFile = bedBFile; @@ -25,8 +26,9 @@ BedWindow::BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rig this->anyHit = anyHit; this->noHit = noHit; this->writeCount = writeCount; - this->forceStrand = forceStrand; - + this->strandWindows = strandWindows; + this->matchOnStrand = matchOnStrand; + this->bedA = new BedFile(bedAFile); this->bedB = new BedFile(bedBFile); } @@ -48,14 +50,41 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) { int aFudgeStart = 0; int aFudgeEnd; - if ((a.start - this->leftSlop) > 0) { - aFudgeStart = a.start - this->leftSlop; + + // Does the user want to treat the windows based on strand? + // If so, + // if "+", then left is left and right is right + // if "-", the left is right and right is left. 
+ if (this->strandWindows) { + + if (a.strand == "+") { + if ((a.start - this->leftSlop) > 0) { + aFudgeStart = a.start - this->leftSlop; + } + else { + aFudgeStart = 0; + } + aFudgeEnd = a.end + this->rightSlop; + } + else { + if ((a.start - this->rightSlop) > 0) { + aFudgeStart = a.start - this->rightSlop; + } + else { + aFudgeStart = 0; + } + aFudgeEnd = a.end + this->leftSlop; + } } else { - aFudgeStart = 0; + if ((a.start - this->leftSlop) > 0) { + aFudgeStart = a.start - this->leftSlop; + } + else { + aFudgeStart = 0; + } + aFudgeEnd = a.end + this->rightSlop; } - aFudgeEnd = a.end + this->rightSlop; - bedB->binKeeperFind(bedB->bedMap[a.chrom], aFudgeStart, aFudgeEnd, hits); @@ -64,7 +93,7 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) { // if forcing strandedness, move on if the hit // is not on the same strand as A. - if ((this->forceStrand) && (a.strand != h->strand)) { + if ((this->matchOnStrand) && (a.strand != h->strand)) { continue; // continue force the next iteration of the for loop. } diff --git a/src/windowBed/windowBed.h b/src/windowBed/windowBed.h index bfc89a9bf2387e324bdad11ba5d0b3346b3c3ceb..f8bf0d0a2d5997eee2d24b93be24ce97a6a9e9a9 100755 --- a/src/windowBed/windowBed.h +++ b/src/windowBed/windowBed.h @@ -16,7 +16,7 @@ class BedWindow { public: // constructor - BedWindow(string &, string &, int &, int &, bool &, bool &, bool &, bool &); + BedWindow(string &, string &, int &, int &, bool &, bool &, bool &, bool &, bool &); // destructor ~BedWindow(void); @@ -36,7 +36,8 @@ private: int leftSlop; int rightSlop; bool noHit; - bool forceStrand; + bool strandWindows; + bool matchOnStrand; // instance of a bed file class. 
BedFile *bedA, *bedB; diff --git a/src/windowBed/windowMain.cpp b/src/windowBed/windowMain.cpp index 8978cb79cee8b9f45bed83ed54d34be6ee88835f..ade5bd1116fde73200e419a54dc23fbb01582851 100755 --- a/src/windowBed/windowMain.cpp +++ b/src/windowBed/windowMain.cpp @@ -34,7 +34,8 @@ int main(int argc, char* argv[]) { bool haveSlop = false; bool haveLeft = false; bool haveRight = false; - bool forceStrand = false; + bool strandWindows = false; + bool matchOnStrand = false; // check to see if we should print out some help if(argc <= 1) showHelp = true; @@ -74,8 +75,11 @@ int main(int argc, char* argv[]) { else if (PARAMETER_CHECK("-v", 2, parameterLength)) { noHit = true; } - else if (PARAMETER_CHECK("-s", 2, parameterLength)) { - forceStrand = true; + else if (PARAMETER_CHECK("-sw", 3, parameterLength)) { + strandWindows = true; + } + else if (PARAMETER_CHECK("-sm", 3, parameterLength)) { + matchOnStrand = true; } else if (PARAMETER_CHECK("-w", 2, parameterLength)) { haveSlop = true; @@ -136,7 +140,7 @@ int main(int argc, char* argv[]) { } if (!showHelp) { - BedWindow *bi = new BedWindow(bedAFile, bedBFile, leftSlop, rightSlop, anyHit, noHit, writeCount, forceStrand); + BedWindow *bi = new BedWindow(bedAFile, bedBFile, leftSlop, rightSlop, anyHit, noHit, writeCount, strandWindows, matchOnStrand); bi->WindowIntersectBed(); return 0; } @@ -159,10 +163,11 @@ void ShowHelp(void) { cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; cerr << "OPTIONS: " << endl; - cerr << "\t" << "-s\t\t\t" << "Force strandedness. Only report hits in B that overlap A on the same strand." << endl << "\t\t\t\tBy default, overlaps are reported without respect to strand." << endl << endl; cerr << "\t" << "-w (def. 1000)\t\t" << "Base pairs added upstream and downstream of each entry in A when searching for overlaps in B." << endl << endl; cerr << "\t" << "-l (def. 
1000)\t\t" << "Base pairs added upstream (left of) of each entry in A when searching for overlaps in B." << endl << endl; cerr << "\t" << "-r (def. 1000)\t\t" << "Base pairs added downstream (right of) of each entry in A when searching for overlaps in B." << endl << endl; + cerr << "\t" << "-sw\t\t\t" << "Define -l and -r based on strand. E.g. if used, -l 500 for a negative-stranded feature will add 500 bp downstream. Default = false." << endl << "\t\t\t\tBy default, overlaps are reported without respect to strand." << endl << endl; + cerr << "\t" << "-sm\t\t\t" << "Only report hits in B that overlap A on the same strand." << endl << "\t\t\t\tBy default, overlaps are reported without respect to strand." << endl << endl; cerr << "\t" << "-u\t\t\t" << "Write ORIGINAL a.bed entry ONCE if ANY overlap with B.bed." << endl << "\t\t\t\tIn other words, just report the fact >=1 hit was found." << endl << endl; cerr << "\t" << "-v \t\t\t" << "Only report those entries in A that have NO OVERLAP in B within the requested window." << endl << "\t\t\t\tSimilar to grep -v." << endl << endl; cerr << "\t" << "-c \t\t\t" << "For each entry in A, report the number of hits in B within the requested window." << endl << "\t\t\t\tReports 0 for A entries that have no overlap with B." << endl << endl;