diff --git a/src/closestBed/closestBed.cpp b/src/closestBed/closestBed.cpp index dcd44a1d0b78417d5abbd564c39b22e1b94a7e1e..1e0d8d30f20ecc345b70a3fee766c48e4553d286 100644 --- a/src/closestBed/closestBed.cpp +++ b/src/closestBed/closestBed.cpp @@ -22,13 +22,16 @@ const int SLOPGROWTH = 2048000; Constructor */ BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand, - string &tieMode, bool reportDistance, bool ignoreOverlaps) + string &tieMode, bool reportDistance, bool signDistance, string &_strandedDistMode, + bool ignoreOverlaps) : _bedAFile(bedAFile) , _bedBFile(bedBFile) , _tieMode(tieMode) , _sameStrand(sameStrand) , _diffStrand(diffStrand) , _reportDistance(reportDistance) + , _signDistance(signDistance) + , _strandedDistMode(_strandedDistMode) , _ignoreOverlaps(ignoreOverlaps) { _bedA = new BedFile(_bedAFile); @@ -56,7 +59,8 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { int numOverlaps = 0; vector<BED> closestB; CHRPOS minDistance = INT_MAX; - vector<CHRPOS> distances; + int32_t curDistance = INT_MAX; + vector<int32_t> distances; // is there at least one feature in B on the same chrom // as the current A feature? @@ -102,32 +106,48 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { } // the hit is to the "left" of A else if (h->end <= a.start) { - if ((a.start - h->end) < minDistance) { - minDistance = a.start - h->end; - + curDistance = a.start - h->end; + if (_signDistance) { + if ((_strandedDistMode == "ref") + || (_strandedDistMode == "a" && a.strand != "-") + || (_strandedDistMode == "b" && h->strand == "-")) { + curDistance = -curDistance; + } + } + + if (abs(curDistance) < minDistance) { + minDistance = abs(curDistance); + closestB.clear(); closestB.push_back(*h); distances.clear(); - distances.push_back(minDistance); + distances.push_back(curDistance); } - else if ((a.start - h->end) == minDistance) { + else if (abs(curDistance) == minDistance) { + minDistance = abs(curDistance); closestB.push_back(*h); - distances.push_back(minDistance); + distances.push_back(curDistance); } } // the hit is to the "right" of A else if (h->start >= a.end) { - if ((h->start - a.end) < minDistance) { - minDistance = h->start - a.end; - + curDistance = h->start - a.end; + if (_signDistance) { + if ((_strandedDistMode == "a" && a.strand == "-") + || (_strandedDistMode == "b" && h->strand != "-")) { + curDistance = -curDistance; + } + } + + if (abs(curDistance) < minDistance) { closestB.clear(); closestB.push_back(*h); distances.clear(); - distances.push_back(minDistance); + distances.push_back(curDistance); } - else if ((h->start - a.end) == minDistance) { + else if (abs(curDistance) == minDistance) { closestB.push_back(*h); - distances.push_back(minDistance); + distances.push_back(curDistance); } } } diff --git a/src/closestBed/closestBed.h b/src/closestBed/closestBed.h index 0ee74d2596b1e02b0b9a749d619bcc3b6507b764..82bd9b40b5fc6f6c2de1d246502f586f5ccdf923 100644 --- a/src/closestBed/closestBed.h +++ b/src/closestBed/closestBed.h @@ -29,7 +29,8 @@ public: // constructor BedClosest(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand, string &tieMode, - bool reportDistance, bool ignoreOverlaps); + bool reportDistance, bool signDistance, string &strandedDistMode, + bool ignoreOverlaps); // destructor ~BedClosest(void); @@ -46,6 +47,8 @@ private: bool _sameStrand; bool _diffStrand; bool _reportDistance; + bool _signDistance; + string _strandedDistMode; bool _ignoreOverlaps; BedFile *_bedA, *_bedB; diff --git a/src/closestBed/closestMain.cpp b/src/closestBed/closestMain.cpp index d2f4fce7b0576c6fae745a98ea23e84c0b9644c7..5786e7b88a4ef4a6573193feb06566d52234490f 100644 --- a/src/closestBed/closestMain.cpp +++ b/src/closestBed/closestMain.cpp @@ -32,6 +32,7 @@ int main(int argc, char* argv[]) { string bedAFile; string bedBFile; string tieMode = "all"; + string strandedDistMode = "ref"; bool haveBedA = false; bool haveBedB = false; @@ -40,6 +41,8 @@ int main(int argc, char* argv[]) { bool diffStrand = false; bool ignoreOverlaps = false; bool reportDistance = false; + bool signDistance = false; + bool haveStrandedDistMode = false; // check to see if we should print out some help @@ -84,6 +87,15 @@ int main(int argc, char* argv[]) { else if (PARAMETER_CHECK("-d", 2, parameterLength)) { reportDistance = true; } + else if (PARAMETER_CHECK("-D", 2, parameterLength)) { + if ((i+1) < argc) { + reportDistance = true; + signDistance = true; + haveStrandedDistMode = true; + strandedDistMode = argv[i + 1]; + i++; + } + } else if (PARAMETER_CHECK("-io", 3, parameterLength)) { ignoreOverlaps = true; } @@ -111,6 +123,12 @@ int main(int argc, char* argv[]) { cerr << endl << "*****" << endl << "*****ERROR: Request \"all\" or \"first\" or \"last\" for Tie Mode (-t)" << endl << "*****" << endl; showHelp = true; } + + if (haveStrandedDistMode && (strandedDistMode != "a") && (strandedDistMode != "b") + && (strandedDistMode != "ref")) { + cerr << endl << "*****" << endl << "*****ERROR: Request \"a\" or \"b\" or \"ref\" for Stranded Distance Mode (-D)" << endl << "*****" << endl; + showHelp = true; + } if (sameStrand && diffStrand) { cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl; @@ -118,7 +136,7 @@ int main(int argc, char* argv[]) { } if (!showHelp) { - BedClosest *bc = new BedClosest(bedAFile, bedBFile, sameStrand, diffStrand, tieMode, reportDistance, ignoreOverlaps); + BedClosest *bc = new BedClosest(bedAFile, bedBFile, sameStrand, diffStrand, tieMode, reportDistance, signDistance, strandedDistMode, ignoreOverlaps); delete bc; return 0; } @@ -144,13 +162,24 @@ void ShowHelp(void) { cerr << "\t\tthat overlaps A on the _same_ strand." << endl; cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl; - cerr << "\t-s\t" << "Require opposite strandedness. That is, find the closest feature in B" << endl; + cerr << "\t-S\t" << "Require opposite strandedness. That is, find the closest feature in B" << endl; cerr << "\t\tthat overlaps A on the _opposite_ strand." << endl; cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl; cerr << "\t-d\t" << "In addition to the closest feature in B, " << endl; cerr << "\t\treport its distance to A as an extra column." << endl; cerr << "\t\t- The reported distance for overlapping features will be 0." << endl << endl; + + cerr << "\t-D\t" << "Like -d, report the closest feature in B, and its distance to A" << endl; + cerr << "\t\tas an extra column. Unlike -d, use negative distances to report" << endl; + cerr << "\t\tupstream features. You must specify which orientation defines \"upstream\"." << endl; + cerr << "\t\tThe options are:" << endl; + cerr << "\t\t- \"ref\" Report distance with respect to the reference genome. " << endl; + cerr << "\t\t B features with a lower (start, stop) are upstream" << endl; + cerr << "\t\t- \"a\" Report distance with respect to A." << endl; + cerr << "\t\t When A is on the - strand, \"upstream\" means B has a higher (start,stop)." << endl; + cerr << "\t\t- \"b\" Report distance with respect to B." << endl; + cerr << "\t\t When B is on the - strand, \"upstream\" means A has a higher (start,stop)." << endl << endl; cerr << "\t-no\t" << "Ignore features in B that overlap A. That is, we want close, but " << endl; cerr << "\t\tnot touching features only." << endl << endl;