From 5f48036660c1ac96da1e7bac0cfe2680caa374f7 Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Fri, 16 Sep 2011 10:04:39 -0400 Subject: [PATCH] chromsweep is a proper tool. Now to make it an iterable class. --- src/chromsweep/chromsweep.cpp | 69 +++++++------------------------ src/chromsweep/chromsweep.h | 17 +------- src/chromsweep/chromsweepMain.cpp | 8 +--- 3 files changed, 17 insertions(+), 77 deletions(-) diff --git a/src/chromsweep/chromsweep.cpp b/src/chromsweep/chromsweep.cpp index 2211c9ff..e36dcea4 100644 --- a/src/chromsweep/chromsweep.cpp +++ b/src/chromsweep/chromsweep.cpp @@ -55,6 +55,10 @@ ChromSweep::ChromSweep(string bedAFile, string bedBFile, bool anyHit, // prime the results pump. _qy_lineNum = 0; _db_lineNum = 0; + + _hits.reserve(1000); + _cache.reserve(1000); + _bedA->Open(); _bedB->Open(); _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); @@ -125,72 +129,29 @@ void ChromSweep::ChromCheck() } else if (_curr_qy.chrom < _curr_db.chrom) { // report hits for the remaining queries on this chrom - BED tmp_curr_qy = _curr_qy; - while (!_bedA->Empty() && tmp_curr_qy.chrom == _curr_qy.chrom) + string curr_chrom = _curr_qy.chrom; + while (!_bedA->Empty() && _curr_qy.chrom == curr_chrom) { ScanCache(); - _results.push(make_pair(tmp_curr_qy, _hits)); - _qy_status = _bedA->GetNextBed(tmp_curr_qy, _qy_lineNum); + _results.push(make_pair(_curr_qy, _hits)); + _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); _hits.clear(); } // now fast forward query to catch up to database - while (!_bedA->Empty() && tmp_curr_qy.chrom < _curr_db.chrom) + while (!_bedA->Empty() && _curr_qy.chrom < _curr_db.chrom) { // hits is empty to reflect the fact that no hits are found in catch-up mode - _results.push(make_pair(tmp_curr_qy, _hits)); - _qy_status = _bedA->GetNextBed(tmp_curr_qy, _qy_lineNum); + _results.push(make_pair(_curr_qy, _hits)); + _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); } - _curr_qy = tmp_curr_qy; _cache.clear(); } } -// -// void ChromSweep::ChromCheck(BED &curr_qy, BED &curr_db, -// BedLineStatus &qy_status, BedLineStatus &db_status, -// int &qy_lineNum, int &db_lineNum, -// vector<BED> &db_cache, vector<BED> &hits) -// { -// if ((curr_qy.chrom == curr_db.chrom) || (db_status == BED_INVALID) || (qy_status == BED_INVALID)) { -// return; -// } -// -// if (curr_qy.chrom > curr_db.chrom) { -// while (!_bedB->Empty() && curr_db.chrom < curr_qy.chrom) -// { -// db_status = _bedB->GetNextBed(curr_db, db_lineNum); -// } -// db_cache.clear(); -// } -// else if (curr_qy.chrom < curr_db.chrom) { -// // report hits for the remaining queries on this chrom -// BED tmp_curr_qy = curr_qy; -// while (!_bedA->Empty() && tmp_curr_qy.chrom == curr_qy.chrom) -// { -// //db_cache = ScanCache(tmp_curr_qy, qy_status, db_cache, hits); -// ScanCache(tmp_curr_qy, qy_status, db_cache, hits); -// -// //ReportHits(tmp_curr_qy, hits); -// _results.push(make_pair(tmp_curr_qy, hits)); -// qy_status = _bedA->GetNextBed(tmp_curr_qy, qy_lineNum); -// hits.clear(); -// } -// // now fast forward query to catch up to database -// while (!_bedA->Empty() && tmp_curr_qy.chrom < curr_db.chrom) -// { -// // hits is empty to reflect the fact that no hits are found in catch-up mode -// ReportHits(tmp_curr_qy, hits); -// qy_status = _bedA->GetNextBed(tmp_curr_qy, qy_lineNum); -// } -// curr_qy = tmp_curr_qy; -// db_cache.clear(); -// } -// } -void ChromSweep::ReportHits(const BED &curr_qy, const vector<BED> &hits) { - _bedA->reportBedTab(curr_qy); - cout << hits.size() << endl; +void ChromSweep::ReportQuery(const BED &query) { + _bedA->reportBedTab(query); } @@ -201,9 +162,7 @@ bool ChromSweep::Next(pair<BED, vector<BED> > &next) { // scan the database cache for hits ScanCache(); // advance the db until we are ahead of the query. update hits and cache as necessary - while (!_bedB->Empty() && - _curr_qy.chrom == _curr_db.chrom && - !(after(_curr_db, _curr_qy))) + while (!_bedB->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy))) { if (overlaps(_curr_qy.start, _curr_qy.end, _curr_db.start, _curr_db.end) > 0) { _hits.push_back(_curr_db); diff --git a/src/chromsweep/chromsweep.h b/src/chromsweep/chromsweep.h index bbbe6bdc..1fb58405 100644 --- a/src/chromsweep/chromsweep.h +++ b/src/chromsweep/chromsweep.h @@ -44,7 +44,7 @@ public: bool Next(pair<BED, vector<BED> > &next); - //pair<BED, vector<BED> > GetNextResult(void); + void ReportQuery(const BED &query); private: @@ -90,23 +90,8 @@ private: BedLineStatus _qy_status, _db_status; int _qy_lineNum, _db_lineNum; - //------------------------------------------------ - // private methods - //------------------------------------------------ - // void ScanCache(const BED &curr_qy, BedLineStatus qy_status, vector<BED> &db_cache, vector<BED> &hits); - // - // void ChromCheck(BED &curr_qy, BED &curr_db, - // BedLineStatus &qy_status, BedLineStatus &db_status, - // int &qy_lineNum, int &db_lineNum, - // vector<BED> &db_cache, vector<BED> &hits); - void ScanCache(); void ChromCheck(); - - void Sweep(); - void ReportHits(const BED &curr_qy, const vector<BED> &hits); - - }; #endif /* CHROMSWEEP_H */ diff --git a/src/chromsweep/chromsweepMain.cpp b/src/chromsweep/chromsweepMain.cpp index aebed1a7..90ed322c 100644 --- a/src/chromsweep/chromsweepMain.cpp +++ b/src/chromsweep/chromsweepMain.cpp @@ -201,13 +201,9 @@ int main(int argc, char* argv[]) { pair<BED, vector<BED> > hit_set; while (sweep->Next(hit_set)) { - cout << hit_set.first.chrom << "\t" - << hit_set.first.start << "\t" - << hit_set.first.end << "\t" - << hit_set.second.size() << "\n"; + sweep->ReportQuery(hit_set.first); + cout << hit_set.second.size() << "\n"; } - - delete sweep; return 0; } -- GitLab