Skip to content
Snippets Groups Projects
Commit 4f6253a0 authored by Aaron's avatar Aaron
Browse files

chromsweep is now a python-style generator. Big smile\!

parent b4928906
No related branches found
No related tags found
No related merge requests found
......@@ -51,8 +51,14 @@ ChromSweep::ChromSweep(string bedAFile, string bedBFile, bool anyHit,
// create new BED file objects for A and B
_bedA = new BedFile(bedAFile);
_bedB = new BedFile(bedBFile);
Sweep();
// prime the results pump.
_qy_lineNum = 0;
_db_lineNum = 0;
_bedA->Open();
_bedB->Open();
_qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
_db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
}
......@@ -67,65 +73,120 @@ bool after(const BED &a, const BED &b) {
return (a.start >= b.end);
}
void ChromSweep::ScanCache(const BED &curr_qy, BedLineStatus qy_status, vector<BED> &db_cache, vector<BED> &hits) {
if (qy_status != BED_INVALID) {
vector<BED>::iterator c = db_cache.begin();
while (c != db_cache.end())
void ChromSweep::ScanCache() {
if (_qy_status != BED_INVALID) {
vector<BED>::iterator c = _cache.begin();
while (c != _cache.end())
{
if ((curr_qy.chrom == c->chrom) && !(after(curr_qy, *c))) {
if (overlaps(curr_qy.start, curr_qy.end, c->start, c->end) > 0) {
hits.push_back(*c);
if ((_curr_qy.chrom == c->chrom) && !(after(_curr_qy, *c))) {
if (overlaps(_curr_qy.start, _curr_qy.end, c->start, c->end) > 0) {
_hits.push_back(*c);
}
++c;
}
else {
c = db_cache.erase(c);
c = _cache.erase(c);
}
}
}
}
// void ChromSweep::ScanCache(const BED &curr_qy, BedLineStatus qy_status, vector<BED> &db_cache, vector<BED> &hits) {
// if (qy_status != BED_INVALID) {
// vector<BED>::iterator c = db_cache.begin();
// while (c != db_cache.end())
// {
// if ((curr_qy.chrom == c->chrom) && !(after(curr_qy, *c))) {
// if (overlaps(curr_qy.start, curr_qy.end, c->start, c->end) > 0) {
// hits.push_back(*c);
// }
// ++c;
// }
// else {
// c = db_cache.erase(c);
// }
// }
// }
// }
void ChromSweep::ChromCheck(BED &curr_qy, BED &curr_db,
BedLineStatus &qy_status, BedLineStatus &db_status,
int &qy_lineNum, int &db_lineNum,
vector<BED> &db_cache, vector<BED> &hits)
void ChromSweep::ChromCheck()
{
if ((curr_qy.chrom == curr_db.chrom) || (db_status == BED_INVALID) || (qy_status == BED_INVALID)) {
if ((_curr_qy.chrom == _curr_db.chrom) || (_db_status == BED_INVALID) || (_qy_status == BED_INVALID)) {
return;
}
if (curr_qy.chrom > curr_db.chrom) {
while (!_bedB->Empty() && curr_db.chrom < curr_qy.chrom)
if (_curr_qy.chrom > _curr_db.chrom) {
while (!_bedB->Empty() && _curr_db.chrom < _curr_qy.chrom)
{
db_status = _bedB->GetNextBed(curr_db, db_lineNum);
_db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
}
db_cache.clear();
_cache.clear();
}
else if (curr_qy.chrom < curr_db.chrom) {
else if (_curr_qy.chrom < _curr_db.chrom) {
// report hits for the remaining queries on this chrom
BED tmp_curr_qy = curr_qy;
while (!_bedA->Empty() && tmp_curr_qy.chrom == curr_qy.chrom)
BED tmp_curr_qy = _curr_qy;
while (!_bedA->Empty() && tmp_curr_qy.chrom == _curr_qy.chrom)
{
//db_cache = ScanCache(tmp_curr_qy, qy_status, db_cache, hits);
ScanCache(tmp_curr_qy, qy_status, db_cache, hits);
ReportHits(tmp_curr_qy, hits);
qy_status = _bedA->GetNextBed(tmp_curr_qy, qy_lineNum);
hits.clear();
ScanCache();
_results.push(make_pair(tmp_curr_qy, _hits));
_qy_status = _bedA->GetNextBed(tmp_curr_qy, _qy_lineNum);
_hits.clear();
}
// now fast forward query to catch up to database
while (!_bedA->Empty() && tmp_curr_qy.chrom < curr_db.chrom)
while (!_bedA->Empty() && tmp_curr_qy.chrom < _curr_db.chrom)
{
// hits is empty to reflect the fact that no hits are found in catch-up mode
ReportHits(tmp_curr_qy, hits);
qy_status = _bedA->GetNextBed(tmp_curr_qy, qy_lineNum);
_results.push(make_pair(tmp_curr_qy, _hits));
_qy_status = _bedA->GetNextBed(tmp_curr_qy, _qy_lineNum);
}
curr_qy = tmp_curr_qy;
db_cache.clear();
_curr_qy = tmp_curr_qy;
_cache.clear();
}
}
//
// void ChromSweep::ChromCheck(BED &curr_qy, BED &curr_db,
// BedLineStatus &qy_status, BedLineStatus &db_status,
// int &qy_lineNum, int &db_lineNum,
// vector<BED> &db_cache, vector<BED> &hits)
// {
// if ((curr_qy.chrom == curr_db.chrom) || (db_status == BED_INVALID) || (qy_status == BED_INVALID)) {
// return;
// }
//
// if (curr_qy.chrom > curr_db.chrom) {
// while (!_bedB->Empty() && curr_db.chrom < curr_qy.chrom)
// {
// db_status = _bedB->GetNextBed(curr_db, db_lineNum);
// }
// db_cache.clear();
// }
// else if (curr_qy.chrom < curr_db.chrom) {
// // report hits for the remaining queries on this chrom
// BED tmp_curr_qy = curr_qy;
// while (!_bedA->Empty() && tmp_curr_qy.chrom == curr_qy.chrom)
// {
// //db_cache = ScanCache(tmp_curr_qy, qy_status, db_cache, hits);
// ScanCache(tmp_curr_qy, qy_status, db_cache, hits);
//
// //ReportHits(tmp_curr_qy, hits);
// _results.push(make_pair(tmp_curr_qy, hits));
// qy_status = _bedA->GetNextBed(tmp_curr_qy, qy_lineNum);
// hits.clear();
// }
// // now fast forward query to catch up to database
// while (!_bedA->Empty() && tmp_curr_qy.chrom < curr_db.chrom)
// {
// // hits is empty to reflect the fact that no hits are found in catch-up mode
// ReportHits(tmp_curr_qy, hits);
// qy_status = _bedA->GetNextBed(tmp_curr_qy, qy_lineNum);
// }
// curr_qy = tmp_curr_qy;
// db_cache.clear();
// }
// }
void ChromSweep::ReportHits(const BED &curr_qy, const vector<BED> &hits) {
_bedA->reportBedTab(curr_qy);
......@@ -133,44 +194,76 @@ void ChromSweep::ReportHits(const BED &curr_qy, const vector<BED> &hits) {
}
void ChromSweep::Sweep() {
int qy_lineNum = 0;
int db_lineNum = 0;
// current feature from each file
BED curr_qy, curr_db;
// status of the current lines
BedLineStatus qy_status, db_status;
vector<BED> db_cache;
vector<BED> hits;
// open the files; get the first line from each
_bedA->Open();
_bedB->Open();
qy_status = _bedA->GetNextBed(curr_qy, qy_lineNum);
db_status = _bedB->GetNextBed(curr_db, db_lineNum);
while (!_bedA->Empty()) {
bool ChromSweep::Next(pair<BED, vector<BED> > &next) {
if (!_bedA->Empty()) {
// have we changed chromosomes?
ChromCheck(curr_qy, curr_db, qy_status, db_status, qy_lineNum, db_lineNum, db_cache, hits);
ChromCheck();
// scan the database cache for hits
//db_cache = ScanCache(curr_qy, qy_status, db_cache, hits);
ScanCache(curr_qy, qy_status, db_cache, hits);
ScanCache();
// advance the db until we are ahead of the query. update hits and cache as necessary
while (!_bedB->Empty() &&
curr_qy.chrom == curr_db.chrom &&
!(after(curr_db, curr_qy)))
_curr_qy.chrom == _curr_db.chrom &&
!(after(_curr_db, _curr_qy)))
{
if (overlaps(curr_qy.start, curr_qy.end, curr_db.start, curr_db.end) > 0) {
hits.push_back(curr_db);
if (overlaps(_curr_qy.start, _curr_qy.end, _curr_db.start, _curr_db.end) > 0) {
_hits.push_back(_curr_db);
}
db_cache.push_back(curr_db);
db_status = _bedB->GetNextBed(curr_db, db_lineNum);
_cache.push_back(_curr_db);
_db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
}
// report the hits for this query and reset for the next query
ReportHits(curr_qy, hits);
hits.clear();
qy_status = _bedA->GetNextBed(curr_qy, qy_lineNum);
_results.push(make_pair(_curr_qy, _hits));
_hits.clear();
_qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
}
if (!_results.empty()) {
next = _results.front();
_results.pop();
return true;
}
else {return false;}
}
// void ChromSweep::Sweep() {
//
// int qy_lineNum = 0;
// int db_lineNum = 0;
//
// // current feature from each file
// BED curr_qy, curr_db;
//
// // status of the current lines
// BedLineStatus qy_status, db_status;
// vector<BED> db_cache;
// vector<BED> hits;
//
// // open the files; get the first line from each
// _bedA->Open();
// _bedB->Open();
// qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
// db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
// while (!_bedA->Empty()) {
// // have we changed chromosomes?
// ChromCheck(curr_qy, curr_db, qy_status, db_status, qy_lineNum, db_lineNum, db_cache, hits);
// // scan the database cache for hits
// //db_cache = ScanCache(curr_qy, qy_status, db_cache, hits);
// ScanCache(curr_qy, qy_status, db_cache, hits);
// // advance the db until we are ahead of the query. update hits and cache as necessary
// while (!_bedB->Empty() &&
// curr_qy.chrom == curr_db.chrom &&
// !(after(curr_db, curr_qy)))
// {
// if (overlaps(curr_qy.start, curr_qy.end, curr_db.start, curr_db.end) > 0) {
// hits.push_back(curr_db);
// }
// db_cache.push_back(curr_db);
// db_status = _bedB->GetNextBed(curr_db, db_lineNum);
// }
// // report the hits for this query and reset for the next query
// //ReportHits(curr_qy, hits);
// _results.push(make_pair(curr_qy, hits));
// hits.clear();
// qy_status = _bedA->GetNextBed(curr_qy, qy_lineNum);
// }
// }
......@@ -41,6 +41,10 @@ public:
// destructor
~ChromSweep(void);
bool Next(pair<BED, vector<BED> > &next);
//pair<BED, vector<BED> > GetNextResult(void);
private:
......@@ -77,15 +81,27 @@ private:
// instance of a bed file class.
BedFile *_bedA, *_bedB;
vector<BED> _cache;
vector<BED> _hits;
queue< pair<BED, vector<BED> > > _results;
// variables for the current query and db entries.
BED _curr_qy, _curr_db;
BedLineStatus _qy_status, _db_status;
int _qy_lineNum, _db_lineNum;
//------------------------------------------------
// private methods
//------------------------------------------------
void ScanCache(const BED &curr_qy, BedLineStatus qy_status, vector<BED> &db_cache, vector<BED> &hits);
void ChromCheck(BED &curr_qy, BED &curr_db,
BedLineStatus &qy_status, BedLineStatus &db_status,
int &qy_lineNum, int &db_lineNum,
vector<BED> &db_cache, vector<BED> &hits);
// void ScanCache(const BED &curr_qy, BedLineStatus qy_status, vector<BED> &db_cache, vector<BED> &hits);
//
// void ChromCheck(BED &curr_qy, BED &curr_db,
// BedLineStatus &qy_status, BedLineStatus &db_status,
// int &qy_lineNum, int &db_lineNum,
// vector<BED> &db_cache, vector<BED> &hits);
void ScanCache();
void ChromCheck();
void Sweep();
void ReportHits(const BED &curr_qy, const vector<BED> &hits);
......
......@@ -195,10 +195,20 @@ int main(int argc, char* argv[]) {
if (!showHelp) {
ChromSweep *bi = new ChromSweep(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap,
ChromSweep *sweep = new ChromSweep(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap,
writeAllOverlap, overlapFraction, noHit, writeCount, forceStrand,
reciprocalFraction, obeySplits, inputIsBam, outputIsBam);
delete bi;
pair<BED, vector<BED> > hit_set;
while (sweep->Next(hit_set)) {
cout << hit_set.first.chrom << "\t"
<< hit_set.first.start << "\t"
<< hit_set.first.end << "\t"
<< hit_set.second.size() << "\n";
}
delete sweep;
return 0;
}
else {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment