Commit 4422985d authored by Neil Kindlon's avatar Neil Kindlon
Browse files

More sort order unit tests for chromsweep and closest; bug fixes and minor enhancements

parent 2a2ae0cd
......@@ -119,6 +119,11 @@ bool ContextIntersect::isValidState()
return false;
}
}
if (_haveFraction && (_overlapFraction <= 0.0 || _overlapFraction > 1.0)) {
_errorMsg = "\n***** ERROR: _overlapFraction must be in the range (0.0, 1.0]. *****";
return false;
}
if (getUseDBnameTags() && _dbNameTags.size() != _dbFileIdxs.size()) {
_errorMsg = "\n***** ERROR: Number of database name tags given does not match number of databases. *****";
return false;
......
......@@ -45,6 +45,7 @@ bool CloseSweep::init() {
}
void CloseSweep::masterScan(RecordKeyVector &retList) {
if (_currQueryChromName != _prevQueryChromName) testChromOrder(_currQueryRec);
if (_context->reportDistance()) {
_finalDistances.clear();
}
......@@ -439,12 +440,18 @@ bool CloseSweep::chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan)
// the query is ahead of the database. fast-forward the database to catch-up.
if (queryChromAfterDbRec(dbRec)) {
QuickString oldDbChrom(dbRec->getChrName());
while (dbRec != NULL &&
queryChromAfterDbRec(dbRec)) {
_dbFRMs[dbIdx]->deleteRecord(dbRec);
nextRecord(false, dbIdx);
dbRec = _currDbRecs[dbIdx];
if (!nextRecord(false, dbIdx)) break;
dbRec = _currDbRecs[dbIdx];
const QuickString &newDbChrom = dbRec->getChrName();
if (newDbChrom != oldDbChrom) {
testChromOrder(dbRec);
oldDbChrom = newDbChrom;
}
}
clearCache(dbIdx);
clearClosestEndPos(dbIdx);
......@@ -463,24 +470,6 @@ bool CloseSweep::chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan)
}
bool CloseSweep::dbRecAfterQueryChrom(const Record *dbRec)
{
//If using a genome file, compare chrom ids.
//Otherwise, compare global order, inserting as needed.
if (_context->hasGenomeFile()) {
return ( dbRec->getChromId() > _currQueryRec->getChromId() ) ;
}
//see if the db has both it's curr chrom and the query's curr chrom.
const _orderTrackType *track = _fileTracks[dbRec->getFileIdx()];
_orderTrackType::const_iterator iter = track->find(dbRec->getChrName());
int dbOrder = iter->second;
iter = track->find(_currQueryRec->getChrName());
if (iter == track->end()) return false; // query file does not contain the db chrom.
int qOrder = iter->second;
return (dbOrder > qOrder);
}
void CloseSweep::setLeftClosestEndPos(int dbIdx, const Record *rec)
{
int recEndPos = rec->getEndPos();
......
......@@ -43,9 +43,7 @@ private:
void scanCache(int dbIdx, RecordKeyVector &retList);
bool chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan);
bool dbRecAfterQueryChrom(const Record *dbRec);
typedef enum { IGNORE, DELETE } rateOvlpType;
typedef enum { IGNORE, DELETE } rateOvlpType;
rateOvlpType considerRecord(const Record *cacheRec, int dbIdx, bool &stopScanning);
void finalizeSelections(int dbIdx, RecordKeyVector &retList);
void checkMultiDbs(RecordKeyVector &retList);
......
......@@ -28,7 +28,8 @@ NewChromSweep::NewChromSweep(ContextIntersect *context)
_runToQueryEnd(false),
_lexicoDisproven(false),
_lexicoAssumed(false),
_lexicoAssumedFileIdx(-1)
_lexicoAssumedFileIdx(-1),
_testLastQueryRec(false)
{
}
......@@ -72,9 +73,14 @@ bool NewChromSweep::init() {
}
void NewChromSweep::closeOut(bool testChromOrderVal) {
if (_testLastQueryRec) {
testChromOrder(_currQueryRec);
}
while (!_queryFRM->eof()) {
nextRecord(true);
testChromOrder(_currQueryRec);
}
if (testChromOrderVal) testChromOrder(_currQueryRec);
for (int i=0; i < _numDBs; i++) {
while (!_dbFRMs[i]->eof()) {
......@@ -194,11 +200,17 @@ bool NewChromSweep::chromChange(int dbIdx, RecordKeyVector &retList, bool wantSc
if (queryChromAfterDbRec(dbRec)) {
// the query is ahead of the database. fast-forward the database to catch-up.
QuickString oldDbChrom(dbRec->getChrName());
while (dbRec != NULL &&
queryChromAfterDbRec(dbRec)) {
_dbFRMs[dbIdx]->deleteRecord(dbRec);
nextRecord(false, dbIdx);
if (!nextRecord(false, dbIdx)) break;
dbRec = _currDbRecs[dbIdx];
const QuickString &newDbChrom = dbRec->getChrName();
if (newDbChrom != oldDbChrom) {
testChromOrder(dbRec);
oldDbChrom = newDbChrom;
}
}
clearCache(dbIdx);
return false;
......@@ -229,6 +241,7 @@ bool NewChromSweep::next(RecordKeyVector &retList) {
if (needTestSortOrder) testChromOrder(_currQueryRec);
if (allCurrDBrecsNull() && allCachesEmpty() && !_runToQueryEnd) {
_testLastQueryRec = true;
return false;
}
_currQueryChromName = _currQueryRec->getChrName();
......@@ -314,9 +327,12 @@ void NewChromSweep::testChromOrder(const Record *rec)
_filePrevChrom[fileIdx] = chrom;
return; //no previously stored chrom for this file.
}
const QuickString &prevChrom = prevIter->second;
const QuickString prevChrom(prevIter->second);
_filePrevChrom[fileIdx] = chrom;
if (chrom == prevChrom) return;
if (chrom != prevChrom && verifyChromOrderMismatch(chrom, prevChrom, fileIdx)) {
if (verifyChromOrderMismatch(chrom, prevChrom, fileIdx)) {
fprintf(stderr, "ERROR: chromomsome sort ordering for file %s is inconsistent with other files. Record was:\n", _context->getInputFileName(fileIdx).c_str());
rec->print(stderr, true);
exit(1);
......
......@@ -120,6 +120,7 @@ protected:
bool _lexicoAssumed; //whether we've had to try to guess that any file might be in lexicographical order.
QuickString _lexicoAssumedChromName; //which chromosome we had to make that guess for. Used in error reporting.
int _lexicoAssumedFileIdx; //which file we had to make the guess for. Also for error reporting.
bool _testLastQueryRec;
void testChromOrder(const Record *rec);
bool queryChromAfterDbRec(const Record *dbRec);
......
chr1 10 20
chr10 10 20
chr11 10 20
chr12 10 20
chr2 10 20
chr1 10 20
chr11 10 20
chr3 10 20
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
53a54
> chr1 10346488 10346588 419771 100 + chr1 10346493 10346593 386024 100 -
61a63
> chr1 12637536 12637636 487117 100 - chr1 12637491 12637591 123616 100 +
112a115
> chr1 27003353 27003453 401078 100 + chr1 27003383 27003483 410920 100 -
304a308
> chr1 67919095 67919195 458371 100 - chr1 67919074 67919174 115168 100 -
312a317
> chr1 70182988 70183088 240451 100 + chr1 70182894 70182994 243027 100 -
330a336
> chr1 74612375 74612475 359478 100 - chr1 74612295 74612395 370747 100 -
390a397
> chr1 89611219 89611319 24037 100 - chr1 89611230 89611330 18932 100 -
479a487
> chr1 113433734 113433834 492565 100 + chr1 113433660 113433760 163736 100 +
664a673
> chr1 152364876 152364976 62193 100 - chr1 152364852 152364952 9189 100 -
761a771
> chr1 173149673 173149773 337683 100 - chr1 173149632 173149732 306893 100 +
862a873
> chr1 192327121 192327221 486005 100 - chr1 192327148 192327248 300661 100 -
923a935
> chr1 204842892 204842992 252090 100 - chr1 204842795 204842895 16860 100 -
966a979
> chr1 213387116 213387216 139699 100 - chr1 213387147 213387247 430037 100 +
1007a1021
> chr1 219909253 219909353 381028 100 + chr1 219909272 219909372 233439 100 -
1059a1074
> chr1 229916904 229917004 132877 100 - chr1 229916836 229916936 480620 100 +
1061a1077
> chr1 230205808 230205908 341202 100 - chr1 230205750 230205850 158247 100 -
1099a1116
> chr1 238587424 238587524 119251 100 + chr1 238587341 238587441 325702 100 +
This diff is collapsed.
This diff is collapsed.
chr1 10 20
chr1 80 100
chr2 50 80
chr2 100 120
chr10 5 50
chr10 80 120
chr11 20 60
chr11 80 120
chr12 10 50
chr12 60 90
chr1_gl0003 20 80
chr01 10 20
chr01 80 100
chr02 50 80
chr02 100 120
chr010 5 50
chr010 80 120
chr011 20 60
chr011 80 120
chr012 10 50
chr012 60 90
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment