diff --git a/src/utils/FileRecordTools/RecordOutputMgr.cpp b/src/utils/FileRecordTools/RecordOutputMgr.cpp index 984d6abadd2d33132d23f27885a869f9a3bc9cde..7595f3aa74952ac699fec0e233ee6c120f5e7353 100644 --- a/src/utils/FileRecordTools/RecordOutputMgr.cpp +++ b/src/utils/FileRecordTools/RecordOutputMgr.cpp @@ -28,14 +28,19 @@ RecordOutputMgr::RecordOutputMgr() : _context(NULL), _printable(true), _bamWriter(NULL), - _currBlockList(NULL) + _currBlockList(NULL), + _numWrites(0) { } RecordOutputMgr::~RecordOutputMgr() { - flush(); + if (_outBuf.size() > 0) { + flush(); + _numWrites++; + } + cerr << "Total number of buffer writes was " << _numWrites << endl; if (_bamWriter != NULL) { _bamWriter->Close(); delete _bamWriter; @@ -105,7 +110,10 @@ RecordOutputMgr::printBamType RecordOutputMgr::printBamRecord(RecordKeyList &key void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockList) { - if (needsFlush()) flush(); + if (needsFlush()) { + flush(); + _numWrites++; + } //The first time we print a record is when we print any header, because the header //hasn't been read from the query file until after the first record has also been read. diff --git a/src/utils/FileRecordTools/RecordOutputMgr.h b/src/utils/FileRecordTools/RecordOutputMgr.h index b8f766305a3e18088d4412e7365787306c124e3e..fe54b3eaac7b0e3872a9efdde9a4e1b27d851571 100644 --- a/src/utils/FileRecordTools/RecordOutputMgr.h +++ b/src/utils/FileRecordTools/RecordOutputMgr.h @@ -40,6 +40,7 @@ private: RecordKeyList *_currBlockList; QuickString _outBuf; + int _numWrites; //some helper functions to neaten the code. void tab() { _outBuf.append('\t'); } @@ -48,7 +49,7 @@ private: void printKey(const Record *key); void printKey(const Record *key, const QuickString & start, const QuickString & end); - static const unsigned int MAX_OUTBUF_SIZE = 33554432; //32 Mb + static const unsigned int MAX_OUTBUF_SIZE = 16384; //16 K bool needsFlush() const { return _outBuf.size() >= MAX_OUTBUF_SIZE *.9; } void flush(); }; diff --git a/src/utils/NewChromsweep/NewChromsweep.cpp b/src/utils/NewChromsweep/NewChromsweep.cpp index 6c441c4eba36ea7da49dbabaf346864c28ae2795..27cc4b92d90f9e5c654b7dd1a27c612fac1f6cc3 100644 --- a/src/utils/NewChromsweep/NewChromsweep.cpp +++ b/src/utils/NewChromsweep/NewChromsweep.cpp @@ -24,7 +24,8 @@ NewChromSweep::NewChromSweep(Context *context, _databaseRecordsTotalLength(0), _wasInitialized(false), _currQueryRec(NULL), - _currDatabaseRec(NULL) + _currDatabaseRec(NULL), + _runToQueryEnd(false) { } @@ -52,6 +53,13 @@ bool NewChromSweep::init() { if (_currDatabaseRec == NULL) { return false; } + + //determine whether to stop when the database end is hit, or keep going until the + //end of the query file is hit as well. + + if (_context->getNoHit() || _context->getWriteCount() || _context->getWriteOverlap() || _context->getWriteAllOverlap() || _context->getLeftJoin()) { + _runToQueryEnd = true; + } _wasInitialized = true; return true; } @@ -155,7 +163,7 @@ bool NewChromSweep::next(RecordKeyList &next) { return false; } - if (_currDatabaseRec == NULL && _cache.empty()) { + if (_currDatabaseRec == NULL && _cache.empty() && !_runToQueryEnd) { return false; } _hits.clear(); diff --git a/src/utils/NewChromsweep/NewChromsweep.h b/src/utils/NewChromsweep/NewChromsweep.h index abd20e923b56142c28410b0eb7b887ae45361ed2..2c0beb7f12db2bd5cd75650ce6a741b8d1975ce6 100644 --- a/src/utils/NewChromsweep/NewChromsweep.h +++ b/src/utils/NewChromsweep/NewChromsweep.h @@ -81,6 +81,7 @@ private: Record *_currDatabaseRec; // a cache of the current chrom from the query. used to handle chrom changes. QuickString _currChromName; + bool _runToQueryEnd; void nextRecord(bool query); //true fetches next query record, false fetches next db record. void nextDatabase();