From 930ddf38f069b40a5306857446e373a64417f1bc Mon Sep 17 00:00:00 2001 From: nkindlon <nek3d@virginia.edu> Date: Wed, 18 Dec 2013 22:24:04 -0500 Subject: [PATCH] Added -nobuf option, and changed flush to occur after every record if -nobuf is set. --- src/utils/Contexts/Context.cpp | 5 +++++ src/utils/Contexts/Context.h | 4 ++++ .../FileRecordTools/FileReaders/BufferedStreamMgr.h | 2 +- src/utils/FileRecordTools/RecordOutputMgr.cpp | 12 ++++++++++++ src/utils/FileRecordTools/RecordOutputMgr.h | 10 ++++++++-- 5 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/utils/Contexts/Context.cpp b/src/utils/Contexts/Context.cpp index 22bbf126..439c1fef 100644 --- a/src/utils/Contexts/Context.cpp +++ b/src/utils/Contexts/Context.cpp @@ -20,6 +20,7 @@ Context::Context() _showHelp(false), _obeySplits(false), _uncompressedBam(false), + _useBufferedOutput(true), _anyHit(false), _noHit(false), _writeA(false), @@ -288,6 +289,10 @@ bool Context::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { setSortedInput(true); markUsed(i - skipFirstArgs); } + else if(strcmp(argv[i], "-nobuf") == 0) { + setUseBufferedOutput(false); + markUsed(i - skipFirstArgs); + } else if(strcmp(argv[i], "-header") == 0) { setPrintHeader(true); markUsed(i - skipFirstArgs); diff --git a/src/utils/Contexts/Context.h b/src/utils/Contexts/Context.h index 7f26fc0c..19160e95 100644 --- a/src/utils/Contexts/Context.h +++ b/src/utils/Contexts/Context.h @@ -105,6 +105,9 @@ public: bool getUncompressedBam() const { return _uncompressedBam; } void setUncompressedBam(bool val) { _uncompressedBam = val; } + + bool getUseBufferedOutput() const { return _useBufferedOutput; } + void setUseBufferedOutput(bool val) { _useBufferedOutput = val; } // // INTERSECT METOHDS // @@ -241,6 +244,7 @@ protected: bool _showHelp; bool _obeySplits; bool _uncompressedBam; + bool _useBufferedOutput; bool _anyHit; bool _noHit; diff --git a/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.h 
b/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.h index 963b5ce3..2a77854a 100644 --- a/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.h +++ b/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.h @@ -43,7 +43,7 @@ private: bool _streamFinished; QuickString _currScanBuffer; //The minus ones in these constants are for leaving room for a null terminator after reading into buffers. - static const int MAIN_BUF_READ_SIZE = 2047; // 2 Kb minus 1 + static const int MAIN_BUF_READ_SIZE = 1023; // 1 Kb minus 1 static const int GZIP_LINE_BUF_SIZE = 8191; // 8K bool readFileChunk(); bool getTypeData(); diff --git a/src/utils/FileRecordTools/RecordOutputMgr.cpp b/src/utils/FileRecordTools/RecordOutputMgr.cpp index c405a435..b9b1a0a3 100644 --- a/src/utils/FileRecordTools/RecordOutputMgr.cpp +++ b/src/utils/FileRecordTools/RecordOutputMgr.cpp @@ -82,6 +82,7 @@ bool RecordOutputMgr::printKeyAndTerminate(RecordKeyList &keyList) { } else if (bamCode == NOT_BAM) { keyList.getKey()->print(_outBuf); return false; + } //otherwise, it was BAM_AS_BED, and the key was printed. 
return false; @@ -157,6 +158,7 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi tab(); _outBuf.append('0'); newline(); + if (needsFlush()) flush(); } else if (_context->getLeftJoin()) { if (printKeyAndTerminate(keyList)) { @@ -166,6 +168,7 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi tab(); null(false, true); newline(); + if (needsFlush()) flush(); _currBlockList = NULL; return; } @@ -202,6 +205,7 @@ void RecordOutputMgr::checkForHeader() { } } _context->setPrintHeader(false); + if (needsFlush()) flush(); } void RecordOutputMgr::reportOverlapDetail(const Record *keyRecord, const Record *hitRecord) @@ -269,22 +273,26 @@ void RecordOutputMgr::reportOverlapDetail(const Record *keyRecord, const Record if (!_context->getWriteA() && !_context->getWriteB() && !_context->getWriteOverlap() && !_context->getLeftJoin()) { printKey(keyRecord, *startStr, *endStr); newline(); + if (needsFlush()) flush(); } else if ((_context->getWriteA() && _context->getWriteB()) || _context->getLeftJoin()) { printKey(keyRecord); tab(); hitRecord->print(_outBuf); newline(); + if (needsFlush()) flush(); } else if (_context->getWriteA()) { printKey(keyRecord); newline(); + if (needsFlush()) flush(); } else if (_context->getWriteB()) { printKey(keyRecord, *startStr, *endStr); tab(); hitRecord->print(_outBuf); newline(); + if (needsFlush()) flush(); } else if (_context->getWriteOverlap()) { int printOverlapBases = max(0, minEnd-maxStart); @@ -294,6 +302,7 @@ void RecordOutputMgr::reportOverlapDetail(const Record *keyRecord, const Record tab(); int2str(printOverlapBases, _outBuf, true); newline(); + if (needsFlush()) flush(); } } @@ -305,6 +314,7 @@ void RecordOutputMgr::reportOverlapSummary(RecordKeyList &keyList) return; } newline(); + if (needsFlush()) flush(); } else if (_context->getWriteCount()) { if (printKeyAndTerminate(keyList)) { return; @@ -312,11 +322,13 @@ void RecordOutputMgr::reportOverlapSummary(RecordKeyList 
&keyList) tab(); int2str(numOverlapsFound, _outBuf, true); newline(); + if (needsFlush()) flush(); } else if (_context->getNoHit() && numOverlapsFound == 0) { if (printKeyAndTerminate(keyList)) { return; } newline(); + if (needsFlush()) flush(); } } diff --git a/src/utils/FileRecordTools/RecordOutputMgr.h b/src/utils/FileRecordTools/RecordOutputMgr.h index 1bb8bbfc..de626116 100644 --- a/src/utils/FileRecordTools/RecordOutputMgr.h +++ b/src/utils/FileRecordTools/RecordOutputMgr.h @@ -10,10 +10,10 @@ using namespace std; +#include "Context.h" #include "RecordKeyList.h" #include "api/BamWriter.h" -class Context; class BlockMgr; class RecordOutputMgr { @@ -53,7 +53,13 @@ private: void reportOverlapSummary(RecordKeyList &keyList); static const unsigned int MAX_OUTBUF_SIZE = 16384; //16 K - bool needsFlush() const { return _outBuf.size() >= MAX_OUTBUF_SIZE *.9; } + + // If we are using buffered output, only flush the output buffer if it's at least + // 90% full. If we're not using buffered output, flush if it's not empty. + bool needsFlush() const { + return ((_context->getUseBufferedOutput() &&_outBuf.size() >= MAX_OUTBUF_SIZE *.9) || + (!_context->getUseBufferedOutput() && !_outBuf.empty())); + } void flush(); }; -- GitLab