From fba9508902b3689f8e41dd7de3f0113ffbbe864b Mon Sep 17 00:00:00 2001 From: nkindlon <nek3d@virginia.edu> Date: Tue, 17 Dec 2013 21:54:11 -0500 Subject: [PATCH] Changed input stream buffer to only 1Kb to improve performance for small files. --- src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.cpp | 2 +- src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.h | 5 +---- src/utils/FileRecordTools/RecordOutputMgr.h | 2 +- src/utils/fileType/FileRecordTypeChecker.h | 1 - 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.cpp b/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.cpp index 55ec8865..78590a7e 100644 --- a/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.cpp +++ b/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.cpp @@ -54,7 +54,7 @@ bool BufferedStreamMgr::init() if (_inputStreamMgr->isGzipped()) { _useBufSize = GZIP_LINE_BUF_SIZE; } else { - _useBufSize = MAIN_BUF_READ_SIZE; + _useBufSize = 67108863; //64Mb -1 } size_t trueBufSize = max(_useBufSize, (int)_currScanBuffer.size()); diff --git a/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.h b/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.h index 424766c5..ca9d2545 100644 --- a/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.h +++ b/src/utils/FileRecordTools/FileReaders/BufferedStreamMgr.h @@ -23,7 +23,6 @@ public: bool init(); const FileRecordTypeChecker & getTypeChecker() const { return _typeChecker; } -// istream *getStream() { return _inputStreamMgr->getFinalStream(); } bool eof() const { return _eof; } bool getLine(QuickString &line); @@ -44,12 +43,10 @@ private: bool _streamFinished; QuickString _currScanBuffer; //The minus ones in these constants are for leaving room for a null terminator after reading into buffers. 
- static const int MAIN_BUF_READ_SIZE = 67108863; //64 Mb minus 1 - static const int TYPE_CHECK_READ_SIZE = 4095; // 4K +// static const int MAIN_BUF_READ_SIZE = 2047; //2 Kb minus 1 static const int GZIP_LINE_BUF_SIZE = 8191; // 8K bool readFileChunk(); bool getTypeData(); -// void resetStream(); }; diff --git a/src/utils/FileRecordTools/RecordOutputMgr.h b/src/utils/FileRecordTools/RecordOutputMgr.h index 1bb8bbfc..cb1bbdfc 100644 --- a/src/utils/FileRecordTools/RecordOutputMgr.h +++ b/src/utils/FileRecordTools/RecordOutputMgr.h @@ -52,7 +52,7 @@ private: void reportOverlapDetail(const Record *keyRecord, const Record *hitRecord); void reportOverlapSummary(RecordKeyList &keyList); - static const unsigned int MAX_OUTBUF_SIZE = 16384; //16 K + static const unsigned int MAX_OUTBUF_SIZE = 16386; //16 K bool needsFlush() const { return _outBuf.size() >= MAX_OUTBUF_SIZE *.9; } void flush(); }; diff --git a/src/utils/fileType/FileRecordTypeChecker.h b/src/utils/fileType/FileRecordTypeChecker.h index 4eb3663b..6d598ace 100644 --- a/src/utils/fileType/FileRecordTypeChecker.h +++ b/src/utils/fileType/FileRecordTypeChecker.h @@ -85,7 +85,6 @@ private: vector<QuickString> _lines; vector<QuickString> _currLineElems; int _firstValidDataLineIdx; - static const int SCAN_BUFFER_SIZE = 8192; //8 KB buffer int _numBytesInBuffer; //this will hold the length of the buffer after the scan. int _numFields; -- GitLab