Skip to content
Snippets Groups Projects
Commit 6f0be005 authored by Aaron Quinlan's avatar Aaron Quinlan
Browse files

Merge pull request #86 from nkindlon/master

Added new -iobuf option and unit tests.
parents 4122ba15 ccd125c1
No related branches found
No related tags found
No related merge requests found
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "ContextBase.h" #include "ContextBase.h"
#include <unistd.h> #include <unistd.h>
#include <sys/types.h> #include <sys/types.h>
#include <cctype>
ContextBase::ContextBase() ContextBase::ContextBase()
: :
...@@ -21,6 +22,7 @@ ContextBase::ContextBase() ...@@ -21,6 +22,7 @@ ContextBase::ContextBase()
_obeySplits(false), _obeySplits(false),
_uncompressedBam(false), _uncompressedBam(false),
_useBufferedOutput(true), _useBufferedOutput(true),
_ioBufSize(0),
_anyHit(false), _anyHit(false),
_noHit(false), _noHit(false),
_writeA(false), _writeA(false),
...@@ -167,6 +169,9 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { ...@@ -167,6 +169,9 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
else if (strcmp(_argv[_i], "-nobuf") == 0) { else if (strcmp(_argv[_i], "-nobuf") == 0) {
if (!handle_nobuf()) return false; if (!handle_nobuf()) return false;
} }
else if (strcmp(_argv[_i], "-iobuf") == 0) {
if (!handle_iobuf()) return false;
}
else if (strcmp(_argv[_i], "-header") == 0) { else if (strcmp(_argv[_i], "-header") == 0) {
if (!handle_header()) return false; if (!handle_header()) return false;
} }
...@@ -257,6 +262,7 @@ bool ContextBase::openFiles() { ...@@ -257,6 +262,7 @@ bool ContextBase::openFiles() {
} }
frm->setFullBamFlags(_useFullBamTags); frm->setFullBamFlags(_useFullBamTags);
frm->setIsSorted(_sortedInput); frm->setIsSorted(_sortedInput);
frm->setIoBufSize(_ioBufSize);
if (!frm->open()) { if (!frm->open()) {
return false; return false;
} }
...@@ -369,6 +375,19 @@ bool ContextBase::handle_nobuf() ...@@ -369,6 +375,19 @@ bool ContextBase::handle_nobuf()
return true; return true;
} }
// Handler for the -iobuf command line option.
//
// Expects the option at _argv[_i] to be followed by a size argument, which is
// handed to parseIoBufSize(). On success both the option and its argument are
// marked as used and _i is advanced onto the argument.
//
// Returns false (with _errorMsg set) when the size argument is missing, or
// when parseIoBufSize() rejects it.
bool ContextBase::handle_iobuf()
{
    const int sizeArgIdx = _i + 1;

    // The option must not be the last token on the command line.
    if (sizeArgIdx >= _argc) {
        _errorMsg = "\n***** ERROR: -iobuf option given, but size of input buffer not specified. *****";
        return false;
    }

    if (!parseIoBufSize(_argv[sizeArgIdx])) {
        return false;
    }

    // Mark the option itself as used, then step onto its argument and mark that too.
    markUsed(_i - _skipFirstArgs);
    ++_i;
    markUsed(_i - _skipFirstArgs);
    return true;
}
bool ContextBase::handle_seed() bool ContextBase::handle_seed()
{ {
if (_argc <= _i+1) { if (_argc <= _i+1) {
...@@ -499,3 +518,40 @@ FileRecordMgr *ContextBase::getNewFRM(const QuickString &filename) { ...@@ -499,3 +518,40 @@ FileRecordMgr *ContextBase::getNewFRM(const QuickString &filename) {
return frm; return frm;
} }
} }
// Parse the argument given to -iobuf into a byte count and store it in _ioBufSize.
//
// bufStr is expected to be a number, optionally followed by exactly one
// upper-case suffix: K (x 2^10), M (x 2^20) or G (x 2^30).
//
// Returns true and sets _ioBufSize on success. On failure returns false, sets
// _errorMsg, and leaves _ioBufSize unchanged. Failure cases:
//   - the argument is empty, or consists only of a suffix character;
//   - the last character is neither a digit nor one of K/M/G;
//   - the part before the suffix is rejected by isNumeric();
//   - the resulting size does not fit in an int;
//   - the resulting size is below MIN_ALLOWED_BUF_SIZE.
bool ContextBase::parseIoBufSize(QuickString bufStr)
{
    // An empty argument (e.g. -iobuf "") has no last character to inspect;
    // indexing size()-1 would read out of bounds.
    if (bufStr.size() == 0) {
        _errorMsg = "\n***** ERROR: argument passed to -iobuf is not numeric. *****";
        return false;
    }

    const char lastChar = bufStr[bufStr.size()-1];
    // Kept in a wide type so the multiplication below cannot overflow.
    long long multiplier = 1;

    // isdigit() requires a value representable as unsigned char; a plain char
    // may be negative for bytes >= 0x80.
    if (!isdigit(static_cast<unsigned char>(lastChar))) {
        switch (lastChar) {
        case 'K':
            multiplier = 1LL << 10;
            break;
        case 'M':
            multiplier = 1LL << 20;
            break;
        case 'G':
            multiplier = 1LL << 30;
            break;
        default:
            _errorMsg = "\n***** ERROR: Unrecognized memory buffer size suffix \'";
            _errorMsg += lastChar;
            _errorMsg += "\' given. *****";
            return false;
        }
        //lop off suffix character
        bufStr.resize(bufStr.size()-1);
    }

    // A bare suffix such as "K" leaves nothing to convert.
    if (bufStr.size() == 0 || !isNumeric(bufStr)) {
        _errorMsg = "\n***** ERROR: argument passed to -iobuf is not numeric. *****";
        return false;
    }

    // Largest value an int can hold, computed without needing <climits>.
    const long long maxBufSize = static_cast<long long>(~0U >> 1);

    // NOTE(review): this guards the multiplication only; whether str2chrPos()
    // itself copes with a digit string too long for its return type is not
    // visible here -- confirm.
    const long long requested = static_cast<long long>(str2chrPos(bufStr)) * multiplier;

    if (requested > maxBufSize) {
        _errorMsg = "\n***** ERROR: specified buffer size is too large. *****";
        return false;
    }
    if (requested < MIN_ALLOWED_BUF_SIZE) {
        _errorMsg = "\n***** ERROR: specified buffer size is too small. *****";
        return false;
    }

    // Only commit the value once it has passed every check.
    _ioBufSize = static_cast<int>(requested);
    return true;
}
...@@ -176,6 +176,7 @@ protected: ...@@ -176,6 +176,7 @@ protected:
bool _obeySplits; bool _obeySplits;
bool _uncompressedBam; bool _uncompressedBam;
bool _useBufferedOutput; bool _useBufferedOutput;
int _ioBufSize;
bool _anyHit; bool _anyHit;
bool _noHit; bool _noHit;
...@@ -234,6 +235,8 @@ protected: ...@@ -234,6 +235,8 @@ protected:
char **_argv; char **_argv;
int _i; int _i;
static const int MIN_ALLOWED_BUF_SIZE = 8;
virtual bool handle_bed(); virtual bool handle_bed();
virtual bool handle_fbam(); virtual bool handle_fbam();
virtual bool handle_g(); virtual bool handle_g();
...@@ -242,6 +245,8 @@ protected: ...@@ -242,6 +245,8 @@ protected:
virtual bool handle_i(); virtual bool handle_i();
virtual bool handle_n(); virtual bool handle_n();
virtual bool handle_nobuf(); virtual bool handle_nobuf();
virtual bool handle_iobuf();
virtual bool handle_seed(); virtual bool handle_seed();
virtual bool handle_split(); virtual bool handle_split();
virtual bool handle_sorted(); virtual bool handle_sorted();
...@@ -251,6 +256,9 @@ protected: ...@@ -251,6 +256,9 @@ protected:
virtual bool handle_o(); virtual bool handle_o();
virtual bool handle_null(); virtual bool handle_null();
virtual bool handle_delim(); virtual bool handle_delim();
bool parseIoBufSize(QuickString bufStr);
}; };
#endif /* CONTEXTBASE_H_ */ #endif /* CONTEXTBASE_H_ */
...@@ -17,7 +17,7 @@ BufferedStreamMgr::BufferedStreamMgr(const QuickString &filename) ...@@ -17,7 +17,7 @@ BufferedStreamMgr::BufferedStreamMgr(const QuickString &filename)
_mainBufCurrStartPos(0), _mainBufCurrStartPos(0),
_mainBufCurrLen(0), _mainBufCurrLen(0),
_eof(false), _eof(false),
_useBufSize(0), _useBufSize(DEFAULT_MAIN_BUF_READ_SIZE),
_streamFinished(false) _streamFinished(false)
{ {
...@@ -51,8 +51,6 @@ bool BufferedStreamMgr::init() ...@@ -51,8 +51,6 @@ bool BufferedStreamMgr::init()
} }
if (_inputStreamMgr->isGzipped()) { if (_inputStreamMgr->isGzipped()) {
_useBufSize = GZIP_LINE_BUF_SIZE; _useBufSize = GZIP_LINE_BUF_SIZE;
} else {
_useBufSize = MAIN_BUF_READ_SIZE;
} }
size_t trueBufSize = max(_useBufSize, (int)_currScanBuffer.size()); size_t trueBufSize = max(_useBufSize, (int)_currScanBuffer.size());
......
...@@ -27,7 +27,8 @@ public: ...@@ -27,7 +27,8 @@ public:
bool eof() const { return _eof; } bool eof() const { return _eof; }
bool getLine(QuickString &line); bool getLine(QuickString &line);
BamTools::BamReader *getBamReader() { return _inputStreamMgr->getBamReader(); } BamTools::BamReader *getBamReader() { return _inputStreamMgr->getBamReader(); }
static const int DEFAULT_MAIN_BUF_READ_SIZE = 1023;
void setIoBufSize(int val) { _useBufSize = val; }
private: private:
InputStreamMgr *_inputStreamMgr; InputStreamMgr *_inputStreamMgr;
typedef unsigned char bufType; typedef unsigned char bufType;
...@@ -42,8 +43,8 @@ private: ...@@ -42,8 +43,8 @@ private:
int _useBufSize; int _useBufSize;
bool _streamFinished; bool _streamFinished;
QuickString _currScanBuffer; QuickString _currScanBuffer;
//The minus ones in these constants are for leaving room for a null terminator after reading into buffers. //The minus ones in these constants are for leaving room for a null terminator after reading into buffers.
static const int MAIN_BUF_READ_SIZE = 1023; // 2 Kb minus 1
static const int GZIP_LINE_BUF_SIZE = 8191; // 8K static const int GZIP_LINE_BUF_SIZE = 8191; // 8K
bool readFileChunk(); bool readFileChunk();
bool getTypeData(); bool getTypeData();
......
...@@ -24,7 +24,8 @@ FileRecordMgr::FileRecordMgr(const QuickString &filename) ...@@ -24,7 +24,8 @@ FileRecordMgr::FileRecordMgr(const QuickString &filename)
_blockMgr(NULL), _blockMgr(NULL),
_bamReader(NULL), _bamReader(NULL),
_hasGenomeFile(false), _hasGenomeFile(false),
_genomeFile(NULL) _genomeFile(NULL),
_ioBufSize(0)
{ {
} }
...@@ -43,6 +44,7 @@ FileRecordMgr::~FileRecordMgr(){ ...@@ -43,6 +44,7 @@ FileRecordMgr::~FileRecordMgr(){
bool FileRecordMgr::open(){ bool FileRecordMgr::open(){
_bufStreamMgr = new BufferedStreamMgr(_filename); _bufStreamMgr = new BufferedStreamMgr(_filename);
if (_ioBufSize > 0) _bufStreamMgr->setIoBufSize(_ioBufSize);
if (!_bufStreamMgr->init()) { if (!_bufStreamMgr->init()) {
cerr << "Error: unable to open file or unable to determine types for file " << _filename << endl; cerr << "Error: unable to open file or unable to determine types for file " << _filename << endl;
delete _bufStreamMgr; delete _bufStreamMgr;
......
...@@ -100,6 +100,7 @@ public: ...@@ -100,6 +100,7 @@ public:
} }
void setIsSorted(bool val) { _isSortedInput = val; } void setIsSorted(bool val) { _isSortedInput = val; }
void setIoBufSize(int val) { _ioBufSize = val; }
protected: protected:
QuickString _filename; QuickString _filename;
...@@ -131,6 +132,7 @@ protected: ...@@ -131,6 +132,7 @@ protected:
BamTools::BamReader *_bamReader; BamTools::BamReader *_bamReader;
bool _hasGenomeFile; bool _hasGenomeFile;
NewGenomeFile *_genomeFile; NewGenomeFile *_genomeFile;
int _ioBufSize;
void allocateFileReader(); void allocateFileReader();
void testInputSortOrder(Record *record); void testInputSortOrder(Record *record);
......
...@@ -449,3 +449,72 @@ check exp obs ...@@ -449,3 +449,72 @@ check exp obs
rm obs exp rm obs exp
###########################################################
#
# Test new -iobuf option
#
# In the error-path tests below, "2>&1 > /dev/null" sends
# stderr into the pipe while discarding stdout, and
# "head -2 | tail -1" keeps only the second line of that
# stderr output for comparison against exp.
#
###########################################################
###########################################################
# Test -iobuf expects an argument
###########################################################
echo " merge.t37...\c"
echo "***** ERROR: -iobuf option given, but size of input buffer not specified. *****" >exp
$BT merge -i a.bed -iobuf 2>&1 > /dev/null | head -2 | tail -1 > obs
check exp obs
rm obs exp
###########################################################
# Test -iobuf allows only suffixes K/M/G
###########################################################
echo " merge.t38...\c"
echo \
"***** ERROR: Unrecognized memory buffer size suffix 'L' given. *****" > exp
$BT merge -i a.bed -iobuf 20L 2>&1 > /dev/null | head -2 | tail -1 > obs
check exp obs
rm obs exp
###########################################################
# Test -iobuf doesn't allow a buffer size below 8 bytes.
###########################################################
echo " merge.t39...\c"
echo \
"***** ERROR: specified buffer size is too small. *****" > exp
$BT merge -i a.bed -iobuf 7 2>&1 > /dev/null | head -2 | tail -1 > obs
check exp obs
rm exp obs
###########################################################
# Test -iobuf doesn't allow non-numeric arguments
###########################################################
echo " merge.t40...\c"
echo \
"***** ERROR: argument passed to -iobuf is not numeric. *****" > exp
$BT merge -i a.bed -iobuf beerM 2>&1 > /dev/null | head -2 | tail -1 > obs
check exp obs
rm exp obs
###########################################################
# Test -iobuf allows correct argument with suffix
###########################################################
echo " merge.t41...\c"
echo \
"chr1 10 20
chr1 30 100" > exp
$BT merge -i a.bed -iobuf 128M > obs
check exp obs
rm exp obs
###########################################################
# Test -iobuf allows correct argument without suffix
###########################################################
echo " merge.t42...\c"
echo \
"chr1 10 20
chr1 30 100" > exp
$BT merge -i a.bed -iobuf 8192 > obs
check exp obs
rm exp obs
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment