Commit ccd125c1 authored by nkindlon's avatar nkindlon
Browse files

Added new -iobuf option and unit tests.

parent d8c62016
......@@ -8,6 +8,7 @@
#include "ContextBase.h"
#include <unistd.h>
#include <sys/types.h>
#include <cctype>
#include <climits>
ContextBase::ContextBase()
:
......@@ -21,6 +22,7 @@ ContextBase::ContextBase()
_obeySplits(false),
_uncompressedBam(false),
_useBufferedOutput(true),
_ioBufSize(0),
_anyHit(false),
_noHit(false),
_writeA(false),
......@@ -167,6 +169,9 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
else if (strcmp(_argv[_i], "-nobuf") == 0) {
if (!handle_nobuf()) return false;
}
else if (strcmp(_argv[_i], "-iobuf") == 0) {
if (!handle_iobuf()) return false;
}
else if (strcmp(_argv[_i], "-header") == 0) {
if (!handle_header()) return false;
}
......@@ -257,6 +262,7 @@ bool ContextBase::openFiles() {
}
frm->setFullBamFlags(_useFullBamTags);
frm->setIsSorted(_sortedInput);
frm->setIoBufSize(_ioBufSize);
if (!frm->open()) {
return false;
}
......@@ -369,6 +375,19 @@ bool ContextBase::handle_nobuf()
return true;
}
// Handles the -iobuf option: the argument that follows it is parsed as the
// input buffer size. Both the option and its value are marked as used and
// _i is left pointing at the value. Returns false (with _errorMsg set) when
// the value is missing or parseIoBufSize() rejects it.
bool ContextBase::handle_iobuf()
{
    const int sizeArgIdx = _i + 1;

    // -iobuf was the last thing on the command line: nothing to parse.
    if (sizeArgIdx >= _argc) {
        _errorMsg = "\n***** ERROR: -iobuf option given, but size of input buffer not specified. *****";
        return false;
    }

    if (!parseIoBufSize(_argv[sizeArgIdx])) {
        return false;
    }

    // Consume the option itself, then step onto its value and consume that too.
    markUsed(_i - _skipFirstArgs);
    _i = sizeArgIdx;
    markUsed(_i - _skipFirstArgs);
    return true;
}
bool ContextBase::handle_seed()
{
if (_argc <= _i+1) {
......@@ -499,3 +518,40 @@ FileRecordMgr *ContextBase::getNewFRM(const QuickString &filename) {
return frm;
}
}
// Parses the value given to -iobuf and stores it in _ioBufSize.
//
// Accepted form: one or more decimal digits, optionally followed by a single
// upper-case suffix K, M or G, which multiplies the number by 2^10, 2^20 or
// 2^30 respectively.
//
// bufStr  - the raw command line argument (taken by value; the suffix is
//           stripped from the local copy).
// Returns true and sets _ioBufSize on success. On failure returns false,
// sets _errorMsg, and leaves _ioBufSize untouched. Failure cases: empty or
// non-numeric argument, unknown suffix, a size below MIN_ALLOWED_BUF_SIZE,
// or a size that does not fit in an int.
bool ContextBase::parseIoBufSize(QuickString bufStr)
{
    // An empty argument (e.g. -iobuf "") has no last character to inspect.
    if (bufStr.size() == 0) {
        _errorMsg = "\n***** ERROR: argument passed to -iobuf is not numeric. *****";
        return false;
    }

    const char lastChar = bufStr[bufStr.size() - 1];
    int multiplier = 1;
    // isdigit() requires a value representable as unsigned char (or EOF).
    if (!isdigit(static_cast<unsigned char>(lastChar))) {
        switch (lastChar) {
            case 'K':
                multiplier = 1 << 10;
                break;
            case 'M':
                multiplier = 1 << 20;
                break;
            case 'G':
                multiplier = 1 << 30;
                break;
            default:
                _errorMsg = "\n***** ERROR: Unrecognized memory buffer size suffix \'";
                _errorMsg += lastChar;
                _errorMsg += "\' given. *****";
                return false;
        }
        //lop off suffix character
        bufStr.resize(bufStr.size() - 1);
    }

    // A bare suffix such as "M" leaves nothing to convert.
    if (bufStr.size() == 0 || !isNumeric(bufStr)) {
        _errorMsg = "\n***** ERROR: argument passed to -iobuf is not numeric. *****";
        return false;
    }

    // Multiply in a wider type: e.g. "4G" does not fit in an int, and signed
    // overflow could otherwise wrap to a value that passes the minimum check.
    const long long requestedSize = static_cast<long long>(str2chrPos(bufStr)) * multiplier;
    if (requestedSize > INT_MAX) {
        _errorMsg = "\n***** ERROR: specified buffer size is too large. *****";
        return false;
    }
    if (requestedSize < MIN_ALLOWED_BUF_SIZE) {
        _errorMsg = "\n***** ERROR: specified buffer size is too small. *****";
        return false;
    }

    _ioBufSize = static_cast<int>(requestedSize);
    return true;
}
......@@ -176,6 +176,7 @@ protected:
bool _obeySplits;
bool _uncompressedBam;
bool _useBufferedOutput;
int _ioBufSize;
bool _anyHit;
bool _noHit;
......@@ -234,6 +235,8 @@ protected:
char **_argv;
int _i;
// Smallest buffer size, in bytes, that parseIoBufSize() accepts for -iobuf.
static const int MIN_ALLOWED_BUF_SIZE = 8;
virtual bool handle_bed();
virtual bool handle_fbam();
virtual bool handle_g();
......@@ -242,6 +245,8 @@ protected:
virtual bool handle_i();
virtual bool handle_n();
virtual bool handle_nobuf();
virtual bool handle_iobuf();
virtual bool handle_seed();
virtual bool handle_split();
virtual bool handle_sorted();
......@@ -251,6 +256,9 @@ protected:
virtual bool handle_o();
virtual bool handle_null();
virtual bool handle_delim();
// Parses the argument of -iobuf (digits with an optional K, M or G suffix)
// into _ioBufSize. On failure sets _errorMsg and returns false.
bool parseIoBufSize(QuickString bufStr);
};
#endif /* CONTEXTBASE_H_ */
......@@ -17,7 +17,7 @@ BufferedStreamMgr::BufferedStreamMgr(const QuickString &filename)
_mainBufCurrStartPos(0),
_mainBufCurrLen(0),
_eof(false),
_useBufSize(0),
_useBufSize(DEFAULT_MAIN_BUF_READ_SIZE),
_streamFinished(false)
{
......@@ -51,8 +51,6 @@ bool BufferedStreamMgr::init()
}
if (_inputStreamMgr->isGzipped()) {
_useBufSize = GZIP_LINE_BUF_SIZE;
} else {
_useBufSize = MAIN_BUF_READ_SIZE;
}
size_t trueBufSize = max(_useBufSize, (int)_currScanBuffer.size());
......
......@@ -27,7 +27,8 @@ public:
bool eof() const { return _eof; }
bool getLine(QuickString &line);
BamTools::BamReader *getBamReader() { return _inputStreamMgr->getBamReader(); }
// Value _useBufSize is initialized with by the constructor.
static const int DEFAULT_MAIN_BUF_READ_SIZE = 1023;
// Overrides _useBufSize with a caller-chosen size.
// NOTE(review): the visible part of init() assigns GZIP_LINE_BUF_SIZE for
// gzipped input, which would replace a value set here - confirm intended.
void setIoBufSize(int val) { _useBufSize = val; }
private:
InputStreamMgr *_inputStreamMgr;
typedef unsigned char bufType;
......@@ -42,8 +43,8 @@ private:
int _useBufSize;
bool _streamFinished;
QuickString _currScanBuffer;
//The minus ones in these constants are for leaving room for a null terminator after reading into buffers.
static const int MAIN_BUF_READ_SIZE = 1023; // 1 Kb minus 1
static const int GZIP_LINE_BUF_SIZE = 8191; // 8 Kb minus 1
bool readFileChunk();
bool getTypeData();
......
......@@ -24,7 +24,8 @@ FileRecordMgr::FileRecordMgr(const QuickString &filename)
_blockMgr(NULL),
_bamReader(NULL),
_hasGenomeFile(false),
_genomeFile(NULL)
_genomeFile(NULL),
_ioBufSize(0)
{
}
......@@ -43,6 +44,7 @@ FileRecordMgr::~FileRecordMgr(){
bool FileRecordMgr::open(){
_bufStreamMgr = new BufferedStreamMgr(_filename);
if (_ioBufSize > 0) _bufStreamMgr->setIoBufSize(_ioBufSize);
if (!_bufStreamMgr->init()) {
cerr << "Error: unable to open file or unable to determine types for file " << _filename << endl;
delete _bufStreamMgr;
......
......@@ -100,6 +100,7 @@ public:
}
void setIsSorted(bool val) { _isSortedInput = val; }
void setIoBufSize(int val) { _ioBufSize = val; }
protected:
QuickString _filename;
......@@ -131,6 +132,7 @@ protected:
BamTools::BamReader *_bamReader;
bool _hasGenomeFile;
NewGenomeFile *_genomeFile;
int _ioBufSize;
void allocateFileReader();
void testInputSortOrder(Record *record);
......
......@@ -751,7 +751,7 @@ echo \
"chr1 11323785 11617177
chr1 12645605 13926923
chr1 14750216 15119039" >exp
~/mergeBugSpace/bt2-merge-debug/bin/bedtools intersect -a dosLineChar_a.bed -b dosLineCharWithExtraTab_b.bed -v > obs
$BT intersect -a dosLineChar_a.bed -b dosLineCharWithExtraTab_b.bed -v > obs
check exp obs
rm exp obs
......
......@@ -449,3 +449,72 @@ check exp obs
rm obs exp
###########################################################
#
# Test new -iobuf option
#
###########################################################
###########################################################
# Test -iobuf expects an argument
###########################################################
echo " merge.t37...\c"
echo "***** ERROR: -iobuf option given, but size of input buffer not specified. *****" >exp
# Only stderr reaches the pipe (stdout is discarded); the message is its second line.
$BT merge -i a.bed -iobuf 2>&1 > /dev/null | head -2 | tail -1 > obs
check exp obs
rm exp obs
###########################################################
# Test -iobuf allows only suffixes K/M/G
###########################################################
echo " merge.t38...\c"
echo \
"***** ERROR: Unrecognized memory buffer size suffix 'L' given. *****" > exp
# Only stderr reaches the pipe (stdout is discarded); the message is its second line.
$BT merge -i a.bed -iobuf 20L 2>&1 > /dev/null | head -2 | tail -1 > obs
check exp obs
rm exp obs
###########################################################
# Test -iobuf doesn't allow a buffer size below 8 bytes.
###########################################################
echo " merge.t39...\c"
echo \
"***** ERROR: specified buffer size is too small. *****" > exp
$BT merge -i a.bed -iobuf 7 2>&1 > /dev/null | head -2 | tail -1 > obs
check exp obs
rm exp obs
###########################################################
# Test -iobuf doesn't allow non-numeric arguments
###########################################################
echo " merge.t40...\c"
echo \
"***** ERROR: argument passed to -iobuf is not numeric. *****" > exp
$BT merge -i a.bed -iobuf beerM 2>&1 > /dev/null | head -2 | tail -1 > obs
check exp obs
rm exp obs
###########################################################
# Test -iobuf allows correct argument with suffix
###########################################################
echo " merge.t41...\c"
echo \
"chr1 10 20
chr1 30 100" > exp
$BT merge -i a.bed -iobuf 128M > obs
check exp obs
rm exp obs
###########################################################
# Test -iobuf allows correct argument without suffix
###########################################################
echo " merge.t42...\c"
echo \
"chr1 10 20
chr1 30 100" > exp
$BT merge -i a.bed -iobuf 8192 > obs
check exp obs
rm exp obs
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment