Commit 3b5afaf7 authored by Neil Kindlon's avatar Neil Kindlon
Browse files

added -prec option, default 10. Updated unit tests for merge, map, jaccard....

added -prec option, default 10. Updated unit tests for merge, map, jaccard. Accept stdin by default in merge.
parent 0292f311
......@@ -56,7 +56,8 @@ ContextBase::ContextBase()
_keyListOps(NULL),
_desiredStrand(FileRecordMergeMgr::ANY_STRAND),
_maxDistance(0),
_useMergedIntervals(false)
_useMergedIntervals(false),
_reportPrecision(-1)
{
_programNames["intersect"] = INTERSECT;
......@@ -127,10 +128,6 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
_argc = argc;
_argv = argv;
_skipFirstArgs = skipFirstArgs;
if (_argc < 2) {
setShowHelp(true);
return false;
}
setProgram(_programNames[argv[0]]);
......@@ -171,6 +168,9 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
else if (strcmp(_argv[_i], "-iobuf") == 0) {
if (!handle_iobuf()) return false;
}
else if (strcmp(_argv[_i], "-prec") == 0) {
if (!handle_prec()) return false;
}
else if (strcmp(_argv[_i], "-header") == 0) {
if (!handle_header()) return false;
}
......@@ -213,14 +213,18 @@ bool ContextBase::isValidState()
}
if (hasColumnOpsMethods()) {
//TBD: Adjust column ops for multiple databases.
//For now, use last file.
// FileRecordMgr *dbFile = getFile(hasIntersectMethods() ? _databaseFileIdx : 0);
// TBD: Adjust column ops for multiple databases.
// For now, use last file.
FileRecordMgr *dbFile = getFile(getNumInputFiles()-1);
_keyListOps->setDBfileType(dbFile->getFileType());
if (!_keyListOps->isValidColumnOps(dbFile)) {
return false;
}
//if user specified a precision, pass it to
//keyList ops
if (_reportPrecision != -1) {
_keyListOps->setPrecision(_reportPrecision);
}
}
return true;
}
......@@ -460,10 +464,33 @@ bool ContextBase::handle_o()
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
return true;
}
return true;
return false;
}
bool ContextBase::handle_prec()
{
if (!hasColumnOpsMethods()) {
return false;
}
if ((_i+1) < _argc) {
int prec = atoi(_argv[_i + 1]);
if (prec < 1) {
_errorMsg += "\n***** ERROR: -prec must be followed by a positive integer. Exiting. *****";
return false;
}
_reportPrecision = prec;
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
return true;
}
_errorMsg += "\n***** ERROR: -prec must be followed by a positive integer. Exiting. *****";
return false;
}
// for col ops, -null is a NULL value assigned
// when no overlaps are detected.
......
......@@ -143,6 +143,7 @@ public:
virtual bool hasColumnOpsMethods() const { return _hasColumnOpsMethods; }
const QuickString &getColumnOpsVal(RecordKeyVector &keyList) const;
//methods applicable only to column operations.
// Returns the currently stored _reportPrecision value; no validation is done here.
int getReportPrecision() const { return _reportPrecision; }
protected:
PROGRAM_TYPE _program;
......@@ -214,6 +215,8 @@ protected:
int _maxDistance;
bool _useMergedIntervals;
int _reportPrecision; //used in fields reported from numeric ops from map and merge.
// Records that the argument at index i has been handled by setting _argsProcessed[i] to true.
void markUsed(int i) { _argsProcessed[i] = true; }
// Reports whether the argument at index i has been marked as handled (reads _argsProcessed[i]).
bool isUsed(int i) const { return _argsProcessed[i]; }
......@@ -249,7 +252,7 @@ protected:
virtual bool handle_null();
virtual bool handle_delim();
virtual bool handle_sortout();
bool handle_prec();
bool parseIoBufSize(QuickString bufStr);
};
......
......@@ -32,10 +32,6 @@ bool ContextMerge::parseCmdArgs(int argc, char **argv, int skipFirstArgs)
_argc = argc;
_argv = argv;
_skipFirstArgs = skipFirstArgs;
if (_argc < 2) {
setShowHelp(true);
return false;
}
setProgram(_programNames[argv[0]]);
......@@ -79,12 +75,12 @@ bool ContextMerge::isValidState()
delete _keyListOps;
_keyListOps = NULL;
}
if (!ContextBase::isValidState()) {
return false;
//default to stdin
if (getNumInputFiles() == 0) {
addInputFile("-");
}
if (_files.size() != 1) {
_errorMsg = "\n***** ERROR: input file not specified. *****";
// Allow only one input file for now
if (!ContextBase::isValidState()) {
return false;
}
......
......@@ -7,6 +7,8 @@
#include "KeyListOps.h"
#include "FileRecordMgr.h"
#include <cmath> //for isnan
#include <sstream>
#include <iomanip>
KeyListOps::KeyListOps():
_dbFileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE)
......@@ -60,6 +62,7 @@ _dbFileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE)
_columns = "5";
// default to "sum"
_operations = "sum";
_precision = DEFAULT_PRECISION;
}
......@@ -172,7 +175,7 @@ const QuickString & KeyListOps::getOpVals(RecordKeyVector &hits)
if (isnan(val)) {
_outVals.append(_methods.getNullValue());
} else {
_outVals.append(val);
_outVals.append(format(val));
}
break;
......@@ -181,7 +184,7 @@ const QuickString & KeyListOps::getOpVals(RecordKeyVector &hits)
if (isnan(val)) {
_outVals.append(_methods.getNullValue());
} else {
_outVals.append(val);
_outVals.append(format(val));
}
break;
......@@ -190,7 +193,7 @@ const QuickString & KeyListOps::getOpVals(RecordKeyVector &hits)
if (isnan(val)) {
_outVals.append(_methods.getNullValue());
} else {
_outVals.append(val);
_outVals.append(format(val));
}
break;
......@@ -199,7 +202,7 @@ const QuickString & KeyListOps::getOpVals(RecordKeyVector &hits)
if (isnan(val)) {
_outVals.append(_methods.getNullValue());
} else {
_outVals.append(val);
_outVals.append(format(val));
}
break;
......@@ -208,7 +211,7 @@ const QuickString & KeyListOps::getOpVals(RecordKeyVector &hits)
if (isnan(val)) {
_outVals.append(_methods.getNullValue());
} else {
_outVals.append(val);
_outVals.append(format(val));
}
break;
......@@ -225,7 +228,7 @@ const QuickString & KeyListOps::getOpVals(RecordKeyVector &hits)
if (isnan(val)) {
_outVals.append(_methods.getNullValue());
} else {
_outVals.append(val);
_outVals.append(format(val));
}
break;
......@@ -234,7 +237,7 @@ const QuickString & KeyListOps::getOpVals(RecordKeyVector &hits)
if (isnan(val)) {
_outVals.append(_methods.getNullValue());
} else {
_outVals.append(val);
_outVals.append(format(val));
}
break;
......@@ -243,7 +246,7 @@ const QuickString & KeyListOps::getOpVals(RecordKeyVector &hits)
if (isnan(val)) {
_outVals.append(_methods.getNullValue());
} else {
_outVals.append(val);
_outVals.append(format(val));
}
break;
......@@ -252,7 +255,7 @@ const QuickString & KeyListOps::getOpVals(RecordKeyVector &hits)
if (isnan(val)) {
_outVals.append(_methods.getNullValue());
} else {
_outVals.append(val);
_outVals.append(format(val));
}
break;
......@@ -317,6 +320,14 @@ const QuickString & KeyListOps::getOpVals(RecordKeyVector &hits)
return _outVals;
}
// Renders val as text through a string stream whose precision is set to
// _precision, and caches the text in _formatStr.
//
// The returned reference refers to the _formatStr member, so its contents
// are overwritten by the next call to format().
const QuickString &KeyListOps::format(double val)
{
    std::stringstream numStream;
    // Same effect as streaming std::setprecision(_precision) ahead of the value.
    numStream.precision(_precision);
    numStream << val;

    _formatStr = numStream.str();
    return _formatStr;
}
void KeyListOpsHelp() {
cerr << "\t-o\t" << "Specify the operation that should be applied to -c." << endl;
......
......@@ -49,6 +49,7 @@ public:
bool isValidColumnOps(FileRecordMgr *dbFile);
const QuickString &getOpVals(RecordKeyVector &hits);
// Stores val in _precision, the precision applied when numeric results are formatted as text.
void setPrecision(int val) { _precision = val; }
private:
void init();
......@@ -65,9 +66,14 @@ private:
colOpsType _colOps;
QuickString _outVals;
QuickString _formatStr;
int _precision;
static const int DEFAULT_PRECISION = 10;
OP_TYPES getOpCode(const QuickString &operation) const;
bool isNumericOp(OP_TYPES op) const;
bool isNumericOp(const QuickString &op) const;
const QuickString &format(double val);
};
......
......@@ -119,7 +119,7 @@ rm obs exp
echo " jaccard.t11...\c"
echo \
"intersection union-intersection jaccard n_intersections
120 290 0.413793 4" >exp
70 340 0.205882 3" >exp
$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed -s > obs
check obs exp
rm obs exp
......
......@@ -773,11 +773,11 @@ rm obs exp
############################################################
echo " map.t51...\c"
echo \
"chr1 0 100 12.9167
"chr1 0 100 12.91666667
chr1 100 200 0
chr2 0 100 .
chr2 100 200 .
chr3 0 100 76.2222
chr3 0 100 76.22222222
chr3 100 200 0.25" > exp
$BT map -a ivls.bed -b values4.bed -c 7 -o stddev > obs
check obs exp
......@@ -792,7 +792,7 @@ echo \
chr1 100 200 .
chr2 0 100 .
chr2 100 200 .
chr3 0 100 114.333
chr3 0 100 114.3333333
chr3 100 200 0.5" > exp
$BT map -a ivls.bed -b values4.bed -c 7 -o sample_stddev > obs
check obs exp
......@@ -804,7 +804,7 @@ rm obs exp
echo " map.t53...\c"
echo \
"chr1 10000 12000 2.5
chr1 15000 20000 11.4444" > exp
chr1 15000 20000 11.44444444" > exp
$BT map -a d.bed -b fullFields.bam -c 5 -o mean > obs
check exp obs
rm exp obs
......@@ -2,7 +2,7 @@ chr1 10003 10143 10024.5
chr1 10358 10428 10358
chr1 11780 11921 11800.5
chr1 11996 12101 11998.5
chr1 12136 12347 12213.2
chr1 12136 12347 12213.25
chr1 12400 12503 12401.5
chr1 12635 12779 12661
chr1 12819 12919 12819
......@@ -11,8 +11,8 @@ chr1 13165 13287 13176
chr1 13561 13731 13595
chr1 14025 14125 14025
chr1 14221 14340 14230.5
chr1 14440 14715 14553.7
chr1 14805 15172 14944.7
chr1 14440 14715 14553.66667
chr1 14805 15172 14944.71429
chr1 15250 15350 15250
chr1 15470 15603 15486.5
chr1 15924 16065 15944.5
......@@ -30,7 +30,7 @@ chr1 18264 18364 18264
chr1 18577 18742 18610
chr1 19658 19758 19658
chr1 19819 19966 19842.5
chr1 20059 20320 20143.3
chr1 20059 20320 20143.33333
chr1 20457 20627 20494
chr1 20641 20741 20641
chr1 20754 20854 20754
......@@ -47,7 +47,7 @@ chr1 23130 23228 23130
chr1 23250 23350 23250
chr1 23557 23615 23557
chr1 24000 24120 24010
chr1 24248 24612 24351.7
chr1 24248 24612 24351.66667
chr1 24683 24850 24711
chr1 24921 25011 24921
chr1 25055 25291 25128.5
......@@ -74,7 +74,7 @@ chr1 32519 32619 32519
chr1 32732 32832 32732
chr1 32926 33120 32973
chr1 33143 33289 33189.5
chr1 33449 33669 33502.3
chr1 33449 33669 33502.33333
chr1 33842 33931 33842
chr1 33933 34044 33938.5
chr1 34070 34162 34070
......@@ -99,7 +99,7 @@ chr1 44085 44185 44085
chr1 44304 44504 44354
chr1 44688 44788 44688
chr1 45372 45472 45372
chr1 45738 45867 45750.3
chr1 45738 45867 45750.33333
chr1 46111 46212 46111.5
chr1 47250 47346 47250
chr1 47593 47693 47593
......
......@@ -12,7 +12,7 @@ chr1 13561 13731 2
chr1 14025 14125 3
chr1 14221 14340 14.5
chr1 14440 14715 14.5
chr1 14805 15172 12.1429
chr1 14805 15172 12.14285714
chr1 15250 15350 21
chr1 15470 15603 3
chr1 15924 16065 12
......@@ -30,7 +30,7 @@ chr1 18264 18364 0
chr1 18577 18742 1.5
chr1 19658 19758 25
chr1 19819 19966 26
chr1 20059 20320 31.3333
chr1 20059 20320 31.33333333
chr1 20457 20627 37
chr1 20641 20741 1
chr1 20754 20854 1
......@@ -47,8 +47,8 @@ chr1 23130 23228 3
chr1 23250 23350 0
chr1 23557 23615 0
chr1 24000 24120 0.5
chr1 24248 24612 0.833333
chr1 24683 24850 0.666667
chr1 24248 24612 0.8333333333
chr1 24683 24850 0.6666666667
chr1 24921 25011 1
chr1 25055 25291 1
chr1 25403 25740 1.4
......@@ -56,7 +56,7 @@ chr1 25767 25867 3
chr1 26053 26153 1
chr1 26406 26506 1
chr1 26680 26883 0.75
chr1 27102 27252 0.666667
chr1 27102 27252 0.6666666667
chr1 27582 27785 1
chr1 27995 28187 1
chr1 28198 28298 13
......@@ -74,7 +74,7 @@ chr1 32519 32619 1
chr1 32732 32832 3
chr1 32926 33120 1
chr1 33143 33289 2
chr1 33449 33669 1.33333
chr1 33449 33669 1.333333333
chr1 33842 33931 1
chr1 33933 34044 1
chr1 34070 34162 1
......@@ -91,7 +91,7 @@ chr1 38283 38380 1
chr1 38609 38709 1
chr1 38980 39088 1
chr1 39099 39199 3
chr1 39418 39585 1.66667
chr1 39418 39585 1.666666667
chr1 39920 40020 1
chr1 43646 43746 3
chr1 43974 44074 3
......
chr1 10003 10143 10024.5
chr1 10358 10428 1.35524e+08
chr1 10358 10428 135524043
chr1 11780 11921 12189
chr1 11996 12101 12401.5
chr1 12136 12347 12225.8
chr1 12136 12347 12225.75
chr1 12400 12503 11998.5
chr1 12635 12779 12504.7
chr1 12635 12779 12504.66667
chr1 12819 12919 13187
chr1 13039 13139 12681
chr1 13165 13287 13190
chr1 13561 13731 13595
chr1 14025 14125 13629
chr1 14221 14340 14607.5
chr1 14440 14715 14683.3
chr1 14805 15172 14904.3
chr1 14440 14715 14683.33333
chr1 14805 15172 14904.28571
chr1 15250 15350 14837
chr1 15470 15603 15068.5
chr1 15924 16065 16326
......@@ -31,13 +31,13 @@ chr1 18577 18742 18208
chr1 19658 19758 20059
chr1 19819 19966 20224
chr1 20059 20320 20137.5
chr1 20457 20627 20117.7
chr1 20457 20627 20117.66667
chr1 20641 20741 21047
chr1 20754 20854 21177
chr1 21047 21139 20641
chr1 21177 21277 20754
chr1 21449 21549 21834
chr1 21834 22029 22086.2
chr1 21834 22029 22086.25
chr1 22061 22149 22512
chr1 22242 22448 22052.6
chr1 22512 22566 22061
......@@ -47,18 +47,18 @@ chr1 23130 23228 22748
chr1 23250 23350 22870
chr1 23557 23615 24000
chr1 24000 24120 23989
chr1 24248 24612 24499.3
chr1 24683 24850 24540.7
chr1 24248 24612 24499.33333
chr1 24683 24850 24540.66667
chr1 24921 25011 24512
chr1 25055 25291 25339.8
chr1 25055 25291 25339.75
chr1 25403 25740 25455.8
chr1 25767 25867 25403
chr1 26053 26153 25640
chr1 26406 26506 26791
chr1 26680 26883 26945.2
chr1 27102 27252 26710.3
chr1 26680 26883 26945.25
chr1 27102 27252 26710.33333
chr1 27582 27785 28041.5
chr1 27995 28187 27905.3
chr1 27995 28187 27905.33333
chr1 28198 28298 28577
chr1 28439 28545 28031
chr1 28577 28674 28198
......@@ -74,7 +74,7 @@ chr1 32519 32619 32926
chr1 32732 32832 33143
chr1 32926 33120 32984
chr1 33143 33289 33150.5
chr1 33449 33669 33396.3
chr1 33449 33669 33396.33333
chr1 33842 33931 34268
chr1 33933 34044 33920
chr1 34070 34162 34511
......@@ -91,7 +91,7 @@ chr1 38283 38380 37904
chr1 38609 38709 38980
chr1 38980 39088 39013.5
chr1 39099 39199 39488
chr1 39418 39585 39335.7
chr1 39418 39585 39335.66667
chr1 39920 40020 39528
chr1 43646 43746 44085
chr1 43974 44074 44404
......@@ -105,7 +105,7 @@ chr1 47250 47346 47593
chr1 47593 47693 47250
chr1 47820 47920 48088
chr1 48088 48193 48149
chr1 48445 48677 48630.7
chr1 48445 48677 48630.66667
chr1 48681 48781 49092
chr1 48832 48932 48445
chr1 48967 49067 48577
......
......@@ -4,7 +4,7 @@ chr1 11780 11921 488.5
chr1 11996 12101 503
chr1 12136 12347 1
chr1 12400 12503 -503
chr1 12635 12779 -174.333
chr1 12635 12779 -174.3333333
chr1 12819 12919 468
chr1 13039 13139 -458
chr1 13165 13287 14
......@@ -12,7 +12,7 @@ chr1 13561 13731 0
chr1 14025 14125 -496
chr1 14221 14340 474
chr1 14440 14715 164
chr1 14805 15172 -54.7143
chr1 14805 15172 -54.71428571
chr1 15250 15350 -513
chr1 15470 15603 -518
chr1 15924 16065 481.5
......@@ -30,8 +30,8 @@ chr1 18264 18364 478
chr1 18577 18742 -501.5
chr1 19658 19758 501
chr1 19819 19966 477.5
chr1 20059 20320 -6.83333
chr1 20457 20627 -471.667
chr1 20059 20320 -6.833333333
chr1 20457 20627 -471.6666667
chr1 20641 20741 498
chr1 20754 20854 523
chr1 21047 21139 -498
......@@ -47,8 +47,8 @@ chr1 23130 23228 -480
chr1 23250 23350 -480
chr1 23557 23615 503
chr1 24000 24120 -1
chr1 24248 24612 173.167
chr1 24683 24850 -191.333
chr1 24248 24612 173.1666667
chr1 24683 24850 -191.3333333
chr1 24921 25011 -499
chr1 25055 25291 261.25
chr1 25403 25740 -107
......@@ -56,9 +56,9 @@ chr1 25767 25867 -464
chr1 26053 26153 -513
chr1 26406 26506 477
chr1 26680 26883 266.75
chr1 27102 27252 -514.667
chr1 27102 27252 -514.6666667
chr1 27582 27785 503.75
chr1 27995 28187 -166.333
chr1 27995 28187 -166.3333333
chr1 28198 28298 476
chr1 28439 28545 -508.5
chr1 28577 28674 -476
......@@ -74,7 +74,7 @@ chr1 32519 32619 507
chr1 32732 32832 511
chr1 32926 33120 3
chr1 33143 33289 -39
chr1 33449 33669 -135.333
chr1 33449 33669 -135.3333333
chr1 33842 33931 526
chr1 33933 34044 -16.5
chr1 34070 34162 536
......
chr1 5333587 5344172 line1 0 - 5334680 5344172
chr1 5481008 5484749 line2 0 - 5481796 5484749
chr1 5481008 5484749 line3 0 - 5481796 5484749
chr1 5481008 5484749 line4 0 - 5481796 5484749
chr1 6763278 6766882 line5 0 - 7766544 6766882
......@@ -542,3 +542,43 @@ echo \
$BT merge -i vcfSVtest.vcf > obs
check exp obs
rm obs exp
###########################################################
# Test that stdin is used by default
###########################################################
echo " merge.t45...\c"
echo \
"chr1 10 20
chr1 30 100" >exp
cat a.bed | $BT merge > obs
check exp obs
rm obs exp
###########################################################
# Test that the default precision is high enough that large
# sums are printed in full rather than in scientific notation
###########################################################
echo " merge.t46...\c"
echo \
"chr1 5333587 5344172 5344172
chr1 5481008 5484749 16454247
chr1 6763278 6766882 6766882" > exp
$BT merge -i precisionTest2.bed -c 8 -o sum> obs
check exp obs
rm obs exp
###########################################################
# Test that the user can specify a lower precision with -prec
###########################################################
echo " merge.t47...\c"
echo \
"chr1 5333587 5344172 5.3442e+06
chr1 5481008 5484749 1.6454e+07
chr1 6763278 6766882 6.7669e+06" > exp
$BT merge -i precisionTest2.bed -c 8 -o sum -prec 5 > obs
check exp obs
rm obs exp
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment