diff --git a/src/jaccard/jaccardMain.cpp b/src/jaccard/jaccardMain.cpp index 4e15ffbecd2d0bb8d6fc5fff790e0760fd02f56e..81a626236251d41df29986b39a64959322b8da0f 100644 --- a/src/jaccard/jaccardMain.cpp +++ b/src/jaccard/jaccardMain.cpp @@ -64,6 +64,15 @@ void jaccard_help(void) { cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl; + cerr << "\t-s\t" << "Force strandedness. That is, only merge features" << endl; + cerr << "\t\tthat are on the same strand." << endl; + cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl; + + cerr << "\t-S\t" << "Force merge for one specific strand only." << endl; + cerr << "\t\t" << "Follow with + or - to force merge from only" << endl; + cerr << "\t\t" << "the forward or reverse strand, respectively." << endl; + cerr << "\t\t" << "- By default, merging is done without respect to strand." << endl << endl; + cerr << "Notes: " << endl; cerr << "\t(1) Input files must be sorted by chrom, then start position." << endl << endl; diff --git a/src/mergeFile/mergeMain.cpp b/src/mergeFile/mergeMain.cpp index 2e74b894ab0de8aa9185945d71e05e4f2e8492de..cf6804e08211b47ffe86fdd42f0ef5a31c378264 100644 --- a/src/mergeFile/mergeMain.cpp +++ b/src/mergeFile/mergeMain.cpp @@ -53,13 +53,13 @@ void merge_help(void) { cerr << "Options: " << endl; cerr << "\t-s\t" << "Force strandedness. That is, only merge features" << endl; - cerr << "\t\tthat are the same strand." << endl; + cerr << "\t\tthat are on the same strand." << endl; cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl; - cerr << "\t-S\t" << "Force mergeing for a _specific_ strand. That is, only merge features" << endl; - cerr << "\t\tthat from a specific strabd." << endl; - cerr << "\t\t- For example, -S + will or -S -" << endl; - cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl; + cerr << "\t-S\t" << "Force merge for one specific strand only." << endl; + cerr << "\t\t" << "Follow with + or - to force merge from only" << endl; + cerr << "\t\t" << "the forward or reverse strand, respectively." << endl; + cerr << "\t\t" << "- By default, merging is done without respect to strand." << endl << endl; cerr << "\t-d\t" << "Maximum distance between features allowed for features" << endl; cerr << "\t\tto be merged." << endl; cerr << "\t\t- Def. 0. That is, overlapping & book-ended features are merged." << endl; cerr << "\t\t- (INTEGER)" << endl; diff --git a/src/utils/Contexts/ContextJaccard.cpp b/src/utils/Contexts/ContextJaccard.cpp index 7dcdda51bbf1bad3f0cce36b0131ee9ea798f80e..24eafbbd2ea0ff1ac5623fdc8463705be97a2593 100644 --- a/src/utils/Contexts/ContextJaccard.cpp +++ b/src/utils/Contexts/ContextJaccard.cpp @@ -16,3 +16,94 @@ ContextJaccard::ContextJaccard() { ContextJaccard::~ContextJaccard() { } + +bool ContextJaccard::parseCmdArgs(int argc, char **argv, int skipFirstArgs) +{ + _argc = argc; + _argv = argv; + _skipFirstArgs = skipFirstArgs; + if (_argc < 2) { + setShowHelp(true); + return false; + } + + setProgram(_programNames[argv[0]]); + + _argsProcessed.resize(_argc - _skipFirstArgs, false); + + for (_i=_skipFirstArgs; _i < argc; _i++) { + if (isUsed(_i - _skipFirstArgs)) { + continue; + } + else if (strcmp(_argv[_i], "-s") == 0) { + if (!handle_s()) return false; + } + else if (strcmp(_argv[_i], "-S") == 0) { + if (!handle_S()) return false; + } + } + return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs); +} + +bool ContextJaccard::isValidState() +{ + if (!ContextIntersect::isValidState()) { + return false; + } + // Tests for stranded merge + // + if (_desiredStrand != FileRecordMergeMgr::ANY_STRAND) { // requested stranded merge + for (int i=0; i < getNumInputFiles(); i++) { + // make sure file has strand. + if (!getFile(i)->recordsHaveStrand()) { + _errorMsg = "\n***** ERROR: stranded merge requested, but input file "; + _errorMsg += getInputFileName(i); + _errorMsg += " does not have strands. *****"; + return false; + } + //make sure file is not VCF. + if (getFile(1)->getFileType() == FileRecordTypeChecker::VCF_FILE_TYPE) { + _errorMsg = "\n***** ERROR: stranded merge not supported for VCF file "; + _errorMsg += getInputFileName(i); + _errorMsg += ". *****"; + return false; + } + } + } + //column operations not allowed with BAM input + if (hasColumnOpsMethods() && + getFile(0)->getFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) { + _errorMsg = "\n***** ERROR: stranded merge not supported for VCF files. *****"; + return false; + } + return true; +} + +bool ContextJaccard::handle_s() { + setSameStrand(true); + _desiredStrand = FileRecordMergeMgr::SAME_STRAND_EITHER; + markUsed(_i - _skipFirstArgs); + return true; +} + +bool ContextJaccard::handle_S() { + if ((_i+1) < _argc) { + bool validChar = false; + if (_argv[_i+1][0] == '+') { + _desiredStrand = FileRecordMergeMgr::SAME_STRAND_FORWARD; + validChar = true; + } else if (_argv[_i+1][0] == '-') { + validChar = true; + _desiredStrand = FileRecordMergeMgr::SAME_STRAND_REVERSE; + } + if (validChar) { + markUsed(_i - _skipFirstArgs); + _i++; + markUsed(_i - _skipFirstArgs); + setSameStrand(true); + return true; + } + } + _errorMsg = "\n***** ERROR: -S option must be followed by + or -. *****"; + return false; +} diff --git a/src/utils/Contexts/ContextJaccard.h b/src/utils/Contexts/ContextJaccard.h index 673065dfa84a3e7c54423ac2f92c0b8bd56255db..4cc1671c03373ee3ffe16b43edbade4df03fa912 100644 --- a/src/utils/Contexts/ContextJaccard.h +++ b/src/utils/Contexts/ContextJaccard.h @@ -14,10 +14,12 @@ class ContextJaccard : public ContextIntersect { public: ContextJaccard(); ~ContextJaccard(); + virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs); + virtual bool isValidState(); private: - - + bool handle_s(); + bool handle_S(); }; #endif /* CONTEXTJACCARD_H_ */ diff --git a/test/jaccard/aMixedStrands.bed b/test/jaccard/aMixedStrands.bed new file mode 100644 index 0000000000000000000000000000000000000000..b636fcfd87cb667e3affb42f87d932f432e69e30 --- /dev/null +++ b/test/jaccard/aMixedStrands.bed @@ -0,0 +1,12 @@ +chr1 10 50 a1f 2 + +chr1 20 60 b1r 4 - +chr1 25 70 c1q 8 . +chr1 30 75 d1q 16 . +chr1 40 80 e1f 32 + +chr1 45 90 f1r 64 - +chr2 10 50 a2q 2 . +chr2 20 40 b2f 4 + +chr2 25 50 c2r 8 - +chr2 30 60 d2f 16 + +chr2 35 65 e2q 32 . +chr2 39 80 f2r 64 - \ No newline at end of file diff --git a/test/jaccard/bMixedStrands.bed b/test/jaccard/bMixedStrands.bed new file mode 100644 index 0000000000000000000000000000000000000000..a4ab27fe140534f7bcda6b70bb1469c0dbec20c0 --- /dev/null +++ b/test/jaccard/bMixedStrands.bed @@ -0,0 +1,6 @@ +chr1 10 50 2a1r 2 - +chr1 40 70 2b1q 4 . +chr1 60 100 2c1f 8 + +chr2 15 40 2d2f 16 + +chr2 30 100 2e2r 32 - + diff --git a/test/jaccard/test-jaccard.sh b/test/jaccard/test-jaccard.sh index f979147bde2959de3ea0a8f4b2842d984ab3aa3a..b8d626bd4b50e262dd541ddfc5c90060d5d96296 100644 --- a/test/jaccard/test-jaccard.sh +++ b/test/jaccard/test-jaccard.sh @@ -100,3 +100,50 @@ echo \ $BT jaccard -a a.bam -b three_blocks_match.bam > obs check exp obs rm exp obs + +########################################################### +# Test jaccard with mixed strand files +########################################################### +echo " jaccard.t10...\c" +echo \ +"intersection union-intersection jaccard n_intersections +145 180 0.805556 2" >exp +$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed > obs +check obs exp +rm obs exp + +########################################################### +# Test jaccard with mixed strand files, -s option +# (match strand, either forward or reverse) +########################################################### +echo " jaccard.t11...\c" +echo \ +"intersection union-intersection jaccard n_intersections +120 290 0.413793 4" >exp +$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed -s > obs +check obs exp +rm obs exp + +########################################################### +# Test jaccard with mixed strand files, -S + option +# (match strand, forward only) +########################################################### +echo " jaccard.t12...\c" +echo \ +"intersection union-intersection jaccard n_intersections +40 135 0.296296 2" >exp +$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed -S + > obs +check obs exp +rm obs exp + +########################################################### +# Test jaccard with mixed strand files, -S - option +# (match strand, reverse only) +########################################################### +echo " jaccard.t13...\c" +echo \ +"intersection union-intersection jaccard n_intersections +80 155 0.516129 2" > exp +$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed -S - > obs +check obs exp +rm obs exp diff --git a/test/jaccard/test-jaccard.sh~ b/test/jaccard/test-jaccard.sh~ deleted file mode 100644 index 9a6526b35ad802f01a58c338e19bc02506e6ab2f..0000000000000000000000000000000000000000 --- a/test/jaccard/test-jaccard.sh~ +++ /dev/null @@ -1,94 +0,0 @@ -BT=${BT-../../bin/bedtools} - -check() -{ - if diff $1 $2; then - echo ok - else - echo fail - fi -} - -########################################################### -# Test a basic self intersection -########################################################### -echo " jaccard.t01...\c" -echo \ -"intersection union-intersection jaccard n_intersections -110 110 1 2" > exp -$BT jaccard -a a.bed -b a.bed > obs -check obs exp -rm obs exp - - -echo " jaccard.t02...\c" -echo \ -"intersection union-intersection jaccard n_intersections -10 140 0.0714286 1" > exp -$BT jaccard -a a.bed -b b.bed > obs -check obs exp -rm obs exp - -echo " jaccard.t03...\c" -echo \ -"intersection union-intersection jaccard n_intersections -10 200 0.05 1" > exp -$BT jaccard -a a.bed -b c.bed > obs -check obs exp -rm obs exp - - -echo " jaccard.t04...\c" -echo \ -"intersection union-intersection jaccard n_intersections -0 210 0 0" > exp -$BT jaccard -a a.bed -b d.bed > obs -check obs exp -rm obs exp - -########################################################### -# Test stdin -########################################################### -echo " jaccard.t05...\c" -echo \ -"intersection union-intersection jaccard n_intersections -10 140 0.0714286 1" > exp -cat a.bed | $BT jaccard -a - -b b.bed > obs -check obs exp -rm obs exp - - -########################################################### -# Test symmetry -########################################################### -echo " jaccard.t06...\c" -$BT jaccard -a a.bed -b b.bed > obs1 -$BT jaccard -a b.bed -b a.bed > obs2 -check obs1 obs2 -rm obs1 obs2 - -########################################################### -# Test partially matching blocks without -split option. -########################################################### -echo " jaccard.t07...\c" -echo \ -"intersection union-intersection jaccard n_intersections -10 50 0.2 1" > exp -$BT jaccard -a three_blocks_match.bed -b e.bed > obs -check obs exp -rm obs exp - - -########################################################### -# Test partially matching blocks with -split option. -########################################################### -echo " jaccard.t08...\c" -echo \ -"intersection union-intersection jaccard n_intersections -5 55 0.0909091 1" > exp -$BT jaccard -a three_blocks_match.bed -b e.bed -split > obs -check obs exp -rm obs exp - -intersection union-intersection jaccard n_intersections -10 150 0.0666667 1