Skip to content
Snippets Groups Projects
Commit 656fd843 authored by Aaron Quinlan's avatar Aaron Quinlan
Browse files

Merge pull request #68 from nkindlon/master

Added stranded info to jaccard and merge help, fixed stranded jaccard bu...
parents aece1e8f 39fffc97
No related branches found
No related tags found
No related merge requests found
...@@ -64,6 +64,15 @@ void jaccard_help(void) { ...@@ -64,6 +64,15 @@ void jaccard_help(void) {
cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl; cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl;
cerr << "\t-s\t" << "Force strandedness. That is, only merge features" << endl;
cerr << "\t\tthat are on the same strand." << endl;
cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl;
cerr << "\t-S\t" << "Force merge for one specific strand only." << endl;
cerr << "\t\t" << "Follow with + or - to force merge from only" << endl;
cerr << "\t\t" << "the forward or reverse strand, respectively." << endl;
cerr << "\t\t" << "- By default, merging is done without respect to strand." << endl << endl;
cerr << "Notes: " << endl; cerr << "Notes: " << endl;
cerr << "\t(1) Input files must be sorted by chrom, then start position." cerr << "\t(1) Input files must be sorted by chrom, then start position."
<< endl << endl; << endl << endl;
......
...@@ -53,13 +53,13 @@ void merge_help(void) { ...@@ -53,13 +53,13 @@ void merge_help(void) {
cerr << "Options: " << endl; cerr << "Options: " << endl;
cerr << "\t-s\t" << "Force strandedness. That is, only merge features" << endl; cerr << "\t-s\t" << "Force strandedness. That is, only merge features" << endl;
cerr << "\t\tthat are the same strand." << endl; cerr << "\t\tthat are on the same strand." << endl;
cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl; cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl;
cerr << "\t-S\t" << "Force mergeing for a _specific_ strand. That is, only merge features" << endl; cerr << "\t-S\t" << "Force merge for one specific strand only." << endl;
cerr << "\t\tthat from a specific strabd." << endl; cerr << "\t\t" << "Follow with + or - to force merge from only" << endl;
cerr << "\t\t- For example, -S + will or -S -" << endl; cerr << "\t\t" << "the forward or reverse strand, respectively." << endl;
cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl; cerr << "\t\t" << "- By default, merging is done without respect to strand." << endl << endl;
cerr << "\t-d\t" << "Maximum distance between features allowed for features" << endl; cerr << "\t\tto be merged." << endl; cerr << "\t\t- Def. 0. That is, overlapping & book-ended features are merged." << endl; cerr << "\t-d\t" << "Maximum distance between features allowed for features" << endl; cerr << "\t\tto be merged." << endl; cerr << "\t\t- Def. 0. That is, overlapping & book-ended features are merged." << endl;
cerr << "\t\t- (INTEGER)" << endl; cerr << "\t\t- (INTEGER)" << endl;
......
...@@ -16,3 +16,94 @@ ContextJaccard::ContextJaccard() { ...@@ -16,3 +16,94 @@ ContextJaccard::ContextJaccard() {
ContextJaccard::~ContextJaccard() { ContextJaccard::~ContextJaccard() {
} }
bool ContextJaccard::parseCmdArgs(int argc, char **argv, int skipFirstArgs)
{
_argc = argc;
_argv = argv;
_skipFirstArgs = skipFirstArgs;
if (_argc < 2) {
setShowHelp(true);
return false;
}
setProgram(_programNames[argv[0]]);
_argsProcessed.resize(_argc - _skipFirstArgs, false);
for (_i=_skipFirstArgs; _i < argc; _i++) {
if (isUsed(_i - _skipFirstArgs)) {
continue;
}
else if (strcmp(_argv[_i], "-s") == 0) {
if (!handle_s()) return false;
}
else if (strcmp(_argv[_i], "-S") == 0) {
if (!handle_S()) return false;
}
}
return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
}
bool ContextJaccard::isValidState()
{
if (!ContextIntersect::isValidState()) {
return false;
}
// Tests for stranded merge
//
if (_desiredStrand != FileRecordMergeMgr::ANY_STRAND) { // requested stranded merge
for (int i=0; i < getNumInputFiles(); i++) {
// make sure file has strand.
if (!getFile(i)->recordsHaveStrand()) {
_errorMsg = "\n***** ERROR: stranded merge requested, but input file ";
_errorMsg += getInputFileName(i);
_errorMsg += " does not have strands. *****";
return false;
}
//make sure file is not VCF.
if (getFile(1)->getFileType() == FileRecordTypeChecker::VCF_FILE_TYPE) {
_errorMsg = "\n***** ERROR: stranded merge not supported for VCF file ";
_errorMsg += getInputFileName(i);
_errorMsg += ". *****";
return false;
}
}
}
//column operations not allowed with BAM input
if (hasColumnOpsMethods() &&
getFile(0)->getFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) {
_errorMsg = "\n***** ERROR: stranded merge not supported for VCF files. *****";
return false;
}
return true;
}
bool ContextJaccard::handle_s() {
setSameStrand(true);
_desiredStrand = FileRecordMergeMgr::SAME_STRAND_EITHER;
markUsed(_i - _skipFirstArgs);
return true;
}
bool ContextJaccard::handle_S() {
if ((_i+1) < _argc) {
bool validChar = false;
if (_argv[_i+1][0] == '+') {
_desiredStrand = FileRecordMergeMgr::SAME_STRAND_FORWARD;
validChar = true;
} else if (_argv[_i+1][0] == '-') {
validChar = true;
_desiredStrand = FileRecordMergeMgr::SAME_STRAND_REVERSE;
}
if (validChar) {
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
setSameStrand(true);
return true;
}
}
_errorMsg = "\n***** ERROR: -S option must be followed by + or -. *****";
return false;
}
...@@ -14,10 +14,12 @@ class ContextJaccard : public ContextIntersect { ...@@ -14,10 +14,12 @@ class ContextJaccard : public ContextIntersect {
public: public:
ContextJaccard(); ContextJaccard();
~ContextJaccard(); ~ContextJaccard();
virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs);
virtual bool isValidState();
private: private:
bool handle_s();
bool handle_S();
}; };
#endif /* CONTEXTJACCARD_H_ */ #endif /* CONTEXTJACCARD_H_ */
chr1 10 50 a1f 2 +
chr1 20 60 b1r 4 -
chr1 25 70 c1q 8 .
chr1 30 75 d1q 16 .
chr1 40 80 e1f 32 +
chr1 45 90 f1r 64 -
chr2 10 50 a2q 2 .
chr2 20 40 b2f 4 +
chr2 25 50 c2r 8 -
chr2 30 60 d2f 16 +
chr2 35 65 e2q 32 .
chr2 39 80 f2r 64 -
\ No newline at end of file
chr1 10 50 2a1r 2 -
chr1 40 70 2b1q 4 .
chr1 60 100 2c1f 8 +
chr2 15 40 2d2f 16 +
chr2 30 100 2e2r 32 -
...@@ -100,3 +100,50 @@ echo \ ...@@ -100,3 +100,50 @@ echo \
$BT jaccard -a a.bam -b three_blocks_match.bam > obs $BT jaccard -a a.bam -b three_blocks_match.bam > obs
check exp obs check exp obs
rm exp obs rm exp obs
###########################################################
# Test jaccard with mixed strand files
###########################################################
echo " jaccard.t10...\c"
echo \
"intersection union-intersection jaccard n_intersections
145 180 0.805556 2" >exp
$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed > obs
check obs exp
rm obs exp
###########################################################
# Test jaccard with mixed strand files, -s option
# (match strand, either forward or reverse)
###########################################################
echo " jaccard.t11...\c"
echo \
"intersection union-intersection jaccard n_intersections
120 290 0.413793 4" >exp
$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed -s > obs
check obs exp
rm obs exp
###########################################################
# Test jaccard with mixed strand files, -S + option
# (match strand, forward only)
###########################################################
echo " jaccard.t12...\c"
echo \
"intersection union-intersection jaccard n_intersections
40 135 0.296296 2" >exp
$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed -S + > obs
check obs exp
rm obs exp
###########################################################
# Test jaccard with mixed strand files, -S - option
# (match strand, reverse only)
###########################################################
echo " jaccard.t13...\c"
echo \
"intersection union-intersection jaccard n_intersections
80 155 0.516129 2" > exp
$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed -S - > obs
check obs exp
rm obs exp
BT=${BT-../../bin/bedtools}
check()
{
if diff $1 $2; then
echo ok
else
echo fail
fi
}
###########################################################
# Test a basic self intersection
###########################################################
echo " jaccard.t01...\c"
echo \
"intersection union-intersection jaccard n_intersections
110 110 1 2" > exp
$BT jaccard -a a.bed -b a.bed > obs
check obs exp
rm obs exp
echo " jaccard.t02...\c"
echo \
"intersection union-intersection jaccard n_intersections
10 140 0.0714286 1" > exp
$BT jaccard -a a.bed -b b.bed > obs
check obs exp
rm obs exp
echo " jaccard.t03...\c"
echo \
"intersection union-intersection jaccard n_intersections
10 200 0.05 1" > exp
$BT jaccard -a a.bed -b c.bed > obs
check obs exp
rm obs exp
echo " jaccard.t04...\c"
echo \
"intersection union-intersection jaccard n_intersections
0 210 0 0" > exp
$BT jaccard -a a.bed -b d.bed > obs
check obs exp
rm obs exp
###########################################################
# Test stdin
###########################################################
echo " jaccard.t05...\c"
echo \
"intersection union-intersection jaccard n_intersections
10 140 0.0714286 1" > exp
cat a.bed | $BT jaccard -a - -b b.bed > obs
check obs exp
rm obs exp
###########################################################
# Test symmetry
###########################################################
echo " jaccard.t06...\c"
$BT jaccard -a a.bed -b b.bed > obs1
$BT jaccard -a b.bed -b a.bed > obs2
check obs1 obs2
rm obs1 obs2
###########################################################
# Test partially matching blocks without -split option.
###########################################################
echo " jaccard.t07...\c"
echo \
"intersection union-intersection jaccard n_intersections
10 50 0.2 1" > exp
$BT jaccard -a three_blocks_match.bed -b e.bed > obs
check obs exp
rm obs exp
###########################################################
# Test partially matching blocks with -split option.
###########################################################
echo " jaccard.t08...\c"
echo \
"intersection union-intersection jaccard n_intersections
5 55 0.0909091 1" > exp
$BT jaccard -a three_blocks_match.bed -b e.bed -split > obs
check obs exp
rm obs exp
intersection union-intersection jaccard n_intersections
10 150 0.0666667 1
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment