diff --git a/src/utils/Contexts/ContextBase.cpp b/src/utils/Contexts/ContextBase.cpp index 6fa01c1b5fc0bf241404afe422da0ef0ec219109..1e426ad0367093be5d6c4798f58f53bf60bc3216 100644 --- a/src/utils/Contexts/ContextBase.cpp +++ b/src/utils/Contexts/ContextBase.cpp @@ -32,7 +32,7 @@ ContextBase::ContextBase() _writeOverlap(false), _writeAllOverlap(false), _haveFraction(false), - _overlapFraction(1E-9), + _overlapFraction(0.0), _reciprocal(false), _sameStrand(false), _diffStrand(false), diff --git a/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp b/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp index 4beaccf2ace7a6d16c8a476ba73c09ad9f0ee9ae..9fe5011dda51fcdb5750656417ba37f0c2ef98c9 100644 --- a/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp +++ b/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp @@ -137,3 +137,20 @@ bool SingleLineDelimTextFileReader::findDelimiters() { } return true; } + +int SingleLineDelimTextFileReader::getVcfSVlen() { + int startPos = _delimPositions[VCF_TAG_FIELD] +1; + const char *startPtr = strstr(_sLine.c_str() + startPos, "SVLEN=") +6; + const char *endPtr = strchr(startPtr, ';'); + const char *midPtr = strchr(startPtr, ','); + int endCoord = -1; + if (midPtr != NULL && midPtr < endPtr) { + //comma found in the number, that means there are two numbers + int num1 = str2chrPos(startPtr, midPtr - startPtr); + int num2 = str2chrPos(midPtr +1, endPtr - (midPtr +1)); + endCoord = max(abs(num1), abs(num2)); + } else { + endCoord = abs(str2chrPos(startPtr, endPtr - startPtr)); + } + return endCoord; +} diff --git a/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.h b/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.h index cfb6241247ac7feddd2a856c36a52454cd7c0186..ac1a280e8c62fe182013b707f7eda0c73987822b 100644 --- a/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.h +++ b/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.h @@ -13,6 +13,10 @@ class SingleLineDelimTextFileReader : public FileReader { public: + //Allow VCF records to access a specialized private method. + //See end of class declaration for details. + friend class VcfRecord; + SingleLineDelimTextFileReader(int numFields, char delimChar = '\t'); ~SingleLineDelimTextFileReader(); @@ -24,6 +28,8 @@ public: virtual void appendField(int fieldNum, QuickString &str) const; virtual const QuickString &getHeader() const { return _header; } virtual bool hasHeader() const { return _fullHeaderFound; } + + protected: int _numFields; char _delimChar; @@ -38,6 +44,12 @@ protected: bool detectAndHandleHeader(); bool findDelimiters(); + //This is actually a very specialized function strictly for VCF + //records to read and process extra information about Structural Variants. + static const int VCF_TAG_FIELD = 7; + int getVcfSVlen(); + + }; diff --git a/src/utils/FileRecordTools/Records/Record.cpp b/src/utils/FileRecordTools/Records/Record.cpp index 51023e30bd7963ca453af1908fcbe92b39955338..aa37276b6b75fee6a42b9751c3383ddf0ce5985c 100644 --- a/src/utils/FileRecordTools/Records/Record.cpp +++ b/src/utils/FileRecordTools/Records/Record.cpp @@ -160,15 +160,27 @@ bool Record::sameChromIntersects(const Record *record, int otherStart = record->getStartPos(); int otherEnd = record->getEndPos(); + bool otherZeroLen = (otherStart - otherEnd == 0); int maxStart = max(_startPos, otherStart); int minEnd = min(_endPos, otherEnd); + bool localZeroLen = (_endPos - _startPos == 0); //rule out all cases of no intersection at all if (minEnd < maxStart) { return false; } + if (overlapFraction == 0.0) { + //don't care about amount of overlap. + //however, if minEnd and maxStart are equal, and + //neither record is zeroLen, return false. + if (minEnd == maxStart && !otherZeroLen && !localZeroLen) { + return false; + } + return true; + } + int overlapBases = minEnd - maxStart; int len = _endPos - _startPos; int otherLen = otherEnd - otherStart; diff --git a/src/utils/FileRecordTools/Records/VcfRecord.cpp b/src/utils/FileRecordTools/Records/VcfRecord.cpp index b3ac4666a89fde61db2e1904512638734450ab2d..a66d880c94e3f64003bc1d16b81f81b15ef233b1 100644 --- a/src/utils/FileRecordTools/Records/VcfRecord.cpp +++ b/src/utils/FileRecordTools/Records/VcfRecord.cpp @@ -18,13 +18,17 @@ bool VcfRecord::initFromFile(SingleLineDelimTextFileReader *fileReader) _startPos--; // VCF is one-based. Here we intentionally don't decrement the string version, //because we'll still want to output the one-based number in the print methods, even though //internally we decrement the integer to comply with the 0-based format common to other records. - fileReader->getField(3, _varAlt); - //endPos is just the startPos plus the length of the variant - _endPos = _startPos + _varAlt.size(); + fileReader->getField(4, _varAlt); + fileReader->getField(3, _varRef); + if (_varAlt[0] == '<') { + //this is a structural variant. Need to parse the tags to find the endpos. + _endPos = _startPos + fileReader->getVcfSVlen(); + } else { + //endPos is just the startPos plus the length of the variant + _endPos = _startPos + _varRef.size(); + } int2str(_endPos, _endPosStr); - fileReader->getField(2, _name); - fileReader->getField(4, _varRef); fileReader->getField(5, _score); return initOtherFieldsFromFile(fileReader); @@ -33,8 +37,8 @@ bool VcfRecord::initFromFile(SingleLineDelimTextFileReader *fileReader) void VcfRecord::clear() { BedPlusInterval::clear(); - _varAlt.release(); _varRef.release(); + _varAlt.release(); } void VcfRecord::print(QuickString &outBuf) const { @@ -70,10 +74,10 @@ void VcfRecord::printOtherFields(QuickString &outBuf) const { outBuf.append('\t'); outBuf.append(_name); outBuf.append('\t'); - outBuf.append(_varAlt); - outBuf.append('\t'); outBuf.append(_varRef); outBuf.append('\t'); + outBuf.append(_varAlt); + outBuf.append('\t'); outBuf.append(_score); for (int i= 0; i < (int)_otherIdxs.size(); i++) { outBuf.append('\t'); @@ -98,10 +102,10 @@ const QuickString &VcfRecord::getField(int fieldNum) const case 3: return _name; case 4: - return _varAlt; + return _varRef; break; case 5: - return _varRef; + return _varAlt; break; case 6: return _score; diff --git a/src/utils/FileRecordTools/Records/VcfRecord.h b/src/utils/FileRecordTools/Records/VcfRecord.h index 72aafe5fef77d8e8e159085650d619a6c1ff9ce6..10229046650ae76d252d14ace6e0b5a4b0eb41b8 100644 --- a/src/utils/FileRecordTools/Records/VcfRecord.h +++ b/src/utils/FileRecordTools/Records/VcfRecord.h @@ -32,8 +32,8 @@ public: virtual const QuickString &getField(int fieldNum) const; protected: - QuickString _varAlt; QuickString _varRef; + QuickString _varAlt; void printOtherFields(QuickString &outBuf) const; }; diff --git a/src/utils/RecordOutputMgr/RecordOutputMgr.cpp b/src/utils/RecordOutputMgr/RecordOutputMgr.cpp index 2c7dcd495365b4318f53b970220588aca51da103..fcde19077eaf84399131b03d74c475e2f7647377 100644 --- a/src/utils/RecordOutputMgr/RecordOutputMgr.cpp +++ b/src/utils/RecordOutputMgr/RecordOutputMgr.cpp @@ -210,6 +210,7 @@ void RecordOutputMgr::printRecord(RecordKeyVector &keyList, RecordKeyVector *blo if ((static_cast<ContextIntersect *>(_context))->getWriteAllOverlap()) { // -wao the user wants to force the reporting of 0 overlap if (printKeyAndTerminate(keyList)) { + _currBamBlockList = NULL; return; } @@ -223,6 +224,7 @@ void RecordOutputMgr::printRecord(RecordKeyVector &keyList, RecordKeyVector *blo else if ((static_cast<ContextIntersect *>(_context))->getLeftJoin()) { if (printKeyAndTerminate(keyList)) { _currBamBlockList = NULL; + return; } tab(); @@ -230,11 +232,13 @@ void RecordOutputMgr::printRecord(RecordKeyVector &keyList, RecordKeyVector *blo newline(); if (needsFlush()) flush(); _currBamBlockList = NULL; + return; } } else { if (printBamRecord(keyList, true) == BAM_AS_BAM) { _currBamBlockList = NULL; + return; } int hitIdx = 0; @@ -252,14 +256,17 @@ void RecordOutputMgr::printRecord(RecordKeyVector &keyList, RecordKeyVector *blo newline(); } _currBamBlockList = NULL; + return; } else if (_context->getProgram() == ContextBase::MAP) { printKeyAndTerminate(keyList); _currBamBlockList = NULL; + return; } else if (_context->getProgram() == ContextBase::MERGE) { printKeyAndTerminate(keyList); _currBamBlockList = NULL; + return; } } @@ -380,6 +387,7 @@ void RecordOutputMgr::reportOverlapDetail(const Record *keyRecord, const Record newline(); if (needsFlush()) flush(); } + const_cast<Record *>(keyRecord)->adjustZeroLength(); } void RecordOutputMgr::reportOverlapSummary(RecordKeyVector &keyList) diff --git a/src/utils/general/ParseTools.cpp b/src/utils/general/ParseTools.cpp index 04782a717630a0c0457df8cfa3b80427b0170002..e85065ad3a93d3671ca8da437ce824b09600b2fe 100644 --- a/src/utils/general/ParseTools.cpp +++ b/src/utils/general/ParseTools.cpp @@ -2,6 +2,9 @@ #include <climits> #include <cctype> #include <cstring> +#include <cstdio> +#include <cstdlib> +#include <sstream> //This functions recognizes only numbers with digits, plus sign, minus sign, decimal point, e, or E. Hexadecimal and pointers not currently supported. bool isNumeric(const QuickString &str) { @@ -19,12 +22,26 @@ int str2chrPos(const QuickString &str) { } int str2chrPos(const char *str, size_t ulen) { + if (ulen == 0) { ulen = strlen(str); } + + //first test for exponents / scientific notation + bool hasExponent = false; + for (size_t i=0; i < ulen; i++) { + if (str[i] == 'e' || str[i] == 'E') { + std::istringstream ss(str); + double retVal; + ss >> retVal; + return (int)retVal; + } + } + int len=(int)ulen; if (len < 1 || len > 10) { - return INT_MIN; //can't do more than 9 digits and a minus sign + fprintf(stderr, "***** ERROR: too many digits/characters for integer conversion in string %s. Exiting...\n", str); + exit(1); } register int sum=0; @@ -39,9 +56,15 @@ int str2chrPos(const char *str, size_t ulen) { for (int i=startPos; i < len; i++) { char currChar = str[i]; + if (currChar == 'e' || currChar == 'E') { + //default to atoi for scientific notation + return atoi(str); + } if (!isdigit(currChar)) { - return INT_MIN; + fprintf(stderr, "***** ERROR: illegal character '%c' found in integer conversion of string %s. Exiting...\n", currChar, str); + exit(1); } + int dig = currChar - 48; //ascii code for zero. int power = len -i -1; @@ -77,7 +100,7 @@ int str2chrPos(const char *str, size_t ulen) { sum += dig *1000000000; break; default: - return INT_MIN; + return 0; break; } } diff --git a/test/intersect/a_testZeroLen.bed b/test/intersect/a_testZeroLen.bed new file mode 100644 index 0000000000000000000000000000000000000000..0877a02764b3dc18e5bcf9a84ebb0b8d744aaf8a --- /dev/null +++ b/test/intersect/a_testZeroLen.bed @@ -0,0 +1,3 @@ +chr1 5 15 r1 +chr1 7 12 r3 +chr1 20 25 r2 diff --git a/test/intersect/a_vcfSVtest.vcf b/test/intersect/a_vcfSVtest.vcf new file mode 100644 index 0000000000000000000000000000000000000000..96857e47f295951de99757a9e345ef342fde7573 --- /dev/null +++ b/test/intersect/a_vcfSVtest.vcf @@ -0,0 +1,5 @@ +##fileformat=VCFv4.1 +19 252806 791255 G <DEL> 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 260365 791256 C <DEL> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene +19 265134 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 265986 791258 A <DEL> 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| diff --git a/test/intersect/b_testZeroLen.bed b/test/intersect/b_testZeroLen.bed new file mode 100644 index 0000000000000000000000000000000000000000..4217235505760648c2357f39cca1c58ac15a68a3 --- /dev/null +++ b/test/intersect/b_testZeroLen.bed @@ -0,0 +1,3 @@ +chr1 9 9 m3 +chr1 50 150 m1 +chr2 20 25 m2 diff --git a/test/intersect/b_vcfSVtest.vcf b/test/intersect/b_vcfSVtest.vcf new file mode 100644 index 0000000000000000000000000000000000000000..46284d952307141df64250e9fe05072629d4cc5f --- /dev/null +++ b/test/intersect/b_vcfSVtest.vcf @@ -0,0 +1,5 @@ +##fileformat=VCFv4.1 +19 256900 791255 G T 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 260800 791256 C <INS> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene +19 265500 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 266003 791258 A C 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| diff --git a/test/intersect/new_test-intersect.sh b/test/intersect/new_test-intersect.sh index 7efa95f1325fb6a1cb3ca6cd68963d7b2fd2dd7d..d289137c6040417382a1a9ee444ba7bdbb5dc150 100755 --- a/test/intersect/new_test-intersect.sh +++ b/test/intersect/new_test-intersect.sh @@ -764,3 +764,53 @@ $BT intersect -a oneRecordNoNewline.bed -b oneRecordNoNewline.bed > obs check obs exp rm obs exp +########################################################### +# Test zero length intersections in non-bam files. +############################################################ +echo " intersect.new.t65...\c" +echo \ +"chr1 5 15 r1 chr1 9 9 m3 0 +chr1 7 12 r3 chr1 9 9 m3 0" > exp +$BT intersect -a a_testZeroLen.bed -b b_testZeroLen.bed -wo > obs +check exp obs +rm exp obs + +########################################################### +# Test zero length intersections in non-bam files, -sorted +############################################################ +echo " intersect.new.t66...\c" +echo \ +"chr1 5 15 r1 chr1 9 9 m3 0 +chr1 7 12 r3 chr1 9 9 m3 0" > exp +$BT intersect -a a_testZeroLen.bed -b b_testZeroLen.bed -wo -sorted> obs +check exp obs +rm exp obs + +########################################################### +# Test vcf struct var intersection +############################################################ +echo " intersect.new.t67...\c" +echo \ +"19 252806 791255 G <DEL> 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 256900 791255 G T 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 260365 791256 C <DEL> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene 19 260800 791256 C <INS> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene +19 265134 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 265500 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 265986 791258 A <DEL> 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 265500 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 265986 791258 A <DEL> 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 266003 791258 A C 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||" > exp +$BT intersect -a a_vcfSVtest.vcf -b b_vcfSVtest.vcf -wa -wb >obs +check exp obs +rm exp obs + +########################################################### +# Test vcf struct var intersection, sorted +############################################################ +echo " intersect.new.t68...\c" +echo \ +"19 252806 791255 G <DEL> 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 256900 791255 G T 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 260365 791256 C <DEL> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene 19 260800 791256 C <INS> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene +19 265134 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 265500 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 265986 791258 A <DEL> 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 265500 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 265986 791258 A <DEL> 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 266003 791258 A C 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||" > exp +$BT intersect -a a_vcfSVtest.vcf -b b_vcfSVtest.vcf -wa -wb -sorted >obs +check exp obs +rm exp obs + diff --git a/test/merge/expFormat.bed b/test/merge/expFormat.bed new file mode 100644 index 0000000000000000000000000000000000000000..d7bd5182a5aa9ee9cb9fb3807fdd18f0b99872c4 --- /dev/null +++ b/test/merge/expFormat.bed @@ -0,0 +1 @@ +chr1 8e02 830 diff --git a/test/merge/test-merge.sh b/test/merge/test-merge.sh index 586940bf9617b369f6c9c3150f3e167bdbe15f24..6488991a0448ca4a6ae9f7520ed097540ec3f119 100644 --- a/test/merge/test-merge.sh +++ b/test/merge/test-merge.sh @@ -518,3 +518,27 @@ chr1 30 100" > exp $BT merge -i a.bed -iobuf 8192 > obs check exp obs rm exp obs + +########################################################### +# Test that scientific notation is allowed for coordinates +########################################################### +echo " merge.t43...\c" +echo \ +"chr1 800 830" > exp +$BT merge -i expFormat.bed > obs +check exp obs +rm obs exp + + +########################################################### +# Test that struct vars in VCF get correct length +########################################################### +echo " merge.t44...\c" +echo \ +"19 252805 257416 +19 260364 261044 +19 265133 265691 +19 265985 266386" > exp +$BT merge -i vcfSVtest.vcf > obs +check exp obs +rm obs exp diff --git a/test/merge/vcfSVtest.vcf b/test/merge/vcfSVtest.vcf new file mode 100644 index 0000000000000000000000000000000000000000..96857e47f295951de99757a9e345ef342fde7573 --- /dev/null +++ b/test/merge/vcfSVtest.vcf @@ -0,0 +1,5 @@ +##fileformat=VCFv4.1 +19 252806 791255 G <DEL> 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 260365 791256 C <DEL> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene +19 265134 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| +19 265986 791258 A <DEL> 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||