Skip to content
Snippets Groups Projects
Commit 0b8612d3 authored by Neil Kindlon's avatar Neil Kindlon
Browse files

Read VCF struct vars length from SVLEN tag instead of using length of varRef field

parent 0ea1769d
No related branches found
No related tags found
No related merge requests found
...@@ -137,3 +137,20 @@ bool SingleLineDelimTextFileReader::findDelimiters() { ...@@ -137,3 +137,20 @@ bool SingleLineDelimTextFileReader::findDelimiters() {
} }
return true; return true;
} }
int SingleLineDelimTextFileReader::getVcfSVlen() {
int startPos = _delimPositions[VCF_TAG_FIELD] +1;
const char *startPtr = strstr(_sLine.c_str() + startPos, "SVLEN=") +6;
const char *endPtr = strchr(startPtr, ';');
const char *midPtr = strchr(startPtr, ',');
int endCoord = -1;
if (midPtr != NULL && midPtr < endPtr) {
//comma found in the number, that means there are two numbers
int num1 = str2chrPos(startPtr, midPtr - startPtr);
int num2 = str2chrPos(midPtr +1, endPtr - (midPtr +1));
endCoord = max(abs(num1), abs(num2));
} else {
endCoord = abs(str2chrPos(startPtr, endPtr - startPtr));
}
return endCoord;
}
...@@ -15,6 +15,10 @@ using namespace std; ...@@ -15,6 +15,10 @@ using namespace std;
class SingleLineDelimTextFileReader : public FileReader { class SingleLineDelimTextFileReader : public FileReader {
public: public:
//Allow VCF records to access a specialized private method.
//See end of class declaration for details.
friend class VcfRecord;
SingleLineDelimTextFileReader(int numFields, char delimChar = '\t'); SingleLineDelimTextFileReader(int numFields, char delimChar = '\t');
~SingleLineDelimTextFileReader(); ~SingleLineDelimTextFileReader();
...@@ -26,6 +30,8 @@ public: ...@@ -26,6 +30,8 @@ public:
virtual void appendField(int fieldNum, QuickString &str) const; virtual void appendField(int fieldNum, QuickString &str) const;
virtual const QuickString &getHeader() const { return _header; } virtual const QuickString &getHeader() const { return _header; }
virtual bool hasHeader() const { return _fullHeaderFound; } virtual bool hasHeader() const { return _fullHeaderFound; }
protected: protected:
int _numFields; int _numFields;
char _delimChar; char _delimChar;
...@@ -40,6 +46,12 @@ protected: ...@@ -40,6 +46,12 @@ protected:
bool detectAndHandleHeader(); bool detectAndHandleHeader();
bool findDelimiters(); bool findDelimiters();
//This is actually a very specialized function strictly for VCF
//records to read and process extra information about Structural Variants.
static const int VCF_TAG_FIELD = 7;
int getVcfSVlen();
}; };
......
...@@ -18,13 +18,17 @@ bool VcfRecord::initFromFile(SingleLineDelimTextFileReader *fileReader) ...@@ -18,13 +18,17 @@ bool VcfRecord::initFromFile(SingleLineDelimTextFileReader *fileReader)
_startPos--; // VCF is one-based. Here we intentionally don't decrement the string version, _startPos--; // VCF is one-based. Here we intentionally don't decrement the string version,
//because we'll still want to output the one-based number in the print methods, even though //because we'll still want to output the one-based number in the print methods, even though
//internally we decrement the integer to comply with the 0-based format common to other records. //internally we decrement the integer to comply with the 0-based format common to other records.
fileReader->getField(4, _varRef);
fileReader->getField(3, _varAlt); fileReader->getField(3, _varAlt);
//endPos is just the startPos plus the length of the variant if (_varRef[0] == '<') {
_endPos = _startPos + _varAlt.size(); //this is a structural variant. Need to parse the tags to find the endpos.
_endPos = _startPos + fileReader->getVcfSVlen();
} else {
//endPos is just the startPos plus the length of the variant
_endPos = _startPos + _varAlt.size();
}
int2str(_endPos, _endPosStr); int2str(_endPos, _endPosStr);
fileReader->getField(2, _name); fileReader->getField(2, _name);
fileReader->getField(4, _varRef);
fileReader->getField(5, _score); fileReader->getField(5, _score);
return initOtherFieldsFromFile(fileReader); return initOtherFieldsFromFile(fileReader);
......
...@@ -528,3 +528,17 @@ echo \ ...@@ -528,3 +528,17 @@ echo \
$BT merge -i expFormat.bed > obs $BT merge -i expFormat.bed > obs
check exp obs check exp obs
rm obs exp rm obs exp
###########################################################
# Test that struct vars in VCF get correct length
###########################################################
echo " merge.t44...\c"
echo \
"19 252805 257416
19 260364 261044
19 265133 265691
19 265985 266386" > exp
$BT merge -i vcfSVtest.vcf > obs
check exp obs
rm obs exp
##fileformat=VCFv4.1
19 252806 791255 G <DEL> 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||
19 260365 791256 C <DEL> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene
19 265134 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||
19 265986 791258 A <DEL> 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment