Commit ca31d1f2 authored by Neil Kindlon's avatar Neil Kindlon
Browse files

Addressed >2 numbers in VCF SVLEN, can appear at endl w/ tab, semi-colon, newline or null

parent 5f0362f8
......@@ -139,6 +139,11 @@ bool SingleLineDelimTextFileReader::findDelimiters() {
}
int SingleLineDelimTextFileReader::getVcfSVlen() {
// The SVLEN field can appear anywhere in the info tag, and may be followed by a semi-colon, tab, newline, or end in NULL.
// it can contain one, two, or more numbers, which would be separated by a comma.
// We want the minimum number.
int startPos = _delimPositions[VCF_TAG_FIELD] +1;
const char *startPtr = strstr(_sLine.c_str() + startPos, "SVLEN=");
if (startPtr == NULL) {
......@@ -146,16 +151,30 @@ int SingleLineDelimTextFileReader::getVcfSVlen() {
return 1;
}
startPtr +=6; // length of label "SVLEN="
const char *endPtr = strchr(startPtr, ';');
const char *midPtr = strchr(startPtr, ',');
int endCoord = -1;
if (midPtr != NULL && midPtr < endPtr) {
//comma found in the number, that means there are two numbers
int num1 = str2chrPos(startPtr, midPtr - startPtr);
int num2 = str2chrPos(midPtr +1, endPtr - (midPtr +1));
endCoord = min(abs(num1), abs(num2));
} else {
endCoord = abs(str2chrPos(startPtr, endPtr - startPtr));
}
return endCoord;
const char *currPtr = startPtr;
const char *endPtr = _sLine.c_str() + _sLine.size();
int minVal = INT_MAX;
int currVal = 0;
QuickString currValStr;
while (1) {
if (currPtr == endPtr || *currPtr == ';' || *currPtr == '\t' || *currPtr == '\n' || *currPtr == ',') {
if (currPtr > startPtr) {
currValStr.assign(startPtr, currPtr - startPtr);
currVal = abs(str2chrPos(currValStr));
if (currVal < minVal) minVal = currVal;
startPtr = currPtr;
}
if (currPtr == endPtr || *currPtr != ',') {
//if end of line or non-comma delimiter, break.
break;
} else {
//skip the comma
startPtr++;
}
}
currPtr++;
};
return minVal;
}
##fileformat=VCFv4.1
chr1 1 a G <DEL> 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||
chr1 4 a G <DEL> 70.90 . TOOL=LUMPY;SVTYPE=DEL;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||;SVLEN=-45,800,2
\ No newline at end of file
......@@ -582,6 +582,35 @@ $BT intersect -a bug223_sv1_a.vcf -b bug223_sv1_b.vcf | cut -f1-6 > obs
check exp obs
rm exp obs
##################################################################
# see that SVLEN in VCF files can handle multiple numbers,
# at end of line, followed by NULL.
##################################################################
echo " intersect.t48...\c"
echo \
"chr1 1 a G <DEL> 70.90
chr1 1 a G <DEL> 70.90
chr1 4 a G <DEL> 70.90
chr1 4 a G <DEL> 70.90" > exp
$BT intersect -a bug223_d.vcf -b bug223_d.vcf | cut -f1-6 > obs
check exp obs
rm exp obs
##################################################################
# see that SVLEN in VCF files can handle multiple numbers,
# at end of line, followed by a tab
##################################################################
echo " intersect.t49...\c"
echo \
"chr1 1 a G <DEL> 70.90
chr1 1 a G <DEL> 70.90
chr1 4 a G <DEL> 70.90
chr1 4 a G <DEL> 70.90" > exp
$BT intersect -a bug223_e.vcf -b bug223_e.vcf | cut -f1-6 > obs
check exp obs
rm exp obs
cd multi_intersect
bash test-multi_intersect.sh
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment