Commit 43006c92 authored by Neil Kindlon
Browse files

Fixed BedPlus bug, now only requires first 3 fields instead of 6

parent 5b4e0f77
......@@ -14,7 +14,7 @@ BedPlusInterval::~BedPlusInterval()
}
const BedPlusInterval &BedPlusInterval::operator=(const BedPlusInterval &other) {
Bed6Interval::operator=(other);
Bed3Interval::operator=(other);
int otherSize = other._otherIdxs.size();
int mySize = _otherIdxs.size();
......@@ -40,7 +40,7 @@ const BedPlusInterval &BedPlusInterval::operator=(const BedPlusInterval &other)
bool BedPlusInterval::initFromFile(SingleLineDelimTextFileReader *fileReader)
{
return (Bed6Interval::initFromFile(fileReader) && initOtherFieldsFromFile(fileReader));
return (Bed3Interval::initFromFile(fileReader) && initOtherFieldsFromFile(fileReader));
}
bool BedPlusInterval::initOtherFieldsFromFile(SingleLineDelimTextFileReader *fileReader)
......@@ -63,7 +63,7 @@ bool BedPlusInterval::initOtherFieldsFromFile(SingleLineDelimTextFileReader *fil
}
void BedPlusInterval::clear() {
Bed6Interval::clear();
Bed3Interval::clear();
_numPrintFields = 0;
for (int i=0; i < (int)_otherIdxs.size(); i++) {
_otherIdxs[i]->clear();
......@@ -72,36 +72,26 @@ void BedPlusInterval::clear() {
void BedPlusInterval::print(QuickString &outBuf) const
{
Bed6Interval::print(outBuf);
for (int i=0; i < (int)_otherIdxs.size(); i++) {
outBuf.append('\t');
outBuf.append(*(_otherIdxs[i]));
}
Bed3Interval::print(outBuf);
printOtherFields(outBuf);
}
void BedPlusInterval::print(QuickString &outBuf, int start, int end) const
{
Bed6Interval::print(outBuf, start, end);
for (int i=0; i < (int)_otherIdxs.size(); i++) {
outBuf.append('\t');
outBuf.append(*(_otherIdxs[i]));
}
Bed3Interval::print(outBuf, start, end);
printOtherFields(outBuf);
}
void BedPlusInterval::print(QuickString &outBuf, const QuickString & start, const QuickString & end) const
{
Bed6Interval::print(outBuf, start, end);
for (int i=0; i < (int)_otherIdxs.size(); i++) {
outBuf.append('\t');
outBuf.append(*(_otherIdxs[i]));
}
Bed3Interval::print(outBuf, start, end);
printOtherFields(outBuf);
}
void BedPlusInterval::printNull(QuickString &outBuf) const
{
Bed6Interval::printNull(outBuf);
Bed3Interval::printNull(outBuf);
for (int i=startOtherIdx; i < _numPrintFields; i++) {
outBuf.append("\t.");
}
......@@ -109,13 +99,13 @@ void BedPlusInterval::printNull(QuickString &outBuf) const
const QuickString &BedPlusInterval::getField(int fieldNum) const
{
//a request for any of the first six fields will retrieve
//chrom, start, end, name, score, and strand, in that order.
//A request for field 6+ will go to the otherIdxs.
//a request for any of the first three fields will retrieve
//chrom, start, end, in that order.
//A request for field 3+ will go to the otherIdxs.
if (fieldNum > startOtherIdx && fieldNum <= startOtherIdx + (int)_otherIdxs.size()) {
return (*(_otherIdxs[fieldNum - startOtherIdx - 1]));
}
return Bed6Interval::getField(fieldNum);
return Bed3Interval::getField(fieldNum);
}
bool BedPlusInterval::isNumericField(int fieldNum) {
......@@ -128,7 +118,14 @@ bool BedPlusInterval::isNumericField(int fieldNum) {
if (fieldNum > startOtherIdx) {
return true;
} else {
return Bed6Interval::isNumericField(fieldNum);
return Bed3Interval::isNumericField(fieldNum);
}
}
// Appends every field held in _otherIdxs to outBuf, in stored order,
// writing a tab character before each one.
void BedPlusInterval::printOtherFields(QuickString &outBuf) const {
    const int numOther = (int)_otherIdxs.size();
    int idx = 0;
    while (idx < numOther) {
        outBuf.append('\t');
        outBuf.append(*(_otherIdxs[idx]));
        ++idx;
    }
}
......@@ -8,12 +8,12 @@
#ifndef BEDPLUSINTERVAL_H_
#define BEDPLUSINTERVAL_H_
#include "Bed6Interval.h"
#include "Bed3Interval.h"
#include <vector>
class SingleLineDelimTextFileReader;
class BedPlusInterval : public Bed6Interval {
class BedPlusInterval : public Bed3Interval {
public:
friend class FreeList<BedPlusInterval>;
......@@ -42,13 +42,13 @@ public:
protected:
virtual ~BedPlusInterval();
bool initOtherFieldsFromFile(SingleLineDelimTextFileReader *fileReader);
vector<QuickString *> _otherIdxs;
static const int startOtherIdx = 6; //first six fields have names, and are not stored in otherIdxs.
static const int startOtherIdx = 3; //first three fields have names, and are not stored in otherIdxs.
int _numPrintFields;
virtual ~BedPlusInterval();
bool initOtherFieldsFromFile(SingleLineDelimTextFileReader *fileReader);
virtual void printOtherFields(QuickString &outBuf) const;
};
......
......@@ -70,7 +70,7 @@ void VcfRecord::print(QuickString &outBuf, const QuickString & start, const Quic
}
void VcfRecord::printNull(QuickString &outBuf) const {
outBuf.append(".\t-1\t.\t.\t.\t-1");
outBuf.append(".\t-1\t.");
for (int i= startOtherIdx; i < _numPrintFields; i++) {
outBuf.append("\t.");
}
......@@ -85,7 +85,7 @@ void VcfRecord::printOtherFields(QuickString &outBuf) const {
outBuf.append(_varAlt);
outBuf.append('\t');
outBuf.append(_score);
for (int i= 0; i < (int)_otherIdxs.size(); i++) {
for (int i= constPrintStartIdx; i < (int)_otherIdxs.size(); i++) {
outBuf.append('\t');
outBuf.append(*(_otherIdxs[i]));
}
......
......@@ -19,9 +19,9 @@ public:
VcfRecord() {}
virtual bool initFromFile(SingleLineDelimTextFileReader *);
virtual void clear();
virtual void print(QuickString &outBuf) const;
virtual void print(QuickString &outBuf, int start, int end) const;
virtual void print(QuickString &outBuf, const QuickString & start, const QuickString & end) const;
void print(QuickString &outBuf) const;
void print(QuickString &outBuf, int start, int end) const;
void print(QuickString &outBuf, const QuickString & start, const QuickString & end) const;
virtual void printNull(QuickString &outBuf) const;
virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::VCF_RECORD_TYPE; }
......@@ -34,6 +34,7 @@ public:
protected:
QuickString _varRef;
QuickString _varAlt;
static const int constPrintStartIdx = 3;
void printOtherFields(QuickString &outBuf) const;
};
......
......@@ -184,13 +184,13 @@ bool FileRecordTypeChecker::handleTextFormat(const char *buffer, size_t len)
_fourthFieldNumeric = false;
_recordType = BED4_RECORD_TYPE;
}
} else if (_numFields == 5) {
} else if (_numFields == 5 && passesBed5()) {
_recordType = BED5_RECORD_TYPE;
} else if (_numFields == 6) {
} else if (_numFields == 6 && passesBed6()) {
_recordType = BED6_RECORD_TYPE;
} else if (_numFields == 12) {
} else if (_numFields == 12 && passesBed12()) {
_recordType = BED12_RECORD_TYPE;
} else if (_numFields >6) {
} else if (_numFields >3) {
_recordType = BED_PLUS_RECORD_TYPE;
}
return true;
......@@ -392,3 +392,22 @@ void FileRecordTypeChecker::setBam()
_isBinary = true;
_isBAM = true;
}
// Returns true when _isBed is set, the record has exactly five fields,
// and the token at index 4 is numeric.
bool FileRecordTypeChecker::passesBed5() {
    if (!_isBed || _numFields != 5) {
        return false;
    }
    return isNumeric(_tokenizer.getElem(4));
}
// Returns true when _isBed is set, the record has exactly six fields,
// and the token at index 5 is a strand symbol (see isStrandField).
bool FileRecordTypeChecker::passesBed6() {
    if (!_isBed || _numFields != 6) {
        return false;
    }
    return isStrandField(5);
}
bool FileRecordTypeChecker::passesBed12() {
return (isStrandField(5) && isNumeric(_tokenizer.getElem(6)) &&
isNumeric(_tokenizer.getElem(7)) && isNumeric(_tokenizer.getElem(9)));
}
bool FileRecordTypeChecker::isStrandField(int field) {
const QuickString &strandChar = _tokenizer.getElem(field);
return (strandChar == "+" || strandChar == "-" || strandChar == ".");
}
......@@ -124,9 +124,10 @@ private:
bool isVCFformat(const char *buffer);
bool isGFFformat();
bool delimiterTesting(vector<int> &counts, char suspectChar);
bool isStrandField(int field);
bool passesBed5();
bool passesBed6();
bool passesBed12();
};
......
......@@ -794,8 +794,7 @@ rm exp obs
############################################################
echo " intersect.new.t67...\c"
echo \
"19 252806 791255 G <DEL> 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 256900 791255 G T 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||
19 260365 791256 C <DEL> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene 19 260800 791256 C <INS> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene
"19 260365 791256 C <DEL> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene 19 260800 791256 C <INS> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene
19 265134 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 265500 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||
19 265986 791258 A <DEL> 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 265500 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||
19 265986 791258 A <DEL> 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 266003 791258 A C 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||" > exp
......@@ -808,8 +807,7 @@ rm exp obs
############################################################
echo " intersect.new.t68...\c"
echo \
"19 252806 791255 G <DEL> 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 256900 791255 G T 70.90 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-389,-4611;END=253195;STR=+-:4;IMPRECISE;CIPOS=-2,137;CIEND=0,0;EVENT=791255;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||
19 260365 791256 C <DEL> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene 19 260800 791256 C <INS> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene
"19 260365 791256 C <DEL> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene 19 260800 791256 C <INS> 33.71 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=680;END=261045;STR=+-:4;IMPRECISE;CIPOS=-1,257;CIEND=0,0;EVENT=791256;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=upstream_gene_variant|||ENSG00000271846|CTD-3113P16.9|ENST00000607399|||||processed_pseudogene
19 265134 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 265500 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||
19 265986 791258 A <DEL> 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 265500 791257 A <DEL> 20.25 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-558;END=265692;STR=+-:4;IMPRECISE;CIPOS=-1,196;CIEND=0,0;EVENT=791257;SUP=4;PESUP=4;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||
19 265986 791258 A <DEL> 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant|||||||||| 19 266003 791258 A C 22.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-401;END=266387;STR=+-:6;IMPRECISE;CIPOS=-2,87;CIEND=0,0;EVENT=791258;SUP=6;PESUP=6;SRSUP=0;EV=PE;PRIN;CSQ=intergenic_variant||||||||||" > exp
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment