Commit 61b7a976 authored by arq5x's avatar arq5x
Browse files

add methods to extract column from record by its col. number

parent c380cf59
/*
* BamRecord.cpp
*
* Created on: Jan 14, 2014
* Author: nek3d
*/
#include "BamRecord.h"
#include "BamFileReader.h"
#include "RecordKeyList.h"
......@@ -15,143 +22,155 @@ BamRecord::~BamRecord()
// Copy-assignment. Copies the Bed6Interval portion of `other`, then the
// BAM-specific state held by this class, and returns *this.
const BamRecord &BamRecord::operator=(const BamRecord &other)
{
    Bed6Interval::operator=(other);
    _bamAlignment = other._bamAlignment;
    // _bamChromId is BamRecord-level state as well; without this copy the
    // assigned-to record would keep reporting its previous chromosome id.
    _bamChromId = other._bamChromId;
    return *this;
}
// Generic entry point: forwards to the BamFileReader overload.
// The downcast is unchecked (static_cast), so the caller must pass a
// reader that really is a BamFileReader.
bool BamRecord::initFromFile(FileReader *fileReader)
{
    BamFileReader *bamFileReader = static_cast<BamFileReader*>(fileReader);
    return initFromFile(bamFileReader);
}
// Fills this record from the values the reader currently exposes:
// chromosome name/id, start/end (numeric and string forms), name, score,
// strand, the alignment itself, and the two "unmapped" flags derived from it.
// Always returns true.
bool BamRecord::initFromFile(BamFileReader *bamFileReader)
{
    bamFileReader->getChrName(_chrName);
    _bamChromId = bamFileReader->getCurrChromdId();

    // Each coordinate is stored as an int and as its string rendering.
    _startPos = bamFileReader->getStartPos();
    int2str(_startPos, _startPosStr);
    _endPos = bamFileReader->getEndPos();
    int2str(_endPos, _endPosStr);

    bamFileReader->getName(_name);
    bamFileReader->getScore(_score);
    char strandChar = bamFileReader->getStrand();
    setStrand(strandChar);

    _bamAlignment = bamFileReader->getAlignment();
    _isUnmapped = !_bamAlignment.IsMapped();
    _isMateUnmapped = !_bamAlignment.IsMateMapped();
    return true;
}
// Resets the record: clears the Bed6Interval portion, invalidates the BAM
// chromosome id, and resets every member of the embedded alignment.
void BamRecord::clear()
{
    Bed6Interval::clear();
    _bamChromId = -1;

    //Clear the BamAlignment object. Sadly, it does not have a clear() method,
    //so we have to do each member manually.
    _bamAlignment.Name.clear();
    _bamAlignment.Length = 0;
    _bamAlignment.QueryBases.clear();
    _bamAlignment.AlignedBases.clear();
    _bamAlignment.Qualities.clear();
    _bamAlignment.TagData.clear();
    _bamAlignment.RefID = -1;
    _bamAlignment.Position = -1;
    _bamAlignment.Bin = 0;
    _bamAlignment.MapQuality = 0;
    _bamAlignment.AlignmentFlag = 0;
    _bamAlignment.CigarData.clear();
    _bamAlignment.MateRefID = -1;
    _bamAlignment.MatePosition = -1;
    _bamAlignment.InsertSize = -1;
    _bamAlignment.Filename.clear();

    _bamAlignment.SupportData.AllCharData.clear();
    _bamAlignment.SupportData.BlockLength = 0;
    _bamAlignment.SupportData.NumCigarOperations = 0;
    _bamAlignment.SupportData.QueryNameLength = 0;
    _bamAlignment.SupportData.QuerySequenceLength = 0;
    _bamAlignment.SupportData.HasCoreOnly = false;

    _bamAlignment.ErrorString.clear();
}
// Appends the record to outBuf: the Bed6Interval columns once, followed by
// the BAM-specific trailing columns built from keyList.
void BamRecord::print(QuickString &outBuf, RecordKeyList *keyList) const
{
    Bed6Interval::print(outBuf);
    printRemainingBamFields(outBuf, keyList);
}
// Same as print(outBuf, keyList), but forwards the caller-supplied numeric
// start/end to Bed6Interval::print.
void BamRecord::print(QuickString &outBuf, int start, int end, RecordKeyList *keyList) const
{
    Bed6Interval::print(outBuf, start, end);
    printRemainingBamFields(outBuf, keyList);
}
// Same as print(outBuf, keyList), but forwards the caller-supplied string
// start/end to Bed6Interval::print.
void BamRecord::print(QuickString &outBuf, const QuickString & start, const QuickString & end, RecordKeyList *keyList) const
{
    Bed6Interval::print(outBuf, start, end);
    printRemainingBamFields(outBuf, keyList);
}
// Appends the "null" form of the record: the Bed6Interval null columns
// followed by six tab-separated "." placeholders.
void BamRecord::printNull(QuickString &outBuf) const
{
    Bed6Interval::printNull(outBuf);
    // 6 x ("\t" + ".") = 12 characters.
    outBuf.append("\t.\t.\t.\t.\t.\t.", 12);
}
// Appends the columns that follow the six BED columns:
//   <alignment position> <end> "0,0,0" <block count> <block lengths> <block starts>
// When keyList holds records, each one contributes a block whose length is
// end - start and whose start is relative to the alignment position.
// With no blocks (or no key list at all) the fixed "1\t0,\t0," is written.
void BamRecord::printRemainingBamFields(QuickString &outBuf, RecordKeyList *keyList) const
{
    outBuf.append('\t');
    outBuf.append(_bamAlignment.Position);
    outBuf.append('\t');
    outBuf.append(_endPos);
    outBuf.append("\t0,0,0", 6);
    outBuf.append('\t');

    // A missing key list is handled like an empty one instead of being dereferenced.
    const int numBlocks = keyList ? (int)keyList->size() : 0;
    if (numBlocks <= 0) {
        outBuf.append("1\t0,\t0,");
        return;
    }

    outBuf.append(numBlocks);

    // Lengths and starts go into separate columns, so gather both in one
    // pass over the key list and print them afterwards.
    vector<int> blockLengths;
    vector<int> blockStarts;
    blockLengths.reserve(numBlocks);
    blockStarts.reserve(numBlocks);
    for (RecordKeyList::const_iterator_type iter = keyList->begin(); iter != keyList->end(); iter = keyList->next()) {
        const Record *block = iter->value();
        blockLengths.push_back(block->getEndPos() - block->getStartPos());
        blockStarts.push_back(block->getStartPos() - _bamAlignment.Position);
    }

    outBuf.append('\t');
    for (int i = 0; i < (int)blockLengths.size(); i++) {
        outBuf.append(blockLengths[i]);
        outBuf.append(',');
    }

    outBuf.append('\t');
    for (int i = 0; i < (int)blockStarts.size(); i++) {
        outBuf.append(blockStarts[i]);
        outBuf.append(',');
    }
}
// Appends the line used for an unmapped record. Chromosome, name and score
// are written as "." when empty; all coordinates are written as -1.
void BamRecord::printUnmapped(QuickString &outBuf) const {
    outBuf.append(_chrName.empty() ? "." : _chrName);
    outBuf.append("\t-1\t-1\t");
    outBuf.append(_name.empty() ? "." : _name);
    outBuf.append('\t');
    outBuf.append(_score.empty() ? "." : _score);
    outBuf.append("\t.\t-1\t-1\t-1\t0,0,0\t0\t.\t."); // dot for strand, -1 for blockStarts and blockEnd
}
// Column lookup for BAM records. For now this simply defers to the
// Bed6Interval numbering (chrom, start, end, name, score, strand).
const QuickString &BamRecord::getField(int fieldNum) const
{
    //TBD: Decide on the intended semantics. E.g. when a user asks for
    //field 2 of a BAM record, should that be the BAM Flag column, or the
    //startPos as for every other record type? -NEK 1/14/14.
    const QuickString &field = Bed6Interval::getField(fieldNum);
    return field;
}
......@@ -38,6 +38,9 @@ public:
// Read-only access to the stored BamTools alignment.
const BamTools::BamAlignment &getAlignment() const { return _bamAlignment; }
// Returns the chromosome id kept in _bamChromId.
int getBamChromId() const { return _bamChromId; }
// Column lookup by field number; defined out of line.
virtual const QuickString &getField(int fieldNum) const;
// Number of columns this record type reports.
virtual int getNumFields() const { return 12; }
protected:
// The alignment this record wraps.
BamTools::BamAlignment _bamAlignment;
int _bamChromId; //different from chromId, because BAM file may be in different order
......
......@@ -61,3 +61,21 @@ void Bed3Interval::print(QuickString &outBuf, const QuickString & start, const Q
// Appends the null placeholder for a three-column record: "." for the
// chromosome and -1 for both coordinates.
void Bed3Interval::printNull(QuickString &outBuf) const {
    static const char nullColumns[] = ".\t-1\t-1";
    // sizeof counts the terminating NUL, so subtract it to get the 7 visible characters.
    outBuf.append(nullColumns, (int)(sizeof(nullColumns) - 1));
}
// Returns the column with the given 1-based number:
//   1 -> chromosome name, 2 -> start (string form), 3 -> end (string form).
// Any other number is passed on to Record::getField.
const QuickString &Bed3Interval::getField(int fieldNum) const
{
    if (fieldNum == 1) {
        return _chrName;
    }
    if (fieldNum == 2) {
        return _startPosStr;
    }
    if (fieldNum == 3) {
        return _endPosStr;
    }
    return Record::getField(fieldNum);
}
......@@ -29,6 +29,9 @@ public:
// Appends the null placeholder form of this record to outBuf.
virtual void printNull(QuickString &outBuf) const;
// Identifies this record as BED3.
virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::BED3_RECORD_TYPE; }
// Column lookup by field number; defined out of line.
virtual const QuickString &getField(int fieldNum) const;
// Number of columns this record type reports.
virtual int getNumFields() const { return 3; }
protected:
// Destructor is protected, so code outside the class hierarchy cannot destroy instances directly.
virtual ~Bed3Interval();
......
......@@ -48,3 +48,15 @@ void Bed4Interval::printNull(QuickString &outBuf) const
outBuf.append("\t.", 2);
}
// Returns the name for field 4; every other field number is resolved by
// Bed3Interval::getField.
const QuickString &Bed4Interval::getField(int fieldNum) const
{
    if (fieldNum == 4) {
        return _name;
    }
    return Bed3Interval::getField(fieldNum);
}
......@@ -26,6 +26,10 @@ public:
// Appends the null placeholder form of this record to outBuf.
virtual void printNull(QuickString &outBuf) const;
// Identifies this record as BED4.
virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::BED4_RECORD_TYPE; }
// Column lookup by field number; defined out of line.
virtual const QuickString &getField(int fieldNum) const;
// Number of columns this record type reports.
virtual int getNumFields() const { return 4; }
protected:
// Destructor is protected, so code outside the class hierarchy cannot destroy instances directly.
virtual ~Bed4Interval();
};
......
......@@ -55,3 +55,18 @@ void Bed5Interval::printNull(QuickString &outBuf) const
outBuf.append("\t.\t-1", 5);
}
// Returns the name for field 4 and the score for field 5; every other
// field number is resolved by Bed3Interval::getField.
const QuickString &Bed5Interval::getField(int fieldNum) const
{
    if (fieldNum == 4) {
        return _name;
    }
    if (fieldNum == 5) {
        return _score;
    }
    return Bed3Interval::getField(fieldNum);
}
......@@ -25,6 +25,10 @@ public:
// Appends the null placeholder form of this record to outBuf.
virtual void printNull(QuickString &outBuf) const;
// Identifies this record as BED5.
virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::BED5_RECORD_TYPE; }
// Column lookup by field number; defined out of line.
virtual const QuickString &getField(int fieldNum) const;
// Number of columns this record type reports.
virtual int getNumFields() const { return 5; }
protected:
// Destructor is protected, so code outside the class hierarchy cannot destroy instances directly.
virtual ~Bed5Interval();
};
......
......@@ -64,3 +64,20 @@ void Bed6Interval::printNull(QuickString &outBuf) const
outBuf.append("\t.\t-1\t.", 7);
}
// Returns the name (4), score (5) or strand (6); every other field number
// is resolved by Bed3Interval::getField.
const QuickString &Bed6Interval::getField(int fieldNum) const
{
    if (fieldNum == 4) {
        return _name;
    }
    if (fieldNum == 5) {
        return _score;
    }
    if (fieldNum == 6) {
        return _strand;
    }
    return Bed3Interval::getField(fieldNum);
}
......@@ -25,6 +25,10 @@ public:
// Appends the null placeholder form of this record to outBuf.
virtual void printNull(QuickString &outBuf) const;
// Identifies this record as BED6.
virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::BED6_RECORD_TYPE; }
// Column lookup by field number; defined out of line.
virtual const QuickString &getField(int fieldNum) const;
// Number of columns this record type reports.
virtual int getNumFields() const { return 6; }
protected:
// Destructor is protected, so code outside the class hierarchy cannot destroy instances directly.
virtual ~Bed6Interval();
};
......
......@@ -48,3 +48,15 @@ void BedGraphInterval::printNull(QuickString &outBuf) const
outBuf.append("\t.", 2);
}
// Returns _name for field 4; every other field number is resolved by
// Bed3Interval::getField.
// NOTE(review): presumably _name carries the bedGraph value column here - confirm.
const QuickString &BedGraphInterval::getField(int fieldNum) const
{
    if (fieldNum == 4) {
        return _name;
    }
    return Bed3Interval::getField(fieldNum);
}
......@@ -25,6 +25,10 @@ public:
// Appends the null placeholder form of this record to outBuf.
virtual void printNull(QuickString &outBuf) const;
// Identifies this record as bedGraph.
virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::BEDGRAPH_RECORD_TYPE; }
// Column lookup by field number; defined out of line.
virtual const QuickString &getField(int fieldNum) const;
// Number of columns this record type reports.
virtual int getNumFields() const { return 4; }
protected:
// Destructor is protected, so code outside the class hierarchy cannot destroy instances directly.
virtual ~BedGraphInterval();
};
......
......@@ -105,36 +105,15 @@ void BedPlusInterval::printNull(QuickString &outBuf) const
for (int i=startOtherIdx; i < _numPrintFields; i++) {
outBuf.append("\t.");
}
}
// Returns the column with the given 1-based number.
// Field numbers above startOtherIdx, up to startOtherIdx + _otherIdxs.size(),
// map onto the extra columns held in _otherIdxs (the first extra column is
// startOtherIdx + 1). Everything else is resolved by Bed6Interval::getField.
const QuickString &BedPlusInterval::getField(int fieldNum) const
{
    const int lastField = startOtherIdx + (int)_otherIdxs.size();
    if (fieldNum > startOtherIdx && fieldNum <= lastField) {
        // Shift from the 1-based field number to the 0-based vector index.
        return *(_otherIdxs[fieldNum - startOtherIdx - 1]);
    }
    return Bed6Interval::getField(fieldNum);
}
......@@ -30,7 +30,9 @@ public:
//if the number of fields frequently differ between this object and the one being copied.
const BedPlusInterval &operator=(const BedPlusInterval &other);

// Column lookup by 1-based field number; returns a reference to the stored
// string rather than a copy. Defined out of line.
virtual const QuickString &getField(int fieldNum) const;
// Total column count: the fixed leading columns plus the extra ones.
virtual int getNumFields() const { return startOtherIdx + static_cast<int>(_otherIdxs.size()); }
// Overwrite one of the extra columns.
// NOTE(review): these index _otherIdxs with fieldNum directly and without a
// bounds check, whereas getField() subtracts startOtherIdx + 1 first - confirm
// that callers pass a 0-based index into the extra columns here.
virtual void setField(int fieldNum, const QuickString &str) { (*(_otherIdxs[fieldNum])) = str; }
virtual void setField(int fieldNum, const string &str) { (*(_otherIdxs[fieldNum])) = str; }
virtual void setField(int fieldNum, const char *str) { (*(_otherIdxs[fieldNum])) = str; }
......
......@@ -192,3 +192,17 @@ ostream &operator << (ostream &out, const Record &record)
out << errBuf;
return out;
}
// Fallback for a field number that no derived class recognised.
// Reports the requested column and the valid range (1 .. getNumFields())
// on stderr and terminates the program with exit status 1; it never returns.
const QuickString &Record::getField(int fieldNum) const
{
    cerr << endl;
    cerr << "*****" << endl;
    cerr << "*****ERROR: requested column " << fieldNum
         << " , but record only has fields 1 - " << getNumFields()
         << ". Exiting." << endl;
    cerr << endl;
    cerr << "*****" << endl;
    exit(1);
}
......@@ -82,6 +82,9 @@ public:
// Score setters: store the given text in _score.
virtual void setScore(const string &chr) { _score = chr; }
virtual void setScore(const char *chr) { _score = chr; }
// Column lookup by field number; defined out of line.
virtual const QuickString &getField(int fieldNum) const;
// Number of columns in the record; every concrete record type must supply this.
virtual int getNumFields() const = 0;
// Record type tag; the base class reports UNKNOWN_RECORD_TYPE.
virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::UNKNOWN_RECORD_TYPE; }
virtual bool coordsValid(); //test that no coords negative, end not less than start, check zeroLength (see below).
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment