Code owners
Assign users and groups as approvers for specific file changes. Learn more.
RecordOutputMgr.cpp 12.81 KiB
/*
* RecordOutputMgr.cpp
*
* Created on: May 28, 2013
* Author: nek3d
*/
#include "RecordOutputMgr.h"
#include "ContextBase.h"
#include "ContextIntersect.h"
#include "BlockMgr.h"
#include "Bed3Interval.h"
#include "Bed4Interval.h"
#include "BedGraphInterval.h"
#include "Bed5Interval.h"
#include "Bed6Interval.h"
#include "BedPlusInterval.h"
#include "Bed12Interval.h"
#include "BamRecord.h"
#include "VcfRecord.h"
#include "GffRecord.h"
#include <cstdio>
RecordOutputMgr::RecordOutputMgr()
: _context(NULL),
_printable(true),
_bamWriter(NULL),
_currBamBlockList(NULL),
_bamBlockMgr(NULL)
{
_bamBlockMgr = new BlockMgr();
}
RecordOutputMgr::~RecordOutputMgr()
{
if (_outBuf.size() > 0) {
flush();
}
if (_bamWriter != NULL) {
_bamWriter->Close();
delete _bamWriter;
_bamWriter = NULL;
}
delete _bamBlockMgr;
_bamBlockMgr = NULL;
}
bool RecordOutputMgr::init(ContextBase *context) {
_context = context;
if (_context->getOutputFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) {
//set-up BAM writer.
_bamWriter = new BamTools::BamWriter();
_bamWriter->SetCompressionMode(_context->getUncompressedBam() ? BamTools::BamWriter::Uncompressed : BamTools::BamWriter::Compressed);
_bamWriter->Open("stdout", _context->getHeader(_context->getBamHeaderAndRefIdx()).c_str(), _context->getReferences(_context->getBamHeaderAndRefIdx()));
} else {
//for everything but BAM, we'll copy output to an output buffer before printing.
_outBuf.reserve(MAX_OUTBUF_SIZE);
}
if (_context->getProgram() == ContextBase::INTERSECT) {
if ((static_cast<ContextIntersect *>(_context))->getAnyHit() || (static_cast<ContextIntersect *>(_context))->getNoHit() ||
(static_cast<ContextIntersect *>(_context))->getWriteCount()) {
_printable = false;
}
}
return true;
}
//void RecordOutputMgr::printHeader(const string &header)
//{
// _outBuf.append(header);
//}
bool RecordOutputMgr::printKeyAndTerminate(RecordKeyList &keyList) {
printBamType bamCode = printBamRecord(keyList);
if (bamCode == BAM_AS_BAM) {
return true;
} else if (bamCode == NOT_BAM) {
keyList.getKey()->print(_outBuf);
return false;
}
//otherwise, it was BAM_AS_BED, and the key was printed.
return false;
}
RecordOutputMgr::printBamType RecordOutputMgr::printBamRecord(RecordKeyList &keyList, bool bamOutputOnly)
{
const Record *record = keyList.getKey();
if (record->getType() == FileRecordTypeChecker::BAM_RECORD_TYPE) {
if (_context->getOutputFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) {
_bamWriter->SaveAlignment(static_cast<const BamRecord *>(record)->getAlignment());
return BAM_AS_BAM;
} else {
if (!bamOutputOnly) {
if (record->isUnmapped()) {
record->printUnmapped(_outBuf);
} else {
static_cast<const BamRecord *>(record)->print(_outBuf, _currBamBlockList);
}
}
return BAM_AS_BED;
}
}
return NOT_BAM;
}
void RecordOutputMgr::printRecord(const Record *record)
{
RecordKeyList keyList(record);
printRecord(keyList);
}
void RecordOutputMgr::printRecord(const Record *record, const QuickString & value)
{
printRecord(record);
_outBuf.append(value);
newline();
if (needsFlush()) {
flush();
}
}
void RecordOutputMgr::printRecord(RecordKeyList &keyList) {
if (keyList.getKey()->getType() == FileRecordTypeChecker::BAM_RECORD_TYPE) {
RecordKeyList blockList(keyList.getKey());
bool deleteBlocks = false;
_bamBlockMgr->getBlocks(blockList, deleteBlocks);
printRecord(keyList, &blockList);
if (deleteBlocks) {
_bamBlockMgr->deleteBlocks(blockList);
}
return;
}
printRecord(keyList, NULL);
}
void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockList)
{
if (needsFlush()) {
flush();
}
//The first time we print a record is when we print any header, because the header
//hasn't been read from the query file until after the first record has also been read.
if (_context->getPrintHeader()) {
checkForHeader();
}
const_cast<Record *>(keyList.getKey())->undoZeroLength();
_currBamBlockList = blockList;
if (_context->getProgram() == ContextBase::INTERSECT) {
if (_printable) {
if (keyList.empty()) {
if ((static_cast<ContextIntersect *>(_context))->getWriteAllOverlap()) {
// -wao the user wants to force the reporting of 0 overlap
if (printKeyAndTerminate(keyList)) {
_currBamBlockList = NULL;
return;
}
tab();
null(false, true);
tab();
_outBuf.append('0');
newline();
if (needsFlush()) flush();
}
else if ((static_cast<ContextIntersect *>(_context))->getLeftJoin()) {
if (printKeyAndTerminate(keyList)) {
_currBamBlockList = NULL;
return;
}
tab();
null(false, true);
newline();
if (needsFlush()) flush();
_currBamBlockList = NULL;
return;
}
} else {
if (printBamRecord(keyList, true) == BAM_AS_BAM) {
_currBamBlockList = NULL;
return;
}
int hitIdx = 0;
for (RecordKeyList::const_iterator_type iter = keyList.begin(); iter != keyList.end(); iter = keyList.next()) {
reportOverlapDetail(keyList.getKey(), iter->value(), hitIdx);
hitIdx++;
}
}
} else { // not printable
reportOverlapSummary(keyList);
}
_currBamBlockList = NULL;
} else if (_context->getProgram() == ContextBase::SAMPLE) {
if (!printKeyAndTerminate(keyList)) {
newline();
}
_currBamBlockList = NULL;
return;
} else if (_context->getProgram() == ContextBase::MAP) {
if (!printKeyAndTerminate(keyList)) {
tab();
}
_currBamBlockList = NULL;
return;
}
}
void RecordOutputMgr::checkForHeader() {
if (_context->getProgram() == ContextBase::INTERSECT ||
_context->getProgram() == ContextBase::MAP) {
if (_context->getPrintHeader()) {
_outBuf.append(_context->getHeader((static_cast<ContextIntersect *>(_context))->getQueryFileIdx()));
}
} else if (_context->getProgram() == ContextBase::SAMPLE) {
if (_context->getPrintHeader()) {
_outBuf.append(_context->getHeader(_context->getInputFileIdx()));
}
}
_context->setPrintHeader(false);
if (needsFlush()) flush();
}
void RecordOutputMgr::reportOverlapDetail(const Record *keyRecord, const Record *hitRecord, int hitIdx)
{
//get the max start and min end as strings.
const_cast<Record *>(hitRecord)->undoZeroLength();
const QuickString *startStr = NULL;
const QuickString *endStr = NULL;
int maxStart = 0;
int minEnd = 0;
int keyStart = keyRecord->getStartPos();
int keyEnd = keyRecord->getEndPos();
int hitStart = hitRecord->getStartPos();
int hitEnd = hitRecord->getEndPos();
if ( keyStart>= hitStart) {
//the key start is after the hit start, but we need to check and make sure the hit end is at least after the keyStart.
//The reason for this is that, in some rare cases, such as both the key and hit having been zero length intervals,
//the normal process for intersection that allows us to simply report the maxStart and minEnd do not necessarily apply.
if (hitEnd >= keyStart) {
//this is ok. We have a normal intersection where the key comes after the hit.
maxStart = keyStart;
startStr = &(keyRecord->getStartPosStr());
minEnd = min(keyEnd, hitEnd);
endStr = keyRecord->getEndPos() < hitRecord->getEndPos() ? &(keyRecord->getEndPosStr()) : &(hitRecord->getEndPosStr());
} else {
//this is the weird case of not a "real" intersection. The keyStart is greater than the hitEnd. So just report the key as is.
maxStart = keyStart;
minEnd = keyEnd;
startStr = &(keyRecord->getStartPosStr());
endStr = &(keyRecord->getEndPosStr());
}
} else {
//all of the above, but backwards. keyStart is before hitStart.
if (keyEnd >= hitStart) {
//normal intersection, key first
maxStart = hitStart;
startStr = &(hitRecord->getStartPosStr());
minEnd = min(keyEnd, hitEnd);
endStr = keyRecord->getEndPos() < hitRecord->getEndPos() ? &(keyRecord->getEndPosStr()) : &(hitRecord->getEndPosStr());
} else {
//this is the weird case of not a "real" intersection. The hitStart is greater than the keyEnd. So just report the hit as is.
maxStart = hitStart;
minEnd = hitEnd;
startStr = &(hitRecord->getStartPosStr());
endStr = &(hitRecord->getEndPosStr());
}
}
if (!(static_cast<ContextIntersect *>(_context))->getWriteA() && !(static_cast<ContextIntersect *>(_context))->getWriteB()
&& !(static_cast<ContextIntersect *>(_context))->getWriteOverlap() && !(static_cast<ContextIntersect *>(_context))->getLeftJoin()) {
printKey(keyRecord, *startStr, *endStr);
newline();
if (needsFlush()) flush();
}
else if (((static_cast<ContextIntersect *>(_context))->getWriteA() &&
(static_cast<ContextIntersect *>(_context))->getWriteB()) || (static_cast<ContextIntersect *>(_context))->getLeftJoin()) {
printKey(keyRecord);
tab();
hitRecord->print(_outBuf);
newline();
if (needsFlush()) flush();
}
else if ((static_cast<ContextIntersect *>(_context))->getWriteA()) {
printKey(keyRecord);
newline();
if (needsFlush()) flush();
}
else if ((static_cast<ContextIntersect *>(_context))->getWriteB()) {
printKey(keyRecord, *startStr, *endStr);
tab();
hitRecord->print(_outBuf);
newline();
if (needsFlush()) flush();
}
else if ((static_cast<ContextIntersect *>(_context))->getWriteOverlap()) {
int printOverlapBases = 0;
if (_context->getObeySplits()) {
printOverlapBases = _splitInfo->getOverlapBases(hitIdx);
} else {
printOverlapBases = minEnd - maxStart;
}
printKey(keyRecord);
tab();
hitRecord->print(_outBuf);
tab();
int2str(printOverlapBases, _outBuf, true);
newline();
if (needsFlush()) flush();
}
}
void RecordOutputMgr::reportOverlapSummary(RecordKeyList &keyList)
{
int numOverlapsFound = (int)keyList.size();
if ((static_cast<ContextIntersect *>(_context))->getAnyHit() && numOverlapsFound > 0) {
if (printKeyAndTerminate(keyList)) {
return;
}
newline();
if (needsFlush()) flush();
} else if ((static_cast<ContextIntersect *>(_context))->getWriteCount()) {
if (printKeyAndTerminate(keyList)) {
return;
}
tab();
int2str(numOverlapsFound, _outBuf, true);
newline();
if (needsFlush()) flush();
} else if ((static_cast<ContextIntersect *>(_context))->getNoHit() && numOverlapsFound == 0) {
if (printKeyAndTerminate(keyList)) {
return;
}
newline();
if (needsFlush()) flush();
}
}
void RecordOutputMgr::null(bool queryType, bool dbType)
{
FileRecordTypeChecker::RECORD_TYPE recordType = FileRecordTypeChecker::UNKNOWN_RECORD_TYPE;
if (_context->getProgram() == ContextBase::INTERSECT) {
if (queryType) {
recordType = (static_cast<ContextIntersect *>(_context))->getQueryRecordType();
} else if (dbType) {
recordType = (static_cast<ContextIntersect *>(_context))->getDatabaseRecordType();
}
} else if (_context->getProgram() == ContextBase::SAMPLE) {
recordType = _context->getInputRecordType();
}
//This is kind of a hack. Need an instance of the correct class of record in order to call it's printNull method.
Record *dummyRecord = NULL;
switch (recordType) {
case FileRecordTypeChecker::BED3_RECORD_TYPE:
dummyRecord = new Bed3Interval();
break;
case FileRecordTypeChecker::BED4_RECORD_TYPE:
dummyRecord = new Bed4Interval();
break;
case FileRecordTypeChecker::BEDGRAPH_RECORD_TYPE:
dummyRecord = new BedGraphInterval();
break;
case FileRecordTypeChecker::BED5_RECORD_TYPE:
dummyRecord = new Bed5Interval();
break;
case FileRecordTypeChecker::BED6_RECORD_TYPE:
dummyRecord = new Bed6Interval();
break;
case FileRecordTypeChecker::BED12_RECORD_TYPE:
dummyRecord = new Bed12Interval();
break;
case FileRecordTypeChecker::BED_PLUS_RECORD_TYPE:
dummyRecord = new BedPlusInterval();
(static_cast<BedPlusInterval *>(dummyRecord))->setNumPrintFields((static_cast<ContextIntersect *>(_context))->getMaxNumDatabaseFields());
break;
case FileRecordTypeChecker::VCF_RECORD_TYPE:
dummyRecord = new VcfRecord();
(static_cast<VcfRecord *>(dummyRecord))->setNumPrintFields((static_cast<ContextIntersect *>(_context))->getMaxNumDatabaseFields());
break;
case FileRecordTypeChecker::BAM_RECORD_TYPE:
dummyRecord = new BamRecord();
break;
case FileRecordTypeChecker::GFF_RECORD_TYPE:
dummyRecord = new GffRecord();
(static_cast<GffRecord *>(dummyRecord))->setNumFields((static_cast<ContextIntersect *>(_context))->getMaxNumDatabaseFields());
break;
default:
break;
}
dummyRecord->printNull(_outBuf);
delete dummyRecord;
}
void RecordOutputMgr::printKey(const Record *key, const QuickString & start, const QuickString & end)
{
if (key->getType() != FileRecordTypeChecker::BAM_RECORD_TYPE) {
key->print(_outBuf, start, end);
} else {
static_cast<const BamRecord *>(key)->print(_outBuf, start, end, _currBamBlockList);
}
}
void RecordOutputMgr::printKey(const Record *key)
{
if (key->getType() != FileRecordTypeChecker::BAM_RECORD_TYPE) {
key->print(_outBuf);
} else {
static_cast<const BamRecord *>(key)->print(_outBuf, _currBamBlockList);
}
}
void RecordOutputMgr::flush() {
fwrite(_outBuf.c_str(), 1, _outBuf.size(), stdout);
_outBuf.clear();
}