From 602239934347bcc3954ba8c38a39950907c746ae Mon Sep 17 00:00:00 2001 From: nkindlon <nek3d@virginia.edu> Date: Tue, 24 Sep 2013 15:16:07 -0400 Subject: [PATCH] handling for unmapped Bam records --- src/regressTest/RegressTest.cpp | 3 ++- src/utils/FileRecordTools/FileRecordMgr.cpp | 18 +++++++++++++++--- .../FileRecordTools/Records/BamRecord.cpp | 1 + src/utils/FileRecordTools/Records/BamRecord.h | 7 +++---- src/utils/FileRecordTools/Records/Record.cpp | 10 +++++++++- src/utils/FileRecordTools/Records/Record.h | 6 ++++++ 6 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/regressTest/RegressTest.cpp b/src/regressTest/RegressTest.cpp index 2612b8d4..4eaa8bcc 100644 --- a/src/regressTest/RegressTest.cpp +++ b/src/regressTest/RegressTest.cpp @@ -16,7 +16,8 @@ const string RegressTest::_randomCmd = "RANDOM"; const string RegressTest::_space = " "; const string RegressTest::_redirect = " > "; const string RegressTest::_devNull = " /dev/null "; -const string RegressTest::_bedOpsCmd = "/home/nek3d/testWorkspace/bedops/bin/bedmap --echo --echo-map --bp-ovr 1 "; +//const string RegressTest::_bedOpsCmd = "/home/nek3d/testWorkspace/bedops/bin/bedmap --echo --echo-map --bp-ovr 1 "; +const string RegressTest::_bedOpsCmd = "/home/nek3d/testWorkspace/bedops/bin/bedops --intersect "; RegressTest::RegressTest() diff --git a/src/utils/FileRecordTools/FileRecordMgr.cpp b/src/utils/FileRecordTools/FileRecordMgr.cpp index 0657e1eb..dfb5d4b5 100644 --- a/src/utils/FileRecordTools/FileRecordMgr.cpp +++ b/src/utils/FileRecordTools/FileRecordMgr.cpp @@ -152,13 +152,25 @@ void FileRecordMgr::assignChromId(Record *record) { void FileRecordMgr::testInputSortOrder(Record *record) { - //user specified that file must be sorted. Check that it is so. + // User specified that file must be sorted. Check that it is so. // TBD: In future versions, we might not want/need all files to be sorted, // even if the -sorted option is used, depending on number of input files // and program being run. Should that occur, this block will need adjusting. // NEK - 9/5/13 + + // Special: For BAM records that aren't mapped, we actually don't want + // to test the sort order. Another ugly hack sponsored by the letters B, A, and M. + if (record->getType() == FileRecordTypeChecker::BAM_RECORD_TYPE && (static_cast<const BamRecord *>(record))->isUnmapped()) { + return; + } + + const QuickString &currChrom = record->getChrName(); + int currStart = record->getStartPos(); + if (record->isZeroLength()) { + currStart++; + } if (currChrom != _prevChrom) { if ( _foundChroms.find(currChrom) != _foundChroms.end()) { //this is a different chrom than the last record had, but we've already seen this chrom. @@ -179,10 +191,10 @@ void FileRecordMgr::testInputSortOrder(Record *record) _prevStart = INT_MAX; record->setChromId(_prevChromId); } - } else if (record->getStartPos() < _prevStart) { //same chrom as last record, but with lower startPos, so still out of order. + } else if (currStart < _prevStart) { //same chrom as last record, but with lower startPos, so still out of order. sortError(record, false); } - _prevStart = record->getStartPos(); + _prevStart = currStart; } diff --git a/src/utils/FileRecordTools/Records/BamRecord.cpp b/src/utils/FileRecordTools/Records/BamRecord.cpp index ba7e1cc2..4b628613 100644 --- a/src/utils/FileRecordTools/Records/BamRecord.cpp +++ b/src/utils/FileRecordTools/Records/BamRecord.cpp @@ -43,6 +43,7 @@ bool BamRecord::initFromFile(BamFileReader *bamFileReader) setStrand(strandChar); _bamAlignment = bamFileReader->getAlignment(); + _isUnmapped = !_bamAlignment.IsMapped(); return true; } diff --git a/src/utils/FileRecordTools/Records/BamRecord.h b/src/utils/FileRecordTools/Records/BamRecord.h index 574da454..bd275e79 100644 --- a/src/utils/FileRecordTools/Records/BamRecord.h +++ b/src/utils/FileRecordTools/Records/BamRecord.h @@ -39,14 +39,13 @@ public: int getBamChromId() const { return _bamChromId; } protected: - virtual ~BamRecord(); - void printRemainingBamFields(); - - BamTools::BamAlignment _bamAlignment; int _bamChromId; //different from chromId, because BAM file may be in different order //than the genomeFile. + virtual ~BamRecord(); + void printRemainingBamFields(); + }; diff --git a/src/utils/FileRecordTools/Records/Record.cpp b/src/utils/FileRecordTools/Records/Record.cpp index 96e4eab0..f4203153 100644 --- a/src/utils/FileRecordTools/Records/Record.cpp +++ b/src/utils/FileRecordTools/Records/Record.cpp @@ -7,7 +7,8 @@ Record::Record() _startPos(-1), _endPos(-1), _strand(UNKNOWN), - _zeroLength(false) + _zeroLength(false), + _isUnmapped(false) { } @@ -35,6 +36,9 @@ void Record::clear() { _strand = UNKNOWN; _startPosStr.clear(); _endPosStr.clear(); + _zeroLength = false; + _isUnmapped = false; + } void Record::setStrand(char val) @@ -130,6 +134,10 @@ bool Record::intersects(const Record *record, bool Record::sameChromIntersects(const Record *record, bool wantSameStrand, bool wantDiffStrand, float overlapFraction, bool reciprocal) const { + // Special: For BAM records that are unmapped, intersect should automatically return false + if (_isUnmapped || record->_isUnmapped) { + return false; + } //If user requested hits only on same strand, or only on different strands, //rule out different strandedness first. diff --git a/src/utils/FileRecordTools/Records/Record.h b/src/utils/FileRecordTools/Records/Record.h index b2004231..47cf3b34 100644 --- a/src/utils/FileRecordTools/Records/Record.h +++ b/src/utils/FileRecordTools/Records/Record.h @@ -87,6 +87,11 @@ public: virtual void undoZeroLength(); //change it back just before output; virtual bool isZeroLength() const { return _zeroLength; } + // "Unmapped" only applies to BamRecord, but for design reasons, it has to be here, + // because we want to short circuit the intersects method if either record is an unmapped + // Bam record. + bool isUnmapped() const { return _isUnmapped; } + virtual bool operator < (const Record &other) const; virtual bool operator > (const Record &other) const; @@ -126,6 +131,7 @@ protected: QuickString _score; strandType _strand; bool _zeroLength; + bool _isUnmapped; }; -- GitLab