diff --git a/src/regressTest/RegressTest.cpp b/src/regressTest/RegressTest.cpp index 2612b8d450d42d036ffc84ed0fa8c8f5e54f1866..4eaa8bccb3367a59a7237e25743748a0b70a6678 100644 --- a/src/regressTest/RegressTest.cpp +++ b/src/regressTest/RegressTest.cpp @@ -16,7 +16,8 @@ const string RegressTest::_randomCmd = "RANDOM"; const string RegressTest::_space = " "; const string RegressTest::_redirect = " > "; const string RegressTest::_devNull = " /dev/null "; -const string RegressTest::_bedOpsCmd = "/home/nek3d/testWorkspace/bedops/bin/bedmap --echo --echo-map --bp-ovr 1 "; +//const string RegressTest::_bedOpsCmd = "/home/nek3d/testWorkspace/bedops/bin/bedmap --echo --echo-map --bp-ovr 1 "; +const string RegressTest::_bedOpsCmd = "/home/nek3d/testWorkspace/bedops/bin/bedops --intersect "; RegressTest::RegressTest() diff --git a/src/utils/FileRecordTools/FileRecordMgr.cpp b/src/utils/FileRecordTools/FileRecordMgr.cpp index 0657e1ebbdf6ec493759dc2d945d850fdcbc2515..dfb5d4b547a41fe3ddd29622ddbc812e9639035d 100644 --- a/src/utils/FileRecordTools/FileRecordMgr.cpp +++ b/src/utils/FileRecordTools/FileRecordMgr.cpp @@ -152,13 +152,25 @@ void FileRecordMgr::assignChromId(Record *record) { void FileRecordMgr::testInputSortOrder(Record *record) { - //user specified that file must be sorted. Check that it is so. + // User specified that file must be sorted. Check that it is so. // TBD: In future versions, we might not want/need all files to be sorted, // even if the -sorted option is used, depending on number of input files // and program being run. Should that occur, this block will need adjusting. // NEK - 9/5/13 + + // Special: For BAM records that aren't mapped, we actually don't want + // to test the sort order. Another ugly hack sponsored by the letters B, A, and M. 
+ if (record->isUnmapped()) { + return; + } + + const QuickString &currChrom = record->getChrName(); + int currStart = record->getStartPos(); + if (record->isZeroLength()) { + currStart++; + } if (currChrom != _prevChrom) { if ( _foundChroms.find(currChrom) != _foundChroms.end()) { //this is a different chrom than the last record had, but we've already seen this chrom. @@ -179,10 +191,10 @@ void FileRecordMgr::testInputSortOrder(Record *record) _prevStart = INT_MAX; record->setChromId(_prevChromId); } - } else if (record->getStartPos() < _prevStart) { //same chrom as last record, but with lower startPos, so still out of order. + } else if (currStart < _prevStart) { //same chrom as last record, but with lower startPos, so still out of order. sortError(record, false); } - _prevStart = record->getStartPos(); + _prevStart = currStart; } diff --git a/src/utils/FileRecordTools/Records/BamRecord.cpp b/src/utils/FileRecordTools/Records/BamRecord.cpp index ba7e1cc22780435cae11de5df37d12913c086bba..4b6286138d4542d5e80db48e91c8a316b92a8492 100644 --- a/src/utils/FileRecordTools/Records/BamRecord.cpp +++ b/src/utils/FileRecordTools/Records/BamRecord.cpp @@ -43,6 +43,7 @@ bool BamRecord::initFromFile(BamFileReader *bamFileReader) setStrand(strandChar); _bamAlignment = bamFileReader->getAlignment(); + _isUnmapped = !_bamAlignment.IsMapped(); return true; } diff --git a/src/utils/FileRecordTools/Records/BamRecord.h b/src/utils/FileRecordTools/Records/BamRecord.h index 574da4546c1d95fee8c534c6b15510264c0ed788..bd275e7918176e1fe9bdfd63df78721506262e28 100644 --- a/src/utils/FileRecordTools/Records/BamRecord.h +++ b/src/utils/FileRecordTools/Records/BamRecord.h @@ -39,14 +39,13 @@ public: int getBamChromId() const { return _bamChromId; } protected: - virtual ~BamRecord(); - void printRemainingBamFields(); - - BamTools::BamAlignment _bamAlignment; int _bamChromId; //different from 
chromId, because BAM file may be in different order //than the genomeFile. + virtual ~BamRecord(); + void printRemainingBamFields(); + }; diff --git a/src/utils/FileRecordTools/Records/Record.cpp b/src/utils/FileRecordTools/Records/Record.cpp index 96e4eab004c41ca23193ab0384c43c1004d63c45..f42031531f2a81eefc0b8f4c91b8b22c286b1ee0 100644 --- a/src/utils/FileRecordTools/Records/Record.cpp +++ b/src/utils/FileRecordTools/Records/Record.cpp @@ -7,7 +7,8 @@ Record::Record() _startPos(-1), _endPos(-1), _strand(UNKNOWN), - _zeroLength(false) + _zeroLength(false), + _isUnmapped(false) { } @@ -35,6 +36,9 @@ void Record::clear() { _strand = UNKNOWN; _startPosStr.clear(); _endPosStr.clear(); + _zeroLength = false; + _isUnmapped = false; + } void Record::setStrand(char val) @@ -130,6 +134,10 @@ bool Record::intersects(const Record *record, bool Record::sameChromIntersects(const Record *record, bool wantSameStrand, bool wantDiffStrand, float overlapFraction, bool reciprocal) const { + // Special: For BAM records that are unmapped, intersect should automatically return false + if (_isUnmapped || record->_isUnmapped) { + return false; + } //If user requested hits only on same strand, or only on different strands, //rule out different strandedness first. diff --git a/src/utils/FileRecordTools/Records/Record.h b/src/utils/FileRecordTools/Records/Record.h index b2004231c14f41a4b316bfef155baaeeb5c96168..47cf3b34b5803849b65f359846a332ca2abda908 100644 --- a/src/utils/FileRecordTools/Records/Record.h +++ b/src/utils/FileRecordTools/Records/Record.h @@ -87,6 +87,11 @@ public: virtual void undoZeroLength(); //change it back just before output; virtual bool isZeroLength() const { return _zeroLength; } + // "Unmapped" only applies to BamRecord, but for design reasons, it has to be here, + // because we want to short circuit the intersects method if either record is an unmapped + // Bam record. 
+ bool isUnmapped() const { return _isUnmapped; } + virtual bool operator < (const Record &other) const; virtual bool operator > (const Record &other) const; @@ -126,6 +131,7 @@ protected: QuickString _score; strandType _strand; bool _zeroLength; + bool _isUnmapped; };