From 602239934347bcc3954ba8c38a39950907c746ae Mon Sep 17 00:00:00 2001
From: nkindlon <nek3d@virginia.edu>
Date: Tue, 24 Sep 2013 15:16:07 -0400
Subject: [PATCH] handling for unmapped Bam records

---
 src/regressTest/RegressTest.cpp                |  3 ++-
 src/utils/FileRecordTools/FileRecordMgr.cpp    | 18 +++++++++++++++---
 .../FileRecordTools/Records/BamRecord.cpp      |  1 +
 src/utils/FileRecordTools/Records/BamRecord.h  |  7 +++----
 src/utils/FileRecordTools/Records/Record.cpp   | 10 +++++++++-
 src/utils/FileRecordTools/Records/Record.h     |  6 ++++++
 6 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/src/regressTest/RegressTest.cpp b/src/regressTest/RegressTest.cpp
index 2612b8d4..4eaa8bcc 100644
--- a/src/regressTest/RegressTest.cpp
+++ b/src/regressTest/RegressTest.cpp
@@ -16,7 +16,8 @@ const string RegressTest::_randomCmd = "RANDOM";
 const string RegressTest::_space = " ";
 const string RegressTest::_redirect = " > ";
 const string RegressTest::_devNull = " /dev/null ";
-const string RegressTest::_bedOpsCmd = "/home/nek3d/testWorkspace/bedops/bin/bedmap --echo  --echo-map  --bp-ovr 1 ";
+//const string RegressTest::_bedOpsCmd = "/home/nek3d/testWorkspace/bedops/bin/bedmap --echo  --echo-map  --bp-ovr 1 ";
+const string RegressTest::_bedOpsCmd = "/home/nek3d/testWorkspace/bedops/bin/bedops --intersect ";
 
 
 RegressTest::RegressTest()
diff --git a/src/utils/FileRecordTools/FileRecordMgr.cpp b/src/utils/FileRecordTools/FileRecordMgr.cpp
index 0657e1eb..dfb5d4b5 100644
--- a/src/utils/FileRecordTools/FileRecordMgr.cpp
+++ b/src/utils/FileRecordTools/FileRecordMgr.cpp
@@ -152,13 +152,25 @@ void FileRecordMgr::assignChromId(Record *record) {
 
 void FileRecordMgr::testInputSortOrder(Record *record)
 {
-	//user specified that file must be sorted. Check that it is so.
+	// User specified that file must be sorted. Check that it is so.
 	// TBD: In future versions, we might not want/need all files to be sorted,
 	// even if the -sorted option is used, depending on number of input files
 	// and program being run. Should that occur, this block will need adjusting.
 	// NEK - 9/5/13
 
+
+	// Special case: unmapped BAM records are exempt from the sort-order test, so return
+	// before any chrom/start comparison. Another ugly hack sponsored by the letters B, A, and M.
+	if (record->getType() == FileRecordTypeChecker::BAM_RECORD_TYPE && (static_cast<const BamRecord *>(record))->isUnmapped()) {
+		return;
+	}
+
+
 	const QuickString &currChrom = record->getChrName();
+	int currStart = record->getStartPos();
+	if (record->isZeroLength()) {
+		currStart++;
+	}
 	if (currChrom != _prevChrom) {
 		if ( _foundChroms.find(currChrom) != _foundChroms.end()) {
 			//this is a different chrom than the last record had, but we've already seen this chrom.
@@ -179,10 +191,10 @@ void FileRecordMgr::testInputSortOrder(Record *record)
 			_prevStart = INT_MAX;
 			record->setChromId(_prevChromId);
 		}
-	} else if (record->getStartPos() < _prevStart) { //same chrom as last record, but with lower startPos, so still out of order.
+	} else if (currStart < _prevStart) { //same chrom as last record, but with lower startPos, so still out of order.
 		sortError(record, false);
 	}
-	_prevStart = record->getStartPos();
+	_prevStart = currStart;
 
 }
 
diff --git a/src/utils/FileRecordTools/Records/BamRecord.cpp b/src/utils/FileRecordTools/Records/BamRecord.cpp
index ba7e1cc2..4b628613 100644
--- a/src/utils/FileRecordTools/Records/BamRecord.cpp
+++ b/src/utils/FileRecordTools/Records/BamRecord.cpp
@@ -43,6 +43,7 @@ bool BamRecord::initFromFile(BamFileReader *bamFileReader)
 	setStrand(strandChar);
 
 	_bamAlignment = bamFileReader->getAlignment();
+	_isUnmapped = !_bamAlignment.IsMapped();
 	return true;
 }
 
diff --git a/src/utils/FileRecordTools/Records/BamRecord.h b/src/utils/FileRecordTools/Records/BamRecord.h
index 574da454..bd275e79 100644
--- a/src/utils/FileRecordTools/Records/BamRecord.h
+++ b/src/utils/FileRecordTools/Records/BamRecord.h
@@ -39,14 +39,13 @@ public:
 	int getBamChromId() const { return _bamChromId; }
 
 protected:
-	virtual ~BamRecord();
-	void printRemainingBamFields();
-
-
 	BamTools::BamAlignment _bamAlignment;
 	int _bamChromId; //different from chromId, because BAM file may be in different order
 	//than the genomeFile.
 
+	virtual ~BamRecord();
+	void printRemainingBamFields();
+
 };
 
 
diff --git a/src/utils/FileRecordTools/Records/Record.cpp b/src/utils/FileRecordTools/Records/Record.cpp
index 96e4eab0..f4203153 100644
--- a/src/utils/FileRecordTools/Records/Record.cpp
+++ b/src/utils/FileRecordTools/Records/Record.cpp
@@ -7,7 +7,8 @@ Record::Record()
   _startPos(-1),
   _endPos(-1),
   _strand(UNKNOWN),
-  _zeroLength(false)
+  _zeroLength(false),
+  _isUnmapped(false)
 {
 }
 
@@ -35,6 +36,9 @@ void Record::clear() {
 	_strand = UNKNOWN;
 	_startPosStr.clear();
 	_endPosStr.clear();
+	_zeroLength = false;
+	_isUnmapped = false;
+
 }
 
 void Record::setStrand(char val)
@@ -130,6 +134,10 @@ bool Record::intersects(const Record *record,
 bool Record::sameChromIntersects(const Record *record,
 		bool wantSameStrand, bool wantDiffStrand, float overlapFraction, bool reciprocal) const
 {
+	// Special: an unmapped BAM record cannot intersect anything, so return false if either record is unmapped.
+	if (_isUnmapped || record->_isUnmapped) {
+		return false;
+	}
 
 	//If user requested hits only on same strand, or only on different strands,
 	//rule out different strandedness first.
diff --git a/src/utils/FileRecordTools/Records/Record.h b/src/utils/FileRecordTools/Records/Record.h
index b2004231..47cf3b34 100644
--- a/src/utils/FileRecordTools/Records/Record.h
+++ b/src/utils/FileRecordTools/Records/Record.h
@@ -87,6 +87,11 @@ public:
 	virtual void undoZeroLength(); //change it back just before output;
 	virtual bool isZeroLength() const { return _zeroLength; }
 
+	// "Unmapped" only applies to BamRecord, but for design reasons the flag has to live in
+	// the base class: sameChromIntersects() short-circuits (returns false) if either record
+	// is an unmapped BAM record.
+	bool isUnmapped() const { return _isUnmapped; }
+
 
 	virtual bool operator < (const Record &other) const;
 	virtual bool operator > (const Record &other) const;
@@ -126,6 +131,7 @@ protected:
 	QuickString _score;
 	strandType _strand;
 	bool _zeroLength;
+	bool _isUnmapped;
 };
 
 
-- 
GitLab