From 8fe692c5aebc051c41879fb431710fc4a5e8559d Mon Sep 17 00:00:00 2001
From: nkindlon <nek3d@virginia.edu>
Date: Wed, 9 Oct 2013 14:53:09 -0400
Subject: [PATCH] Moved short circuit for BAM unmapped reads to
 Record::sameChromIntersects for when query record isn't BAM but DB record is.

---
 src/utils/Contexts/Context.cpp                |  7 ++-
 src/utils/Contexts/Context.h                  |  4 ++
 src/utils/FileRecordTools/FileRecordMgr.cpp   |  2 +-
 src/utils/FileRecordTools/RecordOutputMgr.cpp |  4 +-
 .../FileRecordTools/Records/BamRecord.cpp     | 44 +++++++++++++------
 src/utils/FileRecordTools/Records/BamRecord.h |  3 --
 src/utils/FileRecordTools/Records/Record.cpp  |  5 +++
 7 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/src/utils/Contexts/Context.cpp b/src/utils/Contexts/Context.cpp
index ffc22b2d..c201fbe5 100644
--- a/src/utils/Contexts/Context.cpp
+++ b/src/utils/Contexts/Context.cpp
@@ -39,6 +39,7 @@ Context::Context()
   _databaseFileIdx(-1),
   _bamHeaderAndRefIdx(-1),
   _maxNumDatabaseFields(0),
+  _useFullBamTags(false),
   _reportCount(false),
   _maxDistance(0),
   _reportNames(false),
@@ -234,6 +235,10 @@ bool Context::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
             setUncompressedBam(true);
             markUsed(i - skipFirstArgs);
         }
+        else if (strcmp(argv[i], "-fbam") == 0) {
+        	setUseFullBamTags(true);
+            markUsed(i - skipFirstArgs);
+        }
         else if(strcmp(argv[i], "-sorted") == 0) {
             setSortedInput(true);
             markUsed(i - skipFirstArgs);
@@ -327,7 +332,7 @@ bool Context::cmdArgsValid()
 
 int Context::getBamHeaderAndRefIdx() {
 	if (_bamHeaderAndRefIdx != -1) {
-		//already found which BAM file to usefor the header
+		//already found which BAM file to use for the header
 		return _bamHeaderAndRefIdx;
 	}
 	if (_inputFiles[_queryFileIdx]._fileType == FileRecordTypeChecker::BAM_FILE_TYPE) {
diff --git a/src/utils/Contexts/Context.h b/src/utils/Contexts/Context.h
index 5638e087..53000b1c 100644
--- a/src/utils/Contexts/Context.h
+++ b/src/utils/Contexts/Context.h
@@ -162,6 +162,9 @@ public:
 	bool getPrintable() const { return _printable; }
 	void setPrintable(bool val) { _printable = val; }
 
+	bool getUseFullBamTags() const { return _useFullBamTags; }
+	void setUseFullBamTags(bool val) { _useFullBamTags = val; }
+
 	//
 	// MERGE METHODS
 	//
@@ -234,6 +237,7 @@ protected:
     int _databaseFileIdx;
     int _bamHeaderAndRefIdx;
     int _maxNumDatabaseFields;
+    bool _useFullBamTags;
 
 	bool _reportCount;
 	int _maxDistance;
diff --git a/src/utils/FileRecordTools/FileRecordMgr.cpp b/src/utils/FileRecordTools/FileRecordMgr.cpp
index 9bc93bdf..ab3b516b 100644
--- a/src/utils/FileRecordTools/FileRecordMgr.cpp
+++ b/src/utils/FileRecordTools/FileRecordMgr.cpp
@@ -227,7 +227,7 @@ void FileRecordMgr::allocateFileReader()
 
 	case FileRecordTypeChecker::BAM_FILE_TYPE:
 		_fileReader = new BamFileReader();
-		(static_cast<BamFileReader *>(_fileReader))->setUseTags(_useFullBamTags);
+		(static_cast<BamFileReader *>(_fileReader))->setUseTags(_context->getUseFullBamTags());
 		break;
 	default:
 		break;
diff --git a/src/utils/FileRecordTools/RecordOutputMgr.cpp b/src/utils/FileRecordTools/RecordOutputMgr.cpp
index 4f10ff3b..b6b8fd29 100644
--- a/src/utils/FileRecordTools/RecordOutputMgr.cpp
+++ b/src/utils/FileRecordTools/RecordOutputMgr.cpp
@@ -114,7 +114,9 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi
 	if (needsFlush()) {
 		flush();
 	}
-
+//	if (keyList.getKey()->getChrName() == "chr1" && keyList.getKey()->getStartPos() == 11996) {
+//		printf("Break point here.\n");
+//	}
 	//The first time we print a record is when we print any header, because the header
 	//hasn't been read from the query file until after the first record has also been read.
 	if (_context->getPrintHeader()) {
diff --git a/src/utils/FileRecordTools/Records/BamRecord.cpp b/src/utils/FileRecordTools/Records/BamRecord.cpp
index 0ea849a3..fda5a28b 100644
--- a/src/utils/FileRecordTools/Records/BamRecord.cpp
+++ b/src/utils/FileRecordTools/Records/BamRecord.cpp
@@ -52,9 +52,37 @@ void BamRecord::clear()
 {
 	Bed6Interval::clear();
 	_bamChromId = -1;
-	//For now, we're going to not clear the BamAlignment object, as all of its
-	//fields will be reset next time it is used anyway. If testing shows this to be a
-	//problem, we'll revisit.
+
+
+	//Clear the BamAlignment object. Sadly, it does not have a clear() method,
+	//so we have to reset each member manually.
+	_bamAlignment.Name.clear();
+	_bamAlignment.Length = 0;
+	_bamAlignment.QueryBases.clear();
+	_bamAlignment.AlignedBases.clear();
+	_bamAlignment.Qualities.clear();
+	_bamAlignment.TagData.clear();
+	_bamAlignment.RefID = -1;
+	_bamAlignment.Position = -1;
+	_bamAlignment.Bin = 0;
+	_bamAlignment.MapQuality = 0;
+	_bamAlignment.AlignmentFlag = 0;
+	_bamAlignment.CigarData.clear();
+	_bamAlignment.MateRefID = -1;
+	_bamAlignment.MatePosition = -1;
+	_bamAlignment.InsertSize = -1;
+	_bamAlignment.Filename.clear();
+
+	_bamAlignment.SupportData.AllCharData.clear();
+	_bamAlignment.SupportData.BlockLength = 0;
+	_bamAlignment.SupportData.NumCigarOperations = 0;
+	_bamAlignment.SupportData.QueryNameLength = 0;
+	_bamAlignment.SupportData.QuerySequenceLength = 0;
+	_bamAlignment.SupportData.HasCoreOnly = false;
+
+	_bamAlignment.ErrorString.clear();
+
+
 }
 
 void BamRecord::print(QuickString &outBuf, RecordKeyList *keyList) const
@@ -127,13 +155,3 @@ void BamRecord::printUnmapped(QuickString &outBuf) const {
 	outBuf.append(_score);
 	outBuf.append("\t.\t-1\t-1\t-1\t0,0,0\t0\t.\t."); // dot for strand, -1 for blockStarts and blockEnd
 }
-
-bool BamRecord::sameChromIntersects(const Record *record,
-		bool wantSameStrand, bool wantDiffStrand, float overlapFraction, bool reciprocal) const
-{
-	// Special: For BAM records that are unmapped, intersect should automatically return false
-	if (_isUnmapped || record->isUnmapped()) {
-		return false;
-	}
-	return Record::sameChromIntersects(record, wantSameStrand, wantDiffStrand, overlapFraction, reciprocal);
-}
diff --git a/src/utils/FileRecordTools/Records/BamRecord.h b/src/utils/FileRecordTools/Records/BamRecord.h
index 3029b28e..262ef1ab 100644
--- a/src/utils/FileRecordTools/Records/BamRecord.h
+++ b/src/utils/FileRecordTools/Records/BamRecord.h
@@ -38,9 +38,6 @@ public:
 	const BamTools::BamAlignment &getAlignment() const { return _bamAlignment; }
 	int getBamChromId() const { return _bamChromId; }
 
-	virtual bool sameChromIntersects(const Record *otherRecord,
-			bool sameStrand, bool diffStrand, float overlapFraction, bool reciprocal) const;
-
 protected:
 	BamTools::BamAlignment _bamAlignment;
 	int _bamChromId; //different from chromId, because BAM file may be in different order
diff --git a/src/utils/FileRecordTools/Records/Record.cpp b/src/utils/FileRecordTools/Records/Record.cpp
index 8ec1efd5..cb5e5f44 100644
--- a/src/utils/FileRecordTools/Records/Record.cpp
+++ b/src/utils/FileRecordTools/Records/Record.cpp
@@ -135,6 +135,11 @@ bool Record::intersects(const Record *record,
 bool Record::sameChromIntersects(const Record *record,
 		bool wantSameStrand, bool wantDiffStrand, float overlapFraction, bool reciprocal) const
 {
+	// Special case: if either record is unmapped, intersect should automatically return false
+	if (_isUnmapped || record->isUnmapped()) {
+		return false;
+	}
+
 	//If user requested hits only on same strand, or only on different strands,
 	//rule out different strandedness first.
 	//If the strand is unknown in either case, then queries regarding strandedness
-- 
GitLab