From 8fe692c5aebc051c41879fb431710fc4a5e8559d Mon Sep 17 00:00:00 2001 From: nkindlon <nek3d@virginia.edu> Date: Wed, 9 Oct 2013 14:53:09 -0400 Subject: [PATCH] Moved short circuit for BAM unmapped reads to Record::sameChromIntersects for when query record isn't BAM but DB record is. --- src/utils/Contexts/Context.cpp | 7 ++- src/utils/Contexts/Context.h | 4 ++ src/utils/FileRecordTools/FileRecordMgr.cpp | 2 +- src/utils/FileRecordTools/RecordOutputMgr.cpp | 4 +- .../FileRecordTools/Records/BamRecord.cpp | 44 +++++++++++++------ src/utils/FileRecordTools/Records/BamRecord.h | 3 -- src/utils/FileRecordTools/Records/Record.cpp | 5 +++ 7 files changed, 50 insertions(+), 19 deletions(-) diff --git a/src/utils/Contexts/Context.cpp b/src/utils/Contexts/Context.cpp index ffc22b2d..c201fbe5 100644 --- a/src/utils/Contexts/Context.cpp +++ b/src/utils/Contexts/Context.cpp @@ -39,6 +39,7 @@ Context::Context() _databaseFileIdx(-1), _bamHeaderAndRefIdx(-1), _maxNumDatabaseFields(0), + _useFullBamTags(false), _reportCount(false), _maxDistance(0), _reportNames(false), @@ -234,6 +235,10 @@ bool Context::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { setUncompressedBam(true); markUsed(i - skipFirstArgs); } + else if (strcmp(argv[i], "-fbam") == 0) { + setUseFullBamTags(true); + markUsed(i - skipFirstArgs); + } else if(strcmp(argv[i], "-sorted") == 0) { setSortedInput(true); markUsed(i - skipFirstArgs); @@ -327,7 +332,7 @@ bool Context::cmdArgsValid() int Context::getBamHeaderAndRefIdx() { if (_bamHeaderAndRefIdx != -1) { - //already found which BAM file to usefor the header + //already found which BAM file to use for the header return _bamHeaderAndRefIdx; } if (_inputFiles[_queryFileIdx]._fileType == FileRecordTypeChecker::BAM_FILE_TYPE) { diff --git a/src/utils/Contexts/Context.h b/src/utils/Contexts/Context.h index 5638e087..53000b1c 100644 --- a/src/utils/Contexts/Context.h +++ b/src/utils/Contexts/Context.h @@ -162,6 +162,9 @@ public: bool getPrintable() const { return 
_printable; } void setPrintable(bool val) { _printable = val; } + bool getUseFullBamTags() const { return _useFullBamTags; } + void setUseFullBamTags(bool val) { _useFullBamTags = val; } + // // MERGE METHODS // @@ -234,6 +237,7 @@ protected: int _databaseFileIdx; int _bamHeaderAndRefIdx; int _maxNumDatabaseFields; + bool _useFullBamTags; bool _reportCount; int _maxDistance; diff --git a/src/utils/FileRecordTools/FileRecordMgr.cpp b/src/utils/FileRecordTools/FileRecordMgr.cpp index 9bc93bdf..ab3b516b 100644 --- a/src/utils/FileRecordTools/FileRecordMgr.cpp +++ b/src/utils/FileRecordTools/FileRecordMgr.cpp @@ -227,7 +227,7 @@ void FileRecordMgr::allocateFileReader() case FileRecordTypeChecker::BAM_FILE_TYPE: _fileReader = new BamFileReader(); - (static_cast<BamFileReader *>(_fileReader))->setUseTags(_useFullBamTags); + (static_cast<BamFileReader *>(_fileReader))->setUseTags(_context->getUseFullBamTags()); break; default: break; diff --git a/src/utils/FileRecordTools/RecordOutputMgr.cpp b/src/utils/FileRecordTools/RecordOutputMgr.cpp index 4f10ff3b..b6b8fd29 100644 --- a/src/utils/FileRecordTools/RecordOutputMgr.cpp +++ b/src/utils/FileRecordTools/RecordOutputMgr.cpp @@ -114,7 +114,9 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi if (needsFlush()) { flush(); } - +// if (keyList.getKey()->getChrName() == "chr1" && keyList.getKey()->getStartPos() == 11996) { +// printf("Break point here.\n"); +// } //The first time we print a record is when we print any header, because the header //hasn't been read from the query file until after the first record has also been read. 
if (_context->getPrintHeader()) { diff --git a/src/utils/FileRecordTools/Records/BamRecord.cpp b/src/utils/FileRecordTools/Records/BamRecord.cpp index 0ea849a3..fda5a28b 100644 --- a/src/utils/FileRecordTools/Records/BamRecord.cpp +++ b/src/utils/FileRecordTools/Records/BamRecord.cpp @@ -52,9 +52,37 @@ void BamRecord::clear() { Bed6Interval::clear(); _bamChromId = -1; - //For now, we're going to not clear the BamAlignment object, as all of its - //fields will be reset next time it is used anyway. If testing shows this to be a - //problem, we'll revisit. + + + //Clear the BamAlignment object. Sadly, it does not have a clear() method, + //so we have to do each member manually. + _bamAlignment.Name.clear(); + _bamAlignment.Length = 0; + _bamAlignment.QueryBases.clear(); + _bamAlignment.AlignedBases.clear(); + _bamAlignment.Qualities.clear(); + _bamAlignment.TagData.clear(); + _bamAlignment.RefID = -1; + _bamAlignment.Position = -1; + _bamAlignment.Bin = 0; + _bamAlignment.MapQuality = 0; + _bamAlignment.AlignmentFlag = 0; + _bamAlignment.CigarData.clear(); + _bamAlignment.MateRefID = -1; + _bamAlignment.MatePosition = -1; + _bamAlignment.InsertSize = -1; + _bamAlignment.Filename.clear(); + + _bamAlignment.SupportData.AllCharData.clear(); + _bamAlignment.SupportData.BlockLength = 0; + _bamAlignment.SupportData.NumCigarOperations = 0; + _bamAlignment.SupportData.QueryNameLength = 0; + _bamAlignment.SupportData.QuerySequenceLength = 0; + _bamAlignment.SupportData.HasCoreOnly = false; + + _bamAlignment.ErrorString.clear(); + + } void BamRecord::print(QuickString &outBuf, RecordKeyList *keyList) const @@ -127,13 +155,3 @@ void BamRecord::printUnmapped(QuickString &outBuf) const { outBuf.append(_score); outBuf.append("\t.\t-1\t-1\t-1\t0,0,0\t0\t.\t."); // dot for strand, -1 for blockStarts and blockEnd } - -bool BamRecord::sameChromIntersects(const Record *record, - bool wantSameStrand, bool wantDiffStrand, float overlapFraction, bool reciprocal) const -{ - // Special: For 
BAM records that are unmapped, intersect should automatically return false - if (_isUnmapped || record->isUnmapped()) { - return false; - } - return Record::sameChromIntersects(record, wantSameStrand, wantDiffStrand, overlapFraction, reciprocal); -} diff --git a/src/utils/FileRecordTools/Records/BamRecord.h b/src/utils/FileRecordTools/Records/BamRecord.h index 3029b28e..262ef1ab 100644 --- a/src/utils/FileRecordTools/Records/BamRecord.h +++ b/src/utils/FileRecordTools/Records/BamRecord.h @@ -38,9 +38,6 @@ public: const BamTools::BamAlignment &getAlignment() const { return _bamAlignment; } int getBamChromId() const { return _bamChromId; } - virtual bool sameChromIntersects(const Record *otherRecord, - bool sameStrand, bool diffStrand, float overlapFraction, bool reciprocal) const; - protected: BamTools::BamAlignment _bamAlignment; int _bamChromId; //different from chromId, because BAM file may be in different order diff --git a/src/utils/FileRecordTools/Records/Record.cpp b/src/utils/FileRecordTools/Records/Record.cpp index 8ec1efd5..cb5e5f44 100644 --- a/src/utils/FileRecordTools/Records/Record.cpp +++ b/src/utils/FileRecordTools/Records/Record.cpp @@ -135,6 +135,11 @@ bool Record::intersects(const Record *record, bool Record::sameChromIntersects(const Record *record, bool wantSameStrand, bool wantDiffStrand, float overlapFraction, bool reciprocal) const { + // Special: For records that are unmapped, intersect should automatically return false + if (_isUnmapped || record->isUnmapped()) { + return false; + } + //If user requested hits only on same strand, or only on different strands, //rule out different strandedness first. //If the strand is unknown in either case, then queries regarding strandedness -- GitLab