diff --git a/src/nekSandbox1/nekSandboxMain.cpp b/src/nekSandbox1/nekSandboxMain.cpp index a2922f921aafced8fb74979185703e0ab8d2220e..cdc1e0a99bf360ef0142658497d424118d538741 100644 --- a/src/nekSandbox1/nekSandboxMain.cpp +++ b/src/nekSandbox1/nekSandboxMain.cpp @@ -33,6 +33,52 @@ int nek_sandbox1_main(int argc,char** argv) cerr << "Error: Need one input file. Use \"-\" for stdin." << endl; } + //Sandbox filter: echo lines starting with '@', then up to 3000 lines per chrom in chroms, taken in list order. + ifstream myFile(argv[1]); + if (!myFile.good()) { + cerr << "Error: Can't open genome file " << argv[1] << ". Exiting..." << endl; + exit(1); + } + string sLine; + vector<QuickString> fields; + + vector<QuickString> chroms; + chroms.push_back("1"); + chroms.push_back("2"); + chroms.push_back("10"); + chroms.push_back("11"); + + vector<int> chromCounts(4, 0); + int chromIdx = 0; + //Loop on getline itself so that a failed read at end of file is never parsed. + while (getline(myFile, sLine)) { + if (sLine.empty()) { + continue; + } + if (sLine[0] == '@') { + cout << sLine << endl; + continue; + } + fields.clear(); + Tokenize(sLine.c_str(), fields); + if (fields.size() < 3) { + //no field 2 to compare against; skip instead of indexing out of range. + continue; + } + const QuickString &currChrom = fields[2]; + if (currChrom == chroms[chromIdx]) { + cout << sLine << endl; + chromCounts[chromIdx]++; + if (chromCounts[chromIdx] >= 3000) { + chromIdx++; + } + if (chromIdx > 3) { + break; + } + } + } + + return 0; //NOTE(review): the statements after this return appear unreachable - confirm that is intended. Context context; context.addInputFile(argv[1]); diff --git a/src/utils/FileRecordTools/FileReaders/BamFileReader.cpp b/src/utils/FileRecordTools/FileReaders/BamFileReader.cpp index 2fc71a357a81b4823a79dfca71d1dc570ce59f9b..c00b58316d96ead3e80a876d4b8f3ba0b0257647 100644 --- a/src/utils/FileRecordTools/FileReaders/BamFileReader.cpp +++ b/src/utils/FileRecordTools/FileReaders/BamFileReader.cpp @@ -73,7 +73,7 @@ void BamFileReader::getChrName(QuickString &str) const str = _references[refId].RefName; } -int BamFileReader::getChrId() const +int BamFileReader::getBamChrId() const { return _bamAlignment.RefID; } diff --git a/src/utils/FileRecordTools/FileReaders/BamFileReader.h b/src/utils/FileRecordTools/FileReaders/BamFileReader.h index 
9f8cb426c8bbe913f7989ff8fa1a3dc4ce74bd2d..29032886bdf60b1cf4fa9afd235c85324bce1fbf 100644 --- a/src/utils/FileRecordTools/FileReaders/BamFileReader.h +++ b/src/utils/FileRecordTools/FileReaders/BamFileReader.h @@ -41,7 +41,7 @@ public: void getChrName(QuickString &) const; - int getChrId() const; + int getBamChrId() const; //RefID of the current BAM alignment. int getStartPos() const; int getEndPos() const; void getName(QuickString &) const; diff --git a/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp b/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp index d3c20bef8db98d3b4c6ee5dc1f14be063e1bef6f..12739abe41b42c9412c989387e846cd5464eaf01 100644 --- a/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp +++ b/src/utils/FileRecordTools/FileReaders/SingleLineDelimTextFileReader.cpp @@ -75,10 +75,6 @@ bool SingleLineDelimTextFileReader::readEntry() if (!findDelimiters()) { return false; } - if (_context->hasGenomeFile()) { - getField(0, _currChromStr); - _currChromId = _context->getGenomeFile()->getChromId(_currChromStr); - } return true; } diff --git a/src/utils/FileRecordTools/FileRecordMgr.cpp b/src/utils/FileRecordTools/FileRecordMgr.cpp index 61a92216636aeec6fc286164f78bfe826fd3b5d4..3721f9616e6fff9128fad5c643e9c5b644f6b30c 100644 --- a/src/utils/FileRecordTools/FileRecordMgr.cpp +++ b/src/utils/FileRecordTools/FileRecordMgr.cpp @@ -128,12 +128,24 @@ Record *FileRecordMgr::allocateAndGetNextRecord() //test for sorted order, if necessary. 
if (_context->getSortedInput()) { testInputSortOrder(record); + } else { + assignChromId(record); } _totalRecordLength += (unsigned long)(record->getEndPos() - record->getStartPos()); return record; } -void FileRecordMgr::testInputSortOrder(const Record *record) +void FileRecordMgr::assignChromId(Record *record) { + //Look up the genome-file id only when the chrom changes; otherwise reuse the cached _prevChromId. + const QuickString &currChrom = record->getChrName(); + if (currChrom != _prevChrom && _context->hasGenomeFile()) { + _prevChromId = _context->getGenomeFile()->getChromId(currChrom); + _prevChrom = currChrom; + } + record->setChromId(_prevChromId); +} + +void FileRecordMgr::testInputSortOrder(Record *record) { //user specified that file must be sorted. Check that it is so. // TBD: In future versions, we might not want/need all files to be sorted, @@ -150,7 +162,6 @@ void FileRecordMgr::testInputSortOrder(const Record *record) //new chrom has not been seen before. //TBD: test genome file for ChromId. if (_context->hasGenomeFile()) { - //For BAM records, the chromId of the BAM file will not necessarily be the same as the one from the genome file. int currChromId = _context->getGenomeFile()->getChromId(currChrom); if (currChromId < _prevChromId) { sortError(record, true); @@ -161,6 +172,7 @@ void FileRecordMgr::testInputSortOrder(const Record *record) _foundChroms.insert(currChrom); _prevChrom = currChrom; _prevStart = INT_MAX; + record->setChromId(_prevChromId); //NOTE(review): appears to run only when the chrom changes - confirm later records on the same chrom also get an id. } } else if (record->getStartPos() < _prevStart) { //same chrom as last record, but with lower startPos, so still out of order. 
sortError(record, false); diff --git a/src/utils/FileRecordTools/FileRecordMgr.h b/src/utils/FileRecordTools/FileRecordMgr.h index d19b31bb20312654d85a59b2fd15e852fbc985e2..4c73d0faf25745c40e8ad705bcd96ea2cac92de3 100644 --- a/src/utils/FileRecordTools/FileRecordMgr.h +++ b/src/utils/FileRecordTools/FileRecordMgr.h @@ -173,7 +173,8 @@ private: void allocateFileReader(); - void testInputSortOrder(const Record *record); + void testInputSortOrder(Record *record); //non-const: may also store a chrom id on the record. + void assignChromId(Record *); //sets the record's chrom id when sort order is not being checked. void sortError(const Record *record, bool genomeFileError); void deleteAllMergedItemsButKey(RecordKeyList &recList); diff --git a/src/utils/FileRecordTools/Records/BamRecord.cpp b/src/utils/FileRecordTools/Records/BamRecord.cpp index 94c1fcd9b213e8ba515561378bf14781a68b96f2..ba7e1cc22780435cae11de5df37d12913c086bba 100644 --- a/src/utils/FileRecordTools/Records/BamRecord.cpp +++ b/src/utils/FileRecordTools/Records/BamRecord.cpp @@ -3,6 +3,7 @@ #include "RecordKeyList.h" BamRecord::BamRecord() +: _bamChromId(-1) //-1 until initFromFile() assigns a value. { } @@ -31,7 +32,7 @@ bool BamRecord::initFromFile(BamFileReader *bamFileReader) { bamFileReader->getChrName(_chrName); - _chrId = bamFileReader->getCurrChromdId(); + _bamChromId = bamFileReader->getCurrChromdId(); //NOTE(review): this patch also adds BamFileReader::getBamChrId() (returns RefID) - confirm getCurrChromdId() is the intended source. _startPos = bamFileReader->getStartPos(); int2str(_startPos, _startPosStr); _endPos = bamFileReader->getEndPos(); @@ -48,7 +49,7 @@ bool BamRecord::initFromFile(BamFileReader *bamFileReader) void BamRecord::clear() { Bed6Interval::clear(); - + _bamChromId = -1; //back to the constructor's initial value. //For now, we're going to not clear the BamAlignment object, as all of its //fields will be reset next time it is used anyway. If testing shows this to be a //problem, we'll revisit. 
diff --git a/src/utils/FileRecordTools/Records/BamRecord.h b/src/utils/FileRecordTools/Records/BamRecord.h index b412b96e61c656107dc81108a2a282aa4c30ae3b..574da4546c1d95fee8c534c6b15510264c0ed788 100644 --- a/src/utils/FileRecordTools/Records/BamRecord.h +++ b/src/utils/FileRecordTools/Records/BamRecord.h @@ -36,6 +36,7 @@ public: virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::BAM_RECORD_TYPE; } const BamTools::BamAlignment &getAlignment() const { return _bamAlignment; } + int getBamChromId() const { return _bamChromId; } //chrom id taken from the BAM reader; -1 when unset. protected: virtual ~BamRecord(); @@ -43,6 +44,8 @@ protected: BamTools::BamAlignment _bamAlignment; + int _bamChromId; //chrom id from the BAM file itself. Kept separate from chromId because the BAM file + //may list its chroms in a different order than the genomeFile. }; diff --git a/src/utils/FileRecordTools/Records/Bed3Interval.cpp b/src/utils/FileRecordTools/Records/Bed3Interval.cpp index 5e285dc809f0094686cf2de28cd077c96954c88e..742ea7b9824cece5d29fe05e9c039d4b4e9b3081 100644 --- a/src/utils/FileRecordTools/Records/Bed3Interval.cpp +++ b/src/utils/FileRecordTools/Records/Bed3Interval.cpp @@ -24,7 +24,6 @@ bool Bed3Interval::initFromFile(FileReader *fileReader) bool Bed3Interval::initFromFile(SingleLineDelimTextFileReader *fileReader) { fileReader->getField(0, _chrName); - _chrId = fileReader->getCurrChromdId(); fileReader->getField(1, _startPosStr); fileReader->getField(2, _endPosStr); _startPos = str2chrPos(_startPosStr); diff --git a/src/utils/FileRecordTools/Records/GffRecord.cpp b/src/utils/FileRecordTools/Records/GffRecord.cpp index 5a6cc6fa5459f4e7aa9be7632399d994fddf26c7..cceb01ac140e37372bf039bb55e99256f1159418 100644 --- a/src/utils/FileRecordTools/Records/GffRecord.cpp +++ b/src/utils/FileRecordTools/Records/GffRecord.cpp @@ -24,7 +24,6 @@ void GffRecord::clear() bool GffRecord::initFromFile(SingleLineDelimTextFileReader *fileReader) { fileReader->getField(0, _chrName); - _chrId = fileReader->getCurrChromdId(); fileReader->getField(3, 
_startPosStr); _startPos = str2chrPos(_startPosStr); _startPos--; // VCF is one-based. Here we intentionally don't decrement the string version,