From 696f8a89b29c34b783da0086c79a1ca2edbbc521 Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Wed, 7 Dec 2011 21:32:02 -0500 Subject: [PATCH] Use new GetNextBed() interface. Redux. --- src/annotateBed/annotateBed.cpp | 6 +- src/bed12ToBed6/bed12ToBed6.cpp | 6 +- src/bedToBam/bedToBam.cpp | 10 ++-- src/bedToIgv/bedToIgv.cpp | 7 +-- src/closestBed/closestBed.cpp | 6 +- src/coverageBed/coverageBed.cpp | 8 +-- src/fastaFromBed/fastaFromBed.cpp | 6 +- src/flankBed/flankBed.cpp | 11 +--- src/genomeCoverageBed/genomeCoverageBed.cpp | 10 ++-- src/intersectBed/intersectBed.cpp | 7 +-- src/linksBed/linksBed.cpp | 8 +-- src/multiBamCov/multiBamCov.cpp | 6 +- src/nucBed/nucBed.cpp | 6 +- src/shuffleBed/shuffleBed.cpp | 22 +++----- src/slopBed/slopBed.cpp | 11 +--- src/subtractBed/subtractBed.cpp | 8 +-- src/utils/bedFile/bedFile.cpp | 59 +++++++++----------- src/utils/bedFile/bedFile.h | 8 ++- src/utils/chromsweep/chromsweep.cpp | 62 +++++++++------------ src/utils/chromsweep/chromsweep.h | 10 +--- src/windowBed/windowBed.cpp | 8 +-- 21 files changed, 109 insertions(+), 176 deletions(-) diff --git a/src/annotateBed/annotateBed.cpp b/src/annotateBed/annotateBed.cpp index 0914acc9..1e5366cf 100644 --- a/src/annotateBed/annotateBed.cpp +++ b/src/annotateBed/annotateBed.cpp @@ -114,13 +114,11 @@ void BedAnnotate::AnnotateBed() { for (size_t annoIndex = 0; annoIndex < _annoFiles.size(); ++annoIndex) { // grab the current annotation file. BedFile *anno = _annoFiles[annoIndex]; - int lineNum = 0; BED a, nullBed; - BedLineStatus bedStatus; // process each entry in the current anno file - while ((bedStatus = anno->GetNextBed(a, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (anno->GetNextBed(a)) { + if (anno->_status == BED_VALID) { _bed->countListHits(a, annoIndex, _sameStrand, _diffStrand); a = nullBed; } diff --git a/src/bed12ToBed6/bed12ToBed6.cpp b/src/bed12ToBed6/bed12ToBed6.cpp index 6726cc34..8858dbf0 100644 --- a/src/bed12ToBed6/bed12ToBed6.cpp +++ b/src/bed12ToBed6/bed12ToBed6.cpp @@ -136,12 +136,10 @@ void ProcessBed(istream &bedInput, BedFile *bed) { // process each BED entry and convert to BAM BED bedEntry, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; // open the BED file for reading. bed->Open(); - while ((bedStatus = bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (bed->GetNextBed(bedEntry)) { + if (bed->_status == BED_VALID) { bedVector bedBlocks; // vec to store the discrete BED "blocks" from a splitBedIntoBlocks(bedEntry, bedBlocks); diff --git a/src/bedToBam/bedToBam.cpp b/src/bedToBam/bedToBam.cpp index 1c717612..3fab9af0 100644 --- a/src/bedToBam/bedToBam.cpp +++ b/src/bedToBam/bedToBam.cpp @@ -185,19 +185,17 @@ void ProcessBed(BedFile *bed, GenomeFile *genome, bool isBED12, int mapQual, boo // process each BED entry and convert to BAM BED bedEntry, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; // open the BED file for reading. bed->Open(); - while ((bedStatus = bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (bed->GetNextBed(bedEntry)) { + if (bed->_status == BED_VALID) { BamAlignment bamEntry; if (bed->bedType >= 4) { - ConvertBedToBam(bedEntry, bamEntry, chromToId, isBED12, mapQual, lineNum); + ConvertBedToBam(bedEntry, bamEntry, chromToId, isBED12, mapQual, bed->_lineNum); writer->SaveAlignment(bamEntry); } else { - cerr << "Error: BED entry without name found at line: " << lineNum << ". Exiting!" << endl; + cerr << "Error: BED entry without name found at line: " << bed->_lineNum << ". Exiting!" << endl; exit (1); } bedEntry = nullBed; diff --git a/src/bedToIgv/bedToIgv.cpp b/src/bedToIgv/bedToIgv.cpp index 7aab2191..ec0e3f6a 100644 --- a/src/bedToIgv/bedToIgv.cpp +++ b/src/bedToIgv/bedToIgv.cpp @@ -224,13 +224,10 @@ void ProcessBed(istream &bedInput, BedFile *bed, string path, string sortType, s BED bedEntry, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; - bed->Open(); // process each BED entry and convert to an IGV request - while ((bedStatus = bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (bed->GetNextBed(bedEntry)) { + if (bed->_status == BED_VALID) { string filename = bedEntry.chrom + "_" + ToString(bedEntry.start) + "_" + ToString(bedEntry.end); string locus = bedEntry.chrom + ":" + ToString(bedEntry.start - slop) + "-" + ToString(bedEntry.end + slop); diff --git a/src/closestBed/closestBed.cpp b/src/closestBed/closestBed.cpp index 735e2a9e..2cf6bd4f 100644 --- a/src/closestBed/closestBed.cpp +++ b/src/closestBed/closestBed.cpp @@ -215,10 +215,8 @@ void BedClosest::FindClosestBed() { _bedB->loadBedFileIntoMap(); BED a, nullBed; - int lineNum = 0; // current input line number vector<BED> hits; // vector of potential hits hits.reserve(100); - BedLineStatus bedStatus; _bedA->Open(); // report A's header first if asked. @@ -226,8 +224,8 @@ void BedClosest::FindClosestBed() { _bedA->PrintHeader(); } // process each entry in A in search of the closest feature in B - while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (_bedA->GetNextBed(a)) { + if (_bedA->_status == BED_VALID) { FindWindowOverlaps(a, hits); hits.clear(); a = nullBed; diff --git a/src/coverageBed/coverageBed.cpp b/src/coverageBed/coverageBed.cpp index b6163516..f3e71d52 100644 --- a/src/coverageBed/coverageBed.cpp +++ b/src/coverageBed/coverageBed.cpp @@ -51,14 +51,11 @@ void BedCoverage::CollectCoverageBed() { // that we can easily compare "A" to it for overlaps _bedB->loadBedCovFileIntoMap(); - int lineNum = 0; // current input line number BED a, nullBed; - BedLineStatus bedStatus; - _bedA->Open(); // process each entry in A - while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (_bedA->GetNextBed(a)) { + if (_bedA->_status == BED_VALID) { // process the BED entry as a single block if (_obeySplits == false) _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly); @@ -66,7 +63,6 @@ void BedCoverage::CollectCoverageBed() { else { bedVector bedBlocks; splitBedIntoBlocks(a, bedBlocks); - // use countSplitHits to avoid over-counting each split chunk // as distinct read coverage. _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly); diff --git a/src/fastaFromBed/fastaFromBed.cpp b/src/fastaFromBed/fastaFromBed.cpp index 41ddebd8..ce7eb203 100644 --- a/src/fastaFromBed/fastaFromBed.cpp +++ b/src/fastaFromBed/fastaFromBed.cpp @@ -104,13 +104,11 @@ void Bed2Fa::ExtractDNA() { fr->open(_dbFile, memmap); BED bed, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; string sequence; _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (_bed->GetNextBed(bed)) { + if (_bed->_status == BED_VALID) { // make sure we are extracting >= 1 bp if (bed.zeroLength == false) { size_t seqLength = fr->sequenceLength(bed.chrom); diff --git a/src/flankBed/flankBed.cpp b/src/flankBed/flankBed.cpp index f7e12495..14ef0ab5 100644 --- a/src/flankBed/flankBed.cpp +++ b/src/flankBed/flankBed.cpp @@ -41,9 +41,7 @@ BedFlank::~BedFlank(void) { void BedFlank::FlankBed() { - int lineNum = 0; BED bedEntry, nullBed; // used to store the current BED line from the BED file. - BedLineStatus bedStatus; _bed->Open(); // report A's header first if asked. @@ -51,10 +49,8 @@ void BedFlank::FlankBed() { _bed->PrintHeader(); } - bedStatus = _bed->GetNextBed(bedEntry, lineNum); - while (bedStatus != BED_INVALID) { - if (bedStatus == BED_VALID) { - + while (_bed->GetNextBed(bedEntry)) { + if (_bed->_status == BED_VALID) { int leftFlank = _leftFlank; int rightFlank = _rightFlank; if (_fractional == true) { @@ -70,9 +66,8 @@ void BedFlank::FlankBed() { { AddStrandedFlank(bedEntry, leftFlank, rightFlank); } - bedEntry = nullBed; } - bedStatus = _bed->GetNextBed(bedEntry, lineNum); + bedEntry = nullBed; } _bed->Close(); } diff --git a/src/genomeCoverageBed/genomeCoverageBed.cpp b/src/genomeCoverageBed/genomeCoverageBed.cpp index a97c8964..2b960273 100644 --- a/src/genomeCoverageBed/genomeCoverageBed.cpp +++ b/src/genomeCoverageBed/genomeCoverageBed.cpp @@ -146,21 +146,19 @@ void BedGenomeCoverage::AddBlockedCoverage(const vector<BED> &bedBlocks) { void BedGenomeCoverage::CoverageBed() { BED a, nullBed; - int lineNum = 0; // current input line number - BedLineStatus bedStatus; ResetChromCoverage(); _bed->Open(); - while ( (bedStatus = _bed->GetNextBed(a, lineNum)) != BED_INVALID ) { - if (bedStatus == BED_VALID) { + while (_bed->GetNextBed(a)) { + if (_bed->_status == BED_VALID) { if (_filterByStrand == true) { if (a.strand.empty()) { - cerr << "Input error: Interval is missing a strand value on line " << lineNum << "." <<endl; + cerr << "Input error: Interval is missing a strand value on line " << _bed->_lineNum << "." <<endl; exit(1); } if ( ! (a.strand == "-" || a.strand == "+") ) { - cerr << "Input error: Invalid strand value (" << a.strand << ") on line " << lineNum << "." << endl; + cerr << "Input error: Invalid strand value (" << a.strand << ") on line " << _bed->_lineNum << "." << endl; exit(1); } // skip if the strand is not what the user requested. diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp index 6a48b880..735fd8d7 100644 --- a/src/intersectBed/intersectBed.cpp +++ b/src/intersectBed/intersectBed.cpp @@ -199,7 +199,6 @@ void BedIntersect::IntersectBed() { vector<BED> hits; hits.reserve(100); BED a, nullBed; - BedLineStatus bedStatus; // open the "A" file, process each BED entry and searh for overlaps. _bedA->Open(); @@ -207,8 +206,8 @@ void BedIntersect::IntersectBed() { if (_printHeader == true) { _bedA->PrintHeader(); } - while ((bedStatus = _bedA->GetNextBed(a)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (_bedA->GetNextBed(a)) { + if (_bedA->_status == BED_VALID) { // treat the BED as a single "block" if (_obeySplits == false) { FindOverlaps(a, hits); @@ -233,7 +232,7 @@ void BedIntersect::IntersectBed() { } else { // use the chromsweep algorithm to detect overlaps on the fly. - ChromSweep sweep = ChromSweep(_bedA, _bedB, _sameStrand, _diffStrand); + ChromSweep sweep = ChromSweep(_bedA, _bedB, _sameStrand, _diffStrand, _printHeader); pair<BED, vector<BED> > hit_set; hit_set.second.reserve(10000); diff --git a/src/linksBed/linksBed.cpp b/src/linksBed/linksBed.cpp index 2adb5dea..b72372b1 100644 --- a/src/linksBed/linksBed.cpp +++ b/src/linksBed/linksBed.cpp @@ -101,13 +101,11 @@ void BedLinks::CreateLinks() { cout << "<table border=\"0\" align=\"justify\"" << endl; cout << "<h3>BED Entries from: stdin </h3>" << endl; - int lineNum = 0; - BED bedEntry, nullBed; - BedLineStatus bedStatus; + BED bedEntry, nullBed; _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (_bed->GetNextBed(bedEntry)) { + if (_bed->_status == BED_VALID) { WriteURL(bedEntry, base); bedEntry = nullBed; } diff --git a/src/multiBamCov/multiBamCov.cpp b/src/multiBamCov/multiBamCov.cpp index 4c3c7396..b99774b5 100644 --- a/src/multiBamCov/multiBamCov.cpp +++ b/src/multiBamCov/multiBamCov.cpp @@ -58,15 +58,13 @@ void MultiCovBam::CollectCoverage() // if index data available for all BAM files, we can use SetRegion if ( reader.HasIndexes() ) { BED bed, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; _bed->Open(); // loop through each BED entry, jump to it, // and collect coverage from each BAM - while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) + while (_bed->GetNextBed(bed)) { - if (bedStatus == BED_VALID) + if (_bed->_status == BED_VALID) { // initialize counts for each file to 0 vector<int> counts(_bam_files.size(), 0); diff --git a/src/nucBed/nucBed.cpp b/src/nucBed/nucBed.cpp index d5777864..f70e582b 100644 --- a/src/nucBed/nucBed.cpp +++ b/src/nucBed/nucBed.cpp @@ -111,13 +111,11 @@ void NucBed::ProfileDNA() { bool headerReported = false; BED bed, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; string sequence; _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (_bed->GetNextBed(bed)) { + if (_bed->_status == BED_VALID) { if (headerReported == false) { PrintHeader(); headerReported = true; diff --git a/src/shuffleBed/shuffleBed.cpp b/src/shuffleBed/shuffleBed.cpp index 374459b0..48d22888 100644 --- a/src/shuffleBed/shuffleBed.cpp +++ b/src/shuffleBed/shuffleBed.cpp @@ -79,14 +79,10 @@ BedShuffle::~BedShuffle(void) { void BedShuffle::Shuffle() { - - int lineNum = 0; BED bedEntry, nullBed; // used to store the current BED line from the BED file. - BedLineStatus bedStatus; - _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (_bed->GetNextBed(bedEntry)) { + if (_bed->_status == BED_VALID) { ChooseLocus(bedEntry); _bed->reportBedNewLine(bedEntry); bedEntry = nullBed; @@ -99,13 +95,11 @@ void BedShuffle::Shuffle() { void BedShuffle::ShuffleWithExclusions() { - int lineNum = 0; BED bedEntry, nullBed; // used to store the current BED line from the BED file. - BedLineStatus bedStatus; _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (_bed->GetNextBed(bedEntry)) { + if (_bed->_status == BED_VALID) { // keep looking as long as the chosen // locus happens to overlap with regions // that the user wishes to exclude. @@ -122,7 +116,7 @@ void BedShuffle::ShuffleWithExclusions() { if (tries > MAX_TRIES) { - cerr << "Error, line " << lineNum << ": tried " << MAX_TRIES << " potential loci for entry, but could not avoid excluded regions. Ignoring entry and moving on." << endl; + cerr << "Error, line " << _bed->_lineNum << ": tried " << MAX_TRIES << " potential loci for entry, but could not avoid excluded regions. Ignoring entry and moving on." << endl; } else { _bed->reportBedNewLine(bedEntry); @@ -136,13 +130,11 @@ void BedShuffle::ShuffleWithExclusions() { void BedShuffle::ShuffleWithInclusions() { - int lineNum = 0; BED bedEntry, nullBed; // used to store the current BED line from the BED file. - BedLineStatus bedStatus; _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (_bed->GetNextBed(bedEntry)) { + if (_bed->_status == BED_VALID) { // choose a new locus ChooseLocusFromInclusionFile(bedEntry); _bed->reportBedNewLine(bedEntry); diff --git a/src/slopBed/slopBed.cpp b/src/slopBed/slopBed.cpp index 68eda5a2..76257e9c 100644 --- a/src/slopBed/slopBed.cpp +++ b/src/slopBed/slopBed.cpp @@ -40,19 +40,15 @@ BedSlop::~BedSlop(void) { void BedSlop::SlopBed() { - int lineNum = 0; BED bedEntry, nullBed; // used to store the current BED line from the BED file. - BedLineStatus bedStatus; _bed->Open(); // report header first if asked. if (_printHeader == true) { _bed->PrintHeader(); - } - - bedStatus = _bed->GetNextBed(bedEntry, lineNum); - while (bedStatus != BED_INVALID) { - if (bedStatus == BED_VALID) { + } + while (_bed->GetNextBed(bedEntry)) { + if (_bed->_status == BED_VALID) { if (_fractional == false) { AddSlop(bedEntry, (int) _leftSlop, (int) _rightSlop); } @@ -64,7 +60,6 @@ void BedSlop::SlopBed() { _bed->reportBedNewLine(bedEntry); bedEntry = nullBed; } - bedStatus = _bed->GetNextBed(bedEntry, lineNum); } _bed->Close(); } diff --git a/src/subtractBed/subtractBed.cpp b/src/subtractBed/subtractBed.cpp index e259787b..dded5da4 100644 --- a/src/subtractBed/subtractBed.cpp +++ b/src/subtractBed/subtractBed.cpp @@ -156,15 +156,13 @@ void BedSubtract::SubtractBed() { _bedB->loadBedFileIntoMap(); BED a, nullBed; - BedLineStatus bedStatus; - int lineNum = 0; // current input line number - vector<BED> hits; // vector of potential hits + vector<BED> hits; // reserve some space hits.reserve(100); _bedA->Open(); - while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (_bedA->GetNextBed(a)) { + if (_bedA->_status == BED_VALID) { FindAndSubtractOverlaps(a, hits); hits.clear(); a = nullBed; diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp index 2d59aa15..9fa075f2 100644 --- a/src/utils/bedFile/bedFile.cpp +++ b/src/utils/bedFile/bedFile.cpp @@ -167,7 +167,7 @@ void BedFile::Seek(unsigned long offset) { // Jump to a specific byte in the file bool BedFile::Empty(void) { - return _bedStream->eof(); + return _status == BED_INVALID || _status == BED_BLANK; } // Close the BED file @@ -216,7 +216,7 @@ void BedFile::PrintHeader(void) { } -BedLineStatus BedFile::GetNextBed(BED &bed, bool forceSorted) { +bool BedFile::GetNextBed(BED &bed, bool forceSorted) { // make sure there are still lines to process. // if so, tokenize, validate and return the BED entry. @@ -233,17 +233,16 @@ BedLineStatus BedFile::GetNextBed(BED &bed, bool forceSorted) { _firstLine = false; } // load the BED struct as long as it's a valid BED entry. - BedLineStatus status = parseLine(bed, _bedFields); - if (!forceSorted) { - return status; - } - else if (status == BED_VALID) { + _status = parseLine(bed, _bedFields); + if (_status == BED_INVALID) return false; + + if (_status == BED_VALID) { if (bed.chrom == _prev_chrom) { if ((int) bed.start >= _prev_start) { _prev_chrom = bed.chrom; _prev_start = bed.start; } - else { + else if (forceSorted) { cerr << "ERROR: input file: (" << bedFile << ") is not sorted by chrom then start" << endl; exit(1); } @@ -252,15 +251,16 @@ BedLineStatus BedFile::GetNextBed(BED &bed, bool forceSorted) { _prev_chrom = bed.chrom; _prev_start = bed.start; } - return status; - } - else { - return status; + return true; } + else if (_status == BED_HEADER) {return true;} + // i.e, BED_BLANK + else {return false;} } // default if file is closed or EOF - return BED_INVALID; + _status = BED_INVALID; + return false; } @@ -268,10 +268,9 @@ bool BedFile::GetNextMergedBed(BED &merged_bed) { if (_bedStream->good()) { BED bed; - BedLineStatus bedStatus; // force sorting; hence third param = true - while ((bedStatus = GetNextBed(bed, true)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (GetNextBed(bed, true)) { + if (_status == BED_VALID) { if (((int) bed.start - _merged_end > 0) || (_merged_end < 0) || (bed.chrom != _merged_chrom)) @@ -300,7 +299,7 @@ bool BedFile::GetNextMergedBed(BED &merged_bed) { } } // handle the last merged block in the file. - if (bedStatus == BED_INVALID) + if (_status == BED_INVALID) { merged_bed.chrom = _merged_chrom; merged_bed.start = _merged_start; @@ -664,12 +663,10 @@ void BedFile::setBedType (int colNums) { void BedFile::loadBedFileIntoMap() { BED bedEntry, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; Open(); - while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (GetNextBed(bedEntry)) { + if (_status == BED_VALID) { BIN bin = getBin(bedEntry.start, bedEntry.end); bedMap[bedEntry.chrom][bin].push_back(bedEntry); bedEntry = nullBed; @@ -682,12 +679,10 @@ void BedFile::loadBedFileIntoMap() { void BedFile::loadBedCovFileIntoMap() { BED bedEntry, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; Open(); - while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (GetNextBed(bedEntry)) { + if (_status == BED_VALID) { BIN bin = getBin(bedEntry.start, bedEntry.end); BEDCOV bedCov; @@ -712,12 +707,10 @@ void BedFile::loadBedCovFileIntoMap() { void BedFile::loadBedCovListFileIntoMap() { BED bedEntry, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; Open(); - while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (GetNextBed(bedEntry)) { + if (_status == BED_VALID) { BIN bin = getBin(bedEntry.start, bedEntry.end); BEDCOVLIST bedCovList; @@ -741,12 +734,10 @@ void BedFile::loadBedCovListFileIntoMap() { void BedFile::loadBedFileIntoMapNoBin() { BED bedEntry, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; - + Open(); - while ((bedStatus = this->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (GetNextBed(bedEntry)) { + if (_status == BED_VALID) { bedMapNoBin[bedEntry.chrom].push_back(bedEntry); bedEntry = nullBed; } diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h index 9cd78e4a..0206de8f 100644 --- a/src/utils/bedFile/bedFile.h +++ b/src/utils/bedFile/bedFile.h @@ -427,7 +427,7 @@ public: void GetLine(void); // Get the next BED entry in an opened BED file. - BedLineStatus GetNextBed (BED &bed, bool forceSorted = false); + bool GetNextBed (BED &bed, bool forceSorted = false); // Returns the next MERGED (i.e., non-overlapping) interval in an opened BED file // NOTE: assumes input file is sorted by chrom then start @@ -487,7 +487,9 @@ public: masterBedCovListMap bedCovListMap; masterBedMap bedMap; masterBedMapNoBin bedMapNoBin; - + + BedLineStatus _status; + int _lineNum; private: // data @@ -497,7 +499,7 @@ private: FileType _fileType; // what is the file type? (BED? GFF? VCF?) istream *_bedStream; string _bedLine; - int _lineNum; + string _header; bool _firstLine; vector<string> _bedFields; diff --git a/src/utils/chromsweep/chromsweep.cpp b/src/utils/chromsweep/chromsweep.cpp index 0ae73b59..70eac679 100644 --- a/src/utils/chromsweep/chromsweep.cpp +++ b/src/utils/chromsweep/chromsweep.cpp @@ -14,52 +14,43 @@ #include <queue> bool after(const BED &a, const BED &b); -void report_hits(const BED &curr_qy, const vector<BED> &hits); -vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector<BED> &db_cache, vector<BED> &hits); - /* // constructor using existing BedFile pointers */ -ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand, bool diffStrand) -: _bedA(bedA) -, _bedB(bedB) +ChromSweep::ChromSweep(BedFile *query, BedFile *db, bool sameStrand, bool diffStrand, bool printHeader) +: _query(query) +, _db(db) , _sameStrand(sameStrand) , _diffStrand(diffStrand) { - // prime the results pump. - _qy_lineNum = 0; - _db_lineNum = 0; + _hits.reserve(100000); + _cache.reserve(100000); - _hits.reserve(1000); - _cache.reserve(1000); + _query->Open(); + if (printHeader) _query->PrintHeader(); + _db->Open(); - _bedA->Open(); - _bedB->Open(); - _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); - _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); + _query->GetNextBed(_curr_qy); + _db->GetNextBed(_curr_db); } /* Constructor with filenames */ -ChromSweep::ChromSweep(string &bedAFile, string &bedBFile) +ChromSweep::ChromSweep(string &queryFile, string &dbFile) { - // prime the results pump. - _qy_lineNum = 0; - _db_lineNum = 0; - _hits.reserve(100000); _cache.reserve(100000); - _bedA = new BedFile(bedAFile); - _bedB = new BedFile(bedBFile); + _query = new BedFile(queryFile); + _db = new BedFile(dbFile); - _bedA->Open(); - _bedB->Open(); + _query->Open(); + _db->Open(); - _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); - _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); + _query->GetNextBed(_curr_qy); + _db->GetNextBed(_curr_db); } @@ -71,7 +62,7 @@ ChromSweep::~ChromSweep(void) { void ChromSweep::ScanCache() { - if (_qy_status != BED_INVALID) { + if (_query->_status != BED_INVALID) { vector<BED>::iterator c = _cache.begin(); while (c != _cache.end()) { @@ -92,14 +83,13 @@ void ChromSweep::ScanCache() { bool ChromSweep::ChromChange() { // the files are on the same chrom - if ((_curr_qy.chrom == _curr_db.chrom) || (_db_status == BED_INVALID) || (_qy_status == BED_INVALID)) { + if (_curr_qy.chrom == _curr_db.chrom) { return false; } // the query is ahead of the database. fast-forward the database to catch-up. else if (_curr_qy.chrom > _curr_db.chrom) { - while (!_bedB->Empty() && _curr_db.chrom < _curr_qy.chrom) + while (_db->GetNextBed(_curr_db, true) && _curr_db.chrom < _curr_qy.chrom) { - _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); } _cache.clear(); return false; @@ -119,7 +109,7 @@ bool ChromSweep::ChromChange() _results.push(make_pair(_curr_qy, _no_hits)); _cache.clear(); } - _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); + _query->GetNextBed(_curr_qy, true); _curr_chrom = _curr_qy.chrom; return true; } @@ -146,26 +136,28 @@ bool ChromSweep::IsValidHit(const BED &query, const BED &db) { bool ChromSweep::Next(pair<BED, vector<BED> > &next) { - if (!_bedA->Empty()) { + if (!_query->Empty()) { // have we changed chromosomes? if (ChromChange() == false) { // scan the database cache for hits ScanCache(); // advance the db until we are ahead of the query. update hits and cache as necessary - while (!_bedB->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy))) + while (!_db->Empty() && + _curr_qy.chrom == _curr_db.chrom && + !(after(_curr_db, _curr_qy))) { if (IsValidHit(_curr_qy, _curr_db)) { _hits.push_back(_curr_db); } _cache.push_back(_curr_db); - _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); + _db->GetNextBed(_curr_db, true); } // add the hits for this query to the pump _results.push(make_pair(_curr_qy, _hits)); // reset for the next query _hits.clear(); _curr_qy = _nullBed; - _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); + _query->GetNextBed(_curr_qy, true); _curr_chrom = _curr_qy.chrom; } } diff --git a/src/utils/chromsweep/chromsweep.h b/src/utils/chromsweep/chromsweep.h index af5f6798..5f83a24a 100644 --- a/src/utils/chromsweep/chromsweep.h +++ b/src/utils/chromsweep/chromsweep.h @@ -30,10 +30,10 @@ public: // A is the query and B is the database // constructor using existing BedFile pointers - ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand = false, bool diffStrand = false); + ChromSweep(BedFile *query, BedFile *db, bool sameStrand = false, bool diffStrand = false, bool printHeader = false); // constructor using filenames - ChromSweep(string &bedAFile, string &bedBFile); + ChromSweep(string &queryFile, string &dbFile); // destructor ~ChromSweep(void); @@ -57,7 +57,7 @@ public: private: // instances of a bed file class. - BedFile *_bedA, *_bedB; + BedFile *_query, *_db; // do we care about strandedness. bool _sameStrand, _diffStrand; // a cache of still active features from the database file @@ -73,10 +73,6 @@ private: BED _curr_qy, _curr_db; // a cache of the current chrom from the query. used to handle chrom changes. string _curr_chrom; - // the current line status in the database and query files - BedLineStatus _qy_status, _db_status; - // the current line numbers in the database and query files - int _qy_lineNum, _db_lineNum; // private methods. private: diff --git a/src/windowBed/windowBed.cpp b/src/windowBed/windowBed.cpp index b93ea4ae..63d97745 100644 --- a/src/windowBed/windowBed.cpp +++ b/src/windowBed/windowBed.cpp @@ -129,9 +129,7 @@ void BedWindow::WindowIntersectBed() { _bedB->loadBedFileIntoMap(); BED a, nullBed; - int lineNum = 0; // current input line number - BedLineStatus bedStatus; - vector<BED> hits; // vector of potential hits + vector<BED> hits; hits.reserve(100); _bedA->Open(); @@ -139,8 +137,8 @@ void BedWindow::WindowIntersectBed() { if (_printHeader == true) { _bedA->PrintHeader(); } - while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { + while (_bedA->GetNextBed(a)) { + if (_bedA->_status == BED_VALID) { FindWindowOverlaps(a, hits); hits.clear(); a = nullBed; -- GitLab