diff --git a/src/shuffleBed/shuffleBed.cpp b/src/shuffleBed/shuffleBed.cpp index d0a39a3cf3fa4ad2dc58de0b4dcaf07479db1a06..9e36bebb22d2fb54a2d1f75ad5e9071bcd2c77fb 100644 --- a/src/shuffleBed/shuffleBed.cpp +++ b/src/shuffleBed/shuffleBed.cpp @@ -18,7 +18,8 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, float overlapFraction, int seed, - bool chooseChrom, bool isBedpe, size_t maxTries) { + bool chooseChrom, bool isBedpe, size_t maxTries, + bool noOverlapping) { _bedFile = bedFile; _genomeFile = genomeFile; @@ -32,7 +33,7 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, _chooseChrom = chooseChrom; _isBedpe = isBedpe; _maxTries = maxTries; - + _noOverlapping = noOverlapping; // use the supplied seed for the random // number generation if given. else, @@ -61,6 +62,12 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, _exclude = new BedFile(excludeFile); _exclude->loadBedFileIntoMap(); } + else if (_noOverlapping) { + // create an empty map that we add to as we iterate. + _exclude = new BedFile(); + // force down correct code-path. + _haveExclude = true; + } if (_haveInclude) { _include = new BedFile(includeFile); @@ -99,6 +106,9 @@ void BedShuffle::Shuffle() { while (_bed->GetNextBed(bedEntry)) { if (_bed->_status == BED_VALID) { ChooseLocus(bedEntry); + if(_noOverlapping){ + _exclude->addBEDIntoMap(bedEntry); + } _bed->reportBedNewLine(bedEntry); } } @@ -150,7 +160,7 @@ void BedShuffle::ShuffleWithExclusions() { _overlapFraction, false); tries++; } while ((haveOverlap == true) && (tries <= _maxTries)); - + if (tries > _maxTries) { cerr << "Error, line " << _bed->_lineNum @@ -159,6 +169,10 @@ void BedShuffle::ShuffleWithExclusions() { << "excluded regions. Ignoring entry and moving on." << endl; } else { + if(_noOverlapping){ + // future entries cannot overlap this one + _exclude->addBEDIntoMap(bedEntry); + } _bed->reportBedNewLine(bedEntry); } } @@ -237,8 +251,15 @@ void BedShuffle::ShuffleWithInclusions() { tries++; } while ((bedEntry.end > chromSize) && (tries <= _maxTries)); - - _bed->reportBedNewLine(bedEntry); + if (tries > _maxTries) { + cerr << "Error, line " << _bed->_lineNum + << ": tried " << _maxTries + << " potential loci for entry, but could not avoid " + << "excluded regions. Ignoring entry and moving on." + << endl; } + else { + _bed->reportBedNewLine(bedEntry); + } } } _bed->Close(); @@ -280,6 +301,9 @@ void BedShuffle::ShuffleWithInclusionsAndExclusions() { << endl; } else { _bed->reportBedNewLine(bedEntry); + if (_noOverlapping){ + _exclude->addBEDIntoMap(bedEntry); + } } } } diff --git a/src/shuffleBed/shuffleBed.h b/src/shuffleBed/shuffleBed.h index 61dc625a90fb6a53743e9ce0909a8a1a74796c6b..2623c174106ab75e2355947069659a0856a40b2c 100644 --- a/src/shuffleBed/shuffleBed.h +++ b/src/shuffleBed/shuffleBed.h @@ -39,7 +39,7 @@ public: bool haveInclude, bool sameChrom, float overlapFraction, int seed, bool chooseChrom, bool isBedpe, - size_t _maxTries); + size_t _maxTries, bool noOverlapping); // destructor ~BedShuffle(void); @@ -59,6 +59,7 @@ private: bool _chooseChrom; bool _isBedpe; size_t _maxTries; + bool _noOverlapping; // The BED file from which to compute coverage. diff --git a/src/shuffleBed/shuffleBedMain.cpp b/src/shuffleBed/shuffleBedMain.cpp index 2a7fea25a57b6c6e1be669dcd5d652406bb95984..cea9e4d5e85cbf77f3031970a6644be292fd15dd 100644 --- a/src/shuffleBed/shuffleBedMain.cpp +++ b/src/shuffleBed/shuffleBedMain.cpp @@ -46,6 +46,7 @@ int shuffle_main(int argc, char* argv[]) { bool chooseChrom = false; bool isBedpe = false; size_t maxTries = 1000; + bool noOverlapping = false; for(int i = 1; i < argc; i++) { @@ -121,6 +122,9 @@ int shuffle_main(int argc, char* argv[]) { else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) { isBedpe = true; } + else if(PARAMETER_CHECK("-noOverlapping", 14, parameterLength)) { + noOverlapping = true; + } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; showHelp = true; @@ -139,7 +143,7 @@ int shuffle_main(int argc, char* argv[]) { haveInclude, sameChrom, overlapFraction, seed, chooseChrom, isBedpe, - maxTries); + maxTries, noOverlapping); delete bc; return 0; } @@ -194,6 +198,7 @@ void shuffle_help(void) { cerr << "\t-maxTries\t" << "\n\t\tMax. number of attempts to find a home for a shuffled interval" << endl; cerr << "\t\tin the presence of -incl or -excl." << endl; cerr << "\t\tDefault = 1000." << endl; + cerr << "\t-noOverlapping\t" << "\n\t\tdont allow shuffled intervals to overlap." << endl; cerr << "Notes: " << endl; cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl; diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp index 521f43d97ce0c937d0febdc208cbb648c7b13bb9..ea993affada562802248ebfeee62c74f431b963f 100644 --- a/src/utils/bedFile/bedFile.cpp +++ b/src/utils/bedFile/bedFile.cpp @@ -83,6 +83,18 @@ BedFile::BedFile(string &bedFile) _total_length(0) {} +BedFile::BedFile(void) +: _isGff(false), + _isVcf(false), + _typeIsKnown(false), + _merged_start(-1), + _merged_end(-1), + _merged_chrom(""), + _prev_start(-1), + _prev_chrom(""), + _total_length(0) +{} + // Destructor BedFile::~BedFile(void) { } @@ -656,6 +668,11 @@ void BedFile::loadBedFileIntoMap() { Close(); } +void BedFile::addBEDIntoMap(BED bedEntry) { + BIN bin = getBin(bedEntry.start, bedEntry.end); + bedMap[bedEntry.chrom][bin].push_back(bedEntry); +} + void BedFile::loadBedCovFileIntoMap() { diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h index 029b7e98837d3ee577b7a205e5ec3d3d0325d6f9..37d8dda799ce0e6110eda3ec04be230dda2637f0 100644 --- a/src/utils/bedFile/bedFile.h +++ b/src/utils/bedFile/bedFile.h @@ -380,6 +380,7 @@ public: // Constructor BedFile(string &); + BedFile(void); // Destructor ~BedFile(void); @@ -418,6 +419,9 @@ public: // vector of BEDs void loadBedFileIntoMap(); + // load a BED entry into and existing map + void addBEDIntoMap(BED bedEntry); + // load a BED file into a map keyed by chrom, then bin. value is // vector of BEDCOVs void loadBedCovFileIntoMap();