diff --git a/src/shuffleBed/shuffleBed.cpp b/src/shuffleBed/shuffleBed.cpp index 8a3074985e9e5acdd7677c925edbef9db3dbe910..cad7a98af3464de6d3d0c40e2ed5d6e0fb50ad92 100644 --- a/src/shuffleBed/shuffleBed.cpp +++ b/src/shuffleBed/shuffleBed.cpp @@ -78,6 +78,8 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, ShuffleWithExclusions(); else if (_haveExclude == false && _haveInclude == true) ShuffleWithInclusions(); + else if (_haveExclude == true && _haveInclude == true) + ShuffleWithInclusionsAndExclusions(); else Shuffle(); } @@ -108,7 +110,8 @@ void BedShuffle::Shuffle() { BEDPE bedEntry, nullBedPE; _bedpe->Open(); - while ((status = _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID) + while ((status = + _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID) { if (status == BED_VALID) { ChoosePairedLocus(bedEntry); @@ -168,7 +171,8 @@ void BedShuffle::ShuffleWithExclusions() { BEDPE bedEntry; _bedpe->Open(); - while ((status = _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID) + while ((status = + _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID) { if (status == BED_VALID) { // keep looking as long as the chosen @@ -232,6 +236,48 @@ void BedShuffle::ShuffleWithInclusions() { } +void BedShuffle::ShuffleWithInclusionsAndExclusions() { + + BED bedEntry; // used to store the current BED line from the BED file. + + _bed->Open(); + while (_bed->GetNextBed(bedEntry)) { + if (_bed->_status == BED_VALID) { + + // keep looking as long as the chosen + // locus happens to overlap with regions + // that the user wishes to exclude. + int tries = 0; + bool haveOverlap = false; + do + { + // choose a new locus + ChooseLocusFromInclusionFile(bedEntry); + + haveOverlap = _exclude->anyHits(bedEntry.chrom, + bedEntry.start, + bedEntry.end, + bedEntry.strand, + false, false, + _overlapFraction, false); + tries++; + } while ((haveOverlap == true) && (tries <= MAX_TRIES)); + + + if (tries > MAX_TRIES) { + cerr << "Error, line " << _bed->_lineNum + << ": tried " << MAX_TRIES + << " potential loci for entry, but could not avoid " + << "excluded regions. Ignoring entry and moving on." + << endl; } + else { + _bed->reportBedNewLine(bedEntry); + } + } + } + _bed->Close(); +} + void BedShuffle::ChooseLocus(BED &bedEntry) { string randomChrom; diff --git a/src/shuffleBed/shuffleBed.h b/src/shuffleBed/shuffleBed.h index 648a34d4c5b474e77486f13a1b667e2a86b9db7b..a2b8d3951f8860e5a762c843a679c9d7addc68b3 100644 --- a/src/shuffleBed/shuffleBed.h +++ b/src/shuffleBed/shuffleBed.h @@ -79,6 +79,7 @@ private: void Shuffle(); void ShuffleWithExclusions(); void ShuffleWithInclusions(); + void ShuffleWithInclusionsAndExclusions(); void ChooseLocus(BED &); void ChooseLocusFromInclusionFile(BED &); diff --git a/src/shuffleBed/shuffleBedMain.cpp b/src/shuffleBed/shuffleBedMain.cpp index 8edb6a40b28e505adffd76124e2ea5f3589bef90..13b9d3390d4a45787060f9d15307c63b4a380715 100644 --- a/src/shuffleBed/shuffleBedMain.cpp +++ b/src/shuffleBed/shuffleBedMain.cpp @@ -125,11 +125,13 @@ int shuffle_main(int argc, char* argv[]) { cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl; showHelp = true; } - + + /* if (haveInclude && haveExclude) { cerr << endl << "*****" << endl << "*****ERROR: Cannot use -incl and -excl together." << endl << "*****" << endl; showHelp = true; } + */ if (!showHelp) { BedShuffle *bc = new BedShuffle(bedFile, genomeFile, excludeFile,