From 1d75b2e610f32d90bcb22298d0a138e8f70edad5 Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Sun, 25 Nov 2012 21:34:29 -0500 Subject: [PATCH] first cut at allowing -incl and -excl --- src/shuffleBed/shuffleBed.cpp | 50 +++++++++++++++++++++++++++++-- src/shuffleBed/shuffleBed.h | 1 + src/shuffleBed/shuffleBedMain.cpp | 4 ++- 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/shuffleBed/shuffleBed.cpp b/src/shuffleBed/shuffleBed.cpp index 8a307498..cad7a98a 100644 --- a/src/shuffleBed/shuffleBed.cpp +++ b/src/shuffleBed/shuffleBed.cpp @@ -78,6 +78,8 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, ShuffleWithExclusions(); else if (_haveExclude == false && _haveInclude == true) ShuffleWithInclusions(); + else if (_haveExclude == true && _haveInclude == true) + ShuffleWithInclusionsAndExclusions(); else Shuffle(); } @@ -108,7 +110,8 @@ void BedShuffle::Shuffle() { BEDPE bedEntry, nullBedPE; _bedpe->Open(); - while ((status = _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID) + while ((status = + _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID) { if (status == BED_VALID) { ChoosePairedLocus(bedEntry); @@ -168,7 +171,8 @@ void BedShuffle::ShuffleWithExclusions() { BEDPE bedEntry; _bedpe->Open(); - while ((status = _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID) + while ((status = + _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID) { if (status == BED_VALID) { // keep looking as long as the chosen @@ -232,6 +236,48 @@ void BedShuffle::ShuffleWithInclusions() { } +void BedShuffle::ShuffleWithInclusionsAndExclusions() { + + BED bedEntry; // used to store the current BED line from the BED file. + + _bed->Open(); + while (_bed->GetNextBed(bedEntry)) { + if (_bed->_status == BED_VALID) { + + // keep looking as long as the chosen + // locus happens to overlap with regions + // that the user wishes to exclude. + int tries = 0; + bool haveOverlap = false; + do + { + // choose a new locus + ChooseLocusFromInclusionFile(bedEntry); + + haveOverlap = _exclude->anyHits(bedEntry.chrom, + bedEntry.start, + bedEntry.end, + bedEntry.strand, + false, false, + _overlapFraction, false); + tries++; + } while ((haveOverlap == true) && (tries <= MAX_TRIES)); + + + if (tries > MAX_TRIES) { + cerr << "Error, line " << _bed->_lineNum + << ": tried " << MAX_TRIES + << " potential loci for entry, but could not avoid " + << "excluded regions. Ignoring entry and moving on." + << endl; } + else { + _bed->reportBedNewLine(bedEntry); + } + } + } + _bed->Close(); +} + void BedShuffle::ChooseLocus(BED &bedEntry) { string randomChrom; diff --git a/src/shuffleBed/shuffleBed.h b/src/shuffleBed/shuffleBed.h index 648a34d4..a2b8d395 100644 --- a/src/shuffleBed/shuffleBed.h +++ b/src/shuffleBed/shuffleBed.h @@ -79,6 +79,7 @@ private: void Shuffle(); void ShuffleWithExclusions(); void ShuffleWithInclusions(); + void ShuffleWithInclusionsAndExclusions(); void ChooseLocus(BED &); void ChooseLocusFromInclusionFile(BED &); diff --git a/src/shuffleBed/shuffleBedMain.cpp b/src/shuffleBed/shuffleBedMain.cpp index 8edb6a40..13b9d339 100644 --- a/src/shuffleBed/shuffleBedMain.cpp +++ b/src/shuffleBed/shuffleBedMain.cpp @@ -125,11 +125,13 @@ int shuffle_main(int argc, char* argv[]) { cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl; showHelp = true; } - + + /* if (haveInclude && haveExclude) { cerr << endl << "*****" << endl << "*****ERROR: Cannot use -incl and -excl together." << endl << "*****" << endl; showHelp = true; } + */ if (!showHelp) { BedShuffle *bc = new BedShuffle(bedFile, genomeFile, excludeFile, -- GitLab