diff --git a/src/shuffleBed/Makefile b/src/shuffleBed/Makefile index a85f2df607778018ecec266f2dc574f9f806c558..2f945715cff8508e0167776f96e6b658064f4493 100644 --- a/src/shuffleBed/Makefile +++ b/src/shuffleBed/Makefile @@ -7,7 +7,7 @@ BIN_DIR = ../../bin/ # ------------------- INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ -I$(UTILITIES_DIR)/bedFilePE/ \ - -I$(UTILITIES_DIR)/GenomeFile/ \ + -I$(UTILITIES_DIR)/genomeFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/fileType/ \ diff --git a/src/shuffleBed/shuffleBed.cpp b/src/shuffleBed/shuffleBed.cpp index 9e36bebb22d2fb54a2d1f75ad5e9071bcd2c77fb..518e23784478b2a79f1fbee1da069228ac679a2e 100644 --- a/src/shuffleBed/shuffleBed.cpp +++ b/src/shuffleBed/shuffleBed.cpp @@ -19,7 +19,8 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, bool haveInclude, bool sameChrom, float overlapFraction, int seed, bool chooseChrom, bool isBedpe, size_t maxTries, - bool noOverlapping) { + bool noOverlapping, bool preventExceedingChromEnd) +{ _bedFile = bedFile; _genomeFile = genomeFile; @@ -34,6 +35,8 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, _isBedpe = isBedpe; _maxTries = maxTries; _noOverlapping = noOverlapping; + _preventExceedingChromEnd = preventExceedingChromEnd; + // use the supplied seed for the random // number generation if given. else, @@ -337,6 +340,14 @@ void BedShuffle::ChooseLocus(BED &bedEntry) { bedEntry.start = location.second; bedEntry.end = bedEntry.start + length; chromSize = _genome->getChromSize(location.first); + + if ((bedEntry.end > chromSize) && + (_preventExceedingChromEnd == false)) + { + bedEntry.end = chromSize; + break; + } + } while (bedEntry.end > chromSize); // keep looking if we have exceeded the end of the chrom. } @@ -361,6 +372,14 @@ void BedShuffle::ChooseLocus(BED &bedEntry) { bedEntry.start = randomStart; bedEntry.end = randomStart + length; } + + if ((bedEntry.end > chromSize) && + (_preventExceedingChromEnd == false)) + { + bedEntry.end = chromSize; + break; + } + } while (bedEntry.end > chromSize); } } diff --git a/src/shuffleBed/shuffleBed.h b/src/shuffleBed/shuffleBed.h index e80b11ecfe629f2659ce9c7e5889c165b22ce213..a53f44f6cfc2bf9b5cd91c4048a46107dd3eeb22 100644 --- a/src/shuffleBed/shuffleBed.h +++ b/src/shuffleBed/shuffleBed.h @@ -11,7 +11,7 @@ ******************************************************************************/ #include "bedFile.h" #include "bedFilePE.h" -#include "GenomeFile.h" +#include "genomeFile.h" #include <vector> #include <iostream> @@ -39,7 +39,8 @@ public: bool haveInclude, bool sameChrom, float overlapFraction, int seed, bool chooseChrom, bool isBedpe, - size_t _maxTries, bool noOverlapping); + size_t _maxTries, bool noOverlapping, + bool preventExceedingChromEnd); // destructor ~BedShuffle(void); @@ -60,6 +61,7 @@ private: bool _isBedpe; size_t _maxTries; bool _noOverlapping; + bool _preventExceedingChromEnd; // The BED file from which to compute coverage. diff --git a/src/shuffleBed/shuffleBedMain.cpp b/src/shuffleBed/shuffleBedMain.cpp index 5acbef8855e2ffb9695b39d7127e718c8e8174bc..c53dad503d77397e22c4630155c0881436dfccd3 100644 --- a/src/shuffleBed/shuffleBedMain.cpp +++ b/src/shuffleBed/shuffleBedMain.cpp @@ -47,6 +47,7 @@ int shuffle_main(int argc, char* argv[]) { bool isBedpe = false; size_t maxTries = 1000; bool noOverlapping = false; + bool allowBeyondChromEnd = false; for(int i = 1; i < argc; i++) { @@ -125,6 +126,9 @@ int shuffle_main(int argc, char* argv[]) { else if(PARAMETER_CHECK("-noOverlapping", 14, parameterLength)) { noOverlapping = true; } + else if(PARAMETER_CHECK("-allowBeyondChromEnd", 20, parameterLength)) { + allowBeyondChromEnd = true; + } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; showHelp = true; @@ -143,7 +147,8 @@ int shuffle_main(int argc, char* argv[]) { haveInclude, sameChrom, overlapFraction, seed, chooseChrom, isBedpe, - maxTries, noOverlapping); + maxTries, noOverlapping, + !(allowBeyondChromEnd)); delete bc; return 0; } @@ -198,7 +203,14 @@ void shuffle_help(void) { cerr << "\t-maxTries\t" << "\n\t\tMax. number of attempts to find a home for a shuffled interval" << endl; cerr << "\t\tin the presence of -incl or -excl." << endl; cerr << "\t\tDefault = 1000." << endl; - cerr << "\t-noOverlapping\t" << "\n\t\tDon't allow shuffled intervals to overlap." << endl; + cerr << "\t-noOverlapping\t" << "\n\t\tDon't allow shuffled intervals to overlap." << endl; + + cerr << "\t-allowBeyondChromEnd\t" << "\n\t\tAllow shuffled intervals to be relocated to a position" << endl; + cerr << "\t\tin which the entire original interval cannot fit w/o exceeding" << endl; + cerr << "\t\tthe end of the chromosome. In this case, the end coordinate of the" << endl; + cerr << "\t\tshuffled interval will be set to the chromosome's length." << endl; + cerr << "\t\tBy default, an interval's original length must be fully-contained" << endl; + cerr << "\t\twithin the chromosome." << endl; cerr << "Notes: " << endl; cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl;