Skip to content
Snippets Groups Projects
Commit 5b55adb5 authored by arq5x
Browse files

[ENH] add -allowBeyondChromEnd feature to shuffle.

parent 0199576f
No related branches found
No related tags found
No related merge requests found
...@@ -7,7 +7,7 @@ BIN_DIR = ../../bin/ ...@@ -7,7 +7,7 @@ BIN_DIR = ../../bin/
# ------------------- # -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/bedFilePE/ \ -I$(UTILITIES_DIR)/bedFilePE/ \
-I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/genomeFile/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \
-I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/fileType/ \ -I$(UTILITIES_DIR)/fileType/ \
......
...@@ -19,7 +19,8 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, ...@@ -19,7 +19,8 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile,
bool haveInclude, bool sameChrom, bool haveInclude, bool sameChrom,
float overlapFraction, int seed, float overlapFraction, int seed,
bool chooseChrom, bool isBedpe, size_t maxTries, bool chooseChrom, bool isBedpe, size_t maxTries,
bool noOverlapping) { bool noOverlapping, bool preventExceedingChromEnd)
{
_bedFile = bedFile; _bedFile = bedFile;
_genomeFile = genomeFile; _genomeFile = genomeFile;
...@@ -34,6 +35,8 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, ...@@ -34,6 +35,8 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile,
_isBedpe = isBedpe; _isBedpe = isBedpe;
_maxTries = maxTries; _maxTries = maxTries;
_noOverlapping = noOverlapping; _noOverlapping = noOverlapping;
_preventExceedingChromEnd = preventExceedingChromEnd;
// use the supplied seed for the random // use the supplied seed for the random
// number generation if given. else, // number generation if given. else,
...@@ -337,6 +340,14 @@ void BedShuffle::ChooseLocus(BED &bedEntry) { ...@@ -337,6 +340,14 @@ void BedShuffle::ChooseLocus(BED &bedEntry) {
bedEntry.start = location.second; bedEntry.start = location.second;
bedEntry.end = bedEntry.start + length; bedEntry.end = bedEntry.start + length;
chromSize = _genome->getChromSize(location.first); chromSize = _genome->getChromSize(location.first);
if ((bedEntry.end > chromSize) &&
(_preventExceedingChromEnd == false))
{
bedEntry.end = chromSize;
break;
}
} while (bedEntry.end > chromSize); } while (bedEntry.end > chromSize);
// keep looking if we have exceeded the end of the chrom. // keep looking if we have exceeded the end of the chrom.
} }
...@@ -361,6 +372,14 @@ void BedShuffle::ChooseLocus(BED &bedEntry) { ...@@ -361,6 +372,14 @@ void BedShuffle::ChooseLocus(BED &bedEntry) {
bedEntry.start = randomStart; bedEntry.start = randomStart;
bedEntry.end = randomStart + length; bedEntry.end = randomStart + length;
} }
if ((bedEntry.end > chromSize) &&
(_preventExceedingChromEnd == false))
{
bedEntry.end = chromSize;
break;
}
} while (bedEntry.end > chromSize); } while (bedEntry.end > chromSize);
} }
} }
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
******************************************************************************/ ******************************************************************************/
#include "bedFile.h" #include "bedFile.h"
#include "bedFilePE.h" #include "bedFilePE.h"
#include "GenomeFile.h" #include "genomeFile.h"
#include <vector> #include <vector>
#include <iostream> #include <iostream>
...@@ -39,7 +39,8 @@ public: ...@@ -39,7 +39,8 @@ public:
bool haveInclude, bool sameChrom, bool haveInclude, bool sameChrom,
float overlapFraction, int seed, float overlapFraction, int seed,
bool chooseChrom, bool isBedpe, bool chooseChrom, bool isBedpe,
size_t _maxTries, bool noOverlapping); size_t _maxTries, bool noOverlapping,
bool preventExceedingChromEnd);
// destructor // destructor
~BedShuffle(void); ~BedShuffle(void);
...@@ -60,6 +61,7 @@ private: ...@@ -60,6 +61,7 @@ private:
bool _isBedpe; bool _isBedpe;
size_t _maxTries; size_t _maxTries;
bool _noOverlapping; bool _noOverlapping;
bool _preventExceedingChromEnd;
// The BED file from which to compute coverage. // The BED file from which to compute coverage.
......
...@@ -47,6 +47,7 @@ int shuffle_main(int argc, char* argv[]) { ...@@ -47,6 +47,7 @@ int shuffle_main(int argc, char* argv[]) {
bool isBedpe = false; bool isBedpe = false;
size_t maxTries = 1000; size_t maxTries = 1000;
bool noOverlapping = false; bool noOverlapping = false;
bool allowBeyondChromEnd = false;
for(int i = 1; i < argc; i++) { for(int i = 1; i < argc; i++) {
...@@ -125,6 +126,9 @@ int shuffle_main(int argc, char* argv[]) { ...@@ -125,6 +126,9 @@ int shuffle_main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-noOverlapping", 14, parameterLength)) { else if(PARAMETER_CHECK("-noOverlapping", 14, parameterLength)) {
noOverlapping = true; noOverlapping = true;
} }
else if(PARAMETER_CHECK("-allowBeyondChromEnd", 20, parameterLength)) {
allowBeyondChromEnd = true;
}
else { else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true; showHelp = true;
...@@ -143,7 +147,8 @@ int shuffle_main(int argc, char* argv[]) { ...@@ -143,7 +147,8 @@ int shuffle_main(int argc, char* argv[]) {
haveInclude, sameChrom, haveInclude, sameChrom,
overlapFraction, seed, overlapFraction, seed,
chooseChrom, isBedpe, chooseChrom, isBedpe,
maxTries, noOverlapping); maxTries, noOverlapping,
!(allowBeyondChromEnd));
delete bc; delete bc;
return 0; return 0;
} }
...@@ -198,7 +203,14 @@ void shuffle_help(void) { ...@@ -198,7 +203,14 @@ void shuffle_help(void) {
cerr << "\t-maxTries\t" << "\n\t\tMax. number of attempts to find a home for a shuffled interval" << endl; cerr << "\t-maxTries\t" << "\n\t\tMax. number of attempts to find a home for a shuffled interval" << endl;
cerr << "\t\tin the presence of -incl or -excl." << endl; cerr << "\t\tin the presence of -incl or -excl." << endl;
cerr << "\t\tDefault = 1000." << endl; cerr << "\t\tDefault = 1000." << endl;
cerr << "\t-noOverlapping\t" << "\n\t\tDon't allow shuffled intervals to overlap." << endl; cerr << "\t-noOverlapping\t" << "\n\t\tDon't allow shuffled intervals to overlap." << endl;
cerr << "\t-allowBeyondChromEnd\t" << "\n\t\tAllow shuffled intervals to be relocated to a position" << endl;
cerr << "\t\tin which the entire original interval cannot fit w/o exceeding" << endl;
cerr << "\t\tthe end of the chromosome. In this case, the end coordinate of the" << endl;
cerr << "\t\tshuffled interval will be set to the chromosome's length." << endl;
cerr << "\t\tBy default, an interval's original length must be fully-contained" << endl;
cerr << "\t\twithin the chromosome." << endl;
cerr << "Notes: " << endl; cerr << "Notes: " << endl;
cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl; cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment