Skip to content
Snippets Groups Projects
Commit 4f951770 authored by Aaron's avatar Aaron
Browse files

Added new -sorted option to intersectBed. No memory and filthy fast.

parent 0eb0f045
No related branches found
No related tags found
No related merge requests found
...@@ -18,7 +18,6 @@ SUBDIRS = $(SRC_DIR)/annotateBed \ ...@@ -18,7 +18,6 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/bedToBam \ $(SRC_DIR)/bedToBam \
$(SRC_DIR)/bedToIgv \ $(SRC_DIR)/bedToIgv \
$(SRC_DIR)/bed12ToBed6 \ $(SRC_DIR)/bed12ToBed6 \
$(SRC_DIR)/chromsweep \
$(SRC_DIR)/closestBed \ $(SRC_DIR)/closestBed \
$(SRC_DIR)/complementBed \ $(SRC_DIR)/complementBed \
$(SRC_DIR)/coverageBed \ $(SRC_DIR)/coverageBed \
...@@ -45,6 +44,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \ ...@@ -45,6 +44,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
UTIL_SUBDIRS = $(SRC_DIR)/utils/lineFileUtilities \ UTIL_SUBDIRS = $(SRC_DIR)/utils/lineFileUtilities \
$(SRC_DIR)/utils/bedFile \ $(SRC_DIR)/utils/bedFile \
$(SRC_DIR)/utils/bedGraphFile \ $(SRC_DIR)/utils/bedGraphFile \
$(SRC_DIR)/utils/chromsweep \
$(SRC_DIR)/utils/gzstream \ $(SRC_DIR)/utils/gzstream \
$(SRC_DIR)/utils/fileType \ $(SRC_DIR)/utils/fileType \
$(SRC_DIR)/utils/bedFilePE \ $(SRC_DIR)/utils/bedFilePE \
......
/*****************************************************************************
chromsweepMain.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "chromsweep.h"
#include "version.h"
using namespace std;
// define our program name
#define PROGRAM_NAME "chromsweep"
// define our parameter checking macro
#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
// function declarations
void ShowHelp(void);
int main(int argc, char* argv[]) {
// our configuration variables
bool showHelp = false;
// input files
string bedAFile;
string bedBFile;
// input arguments
float overlapFraction = 1E-9;
bool haveBedA = false;
bool haveBedB = false;
bool noHit = false;
bool anyHit = false;
bool writeA = false;
bool writeB = false;
bool writeCount = false;
bool writeOverlap = false;
bool writeAllOverlap = false;
bool haveFraction = false;
bool reciprocalFraction = false;
bool forceStrand = false;
bool obeySplits = false;
bool inputIsBam = false;
bool outputIsBam = true;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true;
}
}
if(showHelp) ShowHelp();
// do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-a", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedA = true;
outputIsBam = false;
bedAFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {
if ((i+1) < argc) {
haveBedA = true;
inputIsBam = true;
bedAFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedB = true;
bedBFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
outputIsBam = false;
}
else if(PARAMETER_CHECK("-u", 2, parameterLength)) {
anyHit = true;
}
else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
if ((i+1) < argc) {
haveFraction = true;
overlapFraction = atof(argv[i + 1]);
i++;
}
}
else if(PARAMETER_CHECK("-wa", 3, parameterLength)) {
writeA = true;
}
else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {
writeB = true;
}
else if(PARAMETER_CHECK("-wo", 3, parameterLength)) {
writeOverlap = true;
}
else if(PARAMETER_CHECK("-wao", 4, parameterLength)) {
writeAllOverlap = true;
writeOverlap = true;
}
else if(PARAMETER_CHECK("-c", 2, parameterLength)) {
writeCount = true;
}
else if(PARAMETER_CHECK("-r", 2, parameterLength)) {
reciprocalFraction = true;
}
else if (PARAMETER_CHECK("-v", 2, parameterLength)) {
noHit = true;
}
else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
forceStrand = true;
}
else if (PARAMETER_CHECK("-split", 6, parameterLength)) {
obeySplits = true;
}
else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
}
}
// make sure we have both input files
if (!haveBedA || !haveBedB) {
cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
showHelp = true;
}
if (anyHit && noHit) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -v, not both." << endl << "*****" << endl;
showHelp = true;
}
if (writeB && writeCount) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -c, not both." << endl << "*****" << endl;
showHelp = true;
}
if (writeCount && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (writeA && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wa OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (writeB && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (reciprocalFraction && !haveFraction) {
cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl;
showHelp = true;
}
if (anyHit && writeCount) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -c, not both." << endl << "*****" << endl;
showHelp = true;
}
if (anyHit && writeB) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wb, not both." << endl << "*****" << endl;
showHelp = true;
}
if (anyHit && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) {
ChromSweep *sweep = new ChromSweep(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap,
writeAllOverlap, overlapFraction, noHit, writeCount, forceStrand,
reciprocalFraction, obeySplits, inputIsBam, outputIsBam);
pair<BED, vector<BED> > hit_set;
while (sweep->Next(hit_set)) {
sweep->ReportQuery(hit_set.first);
cout << hit_set.second.size() << "\n";
}
delete sweep;
return 0;
}
else {
ShowHelp();
}
}
void ShowHelp(void) {
cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
cerr << "Summary: Report overlaps between two feature files." << endl << endl;
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;
cerr << "Options: " << endl;
cerr << "\t-abam\t" << "The A input file is in BAM format. Output will be BAM as well." << endl << endl;
cerr << "\t-bed\t" << "When using BAM input (-abam), write output as BED. The default" << endl;
cerr << "\t\tis to write output in BAM when using -abam." << endl << endl;
cerr << "\t-wa\t" << "Write the original entry in A for each overlap." << endl << endl;
cerr << "\t-wb\t" << "Write the original entry in B for each overlap." << endl;
cerr << "\t\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;
cerr << "\t-wo\t" << "Write the original A and B entries plus the number of base" << endl;
cerr << "\t\tpairs of overlap between the two features." << endl;
cerr << "\t\t- Overlaps restricted by -f and -r." << endl;
cerr << "\t\t Only A features with overlap are reported." << endl << endl;
cerr << "\t-wao\t" << "Write the original A and B entries plus the number of base" << endl;
cerr << "\t\tpairs of overlap between the two features." << endl;
cerr << "\t\t- Overlapping features restricted by -f and -r." << endl;
cerr << "\t\t However, A features w/o overlap are also reported" << endl;
cerr << "\t\t with a NULL B feature and overlap = 0." << endl << endl;
cerr << "\t-u\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;
cerr << "\t\t- In other words, just report the fact >=1 hit was found." << endl;
cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
cerr << "\t-c\t" << "For each entry in A, report the number of overlaps with B." << endl;
cerr << "\t\t- Reports 0 for A entries that have no overlap with B." << endl;
cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
cerr << "\t-v\t" << "Only report those entries in A that have _no overlaps_ with B." << endl;
cerr << "\t\t- Similar to \"grep -v\" (an homage)." << endl << endl;
cerr << "\t-f\t" << "Minimum overlap required as a fraction of A." << endl;
cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl;
cerr << "\t\t- FLOAT (e.g. 0.50)" << endl << endl;
cerr << "\t-r\t" << "Require that the fraction overlap be reciprocal for A and B." << endl;
cerr << "\t\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;
cerr << "\t\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;
cerr << "\t-s\t" << "Force strandedness. That is, only report hits in B that" << endl;
cerr << "\t\toverlap A on the same strand." << endl;
cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl;
cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl;
// end the program here
exit(1);
}
...@@ -12,13 +12,15 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ ...@@ -12,13 +12,15 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \
-I$(UTILITIES_DIR)/fileType/ \ -I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/BamTools/include \ -I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BamTools-Ancillary -I$(UTILITIES_DIR)/BamTools-Ancillary \
-I$(UTILITIES_DIR)/chromsweep \
# ---------------------------------- # ----------------------------------
# define our source and object files # define our source and object files
# ---------------------------------- # ----------------------------------
SOURCES= intersectMain.cpp intersectBed.cpp SOURCES= intersectMain.cpp intersectBed.cpp
OBJECTS= $(SOURCES:.cpp=.o) OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o BamAncillary.o gzstream.o fileType.o _EXT_OBJECTS=bedFile.o lineFileUtilities.o BamAncillary.o gzstream.o fileType.o chromsweep.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= intersectBed PROGRAM= intersectBed
...@@ -42,6 +44,7 @@ $(EXT_OBJECTS): ...@@ -42,6 +44,7 @@ $(EXT_OBJECTS):
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/chromsweep/
clean: clean:
@echo "Cleaning up." @echo "Cleaning up."
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
/************************************ /************************************
Helper functions Helper functions
************************************/ ************************************/
bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool printable) { bool BedIntersect::processHits(const BED &a, const vector<BED> &hits) {
// how many overlaps are there b/w the bed and the set of hits? // how many overlaps are there b/w the bed and the set of hits?
int s, e, overlapBases; int s, e, overlapBases;
...@@ -37,7 +37,7 @@ bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool print ...@@ -37,7 +37,7 @@ bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool print
if (_reciprocal == false) { if (_reciprocal == false) {
hitsFound = true; hitsFound = true;
numOverlaps++; numOverlaps++;
if (printable == true) if (_printable == true)
ReportOverlapDetail(overlapBases, a, *h, s, e); ReportOverlapDetail(overlapBases, a, *h, s, e);
} }
// we require there to be sufficient __reciprocal__ overlap // we require there to be sufficient __reciprocal__ overlap
...@@ -47,7 +47,7 @@ bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool print ...@@ -47,7 +47,7 @@ bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool print
if (bOverlap >= _overlapFraction) { if (bOverlap >= _overlapFraction) {
hitsFound = true; hitsFound = true;
numOverlaps++; numOverlaps++;
if (printable == true) if (_printable == true)
ReportOverlapDetail(overlapBases, a, *h, s, e); ReportOverlapDetail(overlapBases, a, *h, s, e);
} }
} }
...@@ -66,7 +66,8 @@ bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool print ...@@ -66,7 +66,8 @@ bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool print
BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit, BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,
bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap, bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand, float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,
bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam) { bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,
bool sortedInput) {
_bedAFile = bedAFile; _bedAFile = bedAFile;
_bedBFile = bedBFile; _bedBFile = bedBFile;
...@@ -85,11 +86,13 @@ BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit, ...@@ -85,11 +86,13 @@ BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,
_bamInput = bamInput; _bamInput = bamInput;
_bamOutput = bamOutput; _bamOutput = bamOutput;
_isUncompressedBam = isUncompressedBam; _isUncompressedBam = isUncompressedBam;
_sortedInput = sortedInput;
// create new BED file objects for A and B // should we print each overlap, or does the user want summary information?
_bedA = new BedFile(bedAFile); _printable = true;
_bedB = new BedFile(bedBFile); if (_anyHit || _noHit || _writeCount)
_printable = false;
if (_bamInput == false) if (_bamInput == false)
IntersectBed(); IntersectBed();
else else
...@@ -108,14 +111,9 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) { ...@@ -108,14 +111,9 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) {
bool hitsFound = false; bool hitsFound = false;
// should we print each overlap, or does the user want summary information?
bool printable = true;
if (_anyHit || _noHit || _writeCount)
printable = false;
// collect and report the sufficient hits // collect and report the sufficient hits
_bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand); _bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
hitsFound = processHits(a, hits, printable); hitsFound = processHits(a, hits);
return hitsFound; return hitsFound;
} }
...@@ -190,42 +188,61 @@ bool BedIntersect::FindOneOrMoreOverlap(const BED &a) { ...@@ -190,42 +188,61 @@ bool BedIntersect::FindOneOrMoreOverlap(const BED &a) {
void BedIntersect::IntersectBed() { void BedIntersect::IntersectBed() {
// load the "B" file into a map in order to // create new BED file objects for A and B
// compare each entry in A to it in search of overlaps. _bedA = new BedFile(_bedAFile);
_bedB->loadBedFileIntoMap(); _bedB = new BedFile(_bedBFile);
int lineNum = 0; if (_sortedInput == false) {
vector<BED> hits; // load the "B" file into a map in order to
hits.reserve(100); // compare each entry in A to it in search of overlaps.
BED a, nullBed; _bedB->loadBedFileIntoMap();
BedLineStatus bedStatus;
int lineNum = 0;
// open the "A" file, process each BED entry and searh for overlaps. vector<BED> hits;
_bedA->Open(); hits.reserve(100);
while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { BED a, nullBed;
if (bedStatus == BED_VALID) { BedLineStatus bedStatus;
// treat the BED as a single "block"
if (_obeySplits == false) { // open the "A" file, process each BED entry and searh for overlaps.
FindOverlaps(a, hits); _bedA->Open();
hits.clear(); while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
a = nullBed; if (bedStatus == BED_VALID) {
} // treat the BED as a single "block"
// split the BED12 into blocks and look for overlaps in each discrete block if (_obeySplits == false) {
else { FindOverlaps(a, hits);
bedVector bedBlocks; // vec to store the discrete BED "blocks"
splitBedIntoBlocks(a, lineNum, bedBlocks);
vector<BED>::const_iterator bedItr = bedBlocks.begin();
vector<BED>::const_iterator bedEnd = bedBlocks.end();
for (; bedItr != bedEnd; ++bedItr) {
FindOverlaps(*bedItr, hits);
hits.clear(); hits.clear();
a = nullBed;
}
// split the BED12 into blocks and look for overlaps in each discrete block
else {
bedVector bedBlocks; // vec to store the discrete BED "blocks"
splitBedIntoBlocks(a, lineNum, bedBlocks);
vector<BED>::const_iterator bedItr = bedBlocks.begin();
vector<BED>::const_iterator bedEnd = bedBlocks.end();
for (; bedItr != bedEnd; ++bedItr) {
FindOverlaps(*bedItr, hits);
hits.clear();
}
a = nullBed;
} }
a = nullBed;
} }
} }
_bedA->Close();
}
else {
// use the chromsweep algorithm to detect overlaps on the fly.
ChromSweep sweep = ChromSweep(_bedA, _bedB, _anyHit, _writeA, _writeB, _writeOverlap,
_writeAllOverlap, _overlapFraction, _noHit, _writeCount, _sameStrand, _diffStrand,
_reciprocal, _obeySplits, _bamInput, _bamOutput);
// hit_set.first = current entry (a) from A
// hit_set.second = list of hits in B for this a
pair<BED, vector<BED> > hit_set;
while (sweep.Next(hit_set)) {
processHits(hit_set.first, hit_set.second);
}
} }
_bedA->Close();
} }
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#define INTERSECTBED_H #define INTERSECTBED_H
#include "bedFile.h" #include "bedFile.h"
#include "chromsweep.h"
#include "api/BamReader.h" #include "api/BamReader.h"
#include "api/BamWriter.h" #include "api/BamWriter.h"
#include "api/BamAux.h" #include "api/BamAux.h"
...@@ -37,7 +37,8 @@ public: ...@@ -37,7 +37,8 @@ public:
BedIntersect(string bedAFile, string bedBFile, bool anyHit, BedIntersect(string bedAFile, string bedBFile, bool anyHit,
bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap, bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand, float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,
bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam); bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,
bool sortedInput);
// destructor // destructor
~BedIntersect(void); ~BedIntersect(void);
...@@ -67,7 +68,9 @@ private: ...@@ -67,7 +68,9 @@ private:
bool _bamInput; bool _bamInput;
bool _bamOutput; bool _bamOutput;
bool _isUncompressedBam; bool _isUncompressedBam;
bool _sortedInput;
bool _printable;
// instance of a bed file class. // instance of a bed file class.
BedFile *_bedA, *_bedB; BedFile *_bedA, *_bedB;
...@@ -80,7 +83,7 @@ private: ...@@ -80,7 +83,7 @@ private:
void IntersectBam(string bamFile); void IntersectBam(string bamFile);
bool processHits(const BED &a, const vector<BED> &hits, bool printable); bool processHits(const BED &a, const vector<BED> &hits);
bool FindOverlaps(const BED &a, vector<BED> &hits); bool FindOverlaps(const BED &a, vector<BED> &hits);
......
...@@ -53,6 +53,7 @@ int main(int argc, char* argv[]) { ...@@ -53,6 +53,7 @@ int main(int argc, char* argv[]) {
bool inputIsBam = false; bool inputIsBam = false;
bool outputIsBam = true; bool outputIsBam = true;
bool uncompressedBam = false; bool uncompressedBam = false;
bool sortedInput = false;
// check to see if we should print out some help // check to see if we should print out some help
if(argc <= 1) showHelp = true; if(argc <= 1) showHelp = true;
...@@ -142,6 +143,9 @@ int main(int argc, char* argv[]) { ...@@ -142,6 +143,9 @@ int main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) { else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) {
uncompressedBam = true; uncompressedBam = true;
} }
else if(PARAMETER_CHECK("-sorted", 7, parameterLength)) {
sortedInput = true;
}
else { else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true; showHelp = true;
...@@ -208,7 +212,7 @@ int main(int argc, char* argv[]) { ...@@ -208,7 +212,7 @@ int main(int argc, char* argv[]) {
BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap, BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap,
writeAllOverlap, overlapFraction, noHit, writeCount, sameStrand, diffStrand, writeAllOverlap, overlapFraction, noHit, writeCount, sameStrand, diffStrand,
reciprocalFraction, obeySplits, inputIsBam, outputIsBam, uncompressedBam); reciprocalFraction, obeySplits, inputIsBam, outputIsBam, uncompressedBam, sortedInput);
delete bi; delete bi;
return 0; return 0;
} }
...@@ -281,6 +285,8 @@ void ShowHelp(void) { ...@@ -281,6 +285,8 @@ void ShowHelp(void) {
cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl; cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl;
cerr << "\t-sorted\t" << "Use the \"chromsweep\" algorithm for sorted (-k1,1 -k2,2n) input" << endl << endl;
// end the program here // end the program here
exit(1); exit(1);
......
UTILITIES_DIR = ../utils/ OBJ_DIR = ../../../obj/
OBJ_DIR = ../../obj/ BIN_DIR = ../../../bin/
BIN_DIR = ../../bin/ UTILITIES_DIR = ../../utils/
# ------------------- # -------------------
# define our includes # define our includes
# ------------------- # -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/version/ \
-I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/fileType/ -I$(UTILITIES_DIR)/fileType/
# ---------------------------------- # ----------------------------------
# define our source and object files # define our source and object files
# ---------------------------------- # ----------------------------------
SOURCES= chromsweepMain.cpp chromsweep.cpp SOURCES= chromsweep.cpp
OBJECTS= $(SOURCES:.cpp=.o) OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o _EXT_OBJECTS=lineFileUtilities.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= chromsweep
all: $(PROGRAM)
.PHONY: all
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
@echo " * linking $(PROGRAM)"
@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
$(BUILT_OBJECTS): $(SOURCES) $(BUILT_OBJECTS): $(SOURCES)
@echo " * compiling" $(*F).cpp @echo " * compiling" $(*F).cpp
@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
$(EXT_OBJECTS): $(EXT_OBJECTS):
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ @$(MAKE) --no-print-directory -C $(INCLUDES)
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
clean: clean:
@echo "Cleaning up." @echo "Cleaning up."
@rm -f $(OBJ_DIR)/* $(BIN_DIR)/* @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
.PHONY: clean .PHONY: clean
\ No newline at end of file
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
#include "lineFileUtilities.h" #include "lineFileUtilities.h"
#include "chromsweep.h" #include "chromsweep.h"
#include <queue> #include <queue>
#include <set>
bool after(const BED &a, const BED &b); bool after(const BED &a, const BED &b);
void report_hits(const BED &curr_qy, const vector<BED> &hits); void report_hits(const BED &curr_qy, const vector<BED> &hits);
...@@ -22,13 +21,13 @@ vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector ...@@ -22,13 +21,13 @@ vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector
/* /*
Constructor Constructor
*/ */
ChromSweep::ChromSweep(string bedAFile, string bedBFile, bool anyHit, ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool anyHit,
bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap, bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
float overlapFraction, bool noHit, bool writeCount, bool forceStrand, float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,
bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput) { bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput) {
_bedAFile = bedAFile; _bedA = bedA;
_bedBFile = bedBFile; _bedB = bedB;
_anyHit = anyHit; _anyHit = anyHit;
_noHit = noHit; _noHit = noHit;
_writeA = writeA; _writeA = writeA;
...@@ -37,20 +36,12 @@ ChromSweep::ChromSweep(string bedAFile, string bedBFile, bool anyHit, ...@@ -37,20 +36,12 @@ ChromSweep::ChromSweep(string bedAFile, string bedBFile, bool anyHit,
_writeAllOverlap = writeAllOverlap; _writeAllOverlap = writeAllOverlap;
_writeCount = writeCount; _writeCount = writeCount;
_overlapFraction = overlapFraction; _overlapFraction = overlapFraction;
_forceStrand = forceStrand; _sameStrand = sameStrand;
_diffStrand = diffStrand;
_reciprocal = reciprocal; _reciprocal = reciprocal;
_obeySplits = obeySplits; _obeySplits = obeySplits;
_bamInput = bamInput; _bamInput = bamInput;
_bamOutput = bamOutput; _bamOutput = bamOutput;
if (_anyHit || _noHit || _writeCount)
_printable = false;
else
_printable = true;
// create new BED file objects for A and B
_bedA = new BedFile(bedAFile);
_bedB = new BedFile(bedBFile);
// prime the results pump. // prime the results pump.
_qy_lineNum = 0; _qy_lineNum = 0;
......
...@@ -13,13 +13,6 @@ ...@@ -13,13 +13,6 @@
#define CHROMSWEEP_H #define CHROMSWEEP_H
#include "bedFile.h" #include "bedFile.h"
// #include "BamReader.h"
// #include "BamWriter.h"
// #include "BamAncillary.h"
// #include "BamAux.h"
// using namespace BamTools;
#include <vector> #include <vector>
#include <queue> #include <queue>
#include <iostream> #include <iostream>
...@@ -34,9 +27,9 @@ class ChromSweep { ...@@ -34,9 +27,9 @@ class ChromSweep {
public: public:
// constructor // constructor
ChromSweep(string bedAFile, string bedBFile, bool anyHit, ChromSweep(BedFile *bedA, BedFile *bedB, bool anyHit,
bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap, bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
float overlapFraction, bool noHit, bool writeCount, bool forceStrand, float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,
bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput); bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput);
// destructor // destructor
...@@ -59,7 +52,8 @@ private: ...@@ -59,7 +52,8 @@ private:
bool _writeOverlap; bool _writeOverlap;
bool _writeAllOverlap; bool _writeAllOverlap;
bool _forceStrand; bool _sameStrand;
bool _diffStrand;
bool _reciprocal; bool _reciprocal;
float _overlapFraction; float _overlapFraction;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment