diff --git a/Makefile b/Makefile index 211eff54775984fc7f6eaa90765b163b83f081c6..4b1f5242e9b4cb6ac7769973c926bce3eca72513 100644 --- a/Makefile +++ b/Makefile @@ -16,11 +16,11 @@ export BIN_DIR = bin export SRC_DIR = src export UTIL_DIR = src/utils export CXX = g++ -ifeq ($(DEBUG),1) -export CXXFLAGS = -Wall -O0 -g -fno-inline -fkeep-inline-functions -D_FILE_OFFSET_BITS=64 -fPIC -DDEBUG -D_DEBUG -else +#ifeq ($(DEBUG),1) +#export CXXFLAGS = -Wall -O0 -g -fno-inline -fkeep-inline-functions -D_FILE_OFFSET_BITS=64 -fPIC -DDEBUG -D_DEBUG +#else export CXXFLAGS = -Wall -O2 -D_FILE_OFFSET_BITS=64 -fPIC -endif +#endif export LIBS = -lz export BT_ROOT = src/utils/BamTools/ @@ -42,7 +42,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \ $(SRC_DIR)/genomeCoverageBed \ $(SRC_DIR)/getOverlap \ $(SRC_DIR)/groupBy \ - $(SRC_DIR)/intersectBed \ + $(SRC_DIR)/intersectFile \ $(SRC_DIR)/jaccard \ $(SRC_DIR)/linksBed \ $(SRC_DIR)/maskFastaFromBed \ @@ -50,10 +50,12 @@ SUBDIRS = $(SRC_DIR)/annotateBed \ $(SRC_DIR)/mergeBed \ $(SRC_DIR)/multiBamCov \ $(SRC_DIR)/multiIntersectBed \ + $(SRC_DIR)/nekSandbox1 \ $(SRC_DIR)/nucBed \ $(SRC_DIR)/pairToBed \ $(SRC_DIR)/pairToPair \ $(SRC_DIR)/randomBed \ + $(SRC_DIR)/regressTest \ $(SRC_DIR)/reldist \ $(SRC_DIR)/shuffleBed \ $(SRC_DIR)/slopBed \ @@ -65,12 +67,17 @@ SUBDIRS = $(SRC_DIR)/annotateBed \ $(SRC_DIR)/windowMaker UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile \ + $(SRC_DIR)/utils/BinTree \ $(SRC_DIR)/utils/version \ $(SRC_DIR)/utils/bedGraphFile \ $(SRC_DIR)/utils/chromsweep \ + $(SRC_DIR)/utils/Contexts \ + $(SRC_DIR)/utils/FileRecordTools \ + $(SRC_DIR)/utils/general \ $(SRC_DIR)/utils/gzstream \ $(SRC_DIR)/utils/fileType \ $(SRC_DIR)/utils/bedFilePE \ + $(SRC_DIR)/utils/NewChromsweep \ $(SRC_DIR)/utils/sequenceUtilities \ $(SRC_DIR)/utils/tabFile \ $(SRC_DIR)/utils/BamTools \ @@ -78,7 +85,7 @@ UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile \ $(SRC_DIR)/utils/BlockedIntervals \ $(SRC_DIR)/utils/Fasta \ $(SRC_DIR)/utils/VectorOps \ - $(SRC_DIR)/utils/genomeFile + $(SRC_DIR)/utils/GenomeFile BUILT_OBJECTS = $(OBJ_DIR)/*.o diff --git a/src/bedToBam/Makefile b/src/bedToBam/Makefile index e527b3199e65d65b127244b18a8a843d9cee8ada..01a548a66679eac647868c3c4030498ddc529a87 100644 --- a/src/bedToBam/Makefile +++ b/src/bedToBam/Makefile @@ -8,7 +8,7 @@ BIN_DIR = ../../bin/ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ -I$(UTILITIES_DIR)/version/ \ -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/fileType/ \ -I$(UTILITIES_DIR)/BamTools/include \ diff --git a/src/bedToBam/bedToBam.cpp b/src/bedToBam/bedToBam.cpp index 6de2708f7e58d7dde8f0efddcd0025a494d74b69..f7261286ffe8fa58a5e4bcb82dcd14d1b7baae05 100644 --- a/src/bedToBam/bedToBam.cpp +++ b/src/bedToBam/bedToBam.cpp @@ -11,7 +11,7 @@ ******************************************************************************/ #include "lineFileUtilities.h" #include "bedFile.h" -#include "genomeFile.h" +#include "GenomeFile.h" #include "version.h" diff --git a/src/bedToIgv/Makefile b/src/bedToIgv/Makefile index b6c8a689b7f51fb219b4192cc0bc5a68303ee5f1..bdd1969b4eb4914d7b0aeac5e0d9a67a821d402c 100644 --- a/src/bedToIgv/Makefile +++ b/src/bedToIgv/Makefile @@ -6,7 +6,7 @@ BIN_DIR = ../../bin/ # define our includes # ------------------- INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/fileType/ \ diff --git a/src/bedToIgv/bedToIgv.cpp b/src/bedToIgv/bedToIgv.cpp index 41ace10bd53de0371fe381e6be98a73c2658e2e4..26b33d6e30e8ed06d7a6f2776fef058bc191f914 100644 --- a/src/bedToIgv/bedToIgv.cpp +++ b/src/bedToIgv/bedToIgv.cpp @@ -11,7 +11,7 @@ ******************************************************************************/ #include "lineFileUtilities.h" #include "bedFile.h" -#include "genomeFile.h" +#include "GenomeFile.h" #include "version.h" #include <vector> diff --git a/src/bedpeToBam/Makefile b/src/bedpeToBam/Makefile index 9df3c21fb0e9954f0fdfbf60359611258969d488..625234fc69328ecc278ca9acfdf22c9b3fbaf6ae 100644 --- a/src/bedpeToBam/Makefile +++ b/src/bedpeToBam/Makefile @@ -9,7 +9,7 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ -I$(UTILITIES_DIR)/bedFilePE/ \ -I$(UTILITIES_DIR)/version/ \ -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/fileType/ \ -I$(UTILITIES_DIR)/BamTools/include \ diff --git a/src/bedpeToBam/bedpeToBam.cpp b/src/bedpeToBam/bedpeToBam.cpp index 0e6ef2e4e1419e8998f10e650ecfc0b8de18047a..9a31081b83c418f022029b2812502574466ee1fa 100644 --- a/src/bedpeToBam/bedpeToBam.cpp +++ b/src/bedpeToBam/bedpeToBam.cpp @@ -11,7 +11,7 @@ ******************************************************************************/ #include "lineFileUtilities.h" #include "bedFilePE.h" -#include "genomeFile.h" +#include "GenomeFile.h" #include "version.h" diff --git a/src/bedtools.cpp b/src/bedtools.cpp index 11ff3a6296074279dbebadd59aef1ed1af611255..c0e98d3c155150b34bd878a5d4e2cca4f58aa312 100644 --- a/src/bedtools.cpp +++ b/src/bedtools.cpp @@ -45,6 +45,7 @@ int closest_main(int argc, char* argv[]); // int cluster_main(int argc, char* argv[]); // int complement_main(int argc, char* argv[]);// int coverage_main(int argc, char* argv[]); // +int regress_test_main(int argc, char **argv); // int expand_main(int argc, char* argv[]);// int fastafrombed_main(int argc, char* argv[]);// int flank_main(int argc, char* argv[]); // @@ -59,6 +60,7 @@ int map_main(int argc, char* argv[]); // int merge_main(int argc, char* argv[]); // int multibamcov_main(int argc, char* argv[]);// int multiintersect_main(int argc, char* argv[]);// +int nek_sandbox1_main(int argc, char* argv[]);// int nuc_main(int argc, char* argv[]);// int pairtobed_main(int argc, char* argv[]);// int pairtopair_main(int argc, char* argv[]);// @@ -136,9 +138,8 @@ int main(int argc, char *argv[]) else if (sub_cmd == "makewindows") return windowmaker_main(argc-1, argv+1); else if (sub_cmd == "groupby") return groupby_main(argc-1, argv+1); else if (sub_cmd == "expand") return expand_main(argc-1, argv+1); - - - + else if (sub_cmd == "neksb1") return nek_sandbox1_main(argc-1, argv+1); + else if (sub_cmd == "regresstest") return regress_test_main(argc, argv); //this command does need all the orig args. // help else if (sub_cmd == "-h" || sub_cmd == "--help" || sub_cmd == "-help") @@ -230,7 +231,7 @@ int bedtools_help(void) cout << "[ BAM focused tools ]" << endl; cout << " multicov " << "Counts coverage from multiple BAMs at specific intervals.\n"; cout << " tag " << "Tag BAM alignments based on overlaps with interval files.\n"; - + cout << endl; cout << "[ Statistical relationships ]" << endl; cout << " jaccard " << "Calculate the Jaccard statistic b/w two sets of intervals.\n"; diff --git a/src/complementBed/Makefile b/src/complementBed/Makefile index 8b80c316002bb11494e821e8352354a8d933cadf..30f70b2c3b49f39ef38a6d232687ca5717ecb8bc 100644 --- a/src/complementBed/Makefile +++ b/src/complementBed/Makefile @@ -6,7 +6,7 @@ BIN_DIR = ../../bin/ # define our includes # ------------------- INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/fileType/ \ diff --git a/src/complementBed/complementBed.h b/src/complementBed/complementBed.h index 29a6f3345eb521615a3ee4b3f2aa1844b2ed04ab..a6603899222a221ad1fb1f4ab9a37cd2d67eccb5 100644 --- a/src/complementBed/complementBed.h +++ b/src/complementBed/complementBed.h @@ -10,7 +10,7 @@ Licenced under the GNU General Public License 2.0 license. ******************************************************************************/ #include "bedFile.h" -#include "genomeFile.h" +#include "GenomeFile.h" #include <vector> #include <bitset> diff --git a/src/flankBed/Makefile b/src/flankBed/Makefile index e8ef3e49f26ad9ad3c944007a0dfd8e8d53b5943..ad3f0a1838d7d63282f3c07b7c088839d1d35254 100644 --- a/src/flankBed/Makefile +++ b/src/flankBed/Makefile @@ -6,7 +6,7 @@ BIN_DIR = ../../bin/ # define our includes # ------------------- INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/fileType/ \ diff --git a/src/flankBed/flankBed.h b/src/flankBed/flankBed.h index f344f60f18686f84f796618a4ad1659a52430189..995d13a04dd67d1082f0b762a39d840cbd3db6d9 100644 --- a/src/flankBed/flankBed.h +++ b/src/flankBed/flankBed.h @@ -11,7 +11,7 @@ ******************************************************************************/ #include "bedFile.h" -#include "genomeFile.h" +#include "GenomeFile.h" #include <vector> #include <iostream> diff --git a/src/genomeCoverageBed/Makefile b/src/genomeCoverageBed/Makefile index 8936fffc8d921254f1fa906a76a6507103ceb983..83e5563ef869ce7f0385de3cd5906db580442ebd 100644 --- a/src/genomeCoverageBed/Makefile +++ b/src/genomeCoverageBed/Makefile @@ -7,7 +7,7 @@ BIN_DIR = ../../bin/ # ------------------- INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/fileType/ \ -I$(UTILITIES_DIR)/BamTools/include \ diff --git a/src/genomeCoverageBed/genomeCoverageBed.h b/src/genomeCoverageBed/genomeCoverageBed.h index f2a877fd097a0a86bdc381e687ebd86151ecda16..09df91c64deae3e925014542c404bbd5ea759023 100644 --- a/src/genomeCoverageBed/genomeCoverageBed.h +++ b/src/genomeCoverageBed/genomeCoverageBed.h @@ -10,7 +10,7 @@ aaronquinlan@gmail.com Licenced under the GNU General Public License 2.0 license. ******************************************************************************/ #include "bedFile.h" -#include "genomeFile.h" +#include "GenomeFile.h" #include "BlockedIntervals.h" #include "api/BamReader.h" diff --git a/src/intersectBed/Makefile b/src/intersectBed/Makefile deleted file mode 100644 index 33fc0917468234c31d91530ead52684104abc4cf..0000000000000000000000000000000000000000 --- a/src/intersectBed/Makefile +++ /dev/null @@ -1,39 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include \ - -I$(UTILITIES_DIR)/BlockedIntervals \ - -I$(UTILITIES_DIR)/BamTools-Ancillary \ - -I$(UTILITIES_DIR)/chromsweep \ - -I$(UTILITIES_DIR)/version/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= intersectMain.cpp intersectBed.cpp intersectBed.h -OBJECTS= intersectMain.o intersectBed.o -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= intersectBed - -all: $(BUILT_OBJECTS) - -.PHONY: all - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(DFLAGS) $(INCLUDES) - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/intersectMain.o $(OBJ_DIR)/intersectBed.o - -.PHONY: clean diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp deleted file mode 100644 index 5d115abf9169c2b2c8021cb76d2fd2c3951eb339..0000000000000000000000000000000000000000 --- a/src/intersectBed/intersectBed.cpp +++ /dev/null @@ -1,401 +0,0 @@ -/***************************************************************************** - intersectBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "intersectBed.h" - -/************************************ -Helper functions -************************************/ -bool BedIntersect::processHits(const BED &a, const vector<BED> &hits) { - bool hitsFound = false; - if (_printable == true) { - // -wao the user wants to force the reporting of 0 overlap - if (hits.size() == 0) { - if (_writeAllOverlap) { - _bedA->reportBedTab(a); - _bedB->reportNullBedTab(); - printf("0\n"); - } - else if (_leftJoin) { - _bedA->reportBedTab(a); - _bedB->reportNullBedNewLine(); - } - } - else { - vector<BED>::const_iterator h = hits.begin(); - vector<BED>::const_iterator hitsEnd = hits.end(); - for (; h != hitsEnd; ++h) { - CHRPOS s = max(a.start, h->start); - CHRPOS e = min(a.end, h->end); - int overlapBases = (e - s); - ReportOverlapDetail(overlapBases, a, *h, s, e); - hitsFound = true; - } - } - } - else { - ReportOverlapSummary(a, hits.size()); - } - return hitsFound; -} - - -/* - Constructor -*/ -BedIntersect::BedIntersect(string bedAFile, string bedBFile, - bool anyHit, bool writeA, - bool writeB, bool writeOverlap, - bool writeAllOverlap, float overlapFraction, - bool noHit, bool leftJoin, - bool writeCount, bool sameStrand, - bool diffStrand, bool reciprocal, - bool obeySplits, bool bamInput, - bool bamOutput, bool isUncompressedBam, - bool sortedInput, bool printHeader) -{ - _bedAFile = bedAFile; - _bedBFile = bedBFile; - _anyHit = anyHit; - _noHit = noHit; - _leftJoin = leftJoin; - _writeA = writeA; - _writeB = writeB; - _writeOverlap = writeOverlap; - _writeAllOverlap = writeAllOverlap; - _writeCount = writeCount; - _overlapFraction = overlapFraction; - _sameStrand = sameStrand; - _diffStrand = diffStrand; - _reciprocal = reciprocal; - _obeySplits = obeySplits; - _bamInput = bamInput; - _bamOutput = bamOutput; - _isUncompressedBam = isUncompressedBam; - _sortedInput = sortedInput; - _printHeader = printHeader; - - // should we print each overlap, or does the user want summary information? - _printable = true; - if (_anyHit || _noHit || _writeCount) - _printable = false; - - if (_bamInput == false) - IntersectBed(); - else - IntersectBam(bedAFile); -} - - -/* - Destructor -*/ -BedIntersect::~BedIntersect(void) { -} - - -bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) { - bool hitsFound = false; - - // collect and report the sufficient hits - _bedB->allHits(a.chrom, a.start, a.end, a.strand, - hits, _sameStrand, _diffStrand, - _overlapFraction, _reciprocal); - hitsFound = processHits(a, hits); - return hitsFound; -} - - -bool BedIntersect::FindBlockedOverlaps(const BED &a, - const vector<BED> &a_blocks, - const vector<BED> &hits, - bool a_is_bam) -{ - int a_footprint = GetTotalBlockLength(a_blocks); - // container to store the set of raw hits - // that actually overlap the A blocks - bedVector valid_hits; - valid_hits.reserve(hits.size()); - - // 1. Loop through each raw hit (outer loop) - // 2. Break the raw hit into it;s blocks - // and see of one of the hit blocks (inner loop) - // overlaps one of a's blocks (inner, inner loop) - // 3. If so, mark the hit as valid and add it to the valid_set. - // Otherwise, the hit only overlapped the span of a and not - // the individual blocks. Thus, it doesn't count. - bedVector::const_iterator hItr = hits.begin(); - bedVector::const_iterator hEnd = hits.end(); - for (; hItr != hEnd; ++hItr) { - // break the hit into blocks - bedVector hitBlocks; - GetBedBlocks(*hItr, hitBlocks); - int b_footprint = GetTotalBlockLength(hitBlocks); - // test to see if there is a valid hit with one of the blocks - bool valid_hit = false; - int total_overlap = 0; - bedVector::const_iterator hbItr = hitBlocks.begin(); - bedVector::const_iterator hbEnd = hitBlocks.end(); - for (; hbItr != hbEnd; ++hbItr) { - // look for overlaps between this hit/block and each block in a - bedVector::const_iterator a_blockItr = a_blocks.begin(); - bedVector::const_iterator a_blockEnd = a_blocks.end(); - for (; a_blockItr != a_blockEnd; ++a_blockItr) { - int hs = max(a_blockItr->start, hbItr->start); - int he = min(a_blockItr->end, hbItr->end); - int overlap = he - hs; - if (overlap > 0) { - valid_hit = true; - total_overlap += overlap; - } - } - } - if (valid_hit) { - // require sufficient overlap fraction (reciprocal or otherwise) - // w.r.t to the "footprint" (i.e., the total length of each block) - if ( ((float) total_overlap - / - (float) a_footprint) > _overlapFraction) - { - if (_reciprocal && ((float) total_overlap / - (float) b_footprint) > _overlapFraction) - { - valid_hits.push_back(*hItr); - } - else if (!_reciprocal) { - valid_hits.push_back(*hItr); - } - } - } - } - if (!a_is_bam) { - return processHits(a, valid_hits); - } - else - return !valid_hits.empty(); -} - - -void BedIntersect::ReportOverlapDetail(int overlapBases, const BED &a, - const BED &b, CHRPOS s, CHRPOS e) -{ - // default. simple intersection only - if (_writeA == false && _writeB == false && - _writeOverlap == false && _leftJoin == false) - { - _bedA->reportBedRangeNewLine(a,s,e); - } - // -wa -wb write the original A and B - else if ((_writeA == true && _writeB == true) || _leftJoin == true) { - _bedA->reportBedTab(a); - _bedB->reportBedNewLine(b); - } - // -wa write just the original A - else if (_writeA == true) { - _bedA->reportBedNewLine(a); - } - // -wb write the intersected portion of A and the original B - else if (_writeB == true) { - _bedA->reportBedRangeTab(a,s,e); - _bedB->reportBedNewLine(b); - } - // -wo write the original A and B plus the no. of overlapping bases. - else if (_writeOverlap == true) { - _bedA->reportBedTab(a); - _bedB->reportBedTab(b); - if (b.zeroLength == false) printf("%d\n", overlapBases); - else printf("0\n"); - } -} - - -void BedIntersect::ReportOverlapSummary(const BED &a, const int &numOverlapsFound) { - // -u just report the fact that there was >= 1 overlaps - if (_anyHit && (numOverlapsFound >= 1)) { - _bedA->reportBedNewLine(a); - } - // -c report the total number of features overlapped in B - else if (_writeCount) { - _bedA->reportBedTab(a); - printf("%d\n", numOverlapsFound); - } - // -v report iff there were no overlaps - else if (_noHit && (numOverlapsFound == 0)) { - _bedA->reportBedNewLine(a); - } -} - - -void BedIntersect::IntersectBed() { - - // create new BED file objects for A and B - _bedA = new BedFile(_bedAFile); - _bedB = new BedFile(_bedBFile); - - if (_sortedInput == false) { - // load the "B" file into a map in order to - // compare each entry in A to it in search of overlaps. - _bedB->loadBedFileIntoMap(); - - vector<BED> hits; - hits.reserve(100); - BED a; - - // open the "A" file, process each BED entry and searh for overlaps. - _bedA->Open(); - // report A's header first if asked. - if (_printHeader == true) { - _bedA->PrintHeader(); - } - while (_bedA->GetNextBed(a)) { - if (_bedA->_status == BED_VALID) { - // treat the BED as a single "block" - if (_obeySplits == false) - FindOverlaps(a, hits); - // split the BED12 into blocks and look for - // overlaps in each discrete block - else { - // find the hits that overlap with the - // full span of the blocked BED - _bedB->allHits(a.chrom, a.start, a.end, a.strand, - hits, _sameStrand, _diffStrand, - 0.0, false); - // break a into discrete blocks, as we need to - // measure overlap with the individual blocks, - // not the full span. - bedVector a_blocks; - GetBedBlocks(a, a_blocks); - // find the overlaps between the block in A and B - // last parm is false as a is not a BAM entry - FindBlockedOverlaps(a, a_blocks, hits, false); - } - hits.clear(); - } - } - _bedA->Close(); - } - else { - // use the chromsweep algorithm to detect overlaps on the fly. - ChromSweep sweep = ChromSweep(_bedA, _bedB, - _sameStrand, _diffStrand, - _overlapFraction, _reciprocal, - false, _printHeader); - - pair<BED, vector<BED> > hit_set; - hit_set.second.reserve(10000); - while (sweep.Next(hit_set)) { - if (_obeySplits == false) { - processHits(hit_set.first, hit_set.second); - } - else { - bedVector a_blocks; - GetBedBlocks(hit_set.first, a_blocks); - FindBlockedOverlaps(hit_set.first, a_blocks, - hit_set.second, false); - } - } - } -} - - -void BedIntersect::IntersectBam(string bamFile) { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bedB = new BedFile(_bedBFile); - _bedB->loadBedFileIntoMap(); - - // create a dummy BED A file for printing purposes if not - // using BAM output. - if (_bamOutput == false) { - _bedA = new BedFile(_bedAFile); - _bedA->bedType = 12; - } - // open the BAM file - BamReader reader; - BamWriter writer; - if (!reader.Open(bamFile)) { - cerr << "Failed to open BAM file " << bamFile << endl; - exit(1); - } - - // get header & reference information - string bamHeader = reader.GetHeaderText(); - RefVector refs = reader.GetReferenceData(); - // open a BAM output to stdout if we are writing BAM - if (_bamOutput == true) { - // set compression mode - BamWriter::CompressionMode compressionMode = BamWriter::Compressed; - if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed; - writer.SetCompressionMode(compressionMode); - // open our BAM writer - writer.Open("stdout", bamHeader, refs); - } - vector<BED> hits; - // reserve some space - hits.reserve(100); - BamAlignment bam; - // get each set of alignments for each pair. - while (reader.GetNextAlignment(bam)) { - - // save an unaligned read if -v - if (!bam.IsMapped()) { - if ((_noHit == true) && (_bamOutput == true)) - writer.SaveAlignment(bam); - continue; - } - // break alignment into discrete blocks, - bedVector bed_blocks; - string chrom = refs.at(bam.RefID).RefName; - GetBamBlocks(bam, chrom, bed_blocks, false, true); - // create a basic BED entry from the BAM alignment - BED bed; - MakeBedFromBam(bam, chrom, bed_blocks, bed); - bool overlapsFound = false; - if ((_bamOutput == true) && (_obeySplits == false)) - { - overlapsFound = _bedB->anyHits(bed.chrom, bed.start, bed.end, - bed.strand, _sameStrand, _diffStrand, - _overlapFraction, _reciprocal); - } - else if ( ((_bamOutput == true) && (_obeySplits == true)) || - ((_bamOutput == false) && (_obeySplits == true)) ) - { - // find the hits that overlap with the full span of the blocked BED - _bedB->allHits(bed.chrom, bed.start, bed.end, bed.strand, - hits, _sameStrand, _diffStrand, - _overlapFraction, _reciprocal); - // find the overlaps between the block in A and B - overlapsFound = - FindBlockedOverlaps(bed, bed_blocks, hits, _bamOutput); - } - else if ((_bamOutput == false) && (_obeySplits == false)) - { - FindOverlaps(bed, hits); - } - // save the BAM alignment if overlap reqs. were met - if (_bamOutput == true) { - if ((overlapsFound == true) && (_noHit == false)) - writer.SaveAlignment(bam); - else if ((overlapsFound == false) && (_noHit == true)) - writer.SaveAlignment(bam); - } - hits.clear(); - } - - // close the relevant BAM files. - reader.Close(); - if (_bamOutput == true) { - writer.Close(); - } -} - diff --git a/src/intersectBed/intersectBed.h b/src/intersectBed/intersectBed.h deleted file mode 100644 index 5a10a81db0d276cda62613114934368bad04eb27..0000000000000000000000000000000000000000 --- a/src/intersectBed/intersectBed.h +++ /dev/null @@ -1,102 +0,0 @@ -/***************************************************************************** - intersectBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef INTERSECTBED_H -#define INTERSECTBED_H - -#include "bedFile.h" -#include "chromsweep.h" -#include "api/BamReader.h" -#include "api/BamWriter.h" -#include "api/BamAux.h" -#include "BlockedIntervals.h" -#include "BamAncillary.h" -using namespace BamTools; - - -#include <vector> -#include <iostream> -#include <fstream> -#include <stdlib.h> -using namespace std; - - - -class BedIntersect { - -public: - - // constructor - BedIntersect(string bedAFile, string bedBFile, bool anyHit, - bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap, - float overlapFraction, bool noHit, bool leftJoin, bool writeCount, bool sameStrand, bool diffStrand, - bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam, - bool sortedInput, bool printHeader); - - // destructor - ~BedIntersect(void); - -private: - - //------------------------------------------------ - // private attributes - //------------------------------------------------ - string _bedAFile; - string _bedBFile; - - bool _writeA; // should the original A feature be reported? - bool _writeB; // should the original B feature be reported? - bool _writeOverlap; - bool _writeAllOverlap; - - bool _sameStrand; - bool _diffStrand; - bool _reciprocal; - float _overlapFraction; - - bool _anyHit; - bool _noHit; - bool _leftJoin; - bool _writeCount; // do we want a count of the number of overlaps in B? - bool _obeySplits; - bool _bamInput; - bool _bamOutput; - bool _isUncompressedBam; - bool _sortedInput; - bool _printable; - bool _printHeader; - - // instance of a bed file class. - BedFile *_bedA, *_bedB; - - //------------------------------------------------ - // private methods - //------------------------------------------------ - void IntersectBed(istream &bedInput); - - void IntersectBed(); - - void IntersectBam(string bamFile); - - bool processHits(const BED &a, const vector<BED> &hits); - - bool FindOverlaps(const BED &a, vector<BED> &hits); - - bool FindBlockedOverlaps(const BED &a, const vector<BED> &a_blocks, - const vector<BED> &hits, bool a_is_bam); - - void ReportOverlapDetail(int overlapBases, const BED &a, const BED &b, CHRPOS s, CHRPOS e); - - void ReportOverlapSummary(const BED &a, const int &numOverlapsFound); - -}; - -#endif /* INTERSECTBED_H */ diff --git a/src/intersectBed/intersectMain.cpp b/src/intersectBed/intersectMain.cpp deleted file mode 100644 index be3991a4daee23db5729cb96b520206301773fe4..0000000000000000000000000000000000000000 --- a/src/intersectBed/intersectMain.cpp +++ /dev/null @@ -1,327 +0,0 @@ -/***************************************************************************** - intersectMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "intersectBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "bedtools intersect" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void intersect_help(void); - -int intersect_main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedAFile; - string bedBFile; - - // input arguments - float overlapFraction = 1E-9; - - bool haveBedA = false; - bool haveBedB = false; - bool noHit = false; - bool leftJoin = false; - bool anyHit = false; - bool writeA = false; - bool writeB = false; - bool writeCount = false; - bool writeOverlap = false; - bool writeAllOverlap = false; - bool haveFraction = false; - bool reciprocalFraction = false; - bool sameStrand = false; - bool diffStrand = false; - bool obeySplits = false; - bool inputIsBam = false; - bool outputIsBam = true; - bool uncompressedBam = false; - bool sortedInput = false; - bool printHeader = false; - - // check to see if we should print out some help - if(argc <= 1) showHelp = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) intersect_help(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-a", 2, parameterLength)) { - if ((i+1) < argc) { - haveBedA = true; - outputIsBam = false; - bedAFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-abam", 5, parameterLength)) { - if ((i+1) < argc) { - haveBedA = true; - inputIsBam = true; - bedAFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-b", 2, parameterLength)) { - if ((i+1) < argc) { - haveBedB = true; - bedBFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-bed", 4, parameterLength)) { - outputIsBam = false; - } - else if(PARAMETER_CHECK("-u", 2, parameterLength)) { - anyHit = true; - } - else if(PARAMETER_CHECK("-f", 2, parameterLength)) { - if ((i+1) < argc) { - haveFraction = true; - overlapFraction = atof(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-wa", 3, parameterLength)) { - writeA = true; - } - else if(PARAMETER_CHECK("-wb", 3, parameterLength)) { - writeB = true; - } - else if(PARAMETER_CHECK("-wo", 3, parameterLength)) { - writeOverlap = true; - } - else if(PARAMETER_CHECK("-wao", 4, parameterLength)) { - writeAllOverlap = true; - writeOverlap = true; - } - else if(PARAMETER_CHECK("-c", 2, parameterLength)) { - writeCount = true; - } - else if(PARAMETER_CHECK("-r", 2, parameterLength)) { - reciprocalFraction = true; - } - else if (PARAMETER_CHECK("-v", 2, parameterLength)) { - noHit = true; - } - else if (PARAMETER_CHECK("-s", 2, parameterLength)) { - sameStrand = true; - } - else if (PARAMETER_CHECK("-S", 2, parameterLength)) { - diffStrand = true; - } - else if (PARAMETER_CHECK("-split", 6, parameterLength)) { - obeySplits = true; - } - else if (PARAMETER_CHECK("-loj", 4, parameterLength)) { - leftJoin = true; - } - else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) { - uncompressedBam = true; - } - else if(PARAMETER_CHECK("-sorted", 7, parameterLength)) { - sortedInput = true; - } - else if(PARAMETER_CHECK("-header", 7, parameterLength)) { - printHeader = true; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBedA || !haveBedB) { - cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl; - showHelp = true; - } - - if (anyHit && noHit) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -v, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (writeB && writeCount) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -c, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (writeCount && writeOverlap) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (writeA && writeOverlap) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -wa OR -wo, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (writeB && writeOverlap) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (reciprocalFraction && !haveFraction) { - cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl; - showHelp = true; - } - - if (anyHit && writeCount) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -c, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (anyHit && writeB) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wb, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (anyHit && writeOverlap) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wo, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (sameStrand && diffStrand) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (inputIsBam && writeB && outputIsBam) { - cerr << endl << "*****" << endl << "*****WARNING: -wb is ignored with -abam" << endl << "*****" << endl; - } - - if (inputIsBam && leftJoin) { - cerr << endl << "*****" << endl << "*****WARNING: -loj is ignored with -abam" << endl << "*****" << endl; - } - - if (outputIsBam && (writeCount || writeOverlap || writeAllOverlap)) - { - outputIsBam = false; - } - - if (!showHelp) { - - BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap, - writeAllOverlap, overlapFraction, noHit, leftJoin, writeCount, sameStrand, diffStrand, - reciprocalFraction, obeySplits, inputIsBam, outputIsBam, uncompressedBam, - sortedInput, printHeader); - delete bi; - return 0; - } - else { - intersect_help(); - return 0; - } -} - -void intersect_help(void) { - - cerr << "\nTool: bedtools intersect (aka intersectBed)" << endl; - cerr << "Version: " << VERSION << "\n"; - cerr << "Summary: Report overlaps between two feature files." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl; - - cerr << "Options: " << endl; - - cerr << "\t-abam\t" << "The A input file is in BAM format. Output will be BAM as well. Replaces -a." << endl << endl; - - cerr << "\t-ubam\t" << "Write uncompressed BAM output. Default writes compressed BAM." << endl << endl; - - cerr << "\t-bed\t" << "When using BAM input (-abam), write output as BED. The default" << endl; - cerr << "\t\tis to write output in BAM when using -abam." << endl << endl; - - cerr << "\t-wa\t" << "Write the original entry in A for each overlap." << endl << endl; - - cerr << "\t-wb\t" << "Follow the A entry with the original " - << "entry in B for each overlap." << endl; - cerr << "\t\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl; - - cerr << "\t-loj\t" << "Perform a \"left outer join\". That is, for each feature in A" << endl; - cerr << "\t\treport each overlap with B. If no overlaps are found, " << endl; - cerr << "\t\treport a NULL feature for B." << endl << endl; - - cerr << "\t-wo\t" << "Write the original A and B entries plus the number of base" << endl; - cerr << "\t\tpairs of overlap between the two features." << endl; - cerr << "\t\t- Overlaps restricted by -f and -r." << endl; - cerr << "\t\t Only A features with overlap are reported." << endl << endl; - - cerr << "\t-wao\t" << "Write the original A and B entries plus the number of base" << endl; - cerr << "\t\tpairs of overlap between the two features." << endl; - cerr << "\t\t- Overlapping features restricted by -f and -r." << endl; - cerr << "\t\t However, A features w/o overlap are also reported" << endl; - cerr << "\t\t with a NULL B feature and overlap = 0." << endl << endl; - - cerr << "\t-u\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl; - cerr << "\t\t- In other words, just report the fact >=1 hit was found." << endl; - cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl; - - cerr << "\t-c\t" << "For each entry in A, report the number of overlaps with B." << endl; - cerr << "\t\t- Reports 0 for A entries that have no overlap with B." << endl; - cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl; - - cerr << "\t-v\t" << "Only report those entries in A that have _no overlaps_ with B." << endl; - cerr << "\t\t- Similar to \"grep -v\" (an homage)." << endl << endl; - - cerr << "\t-f\t" << "Minimum overlap required as a fraction of A." << endl; - cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl; - cerr << "\t\t- FLOAT (e.g. 0.50)" << endl << endl; - - cerr << "\t-r\t" << "Require that the fraction overlap be reciprocal for A and B." << endl; - cerr << "\t\t- In other words, if -f is 0.90 and -r is used, this requires" << endl; - cerr << "\t\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl; - - cerr << "\t-s\t" << "Require same strandedness. That is, only report hits in B" << endl; - cerr << "\t\tthat overlap A on the _same_ strand." << endl; - cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl; - - cerr << "\t-S\t" << "Require different strandedness. That is, only report hits in B" << endl; - cerr << "\t\tthat overlap A on the _opposite_ strand." << endl; - cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl; - - cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl; - - cerr << "\t-sorted\t" << "Use the \"chromsweep\" algorithm for sorted (-k1,1 -k2,2n) input" << endl << endl; - - cerr << "\t-header\t" << "Print the header from the A file prior to results." << endl << endl; - - cerr << "Notes: " << endl; - cerr << "\t(1) When a BAM file is used for the A file, the alignment is retained if overlaps exist," << endl; - cerr << "\tand exlcuded if an overlap cannot be found. If multiple overlaps exist, they are not" << endl; - cerr << "\treported, as we are only testing for one or more overlaps." << endl << endl; - - // end the program here - exit(1); - -} diff --git a/src/multiIntersectBed/Makefile b/src/multiIntersectBed/Makefile index 6ebf6050d40b1e163a12a2b521680eccffb20df5..e43e7554964982ed756417d078d3bb882831bfd4 100644 --- a/src/multiIntersectBed/Makefile +++ b/src/multiIntersectBed/Makefile @@ -7,7 +7,7 @@ BIN_DIR = ../../bin/ # ------------------- INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/fileType/ \ -I$(UTILITIES_DIR)/Point/ \ diff --git a/src/multiIntersectBed/multiIntersectBed.h b/src/multiIntersectBed/multiIntersectBed.h index 949cea1b7d86a96800f79bf9f5c73e917df5939e..fb757a8bcf4a06e78265d73c8eef99460c5d58ac 100644 --- a/src/multiIntersectBed/multiIntersectBed.h +++ b/src/multiIntersectBed/multiIntersectBed.h @@ -17,7 +17,7 @@ #include <vector> #include <string> #include "bedFile.h" -#include "genomeFile.h" +#include "GenomeFile.h" #include "Point.h" class MultiIntersectBed diff --git a/src/multiIntersectBed/multiIntersectBedMain.cpp b/src/multiIntersectBed/multiIntersectBedMain.cpp index 3ac39d15db49530cb98147a90938cce9d11c7765..5b8d1b2724df8732aeb7cf4b73c61e5de31c33eb 100644 --- a/src/multiIntersectBed/multiIntersectBedMain.cpp +++ b/src/multiIntersectBed/multiIntersectBedMain.cpp @@ -19,7 +19,7 @@ #include <getopt.h> #include <libgen.h> //for basename() -#include "genomeFile.h" +#include "GenomeFile.h" #include "multiIntersectBed.h" #include "version.h" diff --git a/src/randomBed/Makefile b/src/randomBed/Makefile index 3a9ee898ad36288629e4df0e269aca9ff82e96b5..2c795c2477909489f30708e888c4debd789d37d2 100644 --- a/src/randomBed/Makefile +++ b/src/randomBed/Makefile @@ -5,7 +5,7 @@ BIN_DIR = ../../bin/ # ------------------- # define our includes # ------------------- -INCLUDES = -I$(UTILITIES_DIR)/genomeFile/ \ +INCLUDES = -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/BamTools/include \ -I$(UTILITIES_DIR)/version/ diff --git a/src/randomBed/randomBed.h b/src/randomBed/randomBed.h index f922a38eb4f1d5226b11868b91ef29bb995c4510..4002c1986b2101124ec8f10e2a3af1c5cbb7de8b 100644 --- a/src/randomBed/randomBed.h +++ b/src/randomBed/randomBed.h @@ -9,7 +9,7 @@ Licenced under the GNU General Public License 2.0 license. ******************************************************************************/ -#include "genomeFile.h" +#include "GenomeFile.h" #include <vector> #include <iostream> diff --git a/src/shuffleBed/Makefile b/src/shuffleBed/Makefile index 2f945715cff8508e0167776f96e6b658064f4493..a85f2df607778018ecec266f2dc574f9f806c558 100644 --- a/src/shuffleBed/Makefile +++ b/src/shuffleBed/Makefile @@ -7,7 +7,7 @@ BIN_DIR = ../../bin/ # ------------------- INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ -I$(UTILITIES_DIR)/bedFilePE/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/fileType/ \ diff --git a/src/shuffleBed/shuffleBed.h b/src/shuffleBed/shuffleBed.h index 2623c174106ab75e2355947069659a0856a40b2c..e80b11ecfe629f2659ce9c7e5889c165b22ce213 100644 --- a/src/shuffleBed/shuffleBed.h +++ b/src/shuffleBed/shuffleBed.h @@ -11,7 +11,7 @@ ******************************************************************************/ #include "bedFile.h" #include "bedFilePE.h" -#include "genomeFile.h" +#include "GenomeFile.h" #include <vector> #include <iostream> diff --git a/src/slopBed/Makefile b/src/slopBed/Makefile index ebdaba35b406d4e4dae8eadef89f7319df88313d..d63d924733f1db248f531ebab1ce968b8a76c982 100644 --- a/src/slopBed/Makefile +++ b/src/slopBed/Makefile @@ -6,7 +6,7 @@ BIN_DIR = ../../bin/ # define our includes # ------------------- INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/fileType/ \ diff --git a/src/slopBed/slopBed.h b/src/slopBed/slopBed.h index 7de0afd728cce49b85f31f0ec60c1f2efbcd4001..1fd1aba2ccc5de333734dc345f1a35fa2f41fdc6 100644 --- a/src/slopBed/slopBed.h +++ b/src/slopBed/slopBed.h @@ -11,7 +11,7 @@ ******************************************************************************/ #include "bedFile.h" -#include "genomeFile.h" +#include "GenomeFile.h" #include <vector> #include <iostream> diff --git a/src/unionBedGraphs/Makefile b/src/unionBedGraphs/Makefile index 1d255d067ad7d1d8739bdd4d3c3f43d8a561e6cc..b0c0d8d688319293fd812abd4ed731839aed52aa 100755 --- a/src/unionBedGraphs/Makefile +++ b/src/unionBedGraphs/Makefile @@ -7,7 +7,7 @@ BIN_DIR = ../../bin/ # ------------------- INCLUDES = -I$(UTILITIES_DIR)/bedGraphFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/fileType/ \ -I$(UTILITIES_DIR)/Point/ \ diff --git a/src/unionBedGraphs/unionBedGraphs.h b/src/unionBedGraphs/unionBedGraphs.h index 25cbe4e73994343ee149f60b9c9cb83925d04d95..c0f5929e4fbe6a70d686ab71d46bcd69c58fd0b1 100644 --- a/src/unionBedGraphs/unionBedGraphs.h +++ b/src/unionBedGraphs/unionBedGraphs.h @@ -16,7 +16,7 @@ #include <vector> #include <string> #include "bedGraphFile.h" -#include "genomeFile.h" +#include "GenomeFile.h" #include "Point.h" class UnionBedGraphs diff --git a/src/unionBedGraphs/unionBedGraphsMain.cpp b/src/unionBedGraphs/unionBedGraphsMain.cpp index 6411adffbe69f543feb6ea33612a2c6ee20faa21..46d36184d5f6cc88d15312c72f95461122a41cba 100644 --- a/src/unionBedGraphs/unionBedGraphsMain.cpp +++ b/src/unionBedGraphs/unionBedGraphsMain.cpp @@ -19,7 +19,7 @@ #include <getopt.h> #include <libgen.h> //for basename() -#include "genomeFile.h" +#include "GenomeFile.h" #include "unionBedGraphs.h" #include "version.h" diff --git a/src/utils/BamTools-Ancillary/Makefile b/src/utils/BamTools-Ancillary/Makefile index a0024c1bb0df614c274423ec4c1428e30cc98710..8d64745eff9c594a1e88c88f877c95af491995f4 100644 --- a/src/utils/BamTools-Ancillary/Makefile +++ b/src/utils/BamTools-Ancillary/Makefile @@ -12,7 +12,7 @@ INCLUDES = -I$(UTILITIES_DIR)/BamTools/include \ # define our source and object files # ---------------------------------- SOURCES= BamAncillary.cpp BamAncillary.h -OBJECTS= BamAncillary.o +OBJECTS= $(SOURCES:.cpp=.o) BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) all: $(BUILT_OBJECTS) @@ -27,4 +27,4 @@ clean: @echo "Cleaning up." @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* -.PHONY: clean +.PHONY: clean \ No newline at end of file diff --git a/src/utils/BamTools/src/api/BamAlignment.h b/src/utils/BamTools/src/api/BamAlignment.h index a2349ea9ce9b254066450dc953b99925a8f03b56..fb1e8722d9fc70bbc118552e209fba44ef2dbc07 100644 --- a/src/utils/BamTools/src/api/BamAlignment.h +++ b/src/utils/BamTools/src/api/BamAlignment.h @@ -139,7 +139,9 @@ struct API_EXPORT BamAlignment { unsigned int& numBytesParsed) const; // internal data - private: + public: //had to change this from private. + //needed to expose some members of support data that + //were otherwise inaccessible when using only core alignments. struct BamAlignmentSupportData { @@ -163,6 +165,7 @@ struct API_EXPORT BamAlignment { BamAlignmentSupportData SupportData; friend class Internal::BamReaderPrivate; friend class Internal::BamWriterPrivate; + friend class BamFileReader; mutable std::string ErrorString; // mutable to allow updates even in logically const methods //! \endinternal diff --git a/src/utils/BamTools/src/api/BamReader.cpp b/src/utils/BamTools/src/api/BamReader.cpp index ae2adec94e498dadc97115807ab0d52dbe178804..b92c71355aca2fa8be427e59f2778e97b0d9e4fd 100644 --- a/src/utils/BamTools/src/api/BamReader.cpp +++ b/src/utils/BamTools/src/api/BamReader.cpp @@ -259,6 +259,11 @@ bool BamReader::Open(const std::string& filename) { return d->Open(filename); } +bool BamReader::OpenStream(std::istream* stream) { + return d->OpenStream(stream); +} + + /*! \fn bool BamReader::OpenIndex(const std::string& indexFilename) \brief Opens a BAM index file. diff --git a/src/utils/BamTools/src/api/BamReader.h b/src/utils/BamTools/src/api/BamReader.h index fb9064d9994d08a4c2afe177b2ca205c1841da4c..5660040ca4e9647e7fc1fe59789782e6bfde8c1c 100644 --- a/src/utils/BamTools/src/api/BamReader.h +++ b/src/utils/BamTools/src/api/BamReader.h @@ -46,6 +46,8 @@ class API_EXPORT BamReader { bool Jump(int refID, int position = 0); // opens a BAM file bool Open(const std::string& filename); + bool OpenStream(std::istream* stream); + // returns internal file pointer to beginning of alignment data bool Rewind(void); // sets the target region of interest diff --git a/src/utils/BamTools/src/api/internal/bam/BamReader_p.cpp b/src/utils/BamTools/src/api/internal/bam/BamReader_p.cpp index 75d727928bb6757c49f033ea938e2fa4e81a5f20..70782a76117e78247eb690b2ff1f24bc547cd20c 100644 --- a/src/utils/BamTools/src/api/internal/bam/BamReader_p.cpp +++ b/src/utils/BamTools/src/api/internal/bam/BamReader_p.cpp @@ -388,6 +388,35 @@ bool BamReaderPrivate::Open(const string& filename) { } } +bool BamReaderPrivate::OpenStream(std::istream* stream) { + + try { + + // make sure we're starting with fresh state + Close(); + + // open BgzfStream + m_stream.OpenStream(stream, IBamIODevice::ReadOnly); + + // load BAM metadata + LoadHeaderData(); + LoadReferenceData(); + + // store "filename" & offset of first alignment + m_filename = "stream"; // or whatever + m_alignmentsBeginOffset = m_stream.Tell(); + + // return success + return true; + + } catch ( BamException& e ) { + const string error = e.what(); + const string message = string("could not open input stream: \n\t") + error; + SetErrorString("BamReader::Open", message); + return false; + } +} + bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) { if ( m_randomAccessController.OpenIndex(indexFilename, this) ) diff --git a/src/utils/BamTools/src/api/internal/bam/BamReader_p.h b/src/utils/BamTools/src/api/internal/bam/BamReader_p.h index e8db646e28034eaf20095b31a1fda2b3cd30a8a2..3164d6fc82af19da8983b3325544a62d605d1939 100644 --- a/src/utils/BamTools/src/api/internal/bam/BamReader_p.h +++ b/src/utils/BamTools/src/api/internal/bam/BamReader_p.h @@ -47,6 +47,7 @@ class BamReaderPrivate { const std::string Filename(void) const; bool IsOpen(void) const; bool Open(const std::string& filename); + bool OpenStream(std::istream* stream); bool Rewind(void); bool SetRegion(const BamRegion& region); diff --git a/src/utils/BamTools/src/api/internal/io/BamDeviceFactory_p.cpp b/src/utils/BamTools/src/api/internal/io/BamDeviceFactory_p.cpp index f9c7694ec179e0265c7468dacd98cb097b72a621..a1fdd3d12633762780764f0ea44ddc92da7e8d8d 100644 --- a/src/utils/BamTools/src/api/internal/io/BamDeviceFactory_p.cpp +++ b/src/utils/BamTools/src/api/internal/io/BamDeviceFactory_p.cpp @@ -12,6 +12,8 @@ #include "api/internal/io/BamFtp_p.h" #include "api/internal/io/BamHttp_p.h" #include "api/internal/io/BamPipe_p.h" +#include "api/internal/io/StdStreamDevice_p.h" + using namespace BamTools; using namespace BamTools::Internal; @@ -35,3 +37,9 @@ IBamIODevice* BamDeviceFactory::CreateDevice(const string& source) { // otherwise assume a "normal" file return new BamFile(source); } + +IBamIODevice* BamDeviceFactory::CreateDevice(std::istream *stream) { + // compile-time sanity check + BT_ASSERT_X( stream, "BamDeviceFactory::CreateDevice - null input stream"); + return new StdStreamDevice(stream); +} diff --git a/src/utils/BamTools/src/api/internal/io/BamDeviceFactory_p.h b/src/utils/BamTools/src/api/internal/io/BamDeviceFactory_p.h index 1d48533d8687773384b4582f2e006a07612890eb..00bb07d1be4c4b0ca3ac84a04b7240ad61e242bb 100644 --- a/src/utils/BamTools/src/api/internal/io/BamDeviceFactory_p.h +++ b/src/utils/BamTools/src/api/internal/io/BamDeviceFactory_p.h @@ -29,6 +29,7 @@ namespace Internal { class BamDeviceFactory { public: static IBamIODevice* CreateDevice(const std::string& source); + static IBamIODevice* CreateDevice(std::istream *stream); }; } // namespace Internal diff --git a/src/utils/BamTools/src/api/internal/io/BgzfStream_p.cpp b/src/utils/BamTools/src/api/internal/io/BgzfStream_p.cpp index 7f73d67fea9c43fd387523fce4ade4c49ed3dc5b..fe90e5f877a897cd475a31f7195acdb013e989fc 100644 --- a/src/utils/BamTools/src/api/internal/io/BgzfStream_p.cpp +++ b/src/utils/BamTools/src/api/internal/io/BgzfStream_p.cpp @@ -286,6 +286,28 @@ void BgzfStream::Open(const string& filename, const IBamIODevice::OpenMode mode) } } +void BgzfStream::OpenStream(std::istream* stream, const IBamIODevice::OpenMode mode) { + + // close current device if necessary + Close(); + BT_ASSERT_X( (m_device == 0), "BgzfStream::Open() - unable to properly close previous IO device" ); + + // run-time sanity check + if ( stream == NULL ) + throw BamException("BgzfStream::Open", "null input stream"); + + // retrieve new IO device for std::stream* + m_device = BamDeviceFactory::CreateDevice(stream); + BT_ASSERT_X( m_device, "BgzfStream::Open() - unable to create IO device from stream" ); + + // if device fails to open + if ( !m_device->Open(mode) ) { + const string deviceError = m_device->GetErrorString(); + const string message = string("could not open BGZF stream: \n\t") + deviceError; + throw BamException("BgzfStream::Open", message); + } +} + // reads BGZF data into a byte buffer size_t BgzfStream::Read(char* data, const size_t dataLength) { @@ -345,6 +367,8 @@ void BgzfStream::ReadBlock(void) { // read block header from file char header[Constants::BGZF_BLOCK_HEADER_LENGTH]; + memset(header, 0, Constants::BGZF_BLOCK_HEADER_LENGTH); + int64_t numBytesRead = m_device->Read(header, Constants::BGZF_BLOCK_HEADER_LENGTH); // check for device error diff --git a/src/utils/BamTools/src/api/internal/io/BgzfStream_p.h b/src/utils/BamTools/src/api/internal/io/BgzfStream_p.h index 47b360904740420ff919748a07fdf09756c6e111..9297820b9bc7d4a9c031fe7414bb8bb253ed687f 100644 --- a/src/utils/BamTools/src/api/internal/io/BgzfStream_p.h +++ b/src/utils/BamTools/src/api/internal/io/BgzfStream_p.h @@ -45,6 +45,8 @@ class BgzfStream { bool IsOpen(void) const; // opens the BGZF file void Open(const std::string& filename, const IBamIODevice::OpenMode mode); + + void OpenStream(std::istream* stream, const IBamIODevice::OpenMode mode); // reads BGZF data into a byte buffer size_t Read(char* data, const size_t dataLength); // seek to position in BGZF file diff --git a/src/utils/BamTools/src/api/internal/io/NetUnix_p.h b/src/utils/BamTools/src/api/internal/io/NetUnix_p.h index f06c4df72f01910c42ef13ed39f7d83768965088..8cf75f8d300464eac4c06ca864e7caa535722d72 100644 --- a/src/utils/BamTools/src/api/internal/io/NetUnix_p.h +++ b/src/utils/BamTools/src/api/internal/io/NetUnix_p.h @@ -23,7 +23,6 @@ #ifndef _WIN32 // <-- source files only include the proper Net*_p.h, but this is a double-check #include <arpa/inet.h> -#include <netinet/in.h> #include <sys/ioctl.h> #include <sys/socket.h> #include <sys/stat.h> diff --git a/src/utils/BamTools/src/api/internal/io/StdStreamDevice_p.h b/src/utils/BamTools/src/api/internal/io/StdStreamDevice_p.h new file mode 100644 index 0000000000000000000000000000000000000000..a957c2645572f23bb37f3120a94e594115e32591 --- /dev/null +++ b/src/utils/BamTools/src/api/internal/io/StdStreamDevice_p.h @@ -0,0 +1,54 @@ +/* + * StdStreamDevice_p.h + * + * Created on: Apr 24, 2013 + * Author: nek3d + */ + +#ifndef STDSTREAMDEVICE_P_H_ +#define STDSTREAMDEVICE_P_H_ + +//NOTE: The stream management methods in this class are placeholders. The class is not intended to manage a +// a stream. Rather, it is intended to accept a stream that is managed elsewhere, and allow that stream to +//be used with bamtools. + +#include "api/IBamIODevice.h" +#include <cstring> //for strlen +#include <istream> + +namespace BamTools { +namespace Internal { + +class StdStreamDevice : public IBamIODevice { + + // ctor & dtor + public: + StdStreamDevice(std::istream *stream) : m_stream(stream) { + IBamIODevice::m_mode = IBamIODevice::ReadOnly; + } + ~StdStreamDevice() {} + + // IBamIODevice implementation + public: + void Close(void) {} + bool IsOpen(void) const { return true; } // assume stream passed in is always open. + bool IsRandomAccess(void) const { return false; } + bool Open(const IBamIODevice::OpenMode mode) { return true; } //stream is assumed opened by caller. + int64_t Read(char* data, const unsigned int numBytes) { + m_stream->read(data, numBytes); + return (int64_t)(numBytes); + } + bool Seek(const int64_t& position, const int origin = SEEK_SET) { return false; } + + //Not sure what Tell and Write should do for streams. + int64_t Tell(void) const { return 0; } + int64_t Write(const char* data, const unsigned int numBytes) { return 0; } + + private: + std::istream* m_stream; +}; + +} // namespace Internal +} // namespace BamTools + +#endif /* STDSTREAMDEVICE_P_H_ */ diff --git a/src/utils/fileType/Makefile b/src/utils/fileType/Makefile index 91585d5ed8b7d7f5e1ed2268f980fa5924e841d5..50ed87b54bbe2e5cec0e4a45d7b420dfbb82deae 100644 --- a/src/utils/fileType/Makefile +++ b/src/utils/fileType/Makefile @@ -4,26 +4,28 @@ UTILITIES_DIR = ../../utils/ # ------------------- # define our includes # ------------------- -INCLUDES = +INCLUDES = -I$(UTILITIES_DIR)/BamTools/include/ \ + -I$(UTILITIES_DIR)/general # ---------------------------------- # define our source and object files # ---------------------------------- -SOURCES= fileType.cpp fileType.h -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS= -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +SOURCES= fileType.cpp fileType.h FileRecordTypeChecker.cpp FileRecordTypeChecker.h +OBJECTS= fileType.o FileRecordTypeChecker.o +#_EXT_OBJECTS=ParseTools.o QuickString.o +#EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +all: $(BUILT_OBJECTS) + +.PHONY: all + $(BUILT_OBJECTS): $(SOURCES) @echo " * compiling" $(*F).cpp @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(INCLUDES) - clean: @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + @rm -f $(OBJ_DIR)/fileType.o $(OBJ_DIR)/FileRecordTypeChecker.o .PHONY: clean \ No newline at end of file diff --git a/src/utils/fileType/fileType.cpp b/src/utils/fileType/fileType.cpp index 431782acf826f6884bd4b7c559fc3615c1ad091d..0b05acb18ffa42d7df6c33630f6bedc0d9d48b19 100644 --- a/src/utils/fileType/fileType.cpp +++ b/src/utils/fileType/fileType.cpp @@ -38,7 +38,9 @@ bool isRegularFile(const string& filename) { returns TRUE if the file has a GZIP header. Should only be run on regular files. */ -bool isGzipFile(istream *file) { +bool isGzipFile(istream *file) { + //see http://www.gzip.org/zlib/rfc-gzip.html#file-format + /* 11-Sep-2011: We now only peek at the first byte and test for GZIPiness. @@ -52,19 +54,18 @@ bool isGzipFile(istream *file) { // unsigned char cm; } gzip_header; - /* - 26-Dec-2012: - Just peek at the first byte instead of reading it so that we don't - affect the istream's failbit. This modification was wisely proposed - by John Marshall in response to Issue 30: - https://github.com/arq5x/bedtools/issues/30 - */ - - // Check to see if the first byte matches expectations for a - // GZIP header. - // Deatils: http://www.gzip.org/zlib/rfc-gzip.html#file-format - if (file->peek() != 0x1f) + if (!file->read((char*)&gzip_header, sizeof(gzip_header))) { return false; - else + } + + if ( gzip_header.id1 == 0x1f ) +// && +// gzip_header.id2 == 0x8b +// && +// gzip_header.cm == 8 ) + { return true; + } + file->putback(gzip_header.id1); + return false; } diff --git a/src/utils/genomeFile/Makefile b/src/utils/genomeFile/Makefile deleted file mode 100644 index 680343c4edef69838cbacc54580fab1096a34490..0000000000000000000000000000000000000000 --- a/src/utils/genomeFile/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../ -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= genomeFile.cpp genomeFile.h -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=lineFileUtilities.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -L$(BT_ROOT)/lib - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean diff --git a/src/utils/genomeFile/genomeFile.cpp b/src/utils/genomeFile/genomeFile.cpp deleted file mode 100644 index b84948e6604776b55794cf3000e396e928ff0cb4..0000000000000000000000000000000000000000 --- a/src/utils/genomeFile/genomeFile.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/***************************************************************************** - genomeFile.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "genomeFile.h" - - -GenomeFile::GenomeFile(const string &genomeFile) { - _genomeFile = genomeFile; - loadGenomeFileIntoMap(); -} - -GenomeFile::GenomeFile(const RefVector &genome) { - for (size_t i = 0; i < genome.size(); ++i) { - string chrom = genome[i].RefName; - int length = genome[i].RefLength; - - _chromSizes[chrom] = length; - _chromList.push_back(chrom); - } -} - -// Destructor -GenomeFile::~GenomeFile(void) { -} - - -void GenomeFile::loadGenomeFileIntoMap() { - - string genomeLine; - int lineNum = 0; - vector<string> genomeFields; // vector for a GENOME entry - - // open the GENOME file for reading - ifstream genome(_genomeFile.c_str(), ios::in); - if ( !genome ) { - cerr << "Error: The requested genome file (" << _genomeFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - - while (getline(genome, genomeLine)) { - - Tokenize(genomeLine,genomeFields); // load the fields into the vector - lineNum++; - - // ignore a blank line - if (genomeFields.size() > 0) { - if (genomeFields[0].find("#") != 0) { - // we need at least 2 columns - if (genomeFields.size() >= 2) { - char *p2End; - long c2; - // make sure the second column is numeric. - c2 = strtol(genomeFields[1].c_str(), &p2End, 10); - - // strtol will set p2End to the start of the string if non-integral, base 10 - if (p2End != genomeFields[1].c_str()) { - string chrom = genomeFields[0]; - int size = atoi(genomeFields[1].c_str()); - _chromSizes[chrom] = size; - _chromList.push_back(chrom); - _startOffsets.push_back(_genomeLength); - _genomeLength += size; - } - } - else { - cerr << "Less than the req'd two fields were encountered in the genome file (" << _genomeFile << ")"; - cerr << " at line " << lineNum << ". Exiting." << endl; - exit (1); - } - } - } - genomeFields.clear(); - } -} - -pair<string, uint32_t> GenomeFile::projectOnGenome(uint32_t genome_pos) { - // search for the chrom that the position belongs on. - // add 1 to genome position b/c of zero-based, half open. - vector<uint32_t>::const_iterator low = - lower_bound(_startOffsets.begin(), _startOffsets.end(), genome_pos + 1); - - // use the iterator to identify the appropriate index - // into the chrom name and start vectors - int i = int(low-_startOffsets.begin()); - string chrom = _chromList[i - 1]; - uint32_t start = genome_pos - _startOffsets[i - 1]; - return make_pair(chrom, start); -} - -uint32_t GenomeFile::getChromSize(const string &chrom) { - chromToSizes::const_iterator chromIt = _chromSizes.find(chrom); - if (chromIt != _chromSizes.end()) - return _chromSizes[chrom]; - else - return -1; // chrom not found. -} - -uint32_t GenomeFile::getGenomeSize(void) { - return _genomeLength; -} - -vector<string> GenomeFile::getChromList() { - return _chromList; -} - -int GenomeFile::getNumberOfChroms() { - return _chromList.size(); -} - -string GenomeFile::getGenomeFileName() { - return _genomeFile; -} diff --git a/src/utils/genomeFile/genomeFile.h b/src/utils/genomeFile/genomeFile.h deleted file mode 100644 index a71d61ec0ff6b191abdbff78b0f85628da4ecdbc..0000000000000000000000000000000000000000 --- a/src/utils/genomeFile/genomeFile.h +++ /dev/null @@ -1,72 +0,0 @@ -/***************************************************************************** - genomeFile.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef GENOMEFILE_H -#define GENOMEFILE_H - -#include <map> -#include <string> -#include <iostream> -#include <sstream> -#include <fstream> -#include <cstring> -#include <cstdio> -#include <algorithm> // for bsearch lower_bound() -#include "api/BamReader.h" -#include "api/BamAux.h" -using namespace BamTools; - -using namespace std; - - -// typedef for mapping b/w chrom name and it's size in b.p. -typedef map<string, int, std::less<string> > chromToSizes; - - -class GenomeFile { - -public: - - // Constructor using a file - GenomeFile(const string &genomeFile); - - // Constructor using a vector of BamTools RefVector - GenomeFile(const RefVector &genome); - - // Destructor - ~GenomeFile(void); - - // load a GENOME file into a map keyed by chrom. value is size of chrom. - void loadGenomeFileIntoMap(); - - pair<string, uint32_t> projectOnGenome(uint32_t genome_pos); - - uint32_t getChromSize(const string &chrom); // return the size of a chromosome - uint32_t getGenomeSize(void); // return the total size of the geonome - vector<string> getChromList(); // return a list of chrom names - int getNumberOfChroms(); // return the number of chroms - string getGenomeFileName(); // return the name of the genome file - - - - -private: - string _genomeFile; - chromToSizes _chromSizes; - vector<string> _chromList; - - // projecting chroms into a single coordinate system - uint32_t _genomeLength; - vector<uint32_t> _startOffsets; - -}; - -#endif /* GENOMEFILE_H */ diff --git a/src/windowMaker/Makefile b/src/windowMaker/Makefile index b51e487be536abb6b3b29b8d723d502c39c236ab..35c6ca3524721b8fa90489b58c3cbbae747dcda0 100644 --- a/src/windowMaker/Makefile +++ b/src/windowMaker/Makefile @@ -5,7 +5,7 @@ BIN_DIR = ../../bin/ # ------------------- # define our includes # ------------------- -INCLUDES = -I$(UTILITIES_DIR)/genomeFile/ \ +INCLUDES = -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/bedFile/ \ -I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/fileType/ \ diff --git a/src/windowMaker/windowMaker.h b/src/windowMaker/windowMaker.h index b06b46ec337766073f324d797f6d58cb96080630..3c7d14fa7f2cc67c763d457e31796a0bc9576ffe 100644 --- a/src/windowMaker/windowMaker.h +++ b/src/windowMaker/windowMaker.h @@ -9,7 +9,7 @@ aaronquinlan@gmail.com Licenced under the GNU General Public License 2.0 license. ******************************************************************************/ -#include "genomeFile.h" +#include "GenomeFile.h" #include "bedFile.h" using namespace std;