diff --git a/src/bedToBam/bedToBam.cpp b/src/bedToBam/bedToBam.cpp index 2a14c9e853d6f5bef292e761df2767e13fe9502d..7a3718e9d8c8d0385627a841d3be318b022aa7af 100755 --- a/src/bedToBam/bedToBam.cpp +++ b/src/bedToBam/bedToBam.cpp @@ -191,12 +191,12 @@ void ProcessBed(istream &bedInput, BedFile *bed, GenomeFile *genome, bool isBED1 RefVector refs; string bamHeader; map<string, int, std::less<string> > chromToId; - MakeBamHeader(genome->genomeFile, refs, bamHeader, chromToId); // add the reference headers to the BAM file writer.Open("stdout", bamHeader, refs); + string bedLine; int lineNum = 0; // current input line number vector<string> bedFields; // vector for a BED entry diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp index 3191a9ae848ccaffc45b8a3ae0016aba409e54fd..f441662bd5c47517c6422cdd2afd5b650fd9617f 100755 --- a/src/intersectBed/intersectBed.cpp +++ b/src/intersectBed/intersectBed.cpp @@ -170,8 +170,9 @@ bool BedIntersect::FindOneOrMoreOverlap(const BED &a) { } -void BedIntersect::IntersectBed(istream &bedInput) { +void BedIntersect::IntersectBed() { +/* // load the "B" bed file into a map so // that we can easily compare "A" to it for overlaps _bedB->loadBedFileIntoMap(); @@ -198,6 +199,21 @@ void BedIntersect::IntersectBed(istream &bedInput) { // reset for the next input line bedFields.clear(); } + */ + _bedB->loadBedFileIntoMap(); + + int lineNum = 0; + vector<BED> hits; + hits.reserve(100); + BED a; + + _bedA->Open(); + while (_bedA->GetNextBed(a, lineNum) == true) { + FindOverlaps(a, hits); + hits.clear(); + } + _bedA->Close(); + } @@ -273,7 +289,7 @@ void BedIntersect::IntersectBam(string bamFile) { void BedIntersect::DetermineBedInput() { - + // dealing with a proper file if (_bedA->bedFile != "stdin") { // it's BED or GFF @@ -283,19 +299,14 @@ void BedIntersect::DetermineBedInput() { cerr << "Error: The requested bed file (" << _bedA->bedFile << ") could not be opened. Exiting!" << endl; exit (1); } - IntersectBed(beds); - } - // it's BAM - else { - IntersectBam(_bedA->bedFile); + IntersectBed(); } } // reading from stdin else { // it's BED or GFF if (_bamInput == false) { - IntersectBed(cin); - } + IntersectBed(); // it's BAM else { IntersectBam("stdin"); diff --git a/src/intersectBed/intersectBed.h b/src/intersectBed/intersectBed.h index 3d9524622bc14c4fc39e5fb3317aee1ded10a92f..e49f9fa1bda1b1f1dcae0a9d5f7914dc8184fc55 100755 --- a/src/intersectBed/intersectBed.h +++ b/src/intersectBed/intersectBed.h @@ -72,6 +72,9 @@ private: // private methods //------------------------------------------------ void IntersectBed(istream &bedInput); + + void IntersectBed(); + void IntersectBam(string bamFile); bool FindOverlaps(const BED &a, vector<BED> &hits); diff --git a/src/utils/BamTools/BamAux.h b/src/utils/BamTools/BamAux.h index 393c55b45db9b9eb5738b3df4287408180f4ee0c..97342bf9ca943ba3b58f865ee82ae4b58578bf60 100644 --- a/src/utils/BamTools/BamAux.h +++ b/src/utils/BamTools/BamAux.h @@ -62,7 +62,12 @@ const int BAM_LIDX_SHIFT = 14; // Explicit variable sizes const int BT_SIZEOF_INT = 4; -struct CigarOp; +// ARQ: moved this here. +struct CigarOp { + char Type; // Operation type (MIDNSHP) + uint32_t Length; // Operation length (number of bases) +}; + struct BamAlignment { @@ -190,6 +195,27 @@ struct BamAlignment { return true; } + // ARQ: + // compute and retuen the alignment end coordinate based on the CIGAR string. + int GetAlignmentEnd(bool usePadded = false) const { + // initialize alignment end to starting position + int alignEnd = Position; + + // iterate over cigar operations + std::vector<CigarOp>::const_iterator cigarIter = CigarData.begin(); + std::vector<CigarOp>::const_iterator cigarEnd = CigarData.end(); + for ( ; cigarIter != cigarEnd; ++cigarIter) { + char cigarType = (*cigarIter).Type; + if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) { + alignEnd += (*cigarIter).Length; + } + else if (cigarType == 'I' && usePadded == true) { + alignEnd += (*cigarIter).Length; + } + } + return alignEnd; + } + private: static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) { switch(storageType) { @@ -266,11 +292,6 @@ struct BamAlignment { // ---------------------------------------------------------------- // Auxiliary data structs & typedefs -struct CigarOp { - char Type; // Operation type (MIDNSHP) - uint32_t Length; // Operation length (number of bases) -}; - struct RefData { // data members std::string RefName; // Name of reference sequence diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp index b72f119643d215d5168016e0d1c120a6e14397bf..cb5b8fde9de92a037ab62bcfe490ea4dfe66d4fd 100755 --- a/src/utils/bedFile/bedFile.cpp +++ b/src/utils/bedFile/bedFile.cpp @@ -95,6 +95,97 @@ int getBin(int start, int end) { } +/******************************************* +Class methods +*******************************************/ + +// Constructor +BedFile::BedFile(string &bedFile) +: bedFile(bedFile) +{} + +// Destructor +BedFile::~BedFile(void) { +} + + +void BedFile::Open(void) { + + if (bedFile == "stdin") { + bedStream = &cin; + } + else { + size_t foundPos; + foundPos = bedFile.find_last_of(".gz"); + // is this a GZIPPED BED file? + if (foundPos == bedFile.size() - 1) { + igzstream beds(bedFile.c_str(), ios::in); + if ( !beds ) { + cerr << "Error: The requested bed file (" << bedFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + else { + // if so, close it (this was just a test) + beds.close(); + // now set a pointer to the stream so that we + // can read the file later on. + // Thank God for Josuttis, p. 631! + bedStream = new igzstream(bedFile.c_str(), ios::in); + } + } + // not GZIPPED. + else { + + ifstream beds(bedFile.c_str(), ios::in); + // can we open the file? + if ( !beds ) { + cerr << "Error: The requested bed file (" << bedFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + else { + // if so, close it (this was just a test) + beds.close(); + // now set a pointer to the stream so that we + // can read the file later on. + // Thank God for Josuttis, p. 631! + bedStream = new ifstream(bedFile.c_str(), ios::in); + } + } + } +} + + +// Close the BED file +void BedFile::Close(void) { + delete bedStream; +} + + +bool BedFile::GetNextBed (BED &bed, int &lineNum) { + + // make sure there are still lines to process. + // if so, tokenize, validate and return the BED entry. + if (bedStream->good()) { + string bedLine; + vector<string> bedFields; + bedFields.reserve(12); + + // parse the bedStream pointer + getline(*bedStream, bedLine); + lineNum++; + + // split into a string vector. + Tokenize(bedLine,bedFields); + + // load the BED struct as long as it's a valid BED entry. + if (parseLine(bed, bedFields, lineNum)) { + return true; + } + return false; + } +} + + void BedFile::FindOverlapsPerBin(string chrom, int start, int end, string strand, vector<BED> &hits, bool forceStrand) { int startBin, endBin; diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h index 86eef4cb702bf0dec50a9de07e7f32034134d1d3..960dfc60059b7832d71fccd0dc6a297442f209d6 100755 --- a/src/utils/bedFile/bedFile.h +++ b/src/utils/bedFile/bedFile.h @@ -144,6 +144,12 @@ public: // Destructor ~BedFile(void); + + void Open(void); + + void Close(void); + + bool GetNextBed (BED &bed, int &lineNum); // load a BED file into a map keyed by chrom, then bin. value is vector of BEDs void loadBedFileIntoMap(); diff --git a/src/utils/genomeFile/genomeFile.cpp b/src/utils/genomeFile/genomeFile.cpp index 3ad1869f61a09155391c0a56a54a8e27b165667e..4146a55f9b4c20f59891b3b3dc344f2889eb7037 100755 --- a/src/utils/genomeFile/genomeFile.cpp +++ b/src/utils/genomeFile/genomeFile.cpp @@ -14,7 +14,7 @@ GenomeFile::GenomeFile(const string &genomeFile) { - this->genomeFile = genomeFile; + _genomeFile = genomeFile; loadGenomeFileIntoMap(); } @@ -30,9 +30,9 @@ void GenomeFile::loadGenomeFileIntoMap() { vector<string> genomeFields; // vector for a GENOME entry // open the GENOME file for reading - ifstream genome(genomeFile.c_str(), ios::in); + ifstream genome(_genomeFile.c_str(), ios::in); if ( !genome ) { - cerr << "Error: The requested genome file (" <<genomeFile << ") could not be opened. Exiting!" << endl; + cerr << "Error: The requested genome file (" << _genomeFile << ") could not be opened. Exiting!" << endl; exit (1); } @@ -62,7 +62,7 @@ void GenomeFile::loadGenomeFileIntoMap() { } } else { - cerr << "Less than the req'd two fields were encountered in the genome file (" <<genomeFile << ")"; + cerr << "Less than the req'd two fields were encountered in the genome file (" << _genomeFile << ")"; cerr << " at line " << lineNum << ". Exiting." << endl; exit (1); } diff --git a/src/utils/genomeFile/genomeFile.h b/src/utils/genomeFile/genomeFile.h index a57456a2fe6860b7b9cf558c45e7305622de6874..200cc6948f8a16ee68f76aa374827fcc91228290 100755 --- a/src/utils/genomeFile/genomeFile.h +++ b/src/utils/genomeFile/genomeFile.h @@ -43,9 +43,11 @@ public: int getChromSize(const string &chrom); vector<string> getChromList(); int getNumberOfChroms(); - + + + private: - string genomeFile; + string _genomeFile; chromToSizes _chromSizes; vector<string> _chromList; };