diff --git a/src/fastaFromBed/fastaFromBed.cpp b/src/fastaFromBed/fastaFromBed.cpp index d34eed03e4e27bac7f327beb4c07ea2947644229..b15e5071b989df7b4d8a5439a6191bfbd35e4671 100644 --- a/src/fastaFromBed/fastaFromBed.cpp +++ b/src/fastaFromBed/fastaFromBed.cpp @@ -13,38 +13,17 @@ #include "fastaFromBed.h" -Bed2Fa::Bed2Fa(bool &useName, string &dbFile, string &bedFile, - string &fastaOutFile, bool &useFasta, bool &useStrand) { - - if (useName) { - _useName = true; - } +Bed2Fa::Bed2Fa(const string &dbFile, const string &bedFile, bool useFasta, bool useStrand, bool useName, bool useFull) { _dbFile = dbFile; _bedFile = bedFile; - _fastaOutFile = fastaOutFile; _useFasta = useFasta; _useStrand = useStrand; + _useName = useName; + _useFull = useFull; _bed = new BedFile(_bedFile); - // Figure out what the output file should be. - if (fastaOutFile == "stdout") { - _faOut = &cout; - } - else { - // Make sure we can open the file. - ofstream fa(fastaOutFile.c_str(), ios::out); - if ( !fa ) { - cerr << "Error: The requested fasta output file (" << fastaOutFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - else { - fa.close(); - _faOut = new ofstream(fastaOutFile.c_str(), ios::out); - } - } - // Extract the requested intervals from the FASTA input file. ExtractDNA(); } @@ -66,22 +45,28 @@ void Bed2Fa::ReportDNA(const BED &bed, string &dna) { if (!(_useName)) { if (_useFasta == true) { if (_useStrand == true) - *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl; + cout << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl; else - *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl; + cout << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl; } else { - if (_useStrand == true) - *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl; - else - *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl; + if (_useFull == true) { + _bed->reportBedTab(bed); + cout << dna << endl; + } + else { + if (_useStrand == true) + cout << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl; + else + cout << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl; + } } } else { if (_useFasta == true) - *_faOut << ">" << bed.name << endl << dna << endl; + cout << ">" << bed.name << endl << dna << endl; else - *_faOut << bed.name << "\t" << dna << endl; + cout << bed.name << "\t" << dna << endl; } } diff --git a/src/fastaFromBed/fastaFromBed.h b/src/fastaFromBed/fastaFromBed.h index 93ebd611d07374172e45cdcd115d56712e5a10bc..6a05b9c7271f22d4ed8b73217e9f94cf101e2005 100644 --- a/src/fastaFromBed/fastaFromBed.h +++ b/src/fastaFromBed/fastaFromBed.h @@ -29,8 +29,8 @@ class Bed2Fa { public: // constructor - Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutFile, - bool &useFasta, bool &useStrand); + Bed2Fa(const string &dbFile, const string &bedFile, + bool useFasta, bool useStrand, bool useName, bool useFull); // destructor ~Bed2Fa(void); @@ -40,17 +40,15 @@ public: private: - - bool _useName; string _dbFile; string _bedFile; - string _fastaOutFile; bool _useFasta; - bool _useStrand; + bool _useStrand; // should we extract a specific strand? + bool _useName; // should we use the BED name for the FASTA header? + bool _useFull; // should we use the full BED entry for the tabular output? // instance of a bed file class. BedFile *_bed; - ostream *_faOut; }; #endif diff --git a/src/fastaFromBed/fastaFromBedMain.cpp b/src/fastaFromBed/fastaFromBedMain.cpp index c31676d93228329dad44141c43b5d790aa8471d9..679dee3a9fb6ddad322bd253798b3d0ba1fd0168 100644 --- a/src/fastaFromBed/fastaFromBedMain.cpp +++ b/src/fastaFromBed/fastaFromBedMain.cpp @@ -39,8 +39,8 @@ int main(int argc, char* argv[]) { // checks for existence of parameters bool haveFastaDb = false; bool haveBed = false; - bool haveFastaOut = false; bool useNameOnly = false; + bool useFullBedEntry = false; bool useFasta = true; bool useStrand = false; @@ -70,13 +70,6 @@ int main(int argc, char* argv[]) { i++; } } - else if(PARAMETER_CHECK("-fo", 3, parameterLength)) { - if ((i+1) < argc) { - haveFastaOut = true; - fastaOutFile = argv[i + 1]; - i++; - } - } else if(PARAMETER_CHECK("-bed", 4, parameterLength)) { if ((i+1) < argc) { haveBed = true; @@ -87,6 +80,10 @@ int main(int argc, char* argv[]) { else if(PARAMETER_CHECK("-name", 5, parameterLength)) { useNameOnly = true; } + else if(PARAMETER_CHECK("-full", 5, parameterLength)) { + useFullBedEntry = true; + useFasta = false; + } else if(PARAMETER_CHECK("-tab", 4, parameterLength)) { useFasta = false; } @@ -99,13 +96,17 @@ int main(int argc, char* argv[]) { } } - if (!haveFastaDb || !haveFastaOut || !haveBed) { + if (!haveFastaDb || !haveBed) { + showHelp = true; + } + if (useFullBedEntry && useNameOnly) { + cerr << "*****ERROR: Cannot use both -name and -full. Choose one or the other.*****" << endl << endl; showHelp = true; } if (!showHelp) { - Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand); + Bed2Fa *b2f = new Bed2Fa(fastaDbFile, bedFile, useFasta, useStrand, useNameOnly, useFullBedEntry); delete b2f; return 0; @@ -123,16 +124,16 @@ void ShowHelp(void) { cerr << "Summary: Extract DNA sequences into a fasta file based on feature coordinates." << endl << endl; - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf> -fo <fasta> " << endl << endl; + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf> " << endl << endl; cerr << "Options: " << endl; cerr << "\t-fi\tInput FASTA file" << endl; cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl; - cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl; cerr << "\t-name\tUse the name field for the FASTA header" << endl; cerr << "\t-tab\tWrite output in TAB delimited format." << endl; cerr << "\t\t- Default is FASTA format." << endl << endl; + cerr << "\t-full\tUse the full BED entry when using -tab. Default is chr:start-end." << endl; cerr << "\t-s\tForce strandedness. If the feature occupies the antisense strand," << endl; cerr << "\t\tthe sequence will be reverse complemented." << endl;