diff --git a/src/fastaFromBed/fastaFromBed.cpp b/src/fastaFromBed/fastaFromBed.cpp index b15e5071b989df7b4d8a5439a6191bfbd35e4671..d34eed03e4e27bac7f327beb4c07ea2947644229 100644 --- a/src/fastaFromBed/fastaFromBed.cpp +++ b/src/fastaFromBed/fastaFromBed.cpp @@ -13,17 +13,38 @@ #include "fastaFromBed.h" -Bed2Fa::Bed2Fa(const string &dbFile, const string &bedFile, bool useFasta, bool useStrand, bool useName, bool useFull) { +Bed2Fa::Bed2Fa(bool &useName, string &dbFile, string &bedFile, + string &fastaOutFile, bool &useFasta, bool &useStrand) { + + if (useName) { + _useName = true; + } _dbFile = dbFile; _bedFile = bedFile; + _fastaOutFile = fastaOutFile; _useFasta = useFasta; _useStrand = useStrand; - _useName = useName; - _useFull = useFull; _bed = new BedFile(_bedFile); + // Figure out what the output file should be. + if (fastaOutFile == "stdout") { + _faOut = &cout; + } + else { + // Make sure we can open the file. + ofstream fa(fastaOutFile.c_str(), ios::out); + if ( !fa ) { + cerr << "Error: The requested fasta output file (" << fastaOutFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + else { + fa.close(); + _faOut = new ofstream(fastaOutFile.c_str(), ios::out); + } + } + // Extract the requested intervals from the FASTA input file. ExtractDNA(); } @@ -45,28 +66,22 @@ void Bed2Fa::ReportDNA(const BED &bed, string &dna) { if (!(_useName)) { if (_useFasta == true) { if (_useStrand == true) - cout << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl; + *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl; else - cout << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl; + *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl; } else { - if (_useFull == true) { - _bed->reportBedTab(bed); - cout << dna << endl; - } - else { - if (_useStrand == true) - cout << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl; - else - cout << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl; - } + if (_useStrand == true) + *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl; + else + *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl; } } else { if (_useFasta == true) - cout << ">" << bed.name << endl << dna << endl; + *_faOut << ">" << bed.name << endl << dna << endl; else - cout << bed.name << "\t" << dna << endl; + *_faOut << bed.name << "\t" << dna << endl; } } diff --git a/src/fastaFromBed/fastaFromBed.h b/src/fastaFromBed/fastaFromBed.h index 6a05b9c7271f22d4ed8b73217e9f94cf101e2005..93ebd611d07374172e45cdcd115d56712e5a10bc 100644 --- a/src/fastaFromBed/fastaFromBed.h +++ b/src/fastaFromBed/fastaFromBed.h @@ -29,8 +29,8 @@ class Bed2Fa { public: // constructor - Bed2Fa(const string &dbFile, const string &bedFile, - bool useFasta, bool useStrand, bool useName, bool useFull); + Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutFile, + bool &useFasta, bool &useStrand); // destructor ~Bed2Fa(void); @@ -40,15 +40,17 @@ public: private: + + bool _useName; string _dbFile; string _bedFile; + string _fastaOutFile; bool _useFasta; - bool _useStrand; // should we extract a specific strand? - bool _useName; // should we use the BED name for the FASTA header? - bool _useFull; // should we use the full BED entry for the tabular output? + bool _useStrand; // instance of a bed file class. BedFile *_bed; + ostream *_faOut; }; #endif diff --git a/src/fastaFromBed/fastaFromBedMain.cpp b/src/fastaFromBed/fastaFromBedMain.cpp index 679dee3a9fb6ddad322bd253798b3d0ba1fd0168..c31676d93228329dad44141c43b5d790aa8471d9 100644 --- a/src/fastaFromBed/fastaFromBedMain.cpp +++ b/src/fastaFromBed/fastaFromBedMain.cpp @@ -39,8 +39,8 @@ int main(int argc, char* argv[]) { // checks for existence of parameters bool haveFastaDb = false; bool haveBed = false; + bool haveFastaOut = false; bool useNameOnly = false; - bool useFullBedEntry = false; bool useFasta = true; bool useStrand = false; @@ -70,6 +70,13 @@ int main(int argc, char* argv[]) { i++; } } + else if(PARAMETER_CHECK("-fo", 3, parameterLength)) { + if ((i+1) < argc) { + haveFastaOut = true; + fastaOutFile = argv[i + 1]; + i++; + } + } else if(PARAMETER_CHECK("-bed", 4, parameterLength)) { if ((i+1) < argc) { haveBed = true; @@ -80,10 +87,6 @@ int main(int argc, char* argv[]) { else if(PARAMETER_CHECK("-name", 5, parameterLength)) { useNameOnly = true; } - else if(PARAMETER_CHECK("-full", 5, parameterLength)) { - useFullBedEntry = true; - useFasta = false; - } else if(PARAMETER_CHECK("-tab", 4, parameterLength)) { useFasta = false; } @@ -96,17 +99,13 @@ int main(int argc, char* argv[]) { } } - if (!haveFastaDb || !haveBed) { - showHelp = true; - } - if (useFullBedEntry && useNameOnly) { - cerr << "*****ERROR: Cannot use both -name and -full. Choose one or the other.*****" << endl << endl; + if (!haveFastaDb || !haveFastaOut || !haveBed) { showHelp = true; } if (!showHelp) { - Bed2Fa *b2f = new Bed2Fa(fastaDbFile, bedFile, useFasta, useStrand, useNameOnly, useFullBedEntry); + Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand); delete b2f; return 0; @@ -124,16 +123,16 @@ void ShowHelp(void) { cerr << "Summary: Extract DNA sequences into a fasta file based on feature coordinates." << endl << endl; - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf> " << endl << endl; + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf> -fo <fasta> " << endl << endl; cerr << "Options: " << endl; cerr << "\t-fi\tInput FASTA file" << endl; cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl; + cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl; cerr << "\t-name\tUse the name field for the FASTA header" << endl; cerr << "\t-tab\tWrite output in TAB delimited format." << endl; cerr << "\t\t- Default is FASTA format." << endl << endl; - cerr << "\t-full\tUse the full BED entry when using -tab. Default is chr:start-end." << endl; cerr << "\t-s\tForce strandedness. If the feature occupies the antisense strand," << endl; cerr << "\t\tthe sequence will be reverse complemented." << endl;