Skip to content
Snippets Groups Projects
Commit 7207d6ea authored by Aaron's avatar Aaron
Browse files

Added -full to fastaFromBed

parent 7894c168
No related branches found
No related tags found
No related merge requests found
......@@ -13,38 +13,17 @@
#include "fastaFromBed.h"
Bed2Fa::Bed2Fa(bool &useName, string &dbFile, string &bedFile,
string &fastaOutFile, bool &useFasta, bool &useStrand) {
if (useName) {
_useName = true;
}
Bed2Fa::Bed2Fa(const string &dbFile, const string &bedFile, bool useFasta, bool useStrand, bool useName, bool useFull) {
_dbFile = dbFile;
_bedFile = bedFile;
_fastaOutFile = fastaOutFile;
_useFasta = useFasta;
_useStrand = useStrand;
_useName = useName;
_useFull = useFull;
_bed = new BedFile(_bedFile);
// Figure out what the output file should be.
if (fastaOutFile == "stdout") {
_faOut = &cout;
}
else {
// Make sure we can open the file.
ofstream fa(fastaOutFile.c_str(), ios::out);
if ( !fa ) {
cerr << "Error: The requested fasta output file (" << fastaOutFile << ") could not be opened. Exiting!" << endl;
exit (1);
}
else {
fa.close();
_faOut = new ofstream(fastaOutFile.c_str(), ios::out);
}
}
// Extract the requested intervals from the FASTA input file.
ExtractDNA();
}
......@@ -66,22 +45,28 @@ void Bed2Fa::ReportDNA(const BED &bed, string &dna) {
if (!(_useName)) {
if (_useFasta == true) {
if (_useStrand == true)
*_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl;
cout << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl;
else
*_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl;
cout << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl;
}
else {
if (_useStrand == true)
*_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl;
else
*_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl;
if (_useFull == true) {
_bed->reportBedTab(bed);
cout << dna << endl;
}
else {
if (_useStrand == true)
cout << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl;
else
cout << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl;
}
}
}
else {
if (_useFasta == true)
*_faOut << ">" << bed.name << endl << dna << endl;
cout << ">" << bed.name << endl << dna << endl;
else
*_faOut << bed.name << "\t" << dna << endl;
cout << bed.name << "\t" << dna << endl;
}
}
......
......@@ -29,8 +29,8 @@ class Bed2Fa {
public:
// constructor
Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutFile,
bool &useFasta, bool &useStrand);
Bed2Fa(const string &dbFile, const string &bedFile,
bool useFasta, bool useStrand, bool useName, bool useFull);
// destructor
~Bed2Fa(void);
......@@ -40,17 +40,15 @@ public:
private:
bool _useName;
string _dbFile;
string _bedFile;
string _fastaOutFile;
bool _useFasta;
bool _useStrand;
bool _useStrand; // should we extract a specific strand?
bool _useName; // should we use the BED name for the FASTA header?
bool _useFull; // should we use the full BED entry for the tabular output?
// instance of a bed file class.
BedFile *_bed;
ostream *_faOut;
};
#endif
......@@ -39,8 +39,8 @@ int main(int argc, char* argv[]) {
// checks for existence of parameters
bool haveFastaDb = false;
bool haveBed = false;
bool haveFastaOut = false;
bool useNameOnly = false;
bool useFullBedEntry = false;
bool useFasta = true;
bool useStrand = false;
......@@ -70,13 +70,6 @@ int main(int argc, char* argv[]) {
i++;
}
}
else if(PARAMETER_CHECK("-fo", 3, parameterLength)) {
if ((i+1) < argc) {
haveFastaOut = true;
fastaOutFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
if ((i+1) < argc) {
haveBed = true;
......@@ -87,6 +80,10 @@ int main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-name", 5, parameterLength)) {
useNameOnly = true;
}
else if(PARAMETER_CHECK("-full", 5, parameterLength)) {
useFullBedEntry = true;
useFasta = false;
}
else if(PARAMETER_CHECK("-tab", 4, parameterLength)) {
useFasta = false;
}
......@@ -99,13 +96,17 @@ int main(int argc, char* argv[]) {
}
}
if (!haveFastaDb || !haveFastaOut || !haveBed) {
if (!haveFastaDb || !haveBed) {
showHelp = true;
}
if (useFullBedEntry && useNameOnly) {
cerr << "*****ERROR: Cannot use both -name and -full. Choose one or the other.*****" << endl << endl;
showHelp = true;
}
if (!showHelp) {
Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand);
Bed2Fa *b2f = new Bed2Fa(fastaDbFile, bedFile, useFasta, useStrand, useNameOnly, useFullBedEntry);
delete b2f;
return 0;
......@@ -123,16 +124,16 @@ void ShowHelp(void) {
cerr << "Summary: Extract DNA sequences into a fasta file based on feature coordinates." << endl << endl;
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf> -fo <fasta> " << endl << endl;
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf> " << endl << endl;
cerr << "Options: " << endl;
cerr << "\t-fi\tInput FASTA file" << endl;
cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl;
cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl;
cerr << "\t-name\tUse the name field for the FASTA header" << endl;
cerr << "\t-tab\tWrite output in TAB delimited format." << endl;
cerr << "\t\t- Default is FASTA format." << endl << endl;
cerr << "\t-full\tUse the full BED entry when using -tab. Default is chr:start-end." << endl;
cerr << "\t-s\tForce strandedness. If the feature occupies the antisense strand," << endl;
cerr << "\t\tthe sequence will be reverse complemented." << endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment