Skip to content
Snippets Groups Projects
Commit da9026ba authored by Brent Pedersen's avatar Brent Pedersen
Browse files

Start of support for getting the concatenated sequence of exons

parent bab2179f
No related branches found
No related tags found
No related merge requests found
...@@ -9,7 +9,9 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ ...@@ -9,7 +9,9 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/genomeFile/ \ -I$(UTILITIES_DIR)/genomeFile/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \
-I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/sequenceUtilities/ \ -I$(UTILITIES_DIR)/sequenceUtilities/ \
-I$(UTILITIES_DIR)/BlockedIntervals \
-I$(UTILITIES_DIR)/fileType/ \ -I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/Fasta/ \ -I$(UTILITIES_DIR)/Fasta/ \
-I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/version/
...@@ -34,4 +36,4 @@ clean: ...@@ -34,4 +36,4 @@ clean:
@echo "Cleaning up." @echo "Cleaning up."
@rm -f $(OBJ_DIR)/fastaFromBedMain.o $(OBJ_DIR)/fastaFromBed.o @rm -f $(OBJ_DIR)/fastaFromBedMain.o $(OBJ_DIR)/fastaFromBed.o
.PHONY: clean .PHONY: clean
\ No newline at end of file
...@@ -11,10 +11,11 @@ ...@@ -11,10 +11,11 @@
******************************************************************************/ ******************************************************************************/
#include "lineFileUtilities.h" #include "lineFileUtilities.h"
#include "fastaFromBed.h" #include "fastaFromBed.h"
#include "bedFile.h"
Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile, Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile,
const string &fastaOutFile, bool useFasta, bool useStrand) { const string &fastaOutFile, bool useFasta, bool useStrand, bool useExons) {
_useName = useName; _useName = useName;
_dbFile = dbFile; _dbFile = dbFile;
...@@ -22,6 +23,7 @@ Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile, ...@@ -22,6 +23,7 @@ Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile,
_fastaOutFile = fastaOutFile; _fastaOutFile = fastaOutFile;
_useFasta = useFasta; _useFasta = useFasta;
_useStrand = useStrand; _useStrand = useStrand;
_useExons = useExons;
_bed = new BedFile(_bedFile); _bed = new BedFile(_bedFile);
...@@ -120,7 +122,20 @@ void Bed2Fa::ExtractDNA() { ...@@ -120,7 +122,20 @@ void Bed2Fa::ExtractDNA() {
if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) if ( (bed.start <= seqLength) && (bed.end <= seqLength) )
{ {
int length = bed.end - bed.start; int length = bed.end - bed.start;
sequence = fr->getSubSequence(bed.chrom, bed.start, length); if(_useExons){
bedVector bedBlocks; // vec to store the discrete BED "blocks"
GetBedBlocks(bed, bedBlocks);
sequence.clear();
for (int i = 0; i < (int) bedBlocks.size(); ++i) {
// cerr << bedBlocks[i].start << " " << bedBlocks[i].end << endl;
sequence += fr->getSubSequence(bed.chrom,
bedBlocks[i].start,
bedBlocks[i].end - bedBlocks[i].start);
}
} else {
sequence = fr->getSubSequence(bed.chrom, bed.start, length);
}
ReportDNA(bed, sequence); ReportDNA(bed, sequence);
} }
else else
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#define FASTAFROMBED_H #define FASTAFROMBED_H
#include "bedFile.h" #include "bedFile.h"
#include "BlockedIntervals.h"
#include "sequenceUtils.h" #include "sequenceUtils.h"
#include "Fasta.h" #include "Fasta.h"
#include <vector> #include <vector>
...@@ -30,7 +31,7 @@ public: ...@@ -30,7 +31,7 @@ public:
// constructor // constructor
Bed2Fa(bool useName, const string &dbFile, const string &bedFile, const string &fastaOutFile, Bed2Fa(bool useName, const string &dbFile, const string &bedFile, const string &fastaOutFile,
bool useFasta, bool useStrand); bool useFasta, bool useStrand, bool useExons);
// destructor // destructor
~Bed2Fa(void); ~Bed2Fa(void);
...@@ -47,6 +48,7 @@ private: ...@@ -47,6 +48,7 @@ private:
string _fastaOutFile; string _fastaOutFile;
bool _useFasta; bool _useFasta;
bool _useStrand; bool _useStrand;
bool _useExons;
// instance of a bed file class. // instance of a bed file class.
BedFile *_bed; BedFile *_bed;
......
...@@ -43,6 +43,7 @@ int fastafrombed_main(int argc, char* argv[]) { ...@@ -43,6 +43,7 @@ int fastafrombed_main(int argc, char* argv[]) {
bool useNameOnly = false; bool useNameOnly = false;
bool useFasta = true; bool useFasta = true;
bool useStrand = false; bool useStrand = false;
bool useExons = false;
// check to see if we should print out some help // check to see if we should print out some help
if(argc <= 1) showHelp = true; if(argc <= 1) showHelp = true;
...@@ -87,6 +88,9 @@ int fastafrombed_main(int argc, char* argv[]) { ...@@ -87,6 +88,9 @@ int fastafrombed_main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-name", 5, parameterLength)) { else if(PARAMETER_CHECK("-name", 5, parameterLength)) {
useNameOnly = true; useNameOnly = true;
} }
else if(PARAMETER_CHECK("-exons", 6, parameterLength)) {
useExons = true;
}
else if(PARAMETER_CHECK("-tab", 4, parameterLength)) { else if(PARAMETER_CHECK("-tab", 4, parameterLength)) {
useFasta = false; useFasta = false;
} }
...@@ -105,7 +109,7 @@ int fastafrombed_main(int argc, char* argv[]) { ...@@ -105,7 +109,7 @@ int fastafrombed_main(int argc, char* argv[]) {
if (!showHelp) { if (!showHelp) {
Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand); Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand, useExons);
delete b2f; delete b2f;
} }
else { else {
...@@ -127,6 +131,7 @@ void fastafrombed_help(void) { ...@@ -127,6 +131,7 @@ void fastafrombed_help(void) {
cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl; cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl;
cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl; cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl;
cerr << "\t-name\tUse the name field for the FASTA header" << endl; cerr << "\t-name\tUse the name field for the FASTA header" << endl;
cerr << "\t-exons\tgiven a bed12, concat the sequence from exons" << endl;
cerr << "\t-tab\tWrite output in TAB delimited format." << endl; cerr << "\t-tab\tWrite output in TAB delimited format." << endl;
cerr << "\t\t- Default is FASTA format." << endl << endl; cerr << "\t\t- Default is FASTA format." << endl << endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment