From da9026ba33206cd1a307a9e8639bf32722cb6b4f Mon Sep 17 00:00:00 2001 From: Brent Pedersen <bpederse@gmail.com> Date: Mon, 29 Oct 2012 11:40:15 -0600 Subject: [PATCH] start of allowing to get concatenated sequence of exons --- src/fastaFromBed/Makefile | 4 +++- src/fastaFromBed/fastaFromBed.cpp | 19 +++++++++++++++++-- src/fastaFromBed/fastaFromBed.h | 4 +++- src/fastaFromBed/fastaFromBedMain.cpp | 7 ++++++- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/fastaFromBed/Makefile b/src/fastaFromBed/Makefile index 7b6d9823..dfbaeaa7 100644 --- a/src/fastaFromBed/Makefile +++ b/src/fastaFromBed/Makefile @@ -9,7 +9,9 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ -I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/genomeFile/ \ -I$(UTILITIES_DIR)/lineFileUtilities/ \ + -I$(UTILITIES_DIR)/BamTools/include \ -I$(UTILITIES_DIR)/sequenceUtilities/ \ + -I$(UTILITIES_DIR)/BlockedIntervals \ -I$(UTILITIES_DIR)/fileType/ \ -I$(UTILITIES_DIR)/Fasta/ \ -I$(UTILITIES_DIR)/version/ @@ -34,4 +36,4 @@ clean: @echo "Cleaning up." 
@rm -f $(OBJ_DIR)/fastaFromBedMain.o $(OBJ_DIR)/fastaFromBed.o -.PHONY: clean \ No newline at end of file +.PHONY: clean diff --git a/src/fastaFromBed/fastaFromBed.cpp b/src/fastaFromBed/fastaFromBed.cpp index 1ad92a9f..399ac3dd 100644 --- a/src/fastaFromBed/fastaFromBed.cpp +++ b/src/fastaFromBed/fastaFromBed.cpp @@ -11,10 +11,11 @@ ******************************************************************************/ #include "lineFileUtilities.h" #include "fastaFromBed.h" +#include "bedFile.h" Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile, - const string &fastaOutFile, bool useFasta, bool useStrand) { + const string &fastaOutFile, bool useFasta, bool useStrand, bool useExons) { _useName = useName; _dbFile = dbFile; @@ -22,6 +23,7 @@ Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile, _fastaOutFile = fastaOutFile; _useFasta = useFasta; _useStrand = useStrand; + _useExons = useExons; _bed = new BedFile(_bedFile); @@ -120,7 +122,20 @@ void Bed2Fa::ExtractDNA() { if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) { int length = bed.end - bed.start; - sequence = fr->getSubSequence(bed.chrom, bed.start, length); + if(_useExons){ + bedVector bedBlocks; // vec to store the discrete BED "blocks" + GetBedBlocks(bed, bedBlocks); + sequence.clear(); + for (int i = 0; i < (int) bedBlocks.size(); ++i) { + // debug: cerr << bedBlocks[i].start << " " << bedBlocks[i].end << endl; + sequence += fr->getSubSequence(bed.chrom, + bedBlocks[i].start, + bedBlocks[i].end - bedBlocks[i].start); + + } + } else { + sequence = fr->getSubSequence(bed.chrom, bed.start, length); + } ReportDNA(bed, sequence); } else diff --git a/src/fastaFromBed/fastaFromBed.h b/src/fastaFromBed/fastaFromBed.h index 3f5f454b..0ab73118 100644 --- a/src/fastaFromBed/fastaFromBed.h +++ b/src/fastaFromBed/fastaFromBed.h @@ -13,6 +13,7 @@ #define FASTAFROMBED_H #include "bedFile.h" +#include "BlockedIntervals.h" #include "sequenceUtils.h" #include "Fasta.h" #include <vector> 
@@ -30,7 +31,7 @@ public: // constructor Bed2Fa(bool useName, const string &dbFile, const string &bedFile, const string &fastaOutFile, - bool useFasta, bool useStrand); + bool useFasta, bool useStrand, bool useExons); // destructor ~Bed2Fa(void); @@ -47,6 +48,7 @@ private: string _fastaOutFile; bool _useFasta; bool _useStrand; + bool _useExons; // instance of a bed file class. BedFile *_bed; diff --git a/src/fastaFromBed/fastaFromBedMain.cpp b/src/fastaFromBed/fastaFromBedMain.cpp index 9be9afb3..c5da8be2 100644 --- a/src/fastaFromBed/fastaFromBedMain.cpp +++ b/src/fastaFromBed/fastaFromBedMain.cpp @@ -43,6 +43,7 @@ int fastafrombed_main(int argc, char* argv[]) { bool useNameOnly = false; bool useFasta = true; bool useStrand = false; + bool useExons = false; // check to see if we should print out some help if(argc <= 1) showHelp = true; @@ -87,6 +88,9 @@ int fastafrombed_main(int argc, char* argv[]) { else if(PARAMETER_CHECK("-name", 5, parameterLength)) { useNameOnly = true; } + else if(PARAMETER_CHECK("-exons", 6, parameterLength)) { + useExons = true; + } else if(PARAMETER_CHECK("-tab", 4, parameterLength)) { useFasta = false; } @@ -105,7 +109,7 @@ int fastafrombed_main(int argc, char* argv[]) { if (!showHelp) { - Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand); + Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand, useExons); delete b2f; } else { @@ -127,6 +131,7 @@ void fastafrombed_help(void) { cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl; cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl; cerr << "\t-name\tUse the name field for the FASTA header" << endl; + cerr << "\t-exons\tgiven a bed12, concat the sequence from exons" << endl; cerr << "\t-tab\tWrite output in TAB delimited format." << endl; cerr << "\t\t- Default is FASTA format." << endl << endl; -- GitLab