From da9026ba33206cd1a307a9e8639bf32722cb6b4f Mon Sep 17 00:00:00 2001
From: Brent Pedersen <bpederse@gmail.com>
Date: Mon, 29 Oct 2012 11:40:15 -0600
Subject: [PATCH] start of allowing to get concatenated sequence of exons

---
 src/fastaFromBed/Makefile             |  4 +++-
 src/fastaFromBed/fastaFromBed.cpp     | 19 +++++++++++++++++--
 src/fastaFromBed/fastaFromBed.h       |  4 +++-
 src/fastaFromBed/fastaFromBedMain.cpp |  7 ++++++-
 4 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/src/fastaFromBed/Makefile b/src/fastaFromBed/Makefile
index 7b6d9823..dfbaeaa7 100644
--- a/src/fastaFromBed/Makefile
+++ b/src/fastaFromBed/Makefile
@@ -9,7 +9,9 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
            -I$(UTILITIES_DIR)/gzstream/ \
            -I$(UTILITIES_DIR)/genomeFile/ \
            -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/BamTools/include \
            -I$(UTILITIES_DIR)/sequenceUtilities/ \
+           -I$(UTILITIES_DIR)/BlockedIntervals \
            -I$(UTILITIES_DIR)/fileType/ \
            -I$(UTILITIES_DIR)/Fasta/ \
            -I$(UTILITIES_DIR)/version/
@@ -34,4 +36,4 @@ clean:
 	@echo "Cleaning up."
 	@rm -f $(OBJ_DIR)/fastaFromBedMain.o $(OBJ_DIR)/fastaFromBed.o
 
-.PHONY: clean
\ No newline at end of file
+.PHONY: clean
diff --git a/src/fastaFromBed/fastaFromBed.cpp b/src/fastaFromBed/fastaFromBed.cpp
index 1ad92a9f..399ac3dd 100644
--- a/src/fastaFromBed/fastaFromBed.cpp
+++ b/src/fastaFromBed/fastaFromBed.cpp
@@ -11,10 +11,11 @@
 ******************************************************************************/
 #include "lineFileUtilities.h"
 #include "fastaFromBed.h"
+#include "bedFile.h"
 
 
 Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile,
-    const string &fastaOutFile, bool useFasta, bool useStrand) {
+    const string &fastaOutFile, bool useFasta, bool useStrand, bool useExons) {
 
     _useName      = useName;
     _dbFile       = dbFile;
@@ -22,6 +23,7 @@ Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile,
     _fastaOutFile = fastaOutFile;
     _useFasta     = useFasta;
     _useStrand    = useStrand;
+    _useExons     = useExons;
 
     _bed = new BedFile(_bedFile);
 
@@ -120,7 +122,20 @@ void Bed2Fa::ExtractDNA() {
                     if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) 
                     {
                         int length = bed.end - bed.start;
-                        sequence = fr->getSubSequence(bed.chrom, bed.start, length);
+                        if(_useExons){  // -exons: build the sequence block by block
+                            bedVector bedBlocks;  // vec to store the discrete BED "blocks"
+                            GetBedBlocks(bed, bedBlocks);
+                            sequence.clear();  // drop any sequence left from the previous record
+                            for (int i = 0; i < (int) bedBlocks.size(); ++i) {
+                                // debug: cerr << bedBlocks[i].start << " " << bedBlocks[i].end << endl;
+                                sequence += fr->getSubSequence(bed.chrom,
+                                        bedBlocks[i].start,
+                                        bedBlocks[i].end - bedBlocks[i].start);
+
+                            }
+                        } else {
+                            sequence = fr->getSubSequence(bed.chrom, bed.start, length);
+                        }
                         ReportDNA(bed, sequence);
                     }
                     else
diff --git a/src/fastaFromBed/fastaFromBed.h b/src/fastaFromBed/fastaFromBed.h
index 3f5f454b..0ab73118 100644
--- a/src/fastaFromBed/fastaFromBed.h
+++ b/src/fastaFromBed/fastaFromBed.h
@@ -13,6 +13,7 @@
 #define FASTAFROMBED_H
 
 #include "bedFile.h"
+#include "BlockedIntervals.h"
 #include "sequenceUtils.h"
 #include "Fasta.h"
 #include <vector>
@@ -30,7 +31,7 @@ public:
 
     // constructor
     Bed2Fa(bool useName, const string &dbFile, const string &bedFile, const string &fastaOutFile,
-        bool useFasta, bool useStrand);
+        bool useFasta, bool useStrand, bool useExons);
 
     // destructor
     ~Bed2Fa(void);
@@ -47,6 +48,7 @@ private:
     string _fastaOutFile;
     bool _useFasta;
     bool _useStrand;
+    bool _useExons;
 
     // instance of a bed file class.
     BedFile  *_bed;
diff --git a/src/fastaFromBed/fastaFromBedMain.cpp b/src/fastaFromBed/fastaFromBedMain.cpp
index 9be9afb3..c5da8be2 100644
--- a/src/fastaFromBed/fastaFromBedMain.cpp
+++ b/src/fastaFromBed/fastaFromBedMain.cpp
@@ -43,6 +43,7 @@ int fastafrombed_main(int argc, char* argv[]) {
     bool useNameOnly = false;
     bool useFasta = true;
     bool useStrand = false;
+    bool useExons = false;
 
     // check to see if we should print out some help
     if(argc <= 1) showHelp = true;
@@ -87,6 +88,9 @@ int fastafrombed_main(int argc, char* argv[]) {
         else if(PARAMETER_CHECK("-name", 5, parameterLength)) {
             useNameOnly = true;
         }
+        else if(PARAMETER_CHECK("-exons", 6, parameterLength)) {
+            useExons = true;
+        }
         else if(PARAMETER_CHECK("-tab", 4, parameterLength)) {
             useFasta = false;
         }
@@ -105,7 +109,7 @@ int fastafrombed_main(int argc, char* argv[]) {
 
     if (!showHelp) {
 
-        Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand);
+        Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand, useExons);
         delete b2f;
     }
     else {
@@ -127,6 +131,7 @@ void fastafrombed_help(void) {
     cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl;
     cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl;
     cerr << "\t-name\tUse the name field for the FASTA header" << endl;
+    cerr << "\t-exons\tgiven a bed12, concat the sequence from exons" << endl;
 
     cerr << "\t-tab\tWrite output in TAB delimited format." << endl;
     cerr << "\t\t- Default is FASTA format." << endl << endl;
-- 
GitLab