From d4448d675902c3faa5ecb785992ce760b46b4efe Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Fri, 17 Jun 2011 16:27:28 -0400
Subject: [PATCH] First cut at new multiBamCov tool.  I like.

---
 Makefile                            |   1 +
 src/multiBamCov/Makefile            |  48 ++++++++++++
 src/multiBamCov/multiBamCov.cpp     | 114 ++++++++++++++++++++++++++++
 src/multiBamCov/multiBamCov.h       |  55 ++++++++++++++
 src/multiBamCov/multiBamCovMain.cpp | 114 ++++++++++++++++++++++++++++
 5 files changed, 332 insertions(+)
 create mode 100644 src/multiBamCov/Makefile
 create mode 100644 src/multiBamCov/multiBamCov.cpp
 create mode 100644 src/multiBamCov/multiBamCov.h
 create mode 100644 src/multiBamCov/multiBamCovMain.cpp

diff --git a/Makefile b/Makefile
index 0a1fa06e..4abfdd2a 100644
--- a/Makefile
+++ b/Makefile
@@ -28,6 +28,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
 		  $(SRC_DIR)/linksBed \
 		  $(SRC_DIR)/maskFastaFromBed \
 		  $(SRC_DIR)/mergeBed \
+		  $(SRC_DIR)/multiBamCov \
 		  $(SRC_DIR)/nucBed \
 		  $(SRC_DIR)/overlap \
 		  $(SRC_DIR)/pairToBed \
diff --git a/src/multiBamCov/Makefile b/src/multiBamCov/Makefile
new file mode 100644
index 00000000..79412902
--- /dev/null
+++ b/src/multiBamCov/Makefile
@@ -0,0 +1,48 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= multiBamCovMain.cpp multiBamCov.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= multiBamCov
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+	@echo "  * linking $(PROGRAM)"
+	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools
+
+$(BUILT_OBJECTS): $(SOURCES)
+	@echo "  * compiling" $(*F).cpp
+	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+$(EXT_OBJECTS):
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+		
+clean:
+	@echo "Cleaning up."
+	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
diff --git a/src/multiBamCov/multiBamCov.cpp b/src/multiBamCov/multiBamCov.cpp
new file mode 100644
index 00000000..3d53c8ef
--- /dev/null
+++ b/src/multiBamCov/multiBamCov.cpp
@@ -0,0 +1,114 @@
+/*****************************************************************************
+  multiBamCov.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "multiBamCov.h"
+#include "api/BamMultiReader.h"
+
+
+/*
+    Constructor
+*/
+MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file) 
+:
+_bam_files(bam_files),
+_bed_file(bed_file)  
+{
+	_bed = new BedFile(_bed_file);
+    LoadBamFileMap();
+}
+
+
+/*
+    Destructor
+*/
+MultiCovBam::~MultiCovBam(void) 
+{}
+
+
+
+void MultiCovBam::CollectCoverage()
+{
+    BamMultiReader reader;
+    reader.SetIndexCacheMode(BamIndex::NoIndexCaching);
+    
+    if ( !reader.Open(_bam_files) )
+    {
+        cerr << "Could not open input BAM files." << endl; return;
+    }
+    else
+    {
+        // attempt to find index files
+        reader.LocateIndexes();
+
+        // if index data available for all BAM files, we can use SetRegion
+        if ( reader.HasIndexes() ) {
+            BED bed, nullBed;
+            int lineNum = 0;
+            BedLineStatus bedStatus;
+
+            _bed->Open();
+            // loop through each BED entry, jump to it, 
+            // and collect coverage from each BAM
+            while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID)
+            {
+                if (bedStatus == BED_VALID)
+                {
+                    reader.SetRegion(reader.GetReferenceID(bed.chrom),
+                                           (int) bed.start,
+                                           reader.GetReferenceID(bed.chrom),
+                                           (int) bed.end);
+                                           
+                    // everything checks out, just iterate through specified region, counting alignments
+                    vector<int> counts(_bam_files.size());
+                    BamAlignment al;
+                    while ( reader.GetNextAlignmentCore(al) )
+                    {
+                        // lookup the offset of the file name and tabulate coverage
+                        // for the appropriate file
+                        counts[bamFileMap[al.Filename]]++;
+                    }
+                    // report the cov at this interval for each file and reset
+                    _bed->reportBedTab(bed);
+                    ReportCounts(counts);
+                    bed = nullBed;
+                }
+            }
+            _bed->Close();
+        }
+        else {
+            cerr << "Could not find indexes." << endl;
+            reader.Close();
+            exit(1);
+        }
+    }
+}
+
+
+void MultiCovBam::LoadBamFileMap(void) 
+{
+    for (size_t i = 0; i < _bam_files.size(); ++i)
+    {
+        bamFileMap[_bam_files[i]] = i;
+    }
+}
+
+void MultiCovBam::ReportCounts(const vector<int> &counts) 
+{
+    for (size_t i = 0; i < counts.size(); ++i)
+    {
+        if (i < counts.size() - 1)
+            cout << counts[i] << "\t";
+        else
+            cout << counts[i];
+    }
+    cout << endl;
+}
diff --git a/src/multiBamCov/multiBamCov.h b/src/multiBamCov/multiBamCov.h
new file mode 100644
index 00000000..2e8f0fb6
--- /dev/null
+++ b/src/multiBamCov/multiBamCov.h
@@ -0,0 +1,55 @@
+/*****************************************************************************
+  multiBamCov.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef MULTICOVBAM_H
+#define MULTICOVBAM_H
+
+#include "bedFile.h"
+#include "api/BamMultiReader.h"
+using namespace BamTools;
+
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+using namespace std;
+
+
+
+class MultiCovBam {
+
+public:
+
+    // constructor
+    MultiCovBam(const vector<string> &bam_files, const string bed_file);
+
+    // destructor
+    ~MultiCovBam(void);
+
+    void CollectCoverage();
+
+private:
+
+    //------------------------------------------------
+    // private attributes
+    //------------------------------------------------
+    vector<string> _bam_files;
+    string _bed_file;
+	BedFile *_bed;
+
+    map<string, int> bamFileMap;
+    
+    void LoadBamFileMap(void);
+    void ReportCounts(const vector<int> &counts);
+};
+
+#endif /* MULTIBAMCOV_H */
diff --git a/src/multiBamCov/multiBamCovMain.cpp b/src/multiBamCov/multiBamCovMain.cpp
new file mode 100644
index 00000000..66552e69
--- /dev/null
+++ b/src/multiBamCov/multiBamCovMain.cpp
@@ -0,0 +1,114 @@
+/*****************************************************************************
+  multiBamCovMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "multiBamCov.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "multiBamCov"
+
+
+// define our parameter checking macro
+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedFile;
+    vector<string> bamFiles;
+
+    // input arguments
+    bool haveBed           = false;
+    bool haveBams          = false;
+
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBed = true;
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-bams", 5, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBams = true;
+                i = i+1;
+                string file = argv[i];
+                while (file[0] != '-' && i < argc) {
+                    bamFiles.push_back(file);
+                    i++;
+                    if (i < argc)
+                        file = argv[i];
+                }
+                i--;
+            }
+        }
+    }
+
+    if (!showHelp) {
+        MultiCovBam *mc = new MultiCovBam(bamFiles, bedFile);
+        mc->CollectCoverage();
+        delete mc;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Counts sequence coverage for multiple bams at specific loci." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -bams aln.1.bam aln.2.bam ... aln.n.bam -bed <bed/gff/vcf>" << endl << endl;
+
+    cerr << "Options: " << endl;
+
+    cerr << "\t-bams\t"         << "The bam files." << endl << endl;
+
+    cerr << "\t-bed\t"         << "The bed file." << endl << endl;
+
+
+
+    // end the program here
+    exit(1);
+
+}
-- 
GitLab