From 0b3f2511e7b895c261997384f7747e941da88492 Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Thu, 23 Jun 2011 15:06:17 -0400
Subject: [PATCH] Pushed tagBam to GitHub

---
 Makefile                  |   1 +
 src/tagBam/Makefile       |  51 ++++++++++++++
 src/tagBam/tagBam.cpp     | 108 +++++++++++++++++++++++++++++
 src/tagBam/tagBam.h       |  74 ++++++++++++++++++++
 src/tagBam/tagBamMain.cpp | 141 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 375 insertions(+)
 create mode 100644 src/tagBam/Makefile
 create mode 100644 src/tagBam/tagBam.cpp
 create mode 100644 src/tagBam/tagBam.h
 create mode 100644 src/tagBam/tagBamMain.cpp

diff --git a/Makefile b/Makefile
index 8e1fa0b4..4ee8b3e1 100644
--- a/Makefile
+++ b/Makefile
@@ -37,6 +37,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
 		  $(SRC_DIR)/slopBed \
 		  $(SRC_DIR)/sortBed \
 		  $(SRC_DIR)/subtractBed \
+		  $(SRC_DIR)/tagBam \
 		  $(SRC_DIR)/unionBedGraphs \
 		  $(SRC_DIR)/windowBed
 
diff --git a/src/tagBam/Makefile b/src/tagBam/Makefile
new file mode 100644
index 00000000..b2dbcc4c
--- /dev/null
+++ b/src/tagBam/Makefile
@@ -0,0 +1,51 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include \
+           -I$(UTILITIES_DIR)/BamTools-Ancillary
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= tagBamMain.cpp tagBam.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= tagBam
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+	@echo "  * linking $(PROGRAM)"
+	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)  -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools
+
+$(BUILT_OBJECTS): $(SOURCES)
+	@echo "  * compiling" $(*F).cpp
+	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+$(EXT_OBJECTS):
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/	
+		
+clean:
+	@echo "Cleaning up."
+	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
diff --git a/src/tagBam/tagBam.cpp b/src/tagBam/tagBam.cpp
new file mode 100644
index 00000000..3a65db8d
--- /dev/null
+++ b/src/tagBam/tagBam.cpp
@@ -0,0 +1,108 @@
+/*****************************************************************************
+  tagBam.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "tagBam.h"
+
+// build
+TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames,
+            const vector<string> &annoLables, bool forceStrand) :
+
+    _bamFile(bamFile),
+    _annoFileNames(annoFileNames),
+    _annoLabels(annoLables),
+    _forceStrand(forceStrand)
+{}
+
+
+// destroy and delete the open file pointers
+TagBam::~TagBam(void) {
+    delete _bed;
+    CloseAnnoFiles();
+}
+
+
+void TagBam::OpenAnnoFiles() {
+    for (size_t i=0; i < _annoFileNames.size(); ++i) {
+        BedFile *file = new BedFile(_annoFileNames[i]);
+        file->loadBedFileIntoMap();
+        _annoFiles.push_back(file);
+    }
+}
+
+
+void TagBam::CloseAnnoFiles() {
+    for (size_t i=0; i < _annoFiles.size(); ++i) {
+        BedFile *file = _annoFiles[i];
+        delete file;
+        _annoFiles[i] = NULL;
+    }
+}
+
+bool TagBam::FindOneOrMoreOverlap(const BED &a, BedFile *bedFile) {
+    return bedFile->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand,
+                                                _forceStrand, 0);
+}
+
+void TagBam::Tag() {
+
+    // open the annotations files for processing;
+    OpenAnnoFiles();
+
+    // open the BAM file
+    BamReader reader;
+    BamWriter writer;
+    reader.Open(_bamFile);
+    // get header & reference information
+    string bamHeader  = reader.GetHeaderText();
+    RefVector refs = reader.GetReferenceData();
+
+    // set compression mode
+    BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
+//    if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
+    writer.SetCompressionMode(compressionMode);
+    // open our BAM writer
+    writer.Open("stdout", bamHeader, refs);
+
+    // rip through the BAM file and test for overlaps with each annotation file.
+    BamAlignment al;
+    while (reader.GetNextAlignment(al)) {
+        if (al.IsMapped() == true) {
+            BED a;
+            a.chrom = refs.at(al.RefID).RefName;
+            a.start = al.Position;
+            a.end   = al.GetEndPosition(false, false);
+            if (al.IsReverseStrand()) a.strand = "-";
+            a.strand = "+";
+            
+            ostringstream annotations;
+            // annotate the BAM file based on overlaps with the annotation files.
+            for (size_t i = 0; i < _annoFiles.size(); ++i) 
+            {
+                // grab the current annotation file.
+                BedFile *anno = _annoFiles[i];
+                // add the label for this annotation file to tag if there is overlap
+                if (FindOneOrMoreOverlap(a, anno)) {
+                    annotations << _annoLabels[i] << ";";
+                }
+            }
+            // were there any overlaps with which to make a tag?
+            if (annotations.str().size() > 0) {
+                al.AddTag("YB", "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";"
+            }
+            writer.SaveAlignment(al);
+        }
+    }
+    reader.Close();
+
+    // close the annotations files;
+    CloseAnnoFiles();
+}
diff --git a/src/tagBam/tagBam.h b/src/tagBam/tagBam.h
new file mode 100644
index 00000000..85ada900
--- /dev/null
+++ b/src/tagBam/tagBam.h
@@ -0,0 +1,74 @@
+/*****************************************************************************
+  tagBam.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef TAGBAM_H
+#define TAGBAM_H
+
+#include "bedFile.h"
+
+#include "version.h"
+#include "api/BamReader.h"
+#include "api/BamWriter.h"
+#include "api/BamAux.h"
+#include "BamAncillary.h"
+using namespace BamTools;
+
+#include "bedFile.h"
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <stdlib.h>
+
+using namespace std;
+
+//************************************************
+// Class methods and elements
+//************************************************
+class TagBam {
+
+public:
+
+    // constructor
+    TagBam(const string &bamFile, const vector<string> &annoFileNames,
+                const vector<string> &annoLabels, bool forceStrand);
+
+    // destructor
+    ~TagBam(void);
+
+    // annotate the BAM file with all of the annotation files.
+    void Tag();
+
+private:
+
+    // input files.
+    string _bamFile;
+    vector<string> _annoFileNames;
+    vector<string> _annoLabels;
+
+    // instance of a bed file class.
+    BedFile *_bed;
+    vector<BedFile*> _annoFiles;
+
+    // do we care about strandedness when tagging?
+    bool _forceStrand;
+
+    // private function for reporting coverage information
+    void ReportAnnotations();
+
+    void OpenAnnoFiles();
+
+    void CloseAnnoFiles();
+    
+    bool FindOneOrMoreOverlap(const BED &a, BedFile *bedFile);
+};
+#endif /* TAGBAM_H */
diff --git a/src/tagBam/tagBamMain.cpp b/src/tagBam/tagBamMain.cpp
new file mode 100644
index 00000000..f66ae83b
--- /dev/null
+++ b/src/tagBam/tagBamMain.cpp
@@ -0,0 +1,141 @@
+/*****************************************************************************
+  annotateMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "tagBam.h"
+#include "version.h"
+
+using namespace std;
+
+// define the version
+#define PROGRAM_NAME "tagBam"
+
+// define our parameter checking macro
+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input file
+    string bamFile;
+
+    // parm flags
+    bool forceStrand    = false;
+    bool haveBam        = false;
+    bool haveFiles      = false;
+    bool haveLabels     = false;
+
+    // list of annotation files / names
+    vector<string> inputFiles;
+    vector<string> inputLabels;
+
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBam  = true;
+                bamFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-files", 6, parameterLength)) {
+            if ((i+1) < argc) {
+                haveFiles = true;
+                i = i+1;
+                string file = argv[i];
+                while (file[0] != '-' && i < argc) {
+                    inputFiles.push_back(file);
+                    i++;
+                    if (i < argc)
+                        file = argv[i];
+                }
+                i--;
+            }
+        }
+        else if(PARAMETER_CHECK("-tags", 5, parameterLength)) {
+            if ((i+1) < argc) {
+                haveLabels = true;
+                i = i+1;
+                string label = argv[i];
+                while (label[0] != '-' && i < argc) {
+                    inputLabels.push_back(label);
+                    i++;
+                    if (i < argc)
+                        label = argv[i];
+                }
+                i--;
+            }
+        }
+        else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
+            forceStrand = true;
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBam || !haveFiles || !haveLabels) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -i, -files, and -labels. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+        TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, forceStrand);
+        ba->Tag();
+        delete ba;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Annotates a BAM file based on overlaps with multiple BED/GFF/VCF files" << endl;
+    cerr << "\t on the intervals in -i." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <BAM> -files FILE1 FILE2 .. FILEn -labels LAB1 LAB2 ,,, LABn" << endl << endl;
+
+    cerr << "Options: " << endl;
+
+    cerr << "\t-s\t"            << "Force strandedness.  That is, only tag alignments that have the same" << endl;
+    cerr                        << "\t\tstrand as a feature in the annotation file(s)." << endl << endl;
+
+    exit(1);
+}
-- 
GitLab