From 2db206caea5a3ce69a280b33cc65a0e6b1b50b2b Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Thu, 15 Sep 2011 15:39:47 -0400
Subject: [PATCH] gcB no longer requires a genome file for BAM. New chromsweep
 tool.

---
 Makefile                                     |   5 +-
 src/bedToBam/bedToBam.cpp                    |   2 +-
 src/bedToIgv/Makefile                        |   8 +-
 src/chromsweep/Makefile                      |  46 ++++
 src/chromsweep/chromsweep.cpp                | 176 ++++++++++++
 src/chromsweep/chromsweep.h                  |  96 +++++++
 src/chromsweep/chromsweepMain.cpp            | 271 +++++++++++++++++++
 src/complementBed/Makefile                   |   8 +-
 src/flankBed/Makefile                        |   8 +-
 src/genomeCoverageBed/genomeCoverageBed.cpp  |  21 +-
 src/genomeCoverageBed/genomeCoverageBed.h    |   1 +
 src/genomeCoverageBed/genomeCoverageMain.cpp |   3 +-
 src/pairToBed/pairToBed.h                    |   1 -
 src/pairToBed/pairToBedMain.cpp              |   1 -
 src/shuffleBed/Makefile                      |   8 +-
 src/slopBed/Makefile                         |   8 +-
 src/unionBedGraphs/Makefile                  |   3 +-
 src/utils/bedFile/bedFile.cpp                |   5 +
 src/utils/bedFile/bedFile.h                  |   2 +
 src/utils/genomeFile/Makefile                |  13 +-
 src/utils/genomeFile/genomeFile.cpp          |  10 +
 src/utils/genomeFile/genomeFile.h            |   8 +-
 22 files changed, 678 insertions(+), 26 deletions(-)
 create mode 100644 src/chromsweep/Makefile
 create mode 100644 src/chromsweep/chromsweep.cpp
 create mode 100644 src/chromsweep/chromsweep.h
 create mode 100644 src/chromsweep/chromsweepMain.cpp

diff --git a/Makefile b/Makefile
index 996fa1a2..d1d4924f 100644
--- a/Makefile
+++ b/Makefile
@@ -18,6 +18,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
 		  $(SRC_DIR)/bedToBam \
 		  $(SRC_DIR)/bedToIgv \
 		  $(SRC_DIR)/bed12ToBed6 \
+		  $(SRC_DIR)/chromsweep \
 		  $(SRC_DIR)/closestBed \
 		  $(SRC_DIR)/complementBed \
 		  $(SRC_DIR)/coverageBed \
@@ -44,7 +45,6 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
 UTIL_SUBDIRS =	$(SRC_DIR)/utils/lineFileUtilities \
 				$(SRC_DIR)/utils/bedFile \
 				$(SRC_DIR)/utils/bedGraphFile \
-				$(SRC_DIR)/utils/genomeFile \
 				$(SRC_DIR)/utils/gzstream \
 				$(SRC_DIR)/utils/fileType \
 				$(SRC_DIR)/utils/bedFilePE \
@@ -52,7 +52,8 @@ UTIL_SUBDIRS =	$(SRC_DIR)/utils/lineFileUtilities \
 				$(SRC_DIR)/utils/tabFile \
 				$(SRC_DIR)/utils/BamTools \
 				$(SRC_DIR)/utils/BamTools-Ancillary \
-				$(SRC_DIR)/utils/Fasta
+				$(SRC_DIR)/utils/Fasta \
+				$(SRC_DIR)/utils/genomeFile
 
 all:
 	[ -d $(OBJ_DIR) ] || mkdir -p $(OBJ_DIR)
diff --git a/src/bedToBam/bedToBam.cpp b/src/bedToBam/bedToBam.cpp
index 41509d6f..2843a560 100644
--- a/src/bedToBam/bedToBam.cpp
+++ b/src/bedToBam/bedToBam.cpp
@@ -203,7 +203,7 @@ void ProcessBed(istream &bedInput, BedFile *bed, GenomeFile *genome, bool isBED1
             bedEntry = nullBed;
         }
     }
-    // close up
+    //close up
     bed->Close();
     writer->Close();
 }
diff --git a/src/bedToIgv/Makefile b/src/bedToIgv/Makefile
index 382b8830..3ad616b9 100644
--- a/src/bedToIgv/Makefile
+++ b/src/bedToIgv/Makefile
@@ -5,7 +5,13 @@ BIN_DIR = ../../bin/
 # -------------------
 # define our includes
 # -------------------
-INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/genomeFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
 
 # ----------------------------------
 # define our source and object files
diff --git a/src/chromsweep/Makefile b/src/chromsweep/Makefile
new file mode 100644
index 00000000..4cd8f20f
--- /dev/null
+++ b/src/chromsweep/Makefile
@@ -0,0 +1,46 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= chromsweepMain.cpp chromsweep.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= chromsweep
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+	@echo "  * linking $(PROGRAM)"
+	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+$(BUILT_OBJECTS): $(SOURCES)
+	@echo "  * compiling" $(*F).cpp
+	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+$(EXT_OBJECTS):
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+		
+clean:
+	@echo "Cleaning up."
+	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
diff --git a/src/chromsweep/chromsweep.cpp b/src/chromsweep/chromsweep.cpp
new file mode 100644
index 00000000..ce912f4b
--- /dev/null
+++ b/src/chromsweep/chromsweep.cpp
@@ -0,0 +1,176 @@
+/*****************************************************************************
+  chromsweep.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "chromsweep.h"
+#include <queue>
+#include <set>
+
+bool after(const BED &a, const BED &b);
+void report_hits(const BED &curr_qy, const vector<BED> &hits);
+vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector<BED> &db_cache, vector<BED> &hits);
+
+
+/*
+    Constructor
+*/
+ChromSweep::ChromSweep(string bedAFile, string bedBFile, bool anyHit,
+                           bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
+                           float overlapFraction, bool noHit, bool writeCount, bool forceStrand,
+                           bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput) {
+
+    _bedAFile            = bedAFile;
+    _bedBFile            = bedBFile;
+    _anyHit              = anyHit;
+    _noHit               = noHit;
+    _writeA              = writeA;
+    _writeB              = writeB;
+    _writeOverlap        = writeOverlap;
+    _writeAllOverlap     = writeAllOverlap;
+    _writeCount          = writeCount;
+    _overlapFraction     = overlapFraction;
+    _forceStrand         = forceStrand;
+    _reciprocal          = reciprocal;
+    _obeySplits          = obeySplits;
+    _bamInput            = bamInput;
+    _bamOutput           = bamOutput;
+
+    if (_anyHit || _noHit || _writeCount)
+        _printable = false;
+    else
+        _printable = true;
+
+    // create new BED file objects for A and B
+    _bedA = new BedFile(bedAFile);
+    _bedB = new BedFile(bedBFile);
+
+    Sweep();
+}
+
+
+/*
+    Destructor
+*/
+ChromSweep::~ChromSweep(void) {
+}
+
+
+bool after(const BED &a, const BED &b) {
+    return (a.start >= b.end);
+}
+
+void ChromSweep::ScanCache(const BED &curr_qy, BedLineStatus qy_status, vector<BED> &db_cache, vector<BED> &hits) {
+    if (qy_status != BED_INVALID) {
+        vector<BED>::iterator c        = db_cache.begin();
+        while (c != db_cache.end())
+        {
+            if ((curr_qy.chrom == c->chrom) && !(after(curr_qy, *c))) {
+                if (overlaps(curr_qy.start, curr_qy.end, c->start, c->end) > 0) {
+                    hits.push_back(*c);
+                }
+                ++c;
+            }
+            else {
+                c = db_cache.erase(c);
+            }
+        }
+    }
+}
+
+
+void ChromSweep::ChromCheck(BED &curr_qy, BED &curr_db, 
+                            BedLineStatus &qy_status, BedLineStatus &db_status,
+                            int &qy_lineNum, int &db_lineNum,
+                            vector<BED> &db_cache, vector<BED> &hits) 
+{
+    if ((curr_qy.chrom == curr_db.chrom) || (db_status == BED_INVALID) || (qy_status == BED_INVALID)) {
+        return;
+    }
+    
+    if (curr_qy.chrom > curr_db.chrom) {
+        while (!_bedB->Empty() && curr_db.chrom < curr_qy.chrom)
+        {
+            db_status = _bedB->GetNextBed(curr_db, db_lineNum);
+        }
+        db_cache.clear();
+    }
+    else if (curr_qy.chrom < curr_db.chrom) {
+        // report hits for the remaining queries on this chrom
+        BED tmp_curr_qy = curr_qy;
+        while (!_bedA->Empty() && tmp_curr_qy.chrom == curr_qy.chrom)
+        {
+            //db_cache = ScanCache(tmp_curr_qy, qy_status, db_cache, hits);
+            ScanCache(tmp_curr_qy, qy_status, db_cache, hits);
+
+            ReportHits(tmp_curr_qy, hits);
+            qy_status = _bedA->GetNextBed(tmp_curr_qy, qy_lineNum);
+            hits.clear();
+        }
+        // now fast forward query to catch up to database
+        while (!_bedA->Empty() && tmp_curr_qy.chrom < curr_db.chrom)
+        {
+            // hits is empty to reflect the fact that no hits are found in catch-up mode
+            ReportHits(tmp_curr_qy, hits);
+            qy_status = _bedA->GetNextBed(tmp_curr_qy, qy_lineNum);
+        }
+        curr_qy = tmp_curr_qy;
+        db_cache.clear();
+    }
+}
+
+
+void ChromSweep::ReportHits(const BED &curr_qy, const vector<BED> &hits) {
+    _bedA->reportBedTab(curr_qy);
+    cout << hits.size() << endl;
+}
+
+
+void ChromSweep::Sweep() {
+
+    int qy_lineNum = 0;
+    int db_lineNum = 0;
+
+    // current feature from each file
+    BED curr_qy, curr_db;
+    
+    // status of the current lines
+    BedLineStatus qy_status, db_status;
+    vector<BED> db_cache;
+    vector<BED> hits;
+    
+    // open the files; get the first line from each
+    _bedA->Open();
+    _bedB->Open();
+    qy_status = _bedA->GetNextBed(curr_qy, qy_lineNum);
+    db_status = _bedB->GetNextBed(curr_db, db_lineNum);
+    while (!_bedA->Empty()) {
+        // have we changed chromosomes?
+        ChromCheck(curr_qy, curr_db, qy_status, db_status, qy_lineNum, db_lineNum, db_cache, hits);
+        // scan the database cache for hits
+        //db_cache = ScanCache(curr_qy, qy_status, db_cache, hits);
+        ScanCache(curr_qy, qy_status, db_cache, hits);
+        // advance the db until we are ahead of the query. update hits and cache as necessary
+        while (!_bedB->Empty() && 
+               curr_qy.chrom == curr_db.chrom &&
+               !(after(curr_db, curr_qy)))
+        {
+            if (overlaps(curr_qy.start, curr_qy.end, curr_db.start, curr_db.end) > 0) {
+                hits.push_back(curr_db);
+            }
+            db_cache.push_back(curr_db);
+            db_status = _bedB->GetNextBed(curr_db, db_lineNum);
+        }
+        // report the hits for this query and reset for the next.
+        ReportHits(curr_qy, hits);
+        hits.clear();
+        qy_status = _bedA->GetNextBed(curr_qy, qy_lineNum);
+    }
+}
diff --git a/src/chromsweep/chromsweep.h b/src/chromsweep/chromsweep.h
new file mode 100644
index 00000000..0ea74440
--- /dev/null
+++ b/src/chromsweep/chromsweep.h
@@ -0,0 +1,96 @@
+/*****************************************************************************
+  chromsweepBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef CHROMSWEEP_H
+#define CHROMSWEEP_H
+
+#include "bedFile.h"
+// #include "BamReader.h"
+// #include "BamWriter.h"
+// #include "BamAncillary.h"
+// #include "BamAux.h"
+// using namespace BamTools;
+
+
+#include <vector>
+#include <queue>
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+using namespace std;
+
+
+
+class ChromSweep {
+
+public:
+
+    // constructor
+    ChromSweep(string bedAFile, string bedBFile, bool anyHit,
+                               bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
+                               float overlapFraction, bool noHit, bool writeCount, bool forceStrand,
+                               bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput);
+
+    // destructor
+    ~ChromSweep(void);
+
+private:
+
+    //------------------------------------------------
+    // private attributes
+    //------------------------------------------------
+    string _bedAFile;
+    string _bedBFile;
+
+    bool  _writeA;            // should the original A feature be reported?
+    bool  _writeB;            // should the original B feature be reported?
+    bool  _writeOverlap;
+    bool  _writeAllOverlap;
+
+    bool  _forceStrand;
+    bool  _reciprocal;
+    float _overlapFraction;
+
+    bool  _anyHit;
+    bool  _noHit;
+    bool  _writeCount;        // do we want a count of the number of overlaps in B?
+    bool  _obeySplits;
+    bool  _bamInput;
+    bool  _bamOutput;
+
+    bool _printable;
+
+    queue<BED*> _outputBuffer;
+    bool  _lastPick;
+
+    map<string, vector<BED*> > _windowA;
+    map<string, vector<BED*> > _windowB;
+
+    // instance of a bed file class.
+    BedFile *_bedA, *_bedB;
+
+    //------------------------------------------------
+    // private methods
+    //------------------------------------------------
+    void ScanCache(const BED &curr_qy, BedLineStatus qy_status, vector<BED> &db_cache, vector<BED> &hits);
+
+    void ChromCheck(BED &curr_qy, BED &curr_db, 
+                    BedLineStatus &qy_status, BedLineStatus &db_status,
+                    int &qy_lineNum, int &db_lineNum,
+                    vector<BED> &db_cache, vector<BED> &hits);
+    
+    void Sweep();
+    void ReportHits(const BED &curr_qy, const vector<BED> &hits);
+
+
+};
+
+#endif /* CHROMSWEEP_H */
diff --git a/src/chromsweep/chromsweepMain.cpp b/src/chromsweep/chromsweepMain.cpp
new file mode 100644
index 00000000..913a2a0c
--- /dev/null
+++ b/src/chromsweep/chromsweepMain.cpp
@@ -0,0 +1,271 @@
+/*****************************************************************************
+  chromsweepMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "chromsweep.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "chromsweep"
+
+
+// define our parameter checking macro
+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedAFile;
+    string bedBFile;
+
+    // input arguments
+    float overlapFraction = 1E-9;
+
+    bool haveBedA           = false;
+    bool haveBedB           = false;
+    bool noHit              = false;
+    bool anyHit             = false;
+    bool writeA             = false;
+    bool writeB             = false;
+    bool writeCount         = false;
+    bool writeOverlap       = false;
+    bool writeAllOverlap    = false;
+    bool haveFraction       = false;
+    bool reciprocalFraction = false;
+    bool forceStrand        = false;
+    bool obeySplits         = false;
+    bool inputIsBam         = false;
+    bool outputIsBam        = true;
+
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-a", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBedA = true;
+                outputIsBam = false;
+                bedAFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBedA = true;
+                inputIsBam = true;
+                bedAFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBedB = true;
+                bedBFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
+            outputIsBam = false;
+        }
+        else if(PARAMETER_CHECK("-u", 2, parameterLength)) {
+            anyHit = true;
+        }
+        else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveFraction = true;
+                overlapFraction = atof(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-wa", 3, parameterLength)) {
+            writeA = true;
+        }
+        else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {
+            writeB = true;
+        }
+        else if(PARAMETER_CHECK("-wo", 3, parameterLength)) {
+            writeOverlap = true;
+        }
+        else if(PARAMETER_CHECK("-wao", 4, parameterLength)) {
+            writeAllOverlap = true;
+            writeOverlap = true;
+        }
+        else if(PARAMETER_CHECK("-c", 2, parameterLength)) {
+            writeCount = true;
+        }
+        else if(PARAMETER_CHECK("-r", 2, parameterLength)) {
+            reciprocalFraction = true;
+        }
+        else if (PARAMETER_CHECK("-v", 2, parameterLength)) {
+            noHit = true;
+        }
+        else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
+            forceStrand = true;
+        }
+        else if (PARAMETER_CHECK("-split", 6, parameterLength)) {
+            obeySplits = true;
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBedA || !haveBedB) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (anyHit && noHit) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -v, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (writeB && writeCount) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -c, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (writeCount && writeOverlap) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (writeA && writeOverlap) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -wa OR -wo, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (writeB && writeOverlap) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (reciprocalFraction && !haveFraction) {
+        cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (anyHit && writeCount) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -c, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (anyHit && writeB) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wb, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (anyHit && writeOverlap) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wo, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+
+    if (!showHelp) {
+
+        ChromSweep *bi = new ChromSweep(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap,
+                                            writeAllOverlap, overlapFraction, noHit, writeCount, forceStrand,
+                                            reciprocalFraction, obeySplits, inputIsBam, outputIsBam);
+        delete bi;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Report overlaps between two feature files." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;
+
+    cerr << "Options: " << endl;
+
+    cerr << "\t-abam\t"         << "The A input file is in BAM format.  Output will be BAM as well." << endl << endl;
+
+    cerr << "\t-bed\t"          << "When using BAM input (-abam), write output as BED. The default" << endl;
+    cerr                        << "\t\tis to write output in BAM when using -abam." << endl << endl;
+
+    cerr << "\t-wa\t"           << "Write the original entry in A for each overlap." << endl << endl;
+
+    cerr << "\t-wb\t"           << "Write the original entry in B for each overlap." << endl;
+    cerr                        << "\t\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;
+
+    cerr << "\t-wo\t"           << "Write the original A and B entries plus the number of base" << endl;
+    cerr                        << "\t\tpairs of overlap between the two features." << endl;
+    cerr                        << "\t\t- Overlaps restricted by -f and -r." << endl;
+    cerr                        << "\t\t  Only A features with overlap are reported." << endl << endl;
+
+    cerr << "\t-wao\t"          << "Write the original A and B entries plus the number of base" << endl;
+    cerr                        << "\t\tpairs of overlap between the two features." << endl;
+    cerr                        << "\t\t- Overlapping features restricted by -f and -r." << endl;
+    cerr                        << "\t\t  However, A features w/o overlap are also reported" << endl;
+    cerr                        << "\t\t  with a NULL B feature and overlap = 0." << endl << endl;
+
+    cerr << "\t-u\t"            << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;
+    cerr                        << "\t\t- In other words, just report the fact >=1 hit was found." << endl;
+    cerr                        << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
+
+    cerr << "\t-c\t"            << "For each entry in A, report the number of overlaps with B." << endl;
+    cerr                        << "\t\t- Reports 0 for A entries that have no overlap with B." << endl;
+    cerr                        << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
+
+    cerr << "\t-v\t"            << "Only report those entries in A that have _no overlaps_ with B." << endl;
+    cerr                        << "\t\t- Similar to \"grep -v\" (an homage)." << endl << endl;
+
+    cerr << "\t-f\t"            << "Minimum overlap required as a fraction of A." << endl;
+    cerr                        << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl;
+    cerr                        << "\t\t- FLOAT (e.g. 0.50)" << endl << endl;
+
+    cerr << "\t-r\t"            << "Require that the fraction overlap be reciprocal for A and B." << endl;
+    cerr                        << "\t\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;
+    cerr                        << "\t\t  that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;
+
+    cerr << "\t-s\t"            << "Force strandedness.  That is, only report hits in B that" << endl;
+    cerr                        << "\t\toverlap A on the same strand." << endl;
+    cerr                        << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl;
+
+    cerr << "\t-split\t"        << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl;
+
+
+    // end the program here
+    exit(1);
+
+}
diff --git a/src/complementBed/Makefile b/src/complementBed/Makefile
index 1e980bd6..2f542063 100644
--- a/src/complementBed/Makefile
+++ b/src/complementBed/Makefile
@@ -5,7 +5,13 @@ BIN_DIR = ../../bin/
 # -------------------
 # define our includes
 # -------------------
-INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/genomeFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
 
 # ----------------------------------
 # define our source and object files
diff --git a/src/flankBed/Makefile b/src/flankBed/Makefile
index bb3416e7..1da20693 100644
--- a/src/flankBed/Makefile
+++ b/src/flankBed/Makefile
@@ -5,7 +5,13 @@ BIN_DIR = ../../bin/
 # -------------------
 # define our includes
 # -------------------
-INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/genomeFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
 
 # ----------------------------------
 # define our source and object files
diff --git a/src/genomeCoverageBed/genomeCoverageBed.cpp b/src/genomeCoverageBed/genomeCoverageBed.cpp
index cb0dfc95..eecc73ca 100644
--- a/src/genomeCoverageBed/genomeCoverageBed.cpp
+++ b/src/genomeCoverageBed/genomeCoverageBed.cpp
@@ -13,7 +13,7 @@ Licenced under the GNU General Public License 2.0 license.
 #include "genomeCoverageBed.h"
 
 
-BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile, 
+BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,
                                      bool eachBase, bool startSites, 
                                      bool bedGraph, bool bedGraphAll,
                                      int max, float scale,
@@ -43,15 +43,20 @@ BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,
     _currChromName = "";
     _currChromSize = 0 ;
 
-    _bed = new BedFile(bedFile);
-    _genome = new GenomeFile(genomeFile);
-
+    
+    if (_bamInput == false) {
+        _genome = new GenomeFile(genomeFile);
+    }
+    
     PrintTrackDefinitionLine();
 
-    if (_bamInput == false)
+    if (_bamInput == false) {
+        _bed = new BedFile(bedFile);
         CoverageBed();
-    else
-        CoverageBam(_bed->bedFile);
+    }
+    else {
+        CoverageBam(_bedFile);
+    }
 }
 
 void BedGenomeCoverage::PrintTrackDefinitionLine()
@@ -214,6 +219,8 @@ void BedGenomeCoverage::CoverageBam(string bamFile) {
     string header = reader.GetHeaderText();
     RefVector refs = reader.GetReferenceData();
 
+    // load the BAM header references into a BEDTools "genome file"
+    _genome = new GenomeFile(refs);
     // convert each aligned BAM entry to BED
     // and compute coverage on B
     BamAlignment bam;
diff --git a/src/genomeCoverageBed/genomeCoverageBed.h b/src/genomeCoverageBed/genomeCoverageBed.h
index 8d2f7458..5e6ef4b4 100644
--- a/src/genomeCoverageBed/genomeCoverageBed.h
+++ b/src/genomeCoverageBed/genomeCoverageBed.h
@@ -90,6 +90,7 @@ private:
     // methods
     void CoverageBed();
     void CoverageBam(string bamFile);
+    void LoadBamHeaderIntoGenomeFile(const string &bamFile);
     void ReportChromCoverage(const vector<DEPTH> &, const int &chromSize, const string &chrom, chromHistMap&);
     void ReportGenomeCoverage(chromHistMap &chromDepthHist);
     void ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom);
diff --git a/src/genomeCoverageBed/genomeCoverageMain.cpp b/src/genomeCoverageBed/genomeCoverageMain.cpp
index fa57e403..a316e45a 100644
--- a/src/genomeCoverageBed/genomeCoverageMain.cpp
+++ b/src/genomeCoverageBed/genomeCoverageMain.cpp
@@ -163,7 +163,7 @@ int main(int argc, char* argv[]) {
     }
 
     // make sure we have both input files
-    if (!haveBed || !haveGenome) {
+    if (!haveBed && !haveGenome && !bamInput) {
       cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl;
       showHelp = true;
     }
@@ -197,7 +197,6 @@ int main(int argc, char* argv[]) {
     }
     
     if (!showHelp) {
-
         BedGenomeCoverage *bc = new BedGenomeCoverage(bedFile, genomeFile, eachBase,
                                                       startSites, bedGraph, bedGraphAll,
                                                       max, scale, bamInput, obeySplits,
diff --git a/src/pairToBed/pairToBed.h b/src/pairToBed/pairToBed.h
index d881c565..20ceb426 100644
--- a/src/pairToBed/pairToBed.h
+++ b/src/pairToBed/pairToBed.h
@@ -44,7 +44,6 @@ public:
     // constructor
     BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction,
         string searchType, bool sameStrand, bool diffStrand, bool bamInput, bool bamOutput, bool uncompressedBam, bool useEditDistance);
-
     // destructor
     ~BedIntersectPE(void);
 
diff --git a/src/pairToBed/pairToBedMain.cpp b/src/pairToBed/pairToBedMain.cpp
index f29cf978..38b8714a 100644
--- a/src/pairToBed/pairToBedMain.cpp
+++ b/src/pairToBed/pairToBedMain.cpp
@@ -154,7 +154,6 @@ int main(int argc, char* argv[]) {
         cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
         showHelp = true;
     }
-
     if (!showHelp) {
 
         BedIntersectPE *bi = new BedIntersectPE(bedAFile, bedBFile, overlapFraction,
diff --git a/src/shuffleBed/Makefile b/src/shuffleBed/Makefile
index 42e6cf97..0265ae05 100644
--- a/src/shuffleBed/Makefile
+++ b/src/shuffleBed/Makefile
@@ -5,7 +5,13 @@ BIN_DIR = ../../bin/
 # -------------------
 # define our includes
 # -------------------
-INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/genomeFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
 
 # ----------------------------------
 # define our source and object files
diff --git a/src/slopBed/Makefile b/src/slopBed/Makefile
index ba798c52..aed0b82d 100644
--- a/src/slopBed/Makefile
+++ b/src/slopBed/Makefile
@@ -5,7 +5,13 @@ BIN_DIR = ../../bin/
 # -------------------
 # define our includes
 # -------------------
-INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/genomeFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
 
 # ----------------------------------
 # define our source and object files
diff --git a/src/unionBedGraphs/Makefile b/src/unionBedGraphs/Makefile
index 00b488a6..bca92f26 100755
--- a/src/unionBedGraphs/Makefile
+++ b/src/unionBedGraphs/Makefile
@@ -10,7 +10,8 @@ INCLUDES = -I$(UTILITIES_DIR)/bedGraphFile/ \
 	   -I$(UTILITIES_DIR)/genomeFile/ \
 	   -I$(UTILITIES_DIR)/version/ \
 	   -I$(UTILITIES_DIR)/gzstream/ \
-	   -I$(UTILITIES_DIR)/fileType/
+	   -I$(UTILITIES_DIR)/fileType/ \
+	   -I$(UTILITIES_DIR)/BamTools/include
 
 # ----------------------------------
 # define our source and object files
diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp
index 970be657..0abb6973 100644
--- a/src/utils/bedFile/bedFile.cpp
+++ b/src/utils/bedFile/bedFile.cpp
@@ -156,6 +156,11 @@ void BedFile::Seek(unsigned long offset) {
     _bedStream->seekg(offset);
 }
 
+// Jump to a specific byte in the file
+bool BedFile::Empty() {
+    return _bedStream->eof();
+}
+
 
 // Close the BED file
 void BedFile::Close(void) {
diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h
index 71afd096..425c8fd7 100644
--- a/src/utils/bedFile/bedFile.h
+++ b/src/utils/bedFile/bedFile.h
@@ -407,6 +407,8 @@ public:
 
     // Jump to a specific byte in the file
     void Seek(unsigned long offset);
+    
+    bool Empty();
 
     // Close an opened BED file.
     void Close(void);
diff --git a/src/utils/genomeFile/Makefile b/src/utils/genomeFile/Makefile
index a48c6fba..e5339c68 100644
--- a/src/utils/genomeFile/Makefile
+++ b/src/utils/genomeFile/Makefile
@@ -1,10 +1,12 @@
 OBJ_DIR = ../../../obj/
 BIN_DIR = ../../../bin/
-UTILITIES_DIR = ../../utils/
+UTILITIES_DIR = ../
 # -------------------
 # define our includes
 # -------------------
-INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/fileType/
+INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include/	
 
 # ----------------------------------
 # define our source and object files
@@ -17,13 +19,14 @@ BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
 
 $(BUILT_OBJECTS): $(SOURCES)
 	@echo "  * compiling" $(*F).cpp
-	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -L$(BT_ROOT)/lib
 
 $(EXT_OBJECTS):
-	@$(MAKE) --no-print-directory -C -W $(INCLUDES)
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
 
 clean:
 	@echo "Cleaning up."
 	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
 
-.PHONY: clean
\ No newline at end of file
+.PHONY: clean
diff --git a/src/utils/genomeFile/genomeFile.cpp b/src/utils/genomeFile/genomeFile.cpp
index 67a280ea..d94e9226 100644
--- a/src/utils/genomeFile/genomeFile.cpp
+++ b/src/utils/genomeFile/genomeFile.cpp
@@ -18,6 +18,16 @@ GenomeFile::GenomeFile(const string &genomeFile) {
     loadGenomeFileIntoMap();
 }
 
+GenomeFile::GenomeFile(const RefVector &genome) {
+    for (size_t i = 0; i < genome.size(); ++i) {
+        string chrom = genome[i].RefName;
+        int length = genome[i].RefLength;
+        
+        _chromSizes[chrom] = length;
+        _chromList.push_back(chrom);
+    }
+}
+
 // Destructor
 GenomeFile::~GenomeFile(void) {
 }
diff --git a/src/utils/genomeFile/genomeFile.h b/src/utils/genomeFile/genomeFile.h
index 5da8d6b9..415be847 100644
--- a/src/utils/genomeFile/genomeFile.h
+++ b/src/utils/genomeFile/genomeFile.h
@@ -19,6 +19,9 @@
 #include <fstream>
 #include <cstring>
 #include <cstdio>
+#include "api/BamReader.h"
+#include "api/BamAux.h"
+using namespace BamTools;
 
 using namespace std;
 
@@ -31,8 +34,11 @@ class GenomeFile {
 
 public:
 
-    // Constructor
+    // Constructor using a file
     GenomeFile(const string &genomeFile);
+    
+    // Constructor using a vector of BamTools RefVector
+    GenomeFile(const RefVector &genome);
 
     // Destructor
     ~GenomeFile(void);
-- 
GitLab