Skip to content
Snippets Groups Projects
Commit d4448d67 authored by Aaron's avatar Aaron
Browse files

First cut at new multiBamCov tool. I like.

parent 773e1ae4
No related branches found
No related tags found
No related merge requests found
......@@ -28,6 +28,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/linksBed \
$(SRC_DIR)/maskFastaFromBed \
$(SRC_DIR)/mergeBed \
$(SRC_DIR)/multiBamCov \
$(SRC_DIR)/nucBed \
$(SRC_DIR)/overlap \
$(SRC_DIR)/pairToBed \
......
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/version/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \
-I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/BamTools/include
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= multiBamCovMain.cpp multiBamCov.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= multiBamCov
all: $(PROGRAM)
.PHONY: all
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
@echo " * linking $(PROGRAM)"
@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools
$(BUILT_OBJECTS): $(SOURCES)
@echo " * compiling" $(*F).cpp
@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
$(EXT_OBJECTS):
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
.PHONY: clean
/*****************************************************************************
multiBamCov.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "lineFileUtilities.h"
#include "multiBamCov.h"
#include "api/BamMultiReader.h"
/*
Constructor
*/
MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file)
:
_bam_files(bam_files),
_bed_file(bed_file)
{
_bed = new BedFile(_bed_file);
LoadBamFileMap();
}
/*
Destructor
*/
MultiCovBam::~MultiCovBam(void)
{}
void MultiCovBam::CollectCoverage()
{
BamMultiReader reader;
reader.SetIndexCacheMode(BamIndex::NoIndexCaching);
if ( !reader.Open(_bam_files) )
{
cerr << "Could not open input BAM files." << endl; return;
}
else
{
// attempt to find index files
reader.LocateIndexes();
// if index data available for all BAM files, we can use SetRegion
if ( reader.HasIndexes() ) {
BED bed, nullBed;
int lineNum = 0;
BedLineStatus bedStatus;
_bed->Open();
// loop through each BED entry, jump to it,
// and collect coverage from each BAM
while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID)
{
if (bedStatus == BED_VALID)
{
reader.SetRegion(reader.GetReferenceID(bed.chrom),
(int) bed.start,
reader.GetReferenceID(bed.chrom),
(int) bed.end);
// everything checks out, just iterate through specified region, counting alignments
vector<int> counts(_bam_files.size());
BamAlignment al;
while ( reader.GetNextAlignmentCore(al) )
{
// lookup the offset of the file name and tabulate coverage
// for the appropriate file
counts[bamFileMap[al.Filename]]++;
}
// report the cov at this interval for each file and reset
_bed->reportBedTab(bed);
ReportCounts(counts);
bed = nullBed;
}
}
_bed->Close();
}
else {
cerr << "Could not find indexes." << endl;
reader.Close();
exit(1);
}
}
}
void MultiCovBam::LoadBamFileMap(void)
{
for (size_t i = 0; i < _bam_files.size(); ++i)
{
bamFileMap[_bam_files[i]] = i;
}
}
void MultiCovBam::ReportCounts(const vector<int> &counts)
{
for (size_t i = 0; i < counts.size(); ++i)
{
if (i < counts.size() - 1)
cout << counts[i] << "\t";
else
cout << counts[i];
}
cout << endl;
}
/*****************************************************************************
multiBamCov.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef MULTICOVBAM_H
#define MULTICOVBAM_H
#include "bedFile.h"
#include "api/BamMultiReader.h"
using namespace BamTools;
#include <vector>
#include <iostream>
#include <fstream>
#include <stdlib.h>
using namespace std;
class MultiCovBam {
public:
// constructor
MultiCovBam(const vector<string> &bam_files, const string bed_file);
// destructor
~MultiCovBam(void);
void CollectCoverage();
private:
//------------------------------------------------
// private attributes
//------------------------------------------------
vector<string> _bam_files;
string _bed_file;
BedFile *_bed;
map<string, int> bamFileMap;
void LoadBamFileMap(void);
void ReportCounts(const vector<int> &counts);
};
#endif /* MULTIBAMCOV_H */
/*****************************************************************************
multiBamCovMain.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "multiBamCov.h"
#include "version.h"
using namespace std;
// define our program name
#define PROGRAM_NAME "multiBamCov"
// define our parameter checking macro
#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
// function declarations
void ShowHelp(void);
int main(int argc, char* argv[]) {
// our configuration variables
bool showHelp = false;
// input files
string bedFile;
vector<string> bamFiles;
// input arguments
bool haveBed = false;
bool haveBams = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true;
}
}
if(showHelp) ShowHelp();
// do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
if ((i+1) < argc) {
haveBed = true;
bedFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-bams", 5, parameterLength)) {
if ((i+1) < argc) {
haveBams = true;
i = i+1;
string file = argv[i];
while (file[0] != '-' && i < argc) {
bamFiles.push_back(file);
i++;
if (i < argc)
file = argv[i];
}
i--;
}
}
}
if (!showHelp) {
MultiCovBam *mc = new MultiCovBam(bamFiles, bedFile);
mc->CollectCoverage();
delete mc;
return 0;
}
else {
ShowHelp();
}
}
void ShowHelp(void) {
cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
cerr << "Summary: Counts sequence coverage for multiple bams at specific loci." << endl << endl;
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -bams aln.1.bam aln.2.bam ... aln.n.bam -bed <bed/gff/vcf>" << endl << endl;
cerr << "Options: " << endl;
cerr << "\t-bams\t" << "The bam files." << endl << endl;
cerr << "\t-bed\t" << "The bed file." << endl << endl;
// end the program here
exit(1);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment