Skip to content
Snippets Groups Projects
Commit 7b52f3e3 authored by Aaron's avatar Aaron
Browse files

Multiple changes.

	1. Added BAM support to windowBed.
	2. Forced correct BEDPE ordering for bamToBed.
	3. Added default overlapFraction = 0.0 to bedFile prototype.
parent 2232f606
No related branches found
No related tags found
No related merge requests found
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
using namespace BamTools; using namespace BamTools;
#include <vector> #include <vector>
#include <algorithm> // for swap()
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include <stdlib.h> #include <stdlib.h>
...@@ -38,7 +39,7 @@ void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance); ...@@ -38,7 +39,7 @@ void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance);
void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance); void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance);
void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, string color = "255,0,0"); void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, string color = "255,0,0");
void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam1, void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2,
const RefVector &refs, bool useEditDistance); const RefVector &refs, bool useEditDistance);
void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts, void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts,
...@@ -153,7 +154,7 @@ void ShowHelp(void) { ...@@ -153,7 +154,7 @@ void ShowHelp(void) {
cerr << "\t-ed\t" << "Use BAM edit distance (NM tag) for BED score." << endl; cerr << "\t-ed\t" << "Use BAM edit distance (NM tag) for BED score." << endl;
cerr << "\t\t- Default for BED is to use mapping quality." << endl; cerr << "\t\t- Default for BED is to use mapping quality." << endl;
cerr << "\t\t- Default for BEDPE is to use the minimum of" << endl; cerr << "\t\t- Default for BEDPE is to use the minimum of" << endl;
cerr << "\t\t of the two mapping qualities for the pair." << endl; cerr << "\t\t the two mapping qualities for the pair." << endl;
cerr << "\t\t- When -ed is used with -bedpe, the total edit" << endl; cerr << "\t\t- When -ed is used with -bedpe, the total edit" << endl;
cerr << "\t\t distance from the two mates is reported." << endl << endl; cerr << "\t\t distance from the two mates is reported." << endl << endl;
...@@ -394,7 +395,16 @@ void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVec ...@@ -394,7 +395,16 @@ void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVec
cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n"; cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n";
exit(1); exit(1);
} }
} }
// swap the ends if necessary
if ( chrom1 > chrom2 || ((chrom1 == chrom2) && (start1 > start2)) ) {
swap(chrom1, chrom2);
swap(start1, start2);
swap(end1, end2);
swap(strand1, strand2);
if (useEditDistance == true) swap(editDistance1, editDistance2);
}
// report BEDPE using min mapQuality // report BEDPE using min mapQuality
if (useEditDistance == false) { if (useEditDistance == false) {
...@@ -422,6 +432,7 @@ void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVec ...@@ -422,6 +432,7 @@ void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVec
} }
} }
// deprecated. // deprecated.
bool IsCorrectMappingForBEDPE (const BamAlignment &bam) { bool IsCorrectMappingForBEDPE (const BamAlignment &bam) {
......
...@@ -232,7 +232,7 @@ void ShowHelp(void) { ...@@ -232,7 +232,7 @@ void ShowHelp(void) {
cerr << "\t-wao\t" << "Write the original A and B entries plus the number of base" << endl; cerr << "\t-wao\t" << "Write the original A and B entries plus the number of base" << endl;
cerr << "\t\tpairs of overlap between the two features." << endl; cerr << "\t\tpairs of overlap between the two features." << endl;
cerr << "\t\t- Overlapping features restricted by -f and -r." << endl; cerr << "\t\t- Overlapping features restricted by -f and -r." << endl;
cerr << "\t\t However, A features w/o overlap are also reported" << endl << endl; cerr << "\t\t However, A features w/o overlap are also reported" << endl;
cerr << "\t\t with a NULL B feature and overlap = 0." << endl << endl; cerr << "\t\t with a NULL B feature and overlap = 0." << endl << endl;
cerr << "\t-u\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl; cerr << "\t-u\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;
......
...@@ -178,11 +178,11 @@ public: ...@@ -178,11 +178,11 @@ public:
// return true if at least one overlap was found. otherwise, return false. // return true if at least one overlap was found. otherwise, return false.
bool FindOneOrMoreOverlapsPerBin(string chrom, int start, int end, string strand, bool FindOneOrMoreOverlapsPerBin(string chrom, int start, int end, string strand,
bool forceStrand, float overlapFraction); bool forceStrand, float overlapFraction = 0.0);
// return true if at least one __reciprocal__ overlap was found. otherwise, return false. // return true if at least one __reciprocal__ overlap was found. otherwise, return false.
bool FindOneOrMoreReciprocalOverlapsPerBin(string chrom, int start, int end, string strand, bool FindOneOrMoreReciprocalOverlapsPerBin(string chrom, int start, int end, string strand,
bool forceStrand, float overlapFraction); bool forceStrand, float overlapFraction = 0.0);
// Given a chrom, start, end and strand for a single feature, // Given a chrom, start, end and strand for a single feature,
......
...@@ -9,14 +9,14 @@ BIN_DIR = ../../bin/ ...@@ -9,14 +9,14 @@ BIN_DIR = ../../bin/
# ------------------- # -------------------
# define our includes # define our includes
# ------------------- # -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/BamTools/ -I$(UTILITIES_DIR)/gzstream/
# ---------------------------------- # ----------------------------------
# define our source and object files # define our source and object files
# ---------------------------------- # ----------------------------------
SOURCES= windowMain.cpp windowBed.cpp SOURCES= windowMain.cpp windowBed.cpp
OBJECTS= $(SOURCES:.cpp=.o) OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o _EXT_OBJECTS=bedFile.o lineFileUtilities.o BamReader.o BamWriter.o BGZF.o gzstream.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= windowBed PROGRAM= windowBed
...@@ -37,7 +37,8 @@ $(BUILT_OBJECTS): $(SOURCES) ...@@ -37,7 +37,8 @@ $(BUILT_OBJECTS): $(SOURCES)
$(EXT_OBJECTS): $(EXT_OBJECTS):
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream.o/ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
clean: clean:
@echo "Cleaning up." @echo "Cleaning up."
......
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
/* /*
Constructor Constructor
*/ */
BedWindow::BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rightSlop, bool &anyHit, bool &noHit, BedWindow::BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop, bool anyHit, bool noHit,
bool &writeCount, bool &strandWindows, bool &matchOnStrand) { bool writeCount, bool strandWindows, bool matchOnStrand, bool bamInput, bool bamOutput) {
_bedAFile = bedAFile; _bedAFile = bedAFile;
_bedBFile = bedBFile; _bedBFile = bedBFile;
...@@ -30,12 +30,26 @@ BedWindow::BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rig ...@@ -30,12 +30,26 @@ BedWindow::BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rig
_writeCount = writeCount; _writeCount = writeCount;
_strandWindows = strandWindows; _strandWindows = strandWindows;
_matchOnStrand = matchOnStrand; _matchOnStrand = matchOnStrand;
_bamInput = bamInput;
_bamOutput = bamOutput;
_bedA = new BedFile(bedAFile); _bedA = new BedFile(bedAFile);
_bedB = new BedFile(bedBFile); _bedB = new BedFile(bedBFile);
// do the work // dealing with a proper file
WindowIntersectBed(); if (_bedA->bedFile != "stdin") {
if (_bamInput == false)
WindowIntersectBed();
else
WindowIntersectBam(_bedA->bedFile);
}
// reading from stdin
else {
if (_bamInput == false)
WindowIntersectBed();
else
WindowIntersectBam("stdin");
}
} }
...@@ -48,7 +62,7 @@ BedWindow::~BedWindow(void) { ...@@ -48,7 +62,7 @@ BedWindow::~BedWindow(void) {
void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) { void BedWindow::FindWindowOverlaps(const BED &a, vector<BED> &hits) {
/* /*
Adjust the start and end of a based on the requested window Adjust the start and end of a based on the requested window
...@@ -58,31 +72,8 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) { ...@@ -58,31 +72,8 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) {
// according to the slop requested (slop = 0 by default) // according to the slop requested (slop = 0 by default)
int aFudgeStart = 0; int aFudgeStart = 0;
int aFudgeEnd; int aFudgeEnd;
AddWindow(a, aFudgeStart, aFudgeEnd);
// Does the user want to treat the windows based on strand?
// If so,
// if "+", then left is left and right is right
// if "-", the left is right and right is left.
if (_strandWindows) {
if (a.strand == "+") {
if ((a.start - _leftSlop) > 0) aFudgeStart = a.start - _leftSlop;
else aFudgeStart = 0;
aFudgeEnd = a.end + _rightSlop;
}
else {
if ((a.start - _rightSlop) > 0) aFudgeStart = a.start - _rightSlop;
else aFudgeStart = 0;
aFudgeEnd = a.end + _leftSlop;
}
}
// If not, add the windows irrespective of strand
else {
if ((a.start - _leftSlop) > 0) aFudgeStart = a.start - _leftSlop;
else aFudgeStart = 0;
aFudgeEnd = a.end + _rightSlop;
}
/* /*
Now report the hits (if any) based on the window around a. Now report the hits (if any) based on the window around a.
*/ */
...@@ -122,6 +113,19 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) { ...@@ -122,6 +113,19 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) {
} }
} }
bool BedWindow::FindOneOrMoreWindowOverlaps(const BED &a) {
// update the current feature's start and end
// according to the slop requested (slop = 0 by default)
int aFudgeStart = 0;
int aFudgeEnd;
AddWindow(a, aFudgeStart, aFudgeEnd);
bool overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _matchOnStrand);
return overlapsFound;
}
void BedWindow::WindowIntersectBed() { void BedWindow::WindowIntersectBed() {
...@@ -148,3 +152,99 @@ void BedWindow::WindowIntersectBed() { ...@@ -148,3 +152,99 @@ void BedWindow::WindowIntersectBed() {
} }
void BedWindow::WindowIntersectBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// open our BAM writer
writer.Open("stdout", header, refs);
}
vector<BED> hits; // vector of potential hits
// reserve some space
hits.reserve(100);
_bedA->bedType = 6;
BamAlignment bam;
bool overlapsFound;
// get each set of alignments for each pair.
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.GetEndPosition(false);
// build the name field from the BAM alignment.
a.name = bam.Name;
if (bam.IsFirstMate()) a.name += "/1";
if (bam.IsSecondMate()) a.name += "/2";
a.score = ToString(bam.MapQuality);
a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";
if (_bamOutput == true) {
overlapsFound = FindOneOrMoreWindowOverlaps(a);
if (overlapsFound == true) {
if (_noHit == false)
writer.SaveAlignment(bam);
}
else {
if (_noHit == true)
writer.SaveAlignment(bam);
}
}
else {
FindWindowOverlaps(a, hits);
hits.clear();
}
}
}
// close the relevant BAM files.
reader.Close();
if (_bamOutput == true) {
writer.Close();
}
}
void BedWindow::AddWindow(const BED &a, int &fudgeStart, int &fudgeEnd) {
// Does the user want to treat the windows based on strand?
// If so,
// if "+", then left is left and right is right
// if "-", the left is right and right is left.
if (_strandWindows) {
if (a.strand == "+") {
if ((a.start - _leftSlop) > 0) fudgeStart = a.start - _leftSlop;
else fudgeStart = 0;
fudgeEnd = a.end + _rightSlop;
}
else {
if ((a.start - _rightSlop) > 0) fudgeStart = a.start - _rightSlop;
else fudgeStart = 0;
fudgeEnd = a.end + _leftSlop;
}
}
// If not, add the windows irrespective of strand
else {
if ((a.start - _leftSlop) > 0) fudgeStart = a.start - _leftSlop;
else fudgeStart = 0;
fudgeEnd = a.end + _rightSlop;
}
}
...@@ -12,6 +12,11 @@ ...@@ -12,6 +12,11 @@
#ifndef WINDOWBED_H #ifndef WINDOWBED_H
#define WINDOWBED_H #define WINDOWBED_H
#include "BamReader.h"
#include "BamWriter.h"
#include "BamAux.h"
using namespace BamTools;
#include "bedFile.h" #include "bedFile.h"
#include <vector> #include <vector>
#include <iostream> #include <iostream>
...@@ -27,8 +32,8 @@ class BedWindow { ...@@ -27,8 +32,8 @@ class BedWindow {
public: public:
// constructor // constructor
BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rightSlop, bool &anyHit, bool &noHit, BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop, bool anyHit, bool noHit,
bool &writeCount, bool &strandWindows, bool &matchOnStrand); bool writeCount, bool strandWindows, bool matchOnStrand, bool bamInput, bool bamOutput);
// destructor // destructor
~BedWindow(void); ~BedWindow(void);
...@@ -44,13 +49,18 @@ private: ...@@ -44,13 +49,18 @@ private:
bool _noHit; bool _noHit;
bool _strandWindows; bool _strandWindows;
bool _matchOnStrand; bool _matchOnStrand;
bool _bamInput;
bool _bamOutput;
// instance of a bed file class. // instance of a bed file class.
BedFile *_bedA, *_bedB; BedFile *_bedA, *_bedB;
// methods // methods
void WindowIntersectBed(); void WindowIntersectBed();
void FindWindowOverlaps(BED &a, vector<BED> &hits); void WindowIntersectBam(string bamFile);
void FindWindowOverlaps(const BED &a, vector<BED> &hits);
bool FindOneOrMoreWindowOverlaps(const BED &a);
void AddWindow(const BED &a, int &fudgeStart, int &fudgeEnd);
}; };
#endif /* WINDOWBED_H */ #endif /* WINDOWBED_H */
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
using namespace std; using namespace std;
// define the version // define the version
#define PROGRAM_NAME "windowBed" #define PROGRAM_NAME "windowBed"
...@@ -24,6 +23,7 @@ using namespace std; ...@@ -24,6 +23,7 @@ using namespace std;
// function declarations // function declarations
void ShowHelp(void); void ShowHelp(void);
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
// our configuration variables // our configuration variables
...@@ -47,6 +47,8 @@ int main(int argc, char* argv[]) { ...@@ -47,6 +47,8 @@ int main(int argc, char* argv[]) {
bool haveRight = false; bool haveRight = false;
bool strandWindows = false; bool strandWindows = false;
bool matchOnStrand = false; bool matchOnStrand = false;
bool inputIsBam = false;
bool outputIsBam = true;
// check to see if we should print out some help // check to see if we should print out some help
if(argc <= 1) showHelp = true; if(argc <= 1) showHelp = true;
...@@ -74,12 +76,23 @@ int main(int argc, char* argv[]) { ...@@ -74,12 +76,23 @@ int main(int argc, char* argv[]) {
i++; i++;
} }
} }
else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {
if ((i+1) < argc) {
haveBedA = true;
inputIsBam = true;
bedAFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-b", 2, parameterLength)) { else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
if ((i+1) < argc) { if ((i+1) < argc) {
haveBedB = true; haveBedB = true;
bedBFile = argv[i + 1]; bedBFile = argv[i + 1];
i++; i++;
} }
}
else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
outputIsBam = false;
} }
else if(PARAMETER_CHECK("-u", 2, parameterLength)) { else if(PARAMETER_CHECK("-u", 2, parameterLength)) {
anyHit = true; anyHit = true;
...@@ -161,7 +174,8 @@ int main(int argc, char* argv[]) { ...@@ -161,7 +174,8 @@ int main(int argc, char* argv[]) {
} }
if (!showHelp) { if (!showHelp) {
BedWindow *bi = new BedWindow(bedAFile, bedBFile, leftSlop, rightSlop, anyHit, noHit, writeCount, strandWindows, matchOnStrand); BedWindow *bi = new BedWindow(bedAFile, bedBFile, leftSlop, rightSlop, anyHit,
noHit, writeCount, strandWindows, matchOnStrand, inputIsBam, outputIsBam);
delete bi; delete bi;
return 0; return 0;
} }
...@@ -170,6 +184,7 @@ int main(int argc, char* argv[]) { ...@@ -170,6 +184,7 @@ int main(int argc, char* argv[]) {
} }
} }
void ShowHelp(void) { void ShowHelp(void) {
cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
...@@ -183,6 +198,12 @@ void ShowHelp(void) { ...@@ -183,6 +198,12 @@ void ShowHelp(void) {
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl;
cerr << "Options: " << endl; cerr << "Options: " << endl;
cerr << "\t-abam\t" << "The A input file is in BAM format. Output will be BAM as well." << endl << endl;
cerr << "\t-bed\t" << "When using BAM input (-abam), write output as BED. The default" << endl;
cerr << "\t\tis to write output in BAM when using -abam." << endl << endl;
cerr << "\t-w\t" << "Base pairs added upstream and downstream of each entry" << endl; cerr << "\t-w\t" << "Base pairs added upstream and downstream of each entry" << endl;
cerr << "\t\tin A when searching for overlaps in B." << endl; cerr << "\t\tin A when searching for overlaps in B." << endl;
cerr << "\t\t- Creates symterical \"windows\" around A." << endl; cerr << "\t\t- Creates symterical \"windows\" around A." << endl;
...@@ -220,5 +241,4 @@ void ShowHelp(void) { ...@@ -220,5 +241,4 @@ void ShowHelp(void) {
// end the program here // end the program here
exit(1); exit(1);
}
} \ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment