Skip to content
Snippets Groups Projects
Commit 7b52f3e3 authored by Aaron's avatar Aaron
Browse files

Multiple changes.

	1. Added BAM support to windowBed.
	2. Forced correct BEDPE ordering for bamToBed.
	3. Added default overlapFraction = 0.0 to bedFile prototype.
parent 2232f606
No related branches found
No related tags found
No related merge requests found
......@@ -15,6 +15,7 @@
using namespace BamTools;
#include <vector>
#include <algorithm> // for swap()
#include <iostream>
#include <fstream>
#include <stdlib.h>
......@@ -38,7 +39,7 @@ void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance);
void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance);
void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, string color = "255,0,0");
void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam1,
void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2,
const RefVector &refs, bool useEditDistance);
void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts,
......@@ -153,7 +154,7 @@ void ShowHelp(void) {
cerr << "\t-ed\t" << "Use BAM edit distance (NM tag) for BED score." << endl;
cerr << "\t\t- Default for BED is to use mapping quality." << endl;
cerr << "\t\t- Default for BEDPE is to use the minimum of" << endl;
cerr << "\t\t of the two mapping qualities for the pair." << endl;
cerr << "\t\t the two mapping qualities for the pair." << endl;
cerr << "\t\t- When -ed is used with -bedpe, the total edit" << endl;
cerr << "\t\t distance from the two mates is reported." << endl << endl;
......@@ -394,7 +395,16 @@ void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVec
cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n";
exit(1);
}
}
}
// swap the ends if necessary
if ( chrom1 > chrom2 || ((chrom1 == chrom2) && (start1 > start2)) ) {
swap(chrom1, chrom2);
swap(start1, start2);
swap(end1, end2);
swap(strand1, strand2);
if (useEditDistance == true) swap(editDistance1, editDistance2);
}
// report BEDPE using min mapQuality
if (useEditDistance == false) {
......@@ -422,6 +432,7 @@ void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVec
}
}
// deprecated.
bool IsCorrectMappingForBEDPE (const BamAlignment &bam) {
......
......@@ -232,7 +232,7 @@ void ShowHelp(void) {
cerr << "\t-wao\t" << "Write the original A and B entries plus the number of base" << endl;
cerr << "\t\tpairs of overlap between the two features." << endl;
cerr << "\t\t- Overlapping features restricted by -f and -r." << endl;
cerr << "\t\t However, A features w/o overlap are also reported" << endl << endl;
cerr << "\t\t However, A features w/o overlap are also reported" << endl;
cerr << "\t\t with a NULL B feature and overlap = 0." << endl << endl;
cerr << "\t-u\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;
......
......@@ -178,11 +178,11 @@ public:
// return true if at least one overlap was found. otherwise, return false.
bool FindOneOrMoreOverlapsPerBin(string chrom, int start, int end, string strand,
bool forceStrand, float overlapFraction);
bool forceStrand, float overlapFraction = 0.0);
// return true if at least one __reciprocal__ overlap was found. otherwise, return false.
bool FindOneOrMoreReciprocalOverlapsPerBin(string chrom, int start, int end, string strand,
bool forceStrand, float overlapFraction);
bool forceStrand, float overlapFraction = 0.0);
// Given a chrom, start, end and strand for a single feature,
......
......@@ -9,14 +9,14 @@ BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/BamTools/ -I$(UTILITIES_DIR)/gzstream/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= windowMain.cpp windowBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o
_EXT_OBJECTS=bedFile.o lineFileUtilities.o BamReader.o BamWriter.o BGZF.o gzstream.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= windowBed
......@@ -37,7 +37,8 @@ $(BUILT_OBJECTS): $(SOURCES)
$(EXT_OBJECTS):
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream.o/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
clean:
@echo "Cleaning up."
......
......@@ -16,8 +16,8 @@
/*
Constructor
*/
BedWindow::BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rightSlop, bool &anyHit, bool &noHit,
bool &writeCount, bool &strandWindows, bool &matchOnStrand) {
BedWindow::BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop, bool anyHit, bool noHit,
bool writeCount, bool strandWindows, bool matchOnStrand, bool bamInput, bool bamOutput) {
_bedAFile = bedAFile;
_bedBFile = bedBFile;
......@@ -30,12 +30,26 @@ BedWindow::BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rig
_writeCount = writeCount;
_strandWindows = strandWindows;
_matchOnStrand = matchOnStrand;
_bamInput = bamInput;
_bamOutput = bamOutput;
_bedA = new BedFile(bedAFile);
_bedB = new BedFile(bedBFile);
// do the work
WindowIntersectBed();
// dealing with a proper file
if (_bedA->bedFile != "stdin") {
if (_bamInput == false)
WindowIntersectBed();
else
WindowIntersectBam(_bedA->bedFile);
}
// reading from stdin
else {
if (_bamInput == false)
WindowIntersectBed();
else
WindowIntersectBam("stdin");
}
}
......@@ -48,7 +62,7 @@ BedWindow::~BedWindow(void) {
void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) {
void BedWindow::FindWindowOverlaps(const BED &a, vector<BED> &hits) {
/*
Adjust the start and end of a based on the requested window
......@@ -58,31 +72,8 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) {
// according to the slop requested (slop = 0 by default)
int aFudgeStart = 0;
int aFudgeEnd;
AddWindow(a, aFudgeStart, aFudgeEnd);
// Does the user want to treat the windows based on strand?
// If so,
// if "+", then left is left and right is right
// if "-", the left is right and right is left.
if (_strandWindows) {
if (a.strand == "+") {
if ((a.start - _leftSlop) > 0) aFudgeStart = a.start - _leftSlop;
else aFudgeStart = 0;
aFudgeEnd = a.end + _rightSlop;
}
else {
if ((a.start - _rightSlop) > 0) aFudgeStart = a.start - _rightSlop;
else aFudgeStart = 0;
aFudgeEnd = a.end + _leftSlop;
}
}
// If not, add the windows irrespective of strand
else {
if ((a.start - _leftSlop) > 0) aFudgeStart = a.start - _leftSlop;
else aFudgeStart = 0;
aFudgeEnd = a.end + _rightSlop;
}
/*
Now report the hits (if any) based on the window around a.
*/
......@@ -122,6 +113,19 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) {
}
}
bool BedWindow::FindOneOrMoreWindowOverlaps(const BED &a) {
// update the current feature's start and end
// according to the slop requested (slop = 0 by default)
int aFudgeStart = 0;
int aFudgeEnd;
AddWindow(a, aFudgeStart, aFudgeEnd);
bool overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _matchOnStrand);
return overlapsFound;
}
void BedWindow::WindowIntersectBed() {
......@@ -148,3 +152,99 @@ void BedWindow::WindowIntersectBed() {
}
void BedWindow::WindowIntersectBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// open our BAM writer
writer.Open("stdout", header, refs);
}
vector<BED> hits; // vector of potential hits
// reserve some space
hits.reserve(100);
_bedA->bedType = 6;
BamAlignment bam;
bool overlapsFound;
// get each set of alignments for each pair.
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.GetEndPosition(false);
// build the name field from the BAM alignment.
a.name = bam.Name;
if (bam.IsFirstMate()) a.name += "/1";
if (bam.IsSecondMate()) a.name += "/2";
a.score = ToString(bam.MapQuality);
a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";
if (_bamOutput == true) {
overlapsFound = FindOneOrMoreWindowOverlaps(a);
if (overlapsFound == true) {
if (_noHit == false)
writer.SaveAlignment(bam);
}
else {
if (_noHit == true)
writer.SaveAlignment(bam);
}
}
else {
FindWindowOverlaps(a, hits);
hits.clear();
}
}
}
// close the relevant BAM files.
reader.Close();
if (_bamOutput == true) {
writer.Close();
}
}
void BedWindow::AddWindow(const BED &a, int &fudgeStart, int &fudgeEnd) {
// Does the user want to treat the windows based on strand?
// If so,
// if "+", then left is left and right is right
// if "-", the left is right and right is left.
if (_strandWindows) {
if (a.strand == "+") {
if ((a.start - _leftSlop) > 0) fudgeStart = a.start - _leftSlop;
else fudgeStart = 0;
fudgeEnd = a.end + _rightSlop;
}
else {
if ((a.start - _rightSlop) > 0) fudgeStart = a.start - _rightSlop;
else fudgeStart = 0;
fudgeEnd = a.end + _leftSlop;
}
}
// If not, add the windows irrespective of strand
else {
if ((a.start - _leftSlop) > 0) fudgeStart = a.start - _leftSlop;
else fudgeStart = 0;
fudgeEnd = a.end + _rightSlop;
}
}
......@@ -12,6 +12,11 @@
#ifndef WINDOWBED_H
#define WINDOWBED_H
#include "BamReader.h"
#include "BamWriter.h"
#include "BamAux.h"
using namespace BamTools;
#include "bedFile.h"
#include <vector>
#include <iostream>
......@@ -27,8 +32,8 @@ class BedWindow {
public:
// constructor
BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rightSlop, bool &anyHit, bool &noHit,
bool &writeCount, bool &strandWindows, bool &matchOnStrand);
BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop, bool anyHit, bool noHit,
bool writeCount, bool strandWindows, bool matchOnStrand, bool bamInput, bool bamOutput);
// destructor
~BedWindow(void);
......@@ -44,13 +49,18 @@ private:
bool _noHit;
bool _strandWindows;
bool _matchOnStrand;
bool _bamInput;
bool _bamOutput;
// instance of a bed file class.
BedFile *_bedA, *_bedB;
// methods
void WindowIntersectBed();
void FindWindowOverlaps(BED &a, vector<BED> &hits);
void WindowIntersectBam(string bamFile);
void FindWindowOverlaps(const BED &a, vector<BED> &hits);
bool FindOneOrMoreWindowOverlaps(const BED &a);
void AddWindow(const BED &a, int &fudgeStart, int &fudgeEnd);
};
#endif /* WINDOWBED_H */
......@@ -14,7 +14,6 @@
using namespace std;
// define the version
#define PROGRAM_NAME "windowBed"
......@@ -24,6 +23,7 @@ using namespace std;
// function declarations
void ShowHelp(void);
int main(int argc, char* argv[]) {
// our configuration variables
......@@ -47,6 +47,8 @@ int main(int argc, char* argv[]) {
bool haveRight = false;
bool strandWindows = false;
bool matchOnStrand = false;
bool inputIsBam = false;
bool outputIsBam = true;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
......@@ -74,12 +76,23 @@ int main(int argc, char* argv[]) {
i++;
}
}
else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {
if ((i+1) < argc) {
haveBedA = true;
inputIsBam = true;
bedAFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedB = true;
bedBFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
outputIsBam = false;
}
else if(PARAMETER_CHECK("-u", 2, parameterLength)) {
anyHit = true;
......@@ -161,7 +174,8 @@ int main(int argc, char* argv[]) {
}
if (!showHelp) {
BedWindow *bi = new BedWindow(bedAFile, bedBFile, leftSlop, rightSlop, anyHit, noHit, writeCount, strandWindows, matchOnStrand);
BedWindow *bi = new BedWindow(bedAFile, bedBFile, leftSlop, rightSlop, anyHit,
noHit, writeCount, strandWindows, matchOnStrand, inputIsBam, outputIsBam);
delete bi;
return 0;
}
......@@ -170,6 +184,7 @@ int main(int argc, char* argv[]) {
}
}
void ShowHelp(void) {
cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
......@@ -183,6 +198,12 @@ void ShowHelp(void) {
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl;
cerr << "Options: " << endl;
cerr << "\t-abam\t" << "The A input file is in BAM format. Output will be BAM as well." << endl << endl;
cerr << "\t-bed\t" << "When using BAM input (-abam), write output as BED. The default" << endl;
cerr << "\t\tis to write output in BAM when using -abam." << endl << endl;
cerr << "\t-w\t" << "Base pairs added upstream and downstream of each entry" << endl;
cerr << "\t\tin A when searching for overlaps in B." << endl;
cerr << "\t\t- Creates symterical \"windows\" around A." << endl;
......@@ -220,5 +241,4 @@ void ShowHelp(void) {
// end the program here
exit(1);
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment