Commit 31878f3c authored by arq5x's avatar arq5x
Browse files

merge conflicts

parents 5b47f91c 7b2a5605
......@@ -31,7 +31,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/bedpeToBam \
$(SRC_DIR)/bedToIgv \
$(SRC_DIR)/bed12ToBed6 \
$(SRC_DIR)/closestBed \
$(SRC_DIR)/closestFile \
$(SRC_DIR)/clusterBed \
$(SRC_DIR)/complementBed \
$(SRC_DIR)/coverageBed \
......
......@@ -141,9 +141,9 @@ int main(int argc, char *argv[])
else if (sub_cmd == "makewindows") return windowmaker_main(argc-1, argv+1);
else if (sub_cmd == "groupby") return groupby_main(argc-1, argv+1);
else if (sub_cmd == "expand") return expand_main(argc-1, argv+1);
else if (sub_cmd == "sample") return sample_main(argc-1, argv+1);
else if (sub_cmd == "neksb1") return nek_sandbox1_main(argc-1, argv+1);
else if (sub_cmd == "regresstest") return regress_test_main(argc, argv); //this command does need all the orig args.
else if (sub_cmd == "sample") return sample_main(argc-1, argv+1);
else if (sub_cmd == "neksb1") return nek_sandbox1_main(argc-1, argv+1);
else if (sub_cmd == "regresstest") return regress_test_main(argc, argv); //this command does need all the orig args.
// help
else if (sub_cmd == "-h" || sub_cmd == "--help" ||
sub_cmd == "-help")
......
/*****************************************************************************
closestBed.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "lineFileUtilities.h"
#include "closestBed.h"
const int MAXSLOP = 256000000; // 2*MAXSLOP = 512 megabases.
// We don't want to keep looking if we
// can't find a nearby feature within 512 Mb.
const int SLOPGROWTH = 2048000;
/*
Constructor
*/
BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool sameStrand,
bool diffStrand, string &tieMode, bool reportDistance,
bool signDistance, string &_strandedDistMode,
bool ignoreOverlaps, bool ignoreUpstream,
bool ignoreDownstream, bool printHeader,
bool diffNames)
: _bedAFile(bedAFile)
, _bedBFile(bedBFile)
, _tieMode(tieMode)
, _sameStrand(sameStrand)
, _diffStrand(diffStrand)
, _reportDistance(reportDistance)
, _signDistance(signDistance)
, _strandedDistMode(_strandedDistMode)
, _ignoreOverlaps(ignoreOverlaps)
, _ignoreUpstream(ignoreUpstream)
, _ignoreDownstream(ignoreDownstream)
, _printHeader(printHeader)
, _diffNames(diffNames)
{
_bedA = new BedFile(_bedAFile);
_bedB = new BedFile(_bedBFile);
FindClosestBed();
}
/*
Destructor
*/
BedClosest::~BedClosest(void) {
}
void BedClosest::ReportClosestNotFound(const BED &a) {
_bedA->reportBedTab(a);
if (_reportDistance == true) {
_bedB->reportNullBedTab();
cout << -1 << endl;
}
else
_bedB->reportNullBedNewLine();
}
void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
int slop = 0; // start out just looking for overlaps
// within the current bin (~128Kb)
// update the current feature's start and end
CHRPOS aFudgeStart = 0;
CHRPOS aFudgeEnd;
int numOverlaps = 0;
vector<BED> closestB;
CHRPOS minDistance = INT_MAX;
int32_t curDistance = INT_MAX;
vector<int32_t> distances;
// is there at least one feature in B on the same chrom
// as the current A feature?
if(_bedB->bedMap.find(a.chrom) != _bedB->bedMap.end()) {
while ((numOverlaps == 0) && (slop <= MAXSLOP)) {
// add some slop (starting at 0 bases) to a in hopes
// of finding a hit in B
if ((static_cast<int>(a.start) - slop) > 0)
aFudgeStart = a.start - slop;
else
aFudgeStart = 0;
if ((static_cast<int>(a.start) + slop) < (2 * MAXSLOP))
aFudgeEnd = a.end + slop;
else
aFudgeEnd = 2 * MAXSLOP;
// THE HEAVY LIFTING
// search for hits with the current slop added
_bedB->allHits(a.chrom, aFudgeStart, aFudgeEnd, a.strand,
hits, _sameStrand, _diffStrand, 0.0, false);
vector<BED>::const_iterator h = hits.begin();
vector<BED>::const_iterator hitsEnd = hits.end();
for (; h != hitsEnd; ++h) {
// skip the hit if the user doesn't want to allow same names.
if ((_diffNames == true) && (a.name == h->name))
continue;
// do the actual features overlap?
int s = max(a.start, h->start);
int e = min(a.end, h->end);
int overlapBases = (e - s);
// make sure we allow overlapping features.
if ((overlapBases > 0) && (_ignoreOverlaps == true))
continue;
else
numOverlaps++;
// there is overlap. make sure we allow overlapping features ()
if (overlapBases > 0) {
curDistance = 0;
if (curDistance < (int32_t) minDistance) {
closestB.clear();
distances.clear();
}
minDistance = 0;
closestB.push_back(*h);
distances.push_back(0);
}
// the hit is to the "left" of A
else if (h->end <= a.start) {
curDistance = (a.start - h->end) + 1;
if (_signDistance) {
if ((_strandedDistMode == "ref")
|| (_strandedDistMode == "a" && a.strand != "-")
|| (_strandedDistMode == "b" && h->strand == "-"))
{
// hit is "upstream" of A
if (_ignoreUpstream) {
numOverlaps--;
continue;
}
else {
curDistance = -curDistance;
}
}
else if (_ignoreDownstream) {
numOverlaps--;
continue;
}
}
if (abs(curDistance) < (int32_t) minDistance) {
minDistance = abs(curDistance);
closestB.clear();
closestB.push_back(*h);
distances.clear();
distances.push_back(curDistance);
}
else if (abs(curDistance) == (int32_t) minDistance) {
minDistance = abs(curDistance);
closestB.push_back(*h);
distances.push_back(curDistance);
}
}
// the hit is to the "right" of A
else if (h->start >= a.end) {
curDistance = (h->start - a.end) + 1;
if (_signDistance) {
if ((_strandedDistMode == "a" && a.strand == "-")
|| (_strandedDistMode == "b" && h->strand != "-"))
{
// hit is "upstream" of A
if (_ignoreUpstream) {
numOverlaps--;
continue;
}
else{
curDistance = -curDistance;
}
}
else if (_ignoreDownstream){
numOverlaps--;
continue;
}
}
if (abs(curDistance) < (int32_t) minDistance) {
minDistance = abs(curDistance);
closestB.clear();
closestB.push_back(*h);
distances.clear();
distances.push_back(curDistance);
}
else if (abs(curDistance) == (int32_t) minDistance) {
minDistance = abs(curDistance);
closestB.push_back(*h);
distances.push_back(curDistance);
}
}
}
// if no overlaps were found, we'll widen the range
// by SLOPGROWTH in each direction and search again.
slop += SLOPGROWTH;
} // while ((numOverlaps == 0) && (slop <= MAXSLOP))
// report the closest feature(s) in B to the current A feature.
// obey the user's reporting request (_tieMode)
if (numOverlaps > 0) {
if (closestB.size() == 1 ||
(_tieMode == "first" && closestB.size() > 0))
{
_bedA->reportBedTab(a);
if (_reportDistance == true) {
_bedB->reportBedTab(closestB[0]);
cout << distances[0] << endl;
}
else
_bedB->reportBedNewLine(closestB[0]);
}
else {
if (_tieMode == "all") {
size_t i = 0;
vector<BED>::iterator b = closestB.begin();
for (; b != closestB.end(); ++b)
{
_bedA->reportBedTab(a);
if (_reportDistance == true) {
_bedB->reportBedTab(*b);
cout << distances[i++] <<endl;
}
else
_bedB->reportBedNewLine(*b);
}
}
else if (_tieMode == "last" && closestB.size() > 0) {
_bedA->reportBedTab(a);
if (_reportDistance == true) {
_bedB->reportBedTab(closestB[closestB.size()-1]);
cout << distances[distances.size() - 1]<<endl;
}
else
_bedB->reportBedNewLine(closestB[closestB.size()-1]);
}
}
}
// there were B features on the same chrom as A, but there were
// none that met the user's criteria (e.g., that it be on the
// same chrom)
else if (numOverlaps == 0) {
ReportClosestNotFound(a);
}
}
// there is no feature in B on the same chromosome as A
else {
ReportClosestNotFound(a);
}
}
void BedClosest::FindClosestBed() {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
BED a;
vector<BED> hits; // vector of potential hits
hits.reserve(100);
_bedA->Open();
// report A's header first if asked.
if (_printHeader == true) {
_bedA->PrintHeader();
}
// process each entry in A in search of the closest feature in B
while (_bedA->GetNextBed(a)) {
if (_bedA->_status == BED_VALID) {
FindWindowOverlaps(a, hits);
hits.clear();
}
}
_bedA->Close();
}
// END ClosestBed
/*****************************************************************************
closestBed.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef CLOSESTBED_H
#define CLOSESTBED_H
#include "bedFile.h"
#include <vector>
#include <iostream>
#include <fstream>
using namespace std;
//************************************************
// Class methods and elements
//************************************************
class BedClosest {
public:
// constructor
BedClosest(string &bedAFile, string &bedBFile,
bool sameStrand, bool diffStrand, string &tieMode,
bool reportDistance, bool signDistance, string &strandedDistMode,
bool ignoreOverlaps, bool ignoreUpstream, bool ignoreDownstream,
bool printHeader, bool diffNames);
// destructor
~BedClosest(void);
// find the closest feature in B to A
void FindClosestBed();
private:
// data
string _bedAFile;
string _bedBFile;
string _tieMode;
bool _sameStrand;
bool _diffStrand;
bool _reportDistance;
bool _signDistance;
string _strandedDistMode;
bool _ignoreOverlaps;
bool _ignoreUpstream;
bool _ignoreDownstream;
bool _printHeader;
bool _diffNames;
BedFile *_bedA, *_bedB;
// methods
void FindWindowOverlaps(BED &, vector<BED> &);
void ReportClosestNotFound(const BED &a);
};
#endif /* CLOSEST_H */
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= closestMain.cpp closestBed.cpp closestBed.h
OBJECTS= closestMain.o closestBed.o
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
all: $(BUILT_OBJECTS)
.PHONY: all
$(BUILT_OBJECTS): $(SOURCES)
@echo " * compiling" $(*F).cpp
@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/closestMain.o $(OBJ_DIR)/closestBed.o
.PHONY: clean
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/GenomeFile/ \
-I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BamTools/src \
-I$(UTILITIES_DIR)/BlockedIntervals \
-I$(UTILITIES_DIR)/BamTools-Ancillary \
-I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/RecordOutputMgr/ \
-I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/BinTree \
-I$(UTILITIES_DIR)/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= closestMain.cpp closestFile.cpp closestFile.h
OBJECTS= closestMain.o closestFile.o
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
all: $(BUILT_OBJECTS)
.PHONY: all
$(BUILT_OBJECTS): $(SOURCES)
@echo " * compiling" $(*F).cpp
@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/closestMain.o $(OBJ_DIR)/closestFile.o
.PHONY: clean
/*
* newClosestFile.cpp
*
* Created on: Sep 25, 2014
* Author: nek3d
*/
#include "FileRecordMgr.h"
#include "RecordOutputMgr.h"
#include "closestFile.h"
#include "CloseSweep.h"
ClosestFile::ClosestFile(ContextClosest *context)
: _context(context),
_recordOutputMgr(NULL)
{
_recordOutputMgr = new RecordOutputMgr();
_recordOutputMgr->init(_context);
}
ClosestFile::~ClosestFile() {
delete _recordOutputMgr;
}
bool ClosestFile::getClosest() {
CloseSweep sweep(_context);
if (!sweep.init()) {
return false;
}
RecordKeyVector hitSet;
while (sweep.next(hitSet)) {
if (_context->reportDistance()) {
_recordOutputMgr->printClosest(hitSet, &(sweep.getDistances()));
} else {
_recordOutputMgr->printClosest(hitSet, NULL);
}
}
return true;
}
/*
* newClosestFile.h
*
* Created on: Sep 25, 2014
* Author: nek3d
*/
#ifndef NEWCLOSESTFILE_H_
#define NEWCLOSESTFILE_H_
#include "ContextClosest.h"
using namespace std;
class RecordOutputMgr;
class ClosestFile {
public:
ClosestFile(ContextClosest *context);
~ClosestFile(void);
bool getClosest();
private:
ContextClosest *_context;
RecordOutputMgr *_recordOutputMgr;
};
#endif /* NEWCLOSESTFILE_H_ */
/*****************************************************************************
closestMain.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "closestBed.h"
#include "version.h"
using namespace std;
// define our program name
#define PROGRAM_NAME "bedtools closest"
// define our parameter checking macro
#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
// function declarations
void closest_help(void);
int closest_main(int argc, char* argv[]) {
// our configuration variables
bool showHelp = false;
// input files
string bedAFile;
string bedBFile;
string tieMode = "all";
string strandedDistMode = "";
bool haveBedA = false;
bool haveBedB = false;
bool haveTieMode = false;
bool sameStrand = false;
bool diffStrand = false;
bool ignoreOverlaps = false;
bool ignoreUpstream = false;
bool ignoreDownstream = false;
bool reportDistance = false;
bool signDistance = false;
bool haveStrandedDistMode = false;
bool printHeader = false;
bool diffNames = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if( (PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true;
}
}
if(showHelp) closest_help();
// do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-a", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedA = true;
bedAFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedB = true;