Commit 31878f3c authored by arq5x's avatar arq5x

merge conflicts

parents 5b47f91c 7b2a5605
......@@ -31,7 +31,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/bedpeToBam \
$(SRC_DIR)/bedToIgv \
$(SRC_DIR)/bed12ToBed6 \
$(SRC_DIR)/closestBed \
$(SRC_DIR)/closestFile \
$(SRC_DIR)/clusterBed \
$(SRC_DIR)/complementBed \
$(SRC_DIR)/coverageBed \
......
......@@ -141,9 +141,9 @@ int main(int argc, char *argv[])
else if (sub_cmd == "makewindows") return windowmaker_main(argc-1, argv+1);
else if (sub_cmd == "groupby") return groupby_main(argc-1, argv+1);
else if (sub_cmd == "expand") return expand_main(argc-1, argv+1);
else if (sub_cmd == "sample") return sample_main(argc-1, argv+1);
else if (sub_cmd == "neksb1") return nek_sandbox1_main(argc-1, argv+1);
else if (sub_cmd == "regresstest") return regress_test_main(argc, argv); //this command does need all the orig args.
else if (sub_cmd == "sample") return sample_main(argc-1, argv+1);
else if (sub_cmd == "neksb1") return nek_sandbox1_main(argc-1, argv+1);
else if (sub_cmd == "regresstest") return regress_test_main(argc, argv); //this command does need all the orig args.
// help
else if (sub_cmd == "-h" || sub_cmd == "--help" ||
sub_cmd == "-help")
......
This diff is collapsed.
/*****************************************************************************
closestBed.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef CLOSESTBED_H
#define CLOSESTBED_H
#include "bedFile.h"
#include <vector>
#include <iostream>
#include <fstream>
using namespace std;
//************************************************
// Class methods and elements
//************************************************
/*
 * BedClosest
 *
 * Holds the two BED file names plus the reporting options passed to the
 * constructor, and exposes FindClosestBed() as its single public operation
 * ("find the closest feature in B to A").
 */
class BedClosest {

public:

    // Build a closest-feature finder for the A and B files with the
    // given strand, tie, distance and filtering options.
    BedClosest(string &bedAFile,
               string &bedBFile,
               bool sameStrand,
               bool diffStrand,
               string &tieMode,
               bool reportDistance,
               bool signDistance,
               string &strandedDistMode,
               bool ignoreOverlaps,
               bool ignoreUpstream,
               bool ignoreDownstream,
               bool printHeader,
               bool diffNames);

    // Tear-down.
    ~BedClosest();

    // For the features in A, find the closest feature in B.
    void FindClosestBed();

private:

    // ---- input files and tie handling ----
    string _bedAFile;
    string _bedBFile;
    string _tieMode;

    // ---- strand requirements ----
    bool _sameStrand;
    bool _diffStrand;

    // ---- distance reporting ----
    bool _reportDistance;
    bool _signDistance;
    string _strandedDistMode;

    // ---- candidate filtering ----
    bool _ignoreOverlaps;
    bool _ignoreUpstream;
    bool _ignoreDownstream;

    // ---- output options ----
    bool _printHeader;
    bool _diffNames;

    // ---- file handles for A and B ----
    BedFile *_bedA;
    BedFile *_bedB;

    // ---- helpers ----
    void FindWindowOverlaps(BED &a, vector<BED> &hits);
    void ReportClosestNotFound(const BED &a);
};
#endif /* CLOSESTBED_H */
# Builds the closestBed object files (closestMain.o, closestBed.o) into
# $(OBJ_DIR). Only compilation happens here; no linking rule is defined.
# NOTE: the directory variables already end in '/', and the uses below add
# another '/', so expanded paths contain a doubled slash.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
# BIN_DIR is defined but not referenced by the rules below.
BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= closestMain.cpp closestBed.cpp closestBed.h
OBJECTS= closestMain.o closestBed.o
# Prefix every object name with the shared object directory.
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
all: $(BUILT_OBJECTS)
.PHONY: all
# Every object depends on all of $(SOURCES), so touching any source or the
# header rebuilds both objects. $(*F) is the target's base name without its
# suffix, so each object is compiled from the like-named .cpp file.
# NOTE(review): $(LDFLAGS) is passed on a compile-only (-c) command line.
$(BUILT_OBJECTS): $(SOURCES)
@echo " * compiling" $(*F).cpp
@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
# Remove only the objects produced by this Makefile.
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/closestMain.o $(OBJ_DIR)/closestBed.o
.PHONY: clean
# Builds the closestFile object files (closestMain.o, closestFile.o) into
# $(OBJ_DIR). Only compilation happens here; no linking rule is defined.
# NOTE: the directory variables already end in '/', and the uses below add
# another '/', so expanded paths contain a doubled slash.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
# BIN_DIR is defined but not referenced by the rules below.
BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/GenomeFile/ \
-I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BamTools/src \
-I$(UTILITIES_DIR)/BlockedIntervals \
-I$(UTILITIES_DIR)/BamTools-Ancillary \
-I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/RecordOutputMgr/ \
-I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/BinTree \
-I$(UTILITIES_DIR)/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= closestMain.cpp closestFile.cpp closestFile.h
OBJECTS= closestMain.o closestFile.o
# Prefix every object name with the shared object directory.
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
all: $(BUILT_OBJECTS)
.PHONY: all
# Every object depends on all of $(SOURCES), so touching any source or the
# header rebuilds both objects. $(*F) is the target's base name without its
# suffix, so each object is compiled from the like-named .cpp file.
# NOTE(review): $(LDFLAGS) is passed on a compile-only (-c) command line.
$(BUILT_OBJECTS): $(SOURCES)
@echo " * compiling" $(*F).cpp
@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
# Remove only the objects produced by this Makefile.
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/closestMain.o $(OBJ_DIR)/closestFile.o
.PHONY: clean
/*
* closestFile.cpp
*
* Created on: Sep 25, 2014
* Author: nek3d
*/
#include "FileRecordMgr.h"
#include "RecordOutputMgr.h"
#include "closestFile.h"
#include "CloseSweep.h"
// Stores the caller's context pointer (no copy is made), allocates the
// RecordOutputMgr this object owns, and initialises that manager with the
// same context.
ClosestFile::ClosestFile(ContextClosest *context)
    : _context(context),
      _recordOutputMgr(new RecordOutputMgr())
{
    _recordOutputMgr->init(_context);
}
// Frees the output manager allocated in the constructor. The context
// pointer is not deleted here.
ClosestFile::~ClosestFile()
{
    delete _recordOutputMgr;
}
// Runs a CloseSweep over the context and prints every hit set the sweep
// produces.
//
// Returns false if the sweep cannot be initialised, true once the sweep has
// been exhausted.
bool ClosestFile::getClosest()
{
    CloseSweep sweep(_context);
    const bool sweepReady = sweep.init();
    if (!sweepReady) {
        return false;
    }

    RecordKeyVector hitSet;
    while (sweep.next(hitSet)) {
        // Without distance reporting, no distance vector is handed on.
        if (!_context->reportDistance()) {
            _recordOutputMgr->printClosest(hitSet, NULL);
            continue;
        }
        // Otherwise pass the sweep's distances along with the hits.
        _recordOutputMgr->printClosest(hitSet, &(sweep.getDistances()));
    }
    return true;
}
/*
* closestFile.h
*
* Created on: Sep 25, 2014
* Author: nek3d
*/
#ifndef NEWCLOSESTFILE_H_
#define NEWCLOSESTFILE_H_
#include "ContextClosest.h"
using namespace std;
class RecordOutputMgr;
/*
 * ClosestFile
 *
 * Driver object built from a ContextClosest. It keeps the context pointer
 * it is given and a pointer to a RecordOutputMgr; getClosest() is its one
 * public operation.
 */
class ClosestFile {

public:
    // The context pointer is stored as given.
    ClosestFile(ContextClosest *context);

    ~ClosestFile();

    // Returns a success flag.
    bool getClosest();

private:
    ContextClosest  *_context;
    RecordOutputMgr *_recordOutputMgr;
};
#endif /* NEWCLOSESTFILE_H_ */
/*
* ContextClosest.cpp
*
* Created on: Sep 25, 2014
* Author: nek3d
*/
#include "ContextClosest.h"
// Default state: every boolean option is off, ties report all candidates
// (ALL_TIES), stranded distance is relative to the reference (REF_DIST),
// and multiple databases are handled one at a time (EACH_DB).
ContextClosest::ContextClosest()
    : _haveTieMode(false),
      _ignoreOverlaps(false),
      _ignoreUpstream(false),
      _ignoreDownstream(false),
      _reportDistance(false),
      _signDistance(false),
      _haveStrandedDistMode(false),
      _diffNames(false),
      _tieMode(ALL_TIES),
      _strandedDistMode(REF_DIST),
      _multiDbMode(EACH_DB)
{
    // The closest tool only works on sorted input, so require it up front.
    setSortedInput(true);
}
// Nothing to release: this class adds only plain flag and enum members.
ContextClosest::~ContextClosest()
{
}
bool ContextClosest::parseCmdArgs(int argc, char **argv, int skipFirstArgs){
_argc = argc;
_argv = argv;
_skipFirstArgs = skipFirstArgs;
if (_argc < 2) {
setShowHelp(true);
return false;
}
setProgram(_programNames[argv[0]]);
_argsProcessed.resize(_argc - _skipFirstArgs, false);
for (_i=_skipFirstArgs; _i < argc; _i++) {
if (isUsed(_i - _skipFirstArgs)) {
continue;
}
if (strcmp(_argv[_i], "-c") == 0) {
//bypass intersect's use of the -c option, because -c
//means writeCount for intersect, but means columns for map.
if (!ContextBase::handle_c()) return false;
}
else if (strcmp(_argv[_i], "-d") == 0) {
if (!handle_d()) return false;
}
else if (strcmp(_argv[_i], "-D") == 0) {
if (!handle_D()) return false;
}
else if (strcmp(_argv[_i], "-io") == 0) {
if (!handle_io()) return false;
}
else if (strcmp(_argv[_i], "-iu") == 0) {
if (!handle_iu()) return false;
}
else if (strcmp(_argv[_i], "-id") == 0) {
if (!handle_id()) return false;
}
else if (strcmp(_argv[_i], "-N") == 0) {
if (!handle_N()) return false;
}
else if (strcmp(_argv[_i], "-t") == 0) {
if (!handle_t()) return false;
}
else if (strcmp(_argv[_i], "-mdb") == 0) {
if (!handle_mdb()) return false;
}
}
return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
}
// Validates the parsed options. The ContextIntersect checks run first; the
// closest-specific checks follow. On the first closest-specific failure
// _errorMsg is set and false is returned.
bool ContextClosest::isValidState()
{
    if (!ContextIntersect::isValidState()) {
        return false;
    }

    // A requested tie mode must be one of the three known values.
    const bool knownTieMode = (_tieMode == ALL_TIES)
                           || (_tieMode == FIRST_TIE)
                           || (_tieMode == LAST_TIE);
    if (_haveTieMode && !knownTieMode) {
        _errorMsg = "\n*****\n*****ERROR: Request \"all\" or \"first\" or \"last\" for Tie Mode (-t)\n*****\n";
        return false;
    }

    // Likewise for a requested stranded-distance mode.
    const bool knownDistMode = (_strandedDistMode == A_DIST)
                            || (_strandedDistMode == B_DIST)
                            || (_strandedDistMode == REF_DIST);
    if (_haveStrandedDistMode && !knownDistMode) {
        _errorMsg = "\n*****\n*****ERROR: Request \"a\" or \"b\" or \"ref\" for Stranded Distance Mode (-D)\n*****\n";
        return false;
    }

    // Ignoring both directions at once is rejected.
    if (_ignoreUpstream && _ignoreDownstream) {
        _errorMsg = "\n*****\n*****ERROR: Request either -iu OR -id, not both.\n*****\n";
        return false;
    }

    // Either directional filter needs -D to have been given.
    const bool directionalFilter = _ignoreUpstream || _ignoreDownstream;
    if (directionalFilter && !_haveStrandedDistMode) {
        _errorMsg = "\n*****\n*****ERROR: When requesting -iu or -id, you also need to specify -D.\n*****\n";
        return false;
    }

    return true;
}
// -d : turn on distance reporting and mark the flag as consumed.
// Always succeeds.
bool ContextClosest::handle_d()
{
    _reportDistance = true;
    markUsed(_i - _skipFirstArgs);

    return true;
}
bool ContextClosest::handle_D() {
bool strandError = false;
if ((_i+1) < _argc) {
_reportDistance = true;
_signDistance = true;
_haveStrandedDistMode = true;
QuickString modeStr(_argv[_i + 1]);
if (modeStr == "ref") {
_strandedDistMode = REF_DIST;
} else if (modeStr == "a") {
_strandedDistMode = A_DIST;
} else if (modeStr == "b") {
_strandedDistMode = B_DIST;
} else {
strandError = true;
}
} else {
strandError = true;
}
if (!strandError) {
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
return true;
}
_errorMsg = "*****ERROR: -D option must be followed with \"ref\", \"a\", or \"b\"";
return false;
}
// -io : set the ignore-overlaps flag and mark the option as consumed.
// Always succeeds.
bool ContextClosest::handle_io()
{
    _ignoreOverlaps = true;
    markUsed(_i - _skipFirstArgs);

    return true;
}
// -iu : set the ignore-upstream flag and mark the option as consumed.
// Always succeeds.
bool ContextClosest::handle_iu()
{
    _ignoreUpstream = true;
    markUsed(_i - _skipFirstArgs);

    return true;
}
bool ContextClosest::handle_id() {
_ignoreUpstream = true;
markUsed(_i - _skipFirstArgs);
return true;
}
// -N : set the different-names flag and mark the option as consumed.
// Always succeeds.
bool ContextClosest::handle_N()
{
    _diffNames = true;
    markUsed(_i - _skipFirstArgs);

    return true;
}
bool ContextClosest::handle_t()
{
bool tieError = false;
if ((_i+1) < _argc) {
_haveTieMode = true;
QuickString tieStr(_argv[_i+1]);
if (tieStr == "all") {
_tieMode = ALL_TIES;
} else if (tieStr == "first") {
_tieMode = FIRST_TIE;
} else if (tieStr == "last") {
_tieMode = LAST_TIE;
} else {
tieError = true;
}
} else {
tieError = true;
}
if (!tieError) {
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
return true;
}
_errorMsg = "*****ERROR: Request \"all\", \"first\", \"last\" for Tie Mode (-t)";
return false;
}
bool ContextClosest::handle_mdb()
{
bool mdbError = false;
if ((_i+1) < _argc) {
QuickString mdbStr(_argv[_i+1]);
if (mdbStr == "each") {
_multiDbMode = EACH_DB;
} else if (mdbStr == "all") {
_multiDbMode = ALL_DBS;
} else {
mdbError = true;
}
} else {
mdbError = true;
}
if (!mdbError) {
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
return true;
}
_errorMsg = "*****ERROR: Request \"each\" or \"last\" for Multiple Database Mode (-mdb)";
return false;
}
/*
* ContextClosest.h
*
* Created on: Sep 25, 2014
* Author: nek3d
*/
#ifndef CONTEXTCLOSEST_H_
#define CONTEXTCLOSEST_H_
#include "ContextIntersect.h"
/*
 * ContextClosest
 *
 * Command-line context for the closest tool. Extends ContextIntersect with
 * the closest-specific options (-d, -D, -io, -iu, -id, -N, -t, -mdb) and
 * read-only accessors for each of them.
 */
class ContextClosest : public ContextIntersect {
public:
    // ---- option value types ----

    // How ties are reported (-t).
    typedef enum { FIRST_TIE, LAST_TIE, ALL_TIES} tieModeType;
    // Stranded distance mode (-D).
    typedef enum { REF_DIST, A_DIST, B_DIST} strandedDistanceModeType;
    // How multiple databases are treated (-mdb).
    typedef enum { EACH_DB, ALL_DBS } multiDbModeType;

    // ---- lifecycle and overrides ----

    ContextClosest();
    virtual ~ContextClosest();

    virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs);
    virtual bool hasIntersectMethods() const { return true; }
    virtual bool isValidState();

    // ---- option accessors ----

    bool hasTieMode() const { return _haveTieMode; }
    bool ignoreOverlaps() const { return _ignoreOverlaps; }
    bool ignoreUpstream() const { return _ignoreUpstream; }
    bool ignoreDownstream() const { return _ignoreDownstream; }
    bool reportDistance() const { return _reportDistance; }
    bool signDistance() const { return _signDistance; }
    bool hasStrandedDistMode() const { return _haveStrandedDistMode; }
    bool diffNames() const { return _diffNames; }

    tieModeType getTieMode() const { return _tieMode; }
    strandedDistanceModeType getStrandedDistMode() const { return _strandedDistMode; }
    multiDbModeType getMultiDbMode() const { return _multiDbMode; }

private:
    // ---- option state ----
    bool _haveTieMode;
    bool _ignoreOverlaps;
    bool _ignoreUpstream;
    bool _ignoreDownstream;
    bool _reportDistance;
    bool _signDistance;
    bool _haveStrandedDistMode;
    bool _diffNames;
    tieModeType _tieMode;
    strandedDistanceModeType _strandedDistMode;
    multiDbModeType _multiDbMode;

    // ---- one handler per command-line option ----
    bool handle_d();
    bool handle_D();
    bool handle_io();
    bool handle_iu();
    bool handle_id();
    bool handle_N();
    bool handle_t();
    bool handle_mdb();
};
#endif /* CONTEXTCLOSEST_H_ */
......@@ -19,9 +19,10 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= ContextBase.cpp ContextBase.h ContextIntersect.cpp ContextIntersect.h ContextMap.cpp \
ContextMap.h ContextSample.cpp ContextSample.h ContextMerge.h ContextMerge.cpp ContextJaccard.h ContextJaccard.cpp ContextFisher.h ContextFisher.cpp
OBJECTS= ContextBase.o ContextIntersect.o ContextMap.o ContextSample.o ContextMerge.o ContextJaccard.o ContextFisher.o
SOURCES= ContextBase.cpp ContextBase.h ContextIntersect.cpp ContextIntersect.h ContextFisher.cpp ContextFisher.h ContextMap.cpp \
ContextMap.h ContextSample.cpp ContextSample.h ContextMerge.h ContextMerge.cpp ContextJaccard.h ContextJaccard.cpp \
ContextClosest.cpp ContextClosest.h
OBJECTS= ContextBase.o ContextIntersect.o ContextFisher.o ContextMap.o ContextSample.o ContextMerge.o ContextJaccard.o ContextClosest.o
_EXT_OBJECTS=ParseTools.o QuickString.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
......@@ -38,10 +39,11 @@ clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/ContextBase.o \
$(OBJ_DIR)/ContextIntersect.o \
$(OBJ_DIR)/ContextFisher.o \
$(OBJ_DIR)/ContextMap.o \
$(OBJ_DIR)/ContextSample.o \
$(OBJ_DIR)/ContextMerge.o \
$(OBJ_DIR)/ContextJaccard.o \
$(OBJ_DIR)/ContextFisher.o \
$(OBJ_DIR)/ContextClosest.o \
.PHONY: clean
......@@ -122,7 +122,7 @@ bool Record::chromAfter(const Record *other) const
// Returns true when this record is on the same chromosome as `other`
// (as decided by sameChrom) and starts at or after the position where
// `other` ends.
//
// Only the sameChrom-based test is kept. The block previously also carried
// the superseded `_chrId == other->_chrId` return ahead of it, which made
// the intended statement unreachable.
bool Record::after(const Record *other) const
{
    return (sameChrom(other) && _startPos >= other->_endPos);
}
bool Record::intersects(const Record *record,
......
This diff is collapsed.
/*
* CloseSweep.h
*
* Created on: Sep 25, 2014
* Author: nek3d
*/
#ifndef CLOSESWEEP_H_
#define CLOSESWEEP_H_
#include "NewChromsweep.h"
class ContextClosest;
/*
 * CloseSweep
 *
 * A NewChromSweep specialisation configured by a ContextClosest. It exposes
 * the distances that go with the most recently produced results through
 * getDistances().
 */
class CloseSweep : public NewChromSweep {

public:
    CloseSweep(ContextClosest *context);
    ~CloseSweep();

    // Read-only view of the final distance list.
    const vector<int> &getDistances() { return _finalDistances; }

private:
    ContextClosest *_context;

    // A list of candidate records.
    typedef vector<const Record *> distRecVecType;

    // Candidate records and distances on either side, plus overlaps.
    // NOTE(review): these vectors look like they hold one entry per
    // database file -- confirm against the implementation.
    vector<distRecVecType *> _minUpstreamRecs;
    vector<int> _minUpstreamDist;
    vector<distRecVecType *> _minDownstreamRecs;
    vector<int> _minDownstreamDist;
    vector<distRecVecType *> _overlapRecs;
    vector<int> _maxPrevLeftClosestEndPos;

    vector<int> _finalDistances;

    // Scratch copies used when picking the closest hit across all of
    // several databases.
    RecordKeyVector _copyRetList;
    vector<int> _copyDists;

    // Sweep hooks taken over from the chromsweep base class.
    void masterScan(RecordKeyVector &retList);
    void scanCache(int dbIdx, RecordKeyVector &retList);
    bool chromChange(int dbIdx, RecordKeyVector &retList);

    // Verdict returned by considerRecord() for a cached record.
    typedef enum { IGNORE, DELETE } rateOvlpType;
    rateOvlpType considerRecord(const Record *cacheRec, int dbIdx, bool &stopScanning);

    void finalizeSelections(int dbIdx, RecordKeyVector &retList);
    void checkMultiDbs(RecordKeyVector &retList);
};
#endif /* CLOSESWEEP_H_ */
OBJ_DIR = ../../../obj/
BIN_DIR = ../../../bin/
UTILITIES_DIR = ../../utils/
# -------------------
# define our includes
# -------------------