Commit a1b9dcff authored by nkindlon's avatar nkindlon
Browse files

forgot intersectFile

parent 69d82a78
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/GenomeFile/ \
-I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BlockedIntervals \
-I$(UTILITIES_DIR)/BamTools-Ancillary \
-I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/BinTree \
-I$(UTILITIES_DIR)/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= intersectMain.cpp intersectFile.cpp intersectFile.h
OBJECTS= intersectMain.o intersectFile.o
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= intersectFile
all: $(BUILT_OBJECTS)
.PHONY: all
$(BUILT_OBJECTS): $(SOURCES)
@echo " * compiling" $(*F).cpp
@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(DFLAGS) $(INCLUDES)
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/intersectMain.o $(OBJ_DIR)/intersectFile.o
.PHONY: clean
/*****************************************************************************
intersectFile.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "intersectFile.h"
#include "Context.h"
#include "FileRecordMgr.h"
#include "NewChromsweep.h"
#include "BinTree.h"
#include "RecordOutputMgr.h"
FileIntersect::FileIntersect(Context *context)
: _context(context),
_blockMgr(NULL),
_recordOutputMgr(NULL)
{
_blockMgr = new BlockMgr();
_blockMgr->setContext(context);
_recordOutputMgr = new RecordOutputMgr();
}
FileIntersect::~FileIntersect(void) {
if (_blockMgr != NULL) {
delete _blockMgr;
_blockMgr = NULL;
}
delete _recordOutputMgr;
}
void FileIntersect::processHits(RecordKeyList &hits) {
if (hits.getKey()->getType() == FileRecordTypeChecker::BAM_RECORD_TYPE) {
RecordKeyList blockList(hits.getKey());
bool deleteBlocks = false;
_blockMgr->getBlocks(blockList, deleteBlocks);
_recordOutputMgr->printRecord(hits, &blockList);
if (deleteBlocks) {
_blockMgr->deleteBlocks(blockList);
}
return;
}
_recordOutputMgr->printRecord(hits);
}
bool FileIntersect::intersectFiles()
{
if (_context->getSortedInput()) {
return processSortedFiles();
}
return processUnsortedFiles();
}
bool FileIntersect::processSortedFiles()
{
// use the chromsweep algorithm to detect overlaps on the fly.
NewChromSweep sweep(_context);
if (!sweep.init()) {
return false;
}
if (!_recordOutputMgr->init(_context)) {
return false;
}
RecordKeyList hitSet;
while (sweep.next(hitSet)) {
if (_context->getObeySplits()) {
RecordKeyList keySet(hitSet.getKey());
RecordKeyList resultSet(hitSet.getKey());
_blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
processHits(resultSet);
} else {
processHits(hitSet);
}
}
return true;
}
bool FileIntersect::processUnsortedFiles()
{
const QuickString &databaseFilename = _context->getDatabaseFileName();
BinTree *binTree = new BinTree(_context->getDatabaseFileIdx(), _context);
FileRecordMgr *queryFRM = new FileRecordMgr(_context->getQueryFileIdx(), _context);
if (!queryFRM->open()) {
return false;
}
if (!binTree->loadDB()) {
fprintf(stderr, "Error: Unable to load database file %s.\n", databaseFilename.c_str());
delete binTree;
exit(1);
}
_context->determineOutputType();
_recordOutputMgr->init(_context);
while (!queryFRM->eof()) {
Record *queryRecord = queryFRM->allocateAndGetNextRecord();
if (queryRecord == NULL) {
continue;
}
RecordKeyList hitSet(queryRecord);
binTree->getHits(queryRecord, hitSet);
if (_context->getObeySplits()) {
RecordKeyList keySet(hitSet.getKey());
RecordKeyList resultSet;
_blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
processHits(resultSet);
} else {
processHits(hitSet);
}
queryFRM->deleteRecord(queryRecord);
}
queryFRM->close();
//clean up.
delete queryFRM;
delete binTree;
return true;
}
/*****************************************************************************
intersectFile.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef INTERSECTFILE_H
#define INTERSECTFILE_H
using namespace std;
#include "RecordKeyList.h"
using namespace std;
class Context;
class BlockMgr;
class RecordOutputMgr;
class FileIntersect {
public:
FileIntersect(Context *context);
~FileIntersect(void);
bool intersectFiles();
private:
Context *_context;
Record *_queryRec;
Record *_databaseRec;
BlockMgr *_blockMgr;
RecordOutputMgr *_recordOutputMgr;
void processHits(RecordKeyList &hits);
bool processSortedFiles();
bool processUnsortedFiles();
};
#endif /* INTERSECTFILE_H */
/*****************************************************************************
intersectMain.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
using namespace std;
#include "intersectFile.h"
#include "version.h"
#include "Context.h"
// define our program name
#define PROGRAM_NAME "bedtools intersect"
void intersect_help(void);
int intersect_main(int argc, char* argv[]) {
Context *context = new Context();
context->parseCmdArgs(argc, argv, 1);
if (!context->isValidState()) {
fprintf(stderr, "%s\n", context->getErrorMsg().c_str());
intersect_help();
return 0;
}
FileIntersect *fileIntersect = new FileIntersect(context);
bool retVal = fileIntersect->intersectFiles();
delete fileIntersect;
delete context;
return retVal ? 0 : 1;
}
void intersect_help(void) {
cerr << "\nTool: bedtools intersect (aka intersectBed)" << endl;
cerr << "Version: " << VERSION << "\n";
cerr << "Summary: Report overlaps between two feature files." << endl << endl;
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;
cerr << "Options: " << endl;
cerr << "\t-abam\t" << "The A input file is in BAM format. Output will be BAM as well." << endl << endl;
cerr << "\t-ubam\t" << "Write uncompressed BAM output. Default writes compressed BAM." << endl << endl;
cerr << "\t-bed\t" << "When using BAM input (-abam), write output as BED. The default" << endl;
cerr << "\t\tis to write output in BAM when using -abam." << endl << endl;
cerr << "\t-wa\t" << "Write the original entry in A for each overlap." << endl << endl;
cerr << "\t-wb\t" << "Write the original entry in B for each overlap." << endl;
cerr << "\t\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;
cerr << "\t-loj\t" << "Perform a \"left outer join\". That is, for each feature in A" << endl;
cerr << "\t\treport each overlap with B. If no overlaps are found, " << endl;
cerr << "\t\treport a NULL feature for B." << endl << endl;
cerr << "\t-wo\t" << "Write the original A and B entries plus the number of base" << endl;
cerr << "\t\tpairs of overlap between the two features." << endl;
cerr << "\t\t- Overlaps restricted by -f and -r." << endl;
cerr << "\t\t Only A features with overlap are reported." << endl << endl;
cerr << "\t-wao\t" << "Write the original A and B entries plus the number of base" << endl;
cerr << "\t\tpairs of overlap between the two features." << endl;
cerr << "\t\t- Overlapping features restricted by -f and -r." << endl;
cerr << "\t\t However, A features w/o overlap are also reported" << endl;
cerr << "\t\t with a NULL B feature and overlap = 0." << endl << endl;
cerr << "\t-u\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;
cerr << "\t\t- In other words, just report the fact >=1 hit was found." << endl;
cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
cerr << "\t-c\t" << "For each entry in A, report the number of overlaps with B." << endl;
cerr << "\t\t- Reports 0 for A entries that have no overlap with B." << endl;
cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
cerr << "\t-v\t" << "Only report those entries in A that have _no overlaps_ with B." << endl;
cerr << "\t\t- Similar to \"grep -v\" (an homage)." << endl << endl;
cerr << "\t-f\t" << "Minimum overlap required as a fraction of A." << endl;
cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl;
cerr << "\t\t- FLOAT (e.g. 0.50)" << endl << endl;
cerr << "\t-r\t" << "Require that the fraction overlap be reciprocal for A and B." << endl;
cerr << "\t\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;
cerr << "\t\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;
cerr << "\t-s\t" << "Require same strandedness. That is, only report hits in B" << endl;
cerr << "\t\tthat overlap A on the _same_ strand." << endl;
cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl;
cerr << "\t-S\t" << "Require different strandedness. That is, only report hits in B" << endl;
cerr << "\t\tthat overlap A on the _opposite_ strand." << endl;
cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl;
cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl;
cerr << "\t-sorted\t" << "Use the \"chromsweep\" algorithm for sorted (-k1,1 -k2,2n) input" << endl << endl;
cerr << "\t-header\t" << "Print the header from the A file prior to results." << endl << endl;
cerr << "Notes: " << endl;
cerr << "\t(1) When a BAM file is used for the A file, the alignment is retained if overlaps exist," << endl;
cerr << "\tand exlcuded if an overlap cannot be found. If multiple overlaps exist, they are not" << endl;
cerr << "\treported, as we are only testing for one or more overlaps." << endl << endl;
// end the program here
exit(1);
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment