Commit 4b5d122d authored by Aaron Quinlan's avatar Aaron Quinlan
Browse files

Merge pull request #15 from nkindlon/master

Refactored Context class into Base and Derived classes for cleaner futur...
parents 393fc3b9 434901e5
......@@ -11,14 +11,14 @@
******************************************************************************/
#include "intersectFile.h"
#include "Context.h"
#include "ContextIntersect.h"
#include "FileRecordMgr.h"
#include "NewChromsweep.h"
#include "BinTree.h"
#include "RecordOutputMgr.h"
FileIntersect::FileIntersect(Context *context)
FileIntersect::FileIntersect(ContextIntersect *context)
: _context(context),
_blockMgr(NULL),
_recordOutputMgr(NULL)
......@@ -37,16 +37,6 @@ FileIntersect::~FileIntersect(void) {
}
// Hands the given hit list to the record output manager for printing.
void FileIntersect::processHits(RecordKeyList &hits) {
// Disabled BAM-specific path, kept for reference only: when the key was a
// BAM record it fetched the key's blocks through _blockMgr, printed them
// together with the hits, and freed the blocks again if requested.
// if (hits.getKey()->getType() == FileRecordTypeChecker::BAM_RECORD_TYPE) {
// RecordKeyList blockList(hits.getKey());
// bool deleteBlocks = false;
// _blockMgr->getBlocks(blockList, deleteBlocks);
// _recordOutputMgr->printRecord(hits, &blockList);
// if (deleteBlocks) {
// _blockMgr->deleteBlocks(blockList);
// }
// return;
// }
_recordOutputMgr->printRecord(hits);
}
......
......@@ -18,20 +18,20 @@ using namespace std;
using namespace std;
class Context;
class ContextIntersect;
class BlockMgr;
class RecordOutputMgr;
class FileIntersect {
public:
FileIntersect(Context *context);
FileIntersect(ContextIntersect *context);
~FileIntersect(void);
bool intersectFiles();
private:
Context *_context;
ContextIntersect *_context;
Record *_queryRec;
Record *_databaseRec;
BlockMgr *_blockMgr;
......
......@@ -12,7 +12,7 @@
using namespace std;
#include "intersectFile.h"
#include "Context.h"
#include "ContextIntersect.h"
// define our program name
#define PROGRAM_NAME "bedtools intersect"
......@@ -21,7 +21,7 @@ void intersect_help(void);
int intersect_main(int argc, char* argv[]) {
Context *context = new Context();
ContextIntersect *context = new ContextIntersect();
if (!context->parseCmdArgs(argc, argv, 1) || context->getShowHelp() || !context->isValidState()) {
if (!context->getErrorMsg().empty()) {
cerr << context->getErrorMsg() << endl;
......
using namespace std;
#include "Context.h"
#include "ContextIntersect.h"
#include "FileRecordMgr.h"
#include <iostream>
#include <cstdio>
......@@ -128,7 +128,7 @@ int nek_sandbox1_main(int argc,char** argv)
//
// return 0;
//
Context context;
ContextIntersect context;
context.addInputFile(argv[1]);
context.setSortedInput(true);
// context.setObeySplits(true);
......
......@@ -6,7 +6,6 @@
#include "ParseTools.h"
#include <sys/stat.h>
#include <ctime>
#include "Context.h"
const string RegressTest::_hardOptsCmd = "HARD_OPTIONS";
const string RegressTest::_correctCmd = "CORRECT";
......
......@@ -6,7 +6,7 @@
*/
#include "SampleFile.h"
#include "Context.h"
#include "ContextSample.h"
#include "FileRecordMgr.h"
#include "RecordOutputMgr.h"
......@@ -14,7 +14,7 @@ static const bool SampleRecordLtFn(const Record *rec1, const Record *rec2) {
return (*rec1 < *rec2);
}
SampleFile::SampleFile(Context *context)
SampleFile::SampleFile(ContextSample *context)
: _context(context),
_inputFile(NULL),
_outputMgr(NULL),
......
......@@ -10,7 +10,7 @@
using namespace std;
#include "Context.h"
#include "ContextSample.h"
#include "Record.h"
#include <vector>
......@@ -20,12 +20,12 @@ class RecordOutputMgr;
class SampleFile {
public:
SampleFile(Context *context);
SampleFile(ContextSample *context);
~SampleFile();
bool takeSample();
private:
Context *_context;
ContextSample *_context;
FileRecordMgr *_inputFile;
RecordOutputMgr *_outputMgr;
vector<Record *> _samples;
......
......@@ -6,7 +6,7 @@
*/
#include <iostream>
#include "Context.h"
#include "ContextSample.h"
#include "SampleFile.h"
#define PROGRAM_NAME "bedtools sample"
......@@ -15,7 +15,7 @@ void sample_help(void);
int sample_main(int argc, char **argv)
{
Context *context = new Context();
ContextSample *context = new ContextSample();
if (!context->parseCmdArgs(argc, argv, 1) || context->getShowHelp() || !context->isValidState()) {
if (!context->getErrorMsg().empty()) {
cerr << context->getErrorMsg() << endl;
......
......@@ -2,7 +2,7 @@
#include "FileRecordMgr.h"
BinTree::BinTree(int databaseFileIdx, Context *context)
BinTree::BinTree(int databaseFileIdx, ContextIntersect *context)
: _databaseFileIdx(databaseFileIdx),
_context(context),
_binOffsetsExtended(NULL),
......
......@@ -17,24 +17,23 @@ using namespace std;
#include "QuickString.h"
#include "RecordKeyList.h"
#include "Context.h"
#include "ContextIntersect.h"
class FileRecordMgr;
class Record;
class BinTree {
public:
BinTree(int databaseFileIdx, Context *context);
BinTree(int databaseFileIdx, ContextIntersect *context);
~BinTree();
bool loadDB();
void getHits(Record *record, RecordKeyList &hitSet);
private:
int _databaseFileIdx;
Context *_context;
ContextIntersect *_context;
//
// BIN HANDLING
......
/*
* ContextBase.cpp
*
* Created on: Feb 12, 2013
* Author: nek3d
*/
#include "ContextBase.h"
#include <unistd.h>
#include <sys/types.h>
// Builds a context with every option at its default value: no program
// selected, no genome file, output type undetermined, all file indices
// unset (-1), buffered output on, and every other switch off.
// Also registers the program names recognised by parseCmdArgs() and the set
// of valid score operations.
ContextBase::ContextBase()
:
  _program(UNSPECIFIED_PROGRAM),
  _useMergedIntervals(false),
  _genomeFile(NULL),
  _outputFileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE),
  _outputTypeDetermined(false),
  _skipFirstArgs(0),
  _showHelp(false),
  _obeySplits(false),
  _uncompressedBam(false),
  _useBufferedOutput(true),
  _anyHit(false),
  _noHit(false),
  _writeA(false),
  _writeB(false),
  _leftJoin(false),
  _writeCount(false),
  _writeOverlap(false),
  _writeAllOverlap(false),
  _haveFraction(false),
  _overlapFraction(1E-9),
  _reciprocal(false),
  _sameStrand(false),
  _diffStrand(false),
  _sortedInput(false),
  _printHeader(false),
  _printable(true),
  _explicitBedOutput(false),
  _queryFileIdx(-1),
  _databaseFileIdx(-1),
  _bamHeaderAndRefIdx(-1),
  _maxNumDatabaseFields(0),
  _useFullBamTags(false),
  _reportCount(false),
  _maxDistance(0),
  _reportNames(false),
  _reportScores(false),
  _numOutputRecords(0),
  _hasConstantSeed(false),
  _seed(0),
  _forwardOnly(false),
  _reverseOnly(false)
{
    // The argument-parsing state is otherwise only assigned inside
    // parseCmdArgs(). Give it defined values so that cmdArgsValid(), which
    // loops up to _argc, does not read indeterminate memory on a context
    // that was configured programmatically without parsing a command line.
    // Assigned in the body (not the initializer list) so the fix does not
    // depend on the members' declaration order.
    _argc = 0;
    _argv = NULL;
    _i = 0;

    // Sub-command names that parseCmdArgs() maps to a PROGRAM_TYPE.
    _programNames["intersect"] = INTERSECT;
    _programNames["sample"] = SAMPLE;

    // Score operations accepted as valid.
    _validScoreOps.insert("sum");
    _validScoreOps.insert("max");
    _validScoreOps.insert("min");
    _validScoreOps.insert("mean");
    _validScoreOps.insert("mode");
    _validScoreOps.insert("median");
    _validScoreOps.insert("antimode");
    _validScoreOps.insert("collapse");
}
// Releases the genome file owned by this context, if one was opened.
ContextBase::~ContextBase()
{
    // Deleting a null pointer is a no-op, so no explicit guard is required.
    delete _genomeFile;
    _genomeFile = NULL;
}
// Decides once whether output is written as BAM or as delimited text (BED)
// and caches the decision; later calls return immediately.
//
// Rules, in order:
//   1. An explicit BED request always yields text output.
//   2. Otherwise the first BAM input file selects BAM output, and its index
//      is recorded as the source of the BAM header and references.
//   3. Otherwise the output is text.
//
// Always returns true.
bool ContextBase::determineOutputType() {
    if (!_outputTypeDetermined) {
        bool bamInputFound = false;

        // Only scan the inputs when the user did not force BED output.
        if (!getExplicitBedOutput()) {
            for (_i = 0; _i < (int)_inputFiles.size(); _i++) {
                if (_inputFiles[_i]._fileType == FileRecordTypeChecker::BAM_FILE_TYPE) {
                    _bamHeaderAndRefIdx = _i;
                    bamInputFound = true;
                    break;
                }
            }
        }

        setOutputFileType(bamInputFound
                ? FileRecordTypeChecker::BAM_FILE_TYPE
                : FileRecordTypeChecker::SINGLE_LINE_DELIM_TEXT_FILE_TYPE);
        _outputTypeDetermined = true;
    }
    return true;
}
void ContextBase::openGenomeFile(const QuickString &genomeFilename)
{
_genomeFile = new NewGenomeFile(genomeFilename.c_str());
}
// Builds a genome file from a BAM reference vector and makes it this
// context's genome file. Any genome file opened earlier is released, so
// calling this more than once no longer leaks the old object.
void ContextBase::openGenomeFile(const BamTools::RefVector &refVector)
{
    // Build the replacement first so the current file stays intact if
    // construction fails.
    NewGenomeFile *replacement = new NewGenomeFile(refVector);
    delete _genomeFile;
    _genomeFile = replacement;
}
bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
_argc = argc;
_argv = argv;
_skipFirstArgs = skipFirstArgs;
if (_argc < 2) {
setShowHelp(true);
return false;
}
setProgram(_programNames[argv[0]]);
_argsProcessed.resize(_argc - _skipFirstArgs, false);
for (_i=_skipFirstArgs; _i < argc; _i++) {
if (isUsed(_i - _skipFirstArgs)) {
continue;
}
if (strcmp(_argv[_i], "-i") == 0) {
if (!handle_i()) return false;
}
else if (strcmp(_argv[_i], "-g") == 0) {
if (!handle_g()) return false;
}
else if ((strcmp(_argv[_i], "-h") == 0) || (strcmp(_argv[_i], "--help") == 0)) {
if (!handle_h()) return false;
}
else if (strcmp(_argv[_i], "-split") == 0) {
if (!handle_split()) return false;
}
else if (strcmp(_argv[_i], "-bed") == 0) {
if (!handle_bed()) return false;
}
else if (strcmp(_argv[_i], "-ubam") == 0) {
if (!handle_ubam()) return false;
}
else if (strcmp(_argv[_i], "-fbam") == 0) {
if (!handle_fbam()) return false;
}
else if(strcmp(_argv[_i], "-sorted") == 0) {
if (!handle_sorted()) return false;
}
else if (strcmp(_argv[_i], "-nobuf") == 0) {
if (!handle_nobuf()) return false;
}
else if (strcmp(_argv[_i], "-header") == 0) {
if (!handle_header()) return false;
}
else if (strcmp(_argv[_i], "-n") == 0) {
if (!handle_n()) return false;
}
else if (strcmp(_argv[_i], "-seed") == 0) {
if (!handle_seed()) return false;
}
}
return true;
}
// Reports whether the context is usable. At this level the only check is
// that every command-line argument was recognised.
bool ContextBase::isValidState()
{
    const bool argsRecognized = cmdArgsValid();
    return argsRecognized;
}
// Checks that every argument after the skipped leading ones was marked as
// used during parsing. For each argument that was not, an "Unrecognized
// parameter" line is appended to _errorMsg.
//
// Returns true only if no unrecognised argument was found.
bool ContextBase::cmdArgsValid()
{
    bool allRecognized = true;
    _i = _skipFirstArgs;
    while (_i < _argc) {
        if (!isUsed(_i - _skipFirstArgs)) {
            _errorMsg += "\n***** ERROR: Unrecognized parameter: ";
            _errorMsg += _argv[_i];
            _errorMsg += " *****";
            allRecognized = false;
        }
        _i++;
    }
    return allRecognized;
}
// Returns the index of the input file whose BAM header and references
// should be used, computing and caching it on first use.
//
// If no index has been chosen yet, the query file is used when it is a BAM
// file; otherwise the database file index is used (which may itself still
// be unset, i.e. -1).
int ContextBase::getBamHeaderAndRefIdx() {
    if (_bamHeaderAndRefIdx != -1) {
        //already found which BAM file to use for the header
        return _bamHeaderAndRefIdx;
    }

    // _queryFileIdx starts out as -1, so it must be range-checked before it
    // is used to index _inputFiles; an unset or stale index is treated as
    // "query is not BAM" instead of reading out of bounds.
    const bool queryIdxInRange =
            _queryFileIdx >= 0 &&
            _queryFileIdx < static_cast<int>(_inputFiles.size());

    if (queryIdxInRange &&
            _inputFiles[_queryFileIdx]._fileType == FileRecordTypeChecker::BAM_FILE_TYPE) {
        _bamHeaderAndRefIdx = _queryFileIdx;
    } else {
        _bamHeaderAndRefIdx = _databaseFileIdx;
    }
    return _bamHeaderAndRefIdx;
}
// Picks a seed when the user supplied none: the current time plus the
// process id. The seed is stored in _seed, handed to srand(), and returned.
int ContextBase::getUnspecifiedSeed()
{
    // thanks to Rob Long for the tip.
    const unsigned timePart = (unsigned)time(0);
    const unsigned pidPart = (unsigned)getpid();
    _seed = timePart + pidPart;
    srand(_seed);
    return _seed;
}
// Handler for the -bed flag: records that BED output was explicitly
// requested and marks the flag as consumed. Always returns true.
bool ContextBase::handle_bed()
{
    setExplicitBedOutput(true);
    const int flagPos = _i - _skipFirstArgs;
    markUsed(flagPos);
    return true;
}
// Handler for the -fbam flag: turns on the full-BAM-tags setting and marks
// the flag as consumed. Always returns true.
bool ContextBase::handle_fbam()
{
    setUseFullBamTags(true);
    const int flagPos = _i - _skipFirstArgs;
    markUsed(flagPos);
    return true;
}
// Handler for the -g option: opens the genome file named by the following
// argument and marks both the flag and its value as consumed, leaving _i on
// the value.
//
// Returns false, with _errorMsg set, when no argument follows the flag.
bool ContextBase::handle_g()
{
    const int valuePos = _i + 1;
    if (valuePos >= _argc) {
        _errorMsg = "\n***** ERROR: -g option given, but no genome file specified. *****";
        return false;
    }
    openGenomeFile(_argv[valuePos]);
    markUsed(_i - _skipFirstArgs);      // the flag itself
    _i = valuePos;
    markUsed(_i - _skipFirstArgs);      // its value
    return true;
}
// Handler for -h / --help: requests that the help text be shown and marks
// the flag as consumed. Always returns true.
bool ContextBase::handle_h()
{
    setShowHelp(true);
    const int flagPos = _i - _skipFirstArgs;
    markUsed(flagPos);
    return true;
}
// Handler for the -header flag: turns on the print-header setting and marks
// the flag as consumed. Always returns true.
bool ContextBase::handle_header()
{
    setPrintHeader(true);
    const int flagPos = _i - _skipFirstArgs;
    markUsed(flagPos);
    return true;
}
// Handler for the -i option: registers the following argument as an input
// file and marks both the flag and its value as consumed, leaving _i on the
// value.
//
// Returns false, with _errorMsg set, when no argument follows the flag.
bool ContextBase::handle_i()
{
    const int valuePos = _i + 1;
    if (valuePos >= _argc) {
        _errorMsg = "\n***** ERROR: -i option given, but no input file specified. *****";
        return false;
    }
    addInputFile(_argv[valuePos]);
    markUsed(_i - _skipFirstArgs);      // the flag itself
    _i = valuePos;
    markUsed(_i - _skipFirstArgs);      // its value
    return true;
}
bool ContextBase::handle_n()
{
if (_argc <= _i+1) {
_errorMsg = "\n***** ERROR: -n option given, but no number of output records specified. *****";
return false;
}
setNumOutputRecords(atoi(_argv[_i + 1]));
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
return true;
}
// Handler for the -nobuf flag: switches buffered output off and marks the
// flag as consumed. Always returns true.
bool ContextBase::handle_nobuf()
{
    setUseBufferedOutput(false);
    const int flagPos = _i - _skipFirstArgs;
    markUsed(flagPos);
    return true;
}
bool ContextBase::handle_seed()
{
if (_argc <= _i+1) {
_errorMsg = "\n***** ERROR: -seed option given, but no seed specified. *****";
return false;
}
_hasConstantSeed = true;
_seed = atoi(_argv[_i+1]);
srand(_seed);
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
return true;
}
// Handler for the -split flag: turns on the obey-splits setting and marks
// the flag as consumed. Always returns true.
bool ContextBase::handle_split()
{
    setObeySplits(true);
    const int flagPos = _i - _skipFirstArgs;
    markUsed(flagPos);
    return true;
}
// Handler for the -sorted flag: turns on the sorted-input setting and marks
// the flag as consumed. Always returns true.
bool ContextBase::handle_sorted()
{
    setSortedInput(true);
    const int flagPos = _i - _skipFirstArgs;
    markUsed(flagPos);
    return true;
}
// Handler for the -ubam flag: turns on the uncompressed-BAM setting and
// marks the flag as consumed. Always returns true.
bool ContextBase::handle_ubam()
{
    setUncompressedBam(true);
    const int flagPos = _i - _skipFirstArgs;
    markUsed(flagPos);
    return true;
}
/*
* ContextBase.h
*
* Created on: Feb 11, 2013
* Author: nek3d
*/
#ifndef CONTEXTBASE_H_
#define CONTEXTBASE_H_
// The ContextBase class handles the settings for an operation,
// such as merge, intersect, jaccard, etc.
//
// Settings include the input and output parameters, such as input
// files, file types (if explicitly provided), genome files,
// run options, output format, etc.
#include <cstdlib>
#include "version.h"
#include "BedtoolsTypes.h"
#include "FileRecordTypeChecker.h"
#include "NewGenomeFile.h"
#include "api/BamReader.h"
#include "api/BamAux.h"
class ContextBase {
public:
ContextBase();
virtual ~ContextBase();
typedef FileRecordTypeChecker::FILE_TYPE ContextFileType;
typedef FileRecordTypeChecker::RECORD_TYPE ContextRecordType;
typedef enum {UNSPECIFIED_PROGRAM, INTERSECT, WINDOW, CLOSEST, COVERAGE, MAP, GENOMECOV, MERGE, CLUSTER,
COMPLEMENT, SUBTRACT, SLOP, FLANK, SORT, RANDOM, SAMPLE, SHUFFLE, ANNOTATE, MULTIINTER, UNIONBEDG, PAIRTOBED,
PAIRTOPAIR,BAMTOBED, BEDTOBAM, BEDTOFASTQ, BEDPETOBAM, BED12TOBED6, GETFASTA, MASKFASTA, NUC,
MULTICOV, TAG, JACCARD, OVERLAP, IGV, LINKS,MAKEWINDOWS, GROUPBY, EXPAND } PROGRAM_TYPE;
PROGRAM_TYPE getProgram() const { return _program; }
void setProgram(PROGRAM_TYPE program) { _program = program; }
void addInputFile(const QuickString &inputFile,
ContextFileType explicitFileType = FileRecordTypeChecker::UNKNOWN_FILE_TYPE,
ContextRecordType explicitRecordType = FileRecordTypeChecker::UNKNOWN_RECORD_TYPE) {
_inputFiles.push_back(FileEntryType(inputFile, explicitFileType, explicitRecordType));
}
int getNumInputFiles() const { return _inputFiles.size(); }
const QuickString &getInputFileName(int fileNum) const { return _inputFiles[fileNum]._fileName; }
ContextFileType getInputFileType(int fileNum) const { return _inputFiles[fileNum]._fileType; }
void setInputFileType(int fileNum, ContextFileType fileType) { _inputFiles[fileNum]._fileType = fileType; }
ContextRecordType getInputRecordType(int fileNum) const { return _inputFiles[fileNum]._recordType; }
void setInputRecordType(int fileNum, ContextRecordType recordType) { _inputFiles[fileNum]._recordType = recordType; }
//HERE ARE SOME SIMPLER VERSIONS OF THE ABOVE FOR APPS THAT HAVE ONLY ONE INPUT FILE
const QuickString &getInputFileName() const { return _inputFiles[0]._fileName; }
ContextFileType getInputFileType() const { return _inputFiles[0]._fileType; }
void setInputFileType(ContextFileType fileType) { _inputFiles[0]._fileType = fileType; }
ContextRecordType getInputRecordType() const { return _inputFiles[0]._recordType; }
void setInputRecordType(ContextRecordType recordType) { _inputFiles[0]._recordType = recordType; }
int getInputFileIdx() const { return 0; }
virtual bool determineOutputType();
const QuickString &getHeader(int fileIdx) { return _headers[fileIdx]; }
void setHeader(int fileIdx, const QuickString &header) { _headers[fileIdx] = header; }
const BamTools::RefVector &getReferences(int fileIdx) { return _references[fileIdx]; }
void setReferences(int fileIdx, const BamTools::RefVector &refs) { _references[fileIdx] = refs; }
int getBamHeaderAndRefIdx(); //return idx of 1st query that is BAM. If none, first DB that is BAM.
bool getUseMergedIntervals() const { return _useMergedIntervals; }
void setUseMergedIntervals(bool val) { _useMergedIntervals = val; }
void openGenomeFile(const QuickString &genomeFilename);
void openGenomeFile(const BamTools::RefVector &refVector);
bool hasGenomeFile() const { return _genomeFile != NULL; }
NewGenomeFile *getGenomeFile() const { return _genomeFile; }
void setOutputFileType(ContextFileType fileType) { _outputFileType = fileType; }
ContextFileType getOutputFileType() const { return _outputFileType; }