Skip to content
Snippets Groups Projects
Commit 8b6ed4be authored by nkindlon's avatar nkindlon
Browse files

Merge branch 'master' of https://github.com/nkindlon/bedtools2

parents bcbd75b7 f36d6db4
No related branches found
No related tags found
No related merge requests found
Showing
with 651 additions and 587 deletions
...@@ -47,7 +47,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \ ...@@ -47,7 +47,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/linksBed \ $(SRC_DIR)/linksBed \
$(SRC_DIR)/maskFastaFromBed \ $(SRC_DIR)/maskFastaFromBed \
$(SRC_DIR)/mapFile \ $(SRC_DIR)/mapFile \
$(SRC_DIR)/mergeBed \ $(SRC_DIR)/mergeFile \
$(SRC_DIR)/multiBamCov \ $(SRC_DIR)/multiBamCov \
$(SRC_DIR)/multiIntersectBed \ $(SRC_DIR)/multiIntersectBed \
$(SRC_DIR)/nekSandbox1 \ $(SRC_DIR)/nekSandbox1 \
......
...@@ -81,7 +81,7 @@ bool FileIntersect::processUnsortedFiles() ...@@ -81,7 +81,7 @@ bool FileIntersect::processUnsortedFiles()
while (!queryFRM->eof()) { while (!queryFRM->eof()) {
Record *queryRecord = queryFRM->allocateAndGetNextRecord(); Record *queryRecord = queryFRM->getNextRecord();
if (queryRecord == NULL) { if (queryRecord == NULL) {
continue; continue;
} }
......
/*****************************************************************************
mergeBed.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "lineFileUtilities.h"
#include "mergeBed.h"
// Print the names collected for one merged block as a single extra column:
// a leading tab, then every name joined by _delimiter.
// If no names were collected, an error is written to stderr and the program
// exits with status 1.
void BedMerge::ReportMergedNames(const vector<string> &names) {
    // Nothing to report: bail out before anything is written for this column.
    if (names.empty()) {
        cerr << endl
             << "*****" << endl
             << "*****ERROR: "
             << "No names found to report for the -names option. Exiting."
             << endl
             << "*****" << endl;
        exit(1);
    }

    printf("\t");
    for (size_t idx = 0; idx < names.size(); ++idx) {
        // The delimiter goes between names, never after the last one.
        if (idx > 0)
            cout << _delimiter;
        cout << names[idx];
    }
}
void BedMerge::ReportMergedScores(const vector<string> &scores) {
// setup a VectorOps instances for the list of scores.
// VectorOps methods used for each possible operation.
VectorOps vo(scores);
std::stringstream buffer;
if (scores.size() > 0) {
if (_scoreOp == "sum")
buffer << setprecision (PRECISION) << vo.GetSum();
else if (_scoreOp == "min")
buffer << setprecision (PRECISION) << vo.GetMin();
else if (_scoreOp == "max")
buffer << setprecision (PRECISION) << vo.GetMax();
else if (_scoreOp == "mean")
buffer << setprecision (PRECISION) << vo.GetMean();
else if (_scoreOp == "median")
buffer << setprecision (PRECISION) << vo.GetMedian();
else if (_scoreOp == "mode")
buffer << setprecision (PRECISION) << vo.GetMode();
else if (_scoreOp == "antimode")
buffer << setprecision (PRECISION) << vo.GetAntiMode();
else if (_scoreOp == "collapse")
buffer << setprecision (PRECISION) << vo.GetCollapse(_delimiter);
cout << "\t" << buffer.str();
}
else {
cerr << endl
<< "*****" << endl
<< "*****ERROR: No scores found to report for the -scores option. Exiting." << endl
<< "*****" << endl;
exit(1);
}
}
// ===============
// = Constructor =
// ===============
// Store the merge options, allocate the BedFile reader for bedFile, and run
// the merge immediately: the stranded variant when forceStrand is set, the
// unstranded one otherwise. All output is produced during construction.
BedMerge::BedMerge(string &bedFile,
                   bool numEntries,
                   int maxDistance,
                   bool forceStrand,
                   bool reportNames,
                   bool reportScores,
                   const string &scoreOp,
                   const string &delimiter) :
    _bedFile(bedFile),
    _numEntries(numEntries),
    _forceStrand(forceStrand),
    _reportNames(reportNames),
    _reportScores(reportScores),
    _scoreOp(scoreOp),
    _maxDistance(maxDistance),
    _delimiter(delimiter)
{
    _bed = new BedFile(bedFile);

    if (_forceStrand) {
        MergeBedStranded();
    }
    else {
        MergeBed();
    }
}
// =================
// = Destructor =
// =================
// Release the BedFile reader that the constructor allocated with new.
// Previously the destructor was empty, so every BedMerge leaked its reader.
BedMerge::~BedMerge(void) {
    delete _bed;
    _bed = NULL;
}
// ===============================================
// Convenience method for reporting merged blocks
// ================================================
// Write one merged interval to stdout as "chrom<TAB>start<TAB>end", followed
// by whichever optional columns are enabled (_reportNames, _reportScores,
// _numEntries), and a terminating newline.
void BedMerge::Report(string chrom, int start,
                      int end, const vector<string> &names,
                      const vector<string> &scores, int mergeCount)
{
    // ARQ: the one-based start adjustment was removed to force all output to
    // be zero-based, BED format, regardless of input type.
    printf("%s\t%d\t%d", chrom.c_str(), start, end);

    // Counts + scores without names is the only combination that places the
    // count ahead of the other optional columns.
    if (_numEntries && !_reportNames && _reportScores) {
        printf("\t%d", mergeCount);
        ReportMergedScores(scores);
        printf("\n");
        return;
    }

    // Every other combination uses the same column order:
    // names, then scores, then the count.
    if (_reportNames) {
        ReportMergedNames(names);
    }
    if (_reportScores) {
        ReportMergedScores(scores);
    }
    if (_numEntries) {
        printf("\t%d", mergeCount);
    }
    printf("\n");
}
// =========================================================
// Convenience method for reporting merged blocks by strand
// =========================================================
// Write one strand-specific merged interval to stdout as
// "chrom<TAB>start<TAB>end", followed by a column layout that depends on
// which of _numEntries, _reportNames and _reportScores are enabled, and a
// terminating newline. The strand is always part of the output.
void BedMerge::ReportStranded(string chrom, int start,
                              int end, const vector<string> &names,
                              const vector<string> &scores, int mergeCount,
                              string strand)
{
    // ARQ: the one-based start adjustment was removed to force all output to
    // be zero-based, BED format, regardless of input type.
    printf("%s\t%d\t%d", chrom.c_str(), start, end);

    // Encode the three switches as bits: 4 = counts, 2 = names, 1 = scores.
    const int layout = (_numEntries ? 4 : 0)
                     | (_reportNames ? 2 : 0)
                     | (_reportScores ? 1 : 0);

    switch (layout) {
    case 0:     // just the merged intervals
        printf("\t\t\t%s\n", strand.c_str());
        break;

    case 4:     // merged intervals and counts
        printf("\t\t%d\t%s\n", mergeCount, strand.c_str());
        break;

    case 5:     // merged intervals, counts, and scores
        printf("\t%d", mergeCount);
        ReportMergedScores(scores);
        printf("\t%s\n", strand.c_str());
        break;

    case 6:     // merged intervals, counts, and names
        ReportMergedNames(names);
        printf("\t%d\t%s", mergeCount, strand.c_str());
        printf("\n");
        break;

    case 7:     // merged intervals, counts, names, and scores
        ReportMergedNames(names);
        ReportMergedScores(scores);
        // NOTE(review): strand is printed before the count here, whereas the
        // counts-and-names layout prints the count first - confirm intended.
        printf("\t%s\t%d", strand.c_str(), mergeCount);
        printf("\n");
        break;

    case 2:     // merged intervals and names
        ReportMergedNames(names);
        printf("\t.\t%s\n", strand.c_str());
        break;

    case 1:     // merged intervals and scores
        printf("\t");
        ReportMergedScores(scores);
        printf("\t%s\n", strand.c_str());
        break;

    case 3:     // merged intervals, names, and scores
        ReportMergedNames(names);
        ReportMergedScores(scores);
        printf("\t%s\n", strand.c_str());
        break;
    }
}
// =====================================================
// = Merge overlapping BED entries into a single entry =
// =====================================================
// Stream through the BED file and report one merged interval per run of
// records on the same chromosome whose start lies within _maxDistance of
// the current block's end. Names and scores of the member records are
// collected for the optional output columns.
void BedMerge::MergeBed() {
    int mergeCount = 1;
    vector<string> names;
    vector<string> scores;
    int start = -1;     // stays negative until the first block is opened
    int end = -1;
    BED prev, curr;

    _bed->Open();
    while (_bed->GetNextBed(curr, true)) { // true = force sorted intervals
        if (_bed->_status != BED_VALID)
            continue;

        // A record opens a new block when it lies too far past the current
        // block's end, or when the chromosome changes.
        const bool startsNewBlock =
            (((int) curr.start - end) > _maxDistance) ||
            (curr.chrom != prev.chrom);

        if (startsNewBlock) {
            // Flush the block that just finished, if there was one.
            if (start >= 0) {
                Report(prev.chrom, start, end, names, scores, mergeCount);
                mergeCount = 1;
                names.clear();
                scores.clear();
            }
            start = curr.start;
            end = curr.end;
        }
        else {
            // Same block: extend its end if needed and count the member.
            if ((int) curr.end > end)
                end = curr.end;
            mergeCount++;
        }

        // Either way, this record contributes its name and score.
        if (!curr.name.empty())
            names.push_back(curr.name);
        if (!curr.score.empty())
            scores.push_back(curr.score);

        prev = curr;
    }

    // Flush the final block.
    if (start >= 0) {
        Report(prev.chrom, start, end, names, scores, mergeCount);
    }
}
// ===============================================================================
// = Merge overlapping BED entries into a single entry, accounting for strandedness =
// ================================================================================
void BedMerge::MergeBedStranded() {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bed->loadBedFileIntoMapNoBin();
// loop through each chromosome and merge their BED entries
masterBedMapNoBin::const_iterator m = _bed->bedMapNoBin.begin();
masterBedMapNoBin::const_iterator mEnd = _bed->bedMapNoBin.end();
for (; m != mEnd; ++m) {
// bedList is already sorted by start position.
string chrom = m->first;
// make a list of the two strands to merge separately.
vector<string> strands(2);
strands[0] = "+";
strands[1] = "-";
// do two passes, one for each strand.
for (unsigned int s = 0; s < strands.size(); s++) {
int mergeCount = 1;
int numOnStrand = 0;
vector<string> names;
vector<string> scores;
// merge overlapping features for this chromosome.
int start = -1;
int end = -1;
vector<BED>::const_iterator bedItr = m->second.begin();
vector<BED>::const_iterator bedEnd = m->second.end();
for (; bedItr != bedEnd; ++bedItr) {
// if forcing strandedness, move on if the hit
// is not on the current strand.
if (bedItr->strand != strands[s]) { continue; }
else { numOnStrand++; }
if ( (((int) bedItr->start - end) > _maxDistance) ||
(end < 0))
{
if (start >= 0) {
ReportStranded(chrom, start, end, names,
scores, mergeCount, strands[s]);
// reset
mergeCount = 1;
names.clear();
scores.clear();
}
start = bedItr->start;
end = bedItr->end;
if (!bedItr->name.empty()) names.push_back(bedItr->name);
if (!bedItr->score.empty()) scores.push_back(bedItr->score);
}
else {
if ((int) bedItr-> end > end) end = bedItr->end;
mergeCount++;
if (!bedItr->name.empty()) names.push_back(bedItr->name);
if (!bedItr->score.empty()) scores.push_back(bedItr->score);
}
}
if (start >= 0) {
ReportStranded(chrom, start, end, names,
scores, mergeCount, strands[s]);
}
}
}
}
/*****************************************************************************
mergeBed.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "bedFile.h"
#include <vector>
#include <algorithm>
#include <numeric>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <limits.h>
#include <stdlib.h>
#include "VectorOps.h"
using namespace std;
const int PRECISION = 21;
//************************************************
// Class methods and elements
//************************************************
// Merges nearby/overlapping BED intervals from one input file and prints the
// merged intervals to stdout. The merge is performed by the constructor.
class BedMerge {
public:
// constructor
// Stores the options, allocates the BedFile reader and immediately runs
// MergeBedStranded() when forceStrand is true, otherwise MergeBed().
BedMerge(string &bedFile, bool numEntries,
int maxDistance, bool forceStrand,
bool reportNames, bool reportScores,
const string &scoreOp, const string &delimiter);
// destructor
~BedMerge(void);
// Merge intervals without regard to strand, streaming through the file.
void MergeBed();
// Merge intervals separately for the "+" and "-" strands of each chromosome.
void MergeBedStranded();
private:
// NOTE: the constructor's initializer list follows this declaration order.
string _bedFile;      // input file name passed to the constructor
bool _numEntries;     // add a column with the number of merged records
bool _forceStrand;    // selects MergeBedStranded() over MergeBed()
bool _reportNames;    // add a column with the merged records' names
bool _reportScores;   // add a column summarising the merged records' scores
string _scoreOp;      // sum, min, max, mean, median, mode, antimode or collapse
int _maxDistance;     // largest gap (next start minus current end) still merged
string _delimiter;    // separator between names and for the collapse operation
// instance of a bed file class.
// Allocated with new in the constructor.
BedFile *_bed;
// Print one merged interval plus the enabled optional columns.
void Report(string chrom, int start, int end,
const vector<string> &names,
const vector<string> &scores,
int mergeCount);
// Same as Report, for one strand; the strand is included in the output.
void ReportStranded(string chrom, int start, int end,
const vector<string> &names,
const vector<string> &scores,
int mergeCount,
string strand);
// Print the delimiter-joined names column; exits with status 1 if names is empty.
void ReportMergedNames(const vector<string> &names);
// Print the _scoreOp summary column; exits with status 1 if scores is empty.
void ReportMergedScores(const vector<string> &scores);
};
...@@ -5,18 +5,29 @@ BIN_DIR = ../../bin/ ...@@ -5,18 +5,29 @@ BIN_DIR = ../../bin/
# ------------------- # -------------------
# define our includes # define our includes
# ------------------- # -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \ -I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/gzstream/ \ -I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/fileType/ \ -I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/VectorOps/ \ -I$(UTILITIES_DIR)/GenomeFile/ \
-I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BamTools/src \
-I$(UTILITIES_DIR)/BlockedIntervals \
-I$(UTILITIES_DIR)/BamTools-Ancillary \
-I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/RecordOutputMgr/ \
-I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/BinTree \
-I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/version/
# ---------------------------------- # ----------------------------------
# define our source and object files # define our source and object files
# ---------------------------------- # ----------------------------------
SOURCES= mergeMain.cpp mergeBed.cpp mergeBed.h SOURCES= mergeMain.cpp mergeFile.cpp mergeFile.h
OBJECTS= mergeMain.o mergeBed.o OBJECTS= mergeMain.o mergeFile.o
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
...@@ -30,6 +41,6 @@ $(BUILT_OBJECTS): $(SOURCES) ...@@ -30,6 +41,6 @@ $(BUILT_OBJECTS): $(SOURCES)
clean: clean:
@echo "Cleaning up." @echo "Cleaning up."
@rm -f $(OBJ_DIR)/mergeMain.o $(OBJ_DIR)/mergeBed.o @rm -f $(OBJ_DIR)/mergeMain.o $(OBJ_DIR)/mergeFile.o
.PHONY: clean .PHONY: clean
\ No newline at end of file
/*****************************************************************************
mergeFile.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "mergeFile.h"
// Keep a pointer to the caller's context and create the output manager,
// initialising it from that context.
MergeFile::MergeFile(ContextMerge *context)
: _context(context),
  _recordOutputMgr(new RecordOutputMgr())
{
    _recordOutputMgr->init(_context);
}
// Destroy the output manager created by the constructor. The context pointer
// is not deleted here.
MergeFile::~MergeFile()
{
delete _recordOutputMgr;
// Clear the pointer after deleting it.
_recordOutputMgr = NULL;
}
bool MergeFile::merge()
{
RecordKeyList hitSet;
FileRecordMgr *frm = _context->getFile(0);
while (!frm->eof()) {
Record *key = frm->getNextRecord(&hitSet);
if (key == NULL) continue;
_recordOutputMgr->printRecord(hitSet.getKey(), _context->getColumnOpsVal(hitSet));
}
return true;
}
/*****************************************************************************
mergeFile.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef MERGE_FILE_H_
#define MERGE_FILE_H_
//************************************************
// Class methods and elements
//************************************************
#include "ContextMerge.h"
#include "RecordOutputMgr.h"
// Driver for the merge tool: reads records from the context's first input
// file and prints them through a RecordOutputMgr.
class MergeFile {
public:
// Stores the context pointer and creates the output manager from it.
MergeFile(ContextMerge *context);
// Deletes the output manager; the context is not deleted.
~MergeFile();
// Processes the whole input file; returns true.
bool merge();
private:
// Settings and input files; not deleted by this class.
ContextMerge *_context;
// Created in the constructor, deleted in the destructor.
RecordOutputMgr *_recordOutputMgr;
};
#endif
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
Licenced under the GNU General Public License 2.0 license. Licenced under the GNU General Public License 2.0 license.
******************************************************************************/ ******************************************************************************/
#include "mergeBed.h" #include "mergeFile.h"
#include "version.h" #include "version.h"
using namespace std; using namespace std;
...@@ -26,113 +26,21 @@ void merge_help(void); ...@@ -26,113 +26,21 @@ void merge_help(void);
int merge_main(int argc, char* argv[]) { int merge_main(int argc, char* argv[]) {
// our configuration variables ContextMerge *context = new ContextMerge();
bool showHelp = false; if (!context->parseCmdArgs(argc, argv, 1) || context->getShowHelp() || !context->isValidState()) {
if (!context->getErrorMsg().empty()) {
// input files cerr << context->getErrorMsg() << endl;
string bedFile = "stdin";
int maxDistance = 0;
string scoreOp = "";
// input arguments
bool haveBed = true;
bool numEntries = false;
bool haveMaxDistance = false;
bool forceStrand = false;
bool reportNames = false;
bool reportScores = false;
string delimiter = ",";
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true;
} }
}
if(showHelp) merge_help();
// do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-i", 2, parameterLength)) {
if ((i+1) < argc) {
bedFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-n", 2, parameterLength)) {
numEntries = true;
}
else if(PARAMETER_CHECK("-d", 2, parameterLength)) {
if ((i+1) < argc) {
haveMaxDistance = true;
maxDistance = atoi(argv[i + 1]);
i++;
}
}
else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
forceStrand = true;
}
else if (PARAMETER_CHECK("-nms", 4, parameterLength)) {
reportNames = true;
}
else if (PARAMETER_CHECK("-scores", 7, parameterLength)) {
reportScores = true;
if ((i+1) < argc) {
scoreOp = argv[i + 1];
i++;
}
}
else if (PARAMETER_CHECK("-delim", 6, parameterLength)) {
if ((i+1) < argc) {
delimiter = argv[i + 1];
i++;
}
}
else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
}
}
// make sure we have both input files
if (!haveBed) {
cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file. " << endl << "*****" << endl;
showHelp = true;
}
if ((reportScores == true) && (scoreOp != "sum")
&& (scoreOp != "max") && (scoreOp != "min")
&& (scoreOp != "mean") && (scoreOp != "mode")
&& (scoreOp != "median") && (scoreOp != "antimode")
&& (scoreOp != "collapse"))
{
cerr << endl
<< "*****"
<< endl
<< "*****ERROR: Invalid scoreOp selection \""
<< scoreOp
<< endl
<< "\" *****"
<< endl;
showHelp = true;
}
if (!showHelp) {
BedMerge *bm = new BedMerge(bedFile, numEntries,
maxDistance, forceStrand,
reportNames, reportScores,
scoreOp, delimiter);
delete bm;
}
else {
merge_help(); merge_help();
delete context;
return 0;
} }
return 0; MergeFile *mergeFile = new MergeFile(context);
bool retVal = mergeFile->merge();
delete mergeFile;
delete context;
return retVal ? 0 : 1;
} }
void merge_help(void) { void merge_help(void) {
......
...@@ -6,7 +6,7 @@ using namespace std; ...@@ -6,7 +6,7 @@ using namespace std;
#include <cstdio> #include <cstdio>
#include "RecordKeyList.h" #include "RecordKeyList.h"
#include "NewChromsweep.h" #include "NewChromsweep.h"
#include "DualQueue.h" //#include "DualQueue.h"
#include "ParseTools.h" #include "ParseTools.h"
#include <sstream> #include <sstream>
#include <iomanip> #include <iomanip>
...@@ -145,7 +145,7 @@ int nek_sandbox1_main(int argc,char** argv) ...@@ -145,7 +145,7 @@ int nek_sandbox1_main(int argc,char** argv)
// bool headerFound = false; // bool headerFound = false;
// QuickString outbuf; // QuickString outbuf;
// while (!frm.eof()) { // while (!frm.eof()) {
// Record *record = frm.allocateAndGetNextRecord(); // Record *record = frm.getNextRecord();
// if (!headerFound && frm.hasHeader()) { // if (!headerFound && frm.hasHeader()) {
// cout << frm.getHeader() << endl; // cout << frm.getHeader() << endl;
// headerFound = true; // headerFound = true;
...@@ -287,7 +287,7 @@ void testDualQueue(Context *context) { ...@@ -287,7 +287,7 @@ void testDualQueue(Context *context) {
printf("Original record order is:\n"); printf("Original record order is:\n");
while (!frm.eof()) { while (!frm.eof()) {
Record *record = frm.allocateAndGetNextRecord(); Record *record = frm.getNextRecord();
if (record == NULL) { if (record == NULL) {
continue; continue;
} }
......
...@@ -51,7 +51,7 @@ bool SampleFile::takeSample() ...@@ -51,7 +51,7 @@ bool SampleFile::takeSample()
while (!_inputFile->eof()) { while (!_inputFile->eof()) {
Record *record = _inputFile->allocateAndGetNextRecord(); Record *record = _inputFile->getNextRecord();
if (record == NULL) { if (record == NULL) {
continue; continue;
} }
......
...@@ -74,7 +74,7 @@ void BinTree::loadDB() ...@@ -74,7 +74,7 @@ void BinTree::loadDB()
Record *record = NULL; Record *record = NULL;
while (!_databaseFile->eof()) { while (!_databaseFile->eof()) {
record = _databaseFile->allocateAndGetNextRecord(); record = _databaseFile->getNextRecord();
//In addition to NULL records, we also don't want to add unmapped reads. //In addition to NULL records, we also don't want to add unmapped reads.
if (record == NULL || record->isUnmapped()) { if (record == NULL || record->isUnmapped()) {
continue; continue;
......
...@@ -13,7 +13,6 @@ ContextBase::ContextBase() ...@@ -13,7 +13,6 @@ ContextBase::ContextBase()
: :
_program(UNSPECIFIED_PROGRAM), _program(UNSPECIFIED_PROGRAM),
_allFilesOpened(false), _allFilesOpened(false),
_useMergedIntervals(false),
_genomeFile(NULL), _genomeFile(NULL),
_outputFileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE), _outputFileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE),
_outputTypeDetermined(false), _outputTypeDetermined(false),
...@@ -45,7 +44,6 @@ ContextBase::ContextBase() ...@@ -45,7 +44,6 @@ ContextBase::ContextBase()
_maxNumDatabaseFields(0), _maxNumDatabaseFields(0),
_useFullBamTags(false), _useFullBamTags(false),
_reportCount(false), _reportCount(false),
_maxDistance(0),
_reportNames(false), _reportNames(false),
_reportScores(false), _reportScores(false),
_numOutputRecords(0), _numOutputRecords(0),
...@@ -53,11 +51,16 @@ ContextBase::ContextBase() ...@@ -53,11 +51,16 @@ ContextBase::ContextBase()
_seed(0), _seed(0),
_forwardOnly(false), _forwardOnly(false),
_reverseOnly(false), _reverseOnly(false),
_hasColumnOpsMethods(false) _hasColumnOpsMethods(false),
_desiredStrand(FileRecordMergeMgr::ANY_STRAND),
_maxDistance(0),
_useMergedIntervals(false)
{ {
_programNames["intersect"] = INTERSECT; _programNames["intersect"] = INTERSECT;
_programNames["sample"] = SAMPLE; _programNames["sample"] = SAMPLE;
_programNames["map"] = MAP; _programNames["map"] = MAP;
_programNames["merge"] = MERGE;
if (hasColumnOpsMethods()) { if (hasColumnOpsMethods()) {
_keyListOps = new KeyListOps(); _keyListOps = new KeyListOps();
...@@ -233,11 +236,12 @@ bool ContextBase::openFiles() { ...@@ -233,11 +236,12 @@ bool ContextBase::openFiles() {
_files.resize(_fileNames.size()); _files.resize(_fileNames.size());
for (int i = 0; i < (int)_fileNames.size(); i++) { for (int i = 0; i < (int)_fileNames.size(); i++) {
FileRecordMgr *frm = new FileRecordMgr(_fileNames[i], _sortedInput); FileRecordMgr *frm = getNewFRM(_fileNames[i]);
if (hasGenomeFile()) { if (hasGenomeFile()) {
frm->setGenomeFile(_genomeFile); frm->setGenomeFile(_genomeFile);
} }
frm->setFullBamFlags(_useFullBamTags); frm->setFullBamFlags(_useFullBamTags);
frm->setIsSorted(_sortedInput);
if (!frm->open()) { if (!frm->open()) {
return false; return false;
} }
...@@ -391,8 +395,9 @@ bool ContextBase::handle_c() ...@@ -391,8 +395,9 @@ bool ContextBase::handle_c()
markUsed(_i - _skipFirstArgs); markUsed(_i - _skipFirstArgs);
_i++; _i++;
markUsed(_i - _skipFirstArgs); markUsed(_i - _skipFirstArgs);
return true;
} }
return true; return false;
} }
...@@ -412,7 +417,7 @@ bool ContextBase::handle_o() ...@@ -412,7 +417,7 @@ bool ContextBase::handle_o()
} }
// for col ops, -null is a NULL vakue assigned // for col ops, -null is a NULL value assigned
// when no overlaps are detected. // when no overlaps are detected.
bool ContextBase::handle_null() bool ContextBase::handle_null()
{ {
...@@ -424,8 +429,9 @@ bool ContextBase::handle_null() ...@@ -424,8 +429,9 @@ bool ContextBase::handle_null()
markUsed(_i - _skipFirstArgs); markUsed(_i - _skipFirstArgs);
_i++; _i++;
markUsed(_i - _skipFirstArgs); markUsed(_i - _skipFirstArgs);
return true;
} }
return true; return false;
} }
//for col ops, delimStr will appear between each item in //for col ops, delimStr will appear between each item in
...@@ -459,3 +465,13 @@ const QuickString &ContextBase::getColumnOpsVal(RecordKeyList &keyList) const { ...@@ -459,3 +465,13 @@ const QuickString &ContextBase::getColumnOpsVal(RecordKeyList &keyList) const {
return _keyListOps->getOpVals(keyList); return _keyListOps->getOpVals(keyList);
} }
FileRecordMgr *ContextBase::getNewFRM(const QuickString &filename) {
if (!_useMergedIntervals) {
return new FileRecordMgr(filename);
} else {
FileRecordMergeMgr *frm = new FileRecordMergeMgr(filename);
frm->setStrandType(_desiredStrand);
frm->setMaxDistance(_maxDistance);
return frm;
}
}
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include "version.h" #include "version.h"
#include "BedtoolsTypes.h" #include "BedtoolsTypes.h"
#include "FileRecordTypeChecker.h" #include "FileRecordTypeChecker.h"
#include "FileRecordMgr.h" #include "FileRecordMergeMgr.h"
#include "NewGenomeFile.h" #include "NewGenomeFile.h"
#include "api/BamReader.h" #include "api/BamReader.h"
#include "api/BamAux.h" #include "api/BamAux.h"
...@@ -59,6 +59,7 @@ public: ...@@ -59,6 +59,7 @@ public:
bool getUseMergedIntervals() const { return _useMergedIntervals; } bool getUseMergedIntervals() const { return _useMergedIntervals; }
void setUseMergedIntervals(bool val) { _useMergedIntervals = val; } void setUseMergedIntervals(bool val) { _useMergedIntervals = val; }
FileRecordMergeMgr::WANTED_STRAND_TYPE getDesiredStrand() const { return _desiredStrand; }
void openGenomeFile(const QuickString &genomeFilename); void openGenomeFile(const QuickString &genomeFilename);
void openGenomeFile(const BamTools::RefVector &refVector); void openGenomeFile(const BamTools::RefVector &refVector);
...@@ -106,23 +107,23 @@ public: ...@@ -106,23 +107,23 @@ public:
virtual bool getUseFullBamTags() const { return _useFullBamTags; } virtual bool getUseFullBamTags() const { return _useFullBamTags; }
virtual void setUseFullBamTags(bool val) { _useFullBamTags = val; } virtual void setUseFullBamTags(bool val) { _useFullBamTags = val; }
// // //
// MERGE METHODS // // MERGE METHODS
// // //
virtual bool getReportCount() const { return _reportCount; } // virtual bool getReportCount() const { return _reportCount; }
virtual void setReportCount(bool val) { _reportCount = val; } // virtual void setReportCount(bool val) { _reportCount = val; }
//
virtual int getMaxDistance() const { return _maxDistance; } // virtual int getMaxDistance() const { return _maxDistance; }
virtual void setMaxDistance(int distance) { _maxDistance = distance; } // virtual void setMaxDistance(int distance) { _maxDistance = distance; }
//
virtual bool getReportNames() const { return _reportNames; } // virtual bool getReportNames() const { return _reportNames; }
virtual void setReportNames(bool val) { _reportNames = val; } // virtual void setReportNames(bool val) { _reportNames = val; }
//
virtual bool getReportScores() const { return _reportScores; } // virtual bool getReportScores() const { return _reportScores; }
virtual void setReportScores(bool val) { _reportScores = val; } // virtual void setReportScores(bool val) { _reportScores = val; }
//
virtual const QuickString &getScoreOp() const { return _scoreOp; } // virtual const QuickString &getScoreOp() const { return _scoreOp; }
virtual void setScoreOp(const QuickString &op) { _scoreOp = op; } // virtual void setScoreOp(const QuickString &op) { _scoreOp = op; }
// METHODS FOR PROGRAMS WITH USER_SPECIFIED NUMBER // METHODS FOR PROGRAMS WITH USER_SPECIFIED NUMBER
...@@ -160,7 +161,6 @@ protected: ...@@ -160,7 +161,6 @@ protected:
bool _allFilesOpened; bool _allFilesOpened;
map<QuickString, PROGRAM_TYPE> _programNames; map<QuickString, PROGRAM_TYPE> _programNames;
bool _useMergedIntervals;
NewGenomeFile *_genomeFile; NewGenomeFile *_genomeFile;
ContextFileType _outputFileType; ContextFileType _outputFileType;
...@@ -200,7 +200,6 @@ protected: ...@@ -200,7 +200,6 @@ protected:
int _maxNumDatabaseFields; int _maxNumDatabaseFields;
bool _useFullBamTags; bool _useFullBamTags;
bool _reportCount; bool _reportCount;
int _maxDistance;
bool _reportNames; bool _reportNames;
bool _reportScores; bool _reportScores;
QuickString _scoreOp; QuickString _scoreOp;
...@@ -212,14 +211,22 @@ protected: ...@@ -212,14 +211,22 @@ protected:
bool _forwardOnly; bool _forwardOnly;
bool _reverseOnly; bool _reverseOnly;
//Members for column operations
bool _hasColumnOpsMethods; bool _hasColumnOpsMethods;
KeyListOps *_keyListOps; KeyListOps *_keyListOps;
QuickString _nullStr; //placeholder return value when col ops aren't valid. QuickString _nullStr; //placeholder return value when col ops aren't valid.
//Members for merged records
FileRecordMergeMgr::WANTED_STRAND_TYPE _desiredStrand;
int _maxDistance;
bool _useMergedIntervals;
void markUsed(int i) { _argsProcessed[i] = true; } void markUsed(int i) { _argsProcessed[i] = true; }
bool isUsed(int i) const { return _argsProcessed[i]; } bool isUsed(int i) const { return _argsProcessed[i]; }
bool cmdArgsValid(); bool cmdArgsValid();
bool openFiles(); bool openFiles();
virtual FileRecordMgr *getNewFRM(const QuickString &filename);
//set cmd line params and counter, i, as members so code //set cmd line params and counter, i, as members so code
//is more readable (as opposed to passing all 3 everywhere). //is more readable (as opposed to passing all 3 everywhere).
......
...@@ -47,13 +47,3 @@ bool ContextMap::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { ...@@ -47,13 +47,3 @@ bool ContextMap::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
} }
return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs); return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
} }
//
//
//bool ContextMap::isValidState()
//{
// if (!ContextIntersect::isValidState()) {
// return false;
// }
//}
//
//
...@@ -15,12 +15,8 @@ class ContextMap : public ContextIntersect { ...@@ -15,12 +15,8 @@ class ContextMap : public ContextIntersect {
public: public:
ContextMap(); ContextMap();
virtual ~ContextMap(); virtual ~ContextMap();
// virtual bool isValidState();
//
virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs); virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs);
//
virtual bool hasIntersectMethods() const { return true; } virtual bool hasIntersectMethods() const { return true; }
//
private: private:
......
/*
* ContextMerge.cpp
*
* Created on: Mar 26, 2014
* Author: nek3d
*/
#include "ContextMerge.h"
// Configure the context for the merge tool: request merged intervals, turn
// on column operations, and clear the column-operation defaults.
ContextMerge::ContextMerge()
{
setUseMergedIntervals(true);
setColumnOpsMethods(true);
//merge has no default columnOps the way map does, so we'll need to clear those.
// NOTE(review): _keyListOps is dereferenced here; presumably
// setColumnOpsMethods(true) is what allocates it - confirm.
_keyListOps->setColumns("");
_keyListOps->setOperations("");
}
// Nothing to release at this level; the body is intentionally empty.
ContextMerge::~ContextMerge()
{
}
bool ContextMerge::parseCmdArgs(int argc, char **argv, int skipFirstArgs)
{
_argc = argc;
_argv = argv;
_skipFirstArgs = skipFirstArgs;
if (_argc < 2) {
setShowHelp(true);
return false;
}
setProgram(_programNames[argv[0]]);
_argsProcessed.resize(_argc - _skipFirstArgs, false);
for (_i=_skipFirstArgs; _i < argc; _i++) {
if (isUsed(_i - _skipFirstArgs)) {
continue;
}
else if (strcmp(_argv[_i], "-n") == 0) {
if (!handle_n()) return false;
}
else if (strcmp(_argv[_i], "-nms") == 0) {
if (!handle_nms()) return false;
}
else if (strcmp(_argv[_i], "-scores") == 0) {
if (!handle_scores()) return false;
}
else if (strcmp(_argv[_i], "-delim") == 0) {
if (!handle_delim()) return false;
}
else if (strcmp(_argv[_i], "-d") == 0) {
if (!handle_d()) return false;
}
else if (strcmp(_argv[_i], "-s") == 0) {
if (!handle_s()) return false;
}
else if (strcmp(_argv[_i], "-S") == 0) {
if (!handle_S()) return false;
}
}
return ContextBase::parseCmdArgs(argc, argv, _skipFirstArgs);
}
// Validates the parsed settings for the merge tool.
//
// Returns true only when ContextBase::isValidState() passes and exactly one
// input file is registered; otherwise returns false (with _errorMsg set in
// the file-count case).
bool ContextMerge::isValidState()
{
    // Merge has no default column operations. If the user requested neither
    // columns nor operations, switch column ops off and drop the helper.
    const bool noColumnOpsRequested =
        _keyListOps->getColumns().empty() && _keyListOps->getOperations().empty();
    if (noColumnOpsRequested) {
        setColumnOpsMethods(false);
        delete _keyListOps;
        _keyListOps = NULL;
    }

    if (!ContextBase::isValidState()) {
        return false;
    }

    // For now, exactly one input file is allowed.
    if (_files.size() == 1) {
        return true;
    }
    _errorMsg = "\n***** ERROR: input file not specified. *****";
    return false;
}
// Handles "-d <distance>": stores the value in _maxDistance.
//
// On success both the flag and its value are marked as used, _i is advanced
// onto the value, and true is returned. If the value is missing, is not
// numeric, or converts to a negative number, _errorMsg is set and false is
// returned.
bool ContextMerge::handle_d() {
    const int valueIdx = _i + 1;
    if (valueIdx < _argc && isNumeric(_argv[valueIdx])) {
        const int dist = str2chrPos(_argv[valueIdx]);
        if (dist >= 0) {
            _maxDistance = dist;
            markUsed(_i - _skipFirstArgs);   // the "-d" flag itself
            _i++;
            markUsed(_i - _skipFirstArgs);   // the distance value
            return true;
        }
    }
    _errorMsg = "\n***** ERROR: -d option must be followed by an integer value *****";
    return false;
}
// Handles "-n": equivalent to asking map for "-c <any column> -o count".
// Always succeeds.
bool ContextMerge::handle_n()
{
    // The column choice is irrelevant for a count, but column 1 is named
    // explicitly because the keyListOps default column is the score, which
    // not every record necessarily has.
    _keyListOps->addColumns("1");
    _keyListOps->addOperations("count");

    markUsed(_i - _skipFirstArgs);
    return true;
}
// Handles "-nms": equivalent to asking map for "-c 4 -o collapse".
// Always succeeds.
bool ContextMerge::handle_nms()
{
    _keyListOps->addColumns("4");
    _keyListOps->addOperations("collapse");

    markUsed(_i - _skipFirstArgs);
    return true;
}
// Handles "-scores <operation>": applies the given operation to column 5.
//
// On success the flag and its value are marked as used, _i is advanced onto
// the value, and true is returned. If no value follows the flag, _errorMsg
// is set and false is returned.
bool ContextMerge::handle_scores()
{
    const bool hasOperation = (_i + 1) < _argc;
    if (!hasOperation) {
        _errorMsg = "\n***** ERROR: -scores option given, but no operation specified. *****";
        return false;
    }

    _keyListOps->addColumns("5");
    _keyListOps->addOperations(_argv[_i + 1]);

    markUsed(_i - _skipFirstArgs);   // the "-scores" flag
    _i++;
    markUsed(_i - _skipFirstArgs);   // the operation name
    return true;
}
// Handles "-s": sets the desired strand to SAME_STRAND_EITHER.
// Always succeeds.
bool ContextMerge::handle_s()
{
    _desiredStrand = FileRecordMergeMgr::SAME_STRAND_EITHER;

    markUsed(_i - _skipFirstArgs);
    return true;
}
// Handles "-S <+|->": restricts the merge to a single strand.
//
// A value starting with '+' selects SAME_STRAND_FORWARD and a value starting
// with '-' selects SAME_STRAND_REVERSE. On success the flag and its value
// are marked as used, _i is advanced onto the value, and true is returned.
//
// If the value is missing, or starts with neither '+' nor '-', nothing is
// consumed, _errorMsg is set and false is returned. (Previously an
// unrecognised value was silently swallowed and reported as success while
// leaving the strand setting untouched.)
bool ContextMerge::handle_S() {
    if ((_i + 1) < _argc) {
        bool validStrand = true;
        const char strandChar = _argv[_i + 1][0];
        if (strandChar == '+') {
            _desiredStrand = FileRecordMergeMgr::SAME_STRAND_FORWARD;
        } else if (strandChar == '-') {
            _desiredStrand = FileRecordMergeMgr::SAME_STRAND_REVERSE;
        } else {
            validStrand = false;
        }

        if (validStrand) {
            markUsed(_i - _skipFirstArgs);   // the "-S" flag
            _i++;
            markUsed(_i - _skipFirstArgs);   // the strand value
            return true;
        }
    }
    _errorMsg = "\n***** ERROR: -S option must be followed by + or -. *****";
    return false;
}
/*
* ContextMerge.h
*
* Created on: Mar 26, 2014
* Author: nek3d
*/
#ifndef CONTEXTMERGE_H_
#define CONTEXTMERGE_H_
#include "ContextBase.h"
#include "FileRecordMergeMgr.h"
// Command line context for the merge tool. Adds parsing for the
// merge-specific options on top of ContextBase and validates that the
// resulting configuration is usable.
class ContextMerge: public ContextBase {
public:
ContextMerge();
~ContextMerge();
// Parses the merge-specific options, then delegates to ContextBase.
virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs);
// Checks the parsed configuration; returns false if it is not usable.
virtual bool isValidState();
protected:
// One handler per merge-specific option. Each returns true on success
// and false (after setting an error message where applicable) on failure.
bool handle_d();      // -d <distance>
bool handle_n();      // -n
bool handle_nms();    // -nms
bool handle_scores(); // -scores <operation>
bool handle_s();      // -s
bool handle_S();      // -S <+|->
};
#endif /* CONTEXTMERGE_H_ */
...@@ -19,8 +19,9 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \ ...@@ -19,8 +19,9 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
# ---------------------------------- # ----------------------------------
# define our source and object files # define our source and object files
# ---------------------------------- # ----------------------------------
SOURCES= ContextBase.cpp ContextBase.h ContextIntersect.cpp ContextIntersect.h ContextMap.cpp ContextMap.h ContextSample.cpp ContextSample.h SOURCES= ContextBase.cpp ContextBase.h ContextIntersect.cpp ContextIntersect.h ContextMap.cpp \
OBJECTS= ContextBase.o ContextIntersect.o ContextMap.o ContextSample.o ContextMap.h ContextSample.cpp ContextSample.h ContextMerge.h ContextMerge.cpp
OBJECTS= ContextBase.o ContextIntersect.o ContextMap.o ContextSample.o ContextMerge.o
_EXT_OBJECTS=ParseTools.o QuickString.o _EXT_OBJECTS=ParseTools.o QuickString.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
...@@ -38,6 +39,7 @@ clean: ...@@ -38,6 +39,7 @@ clean:
@rm -f $(OBJ_DIR)/ContextBase.o \ @rm -f $(OBJ_DIR)/ContextBase.o \
$(OBJ_DIR)/ContextIntersect.o \ $(OBJ_DIR)/ContextIntersect.o \
$(OBJ_DIR)/ContextMap.o \ $(OBJ_DIR)/ContextMap.o \
$(OBJ_DIR)/ContextSample.o $(OBJ_DIR)/ContextSample.o \
$(OBJ_DIR)/ContextMerge.o \
.PHONY: clean .PHONY: clean
\ No newline at end of file
/*
* FileRecordMergeMgr.cpp
*
* Created on: Mar 19, 2014
* Author: nek3d
*/
#include "FileRecordMergeMgr.h"
// Creates a merging record manager for the given file. By default strand is
// ignored (ANY_STRAND) and the maximum merge distance is 0.
FileRecordMergeMgr::FileRecordMergeMgr(const QuickString & filename)
    : FileRecordMgr(filename)
    , _desiredStrand(ANY_STRAND)
    , _maxDistance(0)
{
}
//Record *FileRecordMergeMgr::allocateAndGetNextMergedRecord(WANT_STRAND_TYPE desiredStrand, int maxDistance) {
// RecordKeyList recList;
// if (!allocateAndGetNextMergedRecord(recList, desiredStrand, maxDistance)) {
// return NULL;
// }
// deleteAllMergedItemsButKey(recList);
// return const_cast<Record *>(recList.getKey()); //want key to be non-const
//}
// Produces the next merged record.
//
// A start record is taken from storage (records set aside by an earlier
// call) or, failing that, read from the file, skipping records that do not
// satisfy the strand requirement. Following records are then folded into it
// for as long as they are on the same chromosome, start no further than
// _maxDistance past the current end, and comply with the strand requirement.
//
// recList : receives the constituent records; its key is set to the merged
//           record. If it is not already clear on entry, its previous
//           contents are deleted first.
//           NOTE(review): the declaration gives this parameter a NULL
//           default, but it is dereferenced unconditionally here, so callers
//           must pass a valid list.
//
// Returns the key of recList (the start record itself when nothing was
// merged, otherwise a newly allocated copy whose end is extended), or NULL
// when no start record could be obtained before the end of input.
Record *FileRecordMergeMgr::getNextRecord(RecordKeyList *recList)
{
    if (!recList->allClear()) {
        deleteMergedRecord(*recList);
    }
    _mustBeForward = _desiredStrand == SAME_STRAND_FORWARD;
    _mustBeReverse = _desiredStrand == SAME_STRAND_REVERSE;
    Record *startRecord = tryToTakeFromStorage();
    // if we couldn't use a previously stored record for starters,
    // then begin with a new one that matches strand criteria.
    while (startRecord == NULL) {
        startRecord = FileRecordMgr::getNextRecord();
        if (startRecord == NULL) { //hit EOF!!
            return NULL;
        }
        if ((_mustBeForward && (startRecord->getStrandVal() != Record::FORWARD)) || (_mustBeReverse && (startRecord->getStrandVal() != Record::REVERSE))) {
            //record is reverse, only want forward, OR record is forward, wanted reverse
            deleteRecord(startRecord);
            startRecord = NULL;
            // The record is gone: go fetch another one. Without this, the
            // unknown-strand check below would dereference a NULL pointer.
            continue;
        }
        if (startRecord->getStrandVal() == Record::UNKNOWN && _desiredStrand != ANY_STRAND) {
            //there is an unknown strand, but the user specified strandedness.
            deleteRecord(startRecord);
            startRecord = NULL;
        }
    }
    // OK!! We have a start record! Re-evaluate strand requirements for the next record.
    _mustBeForward = _desiredStrand == SAME_STRAND_FORWARD || (_desiredStrand == SAME_STRAND_EITHER && (startRecord->getStrandVal() == Record::FORWARD));
    _mustBeReverse = _desiredStrand == SAME_STRAND_REVERSE || (_desiredStrand == SAME_STRAND_EITHER && (startRecord->getStrandVal() == Record::REVERSE));
    // With SAME_STRAND_EITHER, opposite-strand records are still wanted
    // later, so they are stored rather than deleted.
    bool mustKeepOpposite = (_desiredStrand == SAME_STRAND_EITHER);
    const QuickString &currChrom = startRecord->getChrName();
    _foundChroms.insert(currChrom);
    bool madeComposite = false;
    recList->push_back(startRecord);
    recList->setKey(startRecord); //key of recList will just be the startRecord unless we're able to merge more.
    Record::strandType currStrand = startRecord->getStrandVal();
    bool mustMatchStrand = _desiredStrand != ANY_STRAND;
    int currEnd = startRecord->getEndPos();
    //now look for more records to merge with this one.
    //stop when they're out of range, not on the same chromosome, or we hit EOF.
    //ignore if they don't comply with strand.
    Record *nextRecord = NULL;
    while (nextRecord == NULL) {
        bool takenFromStorage = false;
        nextRecord = mustMatchStrand ? tryToTakeFromStorage(currStrand) : tryToTakeFromStorage();
        if (nextRecord == NULL) {
            nextRecord = FileRecordMgr::getNextRecord();
        } else {
            takenFromStorage = true;
        }
        if (nextRecord == NULL) { // EOF hit
            break;
        }
        //delete any record from file with an unknown strand if we are doing stranded merge, but first check
        //that its chrom was the same and it's not out of range. If either is true, stop scanning.
        bool mustDelete = (mustMatchStrand && nextRecord->getStrandVal() == Record::UNKNOWN);
        //check that we are still on the same chromosome.
        const QuickString &newChrom = nextRecord->getChrName();
        if (newChrom != currChrom) { //hit a different chromosome.
            if (_foundChroms.find(newChrom) == _foundChroms.end() || takenFromStorage) {
                //haven't seen this chromosome before, sort order is already enforced in the base class method.
                if (!mustDelete) {
                    addToStorage(nextRecord);
                } else {
                    deleteRecord(nextRecord);
                }
                nextRecord = NULL;
                break;
            }
        }
        //check whether it's in range
        int nextStart = nextRecord->getStartPos();
        if (nextStart > currEnd + _maxDistance) {
            //no, it's out of range.
            if (!mustDelete) {
                addToStorage(nextRecord);
            } else {
                deleteRecord(nextRecord);
            }
            nextRecord = NULL;
            break;
        }
        // NOW, going back, we can delete any unknown strand records. But don't stop scanning.
        if (mustDelete) {
            deleteRecord(nextRecord);
            nextRecord = NULL;
            continue;
        }
        //if taken from file, and wrong strand, store or delete.
        if (!takenFromStorage && ((_mustBeForward && (nextRecord->getStrandVal() != Record::FORWARD)) || (_mustBeReverse && (nextRecord->getStrandVal() != Record::REVERSE)))) {
            if (mustKeepOpposite) {
                addToStorage(nextRecord);
            } else {
                deleteRecord(nextRecord);
            }
            nextRecord = NULL;
            continue; //get the next record
        }
        //ok, they're on the same chrom and in range, and the strand is good. Do a merge.
        recList->push_back(nextRecord);
        madeComposite = true;
        int nextEnd = nextRecord->getEndPos();
        if (nextEnd > currEnd) {
            currEnd = nextEnd;
        }
        nextRecord = NULL;
    }
    if (madeComposite) {
        // The merged interval gets its own record: a copy of the start
        // record with the end extended to cover everything merged.
        Record *newKey = _recordMgr->allocateRecord();
        (*newKey) = (*startRecord);
        newKey->setEndPos(currEnd);
        recList->setKey(newKey);
    }
    _totalMergedRecordLength += (unsigned long)(recList->getKey()->getEndPos() - recList->getKey()->getStartPos());
    return const_cast<Record *>(recList->getKey());
}
// Sets a record aside for a later getNextRecord call, unless the configured
// strand requirement means it could never be used, in which case the record
// is deleted instead of stored.
void FileRecordMergeMgr::addToStorage(Record *record) {
    const Record::strandType strand = record->getStrandVal();

    bool unusable;
    switch (_desiredStrand) {
    case SAME_STRAND_FORWARD:
        // Only forward records are wanted (this also rejects unknown).
        unusable = (strand != Record::FORWARD);
        break;
    case SAME_STRAND_REVERSE:
        // Only reverse records are wanted (this also rejects unknown).
        unusable = (strand != Record::REVERSE);
        break;
    case ANY_STRAND:
        // Strand is ignored, so every record is usable.
        unusable = false;
        break;
    default:
        // SAME_STRAND_EITHER: either known strand is fine, unknown is not.
        unusable = (strand == Record::UNKNOWN);
        break;
    }

    if (unusable) {
        deleteRecord(record);
        return;
    }
    _storedRecords.push(record);
}
// Removes and returns the record at the top of storage, or returns NULL
// (leaving storage untouched) when top() yields NULL.
Record *FileRecordMergeMgr::tryToTakeFromStorage() {
    Record *stored = _storedRecords.top();
    if (stored == NULL) {
        return stored;
    }
    _storedRecords.pop();
    return stored;
}
// Strand-specific variant: removes and returns the top stored record for the
// given strand, or returns NULL (leaving storage untouched) when top(strand)
// yields NULL.
Record *FileRecordMergeMgr::tryToTakeFromStorage(Record::strandType strand) {
    Record *stored = _storedRecords.top(strand);
    if (stored == NULL) {
        return stored;
    }
    _storedRecords.pop(strand);
    return stored;
}
// Releases everything held by a merged-record list: first the constituent
// records (the key is skipped in that pass), then the key itself, after
// which the list's key is reset to NULL.
void FileRecordMergeMgr::deleteMergedRecord(RecordKeyList &recList)
{
    deleteAllMergedItemsButKey(recList);

    deleteRecord(recList.getKey());
    recList.setKey(NULL);
}
// Deletes every record in the list except the one that is also the list's
// key, then clears the list. The key itself is left for the caller to
// release.
void FileRecordMergeMgr::deleteAllMergedItemsButKey(RecordKeyList &recList) {
    RecordKeyList::const_iterator_type iter = recList.begin();
    while (iter != recList.end()) {
        // The key may appear among the constituents; it must survive this pass.
        if (iter->value() != recList.getKey()) {
            deleteRecord(iter->value());
        }
        iter = recList.next();
    }
    recList.clearList();
}
/*
* FileRecordMergeMgr.h
*
* Created on: Mar 19, 2014
* Author: nek3d
*/
#ifndef FILERECORDMERGEMGR_H_
#define FILERECORDMERGEMGR_H_
#include "FileRecordMgr.h"
#include "StrandQueue.h"
// A FileRecordMgr that returns merged ("flattened") records instead of the
// individual records of the file, subject to a strand requirement and a
// maximum merge distance.
class FileRecordMergeMgr : public FileRecordMgr {
public:
// Strand defaults to ANY_STRAND and the maximum distance to 0.
FileRecordMergeMgr(const QuickString & filename);
//////////////////////////////////////////////////////////////////////////////////
//
// MERGED RECORDS
//
// This will give a single "meta" record containing "flattened" or merged records.
//
// Pass an empty RecordKeyList. When done, it will hold a pair: the key is the final merged record,
// and the list holds the constituent Records that were merged.
//
// NOTE(review): keyList defaults to NULL, but the implementation dereferences it
// unconditionally, so a valid list must always be passed.
//
///////////////////////////////////////////////////////////////////////////////////
Record *getNextRecord(RecordKeyList *keyList = NULL);
void deleteMergedRecord(RecordKeyList &recList); // MUST use this method for cleanup!
// Strand requirement applied when choosing which records may be merged.
typedef enum { SAME_STRAND_FORWARD, //must all be forward strand
SAME_STRAND_REVERSE, //must all be reverse strand
SAME_STRAND_EITHER, //must be same strand, but can be either forward or reverse
ANY_STRAND } //do not care about strand (Default value)
WANTED_STRAND_TYPE;
void setStrandType(WANTED_STRAND_TYPE strand) { _desiredStrand = strand; }
// Records whose start is more than maxDistance past the current merged end are not merged.
void setMaxDistance(int maxDistance) { _maxDistance = maxDistance; }
private:
WANTED_STRAND_TYPE _desiredStrand;
int _maxDistance;
// Records read from the file but not consumed by the current merge; they are
// offered again on later getNextRecord calls.
StrandQueue _storedRecords;
// Deletes every list entry except the key, then clears the list.
void deleteAllMergedItemsButKey(RecordKeyList &recList);
// Stores a record for later, or deletes it if the strand requirement rules it out.
void addToStorage(Record *record);
// Pop the next stored record (optionally for one strand); NULL if none.
Record *tryToTakeFromStorage();
Record *tryToTakeFromStorage(Record::strandType strand);
};
#endif /* FILERECORDMERGEMGR_H_ */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment