Skip to content
Snippets Groups Projects
Commit f36d6db4 authored by nkindlon's avatar nkindlon
Browse files

Merge converted to PFM, first check-in

parent ed71c8e0
No related branches found
No related tags found
No related merge requests found
Showing
with 279 additions and 400 deletions
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include "Record.h" #include "Record.h"
#include "NewGenomeFile.h" #include "NewGenomeFile.h"
FileRecordMgr::FileRecordMgr(const QuickString &filename, bool isSorted) FileRecordMgr::FileRecordMgr(const QuickString &filename)
: :
_filename(filename), _filename(filename),
_bufStreamMgr(NULL), _bufStreamMgr(NULL),
...@@ -12,7 +12,7 @@ FileRecordMgr::FileRecordMgr(const QuickString &filename, bool isSorted) ...@@ -12,7 +12,7 @@ FileRecordMgr::FileRecordMgr(const QuickString &filename, bool isSorted)
_fileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE), _fileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE),
_recordType(FileRecordTypeChecker::UNKNOWN_RECORD_TYPE), _recordType(FileRecordTypeChecker::UNKNOWN_RECORD_TYPE),
_recordMgr(NULL), _recordMgr(NULL),
_isSortedInput(isSorted), _isSortedInput(false),
_freeListBlockSize(512), _freeListBlockSize(512),
_useFullBamTags(false), _useFullBamTags(false),
_prevStart(INT_MAX), _prevStart(INT_MAX),
...@@ -88,7 +88,7 @@ bool FileRecordMgr::eof(){ ...@@ -88,7 +88,7 @@ bool FileRecordMgr::eof(){
return _fileReader->eof(); return _fileReader->eof();
} }
Record *FileRecordMgr::allocateAndGetNextRecord() Record *FileRecordMgr::getNextRecord(RecordKeyList *keyList)
{ {
if (!_fileReader->isOpen()) { if (!_fileReader->isOpen()) {
return NULL; return NULL;
...@@ -120,6 +120,9 @@ Record *FileRecordMgr::allocateAndGetNextRecord() ...@@ -120,6 +120,9 @@ Record *FileRecordMgr::allocateAndGetNextRecord()
} }
assignChromId(record); assignChromId(record);
_totalRecordLength += (unsigned long)(record->getEndPos() - record->getStartPos()); _totalRecordLength += (unsigned long)(record->getEndPos() - record->getStartPos());
if (keyList != NULL) {
keyList->setKey(record);
}
return record; return record;
} }
...@@ -198,6 +201,10 @@ void FileRecordMgr::deleteRecord(const Record *record) { ...@@ -198,6 +201,10 @@ void FileRecordMgr::deleteRecord(const Record *record) {
_recordMgr->deleteRecord(record); _recordMgr->deleteRecord(record);
} }
// Returns the key record of keyList to the record manager.
// getNextRecord() accepts a NULL key list, so a NULL argument is tolerated
// here as well and treated as "nothing to delete" instead of being
// dereferenced.
void FileRecordMgr::deleteRecord(RecordKeyList *keyList) {
    if (keyList == NULL) {
        return;
    }
    _recordMgr->deleteRecord(keyList->getKey());
}
void FileRecordMgr::allocateFileReader() void FileRecordMgr::allocateFileReader()
{ {
switch (_fileType) { switch (_fileType) {
...@@ -224,175 +231,3 @@ const BamTools::RefVector & FileRecordMgr::getBamReferences() { ...@@ -224,175 +231,3 @@ const BamTools::RefVector & FileRecordMgr::getBamReferences() {
} }
return static_cast<BamFileReader *>(_fileReader)->getReferences(); return static_cast<BamFileReader *>(_fileReader)->getReferences();
} }
#ifdef false
// Convenience overload: builds the next merged record through the
// RecordKeyList overload, discards the constituent records, and hands back
// only the merged key. Returns NULL when no merged record could be produced.
Record *FileRecordMgr::allocateAndGetNextMergedRecord(WANT_STRAND_TYPE desiredStrand, int maxDistance) {
    RecordKeyList mergedList;
    const bool gotRecord = allocateAndGetNextMergedRecord(mergedList, desiredStrand, maxDistance);
    if (gotRecord) {
        // Only the key survives; everything that was merged into it is released.
        deleteAllMergedItemsButKey(mergedList);
        // The list exposes its key as const, but callers need a mutable record.
        return const_cast<Record *>(mergedList.getKey());
    }
    return NULL;
}
// Builds the next merged ("flattened") record and its constituent list.
//
// recList       - receives the constituent records; its key is set to the merged
//                 record. If it is not clear on entry, its previous contents are
//                 deleted first.
// desiredStrand - strand constraint applied to the start record and to every
//                 record merged into it.
// maxDistance   - a following record is merged while its start is no more than
//                 maxDistance past the current merged end.
//
// Returns false if EOF is reached before a start record satisfying the strand
// constraint is found; true otherwise. Records that are read but cannot be used
// yet (wrong strand, out of range, new chromosome) are parked via addToStorage().
// Calls exit(1) when a chromosome that was already seen shows up again.
bool FileRecordMgr::allocateAndGetNextMergedRecord(RecordKeyList & recList, WANT_STRAND_TYPE desiredStrand, int maxDistance)
{
// release whatever the caller left in the list from a previous call.
if (!recList.allClear()) {
deleteMergedRecord(recList);
}
_mustBeForward = desiredStrand == SAME_STRAND_FORWARD;
_mustBeReverse = desiredStrand == SAME_STRAND_REVERSE;
Record *startRecord = tryToTakeFromStorage();
// if we couldn't use a previously stored record for starters,
//then begin with a new one that matches strand criteria.
while (startRecord == NULL) {
startRecord = allocateAndGetNextRecord();
if (startRecord == NULL) { //hit EOF!!
return false;
}
if (_mustBeForward && !startRecord->getStrand()) {
//record is reverse, wanted forward.
addToStorage(startRecord);
startRecord = NULL;
} else if (_mustBeReverse && startRecord->getStrand()) {
//record is forward, wanted reverse
addToStorage(startRecord);
startRecord = NULL;
}
}
// OK!! We have a start record!
// For SAME_STRAND_EITHER the strand of the start record now fixes the required strand.
_mustBeForward = desiredStrand == SAME_STRAND_FORWARD || (desiredStrand == SAME_STRAND_EITHER && startRecord->getStrand());
_mustBeReverse = desiredStrand == SAME_STRAND_REVERSE || (desiredStrand == SAME_STRAND_EITHER && !startRecord->getStrand());
const QuickString &currChrom = startRecord->getChrName();
_foundChroms.insert(currChrom);
bool madeComposite = false;
recList.push_back(startRecord);
recList.setKey(startRecord); //key of recList will just be the startRecord unless we're able to merge more.
bool currStrand = startRecord->getStrand();
bool mustMatchStrand = desiredStrand != ANY_STRAND;
int currEnd = startRecord->getEndPos();
//now look for more records to merge with this one.
//stop when they're out of range, not on the same chromosome, or we hit EOF.
//ignore if they don't comply with strand.
Record *nextRecord = NULL;
while (nextRecord == NULL) {
bool takenFromStorage = false;
// prefer a previously parked record (of the matching strand, if strand matters) over reading a new one.
nextRecord = mustMatchStrand ? tryToTakeFromStorage(currStrand) : tryToTakeFromStorage();
if (nextRecord == NULL) {
nextRecord = allocateAndGetNextRecord();
} else {
takenFromStorage = true;
}
if (nextRecord == NULL) { // EOF hit
break;
}
const QuickString &newChrom = nextRecord->getChrName();
if (newChrom != currChrom) { //hit a different chromosome.
if (_foundChroms.find(newChrom) == _foundChroms.end() || takenFromStorage) {
//haven't seen this chromosome before, or the record came out of storage:
//park it for a later call and finish the current merge.
addToStorage(nextRecord);
break;
} else {
//different chromosome, but we've already seen this chrom. File is not sorted.
fprintf(stderr, "ERROR: Input file %s is not sorted by chromosome, startPos.\n", _context->getInputFileName(_contextFileIdx).c_str());
deleteRecord(nextRecord);
deleteMergedRecord(recList);
exit(1);
}
}
int nextStart = nextRecord->getStartPos();
//is the record out of range?
if (nextStart > currEnd + maxDistance) {
//yes, it's out of range.
addToStorage(nextRecord);
break;
}
//ok, they're on the same chrom and in range. Are we happy with the strand?
if (mustMatchStrand && nextRecord->getStrand() != currStrand) {
//no, we're not.
addToStorage(nextRecord);
nextRecord = NULL;
continue;
}
//everything's good! do a merge.
recList.push_back(nextRecord);
madeComposite = true;
int nextEnd = nextRecord->getEndPos();
if (nextEnd > currEnd) {
currEnd = nextEnd;
}
// reset so the loop condition holds and we look for another record to merge.
nextRecord = NULL;
}
if (madeComposite) {
// more than one record was merged: the key becomes a fresh copy of the
// start record whose end is extended to the furthest end seen.
Record *newKey = _recordMgr->allocateRecord();
(*newKey) = (*startRecord);
newKey->setEndPos(currEnd);
recList.setKey(newKey);
}
_totalMergedRecordLength += (unsigned long)(recList.getKey()->getEndPos() - recList.getKey()->getStartPos());
return true;
}
// Parks a record that was read but cannot be consumed yet, so that a later
// tryToTakeFromStorage() call can hand it back.
void FileRecordMgr::addToStorage(Record *record)
{
    _storedRecords.push(record);
}
// Removes and returns the top stored record regardless of strand,
// or returns NULL when nothing is stored.
Record *FileRecordMgr::tryToTakeFromStorage() {
    if (_storedRecords.empty()) {
        return NULL;
    }
    Record *stored = const_cast<Record *>(_storedRecords.top());
    // Only pop when the top entry is an actual record.
    if (stored != NULL) {
        _storedRecords.pop();
    }
    return stored;
}
// Removes and returns the top stored record of the requested strand
// (true = forward queue, false = reverse queue), or NULL when that
// queue is empty.
Record *FileRecordMgr::tryToTakeFromStorage(bool strand) {
    if (strand) {
        if (_storedRecords.emptyForward()) {
            return NULL;
        }
        Record *forwardRec = const_cast<Record *>(_storedRecords.topForward());
        _storedRecords.popForward();
        return forwardRec;
    }

    if (_storedRecords.emptyReverse()) {
        return NULL;
    }
    Record *reverseRec = const_cast<Record *>(_storedRecords.topReverse());
    _storedRecords.popReverse();
    return reverseRec;
}
// Releases everything held by a merged record list: first the constituent
// records, then the key itself, leaving the list with a NULL key.
void FileRecordMgr::deleteMergedRecord(RecordKeyList &recList) {
    deleteAllMergedItemsButKey(recList);
    const Record *mergedKey = recList.getKey();
    deleteRecord(mergedKey);
    recList.setKey(NULL);
}
// Deletes every record in the list except the one that is also the key,
// then empties the list. The key itself is left untouched.
void FileRecordMgr::deleteAllMergedItemsButKey(RecordKeyList &recList)
{
    for (RecordKeyList::const_iterator_type node = recList.begin(); node != recList.end(); node = recList.next()) {
        // The key may appear in the list too; it must survive this call.
        if (node->value() != recList.getKey()) {
            deleteRecord(node->value());
        }
    }
    recList.clearList();
}
#endif
...@@ -32,12 +32,20 @@ class NewGenomeFile; ...@@ -32,12 +32,20 @@ class NewGenomeFile;
class FileRecordMgr { class FileRecordMgr {
public: public:
FileRecordMgr(const QuickString & filename, bool isSorted = false); FileRecordMgr(const QuickString & filename);
~FileRecordMgr(); virtual ~FileRecordMgr();
bool open(); bool open();
void close(); void close();
bool eof(); bool eof();
//This is an all-in-one method to give the user a new record that is initialized with
//the next entry in the data file.
//NOTE!! User MUST pass back the returned pointer to deleteRecord method for cleanup!
//Also Note! User must check for NULL returned, meaning we failed to get the next record.
virtual Record *getNextRecord(RecordKeyList *keyList = NULL);
void deleteRecord(const Record *);
virtual void deleteRecord(RecordKeyList *keyList);
const QuickString &getFileName() const { return _filename;} const QuickString &getFileName() const { return _filename;}
bool hasHeader() const { return _fileReader->hasHeader(); } bool hasHeader() const { return _fileReader->hasHeader(); }
const QuickString &getHeader() const { return _fileReader->getHeader(); } const QuickString &getHeader() const { return _fileReader->getHeader(); }
...@@ -69,55 +77,6 @@ public: ...@@ -69,55 +77,6 @@ public:
const BamTools::RefVector &getBamReferences(); const BamTools::RefVector &getBamReferences();
int getNumFields() const { return _fileReader->getNumFields(); } int getNumFields() const { return _fileReader->getNumFields(); }
//This is an all-in-one method to give the user a new record that is initialized with
//the next entry in the data file.
//NOTE!! User MUST pass back the returned pointer to deleteRecord method for cleanup!
//Also Note! User must check for NULL returned, meaning we failed to get the next record.
Record *allocateAndGetNextRecord();
void deleteRecord(const Record *);
#ifdef false
//////////////////////////////////////////////////////////////////////////////////
//
// MERGED RECORDS
//
//this will give a single "meta" record containing "flattened" or merged records.
//
// 1st ARG: Pass an empty RecordKeyList. When done, will have a pair: 1st is the final merged record,
// second is list of constituent Records merged.
// ** NOTE ** If the RecordKeyList is not empty, this method will empty it for you and delete all contents!
//
// 2nd ARG: Choose from WANT_STRAND_TYPE, defined below.
//
// 3rd ARG: allows for nearby records, i.e. maxDistance 100 will merge records <= 100 bases apart. Default 0 means only
// merge records that actually intersect.
//
// Return value: true if any records found. False if eof hit before records matching requested parameters found.
typedef enum { SAME_STRAND_FORWARD, //must all be forward strand
SAME_STRAND_REVERSE, //must all be reverse strand
SAME_STRAND_EITHER, //must be same strand, but can be either forward or reverse
ANY_STRAND } //do no care about strand (Default value)
WANT_STRAND_TYPE;
//
// WARNING!! Specifying a strand will keep all records on the other strand in memory!!
// This is done so that requests for records on that other strand can still be met.
// For now, use this method at any time to purge the kept records from memory, such as
// when changing chromosomes, for example.
void purgeKeepList();
bool allocateAndGetNextMergedRecord(RecordKeyList & recList, WANT_STRAND_TYPE desiredStrand = ANY_STRAND, int maxDistance = 0);
void deleteMergedRecord(RecordKeyList &recList); // MUST use this method for cleanup!
//this method will allocate a new record of merged records, but the returned record should only be passed to the deleteRecord method
//for cleanup, not to the deleteMergedRecord method.
Record *allocateAndGetNextMergedRecord(WANT_STRAND_TYPE desiredStrand = ANY_STRAND, int maxDistance = 0);
//
// END MERGED RECORDS
//
//////////////////////////////////////////////////////////////////////////////////
#endif
//File statistics //File statistics
unsigned long getTotalRecordLength() const { return _totalRecordLength; } //sum of length of all returned records unsigned long getTotalRecordLength() const { return _totalRecordLength; } //sum of length of all returned records
...@@ -140,7 +99,9 @@ public: ...@@ -140,7 +99,9 @@ public:
_hasGenomeFile = true; _hasGenomeFile = true;
} }
private: void setIsSorted(bool val) { _isSortedInput = val; }
protected:
QuickString _filename; QuickString _filename;
BufferedStreamMgr *_bufStreamMgr; BufferedStreamMgr *_bufStreamMgr;
...@@ -158,8 +119,6 @@ private: ...@@ -158,8 +119,6 @@ private:
int _prevStart; int _prevStart;
int _prevChromId; int _prevChromId;
//members for handling merged records
// DualQueue<Record *, DualQueueAscending > _storedRecords;
bool _mustBeForward; bool _mustBeForward;
bool _mustBeReverse; bool _mustBeReverse;
...@@ -177,16 +136,6 @@ private: ...@@ -177,16 +136,6 @@ private:
void testInputSortOrder(Record *record); void testInputSortOrder(Record *record);
void assignChromId(Record *); void assignChromId(Record *);
void sortError(const Record *record, bool genomeFileError); void sortError(const Record *record, bool genomeFileError);
#ifdef false
void deleteAllMergedItemsButKey(RecordKeyList &recList);
void addToStorage(Record *record);
Record *tryToTakeFromStorage();
Record *tryToTakeFromStorage(bool strand);
#endif
}; };
......
...@@ -21,8 +21,8 @@ SUBDIRS = ./FileReaders \ ...@@ -21,8 +21,8 @@ SUBDIRS = ./FileReaders \
# ---------------------------------- # ----------------------------------
# define our source and object files # define our source and object files
# ---------------------------------- # ----------------------------------
SOURCES= FileRecordMgr.cpp FileRecordMgr.h SOURCES= FileRecordMgr.cpp FileRecordMgr.h FileRecordMergeMgr.cpp FileRecordMergeMgr.h
OBJECTS= FileRecordMgr.o RecordOutputMgr.o OBJECTS= FileRecordMgr.o FileRecordMergeMgr.o
_EXT_OBJECTS=SingleLineDelimTextFileReader.o BamFileReader.o Bed3Interval.o Bed6Interval.o BedPlusInterval.o Bed12Interval.o BamRecord.o \ _EXT_OBJECTS=SingleLineDelimTextFileReader.o BamFileReader.o Bed3Interval.o Bed6Interval.o BedPlusInterval.o Bed12Interval.o BamRecord.o \
SingleLineDelimTransferBuffer.o FileRecordTypeChecker.o QuickString.o ParseTools.o RecordKeyList.o BufferedStreamMgr.o SingleLineDelimTransferBuffer.o FileRecordTypeChecker.o QuickString.o ParseTools.o RecordKeyList.o BufferedStreamMgr.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
...@@ -31,6 +31,8 @@ BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) ...@@ -31,6 +31,8 @@ BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
$(BUILT_OBJECTS): $(SOURCES) $(SUBDIRS) $(BUILT_OBJECTS): $(SOURCES) $(SUBDIRS)
@echo " * compiling FileRecordMgr.cpp" @echo " * compiling FileRecordMgr.cpp"
@$(CXX) -c -o $(OBJ_DIR)/FileRecordMgr.o FileRecordMgr.cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) @$(CXX) -c -o $(OBJ_DIR)/FileRecordMgr.o FileRecordMgr.cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
@echo " * compiling FileRecordMergeMgr.cpp"
@$(CXX) -c -o $(OBJ_DIR)/FileRecordMergeMgr.o FileRecordMergeMgr.cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
...@@ -42,10 +44,8 @@ $(SUBDIRS): $(OBJ_DIR) ...@@ -42,10 +44,8 @@ $(SUBDIRS): $(OBJ_DIR)
clean: clean:
@echo "Cleaning up." @echo "Cleaning up."
@rm -f $(OBJ_DIR)/FileRecordMgr.o @rm -f $(OBJ_DIR)/FileRecordMgr.o
@rm -f $(OBJ_DIR)/RecordMgr.o
@rm -f $(OBJ_DIR)/FileRecordTypeChecker.o @rm -f $(OBJ_DIR)/FileRecordTypeChecker.o
@rm -f $(OBJ_DIR)/SingleLineDelimTextFileReader.o @rm -f $(OBJ_DIR)/FileRecordMergeMgr.o
@rm -f $(OBJ_DIR)/SingleLineDelimTransferBuffer.o
.PHONY: clean .PHONY: clean
\ No newline at end of file
...@@ -21,9 +21,9 @@ SOURCES= RecordMgr.cpp RecordMgr.h Record.h Record.cpp Bed3Interval.h Bed3Interv ...@@ -21,9 +21,9 @@ SOURCES= RecordMgr.cpp RecordMgr.h Record.h Record.cpp Bed3Interval.h Bed3Interv
Bed4Interval.h Bed4Interval.cpp BedGraphInterval.h BedGraphInterval.cpp Bed5Interval.h Bed5Interval.cpp \ Bed4Interval.h Bed4Interval.cpp BedGraphInterval.h BedGraphInterval.cpp Bed5Interval.h Bed5Interval.cpp \
Bed6Interval.h Bed6Interval.cpp \ Bed6Interval.h Bed6Interval.cpp \
BedPlusInterval.h BedPlusInterval.cpp Bed12Interval.h Bed12Interval.cpp BamRecord.h BamRecord.cpp VcfRecord.h VcfRecord.cpp \ BedPlusInterval.h BedPlusInterval.cpp Bed12Interval.h Bed12Interval.cpp BamRecord.h BamRecord.cpp VcfRecord.h VcfRecord.cpp \
GffRecord.h GffRecord.cpp RecordKeyList.h RecordKeyList.cpp BlockMgr.h BlockMgr.cpp GffRecord.h GffRecord.cpp RecordKeyList.h RecordKeyList.cpp BlockMgr.h BlockMgr.cpp StrandQueue.h StrandQueue.cpp
OBJECTS= RecordMgr.o Record.o Bed3Interval.o Bed4Interval.o BedGraphInterval.o Bed5Interval.o Bed6Interval.o BedPlusInterval.o Bed12Interval.o BamRecord.o \ OBJECTS= RecordMgr.o Record.o Bed3Interval.o Bed4Interval.o BedGraphInterval.o Bed5Interval.o Bed6Interval.o BedPlusInterval.o Bed12Interval.o BamRecord.o \
VcfRecord.o GffRecord.o RecordKeyList.o BlockMgr.o VcfRecord.o GffRecord.o RecordKeyList.o BlockMgr.o StrandQueue.o
_EXT_OBJECTS=ParseTools.o QuickString.o ChromIdLookup.o _EXT_OBJECTS=ParseTools.o QuickString.o ChromIdLookup.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
...@@ -40,6 +40,6 @@ clean: ...@@ -40,6 +40,6 @@ clean:
@echo "Cleaning up." @echo "Cleaning up."
@rm -f $(OBJ_DIR)/RecordMgr.o $(OBJ_DIR)/Record.o $(OBJ_DIR)/Bed3Interval.o $(OBJ_DIR)/Bed4Interval.o \ @rm -f $(OBJ_DIR)/RecordMgr.o $(OBJ_DIR)/Record.o $(OBJ_DIR)/Bed3Interval.o $(OBJ_DIR)/Bed4Interval.o \
$(OBJ_DIR)/BedGraphInterval.o $(OBJ_DIR)/Bed5Interval.o $(OBJ_DIR)/Bed6Interval.o \ $(OBJ_DIR)/BedGraphInterval.o $(OBJ_DIR)/Bed5Interval.o $(OBJ_DIR)/Bed6Interval.o \
$(OBJ_DIR)/BedPlusInterval.o $(OBJ_DIR)/Bed12Interval.o $(OBJ_DIR)/BamRecord.o $(OBJ_DIR)/VcfRecord.o $(OBJ_DIR)/GffRecord.o $(OBJ_DIR)/BlockMgr.o $(OBJ_DIR)/BedPlusInterval.o $(OBJ_DIR)/Bed12Interval.o $(OBJ_DIR)/BamRecord.o $(OBJ_DIR)/VcfRecord.o $(OBJ_DIR)/GffRecord.o $(OBJ_DIR)/BlockMgr.o $(OBJ_DIR)/StrandQueue.o
.PHONY: clean .PHONY: clean
\ No newline at end of file
...@@ -152,6 +152,9 @@ protected: ...@@ -152,6 +152,9 @@ protected:
bool _isMateUnmapped; bool _isMateUnmapped;
}; };
class RecordPtrSortFunctor {
public:
bool operator()(const Record *rec1, const Record *rec2) const { return *rec1 > *rec2; }
};
#endif /* RECORD_H_ */ #endif /* RECORD_H_ */
/*
* StrandQueue.cpp
*
* Created on: Mar 31, 2014
* Author: nek3d
*/
#include "StrandQueue.h"
// Creates one heap-allocated queue per strand type and records the strand
// values in a vector so they can be iterated.
StrandQueue::StrandQueue() {
    int remaining = NUM_QUEUES;
    while (remaining-- > 0) {
        _queues.push_back(new queueType());
    }

    _strandIdxs.push_back(Record::FORWARD);
    _strandIdxs.push_back(Record::REVERSE);
    _strandIdxs.push_back(Record::UNKNOWN);
}
// Frees the per-strand queues allocated by the constructor. The records
// referenced by the queues are not deleted here.
StrandQueue::~StrandQueue() {
    int idx = 0;
    while (idx < NUM_QUEUES) {
        delete _queues[idx];
        ++idx;
    }
}
// Returns the record at the top of whichever queue getMinIdx() selects,
// or NULL when every queue is empty. The record is not removed.
Record *StrandQueue::top() const
{
    const int chosenIdx = getMinIdx();
    if (chosenIdx != -1) {
        return const_cast<Record *>(_queues[chosenIdx]->top());
    }
    return NULL;
}
void StrandQueue::pop() {
int minIdx = getMinIdx();
if (minIdx == -1) return;
_queues[minIdx]->pop();
}
// Returns the top record of the queue holding the given strand, without
// removing it. Returns NULL when that queue is empty or the strand value is
// not one of FORWARD / REVERSE / UNKNOWN.
//
// Queue layout matches push(): 0 = FORWARD, 1 = REVERSE, 2 = UNKNOWN.
Record * StrandQueue::top(Record::strandType strand) const {
    int queueIdx = -1;
    switch (strand) {
    case Record::FORWARD:
        queueIdx = 0;
        break;
    case Record::REVERSE:
        queueIdx = 1;
        break;
    case Record::UNKNOWN:
        queueIdx = 2;
        break;
    default:
        return NULL;
    }
    // The emptiness check must be made on the same queue that is read.
    // Previously the UNKNOWN case tested queue 0 but read queue 2, which could
    // call top() on an empty queue or wrongly report "nothing stored".
    if (_queues[queueIdx]->empty()) {
        return NULL;
    }
    return const_cast<Record *>(_queues[queueIdx]->top());
}
// Removes the top record of the queue for the given strand. Does nothing if
// that queue is empty or the strand value is not recognised.
void StrandQueue::pop(Record::strandType strand) const {
    int queueIdx = -1;
    if (strand == Record::FORWARD) {
        queueIdx = 0;
    } else if (strand == Record::REVERSE) {
        queueIdx = 1;
    } else if (strand == Record::UNKNOWN) {
        queueIdx = 2;
    }

    if (queueIdx < 0) {
        return;
    }
    if (!_queues[queueIdx]->empty()) {
        _queues[queueIdx]->pop();
    }
}
// Inserts the record into the queue matching its strand value
// (0 = FORWARD, 1 = REVERSE, 2 = UNKNOWN). A record reporting any other
// strand value is not stored.
void StrandQueue::push(Record *record) {
    const Record::strandType strand = record->getStrandVal();
    if (strand == Record::FORWARD) {
        _queues[0]->push(record);
    } else if (strand == Record::REVERSE) {
        _queues[1]->push(record);
    } else if (strand == Record::UNKNOWN) {
        _queues[2]->push(record);
    }
}
// Total number of records held across all strand queues.
size_t StrandQueue::size() const {
    size_t total = 0;
    int idx = NUM_QUEUES;
    while (idx-- > 0) {
        total += _queues[idx]->size();
    }
    return total;
}
// True only when every strand queue is empty.
bool StrandQueue::empty() const {
    int idx = 0;
    // Advance past empty queues; stopping early means a non-empty one was found.
    while (idx < NUM_QUEUES && _queues[idx]->empty()) {
        ++idx;
    }
    return idx == NUM_QUEUES;
}
int StrandQueue::getMinIdx() const {
if (empty()) return -1;
const Record *minRec = NULL;
int minIdx = -1;
for (int i = 0; i < NUM_QUEUES; i++) {
if (_queues[i]->empty()) continue;
const Record *currTop = _queues[i]->top();
if (currTop == NULL) continue;
if (minRec == NULL || *currTop < *minRec) {
minRec = currTop;
minIdx = i;
}
}
return minIdx;
}
/*
* StrandQueue.h
*
* Created on: Jan 29, 2013
* Author: nek3d
*/
#ifndef STRANDQUEUE_H_
#define STRANDQUEUE_H_
using namespace std;
#include <vector>
#include <queue>
#include <cstdio>
#include <cstdlib>
#include "Record.h"
class StrandQueue {
public:
StrandQueue();
~StrandQueue();
Record * top() const;
void pop();
Record * top(Record::strandType strand) const;
void pop(Record::strandType strand) const;
void push(Record *record);
size_t size() const;
bool empty() const;
private:
// static RecordPtrSortFunctor _recSortFunctor;
typedef priority_queue<Record *, vector<const Record *>, RecordPtrSortFunctor > queueType;
vector<queueType *> _queues;
static const int NUM_QUEUES = 3;
//we want to be able to iterate over the enumerated strand types in Record.h,
//which are FORWARD, REVERSE, and UNKNOWN. However, iterating over an enum is hard to
//do, so we'll use a suggestion found in a forum, and put the enum values into a vector.
vector<Record::strandType> _strandIdxs;
int getMinIdx() const; //will return the idx of queue with the current min val.
};
#endif // STRANDQUEUE_H_
File deleted
...@@ -18,10 +18,24 @@ public: ...@@ -18,10 +18,24 @@ public:
KeyListOps(); KeyListOps();
void setColumns(const QuickString &columns) { _columns = columns; } void setColumns(const QuickString &columns) { _columns = columns; }
// Appends newCols to the stored column list, inserting a comma separator
// when the list already has content.
void addColumns(const QuickString &newCols) {
    const bool needSeparator = !_columns.empty();
    if (needSeparator) {
        _columns += ",";
    }
    _columns += newCols;
}
void setOperations(const QuickString & operation) { _operations = operation; } void setOperations(const QuickString & operation) { _operations = operation; }
// Appends newOps to the stored operation list, inserting a comma separator
// when the list already has content.
void addOperations(const QuickString &newOps) {
    const bool needSeparator = !_operations.empty();
    if (needSeparator) {
        _operations += ",";
    }
    _operations += newOps;
}
void setNullValue(const QuickString & nullValue) { _methods.setNullValue(nullValue); } void setNullValue(const QuickString & nullValue) { _methods.setNullValue(nullValue); }
void setDelimStr(const QuickString & delimStr) { _methods.setDelimStr(delimStr); } void setDelimStr(const QuickString & delimStr) { _methods.setDelimStr(delimStr); }
// Read accessors for the current settings. Each returns a reference to
// existing state rather than a copy.
// Column list as set by setColumns()/addColumns().
const QuickString &getColumns() { return _columns; }
// Operation list as set by setOperations()/addOperations().
const QuickString &getOperations() { return _operations; }
// Null-value string, forwarded from _methods.
const QuickString &getNullValue() { return _methods.getNullValue(); }
// Delimiter string, forwarded from _methods.
const QuickString &getDelimStr() { return _methods.getDelimStr(); }
void setKeyList(RecordKeyList *keyList) { _methods.setKeyList(keyList); } void setKeyList(RecordKeyList *keyList) { _methods.setKeyList(keyList); }
typedef enum { SUM, MEAN, STDDEV, SAMPLE_STDDEV, MEDIAN, MODE, ANTIMODE, MIN, MAX, ABSMIN, ABSMAX, COUNT, DISTINCT, COUNT_DISTINCT, typedef enum { SUM, MEAN, STDDEV, SAMPLE_STDDEV, MEDIAN, MODE, ANTIMODE, MIN, MAX, ABSMIN, ABSMAX, COUNT, DISTINCT, COUNT_DISTINCT,
......
...@@ -185,7 +185,7 @@ bool NewChromSweep::next(RecordKeyList &next) { ...@@ -185,7 +185,7 @@ bool NewChromSweep::next(RecordKeyList &next) {
void NewChromSweep::nextRecord(bool query) { void NewChromSweep::nextRecord(bool query) {
if (query) { if (query) {
// if (!_context->getUseMergedIntervals()) { // if (!_context->getUseMergedIntervals()) {
_currQueryRec = _queryFRM->allocateAndGetNextRecord(); _currQueryRec = _queryFRM->getNextRecord();
// } else { // } else {
// _currQueryRec = _queryFRM->allocateAndGetNextMergedRecord(_context->getSameStrand() ? FileRecordMgr::SAME_STRAND_EITHER : FileRecordMgr::ANY_STRAND); // _currQueryRec = _queryFRM->allocateAndGetNextMergedRecord(_context->getSameStrand() ? FileRecordMgr::SAME_STRAND_EITHER : FileRecordMgr::ANY_STRAND);
// } // }
...@@ -194,7 +194,7 @@ void NewChromSweep::nextRecord(bool query) { ...@@ -194,7 +194,7 @@ void NewChromSweep::nextRecord(bool query) {
} }
} else { //database } else { //database
// if (!_context->getUseMergedIntervals()) { // if (!_context->getUseMergedIntervals()) {
_currDatabaseRec = _databaseFRM->allocateAndGetNextRecord(); _currDatabaseRec = _databaseFRM->getNextRecord();
// } else { // } else {
// _currDatabaseRec = _databaseFRM->allocateAndGetNextMergedRecord(_context->getSameStrand() ? FileRecordMgr::SAME_STRAND_EITHER : FileRecordMgr::ANY_STRAND); // _currDatabaseRec = _databaseFRM->allocateAndGetNextMergedRecord(_context->getSameStrand() ? FileRecordMgr::SAME_STRAND_EITHER : FileRecordMgr::ANY_STRAND);
// } // }
......
...@@ -76,9 +76,15 @@ bool RecordOutputMgr::printKeyAndTerminate(RecordKeyList &keyList) { ...@@ -76,9 +76,15 @@ bool RecordOutputMgr::printKeyAndTerminate(RecordKeyList &keyList) {
if (bamCode == BAM_AS_BAM) { if (bamCode == BAM_AS_BAM) {
return true; return true;
} else if (bamCode == NOT_BAM) { } else if (bamCode == NOT_BAM) {
keyList.getKey()->print(_outBuf); if (_context->getProgram() == ContextBase::MERGE) {
//when printing merged records, we want to force the printing into
//bed3 format, which is surprisingly difficult to do. Had to use the following:
const Bed3Interval *bed3 = static_cast<const Bed3Interval *>(keyList.getKey());
bed3->Bed3Interval::print(_outBuf);
} else {
keyList.getKey()->print(_outBuf);
}
return false; return false;
} }
//otherwise, it was BAM_AS_BED, and the key was printed. //otherwise, it was BAM_AS_BED, and the key was printed.
return false; return false;
...@@ -114,6 +120,7 @@ void RecordOutputMgr::printRecord(const Record *record) ...@@ -114,6 +120,7 @@ void RecordOutputMgr::printRecord(const Record *record)
void RecordOutputMgr::printRecord(const Record *record, const QuickString & value) void RecordOutputMgr::printRecord(const Record *record, const QuickString & value)
{ {
_afterVal = value;
printRecord(record); printRecord(record);
_outBuf.append(value); _outBuf.append(value);
newline(); newline();
...@@ -206,6 +213,17 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi ...@@ -206,6 +213,17 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi
} }
_currBamBlockList = NULL; _currBamBlockList = NULL;
return; return;
} else if (_context->getProgram() == ContextBase::MERGE) {
if (!printKeyAndTerminate(keyList)) {
if (_context->getDesiredStrand() != FileRecordMergeMgr::ANY_STRAND) {
//add the sign of the record
tab();
_outBuf.append(keyList.getKey()->getStrand());
}
if (!_afterVal.empty()) tab();
}
_currBamBlockList = NULL;
return;
} }
} }
......
...@@ -44,6 +44,7 @@ private: ...@@ -44,6 +44,7 @@ private:
// //
BlockMgr *_bamBlockMgr; BlockMgr *_bamBlockMgr;
const BlockMgr *_splitInfo; const BlockMgr *_splitInfo;
QuickString _afterVal; //to store values to be printed after record, such as column operations.
//some helper functions to neaten the code. //some helper functions to neaten the code.
void tab() { _outBuf.append('\t'); } void tab() { _outBuf.append('\t'); }
void newline() { _outBuf.append('\n'); } void newline() { _outBuf.append('\n'); }
......
/*
* DualQueue.h
*
* Created on: Jan 29, 2013
* Author: nek3d
*/
#ifdef false
#ifndef DUALQUEUE_H_
#define DUALQUEUE_H_
using namespace std;
#include <vector>
#include <queue>
#include <cstdio>
#include <cstdlib>
// Comparator for a priority queue whose top should be the SMALLEST item.
// T is a pointer-like type; items are compared through operator< on the
// pointed-to values.
//
// Returns true when item1 should sit below item2 in the heap, i.e. when
// *item2 < *item1. This is a strict weak ordering: equal items compare false
// in both directions, as std::priority_queue requires. The comparator no
// longer writes debug text to stdout on each comparison.
template <class T> class DualQueueAscending {
public:
    bool operator() ( const T &item1, const T &item2) const {
        return *(item2) < *(item1);
    }
};
// Comparator for a priority queue whose top should be the LARGEST item.
// T is a pointer-like type; items are compared through operator< on the
// pointed-to values.
//
// Returns true when *item1 < *item2. Equal items compare false in both
// directions, so this is a strict weak ordering as std::priority_queue
// requires (the previous form returned true for equal items).
template <class T> class DualQueueDescending {
public:
    bool operator() ( const T &item1, const T &item2) const {
        return *(item1) < *(item2);
    }
};
// Two priority queues, one for forward-strand items and one for
// reverse-strand items, that can be used separately or as one combined queue.
//
// T           - pointer-like item type; push() calls item->getStrand() to pick
//               the queue (true = forward, false = reverse).
// CompareFunc - comparator template with std::priority_queue semantics:
//               CompareFunc<T>()(a, b) is true when a has LOWER priority than b.
//
// The combined top()/pop() print an error and exit(1) on an empty queue.
// The per-strand top*/pop* methods perform no emptiness check.
template <class T, template <class> class CompareFunc> class DualQueue {
public:
    DualQueue() {}
    ~DualQueue() {}

    // Highest-priority item across both strands.
    const T & top() const {
        if (empty()) {
            fprintf(stderr, "ERROR. Tried to top from empty dualQueue.\n");
            exit(1);
        }
        if (emptyForward()) {
            return topReverse();
        }
        if (emptyReverse()) {
            return topForward();
        }
        return (topFowardHigherPriorityThanTopReverse() ? topForward() : topReverse());
    }

    // Removes the item that top() would return.
    void pop() {
        if (empty()) {
            fprintf(stderr, "ERROR. Tried to pop from empty dualQueue.\n");
            exit(1);
        }
        if (emptyForward()) {
            popReverse();
            return;
        }
        if (emptyReverse()) {
            popForward();
            return;
        }
        if (topFowardHigherPriorityThanTopReverse()) {
            popForward();
        } else {
            popReverse();
        }
    }

    void push(const T &item) { item->getStrand() ? pushForward(item) : pushReverse(item); }
    size_t size() const { return sizeForward() + sizeReverse(); }
    bool empty() const { return _forwardQueue.empty() && _reverseQueue.empty(); }

    // Forward-strand queue only.
    const T & topForward() const { return _forwardQueue.top(); }
    void popForward() { _forwardQueue.pop(); }
    void pushForward(const T &item) { _forwardQueue.push(item); }
    size_t sizeForward() const { return _forwardQueue.size(); }
    bool emptyForward() const { return _forwardQueue.empty(); }

    // Reverse-strand queue only.
    const T & topReverse() const { return _reverseQueue.top(); }
    void popReverse() { _reverseQueue.pop(); }
    void pushReverse(const T &item) { _reverseQueue.push(item); }
    size_t sizeReverse() const { return _reverseQueue.size(); }
    bool emptyReverse() const { return _reverseQueue.empty(); }

private:
    typedef std::priority_queue<T, std::vector<T>, CompareFunc<T> > queueType;
    queueType _forwardQueue;
    queueType _reverseQueue;

    // True when the forward top outranks the reverse top. With priority_queue
    // semantics, comp(a, b) == true means a ranks BELOW b, so forward wins
    // exactly when comp(reverseTop, forwardTop) holds. (The earlier test,
    // comp(forwardTop, reverseTop), selected the lower-priority item.)
    // When neither outranks the other, the reverse top is chosen.
    // Callers must ensure both queues are non-empty.
    bool topFowardHigherPriorityThanTopReverse() const {
        return CompareFunc<T>()(topReverse(), topForward());
    }
};
#endif /* DUALQUEUE_H_ */
#endif
...@@ -19,6 +19,9 @@ int str2chrPos(const QuickString &str) { ...@@ -19,6 +19,9 @@ int str2chrPos(const QuickString &str) {
} }
int str2chrPos(const char *str, size_t ulen) { int str2chrPos(const char *str, size_t ulen) {
if (ulen == 0) {
ulen = strlen(str);
}
int len=(int)ulen; int len=(int)ulen;
if (len < 1 || len > 10) { if (len < 1 || len > 10) {
return INT_MIN; //can't do more than 9 digits and a minus sign return INT_MIN; //can't do more than 9 digits and a minus sign
......
...@@ -22,7 +22,7 @@ bool isNumeric(const QuickString &str); ...@@ -22,7 +22,7 @@ bool isNumeric(const QuickString &str);
//Empty strings, too long strings, or strings containing anything other than //Empty strings, too long strings, or strings containing anything other than
//digits (with the excpetion of a minus sign in the first position) //digits (with the excpetion of a minus sign in the first position)
//will result in error. Errors return INT_MIN. //will result in error. Errors return INT_MIN.
int str2chrPos(const char *str, size_t len); int str2chrPos(const char *str, size_t len = 0);
int str2chrPos(const QuickString &str); int str2chrPos(const QuickString &str);
......
...@@ -30,18 +30,22 @@ $BT merge -i a.bed > obs ...@@ -30,18 +30,22 @@ $BT merge -i a.bed > obs
check obs exp check obs exp
rm obs exp rm obs exp
###########################################################
#
# NOTE: Testing for sorted input is now deprecated, as the
# FileRecordMgr is already testing for that.
#
########################################################### ###########################################################
# Test #2 # Test #2
# Enforce coordinate sorted input. # Enforce coordinate sorted input.
########################################################### ###########################################################
echo " merge.t2...\c" #echo " merge.t2...\c"
command -v tac 2>/dev/null || alias tac="sed '1!G;h;\$!d'" #command -v tac 2>/dev/null || alias tac="sed '1!G;h;\$!d'"
tac a.bed | $BT merge -i - 2> obs #tac a.bed | $BT merge -i - 2> obs
echo "ERROR: input file: (-) is not sorted by chrom then start. #echo "ERROR: input file: (-) is not sorted by chrom then start.
The start coordinate at line 3 is less than the start at line 2" > exp # The start coordinate at line 3 is less than the start at line 2" > exp
check obs exp #check obs exp
rm obs exp #rm obs exp
########################################################### ###########################################################
...@@ -64,11 +68,9 @@ rm obs exp ...@@ -64,11 +68,9 @@ rm obs exp
########################################################### ###########################################################
echo " merge.t4...\c" echo " merge.t4...\c"
echo \ echo \
"chr1 10 20 "*****
***** ***** ERROR: Requested column 4, but database file a.bed only has fields 1 - 3." > exp
*****ERROR: No names found to report for the -names option. Exiting. $BT merge -i a.bed -nms 2>&1 > /dev/null | head -3 | tail -2 > obs
*****" > exp
$BT merge -i a.bed -nms > obs 2>&1
check obs exp check obs exp
rm obs exp rm obs exp
...@@ -130,7 +132,7 @@ chr1 30 100 a2,a3,a4 9 3 ...@@ -130,7 +132,7 @@ chr1 30 100 a2,a3,a4 9 3
chr2 10 20 a1 5 1 chr2 10 20 a1 5 1
chr2 30 40 a2 6 1 chr2 30 40 a2 6 1
chr2 42 100 a3,a4 15 2" > exp chr2 42 100 a3,a4 15 2" > exp
$BT merge -i a.full.bed -nms -n -scores sum> obs $BT merge -i a.full.bed -nms -scores sum -n> obs
check obs exp check obs exp
rm obs exp rm obs exp
...@@ -139,15 +141,15 @@ rm obs exp ...@@ -139,15 +141,15 @@ rm obs exp
########################################################### ###########################################################
echo " merge.t9...\c" echo " merge.t9...\c"
echo \ echo \
"chr1 10 20 a1 1 + 1 "chr1 10 20 + a1 1 1
chr1 30 40 a2 2 + 1 chr1 30 40 + a2 2 1
chr1 45 100 a4 4 + 1 chr1 40 50 - a3 3 1
chr1 40 50 a3 3 - 1 chr1 45 100 + a4 4 1
chr2 10 20 a1 5 + 1 chr2 10 20 + a1 5 1
chr2 30 40 a2 6 + 1 chr2 30 40 + a2 6 1
chr2 42 50 a3 7 + 1 chr2 42 50 + a3 7 1
chr2 45 100 a4 8 - 1" > exp chr2 45 100 - a4 8 1" > exp
$BT merge -i a.full.bed -s -nms -n -scores sum> obs $BT merge -i a.full.bed -s -nms -scores sum -n> obs
check obs exp check obs exp
rm obs exp rm obs exp
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment