Merge converted to PFM, first check-in

f36d6db4 · nkindlon · ed71c8e0 · f36d6db4 · f36d6db4 · f36d6db4
Commit f36d6db4 authored 10 years ago by nkindlon
--- a/src/utils/FileRecordTools/FileRecordMgr.cpp
+++ b/src/utils/FileRecordTools/FileRecordMgr.cpp
@@ -4,7 +4,7 @@
 #include "Record.h"
 #include "NewGenomeFile.h"
-FileRecordMgr::FileRecordMgr(const QuickString &filename, bool isSorted)
+FileRecordMgr::FileRecordMgr(const QuickString &filename)
 :
  _filename(filename),
  _bufStreamMgr(NULL),
@@ -12,7 +12,7 @@ FileRecordMgr::FileRecordMgr(const QuickString &filename, bool isSorted)
  _fileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE),
  _recordType(FileRecordTypeChecker::UNKNOWN_RECORD_TYPE),
  _recordMgr(NULL),
-  _isSortedInput(isSorted),
+  _isSortedInput(false),
  _freeListBlockSize(512),
  _useFullBamTags(false),
  _prevStart(INT_MAX),
@@ -88,7 +88,7 @@ bool FileRecordMgr::eof(){
 	return _fileReader->eof();
 }
-Record *FileRecordMgr::allocateAndGetNextRecord()
+Record *FileRecordMgr::getNextRecord(RecordKeyList *keyList)
 {
 	if (!_fileReader->isOpen()) {
 		return NULL;
@@ -120,6 +120,9 @@ Record *FileRecordMgr::allocateAndGetNextRecord()
 	}
 	assignChromId(record);
 	_totalRecordLength += (unsigned long)(record->getEndPos() - record->getStartPos());
+	if (keyList != NULL) {
+		keyList->setKey(record);
+	}
 	return record;
 }
@@ -198,6 +201,10 @@ void FileRecordMgr::deleteRecord(const Record *record) {
 	_recordMgr->deleteRecord(record);
 }
+void FileRecordMgr::deleteRecord(RecordKeyList *keyList) {
+	_recordMgr->deleteRecord(keyList->getKey());
+}
 void FileRecordMgr::allocateFileReader()
 {
 	switch (_fileType) {
@@ -224,175 +231,3 @@ const BamTools::RefVector & FileRecordMgr::getBamReferences() {
 	}
 	return static_cast<BamFileReader *>(_fileReader)->getReferences();
 }
-#ifdef false
-Record *FileRecordMgr::allocateAndGetNextMergedRecord(WANT_STRAND_TYPE desiredStrand, int maxDistance) {
-	RecordKeyList recList;
-	if (!allocateAndGetNextMergedRecord(recList, desiredStrand, maxDistance)) {
-		return NULL;
-	}
-	deleteAllMergedItemsButKey(recList);
-	return const_cast<Record *>(recList.getKey()); //want key to be non-const
-}
-bool FileRecordMgr::allocateAndGetNextMergedRecord(RecordKeyList & recList, WANT_STRAND_TYPE desiredStrand, int maxDistance)
-{
-	if (!recList.allClear()) {
-		deleteMergedRecord(recList);
-	}
-	_mustBeForward = desiredStrand == SAME_STRAND_FORWARD;
-	_mustBeReverse = desiredStrand == SAME_STRAND_REVERSE;
-	Record *startRecord = tryToTakeFromStorage();
-	// if we couldn't use a previously stored record for starters,
-	//then begin with a new one that matches strand criteria.
-	while (startRecord == NULL) {
-		startRecord = allocateAndGetNextRecord();
-		if (startRecord == NULL) { //hit EOF!!
-			return false;
-		}
-		if (_mustBeForward && !startRecord->getStrand()) {
-			//record is reverse, wanted forward.
-			addToStorage(startRecord);
-			startRecord = NULL;
-		} else if (_mustBeReverse && startRecord->getStrand()) {
-			//record is forward, wanted reverse
-			addToStorage(startRecord);
-			startRecord = NULL;
-		}
-	}
-	// OK!! We have a start record!
-	_mustBeForward = desiredStrand == SAME_STRAND_FORWARD || (desiredStrand == SAME_STRAND_EITHER && startRecord->getStrand());
-	_mustBeReverse = desiredStrand == SAME_STRAND_REVERSE || (desiredStrand == SAME_STRAND_EITHER && !startRecord->getStrand());
-	const QuickString &currChrom = startRecord->getChrName();
-	_foundChroms.insert(currChrom);
-	bool madeComposite = false;
-	recList.push_back(startRecord);
-	recList.setKey(startRecord); //key of recList will just be the startRecord unless we're able to merge more.
-	bool currStrand = startRecord->getStrand();
-	bool mustMatchStrand = desiredStrand != ANY_STRAND;
-	int currEnd = startRecord->getEndPos();
-	//now look for more records to merge with this one.
-	//stop when they're out of range, not on the same chromosome, or we hit EOF.
-	//ignore if they don't comply with strand.
-	Record *nextRecord = NULL;
-	while (nextRecord == NULL) {
-		bool takenFromStorage = false;
-		nextRecord = mustMatchStrand ? tryToTakeFromStorage(currStrand) : tryToTakeFromStorage();
-		if (nextRecord == NULL) {
-			nextRecord = allocateAndGetNextRecord();
-		} else {
-			takenFromStorage = true;
-		}
-		if (nextRecord == NULL) { // EOF hit
-			break;
-		}
-		const QuickString &newChrom = nextRecord->getChrName();
-		if (newChrom != currChrom) { //hit a different chromosome.
-			if (_foundChroms.find(newChrom) == _foundChroms.end() || takenFromStorage) {
-				//haven't seen this chromosome before.
-				addToStorage(nextRecord);
-				break;
-			} else {
-				//different strand, but we've already seen this chrom. File is not sorted.
-				fprintf(stderr, "ERROR: Input file %s is not sorted by chromosome, startPos.\n", _context->getInputFileName(_contextFileIdx).c_str());
-				deleteRecord(nextRecord);
-				deleteMergedRecord(recList);
-				exit(1);
-			}
-		}
-		int nextStart = nextRecord->getStartPos();
-		//is the record out of range?
-		if (nextStart > currEnd + maxDistance) {
-			//yes, it's out of range.
-			addToStorage(nextRecord);
-			break;
-		}
-		//ok, they're on the same chrom and in range. Are we happy with the strand?
-		if (mustMatchStrand && nextRecord->getStrand() != currStrand) {
-			//no, we're not.
-			addToStorage(nextRecord);
-			nextRecord = NULL;
-			continue;
-		}
-		//everything's good! do a merge.
-		recList.push_back(nextRecord);
-		madeComposite = true;
-		int nextEnd = nextRecord->getEndPos();
-		if (nextEnd > currEnd) {
-			currEnd = nextEnd;
-		}
-		nextRecord = NULL;
-	}
-	if (madeComposite) {
-		Record *newKey = _recordMgr->allocateRecord();
-		(*newKey) = (*startRecord);
-		newKey->setEndPos(currEnd);
-		recList.setKey(newKey);
-	}
-	_totalMergedRecordLength += (unsigned long)(recList.getKey()->getEndPos() - recList.getKey()->getStartPos());
-	return true;
-}
-void FileRecordMgr::addToStorage(Record *record) {
-	_storedRecords.push(record);
-}
-Record *FileRecordMgr::tryToTakeFromStorage() {
-	Record *record = _storedRecords.empty() ? NULL : const_cast<Record *>(_storedRecords.top());
-	if (record != NULL) {
-		_storedRecords.pop();
-	}
-	return record;
-}
-Record *FileRecordMgr::tryToTakeFromStorage(bool strand) {
-	Record *record = NULL;
-	if(strand) {
-		if (_storedRecords.emptyForward()) {
-			return NULL;
-		} else {
-			record = const_cast<Record *>(_storedRecords.topForward());
-			_storedRecords.popForward();
-			return record;
-		}
-	} else {
-		if (_storedRecords.emptyReverse()) {
-			return NULL;
-		} else {
-			record = const_cast<Record *>(_storedRecords.topReverse());
-			_storedRecords.popReverse();
-			return record;
-		}
-	}
-}
-void FileRecordMgr::deleteMergedRecord(RecordKeyList &recList)
-{
-	deleteAllMergedItemsButKey(recList);
-	deleteRecord(recList.getKey());
-	recList.setKey(NULL);
-}
-void FileRecordMgr::deleteAllMergedItemsButKey(RecordKeyList &recList) {
-	//if the key is also in the list, this method won't delete it.
-	for (RecordKeyList::const_iterator_type iter = recList.begin(); iter != recList.end(); iter = recList.next()) {
-		if (iter->value() == recList.getKey()) {
-			continue;
-		}
-		deleteRecord(iter->value());
-	}
-	recList.clearList();
-}
-#endif
--- a/src/utils/FileRecordTools/FileRecordMgr.h
+++ b/src/utils/FileRecordTools/FileRecordMgr.h
@@ -32,12 +32,20 @@ class NewGenomeFile;
 class FileRecordMgr {
 public:
-	FileRecordMgr(const QuickString & filename, bool isSorted = false);
+	FileRecordMgr(const QuickString & filename);
-	~FileRecordMgr();
+	virtual ~FileRecordMgr();
 	bool open();
 	void close();
 	bool eof();
+	//This is an all-in-one method to give the user a new record that is initialized with
+	//the next entry in the data file.
+	//NOTE!! User MUST pass back the returned pointer to deleteRecord method for cleanup!
+	//Also Note! User must check for NULL returned, meaning we failed to get the next record.
+	virtual Record *getNextRecord(RecordKeyList *keyList = NULL);
+	void deleteRecord(const Record *);
+	virtual void deleteRecord(RecordKeyList *keyList);
 	const QuickString &getFileName() const { return _filename;}
 	bool hasHeader() const { return _fileReader->hasHeader(); }
 	const QuickString &getHeader() const { return _fileReader->getHeader(); }
@@ -69,55 +77,6 @@ public:
 	const BamTools::RefVector &getBamReferences();
 	int getNumFields() const { return _fileReader->getNumFields(); }
-	//This is an all-in-one method to give the user a new record that is initialized with
-	//the next entry in the data file.
-	//NOTE!! User MUST pass back the returned pointer to deleteRecord method for cleanup!
-	//Also Note! User must check for NULL returned, meaning we failed to get the next record.
-	Record *allocateAndGetNextRecord();
-	void deleteRecord(const Record *);
-#ifdef false
-	//////////////////////////////////////////////////////////////////////////////////
-	//
-	// 			MERGED RECORDS
-	//
-	//this will give a single "meta" record containing "flattened" or merged records.
-	//
-	// 1st ARG: Pass an empty RecordKeyList. When done, will have a pair: 1st is the final merged record,
-	//			second is list of constituent Records merged.
-	//			** NOTE ** If the RecordKeyList is not empty, this method will empty it for you and delete all contents!
-	//
-	// 2nd ARG: Choose from WANT_STRAND_TYPE, defined below below
-	//
-	// 3rd ARG: allows for nearby records, i.e. maxDistance 100 will merge records <= 100 bases apart. Default 0 means only
-	//			merge records that actually intersect.
-	//
-	// Return value: true if any records found. False if eof hit before records matching requested parameters found.
-	typedef enum { SAME_STRAND_FORWARD, //must all be forward strand
-			SAME_STRAND_REVERSE, //must all be reverse strand
-			SAME_STRAND_EITHER, //must be same strand, but can be either forward or reverse
-			ANY_STRAND } //do no care about strand (Default value)
-	WANT_STRAND_TYPE;
-	//
-	// WARNING!! Specifying a strand will keep all records on the other strand in memory!!
-	// This is done so that requests for records on that other strand can still be met.
-	// For now, use this method at any time to purge the kept records from memory, such as
-	// when changing chromosomes, for example.
-	void purgeKeepList();
-	bool allocateAndGetNextMergedRecord(RecordKeyList & recList, WANT_STRAND_TYPE desiredStrand = ANY_STRAND, int maxDistance = 0);
-	void deleteMergedRecord(RecordKeyList &recList); // MUST use this method for cleanup!
-	//this method will allocate a new record of merged records, but the returned record should only be passed to the deleteRecord method
-	//for cleanup, not to the delete mmerged record.
-	Record *allocateAndGetNextMergedRecord(WANT_STRAND_TYPE desiredStrand = ANY_STRAND, int maxDistance = 0);
-	//
-	// 				END MERGED RECORDS
-	//
-	//////////////////////////////////////////////////////////////////////////////////
-#endif
 	//File statistics
 	unsigned long getTotalRecordLength() const { return _totalRecordLength; } //sum of length of all returned records
@@ -140,7 +99,9 @@ public:
 		_hasGenomeFile = true;
 	}
-private:
+	void setIsSorted(bool val) { _isSortedInput = val; }
+protected:
 	QuickString _filename;
 	BufferedStreamMgr *_bufStreamMgr;
@@ -158,8 +119,6 @@ private:
 	int _prevStart;
 	int _prevChromId;
-	//members for handling merged records
-//	DualQueue<Record *, DualQueueAscending > _storedRecords;
 	bool _mustBeForward;
 	bool _mustBeReverse;
@@ -177,16 +136,6 @@ private:
 	void testInputSortOrder(Record *record);
 	void assignChromId(Record *);
 	void sortError(const Record *record, bool genomeFileError);
-#ifdef false
-	void deleteAllMergedItemsButKey(RecordKeyList &recList);
-	void addToStorage(Record *record);
-	Record *tryToTakeFromStorage();
-	Record *tryToTakeFromStorage(bool strand);
-#endif
 };

--- a/src/utils/FileRecordTools/Makefile
+++ b/src/utils/FileRecordTools/Makefile
@@ -21,8 +21,8 @@ SUBDIRS = ./FileReaders \
 # ----------------------------------
 # define our source and object files
 # ----------------------------------
-SOURCES= FileRecordMgr.cpp FileRecordMgr.h
+SOURCES= FileRecordMgr.cpp FileRecordMgr.h FileRecordMergeMgr.cpp FileRecordMergeMgr.h 
-OBJECTS= FileRecordMgr.o RecordOutputMgr.o
+OBJECTS= FileRecordMgr.o FileRecordMergeMgr.o
 _EXT_OBJECTS=SingleLineDelimTextFileReader.o BamFileReader.o Bed3Interval.o Bed6Interval.o BedPlusInterval.o Bed12Interval.o BamRecord.o \
 	SingleLineDelimTransferBuffer.o FileRecordTypeChecker.o QuickString.o ParseTools.o RecordKeyList.o BufferedStreamMgr.o
 EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
@@ -31,6 +31,8 @@ BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
 $(BUILT_OBJECTS): $(SOURCES) $(SUBDIRS)
 	@echo "  * compiling FileRecordMgr.cpp"
 	@$(CXX) -c -o $(OBJ_DIR)/FileRecordMgr.o FileRecordMgr.cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+	@echo "  * compiling FileRecordMergeMgr.cpp"
+	@$(CXX) -c -o $(OBJ_DIR)/FileRecordMergeMgr.o FileRecordMergeMgr.cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
@@ -42,10 +44,8 @@ $(SUBDIRS): $(OBJ_DIR)
 clean:
 	@echo "Cleaning up."
 	@rm -f $(OBJ_DIR)/FileRecordMgr.o
-	@rm -f $(OBJ_DIR)/RecordMgr.o
 	@rm -f $(OBJ_DIR)/FileRecordTypeChecker.o
-	@rm -f $(OBJ_DIR)/SingleLineDelimTextFileReader.o
+	@rm -f $(OBJ_DIR)/FileRecordMergeMgr.o
-	@rm -f $(OBJ_DIR)/SingleLineDelimTransferBuffer.o
 .PHONY: clean
\ No newline at end of file
--- a/src/utils/FileRecordTools/Records/Makefile
+++ b/src/utils/FileRecordTools/Records/Makefile
@@ -21,9 +21,9 @@ SOURCES= RecordMgr.cpp RecordMgr.h Record.h Record.cpp Bed3Interval.h Bed3Interv
 	Bed4Interval.h Bed4Interval.cpp BedGraphInterval.h BedGraphInterval.cpp Bed5Interval.h Bed5Interval.cpp \
 	Bed6Interval.h Bed6Interval.cpp \
 	BedPlusInterval.h BedPlusInterval.cpp Bed12Interval.h Bed12Interval.cpp BamRecord.h BamRecord.cpp VcfRecord.h VcfRecord.cpp \
-	GffRecord.h GffRecord.cpp RecordKeyList.h RecordKeyList.cpp BlockMgr.h BlockMgr.cpp
+	GffRecord.h GffRecord.cpp RecordKeyList.h RecordKeyList.cpp BlockMgr.h BlockMgr.cpp StrandQueue.h StrandQueue.cpp
 OBJECTS= RecordMgr.o Record.o Bed3Interval.o Bed4Interval.o BedGraphInterval.o Bed5Interval.o Bed6Interval.o BedPlusInterval.o Bed12Interval.o BamRecord.o \
-		VcfRecord.o GffRecord.o RecordKeyList.o BlockMgr.o
+		VcfRecord.o GffRecord.o RecordKeyList.o BlockMgr.o StrandQueue.o
 _EXT_OBJECTS=ParseTools.o QuickString.o ChromIdLookup.o
 EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
 BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
@@ -40,6 +40,6 @@ clean:
 	@echo "Cleaning up."
 	@rm -f $(OBJ_DIR)/RecordMgr.o $(OBJ_DIR)/Record.o $(OBJ_DIR)/Bed3Interval.o $(OBJ_DIR)/Bed4Interval.o \
 		$(OBJ_DIR)/BedGraphInterval.o $(OBJ_DIR)/Bed5Interval.o $(OBJ_DIR)/Bed6Interval.o \
-		$(OBJ_DIR)/BedPlusInterval.o $(OBJ_DIR)/Bed12Interval.o $(OBJ_DIR)/BamRecord.o $(OBJ_DIR)/VcfRecord.o $(OBJ_DIR)/GffRecord.o $(OBJ_DIR)/BlockMgr.o
+		$(OBJ_DIR)/BedPlusInterval.o $(OBJ_DIR)/Bed12Interval.o $(OBJ_DIR)/BamRecord.o $(OBJ_DIR)/VcfRecord.o $(OBJ_DIR)/GffRecord.o $(OBJ_DIR)/BlockMgr.o $(OBJ_DIR)/StrandQueue.o
 .PHONY: clean
\ No newline at end of file
--- a/src/utils/FileRecordTools/Records/Record.h
+++ b/src/utils/FileRecordTools/Records/Record.h
@@ -152,6 +152,9 @@ protected:
 	bool _isMateUnmapped;
 };
+class RecordPtrSortFunctor {
+public:
+	bool operator()(const Record *rec1, const Record *rec2) const { return *rec1 > *rec2; }
+};
 #endif /* RECORD_H_ */
--- a/src/utils/FileRecordTools/Records/StrandQueue.cpp
+++ b/src/utils/FileRecordTools/Records/StrandQueue.cpp
+/*
+ * StrandQueue.cpp
+ *
+ *  Created on: Mar 31, 2014
+ *      Author: nek3d
+ */
+#include "StrandQueue.h"
+StrandQueue::StrandQueue() {
+	for (int i=0; i < NUM_QUEUES; i++) {
+		queueType *queue = new queueType();
+		_queues.push_back(queue);
+	}
+	_strandIdxs.resize(3);
+	_strandIdxs[0] = Record::FORWARD;
+	_strandIdxs[1] = Record::REVERSE;
+	_strandIdxs[2] = Record::UNKNOWN;
+}
+StrandQueue::~StrandQueue() {
+	for (int i=0; i < NUM_QUEUES; i++) {
+		delete _queues[i];
+	}
+}
+Record *StrandQueue::top() const
+{
+	int minIdx = getMinIdx();
+	if (minIdx == -1) return NULL;
+	return const_cast<Record *>(_queues[minIdx]->top());
+}
+void StrandQueue::pop() {
+	int minIdx = getMinIdx();
+	if (minIdx == -1) return;
+	_queues[minIdx]->pop();
+}
+Record * StrandQueue::top(Record::strandType strand) const {
+	const Record *record = NULL;
+	switch (strand) {
+	case Record::FORWARD:
+		if (_queues[0]->empty()) return NULL;
+		record = _queues[0]->top();
+		break;
+	case Record::REVERSE:
+		if (_queues[1]->empty()) return NULL;
+		record = _queues[1]->top();
+		break;
+	case Record::UNKNOWN:
+		if (_queues[0]->empty()) return NULL;
+		record = _queues[2]->top();
+		break;
+	default:
+		break;
+	}
+	return const_cast<Record *>(record);
+}
+void StrandQueue::pop(Record::strandType strand) const {
+	switch (strand) {
+	case Record::FORWARD:
+		if (_queues[0]->empty()) return;
+		_queues[0]->pop();
+		break;
+	case Record::REVERSE:
+		if (_queues[1]->empty()) return;
+		_queues[1]->pop();
+		break;
+	case Record::UNKNOWN:
+		if (_queues[2]->empty()) return;
+		_queues[2]->pop();
+		break;
+	default:
+		break;
+	}
+}
+void StrandQueue::push(Record *record) {
+	switch (record->getStrandVal()) {
+	case Record::FORWARD:
+		_queues[0]->push(record);
+		break;
+	case Record::REVERSE:
+		_queues[1]->push(record);
+		break;
+	case Record::UNKNOWN:
+		_queues[2]->push(record);
+		break;
+	default:
+		break;
+	}
+}
+size_t StrandQueue::size() const {
+	size_t sumSize = 0;
+	for (int i = 0; i < NUM_QUEUES; i++) {
+		sumSize += _queues[i]->size();
+	}
+	return sumSize;
+}
+bool StrandQueue::empty() const {
+	for (int i = 0; i < NUM_QUEUES; i++) {
+		if (!_queues[i]->empty()) {
+			return false;
+		}
+	}
+	return true;
+}
+int StrandQueue::getMinIdx() const {
+	if (empty()) return -1;
+	const Record *minRec = NULL;
+	int minIdx = -1;
+	for (int i = 0; i < NUM_QUEUES; i++) {
+		if (_queues[i]->empty()) continue;
+		const Record *currTop = _queues[i]->top();
+		if (currTop == NULL) continue;
+		if (minRec == NULL || *currTop < *minRec) {
+			minRec = currTop;
+			minIdx = i;
+		}
+	}
+	return minIdx;
+}
--- a/src/utils/FileRecordTools/Records/StrandQueue.h
+++ b/src/utils/FileRecordTools/Records/StrandQueue.h
+/*
+ * StrandQueue.h
+ *
+ *  Created on: Jan 29, 2013
+ *      Author: nek3d
+ */
+#ifndef STRANDQUEUE_H_
+#define STRANDQUEUE_H_
+using namespace std;
+#include <vector>
+#include <queue>
+#include <cstdio>
+#include <cstdlib>
+#include "Record.h"
+class StrandQueue {
+public:
+	StrandQueue();
+	~StrandQueue();
+	Record * top() const;
+	void pop();
+	Record * top(Record::strandType strand) const;
+	void pop(Record::strandType strand) const;
+	void push(Record *record);
+	size_t size() const;
+	bool empty() const;
+private:
+//	static RecordPtrSortFunctor _recSortFunctor;
+	typedef priority_queue<Record *, vector<const Record *>, RecordPtrSortFunctor > queueType;
+	vector<queueType *> _queues;
+	static const int NUM_QUEUES = 3;
+	//we want to be able to iterate over the enumerated strand types in Record.h,
+	//which are FORWARD, REVERSE, and UNKNOWN. However, iterating over an enum is hard to
+	//do, so we'll use a suggestion found in a forum, and put the enum values into a vector.
+	vector<Record::strandType> _strandIdxs;
+	int getMinIdx() const; //will return the idx of queue with the current min val.
+};
+#endif // STRANDQUEUE_H_
--- a/src/utils/FileRecordTools/Records/recordsTar.tar.gz
+++ b/src/utils/FileRecordTools/Records/recordsTar.tar.gz
--- a/src/utils/KeyListOps/KeyListOps.h
+++ b/src/utils/KeyListOps/KeyListOps.h
@@ -18,10 +18,24 @@ public:
 	KeyListOps();
 	void setColumns(const QuickString &columns) { _columns = columns; }
+	void addColumns(const QuickString &newCols) {
+		if (!_columns.empty()) _columns += ",";
+		_columns += newCols;
+	}
 	void setOperations(const QuickString & operation) { _operations = operation; }
+	void addOperations(const QuickString &newOps) {
+		if (!_operations.empty()) _operations += ",";
+		_operations += newOps;
+	}
 	void setNullValue(const QuickString & nullValue) { _methods.setNullValue(nullValue); }
 	void setDelimStr(const QuickString & delimStr) { _methods.setDelimStr(delimStr); }
+	const QuickString &getColumns() { return _columns; }
+	const QuickString &getOperations() { return _operations; }
+	const QuickString &getNullValue() { return _methods.getNullValue(); }
+	const QuickString &getDelimStr() { return _methods.getDelimStr(); }
 	void setKeyList(RecordKeyList *keyList) { _methods.setKeyList(keyList); }
 	typedef enum { SUM, MEAN, STDDEV, SAMPLE_STDDEV, MEDIAN, MODE, ANTIMODE, MIN, MAX, ABSMIN, ABSMAX, COUNT, DISTINCT, COUNT_DISTINCT,

--- a/src/utils/NewChromsweep/NewChromsweep.cpp
+++ b/src/utils/NewChromsweep/NewChromsweep.cpp
@@ -185,7 +185,7 @@ bool NewChromSweep::next(RecordKeyList &next) {
 void NewChromSweep::nextRecord(bool query) {
 	if (query) {
 //		if (!_context->getUseMergedIntervals()) {
-			_currQueryRec = _queryFRM->allocateAndGetNextRecord();
+			_currQueryRec = _queryFRM->getNextRecord();
 //		} else {
 //			_currQueryRec = _queryFRM->allocateAndGetNextMergedRecord(_context->getSameStrand() ? FileRecordMgr::SAME_STRAND_EITHER : FileRecordMgr::ANY_STRAND);
 //		}
@@ -194,7 +194,7 @@ void NewChromSweep::nextRecord(bool query) {
 		}
 	} else { //database
 //		if (!_context->getUseMergedIntervals()) {
-			_currDatabaseRec = _databaseFRM->allocateAndGetNextRecord();
+			_currDatabaseRec = _databaseFRM->getNextRecord();
 //		} else {
 //			_currDatabaseRec = _databaseFRM->allocateAndGetNextMergedRecord(_context->getSameStrand() ? FileRecordMgr::SAME_STRAND_EITHER : FileRecordMgr::ANY_STRAND);
 //		}

--- a/src/utils/RecordOutputMgr/RecordOutputMgr.cpp
+++ b/src/utils/RecordOutputMgr/RecordOutputMgr.cpp
@@ -76,9 +76,15 @@ bool RecordOutputMgr::printKeyAndTerminate(RecordKeyList &keyList) {
 	if (bamCode == BAM_AS_BAM) {
 		return true;
 	} else if (bamCode == NOT_BAM) {
-		keyList.getKey()->print(_outBuf);
+		if (_context->getProgram() == ContextBase::MERGE) {
+			//when printing merged records, we want to force the printing into
+			//bed3 format, which is surprisingly difficult to do. Had to use the following:
+			const Bed3Interval *bed3 = static_cast<const Bed3Interval *>(keyList.getKey());
+			bed3->Bed3Interval::print(_outBuf);
+		} else {
+			keyList.getKey()->print(_outBuf);
+		}
 		return false;
 	}
 	//otherwise, it was BAM_AS_BED, and the key was printed.
 	return false;
@@ -114,6 +120,7 @@ void RecordOutputMgr::printRecord(const Record *record)
 void RecordOutputMgr::printRecord(const Record *record, const QuickString & value)
 {	
+	_afterVal = value;
 	printRecord(record);
 	_outBuf.append(value);
 	newline();
@@ -206,6 +213,17 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi
 		}
 		_currBamBlockList = NULL;
 		return;
+	} else if (_context->getProgram() == ContextBase::MERGE) {
+		if (!printKeyAndTerminate(keyList)) {
+			if (_context->getDesiredStrand() != FileRecordMergeMgr::ANY_STRAND) {
+				//add the sign of the record
+				tab();
+				_outBuf.append(keyList.getKey()->getStrand());
+			}
+			if (!_afterVal.empty()) tab();
+		}
+		_currBamBlockList = NULL;
+		return;
 	}
 }

--- a/src/utils/RecordOutputMgr/RecordOutputMgr.h
+++ b/src/utils/RecordOutputMgr/RecordOutputMgr.h
@@ -44,6 +44,7 @@ private:
 	//
 	BlockMgr *_bamBlockMgr;
 	const BlockMgr *_splitInfo;
+	QuickString _afterVal; //to store values to be printed after record, such as column operations.
 	//some helper functions to neaten the code.
 	void tab() { _outBuf.append('\t'); }
 	void newline() { _outBuf.append('\n'); }

--- a/src/utils/general/DualQueue.h
+++ b/src/utils/general/DualQueue.h
-/*
- * DualQueue.h
- *
- *  Created on: Jan 29, 2013
- *      Author: nek3d
- */
-#ifdef false
-#ifndef DUALQUEUE_H_
-#define DUALQUEUE_H_
-using namespace std;
-#include <vector>
-#include <queue>
-#include <cstdio>
-#include <cstdlib>
-template <class T> class DualQueueAscending {
-public:
-	bool operator() ( const T &item1, const T &item2) const {
-		printf("\n\nIn comparison method:\n item1=\n");
-//		item1->print();
-		printf("\nitem2=\n");
-//		item2->print();
-		printf("\n");
-		if( *(item1) < *(item2) ) {
-			printf("Item1 less than item2. Returning false.\n");
-			return false;
-		}
-		printf("Item1 not less than item2. Returning true.\n");
-		return true;
-	}
-};
-template <class T> class DualQueueDescending {
-public:
-	bool operator() ( const T &item1, const T &item2) const {
-		if( *(item2) < *(item1) ) {
-			return false;
-		}
-		return true;
-	}
-};
-template <class T, template<class T> class CompareFunc> class DualQueue {
-public:
-	DualQueue() {}
-	~DualQueue() {}
-	const T & top() const {
-		if (empty()) {
-			fprintf(stderr, "ERROR. Tried to top from empty dualQueue.\n");
-			exit(1);
-		}
-		if (emptyForward()) {
-			return topReverse();
-		}
-		if (emptyReverse()) {
-			return topForward();
-		}
-		return (topFowardHigherPriorityThanTopReverse() ? topForward() : topReverse());
-	}
-	void pop() {
-		if (empty()) {
-			fprintf(stderr, "ERROR. Tried to pop from empty dualQueue.\n");
-			exit(1);
-		}
-		if (emptyForward()) {
-			popReverse();
-			return;
-		}
-		if (emptyReverse()) {
-			popForward();
-			return;
-		}
-		topFowardHigherPriorityThanTopReverse() ? popForward() : popReverse();
-	}
-	void push(const T &item) { item->getStrand() ? pushForward(item) : pushReverse(item); }
-	size_t size() const { return sizeForward() + sizeReverse(); }
-	bool empty() const { return _forwardQueue.empty() && _reverseQueue.empty(); }
-	const T & topForward() const { return _forwardQueue.top(); }
-	void popForward() { _forwardQueue.pop(); }
-	void pushForward(const T &item) { _forwardQueue.push(item); }
-	size_t sizeForward() const { return _forwardQueue.size(); }
-	bool emptyForward() const { return _forwardQueue.empty(); }
-	const T & topReverse() const { return _reverseQueue.top(); }
-	void popReverse() { _reverseQueue.pop(); }
-	void pushReverse(const T &item) { _reverseQueue.push(item); }
-	size_t sizeReverse() const { return _reverseQueue.size(); }
-	bool emptyReverse() const { return _reverseQueue.empty(); }
-private:
-	typedef priority_queue<T, vector<T>, CompareFunc<T> > queueType;
-	queueType _forwardQueue;
-	queueType _reverseQueue;
-	bool topFowardHigherPriorityThanTopReverse() const {
-		printf("\n\nIn priority method:\n TopForward=\n");
-//		topForward()->print();
-		printf("\nTopReverse=\n");
-//		topReverse()->print();
-		printf("\n");
-		if (CompareFunc<T>()(topForward(), topReverse())) {
-			printf("Forward higher priority than reverse.\n");
-			return true;
-		} else {
-			printf("Reverse higher priority than forward.\n");
-			return false;
-		}
-	}
-};
-#endif /* DUALQUEUE_H_ */
-#endif
--- a/src/utils/general/ParseTools.cpp
+++ b/src/utils/general/ParseTools.cpp
@@ -19,6 +19,9 @@ int str2chrPos(const QuickString &str) {
 }
 int str2chrPos(const char *str, size_t ulen) {
+	if (ulen == 0) {
+		ulen = strlen(str);
+	}
 	int len=(int)ulen;
 	if (len < 1 || len > 10) {
 		return INT_MIN; //can't do more than 9 digits and a minus sign

--- a/src/utils/general/ParseTools.h
+++ b/src/utils/general/ParseTools.h
@@ -22,7 +22,7 @@ bool isNumeric(const QuickString &str);
 //Empty strings, too long strings, or strings containing anything other than
 //digits (with the excpetion of a minus sign in the first position)
 //will result in error. Errors return INT_MIN.
-int str2chrPos(const char *str, size_t len);
+int str2chrPos(const char *str, size_t len = 0);
 int str2chrPos(const QuickString &str);

--- a/test/merge/test-merge.sh
+++ b/test/merge/test-merge.sh
@@ -30,18 +30,22 @@ $BT merge -i a.bed > obs
 check obs exp
 rm obs exp
+###########################################################
+#
+# NOTE: Testing for sorted input is now deprecated, as the
+# FileRecordMgr is already testing for that.
+#
 ###########################################################
 # Test #2
 #  Enforce coordinate sorted input.
 ###########################################################
-echo "    merge.t2...\c"
+#echo "    merge.t2...\c"
-command -v tac 2>/dev/null || alias tac="sed '1!G;h;\$!d'"
+#command -v tac 2>/dev/null || alias tac="sed '1!G;h;\$!d'"
-tac a.bed | $BT merge -i - 2> obs
+#tac a.bed | $BT merge -i - 2> obs
-echo "ERROR: input file: (-) is not sorted by chrom then start.
+#echo "ERROR: input file: (-) is not sorted by chrom then start.
-       The start coordinate at line 3 is less than the start at line 2" > exp
+#       The start coordinate at line 3 is less than the start at line 2" > exp
-check obs exp
+#check obs exp
-rm obs exp
+#rm obs exp
 ###########################################################
@@ -64,11 +68,9 @@ rm obs exp
 ###########################################################
 echo "    merge.t4...\c"
 echo \
-"chr1	10	20
+"*****
-*****
+***** ERROR: Requested column 4, but database file a.bed only has fields 1 - 3." > exp
-*****ERROR: No names found to report for the -names option. Exiting.
+$BT merge -i a.bed -nms 2>&1 > /dev/null | head -3 | tail -2 > obs
-*****" > exp
-$BT merge -i a.bed -nms > obs 2>&1
 check obs exp
 rm obs exp
@@ -130,7 +132,7 @@ chr1	30	100	a2,a3,a4	9	3
 chr2	10	20	a1	5	1
 chr2	30	40	a2	6	1
 chr2	42	100	a3,a4	15	2" > exp
-$BT merge -i a.full.bed -nms -n -scores sum> obs
+$BT merge -i a.full.bed -nms -scores sum -n> obs
 check obs exp
 rm obs exp
@@ -139,15 +141,15 @@ rm obs exp
 ###########################################################
 echo "    merge.t9...\c"
 echo \
-"chr1	10	20	a1	1	+	1
+"chr1	10	20	+	a1	1	1
-chr1	30	40	a2	2	+	1
+chr1	30	40	+	a2	2	1
-chr1	45	100	a4	4	+	1
+chr1	40	50	-	a3	3	1
-chr1	40	50	a3	3	-	1
+chr1	45	100	+	a4	4	1
-chr2	10	20	a1	5	+	1
+chr2	10	20	+	a1	5	1
-chr2	30	40	a2	6	+	1
+chr2	30	40	+	a2	6	1
-chr2	42	50	a3	7	+	1
+chr2	42	50	+	a3	7	1
-chr2	45	100	a4	8	-	1" > exp
+chr2	45	100	-	a4	8	1" > exp
-$BT merge -i a.full.bed -s -nms -n -scores sum> obs
+$BT merge -i a.full.bed -s -nms -scores sum -n> obs
 check obs exp
 rm obs exp