Commit c87afc9f authored by arq5x's avatar arq5x
Browse files

first pass at using Context and PFM for the map tool.

parent ce2d411b
......@@ -46,7 +46,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/jaccard \
$(SRC_DIR)/linksBed \
$(SRC_DIR)/maskFastaFromBed \
$(SRC_DIR)/mapBed \
$(SRC_DIR)/mapFile \
$(SRC_DIR)/mergeBed \
$(SRC_DIR)/multiBamCov \
$(SRC_DIR)/multiIntersectBed \
......
......@@ -5,22 +5,39 @@ BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/genomeFile/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \
-I$(UTILITIES_DIR)/fileType/ \
# INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
# -I$(UTILITIES_DIR)/gzstream/ \
# -I$(UTILITIES_DIR)/genomeFile/ \
# -I$(UTILITIES_DIR)/lineFileUtilities/ \
# -I$(UTILITIES_DIR)/fileType/ \
# -I$(UTILITIES_DIR)/BamTools/include \
# -I$(UTILITIES_DIR)/BamTools-Ancillary \
# -I$(UTILITIES_DIR)/chromsweep \
# -I$(UTILITIES_DIR)/VectorOps \
# -I$(UTILITIES_DIR)/version/
INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/GenomeFile/ \
-I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BamTools/src \
-I$(UTILITIES_DIR)/BlockedIntervals \
-I$(UTILITIES_DIR)/BamTools-Ancillary \
-I$(UTILITIES_DIR)/chromsweep \
-I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/VectorOps \
-I$(UTILITIES_DIR)/BinTree \
-I$(UTILITIES_DIR)/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= mapMain.cpp mapBed.cpp mapBed.h
OBJECTS= mapMain.o mapBed.o
SOURCES= mapMain.cpp mapFile.cpp mapFile.h
OBJECTS= mapMain.o mapFile.o
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
all: $(BUILT_OBJECTS)
......@@ -33,6 +50,6 @@ $(BUILT_OBJECTS): $(SOURCES)
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/mapMain.o $(OBJ_DIR)/mapBed.o
@rm -f $(OBJ_DIR)/mapMain.o $(OBJ_DIR)/mapFile.o
.PHONY: clean
......@@ -9,9 +9,127 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "lineFileUtilities.h"
#include "mapBed.h"
#include "mapFile.h"
#include "ContextMap.h"
#include "FileRecordMgr.h"
#include "NewChromsweep.h"
#include "BinTree.h"
#include "RecordOutputMgr.h"
const int PRECISION = 21;
FileMap::FileMap(ContextMap *context)
: _context(context),
_blockMgr(NULL),
_recordOutputMgr(NULL)
{
// FIX ME - block manager only works for intersect
//_blockMgr = new BlockMgr();
//_blockMgr->setContext(context);
_recordOutputMgr = new RecordOutputMgr();
}
FileMap::~FileMap(void) {
if (_blockMgr != NULL) {
delete _blockMgr;
_blockMgr = NULL;
}
delete _recordOutputMgr;
}
bool FileMap::mapFiles()
{
NewChromSweep sweep(_context);
if (!sweep.init()) {
return false;
}
if (!_recordOutputMgr->init(_context)) {
return false;
}
RecordKeyList hitSet;
while (sweep.next(hitSet)) {
//if (_context->getObeySplits()) {
// RecordKeyList keySet(hitSet.getKey());
// RecordKeyList resultSet(hitSet.getKey());
// _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
//} else {
//}
//_recordOutputMgr->printKeyAndTerminate(hitSet);
_recordOutputMgr->printRecord(hitSet.getKey(), MapHits(hitSet));
}
return true;
}
void FileMap::ExtractColumnFromHits(RecordKeyList &hits) {
for (RecordKeyList::const_iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) {
try {
ostringstream startString;
startString << iter->value()->getStartPos();
//_column_vec.push_back(iter->value().fields.at(_column));
_column_vec.push_back(startString.str());
}
catch(std::out_of_range& e) {
exit(1);
}
/*
catch(std::out_of_range& e) {
cerr << endl << "*****" << endl
<< "*****ERROR: requested column ("
<< _column + 1
<< ") exceeds the number of columns in file at line "
<< _bedA->_lineNum << ". Exiting."
<< endl << "*****" << endl;
exit(1);
}
*/
}
}
string FileMap::MapHits(RecordKeyList &hits) {
ostringstream output;
if (hits.size() == 0)
// FIX ME
return "-1";
//return _nullValue;
ExtractColumnFromHits(hits);
string operation = _context->getColumnOperation();
VectorOps vo(_column_vec);
if (operation == "sum")
output << setprecision (PRECISION) << vo.GetSum();
else if (operation == "mean")
output << setprecision (PRECISION) << vo.GetMean();
else if (operation == "median")
output << setprecision (PRECISION) << vo.GetMedian();
else if (operation == "min")
output << setprecision (PRECISION) << vo.GetMin();
else if (operation == "max")
output << setprecision (PRECISION) << vo.GetMax();
else if (operation == "mode")
output << vo.GetMode();
else if (operation == "antimode")
output << vo.GetAntiMode();
else if (operation == "count")
output << setprecision (PRECISION) << vo.GetCount();
else if (operation == "count_distinct")
output << setprecision (PRECISION) << vo.GetCountDistinct();
else if (operation == "collapse")
output << vo.GetCollapse();
else if (operation == "distinct")
output << vo.GetDistinct();
else {
cerr << "ERROR: " << operation << " is an unrecoginzed operation\n";
exit(1);
}
_column_vec.clear();
return output.str();
}
/*
const int PRECISION = 21;
double GetUserColumn(const string s);
......@@ -120,3 +238,4 @@ void BedMap::ExtractColumnFromHits(const vector<BED> &hits) {
}
}
}
*/
/*****************************************************************************
mapBed.h
mapFile.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
......@@ -9,9 +9,52 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef MAPBED_H
#define MAPBED_H
#ifndef MAPFILE_H
#define MAPFILE_H
using namespace std;
#include <sstream>
#include <iomanip>
#include "VectorOps.h"
#include "RecordKeyList.h"
using namespace std;
class ContextMap;
class BlockMgr;
class RecordOutputMgr;
class FileMap {
public:
FileMap(ContextMap *context);
~FileMap(void);
bool mapFiles();
private:
ContextMap *_context;
Record *_queryRec;
Record *_databaseRec;
BlockMgr *_blockMgr;
RecordOutputMgr *_recordOutputMgr;
vector<string> _column_vec; // vector to hold current column's worth of data
//------------------------------------------------
// private methods
//------------------------------------------------
void Map();
string MapHits(RecordKeyList &hits);
void ExtractColumnFromHits(RecordKeyList &hits);
};
#endif /* MAPFILE_H */
/*
#include "bedFile.h"
#include "chromsweep.h"
#include "VectorOps.h"
......@@ -75,5 +118,5 @@ private:
string MapHits(const BED &a, const vector<BED> &hits);
void ExtractColumnFromHits(const vector<BED> &hits);
};
#endif /* MAPBED_H */
*/
//#endif /* MAPFILE_H */
......@@ -9,21 +9,37 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "mapBed.h"
#include "version.h"
using namespace std;
#include "mapFile.h"
#include "ContextMap.h"
// define our program name
#define PROGRAM_NAME "bedtools map"
void map_help(void);
// define our parameter checking macro
#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
int map_main(int argc, char* argv[]) {
ContextMap *context = new ContextMap();
if (!context->parseCmdArgs(argc, argv, 1) || context->getShowHelp() || !context->isValidState()) {
if (!context->getErrorMsg().empty()) {
cerr << context->getErrorMsg() << endl;
}
map_help();
delete context;
return 0;
}
FileMap *fileMap = new FileMap(context);
bool retVal = fileMap->mapFiles();
delete fileMap;
delete context;
return retVal ? 0 : 1;
}
// function declarations
void map_help(void);
/*
int map_main(int argc, char* argv[]) {
// our configuration variables
......@@ -158,6 +174,7 @@ int map_main(int argc, char* argv[]) {
return 0;
}
}
*/
void map_help(void) {
......
......@@ -55,6 +55,7 @@ ContextBase::ContextBase()
{
_programNames["intersect"] = INTERSECT;
_programNames["sample"] = SAMPLE;
_programNames["map"] = MAP;
_validScoreOps.insert("sum");
_validScoreOps.insert("max");
......
......@@ -210,7 +210,8 @@ protected:
int _bamHeaderAndRefIdx;
int _maxNumDatabaseFields;
bool _useFullBamTags;
string _columnOperation;
int _column;
bool _reportCount;
int _maxDistance;
bool _reportNames;
......
......@@ -15,8 +15,8 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= ContextBase.cpp ContextBase.h ContextIntersect.cpp ContextIntersect.h ContextSample.cpp ContextSample.h
OBJECTS= ContextBase.o ContextIntersect.o ContextSample.o
SOURCES= ContextBase.cpp ContextBase.h ContextIntersect.cpp ContextIntersect.h ContextMap.cpp ContextMap.h ContextSample.cpp ContextSample.h
OBJECTS= ContextBase.o ContextIntersect.o ContextMap.o ContextSample.o
_EXT_OBJECTS=ParseTools.o QuickString.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
......@@ -31,6 +31,9 @@ $(BUILT_OBJECTS): $(SOURCES)
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/ContextBase.o $(OBJ_DIR)/ContextIntersect.o $(OBJ_DIR)/ContextSample.o
@rm -f $(OBJ_DIR)/ContextBase.o \
$(OBJ_DIR)/ContextIntersect.o \
$(OBJ_DIR)/ContextMap.o \
$(OBJ_DIR)/ContextSample.o
.PHONY: clean
\ No newline at end of file
......@@ -117,6 +117,18 @@ void RecordOutputMgr::printRecord(const Record *record)
printRecord(keyList);
}
void RecordOutputMgr::printRecord(const Record *record, const string value)
{
RecordKeyList keyList(record);
printRecord(keyList);
_outBuf.append(value.c_str());
newline();
if (needsFlush()) {
flush();
}
}
void RecordOutputMgr::printRecord(RecordKeyList &keyList) {
if (keyList.getKey()->getType() == FileRecordTypeChecker::BAM_RECORD_TYPE) {
RecordKeyList blockList(keyList.getKey());
......@@ -129,7 +141,6 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList) {
return;
}
printRecord(keyList, NULL);
}
void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockList)
......@@ -192,6 +203,12 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi
}
_currBlockList = NULL;
return;
} else if (_context->getProgram() == ContextBase::MAP) {
if (!printKeyAndTerminate(keyList)) {
tab();
}
_currBlockList = NULL;
return;
}
}
......
......@@ -27,6 +27,8 @@ public:
// void printHeader(const string &);
void printRecord(const Record *record);
void printRecord(RecordKeyList &keyList);
// Added by ARQ
void printRecord(const Record *record, const string value);
private:
typedef enum { NOT_BAM, BAM_AS_BAM, BAM_AS_BED} printBamType;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment