Skip to content
Snippets Groups Projects
Commit c87afc9f authored by arq5x's avatar arq5x
Browse files

first pass at using Context and PFM for the map tool.

parent ce2d411b
No related branches found
No related tags found
No related merge requests found
......@@ -46,7 +46,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/jaccard \
$(SRC_DIR)/linksBed \
$(SRC_DIR)/maskFastaFromBed \
$(SRC_DIR)/mapBed \
$(SRC_DIR)/mapFile \
$(SRC_DIR)/mergeBed \
$(SRC_DIR)/multiBamCov \
$(SRC_DIR)/multiIntersectBed \
......
......@@ -5,22 +5,39 @@ BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/genomeFile/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \
-I$(UTILITIES_DIR)/fileType/ \
# INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
# -I$(UTILITIES_DIR)/gzstream/ \
# -I$(UTILITIES_DIR)/genomeFile/ \
# -I$(UTILITIES_DIR)/lineFileUtilities/ \
# -I$(UTILITIES_DIR)/fileType/ \
# -I$(UTILITIES_DIR)/BamTools/include \
# -I$(UTILITIES_DIR)/BamTools-Ancillary \
# -I$(UTILITIES_DIR)/chromsweep \
# -I$(UTILITIES_DIR)/VectorOps \
# -I$(UTILITIES_DIR)/version/
INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/GenomeFile/ \
-I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BamTools/src \
-I$(UTILITIES_DIR)/BlockedIntervals \
-I$(UTILITIES_DIR)/BamTools-Ancillary \
-I$(UTILITIES_DIR)/chromsweep \
-I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/VectorOps \
-I$(UTILITIES_DIR)/BinTree \
-I$(UTILITIES_DIR)/version/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= mapMain.cpp mapBed.cpp mapBed.h
OBJECTS= mapMain.o mapBed.o
SOURCES= mapMain.cpp mapFile.cpp mapFile.h
OBJECTS= mapMain.o mapFile.o
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
all: $(BUILT_OBJECTS)
......@@ -33,6 +50,6 @@ $(BUILT_OBJECTS): $(SOURCES)
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/mapMain.o $(OBJ_DIR)/mapBed.o
@rm -f $(OBJ_DIR)/mapMain.o $(OBJ_DIR)/mapFile.o
.PHONY: clean
......@@ -9,9 +9,127 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "lineFileUtilities.h"
#include "mapBed.h"
#include "mapFile.h"
#include "ContextMap.h"
#include "FileRecordMgr.h"
#include "NewChromsweep.h"
#include "BinTree.h"
#include "RecordOutputMgr.h"
const int PRECISION = 21;
FileMap::FileMap(ContextMap *context)
: _context(context),
_blockMgr(NULL),
_recordOutputMgr(NULL)
{
// FIX ME - block manager only works for intersect
//_blockMgr = new BlockMgr();
//_blockMgr->setContext(context);
_recordOutputMgr = new RecordOutputMgr();
}
FileMap::~FileMap(void) {
if (_blockMgr != NULL) {
delete _blockMgr;
_blockMgr = NULL;
}
delete _recordOutputMgr;
}
bool FileMap::mapFiles()
{
NewChromSweep sweep(_context);
if (!sweep.init()) {
return false;
}
if (!_recordOutputMgr->init(_context)) {
return false;
}
RecordKeyList hitSet;
while (sweep.next(hitSet)) {
//if (_context->getObeySplits()) {
// RecordKeyList keySet(hitSet.getKey());
// RecordKeyList resultSet(hitSet.getKey());
// _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
//} else {
//}
//_recordOutputMgr->printKeyAndTerminate(hitSet);
_recordOutputMgr->printRecord(hitSet.getKey(), MapHits(hitSet));
}
return true;
}
void FileMap::ExtractColumnFromHits(RecordKeyList &hits) {
for (RecordKeyList::const_iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) {
try {
ostringstream startString;
startString << iter->value()->getStartPos();
//_column_vec.push_back(iter->value().fields.at(_column));
_column_vec.push_back(startString.str());
}
catch(std::out_of_range& e) {
exit(1);
}
/*
catch(std::out_of_range& e) {
cerr << endl << "*****" << endl
<< "*****ERROR: requested column ("
<< _column + 1
<< ") exceeds the number of columns in file at line "
<< _bedA->_lineNum << ". Exiting."
<< endl << "*****" << endl;
exit(1);
}
*/
}
}
string FileMap::MapHits(RecordKeyList &hits) {
ostringstream output;
if (hits.size() == 0)
// FIX ME
return "-1";
//return _nullValue;
ExtractColumnFromHits(hits);
string operation = _context->getColumnOperation();
VectorOps vo(_column_vec);
if (operation == "sum")
output << setprecision (PRECISION) << vo.GetSum();
else if (operation == "mean")
output << setprecision (PRECISION) << vo.GetMean();
else if (operation == "median")
output << setprecision (PRECISION) << vo.GetMedian();
else if (operation == "min")
output << setprecision (PRECISION) << vo.GetMin();
else if (operation == "max")
output << setprecision (PRECISION) << vo.GetMax();
else if (operation == "mode")
output << vo.GetMode();
else if (operation == "antimode")
output << vo.GetAntiMode();
else if (operation == "count")
output << setprecision (PRECISION) << vo.GetCount();
else if (operation == "count_distinct")
output << setprecision (PRECISION) << vo.GetCountDistinct();
else if (operation == "collapse")
output << vo.GetCollapse();
else if (operation == "distinct")
output << vo.GetDistinct();
else {
cerr << "ERROR: " << operation << " is an unrecoginzed operation\n";
exit(1);
}
_column_vec.clear();
return output.str();
}
/*
const int PRECISION = 21;
double GetUserColumn(const string s);
......@@ -120,3 +238,4 @@ void BedMap::ExtractColumnFromHits(const vector<BED> &hits) {
}
}
}
*/
/*****************************************************************************
mapBed.h
mapFile.h
(c) 2009 - Aaron Quinlan
Hall Laboratory
......@@ -9,9 +9,52 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef MAPBED_H
#define MAPBED_H
#ifndef MAPFILE_H
#define MAPFILE_H
using namespace std;
#include <sstream>
#include <iomanip>
#include "VectorOps.h"
#include "RecordKeyList.h"
using namespace std;
class ContextMap;
class BlockMgr;
class RecordOutputMgr;
class FileMap {
public:
FileMap(ContextMap *context);
~FileMap(void);
bool mapFiles();
private:
ContextMap *_context;
Record *_queryRec;
Record *_databaseRec;
BlockMgr *_blockMgr;
RecordOutputMgr *_recordOutputMgr;
vector<string> _column_vec; // vector to hold current column's worth of data
//------------------------------------------------
// private methods
//------------------------------------------------
void Map();
string MapHits(RecordKeyList &hits);
void ExtractColumnFromHits(RecordKeyList &hits);
};
#endif /* MAPFILE_H */
/*
#include "bedFile.h"
#include "chromsweep.h"
#include "VectorOps.h"
......@@ -75,5 +118,5 @@ private:
string MapHits(const BED &a, const vector<BED> &hits);
void ExtractColumnFromHits(const vector<BED> &hits);
};
#endif /* MAPBED_H */
*/
//#endif /* MAPFILE_H */
......@@ -9,21 +9,37 @@
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "mapBed.h"
#include "version.h"
using namespace std;
#include "mapFile.h"
#include "ContextMap.h"
// define our program name
#define PROGRAM_NAME "bedtools map"
void map_help(void);
// define our parameter checking macro
#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
int map_main(int argc, char* argv[]) {
ContextMap *context = new ContextMap();
if (!context->parseCmdArgs(argc, argv, 1) || context->getShowHelp() || !context->isValidState()) {
if (!context->getErrorMsg().empty()) {
cerr << context->getErrorMsg() << endl;
}
map_help();
delete context;
return 0;
}
FileMap *fileMap = new FileMap(context);
bool retVal = fileMap->mapFiles();
delete fileMap;
delete context;
return retVal ? 0 : 1;
}
// function declarations
void map_help(void);
/*
int map_main(int argc, char* argv[]) {
// our configuration variables
......@@ -158,6 +174,7 @@ int map_main(int argc, char* argv[]) {
return 0;
}
}
*/
void map_help(void) {
......
......@@ -55,6 +55,7 @@ ContextBase::ContextBase()
{
_programNames["intersect"] = INTERSECT;
_programNames["sample"] = SAMPLE;
_programNames["map"] = MAP;
_validScoreOps.insert("sum");
_validScoreOps.insert("max");
......
......@@ -210,7 +210,8 @@ protected:
int _bamHeaderAndRefIdx;
int _maxNumDatabaseFields;
bool _useFullBamTags;
string _columnOperation;
int _column;
bool _reportCount;
int _maxDistance;
bool _reportNames;
......
......@@ -15,8 +15,8 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= ContextBase.cpp ContextBase.h ContextIntersect.cpp ContextIntersect.h ContextSample.cpp ContextSample.h
OBJECTS= ContextBase.o ContextIntersect.o ContextSample.o
SOURCES= ContextBase.cpp ContextBase.h ContextIntersect.cpp ContextIntersect.h ContextMap.cpp ContextMap.h ContextSample.cpp ContextSample.h
OBJECTS= ContextBase.o ContextIntersect.o ContextMap.o ContextSample.o
_EXT_OBJECTS=ParseTools.o QuickString.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
......@@ -31,6 +31,9 @@ $(BUILT_OBJECTS): $(SOURCES)
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/ContextBase.o $(OBJ_DIR)/ContextIntersect.o $(OBJ_DIR)/ContextSample.o
@rm -f $(OBJ_DIR)/ContextBase.o \
$(OBJ_DIR)/ContextIntersect.o \
$(OBJ_DIR)/ContextMap.o \
$(OBJ_DIR)/ContextSample.o
.PHONY: clean
\ No newline at end of file
......@@ -117,6 +117,18 @@ void RecordOutputMgr::printRecord(const Record *record)
printRecord(keyList);
}
void RecordOutputMgr::printRecord(const Record *record, const string value)
{
RecordKeyList keyList(record);
printRecord(keyList);
_outBuf.append(value.c_str());
newline();
if (needsFlush()) {
flush();
}
}
void RecordOutputMgr::printRecord(RecordKeyList &keyList) {
if (keyList.getKey()->getType() == FileRecordTypeChecker::BAM_RECORD_TYPE) {
RecordKeyList blockList(keyList.getKey());
......@@ -129,7 +141,6 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList) {
return;
}
printRecord(keyList, NULL);
}
void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockList)
......@@ -192,6 +203,12 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi
}
_currBlockList = NULL;
return;
} else if (_context->getProgram() == ContextBase::MAP) {
if (!printKeyAndTerminate(keyList)) {
tab();
}
_currBlockList = NULL;
return;
}
}
......
......@@ -27,6 +27,8 @@ public:
// void printHeader(const string &);
void printRecord(const Record *record);
void printRecord(RecordKeyList &keyList);
// Added by ARQ
void printRecord(const Record *record, const string value);
private:
typedef enum { NOT_BAM, BAM_AS_BAM, BAM_AS_BED} printBamType;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment