Skip to content
Snippets Groups Projects
Commit 2268ee4f authored by nkindlon's avatar nkindlon
Browse files

Refactored KeyListOps, Context, mapFile for KeyListOps re-usability by other tools.

parent 85df2b4a
No related branches found
No related tags found
No related merge requests found
Showing
with 216 additions and 390 deletions
...@@ -17,6 +17,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \ ...@@ -17,6 +17,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/FileRecordTools/ \ -I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/RecordOutputMgr/ \ -I$(UTILITIES_DIR)/RecordOutputMgr/ \
-I$(UTILITIES_DIR)/NewChromsweep \ -I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/BinTree \ -I$(UTILITIES_DIR)/BinTree \
......
...@@ -29,6 +29,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \ ...@@ -29,6 +29,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/RecordOutputMgr/ \ -I$(UTILITIES_DIR)/RecordOutputMgr/ \
-I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/NewChromsweep \ -I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/VectorOps \ -I$(UTILITIES_DIR)/VectorOps \
-I$(UTILITIES_DIR)/BinTree \ -I$(UTILITIES_DIR)/BinTree \
......
...@@ -47,74 +47,11 @@ bool FileMap::mapFiles() ...@@ -47,74 +47,11 @@ bool FileMap::mapFiles()
RecordKeyList keySet(hitSet.getKey()); RecordKeyList keySet(hitSet.getKey());
RecordKeyList resultSet(hitSet.getKey()); RecordKeyList resultSet(hitSet.getKey());
_blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet); _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
SummarizeHits(resultSet); _recordOutputMgr->printRecord(resultSet.getKey(), _context->getColumnOpsVal(resultSet));
_recordOutputMgr->printRecord(resultSet.getKey(), _output);
} else { } else {
SummarizeHits(hitSet); _recordOutputMgr->printRecord(hitSet.getKey(), _context->getColumnOpsVal(hitSet));
_recordOutputMgr->printRecord(hitSet.getKey(), _output);
} }
} }
return true; return true;
} }
// Rebuilds _column_vec from scratch with one string per hit: the text of the
// context-selected column taken from each hit's record.
void FileMap::ExtractColumnFromHits(RecordKeyList &hits) {
    _column_vec.clear();

    // RecordKeyList is walked with its own begin()/next()/end() protocol.
    RecordKeyList::const_iterator_type hitIter = hits.begin();
    while (hitIter != hits.end()) {
        _column_vec.push_back(hitIter->value()->getField(_context->getColumn()).str());
        hitIter = hits.next();
    }
}
// Computes the summary value for one set of hits and leaves it in _output.
//
// The operation name comes from the context (getColumnOperation()). The
// column values are gathered by ExtractColumnFromHits() and reduced with
// VectorOps. An operation name that matches none of the branches below
// prints an error to stderr and terminates the process with exit(1).
void FileMap::SummarizeHits(RecordKeyList &hits) {
const QuickString & operation = _context->getColumnOperation();
_output.clear();
// No hits: the two counting operations report "0"; every other operation
// reports the context's null value. Nothing else is evaluated in this case.
if (hits.size() == 0) {
if (operation == "count" || operation == "count_distinct")
_output.append("0");
else
_output.append(_context->getNullValue().str());
return;
}
// Reset both the buffered text and the stream state flags of the scratch
// stream before reusing it.
_tmp_output.str("");
_tmp_output.clear();
ExtractColumnFromHits(hits);
VectorOps vo(_column_vec);
// Branches that apply setprecision(PRECISION) are listed first, except for
// the two count branches further down; mode, antimode, collapse and
// distinct are streamed without it.
if (operation == "sum")
_tmp_output << setprecision (PRECISION) << vo.GetSum();
else if (operation == "mean")
_tmp_output << setprecision (PRECISION) << vo.GetMean();
else if (operation == "median")
_tmp_output << setprecision (PRECISION) << vo.GetMedian();
else if (operation == "min")
_tmp_output << setprecision (PRECISION) << vo.GetMin();
else if (operation == "max")
_tmp_output << setprecision (PRECISION) << vo.GetMax();
else if (operation == "absmin")
_tmp_output << setprecision (PRECISION) << vo.GetAbsMin();
else if (operation == "absmax")
_tmp_output << setprecision (PRECISION) << vo.GetAbsMax();
else if (operation == "mode")
_tmp_output << vo.GetMode();
else if (operation == "antimode")
_tmp_output << vo.GetAntiMode();
else if (operation == "count")
_tmp_output << setprecision (PRECISION) << vo.GetCount();
else if (operation == "count_distinct")
_tmp_output << setprecision (PRECISION) << vo.GetCountDistinct();
else if (operation == "collapse")
_tmp_output << vo.GetCollapse();
else if (operation == "distinct")
_tmp_output << vo.GetDistinct();
else {
// Unknown operation name: report it and abort the whole program.
cerr << "ERROR: " << operation << " is an unrecognized operation\n";
exit(1);
}
_output.append(_tmp_output.str());
}
...@@ -18,10 +18,11 @@ using namespace std; ...@@ -18,10 +18,11 @@ using namespace std;
#include <iomanip> #include <iomanip>
#include "VectorOps.h" #include "VectorOps.h"
#include "RecordKeyList.h" #include "RecordKeyList.h"
#include "KeyListOps.h"
#include "ContextMap.h"
using namespace std; using namespace std;
class ContextMap;
class BlockMgr; class BlockMgr;
class RecordOutputMgr; class RecordOutputMgr;
...@@ -35,90 +36,8 @@ public: ...@@ -35,90 +36,8 @@ public:
private: private:
ContextMap *_context; ContextMap *_context;
Record *_queryRec;
Record *_databaseRec;
BlockMgr *_blockMgr; BlockMgr *_blockMgr;
RecordOutputMgr *_recordOutputMgr; RecordOutputMgr *_recordOutputMgr;
vector<string> _column_vec; // vector to hold current column's worth of data
ostringstream _tmp_output;
QuickString _output; // placeholder for the results of mapping B to each a in A.
//------------------------------------------------
// private methods
//------------------------------------------------
void Map();
void SummarizeHits(RecordKeyList &hits);
void ExtractColumnFromHits(RecordKeyList &hits);
}; };
#endif /* MAPFILE_H */ #endif /* MAPFILE_H */
/*
#include "bedFile.h"
#include "chromsweep.h"
#include "VectorOps.h"
#include "api/BamReader.h"
#include "api/BamWriter.h"
#include "api/BamAux.h"
#include "BamAncillary.h"
using namespace BamTools;
#include <vector>
#include <iostream>
#include <algorithm>
#include <numeric>
#include <fstream>
#include <iomanip>
#include <stdlib.h>
using namespace std;
class BedMap {
public:
// constructor
BedMap(string bedAFile, string bedBFile, int column, string operation,
float overlapFraction, bool sameStrand,
bool diffStrand, bool reciprocal,
bool choseNullValue, string nullValue,
bool printHeader);
// destructor
~BedMap(void);
private:
//------------------------------------------------
// private attributes
//------------------------------------------------
string _bedAFile;
string _bedBFile;
int _column;
string _operation;
bool _sameStrand;
bool _diffStrand;
bool _reciprocal;
float _overlapFraction;
string _nullValue;
bool _printHeader;
// instance of a bed file class.
BedFile *_bedA, *_bedB;
vector<string> _column_vec; // vector to hold current column's worth of data
//------------------------------------------------
// private methods
//------------------------------------------------
void Map();
string MapHits(const BED &a, const vector<BED> &hits);
void ExtractColumnFromHits(const vector<BED> &hits);
};
*/
//#endif /* MAPFILE_H */
...@@ -38,144 +38,6 @@ int map_main(int argc, char* argv[]) { ...@@ -38,144 +38,6 @@ int map_main(int argc, char* argv[]) {
return retVal ? 0 : 1; return retVal ? 0 : 1;
} }
/*
int map_main(int argc, char* argv[]) {
// our configuration variables
bool showHelp = false;
// input files
string bedAFile;
string bedBFile;
int column = 5;
string operation = "sum";
string nullValue = ".";
// input arguments
float overlapFraction = 1E-9;
bool haveBedA = false;
bool haveBedB = false;
bool haveColumn = false;
bool haveOperation = false;
bool haveFraction = false;
bool reciprocalFraction = false;
bool sameStrand = false;
bool diffStrand = false;
bool printHeader = false;
bool choseNullValue = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true;
}
}
if(showHelp) map_help();
// do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-a", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedA = true;
bedAFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedB = true;
bedBFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-c", 2, parameterLength)) {
if ((i+1) < argc) {
haveColumn = true;
column = atoi(argv[i + 1]);
i++;
}
}
else if(PARAMETER_CHECK("-o", 2, parameterLength)) {
if ((i+1) < argc) {
haveOperation = true;
operation = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
if ((i+1) < argc) {
haveFraction = true;
overlapFraction = atof(argv[i + 1]);
i++;
}
}
else if(PARAMETER_CHECK("-r", 2, parameterLength)) {
reciprocalFraction = true;
}
else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
sameStrand = true;
}
else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
diffStrand = true;
}
else if (PARAMETER_CHECK("-null", 5, parameterLength)) {
nullValue = argv[i + 1];
choseNullValue = true;
i++;
}
else if(PARAMETER_CHECK("-header", 7, parameterLength)) {
printHeader = true;
}
else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
}
}
// make sure we have both input files
if (!haveBedA || !haveBedB) {
cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
showHelp = true;
}
if (reciprocalFraction && !haveFraction) {
cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl;
showHelp = true;
}
if (sameStrand && diffStrand) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) {
BedMap *bm = new BedMap(bedAFile, bedBFile, column, operation,
overlapFraction, sameStrand,
diffStrand, reciprocalFraction,
choseNullValue, nullValue,
printHeader);
delete bm;
return 0;
}
else {
map_help();
return 0;
}
}
*/
void map_help(void) { void map_help(void) {
cerr << "\nTool: bedtools map (aka mapBed)" << endl; cerr << "\nTool: bedtools map (aka mapBed)" << endl;
......
...@@ -10,6 +10,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \ ...@@ -10,6 +10,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/FileRecordTools/ \ -I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders \
-I$(UTILITIES_DIR)/FileRecordTools/Records \ -I$(UTILITIES_DIR)/FileRecordTools/Records \
-I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/general \ -I$(UTILITIES_DIR)/general \
-I$(UTILITIES_DIR)/NewChromsweep \ -I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/GenomeFile/ \
......
...@@ -18,6 +18,7 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ ...@@ -18,6 +18,7 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/FileRecordTools/ \ -I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders \
-I$(UTILITIES_DIR)/FileRecordTools/Records \ -I$(UTILITIES_DIR)/FileRecordTools/Records \
-I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/general -I$(UTILITIES_DIR)/general
# ---------------------------------- # ----------------------------------
......
...@@ -17,6 +17,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \ ...@@ -17,6 +17,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/FileRecordTools/ \ -I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/RecordOutputMgr/ \ -I$(UTILITIES_DIR)/RecordOutputMgr/ \
-I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/version/
......
...@@ -11,6 +11,7 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \ ...@@ -11,6 +11,7 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/FileRecordTools/ \ -I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/BamTools/include \ -I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BamTools/src/ \ -I$(UTILITIES_DIR)/BamTools/src/ \
-I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/version/
......
...@@ -52,20 +52,16 @@ ContextBase::ContextBase() ...@@ -52,20 +52,16 @@ ContextBase::ContextBase()
_hasConstantSeed(false), _hasConstantSeed(false),
_seed(0), _seed(0),
_forwardOnly(false), _forwardOnly(false),
_reverseOnly(false) _reverseOnly(false),
_hasColumnOpsMethods(false)
{ {
_programNames["intersect"] = INTERSECT; _programNames["intersect"] = INTERSECT;
_programNames["sample"] = SAMPLE; _programNames["sample"] = SAMPLE;
_programNames["map"] = MAP; _programNames["map"] = MAP;
_validScoreOps.insert("sum"); if (hasColumnOpsMethods()) {
_validScoreOps.insert("max"); _keyListOps = new KeyListOps();
_validScoreOps.insert("min"); }
_validScoreOps.insert("mean");
_validScoreOps.insert("mode");
_validScoreOps.insert("median");
_validScoreOps.insert("antimode");
_validScoreOps.insert("collapse");
} }
ContextBase::~ContextBase() ContextBase::~ContextBase()
...@@ -79,6 +75,11 @@ ContextBase::~ContextBase() ...@@ -79,6 +75,11 @@ ContextBase::~ContextBase()
delete _files[i]; delete _files[i];
_files[i] = NULL; _files[i] = NULL;
} }
if (hasColumnOpsMethods()) {
delete _keyListOps;
_keyListOps = NULL;
}
} }
bool ContextBase::determineOutputType() { bool ContextBase::determineOutputType() {
...@@ -176,6 +177,19 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { ...@@ -176,6 +177,19 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
else if (strcmp(_argv[_i], "-seed") == 0) { else if (strcmp(_argv[_i], "-seed") == 0) {
if (!handle_seed()) return false; if (!handle_seed()) return false;
} }
else if (strcmp(_argv[_i], "-o") == 0) {
if (!handle_o()) return false;
}
else if (strcmp(_argv[_i], "-c") == 0) {
if (!handle_c()) return false;
}
else if (strcmp(_argv[_i], "-null") == 0) {
if (!handle_null()) return false;
}
else if (strcmp(_argv[_i], "-delim") == 0) {
if (!handle_delim()) return false;
}
} }
return true; return true;
} }
...@@ -191,6 +205,12 @@ bool ContextBase::isValidState() ...@@ -191,6 +205,12 @@ bool ContextBase::isValidState()
if (!determineOutputType()) { if (!determineOutputType()) {
return false; return false;
} }
if (hasColumnOpsMethods()) {
FileRecordMgr *dbFile = getFile(hasIntersectMethods() ? _databaseFileIdx : 0);
if (!_keyListOps->isValidColumnOps(dbFile)) {
return false;
}
}
return true; return true;
} }
...@@ -363,3 +383,85 @@ bool ContextBase::handle_ubam() ...@@ -363,3 +383,85 @@ bool ContextBase::handle_ubam()
markUsed(_i - _skipFirstArgs); markUsed(_i - _skipFirstArgs);
return true; return true;
} }
// Methods specific to column operations.
// for col ops, -c is the string of columns upon which to operate
// Consumes "-c <columns>" and forwards the column string to the KeyListOps
// helper. Returns false only when this context has no column-ops support.
bool ContextBase::handle_c()
{
    if (!hasColumnOpsMethods()) {
        return false;
    }

    // With no value after the flag nothing is recorded or marked as used,
    // and the call still reports success.
    const bool valuePresent = (_i + 1) < _argc;
    if (!valuePresent) {
        return true;
    }

    const int flagPos = _i - _skipFirstArgs;
    _keyListOps->setColumns(_argv[_i + 1]);
    markUsed(flagPos);      // the "-c" flag itself
    markUsed(flagPos + 1);  // its value
    ++_i;                   // step past the consumed value
    return true;
}
// for col ops, -o is the string of operations to apply to the columns (-c)
// Consumes "-o <operations>" and forwards the operation string to the
// KeyListOps helper. Returns false only when this context has no column-ops
// support.
bool ContextBase::handle_o()
{
    if (!hasColumnOpsMethods()) {
        return false;
    }

    // With no value after the flag nothing is recorded or marked as used,
    // and the call still reports success.
    const bool valuePresent = (_i + 1) < _argc;
    if (!valuePresent) {
        return true;
    }

    const int flagPos = _i - _skipFirstArgs;
    _keyListOps->setOperations(_argv[_i + 1]);
    markUsed(flagPos);      // the "-o" flag itself
    markUsed(flagPos + 1);  // its value
    ++_i;                   // step past the consumed value
    return true;
}
// for col ops, -null is a NULL value assigned
// when no overlaps are detected.
// Consumes "-null <value>" and forwards the replacement string to the
// KeyListOps helper. Returns false only when this context has no column-ops
// support.
bool ContextBase::handle_null()
{
    if (!hasColumnOpsMethods()) {
        return false;
    }

    // With no value after the flag nothing is recorded or marked as used,
    // and the call still reports success.
    const bool valuePresent = (_i + 1) < _argc;
    if (!valuePresent) {
        return true;
    }

    const int flagPos = _i - _skipFirstArgs;
    _keyListOps->setNullValue(_argv[_i + 1]);
    markUsed(flagPos);      // the "-null" flag itself
    markUsed(flagPos + 1);  // its value
    ++_i;                   // step past the consumed value
    return true;
}
//for col ops, delimStr will appear between each item in
//a collapsed but delimited list.
// Consumes "-delim <string>" and forwards the delimiter string to the
// KeyListOps helper. Returns false only when this context has no column-ops
// support.
bool ContextBase::handle_delim()
{
    if (!hasColumnOpsMethods()) {
        return false;
    }

    // With no value after the flag nothing is recorded or marked as used,
    // and the call still reports success.
    const bool valuePresent = (_i + 1) < _argc;
    if (!valuePresent) {
        return true;
    }

    const int flagPos = _i - _skipFirstArgs;
    _keyListOps->setDelimStr(_argv[_i + 1]);
    markUsed(flagPos);      // the "-delim" flag itself
    markUsed(flagPos + 1);  // its value
    ++_i;                   // step past the consumed value
    return true;
}
// Turns column-operation support on or off for this context.
//
// Enabling allocates a fresh KeyListOps helper. The helper is owned by this
// object and is only considered valid while _hasColumnOpsMethods is true;
// the destructor deletes it under that same condition.
//
// Any helper from an earlier enable is released first, so enabling twice no
// longer leaks the first helper and disabling no longer strands one that the
// destructor would skip. Enabling again still yields a brand-new helper, as
// before.
//
// NOTE(review): this relies on _hasColumnOpsMethods being changed only
// through this method; confirm no subclass writes the flag directly.
void ContextBase::setColumnOpsMethods(bool val)
{
    if (_hasColumnOpsMethods) {
        delete _keyListOps;
        _keyListOps = NULL;
    }

    _hasColumnOpsMethods = val;
    if (val) {
        _keyListOps = new KeyListOps();
    }
}
// Returns the column-operation result for keyList as produced by the
// KeyListOps helper, or the placeholder _nullStr when column operations are
// not available in this context.
const QuickString &ContextBase::getColumnOpsVal(RecordKeyList &keyList) const {
    if (hasColumnOpsMethods()) {
        return _keyListOps->getOpVals(keyList);
    }
    return _nullStr;
}
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "NewGenomeFile.h" #include "NewGenomeFile.h"
#include "api/BamReader.h" #include "api/BamReader.h"
#include "api/BamAux.h" #include "api/BamAux.h"
#include "KeyListOps.h"
class ContextBase { class ContextBase {
...@@ -144,6 +145,13 @@ public: ...@@ -144,6 +145,13 @@ public:
//methods. //methods.
virtual bool hasIntersectMethods() const { return false; } virtual bool hasIntersectMethods() const { return false; }
// determine whether column operations like those used in map
// are available.
void setColumnOpsMethods(bool val);
virtual bool hasColumnOpsMethods() const { return _hasColumnOpsMethods; }
const QuickString &getColumnOpsVal(RecordKeyList &keyList) const;
//methods applicable only to column operations.
protected: protected:
PROGRAM_TYPE _program; PROGRAM_TYPE _program;
...@@ -191,15 +199,11 @@ protected: ...@@ -191,15 +199,11 @@ protected:
int _bamHeaderAndRefIdx; int _bamHeaderAndRefIdx;
int _maxNumDatabaseFields; int _maxNumDatabaseFields;
bool _useFullBamTags; bool _useFullBamTags;
QuickString _columnOperation;
int _column;
QuickString _nullValue;
bool _reportCount; bool _reportCount;
int _maxDistance; int _maxDistance;
bool _reportNames; bool _reportNames;
bool _reportScores; bool _reportScores;
QuickString _scoreOp; QuickString _scoreOp;
set<QuickString> _validScoreOps;
int _numOutputRecords; int _numOutputRecords;
...@@ -208,6 +212,10 @@ protected: ...@@ -208,6 +212,10 @@ protected:
bool _forwardOnly; bool _forwardOnly;
bool _reverseOnly; bool _reverseOnly;
bool _hasColumnOpsMethods;
KeyListOps *_keyListOps;
QuickString _nullStr; //placeholder return value when col ops aren't valid.
void markUsed(int i) { _argsProcessed[i] = true; } void markUsed(int i) { _argsProcessed[i] = true; }
bool isUsed(int i) const { return _argsProcessed[i]; } bool isUsed(int i) const { return _argsProcessed[i]; }
bool cmdArgsValid(); bool cmdArgsValid();
...@@ -231,6 +239,11 @@ protected: ...@@ -231,6 +239,11 @@ protected:
virtual bool handle_split(); virtual bool handle_split();
virtual bool handle_sorted(); virtual bool handle_sorted();
virtual bool handle_ubam(); virtual bool handle_ubam();
virtual bool handle_c();
virtual bool handle_o();
virtual bool handle_null();
virtual bool handle_delim();
}; };
#endif /* CONTEXTBASE_H_ */ #endif /* CONTEXTBASE_H_ */
...@@ -21,6 +21,8 @@ public: ...@@ -21,6 +21,8 @@ public:
//NOTE: Query and database files will only be marked as such by either the //NOTE: Query and database files will only be marked as such by either the
//parseCmdArgs method, or by explicitly setting them. //parseCmdArgs method, or by explicitly setting them.
FileRecordMgr *getQueryFile() { return getFile(_queryFileIdx); }
FileRecordMgr *getDatabaseFile() { return getFile(_databaseFileIdx); }
int getQueryFileIdx() const { return _queryFileIdx; } int getQueryFileIdx() const { return _queryFileIdx; }
void setQueryFileIdx(int idx) { _queryFileIdx = idx; } void setQueryFileIdx(int idx) { _queryFileIdx = idx; }
int getDatabaseFileIdx() const { return _databaseFileIdx; } int getDatabaseFileIdx() const { return _databaseFileIdx; }
......
...@@ -12,13 +12,7 @@ ContextMap::ContextMap() ...@@ -12,13 +12,7 @@ ContextMap::ContextMap()
// map requires sorted input // map requires sorted input
setSortedInput(true); setSortedInput(true);
setLeftJoin(true); setLeftJoin(true);
setColumnOpsMethods(true);
// default to BED score column
setColumn(5);
// default to "sum"
setColumnOperation("sum");
// default to "." as a NULL value
setNullValue('.');
} }
ContextMap::~ContextMap() ContextMap::~ContextMap()
...@@ -44,75 +38,22 @@ bool ContextMap::parseCmdArgs(int argc, char **argv, int skipFirstArgs) { ...@@ -44,75 +38,22 @@ bool ContextMap::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
if (isUsed(_i - _skipFirstArgs)) { if (isUsed(_i - _skipFirstArgs)) {
continue; continue;
} }
else if (strcmp(_argv[_i], "-o") == 0) { if (strcmp(_argv[_i], "-c") == 0) {
if (!handle_o()) return false; //bypass intersect's use of the -c option, because -c
} //means writeCount for intersect, but means columns for map.
else if (strcmp(_argv[_i], "-c") == 0) { if (!ContextBase::handle_c()) return false;
if (!handle_c()) return false; }
}
else if (strcmp(_argv[_i], "-null") == 0) {
if (!handle_null()) return false;
}
}
return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
}
bool ContextMap::isValidState()
{
if (!ContextIntersect::isValidState()) {
return false;
} }
return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
if (getDatabaseFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) {
//throw Error
cerr << endl << "*****"
<< endl
<< "***** ERROR: BAM database file not currently supported for the map tool."
<< endl;
exit(1);
}
// TODO
// enforce any specific checks for Map.
return true;
}
// for map, -c is the column upon which to operate
// Consumes "-c <column>": the value is converted with atoi() and stored via
// setColumn(). Always returns true.
bool ContextMap::handle_c()
{
    // With no value after the flag nothing is stored or marked as used.
    const bool valuePresent = (_i + 1) < _argc;
    if (!valuePresent) {
        return true;
    }

    const int flagPos = _i - _skipFirstArgs;
    setColumn(atoi(_argv[_i + 1]));
    markUsed(flagPos);      // the "-c" flag itself
    markUsed(flagPos + 1);  // its value
    ++_i;                   // step past the consumed value
    return true;
}
// for map, -o is the operation to apply to the column (-c)
// Consumes "-o <operation>": the value is stored via setColumnOperation().
// Always returns true.
bool ContextMap::handle_o()
{
    // With no value after the flag nothing is stored or marked as used.
    const bool valuePresent = (_i + 1) < _argc;
    if (!valuePresent) {
        return true;
    }

    const int flagPos = _i - _skipFirstArgs;
    setColumnOperation(_argv[_i + 1]);
    markUsed(flagPos);      // the "-o" flag itself
    markUsed(flagPos + 1);  // its value
    ++_i;                   // step past the consumed value
    return true;
}
// for map, -null is a NULL value assigned
// when no overlaps are detected.
bool ContextMap::handle_null()
{
if ((_i+1) < _argc) {
setNullValue(_argv[_i + 1]);
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
}
return true;
} }
//
//
//bool ContextMap::isValidState()
//{
// if (!ContextIntersect::isValidState()) {
// return false;
// }
//}
//
//
...@@ -9,30 +9,20 @@ ...@@ -9,30 +9,20 @@
#define CONTEXTMAP_H_ #define CONTEXTMAP_H_
#include "ContextIntersect.h" #include "ContextIntersect.h"
#include "KeyListOps.h"
class ContextMap : public ContextIntersect { class ContextMap : public ContextIntersect {
public: public:
ContextMap(); ContextMap();
virtual ~ContextMap(); virtual ~ContextMap();
virtual bool isValidState(); // virtual bool isValidState();
//
virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs); virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs);
//
int getColumn() const { return _column; }
void setColumn(int column) { _column = column; }
const QuickString & getColumnOperation() const { return _columnOperation; }
void setColumnOperation(const QuickString & operation) { _columnOperation = operation; }
const QuickString & getNullValue() const { return _nullValue; }
void setNullValue(const QuickString & nullValue) { _nullValue = nullValue; }
virtual bool hasIntersectMethods() const { return true; } virtual bool hasIntersectMethods() const { return true; }
//
private: private:
virtual bool handle_c();
virtual bool handle_o();
virtual bool handle_null();
}; };
......
...@@ -9,6 +9,7 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \ ...@@ -9,6 +9,7 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/FileRecordTools/ \ -I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/GenomeFile/ \
-I$(UTILITIES_DIR)/BamTools/include \ -I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BamTools/src/ \ -I$(UTILITIES_DIR)/BamTools/src/ \
......
...@@ -172,5 +172,10 @@ const QuickString &BamRecord::getField(int fieldNum) const ...@@ -172,5 +172,10 @@ const QuickString &BamRecord::getField(int fieldNum) const
return Bed6Interval::getField(fieldNum); return Bed6Interval::getField(fieldNum);
} }
// Reports whether the given column is numeric.
// TBD: As with getField, this isn't defined for BAM.
// Columns above 6 are always reported as non-numeric; columns 1-6 (and
// anything lower) are delegated to Bed6Interval.
bool BamRecord::isNumericField(int fieldNum) {
    return fieldNum <= 6 && Bed6Interval::isNumericField(fieldNum);
}
...@@ -40,6 +40,7 @@ public: ...@@ -40,6 +40,7 @@ public:
virtual const QuickString &getField(int fieldNum) const; virtual const QuickString &getField(int fieldNum) const;
virtual int getNumFields() const { return 12; } virtual int getNumFields() const { return 12; }
static bool isNumericField(int fieldNum);
protected: protected:
BamTools::BamAlignment _bamAlignment; BamTools::BamAlignment _bamAlignment;
......
...@@ -146,3 +146,29 @@ const QuickString &Bed12Interval::getField(int fieldNum) const ...@@ -146,3 +146,29 @@ const QuickString &Bed12Interval::getField(int fieldNum) const
} }
} }
// Reports whether the given column is numeric.
// Columns 7-12 are answered here: 7, 8 and 10 are numeric, 9, 11 and 12 are
// not. Every other column number is delegated to Bed6Interval.
// NOTE(review): presumably 7/8/10 are thickStart/thickEnd/blockCount in the
// standard BED12 layout; confirm against Bed12Interval::getField.
bool Bed12Interval::isNumericField(int fieldNum) {
    const bool handledHere = (fieldNum >= 7 && fieldNum <= 12);
    if (!handledHere) {
        return Bed6Interval::isNumericField(fieldNum);
    }
    return fieldNum == 7 || fieldNum == 8 || fieldNum == 10;
}
...@@ -54,6 +54,7 @@ public: ...@@ -54,6 +54,7 @@ public:
virtual const QuickString &getField(int fieldNum) const; virtual const QuickString &getField(int fieldNum) const;
virtual int getNumFields() const { return 12; } virtual int getNumFields() const { return 12; }
static bool isNumericField(int fieldNum);
protected: protected:
......
...@@ -79,3 +79,23 @@ const QuickString &Bed3Interval::getField(int fieldNum) const ...@@ -79,3 +79,23 @@ const QuickString &Bed3Interval::getField(int fieldNum) const
break; break;
} }
} }
// Reports whether the given column is numeric: column 1 (chrom) is not,
// columns 2 (startPos) and 3 (endPos) are. Any other column number is
// treated as a fatal error: a message is written to stderr and the process
// exits with status 1.
bool Bed3Interval::isNumericField(int fieldNum) {
    if (fieldNum == 1) {
        return false; //chrom
    }
    if (fieldNum == 2 || fieldNum == 3) {
        return true; //startPos, endPos
    }

    cerr << endl << "*****" << endl
         << "*****ERROR: requested invalid column " << fieldNum << ". Exiting." << endl
         << endl << "*****" << endl;
    exit(1);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment