diff --git a/Makefile b/Makefile index 139988be1dae1d5c62c3a8148e28fbdb8ed18a4b..267674880c03306bea22789b9e2a518ab8b479fe 100644 --- a/Makefile +++ b/Makefile @@ -78,6 +78,7 @@ UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile \ $(SRC_DIR)/utils/gzstream \ $(SRC_DIR)/utils/fileType \ $(SRC_DIR)/utils/bedFilePE \ + $(SRC_DIR)/utils/KeyListOps \ $(SRC_DIR)/utils/NewChromsweep \ $(SRC_DIR)/utils/sequenceUtilities \ $(SRC_DIR)/utils/tabFile \ diff --git a/src/mapFile/Makefile b/src/mapFile/Makefile index 17bb42df2d5c71d7eae0093ef8013c57c3fce42d..862824275d5b7559ed80993539f051cb3eb27d18 100644 --- a/src/mapFile/Makefile +++ b/src/mapFile/Makefile @@ -29,6 +29,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \ -I$(UTILITIES_DIR)/RecordOutputMgr/ \ + -I$(UTILITIES_DIR)/KeyListOps/ \ -I$(UTILITIES_DIR)/NewChromsweep \ -I$(UTILITIES_DIR)/VectorOps \ -I$(UTILITIES_DIR)/BinTree \ diff --git a/src/mapFile/mapFile.cpp b/src/mapFile/mapFile.cpp index 88dcc26fe8eb24eacf8108a577e8ab4e757b201b..7cff53109b6fff4d5184deb117d8a879ad39cf82 100644 --- a/src/mapFile/mapFile.cpp +++ b/src/mapFile/mapFile.cpp @@ -21,11 +21,14 @@ const int PRECISION = 21; FileMap::FileMap(ContextMap *context) : _context(context), _blockMgr(NULL), - _recordOutputMgr(NULL) + _recordOutputMgr(NULL), + _colOps(_context->getColOps()) { _blockMgr = new BlockMgr(_context->getOverlapFraction(), _context->getReciprocal()); _recordOutputMgr = new RecordOutputMgr(); _recordOutputMgr->init(_context); + _keyListOps.setNullValue(_context->getNullValue()); + _keyListOps.setDelimStr(_context->getDelim()); } FileMap::~FileMap(void) { @@ -43,78 +46,174 @@ bool FileMap::mapFiles() } RecordKeyList hitSet; while (sweep.next(hitSet)) { + _outputValues.clear(); if (_context->getObeySplits()) { RecordKeyList keySet(hitSet.getKey()); RecordKeyList resultSet(hitSet.getKey()); _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet); - 
SummarizeHits(resultSet); - _recordOutputMgr->printRecord(resultSet.getKey(), _output); + calculateOutput(resultSet); + _recordOutputMgr->printRecord(resultSet.getKey(), _outputValues); } else { - SummarizeHits(hitSet); - _recordOutputMgr->printRecord(hitSet.getKey(), _output); + calculateOutput(hitSet); + _recordOutputMgr->printRecord(hitSet.getKey(), _outputValues); } } return true; } -void FileMap::ExtractColumnFromHits(RecordKeyList &hits) { - _column_vec.clear(); - RecordKeyList::const_iterator_type iter = hits.begin(); - for (; iter != hits.end(); iter = hits.next()) - { - _column_vec.push_back(iter->value()->getField(_context->getColumn()).str()); - } -} - -void FileMap::SummarizeHits(RecordKeyList &hits) { - - const QuickString & operation = _context->getColumnOperation(); - _output.clear(); - - if (hits.size() == 0) { - if (operation == "count" || operation == "count_distinct") - _output.append("0"); - else - _output.append(_context->getNullValue().str()); - return; - } - - _tmp_output.str(""); - _tmp_output.clear(); - - ExtractColumnFromHits(hits); - - VectorOps vo(_column_vec); - if (operation == "sum") - _tmp_output << setprecision (PRECISION) << vo.GetSum(); - else if (operation == "mean") - _tmp_output << setprecision (PRECISION) << vo.GetMean(); - else if (operation == "median") - _tmp_output << setprecision (PRECISION) << vo.GetMedian(); - else if (operation == "min") - _tmp_output << setprecision (PRECISION) << vo.GetMin(); - else if (operation == "max") - _tmp_output << setprecision (PRECISION) << vo.GetMax(); - else if (operation == "absmin") - _tmp_output << setprecision (PRECISION) << vo.GetAbsMin(); - else if (operation == "absmax") - _tmp_output << setprecision (PRECISION) << vo.GetAbsMax(); - else if (operation == "mode") - _tmp_output << vo.GetMode(); - else if (operation == "antimode") - _tmp_output << vo.GetAntiMode(); - else if (operation == "count") - _tmp_output << setprecision (PRECISION) << vo.GetCount(); - else if (operation == 
"count_distinct") - _tmp_output << setprecision (PRECISION) << vo.GetCountDistinct(); - else if (operation == "collapse") - _tmp_output << vo.GetCollapse(); - else if (operation == "distinct") - _tmp_output << vo.GetDistinct(); - else { - cerr << "ERROR: " << operation << " is an unrecognized operation\n"; - exit(1); - } - _output.append(_tmp_output.str()); +void FileMap::calculateOutput(RecordKeyList &hits) +{ + //loop through all requested columns, and for each one, call the method needed + //for the operation specified. + _keyListOps.setKeyList(&hits); + + double val = 0.0; + for (int i=0; i < (int)_colOps.size(); i++) { + int col = _colOps[i].first; + KeyListOps::OP_TYPES opCode = _colOps[i].second; + + _keyListOps.setColumn(col); + switch (opCode) { + case KeyListOps::SUM: + val = _keyListOps.getSum(); + if (isnan(val)) { + _outputValues.append(_context->getNullValue()); + } else { + _outputValues.append(val); + } + break; + + case KeyListOps::MEAN: + val = _keyListOps.getMean(); + if (isnan(val)) { + _outputValues.append(_context->getNullValue()); + } else { + _outputValues.append(val); + } + break; + + case KeyListOps::STDDEV: + val = _keyListOps.getStddev(); + if (isnan(val)) { + _outputValues.append(_context->getNullValue()); + } else { + _outputValues.append(val); + } + break; + + case KeyListOps::SAMPLE_STDDEV: + val = _keyListOps.getSampleStddev(); + if (isnan(val)) { + _outputValues.append(_context->getNullValue()); + } else { + _outputValues.append(val); + } + break; + + case KeyListOps::MEDIAN: + val = _keyListOps.getMedian(); + if (isnan(val)) { + _outputValues.append(_context->getNullValue()); + } else { + _outputValues.append(val); + } + break; + + case KeyListOps::MODE: + _outputValues.append(_keyListOps.getMode()); + break; + + case KeyListOps::ANTIMODE: + _outputValues.append(_keyListOps.getAntiMode()); + break; + + case KeyListOps::MIN: + val = _keyListOps.getMin(); + if (isnan(val)) { + _outputValues.append(_context->getNullValue()); + } else 
{ + _outputValues.append(val); + } + break; + + case KeyListOps::MAX: + val = _keyListOps.getMax(); + if (isnan(val)) { + _outputValues.append(_context->getNullValue()); + } else { + _outputValues.append(val); + } + break; + + case KeyListOps::ABSMIN: + val = _keyListOps.getAbsMin(); + if (isnan(val)) { + _outputValues.append(_context->getNullValue()); + } else { + _outputValues.append(val); + } + break; + case KeyListOps::ABSMAX: + val = _keyListOps.getAbsMax(); + if (isnan(val)) { + _outputValues.append(_context->getNullValue()); + } else { + _outputValues.append(val); + } + break; + + case KeyListOps::COUNT: + _outputValues.append(_keyListOps.getCount()); + break; + + case KeyListOps::DISTINCT: + _outputValues.append(_keyListOps.getDistinct()); + break; + + case KeyListOps::COUNT_DISTINCT: + _outputValues.append(_keyListOps.getCountDistinct()); + break; + + case KeyListOps::DISTINCT_ONLY: + _outputValues.append(_keyListOps.getDistinctOnly()); + break; + + case KeyListOps::COLLAPSE: + _outputValues.append(_keyListOps.getCollapse()); + break; + + case KeyListOps::CONCAT: + _outputValues.append(_keyListOps.getConcat()); + break; + + case KeyListOps::FREQ_ASC: + _outputValues.append(_keyListOps.getFreqAsc()); + break; + + case KeyListOps::FREQ_DESC: + _outputValues.append(_keyListOps.getFreqDesc()); + break; + + case KeyListOps::FIRST: + _outputValues.append(_keyListOps.getFirst()); + break; + + case KeyListOps::LAST: + _outputValues.append(_keyListOps.getLast()); + break; + + case KeyListOps::INVALID: + default: + // Any unrecognized operation should have been handled already in the context validation. + // It's thus unnecessary to handle it here, but throw an error to help us know if future + // refactoring or code changes accidentally bypass the validation phase. + cerr << "ERROR: Invalid operation given for column " << col << ". Exiting..." << endl; + break; + } + //if this isn't the last column, add a tab. 
+ if (i < (int)_colOps.size() -1) { + _outputValues.append('\t'); + } + } } diff --git a/src/mapFile/mapFile.h b/src/mapFile/mapFile.h index cb1da082182baec7a78fd12b480ef684fee57701..e2143ef0fb110c2bb3fbde3500b3a5ee25403826 100644 --- a/src/mapFile/mapFile.h +++ b/src/mapFile/mapFile.h @@ -18,10 +18,11 @@ using namespace std; #include <iomanip> #include "VectorOps.h" #include "RecordKeyList.h" +#include "KeyListOps.h" +#include "ContextMap.h" using namespace std; -class ContextMap; class BlockMgr; class RecordOutputMgr; @@ -35,90 +36,13 @@ public: private: ContextMap *_context; - Record *_queryRec; - Record *_databaseRec; BlockMgr *_blockMgr; RecordOutputMgr *_recordOutputMgr; + KeyListOps _keyListOps; + const ContextMap::colOpsType & _colOps; + QuickString _outputValues; // placeholder for the results of mapping B to each a in A. - vector<string> _column_vec; // vector to hold current column's worth of data - - ostringstream _tmp_output; - QuickString _output; // placeholder for the results of mapping B to each a in A. 
- //------------------------------------------------ - // private methods - //------------------------------------------------ - void Map(); - void SummarizeHits(RecordKeyList &hits); - void ExtractColumnFromHits(RecordKeyList &hits); - + void calculateOutput(RecordKeyList &hits); }; #endif /* MAPFILE_H */ - - -/* -#include "bedFile.h" -#include "chromsweep.h" -#include "VectorOps.h" -#include "api/BamReader.h" -#include "api/BamWriter.h" -#include "api/BamAux.h" -#include "BamAncillary.h" -using namespace BamTools; - - -#include <vector> -#include <iostream> -#include <algorithm> -#include <numeric> -#include <fstream> -#include <iomanip> -#include <stdlib.h> -using namespace std; - - - -class BedMap { - -public: - - // constructor - BedMap(string bedAFile, string bedBFile, int column, string operation, - float overlapFraction, bool sameStrand, - bool diffStrand, bool reciprocal, - bool choseNullValue, string nullValue, - bool printHeader); - - // destructor - ~BedMap(void); - -private: - - //------------------------------------------------ - // private attributes - //------------------------------------------------ - string _bedAFile; - string _bedBFile; - int _column; - string _operation; - bool _sameStrand; - bool _diffStrand; - bool _reciprocal; - float _overlapFraction; - string _nullValue; - bool _printHeader; - - // instance of a bed file class. 
- BedFile *_bedA, *_bedB; - - vector<string> _column_vec; // vector to hold current column's worth of data - - //------------------------------------------------ - // private methods - //------------------------------------------------ - void Map(); - string MapHits(const BED &a, const vector<BED> &hits); - void ExtractColumnFromHits(const vector<BED> &hits); -}; -*/ -//#endif /* MAPFILE_H */ diff --git a/src/mapFile/mapMain.cpp b/src/mapFile/mapMain.cpp index a9eeb36990267c69e78b473f8824483b75629cb6..f08e56b3905727b3333e4743e53b9bb9e9aaa239 100644 --- a/src/mapFile/mapMain.cpp +++ b/src/mapFile/mapMain.cpp @@ -38,144 +38,6 @@ int map_main(int argc, char* argv[]) { return retVal ? 0 : 1; } - -/* -int map_main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedAFile; - string bedBFile; - int column = 5; - string operation = "sum"; - string nullValue = "."; - - // input arguments - float overlapFraction = 1E-9; - - bool haveBedA = false; - bool haveBedB = false; - bool haveColumn = false; - bool haveOperation = false; - bool haveFraction = false; - bool reciprocalFraction = false; - bool sameStrand = false; - bool diffStrand = false; - bool printHeader = false; - bool choseNullValue = false; - - // check to see if we should print out some help - if(argc <= 1) showHelp = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) map_help(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-a", 2, parameterLength)) { - if ((i+1) < argc) { - haveBedA = true; - bedAFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-b", 2, parameterLength)) { - if ((i+1) < argc) { - haveBedB = true; - bedBFile = 
argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-c", 2, parameterLength)) { - if ((i+1) < argc) { - haveColumn = true; - column = atoi(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-o", 2, parameterLength)) { - if ((i+1) < argc) { - haveOperation = true; - operation = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-f", 2, parameterLength)) { - if ((i+1) < argc) { - haveFraction = true; - overlapFraction = atof(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-r", 2, parameterLength)) { - reciprocalFraction = true; - } - else if (PARAMETER_CHECK("-s", 2, parameterLength)) { - sameStrand = true; - } - else if (PARAMETER_CHECK("-S", 2, parameterLength)) { - diffStrand = true; - } - else if (PARAMETER_CHECK("-null", 5, parameterLength)) { - nullValue = argv[i + 1]; - choseNullValue = true; - i++; - } - else if(PARAMETER_CHECK("-header", 7, parameterLength)) { - printHeader = true; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBedA || !haveBedB) { - cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl; - showHelp = true; - } - - if (reciprocalFraction && !haveFraction) { - cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl; - showHelp = true; - } - - if (sameStrand && diffStrand) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." 
<< endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - - BedMap *bm = new BedMap(bedAFile, bedBFile, column, operation, - overlapFraction, sameStrand, - diffStrand, reciprocalFraction, - choseNullValue, nullValue, - printHeader); - delete bm; - return 0; - } - else { - map_help(); - return 0; - } -} -*/ - void map_help(void) { cerr << "\nTool: bedtools map (aka mapBed)" << endl; diff --git a/src/utils/Contexts/ContextBase.cpp b/src/utils/Contexts/ContextBase.cpp index cd30b203881b61bfa81ec85ec6ac069bdceb2640..1f0c7a172e9d7af67377176dc400766bba43e276 100644 --- a/src/utils/Contexts/ContextBase.cpp +++ b/src/utils/Contexts/ContextBase.cpp @@ -57,15 +57,6 @@ ContextBase::ContextBase() _programNames["intersect"] = INTERSECT; _programNames["sample"] = SAMPLE; _programNames["map"] = MAP; - - _validScoreOps.insert("sum"); - _validScoreOps.insert("max"); - _validScoreOps.insert("min"); - _validScoreOps.insert("mean"); - _validScoreOps.insert("mode"); - _validScoreOps.insert("median"); - _validScoreOps.insert("antimode"); - _validScoreOps.insert("collapse"); } ContextBase::~ContextBase() diff --git a/src/utils/Contexts/ContextBase.h b/src/utils/Contexts/ContextBase.h index 872193fd5d8724538abf722e8e22b4cbfdf7efff..7846f6218f02e184248ceeb228f6dddb5206e48a 100644 --- a/src/utils/Contexts/ContextBase.h +++ b/src/utils/Contexts/ContextBase.h @@ -191,15 +191,11 @@ protected: int _bamHeaderAndRefIdx; int _maxNumDatabaseFields; bool _useFullBamTags; - QuickString _columnOperation; - int _column; - QuickString _nullValue; bool _reportCount; int _maxDistance; bool _reportNames; bool _reportScores; QuickString _scoreOp; - set<QuickString> _validScoreOps; int _numOutputRecords; diff --git a/src/utils/Contexts/ContextIntersect.h b/src/utils/Contexts/ContextIntersect.h index 0144a1210eef34278c19ac9fe529fd74db9ccfe6..b066e9465eac744b8bed21dcd3c39c935407345c 100644 --- a/src/utils/Contexts/ContextIntersect.h +++ b/src/utils/Contexts/ContextIntersect.h @@ -21,6 +21,8 @@ 
// Set up the defaults for the map tool. Each of these can be replaced later
// by the matching command-line handler (-c, -o, -null, -delim).
ContextMap::ContextMap()
: _delimStr(",")	// separator handed to KeyListOps unless -delim overrides it
{
	// map requires sorted input
	setSortedInput(true);
	setLeftJoin(true);

	// default to BED score column; stored as text because -c accepts a
	// comma-delimited list of columns
	setColumns("5");
	// default to "sum"; likewise a comma-delimited list of operations
	setOperations("sum");
	// default to "." as a NULL value
	setNullValue('.');
}
Finally, make a vector of pairs, where the first + //member of each pair is a column number, and the second member is the code for the + //operation to perform on that column. + + vector<QuickString> columnsVec; + vector<QuickString> opsVec; + int numCols = Tokenize(_columns, columnsVec, ','); + int numOps = Tokenize(_operations, opsVec, ','); + + if (numOps < 1 || numCols < 1) { + cerr << endl << "*****" << endl + << "***** ERROR: There must be at least one column and at least one operation named." << endl; + return false; + } + if (numOps > 1 && numCols != numOps) { + cerr << endl << "*****" << endl + << "***** ERROR: There are " << numCols <<" columns given, but there are " << numOps << " operations. " << endl; + cerr << "\tPlease provide either a single operation that will be applied to all listed columns, " << endl; + cerr << "\tor an operation for each column." << endl; + return false; + } + KeyListOps keyListOps; + for (int i=0; i < (int)columnsVec.size(); i++) { + int col = str2chrPos(columnsVec[i]); + + //check that the column number is valid + if (col < 1 || col > getDatabaseFile()->getNumFields()) { + cerr << endl << "*****" << endl << "***** ERROR: Requested column " << col << ", but database file " + << getDatabaseFileName() << " only has fields 1 - " << getDatabaseFile()->getNumFields() << "." << endl; + return false; + } + const QuickString &operation = opsVec.size() > 1 ? opsVec[i] : opsVec[0]; + KeyListOps::OP_TYPES opCode = keyListOps.getOpCode(operation); + if (opCode == KeyListOps::INVALID) { + cerr << endl << "*****" << endl + << "***** ERROR: " << operation << " is not a valid operation. 
" << endl; + return false; + } + _colOps.push_back(pair<int, KeyListOps::OP_TYPES>(col, opCode)); + } return true; } -// for map, -c is the column upon which to operate +// for map, -c is the string of columns upon which to operate bool ContextMap::handle_c() { if ((_i+1) < _argc) { - setColumn(atoi(_argv[_i + 1])); + setColumns(_argv[_i + 1]); markUsed(_i - _skipFirstArgs); _i++; markUsed(_i - _skipFirstArgs); @@ -91,11 +137,11 @@ bool ContextMap::handle_c() } -// for map, -o is the operation to apply to the column (-c) +// for map, -o is the string of operations to apply to the columns (-c) bool ContextMap::handle_o() { if ((_i+1) < _argc) { - setColumnOperation(_argv[_i + 1]); + setOperations(_argv[_i + 1]); markUsed(_i - _skipFirstArgs); _i++; markUsed(_i - _skipFirstArgs); @@ -116,3 +162,14 @@ bool ContextMap::handle_null() } return true; } + +bool ContextMap::handle_delim() +{ + if ((_i+1) < _argc) { + _delimStr = _argv[_i + 1]; + markUsed(_i - _skipFirstArgs); + _i++; + markUsed(_i - _skipFirstArgs); + } + return true; +} diff --git a/src/utils/Contexts/ContextMap.h b/src/utils/Contexts/ContextMap.h index b8ee57fdd8f6aff9fa0cc076a836985cbffb176f..460f93b29100cf4ad2d6fbc2bd5e9e1805956938 100644 --- a/src/utils/Contexts/ContextMap.h +++ b/src/utils/Contexts/ContextMap.h @@ -9,6 +9,7 @@ #define CONTEXTMAP_H_ #include "ContextIntersect.h" +#include "KeyListOps.h" class ContextMap : public ContextIntersect { public: @@ -18,21 +19,33 @@ public: virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs); - int getColumn() const { return _column; } - void setColumn(int column) { _column = column; } + const QuickString &getColumns() const { return _columns; } + void setColumns(const QuickString &columns) { _columns = columns; } - const QuickString & getColumnOperation() const { return _columnOperation; } - void setColumnOperation(const QuickString & operation) { _columnOperation = operation; } + const QuickString & getOperations() const { return _operations; 
} + void setOperations(const QuickString & operation) { _operations = operation; } const QuickString & getNullValue() const { return _nullValue; } void setNullValue(const QuickString & nullValue) { _nullValue = nullValue; } + const QuickString &getDelim() const { return _delimStr; } virtual bool hasIntersectMethods() const { return true; } + typedef vector<pair<int, KeyListOps::OP_TYPES> > colOpsType; + const colOpsType &getColOps() const { return _colOps; } + private: + QuickString _operations; + QuickString _columns; + QuickString _nullValue; + KeyListOps _keyListOps; + colOpsType _colOps; + QuickString _delimStr; + virtual bool handle_c(); virtual bool handle_o(); virtual bool handle_null(); + virtual bool handle_delim(); }; diff --git a/src/utils/Contexts/Makefile b/src/utils/Contexts/Makefile index 7ddc3c6c3cac4427c0578a33bc6822636bdb8a73..4b2ed4291d7bec5341c11eb7e82602dd7882de9e 100644 --- a/src/utils/Contexts/Makefile +++ b/src/utils/Contexts/Makefile @@ -9,6 +9,7 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \ -I$(UTILITIES_DIR)/FileRecordTools/ \ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \ + -I$(UTILITIES_DIR)/KeyListOps/ \ -I$(UTILITIES_DIR)/GenomeFile/ \ -I$(UTILITIES_DIR)/BamTools/include \ -I$(UTILITIES_DIR)/BamTools/src/ \ diff --git a/src/utils/KeyListOps/KeyListOps.cpp b/src/utils/KeyListOps/KeyListOps.cpp new file mode 100644 index 0000000000000000000000000000000000000000..05a60402e867a641c616bb2b7d86cf599089d759 --- /dev/null +++ b/src/utils/KeyListOps/KeyListOps.cpp @@ -0,0 +1,405 @@ +/* + * KeyListOps.cpp + * + * Created on: Feb 6, 2014 + * Author: nek3d + */ + +#include "KeyListOps.h" +#include <cfloat> +#include <cmath> +#include <algorithm> + +KeyListOps::KeyListOps() +: _keyList(&_nullKeyList), + _column(1), + _nullVal("."), + _delimStr(","), + _iter(_nullKeyList.begin()) +{ + init(); + +} + +KeyListOps::KeyListOps(RecordKeyList *keyList, int column) +: _keyList(keyList), + _column(column), + 
_nullVal("."), + _delimStr(","), + _iter(keyList->begin()) +{ + init(); +} + +void KeyListOps::init() { + _opCodes["sum"] = SUM; + _opCodes["mean"] = MEAN; + _opCodes["stddev"] = STDDEV; + _opCodes["sample_stddev"] = SAMPLE_STDDEV; + _opCodes["median"] = MEDIAN; + _opCodes["mode"] = MODE; + _opCodes["antimode"] = ANTIMODE; + _opCodes["min"] = MIN; + _opCodes["max"] = MAX; + _opCodes["absmin"] = ABSMIN; + _opCodes["absmax"] = ABSMAX; + _opCodes["count"] = COUNT; + _opCodes["distinct"] = DISTINCT; + _opCodes["count_distinct"] = COUNT_DISTINCT; + _opCodes["distinct_only"] = DISTINCT_ONLY; + _opCodes["collapse"] = COLLAPSE; + _opCodes["concat"] = CONCAT; + _opCodes["freq_asc"] = FREQ_ASC; + _opCodes["freq_desc"] = FREQ_DESC; + _opCodes["first"] = FIRST; + _opCodes["last"] = LAST; +} + + +KeyListOps::~KeyListOps() { + +} + +KeyListOps::OP_TYPES KeyListOps::getOpCode(const QuickString &operation) const { + //If the operation does not exist, return INVALID. + //otherwise, return code for given operation. 
+ map<QuickString, OP_TYPES>::const_iterator iter = _opCodes.find(operation); + if (iter == _opCodes.end()) { + return INVALID; + } + return iter->second; +} + +// return the total of the values in the vector +double KeyListOps::getSum() { + if (empty()) return NAN; + + double theSum = 0.0; + for (begin(); !end(); next()) { + theSum += getColValNum(); + } + return theSum; +} + +// return the average value in the vector +double KeyListOps::getMean() { + if (empty()) return NAN; + + return getSum() / (float)getCount(); +} + + + // return the standard deviation +double KeyListOps::getStddev() { + if (empty()) return NAN; + + double avg = getMean(); + double squareDiffSum = 0.0; + for (begin(); !end(); next()) { + double val = getColValNum(); + double diff = val - avg; + squareDiffSum += diff * diff; + } + return squareDiffSum / (float)getCount(); +} +// return the standard deviation +double KeyListOps::getSampleStddev() { + if (empty()) return NAN; + + double avg = getMean(); + double squareDiffSum = 0.0; + for (begin(); !end(); next()) { + double val = getColValNum(); + double diff = val - avg; + squareDiffSum += diff * diff; + } + return squareDiffSum / ((float)getCount() - 1.0); +} + +// return the median value in the vector +double KeyListOps::getMedian() { + if (empty()) return NAN; + + //get sorted vector. if even number of elems, return middle val. + //if odd, average of two. + toArray(true, ASC); + size_t count = getCount(); + if (count % 2) { + //odd number of elements. Take middle one. + return _numArray[count/2]; + } else { + //even numnber of elements. Take average of middle 2. + double sum = _numArray[count/2 -1] + _numArray[count/2]; + return sum / 2.0; + } +} + +// return the most common value in the vector +const QuickString &KeyListOps::getMode() { + if (empty()) return _nullVal; + + makeFreqMap(); + + //now pass through the freq map and keep track of which key has the highest occurance. 
+ freqMapType::iterator maxIter = _freqMap.begin(); + int maxVal = 0; + for (; _freqIter != _freqMap.end(); _freqIter++) { + if (_freqIter->second > maxVal) { + maxIter = _freqIter; + maxVal = _freqIter->second; + } + } + _retStr = maxIter->first; + return _retStr; +} +// return the least common value in the vector +const QuickString &KeyListOps::getAntiMode() { + if (empty()) return _nullVal; + + makeFreqMap(); + + //now pass through the freq map and keep track of which key has the highest occurance. + freqMapType::iterator minIter = _freqMap.begin(); + int minVal = INT_MAX; + for (; _freqIter != _freqMap.end(); _freqIter++) { + if (_freqIter->second < minVal) { + minIter = _freqIter; + minVal = _freqIter->second; + } + } + _retStr = minIter->first; + return _retStr; +} +// return the minimum element of the vector +double KeyListOps::getMin() { + if (empty()) return NAN; + + double minVal = DBL_MAX; + for (begin(); !end(); next()) { + double currVal = getColValNum(); + minVal = (currVal < minVal) ? currVal : minVal; + } + return minVal; +} + +// return the maximum element of the vector +double KeyListOps::getMax() { + if (empty()) return NAN; + + double maxVal = DBL_MIN; + for (begin(); !end(); next()) { + double currVal = getColValNum(); + maxVal = (currVal > maxVal) ? currVal : maxVal; + } + return maxVal; +} + +// return the minimum absolute value of the vector +double KeyListOps::getAbsMin() { + if (empty()) return NAN; + + double minVal = DBL_MAX; + for (begin(); !end(); next()) { + double currVal = abs(getColValNum()); + minVal = (currVal < minVal) ? currVal : minVal; + } + return minVal; +} +// return the maximum absolute value of the vector +double KeyListOps::getAbsMax() { + if (empty()) return NAN; + + double maxVal = DBL_MIN; + for (begin(); !end(); next()) { + double currVal = abs(getColValNum()); + maxVal = (currVal > maxVal) ? 
currVal : maxVal; + } + return maxVal; +} +// return the count of element in the vector +uint32_t KeyListOps::getCount() { + return _keyList->size(); +} +// return a delimited list of the unique elements +const QuickString &KeyListOps::getDistinct() { + if (empty()) return _nullVal; + // separated list of unique values. If something repeats, only report once. + makeFreqMap(); + _retStr.clear(); + for (; _freqIter != _freqMap.end(); _freqIter++) { + if (_freqIter != _freqMap.begin()) _retStr += _delimStr; + _retStr.append(_freqIter->first); + } + return _retStr; +} + +const QuickString &KeyListOps::getDistinctOnly() { + if (empty()) return _nullVal; + + //separated list of only unique values. If item repeats, discard. + makeFreqMap(); + _retStr.clear(); + for (; _freqIter != _freqMap.end(); _freqIter++) { + if (_freqIter->second != 1) continue; + if (_freqIter != _freqMap.begin()) _retStr += _delimStr; + _retStr.append(_freqIter->first); + } + return _retStr; +} + +// return a the count of _unique_ elements in the vector +uint32_t KeyListOps::getCountDistinct() { + if (empty()) return 0; + + makeFreqMap(); + return _freqMap.size(); +} +// return a delimiter-separated list of elements +const QuickString &KeyListOps::getCollapse(const QuickString &delimiter) { + if (empty()) return _nullVal; + + //just put all items in one big separated list. + _retStr.clear(); + int i=0; + for (begin(); !end(); next()) { + if (i > 0) _retStr += _delimStr; + _retStr.append(getColVal()); + i++; + } + return _retStr; + +} +// return a concatenation of all elements in the vector +const QuickString &KeyListOps::getConcat() { + if (empty()) return _nullVal; + + //like collapse but w/o commas. Just a true concat of all vals. + //just swap out the delimChar with '' and call collapse, then + //restore the delimChar. + QuickString oldDelimStr(_delimStr); + _delimStr = ""; + getCollapse(); //this will store it's results in the _retStr method. 
+ _delimStr = oldDelimStr; + return _retStr; +} + +// return a histogram of values and their freqs. in desc. order of frequency +const QuickString &KeyListOps::getFreqDesc() { + if (empty()) return _nullVal; + + //for each uniq val, report # occurances, in desc order. + makeFreqMap(); + //put freq map into multimap where key is the freq and val is the item. In other words, basically a reverse freq map. + histDescType hist; + for (; _freqIter != _freqMap.end(); _freqIter++) { + hist.insert(pair<int, QuickString>(_freqIter->second, _freqIter->first)); + } + //now iterate through the reverse map we just made and output it's pairs in val:key format. + _retStr.clear(); + for (histDescType::iterator histIter = hist.begin(); histIter != hist.end(); histIter++) { + if (histIter != hist.begin()) _retStr += _delimStr; + _retStr.append(histIter->second); + _retStr += ":"; + _retStr.append(histIter->first); + } + return _retStr; +} +// return a histogram of values and their freqs. in asc. order of frequency +const QuickString &KeyListOps::getFreqAsc() { + if (empty()) return _nullVal; + + //for each uniq val, report # occurances, in asc order. + makeFreqMap(); + //put freq map into multimap where key is the freq and val is the item. In other words, basically a reverse freq map. + histAscType hist; + for (; _freqIter != _freqMap.end(); _freqIter++) { + hist.insert(pair<int, QuickString>(_freqIter->second, _freqIter->first)); +// hist[*(_freqIter->second)] = _freqIter->first; + } + //now iterate through the reverse map we just made and output it's pairs in val:key format. 
+ _retStr.clear(); + for (histAscType::iterator histIter = hist.begin(); histIter != hist.end(); histIter++) { + if (histIter != hist.begin()) _retStr += _delimStr; + _retStr.append(histIter->second); + _retStr += ":"; + _retStr.append(histIter->first); + } + return _retStr; +} +// return the first value in the list +const QuickString &KeyListOps::getFirst() { + if (empty()) return _nullVal; + + //just the first item. + begin(); + return getColVal(); +} +// return the last value in the list +const QuickString &KeyListOps::getLast() { + if (empty()) return _nullVal; + + //just the last item. + begin(); + for (size_t i = 0; i < getCount() -1; i++) { + next(); + } + return getColVal(); +} + +const QuickString &KeyListOps::getColVal() { + return _iter->value()->getField(_column); +} + +double KeyListOps::getColValNum() { + return atof(_iter->value()->getField(_column).c_str()); +} + +void KeyListOps::toArray(bool useNum, SORT_TYPE sortVal) { + + //TBD: optimize performance with better memory management. + if (useNum) { + _numArray.resize(_keyList->size()); + int i=0; + for (begin(); !end(); next()) { + _numArray[i] = getColValNum(); + i++; + } + } else { + _qsArray.resize(_keyList->size()); + int i=0; + for (begin(); !end(); next()) { + _qsArray[i] = getColVal(); + i++; + } + } + if (sortVal != UNSORTED) { + sortArray(useNum, sortVal == ASC); + } +} + +void KeyListOps::sortArray(bool useNum, bool ascOrder) +{ + if (useNum) { + if (ascOrder) { + sort(_numArray.begin(), _numArray.end(), less<double>()); + } else { + sort(_numArray.begin(), _numArray.end(), greater<double>()); + } + } else { + if (ascOrder) { + sort(_qsArray.begin(), _qsArray.end(), less<QuickString>()); + } else { + sort(_qsArray.begin(), _qsArray.end(), greater<QuickString>()); + } + } +} + +void KeyListOps::makeFreqMap() { + _freqMap.clear(); + + //make a map of values to their number of times occuring. 
+ for (begin(); !end(); next()) { + _freqMap[getColVal()]++; + } + _freqIter = _freqMap.begin(); +} diff --git a/src/utils/KeyListOps/KeyListOps.h b/src/utils/KeyListOps/KeyListOps.h new file mode 100644 index 0000000000000000000000000000000000000000..e294f535f54355b26b6d2ffcac7e7f0f4f78c38a --- /dev/null +++ b/src/utils/KeyListOps/KeyListOps.h @@ -0,0 +1,117 @@ +/* + * KeyListOps.h + * + * Created on: Feb 6, 2014 + * Author: nek3d + */ + +#ifndef KEYLISTOPS_H_ +#define KEYLISTOPS_H_ + +using namespace std; + +#include <map> +#include <utility> //for pair +#include "QuickString.h" +#include <stdint.h> +#include "RecordKeyList.h" + +class KeyListOps { +public: + KeyListOps(); + KeyListOps(RecordKeyList *keyList, int column = 1); + ~KeyListOps(); + + + void setKeyList(RecordKeyList *keyList) { _keyList = keyList; } + void setColumn(int col) { _column = col; } + void setNullValue(const QuickString & nullVal) { _nullVal = nullVal; } + void setDelimStr(const QuickString &delimStr) { _delimStr = delimStr; } + + + typedef enum { SUM, MEAN, STDDEV, SAMPLE_STDDEV, MEDIAN, MODE, ANTIMODE, MIN, MAX, ABSMIN, ABSMAX, COUNT, DISTINCT, COUNT_DISTINCT, + DISTINCT_ONLY, COLLAPSE, CONCAT, FREQ_ASC, FREQ_DESC, FIRST, LAST, INVALID } OP_TYPES; + + OP_TYPES getOpCode(const QuickString &operation) const; + // return the total of the values in the vector + double getSum(); + // return the average value in the vector + double getMean(); + // return the standard deviation + double getStddev(); + // return the sample standard deviation + double getSampleStddev(); + // return the median value in the vector + double getMedian(); + // return the most common value in the vector + const QuickString &getMode(); + // return the least common value in the vector + const QuickString &getAntiMode(); + // return the minimum element of the vector + double getMin(); + // return the maximum element of the vector + double getMax(); + // return the minimum absolute value of the vector + double getAbsMin(); 
+ // return the maximum absolute value of the vector + double getAbsMax(); + // return the count of elements in the vector + uint32_t getCount(); + // return the count of _unique_ elements in the vector + uint32_t getCountDistinct(); + // return only those elements that occur once + const QuickString &getDistinctOnly(); + // return a delimiter-separated list of elements + const QuickString & getCollapse(const QuickString & delimiter = ","); + // return a concatenation of all elements in the vector + const QuickString & getConcat(); + // return a comma-separated list of the _unique_ elements + const QuickString & getDistinct(); + // return a histogram of values and their freqs. in desc. order of frequency + const QuickString & getFreqDesc(); + // return a histogram of values and their freqs. in asc. order of frequency + const QuickString & getFreqAsc(); + // return the first value in the list + const QuickString & getFirst(); + // return the last value in the list + const QuickString & getLast(); + +private: + RecordKeyList *_keyList; + int _column; + QuickString _nullVal; + QuickString _delimStr; + QuickString _retStr; + + map<QuickString, OP_TYPES> _opCodes; + RecordKeyList _nullKeyList; //this has to exist just so we can initialize _iter, below. + RecordKeyList::const_iterator_type _iter; + + // Some methods need to put values into a vector, mostly for sorting. 
+ vector<double> _numArray; + vector<QuickString> _qsArray; + + typedef map<QuickString, int> freqMapType; + freqMapType _freqMap; + freqMapType::iterator _freqIter; + + typedef enum { UNSORTED, ASC, DESC} SORT_TYPE; + + typedef multimap<int, QuickString, less<int> > histAscType; + typedef multimap<int, QuickString, greater<int> > histDescType; + void init(); + const QuickString &getColVal(); + double getColValNum(); + bool empty() { return _keyList->empty(); } + void begin() { _iter = _keyList->begin(); } + bool end() { return _iter == _keyList->end(); } + void next() { _iter = _keyList->next(); } + void toArray(bool useNum, SORT_TYPE sortVal = UNSORTED); + void sortArray(bool useNum, bool ascOrder); + void makeFreqMap(); + + +}; + + +#endif /* KEYLISTOPS_H_ */ diff --git a/src/utils/KeyListOps/Makefile b/src/utils/KeyListOps/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1797c83985ea50a7bec3979819b27cbd11d0b742 --- /dev/null +++ b/src/utils/KeyListOps/Makefile @@ -0,0 +1,39 @@ +OBJ_DIR = ../../../obj/ +BIN_DIR = ../../../bin/ +UTILITIES_DIR = ../../utils/ +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/general/ \ + -I$(UTILITIES_DIR)/fileType/ \ + -I$(UTILITIES_DIR)/Contexts/ \ + -I$(UTILITIES_DIR)/GenomeFile/ \ + -I$(UTILITIES_DIR)/FileRecordTools/ \ + -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \ + -I$(UTILITIES_DIR)/FileRecordTools/Records/ \ + -I$(UTILITIES_DIR)/BamTools/include \ + -I$(UTILITIES_DIR)/BamTools/src/ \ + -I$(UTILITIES_DIR)/version/ + + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= KeyListOps.cpp KeyListOps.h +OBJECTS= KeyListOps.o +_EXT_OBJECTS= +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + 
+$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(INCLUDES) + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/KeyListOps.o + +.PHONY: clean \ No newline at end of file diff --git a/src/utils/general/Makefile b/src/utils/general/Makefile index 43dcfba076d000829c4f2842e16136f24e0ede32..0361fab41acde1e99e8268075d35b533dc3bfc8a 100644 --- a/src/utils/general/Makefile +++ b/src/utils/general/Makefile @@ -4,7 +4,7 @@ UTILITIES_DIR = ../../utils/ # ------------------- # define our includes # ------------------- -INCLUDES = +INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ # ---------------------------------- # define our source and object files diff --git a/src/utils/general/QuickString.cpp b/src/utils/general/QuickString.cpp index 831f84ab9eaa3574ab316c9c0256531a4b3608f1..9e061866f62969778b829c247bad8d32345aad58 100644 --- a/src/utils/general/QuickString.cpp +++ b/src/utils/general/QuickString.cpp @@ -3,6 +3,7 @@ #include <cstdlib> #include <cstdio> #include "ParseTools.h" +#include "lineFileUtilities.h" QuickString::QuickString(size_t capacity) : _buffer(NULL), @@ -82,6 +83,35 @@ QuickString &QuickString::operator = (const QuickString & inBuf){ return *this; } +QuickString &QuickString::operator = (char val) { + clear(); + append(val); + return *this; +} +QuickString &QuickString::operator = (int val) { + clear(); + append(val); + return *this; +} + +QuickString &QuickString::operator = (uint32_t val) { + clear(); + append(val); + return *this; +} + +QuickString &QuickString::operator = (float val) { + clear(); + append(val); + return *this; +} + +QuickString &QuickString::operator = (double val) { + clear(); + append(val); + return *this; +} + QuickString &QuickString::operator += (const QuickString & inBuf) { @@ -107,6 +137,26 @@ QuickString &QuickString::operator += (const char *inBuf) return *this; } +QuickString &QuickString::operator += (int num) { + append(num); + return *this; +} + +QuickString &QuickString::operator += (uint32_t num) { + append(num); + 
return *this; +} + +QuickString &QuickString::operator += (float num) { + append(num); + return *this; +} + +QuickString &QuickString::operator += (double num) { + append(num); + return *this; +} + bool QuickString::operator == (const QuickString &qs) const { if ( _currSize != qs._currSize) { return false; @@ -194,6 +244,21 @@ void QuickString::append(const char *inBuf, size_t inBufLen) void QuickString::append(int num) { int2str(num, *this, true); } + +void QuickString::append(uint32_t num) { + int2str((int)num, *this, true); +} + +void QuickString::append(float num) { + append(ToString(num)); +} + +void QuickString::append(double num) { + append(ToString(num)); +} + + + QuickString &QuickString::assign(const char *inBuf, size_t inBufLen) { clear(); diff --git a/src/utils/general/QuickString.h b/src/utils/general/QuickString.h index 5fdc0fc4c77e8d6a01d1b157740a8d1052775054..a76e5ff223c9554ec42c09599cdb50e4c6b46aff 100644 --- a/src/utils/general/QuickString.h +++ b/src/utils/general/QuickString.h @@ -10,6 +10,7 @@ using namespace std; #include <string> +#include <stdint.h> #include <climits> #include <ostream> @@ -32,10 +33,19 @@ public: QuickString &operator = (const string &); QuickString &operator = (const char *); QuickString &operator = (const QuickString &); + QuickString &operator = (char); + QuickString &operator = (int); + QuickString &operator = (uint32_t); + QuickString &operator = (float); + QuickString &operator = (double); QuickString &operator += (const QuickString &); QuickString &operator += (const string &); QuickString &operator += (const char *); QuickString &operator += (char); + QuickString &operator += (int); + QuickString &operator += (uint32_t); + QuickString &operator += (float); + QuickString &operator += (double); friend ostream &operator << (ostream &out, const QuickString &str); bool operator == (const QuickString &) const; @@ -52,7 +62,16 @@ public: void append(const QuickString &str) { append(str.c_str(), str.size()); } void 
append(const char *buf, size_t bufLen); void append(char c); + + //These are not templated because float and double require a stringstream based + //implementation, while the integer append uses a much faster home-brewed algorithm + //for better performance. void append(int num); + void append(uint32_t num); + void append(float num); + void append(double num); + + QuickString &assign(const char *str, size_t n); void resize(size_t n, char c = '\0'); diff --git a/test/map/test-map.sh b/test/map/test-map.sh index 293d84e3f443ac7b2bbe405e36b77f752c7298f0..f62d364207c2612a368a30a513f0e5c9ff1462c6 100644 --- a/test/map/test-map.sh +++ b/test/map/test-map.sh @@ -499,10 +499,8 @@ echo " map.t33..\c" echo \ " ***** -*****ERROR: requested column 15 , but record only has fields 1 - 12. Exiting. - -*****" > exp -$BT map -a ivls.bed -b test.vcf -c 15 -o collapse 2> obs +***** ERROR: Requested column 15, but database file test.vcf only has fields 1 - 12." > exp +$BT map -a ivls.bed -b test.vcf -c 15 -o collapse 2>&1 > /dev/null | head -3> obs check obs exp rm obs exp @@ -624,12 +622,9 @@ echo " map.t41..\c" echo \ " ***** -*****ERROR: requested column 41 , but record only has fields 1 - 6. Exiting. - -*****" > exp -$BT map -a ivls.bed -b values5.bed -c 41 -o collapse 2> obs +***** ERROR: Requested column 41, but database file test.vcf only has fields 1 - 12." > exp +$BT map -a ivls.bed -b test.vcf -c 41 -o collapse 2>&1 > /dev/null | head -3> obs check obs exp - rm obs exp ########################################################### @@ -639,12 +634,9 @@ echo " map.t42..\c" echo \ " ***** -*****ERROR: requested column -1 , but record only has fields 1 - 6. Exiting. - -*****" > exp -$BT map -a ivls.bed -b values5.bed -c -1 -o collapse 2> obs +***** ERROR: Requested column -1, but database file test.vcf only has fields 1 - 12." 
> exp +$BT map -a ivls.bed -b test.vcf -c -1 -o collapse 2>&1 > /dev/null | head -3> obs check obs exp - rm obs exp ########################################################### @@ -654,12 +646,9 @@ echo " map.t43..\c" echo \ " ***** -*****ERROR: requested column 0 , but record only has fields 1 - 6. Exiting. - -*****" > exp -$BT map -a ivls.bed -b values5.bed -c 0 -o collapse 2> obs +***** ERROR: Requested column 0, but database file test.vcf only has fields 1 - 12." > exp +$BT map -a ivls.bed -b test.vcf -c 0 -o collapse 2>&1 > /dev/null | head -3> obs check obs exp - rm obs exp