diff --git a/src/mapFile/mapFile.cpp b/src/mapFile/mapFile.cpp index 360b6d69697713b770b7951bfd1dfe55bf15e648..3bd133d2b4fb13c01d58eecaa97dd9e48e091a37 100644 --- a/src/mapFile/mapFile.cpp +++ b/src/mapFile/mapFile.cpp @@ -55,187 +55,70 @@ bool FileMap::mapFiles() //} else { //} //_recordOutputMgr->printKeyAndTerminate(hitSet); - _recordOutputMgr->printRecord(hitSet.getKey(), MapHits(hitSet)); + SummarizeHits(hitSet); + _recordOutputMgr->printRecord(hitSet.getKey(), _output); } return true; } void FileMap::ExtractColumnFromHits(RecordKeyList &hits) { - for (RecordKeyList::const_iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) { - try { - ostringstream startString; - startString << iter->value()->getStartPos(); - //_column_vec.push_back(iter->value().fields.at(_column)); - _column_vec.push_back(startString.str()); - } - catch(std::out_of_range& e) { - exit(1); - } - /* - catch(std::out_of_range& e) { - cerr << endl << "*****" << endl - << "*****ERROR: requested column (" - << _column + 1 - << ") exceeds the number of columns in file at line " - << _bedA->_lineNum << ". Exiting." - << endl << "*****" << endl; - exit(1); - } - */ + _column_vec.clear(); + RecordKeyList::const_iterator_type iter = hits.begin(); + for (; iter != hits.end(); iter = hits.next()) + { + _column_vec.push_back(iter->value()->getField(_context->getColumn()).str()); } -} +} -string FileMap::MapHits(RecordKeyList &hits) { - - ostringstream output; - if (hits.size() == 0) - // FIX ME - return "-1"; - //return _nullValue; +void FileMap::SummarizeHits(RecordKeyList &hits) { - ExtractColumnFromHits(hits); + const QuickString & operation = _context->getColumnOperation(); + _output.clear(); + + if (hits.size() == 0) { + if (operation == "count" || operation == "count_distinct") + _output.append("0"); + else + _output.append(_context->getNullValue().str()); + return; + } - string operation = _context->getColumnOperation(); + _tmp_output.str(""); + _tmp_output.clear(); + + ExtractColumnFromHits(hits); VectorOps vo(_column_vec); if (operation == "sum") - output << setprecision (PRECISION) << vo.GetSum(); + _tmp_output << setprecision (PRECISION) << vo.GetSum(); else if (operation == "mean") - output << setprecision (PRECISION) << vo.GetMean(); + _tmp_output << setprecision (PRECISION) << vo.GetMean(); else if (operation == "median") - output << setprecision (PRECISION) << vo.GetMedian(); + _tmp_output << setprecision (PRECISION) << vo.GetMedian(); else if (operation == "min") - output << setprecision (PRECISION) << vo.GetMin(); + _tmp_output << setprecision (PRECISION) << vo.GetMin(); else if (operation == "max") - output << setprecision (PRECISION) << vo.GetMax(); + _tmp_output << setprecision (PRECISION) << vo.GetMax(); + else if (operation == "absmin") + _tmp_output << setprecision (PRECISION) << vo.GetAbsMin(); + else if (operation == "absmax") + _tmp_output << setprecision (PRECISION) << vo.GetAbsMax(); else if (operation == "mode") - output << vo.GetMode(); + _tmp_output << vo.GetMode(); else if (operation == "antimode") - output << vo.GetAntiMode(); + _tmp_output << vo.GetAntiMode(); else if (operation == "count") - output << setprecision (PRECISION) << vo.GetCount(); + _tmp_output << setprecision (PRECISION) << vo.GetCount(); else if (operation == "count_distinct") - output << setprecision (PRECISION) << vo.GetCountDistinct(); + _tmp_output << setprecision (PRECISION) << vo.GetCountDistinct(); else if (operation == "collapse") - output << vo.GetCollapse(); + _tmp_output << vo.GetCollapse(); else if (operation == "distinct") - output << vo.GetDistinct(); + _tmp_output << vo.GetDistinct(); else { cerr << "ERROR: " << operation << " is an unrecoginzed operation\n"; exit(1); } - _column_vec.clear(); - return output.str(); -} - - -/* -const int PRECISION = 21; -double GetUserColumn(const string s); - -// Constructor -BedMap::BedMap(string bedAFile, string bedBFile, int column, string operation, - float overlapFraction, bool sameStrand, - bool diffStrand, bool reciprocal, - bool choseNullValue, string nullValue, - bool printHeader) -{ - - _bedAFile = bedAFile; - _bedBFile = bedBFile; - _column = column - 1; // user's request is 1-based - _operation = operation; - _overlapFraction = overlapFraction; - _sameStrand = sameStrand; - _diffStrand = diffStrand; - _reciprocal = reciprocal; - _nullValue = nullValue; - _printHeader = printHeader; - - if (!choseNullValue && operation == "count") - _nullValue = "0"; - Map(); -} - -// Destructor -BedMap::~BedMap(void) -{} - -void BedMap::Map() { + _output.append(_tmp_output.str()); - // create new BED file objects for A and B - _bedA = new BedFile(_bedAFile); - _bedB = new BedFile(_bedBFile); - - // use the chromsweep algorithm to detect overlaps on the fly. - ChromSweep sweep = ChromSweep(_bedA, _bedB, - _sameStrand, _diffStrand, - _overlapFraction, _reciprocal, - false, _printHeader); - - pair<BED, vector<BED> > hit_set; - hit_set.second.reserve(10000); - while (sweep.Next(hit_set)) { - string result = MapHits(hit_set.first, hit_set.second); - _bedA->reportBedTab(hit_set.first); - printf("%s\n", result.c_str()); - } -} - - -string BedMap::MapHits(const BED &a, const vector<BED> &hits) { - - ostringstream output; - if (hits.size() == 0) - return _nullValue; - - ExtractColumnFromHits(hits); - VectorOps vo(_column_vec); - if (_operation == "sum") - output << setprecision (PRECISION) << vo.GetSum(); - else if (_operation == "mean") - output << setprecision (PRECISION) << vo.GetMean(); - else if (_operation == "median") - output << setprecision (PRECISION) << vo.GetMedian(); - else if (_operation == "min") - output << setprecision (PRECISION) << vo.GetMin(); - else if (_operation == "max") - output << setprecision (PRECISION) << vo.GetMax(); - else if (_operation == "mode") - output << vo.GetMode(); - else if (_operation == "antimode") - output << vo.GetAntiMode(); - else if (_operation == "count") - output << setprecision (PRECISION) << vo.GetCount(); - else if (_operation == "count_distinct") - output << setprecision (PRECISION) << vo.GetCountDistinct(); - else if (_operation == "collapse") - output << vo.GetCollapse(); - else if (_operation == "distinct") - output << vo.GetDistinct(); - else { - cerr << "ERROR: " << _operation << " is an unrecoginzed operation\n"; - exit(1); - } - _column_vec.clear(); - return output.str(); -} - - -void BedMap::ExtractColumnFromHits(const vector<BED> &hits) { - for (size_t i = 0; i < hits.size(); ++i) { - try { - _column_vec.push_back(hits[i].fields.at(_column)); - } - catch(std::out_of_range& e) { - cerr << endl << "*****" << endl - << "*****ERROR: requested column (" - << _column + 1 - << ") exceeds the number of columns in file at line " - << _bedA->_lineNum << ". Exiting." - << endl << "*****" << endl; - exit(1); - } - } } -*/ diff --git a/src/mapFile/mapFile.h b/src/mapFile/mapFile.h index 57e97556ceacdb41541a3e97a7fb20e667fa2d17..cb1da082182baec7a78fd12b480ef684fee57701 100644 --- a/src/mapFile/mapFile.h +++ b/src/mapFile/mapFile.h @@ -42,11 +42,13 @@ private: vector<string> _column_vec; // vector to hold current column's worth of data + ostringstream _tmp_output; + QuickString _output; // placeholder for the results of mapping B to each a in A. //------------------------------------------------ // private methods //------------------------------------------------ void Map(); - string MapHits(RecordKeyList &hits); + void SummarizeHits(RecordKeyList &hits); void ExtractColumnFromHits(RecordKeyList &hits); }; diff --git a/src/mapFile/mapMain.cpp b/src/mapFile/mapMain.cpp index bc437edb25380782bf39b58ad4e2323ed18213f3..fe87c9f3d96cc6b1b98b69607f6a40affab05597 100644 --- a/src/mapFile/mapMain.cpp +++ b/src/mapFile/mapMain.cpp @@ -191,7 +191,7 @@ void map_help(void) { cerr << "\t-o\t" << "Specify the operation that should be applied to -c." << endl; cerr << "\t\t Valid operations:" << endl; - cerr << "\t\t sum, min, max," << endl; + cerr << "\t\t sum, min, max, absmin, absmax," << endl; cerr << "\t\t mean, median," << endl; cerr << "\t\t collapse (i.e., print a comma separated list (duplicates allowed)), " << endl; cerr << "\t\t distinct (i.e., print a comma separated list (NO duplicates allowed)), " << endl; @@ -215,6 +215,9 @@ void map_help(void) { cerr << "\t\tthat overlap A on the _opposite_ strand." << endl; cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl; + cerr << "\t-g\t" << "Provide a genome file to enforce consistent chromosome sort order" << endl; + cerr <<"\t\tacross input files." << endl << endl; + cerr << "\t-null\t" << "The value to print if no overlaps are found for an A interval." << endl; cerr << "\t\t- Default - \".\"" << endl << endl;