Skip to content
Snippets Groups Projects
Commit 823bfa7f authored by arq5x's avatar arq5x
Browse files

map uses PFM.

parent ca1d3bdd
No related branches found
No related tags found
No related merge requests found
......@@ -55,187 +55,70 @@ bool FileMap::mapFiles()
//} else {
//}
//_recordOutputMgr->printKeyAndTerminate(hitSet);
_recordOutputMgr->printRecord(hitSet.getKey(), MapHits(hitSet));
SummarizeHits(hitSet);
_recordOutputMgr->printRecord(hitSet.getKey(), _output);
}
return true;
}
void FileMap::ExtractColumnFromHits(RecordKeyList &hits) {
for (RecordKeyList::const_iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) {
try {
ostringstream startString;
startString << iter->value()->getStartPos();
//_column_vec.push_back(iter->value().fields.at(_column));
_column_vec.push_back(startString.str());
}
catch(std::out_of_range& e) {
exit(1);
}
/*
catch(std::out_of_range& e) {
cerr << endl << "*****" << endl
<< "*****ERROR: requested column ("
<< _column + 1
<< ") exceeds the number of columns in file at line "
<< _bedA->_lineNum << ". Exiting."
<< endl << "*****" << endl;
exit(1);
}
*/
_column_vec.clear();
RecordKeyList::const_iterator_type iter = hits.begin();
for (; iter != hits.end(); iter = hits.next())
{
_column_vec.push_back(iter->value()->getField(_context->getColumn()).str());
}
}
}
string FileMap::MapHits(RecordKeyList &hits) {
ostringstream output;
if (hits.size() == 0)
// FIX ME
return "-1";
//return _nullValue;
void FileMap::SummarizeHits(RecordKeyList &hits) {
ExtractColumnFromHits(hits);
const QuickString & operation = _context->getColumnOperation();
_output.clear();
if (hits.size() == 0) {
if (operation == "count" || operation == "count_distinct")
_output.append("0");
else
_output.append(_context->getNullValue().str());
return;
}
string operation = _context->getColumnOperation();
_tmp_output.str("");
_tmp_output.clear();
ExtractColumnFromHits(hits);
VectorOps vo(_column_vec);
if (operation == "sum")
output << setprecision (PRECISION) << vo.GetSum();
_tmp_output << setprecision (PRECISION) << vo.GetSum();
else if (operation == "mean")
output << setprecision (PRECISION) << vo.GetMean();
_tmp_output << setprecision (PRECISION) << vo.GetMean();
else if (operation == "median")
output << setprecision (PRECISION) << vo.GetMedian();
_tmp_output << setprecision (PRECISION) << vo.GetMedian();
else if (operation == "min")
output << setprecision (PRECISION) << vo.GetMin();
_tmp_output << setprecision (PRECISION) << vo.GetMin();
else if (operation == "max")
output << setprecision (PRECISION) << vo.GetMax();
_tmp_output << setprecision (PRECISION) << vo.GetMax();
else if (operation == "absmin")
_tmp_output << setprecision (PRECISION) << vo.GetAbsMin();
else if (operation == "absmax")
_tmp_output << setprecision (PRECISION) << vo.GetAbsMax();
else if (operation == "mode")
output << vo.GetMode();
_tmp_output << vo.GetMode();
else if (operation == "antimode")
output << vo.GetAntiMode();
_tmp_output << vo.GetAntiMode();
else if (operation == "count")
output << setprecision (PRECISION) << vo.GetCount();
_tmp_output << setprecision (PRECISION) << vo.GetCount();
else if (operation == "count_distinct")
output << setprecision (PRECISION) << vo.GetCountDistinct();
_tmp_output << setprecision (PRECISION) << vo.GetCountDistinct();
else if (operation == "collapse")
output << vo.GetCollapse();
_tmp_output << vo.GetCollapse();
else if (operation == "distinct")
output << vo.GetDistinct();
_tmp_output << vo.GetDistinct();
else {
cerr << "ERROR: " << operation << " is an unrecoginzed operation\n";
exit(1);
}
_column_vec.clear();
return output.str();
}
/*
const int PRECISION = 21;
double GetUserColumn(const string s);
// Constructor
BedMap::BedMap(string bedAFile, string bedBFile, int column, string operation,
float overlapFraction, bool sameStrand,
bool diffStrand, bool reciprocal,
bool choseNullValue, string nullValue,
bool printHeader)
{
_bedAFile = bedAFile;
_bedBFile = bedBFile;
_column = column - 1; // user's request is 1-based
_operation = operation;
_overlapFraction = overlapFraction;
_sameStrand = sameStrand;
_diffStrand = diffStrand;
_reciprocal = reciprocal;
_nullValue = nullValue;
_printHeader = printHeader;
if (!choseNullValue && operation == "count")
_nullValue = "0";
Map();
}
// Destructor
BedMap::~BedMap(void)
{}
void BedMap::Map() {
_output.append(_tmp_output.str());
// create new BED file objects for A and B
_bedA = new BedFile(_bedAFile);
_bedB = new BedFile(_bedBFile);
// use the chromsweep algorithm to detect overlaps on the fly.
ChromSweep sweep = ChromSweep(_bedA, _bedB,
_sameStrand, _diffStrand,
_overlapFraction, _reciprocal,
false, _printHeader);
pair<BED, vector<BED> > hit_set;
hit_set.second.reserve(10000);
while (sweep.Next(hit_set)) {
string result = MapHits(hit_set.first, hit_set.second);
_bedA->reportBedTab(hit_set.first);
printf("%s\n", result.c_str());
}
}
string BedMap::MapHits(const BED &a, const vector<BED> &hits) {
ostringstream output;
if (hits.size() == 0)
return _nullValue;
ExtractColumnFromHits(hits);
VectorOps vo(_column_vec);
if (_operation == "sum")
output << setprecision (PRECISION) << vo.GetSum();
else if (_operation == "mean")
output << setprecision (PRECISION) << vo.GetMean();
else if (_operation == "median")
output << setprecision (PRECISION) << vo.GetMedian();
else if (_operation == "min")
output << setprecision (PRECISION) << vo.GetMin();
else if (_operation == "max")
output << setprecision (PRECISION) << vo.GetMax();
else if (_operation == "mode")
output << vo.GetMode();
else if (_operation == "antimode")
output << vo.GetAntiMode();
else if (_operation == "count")
output << setprecision (PRECISION) << vo.GetCount();
else if (_operation == "count_distinct")
output << setprecision (PRECISION) << vo.GetCountDistinct();
else if (_operation == "collapse")
output << vo.GetCollapse();
else if (_operation == "distinct")
output << vo.GetDistinct();
else {
cerr << "ERROR: " << _operation << " is an unrecoginzed operation\n";
exit(1);
}
_column_vec.clear();
return output.str();
}
void BedMap::ExtractColumnFromHits(const vector<BED> &hits) {
for (size_t i = 0; i < hits.size(); ++i) {
try {
_column_vec.push_back(hits[i].fields.at(_column));
}
catch(std::out_of_range& e) {
cerr << endl << "*****" << endl
<< "*****ERROR: requested column ("
<< _column + 1
<< ") exceeds the number of columns in file at line "
<< _bedA->_lineNum << ". Exiting."
<< endl << "*****" << endl;
exit(1);
}
}
}
*/
......@@ -42,11 +42,13 @@ private:
vector<string> _column_vec; // vector to hold current column's worth of data
ostringstream _tmp_output;
QuickString _output; // placeholder for the results of mapping B to each a in A.
//------------------------------------------------
// private methods
//------------------------------------------------
void Map();
string MapHits(RecordKeyList &hits);
void SummarizeHits(RecordKeyList &hits);
void ExtractColumnFromHits(RecordKeyList &hits);
};
......
......@@ -191,7 +191,7 @@ void map_help(void) {
cerr << "\t-o\t" << "Specify the operation that should be applied to -c." << endl;
cerr << "\t\t Valid operations:" << endl;
cerr << "\t\t sum, min, max," << endl;
cerr << "\t\t sum, min, max, absmin, absmax," << endl;
cerr << "\t\t mean, median," << endl;
cerr << "\t\t collapse (i.e., print a comma separated list (duplicates allowed)), " << endl;
cerr << "\t\t distinct (i.e., print a comma separated list (NO duplicates allowed)), " << endl;
......@@ -215,6 +215,9 @@ void map_help(void) {
cerr << "\t\tthat overlap A on the _opposite_ strand." << endl;
cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl;
cerr << "\t-g\t" << "Provide a genome file to enforce consistent chromosome sort order" << endl;
cerr <<"\t\tacross input files." << endl << endl;
cerr << "\t-null\t" << "The value to print if no overlaps are found for an A interval." << endl;
cerr << "\t\t- Default - \".\"" << endl << endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment