Commit 8b8a9cd5 authored by Neil Kindlon
Browse files

Added distinct_sort_num option to VectorOps for groupBy.

parent ec745749
......@@ -83,8 +83,7 @@ int groupby_main(int argc, char* argv[]) {
bool InputHaveHeaderLine = false;
bool ignoreCase = false;
int precision = 21;
// check to see if we should print out some help
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
for(int i = 1; i < argc; i++) {
......@@ -186,6 +185,7 @@ int groupby_main(int argc, char* argv[]) {
i++;
}
}
else {
cerr << endl
<< "*****ERROR: Unrecognized parameter: "
......@@ -216,7 +216,7 @@ int groupby_main(int argc, char* argv[]) {
(ops[i] != "antimode") && (ops[i] != "stdev") &&
(ops[i] != "sstdev") && (ops[i] != "count") &&
(ops[i] != "count_distinct") && (ops[i] != "collapse") &&
(ops[i] != "distinct") && (ops[i] != "concat") &&
(ops[i] != "distinct") && (ops[i] != "distinct_sort_num") && (ops[i] != "concat") &&
(ops[i] != "freqdesc") && (ops[i] != "freqasc") &&
(ops[i] != "first") && (ops[i] != "last") )
{
......@@ -318,6 +318,7 @@ void groupby_help(void) {
cerr << "\t\t\t stdev, sstdev (sample standard dev.)," << endl;
cerr << "\t\t\t collapse (i.e., print a comma separated list (duplicates allowed)), " << endl;
cerr << "\t\t\t distinct (i.e., print a comma separated list (NO duplicates allowed)), " << endl;
cerr << "\t\t\t distinct_sort_num (as distinct, but sorted numerically), " << endl;
cerr << "\t\t\t concat (i.e., merge values into a single, non-delimited string), " << endl;
cerr << "\t\t\t freqdesc (i.e., print desc. list of values:freq)" << endl;
cerr << "\t\t\t freqasc (i.e., print asc. list of values:freq)" << endl;
......@@ -473,7 +474,7 @@ void GroupBy (const string &inFile,
void ReportSummary(const vector<string> &group,
const vector<vector<string> > &data,
const vector<string> &ops,
int precision)
int precision)
{
vector<string> result;
......@@ -497,6 +498,10 @@ void ReportSummary(const vector<string> &group,
else if (op == "distinct") {
result.push_back(vo.GetDistinct());
}
else if (op == "distinct_sort_num") {
result.push_back(vo.GetDistinctSortNum());
}
else if (op == "concat") {
result.push_back(vo.GetConcat());
}
......
......@@ -267,6 +267,25 @@ string VectorOps::GetDistinct(void)
return distinct.str();
}
// Return a comma-separated list of the distinct values held in _vecs,
// converted to numbers and listed in ascending numeric order.
//
// Every element of _vecs is converted with MakeDouble. Duplicates are removed
// on the converted values, so different spellings that convert to the same
// double (presumably e.g. "1" and "1.0" -- depends on MakeDouble) yield a
// single entry. An empty input yields an empty string.
string VectorOps::GetDistinctSortNum(void)
{
    // An output stream prints floating-point values with 6 significant digits
    // by default, which turns 12345678 into "1.23457e+07" and can make
    // distinct values print identically. 15 is the number of decimal digits
    // an IEEE-754 double is guaranteed to preserve (DBL_DIG), so inputs with
    // up to 15 significant digits are echoed back without rounding noise.
    const int kSignificantDigits = 15;

    // Convert into a local vector instead of appending to the _vecd member,
    // so that this query does not leave the object's numeric data sorted and
    // stripped of duplicates.
    vector<double> values;
    values.reserve(_vecs.size());
    transform(_vecs.begin(), _vecs.end(), back_inserter(values), MakeDouble);

    // sort + unique + erase: duplicates must be adjacent for unique() to
    // find them, and sorting also provides the numeric output order.
    // http://stackoverflow.com/questions/1041620/most-efficient-way-to-erase-duplicates-and-sort-a-c-vector
    sort(values.begin(), values.end());
    values.erase(unique(values.begin(), values.end()), values.end());

    ostringstream distinct;
    distinct.precision(kSignificantDigits);
    for (size_t i = 0; i < values.size(); i++) {
        if (i > 0)
            distinct << ",";
        distinct << values[i];
    }
    return distinct.str();
}
string VectorOps::GetFreqDesc(void)
{
// compute the frequency of each unique value
......@@ -349,5 +368,3 @@ string VectorOps::GetLast(void)
{
return _vecs[_vecs.size() - 1];
}
......@@ -72,7 +72,9 @@ public:
// return a concatenation of all elements in the vector
string GetConcat(void);
// return a comma-separated list of the _unique_ elements
string GetDistinct(void);
string GetDistinct(void);
// return a comma-separated list of the _unique_ elements, sorted numerically
string GetDistinctSortNum(void);
// return a histogram of values and their freqs. in desc. order of frequency
string GetFreqDesc(void);
// return a histogram of values and their freqs. in asc. order of frequency
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment