/*****************************************************************************
groupBy.cpp

(c) 2009, 2010, 2011 - Aaron Quinlan
Center for Public Health Genomics
University of Virginia
aaronquinlan@gmail.com

Licenced under the MIT license.
******************************************************************************/
#include <vector>
#include <map>
#include <numeric>
#include <algorithm>
#include <iterator>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <string>
#include <stdlib.h>
#include <math.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>   // ::tolower, ::isspace
#include <exception>
#include <stdexcept> // out_of_range exception

#include "version.h"
#include "lineFileUtilities.h"
#include "tabFile.h"
#include "VectorOps.h"
using namespace std;


// define our program name
#define PROGRAM_NAME "bedtools groupby"
// define our parameter checking macro
// NOTE: PARAMETER_CHECK reads argv[i] from the enclosing scope, so it can only
// be used inside a loop over argv with an index named i. paramLen must be the
// exact length of the literal, otherwise the comparison can never succeed.
#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen))
// true when the C string is non-empty and starts with '-'
#define LOOKS_LIKE_A_PARAM(string) (strlen(string)>0 && string[0]=='-')

// function declarations

// print the usage text to stderr and exit
void groupby_help(void);

// read inFile and report one summary line per run of identical group keys
void GroupBy(const string &inFile, const vector<int> &groupColumns,
             const vector<int> &opColumns, const vector<string> &ops,
             const bool printOriginalLine, const bool printHeaderLine,
             const bool InputHaveHeaderLine, const bool ignoreCase, int precision);

// print the output header line (group columns followed by op(column) labels)
void PrintHeaderLine(const vector<string> &InputFields,
                     const vector<int> &groupColumns,
                     const vector<int> &opColumns,
                     const vector<string> &ops,
                     const bool PrintFullInputLine,
                     const bool InputHaveHeaderLine);

// apply ops[i] to data[i] and print the group followed by the results
void ReportSummary(const vector<string> &group,
                   const vector<vector<string> > &data,
                   const vector<string> &ops,
                   int precision);

// append fromList[index] (0-based) to toList, optionally lower-cased
void addValue (const vector<string> &fromList,
               vector<string> &toList,
               int index,
               int lineNum,
               const bool ignoreCase);

// small stdout helpers used with for_each
void TabPrintPost (string element);
void TabPrintPre (string element);
void CommaPrint (string element);

int groupby_main(int argc, char* argv[]) {

    // input files
    string inFile             = "stdin";
    string groupColumnsString = "1,2,3";
    string opsColumnString;
    string opsString;

    // our configuration variables
    bool showHelp          = false;
    bool haveOpColumns     = false;
    bool haveOps           = true;
    bool printOriginalLine = false;
    bool printHeaderLine   = false;
    bool InputHaveHeaderLine = false;
    bool ignoreCase    = false;
arq5x's avatar
arq5x committed
85
    int precision = 21;
86
     // check to see if we should print out some help
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
    if(argc <= 1) showHelp = true;

    for(int i = 1; i < argc; i++) {
        int parameterLength = (int)strlen(argv[i]);

        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
        (PARAMETER_CHECK("--help", 5, parameterLength))) {
            showHelp = true;
        }
    }

    if(showHelp) groupby_help();

    // do some parsing (all of these parameters require 2 strings)
    for(int i = 1; i < argc; i++) {

        int parameterLength = (int)strlen(argv[i]);

        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
Aaron's avatar
Aaron committed
106
            if ((i+1) < argc) {
107
108
109
110
                inFile     = argv[i + 1];
                i++;
            }
        }
111
112
113
        else if (PARAMETER_CHECK("-grp", 4, parameterLength) || 
                 PARAMETER_CHECK("-g", 2, parameterLength)) 
        {
114
            if ((i+1) >= argc || LOOKS_LIKE_A_PARAM(argv[i+1])) {
115
116
117
                cerr << endl 
                     << "*****ERROR: -grp parameter requires a value." 
                     << endl << endl;
118
119
120
121
122
123
124
125
                groupby_help();
                break;
            }
            else {
                groupColumnsString     = argv[i + 1];
                i++;
            }
        }
126
127
128
        else if(PARAMETER_CHECK("-opCols", 7, parameterLength) || 
                PARAMETER_CHECK("-c", 2, parameterLength)) 
        {
129
            if ((i+1) >= argc || LOOKS_LIKE_A_PARAM(argv[i+1])) {
130
131
132
                cerr << endl 
                     << "*****ERROR: -opCols parameter requires a value." 
                     << endl << endl;
133
134
135
136
137
138
139
140
141
                groupby_help();
                break;
            }
            else {
                haveOpColumns       = true;
                opsColumnString     = argv[i + 1];
                i++;
            }
        }
142
143
144
        else if(PARAMETER_CHECK("-ops", 4, parameterLength) || 
                PARAMETER_CHECK("-o", 2, parameterLength)) 
        {
145
            if ((i+1) >= argc || LOOKS_LIKE_A_PARAM(argv[i+1])) {
146
147
148
                cerr << endl 
                     << "*****ERROR: -ops parameter requires a value." 
                     << endl << endl;
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
                groupby_help();
                break;
            }
            else {
                haveOps    = true;
                opsString  = argv[i + 1];
                i++;
            }
        }
        else if(PARAMETER_CHECK("-full", 5, parameterLength)) {
            printOriginalLine = true;
        }
        else if(PARAMETER_CHECK("-outheader", 10, parameterLength)) {
            printHeaderLine = true;
        }
        else if(PARAMETER_CHECK("-inheader", 9, parameterLength)) {
            InputHaveHeaderLine = true;
        }
        else if(PARAMETER_CHECK("-header", 7, parameterLength)) {
            InputHaveHeaderLine = true;
            printHeaderLine = true;
        }
        else if(PARAMETER_CHECK("-ignorecase", 11, parameterLength)) {
            ignoreCase = true;
        }
174
175
176
177
178
179
180
181
182
183
184
185
186
187
        else if (PARAMETER_CHECK("-prec", 5, parameterLength))
        {
            if ((i+1) >= argc || LOOKS_LIKE_A_PARAM(argv[i+1])) {
                cerr << endl 
                     << "*****ERROR: -prec parameter requires a value." 
                     << endl << endl;
                groupby_help();
                break;
            }
            else {
                precision     = atoi(argv[i + 1]);
                i++;
            }
        }
188

189
        else {
190
191
192
193
            cerr << endl 
                 << "*****ERROR: Unrecognized parameter: " 
                 << argv[i] 
                 << " *****" << endl << endl;
194
195
196
197
198
            showHelp = true;
        }
    }

    if (!haveOpColumns) {
199
200
201
202
203
204
205
        cerr << endl 
             << "*****" 
             << endl 
             << "*****ERROR: Need -opCols." 
             << endl 
             << "*****" 
             << endl;
206
207
208
209
210
        showHelp = true;
    }
    // split the opsString into discrete operations and make sure they are all valid.
    vector<string> ops;
    opsString.erase(remove_if(opsString.begin(),opsString.end(),::isspace),opsString.end());
211
    Tokenize(opsString, ops, ',');
212
    for( size_t i = 0; i < ops.size(); i++ ) {
213
214
215
216
217
218
        if ((ops[i] != "sum")  && (ops[i] != "max") && 
            (ops[i] != "min") && (ops[i] != "mean") &&
            (ops[i] != "mode") && (ops[i] != "median") && 
            (ops[i] != "antimode") && (ops[i] != "stdev") &&
            (ops[i] != "sstdev") && (ops[i] != "count") && 
            (ops[i] != "count_distinct") && (ops[i] != "collapse") && 
219
            (ops[i] != "distinct") && (ops[i] != "distinct_sort_num") && (ops[i] != "concat") &&
220
221
            (ops[i] != "freqdesc") && (ops[i] != "freqasc") &&
            (ops[i] != "first") && (ops[i] != "last") ) 
222
        {
223
224
225
226
227
228
229
230
            cerr << endl 
                 << "*****" 
                 << endl 
                 << "*****ERROR: Invalid operation selection \"" 
                 << ops[i] 
                 << endl 
                 << "\"  *****" 
                 << endl;
231
232
233
234
235
236
237
238
            showHelp = true;
        }
    }
    if (!showHelp) {

        // Split the column string sent by the user into discrete column numbers
        // A comma separated string is expected.
        vector<int> groupColumnsInt;
239
        TokenizeColumns(groupColumnsString, groupColumnsInt);
240
241

        vector<int> opColumnsInt;
242
        TokenizeColumns(opsColumnString, opColumnsInt);
243
244
245
246
247

        // sanity check the group columns
        for(size_t i = 0; i < groupColumnsInt.size(); ++i) {
            int groupColumnInt = groupColumnsInt[i];
            if (groupColumnInt < 1) {
248
249
250
251
252
253
                cerr << endl 
                     << "*****" 
                     << endl 
                     << "*****ERROR: group columns must be >=1. " 
                     << endl 
                     << "*****" << endl;
254
255
256
257
258
259
260
261
                groupby_help();
            }
        }

        // sanity check the op columns
        for(size_t i = 0; i < opColumnsInt.size(); ++i) {
            int opColumnInt = opColumnsInt[i];
            if (opColumnInt < 1) {
262
263
264
265
266
267
268
                cerr << endl 
                     << "*****" 
                     << endl 
                     << "*****ERROR: op columns must be >=1. " 
                     << endl 
                     << "*****" 
                     << endl;
269
270
271
272
273
274
                groupby_help();
            }
        }

        // sanity check that there are equal number of opColumns and ops
        if (ops.size() != opColumnsInt.size()) {
275
276
277
278
279
280
281
            cerr << endl
                 << "*****" 
                 << endl
                 << "*****ERROR: There must be equal num. of ops and opCols. " 
                 << endl 
                 << "*****" 
                 << endl;
282
283
284
285
            groupby_help();
        }
        GroupBy(inFile, groupColumnsInt, opColumnsInt, ops,
            printOriginalLine, printHeaderLine, InputHaveHeaderLine,
286
            ignoreCase, precision);
287
288
289
290
291
292
293
294
295
    }
    else {
        groupby_help();
    }
    return 0;
}

void groupby_help(void) {

Aaron's avatar
Aaron committed
296
    cerr << "\nTool:    bedtools groupby " << endl;
297
298
299
300
301
302
303
304
305
306
307
308
309
310
    cerr << "Version: " << VERSION << "\n";    
    cerr << "Summary: Summarizes a dataset column based upon" << endl;
    cerr << "\t common column groupings. Akin to the SQL \"group by\" command." << endl << endl;

    cerr << "Usage:\t " << PROGRAM_NAME << " -g [group_column(s)] -c [op_column(s)] -o [ops] " << endl;
    cerr << "\t "     << "cat [FILE] | " << PROGRAM_NAME << " -g [group_column(s)] -c [op_column(s)] -o [ops] " << endl << endl;

    cerr << "Options: " << endl;
    cerr << "\t-i\t\t"        << "Input file. Assumes \"stdin\" if omitted." << endl << endl;

    cerr << "\t-g -grp\t\t"      << "Specify the columns (1-based) for the grouping." << endl;
    cerr                         << "\t\t\tThe columns must be comma separated." << endl;
    cerr                         << "\t\t\t- Default: 1,2,3" << endl << endl;

311
    cerr << "\t-c -opCols\t"     << "Specify the column (1-based) that should be summarized." << endl;
312
313
314
315
    cerr                         << "\t\t\t- Required." << endl << endl;

    cerr << "\t-o -ops\t\t"      << "Specify the operation that should be applied to opCol." << endl;
    cerr                         << "\t\t\tValid operations:" << endl;
Aaron's avatar
Aaron committed
316
    cerr                         << "\t\t\t    sum, count, count_distinct, min, max," << endl;
317
318
319
320
    cerr                         << "\t\t\t    mean, median, mode, antimode," << endl;
    cerr                         << "\t\t\t    stdev, sstdev (sample standard dev.)," << endl;
    cerr                         << "\t\t\t    collapse (i.e., print a comma separated list (duplicates allowed)), " << endl;
    cerr                         << "\t\t\t    distinct (i.e., print a comma separated list (NO duplicates allowed)), " << endl;
321
    cerr                         << "\t\t\t    distinct_sort_num (as distinct, but sorted numerically), " << endl;
322
323
324
    cerr                         << "\t\t\t    concat   (i.e., merge values into a single, non-delimited string), " << endl;
    cerr                         << "\t\t\t    freqdesc (i.e., print desc. list of values:freq)" << endl;
    cerr                         << "\t\t\t    freqasc (i.e., print asc. list of values:freq)" << endl;
325
326
327
    cerr                         << "\t\t\t    first (i.e., print first value)" << endl;
    cerr                         << "\t\t\t    last (i.e., print last value)" << endl;
        
328
329
    cerr                         << "\t\t\t- Default: sum" << endl << endl;

330
    cerr << "\t-full\t\t"   << "Print all columns from input file.  The first line in the group is used." << endl;
331
332
333
334
335
336
337
338
339
340
341
342
343
344
    cerr            << "\t\t\tDefault: print only grouped columns." << endl << endl;

    cerr << "\t-inheader\t" << "Input file has a header line - the first line will be ignored." << endl << endl ;

    cerr << "\t-outheader\t"    << "Print header line in the output, detailing the column names. " << endl;
    cerr            << "\t\t\tIf the input file has headers (-inheader), the output file" << endl;
    cerr            << "\t\t\twill use the input's column names." << endl;
    cerr            << "\t\t\tIf the input file has no headers, the output file" << endl;
    cerr            << "\t\t\twill use \"col_1\", \"col_2\", etc. as the column names." << endl << endl;

    cerr << "\t-header\t\t" << "same as '-inheader -outheader'" << endl << endl;

    cerr << "\t-ignorecase\t"   << "Group values regardless of upper/lower case." << endl << endl;

345
346
    cerr << "\t-prec\t"   << "Sets the decimal precision for output (Default: 5)" << endl << endl;

347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
    cerr << "Examples: " << endl;
    cerr << "\t$ cat ex1.out" << endl;
    cerr << "\tchr1 10  20  A   chr1    15  25  B.1 1000    ATAT" << endl;
    cerr << "\tchr1 10  20  A   chr1    25  35  B.2 10000   CGCG" << endl << endl;
    cerr << "\t$ groupBy -i ex1.out -g 1,2,3,4 -c 9 -o sum" << endl;
    cerr << "\tchr1 10  20  A   11000" << endl << endl;
    cerr << "\t$ groupBy -i ex1.out -grp 1,2,3,4 -opCols 9,9 -ops sum,max" << endl;
    cerr << "\tchr1 10  20  A   11000   10000" << endl << endl;
    cerr << "\t$ groupBy -i ex1.out -g 1,2,3,4 -c 8,9 -o collapse,mean" << endl;
    cerr << "\tchr1 10  20  A   B.1,B.2,    5500" << endl << endl;
    cerr << "\t$ cat ex1.out | groupBy -g 1,2,3,4 -c 8,9 -o collapse,mean" << endl;
    cerr << "\tchr1 10  20  A   B.1,B.2,    5500" << endl << endl;
    cerr << "\t$ cat ex1.out | groupBy -g 1,2,3,4 -c 10 -o concat" << endl;
    cerr << "\tchr1 10  20  A   ATATCGCG" << endl << endl;

    cerr << "Notes: " << endl;
    cerr << "\t(1)  The input file/stream should be sorted/grouped by the -grp. columns" << endl;
    cerr << "\t(2)  If -i is unspecified, input is assumed to come from stdin." << endl << endl;


    // end the program here
    exit(1);

}


void GroupBy (const string &inFile,
    const vector<int> &groupColumns,
    const vector<int> &opColumns,
    const vector<string> &ops,
    const bool printOriginalLine,
    const bool printHeaderLine,
    const bool InputHaveHeaderLine,
380
381
    const bool ignoreCase,
    int precision) {
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412

    // current line number
    int lineNum = 0;
    // string representing current line
    string inLine;

    // vector of strings holding the tokenized current line
    vector<string>  inFields;
    vector<string>  inFieldsFirstLineInGroup;
    inFields.reserve(20);

    // keys for the current and previous group
    vector<string>  prevGroup(0);
    vector<string>  currGroup(0);

    // vector (one per column) of vector (one per value/column) of the opColumn values for the current group
    vector< vector<string> >  values;
    for( size_t i = 0; i < opColumns.size(); i++ ) {
        values.push_back( vector<string>() );
    }

    bool    first_line = true;

    // check the status of the current line
    TabLineStatus tabLineStatus;

    // open a new tab file, loop through it line by line
    // and summarize the data for a given group when the group
    // fields change
    TabFile *_tab = new TabFile(inFile);
    _tab->Open();
413
414
415
    while ((tabLineStatus = _tab->GetNextTabLine(inFields, lineNum)) !=     
            TAB_INVALID)
    {
416
        if ((tabLineStatus == TAB_VALID) || (tabLineStatus == TAB_HEADER)) {
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442

            if (first_line) {
                first_line = false;
                if (printHeaderLine)
                    PrintHeaderLine(inFields, groupColumns, opColumns, ops,
                    printOriginalLine, InputHaveHeaderLine);

                if (InputHaveHeaderLine) {
                    inFields.clear();
                    continue; // no need to process this line - it's the header
                }
            }

            if (inFieldsFirstLineInGroup.empty()) //first line in file? - save it
                inFieldsFirstLineInGroup = inFields;

            // build the group vector for the current line
            currGroup.clear();
            vector<int>::const_iterator gIt  = groupColumns.begin();
            vector<int>::const_iterator gEnd = groupColumns.end();
            for (; gIt != gEnd; ++gIt)
                addValue(inFields, currGroup, (*gIt-1), lineNum, ignoreCase);

            // there has been a group change
            if ((currGroup != prevGroup) && (prevGroup.size() > 0)) {
                // Summarize this group
443
                ReportSummary(printOriginalLine?inFieldsFirstLineInGroup:prevGroup, 
444
                          values, ops, precision);
445
446
447
448
                // reset and add the first value for the next group.
                values.clear();
                for( size_t i = 0; i < opColumns.size(); i++ ) {
                    values.push_back( vector<string>() );
449
450
451
                    addValue(inFields, values[i], 
                             opColumns[i]-1, lineNum, 
                             ignoreCase);
452
453
454
455
456
457
                }
                inFieldsFirstLineInGroup = inFields;
            }
            // we're still dealing with the same group
            else {
                for( size_t i = 0; i < opColumns.size(); i++ )
458
459
460
                    addValue(inFields, values[i], 
                             opColumns[i]-1, lineNum, 
                             ignoreCase);
461
462
463
464
465
466
467
            }
            // reset for the next line
            prevGroup = currGroup;
        }
        inFields.clear();
    }
    // report the last group
468
    ReportSummary(printOriginalLine?inFieldsFirstLineInGroup:currGroup, 
469
                  values, ops, precision);
470
471
472
473
    _tab->Close();
}


474
475
void ReportSummary(const vector<string> &group, 
                   const vector<vector<string> > &data, 
476
                   const vector<string> &ops,
477
                   int precision)
478
{
479
480

    vector<string> result;
481
482
    for( size_t i = 0; i < data.size(); i++ ) 
    {
483

Aaron's avatar
Aaron committed
484
        if (data[i].empty())
Aaron's avatar
Aaron committed
485
486
            continue;
            
487
488
        string op = ops[i];
        std::stringstream buffer;
Aaron's avatar
Aaron committed
489
        VectorOps vo(data[i]);
490
491

        if (op == "sum") {
492
            buffer << setprecision (precision) << vo.GetSum();
493
494
495
            result.push_back(buffer.str());
        }
        else if (op == "collapse") {
Aaron's avatar
Aaron committed
496
            result.push_back(vo.GetCollapse());
497
498
        }
        else if (op == "distinct") {
Aaron's avatar
Aaron committed
499
            result.push_back(vo.GetDistinct());
500
        }
501
502
503
504
        else if (op == "distinct_sort_num") {
            result.push_back(vo.GetDistinctSortNum());
        }

505
        else if (op == "concat") {
Aaron's avatar
Aaron committed
506
            result.push_back(vo.GetConcat());
507
508
        }
        else if (op == "min") {
509
            buffer << setprecision (precision) << vo.GetMin();
510
511
512
            result.push_back(buffer.str());
        }
        else if (op == "max") {
513
            buffer << setprecision (precision) << vo.GetMax();
514
515
516
            result.push_back(buffer.str());
        }
        else if (op == "mean") {
517
            buffer << setprecision (precision) << vo.GetMean();
518
519
520
            result.push_back(buffer.str());
        }
        else if (op == "median") {
521
            buffer << setprecision (precision) << vo.GetMedian();
522
523
524
            result.push_back(buffer.str());
        }
        else if (op == "count") {
525
            buffer << setprecision (precision) << data[i].size();
Aaron's avatar
Aaron committed
526
527
528
            result.push_back(buffer.str());
        }
        else if (op == "count_distinct") {
529
            buffer << setprecision (precision) << vo.GetCountDistinct();
530
531
            result.push_back(buffer.str());
        }
Aaron's avatar
Aaron committed
532
533
        else if (op == "mode") {
            result.push_back(vo.GetMode());
534
        }
Aaron's avatar
Aaron committed
535
536
537
538
539
540
541
542
543
544
        else if (op == "antimode") {
            result.push_back(vo.GetAntiMode());
        }
        else if (op == "freqdesc") {
            result.push_back(vo.GetFreqDesc());
        }
        else if (op == "freqasc") {
            result.push_back(vo.GetFreqAsc());
        }
        else if (op == "stdev") {
545
            buffer << setprecision (precision) << vo.GetStddev();
Aaron's avatar
Aaron committed
546
547
548
            result.push_back(buffer.str());
        }
        else if (op == "sstdev") {
549
            buffer << setprecision (precision) << vo.GetSstddev();
550
551
            result.push_back(buffer.str());
        }
552
553
554
555
556
557
        else if (op == "first") {
            result.push_back(vo.GetFirst());
        }
        else if (op == "last") {
            result.push_back(vo.GetLast());
        }
558
    }
Aaron's avatar
Aaron committed
559
    if (!result.empty()) {
Aaron's avatar
Aaron committed
560
561
562
563
564
        for_each(group.begin(), group.end(), TabPrintPost);
        cout << *result.begin();
        for_each(++result.begin(), result.end(), TabPrintPre);
        cout << endl; //Gets rid of extraneous tab
    }
565
566
567
568
569
570
571
572
573
574
575
}


/*
 * Append fromList[index] to toList.
 *
 * fromList   : tokenized fields of the current input line.
 * toList     : vector that receives the copied value.
 * index      : 0-based position in fromList. A negative value converts to a
 *              huge unsigned index and is therefore reported as out of range.
 * lineNum    : input line number, used only in the error message.
 * ignoreCase : when true, the copy is lower-cased before it is stored.
 *
 * If index is outside fromList an error is written to stderr and the process
 * exits with status 1.
 */
void addValue (const vector<string> &fromList, vector<string> &toList, int index, int lineNum, const bool ignoreCase) {
    try {
        string s(fromList.at(index));
        if (ignoreCase) {
            // tolower() is only defined for values representable as unsigned
            // char (or EOF); go through unsigned char so bytes >= 0x80 are
            // handled without undefined behaviour
            for (string::size_type k = 0; k < s.size(); ++k)
                s[k] = static_cast<char>(::tolower(static_cast<unsigned char>(s[k])));
        }
        toList.push_back(s);
    }
    catch (const std::out_of_range &) {
        cerr << endl
             << "*****"
             << endl
             << "*****ERROR: requested column exceeds the number of columns in file at line "
             << lineNum
             << ". Exiting."
             << endl
             << "*****"
             << endl;
        exit(1);
    }
}


// Write element to stdout, followed by a single tab.
void TabPrintPost (string element) {
    cout << element;
    cout.put('\t');
}

// Write a single tab to stdout, followed by element.
void TabPrintPre (string element) {
    cout.put('\t');
    cout << element;
}

// Write element to stdout, followed by a single comma.
void CommaPrint (string element) {
    cout << element;
    cout.put(',');
}

/*
 * Name of an output column for the header line.
 *
 * inFields            : fields of the first input line.
 * FieldIndex          : 1-based column number.
 * InputHaveHeaderLine : when true, the name is taken from inFields.
 *
 * Returns inFields[FieldIndex-1] when the input has a header line and the
 * index lies inside it; otherwise the generated name "col_<FieldIndex>".
 * The generated name is also used when the header line has fewer fields than
 * the requested column (or FieldIndex is 0), so a short header can no longer
 * cause an out-of-bounds read.
 */
inline string ColumnHeaderName(const vector<string> &inFields, const size_t FieldIndex,
    bool InputHaveHeaderLine)
{
    if (InputHaveHeaderLine && FieldIndex >= 1 && FieldIndex <= inFields.size())
        return inFields[FieldIndex-1];

    stringstream s;
    s << "col_" << FieldIndex;
    return s.str();
}

void PrintHeaderLine(const vector<string> &inFields,
    const vector<int> &groupColumns,
    const vector<int> &opColumns,
    const vector<string> &ops,
    const bool PrintFullInputLine,
    const bool InputHaveHeaderLine)
{
    vector<string> header;

    //Header fields of input file
    if (PrintFullInputLine) {
        //All input columns
        for (size_t i=0;i<inFields.size();++i)
626
627
            header.push_back( ColumnHeaderName(inFields, i+1, 
                                               InputHaveHeaderLine) );
628
629
630
    } else {
        //Only the columns that are actually used in the grouped operations
        for (size_t i=0;i<groupColumns.size();++i)
631
632
            header.push_back( ColumnHeaderName(inFields, groupColumns[i], 
                                               InputHaveHeaderLine) );
633
634
635
636
637
    }

    //Header fields of output columns, by operation
    for (size_t i=0; i<opColumns.size();++i) {
        stringstream s;
638
639
        s << ops[i] << "(" << ColumnHeaderName(inFields, opColumns[i], 
                                               InputHaveHeaderLine) << ")";
640
641
642
643
644
645
646
        header.push_back(s.str());
    }

    //print Header Line
    for (size_t i=0; i<header.size();++i)
        cout << header[i] << ((i<header.size()-1)?"\t":"\n");
}