/*****************************************************************************
windowMakerMain.cpp

(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com

Licensed under the GNU General Public License 2.0.
******************************************************************************/
#include "windowMaker.h"
Aaron's avatar
Aaron committed
13
#include "version.h"
Aaron's avatar
Aaron committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32

using namespace std;

// define our program name
#define PROGRAM_NAME "bedtools makewindows"


// define our parameter checking macro
//
// Evaluates to true when the current command-line token, argv[i], is exactly
// the option string `param`:
//   param     - the option literal to test for (e.g. "-g")
//   paramLen  - the length of `param`
//   actualLen - the length of argv[i]
//
// NOTE: the macro reads `argv` and `i` from the scope in which it is expanded.
// The whole replacement list and each argument are parenthesized so that the
// macro behaves as a single boolean expression wherever it is used
// (e.g. under `!` or next to `||`).
#define PARAMETER_CHECK(param, paramLen, actualLen) \
    ((strncmp(argv[i], (param), min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))

// function declarations
void windowmaker_help(void);

int windowmaker_main(int argc, char* argv[]) {

    // our configuration variables
    bool showHelp = false;

    // input files
Aaron's avatar
Aaron committed
33
34
    string inputFile;
    WindowMaker::INPUT_FILE_TYPE inputFileType = WindowMaker::GENOME_FILE;
35
    WindowMaker::ID_METHOD idMethod = WindowMaker::ID_NONE;
Aaron's avatar
Aaron committed
36
37
38
39

    // parms
    uint32_t size = 0;
    uint32_t step = 0;
Aaron's avatar
Aaron committed
40
41
    uint32_t count = 0;

Aaron's avatar
Aaron committed
42
    bool haveGenome = false;
Aaron's avatar
Aaron committed
43
44
45
    bool haveBed = false;
    bool haveSize = false;
    bool haveCount = false;
jayhesselberth's avatar
jayhesselberth committed
46
    bool reverse = false;
Aaron's avatar
Aaron committed
47

Aaron's avatar
Aaron committed
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
    for(int i = 1; i < argc; i++) {
        int parameterLength = (int)strlen(argv[i]);

        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
        (PARAMETER_CHECK("--help", 5, parameterLength))) {
            showHelp = true;
        }
    }

    if(showHelp) windowmaker_help();

    // do some parsing (all of these parameters require 2 strings)
    for(int i = 1; i < argc; i++) {

        int parameterLength = (int)strlen(argv[i]);

        if(PARAMETER_CHECK("-g", 2, parameterLength)) {
            if ((i+1) < argc) {
                haveGenome = true;
Aaron's avatar
Aaron committed
67
68
69
70
71
72
73
74
75
76
                inputFile = argv[i + 1];
                inputFileType = WindowMaker::GENOME_FILE;
                i++;
            }
        }
        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
            if ((i+1) < argc) {
                haveBed = true;
                inputFile = argv[i + 1];
                inputFileType = WindowMaker::BED_FILE;
Aaron's avatar
Aaron committed
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
                i++;
            }
        }
        else if(PARAMETER_CHECK("-w", 2, parameterLength)) {
            if ((i+1) < argc) {
                haveSize = true;
                size = atoi(argv[i + 1]);
                step = size;
                i++;
            }
        }
        else if(PARAMETER_CHECK("-s", 2, parameterLength)) {
            if ((i+1) < argc) {
                step = atoi(argv[i + 1]);
                i++;
            }
        }
Aaron's avatar
Aaron committed
94
95
96
97
98
99
100
        else if(PARAMETER_CHECK("-n", 2, parameterLength)) {
            if ((i+1) < argc) {
                haveCount = true;
                count = atoi(argv[i + 1]);
                i++;
            }
        }
101
102
        else if(PARAMETER_CHECK("-reverse", 8, parameterLength)) {
            reverse = true;
jayhesselberth's avatar
jayhesselberth committed
103
        }
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
        else if(PARAMETER_CHECK("-i", 2, parameterLength)) {
            if ((i+1) < argc) {
                if (strcmp(argv[i+1],"winnum")==0)
                    idMethod = WindowMaker::ID_WINDOW_NUMBER;
                else if (strcmp(argv[i+1],"srcwinnum")==0)
                    idMethod = WindowMaker::ID_SOURCE_ID_WINDOW_NUMBER;
                else if (strcmp(argv[i+1],"src")==0)
                    idMethod = WindowMaker::ID_SOURCE_ID;
                else {
                    cerr << endl << "*****ERROR: Invalid ID method (" << argv[i+1] << "). Possible values are: winnum, srcwinnum" << endl << endl ;
                    showHelp = true;
                }
                i++;
            }
        }
Aaron's avatar
Aaron committed
119
        else {
Aaron's avatar
Aaron committed
120
            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
Aaron's avatar
Aaron committed
121
122
123
124
125
            showHelp = true;
        }
    }

    // make sure we have both input files
Aaron's avatar
Aaron committed
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
    if (!haveGenome && !haveBed) {
        cerr << endl << "*****" << endl << "*****ERROR: Need -g (genome file) or -b (BED file) for interval source. " << endl << "*****" << endl;
        showHelp = true;
    }
    if (haveGenome && haveBed) {
        cerr << endl << "*****" << endl << "*****ERROR: Can't combine -g (genome file) and -b (BED file). Please use one or the other." << endl << "*****" << endl;
        showHelp = true;
    }
    if (!haveSize && !haveCount) {
        cerr << endl << "*****" << endl << "*****ERROR: Need -w (window size) or -n (number of windows). " << endl << "*****" << endl;
        showHelp = true;
    }
    if (haveSize && haveCount) {
        cerr << endl << "*****" << endl << "*****ERROR: Can't combine -w (window size) and -n (number of windows). Please use one or the other. " << endl << "*****" << endl;
        showHelp = true;
Aaron's avatar
Aaron committed
141
    }
arq5x's avatar
arq5x committed
142
    if (step <= 0 && !haveBed && !haveCount && haveGenome) {
arq5x's avatar
arq5x committed
143
144
145
        cerr << endl << "*****" << endl << "*****ERROR: The step (-s) option must be greater than zero. " << endl << "*****" << endl;
        showHelp = true;
    }
Aaron's avatar
Aaron committed
146
    if (!showHelp) {
Aaron's avatar
Aaron committed
147
148
        WindowMaker *wm = NULL;
        if (haveCount)
jayhesselberth's avatar
jayhesselberth committed
149
150
            wm = new WindowMaker(inputFile, idMethod,
                                 inputFileType, count, reverse);
Aaron's avatar
Aaron committed
151
        if (haveSize)
jayhesselberth's avatar
jayhesselberth committed
152
153
            wm = new WindowMaker(inputFile, idMethod,
                                 inputFileType, size, step, reverse);
Aaron's avatar
Aaron committed
154
155
156
157
158
159
160
161
162
163
        delete wm;
    }
    else {
        windowmaker_help();
    }
    return 0;
}

// Prints the tool name, version, usage, option descriptions, notes and
// worked examples for "makewindows" to standard error, then terminates the
// process with exit status 1. This function does not return.
void windowmaker_help(void) {

    // header
    cerr << "\nTool: bedtools makewindows" << endl;
    cerr << "Version: " << VERSION << "\n";
    cerr << "Summary: Makes adjacent or sliding windows across a genome or BED file." << endl << endl;

    // usage synopsis
    cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] [-g <genome> OR -b <bed>]" << endl;
    cerr << " [ -w <window_size> OR -n <number of windows> ]" << endl << endl;

    // interval source options
    cerr << "Input Options: " << endl;

    cerr << "\t-g <genome>" << endl;
    cerr << "\t\tGenome file size (see notes below)." << endl;
    cerr << "\t\tWindows will be created for each chromosome in the file." << endl << endl;

    cerr << "\t-b <bed>" << endl;
    cerr << "\t\tBED file (with chrom,start,end fields)." << endl;
    cerr << "\t\tWindows will be created for each interval in the file." << endl << endl;

    // windowing options
    cerr << "Windows Output Options: " << endl;

    cerr << "\t-w <window_size>" << endl;
    cerr << "\t\tDivide each input interval (either a chromosome or a BED interval)" << endl;
    cerr << "\t\tto fixed-sized windows (i.e. same number of nucleotides in each window)." << endl;
    cerr << "\t\tCan be combined with -s <step_size>" << endl << endl;

    cerr << "\t-s <step_size>" << endl;
    cerr << "\t\tStep size: i.e., how many base pairs to step before" << endl;
    cerr << "\t\tcreating a new window. Used to create \"sliding\" windows." << endl;
    cerr << "\t\t- Defaults to window size (non-sliding windows)." << endl << endl;

    cerr << "\t-n <number_of_windows>" << endl;
    cerr << "\t\tDivide each input interval (either a chromosome or a BED interval)" << endl;
    cerr << "\t\tto fixed number of windows (i.e. same number of windows, with" << endl;
    cerr << "\t\tvarying window sizes)." << endl << endl;

    cerr << "\t-reverse" << endl;
    cerr << "\t\t Reverse numbering of windows in the output, i.e. report " << endl;
    cerr << "\t\t windows in decreasing order" << endl << endl;

    // ID naming options
    cerr << "ID Naming Options: " << endl;
    cerr << "\t-i src|winnum|srcwinnum" << endl;
    cerr << "\t\tThe default output is 3 columns: chrom, start, end ." << endl;
    cerr << "\t\tWith this option, a name column will be added." << endl;
    cerr << "\t\t \"-i src\" - use the source interval's name." << endl;
    cerr << "\t\t \"-i winnum\" - use the window number as the ID (e.g. 1,2,3,4...)." << endl;
    cerr << "\t\t \"-i srcwinnum\" - use the source interval's name with the window number." << endl;
    cerr << "\t\tSee below for usage examples." << endl << endl;

    // notes on the genome file format
    cerr << "Notes: " << endl;
    cerr << "\t(1) The genome file should be tab delimited and structured as follows:" << endl;
    cerr << "\t <chromName><TAB><chromSize>" << endl << endl;
    cerr << "\tFor example, Human (hg19):" << endl;
    cerr << "\tchr1\t249250621" << endl;
    cerr << "\tchr2\t243199373" << endl;
    cerr << "\t..." << endl;
    cerr << "\tchr18_gl000207_random\t4262" << endl << endl;

    // tips for obtaining chromosome sizes
    cerr << "Tips: " << endl;
    cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl;
    cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl;
    cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl;
    cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl;

    // worked examples
    cerr << "Examples: " << endl;
    cerr << " # Divide the human genome into windows of 1MB:" << endl;
    cerr << " $ " << PROGRAM_NAME << " -g hg19.txt -w 1000000" << endl;
    cerr << " chr1 0 1000000" << endl;
    cerr << " chr1 1000000 2000000" << endl;
    cerr << " chr1 2000000 3000000" << endl;
    cerr << " chr1 3000000 4000000" << endl;
    cerr << " chr1 4000000 5000000" << endl;
    cerr << " ..." << endl;
    cerr << endl;

    cerr << " # Divide the human genome into sliding (=overlapping) windows of 1MB, with 500KB overlap:" << endl;
    cerr << " $ " << PROGRAM_NAME << " -g hg19.txt -w 1000000 -s 500000" << endl;
    cerr << " chr1 0 1000000" << endl;
    cerr << " chr1 500000 1500000" << endl;
    cerr << " chr1 1000000 2000000" << endl;
    cerr << " chr1 1500000 2500000" << endl;
    cerr << " chr1 2000000 3000000" << endl;
    cerr << " ..." << endl;
    cerr << endl;

    cerr << " # Divide each chromosome in human genome to 1000 windows of equal size:" << endl;
    cerr << " $ " << PROGRAM_NAME << " -g hg19.txt -n 1000" << endl;
    cerr << " chr1 0 249251" << endl;
    cerr << " chr1 249251 498502" << endl;
    cerr << " chr1 498502 747753" << endl;
    cerr << " chr1 747753 997004" << endl;
    cerr << " chr1 997004 1246255" << endl;
    cerr << " ..." << endl;
    cerr << endl;

    cerr << " # Divide each interval in the given BED file into 10 equal-sized windows:" << endl;
    cerr << " $ cat input.bed" << endl;
    cerr << " chr5 60000 70000" << endl;
    cerr << " chr5 73000 90000" << endl;
    cerr << " chr5 100000 101000" << endl;
    cerr << " $ " << PROGRAM_NAME << " -b input.bed -n 10" << endl;
    cerr << " chr5 60000 61000" << endl;
    cerr << " chr5 61000 62000" << endl;
    cerr << " chr5 62000 63000" << endl;
    cerr << " chr5 63000 64000" << endl;
    cerr << " chr5 64000 65000" << endl;
    cerr << " ..." << endl;
    cerr << endl;

    cerr << " # Add a name column, based on the window number: "<< endl;
    cerr << " $ cat input.bed" << endl;
    cerr << " chr5  60000  70000 AAA" << endl;
    cerr << " chr5  73000  90000 BBB" << endl;
    cerr << " chr5 100000 101000 CCC" << endl;
    cerr << " $ " << PROGRAM_NAME << " -b input.bed -n 3 -i winnum" << endl;
    cerr << " chr5        60000   63334   1" << endl;
    cerr << " chr5        63334   66668   2" << endl;
    cerr << " chr5        66668   70000   3" << endl;
    cerr << " chr5        73000   78667   1" << endl;
    cerr << " chr5        78667   84334   2" << endl;
    cerr << " chr5        84334   90000   3" << endl;
    cerr << " chr5        100000  100334  1" << endl;
    cerr << " chr5        100334  100668  2" << endl;
    cerr << " chr5        100668  101000  3" << endl;
    cerr << " ..." << endl;
    cerr << endl;

    cerr << " # Reverse window numbers: "<< endl;
    cerr << " $ cat input.bed" << endl;
    cerr << " chr5  60000  70000 AAA" << endl;
    cerr << " chr5  73000  90000 BBB" << endl;
    cerr << " chr5 100000 101000 CCC" << endl;
    cerr << " $ " << PROGRAM_NAME << " -b input.bed -n 3 -i winnum -reverse" << endl;
    cerr << " chr5        60000   63334   3" << endl;
    cerr << " chr5        63334   66668   2" << endl;
    cerr << " chr5        66668   70000   1" << endl;
    cerr << " chr5        73000   78667   3" << endl;
    cerr << " chr5        78667   84334   2" << endl;
    cerr << " chr5        84334   90000   1" << endl;
    cerr << " chr5        100000  100334  3" << endl;
    cerr << " chr5        100334  100668  2" << endl;
    cerr << " chr5        100668  101000  1" << endl;
    cerr << " ..." << endl;
    cerr << endl;


    cerr << " # Add a name column, based on the source ID + window number: "<< endl;
    cerr << " $ cat input.bed" << endl;
    cerr << " chr5  60000  70000 AAA" << endl;
    cerr << " chr5  73000  90000 BBB" << endl;
    cerr << " chr5 100000 101000 CCC" << endl;
    cerr << " $ " << PROGRAM_NAME << " -b input.bed -n 3 -i srcwinnum" << endl;
    cerr << " chr5        60000   63334   AAA_1" << endl;
    cerr << " chr5        63334   66668   AAA_2" << endl;
    cerr << " chr5        66668   70000   AAA_3" << endl;
    cerr << " chr5        73000   78667   BBB_1" << endl;
    cerr << " chr5        78667   84334   BBB_2" << endl;
    cerr << " chr5        84334   90000   BBB_3" << endl;
    cerr << " chr5        100000  100334  CCC_1" << endl;
    cerr << " chr5        100334  100668  CCC_2" << endl;
    cerr << " chr5        100668  101000  CCC_3" << endl;
    cerr << " ..." << endl;
    cerr << endl;



    cerr << endl;

    // start the user on their way: help output always ends the program
    exit(1);

}