/*****************************************************************************
windowMakerMain.cpp

(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com

Licensed under the GNU General Public License 2.0.
******************************************************************************/
#include "windowMaker.h"
#include "version.h"

using namespace std;

// define our program name
#define PROGRAM_NAME "bedtools makewindows"


// Parameter checking macro.
//
// Evaluates to true when the current command-line argument is exactly the
// option string `param`:
//   param     - the option literal to compare against (e.g. "-g")
//   paramLen  - the length of `param`
//   actualLen - the length of the argument being examined
//
// NOTE: the expansion reads `argv[i]` directly, so variables named `argv`
// and `i` must be in scope wherever the macro is used.
//
// The whole expansion and every argument are parenthesized so the macro
// behaves as a single boolean operand (e.g. `!PARAMETER_CHECK(...)` negates
// the complete test rather than only the strncmp() half).
#define PARAMETER_CHECK(param, paramLen, actualLen) \
    ((strncmp(argv[i], (param), std::min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))

// function declarations
void windowmaker_help(void);

int windowmaker_main(int argc, char* argv[]) {

    // our configuration variables
    bool showHelp = false;

    // input files
Aaron's avatar
Aaron committed
33
34
    string inputFile;
    WindowMaker::INPUT_FILE_TYPE inputFileType = WindowMaker::GENOME_FILE;
35
    WindowMaker::ID_METHOD idMethod = WindowMaker::ID_NONE;
Aaron's avatar
Aaron committed
36
37
38
39

    // parms
    uint32_t size = 0;
    uint32_t step = 0;
Aaron's avatar
Aaron committed
40
41
    uint32_t count = 0;

Aaron's avatar
Aaron committed
42
    bool haveGenome = false;
Aaron's avatar
Aaron committed
43
44
45
46
    bool haveBed = false;
    bool haveSize = false;
    bool haveCount = false;

Aaron's avatar
Aaron committed
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
    for(int i = 1; i < argc; i++) {
        int parameterLength = (int)strlen(argv[i]);

        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
        (PARAMETER_CHECK("--help", 5, parameterLength))) {
            showHelp = true;
        }
    }

    if(showHelp) windowmaker_help();

    // do some parsing (all of these parameters require 2 strings)
    for(int i = 1; i < argc; i++) {

        int parameterLength = (int)strlen(argv[i]);

        if(PARAMETER_CHECK("-g", 2, parameterLength)) {
            if ((i+1) < argc) {
                haveGenome = true;
Aaron's avatar
Aaron committed
66
67
68
69
70
71
72
73
74
75
                inputFile = argv[i + 1];
                inputFileType = WindowMaker::GENOME_FILE;
                i++;
            }
        }
        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
            if ((i+1) < argc) {
                haveBed = true;
                inputFile = argv[i + 1];
                inputFileType = WindowMaker::BED_FILE;
Aaron's avatar
Aaron committed
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
                i++;
            }
        }
        else if(PARAMETER_CHECK("-w", 2, parameterLength)) {
            if ((i+1) < argc) {
                haveSize = true;
                size = atoi(argv[i + 1]);
                step = size;
                i++;
            }
        }
        else if(PARAMETER_CHECK("-s", 2, parameterLength)) {
            if ((i+1) < argc) {
                step = atoi(argv[i + 1]);
                i++;
            }
        }
Aaron's avatar
Aaron committed
93
94
95
96
97
98
99
        else if(PARAMETER_CHECK("-n", 2, parameterLength)) {
            if ((i+1) < argc) {
                haveCount = true;
                count = atoi(argv[i + 1]);
                i++;
            }
        }
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
        else if(PARAMETER_CHECK("-i", 2, parameterLength)) {
            if ((i+1) < argc) {
                if (strcmp(argv[i+1],"winnum")==0)
                    idMethod = WindowMaker::ID_WINDOW_NUMBER;
                else if (strcmp(argv[i+1],"srcwinnum")==0)
                    idMethod = WindowMaker::ID_SOURCE_ID_WINDOW_NUMBER;
                else if (strcmp(argv[i+1],"src")==0)
                    idMethod = WindowMaker::ID_SOURCE_ID;
                else {
                    cerr << endl << "*****ERROR: Invalid ID method (" << argv[i+1] << "). Possible values are: winnum, srcwinnum" << endl << endl ;
                    showHelp = true;
                }
                i++;
            }
        }
Aaron's avatar
Aaron committed
115
        else {
Aaron's avatar
Aaron committed
116
            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
Aaron's avatar
Aaron committed
117
118
119
120
121
            showHelp = true;
        }
    }

    // make sure we have both input files
Aaron's avatar
Aaron committed
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
    if (!haveGenome && !haveBed) {
        cerr << endl << "*****" << endl << "*****ERROR: Need -g (genome file) or -b (BED file) for interval source. " << endl << "*****" << endl;
        showHelp = true;
    }
    if (haveGenome && haveBed) {
        cerr << endl << "*****" << endl << "*****ERROR: Can't combine -g (genome file) and -b (BED file). Please use one or the other." << endl << "*****" << endl;
        showHelp = true;
    }
    if (!haveSize && !haveCount) {
        cerr << endl << "*****" << endl << "*****ERROR: Need -w (window size) or -n (number of windows). " << endl << "*****" << endl;
        showHelp = true;
    }
    if (haveSize && haveCount) {
        cerr << endl << "*****" << endl << "*****ERROR: Can't combine -w (window size) and -n (number of windows). Please use one or the other. " << endl << "*****" << endl;
        showHelp = true;
Aaron's avatar
Aaron committed
137
138
    }
    if (!showHelp) {
Aaron's avatar
Aaron committed
139
140
        WindowMaker *wm = NULL;
        if (haveCount)
141
            wm = new WindowMaker(inputFile, idMethod, inputFileType, count);
Aaron's avatar
Aaron committed
142
        if (haveSize)
143
            wm = new WindowMaker(inputFile, idMethod, inputFileType, size, step);
Aaron's avatar
Aaron committed
144
145
146
147
148
149
150
151
152
153
        delete wm;
    }
    else {
        windowmaker_help();
    }
    return 0;
}

// Writes the complete usage text for "bedtools makewindows" to stderr and
// then terminates the process with exit status 1.  This function does not
// return to its caller.
void windowmaker_help(void) {

    // --- banner ---------------------------------------------------------
    cerr << "\nTool: bedtools makewindows\n"
         << "Version: " << VERSION << "\n"
         << "Summary: Makes adjacent or sliding windows across a genome or BED file.\n"
         << "\n";

    // --- usage ----------------------------------------------------------
    cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] [-g <genome> OR -b <bed>]\n"
         << " [ -w <window_size> OR -n <number of windows> ]\n"
         << "\n";

    // --- interval source options ----------------------------------------
    cerr << "Input Options: \n"
         << "\t-g <genome>\n"
         << "\t\tGenome file size (see notes below).\n"
         << "\t\tWindows will be created for each chromosome in the file.\n"
         << "\n"
         << "\t-b <bed>\n"
         << "\t\tBED file (with chrom,start,end fields).\n"
         << "\t\tWindows will be created for each interval in the file.\n"
         << "\n";

    // --- windowing options ----------------------------------------------
    cerr << "Windows Output Options: \n"
         << "\t-w <window_size>\n"
         << "\t\tDivide each input interval (either a chromosome or a BED interval)\n"
         << "\t\tto fixed-sized windows (i.e. same number of nucleotide in each window).\n"
         << "\t\tCan be combined with -s <step_size>\n"
         << "\n"
         << "\t-s <step_size>\n"
         << "\t\tStep size: i.e., how many base pairs to step before\n"
         << "\t\tcreating a new window. Used to create \"sliding\" windows.\n"
         << "\t\t- Defaults to window size (non-sliding windows).\n"
         << "\n"
         << "\t-n <number_of_windows>\n"
         << "\t\tDivide each input interval (either a chromosome or a BED interval)\n"
         << "\t\tto fixed number of windows (i.e. same number of windows, with\n"
         << "\t\tvarying window sizes).\n"
         << "\n";

    // --- ID naming options ----------------------------------------------
    cerr << "ID Naming Options: \n"
         << "\t-i src|winnum|srcwinnum\n"
         << "\t\tThe default output is 3 columns: chrom, start, end .\n"
         << "\t\tWith this option, a name column will be added.\n"
         << "\t\t \"-i src\" - use the source interval's name.\n"
         << "\t\t \"-i winnum\" - use the window number as the ID (e.g. 1,2,3,4...).\n"
         << "\t\t \"-i srcwinnum\" - use the source interval's name with the window number.\n"
         << "\t\tSee below for usage examples.\n"
         << "\n";

    // --- notes on the genome file format --------------------------------
    cerr << "Notes: \n"
         << "\t(1) The genome file should tab delimited and structured as follows:\n"
         << "\t <chromName><TAB><chromSize>\n"
         << "\n"
         << "\tFor example, Human (hg19):\n"
         << "\tchr1\t249250621\n"
         << "\tchr2\t243199373\n"
         << "\t...\n"
         << "\tchr18_gl000207_random\t4262\n"
         << "\n";

    // --- tip: obtaining chromosome sizes --------------------------------
    cerr << "Tips: \n"
         << "\tOne can use the UCSC Genome Browser's MySQL database to extract\n"
         << "\tchromosome sizes. For example, H. sapiens:\n"
         << "\n"
         << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\\n"
         << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome\n"
         << "\n";

    // --- example: fixed-size windows ------------------------------------
    cerr << "Examples: \n"
         << " # Divide the human genome into windows of 1MB:\n"
         << " $ " << PROGRAM_NAME << " -g hg19.txt -w 1000000\n"
         << " chr1 0 1000000\n"
         << " chr1 1000000 2000000\n"
         << " chr1 2000000 3000000\n"
         << " chr1 3000000 4000000\n"
         << " chr1 4000000 5000000\n"
         << " ...\n"
         << "\n";

    // --- example: sliding windows ---------------------------------------
    cerr << " # Divide the human genome into sliding (=overlapping) windows of 1MB, with 500KB overlap:\n"
         << " $ " << PROGRAM_NAME << " -g hg19.txt -w 1000000 -s 500000\n"
         << " chr1 0 1000000\n"
         << " chr1 500000 1500000\n"
         << " chr1 1000000 2000000\n"
         << " chr1 1500000 2500000\n"
         << " chr1 2000000 3000000\n"
         << " ...\n"
         << "\n";

    // --- example: fixed number of windows per chromosome ----------------
    cerr << " # Divide each chromosome in human genome to 1000 windows of equal size:\n"
         << " $ " << PROGRAM_NAME << " -g hg19.txt -n 1000\n"
         << " chr1 0 249251\n"
         << " chr1 249251 498502\n"
         << " chr1 498502 747753\n"
         << " chr1 747753 997004\n"
         << " chr1 997004 1246255\n"
         << " ...\n"
         << "\n";

    // --- example: fixed number of windows per BED interval --------------
    cerr << " # Divide each interval in the given BED file into 10 equal-sized windows:\n"
         << " $ cat input.bed\n"
         << " chr5 60000 70000\n"
         << " chr5 73000 90000\n"
         << " chr5 100000 101000\n"
         << " $ " << PROGRAM_NAME << " -b input.bed -n 10\n"
         << " chr5 60000 61000\n"
         << " chr5 61000 62000\n"
         << " chr5 62000 63000\n"
         << " chr5 63000 64000\n"
         << " chr5 64000 65000\n"
         << " ...\n"
         << "\n";

    // --- example: name column from the window number --------------------
    cerr << " # Add a name column, based on the window number: \n"
         << " $ cat input.bed\n"
         << " chr5  60000  70000 AAA\n"
         << " chr5  73000  90000 BBB\n"
         << " chr5 100000 101000 CCC\n"
         << " $ " << PROGRAM_NAME << " -b input.bed -n 3 -i winnum\n"
         << " chr5        60000   63334   1\n"
         << " chr5        63334   66668   2\n"
         << " chr5        66668   70000   3\n"
         << " chr5        73000   78667   1\n"
         << " chr5        78667   84334   2\n"
         << " chr5        84334   90000   3\n"
         << " chr5        100000  100334  1\n"
         << " chr5        100334  100668  2\n"
         << " chr5        100668  101000  3\n"
         << " ...\n"
         << "\n";

    // --- example: name column from source ID + window number ------------
    cerr << " # Add a name column, based on the source ID + window number: \n"
         << " $ cat input.bed\n"
         << " chr5  60000  70000 AAA\n"
         << " chr5  73000  90000 BBB\n"
         << " chr5 100000 101000 CCC\n"
         << " $ " << PROGRAM_NAME << " -b input.bed -n 3 -i srcwinnum\n"
         << " chr5        60000   63334   AAA_1\n"
         << " chr5        63334   66668   AAA_2\n"
         << " chr5        66668   70000   AAA_3\n"
         << " chr5        73000   78667   BBB_1\n"
         << " chr5        78667   84334   BBB_2\n"
         << " chr5        84334   90000   BBB_3\n"
         << " chr5        100000  100334  CCC_1\n"
         << " chr5        100334  100668  CCC_2\n"
         << " chr5        100668  101000  CCC_3\n"
         << " ...\n"
         << "\n";

    // trailing blank line, then leave with a non-zero status
    cerr << "\n";

    exit(1);

}