/*****************************************************************************
  bedtools.cpp

  bedtools command line interface.
  Thanks to Heng Li, as this interface is inspired by and
  based upon his samtools interface.

  (c) 2009-2011 - Aaron Quinlan
  Quinlan Laboratory
  Department of Public Health Sciences
  Center for Public Health Genomics
  University of Virginia
  aaronquinlan@gmail.com

  Licensed under the GNU General Public License 2.0 license.
******************************************************************************/
#include <iostream>
#include <fstream>
#include <stdlib.h>
Aaron's avatar
Aaron committed
20
#include <string>
21
#include "version.h"
Aaron's avatar
Aaron committed
22
23
24
25
26
27

using namespace std;

// define our program name
// (a string literal; bedtools_help() streams it as the first token of the
// help banner)
#define PROGRAM_NAME "bedtools"

Aaron's avatar
Aaron committed
28
// Colors for the terminal's menu.
// Each macro expands to a string literal holding a terminal escape sequence
// (ESC, i.e. octal 033, followed by "[...m"); stream one before the text to
// be colored and stream RESET afterwards.
#define RESET "\033[m"
#define GREEN "\033[1;32m"
#define BLUE  "\033[1;34m"
#define RED   "\033[1;31m"
Aaron's avatar
Aaron committed
33

Aaron's avatar
Aaron committed
34
35
36
// define our parameter checking macro
//
// Evaluates to true when the first min(actualLen, paramLen) characters of
// argv[i] equal those of `param` AND the two lengths are equal.
//
//   param      the option text to compare against (e.g. "-h")
//   paramLen   length to use for `param`
//   actualLen  length to use for argv[i]
//
// NOTE: `argv`, `i`, `strncmp` and `min` are NOT parameters; they are picked
// up from the scope in which the macro is expanded, so the expansion site
// must provide all four.
//
// The whole expansion and both length arguments are parenthesized so the
// macro behaves as a single expression: without the outer parentheses,
// `!PARAMETER_CHECK(...)` would negate only the strncmp() comparison.
#define PARAMETER_CHECK(param, paramLen, actualLen) \
    ((strncmp(argv[i], param, min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))

Aaron's avatar
Aaron committed
37
38
39
40
41
42
43
// Entry points of the individual bedtools sub-commands.  None of them is
// defined in the visible part of this file; main() selects one by the
// sub-command name and calls it with (argc-1, argv+1), i.e. with the leading
// program name dropped so that the sub-command name becomes argv[0].
int annotate_main(int argc, char* argv[]);
int bamtobed_main(int argc, char* argv[]);
int bed12tobed6_main(int argc, char* argv[]);
int bedtobam_main(int argc, char* argv[]);
int bedtoigv_main(int argc, char* argv[]);
int bedpetobam_main(int argc, char* argv[]);
int closest_main(int argc, char* argv[]);
int cluster_main(int argc, char* argv[]);
int complement_main(int argc, char* argv[]);
int coverage_main(int argc, char* argv[]);
int fastafrombed_main(int argc, char* argv[]);
int flank_main(int argc, char* argv[]);
int genomecoverage_main(int argc, char* argv[]);
int getoverlap_main(int argc, char* argv[]);
int groupby_main(int argc, char* argv[]);
int intersect_main(int argc, char* argv[]);
int links_main(int argc, char* argv[]);
int maskfastafrombed_main(int argc, char* argv[]);
int merge_main(int argc, char* argv[]);
int multibamcov_main(int argc, char* argv[]);
int multiintersect_main(int argc, char* argv[]);
int nuc_main(int argc, char* argv[]);
int pairtobed_main(int argc, char* argv[]);
int pairtopair_main(int argc, char* argv[]);
int shuffle_main(int argc, char* argv[]);
int slop_main(int argc, char* argv[]);
int sort_main(int argc, char* argv[]);
int subtract_main(int argc, char* argv[]);
int tagbam_main(int argc, char* argv[]);
int unionbedgraphs_main(int argc, char* argv[]);
int window_main(int argc, char* argv[]);
int windowmaker_main(int argc, char* argv[]);

// Menu printers defined further down in this file.
int bedtools_help(void);
int bedtools_faq(void);
Aaron's avatar
Aaron committed
71
72
73
74
75


int main(int argc, char *argv[])
{
    // make sure the user at least entered a sub_command
Aaron's avatar
gussy    
Aaron committed
76
    if (argc < 2) return bedtools_help();
Aaron's avatar
Aaron committed
77

78
79
    std::string sub_cmd = argv[1];

Aaron's avatar
Aaron committed
80
    // genome arithmetic tools
81
82
83
84
85
86
    if (sub_cmd == "intersect")        return intersect_main(argc-1, argv+1);
    else if (sub_cmd == "window")      return window_main(argc-1, argv+1);
    else if (sub_cmd == "closest")     return closest_main(argc-1, argv+1);
    else if (sub_cmd == "coverage")    return coverage_main(argc-1, argv+1);
    else if (sub_cmd == "genomecov")   return genomecoverage_main(argc-1, argv+1);
    else if (sub_cmd == "merge")       return merge_main(argc-1, argv+1);
87
    else if (sub_cmd == "cluster")     return cluster_main(argc-1, argv+1);    
88
89
90
91
92
93
94
    else if (sub_cmd == "complement")  return complement_main(argc-1, argv+1);
    else if (sub_cmd == "subtract")    return subtract_main(argc-1, argv+1);
    else if (sub_cmd == "slop")        return slop_main(argc-1, argv+1);
    else if (sub_cmd == "flank")       return flank_main(argc-1, argv+1);
    else if (sub_cmd == "sort")        return sort_main(argc-1, argv+1);
    else if (sub_cmd == "shuffle")     return shuffle_main(argc-1, argv+1);
    else if (sub_cmd == "annotate")    return annotate_main(argc-1, argv+1);
Aaron's avatar
Aaron committed
95
96

    // Multi-way file comparisonstools
97
98
    else if (sub_cmd == "multiinter")  return multiintersect_main(argc-1, argv+1);
    else if (sub_cmd == "unionbedg")   return unionbedgraphs_main(argc-1, argv+1);
Aaron's avatar
Aaron committed
99
100

    // paired-end conversion tools
101
102
    else if (sub_cmd == "pairtobed")   return pairtobed_main(argc-1, argv+1);
    else if (sub_cmd == "pairtopair")  return pairtopair_main(argc-1, argv+1);
Aaron's avatar
Aaron committed
103
104

    // format conversion tools
105
106
107
108
    else if (sub_cmd == "bamtobed")    return bamtobed_main(argc-1, argv+1);
    else if (sub_cmd == "bedtobam")    return bedtobam_main(argc-1, argv+1);
    else if (sub_cmd == "bedpetobam")  return bedpetobam_main(argc-1, argv+1);
    else if (sub_cmd == "bed12tobed6") return bed12tobed6_main(argc-1, argv+1);
Aaron's avatar
Aaron committed
109
110

    // BAM-specific tools
111
112
    else if (sub_cmd == "multicov")    return multibamcov_main(argc-1, argv+1);
    else if (sub_cmd == "tag")         return tagbam_main(argc-1, argv+1);
Aaron's avatar
Aaron committed
113
114

    // fasta tools
115
116
117
    else if (sub_cmd == "getfasta")    return fastafrombed_main(argc-1, argv+1);
    else if (sub_cmd == "maskfasta")   return maskfastafrombed_main(argc-1, argv+1);
    else if (sub_cmd == "nuc")         return nuc_main(argc-1, argv+1);
Aaron's avatar
Aaron committed
118
119

    // misc. tools
120
121
122
    else if (sub_cmd == "overlap")     return getoverlap_main(argc-1, argv+1);
    else if (sub_cmd == "igv")         return bedtoigv_main(argc-1, argv+1);
    else if (sub_cmd == "links")       return links_main(argc-1, argv+1);
Aaron's avatar
Aaron committed
123
    else if (sub_cmd == "makewindows") return windowmaker_main(argc-1, argv+1);
124
    else if (sub_cmd == "groupby")     return groupby_main(argc-1, argv+1);
Aaron's avatar
Aaron committed
125
    // help
Aaron's avatar
Aaron committed
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
    else if (sub_cmd == "-h" || sub_cmd == "--help" ||
             sub_cmd == "-help")
        return bedtools_help();

    // frequently asked questions
    else if (sub_cmd == "--FAQ" || sub_cmd == "--faq" ||
             sub_cmd == "-FAQ"  || sub_cmd == "-faq")
        return bedtools_faq();

    // verison information
    else if (sub_cmd == "-version" || sub_cmd == "--version")
        cout << "bedtools " << VERSION << endl;

    // verison information
    else if (sub_cmd == "-contact" || sub_cmd == "--contact")
    {
Aaron's avatar
Aaron committed
142
143
144
145
        cout << endl;
        cout << "- For further help, or to report a bug, please " << endl;
        cout << "  email the bedtools mailing list: " << endl;
        cout << "     bedtools-discuss@googlegroups.com" << endl << endl;
Aaron's avatar
Aaron committed
146

Aaron's avatar
Aaron committed
147
148
        cout << "- Stable releases of bedtools can be found at: " << endl;
        cout << "     http://bedtools.googlecode.com" << endl << endl;
Aaron's avatar
Aaron committed
149

Aaron's avatar
Aaron committed
150
151
        cout << "- The development repository can be found at: " << endl;
        cout << "     https://github.com/arq5x/bedtools" << endl << endl;
Aaron's avatar
Aaron committed
152
    }
Aaron's avatar
Aaron committed
153
    // unknown
Aaron's avatar
gussy    
Aaron committed
154
    else {
155
        // TODO: Implement a Levenstein-based "did you mean???"
Aaron's avatar
gussy    
Aaron committed
156
157
158
159
        cerr << "error: unrecognized command: " << argv[1] << endl << endl;
        return 1;
    }
    return 0;   
Aaron's avatar
Aaron committed
160
}
161
162
163

int bedtools_help(void)
{
Aaron's avatar
Aaron committed
164
165
166
167
168
169
170
171
172
173
174
175
176
    cout  << PROGRAM_NAME  << ": flexible tools for genome arithmetic and analysis.\n";
    cout << "usage:   bedtools <subcommand> [options]" << endl << endl;

    cout  << "The bedtools sub-commands include:" << endl;
    
    cout  << endl;
    cout  << " -Genome arithmetic:" << endl;
    cout  << "    intersect     "  << "Find overlapping intervals in various ways.\n";
    cout  << "    window        "  << "Find overlapping intervals within a window around an interval.\n";
    cout  << "    closest       "  << "Find the closest, potentially non-overlapping interval.\n";    
    cout  << "    coverage      "  << "Compute the coverage over defined intervals.\n";
    cout  << "    genomecov     "  << "Compute the coverage over an entire genome.\n";
    cout  << "    merge         "  << "Combine overlapping/nearby intervals into a single interval.\n";
177
    cout  << "    cluster       "  << "Cluster (but don't merge) overlapping/nearby intervals.\n";
Aaron's avatar
Aaron committed
178
179
180
181
182
183
184
    cout  << "    complement    "  << "Extract intervals _not_ represented by an interval file.\n";
    cout  << "    subtract      "  << "Remove intervals based on overlaps b/w two files.\n";
    cout  << "    slop          "  << "Adjust the size of intervals.\n";
    cout  << "    flank         "  << "Create new intervals from the flanks of existing intervals.\n";
    cout  << "    sort          "  << "Order the intervals in a file.\n";
    cout  << "    shuffle       "  << "Randomly redistrubute intervals in a genome.\n";
    cout  << "    annotate      "  << "Annotate coverage of features from multiple files.\n";
Aaron's avatar
gussy    
Aaron committed
185
    
Aaron's avatar
Aaron committed
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
    cout  << endl;
    cout  << " -Multi-way file comparisons:" << endl;
    cout  << "    multiinter    "  << "Identifies common intervals among multiple interval files.\n";
    cout  << "    unionbedg     "  << "Combines coverage intervals from multiple BEDGRAPH files.\n";

    cout  << endl;
    cout  << " -Paired-end manipulation:" << endl;
    cout  << "    pairtobed     "  << "Find pairs that overlap intervals in various ways.\n";
    cout  << "    pairtopair    "  << "Find pairs that overlap other pairs in various ways.\n";

    cout  << endl;
    cout  << " -Format conversion:\n";   
    cout  << "    bamtobed      "  << "Convert BAM alignments to BED (& other) formats.\n";
    cout  << "    bedtobam      "  << "Convert intervals to BAM records.\n";
    cout  << "    bedpetobam    "  << "Convert BEDPE intervals to BAM records.\n";    
    cout  << "    bed12tobed6   "  << "Breaks BED12 intervals into discrete BED6 intervals.\n";

    cout  << endl;
    cout  << " -Fasta manipulation:\n";  
    cout  << "    getfasta      "  << "Use intervals to extract sequences from a FASTA file.\n";
    cout  << "    maskfasta     "  << "Use intervals to mask sequences from a FASTA file.\n";
    cout  << "    nuc           "  << "Profile the nucleotide content of intervals in a FASTA file.\n";

    cout  << endl;
    cout  << " -BAM focused tools:\n";   
    cout  << "    multicov      "  << "Counts coverage from multiple BAMs at specific intervals.\n";
    cout  << "    tag           "  << "Tag BAM alignments based on overlaps with interval files.\n";

    cout  << endl;
    cout  << " -Miscellaneous tools:\n"; 
    cout  << "    overlap       "  << "Computes the amount of overlap from two intervals.\n"; 
    cout  << "    igv           "  << "Create an IGV snapshot batch script.\n";
    cout  << "    links         "  << "Create a HTML page of links to UCSC locations.\n";
Aaron's avatar
Aaron committed
219
    cout  << "    makewindows   "  << "Make interval \"windows\" across a genome.\n";
220
    cout  << "    groupby       "  << "Group by common cols. & summarize oth. cols. (~ SQL \"groupBy\")\n";
Aaron's avatar
Aaron committed
221
222
223
224

    cout  << endl;
    cout  << " -General help:\n";
    cout  << "    --help        "  << "Print this help menu.\n";
Aaron's avatar
Aaron committed
225
    //cout  << "    --faq         "  << "Frequently asked questions.\n";  TODO
Aaron's avatar
Aaron committed
226
227
    cout  << "    --version     "  << "What version of bedtools are you using?.\n";
    cout  << "    --contact     "  << "Feature requests, bugs, mailing lists, etc.\n";
Aaron's avatar
gussy    
Aaron committed
228
229
230

    cout << "\n";
    return 1;
231
232
233
234
235
}


// Prints the frequently-asked-questions text to stdout, framed by one blank
// line before and after.
//
// Always returns 1.
int bedtools_faq(void)
{
    std::cout <<
        "\n"
        "Q1. How do I see the help for a given command?\n"
        "A1. All BEDTools commands have a \"-h\" option. Additionally, some tools \n"
        "    will provide the help menu if you just type the command line \n"
        "    followed by enter. \n"
        "\n";
    return 1;
}