Commit 833bd238 authored by Christian Krause's avatar Christian Krause
Browse files

Merge branch 'master' into wip-issue-206

parents af0f9fb6 b0bc5b7e
......@@ -36,8 +36,8 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/bed12ToBed6 \
$(SRC_DIR)/closestFile \
$(SRC_DIR)/clusterBed \
$(SRC_DIR)/complementBed \
$(SRC_DIR)/coverageBed \
$(SRC_DIR)/complementFile \
$(SRC_DIR)/coverageFile \
$(SRC_DIR)/expand \
$(SRC_DIR)/fastaFromBed \
$(SRC_DIR)/flankBed \
......@@ -66,7 +66,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/sortBed \
$(SRC_DIR)/spacingFile \
$(SRC_DIR)/split \
$(SRC_DIR)/subtractBed \
$(SRC_DIR)/subtractFile \
$(SRC_DIR)/tagBam \
$(SRC_DIR)/unionBedGraphs \
$(SRC_DIR)/windowBed \
......@@ -79,6 +79,8 @@ UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile \
$(SRC_DIR)/utils/chromsweep \
$(SRC_DIR)/utils/Contexts \
$(SRC_DIR)/utils/FileRecordTools \
$(SRC_DIR)/utils/FileRecordTools/FileReaders \
$(SRC_DIR)/utils/FileRecordTools/Records \
$(SRC_DIR)/utils/general \
$(SRC_DIR)/utils/gzstream \
$(SRC_DIR)/utils/fileType \
......@@ -93,15 +95,47 @@ UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile \
$(SRC_DIR)/utils/Fasta \
$(SRC_DIR)/utils/VectorOps \
$(SRC_DIR)/utils/GenomeFile \
$(SRC_DIR)/utils/RecordOutputMgr
$(SRC_DIR)/utils/RecordOutputMgr \
$(SRC_DIR)/utils/ToolBase \
$(SRC_DIR)/utils/aux
BUILT_OBJECTS = $(OBJ_DIR)/*.o
INCLUDES = -I$(SRC_DIR)/utils/bedFile \
-I$(SRC_DIR)/utils/BinTree \
-I$(SRC_DIR)/utils/version \
-I$(SRC_DIR)/utils/bedGraphFile \
-I$(SRC_DIR)/utils/chromsweep \
-I$(SRC_DIR)/utils/Contexts \
-I$(SRC_DIR)/utils/FileRecordTools \
-I$(SRC_DIR)/utils/FileRecordTools/FileReaders \
-I$(SRC_DIR)/utils/FileRecordTools/Records \
-I$(SRC_DIR)/utils/general \
-I$(SRC_DIR)/utils/gzstream \
-I$(SRC_DIR)/utils/fileType \
-I$(SRC_DIR)/utils/bedFilePE \
-I$(SRC_DIR)/utils/KeyListOps \
-I$(SRC_DIR)/utils/NewChromsweep \
-I$(SRC_DIR)/utils/sequenceUtilities \
-I$(SRC_DIR)/utils/tabFile \
-I$(SRC_DIR)/utils/BamTools \
-I$(SRC_DIR)/utils/BamTools/include \
-I$(SRC_DIR)/utils/BamTools/src \
-I$(SRC_DIR)/utils/BamTools-Ancillary \
-I$(SRC_DIR)/utils/BlockedIntervals \
-I$(SRC_DIR)/utils/Fasta \
-I$(SRC_DIR)/utils/VectorOps \
-I$(SRC_DIR)/utils/GenomeFile \
-I$(SRC_DIR)/utils/RecordOutputMgr \
-I$(SRC_DIR)/utils/ToolBase \
-I$(SRC_DIR)/utils/aux \
all: print_banner $(OBJ_DIR) $(BIN_DIR) autoversion $(UTIL_SUBDIRS) $(SUBDIRS)
@echo "- Building main bedtools binary."
@$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c src/bedtools.cpp -o obj/bedtools.o -I$(UTIL_DIR)/version/
@$(CXX) $(CXXFLAGS) $(CPPFLAGS) -o $(BIN_DIR)/bedtools $(BUILT_OBJECTS) -L$(UTIL_DIR)/BamTools/lib/ -lbamtools $(LIBS) $(LDFLAGS)
@$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c src/bedtools.cpp -o obj/bedtools.o $(INCLUDES)
@$(CXX) $(CXXFLAGS) $(CPPFLAGS) -o $(BIN_DIR)/bedtools $(BUILT_OBJECTS) -L$(UTIL_DIR)/BamTools/lib/ -lbamtools $(LIBS) $(LDFLAGS) $(INCLUDES)
@echo "done."
@echo "- Creating executables for old CLI."
......@@ -145,7 +179,7 @@ $(UTIL_SUBDIRS) $(SUBDIRS): $(OBJ_DIR) $(BIN_DIR)
clean:
@$(MAKE) --no-print-directory --directory=$(BT_ROOT) clean_api
@echo " * Cleaning up."
@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
@rm -f $(VERSION_FILE) $(OBJ_DIR)/* $(BIN_DIR)/*
.PHONY: clean
test: all
......
bedtools - a swiss army knife for genome arithmetic
===================================================
**Current version**: 2.23.0
[Download current version](https://github.com/arq5x/bedtools2/releases/latest)
Note
-------
......
......@@ -28,7 +28,7 @@ extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest',
'sphinx.ext.intersphinx', 'sphinx.ext.todo',
'sphinx.ext.coverage', 'sphinx.ext.pngmath',
'sphinx.ext.ifconfig', 'sphinx.ext.viewcode',
'matplotlib.sphinxext.plot_directive']
'matplotlib.sphinxext.plot_directive']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['templates']
......@@ -44,16 +44,16 @@ master_doc = 'index'
# General information about the project.
project = u'bedtools'
copyright = u'2009 - 2014, Aaron R. Quinlan & Neil Kindlon'
copyright = u'2009 - 2015, Aaron R. Quinlan & Neil Kindlon'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '2.23.0'
version = '2.24.0'
# The full version, including alpha/beta/rc tags.
release = '2.23.0'
release = '2.24.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
......@@ -187,7 +187,7 @@ htmlhelp_basename = 'bedtools-docs'
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'bedtools.tex', u'Bedtools Documentation',
u'Quinlan lab @ UVa', 'manual'),
u'Quinlan lab @ Univ. of Utah', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
......@@ -219,7 +219,7 @@ latex_documents = [
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'bedtools', u'Bedtools Documentation', [u'UVa'], 1)
('index', 'bedtools', u'Bedtools Documentation', [u'UU'], 1)
]
# Example configuration for intersphinx: refer to the Python standard library.
......
......@@ -2,6 +2,51 @@
Release History
###############
Version 2.24.0 (27-May-2015)
============================
1. The `coverage` tool now takes advantage of pre-sorted intervals via the `-sorted` option. This allows the `coverage` tool to be much faster, use far less memory, and report coverage for intervals in their original order in the input file.
2. We have changed the behavior of the `coverage` tool such that it is consistent with the other tools. Specifically, coverage is now computed for the intervals in the A file based on the overlaps with the B file, rather than vice versa.
3. The ``subtract`` tool now supports pre-sorted data via the ``-sorted`` option and is therefore much faster and more scalable.
4. The ``-nonamecheck`` option provides greater tolerance for chromosome labeling when using the ``-sorted`` option.
5. Support for multiple SVLEN tags in VCF format, and fixed a bug that failed to process SVLEN tags coming at the end of a VCF INFO field.
6. Support for reverse complementing IUPAC codes in the ``getfasta`` tool.
7. Provided greater flexibility for "BED+" files, where the first 3 columns are chrom, start, and end, and the remaining columns are free-form.
8. We now detect stale FAI files and recreate an index thanks to a fix from @gtamazian.
9. New feature from Pierre Lindenbaum allowing the ``sort`` tool to sort files based on the chromosome order in a ``faidx`` file.
10. Eliminated multiple compilation warnings thanks to John Marshall.
11. Fixed bug in handling INS variants in VCF files.
Version 2.23.0 (22-Feb-2015)
============================
1. Added ``-k`` option to the closest tool to report the k-closest features in one or more -b files.
2. Added ``-fd`` option to the closest tool for the reporting of downstream features in one or more -b files. Requires -D to dictate how "downstream" should be defined.
3. Added ``-fu`` option to the closest tool for the reporting of upstream features in one or more -b files. Requires -D to dictate how "upstream" should be defined.
4. Pierre Lindenbaum added a new split tool that will split an input file into multiple sub files. Unlike UNIX split, it can balance the chunking of the sub files not just by number of lines, but also by total number of base pairs in each sub file.
5. Added a new spacing tool that reports the distances between features in a file.
6. Jay Hesselberth added a ``-reverse`` option to the makewindows tool that reverses the order of the assigned window numbers.
7. Fixed a bug that caused incorrect reporting of overlap for zero-length BED records. Thanks to @roryk.
8. Fixed a bug that caused the map tool to not allow ``-b`` to be specified before ``-a``. Thanks to @semenko.
9. Fixed a bug in ``makewindows`` that mistakenly required ``-s`` with ``-n``.
Version 2.22.1 (01-Jan-2015)
============================
1. When using -sorted with intersect, map, and closest, bedtools can now detect and warn you when your input datasets employ different chromosome sorting orders.
2. Fixed multiple bugs in the new, faster closest tool. Specifically, the -iu, -id, and -D options were not behaving properly with the new "sweeping" algorithm that was implemented for the 2.22.0 release. Many thanks to Sol Katzman for reporting these issues and for providing a detailed analysis and example files.
3. We FINALLY wrote proper documentation for the closest tool (http://bedtools.readthedocs.org/en/latest/content/tools/closest.html)
4. Fixed bug in the tag tool when using -intervals, -names, or -scores. Thanks to Yarden Katz for reporting this.
5. Fixed issues with chromosome boundaries in the slop tool when using negative distances. Thanks to @acdaugherty!
6. Multiple improvements to the fisher tool. Added a -m option to the fisher tool to merge overlapping intervals prior to comparing overlaps between two input files. Thanks to @brentp.
7. Fixed a bug in makewindows tool requiring the use of -b with -s.
8. Fixed a bug in intersect that prevented -split from detecting complete overlaps with -f 1. Thanks to @tleonardi .
9. Restored the default decimal precision to the groupby tool.
10. Added the -prec option to the merge and map tools to specify the decimal precision of the output.
Version 2.22.0 (12-Nov-2014)
============================
1. The "closest" tool now requires sorted files, but this requirement now enables it to simultaneously find the closest intervals from many (not just one) files.
2. We now have proper support for "imprecise" SVs in VCF format. This addresses a long standing (sorry) limitation in the way bedtools handles VCF files.
Version 2.21.0 (18-Sep-2014)
============================
1. Added ability to intersect against multiple `-b` files in the `intersect` tool.
......
......@@ -3,67 +3,101 @@
###############
*coverage*
###############
**coverageBed** computes both the *depth* and *breadth* of coverage of features in file A across the features
in file B. For example, **coverageBed** can compute the coverage of sequence alignments (file A) across 1
kilobase (arbitrary) windows (file B) tiling a genome of interest. One advantage that **coverageBed**
offers is that it not only *counts* the number of features that overlap an interval in file B, it also
computes the fraction of bases in B interval that were overlapped by one or more features. Thus,
**coverageBed** also computes the *breadth* of coverage for each interval in B.
The ``bedtools coverage`` tool computes both the *depth* and *breadth* of coverage of features in file B on the features
in file A. For example, ``bedtools coverage`` can compute the coverage of sequence alignments (file B) across 1
kilobase (arbitrary) windows (file A) tiling a genome of interest. One advantage that ``bedtools coverage``
offers is that it not only *counts* the number of features that overlap an interval in file A, it also
computes the fraction of bases in the interval in A that were overlapped by one or more features. Thus,
``bedtools coverage`` also computes the *breadth* of coverage observed for each interval in A.
==========================================================================
Usage and option summary
==========================================================================
Usage:
.. note::
::
If you are trying to compute coverage for very large files and are having trouble
with excessive memory usage, please presort your data by chromosome and
then by start position (e.g., ``sort -k1,1 -k2,2n in.bed > in.sorted.bed``
for BED files) and then use the ``-sorted`` option. This invokes a
memory-efficient algorithm designed for large files.
coverageBed [OPTIONS] -a <BED/GFF/VCF> -b <BED/GFF/VCF>
=========================== ===============================================================================================================================================================================================================
Option Description
=========================== ===============================================================================================================================================================================================================
**-abam** BAM file A. Each BAM alignment in A is compared to B in search of overlaps. Use "stdin" if passing A with a UNIX pipe: For example:
.. important::
As of version 2.24.0, the `coverage` tool has changed such that the coverage is
computed for the A file, not the B file. This changes the command line interface
to be consistent with the other tools. Also, the `coverage` tool
can accept multiple files for the `-b` option. This allows one to measure
coverage between a single query (`-a`) file and multiple database files (`-b`) at once!
| samtools view -b <BAM> | intersectBed -abam stdin -b genes.bed
**-s** Force strandedness. That is, only features in A are only counted towards coverage in B if they are the same strand. *By default, this is disabled and coverage is counted without respect to strand*.
**-hist** Report a histogram of coverage for each feature in B as well as a summary histogram for _all_ features in B.
| Output (tab delimited) after each feature in B:
| 1) depth
| 2) # bases at depth
| 3) size of B
| 4) % of B at depth
**-d** Report the depth at each position in each B feature. Positions reported are one based. Each position and depth follow the complete B feature.
**-split** Treat "split" BAM or BED12 entries as distinct BED intervals when computing coverage. For BAM files, this uses the CIGAR "N" and "D" operations to infer the blocks for computing coverage. For BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12).
=========================== ===============================================================================================================================================================================================================
.. seealso::
:doc:`../tools/intersect`
:doc:`../tools/genomecov`
===============================
Usage and option summary
===============================
**Usage**:
::
bedtools coverage [OPTIONS] -a <FILE> \
-b <FILE1, FILE2, ..., FILEN>
**(or)**:
::
coverageBed [OPTIONS] -a <FILE> \
-b <FILE1, FILE2, ..., FILEN>
=========================== =========================================================================================================================================================
Option Description
=========================== =========================================================================================================================================================
**-a** BAM/BED/GFF/VCF file "A". Each feature in A is compared to B in search of overlaps. Use "stdin" if passing A with a UNIX pipe.
**-b** One or more BAM/BED/GFF/VCF file(s) "B". Use "stdin" if passing B with a UNIX pipe.
**NEW!!!**: -b may be followed with multiple databases and/or wildcard (*) character(s).
**-abam** BAM file A. Each BAM alignment in A is compared to B in search of overlaps. Use "stdin" if passing A with a UNIX pipe: For example: samtools view -b <BAM> | bedtools intersect -abam stdin -b genes.bed. **Note**: no longer necessary after version 2.19.0
**-hist** | Report a histogram of coverage for each feature in A as well as a summary histogram for _all_ features in A.
| Output (tab delimited) after each feature in A:
| 1) depth
| 2) # bases at depth
| 3) size of A
| 4) % of A at depth
**-d** Report the depth at each position in each A feature. Positions reported are one based. Each position and depth follow the complete A feature.
**-counts** Only report the count of overlaps, don't compute fraction, etc. Restricted by -f and -r.
**-f** Minimum overlap required as a fraction of A. Default is 1E-9 (i.e. 1bp).
**-r** Require that the fraction of overlap be reciprocal for A and B. In other words, if -f is 0.90 and -r is used, this requires that B overlap at least 90% of A and that A also overlaps at least 90% of B.
**-s** Force "strandedness". That is, only report hits in B that overlap A on the same strand. By default, overlaps are reported without respect to strand.
**-S** Require different strandedness. That is, only report hits in B that overlap A on the _opposite_ strand. By default, overlaps are reported without respect to strand.
**-split** Treat "split" BAM (i.e., having an "N" CIGAR operation) or BED12 entries as distinct BED intervals.
**-sorted** For very large B files, invoke a "sweeping" algorithm that requires position-sorted (e.g., ``sort -k1,1 -k2,2n`` for BED files) input. When using -sorted, memory usage remains low even for very large files.
**-g** Specify a genome file that defines the expected chromosome order in the input files for use with the ``-sorted`` option.
**-header** Print the header from the A file prior to results.
**-sortout** When using *multiple databases* (`-b`), sort the output DB hits for each record.
**-nobuf** Disable buffered output. Using this option will cause each line of output to be printed as it is generated, rather than saved in a buffer. This will make printing large output files noticeably slower, but can be useful in conjunction with other software tools and scripts that need to process one line of bedtools output at a time.
**-iobuf** Follow with desired integer size of read buffer. Optional suffixes `K/M/G` supported. **Note**: currently has no effect with compressed files.
=========================== =========================================================================================================================================================
==========================================================================
Default behavior
==========================================================================
After each interval in B, **coverageBed** will report:
After each interval in A, ``bedtools coverage`` will report:
1) The number of features in A that overlapped (by at least one base pair) the B interval.
2) The number of bases in B that had non-zero coverage from features in A.
3) The length of the entry in B.
4) The fraction of bases in B that had non-zero coverage from features in A.
1) The number of features in B that overlapped (by at least one base pair) the A interval.
2) The number of bases in A that had non-zero coverage from features in B.
3) The length of the entry in A.
4) The fraction of bases in A that had non-zero coverage from features in B.
Below are the number of features in A (N=...) overlapping B and fraction of bases in B with coverage.
Below are the number of features in B (N=...) overlapping A and fraction of bases in A with coverage.
::
Chromosome ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
BED FILE B *************** *************** ****** **************
BED FILE A *************** *************** ****** **************
BED File A ^^^^ ^^^^ ^^ ^^^^^^^^^ ^^^ ^^ ^^^^
BED File B ^^^^ ^^^^ ^^ ^^^^^^^^^ ^^^ ^^ ^^^^
^^^^^^^^ ^^^^^ ^^^^^ ^^
Result [ N=3, 10/15 ] [ N=1, 2/15 ] [N=1,6/6] [N=6, 12/14 ]
......@@ -71,20 +105,20 @@ Below are the number of features in A (N=...) overlapping B and fraction of base
For example:
::
.. code-block:: bash
$ cat A.bed
chr1 0 100
chr1 100 200
chr2 0 100
cat A.bed
$ cat B.bed
chr1 10 20
chr1 20 30
chr1 30 40
chr1 100 200
cat B.bed
chr1 0 100
chr1 100 200
chr2 0 100
coverageBed -a A.bed -b B.bed
$ bedtools coverage -a A.bed -b B.bed
chr1 0 100 3 30 100 0.3000000
chr1 100 200 1 100 100 1.0000000
chr2 0 100 0 0 100 0.0000000
......@@ -95,57 +129,58 @@ For example:
``-s`` Calculating coverage by strand
==========================================================================
Use the "**-s**" option if one wants to only count coverage if features in B are on the same strand as the
feature / window in B. This is especially useful for RNA-seq experiments.
feature / window in A. This is especially useful for RNA-seq experiments.
For example (note the difference in coverage with and without **-s**):
::
.. code-block:: bash
$ cat A.bed
chr1 0 100 b1 1 +
chr1 100 200 b2 1 -
chr2 0 100 b3 1 +
cat A.bed
$ cat B.bed
chr1 10 20 a1 1 -
chr1 20 30 a2 1 -
chr1 30 40 a3 1 -
chr1 100 200 a4 1 +
cat B.bed
chr1 0 100 b1 1 +
chr1 100 200 b2 1 -
chr2 0 100 b3 1 +
coverageBed -a A.bed -b B.bed
$ bedtools coverage -a A.bed -b B.bed
chr1 0 100 b1 1 + 3 30 100 0.3000000
chr1 100 200 b2 1 - 1 100 100 1.0000000
chr2 0 100 b3 1 + 0 0 100 0.0000000
coverageBed -a A.bed -b B.bed -s
$ bedtools coverage -a A.bed -b B.bed -s
chr1 0 100 b1 1 + 0 0 100 0.0000000
chr1 100 200 b2 1 - 0 0 100 0.0000000
chr2 0 100 b3 1 + 0 0 100 0.0000000
==========================================================================
``-hist`` Creating a histogram of coverage for each feature in the B file
``-hist`` Creating a histogram of coverage for each feature in the A file
==========================================================================
One should use the "**-hist**" option to create, for each interval in B, a histogram of coverage of the
features in A across B.
One should use the "**-hist**" option to create, for each interval in A, a histogram of coverage of the
features in B across A.
In this case, each entire feature in B will be reported, followed by the depth of coverage, the number of
bases at that depth, the size of the feature, and the fraction covered. After all of the features in B have
been reported, a histogram summarizing the coverage among all features in B will be reported.
In this case, each entire feature in A will be reported, followed by the depth of coverage, the number of
bases at that depth, the size of the feature, and the fraction covered. After all of the features in A have
been reported, a histogram summarizing the coverage among all features in A will be reported.
::
.. code-block:: bash
cat A.bed
$ cat A.bed
chr1 0 100 b1 1 +
chr1 100 200 b2 1 -
chr2 0 100 b3 1 +
$ cat B.bed
chr1 10 20 a1 1 -
chr1 20 30 a2 1 -
chr1 30 40 a3 1 -
chr1 100 200 a4 1 +
cat B.bed
chr1 0 100 b1 1 +
chr1 100 200 b2 1 -
chr2 0 100 b3 1 +
coverageBed -a A.bed -b B.bed -hist
$ bedtools coverage -a A.bed -b B.bed -hist
chr1 0 100 b1 1 + 0 70 100 0.7000000
chr1 0 100 b1 1 + 1 30 100 0.3000000
chr1 100 200 b2 1 - 1 100 100 1.0000000
......@@ -154,28 +189,27 @@ been reported, a histogram summarizing the coverage among all features in B will
all 1 130 300 0.4333333
===========================================================================
``-d`` Reporting the per-base of coverage for each feature in the B file
``-d`` Reporting the per-base coverage for each feature in the A file
===========================================================================
One should use the "**-d**" option to create, for each interval in B, a detailed list of coverage at each of the
positions across each B interval.
One should use the "**-d**" option to create, for each interval in A, a detailed list of coverage at each of the
positions across each A interval.
The output will consist of a line for each one-based position in each B feature, followed by the coverage
The output will consist of a line for each one-based position in each A feature, followed by the coverage
detected at that position.
::
.. code-block:: bash
$ cat A.bed
chr1 0 10
cat A.bed
$ cat B.bed
chr1 0 5
chr1 3 8
chr1 4 8
chr1 5 9
cat B.bed
chr1 0 10
coverageBed -a A.bed -b B.bed -d
$ bedtools coverage -a A.bed -b B.bed -d
chr1 0 10 B 1 1
chr1 0 10 B 2 1
chr1 0 10 B 3 1
......@@ -186,14 +220,3 @@ detected at that position.
chr1 0 10 B 8 3
chr1 0 10 B 9 1
chr1 0 10 B 10 0
=============================================================================
``-split`` Reporting coverage with spliced alignments or blocked BED features
=============================================================================
As described in section 1.3.19, coverageBed will, by default, screen for overlaps against the entire span
of a spliced/split BAM alignment or blocked BED12 feature. When dealing with RNA-seq reads, for
example, one typically wants to only tabulate coverage for the portions of the reads that come from
exons (and ignore the interstitial intron sequence). The **-split** command allows for such coverage to be
performed.
......@@ -113,9 +113,17 @@ The ``-s`` option will only merge intervals that are overlapping/bookended
chr1 501 1000 a4 4 +
$ bedtools merge -i A.bed -s
chr1 100 250 +
chr1 501 1000 +
chr1 250 500 -
chr1 100 250
chr1 501 1000
chr1 250 500
To also report the strand, you could use the ``-c`` and ``-o`` operators (see below for more details):
.. code-block:: bash
$ bedtools merge -i A.bed -s -c 6 -o distinct
chr1 100 250 +
chr1 501 1000 +
==========================================================================
``-S`` Reporting merged intervals on a specific strand.
......
......@@ -17,8 +17,8 @@
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <string>
#include "version.h"
#include "BedtoolsDriver.h"
using namespace std;
......@@ -34,6 +34,9 @@ using namespace std;
// Parameter-checking macro.
// Evaluates to true only when the current command-line token argv[i] equals
// `param`: the first min(actualLen, paramLen) characters must match AND the
// two lengths must be identical (so a mere prefix is not accepted).
//   param     - the option string to test for (e.g. "-a")
//   paramLen  - length of `param`
//   actualLen - length of the token held in argv[i]
// The expansion refers to `argv`, `i` and an unqualified `min`, so all three
// must be visible at the point of use.
// The whole expansion is wrapped in parentheses so the macro behaves as a
// single expression when negated or combined with other operators.
#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], param, min(actualLen, paramLen)) == 0) && (actualLen == paramLen))
bool sub_main(const QuickString &subCmd);
void showHelp(const QuickString &subCmd);
int annotate_main(int argc, char* argv[]);//
int bamtobed_main(int argc, char* argv[]);//
int bamtofastq_main(int argc, char* argv[]);//
......@@ -41,10 +44,10 @@ int bed12tobed6_main(int argc, char* argv[]); //
int bedtobam_main(int argc, char* argv[]);//
int bedtoigv_main(int argc, char* argv[]);//
int bedpetobam_main(int argc, char* argv[]);//
int closest_main(int argc, char* argv[]); //
void closest_help();
int cluster_main(int argc, char* argv[]); //
int complement_main(int argc, char* argv[]);//
int coverage_main(int argc, char* argv[]); //
void complement_help();
void coverage_help();
int regress_test_main(int argc, char **argv); //
int expand_main(int argc, char* argv[]);//
int fastafrombed_main(int argc, char* argv[]);//
......@@ -52,13 +55,14 @@ int flank_main(int argc, char* argv[]); //
int genomecoverage_main(int argc, char* argv[]);//
int getoverlap_main(int argc, char* argv[]);//
int groupby_main(int argc, char* argv[]);//
int intersect_main(int argc, char* argv[]); //
int jaccard_main(int argc, char* argv[]); //
int fisher_main(int argc, char* argv[]); //
void intersect_help();
void map_help();
void jaccard_help(); //
void fisher_help();
int links_main(int argc, char* argv[]);//
int maskfastafrombed_main(int argc, char* argv[]);//
int map_main(int argc, char* argv[]); //
int merge_main(int argc, char* argv[]); //
void merge_help();
int multibamcov_main(int argc, char* argv[]);//
int multiintersect_main(int argc, char* argv[]);//
int nek_sandbox1_main(int argc, char* argv[]);//
......@@ -67,13 +71,13 @@ int pairtobed_main(int argc, char* argv[]);//
int pairtopair_main(int argc, char* argv[]);//
int random_main(int argc, char* argv[]); //
int reldist_main(int argc, char* argv[]); //
int sample_main(int argc, char* argv[]); //
void sample_help();
int shuffle_main(int argc, char* argv[]); //
int slop_main(int argc, char* argv[]); //
int split_main(int argc, char* argv[]); //
int sort_main(int argc, char* argv[]); //
int spacing_main(int argc, char* argv[]); //
int subtract_main(int argc, char* argv[]); //
void spacing_help();
void subtract_help();
int tagbam_main(int argc, char* argv[]);//
int unionbedgraphs_main(int argc, char* argv[]);//
int window_main(int argc, char* argv[]); //
......@@ -87,83 +91,81 @@ int main(int argc, char *argv[])
// make sure the user at least entered a sub_command
if (argc < 2) return bedtools_help();
std::string sub_cmd = argv[1];
QuickString subCmd(argv[1]);
BedtoolsDriver btDriver;
if (btDriver.supports(subCmd)) {
if (btDriver.subMain(argc, argv)) {
return 0;
} else if (!btDriver.hadError()) {
showHelp(subCmd);
return 1;
}
}
// genome arithmetic tools
if (sub_cmd == "intersect") return intersect_main(argc-1, argv+1);
else if (sub_cmd == "window") return window_main(argc-1, argv+1);
else if (sub_cmd == "closest") return closest_main(argc-1, argv+1);
else if (sub_cmd == "coverage") return coverage_main(argc-1, argv+1);
else if (sub_cmd == "map") return map_main(argc-1, argv+1);
else if (sub_cmd == "genomecov") return genomecoverage_main(argc-1, argv+1);
else if (sub_cmd == "merge") return merge_main(argc-1, argv+1);
else if (sub_cmd == "cluster") return cluster_main(argc-1, argv+1);
else if (sub_cmd == "complement") return complement_main(argc-1, argv+1);
else if (sub_cmd == "subtract") return subtract_main(argc-1, argv+1);
else if (sub_cmd == "slop") return slop_main(argc-1, argv+1);
else if (sub_cmd == "split") return split_main(argc-1, argv+1);
else if (sub_cmd == "flank") return flank_main(argc-1, argv+1);
else if (sub_cmd == "sort") return sort_main(argc-1, argv+1);
else if (sub_cmd == "random") return random_main(argc-1, argv+1);
else if (sub_cmd == "shuffle") return shuffle_main(argc-1, argv+1);
else if (sub_cmd == "spacing") return spacing_main(argc-1, argv+1);
else if (sub_cmd == "annotate") return annotate_main(argc-1, argv+1);