first cut at sphinx-based docs

356dc38a · Aaron · ebd31372 · 356dc38a · 356dc38a · 356dc38a
Commit 356dc38a authored 12 years ago by Aaron
--- a/docs/_build/doctrees/content/mergeBed.doctree
+++ b/docs/_build/doctrees/content/mergeBed.doctree
--- a/docs/_build/doctrees/content/overlap.doctree
+++ b/docs/_build/doctrees/content/overlap.doctree
--- a/docs/_build/doctrees/content/overview.doctree
+++ b/docs/_build/doctrees/content/overview.doctree
--- a/docs/_build/doctrees/content/pairToBed.doctree
+++ b/docs/_build/doctrees/content/pairToBed.doctree
--- a/docs/_build/doctrees/content/pairToPair.doctree
+++ b/docs/_build/doctrees/content/pairToPair.doctree
--- a/docs/_build/doctrees/content/quick-start.doctree
+++ b/docs/_build/doctrees/content/quick-start.doctree
--- a/docs/_build/doctrees/content/shuffleBed.doctree
+++ b/docs/_build/doctrees/content/shuffleBed.doctree
--- a/docs/_build/doctrees/content/slopBed.doctree
+++ b/docs/_build/doctrees/content/slopBed.doctree
--- a/docs/_build/doctrees/content/sortBed.doctree
+++ b/docs/_build/doctrees/content/sortBed.doctree
--- a/docs/_build/doctrees/content/subtractBed.doctree
+++ b/docs/_build/doctrees/content/subtractBed.doctree
--- a/docs/_build/doctrees/content/unionBedGraphs.doctree
+++ b/docs/_build/doctrees/content/unionBedGraphs.doctree
--- a/docs/_build/doctrees/content/windowBed.doctree
+++ b/docs/_build/doctrees/content/windowBed.doctree
--- a/docs/_build/doctrees/environment.pickle
+++ b/docs/_build/doctrees/environment.pickle
--- a/docs/_build/doctrees/index.doctree
+++ b/docs/_build/doctrees/index.doctree
--- a/docs/_build/html/.buildinfo
+++ b/docs/_build/html/.buildinfo
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: 74474c7b4889e5875af2f21f963837f1
+tags: fbb0d17656682115ca4d033fb2f83ba1
--- a/docs/_build/html/_sources/content/advanced-usage.txt
+++ b/docs/_build/html/_sources/content/advanced-usage.txt
+###############
+Advanced usage
+###############
+
+
+==========================================================================
+7.1 Mask all regions in a genome except for targeted capture regions.
+==========================================================================
+# Add 500 bp up and downstream of each probe
+::
+  slopBed -i probes.bed -b 500 > probes.500bp.bed
+  
+# Get a BED file of all regions not covered by the probes (+500 bp up/down)
+::
+  complementBed -i probes.500bp.bed -g hg18.genome > probes.500bp.complement.bed
+  
+# Create a masked genome where all bases are masked except for the probes +500bp
+::
+  maskFastaFromBed -in hg18.fa -bed probes.500bp.complement.bed -fo hg18.probecomplement.
+  masked.fa
+
+
+==========================================================================
+7.2 Screening for novel SNPs.
+==========================================================================
+# Find all SNPs that are not in dbSnp and not in the latest 1000 genomes calls
+::
+  intersectBed -a snp.calls.bed -b dbSnp.bed -v | intersectBed -a stdin -b 1KG.bed
+  -v > snp.calls.novel.bed
+
+
+
+==========================================================================
+7.3 Computing the coverage of features that align entirely within an
+interval.
+==========================================================================
+# By default, coverageBed counts any feature in A that overlaps B by >= 1 bp. If
+you want to require that a feature align entirely within B for it to be counted,
+you can first use intersectBed with the "-f 1.0" option.
+::
+  intersectBed -a features.bed -b windows.bed -f 1.0 | coverageBed -a stdin -b
+  windows.bed > windows.bed.coverage
+
+
+==========================================================================
+7.4 Computing the coverage of BAM alignments on exons.
+==========================================================================
+# One can combine SAMtools with BEDtools to compute coverage directly from the BAM
+data by using bamToBed.
+::
+  bamToBed -i reads.bam | coverageBed -a stdin -b exons.bed > exons.bed.coverage
+  
+# Take it a step further and require that coverage be from properly-paired reads.
+::
+  samtools view -bf 0x2 reads.bam | bamToBed -i stdin | coverageBed -a stdin -b
+  exons.bed > exons.bed.proper.coverage
+
+
+
+==========================================================================
+7.5 Computing coverage separately for each strand.
+==========================================================================
+# Use grep to only look at forward strand features (i.e. those that end in "+").
+::
+  bamToBed -i reads.bam | grep \+$ | coverageBed -a stdin -b genes.bed >
+  genes.bed.forward.coverage
+
+# Use grep to only look at reverse strand features (i.e. those that end in "-").
+::
+  bamToBed -i reads.bam | grep \-$ | coverageBed -a stdin -b genes.bed >
+  genes.bed.forward.coverage
+
+
+  
+==========================================================================
+7.6 Find structural variant calls that are private to one sample.
+==========================================================================
+# :
+::
+  pairToPair -a sample1.sv.bedpe -b othersamples.sv.bedpe -type neither >
+  sample1.sv.private.bedpe
+  
+  
+
+==================================================================================
+7.7 Exclude SV deletions that appear to be ALU insertions in the reference genome.
+==================================================================================
+# We'll require that 90% of the inner span of the deletion be overlapped by a
+recent ALU.
+::
+  pairToBed -a deletions.sv.bedpe -b ALUs.recent.bed -type notispan -f 0.80 >
+  deletions.notALUsinRef.bedpe
\ No newline at end of file
--- a/docs/_build/html/_sources/content/annotateBed.txt
+++ b/docs/_build/html/_sources/content/annotateBed.txt
+###############
+5.24 annotateBed
+###############
+**annotateBed** annotates one BED/VCF/GFF file with the coverage and number of overlaps observed
+from multiple other BED/VCF/GFF files. In this way, it allows one to ask to what degree one feature
+coincides with multiple other feature types with a single command.
+
+==========================================================================
+5.24.1 Usage and option summary
+==========================================================================
+Usage:
+::
+  annotateBed [OPTIONS] -i <BED/GFF/VCF> -files FILE1 FILE2 FILE3 ... FILEn
+  
+  
+===========================      ===============================================================================================================================================================================================================
+ Option                           Description
+ 
+===========================      ===============================================================================================================================================================================================================
+**-namesr**				         A list of names (one per file) to describe each file in -i. These names will be printed as a header line. 
+**-counts**					     Report the count of features in each file that overlap -i. Default behavior is to report the fraction of -i covered by each file.
+**-both**                        Report the count of features followed by the % coverage for each annotation file. Default is to report solely the fraction of -i covered by each file.
+**-s**                           Force strandedness. That is, only include hits in A that overlap B on the same strand. By default, hits are included without respect to strand.
+===========================      ===============================================================================================================================================================================================================
+
+
+
+
+
+==========================================================================
+5.24.2 Default behavior - annotate one file with coverage from others.
+==========================================================================
+By default, the fraction of each feature covered by each annotation file is reported after the complete
+feature in the file to be annotated.
+::
+  cat variants.bed
+  chr1 100  200   nasty 1  -
+  chr2 500  1000  ugly  2  +
+  chr3 1000 5000  big   3  -
+
+  cat genes.bed
+  chr1 150  200   geneA 1  +
+  chr1 175  250   geneB 2  +
+  chr3 0    10000 geneC 3  -
+
+  cat conserve.bed
+  chr1 0    10000 cons1 1  +
+  chr2 700  10000 cons2 2  -
+  chr3 4000 10000 cons3 3  +
+
+  cat known_var.bed
+  chr1 0    120   known1   -
+  chr1 150  160   known2   -
+  chr2 0    10000 known3   +
+
+  annotateBed -i variants.bed -files genes.bed conserv.bed known_var.bed
+  chr1 100  200  nasty 1 -  0.500000  1.000000  0.300000
+  chr2 500  1000 ugly  2 +  0.000000  0.600000  1.000000
+  chr3 1000 5000 big   3 -  1.000000  0.250000  0.000000
+
+
+==========================================================================
+5.24.3 Report the count of hits from the annotation files
+==========================================================================
+Figure:
+::
+  annotateBed -counts -i variants.bed -files genes.bed conserv.bed known_var.bed
+  chr1 100  200  nasty 1 - 2 1 2
+  chr2 500  1000 ugly  2 + 0 1 1
+  chr3 1000 5000 big   3 - 1 1 0
+
+
+
+==========================================================================
+5.24.4 Report both the count of hits and the fraction covered from the annotation files
+==========================================================================
+Figure:
+::
+  annotateBed -both -i variants.bed -files genes.bed conserv.bed known_var.bed
+  #chr start end  name  score +/-  cnt1 pct1     cnt2 pct2     cnt3 pct3
+  chr1 100   200  nasty 1     -    2    0.500000 1    1.000000 2    0.300000
+  chr2 500   1000 ugly  2     +    0    0.000000 1    0.600000 1    1.000000
+  chr3 1000  5000 big   3     -    1    1.000000 1    0.250000 0    0.000000
+
+
+  
+  
+==========================================================================
+5.24.5 Restrict the reporting to overlaps on the same strand.
+==========================================================================
+Note: Compare with the result from 5.24.3
+::
+  annotateBed -s -i variants.bed -files genes.bed conserv.bed known_var.bed
+  chr1  100   200   nasty  var1  -  0.000000  0.000000  0.000000
+  chr2  500   1000  ugly   var2  +  0.000000  0.000000  0.000000
+  chr3  1000  5000  big    var3  -  1.000000  0.000000  0.000000
+
+
+
--- a/docs/_build/html/_sources/content/bamToBed.txt
+++ b/docs/_build/html/_sources/content/bamToBed.txt
+###############
+5.4 bamToBed
+###############
+
+**bamToBed** is a general purpose tool that will convert sequence alignments in BAM format to either
+BED6, BED12 or BEDPE format. This enables one to convert BAM files for use with all of the other
+BEDTools. The CIGAR string is used to compute the alignment end coordinate in an "ungapped"
+fashion. That is, match ("M"), deletion ("D"), and splice ("N") operations are observed when computing
+alignment ends.
+
+============================================
+5.4.1 Usage and option summary
+============================================
+**Usage:**
+::
+  bamToBed [OPTIONS] -i <BAM>
+  
+
+======================           =========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
+Option                              Description
+======================           =========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
+**-bedpe**				         Write BAM alignments in BEDPE format. Only one alignment from paired-end reads will be reported. Specifically, it each mate is aligned to the same chromosome, the BAM alignment reported will be the one where the BAM insert size is greater than zero. When the mate alignments are interchromosomal, the lexicographically lower chromosome will be reported first. Lastly, when an end is unmapped, the chromosome and strand will be set to "." and the start and end coordinates will be set to -1. *By default, this is disabled and the output will be reported in BED format*.								 								 
+								 **NOTE: When using this option, it is required that the BAM file is sorted/grouped by the read name. This allows bamToBed to extract correct alignment coordinates for each end based on their respective CIGAR strings. It also assumes that the alignments for a given pair come in groups of twos. There is not yet a standard method for reporting multiple alignments using BAM. bamToBed will fail if an aligner does not report alignments in pairs**.							 
+                                 BAM files may be piped to bamToBed by specifying "-i stdin". See example below.
+**-bed12**					     Write "blocked" BED (a.k.a. BED12) format. This will convert "spliced" BAM alignments (denoted by the "N" CIGAR operation) to BED12.
+**-ed**					         Use the "edit distance" tag (NM) for the BED score field. Default for BED is to use mapping quality. Default for BEDPE is to use the *minimum* of the two mapping qualities for the pair. When -ed is used with -bedpe, the total edit distance from the two mates is reported.                                            
+**-tag**					     Use other *numeric* BAM alignment tag for BED score. Default for BED is to use mapping quality. Disallowed with BEDPE output.
+**-color**					     An R,G,B string for the color used with BED12 format. Default is (255,0,0).                              
+**-split**					     Report each portion of a "split" BAM (i.e., having an "N" CIGAR operation) alignment as a distinct BED intervals.			            
+======================           =========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
+
+By default, each alignment in the BAM file is converted to a 6 column BED. The BED "name" field is
+comprised of the RNAME field in the BAM alignment. If mate information is available, the mate (e.g.,
+"/1" or "/2") field will be appended to the name. The "score" field is the mapping quality score from the
+BAM alignment, unless the **-ed** option is used.
+
+Examples:
+::
+  bamToBed -i reads.bam | head -5
+  chr7   118970079   118970129   TUPAC_0001:3:1:0:1452#0/1   37   -
+  chr7   118965072   118965122   TUPAC_0001:3:1:0:1452#0/2   37   +
+  chr11  46769934    46769984    TUPAC_0001:3:1:0:1472#0/1   37   -
+  
+  bamToBed -i reads.bam -tag NM | head -5
+  chr7   118970079   118970129   TUPAC_0001:3:1:0:1452#0/1   1    -
+  chr7   118965072   118965122   TUPAC_0001:3:1:0:1452#0/2   3    +
+  chr11  46769934    46769984    TUPAC_0001:3:1:0:1472#0/1   1    -
+  
+  bamToBed -i reads.bam -bedpe | head -3
+  chr7   118965072   118965122   chr7   118970079   118970129
+         TUPAC_0001:3:1:0:1452#0 37     +     -
+  chr11  46765606    46765656    chr11  46769934    46769984
+         TUPAC_0001:3:1:0:1472#0 37     +     -
+  chr20  54704674    54704724    chr20  54708987    54709037
+         TUPAC_0001:3:1:1:1833#0 37     +    
+
+		 
+One can easily use samtools and bamToBed together as part of a UNIX pipe. In this example, we will
+only convert properly-paired (BAM flag == 0x2) reads to BED format.
+::
+  samtools view -bf 0x2 reads.bam | bamToBed -i stdin | head
+  chr7   118970079   118970129   TUPAC_0001:3:1:0:1452#0/1   37   -
+  chr7   118965072   118965122   TUPAC_0001:3:1:0:1452#0/2   37   +
+  chr11  46769934    46769984    TUPAC_0001:3:1:0:1472#0/1   37   -
+  chr11  46765606    46765656    TUPAC_0001:3:1:0:1472#0/2   37   +
+  chr20  54704674    54704724    TUPAC_0001:3:1:1:1833#0/1   37   +
+  chr20  54708987    54709037    TUPAC_0001:3:1:1:1833#0/2   37   -
+  chrX   9380413     9380463     TUPAC_0001:3:1:1:285#0/1    0    -
+  chrX   9375861     9375911     TUPAC_0001:3:1:1:285#0/2    0    +
+  chrX   131756978   131757028   TUPAC_0001:3:1:2:523#0/1    37   +
+  chrX   131761790   131761840   TUPAC_0001:3:1:2:523#0/2    37   -
+
+  
+==================================================================
+5.4.2 (-split)Creating BED12 features from "spliced" BAM entries. 
+==================================================================
+bamToBed will, by default, create a BED6 feature that represents the entire span of a spliced/split
+BAM alignment. However, when using the **-split** command, a BED12 feature is reported where BED
+blocks will be created for each aligned portion of the sequencing read.
+::
+  Chromosome  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+             
+  Exons       ***************                                    **********
+  
+  BED/BAM A      ^^^^^^^^^^^^....................................^^^^
+  
+  Result      ===============                                    ====
+  
--- a/docs/_build/html/_sources/content/bed12ToBed6.txt
+++ b/docs/_build/html/_sources/content/bed12ToBed6.txt
+###############
+5.21 bed12ToBed6
+###############
+**bed12ToBed6** is a convenience tool that converts BED features in BED12 (a.k.a. "blocked" BED
+features such as genes) to discrete BED6 features. For example, in the case of a gene with six exons,
+bed12ToBed6 would create six separate BED6 features (i.e., one for each exon).
+
+
+==========================================================================
+5.21.1 Usage and option summary
+==========================================================================
+Usage:
+::
+  bed12ToBed6 [OPTIONS] -i <BED12>
+  
+===========================      ===============================================================================================================================================================================================================
+ Option                           Description
+===========================      ===============================================================================================================================================================================================================
+**-i**				             The BED12 file that should be split into discrete BED6 features. *Use "stdin" when using piped input*.
+===========================      ===============================================================================================================================================================================================================
+
+
+
+
+==========================================================================
+5.21.2 Default behavior
+==========================================================================
+Figure:
+::
+  head data/knownGene.hg18.chr21.bed | tail -n 3
+  chr21 10079666  10120808   uc002yiv.1  0  -  10081686  1 0 1 2 0 6 0 8
+        0     4   528,91,101,215, 0,1930,39750,40927,
+  chr21 10080031  10081687   uc002yiw.1  0  -  10080031  1 0 0 8 0 0 3 1
+        0     2   200,91,    0,1565,
+  chr21 10081660  10120796   uc002yix.2  0  -  10081660  1 0 0 8 1 6 6 0
+        0     3   27,101,223,0,37756,38913,
+  
+  head data/knownGene.hg18.chr21.bed | tail -n 3 | bed12ToBed6 -i stdin
+  chr21 10079666  10080194  uc002yiv.1 0  -
+  chr21 10081596  10081687  uc002yiv.1 0  -
+  chr21 10119416  10119517  uc002yiv.1 0  -
+  chr21 10120593  10120808  uc002yiv.1 0  -
+  chr21 10080031  10080231  uc002yiw.1 0  -
+  chr21 10081596  10081687  uc002yiw.1 0  -
+  chr21 10081660  10081687  uc002yix.2 0  -
+  chr21 10119416  10119517  uc002yix.2 0  -
+  chr21 10120573  10120796  uc002yix.2 0  -
+
+
--- a/docs/_build/html/_sources/content/bedToBam.txt
+++ b/docs/_build/html/_sources/content/bedToBam.txt
+###############
+5.18 bedToBam
+###############
+**bedToBam** converts features in a feature file to BAM format. This is useful as an efficient means of
+storing large genome annotations in a compact, indexed format for visualization purposes.
+
+==========================================================================
+5.18.1 Usage and option summary
+==========================================================================
+Usage:
+::
+  bedToBam [OPTIONS] -i <BED/GFF/VCF> -g <GENOME> > <BAM>
+  
+===========================      ===============================================================================================================================================================================================================
+ Option                           Description
+===========================      ===============================================================================================================================================================================================================
+**-mapq**				         Set a mapping quality (SAM MAPQ field) value for all BED entries. *Default: 255*			 
+**-ubam**					     Write uncompressed BAM output. The default is write compressed BAM output.
+**-bed12**                       Indicate that the input BED file is in BED12 (a.k.a "blocked" BED) format. In this case, bedToBam will convert blocked BED features (e.g., gene annotaions) into "spliced" BAM alignments by creating an appropriate CIGAR string.
+===========================      ===============================================================================================================================================================================================================
+
+
+
+
+==========================================================================
+5.18.2 Default behavior
+==========================================================================
+The default behavior is to assume that the input file is in unblocked format. For example:
+::
+  head -5 rmsk.hg18.chr21.bed
+  chr21 9719768  9721892  ALR/Alpha  1004  +
+  chr21 9721905  9725582  ALR/Alpha  1010  +
+  chr21 9725582  9725977  L1PA3 3288 +
+  chr21 9726021  9729309  ALR/Alpha  1051  +
+  chr21 9729320  9729809  L1PA3 3897 -
+
+  bedToBam -i rmsk.hg18.chr21.bed -g human.hg18.genome > rmsk.hg18.chr21.bam
+
+  samtools view rmsk.hg18.chr21.bam | head -5
+  ALR/Alpha  0   chr21 9719769  255  2124M *  0  0  *  *
+  ALR/Alpha  0   chr21 9721906  255  3677M *  0  0  *  *
+  L1PA3      0   chr21 9725583  255  395M  *  0  0  *  *
+  ALR/Alpha  0   chr21 9726022  255  3288M *  0  0  *  *
+  L1PA3      16  chr21 9729321  255  489M  *  0  0  *  *
+ 
+
+==========================================================================
+5.18.3 Creating "spliced" BAM entries from "blocked" BED features
+==========================================================================
+Optionally, **bedToBam** will create spliced BAM entries from "blocked" BED features by using the
+-bed12 option. This will create CIGAR strings in the BAM output that will be displayed as "spliced"
+alignments. The image illustrates this behavior, as the top track is a BAM representation (using
+bedToBam) of a BED file of UCSC genes.
+
+For example:
+::
+  bedToBam -i knownGene.hg18.chr21.bed -g human.hg18.genome -bed12 > knownGene.bam
+  
+  samtools view knownGene.bam | head -2
+  uc002yip.1  16   chr21 9928614   2                       5                        5
+  
+  298M1784N71M1411N93M3963N80M1927N106M3608N81M1769N62M11856N89M98N82M816N61M6910N65M
+  738N64M146N100M1647N120M6478N162M1485N51M6777N60M9274N54M880N54M1229N54M2377N54M112
+  68N58M2666N109M2885N158M     *   0  0  *  *
+  uc002yiq.1  16   chr21 9928614   2                       5                        5
+  
+  298M1784N71M1411N93M3963N80M1927N106M3608N81M1769N62M11856N89M98N82M816N61M6910N65M
+  738N64M146N100M1647N120M6478N162M1485N51M6777N60M10208N54M1229N54M2377N54M11268N58M
+  2666N109M2885N158M       *   0   0  *  *
+
+