From e5e21cfe8f8b05f98e836d5f41f58b3de735fd95 Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Wed, 18 Nov 2009 21:01:47 -0500 Subject: [PATCH] Version 2.3. Just before paper submission. --- RELEASE_HISTORY | 13 +++++++++ USAGE_EXAMPLES | 7 ++--- scripts/gffToBed.py | 6 ++-- scripts/samToBed.py | 56 ++++++++++++++++++++++++------------- src/utils/version/version.h | 2 +- 5 files changed, 56 insertions(+), 28 deletions(-) diff --git a/RELEASE_HISTORY b/RELEASE_HISTORY index a1564518..5c1a445b 100644 --- a/RELEASE_HISTORY +++ b/RELEASE_HISTORY @@ -1,3 +1,16 @@ +Version 2.3.0 (11/18/2009) +1. Added four new tools: + - shuffleBed. Randomly permutes the locations of a BED file among a genome. Useful for testing for significant overlap enrichments. + - slopBed. Adds a requested number of base pairs to each end of a BED feature. Constrained by the size of each chromosome. + - maskFastaFromBed. Masks a FASTA file based on BED coordinates. Useful for making custom genome files from targeted capture experiments, etc. + - pairToPair. Returns overlaps between two paired-end BED files. This is great for finding structural variants that are + private or shared among samples. + +2. Increased the speed of intersectBed by nearly 50%. +3. Improved/corrected some of the help messages. +4. Improved sanity checking for BED entries. + + Version 2.2.4 (10/27/2009) 1. Updated the mergeBed documentation to describe the -names option which allows one to report the names of the features that were merged (separated by semicolons). diff --git a/USAGE_EXAMPLES b/USAGE_EXAMPLES index 9dfdba4f..078499cc 100755 --- a/USAGE_EXAMPLES +++ b/USAGE_EXAMPLES @@ -1,13 +1,10 @@ ============================== -=== BEDTools Version 2.1.2 === +=== BEDTools === ============================== Created by Aaron Quinlan Spring 2009. Copyright 2009 Aaron Quinlan. All rights reserved. -http://people.virginia.edu/~arq5x/bedtools.html - -Free for non-profit or academic use. 
Please contact me for commercial use. - +http://code.google.com/p/bedtools ===Example Usage=== diff --git a/scripts/gffToBed.py b/scripts/gffToBed.py index 5200e090..6d72ea95 100755 --- a/scripts/gffToBed.py +++ b/scripts/gffToBed.py @@ -15,9 +15,9 @@ import re help_message = ''' -gffToBed -g <gff> - -OPTIONS: +gffToBed.py -g <gff> + +ABSTRACT: Converts genome annotations in GFF format to BED format. ''' """ diff --git a/scripts/samToBed.py b/scripts/samToBed.py index f3da61f0..ee541689 100755 --- a/scripts/samToBed.py +++ b/scripts/samToBed.py @@ -16,15 +16,30 @@ import re help_message = ''' -samToBed -s <sam> -t <alignment type> +samToBed.py -s <sam> -t <alignment type> + +ABSTRACT: Converts aligned reads in SAM format to BED format. OPTIONS: - -s The SAM file to be converted to BED + -s The SAM file to be converted to BED (use "stdin" for piped input) -t What types of alignments should be reported? "all" all aligned reads will be reported (Default) "con" only concordant pairs will be reported "dis" only discordant pairs will be reported +EXAMPLE: + Can be used with samtools to extract alignments and compare them to BED + annotations. + + (1) Land a BED file first. + $ samtools view reads.sorted.bam > reads.sorted.sam + $ samToBed.py -s reads.sorted.sam -t all > reads.sorted.bed + $ intersectBed -a reads.sorted.bed -b refseq.bed > reads.intersect.refseq.bed + + (2) "One-liner." + $ samtools view reads.sorted.bam | samToBed.py -s stdin -t all | \ + intersectBed -a stdin -b refseq.bed > reads.intersect.refseq.bed + ''' @@ -35,15 +50,17 @@ class Usage(Exception): def processSAM(file, alignType): """ - Load a SAM file and convert each line to BED format. - - We avoid readlines() in this case, as SAM files can - be HUGE, and thus loading it into memory could be painful. - """ - for line in open(file,'r'): - samLine = splitLine(line.strip()) - makeBED(samLine, alignType) - f.close() + Read a SAM file (or stdin) and convert each line to BED format. 
+ """ + if (file != "stdin"): + for line in open(file,'r'): + samLine = splitLine(line.strip()) + makeBED(samLine, alignType) + f.close() + else: + for line in sys.stdin: + samLine = splitLine(line.strip()) + makeBED(samLine, alignType) def makeBED(samFields, aType): @@ -116,17 +133,18 @@ def main(argv=None): samFile = value if option in ("-t", "--type"): aType = value - - try: - f = open(samFile, 'r') - except IOError, msg: - raise Usage(help_message) - + + if (samFile != "stdin"): + try: + f = open(samFile, 'r') + except IOError, msg: + raise Usage(help_message) + except Usage, err: - print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg) + print >> sys.stderr, str(err.msg) return 2 - # make a BED file of the SAM file. + processSAM(samFile, aType) if __name__ == "__main__": diff --git a/src/utils/version/version.h b/src/utils/version/version.h index a06efc6f..c8144615 100644 --- a/src/utils/version/version.h +++ b/src/utils/version/version.h @@ -3,6 +3,6 @@ // define the version. All tools in the // suite carry the same version number. -#define VERSION "2.2.4" +#define VERSION "2.3.0" #endif /* VERSION_H */ -- GitLab