#this script gets all genes with a function of interest as best annotation
#### and creates a number of plots which show from which genomes this function is expressed in the different samples and on the different omic levels
#### it also returns a workspace with the data
# it takes 3 arguments when called: the function of interest; which mOTU annotation to use (best hit out of the mOTUs found at reads level ("mOTUpresent") or
#### best hit out of all mOTUs ("mOTUbest")); and whether to order the plots by whether the donors of the samples have T1DM ("T1DM") or belong to a group defined in another file ("BG")
# the name of the function of interest is used to create a directory which houses all the output plots
# the script is constructed in two parts: the part that accesses the database and the part that makes the plots. The plotting needs a lot of additional information
### and also the script 140510_heatmap2.R. The database access is found in lines 14-15 and 34-76. The rest is plotting.
# written by Anna Heintz-Buschart, this version is from October 2015
# this script takes a fasta file with contigs and the output of one Barrnap run to produce a fasta file with rRNA sequences and a .tab file in the style of prodigal
# 4 inputs: - the fasta file of the contigs
# - the .gff file from Barrnap
# - the name of the .tab output
# - the name of the fasta output
# 2 outputs: - a fasta file with the rRNA gene sequences, naming is the contig name appended with _r and a continuous number per contig
# - a table with contig name, rRNA gene name (see above), sense, length, start position, end position, completeness and kind (16S, 5S etc.)
# Anna Heintz-Buschart, November 2014
use strict;
use Bio::DB::Fasta;

# Command-line arguments, in order:
#   1. FASTA file with the contigs
#   2. .gff output of one Barrnap run
#   3. name of the .tab output (prodigal-style table)
#   4. name of the FASTA output (rRNA gene sequences)
my $fastaFile = shift;
my $bacFile   = shift;
my $listFile  = shift;
my $geneFile  = shift;

my %allContigs = ();

# Bio::DB::Fasta provides indexed access to the contig sequences.
my $db = Bio::DB::Fasta->new($fastaFile);

# The bareword handles IN, GEN and TAB are kept as-is; the remainder of the
# script presumably reads from / writes to them -- verify before renaming.
# The Barrnap input is opened explicitly read-only and checked, so that a
# missing or unreadable file aborts the run instead of silently producing
# empty output files.
open(IN, "<", $bacFile) or die "cannot open $bacFile\n";
open(GEN, ">", $geneFile) or die "cannot open $geneFile\n";
open(TAB, ">", $listFile) or die "cannot open $listFile\n";
# this script uses the NCBI taxonomy to return the species, genus, family, order, class, phylum and kingdom of a taxon
# for multiple taxa a least common ancestor is returned
# 4 inputs: - the path to a folder containing the NCBI taxdump
# - a file with multiple annotations for the same gene
...
...
@@ -8,7 +8,7 @@
# - the name of the output
# 1 output - a table for all genes in the same order as in the two input files with LCA, species, genus, family, order, class, phylum, kingdom (tab separated)
# the code is mostly borrowed from Romain Studer (http://evosite3d.blogspot.de/2013/06/browsing-ncbi-taxonomy-with-python.html), with the exception of the assembly of the output table; Anna Heintz-Buschart, April 2014