Commit 6867908f authored by Valentina Galata
Browse files

config: updated GDB

parent 91819cf9
......@@ -3,9 +3,9 @@
# Steps to be done
# steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
steps: ["preprocessing"]
steps_annotation: ["diamond", "rgi", "plasflow", "minced", "barrnap"] # prodigal is run in any case
steps_analysis: ["quast", "cdhit", "mash_dist"]
steps: ["preprocessing", "assembly", "polishing"]
steps_annotation: ["rgi", "plasflow", "minced", "barrnap"] # prodigal is run in any case
steps_analysis: ["quast", "cdhit", "diamond", "mash"]
steps_taxonomy: ["kraken2", "kaiju"]
############################################################
......@@ -49,9 +49,7 @@ data:
##############################
# Preprocessing
# TODO: installation ???
# Preprocessing: LR: Basecalling
# XXX
# https://community.nanoporetech.com/protocols/Guppy-protocol/v/GPB_2003_v1_revT_14Dec2018
guppy:
config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
gpu:
......@@ -66,49 +64,31 @@ guppy:
gpu_device: "cuda:0"
threads: 20
# Preprocessing: SR
# https://github.com/OpenGene/fastp
fastp:
threads: 10
min_length: 40
# FastQ QC
# https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
fastqc:
threads: 10
params: "-q -f fastq"
##############################
# Assembly
# List of assemblers for different read types: assembler names MUST be UNIQUE
# Allowed values:
# SR: megahit, metaspades
# Hy: metaspadeshybrid (metaspades w/ LR), operams
# LR: flye, wtdbg2, canu
# HyHy: imp3 (IMP3 assembly using LR and SR from metaT and metaG), assembly is not part of the pipeline
# Polishing w/ SR: suffix "_sr" for hybrid and LR assemblers
assemblers:
sr: ["megahit", "metaspades"]
lr: ["flye", "wtdbg2", "canu", "flye_sr", "wtdbg2_sr", "canu_sr"]
hy: ["metaspadeshybrid", "operams", "metaspadeshybrid_sr", "operams_sr"]
hyhy: ["imp3"]
lr: ["flye", "canu"]
hy: ["metaspadeshybrid", "operamsmegahit", "operamsmetaspades"]
# https://github.com/fenderglass/Flye
flye:
threads: 10
genome_size: "1g"
# https://github.com/ruanjue/wtdbg2
wtdbg2:
threads: 10
bin: "/scratch/users/sbusi/tools/wtdbg2/"
genome_size: "1g"
# https://canu.readthedocs.io/en/latest/
canu:
threads: 24
# mem: "64g"
genome_size: "1g"
# https://github.com/ablab/spades
......@@ -119,14 +99,12 @@ metaspades:
megahit:
threads: 10
# TODO: installation
# https://github.com/CSB5/OPERA-MS
operams:
threads: 10
bin: "/home/users/sbusi/apps/miniconda3/envs/operams/OPERA-MS/OPERA-MS.pl"
##############################
# Long-read assembly polishing
# Assembly polishing
# https://nanoporetech.github.io/medaka/index.html
medaka:
......@@ -140,14 +118,10 @@ racon:
##############################
# Mapping
# Mapper
# http://bio-bwa.sourceforge.net/
bwa:
threads: 10
long_reads_index:
opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
# SAM utils
# http://www.htslib.org/doc/samtools.html
samtools:
sort:
......@@ -157,35 +131,28 @@ samtools:
##############################
# Annotation
# Sequence search
# https://github.com/bbuchfink/diamond
diamond:
threads: 20
db: "nr_uniprot_trembl.dmnd" # file name in "dbs" folder
# CRISPR
# https://github.com/dnasko/CASC
casc:
threads: 10
# CRISPR
# https://github.com/ctSkennerton/minced
# minced:
# Plasmid prediction
# https://github.com/smaegol/PlasFlow
plasflow:
threshold: 0.7 # class. prob. threshold
minlen: 1000 # rm contigs with length below this threshold
# plasflow:
# threshold: 0.7 # class. prob. threshold
# minlen: 1000 # rm contigs with length below this threshold
# AMR prediction
# https://github.com/arpcard/rgi
rgi:
threads: 5
db_url: "https://card.mcmaster.ca/latest/data"
alignment_tool: "DIAMOND"
# rRNA genes prediction
# https://github.com/tseemann/barrnap
barrnap:
threads: 5
......@@ -221,17 +188,17 @@ bbmap:
quast:
threads: 10
# Sequence search and clustering
# https://github.com/soedinglab/MMseqs2
# mmseqs2:
# threads: 30
# createdb: "--dbtype 2 --shuffle -v"
# easycluster: "--kmer-per-seq-scale 0.5 --cov-mode 0 -c 0.5 --min-seq-id 0.9"
# easylinclust: "--kmer-per-seq-scale 0.5 --cov-mode 0 -c 0.5 --min-seq-id 0.9"
# path: "/home/users/sbusi/apps/mmseqs/bin"
# createdb: "/home/users/sbusi/apps/mmseqs/bin/mmseqs createdb"
# rbh: "/home/users/sbusi/apps/mmseqs/bin/mmseqs rbh"
# convertalis: "/home/users/sbusi/apps/mmseqs/bin/mmseqs convertalis"
# https://github.com/marbl/mash
mash:
threads: 10
# https://github.com/marbl/MashMap
mashmap:
threads: 10
# https://github.com/ParBLiSS/FastANI
fastani:
threads: 10
##############################
# Taxonomy
......@@ -242,10 +209,6 @@ kraken2:
threads: 10
db: # dir. name in "dbs" folder
maxikraken: "maxikraken2_1903_140GB"
class:
sr: "--gzip-compressed --paired"
lr: ""
contigs: ""
# http://kaiju.binf.ku.dk/
# http://kaiju.binf.ku.dk/server
......@@ -257,33 +220,6 @@ kaiju:
kaiju_db_nr_euk: "kaiju_db_nr_euk_2020-05-25"
ranks: ["phylum", "class", "order", "family", "genus", "species"]
# XXX
# https://github.com/Ecogenomics/GTDBTk
GTDBTK: # dir. name in "dbs" folder
DATA: "gtdbtk_release89"
##############################
# MISC
# https://github.com/marbl/mash
mash:
threads: 10
##############################
# Binning
# DAS_Tool:
# path: "/home/users/sbusi/apps/DAS_Tool-master"
# bin: "/home/users/sbusi/apps/DAS_Tool-master/src/"
# db: "/home/users/sbusi/apps/DAS_Tool-master/db/"
# Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
# # Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
# # dastool_database: "/home/users/sbusi/apps/DAS_Tool-master/db/"
##############################
# ???
# nonpareil:
# memory: 4096
# threads: 14
# rebaler:
# threads: 28
......@@ -18,6 +18,6 @@ ONTP_SLURM="config/GDB/slurm.yaml"
ONTP_CLUSTER="sbatch -p {cluster.partition} -q {cluster.qos} {cluster.explicit} -N {cluster.nodes} -n {cluster.n} -c {threads} -t {cluster.time} --job-name={cluster.job-name}"
conda activate ${ONTP_ENV} && \
snakemake -s workflow/Snakefile -rp --jobs 10 --local-cores 1 \
snakemake -s workflow/Snakefile -rp --jobs 5 --local-cores 1 \
--configfile ${ONTP_CONFIG} --use-conda --conda-prefix ${CONDA_PREFIX}/pipeline \
--cluster-config ${ONTP_SLURM} --cluster "${ONTP_CLUSTER}"
......@@ -28,7 +28,7 @@ fastp_sr:
explicit: ""
rm_rrna_bbmap:
time: "00-6:00:00"
time: "00-4:00:00"
partition: "batch"
qos: "qos-batch"
nodes: 1
......@@ -68,14 +68,6 @@ assembly_lr_flye:
n: 1
explicit: ""
assembly_lr_wtdbg2:
time: "00-8:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
assembly_lr_canu:
time: "05-00:00:00"
partition: "bigmem"
......@@ -109,17 +101,35 @@ assembly_hy_metaspades:
explicit: ""
assembly_hy_operams:
time: "00-4:00:00"
time: "00-12:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
filter_contigs:
time: "00-00:30:00"
partition: "batch"
qos: "qos-batch"
# Assembly polishing
polishing_racon_lr:
time: "00-6:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
polishing_racon_sr:
time: "00-6:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
polishing_medaka_lr:
time: "01-00:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
......@@ -173,23 +183,6 @@ mapping_bwa_mem_assembly_hyhy:
n: 1
explicit: ""
# Assembly polishing
polishing_racon_lr:
time: "00-18:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
polishing_racon_sr:
time: "00-18:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
# Annotation
annotation_prodigal:
time: "00-2:00:00"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment