Commit 7ccc3d89 authored by Valentina Galata's avatar Valentina Galata
Browse files

added configs for GDB (same as the ones in config/); mv sbatch.sh to config/ (issue #45)

parent afd8885b
############################################################
# STEPS
# Pipeline steps
# NOTE: no binning and taxonomic analysis
# steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis"]
steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis"]
# NOTE: currently not used
# Annotation sub-steps
# annotation_steps: ["plasmids", "crispr", "amr"]
# NOTE: currently not used
# Analysis sub-steps
# analysis_steps: ["quast", "prodigal", "cdhit", "mmseqs2"]
############################################################
# INPUT
# Working directory: will contain the results
# work_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB"
work_dir: "/scratch/users/vgalata/ont_pilot"
# Paths below are WITHIN the working directory
# Directory containing required DBs
db_dir: "dbs"
# Results directory
results_dir: "results"
# Data paths: use absolute paths or paths relative to the working directory !!!
data:
  # Meta-genomics
  metag:
    # Short reads (paired-end FASTQ)
    sr:
      r1: "data/raw/short_reads/ONT3_MG_xx_Rashi_S11_R1_001.fastq.gz"
      r2: "data/raw/short_reads/ONT3_MG_xx_Rashi_S11_R2_001.fastq.gz"
    # Long reads (ONT)
    ont:
      # List of directories containing FAST5 files
      dirs: ["data/multifast5"]
      # List of FAST5 files
      files: []
  # Meta-transcriptomics
  metat:
    sr:
      r1: "data/metaT/FastSelectFull1_MT_Rashi_S14_R1_001.fastq.gz" # leave empty if no data, i.e. ""
      r2: "data/metaT/FastSelectFull1_MT_Rashi_S14_R2_001.fastq.gz" # leave empty if no data, i.e. ""
  # Meta-proteomics (no data yet; key kept as a placeholder, value is null)
  metap:
# TODO
# binning_samples: ["flye", "megahit", "bwa_sr_metaspades_hybrid", "bwa_lr_metaspades_hybrid", "bwa_merged_metaspades_hybrid", "mmi_sr_metaspades_hybrid", "mmi_lr_metaspades_hybrid", "mmi_merged_metaspades_hybrid"]
############################################################
# TOOLS
##############################
# Preprocessing
# TODO: installation
# Preprocessing: LR: Basecalling
# NOTE(review): the nesting below was reconstructed from a whitespace-stripped
# copy; in particular, whether the basecalling parameters (records_per_fastq
# onwards) belong under "gpu" or directly under "guppy" — verify against the
# workflow's config[...] accesses.
guppy:
  config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
  gpu:
    path: "/home/users/sbusi/apps/ont-guppy/bin"
    # wrapped call: load CUDA modules before invoking the basecaller
    bin: "set +u; source ~/.bashrc; set -u; ml compiler/LLVM system/CUDA && /home/users/sbusi/apps/ont-guppy/bin/guppy_basecaller"
    version: "3.6.0+98ff765"
    records_per_fastq: 8000
    chunk_size: 1000
    chunks_per_runner: 1000
    num_callers: 4
    runners_per_device: 2
    gpu_device: "cuda:0"
    threads: 20
# Preprocessing: SR
# https://github.com/OpenGene/fastp
fastp:
  threads: 10
  min_length: 40
# FastQ QC
# https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
fastqc:
  threads: 10
  params: "-q -f fastq"
##############################
# Assembly
# List of assemblers per read type: short (sr), long (lr), hybrid (hy)
assemblers:
  sr: ["megahit", "metaspades"]
  lr: ["flye"]
  hy: ["metaspadeshybrid", "operams"]
# https://github.com/fenderglass/Flye
flye:
  threads: 10
  genome_size: "1g"
# https://github.com/ablab/spades
metaspades:
  threads: 10
# https://github.com/voutcn/megahit
megahit:
  threads: 10
# TODO: installation
# https://github.com/CSB5/OPERA-MS
operams:
  threads: 10
  bin: "/home/users/sbusi/apps/miniconda3/envs/operams/OPERA-MS/OPERA-MS.pl"
##############################
# Long-read assembly polishing
# https://github.com/nanoporetech/medaka
medaka:
  threads: 10 # do NOT set to large value (e.g. using 30 did not work)
  model: "r941_min_high" # the MinION model, high accuracy
# https://github.com/isovic/racon
racon:
  threads: 30
##############################
# Mapping
# Mapper
# http://bio-bwa.sourceforge.net/
bwa:
  threads: 10
  # options used when indexing/mapping long reads
  long_reads_index:
    opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
# SAM utils
# http://www.htslib.org/doc/samtools.html
samtools:
  sort:
    # threads: 10
    chunk_size: "4G" # per-thread memory for "samtools sort -m"
  view:
    # threads: 10
##############################
# Annotation
# TODO: data download
# Sequence search
# https://github.com/bbuchfink/diamond
diamond:
  threads: 20
  # db: "/mnt/isilon/projects/ecosystem_biology/NOMIS/DIAMOND/new_nr.dmnd"
  db: "/work/projects/ecosystem_biology/local_tools/databases/nr_uniprot_trembl.dmnd"
# CRISPR
# https://github.com/dnasko/CASC
casc:
  threads: 10
  # path: "$PATH:/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/bin"
  # perl5lib: "/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/lib/site_perl"
# CRISPR
# https://github.com/ctSkennerton/minced
minced:
  # path: "$PATH:/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/minced/"
# Plasmid prediction
# https://github.com/smaegol/PlasFlow
plasflow:
  threshold: 0.7 # classification probability threshold
  minlen: 1000 # rm contigs with length below this threshold
# AMR prediction
# https://github.com/arpcard/rgi
rgi:
  threads: 5
  db_url: "https://card.mcmaster.ca/latest/data"
  alignment_tool: "DIAMOND" # DIAMOND or BLAST
##############################
# Analysis
# https://jgi.doe.gov/data-and-tools/bbtools/
bbmap:
  threads: 10
# Assembly quality
# http://quast.sourceforge.net/
quast:
  threads: 10
# Sequence search and clustering
# https://github.com/soedinglab/MMseqs2
mmseqs2:
  threads: 30
  # path: "/home/users/sbusi/apps/mmseqs/bin"
  # createdb: "/home/users/sbusi/apps/mmseqs/bin/mmseqs createdb"
  # rbh: "/home/users/sbusi/apps/mmseqs/bin/mmseqs rbh"
  # convertalis: "/home/users/sbusi/apps/mmseqs/bin/mmseqs convertalis"
# Seq. alignment
# https://mummer4.github.io/
mummer:
  archive: "https://github.com/mummer4/mummer/releases/download/v3.9.4alpha/mummer-3.9.4alpha.tar.gz"
##############################
# Taxonomy (currently disabled)
# kraken2:
#   db: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
# GTDBTK:
#   DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
##############################
# Binning (currently disabled)
# DAS_Tool:
#   path: "/home/users/sbusi/apps/DAS_Tool-master"
#   bin: "/home/users/sbusi/apps/DAS_Tool-master/src/"
#   db: "/home/users/sbusi/apps/DAS_Tool-master/db/"
#   Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
#   # Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
#   # dastool_database: "/home/users/sbusi/apps/DAS_Tool-master/db/"
##############################
# Misc tools (currently disabled)
# nonpareil:
#   memory: 4096
#   threads: 14
# rebaler:
#   threads: 28
# SLURM cluster configuration for snakemake (--cluster-config):
# per-rule resources; "__default__" applies to any rule without its own entry.
__default__:
  time: "0-02:00:00"
  partition: "batch"
  qos: "qos-batch"
  nodes: 1
  n: 1
  explicit: "" # extra sbatch flags (e.g. --gres), empty by default
  job-name: "ONT_pilot.{rule}"
  # output: "slurm-%j.%N-%x.out"
  # error: "slurm-%j.%N-%x.err"
  # mail-type: "end"
# Preprocessing
guppy_gpu_basecalling:
  time: "01-00:00:00"
  partition: "gpu"
  qos: "qos-gpu"
  nodes: 1
  n: 1
  explicit: "--gres=gpu:1" # request one GPU for basecalling
fastp_sr:
  time: "00-01:00:00"
  partition: "batch"
  qos: "qos-batch"
  nodes: 1
  n: 1
  explicit: ""
# Assembly
assembly_lr_flye:
  time: "00-8:00:00"
  partition: "bigmem"
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""
assembly_sr_megahit:
  time: "01-4:00:00"
  partition: "bigmem"
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""
assembly_sr_metaspades:
  time: "01-8:00:00"
  partition: "bigmem"
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""
assembly_hy_metaspades:
  time: "01-12:00:00"
  partition: "bigmem"
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""
assembly_hy_operams:
  time: "00-4:00:00"
  partition: "bigmem"
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""
# Mapping
bwa_index_assembly:
  time: "00-02:00:00"
  partition: "batch"
  qos: "qos-batch"
  nodes: 1
  n: 1
  explicit: ""
bwa_mem_assembly_sr:
  time: "00-02:00:00"
  partition: "batch"
  qos: "qos-batch"
  nodes: 1
  n: 1
  explicit: ""
bwa_mem_assembly_lr:
  time: "00-06:00:00"
  partition: "batch"
  qos: "qos-batch"
  nodes: 1
  n: 1
  explicit: ""
bwa_mem_assembly_hy:
  time: "00-02:00:00"
  partition: "batch"
  qos: "qos-batch"
  nodes: 1
  n: 1
  explicit: ""
# Assembly polishing
bwa_index_assembly_polishing:
  time: "00-02:00:00"
  partition: "batch"
  qos: "qos-batch"
  nodes: 1
  n: 1
  explicit: ""
bwa_mem_assembly_polishing:
  time: "00-06:00:00"
  partition: "batch"
  qos: "qos-batch"
  nodes: 1
  n: 1
  explicit: ""
polishing_lr_racon:
  time: "00-04:00:00"
  partition: "bigmem"
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""
polishing_lr_medaka:
  time: "00-12:00:00"
  partition: "bigmem"
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""
# Annotation
annotation_prodigal:
  time: "01-4:00:00"
  partition: "batch"
  qos: "qos-batch"
  nodes: 1
  n: 1
  explicit: ""
annotation_diamond_daa:
  time: "01-12:00:00"
  partition: "batch"
  qos: "qos-batch"
  nodes: 1
  n: 1
  explicit: ""
annotation_plasflow:
  time: "00-01:00:00"
  partition: "bigmem"
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""
# "mmseq2_compare":
# {
# "n": 1,
# "ncpus": 12,
# "time": "00-04:00:00",
# "partition": "bigmem",
# "qos": "qos-bigmem",
# "mail-type": "end"
# },
# "run_nonpareil_on_short_reads":
# {
# "time": "00-04:00:00",
# "n": 1,
# "ncpus": 14,
# "partition": "bigmem",
# "qos": "qos-bigmem",
# "mail-type": "ALL"
# },
# "ANNOTATE":
# {
# "time": "00-00:01:00",
# "n": 1,
# "ncpus": 1,
# "partition": "batch",
# "qos": "qos-batch",
# "mail-type": "ALL"
# },
# "COVERAGE_OF_REFERENCES":
# {
# "time": "00-00:01:00",
# "n": 1,
# "ncpus": 1,
# "partition": "batch",
# "qos": "qos-batch",
# "mail-type": "ALL"
# },
# "ASSEMBLE_AND_COVERAGE":
# {
# "time": "00-00:01:00",
# "n": 1,
# "ncpus": 1,
# "partition": "batch",
# "qos": "qos-batch",
# "mail-type": "ALL"
# },
# "run_rebaler_on_short_read_contigs":
# {
# "time": "00-04:00:00",
# "n": 1,
# "ncpus": 28,
# "partition": "batch",
# "qos": "qos-batch"
# },
# "maxbin2":
# {
# "time": "0-10:00:00",
# "n": 1,
# "partition": "batch",
# "qos": "qos-batch",
# "ncpus": 24
# },
# "metabat2":
# {
# "time": "0-12:00:00",
# "n": 1,
# "partition": "batch",
# "qos": "qos-batch",
# "ncpus": 24
# },
# "scaffold_list":
# {
# "time": "0-02:00:00",
# "n": 1,
# "partition": "batch",
# "qos": "qos-batch",
# "ncpus": 2
# },
# "DAS_Tool":
# {
# "time": "0-6:00:00",
# "n": 1,
# "partition": "batch",
# "qos": "qos-batch",
# "ncpus": 24
# },
# "gtdbtk":
# {
# "time": "1-00:00:00",
# "n": 1,
# "partition": "bigmem",
# "qos": "qos-bigmem",
# "ncpus": 8
# },
# "checkm":
# {
# "time": "1-00:00:00",
# "n": 1,
# "partition": "batch",
# "qos": "qos-batch",
# "ncpus": 24
# }
# }
#!/bin/bash -l
##############################
# SLURM
# NOTE: used for this script only, NOT for the snakemake call below
#SBATCH -J ONT_pilot
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 1
#SBATCH --time=2-00:00:00
#SBATCH -p batch
#SBATCH --qos=qos-batch
##############################
# Abort on the first failing command (e.g. a failed "conda activate") instead
# of running snakemake in the wrong environment; also fail on pipe errors.
# NOTE: "-u" is deliberately NOT set — conda activation scripts may reference
# unset variables (cf. the guppy "bin" workaround in the config).
set -eo pipefail
##############################
# SNAKEMAKE
# conda env name
ONTP_ENV="ONT_pilot"
# number of cores for snakemake
ONTP_CORES=60
# snakemake file
ONTP_SMK="workflow/Snakefile"
# config file
ONTP_CONFIG="config/config.yaml" # USER INPUT REQUIRED
# slurm config file (per-rule cluster resources)
ONTP_SLURM="config/slurm.yaml"
# slurm cluster call: placeholders are filled by snakemake from ONTP_SLURM
ONTP_CLUSTER="-p {cluster.partition} -q {cluster.qos} {cluster.explicit} -N {cluster.nodes} -n {cluster.n} -c {threads} -t {cluster.time} --job-name={cluster.job-name}"
##############################
# IMP
# activate the env (requires a login shell, hence "#!/bin/bash -l")
conda activate ${ONTP_ENV}
# run the pipeline, submitting each rule as its own SLURM job
snakemake -s ${ONTP_SMK} -rp --cores ${ONTP_CORES} --configfile ${ONTP_CONFIG} \
--use-conda --conda-prefix ${CONDA_PREFIX}/pipeline \
--cluster-config ${ONTP_SLURM} --cluster "sbatch ${ONTP_CLUSTER}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment