Commit fd8b873f authored by Valentina Galata
Browse files

config: updated aquifer

parent 69380046
work_dir: "/scratch/users/vgalata/Aquifer"
work_dir: "/scratch/users/vgalata/aquifer"
# input/output (can be relative to work_dir)
single_fast5_dir: "/scratch/users/sbusi/aquifer/" # leave empty, i.e. "", if input are multi-FAST5 files
......
......@@ -3,16 +3,16 @@
# Steps to be done
# steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
steps: ["assembly"]
steps_annotation: ["rgi", "plasflow", "minced", "barrnap", "kegg"] # prodigal is run in any case
steps_analysis: ["quast", "mash", "mashmap", "fastani", "mummer", "cdhit", "diamond", "cov"]
steps: ["preprocessing", "assembly"]
steps_annotation: ["rgi", "plasflow", "minced", "barrnap"] # prodigal is run in any case, no kegg
steps_analysis: ["quast", "mash", "mashmap", "fastani", "cdhit", "diamond"] # no mummer, cov
steps_taxonomy: ["kraken2", "kaiju"]
############################################################
# INPUT
# working directory: will contain the results (should be writeable)
work_dir: "/scratch/users/vgalata/Aquifer"
work_dir: "/scratch/users/sbusi/aquifer"
# Paths WITHIN the working directory
# directory containing required DBs (should be writeable)
......@@ -28,7 +28,7 @@ data:
r1: "/mnt/isilon/projects/ecosystem_biology/ONT_pilot/external_data/aquifer/sr/ERR3654041/ERR3654041_1.fastq.gz"
r2: "/mnt/isilon/projects/ecosystem_biology/ONT_pilot/external_data/aquifer/sr/ERR3654041/ERR3654041_2.fastq.gz"
ont:
fastq: "/scratch/users/vgalata/Aquifer/basecalling/lr.fastq.gz"
fastq: "/scratch/users/vgalata/aquifer/basecalling/lr.fastq.gz"
# Meta-transcriptomics
metat:
sr:
......@@ -59,8 +59,8 @@ fastqc:
# List of assemblers for different read types: assembler names MUST be UNIQUE
assemblers:
sr: ["megahit", "metaspades"]
lr: ["flye", "canu"]
hy: ["metaspadeshybrid", "operamsmegahit", "operamsmetaspades"]
lr: ["flye", "raven"]
hy: ["metaspadeshybrid"] # excluded: "operamsmegahit", "operamsmetaspades"
# https://github.com/fenderglass/Flye
flye:
......@@ -71,6 +71,10 @@ canu:
threads: 24
genome_size: "1g"
# https://github.com/lbcb-sci/raven
raven:
threads: 10
# https://github.com/ablab/spades
metaspades:
threads: 10
......@@ -81,7 +85,7 @@ megahit:
# https://github.com/CSB5/OPERA-MS
operams:
threads: 10
threads: 12
##############################
# Assembly polishing
......
......@@ -12,8 +12,8 @@
# conda env name or path
SMK_ENV="/scratch/users/vgalata/miniconda3/ONT_pilot" # CHANGE as needed
# config files
SMK_CONFIG="config/Aquifer/config.fast5.yaml"
SMK_SLURM="config/Aquifer/slurm.fast5.yaml"
SMK_CONFIG="config/aquifer/config.fast5.yaml"
SMK_SLURM="config/aquifer/slurm.fast5.yaml"
# slurm cluster call
SMK_CLUSTER="sbatch --partition {cluster.partition} {cluster.qos} {cluster.explicit} --nodes {cluster.nodes} \
--ntasks {cluster.ntasks} --cpus-per-task {threads} --time {cluster.time} --job-name={cluster.job-name}"
......
......@@ -10,15 +10,16 @@
#SBATCH -q long
# conda env name or path
SMK_ENV="/scratch/users/vgalata/miniconda3/ONT_pilot" # CHANGE as needed
SMK_ENV=$1
SMK_CONDA="/scratch/users/vgalata/miniconda3/ONT_pilot" # CHANGE as needed
# config files
SMK_CONFIG="config/Aquifer/config.yaml"
SMK_SLURM="config/Aquifer/slurm.yaml"
SMK_CONFIG="config/aquifer/config.yaml"
SMK_SLURM="config/aquifer/slurm.yaml"
# slurm cluster call
SMK_CLUSTER="sbatch --partition {cluster.partition} {cluster.qos} {cluster.explicit} --nodes {cluster.nodes} \
--ntasks {cluster.ntasks} --cpus-per-task {threads} --time {cluster.time} --job-name={cluster.job-name}"
conda activate ${SMK_ENV} && \
snakemake -s workflow/Snakefile -rp --jobs 5 --local-cores 1 \
--configfile ${SMK_CONFIG} --use-conda --conda-prefix ${CONDA_PREFIX}/pipeline \
snakemake -s workflow/Snakefile -rp --jobs 1 --local-cores 1 --rerun-incomplete \
--configfile ${SMK_CONFIG} --use-conda --conda-prefix ${SMK_CONDA} \
--cluster-config ${SMK_SLURM} --cluster "${SMK_CLUSTER}"
......@@ -5,7 +5,7 @@ __default__:
nodes: 1
ntasks: 1
explicit: ""
job-name: "ONT_Aquifer_FAST5.{rule}"
job-name: "ONT_aquifer_FAST5.{rule}"
create_multifast5s:
time: "00-04:00:00"
......
......@@ -5,32 +5,32 @@ __default__:
nodes: 1
ntasks: 1
explicit: ""
job-name: "ONT_Aquifer.{rule}"
job-name: "ONT_aquifer.{rule}"
# Preprocessing
fastp_sr:
time: "00-01:00:00"
time: "00-00:30:00"
partition: "batch"
rm_rrna_bbmap:
time: "00-4:00:00"
time: "00-01:30:00"
partition: "batch"
rm_host_bbmap_sr_metat:
time: "00-4:00:00"
time: "00-02:00:00"
partition: "bigmem"
rm_host_bbmap_sr_metag:
time: "00-4:00:00"
time: "00-02:00:00"
partition: "bigmem"
rm_host_bbmap_lr_metag:
time: "00-4:00:00"
time: "00-03:00:00"
partition: "bigmem"
# Assembly
assembly_lr_flye:
time: "00-8:00:00"
time: "00-14:00:00"
partition: "bigmem"
assembly_lr_canu:
......@@ -38,85 +38,150 @@ assembly_lr_canu:
partition: "bigmem"
qos: "--qos long"
assembly_lr_raven:
time: "00-08:00:00"
partition: "bigmem"
assembly_sr_megahit:
time: "01-4:00:00"
time: "00-08:00:00"
partition: "bigmem"
assembly_sr_metaspades:
time: "01-8:00:00"
time: "01-00:00:00"
partition: "bigmem"
assembly_hy_metaspades:
time: "01-12:00:00"
time: "01-00:00:00"
partition: "bigmem"
assembly_hy_operams:
time: "00-16:00:00"
time: "08-00:00:00"
partition: "bigmem"
qos: "--qos long"
# Assembly polishing
polishing_racon_lr:
time: "00-6:00:00"
time: "00-06:00:00"
partition: "bigmem"
polishing_racon_sr:
time: "00-6:00:00"
time: "00-06:00:00"
partition: "bigmem"
polishing_medaka_lr:
time: "01-00:00:00"
time: "01-12:00:00"
partition: "bigmem"
# Mapping
mapping_bwa_idx_asm:
time: "00-02:00:00"
time: "00-01:00:00"
partition: "batch"
mapping_bwa_mem_asm_sr:
time: "00-02:00:00"
partition: "batch"
time: "00-06:00:00"
partition: "bigmem"
mapping_bwa_mem_asm_lr:
time: "00-12:00:00"
partition: "batch"
partition: "bigmem"
mapping_bwa_mem_asm_hy:
time: "00-02:00:00"
time: "00-04:00:00"
partition: "batch"
# Annotation
annotation_prodigal:
time: "00-2:00:00"
time: "00-03:00:00"
partition: "batch"
annotation_hmm_kegg:
time: "00-8:00:00"
annotation_rgi:
time: "00-02:00:00"
partition: "batch"
annotation_minced:
time: "00-00:40:00"
partition: "batch"
annotation_plasflow:
time: "00-01:00:00"
partition: "bigmem"
annotation_barrnap:
time: "00-00:40:00"
partition: "batch"
annotation_hmm_kegg:
time: "00-12:00:00"
partition: "bigmem"
# Analysis
analysis_mashmap_one2one:
analysis_quast:
time: "00-00:20:00"
partition: "batch"
analysis_mash_sketch_sr:
time: "00-01:00:00"
partition: "batch"
analysis_mash_sketch_lr:
time: "00-01:00:00"
partition: "batch"
analysis_mash_reads:
time: "00-00:30:00"
partition: "batch"
analysis_mash_sketch_asm:
time: "00-00:20:00"
partition: "batch"
analysis_mash_asm:
time: "00-00:20:00"
partition: "batch"
analysis_mashmap_one2one:
time: "00-02:00:00"
partition: "batch"
analysis_fastani_one2one:
time: "00-02:00:00"
partition: "batch"
analysis_mummer_dnadiff:
time: "00-04:00:00"
partition: "bigmem"
analysis_cdhit:
time: "00-02:00:00"
partition: "batch"
analysis_diamond:
time: "00-02:00:00"
partition: "batch"
analysis_diamond_db:
time: "00-6:00:00"
time: "00-12:00:00"
partition: "batch"
analysis_genomecov_pergene:
time: "01-00:00:00"
partition: "batch"
analysis_genomecov_segmentation:
time: "00-12:00:00"
partition: "bigmem"
# Taxonomy
tax_kraken2_contigs:
time: "00-00:30:00"
partition: "bigmem"
tax_kraken2_sr:
time: "00-00:30:00"
time: "00-01:00:00"
partition: "bigmem"
tax_kraken2_lr:
time: "00-00:30:00"
time: "00-01:00:00"
partition: "bigmem"
tax_kaiju:
......@@ -124,5 +189,5 @@ tax_kaiju:
partition: "bigmem"
tax_kaiju_summary:
time: "00-00:10:00"
time: "00-00:30:00"
partition: "batch"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment