Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
ESB
ONT_pilot_gitlab
Commits
778fc6a9
Commit
778fc6a9
authored
Nov 03, 2020
by
Valentina Galata
Browse files
config: updated GDB and Zymo
parent
f54b9e60
Changes
6
Hide whitespace changes
Inline
Side-by-side
config/GDB/config.yaml
View file @
778fc6a9
...
...
@@ -3,9 +3,9 @@
# Steps to be done
# steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
steps
:
[
"
preprocessing"
,
"
assembly"
,
"
mapping"
]
steps_annotation
:
[
"
rgi"
,
"
plasflow"
,
"
minced"
,
"
barrnap"
]
# prodigal is run in any case
steps_analysis
:
[
"
quast"
,
"
cdhit"
,
"
diamond"
,
"
mash"
]
steps
:
[
"
preprocessing"
,
"
assembly"
,
"
mapping"
,
"
annotation"
,
"
analysis"
]
steps_annotation
:
[
"
rgi"
,
"
plasflow"
,
"
minced"
,
"
barrnap"
,
"
kegg"
]
# prodigal is run in any case
steps_analysis
:
[
"
quast"
,
"
mash"
,
"
mashmap"
,
"
fastani"
,
"
mummer"
,
"
cdhit"
,
"
diamond"
]
steps_taxonomy
:
[
"
kraken2"
,
"
kaiju"
]
############################################################
...
...
@@ -183,6 +183,11 @@ bbmap:
# key: url of GZ archive
GCF_000001405.38_GRCh38.p12
:
"
ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.38_GRCh38.p12/GCF_000001405.38_GRCh38.p12_genomic.fna.gz"
# HMMs
hmm
:
threads
:
10
kegg
:
"
KO_cdhitGe10000_160314.hmm"
# Assembly quality
# https://github.com/ablab/quast
quast
:
...
...
config/GDB/sbatch.sh
View file @
778fc6a9
...
...
@@ -15,7 +15,8 @@ SMK_ENV="/scratch/users/vgalata/miniconda3/ONT_pilot" # CHANGE as needed
SMK_CONFIG
=
"config/GDB/config.yaml"
SMK_SLURM
=
"config/GDB/slurm.yaml"
# slurm cluster call
SMK_CLUSTER
=
"sbatch --partition {cluster.partition} {cluster.qos} {cluster.explicit} --nodes {cluster.nodes} --ntasks {cluster.ntasks} --cpus-per-task {threads} --time {cluster.time} --job-name={cluster.job-name}"
SMK_CLUSTER
=
"sbatch --partition {cluster.partition} {cluster.qos} {cluster.explicit} --nodes {cluster.nodes}
\
--ntasks {cluster.ntasks} --cpus-per-task {threads} --time {cluster.time} --job-name={cluster.job-name}"
conda activate
${
SMK_ENV
}
&&
\
snakemake
-s
workflow/Snakefile
-rp
--jobs
7
--local-cores
1
\
...
...
config/GDB/slurm.yaml
View file @
778fc6a9
...
...
@@ -56,7 +56,7 @@ assembly_hy_metaspades:
partition
:
"
bigmem"
assembly_hy_operams
:
time
:
"
00-1
2
:00:00"
time
:
"
00-1
6
:00:00"
partition
:
"
bigmem"
# Assembly polishing
...
...
@@ -95,7 +95,7 @@ annotation_prodigal:
partition
:
"
batch"
annotation_hmm_kegg
:
time
:
"
00-
6
:00:00"
time
:
"
00-
8
:00:00"
partition
:
"
batch"
annotation_plasflow
:
...
...
config/Zymo/config.yaml
100755 → 100644
View file @
778fc6a9
...
...
@@ -4,8 +4,8 @@
# Steps to be done
# steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
steps
:
[
"
preprocessing"
,
"
assembly"
,
"
mapping"
,
"
annotation"
,
"
analysis"
,
"
taxonomy"
]
steps_annotation
:
[
"
diamond"
,
"
rgi"
,
"
plasflow"
,
"
minced"
,
"
barrnap"
]
# prodigal is run in any case
steps_analysis
:
[
"
quast"
,
"
cdhit
"
,
"
mash
_dist
"
]
steps_annotation
:
[
"
rgi"
,
"
plasflow"
,
"
minced"
,
"
barrnap"
,
"
kegg"
]
# prodigal is run in any case
steps_analysis
:
[
"
quast"
,
"
mash
"
,
"
mash
map"
,
"
fastani"
,
"
mummer"
,
"
cdhit"
,
"
diamond
"
]
steps_taxonomy
:
[
"
kraken2"
,
"
kaiju"
]
############################################################
...
...
@@ -16,8 +16,8 @@ work_dir: "/scratch/users/vgalata/Zymo"
# Paths WITHIN the working directory
# directory containing required DBs (should be writeable)
db_dir
:
"
db
s"
# results directory
db_dir
:
"
/mnt/lscratch/users/vgalata/ONT_pilot_DB
s"
# results directory
(will be created in work_dir)
results_dir
:
"
results"
# Data paths: Use absolute paths or paths relative to the working directory !!!
...
...
@@ -49,9 +49,7 @@ data:
##############################
# Preprocessing
# TODO: installation ???
# Preprocessing: LR: Basecalling
# XXX
# https://community.nanoporetech.com/protocols/Guppy-protocol/v/GPB_2003_v1_revT_14Dec2018
guppy
:
config
:
"
dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
gpu
:
...
...
@@ -66,49 +64,31 @@ guppy:
gpu_device
:
"
cuda:0"
threads
:
20
# Preprocessing: SR
# https://github.com/OpenGene/fastp
fastp
:
threads
:
10
min_length
:
40
# FastQ QC
# https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
fastqc
:
threads
:
10
params
:
"
-q
-f
fastq"
##############################
# Assembly
# List of assemblers for different read types: assembler names MUST be UNIQUE
# Allowed values:
# SR: megahit, metaspades
# Hy: metaspadeshybrid (metaspades w/ LR), operams
# LR: flye, wtdbg2, canu
# HyHy: imp3 (IMP3 assembly using LR and SR from metaT and metaG), assembly is not part of the pipeline
# Polishing w/ SR: suffix "_sr" for hybrid and LR assemblers
assemblers
:
sr
:
[
"
megahit"
,
"
metaspades"
]
lr
:
[
"
flye"
,
"
wtdbg2"
,
"
flye_sr"
,
"
wtdbg2_sr"
]
hy
:
[
"
metaspadeshybrid"
,
"
operams"
,
"
metaspadeshybrid_sr"
,
"
operams_sr"
]
hyhy
:
[]
lr
:
[
"
flye"
,
"
canu"
]
hy
:
[
"
metaspadeshybrid"
,
"
operamsmegahit"
,
"
operamsmetaspades"
]
# https://github.com/fenderglass/Flye
flye
:
threads
:
10
genome_size
:
"
1g"
# https://github.com/ruanjue/wtdbg2
wtdbg2
:
threads
:
10
bin
:
"
/scratch/users/sbusi/tools/wtdbg2/"
genome_size
:
"
1g"
# https://canu.readthedocs.io/en/latest/
canu
:
threads
:
24
# mem: "64g"
genome_size
:
"
1g"
# https://github.com/ablab/spades
...
...
@@ -119,14 +99,12 @@ metaspades:
megahit
:
threads
:
10
# TODO: installation
# https://github.com/CSB5/OPERA-MS
operams
:
threads
:
10
bin
:
"
/home/users/sbusi/apps/miniconda3/envs/operams/OPERA-MS/OPERA-MS.pl"
##############################
#
Long-read a
ssembly polishing
#
A
ssembly polishing
# https://nanoporetech.github.io/medaka/index.html
medaka
:
...
...
@@ -140,14 +118,10 @@ racon:
##############################
# Mapping
# Mapper
# http://bio-bwa.sourceforge.net/
bwa
:
threads
:
10
long_reads_index
:
opts
:
"
-aY
-A
5
-B
11
-O
2,1
-E
4,3
-k
8
-W
16
-w
40
-r
1
-D
0
-y
20
-L
30,30
-T
2.5"
# SAM utils
# http://www.htslib.org/doc/samtools.html
samtools
:
sort
:
...
...
@@ -157,35 +131,28 @@ samtools:
##############################
# Annotation
# Sequence search
# https://github.com/bbuchfink/diamond
diamond
:
threads
:
20
db
:
"
/work/projects/ecosystem_biology/local_tools/databases/
nr_uniprot_trembl.dmnd"
#
TODO: data download
db
:
"
nr_uniprot_trembl.dmnd"
#
file name in "dbs" folder
# CRISPR
# https://github.com/dnasko/CASC
casc
:
threads
:
10
# CRISPR
# https://github.com/ctSkennerton/minced
# minced:
# Plasmid prediction
# https://github.com/smaegol/PlasFlow
plasflow
:
threshold
:
0.7
# class. prob. threshold
minlen
:
1000
# rm contigs with length below this threshold
#
plasflow:
#
threshold: 0.7 # class. prob. threshold
#
minlen: 1000 # rm contigs with length below this threshold
# AMR prediction
# https://github.com/arpcard/rgi
rgi
:
threads
:
5
db_url
:
"
https://card.mcmaster.ca/latest/data"
alignment_tool
:
"
DIAMOND"
# rRNA genes prediction
# https://github.com/tseemann/barrnap
barrnap
:
threads
:
5
...
...
@@ -202,42 +169,39 @@ cdhit:
# https://github.com/BioInfoTools/BBMap/
bbmap
:
threads
:
10
# References to be used (w/ md5sums)
rrna_refs
:
[
# c0cd2aa2e84e3e3977859c34feb63cd5 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5.8s-database-id98.fasta
# 703e4c270ab0a578deb4800c33b36367 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5s-database-id98.fasta
# 8b4e6c6f17f6f35444a60fdc915e052c /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-16s-id95.fasta
# ca4edcdddb98d7868f93e2308e297704 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-23s-id98.fasta
# db6e72022cf650c4b33bd888b92a0391 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-16s-id90.fasta
# f347d2f8f8ffbfa28c785e3a9fe3db79 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-23s-id98.fasta
# 878a413765d09c3ec75409fb1d1573f1 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-18s-id95.fasta
# cbb973e63f52981bd591de0404df5839 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-28s-id98.fast
"
/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5.8s-database-id98.fasta"
,
"
/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5s-database-id98.fasta"
,
"
/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-16s-id95.fasta"
,
"
/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-23s-id98.fasta"
,
"
/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-16s-id90.fasta"
,
"
/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-23s-id98.fasta"
,
"
/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-18s-id95.fasta"
,
"
/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-28s-id98.fasta"
rrna_refs
:
[
# file names in "dbs" folder
"
sortmerna/rfam-5.8s-database-id98.fasta"
,
"
sortmerna/rfam-5s-database-id98.fasta"
,
"
sortmerna/silva-arc-16s-id95.fasta"
,
"
sortmerna/silva-arc-23s-id98.fasta"
,
"
sortmerna/silva-bac-16s-id90.fasta"
,
"
sortmerna/silva-bac-23s-id98.fasta"
,
"
sortmerna/silva-euk-18s-id95.fasta"
,
"
sortmerna/silva-euk-28s-id98.fasta"
]
host_refs
:
null
# HMMs
hmm
:
threads
:
10
kegg
:
"
KO_cdhitGe10000_160314.hmm"
# Assembly quality
# https://github.com/ablab/quast
quast
:
threads
:
10
#
Sequence search and clustering
# https://github.com/soedinglab/MMseqs2
# mmseqs2:
# threads: 30
# createdb: "--dbtype 2 --shuffle -v"
# easycluster: "--kmer-per-seq-scale 0.5 --cov-mode 0 -c 0.5 --min-seq-id 0.9"
# easylinclust: "--kmer-per-seq-scale 0.5 --cov-mode 0 -c 0.5 --min-seq-id 0.9"
# path: "/home/users/sbusi/apps/mmseqs/bin"
# createdb: "/home/users/sbusi/apps/mmseqs/bin/mmseqs createdb"
# rbh: "/home/users/sbusi/apps/mmseqs/bin/mmseqs rbh"
# convertalis: "/home/users/sbusi/apps/mmseqs/bin/mmseqs convertalis"
#
https://github.com/marbl/mash
mash
:
threads
:
10
# https://github.com/marbl/MashMap
mashmap
:
threads
:
10
# https://github.com/ParBLiSS/FastANI
fastani
:
threads
:
10
##############################
# Taxonomy
...
...
@@ -246,49 +210,19 @@ quast:
# https://github.com/DerrickWood/kraken2
kraken2
:
threads
:
10
db
:
maxikraken
:
"
/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
class
:
sr
:
"
--gzip-compressed
--paired"
lr
:
"
"
contigs
:
"
"
db
:
# dir. name in "dbs" folder
maxikraken
:
"
maxikraken2_1903_140GB"
# http://kaiju.binf.ku.dk/
# http://kaiju.binf.ku.dk/server
# https://github.com/bioinformatics-centre/kaiju
kaiju
:
threads
:
10
db
:
# key = basename of *.fmi
kaiju_db_nr_euk
:
"
/mnt/isilon/projects/ecosystem_biology/databases/kaiju/kaiju_db_nr_euk_2020-05-25"
db
:
# dir. name in "dbs" folder
# key = basename of *.fmi
kaiju_db_nr_euk
:
"
kaiju_db_nr_euk_2020-05-25"
ranks
:
[
"
phylum"
,
"
class"
,
"
order"
,
"
family"
,
"
genus"
,
"
species"
]
# # XXX
# GTDBTK:
# DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
##############################
# MISC
# https://github.com/marbl/mash
mash
:
threads
:
10
##############################
# Binning
# DAS_Tool:
# path: "/home/users/sbusi/apps/DAS_Tool-master"
# bin: "/home/users/sbusi/apps/DAS_Tool-master/src/"
# db: "/home/users/sbusi/apps/DAS_Tool-master/db/"
# Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
# # Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
# # dastool_database: "/home/users/sbusi/apps/DAS_Tool-master/db/"
##############################
# ???
# nonpareil:
# memory: 4096
# threads: 14
# rebaler:
# threads: 28
# https://github.com/Ecogenomics/GTDBTk
GTDBTK
:
# dir. name in "dbs" folder
DATA
:
"
gtdbtk_release89"
config/Zymo/sbatch.fast5.sh
View file @
778fc6a9
...
...
@@ -15,7 +15,8 @@ SMK_ENV="/scratch/users/vgalata/miniconda3/ONT_pilot" # CHANGE as needed
SMK_CONFIG
=
"config/Zymo/config.fast5.yaml"
SMK_SLURM
=
"config/Zymo/slurm.fast5.yaml"
# slurm cluster call
SMK_CLUSTER
=
"sbatch --partition {cluster.partition} {cluster.qos} {cluster.explicit} --nodes {cluster.nodes} --ntasks {cluster.ntasks} --cpus-per-task {threads} --time {cluster.time} --job-name={cluster.job-name}"
SMK_CLUSTER
=
"sbatch --partition {cluster.partition} {cluster.qos} {cluster.explicit} --nodes {cluster.nodes}
\
--ntasks {cluster.ntasks} --cpus-per-task {threads} --time {cluster.time} --job-name={cluster.job-name}"
conda activate
${
SMK_ENV
}
&&
\
snakemake
-s
workflow/Snakefile
-rp
--jobs
7
--local-cores
1
\
...
...
config/Zymo/sbatch.sh
View file @
778fc6a9
...
...
@@ -15,7 +15,8 @@ SMK_ENV="/scratch/users/vgalata/miniconda3/ONT_pilot" # CHANGE as needed
SMK_CONFIG
=
"config/Zymo/config.yaml"
SMK_SLURM
=
"config/Zymo/slurm.yaml"
# slurm cluster call
SMK_CLUSTER
=
"sbatch --partition {cluster.partition} {cluster.qos} {cluster.explicit} --nodes {cluster.nodes} --ntasks {cluster.ntasks} --cpus-per-task {threads} --time {cluster.time} --job-name={cluster.job-name}"
SMK_CLUSTER
=
"sbatch --partition {cluster.partition} {cluster.qos} {cluster.explicit} --nodes {cluster.nodes}
\
--ntasks {cluster.ntasks} --cpus-per-task {threads} --time {cluster.time} --job-name={cluster.job-name}"
conda activate
${
SMK_ENV
}
&&
\
snakemake
-s
workflow/Snakefile
-rp
--jobs
7
--local-cores
1
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment