Commit 726b11a8 authored by Valentina Galata's avatar Valentina Galata
Browse files

tax step: added kaiju (cdhit, issue #67), minor changes for kraken2 rules,...

tax step: added kaiju (cdhit, issue #67), minor changes for kraken2 rules, updated Snakefile and GDB configs
parent f867b451
......@@ -5,6 +5,7 @@
steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
steps_annotation: ["diamond", "rgi", "plasflow", "minced"] # prodigal is run in any case
steps_analysis: ["quast", "cdhit", "mash_dist"]
steps_taxonomy: ["kraken2", "kaiju"]
############################################################
# INPUT
......@@ -205,6 +206,14 @@ kraken2:
lr: ""
contigs: ""
# http://kaiju.binf.ku.dk/
# http://kaiju.binf.ku.dk/server
# https://github.com/bioinformatics-centre/kaiju
# Kaiju taxonomic classifier settings (used by rule tax_kaiju_cdhit)
kaiju:
  threads: 10
  # Databases: key = basename of *.fmi, value = directory containing
  # both the FM-index (<key>.fmi) and the taxonomy nodes.dmp file.
  db: # key = basename of *.fmi
    kaiju_db_nr_euk: "/mnt/isilon/projects/ecosystem_biology/databases/kaiju/kaiju_db_nr_euk_2020-05-25"
# # XXX
# GTDBTK:
# DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
......
......@@ -5,14 +5,14 @@
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 1
#SBATCH --time=1-00:00:00
#SBATCH --time=0-06:00:00
#SBATCH -p batch
#SBATCH --qos=qos-batch
# conda env name or path
ONTP_ENV="ONT_pilot"
# number of concurrent jobs
ONTP_JOBS=20
ONTP_JOBS=30
# config files
ONTP_CONFIG="config/GDB/config.yaml"
ONTP_SLURM="config/GDB/slurm.yaml"
......
......@@ -175,7 +175,8 @@ annotation_plasflow:
n: 1
explicit: ""
kraken2_contigs:
# Taxonomy
tax_kraken2_contigs:
time: "00-02:00:00"
partition: "bigmem"
qos: "qos-bigmem"
......@@ -183,7 +184,7 @@ kraken2_contigs:
n: 1
explicit: ""
kraken2_sr:
tax_kraken2_sr:
time: "00-02:00:00"
partition: "bigmem"
qos: "qos-bigmem"
......@@ -191,10 +192,18 @@ kraken2_sr:
n: 1
explicit: ""
kraken2_lr:
tax_kraken2_lr:
time: "00-02:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
# SLURM resources for the Kaiju classification of CD-HIT protein clusters
# (rule tax_kaiju_cdhit in steps/taxonomy.smk)
tax_kaiju_cdhit:
  time: "00-01:00:00"   # walltime, DD-HH:MM:SS
  partition: "bigmem"   # Kaiju loads the whole FM-index into memory
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""          # extra sbatch flags, none needed here
\ No newline at end of file
......@@ -76,13 +76,13 @@ if "analysis" in config['steps']:
"status/analysis.done"
]
# # Taxonomy
# if "taxonomy" in config['steps']:
# include:
# "steps/taxonomy.smk"
# TARGETS += [
# "status/taxonomy.done"
# ]
# Taxonomy
# Pull in the taxonomy rules (kraken2/kaiju) and register their aggregate
# target only when the step is enabled in the config's "steps" list.
if "taxonomy" in config['steps']:
    include:
        "steps/taxonomy.smk"
    TARGETS += [
        "status/taxonomy.done"
    ]
# No targets
if len(TARGETS) == 0:
......
# Taxonomic analysis
rule kraken2_contigs:
##################################################
# Kraken2
rule tax_kraken2_contigs:
input:
contigs=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta"),
contigs=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{atype}.fasta"),
db=lambda wildcards: config["kraken2"]["db"][wildcards.db]
output:
labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}_{tool}_{db}_labels.txt"),
report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}_{tool}_{db}_report.txt")
labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{atype}.{db}.labels.txt"),
report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{atype}.{db}.report.txt")
log:
out="logs/kraken2.{db}.{rtype}.{tool}.out.log",
err="logs/kraken2.{db}.{rtype}.{tool}.err.log"
"logs/kraken2.{rtype}.{tool}.{atype}.{db}.log"
wildcard_constraints:
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS),
atype="ASSEMBLY|ASSEMBLY.FILTERED",
db="|".join(config["kraken2"]["db"].keys())
threads:
config["kraken2"]["threads"]
......@@ -21,23 +24,22 @@ rule kraken2_contigs:
conda:
"../envs/kraken2.yaml"
message:
"Classification w/ Kraken2 ({wildcards.db})"
"Tax. classification w/ Kraken2 ({wildcards.db}, contigs)"
shell:
"(date && "
"kraken2 --db {input.db} --threads {threads} --use-names {params.params} --report {output.report} --output {output.labels} {input.contigs} && "
"date) 2> {log.err} > {log.out}"
"date) &> {log}"
rule kraken2_sr:
rule tax_kraken2_sr:
input:
r1=os.path.join(RESULTS_DIR, "preproc/{mtype}/sr/R1.fastp.fastq.gz"),
r2=os.path.join(RESULTS_DIR, "preproc/{mtype}/sr/R2.fastp.fastq.gz"),
db=lambda wildcards: config["kraken2"]["db"][wildcards.db]
output:
labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{mtype}_sr_{db}_labels.txt"),
report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{mtype}_sr_{db}_report.txt")
labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{mtype}.sr.{db}.labels.txt"),
report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{mtype}.sr.{db}.report.txt")
log:
out="logs/kraken2.{db}.{mtype}.sr.out.log",
err="logs/kraken2.{db}.{mtype}.sr.err.log"
"logs/kraken2.{mtype}.sr.{db}.log"
wildcard_constraints:
mtype="|".join(META_TYPES),
db="|".join(config["kraken2"]["db"].keys())
......@@ -48,22 +50,21 @@ rule kraken2_sr:
conda:
"../envs/kraken2.yaml"
message:
"Classification w/ Kraken2 ({wildcards.db})"
"Tax. classification w/ Kraken2 ({wildcards.db}, SR)"
shell:
"(date && "
"kraken2 --db {input.db} --threads {threads} --use-names {params.params} --report {output.report} --output {output.labels} {input.r1} {input.r2} && "
"date) 2> {log.err} > {log.out}"
"date) &> {log}"
rule kraken2_lr:
rule tax_kraken2_lr:
input:
lr=os.path.join(RESULTS_DIR, "preproc/metag/lr/lr.fastq.gz"),
db=lambda wildcards: config["kraken2"]["db"][wildcards.db]
output:
labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/metag_lr_{db}_labels.txt"),
report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/metag_lr_{db}_report.txt")
labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/metag.lr.{db}.labels.txt"),
report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/metag.lr.{db}.report.txt")
log:
out="logs/kraken2.{db}.metag.lr.out.log",
err="logs/kraken2.{db}.metag.lr.err.log"
"logs/kraken2.metag.lr.{db}.log"
wildcard_constraints:
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS),
......@@ -75,8 +76,38 @@ rule kraken2_lr:
conda:
"../envs/kraken2.yaml"
message:
"Classification w/ Kraken2 ({wildcards.db})"
"Tax. classification w/ Kraken2 ({wildcards.db}, LR)"
shell:
"(date && "
"kraken2 --db {input.db} --threads {threads} --use-names {params.params} --report {output.report} --output {output.labels} {input.lr} && "
"date) 2> {log.err} > {log.out}"
\ No newline at end of file
"date) &> {log}"
##################################################
# Kaiju
# Taxonomic classification of CD-HIT-clustered proteins with Kaiju.
# Runs on the protein FASTA produced by the cdhit analysis step for a pair of
# assemblies ({rtype1}_{tool1} vs {rtype2}_{tool2}) against one configured
# Kaiju database {db}.
rule tax_kaiju_cdhit:
    input:
        faa=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype1}_{tool1}__{rtype2}_{tool2}.faa"),
        # taxonomy tree file shipped with the Kaiju DB
        nodes=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "nodes.dmp"),
        # FM-index; its basename must equal the db key (see config: "key = basename of *.fmi")
        fmi=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "%s.fmi" % wildcards.db),
    output:
        os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{rtype1}_{tool1}__{rtype2}_{tool2}.{db}.tsv")
    log:
        "logs/kaiju.cdhit.{rtype1}_{tool1}__{rtype2}_{tool2}.{db}.log"
    wildcard_constraints:
        # constrain wildcards to the known read types / assemblers / configured DBs
        rtype1="|".join(READ_TYPES),
        rtype2="|".join(READ_TYPES),
        tool1="|".join(ASSEMBLERS),
        tool2="|".join(ASSEMBLERS),
        db="|".join(config["kaiju"]["db"].keys())
    threads:
        config["kaiju"]["threads"]
    conda:
        "../envs/kaiju.yaml"
    message:
        "Tax. classification w/ Kaiju ({wildcards.db})"
    shell:
        # -v: verbose (extra output columns); -p: input is protein sequences
        # NOTE(review): presumably -p means protein mode here — confirm against
        # the Kaiju version pinned in ../envs/kaiju.yaml
        "(date && "
        "kaiju -t {input.nodes} -f {input.fmi} -i {input.faa} -o {output} -z {threads} -v -p && "
        "date) &> {log}"
\ No newline at end of file
......@@ -5,28 +5,35 @@ include:
rule TAXONOMY:
input:
"status/taxonomy_kraken2.done",
output:
touch("status/taxonomy.done")
rule TAXONOMY_KRAKEN2:
input:
# Kraken2
expand(
os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype_tool}_{db}_{otype}.txt"),
rtype_tool=["%s_%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype_tool}.{atype}.{db}.{otype}.txt"),
rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
atype=["ASSEMBLY", "ASSEMBLY.FILTERED"],
db=config["kraken2"]["db"].keys(),
otype=["labels", "report"]
),
) if "kraken2" in config["steps_taxonomy"] else [],
expand(
os.path.join(RESULTS_DIR, "taxonomy/kraken2/{mtype}_sr_{db}_{otype}.txt"),
os.path.join(RESULTS_DIR, "taxonomy/kraken2/{mtype}.sr.{db}.{otype}.txt"),
mtype=META_TYPES,
db=config["kraken2"]["db"].keys(),
otype=["labels", "report"]
),
) if "kraken2" in config["steps_taxonomy"] else [],
expand(
os.path.join(RESULTS_DIR, "taxonomy/kraken2/metag_lr_{db}_{otype}.txt"),
os.path.join(RESULTS_DIR, "taxonomy/kraken2/metag.lr.{db}.{otype}.txt"),
db=config["kraken2"]["db"].keys(),
otype=["labels", "report"]
),
) if "kraken2" in config["steps_taxonomy"] else [],
# Kaiju
expand(
os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.tsv"),
combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS],
db=config["kaiju"]["db"].keys()
) if "kaiju" in config["steps_taxonomy"] else [],
expand(
os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.tsv"),
combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS],
db=config["kaiju"]["db"].keys()
) if "kaiju" in config["steps_taxonomy"] else [],
output:
touch("status/taxonomy_kraken2.done")
touch("status/taxonomy.done")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment