Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Laura Denies
PathoFact
Commits
e709e686
Commit e709e686 authored Apr 21, 2020 by Valentina Galata
Browse files
added log files to snakemake rules and logging in Rscripts (issue #28)
parent
d0e18ca4
Changes
14
Hide whitespace changes
Inline
Side-by-side
rules/AMR/AMR.snk
View file @
e709e686
...
...
@@ -15,12 +15,14 @@ rule run_deepARG:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
temp("{OUTDIR}/{project}/AMR/deepARG_results/{input_file}/{file_i}.out.mapping.ARG")
log:
"{OUTDIR}/{project}/AMR/deepARG_results/{input_file}/{file_i}.out.mapping.ARG.log"
params:
outdir="{OUTDIR}"
conda:
"../../envs/DeepARG.yaml"
shell:
"python {config[deep_ARG]} --align --type prot --genes --input {input} --output {wildcards.OUTDIR}/{wildcards.project}/AMR/deepARG_results/{wildcards.input_file}/{wildcards.file_i}.out"
"python {config[deep_ARG]} --align --type prot --genes --input {input} --output {wildcards.OUTDIR}/{wildcards.project}/AMR/deepARG_results/{wildcards.input_file}/{wildcards.file_i}.out
&> {log}
"
def aggregate_AMR(wildcards):
checkpoint_output = checkpoints.splitting.get(**wildcards).output.splits
...
...
rules/AMR/Combine_MGE_AMR.snk
View file @
e709e686
...
...
@@ -22,6 +22,8 @@ rule combine_AMR_plasmid:
output:
Report_1=temp("{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_detail_temp.csv"),
Report_2=temp("{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_report_temp.csv")
log:
"{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_detail_temp.log"
params:
outdir="{OUTDIR}"
conda:
...
...
rules/AMR/Phage.snk
View file @
e709e686
...
...
@@ -17,13 +17,15 @@ checkpoint splitphage:
"{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output:
split=directory("{OUTDIR}/{project}/contig_splitted/{input_file}/")
log:
"{OUTDIR}/{project}/contig_splitted/{input_file}.log"
params:
outdir="{OUTDIR}"
conda:
"../../envs/SeqKit.yaml"
shell:
"""
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/contig_splitted/{wildcards.input_file}
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/contig_splitted/{wildcards.input_file}
&> {log}
"""
rule run_VirSorter:
...
...
@@ -31,6 +33,8 @@ rule run_VirSorter:
"{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virsorter/VIRSorter_global-phage-signal.csv"
log:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virsorter/VIRSorter_global-phage-signal.log"
params:
outdir="{OUTDIR}"
conda:
...
...
@@ -39,7 +43,7 @@ rule run_VirSorter:
12
shell:
"""
{config[virsorter]} -f {input} --ncpu {threads} --wdir {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virsorter --data-dir {config[virsorter_data]}
{config[virsorter]} -f {input} --ncpu {threads} --wdir {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virsorter --data-dir {config[virsorter_data]}
&> {log}
"""
rule aggregate_Virsorter:
...
...
@@ -58,13 +62,16 @@ rule run_Virfinder:
"{OUTDIR}/{project}/contig_splitted/{input_file}/{file_i}.fna"
output:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt"
log:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virfinder/{file_i}.fna_gt1bp_dvfpred.log"
params:
outdir="{OUTDIR}"
conda:
"../../envs/DeepVirFinder.yaml"
threads:
12
shell: "python {config[DeepVirFinder]} -i {input} -o {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virfinder -c {threads}"
shell:
"python {config[DeepVirFinder]} -i {input} -o {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virfinder -c {threads} &> {log}"
def aggregate_virfinder(wildcards):
checkpoint_output= checkpoints.splitphage.get(**wildcards).output.split
...
...
rules/AMR/Plasmid.snk
View file @
e709e686
...
...
@@ -16,25 +16,29 @@ rule filter_seq:
"{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output:
"{OUTDIR}/{project}/{input_file}_filtered.fna"
log:
"{OUTDIR}/{project}/{input_file}_filtered.log"
conda:
"../../envs/Biopython.yaml"
params:
outdir="{OUTDIR}"
shell:
"./scripts/filter.pl 1000 {input} > {output}"
"./scripts/filter.pl 1000 {input} > {output}
&> {log}
"
checkpoint splitplasmid:
input:
"{OUTDIR}/{project}/{input_file}_filtered.fna"
output:
split=directory("{OUTDIR}/{project}/MGE/plasmid_splitted/{input_file}/")
log:
"{OUTDIR}/{project}/MGE/plasmid_splitted/{input_file}.log"
params:
outdir="{OUTDIR}"
conda:
"../../envs/SeqKit.yaml"
shell:
"""
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/MGE/plasmid_splitted/{wildcards.input_file}
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/MGE/plasmid_splitted/{wildcards.input_file}
&> {log}
"""
# PlasFlow Plasmid prediction
...
...
@@ -43,13 +47,15 @@ rule run_PLASMID:
"{OUTDIR}/{project}/MGE/plasmid_splitted/{input_file}/{file_i}.fna"
output:
temp("{OUTDIR}/{project}/MGE/plasmid/{input_file}/{file_i}_plasflow_prediction.tsv")
log:
"{OUTDIR}/{project}/MGE/plasmid/{input_file}/{file_i}_plasflow_prediction.log"
conda:
"../../envs/PlasFlow.yaml"
params:
outdir="{OUTDIR}"
shell:
"""
{config[Plasflow]} --input {input} --output {output} --threshold 0.7
{config[Plasflow]} --input {input} --output {output} --threshold 0.7
&> {log}
"""
def aggregate_plasmid_input(wildcards):
...
...
rules/Toxin/Combine_Toxin_SignalP.snk
View file @
e709e686
...
...
@@ -20,6 +20,8 @@ rule R_script:
output:
gene_library=temp("{OUTDIR}/{project}/TOXIN/R_output/{input_file}_gene_table_library.csv"),
gene_toxic=temp("{OUTDIR}/{project}/TOXIN/R_output/{input_file}_gene_table_Toxic.csv")
log:
"{OUTDIR}/{project}/TOXIN/R_output/{input_file}_gene_table_library.log"
message:
"Run external R script to join SignalP and ToxinHMM"
params:
...
...
rules/Toxin/Toxin.snk
View file @
e709e686
...
...
@@ -13,6 +13,8 @@ rule run_HMM:
renamed="{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.hmmscan"
log:
"{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.log"
message:
"Run HMM scan on {input[1]} to generate {output}"
params:
...
...
@@ -23,7 +25,7 @@ rule run_HMM:
1
shell:
"""
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input[0]} {input[1]}
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input[0]} {input[1]}
&> {log}
"""
# Adjust HMM results to correct format
...
...
rules/Universal/Combine_PathoFact.snk
View file @
e709e686
...
...
@@ -13,6 +13,8 @@ rule combine_PathoFact:
AMR_MGE="{OUTDIR}/{project}/AMR_MGE_prediction_{input_file}_report.tsv"
output:
"{OUTDIR}/{project}/PathoFact_{input_file}_predictions.csv"
log:
"{OUTDIR}/{project}/PathoFact_{input_file}_predictions.log"
conda:
"../../envs/R.yaml"
script:
...
...
rules/Universal/Preprocessing.snk
View file @
e709e686
...
...
@@ -51,11 +51,13 @@ checkpoint splitting:
"{OUTDIR}/{project}/renamed/{input_file}_ID.faa"
output:
splits=directory("{OUTDIR}/{project}/splitted/{input_file}/")
log:
"{OUTDIR}/{project}/splitted/{input_file}.log"
params:
outdir="{OUTDIR}"
conda:
"../../envs/SeqKit.yaml"
shell:
"""
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/splitted/{wildcards.input_file}
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/splitted/{wildcards.input_file}
&> {log}
"""
rules/Universal/SignalP.snk
View file @
e709e686
...
...
@@ -12,12 +12,14 @@ rule signalp:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/SignalP/{input_file}/{file_i}.txt"
log:
"{OUTDIR}/{project}/SignalP/{input_file}/{file_i}.log"
message:
"Running SignalP analysis on {input} resulting in {output}."
params:
outdir="{OUTDIR}"
shell:
"{config[signalp]} -t gram+ {input} > {output}"
"{config[signalp]} -t gram+ {input} > {output}
2> {log}
"
#adjust format of signalP files
rule SignalP_format:
...
...
rules/Virulence/Virulence.snk
View file @
e709e686
...
...
@@ -13,6 +13,8 @@ rule run_HMM_vir:
renamed="{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}/{file_i}.hmmscan"
log:
"{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}/{file_i}.log"
message:
"Run HMM scan on {input[1]} to generate {output}"
params:
...
...
@@ -23,7 +25,7 @@ rule run_HMM_vir:
12
shell:
"""
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input[0]} {input[1]}
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input[0]} {input[1]}
&> {log}
"""
# Adjust HMM results to correct format
...
...
@@ -72,6 +74,8 @@ rule HMM_R_VIR:
ID="{OUTDIR}/{project}/renamed/{input_file}_translation.tsv"
output:
"{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}.hmm_results.csv"
log:
"{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}.hmm_results.log"
conda:
"../../envs/R.yaml"
script:
...
...
@@ -136,50 +140,60 @@ rule AAC:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_AAC.txt"
log:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_AAC.log"
conda:
"../../envs/Biopython.yaml"
shell:
"python scripts/AAC.py --file {input} --out {output}"
"python scripts/AAC.py --file {input} --out {output}
&> {log}
"
rule DPC:
input:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_DPC.txt"
log:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_DPC.log"
conda:
"../../envs/Biopython.yaml"
shell:
"python scripts/DPC.py --file {input} --out {output}"
"python scripts/DPC.py --file {input} --out {output}
&> {log}
"
rule CTDC:
input:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDC.txt"
log:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDC.log"
conda:
"../../envs/Biopython.yaml"
shell:
"python scripts/CTDC.py --file {input} --out {output}"
"python scripts/CTDC.py --file {input} --out {output}
&> {log}
"
rule CTDT:
input:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDT.txt"
log:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDT.log"
conda:
"../../envs/Biopython.yaml"
shell:
"python scripts/CTDT.py --file {input} --out {output}"
"python scripts/CTDT.py --file {input} --out {output}
&> {log}
"
rule CTDD:
input:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDD.txt"
log:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDD.log"
conda:
"../../envs/Biopython.yaml"
shell:
"python scripts/CTDD.py --file {input} --out {output}"
"python scripts/CTDD.py --file {input} --out {output}
&> {log}
"
rule join_matrix:
input:
...
...
@@ -216,10 +230,12 @@ rule classifier:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_matrix.tsv"
output:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_classifier_prediction.tsv"
log:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_classifier_prediction.log"
conda:
"../../envs/Biopython.yaml"
shell:
"python scripts/virulence_prediction.py {input} {output}"
"python scripts/virulence_prediction.py {input} {output}
&> {log}
"
def aggregate_classifier(wildcards):
checkpoint_output = checkpoints.splitting.get(**wildcards).output.splits
...
...
scripts/AMR_MGE.R
View file @
e709e686
#!/usr/bin/env R
# logging
sink
(
file
=
file
(
snakemake
@
log
[[
1
]],
open
=
"wt"
),
type
=
"message"
)
library
(
tidyverse
)
# AMR prediction
...
...
scripts/PathoFact.R
View file @
e709e686
#!/usr/bin/env R
# logging
sink
(
file
=
file
(
snakemake
@
log
[[
1
]],
open
=
"wt"
),
type
=
"message"
)
library
(
tidyverse
)
# load virulence prediction
...
...
scripts/hmm.R
View file @
e709e686
#!/usr/bin/env R
# logging
sink
(
file
=
file
(
snakemake
@
log
[[
1
]],
open
=
"wt"
),
type
=
"message"
)
library
(
tidyverse
)
library
(
reshape2
)
...
...
scripts/ownHMM_library.R
View file @
e709e686
#!/usr/bin/env R
# logging
sink
(
file
=
file
(
snakemake
@
log
[[
1
]],
open
=
"wt"
),
type
=
"message"
)
#setwd("~/Desktop/own_HMM_library")
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment