Commit e709e686 authored by Valentina Galata
Browse files

added log files to snakemake rules and logging in Rscripts (issue #28)

parent d0e18ca4
......@@ -15,12 +15,14 @@ rule run_deepARG:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
temp("{OUTDIR}/{project}/AMR/deepARG_results/{input_file}/{file_i}.out.mapping.ARG")
log:
"{OUTDIR}/{project}/AMR/deepARG_results/{input_file}/{file_i}.out.mapping.ARG.log"
params:
outdir="{OUTDIR}"
conda:
"../../envs/DeepARG.yaml"
shell:
"python {config[deep_ARG]} --align --type prot --genes --input {input} --output {wildcards.OUTDIR}/{wildcards.project}/AMR/deepARG_results/{wildcards.input_file}/{wildcards.file_i}.out"
"python {config[deep_ARG]} --align --type prot --genes --input {input} --output {wildcards.OUTDIR}/{wildcards.project}/AMR/deepARG_results/{wildcards.input_file}/{wildcards.file_i}.out &> {log}"
def aggregate_AMR(wildcards):
checkpoint_output = checkpoints.splitting.get(**wildcards).output.splits
......
......@@ -22,6 +22,8 @@ rule combine_AMR_plasmid:
output:
Report_1=temp("{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_detail_temp.csv"),
Report_2=temp("{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_report_temp.csv")
log:
"{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_detail_temp.log"
params:
outdir="{OUTDIR}"
conda:
......
......@@ -17,13 +17,15 @@ checkpoint splitphage:
"{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output:
split=directory("{OUTDIR}/{project}/contig_splitted/{input_file}/")
log:
"{OUTDIR}/{project}/contig_splitted/{input_file}.log"
params:
outdir="{OUTDIR}"
conda:
"../../envs/SeqKit.yaml"
shell:
"""
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/contig_splitted/{wildcards.input_file}
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/contig_splitted/{wildcards.input_file} &> {log}
"""
rule run_VirSorter:
......@@ -31,6 +33,8 @@ rule run_VirSorter:
"{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virsorter/VIRSorter_global-phage-signal.csv"
log:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virsorter/VIRSorter_global-phage-signal.log"
params:
outdir="{OUTDIR}"
conda:
......@@ -39,7 +43,7 @@ rule run_VirSorter:
12
shell:
"""
{config[virsorter]} -f {input} --ncpu {threads} --wdir {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virsorter --data-dir {config[virsorter_data]}
{config[virsorter]} -f {input} --ncpu {threads} --wdir {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virsorter --data-dir {config[virsorter_data]} &> {log}
"""
rule aggregate_Virsorter:
......@@ -58,13 +62,16 @@ rule run_Virfinder:
"{OUTDIR}/{project}/contig_splitted/{input_file}/{file_i}.fna"
output:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt"
log:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virfinder/{file_i}.fna_gt1bp_dvfpred.log"
params:
outdir="{OUTDIR}"
conda:
"../../envs/DeepVirFinder.yaml"
threads:
12
shell: "python {config[DeepVirFinder]} -i {input} -o {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virfinder -c {threads}"
shell:
"python {config[DeepVirFinder]} -i {input} -o {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virfinder -c {threads} &> {log}"
def aggregate_virfinder(wildcards):
checkpoint_output= checkpoints.splitphage.get(**wildcards).output.split
......
......@@ -16,25 +16,29 @@ rule filter_seq:
"{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output:
"{OUTDIR}/{project}/{input_file}_filtered.fna"
log:
"{OUTDIR}/{project}/{input_file}_filtered.log"
conda:
"../../envs/Biopython.yaml"
params:
outdir="{OUTDIR}"
shell:
# The filtered sequences are written by filter.pl to stdout and must land in
# {output}; only stderr is sent to the log. Using "> {output} &> {log}" would
# re-point stdout at the log file and leave {output} empty.
"./scripts/filter.pl 1000 {input} > {output} 2> {log}"
checkpoint splitplasmid:
    # Split the filtered contig FASTA into parts of 10000 sequences each with
    # `seqkit split2`. The output is a directory whose contents are only known
    # after execution, hence a checkpoint rather than a plain rule.
    input:
        "{OUTDIR}/{project}/{input_file}_filtered.fna"
    output:
        split=directory("{OUTDIR}/{project}/MGE/plasmid_splitted/{input_file}/")
    log:
        # Kept next to (not inside) the output directory.
        "{OUTDIR}/{project}/MGE/plasmid_splitted/{input_file}.log"
    params:
        outdir="{OUTDIR}"
    conda:
        "../../envs/SeqKit.yaml"
    shell:
        # Run seqkit exactly once; both stdout and stderr go to the rule log.
        """
        seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/MGE/plasmid_splitted/{wildcards.input_file} &> {log}
        """
# PlasFlow Plasmid prediction
......@@ -43,13 +47,15 @@ rule run_PLASMID:
"{OUTDIR}/{project}/MGE/plasmid_splitted/{input_file}/{file_i}.fna"
output:
temp("{OUTDIR}/{project}/MGE/plasmid/{input_file}/{file_i}_plasflow_prediction.tsv")
log:
"{OUTDIR}/{project}/MGE/plasmid/{input_file}/{file_i}_plasflow_prediction.log"
conda:
"../../envs/PlasFlow.yaml"
params:
outdir="{OUTDIR}"
shell:
"""
{config[Plasflow]} --input {input} --output {output} --threshold 0.7
{config[Plasflow]} --input {input} --output {output} --threshold 0.7 &> {log}
"""
def aggregate_plasmid_input(wildcards):
......
......@@ -20,6 +20,8 @@ rule R_script:
output:
gene_library=temp("{OUTDIR}/{project}/TOXIN/R_output/{input_file}_gene_table_library.csv"),
gene_toxic=temp("{OUTDIR}/{project}/TOXIN/R_output/{input_file}_gene_table_Toxic.csv")
log:
"{OUTDIR}/{project}/TOXIN/R_output/{input_file}_gene_table_library.log"
message:
"Run external R script to join SignalP and ToxinHMM"
params:
......
......@@ -13,6 +13,8 @@ rule run_HMM:
renamed="{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.hmmscan"
log:
"{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.log"
message:
"Run HMM scan on {input[1]} to generate {output}"
params:
......@@ -23,7 +25,7 @@ rule run_HMM:
1
shell:
"""
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input[0]} {input[1]}
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input[0]} {input[1]} &> {log}
"""
# Adjust HMM results to correct format
......
......@@ -13,6 +13,8 @@ rule combine_PathoFact:
AMR_MGE="{OUTDIR}/{project}/AMR_MGE_prediction_{input_file}_report.tsv"
output:
"{OUTDIR}/{project}/PathoFact_{input_file}_predictions.csv"
log:
"{OUTDIR}/{project}/PathoFact_{input_file}_predictions.log"
conda:
"../../envs/R.yaml"
script:
......
......@@ -51,11 +51,13 @@ checkpoint splitting:
"{OUTDIR}/{project}/renamed/{input_file}_ID.faa"
output:
splits=directory("{OUTDIR}/{project}/splitted/{input_file}/")
log:
"{OUTDIR}/{project}/splitted/{input_file}.log"
params:
outdir="{OUTDIR}"
conda:
"../../envs/SeqKit.yaml"
shell:
"""
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/splitted/{wildcards.input_file}
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/splitted/{wildcards.input_file} &> {log}
"""
......@@ -12,12 +12,14 @@ rule signalp:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/SignalP/{input_file}/{file_i}.txt"
log:
"{OUTDIR}/{project}/SignalP/{input_file}/{file_i}.log"
message:
"Running SignalP analysis on {input} resulting in {output}."
params:
outdir="{OUTDIR}"
shell:
"{config[signalp]} -t gram+ {input} > {output}"
"{config[signalp]} -t gram+ {input} > {output} 2> {log}"
#adjust format of signalP files
rule SignalP_format:
......
......@@ -13,6 +13,8 @@ rule run_HMM_vir:
renamed="{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}/{file_i}.hmmscan"
log:
"{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}/{file_i}.log"
message:
"Run HMM scan on {input[1]} to generate {output}"
params:
......@@ -23,7 +25,7 @@ rule run_HMM_vir:
12
shell:
"""
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input[0]} {input[1]}
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input[0]} {input[1]} &> {log}
"""
# Adjust HMM results to correct format
......@@ -72,6 +74,8 @@ rule HMM_R_VIR:
ID="{OUTDIR}/{project}/renamed/{input_file}_translation.tsv"
output:
"{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}.hmm_results.csv"
log:
"{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}.hmm_results.log"
conda:
"../../envs/R.yaml"
script:
......@@ -136,50 +140,60 @@ rule AAC:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_AAC.txt"
log:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_AAC.log"
conda:
"../../envs/Biopython.yaml"
shell:
"python scripts/AAC.py --file {input} --out {output}"
"python scripts/AAC.py --file {input} --out {output} &> {log}"
rule DPC:
    # Run scripts/DPC.py on one split protein FASTA chunk and write its
    # feature table to the *_DPC.txt output.
    input:
        "{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
    output:
        "{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_DPC.txt"
    log:
        "{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_DPC.log"
    conda:
        "../../envs/Biopython.yaml"
    shell:
        # Single command string: the script writes results via --out, so both
        # stdout and stderr can be captured in the log.
        "python scripts/DPC.py --file {input} --out {output} &> {log}"
rule CTDC:
    # Run scripts/CTDC.py on one split protein FASTA chunk and write its
    # feature table to the *_CTDC.txt output.
    input:
        "{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
    output:
        "{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDC.txt"
    log:
        "{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDC.log"
    conda:
        "../../envs/Biopython.yaml"
    shell:
        # Single command string: the script writes results via --out, so both
        # stdout and stderr can be captured in the log.
        "python scripts/CTDC.py --file {input} --out {output} &> {log}"
rule CTDT:
    # Run scripts/CTDT.py on one split protein FASTA chunk and write its
    # feature table to the *_CTDT.txt output.
    input:
        "{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
    output:
        "{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDT.txt"
    log:
        "{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDT.log"
    conda:
        "../../envs/Biopython.yaml"
    shell:
        # Single command string: the script writes results via --out, so both
        # stdout and stderr can be captured in the log.
        "python scripts/CTDT.py --file {input} --out {output} &> {log}"
rule CTDD:
    # Run scripts/CTDD.py on one split protein FASTA chunk and write its
    # feature table to the *_CTDD.txt output.
    input:
        "{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
    output:
        "{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDD.txt"
    log:
        "{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_CTDD.log"
    conda:
        "../../envs/Biopython.yaml"
    shell:
        # Single command string: the script writes results via --out, so both
        # stdout and stderr can be captured in the log.
        "python scripts/CTDD.py --file {input} --out {output} &> {log}"
rule join_matrix:
input:
......@@ -216,10 +230,12 @@ rule classifier:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_matrix.tsv"
output:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_classifier_prediction.tsv"
log:
"{OUTDIR}/{project}/VIRULENCE/classifier_virulence/{input_file}/{file_i}_classifier_prediction.log"
conda:
"../../envs/Biopython.yaml"
shell:
"python scripts/virulence_prediction.py {input} {output}"
"python scripts/virulence_prediction.py {input} {output} &> {log}"
def aggregate_classifier(wildcards):
checkpoint_output = checkpoints.splitting.get(**wildcards).output.splits
......
#!/usr/bin/env R
# logging
sink(file=file(snakemake@log[[1]], open="wt"), type="message")
library(tidyverse)
# AMR prediction
......
#!/usr/bin/env R
# logging
sink(file=file(snakemake@log[[1]], open="wt"), type="message")
library(tidyverse)
# load virulence prediction
......
#!/usr/bin/env R
# logging
sink(file=file(snakemake@log[[1]], open="wt"), type="message")
library(tidyverse)
library(reshape2)
......
#!/usr/bin/env R
# logging
sink(file=file(snakemake@log[[1]], open="wt"), type="message")
#setwd("~/Desktop/own_HMM_library")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment