Gitlab migration complete. If you have any issue please read the FAQ.

Commit 8d6b6ddd authored by Laura Denies
Browse files

Limit nr of files

parent 037108b1
......@@ -11,7 +11,8 @@ if config["pathofact"]["workflow"] == "complete":
[
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
"{datadir}/{project}/PathoFact_{sample}_predictions.tsv",
"{datadir}/{project}/logs/{sample}_compressed.zip"
],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
)
......@@ -23,7 +24,8 @@ elif config["pathofact"]["workflow"] == "Tox":
expand(
[
"{datadir}/{project}/Toxin_prediction_{sample}_report.tsv",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv"
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/logs/Tox_{sample}_compressed.zip"
],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
)
......@@ -33,7 +35,10 @@ elif config["pathofact"]["workflow"] == "Vir":
rule all:
input:
expand(
"{datadir}/{project}/Virulence_prediction_{sample}_report.tsv",
[
"{datadir}/{project}/Virulence_prediction_{sample}_report.tsv",
"{datadir}/{project}/logs/VF_{sample}_compressed.zip"
],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
)
elif config["pathofact"]["workflow"] == "AMR":
......@@ -44,7 +49,8 @@ elif config["pathofact"]["workflow"] == "AMR":
expand(
[
"{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv",
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv"
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv",
"{datadir}/{project}/logs/AMR_{sample}_compressed.zip"
],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
)
......
......@@ -14,7 +14,7 @@ rule run_deepARG:
output:
temp("{datadir}/{project}/AMR/deepARG_results/{sample}/{file_i}.out.mapping.ARG")
log:
"{datadir}/{project}/AMR/logs/{sample}/{file_i}.out.mapping.ARG.log"
"{datadir}/{project}/logs/{sample}/{file_i}.out.mapping.ARG.log"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["medium"],
......@@ -23,7 +23,9 @@ rule run_deepARG:
"../../envs/DeepARG.yaml"
message: "executing deep-arg on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"deeparg predict --model LS --model-version v2 --type prot -d {config[pathofact][scripts]}/deeparg_data/deepARG --input {input} --out {wildcards.datadir}/{wildcards.project}/AMR/deepARG_results/{wildcards.sample}/{wildcards.file_i}.out &> {log}"
"""
deeparg predict --model LS --model-version v2 --type prot -d {config[pathofact][scripts]}/deeparg_data/deepARG --input {input} --out {wildcards.datadir}/{wildcards.project}/AMR/deepARG_results/{wildcards.sample}/{wildcards.file_i}.out &> {log}
"""
def aggregate_AMR(wildcards):
checkpoint_output = checkpoints.splitting.get(**wildcards).output.splits
......@@ -46,7 +48,10 @@ rule aggregate_deepARG:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "aggregating deep-arg results on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"cat {input} > {output}"
"""
cat {input} > {output}
rm -rf {wildcards.datadir}/{wildcards.project}/AMR/deepARG_results/{wildcards.sample}
"""
# RGI
rule run_RGI:
......@@ -87,7 +92,10 @@ rule aggregate_RGI:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Aggregate RGI results on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"cat {input} > {output}"
"""
cat {input} > {output}
rm -rf {wildcards.datadir}/{wildcards.project}/AMR/RGI_results/{wildcards.sample}
"""
# Combine DeepARG and RGI results
......@@ -99,7 +107,7 @@ rule combine_AMR:
output:
AMR_combined="{datadir}/{project}/AMR/{sample}_AMR_prediction.tsv"
log:
"{datadir}/{project}/logs/{sample}_combine_AMR_temp.log"
"{datadir}/{project}/logs/{sample}/combine_AMR_temp.log"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......
......@@ -21,7 +21,7 @@ rule combine_AMR_plasmid:
Report_1="{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv",
Report_2="{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv"
log:
"{datadir}/{project}/logs/{sample}_MGE_AMR_prediction_detail_temp.log"
"{datadir}/{project}/logs/{sample}/MGE_AMR_prediction_detail_temp.log"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["medium"],
......
......@@ -47,6 +47,7 @@ rule aggregate_VirSorter:
else
mv {input} {output}
fi
rm -rf {wildcards.datadir}/{wildcards.project}/MGE/phage/{wildcards.sample}/virsorter
"""
# VIRFINDER Prediction
......
......@@ -14,7 +14,7 @@ rule filter_seq:
output:
temp("{datadir}/{project}/plasmid/{sample}_filtered.fna")
log:
"{datadir}/{project}/logs/{sample}_filtered.log"
"{datadir}/{project}/logs/{sample}/plasmid_filtered.log"
conda:
"../../envs/Biopython.yaml"
params:
......@@ -30,7 +30,7 @@ checkpoint splitplasmid:
input:
"{datadir}/{project}/plasmid/{sample}_filtered.fna"
output:
split=directory("{datadir}/{project}/MGE/plasmid_splitted/{sample}/")
split=temp(directory("{datadir}/{project}/MGE/plasmid_splitted/{sample}/"))
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......@@ -85,7 +85,10 @@ rule Plasmid_aggregate:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Aggregate PlasFlow results on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"cat {input} > {output}"
# Concatenate the inputs into the aggregated output, then delete the
# per-sample PlasFlow directory. The directory path needs an explicit
# `rm -rf`: a bare path on its own line is executed as a command and
# makes the job fail after the `cat` has already succeeded.
"""
cat {input} > {output}
rm -rf {wildcards.datadir}/{wildcards.project}/MGE/plasmid/PlasFlow/{wildcards.sample}
"""
rule select:
input:
......@@ -115,7 +118,7 @@ rule run_MOBsuite:
conda:
"../../envs/MOB_suite.yaml"
message: "Executing MOB_suite with {threads} threads on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell: "mob_typer --multi --infile {input} --out_file {wildcards.datadir}/{wildcards.project}/MGE/plasmid/MOB_suite/{wildcards.sample}/{wildcards.file_i}_MOB_suite_prediction.txt -n {threads}"
shell: "mob_typer --multi --infile {input} --out_file {wildcards.datadir}/{wildcards.project}/MGE/plasmid/MOB_suite/{wildcards.sample}/{wildcards.file_i}_MOB_suite_prediction.txt -n {threads} &> {log}"
def aggregate_MOBsuite(wildcards):
checkpoint_output= checkpoints.splitcontig.get(**wildcards).output.split
......
......@@ -18,7 +18,7 @@ rule R_script:
gene_library="{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
gene_toxic="{datadir}/{project}/Toxin_prediction_{sample}_report.tsv"
log:
"{datadir}/{project}/logs/{sample}_gene_table_library.log"
"{datadir}/{project}/logs/{sample}/gene_table_library.log"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......
......@@ -71,7 +71,7 @@ rule HMM_correct_format_3:
input:
"{datadir}/{project}/TOXIN/HMM_toxin/{sample}.Input_HMM_R_temp.csv"
output:
temp("{datadir}/{project}/TOXIN/HMM_toxin/{sample}.Input_HMM_R.csv")
"{datadir}/{project}/TOXIN/HMM_toxin/{sample}.Input_HMM_R.csv"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......
#Clean-up
import glob
import os
rule clean_all:
    # Clean-up step for the complete workflow.
    #
    # Runs once the combined PathoFact prediction table exists. It moves the
    # per-sample log directory into a single zip archive (zip -m deletes the
    # originals after archiving), removes the per-sample split directories,
    # and finally prunes any directories left empty under the project folder.
    #
    # input:  {datadir}/{project}/PathoFact_{sample}_predictions.tsv
    # output: {datadir}/{project}/logs/{sample}_compressed.zip
    input:
        "{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
    output:
        "{datadir}/{project}/logs/{sample}_compressed.zip"
    # Paths are double-quoted so that a datadir/project/sample containing
    # whitespace cannot be word-split into unrelated arguments of `rm -rf`
    # or `find -delete`.
    # The SignalP split directory is removed as `{sample}_dir`, the name used
    # by the paths that reference it (SignalP/splitted/{sample}_dir).
    shell:
        """
        zip -rm "{output}" "{wildcards.datadir}/{wildcards.project}/logs/{wildcards.sample}"
        rm -rf "{wildcards.datadir}/{wildcards.project}/splitted/{wildcards.sample}"
        rm -rf "{wildcards.datadir}/{wildcards.project}/contig_splitted/{wildcards.sample}"
        rm -rf "{wildcards.datadir}/{wildcards.project}/SignalP/splitted/{wildcards.sample}_dir"
        rm -rf "{wildcards.datadir}/{wildcards.project}/MGE/plasmid_splitted/{wildcards.sample}"
        find "{wildcards.datadir}/{wildcards.project}" -type d -empty -delete
        """
#Clean-up
import glob
import os
rule clean_Toxin_workflow:
    # Clean-up step for the toxin-only ("Tox") workflow.
    #
    # Runs once the toxin prediction report exists. It moves the per-sample
    # log directory into a zip archive (zip -m deletes the originals after
    # archiving), removes the per-sample split directories, and prunes any
    # directories left empty under the project folder.
    #
    # input:  {datadir}/{project}/Toxin_prediction_{sample}_report.tsv
    # output: {datadir}/{project}/logs/Tox_{sample}_compressed.zip
    input:
        "{datadir}/{project}/Toxin_prediction_{sample}_report.tsv"
    output:
        "{datadir}/{project}/logs/Tox_{sample}_compressed.zip"
    # Paths are double-quoted so whitespace in a wildcard value cannot be
    # word-split into extra arguments of `rm -rf` / `find -delete`.
    # The SignalP split directory is removed as `{sample}_dir`, the name used
    # by the paths that reference it (SignalP/splitted/{sample}_dir).
    shell:
        """
        zip -rm "{output}" "{wildcards.datadir}/{wildcards.project}/logs/{wildcards.sample}"
        rm -rf "{wildcards.datadir}/{wildcards.project}/splitted/{wildcards.sample}"
        rm -rf "{wildcards.datadir}/{wildcards.project}/SignalP/splitted/{wildcards.sample}_dir"
        find "{wildcards.datadir}/{wildcards.project}" -type d -empty -delete
        """
rule clean_VF_workflow:
    # Clean-up step for the virulence-only ("Vir") workflow.
    #
    # Runs once the virulence prediction report exists. It moves the
    # per-sample log directory into a zip archive (zip -m deletes the
    # originals after archiving), removes the per-sample split directories,
    # and prunes any directories left empty under the project folder.
    #
    # input:  {datadir}/{project}/Virulence_prediction_{sample}_report.tsv
    # output: {datadir}/{project}/logs/VF_{sample}_compressed.zip
    input:
        "{datadir}/{project}/Virulence_prediction_{sample}_report.tsv"
    output:
        "{datadir}/{project}/logs/VF_{sample}_compressed.zip"
    # Paths are double-quoted so whitespace in a wildcard value cannot be
    # word-split into extra arguments of `rm -rf` / `find -delete`.
    # The SignalP split directory is removed as `{sample}_dir`, the name used
    # by the paths that reference it (SignalP/splitted/{sample}_dir).
    shell:
        """
        zip -rm "{output}" "{wildcards.datadir}/{wildcards.project}/logs/{wildcards.sample}"
        rm -rf "{wildcards.datadir}/{wildcards.project}/splitted/{wildcards.sample}"
        rm -rf "{wildcards.datadir}/{wildcards.project}/SignalP/splitted/{wildcards.sample}_dir"
        find "{wildcards.datadir}/{wildcards.project}" -type d -empty -delete
        """
rule clean_AMR_workflow:
    # Clean-up step for the AMR-only workflow.
    #
    # Runs once the AMR/MGE prediction report exists. It moves the per-sample
    # log directory into a zip archive (zip -m deletes the originals after
    # archiving), removes the per-sample ORF, contig and plasmid split
    # directories, and prunes any directories left empty under the project
    # folder.
    #
    # input:  {datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv
    # output: {datadir}/{project}/logs/AMR_{sample}_compressed.zip
    input:
        "{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv"
    output:
        "{datadir}/{project}/logs/AMR_{sample}_compressed.zip"
    # Paths are double-quoted so whitespace in a wildcard value cannot be
    # word-split into extra arguments of `rm -rf` / `find -delete`.
    shell:
        """
        zip -rm "{output}" "{wildcards.datadir}/{wildcards.project}/logs/{wildcards.sample}"
        rm -rf "{wildcards.datadir}/{wildcards.project}/splitted/{wildcards.sample}"
        rm -rf "{wildcards.datadir}/{wildcards.project}/contig_splitted/{wildcards.sample}"
        rm -rf "{wildcards.datadir}/{wildcards.project}/MGE/plasmid_splitted/{wildcards.sample}"
        find "{wildcards.datadir}/{wildcards.project}" -type d -empty -delete
        """
......@@ -11,7 +11,7 @@ rule combine_PathoFact:
output:
PathoFact_report= "{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
log:
"{datadir}/{project}/PathoFact_{sample}_predictions.log"
"{datadir}/{project}/logs/{sample}/PathoFact_predictions.log"
params:
runtime=config["pathofact"]["runtime"]["medium"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
......
......@@ -20,7 +20,7 @@ rule Prodigal:
conda:
"../../envs/Prodigal.yaml"
log:
"{datadir}/{project}/logs/Prodigal_{sample}.log"
"{datadir}/{project}/logs/{sample}/Prodigal.log"
shell:
"""
prodigal -i {input} -o {output.GFF} -a {output.ORF} -f gff -p meta &> {log}
......@@ -92,9 +92,9 @@ checkpoint splitting:
input:
"{datadir}/{project}/renamed/{sample}_ID.faa"
output:
splits=directory("{datadir}/{project}/splitted/{sample}/")
splits=temp(directory("{datadir}/{project}/splitted/{sample}/"))
log:
"{datadir}/{project}/logs/split_ORF_{sample}.log"
"{datadir}/{project}/logs/{sample}/split_ORF.log"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......
......@@ -46,7 +46,7 @@ checkpoint splitcontig:
input:
"{datadir}/{project}/renamed/{sample}_Contig_ID.fna"
output:
split=directory("{datadir}/{project}/contig_splitted/{sample}/")
split=temp(directory("{datadir}/{project}/contig_splitted/{sample}/"))
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......@@ -55,7 +55,7 @@ checkpoint splitcontig:
conda:
"../../envs/Biopython.yaml"
log:
"{datadir}/{project}/logs/split_contig_{sample}.log"
"{datadir}/{project}/logs/{sample}/split_contig.log"
shell:
"""
python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/contig_splitted/{wildcards.sample} &> {log}
......
......@@ -8,7 +8,9 @@ checkpoint splittingsignalP:
input:
"{datadir}/{project}/renamed/{sample}_ID.faa"
output:
splits=directory("{datadir}/{project}/SignalP/splitted/{sample}_dir/")
splits=temp(directory("{datadir}/{project}/SignalP/splitted/{sample}_dir/"))
log:
"{datadir}/{project}/logs/{sample}/SignalP_split.log"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......@@ -18,7 +20,7 @@ checkpoint splittingsignalP:
"../../envs/Biopython.yaml"
shell:
"""
python {config[pathofact][scripts]}/split.py {input} 2000 {wildcards.datadir}/{wildcards.project}/SignalP/splitted/{wildcards.sample}_dir
python {config[pathofact][scripts]}/split.py {input} 2000 {wildcards.datadir}/{wildcards.project}/SignalP/splitted/{wildcards.sample}_dir &> {log}
"""
......@@ -27,7 +29,9 @@ rule signalp_gramp:
input:
"{datadir}/{project}/SignalP/splitted/{sample}_dir/{file_i}.fasta"
output:
SignalP_gramP="{datadir}/{project}/SignalP/Gram+/{sample}/{file_i}_summary.signalp5"
SignalP_gramP=temp("{datadir}/{project}/SignalP/Gram+/{sample}/{file_i}_summary.signalp5")
log:
"{datadir}/{project}/logs/{sample}/SignalP_p_{file_i}.log"
message:
"Execute signalP on the following sample(s): {wildcards.project} - {wildcards.sample}"
params:
......@@ -37,14 +41,16 @@ rule signalp_gramp:
shell:
"""
export PATH={config[pathofact][signalp]}:$PATH
signalp -fasta {input} -org gram+ -prefix {wildcards.datadir}/{wildcards.project}/SignalP/Gram+/{wildcards.sample}/{wildcards.file_i}
signalp -fasta {input} -org gram+ -prefix {wildcards.datadir}/{wildcards.project}/SignalP/Gram+/{wildcards.sample}/{wildcards.file_i} &> {log}
"""
rule signalp_gramn:
input:
"{datadir}/{project}/SignalP/splitted/{sample}_dir/{file_i}.fasta"
output:
SignalP_gramN="{datadir}/{project}/SignalP/Gram-/{sample}/{file_i}_summary.signalp5"
SignalP_gramN=temp("{datadir}/{project}/SignalP/Gram-/{sample}/{file_i}_summary.signalp5")
log:
"{datadir}/{project}/logs/{sample}/SignalP_n_{file_i}.log"
message:
"Execute signalP on the following sample(s): {wildcards.project} - {wildcards.sample}"
params:
......@@ -54,7 +60,7 @@ rule signalp_gramn:
shell:
"""
export PATH={config[pathofact][signalp]}:$PATH
signalp -fasta {input} -org gram- -prefix {wildcards.datadir}/{wildcards.project}/SignalP/Gram-/{wildcards.sample}/{wildcards.file_i}
signalp -fasta {input} -org gram- -prefix {wildcards.datadir}/{wildcards.project}/SignalP/Gram-/{wildcards.sample}/{wildcards.file_i} &> {log}
"""
def aggregate_signalpP_input(wildcards):
......@@ -81,7 +87,7 @@ rule SignalPP_aggregate:
input:
aggregate_signalpP_input
output:
"{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5"
temp("{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5")
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......@@ -93,7 +99,7 @@ rule SignalPN_aggregate:
input:
aggregate_signalpN_input
output:
"{datadir}/{project}/SignalP/{sample}/gramn_summary.signalp5"
temp("{datadir}/{project}/SignalP/{sample}/gramn_summary.signalp5")
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......@@ -110,7 +116,7 @@ rule aggregate_signalP:
message:
"concatenate multiple split signalP files in a single joined file: {wildcards.project} - {wildcards.sample}"
log:
"{datadir}/{project}/SignalP/{sample}_SignalP_temp.log"
"{datadir}/{project}/SignalP/{sample}/SignalP_temp.log"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......
......@@ -16,7 +16,7 @@ rule merge_SignalPVir:
output:
Virulence_report="{datadir}/{project}/Virulence_prediction_{sample}_report.tsv"
log:
"{datadir}/{project}/logs/{sample}.combine_virulence_results.log"
"{datadir}/{project}/logs/{sample}/combine_virulence_results.log"
params:
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
......
......@@ -81,7 +81,7 @@ rule HMM_R_VIR:
output:
temp("{datadir}/{project}/VIRULENCE/HMM_virulence/{sample}.hmm_results.csv")
log:
"{datadir}/{project}/logs/{sample}.hmm_results.log"
"{datadir}/{project}/logs/{sample}/hmm_results.log"
params:
runtime=config["pathofact"]["runtime"]["medium"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
......
......@@ -11,6 +11,8 @@ include:
'../rules/AMR/Phage.smk'
include:
'../rules/AMR/Combine_MGE_AMR.smk'
include:
'../rules/Universal/Clean_up_individual.smk'
# master command
rule AMR_Analysis:
......@@ -22,7 +24,8 @@ rule AMR_Analysis:
"{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}_plasflow_prediction_final.tsv",
# Trailing comma is required: without it Python concatenates this literal
# with the next one into a single, non-existent target path.
"{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv",
"{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv",
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv"
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv",
"{datadir}/{project}/logs/AMR_{sample}_compressed.zip"
],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
)
......
......@@ -23,7 +23,8 @@ include:
'../rules/AMR/Combine_MGE_AMR.smk'
include:
'../rules/Universal/Combine_PathoFact.smk'
include:
'../rules/Universal/Clean_up.smk'
# master command
rule Analysis:
input:
......@@ -31,7 +32,8 @@ rule Analysis:
[
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
"{datadir}/{project}/PathoFact_{sample}_predictions.tsv",
"{datadir}/{project}/logs/{sample}_compressed.zip"
],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
)
......
......@@ -7,7 +7,8 @@ include:
'../rules/Toxin/Toxin.smk'
include:
'../rules/Toxin/Combine_Toxin_SignalP.smk'
include:
'../rules/Universal/Clean_up_individual.smk'
# master command
rule Analysis:
input:
......@@ -15,7 +16,8 @@ rule Analysis:
[
"{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.tsv",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/Toxin_prediction_{sample}_report.tsv"
"{datadir}/{project}/Toxin_prediction_{sample}_report.tsv",
"{datadir}/{project}/logs/Tox_{sample}_compressed.zip"
],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
)
......
......@@ -7,7 +7,8 @@ include:
'../rules/Virulence/Virulence.smk'
include:
'../rules/Virulence/Combine_Virulence_SignalP.smk'
include:
'../rules/Universal/Clean_up_individual.smk'
# master command
rule Analysis_Virulence:
......@@ -15,7 +16,8 @@ rule Analysis_Virulence:
expand(
[
"{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.tsv",
"{datadir}/{project}/Virulence_prediction_{sample}_report.tsv"
"{datadir}/{project}/Virulence_prediction_{sample}_report.tsv",
"{datadir}/{project}/logs/VF_{sample}_compressed.zip"
],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment