GitLab is now using https://gitlab.lcsb.uni.lu as its primary address. Please update your bookmarks. FAQ.

Commit 07f76d3a authored by Laura Denies's avatar Laura Denies
Browse files

include messages within rules

parent 804a5f50
pathofact:
sample: ["SRR2582233","SRR2582234","SRR2582237","SRR2582238","SRR2582241","SRR2582243","SRR2582246","SRR2582247","SRR2582248","SRR2582251","SRR2582252","SRR2582253","SRR2582255","SRR2582257","SRR2582258"] # requires user input
project: PathoFact_v2 # requires user input
datadir: /work/projects/ecosystem_biology/local_tools/PathoFact_update/Datasets/CDI # requires user input
sample: ["test_sample"] # requires user input
project: PathoFact_module_test # requires user input
datadir: ../test_dataset # requires user input
workflow: "complete"
size_fasta: 10000
scripts: "scripts"
......
......@@ -21,6 +21,7 @@ rule run_deepARG:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
"../../envs/DeepARG.yaml"
message: "executing deep-arg on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"deeparg predict --model LS --model-version v2 --type prot -d {config[pathofact][scripts]}/deeparg_data/deepARG --input {input} --out {wildcards.datadir}/{wildcards.project}/AMR/deepARG_results/{wildcards.sample}/{wildcards.file_i}.out &> {log}"
......@@ -43,6 +44,7 @@ rule aggregate_deepARG:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "aggregating deep-arg results on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"cat {input} > {output}"
......@@ -60,6 +62,7 @@ rule run_RGI:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
"../../envs/rgi.yaml"
message: "executing RGI on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"rgi main --input_sequence {input} --output_file {wildcards.datadir}/{wildcards.project}/AMR/RGI_results/{wildcards.sample}/{wildcards.file_i}.RGI --input_type protein --local --clean &> {log}"
......@@ -82,6 +85,7 @@ rule aggregate_RGI:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Aggregate RGI results on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"cat {input} > {output}"
......@@ -101,5 +105,6 @@ rule combine_AMR:
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda: "../../envs/R.yaml"
message: "Combine AMR prediction of RGI and DeepArg for the following sample: {wildcards.project} - {wildcards.sample}"
script:
"../../scripts/AMR.R"
......@@ -28,6 +28,7 @@ rule combine_AMR_plasmid:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
"../../envs/R.yaml"
message: "Merge AMR and MGE predictions for the following sample: {wildcards.project} - {wildcards.sample}"
script:
"../../scripts/AMR_MGE.R"
......@@ -24,6 +24,7 @@ rule run_VirSorter:
"../../envs/VirSorter.yaml"
threads:
config["pathofact"]["mem"]["big_mem_cores"]
message: "Executing VirSorter with {threads} threads on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"""
wrapper_phage_contigs_sorter_iPlant.pl -f {input} --ncpu {threads} --wdir {wildcards.datadir}/{wildcards.project}/MGE/phage/{wildcards.sample}/virsorter --data-dir {config[pathofact][scripts]}/virsorter-data &> {log}
......@@ -37,6 +38,7 @@ rule aggregate_VirSorter:
"{datadir}/{project}/MGE/phage/{sample}_VIRSorter_aggregated.csv"
params:
outdir="{datadir}"
message: "VirSorter failsafe for empty files: {wildcards.project} - {wildcards.sample}"
shell:
"""
if [ -s {output} ]
......@@ -63,6 +65,7 @@ rule run_VirFinder:
"../../envs/DeepVirFinder.yaml"
threads:
config["pathofact"]["mem"]["big_mem_cores"]
message: "Executing Deep-VirFinder with {threads} threads on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"python {config[pathofact][deepvirfinder]} -i {input} -o {wildcards.datadir}/{wildcards.project}/MGE/phage/{wildcards.sample}/virfinder -c {threads} &> {log}"
......@@ -85,5 +88,6 @@ rule aggregate_VirFinder:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Aggregate VirFinder predictions on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"cat {input} >{output}"
......@@ -22,6 +22,7 @@ rule filter_seq:
runtime=config["pathofact"]["runtime"]["medium"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
minlen=config["pathofact"]["plasflow_minlen"]
message: "Filter samples on length for PlasFlow predictions: {wildcards.project} - {wildcards.sample}"
shell:
"{config[pathofact][scripts]}/filter.pl {params.minlen} {input} > {output} 2> {log}"
......@@ -57,6 +58,7 @@ rule run_PLASMID:
runtime=config["pathofact"]["runtime"]["long"],
mem=config["pathofact"]["mem"]["big_mem_per_core_gb"],
threshold=config["pathofact"]["plasflow_threshold"]
message: "Executing PlasFlow on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"""
PlasFlow.py --input {input} --output {output} --threshold {params.threshold} &> {log}
......@@ -81,6 +83,7 @@ rule Plasmid_aggregate:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Aggregate PlasFlow results on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"cat {input} > {output}"
......@@ -111,6 +114,7 @@ rule run_MOBsuite:
config["pathofact"]["mem"]["big_mem_cores"]
conda:
"../../envs/MOB_suite.yaml"
message: "Executing MOB_suite with {threads} threads on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell: "mob_typer --multi --infile {input} --out_file {wildcards.datadir}/{wildcards.project}/MGE/plasmid/MOB_suite/{wildcards.sample}/{wildcards.file_i}_MOB_suite_prediction.txt -n {threads}"
def aggregate_MOBsuite(wildcards):
......@@ -131,6 +135,7 @@ rule aggregate_MOBsuite:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Aggregate MOB_suite results on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"cat {input} > {output}"
......@@ -19,12 +19,11 @@ rule R_script:
gene_toxic="{datadir}/{project}/Toxin_prediction_{sample}_report.tsv"
log:
"{datadir}/{project}/TOXIN/R_output/{sample}_gene_table_library.log"
message:
"Run external R script to join SignalP and ToxinHMM and create Toxin report (incl. confidence levels)"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Generate Toxin report on the following sample(s): {wildcards.project} - {wildcards.sample}"
conda:
"../../envs/R.yaml"
script:
......
......@@ -12,8 +12,6 @@ rule run_HMM_tox:
temp("{datadir}/{project}/TOXIN/HMM_toxin/{sample}/{file_i}.hmmscan")
log:
"{datadir}/{project}/TOXIN/HMM_toxin/{sample}/{file_i}.log"
message:
"Run HMM scan on {input.renamed} to generate {output}"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["long"],
......@@ -22,6 +20,7 @@ rule run_HMM_tox:
"../../envs/HMMER.yaml"
threads:
1
message: "Executing toxin prediction with {threads} threads on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"""
hmmsearch --cpu {threads} --noali --notextw --tblout {output} {input.hmm} {input.renamed} &> {log}
......@@ -33,12 +32,11 @@ rule HMM_correct_format:
"{datadir}/{project}/TOXIN/HMM_toxin/{sample}/{file_i}.hmmscan"
output:
temp("{datadir}/{project}/TOXIN/HMM_toxin/{sample}/{file_i}.hmm.csv")
message:
"Adjust {input} to correct format: {output}"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Adjust output format of toxin predictions: {wildcards.project} - {wildcards.sample}"
shell:
"""
sed '/^#/ d' {input} | sed 's/ \+/\\t/g' > {output}
......@@ -63,6 +61,7 @@ rule HMM_correct_format_2:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Aggregate toxin prediction of the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"""
cut -f 1,3,5,6 {input} | uniq > {output}
......
......@@ -17,5 +17,6 @@ rule combine_PathoFact:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
"../../envs/R.yaml"
message: "Generate PathoFact report on the following sample(s): {wildcards.project} - {wildcards.sample}"
script:
"../../scripts/PathoFact.R"
......@@ -14,7 +14,7 @@ rule Prodigal:
ORF="{datadir}/{project}/Prodigal/{sample}.faa",
GFF="{datadir}/{project}/Prodigal/{sample}.gff"
message:
"Generates ORFs and gff"
"Generates ORFs and gff: {wildcards.project} - {wildcards.sample}"
params:
outdir="{datadir}"
conda:
......@@ -32,7 +32,7 @@ rule mapping_file:
output:
"{datadir}/{project}/Prodigal/{sample}.contig"
message:
"Generate mapping file"
"Generate mapping file: {wildcards.project} - {wildcards.sample}"
params:
outdir="{datadir}"
shell:
......@@ -52,7 +52,7 @@ rule generate_ID:
output:
"{datadir}/{project}/renamed/{sample}_ID.faa"
message:
"Replace fasta headers with unique ID number - {wildcards.project}"
"Replace fasta headers with unique ID number: {wildcards.project} - {wildcards.sample}"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......@@ -70,7 +70,7 @@ rule generate_translation:
output:
"{datadir}/{project}/renamed/{sample}_translation.tsv"
message:
"Generate {output} containing original fasta header with corresponding ID number - {wildcards.project}"
"Generate {output} containing original fasta header with corresponding ID number: {wildcards.project} - {wildcards.sample}"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......
......@@ -14,7 +14,7 @@ rule generate_contigID:
output:
"{datadir}/{project}/renamed/{sample}_Contig_ID.fna"
message:
"Replace fasta headers with unique ID number - {wildcards.project}"
"Replace fasta headers with unique ID number: {wildcards.project} - {wildcards.sample}"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......@@ -32,7 +32,7 @@ rule generate_ContigTranslation:
output:
"{datadir}/{project}/renamed/{sample}_Contig_translation.tsv"
message:
"Generate {output} containing original fasta header with corresponding ID number - {wildcards.project}"
"Generate {output} containing original fasta header with corresponding ID number: {wildcards.project} - {wildcards.sample}"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......
......@@ -29,7 +29,7 @@ rule signalp_gramp:
output:
SignalP_gramP="{datadir}/{project}/SignalP/Gram+/{sample}/{file_i}_summary.signalp5"
message:
"Running SignalP analysis on {input} resulting in {output}."
"Execute signalP on the following sample(s): {wildcards.project} - {wildcards.sample}"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["long"],
......@@ -46,7 +46,7 @@ rule signalp_gramn:
output:
SignalP_gramN="{datadir}/{project}/SignalP/Gram-/{sample}/{file_i}_summary.signalp5"
message:
"Running SignalP analysis on {input} resulting in {output}."
"Execute signalP on the following sample(s): {wildcards.project} - {wildcards.sample}"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["long"],
......@@ -108,7 +108,7 @@ rule aggregate_signalP:
output:
SignalP_report="{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.tsv"
message:
"concatenate multiple split signalP files in a single joined file: {output}"
"concatenate multiple split signalP files in a single joined file: {wildcards.project} - {wildcards.sample}"
log:
"{datadir}/{project}/SignalP/{sample}_SignalP_temp.log"
params:
......
......@@ -21,5 +21,6 @@ rule merge_SignalPVir:
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda: "../../envs/R.yaml"
message: "Generate report on predicted virulence factors on the following sample(s): {wildcards.project} - {wildcards.sample}"
script: "../../scripts/Virulence.R"
......@@ -22,6 +22,7 @@ rule run_HMM_vir:
"../../envs/HMMER.yaml"
threads:
12
message: "Executing prediction of virulence factors (HMM) with {threads} threads on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"""
hmmsearch --cpu {threads} --noali --notextw --tblout {output} {input.hmm} {input.renamed} &> {log}
......@@ -39,6 +40,7 @@ rule HMM_correct_format_vir:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Format virulence factor predictions (HMM): {wildcards.project} - {wildcards.sample}"
shell:
"""
sed '/^#/ d' {input} | sed 's/ \+/\\t/g' > {output}
......@@ -59,6 +61,7 @@ rule HMM_correct_format_2_vir:
aggregate_hmm
output:
"{datadir}/{project}/VIRULENCE/HMM_virulence/{sample}.Input_HMM_R.csv"
message: "Aggregate virulence factor prediction (HMM) on the following sample(s): {wildcards.project} - {wildcards.sample}"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
......@@ -98,6 +101,7 @@ rule HMM_VIR_classification:
params:
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Classify HMM virulence predictions on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"""
awk '$2 == "True" && $3 == "False" && $4 == "False"' {input} | awk '$5 = "negative"' | sed 's/ /\\t/g' > {output.non_path}
......@@ -147,6 +151,7 @@ rule AAC:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
"../../envs/Biopython.yaml"
message: "Identify features (AAC) on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"python {config[pathofact][scripts]}/AAC.py --file {input} --out {output} &> {log}"
......@@ -162,6 +167,7 @@ rule DPC:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
"../../envs/Biopython.yaml"
message: "Identify features (DPC) on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"python {config[pathofact][scripts]}/DPC.py --file {input} --out {output} &> {log}"
......@@ -177,6 +183,7 @@ rule CTDC:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
"../../envs/Biopython.yaml"
message: "Identify features (CTDC) on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"python {config[pathofact][scripts]}/CTDC.py --file {input} --out {output} &> {log}"
......@@ -192,6 +199,7 @@ rule CTDT:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
"../../envs/Biopython.yaml"
message: "Identify features (CTDT) on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"python {config[pathofact][scripts]}/CTDT.py --file {input} --out {output} &> {log}"
......@@ -207,6 +215,7 @@ rule CTDD:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
"../../envs/Biopython.yaml"
message: "Identify features (CTDD) on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"python {config[pathofact][scripts]}/CTDD.py --file {input} --out {output} &> {log}"
......@@ -222,6 +231,7 @@ rule join_matrix:
params:
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Join feature matrix on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"""
xjoin() {{
......@@ -256,6 +266,7 @@ rule classifier:
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
"../../envs/Biopython.yaml"
message: "Execute classifier for virulence factor prediction on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"python {config[pathofact][scripts]}/virulence_prediction.py {input.input} {output} {input.model} &> {log}"
......@@ -277,6 +288,7 @@ rule format_classifier:
params:
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Aggregate classifier predictions for virulence factors on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
"""
sed 's/"//g' {input} | cut -f2,3 >{output}
......
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment