Commit 72f0b27f authored by Laura Denies
Browse files

update signalp rules

parent 1a2c8a22
pathofact: pathofact:
sample: ["test_sample"] # requires user input sample: ["test_sample"] # requires user input
project: PathoFact_update_trial # requires user input project: PathoFact_update_trial_new # requires user input
datadir: ../test_dataset # requires user input datadir: ../test_dataset # requires user input
workflow: "complete" workflow: "complete"
size_fasta: 1000 size_fasta: 1000
......
...@@ -3,15 +3,31 @@ ...@@ -3,15 +3,31 @@
import glob import glob
import os import os
# Split sequences for signalp v5.0 (max. 5000 seq)
# Declared as a checkpoint because the number of chunk files written into the
# output directory is only known after split.py has run; the aggregate input
# functions in this file query this checkpoint to discover the chunks.
checkpoint splittingsignalP:
    input:
        "{datadir}/{project}/renamed/{sample}_ID.faa"
    output:
        splits=directory("{datadir}/{project}/SignalP/splitted/{sample}_dir/")
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
        # Chunk size handed to split.py, read from the `size_fasta` config entry
        # (presumably the number of sequences per chunk -- confirm in split.py).
        split=config["pathofact"]["size_fasta"]
    conda:
        "../../envs/Biopython.yaml"
    shell:
        # The chunk size comes from params.split so that the configured value is
        # honoured instead of a literal baked into the command line.
        """
        python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/SignalP/splitted/{wildcards.sample}_dir
        """
#Run SignalP on split sequence files #Run SignalP on split sequence files
rule signalp: rule signalp_gramp:
input: input:
"{datadir}/{project}/renamed/{sample}_ID.faa" "{datadir}/{project}/SignalP/splitted/{sample}_dir/{file_i}.fasta"
output: output:
SignalP_gramP="{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5", SignalP_gramP="{datadir}/{project}/SignalP/Gram+/{sample}/{file_i}_summary.signalp5"
SignalP_gramN="{datadir}/{project}/SignalP/{sample}/gramn_summary.signalp5"
log:
"{datadir}/{project}/SignalP/{sample}.log"
message: message:
"Running SignalP analysis on {input} resulting in {output}." "Running SignalP analysis on {input} resulting in {output}."
params: params:
...@@ -21,10 +37,70 @@ rule signalp: ...@@ -21,10 +37,70 @@ rule signalp:
shell: shell:
""" """
export PATH={config[pathofact][signalp]}:$PATH export PATH={config[pathofact][signalp]}:$PATH
signalp -fasta {input} -org gram+ -prefix {wildcards.datadir}/{wildcards.project}/SignalP/{wildcards.sample}/gramp -batch {config[pathofact][size_fasta]} signalp -fasta {input} -org gram+ -prefix {wildcards.datadir}/{wildcards.project}/SignalP/Gram+/{wildcards.sample}/{wildcards.file_i}
signalp -fasta {input} -org gram- -prefix {wildcards.datadir}/{wildcards.project}/SignalP/{wildcards.sample}/gramn -batch {config[pathofact][size_fasta]}
""" """
# Run SignalP in Gram-negative mode on a single chunk ({file_i}.fasta) taken
# from the per-sample split directory.
rule signalp_gramn:
    input:
        "{datadir}/{project}/SignalP/splitted/{sample}_dir/{file_i}.fasta"
    output:
        SignalP_gramN="{datadir}/{project}/SignalP/Gram-/{sample}/{file_i}_summary.signalp5"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["long"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    message:
        "Running SignalP analysis on {input} resulting in {output}."
    shell:
        # The configured SignalP directory is put first on PATH before the call.
        # The -prefix value is the declared output path without its
        # "_summary.signalp5" suffix.
        """
        export PATH={config[pathofact][signalp]}:$PATH
        signalp -fasta {input} -org gram- -prefix {wildcards.datadir}/{wildcards.project}/SignalP/Gram-/{wildcards.sample}/{wildcards.file_i}
        """
def aggregate_signalpP_input(wildcards):
    """Return the Gram+ SignalP summary paths for every chunk of a sample.

    Looks up the output directory of the ``splittingsignalP`` checkpoint for
    the given wildcards, collects the stem of each ``*.fasta`` file directly
    matched by the ``{i}.fasta`` pattern in it, and expands one
    ``Gram+/<sample>/<stem>_summary.signalp5`` path per stem.
    """
    split_dir = checkpoints.splittingsignalP.get(**wildcards).output.splits
    chunk_pattern = os.path.join(split_dir, "{i}.fasta")
    chunk_ids = glob_wildcards(chunk_pattern).i
    return expand(
        "{datadir}/{project}/SignalP/Gram+/{sample}/{file_i}_summary.signalp5",
        datadir=wildcards.datadir,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=chunk_ids,
    )
def aggregate_signalpN_input(wildcards):
    """Return the Gram- SignalP summary paths for every chunk of a sample.

    Looks up the output directory of the ``splittingsignalP`` checkpoint for
    the given wildcards, collects the stem of each file matched by the
    ``{i}.fasta`` pattern in it, and expands one
    ``Gram-/<sample>/<stem>_summary.signalp5`` path per stem.
    """
    split_dir = checkpoints.splittingsignalP.get(**wildcards).output.splits
    chunk_pattern = os.path.join(split_dir, "{i}.fasta")
    chunk_ids = glob_wildcards(chunk_pattern).i
    return expand(
        "{datadir}/{project}/SignalP/Gram-/{sample}/{file_i}_summary.signalp5",
        datadir=wildcards.datadir,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=chunk_ids,
    )
# Concatenate all per-chunk Gram+ SignalP summaries of a sample into a single
# gramp_summary.signalp5 file. The input list is produced by
# aggregate_signalpP_input.
rule SignalPP_aggregate:
    input:
        aggregate_signalpP_input
    output:
        "{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    shell:
        "cat {input} > {output}"
# Concatenate all per-chunk Gram- SignalP summaries of a sample into a single
# gramn_summary.signalp5 file. The input list is produced by
# aggregate_signalpN_input.
rule SignalPN_aggregate:
    input:
        aggregate_signalpN_input
    output:
        "{datadir}/{project}/SignalP/{sample}/gramn_summary.signalp5"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    shell:
        "cat {input} > {output}"
rule aggregate_signalP: rule aggregate_signalP:
input: input:
SignalP_gramP="{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5", SignalP_gramP="{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment