GitLab is now using https://gitlab.lcsb.uni.lu as its primary address. Please update your bookmarks. FAQ.

Commit 72f0b27f authored by Laura Denies's avatar Laura Denies
Browse files

update signalp rules

parent 1a2c8a22
pathofact:
sample: ["test_sample"] # requires user input
project: PathoFact_update_trial # requires user input
project: PathoFact_update_trial_new # requires user input
datadir: ../test_dataset # requires user input
workflow: "complete"
size_fasta: 1000
......
......@@ -3,15 +3,31 @@
import glob
import os
# Split sequences for signalp v5.0 (max. 5000 seq)
#
# Checkpoint: splits the renamed protein FASTA of one sample into several
# chunk files inside a per-sample directory. It is a checkpoint (not a plain
# rule) because the number of chunk files is only known after it has run;
# downstream input functions read the resulting directory to build their
# input lists.
checkpoint splittingsignalP:
    input:
        "{datadir}/{project}/renamed/{sample}_ID.faa"
    output:
        splits=directory("{datadir}/{project}/SignalP/splitted/{sample}_dir/")
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
        # Value handed to split.py as its second argument; presumably the
        # number of sequences per chunk -- confirm against split.py.
        split=config["pathofact"]["size_fasta"]
    conda:
        "../../envs/Biopython.yaml"
    shell:
        # Use the configured split value instead of a hard-coded 2000 so that
        # changing `size_fasta` in the config actually takes effect.
        """
        python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/SignalP/splitted/{wildcards.sample}_dir
        """
#Run SignalP on split sequence files
rule signalp:
rule signalp_gramp:
input:
"{datadir}/{project}/renamed/{sample}_ID.faa"
"{datadir}/{project}/SignalP/splitted/{sample}_dir/{file_i}.fasta"
output:
SignalP_gramP="{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5",
SignalP_gramN="{datadir}/{project}/SignalP/{sample}/gramn_summary.signalp5"
log:
"{datadir}/{project}/SignalP/{sample}.log"
SignalP_gramP="{datadir}/{project}/SignalP/Gram+/{sample}/{file_i}_summary.signalp5"
message:
"Running SignalP analysis on {input} resulting in {output}."
params:
......@@ -21,10 +37,70 @@ rule signalp:
shell:
"""
export PATH={config[pathofact][signalp]}:$PATH
signalp -fasta {input} -org gram+ -prefix {wildcards.datadir}/{wildcards.project}/SignalP/{wildcards.sample}/gramp -batch {config[pathofact][size_fasta]}
signalp -fasta {input} -org gram- -prefix {wildcards.datadir}/{wildcards.project}/SignalP/{wildcards.sample}/gramn -batch {config[pathofact][size_fasta]}
signalp -fasta {input} -org gram+ -prefix {wildcards.datadir}/{wildcards.project}/SignalP/Gram+/{wildcards.sample}/{wildcards.file_i}
"""
# Run SignalP with `-org gram-` on one chunk FASTA from the per-sample split
# directory. One job is created per `file_i` chunk; the declared output is the
# per-chunk summary file under SignalP/Gram-/<sample>/.
rule signalp_gramn:
    input:
        "{datadir}/{project}/SignalP/splitted/{sample}_dir/{file_i}.fasta"
    output:
        SignalP_gramN="{datadir}/{project}/SignalP/Gram-/{sample}/{file_i}_summary.signalp5"
    message:
        "Running SignalP analysis on {input} resulting in {output}."
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["long"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    # The SignalP install directory from the config is prepended to PATH so
    # the `signalp` executable resolves. The -prefix is the output path minus
    # the "_summary.signalp5" suffix declared above.
    shell:
        """
        export PATH={config[pathofact][signalp]}:$PATH
        signalp -fasta {input} -org gram- -prefix {wildcards.datadir}/{wildcards.project}/SignalP/Gram-/{wildcards.sample}/{wildcards.file_i}
        """
def aggregate_signalpP_input(wildcards):
    """Return the per-chunk Gram+ SignalP summary paths for one sample.

    Looks up the output directory of the ``splittingsignalP`` checkpoint for
    the given wildcards, collects the stem of every ``*.fasta`` file found in
    it, and expands one ``Gram+`` summary path per stem.

    Args:
        wildcards: Snakemake wildcards providing ``datadir``, ``project``
            and ``sample``.

    Returns:
        List of ``.../SignalP/Gram+/{sample}/{file_i}_summary.signalp5``
        paths, one per chunk file.
    """
    split_dir = checkpoints.splittingsignalP.get(**wildcards).output.splits
    chunk_pattern = os.path.join(split_dir, "{i}.fasta")
    chunk_ids = glob_wildcards(chunk_pattern).i
    return expand(
        "{datadir}/{project}/SignalP/Gram+/{sample}/{file_i}_summary.signalp5",
        datadir=wildcards.datadir,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=chunk_ids,
    )
def aggregate_signalpN_input(wildcards):
    """Return the per-chunk Gram- SignalP summary paths for one sample.

    Looks up the output directory of the ``splittingsignalP`` checkpoint for
    the given wildcards, collects the stem of every ``*.fasta`` file found in
    it, and expands one ``Gram-`` summary path per stem.

    Args:
        wildcards: Snakemake wildcards providing ``datadir``, ``project``
            and ``sample``.

    Returns:
        List of ``.../SignalP/Gram-/{sample}/{file_i}_summary.signalp5``
        paths, one per chunk file.
    """
    split_dir = checkpoints.splittingsignalP.get(**wildcards).output.splits
    chunk_pattern = os.path.join(split_dir, "{i}.fasta")
    chunk_ids = glob_wildcards(chunk_pattern).i
    return expand(
        "{datadir}/{project}/SignalP/Gram-/{sample}/{file_i}_summary.signalp5",
        datadir=wildcards.datadir,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=chunk_ids,
    )
# Concatenate all per-chunk Gram+ SignalP summaries of a sample (as listed by
# aggregate_signalpP_input) into a single gramp_summary.signalp5 file.
rule SignalPP_aggregate:
    input:
        aggregate_signalpP_input
    output:
        "{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    # Plain concatenation: the chunk files are joined as they are, in the
    # order the input function returned them.
    shell:
        "cat {input} > {output}"
# Concatenate all per-chunk Gram- SignalP summaries of a sample (as listed by
# aggregate_signalpN_input) into a single gramn_summary.signalp5 file.
rule SignalPN_aggregate:
    input:
        aggregate_signalpN_input
    output:
        "{datadir}/{project}/SignalP/{sample}/gramn_summary.signalp5"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    # Plain concatenation: the chunk files are joined as they are, in the
    # order the input function returned them.
    shell:
        "cat {input} > {output}"
rule aggregate_signalP:
input:
SignalP_gramP="{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment