Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Laura Denies
PathoFact
Commits
72f0b27f
Commit
72f0b27f
authored
Jul 06, 2020
by
Laura Denies
Browse files
update signalp rules
parent
1a2c8a22
Changes
2
Hide whitespace changes
Inline
Side-by-side
config.yaml
View file @
72f0b27f
pathofact
:
sample
:
[
"
test_sample"
]
# requires user input
project
:
PathoFact_update_trial
# requires user input
project
:
PathoFact_update_trial
_new
# requires user input
datadir
:
../test_dataset
# requires user input
workflow
:
"
complete"
size_fasta
:
1000
...
...
rules/Universal/SignalP.smk
View file @
72f0b27f
...
...
@@ -3,15 +3,31 @@
import glob
import os
# Split sequences for signalp v5.0 (max. 5000 seq)
# Checkpoint: split the renamed protein FASTA of one sample into smaller
# FASTA files under SignalP/splitted/{sample}_dir so SignalP can be run on
# each chunk separately. Declared as a checkpoint because the output is a
# directory whose contents are only known once the split has run.
checkpoint splittingsignalP:
    input:
        "{datadir}/{project}/renamed/{sample}_ID.faa"
    output:
        splits=directory("{datadir}/{project}/SignalP/splitted/{sample}_dir/")
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
        # Chunk size handed to split.py; comes from pathofact: size_fasta in the config.
        split=config["pathofact"]["size_fasta"]
    conda:
        "../../envs/Biopython.yaml"
    shell:
        # Use the configured chunk size instead of a hard-coded literal so
        # that changing size_fasta in config.yaml actually takes effect.
        """
        python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/SignalP/splitted/{wildcards.sample}_dir
        """
#Run SignalP on split sequence files
rule signalp:
rule signalp
_gramp
:
input:
"{datadir}/{project}/
renamed/{sample}_ID
.faa"
"{datadir}/{project}/
SignalP/splitted/{sample}_dir/{file_i}
.fa
st
a"
output:
SignalP_gramP="{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5",
SignalP_gramN="{datadir}/{project}/SignalP/{sample}/gramn_summary.signalp5"
log:
"{datadir}/{project}/SignalP/{sample}.log"
SignalP_gramP="{datadir}/{project}/SignalP/Gram+/{sample}/{file_i}_summary.signalp5"
message:
"Running SignalP analysis on {input} resulting in {output}."
params:
...
...
@@ -21,10 +37,70 @@ rule signalp:
shell:
"""
export PATH={config[pathofact][signalp]}:$PATH
signalp -fasta {input} -org gram+ -prefix {wildcards.datadir}/{wildcards.project}/SignalP/{wildcards.sample}/gramp -batch {config[pathofact][size_fasta]}
signalp -fasta {input} -org gram- -prefix {wildcards.datadir}/{wildcards.project}/SignalP/{wildcards.sample}/gramn -batch {config[pathofact][size_fasta]}
signalp -fasta {input} -org gram+ -prefix {wildcards.datadir}/{wildcards.project}/SignalP/Gram+/{wildcards.sample}/{wildcards.file_i}
"""
# Run SignalP in Gram-negative mode (-org gram-) on a single chunk of the
# split protein FASTA. One job is created per {file_i} chunk.
rule signalp_gramn:
    input:
        "{datadir}/{project}/SignalP/splitted/{sample}_dir/{file_i}.fasta"
    output:
        SignalP_gramN="{datadir}/{project}/SignalP/Gram-/{sample}/{file_i}_summary.signalp5"
    params:
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
        runtime=config["pathofact"]["runtime"]["long"],
        outdir="{datadir}"
    message:
        "Running SignalP analysis on {input} resulting in {output}."
    shell:
        # The signalp binary is taken from the directory configured under
        # pathofact: signalp. The -prefix is built from the wildcards so that
        # it lines up with the declared output path minus "_summary.signalp5".
        """
        export PATH={config[pathofact][signalp]}:$PATH
        signalp -fasta {input} -org gram- -prefix {wildcards.datadir}/{wildcards.project}/SignalP/Gram-/{wildcards.sample}/{wildcards.file_i}
        """
def aggregate_signalpP_input(wildcards):
    """Return the per-chunk Gram+ SignalP summary paths for one sample.

    Looks up the output directory of the ``splittingsignalP`` checkpoint for
    the given wildcards, collects the stem of every ``*.fasta`` file found
    in it, and builds one ``Gram+`` summary path per stem.
    """
    split_dir = checkpoints.splittingsignalP.get(**wildcards).output.splits
    # Each "{i}.fasta" in the split directory is one chunk to be analysed.
    chunk_pattern = os.path.join(split_dir, "{i}.fasta")
    chunk_ids = glob_wildcards(chunk_pattern).i
    summary_template = (
        "{datadir}/{project}/SignalP/Gram+/{sample}/{file_i}_summary.signalp5"
    )
    return expand(
        summary_template,
        datadir=wildcards.datadir,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=chunk_ids,
    )
def aggregate_signalpN_input(wildcards):
    """Return the per-chunk Gram- SignalP summary paths for one sample.

    Looks up the output directory of the ``splittingsignalP`` checkpoint for
    the given wildcards, collects the stem of every ``*.fasta`` file found
    in it, and builds one ``Gram-`` summary path per stem.
    """
    split_dir = checkpoints.splittingsignalP.get(**wildcards).output.splits
    # Each "{i}.fasta" in the split directory is one chunk to be analysed.
    chunk_pattern = os.path.join(split_dir, "{i}.fasta")
    chunk_ids = glob_wildcards(chunk_pattern).i
    summary_template = (
        "{datadir}/{project}/SignalP/Gram-/{sample}/{file_i}_summary.signalp5"
    )
    return expand(
        summary_template,
        datadir=wildcards.datadir,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=chunk_ids,
    )
# Concatenate all per-chunk Gram+ SignalP summaries of a sample into a
# single gramp_summary.signalp5 file. The list of chunk summaries is
# resolved by aggregate_signalpP_input.
rule SignalPP_aggregate:
    input:
        aggregate_signalpP_input
    output:
        "{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    shell:
        # /dev/null is listed first so that an empty input list produces an
        # empty output file instead of leaving `cat` reading from stdin.
        # With a non-empty list the concatenated result is unchanged.
        "cat /dev/null {input} > {output}"
# Concatenate all per-chunk Gram- SignalP summaries of a sample into a
# single gramn_summary.signalp5 file. The list of chunk summaries is
# resolved by aggregate_signalpN_input.
rule SignalPN_aggregate:
    input:
        aggregate_signalpN_input
    output:
        "{datadir}/{project}/SignalP/{sample}/gramn_summary.signalp5"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    shell:
        # /dev/null is listed first so that an empty input list produces an
        # empty output file instead of leaving `cat` reading from stdin.
        # With a non-empty list the concatenated result is unchanged.
        "cat /dev/null {input} > {output}"
rule aggregate_signalP:
input:
SignalP_gramP="{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5",
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment