Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Laura Denies
PathoFact
Commits
4e54bc02
Commit
4e54bc02
authored
Jun 19, 2020
by
Laura Denies
Browse files
updated signalp to v5.0
parent
21195b19
Changes
7
Hide whitespace changes
Inline
Side-by-side
config.yaml
View file @
4e54bc02
...
...
@@ -2,11 +2,10 @@ pathofact:
sample
:
[
"
test_sample"
]
# requires user input
project
:
PathoFact_update_trial
# requires user input
datadir
:
../test_dataset
# requires user input
workflow
:
"
AMR
"
workflow
:
"
Tox
"
size_fasta
:
1000
scripts
:
"
scripts"
signalp
:
"
/path/to/signalp-4.1/signalp"
# requires user input
deeparg
:
"
submodules/deeparg-ss/deepARG.py"
signalp
:
"
/work/projects/ecosystem_biology/local_tools/SignalP/signalp-5.0b/bin"
# requires user input
deepvirfinder
:
"
submodules/DeepVirFinder/dvf.py"
tox_hmm
:
"
databases/toxins/combined_Toxin.hmm"
tox_lib
:
"
databases/library_HMM_Toxins.csv"
...
...
rules/Toxin/Combine_Toxin_SignalP.smk
View file @
4e54bc02
...
...
@@ -12,7 +12,7 @@ rule R_script:
input:
input_HMM="{datadir}/{project}/TOXIN/HMM_toxin/{sample}.Input_HMM_R.csv",
translation="{datadir}/{project}/renamed/{sample}_translation.tsv",
signalP="{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.t
xt
",
signalP="{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.t
sv
",
library=config["pathofact"]["tox_lib"]
output:
gene_library=temp("{datadir}/{project}/TOXIN/R_output/{sample}_gene_table_library.csv"),
...
...
rules/Universal/SignalP.smk
View file @
4e54bc02
...
...
@@ -6,72 +6,38 @@ import os
#Run SignalP on split sequence files
rule signalp:
input:
"{datadir}/{project}/
splitt
ed/{sample}
/{file_i}.fasta"
"{datadir}/{project}/
renam
ed/{sample}
_ID.faa"
output:
"{datadir}/{project}/SignalP/{sample}/{file_i}.txt"
SignalP_gramP="{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5",
SignalP_gramN="{datadir}/{project}/SignalP/{sample}/gramn_summary.signalp5"
log:
"{datadir}/{project}/SignalP/{sample}
/{file_i}
.log"
"{datadir}/{project}/SignalP/{sample}.log"
message:
"Running SignalP analysis on {input} resulting in {output}."
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["long"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
shell:
"{config[pathofact][signalp]} -t gram+ {input} > {output} 2> {log}"
#adjust format of signalP files
rule SignalP_format:
input:
"{datadir}/{project}/SignalP/{sample}/{file_i}.txt"
output:
"{datadir}/{project}/SignalP/format/{sample}/{file_i}.txt"
message:
"Apply correct format to {input} to create {output}"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
shell:
"""
sed '1,2d' {input} > {output}
"""
rule signalP_modified:
input:
"{datadir}/{project}/SignalP/format/{sample}/{file_i}.txt"
output:
"{datadir}/{project}/SignalP/modified/{sample}/{file_i}.txt"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
shell:
"""
awk '{{print $1"\\t"$10}}' {input} > {output}
export PATH={config[pathofact][signalp]}:$PATH
signalp -fasta {input} -org gram+ -prefix {wildcards.datadir}/{wildcards.project}/SignalP/{wildcards.sample}/gramp -batch {config[pathofact][size_fasta]}
signalp -fasta {input} -org gram- -prefix {wildcards.datadir}/{wildcards.project}/SignalP/{wildcards.sample}/gramn -batch {config[pathofact][size_fasta]}
"""
def aggregate_input(wildcards):
checkpoint_output = checkpoints.splitting.get(**wildcards).output.splits
return expand(
"{datadir}/{project}/SignalP/modified/{sample}/{file_i}.txt",
datadir=wildcards.datadir,
project=wildcards.project,
sample=wildcards.sample,
file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.fasta")).i
)
# Join multiple SignalP files in a single file
rule aggregate:
rule aggregate_signalP:
input:
aggregate_input
SignalP_gramP="{datadir}/{project}/SignalP/{sample}/gramp_summary.signalp5",
SignalP_gramN="{datadir}/{project}/SignalP/{sample}/gramn_summary.signalp5"
output:
"{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.t
xt
"
SignalP_report=
"{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.t
sv
"
message:
"concatenate multiple split signalP files in a single joined file: {output}"
log:
"{datadir}/{project}/SignalP/{sample}_SignalP_temp.log"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
shell:
"cat {input} > {output}
"
conda: "../../envs/R.yaml"
script: "../../scripts/SignalP.R
"
scripts/SignalP.R
0 → 100644
View file @
4e54bc02
#!/usr/bin/env R
# logging
sink
(
file
=
file
(
snakemake
@
log
[[
1
]],
open
=
"wt"
),
type
=
"message"
)
library
(
tidyverse
)
V5_p
<-
read.delim
(
file
=
snakemake
@
input
[[
"SignalP_gramP"
]],
header
=
FALSE
,
comment.char
=
"#"
)
V5_p
$
V5_p_SignalP
<-
ifelse
(
V5_p
$
V2
==
"OTHER"
,
"N"
,
"Y"
)
V5_p
<-
V5_p
%>%
select
(
1
,
2
,
8
)
colnames
(
V5_p
)
<-
c
(
"ID"
,
"V5_p_Prediction"
,
"V5_p_SignalP"
)
V5_n
<-
read.delim
(
file
=
snakemake
@
input
[[
"SignalP_gramN"
]],
header
=
FALSE
,
comment.char
=
"#"
)
V5_n
$
V5_n_SignalP
<-
ifelse
(
V5_n
$
V2
==
"OTHER"
,
"N"
,
"Y"
)
V5_n
<-
V5_n
%>%
select
(
1
,
2
,
8
)
colnames
(
V5_n
)
<-
c
(
"ID"
,
"V5_n_Prediction"
,
"V5_n_SignalP"
)
SignalP
<-
full_join
(
V5_p
,
V5_n
)
SignalP
$
SignalPeptide
<-
ifelse
(
SignalP
$
V5_p_SignalP
==
"Y"
|
SignalP
$
V5_n_SignalP
==
"Y"
,
"Y"
,
"N"
)
SignalP
$
Organism
<-
ifelse
(
SignalP
$
V5_p_SignalP
==
"Y"
&
SignalP
$
V5_n_SignalP
==
"Y"
,
"gram+ or gram-"
,
ifelse
(
SignalP
$
V5_p_SignalP
==
"Y"
&
SignalP
$
V5_n_SignalP
==
"N"
,
"gram+"
,
ifelse
(
SignalP
$
V5_p_SignalP
==
"N"
&
SignalP
$
V5_n_SignalP
==
"Y"
,
"gram-"
,
"-"
)))
SignalP
$
compare
<-
as.character
(
SignalP
$
V5_p_Prediction
)
==
as.character
(
SignalP
$
V5_n_Prediction
)
SignalP
$
SignalP_prediction
<-
ifelse
(
SignalP
$
Organism
==
"gram+ or gram-"
&
SignalP
$
compare
==
"FALSE"
,
paste
(
SignalP
$
V5_p_Prediction
,
SignalP
$
V5_n_Prediction
,
sep
=
" / "
),
ifelse
(
SignalP
$
SignalPeptide
==
"Y"
&
SignalP
$
compare
==
"TRUE"
,
as.character
(
SignalP
$
V5_p_Prediction
),
"-"
))
SignalP
<-
SignalP
%>%
select
(
1
,
6
,
9
,
7
)
write.table
(
SignalP
,
file
=
snakemake
@
output
[[
"SignalP_report"
]],
sep
=
"\t"
,
row.names
=
FALSE
,
quote
=
FALSE
)
scripts/ownHMM_library.R
View file @
4e54bc02
...
...
@@ -11,12 +11,9 @@ sink(file=file(snakemake@log[[1]], open="wt"), type="message")
gene_table_HMM
<-
read.delim
(
file
=
snakemake
@
input
[[
"input_HMM"
]],
comment.char
=
"#"
)
translation
<-
read.delim
(
file
=
snakemake
@
input
[[
"translation"
]],
header
=
FALSE
)
signalP
<-
read.table
(
file
=
snakemake
@
input
[[
"signalP"
]],
sep
=
"\t"
)
signalP
<-
read.table
(
file
=
snakemake
@
input
[[
"signalP"
]],
sep
=
"\t"
,
header
=
TRUE
)
gene_table_HMM
<-
gene_table_HMM
[,
c
(
1
,
2
,
4
,
3
)]
colnames
(
signalP
)
<-
c
(
"Query_sequence"
,
"SignalP"
)
colnames
(
translation
)
<-
c
(
"ID"
,
"Query_sequence"
)
translation
<-
translation
[,
c
(
2
,
1
)]
...
...
@@ -52,7 +49,7 @@ gene_table_Toxic$Prediction<-gsub(1,"pathogenic",gene_table_Toxic$Prediction)
gene_table_Toxic
<-
as.data.frame
(
gene_table_Toxic
)
#Merge SignalP and Toxic HMM
gene_table_Toxic
<-
merge.data.frame
(
gene_table_Toxic
,
signalP
,
by
=
"Query_sequence"
)
gene_table_Toxic
<-
merge.data.frame
(
gene_table_Toxic
,
signalP
,
by
.x
=
"Query_sequence"
,
by.y
=
"ID"
)
gene_table_Toxic
$
Query_sequence
<-
sprintf
(
"%010d"
,
as.numeric
(
gene_table_Toxic
$
Query_sequence
))
write.csv
(
gene_table_Toxic
,
file
=
snakemake
@
output
[[
"gene_toxic"
]])
workflows/Toxin_workflow.smk
View file @
4e54bc02
...
...
@@ -15,7 +15,7 @@ rule Analysis:
input:
expand(
[
"{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.t
xt
",
"{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.t
sv
",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/Toxin_prediction_{sample}_report.csv"
],
...
...
workflows/Virulence_workflow.smk
View file @
4e54bc02
...
...
@@ -15,7 +15,7 @@ rule Analysis_Virulence:
input:
expand(
[
"{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.t
xt
",
"{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.t
sv
",
"{datadir}/{project}/Virulence_prediction_{sample}_report.csv"
],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment