Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Laura Denies
PathoFact
Commits
46961fc8
Commit
46961fc8
authored
Apr 30, 2020
by
Valentina Galata
Browse files
replaced index by name when accessing snakemake variables (issue
#29
)
parent
0ba12560
Changes
12
Hide whitespace changes
Inline
Side-by-side
rules/Toxin/Combine_Toxin_SignalP.smk
View file @
46961fc8
...
...
@@ -132,10 +132,10 @@ rule combine_confidence:
output:
"{OUTDIR}/{project}/TOXIN/Toxin_prediction/{input_file}_Confidence_HMM_SignalP_ensembled.csv"
message:
"The different confidence files ({input
[0]}, {input[1]}, {input[2]
}) are combined in a single output file: {output}"
"The different confidence files ({input}) are combined in a single output file: {output}"
params:
outdir="{OUTDIR}"
shell:
"""
cat {input
[0]} {input[1]} {input[2]
} | sort > {output}
cat {input} | sort > {output}
"""
rules/Toxin/Toxin.smk
View file @
46961fc8
...
...
@@ -16,7 +16,7 @@ rule run_HMM:
log:
"{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.log"
message:
"Run HMM scan on {input
[1]
} to generate {output}"
"Run HMM scan on {input
.renamed
} to generate {output}"
params:
outdir="{OUTDIR}"
conda:
...
...
@@ -25,7 +25,7 @@ rule run_HMM:
1
shell:
"""
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input
[0]
} {input
[1]
} &> {log}
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input
.hmm
} {input
.renamed
} &> {log}
"""
# Adjust HMM results to correct format
...
...
rules/Toxin/Toxin_report.smk
View file @
46961fc8
...
...
@@ -21,7 +21,7 @@ rule merge_final:
outdir="{OUTDIR}"
shell:
"""
join -t $'\\t' <(sort {input
[0]
}) <(sort {input
[1]
}) > {output}
join -t $'\\t' <(sort {input
.translation
}) <(sort {input
.prediction
}) > {output}
"""
rule toxin_report:
...
...
rules/Universal/Preprocessing.smk
View file @
46961fc8
...
...
@@ -38,7 +38,7 @@ rule generate_translation:
outdir="{OUTDIR}"
shell:
"""
paste {input
[0]} {input[1]
} | awk 'sub(/^>/,"")' OFS='\\t' > {output}
paste {input
.renamed} {input.original
} | awk 'sub(/^>/,"")' OFS='\\t' > {output}
"""
###############################
...
...
rules/Universal/Preprocessing_contig.smk
View file @
46961fc8
...
...
@@ -38,5 +38,5 @@ rule generate_ContigTranslation:
outdir="{OUTDIR}"
shell:
"""
paste {input
[0]} {input[1]
} | awk 'sub(/^>/,"")' OFS='\\t' > {output}
paste {input
.renamed} {input.original
} | awk 'sub(/^>/,"")' OFS='\\t' > {output}
"""
rules/Virulence/Combine_Virulence_SignalP.smk
View file @
46961fc8
...
...
@@ -18,7 +18,7 @@ rule merge_SignalPVir:
"{OUTDIR}/{project}/VIRULENCE/virulence_merged/{input_file}_virulence_SignalP_prediction.tsv"
shell:
"""
join -t $'\\t' <(sort {input
[0]
}) <(sort {input
[1]
}) > {output
[0]
}
join -t $'\\t' <(sort {input
.Vir
}) <(sort {input
.SignalP
}) > {output}
"""
###########################################################
...
...
@@ -90,4 +90,4 @@ rule SignalVir_virulence_prediction:
output:
"{OUTDIR}/{project}/VIRULENCE/Virulence_prediction/{input_file}_Confidence_Virulence_ensembled.csv"
shell:
"cat {input
[0]} {input[1]} {input[2]} {input[3]} {input[4]
} > {output
[0]
}"
"cat {input} > {output}"
rules/Virulence/Virulence.smk
View file @
46961fc8
...
...
@@ -16,7 +16,7 @@ rule run_HMM_vir:
log:
"{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}/{file_i}.log"
message:
"Run HMM scan on {input
[1]
} to generate {output}"
"Run HMM scan on {input
.renamed
} to generate {output}"
params:
outdir="{OUTDIR}"
conda:
...
...
@@ -25,7 +25,7 @@ rule run_HMM_vir:
12
shell:
"""
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input
[0]
} {input
[1]
} &> {log}
{config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input
.hmm
} {input
.renamed
} &> {log}
"""
# Adjust HMM results to correct format
...
...
@@ -121,7 +121,7 @@ rule HMM_VIR_report:
temp("{OUTDIR}/{project}/VIRULENCE/HMM_virulence/{input_file}_hmm_prediction_intermediate.tsv")
shell:
"""
cat {input
[0]} {input[1]} {input[2]
} | sort > {output}
cat {input} | sort > {output}
"""
rule HMM_VIR_finalformat:
...
...
@@ -222,7 +222,7 @@ rule join_matrix:
fi
}}
xjoin {input
[0]} {input[1]} {input[2]} {input[3]} {input[4]
} | sort -k 1n,1 > {output}
xjoin {input} | sort -k 1n,1 > {output}
"""
rule classifier:
...
...
@@ -277,7 +277,7 @@ rule merge_virulence:
output:
"{OUTDIR}/{project}/VIRULENCE/HMM_classifier_virulence/{input_file}_Virulence_prediction.tsv"
shell:
"join -t $'\\t' <(sort {input
[0]
}) <(sort {input
[1]
}) > {output};"
"join -t $'\\t' <(sort {input
.hmm
}) <(sort {input
.classifier
}) > {output};"
"sed -i $'1 i\\\ ID\\tHMM_prediction\\tmodel_prediction' {output}"
rule virulence_nonpath:
...
...
@@ -351,4 +351,4 @@ rule merge_combined:
output:
"{OUTDIR}/{project}/VIRULENCE/HMM_classifier_virulence/{input_file}_virulence_final_prediction.tsv"
shell:
"cat {input
[0]} {input[1]} {input[2]} {input[3]} {input[4]} {input[5]
} > {output}"
"cat {input} > {output}"
rules/Virulence/Virulence_report.smk
View file @
46961fc8
...
...
@@ -22,7 +22,7 @@ rule Virulence_merge_final:
outdir="{OUTDIR}"
shell:
"""
join -t $'\\t' <(sort {input
[0]
}) <(sort {input
[1]
}) > {output}
join -t $'\\t' <(sort {input
.translation
}) <(sort {input
.prediction
}) > {output}
"""
rule virulence_report:
...
...
scripts/AMR_MGE.R
View file @
46961fc8
...
...
@@ -6,28 +6,28 @@ sink(file=file(snakemake@log[[1]], open="wt"), type="message")
library
(
tidyverse
)
# AMR prediction
AMR
<-
read.delim
(
file
=
snakemake
@
input
[[
1
]],
header
=
FALSE
,
comment.char
=
"#"
)
AMR
<-
read.delim
(
file
=
snakemake
@
input
[[
"AMR"
]],
header
=
FALSE
,
comment.char
=
"#"
)
AMR
<-
AMR
%>%
select
(
1
,
4
,
5
)
colnames
(
AMR
)
<-
c
(
"ARG"
,
"GeneID"
,
"Category"
)
AMR_translation
<-
read.delim
(
file
=
snakemake
@
input
[[
2
]],
header
=
FALSE
)
AMR_translation
<-
read.delim
(
file
=
snakemake
@
input
[[
"AMR_translation"
]],
header
=
FALSE
)
colnames
(
AMR_translation
)
<-
c
(
"GeneID"
,
"Gene_Name"
)
AMR_translation
$
Gene_Name
<-
sub
(
'>'
,
''
,
AMR_translation
$
Gene_Name
)
# Plasmid Prediction
Plasmid
<-
read.delim
(
file
=
snakemake
@
input
[[
3
]],
header
=
TRUE
)
Plasmid
<-
read.delim
(
file
=
snakemake
@
input
[[
"Plasmid"
]],
header
=
TRUE
)
Plasmid
<-
separate
(
data
=
Plasmid
,
col
=
label
,
into
=
c
(
"left"
,
"right"
),
sep
=
"\\."
)
Plasmid
<-
Plasmid
%>%
select
(
1
,
2
)
colnames
(
Plasmid
)
<-
c
(
"ContigID"
,
"Plasmid_prediction"
)
Plasmid
<-
Plasmid
%>%
filter
(
ContigID
!=
"contig_name"
)
Plasmid
$
ContigID
<-
sprintf
(
"%010d"
,
Plasmid
$
ContigID
)
Plasmid_translation
<-
read.delim
(
file
=
snakemake
@
input
[[
4
]],
header
=
FALSE
)
Plasmid_translation
<-
read.delim
(
file
=
snakemake
@
input
[[
"Contig_translation"
]],
header
=
FALSE
)
colnames
(
Plasmid_translation
)
<-
c
(
"ContigID"
,
"Contig_Name"
)
Plasmid_translation
$
Contig_Name
<-
sub
(
'>'
,
''
,
Plasmid_translation
$
Contig_Name
)
# Combine AMR and Plasmid prediction
Contig_gene_list
<-
read.delim
(
file
=
snakemake
@
input
[[
5
]],
header
=
FALSE
)
Contig_gene_list
<-
read.delim
(
file
=
snakemake
@
input
[[
"Contig_gene_list"
]],
header
=
FALSE
)
colnames
(
Contig_gene_list
)
<-
c
(
"Contig_Name"
,
"Gene_Name"
)
Contig_gene_list
<-
merge
(
Contig_gene_list
,
AMR_translation
,
by
=
"Gene_Name"
,
all
=
TRUE
)
Contig_gene_list
<-
merge
(
Contig_gene_list
,
Plasmid_translation
,
by
=
"Contig_Name"
,
all
=
TRUE
)
...
...
@@ -42,7 +42,7 @@ AMR_Plasmid$Plasmid_prediction <- fct_explicit_na(AMR_Plasmid$Plasmid_prediction
# Phage prediction
## VirFinder
VirFinder
<-
read.delim
(
file
=
snakemake
@
input
[[
6
]])
VirFinder
<-
read.delim
(
file
=
snakemake
@
input
[[
"VirFinder"
]])
VirFinder
<-
VirFinder
%>%
select
(
1
,
3
,
4
)
colnames
(
VirFinder
)
<-
c
(
"ContigID"
,
"VirFinder_score"
,
"VirFinder_pvalue"
)
VirFinder
<-
VirFinder
%>%
filter
(
ContigID
!=
"name"
)
...
...
@@ -52,7 +52,7 @@ VirFinder$VirFinder_prediction <- ifelse(VirFinder$VirFinder_pvalue <=0.05 & Vir
VirFinder
$
ContigID
<-
sprintf
(
"%010d"
,
VirFinder
$
ContigID
)
## VirSorter
vs.pred
<-
read.csv
(
file
=
snakemake
@
input
[[
7
]],
quote
=
""
,
header
=
FALSE
)
vs.pred
<-
read.csv
(
file
=
snakemake
@
input
[[
"VirSorter"
]],
quote
=
""
,
header
=
FALSE
)
vs.head
<-
setNames
(
data.frame
(
matrix
(
ncol
=
12
,
nrow
=
0
)),
c
(
"Contig_id"
,
"Nb.genes.contigs"
,
"Fragment"
,
"Nb.genes"
,
"Category"
,
"Nb.phage.hallmark.genes"
,
"Phage.gene.enrichment.sig"
,
"Non-Caudovirales.phage.gene.enrichment.sig"
,
"Pfam.depletion.sig"
,
"Uncharacterized.enrichment.sig"
,
"Strand.switch.depletion.sig"
,
"Short.genes.enrichment.sig"
))
colnames
(
vs.pred
)
<-
colnames
(
vs.head
)
colnames
(
vs.pred
)[
1
]
<-
"vs.id"
...
...
@@ -84,7 +84,7 @@ AMR_MGE <- AMR_MGE %>% mutate_if(is.factor, fct_explicit_na, na_level = "-")
AMR_MGE
<-
AMR_MGE
%>%
select
(
2
,
4
,
3
,
5
,
6
,
7
,
10
,
11
)
AMR_MGE
$
GeneID
<-
sprintf
(
"%010d"
,
AMR_MGE
$
GeneID
)
write.csv
(
AMR_MGE
,
file
=
snakemake
@
output
[[
1
]],
row.names
=
FALSE
)
write.csv
(
AMR_MGE
,
file
=
snakemake
@
output
[[
"Report_1"
]],
row.names
=
FALSE
)
AMR_MGE_report
<-
AMR_MGE
AMR_MGE_report
$
origin
<-
ifelse
(
AMR_MGE_report
$
Plasmid_prediction
==
"chromosome"
&
AMR_MGE_report
$
VirFinder_prediction
==
"-"
&
AMR_MGE_report
$
VirSorter_prediction
==
"-"
,
"chromosome"
,
...
...
@@ -93,6 +93,6 @@ AMR_MGE_report$origin <- ifelse(AMR_MGE_report$Plasmid_prediction == "chromosome
ifelse
(
AMR_MGE_report
$
Plasmid_prediction
==
"unclassified"
&
AMR_MGE_report
$
VirFinder_prediction
==
"-"
&
AMR_MGE_report
$
VirSorter_prediction
==
"-"
,
"unclassified"
,
"ambiguous"
))))
AMR_MGE_report
<-
AMR_MGE_report
%>%
select
(
1
,
2
,
3
,
4
,
5
,
9
)
write.csv
(
AMR_MGE_report
,
file
=
snakemake
@
output
[[
2
]],
row.names
=
FALSE
)
write.csv
(
AMR_MGE_report
,
file
=
snakemake
@
output
[[
"Report_2"
]],
row.names
=
FALSE
)
scripts/PathoFact.R
View file @
46961fc8
...
...
@@ -6,21 +6,21 @@ sink(file=file(snakemake@log[[1]], open="wt"), type="message")
library
(
tidyverse
)
# load virulence prediction
Virulence_factors
<-
read.delim
(
file
=
snakemake
@
input
[[
1
]])
Virulence_factors
<-
read.delim
(
file
=
snakemake
@
input
[[
"Virulence_factor"
]])
Virulence_factors
<-
Virulence_factors
%>%
select
(
1
,
5
,
7
)
colnames
(
Virulence_factors
)
<-
c
(
"GeneID"
,
"Virulence_factor_prediction"
,
"Virulence_confidence_level"
)
Virulence_factors
$
GeneID
<-
as.numeric
(
as.character
(
Virulence_factors
$
GeneID
))
Virulence_factors
$
GeneID
<-
sprintf
(
"%010d"
,
Virulence_factors
$
GeneID
)
# load toxin prediction
Toxins
<-
read.delim
(
file
=
snakemake
@
input
[[
2
]])
Toxins
<-
read.delim
(
file
=
snakemake
@
input
[[
"Toxins"
]])
Toxins
<-
Toxins
%>%
select
(
1
,
4
,
5
,
6
)
colnames
(
Toxins
)
<-
c
(
"GeneID"
,
"Toxin_prediction"
,
"SignalP"
,
"Toxin_confidence_level"
)
Toxins
$
GeneID
<-
as.numeric
(
as.character
(
Toxins
$
GeneID
))
Toxins
$
GeneID
<-
sprintf
(
"%010d"
,
Toxins
$
GeneID
)
# load AMR prediction
AMR_MGE
<-
read.delim
(
file
=
snakemake
@
input
[[
3
]])
AMR_MGE
<-
read.delim
(
file
=
snakemake
@
input
[[
"AMR_MGE"
]])
colnames
(
AMR_MGE
)
<-
c
(
"GeneID"
,
"Gene_Name"
,
"Contig_Name"
,
"ARG_prediction"
,
"AMR_Category"
,
"MGE_prediction"
)
# Combine files
...
...
scripts/hmm.R
View file @
46961fc8
...
...
@@ -6,13 +6,13 @@ sink(file=file(snakemake@log[[1]], open="wt"), type="message")
library
(
tidyverse
)
library
(
reshape2
)
HMM_results
<-
read.delim
(
file
=
snakemake
@
input
[[
1
]],
header
=
FALSE
)
ID
<-
read.delim
(
file
=
snakemake
@
input
[[
5
]],
header
=
FALSE
)
HMM_results
<-
read.delim
(
file
=
snakemake
@
input
[[
"hmmresults"
]],
header
=
FALSE
)
ID
<-
read.delim
(
file
=
snakemake
@
input
[[
"ID"
]],
header
=
FALSE
)
HMM_results
<-
merge
(
HMM_results
,
ID
,
by.x
=
"V1"
,
by.y
=
"V1"
,
all.y
=
TRUE
)
positive
<-
read.table
(
file
=
snakemake
@
input
[[
2
]],
quote
=
"\""
,
comment.char
=
""
)
negative
<-
read.table
(
file
=
snakemake
@
input
[[
3
]],
quote
=
"\""
,
comment.char
=
""
)
shared
<-
read.table
(
file
=
snakemake
@
input
[[
4
]],
quote
=
"\""
,
comment.char
=
""
)
positive
<-
read.table
(
file
=
snakemake
@
input
[[
"positive"
]],
quote
=
"\""
,
comment.char
=
""
)
negative
<-
read.table
(
file
=
snakemake
@
input
[[
"negative"
]],
quote
=
"\""
,
comment.char
=
""
)
shared
<-
read.table
(
file
=
snakemake
@
input
[[
"shared"
]],
quote
=
"\""
,
comment.char
=
""
)
positive
$
class
<-
"positive"
negative
$
class
<-
"negative"
shared
$
class
<-
"shared"
...
...
scripts/ownHMM_library.R
View file @
46961fc8
...
...
@@ -9,9 +9,9 @@ sink(file=file(snakemake@log[[1]], open="wt"), type="message")
### Input of the different files ###
####################################
gene_table_HMM
<-
read.delim
(
file
=
snakemake
@
input
[[
1
]],
comment.char
=
"#"
)
translation
<-
read.delim
(
file
=
snakemake
@
input
[[
2
]],
header
=
FALSE
)
signalP
<-
read.table
(
file
=
snakemake
@
input
[[
3
]],
sep
=
"\t"
)
gene_table_HMM
<-
read.delim
(
file
=
snakemake
@
input
[[
"input_HMM"
]],
comment.char
=
"#"
)
translation
<-
read.delim
(
file
=
snakemake
@
input
[[
"translation"
]],
header
=
FALSE
)
signalP
<-
read.table
(
file
=
snakemake
@
input
[[
"signalP"
]],
sep
=
"\t"
)
gene_table_HMM
<-
gene_table_HMM
[,
c
(
1
,
2
,
4
,
3
)]
...
...
@@ -21,7 +21,7 @@ colnames(translation)<-c("ID","Query_sequence")
translation
<-
translation
[,
c
(
2
,
1
)]
#Description file of the HMM
library_HMM
<-
read.table
(
file
=
snakemake
@
input
[[
4
]],
sep
=
";"
,
header
=
T
,
quote
=
'#'
)
library_HMM
<-
read.table
(
file
=
snakemake
@
input
[[
"library"
]],
sep
=
";"
,
header
=
T
,
quote
=
'#'
)
colnames
(
library_HMM
)
<-
c
(
"HMM_ID"
,
"NAME"
,
"Alternative_name"
,
"Database"
,
"Description"
)
gene_table_library
<-
merge.data.frame
(
gene_table_HMM
,
library_HMM
,
by.x
=
"HMM_Name"
,
by.y
=
"HMM_ID"
)
gene_table_library
<-
gene_table_library
[,
c
(
2
,
1
,
3
,
4
,
5
,
6
,
7
,
8
)]
...
...
@@ -29,7 +29,7 @@ gene_table_library<-gene_table_library[,c(2,1,3,4,5,6,7,8)]
gene_table_library
<-
merge.data.frame
(
translation
,
gene_table_library
,
by.x
=
"ID"
,
by.y
=
"Query_sequence"
)
gene_table_library
$
ID
<-
sprintf
(
"%010d"
,
as.numeric
(
gene_table_library
$
ID
))
write.csv
(
gene_table_library
,
file
=
snakemake
@
output
[[
1
]])
write.csv
(
gene_table_library
,
file
=
snakemake
@
output
[[
"gene_library"
]])
#Toxin/Non-Toxin file
frequency
<-
data.frame
(
table
(
gene_table_HMM
[
1
]))
...
...
@@ -55,4 +55,4 @@ gene_table_Toxic<-as.data.frame(gene_table_Toxic)
gene_table_Toxic
<-
merge.data.frame
(
gene_table_Toxic
,
signalP
,
by
=
"Query_sequence"
)
gene_table_Toxic
$
Query_sequence
<-
sprintf
(
"%010d"
,
as.numeric
(
gene_table_Toxic
$
Query_sequence
))
write.csv
(
gene_table_Toxic
,
file
=
snakemake
@
output
[[
2
]])
write.csv
(
gene_table_Toxic
,
file
=
snakemake
@
output
[[
"gene_toxic"
]])
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment