Commit 97113017 authored by Laura Denies
Browse files

update PathoFact report

parent a09f2eaf
...@@ -11,7 +11,7 @@ if config["pathofact"]["workflow"] == "complete": ...@@ -11,7 +11,7 @@ if config["pathofact"]["workflow"] == "complete":
[ [
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv", "{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv", "{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/PathoFact_{sample}_predictions.csv" "{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
], ],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"] datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
) )
......
...@@ -2,7 +2,7 @@ pathofact: ...@@ -2,7 +2,7 @@ pathofact:
sample: ["test_sample"] # requires user input sample: ["test_sample"] # requires user input
project: PathoFact_update_trial # requires user input project: PathoFact_update_trial # requires user input
datadir: ../test_dataset # requires user input datadir: ../test_dataset # requires user input
workflow: "Vir" workflow: "complete"
size_fasta: 1000 size_fasta: 1000
scripts: "scripts" scripts: "scripts"
signalp: "/work/projects/ecosystem_biology/local_tools/SignalP/signalp-5.0b/bin" # requires user input signalp: "/work/projects/ecosystem_biology/local_tools/SignalP/signalp-5.0b/bin" # requires user input
......
#Toxin
import glob
import os
#################################
# Combine Toxin HMM and SignalP #
#################################
# Put Toxin HMM results in the correct format & join SignalP and Toxin HMM files
# Rule R_script: runs the external R script named under `script:` on the
# toxin HMM table, the translation table, the aggregated SignalP results and
# the toxin library from the config, and declares two per-sample toxin
# report files as outputs. {datadir}, {project} and {sample} are wildcards.
rule R_script:
input:
# Toxin HMM table for the sample (presumably already formatted for the R script -- confirm upstream rule)
input_HMM="{datadir}/{project}/TOXIN/HMM_toxin/{sample}.Input_HMM_R.csv",
# Translation table from the `renamed` directory (presumably maps renamed IDs to original names -- TODO confirm)
translation="{datadir}/{project}/renamed/{sample}_translation.tsv",
# Aggregated SignalP results for the sample
signalP="{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.tsv",
# Toxin library path, read from config["pathofact"]["tox_lib"]
library=config["pathofact"]["tox_lib"]
output:
# Two report files written directly under {datadir}/{project}
gene_library="{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
gene_toxic="{datadir}/{project}/Toxin_prediction_{sample}_report.tsv"
log:
# NOTE(review): with a `script:` directive the R script itself has to write to
# snakemake@log for this file to be populated -- not visible here, confirm.
"{datadir}/{project}/TOXIN/R_output/{sample}_gene_table_library.log"
message:
"Run external R script to join SignalP and ToxinHMM and create Toxin report (incl. confidence levels)"
params:
# outdir is the bare {datadir} wildcard value, not the project subdirectory
outdir="{datadir}",
# runtime / mem are taken from the config; presumably consumed by the
# cluster submission profile rather than by the R script -- TODO confirm
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
# Environment file and script paths are written relative to this rule file
"../../envs/R.yaml"
script:
"../../scripts/ownHMM_library.R"
...@@ -5,11 +5,11 @@ import os ...@@ -5,11 +5,11 @@ import os
rule combine_PathoFact: rule combine_PathoFact:
input: input:
Virulence_factor= "{datadir}/{project}/Virulence_prediction_{sample}_report.csv", Virulence_factor= "{datadir}/{project}/Virulence_prediction_{sample}_report.tsv",
Toxins="{datadir}/{project}/Toxin_prediction_{sample}_report.csv", Toxins="{datadir}/{project}/Toxin_prediction_{sample}_report.tsv",
AMR_MGE="{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv" AMR_MGE="{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv"
output: output:
"{datadir}/{project}/PathoFact_{sample}_predictions.csv" PathoFact_report= "{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
log: log:
"{datadir}/{project}/PathoFact_{sample}_predictions.log" "{datadir}/{project}/PathoFact_{sample}_predictions.log"
params: params:
......
...@@ -7,27 +7,24 @@ library(tidyverse) ...@@ -7,27 +7,24 @@ library(tidyverse)
# load virulence prediction # load virulence prediction
Virulence_factors <- read.delim(file=snakemake@input[["Virulence_factor"]]) Virulence_factors <- read.delim(file=snakemake@input[["Virulence_factor"]])
Virulence_factors <- Virulence_factors %>% select(1,5,7) Virulence_factors <- Virulence_factors %>% select(2,5,7)
colnames(Virulence_factors) <- c("GeneID","Virulence_factor_prediction","Virulence_confidence_level") Virulence_factors$ORF_ID <- sprintf("%010d", Virulence_factors$ORF_ID)
Virulence_factors$GeneID <- as.numeric(as.character(Virulence_factors$GeneID))
Virulence_factors$GeneID <- sprintf("%010d", Virulence_factors$GeneID)
# load toxin prediction # load toxin prediction
Toxins <- read.delim(file=snakemake@input[["Toxins"]]) Toxins <- read.delim(file=snakemake@input[["Toxins"]])
Toxins <- Toxins %>% select(1,4,5,6) Toxins <- Toxins %>% select(2,4,5,6)
colnames(Toxins) <- c("GeneID","Toxin_prediction", "SignalP", "Toxin_confidence_level") Toxins$ORF_ID <- sprintf("%010d", Toxins$ORF_ID)
Toxins$GeneID <- as.numeric(as.character(Toxins$GeneID))
Toxins$GeneID <- sprintf("%010d", Toxins$GeneID)
# load AMR prediction # load AMR prediction
AMR_MGE <- read.delim(file=snakemake@input[["AMR_MGE"]]) AMR_MGE <- read.delim(file=snakemake@input[["AMR_MGE"]])
colnames(AMR_MGE) <- c("GeneID","Gene_Name","Contig_Name","ARG_prediction","AMR_Category","MGE_prediction") AMR_MGE$ORF_ID <- sprintf("%010d", AMR_MGE$ORF_ID)
AMR_MGE$Contig_ID <- sprintf("%010d", AMR_MGE$Contig_ID)
AMR_MGE <- AMR_MGE %>% select(1:9,11)
# Combine files # Combine files
Predictions_dfs<-list(Toxins, Virulence_factors, AMR_MGE) Predictions_dfs<-list(Toxins, Virulence_factors, AMR_MGE)
PathoFact_predictions<-Reduce(full_join, Predictions_dfs) PathoFact_predictions<-Reduce(full_join, Predictions_dfs)
PathoFact_predictions <- PathoFact_predictions[,c(1,7,8,3,2,4,5,6,9,10,11)] PathoFact_predictions <- PathoFact_predictions[,c(1,9,8,7,2,4,3,5,6,10:15)]
PathoFact_predictions <- PathoFact_predictions %>% filter(GeneID != " NA")
write.csv(PathoFact_predictions, file=snakemake@output[[1]], row.names=FALSE)
write.table(PathoFact_predictions, file = snakemake@output[["PathoFact_report"]], sep="\t", row.names=FALSE, quote=FALSE)
...@@ -7,14 +7,10 @@ include: ...@@ -7,14 +7,10 @@ include:
'../rules/Virulence/Virulence.smk' '../rules/Virulence/Virulence.smk'
include: include:
'../rules/Virulence/Combine_Virulence_SignalP.smk' '../rules/Virulence/Combine_Virulence_SignalP.smk'
include:
'../rules/Virulence/Virulence_report.smk'
include: include:
'../rules/Toxin/Toxin.smk' '../rules/Toxin/Toxin.smk'
include: include:
'../rules/Toxin/Combine_Toxin_SignalP.smk' '../rules/Toxin/Combine_Toxin_SignalP.smk'
include:
'../rules/Toxin/Toxin_report.smk'
include: include:
'../rules/Universal/Preprocessing_contig.smk' '../rules/Universal/Preprocessing_contig.smk'
include: include:
...@@ -35,7 +31,7 @@ rule Analysis: ...@@ -35,7 +31,7 @@ rule Analysis:
[ [
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv", "{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv", "{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/PathoFact_{sample}_predictions.csv" "{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
], ],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"] datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment