GitLab is now using https://gitlab.lcsb.uni.lu as its primary address. Please update your bookmarks. FAQ.

Commit 97113017 authored by Laura Denies's avatar Laura Denies
Browse files

update PathoFact report

parent a09f2eaf
......@@ -11,7 +11,7 @@ if config["pathofact"]["workflow"] == "complete":
[
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/PathoFact_{sample}_predictions.csv"
"{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
)
......
......@@ -2,7 +2,7 @@ pathofact:
sample: ["test_sample"] # requires user input
project: PathoFact_update_trial # requires user input
datadir: ../test_dataset # requires user input
workflow: "Vir"
workflow: "complete"
size_fasta: 1000
scripts: "scripts"
signalp: "/work/projects/ecosystem_biology/local_tools/SignalP/signalp-5.0b/bin" # requires user input
......
#Toxin
import glob
import os
#################################
# Combine Toxin HMM and SignalP #
#################################
# Put Toxin HMM results in the correct format & join SignalP and Toxin HMM files
# Rule R_script: runs the external R script ownHMM_library.R on the toxin HMM
# table, the translation table and the aggregated SignalP results of one sample,
# and declares two per-sample toxin report files as its outputs.
# Wildcards used in the paths: {datadir}, {project}, {sample}.
rule R_script:
input:
# Toxin HMM results for the sample (CSV under TOXIN/HMM_toxin/).
input_HMM="{datadir}/{project}/TOXIN/HMM_toxin/{sample}.Input_HMM_R.csv",
# Translation table from the renamed/ directory.
# NOTE(review): presumably maps renamed IDs back to the original names - confirm.
translation="{datadir}/{project}/renamed/{sample}_translation.tsv",
# Aggregated SignalP results for the sample.
signalP="{datadir}/{project}/SignalP/aggregated/{sample}_SignalP_results.tsv",
# Toxin library path, read from the config key pathofact -> tox_lib.
library=config["pathofact"]["tox_lib"]
output:
# Two TSV reports written at the project level; the "Toxin_gene_library" file
# is also one of the targets requested by the top-level workflow rule.
gene_library="{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
gene_toxic="{datadir}/{project}/Toxin_prediction_{sample}_report.tsv"
log:
"{datadir}/{project}/TOXIN/R_output/{sample}_gene_table_library.log"
message:
"Run external R script to join SignalP and ToxinHMM and create Toxin report (incl. confidence levels)"
params:
# outdir is set to the {datadir} wildcard value.
outdir="{datadir}",
# runtime and mem are read from the config ("short" runtime, normal memory per core).
# NOTE(review): presumably consumed by the cluster submission profile - they are
# not referenced anywhere else in this rule; confirm.
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda:
# Conda environment definition used to run the script.
"../../envs/R.yaml"
script:
# The R script receives the inputs/outputs above through the `snakemake` object.
"../../scripts/ownHMM_library.R"
......@@ -5,11 +5,11 @@ import os
rule combine_PathoFact:
input:
Virulence_factor= "{datadir}/{project}/Virulence_prediction_{sample}_report.csv",
Toxins="{datadir}/{project}/Toxin_prediction_{sample}_report.csv",
Virulence_factor= "{datadir}/{project}/Virulence_prediction_{sample}_report.tsv",
Toxins="{datadir}/{project}/Toxin_prediction_{sample}_report.tsv",
AMR_MGE="{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv"
output:
"{datadir}/{project}/PathoFact_{sample}_predictions.csv"
PathoFact_report= "{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
log:
"{datadir}/{project}/PathoFact_{sample}_predictions.log"
params:
......
......@@ -7,27 +7,24 @@ library(tidyverse)
# load virulence prediction
Virulence_factors <- read.delim(file=snakemake@input[["Virulence_factor"]])
Virulence_factors <- Virulence_factors %>% select(1,5,7)
colnames(Virulence_factors) <- c("GeneID","Virulence_factor_prediction","Virulence_confidence_level")
Virulence_factors$GeneID <- as.numeric(as.character(Virulence_factors$GeneID))
Virulence_factors$GeneID <- sprintf("%010d", Virulence_factors$GeneID)
Virulence_factors <- Virulence_factors %>% select(2,5,7)
Virulence_factors$ORF_ID <- sprintf("%010d", Virulence_factors$ORF_ID)
# load toxin prediction
Toxins <- read.delim(file=snakemake@input[["Toxins"]])
Toxins <- Toxins %>% select(1,4,5,6)
colnames(Toxins) <- c("GeneID","Toxin_prediction", "SignalP", "Toxin_confidence_level")
Toxins$GeneID <- as.numeric(as.character(Toxins$GeneID))
Toxins$GeneID <- sprintf("%010d", Toxins$GeneID)
Toxins <- Toxins %>% select(2,4,5,6)
Toxins$ORF_ID <- sprintf("%010d", Toxins$ORF_ID)
# load AMR prediction
AMR_MGE <- read.delim(file=snakemake@input[["AMR_MGE"]])
colnames(AMR_MGE) <- c("GeneID","Gene_Name","Contig_Name","ARG_prediction","AMR_Category","MGE_prediction")
AMR_MGE$ORF_ID <- sprintf("%010d", AMR_MGE$ORF_ID)
AMR_MGE$Contig_ID <- sprintf("%010d", AMR_MGE$Contig_ID)
AMR_MGE <- AMR_MGE %>% select(1:9,11)
# Combine files
Predictions_dfs<-list(Toxins, Virulence_factors, AMR_MGE)
PathoFact_predictions<-Reduce(full_join, Predictions_dfs)
PathoFact_predictions <- PathoFact_predictions[,c(1,7,8,3,2,4,5,6,9,10,11)]
PathoFact_predictions <- PathoFact_predictions %>% filter(GeneID != " NA")
write.csv(PathoFact_predictions, file=snakemake@output[[1]], row.names=FALSE)
PathoFact_predictions <- PathoFact_predictions[,c(1,9,8,7,2,4,3,5,6,10:15)]
write.table(PathoFact_predictions, file = snakemake@output[["PathoFact_report"]], sep="\t", row.names=FALSE, quote=FALSE)
......@@ -7,14 +7,10 @@ include:
'../rules/Virulence/Virulence.smk'
include:
'../rules/Virulence/Combine_Virulence_SignalP.smk'
include:
'../rules/Virulence/Virulence_report.smk'
include:
'../rules/Toxin/Toxin.smk'
include:
'../rules/Toxin/Combine_Toxin_SignalP.smk'
include:
'../rules/Toxin/Toxin_report.smk'
include:
'../rules/Universal/Preprocessing_contig.smk'
include:
......@@ -35,7 +31,7 @@ rule Analysis:
[
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/PathoFact_{sample}_predictions.csv"
"{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
],
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment