Commit 06d1a76c authored by Laura Denies
Browse files

update test module

parent 527d38c6
......@@ -14,7 +14,6 @@ rule combine_AMR_plasmid:
Contig_translation=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/renamed/{sample}_Contig_translation.tsv"),
AMR=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/AMR/{sample}_AMR_prediction.tsv"),
PlasFlow=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/PlasFlow/{sample}_plasflow_prediction_final.tsv"),
MOB_suite=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv"),
DeepVirFinder=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/phage/{sample}_VirFinder_aggregated.csv"),
VirSorter=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/phage/{sample}_VIRSorter_aggregated.csv")
output:
......
......@@ -98,36 +98,36 @@ rule select:
cut -f 3,6 {input} > {output}
"""
# Run MOB-suite's mob_typer on a single split-contig FASTA chunk ({file_i})
# of a sample and write the per-chunk prediction table.
rule run_MOBsuite:
    input:
        os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/contig_splitted/{sample}/{file_i}.fasta")
    output:
        # Declared temp(): the per-chunk file is an intermediate that
        # Snakemake may remove once no remaining job needs it.
        temp(os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt"))
    log:
        os.path.join(DATA_DIR,"{project}/logs/{sample}/{file_i}_MOB_suite_prediction.log")
    threads:
        # Thread count is taken from the pipeline configuration.
        config["pathofact"]["mem"]["big_mem_cores"]
    conda:
        "../../envs/MOB_suite.yaml"
    message:
        "Executing MOB_suite with {threads} threads on the following sample(s): {wildcards.project} - {wildcards.sample}"
    # Both stdout and stderr of mob_typer are redirected into the log file.
    shell:
        "mob_typer --multi --infile {input} --out_file {output} -n {threads} &> {log}"
#rule run_MOBsuite:
# input: os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/contig_splitted/{sample}/{file_i}.fasta")
# output:
# temp(os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt"))
# log:
# os.path.join(DATA_DIR,"{project}/logs/{sample}/{file_i}_MOB_suite_prediction.log")
# threads:
# config["pathofact"]["mem"]["big_mem_cores"]
# conda:
# "../../envs/MOB_suite.yaml"
# message: "Executing MOB_suite with {threads} threads on the following sample(s): {wildcards.project} - {wildcards.sample}"
# shell: "mob_typer --multi --infile {input} --out_file {output} -n {threads} &> {log}"
def aggregate_MOBsuite(wildcards):
    """Return the per-chunk MOB_suite prediction paths for one sample.

    The output directory of the ``splitcontig`` checkpoint is looked up for
    the given wildcards, every ``{i}.fasta`` file inside it is globbed, and
    one ``{file_i}_MOB_suite_prediction.txt`` path is produced per chunk.

    Args:
        wildcards: Snakemake wildcards object; ``project`` and ``sample``
            are read from it, and all wildcards are forwarded to the
            checkpoint lookup.

    Returns:
        The list of prediction file paths built by ``expand``.
    """
    # Directory holding the FASTA chunks written by the checkpoint.
    split_dir = checkpoints.splitcontig.get(**wildcards).output.split
    # Chunk identifiers discovered on disk after the checkpoint has run.
    chunk_ids = glob_wildcards(os.path.join(split_dir, "{i}.fasta")).i
    prediction_pattern = os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt")
    return expand(
        prediction_pattern,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=chunk_ids
    )
#def aggregate_MOBsuite(wildcards):
# checkpoint_output= checkpoints.splitcontig.get(**wildcards).output.split
# return expand(
# os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt"),
# project=wildcards.project,
# sample=wildcards.sample,
# file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.fasta")).i
# )
# Concatenate all per-chunk MOB_suite prediction files of a sample into a
# single aggregated table.
rule aggregate_MOBsuite:
    input:
        # Input function: resolves the list of per-chunk prediction files.
        aggregate_MOBsuite
    output:
        os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv")
    params:
        # Resource hints read from the pipeline configuration.
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    message:
        "Aggregate MOB_suite results on the following sample(s): {wildcards.project} - {wildcards.sample}"
    # Plain concatenation: every input file is appended as-is.
    shell:
        "cat {input} > {output}"
#rule aggregate_MOBsuite:
# input: aggregate_MOBsuite
# output:
# os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv")
# params:
# runtime=config["pathofact"]["runtime"]["short"],
# mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
# message: "Aggregate MOB_suite results on the following sample(s): {wildcards.project} - {wildcards.sample}"
# shell:
# "cat {input} > {output}"
......@@ -39,25 +39,29 @@ PlasFlow <- PlasFlow %>% filter(Contig_ID != "contig_name")
PlasFlow$Contig_ID <- sprintf("%010d", PlasFlow$Contig_ID)
## MOBsuite
MOB_suite <- read.delim(file = snakemake@input[["MOB_suite"]], header=TRUE)
MOB_suite <- MOB_suite %>% filter(sample_id != "sample_id")
MOB_suite$mash_neighbor_distance <- as.numeric(as.character(MOB_suite$mash_neighbor_distance))
MOB_suite$MOB_suite_prediction <- if_else(MOB_suite$mash_neighbor_distance <= 0.06 , "plasmid","-")
MOB_suite <- MOB_suite %>% select(1,27) %>% filter(MOB_suite_prediction == "plasmid")
colnames(MOB_suite)<- c("Contig_ID","MOB_suite_prediction")
MOB_suite$Contig_ID <- sprintf("%010d", MOB_suite$Contig_ID)
#MOB_suite <- read.delim(file = snakemake@input[["MOB_suite"]], header=TRUE)
#MOB_suite <- MOB_suite %>% filter(sample_id != "sample_id")
#MOB_suite$mash_neighbor_distance <- as.numeric(as.character(MOB_suite$mash_neighbor_distance))
#MOB_suite$MOB_suite_prediction <- if_else(MOB_suite$mash_neighbor_distance <= 0.06 , "plasmid","-")
#MOB_suite <- MOB_suite %>% select(1,27) %>% filter(MOB_suite_prediction == "plasmid")
#colnames(MOB_suite)<- c("Contig_ID","MOB_suite_prediction")
#MOB_suite$Contig_ID <- sprintf("%010d", MOB_suite$Contig_ID)
## Combine PlasFlow and MOB_suite
Plasmid <- merge(PlasFlow, MOB_suite, by="Contig_ID", all = TRUE)
Plasmid <- merge(Plasmid, Contig_translation, by= "Contig_ID", all= TRUE)
#Plasmid <- merge(PlasFlow, MOB_suite, by="Contig_ID", all = TRUE)
Plasmid <- merge(PlasFlow, Contig_translation, by= "Contig_ID", all= TRUE)
Plasmid$PlasFlow_prediction <- fct_explicit_na(Plasmid$PlasFlow_prediction, na_level = "unclassified")
Plasmid$MOB_suite_prediction <- fct_explicit_na(Plasmid$MOB_suite_prediction, na_level = "unclassified")
Plasmid$Plasmid_prediction <- ifelse(Plasmid$PlasFlow_prediction == "plasmid" | Plasmid$MOB_suite_prediction == "plasmid", "plasmid", "-")
Plasmid$Plasmid_database <- ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction == "plasmid" & Plasmid$MOB_suite_prediction == "plasmid", "PlasFlow/MOB_suite",
ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction == "plasmid" & Plasmid$MOB_suite_prediction != "plasmid", "PlasFlow",
ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction != "plasmid" & Plasmid$MOB_suite_prediction == "plasmid", "MOB_suite", "-")))
Plasmid$Chromosome_prediction <- ifelse(Plasmid$PlasFlow_prediction == "chromosome" & Plasmid$MOB_suite_prediction != "plasmid", "chromosome","-")
Plasmid <- Plasmid %>% select(4,1,7,5,6)
#Plasmid$MOB_suite_prediction <- fct_explicit_na(Plasmid$MOB_suite_prediction, na_level = "unclassified")
#Plasmid$Plasmid_prediction <- ifelse(Plasmid$PlasFlow_prediction == "plasmid" | Plasmid$MOB_suite_prediction == "plasmid", "plasmid", "-")
#Plasmid$Plasmid_database <- ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction == "plasmid" & Plasmid$MOB_suite_prediction == "plasmid", "PlasFlow/MOB_suite",
# ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction == "plasmid" & Plasmid$MOB_suite_prediction != "plasmid", "PlasFlow",
# ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction != "plasmid" & Plasmid$MOB_suite_prediction == "plasmid", "MOB_suite", "-")))
#Plasmid$Chromosome_prediction <- ifelse(Plasmid$PlasFlow_prediction == "chromosome" & Plasmid$MOB_suite_prediction != "plasmid", "chromosome","-")
#Plasmid <- Plasmid %>% select(4,1,7,5,6)
Plasmid$Plasmid_prediction <- ifelse(Plasmid$PlasFlow_prediction == "plasmid", "plasmid", "-")
Plasmid$Chromosome_prediction <- ifelse(Plasmid$PlasFlow_prediction == "chromosome", "chromosome","-")
Plasmid <- Plasmid %>% select(1,3,5,4)
# Phage
## VirFinder
......@@ -119,8 +123,8 @@ MGEs$MGE_prediction <- ifelse(MGEs$Plasmid_prediction == "plasmid" & MGEs$Phage_
# Combine AMR MGEs
AMR_MGE <- merge(AMR, MGEs, by="Contig", all.x = TRUE)
AMR_MGE <- AMR_MGE %>% select(1,10,2,3,4,5,6,7,8,9,11,12,13,14,15,16)
AMR_MGE <- AMR_MGE %>% select(1,10,2,3,4,5,6,7,8,9,11,12,13,14,15)
write.table(AMR_MGE, file = snakemake@output[["Report_1"]], sep="\t", row.names=FALSE, quote=FALSE)
AMR_MGE_final <- AMR_MGE %>% select(1:10,16)
AMR_MGE_final <- AMR_MGE %>% select(1:10,15)
write.table(AMR_MGE_final, file = snakemake@output[["Report_2"]], sep="\t", row.names=FALSE, quote=FALSE)
#!/bin/bash -l

###########
# DeepARG #
###########
# From installation instructions: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/

# Replace the `path = ...` line in options.py with the absolute path of the
# deeparg-ss submodule. The command substitution is double-quoted so that a
# checkout location containing whitespace is not word-split into several
# sed arguments (the unquoted form breaks the sed expression in that case).
sed -i 's@^path\s*=.*$@path = "'"$(realpath submodules/deeparg-ss)"'"@' submodules/deeparg-ss/options.py

# Make the bundled DIAMOND binary executable.
chmod +x submodules/deeparg-ss/bin/diamond
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -6,7 +6,7 @@
# Config file
CONFIG = "test/test_config.yaml"
configfile: CONFIG
DATA_DIR=config["pathofact"]["datadir"]
# Libs
import os
import sys
......@@ -25,68 +25,39 @@ subworkflow pathofact:
CONFIG
##################################################
# Targets
# Rules
# PathoFact
PF = pathofact(
expand(
[
"{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/Toxin_prediction_{sample}_report.tsv",
"{datadir}/{project}/Virulence_prediction_{sample}_report.tsv",
"{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
],
datadir=config["pathofact"]["datadir"],
project=config["pathofact"]["project"],
sample=config["pathofact"]["sample"]
)
)
rule all:
input: expand([os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.checked"),os.path.join(DATA_DIR,"{project}/Toxin_prediction_{sample}.checked"),os.path.join(DATA_DIR,"{project}/Virulence_prediction_{sample}.checked"),os.path.join(DATA_DIR,"{project}/PathoFact_{sample}.checked")], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"])
# Checks
CHECKS = expand(
[
"{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv.checked",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv.checked",
"{datadir}/{project}/Toxin_prediction_{sample}_report.tsv.checked",
"{datadir}/{project}/Virulence_prediction_{sample}_report.tsv.checked",
"{datadir}/{project}/PathoFact_{sample}_predictions.tsv.checked"
],
datadir=config["pathofact"]["datadir"],
project=config["pathofact"]["project"],
sample=config["pathofact"]["sample"]
)
# Compare the AMR/MGE report produced by the PathoFact sub-workflow with the
# expected reference report and create a ".checked" marker when they match.
rule check_AMR_MGE:
    input:
        # Report generated by the `pathofact` sub-workflow.
        res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/AMR_MGE_prediction_{sample}_report.tsv")),
        # Reference report the result is compared against.
        exp=os.path.join(DATA_DIR,"output_expected/PathoFact_report/AMR_MGE_prediction_{sample}_report.tsv")
    output:
        os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.checked")
    log:
        # Receives the diff output (empty when the files are identical).
        os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.diff")
    # The former command `diff ... > {log} && [ -s {log} ] || touch {output}`
    # reached `touch` both when diff failed (the `||` branch) and when diff
    # succeeded (an empty log makes `[ -s ]` false), so the check could never
    # fail. The marker is now created only when diff exits 0; otherwise the
    # command, and therefore the rule, fails and the differences stay in {log}.
    shell:
        "diff {input.exp} {input.res} > {log} && touch {output}"
##################################################
# Rules
# Compare the toxin prediction report produced by the PathoFact sub-workflow
# with the expected reference report and create a ".checked" marker when
# they match.
rule check_Toxin:
    input:
        # Report generated by the `pathofact` sub-workflow.
        res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/Toxin_prediction_{sample}_report.tsv")),
        # Reference report the result is compared against.
        exp=os.path.join(DATA_DIR,"output_expected/PathoFact_report/Toxin_prediction_{sample}_report.tsv")
    output:
        os.path.join(DATA_DIR,"{project}/Toxin_prediction_{sample}.checked")
    log:
        # Receives the diff output (empty when the files are identical).
        os.path.join(DATA_DIR,"{project}/Toxin_prediction_{sample}.diff")
    # The former command `diff ... > {log} && [ -s {log} ] || touch {output}`
    # reached `touch` both when diff failed (the `||` branch) and when diff
    # succeeded (an empty log makes `[ -s ]` false), so the check could never
    # fail. The marker is now created only when diff exits 0; otherwise the
    # command, and therefore the rule, fails and the differences stay in {log}.
    shell:
        "diff {input.exp} {input.res} > {log} && touch {output}"
# All
rule all:
rule check_Virulence:
input:
# PathoFact
PF,
# Result checks
CHECKS
res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/Virulence_prediction_{sample}_report.tsv")),
exp=os.path.join(DATA_DIR,"output_expected/PathoFact_report/Virulence_prediction_{sample}_report.tsv")
output: os.path.join(DATA_DIR,"{project}/Virulence_prediction_{sample}.checked")
log: os.path.join(DATA_DIR,"{project}/Virulence_prediction_{sample}.diff")
shell: "diff {input.exp} {input.res} > {log} && [ -s {log} ] || touch {output}"
# Compare files
rule check:
rule check_PathoFact:
input:
exp="{datadir}/{project}/{{bname}}".format(
datadir=config["test"]["datadir"], project=config["test"]["project"]
),
res=pathofact("{datadir}/{project}/{{bname}}".format(
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"]
))
output:
"{datadir}/{project}/{{bname}}.checked".format(
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"]
)
log:
"{datadir}/{project}/{{bname}}.diff".format(
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"]
)
params:
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
shell:
"diff {input.exp} {input.res} > {log} && [ -s {log} ] || touch {output}"
res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/PathoFact_{sample}_predictions.tsv")),
exp=os.path.join(DATA_DIR,"output_expected/PathoFact_report/PathoFact_{sample}_predictions.tsv")
output: os.path.join(DATA_DIR,"{project}/PathoFact_{sample}.checked")
log: os.path.join(DATA_DIR,"{project}/PathoFact_{sample}.diff")
shell: "diff {input.exp} {input.res} > {log} && [ -s {log} ] || touch {output}"
# Test workflow: complete PathoFact pipeline + result checks
##################################################
# Configuration
# Config file
CONFIG = "test/test_config.yaml"
configfile: CONFIG
DATA_DIR=config["pathofact"]["datadir"]
# Libs
import os
import sys
import pandas
##################################################
# Sub-workflows
# PathoFact
# The PathoFact pipeline is attached as a sub-workflow: it is executed from
# the parent directory with the parent Snakefile, using the same
# configuration file as this test workflow.
subworkflow pathofact:
    workdir: "../"
    snakefile: "../Snakefile"
    configfile: CONFIG
##################################################
# Targets
# PathoFact
# Report files requested from the `pathofact` sub-workflow, expanded over the
# configured project(s) and sample(s).
# A stray, unclosed `os.path.join(DATA_DIR,` that preceded this assignment
# was removed: its open parenthesis swallowed the following statements and
# made the file unparsable.
PF = pathofact(
    expand(
        [
            os.path.join(DATA_DIR,"{project}/PathoFact_report/AMR_MGE_prediction_{sample}_report.tsv"),
            os.path.join(DATA_DIR,"{project}/PathoFact_report/Toxin_gene_library_{sample}_report.tsv"),
            os.path.join(DATA_DIR,"{project}/PathoFact_report/Toxin_prediction_{sample}_report.tsv"),
            os.path.join(DATA_DIR,"{project}/PathoFact_report/Virulence_prediction_{sample}_report.tsv"),
            os.path.join(DATA_DIR,"{project}/PathoFact_report/PathoFact_{sample}_predictions.tsv")
        ],
        project=config["pathofact"]["project"],
        sample=config["pathofact"]["sample"]
    )
)
# Checks
# One ".checked" marker per report file, expanded over the configured
# project(s) and sample(s). The marker path is the report path with
# ".checked" appended.
CHECKS = expand(
    [
        os.path.join(DATA_DIR, "{project}/PathoFact_report/" + report_name + ".checked")
        for report_name in (
            "AMR_MGE_prediction_{sample}_report.tsv",
            "Toxin_gene_library_{sample}_report.tsv",
            "Toxin_prediction_{sample}_report.tsv",
            "Virulence_prediction_{sample}_report.tsv",
            "PathoFact_{sample}_predictions.tsv",
        )
    ],
    project=config["pathofact"]["project"],
    sample=config["pathofact"]["sample"]
)
##################################################
# Rules
# All
# Default target: request every PathoFact report together with the marker
# file of its result check.
rule all:
    input:
        PF,      # reports produced by the PathoFact sub-workflow
        CHECKS   # ".checked" markers written by rule `check`
# Compare files
# Generic result check: compare one report file ({bname}) produced by the
# PathoFact sub-workflow with the expected file of the same name and create
# a "{bname}.checked" marker when they are identical.
rule check:
    input:
        # Expected file; the project directory comes from the "test" config
        # section. (Closing parenthesis of os.path.join was missing.)
        exp=os.path.join(DATA_DIR,"{project}/PathoFact_report/{{bname}}".format(
            project=config["test"]["project"]
        )),
        # File generated by the sub-workflow. (Closing parenthesis of
        # pathofact(...) was missing.)
        res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/{{bname}}").format(
            project=config["pathofact"]["project"]
        ))
    output:
        os.path.join(DATA_DIR,"{project}/PathoFact_report/{{bname}}.checked").format(
            project=config["pathofact"]["project"]
        )
    log:
        # Receives the diff output (empty when the files are identical).
        os.path.join(DATA_DIR,"{project}/PathoFact_report/{{bname}}.diff").format(
            project=config["pathofact"]["project"]
        )
    params:
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    # The former command `diff ... > {log} && [ -s {log} ] || touch {output}`
    # reached `touch` both when diff failed (the `||` branch) and when diff
    # succeeded (an empty log makes `[ -s ]` false), so the check could never
    # fail. The marker is now created only when diff exits 0; otherwise the
    # command, and therefore the rule, fails and the differences stay in {log}.
    shell:
        """
        diff {input.exp} {input.res} > {log} && touch {output}
        """
# Test workflow: complete PathoFact pipeline + result checks
##################################################
# Configuration
# Config file
CONFIG = "test/test_config.yaml"
configfile: CONFIG
DATA_DIR=config["pathofact"]["datadir"]
# Libs
import os
import sys
import pandas
##################################################
# Sub-workflows
# PathoFact
# The PathoFact pipeline is attached as a sub-workflow: it is executed from
# the parent directory with the parent Snakefile, using the same
# configuration file as this test workflow.
subworkflow pathofact:
    workdir: "../"
    snakefile: "../Snakefile"
    configfile: CONFIG
##################################################
# Rules
# Default target: the AMR/MGE check marker for every configured
# project/sample combination.
rule all:
    input:
        expand(
            os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.checked"),
            project=config["pathofact"]["project"],
            sample=config["pathofact"]["sample"]
        )
# Compare the AMR/MGE report produced by the PathoFact sub-workflow with the
# expected reference report and create a ".checked" marker when they match.
rule check_AMR_MGE:
    input:
        # Report generated by the `pathofact` sub-workflow.
        res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/AMR_MGE_prediction_{sample}_report.tsv")),
        # Reference report the result is compared against.
        exp=os.path.join(DATA_DIR,"output_expected/PathoFact_report/AMR_MGE_prediction_{sample}_report.tsv")
    output:
        os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.checked")
    log:
        # Receives the diff output (empty when the files are identical).
        os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.diff")
    # The former command `diff ... > {log} && [ -s {log} ] || touch {output}`
    # reached `touch` both when diff failed (the `||` branch) and when diff
    # succeeded (an empty log makes `[ -s ]` false), so the check could never
    # fail. The marker is now created only when diff exits 0; otherwise the
    # command, and therefore the rule, fails and the differences stay in {log}.
    shell:
        "diff {input.exp} {input.res} > {log} && touch {output}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment