GitLab migration complete. If you have any issues, please read the FAQ.

Commit 06d1a76c authored by Laura Denies's avatar Laura Denies
Browse files

update test module

parent 527d38c6
......@@ -14,7 +14,6 @@ rule combine_AMR_plasmid:
Contig_translation=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/renamed/{sample}_Contig_translation.tsv"),
AMR=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/AMR/{sample}_AMR_prediction.tsv"),
PlasFlow=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/PlasFlow/{sample}_plasflow_prediction_final.tsv"),
MOB_suite=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv"),
DeepVirFinder=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/phage/{sample}_VirFinder_aggregated.csv"),
VirSorter=os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/phage/{sample}_VIRSorter_aggregated.csv")
output:
......
......@@ -98,36 +98,36 @@ rule select:
cut -f 3,6 {input} > {output}
"""
# Run MOB-suite's mob_typer on a single split-contig FASTA chunk ({file_i}) of one sample.
rule run_MOBsuite:
# one chunk of the sample's contigs, taken from the contig_splitted directory
input: os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/contig_splitted/{sample}/{file_i}.fasta")
output:
# temp(): the per-chunk prediction is flagged as a temporary file for Snakemake
temp(os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt"))
log:
os.path.join(DATA_DIR,"{project}/logs/{sample}/{file_i}_MOB_suite_prediction.log")
threads:
# thread count is read from the config and handed to mob_typer through -n
config["pathofact"]["mem"]["big_mem_cores"]
conda:
"../../envs/MOB_suite.yaml"
message: "Executing MOB_suite with {threads} threads on the following sample(s): {wildcards.project} - {wildcards.sample}"
# '&>' sends both stdout and stderr of mob_typer to the rule's log file
shell: "mob_typer --multi --infile {input} --out_file {output} -n {threads} &> {log}"
#rule run_MOBsuite:
# input: os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/contig_splitted/{sample}/{file_i}.fasta")
# output:
# temp(os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt"))
# log:
# os.path.join(DATA_DIR,"{project}/logs/{sample}/{file_i}_MOB_suite_prediction.log")
# threads:
# config["pathofact"]["mem"]["big_mem_cores"]
# conda:
# "../../envs/MOB_suite.yaml"
# message: "Executing MOB_suite with {threads} threads on the following sample(s): {wildcards.project} - {wildcards.sample}"
# shell: "mob_typer --multi --infile {input} --out_file {output} -n {threads} &> {log}"
def aggregate_MOBsuite(wildcards):
    """Return the per-chunk MOB_suite prediction paths for one sample.

    The ``splitcontig`` checkpoint is looked up for the given wildcards; every
    ``{i}.fasta`` found under its ``split`` output contributes one
    ``{file_i}_MOB_suite_prediction.txt`` path to the returned list.
    """
    # Location of the split FASTA chunks produced by the checkpoint.
    split_dir = checkpoints.splitcontig.get(**wildcards).output.split

    # Chunk identifiers discovered on disk (the "{i}" part of "{i}.fasta").
    chunk_ids = glob_wildcards(os.path.join(split_dir, "{i}.fasta")).i

    prediction_pattern = os.path.join(
        DATA_DIR,
        "{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt",
    )
    return expand(
        prediction_pattern,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=chunk_ids,
    )
#def aggregate_MOBsuite(wildcards):
# checkpoint_output= checkpoints.splitcontig.get(**wildcards).output.split
# return expand(
# os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt"),
# project=wildcards.project,
# sample=wildcards.sample,
# file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.fasta")).i
# )
# Concatenate the per-chunk MOB_suite prediction files of one sample into a single table.
rule aggregate_MOBsuite:
# the input list is produced by the aggregate_MOBsuite() input function
input: aggregate_MOBsuite
output:
os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv")
params:
# runtime/mem are read from the config; the shell command below does not reference them
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
message: "Aggregate MOB_suite results on the following sample(s): {wildcards.project} - {wildcards.sample}"
shell:
# plain concatenation: the input files are written to the output back to back, unmodified
"cat {input} > {output}"
#rule aggregate_MOBsuite:
# input: aggregate_MOBsuite
# output:
# os.path.join(DATA_DIR,"{project}/PathoFact_intermediate/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv")
# params:
# runtime=config["pathofact"]["runtime"]["short"],
# mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
# message: "Aggregate MOB_suite results on the following sample(s): {wildcards.project} - {wildcards.sample}"
# shell:
# "cat {input} > {output}"
......@@ -39,25 +39,29 @@ PlasFlow <- PlasFlow %>% filter(Contig_ID != "contig_name")
PlasFlow$Contig_ID <- sprintf("%010d", PlasFlow$Contig_ID)
## MOBsuite
MOB_suite <- read.delim(file = snakemake@input[["MOB_suite"]], header=TRUE)
MOB_suite <- MOB_suite %>% filter(sample_id != "sample_id")
MOB_suite$mash_neighbor_distance <- as.numeric(as.character(MOB_suite$mash_neighbor_distance))
MOB_suite$MOB_suite_prediction <- if_else(MOB_suite$mash_neighbor_distance <= 0.06 , "plasmid","-")
MOB_suite <- MOB_suite %>% select(1,27) %>% filter(MOB_suite_prediction == "plasmid")
colnames(MOB_suite)<- c("Contig_ID","MOB_suite_prediction")
MOB_suite$Contig_ID <- sprintf("%010d", MOB_suite$Contig_ID)
#MOB_suite <- read.delim(file = snakemake@input[["MOB_suite"]], header=TRUE)
#MOB_suite <- MOB_suite %>% filter(sample_id != "sample_id")
#MOB_suite$mash_neighbor_distance <- as.numeric(as.character(MOB_suite$mash_neighbor_distance))
#MOB_suite$MOB_suite_prediction <- if_else(MOB_suite$mash_neighbor_distance <= 0.06 , "plasmid","-")
#MOB_suite <- MOB_suite %>% select(1,27) %>% filter(MOB_suite_prediction == "plasmid")
#colnames(MOB_suite)<- c("Contig_ID","MOB_suite_prediction")
#MOB_suite$Contig_ID <- sprintf("%010d", MOB_suite$Contig_ID)
## Combine PlasFlow and MOB_suite
Plasmid <- merge(PlasFlow, MOB_suite, by="Contig_ID", all = TRUE)
Plasmid <- merge(Plasmid, Contig_translation, by= "Contig_ID", all= TRUE)
#Plasmid <- merge(PlasFlow, MOB_suite, by="Contig_ID", all = TRUE)
Plasmid <- merge(PlasFlow, Contig_translation, by= "Contig_ID", all= TRUE)
Plasmid$PlasFlow_prediction <- fct_explicit_na(Plasmid$PlasFlow_prediction, na_level = "unclassified")
Plasmid$MOB_suite_prediction <- fct_explicit_na(Plasmid$MOB_suite_prediction, na_level = "unclassified")
Plasmid$Plasmid_prediction <- ifelse(Plasmid$PlasFlow_prediction == "plasmid" | Plasmid$MOB_suite_prediction == "plasmid", "plasmid", "-")
Plasmid$Plasmid_database <- ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction == "plasmid" & Plasmid$MOB_suite_prediction == "plasmid", "PlasFlow/MOB_suite",
ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction == "plasmid" & Plasmid$MOB_suite_prediction != "plasmid", "PlasFlow",
ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction != "plasmid" & Plasmid$MOB_suite_prediction == "plasmid", "MOB_suite", "-")))
Plasmid$Chromosome_prediction <- ifelse(Plasmid$PlasFlow_prediction == "chromosome" & Plasmid$MOB_suite_prediction != "plasmid", "chromosome","-")
Plasmid <- Plasmid %>% select(4,1,7,5,6)
#Plasmid$MOB_suite_prediction <- fct_explicit_na(Plasmid$MOB_suite_prediction, na_level = "unclassified")
#Plasmid$Plasmid_prediction <- ifelse(Plasmid$PlasFlow_prediction == "plasmid" | Plasmid$MOB_suite_prediction == "plasmid", "plasmid", "-")
#Plasmid$Plasmid_database <- ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction == "plasmid" & Plasmid$MOB_suite_prediction == "plasmid", "PlasFlow/MOB_suite",
# ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction == "plasmid" & Plasmid$MOB_suite_prediction != "plasmid", "PlasFlow",
# ifelse(Plasmid$Plasmid_prediction == "plasmid" & Plasmid$PlasFlow_prediction != "plasmid" & Plasmid$MOB_suite_prediction == "plasmid", "MOB_suite", "-")))
#Plasmid$Chromosome_prediction <- ifelse(Plasmid$PlasFlow_prediction == "chromosome" & Plasmid$MOB_suite_prediction != "plasmid", "chromosome","-")
#Plasmid <- Plasmid %>% select(4,1,7,5,6)
Plasmid$Plasmid_prediction <- ifelse(Plasmid$PlasFlow_prediction == "plasmid", "plasmid", "-")
Plasmid$Chromosome_prediction <- ifelse(Plasmid$PlasFlow_prediction == "chromosome", "chromosome","-")
Plasmid <- Plasmid %>% select(1,3,5,4)
# Phage
## VirFinder
......@@ -119,8 +123,8 @@ MGEs$MGE_prediction <- ifelse(MGEs$Plasmid_prediction == "plasmid" & MGEs$Phage_
# Combine AMR MGEs
AMR_MGE <- merge(AMR, MGEs, by="Contig", all.x = TRUE)
AMR_MGE <- AMR_MGE %>% select(1,10,2,3,4,5,6,7,8,9,11,12,13,14,15,16)
AMR_MGE <- AMR_MGE %>% select(1,10,2,3,4,5,6,7,8,9,11,12,13,14,15)
write.table(AMR_MGE, file = snakemake@output[["Report_1"]], sep="\t", row.names=FALSE, quote=FALSE)
AMR_MGE_final <- AMR_MGE %>% select(1:10,16)
AMR_MGE_final <- AMR_MGE %>% select(1:10,15)
write.table(AMR_MGE_final, file = snakemake@output[["Report_2"]], sep="\t", row.names=FALSE, quote=FALSE)
#!/bin/bash -l

###########
# DeepARG #
###########
# From installation instructions: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/

# Resolve the absolute path of the DeepARG submodule once. The value is kept
# in a quoted variable so that a checkout directory containing whitespace is
# not word-split when it is spliced into the sed expression below.
deeparg_dir="$(realpath submodules/deeparg-ss)"

# replace path in options.py
# ('@' is used as the sed delimiter so the '/' characters of the path need no escaping)
sed -i 's@^path\s*=.*$@path = "'"${deeparg_dir}"'"@' submodules/deeparg-ss/options.py

# make DIAMOND binary executable
chmod +x submodules/deeparg-ss/bin/diamond
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -6,7 +6,7 @@
# Config file
CONFIG = "test/test_config.yaml"
configfile: CONFIG
DATA_DIR=config["pathofact"]["datadir"]
# Libs
import os
import sys
......@@ -25,68 +25,39 @@ subworkflow pathofact:
CONFIG
##################################################
# Targets
# Rules
# PathoFact
PF = pathofact(
expand(
[
"{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/Toxin_prediction_{sample}_report.tsv",
"{datadir}/{project}/Virulence_prediction_{sample}_report.tsv",
"{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
],
datadir=config["pathofact"]["datadir"],
project=config["pathofact"]["project"],
sample=config["pathofact"]["sample"]
)
)
rule all:
input: expand([os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.checked"),os.path.join(DATA_DIR,"{project}/Toxin_prediction_{sample}.checked"),os.path.join(DATA_DIR,"{project}/Virulence_prediction_{sample}.checked"),os.path.join(DATA_DIR,"{project}/PathoFact_{sample}.checked")], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"])
# Checks
CHECKS = expand(
[
"{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv.checked",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv.checked",
"{datadir}/{project}/Toxin_prediction_{sample}_report.tsv.checked",
"{datadir}/{project}/Virulence_prediction_{sample}_report.tsv.checked",
"{datadir}/{project}/PathoFact_{sample}_predictions.tsv.checked"
],
datadir=config["pathofact"]["datadir"],
project=config["pathofact"]["project"],
sample=config["pathofact"]["sample"]
)
# Compare the AMR/MGE report produced by the PathoFact sub-workflow with the expected report.
rule check_AMR_MGE:
    input:
        # res: report generated by the pathofact sub-workflow
        res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/AMR_MGE_prediction_{sample}_report.tsv")),
        # exp: reference report stored under output_expected/
        exp=os.path.join(DATA_DIR,"output_expected/PathoFact_report/AMR_MGE_prediction_{sample}_report.tsv")
    output: os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.checked")
    log: os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.diff")
    # The previous command, "diff ... && [ -s {log} ] || touch {output}", created
    # the .checked marker in every case: when diff reported differences the
    # "||" branch ran, and when the files were identical the empty log made
    # "[ -s ]" fail, which also triggered the "||" branch.
    # Now the marker is only created when diff exits 0 (files identical); a
    # non-zero diff exit status fails the job and the differences stay in {log}.
    shell: "diff {input.exp} {input.res} > {log} && touch {output}"
##################################################
# Rules
# Compare the toxin prediction report produced by the PathoFact sub-workflow with the expected report.
rule check_Toxin:
    input:
        # res: report generated by the pathofact sub-workflow
        res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/Toxin_prediction_{sample}_report.tsv")),
        # exp: reference report stored under output_expected/
        exp=os.path.join(DATA_DIR,"output_expected/PathoFact_report/Toxin_prediction_{sample}_report.tsv")
    output: os.path.join(DATA_DIR,"{project}/Toxin_prediction_{sample}.checked")
    log: os.path.join(DATA_DIR,"{project}/Toxin_prediction_{sample}.diff")
    # The previous command, "diff ... && [ -s {log} ] || touch {output}", created
    # the .checked marker in every case: when diff reported differences the
    # "||" branch ran, and when the files were identical the empty log made
    # "[ -s ]" fail, which also triggered the "||" branch.
    # Now the marker is only created when diff exits 0 (files identical); a
    # non-zero diff exit status fails the job and the differences stay in {log}.
    shell: "diff {input.exp} {input.res} > {log} && touch {output}"
# All
rule all:
rule check_Virulence:
input:
# PathoFact
PF,
# Result checks
CHECKS
res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/Virulence_prediction_{sample}_report.tsv")),
exp=os.path.join(DATA_DIR,"output_expected/PathoFact_report/Virulence_prediction_{sample}_report.tsv")
output: os.path.join(DATA_DIR,"{project}/Virulence_prediction_{sample}.checked")
log: os.path.join(DATA_DIR,"{project}/Virulence_prediction_{sample}.diff")
shell: "diff {input.exp} {input.res} > {log} && [ -s {log} ] || touch {output}"
# Compare files
rule check:
rule check_PathoFact:
input:
exp="{datadir}/{project}/{{bname}}".format(
datadir=config["test"]["datadir"], project=config["test"]["project"]
),
res=pathofact("{datadir}/{project}/{{bname}}".format(
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"]
))
output:
"{datadir}/{project}/{{bname}}.checked".format(
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"]
)
log:
"{datadir}/{project}/{{bname}}.diff".format(
datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"]
)
params:
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
shell:
"diff {input.exp} {input.res} > {log} && [ -s {log} ] || touch {output}"
res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/PathoFact_{sample}_predictions.tsv")),
exp=os.path.join(DATA_DIR,"output_expected/PathoFact_report/PathoFact_{sample}_predictions.tsv")
output: os.path.join(DATA_DIR,"{project}/PathoFact_{sample}.checked")
log: os.path.join(DATA_DIR,"{project}/PathoFact_{sample}.diff")
shell: "diff {input.exp} {input.res} > {log} && [ -s {log} ] || touch {output}"
# Test workflow: complete PathoFact pipeline + result checks
##################################################
# Configuration
# Config file
CONFIG = "test/test_config.yaml"
configfile: CONFIG
DATA_DIR=config["pathofact"]["datadir"]
# Libs
import os
import sys
import pandas
##################################################
# Sub-workflows
# PathoFact
subworkflow pathofact:
workdir:
"../"
snakefile:
"../Snakefile"
configfile:
CONFIG
##################################################
# Targets
# PathoFact
# Report files requested from the pathofact sub-workflow, expanded over the
# configured project(s) and sample(s).
# (A stray, unclosed "os.path.join(DATA_DIR," fragment that preceded this
# assignment was removed; it left the file unparsable.)
PF = pathofact(
    expand(
        [
            os.path.join(DATA_DIR,"{project}/PathoFact_report/AMR_MGE_prediction_{sample}_report.tsv"),
            os.path.join(DATA_DIR,"{project}/PathoFact_report/Toxin_gene_library_{sample}_report.tsv"),
            os.path.join(DATA_DIR,"{project}/PathoFact_report/Toxin_prediction_{sample}_report.tsv"),
            os.path.join(DATA_DIR,"{project}/PathoFact_report/Virulence_prediction_{sample}_report.tsv"),
            os.path.join(DATA_DIR,"{project}/PathoFact_report/PathoFact_{sample}_predictions.tsv")
        ],
        project=config["pathofact"]["project"],
        sample=config["pathofact"]["sample"]
    )
)
# Checks
CHECKS = expand(
[
os.path.join(DATA_DIR,"{project}/PathoFact_report/AMR_MGE_prediction_{sample}_report.tsv.checked"),
os.path.join(DATA_DIR,"{project}/PathoFact_report/Toxin_gene_library_{sample}_report.tsv.checked"),
os.path.join(DATA_DIR,"{project}/PathoFact_report/Toxin_prediction_{sample}_report.tsv.checked"),
os.path.join(DATA_DIR,"{project}/PathoFact_report/Virulence_prediction_{sample}_report.tsv.checked"),
os.path.join(DATA_DIR,"{project}/PathoFact_report/PathoFact_{sample}_predictions.tsv.checked")
],
project=config["pathofact"]["project"],
sample=config["pathofact"]["sample"]
)
##################################################
# Rules
# All
rule all:
input:
# PathoFact
PF,
# Result checks
CHECKS
# Compare files
# Generic comparison rule: diff an expected report against the report produced
# by the pathofact sub-workflow. "{{bname}}" survives str.format() as the
# Snakemake wildcard "{bname}" (the report's file name).
rule check:
    input:
        # exp: expected file, located under the *test* project directory
        # (closing parenthesis of os.path.join() was missing)
        exp=os.path.join(DATA_DIR,"{project}/PathoFact_report/{{bname}}".format(
            project=config["test"]["project"]
        )),
        # res: file generated by the sub-workflow under the *pathofact* project directory
        # (closing parenthesis of pathofact() was missing)
        res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/{{bname}}").format(
            project=config["pathofact"]["project"]
        ))
    output:
        os.path.join(DATA_DIR,"{project}/PathoFact_report/{{bname}}.checked").format(
            project=config["pathofact"]["project"]
        )
    log:
        os.path.join(DATA_DIR,"{project}/PathoFact_report/{{bname}}.diff").format(
            project=config["pathofact"]["project"]
        )
    params:
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    # The previous command, "diff ... && [ -s {log} ] || touch {output}", created
    # the .checked marker in every case: when diff reported differences the
    # "||" branch ran, and when the files were identical the empty log made
    # "[ -s ]" fail, which also triggered the "||" branch.
    # Now the marker is only created when diff exits 0 (files identical); a
    # non-zero diff exit status fails the job and the differences stay in {log}.
    shell:
        """
        diff {input.exp} {input.res} > {log} && touch {output}
        """
# Test workflow: complete PathoFact pipeline + result checks
##################################################
# Configuration
# Config file
CONFIG = "test/test_config.yaml"
configfile: CONFIG
DATA_DIR=config["pathofact"]["datadir"]
# Libs
import os
import sys
import pandas
##################################################
# Sub-workflows
# PathoFact
subworkflow pathofact:
workdir:
"../"
snakefile:
"../Snakefile"
configfile:
CONFIG
##################################################
# Rules
rule all:
input: expand(os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.checked"), project=config["pathofact"]["project"], sample=config["pathofact"]["sample"])
# Compare the AMR/MGE report produced by the PathoFact sub-workflow with the expected report.
rule check_AMR_MGE:
    input:
        # res: report generated by the pathofact sub-workflow
        res=pathofact(os.path.join(DATA_DIR,"{project}/PathoFact_report/AMR_MGE_prediction_{sample}_report.tsv")),
        # exp: reference report stored under output_expected/
        exp=os.path.join(DATA_DIR,"output_expected/PathoFact_report/AMR_MGE_prediction_{sample}_report.tsv")
    output: os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.checked")
    log: os.path.join(DATA_DIR,"{project}/AMR_MGE_{sample}.diff")
    # The previous command, "diff ... && [ -s {log} ] || touch {output}", created
    # the .checked marker in every case: when diff reported differences the
    # "||" branch ran, and when the files were identical the empty log made
    # "[ -s ]" fail, which also triggered the "||" branch.
    # Now the marker is only created when diff exits 0 (files identical); a
    # non-zero diff exit status fails the job and the differences stay in {log}.
    shell: "diff {input.exp} {input.res} > {log} && touch {output}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment