Commit d0581e48 authored by Valentina Galata
Browse files

format fix in rules/AMR (issue #26)

parent 25774a3f
......@@ -4,35 +4,41 @@ import glob
import os
configfile:"config.yaml"
PROJECT=config["project"]
INPUT=config["input_file"]
PROJECT = config["project"]
INPUT = config["input_file"]
##########################
# AMR Prediction #
##########################
# run_deepARG: runs the script at config[deep_ARG] (via `python`) on one split
# protein FASTA chunk ({file_i}.faa) and declares the matching
# {file_i}.out.mapping.ARG file as a temp() output.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
# NOTE(review): params.outdir is declared but the shell command uses
# {wildcards.OUTDIR} instead — confirm whether the param is still needed.
rule run_deepARG:
input:"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:temp("{OUTDIR}/{project}/AMR/deepARG_results/{input_file}/{file_i}.out.mapping.ARG")
params: outdir="{OUTDIR}"
conda: "../../envs/DeepARG.yaml"
shell: "python {config[deep_ARG]} --align --type prot --genes --input {input} --output {wildcards.OUTDIR}/{wildcards.project}/AMR/deepARG_results/{wildcards.input_file}/{wildcards.file_i}.out"
input:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
output:
temp("{OUTDIR}/{project}/AMR/deepARG_results/{input_file}/{file_i}.out.mapping.ARG")
params:
outdir="{OUTDIR}"
conda:
"../../envs/DeepARG.yaml"
shell:
"python {config[deep_ARG]} --align --type prot --genes --input {input} --output {wildcards.OUTDIR}/{wildcards.project}/AMR/deepARG_results/{wildcards.input_file}/{wildcards.file_i}.out"
# aggregate_AMR(wildcards): input function used by rule aggregate_deepARG.
# Looks up the `splits` output of checkpoint `splitting` for these wildcards,
# globs "{i}.faa" inside that path, and returns (via expand) one per-chunk
# .out.mapping.ARG path for every chunk id found.
# NOTE(review): checkpoint `splitting` is not defined in the visible lines —
# confirm its `splits` output is the directory holding the .faa chunks.
# NOTE(review): diff-view residue — the body is listed twice (old layout, then
# the reformatted layout) and indentation was lost.
def aggregate_AMR(wildcards):
checkpoint_output = checkpoints.splitting.get(**wildcards).output.splits
return expand("{OUTDIR}/{project}/AMR/deepARG_results/{input_file}/{file_i}.out.mapping.ARG",
OUTDIR=wildcards.OUTDIR,
project=wildcards.project,
input_file=wildcards.input_file,
file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.faa")).i)
checkpoint_output = checkpoints.splitting.get(**wildcards).output.splits
return expand(
"{OUTDIR}/{project}/AMR/deepARG_results/{input_file}/{file_i}.out.mapping.ARG",
OUTDIR=wildcards.OUTDIR,
project=wildcards.project,
input_file=wildcards.input_file,
file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.faa")).i
)
# aggregate_deepARG: concatenates (`cat`) every file returned by the
# aggregate_AMR input function into a single {input_file}.out.mapping.ARG file.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
# NOTE(review): params.outdir is declared but not referenced by the shell command.
rule aggregate_deepARG:
input: aggregate_AMR
output: "{OUTDIR}/{project}/AMR/deepARG_results/{input_file}.out.mapping.ARG"
params: outdir="{OUTDIR}"
shell: "cat {input} > {output}"
input:
aggregate_AMR
output:
"{OUTDIR}/{project}/AMR/deepARG_results/{input_file}.out.mapping.ARG"
params:
outdir="{OUTDIR}"
shell:
"cat {input} > {output}"
......@@ -4,44 +4,52 @@ import glob
import os
configfile:"config.yaml"
PROJECT=config["project"]
INPUT=config["input_file"]
PROJECT = config["project"]
INPUT = config["input_file"]
##########################
# AMR Prediction #
##########################
# combine_AMR_plasmid: hands seven named inputs (AMR mapping, two translation
# tables, plasmid prediction, contig gene list, VirFinder and VirSorter
# aggregates) to ../../scripts/AMR_MGE.R, which is expected to write the two
# temp() CSV reports named Report_1 and Report_2.
# NOTE(review): Contig_gene_list is the only input path without a {project}
# segment ("{OUTDIR}/{input_file}.contig") — confirm this is intentional.
# NOTE(review): diff-view residue — the directives are listed twice (old layout,
# then the reformatted layout) and indentation was lost.
rule combine_AMR_plasmid:
input:
AMR="{OUTDIR}/{project}/AMR/deepARG_results/{input_file}.out.mapping.ARG",
AMR_translation="{OUTDIR}/{project}/renamed/{input_file}_translation.tsv",
Plasmid="{OUTDIR}/{project}/MGE/plasmid/{input_file}_plasflow_prediction_final.tsv",
Contig_translation="{OUTDIR}/{project}/renamed/{input_file}_Contig_translation.tsv",
Contig_gene_list="{OUTDIR}/{input_file}.contig",
VirFinder="{OUTDIR}/{project}/MGE/phage/{input_file}_VirFinder_aggregated.csv",
VirSorter="{OUTDIR}/{project}/MGE/phage/{input_file}_VIRSorter_aggregated.csv"
output:
Report_1=temp("{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_detail_temp.csv"),
Report_2=temp("{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_report_temp.csv")
params: outdir="{OUTDIR}"
conda: "../../envs/R.yaml"
script: "../../scripts/AMR_MGE.R"
input:
AMR="{OUTDIR}/{project}/AMR/deepARG_results/{input_file}.out.mapping.ARG",
AMR_translation="{OUTDIR}/{project}/renamed/{input_file}_translation.tsv",
Plasmid="{OUTDIR}/{project}/MGE/plasmid/{input_file}_plasflow_prediction_final.tsv",
Contig_translation="{OUTDIR}/{project}/renamed/{input_file}_Contig_translation.tsv",
Contig_gene_list="{OUTDIR}/{input_file}.contig",
VirFinder="{OUTDIR}/{project}/MGE/phage/{input_file}_VirFinder_aggregated.csv",
VirSorter="{OUTDIR}/{project}/MGE/phage/{input_file}_VIRSorter_aggregated.csv"
output:
Report_1=temp("{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_detail_temp.csv"),
Report_2=temp("{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_report_temp.csv")
params:
outdir="{OUTDIR}"
conda:
"../../envs/R.yaml"
script:
"../../scripts/AMR_MGE.R"
# modify_details / modify_report: each rule takes one of the temporary CSV
# reports (…_detail_temp.csv / …_report_temp.csv), strips every double quote
# with sed, replaces every comma with the `\t` escape, and writes a .tsv file.
# NOTE(review): the comma replacement is global, so commas inside field values
# are converted too — confirm the R script never emits embedded commas.
# NOTE(review): diff-view residue — both rules are interleaved here: the old
# one-line forms of modify_details and modify_report come first, followed by the
# reformatted modify_details body and the reformatted modify_report rule.
# Indentation was lost.
rule modify_details:
input:"{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_detail_temp.csv"
output: "{OUTDIR}/{project}/AMR/{input_file}_AMR_MGE_prediction_detailed.tsv"
params: outdir="{OUTDIR}"
shell: """
sed 's/"//g' {input} | sed 's/,/\t/g' > {output}
"""
rule modify_report:
input:"{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_report_temp.csv"
output: "{OUTDIR}/{project}/AMR_MGE_prediction_{input_file}_report.tsv"
params: outdir="{OUTDIR}"
shell: """
sed 's/"//g' {input} | sed 's/,/\t/g' > {output}
"""
input:
"{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_detail_temp.csv"
output:
"{OUTDIR}/{project}/AMR/{input_file}_AMR_MGE_prediction_detailed.tsv"
params:
outdir="{OUTDIR}"
shell:
"""
sed 's/"//g' {input} | sed 's/,/\t/g' > {output}
"""
rule modify_report:
input:
"{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_report_temp.csv"
output:
"{OUTDIR}/{project}/AMR_MGE_prediction_{input_file}_report.tsv"
params:
outdir="{OUTDIR}"
shell:
"""
sed 's/"//g' {input} | sed 's/,/\t/g' > {output}
"""
......@@ -4,9 +4,8 @@ import glob
import os
configfile: "config.yaml"
PROJECT=config["project"]
INPUT=config["input_file"]
PROJECT = config["project"]
INPUT = config["input_file"]
##########################
# Phage Prediction #
......@@ -15,50 +14,75 @@ INPUT=config["input_file"]
# VIRSORTER Preprocessing
# splitphage (checkpoint): runs `seqkit split2 -s 10000` on the renamed contig
# FASTA and writes the parts into the contig_splitted/{input_file} directory,
# which is declared as the directory() output named `split`.
# NOTE(review): -s 10000 presumably means 10000 sequences per part — confirm
# against the seqkit split2 documentation.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
checkpoint splitphage:
input: "{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output: split=directory("{OUTDIR}/{project}/contig_splitted/{input_file}/")
params: outdir="{OUTDIR}"
conda: "../../envs/SeqKit.yaml"
shell: """
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/contig_splitted/{wildcards.input_file}
"""
input:
"{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output:
split=directory("{OUTDIR}/{project}/contig_splitted/{input_file}/")
params:
outdir="{OUTDIR}"
conda:
"../../envs/SeqKit.yaml"
shell:
"""
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/contig_splitted/{wildcards.input_file}
"""
# run_VirSorter: runs the executable at config[virsorter] on the renamed contig
# FASTA with 12 threads, using MGE/phage/{input_file}/virsorter as its working
# directory and config[virsorter_data] as its data directory. The declared
# output is VIRSorter_global-phage-signal.csv inside that working directory.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
rule run_VirSorter:
input: "{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output: "{OUTDIR}/{project}/MGE/phage/{input_file}/virsorter/VIRSorter_global-phage-signal.csv"
params: outdir="{OUTDIR}"
conda: "../../envs/VirSorter.yaml"
threads: 12
shell: """
{config[virsorter]} -f {input} --ncpu {threads} --wdir {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virsorter --data-dir {config[virsorter_data]}
"""
input:
"{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virsorter/VIRSorter_global-phage-signal.csv"
params:
outdir="{OUTDIR}"
conda:
"../../envs/VirSorter.yaml"
threads:
12
shell:
"""
{config[virsorter]} -f {input} --ncpu {threads} --wdir {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virsorter --data-dir {config[virsorter_data]}
"""
# aggregate_Virsorter: moves (`mv`) the VirSorter global signal CSV to
# MGE/phage/{input_file}_VIRSorter_aggregated.csv.
# NOTE(review): `mv` removes the input file, which is the declared output of
# rule run_VirSorter — presumably a later run will treat that output as missing
# and re-run VirSorter; confirm whether `cp` or a temp() output was intended.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
rule aggregate_Virsorter:
input: "{OUTDIR}/{project}/MGE/phage/{input_file}/virsorter/VIRSorter_global-phage-signal.csv"
output: "{OUTDIR}/{project}/MGE/phage/{input_file}_VIRSorter_aggregated.csv"
params: outdir="{OUTDIR}"
shell: "mv {input} {output}"
input:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virsorter/VIRSorter_global-phage-signal.csv"
output:
"{OUTDIR}/{project}/MGE/phage/{input_file}_VIRSorter_aggregated.csv"
params:
outdir="{OUTDIR}"
shell:
"mv {input} {output}"
# VIRFINDER Prediction
# run_Virfinder: runs the script at config[DeepVirFinder] (via `python`) on one
# contig chunk ({file_i}.fna) with 12 threads, writing into
# MGE/phage/{input_file}/virfinder; the declared output is
# {file_i}.fna_gt1bp_dvfpred.txt in that directory.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
# NOTE(review): in the reformatted half the final `shell:` directive is still on
# one line, unlike the other reformatted rules — likely missed by the format fix.
rule run_Virfinder:
input: "{OUTDIR}/{project}/contig_splitted/{input_file}/{file_i}.fna"
output: "{OUTDIR}/{project}/MGE/phage/{input_file}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt"
params: outdir="{OUTDIR}"
conda: "../../envs/DeepVirFinder.yaml"
threads: 12
shell: "python {config[DeepVirFinder]} -i {input} -o {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virfinder -c {threads}"
input:
"{OUTDIR}/{project}/contig_splitted/{input_file}/{file_i}.fna"
output:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt"
params:
outdir="{OUTDIR}"
conda:
"../../envs/DeepVirFinder.yaml"
threads:
12
shell: "python {config[DeepVirFinder]} -i {input} -o {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virfinder -c {threads}"
# aggregate_virfinder(wildcards): input function used by rule aggregate_Virfinder.
# Looks up the `split` output of checkpoint `splitphage` for these wildcards,
# globs "{i}.fna" inside that directory, and returns (via expand) one
# {file_i}.fna_gt1bp_dvfpred.txt path for every chunk id found.
# NOTE(review): diff-view residue — the body is listed twice (old layout, then
# the reformatted layout) and indentation was lost.
# NOTE(review): `checkpoint_output= ` lacks a space before `=` in both copies,
# unlike the other assignments touched by this format fix.
def aggregate_virfinder(wildcards):
checkpoint_output= checkpoints.splitphage.get(**wildcards).output.split
return expand("{OUTDIR}/{project}/MGE/phage/{input_file}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt",
OUTDIR=wildcards.OUTDIR,
project=wildcards.project,
input_file=wildcards.input_file,
file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.fna")).i)
checkpoint_output= checkpoints.splitphage.get(**wildcards).output.split
return expand(
"{OUTDIR}/{project}/MGE/phage/{input_file}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt",
OUTDIR=wildcards.OUTDIR,
project=wildcards.project,
input_file=wildcards.input_file,
file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.fna")).i
)
# aggregate_Virfinder: concatenates (`cat`) every per-chunk prediction file
# returned by the aggregate_virfinder input function into
# MGE/phage/{input_file}_VirFinder_aggregated.csv.
# NOTE(review): if each per-chunk file starts with a header line, plain `cat`
# will repeat it in the aggregate — confirm the downstream reader tolerates that.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
rule aggregate_Virfinder:
input: aggregate_virfinder
output: "{OUTDIR}/{project}/MGE/phage/{input_file}_VirFinder_aggregated.csv"
params: outdir="{OUTDIR}"
shell: "cat {input} >{output}"
input:
aggregate_virfinder
output:
"{OUTDIR}/{project}/MGE/phage/{input_file}_VirFinder_aggregated.csv"
params:
outdir="{OUTDIR}"
shell:
"cat {input} >{output}"
......@@ -4,9 +4,8 @@ import glob
import os
configfile: "config.yaml"
PROJECT=config["project"]
INPUT=config["input_file"]
PROJECT = config["project"]
INPUT = config["input_file"]
##########################
# Plasmid Prediction #
......@@ -14,50 +13,74 @@ INPUT=config["input_file"]
# PlasFlow Preprocessing
# filter_seq: runs ./scripts/filter.pl with the argument 1000 on the renamed
# contig FASTA and redirects its stdout to {input_file}_filtered.fna.
# NOTE(review): 1000 is presumably a minimum contig length — confirm in filter.pl.
# NOTE(review): the rule activates the Biopython conda env but invokes a .pl
# script through a path relative to the working directory ("./scripts/...") —
# confirm the env provides what the script needs and that the path resolves.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
rule filter_seq:
input: "{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output: "{OUTDIR}/{project}/{input_file}_filtered.fna"
conda: "../../envs/Biopython.yaml"
params: outdir="{OUTDIR}"
shell: "./scripts/filter.pl 1000 {input} > {output}"
input:
"{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna"
output:
"{OUTDIR}/{project}/{input_file}_filtered.fna"
conda:
"../../envs/Biopython.yaml"
params:
outdir="{OUTDIR}"
shell:
"./scripts/filter.pl 1000 {input} > {output}"
# splitplasmid (checkpoint): runs `seqkit split2 -s 10000` on the filtered
# contig FASTA and writes the parts into MGE/plasmid_splitted/{input_file},
# which is declared as the directory() output named `split`.
# NOTE(review): -s 10000 presumably means 10000 sequences per part — confirm
# against the seqkit split2 documentation.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
checkpoint splitplasmid:
input: "{OUTDIR}/{project}/{input_file}_filtered.fna"
output: split=directory("{OUTDIR}/{project}/MGE/plasmid_splitted/{input_file}/")
params: outdir="{OUTDIR}"
conda: "../../envs/SeqKit.yaml"
shell: """
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/MGE/plasmid_splitted/{wildcards.input_file}
"""
input:
"{OUTDIR}/{project}/{input_file}_filtered.fna"
output:
split=directory("{OUTDIR}/{project}/MGE/plasmid_splitted/{input_file}/")
params:
outdir="{OUTDIR}"
conda:
"../../envs/SeqKit.yaml"
shell:
"""
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/MGE/plasmid_splitted/{wildcards.input_file}
"""
# PlasFlow Plasmid prediction
# run_PLASMID: runs the executable at config[Plasflow] on one filtered contig
# chunk ({file_i}.fna) with --threshold 0.7 and declares the resulting
# {file_i}_plasflow_prediction.tsv as a temp() output.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
rule run_PLASMID:
input: "{OUTDIR}/{project}/MGE/plasmid_splitted/{input_file}/{file_i}.fna"
output: temp("{OUTDIR}/{project}/MGE/plasmid/{input_file}/{file_i}_plasflow_prediction.tsv")
conda: "../../envs/PlasFlow.yaml"
params: outdir="{OUTDIR}"
shell: """
{config[Plasflow]} --input {input} --output {output} --threshold 0.7
"""
input:
"{OUTDIR}/{project}/MGE/plasmid_splitted/{input_file}/{file_i}.fna"
output:
temp("{OUTDIR}/{project}/MGE/plasmid/{input_file}/{file_i}_plasflow_prediction.tsv")
conda:
"../../envs/PlasFlow.yaml"
params:
outdir="{OUTDIR}"
shell:
"""
{config[Plasflow]} --input {input} --output {output} --threshold 0.7
"""
# aggregate_plasmid_input(wildcards): input function used by rule Plasmid_aggregate.
# Looks up the `split` output of checkpoint `splitplasmid` for these wildcards,
# globs "{i}.fna" inside that directory, and returns (via expand) one
# {file_i}_plasflow_prediction.tsv path for every chunk id found.
# NOTE(review): diff-view residue — the body is listed twice (old layout, then
# the reformatted layout) and indentation was lost.
# NOTE(review): `checkpoint_output= ` lacks a space before `=` in both copies,
# unlike the other assignments touched by this format fix.
def aggregate_plasmid_input(wildcards):
checkpoint_output= checkpoints.splitplasmid.get(**wildcards).output.split
return expand("{OUTDIR}/{project}/MGE/plasmid/{input_file}/{file_i}_plasflow_prediction.tsv",
OUTDIR=wildcards.OUTDIR,
project=wildcards.project,
input_file=wildcards.input_file,
file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.fna")).i)
checkpoint_output= checkpoints.splitplasmid.get(**wildcards).output.split
return expand(
"{OUTDIR}/{project}/MGE/plasmid/{input_file}/{file_i}_plasflow_prediction.tsv",
OUTDIR=wildcards.OUTDIR,
project=wildcards.project,
input_file=wildcards.input_file,
file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.fna")).i
)
# Plasmid_aggregate: concatenates (`cat`) every per-chunk PlasFlow prediction
# returned by the aggregate_plasmid_input input function into
# MGE/plasmid/{input_file}_plasflow_aggregated.tsv.
# NOTE(review): if each per-chunk file starts with a header line, plain `cat`
# will repeat it in the aggregate — confirm downstream steps tolerate that.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
rule Plasmid_aggregate:
input: aggregate_plasmid_input
output: "{OUTDIR}/{project}/MGE/plasmid/{input_file}_plasflow_aggregated.tsv"
params: outdir="{OUTDIR}"
shell: " cat {input} > {output}"
input:
aggregate_plasmid_input
output:
"{OUTDIR}/{project}/MGE/plasmid/{input_file}_plasflow_aggregated.tsv"
params:
outdir="{OUTDIR}"
shell:
"cat {input} > {output}"
# select: keeps only tab-separated fields 3 and 6 (`cut -f 3,6`) of the
# aggregated PlasFlow table and writes them to
# {input_file}_plasflow_prediction_final.tsv.
# NOTE(review): which columns fields 3 and 6 hold is not visible here — confirm
# against the PlasFlow output format.
# NOTE(review): diff-view residue — the directives are listed twice (old one-line
# form, then the reformatted multi-line form) and indentation was lost.
rule select:
input: "{OUTDIR}/{project}/MGE/plasmid/{input_file}_plasflow_aggregated.tsv"
output: "{OUTDIR}/{project}/MGE/plasmid/{input_file}_plasflow_prediction_final.tsv"
params: outdir="{OUTDIR}"
shell: """
cut -f 3,6 {input} > {output}
"""
input:
"{OUTDIR}/{project}/MGE/plasmid/{input_file}_plasflow_aggregated.tsv"
output:
"{OUTDIR}/{project}/MGE/plasmid/{input_file}_plasflow_prediction_final.tsv"
params:
outdir="{OUTDIR}"
shell:
"""
cut -f 3,6 {input} > {output}
"""
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment