Commit d80a6ad1 authored by Laura Denies's avatar Laura Denies
Browse files

Merge branch 'IMPirisv2' into 'master'

Merge master with IMPirisv2

See merge request !2
parents fef0c395 6589efab
...@@ -82,7 +82,7 @@ Each sample should have three input files: ...@@ -82,7 +82,7 @@ Each sample should have three input files:
- contig and gene IDs should be the same as in the FASTA files - contig and gene IDs should be the same as in the FASTA files
The files should be located in the same directory. The files should be located in the same directory.
For each sample, the corresponding input files should have the same basename, e.g. `SAMPLE_A.fna`, `SAMPLE_A.fna` and `SAMPLE_A.contig` for sample `SAMPLE_A`. For each sample, the corresponding input files should have the same basename, e.g. `SAMPLE_A.fna`, `SAMPLE_A.faa` and `SAMPLE_A.contig` for sample `SAMPLE_A`.
**NOTE**: For preprocessing and assembly of metagenomic reads we would suggest using IMP (https://imp.pages.uni.lu/web/) **NOTE**: For preprocessing and assembly of metagenomic reads we would suggest using IMP (https://imp.pages.uni.lu/web/)
...@@ -92,9 +92,9 @@ For each sample, the corresponding input files should have the same basename, e. ...@@ -92,9 +92,9 @@ For each sample, the corresponding input files should have the same basename, e.
To run PathoFact you need to adjust some parameters in `config.yaml`. To run PathoFact you need to adjust some parameters in `config.yaml`.
- `input_file`: This is a list of sample names, e.g. `input_file: ["SAMPLE_A","SAMPLE_B"]` - `sample`: This is a list of sample names, e.g. `sample: ["SAMPLE_A","SAMPLE_B"]`
- `project`: A unique project name which will be used as the name of the output directory in `OUTDIR` path (see below). - `project`: A unique project name which will be used as the name of the output directory in `datapath` path (see below).
- `OUTDIR`: Path to directory containing the sample data; the output directory will be created there. - `datapath`: Path to directory containing the sample data; the output directory will be created there.
- `workflow`: Pathofact can run the complete pipeline (default) or a specific step: - `workflow`: Pathofact can run the complete pipeline (default) or a specific step:
- "complete": complete pipeline = toxin + virulence + AMR + MGE prediction - "complete": complete pipeline = toxin + virulence + AMR + MGE prediction
- "Tox": toxin prediction - "Tox": toxin prediction
...@@ -120,3 +120,12 @@ snakemake -s Snakefile --use-conda --reason --cores <cores> -p ...@@ -120,3 +120,12 @@ snakemake -s Snakefile --use-conda --reason --cores <cores> -p
**NOTE**: It is advised to run the pipeline using multiple CPUs or CPUs with "higher" memory. **NOTE**: It is advised to run the pipeline using multiple CPUs or CPUs with "higher" memory.
For more options, see the [snakemake documentation](https://snakemake.readthedocs.io/en/stable/index.html). For more options, see the [snakemake documentation](https://snakemake.readthedocs.io/en/stable/index.html).
### Execution on a cluster
The pipeline can be run on a cluster managed by the Slurm workload manager.
The full command is provided in the script `cluster.sh`, which can be submitted directly to the cluster:
```bash
sbatch cluster.sh
```
...@@ -2,51 +2,51 @@ ...@@ -2,51 +2,51 @@
configfile: "config.yaml" configfile: "config.yaml"
if config["workflow"] == "complete": if config["pathofact"]["workflow"] == "complete":
include: include:
"workflows/Combine_PathoFact_workflow.smk" "workflows/Combine_PathoFact_workflow.smk"
rule all: rule all:
input: input:
expand( expand(
[ [
"{OUTDIR}/{project}/AMR/{input_file}_AMR_MGE_prediction_detailed.tsv", "{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv",
"{OUTDIR}/{project}/Toxin_gene_library_{input_file}_report.tsv", "{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{OUTDIR}/{project}/PathoFact_{input_file}_predictions.csv" "{datadir}/{project}/PathoFact_{sample}_predictions.csv"
], ],
OUTDIR=config["OUTDIR"], project=config["project"], input_file=config["input_file"] datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
) )
elif config["workflow"] == "Tox": elif config["pathofact"]["workflow"] == "Tox":
include: include:
"workflows/Toxin_workflow.smk" "workflows/Toxin_workflow.smk"
rule all: rule all:
input: input:
expand( expand(
[ [
"{OUTDIR}/{project}/Toxin_prediction_{input_file}_report.csv", "{datadir}/{project}/Toxin_prediction_{sample}_report.csv",
"{OUTDIR}/{project}/Toxin_gene_library_{input_file}_report.tsv" "{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv"
], ],
OUTDIR=config["OUTDIR"], project=config["project"], input_file=config["input_file"] datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
) )
elif config["workflow"] == "Vir": elif config["pathofact"]["workflow"] == "Vir":
include: include:
"workflows/Virulence_workflow.smk" "workflows/Virulence_workflow.smk"
rule all: rule all:
input: input:
expand( expand(
"{OUTDIR}/{project}/Virulence_prediction_{input_file}_report.csv", "{datadir}/{project}/Virulence_prediction_{sample}_report.csv",
OUTDIR=config["OUTDIR"], project=config["project"], input_file=config["input_file"] datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
) )
elif config["workflow"] == "AMR": elif config["pathofact"]["workflow"] == "AMR":
include: include:
"workflows/AMR_workflow.smk" "workflows/AMR_workflow.smk"
rule all: rule all:
input: input:
expand( expand(
[ [
"{OUTDIR}/{project}/AMR_MGE_prediction_{input_file}_report.tsv", "{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv",
"{OUTDIR}/{project}/AMR/{input_file}_AMR_MGE_prediction_detailed.tsv" "{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv"
], ],
OUTDIR=config["OUTDIR"], project=config["project"], input_file=config["input_file"] datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"]
) )
else: else:
raise Exception("Unknown workflow option: %s" % config["workflow"]) raise Exception("Unknown workflow option: %s" % config["pathofact"]["workflow"])
#!/bin/bash -l
# Submission wrapper: runs the PathoFact Snakemake pipeline on a Slurm cluster.
# Usage: sbatch cluster.sh
#
# The #SBATCH directives below size only the *master* snakemake process;
# each rule is submitted as its own Slurm job via the --cluster template
# further down, filled in from cluster.yaml.
#SBATCH -J PathoFact
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 1
#SBATCH --time=0-6:00:00
#SBATCH -p batch
#SBATCH --qos=qos-batch
# Activate the conda environment that provides snakemake (assumes an
# environment named "PathoFact" was created beforehand -- TODO confirm).
conda activate PathoFact
# Run the pipeline. {cluster.*} placeholders are resolved from
# cluster.yaml (--cluster-config); {params.runtime}, {params.mem} and
# {threads} are resolved from each rule's definition in the workflow.
snakemake -s Snakefile --configfile config.yaml \
--use-conda --cores 10 --reason -p \
--cluster-config cluster.yaml --cluster \
"{cluster.call} {cluster.partition} {cluster.quality} {cluster.nodes} \
{cluster.runtime}{params.runtime} {cluster.mem_per_cpu}{params.mem} \
{cluster.threads}{threads} {cluster.job-name} {cluster.output}"
# Slurm submission parameters consumed by cluster.sh via
# "--cluster-config cluster.yaml". Each key is referenced in the
# --cluster template as {cluster.<key>}. Rule-specific sections below
# override the __default__ values for that rule only.
#
# NOTE: the per-rule keys must be nested (indented) under their rule
# name; as flat top-level keys the file is not valid cluster config.
__default__:
  call: "sbatch"
  nodes: ""
  mem_per_cpu: "--mem-per-cpu "      # value supplied per rule via {params.mem}
  partition: "-p batch"
  quality: "-q qos-batch"
  runtime: "-t"                      # value supplied per rule via {params.runtime}
  threads: "-n"
  job-name: "-J PathoFact.{rule}"
  output: "-o slurm-%j.%N-%x.out"
# Memory-hungry rules are routed to the bigmem partition/QOS.
run_PLASMID:
  nodes: "-N 1"
  partition: "-p bigmem"
  quality: "-q qos-bigmem"
run_VirSorter:
  nodes: "-N 1"
  partition: "-p bigmem"
  quality: "-q qos-bigmem"
run_VirFinder:
  nodes: "-N 1"
  partition: "-p bigmem"
  quality: "-q qos-bigmem"
# Insert name of input files to run analysis pathofact:
# All three input files used by the pipeline for one sample need to be given the same name, followed by the suffix .faa (amino acid, gene fasta file), .fna (nucleotide contig fasta file), .contig (table with contig and gene names). sample: ["SAMPLE_A","SAMPLE_B"] # requires user input
# Only give name of files and not the suffix (.faa, .fna, .contig) project: Project_A_PathoFact # requires user input
input_file: ["SAMPLE_A","SAMPLE_B"] datadir: /path/to/samples # requires user input
workflow: "complete"
# Define unique name for your project size_fasta: 10000
project: Project_A_PathoFact scripts: "scripts"
signalp: "/path/to/signalp-4.1/signalp" # requires user input
# Define dir to files (wil also be the output directory deeparg: "submodules/deeparg-ss/deepARG.py"
OUTDIR: /path/to/samples deepvirfinder: "submodules/DeepVirFinder/dvf.py"
tox_hmm: "databases/toxins/combined_Toxin.hmm"
# Define size of split fasta files (default 10 000 sequences/file) tox_lib: "databases/library_HMM_Toxins.csv"
size_fasta: 100000 vir_hmm: "databases/virulence/Virulence_factor.hmm"
vir_domains: "databases/models_and_domains"
# Workflow (default: "complete") plasflow_threshold: 0.7
# complete: complete pipeline: Tox + Vir + AMR prediction plasflow_minlen: 1000
# Tox: toxin prediction runtime:
# Vir: virulence prediction short: "00:10:00"
# AMR: antimicrobial resistance (AMR) & mobile genetic element (MGE) prediction medium: "01:00:00"
workflow: "complete" long: "02:00:00"
mem:
########### normal_mem_per_core_gb: "4G"
# SignalP # big_mem_cores: 4
########### big_mem_per_core_gb: "30G"
# Define path to signalP
signalp: "/path/to/signalp-4.1/signalp"
############
# Toxin #
############
# Define path to HMM
hmmscan_tool: "hmmsearch"
hmm_file: "databases/toxins/combined_Toxin.hmm"
#################
# Virulence #
#################
vir_hmm_file: "databases/virulence/Virulence_factor.hmm"
#############
# AMR #
#############
# Define path to deepARG
deep_ARG: "submodules/deeparg-ss/deepARG.py"
#Define path to Plasflow
Plasflow: "PlasFlow.py"
# Define path to phage prediction tools
# Virsorter
virsorter: "wrapper_phage_contigs_sorter_iPlant.pl"
virsorter_data: "scripts/virsorter-data"
#VirFinder
DeepVirFinder: "submodules/DeepVirFinder/dvf.py"
...@@ -3,34 +3,33 @@ ...@@ -3,34 +3,33 @@
import glob import glob
import os import os
PROJECT = config["project"]
INPUT = config["input_file"]
########################## ##########################
# AMR Prediction # # AMR Prediction #
########################## ##########################
rule run_deepARG: rule run_deepARG:
input: input:
"{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa" "{datadir}/{project}/splitted/{sample}/{file_i}.faa"
output: output:
temp("{OUTDIR}/{project}/AMR/deepARG_results/{input_file}/{file_i}.out.mapping.ARG") temp("{datadir}/{project}/AMR/deepARG_results/{sample}/{file_i}.out.mapping.ARG")
log: log:
"{OUTDIR}/{project}/AMR/deepARG_results/{input_file}/{file_i}.out.mapping.ARG.log" "{datadir}/{project}/AMR/deepARG_results/{sample}/{file_i}.out.mapping.ARG.log"
params: params:
outdir="{OUTDIR}" outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["medium"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda: conda:
"../../envs/DeepARG.yaml" "../../envs/DeepARG.yaml"
shell: shell:
"python {config[deep_ARG]} --align --type prot --genes --input {input} --output {wildcards.OUTDIR}/{wildcards.project}/AMR/deepARG_results/{wildcards.input_file}/{wildcards.file_i}.out &> {log}" "python {config[pathofact][deeparg]} --align --type prot --genes --input {input} --output {wildcards.datadir}/{wildcards.project}/AMR/deepARG_results/{wildcards.sample}/{wildcards.file_i}.out &> {log}"
def aggregate_AMR(wildcards): def aggregate_AMR(wildcards):
checkpoint_output = checkpoints.splitting.get(**wildcards).output.splits checkpoint_output = checkpoints.splitting.get(**wildcards).output.splits
return expand( return expand(
"{OUTDIR}/{project}/AMR/deepARG_results/{input_file}/{file_i}.out.mapping.ARG", "{datadir}/{project}/AMR/deepARG_results/{sample}/{file_i}.out.mapping.ARG",
OUTDIR=wildcards.OUTDIR, datadir=wildcards.datadir,
project=wildcards.project, project=wildcards.project,
input_file=wildcards.input_file, sample=wildcards.sample,
file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.faa")).i file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.faa")).i
) )
...@@ -38,8 +37,10 @@ rule aggregate_deepARG: ...@@ -38,8 +37,10 @@ rule aggregate_deepARG:
input: input:
aggregate_AMR aggregate_AMR
output: output:
"{OUTDIR}/{project}/AMR/deepARG_results/{input_file}.out.mapping.ARG" "{datadir}/{project}/AMR/deepARG_results/{sample}.out.mapping.ARG"
params: params:
outdir="{OUTDIR}" outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
shell: shell:
"cat {input} > {output}" "cat {input} > {output}"
...@@ -3,29 +3,28 @@ ...@@ -3,29 +3,28 @@
import glob import glob
import os import os
PROJECT = config["project"]
INPUT = config["input_file"]
########################## ##########################
# AMR Prediction # # AMR Prediction #
########################## ##########################
rule combine_AMR_plasmid: rule combine_AMR_plasmid:
input: input:
AMR="{OUTDIR}/{project}/AMR/deepARG_results/{input_file}.out.mapping.ARG", AMR="{datadir}/{project}/AMR/deepARG_results/{sample}.out.mapping.ARG",
AMR_translation="{OUTDIR}/{project}/renamed/{input_file}_translation.tsv", AMR_translation="{datadir}/{project}/renamed/{sample}_translation.tsv",
Plasmid="{OUTDIR}/{project}/MGE/plasmid/{input_file}_plasflow_prediction_final.tsv", Plasmid="{datadir}/{project}/MGE/plasmid/{sample}_plasflow_prediction_final.tsv",
Contig_translation="{OUTDIR}/{project}/renamed/{input_file}_Contig_translation.tsv", Contig_translation="{datadir}/{project}/renamed/{sample}_Contig_translation.tsv",
Contig_gene_list="{OUTDIR}/{input_file}.contig", Contig_gene_list="{datadir}/{sample}.contig",
VirFinder="{OUTDIR}/{project}/MGE/phage/{input_file}_VirFinder_aggregated.csv", VirFinder="{datadir}/{project}/MGE/phage/{sample}_VirFinder_aggregated.csv",
VirSorter="{OUTDIR}/{project}/MGE/phage/{input_file}_VIRSorter_aggregated.csv" VirSorter="{datadir}/{project}/MGE/phage/{sample}_VIRSorter_aggregated.csv"
output: output:
Report_1=temp("{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_detail_temp.csv"), Report_1=temp("{datadir}/{project}/AMR/{sample}_MGE_AMR_prediction_detail_temp.csv"),
Report_2=temp("{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_report_temp.csv") Report_2=temp("{datadir}/{project}/AMR/{sample}_MGE_AMR_prediction_report_temp.csv")
log: log:
"{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_detail_temp.log" "{datadir}/{project}/AMR/{sample}_MGE_AMR_prediction_detail_temp.log"
params: params:
outdir="{OUTDIR}" outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["medium"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
conda: conda:
"../../envs/R.yaml" "../../envs/R.yaml"
script: script:
...@@ -33,11 +32,13 @@ rule combine_AMR_plasmid: ...@@ -33,11 +32,13 @@ rule combine_AMR_plasmid:
rule modify_details: rule modify_details:
input: input:
"{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_detail_temp.csv" "{datadir}/{project}/AMR/{sample}_MGE_AMR_prediction_detail_temp.csv"
output: output:
"{OUTDIR}/{project}/AMR/{input_file}_AMR_MGE_prediction_detailed.tsv" "{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv"
params: params:
outdir="{OUTDIR}" outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
shell: shell:
""" """
sed 's/"//g' {input} | sed 's/,/\\t/g' > {output} sed 's/"//g' {input} | sed 's/,/\\t/g' > {output}
...@@ -45,11 +46,13 @@ rule modify_details: ...@@ -45,11 +46,13 @@ rule modify_details:
rule modify_report: rule modify_report:
input: input:
"{OUTDIR}/{project}/AMR/{input_file}_MGE_AMR_prediction_report_temp.csv" "{datadir}/{project}/AMR/{sample}_MGE_AMR_prediction_report_temp.csv"
output: output:
"{OUTDIR}/{project}/AMR_MGE_prediction_{input_file}_report.tsv" "{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv"
params: params:
outdir="{OUTDIR}" outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
shell: shell:
""" """
sed 's/"//g' {input} | sed 's/,/\\t/g' > {output} sed 's/"//g' {input} | sed 's/,/\\t/g' > {output}
......
...@@ -3,9 +3,6 @@ ...@@ -3,9 +3,6 @@
import glob import glob
import os import os
PROJECT = config["project"]
INPUT = config["input_file"]
########################## ##########################
# Phage Prediction # # Phage Prediction #
########################## ##########################
...@@ -14,81 +11,91 @@ INPUT = config["input_file"] ...@@ -14,81 +11,91 @@ INPUT = config["input_file"]
checkpoint splitphage: checkpoint splitphage:
input: input:
"{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna" "{datadir}/{project}/renamed/{sample}_Contig_ID.fna"
output: output:
split=directory("{OUTDIR}/{project}/contig_splitted/{input_file}/") split=directory("{datadir}/{project}/contig_splitted/{sample}/")
log: log:
"{OUTDIR}/{project}/contig_splitted/{input_file}.log" "{datadir}/{project}/contig_splitted/{sample}.log"
params: params:
outdir="{OUTDIR}" outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["medium"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
split=config["pathofact"]["size_fasta"]
conda: conda:
"../../envs/SeqKit.yaml" "../../envs/SeqKit.yaml"
shell: shell:
""" """
seqkit split2 -s 10000 {input} -O {wildcards.OUTDIR}/{wildcards.project}/contig_splitted/{wildcards.input_file} &> {log} seqkit split2 -s {params.split} {input} -O {wildcards.datadir}/{wildcards.project}/contig_splitted/{wildcards.sample} &> {log}
""" """
rule run_VirSorter: rule run_VirSorter:
input: input:
"{OUTDIR}/{project}/renamed/{input_file}_Contig_ID.fna" "{datadir}/{project}/renamed/{sample}_Contig_ID.fna"
output: output:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virsorter/VIRSorter_global-phage-signal.csv" "{datadir}/{project}/MGE/phage/{sample}/virsorter/VIRSorter_global-phage-signal.csv"
log: log:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virsorter/VIRSorter_global-phage-signal.log" "{datadir}/{project}/MGE/phage/{sample}/virsorter/VIRSorter_global-phage-signal.log"
params: params:
outdir="{OUTDIR}" outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["long"],
mem=config["pathofact"]["mem"]["big_mem_per_core_gb"]
conda: conda:
"../../envs/VirSorter.yaml" "../../envs/VirSorter.yaml"
threads: threads:
12 config["pathofact"]["mem"]["big_mem_cores"]
shell: shell:
""" """
{config[virsorter]} -f {input} --ncpu {threads} --wdir {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virsorter --data-dir {config[virsorter_data]} &> {log} wrapper_phage_contigs_sorter_iPlant.pl -f {input} --ncpu {threads} --wdir {wildcards.datadir}/{wildcards.project}/MGE/phage/{wildcards.sample}/virsorter --data-dir {config[pathofact][scripts]}/virsorter-data &> {log}
""" """
rule aggregate_Virsorter: localrules: aggregate_VirSorter
rule aggregate_VirSorter:
input: input:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virsorter/VIRSorter_global-phage-signal.csv" "{datadir}/{project}/MGE/phage/{sample}/virsorter/VIRSorter_global-phage-signal.csv"
output: output:
"{OUTDIR}/{project}/MGE/phage/{input_file}_VIRSorter_aggregated.csv" "{datadir}/{project}/MGE/phage/{sample}_VIRSorter_aggregated.csv"
params: params:
outdir="{OUTDIR}" outdir="{datadir}"
shell: shell:
"mv {input} {output}" "mv {input} {output}"
# VIRFINDER Prediction # VIRFINDER Prediction
rule run_Virfinder: rule run_VirFinder:
input: input:
"{OUTDIR}/{project}/contig_splitted/{input_file}/{file_i}.fna" "{datadir}/{project}/contig_splitted/{sample}/{file_i}.fna"
output: output:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt" "{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt"
log: log:
"{OUTDIR}/{project}/MGE/phage/{input_file}/virfinder/{file_i}.fna_gt1bp_dvfpred.log" "{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fna_gt1bp_dvfpred.log"
params: params:
outdir="{OUTDIR}" outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["long"],
mem=config["pathofact"]["mem"]["big_mem_per_core_gb"]
conda: conda:
"../../envs/DeepVirFinder.yaml" "../../envs/DeepVirFinder.yaml"
threads: threads:
12 config["pathofact"]["mem"]["big_mem_cores"]
shell: shell:
"python {config[DeepVirFinder]} -i {input} -o {wildcards.OUTDIR}/{wildcards.project}/MGE/phage/{wildcards.input_file}/virfinder -c {threads} &> {log}" "python {config[pathofact][deepvirfinder]} -i {input} -o {wildcards.datadir}/{wildcards.project}/MGE/phage/{wildcards.sample}/virfinder -c {threads} &> {log}"
def aggregate_virfinder(wildcards): def aggregate_VirFinder(wildcards):
checkpoint_output= checkpoints.splitphage.get(**wildcards).output.split checkpoint_output= checkpoints.splitphage.get(**wildcards).output.split
return expand( return expand(
"{OUTDIR}/{project}/MGE/phage/{input_file}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt", "{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt",
OUTDIR=wildcards.OUTDIR, datadir=wildcards.datadir,
project=wildcards.project, project=wildcards.project,
input_file=wildcards.input_file, sample=wildcards.sample,
file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.fna")).i file_i=glob_wildcards(os.path.join(checkpoint_output, "{i}.fna")).i
) )
rule aggregate_Virfinder: rule aggregate_VirFinder:
input: input:
aggregate_virfinder aggregate_VirFinder
output: output:
"{OUTDIR}/{project}/MGE/phage/{input_file}_VirFinder_aggregated.csv" "{datadir}/{project}/MGE/phage/{sample}_VirFinder_aggregated.csv"
params: params:
outdir="{OUTDIR}" outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
shell: shell:
"cat {input} >{output}" "cat {input} >{output}"