Phage.smk 3.43 KB
Newer Older
Laura Denies's avatar
Laura Denies committed
1
2
3
4
5
6
7
8
9
10
11
12
# Phage

import glob
import os

##########################
#     Phage Prediction   #
##########################

# Contig splitting (input for the VirFinder prediction) and VIRSORTER prediction

checkpoint splitphage:
    """
    Split the renamed contig FASTA of one sample into several smaller FASTA
    files with `seqkit split2`.

    Declared as a checkpoint because the number of files written into the
    output directory is not known before the command has run; the
    aggregate_VirFinder input function reads the directory afterwards.
    """
    input:
        "{datadir}/{project}/renamed/{sample}_Contig_ID.fna"
    output:
        # Whole directory is the output; individual part names are not declared.
        split=directory("{datadir}/{project}/contig_splitted/{sample}/")
    params:
        # Value handed to `seqkit split2 -s` (taken from the size_fasta config key).
        split=config["pathofact"]["size_fasta"],
        # NOTE(review): outdir/runtime/mem are not referenced by the shell
        # command below -- presumably read by the cluster submission setup; confirm.
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
        runtime=config["pathofact"]["runtime"]["medium"],
        outdir="{datadir}"
    log:
        "{datadir}/{project}/contig_splitted/{sample}.log"
    conda:
        "../../envs/SeqKit.yaml"
    shell:
        # stdout and stderr of seqkit are both redirected into the log file.
        """
        seqkit split2 -s {params.split} {input} -O {wildcards.datadir}/{wildcards.project}/contig_splitted/{wildcards.sample} &> {log}
        """
Laura Denies's avatar
Laura Denies committed
30
31

rule run_VirSorter:
    """
    Run VirSorter on the complete (unsplit) renamed contig FASTA of a sample.

    The VirSorter working directory is the sample's `virsorter` folder; the
    rule declares the global phage signal CSV inside it as its only output.
    """
    input:
        "{datadir}/{project}/renamed/{sample}_Contig_ID.fna"
    output:
        "{datadir}/{project}/MGE/phage/{sample}/virsorter/VIRSorter_global-phage-signal.csv"
    threads:
        # Forwarded to VirSorter through --ncpu.
        config["pathofact"]["mem"]["big_mem_cores"]
    params:
        # NOTE(review): none of these params appear in the shell command --
        # presumably consumed by the cluster submission setup; confirm.
        mem=config["pathofact"]["mem"]["big_mem_per_core_gb"],
        runtime=config["pathofact"]["runtime"]["long"],
        outdir="{datadir}"
    log:
        "{datadir}/{project}/MGE/phage/{sample}/virsorter/VIRSorter_global-phage-signal.log"
    conda:
        "../../envs/VirSorter.yaml"
    shell:
        # --data-dir points at the virsorter-data folder below the configured
        # scripts directory; stdout and stderr both go to the log file.
        """
        wrapper_phage_contigs_sorter_iPlant.pl -f {input} --ncpu {threads} --wdir {wildcards.datadir}/{wildcards.project}/MGE/phage/{wildcards.sample}/virsorter --data-dir {config[pathofact][scripts]}/virsorter-data &> {log}
        """

51
52
localrules: aggregate_VirSorter
rule aggregate_VirSorter:
    """
    Place the VirSorter phage signal CSV of a sample next to the other
    per-sample phage results under a sample-prefixed file name.

    Listed in `localrules`, so the job is executed locally rather than
    submitted to the cluster.
    """
    input:
        "{datadir}/{project}/MGE/phage/{sample}/virsorter/VIRSorter_global-phage-signal.csv"
    output:
        "{datadir}/{project}/MGE/phage/{sample}_VIRSorter_aggregated.csv"
    params:
        # Not referenced by the shell command below.
        outdir="{datadir}"
    shell:
        # The file is moved, not copied: the run_VirSorter output no longer
        # exists at its original location once this rule has finished.
        "mv {input} {output}"
Laura Denies's avatar
Laura Denies committed
61
62

# VIRFINDER Prediction
63
rule run_VirFinder:
    """
    Run DeepVirFinder on a single split FASTA file of a sample.

    One job is created per `file_i` wildcard value, i.e. per file that the
    splitphage checkpoint wrote into the sample's `contig_splitted` directory.
    """
    input:
        "{datadir}/{project}/contig_splitted/{sample}/{file_i}.fna"
    output:
        # NOTE(review): assumes the tool names its result
        # <input file name>_gt1bp_dvfpred.txt inside the -o directory; confirm
        # against the DeepVirFinder version in use.
        "{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt"
    threads:
        # Forwarded to the script through -c.
        config["pathofact"]["mem"]["big_mem_cores"]
    params:
        # NOTE(review): none of these params appear in the shell command --
        # presumably consumed by the cluster submission setup; confirm.
        mem=config["pathofact"]["mem"]["big_mem_per_core_gb"],
        runtime=config["pathofact"]["runtime"]["long"],
        outdir="{datadir}"
    log:
        "{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fna_gt1bp_dvfpred.log"
    conda:
        "../../envs/DeepVirFinder.yaml"
    shell:
        # The script path comes from the deepvirfinder config key; stdout and
        # stderr are both written to the log file.
        "python {config[pathofact][deepvirfinder]} -i {input} -o {wildcards.datadir}/{wildcards.project}/MGE/phage/{wildcards.sample}/virfinder -c {threads} &> {log}"
Laura Denies's avatar
Laura Denies committed
80

81
def aggregate_VirFinder(wildcards):
    """Return the VirFinder result paths for every split FASTA of a sample.

    Used as an input function: it looks up the output directory of the
    ``splitphage`` checkpoint for the given wildcards, collects the stem of
    each ``*.fna`` file found there, and expands the VirFinder prediction
    path once per stem.

    Args:
        wildcards: Snakemake wildcards object providing ``datadir``,
            ``project`` and ``sample``.

    Returns:
        List of ``..._gt1bp_dvfpred.txt`` paths, one per split FASTA file.
    """
    # Requesting the checkpoint output ties this input function to the
    # completion of splitphage for these wildcards.
    split_dir = checkpoints.splitphage.get(**wildcards).output.split

    # Stems of the split files that are actually present in the directory.
    fasta_pattern = os.path.join(split_dir, "{i}.fna")
    part_stems = glob_wildcards(fasta_pattern).i

    return expand(
        "{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt",
        datadir=wildcards.datadir,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=part_stems,
    )
Laura Denies's avatar
Laura Denies committed
90

91
rule aggregate_VirFinder:
    """
    Concatenate all per-part VirFinder prediction files of a sample into a
    single aggregated file.

    The list of inputs is supplied by the aggregate_VirFinder input function.
    """
    input:
        aggregate_VirFinder
    output:
        "{datadir}/{project}/MGE/phage/{sample}_VirFinder_aggregated.csv"
    params:
        # NOTE(review): none of these params appear in the shell command --
        # presumably consumed by the cluster submission setup; confirm.
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
        runtime=config["pathofact"]["runtime"]["short"],
        outdir="{datadir}"
    shell:
        # Plain concatenation in the order of the input list; each part file
        # is appended unchanged.
        "cat {input} >{output}"