Plasmid.smk 5.25 KB
Newer Older
Laura Denies's avatar
Laura Denies committed
1
2
3
4
5
6
7
8
9
10
11
#Plasmid

import glob
import os

##########################
#   Plasmid Prediction   #
##########################

# PlasFlow Preprocessing
# Length-filter the renamed contigs of one sample ahead of PlasFlow (see `message`).
# The shell command runs `filter.pl <minlen> <input>`; the script's stdout is
# captured as the filtered FASTA and its stderr is written to the log file.
# NOTE(review): the exact cut-off semantics (>= vs >) are defined in filter.pl,
# which is not visible here — confirm against the script.
rule filter_seq:
    input:
        "{datadir}/{project}/renamed/{sample}_Contig_ID.fna"
    output:
        "{datadir}/{project}/{sample}_filtered.fna"
    log:
        "{datadir}/{project}/{sample}_filtered.log"
    conda:
        "../../envs/Biopython.yaml"
    params:
        # outdir/runtime/mem are not referenced by the shell command below;
        # presumably they are consumed by the cluster submission setup — TODO confirm.
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["medium"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
        # Length threshold handed to filter.pl as its first argument.
        minlen=config["pathofact"]["plasflow_minlen"]
    message: "Filter samples on length for PlasFlow predictions: {wildcards.project} - {wildcards.sample}"
    shell:
        "{config[pathofact][scripts]}/filter.pl {params.minlen} {input} > {output} 2> {log}"
Laura Denies's avatar
Laura Denies committed
28
29

# Checkpoint: split the length-filtered FASTA of one sample into chunk files.
# Declared as a checkpoint because the number of chunks is only known after
# split.py has run; `aggregate_plasmid_input` globs this directory for
# `{i}.fasta` files once the checkpoint has finished.
checkpoint splitplasmid:
    input:
        "{datadir}/{project}/{sample}_filtered.fna"
    output:
        # The whole directory is the output; individual chunk names are not declared.
        split=directory("{datadir}/{project}/MGE/plasmid_splitted/{sample}/")
    params:
        # outdir/runtime/mem are not used by the shell command;
        # presumably read by the cluster submission setup — TODO confirm.
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
        # Second positional argument of split.py.
        # NOTE(review): presumably the number of sequences per chunk — confirm in split.py.
        split=config["pathofact"]["size_fasta"]
    conda:
        "../../envs/Biopython.yaml"
    shell:
        """
         python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/MGE/plasmid_splitted/{wildcards.sample}
        """
Laura Denies's avatar
Laura Denies committed
45
46
47

# PlasFlow Plasmid prediction
# Run PlasFlow on a single FASTA chunk produced by the `splitplasmid` checkpoint.
# The per-chunk prediction table is marked temp(), so Snakemake removes it once
# every rule consuming it has finished. Both stdout and stderr of PlasFlow.py
# are redirected to the per-chunk log.
rule run_PLASMID:
    input:
        "{datadir}/{project}/MGE/plasmid_splitted/{sample}/{file_i}.fasta"
    output:
        temp("{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}/{file_i}_plasflow_prediction.tsv")
    log:
        "{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}/{file_i}_plasflow_prediction.log"
    conda:
        "../../envs/PlasFlow.yaml"
    params:
        # outdir/runtime/mem are not used by the shell command;
        # presumably read by the cluster submission setup — TODO confirm.
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["long"],
        mem=config["pathofact"]["mem"]["big_mem_per_core_gb"],
        # Passed through to PlasFlow.py as --threshold.
        threshold=config["pathofact"]["plasflow_threshold"]
    message: "Executing PlasFLow on the following sample(s): {wildcards.project} - {wildcards.sample}"
    shell:
        """
        PlasFlow.py --input {input} --output {output} --threshold {params.threshold} &> {log}
        """
Laura Denies's avatar
Laura Denies committed
66
67

def aggregate_plasmid_input(wildcards):
    """Return the per-chunk PlasFlow prediction tables needed for one sample.

    Looks up the output directory of the ``splitplasmid`` checkpoint for the
    given wildcards, globs it for ``{i}.fasta`` chunk files and expands one
    ``{file_i}_plasflow_prediction.tsv`` path per chunk found.

    Args:
        wildcards: Snakemake wildcards of the requesting rule; ``datadir``,
            ``project`` and ``sample`` are read from it.

    Returns:
        The list of prediction-table paths produced by ``expand``, one per
        chunk file present in the checkpoint directory.
    """
    # Resolving the checkpoint output is what ties this input function to the
    # checkpoint: the chunk list can only be globbed after the split has run.
    checkpoint_output = checkpoints.splitplasmid.get(**wildcards).output.split
    chunk_ids = glob_wildcards(os.path.join(checkpoint_output, "{i}.fasta")).i
    return expand(
        "{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}/{file_i}_plasflow_prediction.tsv",
        datadir=wildcards.datadir,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=chunk_ids,
    )
Laura Denies's avatar
Laura Denies committed
76
77

# Concatenate all per-chunk PlasFlow prediction tables of one sample into a
# single file. The input list is computed by `aggregate_plasmid_input`, i.e. it
# depends on which chunk files the `splitplasmid` checkpoint produced.
# NOTE(review): plain `cat` keeps every line of every chunk table; if each
# table starts with a header line, the header will repeat — confirm downstream
# consumers tolerate that.
rule Plasmid_aggregate:
    input:
        aggregate_plasmid_input
    output:
        "{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}_plasflow_aggregated.tsv"
    params:
        # Not used by the shell command; presumably read by the cluster
        # submission setup — TODO confirm.
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    message: "Aggregate PlasFlow results on the following sample(s): {wildcards.project} - {wildcards.sample}"
    shell:
        "cat {input} > {output}"
Laura Denies's avatar
Laura Denies committed
89
90

# Reduce the aggregated PlasFlow table of one sample to two columns:
# `cut -f 3,6` keeps the 3rd and 6th tab-separated fields of every line.
# NOTE(review): presumably these are the contig name and the predicted label —
# confirm against the column layout of the PlasFlow output.
rule select:
    input:
        "{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}_plasflow_aggregated.tsv"
    output:
        "{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}_plasflow_prediction_final.tsv"
    params:
        # Not used by the shell command; presumably read by the cluster
        # submission setup — TODO confirm.
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    shell:
        """
        cut -f 3,6 {input} > {output}
        """
103
104
105
106
107
108
109
110
111
112
113
114
115
116

# Run MOB-suite's `mob_typer` in multi-record mode on a single contig chunk.
# The per-chunk report is marked temp(), so Snakemake removes it once every
# rule consuming it has finished.
rule run_MOBsuite:
    input:
        "{datadir}/{project}/contig_splitted/{sample}/{file_i}.fasta"
    output:
        temp("{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt")
    log:
        # Kept next to the MOB_suite results (it previously pointed into the
        # PlasFlow directory, which belongs to a different tool).
        "{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.log"
    params:
        # Not used by the shell command; presumably read by the cluster
        # submission setup — TODO confirm.
        # Indented with spaces only: mixing a tab into this list is fragile
        # under Python's tab/space consistency check.
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["long"]
    threads:
        config["pathofact"]["mem"]["big_mem_cores"]
    conda:
        "../../envs/MOB_suite.yaml"
    message: "Executing MOB_suite with {threads} threads on the following sample(s): {wildcards.project} - {wildcards.sample}"
    # {output} expands to exactly the path that was spelled out via wildcards
    # before; stdout/stderr now go to the declared log, which was never written.
    shell:
        "mob_typer --multi --infile {input} --out_file {output} -n {threads} &> {log}"

def aggregate_MOBsuite(wildcards):
    """Return the per-chunk MOB_suite reports needed for one sample.

    Resolves the output directory of the ``splitcontig`` checkpoint (defined
    outside this section) for the given wildcards, globs it for ``{i}.fasta``
    chunk files and builds one ``{file_i}_MOB_suite_prediction.txt`` path per
    chunk found.

    Args:
        wildcards: Snakemake wildcards of the requesting rule; ``datadir``,
            ``project`` and ``sample`` are read from it.

    Returns:
        The list of report paths produced by ``expand``, one per chunk file.
    """
    split_dir = checkpoints.splitcontig.get(**wildcards).output.split
    chunk_pattern = os.path.join(split_dir, "{i}.fasta")
    chunk_ids = glob_wildcards(chunk_pattern).i
    report_template = "{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt"
    return expand(
        report_template,
        datadir=wildcards.datadir,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=chunk_ids,
    )

# Concatenate all per-chunk MOB_suite reports of one sample into a single file.
# The input list comes from the `aggregate_MOBsuite` input function, i.e. it
# depends on which chunk files the `splitcontig` checkpoint produced.
# NOTE(review): plain `cat` keeps every line of every report; if each report
# starts with a header line, the header will repeat — confirm downstream
# consumers tolerate that.
rule aggregate_MOBsuite:
    input:
        aggregate_MOBsuite
    output:
        "{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv"
    params:
        # Not used by the shell command; presumably read by the cluster
        # submission setup — TODO confirm.
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["short"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
    message: "Aggregate MOB_suite results on the following sample(s): {wildcards.project} - {wildcards.sample}"
    shell:
        "cat {input} > {output}"