# Toxin: Snakemake rules that run an HMM scan on each split .faa file and
# merge the resulting hits into one table per input file.

import glob
import os

# Values read from the workflow configuration ("project" and "input_file").
# NOTE(review): neither name is referenced in the rules visible in this
# section; presumably they are used by other parts of the workflow - confirm.
PROJECT = config["project"]
INPUT = config["input_file"]

# HMM scan
# Scan one split .faa file against the configured HMM file with the tool
# named in config["hmmscan_tool"], writing the tabular hit list (--tblout)
# to a .hmmscan file next to the other per-split results.
# The two inputs are addressed by name (input.hmm / input.renamed) rather than
# by position so that reordering the input section cannot silently swap the
# arguments of the command.
rule run_HMM:
    input:
        hmm=config["hmm_file"],
        renamed="{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
    output:
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.hmmscan"
    message:
        "Run HMM scan on {input.renamed} to generate {output}"
    params:
        outdir="{OUTDIR}"
    conda:
        "../../envs/HMMER.yaml"
    threads:
        1
    shell:
        """
        {config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input.hmm} {input.renamed}
        """

# Adjust HMM results to correct format
# Convert one .hmmscan table into a tab-separated .hmm.csv file: lines that
# start with '#' are dropped and every run of spaces becomes a single tab.
# The shell string is a normal (non-raw) Python string, so "\t" reaches sed as
# a literal tab character and "\\+" reaches it as "\+"; the backslash is
# doubled because a bare "\+" is an invalid Python escape sequence.
rule HMM_correct_format:
    input:
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.hmmscan"
    output:
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.hmm.csv"
    message:
        "Adjust {input} to correct format: {output}"
    params:
        outdir="{OUTDIR}"
    shell:
        """
        sed -e '/^#/d' -e 's/ \\+/\t/g' {input} > {output}
        """

def aggregate_hmm(wildcards):
    """Return the per-split ``.hmm.csv`` paths belonging to one input file.

    The ``splits`` output of the ``splitting`` checkpoint is used as a
    directory: every ``<i>.faa`` file found in it contributes one
    ``<i>.hmm.csv`` path. Asking the checkpoint for its output is the
    Snakemake mechanism that defers this lookup until the checkpoint has run.

    Args:
        wildcards: Wildcards of the requesting rule. They are forwarded to the
            checkpoint and must provide ``OUTDIR``, ``project`` and
            ``input_file``.

    Returns:
        The expanded list of ``.hmm.csv`` paths, ordered by split id.
    """
    split_dir = checkpoints.splitting.get(**wildcards).output.splits
    # glob_wildcards yields ids in directory-listing order, which differs
    # between file systems; sorting keeps the concatenation order of the
    # downstream merge (and therefore its output) reproducible.
    split_ids = sorted(glob_wildcards(os.path.join(split_dir, "{i}.faa")).i)
    return expand(
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.hmm.csv",
        OUTDIR=wildcards.OUTDIR,
        project=wildcards.project,
        input_file=wildcards.input_file,
        file_i=split_ids,
    )

# Merge all per-split .hmm.csv tables of one input file (as listed by
# aggregate_hmm) into a single temporary table, keeping only tab-separated
# columns 1, 3, 5 and 6.
# Note that uniq only collapses identical lines that are adjacent; repeated
# rows that are separated by other rows are kept.
rule HMM_correct_format_2:
    input:
        aggregate_hmm
    output:
        temp("{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}.Input_HMM_R_temp.csv")
    params:
        outdir="{OUTDIR}"
    shell:
        """
        cut -f 1,3,5,6 {input} | uniq > {output}
        """

# Build the final table for one input file: a "#Toxin" line is put in front of
# the merged rows, then sed inserts the column-name line before line 1, so the
# file reads: column names, "#Toxin", data rows.
# The "#Toxin" line is written straight into the output through a subshell, so
# no intermediate "_header" file has to be created and removed.
# The shell string is a normal (non-raw) Python string: "\t" reaches the shell
# as a literal tab, and "\\\\ " reaches it as two backslashes and a space,
# which bash's $'...' quoting reduces to the "\ " that follows sed's "i"
# command. The backslashes are fully doubled because a bare "\ " is an invalid
# Python escape sequence.
rule HMM_correct_format_3:
    input:
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}.Input_HMM_R_temp.csv"
    output:
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}.Input_HMM_R.csv"
    params:
        outdir="{OUTDIR}"
    shell:
        """
        (echo "#Toxin"; cat {input}) > {output}
        sed -i $'1 i\\\\ Query_sequence\tHMM_Name\tSignificance_Evalue\tScore' {output}
        """