Toxin.smk 2.48 KB
Newer Older
Laura Denies's avatar
Laura Denies committed
1
2
3
4
5
#Toxin

import glob
import os

6
7
# Project and input-file settings, both read from the workflow config.
PROJECT, INPUT = config["project"], config["input_file"]
Laura Denies's avatar
Laura Denies committed
8

9
# HMM scan
# Runs the executable named by config["hmmscan_tool"] on one split .faa file
# against the HMM file from config["hmm_file"]. The tabular result (--tblout)
# is the rule output; stdout and stderr of the tool are redirected to the log.
rule run_HMM:
    input:
        hmm=config["hmm_file"],
        renamed="{OUTDIR}/{project}/splitted/{input_file}/{file_i}.faa"
    output:
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.hmmscan"
    log:
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.log"
    message:
        "Run HMM scan on {input.renamed} to generate {output}"
    params:
        outdir="{OUTDIR}"
    conda:
        "../../envs/HMMER.yaml"
    threads:
        1
    shell:
        """
        {config[hmmscan_tool]} --cpu {threads} --noali --notextw --tblout {output} {input.hmm} {input.renamed} &> {log}
        """

# Adjust HMM results to correct format
# Drops every line starting with '#' from the .hmmscan table and replaces each
# run of spaces with a single tab, writing the result as <file_i>.hmm.csv.
rule HMM_correct_format:
    input:
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.hmmscan"
    output:
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.hmm.csv"
    message:
        "Adjust {input} to correct format: {output}"
    params:
        outdir="{OUTDIR}"
    shell:
        """
        sed -e '/^#/d' -e 's/ \\+/\\t/g' {input} > {output}
        """

def aggregate_hmm(wildcards):
    """Return the ``.hmm.csv`` path of every chunk made by the ``splitting`` checkpoint.

    Calling ``checkpoints.splitting.get`` makes Snakemake evaluate this input
    function only after the checkpoint has run, so the ``*.faa`` files in its
    ``splits`` output can be listed.

    Args:
        wildcards: Wildcards of the requesting rule; ``OUTDIR``, ``project``
            and ``input_file`` are used to build the paths.

    Returns:
        One ``{file_i}.hmm.csv`` path per ``{i}.faa`` file found in the
        checkpoint output, ordered by chunk id.
    """
    checkpoint_output = checkpoints.splitting.get(**wildcards).output.splits
    chunk_ids = glob_wildcards(os.path.join(checkpoint_output, "{i}.faa")).i
    return expand(
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}/{file_i}.hmm.csv",
        OUTDIR=wildcards.OUTDIR,
        project=wildcards.project,
        input_file=wildcards.input_file,
        # Sorted so the order in which the chunk tables are concatenated does
        # not depend on directory listing order.
        file_i=sorted(chunk_ids),
    )
Laura Denies's avatar
Laura Denies committed
55
56

# Concatenates all per-chunk .hmm.csv tables returned by aggregate_hmm, keeping
# only tab-separated fields 1, 3, 5 and 6 and collapsing identical adjacent
# lines (uniq is run without a prior sort). The result is a temp() file.
rule HMM_correct_format_2:
    input:
        aggregate_hmm
    output:
        temp("{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}.Input_HMM_R_temp.csv")
    params:
        outdir="{OUTDIR}"
    shell:
        # stdin is taken from /dev/null so that, if the input list is ever
        # empty, cut yields an empty table instead of waiting on the job's stdin.
        """
        cut -f 1,3,5,6 {input} < /dev/null | uniq > {output}
        """
Laura Denies's avatar
Laura Denies committed
67
68

# Builds the final table: a column-header line, then a "#Toxin" line, then the
# rows of the temporary table.
# NOTE(review): fields 1 and 3 of an hmmscan --tblout table are the target and
# query names; the header labels them Query_sequence and HMM_Name. Confirm this
# matches the tool configured as config["hmmscan_tool"].
# NOTE(review): the blank after `i\` may end up as a leading space in the
# header depending on the sed version; verify against the consumer of this file.
rule HMM_correct_format_3:
    input:
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}.Input_HMM_R_temp.csv"
    output:
        "{OUTDIR}/{project}/TOXIN/HMM_toxin/{input_file}.Input_HMM_R.csv"
    params:
        outdir="{OUTDIR}"
    shell:
        # The "#Toxin" line is piped straight into cat, so no intermediate
        # header file has to be written and removed.
        """
        echo "#Toxin" | cat - {input} > {output}
        sed -i $'1 i\\\\ Query_sequence\\tHMM_Name\\tSignificance_Evalue\\tScore' {output}
        """