post-analysis.smk 9.03 KB
Newer Older
Antonie Vietor's avatar
Antonie Vietor committed
1
2
3
4
5
6
rule preseq_lc_extrap:
    input:
        "results/sam-view/{sample}.bam"
    output:
        "results/preseq/{sample}.lc_extrap"
    params:
Antonie Vietor's avatar
Antonie Vietor committed
7
         "-v {} -seed 1".format( "" if config["single_end"] else "-pe" )
Antonie Vietor's avatar
Antonie Vietor committed
8
9
10
11
12
13
14
    log:
        "logs/preseq/{sample}.log"
    wrapper:
        "0.64.0/bio/preseq/lc_extrap"

rule collect_multiple_metrics:
    input:
Antonie Vietor's avatar
Antonie Vietor committed
15
         bam="results/filtered/{sample}.sorted.bam",
Antonie Vietor's avatar
Antonie Vietor committed
16
         ref="resources/ref/genome.fasta"
17
    output:
Antonie Vietor's avatar
Antonie Vietor committed
18
19
20
21
        # Through the output file extensions the different tools for the metrics can be selected
        # so that it is not necessary to specify them under params with the "PROGRAM" option.
        # Usable extensions (and which tools they implicitly call) are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/picard/collectmultiplemetrics.html.
Antonie Vietor's avatar
Antonie Vietor committed
22
23
24
25
26
27
28
29
        [
           "{path}{sample}.insert_size_metrics",
            report(
                "{path}{sample}.insert_size_histogram.pdf",
                caption="../report/plot_insert_size_histogram_picard_mm.rst",
                category="Multiple Metrics (picard)"
            )
        ] if not config["single_end"] else [],
Antonie Vietor's avatar
Antonie Vietor committed
30
31
32
33
34
        multiext("{path}{sample}",
                 ".alignment_summary_metrics",
                 ".base_distribution_by_cycle_metrics",
                 ".quality_by_cycle_metrics",
                 ".quality_distribution_metrics",
Antonie Vietor's avatar
Antonie Vietor committed
35
                 ),
36
37
38
39
40
41
42
43
44
45
46
47
        report(
            "{path}{sample}.base_distribution_by_cycle.pdf",
            caption="../report/plot_base_distribution_by_cycle_picard_mm.rst",
            category="Multiple Metrics (picard)"),
        report(
            "{path}{sample}.quality_by_cycle.pdf",
            caption="../report/plot_quality_by_cycle_picard_mm.rst",
            category="Multiple Metrics (picard)"),
        report(
            "{path}{sample}.quality_distribution.pdf",
            caption="../report/plot_quality_distribution_picard_mm.rst",
            category="Multiple Metrics (picard)")
Antonie Vietor's avatar
Antonie Vietor committed
48
49
50
51
52
53
54
    resources:
        # This parameter (default 3 GB) can be used to limit the total resources a pipeline is allowed to use, see:
        #     https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#resources
        mem_gb=3
    log:
        "logs/picard/{path}{sample}.log"
    params:
55
56
        # optional parameters
        "{} ".format(config["params"]["collect-multiple-metrics"])
Antonie Vietor's avatar
Antonie Vietor committed
57
58
59
60
61
    wrapper:
        "0.64.0/bio/picard/collectmultiplemetrics"

rule genomecov:
    input:
Antonie Vietor's avatar
Antonie Vietor committed
62
63
64
65
66
67
68
        "results/filtered/{sample}.sorted.bam",
        flag_stats=expand("results/{step}/{{sample}}.sorted.{step}.flagstat",
            step= "bamtools_filtered" if config["single_end"]
            else "orph_rm_pe"),
        stats=expand("results/{step}/{{sample}}.sorted.{step}.stats.txt",
            step= "bamtools_filtered" if config["single_end"]
            else "orph_rm_pe"),
Antonie Vietor's avatar
Antonie Vietor committed
69
70
71
72
    output:
        pipe("results/bed_graph/{sample}.bedgraph")
    log:
        "logs/bed_graph/{sample}.log"
Antonie Vietor's avatar
Antonie Vietor committed
73
74
75
76
77
78
79
80
81
82
83
84
85
86
    params:
        lambda w, input:
            "-bg -scale $(grep 'mapped (' {flagstats_file} | awk '{{print 1000000/$1}}') {pe_fragment} {extend}".format(
            flagstats_file=input.flag_stats,
            pe_fragment="" if config["single_end"] else "-pc",
            # Estimated fragment size used to extend single-end reads
            extend=
                "-fs $(grep ^SN {stats} | "
                "cut -f 2- | "
                "grep -m1 'average length:' | "
                "awk '{{print $NF}}')".format(
                stats=input.stats)
            if config["single_end"] else ""
        )
Antonie Vietor's avatar
Antonie Vietor committed
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
    wrapper:
        "0.64.0/bio/bedtools/genomecov"

rule sort_genomecov:
    input:
        "results/bed_graph/{sample}.bedgraph"
    output:
        "results/bed_graph/{sample}.sorted.bedgraph"
    log:
        "logs/sort_genomecov/{sample}.log"
    shell:
        "sort -k1,1 -k2,2n {input} > {output} 2> {log}"

rule bedGraphToBigWig:
    input:
        bedGraph="results/bed_graph/{sample}.sorted.bedgraph",
        chromsizes="resources/ref/genome.chrom.sizes"
    output:
        "results/big_wig/{sample}.bigWig"
    log:
        "logs/big_wig/{sample}.log"
    params:
        ""
    wrapper:
        "0.64.0/bio/ucsc/bedGraphToBigWig"

Antonie Vietor's avatar
Antonie Vietor committed
113
rule create_igv_bigwig:
Antonie Vietor's avatar
Antonie Vietor committed
114
115
116
117
    input:
        "resources/ref/genome.bed",
        expand("results/big_wig/{sample}.bigWig", sample=samples.index)
    output:
Antonie Vietor's avatar
Antonie Vietor committed
118
        "results/IGV/big_wig/merged_library.bigWig.igv.txt"
Antonie Vietor's avatar
Antonie Vietor committed
119
    log:
Antonie Vietor's avatar
Antonie Vietor committed
120
        "logs/igv/create_igv_bigwig.log"
Antonie Vietor's avatar
Antonie Vietor committed
121
    shell:
122
123
        "find {input} -type f -name '*.bigWig' -exec "
        "echo -e 'results/IGV/big_wig/\"{{}}\"\t0,0,178' \;  > {output} 2> {log}"
Antonie Vietor's avatar
Antonie Vietor committed
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

rule compute_matrix:
    input:
         bed="resources/ref/genome.bed",
         bigwig=expand("results/big_wig/{sample}.bigWig", sample=samples.index)
    output:
        # Usable output variables, their extensions and which option they implicitly call are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/deeptools/computematrix.html.
        matrix_gz="results/deeptools/matrix_files/matrix.gz",
        matrix_tab="results/deeptools/matrix_files/matrix.tab"
    log:
        "logs/deeptools/compute_matrix.log"
    threads: 2
    params:
        command="scale-regions",
        extra="--regionBodyLength 1000 "
              "--beforeRegionStartLength 3000 "
              "--afterRegionStartLength 3000 "
Antonie Vietor's avatar
Antonie Vietor committed
142
              "--missingDataAsZero " # added to prevent black output in the heatmap (plot_heatmap rule) https://github.com/deeptools/deepTools/issues/793
Antonie Vietor's avatar
Antonie Vietor committed
143
144
145
146
147
148
149
150
151
              "--skipZeros "
              "--smartLabels "
              "--numberOfProcessors 2 "
    wrapper:
        "0.64.0/bio/deeptools/computematrix"

rule plot_profile:
    input:
         "results/deeptools/matrix_files/matrix.gz"
152
    output:
Antonie Vietor's avatar
Antonie Vietor committed
153
154
        # Usable output variables, their extensions and which option they implicitly call are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/deeptools/plotprofile.html.
155
156
157
158
        plot_img=report(
            "results/deeptools/plot_profile.pdf",
            caption="../report/plot_profile_deeptools.rst",
            category="GenomicRegions"),
Antonie Vietor's avatar
Antonie Vietor committed
159
160
161
162
163
164
165
166
167
168
169
        data="results/deeptools/plot_profile_data.tab"
    log:
        "logs/deeptools/plot_profile.log"
    params:
        ""
    wrapper:
        "0.64.0/bio/deeptools/plotprofile"

rule plot_heatmap:
    input:
         "results/deeptools/matrix_files/matrix.gz"
170
    output:
Antonie Vietor's avatar
Antonie Vietor committed
171
172
        # Usable output variables, their extensions and which option they implicitly call are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/deeptools/plotheatmap.html.
173
174
175
176
        heatmap_img=report(
            "results/deeptools/heatmap.pdf",
            caption="../report/plot_heatmap_deeptools.rst",
            category="Heatmaps"),
Antonie Vietor's avatar
Antonie Vietor committed
177
178
179
180
181
182
183
184
185
186
        heatmap_matrix="results/deeptools/heatmap_matrix.tab"
    log:
        "logs/deeptools/heatmap.log"
    params:
        ""
    wrapper:
        "0.64.0/bio/deeptools/plotheatmap"

rule phantompeakqualtools:
    input:
Antonie Vietor's avatar
Antonie Vietor committed
187
        "results/filtered/{sample}.sorted.bam"
Antonie Vietor's avatar
Antonie Vietor committed
188
    output:  #ToDo: add description to report caption
Antonie Vietor's avatar
Antonie Vietor committed
189
190
        res_phantom="results/phantompeakqualtools/{sample}.phantompeak.spp.out",
        r_data="results/phantompeakqualtools/{sample}.phantompeak.Rdata",
191
192
193
194
        plot=report(
            "results/phantompeakqualtools/{sample}.phantompeak.pdf",
            caption="../report/plot_phantompeak_phantompeakqualtools.rst",
            category="Phantompeak")
Antonie Vietor's avatar
Antonie Vietor committed
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
    threads:
        8
    log:
        "logs/phantompeakqualtools/{sample}.phantompeak.log"
    conda:
        "../envs/phantompeakqualtools.yaml"
    shell:
        "( Rscript -e \"library(caTools); source('../workflow/scripts/run_spp.R')\" "
        "  -c={input} -savp={output.plot} -savd={output.r_data} "
        "  -out={output.res_phantom} -p={threads} 2>&1 ) >{log}"

rule phantompeak_correlation:
    input:
        data="results/phantompeakqualtools/{sample}.phantompeak.Rdata",
        header="../workflow/header/spp_corr_header.txt"
    output:
        "results/phantompeakqualtools/{sample}.spp_correlation_mqc.tsv"
    log:
        "logs/phantompeakqualtools/correlation/{sample}.spp_corr.log"
    conda:
        "../envs/phantom_corr.yaml"
    script:
        "../scripts/phantompeak_correlation.R"

rule phantompeak_multiqc:
    # NSC (Normalized strand cross-correlation) and RSC (relative strand cross-correlation) metrics use cross-correlation
    input:
        data="results/phantompeakqualtools/{sample}.phantompeak.spp.out",
        nsc_header="../workflow/header/nsc_header.txt",
        rsc_header="../workflow/header/rsc_header.txt"
    output:
        nsc="results/phantompeakqualtools/{sample}.spp_nsc_mqc.tsv",
        rsc="results/phantompeakqualtools/{sample}.spp_rsc_mqc.tsv"
    log:
        "logs/phantompeakqualtools/correlation/{sample}.nsc_rsc.log"
    conda:
        "../envs/gawk.yaml"
    shell:
        "( gawk -v OFS='\t' '{{print $1, $9}}' {input.data} | cat {input.nsc_header} - > {output.nsc} && "
        "  gawk -v OFS='\t' '{{print $1, $10}}' {input.data} | cat {input.rsc_header} - > {output.rsc} 2>&1 ) >{log}"