post-analysis.smk 9 KB
Newer Older
Antonie Vietor's avatar
Antonie Vietor committed
1
2
3
4
5
6
rule preseq_lc_extrap:
    input:
        "results/sam-view/{sample}.bam"
    output:
        "results/preseq/{sample}.lc_extrap"
    params:
Antonie Vietor's avatar
Antonie Vietor committed
7
         "-v {} -seed 1".format( "" if config["single_end"] else "-pe" )
Antonie Vietor's avatar
Antonie Vietor committed
8
9
    log:
        "logs/preseq/{sample}.log"
10
11
12
13
    shell:
        """
        preseq lc_extrap {params} -output {output} {input}  2> {log}
        """
Antonie Vietor's avatar
Antonie Vietor committed
14
15
16

rule collect_multiple_metrics:
    input:
Antonie Vietor's avatar
Antonie Vietor committed
17
         bam="results/filtered/{sample}.sorted.bam",
Antonie Vietor's avatar
Antonie Vietor committed
18
         ref="resources/ref/genome.fasta"
19
    output:
Antonie Vietor's avatar
Antonie Vietor committed
20
21
22
23
        # Through the output file extensions the different tools for the metrics can be selected
        # so that it is not necessary to specify them under params with the "PROGRAM" option.
        # Usable extensions (and which tools they implicitly call) are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/picard/collectmultiplemetrics.html.
Antonie Vietor's avatar
Antonie Vietor committed
24
25
26
27
28
29
30
31
        [
           "{path}{sample}.insert_size_metrics",
            report(
                "{path}{sample}.insert_size_histogram.pdf",
                caption="../report/plot_insert_size_histogram_picard_mm.rst",
                category="Multiple Metrics (picard)"
            )
        ] if not config["single_end"] else [],
Antonie Vietor's avatar
Antonie Vietor committed
32
33
34
35
36
        multiext("{path}{sample}",
                 ".alignment_summary_metrics",
                 ".base_distribution_by_cycle_metrics",
                 ".quality_by_cycle_metrics",
                 ".quality_distribution_metrics",
Antonie Vietor's avatar
Antonie Vietor committed
37
                 ),
38
39
40
41
42
43
44
45
46
47
48
49
        report(
            "{path}{sample}.base_distribution_by_cycle.pdf",
            caption="../report/plot_base_distribution_by_cycle_picard_mm.rst",
            category="Multiple Metrics (picard)"),
        report(
            "{path}{sample}.quality_by_cycle.pdf",
            caption="../report/plot_quality_by_cycle_picard_mm.rst",
            category="Multiple Metrics (picard)"),
        report(
            "{path}{sample}.quality_distribution.pdf",
            caption="../report/plot_quality_distribution_picard_mm.rst",
            category="Multiple Metrics (picard)")
Antonie Vietor's avatar
Antonie Vietor committed
50
51
52
53
54
55
56
    resources:
        # This parameter (default 3 GB) can be used to limit the total resources a pipeline is allowed to use, see:
        #     https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#resources
        mem_gb=3
    log:
        "logs/picard/{path}{sample}.log"
    params:
57
58
        # optional parameters
        "{} ".format(config["params"]["collect-multiple-metrics"])
Antonie Vietor's avatar
Antonie Vietor committed
59
60
61
62
63
    wrapper:
        "0.64.0/bio/picard/collectmultiplemetrics"

rule genomecov:
    input:
Antonie Vietor's avatar
Antonie Vietor committed
64
65
66
67
68
69
70
        "results/filtered/{sample}.sorted.bam",
        flag_stats=expand("results/{step}/{{sample}}.sorted.{step}.flagstat",
            step= "bamtools_filtered" if config["single_end"]
            else "orph_rm_pe"),
        stats=expand("results/{step}/{{sample}}.sorted.{step}.stats.txt",
            step= "bamtools_filtered" if config["single_end"]
            else "orph_rm_pe"),
Antonie Vietor's avatar
Antonie Vietor committed
71
72
73
74
    output:
        pipe("results/bed_graph/{sample}.bedgraph")
    log:
        "logs/bed_graph/{sample}.log"
Antonie Vietor's avatar
Antonie Vietor committed
75
76
77
78
79
80
81
82
83
84
85
86
87
88
    params:
        lambda w, input:
            "-bg -scale $(grep 'mapped (' {flagstats_file} | awk '{{print 1000000/$1}}') {pe_fragment} {extend}".format(
            flagstats_file=input.flag_stats,
            pe_fragment="" if config["single_end"] else "-pc",
            # Estimated fragment size used to extend single-end reads
            extend=
                "-fs $(grep ^SN {stats} | "
                "cut -f 2- | "
                "grep -m1 'average length:' | "
                "awk '{{print $NF}}')".format(
                stats=input.stats)
            if config["single_end"] else ""
        )
Antonie Vietor's avatar
Antonie Vietor committed
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
    wrapper:
        "0.64.0/bio/bedtools/genomecov"

rule sort_genomecov:
    input:
        "results/bed_graph/{sample}.bedgraph"
    output:
        "results/bed_graph/{sample}.sorted.bedgraph"
    log:
        "logs/sort_genomecov/{sample}.log"
    shell:
        "sort -k1,1 -k2,2n {input} > {output} 2> {log}"

rule bedGraphToBigWig:
    input:
        bedGraph="results/bed_graph/{sample}.sorted.bedgraph",
        chromsizes="resources/ref/genome.chrom.sizes"
    output:
AntonieV's avatar
AntonieV committed
107
        "results/big_wig/{sample}.bigWig"
Antonie Vietor's avatar
Antonie Vietor committed
108
109
110
111
112
113
114
    log:
        "logs/big_wig/{sample}.log"
    params:
        ""
    wrapper:
        "0.64.0/bio/ucsc/bedGraphToBigWig"

Antonie Vietor's avatar
Antonie Vietor committed
115
rule create_igv_bigwig:
Antonie Vietor's avatar
Antonie Vietor committed
116
117
118
119
    input:
        "resources/ref/genome.bed",
        expand("results/big_wig/{sample}.bigWig", sample=samples.index)
    output:
Antonie Vietor's avatar
Antonie Vietor committed
120
        "results/IGV/big_wig/merged_library.bigWig.igv.txt"
Antonie Vietor's avatar
Antonie Vietor committed
121
    log:
Antonie Vietor's avatar
Antonie Vietor committed
122
        "logs/igv/create_igv_bigwig.log"
Antonie Vietor's avatar
Antonie Vietor committed
123
    shell:
124
        "find {input} -type f -name '*.bigWig' -exec "
AntonieV's avatar
AntonieV committed
125
        "echo -e '{{}}\t0,0,178' \;  > {output} 2> {log}"
Antonie Vietor's avatar
Antonie Vietor committed
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

rule compute_matrix:
    input:
         bed="resources/ref/genome.bed",
         bigwig=expand("results/big_wig/{sample}.bigWig", sample=samples.index)
    output:
        # Usable output variables, their extensions and which option they implicitly call are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/deeptools/computematrix.html.
        matrix_gz="results/deeptools/matrix_files/matrix.gz",
        matrix_tab="results/deeptools/matrix_files/matrix.tab"
    log:
        "logs/deeptools/compute_matrix.log"
    threads: 2
    params:
        command="scale-regions",
        extra="--regionBodyLength 1000 "
              "--beforeRegionStartLength 3000 "
              "--afterRegionStartLength 3000 "
Antonie Vietor's avatar
Antonie Vietor committed
144
              "--missingDataAsZero " # added to prevent black output in the heatmap (plot_heatmap rule) https://github.com/deeptools/deepTools/issues/793
Antonie Vietor's avatar
Antonie Vietor committed
145
146
147
148
149
150
151
152
153
              "--skipZeros "
              "--smartLabels "
              "--numberOfProcessors 2 "
    wrapper:
        "0.64.0/bio/deeptools/computematrix"

rule plot_profile:
    input:
         "results/deeptools/matrix_files/matrix.gz"
154
    output:
Antonie Vietor's avatar
Antonie Vietor committed
155
156
        # Usable output variables, their extensions and which option they implicitly call are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/deeptools/plotprofile.html.
157
158
159
160
        plot_img=report(
            "results/deeptools/plot_profile.pdf",
            caption="../report/plot_profile_deeptools.rst",
            category="GenomicRegions"),
Antonie Vietor's avatar
Antonie Vietor committed
161
162
163
164
165
166
167
168
169
170
171
        data="results/deeptools/plot_profile_data.tab"
    log:
        "logs/deeptools/plot_profile.log"
    params:
        ""
    wrapper:
        "0.64.0/bio/deeptools/plotprofile"

rule plot_heatmap:
    input:
         "results/deeptools/matrix_files/matrix.gz"
172
    output:
Antonie Vietor's avatar
Antonie Vietor committed
173
174
        # Usable output variables, their extensions and which option they implicitly call are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/deeptools/plotheatmap.html.
175
176
177
178
        heatmap_img=report(
            "results/deeptools/heatmap.pdf",
            caption="../report/plot_heatmap_deeptools.rst",
            category="Heatmaps"),
Antonie Vietor's avatar
Antonie Vietor committed
179
180
181
182
183
184
185
186
187
188
        heatmap_matrix="results/deeptools/heatmap_matrix.tab"
    log:
        "logs/deeptools/heatmap.log"
    params:
        ""
    wrapper:
        "0.64.0/bio/deeptools/plotheatmap"

rule phantompeakqualtools:
    input:
Antonie Vietor's avatar
Antonie Vietor committed
189
        "results/filtered/{sample}.sorted.bam"
190
    output:
Antonie Vietor's avatar
Antonie Vietor committed
191
192
        res_phantom="results/phantompeakqualtools/{sample}.phantompeak.spp.out",
        r_data="results/phantompeakqualtools/{sample}.phantompeak.Rdata",
193
194
195
196
        plot=report(
            "results/phantompeakqualtools/{sample}.phantompeak.pdf",
            caption="../report/plot_phantompeak_phantompeakqualtools.rst",
            category="Phantompeak")
Antonie Vietor's avatar
Antonie Vietor committed
197
198
199
200
201
202
203
    threads:
        8
    log:
        "logs/phantompeakqualtools/{sample}.phantompeak.log"
    conda:
        "../envs/phantompeakqualtools.yaml"
    shell:
204
        "( Rscript -e \"library(caTools); source('workflow/scripts/run_spp.R')\" "
Antonie Vietor's avatar
Antonie Vietor committed
205
206
207
208
209
210
        "  -c={input} -savp={output.plot} -savd={output.r_data} "
        "  -out={output.res_phantom} -p={threads} 2>&1 ) >{log}"

rule phantompeak_correlation:
    input:
        data="results/phantompeakqualtools/{sample}.phantompeak.Rdata",
211
        header="workflow/header/spp_corr_header.txt"
Antonie Vietor's avatar
Antonie Vietor committed
212
213
214
215
216
217
218
219
220
221
222
223
224
    output:
        "results/phantompeakqualtools/{sample}.spp_correlation_mqc.tsv"
    log:
        "logs/phantompeakqualtools/correlation/{sample}.spp_corr.log"
    conda:
        "../envs/phantom_corr.yaml"
    script:
        "../scripts/phantompeak_correlation.R"

rule phantompeak_multiqc:
    # NSC (Normalized strand cross-correlation) and RSC (relative strand cross-correlation) metrics use cross-correlation
    input:
        data="results/phantompeakqualtools/{sample}.phantompeak.spp.out",
225
226
        nsc_header="workflow/header/nsc_header.txt",
        rsc_header="workflow/header/rsc_header.txt"
Antonie Vietor's avatar
Antonie Vietor committed
227
228
229
230
231
232
233
234
235
236
    output:
        nsc="results/phantompeakqualtools/{sample}.spp_nsc_mqc.tsv",
        rsc="results/phantompeakqualtools/{sample}.spp_rsc_mqc.tsv"
    log:
        "logs/phantompeakqualtools/correlation/{sample}.nsc_rsc.log"
    conda:
        "../envs/gawk.yaml"
    shell:
        "( gawk -v OFS='\t' '{{print $1, $9}}' {input.data} | cat {input.nsc_header} - > {output.nsc} && "
        "  gawk -v OFS='\t' '{{print $1, $10}}' {input.data} | cat {input.rsc_header} - > {output.rsc} 2>&1 ) >{log}"