post-analysis.smk 8.94 KB
Newer Older
Antonie Vietor's avatar
Antonie Vietor committed
1
2
3
4
5
6
rule preseq_lc_extrap:
    input:
        "results/sam-view/{sample}.bam"
    output:
        "results/preseq/{sample}.lc_extrap"
    params:
Antonie Vietor's avatar
Antonie Vietor committed
7
         "-v {} -seed 1".format( "" if config["single_end"] else "-pe" )
Antonie Vietor's avatar
Antonie Vietor committed
8
9
10
11
12
13
14
    log:
        "logs/preseq/{sample}.log"
    wrapper:
        "0.64.0/bio/preseq/lc_extrap"

rule collect_multiple_metrics:
    input:
Antonie Vietor's avatar
Antonie Vietor committed
15
         bam="results/filtered/{sample}.sorted.bam",
Antonie Vietor's avatar
Antonie Vietor committed
16
         ref="resources/ref/genome.fasta"
Antonie Vietor's avatar
Antonie Vietor committed
17
    output: #ToDo: add descriptions to report captions
Antonie Vietor's avatar
Antonie Vietor committed
18
19
20
21
        # Through the output file extensions the different tools for the metrics can be selected
        # so that it is not necessary to specify them under params with the "PROGRAM" option.
        # Usable extensions (and which tools they implicitly call) are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/picard/collectmultiplemetrics.html.
Antonie Vietor's avatar
Antonie Vietor committed
22
23
24
25
26
27
28
29
        [
           "{path}{sample}.insert_size_metrics",
            report(
                "{path}{sample}.insert_size_histogram.pdf",
                caption="../report/plot_insert_size_histogram_picard_mm.rst",
                category="Multiple Metrics (picard)"
            )
        ] if not config["single_end"] else [],
Antonie Vietor's avatar
Antonie Vietor committed
30
31
32
33
34
        multiext("{path}{sample}",
                 ".alignment_summary_metrics",
                 ".base_distribution_by_cycle_metrics",
                 ".quality_by_cycle_metrics",
                 ".quality_distribution_metrics",
Antonie Vietor's avatar
Antonie Vietor committed
35
                 ),
Antonie Vietor's avatar
Antonie Vietor committed
36
37
38
        report("{path}{sample}.base_distribution_by_cycle.pdf", caption="../report/plot_base_distribution_by_cycle_picard_mm.rst", category="Multiple Metrics (picard)"),
        report("{path}{sample}.quality_by_cycle.pdf", caption="../report/plot_quality_by_cycle_picard_mm.rst", category="Multiple Metrics (picard)"),
        report("{path}{sample}.quality_distribution.pdf", caption="../report/plot_quality_distribution_picard_mm.rst", category="Multiple Metrics (picard)")
Antonie Vietor's avatar
Antonie Vietor committed
39
40
41
42
43
44
45
46
47
48
49
50
51
52
    resources:
        # This parameter (default 3 GB) can be used to limit the total resources a pipeline is allowed to use, see:
        #     https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#resources
        mem_gb=3
    log:
        "logs/picard/{path}{sample}.log"
    params:
        # optional parameters, TODO: move to config.yaml and load from there
        "VALIDATION_STRINGENCY=LENIENT "
    wrapper:
        "0.64.0/bio/picard/collectmultiplemetrics"

rule genomecov:
    input:
Antonie Vietor's avatar
Antonie Vietor committed
53
54
55
56
57
58
59
        "results/filtered/{sample}.sorted.bam",
        flag_stats=expand("results/{step}/{{sample}}.sorted.{step}.flagstat",
            step= "bamtools_filtered" if config["single_end"]
            else "orph_rm_pe"),
        stats=expand("results/{step}/{{sample}}.sorted.{step}.stats.txt",
            step= "bamtools_filtered" if config["single_end"]
            else "orph_rm_pe"),
Antonie Vietor's avatar
Antonie Vietor committed
60
61
62
63
    output:
        pipe("results/bed_graph/{sample}.bedgraph")
    log:
        "logs/bed_graph/{sample}.log"
Antonie Vietor's avatar
Antonie Vietor committed
64
65
66
67
68
69
70
71
72
73
74
75
76
77
    params:
        lambda w, input:
            "-bg -scale $(grep 'mapped (' {flagstats_file} | awk '{{print 1000000/$1}}') {pe_fragment} {extend}".format(
            flagstats_file=input.flag_stats,
            pe_fragment="" if config["single_end"] else "-pc",
            # Estimated fragment size used to extend single-end reads
            extend=
                "-fs $(grep ^SN {stats} | "
                "cut -f 2- | "
                "grep -m1 'average length:' | "
                "awk '{{print $NF}}')".format(
                stats=input.stats)
            if config["single_end"] else ""
        )
Antonie Vietor's avatar
Antonie Vietor committed
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
    wrapper:
        "0.64.0/bio/bedtools/genomecov"

rule sort_genomecov:
    input:
        "results/bed_graph/{sample}.bedgraph"
    output:
        "results/bed_graph/{sample}.sorted.bedgraph"
    log:
        "logs/sort_genomecov/{sample}.log"
    shell:
        "sort -k1,1 -k2,2n {input} > {output} 2> {log}"

rule bedGraphToBigWig:
    input:
        bedGraph="results/bed_graph/{sample}.sorted.bedgraph",
        chromsizes="resources/ref/genome.chrom.sizes"
    output:
        "results/big_wig/{sample}.bigWig"
    log:
        "logs/big_wig/{sample}.log"
    params:
        ""
    wrapper:
        "0.64.0/bio/ucsc/bedGraphToBigWig"

Antonie Vietor's avatar
Antonie Vietor committed
104
rule create_igv_bigwig:
Antonie Vietor's avatar
Antonie Vietor committed
105
106
107
108
    input:
        "resources/ref/genome.bed",
        expand("results/big_wig/{sample}.bigWig", sample=samples.index)
    output:
Antonie Vietor's avatar
Antonie Vietor committed
109
        "results/IGV/big_wig/merged_library.bigWig.igv.txt"
Antonie Vietor's avatar
Antonie Vietor committed
110
    log:
Antonie Vietor's avatar
Antonie Vietor committed
111
        "logs/igv/create_igv_bigwig.log"
Antonie Vietor's avatar
Antonie Vietor committed
112
    shell:
Antonie Vietor's avatar
Antonie Vietor committed
113
        "find {input} -type f -name '*.bigWig' -exec echo -e 'results/IGV/big_wig/\"{{}}\"\t0,0,178' \;  > {output} 2> {log}"
Antonie Vietor's avatar
Antonie Vietor committed
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

rule compute_matrix:
    input:
         bed="resources/ref/genome.bed",
         bigwig=expand("results/big_wig/{sample}.bigWig", sample=samples.index)
    output:
        # Usable output variables, their extensions and which option they implicitly call are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/deeptools/computematrix.html.
        matrix_gz="results/deeptools/matrix_files/matrix.gz",
        matrix_tab="results/deeptools/matrix_files/matrix.tab"
    log:
        "logs/deeptools/compute_matrix.log"
    threads: 2
    params:
        command="scale-regions",
        extra="--regionBodyLength 1000 "
              "--beforeRegionStartLength 3000 "
              "--afterRegionStartLength 3000 "
Antonie Vietor's avatar
Antonie Vietor committed
132
              "--missingDataAsZero " # added to prevent black output in the heatmap (plot_heatmap rule) https://github.com/deeptools/deepTools/issues/793
Antonie Vietor's avatar
Antonie Vietor committed
133
134
135
136
137
138
139
140
141
              "--skipZeros "
              "--smartLabels "
              "--numberOfProcessors 2 "
    wrapper:
        "0.64.0/bio/deeptools/computematrix"

rule plot_profile:
    input:
         "results/deeptools/matrix_files/matrix.gz"
Antonie Vietor's avatar
Antonie Vietor committed
142
    output: #ToDo: add description to report caption
Antonie Vietor's avatar
Antonie Vietor committed
143
144
        # Usable output variables, their extensions and which option they implicitly call are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/deeptools/plotprofile.html.
Antonie Vietor's avatar
Antonie Vietor committed
145
        plot_img=report("results/deeptools/plot_profile.pdf", caption="../report/plot_profile_deeptools.rst", category="GenomicRegions"),
Antonie Vietor's avatar
Antonie Vietor committed
146
147
148
149
150
151
152
153
154
155
156
        data="results/deeptools/plot_profile_data.tab"
    log:
        "logs/deeptools/plot_profile.log"
    params:
        ""
    wrapper:
        "0.64.0/bio/deeptools/plotprofile"

rule plot_heatmap:
    input:
         "results/deeptools/matrix_files/matrix.gz"
Antonie Vietor's avatar
Antonie Vietor committed
157
    output:  #ToDo: add description to report caption
Antonie Vietor's avatar
Antonie Vietor committed
158
159
        # Usable output variables, their extensions and which option they implicitly call are listed here:
        #         https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/deeptools/plotheatmap.html.
Antonie Vietor's avatar
Antonie Vietor committed
160
        heatmap_img=report("results/deeptools/heatmap.pdf", caption="../report/plot_heatmap_deeptools.rst", category="Heatmaps"),
Antonie Vietor's avatar
Antonie Vietor committed
161
162
163
164
165
166
167
168
169
170
        heatmap_matrix="results/deeptools/heatmap_matrix.tab"
    log:
        "logs/deeptools/heatmap.log"
    params:
        ""
    wrapper:
        "0.64.0/bio/deeptools/plotheatmap"

rule phantompeakqualtools:
    input:
Antonie Vietor's avatar
Antonie Vietor committed
171
        "results/filtered/{sample}.sorted.bam"
Antonie Vietor's avatar
Antonie Vietor committed
172
    output:  #ToDo: add description to report caption
Antonie Vietor's avatar
Antonie Vietor committed
173
174
        res_phantom="results/phantompeakqualtools/{sample}.phantompeak.spp.out",
        r_data="results/phantompeakqualtools/{sample}.phantompeak.Rdata",
Antonie Vietor's avatar
Antonie Vietor committed
175
        plot=report("results/phantompeakqualtools/{sample}.phantompeak.pdf", caption="../report/plot_phantompeak_phantompeakqualtools.rst", category="Phantompeak")
Antonie Vietor's avatar
Antonie Vietor committed
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
    threads:
        8
    log:
        "logs/phantompeakqualtools/{sample}.phantompeak.log"
    conda:
        "../envs/phantompeakqualtools.yaml"
    shell:
        "( Rscript -e \"library(caTools); source('../workflow/scripts/run_spp.R')\" "
        "  -c={input} -savp={output.plot} -savd={output.r_data} "
        "  -out={output.res_phantom} -p={threads} 2>&1 ) >{log}"

rule phantompeak_correlation:
    input:
        data="results/phantompeakqualtools/{sample}.phantompeak.Rdata",
        header="../workflow/header/spp_corr_header.txt"
    output:
        "results/phantompeakqualtools/{sample}.spp_correlation_mqc.tsv"
    log:
        "logs/phantompeakqualtools/correlation/{sample}.spp_corr.log"
    conda:
        "../envs/phantom_corr.yaml"
    script:
        "../scripts/phantompeak_correlation.R"

rule phantompeak_multiqc:
    # NSC (Normalized strand cross-correlation) and RSC (relative strand cross-correlation) metrics use cross-correlation
    input:
        data="results/phantompeakqualtools/{sample}.phantompeak.spp.out",
        nsc_header="../workflow/header/nsc_header.txt",
        rsc_header="../workflow/header/rsc_header.txt"
    output:
        nsc="results/phantompeakqualtools/{sample}.spp_nsc_mqc.tsv",
        rsc="results/phantompeakqualtools/{sample}.spp_rsc_mqc.tsv"
    log:
        "logs/phantompeakqualtools/correlation/{sample}.nsc_rsc.log"
    conda:
        "../envs/gawk.yaml"
    shell:
        "( gawk -v OFS='\t' '{{print $1, $9}}' {input.data} | cat {input.nsc_header} - > {output.nsc} && "
        "  gawk -v OFS='\t' '{{print $1, $10}}' {input.data} | cat {input.rsc_header} - > {output.rsc} 2>&1 ) >{log}"