Verified Commit 88cce103 authored by Aurélien Ginolhac's avatar Aurélien Ginolhac 🚴
Browse files

peak ana;lysis at the sample level instead of global

parent 335fee38
## v0.1.1
- adaptative peak calling according to the histone mark
## v0.1.0
- added a changelog
......
......@@ -30,8 +30,6 @@ trimming:
others: "--gzip --trimqualities --trimns --minlength 35"
params:
# choose "narrow" or "broad" for macs2 callpeak analysis, for documentation and source code please see https://github.com/macs3-project/MACS
peak-analysis: "narrow"
# Number of biological replicates required from a given condition for a peak to contribute to a consensus peak
min-reps-consensus: 1
callpeak:
......
sample group batch_effect control antibody
sample group batch_effect control antibody peak-analysis
Spt5_IN SptA batch1 Spt
Spt5 SptA batch1 Spt5_IN Spt
Spt5 SptA batch1 Spt5_IN Spt narrow
sample group batch_effect control antibody
IN_FY4 WT batch1 K4
FY4 WT batch1 IN_FY4 K4
IN_FY4dld3 dld3 batch1 K4
FY4_dld3 dld3 batch1 IN_FY4dld3 K4
\ No newline at end of file
sample group batch_effect control antibody peak-analysis
IN_FY4 WT batch1 K4 narrow
FY4 WT batch1 IN_FY4 K4 narrow
IN_FY4dld3 dld3 batch1 K4 narrow
FY4_dld3 dld3 batch1 IN_FY4dld3 K4 narrow
\ No newline at end of file
**HOMER** peak annotation summary plot is generated by calculating the proportion of
{{snakemake.config["params"]["peak-analysis"]}} peaks assigned to genomic features by
peaks assigned to genomic features by
`HOMER annotatePeaks.pl <http://homer.ucsd.edu/homer/ngs/annotation.html>`_.
**MACS2 FRiP score** is generated by calculating the fraction of all mapped reads that fall into the
`MACS2 <https://github.com/taoliu/MACS>`_ called {{snakemake.config["params"]["peak-analysis"]}} peak regions.
`MACS2 <https://github.com/taoliu/MACS>`_ called peak regions.
A read must overlap a peak by at least 20% to be counted to `FRiP score <https://www.encodeproject.org/data-standards/terms/>`_.
**MACS2 peak count** is calculated from total number of {{snakemake.config["params"]["peak-analysis"]}} peaks called by
**MACS2 peak count** is calculated from total number of peaks called by
`MACS2 <https://github.com/taoliu/MACS>`_.
......@@ -95,15 +95,36 @@ def get_sample_control_peak_combinations_list():
sam_contr = []
for sample in samples.index:
if not is_control(sample):
sam_contr.extend(expand(["{sample}-{control}.{peak}"], sample = sample, control = samples.loc[sample]["control"], peak = config["params"]["peak-analysis"]))
sam_contr.extend(expand(["{sample}-{control}.{peak}"], sample = sample, control = samples.loc[sample]["control"], peak = samples.loc[sample]["peak-analysis"]))
return sam_contr
def get_peak_combinations_list():
peak_contr = []
for sample in samples.index:
if not is_control(sample):
sam_contr.extend(expand(["{peak}"], peak = samples.loc[sample]["peak-analysis"]))
return peak_contr
def get_peaks_count_plot_input():
return expand(
"results/macs2_callpeak/peaks_count/{sam_contr_peak}.peaks_count.tsv",
sam_contr_peak = get_sample_control_peak_combinations_list()
)
def get_peaks_output():
peak_out = []
for sample in samples.index:
if not is_control(sample):
if samples.loc[sample]["peak-analysis"] == "broad":
peak_out.extend(expand(["results/macs2_callpeak/{sample}-{control}.{peak}_peaks.gappedPeak"], sample = sample, control = samples.loc[sample]["control"], peak = samples.loc[sample]["peak-analysis"]))
if samples.loc[sample]["peak-analysis"] == "nrrow":
peak_out.extend(expand(["results/macs2_callpeak/{sample}-{control}.{peak}_summits.bed"], sample = sample, control = samples.loc[sample]["control"], peak = samples.loc[sample]["peak-analysis"]))
else:
raise ValueError(
f"for {sample}, peak-analysis must be either narrow or broad (not {samples.loc[sample]["peak-analysis"]}\n"
)
return peak_out
def get_frip_score_input():
return expand(
"results/bedtools_intersect/{sam_contr_peak}.peaks_frip.tsv",
......@@ -113,7 +134,7 @@ def get_frip_score_input():
def get_macs2_peaks():
return expand(
"results/macs2_callpeak/{sam_contr_peak}_peaks.{peak}Peak",
sam_contr_peak = get_sample_control_peak_combinations_list(), peak =config["params"]["peak-analysis"]
sam_contr_peak = get_sample_control_peak_combinations_list(), peak = get_peak_combinations_list()
)
def get_plot_homer_annotatepeaks_input():
......@@ -168,7 +189,7 @@ def get_igv_input():
],
sample_control_peak=get_sample_control_peak_combinations_list(),
peak=config["params"]["peak-analysis"]
peak=get_peak_combinations_list()
)
)
......@@ -180,7 +201,7 @@ def get_igv_input():
#"results/IGV/consensus/merged_library.{antibody}.consensus_{peak}-peaks.deseq2.FDR_0.05.igv.txt"
],
antibody=antibody,
peak=config["params"]["peak-analysis"]
peak=get_peak_combinations_list()
)
)
return igv_input
......@@ -204,7 +225,7 @@ def get_files_for_igv():
"results/macs2_merged_expand/{antibody}.consensus_{peak}-peaks.boolean.bed"
],
sample_control_peak=get_sample_control_peak_combinations_list(),
peak=config["params"]["peak-analysis"],
peak=get_peak_combinations_list()
antibody=get_unique_antibodies()
)
)
......@@ -289,7 +310,7 @@ def get_multiqc_input(wildcards):
],
sample = sample,
control = samples.loc[sample]["control"],
peak = config["params"]["peak-analysis"]
peak = samples.loc[sample]["peak-analysis"]
)
)
if config["params"]["lc_extrap"]["activate"]:
......@@ -394,7 +415,7 @@ def all_input(wildcards):
],
sample = sample,
control = samples.loc[sample]["control"],
peak = config["params"]["peak-analysis"]
peak = samples.loc[sample]["peak-analysis"]
)
)
if do_peak_qc:
......@@ -404,7 +425,7 @@ def all_input(wildcards):
"results/homer/plots/plot_{peak}_annotatepeaks_summary.txt",
"results/homer/plots/plot_{peak}_annotatepeaks.pdf"
],
peak = config["params"]["peak-analysis"]
peak = samples.loc[sample]["peak-analysis"]
)
)
if do_consensus_peak:
......@@ -416,7 +437,7 @@ def all_input(wildcards):
"results/macs2_merged_expand/{antibody}.consensus_{peak}-peaks.boolean.saf",
"results/macs2_merged_expand/plots/{antibody}.consensus_{peak}-peaks.boolean.intersect.plot.pdf"
],
peak = config["params"]["peak-analysis"],
peak = samples.loc[sample]["peak-analysis"],
antibody = antibody
)
)
......@@ -427,7 +448,7 @@ def all_input(wildcards):
"results/homer/annotate_consensus_peaks/{antibody}.consensus_{peak}-peaks.annotatePeaks.txt",
"results/homer/annotate_consensus_peaks/{antibody}.consensus_{peak}-peaks.boolean.annotatePeaks.txt"
],
peak = config["params"]["peak-analysis"],
peak = samples.loc[sample]["peak-analysis"],
antibody = antibody
)
)
......@@ -445,30 +466,9 @@ def all_input(wildcards):
],
sample = sample,
control = samples.loc[sample]["control"],
peak = config["params"]["peak-analysis"],
peak = samples.loc[sample]["peak-analysis"],
antibody = samples.loc[sample]["antibody"]
)
)
if config["params"]["peak-analysis"] == "broad":
wanted_input.extend(
expand(
[
"results/macs2_callpeak/{sample}-{control}.{peak}_peaks.gappedPeak"
],
sample = sample,
control = samples.loc[sample]["control"],
peak = config["params"]["peak-analysis"]
)
)
if config["params"]["peak-analysis"] == "narrow":
wanted_input.extend(
expand(
[
"results/macs2_callpeak/{sample}-{control}.{peak}_summits.bed"
],
sample = sample,
control = samples.loc[sample]["control"],
peak = config["params"]["peak-analysis"]
)
)
wanted_input.extend(get_peaks_output())samples.loc[sample]["peak-analysis"]
return wanted_input
......@@ -36,7 +36,7 @@ rule macs2_merged_expand:
bool_intersect="results/macs2_merged_expand/{antibody}.consensus_{peak}-peaks.boolean.intersect.txt"
params:
sample_control_peak=get_sample_control_peak_combinations_list(),
narrow_param="--is_narrow_peak" if config["params"]["peak-analysis"] == "narrow" else "",
narrow_param="--is_narrow_peak" if get_peak_combinations_list() == "narrow" else "",
min_reps_consensus=config["params"]["min-reps-consensus"]
log:
"logs/macs2_merged_expand/{antibody}.consensus_{peak}-peaks.boolean.log"
......
......@@ -49,9 +49,6 @@ rule igv_files_to_report:
rule collect_igv_report_session_files:
input:
igv_data=get_files_for_igv(),
#deseq2_files=directory(expand("results/deseq2/FDR/bed_files/FDR_0.05_{antibody}.consensus_{peak}-peaks",
#antibody=get_unique_antibodies(), peak=config["params"]["peak-analysis"])),
#fasta="resources/ref/genome.fasta",
igv_session="results/IGV/report_igv_session.xml"
output:
temp(directory("results/IGV/report_igv_session"))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment