Verified commit 8b82483c, authored by Aurélien Ginolhac 🚴
Browse files

inactivate deseq2 and use yeast data

parent 6156f635
...@@ -42,7 +42,7 @@ params: ...@@ -42,7 +42,7 @@ params:
q-value: q-value:
deeptools-plots: deeptools-plots:
# when activated the plot profile and heatmap plot are generated, this involves a matrix calculation that requires a lot of working memory. # when activated the plot profile and heatmap plot are generated, this involves a matrix calculation that requires a lot of working memory.
activate: False activate: True
lc_extrap: lc_extrap:
activate: False activate: False
picard_metrics: picard_metrics:
......
sample group batch_effect control antibody sample group batch_effect control antibody
A TNFa batch1 D p65 IN_FY4 WT batch1 K4
B TNFa batch2 D p65 FY4 WT batch1 IN_FY4 K4
C E2TNFa batch1 E p65 IN_FY4dld3 dld3 batch1 K4
D TNFa batch1 FY4_dld3 dld3 batch1 IN_FY4dld3 K4
E E2TNFa batch1 \ No newline at end of file
sample unit fq1 fq2 sra_accession platform sample unit fq1 fq2 sra_accession platform
A 1 data/single_end_test_data/A-1_chr21.fastq.gz ILLUMINA IN_FY4 1 data/2020-08-II/IN_FY4_A_Lasse_S22_R1_001.fastq.gz ILLUMINA
B 1 data/single_end_test_data/B-1_chr21.fastq.gz ILLUMINA IN_FY4 2 data/2020-08-II/IN_FY4_B_Lasse_S23_R1_001.fastq.gz ILLUMINA
C 1 data/single_end_test_data/C-1_chr21.fastq.gz ILLUMINA IN_FY4 3 data/2020-08-II/IN_FY4_C_Lasse_S24_R1_001.fastq.gz ILLUMINA
C 2 data/single_end_test_data/C-2_chr21.fastq.gz ILLUMINA IN_FY4 4 data/2020-08-II/IN_FY4_D_Lasse_S64_R1_001.fastq.gz ILLUMINA
D 1 data/single_end_test_data/D-1_chr21.fastq.gz ILLUMINA IN_FY4 5 data/2020-08-II/IN_FY4_E_Lasse_S65_R1_001.fastq.gz ILLUMINA
E 1 data/single_end_test_data/E-1_chr21.fastq.gz ILLUMINA IN_FY4 6 data/2020-08-II/IN_FY4_F_Lasse_S66_R1_001.fastq.gz ILLUMINA
FY4 1 data/2020-08-II/K4_FY4_A_Lasse_S1_R1_001.fastq.gz ILLUMINA
FY4 2 data/2020-08-II/K4_FY4_B_Lasse_S2_R1_001.fastq.gz ILLUMINA
FY4 3 data/2020-08-II/K4_FY4_C_Lasse_S3_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 1 data/2020-08-II/IN_FY4dld3_A_Lasse_S25_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 2 data/2020-08-II/IN_FY4dld3_B_Lasse_S26_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 3 data/2020-08-II/IN_FY4dld3_C_Lasse_S27_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 4 data/2020-08-II/IN_FY4dld3_D_Lasse_S67_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 5 data/2020-08-II/IN_FY4dld3_E_Lasse_S68_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 6 data/2020-08-II/IN_FY4dld3_F_Lasse_S69_R1_001.fastq.gz ILLUMINA
FY4_dld3 1 data/2020-08-II/K4_FY4dld3_A_Lasse_S4_R1_001.fastq.gz ILLUMINA
FY4_dld3 2 data/2020-08-II/K4_FY4dld3_B_Lasse_S5_R1_001.fastq.gz ILLUMINA
FY4_dld3 3 data/2020-08-II/K4_FY4dld3_C_Lasse_S6_R1_001.fastq.gz ILLUMINA
...@@ -176,8 +176,8 @@ def get_igv_input(): ...@@ -176,8 +176,8 @@ def get_igv_input():
igv_input.extend( igv_input.extend(
expand( expand(
[ [
"results/IGV/consensus/merged_library.{antibody}.consensus_{peak}-peaks.igv.txt", "results/IGV/consensus/merged_library.{antibody}.consensus_{peak}-peaks.igv.txt"
"results/IGV/consensus/merged_library.{antibody}.consensus_{peak}-peaks.deseq2.FDR_0.05.igv.txt" #"results/IGV/consensus/merged_library.{antibody}.consensus_{peak}-peaks.deseq2.FDR_0.05.igv.txt"
], ],
antibody=antibody, antibody=antibody,
peak=config["params"]["peak-analysis"] peak=config["params"]["peak-analysis"]
...@@ -227,6 +227,8 @@ def get_multiqc_input(wildcards): ...@@ -227,6 +227,8 @@ def get_multiqc_input(wildcards):
[ [
"results/qc/fastqc/{sample}.{unit}.{reads}_fastqc.zip", "results/qc/fastqc/{sample}.{unit}.{reads}_fastqc.zip",
"results/qc/fastqc/{sample}.{unit}.{reads}.html", "results/qc/fastqc/{sample}.{unit}.{reads}.html",
"results/qc/fastq_screen/{sample}.{unit}.{reads}.fastq_screen.txt",
"results/qc/fastq_screen/{sample}.{unit}.{reads}.fastq_screen.png",
"results/mapped/{sample}-{unit}.mapped.flagstat", "results/mapped/{sample}-{unit}.mapped.flagstat",
"results/mapped/{sample}-{unit}.mapped.idxstats", "results/mapped/{sample}-{unit}.mapped.idxstats",
"results/mapped/{sample}-{unit}.mapped.stats.txt" "results/mapped/{sample}-{unit}.mapped.stats.txt"
...@@ -246,11 +248,11 @@ def get_multiqc_input(wildcards): ...@@ -246,11 +248,11 @@ def get_multiqc_input(wildcards):
"results/picard_dedup/{sample}.picard_dedup.stats.txt", "results/picard_dedup/{sample}.picard_dedup.stats.txt",
"results/bamtools_filtered/{sample}.sorted.bamtools_filtered.flagstat", "results/bamtools_filtered/{sample}.sorted.bamtools_filtered.flagstat",
"results/bamtools_filtered/{sample}.sorted.bamtools_filtered.idxstats", "results/bamtools_filtered/{sample}.sorted.bamtools_filtered.idxstats",
"results/bamtools_filtered/{sample}.sorted.bamtools_filtered.stats.txt" "results/bamtools_filtered/{sample}.sorted.bamtools_filtered.stats.txt",
#"results/phantompeakqualtools/{sample}.phantompeak.spp.out", "results/phantompeakqualtools/{sample}.phantompeak.spp.out",
#"results/phantompeakqualtools/{sample}.spp_correlation_mqc.tsv", "results/phantompeakqualtools/{sample}.spp_correlation_mqc.tsv",
#"results/phantompeakqualtools/{sample}.spp_nsc_mqc.tsv", "results/phantompeakqualtools/{sample}.spp_nsc_mqc.tsv",
#"results/phantompeakqualtools/{sample}.spp_rsc_mqc.tsv" "results/phantompeakqualtools/{sample}.spp_rsc_mqc.tsv"
], ],
sample = sample sample = sample
) )
...@@ -360,14 +362,14 @@ def all_input(wildcards): ...@@ -360,14 +362,14 @@ def all_input(wildcards):
# mapping, merging and filtering bam-files # mapping, merging and filtering bam-files
for sample in samples.index: for sample in samples.index:
#wanted_input.extend( wanted_input.extend(
# expand ( expand (
# [ [
# "results/phantompeakqualtools/{sample}.phantompeak.pdf" "results/phantompeakqualtools/{sample}.phantompeak.pdf"
# ], ],
# sample = sample sample = sample
# ) )
#) )
if config["params"]["deeptools-plots"]["activate"]: if config["params"]["deeptools-plots"]["activate"]:
wanted_input.extend( wanted_input.extend(
...@@ -424,28 +426,7 @@ def all_input(wildcards): ...@@ -424,28 +426,7 @@ def all_input(wildcards):
expand( expand(
[ [
"results/homer/annotate_consensus_peaks/{antibody}.consensus_{peak}-peaks.annotatePeaks.txt", "results/homer/annotate_consensus_peaks/{antibody}.consensus_{peak}-peaks.annotatePeaks.txt",
"results/homer/annotate_consensus_peaks/{antibody}.consensus_{peak}-peaks.boolean.annotatePeaks.txt", "results/homer/annotate_consensus_peaks/{antibody}.consensus_{peak}-peaks.boolean.annotatePeaks.txt"
"results/feature_counts/{antibody}.consensus_{peak}-peaks.featureCounts",
"results/feature_counts/{antibody}.consensus_{peak}-peaks.featureCounts.summary",
"results/feature_counts/{antibody}.consensus_{peak}-peaks.featureCounts.jcounts",
"results/deseq2/dss_rld/{antibody}.consensus_{peak}-peaks.dds.rld.RData",
"results/deseq2/plots/{antibody}.consensus_{peak}-peaks.pca_plot.pdf",
"results/deseq2/plots/{antibody}.consensus_{peak}-peaks.heatmap_plot.pdf",
"results/deseq2/pca_vals/{antibody}.consensus_{peak}-peaks.pca.vals.txt",
"results/deseq2/dists/{antibody}.consensus_{peak}-peaks.sample.dists.txt",
"results/deseq2/sizeFactors/{antibody}.consensus_{peak}-peaks.sizeFactors.RData",
"results/deseq2/sizeFactors/{antibody}.consensus_{peak}-peaks.sizeFactors.sizeFactor.txt",
"results/deseq2/results/{antibody}.consensus_{peak}-peaks.deseq2_results.txt",
"results/deseq2/FDR/results/FDR_0.01_{antibody}.consensus_{peak}-peaks",
"results/deseq2/FDR/results/FDR_0.05_{antibody}.consensus_{peak}-peaks",
"results/deseq2/FDR/bed_files/FDR_0.01_{antibody}.consensus_{peak}-peaks",
"results/deseq2/FDR/bed_files/FDR_0.05_{antibody}.consensus_{peak}-peaks",
"results/deseq2/comparison_plots/MA_plots/FDR_0.01_{antibody}consensus_{peak}-peaks",
"results/deseq2/comparison_plots/MA_plots/FDR_0.05_{antibody}consensus_{peak}-peaks",
"results/deseq2/comparison_plots/volcano_plots/FDR_0.01_{antibody}consensus_{peak}-peaks",
"results/deseq2/comparison_plots/volcano_plots/FDR_0.05_{antibody}consensus_{peak}-peaks",
"results/deseq2/comparison_plots/correlation_heatmaps_{antibody}consensus_{peak}-peaks",
"results/deseq2/comparison_plots/scatter_plots_{antibody}consensus_{peak}-peaks"
], ],
peak = config["params"]["peak-analysis"], peak = config["params"]["peak-analysis"],
antibody = antibody antibody = antibody
......
...@@ -48,19 +48,22 @@ rule igv_files_to_report: ...@@ -48,19 +48,22 @@ rule igv_files_to_report:
rule collect_igv_report_session_files: rule collect_igv_report_session_files:
input: input:
igv_data=get_files_for_igv(), igv_data=get_files_for_igv(),
deseq2_files=directory(expand("results/deseq2/FDR/bed_files/FDR_0.05_{antibody}.consensus_{peak}-peaks", #deseq2_files=directory(expand("results/deseq2/FDR/bed_files/FDR_0.05_{antibody}.consensus_{peak}-peaks",
antibody=get_unique_antibodies(), peak=config["params"]["peak-analysis"])), #antibody=get_unique_antibodies(), peak=config["params"]["peak-analysis"])),
fasta="resources/ref/genome.fasta", #fasta="resources/ref/genome.fasta",
igv_session="results/IGV/report_igv_session.xml" igv_session="results/IGV/report_igv_session.xml"
output: output:
temp(directory("results/IGV/report_igv_session")) temp(directory("results/IGV/report_igv_session"))
log: log:
"logs/igv/collect_igv_report_session_files.log" "logs/igv/collect_igv_report_session_files.log"
shell: shell:
"mkdir -p {output}; cp {input.igv_data} {output}/; " """
"cp -R {input.deseq2_files}/* {output}/; cp {input.fasta} {output}/; " mkdir -p {output}; cp {input.igv_data} {output}/;
"cp {input.igv_session} {output}/" cp {input.igv_session} {output}/
"""
#"cp -R {input.deseq2_files}/* {output}/; cp {input.fasta} {output}/; "
# igv session that can be downloaded from generated report # igv session that can be downloaded from generated report
rule zip_igv_report_session: rule zip_igv_report_session:
......
...@@ -11,7 +11,7 @@ rule bwa_mem: ...@@ -11,7 +11,7 @@ rule bwa_mem:
extra= get_read_group, extra= get_read_group,
sort="samtools", sort="samtools",
sort_order="coordinate", sort_order="coordinate",
threads: 8 threads: 6
wrapper: wrapper:
"0.64.0/bio/bwa/mem" "0.64.0/bio/bwa/mem"
......
rule preseq_lc_extrap: rule preseq_lc_extrap:
input: input:
"results/sam-view/{sample}.bam" "results/filtered/{sample}.sorted.bam"
output: output:
"results/preseq/{sample}.lc_extrap" "results/preseq/{sample}.lc_extrap"
params: params:
......
...@@ -8,7 +8,7 @@ rule fastqc: ...@@ -8,7 +8,7 @@ rule fastqc:
"" ""
log: log:
"logs/fastqc/{sample}.{unit}.{read}.log" "logs/fastqc/{sample}.{unit}.{read}.log"
threads: 6 threads: 4
wrapper: wrapper:
"0.72.0/bio/fastqc" "0.72.0/bio/fastqc"
...@@ -21,3 +21,53 @@ rule multiqc: ...@@ -21,3 +21,53 @@ rule multiqc:
"logs/multiqc.log" "logs/multiqc.log"
wrapper: wrapper:
"0.64.0/bio/multiqc" "0.64.0/bio/multiqc"
# Run the fastq_screen wrapper on the FASTQ file(s) returned by
# get_individual_fastq (defined elsewhere in the workflow) and write the
# text summary and PNG plot under results/qc/fastq_screen/.
rule fastq_screen:
    input:
        get_individual_fastq
    output:
        txt="results/qc/fastq_screen/{sample}.{unit}.{read}.fastq_screen.txt",
        png="results/qc/fastq_screen/{sample}.{unit}.{read}.fastq_screen.png"
    log:
        "logs/fastq_screen/{sample}.{unit}.{read}.log"
    params:
        # Every database entry maps a label to a bowtie2 index prefix rooted at
        # config["params"]["db_bowtie_path"]. The (label, template) pairs are
        # kept in the original order so the resulting dict is identical.
        fastq_screen_config={
            'database': {
                label: {'bowtie2': template.format(config["params"]["db_bowtie_path"])}
                for label, template in (
                    ('human', "{}/Human/Homo_sapiens.GRCh38"),
                    ('mouse', "{}/Mouse/Mus_musculus.GRCm38"),
                    ('rat', "{}/Rat/Rnor_6.0"),
                    ('drosophila', "{}/Drosophila/BDGP6"),
                    ('worm', "{}/Worm/Caenorhabditis_elegans.WBcel235"),
                    ('yeast', "{}/Yeast/Saccharomyces_cerevisiae.R64-1-1"),
                    ('arabidopsis', "{}/Arabidopsis/Arabidopsis_thaliana.TAIR10"),
                    ('ecoli', "{}/E_coli/Ecoli"),
                    ('rRNA', "{}/rRNA/GRCm38_rRNA"),
                    ('MT', "{}/Mitochondria/mitochondria"),
                    ('PhiX', "{}/PhiX/phi_plus_SNPs"),
                    ('Lambda', "{}/Lambda/Lambda"),
                    ('vectors', "{}/Vectors/Vectors"),
                    ('adapters', "{}/Adapters/Contaminants"),
                    ('mycoplasma', "{}/Mycoplasma/mycoplasma"),
                )
            },
            # Location of the bowtie2 executable, taken from
            # config["params"]["bowtie_path"].
            'aligner_paths': {
                'bowtie2': "{}/bowtie2".format(config["params"]["bowtie_path"])
            }
        },
        # Passed through to the wrapper unchanged; presumably the number of
        # reads sampled per file -- confirm against the wrapper documentation.
        subset=100000,
        aligner='bowtie2'
    threads: 6
    wrapper:
        "0.65.0/bio/fastq_screen"
...@@ -36,7 +36,7 @@ rule sra_get_fastq_pe: ...@@ -36,7 +36,7 @@ rule sra_get_fastq_pe:
"resources/ref/sra-pe-reads/{accession}.2.fastq" "resources/ref/sra-pe-reads/{accession}.2.fastq"
params: params:
extra="" extra=""
threads: 6 threads: 4
log: log:
"logs/ref/sra-pe-reads/{accession}.log" "logs/ref/sra-pe-reads/{accession}.log"
wrapper: wrapper:
...@@ -47,7 +47,7 @@ rule sra_get_fastq_se: ...@@ -47,7 +47,7 @@ rule sra_get_fastq_se:
"resources/ref/sra-se-reads/{accession}.fastq" "resources/ref/sra-se-reads/{accession}.fastq"
params: params:
extra="" extra=""
threads: 6 threads: 4
log: log:
"logs/ref/sra-pe-reads/{accession}.log" "logs/ref/sra-pe-reads/{accession}.log"
wrapper: wrapper:
......
...@@ -113,7 +113,7 @@ count.table <- count.table[,7:ncol(count.table),drop=FALSE] ...@@ -113,7 +113,7 @@ count.table <- count.table[,7:ncol(count.table),drop=FALSE]
#setwd(opt$outdir) #setwd(opt$outdir)
samples.vec <- sort(colnames(count.table)) samples.vec <- sort(colnames(count.table))
groups <- sub("_[^_]+$", "", samples.vec) groups <- sub("_[^_]+$", "", samples.vec) #FIXME needs to be better, as in rna-seq
print(unique(groups)) print(unique(groups))
if (length(unique(groups)) == 1) { if (length(unique(groups)) == 1) {
quit(save = "no", status = 0, runLast = FALSE) quit(save = "no", status = 0, runLast = FALSE)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment