Verified Commit 8b82483c authored by Aurélien Ginolhac's avatar Aurélien Ginolhac 🚴
Browse files

inactivate deseq2 and use yeast data

parent 6156f635
......@@ -42,7 +42,7 @@ params:
q-value:
deeptools-plots:
# When activated, the plot profile and heatmap plot are generated. This involves a matrix calculation that requires a lot of working memory.
activate: False
activate: True
lc_extrap:
activate: False
picard_metrics:
......
sample group batch_effect control antibody
A TNFa batch1 D p65
B TNFa batch2 D p65
C E2TNFa batch1 E p65
D TNFa batch1
E E2TNFa batch1
IN_FY4 WT batch1 K4
FY4 WT batch1 IN_FY4 K4
IN_FY4dld3 dld3 batch1 K4
FY4_dld3 dld3 batch1 IN_FY4dld3 K4
\ No newline at end of file
sample unit fq1 fq2 sra_accession platform
A 1 data/single_end_test_data/A-1_chr21.fastq.gz ILLUMINA
B 1 data/single_end_test_data/B-1_chr21.fastq.gz ILLUMINA
C 1 data/single_end_test_data/C-1_chr21.fastq.gz ILLUMINA
C 2 data/single_end_test_data/C-2_chr21.fastq.gz ILLUMINA
D 1 data/single_end_test_data/D-1_chr21.fastq.gz ILLUMINA
E 1 data/single_end_test_data/E-1_chr21.fastq.gz ILLUMINA
IN_FY4 1 data/2020-08-II/IN_FY4_A_Lasse_S22_R1_001.fastq.gz ILLUMINA
IN_FY4 2 data/2020-08-II/IN_FY4_B_Lasse_S23_R1_001.fastq.gz ILLUMINA
IN_FY4 3 data/2020-08-II/IN_FY4_C_Lasse_S24_R1_001.fastq.gz ILLUMINA
IN_FY4 4 data/2020-08-II/IN_FY4_D_Lasse_S64_R1_001.fastq.gz ILLUMINA
IN_FY4 5 data/2020-08-II/IN_FY4_E_Lasse_S65_R1_001.fastq.gz ILLUMINA
IN_FY4 6 data/2020-08-II/IN_FY4_F_Lasse_S66_R1_001.fastq.gz ILLUMINA
FY4 1 data/2020-08-II/K4_FY4_A_Lasse_S1_R1_001.fastq.gz ILLUMINA
FY4 2 data/2020-08-II/K4_FY4_B_Lasse_S2_R1_001.fastq.gz ILLUMINA
FY4 3 data/2020-08-II/K4_FY4_C_Lasse_S3_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 1 data/2020-08-II/IN_FY4dld3_A_Lasse_S25_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 2 data/2020-08-II/IN_FY4dld3_B_Lasse_S26_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 3 data/2020-08-II/IN_FY4dld3_C_Lasse_S27_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 4 data/2020-08-II/IN_FY4dld3_D_Lasse_S67_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 5 data/2020-08-II/IN_FY4dld3_E_Lasse_S68_R1_001.fastq.gz ILLUMINA
IN_FY4dld3 6 data/2020-08-II/IN_FY4dld3_F_Lasse_S69_R1_001.fastq.gz ILLUMINA
FY4_dld3 1 data/2020-08-II/K4_FY4dld3_A_Lasse_S4_R1_001.fastq.gz ILLUMINA
FY4_dld3 2 data/2020-08-II/K4_FY4dld3_B_Lasse_S5_R1_001.fastq.gz ILLUMINA
FY4_dld3 3 data/2020-08-II/K4_FY4dld3_C_Lasse_S6_R1_001.fastq.gz ILLUMINA
......@@ -176,8 +176,8 @@ def get_igv_input():
igv_input.extend(
expand(
[
"results/IGV/consensus/merged_library.{antibody}.consensus_{peak}-peaks.igv.txt",
"results/IGV/consensus/merged_library.{antibody}.consensus_{peak}-peaks.deseq2.FDR_0.05.igv.txt"
"results/IGV/consensus/merged_library.{antibody}.consensus_{peak}-peaks.igv.txt"
#"results/IGV/consensus/merged_library.{antibody}.consensus_{peak}-peaks.deseq2.FDR_0.05.igv.txt"
],
antibody=antibody,
peak=config["params"]["peak-analysis"]
......@@ -227,6 +227,8 @@ def get_multiqc_input(wildcards):
[
"results/qc/fastqc/{sample}.{unit}.{reads}_fastqc.zip",
"results/qc/fastqc/{sample}.{unit}.{reads}.html",
"results/qc/fastq_screen/{sample}.{unit}.{reads}.fastq_screen.txt",
"results/qc/fastq_screen/{sample}.{unit}.{reads}.fastq_screen.png",
"results/mapped/{sample}-{unit}.mapped.flagstat",
"results/mapped/{sample}-{unit}.mapped.idxstats",
"results/mapped/{sample}-{unit}.mapped.stats.txt"
......@@ -246,11 +248,11 @@ def get_multiqc_input(wildcards):
"results/picard_dedup/{sample}.picard_dedup.stats.txt",
"results/bamtools_filtered/{sample}.sorted.bamtools_filtered.flagstat",
"results/bamtools_filtered/{sample}.sorted.bamtools_filtered.idxstats",
"results/bamtools_filtered/{sample}.sorted.bamtools_filtered.stats.txt"
#"results/phantompeakqualtools/{sample}.phantompeak.spp.out",
#"results/phantompeakqualtools/{sample}.spp_correlation_mqc.tsv",
#"results/phantompeakqualtools/{sample}.spp_nsc_mqc.tsv",
#"results/phantompeakqualtools/{sample}.spp_rsc_mqc.tsv"
"results/bamtools_filtered/{sample}.sorted.bamtools_filtered.stats.txt",
"results/phantompeakqualtools/{sample}.phantompeak.spp.out",
"results/phantompeakqualtools/{sample}.spp_correlation_mqc.tsv",
"results/phantompeakqualtools/{sample}.spp_nsc_mqc.tsv",
"results/phantompeakqualtools/{sample}.spp_rsc_mqc.tsv"
],
sample = sample
)
......@@ -360,14 +362,14 @@ def all_input(wildcards):
# mapping, merging and filtering bam-files
for sample in samples.index:
#wanted_input.extend(
# expand (
# [
# "results/phantompeakqualtools/{sample}.phantompeak.pdf"
# ],
# sample = sample
# )
#)
wanted_input.extend(
expand (
[
"results/phantompeakqualtools/{sample}.phantompeak.pdf"
],
sample = sample
)
)
if config["params"]["deeptools-plots"]["activate"]:
wanted_input.extend(
......@@ -424,28 +426,7 @@ def all_input(wildcards):
expand(
[
"results/homer/annotate_consensus_peaks/{antibody}.consensus_{peak}-peaks.annotatePeaks.txt",
"results/homer/annotate_consensus_peaks/{antibody}.consensus_{peak}-peaks.boolean.annotatePeaks.txt",
"results/feature_counts/{antibody}.consensus_{peak}-peaks.featureCounts",
"results/feature_counts/{antibody}.consensus_{peak}-peaks.featureCounts.summary",
"results/feature_counts/{antibody}.consensus_{peak}-peaks.featureCounts.jcounts",
"results/deseq2/dss_rld/{antibody}.consensus_{peak}-peaks.dds.rld.RData",
"results/deseq2/plots/{antibody}.consensus_{peak}-peaks.pca_plot.pdf",
"results/deseq2/plots/{antibody}.consensus_{peak}-peaks.heatmap_plot.pdf",
"results/deseq2/pca_vals/{antibody}.consensus_{peak}-peaks.pca.vals.txt",
"results/deseq2/dists/{antibody}.consensus_{peak}-peaks.sample.dists.txt",
"results/deseq2/sizeFactors/{antibody}.consensus_{peak}-peaks.sizeFactors.RData",
"results/deseq2/sizeFactors/{antibody}.consensus_{peak}-peaks.sizeFactors.sizeFactor.txt",
"results/deseq2/results/{antibody}.consensus_{peak}-peaks.deseq2_results.txt",
"results/deseq2/FDR/results/FDR_0.01_{antibody}.consensus_{peak}-peaks",
"results/deseq2/FDR/results/FDR_0.05_{antibody}.consensus_{peak}-peaks",
"results/deseq2/FDR/bed_files/FDR_0.01_{antibody}.consensus_{peak}-peaks",
"results/deseq2/FDR/bed_files/FDR_0.05_{antibody}.consensus_{peak}-peaks",
"results/deseq2/comparison_plots/MA_plots/FDR_0.01_{antibody}consensus_{peak}-peaks",
"results/deseq2/comparison_plots/MA_plots/FDR_0.05_{antibody}consensus_{peak}-peaks",
"results/deseq2/comparison_plots/volcano_plots/FDR_0.01_{antibody}consensus_{peak}-peaks",
"results/deseq2/comparison_plots/volcano_plots/FDR_0.05_{antibody}consensus_{peak}-peaks",
"results/deseq2/comparison_plots/correlation_heatmaps_{antibody}consensus_{peak}-peaks",
"results/deseq2/comparison_plots/scatter_plots_{antibody}consensus_{peak}-peaks"
"results/homer/annotate_consensus_peaks/{antibody}.consensus_{peak}-peaks.boolean.annotatePeaks.txt"
],
peak = config["params"]["peak-analysis"],
antibody = antibody
......
......@@ -49,18 +49,21 @@ rule igv_files_to_report:
rule collect_igv_report_session_files:
input:
igv_data=get_files_for_igv(),
deseq2_files=directory(expand("results/deseq2/FDR/bed_files/FDR_0.05_{antibody}.consensus_{peak}-peaks",
antibody=get_unique_antibodies(), peak=config["params"]["peak-analysis"])),
fasta="resources/ref/genome.fasta",
#deseq2_files=directory(expand("results/deseq2/FDR/bed_files/FDR_0.05_{antibody}.consensus_{peak}-peaks",
#antibody=get_unique_antibodies(), peak=config["params"]["peak-analysis"])),
#fasta="resources/ref/genome.fasta",
igv_session="results/IGV/report_igv_session.xml"
output:
temp(directory("results/IGV/report_igv_session"))
log:
"logs/igv/collect_igv_report_session_files.log"
shell:
"mkdir -p {output}; cp {input.igv_data} {output}/; "
"cp -R {input.deseq2_files}/* {output}/; cp {input.fasta} {output}/; "
"cp {input.igv_session} {output}/"
"""
mkdir -p {output}; cp {input.igv_data} {output}/;
cp {input.igv_session} {output}/
"""
#"cp -R {input.deseq2_files}/* {output}/; cp {input.fasta} {output}/; "
# igv session that can be downloaded from generated report
rule zip_igv_report_session:
......
......@@ -11,7 +11,7 @@ rule bwa_mem:
extra= get_read_group,
sort="samtools",
sort_order="coordinate",
threads: 8
threads: 6
wrapper:
"0.64.0/bio/bwa/mem"
......
rule preseq_lc_extrap:
input:
"results/sam-view/{sample}.bam"
"results/filtered/{sample}.sorted.bam"
output:
"results/preseq/{sample}.lc_extrap"
params:
......
......@@ -8,7 +8,7 @@ rule fastqc:
""
log:
"logs/fastqc/{sample}.{unit}.{read}.log"
threads: 6
threads: 4
wrapper:
"0.72.0/bio/fastqc"
......@@ -21,3 +21,53 @@ rule multiqc:
"logs/multiqc.log"
wrapper:
"0.64.0/bio/multiqc"
# Runs the fastq_screen wrapper on the FASTQ file(s) returned by
# get_individual_fastq, once per {sample}.{unit}.{read} combination.
rule fastq_screen:
input:
get_individual_fastq
output:
# One text report and one PNG plot per sample/unit/read.
txt="results/qc/fastq_screen/{sample}.{unit}.{read}.fastq_screen.txt",
png="results/qc/fastq_screen/{sample}.{unit}.{read}.fastq_screen.png"
log:
"logs/fastq_screen/{sample}.{unit}.{read}.log"
params:
# Configuration handed to the wrapper as a dict instead of a config file.
# 'database' maps a label to a bowtie2 path built under
# config["params"]["db_bowtie_path"] (presumably bowtie2 index prefixes --
# TODO confirm against the directory layout on the target system).
fastq_screen_config = {
'database': {
'human': {
'bowtie2': "{}/Human/Homo_sapiens.GRCh38".format(config["params"]["db_bowtie_path"])},
'mouse': {
'bowtie2': "{}/Mouse/Mus_musculus.GRCm38".format(config["params"]["db_bowtie_path"])},
'rat':{
'bowtie2': "{}/Rat/Rnor_6.0".format(config["params"]["db_bowtie_path"])},
'drosophila':{
'bowtie2': "{}/Drosophila/BDGP6".format(config["params"]["db_bowtie_path"])},
'worm':{
'bowtie2': "{}/Worm/Caenorhabditis_elegans.WBcel235".format(config["params"]["db_bowtie_path"])},
'yeast':{
'bowtie2': "{}/Yeast/Saccharomyces_cerevisiae.R64-1-1".format(config["params"]["db_bowtie_path"])},
'arabidopsis':{
'bowtie2': "{}/Arabidopsis/Arabidopsis_thaliana.TAIR10".format(config["params"]["db_bowtie_path"])},
'ecoli':{
'bowtie2': "{}/E_coli/Ecoli".format(config["params"]["db_bowtie_path"])},
# NOTE(review): the rRNA entry is named GRCm38_rRNA, the same assembly name
# as the mouse entry above -- confirm this is the intended rRNA set.
'rRNA':{
'bowtie2': "{}/rRNA/GRCm38_rRNA".format(config["params"]["db_bowtie_path"])},
'MT':{
'bowtie2': "{}/Mitochondria/mitochondria".format(config["params"]["db_bowtie_path"])},
'PhiX':{
'bowtie2': "{}/PhiX/phi_plus_SNPs".format(config["params"]["db_bowtie_path"])},
'Lambda':{
'bowtie2': "{}/Lambda/Lambda".format(config["params"]["db_bowtie_path"])},
'vectors':{
'bowtie2': "{}/Vectors/Vectors".format(config["params"]["db_bowtie_path"])},
'adapters':{
'bowtie2': "{}/Adapters/Contaminants".format(config["params"]["db_bowtie_path"])},
'mycoplasma':{
'bowtie2': "{}/Mycoplasma/mycoplasma".format(config["params"]["db_bowtie_path"])}
},
# The aligner path is built from a different config key (bowtie_path) than
# the database paths (db_bowtie_path).
'aligner_paths': {'bowtie2': "{}/bowtie2".format(config["params"]["bowtie_path"])}
},
# subset and aligner are passed through to the wrapper; presumably subset is
# the number of reads sampled per file -- confirm against the wrapper docs.
subset=100000,
aligner='bowtie2'
threads: 6
wrapper:
"0.65.0/bio/fastq_screen"
......@@ -36,7 +36,7 @@ rule sra_get_fastq_pe:
"resources/ref/sra-pe-reads/{accession}.2.fastq"
params:
extra=""
threads: 6
threads: 4
log:
"logs/ref/sra-pe-reads/{accession}.log"
wrapper:
......@@ -47,7 +47,7 @@ rule sra_get_fastq_se:
"resources/ref/sra-se-reads/{accession}.fastq"
params:
extra=""
threads: 6
threads: 4
log:
"logs/ref/sra-pe-reads/{accession}.log"
wrapper:
......
......@@ -113,7 +113,7 @@ count.table <- count.table[,7:ncol(count.table),drop=FALSE]
#setwd(opt$outdir)
samples.vec <- sort(colnames(count.table))
groups <- sub("_[^_]+$", "", samples.vec)
groups <- sub("_[^_]+$", "", samples.vec) #FIXME needs to be better, as in rna-seq
print(unique(groups))
if (length(unique(groups)) == 1) {
quit(save = "no", status = 0, runLast = FALSE)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment