Commit 23b3c017 authored by AntonieV's avatar AntonieV
Browse files

multiqc

parent 725c4018
# The main entry point of the workflow.
# After configuring, running snakemake -n in a clone of this repository should successfully execute a dry-run of the workflow.
# import os
include: "rules/common.smk"
include: "rules/qc.smk"
def all_input(wildcards):
    """Collect every target file the workflow should produce.

    For each (sample, unit) pair in ``units.index`` this requests the copied
    FASTQ file plus the FastQC zip archive and HTML report for every read of
    that unit: read ``"0"`` when ``is_single_end(sample, unit)`` is true,
    reads ``"1"`` and ``"2"`` otherwise. The aggregated MultiQC report is
    requested exactly once.

    Args:
        wildcards: Passed in by Snakemake for input functions; not used here.

    Returns:
        list: paths of all requested target files; the MultiQC report is
        always the last entry.
    """
    # NOTE(review): the FastQC paths below must match the outputs declared by
    # the fastqc rule -- confirm that the directory layout agrees.
    per_read_targets = [
        "results/samples_fq/{sample}.{unit}.{reads}.fq",
        "results/qc/fastqc/zip-files/{sample}.{unit}.{reads}.fq_fastqc.zip",
        "results/qc/fastqc/reports/{sample}.{unit}.{reads}.fq.html",
    ]

    wanted_input = []
    for sample, unit in units.index:
        reads = ["0"] if is_single_end(sample, unit) else ["1", "2"]
        wanted_input.extend(
            expand(per_read_targets, sample=sample, unit=unit, reads=reads)
        )

    # The MultiQC report path contains no wildcards, so it is requested once
    # here instead of inside the per-sample expand(), where it only produced
    # duplicate entries.
    wanted_input.append("results/qc/multiqc/multiqc.html")
    return wanted_input
......
from snakemake.utils import validate
import pandas as pd
import os
from pathlib import Path
# this container defines the underlying OS for each job when using the workflow
# with --use-conda --use-singularity
......@@ -53,6 +51,27 @@ def get_individual_fastq(wildcards):
elif wildcards.read == "2":
return units.loc[ (wildcards.sample, wildcards.unit), "fq2" ]
# def get_samples_list(directory_path, format):
# return([Path(file).stem for file in os.listdir(directory_path) if file.endswith(format)])
# return [str(directory_path)+"/"+str(file) for file in os.listdir(directory_path) if file.endswith(format)]
def get_fastqc_list(wildcards):
    """Return the module-level ``multiqc_input`` list.

    The ``wildcards`` argument is accepted so the function can be used as a
    rule input function; it is not consulted.
    """
    return multiqc_input
##### FastQC #####
# FastQC zip archive and HTML report paths for every read of every
# (sample, unit) pair in units.index: read "0" for single-end units,
# reads "1" and "2" otherwise.
multiqc_input = []
for sample, unit in units.index:
    reads = ["0"] if is_single_end(sample, unit) else ["1", "2"]
    multiqc_input += expand(
        [
            "results/qc/fastqc/{sample}.{unit}.{reads}.fq_fastqc.zip",
            "results/qc/fastqc/{sample}.{unit}.{reads}.fq.html",
        ],
        sample=sample,
        unit=unit,
        reads=reads,
    )
# Copy the FASTQ file returned by get_individual_fastq for the requested
# sample/unit/read to a uniformly named path under results/samples_fq/.
rule samples_fq:
    input:
        get_individual_fastq,
    output:
        "results/samples_fq/{sample}.{unit}.{read}.fq",
    shell:
        "cp {input} {output}"
# Run FastQC on one read file of a sample/unit via the Snakemake wrapper
# "0.51.2/bio/fastqc", producing an HTML report and a zip archive and
# writing a log under logs/fastqc/.
# NOTE(review): this block appears to contain both the old and the new side
# of a diff (two inputs, two html=/zip= pairs). As written it is not valid
# Snakemake; confirm which revision is intended before using it.
rule fastqc:
input:
# NOTE(review): two alternative inputs are listed with no separator --
# presumably only one of them is meant to remain; confirm which.
"results/samples_fq/{sample}.{unit}.{read}.fq"
get_individual_fastq
output:
# First html=/zip= pair: reports/ and zip-files/ subdirectories.
html="results/qc/fastqc/reports/{sample}.{unit}.{read}.fq.html",
zip="results/qc/fastqc/zip-files/{sample}.{unit}.{read}.fq_fastqc.zip"
params: ""
# Second html=/zip= pair: flat layout directly under results/qc/fastqc/.
# NOTE(review): these follow `params:` here but look like replacement
# `output:` entries -- verify; the chosen layout has to match the paths
# requested by all_input and multiqc_input.
html="results/qc/fastqc/{sample}.{unit}.{read}.fq.html",
zip="results/qc/fastqc/{sample}.{unit}.{read}.fq_fastqc.zip"
log:
"logs/fastqc/{sample}.{unit}.{read}.log"
wrapper:
"0.51.2/bio/fastqc"
#
# rule extract_txt:
# input:
# "results/qc/fastqc/zip-files/{sample}.{unit}.{read}.fq_fastqc.zip"
# output:
# "results/qc/fastqc/txt-files/{sample}.{unit}.{read}.fastqc_data.txt"
# params:
# outdir="results/qc/fastqc/txt-files",
# infile="{sample}.{unit}.{read}_fastqc/fastqc_data.txt"
# shell:
# "unzip {input} $(unzip -Z1 {input} | grep 'fastqc_data.txt'$) -d {params.outdir} && mv {params.outdir}/$(unzip -Z1 {input} | grep 'fastqc_data.txt'$) {output} && rmdir {params.outdir}/$(unzip -Z1 {input} | grep 'fastqc_data.txt'$ | cut -d '/' -f1)"
#
#
# files = set()
#
# rule generate_sample_list:
# input:
# "results/qc/fastqc/txt-files/{sample}.{unit}.{read}.fastqc_data.txt"
# output:
# "results/qc/fastqc/txt-files/test.txt"
# run:
#
# samples_list = get_samples_list("results/qc/fastqc/txt-files", ".txt")
# for i in samples_list:
# print(i)
# files.add(i)
# filepath = os.path.join("results/qc/fastqc/txt-files", "test.txt")
# f=open("test.txt","w+")
# f.write("test")
# f.close()
# # open("results/qc/fastqc/txt-files/all_files.txt", files)
#
# print(files)
rule multiqc:
input:
# files = get_samples_list("results/qc/fastqc/txt-files", ".txt"),
# expand("results/qc/fastqc/txt-files/{file}.txt", file = files)
# get_samples_list("results/qc/fastqc/txt-files", ".txt")
# "results/qc/fastqc/txt-files/{files}.fastqc_data.txt"
# expand("results/qc/fastqc/txt-files/{sample}.fastqc_data.txt", sample = "{{sample}}")
# expand("results/qc/fastqc/txt-files/{files}.txt", files=get_samples_list)
# get_samples_list(directory_path="results/qc/fastqc/txt-files", format="txt")
# "results/qc/fastqc/txt-files/{files, "."}.fastqc_data.txt"
# "results/qc/fastqc/txt-files/{sample}.{unit}.{read}.fastqc_data.txt"
directory("results/qc/fastqc/zip-files")
# expand("results/qc/fastqc/txt-files/{{sample}}.{{unit}}.{read}.fastqc_data.txt", read=["0", "1", "2"], allow_missing=True)
# expand("results/qc/fastqc/txt-files/{sample}.{unit}.{read}.fastqc_data.txt", read=["0", "1", "2"], allow_missing=True)
# dynamic(expand("results/qc/fastqc/txt-files/{{sample}}.{{unit}}.{read}.fastqc_data.txt", read=["0", "1", "2"], allow_missing=True))
get_fastqc_list
output:
"results/qc/multiqc/multiqc.html"
# wildcard_constraints:
# sample=units.index.sample,
# unit=units.index.unit
# files = get_samples_list(directory_path="results/qc/fastqc/txt-files", format="txt")
# # file="\w+.\w+"
params:
# files = get_samples_list("results/qc/fastqc/txt-files", ".txt")
"" # Optional: extra parameters for multiqc.
log:
"logs/multiqc.log"
wrapper:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment