Commit 725c4018 authored by AntonieV
Browse files

changes in fastqc rule handling, multiqc

parent f639b76c
......@@ -3,6 +3,3 @@
# one row per sample. It can be parsed easily via pandas.
samples: "config/samples.tsv"
units: "config/units.tsv"
# directory where the reads are
reads_dir: "data/reads"
# The main entry point of workflow.
# After configuring, running snakemake -n in a clone of this repository should successfully execute a dry-run of the workflow.
from pathlib import Path
import glob
# import os
include: "rules/common.smk"
include: "rules/qc.smk"
reads_names = set()
path = glob.glob(config["reads_dir"]+"/*.fq")
for p in path:
reads_names.add(Path(p).stem)
def all_input(wildcards):
wanted_input = []
# sample_unit_read_set = set()
for (sample, unit) in units.index:
reads = [ "1", "2" ]
if is_single_end(sample, unit):
reads = [ "0" ]
# for read in reads:
# sample_unit_read_set.add(str(sample) + "." + str(unit) + "." + str(read))
# print(str(sample) + "." + str(unit) + "." + str(read))
wanted_input.extend(
expand (
[
"results/qc/fastqc/{filename}.fq_fastqc.zip",
"results/qc/fastqc/reports/{filename}.fq.html"
],
filename=reads_names
wanted_input.extend(
expand (
[
"results/samples_fq/{sample}.{unit}.{reads}.fq",
"results/qc/fastqc/zip-files/{sample}.{unit}.{reads}.fq_fastqc.zip",
"results/qc/fastqc/reports/{sample}.{unit}.{reads}.fq.html",
# "results/qc/fastqc/txt-files/{sample}.{unit}.{reads}.fastqc_data.txt",
"results/qc/multiqc/multiqc.html"
],
sample = sample,
unit = unit,
reads = reads
)
)
)
# if os.path.exists("results/qc/fastqc/txt-files"):
# wanted_input.extend(
# expand (
# [ "results/qc/multiqc.html" ],
# samples = sample_unit_read_set
# )
# )
return wanted_input
......
from snakemake.utils import validate
import pandas as pd
import os
from pathlib import Path
# this container defines the underlying OS for each job when using the workflow
# with --use-conda --use-singularity
......@@ -44,10 +46,13 @@ def is_single_end(sample, unit):
)
return fq2_present
def get_fastqs(wildcards):
"""Get raw FASTQ files from unit sheet."""
if is_single_end(wildcards.sample, wildcards.unit):
def get_individual_fastq(wildcards):
"""Get individual raw FASTQ files from unit sheet, based on a read (end) wildcard"""
if ( wildcards.read == "0" or wildcards.read == "1" ):
return units.loc[ (wildcards.sample, wildcards.unit), "fq1" ]
else:
u = units.loc[ (wildcards.sample, wildcards.unit), ["fq1", "fq2"] ].dropna()
return [ f"{u.fq1}", f"{u.fq2}" ]
elif wildcards.read == "2":
return units.loc[ (wildcards.sample, wildcards.unit), "fq2" ]
# def get_samples_list(directory_path, format):
# return([Path(file).stem for file in os.listdir(directory_path) if file.endswith(format)])
# return [str(directory_path)+"/"+str(file) for file in os.listdir(directory_path) if file.endswith(format)]
# Stage one raw FASTQ file under a uniform, wildcard-addressable name
# (results/samples_fq/<sample>.<unit>.<read>.fq) so that downstream rules can
# request reads by sample, unit and read number instead of by the arbitrary
# file names listed in the unit sheet.
# The source path is resolved by the input function get_individual_fastq.
# NOTE(review): a plain copy keeps the file content unchanged, so this assumes
# the unit sheet points at uncompressed FASTQ files -- confirm.
rule samples_fq:
    input:
        get_individual_fastq
    output:
        "results/samples_fq/{sample}.{unit}.{read}.fq"
    shell:
        # ":q" shell-quotes the substituted paths, so file names containing
        # spaces or shell metacharacters are passed to cp as single arguments.
        "cp {input:q} {output:q}"
rule fastqc:
input:
expand("{path}/{{read}}.fq", path=config["reads_dir"])
"results/samples_fq/{sample}.{unit}.{read}.fq"
output:
html="results/qc/fastqc/reports/{read}.fq.html",
zip="results/qc/fastqc/{read}.fq_fastqc.zip"
html="results/qc/fastqc/reports/{sample}.{unit}.{read}.fq.html",
zip="results/qc/fastqc/zip-files/{sample}.{unit}.{read}.fq_fastqc.zip"
params: ""
log:
"logs/fastqc/{read}.log"
"logs/fastqc/{sample}.{unit}.{read}.log"
wrapper:
"0.51.2/bio/fastqc"
#
# rule extract_txt:
# input:
# "results/qc/fastqc/zip-files/{sample}.{unit}.{read}.fq_fastqc.zip"
# output:
# "results/qc/fastqc/txt-files/{sample}.{unit}.{read}.fastqc_data.txt"
# params:
# outdir="results/qc/fastqc/txt-files",
# infile="{sample}.{unit}.{read}_fastqc/fastqc_data.txt"
# shell:
# "unzip {input} $(unzip -Z1 {input} | grep 'fastqc_data.txt'$) -d {params.outdir} && mv {params.outdir}/$(unzip -Z1 {input} | grep 'fastqc_data.txt'$) {output} && rmdir {params.outdir}/$(unzip -Z1 {input} | grep 'fastqc_data.txt'$ | cut -d '/' -f1)"
#
#
# files = set()
#
# rule generate_sample_list:
# input:
# "results/qc/fastqc/txt-files/{sample}.{unit}.{read}.fastqc_data.txt"
# output:
# "results/qc/fastqc/txt-files/test.txt"
# run:
#
# samples_list = get_samples_list("results/qc/fastqc/txt-files", ".txt")
# for i in samples_list:
# print(i)
# files.add(i)
# filepath = os.path.join("results/qc/fastqc/txt-files", "test.txt")
# f=open("test.txt","w+")
# f.write("test")
# f.close()
# # open("results/qc/fastqc/txt-files/all_files.txt", files)
#
# print(files)
def _multiqc_inputs(wildcards):
    """Return the FastQC zip archives of every unit in the unit sheet.

    Single-end units contribute read "0", paired-end units reads "1" and "2",
    matching the file names produced by the fastqc rule.
    """
    archives = []
    for sample, unit in units.index:
        reads = ["0"] if is_single_end(sample, unit) else ["1", "2"]
        archives.extend(
            expand(
                "results/qc/fastqc/zip-files/{sample}.{unit}.{read}.fq_fastqc.zip",
                sample=sample,
                unit=unit,
                read=reads,
            )
        )
    return archives


# Aggregate all FastQC results into a single MultiQC report.
rule multiqc:
    input:
        # Depend on the FastQC archives themselves rather than on their parent
        # directory: no rule declares that directory as an output, so a
        # directory input gives Snakemake nothing to schedule fastqc from and
        # fails on a clean run because the path does not exist yet.
        # NOTE(review): the multiqc wrapper is expected to scan the directories
        # containing these files -- confirm for the pinned wrapper version.
        _multiqc_inputs
    output:
        "results/qc/multiqc/multiqc.html"
    params:
        ""  # Optional: extra parameters for multiqc.
    log:
        "logs/multiqc.log"
    wrapper:
        "0.51.3/bio/multiqc"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment