Unverified Commit bb5a88a1 authored by Antonie Vietor's avatar Antonie Vietor Committed by GitHub
Browse files

Mapping and duplicate marking (#2)

* trimming with cutadapt added

* additions to cutadapt according to View #1, BWA added as draft

* adding cutadapt comments back, bwa mem will be moved to a new branch

* mapping, duplicate marking and merging

* changes according View #2 added, some minor additional changes with handling cutadapt-log-files for multiqc input in common.smk were necessary

* code formatting with black

* changes according view #2 added and replaced directory name mapped with merged in bwa_mem output, because its bam files are only temporary and otherwise the empty mapped directory would remain after the workflow-run

* view #2: output path for rule bwa_mem and input path for rule merge_bams changed
parent 3f3ed4f5
......@@ -4,6 +4,11 @@
samples: "config/samples.tsv"
units: "config/units.tsv"
resources:
ref:
# path to referece genome file in FASTA format
genome: "resources/ref/genome.chr21.fa"
params:
# these cutadapt parameters need to contain the required flag(s) for
# the type of adapter(s) to trim, i.e.:
......
sample unit fragment_len_mean fragment_len_sd fq1 fq2
A 1 raw/a.chr21.1.fq raw/a.chr21.2.fq
B 1 raw/b.chr21.1.fq raw/b.chr21.2.fq
B 2 300 14 raw/b.chr21.1.fq
C 1 raw/a.chr21.1.fq raw/a.chr21.2.fq
D 1 raw/b.chr21.1.fq raw/b.chr21.2.fq
sample unit fragment_len_mean fragment_len_sd fq1 fq2 platform
A 1 raw/a.chr21.1.fq raw/a.chr21.2.fq ILLUMINA
B 1 raw/b.chr21.1.fq raw/b.chr21.2.fq ILLUMINA
B 2 300 14 raw/b.chr21.1.fq ILLUMINA
C 1 raw/a.chr21.1.fq raw/a.chr21.2.fq ILLUMINA
D 1 raw/b.chr21.1.fq raw/b.chr21.2.fq ILLUMINA
......@@ -4,13 +4,16 @@
include: "rules/common.smk"
include: "rules/qc.smk"
include: "rules/cutadapt.smk"
include: "rules/mapping.smk"
def all_input(wildcards):
wanted_input = []
# QC with fastQC and multiQC
wanted_input.extend(["results/qc/multiqc/multiqc.html"])
# trimming reads
for (sample, unit) in units.index:
if is_single_end(sample, unit):
wanted_input.extend(expand(
......@@ -35,6 +38,16 @@ def all_input(wildcards):
)
)
# mapping and merging bam-files
wanted_input.extend(
expand (
[
"results/merged/dedup/{sample}.bam"
],
sample = sample
)
)
return wanted_input
rule all:
......
......@@ -57,6 +57,11 @@ def get_multiqc_input(wildcards):
reads = [ "1", "2" ]
if is_single_end(sample, unit):
reads = [ "0" ]
multiqc_input.extend(expand (["logs/cutadapt/{sample}-{unit}.se.log"],
sample = sample, unit = unit))
else:
multiqc_input.extend(expand (["logs/cutadapt/{sample}-{unit}.pe.log"],
sample = sample, unit = unit))
multiqc_input.extend(
expand (
......@@ -69,6 +74,15 @@ def get_multiqc_input(wildcards):
reads = reads
)
)
for sample in samples.index:
multiqc_input.extend(
expand (
[
"results/merged/dedup/{sample}.metrics.txt"
],
sample = sample
)
)
return multiqc_input
def get_fastqs(wildcards):
......@@ -78,3 +92,15 @@ def get_fastqs(wildcards):
else:
u = units.loc[ (wildcards.sample, wildcards.unit), ["fq1", "fq2"] ].dropna()
return [ f"{u.fq1}", f"{u.fq2}" ]
def get_map_reads_input(wildcards):
if is_single_end(wildcards.sample, wildcards.unit):
return "results/trimmed/{sample}-{unit}.fastq.gz"
return ["results/trimmed/{sample}-{unit}.1.fastq.gz", "results/trimmed/{sample}-{unit}.2.fastq.gz"]
def get_read_group(wildcards):
"""Denote sample name and platform in read group."""
return r"-R '@RG\tID:{sample}-{unit}\tSM:{sample}-{unit}\tPL:{platform}'".format(
sample=wildcards.sample,
unit=wildcards.unit,
platform=units.loc[(wildcards.sample, wildcards.unit), "platform"])
......@@ -4,12 +4,13 @@ rule cutadapt_pe:
output:
fastq1="results/trimmed/{sample}-{unit}.1.fastq.gz",
fastq2="results/trimmed/{sample}-{unit}.2.fastq.gz",
qc="results/trimmed/{sample}-{unit}.pe.qc.txt"
qc="results/trimmed/{sample}-{unit}.pe.qc.txt",
log="logs/cutadapt/{sample}-{unit}.pe.log"
params:
adapters = config["params"]["cutadapt-pe"],
others = config["params"]["cutadapt-others"]
log:
"results/logs/cutadapt/{sample}-{unit}.log"
"logs/cutadapt/{sample}-{unit}.pe.log"
wrapper:
"0.52.0/bio/cutadapt/pe"
......@@ -18,13 +19,14 @@ rule cutadapt_se:
get_fastqs
output:
fastq="results/trimmed/{sample}-{unit}.fastq.gz",
qc="results/trimmed/{sample}-{unit}.se.qc.txt"
qc="results/trimmed/{sample}-{unit}.se.qc.txt",
log="logs/cutadapt/{sample}-{unit}.se.log"
params:
"{} {}".format(
config["params"]["cutadapt-se"],
config["params"]["cutadapt-others"]
)
log:
"results/logs/cutadapt/{sample}-{unit}.log"
"logs/cutadapt/{sample}-{unit}.se.log"
wrapper:
"0.52.0/bio/cutadapt/se"
rule bwa_index:
input:
config["resources"]["ref"]["genome"]
output:
multiext(config["resources"]["ref"]["genome"], ".amb", ".ann", ".bwt", ".pac", ".sa")
log:
"logs/bwa/bwa_index.log"
params:
algorithm="bwtsw"
wrapper:
"0.55.1/bio/bwa/index"
rule bwa_mem:
input:
reads = get_map_reads_input,
idx = rules.bwa_index.output
output:
temp("results/mapped/{sample}-{unit}.bam")
log:
"logs/bwa/bwa_mem/{sample}-{unit}.log"
params:
index= lambda w, input: os.path.splitext(input.idx[0])[0],
extra= get_read_group,
sort="samtools",
sort_order="coordinate",
threads: 8
wrapper:
"0.52.0/bio/bwa/mem"
rule merge_bams:
input:
lambda w: expand("results/mapped/{sample}-{unit}.bam",
sample = w.sample,
unit = units.loc[units['sample'] == w.sample].unit.to_list()
)
output:
temp("results/merged/{sample}.bam")
log:
"logs/picard/mergebamfiles/{sample}.log"
params:
"VALIDATION_STRINGENCY=LENIENT"
wrapper:
"0.55.1/bio/picard/mergesamfiles"
rule mark_merged_duplicates:
input:
"results/merged/{sample}.bam"
output:
bam="results/merged/dedup/{sample}.bam",
metrics="results/merged/dedup/{sample}.metrics.txt"
log:
"logs/picard/dedup_merged/{sample}.log"
params:
"REMOVE_DUPLICATES=false ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT"
wrapper:
"0.55.1/bio/picard/markduplicates"
......@@ -12,10 +12,8 @@ rule fastqc:
rule multiqc:
input:
get_multiqc_input
output:
"results/qc/multiqc/multiqc.html"
log:
"logs/multiqc.log"
wrapper:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment