Verified Commit 6107cbd8 authored by Aurélien Ginolhac's avatar Aurélien Ginolhac 🚴
Browse files

replace cutadapt by adapterremoval

parent bd56726e
......@@ -26,6 +26,12 @@ resources:
# if igenomes.yaml cannot be used, a path to an own blacklist can be specified here
blacklist:
trimming:
threads: 2
se: "--adapter1 AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC"
pe: "--adapter1 AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC --adapter2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT"
others: "--gzip --trimqualities --trimns --minlength 35"
params:
# choose "narrow" or "broad" for macs2 callpeak analysis, for documentation and source code please see https://github.com/macs3-project/MACS
peak-analysis: "broad"
......@@ -65,22 +71,6 @@ params:
# optional parameters for picard's CollectMultipleMetrics from sorted, filtered and merged bam files in post analysis step
# see https://gatk.broadinstitute.org/hc/en-us/articles/360037594031-CollectMultipleMetrics-Picard-
collect-multiple-metrics: VALIDATION_STRINGENCY=LENIENT
# TODO: move adapter parameters into a `adapter` column in units.tsv and check for its presence via the units.schema.yaml -- this enables unit-specific adapters, e.g. when integrating multiple datasets
# these cutadapt parameters need to contain the required flag(s) for
# the type of adapter(s) to trim, i.e.:
# * https://cutadapt.readthedocs.io/en/stable/guide.html#adapter-types
# * `-a` for 3' adapter in the forward reads
# * `-g` for 5' adapter in the forward reads
# * `-b` for adapters anywhere in the forward reads
# also, separate capitalised letter flags are required for adapters in
# the reverse reads of paired end sequencing:
# * https://cutadapt.readthedocs.io/en/stable/guide.html#trimming-paired-end-reads
cutadapt-se: "-g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
# reasoning behind parameters:
# * `-e 0.005`: the default cutadapt maximum error rate of `0.2` is far too high, for Illumina
# data the error rate is more in the range of `0.005` and setting it accordingly should avoid
# false positive adapter matches
# * `--minimum-overlap 7`: the cutadapt default minimum overlap of `5` did trimming on the level
# of expected adapter matches by chance
cutadapt-pe: "-a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA -g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT -G AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
cutadapt-others: "-e 0.005 --overlap 7"
bowtie_path: "/usr/local/bin/"
db_bowtie_path: "/scratch/users/aginolhac/FastQ_Screen_Genomes/"
......@@ -9,12 +9,12 @@ configfile: "config/config.yaml"
report: "report/workflow.rst"
container: "docker://continuumio/miniconda3"
container: "docker://ginolhac/snake-chip-seq:0.1"
include: "rules/common.smk"
include: "rules/ref.smk"
include: "rules/qc.smk"
include: "rules/cutadapt.smk"
include: "rules/trimming.smk"
include: "rules/mapping.smk"
include: "rules/filtering.smk"
include: "rules/stats.smk"
......
......@@ -224,10 +224,10 @@ def get_multiqc_input(wildcards):
reads = [ "1", "2" ]
if is_single_end(sample, unit):
reads = [ "0" ]
multiqc_input.extend(expand (["logs/cutadapt/{sample}-{unit}.se.log"],
multiqc_input.extend(expand (["logs/trimming/{sample}-{unit}.se.log"],
sample = sample, unit = unit))
else:
multiqc_input.extend(expand (["logs/cutadapt/{sample}-{unit}.pe.log"],
multiqc_input.extend(expand (["logs/trimming/{sample}-{unit}.pe.log"],
sample = sample, unit = unit))
multiqc_input.extend(
......
rule cutadapt_pe:
input:
get_fastqs
output:
fastq1="results/trimmed/{sample}-{unit}.1.fastq.gz",
fastq2="results/trimmed/{sample}-{unit}.2.fastq.gz",
qc="results/trimmed/{sample}-{unit}.pe.qc.txt",
log="logs/cutadapt/{sample}-{unit}.pe.log"
params:
adapters = config["params"]["cutadapt-pe"],
others = config["params"]["cutadapt-others"]
log:
"logs/cutadapt/{sample}-{unit}.pe.log"
wrapper:
"0.64.0/bio/cutadapt/pe"
rule cutadapt_se:
input:
get_fastqs
output:
fastq="results/trimmed/{sample}-{unit}.fastq.gz",
qc="results/trimmed/{sample}-{unit}.se.qc.txt",
log="logs/cutadapt/{sample}-{unit}.se.log"
params:
"{} {}".format(
config["params"]["cutadapt-se"],
config["params"]["cutadapt-others"]
)
log:
"logs/cutadapt/{sample}-{unit}.se.log"
wrapper:
"0.64.0/bio/cutadapt/se"
rule trimming_se:
input:
get_fastqs
output:
fastq="results/trimmed_se/{sample}-{unit}.fastq.gz",
discarded="trimmed_se/{sample}-{unit}.discarded.fastq.gz",
settings="trimmed_se/{sample}-{unit}.settings"
params:
adapters = config["trimming"]["se"],
others = config["trimming"]["others"]
threads: config["trimming"]["threads"]
log:
"logs/trimming/{sample}-{unit}.se.log"
wrapper:
"0.76.0/bio/adapterremoval"
rule trimming_pe:
input:
get_fastqs
output:
fq1="trimmed_pe/{sample}-{unit}_R1.fastq.gz", # trimmed mate1 reads
fq2="trimmed_pe/{sample}-{unit}_R2.fastq.gz", # trimmed mate2 reads
collapsed="trimmed_pe/{sample}-{unit}.collapsed.fastq.gz", # overlapping mate-pairs which have been merged into a single read
collapsed_trunc="trimmed_pe/{sample}-{unit}.collapsed_trunc.fastq.gz", # collapsed reads that were quality trimmed
singleton="trimmed_pe/{sample}-{unit}.singleton.fastq.gz", # mate-pairs for which the mate has been discarded
discarded="trimmed_pe/{sample}-{unit}.discarded.fastq.gz", # reads that did not pass filters
settings="trimmed_pe/{sample}-{unit}.settings"
params:
adapters = config["trimming"]["pe"],
others = config["trimming"]["others"]
threads: config["trimming"]["threads"]
log:
"logs/trimming/{sample}-{unit}.pe.log"
wrapper:
"0.76.0/bio/adapterremoval"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment