# This file should contain everything to configure the workflow on a global scale.
# In case of sample-based data, it should be complemented by a samples.tsv file that contains
# one row per sample. It can be parsed easily via pandas.
# path to the sample sheet (TSV format, one row per sample)
samples: "config/samples.tsv"
# path to the unit sheet (TSV format)
# NOTE(review): presumably one row per sequencing unit of a sample -- confirm against the
# workflow rules that read this file
units: "config/units.tsv"

resources:
  ref:
    # path to the reference genome file in FASTA format
    genome: "resources/ref/genome.chr21.fa"

params:
  # The `cutadapt-se` (single end) and `cutadapt-pe` (paired end) parameters need to
  # contain the required flag(s) for the type of adapter(s) to trim, i.e.:
  # * https://cutadapt.readthedocs.io/en/stable/guide.html#adapter-types
  #   * `-a` for 3' adapter in the forward reads
  #   * `-g` for 5' adapter in the forward reads
  #   * `-b` for adapters anywhere in the forward reads
  # Also, separate capitalised letter flags are required for adapters in
  # the reverse reads of paired end sequencing:
  # * https://cutadapt.readthedocs.io/en/stable/guide.html#trimming-paired-end-reads
  cutadapt-se: "-g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
  # Folded scalar (`>-`): the lines below are joined with single spaces and no trailing
  # newline, so the parsed value is one command-line fragment.
  cutadapt-pe: >-
    -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
    -g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
    -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
    -G AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
  # reasoning behind the `cutadapt-others` parameters:
  #   * `-e 0.005`: the default cutadapt maximum error rate is far too high; for Illumina
  #     data the error rate is more in the range of `0.005` and setting it accordingly should
  #     avoid false positive adapter matches
  #   * `--overlap 7`: the cutadapt default minimum overlap did trimming on the level
  #     of expected adapter matches by chance
  #   NOTE(review): this comment previously cited cutadapt defaults of `0.2` (error rate) and
  #   `5` (minimum overlap); the cutadapt reference appears to list `0.1` and `3` -- confirm
  #   against the cutadapt version used by the workflow.
  cutadapt-others: "-e 0.005 --overlap 7"