config.yaml 3.6 KB
Newer Older
Antonie Vietor's avatar
Antonie Vietor committed
1
2
3
4
# Global, workflow-wide configuration belongs in this file.
# For sample-based data it should be complemented by a samples.tsv sheet that
# holds one row per sample; that sheet can be parsed easily via pandas.
samples: 'config/samples.tsv'
Antonie Vietor's avatar
Antonie Vietor committed
5
6
7
# To download reads from SRA, the accession numbers of the samples
# (see https://www.ncbi.nlm.nih.gov/sra) must be given in units.tsv.
# Dataset for testing this workflow with single end reads:
# https://www.ncbi.nlm.nih.gov/Traces/study/?acc=PRJNA255509&o=acc_s%3Aa
units: "config/units.tsv"
Antonie Vietor's avatar
Antonie Vietor committed
9
# NOTE(review): presumably flags the units as single end reads -- confirm against the workflow rules.
single_end: true
Antonie Vietor's avatar
Antonie Vietor committed
10

Antonie Vietor's avatar
Antonie Vietor committed
11
# config for a large single end data set
resources:
  ref:
    # Number of chromosomes to consider for calling.
    # The first n entries of the FASTA will be considered.
    n_chromosomes: 25
    # Ensembl species name
    species: homo_sapiens
    # Ensembl release
    release: 101
    # Genome build
    build: GRCh38
    # For testing data, a specific chromosome can be selected (left unset here).
    chromosome: null
    # Release version number of the igenomes list to use
    # (see https://github.com/nf-core/chipseq/releases), default: 1.2.2
    # Quoted so that it is always read as a string, never as a number.
    igenomes_release: "1.2.2"
    # If igenomes.yaml cannot be used, a value for the mappable or effective
    # genome size can be specified here, e.g. macs-gsize: 2.7e9
    macs-gsize: null
    # If igenomes.yaml cannot be used, a path to an own blacklist can be
    # specified here.
    blacklist: null
Antonie Vietor's avatar
Antonie Vietor committed
31
32

params:
  # Choose "narrow" or "broad" for macs2 callpeak analysis; for documentation
  # and source code please see https://github.com/macs3-project/MACS
  peak-analysis: "broad"
  # Number of biological replicates required from a given condition for a peak
  # to contribute to a consensus peak
  min-reps-consensus: 1
  callpeak:
    p-value: 0.5
    # left unset here
    q-value: null
  deeptools-plots:
    # When activated, the plot profile and heatmap plot are generated; this
    # involves a matrix calculation that requires a lot of working memory.
    activate: true
  lc_extrap:
    activate: true
  picard_metrics:
    activate: true
  deseq2:
    # optional to run vst transform instead of rlog
    vst: true
  peak-annotation-analysis:
    activate: true
  peak-qc:
    activate: true
  consensus-peak-analysis:
    activate: true
  # TODO: move adapter parameters into a `adapter` column in units.tsv and check
  # for its presence via the units.schema.yaml -- this enables unit-specific
  # adapters, e.g. when integrating multiple datasets
  # these cutadapt parameters need to contain the required flag(s) for
  # the type of adapter(s) to trim, i.e.:
  # * https://cutadapt.readthedocs.io/en/stable/guide.html#adapter-types
  #   * `-a` for 3' adapter in the forward reads
  #   * `-g` for 5' adapter in the forward reads
  #   * `-b` for adapters anywhere in the forward reads
  # also, separate capitalised letter flags are required for adapters in
  # the reverse reads of paired end sequencing:
  # * https://cutadapt.readthedocs.io/en/stable/guide.html#trimming-paired-end-reads
  cutadapt-se: "-g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
  cutadapt-pe: "-a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA -g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT -G AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
  # reasoning behind the parameters in `cutadapt-others`:
  #   * `-e 0.005`: the default cutadapt maximum error rate of `0.2` is far too high, for Illumina
  #     data the error rate is more in the range of `0.005` and setting it accordingly should avoid
  #     false positive adapter matches
  #   * `--overlap 7`: the cutadapt default minimum overlap of `5` did trimming on the level
  #     of expected adapter matches by chance
  # NOTE(review): the default values quoted above (`0.2`, `5`) should be checked
  # against the documentation of the cutadapt version in use.
  cutadapt-others: "-e 0.005 --overlap 7"