# This file contains everything to configure the workflow on a global scale.
# The sample-based data must be complemented by a samples.tsv file that contains
# one row per sample. It can be parsed easily via pandas.
samples: "config/samples.tsv"
# The source of fastq files for every sequencing unit of all samples has to be
# provided in the units.tsv file.
units: "config/units.tsv"
# NOTE(review): presumably switches between single-end (true) and paired-end
# (false) processing, cf. the "se"/"pe" options under "trimming" and the
# "samtools-view-se"/"samtools-view-pe" options under "params" - confirm
# against the workflow rules.
single_end: true

# Reference genome settings.
# Keys with an empty value (chromosome, macs-gsize, blacklist) are parsed as null.
resources:
  ref:
    # Ensembl species name
    species: saccharomyces_cerevisiae
    # Ensembl release
    release: 101
    # Genome build
    build: R64-1-1
    # for testing data a single chromosome can be selected
    # (leave empty for a regular analysis)
    chromosome:
    # specify release version number of igenomes list to use
    # (see https://github.com/nf-core/chipseq/releases), e.g. 1.2.2
    # quoted so that it is always read as a string, never as a number
    igenomes_release: "1.2.2"
    # if igenomes.yaml cannot be used, a value for the mappable or effective
    # genome size can be specified here, e.g. macs-gsize: 2.7e9
    macs-gsize:
    # if igenomes.yaml cannot be used, a path to an own blacklist can be
    # specified here
    blacklist:

# Options for the read-trimming step.
# NOTE(review): the flags below look like AdapterRemoval command-line options -
# confirm against the rule that consumes them.
trimming:
  threads: 2
  # adapter option used for single-end data
  se: '--adapter1 AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'
  # adapter options used for paired-end data; the folded scalar joins both
  # lines with a single space and strips the trailing newline
  pe: >-
    --adapter1 AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
    --adapter2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
  # further options, listed separately from the adapter settings
  others: '--gzip --trimqualities --trimns --minlength 35'

# Per-step parameters and on/off switches for optional analysis steps.
params:
  # Number of biological replicates required from a given condition for a peak
  # to contribute to a consensus peak
  min-reps-consensus: 1
  # NOTE(review): presumably significance cutoffs for peak calling; q-value is
  # left empty and is therefore parsed as null - confirm how the rule treats it.
  callpeak:
    p-value: 0.5
    q-value:
  deeptools-plots:
    # when activated, the plot profile and heatmap plot are generated; this
    # involves a matrix calculation that requires a lot of working memory
    activate: true
  lc_extrap:
    activate: false
  picard_metrics:
    activate: true
  peak-annotation-analysis:
    activate: true
  peak-qc:
    activate: true
  consensus-peak-analysis:
    # when more than one sample
    # NOTE(review): presumably this should only be activated if the analysis
    # contains more than one sample - confirm.
    activate: false
  # samtools view parameter suggestions
  # (for full parameters, see: https://www.htslib.org/doc/samtools-view.html):
  # if duplicates should be removed in this filtering, add "-F 0x0400" to the params
  # if for each read, you only want to retain a single (best) mapping, add "-q 1" to params
  # if you would like to restrict analysis to certain regions (e.g. excluding
  # other "blacklisted" regions), the -L option is automatically activated if a
  # path to a blacklist of the given genome exists in the downloaded
  # "resources/ref/igenomes.yaml" or has been provided via the parameter
  # "config['resources']['ref']['blacklist']" in this configuration file
  samtools-view-se: "-b -F 0x004"
  samtools-view-pe: "-b -F 0x004 -G 0x009 -f 0x001"
  plotfingerprint:
    # --numberOfSamples parameter of deeptools plotFingerprint, see:
    # https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html#Optional%20arguments
    number-of-samples: 500000
  # optional parameters for picard's CollectMultipleMetrics from sorted,
  # filtered and merged bam files in post analysis step, see
  # https://gatk.broadinstitute.org/hc/en-us/articles/360037594031-CollectMultipleMetrics-Picard-
  collect-multiple-metrics: VALIDATION_STRINGENCY=LENIENT
  # NOTE(review): both paths below are absolute and machine-specific; they must
  # be adjusted to the local installation. Judging by the key and directory
  # names they point to the bowtie binaries and to FastQ Screen genome
  # databases - confirm against the rule that reads them.
  bowtie_path: "/usr/local/bin/"
  db_bowtie_path: "/scratch/users/aginolhac/FastQ_Screen_Genomes/"