Verified Commit e62b0c4f authored by Aurélien Ginolhac's avatar Aurélien Ginolhac 🚴
Browse files

remove test dataset and github actions

parent 5f7e35b9
name: Tests
on:
push:
branches:
- master
pull_request:
branches: [ master ]
branches_ignore: []
jobs:
Linting:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Lint workflow
uses: snakemake/snakemake-github-action@v1.19.0
with:
directory: .
snakefile: workflow/Snakefile
args: "--lint"
stagein: |
export TMPDIR=/tmp
Testing:
runs-on: ubuntu-latest
needs: Linting
steps:
- name: Checkout repository
uses: actions/checkout@v1
- name: Test dry run for a large single end workflow
uses: snakemake/snakemake-github-action@v1.19.0
with:
directory: .test
snakefile: workflow/Snakefile
args: "--use-conda -n --show-failed-logs -j 10 --conda-cleanup-pkgs cache --conda-frontend mamba"
- name: Test minimized single end workflow (on local reduced SRA files for a single chromosome - homo sapiens)
uses: snakemake/snakemake-github-action@v1.19.0
with:
directory: .test
snakefile: workflow/Snakefile
args: "--configfile .test/config_single_end_reduced/config.yaml --use-conda --cache --show-failed-logs -j 10 --conda-cleanup-pkgs cache --conda-frontend mamba"
stagein: |
export TMPDIR=/tmp
rm -rf .test/resources .test/results
export SNAKEMAKE_OUTPUT_CACHE=/snakemake-cache
mkdir -p -m a+rw $SNAKEMAKE_OUTPUT_CACHE
#
# # Test for single end reads with larger data sets and download of SRA files. It can be included for heavy duty testing on dedicated machines.
#
# - name: Test single end workflow (test data sra-download)
# uses: snakemake/snakemake-github-action@v1.19.0
# with:
# directory: .test
# snakefile: workflow/Snakefile
# args: "--configfile .test/config_single_end/config.yaml --use-conda --cache --show-failed-logs -j 10 --conda-cleanup-pkgs cache --conda-frontend mamba"
# stagein: |
# export TMPDIR=/tmp
# rm -rf .test/resources .test/results
# export SNAKEMAKE_OUTPUT_CACHE=/snakemake-cache
# mkdir -p -m a+rw $SNAKEMAKE_OUTPUT_CACHE
- name: Test minimized paired end workflow (on local reduced SRA files for a single chromosome - saccharomyces cerevisiae)
uses: snakemake/snakemake-github-action@v1.19.0
with:
directory: .test
snakefile: workflow/Snakefile
args: "--configfile .test/config_paired_end_reduced/config.yaml --use-conda --cache --show-failed-logs -j 10 --conda-cleanup-pkgs cache --conda-frontend mamba"
stagein: |
export TMPDIR=/tmp
rm -rf .test/resources .test/results
export SNAKEMAKE_OUTPUT_CACHE=/snakemake-cache
mkdir -p -m a+rw $SNAKEMAKE_OUTPUT_CACHE
#
# # Test for paired end reads with larger datasets from git submodules. It can be included for heavy duty testing on dedicated machines.
#
# - uses: actions/checkout@v1
# - name: Checkout submodules
# uses: textbook/git-checkout-submodule-action@2.0.0
#
# - name: Test paired end workflow (submodule test data)
# uses: snakemake/snakemake-github-action@v1.19.0
# with:
# directory: .test
# snakefile: workflow/Snakefile
# args: "--configfile .test/config_paired_end/config.yaml --use-conda --cache --show-failed-logs -j 10 --conda-cleanup-pkgs cache --conda-frontend mamba"
# stagein: |
# export TMPDIR=/tmp
# rm -rf .test/resources .test/results
# export SNAKEMAKE_OUTPUT_CACHE=/snakemake-cache
# mkdir -p -m a+rw $SNAKEMAKE_OUTPUT_CACHE
- name: Test report
uses: snakemake/snakemake-github-action@v1.19.0
with:
directory: .test
snakefile: workflow/Snakefile
args: "--report report.zip --configfile .test/config_paired_end_reduced/config.yaml"
stagein: |
export TMPDIR=/tmp
rm -rf .test/resources .test/results
# For testing the workflow with larger datasets for paired end reads once a dedicated GitHub actions machine is set up.
[submodule ".test/data/atacseq/test-datasets"]
path = .test/data/atacseq/test-datasets
url = https://github.com/nf-core/test-datasets.git
branch = atacseq
[submodule ".test/data/chipseq/test-datasets"]
path = .test/data/chipseq/test-datasets
url = https://github.com/nf-core/test-datasets.git
branch = chipseq
# This file should contain everything to configure the workflow on a global scale.
# In case of sample based data, it should be complemented by a samples.tsv file that contains
# one row per sample. It can be parsed easily via pandas.
samples: "config/samples.tsv"
# to download reads from SRA, the accession numbers (see https://www.ncbi.nlm.nih.gov/sra) of samples must be given in
# units.tsv. Dataset for testing this workflow with single end reads:
# https://www.ncbi.nlm.nih.gov/Traces/study/?acc=PRJNA255509&o=acc_s%3Aa
units: "config/units.tsv"
single_end: True
# config for a large single end data set
resources:
ref:
# Number of chromosomes to consider for calling.
# The first n entries of the FASTA will be considered.
n_chromosomes: 25
# Ensembl species name
species: homo_sapiens
# Ensembl release
release: 101
# Genome build
build: GRCh38
# for testing data a single chromosome can be selected (leave empty for a regular analysis)
chromosome:
# specify release version number of igenomes list to use (see https://github.com/nf-core/chipseq/releases), e.g. 1.2.2
igenomes_release: 1.2.2
# if igenomes.yaml cannot be used, a value for the mappable or effective genome size can be specified here, e.g. macs-gsize: 2.7e9
macs-gsize:
# if igenomes.yaml cannot be used, a path to an own blacklist can be specified here
blacklist:
params:
# choose "narrow" or "broad" for macs2 callpeak analysis, for documentation and source code please see https://github.com/macs3-project/MACS
peak-analysis: "broad"
# Number of biological replicates required from a given condition for a peak to contribute to a consensus peak
min-reps-consensus: 1
callpeak:
p-value: 0.5
q-value:
deeptools-plots:
# when activated the plot profile and heatmap plot are generated, this involves a matrix calculation that requires a lot of working memory.
activate: True
lc_extrap:
activate: True
picard_metrics:
activate: True
deseq2:
# set to True to use the vst transformation instead of the rlog transformation for the DESeq2 analysis
vst: True
peak-annotation-analysis:
activate: True
peak-qc:
activate: True
consensus-peak-analysis:
activate: True
# samtools view parameter suggestions (for full parameters, see: https://www.htslib.org/doc/samtools-view.html):
# if duplicates should be removed in this filtering, add "-F 0x0400" to the params
# if for each read, you only want to retain a single (best) mapping, add "-q 1" to params
# if you would like to restrict analysis to certain regions (e.g. excluding other "blacklisted" regions),
# the -L option is automatically activated if a path to a blacklist of the given genome exists in the
# downloaded "resources/ref/igenomes.yaml" or has been provided via the parameter
# "config['resources']['ref']['blacklist']" in this configuration file
samtools-view-se: "-b -F 0x004"
samtools-view-pe: "-b -F 0x004 -G 0x009 -f 0x001"
plotfingerprint:
# --numberOfSamples parameter of deeptools plotFingerprint, see: https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html#Optional%20arguments
number-of-samples: 500000
# optional parameters for picard's CollectMultipleMetrics from sorted, filtered and merged bam files in post analysis step
# see https://gatk.broadinstitute.org/hc/en-us/articles/360037594031-CollectMultipleMetrics-Picard-
collect-multiple-metrics: VALIDATION_STRINGENCY=LENIENT
# TODO: move adapter parameters into a `adapter` column in units.tsv and check for its presence via the units.schema.yaml -- this enables unit-specific adapters, e.g. when integrating multiple datasets
# these cutadapt parameters need to contain the required flag(s) for
# the type of adapter(s) to trim, i.e.:
# * https://cutadapt.readthedocs.io/en/stable/guide.html#adapter-types
# * `-a` for 3' adapter in the forward reads
# * `-g` for 5' adapter in the forward reads
# * `-b` for adapters anywhere in the forward reads
# also, separate capitalised letter flags are required for adapters in
# the reverse reads of paired end sequencing:
# * https://cutadapt.readthedocs.io/en/stable/guide.html#trimming-paired-end-reads
cutadapt-se: "-g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
# reasoning behind parameters:
# * `-e 0.005`: the default cutadapt maximum error rate of `0.2` is far too high, for Illumina
# data the error rate is more in the range of `0.005` and setting it accordingly should avoid
# false positive adapter matches
# * `--overlap 7`: the cutadapt default minimum overlap of `5` resulted in trimming at the level
#   of adapter matches expected by chance
cutadapt-pe: "-a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA -g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT -G AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
cutadapt-others: "-e 0.005 --overlap 7"
{
"filters" : [
{
"id" : "paired_end",
"isPaired" : "true"
},
{
"id" : "mismatch",
"tag" : "NM:<=4"
},
{
"id" : "min_size",
"insertSize" : ">=-2000"
},
{
"id" : "max_size",
"insertSize" : "<=2000"
}
],
"rule" : " (paired_end & mismatch & min_size & max_size) | (!paired_end & mismatch) "
}
sample group batch_effect control antibody
A Veh batch1 AG ERa
B Veh batch2 AH ERa
C E2 batch1 AI ERa
D E2 batch2 AJ ERa
E TNFa batch1 AK ERa
F TNFa batch2 AL ERa
G E2_TNFa batch1 AM ERa
H Veh batch1 AG p65
I Veh batch2 AH p65
J E2 batch1 AI p65
K E2 batch2 AJ p65
L TNFa batch1 AK p65
M TNFa batch2 AL p65
N E2_TNFa batch1 AM p65
O E2_TNFa batch2 AN p65
P Veh batch1 AG FoxA1
Q Veh batch2 AH FoxA1
R E2 batch1 AI FoxA1
S E2 batch2 AJ FoxA1
T TNFa batch1 AK FoxA1
U TNFa batch2 AL FoxA1
V E2_TNFa batch1 AM FoxA1
W E2_TNFa batch2 AM FoxA1
X E2_TNFa batch1 AM ERa
Y E2_TNFa batch1 AM ERa
Z E2_TNFa batch1 AM ERa
AA E2_TNFa batch1 AM ERa
AB E2_TNFa batch2 AN ERa
AC E2_TNFa batch2 AN ERa
AD E2_TNFa batch2 AN ERa
AE E2_TNFa batch2 AN ERa
AF Veh batch1
AG Veh batch2
AH E2 batch1
AI E2 batch2
AJ TNFa batch1
AK TNFa batch2
AL E2_TNFa batch1
AM E2_TNFa batch2
{
"filters" : [
{
"id" : "mismatch",
"tag" : "NM:<=4"
}
],
"rule" : " mismatch "
}
sample unit fq1 fq2 sra_accession platform
A 1 SRR1635443 ILLUMINA
B 1 SRR1635444 ILLUMINA
C 1 SRR1635445 ILLUMINA
D 1 SRR1635446 ILLUMINA
E 1 SRR1635447 ILLUMINA
F 1 SRR1635448 ILLUMINA
G 1 SRR1635449 ILLUMINA
G 2 SRR1635450 ILLUMINA
H 1 SRR1635451 ILLUMINA
I 1 SRR1635452 ILLUMINA
J 1 SRR1635453 ILLUMINA
K 1 SRR1635454 ILLUMINA
L 1 SRR1635455 ILLUMINA
M 1 SRR1635456 ILLUMINA
N 1 SRR1635457 ILLUMINA
O 1 SRR1635458 ILLUMINA
P 1 SRR1635459 ILLUMINA
Q 1 SRR1635460 ILLUMINA
R 1 SRR1635461 ILLUMINA
S 1 SRR1635462 ILLUMINA
T 1 SRR1635463 ILLUMINA
U 1 SRR1635464 ILLUMINA
V 1 SRR1635465 ILLUMINA
W 1 SRR1635466 ILLUMINA
X 1 SRR1635467 ILLUMINA
Y 1 SRR1635468 ILLUMINA
Z 1 SRR1635469 ILLUMINA
AA 1 SRR1635470 ILLUMINA
AB 1 SRR1635471 ILLUMINA
AC 1 SRR1635472 ILLUMINA
AD 1 SRR1635473 ILLUMINA
AE 1 SRR1635474 ILLUMINA
AF 1 SRR1635435 ILLUMINA
AG 1 SRR1635436 ILLUMINA
AH 1 SRR1635437 ILLUMINA
AI 1 SRR1635438 ILLUMINA
AJ 1 SRR1635439 ILLUMINA
AK 1 SRR1635440 ILLUMINA
AL 1 SRR1635441 ILLUMINA
AM 1 SRR1635442 ILLUMINA
# This file should contain everything to configure the workflow on a global scale.
# In case of sample based data, it should be complemented by a samples.tsv file that contains
# one row per sample. It can be parsed easily via pandas.
samples: "config_paired_end_reduced/samples.tsv"
units: "config_paired_end_reduced/units.tsv"
single_end: False
# config for paired end data set for testing
resources:
ref:
# Number of chromosomes to consider for calling.
# The first n entries of the FASTA will be considered.
n_chromosomes: 17
# Ensembl species name
species: saccharomyces_cerevisiae
# Ensembl release
release: 101
# Genome build
build: R64-1-1
# for testing data a single chromosome can be selected (leave empty for a regular analysis)
chromosome:
# specify release version number of igenomes list to use (see https://github.com/nf-core/chipseq/releases), e.g. 1.2.2
igenomes_release: 1.2.2
# if igenomes.yaml cannot be used, a value for the mappable or effective genome size can be specified here, e.g. macs-gsize: 2.7e9
macs-gsize:
# if igenomes.yaml cannot be used, a path to an own blacklist can be specified here
blacklist:
params:
# choose "narrow" or "broad" for macs2 callpeak analysis, for documentation and source code please see https://github.com/macs3-project/MACS
peak-analysis: "narrow"
# Number of biological replicates required from a given condition for a peak to contribute to a consensus peak
min-reps-consensus: 1
callpeak:
p-value: 0.5
q-value:
deeptools-plots:
# when activated the plot profile and heatmap plot are generated, this involves a matrix calculation that requires a lot of working memory.
activate: True
lc_extrap:
activate: True
picard_metrics:
activate: True
deseq2:
# set to True to use the vst transformation instead of the rlog transformation for the DESeq2 analysis
vst: True
peak-annotation-analysis:
activate: True
peak-qc:
activate: True
consensus-peak-analysis:
activate: True
# samtools view parameter suggestions (for full parameters, see: https://www.htslib.org/doc/samtools-view.html):
# if duplicates should be removed in this filtering, add "-F 0x0400" to the params
# if for each read, you only want to retain a single (best) mapping, add "-q 1" to params
# if you would like to restrict analysis to certain regions (e.g. excluding other "blacklisted" regions),
# the -L option is automatically activated if a path to a blacklist of the given genome exists in the
# downloaded "resources/ref/igenomes.yaml" or has been provided via the parameter
# "config['resources']['ref']['blacklist']" in this configuration file
samtools-view-se: "-b -F 0x004"
samtools-view-pe: "-b -F 0x004 -G 0x009 -f 0x001"
plotfingerprint:
# --numberOfSamples parameter of deeptools plotFingerprint, see: https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html#Optional%20arguments
number-of-samples: 500000
# optional parameters for picard's CollectMultipleMetrics from sorted, filtered and merged bam files in post analysis step
# see https://gatk.broadinstitute.org/hc/en-us/articles/360037594031-CollectMultipleMetrics-Picard-
collect-multiple-metrics: VALIDATION_STRINGENCY=LENIENT
# TODO: move adapter parameters into a `adapter` column in units.tsv and check for its presence via the units.schema.yaml -- this enables unit-specific adapters, e.g. when integrating multiple datasets
# these cutadapt parameters need to contain the required flag(s) for
# the type of adapter(s) to trim, i.e.:
# * https://cutadapt.readthedocs.io/en/stable/guide.html#adapter-types
# * `-a` for 3' adapter in the forward reads
# * `-g` for 5' adapter in the forward reads
# * `-b` for adapters anywhere in the forward reads
# also, separate capitalised letter flags are required for adapters in
# the reverse reads of paired end sequencing:
# * https://cutadapt.readthedocs.io/en/stable/guide.html#trimming-paired-end-reads
cutadapt-se: "-g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
# reasoning behind parameters:
# * `-e 0.005`: the default cutadapt maximum error rate of `0.2` is far too high, for Illumina
# data the error rate is more in the range of `0.005` and setting it accordingly should avoid
# false positive adapter matches
# * `--overlap 7`: the cutadapt default minimum overlap of `5` resulted in trimming at the level
#   of adapter matches expected by chance
cutadapt-pe: "-a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA -g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT -G AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
cutadapt-others: "-e 0.005 --overlap 7"
{
"filters" : [
{
"id" : "paired_end",
"isPaired" : "true"
},
{
"id" : "mismatch",
"tag" : "NM:<=4"
},
{
"id" : "min_size",
"insertSize" : ">=-2000"
},
{
"id" : "max_size",
"insertSize" : "<=2000"
}
],
"rule" : " (paired_end & mismatch & min_size & max_size) | (!paired_end & mismatch) "
}
sample group batch_effect control antibody
A T0 batch1 E SPT5
B T0 batch2 E SPT5
C T15 batch1 F SPT5
D T15 batch2 F SPT5
E T0 batch1
F T15 batch1
{
"filters" : [
{
"id" : "mismatch",
"tag" : "NM:<=4"
}
],
"rule" : " mismatch "
}
sample unit fq1 fq2 sra_accession platform
A 1 data/atacseq/test-datasets/testdata/SRR1822153_1.fastq.gz data/atacseq/test-datasets/testdata/SRR1822153_2.fastq.gz ILLUMINA
B 1 data/atacseq/test-datasets/testdata/SRR1822154_1.fastq.gz data/atacseq/test-datasets/testdata/SRR1822154_2.fastq.gz ILLUMINA
C 1 data/atacseq/test-datasets/testdata/SRR1822157_1.fastq.gz data/atacseq/test-datasets/testdata/SRR1822157_2.fastq.gz ILLUMINA
D 1 data/atacseq/test-datasets/testdata/SRR1822158_1.fastq.gz data/atacseq/test-datasets/testdata/SRR1822158_2.fastq.gz ILLUMINA
E 1 data/chipseq/test-datasets/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz data/chipseq/test-datasets/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz ILLUMINA
F 1 data/chipseq/test-datasets/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz data/chipseq/test-datasets/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz ILLUMINA
# This file should contain everything to configure the workflow on a global scale.
# In case of sample based data, it should be complemented by a samples.tsv file that contains
# one row per sample. It can be parsed easily via pandas.
samples: "config_paired_end_reduced/samples.tsv"
units: "config_paired_end_reduced/units.tsv"
single_end: False
# config for paired end data set for testing
resources:
ref:
# Number of chromosomes to consider for calling.
# The first n entries of the FASTA will be considered.
n_chromosomes: 17
# Ensembl species name
species: saccharomyces_cerevisiae
# Ensembl release
release: 101
# Genome build
build: R64-1-1
# for testing data a single chromosome can be selected (leave empty for a regular analysis)
chromosome: VII
# specify release version number of igenomes list to use (see https://github.com/nf-core/chipseq/releases), e.g. 1.2.2
igenomes_release: 1.2.2
# if igenomes.yaml cannot be used, a value for the mappable or effective genome size can be specified here, e.g. macs-gsize: 2.7e9
macs-gsize:
# if igenomes.yaml cannot be used, a path to an own blacklist can be specified here
blacklist:
params:
# choose "narrow" or "broad" for macs2 callpeak analysis, for documentation and source code please see https://github.com/macs3-project/MACS
peak-analysis: "broad"
# Number of biological replicates required from a given condition for a peak to contribute to a consensus peak
min-reps-consensus: 1
callpeak:
p-value: 0.5
q-value:
deeptools-plots:
# when activated the plot profile and heatmap plot are generated, this involves a matrix calculation that requires a lot of working memory.
activate: True
lc_extrap:
activate: False
picard_metrics:
activate: True
deseq2:
# set to True to use the vst transformation instead of the rlog transformation for the DESeq2 analysis
vst: False
peak-annotation-analysis:
activate: True
peak-qc:
activate: True
consensus-peak-analysis:
activate: True
# samtools view parameter suggestions (for full parameters, see: https://www.htslib.org/doc/samtools-view.html):
# if duplicates should be removed in this filtering, add "-F 0x0400" to the params
# if for each read, you only want to retain a single (best) mapping, add "-q 1" to params
# if you would like to restrict analysis to certain regions (e.g. excluding other "blacklisted" regions),
# the -L option is automatically activated if a path to a blacklist of the given genome exists in the
# downloaded "resources/ref/igenomes.yaml" or has been provided via the parameter
# "config['resources']['ref']['blacklist']" in this configuration file
samtools-view-se: "-b -F 0x004"
samtools-view-pe: "-b -F 0x004 -G 0x009 -f 0x001"
plotfingerprint:
# --numberOfSamples parameter of deeptools plotFingerprint, see: https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html#Optional%20arguments
number-of-samples: 500000
# optional parameters for picard's CollectMultipleMetrics from sorted, filtered and merged bam files in post analysis step
# see https://gatk.broadinstitute.org/hc/en-us/articles/360037594031-CollectMultipleMetrics-Picard-
collect-multiple-metrics: VALIDATION_STRINGENCY=LENIENT
# TODO: move adapter parameters into a `adapter` column in units.tsv and check for its presence via the units.schema.yaml -- this enables unit-specific adapters, e.g. when integrating multiple datasets
# these cutadapt parameters need to contain the required flag(s) for
# the type of adapter(s) to trim, i.e.:
# * https://cutadapt.readthedocs.io/en/stable/guide.html#adapter-types
# * `-a` for 3' adapter in the forward reads
# * `-g` for 5' adapter in the forward reads
# * `-b` for adapters anywhere in the forward reads
# also, separate capitalised letter flags are required for adapters in
# the reverse reads of paired end sequencing:
# * https://cutadapt.readthedocs.io/en/stable/guide.html#trimming-paired-end-reads
cutadapt-se: "-g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
# reasoning behind parameters:
# * `-e 0.005`: the default cutadapt maximum error rate of `0.2` is far too high, for Illumina
# data the error rate is more in the range of `0.005` and setting it accordingly should avoid
# false positive adapter matches
# * `--overlap 7`: the cutadapt default minimum overlap of `5` resulted in trimming at the level
#   of adapter matches expected by chance
cutadapt-pe: "-a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA -g AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT -G AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
cutadapt-others: "-e 0.005 --overlap 7"
{
"filters" : [
{
"id" : "paired_end",
"isPaired" : "true"
},
{
"id" : "mismatch",
"tag" : "NM:<=4"
},
{
"id" : "min_size",
"insertSize" : ">=-2000"
},
{
"id" : "max_size",
"insertSize" : "<=2000"
}
],
"rule" : " (paired_end & mismatch & min_size & max_size) | (!paired_end & mismatch) "
}
sample group batch_effect control antibody
A T0 batch1 E SPT5
B T0 batch2 E SPT5
C T15 batch1 E SPT5
D T15 batch2 E SPT5
E T0 batch1