Verified Commit f8084486 authored by Aurélien Ginolhac's avatar Aurélien Ginolhac 🚴
Browse files

trimming with adapter removal

parent 7ac84a08
sample group batch_effect control antibody
A TNFa batch1 D p65
B TNFa batch2 D p65
C E2TNFa batch1 E p65
D TNFa batch1
E E2TNFa batch1
sample unit fq1 fq2 sra_accession platform
A 1 data/single_end_test_data/A-1_chr21.fastq.gz ILLUMINA
B 1 data/single_end_test_data/B-1_chr21.fastq.gz ILLUMINA
C 1 data/single_end_test_data/C-1_chr21.fastq.gz ILLUMINA
D 1 data/single_end_test_data/D-1_chr21.fastq.gz ILLUMINA
E 1 data/single_end_test_data/E-1_chr21.fastq.gz ILLUMINA
......@@ -81,19 +81,11 @@ def get_individual_fastq(wildcards):
else:
return units.loc[ (wildcards.sample, wildcards.unit), "fq2" ]
def get_fastqs(wildcards):
"""Get raw FASTQ files from unit sheet."""
if is_sra_se(wildcards.sample, wildcards.unit):
return expand("resources/ref/sra-se-reads/{accession}.fastq",
accession=units.loc[ (wildcards.sample, wildcards.unit), "sra_accession" ])
elif is_sra_pe(wildcards.sample, wildcards.unit):
return expand(["resources/ref/sra-pe-reads/{accession}.1.fastq", "ref/sra-pe-reads/{accession}.2.fastq"],
accession=units.loc[ (wildcards.sample, wildcards.unit), "sra_accession" ])
elif is_single_end(wildcards.sample, wildcards.unit):
return units.loc[ (wildcards.sample, wildcards.unit), "fq1" ]
else:
u = units.loc[ (wildcards.sample, wildcards.unit), ["fq1", "fq2"] ].dropna()
return [ f"{u.fq1}", f"{u.fq2}" ]
def get_fastq(wildcards):
fastqs = units.loc[(wildcards.sample, wildcards.unit), ["fq1", "fq2"]].dropna()
if len(fastqs) == 2:
return {"fq1": fastqs.fq1, "fq2": fastqs.fq2}
return {"fq1": fastqs.fq1}
def is_control(sample):
control = samples.loc[sample]["control"]
......@@ -151,8 +143,8 @@ def get_samples_of_antibody(antibody):
def get_map_reads_input(wildcards):
if is_single_end(wildcards.sample, wildcards.unit):
return "results/trimmed/{sample}-{unit}.fastq.gz"
return ["results/trimmed/{sample}-{unit}.1.fastq.gz", "results/trimmed/{sample}-{unit}.2.fastq.gz"]
return "results/trimmed_se/{sample}-{unit}.fastq.gz"
return ["results/trimmed_pe/{sample}-{unit}.1.fastq.gz", "results/trimmed_pe/{sample}-{unit}.2.fastq.gz"]
def get_read_group(wildcards):
"""Denote sample name and platform in read group."""
......@@ -346,8 +338,8 @@ def all_input(wildcards):
if is_single_end(sample, unit):
wanted_input.extend(expand(
[
"results/trimmed/{sample}-{unit}.fastq.gz",
"results/trimmed/{sample}-{unit}.se.qc.txt"
"results/trimmed_se/{sample}-{unit}.fastq.gz"#,
#"results/trimmed_se/{sample}-{unit}.settings"
],
sample = sample,
unit = unit
......@@ -357,9 +349,9 @@ def all_input(wildcards):
wanted_input.extend(
expand (
[
"results/trimmed/{sample}-{unit}.1.fastq.gz",
"results/trimmed/{sample}-{unit}.2.fastq.gz",
"results/trimmed/{sample}-{unit}.pe.qc.txt"
"results/trimmed_pe/{sample}-{unit}.R1.fastq.gz",
"results/trimmed_pe/{sample}-{unit}.R2.fastq.gz",
"results/trimmed_pe/{sample}-{unit}.settings"
],
sample = sample,
unit = unit
......
......@@ -20,7 +20,7 @@ rule bamtools_filter_json:
temp("results/bamtools_filtered/{sample}.bam")
params:
# filters mismatches in all reads and filters pe-reads within a size range given in json-file
json="../config/{}_bamtools_filtering_rules.json".format("se" if config["single_end"] else "pe")
json="config/{}_bamtools_filtering_rules.json".format("se" if config["single_end"] else "pe")
log:
"logs/filtered/{sample}.log"
wrapper:
......@@ -54,7 +54,7 @@ rule orphan_remove:
conda:
"../envs/pysam.yaml"
shell:
" ../workflow/scripts/rm_orphan_pe_bam.py {input} {output.bam} {params} 2> {log}"
" workflow/scripts/rm_orphan_pe_bam.py {input} {output.bam} {params} 2> {log}"
rule samtools_sort_pe:
input:
......
......@@ -206,7 +206,7 @@ rule phantompeakqualtools:
rule phantompeak_correlation:
input:
data="results/phantompeakqualtools/{sample}.phantompeak.Rdata",
header="../workflow/header/spp_corr_header.txt"
header="workflow/header/spp_corr_header.txt"
output:
"results/phantompeakqualtools/{sample}.spp_correlation_mqc.tsv"
log:
......@@ -220,8 +220,8 @@ rule phantompeak_multiqc:
# NSC (Normalized strand cross-correlation) and RSC (relative strand cross-correlation) metrics use cross-correlation
input:
data="results/phantompeakqualtools/{sample}.phantompeak.spp.out",
nsc_header="../workflow/header/nsc_header.txt",
rsc_header="../workflow/header/rsc_header.txt"
nsc_header="workflow/header/nsc_header.txt",
rsc_header="workflow/header/rsc_header.txt"
output:
nsc="results/phantompeakqualtools/{sample}.spp_nsc_mqc.tsv",
rsc="results/phantompeakqualtools/{sample}.spp_rsc_mqc.tsv"
......
# adapted from https://github.com/UofABioinformaticsHub/RNAseq_snakemake/blob/master/snakefile.py
rule trimming_pe:
input:
# unpack to convert the returned dict to a list
unpack(get_fastq)
output:
fastq1="results/trimmed_pe/{sample}-{unit}.1.fastq.gz",
fastq2="results/trimmed_pe/{sample}-{unit}.2.fastq.gz",
single="results/trimmed_pe/{sample}-{unit}.singletons.gz",
discarded="results/trimmed_pe/{sample}-{unit}.discarded.gz",
settings="results/trimmed_pe/{sample}-{unit}.settings"
params:
adapter = config["trimming"]["pe"],
others = config["trimming"]["others"]
container:
"docker://ginolhac/snake-rna-seq:0.4"
threads: config["trimming"]["threads"]
log:
"logs/trimming/{sample}-{unit}.pe.log"
shell:
"AdapterRemoval --file1 {input.fq1} --file2 {input.fq2} "
"--output1 {output.fq1} --output2 {output.fq2} "
"--singleton {output.single} "
"--settings {output.settings} --discarded {output.discarded} "
" {params.adapter} "
"--threads {threads} {params.others}"
# adapted from https://github.com/UofABioinformaticsHub/RNAseq_snakemake/blob/master/snakefile.py
rule trimming_se:
input:
unpack(get_fastq)
output:
fastq="results/trimmed_se/{sample}-{unit}.fastq.gz",
settings="results/trimmed_se/{sample}-{unit}.settings",
discarded="results/trimmed_se/{sample}-{unit}.discarded.gz"
params:
adapter = config["trimming"]["se"],
others = config["trimming"]["others"]
container:
"docker://ginolhac/snake-rna-seq:0.4"
threads: config["trimming"]["threads"]
log:
"logs/trimming/{sample}-{unit}.se.log"
shell:
"AdapterRemoval --file1 {input.fq1} "
"--output1 {output.fastq} "
"--settings {output.settings} --discarded {output.discarded} "
" {params.adapter} "
"--threads {threads} {params.others}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment