Commit 804a5f50 authored by Laura Denies's avatar Laura Denies
Browse files

update test module

parent 4fea8c56
pathofact: pathofact:
sample: ["test_sample"] # requires user input sample: ["SRR2582233","SRR2582234","SRR2582237","SRR2582238","SRR2582241","SRR2582243","SRR2582246","SRR2582247","SRR2582248","SRR2582251","SRR2582252","SRR2582253","SRR2582255","SRR2582257","SRR2582258"] # requires user input
project: PathoFact_update_trial_new # requires user input project: PathoFact_v2 # requires user input
datadir: ../test_dataset # requires user input datadir: /work/projects/ecosystem_biology/local_tools/PathoFact_update/Datasets/CDI # requires user input
workflow: "complete" workflow: "complete"
size_fasta: 1000 size_fasta: 10000
scripts: "scripts" scripts: "scripts"
signalp: "/work/projects/ecosystem_biology/local_tools/SignalP/signalp-5.0b/bin" # requires user input signalp: "/work/projects/ecosystem_biology/local_tools/SignalP/signalp-5.0b/bin" # requires user input
deepvirfinder: "submodules/DeepVirFinder/dvf.py" deepvirfinder: "submodules/DeepVirFinder/dvf.py"
......
# Prepare fasta: entry point of the test-module workflow.
import glob
import os

# Workflow settings; all keys below are read from the "pathofact" section.
configfile: "config.yaml"

# Default target: one mapping file per sample (produced by rule mapping_file),
# expanded over datadir/project/sample taken from the config.
rule all:
    input:
        expand("{datadir}/{project}/Prodigal/{sample}.contig", datadir=config["pathofact"]["datadir"], project=config["pathofact"]["project"], sample=config["pathofact"]["sample"])
# Generate ORFs and GFF
# Generate unique ID number for each sequence
# Runs Prodigal in metagenome mode (-p meta) on the per-sample contig fasta
# and emits the predicted protein ORFs (.faa) plus the GFF annotation.
rule Prodigal:
    input:
        # Raw contigs for one sample; {datadir} and {sample} are wildcards.
        "{datadir}/{sample}.fna"
    output:
        ORF="{datadir}/{project}/Prodigal/{sample}.faa",
        GFF="{datadir}/{project}/Prodigal/{sample}.gff"
    message:
        "Generates ORFs and gff"
    params:
        # NOTE(review): outdir is declared but never referenced in the shell
        # command — presumably a leftover; confirm before removing.
        outdir="{datadir}"
    conda:
        "../../envs/Prodigal.yaml"
    shell:
        """
        prodigal -i {input} -o {output.GFF} -a {output.ORF} -f gff -p meta
        """
# Build a contig-to-ORF mapping file (contig ID <TAB> feature ID) from the
# Prodigal GFF, and normalize the ORF fasta headers to bare feature IDs.
rule mapping_file:
    input:
        ORF="{datadir}/{project}/Prodigal/{sample}.faa",
        GFF="{datadir}/{project}/Prodigal/{sample}.gff"
    output:
        "{datadir}/{project}/Prodigal/{sample}.contig"
    message:
        "Generate mapping file"
    params:
        # NOTE(review): outdir is declared but never used in the shell command.
        outdir="{datadir}"
    shell:
        """
        sed -i 's/[^>]*ID=//;s/;.*//' {input.ORF}
        sed -i '/^#/d' {input.GFF}
        cut -f 1,9 {input.GFF} |cut -d';' -f1| sed 's/ID=//' > {output}
        """
        # NOTE(review): the two `sed -i` calls rewrite this rule's INPUT files
        # in place (ORF headers reduced to the ID= value; GFF comment lines
        # dropped). This makes the rule non-idempotent and can confuse
        # Snakemake re-runs (inputs newer than outputs). Downstream rules
        # presumably depend on the rewritten headers — confirm before
        # changing this to write to separate output files.
...@@ -91,8 +91,8 @@ checkpoint splitting: ...@@ -91,8 +91,8 @@ checkpoint splitting:
"{datadir}/{project}/renamed/{sample}_ID.faa" "{datadir}/{project}/renamed/{sample}_ID.faa"
output: output:
splits=directory("{datadir}/{project}/splitted/{sample}/") splits=directory("{datadir}/{project}/splitted/{sample}/")
# log: log:
# "{datadir}/{project}/splitted/{sample}.log" "{datadir}/{project}/splitted/{sample}.log"
params: params:
outdir="{datadir}", outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"], runtime=config["pathofact"]["runtime"]["short"],
......
...@@ -4,22 +4,11 @@ This is a data set to test the `PathoFact` pipeline. ...@@ -4,22 +4,11 @@ This is a data set to test the `PathoFact` pipeline.
See `README.md` in parent directory for `PathoFact` set-up instructions. See `README.md` in parent directory for `PathoFact` set-up instructions.
## Input files
How input files were created:
```bash
# *.fna, *.faa and *.gff
rsync -avP /work/projects/ecosystem_biology/local_tools/IMP3/test/testRAW/run150320/Analysis/annotation/prokka.gff test_sample.gff
rsync -avP /work/projects/ecosystem_biology/local_tools/IMP3/test/testRAW/run150320/Analysis/annotation/prokka.fna test_sample.fna
rsync -avP /work/projects/ecosystem_biology/local_tools/IMP3/test/testRAW/run150320/Analysis/annotation/prokka.faa test_sample.faa
# set-up: modify the copied files and create the other required input files
./set-up.sh
```
## Test run ## Test run
Add the required path to the SignalP v5.0 installation to the config file.
``` ```
# activate env # activate env
conda activate PathoFact conda activate PathoFact
......
...@@ -33,9 +33,9 @@ PF = pathofact( ...@@ -33,9 +33,9 @@ PF = pathofact(
[ [
"{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv", "{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv", "{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv",
"{datadir}/{project}/Toxin_prediction_{sample}_report.csv", "{datadir}/{project}/Toxin_prediction_{sample}_report.tsv",
"{datadir}/{project}/Virulence_prediction_{sample}_report.csv", "{datadir}/{project}/Virulence_prediction_{sample}_report.tsv",
"{datadir}/{project}/PathoFact_{sample}_predictions.csv" "{datadir}/{project}/PathoFact_{sample}_predictions.tsv"
], ],
datadir=config["pathofact"]["datadir"], datadir=config["pathofact"]["datadir"],
project=config["pathofact"]["project"], project=config["pathofact"]["project"],
...@@ -48,9 +48,9 @@ CHECKS = expand( ...@@ -48,9 +48,9 @@ CHECKS = expand(
[ [
"{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv.checked", "{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv.checked",
"{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv.checked", "{datadir}/{project}/Toxin_gene_library_{sample}_report.tsv.checked",
"{datadir}/{project}/Toxin_prediction_{sample}_report.csv.checked", "{datadir}/{project}/Toxin_prediction_{sample}_report.tsv.checked",
"{datadir}/{project}/Virulence_prediction_{sample}_report.csv.checked", "{datadir}/{project}/Virulence_prediction_{sample}_report.tsv.checked",
"{datadir}/{project}/PathoFact_{sample}_predictions.csv.checked" "{datadir}/{project}/PathoFact_{sample}_predictions.tsv.checked"
], ],
datadir=config["pathofact"]["datadir"], datadir=config["pathofact"]["datadir"],
project=config["pathofact"]["project"], project=config["pathofact"]["project"],
......
No preview for this file type
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
No preview for this file type
No preview for this file type
No preview for this file type
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
#!/bin/bash -l
# Prepare the test-sample input files for the PathoFact test run.
# modify *.faa: rm record descriptions
# (keeps only the first whitespace-delimited token of each '>' header line)
sed -i '/^>/ s/ .*//' test_sample.faa
# *.gff to *.contig: contig ID, feature ID
# Take only the annotation section (between '##gff' and '##FASTA'), drop
# comment lines, keep columns 1 (contig) and 9 (attributes), reduce the
# attributes to the first ';'-separated field, and strip the 'ID=' prefix.
sed -n '/^##gff/,/^##FASTA/p' test_sample.gff | grep -v '^#' | cut -f1,9 | cut -d';' -f1 | sed 's/ID=//' > test_sample.contig
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment