Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Laura Denies
PathoFact
Commits
01b6b60d
Commit
01b6b60d
authored
Jun 18, 2020
by
Laura Denies
Browse files
Include MOB_suite for Plasmid prediction | modify MGE prediction
parent
223aa6a5
Changes
8
Hide whitespace changes
Inline
Side-by-side
envs/DeepVirFinder.yaml
View file @
01b6b60d
channels
:
-
conda-forge
-
bioconda
-
conda-forge
-
defaults
dependencies
:
-
_libgcc_mutex=0.1=main
...
...
@@ -33,7 +33,7 @@ dependencies:
-
libgcc-ng=9.1.0=hdf63c60_0
-
libgfortran-ng=7.3.0=hdf63c60_0
-
libgpuarray=0.7.6=h14c3975_1003
- liblapack=3.8.0=12_openblas
-
liblapack=3.8.0=12_openblas
-
libopenblas=0.3.7=h6e990d7_1
-
libprotobuf=3.9.2=h8b12597_0
-
libstdcxx-ng=9.1.0=hdf63c60_0
...
...
@@ -69,4 +69,5 @@ dependencies:
-
wrapt=1.11.2=py36h516909a_0
-
xz=5.2.4=h14c3975_1001
-
yaml=0.1.7=h14c3975_1001
-
zlib=1.2.11=h516909a_1006
\ No newline at end of file
-
zlib=1.2.11=h516909a_1006
envs/MOB_suite.yaml
0 → 100644
View file @
01b6b60d
channels
:
-
bioconda
-
conda-forge
-
defaults
dependencies
:
-
_libgcc_mutex=0.1=conda_forge
-
_openmp_mutex=4.5=1_llvm
-
biopython=1.76=py38h516909a_0
-
blast=2.5.0=hc0b0e79_3
-
blosc=1.19.0=he1b5a44_0
-
boost=1.73.0=py38hd103949_0
-
boost-cpp=1.73.0=h6c875cf_0
-
bzip2=1.0.8=h516909a_2
-
ca-certificates=2020.4.5.2=hecda079_0
-
capnproto=0.6.1=hfc679d8_1
-
certifi=2020.4.5.2=py38h32f6830_0
-
dbus=1.13.6=he372182_0
-
ete3=3.1.1=pyh9f0ad1d_2
-
expat=2.2.9=he1b5a44_2
-
fontconfig=2.13.1=h86ecdb6_1001
-
freetype=2.10.2=he06d7ca_0
-
gettext=0.19.8.1=hc5be6a0_1002
-
glib=2.64.3=h6f030ca_0
-
gsl=2.5=h294904e_1
-
gst-plugins-base=1.14.5=h0935bb2_2
-
gstreamer=1.14.5=h36ae1b5_2
-
hdf5=1.10.6=nompi_h3c11f04_100
-
icu=64.2=he1b5a44_1
-
jpeg=9d=h516909a_0
-
krb5=1.17.1=h2fd8d38_0
-
ld_impl_linux-64=2.34=h53a641e_5
-
libblas=3.8.0=16_openblas
-
libcblas=3.8.0=16_openblas
-
libclang=9.0.1=default_hde54327_0
-
libcurl=7.69.1=hf7181ac_0
-
libedit=3.1.20191231=h46ee950_0
-
libffi=3.2.1=he1b5a44_1007
-
libgcc-ng=9.2.0=h24d8f2e_2
-
libgfortran-ng=7.5.0=hdf63c60_6
-
libiconv=1.15=h516909a_1006
-
liblapack=3.8.0=16_openblas
-
libllvm9=9.0.1=he513fc3_1
-
libopenblas=0.3.9=h5ec1e0e_0
-
libpng=1.6.37=hed695b0_1
-
libssh2=1.9.0=hab1572f_2
-
libstdcxx-ng=9.2.0=hdf63c60_2
-
libuuid=2.32.1=h14c3975_1000
-
libxcb=1.13=h14c3975_1002
-
libxkbcommon=0.10.0=he1b5a44_0
-
libxml2=2.9.10=hee79883_0
-
libxslt=1.1.33=h31b3aaa_0
-
llvm-openmp=10.0.0=hc9558a2_0
-
lxml=4.5.1=py38hbb43d70_0
-
lz4-c=1.9.2=he1b5a44_1
-
lzo=2.10=h14c3975_1000
-
mash=2.2.2=h3d38be6_1
-
mob_suite=3.0.0=py_1
-
mock=4.0.2=py38h32f6830_0
-
ncurses=6.1=hf484d3e_1002
-
nspr=4.25=he1b5a44_0
-
nss=3.47=he751ad9_0
-
numexpr=2.7.1=py38hcb8c335_1
-
numpy=1.18.5=py38h8854b6b_0
-
openssl=1.1.1g=h516909a_0
-
pandas=1.0.4=py38hcb8c335_0
-
pcre=8.44=he1b5a44_0
-
pip=20.1.1=py_1
-
pthread-stubs=0.4=h14c3975_1001
-
pycurl=7.43.0.5=py38h16ce93b_0
-
pyqt=5.12.3=py38ha8c2ead_3
-
pytables=3.6.1=py38h1b207d3_2
-
python=3.8.3=cpython_he5300dc_0
-
python-dateutil=2.8.1=py_0
-
python_abi=3.8=1_cp38
-
pytz=2020.1=pyh9f0ad1d_0
-
qt=5.12.5=hd8c4c69_1
-
readline=8.0=hf8c457e_0
-
scipy=1.4.1=py38h18bccfc_3
-
setuptools=47.1.1=py38h32f6830_0
-
six=1.15.0=pyh9f0ad1d_0
-
sqlite=3.30.1=hcee41ef_0
-
tk=8.6.10=hed695b0_0
-
wheel=0.34.2=py_1
-
xorg-kbproto=1.0.7=h14c3975_1002
-
xorg-libice=1.0.10=h516909a_0
-
xorg-libsm=1.2.3=h84519dc_1000
-
xorg-libx11=1.6.9=h516909a_0
-
xorg-libxau=1.0.9=h14c3975_0
-
xorg-libxdmcp=1.1.3=h516909a_0
-
xorg-libxext=1.3.4=h516909a_0
-
xorg-libxrender=0.9.10=h516909a_1002
-
xorg-renderproto=0.11.1=h14c3975_1002
-
xorg-xextproto=7.3.0=h14c3975_1002
-
xorg-xproto=7.0.31=h14c3975_1007
-
xz=5.2.5=h516909a_0
-
zlib=1.2.11=h516909a_1006
-
zstd=1.4.4=h6597ccf_3
-
pip
:
-
pyqt5-sip==4.19.18
-
pyqtchart==5.12
-
pyqtwebengine==5.12.1
rules/AMR/Combine_MGE_AMR.smk
View file @
01b6b60d
...
...
@@ -11,7 +11,7 @@ rule combine_AMR_plasmid:
input:
AMR="{datadir}/{project}/AMR/deepARG_results/{sample}.out.mapping.ARG",
AMR_translation="{datadir}/{project}/renamed/{sample}_translation.tsv",
Plasmid="{datadir}/{project}/MGE/plasmid/{sample}_plasflow_prediction_final.tsv",
Plasmid="{datadir}/{project}/MGE/
PlasFlow/
plasmid/{sample}_plasflow_prediction_final.tsv",
Contig_translation="{datadir}/{project}/renamed/{sample}_Contig_translation.tsv",
Contig_gene_list="{datadir}/{project}/Prodigal/{sample}.contig",
VirFinder="{datadir}/{project}/MGE/phage/{sample}_VirFinder_aggregated.csv",
...
...
rules/AMR/Phage.smk
View file @
01b6b60d
...
...
@@ -7,26 +7,7 @@ import os
# Phage Prediction #
##########################
# VIRSORTER Preprocessing
checkpoint splitphage:
input:
"{datadir}/{project}/renamed/{sample}_Contig_ID.fna"
output:
split=directory("{datadir}/{project}/contig_splitted/{sample}/")
# log:
# "{datadir}/{project}/contig_splitted/{sample}.log"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["medium"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
split=config["pathofact"]["size_fasta"]
conda:
"../../envs/Biopython.yaml"
shell:
"""
python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/contig_splitted/{wildcards.sample}
"""
# VIRSORTER
rule run_VirSorter:
input:
...
...
@@ -64,9 +45,9 @@ rule run_VirFinder:
input:
"{datadir}/{project}/contig_splitted/{sample}/{file_i}.fasta"
output:
"{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.f
n
a_gt1bp_dvfpred.txt"
"{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.f
ast
a_gt1bp_dvfpred.txt"
log:
"{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.f
n
a_gt1bp_dvfpred.log"
"{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.f
ast
a_gt1bp_dvfpred.log"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["long"],
...
...
@@ -79,9 +60,9 @@ rule run_VirFinder:
"python {config[pathofact][deepvirfinder]} -i {input} -o {wildcards.datadir}/{wildcards.project}/MGE/phage/{wildcards.sample}/virfinder -c {threads} &> {log}"
def aggregate_VirFinder(wildcards):
checkpoint_output= checkpoints.split
phage
.get(**wildcards).output.split
checkpoint_output= checkpoints.split
contig
.get(**wildcards).output.split
return expand(
"{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.f
n
a_gt1bp_dvfpred.txt",
"{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.f
ast
a_gt1bp_dvfpred.txt",
datadir=wildcards.datadir,
project=wildcards.project,
sample=wildcards.sample,
...
...
rules/AMR/Plasmid.smk
View file @
01b6b60d
...
...
@@ -30,18 +30,16 @@ checkpoint splitplasmid:
"{datadir}/{project}/{sample}_filtered.fna"
output:
split=directory("{datadir}/{project}/MGE/plasmid_splitted/{sample}/")
log:
"{datadir}/{project}/MGE/plasmid_splitted/{sample}.log"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["
medium
"],
runtime=config["pathofact"]["runtime"]["
short
"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
split=config["pathofact"]["size_fasta"]
conda:
"../../envs/Biopython.yaml"
shell:
"""
python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/plasmid_splitted/{wildcards.sample}
python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/
MGE/
plasmid_splitted/{wildcards.sample}
"""
# PlasFlow Plasmid prediction
...
...
@@ -49,9 +47,9 @@ rule run_PLASMID:
input:
"{datadir}/{project}/MGE/plasmid_splitted/{sample}/{file_i}.fasta"
output:
temp("{datadir}/{project}/MGE/plasmid/{sample}/{file_i}_plasflow_prediction.tsv")
temp("{datadir}/{project}/MGE/plasmid/
PlasFlow/
{sample}/{file_i}_plasflow_prediction.tsv")
log:
"{datadir}/{project}/MGE/plasmid/{sample}/{file_i}_plasflow_prediction.log"
"{datadir}/{project}/MGE/plasmid/
PlasFlow/
{sample}/{file_i}_plasflow_prediction.log"
conda:
"../../envs/PlasFlow.yaml"
params:
...
...
@@ -67,7 +65,7 @@ rule run_PLASMID:
def aggregate_plasmid_input(wildcards):
checkpoint_output= checkpoints.splitplasmid.get(**wildcards).output.split
return expand(
"{datadir}/{project}/MGE/plasmid/{sample}/{file_i}_plasflow_prediction.tsv",
"{datadir}/{project}/MGE/plasmid/
PlasFlow/
{sample}/{file_i}_plasflow_prediction.tsv",
datadir=wildcards.datadir,
project=wildcards.project,
sample=wildcards.sample,
...
...
@@ -78,7 +76,7 @@ rule Plasmid_aggregate:
input:
aggregate_plasmid_input
output:
"{datadir}/{project}/MGE/plasmid/{sample}_plasflow_aggregated.tsv"
"{datadir}/{project}/MGE/plasmid/
PlasFlow/
{sample}_plasflow_aggregated.tsv"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
...
...
@@ -88,9 +86,9 @@ rule Plasmid_aggregate:
rule select:
input:
"{datadir}/{project}/MGE/plasmid/{sample}_plasflow_aggregated.tsv"
"{datadir}/{project}/MGE/plasmid/
PlasFlow/
{sample}_plasflow_aggregated.tsv"
output:
"{datadir}/{project}/MGE/plasmid/{sample}_plasflow_prediction_final.tsv"
"{datadir}/{project}/MGE/plasmid/
PlasFlow/
{sample}_plasflow_prediction_final.tsv"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
...
...
@@ -99,3 +97,40 @@ rule select:
"""
cut -f 3,6 {input} > {output}
"""
# MOB-suite plasmid typing (mob_typer), run once per contig chunk found in
# the contig_splitted/{sample}/ directory.
rule run_MOBsuite:
    input:
        "{datadir}/{project}/contig_splitted/{sample}/{file_i}.fasta"
    output:
        temp("{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt")
    # The log lives next to the MOB_suite results; it previously pointed into
    # the PlasFlow directory, mixing the logs of two different tools.
    log:
        "{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.log"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["long"]
    threads:
        config["pathofact"]["mem"]["big_mem_cores"]
    conda:
        "../../envs/MOB_suite.yaml"
    # {output} resolves to the single declared output path, so the path is not
    # rebuilt by hand; stdout/stderr are captured in the declared log file,
    # matching the run_VirFinder rule.
    shell:
        "mob_typer --multi --infile {input} --out_file {output} -n {threads} &> {log}"
def aggregate_MOBsuite(wildcards):
    """Input function listing the per-chunk MOB_suite predictions of a sample.

    Looks up the ``split`` output directory of the ``splitcontig`` checkpoint
    for the given wildcards, collects every ``{i}.fasta`` match inside it, and
    expands the MOB_suite prediction path once per collected chunk name.
    """
    split_dir = checkpoints.splitcontig.get(**wildcards).output.split
    chunk_pattern = os.path.join(split_dir, "{i}.fasta")
    chunk_names = glob_wildcards(chunk_pattern).i
    return expand(
        "{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt",
        datadir=wildcards.datadir,
        project=wildcards.project,
        sample=wildcards.sample,
        file_i=chunk_names,
    )
# Concatenate the per-chunk MOB_suite prediction files (resolved by the
# aggregate_MOBsuite input function) into one per-sample TSV.
# NOTE(review): plain `cat` keeps every line of every chunk file; if mob_typer
# writes a header row per file it will be repeated in the output - confirm.
rule aggregate_MOBsuite:
input: aggregate_MOBsuite
output:
"{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv"
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"]
shell:
"cat {input} > {output}"
rules/Universal/Preprocessing_contig.smk
View file @
01b6b60d
...
...
@@ -41,3 +41,22 @@ rule generate_ContigTranslation:
"""
paste {input.renamed} {input.original} | awk 'sub(/^>/,"")' OFS='\\t' > {output}
"""
# Split the renamed contig FASTA of a sample into files under
# contig_splitted/{sample}/ by calling scripts/split.py.
# The output is a directory whose contents are globbed after the checkpoint
# has run (via checkpoints.splitcontig.get(...)).
# params.split is read from config["pathofact"]["size_fasta"]; presumably the
# chunk size handed to split.py - confirm against the script.
checkpoint splitcontig:
input:
"{datadir}/{project}/renamed/{sample}_Contig_ID.fna"
output:
split=directory("{datadir}/{project}/contig_splitted/{sample}/")
params:
outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
split=config["pathofact"]["size_fasta"]
conda:
"../../envs/Biopython.yaml"
shell:
"""
python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/contig_splitted/{wildcards.sample}
"""
run_PathoFact.sh
0 → 100644
View file @
01b6b60d
#!/bin/bash -l
# Launch the PathoFact Snakemake workflow; expects `Snakefile` in the
# current working directory.

# Activate the conda environment that provides the `snakemake` executable.
conda activate snakemake

# All options must sit on the same command line as `snakemake`; placed on
# separate lines each one would be executed as its own (failing) command.
snakemake -s Snakefile --use-conda --rerun-incomplete --cores 28 -p
workflows/AMR_workflow.smk
View file @
01b6b60d
...
...
@@ -19,7 +19,8 @@ rule AMR_Analysis:
[
"{datadir}/{project}/MGE/phage/{sample}_VIRSorter_aggregated.csv",
"{datadir}/{project}/MGE/phage/{sample}_VirFinder_aggregated.csv",
"{datadir}/{project}/MGE/plasmid/{sample}_plasflow_prediction_final.tsv",
"{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}_plasflow_prediction_final.tsv",
# The trailing comma below is required: adjacent string literals are
# concatenated, which would merge this target with the report path.
"{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv",
"{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv",
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv"
],
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment