Commit 01b6b60d authored by Laura Denies
Browse files

Include MOB_suite for Plasmid prediction | modify MGE prediction

parent 223aa6a5
channels: channels:
- conda-forge
- bioconda - bioconda
- conda-forge
- defaults - defaults
dependencies: dependencies:
- _libgcc_mutex=0.1=main - _libgcc_mutex=0.1=main
...@@ -33,7 +33,7 @@ dependencies: ...@@ -33,7 +33,7 @@ dependencies:
- libgcc-ng=9.1.0=hdf63c60_0 - libgcc-ng=9.1.0=hdf63c60_0
- libgfortran-ng=7.3.0=hdf63c60_0 - libgfortran-ng=7.3.0=hdf63c60_0
- libgpuarray=0.7.6=h14c3975_1003 - libgpuarray=0.7.6=h14c3975_1003
- liblapack=3.8.0=12_openblas - liblapack=3.8.0=12_openblas
- libopenblas=0.3.7=h6e990d7_1 - libopenblas=0.3.7=h6e990d7_1
- libprotobuf=3.9.2=h8b12597_0 - libprotobuf=3.9.2=h8b12597_0
- libstdcxx-ng=9.1.0=hdf63c60_0 - libstdcxx-ng=9.1.0=hdf63c60_0
...@@ -69,4 +69,5 @@ dependencies: ...@@ -69,4 +69,5 @@ dependencies:
- wrapt=1.11.2=py36h516909a_0 - wrapt=1.11.2=py36h516909a_0
- xz=5.2.4=h14c3975_1001 - xz=5.2.4=h14c3975_1001
- yaml=0.1.7=h14c3975_1001 - yaml=0.1.7=h14c3975_1001
- zlib=1.2.11=h516909a_1006 - zlib=1.2.11=h516909a_1006
\ No newline at end of file
# Conda channels searched for the packages pinned below. Conda treats the
# order as priority: a package found in an earlier channel wins over the
# same package in a later one.
channels:
- bioconda
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=1_llvm
- biopython=1.76=py38h516909a_0
- blast=2.5.0=hc0b0e79_3
- blosc=1.19.0=he1b5a44_0
- boost=1.73.0=py38hd103949_0
- boost-cpp=1.73.0=h6c875cf_0
- bzip2=1.0.8=h516909a_2
- ca-certificates=2020.4.5.2=hecda079_0
- capnproto=0.6.1=hfc679d8_1
- certifi=2020.4.5.2=py38h32f6830_0
- dbus=1.13.6=he372182_0
- ete3=3.1.1=pyh9f0ad1d_2
- expat=2.2.9=he1b5a44_2
- fontconfig=2.13.1=h86ecdb6_1001
- freetype=2.10.2=he06d7ca_0
- gettext=0.19.8.1=hc5be6a0_1002
- glib=2.64.3=h6f030ca_0
- gsl=2.5=h294904e_1
- gst-plugins-base=1.14.5=h0935bb2_2
- gstreamer=1.14.5=h36ae1b5_2
- hdf5=1.10.6=nompi_h3c11f04_100
- icu=64.2=he1b5a44_1
- jpeg=9d=h516909a_0
- krb5=1.17.1=h2fd8d38_0
- ld_impl_linux-64=2.34=h53a641e_5
- libblas=3.8.0=16_openblas
- libcblas=3.8.0=16_openblas
- libclang=9.0.1=default_hde54327_0
- libcurl=7.69.1=hf7181ac_0
- libedit=3.1.20191231=h46ee950_0
- libffi=3.2.1=he1b5a44_1007
- libgcc-ng=9.2.0=h24d8f2e_2
- libgfortran-ng=7.5.0=hdf63c60_6
- libiconv=1.15=h516909a_1006
- liblapack=3.8.0=16_openblas
- libllvm9=9.0.1=he513fc3_1
- libopenblas=0.3.9=h5ec1e0e_0
- libpng=1.6.37=hed695b0_1
- libssh2=1.9.0=hab1572f_2
- libstdcxx-ng=9.2.0=hdf63c60_2
- libuuid=2.32.1=h14c3975_1000
- libxcb=1.13=h14c3975_1002
- libxkbcommon=0.10.0=he1b5a44_0
- libxml2=2.9.10=hee79883_0
- libxslt=1.1.33=h31b3aaa_0
- llvm-openmp=10.0.0=hc9558a2_0
- lxml=4.5.1=py38hbb43d70_0
- lz4-c=1.9.2=he1b5a44_1
- lzo=2.10=h14c3975_1000
- mash=2.2.2=h3d38be6_1
- mob_suite=3.0.0=py_1
- mock=4.0.2=py38h32f6830_0
- ncurses=6.1=hf484d3e_1002
- nspr=4.25=he1b5a44_0
- nss=3.47=he751ad9_0
- numexpr=2.7.1=py38hcb8c335_1
- numpy=1.18.5=py38h8854b6b_0
- openssl=1.1.1g=h516909a_0
- pandas=1.0.4=py38hcb8c335_0
- pcre=8.44=he1b5a44_0
- pip=20.1.1=py_1
- pthread-stubs=0.4=h14c3975_1001
- pycurl=7.43.0.5=py38h16ce93b_0
- pyqt=5.12.3=py38ha8c2ead_3
- pytables=3.6.1=py38h1b207d3_2
- python=3.8.3=cpython_he5300dc_0
- python-dateutil=2.8.1=py_0
- python_abi=3.8=1_cp38
- pytz=2020.1=pyh9f0ad1d_0
- qt=5.12.5=hd8c4c69_1
- readline=8.0=hf8c457e_0
- scipy=1.4.1=py38h18bccfc_3
- setuptools=47.1.1=py38h32f6830_0
- six=1.15.0=pyh9f0ad1d_0
- sqlite=3.30.1=hcee41ef_0
- tk=8.6.10=hed695b0_0
- wheel=0.34.2=py_1
- xorg-kbproto=1.0.7=h14c3975_1002
- xorg-libice=1.0.10=h516909a_0
- xorg-libsm=1.2.3=h84519dc_1000
- xorg-libx11=1.6.9=h516909a_0
- xorg-libxau=1.0.9=h14c3975_0
- xorg-libxdmcp=1.1.3=h516909a_0
- xorg-libxext=1.3.4=h516909a_0
- xorg-libxrender=0.9.10=h516909a_1002
- xorg-renderproto=0.11.1=h14c3975_1002
- xorg-xextproto=7.3.0=h14c3975_1002
- xorg-xproto=7.0.31=h14c3975_1007
- xz=5.2.5=h516909a_0
- zlib=1.2.11=h516909a_1006
- zstd=1.4.4=h6597ccf_3
# Requirements installed with pip rather than conda. They have to be nested
# under the `pip:` key; listed as siblings they are read as conda package
# specs and `pip:` itself is left empty.
- pip:
  - pyqt5-sip==4.19.18
  - pyqtchart==5.12
  - pyqtwebengine==5.12.1
...@@ -11,7 +11,7 @@ rule combine_AMR_plasmid: ...@@ -11,7 +11,7 @@ rule combine_AMR_plasmid:
input: input:
AMR="{datadir}/{project}/AMR/deepARG_results/{sample}.out.mapping.ARG", AMR="{datadir}/{project}/AMR/deepARG_results/{sample}.out.mapping.ARG",
AMR_translation="{datadir}/{project}/renamed/{sample}_translation.tsv", AMR_translation="{datadir}/{project}/renamed/{sample}_translation.tsv",
Plasmid="{datadir}/{project}/MGE/plasmid/{sample}_plasflow_prediction_final.tsv", Plasmid="{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}_plasflow_prediction_final.tsv",  # must equal the output path of rule select
Contig_translation="{datadir}/{project}/renamed/{sample}_Contig_translation.tsv", Contig_translation="{datadir}/{project}/renamed/{sample}_Contig_translation.tsv",
Contig_gene_list="{datadir}/{project}/Prodigal/{sample}.contig", Contig_gene_list="{datadir}/{project}/Prodigal/{sample}.contig",
VirFinder="{datadir}/{project}/MGE/phage/{sample}_VirFinder_aggregated.csv", VirFinder="{datadir}/{project}/MGE/phage/{sample}_VirFinder_aggregated.csv",
......
...@@ -7,26 +7,7 @@ import os ...@@ -7,26 +7,7 @@ import os
# Phage Prediction # # Phage Prediction #
########################## ##########################
# VIRSORTER Preprocessing # VIRSORTER
# Checkpoint: runs split.py on the renamed contig FASTA with the configured
# `size_fasta` value and writes into contig_splitted/{sample}. The output is
# declared as a directory, so the files inside it are only known after the
# checkpoint has run.
checkpoint splitphage:
    input:
        "{datadir}/{project}/renamed/{sample}_Contig_ID.fna"
    output:
        split=directory("{datadir}/{project}/contig_splitted/{sample}/")
    # Logging is currently disabled:
    # log:
    #     "{datadir}/{project}/contig_splitted/{sample}.log"
    conda:
        "../../envs/Biopython.yaml"
    params:
        split=config["pathofact"]["size_fasta"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
        runtime=config["pathofact"]["runtime"]["medium"],
        outdir="{datadir}"
    shell:
        """
        python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/contig_splitted/{wildcards.sample}
        """
rule run_VirSorter: rule run_VirSorter:
input: input:
...@@ -64,9 +45,9 @@ rule run_VirFinder: ...@@ -64,9 +45,9 @@ rule run_VirFinder:
input: input:
"{datadir}/{project}/contig_splitted/{sample}/{file_i}.fasta" "{datadir}/{project}/contig_splitted/{sample}/{file_i}.fasta"
output: output:
"{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt" "{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fasta_gt1bp_dvfpred.txt"
log: log:
"{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fna_gt1bp_dvfpred.log" "{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fasta_gt1bp_dvfpred.log"
params: params:
outdir="{datadir}", outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["long"], runtime=config["pathofact"]["runtime"]["long"],
...@@ -79,9 +60,9 @@ rule run_VirFinder: ...@@ -79,9 +60,9 @@ rule run_VirFinder:
"python {config[pathofact][deepvirfinder]} -i {input} -o {wildcards.datadir}/{wildcards.project}/MGE/phage/{wildcards.sample}/virfinder -c {threads} &> {log}" "python {config[pathofact][deepvirfinder]} -i {input} -o {wildcards.datadir}/{wildcards.project}/MGE/phage/{wildcards.sample}/virfinder -c {threads} &> {log}"
def aggregate_VirFinder(wildcards): def aggregate_VirFinder(wildcards):
checkpoint_output= checkpoints.splitphage.get(**wildcards).output.split checkpoint_output= checkpoints.splitcontig.get(**wildcards).output.split
return expand( return expand(
"{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fna_gt1bp_dvfpred.txt", "{datadir}/{project}/MGE/phage/{sample}/virfinder/{file_i}.fasta_gt1bp_dvfpred.txt",
datadir=wildcards.datadir, datadir=wildcards.datadir,
project=wildcards.project, project=wildcards.project,
sample=wildcards.sample, sample=wildcards.sample,
......
...@@ -30,18 +30,16 @@ checkpoint splitplasmid: ...@@ -30,18 +30,16 @@ checkpoint splitplasmid:
"{datadir}/{project}/{sample}_filtered.fna" "{datadir}/{project}/{sample}_filtered.fna"
output: output:
split=directory("{datadir}/{project}/MGE/plasmid_splitted/{sample}/") split=directory("{datadir}/{project}/MGE/plasmid_splitted/{sample}/")
log:
"{datadir}/{project}/MGE/plasmid_splitted/{sample}.log"
params: params:
outdir="{datadir}", outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["medium"], runtime=config["pathofact"]["runtime"]["short"],
mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"], mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
split=config["pathofact"]["size_fasta"] split=config["pathofact"]["size_fasta"]
conda: conda:
"../../envs/Biopython.yaml" "../../envs/Biopython.yaml"
shell: shell:
""" """
python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/plasmid_splitted/{wildcards.sample} python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/MGE/plasmid_splitted/{wildcards.sample}
""" """
# PlasFlow Plasmid prediction # PlasFlow Plasmid prediction
...@@ -49,9 +47,9 @@ rule run_PLASMID: ...@@ -49,9 +47,9 @@ rule run_PLASMID:
input: input:
"{datadir}/{project}/MGE/plasmid_splitted/{sample}/{file_i}.fasta" "{datadir}/{project}/MGE/plasmid_splitted/{sample}/{file_i}.fasta"
output: output:
temp("{datadir}/{project}/MGE/plasmid/{sample}/{file_i}_plasflow_prediction.tsv") temp("{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}/{file_i}_plasflow_prediction.tsv")
log: log:
"{datadir}/{project}/MGE/plasmid/{sample}/{file_i}_plasflow_prediction.log" "{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}/{file_i}_plasflow_prediction.log"
conda: conda:
"../../envs/PlasFlow.yaml" "../../envs/PlasFlow.yaml"
params: params:
...@@ -67,7 +65,7 @@ rule run_PLASMID: ...@@ -67,7 +65,7 @@ rule run_PLASMID:
def aggregate_plasmid_input(wildcards): def aggregate_plasmid_input(wildcards):
checkpoint_output= checkpoints.splitplasmid.get(**wildcards).output.split checkpoint_output= checkpoints.splitplasmid.get(**wildcards).output.split
return expand( return expand(
"{datadir}/{project}/MGE/plasmid/{sample}/{file_i}_plasflow_prediction.tsv", "{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}/{file_i}_plasflow_prediction.tsv",
datadir=wildcards.datadir, datadir=wildcards.datadir,
project=wildcards.project, project=wildcards.project,
sample=wildcards.sample, sample=wildcards.sample,
...@@ -78,7 +76,7 @@ rule Plasmid_aggregate: ...@@ -78,7 +76,7 @@ rule Plasmid_aggregate:
input: input:
aggregate_plasmid_input aggregate_plasmid_input
output: output:
"{datadir}/{project}/MGE/plasmid/{sample}_plasflow_aggregated.tsv" "{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}_plasflow_aggregated.tsv"
params: params:
outdir="{datadir}", outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"], runtime=config["pathofact"]["runtime"]["short"],
...@@ -88,9 +86,9 @@ rule Plasmid_aggregate: ...@@ -88,9 +86,9 @@ rule Plasmid_aggregate:
rule select: rule select:
input: input:
"{datadir}/{project}/MGE/plasmid/{sample}_plasflow_aggregated.tsv" "{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}_plasflow_aggregated.tsv"
output: output:
"{datadir}/{project}/MGE/plasmid/{sample}_plasflow_prediction_final.tsv" "{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}_plasflow_prediction_final.tsv"
params: params:
outdir="{datadir}", outdir="{datadir}",
runtime=config["pathofact"]["runtime"]["short"], runtime=config["pathofact"]["runtime"]["short"],
...@@ -99,3 +97,40 @@ rule select: ...@@ -99,3 +97,40 @@ rule select:
""" """
cut -f 3,6 {input} > {output} cut -f 3,6 {input} > {output}
""" """
# Run MOB-suite's mob_typer on a single FASTA chunk taken from the
# contig_splitted directory. The per-chunk report is wrapped in temp(), so
# Snakemake removes it once the rules consuming it have finished.
rule run_MOBsuite:
    input:
        "{datadir}/{project}/contig_splitted/{sample}/{file_i}.fasta"
    output:
        temp("{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt")
    # The log lives next to the MOB_suite results (it used to point into the
    # PlasFlow directory and was never written by the command).
    log:
        "{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.log"
    params:
        outdir="{datadir}",
        runtime=config["pathofact"]["runtime"]["long"]
    threads:
        config["pathofact"]["mem"]["big_mem_cores"]
    conda:
        "../../envs/MOB_suite.yaml"
    # The report is written via --out_file, so stdout/stderr can be captured
    # in {log}; {output} expands to the same path that was previously spelled
    # out by hand from the wildcards.
    shell:
        "mob_typer --multi --infile {input} --out_file {output} -n {threads} &> {log}"
def aggregate_MOBsuite(wildcards):
    """Input function listing the per-chunk MOB_suite reports of one sample.

    Fetches the output directory of the ``splitcontig`` checkpoint for the
    given wildcards, globs it for ``{i}.fasta`` files, and returns one
    ``{file_i}_MOB_suite_prediction.txt`` path for every ``i`` found.
    """
    split_dir = checkpoints.splitcontig.get(**wildcards).output.split
    chunk_ids = glob_wildcards(os.path.join(split_dir, "{i}.fasta")).i
    report_pattern = "{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}/{file_i}_MOB_suite_prediction.txt"
    return expand(
        report_pattern,
        file_i=chunk_ids,
        sample=wildcards.sample,
        project=wildcards.project,
        datadir=wildcards.datadir,
    )
# Concatenate every per-chunk MOB_suite report of a sample into one TSV.
# The input list comes from the aggregate_MOBsuite() input function.
# NOTE(review): `cat` copies each chunk file verbatim; if every mob_typer
# report starts with a header row, that row will be repeated in the
# aggregated file — confirm downstream parsing tolerates this.
rule aggregate_MOBsuite:
    input:
        aggregate_MOBsuite
    output:
        "{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv"
    params:
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
        runtime=config["pathofact"]["runtime"]["short"],
        outdir="{datadir}"
    shell:
        "cat {input} > {output}"
...@@ -41,3 +41,22 @@ rule generate_ContigTranslation: ...@@ -41,3 +41,22 @@ rule generate_ContigTranslation:
""" """
paste {input.renamed} {input.original} | awk 'sub(/^>/,"")' OFS='\\t' > {output} paste {input.renamed} {input.original} | awk 'sub(/^>/,"")' OFS='\\t' > {output}
""" """
# Checkpoint: runs split.py on the renamed contig FASTA with the configured
# `size_fasta` value and writes into contig_splitted/{sample}. Input
# functions in this workflow query it through checkpoints.splitcontig.get()
# to find the chunk files it produced.
checkpoint splitcontig:
    input:
        "{datadir}/{project}/renamed/{sample}_Contig_ID.fna"
    output:
        split=directory("{datadir}/{project}/contig_splitted/{sample}/")
    conda:
        "../../envs/Biopython.yaml"
    params:
        split=config["pathofact"]["size_fasta"],
        mem=config["pathofact"]["mem"]["normal_mem_per_core_gb"],
        runtime=config["pathofact"]["runtime"]["short"],
        outdir="{datadir}"
    shell:
        """
        python {config[pathofact][scripts]}/split.py {input} {params.split} {wildcards.datadir}/{wildcards.project}/contig_splitted/{wildcards.sample}
        """
#!/bin/bash -l
# Launch the Snakemake workflow inside the `snakemake` conda environment.
#
# Optional overrides (defaults reproduce the original invocation):
#   SNAKEMAKE_CORES  number of cores handed to --cores (default: 28)
#   "$@"             any extra arguments are appended to the snakemake call

# Stop here if the environment is missing: otherwise snakemake would either
# not be found or be picked up from an unrelated environment.
conda activate snakemake || {
    echo "ERROR: could not activate conda environment 'snakemake'" >&2
    exit 1
}

snakemake -s Snakefile --use-conda --rerun-incomplete --cores "${SNAKEMAKE_CORES:-28}" -p "$@"
...@@ -19,7 +19,8 @@ rule AMR_Analysis: ...@@ -19,7 +19,8 @@ rule AMR_Analysis:
[ [
"{datadir}/{project}/MGE/phage/{sample}_VIRSorter_aggregated.csv", "{datadir}/{project}/MGE/phage/{sample}_VIRSorter_aggregated.csv",
"{datadir}/{project}/MGE/phage/{sample}_VirFinder_aggregated.csv", "{datadir}/{project}/MGE/phage/{sample}_VirFinder_aggregated.csv",
"{datadir}/{project}/MGE/plasmid/{sample}_plasflow_prediction_final.tsv", "{datadir}/{project}/MGE/plasmid/PlasFlow/{sample}_plasflow_prediction_final.tsv",
# Trailing comma is required: without it Python joins this literal with the next one into a single, non-existent path.
"{datadir}/{project}/MGE/plasmid/MOB_suite/{sample}_MOB_suite_aggregated.tsv",
"{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv", "{datadir}/{project}/AMR_MGE_prediction_{sample}_report.tsv",
"{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv" "{datadir}/{project}/AMR/{sample}_AMR_MGE_prediction_detailed.tsv"
], ],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment