Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
ESB
ONT_pilot_gitlab
Commits
09e65b15
Commit
09e65b15
authored
Jan 28, 2021
by
Valentina Galata
Browse files
analysis: FAA files for metaP analysis (all, cluster representatives) (issue
#97
)
parent
f39cae88
Changes
4
Hide whitespace changes
Inline
Side-by-side
workflow/envs/biopython.yaml
0 → 120000
View file @
09e65b15
../../workflow_zymo/envs/biopython.yaml
\ No newline at end of file
workflow/rules/analysis.smk
View file @
09e65b15
...
...
@@ -363,6 +363,20 @@ rule analysis_mmseqs2:
"mmseqs createtsv {params.db} {params.db} {params.cl} {output.tsv} && "
"date) &> {log}"
# FAA file of cluster representatives
rule analysis_mmseqs2_cluster_faa:
input:
faa=os.path.join(RESULTS_DIR, "analysis/mmseqs2/proteins.faa"),
tsv=os.path.join(RESULTS_DIR, "analysis/mmseqs2/clusters.tsv")
output:
os.path.join(RESULTS_DIR, "analysis/mmseqs2/clusters.faa")
conda:
os.path.join(ENV_DIR, "biopython.yaml")
message:
"Analysis: MMSeqs2 clustering FAA: {input}"
script:
os.path.join(SRC_DIR, "mmseqs2_cluster_faa.py")
##################################################
# COVERAGE
...
...
workflow/scripts/mmseqs2_cluster_faa.py
0 → 100644
View file @
09e65b15
#!/usr/bin/env python
# FAA file containing only cluster representatives
import
pandas
from
Bio
import
SeqIO
# protein IDs = cluster representatives (1st column)
prot_ids
=
set
(
pandas
.
read_csv
(
snakemake
.
input
.
tsv
,
sep
=
"
\t
"
,
header
=
None
).
iloc
[:,
0
])
# filter input FAA by IDs
with
open
(
snakemake
.
input
.
faa
,
"r"
)
as
ifile
,
open
(
snakemake
.
output
[
0
],
"w"
)
as
ofile
:
for
record
in
SeqIO
.
parse
(
ifile
,
"fasta"
):
if
record
.
id
in
prot_ids
:
SeqIO
.
write
(
record
,
ofile
,
"fasta"
)
workflow/steps/analysis.smk
View file @
09e65b15
...
...
@@ -49,6 +49,10 @@ rule ANALYSIS:
) if "diamond" in config["steps_analysis"] else [],
# mmseqs2
[os.path.join(RESULTS_DIR, "analysis/mmseqs2/clusters.tsv")] if "mmseqs2" in config["steps_analysis"] else [],
[ # for metaP analysis
os.path.join(RESULTS_DIR, "analysis/mmseqs2/proteins.faa"),
os.path.join(RESULTS_DIR, "analysis/mmseqs2/clusters.faa")
] if "mmseqs2" in config["steps_analysis"] and "metap" in config["data"] else [],
# coverage
expand(
os.path.join(RESULTS_DIR, "mapping/{mtype}/{rtype_tool}.cov.pergene"),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment