Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
David Hoksza
bh19-rare-diseases
Commits
9778d52a
Commit
9778d52a
authored
Dec 03, 2019
by
David Hoksza
Browse files
working version of implanting annotations
parent
a8004749
Changes
2
Hide whitespace changes
Inline
Side-by-side
map_generator/utils/implant_annotations.py
View file @
9778d52a
...
...
@@ -2,11 +2,10 @@ import logging
import
argparse
import
pandas
as
pd
import
xml.etree.ElementTree
as
ET
from
.
import
utils
import
utils
def get_full_name(elem_name, ns, nss) -> str:
    """Return the namespace-qualified tag name for an XML element.

    The result uses the ``{namespace-uri}local-name`` form that
    ``xml.etree.ElementTree`` expects when searching or iterating by tag.

    Args:
        elem_name: Local (unprefixed) element name, e.g. ``"species"``.
        ns: Key into ``nss`` selecting the namespace.
        nss: Mapping from namespace key to namespace URI.

    Returns:
        The qualified name ``"{<uri>}<elem_name>"``.

    Raises:
        KeyError: If ``ns`` is not a key of ``nss``.
    """
    # Doubled braces in the template emit literal "{" and "}" around the URI.
    return "{{{}}}{}".format(nss[ns], elem_name)
def
implant
(
map_path
:
str
,
variants_path
:
str
)
->
str
:
...
...
@@ -15,22 +14,21 @@ def implant(map_path: str, variants_path: str) -> str:
for
name
,
g
in
df_vars
.
groupby
(
'gene_name'
)[
'identifier_uniprot'
]:
gene_uniprot
[
name
]
=
set
(
g
)
utils
.
register_namespaces
(
map_path
)
namespaces
=
utils
.
register_namespaces
(
map_path
)
tree
=
ET
.
parse
(
map_path
)
root
=
tree
.
getroot
()
for
elem
in
root
.
iter
(
'species'
):
atts
=
elem
.
attrib
# if 'name' in atts:
# print(elem.tag, atts)
for
k
,
v
in
atts
.
items
():
if
len
(
v
)
>
250
:
logging
.
warning
(
"Trimming {} (key {} too long)"
.
format
(
elem
,
k
))
if
"resource"
in
k
:
# We want to keep the RDF resource still a valid resource otherwise MINERVA will fail during import
atts
[
k
]
=
atts
[
k
].
split
(
";"
)[
0
]
else
:
atts
[
k
]
=
""
for
elem
in
list
(
root
.
iter
(
get_full_name
(
"species"
,
""
,
namespaces
))):
# for elem in root.iterfind(get_full_name("layout", "layout", namespaces)):
name
=
elem
.
attrib
[
"name"
]
if
name
in
gene_uniprot
:
for
uniprot_id
in
gene_uniprot
[
name
]:
idb
=
ET
.
Element
(
'bqbiol:isDescribedBy'
)
b
=
ET
.
SubElement
(
idb
,
'rdf:Bag'
)
urn_key
=
'urn:miriam:uniprot'
if
'-'
not
in
uniprot_id
else
'urn:miriam:uniprot.isoform'
ET
.
SubElement
(
b
,
'rdf:li'
,
{
'rdf:resource'
:
"{}:{}"
.
format
(
urn_key
,
uniprot_id
)})
elem
.
append
(
idb
)
return
ET
.
tostring
(
root
,
encoding
=
'utf8'
).
decode
(
'utf8'
)
...
...
map_generator/utils/utils.py
View file @
9778d52a
import
xml.etree.ElementTree
as
ET
import
re
from
typing
import
Dict
def register_namespaces(path) -> Dict[str, str]:
    """Register the namespaces declared on a file's ``<sbml>`` tag.

    The file is read as text and the attributes of the first ``<sbml ...>``
    opening tag are scanned for ``xmlns`` / ``xmlns:prefix`` declarations.
    Each declaration is registered with ``ElementTree`` and collected into
    the returned mapping.

    Args:
        path: Path of the file to scan.

    Returns:
        Mapping from namespace prefix to namespace URI. A default namespace
        (plain ``xmlns="..."``) is stored under the empty-string key.

    Raises:
        ValueError: If the file contains no ``<sbml ...>`` opening tag.
    """
    namespaces = {}
    with open(path) as f:
        content = f.read()

    sbml_match = re.search(r'<sbml([^>]*)>', content)
    if sbml_match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("No <sbml> tag found in {}".format(path))
    sbml_tag = sbml_match.group(1)

    for ns in re.finditer(r'xmlns:*([^=]*)="([^"]*)"', sbml_tag):
        prefix = ns.group(1)
        uri = ns.group(2)
        ET.register_namespace(prefix, uri)
        namespaces[prefix] = uri
    return namespaces
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment