Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
David Hoksza
bh19-rare-diseases
Commits
a8004749
Commit
a8004749
authored
Dec 03, 2019
by
David Hoksza
Browse files
correct handling of namespaces in xml
parent
b24b357a
Changes
3
Hide whitespace changes
Inline
Side-by-side
map_generator/utils/implant_annotations.py
0 → 100644
View file @
a8004749
import
logging
import
argparse
import
pandas
as
pd
import
xml.etree.ElementTree
as
ET
from
.
import
utils
def implant(map_path: str, variants_path: str) -> str:
    """Load an SBML map, blank out over-long ``species`` attributes, return XML.

    The variants file is read as a tab-separated table (its first four lines
    are skipped) and grouped into a gene name -> set of UniProt identifiers
    mapping.  The map's namespace prefixes are registered through
    ``utils.register_namespaces`` before parsing.

    Args:
        map_path: Path to the input map XML file.
        variants_path: Path to the tab-separated variants file; it must
            contain ``gene_name`` and ``identifier_uniprot`` columns.

    Returns:
        The (possibly modified) document serialised as a string, including
        the XML declaration emitted by ``ET.tostring``.
    """
    max_attribute_length = 250

    df_vars = pd.read_csv(variants_path, sep='\t', skiprows=4)
    # NOTE(review): this mapping is built but not consumed anywhere in the
    # visible code -- presumably the annotation step is still to be written.
    # It is kept so that a missing/invalid variants file still fails early.
    gene_uniprot = {
        name: set(identifiers)
        for name, identifiers in df_vars.groupby('gene_name')['identifier_uniprot']
    }

    utils.register_namespaces(map_path)
    root = ET.parse(map_path).getroot()

    # ElementTree stores namespaced tags as "{uri}local".  A bare 'species'
    # therefore never matches in a document with a default xmlns, so qualify
    # the tag with the root element's namespace (empty when there is none).
    namespace = ''
    if root.tag.startswith('{'):
        namespace = root.tag[:root.tag.index('}') + 1]

    for elem in root.iter(namespace + 'species'):
        atts = elem.attrib
        # Iterate over a snapshot because values are rewritten in the loop.
        for k, v in list(atts.items()):
            if len(v) <= max_attribute_length:
                continue
            logging.warning("Trimming {} (key {} too long)".format(elem, k))
            if "resource" in k:
                # We want to keep the RDF resource still a valid resource
                # otherwise MINERVA will fail during import
                atts[k] = v.split(";")[0]
            else:
                atts[k] = ""

    return ET.tostring(root, encoding='utf8').decode('utf8')
if __name__ == '__main__':
    # Script entry point: configure logging, read the two required paths
    # from the command line and print the resulting XML to stdout.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(module)s - %(message)s',
        datefmt='%H:%M:%S',
    )

    cli = argparse.ArgumentParser()
    for short_flag, long_flag, description in (
        ("-m", "--map_path", "Input map XML file"),
        ("-v", "--variants_path", "MINERVA variant file"),
    ):
        cli.add_argument(short_flag, long_flag, required=True, help=description)
    options = cli.parse_args()

    result_xml = implant(options.map_path, options.variants_path)
    print(result_xml)
\ No newline at end of file
map_generator/utils/trim_long_strings.py
View file @
a8004749
import
logging
import
argparse
import
xml.etree.ElementTree
as
ET
import
utils
def
trim
(
path
:
str
)
->
str
:
utils
.
register_namespaces
(
path
)
tree
=
ET
.
parse
(
path
)
root
=
tree
.
getroot
()
...
...
map_generator/utils/utils.py
0 → 100644
View file @
a8004749
import
xml.etree.ElementTree
as
ET
import
re
def register_namespaces(path: str) -> None:
    """Register the namespace prefixes declared on a file's ``<sbml>`` tag.

    ElementTree replaces unknown prefixes with ``ns0``, ``ns1``, ... when it
    serialises a tree.  Registering the prefixes found in the source file
    beforehand makes the output keep the original ones.  A default namespace
    (plain ``xmlns="..."``) is registered under the empty prefix.

    Args:
        path: Path to the XML file to inspect.

    Raises:
        ValueError: Propagated from ``ET.register_namespace`` when a declared
            prefix is reserved (``ns`` followed by digits).
    """
    # Only ASCII markup is searched for, so undecodable bytes elsewhere in
    # the file must not abort the scan.
    with open(path, encoding='utf-8', errors='replace') as f:
        content = f.read()

    # The lookahead keeps longer tag names such as "<sbmlfoo" from matching.
    sbml_tag = re.search(r'<sbml(?=[\s>])([^>]*)>', content)
    if sbml_tag is None:
        # Not an SBML document: nothing to register.
        return

    # Accepts both quote styles and optional whitespace around "=".
    declaration = re.compile(r'''\bxmlns(?::([^\s=]+))?\s*=\s*(["'])(.*?)\2''')
    for ns in declaration.finditer(sbml_tag.group(1)):
        ET.register_namespace(ns.group(1) or '', ns.group(3))
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment