Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
David Hoksza
bh19-rare-diseases
Commits
17b4348a
Commit
17b4348a
authored
Dec 03, 2019
by
David Hoksza
Browse files
UniProt annotations in rdf tag
parent
9778d52a
Changes
4
Hide whitespace changes
Inline
Side-by-side
assemble.sh
View file @
17b4348a
#!/usr/bin/env bash
# ------------------------- PARAMETERS TO SET -------------------------
# Load parameters either from parameters.sh, or from a file provided as the first parameter of the script
if
[[
$1
==
""
]]
;
then
source
parameters.sh
...
...
@@ -8,9 +9,6 @@ else
fi
# ------------------------- PARAMETERS TO SET -------------------------
# ------------------------- PARAMETERS TO SET -------------------------
ASSOCIATIONS_DIR
=
associations/
ASSOCIATIONS_DATA_DIR
=
$ASSOCIATIONS_DIR
/data/
EXTEND_DIR
=
${
ASSOCIATIONS_DIR
}
/extend_network/
...
...
@@ -157,9 +155,17 @@ check_exit_code
log
"Pathways assembled into
${
map_out_path
}
"
log
"Implanting UniProt annotations..."
map_out_path_uniprot
=
${
map_out_path
/.xml/_unp.xml
}
$PYTHON_BIN
${
MAP_GENERATOR_DIR
}
/utils/implant_annotations.py
-m
${
map_out_path
}
-v
${
minerva_variants_out_path
}
>
${
map_out_path_uniprot
}
check_exit_code
log
"UniProt annotations implanted"
log
"Trimming long strings..."
map_out_path_trimmed
=
${
map_out_path
/.xml/_trim.xml
}
$PYTHON_BIN
${
MAP_GENERATOR_DIR
}
/utils/trim_long_strings.py
-i
${
map_out_path
}
>
${
map_out_path_trimmed
}
map_out_path_trimmed
=
${
map_out_path_uniprot
/.xml/_trim.xml
}
$PYTHON_BIN
${
MAP_GENERATOR_DIR
}
/utils/trim_long_strings.py
-m
${
map_out_path_uniprot
}
>
${
map_out_path_trimmed
}
check_exit_code
log
"Long string trimmed"
log
"Combining the map with overlays"
...
...
map_generator/utils/implant_annotations.py
View file @
17b4348a
...
...
@@ -20,17 +20,33 @@ def implant(map_path: str, variants_path: str) -> str:
for
elem
in
list
(
root
.
iter
(
get_full_name
(
"species"
,
""
,
namespaces
))):
# for elem in root.iterfind(get_full_name("layout", "layout", namespaces)):
name
=
elem
.
attrib
[
"name"
]
if
name
in
gene_uniprot
:
for
uniprot_id
in
gene_uniprot
[
name
]:
idb
=
ET
.
Element
(
'bqbiol:isDescribedBy'
)
elem_name
=
elem
.
attrib
[
"name"
]
elem_id
=
elem
.
attrib
[
"id"
]
if
elem_name
in
gene_uniprot
:
rdfs
=
list
(
elem
.
iter
(
get_full_name
(
"RDF"
,
"rdf"
,
namespaces
)))
if
len
(
rdfs
)
>
0
:
rdf
=
rdfs
[
0
]
else
:
rdf
=
ET
.
SubElement
(
elem
,
'rdf:RDF'
)
rdf_descs
=
list
(
rdf
.
iter
(
get_full_name
(
"Description"
,
"rdf"
,
namespaces
)))
if
len
(
rdf_descs
)
>
0
:
rdf_desc
=
rdf_descs
[
0
]
else
:
rdf_desc
=
ET
.
SubElement
(
rdf
,
'rdf:about'
,
{
"#{}"
.
format
(
elem_id
)})
for
uniprot_id
in
gene_uniprot
[
elem_name
]:
idb
=
ET
.
SubElement
(
rdf_desc
,
'bqbiol:isDescribedBy'
)
b
=
ET
.
SubElement
(
idb
,
'rdf:Bag'
)
urn_key
=
'urn:miriam:uniprot'
if
'-'
not
in
uniprot_id
else
'urn:miriam:uniprot.isoform'
ET
.
SubElement
(
b
,
'rdf:li'
,
{
'rdf:resource'
:
"{}:{}"
.
format
(
urn_key
,
uniprot_id
)})
elem
.
append
(
idb
)
return
ET
.
tostring
(
root
,
encoding
=
'utf8'
).
decode
(
'utf8'
)
# return ET.tostring(root)
return
ET
.
tostring
(
root
,
encoding
=
'unicode'
)
if
__name__
==
'__main__'
:
...
...
@@ -42,7 +58,7 @@ if __name__ == '__main__':
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"-m"
,
"--map_path"
,
required
=
True
,
help
=
"
Input map XML
file"
)
help
=
"
SBML map
file"
)
parser
.
add_argument
(
"-v"
,
"--variants_path"
,
required
=
True
,
help
=
"MINERVA variant file"
)
...
...
map_generator/utils/trim_long_strings.py
View file @
17b4348a
...
...
@@ -33,8 +33,8 @@ if __name__ == '__main__':
datefmt
=
'%H:%M:%S'
)
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"-
i
"
,
"--
input_xml
"
,
parser
.
add_argument
(
"-
m
"
,
"--
map_path
"
,
required
=
True
,
help
=
"
X
ML file with a map"
)
help
=
"
SB
ML file with a map"
)
args
=
parser
.
parse_args
()
print
(
trim
(
args
.
input_xml
))
\ No newline at end of file
print
(
trim
(
args
.
map_path
))
\ No newline at end of file
map_generator/utils/utils.py
View file @
17b4348a
...
...
@@ -6,10 +6,13 @@ def register_namespaces(path) -> Dict:
namespaces
=
{}
with
open
(
path
)
as
f
:
content
=
f
.
read
()
sbml_tag
=
re
.
search
(
r
'<sbml([^>]*)>'
,
content
).
group
(
1
)
for
ns
in
re
.
finditer
(
r
'xmlns:*([^=]*)="([^"]*)"'
,
sbml_tag
):
# sbml_tag = re.search(r'<sbml([^>]*)>', content).group(1)
# for ns in re.finditer(r'xmlns:*([^=]*)="([^"]*)"', sbml_tag ):
for
ns
in
re
.
finditer
(
r
'xmlns:*([^=]*)="([^"]*)"'
,
content
):
k
=
ns
.
group
(
1
)
v
=
ns
.
group
(
2
)
if
k
in
namespaces
:
continue
# if not name:
# name = "core"
ET
.
register_namespace
(
k
,
v
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment