Commit 9778d52a authored by David Hoksza
Browse files

working version of implanting annotations

parent a8004749
......@@ -2,11 +2,10 @@ import logging
import argparse
import pandas as pd
import xml.etree.ElementTree as ET
from . import utils
import utils
def get_full_name(elem_name, ns, nss) -> str:
    """Return *elem_name* qualified with a namespace URI in ``{uri}name`` form.

    Args:
        elem_name: Local (unprefixed) element name.
        ns: Key into *nss* selecting the namespace to use.
        nss: Mapping from namespace key to namespace URI.

    Returns:
        The string ``{<nss[ns]>}<elem_name>``.

    Raises:
        KeyError: If *ns* is not a key of *nss*.
    """
    namespace_uri = nss[ns]
    # Doubled braces emit literal "{" and "}" around the URI.
    return f"{{{namespace_uri}}}{elem_name}"
def implant(map_path: str, variants_path: str) -> str:
......@@ -15,22 +14,21 @@ def implant(map_path: str, variants_path: str) -> str:
for name, g in df_vars.groupby('gene_name')['identifier_uniprot']:
gene_uniprot[name] = set(g)
utils.register_namespaces(map_path)
namespaces = utils.register_namespaces(map_path)
tree = ET.parse(map_path)
root = tree.getroot()
for elem in root.iter('species'):
atts = elem.attrib
# if 'name' in atts:
# print(elem.tag, atts)
for k, v in atts.items():
if len(v) > 250:
logging.warning("Trimming {} (key {} too long)".format(elem, k))
if "resource" in k:
# We want to keep the RDF resource still a valid resource otherwise MINERVA will fail during import
atts[k] = atts[k].split(";")[0]
else:
atts[k] = ""
for elem in list(root.iter(get_full_name("species", "", namespaces))):
# for elem in root.iterfind(get_full_name("layout", "layout", namespaces)):
name = elem.attrib["name"]
if name in gene_uniprot:
for uniprot_id in gene_uniprot[name]:
idb = ET.Element('bqbiol:isDescribedBy')
b = ET.SubElement(idb, 'rdf:Bag')
urn_key = 'urn:miriam:uniprot' if '-' not in uniprot_id else 'urn:miriam:uniprot.isoform'
ET.SubElement(b, 'rdf:li', {'rdf:resource': "{}:{}".format(urn_key, uniprot_id)})
elem.append(idb)
return ET.tostring(root, encoding='utf8').decode('utf8')
......
import xml.etree.ElementTree as ET
import re
from typing import Dict
def register_namespaces(path) -> Dict[str, str]:
    """Register the XML namespaces declared on the ``<sbml>`` tag of a file.

    The file is scanned as text (not parsed as XML) for the first ``<sbml ...>``
    opening tag. Every ``xmlns`` / ``xmlns:prefix`` declaration found on that
    tag is registered with ``xml.etree.ElementTree`` and collected into the
    returned mapping.

    Args:
        path: Path of the file to scan.

    Returns:
        Mapping from namespace prefix to namespace URI. The default namespace
        (plain ``xmlns="..."``) is stored under the empty-string key.

    Raises:
        ValueError: If the file contains no ``<sbml>`` opening tag, or if
            ``ET.register_namespace`` rejects a prefix.
        OSError: If the file cannot be opened.
    """
    # Only the namespace declarations are needed, so bytes that are not valid
    # UTF-8 elsewhere in the file are replaced instead of aborting the scan.
    with open(path, encoding="utf-8", errors="replace") as f:
        content = f.read()

    sbml_match = re.search(r'<sbml\b([^>]*)>', content)
    if sbml_match is None:
        raise ValueError("No <sbml> tag found in {}".format(path))

    # Accept both quote styles and optional whitespace around "=", and keep
    # whitespace out of the captured prefix.
    ns_pattern = r"""\sxmlns(?::([^\s=]+))?\s*=\s*(["'])(.*?)\2"""

    namespaces = {}
    for ns in re.finditer(ns_pattern, sbml_match.group(1)):
        prefix = ns.group(1) or ""  # "" denotes the default namespace
        uri = ns.group(3)
        ET.register_namespace(prefix, uri)
        namespaces[prefix] = uri
    return namespaces
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment