Commit a8004749 authored by David Hoksza
Browse files

correct handling of namespaces in xml

parent b24b357a
import logging
import argparse
import pandas as pd
import xml.etree.ElementTree as ET
from . import utils
def implant(map_path: str, variants_path: str) -> str:
    """Read the variants table and return the map XML with over-long species attributes trimmed.

    Args:
        map_path: Path to the map XML file, parsed with ElementTree.
        variants_path: Path to a tab-separated variants file. The first four
            lines are skipped; the remaining table must contain the
            ``gene_name`` and ``identifier_uniprot`` columns.

    Returns:
        The (possibly modified) XML document serialized to a ``str``.
    """
    max_attr_len = 250

    df_vars = pd.read_csv(variants_path, sep='\t', skiprows=4)
    # Map every gene name to the set of UniProt identifiers listed for it.
    # NOTE(review): this mapping is built but not consumed by the code visible
    # here - presumably the implanting step that uses it is still to come.
    gene_uniprot = {
        name: set(ids)
        for name, ids in df_vars.groupby('gene_name')['identifier_uniprot']
    }

    tree = ET.parse(map_path)
    root = tree.getroot()
    for elem in root.iter('species'):
        atts = elem.attrib
        # Iterate over a snapshot because the values are rewritten in place.
        for k, v in list(atts.items()):
            if len(v) <= max_attr_len:
                continue
            logging.warning("Trimming {} (key {} too long)".format(elem, k))
            if "resource" in k:
                # We want to keep the RDF resource still a valid resource,
                # otherwise MINERVA will fail during import: keep only the
                # part before the first ';'.
                atts[k] = v.split(";")[0]
            else:
                # Any other over-long attribute is blanked out.
                atts[k] = ""
    return ET.tostring(root, encoding='utf8').decode('utf8')
if __name__ == '__main__':
    # Command-line entry point: parse the two input paths, run implant() and
    # print the resulting XML to standard output.
    # The format string was left as a dangling keyword argument; it belongs to
    # the logging configuration call.
    logging.basicConfig(
        format='%(asctime)s [%(levelname)s] %(module)s - %(message)s',
    )
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: implant() cannot run without them.
    parser.add_argument("-m", "--map_path", required=True,
                        help="Input map XML file")
    parser.add_argument("-v", "--variants_path", required=True,
                        help="MINERVA variant file")
    args = parser.parse_args()
    print(implant(args.map_path, args.variants_path))
\ No newline at end of file
import logging
import argparse
import xml.etree.ElementTree as ET
import utils
def trim(path: str) -> str:
tree = ET.parse(path)
root = tree.getroot()
import xml.etree.ElementTree as ET
import re
def register_namespaces(path):
with open(path) as f:
content =
sbml_tag ='<sbml([^>]*)>', content).group(1)
for ns in re.finditer(r'xmlns:*([^=]*)="([^"]*)"', sbml_tag ):
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment