import logging
import argparse
import pandas as pd
import xml.etree.ElementTree as ET
from . import utils
def implant(map_path: str, variants_path: str) -> str:
    """Trim over-long ``species`` attributes of a map XML and return it as text.

    Args:
        map_path: Path to the map XML file to parse.
        variants_path: Path to a tab-separated variants file. The first four
            lines are skipped and the next one is read as the header; it must
            provide ``gene_name`` and ``identifier_uniprot`` columns.

    Returns:
        The serialized XML document (including the XML declaration) as a
        string.
    """
    df_vars = pd.read_csv(variants_path, sep='\t', skiprows=4)
    # NOTE(review): this gene -> set of UniProt identifiers mapping is built
    # but not consumed anywhere in the visible code; presumably a later step
    # is meant to use it -- confirm before removing.
    gene_uniprot = {
        name: set(ids)
        for name, ids in df_vars.groupby('gene_name')['identifier_uniprot']
    }

    root = ET.parse(map_path).getroot()
    for elem in root.iter('species'):
        atts = elem.attrib
        # Only values of already-present keys are reassigned below, so the
        # dict does not change size while it is being iterated.
        for k, v in atts.items():
            if len(v) <= 250:
                continue
            logging.warning("Trimming %s (key %s too long)", elem, k)
            if "resource" in k:
                # We want to keep the RDF resource still a valid resource
                # otherwise MINERVA will fail during import
                atts[k] = v.split(";")[0]
            else:
                # Any other over-long attribute is blanked out entirely.
                atts[k] = ""
    return ET.tostring(root, encoding='utf8').decode('utf8')
if __name__ == '__main__':
    # Command-line entry point: configure logging, read the two input paths
    # and print the processed map XML to standard output.
    #
    # The format string used to sit on a dangling `format=...,` line, which
    # only bound a tuple to a name shadowing the builtin `format`; it is now
    # actually handed to the logging configuration.
    logging.basicConfig(
        format='%(asctime)s [%(levelname)s] %(module)s - %(message)s',
    )

    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them `implant` would receive None.
    parser.add_argument("-m", "--map_path", required=True,
                        help="Input map XML file")
    parser.add_argument("-v", "--variants_path", required=True,
                        help="MINERVA variant file")
    args = parser.parse_args()

    print(implant(args.map_path, args.variants_path))
import logging
import argparse
import xml.etree.ElementTree as ET
import utils
def trim(path: str) -> str:
    """Parse the XML file at ``path`` and obtain its root element.

    NOTE(review): the visible body ends right after fetching the root, so
    nothing is returned even though the signature is annotated ``-> str``.
    The remainder of this function appears to be missing from the file --
    TODO confirm and restore.
    """
    tree = ET.parse(path)
    root = tree.getroot()
import xml.etree.ElementTree as ET
import re
def register_namespaces(path):
with open(path) as f:
content =
sbml_tag ='<sbml([^>]*)>', content).group(1)
for ns in re.finditer(r'xmlns:*([^=]*)="([^"]*)"', sbml_tag ):
