# trim_long_strings.py
# Trims over-long attribute values in an XML map file and prints the result.
import logging
import argparse
import xml.etree.ElementTree as ET
4
import utils
5
6

def trim(path: str, *, max_length: int = 250) -> str:
    """Return the XML document at *path* with over-long attribute values trimmed.

    Every attribute of every element (the root included) is inspected. When a
    value is longer than *max_length* characters a warning is logged and the
    value is rewritten:

    * if the attribute name contains ``"resource"``, only the text before the
      first ``";"`` is kept (a value without ``";"`` is therefore left as is);
    * otherwise the value is replaced by the empty string.

    Args:
        path: Path of the XML file to read. It is also handed to
            ``utils.register_namespaces`` before parsing (presumably so the
            original namespace prefixes survive serialization -- confirm in
            ``utils``).
        max_length: Longest attribute value, in characters, that is left
            untouched. Defaults to 250, the limit that used to be hard-coded.

    Returns:
        The modified tree serialized with ``ET.tostring(..., encoding='utf8')``
        and decoded to ``str``.
    """
    utils.register_namespaces(path)

    root = ET.parse(path).getroot()

    for elem in root.iter():
        attributes = elem.attrib
        # Only values of existing keys are reassigned below, so the dict's
        # size never changes while it is being iterated.
        for key, value in attributes.items():
            if len(value) <= max_length:
                continue

            # Lazy %-style arguments: same text as before, formatted only if
            # the record is actually emitted.
            logging.warning("Trimming %s (key %s too long)", elem, key)

            if "resource" in key:
                # Keep the RDF resource a valid resource; otherwise MINERVA
                # fails during import.
                attributes[key] = value.split(";")[0]
            else:
                attributes[key] = ""

    return ET.tostring(root, encoding='utf8').decode('utf8')

if __name__ == '__main__':
    # Script entry point: set up logging, read the input path from the
    # command line and print the trimmed XML to stdout.
    logging.basicConfig(
        format='%(asctime)s [%(levelname)s] %(module)s - %(message)s',
        datefmt='%H:%M:%S',
        level=logging.INFO,
    )

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-i", "--input_xml",
        help="XML file with a map",
        required=True,
    )
    options = cli.parse_args()

    trimmed_xml = trim(options.input_xml)
    print(trimmed_xml)