Commit 27022035 authored by David Hoksza's avatar David Hoksza
Browse files

trimming of long strings added to the pipeline

parent 5264ad15
......@@ -148,7 +148,12 @@ java -Xmx4g -jar ${MAP_GENERATOR_DIR}/biohackathon/target/biohackathon-1.0-jar-w
#java -Xmx4g -jar ${MAP_GENERATOR_DIR}/biohackathon/target/biohackathon-1.0-jar-with-dependencies.jar --enrichr-file ${enriched_paths_out_path} --minerva-file ${enriched_maps_out_path} --output-file ${map_out_path}
check_exit_code
log "Pathwaygs assembled into ${map_out_path}"
log "Pathways assembled into ${map_out_path}"
log "Trimming long strings..."
map_out_path_trimmed=${map_out_path/.xml/_trim.xml}
$PYTHON_BIN ${MAP_GENERATOR_DIR}/utils/trim_long_strings.py -i ${map_out_path} > ${map_out_path_trimmed}
log "Long string trimmed"
log "Combining the map with overlays"
......@@ -156,13 +161,13 @@ tmp_dir=${RES_DIR}/tmp/
tmp_dir_layouts=${tmp_dir}/layouts/
mkdir ${tmp_dir}
cp ${map_out_path} ${tmp_dir}
cp ${map_out_path_trimmed} ${tmp_dir}
mkdir ${tmp_dir_layouts}
cp ${minerva_genes_out_path} ${tmp_dir_layouts}
cp ${minerva_variants_out_path} ${tmp_dir_layouts}
map_zip_out_path=${map_out_path/.xml/.zip}
map_zip_out_path=${map_out_path_trimmed/.xml/.zip}
rm ${map_zip_out_path}
cd ${tmp_dir}
zip -r tmp.zip .
......
import logging
import argparse
import xml.etree.ElementTree as ET
def trim(path: str, max_len: int = 250) -> str:
    """Shorten over-long attribute values in an XML file.

    The file at ``path`` is parsed and every element in the tree is
    visited. Each attribute whose value is longer than ``max_len``
    characters is rewritten in memory (the input file is not modified):

    * if the attribute name contains ``"resource"``, only the part of the
      value before the first ``";"`` is kept;
    * otherwise the value is replaced by the empty string.

    A warning is logged for every attribute that gets rewritten.

    Args:
        path: Path to the XML file to process.
        max_len: Longest attribute value (in characters) that is left
            untouched. Values of exactly ``max_len`` characters are kept.

    Returns:
        The whole modified document serialized with the ``utf8`` encoding
        and decoded to ``str``.
    """
    root = ET.parse(path).getroot()
    for elem in root.iter():
        atts = elem.attrib
        # Iterate over a snapshot so that assigning into ``atts`` below can
        # never interfere with the iteration itself.
        for k, v in list(atts.items()):
            if len(v) <= max_len:
                continue
            logging.warning("Trimming %s (key %s too long)", elem, k)
            if "resource" in k:
                # We want to keep the RDF resource still a valid resource,
                # otherwise MINERVA will fail during import.
                atts[k] = v.split(";")[0]
            else:
                atts[k] = ""
    return ET.tostring(root, encoding='utf8').decode('utf8')
if __name__ == "__main__":
    # Script entry point: set up logging, read the input path from the
    # command line and print the trimmed XML document to standard output.
    logging.basicConfig(
        format="%(asctime)s [%(levelname)s] %(module)s - %(message)s",
        datefmt="%H:%M:%S",
        level=logging.INFO,
    )

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-i",
        "--input_xml",
        help="XML file with a map",
        required=True,
    )
    options = cli.parse_args()

    trimmed_xml = trim(options.input_xml)
    print(trimmed_xml)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment