Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
David Hoksza
bh19-rare-diseases
Commits
27022035
Commit
27022035
authored
Dec 02, 2019
by
David Hoksza
Browse files
trimming of long strings added to the pipeline
parent
5264ad15
Changes
2
Hide whitespace changes
Inline
Side-by-side
assemble.sh
View file @
27022035
...
...
@@ -148,7 +148,12 @@ java -Xmx4g -jar ${MAP_GENERATOR_DIR}/biohackathon/target/biohackathon-1.0-jar-w
#java -Xmx4g -jar ${MAP_GENERATOR_DIR}/biohackathon/target/biohackathon-1.0-jar-with-dependencies.jar --enrichr-file ${enriched_paths_out_path} --minerva-file ${enriched_maps_out_path} --output-file ${map_out_path}
check_exit_code
log
"Pathwaygs assembled into
${
map_out_path
}
"
log
"Pathways assembled into
${
map_out_path
}
"
log
"Trimming long strings..."
map_out_path_trimmed
=
${
map_out_path
/.xml/_trim.xml
}
$PYTHON_BIN
${
MAP_GENERATOR_DIR
}
/utils/trim_long_strings.py
-i
${
map_out_path
}
>
${
map_out_path_trimmed
}
log
"Long string trimmed"
log
"Combining the map with overlays"
...
...
@@ -156,13 +161,13 @@ tmp_dir=${RES_DIR}/tmp/
tmp_dir_layouts
=
${
tmp_dir
}
/layouts/
mkdir
${
tmp_dir
}
cp
${
map_out_path
}
${
tmp_dir
}
cp
${
map_out_path
_trimmed
}
${
tmp_dir
}
mkdir
${
tmp_dir_layouts
}
cp
${
minerva_genes_out_path
}
${
tmp_dir_layouts
}
cp
${
minerva_variants_out_path
}
${
tmp_dir_layouts
}
map_zip_out_path
=
${
map_out_path
/.xml/.zip
}
map_zip_out_path
=
${
map_out_path
_trimmed
/.xml/.zip
}
rm
${
map_zip_out_path
}
cd
${
tmp_dir
}
zip
-r
tmp.zip
.
...
...
map_generator/utils/trim_long_strings.py
0 → 100644
View file @
27022035
import
logging
import
argparse
import
xml.etree.ElementTree
as
ET
def trim(path: str, max_len: int = 250) -> str:
    """Return the XML document at *path* with over-long attribute values trimmed.

    Every element of the tree is visited. An attribute whose value is longer
    than *max_len* characters is handled in one of two ways:

    * if the attribute name contains ``"resource"``, only the part of the
      value before the first ``";"`` is kept;
    * otherwise the value is replaced with an empty string.

    Args:
        path: Path of the XML file to read. The file itself is not modified.
        max_len: Maximum allowed attribute-value length, in characters.
            Defaults to 250, the limit that was previously hard-coded.

    Returns:
        The whole (possibly modified) document serialized as a string,
        including the XML declaration emitted by ``ElementTree``.
    """
    root = ET.parse(path).getroot()

    for elem in root.iter():
        attributes = elem.attrib
        # Iterate over a snapshot so that rewriting values below never
        # touches a dict view that is being iterated.
        for key, value in list(attributes.items()):
            if len(value) <= max_len:
                continue

            logging.warning(
                "Trimming attribute %r of <%s> (value longer than %d characters)",
                key, elem.tag, max_len)

            if "resource" in key:
                # We want to keep the RDF resource a valid resource, otherwise
                # MINERVA will fail during import. NOTE: the first
                # ';'-separated segment is kept as-is, even if it is itself
                # still longer than max_len.
                attributes[key] = value.split(";")[0]
            else:
                attributes[key] = ""

    return ET.tostring(root, encoding='utf8').decode('utf8')
if __name__ == '__main__':
    # Log lines carry a wall-clock timestamp, the level and the module name.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(module)s - %(message)s',
        datefmt='%H:%M:%S',
    )

    # The only command-line option is the (mandatory) input map file.
    cli = argparse.ArgumentParser()
    cli.add_argument("-i", "--input_xml", required=True, help="XML file with a map")
    options = cli.parse_args()

    # The trimmed document goes to stdout so the caller can redirect it
    # to a file of its choosing.
    trimmed_xml = trim(options.input_xml)
    print(trimmed_xml)
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment