Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
elixir
metadata-tools
Commits
23e16ed0
Commit
23e16ed0
authored
Apr 08, 2021
by
Pinar Alper
Browse files
Merge branch 'updates-to-exporter' into 'master'
Updates to exporter. See merge request pinar.alper/metadata-tools!10
parents
3ab1ee8b
00b7b6b8
Changes
3
Hide whitespace changes
Inline
Side-by-side
metadata_tools/commands/export_xls_to_json.py
View file @
23e16ed0
...
...
@@ -60,7 +60,7 @@ for fname in fileList:
fname_json
=
pathlib
.
Path
(
fname
).
stem
+
".json"
fname_out
=
os
.
path
.
join
(
args
.
output_dir
,
fname_json
)
with
open
(
fname_out
,
'w'
)
as
outfile
:
with
open
(
fname_out
,
'w'
,
encoding
=
'utf-8'
)
as
outfile
:
save_exported_datasets_to_file
(
dataset_dict
,
outfile
)
counter
+=
1
...
...
metadata_tools/importxls/dish_xls_exporter.py
View file @
23e16ed0
...
...
@@ -67,8 +67,8 @@ class DishXlsExporter:
logging
.
info
(
'Processing sheet ----> {}'
.
format
(
book
.
sheet_names
()[
idx
]))
if
is_study
(
sheet
):
cohort_dict
=
{
'name'
:
sheet
[
1
,
1
],
'description'
:
sheet
[
2
,
1
]
+
' '
+
sheet
[
6
,
1
],
cohort_dict
=
{
'name'
:
sheet
[
1
,
1
]
.
strip
()
,
'description'
:
sheet
[
2
,
1
]
+
' '
+
sheet
[
6
,
1
]
+
' '
+
sheet
[
16
,
1
]
,
'has_ethics_approval'
:
process_yes_no_answer
(
sheet
[
4
,
1
]),
"ethics_approval_notes"
:
sheet
[
5
,
1
],
"url"
:
sheet
[
3
,
1
],
...
...
@@ -81,7 +81,7 @@ class DishXlsExporter:
}
if
sheet
[
12
,
1
]
and
sheet
[
15
,
1
]:
cohort_dict
[
"contacts"
]
=
cohort_dict
[
"contacts"
].
append
({
"first_name"
:
get_names_from_string
(
sheet
[
12
,
1
])[
0
],
cohort_dict
[
"contacts"
].
append
({
"first_name"
:
get_names_from_string
(
sheet
[
12
,
1
])[
0
],
"last_name"
:
get_names_from_string
(
sheet
[
12
,
1
])[
1
],
"role"
:
sheet
[
15
,
1
],
"email"
:
sheet
[
13
,
1
],
...
...
@@ -90,14 +90,17 @@ class DishXlsExporter:
dataset_dict
[
"studies"
].
append
(
cohort_dict
)
elif
is_data
(
sheet
):
datadec_dict
=
{
'title'
:
sheet
[
1
,
1
],
'source_study'
:
sheet
[
2
,
1
],
datadec_dict
=
{
'title'
:
sheet
[
1
,
1
]
.
strip
()
,
'source_study'
:
sheet
[
2
,
1
]
.
strip
()
,
"data_types"
:[]}
datadec_dict
[
"data_type_notes"
]
=
sheet
[
7
,
1
]
data_type_info
=
self
.
process_data_types
(
get_value_list_from_row
(
sheet
,
6
))
datadec_dict
[
"data_types"
].
extend
(
data_type_info
[
0
])
datadec_dict
[
"data_type_notes"
]
=
datadec_dict
[
"data_type_notes"
]
+
" "
+
data_type_info
[
1
]
+
" Notes on samples: "
+
sheet
[
10
,
1
]
if
data_type_info
[
1
]:
datadec_dict
[
"data_type_notes"
]
+=
" "
+
data_type_info
[
1
]
if
sheet
[
10
,
1
]:
datadec_dict
[
"data_type_notes"
]
+=
" Notes on samples: "
+
sheet
[
10
,
1
]
#if it involves samples add this as a datatype
if
process_yes_no_answer
(
sheet
[
9
,
1
]):
...
...
@@ -263,31 +266,31 @@ class DishXlsExporter:
dataset_dict
[
"data_declarations"
].
append
(
datadec_dict
)
elif
is_submission
(
sheet
):
dataset_dict
[
"name"
]
=
sheet
[
2
,
1
]
dataset_dict
[
"project"
]
=
sheet
[
5
,
1
]
dataset_dict
[
"name"
]
=
sheet
[
2
,
1
]
.
strip
()
dataset_dict
[
"project"
]
=
sheet
[
5
,
1
]
.
strip
()
dataset_dict
[
"contacts"
].
extend
([{
"first_name"
:
get_names_from_string
(
sheet
[
9
,
1
])[
0
],
"last_name"
:
get_names_from_string
(
sheet
[
9
,
1
])[
1
],
"role"
:
sheet
[
11
,
1
],
"email"
:
sheet
[
10
,
1
],
"email"
:
sheet
[
10
,
1
]
.
strip
()
,
"affiliations"
:
[
self
.
process_institution
(
sheet
[
7
,
1
])]
},
{
"first_name"
:
get_names_from_string
(
sheet
[
12
,
1
])[
0
],
"last_name"
:
get_names_from_string
(
sheet
[
12
,
1
])[
1
],
"role"
:
"Legal_Representative"
,
"email"
:
sheet
[
1
0
,
1
],
"email"
:
sheet
[
1
3
,
1
]
.
strip
()
,
"affiliations"
:
[
self
.
process_institution
(
sheet
[
7
,
1
])]
},
{
"first_name"
:
get_names_from_string
(
sheet
[
14
,
1
])[
0
],
"last_name"
:
get_names_from_string
(
sheet
[
14
,
1
])[
1
],
"role"
:
"Data_Protection_Officer"
,
"email"
:
sheet
[
1
0
,
1
],
"email"
:
sheet
[
1
5
,
1
]
.
strip
()
,
"affiliations"
:
[
self
.
process_institution
(
sheet
[
7
,
1
])]
}])
if
sheet
[
16
,
1
]
and
sheet
[
18
,
1
]:
dataset_dict
[
"contacts"
].
append
({
"first_name"
:
get_names_from_string
(
sheet
[
1
4
,
1
])[
0
],
"last_name"
:
get_names_from_string
(
sheet
[
1
4
,
1
])[
1
],
dataset_dict
[
"contacts"
].
append
({
"first_name"
:
get_names_from_string
(
sheet
[
1
6
,
1
])[
0
],
"last_name"
:
get_names_from_string
(
sheet
[
1
6
,
1
])[
1
],
"role"
:
sheet
[
18
,
1
],
"email"
:
sheet
[
1
0
,
1
],
"email"
:
sheet
[
1
7
,
1
]
.
strip
()
,
"affiliations"
:
[
self
.
process_institution
(
sheet
[
7
,
1
])]
})
...
...
metadata_tools/importxls/export_utils.py
View file @
23e16ed0
...
...
@@ -81,23 +81,28 @@ def process_possible_date(possible_date):
def get_names_from_string(full_name):
    """Split a full name into ``[first_name, last_name]``.

    The first whitespace-separated token becomes the first name; all
    remaining tokens, joined by single spaces, become the last name.
    One trailing comma is dropped before splitting.  If the name contains
    a title (as judged by ``name_cointains_title``) an error is logged,
    but the title is NOT removed from the result.

    Args:
        full_name: The name text.  ``None`` or blank text yields
            ``['', '']``.

    Returns:
        A two-element list ``[first_name, last_name]``; a part that is
        absent is the empty string.
    """
    result = ['', '']
    # Guard before calling .strip(): a None input has no name parts.
    if full_name is None:
        return result

    name = full_name.strip()
    if name_cointains_title(name):
        logging.error(f'Name contains titles: {name}')

    if name.endswith(','):
        # Re-strip so "Smith ," does not leave a dangling space behind.
        name = name[:-1].rstrip()

    # split() without a separator collapses runs of whitespace, so repeated
    # spaces never produce empty tokens or a last name with a leading space.
    name_list = name.split()
    if name_list:
        result[0] = name_list[0]
        result[1] = " ".join(name_list[1:])
    return result
def name_cointains_title(full_name):
    """Report whether *full_name* includes a known title or degree token.

    Every dot is replaced by a space and the text is lower-cased; the
    result is split on single spaces, and the name matches when any token
    is exactly one of the known abbreviations.

    Args:
        full_name: The name text to inspect.

    Returns:
        ``True`` if at least one token is a known title, else ``False``.
    """
    known_titles = frozenset((
        'dr', 'prof', 'mr', 'mrs', 'ms', 'msc', 'mga',
        'mph', 'sc', 'drph', 'rndr', 'phd', 'ph', 'msi',
    ))
    # Dots become spaces so that "Dr.John" or "Ph.D" break into plain tokens.
    tokens = full_name.replace(".", " ").lower().split(" ")
    return any(token in known_titles for token in tokens)
def
get_lines_from_string
(
a_string
):
result
=
[]
stripped
=
a_string
.
strip
()
...
...
@@ -142,4 +147,4 @@ def save_exported_datasets_to_file(exported_dataset, output_file):
"$schema"
:
"https://git-r3lab.uni.lu/pinar.alper/metadata-tools/raw/master/metadata_tools/resources/elu-dataset.json"
,
"items"
:
items
}
return
json
.
dump
(
obj
,
output_file
,
indent
=
4
)
return
json
.
dump
(
obj
,
output_file
,
ensure_ascii
=
False
,
indent
=
4
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment