Commit 23e16ed0 authored by Pinar Alper
Browse files

Merge branch 'updates-to-exporter' into 'master'

Updates to exporter

See merge request pinar.alper/metadata-tools!10
parents 3ab1ee8b 00b7b6b8
......@@ -60,7 +60,7 @@ for fname in fileList:
fname_json = pathlib.Path(fname).stem + ".json"
fname_out = os.path.join(args.output_dir, fname_json)
with open(fname_out, 'w') as outfile:
with open(fname_out, 'w', encoding='utf-8') as outfile:
save_exported_datasets_to_file(dataset_dict, outfile)
counter += 1
......
......@@ -67,8 +67,8 @@ class DishXlsExporter:
logging.info('Processing sheet ----> {}'.format(book.sheet_names()[idx]))
if is_study(sheet):
cohort_dict = {'name': sheet[1, 1],
'description' : sheet[2, 1] + ' ' + sheet[6, 1],
cohort_dict = {'name': sheet[1, 1].strip(),
'description' : sheet[2, 1] + ' ' + sheet[6, 1] + ' ' + sheet[16,1],
'has_ethics_approval' : process_yes_no_answer(sheet[4, 1]),
"ethics_approval_notes": sheet[5, 1],
"url": sheet[3, 1],
......@@ -81,7 +81,7 @@ class DishXlsExporter:
}
if sheet[12, 1] and sheet[15, 1]:
cohort_dict["contacts"] = cohort_dict["contacts"].append({"first_name": get_names_from_string(sheet[12,1])[0],
cohort_dict["contacts"].append({"first_name": get_names_from_string(sheet[12,1])[0],
"last_name": get_names_from_string(sheet[12,1])[1],
"role": sheet[15,1],
"email":sheet[13,1],
......@@ -90,14 +90,17 @@ class DishXlsExporter:
dataset_dict["studies"].append(cohort_dict)
elif is_data(sheet):
datadec_dict = {'title' : sheet[1, 1],
'source_study' : sheet[2, 1],
datadec_dict = {'title' : sheet[1, 1].strip(),
'source_study' : sheet[2, 1].strip(),
"data_types":[]}
datadec_dict["data_type_notes"] = sheet[7, 1]
data_type_info = self.process_data_types(get_value_list_from_row(sheet, 6))
datadec_dict["data_types"].extend(data_type_info[0])
datadec_dict["data_type_notes"] = datadec_dict["data_type_notes"] +" "+ data_type_info[1]+ " Notes on samples: " + sheet[10, 1]
if data_type_info[1]:
datadec_dict["data_type_notes"] += " " + data_type_info[1]
if sheet[10, 1]:
datadec_dict["data_type_notes"] += " Notes on samples: " + sheet[10, 1]
#if it involves samples add this as a datatype
if process_yes_no_answer(sheet[9, 1]):
......@@ -263,31 +266,31 @@ class DishXlsExporter:
dataset_dict["data_declarations"].append(datadec_dict)
elif is_submission(sheet):
dataset_dict["name"] = sheet[2, 1]
dataset_dict["project"] = sheet[5, 1]
dataset_dict["name"] = sheet[2, 1].strip()
dataset_dict["project"] = sheet[5, 1].strip()
dataset_dict["contacts"].extend([{"first_name": get_names_from_string(sheet[9, 1])[0],
"last_name":get_names_from_string(sheet[9, 1])[1],
"role": sheet[11,1],
"email":sheet[10,1],
"email":sheet[10,1].strip(),
"affiliations": [self.process_institution(sheet[7,1])]
},
{"first_name": get_names_from_string(sheet[12, 1])[0],
"last_name":get_names_from_string(sheet[12, 1])[1],
"role": "Legal_Representative",
"email":sheet[10,1],
"email":sheet[13,1].strip(),
"affiliations": [self.process_institution(sheet[7,1])]
},
{"first_name": get_names_from_string(sheet[14, 1])[0],
"last_name":get_names_from_string(sheet[14, 1])[1],
"role": "Data_Protection_Officer",
"email":sheet[10,1],
"email":sheet[15,1].strip(),
"affiliations": [self.process_institution(sheet[7,1])]
}])
if sheet[16, 1] and sheet[18, 1]:
dataset_dict["contacts"].append({"first_name": get_names_from_string(sheet[14, 1])[0],
"last_name": get_names_from_string(sheet[14, 1])[1],
dataset_dict["contacts"].append({"first_name": get_names_from_string(sheet[16, 1])[0],
"last_name": get_names_from_string(sheet[16, 1])[1],
"role": sheet[18,1],
"email": sheet[10,1],
"email": sheet[17,1].strip(),
"affiliations": [self.process_institution(sheet[7,1])]
})
......
......@@ -81,23 +81,28 @@ def process_possible_date(possible_date):
def get_names_from_string(full_name):
    """Split a full name into a ``[first_name, last_name]`` pair.

    The first whitespace-separated token becomes the first name and all
    remaining tokens, joined by single spaces, become the last name.
    A name without any whitespace is returned as the first name with an
    empty last name.

    Args:
        full_name: The name as entered in the spreadsheet cell. ``None``
            is treated like an empty name.

    Returns:
        A two-element list ``[first_name, last_name]``; both elements are
        empty strings when no name is given.
    """
    result = ['', '']
    # Guard before calling .strip(): the previous None check came after
    # .strip() and therefore could never take effect.
    if full_name is None:
        return result
    name = full_name.strip()
    # Titles are only reported, not removed; the name is still split below.
    if name_cointains_title(name):
        logging.error(f'Name contains titles: {name}')
    # Drop trailing commas and any whitespace that preceded them.
    name = name.rstrip(',').rstrip()
    # split() without arguments collapses runs of whitespace, so repeated
    # spaces no longer produce empty tokens or stray spaces in the last name.
    name_list = name.split()
    if name_list:
        result[0] = name_list[0]
        result[1] = " ".join(name_list[1:])
    return result
def name_cointains_title(full_name):
    """Return True when *full_name* contains an academic or courtesy title.

    The name is lower-cased and split into tokens on whitespace and on the
    punctuation that commonly surrounds titles (``. , ; : ( ) /``). A match
    requires a whole token to equal one of the known titles, so a title
    that is merely a prefix of a name (e.g. "Drake") is not reported.

    Args:
        full_name: The name string to inspect.

    Returns:
        bool: True if at least one token is a known title, else False.
    """
    titles = {'dr', 'prof', 'mr', 'mrs', 'ms', 'msc', 'mga', 'mph', 'sc',
              'drph', 'rndr', 'phd', 'ph', 'msi'}
    # Turn separators into spaces so "Smith,PhD", "(MSc)" and "Ph.D." each
    # yield a standalone title token.
    separators = str.maketrans({char: " " for char in ".,;:()/"})
    # split() without arguments also handles tabs, newlines and repeated
    # spaces, which a split on a single space would miss.
    tokens = full_name.lower().translate(separators).split()
    return any(token in titles for token in tokens)
def get_lines_from_string(a_string):
result = []
stripped = a_string.strip()
......@@ -142,4 +147,4 @@ def save_exported_datasets_to_file(exported_dataset, output_file):
"$schema": "https://git-r3lab.uni.lu/pinar.alper/metadata-tools/raw/master/metadata_tools/resources/elu-dataset.json",
"items": items
}
return json.dump(obj, output_file, indent=4)
return json.dump(obj, output_file, ensure_ascii = False, indent=4)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment