Commit 5d2fe642 authored by Pinar Alper's avatar Pinar Alper
Browse files

Merge branch 'new-format-for-export' into 'master'

feat: use new format for export, added datetime to log entries and hyphen in use restrictions

See merge request pinar.alper/metadata-tools!7
parents ac7535c6 a8bfe5de
...@@ -3,3 +3,6 @@ __pycache__/ ...@@ -3,3 +3,6 @@ __pycache__/
# Visual studio code # Visual studio code
.vscode .vscode
project_venv project_venv
env
*.egg-info
*.log
...@@ -8,7 +8,10 @@ from .export_utils import get_partners_from_daisy, process_possible_date, proces ...@@ -8,7 +8,10 @@ from .export_utils import get_partners_from_daisy, process_possible_date, proces
class DishXlsExporter: class DishXlsExporter:
def __init__(self): def __init__(self):
logging.basicConfig(filename='export_dishxls.log', level=logging.DEBUG) logging.basicConfig(filename='export_dishxls.log',
level=logging.DEBUG,
format='%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S')
institutions = get_partners_from_daisy() institutions = get_partners_from_daisy()
self.inst_dict = {} self.inst_dict = {}
self.inst_ac_dict = {} self.inst_ac_dict = {}
...@@ -106,7 +109,7 @@ class DishXlsExporter: ...@@ -106,7 +109,7 @@ class DishXlsExporter:
datadec_dict["consent_status_description"] = sheet[33, 1] datadec_dict["consent_status_description"] = sheet[33, 1]
if sheet[20, 1]: if sheet[20, 1]:
datadec_dict['subject_categories'] = sheet[20, 1].replace(' & ', '_and_') datadec_dict['subjects_category'] = sheet[20, 1].replace(' & ', '_and_')
if sheet[12, 1]: if sheet[12, 1]:
lb_code = self.extract_lb_code(sheet[12, 1]) lb_code = self.extract_lb_code(sheet[12, 1])
...@@ -145,19 +148,19 @@ class DishXlsExporter: ...@@ -145,19 +148,19 @@ class DishXlsExporter:
use_restrictions = [] use_restrictions = []
if process_yes_no_dontknow_answer(sheet[24, 1]): if process_yes_no_dontknow_answer(sheet[24, 1]):
use_restrictions.append({'use_class': 'RS[XX]', use_restrictions.append({'use_class': 'RS-[XX]',
'use_restriction_rule': "CONSTRAINTS", 'use_restriction_rule': "CONSTRAINTS",
'use_class_note': sheet[25, 1]}) 'use_class_note': sheet[25, 1]})
elif process_yes_no_dontknow_answer(sheet[24, 1]) is not None: elif process_yes_no_dontknow_answer(sheet[24, 1]) is not None:
use_restrictions.append({'use_class': 'RS[XX]', use_restrictions.append({'use_class': 'RS-[XX]',
'use_restriction_rule': "NO_CONSTRAINTS", 'use_restriction_rule': "NO_CONSTRAINTS",
'use_class_note': sheet[25, 1]}) 'use_class_note': sheet[25, 1]})
if process_yes_no_dontknow_answer(sheet[26, 1]): if process_yes_no_dontknow_answer(sheet[26, 1]):
use_restrictions.append({'use_class': 'GS[XX]', use_restrictions.append({'use_class': 'GS-[XX]',
'use_restriction_rule': "CONSTRAINTS", 'use_restriction_rule': "CONSTRAINTS",
'use_class_note': sheet[27, 1]}) 'use_class_note': sheet[27, 1]})
elif process_yes_no_dontknow_answer(sheet[26, 1]) is not None: elif process_yes_no_dontknow_answer(sheet[26, 1]) is not None:
use_restrictions.append({'use_class': 'GS[XX]', use_restrictions.append({'use_class': 'GS-[XX]',
'use_restriction_rule': "NO_CONSTRAINTS", 'use_restriction_rule': "NO_CONSTRAINTS",
'use_class_note': sheet[27, 1]}) 'use_class_note': sheet[27, 1]})
......
from datetime import datetime
import datetime as dt import datetime as dt
import json import json
import logging
import urllib import urllib
from datetime import datetime
from json import dumps from json import dumps
from urllib.error import HTTPError, URLError
from socket import timeout from socket import timeout
import logging from urllib.error import HTTPError, URLError
def process_yes_no_answer(answer): def process_yes_no_answer(answer):
""" """
...@@ -107,7 +109,6 @@ def get_lines_from_string(a_string): ...@@ -107,7 +109,6 @@ def get_lines_from_string(a_string):
def get_partners_from_daisy(): def get_partners_from_daisy():
entities_json_str = None entities_json_str = None
try: try:
urlEntities = urllib.parse.urljoin('https://daisy.lcsb.uni.lu/api/', 'partners') urlEntities = urllib.parse.urljoin('https://daisy.lcsb.uni.lu/api/', 'partners')
...@@ -130,3 +131,15 @@ def get_partners_from_daisy(): ...@@ -130,3 +131,15 @@ def get_partners_from_daisy():
entities_json_str = dumps('{}') entities_json_str = dumps('{}')
return json.loads(entities_json_str) return json.loads(entities_json_str)
def save_exported_datasets_to_file(exported_dataset, output_file):
    """Write one dataset (or a list of datasets) to *output_file* as JSON.

    The output is wrapped in an envelope object with a "$schema" key pointing
    at the ELU dataset JSON schema and an "items" key holding the dataset
    list, making the exported file self-describing.

    :param exported_dataset: a single dataset dict or a list of dataset dicts
    :param output_file: a writable text file-like object
    :return: the result of ``json.dump`` (``None``)
    """
    # Normalise the argument so "items" is always serialised as a list.
    items = exported_dataset if isinstance(exported_dataset, list) else [exported_dataset]
    envelope = {
        "$schema": "https://git-r3lab.uni.lu/pinar.alper/metadata-tools/raw/master/metadata_tools/resources/elu-dataset.json",
        "items": items,
    }
    return json.dump(envelope, output_file, indent=4)
...@@ -145,6 +145,7 @@ ...@@ -145,6 +145,7 @@
"US", "US",
"PS", "PS",
"IS", "IS",
"IP",
"Other" "Other"
] ]
}, },
......
from unittest import TestCase
import os
from metadata_tools.importxls.dish_xls_exporter import DishXlsExporter
import json import json
import os
from pathlib import Path from pathlib import Path
from unittest import TestCase
class TestDishXlsPartnerExtractor(TestCase): from metadata_tools.importxls.export_utils import save_exported_datasets_to_file
from metadata_tools.importxls.dish_xls_exporter import DishXlsExporter
def test_dish_export(self):
class TestDishXlsPartnerExtractor(TestCase):
def test_dish_export(self):
exporter = DishXlsExporter() exporter = DishXlsExporter()
#full_file_path = os.path.join(os.path.dirname(__file__), 'resources', 'UL_Extended_Data_Information_Sheet-8.xlsx') #full_file_path = os.path.join(os.path.dirname(__file__), 'resources', 'UL_Extended_Data_Information_Sheet-8.xlsx')
...@@ -18,7 +20,7 @@ class TestDishXlsPartnerExtractor(TestCase): ...@@ -18,7 +20,7 @@ class TestDishXlsPartnerExtractor(TestCase):
full_file_path = os.path.join(dirName, fname) full_file_path = os.path.join(dirName, fname)
dataset_dict = exporter.export_submission(full_file_path) dataset_dict = exporter.export_submission(full_file_path)
with open(Path(full_file_path).stem + ".json", 'w') as outfile: with open(Path(full_file_path).stem + ".json", 'w') as outfile:
json.dump([dataset_dict], outfile, indent=4) save_exported_datasets_to_file(dataset_dict, outfile)
except ValueError: except ValueError:
print("Could not import {}".format(full_file_path)) print("Could not import {}".format(full_file_path))
return return
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment