Commit 0ac6a330 authored by Pinar Alper
Browse files

Merge branch '13-remove-old-schema-tests' into 'master'

Resolve "Remove old schema tests"

Closes #13

See merge request pinar.alper/metadata-tools!16
parents 8d92b2ff 062c8ae4
import json
import os import os
from pathlib import Path from pathlib import Path
...@@ -7,20 +6,45 @@ from unittest import TestCase ...@@ -7,20 +6,45 @@ from unittest import TestCase
from metadata_tools.importxls.export_utils import save_exported_datasets_to_file from metadata_tools.importxls.export_utils import save_exported_datasets_to_file
from metadata_tools.importxls.dish_xls_exporter import DishXlsExporter from metadata_tools.importxls.dish_xls_exporter import DishXlsExporter
from jsonschema import validate
from jsonschema.exceptions import SchemaError, ValidationError
from json import loads
class TestDishXlsPartnerExtractor(TestCase):
    """Export DISH Excel sheets to JSON and validate the exported data."""

    def test_dish_export(self):
        """Export every Excel file under ``resources`` and validate each result.

        Every file below the ``resources`` directory next to this module whose
        lower-cased name ends in ``xls`` or ``xlsx`` is exported with
        ``DishXlsExporter``, written to ``<stem>.json`` (a relative path, so it
        lands in the current working directory) and validated against the
        ``elu-dataset.json`` schema.

        A workbook whose export raises ``ValueError`` is reported on stdout and
        skipped; a ``ValidationError`` fails the test.
        """
        # Relative path: resolved against the current working directory.
        schema_path = "metadata_tools/resources/json-schemas/schemas/elu-dataset.json"

        # Parse the schema once instead of re-reading it for every workbook.
        with open(schema_path, encoding='utf-8') as schema_file:
            schema = loads(schema_file.read())

        exporter = DishXlsExporter()

        def process_excel_file(full_file_path):
            """Export one workbook to JSON; return the exported data.

            Returns ``None`` when the export itself raised ``ValueError``, so
            the caller never sees an unbound result.
            """
            dataset_dict = None
            try:
                dataset_dict = exporter.export_submission(full_file_path)
                with open(Path(full_file_path).stem + ".json", 'w') as outfile:
                    save_exported_datasets_to_file(dataset_dict, outfile)
            except ValueError:
                # Best effort: report the problematic workbook and carry on.
                print("Could not import {}".format(full_file_path))
            return dataset_dict

        def validate_dataset_dict(dataset_dict, fname):
            """Fail the test if ``dataset_dict`` does not satisfy the schema."""
            try:
                validate(dataset_dict, schema)
            except ValidationError:
                self.fail(f"Validation of '{fname}' failed using schema 'json-schemas/schemas/elu-dataset.json'")

        resources_dir = os.path.join(os.path.dirname(__file__), 'resources')
        for dir_name, _, file_list in os.walk(resources_dir):
            for fname in file_list:
                if not fname.lower().endswith(('xls', 'xlsx')):
                    continue

                # Test export from Excel to JSON
                dataset_dict = process_excel_file(os.path.join(dir_name, fname))
                if dataset_dict is None:
                    # Export failed and was already reported; nothing to validate.
                    continue

                # Validate resulting JSON data
                validate_dataset_dict(dataset_dict, fname)
[
{
"source": "P12_Exhibit_1_UL_DISH-8_COVIRNA_01_MS_29.10.2020_CK.xlsx",
"contacts": [
{
"first_name": "John.",
"last_name": "Doe.",
"role": "Principal_Investigator",
"email": "john.doe@imise.uni-leipzig.de",
"affiliations": [
"Universit\u00e4t Leipzig"
]
},
{
"first_name": "Jane",
"last_name": "Doe",
"role": "Legal_Representative",
"email": "jane.doe@imise.uni-leipzig.de",
"affiliations": [
"Universit\u00e4t Leipzig"
]
},
{
"first_name": "Ronal",
"last_name": "SPEER",
"role": "Data_Protection_Officer",
"email": "alice.white@imise.uni-leipzig.de",
"affiliations": [
"Universit\u00e4t Leipzig"
]
}
],
"data_declarations": [
{
"title": "PROVID-PROGRESS",
"source_study": "PROVID cohort of hospitalized COVID-19 patients",
"data_types": [
"Clinical_data",
"Lifestyle_data",
"Other_Phenotype_data",
"Other",
"Samples"
],
"data_type_notes": "Data from laboratory measurements concerning health, Biomaterials including PAXgene for RNA extraction Notes on samples: PAXgene blood",
"de_identification": "pseudonymized",
"consent_status": "heterogeneous",
"consent_status_description": "n/a",
"subject_categories": "cases",
"has_special_subjects": true,
"special_subject_notes": "Individuals not able to consent at the time of enrolment due to their health status. Consent will be obtained from legal representative.",
"storage_end_date": "2035-12-31",
"use_restrictions": [
{
"use_class": "RS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Research on airway infections, their treatment and consequences"
},
{
"use_class": "GS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Patients can individually decide if they want to allow data sharing to parties not covered by the European GDPR. If consented to by the patient, the third party will have to be bound by contract to the standards layed out in the GDPR"
},
{
"use_class": "IS",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Have to follow research goals regarding airway infections, their treatment and consequences"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Data have to be deleted after 15 years except if required to be stored for reasons of reproducable research or ongoing new add-on studies the patient consented to"
},
{
"use_class": "PS",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Use of DATA is limited to the RESEARCH PROJECT.\nIs the use of data limited to the project named in the Submission sheet? PROJECT: COVIRNA"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "2035-12-31"
},
{
"use_class": "PUB",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "The cohort study must be cited. Authors from the cohort project have to be considered according to their contribution."
},
{
"use_class": "IP",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "See COVIRNA consortium agreement"
}
],
"access_procedure": "Research goal, background, planned analyses and planned publication need to be stated. Adherance to GDPR regulations must be confirmed. DAC will decide on adherance to informed consent and competing interests. ",
"access_category": "controlled-access"
},
{
"title": "PROVID-CAPNETZ",
"source_study": "PROVID cohort of hospitalized COVID-19 patients",
"data_types": [
"Clinical_data",
"Samples"
],
"data_type_notes": "Data from laboratory measurements concerning health, Biomaterials including PAXgene for RNA extraction Notes on samples: PAXgene blood",
"de_identification": "pseudonymized",
"consent_status": "heterogeneous",
"consent_status_description": "n/a",
"subject_categories": "cases",
"has_special_subjects": true,
"special_subject_notes": "Individuals not able to consent at the time of enrolment due to their health status. Consent will be obtained from legal representative.",
"storage_end_date": "2035-12-31",
"use_restrictions": [
{
"use_class": "RS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Research on airwy infections, their treatment and consequences"
},
{
"use_class": "GS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Patients can individually decide if they want to allow data sharing to parties not covered by the European GDPR. If consented to by the patient, the third party will have to be bound by contract to the standards layed out in the GDPR"
},
{
"use_class": "IS",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Have to follow research goals regarding airway infections, their treatment and consequences"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Data have to be deleted after 15 years except if required to be stored for reasons of reproducable research or ongoing new add-on studies the patient consented to"
},
{
"use_class": "PS",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Use of DATA is limited to the RESEARCH PROJECT.\nIs the use of data limited to the project named in the Submission sheet? PROJECT: COVIRNA"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "2035-12-31"
},
{
"use_class": "PUB",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "The cohort study must be cited. Authors from the cohort project have to be considered according to their contribution."
},
{
"use_class": "IP",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "See COVIRNA consortium agreement"
}
],
"access_procedure": "Research goal, background, planned analyses and planned publication need to be stated. Adherance to GDPR regulations must be confirmed. DAC will decide on adherance to informed consent. ",
"access_category": "controlled-access"
}
],
"studies": [
{
"name": "PROVID-PROGRESS",
"description": "The PROVID-PROGRESS Study is a prospective, longitudinal, observational, multi-center study of hospitalized Patients positive for SARS-CoV-2 with the aim to identify clinical and molecular markers of disease progression and new therapies. U07.1! , COVID-19, SARS-CoV-2 infection, prospective, longitudinal, observational, multi-center, medical history, clinical characteristics, laboratory parameters, quality of life, mechanical ventilation, medication, treatments",
"has_ethics_approval": false,
"ethics_approval_notes": "see Remarks",
"url": "https://www.gesundheitsforschung-bmbf.de/de/provid-klinische-molekulare-und-funktionelle-biomarker-fur-prognose-pathomechanismen-und-11690.php",
"contacts": [
{
"first_name": "Jim",
"last_name": "Doe",
"role": "Principal_Investigator",
"email": "jim.doe@charite.de",
"affiliations": [
"Charit\u00e9 - Universit\u00e4tsmedizin Berlin"
]
}
]
},
{
"name": "PROVID-CAPNETZ",
"description": "The PROVID-CAPNETZ Study is a prospective, longitudinal, observational, multi-center study of hospitalized Patients positive for SARS-CoV-2 with the aim to identify clinical and molecular markers of disease progression and new therapies. U07.1! , COVID-19, SARS-CoV-2 infection, prospective, longitudinal, observational, multi-center, medical history, clinical characteristics, laboratory parameters, quality of life, mechanical ventilation, medication, treatments",
"has_ethics_approval": true,
"ethics_approval_notes": "301-2008",
"url": "https://www.gesundheitsforschung-bmbf.de/de/provid-klinische-molekulare-und-funktionelle-biomarker-fur-prognose-pathomechanismen-und-11690.php",
"contacts": [
{
"first_name": "Jack",
"last_name": "Black",
"role": "Other",
"email": "jack.black@mh-hannover.de",
"affiliations": [
"MHH Hannover"
]
}
]
}
],
"legal_bases": [
{
"data_declarations": [
"PROVID-PROGRESS"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-PROGRESS"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, the subsequent processing of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-PROGRESS"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of special category (sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-PROGRESS"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, subsequent processing of special category (sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-CAPNETZ"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-CAPNETZ"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, the subsequent processing of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-CAPNETZ"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of special category (sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-CAPNETZ"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, subsequent processing of special category (sensitive) personal data?*"
}
],
"name": "PROVID_2020_01",
"project": "COVIRNA"
}
]
[
{
"source": "P1_LIH_Exhibit_1_UL_DISH-8_COVIRNA.xlsx",
"contacts": [
{
"first_name": "John",
"last_name": "Doe",
"role": "Researcher",
"email": "john.doe@lih.lu ",
"affiliations": [
"Luxembourg Institute of Health"
]
},
{
"first_name": "Alice",
"last_name": "White",
"role": "Legal_Representative",
"email": "alice.white@lih.lu ",
"affiliations": [
"Luxembourg Institute of Health"
]
},
{
"first_name": "Jane",
"last_name": "Doe",
"role": "Data_Protection_Officer",
"email": "jane.doe@lih.lu ",
"affiliations": [
"Luxembourg Institute of Health"
]
},
{
"first_name": "Jack",
"last_name": "Doe",
"role": "Other",
"email": "jack.doe@lih.lu ",
"affiliations": [
"Luxembourg Institute of Health"
]
}
],
"data_declarations": [
{
"title": "PREDI-COVID participants` samples and data for COVIRNA",
"source_study": "PREDI-COVID ",
"data_types": [
"Clinical_data",
"Other",
"Samples"
],
"data_type_notes": "Clinical data (ISARIC protocol CRF data), baseline and follow-up data Notes on samples: From each predi-COVID participant: 1 aliquot of 400microL EDTA-plasma and 1 whole blood sample collected in PAXgene tube",
"de_identification": "pseudonymized",
"consent_status": "heterogeneous",
"consent_status_description": "Specific agreement is sought on the use of clinical data and data collected through the CoLive digital application.\nClinical data is dissociated from sample collection. Specific agreement is sough on the sample and/or data collection by research areas (virology and/or other health domain research), sharing with non-EU countries, secondary use of samples and incidental findings.",
"subject_categories": "cases",
"has_special_subjects": true,
"special_subject_notes": "In exceptional cases, when the subject is presented with acute conditions and unable to receive and understand the information about the study, the Informed consent can be first requested from the next relative and from the subject as soon as this becomes possible.",
"storage_end_date": "",
"use_restrictions": [
{
"use_class": "RS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "biomedical research"
},
{
"use_class": "GS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "In the ICF, the participant is asked to agree/or not on the transfer of samples in non-EU countries. Only participant who have answered \"yes\" will be included. "
},
{
"use_class": "IS",
"use_restriction_rule": "NO_CONSTRAINTS",
"use_class_note": "\u2026"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Encoded data will be kept for 15 years after the end of the study. However, 5 years after the end of the study, nominative data and the correspondence table between the identifying data and the study number will be deleted."
},
{
"use_class": "PS",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Use of DATA is limited to the RESEARCH PROJECT.\nIs the use of data limited to the project named in the Submission sheet? PROJECT: COVIRNA"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": ""
},
{
"use_class": "PUB",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "The Predi-COVID study team will be acknowledged in all communications."
},
{
"use_class": "IP",
"use_restriction_rule": "NO_CONSTRAINTS",
"use_class_note": "\u2026."
}
],
"access_procedure": "Described in the DSA",
"access_category": "controlled-access"
}
],
"studies": [
{
"name": "PREDI-COVID",
"description": "Luxembourg cohort of positive patients for COVID-19: a stratification study to predict patient prognosis The study aims at identifying factors associated with the COVID-19 disease severity. COVID-19 patients with severity criteria will be compared to patients with mild disease managed at home.\nA deep phenotyping related to the symptoms of the disease as well as biosampling allowing for laboratory-based and computational analytics will be performed.",
"has_ethics_approval": true,
"ethics_approval_notes": "202003/07",
"url": "https://researchluxembourg.lu/covid-19-taskforce/predi-covid/",
"contacts": null
}
],
"legal_bases": [
{
"data_declarations": [
"PREDI-COVID participants` samples and data for COVIRNA"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PREDI-COVID participants` samples and data for COVIRNA"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, the subsequent processing of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PREDI-COVID participants` samples and data for COVIRNA"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of special category (sensitive) personal data?*"
},
{
"data_declarations": [
"PREDI-COVID participants` samples and data for COVIRNA"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, subsequent processing of special category (sensitive) personal data?*"
}
],
"name": "LIH Data Information for COVIRNA",
"project": "COVIRNA"
}
]
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
from unittest import TestCase
from json import loads
from jsonschema import validate
from jsonschema.exceptions import SchemaError
import os
# Fixtures
# Directory containing this test module; the paths below are built from it.
dirname = os.path.dirname(__file__)
# Absolute path of the "data" directory next to this module (test data files are read from here).
testdata_dir = os.path.abspath(os.path.join(dirname, "data/"))
# Absolute path of the JSON schema directory, two levels above this module.
schema_dir = os.path.abspath(os.path.join(dirname,"../../metadata_tools/resources/json-schemas/schemas"))
# Pairs of (schema file name in schema_dir, test data file name in testdata_dir).
# NOTE(review): the project pair is commented out; the reason is not visible here - confirm before re-enabling.
schema_testdata_filename_map = [
('elu-dataset.json', 'datasets.json'),
#('elu-project.json', 'projects.json'),
('elu-institution.json', 'partners.json')
]
class TestJSONSchemas(TestCase):
    """Check the bundled test data files against their JSON schemas."""

    def test_validation(self):
        """Validate every item of each test data file against its schema.

        For each ``(schema, data)`` pair in ``schema_testdata_filename_map`` the
        schema is read from ``schema_dir`` and the entries under the ``'items'``
        key of the data file from ``testdata_dir``. A ``SchemaError`` (the
        schema itself is invalid) fails the test with the schema path; any
        other exception raised by ``validate`` propagates unchanged.
        """
        for schema_filename, dataset_filename in schema_testdata_filename_map:
            schema_filepath = os.path.abspath(os.path.join(schema_dir, schema_filename))
            data_filepath = os.path.abspath(os.path.join(testdata_dir, dataset_filename))

            # Context managers close both files even if parsing fails.
            with open(schema_filepath, encoding='utf-8') as schemaFile:
                schema = loads(schemaFile.read())
            with open(data_filepath, encoding='utf-8') as dataFile:
                data = loads(dataFile.read())['items']

            for item in data:
                try:
                    validate(item, schema)
                except SchemaError:
                    self.fail(f"JSONSchema {schema_filepath} is not valid - SchemaError.")