Commit 0ac6a330 authored by Pinar Alper
Browse files

Merge branch '13-remove-old-schema-tests' into 'master'

Resolve "Remove old schema tests"

Closes #13

See merge request pinar.alper/metadata-tools!16
parents 8d92b2ff 062c8ae4
import json
import os
from pathlib import Path
......@@ -7,20 +6,45 @@ from unittest import TestCase
from metadata_tools.importxls.export_utils import save_exported_datasets_to_file
from metadata_tools.importxls.dish_xls_exporter import DishXlsExporter
from jsonschema import validate
from jsonschema.exceptions import SchemaError, ValidationError
from json import loads
class TestDishXlsPartnerExtractor(TestCase):
    """Export the Excel sheets under the test resources and schema-check the output."""

    def test_dish_export(self):
        """Export every .xls/.xlsx file under ``resources`` and validate the result.

        Each spreadsheet is passed to ``DishXlsExporter.export_submission``; the
        exported data is written to ``<stem>.json`` in the current working
        directory and then validated against the ``elu-dataset.json`` schema.
        A spreadsheet that raises ``ValueError`` during export is reported on
        stdout and skipped; a schema violation fails the test.
        """

        def process_excel_file(dir_name, fname):
            """Export one spreadsheet and return the exported data.

            Returns ``None`` when the export itself raised ``ValueError``, so
            the caller can skip validation instead of hitting an unbound name.
            """
            full_file_path = os.path.join(dir_name, fname)
            dataset_dict = None
            try:
                dataset_dict = exporter.export_submission(full_file_path)
                with open(Path(full_file_path).stem + ".json", 'w') as outfile:
                    save_exported_datasets_to_file(dataset_dict, outfile)
            except ValueError:
                print("Could not import {}".format(full_file_path))
            return dataset_dict

        def validate_dataset_dict(dataset_dict, fname):
            """Fail the test if ``dataset_dict`` does not satisfy the dataset schema."""
            try:
                validate(dataset_dict, schema)
            except ValidationError:
                self.fail(f"Validation of '{fname}' failed using schema 'json-schemas/schemas/elu-dataset.json'")

        # The schema is the same for every spreadsheet, so read it once up
        # front; the context manager closes the file even if parsing fails.
        # NOTE(review): the path is relative, so this presumably expects the
        # tests to be run from the repository root - confirm.
        with open("metadata_tools/resources/json-schemas/schemas/elu-dataset.json", encoding='utf-8') as schema_file:
            schema = loads(schema_file.read())

        exporter = DishXlsExporter()
        resources_dir = os.path.join(os.path.dirname(__file__), 'resources')
        for dir_name, _, file_list in os.walk(resources_dir):
            for fname in file_list:
                if not fname.lower().endswith(('xls', 'xlsx')):
                    continue
                # Test export from Excel to JSON
                dataset_dict = process_excel_file(dir_name, fname)
                if dataset_dict is None:
                    # Export was rejected and already reported; nothing to validate.
                    continue
                # Validate resulting JSON file
                validate_dataset_dict(dataset_dict, fname)
[
{
"source": "P12_Exhibit_1_UL_DISH-8_COVIRNA_01_MS_29.10.2020_CK.xlsx",
"contacts": [
{
"first_name": "John.",
"last_name": "Doe.",
"role": "Principal_Investigator",
"email": "john.doe@imise.uni-leipzig.de",
"affiliations": [
"Universit\u00e4t Leipzig"
]
},
{
"first_name": "Jane",
"last_name": "Doe",
"role": "Legal_Representative",
"email": "jane.doe@imise.uni-leipzig.de",
"affiliations": [
"Universit\u00e4t Leipzig"
]
},
{
"first_name": "Ronal",
"last_name": "SPEER",
"role": "Data_Protection_Officer",
"email": "alice.white@imise.uni-leipzig.de",
"affiliations": [
"Universit\u00e4t Leipzig"
]
}
],
"data_declarations": [
{
"title": "PROVID-PROGRESS",
"source_study": "PROVID cohort of hospitalized COVID-19 patients",
"data_types": [
"Clinical_data",
"Lifestyle_data",
"Other_Phenotype_data",
"Other",
"Samples"
],
"data_type_notes": "Data from laboratory measurements concerning health, Biomaterials including PAXgene for RNA extraction Notes on samples: PAXgene blood",
"de_identification": "pseudonymized",
"consent_status": "heterogeneous",
"consent_status_description": "n/a",
"subject_categories": "cases",
"has_special_subjects": true,
"special_subject_notes": "Individuals not able to consent at the time of enrolment due to their health status. Consent will be obtained from legal representative.",
"storage_end_date": "2035-12-31",
"use_restrictions": [
{
"use_class": "RS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Research on airway infections, their treatment and consequences"
},
{
"use_class": "GS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Patients can individually decide if they want to allow data sharing to parties not covered by the European GDPR. If consented to by the patient, the third party will have to be bound by contract to the standards layed out in the GDPR"
},
{
"use_class": "IS",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Have to follow research goals regarding airway infections, their treatment and consequences"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Data have to be deleted after 15 years except if required to be stored for reasons of reproducable research or ongoing new add-on studies the patient consented to"
},
{
"use_class": "PS",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Use of DATA is limited to the RESEARCH PROJECT.\nIs the use of data limited to the project named in the Submission sheet? PROJECT: COVIRNA"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "2035-12-31"
},
{
"use_class": "PUB",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "The cohort study must be cited. Authors from the cohort project have to be considered according to their contribution."
},
{
"use_class": "IP",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "See COVIRNA consortium agreement"
}
],
"access_procedure": "Research goal, background, planned analyses and planned publication need to be stated. Adherance to GDPR regulations must be confirmed. DAC will decide on adherance to informed consent and competing interests. ",
"access_category": "controlled-access"
},
{
"title": "PROVID-CAPNETZ",
"source_study": "PROVID cohort of hospitalized COVID-19 patients",
"data_types": [
"Clinical_data",
"Samples"
],
"data_type_notes": "Data from laboratory measurements concerning health, Biomaterials including PAXgene for RNA extraction Notes on samples: PAXgene blood",
"de_identification": "pseudonymized",
"consent_status": "heterogeneous",
"consent_status_description": "n/a",
"subject_categories": "cases",
"has_special_subjects": true,
"special_subject_notes": "Individuals not able to consent at the time of enrolment due to their health status. Consent will be obtained from legal representative.",
"storage_end_date": "2035-12-31",
"use_restrictions": [
{
"use_class": "RS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Research on airwy infections, their treatment and consequences"
},
{
"use_class": "GS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Patients can individually decide if they want to allow data sharing to parties not covered by the European GDPR. If consented to by the patient, the third party will have to be bound by contract to the standards layed out in the GDPR"
},
{
"use_class": "IS",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Have to follow research goals regarding airway infections, their treatment and consequences"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Data have to be deleted after 15 years except if required to be stored for reasons of reproducable research or ongoing new add-on studies the patient consented to"
},
{
"use_class": "PS",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Use of DATA is limited to the RESEARCH PROJECT.\nIs the use of data limited to the project named in the Submission sheet? PROJECT: COVIRNA"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "2035-12-31"
},
{
"use_class": "PUB",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "The cohort study must be cited. Authors from the cohort project have to be considered according to their contribution."
},
{
"use_class": "IP",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "See COVIRNA consortium agreement"
}
],
"access_procedure": "Research goal, background, planned analyses and planned publication need to be stated. Adherance to GDPR regulations must be confirmed. DAC will decide on adherance to informed consent. ",
"access_category": "controlled-access"
}
],
"studies": [
{
"name": "PROVID-PROGRESS",
"description": "The PROVID-PROGRESS Study is a prospective, longitudinal, observational, multi-center study of hospitalized Patients positive for SARS-CoV-2 with the aim to identify clinical and molecular markers of disease progression and new therapies. U07.1! , COVID-19, SARS-CoV-2 infection, prospective, longitudinal, observational, multi-center, medical history, clinical characteristics, laboratory parameters, quality of life, mechanical ventilation, medication, treatments",
"has_ethics_approval": false,
"ethics_approval_notes": "see Remarks",
"url": "https://www.gesundheitsforschung-bmbf.de/de/provid-klinische-molekulare-und-funktionelle-biomarker-fur-prognose-pathomechanismen-und-11690.php",
"contacts": [
{
"first_name": "Jim",
"last_name": "Doe",
"role": "Principal_Investigator",
"email": "jim.doe@charite.de",
"affiliations": [
"Charit\u00e9 - Universit\u00e4tsmedizin Berlin"
]
}
]
},
{
"name": "PROVID-CAPNETZ",
"description": "The PROVID-CAPNETZ Study is a prospective, longitudinal, observational, multi-center study of hospitalized Patients positive for SARS-CoV-2 with the aim to identify clinical and molecular markers of disease progression and new therapies. U07.1! , COVID-19, SARS-CoV-2 infection, prospective, longitudinal, observational, multi-center, medical history, clinical characteristics, laboratory parameters, quality of life, mechanical ventilation, medication, treatments",
"has_ethics_approval": true,
"ethics_approval_notes": "301-2008",
"url": "https://www.gesundheitsforschung-bmbf.de/de/provid-klinische-molekulare-und-funktionelle-biomarker-fur-prognose-pathomechanismen-und-11690.php",
"contacts": [
{
"first_name": "Jack",
"last_name": "Black",
"role": "Other",
"email": "jack.black@mh-hannover.de",
"affiliations": [
"MHH Hannover"
]
}
]
}
],
"legal_bases": [
{
"data_declarations": [
"PROVID-PROGRESS"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-PROGRESS"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, the subsequent processing of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-PROGRESS"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of special category (sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-PROGRESS"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, subsequent processing of special category (sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-CAPNETZ"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-CAPNETZ"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, the subsequent processing of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-CAPNETZ"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of special category (sensitive) personal data?*"
},
{
"data_declarations": [
"PROVID-CAPNETZ"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, subsequent processing of special category (sensitive) personal data?*"
}
],
"name": "PROVID_2020_01",
"project": "COVIRNA"
}
]
[
{
"source": "P1_LIH_Exhibit_1_UL_DISH-8_COVIRNA.xlsx",
"contacts": [
{
"first_name": "John",
"last_name": "Doe",
"role": "Researcher",
"email": "john.doe@lih.lu ",
"affiliations": [
"Luxembourg Institute of Health"
]
},
{
"first_name": "Alice",
"last_name": "White",
"role": "Legal_Representative",
"email": "alice.white@lih.lu ",
"affiliations": [
"Luxembourg Institute of Health"
]
},
{
"first_name": "Jane",
"last_name": "Doe",
"role": "Data_Protection_Officer",
"email": "jane.doe@lih.lu ",
"affiliations": [
"Luxembourg Institute of Health"
]
},
{
"first_name": "Jack",
"last_name": "Doe",
"role": "Other",
"email": "jack.doe@lih.lu ",
"affiliations": [
"Luxembourg Institute of Health"
]
}
],
"data_declarations": [
{
"title": "PREDI-COVID participants` samples and data for COVIRNA",
"source_study": "PREDI-COVID ",
"data_types": [
"Clinical_data",
"Other",
"Samples"
],
"data_type_notes": "Clinical data (ISARIC protocol CRF data), baseline and follow-up data Notes on samples: From each predi-COVID participant: 1 aliquot of 400microL EDTA-plasma and 1 whole blood sample collected in PAXgene tube",
"de_identification": "pseudonymized",
"consent_status": "heterogeneous",
"consent_status_description": "Specific agreement is sought on the use of clinical data and data collected through the CoLive digital application.\nClinical data is dissociated from sample collection. Specific agreement is sough on the sample and/or data collection by research areas (virology and/or other health domain research), sharing with non-EU countries, secondary use of samples and incidental findings.",
"subject_categories": "cases",
"has_special_subjects": true,
"special_subject_notes": "In exceptional cases, when the subject is presented with acute conditions and unable to receive and understand the information about the study, the Informed consent can be first requested from the next relative and from the subject as soon as this becomes possible.",
"storage_end_date": "",
"use_restrictions": [
{
"use_class": "RS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "biomedical research"
},
{
"use_class": "GS[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "In the ICF, the participant is asked to agree/or not on the transfer of samples in non-EU countries. Only participant who have answered \"yes\" will be included. "
},
{
"use_class": "IS",
"use_restriction_rule": "NO_CONSTRAINTS",
"use_class_note": "\u2026"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Encoded data will be kept for 15 years after the end of the study. However, 5 years after the end of the study, nominative data and the correspondence table between the identifying data and the study number will be deleted."
},
{
"use_class": "PS",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "Use of DATA is limited to the RESEARCH PROJECT.\nIs the use of data limited to the project named in the Submission sheet? PROJECT: COVIRNA"
},
{
"use_class": "TS-[XX]",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": ""
},
{
"use_class": "PUB",
"use_restriction_rule": "CONSTRAINTS",
"use_class_note": "The Predi-COVID study team will be acknowledged in all communications."
},
{
"use_class": "IP",
"use_restriction_rule": "NO_CONSTRAINTS",
"use_class_note": "\u2026."
}
],
"access_procedure": "Described in the DSA",
"access_category": "controlled-access"
}
],
"studies": [
{
"name": "PREDI-COVID",
"description": "Luxembourg cohort of positive patients for COVID-19: a stratification study to predict patient prognosis The study aims at identifying factors associated with the COVID-19 disease severity. COVID-19 patients with severity criteria will be compared to patients with mild disease managed at home.\nA deep phenotyping related to the symptoms of the disease as well as biosampling allowing for laboratory-based and computational analytics will be performed.",
"has_ethics_approval": true,
"ethics_approval_notes": "202003/07",
"url": "https://researchluxembourg.lu/covid-19-taskforce/predi-covid/",
"contacts": null
}
],
"legal_bases": [
{
"data_declarations": [
"PREDI-COVID participants` samples and data for COVIRNA"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PREDI-COVID participants` samples and data for COVIRNA"
],
"personal_data_codes": [
"Standard"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, the subsequent processing of standard (non-sensitive) personal data?*"
},
{
"data_declarations": [
"PREDI-COVID participants` samples and data for COVIRNA"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the collection of special category (sensitive) personal data?*"
},
{
"data_declarations": [
"PREDI-COVID participants` samples and data for COVIRNA"
],
"personal_data_codes": [
"Special"
],
"legal_basis_codes": [
"6.1(a)"
],
"legal_basis_notes": "What is the legal basis according to Art. 6.1 GDPR for the sharing and, where applicable, subsequent processing of special category (sensitive) personal data?*"
}
],
"name": "LIH Data Information for COVIRNA",
"project": "COVIRNA"
}
]
This source diff could not be displayed because it is too large. You can view the blob instead.
{
"$schema": "https://raw.githubusercontent.com/elixir-luxembourg/json-schemas/master/schemas/elu-institution.json",
"items": [
{
"pk": "12",
"name": "23andMe Company",
"elu_accession": "ELU_I_12",
"acronym": null,
"is_clinical": false,
"geo_category": "Non_EU",
"sector_category": "PRIVATE_P",
"address": null,
"country_code": null,
"source": "example.com"
},
{
"pk": "18",
"name": "Alstem LLC",
"elu_accession": "ELU_I_18",
"acronym": null,
"is_clinical": false,
"geo_category": "Non_EU",
"sector_category": "PRIVATE_P",
"address": null,
"country_code": null,
"source": "example.com"
},
{
"pk": "84",
"name": "Alzheimer's Disease Neuroimaging Initiative",
"elu_accession": "ELU_I_84",
"acronym": "ADNI",
"is_clinical": false,
"geo_category": "Non_EU",
"sector_category": "PUBLIC",
"address": null,
"country_code": null,
"source": "example.com"
},
{
"pk": "5",
"name": "August Pi i Sunyer Biomedical Research Institute",
"elu_accession": "ELU_I_5",
"acronym": "IDIBAPS",