Commit 22a748f1 authored by Pinar Alper's avatar Pinar Alper
Browse files

Added initial list of collaborator institutions

parent dc7c3cb8
[
{
"elu_accession": "ELU_I_1",
"institution_name": "Integrated Biobank of Luxembourg (IBBL)",
"geo_category": "National"},
{
"elu_accession": "ELU_I_2",
"institution_name": "European Molecular Biology Laboratory (EMBL)",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_3",
"institution_name": "Erasmus Hospital Brussels",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_4",
"institution_name": "Erasmus University Medical Center (Erasmus MC)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_5",
"institution_name": "August Pi i Sunyer Biomedical Research Institute (IDIBAPS)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_6",
"institution_name": "University Hospital of the Saarland (UKS Homburg)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_7",
"institution_name": "University Medical Center Utrecht (UMC Utrecht)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_8",
"institution_name": "University of Tübingen",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_9",
"institution_name": "Centre Hospitalier de Luxembourg (CHL)",
"geo_category": "National"},
{
"elu_accession": "ELU_I_10",
"institution_name": "Charité University Hospital Berlin",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_11",
"institution_name": "Cologne Center for Genomics (CCG)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_12",
"institution_name": "23andMe Company",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_13",
"institution_name": "Fraunhofer Institute for Algorithms and Scientific Computing (Fraunhofer SCAI)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_14",
"institution_name": "Karolinska Institute",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_15",
"institution_name": "Boehringer Ingelheim International GmbH",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_16",
"institution_name": "Union Chimique Belge (UCB) Biopharma",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_17",
"institution_name": "Brain & Spine Institute (ICM)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_18",
"institution_name": "Alstem LLC",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_19",
"institution_name": "Baylor College of Medicine",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_20",
"institution_name": "Biomedical Research Foundation Academy Of Athens (BRFAA)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_21",
"institution_name": "Brazilian Institute of Neuroscience and Neurotechnology (BRAINN)",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_22",
"institution_name": "Centre Hospitalier Emile Mayrisch (CHEM)",
"geo_category": "National"},
{
"elu_accession": "ELU_I_23",
"institution_name": "Charité – Universitätsmedizin Berlin",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_24",
"institution_name": "Children's Hospital of Philadelphia",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_25",
"institution_name": "Cornell University",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_26",
"institution_name": "Coriell Institute for Medical Research",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_27",
"institution_name": "European Bank for induced pluripotent Stem Cells (EBiSC)",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_28",
"institution_name": "Duke University",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_29",
"institution_name": "Columbia University",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_30",
"institution_name": "Oxford Parkinson's Disease Centre (OPDC)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_31",
"institution_name": "Giannina Gaslini Institute (Gaslini Biobank)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_32",
"institution_name": "Thermo Fisher Scientific",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_33",
"institution_name": "Griffith Institute for Drug Discovery (GRIDD)",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_34",
"institution_name": "Institute of Ophthalmic Research - Tübingen",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_35",
"institution_name": "Luxembourg Institute of Science and Technology (LIST)",
"geo_category": "National"},
{
"elu_accession": "ELU_I_36",
"institution_name": "Life Sciences Research Unit - University of Luxembourg (LSRU)",
"geo_category": "National"},
{
"elu_accession": "ELU_I_37",
"institution_name": "Luxembourg Red Cross",
"geo_category": "National"},
{
"elu_accession": "ELU_I_38",
"institution_name": "Michael J. Fox Foundation for Parkinson's Research (MJFF)",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_39",
"institution_name": "Maastricht University",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_40",
"institution_name": "Magdeburg University Hospital",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_41",
"institution_name": "Max Planck Society",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_42",
"institution_name": "Max Rubner-Institut (MRI)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_43",
"institution_name": "Mayo Clinic",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_44",
"institution_name": "Medical Research Council Unit The Gambia (MRC Unit The Gambia - LSHTM)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_45",
"institution_name": "Murdoch Children's Research Institute (MCRI)",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_46",
"institution_name": "National Institute on Aging (NIA)",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_47",
"institution_name": "National Institutes of Health (NIH)",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_48",
"institution_name": "Newcastle University",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_49",
"institution_name": "Norwegian University of Science and Technology (NTNU)",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_50",
"institution_name": "Ohio State University Comprehensive Cancer Center (OSUCCC – James)",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_51",
"institution_name": "Paracelsus-Elena-Klinik",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_52",
"institution_name": "Royal College of Surgeons - Ireland",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_53",
"institution_name": "Sage Bionetworks",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_54",
"institution_name": "Betterhumans Inc. Supercentenarians Research Study",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_55",
"institution_name": "Technical University Dresden (TU Dresden)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_56",
"institution_name": "University Hospital of Würzburg",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_57",
"institution_name": "University Hospital Bonn",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_58",
"institution_name": "University College London (UCL)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_59",
"institution_name": "University Hospital Cologne",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_60",
"institution_name": "University of Luxembourg",
"geo_category": "National"},
{
"elu_accession": "ELU_I_61",
"institution_name": "University Hospital Tübingen",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_62",
"institution_name": "University of Lübeck",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_63",
"institution_name": "University Medical Center Göttingen",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_64",
"institution_name": "Philipps University - Marburg",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_65",
"institution_name": "University of Trier",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_66",
"institution_name": "University Hospital Kiel (UKSH)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_67",
"institution_name": "University of Adelaide",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_68",
"institution_name": "University of Eastern Finland (UEF)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_69",
"institution_name": "University of Melbourne",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_70",
"institution_name": "University of Vienna",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_71",
"institution_name": "University of Oslo (UiO)",
"geo_category": "Non-EU"},
{
"elu_accession": "ELU_I_72",
"institution_name": "Uppsala University",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_73",
"institution_name": "University Hospital Salzburg (SALK)",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_74",
"institution_name": "Wellcome Sanger Institute",
"geo_category": "EU"},
{
"elu_accession": "ELU_I_75",
"institution_name": "Zithaklinik - Hôpitaux Robert Schuman",
"geo_category": "National"},
{
"elu_accession": "ELU_I_76",
"institution_name": "Broad Institute",
"geo_category": "Non-EU"
}
]
\ No newline at end of file
......@@ -12,7 +12,9 @@ from tests.importxls.test_utils import get_value_list_from_row, process_data_typ
class TestProjectsParser(TestCase):
def test_export_from_collaborator(self):
h = hashlib.md5()
count = 0
# dataset_count = 0
# custodian_count = {}
# datatype_count={}
for dirName, subdirList, fileList in os.walk(SHEETS_FOLDER):
for fname in fileList:
if fname.startswith('from-collaborator'):
......@@ -24,18 +26,34 @@ class TestProjectsParser(TestCase):
idx = 1
#print('----> {}'.format(full_file_path))
while idx < book.number_of_sheets():
count+=1
# dataset_count+=1
sheet = book.sheet_by_index(idx)
dataset_data = {}
dataset_data['source_type'] = 'From_Collaborator'
dataset_data['submission_id'] = submission_id
dataset_data['local_custodian'] = get_value_list_from_row(sheet, 3)
# for cc in dataset_data['local_custodian']:
# if cc in custodian_count.keys():
# custodian_count[cc] +=1
# else:
# custodian_count[cc] =1
dataset_data['title'] = sheet[4, 2]
if not dataset_data['title']:
print('Missing dataset title ----> {}'.format(full_file_path))
datatype_info = process_data_types(get_value_list_from_row(sheet, 5))
dataset_data['data_types'] = datatype_info[0]
if datatype_info[1]:
dataset_data['data_type_notes'] = datatype_info[1]
# for dd in datatype_info[0]:
# if dd in datatype_count.keys():
# datatype_count[dd] +=1
# else:
# datatype_count[dd] =1
dataset_data['involves_samples'] = process_yes_no_answer(sheet[6, 2])
if sheet[7, 2]:
......@@ -146,4 +164,6 @@ class TestProjectsParser(TestCase):
with open('datasets-{}.json'.format(submission_id), 'w') as outfile:
json.dump(dataset_list, outfile, indent=4)
# print(json.dumps(dataset_list, indent=4))
print('Number of Collab Datasets {} \n'.format(count))
# print('Number of Collab Datasets {} \n'.format(dataset_count))
# print('DataType counts {} \n'.format(datatype_count))
# print('Custodian counts {} \n'.format(custodian_count))
\ No newline at end of file
......@@ -13,7 +13,9 @@ class TestProjectsParser(TestCase):
def test_export_from_repository(self):
h = hashlib.md5()
count = 0
# dataset_count = 0
# custodian_count = {}
# datatype_count={}
for dirName, subdirList, fileList in os.walk(SHEETS_FOLDER):
for fname in fileList:
......@@ -26,12 +28,17 @@ class TestProjectsParser(TestCase):
idx = 1
# print('----> {}'.format(full_file_path))
while idx < book.number_of_sheets():
count+=1
# dataset_count+=1
sheet = book.sheet_by_index(idx)
dataset_data = {}
dataset_data['source_type'] = 'From_Repository'
dataset_data['submission_id'] = submission_id
dataset_data['local_custodian'] = get_value_list_from_row(sheet, 2)
# for cc in dataset_data['local_custodian']:
# if cc in custodian_count.keys():
# custodian_count[cc] +=1
# else:
# custodian_count[cc] =1
dataset_data['source_repository'] = sheet[6, 2]
if sheet[4, 2]:
......@@ -40,11 +47,20 @@ class TestProjectsParser(TestCase):
if sheet[5, 2]:
dataset_data['title'] = sheet[5, 2]
if not dataset_data['title']:
print('Missing dataset title ----> {}'.format(full_file_path))
datatype_info = process_data_types(get_value_list_from_row(sheet, 7))
dataset_data['data_types'] = datatype_info[0]
if datatype_info[1]:
dataset_data['data_type_notes'] = datatype_info[1]
# for dd in datatype_info[0]:
# if dd in datatype_count.keys():
# datatype_count[dd] +=1
# else:
# datatype_count[dd] =1
if sheet[8, 2]:
dataset_data['de_identification'] = sheet[8, 2]
......@@ -113,4 +129,6 @@ class TestProjectsParser(TestCase):
json.dump(dataset_list, outfile, indent=4)
# print(json.dumps(dataset_list, indent=4))
print('Number of Repo Datasets {} \n'.format(count))
# print('Number of Repo Datasets {} \n'.format(dataset_count))
# print('DataType counts {} \n'.format(datatype_count))
# print('Custodian counts {} \n'.format(custodian_count))
......@@ -13,7 +13,9 @@ class TestProjectsParser(TestCase):
def test_export_own_cohort(self):
h = hashlib.md5()
count = 0
# count = 0
# custodian_count = {}
# datatype_count={}
for dirName, subdirList, fileList in os.walk(SHEETS_FOLDER):
for fname in fileList:
......@@ -26,13 +28,20 @@ class TestProjectsParser(TestCase):
idx = 1
print('----> {}'.format(full_file_path))
while idx < book.number_of_sheets():
count+=1
# count+=1
sheet = book.sheet_by_index(idx)
dataset_data = {}
dataset_data['source_type'] = 'Own_Cohort'
dataset_data['submission_id'] = submission_id
dataset_data['local_custodian'] = get_value_list_from_row(sheet, 3)
# for cc in dataset_data['local_custodian']:
# if cc in custodian_count.keys():
# custodian_count[cc] +=1
# else:
# custodian_count[cc] =1
dataset_data['title'] = sheet[4, 2]
if not dataset_data['title']:
print('Missing dataset title ----> {}'.format(full_file_path))
if sheet[5, 2]:
dataset_data['source_project'] = sheet[5, 2]
......@@ -41,6 +50,12 @@ class TestProjectsParser(TestCase):
if datatype_info[1]:
dataset_data['data_type_notes'] = datatype_info[1]
# for dd in datatype_info[0]:
# if dd in datatype_count.keys():
# datatype_count[dd] +=1
# else:
# datatype_count[dd] =1
dataset_data['involves_samples'] = process_yes_no_answer(sheet[7, 2])
if sheet[7, 2]:
......@@ -153,4 +168,6 @@ class TestProjectsParser(TestCase):
json.dump(dataset_list, outfile, indent=4)
# print(json.dumps(dataset_list, indent=4))
print('Number of Own-Cohort Datasets {} \n'.format(count))
\ No newline at end of file
# print('Number of Own-Cohort Datasets {} \n'.format(count))
# print('DataType counts {} \n'.format(datatype_count))
# print('Custodian counts {} \n'.format(custodian_count))
\ No newline at end of file
......@@ -130,19 +130,14 @@ class TestProjectsParser(TestCase):
prs_data['institution'] = 'Luxembourg Center for Systems Biomedicine (LCSB)'
contacts_list.append(prs_data)
prj_data['contacts'] = contacts_list
if HasUniLUERP:
prj_data[
'has_institutional_ethics_approval'] = True if HasUniLUERP == 'Yes' else False
else:
prj_data['has_institutional_ethics_approval'] = False
prj_data['has_institutional_ethics_approval'] = HasUniLUERP
if ERPNotes:
prj_data['institutional_ethics_approval_notes'] = ERPNotes
if HasCNER:
prj_data['has_national_ethics_approval'] = True if HasUniLUERP == 'Yes' else False
else:
pp_data['has_national_ethics_approval'] = False
prj_data['has_national_ethics_approval'] = HasCNER
if CNERNotes:
prj_data['national_ethics_approval_notes'] = CNERNotes
......
......@@ -56,8 +56,8 @@ def collect_prj_info(sheets_folder):
if numprojects > 0:
for row in range(2, 2 + numprojects):
projects.append((sheet[row, 1], full_file_path, sheet[row, 2], sheet[row, 3], sheet[row, 4],
sheet[row, 5], sheet[row, 6], sheet[row, 7], sheet[row, 8], sheet[row, 9],
sheet[row, 10], sheet[row, 11], sheet[row, 12]))
sheet[row, 5], sheet[row, 6], sheet[row, 7], process_yes_no_answer(sheet[row, 8]), sheet[row, 9],
process_yes_no_answer(sheet[row, 10]), sheet[row, 11], sheet[row, 12]))
return projects
def process_possible_date(possible_date):
......@@ -129,7 +129,7 @@ def is_storage_resource(location):
'personal_laptop',
'Owncloud',
'External Storage (e.g. Hard disk, DVD)',
'Other'
'OTHER'
])
if location in predefined_types:
......@@ -175,20 +175,33 @@ def add_storage_locations(storage_dict, locations_list, category):
{'storage_resource': locations_list[0], 'location': '<missing_info>',
'category': category})
else:
storage_dict.append(
{'storage_resource': 'Other', 'location': locations_list[0],
'category': category})
for line in get_lines_from_string(locations_list[0]):
storage_dict.append(
{'storage_resource': 'Other', 'location': line,
'category': category})
else:
raise ValueError('Uneven Master Data Location Row')
elif len(locations_list) % 2 == 0 and len(locations_list) > 0:
s = 0
e = len(locations_list) // 2
while s < e:
res = locations_list[s * 2] if locations_list[s * 2] else 'Other'
if is_storage_resource(locations_list[s * 2]):
for line in get_lines_from_string(locations_list[s * 2 + 1]):
storage_dict.append(
{'storage_resource': locations_list[s * 2], 'location': line,
'category': category})
else:
for line in get_lines_from_string(locations_list[s * 2]):
storage_dict.append(
{'storage_resource': 'Other', 'location': line,
'category': category})
storage_dict.append({'storage_resource': locations_list[s * 2],
'location': locations_list[s * 2 + 1],
'category': category})
# res = locations_list[s * 2] if locations_list[s * 2] else 'Other'
#
# storage_dict.append({'storage_resource': res,
# 'location': locations_list[s * 2 + 1],
# 'category': category})
s += 1
......@@ -209,4 +222,15 @@ def get_names_from_string(full_name):
result[1] = result[1] + ' ' + name_list[2]
else:
result[0] = name
return result
\ No newline at end of file
return result
def get_lines_from_string(a_string):
    """Split a possibly multi-line string into its non-blank lines.

    Leading and trailing whitespace of the whole value is removed first,
    then the text is split on line boundaries. Lines that are empty or
    contain only whitespace are dropped; the remaining lines are returned
    unchanged (interior indentation is preserved).

    Args:
        a_string: The text to split. ``None`` or an empty string yields
            an empty list instead of raising.

    Returns:
        A list of the non-blank lines, in their original order.
    """
    # An empty/missing value has no lines; avoid AttributeError on None.
    if not a_string:
        return []
    # `line.strip()` (rather than `line`) also filters whitespace-only lines,
    # which would otherwise be emitted as blank entries.
    return [line for line in a_string.strip().splitlines() if line.strip()]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment