Commit fdc79774 authored by Pinar Alper's avatar Pinar Alper
Browse files

Initial commit

parents
# Elixir Metadata Utility Tools
## Development
Install dependencies with:
```bash
pip install -e .[dev]
```
## Current Version
**v0.0.1-dev**
{
"description": "Study Schema",
"schema": {
"title": "A JSON Schema for describing Studies within the ELIXIR Data Submission System.",
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"required": [
"name",
"description",
"descriptors",
"funding_sources",
"data_types",
"contacts"
],
"properties": {
"name": {
"type": "string"
},
"description": {
"type": "string"
},
"web_page": {
"type": "string",
"format": "uri"
},
"in_ex_criteria": {
"type": "string"
},
"descriptors": {
  "type": "array",
  "minItems": 1,
  "items": {
    "type": "string",
    "enum": [
      "Observational",
      "Interventional",
      "Expanded_Access",
      "Longitudinal_Cohort",
      "Cross_Sectional",
      "Case_Control",
      "Case_Set",
      "Control_Set",
      "Parent_Offspring",
      "Unrelated_Individuals",
      "Sibling_Pairs",
      "Family",
      "Pedigree",
      "Preclinical_Trial",
      "Clinical_Trial",
      "Meta_Analysis",
      "Prospective",
      "Retrospective",
      "Phase_I",
      "Phase_II",
      "Phase_III",
      "Phase_IV",
      "Single_Group",
      "Parallel",
      "Cross_Over",
      "Factorial",
      "Randomized",
      "Blind",
      "Controlled",
      "Open",
      "Single_Blind",
      "Double_Blind"
    ]
  }
},
"disease_terms": {
"$ref": "#/definitions/term_set"
},
"gene_id_terms": {
"$ref": "#/definitions/term_set"
},
"phenotype_terms": {
"$ref": "#/definitions/term_set"
},
"funding_sources": {
"type": "array",
"items": {
"type": "string",
"enum": [
"IMI",
"H2020",
"FP7",
"NIH",
"OTHER"
]
}
},
"funding_source_description": {
"type": "string"
},
"planning_site": {
"type": "string"
},
"data_standards_implemented": {
"type": "string"
},
"is_multi_site": {
"type": "boolean"
},
"data_types": {
"type": "array",
"items": {
"type": "string",
"enum": [
"Omics_data",
"Genotype_data",
"Whole_genome_sequencing",
"Exome_sequencing",
"Genomics_variant_array",
"RNASeq",
"Genetic_derived_genetic_data",
"Transcriptome_array",
"Methylation_array",
"MicroRNA_array",
"Metabolomics",
"Metagenomics",
"Proteomics",
"Other_omics_data",
"Clinical_Imaging",
"Cell_Imaging",
"Human_subject_data",
"Clinical_data",
"Lifestyle_data",
"Socio_Economic_Data",
"Environmental_Data",
"Other_Phenotype_data",
"Other"
]
}
},
"contacts": {
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"properties": {
"first_name": {
"type": "string"
},
"last_name": {
"type": "string"
},
"role": {
"type": "string",
"enum": [
"Principal_Investigator",
"Researcher",
"Data_Manager",
"Data_Protection_Officer",
"Legal_Representative",
"Other"
]
},
"email": {
"type": "string",
"format": "email"
},
"affiliation": {
"type": "string"
},
"address": {
"type": "string"
},
"phone": {
"type": "string"
}
},
"required": [
"first_name",
"last_name",
"role",
"email",
"affiliation"
]
}
},
"publications": {
"type": "array",
"items": {
"type": "object",
"properties": {
"doi": {
"type": "string"
},
"title": {
"type": "string"
},
"year": {
"type": "integer"
},
"venue": {
"type": "string"
},
"authors": {
"type": "string"
}
},
"required": [
"title",
"venue",
"authors"
]
}
}
},
"definitions": {
"term_set": {
"type": "object",
"properties": {
"vocabularyUrl": {
"type": "string",
"format": "uri"
},
"vocabularyDescription": {
"type": "string"
},
"terms": {
  "type": "array",
  "minItems": 1,
  "items": {
    "type": "string"
  }
}
},
"required": [
"terms"
]
}
}
}
}
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Packaging script for the metadata-tools distribution.
#
# setuptools is required outright: find_packages, install_requires and
# extras_require below have no distutils equivalent, so the former
# "except ImportError: from distutils.core import setup" fallback could only
# end in a NameError on find_packages.
from setuptools import setup, find_packages

# Runtime dependencies installed together with the package.
requirements = [
    'jsonschema'
]

# Extra dependencies needed only to run the test suite.
test_requirements = [
    'coverage'
]

setup(
    name='metadata-tools',
    version='0.0.1-dev',
    description="A utility tool for using validating ELIXIR metadata files.",
    packages=find_packages(exclude=['contrib', 'docs', 'tests*']),
    package_dir={'metadata-tools': 'metadata-tools'},
    include_package_data=True,
    install_requires=requirements,
    zip_safe=False,
    keywords=['elixir', 'metadata'],
    classifiers=[
        'Development Status :: 2 - Pre-Alpha',
        # 'Intended Audience :: Developers',
        # 'License :: OSI Approved :: ISC License (ISCL)',
        'Natural Language :: English',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
    ],
    test_suite='tests',
    tests_require=test_requirements,
    package_data={
        # Glob patterns are resolved relative to the package's own directory,
        # so the package directory name must not be repeated in the pattern.
        'metadata-tools': ['resources/*']
    },
    extras_require={
        'dev': [
            'tox',
            'pep8',
            'bumpversion',
            'coverage'
        ]
    }
)
{
"name" : "Test Study Name",
"description" : "Test Study description...................",
"web_page" : "http://the.study.webpage",
"in_ex_criteria" : "The criteria for inclusion was....",
"descriptors" : [ "Case_Control", "Factorial" ],
"disease_terms" : {
"vocabularyUrl" : "http://some.disease.ontology.org",
"vocabularyDescription" : "We used version 2.0 of the ontology.....",
"terms" : [ "parkinsons", "dementia", "epilepsy" ]
},
"gene_id_terms" : {
"vocabularyUrl" : "http://gene.ontology.org",
"vocabularyDescription" : "We used latest version of GO v X. Y ....",
"terms" : [ "GO:0000016", "GO:0000017", "GO:0000018" ]
},
"phenotype_terms" : {
"vocabularyUrl" : "http://human-phenotype-ontology.github.io",
"vocabularyDescription" : "......",
"terms" : [ "HP:0003674", "HP:0003623" ]
},
"funding_sources" : [ "FP7" ],
"funding_source_description" : "Grant-project numbers .1247654",
"planning_site" : "Study was planned at Karolinska Institutet ",
"data_standards_implemented" : "We used CDISC and .j...",
"is_multi_site" : false,
"data_types" : [ "Other_omics_data", "Other_Phenotype_data", "Genomics_variant_array" ],
"contacts" : [ {
"first_name" : "John",
"last_name" : "Doe",
"role" : "Principal_Investigator",
"email" : "john@abc.edu",
"affiliation" : "University of ABC",
"address" : "Building ABC, block2, Belval, Luxembourg",
"phone" : "94798375"
}, {
"first_name" : "Alice",
"last_name" : "White",
"role" : "Researcher",
"email" : "alice@abc.edu",
"affiliation" : "University of ABC",
"address" : "Building ABC, block2, Belval, Luxembourg",
"phone" : "95869457"
} ],
"publications" : [ {
"doi" : "doi://12123",
"title" : " First paper on PD research",
"year" : 2017,
"venue" : "Journal ABC",
"authors" : "P Alper, V Groues, Y Jarosz"
} ]
}
\ No newline at end of file
from unittest import TestCase
from json import loads
from jsonschema import validate
import os
class TestParser(TestCase):
    """Checks that the sample study document conforms to the study schema."""

    def test_validation(self):
        """Validate ``study1.json`` against the schema in ``elx-study.json``.

        The schema file wraps the actual JSON Schema under its ``"schema"``
        key, so that sub-document is what gets passed to ``validate``.
        The test fails, reporting the validator's message, when the data
        or the schema itself is rejected.
        """
        # Local import: the module only imports ``validate`` at file level.
        from jsonschema.exceptions import SchemaError, ValidationError

        # Resolve both files relative to this test module so the test does
        # not depend on the directory the test runner was started from.
        here = os.path.dirname(__file__)
        schema_path = os.path.join(
            here, '..', 'metadata-tools', 'resources', 'elx-study.json')
        data_path = os.path.join(here, 'study1.json')

        # Context managers close each file even if reading or parsing fails.
        with open(schema_path, encoding='utf-8') as schema_file:
            schema = loads(schema_file.read())
        with open(data_path, encoding='utf-8') as data_file:
            data = loads(data_file.read())

        try:
            validate(data, schema['schema'])
        except (SchemaError, ValidationError) as err:
            self.fail('study1.json failed schema validation: {}'.format(err))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment