Commit 47b61cdf authored by Sascha Herzinger
Browse files

Working ETL for transmart numeric data

parent a0ee2306
Pipeline #2233 failed with stage
in 3 minutes and 6 seconds
"""Provides numerical concept ETL for tranSMART."""
from typing import List
import requests
from pandas import DataFrame
......@@ -16,20 +14,30 @@ class NumericalETL(ETL):
def can_handle(handler: str, descriptor: dict) -> bool:
return handler == 'transmart' and descriptor['dataType'] == 'numerical'
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
r = requests.get(url='{}/v2/observations'
headers={'Accept': 'application/json',
'Authorization': 'Bearer {}'.format(token)})
return handler == 'transmart' and descriptor['data_type'] == 'numerical'
def extract(self, server: str, token: str, descriptor: dict) -> dict:
r = requests.get(url='{}/v2/observations'.format(server),
'constraint': '{{"type": "concept", "path": "{}"}}'.format(descriptor["path"]),
'type': 'clinical'
'Accept': 'application/json',
'Authorization': 'Bearer {}'.format(token)
if r.status_code != 200:
raise ValueError(
"Data extraction failed. Target server responded with "
"status code {}.".format(r.status_code))
return r
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
return raw_data
\ No newline at end of file
return r.json()
def transform(self, raw_data: dict, descriptor: dict) -> DataFrame:
    """Flatten raw observation cells into an (id, value) data frame.

    :param raw_data: Decoded response holding a 'cells' list and a
        'dimensionElements' mapping with a 'patient' list.
    :param descriptor: Concept descriptor; not used by this transformation.
    :return: DataFrame with the columns 'id' and 'value', one row per cell,
        in the order the cells appear in ``raw_data``.
    :raises KeyError: If a cell or ``raw_data`` lacks one of the keys read
        below.
    :raises IndexError: If a cell's patient index does not resolve.
    """
    # Position 2 of 'dimensionIndexes' is used as the index into the
    # patient elements -- presumably the patient dimension is always third
    # in the server response; TODO confirm against the API.
    patient_position = 2
    rows = [
        [
            raw_data['dimensionElements']['patient'][
                cell['dimensionIndexes'][patient_position]]['inTrialId'],
            cell['numericValue'],
        ]
        for cell in raw_data['cells']
    ]
    return DataFrame(rows, columns=['id', 'value'])
......@@ -24,17 +24,18 @@ class TransmartHandler(ETLHandler):
def make_label(descriptor: dict) -> str:
return 'test'
return descriptor['path']
def _get_token_for_credentials(self, server: str,
user: str, passwd: str) -> str:
r = requests.get(url='{}/oauth/token?grant_type=password'
'&password={}'.format(server, user, passwd),
headers={'Accept': 'application/json'})
r ='{}/oauth/token?grant_type=password'
'&password={}'.format(server, user, passwd),
headers={'Accept': 'application/json'})
if r.status_code != 200:
raise ValueError("Could not authenticate. Reason: [{}]: {}"
.format(r.status_code, r.text))
return r
\ No newline at end of file
response = r.json()
return response['access_token']
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment