Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Fractalis
fractalis
Commits
a19f4651
Commit
a19f4651
authored
Jul 18, 2017
by
Sascha Herzinger
Browse files
Added some more tests including transmart numerical etl
parent
bea8f971
Pipeline
#2236
passed with stage
in 4 minutes and 25 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
fractalis/data/etl.py
View file @
a19f4651
"""This module provides the ETL class"""
import
os
import
abc
import
json
import
logging
import
os
from
celery
import
Task
from
pandas
import
DataFrame
from
fractalis
import
app
,
redis
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -41,6 +40,9 @@ class ETL(Task, metaclass=abc.ABCMeta):
def
can_handle
(
handler
:
str
,
descriptor
:
dict
)
->
bool
:
"""Check if the current implementation of ETL can handle given handler
and data type.
WARNING: You should never raise an Exception here and expect it to be
propagated further up. It will be caught and assumed that the
current ETL cannot handle the given arguments.
:param handler: Describes the handler. E.g.: transmart, ada
:param descriptor: Describes the data that we want to download.
:return: True if implementation can handle given parameters.
...
...
@@ -58,11 +60,21 @@ class ETL(Task, metaclass=abc.ABCMeta):
"""
from
.
import
ETL_REGISTRY
for
ETL_TASK
in
ETL_REGISTRY
:
if
ETL_TASK
.
can_handle
(
handler
,
descriptor
):
return
ETL_TASK
()
# noinspection PyBroadException
try
:
if
ETL_TASK
.
can_handle
(
handler
,
descriptor
):
return
ETL_TASK
()
except
Exception
as
e
:
logger
.
warning
(
"Caught exception and assumed that ETL '{}' "
"cannot handle handler '{}' and descriptor: '{}'"
" Exception:'{}'"
.
format
(
type
(
ETL_TASK
).
__name__
,
handler
,
str
(
descriptor
),
e
))
continue
raise
NotImplementedError
(
"No ETL implementation found for handler '{}' and descriptor '{}'"
.
format
(
handler
,
descriptor
))
.
format
(
handler
,
descriptor
))
@
abc
.
abstractmethod
def
extract
(
self
,
server
:
str
,
token
:
str
,
descriptor
:
dict
)
->
object
:
...
...
fractalis/data/etls/transmart/etl_numerical.py
View file @
a19f4651
"""Provides numerical concept ETL for tranSMART."""
import
logging
import
requests
from
pandas
import
DataFrame
from
fractalis.data.etl
import
ETL
logger
=
logging
.
getLogger
(
__name__
)
class
NumericalETL
(
ETL
):
"""NumericalETL implements support for tranSMART's 'numerical' type."""
...
...
@@ -19,7 +24,7 @@ class NumericalETL(ETL):
def
extract
(
self
,
server
:
str
,
token
:
str
,
descriptor
:
dict
)
->
dict
:
r
=
requests
.
get
(
url
=
'{}/v2/observations'
.
format
(
server
),
params
=
{
'constraint'
:
'{{"type": "concept",
"path": "{}"}}'
.
format
(
descriptor
[
"path"
]),
'constraint'
:
'{{"type": "concept","path": "{}"}}'
.
format
(
descriptor
[
"path"
]),
'type'
:
'clinical'
},
headers
=
{
...
...
@@ -30,7 +35,12 @@ class NumericalETL(ETL):
raise
ValueError
(
"Data extraction failed. Target server responded with "
"status code {}."
.
format
(
r
.
status_code
))
return
r
.
json
()
try
:
return
r
.
json
()
except
Exception
:
error
=
"Data extraction failed. Got unexpected data format."
logger
.
error
(
error
)
raise
ValueError
(
error
)
def
transform
(
self
,
raw_data
:
dict
,
descriptor
:
dict
)
->
DataFrame
:
rows
=
[]
...
...
fractalis/data/etls/transmart/handler_transmart.py
View file @
a19f4651
...
...
@@ -51,17 +51,16 @@ class TransmartHandler(ETLHandler):
'password'
:
passwd
},
headers
=
{
'Accept'
:
'application/json'
})
auth_error
=
''
if
r
.
status_code
!=
200
:
auth_
error
=
"Could not authenticate. "
\
"Reason: [{}]: {}"
.
format
(
r
.
status_code
,
r
.
text
)
logger
.
error
(
auth_
error
)
raise
ValueError
(
auth_
error
)
error
=
"Could not authenticate. "
\
"Reason: [{}]: {}"
.
format
(
r
.
status_code
,
r
.
text
)
logger
.
error
(
error
)
raise
ValueError
(
error
)
try
:
response
=
r
.
json
()
return
response
[
'access_token'
]
except
ValueError
:
auth_error
=
"Could not authenticate. "
\
"Got unexpected response: '{}'"
.
format
(
r
.
text
)
logger
.
error
(
auth_error
)
raise
ValueError
(
auth_error
)
\ No newline at end of file
except
Exception
:
error
=
"Could not authenticate. "
\
"Got unexpected response: '{}'"
.
format
(
r
.
text
)
logger
.
error
(
error
)
raise
ValueError
(
error
)
tests/transmart_etls/test_etl_numerical.py
View file @
a19f4651
"""This module provides tests for the numerical data ETL for tranSMART."""
import
pytest
import
responses
from
fractalis.data.etls.transmart.etl_numerical
import
NumericalETL
# noinspection PyMissingOrEmptyDocstring,PyMissingTypeHints
class TestNumericalETL:
    """Tests for ``NumericalETL``, the tranSMART numerical data ETL.

    All HTTP traffic is intercepted with ``responses.RequestsMock``, so no
    request leaves the test process.
    """

    # One ETL instance is shared by every test in this class.
    etl = NumericalETL()

    def test_correct_handler(self):
        """Accept 'transmart' + 'numerical'; reject the 'ada' combinations."""
        assert self.etl.can_handle(
            handler='transmart', descriptor={'data_type': 'numerical'})
        assert not self.etl.can_handle(
            handler='ada', descriptor={'data_type': 'numerical'})
        assert not self.etl.can_handle(
            handler='ada', descriptor={'data_type': 'categorical'})
        assert not self.etl.can_handle(
            handler='ada', descriptor={'foo': 'bar'})

    def test_extract_raises_readable_if_not_200(self):
        """A 400 response must raise a ValueError naming the status code."""
        with responses.RequestsMock() as response:
            response.add(response.GET, 'http://foo.bar/v2/observations',
                         body='{}',
                         status=400,
                         content_type='application/json')
            with pytest.raises(ValueError) as e:
                self.etl.extract(server='http://foo.bar',
                                 token='', descriptor={'path': ''})
        # The check has to run after the ``raises`` block (statements placed
        # behind the raising call inside it are never executed) and has to
        # look at the exception message, not at the ExceptionInfo wrapper.
        # The message reads "... status code 400.", hence '400' not '[400]'.
        assert '400' in str(e.value)

    def test_extract_raises_readable_if_not_json(self):
        """A 200 response with a non-JSON body must raise a ValueError."""
        with responses.RequestsMock() as response:
            response.add(response.GET, 'http://foo.bar/v2/observations',
                         body='123{//}',
                         status=200,
                         content_type='application/json')
            with pytest.raises(ValueError) as e:
                self.etl.extract(server='http://foo.bar',
                                 token='', descriptor={'path': ''})
        # Inspect the message of the raised exception once the context
        # manager has captured it.
        assert 'unexpected data' in str(e.value)

    def test_extract_works_for_valid_input(self):
        """A 200 response with a JSON object body is returned as a dict."""
        with responses.RequestsMock() as response:
            response.add(response.GET, 'http://foo.bar/v2/observations',
                         body='{}',
                         status=200,
                         content_type='application/json')
            raw_data = self.etl.extract(server='http://foo.bar',
                                        token='', descriptor={'path': ''})
            assert isinstance(raw_data, dict)
tests/transmart_etls/test_handler_transmart.py
View file @
a19f4651
...
...
@@ -2,7 +2,6 @@
import
pytest
import
responses
import
requests
from
fractalis.data.etls.transmart.handler_transmart
import
TransmartHandler
...
...
@@ -54,7 +53,6 @@ class TestTransmartHandler:
auth
=
{
'user'
:
'foo'
,
'passwd'
:
'bar'
})
assert
'unexpected response'
in
e
def
test_auth_raises_exception_for_non_200_return
(
self
):
with
responses
.
RequestsMock
()
as
response
:
response
.
add
(
response
.
POST
,
'http://foo.bar/oauth/token'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment