Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Fractalis
fractalis
Commits
9480390a
Commit
9480390a
authored
Jul 19, 2017
by
Sascha Herzinger
Browse files
More tests, logging and better timeouts
parent
a19f4651
Pipeline
#2237
passed with stage
in 7 minutes and 5 seconds
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
fractalis/analytics/task.py
View file @
9480390a
...
...
@@ -116,7 +116,7 @@ class AnalyticTask(Task, metaclass=abc.ABCMeta):
raise
ValueError
(
error
)
result
=
json
.
dumps
(
result
)
except
TypeError
as
e
:
logg
ing
.
exception
(
e
)
logg
er
.
exception
(
e
)
raise
# NaN is invalid JSON and JS can't parse it. null on the other hand...
result
=
re
.
sub
(
r
': NaN'
,
': null'
,
result
)
...
...
fractalis/data/etl.py
View file @
9480390a
...
...
@@ -124,12 +124,24 @@ class ETL(Task, metaclass=abc.ABCMeta):
"""
logger
.
info
(
"Starting ETL process ..."
)
logger
.
info
(
"(E)xtracting data from server '{}'."
.
format
(
server
))
raw_data
=
self
.
extract
(
server
,
token
,
descriptor
)
try
:
raw_data
=
self
.
extract
(
server
,
token
,
descriptor
)
except
Exception
as
e
:
logger
.
exception
(
e
)
raise
RuntimeError
(
"Data extraction failed."
)
logger
.
info
(
"(T)ransforming data to Fractalis format."
)
data_frame
=
self
.
transform
(
raw_data
,
descriptor
)
try
:
data_frame
=
self
.
transform
(
raw_data
,
descriptor
)
except
Exception
as
e
:
logger
.
exception
(
e
)
raise
RuntimeError
(
"Data transformation failed."
)
if
not
isinstance
(
data_frame
,
DataFrame
):
error
=
"transform() must return 'pandas.DataFrame', "
\
"but returned '{}' instead."
.
format
(
type
(
data_frame
))
logging
.
error
(
error
,
exc_info
=
1
)
raise
TypeError
(
error
)
self
.
load
(
data_frame
,
file_path
)
try
:
self
.
load
(
data_frame
,
file_path
)
except
Exception
as
e
:
logger
.
exception
(
e
)
raise
RuntimeError
(
"Data loading failed."
)
fractalis/data/etlhandler.py
View file @
9480390a
...
...
@@ -56,7 +56,11 @@ class ETLHandler(metaclass=abc.ABCMeta):
except
KeyError
:
logger
.
info
(
'No token has been provided. '
'Attempting to authenticate with the API.'
)
self
.
_token
=
self
.
_get_token_for_credentials
(
server
,
auth
)
try
:
self
.
_token
=
self
.
_get_token_for_credentials
(
server
,
auth
)
except
Exception
as
e
:
logger
.
exception
(
e
)
raise
ValueError
(
"Could not authenticate with API."
)
@
staticmethod
@
abc
.
abstractmethod
...
...
fractalis/data/etls/ada/common.py
View file @
9480390a
"""This module contains code that is shared between the different ETLs."""
import
logging
from
typing
import
List
import
requests
# Module-level logger named after this module; the functions below log
# through it.
logger = logging.getLogger(__name__)
def make_cookie(token: str) -> dict:
    """Wrap a session token in a cookie mapping.

    :param token: The session token to send to the server.
    :return: A dict with the single key 'PLAY2AUTH_SESS_ID' mapped to `token`.
        NOTE(review): presumably passed as the `cookies=` argument of
        `requests` calls such as the one in `get_field` -- confirm at callers.
    """
    return {'PLAY2AUTH_SESS_ID': token}
...
...
@@ -12,19 +16,23 @@ def make_cookie(token: str) -> dict:
def get_field(server: str, data_set: str,
              cookie: dict, projection: str) -> List[dict]:
    """Fetch one projected field for the records of a data set.

    Sends a GET request to '<server>/studies/records/findCustom' asking for
    the '_id' and `projection` fields of `data_set`, filtered to records
    whose `projection` field is not the empty string.

    :param server: Base URL of the target server.
    :param data_set: Value sent as the 'dataSet' query parameter.
    :param cookie: Cookies sent along with the request.
    :param projection: Name of the field to project and to filter on.
    :return: The decoded JSON body of the response.
    :raises ValueError: If the server answers with a status code other than 200.
    :raises TypeError: If the response body cannot be decoded as JSON.
    """
    r = requests.get(
        url='{}/studies/records/findCustom'.format(server),
        headers={'Accept': 'application/json'},
        params={
            'dataSet': data_set,
            'projection': ['_id', projection],
            'filterOrId': '[{{"fieldName":"{}","conditionType":"!=","value":""}}]'
                          .format(projection)
        },
        cookies=cookie,
        # Bound the wait so an unresponsive server cannot block the caller
        # indefinitely.
        timeout=20)
    if r.status_code != 200:
        error = "Data extraction failed. Target server responded with " \
                "status code {}.".format(r.status_code)
        logger.error(error)
        raise ValueError(error)
    try:
        field_data = r.json()
    except Exception as e:
        # Log the decoding failure before replacing it with a TypeError.
        logger.exception(e)
        raise TypeError("Data extraction failed. Target server did not return "
                        "expected data. Possible authentication error.")
    return field_data
...
...
fractalis/data/etls/ada/handler_ada.py
View file @
9480390a
...
...
@@ -36,17 +36,19 @@ class AdaHandler(ETLHandler):
passwd
=
auth
[
'passwd'
]
if
len
(
user
)
==
0
or
len
(
passwd
)
==
0
:
raise
KeyError
except
KeyError
:
error
=
"The authentication object must contain the non-empty "
\
"fields 'user' and 'passwd'."
logger
.
error
(
error
)
raise
ValueError
(
error
)
except
KeyError
as
e
:
logger
.
exception
(
e
)
raise
ValueError
(
"The authentication object must contain the "
"non-empty fields 'user' and 'passwd'."
)
r
=
requests
.
post
(
url
=
'{}/login'
.
format
(
server
),
headers
=
{
'Accept'
:
'application/json'
},
data
=
{
'id'
:
user
,
'password'
:
passwd
})
data
=
{
'id'
:
user
,
'password'
:
passwd
},
timeout
=
10
)
if
r
.
status_code
!=
200
:
raise
ValueError
(
"Could not authenticate. Reason: [{}]: {}"
.
format
(
r
.
status_code
,
r
.
text
))
error
=
"Could not authenticate. "
\
"Reason: [{}]: {}"
.
format
(
r
.
status_code
,
r
.
text
)
logger
.
error
(
error
)
raise
ValueError
(
error
)
cookie
=
r
.
headers
[
'Set-Cookie'
]
token
=
[
s
for
s
in
cookie
.
split
(
';'
)
if
s
.
startswith
(
'PLAY2AUTH_SESS_ID'
)][
0
]
...
...
fractalis/data/etls/transmart/etl_numerical.py
View file @
9480390a
...
...
@@ -30,17 +30,19 @@ class NumericalETL(ETL):
headers
=
{
'Accept'
:
'application/json'
,
'Authorization'
:
'Bearer {}'
.
format
(
token
)
})
},
timeout
=
60
)
if
r
.
status_code
!=
200
:
raise
ValueError
(
"Data extraction failed. Target server responded with "
"status code {}."
.
format
(
r
.
status_code
))
try
:
return
r
.
json
()
except
Exception
:
error
=
"Data extraction failed. Got unexpected data format."
error
=
"Data extraction failed. Target server responded with "
\
"status code {}."
.
format
(
r
.
status_code
)
logger
.
error
(
error
)
raise
ValueError
(
error
)
try
:
return
r
.
json
()
except
Exception
as
e
:
logger
.
exception
(
e
)
raise
ValueError
(
"Data extraction failed. "
"Got unexpected data format."
)
def
transform
(
self
,
raw_data
:
dict
,
descriptor
:
dict
)
->
DataFrame
:
rows
=
[]
...
...
fractalis/data/etls/transmart/handler_transmart.py
View file @
9480390a
...
...
@@ -50,7 +50,8 @@ class TransmartHandler(ETLHandler):
'username'
:
user
,
'password'
:
passwd
},
headers
=
{
'Accept'
:
'application/json'
})
headers
=
{
'Accept'
:
'application/json'
},
timeout
=
10
)
if
r
.
status_code
!=
200
:
error
=
"Could not authenticate. "
\
"Reason: [{}]: {}"
.
format
(
r
.
status_code
,
r
.
text
)
...
...
tests/transmart_etls/test_etl_numerical.py
View file @
9480390a
"""This module provides test for the numerical data ETL for tranSMART"""
import
json
import
pytest
import
responses
...
...
@@ -52,3 +54,20 @@ class TestNumericalETL:
raw_data
=
self
.
etl
.
extract
(
server
=
'http://foo.bar'
,
token
=
''
,
descriptor
=
{
'path'
:
''
})
assert
isinstance
(
raw_data
,
dict
)
def test_transform_valid_input_correct_output(self):
    """Check that transform() turns one mocked observation into one row.

    A mocked GET on 'http://foo.bar/v2/observations' returns a body with a
    single cell and a single patient. The data frame built from it must have
    shape (1, 2), the columns ['id', 'value'] and the row ['3052', 52.0].
    """
    body = {
        "cells": [{
            "inlineDimensions": ["292278994-08-16T23:00:00Z", None, "@"],
            "dimensionIndexes": [0, 0, 0, None, 0, None, None],
            "numericValue": 52.0
        }],
        "dimensionElements": {
            "patient": [{
                "id": 1000421548,
                "deathDate": None,
                "birthDate": None,
                "race": None,
                "maritalStatus": None,
                "inTrialId": "3052",
                "age": 52,
                "trial": "GSE4382",
                "sexCd": None,
                "sex": "unknown",
                "religion": None
            }]
        }
    }
    with responses.RequestsMock() as response:
        response.add(response.GET, 'http://foo.bar/v2/observations',
                     body=json.dumps(body),
                     status=200,
                     content_type='application/json')
        # extract() must run while the mock is active so the request is
        # answered with the canned body.
        raw_data = self.etl.extract(server='http://foo.bar',
                                    token='', descriptor={'path': ''})
    df = self.etl.transform(raw_data=raw_data, descriptor={'path': ''})
    assert df.shape == (1, 2)
    assert df.values.tolist() == [['3052', 52.0]]
    assert list(df) == ['id', 'value']
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment