Commit 12ecb76c authored by Sascha Herzinger
Browse files

Heavily improved Ada ETL performance under certain conditions

parent bacbd03d
Pipeline #2215 passed with stage in 5 minutes and 58 seconds
......@@ -10,14 +10,15 @@ def make_cookie(token: str) -> dict:
def get_field(server: str, data_set: str,
cookie: dict, projections: List[str]) -> List[dict]:
cookie: dict, projection: str) -> List[dict]:
r = requests.get(url='{}/studies/records/findCustom'.format(server),
headers={'Accept': 'application/json'},
params={
'dataSet': data_set,
'projection': projections
},
cookies=cookie)
headers={'Accept': 'application/json'},
params={
'dataSet': data_set,
'projection': ['_id', projection],
'filterOrId': '[{{"fieldName":"{}","conditionType":"!=","value":""}}]'.format(projection)
},
cookies=cookie)
if r.status_code != 200:
raise ValueError("Data extraction failed. Target server responded with "
"status code {}.".format(r.status_code))
......
......@@ -22,11 +22,10 @@ class BooleanETL(ETL):
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projections = ['_id']
projections += [descriptor['dictionary']['projection']]
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie=cookie, projections=projections)
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
......
......@@ -22,11 +22,10 @@ class DateETL(ETL):
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projections = ['_id']
projections += [descriptor['dictionary']['projection']]
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie=cookie, projections=projections)
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
......
......@@ -22,11 +22,10 @@ class DoubleETL(ETL):
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projections = ['_id']
projections += [descriptor['dictionary']['projection']]
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie=cookie, projections=projections)
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
......
......@@ -22,11 +22,10 @@ class EnumETL(ETL):
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projections = ['_id']
projections += [descriptor['dictionary']['projection']]
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie=cookie, projections=projections)
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
......
......@@ -22,11 +22,10 @@ class IntegerETL(ETL):
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projections = ['_id']
projections += [descriptor['dictionary']['projection']]
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie=cookie, projections=projections)
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
......
......@@ -22,11 +22,10 @@ class StringETL(ETL):
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projections = ['_id']
projections += [descriptor['dictionary']['projection']]
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie=cookie, projections=projections)
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment