Commit 6cbd51bd authored by Sascha Herzinger

added id filter to pca

parent 75a99bbc
@@ -39,6 +39,10 @@ class PCATask(AnalyticTask):
        df = df.pivot(index='feature', columns='id', values='value')
        df = df.T
        # apply id filter
        if id_filter:
            df = df[df.index.isin(id_filter)]
        # save ids so we can re-assign them after pca
        ids = df.index.tolist()
......
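
Note: the new id_filter keeps only the rows whose index (the sample id, after the pivot and transpose) appears in the filter list. A minimal standalone sketch of that step, using made-up sample data rather than the task's real input:

import pandas as pd

# Toy long-format input: one row per (sample id, feature) pair.
df = pd.DataFrame([[101, 'foo', 5], [101, 'bar', 6],
                   [102, 'foo', 10], [102, 'bar', 11],
                   [103, 'foo', 15], [103, 'bar', 16]],
                  columns=['id', 'feature', 'value'])
df = df.pivot(index='feature', columns='id', values='value')
df = df.T  # rows are now sample ids, columns are features

id_filter = [101, 103]  # hypothetical filter list
if id_filter:
    df = df[df.index.isin(id_filter)]  # drop samples not in the filter
ids = df.index.tolist()
print(ids)  # [101, 103]
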
@@ -2,10 +2,11 @@
import logging
import requests
from pandas import DataFrame
from fractalis.data.etl import ETL
from fractalis.data.etls.transmart.shared import extract_data
from fractalis.data.etls.transmart import observations_pb2
logger = logging.getLogger(__name__)
@@ -21,7 +22,30 @@ class HighdimETL(ETL):
        return handler == 'transmart' and descriptor['data_type'] == 'highdim'

    def extract(self, server: str, token: str, descriptor: dict) -> dict:
        return extract_data(server=server, descriptor=descriptor, token=token)
        r = requests.get(url='{}/v2/observations'.format(server),
                         params={
                             'constraint': '{{"type": "concept","path": "{}"}}'
                                           ''.format(descriptor["path"]),
                             'projection': 'log_intensity',
                             'type': 'autodetect'
                         },
                         headers={
                             'Accept': 'application/x-protobuf',
                             'Authorization': 'Bearer {}'.format(token)
                         },
                         timeout=2000)
        if r.status_code != 200:
            error = "Data extraction failed. Target server responded with " \
                    "status code {}.".format(r.status_code)
            logger.error(error)
            raise ValueError(error)
        try:
            pass  # TODO: parse the protobuf payload (observations_pb2) into raw data
        except Exception as e:
            logger.exception(e)
            raise ValueError("Data extraction failed. "
                             "Got unexpected data format.")
    def transform(self, raw_data: dict, descriptor: dict) -> DataFrame:
        rows = []
......
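
Note: the constraint parameter above is a JSON string assembled with str.format. As a sketch only (not part of the commit; fetch_observations and its parameters are hypothetical stand-ins for the same endpoint and arguments), the request could build the constraint with json.dumps so the concept path is quoted and escaped automatically:

import json
import requests

def fetch_observations(server, token, path):
    # Sketch of the /v2/observations call shown above; 'path' stands in for descriptor["path"].
    constraint = json.dumps({'type': 'concept', 'path': path})
    r = requests.get(url='{}/v2/observations'.format(server),
                     params={'constraint': constraint,
                             'projection': 'log_intensity',
                             'type': 'autodetect'},
                     headers={'Accept': 'application/x-protobuf',
                              'Authorization': 'Bearer {}'.format(token)},
                     timeout=2000)
    r.raise_for_status()  # roughly equivalent to the explicit status-code check above
    return r.content      # raw protobuf payload, still to be parsed
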
This diff is collapsed.
@@ -45,3 +45,20 @@ class TestPCATask:
        assert data['id'].tolist() == [101, 102, 103, 104, 105]
        assert data['subset'].unique().tolist() == [0]
        assert data['category'].unique().tolist() == ['a', None]

    def test_id_filter_works(self):
        features = [
            pd.DataFrame([[101, 'foo', 5], [101, 'bar', 6],
                          [102, 'foo', 10], [102, 'bar', 11],
                          [103, 'foo', 15], [103, 'bar', 16],
                          [104, 'foo', 20], [104, 'bar', 21]],
                         columns=['id', 'feature', 'value'])
        ]
        result = self.task.main(features=features,
                                categories=[],
                                n_components=2,
                                whiten=False,
                                id_filter=[101, 104],
                                subsets=[])
        data = pd.read_json(result['data'])
        assert data['id'].unique().tolist() == [101, 104]