Commit 9946fb98 authored by Sascha Herzinger's avatar Sascha Herzinger
Browse files

Added API for deleting data

parent 0acfcfec
Pipeline #2077 passed with stage
in 3 minutes and 25 seconds
......@@ -5,13 +5,35 @@ from flask import Blueprint, session, request, jsonify
from .etlhandler import ETLHandler
from .schema import create_data_schema
from fractalis.validator import validate_json, validate_schema
from fractalis import celery
from fractalis import redis
from fractalis import celery, redis
from fractalis.sync import remove_file
data_blueprint = Blueprint('data_blueprint', __name__)
def get_data_by_id(data_id, wait):
data_obj = json.loads(redis.get('data:{}'.format(data_id)))
job_id = data_obj['job_id']
async_result = celery.AsyncResult(job_id)
if wait:
async_result.get(propagate=False) # wait for results
state = async_result.state
result = async_result.result
if isinstance(result, Exception): # Exception -> str
result = "{}: {}".format(type(result).__name__, str(result))
data_obj['state'] = state
data_obj['message'] = result
data_obj['data_id'] = data_id
# remove internal information from response
del data_obj['file_path']
del data_obj['access']
return data_obj
@data_blueprint.before_request
def prepare_session():
session.permanent = True
......@@ -19,6 +41,13 @@ def prepare_session():
session['data_ids'] = []
@data_blueprint.before_request
def cleanup_session():
for data_id in session['data_ids']:
if not redis.exists('shadow:data:{}'.format(data_id)):
session['data_ids'].remove(data_id)
@data_blueprint.route('', methods=['POST'])
@validate_json
@validate_schema(create_data_schema)
......@@ -36,31 +65,16 @@ def create_data():
return jsonify({'data_ids': data_ids}), 201
def get_data_by_id(data_id, wait):
value = redis.get('data:{}'.format(data_id))
data_obj = json.loads(value)
job_id = data_obj['job_id']
async_result = celery.AsyncResult(job_id)
if wait:
async_result.get(propagate=False) # wait for results
state = async_result.state
result = async_result.result
if isinstance(result, Exception): # Exception -> str
result = "{}: {}".format(type(result).__name__, str(result))
data_obj['state'] = state
data_obj['message'] = result
data_obj['data_id'] = data_id
# remove internal information from response
del data_obj['file_path']
del data_obj['access']
return data_obj
@data_blueprint.route('', methods=['GET'])
def get_all_data_state():
wait = request.args.get('wait') == '1'
data_states = [get_data_by_id(data_id, wait)
for data_id in session['data_ids']]
return jsonify({'data_states': data_states}), 200
@data_blueprint.route('/<string:params>', methods=['GET'])
def get_data_by_params(params):
def get_data_state(params):
wait = request.args.get('wait') == '1'
# params can be data_id or dict
try:
......@@ -76,9 +90,31 @@ def get_data_by_params(params):
return jsonify({'data_state': data_obj}), 200
@data_blueprint.route('', methods=['GET'])
def get_all_data_state():
@data_blueprint.route('/<string:params>', methods=['DELETE'])
def delete_data(params):
wait = request.args.get('wait') == '1'
data_states = [get_data_by_id(data_id, wait)
for data_id in session['data_ids']]
return jsonify({'data_states': data_states}), 200
# params can be data_id or dict
try:
params = json.loads(params)
data_id = ETLHandler.compute_data_id(server=params['server'],
descriptor=params['descriptor'])
except ValueError:
data_id = params
if data_id not in session['data_ids']: # access control
return jsonify(
{'error_msg': "No matching data found. Maybe expired?"}), 404
data_obj = json.loads(redis.get('data:{}'.format(data_id)))
file_path = data_obj['file_path']
async_result = remove_file.delay(file_path)
if wait:
async_result.get(propagate=False)
redis.delete('data:{}'.format(data_id))
redis.delete('shadow:data:{}'.format(data_id))
return '', 200
@data_blueprint.route('/', methods=['DELETE'])
def delete_all_data():
for data_id in session['data_ids']:
delete_data(data_id)
return '', 200
......@@ -89,11 +89,11 @@ class ETLHandler(metaclass=abc.ABCMeta):
file_name = str(uuid4())
file_path = os.path.join(data_dir, file_name)
access = []
# test if description for data is specified
# test if label for data is specified
try:
description = descriptor['description']
label = descriptor['label']
except KeyError:
description = str(descriptor)
label = str(descriptor)
etl = ETL.factory(handler=self._handler,
data_type=descriptor['data_type'])
task_id = uuid()
......@@ -101,7 +101,8 @@ class ETLHandler(metaclass=abc.ABCMeta):
'file_path': file_path,
'job_id': task_id,
'data_type': etl.produces,
'description': description,
'label': label,
'descriptor': descriptor,
'access': access
}
redis.set(name='data:{}'.format(data_id),
......
......@@ -38,7 +38,6 @@ def get_dictionary(server: str, data_set: str,
cookies=cookie)
if r.status_code != 200:
dictionary = None
# TODO: Log this
pass
else:
dictionary = r.json()
......
......@@ -6,9 +6,18 @@ db and the file system.
import os
from shutil import rmtree
from fractalis import redis, app
from fractalis import redis, app, celery
@celery.task
def remove_file(file_path: str) -> None:
try:
os.remove(file_path)
except FileNotFoundError:
pass
@celery.task
def cleanup_all() -> None:
"""Reset redis and the filesystem. This is only useful for testing and
should !!!NEVER!!! be used otherwise.
......
......@@ -4,6 +4,7 @@ import json
from flask_script import Manager
from fractalis import app, redis
from fractalis.sync import remove_file
manager = Manager(app)
......@@ -25,10 +26,7 @@ def janitor():
print('deleting file: ', file_path)
print('deleting redis key: ', expired_key)
redis.delete(expired_key)
try:
os.remove(file_path)
except FileNotFoundError:
pass
remove_file.delay()
if __name__ == "__main__":
......
......@@ -260,12 +260,13 @@ class TestData:
body = flask.json.loads(rv.get_data())
data_state = body['data_state']
assert rv.status_code == 200, data_state
assert len(data_state) == 6 # include only minimal data in response
assert len(data_state) == 7 # include only minimal data in response
assert not data_state['message']
assert data_state['state']
assert data_state['job_id']
assert data_state['data_type']
assert data_state['description']
assert data_state['label']
assert data_state['descriptor']
assert data_state['data_id']
def test_GET_by_all_and_valid_response(self, test_client, big_post):
......@@ -279,12 +280,13 @@ class TestData:
body = flask.json.loads(rv.get_data())
for data_state in body['data_states']:
assert len(data_state) == 6
assert len(data_state) == 7
assert not data_state['message']
assert data_state['state']
assert data_state['job_id']
assert data_state['data_type']
assert data_state['description']
assert data_state['label']
assert data_state['descriptor']
assert data_state['data_id']
def test_GET_by_params_and_valid_response(self, test_client):
......@@ -308,12 +310,13 @@ class TestData:
body = flask.json.loads(rv.get_data())
data_state = body['data_state']
assert rv.status_code == 200, data_state
assert len(data_state) == 6 # include only minimal data in response
assert len(data_state) == 7 # include only minimal data in response
assert not data_state['message']
assert data_state['state']
assert data_state['job_id']
assert data_state['data_type']
assert data_state['description']
assert data_state['label']
assert data_state['descriptor']
assert data_state['data_id']
def test_404_on_GET_by_id_if_no_auth(self, test_client, big_post):
......@@ -415,7 +418,7 @@ class TestData:
with test_client.session_transaction() as sess:
assert len(sess['data_ids']) == 1
def test_no_access_on_failure(self, test_client, small_post):
def test_no_access_on_failure(self, test_client):
rv = test_client.post(
'/data', data=flask.json.dumps(dict(
handler='test',
......@@ -440,4 +443,59 @@ class TestData:
sess['data_ids'] = []
rv = test_client.get('/data/{}'.format(data_id))
assert rv.status_code == 404
\ No newline at end of file
assert rv.status_code == 404
def test_delete_and_no_db_entries(self, test_client, small_post):
rv = small_post(random=True)
body = flask.json.loads(rv.get_data())
assert rv.status_code == 201, body
data_ids = body['data_ids']
assert len(data_ids) == 1
data_id = data_ids[0]
data_obj = redis.get('data:{}'.format(data_id))
assert data_obj
assert redis.exists('shadow:data:{}'.format(data_id))
test_client.delete('/data/{}?wait=1'.format(data_id))
assert not redis.exists('data:{}'.format(data_id))
assert not redis.exists('shadow:data:{}'.format(data_id))
def test_delete_and_no_files(self, test_client, small_post):
rv = small_post(random=True)
body = flask.json.loads(rv.get_data())
assert rv.status_code == 201, body
data_ids = body['data_ids']
assert len(data_ids) == 1
data_id = data_ids[0]
test_client.get('/data/{}?wait=1'.format(data_id))
data_obj = json.loads(redis.get('data:{}'.format(data_id)))
assert os.path.exists(data_obj['file_path'])
test_client.delete('/data/{}?wait=1'.format(data_id))
assert not os.path.exists(data_obj['file_path'])
def test_delete_and_no_id_in_session(self, test_client, small_post):
rv = small_post(random=True)
body = flask.json.loads(rv.get_data())
assert rv.status_code == 201, body
data_ids = body['data_ids']
assert len(data_ids) == 1
data_id = data_ids[0]
test_client.get('/data/{}?wait=1'.format(data_id))
with test_client.session_transaction() as sess:
assert data_id in sess['data_ids']
test_client.delete('/data/{}?wait=1'.format(data_id))
test_client.get('/data/{}?wait=1'.format(data_id))
with test_client.session_transaction() as sess:
assert data_id not in sess['data_ids']
def test_cannot_delete_without_auth(self, test_client, small_post):
rv = small_post(random=True)
body = flask.json.loads(rv.get_data())
assert rv.status_code == 201, body
data_ids = body['data_ids']
assert len(data_ids) == 1
data_id = data_ids[0]
test_client.get('/data/{}?wait=1'.format(data_id))
with test_client.session_transaction() as sess:
sess['data_ids'] = []
rv = test_client.delete('/data/{}?wait=1'.format(data_id))
assert rv.status_code == 404
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment