From 67f7d5fcff28ebd381bc82c749eb8a94b4cf18f0 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Tue, 20 Oct 2020 22:58:41 -0700 Subject: [PATCH 01/42] Generate JWT in Flask app --- setup.cfg | 2 +- superset/app.py | 7 +++ superset/config.py | 10 ++++ superset/extensions.py | 2 + superset/utils/async_query_manager.py | 75 +++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 superset/utils/async_query_manager.py diff --git a/setup.cfg b/setup.cfg index 44f7e0d1461e2..c7af1c3d2aff2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ combine_as_imports = true include_trailing_comma = true line_length = 88 known_first_party = superset -known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,croniter,cryptography,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,polyline,prison,pyarrow,pyhive,pytest,pytz,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml +known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,croniter,cryptography,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,polyline,prison,pyarrow,pyhive,pytest,pytz,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml multi_line_output = 3 order_by_type = false diff --git a/superset/app.py b/superset/app.py index dc8d612d144e5..2952258821546 100644 --- a/superset/app.py +++ b/superset/app.py @@ -30,6 +30,7 @@ _event_logger, APP_DIR, appbuilder, + async_query_manager, cache_manager, celery_app, csrf, @@ -485,6 +486,7 @@ def init_app_in_ctx(self) -> None: self.configure_url_map_converters() self.configure_data_sources() self.configure_auth_provider() + self.configure_async_queries() # Hook that provides administrators a handle on the Flask APP # after initialization @@ -639,6 +641,11 @@ def configure_wtf(self) -> None: for ex in csrf_exempt_list: csrf.exempt(ex) + def configure_async_queries(self) -> None: + if feature_flag_manager.is_feature_enabled("GLOBAL_ASYNC_QUERIES"): + logger.info("*************** Init async queries") + async_query_manager.init_app(self.flask_app) + def register_blueprints(self) -> None: for bp in self.config["BLUEPRINTS"]: try: diff --git a/superset/config.py b/superset/config.py index 012c7f7af760f..978fddae50baa 100644 --- a/superset/config.py +++ b/superset/config.py @@ -323,6 +323,7 @@ def _try_json_readsha( # pylint: disable=unused-argument # When True, this escapes HTML (rather than rendering it) in Markdown components "ESCAPE_MARKDOWN_HTML": False, "SIP_34_ANNOTATIONS_UI": False, + "GLOBAL_ASYNC_QUERIES": False, } # Set the default view to card/grid view if thumbnail support is enabled. @@ -944,6 +945,15 @@ class CeleryConfig: # pylint: disable=too-few-public-methods # conventions and such. You can find examples in the tests. SQLA_TABLE_MUTATOR = lambda table: table +# Global async query config options. +# Requires GLOBAL_ASYNC_QUERIES feature flag to be enabled. 
+GLOBAL_ASYNC_QUERIES_REDIS_CONFIG = {} +GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME = "async-token" +GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SECURE = ( + False # this should likely be True in production +) +GLOBAL_ASYNC_QUERIES_JWT_SECRET = "test-secret-change-me" + if CONFIG_PATH_ENV_VAR in os.environ: # Explicitly import config module that is not necessarily in pythonpath; useful # for case where app is being executed via pex. diff --git a/superset/extensions.py b/superset/extensions.py index 9be0c37bb937a..4bd7b4286e705 100644 --- a/superset/extensions.py +++ b/superset/extensions.py @@ -32,6 +32,7 @@ from flask_wtf.csrf import CSRFProtect from werkzeug.local import LocalProxy +from superset.utils.async_query_manager import AsyncQueryManager from superset.utils.cache_manager import CacheManager from superset.utils.feature_flag_manager import FeatureFlagManager from superset.utils.machine_auth import MachineAuthProviderFactory @@ -133,6 +134,7 @@ def get_manifest_files(self, bundle: str, asset_type: str) -> List[str]: APP_DIR = os.path.dirname(__file__) appbuilder = AppBuilder(update_perms=False) +async_query_manager = AsyncQueryManager() cache_manager = CacheManager() celery_app = celery.Celery() csrf = CSRFProtect() diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py new file mode 100644 index 0000000000000..d9eb70f7e9a9d --- /dev/null +++ b/superset/utils/async_query_manager.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import logging +import uuid +from typing import Any, Dict + +import jwt +from flask import Flask, Response, session + +logger = logging.getLogger(__name__) + + +class AsyncQueryManager: + def __init__(self) -> None: + super().__init__() + self._jwt_cookie_name = None + self._jwt_secret = None + + def init_app(self, app: Flask) -> None: + self._jwt_cookie_name = app.config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME"] + self._jwt_secret = app.config["GLOBAL_ASYNC_QUERIES_JWT_SECRET"] + + @app.after_request + def set_async_jwt_cookie(response: Response) -> Response: + reset_token = False + user_id = None + + if "user_id" in session: + user_id = session["user_id"] + + if "async_channel_id" not in session or "async_user_id" not in session: + reset_token = True + elif user_id != session["async_user_id"]: + reset_token = True + + if reset_token: + logger.info("******************** setting async_channel_id") + async_channel_id = str(uuid.uuid4()) + session["async_channel_id"] = async_channel_id + session["async_user_id"] = user_id + + token = self.get_jwt({"channel": async_channel_id, "user_id": user_id}) + + response.set_cookie( + self._jwt_cookie_name, + value=token, + httponly=True, + # max_age=max_age or config.cookie_max_age, + # secure=config.cookie_secure, + # domain=config.cookie_domain, + # path=config.access_cookie_path, + # samesite=config.cookie_samesite + ) + + # logger.info("session", session) + + return response + + def get_jwt(self, data: Dict) -> Dict[str, Any]: + encoded_jwt = jwt.encode(data, self._jwt_secret, algorithm="HS256") + return encoded_jwt From 1e8c03918e8227ef2a7e5d764d7961dc2e75da31 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 22 Oct 2020 15:38:26 -0700 Subject: [PATCH 02/42] Refactor chart data API query logic, add JWT validation and async worker --- superset/charts/api.py | 64 ++++++++++++++++++---- superset/charts/commands/data.py | 76 ++++++++++++++++++++++++++ superset/charts/commands/exceptions.py | 8 +++ superset/config.py | 14 +++-- superset/tasks/async_queries.py | 66 ++++++++++++++++++++++ superset/utils/async_query_manager.py | 33 +++++++++-- 6 files changed, 239 insertions(+), 22 deletions(-) create mode 100644 superset/charts/commands/data.py create mode 100644 superset/tasks/async_queries.py diff --git a/superset/charts/api.py b/superset/charts/api.py index 3ef27d097ee63..41b37adfa296c 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -33,10 +33,13 @@ from superset import is_feature_enabled, thumbnail_cache from superset.charts.commands.bulk_delete import BulkDeleteChartCommand from superset.charts.commands.create import CreateChartCommand +from superset.charts.commands.data import ChartDataCommand from superset.charts.commands.delete import DeleteChartCommand from superset.charts.commands.exceptions import ( ChartBulkDeleteFailedError, ChartCreateFailedError, + ChartDataQueryFailedError, + ChartDataValidationError, ChartDeleteFailedError, ChartForbiddenError, ChartInvalidError, @@ -59,9 +62,10 @@ ) from superset.constants import RouteMethod from superset.exceptions import SupersetSecurityException -from superset.extensions import event_logger +from superset.extensions import async_query_manager, event_logger from superset.models.slice import Slice from superset.tasks.thumbnails import cache_chart_thumbnail +from superset.utils.async_query_manager import AsyncQueryTokenException from superset.utils.core import ChartDataResultFormat, json_int_dttm_ser from superset.utils.screenshots import ChartScreenshot from superset.utils.urls 
import get_url_path @@ -475,6 +479,26 @@ def data(self) -> Response: json_body = json.loads(request.form["form_data"]) else: return self.response_400(message="Request is not JSON") + + # try: + # query_context = ChartDataQueryContextSchema().load(json_body) + # except KeyError: + # return self.response_400(message="Request is incorrect") + # except ValidationError as error: + # return self.response_400( + # message=_("Request is incorrect: %(error)s", error=error.messages) + # ) + # logger.info('************* legacy query_context') + # logger.info(query_context.queries[0].__dict__); + + try: + command = ChartDataCommand(g.user, json_body) + except ChartDataValidationError as exc: + logger.error("********* failed validation") + return self.response_400(message=exc.message) + except SupersetSecurityException: + return self.response_401() + try: query_context = ChartDataQueryContextSchema().load(json_body) except KeyError: @@ -483,25 +507,39 @@ def data(self) -> Response: return self.response_400( message=_("Request is incorrect: %(error)s", error=error.messages) ) + # try: + # query_context.raise_for_access() + # except SupersetSecurityException: + # return self.response_401() + + if is_feature_enabled("GLOBAL_ASYNC_QUERIES"): + try: + jwt_data = async_query_manager.parse_jwt_from_request(request) + async_channel_id = jwt_data["async_channel_id"] + except AsyncQueryTokenException: + return self.response_401() + + result = command.run_async(async_channel_id) + return self.response(202) + # return self.response( + # 202, cache_key=cache_key, chart_url=chart_url, image_url=image_url + # ) + try: - query_context.raise_for_access() - except SupersetSecurityException: - return self.response_401() - payload = query_context.get_payload() - for query in payload: - if query.get("error"): - return self.response_400(message=f"Error: {query['error']}") - result_format = query_context.result_format + result = command.run() + except ChartDataQueryFailedError as exc: + return self.response_400(message=exc.message) + result_format = result["query_context"].result_format response = self.response_400( message=f"Unsupported result_format: {result_format}" ) if result_format == ChartDataResultFormat.CSV: # return the first result - result = payload[0]["data"] + data = result["payload"][0]["data"] response = CsvResponse( - result, + data, status=200, headers=generate_download_headers("csv"), mimetype="application/csv", @@ -509,7 +547,9 @@ def data(self) -> Response: if result_format == ChartDataResultFormat.JSON: response_data = simplejson.dumps( - {"result": payload}, default=json_int_dttm_ser, ignore_nan=True + {"result": result["payload"]}, + default=json_int_dttm_ser, + ignore_nan=True, ) resp = make_response(response_data, 200) resp.headers["Content-Type"] = "application/json; charset=utf-8" diff --git a/superset/charts/commands/data.py b/superset/charts/commands/data.py new file mode 100644 index 0000000000000..5903fcbd7d782 --- /dev/null +++ b/superset/charts/commands/data.py @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import logging +from typing import Dict, List, Optional + +from flask_appbuilder.models.sqla import Model +from flask_appbuilder.security.sqla.models import User +from marshmallow import ValidationError + +from superset.charts.commands.exceptions import ( + ChartDataQueryFailedError, + ChartDataValidationError, +) +from superset.charts.dao import ChartDAO +from superset.charts.schemas import ChartDataQueryContextSchema +from superset.commands.base import BaseCommand +from superset.common.query_context import QueryContext +from superset.exceptions import SupersetSecurityException +from superset.models.dashboard import Dashboard +from superset.models.slice import Slice +from superset.tasks.async_queries import load_chart_data_into_cache +from superset.views.base import check_ownership + +logger = logging.getLogger(__name__) + + +class ChartDataCommand(BaseCommand): + def __init__(self, user: User, form_data: Dict): + self._actor = user + self._form_data = form_data + self._query_context: Optional[QueryContext] = None + # self._async_channel_id = async_channel_id + self.validate() + + def run(self): + logger.info("******** ChartDataCommand run_sync") + # logger.info('************ new query_context') + # logger.info(self._query_context.queries[0].__dict__); + + # caching is handled in query_context.get_df_payload (also evals `force` property) + payload = self._query_context.get_payload() + + for query in payload: + if query.get("error"): + raise ChartDataQueryFailedError(f"Error: {query['error']}") + + return {"query_context": self._query_context, "payload": payload} + + def run_async(self, async_channel_id: str): + # TODO: confirm cache backend is configured + job_info = load_chart_data_into_cache.delay(self._actor, self._form_data) + + def validate(self) -> None: + try: + self._query_context = ChartDataQueryContextSchema().load(self._form_data) + self._query_context.raise_for_access() + except KeyError: + raise ChartDataValidationError("Request is incorrect") + except ValidationError as error: + raise ChartDataValidationError( + "Request is incorrect: %(error)s", error=error.messages + ) diff --git a/superset/charts/commands/exceptions.py b/superset/charts/commands/exceptions.py index 2308d62a77216..967f1e32c21bb 100644 --- a/superset/charts/commands/exceptions.py +++ b/superset/charts/commands/exceptions.py @@ -83,3 +83,11 @@ class ChartForbiddenError(ForbiddenError): class ChartBulkDeleteFailedError(CreateFailedError): message = _("Charts could not be deleted.") + + +class ChartDataValidationError(CommandException): + pass + + +class ChartDataQueryFailedError(CommandException): + pass diff --git a/superset/config.py b/superset/config.py index 978fddae50baa..cee12b14e5e65 100644 --- a/superset/config.py +++ b/superset/config.py @@ -947,12 +947,18 @@ class CeleryConfig: # pylint: disable=too-few-public-methods # Global async query config options. # Requires GLOBAL_ASYNC_QUERIES feature flag to be enabled. 
-GLOBAL_ASYNC_QUERIES_REDIS_CONFIG = {} +GLOBAL_ASYNC_QUERIES_REDIS_CONFIG = { + "port": 6379, + "host": "127.0.0.1", + "family": 4, + "password": "", + "db": 0, +} +GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX = "async-events-" GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME = "async-token" -GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SECURE = ( - False # this should likely be True in production -) +GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SECURE = False GLOBAL_ASYNC_QUERIES_JWT_SECRET = "test-secret-change-me" +GLOBAL_ASYNC_QUERIES_TRANSPORT = "ws" if CONFIG_PATH_ENV_VAR in os.environ: # Explicitly import config module that is not necessarily in pythonpath; useful diff --git a/superset/tasks/async_queries.py b/superset/tasks/async_queries.py new file mode 100644 index 0000000000000..a1352337f54ee --- /dev/null +++ b/superset/tasks/async_queries.py @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Utility functions used across Superset""" + +import logging +from typing import Dict, Optional + +from flask import current_app +from flask_appbuilder.security.sqla.models import User + +from superset import app, security_manager, thumbnail_cache +from superset.extensions import celery_app + +logger = logging.getLogger(__name__) +query_timeout = current_app.config[ + "SQLLAB_ASYNC_TIME_LIMIT_SEC" +] # TODO: new config key + + +@celery_app.task(name="load_chart_data_into_cache", soft_time_limit=query_timeout) +def load_chart_data_into_cache(user: User, form_data: Dict,) -> None: + from superset.charts.commands.data import ( + ChartDataCommand, + ) # load here to prevent circular imports + + with app.app_context(): # type: ignore + try: + command = ChartDataCommand(user, form_data) + except ChartDataValidationError as exc: + # TODO: update job status + raise + except SupersetSecurityException: + # TODO: update job status + raise + + command.run() + + # if not thumbnail_cache: + # logger.warning("No cache set, refusing to compute") + # return None + # logger.info("Caching chart: %s", url) + # screenshot = ChartScreenshot(url, digest) + # user = security_manager.find_user(current_app.config["THUMBNAIL_SELENIUM_USER"]) + # screenshot.compute_and_cache( + # user=user, + # cache=thumbnail_cache, + # force=force, + # window_size=window_size, + # thumb_size=thumb_size, + # ) + return None diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index d9eb70f7e9a9d..73ed1919ba12f 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -24,18 +24,24 @@ logger = logging.getLogger(__name__) +class AsyncQueryTokenException(Exception): + pass + + class AsyncQueryManager: def __init__(self) -> None: super().__init__() self._jwt_cookie_name = None + self._jwt_cookie_secure = None self._jwt_secret = None def 
init_app(self, app: Flask) -> None: self._jwt_cookie_name = app.config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME"] + self._jwt_cookie_secure = app.config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SECURE"] self._jwt_secret = app.config["GLOBAL_ASYNC_QUERIES_JWT_SECRET"] @app.after_request - def set_async_jwt_cookie(response: Response) -> Response: + def validate_session(response: Response) -> Response: reset_token = False user_id = None @@ -53,23 +59,38 @@ def set_async_jwt_cookie(response: Response) -> Response: session["async_channel_id"] = async_channel_id session["async_user_id"] = user_id - token = self.get_jwt({"channel": async_channel_id, "user_id": user_id}) + token = self.generate_jwt( + {"channel": async_channel_id, "user_id": user_id} + ) response.set_cookie( self._jwt_cookie_name, value=token, httponly=True, + secure=self._jwt_cookie_secure, # max_age=max_age or config.cookie_max_age, - # secure=config.cookie_secure, # domain=config.cookie_domain, # path=config.access_cookie_path, # samesite=config.cookie_samesite ) - # logger.info("session", session) - return response - def get_jwt(self, data: Dict) -> Dict[str, Any]: + def generate_jwt(self, data: Dict) -> Dict[str, Any]: encoded_jwt = jwt.encode(data, self._jwt_secret, algorithm="HS256") return encoded_jwt + + def parse_jwt(self, token: str) -> Dict[str, Any]: + data = jwt.decode(token, self._jwt_secret, algorithms=["HS256"]) + return data + + def parse_jwt_from_request(self, request: Dict) -> Dict[str, Any]: + token = request.cookies.get(self._jwt_cookie_name) + if not token: + raise AsyncQueryTokenException("Token not preset") + + try: + return self.parse_jwt(token) + except Exception as exc: + logger.warning(exc) + raise AsyncQueryTokenException("Failed to parse token") From a50750b6de3da5e1101a2a96587b5662985a3065 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Tue, 27 Oct 2020 10:01:04 -0700 Subject: [PATCH 03/42] Add redis stream implementation, refactoring --- setup.cfg | 2 +- superset/charts/api.py | 38 +++-------------- superset/charts/commands/data.py | 36 ++++++++-------- superset/config.py | 3 +- superset/tasks/async_queries.py | 51 ++++++++++------------- superset/utils/async_query_manager.py | 60 +++++++++++++++++++++++++-- 6 files changed, 102 insertions(+), 88 deletions(-) diff --git a/setup.cfg b/setup.cfg index c7af1c3d2aff2..07a0bcb131bef 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ combine_as_imports = true include_trailing_comma = true line_length = 88 known_first_party = superset -known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,croniter,cryptography,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,polyline,prison,pyarrow,pyhive,pytest,pytz,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml +known_third_party 
=alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,croniter,cryptography,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,polyline,prison,pyarrow,pyhive,pytest,pytz,redis,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml multi_line_output = 3 order_by_type = false diff --git a/superset/charts/api.py b/superset/charts/api.py index 41b37adfa296c..583df6547afcf 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -480,50 +480,22 @@ def data(self) -> Response: else: return self.response_400(message="Request is not JSON") - # try: - # query_context = ChartDataQueryContextSchema().load(json_body) - # except KeyError: - # return self.response_400(message="Request is incorrect") - # except ValidationError as error: - # return self.response_400( - # message=_("Request is incorrect: %(error)s", error=error.messages) - # ) - # logger.info('************* legacy query_context') - # logger.info(query_context.queries[0].__dict__); - try: - command = ChartDataCommand(g.user, json_body) + command = ChartDataCommand(json_body) + command.validate() except ChartDataValidationError as exc: - logger.error("********* failed validation") return self.response_400(message=exc.message) except SupersetSecurityException: return self.response_401() - try: - query_context = ChartDataQueryContextSchema().load(json_body) - except KeyError: - return self.response_400(message="Request is incorrect") - except ValidationError as error: - return self.response_400( - message=_("Request is incorrect: %(error)s", error=error.messages) - ) - # try: - # query_context.raise_for_access() - # except SupersetSecurityException: - # return self.response_401() - if is_feature_enabled("GLOBAL_ASYNC_QUERIES"): try: - jwt_data = async_query_manager.parse_jwt_from_request(request) - async_channel_id = jwt_data["async_channel_id"] + command.validate_request(request) except AsyncQueryTokenException: return self.response_401() - result = command.run_async(async_channel_id) - return self.response(202) - # return self.response( - # 202, cache_key=cache_key, chart_url=chart_url, image_url=image_url - # ) + result = command.run_async() + return self.response(202, **result) try: result = command.run() diff --git a/superset/charts/commands/data.py b/superset/charts/commands/data.py index 5903fcbd7d782..db1ebecea4096 100644 --- a/superset/charts/commands/data.py +++ b/superset/charts/commands/data.py @@ -17,40 +17,28 @@ import logging from typing import Dict, List, Optional -from flask_appbuilder.models.sqla import Model -from flask_appbuilder.security.sqla.models import User from marshmallow import ValidationError from superset.charts.commands.exceptions import ( ChartDataQueryFailedError, ChartDataValidationError, ) -from superset.charts.dao import ChartDAO from superset.charts.schemas import ChartDataQueryContextSchema from superset.commands.base import BaseCommand from superset.common.query_context import QueryContext -from superset.exceptions import SupersetSecurityException -from superset.models.dashboard import Dashboard -from superset.models.slice import Slice +from superset.extensions import async_query_manager from superset.tasks.async_queries import load_chart_data_into_cache -from 
superset.views.base import check_ownership logger = logging.getLogger(__name__) class ChartDataCommand(BaseCommand): - def __init__(self, user: User, form_data: Dict): - self._actor = user + def __init__(self, form_data: Dict): self._form_data = form_data self._query_context: Optional[QueryContext] = None - # self._async_channel_id = async_channel_id - self.validate() + self._async_channel_id = None def run(self): - logger.info("******** ChartDataCommand run_sync") - # logger.info('************ new query_context') - # logger.info(self._query_context.queries[0].__dict__); - # caching is handled in query_context.get_df_payload (also evals `force` property) payload = self._query_context.get_payload() @@ -60,17 +48,27 @@ def run(self): return {"query_context": self._query_context, "payload": payload} - def run_async(self, async_channel_id: str): + def run_async(self): # TODO: confirm cache backend is configured - job_info = load_chart_data_into_cache.delay(self._actor, self._form_data) + job_metadata = async_query_manager.init_job(self._async_channel_id) + load_chart_data_into_cache.delay(job_metadata, self._form_data) - def validate(self) -> None: + return job_metadata + + def set_query_context(self) -> None: try: self._query_context = ChartDataQueryContextSchema().load(self._form_data) - self._query_context.raise_for_access() except KeyError: raise ChartDataValidationError("Request is incorrect") except ValidationError as error: raise ChartDataValidationError( "Request is incorrect: %(error)s", error=error.messages ) + + def validate(self) -> None: + self.set_query_context() + self._query_context.raise_for_access() + + def validate_request(self, request: Dict): + jwt_data = async_query_manager.parse_jwt_from_request(request) + self._async_channel_id = jwt_data["channel"] diff --git a/superset/config.py b/superset/config.py index cee12b14e5e65..5b51704a38681 100644 --- a/superset/config.py +++ b/superset/config.py @@ -950,11 +950,12 @@ class CeleryConfig: # pylint: disable=too-few-public-methods GLOBAL_ASYNC_QUERIES_REDIS_CONFIG = { "port": 6379, "host": "127.0.0.1", - "family": 4, "password": "", "db": 0, } GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX = "async-events-" +GLOBAL_ASYNC_QUERIES_REDIS_STREAM_LIMIT = 1000 +GLOBAL_ASYNC_QUERIES_REDIS_STREAM_LIMIT_FIREHOSE = 1000000 GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME = "async-token" GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SECURE = False GLOBAL_ASYNC_QUERIES_JWT_SECRET = "test-secret-change-me" diff --git a/superset/tasks/async_queries.py b/superset/tasks/async_queries.py index a1352337f54ee..b326b6610f234 100644 --- a/superset/tasks/async_queries.py +++ b/superset/tasks/async_queries.py @@ -15,16 +15,17 @@ # specific language governing permissions and limitations # under the License. 
-"""Utility functions used across Superset""" - import logging -from typing import Dict, Optional +from typing import Dict from flask import current_app -from flask_appbuilder.security.sqla.models import User -from superset import app, security_manager, thumbnail_cache -from superset.extensions import celery_app +from superset import app +from superset.charts.commands.exceptions import ( + ChartDataQueryFailedError, + ChartDataValidationError, +) +from superset.extensions import async_query_manager, celery_app logger = logging.getLogger(__name__) query_timeout = current_app.config[ @@ -33,34 +34,24 @@ @celery_app.task(name="load_chart_data_into_cache", soft_time_limit=query_timeout) -def load_chart_data_into_cache(user: User, form_data: Dict,) -> None: +def load_chart_data_into_cache(job_metadata: Dict, form_data: Dict,) -> None: from superset.charts.commands.data import ( ChartDataCommand, - ) # load here to prevent circular imports + ) # load here due to circular imports with app.app_context(): # type: ignore try: - command = ChartDataCommand(user, form_data) - except ChartDataValidationError as exc: - # TODO: update job status - raise - except SupersetSecurityException: - # TODO: update job status - raise - - command.run() + command = ChartDataCommand(form_data) + command.set_query_context() + command.run() + async_query_manager.update_job( + job_metadata, async_query_manager.STATUS_DONE + ) + except Exception as exc: + msg = exc.message if hasattr(exc, "message") else str(exc) + async_query_manager.update_job( + job_metadata, async_query_manager.STATUS_ERROR, msg + ) + raise exc - # if not thumbnail_cache: - # logger.warning("No cache set, refusing to compute") - # return None - # logger.info("Caching chart: %s", url) - # screenshot = ChartScreenshot(url, digest) - # user = security_manager.find_user(current_app.config["THUMBNAIL_SELENIUM_USER"]) - # screenshot.compute_and_cache( - # user=user, - # cache=thumbnail_cache, - # force=force, - # window_size=window_size, - # thumb_size=thumb_size, - # ) return None diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index 73ed1919ba12f..08c98dfe52701 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -14,11 +14,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+import json import logging import uuid from typing import Any, Dict import jwt +import redis from flask import Flask, Response, session logger = logging.getLogger(__name__) @@ -28,14 +30,33 @@ class AsyncQueryTokenException(Exception): pass +class AsyncQueryJobException(Exception): + pass + + class AsyncQueryManager: + STATUS_PENDING = "pending" + STATUS_RUNNING = "running" + STATUS_ERROR = "error" + STATUS_DONE = "done" + def __init__(self) -> None: super().__init__() + self._redis = None + self._stream_prefix = None + self._stream_limit = None + self._stream_limit_firehose = None self._jwt_cookie_name = None self._jwt_cookie_secure = None self._jwt_secret = None def init_app(self, app: Flask) -> None: + self._redis = redis.Redis(**app.config["GLOBAL_ASYNC_QUERIES_REDIS_CONFIG"]) + self._stream_prefix = app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"] + self._stream_limit = app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_LIMIT"] + self._stream_limit_firehose = app.config[ + "GLOBAL_ASYNC_QUERIES_REDIS_STREAM_LIMIT_FIREHOSE" + ] self._jwt_cookie_name = app.config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME"] self._jwt_cookie_secure = app.config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SECURE"] self._jwt_secret = app.config["GLOBAL_ASYNC_QUERIES_JWT_SECRET"] @@ -43,10 +64,7 @@ def init_app(self, app: Flask) -> None: @app.after_request def validate_session(response: Response) -> Response: reset_token = False - user_id = None - - if "user_id" in session: - user_id = session["user_id"] + user_id = session["user_id"] if "user_id" in session else None if "async_channel_id" not in session or "async_user_id" not in session: reset_token = True @@ -94,3 +112,37 @@ def parse_jwt_from_request(self, request: Dict) -> Dict[str, Any]: except Exception as exc: logger.warning(exc) raise AsyncQueryTokenException("Failed to parse token") + + def init_job(self, channel_id: str): + job_id = str(uuid.uuid4()) + return self._build_job_metadata(channel_id, job_id, status=self.STATUS_PENDING) + + def _build_job_metadata(self, channel_id: str, job_id: str, **kwargs): + return { + "channel_id": channel_id, + "job_id": job_id, + "user_id": session["user_id"] if "user_id" in session else None, + "status": kwargs["status"], + "msg": kwargs["msg"] if "msg" in kwargs else None, + } + + def update_job(self, job_metadata: Dict, status: str, msg: str = None): + if "channel_id" not in job_metadata: + raise AsyncQueryJobException("No channel ID specified") + + if "job_id" not in job_metadata: + raise AsyncQueryJobException("No job ID specified") + + updates = {"status": status, "msg": msg} + event_data = {"data": json.dumps({**job_metadata, **updates})} + + logger.info( + f"********** logging event data to stream {self._stream_prefix}{job_metadata['channel_id']}" + ) + logger.info(event_data) + + full_stream_name = f"{self._stream_prefix}full" + scoped_stream_name = f"{self._stream_prefix}{job_metadata['channel_id']}" + + self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit) + self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose) From 64fbfae03952878e74b638ab0228866406447311 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 29 Oct 2020 17:49:08 -0700 Subject: [PATCH 04/42] Add chart data cache endpoint, refactor QueryContext caching --- superset/charts/api.py | 98 ++++++++++++++++++++++++-- superset/charts/commands/data.py | 46 +++++++++--- superset/charts/commands/exceptions.py | 4 ++ superset/common/query_context.py | 71 ++++++++++++++----- superset/exceptions.py | 4 ++ 
superset/tasks/async_queries.py | 12 ++-- superset/utils/async_query_manager.py | 5 +- superset/utils/cache.py | 39 +++++++++- superset/views/api.py | 3 +- superset/views/base_api.py | 1 + tests/query_context_tests.py | 10 +-- 11 files changed, 248 insertions(+), 45 deletions(-) diff --git a/superset/charts/api.py b/superset/charts/api.py index 583df6547afcf..c61ba059a9fa5 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -38,6 +38,7 @@ from superset.charts.commands.exceptions import ( ChartBulkDeleteFailedError, ChartCreateFailedError, + ChartDataCacheLoadError, ChartDataQueryFailedError, ChartDataValidationError, ChartDeleteFailedError, @@ -51,7 +52,6 @@ from superset.charts.filters import ChartAllTextFilter, ChartFavoriteFilter, ChartFilter from superset.charts.schemas import ( CHART_SCHEMAS, - ChartDataQueryContextSchema, ChartPostSchema, ChartPutSchema, get_delete_ids_schema, @@ -91,6 +91,7 @@ class ChartRestApi(BaseSupersetModelRestApi): RouteMethod.RELATED, "bulk_delete", # not using RouteMethod since locally defined "data", + "data_from_cache", "viz_types", } class_permission_name = "SliceModelView" @@ -467,8 +468,12 @@ def data(self) -> Response: application/json: schema: $ref: "#/components/schemas/ChartDataResponseSchema" + 202: + $ref: '#/components/responses/202' 400: $ref: '#/components/responses/400' + 401: + $ref: '#/components/responses/401' 500: $ref: '#/components/responses/500' """ @@ -481,8 +486,8 @@ def data(self) -> Response: return self.response_400(message="Request is not JSON") try: - command = ChartDataCommand(json_body) - command.validate() + command = ChartDataCommand() + command.validate(json_body) except ChartDataValidationError as exc: return self.response_400(message=exc.message) except SupersetSecurityException: @@ -497,8 +502,91 @@ def data(self) -> Response: result = command.run_async() return self.response(202, **result) + # TODO: DRY + try: + result = command.run() + except ChartDataQueryFailedError as exc: + return self.response_400(message=exc.message) + + result_format = result["query_context"].result_format + response = self.response_400( + message=f"Unsupported result_format: {result_format}" + ) + + if result_format == ChartDataResultFormat.CSV: + # return the first result + data = result["queries"][0]["data"] + response = CsvResponse( + data, + status=200, + headers=generate_download_headers("csv"), + mimetype="application/csv", + ) + + if result_format == ChartDataResultFormat.JSON: + response_data = simplejson.dumps( + {"result": result["queries"]}, + default=json_int_dttm_ser, + ignore_nan=True, + ) + resp = make_response(response_data, 200) + resp.headers["Content-Type"] = "application/json; charset=utf-8" + response = resp + + return response + + @expose("/data/", methods=["GET"]) + @event_logger.log_this + @protect() + @safe + @statsd_metrics + def data_from_cache(self, cache_key: str) -> Response: + """ + Takes a query context cache key returns payload + data response for the given query. + --- + get: + description: >- + Takes a query context constructed in the client and returns payload data + response for the given query. 
+ parameters: + - in: path + schema: + type: string + name: cache_key + responses: + 200: + description: Query result + content: + application/json: + schema: + $ref: "#/components/schemas/ChartDataResponseSchema" + 400: + $ref: '#/components/responses/400' + 401: + $ref: '#/components/responses/401' + 404: + $ref: '#/components/responses/404' + 500: + $ref: '#/components/responses/500' + """ + command = ChartDataCommand() + try: + cached_data = command.load_query_context_from_cache(cache_key) + command.validate(cached_data) + except ChartDataCacheLoadError: + return self.response_404() + except ChartDataValidationError as exc: + return self.response_400(message=exc.message) + except SupersetSecurityException as exc: + logger.info(exc) + return self.response_401() + + # TODO: DRY try: result = command.run() + except ChartDataCacheLoadError as exc: + return self.response_400(message=exc.message) except ChartDataQueryFailedError as exc: return self.response_400(message=exc.message) @@ -509,7 +597,7 @@ def data(self) -> Response: if result_format == ChartDataResultFormat.CSV: # return the first result - data = result["payload"][0]["data"] + data = result["queries"][0]["data"] response = CsvResponse( data, status=200, @@ -519,7 +607,7 @@ def data(self) -> Response: if result_format == ChartDataResultFormat.JSON: response_data = simplejson.dumps( - {"result": result["payload"]}, + {"result": result["queries"]}, default=json_int_dttm_ser, ignore_nan=True, ) diff --git a/superset/charts/commands/data.py b/superset/charts/commands/data.py index db1ebecea4096..8374b924a8b51 100644 --- a/superset/charts/commands/data.py +++ b/superset/charts/commands/data.py @@ -15,17 +15,20 @@ # specific language governing permissions and limitations # under the License. import logging -from typing import Dict, List, Optional +from typing import Any, Dict, Optional from marshmallow import ValidationError +from superset import cache from superset.charts.commands.exceptions import ( + ChartDataCacheLoadError, ChartDataQueryFailedError, ChartDataValidationError, ) from superset.charts.schemas import ChartDataQueryContextSchema from superset.commands.base import BaseCommand from superset.common.query_context import QueryContext +from superset.exceptions import CacheLoadError from superset.extensions import async_query_manager from superset.tasks.async_queries import load_chart_data_into_cache @@ -33,20 +36,34 @@ class ChartDataCommand(BaseCommand): - def __init__(self, form_data: Dict): - self._form_data = form_data + def __init__(self): + self._form_data = None self._query_context: Optional[QueryContext] = None self._async_channel_id = None - def run(self): + def run(self, **kwargs): # caching is handled in query_context.get_df_payload (also evals `force` property) - payload = self._query_context.get_payload() + cache_query_context = kwargs["cache"] if "cache" in kwargs else False + force_cached = kwargs["force_cached"] if "force_cached" in kwargs else False + try: + payload = self._query_context.get_payload( + cache_query_context=cache_query_context, force_cached=force_cached + ) + except CacheLoadError: + raise ChartDataCacheLoadError() - for query in payload: + for query in payload["queries"]: if query.get("error"): raise ChartDataQueryFailedError(f"Error: {query['error']}") - return {"query_context": self._query_context, "payload": payload} + return_value = { + "query_context": self._query_context, + "queries": payload["queries"], + } + if cache_query_context: + return_value.update(cache_key=payload["cache_key"]) 
+ + return return_value def run_async(self): # TODO: confirm cache backend is configured @@ -55,7 +72,8 @@ def run_async(self): return job_metadata - def set_query_context(self) -> None: + def set_query_context(self, form_data: Dict) -> None: + self._form_data = form_data try: self._query_context = ChartDataQueryContextSchema().load(self._form_data) except KeyError: @@ -65,10 +83,18 @@ def set_query_context(self) -> None: "Request is incorrect: %(error)s", error=error.messages ) - def validate(self) -> None: - self.set_query_context() + def validate(self, form_data: Dict) -> None: + self.set_query_context(form_data) self._query_context.raise_for_access() def validate_request(self, request: Dict): jwt_data = async_query_manager.parse_jwt_from_request(request) self._async_channel_id = jwt_data["channel"] + + def load_query_context_from_cache(self, cache_key: str) -> Dict[str, Any]: + if cache_key and cache: + cache_value = cache.get(cache_key) + if cache_value: + return cache_value["data"] + else: + raise ChartDataCacheLoadError("Cached data not found") diff --git a/superset/charts/commands/exceptions.py b/superset/charts/commands/exceptions.py index 967f1e32c21bb..5b9c1847b8a5f 100644 --- a/superset/charts/commands/exceptions.py +++ b/superset/charts/commands/exceptions.py @@ -91,3 +91,7 @@ class ChartDataValidationError(CommandException): class ChartDataQueryFailedError(CommandException): pass + + +class ChartDataCacheLoadError(CommandException): + pass diff --git a/superset/common/query_context.py b/superset/common/query_context.py index 61f038f53e8a7..04045592bebf1 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import copy +import hashlib import logging import math from datetime import datetime, timedelta @@ -28,11 +29,11 @@ from superset.common.query_object import QueryObject from superset.connectors.base.models import BaseDatasource from superset.connectors.connector_registry import ConnectorRegistry -from superset.exceptions import QueryObjectValidationError +from superset.exceptions import CacheLoadError, QueryObjectValidationError from superset.stats_logger import BaseStatsLogger from superset.utils import core as utils +from superset.utils.cache import generate_cache_key, set_and_log_cache from superset.utils.core import DTTM_ALIAS -from superset.viz import set_and_log_cache config = app.config stats_logger: BaseStatsLogger = config["STATS_LOGGER"] @@ -74,6 +75,13 @@ def __init__( # pylint: disable=too-many-arguments self.custom_cache_timeout = custom_cache_timeout self.result_type = result_type or utils.ChartDataResultType.FULL self.result_format = result_format or utils.ChartDataResultFormat.JSON + self.cache_values = { + "datasource": datasource, + "queries": queries, + "force": force, + "result_type": result_type, + "result_format": result_format, + } def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]: """Returns a pandas dataframe based on the query object""" @@ -138,8 +146,11 @@ def get_data(self, df: pd.DataFrame,) -> Union[str, List[Dict[str, Any]]]: return df.to_dict(orient="records") - def get_single_payload(self, query_obj: QueryObject) -> Dict[str, Any]: + def get_single_payload( + self, query_obj: QueryObject, **kwargs: Any + ) -> Dict[str, Any]: """Returns a payload of metadata and data""" + force_cached = kwargs["force_cached"] if "force_cached" in kwargs else False if self.result_type == 
utils.ChartDataResultType.QUERY: return { "query": self.datasource.get_query_str(query_obj.to_dict()), @@ -154,7 +165,7 @@ def get_single_payload(self, query_obj: QueryObject) -> Dict[str, Any]: query_obj.row_limit = min(row_limit, config["SAMPLES_ROW_LIMIT"]) query_obj.row_offset = 0 query_obj.columns = [o.column_name for o in self.datasource.columns] - payload = self.get_df_payload(query_obj) + payload = self.get_df_payload(query_obj, force_cached=force_cached) # TODO: implement payload["annotation_data"] = [] df = payload["df"] @@ -182,9 +193,27 @@ def get_single_payload(self, query_obj: QueryObject) -> Dict[str, Any]: return {"data": payload["data"]} return payload - def get_payload(self) -> List[Dict[str, Any]]: + def get_payload(self, **kwargs: Any) -> Dict[str, Any]: + cache_query_context = ( + kwargs["cache_query_context"] if "cache_query_context" in kwargs else False + ) + force_cached = kwargs["force_cached"] if "force_cached" in kwargs else False + """Get all the payloads from the QueryObjects""" - return [self.get_single_payload(query_object) for query_object in self.queries] + query_results = [ + self.get_single_payload(query_object, force_cached=force_cached) + for query_object in self.queries + ] + return_value = {"queries": query_results} + + if cache_query_context: + cache_key = self.cache_key() + set_and_log_cache( + cache, cache_key, self.cache_timeout, {"data": self.cache_values} + ) + return_value.update(cache_key=cache_key) + + return return_value @property def cache_timeout(self) -> int: @@ -199,7 +228,18 @@ def cache_timeout(self) -> int: return self.datasource.database.cache_timeout return config["CACHE_DEFAULT_TIMEOUT"] - def cache_key(self, query_obj: QueryObject, **kwargs: Any) -> Optional[str]: + def cache_key(self, **extra: Any) -> str: + """ + The cache key is made out of the key/values from self.cached_values, plus any + other key/values in `extra` + """ + key_prefix = "qc-" + cache_dict = self.cache_values.copy() + cache_dict.update(extra) + + return generate_cache_key(cache_dict, key_prefix) + + def query_cache_key(self, query_obj: QueryObject, **kwargs: Any) -> Optional[str]: extra_cache_keys = self.datasource.get_extra_cache_keys(query_obj.to_dict()) cache_key = ( @@ -211,7 +251,7 @@ def cache_key(self, query_obj: QueryObject, **kwargs: Any) -> Optional[str]: and self.datasource.is_rls_supported else [], changed_on=self.datasource.changed_on, - **kwargs + **kwargs, ) if query_obj else None @@ -222,12 +262,12 @@ def get_df_payload( # pylint: disable=too-many-statements self, query_obj: QueryObject, **kwargs: Any ) -> Dict[str, Any]: """Handles caching around the df payload retrieval""" - cache_key = self.cache_key(query_obj, **kwargs) + force_cached = kwargs["force_cached"] if "force_cached" in kwargs else False + cache_key = self.query_cache_key(query_obj) logger.info("Cache key: %s", cache_key) is_loaded = False stacktrace = None df = pd.DataFrame() - cached_dttm = datetime.utcnow().isoformat().split(".")[0] cache_value = None status = None query = "" @@ -249,6 +289,10 @@ def get_df_payload( # pylint: disable=too-many-statements ) logger.info("Serving from cache") + if force_cached and not is_loaded: + logger.warning(f"force_cached: value not found for key {cache_key}") + raise CacheLoadError() + if query_obj and not is_loaded: try: invalid_columns = [ @@ -287,12 +331,7 @@ def get_df_payload( # pylint: disable=too-many-statements if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED: set_and_log_cache( - cache_key, - df, - query, - 
cached_dttm, - self.cache_timeout, - self.datasource.uid, + cache, cache_key, self.cache_timeout, {"df": df, "query": query} ) return { "cache_key": cache_key, diff --git a/superset/exceptions.py b/superset/exceptions.py index c0d55f8924426..839a90f16a98b 100644 --- a/superset/exceptions.py +++ b/superset/exceptions.py @@ -91,6 +91,10 @@ class QueryObjectValidationError(SupersetException): status = 400 +class CacheLoadError(SupersetException): + status = 404 + + class DashboardImportException(SupersetException): pass diff --git a/superset/tasks/async_queries.py b/superset/tasks/async_queries.py index b326b6610f234..2637efdd866a1 100644 --- a/superset/tasks/async_queries.py +++ b/superset/tasks/async_queries.py @@ -41,16 +41,18 @@ def load_chart_data_into_cache(job_metadata: Dict, form_data: Dict,) -> None: with app.app_context(): # type: ignore try: - command = ChartDataCommand(form_data) - command.set_query_context() - command.run() + command = ChartDataCommand() + command.set_query_context(form_data) + result = command.run(cache=True) async_query_manager.update_job( - job_metadata, async_query_manager.STATUS_DONE + job_metadata, + async_query_manager.STATUS_DONE, + cache_key=result["cache_key"], ) except Exception as exc: msg = exc.message if hasattr(exc, "message") else str(exc) async_query_manager.update_job( - job_metadata, async_query_manager.STATUS_ERROR, msg + job_metadata, async_query_manager.STATUS_ERROR, msg=msg ) raise exc diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index 08c98dfe52701..d6c7fed127bd2 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -124,16 +124,17 @@ def _build_job_metadata(self, channel_id: str, job_id: str, **kwargs): "user_id": session["user_id"] if "user_id" in session else None, "status": kwargs["status"], "msg": kwargs["msg"] if "msg" in kwargs else None, + "cache_key": kwargs["cache_key"] if "cache_key" in kwargs else None, } - def update_job(self, job_metadata: Dict, status: str, msg: str = None): + def update_job(self, job_metadata: Dict, status: str, **kwargs: Any): if "channel_id" not in job_metadata: raise AsyncQueryJobException("No channel ID specified") if "job_id" not in job_metadata: raise AsyncQueryJobException("No job ID specified") - updates = {"status": status, "msg": msg} + updates = {"status": status, **kwargs} event_data = {"data": json.dumps({**job_metadata, **updates})} logger.info( diff --git a/superset/utils/cache.py b/superset/utils/cache.py index 1e51909bfa434..387fd32fa754c 100644 --- a/superset/utils/cache.py +++ b/superset/utils/cache.py @@ -14,11 +14,48 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-from typing import Any, Callable, Optional +import hashlib +import json +import logging +from datetime import datetime +from typing import Any, Callable, Dict, Optional from flask import request +from flask_caching import Cache +from superset import app, utils from superset.extensions import cache_manager +from superset.stats_logger import BaseStatsLogger +from superset.utils.core import json_int_dttm_ser + +config = app.config +stats_logger: BaseStatsLogger = config["STATS_LOGGER"] +logger = logging.getLogger(__name__) + +# TODO: DRY up cache key code +def json_dumps(obj: Any, sort_keys: bool = False) -> str: + return json.dumps(obj, default=json_int_dttm_ser, sort_keys=sort_keys) + + +def generate_cache_key(values_dict: Dict[str, Any], key_prefix: str = "") -> str: + json_data = json_dumps(values_dict, sort_keys=True) + hash = hashlib.md5(json_data.encode("utf-8")).hexdigest() + return f"{key_prefix}{hash}" + + +def set_and_log_cache( + cache_instance: Cache, cache_key: str, cache_timeout: int, cache_value: Dict, +) -> None: + try: + dttm = datetime.utcnow().isoformat().split(".")[0] + value = {**cache_value, "dttm": dttm} + cache_instance.set(cache_key, value, timeout=cache_timeout) + stats_logger.incr("set_cache_key") + except Exception as ex: + # cache.set call can fail if the backend is down or if + # the key is too large or whatever other reasons + logger.warning("Could not cache key {}".format(cache_key)) + logger.exception(ex) def view_cache_key(*args: Any, **kwargs: Any) -> str: # pylint: disable=unused-argument diff --git a/superset/views/api.py b/superset/views/api.py index a5090b31c9fcd..1b19455126aba 100644 --- a/superset/views/api.py +++ b/superset/views/api.py @@ -44,7 +44,8 @@ def query(self) -> FlaskResponse: """ query_context = QueryContext(**json.loads(request.form["query_context"])) query_context.raise_for_access() - payload_json = query_context.get_payload() + result = query_context.get_payload() + payload_json = result["queries"] return json.dumps( payload_json, default=utils.json_int_dttm_ser, ignore_nan=True ) diff --git a/superset/views/base_api.py b/superset/views/base_api.py index d507d4ace8c90..6335705d0c327 100644 --- a/superset/views/base_api.py +++ b/superset/views/base_api.py @@ -125,6 +125,7 @@ class BaseSupersetModelRestApi(ModelRestApi): method_permission_name = { "bulk_delete": "delete", "data": "list", + "data_from_cache": "list", "delete": "delete", "distinct": "list", "export": "mulexport", diff --git a/tests/query_context_tests.py b/tests/query_context_tests.py index 5bad2d1efd10f..8051f4134c875 100644 --- a/tests/query_context_tests.py +++ b/tests/query_context_tests.py @@ -79,7 +79,7 @@ def test_cache_key_changes_when_datasource_is_updated(self): # construct baseline cache_key query_context = ChartDataQueryContextSchema().load(payload) query_object = query_context.queries[0] - cache_key_original = query_context.cache_key(query_object) + cache_key_original = query_context.query_cache_key(query_object) # make temporary change and revert it to refresh the changed_on property datasource = ConnectorRegistry.get_datasource( @@ -96,7 +96,7 @@ def test_cache_key_changes_when_datasource_is_updated(self): # create new QueryContext with unchanged attributes and extract new cache_key query_context = ChartDataQueryContextSchema().load(payload) query_object = query_context.queries[0] - cache_key_new = query_context.cache_key(query_object) + cache_key_new = query_context.query_cache_key(query_object) # the new cache_key should be different due to updated 
datasource self.assertNotEqual(cache_key_original, cache_key_new) @@ -112,20 +112,20 @@ def test_cache_key_changes_when_post_processing_is_updated(self): # construct baseline cache_key from query_context with post processing operation query_context = ChartDataQueryContextSchema().load(payload) query_object = query_context.queries[0] - cache_key_original = query_context.cache_key(query_object) + cache_key_original = query_context.query_cache_key(query_object) # ensure added None post_processing operation doesn't change cache_key payload["queries"][0]["post_processing"].append(None) query_context = ChartDataQueryContextSchema().load(payload) query_object = query_context.queries[0] - cache_key_with_null = query_context.cache_key(query_object) + cache_key_with_null = query_context.query_cache_key(query_object) self.assertEqual(cache_key_original, cache_key_with_null) # ensure query without post processing operation is different payload["queries"][0].pop("post_processing") query_context = ChartDataQueryContextSchema().load(payload) query_object = query_context.queries[0] - cache_key_without_post_processing = query_context.cache_key(query_object) + cache_key_without_post_processing = query_context.query_cache_key(query_object) self.assertNotEqual(cache_key_original, cache_key_without_post_processing) def test_query_context_time_range_endpoints(self): From e377b31b562a07f29a7ef90d378347e74ca76c83 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Tue, 10 Nov 2020 14:06:51 -0800 Subject: [PATCH 05/42] Typing, linting, refactoring --- superset/charts/api.py | 19 +++++--- superset/charts/commands/data.py | 38 +++++++--------- superset/charts/commands/exceptions.py | 4 -- superset/common/query_context.py | 2 +- superset/common/query_object.py | 2 +- superset/tasks/async_queries.py | 12 +++-- superset/utils/async_query_manager.py | 59 ++++++++++++++----------- superset/utils/cache.py | 5 ++- superset/utils/pandas_postprocessing.py | 12 ++--- 9 files changed, 80 insertions(+), 73 deletions(-) diff --git a/superset/charts/api.py b/superset/charts/api.py index b7f577d18260e..ad207709004e9 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -40,7 +40,6 @@ ChartCreateFailedError, ChartDataCacheLoadError, ChartDataQueryFailedError, - ChartDataValidationError, ChartDeleteFailedError, ChartForbiddenError, ChartInvalidError, @@ -493,9 +492,12 @@ def data(self) -> Response: try: command = ChartDataCommand() - command.validate(json_body) - except ChartDataValidationError as exc: - return self.response_400(message=exc.message) + command.set_query_context(json_body) + command.validate() + except ValidationError as error: + return self.response_400( + message=_("Request is incorrect: %(error)s", error=error.messages) + ) except SupersetSecurityException: return self.response_401() @@ -579,11 +581,14 @@ def data_from_cache(self, cache_key: str) -> Response: command = ChartDataCommand() try: cached_data = command.load_query_context_from_cache(cache_key) - command.validate(cached_data) + command.set_query_context(cached_data) + command.validate() except ChartDataCacheLoadError: return self.response_404() - except ChartDataValidationError as exc: - return self.response_400(message=exc.message) + except ValidationError as error: + return self.response_400( + message=_("Request is incorrect: %(error)s", error=error.messages) + ) except SupersetSecurityException as exc: logger.info(exc) return self.response_401() diff --git a/superset/charts/commands/data.py b/superset/charts/commands/data.py index 
8374b924a8b51..27540c1d37ec1 100644 --- a/superset/charts/commands/data.py +++ b/superset/charts/commands/data.py @@ -17,13 +17,13 @@ import logging from typing import Any, Dict, Optional +from flask import Request from marshmallow import ValidationError from superset import cache from superset.charts.commands.exceptions import ( ChartDataCacheLoadError, ChartDataQueryFailedError, - ChartDataValidationError, ) from superset.charts.schemas import ChartDataQueryContextSchema from superset.commands.base import BaseCommand @@ -36,12 +36,12 @@ class ChartDataCommand(BaseCommand): - def __init__(self): - self._form_data = None - self._query_context: Optional[QueryContext] = None - self._async_channel_id = None + def __init__(self) -> None: + self._form_data: Dict[str, Any] + self._query_context: QueryContext + self._async_channel_id: str - def run(self, **kwargs): + def run(self, **kwargs: Any) -> Dict[str, Any]: # caching is handled in query_context.get_df_payload (also evals `force` property) cache_query_context = kwargs["cache"] if "cache" in kwargs else False force_cached = kwargs["force_cached"] if "force_cached" in kwargs else False @@ -65,36 +65,32 @@ def run(self, **kwargs): return return_value - def run_async(self): + def run_async(self) -> Dict[str, Any]: # TODO: confirm cache backend is configured job_metadata = async_query_manager.init_job(self._async_channel_id) load_chart_data_into_cache.delay(job_metadata, self._form_data) return job_metadata - def set_query_context(self, form_data: Dict) -> None: + def set_query_context(self, form_data: Dict[str, Any]) -> None: self._form_data = form_data try: self._query_context = ChartDataQueryContextSchema().load(self._form_data) except KeyError: - raise ChartDataValidationError("Request is incorrect") + raise ValidationError("Request is incorrect") except ValidationError as error: - raise ChartDataValidationError( - "Request is incorrect: %(error)s", error=error.messages - ) + raise error - def validate(self, form_data: Dict) -> None: - self.set_query_context(form_data) + def validate(self) -> None: self._query_context.raise_for_access() - def validate_request(self, request: Dict): + def validate_request(self, request: Request) -> None: jwt_data = async_query_manager.parse_jwt_from_request(request) self._async_channel_id = jwt_data["channel"] def load_query_context_from_cache(self, cache_key: str) -> Dict[str, Any]: - if cache_key and cache: - cache_value = cache.get(cache_key) - if cache_value: - return cache_value["data"] - else: - raise ChartDataCacheLoadError("Cached data not found") + cache_value = cache.get(cache_key) + if not cache_value: + raise ChartDataCacheLoadError("Cached data not found") + + return cache_value["data"] diff --git a/superset/charts/commands/exceptions.py b/superset/charts/commands/exceptions.py index 5b9c1847b8a5f..16fb7757b0624 100644 --- a/superset/charts/commands/exceptions.py +++ b/superset/charts/commands/exceptions.py @@ -85,10 +85,6 @@ class ChartBulkDeleteFailedError(CreateFailedError): message = _("Charts could not be deleted.") -class ChartDataValidationError(CommandException): - pass - - class ChartDataQueryFailedError(CommandException): pass diff --git a/superset/common/query_context.py b/superset/common/query_context.py index 6946142e1c7a3..9ab061aed7ace 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -211,7 +211,7 @@ def get_payload(self, **kwargs: Any) -> Dict[str, Any]: set_and_log_cache( cache, cache_key, self.cache_timeout, {"data": self.cache_values} ) - 
return_value.update(cache_key=cache_key) + return_value["cache_key"] = cache_key # type: ignore return return_value diff --git a/superset/common/query_object.py b/superset/common/query_object.py index aa2d3147dca87..4692860b47482 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -251,7 +251,7 @@ def exec_post_processing(self, df: DataFrame) -> DataFrame: :param df: DataFrame returned from database model. :return: new DataFrame to which all post processing operations have been applied - :raises ChartDataValidationError: If the post processing operation in incorrect + :raises QueryObjectValidationError: If the post processing operation in incorrect """ for post_process in self.post_processing: operation = post_process.get("operation") diff --git a/superset/tasks/async_queries.py b/superset/tasks/async_queries.py index 2637efdd866a1..e39a7ed286259 100644 --- a/superset/tasks/async_queries.py +++ b/superset/tasks/async_queries.py @@ -16,15 +16,11 @@ # under the License. import logging -from typing import Dict +from typing import Any, Dict from flask import current_app from superset import app -from superset.charts.commands.exceptions import ( - ChartDataQueryFailedError, - ChartDataValidationError, -) from superset.extensions import async_query_manager, celery_app logger = logging.getLogger(__name__) @@ -34,7 +30,9 @@ @celery_app.task(name="load_chart_data_into_cache", soft_time_limit=query_timeout) -def load_chart_data_into_cache(job_metadata: Dict, form_data: Dict,) -> None: +def load_chart_data_into_cache( + job_metadata: Dict[str, Any], form_data: Dict[str, Any], +) -> None: from superset.charts.commands.data import ( ChartDataCommand, ) # load here due to circular imports @@ -50,7 +48,7 @@ def load_chart_data_into_cache(job_metadata: Dict, form_data: Dict,) -> None: cache_key=result["cache_key"], ) except Exception as exc: - msg = exc.message if hasattr(exc, "message") else str(exc) + msg = exc.message if hasattr(exc, "message") else str(exc) # type: ignore async_query_manager.update_job( job_metadata, async_query_manager.STATUS_ERROR, msg=msg ) diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index d6c7fed127bd2..bfab7185cfcf6 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -17,11 +17,11 @@ import json import logging import uuid -from typing import Any, Dict +from typing import Any, Dict, Optional import jwt import redis -from flask import Flask, Response, session +from flask import Flask, Request, Response, session logger = logging.getLogger(__name__) @@ -42,24 +42,30 @@ class AsyncQueryManager: def __init__(self) -> None: super().__init__() - self._redis = None - self._stream_prefix = None - self._stream_limit = None - self._stream_limit_firehose = None - self._jwt_cookie_name = None - self._jwt_cookie_secure = None - self._jwt_secret = None + self._redis: redis.Redis + self._stream_prefix: str = "" + self._stream_limit: Optional[int] + self._stream_limit_firehose: Optional[int] + self._jwt_cookie_name: str + self._jwt_cookie_secure: bool = False + self._jwt_secret: str def init_app(self, app: Flask) -> None: - self._redis = redis.Redis(**app.config["GLOBAL_ASYNC_QUERIES_REDIS_CONFIG"]) - self._stream_prefix = app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"] - self._stream_limit = app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_LIMIT"] - self._stream_limit_firehose = app.config[ + config = app.config + if len(config.get("GLOBAL_ASYNC_QUERIES_JWT_SECRET", 
"")) < 32: + raise AsyncQueryTokenException( + "Please provide a JWT secret at least 32 bytes long" + ) + + self._redis = redis.Redis(**config["GLOBAL_ASYNC_QUERIES_REDIS_CONFIG"]) + self._stream_prefix = config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"] + self._stream_limit = config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_LIMIT"] + self._stream_limit_firehose = config[ "GLOBAL_ASYNC_QUERIES_REDIS_STREAM_LIMIT_FIREHOSE" ] - self._jwt_cookie_name = app.config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME"] - self._jwt_cookie_secure = app.config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SECURE"] - self._jwt_secret = app.config["GLOBAL_ASYNC_QUERIES_JWT_SECRET"] + self._jwt_cookie_name = config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME"] + self._jwt_cookie_secure = config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SECURE"] + self._jwt_secret = config["GLOBAL_ASYNC_QUERIES_JWT_SECRET"] @app.after_request def validate_session(response: Response) -> Response: @@ -72,7 +78,6 @@ def validate_session(response: Response) -> Response: reset_token = True if reset_token: - logger.info("******************** setting async_channel_id") async_channel_id = str(uuid.uuid4()) session["async_channel_id"] = async_channel_id session["async_user_id"] = user_id @@ -94,15 +99,15 @@ def validate_session(response: Response) -> Response: return response - def generate_jwt(self, data: Dict) -> Dict[str, Any]: + def generate_jwt(self, data: Dict[str, Any]) -> str: encoded_jwt = jwt.encode(data, self._jwt_secret, algorithm="HS256") - return encoded_jwt + return encoded_jwt.decode("utf-8") def parse_jwt(self, token: str) -> Dict[str, Any]: data = jwt.decode(token, self._jwt_secret, algorithms=["HS256"]) return data - def parse_jwt_from_request(self, request: Dict) -> Dict[str, Any]: + def parse_jwt_from_request(self, request: Request) -> Dict[str, Any]: token = request.cookies.get(self._jwt_cookie_name) if not token: raise AsyncQueryTokenException("Token not preset") @@ -113,11 +118,13 @@ def parse_jwt_from_request(self, request: Dict) -> Dict[str, Any]: logger.warning(exc) raise AsyncQueryTokenException("Failed to parse token") - def init_job(self, channel_id: str): + def init_job(self, channel_id: str) -> Dict[str, Any]: job_id = str(uuid.uuid4()) return self._build_job_metadata(channel_id, job_id, status=self.STATUS_PENDING) - def _build_job_metadata(self, channel_id: str, job_id: str, **kwargs): + def _build_job_metadata( + self, channel_id: str, job_id: str, **kwargs: Any + ) -> Dict[str, Any]: return { "channel_id": channel_id, "job_id": job_id, @@ -127,7 +134,9 @@ def _build_job_metadata(self, channel_id: str, job_id: str, **kwargs): "cache_key": kwargs["cache_key"] if "cache_key" in kwargs else None, } - def update_job(self, job_metadata: Dict, status: str, **kwargs: Any): + def update_job( + self, job_metadata: Dict[str, Any], status: str, **kwargs: Any + ) -> None: if "channel_id" not in job_metadata: raise AsyncQueryJobException("No channel ID specified") @@ -145,5 +154,5 @@ def update_job(self, job_metadata: Dict, status: str, **kwargs: Any): full_stream_name = f"{self._stream_prefix}full" scoped_stream_name = f"{self._stream_prefix}{job_metadata['channel_id']}" - self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit) - self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose) + self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit) # type: ignore + self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose) # type: ignore diff --git a/superset/utils/cache.py 
b/superset/utils/cache.py index 387fd32fa754c..8ac8472b1aec2 100644 --- a/superset/utils/cache.py +++ b/superset/utils/cache.py @@ -44,7 +44,10 @@ def generate_cache_key(values_dict: Dict[str, Any], key_prefix: str = "") -> str def set_and_log_cache( - cache_instance: Cache, cache_key: str, cache_timeout: int, cache_value: Dict, + cache_instance: Cache, + cache_key: str, + cache_timeout: int, + cache_value: Dict[str, Any], ) -> None: try: dttm = datetime.utcnow().isoformat().split(".")[0] diff --git a/superset/utils/pandas_postprocessing.py b/superset/utils/pandas_postprocessing.py index a0d7e71719034..f0a3546f01a0e 100644 --- a/superset/utils/pandas_postprocessing.py +++ b/superset/utils/pandas_postprocessing.py @@ -227,7 +227,7 @@ def pivot( # pylint: disable=too-many-arguments Default to 'All'. :param flatten_columns: Convert column names to strings :return: A pivot table - :raises ChartDataValidationError: If the request in incorrect + :raises QueryObjectValidationError: If the request in incorrect """ if not index: raise QueryObjectValidationError( @@ -282,7 +282,7 @@ def aggregate( :param groupby: columns to aggregate :param aggregates: A mapping from metric column to the function used to aggregate values. - :raises ChartDataValidationError: If the request in incorrect + :raises QueryObjectValidationError: If the request in incorrect """ aggregates = aggregates or {} aggregate_funcs = _get_aggregate_funcs(df, aggregates) @@ -298,7 +298,7 @@ def sort(df: DataFrame, columns: Dict[str, bool]) -> DataFrame: :param columns: columns by by which to sort. The key specifies the column name, value specifies if sorting in ascending order. :return: Sorted DataFrame - :raises ChartDataValidationError: If the request in incorrect + :raises QueryObjectValidationError: If the request in incorrect """ return df.sort_values(by=list(columns.keys()), ascending=list(columns.values())) @@ -333,7 +333,7 @@ def rolling( # pylint: disable=too-many-arguments :param min_periods: The minimum amount of periods required for a row to be included in the result set. :return: DataFrame with the rolling columns - :raises ChartDataValidationError: If the request in incorrect + :raises QueryObjectValidationError: If the request in incorrect """ rolling_type_options = rolling_type_options or {} df_rolling = df[columns.keys()] @@ -393,7 +393,7 @@ def select( For instance, `{'y': 'y2'}` will rename the column `y` to `y2`. :return: Subset of columns in original DataFrame - :raises ChartDataValidationError: If the request in incorrect + :raises QueryObjectValidationError: If the request in incorrect """ df_select = df.copy(deep=False) if columns: @@ -418,7 +418,7 @@ def diff(df: DataFrame, columns: Dict[str, str], periods: int = 1,) -> DataFrame unchanged. :param periods: periods to shift for calculating difference. 
:return: DataFrame with diffed columns - :raises ChartDataValidationError: If the request in incorrect + :raises QueryObjectValidationError: If the request in incorrect """ df_diff = df[columns.keys()] df_diff = df_diff.diff(periods=periods) From f7ac5b6c584e9b537f8a037e12008977e271faab Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Tue, 10 Nov 2020 19:30:20 -0800 Subject: [PATCH 06/42] pytest fixes and openapi schema update --- superset/charts/api.py | 6 +++++- superset/charts/schemas.py | 13 +++++++++++++ superset/utils/cache.py | 2 +- tests/query_context_tests.py | 15 ++++++--------- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/superset/charts/api.py b/superset/charts/api.py index ad207709004e9..1f8ae39aab96f 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -474,7 +474,11 @@ def data(self) -> Response: schema: $ref: "#/components/schemas/ChartDataResponseSchema" 202: - $ref: '#/components/responses/202' + description: Async job details + content: + application/json: + schema: + $ref: "#/components/schemas/ChartDataAsyncResponseSchema" 400: $ref: '#/components/responses/400' 401: diff --git a/superset/charts/schemas.py b/superset/charts/schemas.py index 026ad13fd80e6..6100c8ed852f0 100644 --- a/superset/charts/schemas.py +++ b/superset/charts/schemas.py @@ -1033,6 +1033,18 @@ class ChartDataResponseSchema(Schema): ) +class ChartDataAsyncResponseSchema(Schema): + channel_id = fields.String( + description="Unique session async channel ID", allow_none=False, + ) + job_id = fields.String(description="Unique async job ID", allow_none=False,) + status = fields.String(description="Status value for async job", allow_none=False,) + msg = fields.String(description="User-facing message text", allow_none=True,) + cache_key = fields.String( + description="Unique cache key for async QueryContext", allow_none=False, + ) + + class ChartFavStarResponseResult(Schema): id = fields.Integer(description="The Chart id") value = fields.Boolean(description="The FaveStar value") @@ -1048,6 +1060,7 @@ class GetFavStarIdsSchema(Schema): CHART_SCHEMAS = ( ChartDataQueryContextSchema, ChartDataResponseSchema, + ChartDataAsyncResponseSchema, # TODO: These should optimally be included in the QueryContext schema as an `anyOf` # in ChartDataPostPricessingOperation.options, but since `anyOf` is not # by Marshmallow<3, this is not currently possible. 
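Note: the 202 flow documented by ChartDataAsyncResponseSchema pairs with the /api/v1/async_event/ polling endpoint and the GET /api/v1/chart/data/<cache_key> retrieval route added in this series. A minimal client-side sketch of that round trip follows; the endpoint paths, field names, and status values are taken from these diffs, while the requests.Session setup, authentication cookies, exact 202 body shape, and polling interval are illustrative assumptions.

import time
import requests

BASE = "http://localhost:8088"  # assumed local Superset instance

def run_chart_data_async(session: requests.Session, query_context: dict) -> dict:
    # With GLOBAL_ASYNC_QUERIES enabled, POST /api/v1/chart/data/ answers 202
    # with the job metadata described by ChartDataAsyncResponseSchema.
    resp = session.post(f"{BASE}/api/v1/chart/data/", json=query_context)
    resp.raise_for_status()
    job = resp.json()  # channel_id, job_id, status, msg, cache_key (assumed top-level)

    last_id = None
    while True:
        params = {"last_id": last_id} if last_id else {}
        events = session.get(
            f"{BASE}/api/v1/async_event/", params=params
        ).json()["result"]
        for event in events:
            last_id = event["id"]
            if event["job_id"] != job["job_id"]:
                continue
            if event["status"] == "error":
                raise RuntimeError(event.get("msg") or "async query failed")
            if event["status"] == "done":
                # Fetch the payload the Celery worker cached under cache_key.
                cached = session.get(f"{BASE}/api/v1/chart/data/{event['cache_key']}")
                cached.raise_for_status()
                return cached.json()
        time.sleep(0.5)  # arbitrary polling interval for this sketch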
diff --git a/superset/utils/cache.py b/superset/utils/cache.py index 8ac8472b1aec2..9341348dfce07 100644 --- a/superset/utils/cache.py +++ b/superset/utils/cache.py @@ -28,7 +28,7 @@ from superset.stats_logger import BaseStatsLogger from superset.utils.core import json_int_dttm_ser -config = app.config +config = app.config # type: ignore stats_logger: BaseStatsLogger = config["STATS_LOGGER"] logger = logging.getLogger(__name__) diff --git a/tests/query_context_tests.py b/tests/query_context_tests.py index 8051f4134c875..f25fce20dcc28 100644 --- a/tests/query_context_tests.py +++ b/tests/query_context_tests.py @@ -176,13 +176,10 @@ def test_csv_response_format(self): query_context = ChartDataQueryContextSchema().load(payload) responses = query_context.get_payload() self.assertEqual(len(responses), 1) - data = responses[0]["data"] + data = responses["queries"][0]["data"] self.assertIn("name,sum__num\n", data) self.assertEqual(len(data.split("\n")), 12) - ck = db.session.query(CacheKey).order_by(CacheKey.id.desc()).first() - assert ck.datasource_uid == f"{table.id}__table" - def test_sql_injection_via_groupby(self): """ Ensure that calling invalid columns names in groupby are caught @@ -194,7 +191,7 @@ def test_sql_injection_via_groupby(self): payload["queries"][0]["groupby"] = ["currentDatabase()"] query_context = ChartDataQueryContextSchema().load(payload) query_payload = query_context.get_payload() - assert query_payload[0].get("error") is not None + assert query_payload["queries"][0].get("error") is not None def test_sql_injection_via_columns(self): """ @@ -209,7 +206,7 @@ def test_sql_injection_via_columns(self): payload["queries"][0]["columns"] = ["*, 'extra'"] query_context = ChartDataQueryContextSchema().load(payload) query_payload = query_context.get_payload() - assert query_payload[0].get("error") is not None + assert query_payload["queries"][0].get("error") is not None def test_sql_injection_via_metrics(self): """ @@ -230,7 +227,7 @@ def test_sql_injection_via_metrics(self): ] query_context = ChartDataQueryContextSchema().load(payload) query_payload = query_context.get_payload() - assert query_payload[0].get("error") is not None + assert query_payload["queries"][0].get("error") is not None def test_samples_response_type(self): """ @@ -245,7 +242,7 @@ def test_samples_response_type(self): query_context = ChartDataQueryContextSchema().load(payload) responses = query_context.get_payload() self.assertEqual(len(responses), 1) - data = responses[0]["data"] + data = responses["queries"][0]["data"] self.assertIsInstance(data, list) self.assertEqual(len(data), 5) self.assertNotIn("sum__num", data[0]) @@ -262,7 +259,7 @@ def test_query_response_type(self): query_context = ChartDataQueryContextSchema().load(payload) responses = query_context.get_payload() self.assertEqual(len(responses), 1) - response = responses[0] + response = responses["queries"][0] self.assertEqual(len(response), 2) self.assertEqual(response["language"], "sql") self.assertIn("SELECT", response["query"]) From 6805180c955b9075a344e96a74d94c088890b9aa Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 19 Nov 2020 13:38:22 -0800 Subject: [PATCH 07/42] Enforce caching be configured for async query init --- superset/app.py | 8 +++++++- superset/charts/api.py | 4 ++-- superset/charts/commands/data.py | 1 - 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/superset/app.py b/superset/app.py index 5ecb23a6acde4..7e602e7a5de81 100644 --- a/superset/app.py +++ b/superset/app.py @@ -644,7 +644,13 @@ def 
configure_wtf(self) -> None: def configure_async_queries(self) -> None: if feature_flag_manager.is_feature_enabled("GLOBAL_ASYNC_QUERIES"): - logger.info("*************** Init async queries") + if ( + self.config["CACHE_CONFIG"]["CACHE_TYPE"] == "null" + or self.config["DATA_CACHE_CONFIG"]["CACHE_TYPE"] == "null" + ): + raise Exception( + """Cache backends (CACHE_CONFIG, DATA_CACHE_CONFIG) must be configured and non-null in order to enable async queries """ + ) async_query_manager.init_app(self.flask_app) def register_blueprints(self) -> None: diff --git a/superset/charts/api.py b/superset/charts/api.py index dbd077d39ecbc..f381dec8e74c2 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -556,12 +556,12 @@ def data(self) -> Response: @statsd_metrics def data_from_cache(self, cache_key: str) -> Response: """ - Takes a query context cache key returns payload + Takes a query context cache key and returns payload data response for the given query. --- get: description: >- - Takes a query context constructed in the client and returns payload data + Takes a query context cache key and returns payload data response for the given query. parameters: - in: path diff --git a/superset/charts/commands/data.py b/superset/charts/commands/data.py index 27540c1d37ec1..4427d7a3f6281 100644 --- a/superset/charts/commands/data.py +++ b/superset/charts/commands/data.py @@ -66,7 +66,6 @@ def run(self, **kwargs: Any) -> Dict[str, Any]: return return_value def run_async(self) -> Dict[str, Any]: - # TODO: confirm cache backend is configured job_metadata = async_query_manager.init_job(self._async_channel_id) load_chart_data_into_cache.delay(job_metadata, self._form_data) From d5eef4f27ed0b0ba00b710ddbee2a494e1b446f5 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 19 Nov 2020 14:31:08 -0800 Subject: [PATCH 08/42] Async query processing for explore_json endpoint --- superset/common/query_context.py | 8 ++-- superset/tasks/async_queries.py | 46 +++++++++++++++++++++- superset/utils/cache.py | 5 ++- superset/views/core.py | 66 +++++++++++++++++++++++++++++++- superset/views/utils.py | 30 +++++++++++++-- superset/viz.py | 28 ++++++++++---- 6 files changed, 163 insertions(+), 20 deletions(-) diff --git a/superset/common/query_context.py b/superset/common/query_context.py index bffa8e8133a94..95fd188d85c72 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -213,8 +213,8 @@ def get_payload(self, **kwargs: Any) -> Dict[str, Any]: set_and_log_cache( cache_manager.cache, cache_key, - self.cache_timeout, {"data": self.cache_values}, + self.cache_timeout, ) return_value["cache_key"] = cache_key # type: ignore @@ -295,7 +295,9 @@ def get_df_payload( # pylint: disable=too-many-statements logger.info("Serving from cache") if force_cached and not is_loaded: - logger.warning(f"force_cached: value not found for key {cache_key}") + logger.warning( + f"force_cached (QueryContext): value not found for key {cache_key}" + ) raise CacheLoadError() if query_obj and not is_loaded: @@ -338,8 +340,8 @@ def get_df_payload( # pylint: disable=too-many-statements set_and_log_cache( cache_manager.cache, cache_key, - self.cache_timeout, {"df": df, "query": query}, + self.cache_timeout, ) return { "cache_key": cache_key, diff --git a/superset/tasks/async_queries.py b/superset/tasks/async_queries.py index e39a7ed286259..49f330fa493d3 100644 --- a/superset/tasks/async_queries.py +++ b/superset/tasks/async_queries.py @@ -16,12 +16,14 @@ # under the License. 
import logging -from typing import Any, Dict +from typing import Any, cast, Dict from flask import current_app from superset import app -from superset.extensions import async_query_manager, celery_app +from superset.extensions import async_query_manager, cache_manager, celery_app +from superset.utils.cache import generate_cache_key, set_and_log_cache +from superset.views.utils import get_datasource_info, get_viz logger = logging.getLogger(__name__) query_timeout = current_app.config[ @@ -55,3 +57,43 @@ def load_chart_data_into_cache( raise exc return None + + +@celery_app.task(name="load_explore_json_into_cache", soft_time_limit=query_timeout) +def load_explore_json_into_cache( + job_metadata: Dict[str, Any], + form_data: Dict[str, Any], + response_type: str, + force: bool = False, +) -> None: + with app.app_context(): # type: ignore + try: + datasource_id, datasource_type = get_datasource_info(None, None, form_data) + + viz_obj = get_viz( + datasource_type=cast(str, datasource_type), + datasource_id=datasource_id, + form_data=form_data, + force=force, + ) + # run query & cache results + viz_obj.get_payload() + + # cache form_data for async retrieval + cache_value = {"form_data": form_data, "response_type": response_type} + cache_key = generate_cache_key( + cache_value, "ejr-" + ) # ejr: explore_json request + set_and_log_cache(cache_manager.cache, cache_key, cache_value) + + async_query_manager.update_job( + job_metadata, async_query_manager.STATUS_DONE, cache_key=cache_key, + ) + except Exception as exc: + msg = exc.message if hasattr(exc, "message") else str(exc) # type: ignore + async_query_manager.update_job( + job_metadata, async_query_manager.STATUS_ERROR, msg=msg + ) + raise exc + + return None diff --git a/superset/utils/cache.py b/superset/utils/cache.py index 971ca86c0fc18..158c5e2fe5bc1 100644 --- a/superset/utils/cache.py +++ b/superset/utils/cache.py @@ -48,13 +48,14 @@ def generate_cache_key(values_dict: Dict[str, Any], key_prefix: str = "") -> str def set_and_log_cache( cache_instance: Cache, cache_key: str, - cache_timeout: int, cache_value: Dict[str, Any], + cache_timeout: Optional[int] = None, ) -> None: + timeout = cache_timeout if cache_timeout else config["CACHE_DEFAULT_TIMEOUT"] try: dttm = datetime.utcnow().isoformat().split(".")[0] value = {**cache_value, "dttm": dttm} - cache_instance.set(cache_key, value, timeout=cache_timeout) + cache_instance.set(cache_key, value, timeout=timeout) stats_logger.incr("set_cache_key") except Exception as ex: # cache.set call can fail if the backend is down or if diff --git a/superset/views/core.py b/superset/views/core.py index fbfac7cb944e2..f2cc02c564829 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -28,7 +28,11 @@ from flask import abort, flash, g, Markup, redirect, render_template, request, Response from flask_appbuilder import expose from flask_appbuilder.models.sqla.interface import SQLAInterface -from flask_appbuilder.security.decorators import has_access, has_access_api +from flask_appbuilder.security.decorators import ( + has_access, + has_access_api, + permission_name, +) from flask_appbuilder.security.sqla import models as ab_models from flask_babel import gettext as __, lazy_gettext as _ from jinja2.exceptions import TemplateError @@ -65,6 +69,7 @@ from superset.dashboards.dao import DashboardDAO from superset.databases.filters import DatabaseFilter from superset.exceptions import ( + CacheLoadError, CertificateException, DatabaseNotFound, SerializationError, @@ -72,6 +77,7 @@ 
SupersetSecurityException, SupersetTimeoutException, ) +from superset.extensions import async_query_manager, cache_manager, security_manager from superset.jinja_context import get_template_processor from superset.models.core import Database, FavStar, Log from superset.models.dashboard import Dashboard @@ -86,8 +92,10 @@ ) from superset.sql_parse import CtasMethod, ParsedQuery, Table from superset.sql_validators import get_validator_by_name +from superset.tasks.async_queries import load_explore_json_into_cache from superset.typing import FlaskResponse from superset.utils import core as utils +from superset.utils.async_query_manager import AsyncQueryTokenException from superset.utils.cache import etag_cache from superset.utils.dates import now_as_float from superset.views.base import ( @@ -112,6 +120,7 @@ apply_display_max_row_limit, bootstrap_user_data, check_datasource_perms, + check_explore_cache_perms, check_slice_perms, get_cta_schema_name, get_dashboard_extra_filters, @@ -480,6 +489,43 @@ def annotation_json( # pylint: disable=no-self-use payload = viz_obj.get_payload() return data_payload_response(*viz_obj.payload_json_and_has_error(payload)) + @event_logger.log_this + @api + @has_access_api + @handle_api_exception + @permission_name("explore_json") + @expose("/explore_json/data/", methods=["GET"]) + @etag_cache(check_perms=check_explore_cache_perms) + def explore_json_data(self, cache_key: str) -> FlaskResponse: + """Serves cached result data for async explore_json calls + + `self.generate_json` receives this input and returns different + payloads based on the request args in the first block + + TODO: form_data should not be loaded twice from cache + (also loaded in `check_explore_cache_perms`) + """ + try: + cached = cache_manager.cache.get(cache_key) + if not cached: + raise CacheLoadError("Cached data not found") + + form_data = cached["form_data"] + response_type = cached["response_type"] + + datasource_id, datasource_type = get_datasource_info(None, None, form_data) + + viz_obj = get_viz( + datasource_type=cast(str, datasource_type), + datasource_id=datasource_id, + form_data=form_data, + force_cached=True, + ) + + return self.generate_json(viz_obj, response_type) + except SupersetException as ex: + return json_error_response(utils.error_msg_from_exception(ex), 400) + EXPLORE_JSON_METHODS = ["POST"] if not is_feature_enabled("ENABLE_EXPLORE_JSON_CSRF_PROTECTION"): EXPLORE_JSON_METHODS.append("GET") @@ -524,11 +570,27 @@ def explore_json( datasource_id, datasource_type, form_data ) + force = request.args.get("force") == "true" + + if is_feature_enabled("GLOBAL_ASYNC_QUERIES"): + try: + async_channel_id = async_query_manager.parse_jwt_from_request( + request + )["channel"] + job_metadata = async_query_manager.init_job(async_channel_id) + load_explore_json_into_cache.delay( + job_metadata, form_data, response_type, force + ) + except AsyncQueryTokenException: + return json_error_response("Not authorized", 401) + + return json_success(json.dumps(job_metadata), status=202) + viz_obj = get_viz( datasource_type=cast(str, datasource_type), datasource_id=datasource_id, form_data=form_data, - force=request.args.get("force") == "true", + force=force, ) return self.generate_json(viz_obj, response_type) diff --git a/superset/views/utils.py b/superset/views/utils.py index 08de168003526..28104aa86a08a 100644 --- a/superset/views/utils.py +++ b/superset/views/utils.py @@ -34,10 +34,12 @@ from superset.connectors.connector_registry import ConnectorRegistry from superset.errors import ErrorLevel, 
SupersetError, SupersetErrorType from superset.exceptions import ( + CacheLoadError, SerializationError, SupersetException, SupersetSecurityException, ) +from superset.extensions import cache_manager from superset.legacy import update_time_range from superset.models.core import Database from superset.models.dashboard import Dashboard @@ -108,13 +110,19 @@ def get_permissions( def get_viz( - form_data: FormData, datasource_type: str, datasource_id: int, force: bool = False + form_data: FormData, + datasource_type: str, + datasource_id: int, + force: bool = False, + force_cached: bool = False, ) -> BaseViz: viz_type = form_data.get("viz_type", "table") datasource = ConnectorRegistry.get_datasource( datasource_type, datasource_id, db.session ) - viz_obj = viz.viz_types[viz_type](datasource, form_data=form_data, force=force) + viz_obj = viz.viz_types[viz_type]( + datasource, form_data=form_data, force=force, force_cached=force_cached + ) return viz_obj @@ -422,10 +430,26 @@ def is_owner(obj: Union[Dashboard, Slice], user: User) -> bool: return obj and user in obj.owners +def check_explore_cache_perms(_self: Any, cache_key: str) -> None: + """ + Loads async explore_json request data from cache and performs access check + + :param _self: the Superset view instance + :param cache_key: the cache key passed into /explore_json/data/ + :raises SupersetSecurityException: If the user cannot access the resource + """ + cached = cache_manager.cache.get(cache_key) + if not cached: + raise CacheLoadError("Cached data not found") + + check_datasource_perms(_self, form_data=cached["form_data"]) + + def check_datasource_perms( _self: Any, datasource_type: Optional[str] = None, datasource_id: Optional[int] = None, + **kwargs: Any ) -> None: """ Check if user can access a cached response from explore_json. 
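Note: the async explore_json path stores the original request in the cache so that /superset/explore_json/data/<cache_key> can re-check permissions and rebuild the viz later. A small sketch of that cache entry follows; the "ejr-" prefix, the stored fields, and the key derivation come from load_explore_json_into_cache and generate_cache_key in the diffs above, while the form_data and response_type values are hypothetical.

import hashlib
import json

# What load_explore_json_into_cache writes (values below are hypothetical).
cache_value = {
    "form_data": {"datasource": "3__table", "viz_type": "table"},
    "response_type": "json",  # whatever format the original request asked for
}
# generate_cache_key(cache_value, "ejr-") boils down to this prefix + md5 digest:
cache_key = "ejr-" + hashlib.md5(
    json.dumps(cache_value, sort_keys=True).encode("utf-8")
).hexdigest()
# set_and_log_cache(...) stores cache_value under cache_key; explore_json_data()
# later loads it, re-checks datasource access against the cached form_data,
# rebuilds the viz with force_cached=True, and responds in the cached
# response_type.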
@@ -438,7 +462,7 @@ def check_datasource_perms( :raises SupersetSecurityException: If the user cannot access the resource """ - form_data = get_form_data()[0] + form_data = kwargs["form_data"] if "form_data" in kwargs else get_form_data()[0] try: datasource_id, datasource_type = get_datasource_info( diff --git a/superset/viz.py b/superset/viz.py index 0a8a7028ed968..2eeb03cd26b6a 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -38,6 +38,7 @@ Optional, Set, Tuple, + Type, TYPE_CHECKING, Union, ) @@ -57,6 +58,7 @@ from superset.constants import NULL_STRING from superset.errors import ErrorLevel, SupersetError, SupersetErrorType from superset.exceptions import ( + CacheLoadError, NullValueException, QueryObjectValidationError, SpatialException, @@ -141,6 +143,7 @@ def __init__( datasource: "BaseDatasource", form_data: Dict[str, Any], force: bool = False, + force_cached: bool = False, ) -> None: if not datasource: raise QueryObjectValidationError(_("Viz is missing a datasource")) @@ -161,6 +164,7 @@ def __init__( self.results: Optional[QueryResult] = None self.errors: List[Dict[str, Any]] = [] self.force = force + self.force_cached = force_cached self.from_dttm: Optional[datetime] = None self.to_dttm: Optional[datetime] = None @@ -269,7 +273,7 @@ def get_samples(self) -> List[Dict[str, Any]]: "columns": [o.column_name for o in self.datasource.columns], } ) - df = self.get_df(query_obj) + df = self.get_df_payload(query_obj)["df"] # leverage caching logic return df.to_dict(orient="records") def get_df(self, query_obj: Optional[QueryObjectDict] = None) -> pd.DataFrame: @@ -535,6 +539,12 @@ def get_df_payload( ) logger.info("Serving from cache") + if self.force_cached and not is_loaded: + logger.warning( + f"force_cached (viz.py): value not found for key {cache_key}" + ) + raise CacheLoadError() + if query_obj and not is_loaded: try: invalid_columns = [ @@ -635,7 +645,7 @@ def data(self) -> Dict[str, Any]: return content def get_csv(self) -> Optional[str]: - df = self.get_df() + df = self.get_df_payload()["df"] # leverage caching logic include_index = not isinstance(df.index, pd.RangeIndex) return df.to_csv(index=include_index, **config["CSV_EXPORT"]) @@ -2979,12 +2989,14 @@ def get_data(self, df: pd.DataFrame) -> VizData: return self.nest_values(levels) +def get_subclasses(cls: Type[BaseViz]) -> Set[Type[BaseViz]]: + return set(cls.__subclasses__()).union( + [sc for c in cls.__subclasses__() for sc in get_subclasses(c)] + ) + + viz_types = { o.viz_type: o - for o in globals().values() - if ( - inspect.isclass(o) - and issubclass(o, BaseViz) - and o.viz_type not in config["VIZ_TYPE_DENYLIST"] - ) + for o in get_subclasses(BaseViz) + if o.viz_type not in config["VIZ_TYPE_DENYLIST"] } From 467c6bb9de3837400af843f0f9bbe19c66225b4b Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Fri, 20 Nov 2020 14:11:23 -0800 Subject: [PATCH 09/42] Add /api/v1/async_event endpoint --- superset/app.py | 2 + superset/async_events/__init__.py | 16 ++++ superset/async_events/api.py | 103 ++++++++++++++++++++++++++ superset/utils/async_query_manager.py | 20 ++++- 4 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 superset/async_events/__init__.py create mode 100644 superset/async_events/api.py diff --git a/superset/app.py b/superset/app.py index 7e602e7a5de81..fb4590917af60 100644 --- a/superset/app.py +++ b/superset/app.py @@ -128,6 +128,7 @@ def init_views(self) -> None: # pylint: disable=too-many-branches from superset.annotation_layers.api import AnnotationLayerRestApi from 
superset.annotation_layers.annotations.api import AnnotationRestApi + from superset.async_events.api import AsyncEventsRestApi from superset.cachekeys.api import CacheRestApi from superset.charts.api import ChartRestApi from superset.connectors.druid.views import ( @@ -201,6 +202,7 @@ def init_views(self) -> None: # appbuilder.add_api(AnnotationRestApi) appbuilder.add_api(AnnotationLayerRestApi) + appbuilder.add_api(AsyncEventsRestApi) appbuilder.add_api(CacheRestApi) appbuilder.add_api(ChartRestApi) appbuilder.add_api(CssTemplateRestApi) diff --git a/superset/async_events/__init__.py b/superset/async_events/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/superset/async_events/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/superset/async_events/api.py b/superset/async_events/api.py new file mode 100644 index 0000000000000..91d4e24541c6f --- /dev/null +++ b/superset/async_events/api.py @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import logging + +from flask import request, Response +from flask_appbuilder import expose +from flask_appbuilder.api import BaseApi, safe +from flask_appbuilder.security.decorators import permission_name, protect + +from superset.extensions import async_query_manager, cache_manager, db, event_logger +from superset.utils.async_query_manager import AsyncQueryTokenException + +logger = logging.getLogger(__name__) + + +class AsyncEventsRestApi(BaseApi): + resource_name = "async_event" + allow_browser_login = True + include_route_methods = { + "events", + } + + @expose("/", methods=["GET"]) + @event_logger.log_this + @protect() + @safe + @permission_name("list") + def events(self) -> Response: + """ + Reads off of the Redis async events stream, using the user's JWT token and + optional query params for last event received. + --- + get: + description: >- + Reads off of the Redis events stream, using the user's JWT token and + optional query params for last event received. 
+ parameters: + - in: query + name: last_id + description: Last ID received by the client + schema: + type: string + responses: + 200: + description: Async event results + content: + application/json: + schema: + type: object + properties: + result: + type: array + items: + type: object + properties: + id: + type: string + channel_id: + type: string + job_id: + type: string + user_id: + type: string + status: + type: string + msg: + type: string + cache_key: + type: string + 400: + $ref: '#/components/responses/400' + 401: + $ref: '#/components/responses/401' + 404: + $ref: '#/components/responses/404' + 500: + $ref: '#/components/responses/500' + """ + try: + async_channel_id = async_query_manager.parse_jwt_from_request(request)[ + "channel" + ] + last_event_id = request.args.get("last_id") + events = async_query_manager.read_events(async_channel_id, last_event_id) + + except AsyncQueryTokenException: + return self.response_401() + + return self.response(200, result=events) diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index bfab7185cfcf6..a8b8392934535 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -17,7 +17,7 @@ import json import logging import uuid -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional, Tuple import jwt import redis @@ -35,6 +35,7 @@ class AsyncQueryJobException(Exception): class AsyncQueryManager: + MAX_EVENT_COUNT = 100 STATUS_PENDING = "pending" STATUS_RUNNING = "running" STATUS_ERROR = "error" @@ -57,7 +58,9 @@ def init_app(self, app: Flask) -> None: "Please provide a JWT secret at least 32 bytes long" ) - self._redis = redis.Redis(**config["GLOBAL_ASYNC_QUERIES_REDIS_CONFIG"]) + self._redis = redis.Redis( # type: ignore + **config["GLOBAL_ASYNC_QUERIES_REDIS_CONFIG"], decode_responses=True + ) self._stream_prefix = config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"] self._stream_limit = config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_LIMIT"] self._stream_limit_firehose = config[ @@ -134,6 +137,19 @@ def _build_job_metadata( "cache_key": kwargs["cache_key"] if "cache_key" in kwargs else None, } + def read_events( + self, channel: str, last_id: Optional[str] + ) -> List[Optional[Dict[str, Any]]]: + stream_name = f"{self._stream_prefix}{channel}" + start_id = last_id if last_id else "-" + results = self._redis.xrange(stream_name, start_id, "+", self.MAX_EVENT_COUNT) # type: ignore + return [] if not results else list(map(self.parse_event, results)) + + def parse_event(self, event_data: Tuple[str, Dict[str, Any]]) -> Dict[str, Any]: + event_id = event_data[0] + event_payload = event_data[1]["data"] + return {"id": event_id, **json.loads(event_payload)} + def update_job( self, job_metadata: Dict[str, Any], status: str, **kwargs: Any ) -> None: From f867cd5b479cfcdead0a09c5c8b880aac682d0dd Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Mon, 23 Nov 2020 23:37:41 -0800 Subject: [PATCH 10/42] Async frontend for dashboards [WIP] --- superset-frontend/src/chart/chartAction.js | 60 +++++++----- superset-frontend/src/chart/chartReducer.js | 9 ++ superset-frontend/src/dashboard/index.jsx | 95 ++++++++++++++++++- .../src/dashboard/reducers/asyncEvents.js | 38 ++++++++ .../src/dashboard/reducers/index.js | 4 + superset-frontend/src/featureFlags.ts | 1 + 6 files changed, 181 insertions(+), 26 deletions(-) create mode 100644 superset-frontend/src/dashboard/reducers/asyncEvents.js diff --git a/superset-frontend/src/chart/chartAction.js 
b/superset-frontend/src/chart/chartAction.js index 50d9915920da4..6f6dbbfabf2b6 100644 --- a/superset-frontend/src/chart/chartAction.js +++ b/superset-frontend/src/chart/chartAction.js @@ -66,6 +66,11 @@ export function chartUpdateFailed(queryResponse, key) { return { type: CHART_UPDATE_FAILED, queryResponse, key }; } +export const CHART_UPDATE_QUEUED = 'CHART_UPDATE_QUEUED'; +export function chartUpdateQueued(asyncJobMeta, key) { + return { type: CHART_UPDATE_QUEUED, asyncJobMeta, key }; +} + export const CHART_RENDERING_FAILED = 'CHART_RENDERING_FAILED'; export function chartRenderingFailed(error, key, stackTrace) { return { type: CHART_RENDERING_FAILED, error, key, stackTrace }; @@ -354,32 +359,37 @@ export function exploreJSON( const chartDataRequestCaught = chartDataRequest .then(response => { - // new API returns an object with an array of restults - // problem: response holds a list of results, when before we were just getting one result. - // How to make the entire app compatible with multiple results? - // For now just use the first result. - const result = response.result[0]; + if(isFeatureEnabled(FeatureFlag.GLOBAL_ASYNC_QUERIES)) { + return dispatch(chartUpdateQueued(response, key)); - dispatch( - logEvent(LOG_ACTIONS_LOAD_CHART, { - slice_id: key, - applied_filters: result.applied_filters, - is_cached: result.is_cached, - force_refresh: force, - row_count: result.rowcount, - datasource: formData.datasource, - start_offset: logStart, - ts: new Date().getTime(), - duration: Logger.getTimestamp() - logStart, - has_extra_filters: - formData.extra_filters && formData.extra_filters.length > 0, - viz_type: formData.viz_type, - data_age: result.is_cached - ? moment(new Date()).diff(moment.utc(result.cached_dttm)) - : null, - }), - ); - return dispatch(chartUpdateSucceeded(result, key)); + } else { + // new API returns an object with an array of restults + // problem: response holds a list of results, when before we were just getting one result. + // How to make the entire app compatible with multiple results? + // For now just use the first result. + const result = response.result[0]; + + dispatch( + logEvent(LOG_ACTIONS_LOAD_CHART, { + slice_id: key, + applied_filters: result.applied_filters, + is_cached: result.is_cached, + force_refresh: force, + row_count: result.rowcount, + datasource: formData.datasource, + start_offset: logStart, + ts: new Date().getTime(), + duration: Logger.getTimestamp() - logStart, + has_extra_filters: + formData.extra_filters && formData.extra_filters.length > 0, + viz_type: formData.viz_type, + data_age: result.is_cached + ? 
moment(new Date()).diff(moment.utc(result.cached_dttm)) + : null, + }), + ); + return dispatch(chartUpdateSucceeded(result, key)); + } }) .catch(response => { const appendErrorLog = (errorDetails, isCached) => { diff --git a/superset-frontend/src/chart/chartReducer.js b/superset-frontend/src/chart/chartReducer.js index b3e72124f9829..0d1b046820990 100644 --- a/superset-frontend/src/chart/chartReducer.js +++ b/superset-frontend/src/chart/chartReducer.js @@ -71,6 +71,15 @@ export default function chartReducer(charts = {}, action) { chartUpdateEndTime: now(), }; }, + [actions.CHART_UPDATE_QUEUED](state) { + return { + ...state, + asyncJobId: action.asyncJobMeta["job_id"], + chartStatus: 'queued', + chartAlert: null, + chartUpdateEndTime: now(), + }; + }, [actions.CHART_RENDERING_SUCCEEDED](state) { return { ...state, chartStatus: 'rendered', chartUpdateEndTime: now() }; }, diff --git a/superset-frontend/src/dashboard/index.jsx b/superset-frontend/src/dashboard/index.jsx index 3937a357dfcce..9171847a0a7d4 100644 --- a/superset-frontend/src/dashboard/index.jsx +++ b/superset-frontend/src/dashboard/index.jsx @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ +import { get, some } from 'lodash'; import React from 'react'; import ReactDOM from 'react-dom'; import thunk from 'redux-thunk'; @@ -26,17 +27,109 @@ import getInitialState from './reducers/getInitialState'; import rootReducer from './reducers/index'; import logger from '../middleware/loggerMiddleware'; + +// TODO: move/abstract this +import * as actions from '../chart/chartAction'; +import asyncEventReceived from './reducers/asyncEvents'; + + import App from './App'; +// TODO: re-enable +// import { catch } from 'fetch-mock'; const appContainer = document.getElementById('app'); const bootstrapData = JSON.parse(appContainer.getAttribute('data-bootstrap')); initFeatureFlags(bootstrapData.common.feature_flags); const initState = getInitialState(bootstrapData); + +// TODO: move/abstract this +const asyncEventMiddleware = store => next => { + const pollingUrl = '/api/v1/async_event/'; + const isLoading = (state) => some(state.charts, {chartStatus: "queued"}); // pass this in + + const fetchEvents = async (lastEventId) => { + const url = lastEventId ? `${pollingUrl}?last_id=${lastEventId}` : pollingUrl; + const response = await fetch(url); + + if (!response.ok) { + const message = `An error has occured: ${response.status}`; + throw new Error(message); + } + + const data = await response.json(); + return data.result; + } + + const fetchData = async (asyncEvent) => { + const dataUrl = `/api/v1/chart/data/${asyncEvent.cache_key}`; // TODO: determine from event type + const response = await fetch(dataUrl); + + if (!response.ok) { + const message = `An error has occured: ${response.status}`; + throw new Error(message); + } + + const data = await response.json(); + return data.result[0]; + } + + const updateMessages = async () => { + if (isLoading(store.getState())) { + console.log('************* fetching events'); + try { + const state = store.getState(); + const lastEventId = get(state, 'asyncEvents.last_event_id'); + console.log('********** lastEventId', lastEventId); + const events = await fetchEvents(lastEventId); + + // TODO: how to tie this event back to the caller component (by key?) 
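Note: the events this middleware consumes are the entries AsyncQueryManager.update_job() appends to the per-channel Redis stream, returned by /api/v1/async_event/ in its "result" array. A rough sketch of one such entry follows; only the field names are taken from _build_job_metadata() and parse_event() in this series, and every concrete value is a placeholder.

# Approximate shape of a single async event, as seen by this polling middleware.
example_event = {
    "id": "1606444800000-0",  # Redis stream entry ID, reused as last_id on the next poll
    "channel_id": "c2f0a6a0-0000-0000-0000-000000000000",  # per-session channel from the async JWT
    "job_id": "7f1d2b34-0000-0000-0000-000000000000",      # matched against chart.asyncJobId
    "user_id": "1",
    "status": "done",  # pending | running | error | done
    "msg": None,
    "cache_key": "some-cache-key",  # drives the follow-up GET /api/v1/chart/data/<cache_key>
}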
+ console.log('async events received', events); + + // iterate over queued charts + const componentsInState = store.getState()['charts']; // TODO: pass key into middleware init + const queuedComponents = _.filter(componentsInState, {chartStatus: "queued"}) // TODO: pass in + const componentsByJobId = queuedComponents.reduce((acc, item) => { + acc[item['asyncJobId']] = item; + return acc; + }, {}); + + console.log('componentsByJobId', componentsByJobId); + + for (const asyncEvent of events) { + console.log('async event', asyncEvent); + console.log('job id', asyncEvent['job_id']); + const component = componentsByJobId[asyncEvent['job_id']]; + if (component) { + const key = component['id']; + const componentData = await fetchData(asyncEvent); + console.log('************* dispatch', key, componentData); + store.dispatch(actions.chartUpdateSucceeded(componentData, key)); + } + // console.log('asyncEventReceived', asyncEventReceived); + store.dispatch({ type: 'ASYNC_EVENT_RECEIVED', eventId: asyncEvent['id'] }); + // store.dispatch(asyncEventReceived(asyncEvent['id'])); + } + } catch (err) { + // console.error(err.message); + throw err; + } + } else { + console.log('********** no components waiting for data'); + } + + setTimeout(updateMessages, 500); + }; + + updateMessages(); + + return action => next(action); +}; + const store = createStore( rootReducer, initState, - compose(applyMiddleware(thunk, logger), initEnhancer(false)), + compose(applyMiddleware(thunk, logger, asyncEventMiddleware), initEnhancer(false)), ); ReactDOM.render(, document.getElementById('app')); diff --git a/superset-frontend/src/dashboard/reducers/asyncEvents.js b/superset-frontend/src/dashboard/reducers/asyncEvents.js new file mode 100644 index 0000000000000..4b5c9bf800441 --- /dev/null +++ b/superset-frontend/src/dashboard/reducers/asyncEvents.js @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* eslint-disable camelcase */ + +// TODO: move this to a more generic location +export const ASYNC_EVENT_RECEIVED = 'ASYNC_EVENT_RECEIVED'; +// export function asyncEventReceived(eventId) { +// return { type: ASYNC_EVENT_RECEIVED, eventId }; +// } + +export default function asyncEventsReducer(state = {}, action) { + const actionHandlers = { + [ASYNC_EVENT_RECEIVED]() { + return { ...state, last_event_id: action.eventId }; + }, + } + + if (action.type in actionHandlers) { + return actionHandlers[action.type](); + } + return state; +} diff --git a/superset-frontend/src/dashboard/reducers/index.js b/superset-frontend/src/dashboard/reducers/index.js index afc77ce6c8e3a..b5a08bd4ca851 100644 --- a/superset-frontend/src/dashboard/reducers/index.js +++ b/superset-frontend/src/dashboard/reducers/index.js @@ -18,6 +18,7 @@ */ import { combineReducers } from 'redux'; +import asyncEvents from './asyncEvents'; import charts from '../../chart/chartReducer'; import dashboardInfo from './dashboardInfo'; import dashboardState from './dashboardState'; @@ -29,7 +30,10 @@ import messageToasts from '../../messageToasts/reducers'; const impressionId = (state = '') => state; +const asyncEvent = (state = {}, eventId) => ({ ...state, async_last_event: eventId }); + export default combineReducers({ + asyncEvents, charts, datasources, dashboardInfo, diff --git a/superset-frontend/src/featureFlags.ts b/superset-frontend/src/featureFlags.ts index 1bac02dcbdc22..c8a1bbb0768e5 100644 --- a/superset-frontend/src/featureFlags.ts +++ b/superset-frontend/src/featureFlags.ts @@ -33,6 +33,7 @@ export enum FeatureFlag { ESCAPE_MARKDOWN_HTML = 'ESCAPE_MARKDOWN_HTML', VERSIONED_EXPORT = 'VERSIONED_EXPORT', SIP_34_QUERY_SEARCH_UI = 'SIP_34_QUERY_SEARCH_UI', + GLOBAL_ASYNC_QUERIES = 'GLOBAL_ASYNC_QUERIES', } export type FeatureFlagMap = { From 8111ea6e230ab66abcb8fa556ade995f87d2438e Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Sun, 29 Nov 2020 19:24:54 -0800 Subject: [PATCH 11/42] Chart async error message support, refactoring --- .../utils/getClientErrorObject_spec.ts | 2 +- .../src/SqlLab/actions/sqlLab.js | 2 +- .../SqlLab/components/ShareSqlLabQuery.jsx | 2 +- superset-frontend/src/chart/Chart.jsx | 2 + superset-frontend/src/chart/chartAction.js | 6 +- superset-frontend/src/chart/chartReducer.js | 3 +- .../src/components/AsyncSelect.jsx | 2 +- .../src/dashboard/actions/dashboardState.js | 2 +- .../src/dashboard/actions/datasources.js | 2 +- .../src/dashboard/actions/sliceEntities.js | 2 +- .../dashboard/components/PropertiesModal.jsx | 2 +- superset-frontend/src/dashboard/index.jsx | 95 ++++++++++++------- .../src/dashboard/reducers/asyncEvents.js | 38 -------- .../src/dashboard/reducers/index.js | 4 - .../src/datasource/ChangeDatasourceModal.tsx | 2 +- .../src/datasource/DatasourceEditor.jsx | 2 +- .../src/datasource/DatasourceModal.tsx | 2 +- .../explore/components/DisplayQueryButton.jsx | 2 +- .../explore/components/PropertiesModal.tsx | 2 +- superset-frontend/src/setup/setupApp.ts | 3 +- superset-frontend/src/utils/common.js | 2 +- .../src/utils/getClientErrorObject.ts | 60 ++++++------ .../views/CRUD/annotation/AnnotationList.tsx | 2 +- .../CRUD/data/database/DatabaseModal.tsx | 2 +- superset-frontend/src/views/CRUD/utils.tsx | 2 +- superset/charts/commands/data.py | 1 + superset/charts/schemas.py | 6 +- superset/exceptions.py | 10 +- superset/tasks/async_queries.py | 32 +++++-- superset/utils/async_query_manager.py | 4 +- superset/viz.py | 12 ++- 31 files changed, 165 insertions(+), 145 deletions(-) delete 
mode 100644 superset-frontend/src/dashboard/reducers/asyncEvents.js diff --git a/superset-frontend/spec/javascripts/utils/getClientErrorObject_spec.ts b/superset-frontend/spec/javascripts/utils/getClientErrorObject_spec.ts index 8519b71206af7..8e89fec2842f0 100644 --- a/superset-frontend/spec/javascripts/utils/getClientErrorObject_spec.ts +++ b/superset-frontend/spec/javascripts/utils/getClientErrorObject_spec.ts @@ -17,7 +17,7 @@ * under the License. */ import { ErrorTypeEnum } from 'src/components/ErrorMessage/types'; -import getClientErrorObject from 'src/utils/getClientErrorObject'; +import { getClientErrorObject } from 'src/utils/getClientErrorObject'; describe('getClientErrorObject()', () => { it('Returns a Promise', () => { diff --git a/superset-frontend/src/SqlLab/actions/sqlLab.js b/superset-frontend/src/SqlLab/actions/sqlLab.js index 7c1bce3e1fc49..ebfca9116f97e 100644 --- a/superset-frontend/src/SqlLab/actions/sqlLab.js +++ b/superset-frontend/src/SqlLab/actions/sqlLab.js @@ -30,7 +30,7 @@ import { addSuccessToast as addSuccessToastAction, addWarningToast as addWarningToastAction, } from '../../messageToasts/actions/index'; -import getClientErrorObject from '../../utils/getClientErrorObject'; +import { getClientErrorObject } from '../../utils/getClientErrorObject'; import COMMON_ERR_MESSAGES from '../../utils/errorMessages'; export const RESET_STATE = 'RESET_STATE'; diff --git a/superset-frontend/src/SqlLab/components/ShareSqlLabQuery.jsx b/superset-frontend/src/SqlLab/components/ShareSqlLabQuery.jsx index dc9fb4267f5af..c094d08775f9e 100644 --- a/superset-frontend/src/SqlLab/components/ShareSqlLabQuery.jsx +++ b/superset-frontend/src/SqlLab/components/ShareSqlLabQuery.jsx @@ -25,7 +25,7 @@ import { isFeatureEnabled, FeatureFlag } from 'src/featureFlags'; import Button from 'src/components/Button'; import CopyToClipboard from '../../components/CopyToClipboard'; import { storeQuery } from '../../utils/common'; -import getClientErrorObject from '../../utils/getClientErrorObject'; +import { getClientErrorObject } from '../../utils/getClientErrorObject'; import withToasts from '../../messageToasts/enhancers/withToasts'; const propTypes = { diff --git a/superset-frontend/src/chart/Chart.jsx b/superset-frontend/src/chart/Chart.jsx index 2636751fd6e1e..92804a5ddd836 100644 --- a/superset-frontend/src/chart/Chart.jsx +++ b/superset-frontend/src/chart/Chart.jsx @@ -157,6 +157,8 @@ class Chart extends React.PureComponent { queryResponse, } = this.props; + console.log('**** Chart renderError', queryResponse); + const error = queryResponse?.errors?.[0]; if (error) { const extra = error.extra || {}; diff --git a/superset-frontend/src/chart/chartAction.js b/superset-frontend/src/chart/chartAction.js index 6f6dbbfabf2b6..d2135ef95c34f 100644 --- a/superset-frontend/src/chart/chartAction.js +++ b/superset-frontend/src/chart/chartAction.js @@ -38,7 +38,7 @@ import { import { addDangerToast } from '../messageToasts/actions'; import { logEvent } from '../logger/actions'; import { Logger, LOG_ACTIONS_LOAD_CHART } from '../logger/LogUtils'; -import getClientErrorObject from '../utils/getClientErrorObject'; +import { getClientErrorObject } from '../utils/getClientErrorObject'; import { allowCrossDomain as domainShardingEnabled } from '../utils/hostNamesConfig'; export const CHART_UPDATE_STARTED = 'CHART_UPDATE_STARTED'; @@ -360,7 +360,9 @@ export function exploreJSON( const chartDataRequestCaught = chartDataRequest .then(response => { if(isFeatureEnabled(FeatureFlag.GLOBAL_ASYNC_QUERIES)) { - return 
dispatch(chartUpdateQueued(response, key)); + // deal with getChartDataRequest transforming the response data + const result = ('result' in response) ? response['result'][0] : response; + return dispatch(chartUpdateQueued(result, key)); } else { // new API returns an object with an array of restults diff --git a/superset-frontend/src/chart/chartReducer.js b/superset-frontend/src/chart/chartReducer.js index 0d1b046820990..7efda628a59d5 100644 --- a/superset-frontend/src/chart/chartReducer.js +++ b/superset-frontend/src/chart/chartReducer.js @@ -75,8 +75,7 @@ export default function chartReducer(charts = {}, action) { return { ...state, asyncJobId: action.asyncJobMeta["job_id"], - chartStatus: 'queued', - chartAlert: null, + chartStatus: 'loading', chartUpdateEndTime: now(), }; }, diff --git a/superset-frontend/src/components/AsyncSelect.jsx b/superset-frontend/src/components/AsyncSelect.jsx index fc9c5eeb59667..93bbb34087e53 100644 --- a/superset-frontend/src/components/AsyncSelect.jsx +++ b/superset-frontend/src/components/AsyncSelect.jsx @@ -21,7 +21,7 @@ import PropTypes from 'prop-types'; // TODO: refactor this with `import { AsyncSelect } from src/components/Select` import { Select } from 'src/components/Select'; import { t, SupersetClient } from '@superset-ui/core'; -import getClientErrorObject from '../utils/getClientErrorObject'; +import { getClientErrorObject } from '../utils/getClientErrorObject'; const propTypes = { dataEndpoint: PropTypes.string.isRequired, diff --git a/superset-frontend/src/dashboard/actions/dashboardState.js b/superset-frontend/src/dashboard/actions/dashboardState.js index a499fa9a8e45a..fc31a8501abcd 100644 --- a/superset-frontend/src/dashboard/actions/dashboardState.js +++ b/superset-frontend/src/dashboard/actions/dashboardState.js @@ -29,7 +29,7 @@ import { updateDirectPathToFilter, } from './dashboardFilters'; import { applyDefaultFormData } from '../../explore/store'; -import getClientErrorObject from '../../utils/getClientErrorObject'; +import { getClientErrorObject } from '../../utils/getClientErrorObject'; import { SAVE_TYPE_OVERWRITE } from '../util/constants'; import { addSuccessToast, diff --git a/superset-frontend/src/dashboard/actions/datasources.js b/superset-frontend/src/dashboard/actions/datasources.js index 40cba8559a2a8..4277edc661fc2 100644 --- a/superset-frontend/src/dashboard/actions/datasources.js +++ b/superset-frontend/src/dashboard/actions/datasources.js @@ -17,7 +17,7 @@ * under the License. 
*/ import { SupersetClient } from '@superset-ui/core'; -import getClientErrorObject from '../../utils/getClientErrorObject'; +import { getClientErrorObject } from '../../utils/getClientErrorObject'; export const SET_DATASOURCE = 'SET_DATASOURCE'; export function setDatasource(datasource, key) { diff --git a/superset-frontend/src/dashboard/actions/sliceEntities.js b/superset-frontend/src/dashboard/actions/sliceEntities.js index 01512ddb14b51..69472d9155f54 100644 --- a/superset-frontend/src/dashboard/actions/sliceEntities.js +++ b/superset-frontend/src/dashboard/actions/sliceEntities.js @@ -22,7 +22,7 @@ import rison from 'rison'; import { addDangerToast } from 'src/messageToasts/actions'; import { getDatasourceParameter } from 'src/modules/utils'; -import getClientErrorObject from 'src/utils/getClientErrorObject'; +import { getClientErrorObject } from 'src/utils/getClientErrorObject'; export const SET_ALL_SLICES = 'SET_ALL_SLICES'; export function setAllSlices(slices) { diff --git a/superset-frontend/src/dashboard/components/PropertiesModal.jsx b/superset-frontend/src/dashboard/components/PropertiesModal.jsx index 019118591dd0b..dd17dcb7d553b 100644 --- a/superset-frontend/src/dashboard/components/PropertiesModal.jsx +++ b/superset-frontend/src/dashboard/components/PropertiesModal.jsx @@ -35,7 +35,7 @@ import FormLabel from 'src/components/FormLabel'; import { JsonEditor } from 'src/components/AsyncAceEditor'; import ColorSchemeControlWrapper from 'src/dashboard/components/ColorSchemeControlWrapper'; -import getClientErrorObject from '../../utils/getClientErrorObject'; +import { getClientErrorObject } from '../../utils/getClientErrorObject'; import withToasts from '../../messageToasts/enhancers/withToasts'; import '../stylesheets/buttons.less'; diff --git a/superset-frontend/src/dashboard/index.jsx b/superset-frontend/src/dashboard/index.jsx index 9171847a0a7d4..86e7b89e4a021 100644 --- a/superset-frontend/src/dashboard/index.jsx +++ b/superset-frontend/src/dashboard/index.jsx @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -import { get, some } from 'lodash'; +import { some } from 'lodash'; import React from 'react'; import ReactDOM from 'react-dom'; import thunk from 'redux-thunk'; @@ -29,8 +29,9 @@ import logger from '../middleware/loggerMiddleware'; // TODO: move/abstract this +import { SupersetClient } from '@superset-ui/core'; import * as actions from '../chart/chartAction'; -import asyncEventReceived from './reducers/asyncEvents'; +import { getClientErrorObject, parseErrorJson } from '../utils/getClientErrorObject'; import App from './App'; @@ -45,8 +46,23 @@ const initState = getInitialState(bootstrapData); // TODO: move/abstract this const asyncEventMiddleware = store => next => { + const JOB_STATUS = { + PENDING: "pending", + RUNNING: "running", + ERROR: "error", + DONE: "done", + } + const LOCALSTORAGE_KEY = 'last_async_event_id'; const pollingUrl = '/api/v1/async_event/'; - const isLoading = (state) => some(state.charts, {chartStatus: "queued"}); // pass this in + const isLoading = (state) => some(state.charts, {chartStatus: 'loading'}); // TODO: pass this in + + let lastReceivedEventId; + try { + lastReceivedEventId = localStorage.getItem(LOCALSTORAGE_KEY); + console.log('************ loaded last ID from localStorage', lastReceivedEventId); + } catch(err) { + console.warn("failed to fetch last event Id from localStorage"); + } const fetchEvents = async (lastEventId) => { const url = lastEventId ? 
`${pollingUrl}?last_id=${lastEventId}` : pollingUrl; @@ -61,34 +77,26 @@ const asyncEventMiddleware = store => next => { return data.result; } - const fetchData = async (asyncEvent) => { - const dataUrl = `/api/v1/chart/data/${asyncEvent.cache_key}`; // TODO: determine from event type - const response = await fetch(dataUrl); - - if (!response.ok) { - const message = `An error has occured: ${response.status}`; - throw new Error(message); - } - - const data = await response.json(); - return data.result[0]; + const fetchCachedData = async (asyncEvent) => { + return SupersetClient.get({ + endpoint: asyncEvent['result_url'], + }) + .then(({ json }) => { + const result = ('result' in json) ? json.result[0] : json; + return result; + }); } - const updateMessages = async () => { + const processEvents = async () => { if (isLoading(store.getState())) { console.log('************* fetching events'); try { - const state = store.getState(); - const lastEventId = get(state, 'asyncEvents.last_event_id'); - console.log('********** lastEventId', lastEventId); - const events = await fetchEvents(lastEventId); - - // TODO: how to tie this event back to the caller component (by key?) - console.log('async events received', events); + const events = await fetchEvents(lastReceivedEventId); + console.log('******* async events received', events); // iterate over queued charts const componentsInState = store.getState()['charts']; // TODO: pass key into middleware init - const queuedComponents = _.filter(componentsInState, {chartStatus: "queued"}) // TODO: pass in + const queuedComponents = _.filter(componentsInState, {chartStatus: "loading"}) // TODO: pass in const componentsByJobId = queuedComponents.reduce((acc, item) => { acc[item['asyncJobId']] = item; return acc; @@ -102,26 +110,49 @@ const asyncEventMiddleware = store => next => { const component = componentsByJobId[asyncEvent['job_id']]; if (component) { const key = component['id']; - const componentData = await fetchData(asyncEvent); - console.log('************* dispatch', key, componentData); - store.dispatch(actions.chartUpdateSucceeded(componentData, key)); + switch(asyncEvent['status']) { + case JOB_STATUS.DONE: + try { + const componentData = await fetchCachedData(asyncEvent); + console.log('************* success dispatch', key, componentData); + store.dispatch(actions.chartUpdateSucceeded(componentData, key)); // TODO: abstract + } catch(errorResponse) { + console.log('*************** error loading data from cache', errorResponse); + getClientErrorObject(errorResponse).then(parsedResponse => { + console.log('************* failed dispatch', key, parsedResponse); + store.dispatch(actions.chartUpdateFailed(parsedResponse, key)); // TODO: abstract + }); + } + break; + case JOB_STATUS.ERROR: + console.log('************ error event received'); + const parsedEvent = parseErrorJson(asyncEvent); + console.log('************* parsedErrorEvent', parsedEvent); + store.dispatch(actions.chartUpdateFailed(parsedEvent, key)); // TODO: abstract + break; + } + } else { + console.log('component not found for job_id', asyncEvent['job_id']); + } + lastReceivedEventId = asyncEvent['id']; + try { + localStorage.setItem(LOCALSTORAGE_KEY, asyncEvent['id']); + } catch (err) { + console.warn('Localstorage not enabled'); } - // console.log('asyncEventReceived', asyncEventReceived); - store.dispatch({ type: 'ASYNC_EVENT_RECEIVED', eventId: asyncEvent['id'] }); - // store.dispatch(asyncEventReceived(asyncEvent['id'])); } } catch (err) { - // console.error(err.message); throw err; } } 
else { console.log('********** no components waiting for data'); } - setTimeout(updateMessages, 500); + setTimeout(processEvents, 500); }; - updateMessages(); + // TODO: call only if feature flag is enabled + processEvents(); return action => next(action); }; diff --git a/superset-frontend/src/dashboard/reducers/asyncEvents.js b/superset-frontend/src/dashboard/reducers/asyncEvents.js deleted file mode 100644 index 4b5c9bf800441..0000000000000 --- a/superset-frontend/src/dashboard/reducers/asyncEvents.js +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* eslint-disable camelcase */ - -// TODO: move this to a more generic location -export const ASYNC_EVENT_RECEIVED = 'ASYNC_EVENT_RECEIVED'; -// export function asyncEventReceived(eventId) { -// return { type: ASYNC_EVENT_RECEIVED, eventId }; -// } - -export default function asyncEventsReducer(state = {}, action) { - const actionHandlers = { - [ASYNC_EVENT_RECEIVED]() { - return { ...state, last_event_id: action.eventId }; - }, - } - - if (action.type in actionHandlers) { - return actionHandlers[action.type](); - } - return state; -} diff --git a/superset-frontend/src/dashboard/reducers/index.js b/superset-frontend/src/dashboard/reducers/index.js index b5a08bd4ca851..afc77ce6c8e3a 100644 --- a/superset-frontend/src/dashboard/reducers/index.js +++ b/superset-frontend/src/dashboard/reducers/index.js @@ -18,7 +18,6 @@ */ import { combineReducers } from 'redux'; -import asyncEvents from './asyncEvents'; import charts from '../../chart/chartReducer'; import dashboardInfo from './dashboardInfo'; import dashboardState from './dashboardState'; @@ -30,10 +29,7 @@ import messageToasts from '../../messageToasts/reducers'; const impressionId = (state = '') => state; -const asyncEvent = (state = {}, eventId) => ({ ...state, async_last_event: eventId }); - export default combineReducers({ - asyncEvents, charts, datasources, dashboardInfo, diff --git a/superset-frontend/src/datasource/ChangeDatasourceModal.tsx b/superset-frontend/src/datasource/ChangeDatasourceModal.tsx index 4f1597be9060d..81b9b8cb8c943 100644 --- a/superset-frontend/src/datasource/ChangeDatasourceModal.tsx +++ b/superset-frontend/src/datasource/ChangeDatasourceModal.tsx @@ -27,7 +27,7 @@ import { Alert, FormControl, FormControlProps } from 'react-bootstrap'; import { SupersetClient, t } from '@superset-ui/core'; import TableView from 'src/components/TableView'; import Modal from 'src/common/components/Modal'; -import getClientErrorObject from '../utils/getClientErrorObject'; +import { getClientErrorObject } from '../utils/getClientErrorObject'; import Loading from '../components/Loading'; import withToasts from '../messageToasts/enhancers/withToasts'; diff --git 
a/superset-frontend/src/datasource/DatasourceEditor.jsx b/superset-frontend/src/datasource/DatasourceEditor.jsx index 553066203a69d..9f12c0fef1161 100644 --- a/superset-frontend/src/datasource/DatasourceEditor.jsx +++ b/superset-frontend/src/datasource/DatasourceEditor.jsx @@ -31,7 +31,7 @@ import Loading from 'src/components/Loading'; import TableSelector from 'src/components/TableSelector'; import EditableTitle from 'src/components/EditableTitle'; -import getClientErrorObject from 'src/utils/getClientErrorObject'; +import { getClientErrorObject } from 'src/utils/getClientErrorObject'; import CheckboxControl from 'src/explore/components/controls/CheckboxControl'; import TextControl from 'src/explore/components/controls/TextControl'; diff --git a/superset-frontend/src/datasource/DatasourceModal.tsx b/superset-frontend/src/datasource/DatasourceModal.tsx index 0a1b1c8e2b084..daf47c25944e3 100644 --- a/superset-frontend/src/datasource/DatasourceModal.tsx +++ b/superset-frontend/src/datasource/DatasourceModal.tsx @@ -25,7 +25,7 @@ import Modal from 'src/common/components/Modal'; import AsyncEsmComponent from 'src/components/AsyncEsmComponent'; import { isFeatureEnabled, FeatureFlag } from 'src/featureFlags'; -import getClientErrorObject from 'src/utils/getClientErrorObject'; +import { getClientErrorObject } from 'src/utils/getClientErrorObject'; import withToasts from 'src/messageToasts/enhancers/withToasts'; const DatasourceEditor = AsyncEsmComponent(() => import('./DatasourceEditor')); diff --git a/superset-frontend/src/explore/components/DisplayQueryButton.jsx b/superset-frontend/src/explore/components/DisplayQueryButton.jsx index 285d8ac4e80e2..b66a1b0dd9e2a 100644 --- a/superset-frontend/src/explore/components/DisplayQueryButton.jsx +++ b/superset-frontend/src/explore/components/DisplayQueryButton.jsx @@ -32,7 +32,7 @@ import { styled, t } from '@superset-ui/core'; import { Menu } from 'src/common/components'; import TableView, { EmptyWrapperType } from 'src/components/TableView'; import Button from 'src/components/Button'; -import getClientErrorObject from '../../utils/getClientErrorObject'; +import { getClientErrorObject } from '../../utils/getClientErrorObject'; import CopyToClipboard from '../../components/CopyToClipboard'; import { getChartDataRequest } from '../../chart/chartAction'; import downloadAsImage from '../../utils/downloadAsImage'; diff --git a/superset-frontend/src/explore/components/PropertiesModal.tsx b/superset-frontend/src/explore/components/PropertiesModal.tsx index 1281bc382ff3b..240d2b6840f67 100644 --- a/superset-frontend/src/explore/components/PropertiesModal.tsx +++ b/superset-frontend/src/explore/components/PropertiesModal.tsx @@ -32,7 +32,7 @@ import rison from 'rison'; import { t, SupersetClient } from '@superset-ui/core'; import Chart, { Slice } from 'src/types/Chart'; import FormLabel from 'src/components/FormLabel'; -import getClientErrorObject from '../../utils/getClientErrorObject'; +import { getClientErrorObject } from '../../utils/getClientErrorObject'; type PropertiesModalProps = { slice: Slice; diff --git a/superset-frontend/src/setup/setupApp.ts b/superset-frontend/src/setup/setupApp.ts index 1eef937bc7d73..740c97b355778 100644 --- a/superset-frontend/src/setup/setupApp.ts +++ b/superset-frontend/src/setup/setupApp.ts @@ -19,7 +19,8 @@ /* eslint global-require: 0 */ import $ from 'jquery'; import { SupersetClient } from '@superset-ui/core'; -import getClientErrorObject, { +import { + getClientErrorObject, ClientErrorObject, } from 
'../utils/getClientErrorObject'; import setupErrorMessages from './setupErrorMessages'; diff --git a/superset-frontend/src/utils/common.js b/superset-frontend/src/utils/common.js index 033382294eb3e..2753c0205c992 100644 --- a/superset-frontend/src/utils/common.js +++ b/superset-frontend/src/utils/common.js @@ -21,7 +21,7 @@ import { getTimeFormatter, TimeFormats, } from '@superset-ui/core'; -import getClientErrorObject from './getClientErrorObject'; +import { getClientErrorObject } from './getClientErrorObject'; // ATTENTION: If you change any constants, make sure to also change constants.py diff --git a/superset-frontend/src/utils/getClientErrorObject.ts b/superset-frontend/src/utils/getClientErrorObject.ts index 274b8b4d93cfd..a617a99a7f674 100644 --- a/superset-frontend/src/utils/getClientErrorObject.ts +++ b/superset-frontend/src/utils/getClientErrorObject.ts @@ -36,7 +36,36 @@ export type ClientErrorObject = { stacktrace?: string; } & Partial; -export default function getClientErrorObject( +export function parseErrorJson(responseObject: SupersetClientResponse | (Response & { timeout: number }) | object): ClientErrorObject { + // Backwards compatibility for old error renderers with the new error object + if (responseObject['errors'] && responseObject['errors'].length > 0) { + responseObject['error'] = responseObject['description'] = responseObject['errors'][0].message; + responseObject['link'] = responseObject['errors'][0]?.extra?.link; + } + + if (responseObject['stack']) { + responseObject = { + ...responseObject, + error: + t('Unexpected error: ') + + (responseObject['description'] || + t('(no description, click to see stack trace)')), + stacktrace: responseObject['stack'], + }; + } else if ( + responseObject['responseText'] && + responseObject['responseText'].indexOf('CSRF') >= 0 + ) { + responseObject = { + ...responseObject, + error: t(COMMON_ERR_MESSAGES.SESSION_TIMED_OUT), + }; + } + + return { ...responseObject, error: responseObject['error'] }; // typescript madness +} + +export function getClientErrorObject( response: SupersetClientResponse | (Response & { timeout: number }) | string, ): Promise { // takes a SupersetClientResponse as input, attempts to read response as Json if possible, @@ -54,33 +83,8 @@ export default function getClientErrorObject( .clone() .json() .then(errorJson => { - let error = { ...responseObject, ...errorJson }; - - // Backwards compatibility for old error renderers with the new error object - if (error.errors && error.errors.length > 0) { - error.error = error.description = error.errors[0].message; - error.link = error.errors[0]?.extra?.link; - } - - if (error.stack) { - error = { - ...error, - error: - t('Unexpected error: ') + - (error.description || - t('(no description, click to see stack trace)')), - stacktrace: error.stack, - }; - } else if ( - error.responseText && - error.responseText.indexOf('CSRF') >= 0 - ) { - error = { - ...error, - error: t(COMMON_ERR_MESSAGES.SESSION_TIMED_OUT), - }; - } - resolve(error); + const error = { ...responseObject, ...errorJson }; + resolve(parseErrorJson(error)); }) .catch(() => { // fall back to reading as text diff --git a/superset-frontend/src/views/CRUD/annotation/AnnotationList.tsx b/superset-frontend/src/views/CRUD/annotation/AnnotationList.tsx index dba16e34659a4..d1b7ef535561a 100644 --- a/superset-frontend/src/views/CRUD/annotation/AnnotationList.tsx +++ b/superset-frontend/src/views/CRUD/annotation/AnnotationList.tsx @@ -29,7 +29,7 @@ import ConfirmStatusChange from 
'src/components/ConfirmStatusChange'; import DeleteModal from 'src/components/DeleteModal'; import ListView, { ListViewProps } from 'src/components/ListView'; import SubMenu, { SubMenuProps } from 'src/components/Menu/SubMenu'; -import getClientErrorObject from 'src/utils/getClientErrorObject'; +import { getClientErrorObject } from 'src/utils/getClientErrorObject'; import withToasts from 'src/messageToasts/enhancers/withToasts'; import { IconName } from 'src/components/Icon'; import { useListViewResource } from 'src/views/CRUD/hooks'; diff --git a/superset-frontend/src/views/CRUD/data/database/DatabaseModal.tsx b/superset-frontend/src/views/CRUD/data/database/DatabaseModal.tsx index 1fa7f7bd13a0c..d53f5f1a7f669 100644 --- a/superset-frontend/src/views/CRUD/data/database/DatabaseModal.tsx +++ b/superset-frontend/src/views/CRUD/data/database/DatabaseModal.tsx @@ -21,7 +21,7 @@ import { styled, t, SupersetClient } from '@superset-ui/core'; import InfoTooltip from 'src/common/components/InfoTooltip'; import { useSingleViewResource } from 'src/views/CRUD/hooks'; import withToasts from 'src/messageToasts/enhancers/withToasts'; -import getClientErrorObject from 'src/utils/getClientErrorObject'; +import { getClientErrorObject } from 'src/utils/getClientErrorObject'; import Icon from 'src/components/Icon'; import Modal from 'src/common/components/Modal'; import Tabs from 'src/common/components/Tabs'; diff --git a/superset-frontend/src/views/CRUD/utils.tsx b/superset-frontend/src/views/CRUD/utils.tsx index 675e2b3feac6c..4c6aa1a486b65 100644 --- a/superset-frontend/src/views/CRUD/utils.tsx +++ b/superset-frontend/src/views/CRUD/utils.tsx @@ -25,7 +25,7 @@ import { } from '@superset-ui/core'; import Chart from 'src/types/Chart'; import rison from 'rison'; -import getClientErrorObject from 'src/utils/getClientErrorObject'; +import { getClientErrorObject } from 'src/utils/getClientErrorObject'; import { FetchDataConfig } from 'src/components/ListView'; import { Dashboard } from './types'; diff --git a/superset/charts/commands/data.py b/superset/charts/commands/data.py index 4427d7a3f6281..42f6a43415552 100644 --- a/superset/charts/commands/data.py +++ b/superset/charts/commands/data.py @@ -52,6 +52,7 @@ def run(self, **kwargs: Any) -> Dict[str, Any]: except CacheLoadError: raise ChartDataCacheLoadError() + # TODO: QueryContext should support SIP-40 style errors for query in payload["queries"]: if query.get("error"): raise ChartDataQueryFailedError(f"Error: {query['error']}") diff --git a/superset/charts/schemas.py b/superset/charts/schemas.py index b3e8c98d36139..351b7babeea18 100644 --- a/superset/charts/schemas.py +++ b/superset/charts/schemas.py @@ -1111,10 +1111,10 @@ class ChartDataAsyncResponseSchema(Schema): description="Unique session async channel ID", allow_none=False, ) job_id = fields.String(description="Unique async job ID", allow_none=False,) + user_id = fields.String(description="Requesting user ID", allow_none=True,) status = fields.String(description="Status value for async job", allow_none=False,) - msg = fields.String(description="User-facing message text", allow_none=True,) - cache_key = fields.String( - description="Unique cache key for async QueryContext", allow_none=False, + result_url = fields.String( + description="Unique result URL for fetching async query data", allow_none=False, ) diff --git a/superset/exceptions.py b/superset/exceptions.py index 839a90f16a98b..fcf897f7bde50 100644 --- a/superset/exceptions.py +++ b/superset/exceptions.py @@ -14,7 +14,7 @@ # KIND, either 
express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from flask_babel import gettext as _ @@ -63,6 +63,14 @@ def __init__( self.payload = payload +class SupersetVizException(SupersetException): + status = 400 + + def __init__(self, errors: List[SupersetError] = []) -> None: + super(SupersetVizException, self).__init__(str(errors)) + self.errors = errors + + class NoDataException(SupersetException): status = 400 diff --git a/superset/tasks/async_queries.py b/superset/tasks/async_queries.py index 49f330fa493d3..e11a0df1ea7d5 100644 --- a/superset/tasks/async_queries.py +++ b/superset/tasks/async_queries.py @@ -21,6 +21,7 @@ from flask import current_app from superset import app +from superset.exceptions import SupersetException, SupersetVizException from superset.extensions import async_query_manager, cache_manager, celery_app from superset.utils.cache import generate_cache_key, set_and_log_cache from superset.views.utils import get_datasource_info, get_viz @@ -44,15 +45,17 @@ def load_chart_data_into_cache( command = ChartDataCommand() command.set_query_context(form_data) result = command.run(cache=True) + cache_key = result["cache_key"] + result_url = f"/api/v1/chart/data/{cache_key}" async_query_manager.update_job( - job_metadata, - async_query_manager.STATUS_DONE, - cache_key=result["cache_key"], + job_metadata, async_query_manager.STATUS_DONE, result_url=result_url, ) except Exception as exc: - msg = exc.message if hasattr(exc, "message") else str(exc) # type: ignore + # TODO: QueryContext should support SIP-40 style errors + error = exc.message if hasattr(exc, "message") else str(exc) # type: ignore + errors = [{"message": error}] async_query_manager.update_job( - job_metadata, async_query_manager.STATUS_ERROR, msg=msg + job_metadata, async_query_manager.STATUS_ERROR, errors=errors ) raise exc @@ -77,7 +80,9 @@ def load_explore_json_into_cache( force=force, ) # run query & cache results - viz_obj.get_payload() + payload = viz_obj.get_payload() + if viz_obj.has_error(payload): + raise SupersetVizException(errors=payload["errors"]) # cache form_data for async retrieval cache_value = {"form_data": form_data, "response_type": response_type} @@ -85,14 +90,21 @@ def load_explore_json_into_cache( cache_value, "ejr-" ) # ejr: explore_json request set_and_log_cache(cache_manager.cache, cache_key, cache_value) - + result_url = f"/superset/explore_json/data/{cache_key}" async_query_manager.update_job( - job_metadata, async_query_manager.STATUS_DONE, cache_key=cache_key, + job_metadata, async_query_manager.STATUS_DONE, result_url=result_url, ) except Exception as exc: - msg = exc.message if hasattr(exc, "message") else str(exc) # type: ignore + if isinstance(exc, SupersetVizException): + errors = exc.errors + else: + error = ( + exc.message if hasattr(exc, "message") else str(exc) # type: ignore + ) + errors = [error] + async_query_manager.update_job( - job_metadata, async_query_manager.STATUS_ERROR, msg=msg + job_metadata, async_query_manager.STATUS_ERROR, errors=errors ) raise exc diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index a8b8392934535..b4d72dbb34ccb 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -133,8 +133,8 @@ def _build_job_metadata( "job_id": job_id, "user_id": session["user_id"] if "user_id" in session else None, "status": 
kwargs["status"], - "msg": kwargs["msg"] if "msg" in kwargs else None, - "cache_key": kwargs["cache_key"] if "cache_key" in kwargs else None, + "errors": kwargs["errors"] if "errors" in kwargs else [], + "result_url": kwargs["result_url"] if "result_url" in kwargs else None, } def read_events( diff --git a/superset/viz.py b/superset/viz.py index 2eeb03cd26b6a..8f05fa8f80b53 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -541,9 +541,9 @@ def get_df_payload( if self.force_cached and not is_loaded: logger.warning( - f"force_cached (viz.py): value not found for key {cache_key}" + f"force_cached (viz.py): value not found for cache key {cache_key}" ) - raise CacheLoadError() + raise CacheLoadError(_("Cached value not found")) if query_obj and not is_loaded: try: @@ -625,13 +625,15 @@ def json_dumps(self, obj: Any, sort_keys: bool = False) -> str: obj, default=utils.json_int_dttm_ser, ignore_nan=True, sort_keys=sort_keys ) - def payload_json_and_has_error(self, payload: VizPayload) -> Tuple[str, bool]: - has_error = ( + def has_error(self, payload: VizPayload) -> bool: + return ( payload.get("status") == utils.QueryStatus.FAILED or payload.get("error") is not None or bool(payload.get("errors")) ) - return self.json_dumps(payload), has_error + + def payload_json_and_has_error(self, payload: VizPayload) -> Tuple[str, bool]: + return self.json_dumps(payload), self.has_error(payload) @property def data(self) -> Dict[str, Any]: From 3b41f16cbfd8099660a76509eb4fdc55c81c4203 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Mon, 30 Nov 2020 00:08:20 -0800 Subject: [PATCH 12/42] Abstract asyncEvent middleware --- superset-frontend/src/dashboard/index.jsx | 129 +----------------- .../src/middleware/asyncEvent.js | 129 ++++++++++++++++++ 2 files changed, 136 insertions(+), 122 deletions(-) create mode 100644 superset-frontend/src/middleware/asyncEvent.js diff --git a/superset-frontend/src/dashboard/index.jsx b/superset-frontend/src/dashboard/index.jsx index 86e7b89e4a021..8ebba38058f5e 100644 --- a/superset-frontend/src/dashboard/index.jsx +++ b/superset-frontend/src/dashboard/index.jsx @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -import { some } from 'lodash'; +import { filter } from 'lodash'; import React from 'react'; import ReactDOM from 'react-dom'; import thunk from 'redux-thunk'; @@ -25,137 +25,22 @@ import { initFeatureFlags } from 'src/featureFlags'; import { initEnhancer } from '../reduxUtils'; import getInitialState from './reducers/getInitialState'; import rootReducer from './reducers/index'; +import initAsyncEvents from '../middleware/asyncEvent'; import logger from '../middleware/loggerMiddleware'; - - -// TODO: move/abstract this -import { SupersetClient } from '@superset-ui/core'; import * as actions from '../chart/chartAction'; -import { getClientErrorObject, parseErrorJson } from '../utils/getClientErrorObject'; - import App from './App'; -// TODO: re-enable -// import { catch } from 'fetch-mock'; const appContainer = document.getElementById('app'); const bootstrapData = JSON.parse(appContainer.getAttribute('data-bootstrap')); initFeatureFlags(bootstrapData.common.feature_flags); const initState = getInitialState(bootstrapData); - -// TODO: move/abstract this -const asyncEventMiddleware = store => next => { - const JOB_STATUS = { - PENDING: "pending", - RUNNING: "running", - ERROR: "error", - DONE: "done", - } - const LOCALSTORAGE_KEY = 'last_async_event_id'; - const pollingUrl = '/api/v1/async_event/'; - const isLoading = (state) => some(state.charts, {chartStatus: 'loading'}); // TODO: pass this in - - let lastReceivedEventId; - try { - lastReceivedEventId = localStorage.getItem(LOCALSTORAGE_KEY); - console.log('************ loaded last ID from localStorage', lastReceivedEventId); - } catch(err) { - console.warn("failed to fetch last event Id from localStorage"); - } - - const fetchEvents = async (lastEventId) => { - const url = lastEventId ? `${pollingUrl}?last_id=${lastEventId}` : pollingUrl; - const response = await fetch(url); - - if (!response.ok) { - const message = `An error has occured: ${response.status}`; - throw new Error(message); - } - - const data = await response.json(); - return data.result; - } - - const fetchCachedData = async (asyncEvent) => { - return SupersetClient.get({ - endpoint: asyncEvent['result_url'], - }) - .then(({ json }) => { - const result = ('result' in json) ? 
json.result[0] : json; - return result; - }); - } - - const processEvents = async () => { - if (isLoading(store.getState())) { - console.log('************* fetching events'); - try { - const events = await fetchEvents(lastReceivedEventId); - console.log('******* async events received', events); - - // iterate over queued charts - const componentsInState = store.getState()['charts']; // TODO: pass key into middleware init - const queuedComponents = _.filter(componentsInState, {chartStatus: "loading"}) // TODO: pass in - const componentsByJobId = queuedComponents.reduce((acc, item) => { - acc[item['asyncJobId']] = item; - return acc; - }, {}); - - console.log('componentsByJobId', componentsByJobId); - - for (const asyncEvent of events) { - console.log('async event', asyncEvent); - console.log('job id', asyncEvent['job_id']); - const component = componentsByJobId[asyncEvent['job_id']]; - if (component) { - const key = component['id']; - switch(asyncEvent['status']) { - case JOB_STATUS.DONE: - try { - const componentData = await fetchCachedData(asyncEvent); - console.log('************* success dispatch', key, componentData); - store.dispatch(actions.chartUpdateSucceeded(componentData, key)); // TODO: abstract - } catch(errorResponse) { - console.log('*************** error loading data from cache', errorResponse); - getClientErrorObject(errorResponse).then(parsedResponse => { - console.log('************* failed dispatch', key, parsedResponse); - store.dispatch(actions.chartUpdateFailed(parsedResponse, key)); // TODO: abstract - }); - } - break; - case JOB_STATUS.ERROR: - console.log('************ error event received'); - const parsedEvent = parseErrorJson(asyncEvent); - console.log('************* parsedErrorEvent', parsedEvent); - store.dispatch(actions.chartUpdateFailed(parsedEvent, key)); // TODO: abstract - break; - } - } else { - console.log('component not found for job_id', asyncEvent['job_id']); - } - lastReceivedEventId = asyncEvent['id']; - try { - localStorage.setItem(LOCALSTORAGE_KEY, asyncEvent['id']); - } catch (err) { - console.warn('Localstorage not enabled'); - } - } - } catch (err) { - throw err; - } - } else { - console.log('********** no components waiting for data'); - } - - setTimeout(processEvents, 500); - }; - - // TODO: call only if feature flag is enabled - processEvents(); - - return action => next(action); -}; +const asyncEventMiddleware = initAsyncEvents({ + getPendingComponents: (state) => filter(state.charts, {chartStatus: 'loading'}), + successAction: (componentId, componentData) => actions.chartUpdateSucceeded(componentData, componentId), + errorAction: (componentId, response) => actions.chartUpdateFailed(response, componentId) +}); const store = createStore( rootReducer, diff --git a/superset-frontend/src/middleware/asyncEvent.js b/superset-frontend/src/middleware/asyncEvent.js new file mode 100644 index 0000000000000..de4f2199cf29b --- /dev/null +++ b/superset-frontend/src/middleware/asyncEvent.js @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { SupersetClient } from '@superset-ui/core'; +import { isFeatureEnabled, FeatureFlag } from '../featureFlags'; +import { getClientErrorObject, parseErrorJson } from '../utils/getClientErrorObject'; + +const initAsyncEvents = (options) => { + const POLLING_DELAY = 500; + const { getPendingComponents, successAction, errorAction } = options; + + const middleware = store => next => { + const JOB_STATUS = { + PENDING: "pending", + RUNNING: "running", + ERROR: "error", + DONE: "done", + } + const LOCALSTORAGE_KEY = 'last_async_event_id'; + const pollingUrl = '/api/v1/async_event/'; + let lastReceivedEventId; + + try { + lastReceivedEventId = localStorage.getItem(LOCALSTORAGE_KEY); + } catch(err) { + console.warn("failed to fetch last event Id from localStorage"); + } + + const fetchEvents = async (lastEventId) => { + const url = lastEventId ? `${pollingUrl}?last_id=${lastEventId}` : pollingUrl; + const response = await fetch(url); + + if (!response.ok) { + const message = `An error has occured: ${response.status}`; + throw new Error(message); + } + + const data = await response.json(); + return data.result; + } + + const fetchCachedData = async (asyncEvent) => { + const { json } = await SupersetClient.get({ + endpoint: asyncEvent['result_url'], + }); + const result = ('result' in json) ? json.result[0] : json; + return result; + } + + const setLastId = asyncEvent => { + lastReceivedEventId = asyncEvent['id']; + try { + localStorage.setItem(LOCALSTORAGE_KEY, lastReceivedEventId); + } catch (err) { + console.warn('Error saving event ID to localStorage', err); + } + } + + const processEvents = async () => { + const state = store.getState(); + const queuedComponents = getPendingComponents(state); + if (queuedComponents.length) { + try { + const events = await fetchEvents(lastReceivedEventId); + if (!events || !events.length) { + return setTimeout(processEvents, POLLING_DELAY); + } + const componentsByJobId = queuedComponents.reduce((acc, item) => { + acc[item['asyncJobId']] = item; + return acc; + }, {}); + + for (const asyncEvent of events) { + const component = componentsByJobId[asyncEvent['job_id']]; + if (!component) { + console.warn('component not found for job_id', asyncEvent['job_id']); + continue; + } + const componentId = component['id']; + switch(asyncEvent['status']) { + case JOB_STATUS.DONE: + try { + const componentData = await fetchCachedData(asyncEvent); + store.dispatch(successAction(componentId, componentData)); + } catch(errorResponse) { + getClientErrorObject(errorResponse).then(parsedResponse => { + store.dispatch(errorAction(componentId, parsedResponse)); + }); + } + break; + case JOB_STATUS.ERROR: + const parsedEvent = parseErrorJson(asyncEvent); + store.dispatch(errorAction(componentId, parsedEvent)); + break; + } + setLastId(asyncEvent); + } + } catch (err) { + console.error(err); + } + } + + setTimeout(processEvents, POLLING_DELAY); + }; + + if(isFeatureEnabled(FeatureFlag.GLOBAL_ASYNC_QUERIES)) processEvents(); + + return action => next(action); + }; + + return middleware; +} + +export default initAsyncEvents; From 
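
With the middleware extracted into its own module, each entry point only supplies selectors and action creators. The sketch below consolidates the wiring that the dashboard and explore `index.jsx` diffs in these patches perform; it is illustrative, with the logger middleware and Redux enhancers trimmed for brevity.

// Sketch of the entry-point wiring shown in the dashboard/explore diffs above.
import { createStore, applyMiddleware, compose } from 'redux';
import thunk from 'redux-thunk';
import { filter } from 'lodash';
import initAsyncEvents from '../middleware/asyncEvent';
import * as actions from '../chart/chartAction';
import rootReducer from './reducers/index';

declare const initState: any; // bootstrap-derived initial state, as in the real entry points

const asyncEventMiddleware = initAsyncEvents({
  // charts left in the 'loading' state are the ones waiting on an async job
  getPendingComponents: (state: any) =>
    filter(state.charts, { chartStatus: 'loading' }),
  successAction: (componentId: string | number, componentData: any) =>
    actions.chartUpdateSucceeded(componentData, componentId),
  errorAction: (componentId: string | number, response: any) =>
    actions.chartUpdateFailed(response, componentId),
});

const store = createStore(
  rootReducer,
  initState,
  compose(applyMiddleware(thunk, asyncEventMiddleware)),
);

Because the middleware only needs a pending-component selector plus success and error action creators, other consumers could in principle reuse it by supplying their own selectors and actions.
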
49b6b52411813c2064acb66ccc0a17ea1c416413 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Mon, 30 Nov 2020 12:42:23 -0800 Subject: [PATCH 13/42] Async chart loading for Explore --- superset-frontend/src/explore/index.jsx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/superset-frontend/src/explore/index.jsx b/superset-frontend/src/explore/index.jsx index 25a704757a523..1cebfd43bd7ac 100644 --- a/superset-frontend/src/explore/index.jsx +++ b/superset-frontend/src/explore/index.jsx @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ +import { filter } from 'lodash'; import React from 'react'; import ReactDOM from 'react-dom'; import { createStore, applyMiddleware, compose } from 'redux'; @@ -25,6 +26,8 @@ import { initFeatureFlags } from '../featureFlags'; import { initEnhancer } from '../reduxUtils'; import getInitialState from './reducers/getInitialState'; import rootReducer from './reducers/index'; +import initAsyncEvents from '../middleware/asyncEvent'; +import * as actions from '../chart/chartAction'; import App from './App'; @@ -35,10 +38,17 @@ const bootstrapData = JSON.parse( initFeatureFlags(bootstrapData.common.feature_flags); const initState = getInitialState(bootstrapData); +const asyncEventMiddleware = initAsyncEvents({ + getPendingComponents: (state) => filter(state.charts, {chartStatus: 'loading'}), + successAction: (componentId, componentData) => actions.chartUpdateSucceeded(componentData, componentId), + errorAction: (componentId, response) => actions.chartUpdateFailed(response, componentId) +}); + + const store = createStore( rootReducer, initState, - compose(applyMiddleware(thunk, logger), initEnhancer(false)), + compose(applyMiddleware(thunk, logger, asyncEventMiddleware), initEnhancer(false)), ); ReactDOM.render(, document.getElementById('app')); From e2dc30eab4f315f110a383b516cf70bd622496fe Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Mon, 30 Nov 2020 15:25:59 -0800 Subject: [PATCH 14/42] Pylint fixes --- superset/app.py | 5 +- superset/async_events/api.py | 2 +- superset/charts/api.py | 105 ++++++++++---------------- superset/charts/commands/data.py | 9 ++- superset/common/query_context.py | 7 +- superset/common/query_object.py | 3 +- superset/exceptions.py | 2 +- superset/tasks/async_queries.py | 8 +- superset/utils/async_query_manager.py | 62 ++++++++------- superset/utils/cache.py | 9 +-- superset/views/core.py | 2 +- 11 files changed, 98 insertions(+), 116 deletions(-) diff --git a/superset/app.py b/superset/app.py index fb4590917af60..03d7d62b7ca01 100644 --- a/superset/app.py +++ b/superset/app.py @@ -651,7 +651,10 @@ def configure_async_queries(self) -> None: or self.config["DATA_CACHE_CONFIG"]["CACHE_TYPE"] == "null" ): raise Exception( - """Cache backends (CACHE_CONFIG, DATA_CACHE_CONFIG) must be configured and non-null in order to enable async queries """ + """ + Cache backends (CACHE_CONFIG, DATA_CACHE_CONFIG) must be configured + and non-null in order to enable async queries + """ ) async_query_manager.init_app(self.flask_app) diff --git a/superset/async_events/api.py b/superset/async_events/api.py index 91d4e24541c6f..5676038802d32 100644 --- a/superset/async_events/api.py +++ b/superset/async_events/api.py @@ -21,7 +21,7 @@ from flask_appbuilder.api import BaseApi, safe from flask_appbuilder.security.decorators import permission_name, protect -from superset.extensions import async_query_manager, cache_manager, db, event_logger +from superset.extensions import 
async_query_manager, event_logger from superset.utils.async_query_manager import AsyncQueryTokenException logger = logging.getLogger(__name__) diff --git a/superset/charts/api.py b/superset/charts/api.py index 6c1107e5e317f..1de930bfbf7e2 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -66,7 +66,7 @@ from superset.commands.importers.v1.utils import remove_root from superset.constants import RouteMethod from superset.exceptions import SupersetSecurityException -from superset.extensions import async_query_manager, event_logger +from superset.extensions import event_logger from superset.models.slice import Slice from superset.tasks.thumbnails import cache_chart_thumbnail from superset.utils.async_query_manager import AsyncQueryTokenException @@ -453,6 +453,41 @@ def bulk_delete(self, **kwargs: Any) -> Response: except ChartBulkDeleteFailedError as ex: return self.response_422(message=str(ex)) + def get_data_response(self, command: ChartDataCommand) -> Response: + try: + result = command.run() + except ChartDataCacheLoadError as exc: + return self.response_400(message=exc.message) + except ChartDataQueryFailedError as exc: + return self.response_400(message=exc.message) + + result_format = result["query_context"].result_format + response = self.response_400( + message=f"Unsupported result_format: {result_format}" + ) + + if result_format == ChartDataResultFormat.CSV: + # return the first result + data = result["queries"][0]["data"] + response = CsvResponse( + data, + status=200, + headers=generate_download_headers("csv"), + mimetype="application/csv", + ) + + if result_format == ChartDataResultFormat.JSON: + response_data = simplejson.dumps( + {"result": result["queries"]}, + default=json_int_dttm_ser, + ignore_nan=True, + ) + resp = make_response(response_data, 200) + resp.headers["Content-Type"] = "application/json; charset=utf-8" + response = resp + + return response + @expose("/data", methods=["POST"]) @protect() @safe @@ -524,38 +559,7 @@ def data(self) -> Response: result = command.run_async() return self.response(202, **result) - # TODO: DRY - try: - result = command.run() - except ChartDataQueryFailedError as exc: - return self.response_400(message=exc.message) - - result_format = result["query_context"].result_format - response = self.response_400( - message=f"Unsupported result_format: {result_format}" - ) - - if result_format == ChartDataResultFormat.CSV: - # return the first result - data = result["queries"][0]["data"] - response = CsvResponse( - data, - status=200, - headers=generate_download_headers("csv"), - mimetype="application/csv", - ) - - if result_format == ChartDataResultFormat.JSON: - response_data = simplejson.dumps( - {"result": result["queries"]}, - default=json_int_dttm_ser, - ignore_nan=True, - ) - resp = make_response(response_data, 200) - resp.headers["Content-Type"] = "application/json; charset=utf-8" - response = resp - - return response + return self.get_data_response(command) @expose("/data/", methods=["GET"]) @event_logger.log_this @@ -607,40 +611,7 @@ def data_from_cache(self, cache_key: str) -> Response: logger.info(exc) return self.response_401() - # TODO: DRY - try: - result = command.run() - except ChartDataCacheLoadError as exc: - return self.response_400(message=exc.message) - except ChartDataQueryFailedError as exc: - return self.response_400(message=exc.message) - - result_format = result["query_context"].result_format - response = self.response_400( - message=f"Unsupported result_format: {result_format}" - ) - - if result_format 
== ChartDataResultFormat.CSV: - # return the first result - data = result["queries"][0]["data"] - response = CsvResponse( - data, - status=200, - headers=generate_download_headers("csv"), - mimetype="application/csv", - ) - - if result_format == ChartDataResultFormat.JSON: - response_data = simplejson.dumps( - {"result": result["queries"]}, - default=json_int_dttm_ser, - ignore_nan=True, - ) - resp = make_response(response_data, 200) - resp.headers["Content-Type"] = "application/json; charset=utf-8" - response = resp - - return response + return self.get_data_response(command) @expose("//cache_screenshot/", methods=["GET"]) @protect() diff --git a/superset/charts/commands/data.py b/superset/charts/commands/data.py index 42f6a43415552..97365d89715ae 100644 --- a/superset/charts/commands/data.py +++ b/superset/charts/commands/data.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. import logging -from typing import Any, Dict, Optional +from typing import Any, Dict from flask import Request from marshmallow import ValidationError @@ -42,7 +42,8 @@ def __init__(self) -> None: self._async_channel_id: str def run(self, **kwargs: Any) -> Dict[str, Any]: - # caching is handled in query_context.get_df_payload (also evals `force` property) + # caching is handled in query_context.get_df_payload + # (also evals `force` property) cache_query_context = kwargs["cache"] if "cache" in kwargs else False force_cached = kwargs["force_cached"] if "force_cached" in kwargs else False try: @@ -88,7 +89,9 @@ def validate_request(self, request: Request) -> None: jwt_data = async_query_manager.parse_jwt_from_request(request) self._async_channel_id = jwt_data["channel"] - def load_query_context_from_cache(self, cache_key: str) -> Dict[str, Any]: + def load_query_context_from_cache( # pylint: disable=no-self-use + self, cache_key: str + ) -> Dict[str, Any]: cache_value = cache.get(cache_key) if not cache_value: raise ChartDataCacheLoadError("Cached data not found") diff --git a/superset/common/query_context.py b/superset/common/query_context.py index 95fd188d85c72..d53e362392128 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -15,10 +15,9 @@ # specific language governing permissions and limitations # under the License. import copy -import hashlib import logging import math -from datetime import datetime, timedelta +from datetime import timedelta from typing import Any, cast, ClassVar, Dict, List, Optional, Union import numpy as np @@ -201,7 +200,7 @@ def get_payload(self, **kwargs: Any) -> Dict[str, Any]: ) force_cached = kwargs["force_cached"] if "force_cached" in kwargs else False - """Get all the payloads from the QueryObjects""" + # Get all the payloads from the QueryObjects query_results = [ self.get_single_payload(query_object, force_cached=force_cached) for query_object in self.queries @@ -296,7 +295,7 @@ def get_df_payload( # pylint: disable=too-many-statements if force_cached and not is_loaded: logger.warning( - f"force_cached (QueryContext): value not found for key {cache_key}" + "force_cached (QueryContext): value not found for key %s", cache_key ) raise CacheLoadError() diff --git a/superset/common/query_object.py b/superset/common/query_object.py index 4692860b47482..3277bbbbcb76f 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -251,7 +251,8 @@ def exec_post_processing(self, df: DataFrame) -> DataFrame: :param df: DataFrame returned from database model. 
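
Taken together, the `data` and `data_from_cache` endpoints above and the async event API define the round trip the frontend performs when GLOBAL_ASYNC_QUERIES is enabled: a chart data request returns 202 with job metadata, the client polls for events, and a `done` event carries the `result_url` to fetch the cached payload. The helper below is a hypothetical condensation of that cycle for illustration only; `waitForAsyncResult` and its polling interval are not part of these patches, and the real logic lives in the `asyncEvent` middleware shown earlier.

// Hypothetical helper illustrating the async round trip; not part of the patches.
import { SupersetClient } from '@superset-ui/core';

async function waitForAsyncResult(jobId: string, pollMs = 500): Promise<any> {
  let lastId: string | null = null;
  // Poll /api/v1/async_event/ until an event for this job reports done or error.
  for (;;) {
    const endpoint = lastId
      ? `/api/v1/async_event/?last_id=${lastId}`
      : '/api/v1/async_event/';
    const response = await fetch(endpoint);
    if (!response.ok) throw new Error(`An error has occurred: ${response.status}`);
    const { result: events = [] } = await response.json();

    for (const event of events) {
      lastId = event.id; // remember the stream position, as the middleware does
      if (event.job_id !== jobId) continue;
      if (event.status === 'error') throw event; // the middleware runs this through parseErrorJson
      if (event.status === 'done') {
        // result_url points at the cached payload, e.g. /api/v1/chart/data/<cache_key>
        const { json } = await SupersetClient.get({ endpoint: event.result_url });
        return 'result' in json ? json.result[0] : json;
      }
    }
    await new Promise(resolve => setTimeout(resolve, pollMs));
  }
}
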
:return: new DataFrame to which all post processing operations have been applied - :raises QueryObjectValidationError: If the post processing operation in incorrect + :raises QueryObjectValidationError: If the post processing operation + is incorrect """ for post_process in self.post_processing: operation = post_process.get("operation") diff --git a/superset/exceptions.py b/superset/exceptions.py index f84efb2f2d6a9..52c19c3a02ad6 100644 --- a/superset/exceptions.py +++ b/superset/exceptions.py @@ -68,7 +68,7 @@ def __init__( class SupersetVizException(SupersetException): status = 400 - def __init__(self, errors: List[SupersetError] = []) -> None: + def __init__(self, errors: List[SupersetError]) -> None: super(SupersetVizException, self).__init__(str(errors)) self.errors = errors diff --git a/superset/tasks/async_queries.py b/superset/tasks/async_queries.py index e11a0df1ea7d5..b89388148b88a 100644 --- a/superset/tasks/async_queries.py +++ b/superset/tasks/async_queries.py @@ -21,7 +21,7 @@ from flask import current_app from superset import app -from superset.exceptions import SupersetException, SupersetVizException +from superset.exceptions import SupersetVizException from superset.extensions import async_query_manager, cache_manager, celery_app from superset.utils.cache import generate_cache_key, set_and_log_cache from superset.views.utils import get_datasource_info, get_viz @@ -52,7 +52,7 @@ def load_chart_data_into_cache( ) except Exception as exc: # TODO: QueryContext should support SIP-40 style errors - error = exc.message if hasattr(exc, "message") else str(exc) # type: ignore + error = exc.message if hasattr(exc, "message") else str(exc) # type: ignore # pylint: disable=no-member errors = [{"message": error}] async_query_manager.update_job( job_metadata, async_query_manager.STATUS_ERROR, errors=errors @@ -96,10 +96,10 @@ def load_explore_json_into_cache( ) except Exception as exc: if isinstance(exc, SupersetVizException): - errors = exc.errors + errors = exc.errors # pylint: disable=no-member else: error = ( - exc.message if hasattr(exc, "message") else str(exc) # type: ignore + exc.message if hasattr(exc, "message") else str(exc) # type: ignore # pylint: disable=no-member ) errors = [error] diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index b4d72dbb34ccb..ce5a304214cf9 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -34,6 +34,23 @@ class AsyncQueryJobException(Exception): pass +def build_job_metadata(channel_id: str, job_id: str, **kwargs: Any) -> Dict[str, Any]: + return { + "channel_id": channel_id, + "job_id": job_id, + "user_id": session["user_id"] if "user_id" in session else None, + "status": kwargs["status"], + "errors": kwargs["errors"] if "errors" in kwargs else [], + "result_url": kwargs["result_url"] if "result_url" in kwargs else None, + } + + +def parse_event(event_data: Tuple[str, Dict[str, Any]]) -> Dict[str, Any]: + event_id = event_data[0] + event_payload = event_data[1]["data"] + return {"id": event_id, **json.loads(event_payload)} + + class AsyncQueryManager: MAX_EVENT_COUNT = 100 STATUS_PENDING = "pending" @@ -71,7 +88,9 @@ def init_app(self, app: Flask) -> None: self._jwt_secret = config["GLOBAL_ASYNC_QUERIES_JWT_SECRET"] @app.after_request - def validate_session(response: Response) -> Response: + def validate_session( # pylint: disable=unused-variable + response: Response, + ) -> Response: reset_token = False user_id = session["user_id"] if "user_id" in session 
else None @@ -123,32 +142,17 @@ def parse_jwt_from_request(self, request: Request) -> Dict[str, Any]: def init_job(self, channel_id: str) -> Dict[str, Any]: job_id = str(uuid.uuid4()) - return self._build_job_metadata(channel_id, job_id, status=self.STATUS_PENDING) - - def _build_job_metadata( - self, channel_id: str, job_id: str, **kwargs: Any - ) -> Dict[str, Any]: - return { - "channel_id": channel_id, - "job_id": job_id, - "user_id": session["user_id"] if "user_id" in session else None, - "status": kwargs["status"], - "errors": kwargs["errors"] if "errors" in kwargs else [], - "result_url": kwargs["result_url"] if "result_url" in kwargs else None, - } + return build_job_metadata(channel_id, job_id, status=self.STATUS_PENDING) def read_events( self, channel: str, last_id: Optional[str] ) -> List[Optional[Dict[str, Any]]]: stream_name = f"{self._stream_prefix}{channel}" start_id = last_id if last_id else "-" - results = self._redis.xrange(stream_name, start_id, "+", self.MAX_EVENT_COUNT) # type: ignore - return [] if not results else list(map(self.parse_event, results)) - - def parse_event(self, event_data: Tuple[str, Dict[str, Any]]) -> Dict[str, Any]: - event_id = event_data[0] - event_payload = event_data[1]["data"] - return {"id": event_id, **json.loads(event_payload)} + results = self._redis.xrange( # type: ignore + stream_name, start_id, "+", self.MAX_EVENT_COUNT + ) + return [] if not results else list(map(parse_event, results)) def update_job( self, job_metadata: Dict[str, Any], status: str, **kwargs: Any @@ -162,13 +166,15 @@ def update_job( updates = {"status": status, **kwargs} event_data = {"data": json.dumps({**job_metadata, **updates})} - logger.info( - f"********** logging event data to stream {self._stream_prefix}{job_metadata['channel_id']}" - ) - logger.info(event_data) - full_stream_name = f"{self._stream_prefix}full" scoped_stream_name = f"{self._stream_prefix}{job_metadata['channel_id']}" - self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit) # type: ignore - self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose) # type: ignore + logger.info("********** logging event data to stream %s", scoped_stream_name) + logger.info(event_data) + + self._redis.xadd( # type: ignore + scoped_stream_name, event_data, "*", self._stream_limit + ) + self._redis.xadd( # type: ignore + full_stream_name, event_data, "*", self._stream_limit_firehose + ) diff --git a/superset/utils/cache.py b/superset/utils/cache.py index 158c5e2fe5bc1..0076d989afb5b 100644 --- a/superset/utils/cache.py +++ b/superset/utils/cache.py @@ -25,7 +25,6 @@ from flask_caching import Cache from werkzeug.wrappers.etag import ETagResponseMixin -from superset import utils from superset.extensions import cache_manager from superset.stats_logger import BaseStatsLogger from superset.utils.core import json_int_dttm_ser @@ -41,8 +40,8 @@ def json_dumps(obj: Any, sort_keys: bool = False) -> str: def generate_cache_key(values_dict: Dict[str, Any], key_prefix: str = "") -> str: json_data = json_dumps(values_dict, sort_keys=True) - hash = hashlib.md5(json_data.encode("utf-8")).hexdigest() - return f"{key_prefix}{hash}" + hash_str = hashlib.md5(json_data.encode("utf-8")).hexdigest() + return f"{key_prefix}{hash_str}" def set_and_log_cache( @@ -57,10 +56,10 @@ def set_and_log_cache( value = {**cache_value, "dttm": dttm} cache_instance.set(cache_key, value, timeout=timeout) stats_logger.incr("set_cache_key") - except Exception as ex: + except Exception as ex: # pylint: 
disable=broad-except # cache.set call can fail if the backend is down or if # the key is too large or whatever other reasons - logger.warning("Could not cache key {}".format(cache_key)) + logger.warning("Could not cache key %s", cache_key) logger.exception(ex) diff --git a/superset/views/core.py b/superset/views/core.py index 38b2f3f35a018..40190680e7dd3 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -77,7 +77,7 @@ SupersetSecurityException, SupersetTimeoutException, ) -from superset.extensions import async_query_manager, cache_manager, security_manager +from superset.extensions import async_query_manager, cache_manager from superset.jinja_context import get_template_processor from superset.models.core import Database, FavStar, Log from superset.models.dashboard import Dashboard From 276c84e870455c4192f530705f3f8e3bf309c658 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Tue, 1 Dec 2020 10:12:27 -0800 Subject: [PATCH 15/42] asyncEvent middleware -> TypeScript, JS linting --- superset-frontend/src/chart/chartAction.js | 57 +++--- superset-frontend/src/chart/chartReducer.js | 2 +- superset-frontend/src/dashboard/index.jsx | 14 +- superset-frontend/src/explore/index.jsx | 15 +- .../src/middleware/asyncEvent.js | 129 ------------- .../src/middleware/asyncEvent.ts | 179 ++++++++++++++++++ .../src/utils/getClientErrorObject.ts | 33 ++-- 7 files changed, 243 insertions(+), 186 deletions(-) delete mode 100644 superset-frontend/src/middleware/asyncEvent.js create mode 100644 superset-frontend/src/middleware/asyncEvent.ts diff --git a/superset-frontend/src/chart/chartAction.js b/superset-frontend/src/chart/chartAction.js index b873abf4b7ba6..70000517366e8 100644 --- a/superset-frontend/src/chart/chartAction.js +++ b/superset-frontend/src/chart/chartAction.js @@ -361,39 +361,38 @@ export function exploreJSON( const chartDataRequestCaught = chartDataRequest .then(response => { - if(isFeatureEnabled(FeatureFlag.GLOBAL_ASYNC_QUERIES)) { + if (isFeatureEnabled(FeatureFlag.GLOBAL_ASYNC_QUERIES)) { // deal with getChartDataRequest transforming the response data - const result = ('result' in response) ? response['result'][0] : response; + const result = 'result' in response ? response.result[0] : response; return dispatch(chartUpdateQueued(result, key)); + } - } else { - // new API returns an object with an array of restults - // problem: response holds a list of results, when before we were just getting one result. - // How to make the entire app compatible with multiple results? - // For now just use the first result. - const result = response.result[0]; + // new API returns an object with an array of restults + // problem: response holds a list of results, when before we were just getting one result. + // How to make the entire app compatible with multiple results? + // For now just use the first result. + const result = response.result[0]; - dispatch( - logEvent(LOG_ACTIONS_LOAD_CHART, { - slice_id: key, - applied_filters: result.applied_filters, - is_cached: result.is_cached, - force_refresh: force, - row_count: result.rowcount, - datasource: formData.datasource, - start_offset: logStart, - ts: new Date().getTime(), - duration: Logger.getTimestamp() - logStart, - has_extra_filters: - formData.extra_filters && formData.extra_filters.length > 0, - viz_type: formData.viz_type, - data_age: result.is_cached - ? 
moment(new Date()).diff(moment.utc(result.cached_dttm)) - : null, - }), - ); - return dispatch(chartUpdateSucceeded(result, key)); - } + dispatch( + logEvent(LOG_ACTIONS_LOAD_CHART, { + slice_id: key, + applied_filters: result.applied_filters, + is_cached: result.is_cached, + force_refresh: force, + row_count: result.rowcount, + datasource: formData.datasource, + start_offset: logStart, + ts: new Date().getTime(), + duration: Logger.getTimestamp() - logStart, + has_extra_filters: + formData.extra_filters && formData.extra_filters.length > 0, + viz_type: formData.viz_type, + data_age: result.is_cached + ? moment(new Date()).diff(moment.utc(result.cached_dttm)) + : null, + }), + ); + return dispatch(chartUpdateSucceeded(result, key)); }) .catch(response => { const appendErrorLog = (errorDetails, isCached) => { diff --git a/superset-frontend/src/chart/chartReducer.js b/superset-frontend/src/chart/chartReducer.js index 7efda628a59d5..fc28e99d2cac5 100644 --- a/superset-frontend/src/chart/chartReducer.js +++ b/superset-frontend/src/chart/chartReducer.js @@ -74,7 +74,7 @@ export default function chartReducer(charts = {}, action) { [actions.CHART_UPDATE_QUEUED](state) { return { ...state, - asyncJobId: action.asyncJobMeta["job_id"], + asyncJobId: action.asyncJobMeta.job_id, chartStatus: 'loading', chartUpdateEndTime: now(), }; diff --git a/superset-frontend/src/dashboard/index.jsx b/superset-frontend/src/dashboard/index.jsx index 8ebba38058f5e..90430f3021f37 100644 --- a/superset-frontend/src/dashboard/index.jsx +++ b/superset-frontend/src/dashboard/index.jsx @@ -37,15 +37,21 @@ initFeatureFlags(bootstrapData.common.feature_flags); const initState = getInitialState(bootstrapData); const asyncEventMiddleware = initAsyncEvents({ - getPendingComponents: (state) => filter(state.charts, {chartStatus: 'loading'}), - successAction: (componentId, componentData) => actions.chartUpdateSucceeded(componentData, componentId), - errorAction: (componentId, response) => actions.chartUpdateFailed(response, componentId) + getPendingComponents: state => + filter(state.charts, { chartStatus: 'loading' }), + successAction: (componentId, componentData) => + actions.chartUpdateSucceeded(componentData, componentId), + errorAction: (componentId, response) => + actions.chartUpdateFailed(response, componentId), }); const store = createStore( rootReducer, initState, - compose(applyMiddleware(thunk, logger, asyncEventMiddleware), initEnhancer(false)), + compose( + applyMiddleware(thunk, logger, asyncEventMiddleware), + initEnhancer(false), + ), ); ReactDOM.render(, document.getElementById('app')); diff --git a/superset-frontend/src/explore/index.jsx b/superset-frontend/src/explore/index.jsx index 1cebfd43bd7ac..c18969cd02440 100644 --- a/superset-frontend/src/explore/index.jsx +++ b/superset-frontend/src/explore/index.jsx @@ -39,16 +39,21 @@ initFeatureFlags(bootstrapData.common.feature_flags); const initState = getInitialState(bootstrapData); const asyncEventMiddleware = initAsyncEvents({ - getPendingComponents: (state) => filter(state.charts, {chartStatus: 'loading'}), - successAction: (componentId, componentData) => actions.chartUpdateSucceeded(componentData, componentId), - errorAction: (componentId, response) => actions.chartUpdateFailed(response, componentId) + getPendingComponents: state => + filter(state.charts, { chartStatus: 'loading' }), + successAction: (componentId, componentData) => + actions.chartUpdateSucceeded(componentData, componentId), + errorAction: (componentId, response) => + 
actions.chartUpdateFailed(response, componentId), }); - const store = createStore( rootReducer, initState, - compose(applyMiddleware(thunk, logger, asyncEventMiddleware), initEnhancer(false)), + compose( + applyMiddleware(thunk, logger, asyncEventMiddleware), + initEnhancer(false), + ), ); ReactDOM.render(, document.getElementById('app')); diff --git a/superset-frontend/src/middleware/asyncEvent.js b/superset-frontend/src/middleware/asyncEvent.js deleted file mode 100644 index de4f2199cf29b..0000000000000 --- a/superset-frontend/src/middleware/asyncEvent.js +++ /dev/null @@ -1,129 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -import { SupersetClient } from '@superset-ui/core'; -import { isFeatureEnabled, FeatureFlag } from '../featureFlags'; -import { getClientErrorObject, parseErrorJson } from '../utils/getClientErrorObject'; - -const initAsyncEvents = (options) => { - const POLLING_DELAY = 500; - const { getPendingComponents, successAction, errorAction } = options; - - const middleware = store => next => { - const JOB_STATUS = { - PENDING: "pending", - RUNNING: "running", - ERROR: "error", - DONE: "done", - } - const LOCALSTORAGE_KEY = 'last_async_event_id'; - const pollingUrl = '/api/v1/async_event/'; - let lastReceivedEventId; - - try { - lastReceivedEventId = localStorage.getItem(LOCALSTORAGE_KEY); - } catch(err) { - console.warn("failed to fetch last event Id from localStorage"); - } - - const fetchEvents = async (lastEventId) => { - const url = lastEventId ? `${pollingUrl}?last_id=${lastEventId}` : pollingUrl; - const response = await fetch(url); - - if (!response.ok) { - const message = `An error has occured: ${response.status}`; - throw new Error(message); - } - - const data = await response.json(); - return data.result; - } - - const fetchCachedData = async (asyncEvent) => { - const { json } = await SupersetClient.get({ - endpoint: asyncEvent['result_url'], - }); - const result = ('result' in json) ? 
json.result[0] : json; - return result; - } - - const setLastId = asyncEvent => { - lastReceivedEventId = asyncEvent['id']; - try { - localStorage.setItem(LOCALSTORAGE_KEY, lastReceivedEventId); - } catch (err) { - console.warn('Error saving event ID to localStorage', err); - } - } - - const processEvents = async () => { - const state = store.getState(); - const queuedComponents = getPendingComponents(state); - if (queuedComponents.length) { - try { - const events = await fetchEvents(lastReceivedEventId); - if (!events || !events.length) { - return setTimeout(processEvents, POLLING_DELAY); - } - const componentsByJobId = queuedComponents.reduce((acc, item) => { - acc[item['asyncJobId']] = item; - return acc; - }, {}); - - for (const asyncEvent of events) { - const component = componentsByJobId[asyncEvent['job_id']]; - if (!component) { - console.warn('component not found for job_id', asyncEvent['job_id']); - continue; - } - const componentId = component['id']; - switch(asyncEvent['status']) { - case JOB_STATUS.DONE: - try { - const componentData = await fetchCachedData(asyncEvent); - store.dispatch(successAction(componentId, componentData)); - } catch(errorResponse) { - getClientErrorObject(errorResponse).then(parsedResponse => { - store.dispatch(errorAction(componentId, parsedResponse)); - }); - } - break; - case JOB_STATUS.ERROR: - const parsedEvent = parseErrorJson(asyncEvent); - store.dispatch(errorAction(componentId, parsedEvent)); - break; - } - setLastId(asyncEvent); - } - } catch (err) { - console.error(err); - } - } - - setTimeout(processEvents, POLLING_DELAY); - }; - - if(isFeatureEnabled(FeatureFlag.GLOBAL_ASYNC_QUERIES)) processEvents(); - - return action => next(action); - }; - - return middleware; -} - -export default initAsyncEvents; diff --git a/superset-frontend/src/middleware/asyncEvent.ts b/superset-frontend/src/middleware/asyncEvent.ts new file mode 100644 index 0000000000000..f94218afbf20c --- /dev/null +++ b/superset-frontend/src/middleware/asyncEvent.ts @@ -0,0 +1,179 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import { Middleware, MiddlewareAPI, Dispatch } from 'redux'; +import { SupersetClient } from '@superset-ui/core'; +import { SupersetError } from 'src/components/ErrorMessage/types'; +import { isFeatureEnabled, FeatureFlag } from '../featureFlags'; +import { + getClientErrorObject, + parseErrorJson, +} from '../utils/getClientErrorObject'; + +type AsyncEventOptions = { + getPendingComponents: (state: any) => any[]; + successAction: (componentId: number, componentData: any) => { type: string }; + errorAction: (componentId: number, response: any) => { type: string }; +}; + +export type AsyncEvent = { + id: string; + channel_id: string; + job_id: string; + user_id: string; + status: string; + errors: SupersetError[]; + result_url: string; +}; + +type CachedDataResponse = { + componentId: number; + status: string; + data: any; +}; + +const initAsyncEvents = (options: AsyncEventOptions) => { + const POLLING_DELAY = 500; + const { getPendingComponents, successAction, errorAction } = options; + + const middleware: Middleware = (store: MiddlewareAPI) => ( + next: Dispatch, + ) => { + const JOB_STATUS = { + PENDING: 'pending', + RUNNING: 'running', + ERROR: 'error', + DONE: 'done', + }; + const LOCALSTORAGE_KEY = 'last_async_event_id'; + const pollingUrl = '/api/v1/async_event/'; + let lastReceivedEventId: string | null; + + try { + lastReceivedEventId = localStorage.getItem(LOCALSTORAGE_KEY); + } catch (err) { + console.warn('failed to fetch last event Id from localStorage'); + } + + const fetchEvents = async ( + lastEventId: string | null, + ): Promise => { + const url = lastEventId + ? `${pollingUrl}?last_id=${lastEventId}` + : pollingUrl; + const response = await fetch(url); + + if (!response.ok) { + const message = `An error has occurred: ${response.status}`; + throw new Error(message); + } + + const data = await response.json(); + return data.result; + }; + + const fetchCachedData = async ( + asyncEvent: AsyncEvent, + componentId: number, + ): Promise => { + let status = 'success'; + let data; + try { + const { json } = await SupersetClient.get({ + endpoint: asyncEvent.result_url, + }); + data = 'result' in json ? 
json.result[0] : json; + } catch (response) { + status = 'error'; + data = await getClientErrorObject(response); + } + + return { componentId, status, data }; + }; + + const setLastId = (asyncEvent: AsyncEvent) => { + lastReceivedEventId = asyncEvent.id; + try { + localStorage.setItem(LOCALSTORAGE_KEY, lastReceivedEventId as string); + } catch (err) { + console.warn('Error saving event ID to localStorage', err); + } + }; + + const processEvents = async () => { + const state = store.getState(); + const queuedComponents = getPendingComponents(state); + if (queuedComponents.length) { + try { + const events = await fetchEvents(lastReceivedEventId); + if (!events || !events.length) { + return setTimeout(processEvents, POLLING_DELAY); + } + const componentsByJobId = queuedComponents.reduce((acc, item) => { + acc[item.asyncJobId] = item; + return acc; + }, {}); + const fetchDataEvents: Promise[] = []; + events.forEach((asyncEvent: AsyncEvent) => { + const component = componentsByJobId[asyncEvent.job_id]; + if (!component) { + console.warn('component not found for job_id', asyncEvent.job_id); + return false; + } + const componentId = component.id; + switch (asyncEvent.status) { + case JOB_STATUS.DONE: + fetchDataEvents.push(fetchCachedData(asyncEvent, componentId)); + break; + case JOB_STATUS.ERROR: + store.dispatch( + errorAction(componentId, parseErrorJson(asyncEvent)), + ); + break; + default: + console.warn('received event with status', asyncEvent.status); + } + + return setLastId(asyncEvent); + }); + + const fetchResults = await Promise.all(fetchDataEvents); + fetchResults.forEach(result => { + if (result.status === 'success') { + store.dispatch(successAction(result.componentId, result.data)); + } else { + store.dispatch(errorAction(result.componentId, result.data)); + } + }); + } catch (err) { + console.error(err); + } + } + + return setTimeout(processEvents, POLLING_DELAY); + }; + + if (isFeatureEnabled(FeatureFlag.GLOBAL_ASYNC_QUERIES)) processEvents(); + + return action => next(action); + }; + + return middleware; +}; + +export default initAsyncEvents; diff --git a/superset-frontend/src/utils/getClientErrorObject.ts b/superset-frontend/src/utils/getClientErrorObject.ts index a617a99a7f674..269126169f604 100644 --- a/superset-frontend/src/utils/getClientErrorObject.ts +++ b/superset-frontend/src/utils/getClientErrorObject.ts @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -import { SupersetClientResponse, t } from '@superset-ui/core'; +import { JsonObject, SupersetClientResponse, t } from '@superset-ui/core'; import { SupersetError, ErrorTypeEnum, @@ -36,33 +36,30 @@ export type ClientErrorObject = { stacktrace?: string; } & Partial; -export function parseErrorJson(responseObject: SupersetClientResponse | (Response & { timeout: number }) | object): ClientErrorObject { +export function parseErrorJson(responseObject: JsonObject): ClientErrorObject { + let error = { ...responseObject }; // Backwards compatibility for old error renderers with the new error object - if (responseObject['errors'] && responseObject['errors'].length > 0) { - responseObject['error'] = responseObject['description'] = responseObject['errors'][0].message; - responseObject['link'] = responseObject['errors'][0]?.extra?.link; + if (error.errors && error.errors.length > 0) { + error.error = error.description = error.errors[0].message; + error.link = error.errors[0]?.extra?.link; } - if (responseObject['stack']) { - responseObject = { - ...responseObject, + if (error.stack) { + error = { + ...error, error: t('Unexpected error: ') + - (responseObject['description'] || - t('(no description, click to see stack trace)')), - stacktrace: responseObject['stack'], + (error.description || t('(no description, click to see stack trace)')), + stacktrace: error.stack, }; - } else if ( - responseObject['responseText'] && - responseObject['responseText'].indexOf('CSRF') >= 0 - ) { - responseObject = { - ...responseObject, + } else if (error.responseText && error.responseText.indexOf('CSRF') >= 0) { + error = { + ...error, error: t(COMMON_ERR_MESSAGES.SESSION_TIMED_OUT), }; } - return { ...responseObject, error: responseObject['error'] }; // typescript madness + return { ...error, error: error.error }; // explicit ClientErrorObject } export function getClientErrorObject( From 4566c01f0a2236cdd233e8972852aec222946667 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Tue, 1 Dec 2020 20:48:29 -0800 Subject: [PATCH 16/42] Chart data API: enforce forced_cache, add tests --- superset/charts/api.py | 12 ++-- superset/charts/commands/data.py | 4 +- superset/common/query_context.py | 2 +- tests/charts/api_tests.py | 114 ++++++++++++++++++++++++++++++- tests/superset_test_config.py | 4 +- 5 files changed, 126 insertions(+), 10 deletions(-) diff --git a/superset/charts/api.py b/superset/charts/api.py index 1de930bfbf7e2..1f41d9800bd33 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -453,11 +453,13 @@ def bulk_delete(self, **kwargs: Any) -> Response: except ChartBulkDeleteFailedError as ex: return self.response_422(message=str(ex)) - def get_data_response(self, command: ChartDataCommand) -> Response: + def get_data_response( + self, command: ChartDataCommand, force_cached: bool = False + ) -> Response: try: - result = command.run() + result = command.run(force_cached=force_cached) except ChartDataCacheLoadError as exc: - return self.response_400(message=exc.message) + return self.response_422(message=exc.message) except ChartDataQueryFailedError as exc: return self.response_400(message=exc.message) @@ -593,6 +595,8 @@ def data_from_cache(self, cache_key: str) -> Response: $ref: '#/components/responses/401' 404: $ref: '#/components/responses/404' + 422: + $ref: '#/components/responses/422' 500: $ref: '#/components/responses/500' """ @@ -611,7 +615,7 @@ def data_from_cache(self, cache_key: str) -> Response: logger.info(exc) return self.response_401() - return self.get_data_response(command) + 
return self.get_data_response(command, True) @expose("//cache_screenshot/", methods=["GET"]) @protect() diff --git a/superset/charts/commands/data.py b/superset/charts/commands/data.py index 97365d89715ae..9d1fd12d3c161 100644 --- a/superset/charts/commands/data.py +++ b/superset/charts/commands/data.py @@ -50,8 +50,8 @@ def run(self, **kwargs: Any) -> Dict[str, Any]: payload = self._query_context.get_payload( cache_query_context=cache_query_context, force_cached=force_cached ) - except CacheLoadError: - raise ChartDataCacheLoadError() + except CacheLoadError as exc: + raise ChartDataCacheLoadError(exc.message) # TODO: QueryContext should support SIP-40 style errors for query in payload["queries"]: diff --git a/superset/common/query_context.py b/superset/common/query_context.py index d53e362392128..97f4bccee5f54 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -297,7 +297,7 @@ def get_df_payload( # pylint: disable=too-many-statements logger.warning( "force_cached (QueryContext): value not found for key %s", cache_key ) - raise CacheLoadError() + raise CacheLoadError("Error loading data from cache") if query_obj and not is_loaded: try: diff --git a/tests/charts/api_tests.py b/tests/charts/api_tests.py index 511186db2d54e..189042ba2c887 100644 --- a/tests/charts/api_tests.py +++ b/tests/charts/api_tests.py @@ -34,15 +34,17 @@ from superset.utils.core import get_example_database from tests.fixtures.unicode_dashboard import load_unicode_dashboard_with_slice from tests.test_app import app +from superset.charts.commands.data import ChartDataCommand from superset.connectors.connector_registry import ConnectorRegistry -from superset.extensions import db, security_manager +from superset.extensions import async_query_manager, db, security_manager from superset.models.core import Database, FavStar, FavStarClassName from superset.models.dashboard import Dashboard from superset.models.reports import ReportSchedule, ReportScheduleType from superset.models.slice import Slice from superset.utils import core as utils from tests.base_api_tests import ApiOwnersTestCaseMixin -from tests.base_tests import SupersetTestCase +from tests.base_tests import SupersetTestCase, post_assert_metric, test_client + from tests.fixtures.importexport import ( chart_config, chart_metadata_config, @@ -1263,6 +1265,114 @@ def test_chart_data_jinja_filter_request(self): if get_example_database().backend != "presto": assert "('boy' = 'boy')" in result + @mock.patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + GLOBAL_ASYNC_QUERIES=True, + ) + def test_chart_data_async(self): + """ + Chart data API: Test chart data query (async) + """ + async_query_manager.init_app(app) + self.login(username="admin") + table = self.get_table_by_name("birth_names") + request_payload = get_query_context(table.name, table.id, table.type) + rv = self.post_assert_metric(CHART_DATA_URI, request_payload, "data") + self.assertEqual(rv.status_code, 202) + data = json.loads(rv.data.decode("utf-8")) + keys = list(data.keys()) + self.assertCountEqual( + keys, ["channel_id", "job_id", "user_id", "status", "errors", "result_url"] + ) + + @mock.patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + GLOBAL_ASYNC_QUERIES=True, + ) + def test_chart_data_async_invalid_token(self): + """ + Chart data API: Test chart data query (async) + """ + async_query_manager.init_app(app) + self.login(username="admin") + table = self.get_table_by_name("birth_names") + request_payload = 
get_query_context(table.name, table.id, table.type) + test_client.set_cookie( + "localhost", app.config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME"], "foo" + ) + rv = post_assert_metric(test_client, CHART_DATA_URI, request_payload, "data") + self.assertEqual(rv.status_code, 401) + + @mock.patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + GLOBAL_ASYNC_QUERIES=True, + ) + @mock.patch.object(ChartDataCommand, "load_query_context_from_cache") + def test_chart_data_cache(self, load_qc_mock): + """ + Chart data cache API: Test chart data async cache request + """ + async_query_manager.init_app(app) + self.login(username="admin") + table = self.get_table_by_name("birth_names") + query_context = get_query_context(table.name, table.id, table.type) + load_qc_mock.return_value = query_context + orig_run = ChartDataCommand.run + + def mock_run(self, **kwargs): + assert kwargs["force_cached"] == True + # override force_cached to get result from DB + return orig_run(self, force_cached=False) + + with mock.patch.object(ChartDataCommand, "run", new=mock_run): + rv = self.get_assert_metric( + f"{CHART_DATA_URI}/test-cache-key", "data_from_cache" + ) + data = json.loads(rv.data.decode("utf-8")) + + self.assertEqual(rv.status_code, 200) + self.assertEqual(data["result"][0]["rowcount"], 45) + + @mock.patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + GLOBAL_ASYNC_QUERIES=True, + ) + @mock.patch.object(ChartDataCommand, "load_query_context_from_cache") + def test_chart_data_cache_run_failed(self, load_qc_mock): + """ + Chart data cache API: Test chart data async cache request with run failure + """ + async_query_manager.init_app(app) + self.login(username="admin") + table = self.get_table_by_name("birth_names") + query_context = get_query_context(table.name, table.id, table.type) + load_qc_mock.return_value = query_context + rv = self.get_assert_metric( + f"{CHART_DATA_URI}/test-cache-key", "data_from_cache" + ) + data = json.loads(rv.data.decode("utf-8")) + + self.assertEqual(rv.status_code, 422) + self.assertEqual(data["message"], "Error loading data from cache") + + @mock.patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + GLOBAL_ASYNC_QUERIES=True, + ) + def test_chart_data_cache_key_error(self): + """ + Chart data cache API: Test chart data async cache request with invalid cache key + """ + async_query_manager.init_app(app) + self.login(username="admin") + table = self.get_table_by_name("birth_names") + query_context = get_query_context(table.name, table.id, table.type) + rv = self.get_assert_metric( + f"{CHART_DATA_URI}/test-cache-key", "data_from_cache" + ) + + self.assertEqual(rv.status_code, 404) + def test_export_chart(self): """ Chart API: Test export chart diff --git a/tests/superset_test_config.py b/tests/superset_test_config.py index 6a380829fa731..e7710ab10737a 100644 --- a/tests/superset_test_config.py +++ b/tests/superset_test_config.py @@ -22,7 +22,7 @@ AUTH_USER_REGISTRATION_ROLE = "alpha" SQLALCHEMY_DATABASE_URI = "sqlite:///" + os.path.join(DATA_DIR, "unittests.db") -DEBUG = True +DEBUG = False SUPERSET_WEBSERVER_PORT = 8081 # Allowing SQLALCHEMY_DATABASE_URI and SQLALCHEMY_EXAMPLES_URI to be defined as an env vars for @@ -96,6 +96,8 @@ def GET_FEATURE_FLAGS_FUNC(ff): "CACHE_KEY_PREFIX": "superset_data_cache", } +GLOBAL_ASYNC_QUERIES_JWT_SECRET = "test-secret-change-me-test-secret-change-me" + class CeleryConfig(object): BROKER_URL = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CELERY_DB}" From 
9eba0c41584b5e1108e886638f93f936d6860b34 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Wed, 2 Dec 2020 18:04:45 -0800 Subject: [PATCH 17/42] Add tests for explore_json endpoints --- superset/views/core.py | 4 +- superset/viz.py | 6 +- tests/core_tests.py | 156 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 3 deletions(-) diff --git a/superset/views/core.py b/superset/views/core.py index 40190680e7dd3..bebc2c3576050 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -513,8 +513,8 @@ def explore_json_data(self, cache_key: str) -> FlaskResponse: if not cached: raise CacheLoadError("Cached data not found") - form_data = cached["form_data"] - response_type = cached["response_type"] + form_data = cached.get("form_data") + response_type = cached.get("response_type") datasource_id, datasource_type = get_datasource_info(None, None, form_data) diff --git a/superset/viz.py b/superset/viz.py index 21d9ca367d575..5f7f3457bfc49 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -164,7 +164,7 @@ def __init__( self.results: Optional[QueryResult] = None self.errors: List[Dict[str, Any]] = [] self.force = force - self.force_cached = force_cached + self._force_cached = force_cached self.from_dttm: Optional[datetime] = None self.to_dttm: Optional[datetime] = None @@ -181,6 +181,10 @@ def __init__( self.applied_filters: List[Dict[str, str]] = [] self.rejected_filters: List[Dict[str, str]] = [] + @property + def force_cached(self) -> bool: + return self._force_cached + def process_metrics(self) -> None: # metrics in TableViz is order sensitive, so metric_dict should be # OrderedDict diff --git a/tests/core_tests.py b/tests/core_tests.py index 94f3c29a60f47..ba21ade970e66 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -49,6 +49,7 @@ from superset.connectors.sqla.models import SqlaTable from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.mssql import MssqlEngineSpec +from superset.extensions import async_query_manager from superset.models import core as models from superset.models.annotations import Annotation, AnnotationLayer from superset.models.dashboard import Dashboard @@ -831,6 +832,161 @@ def test_slice_payload_no_datasource(self): "The datasource associated with this chart no longer exists", ) + def test_explore_json(self): + tbl_id = self.table_ids.get("birth_names") + form_data = { + "queryFields": { + "metrics": "metrics", + "groupby": "groupby", + "columns": "groupby", + }, + "datasource": f"{tbl_id}__table", + "viz_type": "dist_bar", + "time_range_endpoints": ["inclusive", "exclusive"], + "granularity_sqla": "ds", + "time_range": "No filter", + "metrics": ["count"], + "adhoc_filters": [], + "groupby": ["gender"], + "row_limit": 100, + } + self.login(username="admin") + rv = self.client.post( + "/superset/explore_json/", data={"form_data": json.dumps(form_data)}, + ) + data = json.loads(rv.data.decode("utf-8")) + + self.assertEqual(rv.status_code, 200) + self.assertEqual(data["rowcount"], 2) + + @mock.patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + GLOBAL_ASYNC_QUERIES=True, + ) + def test_explore_json_async(self): + tbl_id = self.table_ids.get("birth_names") + form_data = { + "queryFields": { + "metrics": "metrics", + "groupby": "groupby", + "columns": "groupby", + }, + "datasource": f"{tbl_id}__table", + "viz_type": "dist_bar", + "time_range_endpoints": ["inclusive", "exclusive"], + "granularity_sqla": "ds", + "time_range": "No filter", + "metrics": ["count"], + 
"adhoc_filters": [], + "groupby": ["gender"], + "row_limit": 100, + } + async_query_manager.init_app(app) + self.login(username="admin") + rv = self.client.post( + "/superset/explore_json/", data={"form_data": json.dumps(form_data)}, + ) + data = json.loads(rv.data.decode("utf-8")) + keys = list(data.keys()) + + self.assertEqual(rv.status_code, 202) + self.assertCountEqual( + keys, ["channel_id", "job_id", "user_id", "status", "errors", "result_url"] + ) + + @mock.patch( + "superset.utils.cache_manager.CacheManager.cache", + new_callable=mock.PropertyMock, + ) + @mock.patch("superset.viz.BaseViz.force_cached", new_callable=mock.PropertyMock) + def test_explore_json_data(self, mock_force_cached, mock_cache): + tbl_id = self.table_ids.get("birth_names") + form_data = dict( + { + "form_data": { + "queryFields": { + "metrics": "metrics", + "groupby": "groupby", + "columns": "groupby", + }, + "datasource": f"{tbl_id}__table", + "viz_type": "dist_bar", + "time_range_endpoints": ["inclusive", "exclusive"], + "granularity_sqla": "ds", + "time_range": "No filter", + "metrics": ["count"], + "adhoc_filters": [], + "groupby": ["gender"], + "row_limit": 100, + } + } + ) + + class MockCache: + def get(self, key): + return form_data + + def set(self): + return None + + mock_cache.return_value = MockCache() + mock_force_cached.return_value = False + + self.login(username="admin") + rv = self.client.get("/superset/explore_json/data/valid-cache-key") + data = json.loads(rv.data.decode("utf-8")) + + self.assertEqual(rv.status_code, 200) + self.assertEqual(data["rowcount"], 2) + + @mock.patch( + "superset.utils.cache_manager.CacheManager.cache", + new_callable=mock.PropertyMock, + ) + def test_explore_json_data_no_login(self, mock_cache): + tbl_id = self.table_ids.get("birth_names") + form_data = dict( + { + "form_data": { + "queryFields": { + "metrics": "metrics", + "groupby": "groupby", + "columns": "groupby", + }, + "datasource": f"{tbl_id}__table", + "viz_type": "dist_bar", + "time_range_endpoints": ["inclusive", "exclusive"], + "granularity_sqla": "ds", + "time_range": "No filter", + "metrics": ["count"], + "adhoc_filters": [], + "groupby": ["gender"], + "row_limit": 100, + } + } + ) + + class MockCache: + def get(self, key): + return form_data + + def set(self): + return None + + mock_cache.return_value = MockCache() + + rv = self.client.get("/superset/explore_json/data/valid-cache-key") + self.assertEqual(rv.status_code, 401) + + def test_explore_json_data_invalid_cache_key(self): + self.login(username="admin") + cache_key = "invalid-cache-key" + rv = self.client.get(f"/superset/explore_json/data/{cache_key}") + data = json.loads(rv.data.decode("utf-8")) + + self.assertEqual(rv.status_code, 404) + self.assertEqual(data["error"], "Cached data not found") + @mock.patch( "superset.security.SupersetSecurityManager.get_schemas_accessible_by_user" ) From d2e4529cfc32ba4f3bfc6d90b615aabe38a35361 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Wed, 2 Dec 2020 18:05:40 -0800 Subject: [PATCH 18/42] Add test for chart data cache enpoint (no login) --- tests/charts/api_tests.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/charts/api_tests.py b/tests/charts/api_tests.py index 189042ba2c887..8ae95aa3d90ea 100644 --- a/tests/charts/api_tests.py +++ b/tests/charts/api_tests.py @@ -1355,6 +1355,33 @@ def test_chart_data_cache_run_failed(self, load_qc_mock): self.assertEqual(rv.status_code, 422) self.assertEqual(data["message"], "Error loading data from 
cache") + @mock.patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + GLOBAL_ASYNC_QUERIES=True, + ) + @mock.patch.object(ChartDataCommand, "load_query_context_from_cache") + def test_chart_data_cache_no_login(self, load_qc_mock): + """ + Chart data cache API: Test chart data async cache request (no login) + """ + async_query_manager.init_app(app) + table = self.get_table_by_name("birth_names") + query_context = get_query_context(table.name, table.id, table.type) + load_qc_mock.return_value = query_context + orig_run = ChartDataCommand.run + + def mock_run(self, **kwargs): + assert kwargs["force_cached"] == True + # override force_cached to get result from DB + return orig_run(self, force_cached=False) + + with mock.patch.object(ChartDataCommand, "run", new=mock_run): + rv = self.get_assert_metric( + f"{CHART_DATA_URI}/test-cache-key", "data_from_cache" + ) + + self.assertEqual(rv.status_code, 401) + @mock.patch.dict( "superset.extensions.feature_flag_manager._feature_flags", GLOBAL_ASYNC_QUERIES=True, @@ -1365,8 +1392,6 @@ def test_chart_data_cache_key_error(self): """ async_query_manager.init_app(app) self.login(username="admin") - table = self.get_table_by_name("birth_names") - query_context = get_query_context(table.name, table.id, table.type) rv = self.get_assert_metric( f"{CHART_DATA_URI}/test-cache-key", "data_from_cache" ) From 0bd7d67051aceade71a5340d4755c1c6c38600f7 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Wed, 2 Dec 2020 19:02:37 -0800 Subject: [PATCH 19/42] Consolidate set_and_log_cache and add STORE_CACHE_KEYS_IN_METADATA_DB flag --- superset/common/query_context.py | 3 ++- superset/config.py | 3 +++ superset/utils/cache.py | 11 +++++++++++ superset/viz.py | 34 +++----------------------------- superset/viz_sip38.py | 7 +++---- tests/core_tests.py | 3 +++ 6 files changed, 25 insertions(+), 36 deletions(-) diff --git a/superset/common/query_context.py b/superset/common/query_context.py index 97f4bccee5f54..ab0a009d2ca74 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -337,10 +337,11 @@ def get_df_payload( # pylint: disable=too-many-statements if is_loaded and cache_key and status != utils.QueryStatus.FAILED: set_and_log_cache( - cache_manager.cache, + cache_manager.data_cache, cache_key, {"df": df, "query": query}, self.cache_timeout, + self.datasource.uid, ) return { "cache_key": cache_key, diff --git a/superset/config.py b/superset/config.py index 79e0985cb8aed..f9bf6d0014fbc 100644 --- a/superset/config.py +++ b/superset/config.py @@ -407,6 +407,9 @@ def _try_json_readsha( # pylint: disable=unused-argument # Cache for datasource metadata and query results DATA_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "null"} +# store cache keys by datasource UID (via CacheKey) for custom processing/invalidation +STORE_CACHE_KEYS_IN_METADATA_DB = False + # CORS Options ENABLE_CORS = False CORS_OPTIONS: Dict[Any, Any] = {} diff --git a/superset/utils/cache.py b/superset/utils/cache.py index 0076d989afb5b..729c316866d07 100644 --- a/superset/utils/cache.py +++ b/superset/utils/cache.py @@ -25,7 +25,9 @@ from flask_caching import Cache from werkzeug.wrappers.etag import ETagResponseMixin +from superset import db from superset.extensions import cache_manager +from superset.models.cache import CacheKey from superset.stats_logger import BaseStatsLogger from superset.utils.core import json_int_dttm_ser @@ -49,6 +51,7 @@ def set_and_log_cache( cache_key: str, cache_value: Dict[str, Any], cache_timeout: Optional[int] = None, + 
datasource_uid: Optional[str] = None, ) -> None: timeout = cache_timeout if cache_timeout else config["CACHE_DEFAULT_TIMEOUT"] try: @@ -56,6 +59,14 @@ def set_and_log_cache( value = {**cache_value, "dttm": dttm} cache_instance.set(cache_key, value, timeout=timeout) stats_logger.incr("set_cache_key") + + if datasource_uid and config["STORE_CACHE_KEYS_IN_METADATA_DB"]: + ck = CacheKey( + cache_key=cache_key, + cache_timeout=cache_timeout, + datasource_uid=datasource_uid, + ) + db.session.add(ck) except Exception as ex: # pylint: disable=broad-except # cache.set call can fail if the backend is down or if # the key is too large or whatever other reasons diff --git a/superset/viz.py b/superset/viz.py index 5f7f3457bfc49..2d4b76d80a67c 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -68,6 +68,7 @@ from superset.models.helpers import QueryResult from superset.typing import QueryObjectDict, VizData, VizPayload from superset.utils import core as utils +from superset.utils.cache import set_and_log_cache from superset.utils.core import ( DTTM_ALIAS, JS_MAX_INTEGER, @@ -99,34 +100,6 @@ ] -def set_and_log_cache( - cache_key: str, - df: pd.DataFrame, - query: str, - cached_dttm: str, - cache_timeout: int, - datasource_uid: Optional[str], -) -> None: - try: - cache_value = dict(dttm=cached_dttm, df=df, query=query) - stats_logger.incr("set_cache_key") - cache_manager.data_cache.set(cache_key, cache_value, timeout=cache_timeout) - - if datasource_uid: - ck = CacheKey( - cache_key=cache_key, - cache_timeout=cache_timeout, - datasource_uid=datasource_uid, - ) - db.session.add(ck) - except Exception as ex: - # cache.set call can fail if the backend is down or if - # the key is too large or whatever other reasons - logger.warning("Could not cache key {}".format(cache_key)) - logger.exception(ex) - cache_manager.data_cache.delete(cache_key) - - class BaseViz: """All visualizations derive this base class""" @@ -601,10 +574,9 @@ def get_df_payload( if is_loaded and cache_key and self.status != utils.QueryStatus.FAILED: set_and_log_cache( + cache_manager.data_cache, cache_key, - df, - self.query, - cached_dttm, + {"df": df, "query": self.query}, self.cache_timeout, self.datasource.uid, ) diff --git a/superset/viz_sip38.py b/superset/viz_sip38.py index 600f44141a1c6..798ce42f27116 100644 --- a/superset/viz_sip38.py +++ b/superset/viz_sip38.py @@ -57,13 +57,13 @@ from superset.models.helpers import QueryResult from superset.typing import QueryObjectDict, VizData, VizPayload from superset.utils import core as utils +from superset.utils.cache import set_and_log_cache from superset.utils.core import ( DTTM_ALIAS, JS_MAX_INTEGER, merge_extra_filters, to_adhoc, ) -from superset.viz import set_and_log_cache if TYPE_CHECKING: from superset.connectors.base.models import BaseDatasource @@ -518,10 +518,9 @@ def get_df_payload( if is_loaded and cache_key and self.status != utils.QueryStatus.FAILED: set_and_log_cache( + cache_manager.data_cache, cache_key, - df, - self.query, - cached_dttm, + {"df": df, "query": self.query}, self.cache_timeout, self.datasource.uid, ) diff --git a/tests/core_tests.py b/tests/core_tests.py index ba21ade970e66..4e231735e9b1a 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -595,10 +595,13 @@ def test_warm_up_cache(self): ) == [{"slice_id": slc.id, "viz_error": None, "viz_status": "success"}] def test_cache_logging(self): + store_cache_keys = app.config["STORE_CACHE_KEYS_IN_METADATA_DB"] + app.config["STORE_CACHE_KEYS_IN_METADATA_DB"] = True girls_slice = self.get_slice("Girls", 
db.session) self.get_json_resp("/superset/warm_up_cache?slice_id={}".format(girls_slice.id)) ck = db.session.query(CacheKey).order_by(CacheKey.id.desc()).first() assert ck.datasource_uid == f"{girls_slice.table.id}__table" + app.config["STORE_CACHE_KEYS_IN_METADATA_DB"] = store_cache_keys def test_shortner(self): self.login(username="admin") From 444145972eeada9216728fe6d2c69fdad35c7cd3 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 3 Dec 2020 14:04:15 -0800 Subject: [PATCH 20/42] Add tests for tasks/async_queries and address PR comments --- superset/charts/commands/data.py | 4 +- superset/common/query_context.py | 10 +- superset/config.py | 1 - superset/tasks/async_queries.py | 4 +- superset/utils/async_query_manager.py | 8 +- tests/tasks/__init__.py | 16 +++ tests/tasks/async_queries_tests.py | 153 ++++++++++++++++++++++++++ 7 files changed, 181 insertions(+), 15 deletions(-) create mode 100644 tests/tasks/__init__.py create mode 100644 tests/tasks/async_queries_tests.py diff --git a/superset/charts/commands/data.py b/superset/charts/commands/data.py index 9d1fd12d3c161..a7e7979aaab16 100644 --- a/superset/charts/commands/data.py +++ b/superset/charts/commands/data.py @@ -44,8 +44,8 @@ def __init__(self) -> None: def run(self, **kwargs: Any) -> Dict[str, Any]: # caching is handled in query_context.get_df_payload # (also evals `force` property) - cache_query_context = kwargs["cache"] if "cache" in kwargs else False - force_cached = kwargs["force_cached"] if "force_cached" in kwargs else False + cache_query_context = kwargs.get("cache", False) + force_cached = kwargs.get("force_cached", False) try: payload = self._query_context.get_payload( cache_query_context=cache_query_context, force_cached=force_cached diff --git a/superset/common/query_context.py b/superset/common/query_context.py index ab0a009d2ca74..eb346e4f1a442 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -150,7 +150,7 @@ def get_single_payload( self, query_obj: QueryObject, **kwargs: Any ) -> Dict[str, Any]: """Returns a payload of metadata and data""" - force_cached = kwargs["force_cached"] if "force_cached" in kwargs else False + force_cached = kwargs.get("force_cached", False) if self.result_type == utils.ChartDataResultType.QUERY: return { "query": self.datasource.get_query_str(query_obj.to_dict()), @@ -195,10 +195,8 @@ def get_single_payload( return payload def get_payload(self, **kwargs: Any) -> Dict[str, Any]: - cache_query_context = ( - kwargs["cache_query_context"] if "cache_query_context" in kwargs else False - ) - force_cached = kwargs["force_cached"] if "force_cached" in kwargs else False + cache_query_context = kwargs.get("cache_query_context", False) + force_cached = kwargs.get("force_cached", False) # Get all the payloads from the QueryObjects query_results = [ @@ -266,7 +264,7 @@ def get_df_payload( # pylint: disable=too-many-statements self, query_obj: QueryObject, **kwargs: Any ) -> Dict[str, Any]: """Handles caching around the df payload retrieval""" - force_cached = kwargs["force_cached"] if "force_cached" in kwargs else False + force_cached = kwargs.get("force_cached", False) cache_key = self.query_cache_key(query_obj) logger.info("Cache key: %s", cache_key) is_loaded = False diff --git a/superset/config.py b/superset/config.py index f9bf6d0014fbc..63315b1f02c2f 100644 --- a/superset/config.py +++ b/superset/config.py @@ -326,7 +326,6 @@ def _try_json_readsha( # pylint: disable=unused-argument "DISPLAY_MARKDOWN_HTML": True, # When True, this escapes HTML 
(rather than rendering it) in Markdown components "ESCAPE_MARKDOWN_HTML": False, - "SIP_34_ANNOTATIONS_UI": False, "GLOBAL_ASYNC_QUERIES": False, "VERSIONED_EXPORT": False, # Note that: RowLevelSecurityFilter is only given by default to the Admin role diff --git a/superset/tasks/async_queries.py b/superset/tasks/async_queries.py index b89388148b88a..39730ea867dd2 100644 --- a/superset/tasks/async_queries.py +++ b/superset/tasks/async_queries.py @@ -16,7 +16,7 @@ # under the License. import logging -from typing import Any, cast, Dict +from typing import Any, cast, Dict, Optional from flask import current_app @@ -66,7 +66,7 @@ def load_chart_data_into_cache( def load_explore_json_into_cache( job_metadata: Dict[str, Any], form_data: Dict[str, Any], - response_type: str, + response_type: Optional[str] = None, force: bool = False, ) -> None: with app.app_context(): # type: ignore diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index ce5a304214cf9..13b72d7aeb47d 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -38,10 +38,10 @@ def build_job_metadata(channel_id: str, job_id: str, **kwargs: Any) -> Dict[str, return { "channel_id": channel_id, "job_id": job_id, - "user_id": session["user_id"] if "user_id" in session else None, - "status": kwargs["status"], - "errors": kwargs["errors"] if "errors" in kwargs else [], - "result_url": kwargs["result_url"] if "result_url" in kwargs else None, + "user_id": session.get("user_id"), + "status": kwargs.get("status"), + "errors": kwargs.get("errors", []), + "result_url": kwargs.get("result_url"), } diff --git a/tests/tasks/__init__.py b/tests/tasks/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/tasks/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/tasks/async_queries_tests.py b/tests/tasks/async_queries_tests.py new file mode 100644 index 0000000000000..f1d3843564ffb --- /dev/null +++ b/tests/tasks/async_queries_tests.py @@ -0,0 +1,153 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unit tests for async query celery jobs in Superset""" +import re +from unittest.mock import patch +from uuid import uuid4 + +import pytest + +from superset import db +from superset.charts.commands.data import ChartDataCommand +from superset.charts.commands.exceptions import ChartDataQueryFailedError +from superset.connectors.sqla.models import SqlaTable +from superset.exceptions import SupersetException +from superset.extensions import async_query_manager +from superset.tasks.async_queries import ( + load_chart_data_into_cache, + load_explore_json_into_cache, +) +from tests.fixtures.query_context import get_query_context +from tests.test_app import app + + +def get_table_by_name(name: str) -> SqlaTable: + with app.app_context(): + return db.session.query(SqlaTable).filter_by(table_name=name).one() + + +@patch.object(async_query_manager, "update_job") +def test_load_chart_data_into_cache(mock_update_job): + async_query_manager.init_app(app) + table = get_table_by_name("birth_names") + form_data = get_query_context(table.name, table.id, table.type) + job_metadata = { + "channel_id": str(uuid4()), + "job_id": str(uuid4()), + "user_id": 1, + "status": "pending", + "errors": [], + } + + load_chart_data_into_cache(job_metadata, form_data) + + assert mock_update_job.called + call_args = mock_update_job.call_args + assert call_args.args[0] == job_metadata + assert call_args.args[1] == "done" + assert re.match(r"^/api/v1/chart/data/qc-\w+", call_args.kwargs["result_url"]) + + +@patch.object( + ChartDataCommand, "run", side_effect=ChartDataQueryFailedError("Error: foo") +) +@patch.object(async_query_manager, "update_job") +def test_load_chart_data_into_cache_error(mock_update_job, mock_run_command): + async_query_manager.init_app(app) + table = get_table_by_name("birth_names") + form_data = get_query_context(table.name, table.id, table.type) + job_metadata = { + "channel_id": str(uuid4()), + "job_id": str(uuid4()), + "user_id": 1, + "status": "pending", + "errors": [], + } + with pytest.raises(ChartDataQueryFailedError): + load_chart_data_into_cache(job_metadata, form_data) + + assert mock_run_command.called + assert mock_run_command.call_args.kwargs["cache"] == True + + assert mock_update_job.called + call_args = mock_update_job.call_args + assert call_args.args[0] == job_metadata + assert call_args.args[1] == "error" + assert call_args.kwargs["errors"] == [{"message": "Error: foo"}] + + +@patch.object(async_query_manager, "update_job") +def test_load_explore_json_into_cache(mock_update_job): + async_query_manager.init_app(app) + table = get_table_by_name("birth_names") + form_data = { + "queryFields": { + "metrics": "metrics", + "groupby": "groupby", + "columns": "groupby", + }, + "datasource": f"{table.id}__table", + "viz_type": "dist_bar", + "time_range_endpoints": ["inclusive", "exclusive"], + "granularity_sqla": "ds", + "time_range": "No filter", + "metrics": ["count"], + "adhoc_filters": [], + "groupby": ["gender"], + "row_limit": 100, + } + job_metadata = { + "channel_id": str(uuid4()), + "job_id": str(uuid4()), + "user_id": 1, + "status": "pending", + "errors": [], + } + + load_explore_json_into_cache(job_metadata, form_data) + + assert mock_update_job.called + call_args = mock_update_job.call_args + assert call_args.args[0] == job_metadata + assert call_args.args[1] == "done" + assert re.match( + r"^/superset/explore_json/data/ejr-\w+", call_args.kwargs["result_url"] + ) + + 
+@patch.object(async_query_manager, "update_job") +def test_load_explore_json_into_cache_error(mock_update_job): + async_query_manager.init_app(app) + form_data = {} + job_metadata = { + "channel_id": str(uuid4()), + "job_id": str(uuid4()), + "user_id": 1, + "status": "pending", + "errors": [], + } + + with pytest.raises(SupersetException): + load_explore_json_into_cache(job_metadata, form_data) + + assert mock_update_job.called + call_args = mock_update_job.call_args + assert call_args.args[0] == job_metadata + assert call_args.args[1] == "error" + assert call_args.kwargs["errors"] == [ + "The datasource associated with this chart no longer exists" + ] From 5896799ad0549e7227e729f437201463b7dffb19 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 3 Dec 2020 14:58:47 -0800 Subject: [PATCH 21/42] Bypass non-JSON result formats for async queries --- superset/charts/api.py | 18 ++++++++++++++---- superset/charts/commands/data.py | 6 ++++-- superset/views/core.py | 6 +++++- tests/charts/api_tests.py | 16 ++++++++++++++++ tests/core_tests.py | 30 ++++++++++++++++++++++++++++++ 5 files changed, 69 insertions(+), 7 deletions(-) diff --git a/superset/charts/api.py b/superset/charts/api.py index 1f41d9800bd33..6235c5d9c6a4d 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -70,7 +70,11 @@ from superset.models.slice import Slice from superset.tasks.thumbnails import cache_chart_thumbnail from superset.utils.async_query_manager import AsyncQueryTokenException -from superset.utils.core import ChartDataResultFormat, json_int_dttm_ser +from superset.utils.core import ( + ChartDataResultFormat, + ChartDataResultType, + json_int_dttm_ser, +) from superset.utils.screenshots import ChartScreenshot from superset.utils.urls import get_url_path from superset.views.base_api import ( @@ -543,7 +547,7 @@ def data(self) -> Response: try: command = ChartDataCommand() - command.set_query_context(json_body) + query_context = command.set_query_context(json_body) command.validate() except ValidationError as error: return self.response_400( @@ -552,9 +556,15 @@ def data(self) -> Response: except SupersetSecurityException: return self.response_401() - if is_feature_enabled("GLOBAL_ASYNC_QUERIES"): + # TODO: support CSV, SQL query and other non-JSON types + if ( + is_feature_enabled("GLOBAL_ASYNC_QUERIES") + and query_context.result_format == ChartDataResultFormat.JSON + and query_context.result_type == ChartDataResultType.FULL + ): + try: - command.validate_request(request) + command.validate_async_request(request) except AsyncQueryTokenException: return self.response_401() diff --git a/superset/charts/commands/data.py b/superset/charts/commands/data.py index a7e7979aaab16..275a723d60d7a 100644 --- a/superset/charts/commands/data.py +++ b/superset/charts/commands/data.py @@ -73,7 +73,7 @@ def run_async(self) -> Dict[str, Any]: return job_metadata - def set_query_context(self, form_data: Dict[str, Any]) -> None: + def set_query_context(self, form_data: Dict[str, Any]) -> QueryContext: self._form_data = form_data try: self._query_context = ChartDataQueryContextSchema().load(self._form_data) @@ -82,10 +82,12 @@ def set_query_context(self, form_data: Dict[str, Any]) -> None: except ValidationError as error: raise error + return self._query_context + def validate(self) -> None: self._query_context.raise_for_access() - def validate_request(self, request: Request) -> None: + def validate_async_request(self, request: Request) -> None: jwt_data = async_query_manager.parse_jwt_from_request(request) 
self._async_channel_id = jwt_data["channel"] diff --git a/superset/views/core.py b/superset/views/core.py index bebc2c3576050..f78ee5b5af356 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -575,7 +575,11 @@ def explore_json( force = request.args.get("force") == "true" - if is_feature_enabled("GLOBAL_ASYNC_QUERIES"): + # TODO: support CSV, SQL query and other non-JSON types + if ( + is_feature_enabled("GLOBAL_ASYNC_QUERIES") + and response_type == utils.ChartDataResultFormat.JSON + ): try: async_channel_id = async_query_manager.parse_jwt_from_request( request diff --git a/tests/charts/api_tests.py b/tests/charts/api_tests.py index 8ae95aa3d90ea..5577e7f717361 100644 --- a/tests/charts/api_tests.py +++ b/tests/charts/api_tests.py @@ -1285,6 +1285,22 @@ def test_chart_data_async(self): keys, ["channel_id", "job_id", "user_id", "status", "errors", "result_url"] ) + @mock.patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + GLOBAL_ASYNC_QUERIES=True, + ) + def test_chart_data_async_results_type(self): + """ + Chart data API: Test chart data query non-JSON format (async) + """ + async_query_manager.init_app(app) + self.login(username="admin") + table = self.get_table_by_name("birth_names") + request_payload = get_query_context(table.name, table.id, table.type) + request_payload["result_type"] = "results" + rv = self.post_assert_metric(CHART_DATA_URI, request_payload, "data") + self.assertEqual(rv.status_code, 200) + @mock.patch.dict( "superset.extensions.feature_flag_manager._feature_flags", GLOBAL_ASYNC_QUERIES=True, diff --git a/tests/core_tests.py b/tests/core_tests.py index 4e231735e9b1a..c0a936d2912ec 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -897,6 +897,36 @@ def test_explore_json_async(self): keys, ["channel_id", "job_id", "user_id", "status", "errors", "result_url"] ) + @mock.patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + GLOBAL_ASYNC_QUERIES=True, + ) + def test_explore_json_async_results_format(self): + tbl_id = self.table_ids.get("birth_names") + form_data = { + "queryFields": { + "metrics": "metrics", + "groupby": "groupby", + "columns": "groupby", + }, + "datasource": f"{tbl_id}__table", + "viz_type": "dist_bar", + "time_range_endpoints": ["inclusive", "exclusive"], + "granularity_sqla": "ds", + "time_range": "No filter", + "metrics": ["count"], + "adhoc_filters": [], + "groupby": ["gender"], + "row_limit": 100, + } + async_query_manager.init_app(app) + self.login(username="admin") + rv = self.client.post( + "/superset/explore_json/?results=true", + data={"form_data": json.dumps(form_data)}, + ) + self.assertEqual(rv.status_code, 200) + @mock.patch( "superset.utils.cache_manager.CacheManager.cache", new_callable=mock.PropertyMock, From 5999bf35db70af5069afbf342013160e34cdcf13 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Fri, 4 Dec 2020 08:56:43 -0800 Subject: [PATCH 22/42] Add tests for redux middleware --- CONTRIBUTING.md | 5 + .../javascripts/middleware/asyncEvent_spec.js | 255 ++++++++++++++++++ .../src/middleware/asyncEvent.ts | 127 +++++---- 3 files changed, 328 insertions(+), 59 deletions(-) create mode 100644 superset-frontend/spec/javascripts/middleware/asyncEvent_spec.js diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2e7d79e405303..4e5a860fd3e42 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -623,6 +623,11 @@ cd superset-frontend npm run test ``` +To run a single test file: +```bash +npm run test -- path/to/file.js +``` + ### Integration Testing We use 
[Cypress](https://www.cypress.io/) for integration tests. Tests can be run by `tox -e cypress`. To open Cypress and explore tests first setup and run test server: diff --git a/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.js b/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.js new file mode 100644 index 0000000000000..9cfc62e80cede --- /dev/null +++ b/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.js @@ -0,0 +1,255 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import fetchMock from 'fetch-mock'; +import sinon from 'sinon'; +import * as featureFlags from 'src/featureFlags'; +import initAsyncEvents from 'src/middleware/asyncEvent'; + +jest.useFakeTimers(); + +describe('asyncEvent middleware', () => { + const next = sinon.spy(); + const state = { + charts: { + 123: { + id: 123, + status: 'loading', + asyncJobId: 'foo123', + }, + 345: { + id: 345, + status: 'loading', + asyncJobId: 'foo345', + }, + }, + }; + const events = [ + { + status: 'done', + result_url: '/api/v1/chart/data/cache-key-1', + job_id: 'foo123', + channel_id: '999', + errors: [], + }, + { + status: 'done', + result_url: '/api/v1/chart/data/cache-key-2', + job_id: 'foo345', + channel_id: '999', + errors: [], + }, + ]; + const mockStore = { + getState: () => state, + dispatch: sinon.stub(), + }; + const action = { + type: 'GENERIC_ACTION', + }; + const EVENTS_ENDPOINT = 'glob:*/api/v1/async_event/*'; + const CACHED_DATA_ENDPOINT = 'glob:*/api/v1/chart/data/*'; + let featureEnabledStub; + + function setup() { + const getPendingComponents = sinon.stub(); + const successAction = sinon.spy(); + const errorAction = sinon.spy(); + const testCallback = sinon.stub(); + const testCallbackPromise = sinon.stub(); + testCallbackPromise.returns( + new Promise(resolve => { + testCallback.callsFake(resolve); + }), + ); + + return { + getPendingComponents, + successAction, + errorAction, + testCallback, + testCallbackPromise, + }; + } + + beforeEach(() => { + fetchMock.get(EVENTS_ENDPOINT, { + status: 200, + body: { result: [] }, + }); + fetchMock.get(CACHED_DATA_ENDPOINT, { + status: 200, + body: { result: { some: 'data' } }, + }); + featureEnabledStub = sinon.stub(featureFlags, 'isFeatureEnabled'); + featureEnabledStub.withArgs('GLOBAL_ASYNC_QUERIES').returns(true); + }); + afterEach(() => { + fetchMock.reset(); + next.resetHistory(); + featureEnabledStub.restore(); + }); + afterAll(fetchMock.reset); + + it('should initialize and call next', () => { + const { getPendingComponents, successAction, errorAction } = setup(); + getPendingComponents.returns([]); + const asyncEventMiddleware = initAsyncEvents({ + getPendingComponents, + successAction, + errorAction, + }); + asyncEventMiddleware(mockStore)(next)(action); + 
expect(next.callCount).toBe(1); + }); + + it('should fetch events when there are pending components', () => { + const { + getPendingComponents, + successAction, + errorAction, + testCallback, + testCallbackPromise, + } = setup(); + getPendingComponents.returns(Object.values(state.charts)); + const asyncEventMiddleware = initAsyncEvents({ + getPendingComponents, + successAction, + errorAction, + processEventsCallback: testCallback, + }); + + asyncEventMiddleware(mockStore)(next)(action); + + return testCallbackPromise().then(() => { + expect(fetchMock.calls(EVENTS_ENDPOINT)).toHaveLength(1); + }); + }); + + it('should fetch cached when there are successful events', () => { + const { + getPendingComponents, + successAction, + errorAction, + testCallback, + testCallbackPromise, + } = setup(); + fetchMock.reset(); + fetchMock.get(EVENTS_ENDPOINT, { + status: 200, + body: { result: events }, + }); + fetchMock.get(CACHED_DATA_ENDPOINT, { + status: 200, + body: { result: { some: 'data' } }, + }); + getPendingComponents.returns(Object.values(state.charts)); + const asyncEventMiddleware = initAsyncEvents({ + getPendingComponents, + successAction, + errorAction, + processEventsCallback: testCallback, + }); + + asyncEventMiddleware(mockStore)(next)(action); + + return testCallbackPromise().then(() => { + expect(fetchMock.calls(EVENTS_ENDPOINT)).toHaveLength(1); + expect(fetchMock.calls(CACHED_DATA_ENDPOINT)).toHaveLength(2); + expect(successAction.callCount).toBe(2); + }); + }); + + it('should call errorAction for cache fetch error responses', () => { + const { + getPendingComponents, + successAction, + errorAction, + testCallback, + testCallbackPromise, + } = setup(); + fetchMock.reset(); + fetchMock.get(EVENTS_ENDPOINT, { + status: 200, + body: { result: events }, + }); + fetchMock.get(CACHED_DATA_ENDPOINT, { + status: 400, + body: { errors: ['error'] }, + }); + getPendingComponents.returns(Object.values(state.charts)); + const asyncEventMiddleware = initAsyncEvents({ + getPendingComponents, + successAction, + errorAction, + processEventsCallback: testCallback, + }); + + asyncEventMiddleware(mockStore)(next)(action); + + return testCallbackPromise().then(() => { + expect(fetchMock.calls(EVENTS_ENDPOINT)).toHaveLength(1); + expect(fetchMock.calls(CACHED_DATA_ENDPOINT)).toHaveLength(2); + expect(errorAction.callCount).toBe(2); + }); + }); + + it('should handle event fetching error responses', () => { + const { + getPendingComponents, + successAction, + errorAction, + testCallback, + testCallbackPromise, + } = setup(); + fetchMock.reset(); + fetchMock.get(EVENTS_ENDPOINT, { + status: 400, + body: { message: 'error' }, + }); + getPendingComponents.returns(Object.values(state.charts)); + const asyncEventMiddleware = initAsyncEvents({ + getPendingComponents, + successAction, + errorAction, + processEventsCallback: testCallback, + }); + + asyncEventMiddleware(mockStore)(next)(action); + + return testCallbackPromise().then(() => { + expect(fetchMock.calls(EVENTS_ENDPOINT)).toHaveLength(1); + }); + }); + + it('should not fetch events when async queries are disabled', () => { + featureEnabledStub.restore(); + featureEnabledStub = sinon.stub(featureFlags, 'isFeatureEnabled'); + featureEnabledStub.withArgs('GLOBAL_ASYNC_QUERIES').returns(false); + const { getPendingComponents, successAction, errorAction } = setup(); + getPendingComponents.returns(Object.values(state.charts)); + const asyncEventMiddleware = initAsyncEvents({ + getPendingComponents, + successAction, + errorAction, + }); + + 
asyncEventMiddleware(mockStore)(next)(action); + expect(getPendingComponents.called).toBe(false); + }); +}); diff --git a/superset-frontend/src/middleware/asyncEvent.ts b/superset-frontend/src/middleware/asyncEvent.ts index f94218afbf20c..acdce53c66efd 100644 --- a/superset-frontend/src/middleware/asyncEvent.ts +++ b/superset-frontend/src/middleware/asyncEvent.ts @@ -25,12 +25,6 @@ import { parseErrorJson, } from '../utils/getClientErrorObject'; -type AsyncEventOptions = { - getPendingComponents: (state: any) => any[]; - successAction: (componentId: number, componentData: any) => { type: string }; - errorAction: (componentId: number, response: any) => { type: string }; -}; - export type AsyncEvent = { id: string; channel_id: string; @@ -41,6 +35,13 @@ export type AsyncEvent = { result_url: string; }; +type AsyncEventOptions = { + getPendingComponents: (state: any) => any[]; + successAction: (componentId: number, componentData: any) => { type: string }; + errorAction: (componentId: number, response: any) => { type: string }; + processEventsCallback?: (events: AsyncEvent[]) => void; // this is currently used only for tests +}; + type CachedDataResponse = { componentId: number; status: string; @@ -49,7 +50,12 @@ type CachedDataResponse = { const initAsyncEvents = (options: AsyncEventOptions) => { const POLLING_DELAY = 500; - const { getPendingComponents, successAction, errorAction } = options; + const { + getPendingComponents, + successAction, + errorAction, + processEventsCallback, + } = options; const middleware: Middleware = (store: MiddlewareAPI) => ( next: Dispatch, @@ -61,7 +67,7 @@ const initAsyncEvents = (options: AsyncEventOptions) => { DONE: 'done', }; const LOCALSTORAGE_KEY = 'last_async_event_id'; - const pollingUrl = '/api/v1/async_event/'; + const POLLING_URL = '/api/v1/async_event/'; let lastReceivedEventId: string | null; try { @@ -74,17 +80,13 @@ const initAsyncEvents = (options: AsyncEventOptions) => { lastEventId: string | null, ): Promise => { const url = lastEventId - ? `${pollingUrl}?last_id=${lastEventId}` - : pollingUrl; - const response = await fetch(url); - - if (!response.ok) { - const message = `An error has occured: ${response.status}`; - throw new Error(message); - } + ? 
`${POLLING_URL}?last_id=${lastEventId}` + : POLLING_URL; + const { json } = await SupersetClient.get({ + endpoint: url, + }); - const data = await response.json(); - return data.result; + return json.result; }; const fetchCachedData = async ( @@ -118,53 +120,60 @@ const initAsyncEvents = (options: AsyncEventOptions) => { const processEvents = async () => { const state = store.getState(); const queuedComponents = getPendingComponents(state); - if (queuedComponents.length) { + let events: AsyncEvent[] = []; + if (queuedComponents && queuedComponents.length) { try { - const events = await fetchEvents(lastReceivedEventId); - if (!events || !events.length) { - return setTimeout(processEvents, POLLING_DELAY); - } - const componentsByJobId = queuedComponents.reduce((acc, item) => { - acc[item.asyncJobId] = item; - return acc; - }, {}); - const fetchDataEvents: Promise[] = []; - events.forEach((asyncEvent: AsyncEvent) => { - const component = componentsByJobId[asyncEvent.job_id]; - if (!component) { - console.warn('component not found for job_id', asyncEvent.job_id); - return false; - } - const componentId = component.id; - switch (asyncEvent.status) { - case JOB_STATUS.DONE: - fetchDataEvents.push(fetchCachedData(asyncEvent, componentId)); - break; - case JOB_STATUS.ERROR: - store.dispatch( - errorAction(componentId, parseErrorJson(asyncEvent)), + events = await fetchEvents(lastReceivedEventId); + if (events && events.length) { + const componentsByJobId = queuedComponents.reduce((acc, item) => { + acc[item.asyncJobId] = item; + return acc; + }, {}); + const fetchDataEvents: Promise[] = []; + events.forEach((asyncEvent: AsyncEvent) => { + const component = componentsByJobId[asyncEvent.job_id]; + if (!component) { + console.warn( + 'component not found for job_id', + asyncEvent.job_id, ); - break; - default: - console.warn('received event with status', asyncEvent.status); - } - - return setLastId(asyncEvent); - }); - - const fetchResults = await Promise.all(fetchDataEvents); - fetchResults.forEach(result => { - if (result.status === 'success') { - store.dispatch(successAction(result.componentId, result.data)); - } else { - store.dispatch(errorAction(result.componentId, result.data)); - } - }); + return false; + } + const componentId = component.id; + switch (asyncEvent.status) { + case JOB_STATUS.DONE: + fetchDataEvents.push( + fetchCachedData(asyncEvent, componentId), + ); + break; + case JOB_STATUS.ERROR: + store.dispatch( + errorAction(componentId, parseErrorJson(asyncEvent)), + ); + break; + default: + console.warn('received event with status', asyncEvent.status); + } + + return setLastId(asyncEvent); + }); + + const fetchResults = await Promise.all(fetchDataEvents); + fetchResults.forEach(result => { + if (result.status === 'success') { + store.dispatch(successAction(result.componentId, result.data)); + } else { + store.dispatch(errorAction(result.componentId, result.data)); + } + }); + } } catch (err) { - console.error(err); + console.warn(err); } } + if (processEventsCallback) processEventsCallback(events); + return setTimeout(processEvents, POLLING_DELAY); }; From 27e4548e5b33d1ebb6b8e4e87982ea781443f301 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Fri, 4 Dec 2020 09:49:32 -0800 Subject: [PATCH 23/42] Remove debug statement Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com> --- superset-frontend/src/chart/Chart.jsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/superset-frontend/src/chart/Chart.jsx b/superset-frontend/src/chart/Chart.jsx index 
92804a5ddd836..2636751fd6e1e 100644 --- a/superset-frontend/src/chart/Chart.jsx +++ b/superset-frontend/src/chart/Chart.jsx @@ -157,8 +157,6 @@ class Chart extends React.PureComponent { queryResponse, } = this.props; - console.log('**** Chart renderError', queryResponse); - const error = queryResponse?.errors?.[0]; if (error) { const extra = error.extra || {}; From 119cae7c194da3eb585d24023f5359783b7abb5f Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Fri, 4 Dec 2020 15:04:13 -0800 Subject: [PATCH 24/42] Skip force_cached if no queryObj --- superset/viz.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/superset/viz.py b/superset/viz.py index 2d4b76d80a67c..9e1d9e2b3b627 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -496,7 +496,6 @@ def get_df_payload( is_loaded = False stacktrace = None df = None - cached_dttm = datetime.utcnow().isoformat().split(".")[0] if cache_key and cache_manager.data_cache and not self.force: cache_value = cache_manager.data_cache.get(cache_key) if cache_value: @@ -516,13 +515,12 @@ def get_df_payload( ) logger.info("Serving from cache") - if self.force_cached and not is_loaded: - logger.warning( - f"force_cached (viz.py): value not found for cache key {cache_key}" - ) - raise CacheLoadError(_("Cached value not found")) - if query_obj and not is_loaded: + if self.force_cached: + logger.warning( + f"force_cached (viz.py): value not found for cache key {cache_key}" + ) + raise CacheLoadError(_("Cached value not found")) try: invalid_columns = [ col From 966640845ab6f455b3af9ec115fbecc762d2d489 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Fri, 4 Dec 2020 15:05:23 -0800 Subject: [PATCH 25/42] SunburstViz: don't modify self.form_data --- superset/viz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/viz.py b/superset/viz.py index 9e1d9e2b3b627..b546733480217 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -1704,7 +1704,7 @@ class SunburstViz(BaseViz): def get_data(self, df: pd.DataFrame) -> VizData: if df.empty: return None - fd = self.form_data + fd = copy.deepcopy(self.form_data) cols = fd.get("groupby") or [] cols.extend(["m1", "m2"]) metric = utils.get_metric_name(fd["metric"]) From e40eb459134c48eec5ac319bdedeba255db2f538 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Mon, 7 Dec 2020 10:45:28 -0800 Subject: [PATCH 26/42] Fix failing annotation test --- superset/common/query_context.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/superset/common/query_context.py b/superset/common/query_context.py index 9617164c3416d..f21679dacbd41 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -174,9 +174,6 @@ def get_single_payload( query_obj.row_offset = 0 query_obj.columns = [o.column_name for o in self.datasource.columns] payload = self.get_df_payload(query_obj, force_cached=force_cached) - # TODO: implement - payload["annotation_data"] = [] - df = payload["df"] status = payload["status"] if status != utils.QueryStatus.FAILED: From f4917890846bcb010df757a275f065bb2e288dd2 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Mon, 7 Dec 2020 12:00:44 -0800 Subject: [PATCH 27/42] Resolve merge/lint issues --- superset-frontend/src/explore/components/DataTablesPane.tsx | 2 +- superset-frontend/src/explore/components/DisplayQueryButton.jsx | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/superset-frontend/src/explore/components/DataTablesPane.tsx b/superset-frontend/src/explore/components/DataTablesPane.tsx index 
b1bc9081db6b1..474ef94f83e7e 100644 --- a/superset-frontend/src/explore/components/DataTablesPane.tsx +++ b/superset-frontend/src/explore/components/DataTablesPane.tsx @@ -23,7 +23,7 @@ import Tabs from 'src/common/components/Tabs'; import Loading from 'src/components/Loading'; import TableView, { EmptyWrapperType } from 'src/components/TableView'; import { getChartDataRequest } from 'src/chart/chartAction'; -import getClientErrorObject from 'src/utils/getClientErrorObject'; +import { getClientErrorObject } from 'src/utils/getClientErrorObject'; import { CopyToClipboardButton, FilterInput, diff --git a/superset-frontend/src/explore/components/DisplayQueryButton.jsx b/superset-frontend/src/explore/components/DisplayQueryButton.jsx index b6fef2a47f667..dbb5d8441bda7 100644 --- a/superset-frontend/src/explore/components/DisplayQueryButton.jsx +++ b/superset-frontend/src/explore/components/DisplayQueryButton.jsx @@ -30,8 +30,6 @@ import { DropdownButton } from 'react-bootstrap'; import { styled, t } from '@superset-ui/core'; import { Menu } from 'src/common/components'; -import TableView, { EmptyWrapperType } from 'src/components/TableView'; -import Button from 'src/components/Button'; import { getClientErrorObject } from '../../utils/getClientErrorObject'; import CopyToClipboard from '../../components/CopyToClipboard'; import { getChartDataRequest } from '../../chart/chartAction'; From f01740ebdc5926dcee66a5548ed0529b3658f862 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Mon, 7 Dec 2020 16:38:06 -0800 Subject: [PATCH 28/42] Reduce polling delay --- superset-frontend/src/middleware/asyncEvent.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset-frontend/src/middleware/asyncEvent.ts b/superset-frontend/src/middleware/asyncEvent.ts index acdce53c66efd..63241a30cd167 100644 --- a/superset-frontend/src/middleware/asyncEvent.ts +++ b/superset-frontend/src/middleware/asyncEvent.ts @@ -49,7 +49,7 @@ type CachedDataResponse = { }; const initAsyncEvents = (options: AsyncEventOptions) => { - const POLLING_DELAY = 500; + const POLLING_DELAY = 250; const { getPendingComponents, successAction, From 838c5266f4422d5d422296a09279d1d77f846e1f Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Mon, 7 Dec 2020 17:08:45 -0800 Subject: [PATCH 29/42] Fix new getClientErrorObject reference --- superset-frontend/src/views/CRUD/hooks.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset-frontend/src/views/CRUD/hooks.ts b/superset-frontend/src/views/CRUD/hooks.ts index e209815aa3666..dedcadd87afbb 100644 --- a/superset-frontend/src/views/CRUD/hooks.ts +++ b/superset-frontend/src/views/CRUD/hooks.ts @@ -25,7 +25,7 @@ import { FetchDataConfig } from 'src/components/ListView'; import { FilterValue } from 'src/components/ListView/types'; import Chart, { Slice } from 'src/types/Chart'; import copyTextToClipboard from 'src/utils/copy'; -import getClientErrorObject from 'src/utils/getClientErrorObject'; +import { getClientErrorObject } from 'src/utils/getClientErrorObject'; import { FavoriteStatus } from './types'; interface ListViewResourceState { From f0de265b6eecc1fe0a20bbaa3268acebf878ecde Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Mon, 7 Dec 2020 21:31:27 -0800 Subject: [PATCH 30/42] Fix flakey unit tests --- tests/tasks/async_queries_tests.py | 207 ++++++++++++++--------------- 1 file changed, 103 insertions(+), 104 deletions(-) diff --git a/tests/tasks/async_queries_tests.py b/tests/tasks/async_queries_tests.py index f1d3843564ffb..89d41a9d67e34 100644 --- 
a/tests/tasks/async_queries_tests.py +++ b/tests/tasks/async_queries_tests.py @@ -31,6 +31,7 @@ load_chart_data_into_cache, load_explore_json_into_cache, ) +from tests.base_tests import SupersetTestCase from tests.fixtures.query_context import get_query_context from tests.test_app import app @@ -40,114 +41,112 @@ def get_table_by_name(name: str) -> SqlaTable: return db.session.query(SqlaTable).filter_by(table_name=name).one() -@patch.object(async_query_manager, "update_job") -def test_load_chart_data_into_cache(mock_update_job): - async_query_manager.init_app(app) - table = get_table_by_name("birth_names") - form_data = get_query_context(table.name, table.id, table.type) - job_metadata = { - "channel_id": str(uuid4()), - "job_id": str(uuid4()), - "user_id": 1, - "status": "pending", - "errors": [], - } +class TestAsyncQueries(SupersetTestCase): + @patch.object(async_query_manager, "update_job") + def test_load_chart_data_into_cache(self, mock_update_job): + async_query_manager.init_app(app) + table = get_table_by_name("birth_names") + form_data = get_query_context(table.name, table.id, table.type) + job_metadata = { + "channel_id": str(uuid4()), + "job_id": str(uuid4()), + "user_id": 1, + "status": "pending", + "errors": [], + } - load_chart_data_into_cache(job_metadata, form_data) - - assert mock_update_job.called - call_args = mock_update_job.call_args - assert call_args.args[0] == job_metadata - assert call_args.args[1] == "done" - assert re.match(r"^/api/v1/chart/data/qc-\w+", call_args.kwargs["result_url"]) - - -@patch.object( - ChartDataCommand, "run", side_effect=ChartDataQueryFailedError("Error: foo") -) -@patch.object(async_query_manager, "update_job") -def test_load_chart_data_into_cache_error(mock_update_job, mock_run_command): - async_query_manager.init_app(app) - table = get_table_by_name("birth_names") - form_data = get_query_context(table.name, table.id, table.type) - job_metadata = { - "channel_id": str(uuid4()), - "job_id": str(uuid4()), - "user_id": 1, - "status": "pending", - "errors": [], - } - with pytest.raises(ChartDataQueryFailedError): load_chart_data_into_cache(job_metadata, form_data) - assert mock_run_command.called - assert mock_run_command.call_args.kwargs["cache"] == True - - assert mock_update_job.called - call_args = mock_update_job.call_args - assert call_args.args[0] == job_metadata - assert call_args.args[1] == "error" - assert call_args.kwargs["errors"] == [{"message": "Error: foo"}] - - -@patch.object(async_query_manager, "update_job") -def test_load_explore_json_into_cache(mock_update_job): - async_query_manager.init_app(app) - table = get_table_by_name("birth_names") - form_data = { - "queryFields": { - "metrics": "metrics", - "groupby": "groupby", - "columns": "groupby", - }, - "datasource": f"{table.id}__table", - "viz_type": "dist_bar", - "time_range_endpoints": ["inclusive", "exclusive"], - "granularity_sqla": "ds", - "time_range": "No filter", - "metrics": ["count"], - "adhoc_filters": [], - "groupby": ["gender"], - "row_limit": 100, - } - job_metadata = { - "channel_id": str(uuid4()), - "job_id": str(uuid4()), - "user_id": 1, - "status": "pending", - "errors": [], - } - - load_explore_json_into_cache(job_metadata, form_data) - - assert mock_update_job.called - call_args = mock_update_job.call_args - assert call_args.args[0] == job_metadata - assert call_args.args[1] == "done" - assert re.match( - r"^/superset/explore_json/data/ejr-\w+", call_args.kwargs["result_url"] - ) - + assert mock_update_job.called + call_args = mock_update_job.call_args 
+ self.assertEqual(call_args.args[0], job_metadata) + assert call_args.args[1] == "done" + assert re.match(r"^/api/v1/chart/data/qc-\w+", call_args.kwargs["result_url"]) -@patch.object(async_query_manager, "update_job") -def test_load_explore_json_into_cache_error(mock_update_job): - async_query_manager.init_app(app) - form_data = {} - job_metadata = { - "channel_id": str(uuid4()), - "job_id": str(uuid4()), - "user_id": 1, - "status": "pending", - "errors": [], - } + @patch.object( + ChartDataCommand, "run", side_effect=ChartDataQueryFailedError("Error: foo") + ) + @patch.object(async_query_manager, "update_job") + def test_load_chart_data_into_cache_error(self, mock_update_job, mock_run_command): + async_query_manager.init_app(app) + table = get_table_by_name("birth_names") + form_data = get_query_context(table.name, table.id, table.type) + job_metadata = { + "channel_id": str(uuid4()), + "job_id": str(uuid4()), + "user_id": 1, + "status": "pending", + "errors": [], + } + with pytest.raises(ChartDataQueryFailedError): + load_chart_data_into_cache(job_metadata, form_data) + + assert mock_run_command.called + assert mock_run_command.call_args.kwargs["cache"] == True + + assert mock_update_job.called + call_args = mock_update_job.call_args + self.assertEqual(call_args.args[0], job_metadata) + assert call_args.args[1] == "error" + assert call_args.kwargs["errors"] == [{"message": "Error: foo"}] + + @patch.object(async_query_manager, "update_job") + def test_load_explore_json_into_cache(self, mock_update_job): + async_query_manager.init_app(app) + table = get_table_by_name("birth_names") + form_data = { + "queryFields": { + "metrics": "metrics", + "groupby": "groupby", + "columns": "groupby", + }, + "datasource": f"{table.id}__table", + "viz_type": "dist_bar", + "time_range_endpoints": ["inclusive", "exclusive"], + "granularity_sqla": "ds", + "time_range": "No filter", + "metrics": ["count"], + "adhoc_filters": [], + "groupby": ["gender"], + "row_limit": 100, + } + job_metadata = { + "channel_id": str(uuid4()), + "job_id": str(uuid4()), + "user_id": 1, + "status": "pending", + "errors": [], + } - with pytest.raises(SupersetException): load_explore_json_into_cache(job_metadata, form_data) - assert mock_update_job.called - call_args = mock_update_job.call_args - assert call_args.args[0] == job_metadata - assert call_args.args[1] == "error" - assert call_args.kwargs["errors"] == [ - "The datasource associated with this chart no longer exists" - ] + assert mock_update_job.called + call_args = mock_update_job.call_args + self.assertEqual(call_args.args[0], job_metadata) + assert call_args.args[1] == "done" + assert re.match( + r"^/superset/explore_json/data/ejr-\w+", call_args.kwargs["result_url"] + ) + + @patch.object(async_query_manager, "update_job") + def test_load_explore_json_into_cache_error(self, mock_update_job): + async_query_manager.init_app(app) + form_data = {} + job_metadata = { + "channel_id": str(uuid4()), + "job_id": str(uuid4()), + "user_id": 1, + "status": "pending", + "errors": [], + } + + with pytest.raises(SupersetException): + load_explore_json_into_cache(job_metadata, form_data) + + assert mock_update_job.called + call_args = mock_update_job.call_args + self.assertEqual(call_args.args[0], job_metadata) + assert call_args.args[1] == "error" + assert call_args.kwargs["errors"] == [ + "The datasource associated with this chart no longer exists" + ] From 066504f0864225d809028f4331260ce8f9b66e44 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Tue, 8 Dec 2020 17:55:23 -0800 
Subject: [PATCH 31/42] /api/v1/async_event: increment redis stream ID, add tests --- superset/async_events/api.py | 6 +- superset/utils/async_query_manager.py | 15 ++- tests/async_events/__init__.py | 16 ++++ tests/async_events/api_tests.py | 130 ++++++++++++++++++++++++++ 4 files changed, 159 insertions(+), 8 deletions(-) create mode 100644 tests/async_events/__init__.py create mode 100644 tests/async_events/api_tests.py diff --git a/superset/async_events/api.py b/superset/async_events/api.py index 5676038802d32..61b85ac6f9edb 100644 --- a/superset/async_events/api.py +++ b/superset/async_events/api.py @@ -74,19 +74,15 @@ def events(self) -> Response: job_id: type: string user_id: - type: string + type: integer status: type: string msg: type: string cache_key: type: string - 400: - $ref: '#/components/responses/400' 401: $ref: '#/components/responses/401' - 404: - $ref: '#/components/responses/404' 500: $ref: '#/components/responses/500' """ diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index 13b72d7aeb47d..fe18409e91ad4 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -51,6 +51,15 @@ def parse_event(event_data: Tuple[str, Dict[str, Any]]) -> Dict[str, Any]: return {"id": event_id, **json.loads(event_payload)} +def increment_id(redis_id: str) -> str: + # redis stream IDs are in this format: '1607477697866-0' + try: + prefix, last = redis_id[:-1], int(redis_id[-1]) + return prefix + str(last + 1) + except Exception: # pylint: disable=broad-except + return redis_id + + class AsyncQueryManager: MAX_EVENT_COUNT = 100 STATUS_PENDING = "pending" @@ -148,7 +157,7 @@ def read_events( self, channel: str, last_id: Optional[str] ) -> List[Optional[Dict[str, Any]]]: stream_name = f"{self._stream_prefix}{channel}" - start_id = last_id if last_id else "-" + start_id = increment_id(last_id) if last_id else "-" results = self._redis.xrange( # type: ignore stream_name, start_id, "+", self.MAX_EVENT_COUNT ) @@ -169,8 +178,8 @@ def update_job( full_stream_name = f"{self._stream_prefix}full" scoped_stream_name = f"{self._stream_prefix}{job_metadata['channel_id']}" - logger.info("********** logging event data to stream %s", scoped_stream_name) - logger.info(event_data) + logger.debug("********** logging event data to stream %s", scoped_stream_name) + logger.debug(event_data) self._redis.xadd( # type: ignore scoped_stream_name, event_data, "*", self._stream_limit diff --git a/tests/async_events/__init__.py b/tests/async_events/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/async_events/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
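
For context on the `increment_id` helper added above: Redis `XRANGE` treats its start ID as inclusive, so polling with the last event ID a client has already seen would return that event again; bumping the sequence portion of the ID makes the read effectively exclusive of it. Below is a minimal sketch of that idea, assuming the standard `'<milliseconds>-<sequence>'` stream ID format; the function name, client, and stream name are illustrative and not part of this patch:

```python
def next_stream_id(last_id: str) -> str:
    """Return the next possible stream ID after ``last_id``.

    Illustrative sketch only; assumes the '<milliseconds>-<sequence>' format
    used by Redis stream IDs (e.g. '1607477697866-0').
    """
    try:
        timestamp, sequence = last_id.split("-")
        return f"{timestamp}-{int(sequence) + 1}"
    except (AttributeError, ValueError):
        # Fall back to the original value if the ID is not in the expected shape
        return last_id


# Example usage with redis-py (client and stream name are placeholders):
# events = redis_client.xrange(
#     "async-events-<channel>", next_stream_id("1607477697866-0"), "+", count=100
# )
```
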
diff --git a/tests/async_events/api_tests.py b/tests/async_events/api_tests.py new file mode 100644 index 0000000000000..ccf980ce77a4c --- /dev/null +++ b/tests/async_events/api_tests.py @@ -0,0 +1,130 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import json +from typing import Optional +from unittest import mock + +from superset.extensions import async_query_manager +from tests.base_tests import SupersetTestCase +from tests.test_app import app + + +class TestAsyncEventApi(SupersetTestCase): + def fetch_events(self, last_id: Optional[str] = None): + base_uri = "api/v1/async_event/" + uri = f"{base_uri}?last_id={last_id}" if last_id else base_uri + return self.client.get(uri) + + def test_events(self): + async_query_manager.init_app(app) + self.login(username="admin") + with mock.patch.object(async_query_manager._redis, "xrange") as mock_xrange: + rv = self.fetch_events() + response = json.loads(rv.data.decode("utf-8")) + args = mock_xrange.call_args.args + prefix_len = len(app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"]) + + assert rv.status_code == 200 + assert mock_xrange.called + assert len(args[0]) == (prefix_len + 36) # uuidv4 + assert args[1] == "-" + assert args[2] == "+" + assert args[3] == 100 + self.assertEqual(response, {"result": []}) + + def test_events_last_id(self): + async_query_manager.init_app(app) + self.login(username="admin") + with mock.patch.object(async_query_manager._redis, "xrange") as mock_xrange: + rv = self.fetch_events("1607471525180-0") + response = json.loads(rv.data.decode("utf-8")) + args = mock_xrange.call_args.args + prefix_len = len(app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"]) + + assert rv.status_code == 200 + assert mock_xrange.called + assert len(args[0]) == (prefix_len + 36) # uuidv4 + assert args[1] == "1607471525180-1" # increments + assert args[2] == "+" + assert args[3] == 100 + self.assertEqual(response, {"result": []}) + + def test_events_results(self): + async_query_manager.init_app(app) + self.login(username="admin") + with mock.patch.object(async_query_manager._redis, "xrange") as mock_xrange: + mock_xrange.return_value = [ + ( + "1607477697866-0", + { + "data": '{"channel_id": "1095c1c9-b6b1-444d-aa83-8e323b32831f", "job_id": "10a0bd9a-03c8-4737-9345-f4234ba86512", "user_id": "1", "status": "done", "errors": [], "result_url": "/api/v1/chart/data/qc-ecd766dd461f294e1bcdaa321e0e8463"}' + }, + ), + ( + "1607477697993-0", + { + "data": '{"channel_id": "1095c1c9-b6b1-444d-aa83-8e323b32831f", "job_id": "027cbe49-26ce-4813-bb5a-0b95a626b84c", "user_id": "1", "status": "done", "errors": [], "result_url": "/api/v1/chart/data/qc-1bbc3a240e7039ba4791aefb3a7ee80d"}' + }, + ), + ] + rv = self.fetch_events() + response = json.loads(rv.data.decode("utf-8")) + args = mock_xrange.call_args.args + 
prefix_len = len(app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"]) + + assert rv.status_code == 200 + assert mock_xrange.called + assert len(args[0]) == (prefix_len + 36) # uuidv4 + assert args[1] == "-" + assert args[2] == "+" + assert args[3] == 100 + expected = { + "result": [ + { + "channel_id": "1095c1c9-b6b1-444d-aa83-8e323b32831f", + "errors": [], + "id": "1607477697866-0", + "job_id": "10a0bd9a-03c8-4737-9345-f4234ba86512", + "result_url": "/api/v1/chart/data/qc-ecd766dd461f294e1bcdaa321e0e8463", + "status": "done", + "user_id": "1", + }, + { + "channel_id": "1095c1c9-b6b1-444d-aa83-8e323b32831f", + "errors": [], + "id": "1607477697993-0", + "job_id": "027cbe49-26ce-4813-bb5a-0b95a626b84c", + "result_url": "/api/v1/chart/data/qc-1bbc3a240e7039ba4791aefb3a7ee80d", + "status": "done", + "user_id": "1", + }, + ] + } + self.assertEqual(response, expected) + + def test_events_no_login(self): + async_query_manager.init_app(app) + rv = self.fetch_events() + assert rv.status_code == 401 + + def test_events_no_token(self): + self.login(username="admin") + self.client.set_cookie( + "localhost", app.config["GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME"], "" + ) + rv = self.fetch_events() + assert rv.status_code == 401 From d6c8a1d7ad668f66124392bc9d5ad3a58056996b Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Wed, 9 Dec 2020 15:32:43 -0800 Subject: [PATCH 32/42] PR feedback: refactoring, configuration --- .../javascripts/middleware/asyncEvent_spec.js | 6 +++ superset-frontend/src/dashboard/index.jsx | 5 +- superset-frontend/src/explore/index.jsx | 5 +- superset-frontend/src/featureFlags.ts | 7 ++- .../src/middleware/asyncEvent.ts | 46 +++++++++++-------- superset/app.py | 10 ---- superset/charts/api.py | 10 ++-- superset/common/query_context.py | 8 +++- superset/config.py | 6 +-- superset/tasks/async_queries.py | 5 +- superset/utils/async_query_manager.py | 17 ++++++- tests/cache_tests.py | 6 ++- 12 files changed, 77 insertions(+), 54 deletions(-) diff --git a/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.js b/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.js index 9cfc62e80cede..2dee5d30dca79 100644 --- a/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.js +++ b/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.js @@ -65,6 +65,7 @@ describe('asyncEvent middleware', () => { const EVENTS_ENDPOINT = 'glob:*/api/v1/async_event/*'; const CACHED_DATA_ENDPOINT = 'glob:*/api/v1/chart/data/*'; let featureEnabledStub; + let getFeatureStub; function setup() { const getPendingComponents = sinon.stub(); @@ -98,11 +99,16 @@ describe('asyncEvent middleware', () => { }); featureEnabledStub = sinon.stub(featureFlags, 'isFeatureEnabled'); featureEnabledStub.withArgs('GLOBAL_ASYNC_QUERIES').returns(true); + getFeatureStub = sinon.stub(featureFlags, 'getFeatureFlag'); + getFeatureStub + .withArgs('GLOBAL_ASYNC_QUERIES_OPTIONS') + .returns({ transport: 'polling', polling_delay: 250 }); }); afterEach(() => { fetchMock.reset(); next.resetHistory(); featureEnabledStub.restore(); + getFeatureStub.restore(); }); afterAll(fetchMock.reset); diff --git a/superset-frontend/src/dashboard/index.jsx b/superset-frontend/src/dashboard/index.jsx index 90430f3021f37..bbbd0172c7eef 100644 --- a/superset-frontend/src/dashboard/index.jsx +++ b/superset-frontend/src/dashboard/index.jsx @@ -16,7 +16,6 @@ * specific language governing permissions and limitations * under the License. 
*/ -import { filter } from 'lodash'; import React from 'react'; import ReactDOM from 'react-dom'; import thunk from 'redux-thunk'; @@ -37,8 +36,8 @@ initFeatureFlags(bootstrapData.common.feature_flags); const initState = getInitialState(bootstrapData); const asyncEventMiddleware = initAsyncEvents({ - getPendingComponents: state => - filter(state.charts, { chartStatus: 'loading' }), + getPendingComponents: ({ charts }) => + Object.values(charts).filter(c => c.chartStatus === 'loading'), successAction: (componentId, componentData) => actions.chartUpdateSucceeded(componentData, componentId), errorAction: (componentId, response) => diff --git a/superset-frontend/src/explore/index.jsx b/superset-frontend/src/explore/index.jsx index c18969cd02440..61082a7bb639e 100644 --- a/superset-frontend/src/explore/index.jsx +++ b/superset-frontend/src/explore/index.jsx @@ -16,7 +16,6 @@ * specific language governing permissions and limitations * under the License. */ -import { filter } from 'lodash'; import React from 'react'; import ReactDOM from 'react-dom'; import { createStore, applyMiddleware, compose } from 'redux'; @@ -39,8 +38,8 @@ initFeatureFlags(bootstrapData.common.feature_flags); const initState = getInitialState(bootstrapData); const asyncEventMiddleware = initAsyncEvents({ - getPendingComponents: state => - filter(state.charts, { chartStatus: 'loading' }), + getPendingComponents: ({ charts }) => + Object.values(charts).filter(c => c.chartStatus === 'loading'), successAction: (componentId, componentData) => actions.chartUpdateSucceeded(componentData, componentId), errorAction: (componentId, response) => diff --git a/superset-frontend/src/featureFlags.ts b/superset-frontend/src/featureFlags.ts index 93b909ddcb3b1..5a317fb30089f 100644 --- a/superset-frontend/src/featureFlags.ts +++ b/superset-frontend/src/featureFlags.ts @@ -35,10 +35,11 @@ export enum FeatureFlag { ESCAPE_MARKDOWN_HTML = 'ESCAPE_MARKDOWN_HTML', VERSIONED_EXPORT = 'VERSIONED_EXPORT', GLOBAL_ASYNC_QUERIES = 'GLOBAL_ASYNC_QUERIES', + GLOBAL_ASYNC_QUERIES_OPTIONS = 'GLOBAL_ASYNC_QUERIES_OPTIONS', } export type FeatureFlagMap = { - [key in FeatureFlag]?: boolean; + [key in FeatureFlag]?: any; }; // eslint-disable-next-line @typescript-eslint/no-unused-vars @@ -59,3 +60,7 @@ export function initFeatureFlags(featureFlags: FeatureFlagMap) { export function isFeatureEnabled(feature: FeatureFlag) { return window && window.featureFlags && !!window.featureFlags[feature]; } + +export function getFeatureFlag(feature: FeatureFlag) { + return window.featureFlags[feature]; +} diff --git a/superset-frontend/src/middleware/asyncEvent.ts b/superset-frontend/src/middleware/asyncEvent.ts index 63241a30cd167..e80f0957f0139 100644 --- a/superset-frontend/src/middleware/asyncEvent.ts +++ b/superset-frontend/src/middleware/asyncEvent.ts @@ -17,9 +17,9 @@ * under the License. 
*/ import { Middleware, MiddlewareAPI, Dispatch } from 'redux'; -import { SupersetClient } from '@superset-ui/core'; +import { makeApi, SupersetClient } from '@superset-ui/core'; import { SupersetError } from 'src/components/ErrorMessage/types'; -import { isFeatureEnabled, FeatureFlag } from '../featureFlags'; +import { getFeatureFlag, isFeatureEnabled, FeatureFlag } from '../featureFlags'; import { getClientErrorObject, parseErrorJson, @@ -49,7 +49,11 @@ type CachedDataResponse = { }; const initAsyncEvents = (options: AsyncEventOptions) => { - const POLLING_DELAY = 250; + // TODO: implement websocket support + const TRANSPORT_POLLING = 'polling'; + const config = getFeatureFlag(FeatureFlag.GLOBAL_ASYNC_QUERIES_OPTIONS) || {}; + const transport = config.transport || TRANSPORT_POLLING; + const polling_delay = config.polling_delay || 500; const { getPendingComponents, successAction, @@ -76,18 +80,13 @@ const initAsyncEvents = (options: AsyncEventOptions) => { console.warn('failed to fetch last event Id from localStorage'); } - const fetchEvents = async ( - lastEventId: string | null, - ): Promise => { - const url = lastEventId - ? `${POLLING_URL}?last_id=${lastEventId}` - : POLLING_URL; - const { json } = await SupersetClient.get({ - endpoint: url, - }); - - return json.result; - }; + const fetchEvents = makeApi< + { last_id?: string | null }, + { result: AsyncEvent[] } + >({ + method: 'GET', + endpoint: POLLING_URL, + }); const fetchCachedData = async ( asyncEvent: AsyncEvent, @@ -120,10 +119,13 @@ const initAsyncEvents = (options: AsyncEventOptions) => { const processEvents = async () => { const state = store.getState(); const queuedComponents = getPendingComponents(state); - let events: AsyncEvent[] = []; + const eventArgs = lastReceivedEventId + ? 
{ last_id: lastReceivedEventId } + : {}; + const events: AsyncEvent[] = []; if (queuedComponents && queuedComponents.length) { try { - events = await fetchEvents(lastReceivedEventId); + const { result: events } = await fetchEvents(eventArgs); if (events && events.length) { const componentsByJobId = queuedComponents.reduce((acc, item) => { acc[item.asyncJobId] = item; @@ -137,7 +139,7 @@ const initAsyncEvents = (options: AsyncEventOptions) => { 'component not found for job_id', asyncEvent.job_id, ); - return false; + return setLastId(asyncEvent); } const componentId = component.id; switch (asyncEvent.status) { @@ -174,10 +176,14 @@ const initAsyncEvents = (options: AsyncEventOptions) => { if (processEventsCallback) processEventsCallback(events); - return setTimeout(processEvents, POLLING_DELAY); + return setTimeout(processEvents, polling_delay); }; - if (isFeatureEnabled(FeatureFlag.GLOBAL_ASYNC_QUERIES)) processEvents(); + if ( + isFeatureEnabled(FeatureFlag.GLOBAL_ASYNC_QUERIES) && + transport === TRANSPORT_POLLING + ) + processEvents(); return action => next(action); }; diff --git a/superset/app.py b/superset/app.py index 0dce86e2c21ae..f187e3e7c9e71 100644 --- a/superset/app.py +++ b/superset/app.py @@ -653,16 +653,6 @@ def configure_wtf(self) -> None: def configure_async_queries(self) -> None: if feature_flag_manager.is_feature_enabled("GLOBAL_ASYNC_QUERIES"): - if ( - self.config["CACHE_CONFIG"]["CACHE_TYPE"] == "null" - or self.config["DATA_CACHE_CONFIG"]["CACHE_TYPE"] == "null" - ): - raise Exception( - """ - Cache backends (CACHE_CONFIG, DATA_CACHE_CONFIG) must be configured - and non-null in order to enable async queries - """ - ) async_query_manager.init_app(self.flask_app) def register_blueprints(self) -> None: diff --git a/superset/charts/api.py b/superset/charts/api.py index e781474c36cd5..add6c7d256a95 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -468,14 +468,10 @@ def get_data_response( return self.response_400(message=exc.message) result_format = result["query_context"].result_format - response = self.response_400( - message=f"Unsupported result_format: {result_format}" - ) - if result_format == ChartDataResultFormat.CSV: # return the first result data = result["queries"][0]["data"] - response = CsvResponse( + return CsvResponse( data, status=200, headers=generate_download_headers("csv"), @@ -490,9 +486,9 @@ def get_data_response( ) resp = make_response(response_data, 200) resp.headers["Content-Type"] = "application/json; charset=utf-8" - response = resp + return resp - return response + return self.response_400(message=f"Unsupported result_format: {result_format}") @expose("/data", methods=["POST"]) @protect() diff --git a/superset/common/query_context.py b/superset/common/query_context.py index f21679dacbd41..a7900cf6677c8 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -237,8 +237,9 @@ def cache_timeout(self) -> int: def cache_key(self, **extra: Any) -> str: """ - The cache key is made out of the key/values from self.cached_values, plus any - other key/values in `extra` + The QueryContext cache key is made out of the key/values from + self.cached_values, plus any other key/values in `extra`. It includes only data + required to rehydrate a QueryContext object. 
""" key_prefix = "qc-" cache_dict = self.cache_values.copy() @@ -247,6 +248,9 @@ def cache_key(self, **extra: Any) -> str: return generate_cache_key(cache_dict, key_prefix) def query_cache_key(self, query_obj: QueryObject, **kwargs: Any) -> Optional[str]: + """ + Returns a QueryObject cache key for objects in self.queries + """ extra_cache_keys = self.datasource.get_extra_cache_keys(query_obj.to_dict()) cache_key = ( diff --git a/superset/config.py b/superset/config.py index 6121821b3f64a..216e6149e0a6c 100644 --- a/superset/config.py +++ b/superset/config.py @@ -297,7 +297,7 @@ def _try_json_readsha( # pylint: disable=unused-argument # For example, DEFAULT_FEATURE_FLAGS = { 'FOO': True, 'BAR': False } here # and FEATURE_FLAGS = { 'BAR': True, 'BAZ': True } in superset_config.py # will result in combined feature flags of { 'FOO': True, 'BAR': True, 'BAZ': True } -DEFAULT_FEATURE_FLAGS: Dict[str, bool] = { +DEFAULT_FEATURE_FLAGS: Dict[str, Any] = { # allow dashboard to use sub-domains to send chart request # you also need ENABLE_CORS and # SUPERSET_WEBSERVER_DOMAINS for list of domains @@ -328,6 +328,7 @@ def _try_json_readsha( # pylint: disable=unused-argument # When True, this escapes HTML (rather than rendering it) in Markdown components "ESCAPE_MARKDOWN_HTML": False, "GLOBAL_ASYNC_QUERIES": False, + "GLOBAL_ASYNC_QUERIES_OPTIONS": {"transport": "polling", "polling_delay": 250}, "VERSIONED_EXPORT": False, # Note that: RowLevelSecurityFilter is only given by default to the Admin role # and the Admin Role does have the all_datasources security permission. @@ -349,7 +350,7 @@ def _try_json_readsha( # pylint: disable=unused-argument DEFAULT_FEATURE_FLAGS["LISTVIEWS_DEFAULT_CARD_VIEW"] = True # This is merely a default. -FEATURE_FLAGS: Dict[str, bool] = {} +FEATURE_FLAGS: Dict[str, Any] = {} # A function that receives a dict of all feature flags # (DEFAULT_FEATURE_FLAGS merged with FEATURE_FLAGS) @@ -983,7 +984,6 @@ class CeleryConfig: # pylint: disable=too-few-public-methods GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME = "async-token" GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SECURE = False GLOBAL_ASYNC_QUERIES_JWT_SECRET = "test-secret-change-me" -GLOBAL_ASYNC_QUERIES_TRANSPORT = "ws" if CONFIG_PATH_ENV_VAR in os.environ: # Explicitly import config module that is not necessarily in pythonpath; useful diff --git a/superset/tasks/async_queries.py b/superset/tasks/async_queries.py index 39730ea867dd2..b8db82b9afe2d 100644 --- a/superset/tasks/async_queries.py +++ b/superset/tasks/async_queries.py @@ -70,6 +70,7 @@ def load_explore_json_into_cache( force: bool = False, ) -> None: with app.app_context(): # type: ignore + cache_key_prefix = "ejr-" # ejr: explore_json request try: datasource_id, datasource_type = get_datasource_info(None, None, form_data) @@ -86,9 +87,7 @@ def load_explore_json_into_cache( # cache form_data for async retrieval cache_value = {"form_data": form_data, "response_type": response_type} - cache_key = generate_cache_key( - cache_value, "ejr-" - ) # ejr: explore_json request + cache_key = generate_cache_key(cache_value, cache_key_prefix) set_and_log_cache(cache_manager.cache, cache_key, cache_value) result_url = f"/superset/explore_json/data/{cache_key}" async_query_manager.update_job( diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index fe18409e91ad4..602cbb685c6dd 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -79,7 +79,22 @@ def __init__(self) -> None: def init_app(self, app: Flask) -> None: 
config = app.config - if len(config.get("GLOBAL_ASYNC_QUERIES_JWT_SECRET", "")) < 32: + print('************** config["CACHE_CONFIG"]') + print(config["CACHE_CONFIG"]) + print('************** config["DATA_CACHE_CONFIG"]') + print(config["DATA_CACHE_CONFIG"]) + if ( + config["CACHE_CONFIG"]["CACHE_TYPE"] == "null" + or config["DATA_CACHE_CONFIG"]["CACHE_TYPE"] == "null" + ): + raise Exception( + """ + Cache backends (CACHE_CONFIG, DATA_CACHE_CONFIG) must be configured + and non-null in order to enable async queries + """ + ) + + if len(config["GLOBAL_ASYNC_QUERIES_JWT_SECRET"]) < 32: raise AsyncQueryTokenException( "Please provide a JWT secret at least 32 bytes long" ) diff --git a/tests/cache_tests.py b/tests/cache_tests.py index 0b887622ef074..c79d3e2c1f47b 100644 --- a/tests/cache_tests.py +++ b/tests/cache_tests.py @@ -35,6 +35,7 @@ def tearDown(self): cache_manager.data_cache.clear() def test_no_data_cache(self): + data_cache_config = app.config["DATA_CACHE_CONFIG"] app.config["DATA_CACHE_CONFIG"] = {"CACHE_TYPE": "null"} cache_manager.init_app(app) @@ -48,11 +49,14 @@ def test_no_data_cache(self): resp_from_cache = self.get_json_resp( json_endpoint, {"form_data": json.dumps(slc.viz.form_data)} ) + # restore DATA_CACHE_CONFIG + app.config["DATA_CACHE_CONFIG"] = data_cache_config self.assertFalse(resp["is_cached"]) self.assertFalse(resp_from_cache["is_cached"]) def test_slice_data_cache(self): # Override cache config + data_cache_config = app.config["DATA_CACHE_CONFIG"] app.config["CACHE_DEFAULT_TIMEOUT"] = 100 app.config["DATA_CACHE_CONFIG"] = { "CACHE_TYPE": "simple", @@ -87,5 +91,5 @@ def test_slice_data_cache(self): self.assertIsNone(cache_manager.cache.get(resp_from_cache["cache_key"])) # reset cache config - app.config["DATA_CACHE_CONFIG"] = {"CACHE_TYPE": "null"} + app.config["DATA_CACHE_CONFIG"] = data_cache_config cache_manager.init_app(app) From 088a49c4430300508af6a438a654d02de5143d44 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Wed, 9 Dec 2020 15:39:48 -0800 Subject: [PATCH 33/42] Fixup: remove debugging --- superset/utils/async_query_manager.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index 602cbb685c6dd..fe73d717bb15c 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -79,10 +79,6 @@ def __init__(self) -> None: def init_app(self, app: Flask) -> None: config = app.config - print('************** config["CACHE_CONFIG"]') - print(config["CACHE_CONFIG"]) - print('************** config["DATA_CACHE_CONFIG"]') - print(config["DATA_CACHE_CONFIG"]) if ( config["CACHE_CONFIG"]["CACHE_TYPE"] == "null" or config["DATA_CACHE_CONFIG"]["CACHE_TYPE"] == "null" From c9b871e0da0e8f274b6769c84888cbd2d6fbda73 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 10 Dec 2020 10:21:41 -0800 Subject: [PATCH 34/42] Fix typescript errors due to redux upgrade --- superset-frontend/src/middleware/asyncEvent.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/superset-frontend/src/middleware/asyncEvent.ts b/superset-frontend/src/middleware/asyncEvent.ts index e80f0957f0139..36ff039618971 100644 --- a/superset-frontend/src/middleware/asyncEvent.ts +++ b/superset-frontend/src/middleware/asyncEvent.ts @@ -61,9 +61,7 @@ const initAsyncEvents = (options: AsyncEventOptions) => { processEventsCallback, } = options; - const middleware: Middleware = (store: MiddlewareAPI) => ( - next: Dispatch, - ) => { + const middleware: Middleware = (store: 
MiddlewareAPI) => (next: Dispatch) => { const JOB_STATUS = { PENDING: 'pending', RUNNING: 'running', From 89925a5718a25cc3f1a998d36db6dcaf7678de4a Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 10 Dec 2020 10:22:24 -0800 Subject: [PATCH 35/42] Update UPDATING.md --- UPDATING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/UPDATING.md b/UPDATING.md index 79777b79f2421..9e92d36c46750 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -23,7 +23,7 @@ This file documents any backwards-incompatible changes in Superset and assists people when migrating to a new version. ## Next - +- [11499](https://github.com/apache/incubator-superset/pull/11499): Breaking change: `STORE_CACHE_KEYS_IN_METADATA_DB` config flag added (default=`False`) to write `CacheKey` records to the metadata DB. `CacheKey` recording was enabled by default previously. - [11920](https://github.com/apache/incubator-superset/pull/11920): Undos the DB migration from [11714](https://github.com/apache/incubator-superset/pull/11714) to prevent adding new columns to the logs table. Deploying a sha between these two PRs may result in locking your DB. - [11704](https://github.com/apache/incubator-superset/pull/11704) Breaking change: Jinja templating for SQL queries has been updated, removing default modules such as `datetime` and `random` and enforcing static template values. To restore or extend functionality, use `JINJA_CONTEXT_ADDONS` and `CUSTOM_TEMPLATE_PROCESSORS` in `superset_config.py`. - [11714](https://github.com/apache/incubator-superset/pull/11714): Logs From 0ad7234e8c0b617bcc94b86355a1ab40b42db309 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 10 Dec 2020 11:12:30 -0800 Subject: [PATCH 36/42] Fix failing py tests --- tests/cache_tests.py | 2 ++ tests/viz_tests.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/tests/cache_tests.py b/tests/cache_tests.py index c79d3e2c1f47b..3ffd52a378163 100644 --- a/tests/cache_tests.py +++ b/tests/cache_tests.py @@ -57,6 +57,7 @@ def test_no_data_cache(self): def test_slice_data_cache(self): # Override cache config data_cache_config = app.config["DATA_CACHE_CONFIG"] + cache_default_timeout = app.config["CACHE_DEFAULT_TIMEOUT"] app.config["CACHE_DEFAULT_TIMEOUT"] = 100 app.config["DATA_CACHE_CONFIG"] = { "CACHE_TYPE": "simple", @@ -92,4 +93,5 @@ def test_slice_data_cache(self): # reset cache config app.config["DATA_CACHE_CONFIG"] = data_cache_config + app.config["CACHE_DEFAULT_TIMEOUT"] = cache_default_timeout cache_manager.init_app(app) diff --git a/tests/viz_tests.py b/tests/viz_tests.py index 1dffdcd2adcc3..09fd3a7c9187e 100644 --- a/tests/viz_tests.py +++ b/tests/viz_tests.py @@ -163,9 +163,20 @@ def test_cache_timeout(self): datasource.database.cache_timeout = 1666 self.assertEqual(1666, test_viz.cache_timeout) + datasource.database.cache_timeout = None + test_viz = viz.BaseViz(datasource, form_data={}) + self.assertEqual( + app.config["DATA_CACHE_CONFIG"]["CACHE_DEFAULT_TIMEOUT"], + test_viz.cache_timeout, + ) + + data_cache_timeout = app.config["DATA_CACHE_CONFIG"]["CACHE_DEFAULT_TIMEOUT"] + app.config["DATA_CACHE_CONFIG"]["CACHE_DEFAULT_TIMEOUT"] = None datasource.database.cache_timeout = None test_viz = viz.BaseViz(datasource, form_data={}) self.assertEqual(app.config["CACHE_DEFAULT_TIMEOUT"], test_viz.cache_timeout) + # restore DATA_CACHE_CONFIG timeout + app.config["DATA_CACHE_CONFIG"]["CACHE_DEFAULT_TIMEOUT"] = data_cache_timeout class TestTableViz(SupersetTestCase): From c72b4c6cd2838ef6f75fd19ddc4267bb4075dca6 
Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 10 Dec 2020 13:15:33 -0800 Subject: [PATCH 37/42] asyncEvent_spec.js -> asyncEvent_spec.ts --- .../middleware/{asyncEvent_spec.js => asyncEvent_spec.ts} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename superset-frontend/spec/javascripts/middleware/{asyncEvent_spec.js => asyncEvent_spec.ts} (99%) diff --git a/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.js b/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.ts similarity index 99% rename from superset-frontend/spec/javascripts/middleware/asyncEvent_spec.js rename to superset-frontend/spec/javascripts/middleware/asyncEvent_spec.ts index 2dee5d30dca79..6e235f9fe1a4e 100644 --- a/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.js +++ b/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.ts @@ -64,8 +64,8 @@ describe('asyncEvent middleware', () => { }; const EVENTS_ENDPOINT = 'glob:*/api/v1/async_event/*'; const CACHED_DATA_ENDPOINT = 'glob:*/api/v1/chart/data/*'; - let featureEnabledStub; - let getFeatureStub; + let featureEnabledStub: any; + let getFeatureStub: any; function setup() { const getPendingComponents = sinon.stub(); From 1fb748945747d4491871e3c49760ec5610bf12ff Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 10 Dec 2020 14:22:02 -0800 Subject: [PATCH 38/42] Refactor flakey Python 3.7 mock assertions --- tests/async_events/api_tests.py | 38 +++++++++--------------- tests/tasks/async_queries_tests.py | 46 +++++++++--------------------- 2 files changed, 27 insertions(+), 57 deletions(-) diff --git a/tests/async_events/api_tests.py b/tests/async_events/api_tests.py index ccf980ce77a4c..04d838b97b0a0 100644 --- a/tests/async_events/api_tests.py +++ b/tests/async_events/api_tests.py @@ -24,46 +24,41 @@ class TestAsyncEventApi(SupersetTestCase): + UUID = "943c920-32a5-412a-977d-b8e47d36f5a4" + def fetch_events(self, last_id: Optional[str] = None): base_uri = "api/v1/async_event/" uri = f"{base_uri}?last_id={last_id}" if last_id else base_uri return self.client.get(uri) - def test_events(self): + @mock.patch("uuid.uuid4", return_value=UUID) + def test_events(self, mock_uuid4): async_query_manager.init_app(app) self.login(username="admin") with mock.patch.object(async_query_manager._redis, "xrange") as mock_xrange: rv = self.fetch_events() response = json.loads(rv.data.decode("utf-8")) - args = mock_xrange.call_args.args - prefix_len = len(app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"]) assert rv.status_code == 200 - assert mock_xrange.called - assert len(args[0]) == (prefix_len + 36) # uuidv4 - assert args[1] == "-" - assert args[2] == "+" - assert args[3] == 100 + channel_id = app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"] + self.UUID + mock_xrange.assert_called_with(channel_id, "-", "+", 100) self.assertEqual(response, {"result": []}) - def test_events_last_id(self): + @mock.patch("uuid.uuid4", return_value=UUID) + def test_events_last_id(self, mock_uuid4): async_query_manager.init_app(app) self.login(username="admin") with mock.patch.object(async_query_manager._redis, "xrange") as mock_xrange: rv = self.fetch_events("1607471525180-0") response = json.loads(rv.data.decode("utf-8")) - args = mock_xrange.call_args.args - prefix_len = len(app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"]) assert rv.status_code == 200 - assert mock_xrange.called - assert len(args[0]) == (prefix_len + 36) # uuidv4 - assert args[1] == "1607471525180-1" # increments - assert args[2] == "+" - assert args[3] == 
100 + channel_id = app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"] + self.UUID + mock_xrange.assert_called_with(channel_id, "1607471525180-1", "+", 100) self.assertEqual(response, {"result": []}) - def test_events_results(self): + @mock.patch("uuid.uuid4", return_value=UUID) + def test_events_results(self, mock_uuid4): async_query_manager.init_app(app) self.login(username="admin") with mock.patch.object(async_query_manager._redis, "xrange") as mock_xrange: @@ -83,15 +78,10 @@ def test_events_results(self): ] rv = self.fetch_events() response = json.loads(rv.data.decode("utf-8")) - args = mock_xrange.call_args.args - prefix_len = len(app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"]) assert rv.status_code == 200 - assert mock_xrange.called - assert len(args[0]) == (prefix_len + 36) # uuidv4 - assert args[1] == "-" - assert args[2] == "+" - assert args[3] == 100 + channel_id = app.config["GLOBAL_ASYNC_QUERIES_REDIS_STREAM_PREFIX"] + self.UUID + mock_xrange.assert_called_with(channel_id, "-", "+", 100) expected = { "result": [ { diff --git a/tests/tasks/async_queries_tests.py b/tests/tasks/async_queries_tests.py index 89d41a9d67e34..6fe2e7c319bd1 100644 --- a/tests/tasks/async_queries_tests.py +++ b/tests/tasks/async_queries_tests.py @@ -16,7 +16,7 @@ # under the License. """Unit tests for async query celery jobs in Superset""" import re -from unittest.mock import patch +from unittest import mock from uuid import uuid4 import pytest @@ -42,7 +42,7 @@ def get_table_by_name(name: str) -> SqlaTable: class TestAsyncQueries(SupersetTestCase): - @patch.object(async_query_manager, "update_job") + @mock.patch.object(async_query_manager, "update_job") def test_load_chart_data_into_cache(self, mock_update_job): async_query_manager.init_app(app) table = get_table_by_name("birth_names") @@ -57,16 +57,12 @@ def test_load_chart_data_into_cache(self, mock_update_job): load_chart_data_into_cache(job_metadata, form_data) - assert mock_update_job.called - call_args = mock_update_job.call_args - self.assertEqual(call_args.args[0], job_metadata) - assert call_args.args[1] == "done" - assert re.match(r"^/api/v1/chart/data/qc-\w+", call_args.kwargs["result_url"]) + mock_update_job.assert_called_with(job_metadata, "done", result_url=mock.ANY) - @patch.object( + @mock.patch.object( ChartDataCommand, "run", side_effect=ChartDataQueryFailedError("Error: foo") ) - @patch.object(async_query_manager, "update_job") + @mock.patch.object(async_query_manager, "update_job") def test_load_chart_data_into_cache_error(self, mock_update_job, mock_run_command): async_query_manager.init_app(app) table = get_table_by_name("birth_names") @@ -81,16 +77,11 @@ def test_load_chart_data_into_cache_error(self, mock_update_job, mock_run_comman with pytest.raises(ChartDataQueryFailedError): load_chart_data_into_cache(job_metadata, form_data) - assert mock_run_command.called - assert mock_run_command.call_args.kwargs["cache"] == True + mock_run_command.assert_called_with(cache=True) + errors = [{"message": "Error: foo"}] + mock_update_job.assert_called_with(job_metadata, "error", errors=errors) - assert mock_update_job.called - call_args = mock_update_job.call_args - self.assertEqual(call_args.args[0], job_metadata) - assert call_args.args[1] == "error" - assert call_args.kwargs["errors"] == [{"message": "Error: foo"}] - - @patch.object(async_query_manager, "update_job") + @mock.patch.object(async_query_manager, "update_job") def test_load_explore_json_into_cache(self, mock_update_job): async_query_manager.init_app(app) table = 
get_table_by_name("birth_names") @@ -120,15 +111,9 @@ def test_load_explore_json_into_cache(self, mock_update_job): load_explore_json_into_cache(job_metadata, form_data) - assert mock_update_job.called - call_args = mock_update_job.call_args - self.assertEqual(call_args.args[0], job_metadata) - assert call_args.args[1] == "done" - assert re.match( - r"^/superset/explore_json/data/ejr-\w+", call_args.kwargs["result_url"] - ) + mock_update_job.assert_called_with(job_metadata, "done", result_url=mock.ANY) - @patch.object(async_query_manager, "update_job") + @mock.patch.object(async_query_manager, "update_job") def test_load_explore_json_into_cache_error(self, mock_update_job): async_query_manager.init_app(app) form_data = {} @@ -143,10 +128,5 @@ def test_load_explore_json_into_cache_error(self, mock_update_job): with pytest.raises(SupersetException): load_explore_json_into_cache(job_metadata, form_data) - assert mock_update_job.called - call_args = mock_update_job.call_args - self.assertEqual(call_args.args[0], job_metadata) - assert call_args.args[1] == "error" - assert call_args.kwargs["errors"] == [ - "The datasource associated with this chart no longer exists" - ] + errors = ["The datasource associated with this chart no longer exists"] + mock_update_job.assert_called_with(job_metadata, "error", errors=errors) From 024da76e6234e8172344a743f3fae90fc64cbbf0 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 10 Dec 2020 15:46:18 -0800 Subject: [PATCH 39/42] Fix another shared state issue in Py tests --- tests/charts/api_tests.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/charts/api_tests.py b/tests/charts/api_tests.py index 6f5ff9cb1e0fa..64817141ea8f8 100644 --- a/tests/charts/api_tests.py +++ b/tests/charts/api_tests.py @@ -34,7 +34,7 @@ from superset.charts.commands.data import ChartDataCommand from superset.connectors.connector_registry import ConnectorRegistry from superset.connectors.sqla.models import SqlaTable -from superset.extensions import async_query_manager, db, security_manager +from superset.extensions import async_query_manager, cache_manager, db, security_manager from superset.models.annotations import AnnotationLayer from superset.models.core import Database, FavStar, FavStarClassName from superset.models.dashboard import Dashboard @@ -100,6 +100,12 @@ def insert_chart( db.session.commit() return slice + @pytest.fixture(autouse=True) + def clear_data_cache(self): + with app.app_context(): + cache_manager.data_cache.clear() + yield + @pytest.fixture() def create_charts(self): with self.create_app().app_context(): From 887754bf03ff5ea5d164f4cb1b89673ac94b1a9c Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 10 Dec 2020 17:01:30 -0800 Subject: [PATCH 40/42] Use 'sub' claim in JWT for user_id --- superset/utils/async_query_manager.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index fe73d717bb15c..42d2c130bddb7 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -124,9 +124,8 @@ def validate_session( # pylint: disable=unused-variable session["async_channel_id"] = async_channel_id session["async_user_id"] = user_id - token = self.generate_jwt( - {"channel": async_channel_id, "user_id": user_id} - ) + sub = str(user_id) if user_id else None + token = self.generate_jwt({"channel": async_channel_id, "sub": sub}) response.set_cookie( self._jwt_cookie_name, From 
601ec51cdcf7003d75fd68935abe3605b8932f16 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 10 Dec 2020 18:57:31 -0800 Subject: [PATCH 41/42] Refactor async middleware config --- .../javascripts/middleware/asyncEvent_spec.ts | 16 ++++++++++------ superset-frontend/src/dashboard/index.jsx | 1 + superset-frontend/src/explore/index.jsx | 1 + superset-frontend/src/featureFlags.ts | 5 ----- superset-frontend/src/middleware/asyncEvent.ts | 12 ++++++++---- superset/config.py | 7 ++++--- superset/views/base.py | 2 ++ 7 files changed, 26 insertions(+), 18 deletions(-) diff --git a/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.ts b/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.ts index 6e235f9fe1a4e..e42ac9152f3c0 100644 --- a/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.ts +++ b/superset-frontend/spec/javascripts/middleware/asyncEvent_spec.ts @@ -64,8 +64,11 @@ describe('asyncEvent middleware', () => { }; const EVENTS_ENDPOINT = 'glob:*/api/v1/async_event/*'; const CACHED_DATA_ENDPOINT = 'glob:*/api/v1/chart/data/*'; + const config = { + GLOBAL_ASYNC_QUERIES_TRANSPORT: 'polling', + GLOBAL_ASYNC_QUERIES_POLLING_DELAY: 500, + }; let featureEnabledStub: any; - let getFeatureStub: any; function setup() { const getPendingComponents = sinon.stub(); @@ -99,16 +102,11 @@ describe('asyncEvent middleware', () => { }); featureEnabledStub = sinon.stub(featureFlags, 'isFeatureEnabled'); featureEnabledStub.withArgs('GLOBAL_ASYNC_QUERIES').returns(true); - getFeatureStub = sinon.stub(featureFlags, 'getFeatureFlag'); - getFeatureStub - .withArgs('GLOBAL_ASYNC_QUERIES_OPTIONS') - .returns({ transport: 'polling', polling_delay: 250 }); }); afterEach(() => { fetchMock.reset(); next.resetHistory(); featureEnabledStub.restore(); - getFeatureStub.restore(); }); afterAll(fetchMock.reset); @@ -116,6 +114,7 @@ describe('asyncEvent middleware', () => { const { getPendingComponents, successAction, errorAction } = setup(); getPendingComponents.returns([]); const asyncEventMiddleware = initAsyncEvents({ + config, getPendingComponents, successAction, errorAction, @@ -134,6 +133,7 @@ describe('asyncEvent middleware', () => { } = setup(); getPendingComponents.returns(Object.values(state.charts)); const asyncEventMiddleware = initAsyncEvents({ + config, getPendingComponents, successAction, errorAction, @@ -166,6 +166,7 @@ describe('asyncEvent middleware', () => { }); getPendingComponents.returns(Object.values(state.charts)); const asyncEventMiddleware = initAsyncEvents({ + config, getPendingComponents, successAction, errorAction, @@ -200,6 +201,7 @@ describe('asyncEvent middleware', () => { }); getPendingComponents.returns(Object.values(state.charts)); const asyncEventMiddleware = initAsyncEvents({ + config, getPendingComponents, successAction, errorAction, @@ -230,6 +232,7 @@ describe('asyncEvent middleware', () => { }); getPendingComponents.returns(Object.values(state.charts)); const asyncEventMiddleware = initAsyncEvents({ + config, getPendingComponents, successAction, errorAction, @@ -250,6 +253,7 @@ describe('asyncEvent middleware', () => { const { getPendingComponents, successAction, errorAction } = setup(); getPendingComponents.returns(Object.values(state.charts)); const asyncEventMiddleware = initAsyncEvents({ + config, getPendingComponents, successAction, errorAction, diff --git a/superset-frontend/src/dashboard/index.jsx b/superset-frontend/src/dashboard/index.jsx index bbbd0172c7eef..9fe82346c3247 100644 --- a/superset-frontend/src/dashboard/index.jsx +++ 
b/superset-frontend/src/dashboard/index.jsx @@ -36,6 +36,7 @@ initFeatureFlags(bootstrapData.common.feature_flags); const initState = getInitialState(bootstrapData); const asyncEventMiddleware = initAsyncEvents({ + config: bootstrapData.common.conf, getPendingComponents: ({ charts }) => Object.values(charts).filter(c => c.chartStatus === 'loading'), successAction: (componentId, componentData) => diff --git a/superset-frontend/src/explore/index.jsx b/superset-frontend/src/explore/index.jsx index 61082a7bb639e..83e4bc63dc9ca 100644 --- a/superset-frontend/src/explore/index.jsx +++ b/superset-frontend/src/explore/index.jsx @@ -38,6 +38,7 @@ initFeatureFlags(bootstrapData.common.feature_flags); const initState = getInitialState(bootstrapData); const asyncEventMiddleware = initAsyncEvents({ + config: bootstrapData.common.conf, getPendingComponents: ({ charts }) => Object.values(charts).filter(c => c.chartStatus === 'loading'), successAction: (componentId, componentData) => diff --git a/superset-frontend/src/featureFlags.ts b/superset-frontend/src/featureFlags.ts index 5a317fb30089f..1da39a768323d 100644 --- a/superset-frontend/src/featureFlags.ts +++ b/superset-frontend/src/featureFlags.ts @@ -35,7 +35,6 @@ export enum FeatureFlag { ESCAPE_MARKDOWN_HTML = 'ESCAPE_MARKDOWN_HTML', VERSIONED_EXPORT = 'VERSIONED_EXPORT', GLOBAL_ASYNC_QUERIES = 'GLOBAL_ASYNC_QUERIES', - GLOBAL_ASYNC_QUERIES_OPTIONS = 'GLOBAL_ASYNC_QUERIES_OPTIONS', } export type FeatureFlagMap = { @@ -60,7 +59,3 @@ export function initFeatureFlags(featureFlags: FeatureFlagMap) { export function isFeatureEnabled(feature: FeatureFlag) { return window && window.featureFlags && !!window.featureFlags[feature]; } - -export function getFeatureFlag(feature: FeatureFlag) { - return window.featureFlags[feature]; -} diff --git a/superset-frontend/src/middleware/asyncEvent.ts b/superset-frontend/src/middleware/asyncEvent.ts index 36ff039618971..637bb1b38d84f 100644 --- a/superset-frontend/src/middleware/asyncEvent.ts +++ b/superset-frontend/src/middleware/asyncEvent.ts @@ -19,7 +19,7 @@ import { Middleware, MiddlewareAPI, Dispatch } from 'redux'; import { makeApi, SupersetClient } from '@superset-ui/core'; import { SupersetError } from 'src/components/ErrorMessage/types'; -import { getFeatureFlag, isFeatureEnabled, FeatureFlag } from '../featureFlags'; +import { isFeatureEnabled, FeatureFlag } from '../featureFlags'; import { getClientErrorObject, parseErrorJson, @@ -36,6 +36,10 @@ export type AsyncEvent = { }; type AsyncEventOptions = { + config: { + GLOBAL_ASYNC_QUERIES_TRANSPORT: string; + GLOBAL_ASYNC_QUERIES_POLLING_DELAY: number; + }; getPendingComponents: (state: any) => any[]; successAction: (componentId: number, componentData: any) => { type: string }; errorAction: (componentId: number, response: any) => { type: string }; @@ -51,15 +55,15 @@ type CachedDataResponse = { const initAsyncEvents = (options: AsyncEventOptions) => { // TODO: implement websocket support const TRANSPORT_POLLING = 'polling'; - const config = getFeatureFlag(FeatureFlag.GLOBAL_ASYNC_QUERIES_OPTIONS) || {}; - const transport = config.transport || TRANSPORT_POLLING; - const polling_delay = config.polling_delay || 500; const { + config, getPendingComponents, successAction, errorAction, processEventsCallback, } = options; + const transport = config.GLOBAL_ASYNC_QUERIES_TRANSPORT || TRANSPORT_POLLING; + const polling_delay = config.GLOBAL_ASYNC_QUERIES_POLLING_DELAY || 500; const middleware: Middleware = (store: MiddlewareAPI) => (next: Dispatch) => { const JOB_STATUS 
= { diff --git a/superset/config.py b/superset/config.py index 7f44b1fb521aa..f20cbff716d98 100644 --- a/superset/config.py +++ b/superset/config.py @@ -297,7 +297,7 @@ def _try_json_readsha( # pylint: disable=unused-argument # For example, DEFAULT_FEATURE_FLAGS = { 'FOO': True, 'BAR': False } here # and FEATURE_FLAGS = { 'BAR': True, 'BAZ': True } in superset_config.py # will result in combined feature flags of { 'FOO': True, 'BAR': True, 'BAZ': True } -DEFAULT_FEATURE_FLAGS: Dict[str, Any] = { +DEFAULT_FEATURE_FLAGS: Dict[str, bool] = { # allow dashboard to use sub-domains to send chart request # you also need ENABLE_CORS and # SUPERSET_WEBSERVER_DOMAINS for list of domains @@ -328,7 +328,6 @@ def _try_json_readsha( # pylint: disable=unused-argument # When True, this escapes HTML (rather than rendering it) in Markdown components "ESCAPE_MARKDOWN_HTML": False, "GLOBAL_ASYNC_QUERIES": False, - "GLOBAL_ASYNC_QUERIES_OPTIONS": {"transport": "polling", "polling_delay": 250}, "VERSIONED_EXPORT": False, # Note that: RowLevelSecurityFilter is only given by default to the Admin role # and the Admin Role does have the all_datasources security permission. @@ -350,7 +349,7 @@ def _try_json_readsha( # pylint: disable=unused-argument DEFAULT_FEATURE_FLAGS["LISTVIEWS_DEFAULT_CARD_VIEW"] = True # This is merely a default. -FEATURE_FLAGS: Dict[str, Any] = {} +FEATURE_FLAGS: Dict[str, bool] = {} # A function that receives a dict of all feature flags # (DEFAULT_FEATURE_FLAGS merged with FEATURE_FLAGS) @@ -984,6 +983,8 @@ class CeleryConfig: # pylint: disable=too-few-public-methods GLOBAL_ASYNC_QUERIES_JWT_COOKIE_NAME = "async-token" GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SECURE = False GLOBAL_ASYNC_QUERIES_JWT_SECRET = "test-secret-change-me" +GLOBAL_ASYNC_QUERIES_TRANSPORT = "polling" +GLOBAL_ASYNC_QUERIES_POLLING_DELAY = 500 if CONFIG_PATH_ENV_VAR in os.environ: # Explicitly import config module that is not necessarily in pythonpath; useful diff --git a/superset/views/base.py b/superset/views/base.py index 2f87a81f97393..9ed65222bc1a3 100644 --- a/superset/views/base.py +++ b/superset/views/base.py @@ -77,6 +77,8 @@ "SUPERSET_WEBSERVER_DOMAINS", "SQLLAB_SAVE_WARNING_MESSAGE", "DISPLAY_MAX_ROW", + "GLOBAL_ASYNC_QUERIES_TRANSPORT", + "GLOBAL_ASYNC_QUERIES_POLLING_DELAY", ) logger = logging.getLogger(__name__) From df673cff9b84913e04c22ecd64342d67decec637 Mon Sep 17 00:00:00 2001 From: Rob DiCiuccio Date: Thu, 10 Dec 2020 19:36:37 -0800 Subject: [PATCH 42/42] Fixup: restore FeatureFlag boolean type --- superset-frontend/src/featureFlags.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset-frontend/src/featureFlags.ts b/superset-frontend/src/featureFlags.ts index 1da39a768323d..93b909ddcb3b1 100644 --- a/superset-frontend/src/featureFlags.ts +++ b/superset-frontend/src/featureFlags.ts @@ -38,7 +38,7 @@ export enum FeatureFlag { } export type FeatureFlagMap = { - [key in FeatureFlag]?: any; + [key in FeatureFlag]?: boolean; }; // eslint-disable-next-line @typescript-eslint/no-unused-vars
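Patches 41 and 42 above move the async transport settings out of the GLOBAL_ASYNC_QUERIES_OPTIONS feature flag and into plain config keys (GLOBAL_ASYNC_QUERIES_TRANSPORT, GLOBAL_ASYNC_QUERIES_POLLING_DELAY) that reach the frontend through FRONTEND_CONF_KEYS as bootstrapData.common.conf. For reference, a minimal superset_config.py override might look like the sketch below; this is illustrative only, the values and the placeholder secret are assumptions and not part of the patches, and the polling delay is presumably in milliseconds (matching the 500 default added to superset/config.py above).

    # superset_config.py -- hedged sketch; values are examples, not defaults mandated by the patches
    FEATURE_FLAGS = {"GLOBAL_ASYNC_QUERIES": True}

    # Transport for delivering async query events to the browser.
    # "polling" is the only transport implemented by these patches.
    GLOBAL_ASYNC_QUERIES_TRANSPORT = "polling"

    # Interval between polls of /api/v1/async_event/ (presumably milliseconds).
    GLOBAL_ASYNC_QUERIES_POLLING_DELAY = 500

    # JWT cookie settings for the async channel token; replace the secret and
    # enable the secure flag in production deployments.
    GLOBAL_ASYNC_QUERIES_JWT_SECRET = "change-me-to-a-long-random-secret"
    GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SECURE = True

Because both new keys are added to FRONTEND_CONF_KEYS in superset/views/base.py, the refactored initAsyncEvents middleware can read them directly from the bootstrap config object instead of from a feature flag payload, which is what allows FeatureFlagMap to be restored to a boolean-only type in the final fixup.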