Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(dashboard_rbac): dashboard extra jwt #13773

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions superset-frontend/src/chart/chartAction.js
Original file line number Diff line number Diff line change
Expand Up @@ -521,8 +521,8 @@ export function redirectSQLLab(formData) {
export function refreshChart(chartKey, force, dashboardId) {
return (dispatch, getState) => {
const chart = (getState().charts || {})[chartKey];
const timeout = getState().dashboardInfo.common.conf
.SUPERSET_WEBSERVER_TIMEOUT;
const { dashboardInfo } = getState();
const timeout = dashboardInfo.common.conf.SUPERSET_WEBSERVER_TIMEOUT;

if (
!chart.latestQueryFormData ||
Expand Down
1 change: 1 addition & 0 deletions superset-frontend/src/dashboard/containers/Chart.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ function mapStateToProps(
});

formData.dashboardId = dashboardInfo.id;
formData.extra_jwt = dashboardInfo.extraJwt;

return {
chart,
Expand Down
10 changes: 9 additions & 1 deletion superset-frontend/src/dashboard/reducers/getInitialState.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,14 @@ import newComponentFactory from '../util/newComponentFactory';
import { TIME_RANGE } from '../../visualizations/FilterBox/FilterBox';

export default function getInitialState(bootstrapData) {
const { user_id, datasources, common, editMode, urlParams } = bootstrapData;
const {
user_id,
datasources,
common,
editMode,
urlParams,
extra_jwt,
} = bootstrapData;

const dashboard = { ...bootstrapData.dashboard_data };
let preselectFilters = {};
Expand Down Expand Up @@ -283,6 +290,7 @@ export default function getInitialState(bootstrapData) {
conf: common.conf,
},
lastModifiedTime: dashboard.last_modified_time,
extraJwt: extra_jwt,
},
dashboardFilters,
nativeFilters,
Expand Down
10 changes: 8 additions & 2 deletions superset/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
cache_manager,
celery_app,
csrf,
dashboard_jwt_manager,
db,
feature_flag_manager,
machine_auth_provider_factory,
Expand Down Expand Up @@ -69,13 +70,13 @@ def create_app() -> Flask:
raise ex


class SupersetIndexView(IndexView):
class SupersetIndexView(IndexView): # pylint: disable=too-few-public-methods
@expose("/")
def index(self) -> FlaskResponse:
return redirect("/superset/welcome/")


class SupersetAppInitializer:
class SupersetAppInitializer: # pylint: disable=too-many-public-methods
def __init__(self, app: Flask) -> None:
super().__init__()

Expand Down Expand Up @@ -534,6 +535,7 @@ def init_app_in_ctx(self) -> None:
self.configure_data_sources()
self.configure_auth_provider()
self.configure_async_queries()
self.configure_dashboard_jwt()

# Hook that provides administrators a handle on the Flask APP
# after initialization
Expand Down Expand Up @@ -698,3 +700,7 @@ def register_blueprints(self) -> None:

def setup_bundle_manifest(self) -> None:
manifest_processor.init_app(self.flask_app)

def configure_dashboard_jwt(self) -> None:
if feature_flag_manager.is_feature_enabled("DASHBOARD_RBAC"):
dashboard_jwt_manager.init_app(self.flask_app)
8 changes: 8 additions & 0 deletions superset/charts/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,6 +1066,14 @@ class ChartDataQueryContextSchema(Schema):

result_type = EnumField(ChartDataResultType, by_value=True)
result_format = EnumField(ChartDataResultFormat, by_value=True)
extra_jwt = fields.String(
required=False,
allow_none=True,
description="represents a security jwt that was "
"originally generated by the backend in "
"order to allow temporary access for that "
"chart data",
)

# pylint: disable=no-self-use,unused-argument
@post_load
Expand Down
5 changes: 4 additions & 1 deletion superset/common/query_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
logger = logging.getLogger(__name__)


class QueryContext:
class QueryContext: # pylint: disable=too-many-instance-attributes
"""
The query context contains the query object and additional fields necessary
to retrieve the data payload for a given viz.
Expand All @@ -70,6 +70,7 @@ class QueryContext:
custom_cache_timeout: Optional[int]
result_type: ChartDataResultType
result_format: ChartDataResultFormat
extra_jwt: Optional[str]

# TODO: Type datasource and query_object dictionary with TypedDict when it becomes
# a vanilla python type https://github.com/python/mypy/issues/5288
Expand All @@ -81,6 +82,7 @@ def __init__( # pylint: disable=too-many-arguments
custom_cache_timeout: Optional[int] = None,
result_type: Optional[ChartDataResultType] = None,
result_format: Optional[ChartDataResultFormat] = None,
extra_jwt: Optional[str] = None,
) -> None:
self.datasource = ConnectorRegistry.get_datasource(
str(datasource["type"]), int(datasource["id"]), db.session
Expand All @@ -96,6 +98,7 @@ def __init__( # pylint: disable=too-many-arguments
"result_type": self.result_type,
"result_format": self.result_format,
}
self.extra_jwt = extra_jwt

def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]:
"""Returns a pandas dataframe based on the query object"""
Expand Down
4 changes: 4 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,6 +1126,10 @@ class CeleryConfig: # pylint: disable=too-few-public-methods
SQLALCHEMY_DOCS_URL = "https://docs.sqlalchemy.org/en/13/core/engines.html"
SQLALCHEMY_DISPLAY_TEXT = "SQLAlchemy docs"

# This secret is used to sign dashboard context in order to manage data access when
# using DASHBOARD_RBAC feature flag
DASHBOARD_JWT_SECRET = None

# -------------------------------------------------------------------
# * WARNING: STOP EDITING HERE *
# -------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions superset/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

from superset.utils.async_query_manager import AsyncQueryManager
from superset.utils.cache_manager import CacheManager
from superset.utils.dashboard_jwt_manager import DashboardJwtManager
from superset.utils.feature_flag_manager import FeatureFlagManager
from superset.utils.machine_auth import MachineAuthProviderFactory

Expand Down Expand Up @@ -100,6 +101,7 @@ def get_manifest_files(self, bundle: str, asset_type: str) -> List[str]:
appbuilder = AppBuilder(update_perms=False)
async_query_manager = AsyncQueryManager()
cache_manager = CacheManager()
dashboard_jwt_manager = DashboardJwtManager()
celery_app = celery.Celery()
csrf = CSRFProtect()
db = SQLA()
Expand Down
19 changes: 17 additions & 2 deletions superset/security/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from superset.constants import RouteMethod
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetSecurityException
from superset.extensions import dashboard_jwt_manager, feature_flag_manager
from superset.utils.core import DatasourceName, RowLevelSecurityFilterType

if TYPE_CHECKING:
Expand Down Expand Up @@ -923,7 +924,8 @@ def set_perm( # pylint: disable=no-self-use,unused-argument
)
)

def raise_for_access( # pylint: disable=too-many-arguments,too-many-branches
def raise_for_access( # pylint: disable=too-many-arguments,too-many-branches,
# pylint: disable=too-many-locals
self,
database: Optional["Database"] = None,
datasource: Optional["BaseDatasource"] = None,
Expand Down Expand Up @@ -987,15 +989,28 @@ def raise_for_access( # pylint: disable=too-many-arguments,too-many-branches
)

if datasource or query_context or viz:
extra_jwt = None
if query_context:
datasource = query_context.datasource
extra_jwt = query_context.extra_jwt
elif viz:
datasource = viz.datasource
extra_jwt = viz.extra_jwt

assert datasource

ds_allowed_in_dashboard = False
if feature_flag_manager.is_feature_enabled("DASHBOARD_RBAC"):
dashboard_data_context = dashboard_jwt_manager.parse_jwt(extra_jwt)

if dashboard_data_context:
ds_allowed_in_dashboard = (
datasource.id in dashboard_data_context.dataset_ids
)

if not (
self.can_access_schema(datasource)
ds_allowed_in_dashboard
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ktmud @suddjian upon each chart data query request (legacy or v1 API) this part is being called of validating from extracting the signed jwt if that dataset is being called from a queryContext /Viz

Only dashboard owner/admin an give dashboard access (see the video)

Let's assume that daahboard access is being granted to a read only persona

And to that persona explore chart permission is not granted as well

So that type of a user can only see data under the dashboard
He cannot edit the dataset or play around with it in the explore
In theory he could use that jwt to query the dataset freely in SQLlab(if he permission to sqllab to begin with)

From a dashboard point of view is up to the dashboard creator to wrap data with the relevant datasets(either physical or virtual) that will restrict the readonly personas to what is relevant for them

If there is a huge physical dataset that might expose sensitive data then you can always have a thiner portion of it by creating a virtual one

How would you suggest make that flow secure?


@suddjian I would be happy to do a follow up PR just to support SPA , no worries there

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you elaborate more on why you need users without direct access to certain datasets to have access to that dataset through dashboards? Can you just create virtual datasets and grant them read access to the dataset instead?

Copy link
Member Author

@amitmiran137 amitmiran137 Apr 1, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in our org every dashboard might serve different set of users which basically means a separated role per dashboard.
so managing access for every dataset on every dashboard is a cumbersome work.
it will require managing all datasets permissions for every role.
this problem scale when having hundreds of dashboards.

additionally, dataset permission mechanism has a defect , when changing a virtual dataset name then permission name changes as well but if it is already associated to a role you'll need to remove it and then re-associate bc permission is based on the name of the dataset and not by the id

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would be happy to do a follow up PR just to support SPA , no worries there

In the SPA project we have already done the work to remove dashboard-specific bootstrap data so that the apps can be cleanly merged. I think rather than adding new bootstrap properties at this stage, it would be better to use an endpoint. That said, if you're okay with this feature breaking in the very near future, I won't stand in your way. The compatibility discussion is a relatively minor issue.

I think the security concerns have already been discussed somewhat in SIP-51. Anyone using the feature flag should understand that dashboard owners have the ability to grant data access to others arbitrarily. These effects should be well documented. It's an entirely different security profile for the application.

I am of the opinion that the security manager should make a database query to find the information it needs, rather than using this new JWT pattern. I haven't seen anything that indicates that a JWT is actually required here, though I could be mistaken.

Introducing new authorization patterns specific to a given feature flag will make it more difficult to onboard into the Superset codebase, as well as making it more complicated to change system behavior. The JWT pattern also makes it complicated to revoke or change a user's access, and introduces a new responsibility for the client to correctly manage the JWT.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The jwt and it's data indicates that the request indeed coming from the Dashboard and with the nessecery dataset permission.

Otherwise upon chart data request it would be impossible to know if the request does originates from a dashboard or not

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But you could write a query selecting dashboards the user can access which use the dataset that's being requested, no?

Something akin to:

query = query(BaseDatasource).join(Slice).join(Dashboard).filter(BaseDatasource.id == datasource.id)
query = DashboardFilter("id", SQLAInterface(Dashboard, db.session)).apply(query)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if you want to publish a dashboard to some users, but a subset of these users cannot have access to a couple of more sensitive charts in the dashboard? If seems by turning on the DASHBOARD_RBAC flag, there is no way to accommodate this case, unless you add some kind of dashboard level metadata to indicate this dashboard should not issue a JWT.

Copy link
Member Author

@amitmiran137 amitmiran137 Apr 3, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this use case we would probably just have 2 separated Dashboards
Just bc there no sense of providing dashboard to a client and show him only have of charts, that is a very bad UX don't you think ? 💆

I know Air BnB have this portion of a dashboard use case
But for us it just doesnt makes sense 🙅

Our Dashboards are for external clients , try to explain to them why they see only half

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd say even your external clients may have people with different access levels. Maybe not now, but it's possible.

Sometimes you want a single dashboard for all audience because it's easier to navigate and easier to communicate---you can share the same short link to all users and executives don't have to jump between two dashboards just to see the more sensitive data. You may also have two duplicate dashboards with 80% of the content the same, but it'd gradually become difficult to maintain.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we need a "nested dashboard" viz plugin 😁

or self.can_access_schema(datasource)
or self.can_access("datasource_access", datasource.perm or "")
):
raise SupersetSecurityException(
Expand Down
53 changes: 53 additions & 0 deletions superset/utils/dashboard_jwt_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import List, Optional

import jwt
from flask import Flask


class DashboardJwtDataObject: # pylint: disable=too-few-public-methods
dashboard_id: int
dataset_ids: List[int]

def __init__(self, dashboard_id: int, dataset_ids: List[int]) -> None:
super().__init__()
self.dashboard_id = dashboard_id
self.dataset_ids = dataset_ids


class DashboardJwtManager:
def __init__(self) -> None:
super().__init__()
self._jwt_secret: str

def init_app(self, app: Flask) -> None:
config = app.config

self._jwt_secret = config["DASHBOARD_JWT_SECRET"]

def generate_jwt(self, data: DashboardJwtDataObject) -> str:
encoded_jwt = jwt.encode(data.__dict__, self._jwt_secret, algorithm="HS256")
return encoded_jwt.decode("utf-8")

def parse_jwt(self, token: Optional[str]) -> Optional[DashboardJwtDataObject]:
if token:
data = jwt.decode(token, self._jwt_secret, algorithms=["HS256"])
return DashboardJwtDataObject(
dashboard_id=data["id"], dataset_ids=data["dataset_ids"]
)
return None
22 changes: 21 additions & 1 deletion superset/views/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@
SupersetTemplateParamsErrorException,
SupersetTimeoutException,
)
from superset.extensions import async_query_manager, cache_manager
from superset.extensions import (
async_query_manager,
cache_manager,
dashboard_jwt_manager,
feature_flag_manager,
)
from superset.jinja_context import get_template_processor
from superset.models.core import Database, FavStar, Log
from superset.models.dashboard import Dashboard
Expand All @@ -102,6 +107,7 @@
from superset.utils.async_query_manager import AsyncQueryTokenException
from superset.utils.cache import etag_cache
from superset.utils.core import ReservedUrlParameters
from superset.utils.dashboard_jwt_manager import DashboardJwtDataObject
from superset.utils.dates import now_as_float
from superset.utils.decorators import check_dashboard_access
from superset.views.base import (
Expand Down Expand Up @@ -1869,6 +1875,19 @@ def dashboard( # pylint: disable=too-many-locals
if key not in [param.value for param in utils.ReservedUrlParameters]
}

extra_jwt = {}
if feature_flag_manager.is_feature_enabled("DASHBOARD_RBAC"):
Copy link
Member

@suddjian suddjian Mar 31, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a project underway to make Dashboards part of a single page app. This code will only work if dashboards are accessed from this route, but in a SPA that will not necessarily be the case. If you click a link to a dashboard from the dashboards list or the homepage, the routing will be done on the frontend. Maybe this code should be moved to a separate endpoint that the frontend calls to get the jwt.

(see also #13306)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also not really clear on why the jwt is needed. Some more background info would be helpful.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok so the jwt is being used to manage read data access of charts that exists on a dashboard implicitly just by having a access to the dashboard

This really upgrade the dashboard access RBAC mechanism by also giving Temporary access to datasets that are being used within the dash lard without the need to explicitly giving permission to those datasets

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm totally aware of SPA project and I plan to also support and implement the dashboard jwt feature under that as well

But for now until SPA maturity I want to to be supported in the old style

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm expecting to have a PR up that removes dashboard as a standalone app this week, so I think compatibility is an important consideration now.

What about adding logic to the security of the chart data and dataset endpoints, to check whether access should be granted at the time of request? Is that an option? Sorry if I'm missing something, I'd just like to fully understand the need for this JWT pattern. Is the time limit for access a feature requirement or an implementation detail?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little bit wary of the security implication of allowing dashboard readers to have access to charts and datasets they explicitly don't have access to.

What if someone creates a dashboard with charts that use datasets they don't have access to, but add themselves to the allowed list of roles? What is the access control flow look like? Would you restrict who can change dashboard roles only to those who have admin access to the underlying datasets? Because if someone has view access to a dataset or edit access to a dashboard, it doesn't mean they can just publish this dataset or the content of this dashboard to any audience.

extra_jwt = {
"extra_jwt": dashboard_jwt_manager.generate_jwt(
DashboardJwtDataObject(
dashboard.id,
list(
map(lambda datasource: datasource.id, dashboard.datasources)
),
)
)
}

bootstrap_data = {
"user_id": g.user.get_id(),
"common": common_bootstrap_payload(),
Expand All @@ -1883,6 +1902,7 @@ def dashboard( # pylint: disable=too-many-locals
"superset_can_csv": superset_can_csv,
"slice_can_edit": slice_can_edit,
},
**extra_jwt,
"datasources": data["datasources"],
}

Expand Down
1 change: 1 addition & 0 deletions superset/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def __init__(
force: bool = False,
force_cached: bool = False,
) -> None:
self.extra_jwt = form_data.get("extra_jwt") or None
if not datasource:
raise QueryObjectValidationError(_("Viz is missing a datasource"))

Expand Down
11 changes: 10 additions & 1 deletion tests/dashboards/security/security_rbac_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import pytest

from superset.extensions import dashboard_jwt_manager
from tests.dashboards.dashboard_test_utils import *
from tests.dashboards.security.base_case import BaseTestDashboardSecurity
from tests.dashboards.superset_factory_util import (
Expand All @@ -28,6 +29,7 @@
create_slice_to_db,
)
from tests.fixtures.public_role import public_role_like_gamma
from tests.test_app import app


@mock.patch.dict(
Expand All @@ -36,6 +38,7 @@
class TestDashboardRoleBasedSecurity(BaseTestDashboardSecurity):
def test_get_dashboard_view__admin_can_access(self):
# arrange
self.init_dashboard_jwt_manager()
dashboard_to_access = create_dashboard_to_db(
owners=[], slices=[create_slice_to_db()], published=False
)
Expand All @@ -47,8 +50,13 @@ def test_get_dashboard_view__admin_can_access(self):
# assert
self.assert_dashboard_view_response(response, dashboard_to_access)

def init_dashboard_jwt_manager(self):
app.config["DASHBOARD_JWT_SECRET"] = "this is my secret"
dashboard_jwt_manager.init_app(app)

def test_get_dashboard_view__owner_can_access(self):
# arrange
self.init_dashboard_jwt_manager()
username = random_str()
new_role = f"role_{random_str()}"
owner = self.create_user_with_roles(
Expand Down Expand Up @@ -100,7 +108,7 @@ def test_get_dashboard_view__user_with_dashboard_permission_can_not_access_draft

def test_get_dashboard_view__user_access_with_dashboard_permission(self):
# arrange

self.init_dashboard_jwt_manager()
username = random_str()
new_role = f"role_{random_str()}"
self.create_user_with_roles(username, [new_role], should_create_roles=True)
Expand Down Expand Up @@ -136,6 +144,7 @@ def test_get_dashboard_view__public_user_with_dashboard_permission_can_not_acces
self,
):
# arrange
self.init_dashboard_jwt_manager()
dashboard_to_access = create_dashboard_to_db(published=False)
grant_access_to_dashboard(dashboard_to_access, "Public")
self.logout()
Expand Down