Skip to content

Commit

Permalink
feat(ingest/tableau): support ingestion of access roles (datahub-proj…
Browse files Browse the repository at this point in the history
…ect#11157)

Co-authored-by: Yanik Häni <[email protected]>
Co-authored-by: Harshal Sheth <[email protected]>
  • Loading branch information
3 people authored Oct 24, 2024
1 parent b89ca3f commit 7c8dba4
Show file tree
Hide file tree
Showing 13 changed files with 44,451 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ public class ContainerType
Constants.DEPRECATION_ASPECT_NAME,
Constants.DATA_PRODUCTS_ASPECT_NAME,
Constants.STRUCTURED_PROPERTIES_ASPECT_NAME,
Constants.FORMS_ASPECT_NAME);
Constants.FORMS_ASPECT_NAME,
Constants.ACCESS_ASPECT_NAME);

private static final Set<String> FACET_FIELDS = ImmutableSet.of("origin", "platform");
private static final String ENTITY_NAME = "container";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static com.linkedin.metadata.Constants.*;

import com.linkedin.common.Access;
import com.linkedin.common.DataPlatformInstance;
import com.linkedin.common.Deprecation;
import com.linkedin.common.Forms;
Expand Down Expand Up @@ -30,6 +31,7 @@
import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper;
import com.linkedin.datahub.graphql.types.form.FormsMapper;
import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper;
import com.linkedin.datahub.graphql.types.rolemetadata.mappers.AccessMapper;
import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper;
import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper;
import com.linkedin.domain.Domains;
Expand Down Expand Up @@ -105,6 +107,11 @@ public static Container map(
context, new GlossaryTerms(envelopedTerms.getValue().data()), entityUrn));
}

final EnvelopedAspect accessAspect = aspects.get(ACCESS_ASPECT_NAME);
if (accessAspect != null) {
result.setAccess(AccessMapper.map(new Access(accessAspect.getValue().data()), entityUrn));
}

final EnvelopedAspect envelopedInstitutionalMemory =
aspects.get(Constants.INSTITUTIONAL_MEMORY_ASPECT_NAME);
if (envelopedInstitutionalMemory != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public class DatasetType
EMBED_ASPECT_NAME,
DATA_PRODUCTS_ASPECT_NAME,
BROWSE_PATHS_V2_ASPECT_NAME,
ACCESS_DATASET_ASPECT_NAME,
ACCESS_ASPECT_NAME,
STRUCTURED_PROPERTIES_ASPECT_NAME,
FORMS_ASPECT_NAME,
SUB_TYPES_ASPECT_NAME);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ public Dataset apply(
(dataset, dataMap) ->
dataset.setBrowsePathV2(BrowsePathsV2Mapper.map(context, new BrowsePathsV2(dataMap))));
mappingHelper.mapToResult(
ACCESS_DATASET_ASPECT_NAME,
ACCESS_ASPECT_NAME,
((dataset, dataMap) ->
dataset.setAccess(AccessMapper.map(new Access(dataMap), entityUrn))));
mappingHelper.mapToResult(
Expand Down
5 changes: 5 additions & 0 deletions datahub-graphql-core/src/main/resources/entity.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -2837,6 +2837,11 @@ type Container implements Entity {
"""
exists: Boolean

"""
The Roles and the properties to access the container
"""
access: Access

"""
Experimental API.
For fetching extra entities that do not have custom UI code yet
Expand Down
23 changes: 22 additions & 1 deletion datahub-web-react/src/app/entity/container/ContainerEntity.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'
import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection';
import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection';
import { getDataForEntityType } from '../shared/containers/profile/utils';
import { useGetContainerQuery } from '../../../graphql/container.generated';
import { useGetContainerQuery, GetContainerQuery } from '../../../graphql/container.generated';
import { ContainerEntitiesTab } from './ContainerEntitiesTab';
import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection';
import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab';
Expand All @@ -17,6 +17,8 @@ import { capitalizeFirstLetterOnly } from '../../shared/textUtil';
import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection';
import { getDataProduct } from '../shared/utils';
import EmbeddedProfile from '../shared/embed/EmbeddedProfile';
import AccessManagement from '../shared/tabs/Dataset/AccessManagement/AccessManagement';
import { useAppConfig } from '../../useAppConfig';

/**
* Definition of the DataHub Container entity.
Expand Down Expand Up @@ -65,6 +67,8 @@ export class ContainerEntity implements Entity<Container> {

useEntityQuery = useGetContainerQuery;

appconfig = useAppConfig;

renderProfile = (urn: string) => (
<EntityProfile
urn={urn}
Expand All @@ -85,6 +89,23 @@ export class ContainerEntity implements Entity<Container> {
name: 'Properties',
component: PropertiesTab,
},
{
name: 'Access Management',
component: AccessManagement,
display: {
visible: (_, container: GetContainerQuery) => {
return (
this.appconfig().config.featureFlags.showAccessManagement &&
!!container?.container?.access
);
},
enabled: (_, container: GetContainerQuery) => {
const accessAspect = container?.container?.access;
const rolesList = accessAspect?.roles;
return !!accessAspect && !!rolesList && rolesList.length > 0;
},
},
},
]}
sidebarSections={this.getSidebarSections()}
/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import styled from 'styled-components';
import { Button, Table } from 'antd';
import { SpinProps } from 'antd/es/spin';
import { LoadingOutlined } from '@ant-design/icons';
import { useBaseEntity } from '../../../EntityContext';
import { GetDatasetQuery, useGetExternalRolesQuery } from '../../../../../../graphql/dataset.generated';
import { useEntityData } from '../../../EntityContext';
import { useGetExternalRolesQuery } from '../../../../../../graphql/dataset.generated';
import { handleAccessRoles } from './utils';
import AccessManagerDescription from './AccessManagerDescription';

Expand Down Expand Up @@ -60,11 +60,12 @@ const AccessButton = styled(Button)`
`;

export default function AccessManagement() {
const baseEntity = useBaseEntity<GetDatasetQuery>();
const { entityData } = useEntityData();
const entityUrn = (entityData as any)?.urn;

const { data: externalRoles, loading: isLoading } = useGetExternalRolesQuery({
variables: { urn: baseEntity?.dataset?.urn as string },
skip: !baseEntity?.dataset?.urn,
variables: { urn: entityUrn as string },
skip: !entityUrn,
});

const columns = [
Expand Down
7 changes: 7 additions & 0 deletions datahub-web-react/src/graphql/container.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ query getContainer($urn: String!) {
status {
removed
}
access {
roles {
role {
urn
}
}
}
autoRenderAspects: aspects(input: { autoRenderOnly: true }) {
...autoRenderAspectFields
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ public class Constants {

// ExternalRoleMetadata
public static final String ROLE_ENTITY_NAME = "role";
public static final String ACCESS_DATASET_ASPECT_NAME = "access";
public static final String ACCESS_ASPECT_NAME = "access";
public static final String ROLE_KEY = "roleKey";
public static final String ROLE_PROPERTIES_ASPECT_NAME = "roleProperties";
public static final String ROLE_ACTORS_ASPECT_NAME = "actors";
Expand Down
90 changes: 88 additions & 2 deletions metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import re
import time
Expand Down Expand Up @@ -26,6 +27,8 @@
from pydantic.fields import Field
from requests.adapters import HTTPAdapter
from tableauserverclient import (
GroupItem,
PermissionsRule,
PersonalAccessTokenAuth,
Server,
ServerResponseError,
Expand Down Expand Up @@ -216,6 +219,11 @@ class TableauConnectionConfig(ConfigModel):
description="Whether to verify SSL certificates. If using self-signed certificates, set to false or provide the path to the .pem certificate bundle.",
)

session_trust_env: bool = Field(
False,
description="Configures the trust_env property in the requests session. If set to false (default value) it will bypass proxy settings. See https://requests.readthedocs.io/en/latest/api/#requests.Session.trust_env for more information.",
)

extract_column_level_lineage: bool = Field(
True,
description="When enabled, extracts column-level lineage from Tableau Datasources",
Expand Down Expand Up @@ -265,8 +273,7 @@ def make_tableau_client(self, site: str) -> Server:
},
)

# From https://stackoverflow.com/a/50159273/5004662.
server._session.trust_env = False
server._session.trust_env = self.session_trust_env

# Setup request retries.
adapter = HTTPAdapter(
Expand Down Expand Up @@ -298,6 +305,23 @@ def make_tableau_client(self, site: str) -> Server:
) from e


class PermissionIngestionConfig(ConfigModel):
    # Options controlling the ingestion of Tableau group permissions.
    #
    # NOTE: the descriptions below are built from adjacent string literals, which
    # Python joins without any separator. Every fragment that is followed by
    # another sentence must therefore end with its own trailing space.

    # Switch for group permission ingestion on workbooks; defaults to True.
    enable_workbooks: bool = Field(
        default=True,
        description="Whether or not to enable group permission ingestion for workbooks. "
        "Default: True",
    )

    # Allow/deny filter applied to Tableau group names; defaults to allow-all.
    group_name_pattern: AllowDenyPattern = Field(
        default=AllowDenyPattern.allow_all(),
        description="Filter for Tableau group names when ingesting group permissions. "
        "For example, you could filter for groups that include the term 'Consumer' in their name by adding '^.*Consumer$' to the allow list. "
        "By default, all groups will be ingested. "
        "You can both allow and deny groups based on their name using their name, or a Regex pattern. "
        "Deny patterns always take precedence over allow patterns. ",
    )


class TableauConfig(
DatasetLineageProviderConfigBase,
StatefulIngestionConfigBase,
Expand Down Expand Up @@ -459,6 +483,11 @@ class TableauConfig(
description="When enabled, sites are added as containers and therefore visible in the folder structure within Datahub.",
)

permission_ingestion: Optional[PermissionIngestionConfig] = Field(
default=None,
description="Configuration settings for ingesting Tableau groups and their capabilities as custom properties.",
)

# pre = True because we want to take some decision before pydantic initialize the configuration to default values
@root_validator(pre=True)
def projects_backward_compatibility(cls, values: Dict) -> Dict:
Expand Down Expand Up @@ -732,6 +761,8 @@ def __init__(
self.workbook_project_map: Dict[str, str] = {}
self.datasource_project_map: Dict[str, str] = {}

self.group_map: Dict[str, GroupItem] = {}

# This map keeps track of the database server connection hostnames.
self.database_server_hostname_map: Dict[str, str] = {}
# This list keeps track of sheets in workbooks so that we retrieve those
Expand Down Expand Up @@ -2803,6 +2834,18 @@ def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUni
f"Could not load project hierarchy for workbook {workbook_name}({workbook_id}). Please check permissions."
)

custom_props = None
if (
self.config.permission_ingestion
and self.config.permission_ingestion.enable_workbooks
):
logger.debug(f"Ingest access roles of workbook-id='{workbook.get(c.LUID)}'")
workbook_instance = self.server.workbooks.get_by_id(workbook.get(c.LUID))
self.server.workbooks.populate_permissions(workbook_instance)
custom_props = self._create_workbook_properties(
workbook_instance.permissions
)

yield from gen_containers(
container_key=workbook_container_key,
name=workbook.get(c.NAME) or "",
Expand All @@ -2811,6 +2854,7 @@ def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUni
sub_types=[BIContainerSubTypes.TABLEAU_WORKBOOK],
owner_urn=owner_urn,
external_url=workbook_external_url,
extra_properties=custom_props,
tags=tags,
)

Expand Down Expand Up @@ -3168,11 +3212,53 @@ def emit_site_container(self):
sub_types=[c.SITE],
)

def _fetch_groups(self):
    """Index the groups yielded by ``TSC.Pager(self.server.groups)`` by their id.

    Each group is stored in ``self.group_map`` under ``group.id``; an entry
    that already exists for the same id is overwritten.
    """
    group_pager = TSC.Pager(self.server.groups)
    for tableau_group in group_pager:
        group_id = tableau_group.id
        self.group_map[group_id] = tableau_group

def _get_allowed_capabilities(self, capabilities: Dict[str, str]) -> List[str]:
if not self.config.permission_ingestion:
return []

allowed_capabilities = [
key for key, value in capabilities.items() if value == "Allow"
]
return allowed_capabilities

def _create_workbook_properties(
self, permissions: List[PermissionsRule]
) -> Optional[Dict[str, str]]:
if not self.config.permission_ingestion:
return None

groups = []
for rule in permissions:
if rule.grantee.tag_name == "group":
group = self.group_map.get(rule.grantee.id)
if not group or not group.name:
logger.debug(f"Group {rule.grantee.id} not found in group map.")
continue
if not self.config.permission_ingestion.group_name_pattern.allowed(
group.name
):
logger.info(
f"Skip permission '{group.name}' as it's excluded in group_name_pattern."
)
continue

capabilities = self._get_allowed_capabilities(rule.capabilities)
groups.append({"group": group.name, "capabilities": capabilities})

return {"permissions": json.dumps(groups)} if len(groups) > 0 else None

def ingest_tableau_site(self):
# Initialise the dictionary to later look-up for chart and dashboard stat
if self.config.extract_usage_stats:
self._populate_usage_stat_registry()

if self.config.permission_ingestion:
self._fetch_groups()

# Populate the map of database names and database hostnames to be used later to map
# databases to platform instances.
if self.config.database_hostname_to_platform_instance_map:
Expand Down
Loading

0 comments on commit 7c8dba4

Please sign in to comment.