Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(glossary): support multiple ownership types #12050

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ class Owners(ConfigModel):
groups: Optional[List[str]] = None


OwnersMultipleTypes = Union[List[Owners], Owners]


class KnowledgeCard(ConfigModel):
url: Optional[str] = None
label: Optional[str] = None
Expand All @@ -57,7 +60,7 @@ class GlossaryTermConfig(ConfigModel):
term_source: Optional[str] = None
source_ref: Optional[str] = None
source_url: Optional[str] = None
owners: Optional[Owners] = None
owners: Optional[OwnersMultipleTypes] = None
inherits: Optional[List[str]] = None
contains: Optional[List[str]] = None
values: Optional[List[str]] = None
Expand All @@ -74,7 +77,7 @@ class GlossaryNodeConfig(ConfigModel):
id: Optional[str] = None
name: str
description: str
owners: Optional[Owners] = None
owners: Optional[OwnersMultipleTypes] = None
terms: Optional[List["GlossaryTermConfig"]] = None
nodes: Optional[List["GlossaryNodeConfig"]] = None
knowledge_links: Optional[List[KnowledgeCard]] = None
Expand All @@ -88,7 +91,7 @@ class DefaultConfig(ConfigModel):
"""Holds defaults for populating fields in glossary terms"""

source: Optional[str] = None
owners: Owners
owners: OwnersMultipleTypes
url: Optional[str] = None
source_type: str = "INTERNAL"

Expand Down Expand Up @@ -153,30 +156,44 @@ def make_glossary_term_urn(
return "urn:li:glossaryTerm:" + create_id(path, default_id, enable_auto_id)


def get_owners(owners: Owners) -> models.OwnershipClass:
ownership_type, ownership_type_urn = validate_ownership_type(owners.type)
def get_owners_multiple_types(owners: OwnersMultipleTypes) -> models.OwnershipClass:
    """Build one ownership aspect from a single ``Owners`` entry or a list of them.

    A bare ``Owners`` value (the original, pre-list configuration shape) is
    treated as a one-element list, so both shapes go through the same path.
    The resulting ``OwnershipClass`` holds every ``OwnerClass`` produced by
    ``get_owners`` for each entry, in the order the entries were given.
    """
    # Normalise the backward-compatible single-object form into a list.
    owner_specs = [owners] if isinstance(owners, Owners) else owners

    # Flatten the per-entry owner streams into one list, preserving order.
    collected: List[models.OwnerClass] = [
        owner_entry for spec in owner_specs for owner_entry in get_owners(spec)
    ]
    return models.OwnershipClass(owners=collected)


def get_owners(owners: Owners) -> Iterable[models.OwnerClass]:
actual_type = owners.type or models.OwnershipTypeClass.DEVELOPER

if actual_type.startswith("urn:li:ownershipType:"):
ownership_type: str = "CUSTOM"
ownership_type_urn: Optional[str] = actual_type
else:
ownership_type, ownership_type_urn = validate_ownership_type(actual_type)

if owners.typeUrn is not None:
ownership_type_urn = owners.typeUrn
owners_meta: List[models.OwnerClass] = []

if owners.users is not None:
owners_meta = owners_meta + [
models.OwnerClass(
for o in owners.users:
yield models.OwnerClass(
owner=make_user_urn(o),
type=ownership_type,
typeUrn=ownership_type_urn,
)
for o in owners.users
]
if owners.groups is not None:
owners_meta = owners_meta + [
models.OwnerClass(
for o in owners.groups:
yield models.OwnerClass(
owner=make_group_urn(o),
type=ownership_type,
typeUrn=ownership_type_urn,
)
for o in owners.groups
]
return models.OwnershipClass(owners=owners_meta)


def get_mces(
Expand All @@ -185,7 +202,7 @@ def get_mces(
ingestion_config: BusinessGlossarySourceConfig,
ctx: PipelineContext,
) -> Iterable[Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass]]:
root_owners = get_owners(glossary.owners)
root_owners = get_owners_multiple_types(glossary.owners)

if glossary.nodes:
for node in glossary.nodes:
Expand Down Expand Up @@ -270,7 +287,7 @@ def get_mces_from_node(
node_owners = parentOwners
if glossaryNode.owners is not None:
assert glossaryNode.owners is not None
node_owners = get_owners(glossaryNode.owners)
node_owners = get_owners_multiple_types(glossaryNode.owners)

node_snapshot = models.GlossaryNodeSnapshotClass(
urn=node_urn,
Expand Down Expand Up @@ -426,7 +443,7 @@ def get_mces_from_term(
ownership: models.OwnershipClass = parentOwnership
if glossaryTerm.owners is not None:
assert glossaryTerm.owners is not None
ownership = get_owners(glossaryTerm.owners)
ownership = get_owners_multiple_types(glossaryTerm.owners)
aspects.append(ownership)

if glossaryTerm.domain is not None:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
version: "1"
source: DataHub
owners:
users:
- mjames
url: "https://github.com/datahub-project/datahub/"

nodes:
- name: Custom URN Types
description: Testing custom ownership URN types
owners:
- type: urn:li:ownershipType:custom_type_1
users:
- user1
groups:
- group1
- type: urn:li:ownershipType:custom_type_2
users:
- user2
terms:
- name: Mixed URN Types
description: Term with custom URN types
owners:
- type: urn:li:ownershipType:custom_type_3
users:
- user3
- type: urn:li:ownershipType:custom_type_4
groups:
- group2
- name: Mixed Standard and URN
description: Term with both standard and URN types
owners:
- type: DEVELOPER
users:
- dev1
- type: urn:li:ownershipType:custom_type_5
groups:
- group3
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
[
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": {
"urn": "urn:li:glossaryNode:Custom URN Types",
"aspects": [
{
"com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": {
"customProperties": {},
"definition": "Testing custom ownership URN types",
"name": "Custom URN Types"
}
},
{
"com.linkedin.pegasus2avro.common.Ownership": {
"owners": [
{
"owner": "urn:li:corpuser:user1",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_1"
},
{
"owner": "urn:li:corpGroup:group1",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_1"
},
{
"owner": "urn:li:corpuser:user2",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_2"
}
],
"ownerTypes": {},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
}
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
"urn": "urn:li:glossaryTerm:Custom URN Types.Mixed URN Types",
"aspects": [
{
"com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
"customProperties": {},
"name": "Mixed URN Types",
"definition": "Term with custom URN types",
"parentNode": "urn:li:glossaryNode:Custom URN Types",
"termSource": "INTERNAL",
"sourceRef": "DataHub",
"sourceUrl": "https://github.com/datahub-project/datahub/"
}
},
{
"com.linkedin.pegasus2avro.common.Ownership": {
"owners": [
{
"owner": "urn:li:corpuser:user3",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_3"
},
{
"owner": "urn:li:corpGroup:group2",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_4"
}
],
"ownerTypes": {},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
}
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
"urn": "urn:li:glossaryTerm:Custom URN Types.Mixed Standard and URN",
"aspects": [
{
"com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
"customProperties": {},
"name": "Mixed Standard and URN",
"definition": "Term with both standard and URN types",
"parentNode": "urn:li:glossaryNode:Custom URN Types",
"termSource": "INTERNAL",
"sourceRef": "DataHub",
"sourceUrl": "https://github.com/datahub-project/datahub/"
}
},
{
"com.linkedin.pegasus2avro.common.Ownership": {
"owners": [
{
"owner": "urn:li:corpuser:dev1",
"type": "DEVELOPER"
},
{
"owner": "urn:li:corpGroup:group3",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_5"
}
],
"ownerTypes": {},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
}
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "glossaryNode",
"entityUrn": "urn:li:glossaryNode:Custom URN Types",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "glossaryTerm",
"entityUrn": "urn:li:glossaryTerm:Custom URN Types.Mixed Standard and URN",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "glossaryTerm",
"entityUrn": "urn:li:glossaryTerm:Custom URN Types.Mixed URN Types",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
version: "1"
source: DataHub
owners:
users:
- mjames
url: "https://github.com/datahub-project/datahub/"

nodes:
- name: Different Owner Types
description: Testing multiple owners with different types
owners:
- type: DEVELOPER
users:
- dev1
groups:
- engineering
- type: DATAOWNER
users:
- owner1
groups:
- data_stewards
- type: PRODUCER
users:
- producer1
terms:
- name: Mixed Ownership
description: Term with different owner types
owners:
- type: STAKEHOLDER
users:
- stakeholder1
groups:
- business
- type: DEVELOPER
users:
- dev2
- type: DATAOWNER
groups:
- compliance
Loading
Loading