Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingest-idp): emit empty GroupMembership when there are no groups #7196

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -419,18 +419,14 @@ def ingest_ad_users(
for user_count, datahub_corp_user_snapshot in enumerate(
datahub_corp_user_snapshots
):
# Add GroupMembership if applicable
if (
datahub_corp_user_snapshot.urn
in datahub_corp_user_urn_to_group_membership.keys()
):
datahub_group_membership = (
datahub_corp_user_urn_to_group_membership.get(
datahub_corp_user_snapshot.urn
)
)
assert datahub_group_membership
datahub_corp_user_snapshot.aspects.append(datahub_group_membership)
# TODO: Refactor common code between this and Okta to a common base class or utils
# Add group membership aspect
datahub_group_membership: GroupMembershipClass = (
datahub_corp_user_urn_to_group_membership[
datahub_corp_user_snapshot.urn
]
)
datahub_corp_user_snapshot.aspects.append(datahub_group_membership)
mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_user_snapshot)
wu_id = f"user-snapshot-{user_count + 1 if self.config.mask_user_id else datahub_corp_user_snapshot.urn}"
wu = MetadataWorkUnit(id=wu_id, mce=mce)
Expand Down
44 changes: 17 additions & 27 deletions metadata-ingestion/src/datahub/ingestion/source/identity/okta.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import re
import urllib
from collections import defaultdict
from dataclasses import dataclass, field
from time import sleep
from typing import Dict, Iterable, List, Optional, Union
Expand Down Expand Up @@ -31,7 +32,7 @@
CorpUserSnapshot,
)
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
from datahub.metadata.schema_classes import ( # GroupMembershipClass,
from datahub.metadata.schema_classes import (
ChangeTypeClass,
CorpGroupInfoClass,
CorpUserInfoClass,
Expand Down Expand Up @@ -316,7 +317,9 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
yield group_status_wu

# Step 2: Populate GroupMembership Aspects for CorpUsers
datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = {}
datahub_corp_user_urn_to_group_membership: Dict[
str, GroupMembershipClass
] = defaultdict(lambda: GroupMembershipClass(groups=[]))
hsheth2 marked this conversation as resolved.
Show resolved Hide resolved
if self.config.ingest_group_membership and okta_groups is not None:
# Fetch membership for each group.
for okta_group in okta_groups:
Expand All @@ -341,20 +344,10 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
self.report.report_failure("okta_user_mapping", error_str)
continue

# Either update or create the GroupMembership aspect for this group member.
# TODO: Production of the GroupMembership aspect will overwrite the existing
# group membership for the DataHub user.
if (
# Update the GroupMembership aspect for this group member.
datahub_corp_user_urn_to_group_membership[
datahub_corp_user_urn
in datahub_corp_user_urn_to_group_membership
):
datahub_corp_user_urn_to_group_membership[
datahub_corp_user_urn
].groups.append(datahub_corp_group_urn)
else:
datahub_corp_user_urn_to_group_membership[
datahub_corp_user_urn
] = GroupMembershipClass(groups=[datahub_corp_group_urn])
].groups.append(datahub_corp_group_urn)

# Step 3: Produce MetadataWorkUnits for CorpUsers.
if self.config.ingest_users:
Expand All @@ -364,18 +357,15 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
for user_count, datahub_corp_user_snapshot in enumerate(
datahub_corp_user_snapshots
):
# Add GroupMembership aspect populated in Step 2 if applicable.
if (
datahub_corp_user_snapshot.urn
in datahub_corp_user_urn_to_group_membership
):
datahub_group_membership = (
datahub_corp_user_urn_to_group_membership.get(
datahub_corp_user_snapshot.urn
)
)
assert datahub_group_membership is not None
datahub_corp_user_snapshot.aspects.append(datahub_group_membership)
# TODO: Refactor common code between this and Azure AD to a common base class or utils
# Add GroupMembership aspect populated in Step 2.
datahub_group_membership: GroupMembershipClass = (
datahub_corp_user_urn_to_group_membership[
datahub_corp_user_snapshot.urn
]
)
assert datahub_group_membership is not None
datahub_corp_user_snapshot.aspects.append(datahub_group_membership)
mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_user_snapshot)
wu_id = f"user-snapshot-{user_count + 1 if self.config.mask_user_id else datahub_corp_user_snapshot.urn}"
wu = MetadataWorkUnit(id=wu_id, mce=mce)
Expand Down
3 changes: 1 addition & 2 deletions metadata-ingestion/src/datahub/ingestion/source/ldap.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,7 @@ def build_corp_user_mce(
],
)

if groups:
user_snapshot.aspects.append(GroupMembershipClass(groups=groups))
user_snapshot.aspects.append(GroupMembershipClass(groups=groups))

return MetadataChangeEvent(proposedSnapshot=user_snapshot)

Expand Down
34 changes: 34 additions & 0 deletions metadata-ingestion/tests/integration/azure_ad/azure_ad_groups.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,39 @@
"theme": null,
"visibility": null,
"onPremisesProvisioningErrors": []
},
{
"id": "00000000-0000-0000-0000-0000000000002",
"deletedDateTime": null,
"classification": null,
"createdDateTime": "2021-08-20 11: 00: 00",
"creationOptions": [],
"description": "This is an interesting description",
"displayName": "groupDisplayName3",
"expirationDateTime": null,
"groupTypes": [],
"isAssignableToRole": null,
"mail": "[email protected]",
"mailEnabled": false,
"mailNickname": "groupDisplayName3",
"membershipRule": null,
"membershipRuleProcessingState": null,
"onPremisesDomainName": null,
"onPremisesLastSyncDateTime": null,
"onPremisesNetBiosName": null,
"onPremisesSamAccountName": null,
"onPremisesSecurityIdentifier": null,
"onPremisesSyncEnabled": null,
"preferredDataLocation": null,
"preferredLanguage": null,
"proxyAddresses": [],
"renewedDateTime": "2021-08-20 11:00:00",
"resourceBehaviorOptions": [],
"resourceProvisioningOptions": [],
"securityEnabled": true,
"securityIdentifier": "xxxxx",
"theme": null,
"visibility": null,
"onPremisesProvisioningErrors": []
}
]
Loading