Skip to content

Commit

Permalink
fix(ingest-idp): emit empty GroupMembership when there are no groups
Browse files — browse the repository at this point in the history
  • Loading branch information
aditya-radhakrishnan committed Feb 1, 2023
1 parent efaf092 commit f7a1f4f
Show file tree
Hide file tree
Showing 13 changed files with 925 additions and 553 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -419,18 +419,10 @@ def ingest_ad_users(
for user_count, datahub_corp_user_snapshot in enumerate(
datahub_corp_user_snapshots
):
# Add GroupMembership if applicable
if (
datahub_corp_user_snapshot.urn
in datahub_corp_user_urn_to_group_membership.keys()
):
datahub_group_membership = (
datahub_corp_user_urn_to_group_membership.get(
datahub_corp_user_snapshot.urn
)
)
assert datahub_group_membership
datahub_corp_user_snapshot.aspects.append(datahub_group_membership)
datahub_corp_user_urn: str = datahub_corp_user_snapshot.urn
datahub_group_membership: GroupMembershipClass = datahub_corp_user_urn_to_group_membership[datahub_corp_user_urn]
assert datahub_group_membership is not None
datahub_corp_user_snapshot.aspects.append(datahub_group_membership)
mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_user_snapshot)
wu_id = f"user-snapshot-{user_count + 1 if self.config.mask_user_id else datahub_corp_user_snapshot.urn}"
wu = MetadataWorkUnit(id=wu_id, mce=mce)
Expand Down
40 changes: 13 additions & 27 deletions metadata-ingestion/src/datahub/ingestion/source/identity/okta.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import re
import urllib
from collections import defaultdict
from dataclasses import dataclass, field
from time import sleep
from typing import Dict, Iterable, List, Optional, Union
Expand Down Expand Up @@ -31,7 +32,7 @@
CorpUserSnapshot,
)
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
from datahub.metadata.schema_classes import ( # GroupMembershipClass,
from datahub.metadata.schema_classes import (
ChangeTypeClass,
CorpGroupInfoClass,
CorpUserInfoClass,
Expand Down Expand Up @@ -316,7 +317,9 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
yield group_status_wu

# Step 2: Populate GroupMembership Aspects for CorpUsers
datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = {}
datahub_corp_user_urn_to_group_membership: Dict[
str, GroupMembershipClass
] = defaultdict(lambda: GroupMembershipClass(groups=[]))
if self.config.ingest_group_membership and okta_groups is not None:
# Fetch membership for each group.
for okta_group in okta_groups:
Expand All @@ -341,20 +344,10 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
self.report.report_failure("okta_user_mapping", error_str)
continue

# Either update or create the GroupMembership aspect for this group member.
# TODO: Production of the GroupMembership aspect will overwrite the existing
# group membership for the DataHub user.
if (
# Update the GroupMembership aspect for this group member.
datahub_corp_user_urn_to_group_membership[
datahub_corp_user_urn
in datahub_corp_user_urn_to_group_membership
):
datahub_corp_user_urn_to_group_membership[
datahub_corp_user_urn
].groups.append(datahub_corp_group_urn)
else:
datahub_corp_user_urn_to_group_membership[
datahub_corp_user_urn
] = GroupMembershipClass(groups=[datahub_corp_group_urn])
].groups.append(datahub_corp_group_urn)

# Step 3: Produce MetadataWorkUnits for CorpUsers.
if self.config.ingest_users:
Expand All @@ -364,18 +357,11 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
for user_count, datahub_corp_user_snapshot in enumerate(
datahub_corp_user_snapshots
):
# Add GroupMembership aspect populated in Step 2 if applicable.
if (
datahub_corp_user_snapshot.urn
in datahub_corp_user_urn_to_group_membership
):
datahub_group_membership = (
datahub_corp_user_urn_to_group_membership.get(
datahub_corp_user_snapshot.urn
)
)
assert datahub_group_membership is not None
datahub_corp_user_snapshot.aspects.append(datahub_group_membership)
# Add GroupMembership aspect populated in Step 2.
datahub_corp_user_urn: str = datahub_corp_user_snapshot.urn
datahub_group_membership: GroupMembershipClass = datahub_corp_user_urn_to_group_membership[datahub_corp_user_urn]
assert datahub_group_membership is not None
datahub_corp_user_snapshot.aspects.append(datahub_group_membership)
mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_user_snapshot)
wu_id = f"user-snapshot-{user_count + 1 if self.config.mask_user_id else datahub_corp_user_snapshot.urn}"
wu = MetadataWorkUnit(id=wu_id, mce=mce)
Expand Down
3 changes: 1 addition & 2 deletions metadata-ingestion/src/datahub/ingestion/source/ldap.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,7 @@ def build_corp_user_mce(
],
)

if groups:
user_snapshot.aspects.append(GroupMembershipClass(groups=groups))
user_snapshot.aspects.append(GroupMembershipClass(groups=groups))

return MetadataChangeEvent(proposedSnapshot=user_snapshot)

Expand Down
34 changes: 34 additions & 0 deletions metadata-ingestion/tests/integration/azure_ad/azure_ad_groups.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,39 @@
"theme": null,
"visibility": null,
"onPremisesProvisioningErrors": []
},
{
"id": "00000000-0000-0000-0000-0000000000002",
"deletedDateTime": null,
"classification": null,
"createdDateTime": "2021-08-20 11: 00: 00",
"creationOptions": [],
"description": "This is an interesting description",
"displayName": "groupDisplayName3",
"expirationDateTime": null,
"groupTypes": [],
"isAssignableToRole": null,
"mail": "[email protected]",
"mailEnabled": false,
"mailNickname": "groupDisplayName3",
"membershipRule": null,
"membershipRuleProcessingState": null,
"onPremisesDomainName": null,
"onPremisesLastSyncDateTime": null,
"onPremisesNetBiosName": null,
"onPremisesSamAccountName": null,
"onPremisesSecurityIdentifier": null,
"onPremisesSyncEnabled": null,
"preferredDataLocation": null,
"preferredLanguage": null,
"proxyAddresses": [],
"renewedDateTime": "2021-08-20 11:00:00",
"resourceBehaviorOptions": [],
"resourceProvisioningOptions": [],
"securityEnabled": true,
"securityIdentifier": "xxxxx",
"theme": null,
"visibility": null,
"onPremisesProvisioningErrors": []
}
]
Loading

0 comments on commit f7a1f4f

Please sign in to comment.