diff --git a/.github/workflows/close-stale-issues.yml b/.github/workflows/close-stale-issues.yml index 36ab172a6de61d..efc96c25fe9661 100644 --- a/.github/workflows/close-stale-issues.yml +++ b/.github/workflows/close-stale-issues.yml @@ -12,11 +12,11 @@ jobs: steps: - uses: actions/stale@v5 with: - days-before-issue-stale: 15 - days-before-issue-close: 15 + days-before-issue-stale: 30 + days-before-issue-close: 30 stale-issue-label: "stale" - stale-issue-message: "This issue is stale because it has been open for 15 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io. For feature requests please use https://feature-requests.datahubproject.io" - close-issue-message: "This issue was closed because it has been inactive for 15 days since being marked as stale." + stale-issue-message: "This issue is stale because it has been open for 30 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io. For feature requests please use https://feature-requests.datahubproject.io" + close-issue-message: "This issue was closed because it has been inactive for 30 days since being marked as stale." days-before-pr-stale: -1 days-before-pr-close: -1 repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 45ecb09cf7f808..827496dfb3b0e9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -144,8 +144,10 @@ import com.linkedin.datahub.graphql.resolvers.mutate.AddTagsResolver; import com.linkedin.datahub.graphql.resolvers.mutate.AddTermResolver; import com.linkedin.datahub.graphql.resolvers.mutate.AddTermsResolver; +import com.linkedin.datahub.graphql.resolvers.mutate.BatchAddOwnersResolver; import com.linkedin.datahub.graphql.resolvers.mutate.BatchAddTagsResolver; import com.linkedin.datahub.graphql.resolvers.mutate.BatchAddTermsResolver; +import com.linkedin.datahub.graphql.resolvers.mutate.BatchRemoveOwnersResolver; import com.linkedin.datahub.graphql.resolvers.mutate.BatchRemoveTagsResolver; import com.linkedin.datahub.graphql.resolvers.mutate.BatchRemoveTermsResolver; import com.linkedin.datahub.graphql.resolvers.mutate.BatchSetDomainResolver; @@ -705,7 +707,9 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("updateDescription", new UpdateDescriptionResolver(entityService)) .dataFetcher("addOwner", new AddOwnerResolver(entityService)) .dataFetcher("addOwners", new AddOwnersResolver(entityService)) + .dataFetcher("batchAddOwners", new BatchAddOwnersResolver(entityService)) .dataFetcher("removeOwner", new RemoveOwnerResolver(entityService)) + .dataFetcher("batchRemoveOwners", new BatchRemoveOwnersResolver(entityService)) .dataFetcher("addLink", new AddLinkResolver(entityService)) .dataFetcher("removeLink", new RemoveLinkResolver(entityService)) .dataFetcher("addGroupMembers", new AddGroupMembersResolver(this.groupService)) diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java index faddf984f7bbf0..ca91cefc4d748c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.mutate; +import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; @@ -7,7 +8,9 @@ import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AddOwnerInput; import com.linkedin.datahub.graphql.generated.OwnerEntityType; +import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.OwnershipType; +import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; @@ -50,11 +53,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw log.debug("Adding Owner. input: {}", input.toString()); Urn actor = CorpuserUrn.createFromString(((QueryContext) environment.getContext()).getActorUrn()); - OwnerUtils.addOwner( - ownerUrn, - // Assumption Alert: Assumes that GraphQL ownership type === GMS ownership type - com.linkedin.common.OwnershipType.valueOf(type.name()), - targetUrn, + OwnerUtils.addOwnersToResources( + ImmutableList.of(new OwnerInput(input.getOwnerUrn(), ownerEntityType, type)), + ImmutableList.of(new ResourceRefInput(input.getResourceUrn(), null, null)), actor, _entityService ); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java index 8d9630f08ad4ed..057e6dc80ef4f4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.mutate; +import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; @@ -7,6 +8,7 @@ import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AddOwnersInput; import com.linkedin.datahub.graphql.generated.OwnerInput; +import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; @@ -47,9 +49,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw log.debug("Adding Owners. 
input: {}", input.toString()); Urn actor = CorpuserUrn.createFromString(((QueryContext) environment.getContext()).getActorUrn()); - OwnerUtils.addOwners( + OwnerUtils.addOwnersToResources( owners, - targetUrn, + ImmutableList.of(new ResourceRefInput(input.getResourceUrn(), null, null)), actor, _entityService ); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java new file mode 100644 index 00000000000000..ae1ba8a50ab011 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java @@ -0,0 +1,90 @@ +package com.linkedin.datahub.graphql.resolvers.mutate; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.generated.BatchAddOwnersInput; +import com.linkedin.datahub.graphql.generated.OwnerInput; +import com.linkedin.datahub.graphql.generated.ResourceRefInput; +import com.linkedin.datahub.graphql.resolvers.mutate.util.LabelUtils; +import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.metadata.entity.EntityService; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; + + +@Slf4j +@RequiredArgsConstructor +public class BatchAddOwnersResolver implements DataFetcher> { + + private final EntityService _entityService; + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + final BatchAddOwnersInput input = bindArgument(environment.getArgument("input"), BatchAddOwnersInput.class); + final List owners = input.getOwners(); + final List resources = input.getResources(); + final QueryContext context = environment.getContext(); + + return CompletableFuture.supplyAsync(() -> { + + // First, validate the batch + validateOwners(owners); + validateInputResources(resources, context); + + try { + // Then execute the bulk add + batchAddOwners(owners, resources, context); + return true; + } catch (Exception e) { + log.error("Failed to perform update against input {}, {}", input.toString(), e.getMessage()); + throw new RuntimeException(String.format("Failed to perform update against input %s", input.toString()), e); + } + }); + } + + private void validateOwners(List owners) { + for (OwnerInput ownerInput : owners) { + OwnerUtils.validateOwner(UrnUtils.getUrn(ownerInput.getOwnerUrn()), ownerInput.getOwnerEntityType(), _entityService); + } + } + + private void validateInputResources(List resources, QueryContext context) { + for (ResourceRefInput resource : resources) { + validateInputResource(resource, context); + } + } + + private void validateInputResource(ResourceRefInput resource, QueryContext context) { + final Urn resourceUrn = UrnUtils.getUrn(resource.getResourceUrn()); + + if (resource.getSubResource() != null) { + throw new IllegalArgumentException("Malformed input provided: owners cannot be applied to subresources."); + } + + if (!OwnerUtils.isAuthorizedToUpdateOwners(context, resourceUrn)) { + throw new 
AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); + } + LabelUtils.validateResource(resourceUrn, resource.getSubResource(), resource.getSubResourceType(), _entityService); + } + + private void batchAddOwners(List owners, List resources, QueryContext context) { + log.debug("Batch adding owners. owners: {}, resources: {}", owners, resources); + try { + OwnerUtils.addOwnersToResources(owners, resources, UrnUtils.getUrn(context.getActorUrn()), _entityService); + } catch (Exception e) { + throw new RuntimeException(String.format("Failed to batch add Owners %s to resources with urns %s!", + owners, + resources.stream().map(ResourceRefInput::getResourceUrn).collect(Collectors.toList())), + e); + } + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java new file mode 100644 index 00000000000000..53ac37800848cc --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java @@ -0,0 +1,83 @@ +package com.linkedin.datahub.graphql.resolvers.mutate; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.generated.BatchRemoveOwnersInput; +import com.linkedin.datahub.graphql.generated.ResourceRefInput; +import com.linkedin.datahub.graphql.resolvers.mutate.util.LabelUtils; +import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.metadata.entity.EntityService; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; + + +@Slf4j +@RequiredArgsConstructor +public class BatchRemoveOwnersResolver implements DataFetcher> { + + private final EntityService _entityService; + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + final BatchRemoveOwnersInput input = bindArgument(environment.getArgument("input"), BatchRemoveOwnersInput.class); + final List owners = input.getOwnerUrns(); + final List resources = input.getResources(); + final QueryContext context = environment.getContext(); + + return CompletableFuture.supplyAsync(() -> { + + // First, validate the batch + validateInputResources(resources, context); + + try { + // Then execute the bulk remove + batchRemoveOwners(owners, resources, context); + return true; + } catch (Exception e) { + log.error("Failed to perform update against input {}, {}", input.toString(), e.getMessage()); + throw new RuntimeException(String.format("Failed to perform update against input %s", input.toString()), e); + } + }); + } + + private void validateInputResources(List resources, QueryContext context) { + for (ResourceRefInput resource : resources) { + validateInputResource(resource, context); + } + } + + private void validateInputResource(ResourceRefInput resource, QueryContext context) { + final Urn resourceUrn = UrnUtils.getUrn(resource.getResourceUrn()); + + if (resource.getSubResource() != null) { + throw new 
IllegalArgumentException("Malformed input provided: owners cannot be removed from subresources."); + } + + if (!OwnerUtils.isAuthorizedToUpdateOwners(context, resourceUrn)) { + throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); + } + LabelUtils.validateResource(resourceUrn, resource.getSubResource(), resource.getSubResourceType(), _entityService); + } + + private void batchRemoveOwners(List ownerUrns, List resources, QueryContext context) { + log.debug("Batch removing owners. owners: {}, resources: {}", ownerUrns, resources); + try { + OwnerUtils.removeOwnersFromResources(ownerUrns.stream().map(UrnUtils::getUrn).collect( + Collectors.toList()), resources, UrnUtils.getUrn(context.getActorUrn()), _entityService); + } catch (Exception e) { + throw new RuntimeException(String.format("Failed to batch remove Owners %s to resources with urns %s!", + ownerUrns, + resources.stream().map(ResourceRefInput::getResourceUrn).collect(Collectors.toList())), + e); + } + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java index 1aa16e094151cb..4df5d27ebe8bcc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java @@ -1,10 +1,12 @@ package com.linkedin.datahub.graphql.resolvers.mutate; +import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.RemoveOwnerInput; +import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; @@ -39,12 +41,10 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw _entityService ); try { - log.debug("Removing Link input: {}", input); - Urn actor = CorpuserUrn.createFromString(((QueryContext) environment.getContext()).getActorUrn()); - OwnerUtils.removeOwner( - ownerUrn, - targetUrn, + OwnerUtils.removeOwnersFromResources( + ImmutableList.of(ownerUrn), + ImmutableList.of(new ResourceRefInput(input.getResourceUrn(), null, null)), actor, _entityService ); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index a0770dd32e4568..2e06cec2686d1f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -16,10 +16,13 @@ import com.linkedin.datahub.graphql.authorization.DisjunctivePrivilegeGroup; import com.linkedin.datahub.graphql.generated.OwnerEntityType; import com.linkedin.datahub.graphql.generated.OwnerInput; +import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils; import com.linkedin.metadata.Constants; import 
com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.mxe.MetadataChangeProposal; +import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -36,28 +39,34 @@ public class OwnerUtils { private OwnerUtils() { } - public static void addOwner( - Urn ownerUrn, - OwnershipType type, - Urn resourceUrn, + public static void addOwnersToResources( + List<OwnerInput> owners, + List<ResourceRefInput> resources, Urn actor, EntityService entityService ) { - Ownership ownershipAspect = (Ownership) getAspectFromEntity( - resourceUrn.toString(), - Constants.OWNERSHIP_ASPECT_NAME, - entityService, - new Ownership()); - addOwner(ownershipAspect, ownerUrn, type); - persistAspect(resourceUrn, Constants.OWNERSHIP_ASPECT_NAME, ownershipAspect, actor, entityService); + final List<MetadataChangeProposal> changes = new ArrayList<>(); + for (ResourceRefInput resource : resources) { + changes.add(buildAddOwnersProposal(owners, UrnUtils.getUrn(resource.getResourceUrn()), actor, entityService)); + } + ingestChangeProposals(changes, entityService, actor); } - public static void addOwners( - List<OwnerInput> owners, - Urn resourceUrn, + public static void removeOwnersFromResources( + List<Urn> ownerUrns, + List<ResourceRefInput> resources, Urn actor, EntityService entityService ) { + final List<MetadataChangeProposal> changes = new ArrayList<>(); + for (ResourceRefInput resource : resources) { + changes.add(buildRemoveOwnersProposal(ownerUrns, UrnUtils.getUrn(resource.getResourceUrn()), actor, entityService)); + } + ingestChangeProposals(changes, entityService, actor); + } + + + private static MetadataChangeProposal buildAddOwnersProposal(List<OwnerInput> owners, Urn resourceUrn, Urn actor, EntityService entityService) { Ownership ownershipAspect = (Ownership) getAspectFromEntity( resourceUrn.toString(), Constants.OWNERSHIP_ASPECT_NAME, @@ -66,11 +75,11 @@ public static void addOwners( for (OwnerInput input : owners) { addOwner(ownershipAspect, UrnUtils.getUrn(input.getOwnerUrn()), OwnershipType.valueOf(input.getType().toString())); } - persistAspect(resourceUrn, Constants.OWNERSHIP_ASPECT_NAME, ownershipAspect, actor, entityService); + return buildMetadataChangeProposal(resourceUrn, Constants.OWNERSHIP_ASPECT_NAME, ownershipAspect, actor, entityService); } - public static void removeOwner( - Urn ownerUrn, + public static MetadataChangeProposal buildRemoveOwnersProposal( + List<Urn> ownerUrns, Urn resourceUrn, Urn actor, EntityService entityService @@ -81,8 +90,8 @@ public static void removeOwner( entityService, new Ownership()); ownershipAspect.setLastModified(getAuditStamp(actor)); - removeOwner(ownershipAspect, ownerUrn); - persistAspect(resourceUrn, Constants.OWNERSHIP_ASPECT_NAME, ownershipAspect, actor, entityService); + removeOwnersIfExists(ownershipAspect, ownerUrns); + return buildMetadataChangeProposal(resourceUrn, Constants.OWNERSHIP_ASPECT_NAME, ownershipAspect, actor, entityService); } private static void addOwner(Ownership ownershipAspect, Urn ownerUrn, OwnershipType type) { @@ -103,13 +112,15 @@ private static void addOwner(Ownership ownershipAspect, Urn ownerUrn, OwnershipT ownershipAspect.setOwners(ownerArray); } - private static void removeOwner(Ownership ownership, Urn ownerUrn) { + private static void removeOwnersIfExists(Ownership ownership, List<Urn> ownerUrns) { if (!ownership.hasOwners()) { ownership.setOwners(new OwnerArray()); } OwnerArray ownerArray = ownership.getOwners(); - ownerArray.removeIf(owner -> owner.getOwner().equals(ownerUrn)); + for (Urn ownerUrn : ownerUrns) { + ownerArray.removeIf(owner -> owner.getOwner().equals(ownerUrn)); + } } public static boolean isAuthorizedToUpdateOwners(@Nonnull QueryContext context, Urn resourceUrn) { @@ -170,6 +181,26 @@ public static Boolean validateAddInput( return true; } + public static void validateOwner( + Urn ownerUrn, + OwnerEntityType ownerEntityType, + EntityService entityService + ) { + if (OwnerEntityType.CORP_GROUP.equals(ownerEntityType) && !Constants.CORP_GROUP_ENTITY_NAME.equals(ownerUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Failed to change ownership for resource(s). Expected a corp group urn, found %s.", ownerUrn)); + } + + if (OwnerEntityType.CORP_USER.equals(ownerEntityType) && !Constants.CORP_USER_ENTITY_NAME.equals(ownerUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Failed to change ownership for resource(s). Expected a corp user urn, found %s.", ownerUrn)); + } + + if (!entityService.exists(ownerUrn)) { + throw new IllegalArgumentException(String.format("Failed to change ownership for resource(s). Owner with urn %s does not exist.", ownerUrn)); + } + } + public static Boolean validateRemoveInput( Urn resourceUrn, EntityService entityService @@ -179,4 +210,11 @@ public static Boolean validateRemoveInput( } return true; } + + private static void ingestChangeProposals(List<MetadataChangeProposal> changes, EntityService entityService, Urn actor) { + // TODO: Replace this with a batch ingest proposals endpoint. + for (MetadataChangeProposal change : changes) { + entityService.ingestProposal(change, getAuditStamp(actor)); + } + } } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 00e5efbab946a0..5e50ab7b01740f 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -305,6 +305,11 @@ type Mutation { """ addOwner(input: AddOwnerInput!): Boolean + """ + Add owners to multiple Entities + """ + batchAddOwners(input: BatchAddOwnersInput!): Boolean + """ Add multiple owners to a particular Entity """ @@ -315,6 +320,11 @@ type Mutation { """ removeOwner(input: RemoveOwnerInput!): Boolean + """ + Remove owners from multiple Entities + """ + batchRemoveOwners(input: BatchRemoveOwnersInput!): Boolean + """ Add a link, or institutional memory, from a particular Entity """ @@ -6743,6 +6753,36 @@ input AddOwnersInput { resourceUrn: String! } +""" +Input provided when adding owners to a batch of assets +""" +input BatchAddOwnersInput { + """ + The primary keys of the owners + """ + owners: [OwnerInput!]! + + """ + The target assets to attach the owners to + """ + resources: [ResourceRefInput]! +} + +""" +Input provided when removing owners from a batch of assets +""" +input BatchRemoveOwnersInput { + """ + The primary keys of the owners + """ + ownerUrns: [String!]! + + """ + The target assets to remove the owners from + """ + resources: [ResourceRefInput]! 
+} + """ Input provided when removing the association between a Metadata Entity and an user or group owner """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java new file mode 100644 index 00000000000000..43121fa592fc92 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java @@ -0,0 +1,290 @@ +package com.linkedin.datahub.graphql.resolvers.owner; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.Owner; +import com.linkedin.common.OwnerArray; +import com.linkedin.common.Ownership; +import com.linkedin.common.OwnershipType; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.BatchAddOwnersInput; +import com.linkedin.datahub.graphql.generated.OwnerEntityType; +import com.linkedin.datahub.graphql.generated.OwnerInput; +import com.linkedin.datahub.graphql.generated.ResourceRefInput; +import com.linkedin.datahub.graphql.resolvers.mutate.BatchAddOwnersResolver; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.entity.EntityService; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.testng.Assert.*; + + +public class BatchAddOwnersResolverTest { + + private static final String TEST_ENTITY_URN_1 = "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; + private static final String TEST_ENTITY_URN_2 = "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test-2,PROD)"; + private static final String TEST_OWNER_URN_1 = "urn:li:corpuser:test-id-1"; + private static final String TEST_OWNER_URN_2 = "urn:li:corpuser:test-id-2"; + + @Test + public void testGetSuccessNoExistingOwners() throws Exception { + EntityService mockService = Mockito.mock(EntityService.class); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_1)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(null); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_2)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(null); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_1))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_2))).thenReturn(true); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_URN_1))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_URN_2))).thenReturn(true); + + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + BatchAddOwnersInput input = new BatchAddOwnersInput(ImmutableList.of(new OwnerInput( + TEST_OWNER_URN_1, + OwnerEntityType.CORP_USER, + com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER), + new OwnerInput( + TEST_OWNER_URN_2, + OwnerEntityType.CORP_USER, + 
com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER)), + ImmutableList.of( + new ResourceRefInput(TEST_ENTITY_URN_1, null, null), + new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + assertTrue(resolver.get(mockEnv).get()); + + Mockito.verify(mockService, Mockito.times(2)).ingestProposal( + Mockito.any(), // Ownership has a dynamically generated timestamp + Mockito.any(AuditStamp.class) + ); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_URN_1)) + ); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_URN_2)) + ); + } + + @Test + public void testGetSuccessExistingOwners() throws Exception { + final Ownership originalOwnership = new Ownership().setOwners(new OwnerArray(ImmutableList.of( + new Owner().setOwner(Urn.createFromString(TEST_OWNER_URN_1)).setType(OwnershipType.TECHNICAL_OWNER) + ))); + EntityService mockService = Mockito.mock(EntityService.class); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_1)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(originalOwnership); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_2)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(originalOwnership); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_1))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_2))).thenReturn(true); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_URN_1))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_URN_2))).thenReturn(true); + + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + BatchAddOwnersInput input = new BatchAddOwnersInput(ImmutableList.of(new OwnerInput( + TEST_OWNER_URN_1, + OwnerEntityType.CORP_USER, + com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER), + new OwnerInput( + TEST_OWNER_URN_2, + OwnerEntityType.CORP_USER, + com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER)), + ImmutableList.of( + new ResourceRefInput(TEST_ENTITY_URN_1, null, null), + new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + assertTrue(resolver.get(mockEnv).get()); + + Mockito.verify(mockService, Mockito.times(2)).ingestProposal( + Mockito.any(), // Ownership has a dynamically generated timestamp + Mockito.any(AuditStamp.class) + ); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_URN_1)) + ); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_URN_2)) + ); + } + + @Test + public void testGetFailureOwnerDoesNotExist() throws Exception { + EntityService mockService = Mockito.mock(EntityService.class); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_1)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(null); + + 
Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_1))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_URN_1))).thenReturn(false); + + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + BatchAddOwnersInput input = new BatchAddOwnersInput(ImmutableList.of(new OwnerInput( + TEST_OWNER_URN_1, + OwnerEntityType.CORP_USER, + com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER), + new OwnerInput( + TEST_OWNER_URN_2, + OwnerEntityType.CORP_USER, + com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER)), + ImmutableList.of( + new ResourceRefInput(TEST_ENTITY_URN_1, null, null), + new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + Mockito.verify(mockService, Mockito.times(0)).ingestProposal( + Mockito.any(), + Mockito.any(AuditStamp.class)); + } + + @Test + public void testGetFailureResourceDoesNotExist() throws Exception { + EntityService mockService = Mockito.mock(EntityService.class); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_1)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(null); + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_2)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(null); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_1))).thenReturn(false); + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_2))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_URN_1))).thenReturn(true); + + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + BatchAddOwnersInput input = new BatchAddOwnersInput(ImmutableList.of(new OwnerInput( + TEST_OWNER_URN_1, + OwnerEntityType.CORP_USER, + com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER), + new OwnerInput( + TEST_OWNER_URN_2, + OwnerEntityType.CORP_USER, + com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER)), + ImmutableList.of( + new ResourceRefInput(TEST_ENTITY_URN_1, null, null), + new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + Mockito.verify(mockService, Mockito.times(0)).ingestProposal( + Mockito.any(), + Mockito.any(AuditStamp.class)); + } + + @Test + public void testGetUnauthorized() throws Exception { + EntityService mockService = Mockito.mock(EntityService.class); + + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + BatchAddOwnersInput input = new BatchAddOwnersInput(ImmutableList.of(new OwnerInput( + TEST_OWNER_URN_1, + OwnerEntityType.CORP_USER, + 
com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER), + new OwnerInput( + TEST_OWNER_URN_2, + OwnerEntityType.CORP_USER, + com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER)), + ImmutableList.of( + new ResourceRefInput(TEST_ENTITY_URN_1, null, null), + new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + QueryContext mockContext = getMockDenyContext(); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + Mockito.verify(mockService, Mockito.times(0)).ingestProposal( + Mockito.any(), + Mockito.any(AuditStamp.class)); + } + + @Test + public void testGetEntityClientException() throws Exception { + EntityService mockService = Mockito.mock(EntityService.class); + + Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( + Mockito.any(), + Mockito.any(AuditStamp.class)); + + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + QueryContext mockContext = getMockAllowContext(); + BatchAddOwnersInput input = new BatchAddOwnersInput(ImmutableList.of(new OwnerInput( + TEST_OWNER_URN_1, + OwnerEntityType.CORP_USER, + com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER), + new OwnerInput( + TEST_OWNER_URN_2, + OwnerEntityType.CORP_USER, + com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER)), + ImmutableList.of( + new ResourceRefInput(TEST_ENTITY_URN_1, null, null), + new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java new file mode 100644 index 00000000000000..ac4e0a7cdbef63 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java @@ -0,0 +1,206 @@ +package com.linkedin.datahub.graphql.resolvers.owner; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.Owner; +import com.linkedin.common.OwnerArray; +import com.linkedin.common.Ownership; +import com.linkedin.common.OwnershipType; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.BatchRemoveOwnersInput; +import com.linkedin.datahub.graphql.generated.ResourceRefInput; +import com.linkedin.datahub.graphql.resolvers.mutate.BatchRemoveOwnersResolver; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.mxe.MetadataChangeProposal; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.testng.Assert.*; + + +public class BatchRemoveOwnersResolverTest { + + private static final 
String TEST_ENTITY_URN_1 = "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; + private static final String TEST_ENTITY_URN_2 = "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test-2,PROD)"; + private static final String TEST_OWNER_URN_1 = "urn:li:corpuser:test-id-1"; + private static final String TEST_OWNER_URN_2 = "urn:li:corpuser:test-id-2"; + + @Test + public void testGetSuccessNoExistingOwners() throws Exception { + EntityService mockService = Mockito.mock(EntityService.class); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_1)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(null); + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_2)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(null); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_1))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_2))).thenReturn(true); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_URN_1))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_URN_2))).thenReturn(true); + + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + BatchRemoveOwnersInput input = new BatchRemoveOwnersInput(ImmutableList.of( + TEST_OWNER_URN_1, + TEST_OWNER_URN_2 + ), ImmutableList.of( + new ResourceRefInput(TEST_ENTITY_URN_1, null, null), + new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + assertTrue(resolver.get(mockEnv).get()); + + Mockito.verify(mockService, Mockito.times(2)).ingestProposal( + Mockito.any(), // Ownership has a dynamically generated timestamp + Mockito.any(AuditStamp.class) + ); + } + + @Test + public void testGetSuccessExistingOwners() throws Exception { + EntityService mockService = Mockito.mock(EntityService.class); + + final Ownership oldOwners1 = new Ownership().setOwners(new OwnerArray(ImmutableList.of( + new Owner().setOwner(Urn.createFromString(TEST_OWNER_URN_1)).setType(OwnershipType.TECHNICAL_OWNER) + ))); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_1)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(oldOwners1); + + final Ownership oldOwners2 = new Ownership().setOwners(new OwnerArray(ImmutableList.of( + new Owner().setOwner(Urn.createFromString(TEST_OWNER_URN_2)).setType(OwnershipType.TECHNICAL_OWNER) + ))); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_2)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(oldOwners2); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_1))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_2))).thenReturn(true); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_URN_1))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_URN_2))).thenReturn(true); + + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv 
= Mockito.mock(DataFetchingEnvironment.class); + BatchRemoveOwnersInput input = new BatchRemoveOwnersInput(ImmutableList.of(TEST_OWNER_URN_1, TEST_OWNER_URN_2 + ), ImmutableList.of( + new ResourceRefInput(TEST_ENTITY_URN_1, null, null), + new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + assertTrue(resolver.get(mockEnv).get()); + + Mockito.verify(mockService, Mockito.times(2)).ingestProposal( + Mockito.any(MetadataChangeProposal.class), + Mockito.any(AuditStamp.class) + ); + } + + @Test + public void testGetFailureResourceDoesNotExist() throws Exception { + EntityService mockService = Mockito.mock(EntityService.class); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_1)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(null); + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN_2)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(null); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_1))).thenReturn(false); + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN_2))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_URN_1))).thenReturn(true); + + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + BatchRemoveOwnersInput input = new BatchRemoveOwnersInput(ImmutableList.of(TEST_OWNER_URN_1, TEST_OWNER_URN_2 + ), ImmutableList.of( + new ResourceRefInput(TEST_ENTITY_URN_1, null, null), + new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + Mockito.verify(mockService, Mockito.times(0)).ingestProposal( + Mockito.any(), + Mockito.any(AuditStamp.class)); + } + + @Test + public void testGetUnauthorized() throws Exception { + EntityService mockService = Mockito.mock(EntityService.class); + + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + BatchRemoveOwnersInput input = new BatchRemoveOwnersInput(ImmutableList.of(TEST_OWNER_URN_1, TEST_OWNER_URN_2 + ), ImmutableList.of( + new ResourceRefInput(TEST_ENTITY_URN_1, null, null), + new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + QueryContext mockContext = getMockDenyContext(); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + Mockito.verify(mockService, Mockito.times(0)).ingestProposal( + Mockito.any(), + Mockito.any(AuditStamp.class)); + } + + @Test + public void testGetEntityClientException() throws Exception { + EntityService mockService = Mockito.mock(EntityService.class); + + Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( + Mockito.any(), + Mockito.any(AuditStamp.class)); + + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + + // 
Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + QueryContext mockContext = getMockAllowContext(); + BatchRemoveOwnersInput input = new BatchRemoveOwnersInput(ImmutableList.of(TEST_OWNER_URN_1, TEST_OWNER_URN_2 + ), ImmutableList.of( + new ResourceRefInput(TEST_ENTITY_URN_1, null, null), + new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } +} \ No newline at end of file diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index 6c2b12ba1fac56..41144de7092aaf 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -75,6 +75,7 @@ "react-color": "^2.19.3", "react-dom": "^17.0.0", "react-icons": "4.3.1", + "react-js-cron": "^2.1.0", "react-router": "^5.2.0", "react-router-dom": "^5.1.6", "react-scripts": "4.0.3", diff --git a/datahub-web-react/src/app/analytics/event.ts b/datahub-web-react/src/app/analytics/event.ts index d49ff9dfef33ec..ff6f76d0f4459f 100644 --- a/datahub-web-react/src/app/analytics/event.ts +++ b/datahub-web-react/src/app/analytics/event.ts @@ -15,6 +15,7 @@ export enum EventType { EntityViewEvent, EntitySectionViewEvent, EntityActionEvent, + BatchEntityActionEvent, RecommendationImpressionEvent, RecommendationClickEvent, SearchAcrossLineageEvent, @@ -156,10 +157,16 @@ export const EntityActionType = { export interface EntityActionEvent extends BaseEvent { type: EventType.EntityActionEvent; actionType: string; - entityType: EntityType; + entityType?: EntityType; entityUrn: string; } +export interface BatchEntityActionEvent extends BaseEvent { + type: EventType.BatchEntityActionEvent; + actionType: string; + entityUrns: string[]; +} + export interface RecommendationImpressionEvent extends BaseEvent { type: EventType.RecommendationImpressionEvent; renderId: string; // TODO : Determine whether we need a render id to join with click event. 
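Reviewer note on the analytics change above: EntityActionEvent.entityType becomes optional because a batch update can span several entity types, and the new BatchEntityActionEvent carries the full list of urns instead of a single urn. A minimal sketch of how a caller is expected to choose between the two events, mirroring the emitAnalytics helper added to EditOwnersModal later in this diff (the relative import paths are assumptions, not part of the change):

    // Sketch only: emit the per-entity event for a single urn, the batch event otherwise.
    // `analytics`, EventType, and EntityActionType are the same modules imported
    // elsewhere in this diff; the paths below are illustrative.
    import analytics, { EventType, EntityActionType } from '../analytics';
    import { EntityType } from '../../types.generated';

    export function trackOwnershipUpdate(urns: string[], entityType?: EntityType) {
        if (urns.length > 1) {
            // Several assets changed at once: report the new batch event.
            analytics.event({
                type: EventType.BatchEntityActionEvent,
                actionType: EntityActionType.UpdateOwnership,
                entityUrns: urns,
            });
        } else {
            // Single asset: keep reporting the existing per-entity event.
            analytics.event({
                type: EventType.EntityActionEvent,
                actionType: EntityActionType.UpdateOwnership,
                entityType,
                entityUrn: urns[0],
            });
        }
    }
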
@@ -221,4 +228,5 @@ export type Event = | SearchAcrossLineageEvent | SearchAcrossLineageResultsViewEvent | DownloadAsCsvEvent - | RecommendationClickEvent; + | RecommendationClickEvent + | BatchEntityActionEvent; diff --git a/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx b/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx index 2cf84bc375c0cb..90f5c43e0432c3 100644 --- a/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx +++ b/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx @@ -4,7 +4,7 @@ import React, { useState } from 'react'; import styled from 'styled-components'; import { EntityType, Ownership } from '../../../types.generated'; import { ExpandedOwner } from '../shared/components/styled/ExpandedOwner'; -import { AddOwnersModal } from '../shared/containers/profile/sidebar/Ownership/AddOwnersModal'; +import { EditOwnersModal } from '../shared/containers/profile/sidebar/Ownership/EditOwnersModal'; import { DisplayCount, GroupSectionTitle, GroupSectionHeader } from '../shared/SidebarStyledComponents'; const TITLE = 'Owners'; @@ -51,10 +51,10 @@ export default function GroupOwnerSideBarSection({ urn, ownership, refetch }: Pr )} {showAddModal && ( - { setShowAddModal(false); diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/SearchSelectActions.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/SearchSelectActions.tsx index 7605fb56dff422..99018c21a4b915 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/SearchSelectActions.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/SearchSelectActions.tsx @@ -20,6 +20,7 @@ const DEFAULT_ACTION_GROUPS = [ SelectActionGroups.CHANGE_TAGS, SelectActionGroups.CHANGE_GLOSSARY_TERMS, SelectActionGroups.CHANGE_DOMAINS, + SelectActionGroups.CHANGE_OWNERS, ]; type Props = { @@ -64,6 +65,7 @@ export const SearchSelectActions = ({ selectedEntityUrns.length === 0 || !isEntityCapabilitySupported(EntityCapabilityType.OWNERS, selectedEntityTypes) } + refetch={refetch} /> )} {visibleActionGroups.has(SelectActionGroups.CHANGE_GLOSSARY_TERMS) && ( diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/action/OwnersDropdown.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/action/OwnersDropdown.tsx index f5267d7e8ffc33..5482d287ada321 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/action/OwnersDropdown.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/action/OwnersDropdown.tsx @@ -1,16 +1,18 @@ import React, { useState } from 'react'; -import { EntityType } from '../../../../../../../types.generated'; -import { AddOwnersModal } from '../../../../containers/profile/sidebar/Ownership/AddOwnersModal'; +import { EditOwnersModal, OperationType } from '../../../../containers/profile/sidebar/Ownership/EditOwnersModal'; import ActionDropdown from './ActionDropdown'; type Props = { urns: Array<string>; disabled: boolean; + refetch?: () => void; }; // eslint-disable-next-line -export default function OwnersDropdown({ urns, disabled = false }: Props) { - const [showAddModal, setShowAddModal] = useState(false); +export default function OwnersDropdown({ urns, disabled = false, refetch }: Props) { + const [isEditModalVisible, setIsEditModalVisible] = useState(false); + const [operationType, setOperationType] = useState(OperationType.ADD); + return ( <> setShowAddModal(true), + onClick: () => { + 
setOperationType(OperationType.ADD); + setIsEditModalVisible(true); + }, }, { title: 'Remove owners', - onClick: () => null, + onClick: () => { + setOperationType(OperationType.REMOVE); + setIsEditModalVisible(true); + }, }, ]} disabled={disabled} /> - {showAddModal && urns.length > 0 && ( - { - setShowAddModal(false); + setIsEditModalVisible(false); + refetch?.(); }} + hideOwnerType={operationType === OperationType.REMOVE} /> )} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/AddOwnersModal.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx similarity index 82% rename from datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/AddOwnersModal.tsx rename to datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx index c72f9135e6d4b9..99f78fa03e6a9e 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/AddOwnersModal.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx @@ -6,7 +6,10 @@ import { CorpUser, Entity, EntityType, OwnerEntityType, OwnershipType } from '.. import { useEntityRegistry } from '../../../../../../useEntityRegistry'; import analytics, { EventType, EntityActionType } from '../../../../../../analytics'; import { OWNERSHIP_DISPLAY_TYPES } from './ownershipUtils'; -import { useAddOwnersMutation } from '../../../../../../../graphql/mutations.generated'; +import { + useBatchAddOwnersMutation, + useBatchRemoveOwnersMutation, +} from '../../../../../../../graphql/mutations.generated'; import { useGetSearchResultsLazyQuery } from '../../../../../../../graphql/search.generated'; import { useGetRecommendations } from '../../../../../../shared/recommendation'; import { OwnerLabel } from '../../../../../../shared/OwnerLabel'; @@ -25,13 +28,19 @@ const StyleTag = styled(Tag)` align-items: center; `; +export enum OperationType { + ADD, + REMOVE, +} + type Props = { - urn: string; - type: EntityType; + urns: string[]; defaultOwnerType?: OwnershipType; hideOwnerType?: boolean | undefined; + operationType?: OperationType; onCloseModal: () => void; refetch?: () => Promise; + entityType?: EntityType; // Only used for tracking events }; // value: {ownerUrn: string, ownerEntityType: EntityType} @@ -40,10 +49,19 @@ type SelectedOwner = { value; }; -export const AddOwnersModal = ({ urn, type, hideOwnerType, defaultOwnerType, onCloseModal, refetch }: Props) => { +export const EditOwnersModal = ({ + urns, + hideOwnerType, + defaultOwnerType, + operationType = OperationType.ADD, + onCloseModal, + refetch, + entityType, +}: Props) => { const entityRegistry = useEntityRegistry(); const [inputValue, setInputValue] = useState(''); - const [addOwnersMutation] = useAddOwnersMutation(); + const [batchAddOwnersMutation] = useBatchAddOwnersMutation(); + const [batchRemoveOwnersMutation] = useBatchRemoveOwnersMutation(); const ownershipTypes = OWNERSHIP_DISPLAY_TYPES; const [selectedOwners, setSelectedOwners] = useState([]); const [selectedOwnerType, setSelectedOwnerType] = useState(defaultOwnerType || OwnershipType.None); @@ -64,12 +82,12 @@ export const AddOwnersModal = ({ urn, type, hideOwnerType, defaultOwnerType, onC }, [ownershipTypes]); // Invokes the search API as the owner types - const handleSearch = (entityType: EntityType, text: string, searchQuery: any) => { + const handleSearch = (type: EntityType, text: string, searchQuery: any) => { if (text.length > 
2) { searchQuery({ variables: { input: { - type: entityType, + type, query: text, start: 0, count: 5, @@ -167,35 +185,35 @@ export const AddOwnersModal = ({ urn, type, hideOwnerType, defaultOwnerType, onC ); }; - // Function to handle the modal action's - const onOk = async () => { - if (selectedOwners.length === 0) { - return; + const emitAnalytics = async () => { + if (urns.length > 1) { + analytics.event({ + type: EventType.BatchEntityActionEvent, + actionType: EntityActionType.UpdateOwnership, + entityUrns: urns, + }); + } else { + analytics.event({ + type: EventType.EntityActionEvent, + actionType: EntityActionType.UpdateOwnership, + entityType, + entityUrn: urns[0], + }); } - const inputs = selectedOwners.map((selectedActor) => { - const input = { - ownerUrn: selectedActor.value.ownerUrn, - ownerEntityType: selectedActor.value.ownerEntityType, - type: selectedOwnerType, - }; - return input; - }); + }; + + const batchAddOwners = async (inputs) => { try { - await addOwnersMutation({ + await batchAddOwnersMutation({ variables: { input: { owners: inputs, - resourceUrn: urn, + resources: urns.map((urn) => ({ resourceUrn: urn })), }, }, }); message.success({ content: 'Owners Added', duration: 2 }); - analytics.event({ - type: EventType.EntityActionEvent, - actionType: EntityActionType.UpdateOwnership, - entityType: type, - entityUrn: urn, - }); + emitAnalytics(); } catch (e: unknown) { message.destroy(); if (e instanceof Error) { @@ -207,13 +225,57 @@ export const AddOwnersModal = ({ urn, type, hideOwnerType, defaultOwnerType, onC } }; + const batchRemoveOwners = async (inputs) => { + try { + await batchRemoveOwnersMutation({ + variables: { + input: { + ownerUrns: inputs.map((input) => input.ownerUrn), + resources: urns.map((urn) => ({ resourceUrn: urn })), + }, + }, + }); + message.success({ content: 'Owners Removed', duration: 2 }); + emitAnalytics(); + } catch (e: unknown) { + message.destroy(); + if (e instanceof Error) { + message.error({ content: `Failed to remove owners: \n ${e.message || ''}`, duration: 3 }); + } + } finally { + refetch?.(); + onModalClose(); + } + }; + + // Function to handle the modal action's + const onOk = async () => { + if (selectedOwners.length === 0) { + return; + } + const inputs = selectedOwners.map((selectedActor) => { + const input = { + ownerUrn: selectedActor.value.ownerUrn, + ownerEntityType: selectedActor.value.ownerEntityType, + type: selectedOwnerType, + }; + return input; + }); + + if (operationType === OperationType.ADD) { + batchAddOwners(inputs); + } else { + batchRemoveOwners(inputs); + } + }; + function handleBlur() { setInputValue(''); } return ( } diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx index 2bbaf0551c4178..3ee949732eddfc 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx @@ -5,7 +5,7 @@ import { ExpandedOwner } from '../../../../components/styled/ExpandedOwner'; import { EMPTY_MESSAGES } from '../../../../constants'; import { useEntityData, useMutationUrn, useRefetch } from '../../../../EntityContext'; import { SidebarHeader } from '../SidebarHeader'; -import { AddOwnersModal } from './AddOwnersModal'; +import { EditOwnersModal } from './EditOwnersModal'; export const SidebarOwnerSection = ({ 
properties }: { properties?: any }) => { const { entityType, entityData } = useEntityData(); @@ -38,11 +38,11 @@ export const SidebarOwnerSection = ({ properties }: { properties?: any }) => { {showAddModal && ( - { setShowAddModal(false); diff --git a/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx b/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx index 1ad28d8b92260f..2d755d29c9c8a8 100644 --- a/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx @@ -1,5 +1,7 @@ -import { Button, Form, Input, Typography } from 'antd'; +import { Button, Form, Typography } from 'antd'; import React, { useMemo } from 'react'; +import { Cron } from 'react-js-cron'; +import 'react-js-cron/dist/styles.css'; import styled from 'styled-components'; import cronstrue from 'cronstrue'; import { CheckCircleOutlined } from '@ant-design/icons'; @@ -24,7 +26,6 @@ const SelectTemplateHeader = styled(Typography.Title)` const CronText = styled(Typography.Paragraph)` &&& { - margin-top: 8px; margin-bottom: 0px; } color: ${ANTD_GRAY[7]}; @@ -41,10 +42,21 @@ const ControlsContainer = styled.div` margin-top: 8px; `; +const StyledFormItem = styled(Form.Item)` + .cron-builder { + color: ${ANTD_GRAY[7]}; + } + .cron-builder-select { + min-width: 100px; + } +`; + const ItemDescriptionText = styled(Typography.Paragraph)``; +const DAILY_MIDNIGHT_CRON_INTERVAL = '0 0 * * *'; + export const CreateScheduleStep = ({ state, updateState, goTo, prev }: StepProps) => { - const interval = state.schedule?.interval || ''; + const interval = state.schedule?.interval?.replaceAll(', ', ' ') || DAILY_MIDNIGHT_CRON_INTERVAL; const timezone = state.schedule?.timezone || Intl.DateTimeFormat().resolvedOptions().timeZone; const setTimezone = (tz: string) => { @@ -110,9 +122,14 @@ export const CreateScheduleStep = ({ state, updateState, goTo, prev }: StepProps Configure your ingestion source to run on a schedule.
- Schedule}> - Provide a custom cron schedule. - setCronInterval(e.target.value)} placeholder="* * * * *" /> + Schedule}> + {cronAsText.error && <>Invalid cron schedule. Cron must be of UNIX form:} {!cronAsText.text && ( @@ -127,7 +144,7 @@ export const CreateScheduleStep = ({ state, updateState, goTo, prev }: StepProps )} - + Timezone}> Select the timezone to run the cron schedule in. diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index cbb0315b096dd8..cb924b6e7b67b5 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -83,7 +83,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) Advanced: Provide a custom CLI version to use for ingestion. setVersion(event.target.value)} /> diff --git a/datahub-web-react/src/app/shared/TagStyleEntity.tsx b/datahub-web-react/src/app/shared/TagStyleEntity.tsx index 9cbcd9dd286b12..4ea6be06d263f7 100644 --- a/datahub-web-react/src/app/shared/TagStyleEntity.tsx +++ b/datahub-web-react/src/app/shared/TagStyleEntity.tsx @@ -17,7 +17,7 @@ import { useUpdateDescriptionMutation, useSetTagColorMutation } from '../../grap import { useGetSearchResultsForMultipleQuery } from '../../graphql/search.generated'; import analytics, { EventType, EntityActionType } from '../analytics'; import { GetSearchResultsParams, SearchResultInterface } from '../entity/shared/components/styled/search/types'; -import { AddOwnersModal } from '../entity/shared/containers/profile/sidebar/Ownership/AddOwnersModal'; +import { EditOwnersModal } from '../entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal'; import CopyUrn from './CopyUrn'; import EntityDropdown from '../entity/shared/EntityDropdown'; import { EntityMenuItems } from '../entity/shared/EntityDropdown/EntityDropdown'; @@ -420,14 +420,14 @@ export default function TagStyleEntity({ urn, useGetSearchResults = useWrappedSe
{showAddModal && ( - <AddOwnersModal + <EditOwnersModal onCloseModal={() => { setShowAddModal(false); }} - urn={urn} - type={EntityType.Tag} + urns={[urn]} + entityType={EntityType.Tag} /> )}
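All of the UI entry points above funnel into the two new GraphQL mutations declared in mutations.graphql next. For a quick sanity check outside the UI, the mutations can also be called directly; the sketch below is illustrative only and assumes a GMS GraphQL endpoint at localhost:8080/api/graphql, a personal access token, and placeholder owner/dataset URNs, none of which come from this change.

```python
# Illustrative sketch: invoking the new batchAddOwners mutation directly.
# The endpoint, token, and URNs below are placeholder assumptions.
import requests

GRAPHQL_URL = "http://localhost:8080/api/graphql"  # assumed GMS endpoint
TOKEN = "<personal-access-token>"  # placeholder

BATCH_ADD_OWNERS = """
mutation batchAddOwners($input: BatchAddOwnersInput!) {
    batchAddOwners(input: $input)
}
"""

variables = {
    "input": {
        # Same shape that EditOwnersModal builds from the selected owners.
        "owners": [
            {
                "ownerUrn": "urn:li:corpuser:jdoe",  # placeholder owner
                "ownerEntityType": "CORP_USER",
                "type": "TECHNICAL_OWNER",
            }
        ],
        # One ResourceRefInput per selected entity URN.
        "resources": [
            {"resourceUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD)"}
        ],
    }
}

response = requests.post(
    GRAPHQL_URL,
    json={"query": BATCH_ADD_OWNERS, "variables": variables},
    headers={"Authorization": f"Bearer {TOKEN}"},
)
response.raise_for_status()
print(response.json())  # expect {"data": {"batchAddOwners": true}} on success
```

batchRemoveOwners takes the same resources list but a flat ownerUrns array, matching the shape EditOwnersModal builds in its batchRemoveOwners handler above.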
diff --git a/datahub-web-react/src/graphql/mutations.graphql b/datahub-web-react/src/graphql/mutations.graphql index 9245c0f1ef26f1..cab44542351751 100644 --- a/datahub-web-react/src/graphql/mutations.graphql +++ b/datahub-web-react/src/graphql/mutations.graphql @@ -42,10 +42,18 @@ mutation addOwner($input: AddOwnerInput!) { addOwner(input: $input) } +mutation batchAddOwners($input: BatchAddOwnersInput!) { + batchAddOwners(input: $input) +} + mutation removeOwner($input: RemoveOwnerInput!) { removeOwner(input: $input) } +mutation batchRemoveOwners($input: BatchRemoveOwnersInput!) { + batchRemoveOwners(input: $input) +} + mutation updateDescription($input: DescriptionUpdateInput!) { updateDescription(input: $input) } diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock index 16c16f9574335d..e81f0284f2b41e 100644 --- a/datahub-web-react/yarn.lock +++ b/datahub-web-react/yarn.lock @@ -13978,6 +13978,11 @@ react-is@^17.0.0, react-is@^17.0.1: resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0" integrity sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w== +react-js-cron@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/react-js-cron/-/react-js-cron-2.1.0.tgz#ce88a3260617222d8e1dc51534bb6606088304fc" + integrity sha512-mxpSS8WJAY6gRZ+XR8z22u4mRKfHmB4ej2BO3DKrNRZhTGdIdF19F45LZT6GKjQqVyWOYMK1mFafTvgqqoAXlQ== + react-markdown@6.0.2: version "6.0.2" resolved "https://registry.yarnpkg.com/react-markdown/-/react-markdown-6.0.2.tgz#d89be45c278b1e5f0196f851fffb11e30c69f027" diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile index 24c8c82f002797..379c815e99159b 100644 --- a/docker/datahub-gms/Dockerfile +++ b/docker/datahub-gms/Dockerfile @@ -47,6 +47,8 @@ FROM base as dev-install FROM ${APP_ENV}-install as final +RUN mkdir -p /etc/datahub/plugins/auth/resources + RUN addgroup -S datahub && adduser -S datahub -G datahub USER datahub diff --git a/docker/datahub-gms/env/docker-without-neo4j.env b/docker/datahub-gms/env/docker-without-neo4j.env index 9773ff19557607..6356b33d5eb32d 100644 --- a/docker/datahub-gms/env/docker-without-neo4j.env +++ b/docker/datahub-gms/env/docker-without-neo4j.env @@ -16,7 +16,7 @@ MAE_CONSUMER_ENABLED=true MCE_CONSUMER_ENABLED=true PE_CONSUMER_ENABLED=true UI_INGESTION_ENABLED=true -UI_INGESTION_DEFAULT_CLI_VERSION=0.8.41 +UI_INGESTION_DEFAULT_CLI_VERSION=0.8.42 # Uncomment to disable persistence of client-side analytics events # DATAHUB_ANALYTICS_ENABLED=false diff --git a/docker/datahub-gms/env/docker.env b/docker/datahub-gms/env/docker.env index cacb387281527d..58003b128acc09 100644 --- a/docker/datahub-gms/env/docker.env +++ b/docker/datahub-gms/env/docker.env @@ -19,7 +19,7 @@ MAE_CONSUMER_ENABLED=true MCE_CONSUMER_ENABLED=true PE_CONSUMER_ENABLED=true UI_INGESTION_ENABLED=true -UI_INGESTION_DEFAULT_CLI_VERSION=0.8.41 +UI_INGESTION_DEFAULT_CLI_VERSION=0.8.42 # Uncomment to enable Metadata Service Authentication # METADATA_SERVICE_AUTH_ENABLED=true @@ -70,4 +70,4 @@ UI_INGESTION_DEFAULT_CLI_VERSION=0.8.41 # SECRET_SERVICE_ENCRYPTION_KEY= # Uncomment to increase concurrency across Kafka consumers -# KAFKA_LISTENER_CONCURRENCY=2 \ No newline at end of file +# KAFKA_LISTENER_CONCURRENCY=2 diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 298ea8c47d21e3..54617de53b46cb 100644 --- 
a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -82,7 +82,7 @@ services: - MCE_CONSUMER_ENABLED=true - PE_CONSUMER_ENABLED=true - UI_INGESTION_ENABLED=true - - UI_INGESTION_DEFAULT_CLI_VERSION=0.8.41 + - UI_INGESTION_DEFAULT_CLI_VERSION=0.8.42 hostname: datahub-gms image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} ports: diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index 6e18d3f5e912b2..a0db2bcc059bff 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -87,7 +87,7 @@ services: - MCE_CONSUMER_ENABLED=true - PE_CONSUMER_ENABLED=true - UI_INGESTION_ENABLED=true - - UI_INGESTION_DEFAULT_CLI_VERSION=0.8.41 + - UI_INGESTION_DEFAULT_CLI_VERSION=0.8.42 hostname: datahub-gms image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} ports: diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 1ebee1353d5574..c6b250c4d98316 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -4,6 +4,16 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ## Next +### Breaking Changes + +### Potential Downtime + +### Deprecations + +### Other notable Changes + +## `v0.8.42` + ### Breaking Changes - #5451 `GMS_HOST` and `GMS_PORT` environment variables deprecated in `v0.8.39` have been removed. Use `DATAHUB_GMS_HOST` and `DATAHUB_GMS_PORT` instead. - #5478 DataHub CLI `delete` command when used with `--hard` option will delete soft-deleted entities which match the other filters given. diff --git a/metadata-ingestion/archived/source_docs/elastic_search.md b/metadata-ingestion/archived/source_docs/elastic_search.md index 8e30e17dc34714..cbb43ee2837738 100644 --- a/metadata-ingestion/archived/source_docs/elastic_search.md +++ b/metadata-ingestion/archived/source_docs/elastic_search.md @@ -41,6 +41,9 @@ source: index_pattern: allow: [".*some_index_name_pattern*"] deny: [".*skip_index_name_pattern*"] + ingest_index_templates: False + index_template_pattern: + allow: [".*some_index_template_name_pattern*"] sink: # sink configs @@ -51,17 +54,20 @@ sink: Note that a `.` is used to denote nested fields in the YAML recipe. -| Field | Required | Default | Description | -| --------------------------- | -------- |--------------------|---------------------------------------------------------------| -| `host` | ✅ | `"localhost:9092"` | The elastic search host URI. | -| `username` | | None | The username credential. | -| `password` | | None | The password credential. | -| `url_prefix` | | "" | There are cases where an enterprise would have multiple elastic search clusters. One way for them to manage is to have a single endpoint for all the elastic search clusters and use url_prefix for routing requests to different clusters. | -| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. | -| `platform_instance` | | None | The Platform instance to use while constructing URNs. | -| `index_pattern.allow` | | | List of regex patterns for indexes to include in ingestion. | -| `index_pattern.deny` | | | List of regex patterns for indexes to exclude from ingestion. 
| -| `index_pattern.ignoreCase` | | `True` | Whether regex matching should ignore case or not | +| Field | Required | Default | Description | +|--------------------------------| -------- |--------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `host` | ✅ | `"localhost:9092"` | The elastic search host URI. | +| `username` | | None | The username credential. | +| `password` | | None | The password credential. | +| `url_prefix` | | "" | There are cases where an enterprise would have multiple elastic search clusters. One way for them to manage is to have a single endpoint for all the elastic search clusters and use url_prefix for routing requests to different clusters. | +| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. | +| `platform_instance` | | None | The Platform instance to use while constructing URNs. | +| `index_pattern.allow` | | | List of regex patterns for indexes to include in ingestion. | +| `index_pattern.deny` | | | List of regex patterns for indexes to exclude from ingestion. | +| `index_pattern.ignoreCase` | | `True` | Whether regex matching should ignore case or not | +| `ingest_index_templates` | | `False` | Whether index templates should be ingested | +| `index_template_pattern.allow` | | | List of regex patterns for index templates to include in ingestion. | +| `index_template_pattern.deny` | | | List of regex patterns for index templates to exclude from ingestion. | ## Compatibility diff --git a/metadata-ingestion/docs/sources/elastic-search/elasticsearch_recipe.yml b/metadata-ingestion/docs/sources/elastic-search/elasticsearch_recipe.yml index 689f1fb1962e60..d93588153fd2ed 100644 --- a/metadata-ingestion/docs/sources/elastic-search/elasticsearch_recipe.yml +++ b/metadata-ingestion/docs/sources/elastic-search/elasticsearch_recipe.yml @@ -23,6 +23,9 @@ source: index_pattern: allow: [".*some_index_name_pattern*"] deny: [".*skip_index_name_pattern*"] + ingest_index_templates: False + index_template_pattern: + allow: [".*some_index_template_name_pattern*"] sink: # sink configs diff --git a/metadata-ingestion/examples/recipes/elasticsearch_to_datahub.dhub.yaml b/metadata-ingestion/examples/recipes/elasticsearch_to_datahub.dhub.yaml index e3b56aa0e6c891..33876dff9efa0a 100644 --- a/metadata-ingestion/examples/recipes/elasticsearch_to_datahub.dhub.yaml +++ b/metadata-ingestion/examples/recipes/elasticsearch_to_datahub.dhub.yaml @@ -11,7 +11,11 @@ source: client_key: "./path/client.key" ssl_assert_hostname: False ssl_assert_fingerprint: "./path/cert.fingerprint" - + ingest_index_templates: False + # index_template_pattern: + # allow: + # - "^.+" + sink: type: "datahub-rest" config: diff --git a/metadata-ingestion/scripts/avro_codegen.py b/metadata-ingestion/scripts/avro_codegen.py index e5758d159bdee5..0a6b1b730aacd6 100644 --- a/metadata-ingestion/scripts/avro_codegen.py +++ b/metadata-ingestion/scripts/avro_codegen.py @@ -46,6 +46,7 @@ def default(self, obj): # This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py # Do not modify manually! 
+# pylint: skip-file # fmt: off """ autogen_footer = """ diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 89cf4e9ee39c98..f683fdc736d3d5 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -355,7 +355,7 @@ def get_long_description(): "flake8>=3.8.3", "flake8-tidy-imports>=4.3.0", "isort>=5.7.0", - "mypy>=0.920", + "mypy>=0.950", # pydantic 1.8.2 is incompatible with mypy 0.910. # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. "pydantic>=1.9.0", diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index fae132f543eab8..0fbaf1e74e3dd2 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -1,8 +1,9 @@ import re from abc import ABC, abstractmethod +from enum import Enum from typing import IO, Any, Dict, List, Optional, Pattern, cast -from pydantic import BaseModel, Extra +from pydantic import BaseModel, Extra, validator from pydantic.fields import Field @@ -11,6 +12,23 @@ class Config: extra = Extra.forbid +class TransformerSemantics(Enum): + """Describes semantics for aspect changes""" + + OVERWRITE = "OVERWRITE" # Apply changes blindly + PATCH = "PATCH" # Only apply differences from what exists already on the server + + +class TransformerSemanticsConfigModel(ConfigModel): + semantics: TransformerSemantics = TransformerSemantics.OVERWRITE + + @validator("semantics", pre=True) + def ensure_semantics_is_upper_case(cls, v: str) -> str: + if isinstance(v, str): + return v.upper() + return v + + class DynamicTypedConfig(ConfigModel): type: str = Field( description="The type of the dynamic object", diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index f79e792b54f65d..be4116ae00267c 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -15,6 +15,7 @@ from datahub.emitter.serialization_helper import post_json_transform from datahub.metadata.schema_classes import ( DatasetUsageStatisticsClass, + DomainPropertiesClass, DomainsClass, GlobalTagsClass, GlossaryTermsClass, @@ -183,6 +184,13 @@ def get_ownership(self, entity_urn: str) -> Optional[OwnershipClass]: aspect_type=OwnershipClass, ) + def get_domain_properties(self, entity_urn: str) -> Optional[DomainPropertiesClass]: + return self.get_aspect_v2( + entity_urn=entity_urn, + aspect="domainProperties", + aspect_type=DomainPropertiesClass, + ) + def get_tags(self, entity_urn: str) -> Optional[GlobalTagsClass]: return self.get_aspect_v2( entity_urn=entity_urn, diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index 84878b940f3536..2a93c4ffe69e97 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -124,7 +124,7 @@ class Pipeline: def _record_initialization_failure(self, e: Exception, msg: str) -> None: self.pipeline_init_exception: Optional[Exception] = e self.pipeline_init_failures: Optional[str] = f"{msg} due to {e}" - logger.error(e) + logger.exception(e) def __init__( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py index 74fca0d2654519..938628b72769ee 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py +++ b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py @@ -85,10 +85,12 @@ class ElasticToSchemaFieldConverter: "match_only_text": StringTypeClass, "completion": StringTypeClass, "search_as_you_type": StringTypeClass, + "ip": StringTypeClass, # Records "object": RecordTypeClass, "flattened": RecordTypeClass, "nested": RecordTypeClass, + "geo_point": RecordTypeClass, # Arrays "histogram": ArrayTypeClass, "aggregate_metric_double": ArrayTypeClass, @@ -224,6 +226,13 @@ class ElasticsearchSourceConfig(DatasetSourceConfigBase): default=AllowDenyPattern(allow=[".*"], deny=["^_.*", "^ilm-history.*"]), description="regex patterns for indexes to filter in ingestion.", ) + ingest_index_templates: bool = Field( + default=False, description="Ingests ES index templates if enabled." + ) + index_template_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern(allow=[".*"], deny=["^_.*"]), + description="The regex patterns for filtering index templates to ingest.", + ) @validator("host") def host_colon_port_comma(cls, host_val: str) -> str: @@ -304,7 +313,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: self.report.report_index_scanned(index) if self.source_config.index_pattern.allowed(index): - for mcp in self._extract_mcps(index): + for mcp in self._extract_mcps(index, is_index=True): wu = MetadataWorkUnit(id=f"index-{index}", mcp=mcp) self.report.report_workunit(wu) yield wu @@ -315,6 +324,14 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: wu = MetadataWorkUnit(id=f"index-{index}", mcp=mcp) self.report.report_workunit(wu) yield wu + if self.source_config.ingest_index_templates: + templates = self.client.indices.get_template() + for template in templates: + if self.source_config.index_template_pattern.allowed(template): + for mcp in self._extract_mcps(template, is_index=False): + wu = MetadataWorkUnit(id=f"template-{template}", mcp=mcp) + self.report.report_workunit(wu) + yield wu def _get_data_stream_index_count_mcps( self, @@ -336,19 +353,26 @@ def _get_data_stream_index_count_mcps( changeType=ChangeTypeClass.UPSERT, ) - def _extract_mcps(self, index: str) -> Iterable[MetadataChangeProposalWrapper]: - logger.debug(f"index = {index}") - raw_index = self.client.indices.get(index=index) - raw_index_metadata = raw_index[index] - - # 0. Dedup data_streams. - data_stream = raw_index_metadata.get("data_stream") - if data_stream: - index = data_stream - self.data_stream_partition_count[index] += 1 - if self.data_stream_partition_count[index] > 1: - # This is a duplicate, skip processing it further. - return + def _extract_mcps( + self, index: str, is_index: bool = True + ) -> Iterable[MetadataChangeProposalWrapper]: + logger.debug(f"index='{index}', is_index={is_index}") + + if is_index: + raw_index = self.client.indices.get(index=index) + raw_index_metadata = raw_index[index] + + # 0. Dedup data_streams. + data_stream = raw_index_metadata.get("data_stream") + if data_stream: + index = data_stream + self.data_stream_partition_count[index] += 1 + if self.data_stream_partition_count[index] > 1: + # This is a duplicate, skip processing it further. + return + else: + raw_index = self.client.indices.get_template(name=index) + raw_index_metadata = raw_index[index] # 1. Construct and emit the schemaMetadata aspect # 1.1 Generate the schema fields from ES mappings. 
@@ -401,23 +425,47 @@ def _extract_mcps(self, index: str) -> Iterable[MetadataChangeProposalWrapper]: entityUrn=dataset_urn, aspectName="subTypes", aspect=SubTypesClass( - typeNames=["Index" if not data_stream else "DataStream"] + typeNames=[ + "Index Template" + if not is_index + else "Index" + if not data_stream + else "Datastream" + ] ), changeType=ChangeTypeClass.UPSERT, ) - # 4. Construct and emit properties if needed - index_aliases = raw_index_metadata.get("aliases", {}).keys() + # 4. Construct and emit properties if needed. Will attempt to get the following properties + custom_properties: Dict[str, str] = {} + # 4.1 aliases + index_aliases: List[str] = raw_index_metadata.get("aliases", {}).keys() if index_aliases: - yield MetadataChangeProposalWrapper( - entityType="dataset", - entityUrn=dataset_urn, - aspectName="datasetProperties", - aspect=DatasetPropertiesClass( - customProperties={"aliases": ",".join(index_aliases)} - ), - changeType=ChangeTypeClass.UPSERT, - ) + custom_properties["aliases"] = ",".join(index_aliases) + # 4.2 index_patterns + index_patterns: List[str] = raw_index_metadata.get("index_patterns", []) + if index_patterns: + custom_properties["index_patterns"] = ",".join(index_patterns) + + # 4.3 number_of_shards + index_settings: Dict[str, Any] = raw_index_metadata.get("settings", {}).get( + "index", {} + ) + num_shards: str = index_settings.get("number_of_shards", "") + if num_shards: + custom_properties["num_shards"] = num_shards + # 4.4 number_of_replicas + num_replicas: str = index_settings.get("number_of_replicas", "") + if num_replicas: + custom_properties["num_replicas"] = num_replicas + + yield MetadataChangeProposalWrapper( + entityType="dataset", + entityUrn=dataset_urn, + aspectName="datasetProperties", + aspect=DatasetPropertiesClass(customProperties=custom_properties), + changeType=ChangeTypeClass.UPSERT, + ) # 5. Construct and emit platform instance aspect if self.source_config.platform_instance: diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index c2dd7c0f15d59b..e3e081d65bcf9c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -226,10 +226,9 @@ def _is_single_row_query_method(query: Any) -> bool: return False -# mypy does not yet support ParamSpec. See https://github.com/python/mypy/issues/8645. def _run_with_query_combiner( - method: Callable[Concatenate["_SingleDatasetProfiler", P], None] # type: ignore -) -> Callable[Concatenate["_SingleDatasetProfiler", P], None]: # type: ignore + method: Callable[Concatenate["_SingleDatasetProfiler", P], None] +) -> Callable[Concatenate["_SingleDatasetProfiler", P], None]: @functools.wraps(method) def inner( self: "_SingleDatasetProfiler", *args: P.args, **kwargs: P.kwargs diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 7cd99608b95af1..29dc2d88c7f5af 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -773,7 +773,7 @@ def get_identifier( self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any ) -> str: # Many SQLAlchemy dialects have three-level hierarchies. This method, which - # subclasses can override, enables them to modify the identifers as needed. 
+ # subclasses can override, enables them to modify the identifiers as needed. if hasattr(self.config, "get_identifier"): # This path is deprecated and will eventually be removed. return self.config.get_identifier(schema=schema, table=entity) # type: ignore @@ -1209,7 +1209,7 @@ def _process_view( canonical_schema=schema_fields, ) try: - # SQLALchemy stubs are incomplete and missing this method. + # SQLAlchemy stubs are incomplete and missing this method. # PR: https://github.com/dropbox/sqlalchemy-stubs/pull/223. view_info: dict = inspector.get_table_comment(view, schema) # type: ignore except NotImplementedError: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index aedd081f98cadf..da5c5aa8d03f47 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -228,9 +228,10 @@ def resolve_postgres_modified_type(type_string: str) -> Any: def resolve_trino_modified_type(type_string: str) -> Any: - # for cases like timestamp(3) - if re.match(r"[a-zA-Z]+\([0-9]+\)", type_string): - modified_type_base = re.match(r"([a-zA-Z]+)\([0-9]+\)", type_string).group(1) # type: ignore + # for cases like timestamp(3), decimal(10,0), row(...) + match = re.match(r"([a-zA-Z]+)\(.+\)", type_string) + if match: + modified_type_base: str = match.group(1) return TRINO_SQL_TYPES_MAP[modified_type_base] else: return TRINO_SQL_TYPES_MAP[type_string] @@ -337,4 +338,5 @@ def resolve_trino_modified_type(type_string: str) -> Any: "date": DateType, "time": TimeType, "timestamp": TimeType, + "row": RecordType, } diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py index 82cfecbddfed39..a157d0d955ba97 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py @@ -17,6 +17,7 @@ DatasetPropertiesClass, DatasetSnapshotClass, DatasetUpstreamLineageClass, + DomainsClass, EditableDatasetPropertiesClass, EditableSchemaMetadataClass, GlobalTagsClass, @@ -41,6 +42,7 @@ class SnapshotAspectRegistry: def __init__(self): self.aspect_name_type_mapping = { "ownership": OwnershipClass, + "domains": DomainsClass, "globalTags": GlobalTagsClass, "datasetProperties": DatasetPropertiesClass, "editableDatasetProperties": EditableDatasetPropertiesClass, diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain.py b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain.py new file mode 100644 index 00000000000000..5ed7d7d7616b3d --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain.py @@ -0,0 +1,169 @@ +from typing import Callable, List, Optional, Union, cast + +from datahub.configuration.common import ( + ConfigurationError, + KeyValuePattern, + TransformerSemantics, + TransformerSemanticsConfigModel, +) +from datahub.configuration.import_resolver import pydantic_resolve_key +from datahub.emitter.mce_builder import Aspect +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.transformer.dataset_transformer import DatasetDomainTransformer +from datahub.metadata.schema_classes import DomainsClass +from datahub.utilities.registries.domain_registry import DomainRegistry + + +class 
AddDatasetDomainSemanticsConfig(TransformerSemanticsConfigModel): + get_domains_to_add: Union[ + Callable[[str], DomainsClass], + Callable[[str], DomainsClass], + ] + + _resolve_domain_fn = pydantic_resolve_key("get_domains_to_add") + + +class SimpleDatasetDomainSemanticsConfig(TransformerSemanticsConfigModel): + domain_urns: List[str] + + +class PatternDatasetDomainSemanticsConfig(TransformerSemanticsConfigModel): + domain_pattern: KeyValuePattern = KeyValuePattern.all() + + +class AddDatasetDomain(DatasetDomainTransformer): + """Transformer that adds domains to datasets according to a callback function.""" + + ctx: PipelineContext + config: AddDatasetDomainSemanticsConfig + + def __init__(self, config: AddDatasetDomainSemanticsConfig, ctx: PipelineContext): + super().__init__() + self.ctx = ctx + self.config = config + + @classmethod + def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetDomain": + config = AddDatasetDomainSemanticsConfig.parse_obj(config_dict) + return cls(config, ctx) + + @staticmethod + def get_domain_class( + graph: Optional[DataHubGraph], domains: List[str] + ) -> DomainsClass: + domain_registry: DomainRegistry = DomainRegistry( + cached_domains=[k for k in domains], graph=graph + ) + domain_class = DomainsClass( + domains=[domain_registry.get_domain_urn(domain) for domain in domains] + ) + return domain_class + + @staticmethod + def get_domains_to_set( + graph: DataHubGraph, urn: str, mce_domain: Optional[DomainsClass] + ) -> Optional[DomainsClass]: + if not mce_domain or not mce_domain.domains: + # nothing to add, no need to consult server + return None + + server_domain = graph.get_domain(entity_urn=urn) + if server_domain: + # compute patch + # only include domains that are not already present in the server domain list + domains_to_add: List[str] = [] + for domain in mce_domain.domains: + if domain not in server_domain.domains: + domains_to_add.append(domain) + + mce_domain.domains = server_domain.domains + domains_to_add + + return mce_domain + + def transform_aspect( + self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect] + ) -> Optional[Aspect]: + + domain_aspect: DomainsClass = DomainsClass(domains=[]) + # Check if we have received an existing aspect + if aspect is not None: + domain_aspect.domains.extend(cast(DomainsClass, aspect).domains) + + domain_to_add = self.config.get_domains_to_add(entity_urn) + + domain_aspect.domains.extend(domain_to_add.domains) + + if self.config.semantics == TransformerSemantics.PATCH: + assert self.ctx.graph + patch_domain_aspect: Optional[ + DomainsClass + ] = AddDatasetDomain.get_domains_to_set( + self.ctx.graph, entity_urn, domain_aspect + ) + # Fall back to the unpatched aspect; this also keeps mypy happy + domain_aspect = ( + patch_domain_aspect + if patch_domain_aspect is not None + else domain_aspect + ) + + return cast(Optional[Aspect], domain_aspect) + + +class SimpleAddDatasetDomain(AddDatasetDomain): + """Transformer that adds a specified set of domains to each dataset.""" + + def __init__( + self, config: SimpleDatasetDomainSemanticsConfig, ctx: PipelineContext + ): + if ctx.graph is None: + raise ConfigurationError( + "AddDatasetDomain requires a datahub_api to connect to. 
Consider using the datahub-rest sink or provide a datahub_api: configuration on your ingestion recipe" + ) + + domains = AddDatasetDomain.get_domain_class(ctx.graph, config.domain_urns) + generic_config = AddDatasetDomainSemanticsConfig( + get_domains_to_add=lambda _: domains, + semantics=config.semantics, + ) + super().__init__(generic_config, ctx) + + @classmethod + def create( + cls, config_dict: dict, ctx: PipelineContext + ) -> "SimpleAddDatasetDomain": + config = SimpleDatasetDomainSemanticsConfig.parse_obj(config_dict) + return cls(config, ctx) + + +class PatternAddDatasetDomain(AddDatasetDomain): + """Transformer that adds a specified set of domains to each dataset.""" + + def __init__( + self, config: PatternDatasetDomainSemanticsConfig, ctx: PipelineContext + ): + if ctx.graph is None: + raise ConfigurationError( + "AddDatasetDomain requires a datahub_api to connect to. Consider using the datahub-rest sink or provide a datahub_api: configuration on your ingestion recipe" + ) + + domain_pattern = config.domain_pattern + + def resolve_domain(domain_urn: str) -> DomainsClass: + domains = domain_pattern.value(domain_urn) + return self.get_domain_class(ctx.graph, domains) + + generic_config = AddDatasetDomainSemanticsConfig( + get_domains_to_add=resolve_domain, + semantics=config.semantics, + ) + super().__init__(generic_config, ctx) + + @classmethod + def create( + cls, config_dict: dict, ctx: PipelineContext + ) -> "PatternAddDatasetDomain": + config = PatternDatasetDomainSemanticsConfig.parse_obj(config_dict) + return cls(config, ctx) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py index e7c2806b8cee0a..c2e6ddf141c5bf 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py @@ -1,5 +1,5 @@ import logging -from abc import abstractmethod +from abc import ABCMeta, abstractmethod from typing import List, Optional from deprecated import deprecated @@ -59,11 +59,27 @@ def transform_aspect( # not marked as @abstractmethod to avoid impacting transf ) +# TODO: rename DatasetTransformerV2 to DatasetTransformer after upgrading all existing dataset transformer +class DatasetTransformerV2(BaseTransformer, SingleAspectTransformer, metaclass=ABCMeta): + """Transformer that does transforms sequentially on each dataset.""" + + def __init__(self): + super().__init__() + + def entity_types(self) -> List[str]: + return ["dataset"] + + class DatasetOwnershipTransformer(DatasetTransformer, SingleAspectTransformer): def aspect_name(self) -> str: return "ownership" +class DatasetDomainTransformer(DatasetTransformerV2, SingleAspectTransformer): + def aspect_name(self) -> str: + return "domains" + + class DatasetStatusTransformer(DatasetTransformer, SingleAspectTransformer): def aspect_name(self) -> str: return "status" diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/transform_registry.py b/metadata-ingestion/src/datahub/ingestion/transformer/transform_registry.py index ed806f8e2ff860..12662f55e03997 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/transform_registry.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/transform_registry.py @@ -1,5 +1,6 @@ from datahub.ingestion.api.registry import PluginRegistry from datahub.ingestion.api.transform import Transformer +from datahub.ingestion.transformer import dataset_domain from 
datahub.ingestion.transformer.add_dataset_browse_path import ( AddDatasetBrowsePathTransformer, ) @@ -45,6 +46,15 @@ transform_registry.register("simple_add_dataset_ownership", SimpleAddDatasetOwnership) transform_registry.register("pattern_add_dataset_ownership", PatternAddDatasetOwnership) +transform_registry.register("add_dataset_domain", dataset_domain.AddDatasetDomain) +transform_registry.register( + "simple_add_dataset_domain", dataset_domain.SimpleAddDatasetDomain +) +transform_registry.register( + "pattern_add_dataset_domain", dataset_domain.PatternAddDatasetDomain +) + + transform_registry.register("add_dataset_tags", AddDatasetTags) transform_registry.register("simple_add_dataset_tags", SimpleAddDatasetTags) transform_registry.register("pattern_add_dataset_tags", PatternAddDatasetTags) diff --git a/metadata-ingestion/src/datahub/utilities/registries/domain_registry.py b/metadata-ingestion/src/datahub/utilities/registries/domain_registry.py index 4e719c939b6f2b..061cfaf2e3b7df 100644 --- a/metadata-ingestion/src/datahub/utilities/registries/domain_registry.py +++ b/metadata-ingestion/src/datahub/utilities/registries/domain_registry.py @@ -30,11 +30,7 @@ def __init__( assert graph # first try to check if this domain exists by urn maybe_domain_urn = f"urn:li:domain:{domain_identifier}" - from datahub.metadata.schema_classes import DomainPropertiesClass - - maybe_domain_properties = graph.get_aspect_v2( - maybe_domain_urn, DomainPropertiesClass, "domainProperties" - ) + maybe_domain_properties = graph.get_domain_properties(maybe_domain_urn) if maybe_domain_properties: self.domain_registry[domain_identifier] = maybe_domain_urn else: diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index 05a6f80c2dddde..12186d014adaff 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -644,8 +644,25 @@ def test_dbt_stateful_tests(pytestconfig, tmp_path, mock_time, mock_datahub_grap @pytest.mark.parametrize( "data_type, expected_data_type", [ - ("timestamp(3)", "timestamp"), + ("boolean", "boolean"), + ("tinyint", "tinyint"), + ("smallint", "smallint"), + ("int", "int"), + ("integer", "integer"), + ("bigint", "bigint"), + ("real", "real"), + ("double", "double"), + ("decimal(10,0)", "decimal"), ("varchar(20)", "varchar"), + ("char", "char"), + ("varbinary", "varbinary"), + ("json", "json"), + ("date", "date"), + ("time", "time"), + ("time(12)", "time"), + ("timestamp", "timestamp"), + ("timestamp(3)", "timestamp"), + ("row(x bigint, y double)", "row"), ], ) def test_resolve_trino_modified_type(data_type, expected_data_type): diff --git a/metadata-service/factories/src/main/resources/application.yml b/metadata-service/factories/src/main/resources/application.yml index 32f0bdb36ea924..6ccd9d7511723b 100644 --- a/metadata-service/factories/src/main/resources/application.yml +++ b/metadata-service/factories/src/main/resources/application.yml @@ -44,7 +44,7 @@ authorization: ingestion: enabled: ${UI_INGESTION_ENABLED:true} - defaultCliVersion: '${UI_INGESTION_DEFAULT_CLI_VERSION:0.8.41}' + defaultCliVersion: '${UI_INGESTION_DEFAULT_CLI_VERSION:0.8.42}' telemetry: enabledCli: ${CLI_TELEMETRY_ENABLED:true} diff --git a/smoke-test/tests/cypress/cypress.json b/smoke-test/tests/cypress/cypress.json index 2c3a885a2c5df9..9564b0077bd277 100644 --- a/smoke-test/tests/cypress/cypress.json +++ b/smoke-test/tests/cypress/cypress.json @@ -4,6 +4,7 @@ "viewportHeight": 960, 
"viewportWidth": 1536, "projectId": "hkrxk5", + "defaultCommandTimeout": 10000, "retries": { "runMode": 2, "openMode": 0 diff --git a/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js b/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js index c696ef421669ec..bcdea9f64b0182 100644 --- a/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js +++ b/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js @@ -1,6 +1,7 @@ describe('mutations', () => { before(() => { // warm up elastic by issuing a `*` search + cy.login(); cy.visit('http://localhost:9002/search?query=%2A'); cy.wait(5000); }); @@ -118,7 +119,7 @@ describe('mutations', () => { // verify dataset shows up in search now cy.contains('of 1 result').click(); cy.contains('cypress_logging_events').click(); - cy.contains('CypressTestAddTag2').within(() => cy.get('span[aria-label=close]').click()); + cy.contains('CypressTestAddTag2').within(() => cy.get('span[aria-label=close]').trigger('mouseover', {force: true}).click()); cy.contains('Yes').click(); cy.contains('CypressTestAddTag2').should('not.exist'); diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py index 2d0ddb3cd14d11..a41c5f7659399f 100644 --- a/smoke-test/tests/cypress/integration_test.py +++ b/smoke-test/tests/cypress/integration_test.py @@ -24,10 +24,12 @@ def test_run_cypress(frontend_session, wait_for_healthchecks): record_key = os.getenv("CYPRESS_RECORD_KEY") if record_key: print('Running Cypress tests with recording') - command = f"NO_COLOR=1 npx cypress run --record" + command = "NO_COLOR=1 npx cypress run --record" else: print('Running Cypress tests without recording') - command = f"NO_COLOR=1 npx cypress run" + command = "NO_COLOR=1 npx cypress run" + # Add --headed --spec '**/mutations/mutations.js' (change spec name) + # in case you want to see the browser for debugging proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd="tests/cypress") stdout = proc.stdout.read() stderr = proc.stderr.read()