diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsClientFactory.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsClientFactory.java index 29ffdd606cfdc..dd790d228d4e8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsClientFactory.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsClientFactory.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql; import com.linkedin.dataplatform.client.DataPlatforms; +import com.linkedin.entity.client.AspectClient; import com.linkedin.entity.client.EntityClient; import com.linkedin.lineage.client.Lineages; import com.linkedin.lineage.client.Relationships; @@ -34,6 +35,7 @@ public class GmsClientFactory { private static Lineages _lineages; private static Relationships _relationships; private static EntityClient _entities; + private static AspectClient _aspects; private GmsClientFactory() { } @@ -81,4 +83,15 @@ public static EntityClient getEntitiesClient() { } return _entities; } + + public static AspectClient getAspectsClient() { + if (_aspects == null) { + synchronized (GmsClientFactory.class) { + if (_aspects == null) { + _aspects = new AspectClient(REST_CLIENT); + } + } + } + return _aspects; + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index a0bb7a1492d12..d7cfcec78f17c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql; import com.google.common.collect.ImmutableList; +import com.linkedin.datahub.graphql.generated.Aspect; import com.linkedin.datahub.graphql.generated.Chart; import com.linkedin.datahub.graphql.generated.ChartInfo; import 
com.linkedin.datahub.graphql.generated.DashboardInfo; @@ -12,15 +13,18 @@ import com.linkedin.datahub.graphql.generated.RelatedDataset; import com.linkedin.datahub.graphql.generated.SearchResult; import com.linkedin.datahub.graphql.generated.InstitutionalMemoryMetadata; +import com.linkedin.datahub.graphql.resolvers.load.AspectResolver; import com.linkedin.datahub.graphql.resolvers.load.EntityTypeResolver; import com.linkedin.datahub.graphql.resolvers.load.LoadableTypeBatchResolver; import com.linkedin.datahub.graphql.resolvers.mutate.MutableTypeResolver; +import com.linkedin.datahub.graphql.resolvers.type.AspectInterfaceTypeResolver; import com.linkedin.datahub.graphql.resolvers.type.HyperParameterValueTypeResolver; import com.linkedin.datahub.graphql.resolvers.type.ResultsTypeResolver; import com.linkedin.datahub.graphql.types.BrowsableEntityType; import com.linkedin.datahub.graphql.types.EntityType; import com.linkedin.datahub.graphql.types.LoadableType; import com.linkedin.datahub.graphql.types.SearchableEntityType; +import com.linkedin.datahub.graphql.types.aspect.AspectType; import com.linkedin.datahub.graphql.types.chart.ChartType; import com.linkedin.datahub.graphql.types.corpuser.CorpUserType; import com.linkedin.datahub.graphql.types.corpgroup.CorpGroupType; @@ -50,6 +54,7 @@ import com.linkedin.datahub.graphql.types.lineage.DataFlowDataJobsRelationshipsType; import com.linkedin.datahub.graphql.types.glossary.GlossaryTermType; +import graphql.execution.DataFetcherResult; import graphql.schema.idl.RuntimeWiring; import org.apache.commons.io.IOUtils; import org.dataloader.BatchLoaderContextProvider; @@ -97,6 +102,7 @@ public class GmsGraphQLEngine { GmsClientFactory.getRelationshipsClient() ); public static final GlossaryTermType GLOSSARY_TERM_TYPE = new GlossaryTermType(GmsClientFactory.getEntitiesClient()); + public static final AspectType ASPECT_TYPE = new AspectType(GmsClientFactory.getAspectsClient()); /** * Configures the graph objects that can be 
fetched primary key. @@ -196,6 +202,7 @@ public static GraphQLEngine.Builder builder() { return GraphQLEngine.builder() .addSchema(schema()) .addDataLoaders(loaderSuppliers(LOADABLE_TYPES)) + .addDataLoader("Aspect", (context) -> createAspectLoader(context)) .configureRuntimeWiring(GmsGraphQLEngine::configureRuntimeWiring); } @@ -293,6 +300,9 @@ private static void configureDatasetResolvers(final RuntimeWiring.Builder builde UPSTREAM_LINEAGE_TYPE, (env) -> ((Entity) env.getSource()).getUrn())) ) + .dataFetcher("schemaMetadata", new AuthenticatedResolver<>( + new AspectResolver()) + ) ) .type("Owner", typeWiring -> typeWiring .dataFetcher("owner", new AuthenticatedResolver<>( @@ -459,6 +469,7 @@ private static void configureTypeResolvers(final RuntimeWiring.Builder builder) .type("HyperParameterValueType", typeWiring -> typeWiring .typeResolver(new HyperParameterValueTypeResolver()) ) + .type("Aspect", typeWiring -> typeWiring.typeResolver(new AspectInterfaceTypeResolver())) .type("ResultsType", typeWiring -> typeWiring .typeResolver(new ResultsTypeResolver())); } @@ -519,7 +530,7 @@ private static void configureDataJobResolvers(final RuntimeWiring.Builder builde } - private static DataLoader createDataLoader(final LoadableType graphType, final QueryContext queryContext) { + private static DataLoader> createDataLoader(final LoadableType graphType, final QueryContext queryContext) { BatchLoaderContextProvider contextProvider = () -> queryContext; DataLoaderOptions loaderOptions = DataLoaderOptions.newOptions().setBatchLoaderContextProvider(contextProvider); return DataLoader.newDataLoader((keys, context) -> CompletableFuture.supplyAsync(() -> { @@ -531,6 +542,18 @@ private static DataLoader createDataLoader(final LoadableType }), loaderOptions); } + private static DataLoader> createAspectLoader(final QueryContext queryContext) { + BatchLoaderContextProvider contextProvider = () -> queryContext; + DataLoaderOptions loaderOptions = 
DataLoaderOptions.newOptions().setBatchLoaderContextProvider(contextProvider); + return DataLoader.newDataLoader((keys, context) -> CompletableFuture.supplyAsync(() -> { + try { + return ASPECT_TYPE.batchLoad(keys, context.getContext()); + } catch (Exception e) { + throw new RuntimeException(String.format("Failed to retrieve entities of type Aspect", e)); + } + }), loaderOptions); + } + private GmsGraphQLEngine() { } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/VersionedAspectKey.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/VersionedAspectKey.java new file mode 100644 index 0000000000000..b0c0436ffd891 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/VersionedAspectKey.java @@ -0,0 +1,16 @@ +package com.linkedin.datahub.graphql; + +import lombok.Data; + +@Data +public class VersionedAspectKey { + private String aspectName; + private String urn; + private Long version; + + public VersionedAspectKey(String urn, String aspectName, Long version) { + this.urn = urn; + this.version = version; + this.aspectName = aspectName; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java index fe444103a6b39..61b1e6a893bdd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java @@ -1,9 +1,14 @@ package com.linkedin.datahub.graphql.resolvers; import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.data.DataMap; +import com.linkedin.data.element.DataElement; import com.linkedin.datahub.graphql.exception.ValidationException; import com.linkedin.datahub.graphql.generated.FacetFilterInput; +import com.linkedin.metadata.aspect.VersionedAspect; +import 
graphql.schema.DataFetchingEnvironment; +import java.lang.reflect.InvocationTargetException; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.util.Collections; @@ -11,12 +16,17 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.commons.lang.reflect.ConstructorUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class ResolverUtils { private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final Logger _logger = LoggerFactory.getLogger(ResolverUtils.class.getName()); + private ResolverUtils() { } @Nonnull @@ -55,4 +65,67 @@ public static Map buildFacetFilters(@Nullable List) localContext).getOrDefault(fieldName, null); + + if (prefetchedAspect != null) { + try { + Object constructedAspect = constructAspectFromDataElement(prefetchedAspect); + + VersionedAspect resultWithMetadata = new VersionedAspect(); + + resultWithMetadata.setAspect(constructAspectUnionInstanceFromAspect(constructedAspect)); + + resultWithMetadata.setVersion(0); + + return resultWithMetadata; + } catch (IllegalAccessException | InstantiationException | InvocationTargetException | ClassNotFoundException | NoSuchMethodException e) { + _logger.error( + "Error fetch aspect from local context. field: {} version: {}. 
Error: {}", + fieldName, + version, + e.toString() + ); + e.printStackTrace(); + } + } + } + } + return null; + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/AspectResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/AspectResolver.java new file mode 100644 index 0000000000000..62e8078e1e46d --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/AspectResolver.java @@ -0,0 +1,45 @@ +package com.linkedin.datahub.graphql.resolvers.load; + +import com.linkedin.datahub.graphql.VersionedAspectKey; +import com.linkedin.datahub.graphql.generated.Aspect; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; +import com.linkedin.datahub.graphql.types.aspect.AspectMapper; +import com.linkedin.metadata.aspect.VersionedAspect; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletableFuture; +import org.dataloader.DataLoader; + + +/** + * Generic GraphQL resolver responsible for + * + * 1. Generating a single input AspectLoadKey. + * 2. Resolving a single {@link Aspect}. 
+ * + */ +public class AspectResolver implements DataFetcher> { + + public AspectResolver() { + } + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) { + final DataLoader loader = environment.getDataLoaderRegistry().getDataLoader("Aspect"); + + String fieldName = environment.getField().getName(); + Long version = environment.getArgument("version"); + String urn = ((Entity) environment.getSource()).getUrn(); + + // first, we try fetching the aspect from the local cache + // we need to convert it into a VersionedAspect so we can make use of existing mappers + VersionedAspect aspectFromContext = ResolverUtils.getAspectFromLocalContext(environment); + if (aspectFromContext != null) { + return CompletableFuture.completedFuture(AspectMapper.map(aspectFromContext)); + } + + // if the aspect is not in the cache, we need to fetch it from GMS Aspect Resource + return loader.load(new VersionedAspectKey(urn, fieldName, version)); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/type/AspectInterfaceTypeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/type/AspectInterfaceTypeResolver.java new file mode 100644 index 0000000000000..45998bdae45b0 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/type/AspectInterfaceTypeResolver.java @@ -0,0 +1,20 @@ +package com.linkedin.datahub.graphql.resolvers.type; + +import graphql.TypeResolutionEnvironment; +import graphql.schema.GraphQLObjectType; +import graphql.schema.TypeResolver; + +/** + * Responsible for resolving the {@link com.linkedin.datahub.graphql.generated.Aspect} interface type. + */ +public class AspectInterfaceTypeResolver implements TypeResolver { + + public AspectInterfaceTypeResolver() { } + @Override + public GraphQLObjectType getType(TypeResolutionEnvironment env) { + // TODO(Gabe): Fill this out. This method is not called today. 
We will need to fill this + // out in the case we ever want to return fields of type Aspect in graphql. Right now + // we just use Aspect to define the shared `version` field. + return null; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/LoadableType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/LoadableType.java index c9d9315668389..6ee0faedf6642 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/LoadableType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/LoadableType.java @@ -3,6 +3,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.datahub.graphql.QueryContext; +import graphql.execution.DataFetcherResult; import javax.annotation.Nonnull; import java.util.List; @@ -31,7 +32,7 @@ default String name() { * @param urn to retrieve * @param context the {@link QueryContext} corresponding to the request. */ - default T load(@Nonnull final String urn, @Nonnull final QueryContext context) throws Exception { + default DataFetcherResult load(@Nonnull final String urn, @Nonnull final QueryContext context) throws Exception { return batchLoad(ImmutableList.of(urn), context).get(0); }; @@ -42,6 +43,6 @@ default T load(@Nonnull final String urn, @Nonnull final QueryContext context) t * @param urns to retrieve * @param context the {@link QueryContext} corresponding to the request. 
*/ - List batchLoad(@Nonnull final List urns, @Nonnull final QueryContext context) throws Exception; + List> batchLoad(@Nonnull final List urns, @Nonnull final QueryContext context) throws Exception; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectMapper.java new file mode 100644 index 0000000000000..2accd6b846555 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectMapper.java @@ -0,0 +1,25 @@ +package com.linkedin.datahub.graphql.types.aspect; + +import com.linkedin.datahub.graphql.generated.Aspect; +import com.linkedin.datahub.graphql.types.dataset.mappers.SchemaMetadataMapper; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.metadata.aspect.VersionedAspect; +import javax.annotation.Nonnull; + + +public class AspectMapper implements ModelMapper { + + public static final AspectMapper INSTANCE = new AspectMapper(); + + public static Aspect map(@Nonnull final VersionedAspect restliAspect) { + return INSTANCE.apply(restliAspect); + } + + @Override + public Aspect apply(@Nonnull final VersionedAspect restliAspect) { + if (restliAspect.getAspect().isSchemaMetadata()) { + return SchemaMetadataMapper.map(restliAspect); + } + return null; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectType.java new file mode 100644 index 0000000000000..21d9bed26a7bb --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectType.java @@ -0,0 +1,41 @@ +package com.linkedin.datahub.graphql.types.aspect; + +import com.linkedin.datahub.graphql.VersionedAspectKey; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.Aspect; 
+import com.linkedin.entity.client.AspectClient; +import com.linkedin.metadata.aspect.VersionedAspect; +import com.linkedin.r2.RemoteInvocationException; +import graphql.execution.DataFetcherResult; +import java.util.List; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; + + +public class AspectType { + private final AspectClient _aspectClient; + + public AspectType(final AspectClient aspectClient) { + _aspectClient = aspectClient; + } + /** + * Retrieves an list of aspects given a list of {@link VersionedAspectKey} structs. The list returned is expected to + * be of same length of the list of keys, where nulls are provided in place of an aspect object if an entity cannot be found. + * @param keys to retrieve + * @param context the {@link QueryContext} corresponding to the request. + */ + public List> batchLoad(@Nonnull List keys, @Nonnull QueryContext context) throws Exception { + try { + return keys.stream().map(key -> { + try { + VersionedAspect entity = _aspectClient.getAspect(key.getUrn(), key.getAspectName(), key.getVersion()); + return DataFetcherResult.newResult().data(AspectMapper.map(entity)).build(); + } catch (RemoteInvocationException e) { + throw new RuntimeException(String.format("Failed to load Aspect for entity %s", key.getUrn()), e); + } + }).collect(Collectors.toList()); + } catch (Exception e) { + throw new RuntimeException("Failed to batch load Aspects", e); + } + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java index ec154623be967..16e9fdc10f2dd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java @@ -25,6 +25,7 @@ import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper; import 
com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.configs.ChartSearchConfig; +import com.linkedin.metadata.extractor.SnapshotToAspectMap; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.BrowseResult; import com.linkedin.metadata.query.SearchResult; @@ -32,6 +33,7 @@ import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.r2.RemoteInvocationException; +import graphql.execution.DataFetcherResult; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.net.URISyntaxException; @@ -68,7 +70,7 @@ public Class objectClass() { } @Override - public List batchLoad(@Nonnull List urns, @Nonnull QueryContext context) throws Exception { + public List> batchLoad(@Nonnull List urns, @Nonnull QueryContext context) throws Exception { final List chartUrns = urns.stream() .map(this::getChartUrn) .collect(Collectors.toList()); @@ -84,7 +86,11 @@ public List batchLoad(@Nonnull List urns, @Nonnull QueryContext c gmsResults.add(chartMap.getOrDefault(urn, null)); } return gmsResults.stream() - .map(gmsChart -> gmsChart == null ? null : ChartSnapshotMapper.map(gmsChart.getValue().getChartSnapshot())) + .map(gmsChart -> gmsChart == null ? 
null + : DataFetcherResult.newResult() + .data(ChartSnapshotMapper.map(gmsChart.getValue().getChartSnapshot())) + .localContext(SnapshotToAspectMap.extractAspectMap(gmsChart.getValue().getChartSnapshot())) + .build()) .collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("Failed to batch load Charts", e); @@ -129,7 +135,8 @@ public BrowseResults browse(@Nonnull List path, start, count); final List urns = result.getEntities().stream().map(entity -> entity.getUrn().toString()).collect(Collectors.toList()); - final List charts = batchLoad(urns, context); + final List charts = batchLoad(urns, context).stream().map(chartResult -> chartResult.getData()).collect( + Collectors.toList()); final BrowseResults browseResults = new BrowseResults(); browseResults.setStart(result.getFrom()); browseResults.setCount(result.getPageSize()); @@ -168,6 +175,6 @@ public Chart update(@Nonnull ChartUpdateInput input, @Nonnull QueryContext conte throw new RuntimeException(String.format("Failed to write entity with urn %s", input.getUrn()), e); } - return load(input.getUrn(), context); + return load(input.getUrn(), context).getData(); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java index 4fbae386b0105..79e88be9ccc65 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java @@ -17,6 +17,7 @@ import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.SearchResult; +import graphql.execution.DataFetcherResult; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.net.URISyntaxException; @@ -46,7 +47,7 @@ public EntityType type() { } @Override - public List batchLoad(final List urns, final 
QueryContext context) { + public List> batchLoad(final List urns, final QueryContext context) { try { final List corpGroupUrns = urns .stream() @@ -61,7 +62,8 @@ public List batchLoad(final List urns, final QueryContext con results.add(corpGroupMap.getOrDefault(urn, null)); } return results.stream() - .map(gmsCorpGroup -> gmsCorpGroup == null ? null : CorpGroupSnapshotMapper.map(gmsCorpGroup.getValue().getCorpGroupSnapshot())) + .map(gmsCorpGroup -> gmsCorpGroup == null ? null + : DataFetcherResult.newResult().data(CorpGroupSnapshotMapper.map(gmsCorpGroup.getValue().getCorpGroupSnapshot())).build()) .collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("Failed to batch load CorpGroup", e); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java index 567769360013a..c8032760d6000 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java @@ -18,6 +18,7 @@ import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.SearchResult; +import graphql.execution.DataFetcherResult; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.net.URISyntaxException; @@ -47,7 +48,7 @@ public EntityType type() { } @Override - public List batchLoad(final List urns, final QueryContext context) { + public List> batchLoad(final List urns, final QueryContext context) { try { final List corpUserUrns = urns .stream() @@ -62,7 +63,8 @@ public List batchLoad(final List urns, final QueryContext cont results.add(corpUserMap.getOrDefault(urn, null)); } return results.stream() - .map(gmsCorpUser -> gmsCorpUser == null ? 
null : CorpUserSnapshotMapper.map(gmsCorpUser.getValue().getCorpUserSnapshot())) + .map(gmsCorpUser -> gmsCorpUser == null ? null + : DataFetcherResult.newResult().data(CorpUserSnapshotMapper.map(gmsCorpUser.getValue().getCorpUserSnapshot())).build()) .collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("Failed to batch load Datasets", e); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java index c69cfdfd3a2ef..4d81eeba36776 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java @@ -27,6 +27,7 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.Entity; import com.linkedin.metadata.configs.DashboardSearchConfig; +import com.linkedin.metadata.extractor.SnapshotToAspectMap; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.BrowseResult; import com.linkedin.metadata.query.SearchResult; @@ -34,6 +35,7 @@ import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.r2.RemoteInvocationException; +import graphql.execution.DataFetcherResult; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.net.URISyntaxException; @@ -70,7 +72,7 @@ public Class objectClass() { } @Override - public List batchLoad(@Nonnull List urns, @Nonnull QueryContext context) throws Exception { + public List> batchLoad(@Nonnull List urns, @Nonnull QueryContext context) throws Exception { final List dashboardUrns = urns.stream() .map(this::getDashboardUrn) .collect(Collectors.toList()); @@ -86,8 +88,11 @@ public List batchLoad(@Nonnull List urns, @Nonnull QueryConte gmsResults.add(dashboardMap.getOrDefault(urn, null)); } return gmsResults.stream() - 
.map(gmsDashboard -> gmsDashboard == null ? null : DashboardSnapshotMapper.map( - gmsDashboard.getValue().getDashboardSnapshot())) + .map(gmsDashboard -> gmsDashboard == null ? null + : DataFetcherResult.newResult() + .data(DashboardSnapshotMapper.map(gmsDashboard.getValue().getDashboardSnapshot())) + .localContext(SnapshotToAspectMap.extractAspectMap(gmsDashboard.getValue().getDashboardSnapshot())) + .build()) .collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("Failed to batch load Dashboards", e); @@ -130,7 +135,8 @@ public BrowseResults browse(@Nonnull List path, start, count); final List urns = result.getEntities().stream().map(entity -> entity.getUrn().toString()).collect(Collectors.toList()); - final List dashboards = batchLoad(urns, context); + final List dashboards = batchLoad(urns, context).stream().map(dashboardDataFetcherResult -> dashboardDataFetcherResult.getData()).collect( + Collectors.toList()); final BrowseResults browseResults = new BrowseResults(); browseResults.setStart(result.getFrom()); browseResults.setCount(result.getPageSize()); @@ -169,6 +175,6 @@ public Dashboard update(@Nonnull DashboardUpdateInput input, @Nonnull QueryConte throw new RuntimeException(String.format("Failed to write entity with urn %s", input.getUrn()), e); } - return load(input.getUrn(), context); + return load(input.getUrn(), context).getData(); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java index 92e480cf3ccf6..6314b55b0d030 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java @@ -29,12 +29,14 @@ import com.linkedin.entity.Entity; import com.linkedin.metadata.aspect.DataFlowAspect; import 
com.linkedin.metadata.dao.utils.ModelUtils; +import com.linkedin.metadata.extractor.SnapshotToAspectMap; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.BrowseResult; import com.linkedin.metadata.query.SearchResult; import com.linkedin.metadata.snapshot.DataFlowSnapshot; import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.r2.RemoteInvocationException; +import graphql.execution.DataFetcherResult; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.List; @@ -73,7 +75,7 @@ public Class inputClass() { } @Override - public List batchLoad(final List urns, final QueryContext context) throws Exception { + public List> batchLoad(final List urns, final QueryContext context) throws Exception { final List dataFlowUrns = urns.stream() .map(this::getDataFlowUrn) .collect(Collectors.toList()); @@ -88,8 +90,10 @@ public List batchLoad(final List urns, final QueryContext cont .map(flowUrn -> dataFlowMap.getOrDefault(flowUrn, null)).collect(Collectors.toList()); return gmsResults.stream() - .map(gmsDataFlow -> gmsDataFlow == null ? null : DataFlowSnapshotMapper.map( - gmsDataFlow.getValue().getDataFlowSnapshot())) + .map(gmsDataFlow -> gmsDataFlow == null ? 
null : DataFetcherResult.newResult() + .data(DataFlowSnapshotMapper.map(gmsDataFlow.getValue().getDataFlowSnapshot())) + .localContext(SnapshotToAspectMap.extractAspectMap(gmsDataFlow.getValue().getDataFlowSnapshot())) + .build()) .collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("Failed to batch load DataFlows", e); @@ -138,7 +142,8 @@ public BrowseResults browse(@Nonnull List path, @Nullable List urns = result.getEntities().stream().map(entity -> entity.getUrn().toString()).collect(Collectors.toList()); - final List dataFlows = batchLoad(urns, context); + final List dataFlows = batchLoad(urns, context).stream().map(dataFlow -> dataFlow.getData()).collect( + Collectors.toList()); final BrowseResults browseResults = new BrowseResults(); browseResults.setStart(result.getFrom()); browseResults.setCount(result.getPageSize()); @@ -187,6 +192,6 @@ public DataFlow update(@Nonnull DataFlowUpdateInput input, @Nonnull QueryContext throw new RuntimeException(String.format("Failed to write entity with urn %s", input.getUrn()), e); } - return load(input.getUrn(), context); + return load(input.getUrn(), context).getData(); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java index 5c3d9822fdd25..f4a9cf27ec6bb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java @@ -29,11 +29,13 @@ import com.linkedin.entity.Entity; import com.linkedin.metadata.aspect.DataJobAspect; import com.linkedin.metadata.dao.utils.ModelUtils; +import com.linkedin.metadata.extractor.SnapshotToAspectMap; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.BrowseResult; import com.linkedin.metadata.query.SearchResult; import 
com.linkedin.metadata.snapshot.DataJobSnapshot; import com.linkedin.metadata.snapshot.Snapshot; +import graphql.execution.DataFetcherResult; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.List; @@ -74,7 +76,7 @@ public Class inputClass() { } @Override - public List batchLoad(final List urns, final QueryContext context) throws Exception { + public List> batchLoad(final List urns, final QueryContext context) throws Exception { final List dataJobUrns = urns.stream() .map(this::getDataJobUrn) .collect(Collectors.toList()); @@ -89,7 +91,11 @@ public List batchLoad(final List urns, final QueryContext conte .map(jobUrn -> dataJobMap.getOrDefault(jobUrn, null)).collect(Collectors.toList()); return gmsResults.stream() - .map(gmsDataJob -> gmsDataJob == null ? null : DataJobSnapshotMapper.map(gmsDataJob.getValue().getDataJobSnapshot())) + .map(gmsDataJob -> gmsDataJob == null ? null + : DataFetcherResult.newResult() + .data(DataJobSnapshotMapper.map(gmsDataJob.getValue().getDataJobSnapshot())) + .localContext(SnapshotToAspectMap.extractAspectMap(gmsDataJob.getValue().getDataJobSnapshot())) + .build()) .collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("Failed to batch load DataJobs", e); @@ -139,7 +145,8 @@ public BrowseResults browse(@Nonnull List path, @Nullable List urns = result.getEntities().stream().map(entity -> entity.getUrn().toString()).collect(Collectors.toList()); - final List dataJobs = batchLoad(urns, context); + final List dataJobs = batchLoad(urns, context).stream().map(dataFetcherResult -> dataFetcherResult.getData()).collect( + Collectors.toList()); final BrowseResults browseResults = new BrowseResults(); browseResults.setStart(result.getFrom()); browseResults.setCount(result.getPageSize()); @@ -192,6 +199,6 @@ public DataJob update(@Nonnull DataJobUpdateInput input, @Nonnull QueryContext c throw new RuntimeException(String.format("Failed to write entity with urn %s", input.getUrn()), e); } - 
return load(input.getUrn(), context); + return load(input.getUrn(), context).getData(); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatform/DataPlatformType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatform/DataPlatformType.java index 09ae409285641..03c8be3c30d09 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatform/DataPlatformType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatform/DataPlatformType.java @@ -7,6 +7,7 @@ import com.linkedin.datahub.graphql.types.dataplatform.mappers.DataPlatformMapper; import com.linkedin.dataplatform.client.DataPlatforms; +import graphql.execution.DataFetcherResult; import java.net.URISyntaxException; import java.util.List; import java.util.Map; @@ -27,7 +28,7 @@ public Class objectClass() { } @Override - public List batchLoad(final List urns, final QueryContext context) { + public List> batchLoad(final List urns, final QueryContext context) { try { if (_urnToPlatform == null) { _urnToPlatform = _dataPlatformsClient.getAllPlatforms().stream() @@ -36,6 +37,7 @@ public List batchLoad(final List urns, final QueryContext } return urns.stream() .map(key -> _urnToPlatform.containsKey(key) ? 
_urnToPlatform.get(key) : getUnknownDataPlatform(key)) + .map(dataPlatform -> DataFetcherResult.newResult().data(dataPlatform).build()) .collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("Failed to batch load DataPlatforms", e); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index a5820239c2626..e0d806769ec03 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -29,12 +29,14 @@ import com.linkedin.dataset.client.Datasets; import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.Entity; +import com.linkedin.metadata.extractor.SnapshotToAspectMap; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.BrowseResult; import com.linkedin.metadata.query.SearchResult; import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.r2.RemoteInvocationException; +import graphql.execution.DataFetcherResult; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.util.ArrayList; @@ -72,7 +74,7 @@ public EntityType type() { } @Override - public List batchLoad(final List urns, final QueryContext context) { + public List> batchLoad(final List urns, final QueryContext context) { final List datasetUrns = urns.stream() .map(DatasetUtils::getDatasetUrn) @@ -89,9 +91,13 @@ public List batchLoad(final List urns, final QueryContext conte gmsResults.add(datasetMap.getOrDefault(urn, null)); } return gmsResults.stream() - .map(gmsDataset -> gmsDataset == null ? null : DatasetSnapshotMapper.map( - gmsDataset.getValue().getDatasetSnapshot())) - .collect(Collectors.toList()); + .map(gmsDataset -> + gmsDataset == null ? 
null : DataFetcherResult.newResult() + .data(DatasetSnapshotMapper.map(gmsDataset.getValue().getDatasetSnapshot())) + .localContext(SnapshotToAspectMap.extractAspectMap(gmsDataset.getValue().getDatasetSnapshot())) + .build() + ) + .collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("Failed to batch load Datasets", e); } @@ -134,7 +140,8 @@ public BrowseResults browse(@Nonnull List path, start, count); final List urns = result.getEntities().stream().map(entity -> entity.getUrn().toString()).collect(Collectors.toList()); - final List datasets = batchLoad(urns, context); + final List datasets = batchLoad(urns, context) + .stream().map(datasetDataFetcherResult -> datasetDataFetcherResult.getData()).collect(Collectors.toList()); final BrowseResults browseResults = new BrowseResults(); browseResults.setStart(result.getFrom()); browseResults.setCount(result.getPageSize()); @@ -187,6 +194,6 @@ public Dataset update(@Nonnull DatasetUpdateInput input, @Nonnull QueryContext c throw new RuntimeException(String.format("Failed to write entity with urn %s", input.getUrn()), e); } - return load(input.getUrn(), context); + return load(input.getUrn(), context).getData(); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java index 66dcc5b839b9f..9bd61ddcab737 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java @@ -46,7 +46,7 @@ public Dataset apply(@Nonnull final com.linkedin.dataset.Dataset dataset) { result.setExternalUrl(dataset.getExternalUrl().toString()); } if (dataset.hasSchemaMetadata()) { - result.setSchema(SchemaMetadataMapper.map(dataset.getSchemaMetadata())); + 
result.setSchema(SchemaMapper.map(dataset.getSchemaMetadata())); } if (dataset.hasEditableSchemaMetadata()) { result.setEditableSchemaMetadata(EditableSchemaMetadataMapper.map(dataset.getEditableSchemaMetadata())); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetSnapshotMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetSnapshotMapper.java index de4d977d3a9bc..792fbb7dc56f9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetSnapshotMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetSnapshotMapper.java @@ -71,7 +71,7 @@ public Dataset apply(@Nonnull final DatasetSnapshot dataset) { result.setOwnership(OwnershipMapper.map((Ownership) aspect)); } else if (aspect instanceof SchemaMetadata) { result.setSchema( - SchemaMetadataMapper.map((SchemaMetadata) aspect) + SchemaMapper.map((SchemaMetadata) aspect) ); } else if (aspect instanceof Status) { result.setStatus(StatusMapper.map((Status) aspect)); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMapper.java new file mode 100644 index 0000000000000..05db591f42e63 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMapper.java @@ -0,0 +1,34 @@ +package com.linkedin.datahub.graphql.types.dataset.mappers; + +import com.linkedin.datahub.graphql.generated.Schema; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.schema.SchemaMetadata; + +import javax.annotation.Nonnull; +import java.util.stream.Collectors; + +public class SchemaMapper implements ModelMapper { + + public static final SchemaMapper INSTANCE = new SchemaMapper(); + + public static 
Schema map(@Nonnull final SchemaMetadata metadata) { + return INSTANCE.apply(metadata); + } + + @Override + public Schema apply(@Nonnull final com.linkedin.schema.SchemaMetadata input) { + final Schema result = new Schema(); + if (input.hasDataset()) { + result.setDatasetUrn(input.getDataset().toString()); + } + result.setName(input.getSchemaName()); + result.setPlatformUrn(input.getPlatform().toString()); + result.setVersion(input.getVersion()); + result.setCluster(input.getCluster()); + result.setHash(input.getHash()); + result.setPrimaryKeys(input.getPrimaryKeys()); + result.setFields(input.getFields().stream().map(SchemaFieldMapper::map).collect(Collectors.toList())); + result.setPlatformSchema(PlatformSchemaMapper.map(input.getPlatformSchema())); + return result; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMetadataMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMetadataMapper.java index b36957cbd38ff..9b38fbf834737 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMetadataMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMetadataMapper.java @@ -1,23 +1,26 @@ package com.linkedin.datahub.graphql.types.dataset.mappers; -import com.linkedin.datahub.graphql.generated.Schema; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.schema.SchemaMetadata; - -import javax.annotation.Nonnull; import java.util.stream.Collectors; +import javax.annotation.Nonnull; -public class SchemaMetadataMapper implements ModelMapper { + +public class SchemaMetadataMapper implements ModelMapper { public static final SchemaMetadataMapper INSTANCE = new SchemaMetadataMapper(); - public static Schema map(@Nonnull final SchemaMetadata metadata) { + public static 
com.linkedin.datahub.graphql.generated.SchemaMetadata map(@Nonnull final VersionedAspect metadata) { return INSTANCE.apply(metadata); } @Override - public Schema apply(@Nonnull final com.linkedin.schema.SchemaMetadata input) { - final Schema result = new Schema(); + public com.linkedin.datahub.graphql.generated.SchemaMetadata apply(@Nonnull final VersionedAspect inputWithMetadata) { + SchemaMetadata input = inputWithMetadata.getAspect().getSchemaMetadata(); + final com.linkedin.datahub.graphql.generated.SchemaMetadata result = + new com.linkedin.datahub.graphql.generated.SchemaMetadata(); + if (input.hasDataset()) { result.setDatasetUrn(input.getDataset().toString()); } @@ -29,6 +32,7 @@ public Schema apply(@Nonnull final com.linkedin.schema.SchemaMetadata input) { result.setPrimaryKeys(input.getPrimaryKeys()); result.setFields(input.getFields().stream().map(SchemaFieldMapper::map).collect(Collectors.toList())); result.setPlatformSchema(PlatformSchemaMapper.map(input.getPlatformSchema())); + result.setAspectVersion(inputWithMetadata.getVersion()); return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/GlossaryTermType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/GlossaryTermType.java index 88ceeac4345b7..34a75097b8ad4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/GlossaryTermType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/GlossaryTermType.java @@ -22,10 +22,12 @@ import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper; import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.Entity; +import com.linkedin.metadata.extractor.SnapshotToAspectMap; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.BrowseResult; import com.linkedin.metadata.query.SearchResult; +import graphql.execution.DataFetcherResult; import 
javax.annotation.Nonnull; import javax.annotation.Nullable; import java.util.ArrayList; @@ -58,7 +60,7 @@ public EntityType type() { } @Override - public List batchLoad(final List urns, final QueryContext context) { + public List> batchLoad(final List urns, final QueryContext context) { final List glossaryTermUrns = urns.stream() .map(GlossaryTermUtils::getGlossaryTermUrn) .collect(Collectors.toList()); @@ -76,7 +78,10 @@ public List batchLoad(final List urns, final QueryContext return gmsResults.stream() .map(gmsGlossaryTerm -> gmsGlossaryTerm == null ? null - : GlossaryTermSnapshotMapper.map(gmsGlossaryTerm.getValue().getGlossaryTermSnapshot())) + : DataFetcherResult.newResult() + .data(GlossaryTermSnapshotMapper.map(gmsGlossaryTerm.getValue().getGlossaryTermSnapshot())) + .localContext(SnapshotToAspectMap.extractAspectMap(gmsGlossaryTerm.getValue().getGlossaryTermSnapshot())) + .build()) .collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("Failed to batch load GlossaryTerms", e); @@ -122,7 +127,8 @@ public BrowseResults browse(@Nonnull List path, start, count); final List urns = result.getEntities().stream().map(entity -> entity.getUrn().toString()).collect(Collectors.toList()); - final List glossaryTerms = batchLoad(urns, context); + final List glossaryTerms = batchLoad(urns, context).stream().map(term -> term.getData()).collect( + Collectors.toList()); final BrowseResults browseResults = new BrowseResults(); browseResults.setStart(result.getFrom()); browseResults.setCount(result.getPageSize()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/DataFlowDataJobsRelationshipsType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/DataFlowDataJobsRelationshipsType.java index 3a8a03189182e..0ef2a97be5ceb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/DataFlowDataJobsRelationshipsType.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/DataFlowDataJobsRelationshipsType.java @@ -8,6 +8,7 @@ import com.linkedin.metadata.query.RelationshipDirection; import com.linkedin.r2.RemoteInvocationException; +import graphql.execution.DataFetcherResult; import java.net.URISyntaxException; import java.util.List; import java.util.stream.Collectors; @@ -27,14 +28,13 @@ public Class objectClass() { } @Override - public List batchLoad(final List keys, final QueryContext context) { - + public List> batchLoad(final List keys, final QueryContext context) { try { return keys.stream().map(urn -> { try { com.linkedin.common.EntityRelationships relationships = _relationshipsClient.getRelationships(urn, _direction, "IsPartOf"); - return DataFlowDataJobsRelationshipsMapper.map(relationships); + return DataFetcherResult.newResult().data(DataFlowDataJobsRelationshipsMapper.map(relationships)).build(); } catch (RemoteInvocationException | URISyntaxException e) { throw new RuntimeException(String.format("Failed to batch load DataJobs for DataFlow %s", urn), e); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/DownstreamLineageType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/DownstreamLineageType.java index 1974fea6e63d4..ba6ca3c4caf6d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/DownstreamLineageType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/DownstreamLineageType.java @@ -8,6 +8,7 @@ import com.linkedin.metadata.query.RelationshipDirection; import com.linkedin.r2.RemoteInvocationException; +import graphql.execution.DataFetcherResult; import java.net.URISyntaxException; import java.util.List; import java.util.stream.Collectors; @@ -27,14 +28,14 @@ public Class objectClass() { } @Override - public List batchLoad(final List keys, final QueryContext context) { + public List> 
batchLoad(final List keys, final QueryContext context) { try { return keys.stream().map(urn -> { try { com.linkedin.common.EntityRelationships relationships = _lineageClient.getLineage(urn, _direction); - return DownstreamEntityRelationshipsMapper.map(relationships); + return DataFetcherResult.newResult().data(DownstreamEntityRelationshipsMapper.map(relationships)).build(); } catch (RemoteInvocationException | URISyntaxException e) { throw new RuntimeException(String.format("Failed to batch load DownstreamLineage for entity %s", urn), e); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/UpstreamLineageType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/UpstreamLineageType.java index c69e616662242..8a21168d667d3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/UpstreamLineageType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/lineage/UpstreamLineageType.java @@ -8,6 +8,7 @@ import com.linkedin.metadata.query.RelationshipDirection; import com.linkedin.r2.RemoteInvocationException; +import graphql.execution.DataFetcherResult; import java.net.URISyntaxException; import java.util.List; import java.util.stream.Collectors; @@ -27,14 +28,14 @@ public Class objectClass() { } @Override - public List batchLoad(final List keys, final QueryContext context) { + public List> batchLoad(final List keys, final QueryContext context) { try { return keys.stream().map(urn -> { try { com.linkedin.common.EntityRelationships relationships = _lineageClient.getLineage(urn, _direction); - return UpstreamEntityRelationshipsMapper.map(relationships); + return DataFetcherResult.newResult().data(UpstreamEntityRelationshipsMapper.map(relationships)).build(); } catch (RemoteInvocationException | URISyntaxException e) { throw new RuntimeException(String.format("Failed to batch load DownstreamLineage for entity %s", urn), e); } diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/MLModelType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/MLModelType.java index a77aa2e00b21b..596f3837e32b6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/MLModelType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/MLModelType.java @@ -6,7 +6,9 @@ import com.linkedin.datahub.graphql.types.mlmodel.mappers.MLModelSnapshotMapper; import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.Entity; +import com.linkedin.metadata.extractor.SnapshotToAspectMap; import com.linkedin.metadata.query.SearchResult; +import graphql.execution.DataFetcherResult; import java.util.List; import java.util.Map; import java.util.Objects; @@ -49,7 +51,7 @@ public Class objectClass() { } @Override - public List batchLoad(final List urns, final QueryContext context) throws Exception { + public List> batchLoad(final List urns, final QueryContext context) throws Exception { final List mlModelUrns = urns.stream() .map(MLModelUtils::getMLModelUrn) .collect(Collectors.toList()); @@ -64,8 +66,11 @@ public List batchLoad(final List urns, final QueryContext conte .map(modelUrn -> mlModelMap.getOrDefault(modelUrn, null)).collect(Collectors.toList()); return gmsResults.stream() - .map(gmsMlModel -> gmsMlModel == null ? null : MLModelSnapshotMapper.map( - gmsMlModel.getValue().getMLModelSnapshot())) + .map(gmsMlModel -> gmsMlModel == null ? 
null + : DataFetcherResult.newResult() + .data(MLModelSnapshotMapper.map(gmsMlModel.getValue().getMLModelSnapshot())) + .localContext(SnapshotToAspectMap.extractAspectMap(gmsMlModel.getValue().getMLModelSnapshot())) + .build()) .collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("Failed to batch load MLModels", e); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java index 53ccc84aca5c0..c4daadfff577f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java @@ -30,6 +30,7 @@ import com.linkedin.metadata.aspect.TagAspect; import com.linkedin.metadata.configs.TagSearchConfig; import com.linkedin.metadata.dao.utils.ModelUtils; +import com.linkedin.metadata.extractor.SnapshotToAspectMap; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.SearchResult; import com.linkedin.metadata.snapshot.Snapshot; @@ -37,6 +38,7 @@ import com.linkedin.r2.RemoteInvocationException; import com.linkedin.tag.TagProperties; +import graphql.execution.DataFetcherResult; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.net.URISyntaxException; @@ -72,7 +74,7 @@ public Class inputClass() { } @Override - public List batchLoad(final List urns, final QueryContext context) { + public List> batchLoad(final List urns, final QueryContext context) { final List tagUrns = urns.stream() .map(this::getTagUrn) @@ -89,7 +91,11 @@ public List batchLoad(final List urns, final QueryContext context) gmsResults.add(tagMap.getOrDefault(urn, null)); } return gmsResults.stream() - .map(gmsTag -> gmsTag == null ? null : TagSnapshotMapper.map(gmsTag.getValue().getTagSnapshot())) + .map(gmsTag -> gmsTag == null ? 
null + : DataFetcherResult.newResult() + .data(TagSnapshotMapper.map(gmsTag.getValue().getTagSnapshot())) + .localContext(SnapshotToAspectMap.extractAspectMap(gmsTag.getValue().getTagSnapshot())) + .build()) .collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("Failed to batch load Tags", e); @@ -155,7 +161,7 @@ public Tag update(@Nonnull TagUpdate input, @Nonnull QueryContext context) throw throw new RuntimeException(String.format("Failed to write entity with urn %s", input.getUrn()), e); } - return load(input.getUrn(), context); + return load(input.getUrn(), context).getData(); } private TagUrn getTagUrn(final String urnStr) { diff --git a/datahub-graphql-core/src/main/resources/gms.graphql b/datahub-graphql-core/src/main/resources/gms.graphql index 38a044e3279e1..0951fbd36ee3e 100644 --- a/datahub-graphql-core/src/main/resources/gms.graphql +++ b/datahub-graphql-core/src/main/resources/gms.graphql @@ -21,6 +21,10 @@ interface Entity { type: EntityType! } +interface Aspect { + aspectVersion: Long +} + interface EntityWithRelationships implements Entity { """ GMS Entity urn @@ -265,7 +269,12 @@ type Dataset implements EntityWithRelationships & Entity { """ Schema metadata of the dataset """ - schema: Schema + schema: Schema @deprecated(reason: "Use `schemaMetadata`") + + """ + Schema metadata of the dataset + """ + schemaMetadata(version: Long): SchemaMetadata """ Editable schema metadata of the dataset @@ -516,6 +525,53 @@ type InstitutionalMemoryMetadata { created: AuditStamp! } +type SchemaMetadata implements Aspect { + aspectVersion: Long + createdAt: Long + """ + Dataset this schema metadata is associated with + """ + datasetUrn: String + + """ + Schema name + """ + name: String! + + """ + Platform this schema metadata is associated with + """ + platformUrn: String! + + """ + The version of the GMS Schema metadata + """ + version: Long! 
+ + """ + The cluster this schema metadata is derived from + """ + cluster: String + + """ + The SHA1 hash of the schema content + """ + hash: String! + + """ + The native schema in the datasets platform, schemaless if it was not provided + """ + platformSchema: PlatformSchema + """ + Client provided a list of fields from value schema + """ + fields: [SchemaField!]! + """ + Client provided list of fields that define primary keys to access record + """ + primaryKeys: [String!] +} + type Schema { """ Dataset this schema metadata is associated with diff --git a/datahub-web/@datahub/metadata-types/addon/constants/metadata/aspect.ts b/datahub-web/@datahub/metadata-types/addon/constants/metadata/aspect.ts index b20be77a121da..38cbbdb5ec7b6 100644 --- a/datahub-web/@datahub/metadata-types/addon/constants/metadata/aspect.ts +++ b/datahub-web/@datahub/metadata-types/addon/constants/metadata/aspect.ts @@ -6,7 +6,7 @@ type AspectsOfSnapshot = Snapshot extends { aspects: Array) => boolean)} */ -const getMetadataAspectWithMetadataAspectKey = ( +const getMetadataVersionedAspectAspectKey = ( metadataAspectKey: AspectKey ): ((aspect: Aspect) => boolean) => (aspect: Aspect): boolean => aspect.hasOwnProperty(metadataAspectKey); @@ -40,7 +40,7 @@ export const getMetadataAspect = < ): AspectsOfSnapshot[AspectKey] | undefined => { const { aspects = [] } = snapshot || {}; // Find the aspect with the metadata key that matches the passed in metadataAspectKey - const [relevantAspect] = aspects.filter(getMetadataAspectWithMetadataAspectKey(metadataAspectKey)); + const [relevantAspect] = aspects.filter(getMetadataVersionedAspectAspectKey(metadataAspectKey)); return relevantAspect ? 
getMetadataAspectValue(metadataAspectKey, relevantAspect) : undefined; }; diff --git a/gms/api/src/main/idl/com.linkedin.entity.aspects.restspec.json b/gms/api/src/main/idl/com.linkedin.entity.aspects.restspec.json new file mode 100644 index 0000000000000..98a372f180f37 --- /dev/null +++ b/gms/api/src/main/idl/com.linkedin.entity.aspects.restspec.json @@ -0,0 +1,30 @@ +{ + "name" : "aspects", + "namespace" : "com.linkedin.entity", + "path" : "/aspects", + "schema" : "com.linkedin.metadata.aspect.VersionedAspect", + "doc" : "Single unified resource for fetching, updating, searching, & browsing DataHub entities\n\ngenerated from: com.linkedin.metadata.resources.entity.AspectResource", + "collection" : { + "identifier" : { + "name" : "aspectsId", + "type" : "string" + }, + "supports" : [ "get" ], + "methods" : [ { + "method" : "get", + "doc" : "Retrieves the value for an entity that is made up of latest versions of specified aspects.", + "parameters" : [ { + "name" : "aspect", + "type" : "string", + "optional" : true + }, { + "name" : "version", + "type" : "long", + "optional" : true + } ] + } ], + "entity" : { + "path" : "/aspects/{aspectsId}" + } + } +} diff --git a/gms/api/src/main/snapshot/com.linkedin.chart.charts.snapshot.json b/gms/api/src/main/snapshot/com.linkedin.chart.charts.snapshot.json index 0543e176f8867..968bbd3562b12 100644 --- a/gms/api/src/main/snapshot/com.linkedin.chart.charts.snapshot.json +++ b/gms/api/src/main/snapshot/com.linkedin.chart.charts.snapshot.json @@ -158,7 +158,13 @@ "items" : "ChartDataSourceType" }, "doc" : "Data sources for the chart", - "optional" : true + "optional" : true, + "Relationship" : { + "/*/string" : { + "entityTypes" : [ "dataset" ], + "name" : "Consumes" + } + } }, { "name" : "type", "type" : { diff --git a/gms/api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/gms/api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json new file mode 100644 index 0000000000000..c3f14eaf8188f --- /dev/null +++ 
b/gms/api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -0,0 +1,2589 @@ +{ + "models" : [ { + "type" : "typeref", + "name" : "ChartDataSourceType", + "namespace" : "com.linkedin.chart", + "doc" : "Input source type for a chart such as dataset or metric", + "ref" : [ { + "type" : "typeref", + "name" : "DatasetUrn", + "namespace" : "com.linkedin.common", + "doc" : "Standardized dataset identifier.", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.DatasetUrn" + }, + "validate" : { + "com.linkedin.common.validator.TypedUrnValidator" : { + "accessible" : true, + "constructable" : true, + "doc" : "Standardized dataset identifier.", + "entityType" : "dataset", + "fields" : [ { + "doc" : "Standardized platform urn where dataset is defined.", + "name" : "platform", + "type" : "com.linkedin.common.urn.DataPlatformUrn" + }, { + "doc" : "Dataset native name e.g. ., /dir/subdir/, or ", + "maxLength" : 210, + "name" : "datasetName", + "type" : "string" + }, { + "doc" : "Fabric type where dataset belongs to or where it was generated.", + "name" : "origin", + "type" : "com.linkedin.common.FabricType" + } ], + "maxLength" : 284, + "name" : "Dataset", + "namespace" : "li", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + "owningTeam" : "urn:li:internalTeam:datahub" + } + } + } ] + }, { + "type" : "record", + "name" : "ChartInfo", + "namespace" : "com.linkedin.chart", + "doc" : "Information about a chart", + "include" : [ { + "type" : "record", + "name" : "CustomProperties", + "namespace" : "com.linkedin.common", + "doc" : "Misc. 
properties about an entity.", + "fields" : [ { + "name" : "customProperties", + "type" : { + "type" : "map", + "values" : "string" + }, + "doc" : "Custom property bag.", + "default" : { } + } ] + }, { + "type" : "record", + "name" : "ExternalReference", + "namespace" : "com.linkedin.common", + "doc" : "A reference to an external platform.", + "fields" : [ { + "name" : "externalUrl", + "type" : { + "type" : "typeref", + "name" : "Url", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.url.Url", + "coercerClass" : "com.linkedin.common.url.UrlCoercer" + } + }, + "doc" : "URL where the reference exist", + "optional" : true + } ] + } ], + "fields" : [ { + "name" : "title", + "type" : "string", + "doc" : "Title of the chart", + "Searchable" : { + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "description", + "type" : "string", + "doc" : "Detailed description about the chart", + "Searchable" : { } + }, { + "name" : "lastModified", + "type" : { + "type" : "record", + "name" : "ChangeAuditStamps", + "namespace" : "com.linkedin.common", + "doc" : "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into various lifecycle stages, and who acted to move it into those lifecycle stages. The recommended best practice is to include this record in your record schema, and annotate its fields as @readOnly in your resource. 
See https://github.com/linkedin/rest.li/wiki/Validation-in-Rest.li#restli-validation-annotations", + "fields" : [ { + "name" : "created", + "type" : { + "type" : "record", + "name" : "AuditStamp", + "doc" : "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage.", + "fields" : [ { + "name" : "time", + "type" : { + "type" : "typeref", + "name" : "Time", + "doc" : "Number of milliseconds since midnight, January 1, 1970 UTC. It must be a positive number", + "ref" : "long" + }, + "doc" : "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent." + }, { + "name" : "actor", + "type" : { + "type" : "typeref", + "name" : "Urn", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.Urn" + } + }, + "doc" : "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change." + }, { + "name" : "impersonator", + "type" : "Urn", + "doc" : "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.", + "optional" : true + } ] + }, + "doc" : "An AuditStamp corresponding to the creation of this resource/association/sub-resource" + }, { + "name" : "lastModified", + "type" : "AuditStamp", + "doc" : "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created" + }, { + "name" : "deleted", + "type" : "AuditStamp", + "doc" : "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. 
It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.", + "optional" : true + } ] + }, + "doc" : "Captures information about who created/last modified/deleted this chart and when" + }, { + "name" : "chartUrl", + "type" : "com.linkedin.common.Url", + "doc" : "URL for the chart. This could be used as an external link on DataHub to allow users access/view the chart", + "optional" : true + }, { + "name" : "inputs", + "type" : { + "type" : "array", + "items" : "ChartDataSourceType" + }, + "doc" : "Data sources for the chart", + "optional" : true, + "Relationship" : { + "/*/string" : { + "entityTypes" : [ "dataset" ], + "name" : "Consumes" + } + } + }, { + "name" : "type", + "type" : { + "type" : "enum", + "name" : "ChartType", + "doc" : "The various types of charts", + "symbols" : [ "BAR", "PIE", "SCATTER", "TABLE", "TEXT", "LINE", "AREA", "HISTOGRAM", "BOX_PLOT" ], + "symbolDocs" : { + "BAR" : "Chart showing a Bar chart", + "PIE" : "Chart showing a Pie chart", + "SCATTER" : "Chart showing a Scatter plot", + "TABLE" : "Chart showing a table", + "TEXT" : "Chart showing Markdown formatted text" + } + }, + "doc" : "Type of the chart", + "optional" : true, + "Searchable" : { + "addToFilters" : true, + "fieldType" : "KEYWORD" + } + }, { + "name" : "access", + "type" : { + "type" : "enum", + "name" : "AccessLevel", + "namespace" : "com.linkedin.common", + "doc" : "The various access levels", + "symbols" : [ "PUBLIC", "PRIVATE" ], + "symbolDocs" : { + "PRIVATE" : "Private availability to certain set of users", + "PUBLIC" : "Publicly available access level" + } + }, + "doc" : "Access level for the chart", + "optional" : true, + "Searchable" : { + "addToFilters" : true, + "fieldType" : "KEYWORD" + } + }, { + "name" : "lastRefreshed", + "type" : "com.linkedin.common.Time", + "doc" : "The time when this chart last refreshed", + "optional" : true + } ], + "Aspect" : { + "name" : "chartInfo" + } + }, { + "type" : 
"record", + "name" : "ChartQuery", + "namespace" : "com.linkedin.chart", + "doc" : "Information for chart query which is used for getting data of the chart", + "fields" : [ { + "name" : "rawQuery", + "type" : "string", + "doc" : "Raw query to build a chart from input datasets" + }, { + "name" : "type", + "type" : { + "type" : "enum", + "name" : "ChartQueryType", + "symbols" : [ "LOOKML", "SQL" ], + "symbolDocs" : { + "LOOKML" : "LookML queries", + "SQL" : "SQL type queries" + } + }, + "doc" : "Chart query type", + "Searchable" : { + "addToFilters" : true, + "fieldType" : "KEYWORD" + } + } ], + "Aspect" : { + "name" : "chartQuery" + } + }, "com.linkedin.chart.ChartQueryType", "com.linkedin.chart.ChartType", "com.linkedin.common.AccessLevel", "com.linkedin.common.AuditStamp", { + "type" : "record", + "name" : "BrowsePaths", + "namespace" : "com.linkedin.common", + "doc" : "Shared aspect containing Browse Paths to be indexed for an entity.", + "fields" : [ { + "name" : "paths", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "A list of valid browse paths for the entity.\n\nBrowse paths are expected to be backslash-separated strings. 
For example: 'prod/snowflake/datasetName'", + "Searchable" : { + "/*" : { + "fieldName" : "browsePaths", + "fieldType" : "BROWSE_PATH" + } + } + } ], + "Aspect" : { + "name" : "browsePaths" + } + }, "com.linkedin.common.ChangeAuditStamps", { + "type" : "typeref", + "name" : "ChartUrn", + "namespace" : "com.linkedin.common", + "doc" : "Standardized chart identifier", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.ChartUrn" + }, + "validate" : { + "com.linkedin.common.validator.TypedUrnValidator" : { + "accessible" : true, + "constructable" : true, + "doc" : "Standardized chart identifier", + "entityType" : "chart", + "fields" : [ { + "doc" : "The name of the dashboard tool such as looker, redash etc.", + "maxLength" : 20, + "name" : "dashboardTool", + "type" : "string" + }, { + "doc" : "Unique id for the chart. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. As an example, chart URL could be used here for Looker such as 'looker.linkedin.com/looks/1234'", + "maxLength" : 200, + "name" : "chartId", + "type" : "string" + } ], + "maxLength" : 236, + "name" : "Chart", + "namespace" : "li", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + "owningTeam" : "urn:li:internalTeam:datahub" + } + } + }, { + "type" : "typeref", + "name" : "CorpGroupUrn", + "namespace" : "com.linkedin.common", + "doc" : "Corporate group's AD/LDAP login", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.CorpGroupUrn" + }, + "validate" : { + "com.linkedin.common.validator.TypedUrnValidator" : { + "accessible" : true, + "constructable" : true, + "doc" : "Corporate group's AD/LDAP login", + "entityType" : "corpGroup", + "fields" : [ { + "doc" : "The name of the AD/LDAP group.", + "maxLength" : 64, + "name" : "groupName", + "type" : "string" + } ], + "maxLength" : 81, + "name" : "CorpGroup", + "namespace" : "li", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + 
"owningTeam" : "urn:li:internalTeam:security" + } + } + }, { + "type" : "typeref", + "name" : "CorpuserUrn", + "namespace" : "com.linkedin.common", + "doc" : "Corporate user's AD/LDAP login", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.CorpuserUrn" + }, + "validate" : { + "com.linkedin.common.validator.TypedUrnValidator" : { + "accessible" : true, + "constructable" : true, + "doc" : "Corporate user's AD/LDAP login", + "entityType" : "corpuser", + "fields" : [ { + "doc" : "The name of the AD/LDAP user.", + "maxLength" : 20, + "name" : "username", + "type" : "string" + } ], + "maxLength" : 36, + "name" : "Corpuser", + "namespace" : "li", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + "owningTeam" : "urn:li:internalTeam:security" + } + } + }, "com.linkedin.common.CustomProperties", { + "type" : "typeref", + "name" : "DataFlowUrn", + "namespace" : "com.linkedin.common", + "doc" : "Standardized data processing flow identifier.", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.DataFlowUrn" + }, + "validate" : { + "com.linkedin.common.validator.TypedUrnValidator" : { + "accessible" : true, + "constructable" : true, + "doc" : "Standardized data processing flow identifier.", + "entityType" : "dataFlow", + "fields" : [ { + "doc" : "Workflow manager like azkaban, airflow which orchestrates the flow", + "maxLength" : 50, + "name" : "orchestrator", + "type" : "string" + }, { + "doc" : "Unique Identifier of the data flow", + "maxLength" : 200, + "name" : "flowId", + "type" : "string" + }, { + "doc" : "Cluster where the flow is executed", + "maxLength" : 100, + "name" : "cluster", + "type" : "string" + } ], + "maxLength" : 373, + "name" : "DataFlow", + "namespace" : "li", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + "owningTeam" : "urn:li:internalTeam:datahub" + } + } + }, { + "type" : "typeref", + "name" : "DataJobUrn", + "namespace" : "com.linkedin.common", + "doc" : "Standardized data 
processing job identifier.", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.DataJobUrn" + }, + "validate" : { + "com.linkedin.common.validator.TypedUrnValidator" : { + "accessible" : true, + "constructable" : true, + "doc" : "Standardized data processing job identifier.", + "entityType" : "dataJob", + "fields" : [ { + "doc" : "Standardized data processing flow urn representing the flow for the job", + "name" : "flow", + "type" : "com.linkedin.common.urn.DataFlowUrn" + }, { + "doc" : "Unique identifier of the data job", + "maxLength" : 200, + "name" : "jobID", + "type" : "string" + } ], + "maxLength" : 594, + "name" : "DataJob", + "namespace" : "li", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + "owningTeam" : "urn:li:internalTeam:datahub" + } + } + }, { + "type" : "typeref", + "name" : "DataPlatformUrn", + "namespace" : "com.linkedin.common", + "doc" : "Standardized data platforms available", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.DataPlatformUrn" + }, + "validate" : { + "com.linkedin.common.validator.TypedUrnValidator" : { + "accessible" : true, + "constructable" : true, + "doc" : "Standardized data platforms available", + "entityType" : "dataPlatform", + "fields" : [ { + "doc" : "data platform name i.e. 
hdfs, oracle, espresso", + "maxLength" : 25, + "name" : "platformName", + "type" : "string" + } ], + "maxLength" : 45, + "name" : "DataPlatform", + "namespace" : "li", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + "owningTeam" : "urn:li:internalTeam:wherehows" + } + } + }, "com.linkedin.common.DatasetUrn", { + "type" : "typeref", + "name" : "EmailAddress", + "namespace" : "com.linkedin.common", + "ref" : "string" + }, "com.linkedin.common.ExternalReference", { + "type" : "enum", + "name" : "FabricType", + "namespace" : "com.linkedin.common", + "doc" : "Fabric group type", + "symbols" : [ "DEV", "EI", "PROD", "CORP" ], + "symbolDocs" : { + "CORP" : "Designates corporation fabrics", + "DEV" : "Designates development fabrics", + "EI" : "Designates early-integration (staging) fabrics", + "PROD" : "Designates production fabrics" + } + }, { + "type" : "record", + "name" : "GlobalTags", + "namespace" : "com.linkedin.common", + "doc" : "Tag aspect used for applying tags to an entity", + "fields" : [ { + "name" : "tags", + "type" : { + "type" : "array", + "items" : { + "type" : "record", + "name" : "TagAssociation", + "doc" : "Properties of an applied tag. For now, just an Urn. 
In the future we can extend this with other properties, e.g.\npropagation parameters.", + "fields" : [ { + "name" : "tag", + "type" : { + "type" : "typeref", + "name" : "TagUrn", + "doc" : "Globally defined tag", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.TagUrn" + }, + "validate" : { + "com.linkedin.common.validator.TypedUrnValidator" : { + "accessible" : true, + "constructable" : true, + "doc" : "Globally defined tags", + "entityType" : "tag", + "fields" : [ { + "doc" : "tag name", + "maxLength" : 200, + "name" : "name", + "type" : "string" + } ], + "maxLength" : 220, + "name" : "Tag", + "namespace" : "li", + "owners" : [ ], + "owningTeam" : "urn:li:internalTeam:datahub" + } + } + }, + "doc" : "Urn of the applied tag", + "Searchable" : { + "fieldName" : "tags", + "fieldType" : "URN_PARTIAL", + "hasValuesFieldName" : "hasTags" + } + } ] + } + }, + "doc" : "Tags associated with a given entity" + } ], + "Aspect" : { + "name" : "globalTags" + } + }, { + "type" : "typeref", + "name" : "GlossaryNodeUrn", + "namespace" : "com.linkedin.common", + "doc" : "Business Node", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.GlossaryNodeUrn" + }, + "validate" : { + "com.linkedin.common.validator.TypedUrnValidator" : { + "accessible" : true, + "constructable" : true, + "doc" : "Standardized business node identifier", + "entityType" : "glossaryNode", + "fields" : [ { + "doc" : "The name of business node with hierarchy.", + "name" : "name", + "type" : "string" + } ], + "maxLength" : 56, + "name" : "GlossaryNode", + "namespace" : "li", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + "owningTeam" : "urn:li:internalTeam:datahub" + } + } + }, { + "type" : "record", + "name" : "GlossaryTermAssociation", + "namespace" : "com.linkedin.common", + "doc" : "Properties of an applied glossary term.", + "fields" : [ { + "name" : "urn", + "type" : { + "type" : "typeref", + "name" : "GlossaryTermUrn", + "doc" : "Business Term", 
+ "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.GlossaryTermUrn" + }, + "validate" : { + "com.linkedin.common.validator.TypedUrnValidator" : { + "accessible" : true, + "constructable" : true, + "doc" : "business term", + "entityType" : "glossaryTerm", + "fields" : [ { + "doc" : "The name of business term with hierarchy.", + "name" : "name", + "type" : "string" + } ], + "maxLength" : 56, + "name" : "GlossaryTerm", + "namespace" : "li", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + "owningTeam" : "urn:li:internalTeam:datahub" + } + } + }, + "doc" : "Urn of the applied glossary term" + } ] + }, "com.linkedin.common.GlossaryTermUrn", { + "type" : "record", + "name" : "GlossaryTerms", + "namespace" : "com.linkedin.common", + "doc" : "Related business terms information", + "fields" : [ { + "name" : "terms", + "type" : { + "type" : "array", + "items" : "GlossaryTermAssociation" + }, + "doc" : "The related business terms" + }, { + "name" : "auditStamp", + "type" : "AuditStamp", + "doc" : "Audit stamp containing who reported the related business term" + } ], + "Aspect" : { + "name" : "glossaryTerms" + } + }, { + "type" : "record", + "name" : "InstitutionalMemory", + "namespace" : "com.linkedin.common", + "doc" : "Institutional memory of an entity. This is a way to link to relevant documentation and provide description of the documentation. Institutional or tribal knowledge is very important for users to leverage the entity.", + "fields" : [ { + "name" : "elements", + "type" : { + "type" : "array", + "items" : { + "type" : "record", + "name" : "InstitutionalMemoryMetadata", + "doc" : "Metadata corresponding to a record of institutional memory.", + "fields" : [ { + "name" : "url", + "type" : "Url", + "doc" : "Link to an engineering design document or a wiki page." + }, { + "name" : "description", + "type" : "string", + "doc" : "Description of the link." 
+ }, { + "name" : "createStamp", + "type" : "AuditStamp", + "doc" : "Audit stamp associated with creation of this record" + } ] + } + }, + "doc" : "List of records that represent institutional memory of an entity. Each record consists of a link, description, creator and timestamps associated with that record." + } ], + "Aspect" : { + "name" : "institutionalMemory" + } + }, "com.linkedin.common.InstitutionalMemoryMetadata", { + "type" : "enum", + "name" : "MLFeatureDataType", + "namespace" : "com.linkedin.common", + "doc" : "MLFeature Data Type", + "symbols" : [ "USELESS", "NOMINAL", "ORDINAL", "BINARY", "COUNT", "TIME", "INTERVAL", "IMAGE", "VIDEO", "AUDIO", "TEXT", "MAP", "SEQUENCE", "SET", "CONTINUOUS", "BYTE", "UNKNOWN" ], + "symbolDocs" : { + "AUDIO" : "Audio Data", + "BINARY" : "Binary data is discrete data that can be in only one of two categories — either yes or no, 1 or 0, off or on, etc", + "BYTE" : "Bytes data are binary-encoded values that can represent complex objects.", + "CONTINUOUS" : "Continuous data are made of uncountable values, often the result of a measurement such as height, weight, age etc.", + "COUNT" : "Count data is discrete whole number data — no negative numbers here.\nCount data often has many small values, such as zero and one.", + "IMAGE" : "Image Data", + "INTERVAL" : "Interval data has equal spaces between the numbers and does not represent a temporal pattern.\nExamples include percentages, temperatures, and income.", + "MAP" : "Mapping Data Type ex: dict, map", + "NOMINAL" : "Nominal data is made of discrete values with no numerical relationship between the different categories — mean and median are meaningless.\nAnimal species is one example. 
For example, pig is not higher than bird and lower than fish.", + "ORDINAL" : "Ordinal data are discrete integers that can be ranked or sorted.\nFor example, the distance between first and second may not be the same as the distance between second and third.", + "SEQUENCE" : "Sequence Data Type ex: list, tuple, range", + "SET" : "Set Data Type ex: set, frozenset", + "TEXT" : "Text Data", + "TIME" : "Time data is a cyclical, repeating continuous form of data.\nThe relevant time features can be any period— daily, weekly, monthly, annual, etc.", + "UNKNOWN" : "Unknown data are data that we don't know the type for.", + "USELESS" : "Useless data is unique, discrete data with no potential relationship with the outcome variable.\nA useless feature has high cardinality. An example would be bank account numbers that were generated randomly.", + "VIDEO" : "Video Data" + } + }, { + "type" : "typeref", + "name" : "MLFeatureUrn", + "namespace" : "com.linkedin.common", + "doc" : "Standardized MLFeature identifier.", + "ref" : "string", + "java" : { + "class" : "com.linkedin.common.urn.MLFeatureUrn" + }, + "validate" : { + "com.linkedin.common.validator.TypedUrnValidator" : { + "accessible" : true, + "constructable" : true, + "doc" : "Standardized MLFeature identifier.", + "entityType" : "mlFeature", + "fields" : [ { + "doc" : "Namespace for the MLFeature", + "name" : "mlFeatureNamespace", + "type" : "string" + }, { + "doc" : "Name of the MLFeature", + "maxLength" : 210, + "name" : "mlFeatureName", + "type" : "string" + } ], + "maxLength" : 284, + "name" : "MLFeature", + "namespace" : "li", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + "owningTeam" : "urn:li:internalTeam:datahub" + } + } + }, { + "type" : "record", + "name" : "Owner", + "namespace" : "com.linkedin.common", + "doc" : "Ownership information", + "fields" : [ { + "name" : "owner", + "type" : "Urn", + "doc" : "Owner URN, e.g. 
urn:li:corpuser:ldap, urn:li:corpGroup:group_name, and urn:li:multiProduct:mp_name\n(Caveat: only corpuser is currently supported in the frontend.)", + "Relationship" : { + "entityTypes" : [ "corpUser", "corpGroup" ], + "name" : "OwnedBy" + }, + "Searchable" : { + "fieldName" : "owners", + "fieldType" : "URN", + "hasValuesFieldName" : "hasOwners", + "queryByDefault" : false + } + }, { + "name" : "type", + "type" : { + "type" : "enum", + "name" : "OwnershipType", + "doc" : "Owner category or owner role", + "symbols" : [ "DEVELOPER", "DATAOWNER", "DELEGATE", "PRODUCER", "CONSUMER", "STAKEHOLDER" ], + "symbolDocs" : { + "CONSUMER" : "A person, group, or service that consumes the data", + "DATAOWNER" : "A person or group that is owning the data", + "DELEGATE" : "A person or a group that overseas the operation, e.g. a DBA or SRE.", + "DEVELOPER" : "A person or group that is in charge of developing the code", + "PRODUCER" : "A person, group, or service that produces/generates the data", + "STAKEHOLDER" : "A person or a group that has direct business interest" + } + }, + "doc" : "The type of the ownership" + }, { + "name" : "source", + "type" : { + "type" : "record", + "name" : "OwnershipSource", + "doc" : "Source/provider of the ownership information", + "fields" : [ { + "name" : "type", + "type" : { + "type" : "enum", + "name" : "OwnershipSourceType", + "symbols" : [ "AUDIT", "DATABASE", "FILE_SYSTEM", "ISSUE_TRACKING_SYSTEM", "MANUAL", "SERVICE", "SOURCE_CONTROL", "OTHER" ], + "symbolDocs" : { + "AUDIT" : "Auditing system or audit logs", + "DATABASE" : "Database, e.g. GRANTS table", + "FILE_SYSTEM" : "File system, e.g. file/directory owner", + "ISSUE_TRACKING_SYSTEM" : "Issue tracking system, e.g. Jira", + "MANUAL" : "Manually provided by a user", + "OTHER" : "Other sources", + "SERVICE" : "Other ownership-like service, e.g. Nuage, ACL service etc", + "SOURCE_CONTROL" : "SCM system, e.g. 
GIT, SVN" + } + }, + "doc" : "The type of the source" + }, { + "name" : "url", + "type" : "string", + "doc" : "A reference URL for the source", + "optional" : true + } ] + }, + "doc" : "Source information for the ownership", + "optional" : true + } ] + }, { + "type" : "record", + "name" : "Ownership", + "namespace" : "com.linkedin.common", + "doc" : "Ownership information of an entity.", + "fields" : [ { + "name" : "owners", + "type" : { + "type" : "array", + "items" : "Owner" + }, + "doc" : "List of owners of the entity." + }, { + "name" : "lastModified", + "type" : "AuditStamp", + "doc" : "Audit stamp containing who last modified the record and when." + } ], + "Aspect" : { + "name" : "ownership" + } + }, "com.linkedin.common.OwnershipSource", "com.linkedin.common.OwnershipSourceType", "com.linkedin.common.OwnershipType", { + "type" : "record", + "name" : "Status", + "namespace" : "com.linkedin.common", + "doc" : "The status metadata of an entity, e.g. dataset, metric, feature, etc.", + "fields" : [ { + "name" : "removed", + "type" : "boolean", + "doc" : "whether the entity is removed or not", + "default" : false, + "Searchable" : { + "fieldType" : "BOOLEAN" + } + } ], + "Aspect" : { + "name" : "status" + } + }, "com.linkedin.common.TagAssociation", "com.linkedin.common.TagUrn", "com.linkedin.common.Time", { + "type" : "typeref", + "name" : "Uri", + "namespace" : "com.linkedin.common", + "ref" : "string", + "java" : { + "class" : "java.net.URI" + } + }, "com.linkedin.common.Url", "com.linkedin.common.Urn", { + "type" : "record", + "name" : "VersionTag", + "namespace" : "com.linkedin.common", + "doc" : "A resource-defined string representing the resource state for the purpose of concurrency control", + "fields" : [ { + "name" : "versionTag", + "type" : "string", + "optional" : true + } ] + }, { + "type" : "record", + "name" : "DashboardInfo", + "namespace" : "com.linkedin.dashboard", + "doc" : "Information about a dashboard", + "include" : [ 
"com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "fields" : [ { + "name" : "title", + "type" : "string", + "doc" : "Title of the dashboard", + "Searchable" : { + "boostScore" : 10.0, + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "description", + "type" : "string", + "doc" : "Detailed description about the dashboard", + "Searchable" : { + "fieldType" : "TEXT", + "hasValuesFieldName" : "hasDescription" + } + }, { + "name" : "charts", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.ChartUrn" + }, + "doc" : "Charts in a dashboard", + "default" : [ ], + "Relationship" : { + "/*" : { + "entityTypes" : [ "chart" ], + "name" : "Contains" + } + } + }, { + "name" : "lastModified", + "type" : "com.linkedin.common.ChangeAuditStamps", + "doc" : "Captures information about who created/last modified/deleted this dashboard and when" + }, { + "name" : "dashboardUrl", + "type" : "com.linkedin.common.Url", + "doc" : "URL for the dashboard. 
This could be used as an external link on DataHub to allow users access/view the dashboard", + "optional" : true + }, { + "name" : "access", + "type" : "com.linkedin.common.AccessLevel", + "doc" : "Access level for the dashboard", + "optional" : true, + "Searchable" : { + "addToFilters" : true, + "fieldType" : "KEYWORD" + } + }, { + "name" : "lastRefreshed", + "type" : "com.linkedin.common.Time", + "doc" : "The time when this dashboard last refreshed", + "optional" : true + } ], + "Aspect" : { + "name" : "dashboardInfo" + } + }, { + "type" : "record", + "name" : "DataFlowInfo", + "namespace" : "com.linkedin.datajob", + "doc" : "Information about a Data processing flow", + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "fields" : [ { + "name" : "name", + "type" : "string", + "doc" : "Flow name", + "Searchable" : { + "boostScore" : 10.0, + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "description", + "type" : "string", + "doc" : "Flow description", + "optional" : true, + "Searchable" : { + "fieldType" : "TEXT", + "hasValuesFieldName" : "hasDescription" + } + }, { + "name" : "project", + "type" : "string", + "doc" : "Optional project/namespace associated with the flow", + "optional" : true, + "Searchable" : { + "fieldType" : "TEXT_PARTIAL", + "queryByDefault" : false + } + } ], + "Aspect" : { + "name" : "dataFlowInfo" + } + }, { + "type" : "record", + "name" : "DataJobInfo", + "namespace" : "com.linkedin.datajob", + "doc" : "Information about a Data processing job", + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "fields" : [ { + "name" : "name", + "type" : "string", + "doc" : "Job name", + "Searchable" : { + "boostScore" : 10.0, + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "description", + "type" : "string", + "doc" : "Job description", + "optional" : true, + "Searchable" : { + "fieldType" 
: "TEXT", + "hasValuesFieldName" : "hasDescription" + } + }, { + "name" : "type", + "type" : [ { + "type" : "enum", + "name" : "AzkabanJobType", + "namespace" : "com.linkedin.datajob.azkaban", + "doc" : "The various types of support azkaban jobs", + "symbols" : [ "COMMAND", "HADOOP_JAVA", "HADOOP_SHELL", "HIVE", "PIG", "SQL" ], + "symbolDocs" : { + "COMMAND" : "The command job type is one of the basic built-in types. It runs multiple UNIX commands using java processbuilder.\nUpon execution, Azkaban spawns off a process to run the command.", + "HADOOP_JAVA" : "Runs a java program with ability to access Hadoop cluster.\nhttps://azkaban.readthedocs.io/en/latest/jobTypes.html#java-job-type", + "HADOOP_SHELL" : "In large part, this is the same Command type. The difference is its ability to talk to a Hadoop cluster\nsecurely, via Hadoop tokens.", + "HIVE" : "Hive type is for running Hive jobs.", + "PIG" : "Pig type is for running Pig jobs.", + "SQL" : "SQL is for running Presto, mysql queries etc" + } + } ], + "doc" : "Datajob type" + }, { + "name" : "flowUrn", + "type" : "com.linkedin.common.DataFlowUrn", + "doc" : "DataFlow urn that this job is part of", + "optional" : true, + "Relationship" : { + "entityTypes" : [ "dataFlow" ], + "name" : "IsPartOf" + } + } ], + "Aspect" : { + "name" : "dataJobInfo" + } + }, { + "type" : "record", + "name" : "DataJobInputOutput", + "namespace" : "com.linkedin.datajob", + "doc" : "Information about the inputs and outputs of a Data processing job", + "fields" : [ { + "name" : "inputDatasets", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.DatasetUrn" + }, + "doc" : "Input datasets consumed by the data job during processing", + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataset" ], + "name" : "Consumes" + } + }, + "Searchable" : { + "/*" : { + "fieldName" : "inputs", + "fieldType" : "URN", + "numValuesFieldName" : "numInputDatasets", + "queryByDefault" : false + } + } + }, { + "name" : "outputDatasets", + 
"type" : { + "type" : "array", + "items" : "com.linkedin.common.DatasetUrn" + }, + "doc" : "Output datasets produced by the data job during processing", + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataset" ], + "name" : "Produces" + } + }, + "Searchable" : { + "/*" : { + "fieldName" : "outputs", + "fieldType" : "URN", + "numValuesFieldName" : "numOutputDatasets", + "queryByDefault" : false + } + } + }, { + "name" : "inputDatajobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.DataJobUrn" + }, + "doc" : "Input datajobs that this data job depends on", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob" ], + "name" : "DownstreamOf" + } + } + } ], + "Aspect" : { + "name" : "dataJobInputOutput" + } + }, "com.linkedin.datajob.azkaban.AzkabanJobType", { + "type" : "record", + "name" : "DatasetDeprecation", + "namespace" : "com.linkedin.dataset", + "doc" : "Dataset deprecation status", + "fields" : [ { + "name" : "deprecated", + "type" : "boolean", + "doc" : "Whether the dataset is deprecated by owner.", + "Searchable" : { + "fieldType" : "BOOLEAN", + "weightsPerFieldValue" : { + "true" : 0.5 + } + } + }, { + "name" : "decommissionTime", + "type" : "long", + "doc" : "The time user plan to decommission this dataset.", + "optional" : true + }, { + "name" : "note", + "type" : "string", + "doc" : "Additional information about the dataset deprecation plan, such as the wiki, doc, RB." 
+ }, { + "name" : "actor", + "type" : "com.linkedin.common.Urn", + "doc" : "The corpuser URN which will be credited for modifying this deprecation content.", + "optional" : true + } ], + "Aspect" : { + "name" : "datasetDeprecation" + } + }, { + "type" : "enum", + "name" : "DatasetLineageType", + "namespace" : "com.linkedin.dataset", + "doc" : "The various types of supported dataset lineage", + "symbols" : [ "COPY", "TRANSFORMED", "VIEW" ], + "symbolDocs" : { + "COPY" : "Direct copy without modification", + "TRANSFORMED" : "Transformed data with modification (format or content change)", + "VIEW" : "Represents a view defined on the sources e.g. Hive view defined on underlying hive tables or a Hive table pointing to a HDFS dataset or DALI view defined on multiple sources" + } + }, { + "type" : "record", + "name" : "DatasetProperties", + "namespace" : "com.linkedin.dataset", + "doc" : "Properties associated with a Dataset", + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "fields" : [ { + "name" : "description", + "type" : "string", + "doc" : "Documentation of the dataset", + "optional" : true, + "Searchable" : { + "fieldType" : "TEXT", + "hasValuesFieldName" : "hasDescription" + } + }, { + "name" : "uri", + "type" : "com.linkedin.common.Uri", + "doc" : "The abstracted URI such as hdfs:///data/tracking/PageViewEvent, file:///dir/file_name. Uri should not include any environment specific properties. Some datasets might not have a standardized uri, which makes this field optional (i.e. kafka topic).", + "optional" : true + }, { + "name" : "tags", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "[Legacy] Unstructured tags for the dataset. Structured tags can be applied via the `GlobalTags` aspect.", + "default" : [ ] + } ], + "Aspect" : { + "name" : "datasetProperties" + } + }, { + "type" : "typeref", + "name" : "SchemaFieldPath", + "namespace" : "com.linkedin.dataset", + "doc" : "Schema field path. 
TODO: Add formal documentation on normalization rules.", + "ref" : "string" + }, { + "type" : "record", + "name" : "Upstream", + "namespace" : "com.linkedin.dataset", + "doc" : "Upstream lineage information about a dataset including the source reporting the lineage", + "fields" : [ { + "name" : "auditStamp", + "type" : "com.linkedin.common.AuditStamp", + "doc" : "Audit stamp containing who reported the lineage and when" + }, { + "name" : "dataset", + "type" : "com.linkedin.common.DatasetUrn", + "doc" : "The upstream dataset the lineage points to", + "Relationship" : { + "entityTypes" : [ "dataset" ], + "name" : "DownstreamOf" + }, + "Searchable" : { + "fieldName" : "upstreams", + "fieldType" : "URN", + "queryByDefault" : false + } + }, { + "name" : "type", + "type" : "DatasetLineageType", + "doc" : "The type of the lineage" + } ] + }, { + "type" : "record", + "name" : "UpstreamLineage", + "namespace" : "com.linkedin.dataset", + "doc" : "Upstream lineage of a dataset", + "fields" : [ { + "name" : "upstreams", + "type" : { + "type" : "array", + "items" : "Upstream" + }, + "doc" : "List of upstream dataset lineage information" + } ], + "Aspect" : { + "name" : "upstreamLineage" + } + }, { + "type" : "record", + "name" : "GlossaryNodeInfo", + "namespace" : "com.linkedin.glossary", + "doc" : "Properties associated with a GlossaryNode", + "fields" : [ { + "name" : "definition", + "type" : "string", + "doc" : "Definition of business node", + "Searchable" : { } + }, { + "name" : "parentNode", + "type" : "com.linkedin.common.GlossaryNodeUrn", + "doc" : "Parent node of the glossary term", + "optional" : true + } ], + "Aspect" : { + "name" : "glossaryNodeInfo" + } + }, { + "type" : "record", + "name" : "GlossaryTermInfo", + "namespace" : "com.linkedin.glossary", + "doc" : "Properties associated with a GlossaryTerm", + "fields" : [ { + "name" : "definition", + "type" : "string", + "doc" : "Definition of business term", + "Searchable" : { } + }, { + "name" : "parentNode", + 
"type" : "com.linkedin.common.GlossaryNodeUrn", + "doc" : "Parent node of the glossary term", + "optional" : true + }, { + "name" : "termSource", + "type" : "string", + "doc" : "Source of the Business Term (INTERNAL or EXTERNAL) with default value as INTERNAL", + "Searchable" : { + "fieldType" : "KEYWORD" + } + }, { + "name" : "sourceRef", + "type" : "string", + "doc" : "External Reference to the business-term", + "optional" : true, + "Searchable" : { + "fieldType" : "KEYWORD" + } + }, { + "name" : "sourceUrl", + "type" : "com.linkedin.common.Url", + "doc" : "The abstracted URL such as https://spec.edmcouncil.org/fibo/ontology/FBC/FinancialInstruments/FinancialInstruments/CashInstrument.", + "optional" : true + }, { + "name" : "customProperties", + "type" : { + "type" : "map", + "values" : "string" + }, + "doc" : "A key-value map to capture any other non-standardized properties for the glossary term", + "default" : { } + } ], + "Aspect" : { + "name" : "glossaryTermInfo" + } + }, { + "type" : "record", + "name" : "CorpGroupInfo", + "namespace" : "com.linkedin.identity", + "doc" : "group of corpUser, it may contains nested group", + "fields" : [ { + "name" : "email", + "type" : "com.linkedin.common.EmailAddress", + "doc" : "email of this group" + }, { + "name" : "admins", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.CorpuserUrn" + }, + "doc" : "owners of this group", + "Relationship" : { + "/*" : { + "entityTypes" : [ "corpUser" ], + "name" : "OwnedBy" + } + } + }, { + "name" : "members", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.CorpuserUrn" + }, + "doc" : "List of ldap urn in this group.", + "Relationship" : { + "/*" : { + "entityTypes" : [ "corpUser" ], + "name" : "IsPartOf" + } + } + }, { + "name" : "groups", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.CorpGroupUrn" + }, + "doc" : "List of groups in this group.", + "Relationship" : { + "/*" : { + "entityTypes" : [ "corpGroup" ], + "name" : 
"IsPartOf" + } + } + } ], + "Aspect" : { + "EntityUrns" : [ "com.linkedin.common.CorpGroupUrn" ], + "name" : "corpGroupInfo" + } + }, { + "type" : "record", + "name" : "CorpUserEditableInfo", + "namespace" : "com.linkedin.identity", + "doc" : "Linkedin corp user information that can be edited from UI", + "fields" : [ { + "name" : "aboutMe", + "type" : "string", + "doc" : "About me section of the user", + "optional" : true + }, { + "name" : "teams", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Teams that the user belongs to e.g. Metadata", + "default" : [ ], + "Searchable" : { + "/*" : { + "fieldType" : "TEXT" + } + } + }, { + "name" : "skills", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Skills that the user possesses e.g. Machine Learning", + "default" : [ ], + "Searchable" : { + "/*" : { + "fieldType" : "TEXT" + } + } + }, { + "name" : "pictureLink", + "type" : "com.linkedin.common.Url", + "doc" : "A URL which points to a picture which user wants to set as a profile photo", + "default" : "https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web/packages/data-portal/public/assets/images/default_avatar.png" + } ], + "Aspect" : { + "EntityUrns" : [ "com.linkedin.common.CorpuserUrn" ], + "name" : "corpUserEditableInfo" + } + }, { + "type" : "record", + "name" : "CorpUserInfo", + "namespace" : "com.linkedin.identity", + "doc" : "Linkedin corp user information", + "fields" : [ { + "name" : "active", + "type" : "boolean", + "doc" : "Whether the corpUser is active, ref: https://iwww.corp.linkedin.com/wiki/cf/display/GTSD/Accessing+Active+Directory+via+LDAP+tools", + "Searchable" : { + "fieldType" : "BOOLEAN", + "weightsPerFieldValue" : { + "true" : 2.0 + } + } + }, { + "name" : "displayName", + "type" : "string", + "doc" : "displayName of this user , e.g. 
Hang Zhang(DataHQ)", + "optional" : true + }, { + "name" : "email", + "type" : "com.linkedin.common.EmailAddress", + "doc" : "email address of this user", + "Searchable" : { + "fieldType" : "KEYWORD", + "queryByDefault" : true + } + }, { + "name" : "title", + "type" : "string", + "doc" : "title of this user", + "optional" : true, + "Searchable" : { + "fieldType" : "KEYWORD", + "queryByDefault" : true + } + }, { + "name" : "managerUrn", + "type" : "com.linkedin.common.CorpuserUrn", + "doc" : "direct manager of this user", + "optional" : true, + "Relationship" : { + "entityTypes" : [ "corpUser" ], + "name" : "ReportsTo" + }, + "Searchable" : { + "fieldName" : "managerLdap", + "fieldType" : "URN", + "queryByDefault" : true + } + }, { + "name" : "departmentId", + "type" : "long", + "doc" : "department id this user belong to", + "optional" : true + }, { + "name" : "departmentName", + "type" : "string", + "doc" : "department name this user belong to", + "optional" : true + }, { + "name" : "firstName", + "type" : "string", + "doc" : "first name of this user", + "optional" : true + }, { + "name" : "lastName", + "type" : "string", + "doc" : "last name of this user", + "optional" : true + }, { + "name" : "fullName", + "type" : "string", + "doc" : "Common name of this user, format is firstName + lastName (split by a whitespace)", + "optional" : true, + "Searchable" : { + "boostScore" : 10.0, + "fieldType" : "TEXT_PARTIAL", + "queryByDefault" : true + } + }, { + "name" : "countryCode", + "type" : "string", + "doc" : "two uppercase letters country code. e.g. 
US", + "optional" : true + } ], + "Aspect" : { + "EntityUrns" : [ "com.linkedin.common.CorpuserUrn" ], + "name" : "corpUserInfo" + } + }, { + "type" : "typeref", + "name" : "Aspect", + "namespace" : "com.linkedin.metadata.aspect", + "doc" : "A union of all supported metadata aspects for a Chart", + "ref" : [ { + "type" : "record", + "name" : "ChartKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for a Chart", + "fields" : [ { + "name" : "dashboardTool", + "type" : "string", + "doc" : "The name of the dashboard tool such as looker, redash etc.", + "Searchable" : { + "addToFilters" : true, + "boostScore" : 4.0, + "fieldName" : "tool", + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "chartId", + "type" : "string", + "doc" : "Unique id for the chart. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. As an example, chart URL could be used here for Looker such as 'looker.linkedin.com/looks/1234'" + } ], + "Aspect" : { + "name" : "chartKey" + } + }, "com.linkedin.chart.ChartInfo", "com.linkedin.chart.ChartQuery", { + "type" : "record", + "name" : "CorpGroupKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for a CorpGroup", + "fields" : [ { + "name" : "name", + "type" : "string", + "doc" : "The name of the AD/LDAP group." 
+ } ], + "Aspect" : { + "name" : "corpGroupKey" + } + }, "com.linkedin.identity.CorpGroupInfo", { + "type" : "record", + "name" : "CorpUserKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for a CorpUser", + "fields" : [ { + "name" : "username", + "type" : "string", + "doc" : "The name of the AD/LDAP user.", + "Searchable" : { + "boostScore" : 2.0, + "enableAutocomplete" : true, + "fieldName" : "ldap", + "fieldType" : "TEXT_PARTIAL" + } + } ], + "Aspect" : { + "name" : "corpUserKey" + } + }, "com.linkedin.identity.CorpUserEditableInfo", "com.linkedin.identity.CorpUserInfo", { + "type" : "record", + "name" : "DashboardKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for a Dashboard", + "fields" : [ { + "name" : "dashboardTool", + "type" : "string", + "doc" : "The name of the dashboard tool such as looker, redash etc.", + "Searchable" : { + "addToFilters" : true, + "boostScore" : 4.0, + "fieldName" : "tool", + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "dashboardId", + "type" : "string", + "doc" : "Unique id for the dashboard. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. 
As an example, dashboard URL could be used here for Looker such as 'looker.linkedin.com/dashboards/1234'" + } ], + "Aspect" : { + "name" : "dashboardKey" + } + }, "com.linkedin.dashboard.DashboardInfo", { + "type" : "record", + "name" : "DataFlowKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for a Data Flow", + "fields" : [ { + "name" : "orchestrator", + "type" : "string", + "doc" : "Workflow manager like azkaban, airflow which orchestrates the flow", + "Searchable" : { + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "flowId", + "type" : "string", + "doc" : "Unique Identifier of the data flow", + "Searchable" : { + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "cluster", + "type" : "string", + "doc" : "Cluster where the flow is executed", + "Searchable" : { + "fieldType" : "TEXT_PARTIAL" + } + } ], + "Aspect" : { + "name" : "dataFlowKey" + } + }, "com.linkedin.datajob.DataFlowInfo", { + "type" : "record", + "name" : "DataJobKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for a Data Job", + "fields" : [ { + "name" : "flow", + "type" : "com.linkedin.common.Urn", + "doc" : "Standardized data processing flow urn representing the flow for the job", + "Relationship" : { + "entityTypes" : [ "dataFlow" ], + "name" : "IsPartOf" + }, + "Searchable" : { + "fieldName" : "dataFlow", + "fieldType" : "URN_PARTIAL", + "queryByDefault" : false + } + }, { + "name" : "jobId", + "type" : "string", + "doc" : "Unique Identifier of the data job", + "Searchable" : { + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } + } ], + "Aspect" : { + "name" : "dataJobKey" + } + }, "com.linkedin.datajob.DataJobInfo", "com.linkedin.datajob.DataJobInputOutput", "com.linkedin.dataset.DatasetDeprecation", "com.linkedin.dataset.DatasetProperties", "com.linkedin.dataset.UpstreamLineage", { + "type" : "record", + "name" : "SchemaMetadata", + "namespace" : "com.linkedin.schema", + "doc" : "SchemaMetadata to describe 
metadata related to store schema", + "include" : [ { + "type" : "record", + "name" : "SchemaMetadataKey", + "doc" : "Key to retrieve schema metadata.", + "fields" : [ { + "name" : "schemaName", + "type" : "string", + "doc" : "Schema name e.g. PageViewEvent, identity.Profile, ams.account_management_tracking", + "validate" : { + "strlen" : { + "max" : 500, + "min" : 1 + } + } + }, { + "name" : "platform", + "type" : "com.linkedin.common.DataPlatformUrn", + "doc" : "Standardized platform urn where schema is defined. The data platform Urn (urn:li:platform:{platform_name})" + }, { + "name" : "version", + "type" : "long", + "doc" : "Every change to SchemaMetadata in the resource results in a new version. Version is server assigned. This version is differ from platform native schema version." + } ] + }, "com.linkedin.common.ChangeAuditStamps" ], + "fields" : [ { + "name" : "dataset", + "type" : "com.linkedin.common.DatasetUrn", + "doc" : "Dataset this schema metadata is associated with.", + "optional" : true + }, { + "name" : "cluster", + "type" : "string", + "doc" : "The cluster this schema metadata resides from", + "optional" : true + }, { + "name" : "hash", + "type" : "string", + "doc" : "the SHA1 hash of the schema content" + }, { + "name" : "platformSchema", + "type" : [ { + "type" : "record", + "name" : "EspressoSchema", + "doc" : "Schema text of an espresso table schema.", + "fields" : [ { + "name" : "documentSchema", + "type" : "string", + "doc" : "The native espresso document schema." + }, { + "name" : "tableSchema", + "type" : "string", + "doc" : "The espresso table schema definition." + } ] + }, { + "type" : "record", + "name" : "OracleDDL", + "doc" : "Schema holder for oracle data definition language that describes an oracle table.", + "fields" : [ { + "name" : "tableSchema", + "type" : "string", + "doc" : "The native schema in the dataset's platform. This is a human readable (json blob) table schema." 
+ } ] + }, { + "type" : "record", + "name" : "MySqlDDL", + "doc" : "Schema holder for MySql data definition language that describes an MySql table.", + "fields" : [ { + "name" : "tableSchema", + "type" : "string", + "doc" : "The native schema in the dataset's platform. This is a human readable (json blob) table schema." + } ] + }, { + "type" : "record", + "name" : "PrestoDDL", + "doc" : "Schema holder for presto data definition language that describes a presto view.", + "fields" : [ { + "name" : "rawSchema", + "type" : "string", + "doc" : "The raw schema in the dataset's platform. This includes the DDL and the columns extracted from DDL." + } ] + }, { + "type" : "record", + "name" : "KafkaSchema", + "doc" : "Schema holder for kafka schema.", + "fields" : [ { + "name" : "documentSchema", + "type" : "string", + "doc" : "The native kafka document schema. This is a human readable avro document schema." + } ] + }, { + "type" : "record", + "name" : "BinaryJsonSchema", + "doc" : "Schema text of binary JSON schema.", + "fields" : [ { + "name" : "schema", + "type" : "string", + "doc" : "The native schema text for binary JSON file format." + } ] + }, { + "type" : "record", + "name" : "OrcSchema", + "doc" : "Schema text of an ORC schema.", + "fields" : [ { + "name" : "schema", + "type" : "string", + "doc" : "The native schema for ORC file format." + } ] + }, { + "type" : "record", + "name" : "Schemaless", + "doc" : "The dataset has no specific schema associated with it", + "fields" : [ ] + }, { + "type" : "record", + "name" : "KeyValueSchema", + "doc" : "Schema text of a key-value store schema.", + "fields" : [ { + "name" : "keySchema", + "type" : "string", + "doc" : "The raw schema for the key in the key-value store." + }, { + "name" : "valueSchema", + "type" : "string", + "doc" : "The raw schema for the value in the key-value store." 
+ } ] + }, { + "type" : "record", + "name" : "OtherSchema", + "doc" : "Schema holder for undefined schema types.", + "fields" : [ { + "name" : "rawSchema", + "type" : "string", + "doc" : "The native schema in the dataset's platform." + } ] + } ], + "doc" : "The native schema in the dataset's platform." + }, { + "name" : "fields", + "type" : { + "type" : "array", + "items" : { + "type" : "record", + "name" : "SchemaField", + "doc" : "SchemaField to describe metadata related to dataset schema. Schema normalization rules: http://go/tms-schema", + "fields" : [ { + "name" : "fieldPath", + "type" : "com.linkedin.dataset.SchemaFieldPath", + "doc" : "Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above.", + "Searchable" : { + "fieldName" : "fieldPaths", + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "jsonPath", + "type" : "string", + "doc" : "Flattened name of a field in JSON Path notation.", + "optional" : true + }, { + "name" : "nullable", + "type" : "boolean", + "doc" : "Indicates if this field is optional or nullable", + "default" : false + }, { + "name" : "description", + "type" : "string", + "doc" : "Description", + "optional" : true, + "Searchable" : { + "boostScore" : 0.1, + "fieldName" : "fieldDescriptions", + "fieldType" : "TEXT" + } + }, { + "name" : "type", + "type" : { + "type" : "record", + "name" : "SchemaFieldDataType", + "doc" : "Schema field data types", + "fields" : [ { + "name" : "type", + "type" : [ { + "type" : "record", + "name" : "BooleanType", + "doc" : "Boolean field type.", + "fields" : [ ] + }, { + "type" : "record", + "name" : "FixedType", + "doc" : "Fixed field type.", + "fields" : [ ] + }, { + "type" : "record", + "name" : "StringType", + "doc" : "String field type.", + "fields" : [ ] + }, { + "type" : "record", + "name" : "BytesType", + "doc" : "Bytes field type.", + "fields" : [ ] + }, { + "type" : "record", + "name" : "NumberType", + "doc" : "Number data type: long, 
integer, short, etc..", + "fields" : [ ] + }, { + "type" : "record", + "name" : "DateType", + "doc" : "Date field type.", + "fields" : [ ] + }, { + "type" : "record", + "name" : "TimeType", + "doc" : "Time field type. This should also be used for datetimes.", + "fields" : [ ] + }, { + "type" : "record", + "name" : "EnumType", + "doc" : "Enum field type.", + "fields" : [ ] + }, { + "type" : "record", + "name" : "NullType", + "doc" : "Null field type.", + "fields" : [ ] + }, { + "type" : "record", + "name" : "MapType", + "doc" : "Map field type.", + "fields" : [ { + "name" : "keyType", + "type" : "string", + "doc" : "Key type in a map", + "optional" : true + }, { + "name" : "valueType", + "type" : "string", + "doc" : "Type of the value in a map", + "optional" : true + } ] + }, { + "type" : "record", + "name" : "ArrayType", + "doc" : "Array field type.", + "fields" : [ { + "name" : "nestedType", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "List of types this array holds.", + "optional" : true + } ] + }, { + "type" : "record", + "name" : "UnionType", + "doc" : "Union field type.", + "fields" : [ { + "name" : "nestedTypes", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "List of types in union type.", + "optional" : true + } ] + }, { + "type" : "record", + "name" : "RecordType", + "doc" : "Record field type.", + "fields" : [ ] + } ], + "doc" : "Data platform specific types" + } ] + }, + "doc" : "Platform independent field type of the field." + }, { + "name" : "nativeDataType", + "type" : "string", + "doc" : "The native type of the field in the dataset's platform as declared by platform schema." + }, { + "name" : "recursive", + "type" : "boolean", + "doc" : "There are use cases when a field in type B references type A. A field in A references field of type B. 
In such cases, we will mark the first field as recursive.", + "default" : false + }, { + "name" : "globalTags", + "type" : "com.linkedin.common.GlobalTags", + "doc" : "Tags associated with the field", + "optional" : true, + "Searchable" : { + "/tags/*/tag" : { + "boostScore" : 0.5, + "fieldName" : "fieldTags", + "fieldType" : "URN_PARTIAL" + } + } + }, { + "name" : "glossaryTerms", + "type" : "com.linkedin.common.GlossaryTerms", + "doc" : "Glossary terms associated with the field", + "optional" : true + } ] + } + }, + "doc" : "Client provided a list of fields from document schema." + }, { + "name" : "primaryKeys", + "type" : { + "type" : "array", + "items" : "com.linkedin.dataset.SchemaFieldPath" + }, + "doc" : "Client provided list of fields that define primary keys to access record. Field order defines hierarchical espresso keys. Empty lists indicates absence of primary key access patter. Value is a SchemaField@fieldPath.", + "optional" : true + }, { + "name" : "foreignKeysSpecs", + "type" : { + "type" : "map", + "values" : { + "type" : "record", + "name" : "ForeignKeySpec", + "doc" : "Description of a foreign key in a schema.", + "fields" : [ { + "name" : "foreignKey", + "type" : [ { + "type" : "record", + "name" : "DatasetFieldForeignKey", + "doc" : "For non-urn based foregin keys.", + "fields" : [ { + "name" : "parentDataset", + "type" : "com.linkedin.common.DatasetUrn", + "doc" : "dataset that stores the resource." + }, { + "name" : "currentFieldPaths", + "type" : { + "type" : "array", + "items" : "com.linkedin.dataset.SchemaFieldPath" + }, + "doc" : "List of fields in hosting(current) SchemaMetadata that conform a foreign key. List can contain a single entry or multiple entries if several entries in hosting schema conform a foreign key in a single parent dataset." + }, { + "name" : "parentField", + "type" : "com.linkedin.dataset.SchemaFieldPath", + "doc" : "SchemaField@fieldPath that uniquely identify field in parent dataset that this field references." 
+ } ] + }, { + "type" : "record", + "name" : "UrnForeignKey", + "doc" : "If SchemaMetadata fields make any external references and references are of type com.linkedin.common.Urn or any children, this models can be used to mark it.", + "fields" : [ { + "name" : "currentFieldPath", + "type" : "com.linkedin.dataset.SchemaFieldPath", + "doc" : "Field in hosting(current) SchemaMetadata." + } ] + } ], + "doc" : "Foreign key definition in metadata schema." + } ] + } + }, + "doc" : "Map captures all the references schema makes to external datasets. Map key is ForeignKeySpecName typeref.", + "optional" : true + } ], + "Aspect" : { + "name" : "schemaMetadata" + } + }, { + "type" : "record", + "name" : "EditableSchemaMetadata", + "namespace" : "com.linkedin.schema", + "doc" : "EditableSchemaMetadata stores editable changes made to schema metadata. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines.", + "include" : [ "com.linkedin.common.ChangeAuditStamps" ], + "fields" : [ { + "name" : "editableSchemaFieldInfo", + "type" : { + "type" : "array", + "items" : { + "type" : "record", + "name" : "EditableSchemaFieldInfo", + "doc" : "SchemaField to describe metadata related to dataset schema.", + "fields" : [ { + "name" : "fieldPath", + "type" : "string", + "doc" : "FieldPath uniquely identifying the SchemaField this metadata is associated with" + }, { + "name" : "description", + "type" : "string", + "doc" : "Description", + "optional" : true, + "Searchable" : { + "boostScore" : 0.1, + "fieldName" : "editedFieldDescriptions", + "fieldType" : "TEXT" + } + }, { + "name" : "globalTags", + "type" : "com.linkedin.common.GlobalTags", + "doc" : "Tags associated with the field", + "optional" : true, + "Searchable" : { + "/tags/*/tag" : { + "boostScore" : 0.5, + "fieldName" : "editedFieldTags", + "fieldType" : "URN_PARTIAL" + } + } + } ] + } + }, + "doc" : "Client provided a list of fields from 
document schema." + } ], + "Aspect" : { + "name" : "editableSchemaMetadata" + } + }, "com.linkedin.common.InstitutionalMemory", "com.linkedin.glossary.GlossaryNodeInfo", { + "type" : "record", + "name" : "GlossaryNodeKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for a GlossaryNode", + "fields" : [ { + "name" : "name", + "type" : "string", + "Searchable" : { + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } + } ], + "Aspect" : { + "name" : "glossaryNodeKey" + } + }, "com.linkedin.glossary.GlossaryTermInfo", { + "type" : "record", + "name" : "GlossaryTermKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for a GlossaryTerm", + "fields" : [ { + "name" : "name", + "type" : "string", + "Searchable" : { + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } + } ], + "Aspect" : { + "name" : "glossaryTermKey" + } + }, { + "type" : "record", + "name" : "MLFeatureKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for an MLFeature", + "fields" : [ { + "name" : "featureNamespace", + "type" : "string", + "doc" : "Namespace for the feature", + "Searchable" : { + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "name", + "type" : "string", + "doc" : "Name of the feature", + "Searchable" : { + "boostScore" : 8.0, + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } + } ], + "Aspect" : { + "name" : "mlFeatureKey" + } + }, { + "type" : "record", + "name" : "MLFeatureProperties", + "namespace" : "com.linkedin.ml.metadata", + "doc" : "Properties associated with a MLFeature", + "fields" : [ { + "name" : "description", + "type" : "string", + "doc" : "Documentation of the MLFeature", + "optional" : true + }, { + "name" : "dataType", + "type" : "com.linkedin.common.MLFeatureDataType", + "doc" : "Data Type of the MLFeature", + "optional" : true + }, { + "name" : "version", + "type" : "com.linkedin.common.VersionTag", + "doc" : "Version of the MLFeature", + "optional" : true + }, { + "name" : 
"sources", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "Source of the MLFeature", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataset" ], + "name" : "DerivedFrom" + } + } + } ], + "Aspect" : { + "name" : "mlFeatureProperties" + } + }, { + "type" : "record", + "name" : "MLModelKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for an ML model", + "fields" : [ { + "name" : "platform", + "type" : "com.linkedin.common.Urn", + "doc" : "Standardized platform urn for the model", + "Searchable" : { + "addToFilters" : true, + "fieldType" : "URN" + } + }, { + "name" : "name", + "type" : "string", + "doc" : "Name of the MLModel", + "Searchable" : { + "boostScore" : 10.0, + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "origin", + "type" : "com.linkedin.common.FabricType", + "doc" : "Fabric type where model belongs to or where it was generated", + "Searchable" : { + "fieldType" : "TEXT_PARTIAL", + "queryByDefault" : false + } + } ], + "Aspect" : { + "name" : "mlModelKey" + } + }, { + "type" : "record", + "name" : "CaveatsAndRecommendations", + "namespace" : "com.linkedin.ml.metadata", + "doc" : "This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset? Are there additional recommendations for model use?", + "fields" : [ { + "name" : "caveats", + "type" : { + "type" : "record", + "name" : "CaveatDetails", + "doc" : "This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset? 
Are there additional recommendations for model use?", + "fields" : [ { + "name" : "needsFurtherTesting", + "type" : "boolean", + "doc" : "Did the results suggest any further testing?", + "optional" : true + }, { + "name" : "caveatDescription", + "type" : "string", + "doc" : "Caveat Description\nFor ex: Given gender classes are binary (male/not male), which we include as male/female. Further work needed to evaluate across a spectrum of genders.", + "optional" : true + }, { + "name" : "groupsNotRepresented", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Relevant groups that were not represented in the evaluation dataset?", + "optional" : true + } ] + }, + "doc" : "This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset?", + "optional" : true + }, { + "name" : "recommendations", + "type" : "string", + "doc" : "Recommendations on where this MLModel should be used.", + "optional" : true + }, { + "name" : "idealDatasetCharacteristics", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Ideal characteristics of an evaluation dataset for this MLModel", + "optional" : true + } ], + "Aspect" : { + "name" : "mlModelCaveatsAndRecommendations" + } + }, { + "type" : "record", + "name" : "EthicalConsiderations", + "namespace" : "com.linkedin.ml.metadata", + "doc" : "This section is intended to demonstrate the ethical considerations that went into MLModel development, surfacing ethical challenges and solutions to stakeholders.", + "fields" : [ { + "name" : "data", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Does the MLModel use any sensitive data (e.g., protected classes)?", + "optional" : true + }, { + "name" : "humanLife", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : " Is the MLModel intended to inform decisions 
about matters central to human life or flourishing – e.g., health or safety? Or could it be used in such a way?", + "optional" : true + }, { + "name" : "mitigations", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "What risk mitigation strategies were used during MLModel development?", + "optional" : true + }, { + "name" : "risksAndHarms", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "What risks may be present in MLModel usage? Try to identify the potential recipients, likelihood, and magnitude of harms. If these cannot be determined, note that they were considered but remain unknown.", + "optional" : true + }, { + "name" : "useCases", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Are there any known MLModel use cases that are especially fraught? This may connect directly to the intended use section", + "optional" : true + } ], + "Aspect" : { + "name" : "mlModelEthicalConsiderations" + } + }, { + "type" : "record", + "name" : "EvaluationData", + "namespace" : "com.linkedin.ml.metadata", + "doc" : "All referenced datasets would ideally point to any set of documents that provide visibility into the source and composition of the dataset.", + "fields" : [ { + "name" : "evaluationData", + "type" : { + "type" : "array", + "items" : { + "type" : "record", + "name" : "BaseData", + "doc" : "BaseData record", + "fields" : [ { + "name" : "dataset", + "type" : "com.linkedin.common.DatasetUrn", + "doc" : "What dataset were used in the MLModel?" 
+ }, { + "name" : "motivation", + "type" : "string", + "doc" : "Why was this dataset chosen?", + "optional" : true + }, { + "name" : "preProcessing", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "How was the data preprocessed (e.g., tokenization of sentences, cropping of images, any filtering such as dropping images without faces)?", + "optional" : true + } ] + } + }, + "doc" : "Details on the dataset(s) used for the quantitative analyses in the MLModel" + } ], + "Aspect" : { + "name" : "mlModelEvaluationData" + } + }, { + "type" : "record", + "name" : "IntendedUse", + "namespace" : "com.linkedin.ml.metadata", + "doc" : "Intended Use for the ML Model", + "fields" : [ { + "name" : "primaryUses", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Primary Use cases for the MLModel.", + "optional" : true + }, { + "name" : "primaryUsers", + "type" : { + "type" : "array", + "items" : { + "type" : "enum", + "name" : "IntendedUserType", + "symbols" : [ "ENTERPRISE", "HOBBY", "ENTERTAINMENT" ] + } + }, + "doc" : "Primary Intended Users - For example, was the MLModel developed for entertainment purposes, for hobbyists, or enterprise solutions?", + "optional" : true + }, { + "name" : "outOfScopeUses", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Highlight technology that the MLModel might easily be confused with, or related contexts that users could try to apply the MLModel to.", + "optional" : true + } ], + "Aspect" : { + "name" : "intendedUse" + } + }, { + "type" : "record", + "name" : "Metrics", + "namespace" : "com.linkedin.ml.metadata", + "doc" : "Metrics to be featured for the MLModel.", + "fields" : [ { + "name" : "performanceMeasures", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Measures of MLModel performance", + "optional" : true + }, { + "name" : "decisionThreshold", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Decision Thresholds used (if any)?", 
+ "optional" : true + } ], + "Aspect" : { + "name" : "mlModelMetrics" + } + }, { + "type" : "record", + "name" : "MLModelFactorPrompts", + "namespace" : "com.linkedin.ml.metadata", + "doc" : "Prompts which affect the performance of the MLModel", + "fields" : [ { + "name" : "relevantFactors", + "type" : { + "type" : "array", + "items" : { + "type" : "record", + "name" : "MLModelFactors", + "doc" : "Factors affecting the performance of the MLModel.", + "fields" : [ { + "name" : "groups", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Groups refers to distinct categories with similar characteristics that are present in the evaluation data instances.\nFor human-centric machine learning MLModels, groups are people who share one or multiple characteristics.", + "optional" : true + }, { + "name" : "instrumentation", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "The performance of a MLModel can vary depending on what instruments were used to capture the input to the MLModel.\nFor example, a face detection model may perform differently depending on the camera’s hardware and software,\nincluding lens, image stabilization, high dynamic range techniques, and background blurring for portrait mode.", + "optional" : true + }, { + "name" : "environment", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "A further factor affecting MLModel performance is the environment in which it is deployed.", + "optional" : true + } ] + } + }, + "doc" : "What are foreseeable salient factors for which MLModel performance may vary, and how were these determined?", + "optional" : true + }, { + "name" : "evaluationFactors", + "type" : { + "type" : "array", + "items" : "MLModelFactors" + }, + "doc" : "Which factors are being reported, and why were these chosen?", + "optional" : true + } ], + "Aspect" : { + "name" : "mlModelFactorPrompts" + } + }, { + "type" : "record", + "name" : "MLModelProperties", + "namespace" : 
"com.linkedin.ml.metadata", + "doc" : "Properties associated with a ML Model", + "fields" : [ { + "name" : "description", + "type" : "string", + "doc" : "Documentation of the MLModel", + "optional" : true, + "Searchable" : { + "fieldType" : "TEXT", + "hasValuesFieldName" : "hasDescription" + } + }, { + "name" : "date", + "type" : "com.linkedin.common.Time", + "doc" : "Date when the MLModel was developed", + "optional" : true + }, { + "name" : "version", + "type" : "com.linkedin.common.VersionTag", + "doc" : "Version of the MLModel", + "optional" : true + }, { + "name" : "type", + "type" : "string", + "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc", + "optional" : true, + "Searchable" : { + "fieldType" : "TEXT_PARTIAL" + } + }, { + "name" : "hyperParameters", + "type" : { + "type" : "map", + "values" : { + "type" : "typeref", + "name" : "HyperParameterValueType", + "doc" : "A union of all supported metadata aspects for HyperParameter Value", + "ref" : [ "string", "int", "float", "double", "boolean" ] + } + }, + "doc" : "Hyper Parameters of the MLModel", + "optional" : true + }, { + "name" : "mlFeatures", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.MLFeatureUrn" + }, + "doc" : "List of features used for MLModel training", + "optional" : true + }, { + "name" : "tags", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Tags for the MLModel", + "default" : [ ] + } ], + "Aspect" : { + "name" : "mlModelProperties" + } + }, { + "type" : "record", + "name" : "QuantitativeAnalyses", + "namespace" : "com.linkedin.ml.metadata", + "doc" : "Quantitative analyses should be disaggregated, that is, broken down by the chosen factors. 
Quantitative analyses should provide the results of evaluating the MLModel according to the chosen metrics, providing confidence interval values when possible.", + "fields" : [ { + "name" : "unitaryResults", + "type" : { + "type" : "typeref", + "name" : "ResultsType", + "doc" : "A union of all supported metadata aspects for ResultsType", + "ref" : [ "string" ] + }, + "doc" : "Link to a dashboard with results showing how the MLModel performed with respect to each factor", + "optional" : true + }, { + "name" : "intersectionalResults", + "type" : "ResultsType", + "doc" : "Link to a dashboard with results showing how the MLModel performed with respect to the intersection of evaluated factors?", + "optional" : true + } ], + "Aspect" : { + "name" : "mlModelQuantitativeAnalyses" + } + }, { + "type" : "record", + "name" : "TrainingData", + "namespace" : "com.linkedin.ml.metadata", + "doc" : "Ideally, the MLModel card would contain as much information about the training data as the evaluation data. However, there might be cases where it is not feasible to provide this level of detailed information about the training data. For example, the data may be proprietary, or require a non-disclosure agreement. 
In these cases, we advocate for basic details about the distributions over groups in the data, as well as any other details that could inform stakeholders on the kinds of biases the model may have encoded.", + "fields" : [ { + "name" : "trainingData", + "type" : { + "type" : "array", + "items" : "BaseData" + }, + "doc" : "Details on the dataset(s) used for training the MLModel" + } ], + "Aspect" : { + "name" : "mlModelTrainingData" + } + }, { + "type" : "record", + "name" : "SourceCode", + "namespace" : "com.linkedin.ml.metadata", + "doc" : "Source Code", + "fields" : [ { + "name" : "sourceCode", + "type" : { + "type" : "array", + "items" : { + "type" : "record", + "name" : "SourceCodeUrl", + "doc" : "Source Code Url Entity", + "fields" : [ { + "name" : "type", + "type" : { + "type" : "enum", + "name" : "SourceCodeUrlType", + "symbols" : [ "ML_MODEL_SOURCE_CODE", "TRAINING_PIPELINE_SOURCE_CODE", "EVALUATION_PIPELINE_SOURCE_CODE" ] + }, + "doc" : "Source Code Url Types" + }, { + "name" : "sourceCodeUrl", + "type" : "com.linkedin.common.Url", + "doc" : "Source Code Url" + } ] + } + }, + "doc" : "Source Code along with types" + } ], + "Aspect" : { + "name" : "sourceCode" + } + }, { + "type" : "record", + "name" : "TagKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for a Tag", + "fields" : [ { + "name" : "name", + "type" : "string", + "doc" : "The unique tag name", + "Searchable" : { + "boostScore" : 10.0, + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } + } ], + "Aspect" : { + "name" : "tagKey" + } + }, { + "type" : "record", + "name" : "TagProperties", + "namespace" : "com.linkedin.tag", + "doc" : "Properties associated with a Tag", + "fields" : [ { + "name" : "name", + "type" : "string", + "doc" : "Name of the tag" + }, { + "name" : "description", + "type" : "string", + "doc" : "Documentation of the tag", + "optional" : true + } ], + "Aspect" : { + "name" : "tagProperties" + } + }, "com.linkedin.common.Ownership", 
"com.linkedin.common.Status", "com.linkedin.common.GlobalTags", "com.linkedin.common.BrowsePaths" ] + }, { + "type" : "record", + "name" : "VersionedAspect", + "namespace" : "com.linkedin.metadata.aspect", + "fields" : [ { + "name" : "aspect", + "type" : "Aspect" + }, { + "name" : "version", + "type" : "long" + } ] + }, "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", 
"com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ], + "schema" : { + "name" : "aspects", + "namespace" : "com.linkedin.entity", + "path" : "/aspects", + "schema" : "com.linkedin.metadata.aspect.VersionedAspect", + "doc" : "Single unified resource for fetching, updating, searching, & browsing DataHub entities\n\ngenerated from: com.linkedin.metadata.resources.entity.AspectResource", + "collection" : { + "identifier" : { + "name" : "aspectsId", + "type" : "string" + }, + "supports" : [ "get" ], + "methods" : [ { + "method" : "get", + "doc" : "Retrieves the value for an entity that is made up of latest versions of specified aspects.", + "parameters" : [ { + "name" : "aspect", + "type" : "string", + "optional" : true + }, { + "name" : "version", + "type" : "long", + "optional" : true + } ] + } ], + "entity" : { + "path" : "/aspects/{aspectsId}" + } + } + } +} diff --git a/gms/api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/gms/api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 807110e180970..1aa5a7a434272 100644 --- a/gms/api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/gms/api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -158,7 +158,13 @@ "items" : 
"ChartDataSourceType" }, "doc" : "Data sources for the chart", - "optional" : true + "optional" : true, + "Relationship" : { + "/*/string" : { + "entityTypes" : [ "dataset" ], + "name" : "Consumes" + } + } }, { "name" : "type", "type" : { diff --git a/gms/client/src/main/java/com/linkedin/entity/client/AspectClient.java b/gms/client/src/main/java/com/linkedin/entity/client/AspectClient.java new file mode 100644 index 0000000000000..a1c66aec96a58 --- /dev/null +++ b/gms/client/src/main/java/com/linkedin/entity/client/AspectClient.java @@ -0,0 +1,41 @@ +package com.linkedin.entity.client; + +import com.linkedin.entity.AspectsGetRequestBuilder; +import com.linkedin.entity.AspectsRequestBuilders; +import com.linkedin.metadata.aspect.VersionedAspect; +import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.restli.client.Client; +import javax.annotation.Nonnull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class AspectClient { + + private static final AspectsRequestBuilders ASPECTS_REQUEST_BUILDERS = new AspectsRequestBuilders(); + + private final Client _client; + private final Logger _logger = LoggerFactory.getLogger("AspectClient"); + + public AspectClient(@Nonnull final Client restliClient) { + _client = restliClient; + } + + /** + * Gets the aspect of an entity at the given version + * + * @param urn urn for the entity + * @return the requested aspect together with its version + * @throws RemoteInvocationException + */ + @Nonnull + public VersionedAspect getAspect(@Nonnull String urn, @Nonnull String aspect, @Nonnull Long version) + throws RemoteInvocationException { + + AspectsGetRequestBuilder requestBuilder = + ASPECTS_REQUEST_BUILDERS.get().id(urn).aspectParam(aspect).versionParam(version); + + return _client.sendRequest(requestBuilder.build()).getResponse().getEntity(); + + } +} diff --git a/gms/impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/gms/impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java 
new file mode 100644 index 0000000000000..207fbc0c63a33 --- /dev/null +++ b/gms/impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -0,0 +1,54 @@ +package com.linkedin.metadata.resources.entity; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.VersionedAspect; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.restli.RestliUtils; +import com.linkedin.parseq.Task; +import com.linkedin.restli.server.annotations.Optional; +import com.linkedin.restli.server.annotations.QueryParam; +import com.linkedin.restli.server.annotations.RestLiCollection; +import com.linkedin.restli.server.annotations.RestMethod; +import com.linkedin.restli.server.resources.CollectionResourceTaskTemplate; +import java.net.URISyntaxException; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import javax.inject.Inject; +import javax.inject.Named; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Single unified resource for fetching, updating, searching, & browsing DataHub entities + */ +@RestLiCollection(name = "aspects", namespace = "com.linkedin.entity") +public class AspectResource extends CollectionResourceTaskTemplate { + + private final Logger _logger = LoggerFactory.getLogger("EntityResource"); + + @Inject + @Named("entityService") + private EntityService _entityService; + + /** + * Retrieves the value for an entity that is made up of latest versions of specified aspects. 
+ */ + @RestMethod.Get + @Nonnull + public Task get( + @Nonnull String urnStr, + @QueryParam("aspect") @Optional @Nullable String aspectName, + @QueryParam("version") @Optional @Nullable Long version + ) throws URISyntaxException { + _logger.info("GET ASPECT urn: {} aspect: {} version: {}", urnStr, aspectName, version); + final Urn urn = Urn.createFromString(urnStr); + return RestliUtils.toTask(() -> { + final VersionedAspect aspect = _entityService.getVersionedAspect(urn, aspectName, version); + if (aspect == null) { + throw RestliUtils.resourceNotFoundException(); + } + return aspect; + }); + } + +} diff --git a/metadata-ingestion/examples/mce_files/bootstrap_mce.json b/metadata-ingestion/examples/mce_files/bootstrap_mce.json index 499f2798c0b07..4b025a38036e5 100644 --- a/metadata-ingestion/examples/mce_files/bootstrap_mce.json +++ b/metadata-ingestion/examples/mce_files/bootstrap_mce.json @@ -208,7 +208,7 @@ }, "fields": [ { - "fieldPath": "field_foo", + "fieldPath": "field_foo_2", "jsonPath": null, "nullable": false, "description": { @@ -1118,13 +1118,7 @@ "deleted": null }, "chartUrl": null, - "inputs": { - "array": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)" - } - ] - }, + "inputs": ["urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"], "type": null, "access": null, "lastRefreshed": null diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java index 7646c62602431..b73d04fe4573d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -8,6 +8,7 @@ import com.linkedin.data.template.UnionTemplate; import com.linkedin.entity.Entity; import com.linkedin.metadata.PegasusUtils; +import com.linkedin.metadata.aspect.VersionedAspect; import 
com.linkedin.metadata.dao.exception.ModelConversionException; import com.linkedin.metadata.dao.utils.RecordUtils; import com.linkedin.metadata.event.EntityEventProducer; @@ -103,6 +104,11 @@ public abstract RecordTemplate getAspect( @Nonnull final String aspectName, long version); + public abstract VersionedAspect getVersionedAspect( + @Nonnull final Urn urn, + @Nonnull final String aspectName, + long version); + /** * Retrieves a list of all persisted aspects with a specific name, sorted by corresponding urn. * diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index c000aa4616fc9..a327c3e846246 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -179,6 +179,21 @@ protected EbeanAspectV2 getLatestAspect(@Nonnull final String urn, @Nonnull fina return _server.find(EbeanAspectV2.class, key); } + @Nullable + public long getMaxVersion(@Nonnull final String urn, @Nonnull final String aspectName) { + validateConnection(); + List result = _server.find(EbeanAspectV2.class) + .where() + .eq("urn", urn).eq("aspect", aspectName) + .orderBy() + .desc("version") + .findList(); + if (result.size() == 0) { + return -1; + } + return result.get(0).getKey().getVersion(); + } + @Nullable public EbeanAspectV2 getAspect(@Nonnull final String urn, @Nonnull final String aspectName, final long version) { validateConnection(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java index ce7e246148140..d55af94ce91f4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java @@ -5,6 
+5,8 @@ import com.linkedin.data.schema.RecordDataSchema; import com.linkedin.data.template.DataTemplateUtil; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.Aspect; +import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.dao.utils.RecordUtils; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ListResult; @@ -92,6 +94,9 @@ public Map> getLatestAspects(@Nonnull final Set u @Override @Nullable public RecordTemplate getAspect(@Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull long version) { + if (version < 0) { + version = _entityDao.getMaxVersion(urn.toString(), aspectName) - version + 1; + } final EbeanAspectV2.PrimaryKey primaryKey = new EbeanAspectV2.PrimaryKey(urn.toString(), aspectName, version); final Optional maybeAspect = Optional.ofNullable(_entityDao.getAspect(primaryKey)); return maybeAspect @@ -99,6 +104,37 @@ public RecordTemplate getAspect(@Nonnull final Urn urn, @Nonnull final String as .orElse(null); } + @Override + public VersionedAspect getVersionedAspect(@Nonnull Urn urn, @Nonnull String aspectName, long version) { + VersionedAspect result = new VersionedAspect(); + + if (version < 0) { + version = _entityDao.getMaxVersion(urn.toString(), aspectName) + version + 1; + } + + final EbeanAspectV2.PrimaryKey primaryKey = new EbeanAspectV2.PrimaryKey(urn.toString(), aspectName, version); + final Optional maybeAspect = Optional.ofNullable(_entityDao.getAspect(primaryKey)); + RecordTemplate aspect = maybeAspect + .map(ebeanAspect -> toAspectRecord(urn, aspectName, ebeanAspect.getMetadata())) + .orElse(null); + + if (aspect == null) { + return null; + } + + Aspect resultAspect = new Aspect(); + + RecordUtils.setSelectedRecordTemplateInUnion( + resultAspect, + aspect + ); +; + result.setAspect(resultAspect); + result.setVersion(version); + + return result; + } + @Override @Nonnull public ListResult listLatestAspects( diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/extractor/SnapshotToAspectMap.java b/metadata-io/src/main/java/com/linkedin/metadata/extractor/SnapshotToAspectMap.java new file mode 100644 index 0000000000000..1ab68be4ca7e8 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/extractor/SnapshotToAspectMap.java @@ -0,0 +1,43 @@ +package com.linkedin.metadata.extractor; + +import com.linkedin.data.element.DataElement; +import com.linkedin.data.it.IterationOrder; +import com.linkedin.data.it.ObjectIterator; +import com.linkedin.data.schema.PathSpec; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.PegasusUtils; +import com.linkedin.metadata.models.FieldSpec; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * Extracts the aspects of a snapshot RecordTemplate, keyed by aspect name (no {@link FieldSpec} is involved). + */ +public class SnapshotToAspectMap { + private SnapshotToAspectMap() { + } + + /** + * Walks the snapshot and returns a map from aspect name to the data element holding that aspect + */ + public static Map extractAspectMap(RecordTemplate snapshot) { + + final ObjectIterator iterator = new ObjectIterator(snapshot.data(), snapshot.schema(), IterationOrder.PRE_ORDER); + final Map aspectsByName = new HashMap<>(); + + for (DataElement dataElement = iterator.next(); dataElement != null; dataElement = iterator.next()) { + final PathSpec pathSpec = dataElement.getSchemaPathSpec(); + List pathComponents = pathSpec.getPathComponents(); + // three components representing /aspect/*/ + if (pathComponents.size() != 3) { + continue; + } + String aspectName = PegasusUtils.getAspectNameFromFullyQualifiedName(pathComponents.get(2)); + aspectsByName.put(aspectName, dataElement); + } + + return aspectsByName; + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index 6735332339280..76ecd1e2f1d3d 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -9,6 +9,8 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.identity.CorpUserInfo; import com.linkedin.metadata.PegasusUtils; +import com.linkedin.metadata.aspect.Aspect; +import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.aspect.CorpUserAspect; import com.linkedin.metadata.aspect.CorpUserAspectArray; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; @@ -242,6 +244,40 @@ public void testUpdateGetAspect() throws Exception { verifyNoMoreInteractions(_mockProducer); } + @Test + public void testGetAspectAtVersion() throws Exception { + // Test Writing a CorpUser Entity + Urn entityUrn = Urn.createFromString("urn:li:corpuser:test"); + + String aspectName = PegasusUtils.getAspectNameFromSchema(new CorpUserInfo().schema()); + + // Ingest CorpUserInfo Aspect #1 + CorpUserInfo writeAspect = createCorpUserInfo("email@test.com"); + + // Validate retrieval of CorpUserInfo Aspect #1 + _entityService.updateAspect(entityUrn, aspectName, writeAspect, TEST_AUDIT_STAMP, 1, true); + + + VersionedAspect writtenVersionedAspect = new VersionedAspect(); + writtenVersionedAspect.setAspect(Aspect.create(writeAspect)); + writtenVersionedAspect.setVersion(1); + + VersionedAspect readAspect1 = _entityService.getVersionedAspect(entityUrn, aspectName, 1); + assertTrue(DataTemplateUtil.areEqual(writtenVersionedAspect, readAspect1)); + verify(_mockProducer, times(1)).produceMetadataAuditEvent( + Mockito.eq(entityUrn), + Mockito.eq(null), + Mockito.any()); + + VersionedAspect readAspect2 = _entityService.getVersionedAspect(entityUrn, aspectName, -1); + assertTrue(DataTemplateUtil.areEqual(writtenVersionedAspect, readAspect2)); + + VersionedAspect readAspectVersion0 = _entityService.getVersionedAspect(entityUrn, aspectName, 0); + 
assertFalse(DataTemplateUtil.areEqual(writtenVersionedAspect, readAspectVersion0)); + + verifyNoMoreInteractions(_mockProducer); + } + @Nonnull private com.linkedin.entity.Entity createCorpUserEntity(Urn entityUrn, String email) throws Exception { CorpuserUrn corpuserUrn = CorpuserUrn.createFromUrn(entityUrn); diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/Aspect.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/Aspect.pdl new file mode 100644 index 0000000000000..43c6ae05e9ad3 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/Aspect.pdl @@ -0,0 +1,98 @@ +namespace com.linkedin.metadata.aspect + +import com.linkedin.metadata.key.ChartKey +import com.linkedin.chart.ChartInfo +import com.linkedin.chart.ChartQuery +import com.linkedin.common.Ownership +import com.linkedin.common.Status +import com.linkedin.common.GlobalTags +import com.linkedin.common.BrowsePaths +import com.linkedin.metadata.key.CorpGroupKey +import com.linkedin.identity.CorpGroupInfo +import com.linkedin.metadata.key.CorpUserKey +import com.linkedin.identity.CorpUserEditableInfo +import com.linkedin.identity.CorpUserInfo +import com.linkedin.metadata.key.DashboardKey +import com.linkedin.dashboard.DashboardInfo +import com.linkedin.metadata.key.DataFlowKey +import com.linkedin.datajob.DataFlowInfo +import com.linkedin.metadata.key.DataJobKey +import com.linkedin.datajob.DataJobInfo +import com.linkedin.datajob.DataJobInputOutput +import com.linkedin.metadata.key.DatasetKey +import com.linkedin.dataset.DatasetDeprecation +import com.linkedin.dataset.DatasetProperties +import com.linkedin.dataset.DatasetUpstreamLineage +import com.linkedin.dataset.UpstreamLineage +import com.linkedin.schema.SchemaMetadata +import com.linkedin.schema.EditableSchemaMetadata +import com.linkedin.common.InstitutionalMemory +import com.linkedin.glossary.GlossaryNodeInfo +import com.linkedin.metadata.key.GlossaryNodeKey +import 
com.linkedin.glossary.GlossaryTermInfo +import com.linkedin.metadata.key.GlossaryTermKey +import com.linkedin.metadata.key.MLFeatureKey +import com.linkedin.ml.metadata.MLFeatureProperties +import com.linkedin.metadata.key.MLModelKey +import com.linkedin.ml.metadata.CaveatsAndRecommendations +import com.linkedin.ml.metadata.EthicalConsiderations +import com.linkedin.ml.metadata.EvaluationData +import com.linkedin.ml.metadata.IntendedUse +import com.linkedin.ml.metadata.Metrics +import com.linkedin.ml.metadata.MLModelFactorPrompts +import com.linkedin.ml.metadata.MLModelProperties +import com.linkedin.ml.metadata.QuantitativeAnalyses +import com.linkedin.ml.metadata.TrainingData +import com.linkedin.ml.metadata.SourceCode +import com.linkedin.metadata.key.TagKey +import com.linkedin.tag.TagProperties + +/** + * A union of all supported metadata aspects across all entity types + */ +typeref Aspect = union[ + ChartKey, + ChartInfo, + ChartQuery, + CorpGroupKey, + CorpGroupInfo, + CorpUserKey, + CorpUserEditableInfo, + CorpUserInfo, + DashboardKey, + DashboardInfo, + DataFlowKey, + DataFlowInfo, + DataJobKey, + DataJobInfo, + DataJobInputOutput, + DatasetDeprecation, + DatasetProperties, + UpstreamLineage, + SchemaMetadata, + EditableSchemaMetadata, + InstitutionalMemory, + GlossaryNodeInfo, + GlossaryNodeKey, + GlossaryTermInfo, + GlossaryTermKey, + MLFeatureKey, + MLFeatureProperties, + MLModelKey, + CaveatsAndRecommendations, + EthicalConsiderations, + EvaluationData, + IntendedUse, + Metrics, + MLModelFactorPrompts, + MLModelProperties, + QuantitativeAnalyses, + TrainingData, + SourceCode, + TagKey, + TagProperties, + Ownership, + Status, + GlobalTags, + BrowsePaths, +] diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/VersionedAspect.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/VersionedAspect.pdl new file mode 100644 index 0000000000000..48e16503eb409 --- /dev/null +++ 
b/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/VersionedAspect.pdl @@ -0,0 +1,8 @@ +namespace com.linkedin.metadata.aspect + +import com.linkedin.common.AuditStamp + +record VersionedAspect { + aspect: Aspect + version: long +}