Merge branch 'master' into add-domain-csv-source
pedro93 authored Jul 15, 2022
2 parents 0fc4174 + 4857af5 commit 76e7793
Showing 197 changed files with 4,306 additions and 1,284 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/docker-ingestion-base.yml
@@ -1,5 +1,7 @@
name: ingestion base
on:
release:
types: [published, edited]
push:
branches:
- master
42 changes: 42 additions & 0 deletions .github/workflows/docker-ingestion-smoke.yml
@@ -0,0 +1,42 @@
name: ingestion smoke
on:
release:
types: [published, edited]
push:
branches:
- master
paths:
- "docker/datahub-ingestion/**"
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:

build-smoke:
name: Build and Push Docker Image to Docker Hub
runs-on: ubuntu-latest
steps:
- name: Check out the repo
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
- name: Build and Push image
uses: docker/build-push-action@v2
with:
context: ./docker/datahub-ingestion
file: ./docker/datahub-ingestion/smoke.Dockerfile
platforms: linux/amd64,linux/arm64
tags: acryldata/datahub-ingestion-base:smoke
push: true
1 change: 1 addition & 0 deletions build.gradle
@@ -116,6 +116,7 @@ project.ext.externalDependency = [
'parquet': 'org.apache.parquet:parquet-avro:1.12.2',
'picocli': 'info.picocli:picocli:4.5.0',
'playCache': 'com.typesafe.play:play-cache_2.12:2.7.6',
'playEhcache': 'com.typesafe.play:play-ehcache_2.12:2.7.6',
'playWs': 'com.typesafe.play:play-ahc-ws-standalone_2.12:2.0.8',
'playDocs': 'com.typesafe.play:play-docs_2.12:2.7.6',
'playGuice': 'com.typesafe.play:play-guice_2.12:2.7.6',
50 changes: 35 additions & 15 deletions datahub-frontend/app/auth/AuthModule.java
@@ -20,6 +20,7 @@
import org.pac4j.core.context.session.SessionStore;
import org.pac4j.play.LogoutController;
import org.pac4j.play.http.PlayHttpActionAdapter;
import org.pac4j.play.store.PlayCacheSessionStore;
import org.pac4j.play.store.PlayCookieSessionStore;
import org.pac4j.play.store.PlaySessionStore;
import org.pac4j.play.store.ShiroAesDataEncrypter;
@@ -32,6 +33,7 @@
import auth.sso.SsoConfigs;
import auth.sso.SsoManager;
import controllers.SsoCallbackController;
import play.cache.SyncCacheApi;
import utils.ConfigUtil;

import static auth.AuthUtils.*;
@@ -51,6 +53,8 @@ public class AuthModule extends AbstractModule {
* We hash this value (SHA1), then take the first 16 bytes as the AES key.
*/
private static final String PAC4J_AES_KEY_BASE_CONF = "play.http.secret.key";
private static final String PAC4J_SESSIONSTORE_PROVIDER_CONF = "pac4j.sessionStore.provider";

private final com.typesafe.config.Config _configs;

public AuthModule(final Environment environment, final com.typesafe.config.Config configs) {
@@ -59,22 +63,38 @@ public AuthModule(final Environment environment, final com.typesafe.config.Config configs) {

@Override
protected void configure() {
PlayCookieSessionStore playCacheCookieStore;
try {
// To generate a valid encryption key from an input value, we first
// hash the input to generate a fixed-length string. Then, we convert
// it to hex and slice the first 16 bytes, because AES key length must strictly
// have a specific length.
final String aesKeyBase = _configs.getString(PAC4J_AES_KEY_BASE_CONF);
final String aesKeyHash = DigestUtils.sha1Hex(aesKeyBase.getBytes(StandardCharsets.UTF_8));
final String aesEncryptionKey = aesKeyHash.substring(0, 16);
playCacheCookieStore = new PlayCookieSessionStore(
new ShiroAesDataEncrypter(aesEncryptionKey));
} catch (Exception e) {
throw new RuntimeException("Failed to instantiate Pac4j cookie session store!", e);
/**
 * In Pac4j, you can store the profiles of authenticated users in either
 * (i) the PlayCacheSessionStore, which saves the data in the Play cache, or
 * (ii) the PlayCookieSessionStore, which saves the data in the Play session cookie.
 * However, a problem (https://github.com/datahub-project/datahub/issues/4448) was observed when storing the Pac4j profile in the cookie:
 * whenever the profile returned by Pac4j exceeds 4096 characters, the response is rejected by the browser.
 * We default to the PlayCookieSessionStore so that the datahub-frontend container remains a stateless service.
 */
String sessionStoreProvider = _configs.getString(PAC4J_SESSIONSTORE_PROVIDER_CONF);

if (sessionStoreProvider.equals("PlayCacheSessionStore")) {
final PlayCacheSessionStore playCacheSessionStore = new PlayCacheSessionStore(getProvider(SyncCacheApi.class));
bind(SessionStore.class).toInstance(playCacheSessionStore);
bind(PlaySessionStore.class).toInstance(playCacheSessionStore);
} else {
PlayCookieSessionStore playCacheCookieStore;
try {
// To generate a valid encryption key from an input value, we first
// hash the input to generate a fixed-length string. Then, we convert
// it to hex and slice the first 16 bytes, because AES key length must strictly
// have a specific length.
final String aesKeyBase = _configs.getString(PAC4J_AES_KEY_BASE_CONF);
final String aesKeyHash = DigestUtils.sha1Hex(aesKeyBase.getBytes(StandardCharsets.UTF_8));
final String aesEncryptionKey = aesKeyHash.substring(0, 16);
playCacheCookieStore = new PlayCookieSessionStore(
new ShiroAesDataEncrypter(aesEncryptionKey));
} catch (Exception e) {
throw new RuntimeException("Failed to instantiate Pac4j cookie session store!", e);
}
bind(SessionStore.class).toInstance(playCacheCookieStore);
bind(PlaySessionStore.class).toInstance(playCacheCookieStore);
}
bind(SessionStore.class).toInstance(playCacheCookieStore);
bind(PlaySessionStore.class).toInstance(playCacheCookieStore);

try {
bind(SsoCallbackController.class).toConstructor(SsoCallbackController.class.getConstructor(
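For illustration, a minimal standalone sketch (not part of this commit) of how the provider switch above resolves, assuming the Typesafe Config key that application.conf defines below; the class name is hypothetical:

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

public class SessionStoreProviderDemo {
    private static final String PAC4J_SESSIONSTORE_PROVIDER_CONF = "pac4j.sessionStore.provider";

    public static void main(String[] args) {
        // application.conf defaults the key to "PlayCookieSessionStore";
        // setting the PAC4J_SESSIONSTORE_PROVIDER environment variable overrides it.
        Config configs = ConfigFactory.load();
        String provider = configs.getString(PAC4J_SESSIONSTORE_PROVIDER_CONF);
        if (provider.equals("PlayCacheSessionStore")) {
            System.out.println("Storing Pac4j profiles server-side in the Play cache");
        } else {
            System.out.println("Storing Pac4j profiles client-side in an AES-encrypted cookie");
        }
    }
}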
5 changes: 5 additions & 0 deletions datahub-frontend/conf/application.conf
@@ -39,6 +39,11 @@ play.http.server.akka.max-header-count = ${?DATAHUB_AKKA_MAX_HEADER_COUNT}
play.server.akka.max-header-value-length = 8k
play.server.akka.max-header-value-length = ${?DATAHUB_AKKA_MAX_HEADER_VALUE_LENGTH}

# pac4j configuration
# default to PlayCookieSessionStore to keep datahub-frontend's statelessness
pac4j.sessionStore.provider = "PlayCookieSessionStore"
pac4j.sessionStore.provider = ${?PAC4J_SESSIONSTORE_PROVIDER}

# Database configuration
# ~~~~~
# You can declare as many datasources as you want.
1 change: 1 addition & 0 deletions datahub-frontend/play.gradle
@@ -48,6 +48,7 @@ dependencies {
implementation externalDependency.playPac4j
implementation externalDependency.shiroCore
implementation externalDependency.playCache
implementation externalDependency.playEhcache
implementation externalDependency.playWs
implementation externalDependency.playServer
implementation externalDependency.playAkkaHttpServer
datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
@@ -544,10 +544,6 @@ private void configureContainerResolvers(final RuntimeWiring.Builder builder) {
.type("Container", typeWiring -> typeWiring
.dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient))
.dataFetcher("entities", new ContainerEntitiesResolver(entityClient))
.dataFetcher("domain", new LoadableTypeResolver<>(domainType, (env) -> {
final Container container = env.getSource();
return container.getDomain() != null ? container.getDomain().getUrn() : null;
}))
.dataFetcher("platform",
new LoadableTypeResolver<>(dataPlatformType,
(env) -> ((Container) env.getSource()).getPlatform().getUrn()))
@@ -819,13 +815,6 @@ private void configureDatasetResolvers(final RuntimeWiring.Builder builder) {
.type("Dataset", typeWiring -> typeWiring
.dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient))
.dataFetcher("lineage", new EntityLineageResultResolver(siblingGraphService))
.dataFetcher("domain",
new LoadableTypeResolver<>(
domainType,
(env) -> {
final Dataset dataset = env.getSource();
return dataset.getDomain() != null ? dataset.getDomain().getUrn() : null;
}))
.dataFetcher("platform", new LoadableTypeResolver<>(dataPlatformType,
(env) -> ((Dataset) env.getSource()).getPlatform().getUrn())
)
@@ -1000,9 +989,7 @@ private void configureNotebookResolvers(final RuntimeWiring.Builder builder) {
return notebook.getDataPlatformInstance() != null ? notebook.getDataPlatformInstance().getUrn() : null;
})
)
.dataFetcher("domain", new LoadableTypeResolver<>(domainType,
(env) -> ((Notebook) env.getSource()).getDomain().getUrn())
));
);
}

/**
@@ -1014,14 +1001,6 @@ private void configureDashboardResolvers(final RuntimeWiring.Builder builder) {
.dataFetcher("lineage", new EntityLineageResultResolver(siblingGraphService))
.dataFetcher("platform", new LoadableTypeResolver<>(dataPlatformType,
(env) -> ((Dashboard) env.getSource()).getPlatform().getUrn()))
.dataFetcher("domain", new LoadableTypeResolver<>(
domainType,
(env) -> {
final Dashboard dashboard = env.getSource();
return dashboard.getDomain() != null ? dashboard.getDomain().getUrn() : null;
}
)
)
.dataFetcher("dataPlatformInstance",
new LoadableTypeResolver<>(dataPlatformInstanceType,
(env) -> {
@@ -1054,13 +1033,6 @@ private void configureChartResolvers(final RuntimeWiring.Builder builder) {
.dataFetcher("lineage", new EntityLineageResultResolver(siblingGraphService))
.dataFetcher("platform", new LoadableTypeResolver<>(dataPlatformType,
(env) -> ((Chart) env.getSource()).getPlatform().getUrn()))
.dataFetcher("domain", new LoadableTypeResolver<>(
domainType,
(env) -> {
final Chart chart = env.getSource();
return chart.getDomain() != null ? chart.getDomain().getUrn() : null;
})
)
.dataFetcher("dataPlatformInstance",
new LoadableTypeResolver<>(dataPlatformInstanceType,
(env) -> {
@@ -1138,13 +1110,6 @@ private void configureDataJobResolvers(final RuntimeWiring.Builder builder) {
.dataFetcher("lineage", new EntityLineageResultResolver(siblingGraphService))
.dataFetcher("dataFlow", new LoadableTypeResolver<>(dataFlowType,
(env) -> ((DataJob) env.getSource()).getDataFlow().getUrn()))
.dataFetcher("domain", new LoadableTypeResolver<>(
domainType,
(env) -> {
final DataJob dataJob = env.getSource();
return dataJob.getDomain() != null ? dataJob.getDomain().getUrn() : null;
})
)
.dataFetcher("dataPlatformInstance",
new LoadableTypeResolver<>(dataPlatformInstanceType,
(env) -> {
@@ -1180,13 +1145,6 @@ private void configureDataFlowResolvers(final RuntimeWiring.Builder builder) {
.dataFetcher("lineage", new EntityLineageResultResolver(siblingGraphService))
.dataFetcher("platform", new LoadableTypeResolver<>(dataPlatformType,
(env) -> ((DataFlow) env.getSource()).getPlatform().getUrn()))
.dataFetcher("domain", new LoadableTypeResolver<>(
domainType,
(env) -> {
final DataFlow dataFlow = env.getSource();
return dataFlow.getDomain() != null ? dataFlow.getDomain().getUrn() : null;
})
)
.dataFetcher("dataPlatformInstance",
new LoadableTypeResolver<>(dataPlatformInstanceType,
(env) -> {
@@ -1215,12 +1173,6 @@ private void configureMLFeatureTableResolvers(final RuntimeWiring.Builder builder) {
return entity.getDataPlatformInstance() != null ? entity.getDataPlatformInstance().getUrn() : null;
})
)
.dataFetcher("domain", new LoadableTypeResolver<>(
domainType,
(env) -> {
final MLFeatureTable entity = env.getSource();
return entity.getDomain() != null ? entity.getDomain().getUrn() : null;
}))
)
.type("MLFeatureTableProperties", typeWiring -> typeWiring
.dataFetcher("mlFeatures",
@@ -1264,13 +1216,6 @@ private void configureMLFeatureTableResolvers(final RuntimeWiring.Builder builder) {
return mlModel.getDataPlatformInstance() != null ? mlModel.getDataPlatformInstance().getUrn() : null;
})
)
.dataFetcher("domain",
new LoadableTypeResolver<>(
domainType,
(env) -> {
final MLModel mlModel = env.getSource();
return mlModel.getDomain() != null ? mlModel.getDomain().getUrn() : null;
}))
)
.type("MLModelProperties", typeWiring -> typeWiring
.dataFetcher("groups", new LoadableTypeBatchResolver<>(mlModelGroupType,
@@ -1298,13 +1243,6 @@ private void configureMLFeatureTableResolvers(final RuntimeWiring.Builder builder) {
return entity.getDataPlatformInstance() != null ? entity.getDataPlatformInstance().getUrn() : null;
})
)
.dataFetcher("domain",
new LoadableTypeResolver<>(
domainType,
(env) -> {
final MLModelGroup entity = env.getSource();
return entity.getDomain() != null ? entity.getDomain().getUrn() : null;
}))
)
.type("MLFeature", typeWiring -> typeWiring
.dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient))
@@ -1316,13 +1254,6 @@ private void configureMLFeatureTableResolvers(final RuntimeWiring.Builder builder) {
return entity.getDataPlatformInstance() != null ? entity.getDataPlatformInstance().getUrn() : null;
})
)
.dataFetcher("domain",
new LoadableTypeResolver<>(
domainType,
(env) -> {
final MLFeature entity = env.getSource();
return entity.getDomain() != null ? entity.getDomain().getUrn() : null;
}))
)
.type("MLPrimaryKey", typeWiring -> typeWiring
.dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient))
@@ -1334,13 +1265,6 @@ private void configureMLFeatureTableResolvers(final RuntimeWiring.Builder builder) {
return entity.getDataPlatformInstance() != null ? entity.getDataPlatformInstance().getUrn() : null;
})
)
.dataFetcher("domain",
new LoadableTypeResolver<>(
domainType,
(env) -> {
final MLPrimaryKey entity = env.getSource();
return entity.getDomain() != null ? entity.getDomain().getUrn() : null;
}))
);
}

@@ -1357,6 +1281,11 @@ private void configureDomainResolvers(final RuntimeWiring.Builder builder) {
.dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)
)
);
builder.type("DomainAssociation", typeWiring -> typeWiring
.dataFetcher("domain",
new LoadableTypeResolver<>(domainType,
(env) -> ((com.linkedin.datahub.graphql.generated.DomainAssociation) env.getSource()).getDomain().getUrn()))
);
}

private void configureAssertionResolvers(final RuntimeWiring.Builder builder) {
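In short, this file stops wiring a null-checked "domain" fetcher onto each entity type and instead registers one fetcher on the shared DomainAssociation type. A condensed sketch of the shape of the change (abbreviated from the hunks above, not runnable on its own):

// Before: each entity type (Dataset, Chart, Dashboard, ...) repeated this pattern.
builder.type("Dataset", typeWiring -> typeWiring
    .dataFetcher("domain", new LoadableTypeResolver<>(domainType, (env) -> {
        final Dataset dataset = env.getSource();
        return dataset.getDomain() != null ? dataset.getDomain().getUrn() : null;
    })));

// After: entities expose a DomainAssociation, and a single fetcher resolves it for all of them.
builder.type("DomainAssociation", typeWiring -> typeWiring
    .dataFetcher("domain", new LoadableTypeResolver<>(domainType,
        (env) -> ((DomainAssociation) env.getSource()).getDomain().getUrn())));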
datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java
@@ -42,14 +42,23 @@ public CompletableFuture<EntityLineageResult> get(DataFetchingEnvironment environment) {
final Integer start = input.getStart(); // Optional!
@Nullable
final Integer count = input.getCount(); // Optional!
@Nullable
final Boolean separateSiblings = input.getSeparateSiblings(); // Optional!

com.linkedin.metadata.graph.LineageDirection resolvedDirection =
com.linkedin.metadata.graph.LineageDirection.valueOf(lineageDirection.toString());

return CompletableFuture.supplyAsync(() -> {
try {
return mapEntityRelationships(lineageDirection,
_siblingGraphService.getLineage(Urn.createFromString(urn), resolvedDirection, start != null ? start : 0, count != null ? count : 100, 1));
_siblingGraphService.getLineage(
Urn.createFromString(urn),
resolvedDirection,
start != null ? start : 0,
count != null ? count : 100,
1,
separateSiblings != null ? input.getSeparateSiblings() : false
));
} catch (URISyntaxException e) {
log.error("Failed to fetch lineage for {}", urn);
throw new RuntimeException(String.format("Failed to fetch lineage for %s", urn), e);
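A hedged sketch of a direct call to the expanded getLineage signature (the URN literal and service wiring are hypothetical, and package locations are assumed from the imports in this file):

import java.net.URISyntaxException;
import com.linkedin.common.urn.Urn;
import com.linkedin.metadata.graph.EntityLineageResult;
import com.linkedin.metadata.graph.LineageDirection;
import com.linkedin.metadata.graph.SiblingGraphService;

class LineageCallSketch {
    // The service would come from application wiring; it is a parameter here for illustration.
    static EntityLineageResult fetch(SiblingGraphService siblingGraphService, boolean separateSiblings)
            throws URISyntaxException {
        Urn urn = Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table,PROD)");
        // Defaults mirror the resolver: offset 0, page size 100, one hop;
        // separateSiblings=false merges sibling lineage, true keeps it separate.
        return siblingGraphService.getLineage(urn, LineageDirection.DOWNSTREAM, 0, 100, 1, separateSiblings);
    }
}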
datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectMapper.java
@@ -1,25 +1,24 @@
package com.linkedin.datahub.graphql.types.aspect;

import com.linkedin.common.urn.Urn;
import com.linkedin.datahub.graphql.generated.Aspect;
import com.linkedin.datahub.graphql.types.dataset.mappers.SchemaMetadataMapper;
import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
import com.linkedin.entity.EnvelopedAspect;
import com.linkedin.metadata.Constants;
import javax.annotation.Nonnull;


public class AspectMapper implements ModelMapper<EnvelopedAspect, Aspect> {
public class AspectMapper {

public static final AspectMapper INSTANCE = new AspectMapper();

public static Aspect map(@Nonnull final EnvelopedAspect aspect) {
return INSTANCE.apply(aspect);
public static Aspect map(@Nonnull final EnvelopedAspect aspect, @Nonnull final Urn entityUrn) {
return INSTANCE.apply(aspect, entityUrn);
}

@Override
public Aspect apply(@Nonnull final EnvelopedAspect aspect) {
public Aspect apply(@Nonnull final EnvelopedAspect aspect, @Nonnull final Urn entityUrn) {
if (Constants.SCHEMA_METADATA_ASPECT_NAME.equals(aspect.getName())) {
return SchemaMetadataMapper.map(aspect);
return SchemaMetadataMapper.map(aspect, entityUrn);
}
return null;
}
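A minimal usage sketch of the new two-argument signature (the URN literal is hypothetical; as the code above shows, the mapper returns null for anything other than the schemaMetadata aspect):

import com.linkedin.common.urn.Urn;
import com.linkedin.datahub.graphql.generated.Aspect;
import com.linkedin.datahub.graphql.types.aspect.AspectMapper;
import com.linkedin.entity.EnvelopedAspect;

class AspectMapperSketch {
    static Aspect mapSchema(EnvelopedAspect enveloped) throws Exception {
        // Urn.createFromString throws URISyntaxException for malformed URNs.
        Urn entityUrn = Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:hive,SampleTable,PROD)");
        // Non-null only when enveloped.getName() equals the schemaMetadata aspect name.
        return AspectMapper.map(enveloped, entityUrn);
    }
}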