refactor(restli-mce-consumer) (#6744)
* fix(security): commons-text in frontend

* refactor(restli): set threads based on cpu cores
feat(mce-consumers): hit local restli endpoint

* testing docker build

* Add retry configuration options for entity client

* Kafka debugging

* fix(kafka-setup): parallelize topic creation

* Adjust docker build

* Docker build updates

* WIP

* fix(lint): metadata-ingestion lint

* fix(gradle-docker): fix docker frontend dep

* fix(elastic): fix race condition between gms and mae for index creation

* Revert "fix(elastic): fix race condition between gms and mae for index creation"

This reverts commit 9629d12.

* fix(test): fix datahub frontend test for clean/test cycle

* fix(test): datahub-frontend missing assets in test

* fix(security): set protobuf lib datahub-upgrade & mce/mae-consumer

* gitignore update

* fix(docker): remove platform on docker base image, set by buildx

* refactor(kafka-producer): update kafka producer tracking/logging

* updates per PR feedback

* Add documentation around mce standalone consumer
Kafka consumer concurrency to follow thread count for restli & sql connection pool

Co-authored-by: leifker <[email protected]>
Co-authored-by: Pedro Silva <[email protected]>
3 people authored Dec 26, 2022
1 parent 6fdbf6b commit ecc01b9
Showing 73 changed files with 1,401 additions and 473 deletions.
5 changes: 5 additions & 0 deletions .github/actions/docker-custom-build-and-push/action.yml
@@ -23,6 +23,9 @@ inputs:
# e.g. linkedin/datahub-gms
description: "List of Docker images to use as base name for tags"
required: true
build-args:
description: "List of build-time variables. Same as docker/build-push-action"
required: false
tags:
# e.g. latest,head,sha12345
description: "List of tags to use for the Docker image"
@@ -57,6 +60,7 @@ runs:
# TODO this only does single-platform builds in testing?
# leaving it for now since it matches the previous behavior
platforms: linux/amd64
build-args: ${{ inputs.build-args }}
tags: ${{ steps.docker_meta.outputs.tags }}
load: true
push: false
@@ -86,6 +90,7 @@ runs:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
build-args: ${{ inputs.build-args }}
tags: ${{ steps.docker_meta.outputs.tags }}
push: true

23 changes: 22 additions & 1 deletion .github/workflows/docker-unified.yml
@@ -71,6 +71,10 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Pre-build artifacts for docker image
run: |
./gradlew :metadata-service:war:build -x test --parallel
mv ./metadata-service/war/build/libs/war.war .
- name: Build and push
uses: ./.github/actions/docker-custom-build-and-push
with:
@@ -127,6 +131,10 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Pre-build artifacts for docker image
run: |
./gradlew :metadata-jobs:mae-consumer-job:build -x test --parallel
mv ./metadata-jobs/mae-consumer-job/build/libs/mae-consumer-job.jar .
- name: Build and push
uses: ./.github/actions/docker-custom-build-and-push
with:
@@ -183,6 +191,10 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Pre-build artifacts for docker image
run: |
./gradlew :metadata-jobs:mce-consumer-job:build -x test --parallel
mv ./metadata-jobs/mce-consumer-job/build/libs/mce-consumer-job.jar .
- name: Build and push
uses: ./.github/actions/docker-custom-build-and-push
with:
@@ -239,6 +251,10 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Pre-build artifacts for docker image
run: |
./gradlew :datahub-upgrade:build -x test --parallel
mv ./datahub-upgrade/build/libs/datahub-upgrade.jar .
- name: Build and push
uses: ./.github/actions/docker-custom-build-and-push
with:
@@ -294,6 +310,11 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Pre-build artifacts for docker image
run: |
export USE_SYSTEM_NODE="true"
./gradlew :datahub-frontend:dist -PuseSystemNode=${USE_SYSTEM_NODE} -x test -x yarnTest -x yarnLint --parallel
mv ./datahub-frontend/build/distributions/datahub-frontend-*.zip datahub-frontend.zip
- name: Build and push
uses: ./.github/actions/docker-custom-build-and-push
with:
@@ -358,7 +379,7 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
publish: ${{ needs.setup.outputs.publish }}
context: ./docker/kafka-setup
context: .
file: ./docker/kafka-setup/Dockerfile
platforms: linux/amd64,linux/arm64

1 change: 0 additions & 1 deletion .gitignore
@@ -72,7 +72,6 @@ temp/**

# frontend assets
datahub-frontend/public/**
datahub-frontend/test/resources/public/**

.remote*
# Ignore runtime generated authenticatior/authorizer jar files
7 changes: 7 additions & 0 deletions build.gradle
@@ -1,5 +1,6 @@
buildscript {
ext.junitJupiterVersion = '5.6.1'
// Releases: https://github.com/linkedin/rest.li/blob/master/CHANGELOG.md
ext.pegasusVersion = '29.22.16'
ext.mavenVersion = '3.6.3'
ext.springVersion = '5.3.20'
@@ -32,8 +33,13 @@ buildscript {
plugins {
id 'com.gorylenko.gradle-git-properties' version '2.4.0-rc2'
id 'com.github.johnrengelman.shadow' version '6.1.0'
id "com.palantir.docker" version "0.34.0"
// https://blog.ltgt.net/javax-jakarta-mess-and-gradle-solution/
// TODO id "org.gradlex.java-ecosystem-capabilities" version "1.0"
}

apply from: "gradle/docker/docker.gradle"

project.ext.spec = [
'product' : [
'pegasus' : [
@@ -199,6 +205,7 @@ allprojects {
apply plugin: 'idea'
apply plugin: 'eclipse'
apply plugin: 'checkstyle'
// apply plugin: 'org.gradlex.java-ecosystem-capabilities'
}

configure(subprojects.findAll {! it.name.startsWith('spark-lineage')}) {
7 changes: 6 additions & 1 deletion datahub-frontend/app/auth/AuthModule.java
@@ -14,6 +14,7 @@
import com.linkedin.entity.client.EntityClient;
import com.linkedin.entity.client.RestliEntityClient;
import com.linkedin.metadata.restli.DefaultRestliClientFactory;
import com.linkedin.parseq.retry.backoff.ExponentialBackoff;
import com.linkedin.util.Configuration;
import controllers.SsoCallbackController;
import java.nio.charset.StandardCharsets;
@@ -55,6 +56,8 @@ public class AuthModule extends AbstractModule {
*/
private static final String PAC4J_AES_KEY_BASE_CONF = "play.http.secret.key";
private static final String PAC4J_SESSIONSTORE_PROVIDER_CONF = "pac4j.sessionStore.provider";
private static final String ENTITY_CLIENT_RETRY_INTERVAL = "entityClient.retryInterval";
private static final String ENTITY_CLIENT_NUM_RETRIES = "entityClient.numRetries";

private final com.typesafe.config.Config _configs;

@@ -158,7 +161,9 @@ protected Authentication provideSystemAuthentication() {
@Provides
@Singleton
protected EntityClient provideEntityClient() {
return new RestliEntityClient(buildRestliClient());
return new RestliEntityClient(buildRestliClient(),
new ExponentialBackoff(_configs.getInt(ENTITY_CLIENT_RETRY_INTERVAL)),
_configs.getInt(ENTITY_CLIENT_NUM_RETRIES));
}

@Provides
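A minimal sketch of how the two new retry settings might be supplied and read through Typesafe Config. The class, the values, and the interval unit are placeholders for illustration; the actual defaults used by datahub-frontend are not taken from this commit.

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

public class EntityClientRetrySketch {
    public static void main(String[] args) {
        // Placeholder values; a real deployment would set these keys in datahub-frontend's application.conf.
        Config configs = ConfigFactory.parseString(
            "entityClient.retryInterval = 2\n" + "entityClient.numRetries = 3\n");

        // AuthModule reads these keys and passes them to
        // new RestliEntityClient(restClient, new ExponentialBackoff(retryInterval), numRetries).
        int retryInterval = configs.getInt("entityClient.retryInterval");
        int numRetries = configs.getInt("entityClient.numRetries");
        System.out.println("retryInterval=" + retryInterval + ", numRetries=" + numRetries);
    }
}
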
107 changes: 107 additions & 0 deletions datahub-frontend/app/client/KafkaTrackingProducer.java
@@ -0,0 +1,107 @@
package client;

import com.typesafe.config.Config;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.config.SaslConfigs;
import org.apache.kafka.common.config.SslConfigs;
import org.apache.kafka.common.security.auth.SecurityProtocol;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import play.api.inject.ApplicationLifecycle;
import utils.ConfigUtil;

import javax.inject.Inject;

import javax.annotation.Nonnull;
import javax.inject.Singleton;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.Properties;
import java.util.concurrent.CompletableFuture;

@Singleton
public class KafkaTrackingProducer {
private final Logger _logger = LoggerFactory.getLogger(KafkaTrackingProducer.class.getName());
private static final List<String> KAFKA_SSL_PROTOCOLS = Collections.unmodifiableList(
Arrays.asList(SecurityProtocol.SSL.name(), SecurityProtocol.SASL_SSL.name(),
SecurityProtocol.SASL_PLAINTEXT.name()));

private final Boolean _isEnabled;
private final KafkaProducer<String, String> _producer;

@Inject
public KafkaTrackingProducer(@Nonnull Config config, ApplicationLifecycle lifecycle) {
_isEnabled = !config.hasPath("analytics.enabled") || config.getBoolean("analytics.enabled");

if (_isEnabled) {
_logger.debug("Analytics tracking is enabled");
_producer = createKafkaProducer(config);

lifecycle.addStopHook(
() -> {
_producer.flush();
_producer.close();
return CompletableFuture.completedFuture(null);
});
} else {
_logger.debug("Analytics tracking is disabled");
_producer = null;
}
}

public Boolean isEnabled() {
return _isEnabled;
}

public void send(ProducerRecord<String, String> record) {
_producer.send(record);
}

private static KafkaProducer createKafkaProducer(Config config) {
final Properties props = new Properties();
props.put(ProducerConfig.CLIENT_ID_CONFIG, "datahub-frontend");
props.put(ProducerConfig.DELIVERY_TIMEOUT_MS_CONFIG, config.getString("analytics.kafka.delivery.timeout.ms"));
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getString("analytics.kafka.bootstrap.server"));
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); // Actor urn.
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); // JSON object.

final String securityProtocolConfig = "analytics.kafka.security.protocol";
if (config.hasPath(securityProtocolConfig)
&& KAFKA_SSL_PROTOCOLS.contains(config.getString(securityProtocolConfig))) {
props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, config.getString(securityProtocolConfig));
setConfig(config, props, SslConfigs.SSL_KEY_PASSWORD_CONFIG, "analytics.kafka.ssl.key.password");

setConfig(config, props, SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, "analytics.kafka.ssl.keystore.type");
setConfig(config, props, SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, "analytics.kafka.ssl.keystore.location");
setConfig(config, props, SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, "analytics.kafka.ssl.keystore.password");

setConfig(config, props, SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, "analytics.kafka.ssl.truststore.type");
setConfig(config, props, SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, "analytics.kafka.ssl.truststore.location");
setConfig(config, props, SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, "analytics.kafka.ssl.truststore.password");

setConfig(config, props, SslConfigs.SSL_PROTOCOL_CONFIG, "analytics.kafka.ssl.protocol");
setConfig(config, props, SslConfigs.SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_CONFIG, "analytics.kafka.ssl.endpoint.identification.algorithm");

final String securityProtocol = config.getString(securityProtocolConfig);
if (securityProtocol.equals(SecurityProtocol.SASL_SSL.name())
|| securityProtocol.equals(SecurityProtocol.SASL_PLAINTEXT.name())) {
setConfig(config, props, SaslConfigs.SASL_MECHANISM, "analytics.kafka.sasl.mechanism");
setConfig(config, props, SaslConfigs.SASL_JAAS_CONFIG, "analytics.kafka.sasl.jaas.config");
setConfig(config, props, SaslConfigs.SASL_KERBEROS_SERVICE_NAME, "analytics.kafka.sasl.kerberos.service.name");
setConfig(config, props, SaslConfigs.SASL_LOGIN_CALLBACK_HANDLER_CLASS, "analytics.kafka.sasl.login.callback.handler.class");
}
}

return new org.apache.kafka.clients.producer.KafkaProducer<String, String>(props);
}

private static void setConfig(Config config, Properties props, String key, String configKey) {
Optional.ofNullable(ConfigUtil.getString(config, configKey, null))
.ifPresent(v -> props.put(key, v));
}
}
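
A hypothetical caller for the new producer, shown only to illustrate the isEnabled/send contract; the topic name and payload shape are assumptions and do not appear in this commit.

import client.KafkaTrackingProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class TrackingCallerSketch {
    // Emits one analytics event when tracking is enabled. The key is the actor URN and the
    // value is a JSON string, matching the String/String serializers configured above.
    public static void track(KafkaTrackingProducer producer, String actorUrn, String eventJson) {
        if (producer.isEnabled()) {
            producer.send(new ProducerRecord<>("DataHubUsageEvent_v1", actorUrn, eventJson)); // topic name assumed
        }
    }
}
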
19 changes: 15 additions & 4 deletions datahub-frontend/app/controllers/Application.java
@@ -9,13 +9,16 @@
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.linkedin.util.Pair;
import com.typesafe.config.Config;

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import play.Environment;
import play.http.HttpEntity;
import play.libs.ws.InMemoryBodyWritable;
Expand Down Expand Up @@ -43,6 +46,7 @@


public class Application extends Controller {
private final Logger _logger = LoggerFactory.getLogger(Application.class.getName());
private final Config _config;
private final StandaloneWSClient _ws;
private final Environment _environment;
@@ -63,10 +67,17 @@ public Application(Environment environment, @Nonnull Config config) {
*/
@Nonnull
private Result serveAsset(@Nullable String path) {
InputStream indexHtml = _environment.resourceAsStream("public/index.html");
return ok(indexHtml)
.withHeader("Cache-Control", "no-cache")
.as("text/html");
try {
InputStream indexHtml = _environment.resourceAsStream("public/index.html");
return ok(indexHtml)
.withHeader("Cache-Control", "no-cache")
.as("text/html");
} catch (Exception e) {
_logger.warn("Cannot load public/index.html resource. Static assets or assets jar missing?");
return notFound()
.withHeader("Cache-Control", "no-cache")
.as("text/html");
}
}

@Nonnull