Skip to content

Commit

Permalink
feat(protobuf): support for custom platform, subtypes, misc improveme…
Browse files Browse the repository at this point in the history
…nts (#5973)
  • Loading branch information
shirshanka authored Sep 19, 2022
1 parent 6f48356 commit 4174669
Show file tree
Hide file tree
Showing 15 changed files with 644 additions and 117 deletions.
6 changes: 6 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ project.ext.externalDependency = [
'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.10',
'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:1.1.1',
'cacheApi' : 'javax.cache:cache-api:1.1.0',
'commonsCli': 'commons-cli:commons-cli:1.5.0',
'commonsIo': 'commons-io:commons-io:2.4',
'commonsLang': 'commons-lang:commons-lang:2.6',
'commonsCollections': 'commons-collections:commons-collections:3.2.2',
Expand Down Expand Up @@ -230,6 +231,11 @@ subprojects {
}
}
} else {
// Run every JavaExec task with a launcher resolved from the Java 11 toolchain,
// the same language version requested for the Javadoc tool just below.
tasks.withType(JavaExec).configureEach {
javaLauncher = javaToolchains.launcherFor {
languageVersion = JavaLanguageVersion.of(11)
}
}
tasks.withType(Javadoc).configureEach {
javadocTool = javaToolchains.javadocToolFor {
languageVersion = JavaLanguageVersion.of(11)
Expand Down
21 changes: 16 additions & 5 deletions docs-website/docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ module.exports = {
favicon: "img/favicon.ico",
organizationName: "datahub-project", // Usually your GitHub org/user name.
projectName: "datahub", // Usually your repo name.
stylesheets: ["https://fonts.googleapis.com/css2?family=Manrope:wght@400;600&display=swap"],
stylesheets: [
"https://fonts.googleapis.com/css2?family=Manrope:wght@400;600&display=swap",
],
noIndex: isSaas,
customFields: {
isSaas: isSaas,
Expand Down Expand Up @@ -49,8 +51,12 @@ module.exports = {
title: null,
logo: {
alt: "DataHub Logo",
src: `img/${isSaas ? "acryl" : "datahub"}-logo-color-light-horizontal.svg`,
srcDark: `img/${isSaas ? "acryl" : "datahub"}-logo-color-dark-horizontal.svg`,
src: `img/${
isSaas ? "acryl" : "datahub"
}-logo-color-light-horizontal.svg`,
srcDark: `img/${
isSaas ? "acryl" : "datahub"
}-logo-color-dark-horizontal.svg`,
},
items: [
{
Expand Down Expand Up @@ -205,15 +211,20 @@ module.exports = {
blog: false,
theme: {
customCss: [
isSaas ? require.resolve("./src/styles/acryl.scss") : require.resolve("./src/styles/datahub.scss"),
isSaas
? require.resolve("./src/styles/acryl.scss")
: require.resolve("./src/styles/datahub.scss"),
require.resolve("./src/styles/global.scss"),
],
},
},
],
],
plugins: [
["@docusaurus/plugin-ideal-image", { quality: 100, sizes: [320, 640, 1280, 1440, 1600] }],
[
"@docusaurus/plugin-ideal-image",
{ quality: 100, sizes: [320, 640, 1280, 1440, 1600] },
],
"docusaurus-plugin-sass",
[
"docusaurus-graphql-plugin",
Expand Down
1 change: 1 addition & 0 deletions metadata-integration/java/datahub-client/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ shadowJar {
mergeServiceFiles()
// we relocate namespaces manually, because we want to know exactly which libs we are exposing and why
// we can move to automatic relocation using ConfigureShadowRelocation after we get to a good place on these first
relocate 'org.springframework', 'datahub.shaded.org.springframework'
relocate 'com.fasterxml.jackson', 'datahub.shaded.jackson'
relocate 'net.jcip.annotations', 'datahub.shaded.annotations'
relocate 'javassist', 'datahub.shaded.javassist'
Expand Down
6 changes: 4 additions & 2 deletions metadata-integration/java/datahub-client/scripts/check_jar.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# This script checks the shadow jar to ensure that we only have allowed classes being exposed through the jar
jarFiles=$(find build/libs -name "datahub-client*.jar" | grep -v sources | grep -v javadoc)
# Base name of the jar(s) to look for under build/libs.
libName=datahub-client
# List the matching jars one per line, sorted by modification time with the oldest first (ls -1rt).
jarishFile=$(find build/libs -name "${libName}*.jar" -exec ls -1rt "{}" +;)
# Drop the sources and javadoc jars, then keep only the last entry, i.e. the most recently modified jar.
jarFiles=$(echo "$jarishFile" | grep -v sources | grep -v javadoc | tail -n 1)
for jarFile in ${jarFiles}; do
jar -tvf $jarFile |\
grep -v "datahub/shaded" |\
Expand All @@ -14,7 +16,7 @@ jar -tvf $jarFile |\
grep -v " org/$" |\
grep -v " io/$" |\
grep -v "git.properties" |\
grep -v "org/springframework" |\
#grep -v "org/springframework" |\
grep -v "org/aopalliance" |\
grep -v "javax/" |\
grep -v "io/swagger" |\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ task publishSchema(dependsOn: build) {
javaexec {
executable = javaLauncher.get().getExecutablePath().getAsFile().getAbsolutePath()
classpath = configurations.datahub
main = "datahub.protobuf.App"
args = ["${projectDir}/build/descriptors/main.dsc", file(f).getAbsoluteFile()]
main = "datahub.protobuf.Proto2DataHub"
args = ["--descriptor", "${projectDir}/build/descriptors/main.dsc", "--file", file(f).getAbsoluteFile()]
}
}
}
Expand Down
111 changes: 107 additions & 4 deletions metadata-integration/java/datahub-protobuf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -546,13 +546,115 @@ Add the following to your `pom.xml`.
</dependency>
```

## Example Application
## Example Application (embedded)

An example application is included which works with the `protobuf-gradle-plugin`, see the standalone [example project](../datahub-protobuf-example).
An example application, **Proto2DataHub**, is included as part of this project.
You can also set up a standalone project that works with the `protobuf-gradle-plugin`; see the [example project](../datahub-protobuf-example) for a working setup.

### Usage

Using the example application:
#### Standalone Application: Proto2DataHub

```shell
java -jar build/libs/datahub-protobuf-0.8.45-SNAPSHOT.jar --help
usage: Proto2DataHub
--datahub_api <arg> [Optional] The API endpoint for DataHub GMS.
(defaults to https://localhost:8080)
--datahub_token <arg> [Optional] The authentication token for
DataHub API access. (defaults to empty)
--datahub_user <arg> [Optional] The datahub user to attribute this
ingestion to. (defaults to ..)
--descriptor <arg> [Required] The generated protobuf descriptor
file. Typically a single .dsc file for the
repo or a .protoc file (1:1 with each src
file)
--directory <arg> [Optional if using --file] The root directory
containing protobuf source files.
--env <arg> [Optional] The environment to attach all
entities to. Typically, DEV, PROD etc.
(defaults to DEV)
--exclude <arg> [Optional] Exclude patterns to avoid
processing all source files, separated by ,.
Typically used with --directory option.
Follows glob patterns: e.g. --exclude
"build/**,generated/**" will exclude all files
in the build and generated directories under
the rootDirectory given by the --directory
option
--file <arg> [Optional if using --directory] The protobuf
source file. Typically a .proto file.
--filename <arg> [Required if using transport file] Filename to
write output to.
--github_org <arg> [Optional] The GitHub organization that this
schema repository belongs to. We will
translate comments in your protoc files like
@datahub-project/data-team to GitHub team urls
like:
https://github.com/orgs/datahub-project/teams/
data-team
--help Print this help message
--platform <arg> [Optional] The data platform to produce
schemas for. e.g. kafka, snowflake, etc.
(defaults to kafka)
--slack_id <arg> [Optional] The Slack team id if your protobuf
files contain comments with references to
channel names. We will translate comments like
#data-eng in your protobuf file to slack urls
like:
https://slack.com/app_redirect?channel=data-en
g&team=T1234 following the documentation at
(https://api.slack.com/reference/deep-linking#
deep-linking-into-your-slack-app__opening-a-ch
annel-by-name-or-id) The easiest way to find
your Slack team id is to open your workspace
in your browser. It should look something
like:
https://app.slack.com/client/TUMKD5EGJ/... In
this case, the team-id is TUMKD5EGJ.
--subtype [Optional] A custom subtype to attach to all
entities produced. e.g. event, schema, topic
etc.(Default is schema)
--transport <arg> [Optional] What transport to use to
communicate with DataHub. Options are: rest
(default), kafka and file.
```

You can run it like a standard Java jar application:
```shell

java -jar build/libs/datahub-protobuf-0.8.45-SNAPSHOT.jar --descriptor ../datahub-protobuf-example/build/descriptors/main.dsc --directory ../datahub-protobuf-example/schema/protobuf/v1/clickstream/ --transport rest
```

Or run it through Gradle:
```shell
../../../gradlew run --args="--descriptor ../datahub-protobuf-example/build/descriptors/main.dsc --directory ../datahub-protobuf-example/schema/protobuf/v1/clickstream/ --transport rest"
```

Result:
```
java -jar build/libs/datahub-protobuf-0.8.45-SNAPSHOT.jar --descriptor ../datahub-protobuf-example/build/descriptors/main.dsc --directory ../datahub-protobuf-example/schema/protobuf/v1/clickstream/ --transport rest
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
✅ Successfully emitted 90 events for 5 files to DataHub REST
```

You can also route results to a file by using the `--transport file --filename events.json` options.

##### Important Flags
Here are a few important flags to use with this command:
- `--env`: Defaults to `DEV`. Switch to `PROD` once you have ironed out any issues with running this command.
- `--platform`: Defaults to `kafka` (as most people use protobuf schema repos with Kafka), but you can provide a custom platform name, e.g. `schema_repo` or `<company_name>_schemas`. If you use a custom platform, make sure to provision it on your DataHub instance (with a logo, etc.) to get a native experience.
- `--subtype`: Gives your entities a more descriptive category than Dataset in the UI. Defaults to `schema`, but you might find `topic`, `event` or `message` more descriptive.



## Example Application (separate project)

The standalone [example project](../datahub-protobuf-example) shows how you can create an independent project that uses this library as part of a build task.

### Sample Usage:

```shell
export DATAHUB_API=...
Expand All @@ -563,5 +665,6 @@ export DATAHUB_TOKEN=...
# export DATAHUB_GITHUBORG=datahub-project
# export DATAHUB_SLACKID=

# publishSchema task will publish all the protobuf files into DataHub
./gradlew publishSchema
```
```
11 changes: 11 additions & 0 deletions metadata-integration/java/datahub-protobuf/build.gradle
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
plugins {
id("com.palantir.git-version") apply false
id "application"
}
apply plugin: 'java'
apply plugin: 'jacoco'
Expand All @@ -17,6 +18,13 @@ afterEvaluate {
targetCompatibility = 11
}
}
// Fully-qualified name of the command-line entry point, kept as an extra
// property so it is defined in one place.
ext {
javaMainClass = "datahub.protobuf.Proto2DataHub"
}

// Point the `application` plugin at that entry point.
application {
mainClassName = javaMainClass
}

dependencies {
implementation project(':metadata-models')
Expand All @@ -25,6 +33,7 @@ dependencies {
implementation externalDependency.protobuf
implementation externalDependency.jgrapht
implementation externalDependency.gson
implementation externalDependency.commonsCli

compileOnly externalDependency.lombok
annotationProcessor externalDependency.lombok
Expand Down Expand Up @@ -198,3 +207,5 @@ nexusStaging {
username = System.getenv("NEXUS_USERNAME")
password = System.getenv("NEXUS_PASSWORD")
}


Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# This script checks the shadow jar to ensure that we only have allowed classes being exposed through the jar
jarFiles=$(find build/libs -name "datahub-protobuf*.jar" | grep -v sources | grep -v javadoc)
# Base name of the jar(s) to look for under build/libs.
libName=datahub-protobuf
# List the matching jars one per line, sorted by modification time with the oldest first (ls -1rt).
jarishFile=$(find build/libs -name "${libName}*.jar" -exec ls -1rt "{}" +;)
# Drop the sources and javadoc jars, then keep only the last entry, i.e. the most recently modified jar.
jarFiles=$(echo "$jarishFile" | grep -v sources | grep -v javadoc | tail -n 1)
for jarFile in ${jarFiles}; do
jar -tvf $jarFile |\
grep -v "datahub/shaded" |\
Expand All @@ -18,7 +20,6 @@ jar -tvf $jarFile |\
grep -v " org/$" |\
grep -v " io/$" |\
grep -v "git.properties" |\
grep -v "org/springframework" |\
grep -v "org/aopalliance" |\
grep -v "javax/" |\
grep -v "io/swagger" |\
Expand Down

This file was deleted.

Loading

0 comments on commit 4174669

Please sign in to comment.