Skip to content

Commit

Permalink
feat(dev): Make repositories configurable for enterprise developers (d…
Browse files Browse the repository at this point in the history
…atahub-project#9230)

Co-authored-by: Hendrik Richert <[email protected]>
Co-authored-by: david-leifker <[email protected]>
  • Loading branch information
3 people authored Nov 28, 2023
1 parent 08fb730 commit 966cb17
Show file tree
Hide file tree
Showing 27 changed files with 409 additions and 52 deletions.
18 changes: 17 additions & 1 deletion datahub-frontend/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,22 @@ docker {
buildx(true)
load(true)
push(false)

// Add build args if they are defined (needed for some CI or enterprise environments)
def dockerBuildArgs = [:]
if (project.hasProperty('alpineApkRepositoryUrl')) {
dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl')
}
if (project.hasProperty('githubMirrorUrl')) {
dockerBuildArgs.GITHUB_REPO_URL = project.getProperty('githubMirrorUrl')
}
if (project.hasProperty('mavenCentralRepositoryUrl')) {
dockerBuildArgs.MAVEN_CENTRAL_REPO_URL = project.getProperty('mavenCentralRepositoryUrl')
}

if (dockerBuildArgs.size() > 0) {
buildArgs(dockerBuildArgs)
}
}

task unversionZip(type: Copy, dependsOn: [':datahub-web-react:build', dist]) {
Expand All @@ -104,4 +120,4 @@ task cleanLocalDockerImages {
rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}")
}
}
dockerClean.finalizedBy(cleanLocalDockerImages)
dockerClean.finalizedBy(cleanLocalDockerImages)
16 changes: 16 additions & 0 deletions datahub-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,22 @@ docker {
buildx(true)
load(true)
push(false)

// Add build args if they are defined (needed for some CI or enterprise environments)
def dockerBuildArgs = [:]
if (project.hasProperty('alpineApkRepositoryUrl')) {
dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl')
}
if (project.hasProperty('githubMirrorUrl')) {
dockerBuildArgs.GITHUB_REPO_URL = project.getProperty('githubMirrorUrl')
}
if (project.hasProperty('mavenCentralRepositoryUrl')) {
dockerBuildArgs.MAVEN_CENTRAL_REPO_URL = project.getProperty('mavenCentralRepositoryUrl')
}

if (dockerBuildArgs.size() > 0) {
buildArgs(dockerBuildArgs)
}
}
tasks.getByPath(":datahub-upgrade:docker").dependsOn([bootJar])

Expand Down
16 changes: 12 additions & 4 deletions docker/datahub-frontend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,22 @@ ARG APP_ENV=prod

FROM alpine:3 AS base

# Configurable repositories
ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine
ARG GITHUB_REPO_URL=https://github.com
ARG MAVEN_CENTRAL_REPO_URL=https://repo1.maven.org/maven2

RUN addgroup -S datahub && adduser -S datahub -G datahub

# Optionally set corporate mirror for apk
RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi

# Upgrade Alpine and base packages
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
RUN apk --no-cache --update-cache --available upgrade \
&& apk --no-cache add curl sqlite libc6-compat java-snappy \
&& apk --no-cache add openjdk11-jre-headless --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \
&& apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/
&& apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \
&& apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/

ENV LD_LIBRARY_PATH="/lib:/lib64"

Expand All @@ -22,8 +30,8 @@ COPY ./docker/monitoring/client-prometheus-config.yaml /datahub-frontend/
RUN chown -R datahub:datahub /datahub-frontend && chmod 755 /datahub-frontend

ENV JMX_VERSION=0.18.0
RUN wget https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar -O opentelemetry-javaagent.jar \
&& wget https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar
RUN wget ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar -O opentelemetry-javaagent.jar \
&& wget ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar

FROM base as dev-install
# Dummy stage for development. Assumes code is built on your machine and mounted to this image.
Expand Down
35 changes: 28 additions & 7 deletions docker/datahub-gms/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
# Defining environment
ARG APP_ENV=prod

# Defining custom repo urls for use in enterprise environments. Re-used between stages below.
ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine
ARG GITHUB_REPO_URL=https://github.com
ARG MAVEN_CENTRAL_REPO_URL=https://repo1.maven.org/maven2

FROM golang:1-alpine3.18 AS binary
FROM golang:1-alpine3.18 AS binary

# Re-declaring arg from above to make it available in this stage (will inherit default value)
ARG ALPINE_REPO_URL

ENV DOCKERIZE_VERSION v0.6.1
WORKDIR /go/src/github.com/jwilder

# Optionally set corporate mirror for apk
RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi

RUN apk --no-cache --update add openssl git tar curl

WORKDIR /go/src/github.com/jwilder/dockerize
Expand All @@ -16,16 +28,25 @@ FROM alpine:3 AS base

# Upgrade Alpine and base packages
ENV JMX_VERSION=0.18.0

# Re-declaring args from above to make them available in this stage (will inherit default values)
ARG ALPINE_REPO_URL
ARG GITHUB_REPO_URL
ARG MAVEN_CENTRAL_REPO_URL

# Optionally set corporate mirror for apk
RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi

# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
RUN apk --no-cache --update-cache --available upgrade \
&& apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat java-snappy \
&& apk --no-cache add openjdk11-jre-headless --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \
&& apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ \
&& curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \
&& curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-jmx/9.4.46.v20220331/jetty-jmx-9.4.46.v20220331.jar --output jetty-jmx.jar \
&& curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-util/9.4.46.v20220331/jetty-util-9.4.46.v20220331.jar --output jetty-util.jar \
&& wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \
&& wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \
&& apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \
&& apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ \
&& curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \
&& curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-jmx/9.4.46.v20220331/jetty-jmx-9.4.46.v20220331.jar --output jetty-jmx.jar \
&& curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-util/9.4.46.v20220331/jetty-util-9.4.46.v20220331.jar --output jetty-util.jar \
&& wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \
&& wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \
&& cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks
COPY --from=binary /go/bin/dockerize /usr/local/bin

Expand Down
24 changes: 22 additions & 2 deletions docker/datahub-ingestion-base/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
ARG APP_ENV=full
ARG BASE_IMAGE=base

# Defining custom repo urls for use in enterprise environments. Re-used between stages below.
ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine
ARG GITHUB_REPO_URL=https://github.com
ARG DEBIAN_REPO_URL=http://deb.debian.org/debian
ARG PIP_MIRROR_URL=null

FROM golang:1-alpine3.18 AS dockerize-binary

# Re-declaring arg from above to make it available in this stage (will inherit default value)
ARG ALPINE_REPO_URL

ENV DOCKERIZE_VERSION v0.6.1
WORKDIR /go/src/github.com/jwilder

# Optionally set corporate mirror for apk
RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi

RUN apk --no-cache --update add openssl git tar curl

WORKDIR /go/src/github.com/jwilder/dockerize
Expand All @@ -14,11 +26,19 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION

FROM python:3.10 as base

ARG DEBIAN_REPO_URL
ARG PIP_MIRROR_URL
ARG GITHUB_REPO_URL

ENV LIBRDKAFKA_VERSION=1.6.2
ENV CONFLUENT_KAFKA_VERSION=1.6.1

ENV DEBIAN_FRONTEND noninteractive

# Optionally set corporate mirror for apk and pip
RUN if [ "${DEBIAN_REPO_URL}" != "http://deb.debian.org/debian" ] ; then sed -i "s#http.*://deb.debian.org/debian#${DEBIAN_REPO_URL}#g" /etc/apt/sources.list.d/debian.sources ; fi
RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi

RUN apt-get update && apt-get install -y -qq \
make \
python3-ldap \
Expand All @@ -33,7 +53,7 @@ RUN apt-get update && apt-get install -y -qq \
unzip \
ldap-utils \
&& python -m pip install --no-cache --upgrade pip wheel setuptools \
&& wget -q https://github.com/edenhill/librdkafka/archive/v${LIBRDKAFKA_VERSION}.tar.gz -O - | \
&& wget -q ${GITHUB_REPO_URL}/edenhill/librdkafka/archive/v${LIBRDKAFKA_VERSION}.tar.gz -O - | \
tar -xz -C /root \
&& cd /root/librdkafka-${LIBRDKAFKA_VERSION} \
&& ./configure --prefix /usr && make && make install && cd .. && rm -rf /root/librdkafka-${LIBRDKAFKA_VERSION} \
Expand Down Expand Up @@ -84,4 +104,4 @@ FROM ${BASE_IMAGE} as slim-install
FROM ${APP_ENV}-install

USER datahub
ENV PATH="/datahub-ingestion/.local/bin:$PATH"
ENV PATH="/datahub-ingestion/.local/bin:$PATH"
21 changes: 19 additions & 2 deletions docker/datahub-ingestion-base/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,24 @@ docker {
}.exclude {
i -> (!i.file.name.endsWith(".dockerignore") && i.file.isHidden())
}
buildArgs([APP_ENV: docker_target])

def dockerBuildArgs = [APP_ENV: docker_target]

// Add build args if they are defined (needed for some CI or enterprise environments)
if (project.hasProperty('alpineApkRepositoryUrl')) {
dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl')
}
if (project.hasProperty('githubMirrorUrl')) {
dockerBuildArgs.GITHUB_REPO_URL = project.getProperty('githubMirrorUrl')
}
if (project.hasProperty('debianAptRepositoryUrl')) {
dockerBuildArgs.DEBIAN_REPO_URL = project.getProperty('debianAptRepositoryUrl')
}
if (project.hasProperty('pipMirrorUrl')) {
dockerBuildArgs.PIP_MIRROR_URL = project.getProperty('pipMirrorUrl')
}

buildArgs(dockerBuildArgs)
}
tasks.getByName('docker').dependsOn('build')

Expand All @@ -42,4 +59,4 @@ task cleanLocalDockerImages {
rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}")
}
}
dockerClean.finalizedBy(cleanLocalDockerImages)
dockerClean.finalizedBy(cleanLocalDockerImages)
9 changes: 9 additions & 0 deletions docker/datahub-ingestion/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
ARG APP_ENV=full
ARG BASE_IMAGE=acryldata/datahub-ingestion-base
ARG DOCKER_VERSION=head
ARG PIP_MIRROR_URL=null
ARG DEBIAN_REPO_URL=http://deb.debian.org/debian

FROM $BASE_IMAGE:$DOCKER_VERSION as base
USER 0
Expand All @@ -20,16 +22,23 @@ USER datahub
ENV PATH="/datahub-ingestion/.local/bin:$PATH"

FROM base as slim-install
ARG PIP_MIRROR_URL

RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]"

FROM base as full-install-build
ARG PIP_MIRROR_URL
ARG DEBIAN_REPO_URL

USER 0
RUN if [ "${DEBIAN_REPO_URL}" != "http://deb.debian.org/debian" ] ; then sed -i "s#http.*://deb.debian.org/debian#${DEBIAN_REPO_URL}#g" /etc/apt/sources.list.d/debian.sources ; fi
RUN apt-get update && apt-get install -y -qq maven

USER datahub
COPY ./docker/datahub-ingestion/pyspark_jars.sh .

RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
RUN pip install --no-cache --user ".[base]" && \
pip install --no-cache --user "./airflow-plugin[acryl-datahub-airflow-plugin]" && \
pip install --no-cache --user ".[all]"
Expand Down
5 changes: 5 additions & 0 deletions docker/datahub-ingestion/Dockerfile-slim-only
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Defining environment
ARG BASE_IMAGE=acryldata/datahub-ingestion-base
ARG DOCKER_VERSION=head-slim
ARG PIP_MIRROR_URL=null

FROM $BASE_IMAGE:$DOCKER_VERSION as base
USER 0
Expand All @@ -17,6 +18,10 @@ USER datahub
ENV PATH="/datahub-ingestion/.local/bin:$PATH"

FROM base as slim-install

ARG PIP_MIRROR_URL

RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]"

FROM slim-install as final
Expand Down
16 changes: 13 additions & 3 deletions docker/datahub-ingestion/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,18 @@ docker {
}.exclude {
i -> (!i.file.name.endsWith(".dockerignore") && i.file.isHidden())
}
buildArgs([DOCKER_VERSION: version,
RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace("-slim", '')])

def dockerBuildArgs = [DOCKER_VERSION: version, RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace("-slim", '')]

// Add build args if they are defined (needed for some CI or enterprise environments)
if (project.hasProperty('pipMirrorUrl')) {
dockerBuildArgs.PIP_MIRROR_URL = project.getProperty('pipMirrorUrl')
}
if (project.hasProperty('debianAptRepositoryUrl')) {
dockerBuildArgs.DEBIAN_REPO_URL = project.getProperty('debianAptRepositoryUrl')
}

buildArgs(dockerBuildArgs)
}
tasks.getByName('docker').dependsOn(['build',
':docker:datahub-ingestion-base:docker',
Expand All @@ -51,4 +61,4 @@ task cleanLocalDockerImages {
rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}")
}
}
dockerClean.finalizedBy(cleanLocalDockerImages)
dockerClean.finalizedBy(cleanLocalDockerImages)
27 changes: 23 additions & 4 deletions docker/datahub-mae-consumer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
# Defining environment
ARG APP_ENV=prod

# Defining custom repo urls for use in enterprise environments. Re-used between stages below.
ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine
ARG GITHUB_REPO_URL=https://github.com
ARG MAVEN_CENTRAL_REPO_URL=https://repo1.maven.org/maven2

FROM golang:1-alpine3.18 AS binary

# Re-declaring arg from above to make it available in this stage (will inherit default value)
ARG ALPINE_REPO_URL

ENV DOCKERIZE_VERSION v0.6.1
WORKDIR /go/src/github.com/jwilder

# Optionally set corporate mirror for apk
RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi

RUN apk --no-cache --update add openssl git tar curl

WORKDIR /go/src/github.com/jwilder/dockerize
Expand All @@ -14,15 +25,23 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION

FROM alpine:3 AS base

# Re-declaring args from above to make them available in this stage (will inherit default values)
ARG ALPINE_REPO_URL
ARG GITHUB_REPO_URL
ARG MAVEN_CENTRAL_REPO_URL

# Optionally set corporate mirror for apk
RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi

# Upgrade Alpine and base packages
ENV JMX_VERSION=0.18.0
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
RUN apk --no-cache --update-cache --available upgrade \
&& apk --no-cache add curl bash coreutils sqlite libc6-compat java-snappy \
&& apk --no-cache add openjdk11-jre-headless --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \
&& apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ \
&& wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \
&& wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \
&& apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \
&& apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ \
&& wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \
&& wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \
&& cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks
COPY --from=binary /go/bin/dockerize /usr/local/bin

Expand Down
Loading

0 comments on commit 966cb17

Please sign in to comment.