diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index dfe4b8354d435e..d83dd2766770b2 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -73,6 +73,10 @@ jobs: - uses: actions/setup-python@v4 with: python-version: "3.7" + - name: Download YQ + uses: chrisdickinson/setup-yq@v1.0.1 + with: + yq-version: v4.28.2 - name: Quickstart Compose Validation run: ./docker/quickstart/generate_and_compare.sh diff --git a/docker/datahub-mce-consumer/env/docker.env b/docker/datahub-mce-consumer/env/docker.env index f11ce88f72cdda..73b981d9883077 100644 --- a/docker/datahub-mce-consumer/env/docker.env +++ b/docker/datahub-mce-consumer/env/docker.env @@ -2,7 +2,7 @@ MCE_CONSUMER_ENABLED=true EBEAN_DATASOURCE_USERNAME=datahub EBEAN_DATASOURCE_PASSWORD=datahub EBEAN_DATASOURCE_HOST=mysql:3306 -EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8 +EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2 EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver KAFKA_BOOTSTRAP_SERVER=broker:29092 KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 @@ -15,6 +15,10 @@ JAVA_OPTS=-Xms1g -Xmx1g ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mce-consumer/resources/entity-registry.yml DATAHUB_SYSTEM_CLIENT_ID=__datahub_system DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing +ENTITY_SERVICE_ENABLE_RETENTION=true +MAE_CONSUMER_ENABLED=false +PE_CONSUMER_ENABLED=false +UI_INGESTION_ENABLED=false # Uncomment to configure kafka topic names # Make sure these names are consistent across the whole deployment diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml index e7d2ae9eb842b9..1059ae2234d913 100644 --- a/docker/docker-compose-with-cassandra.yml +++ b/docker/docker-compose-with-cassandra.yml @@ -7,7 +7,7 @@ version: '3.8' services: zookeeper: - image: confluentinc/cp-zookeeper:5.4.0 + image: confluentinc/cp-zookeeper:7.2.2 env_file: zookeeper/env/docker.env hostname: zookeeper container_name: zookeeper @@ -17,7 +17,7 @@ services: - zkdata:/var/lib/zookeeper broker: - image: confluentinc/cp-kafka:5.4.0 + image: confluentinc/cp-kafka:7.2.2 env_file: broker/env/docker.env hostname: broker container_name: broker @@ -29,25 +29,12 @@ services: volumes: - broker:/var/lib/kafka/data/ - # This "container" is a workaround to pre-create topics - kafka-setup: - build: - context: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} - env_file: kafka-setup/env/docker.env - hostname: kafka-setup - container_name: kafka-setup - depends_on: - - broker - - schema-registry - schema-registry: - image: confluentinc/cp-schema-registry:5.4.0 + image: confluentinc/cp-schema-registry:7.2.2 env_file: schema-registry/env/docker.env hostname: schema-registry container_name: schema-registry depends_on: - - zookeeper - broker ports: - "8081:8081" diff --git a/docker/docker-compose-without-neo4j.m1.yml b/docker/docker-compose-without-neo4j.m1.yml index da47c7944045b1..f8f98ce13d9b8a 100644 --- a/docker/docker-compose-without-neo4j.m1.yml +++ b/docker/docker-compose-without-neo4j.m1.yml @@ -1,9 +1,3 @@ services: - broker: - image: kymeric/cp-kafka:latest mysql: image: mariadb:10.5.8 - schema-registry: - image: eugenetea/schema-registry-arm64:latest - zookeeper: - image: kymeric/cp-zookeeper:latest diff 
--git a/docker/docker-compose-without-neo4j.override.yml b/docker/docker-compose-without-neo4j.override.yml index f09ab0ce08b1e8..afaec5df0e6658 100644 --- a/docker/docker-compose-without-neo4j.override.yml +++ b/docker/docker-compose-without-neo4j.override.yml @@ -6,7 +6,7 @@ services: hostname: mysql image: mysql:5.7 env_file: mysql/env/docker.env - command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin + command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password ports: - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 volumes: diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml index d8eb39f49e39db..a0bd0fd34c3912 100644 --- a/docker/docker-compose-without-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -7,7 +7,7 @@ version: '3.8' services: zookeeper: - image: confluentinc/cp-zookeeper:5.4.0 + image: confluentinc/cp-zookeeper:7.2.2 env_file: zookeeper/env/docker.env hostname: zookeeper container_name: zookeeper @@ -17,7 +17,7 @@ services: - zkdata:/var/lib/zookeeper broker: - image: confluentinc/cp-kafka:5.4.0 + image: confluentinc/cp-kafka:7.2.2 env_file: broker/env/docker.env hostname: broker container_name: broker @@ -26,25 +26,12 @@ services: ports: - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 - # This "container" is a workaround to pre-create topics - kafka-setup: - build: - context: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} - env_file: kafka-setup/env/docker.env - hostname: kafka-setup - container_name: kafka-setup - depends_on: - - broker - - schema-registry - schema-registry: - image: confluentinc/cp-schema-registry:5.4.0 + image: confluentinc/cp-schema-registry:7.2.2 env_file: schema-registry/env/docker.env hostname: schema-registry container_name: schema-registry depends_on: - - zookeeper - broker ports: - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 diff --git a/docker/docker-compose.consumers-without-neo4j.yml b/docker/docker-compose.consumers-without-neo4j.yml index d80beae8bfad35..cc31c722dbc99a 100644 --- a/docker/docker-compose.consumers-without-neo4j.yml +++ b/docker/docker-compose.consumers-without-neo4j.yml @@ -25,5 +25,8 @@ services: env_file: datahub-mce-consumer/env/docker.env hostname: datahub-mce-consumer container_name: datahub-mce-consumer + environment: + - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} + - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} ports: - "9090:9090" diff --git a/docker/docker-compose.consumers.yml b/docker/docker-compose.consumers.yml index ffe9827bc9c22a..dcfdff49aa3cda 100644 --- a/docker/docker-compose.consumers.yml +++ b/docker/docker-compose.consumers.yml @@ -27,5 +27,15 @@ services: env_file: datahub-mce-consumer/env/docker.env hostname: datahub-mce-consumer container_name: datahub-mce-consumer + environment: + - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} + - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + - NEO4J_HOST=http://neo4j:7474 + - NEO4J_URI=bolt://neo4j + - NEO4J_USERNAME=neo4j + - NEO4J_PASSWORD=datahub + - GRAPH_SERVICE_IMPL=neo4j ports: - "9090:9090" + depends_on: + - neo4j diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 3dbd9ba34bda45..581a8a49bda4ce 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -24,17 +24,6 @@ services: - ./elasticsearch-setup/create-indices.sh:/create-indices.sh - 
../metadata-service/restli-servlet-impl/src/main/resources/index/:/index - kafka-setup: - image: linkedin/datahub-kafka-setup:debug - build: - context: ../ - dockerfile: ./docker/kafka-setup/Dockerfile - args: - APP_ENV: dev - depends_on: - - broker - - schema-registry - datahub-gms: image: linkedin/datahub-gms:debug build: diff --git a/docker/docker-compose.kafka-setup.yml b/docker/docker-compose.kafka-setup.yml new file mode 100644 index 00000000000000..f4e5a743ce0b51 --- /dev/null +++ b/docker/docker-compose.kafka-setup.yml @@ -0,0 +1,15 @@ +# Service definitions for Kafka Setup container. +version: '3.8' +services: + + # This "container" is a workaround to pre-create topics + kafka-setup: + build: + context: kafka-setup + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + env_file: kafka-setup/env/docker.env + hostname: kafka-setup + container_name: kafka-setup + depends_on: + - broker + - schema-registry \ No newline at end of file diff --git a/docker/docker-compose.m1.yml b/docker/docker-compose.m1.yml index ea844e852bcf29..368cf795e6ad4f 100644 --- a/docker/docker-compose.m1.yml +++ b/docker/docker-compose.m1.yml @@ -1,11 +1,5 @@ services: - broker: - image: kymeric/cp-kafka:latest mysql: image: mariadb:10.5.8 - schema-registry: - image: eugenetea/schema-registry-arm64:latest - zookeeper: - image: kymeric/cp-zookeeper:latest neo4j: image: neo4j/neo4j-arm64-experimental:4.0.6-arm64 diff --git a/docker/docker-compose.override.yml b/docker/docker-compose.override.yml index 4446c454832200..0b99c643781df4 100644 --- a/docker/docker-compose.override.yml +++ b/docker/docker-compose.override.yml @@ -7,7 +7,7 @@ services: hostname: mysql image: mysql:5.7 env_file: mysql/env/docker.env - command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin + command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password ports: - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 volumes: diff --git a/docker/docker-compose.tools.yml b/docker/docker-compose.tools.yml index bbb77084bff2e9..bc51a226ecd94f 100644 --- a/docker/docker-compose.tools.yml +++ b/docker/docker-compose.tools.yml @@ -3,7 +3,7 @@ version: '3.8' services: kafka-rest-proxy: - image: confluentinc/cp-kafka-rest:5.4.0 + image: confluentinc/cp-kafka-rest:7.2.2 env_file: kafka-rest-proxy/env/docker.env hostname: kafka-rest-proxy container_name: kafka-rest-proxy diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 764ebf2e555dda..470746f31890de 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -7,7 +7,7 @@ version: '3.8' services: zookeeper: - image: confluentinc/cp-zookeeper:5.4.0 + image: confluentinc/cp-zookeeper:7.2.2 env_file: zookeeper/env/docker.env hostname: zookeeper container_name: zookeeper @@ -17,7 +17,7 @@ services: - zkdata:/var/lib/zookeeper broker: - image: confluentinc/cp-kafka:5.4.0 + image: confluentinc/cp-kafka:7.2.2 env_file: broker/env/docker.env hostname: broker container_name: broker @@ -28,26 +28,12 @@ services: volumes: - broker:/var/lib/kafka/data/ - # This "container" is a workaround to pre-create topics - kafka-setup: - build: - dockerfile: ./docker/kafka-setup/Dockerfile - context: ../ - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} - env_file: kafka-setup/env/docker.env - hostname: kafka-setup - container_name: kafka-setup - depends_on: - - broker - - schema-registry - schema-registry: - image: 
confluentinc/cp-schema-registry:5.4.0 + image: confluentinc/cp-schema-registry:7.2.2 env_file: schema-registry/env/docker.env hostname: schema-registry container_name: schema-registry depends_on: - - zookeeper - broker ports: - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml new file mode 100644 index 00000000000000..752e076c1c48a7 --- /dev/null +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -0,0 +1,204 @@ +networks: + default: + name: datahub_network +services: + broker: + container_name: broker + depends_on: + - zookeeper + environment: + - KAFKA_BROKER_ID=1 + - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 + - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 + - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 + - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 + - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m + - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false + hostname: broker + image: confluentinc/cp-kafka:7.2.2 + ports: + - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 + volumes: + - broker:/var/lib/kafka/data/ + datahub-actions: + depends_on: + - datahub-gms + environment: + - DATAHUB_GMS_PROTOCOL=http + - DATAHUB_GMS_HOST=datahub-gms + - DATAHUB_GMS_PORT=8080 + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - SCHEMA_REGISTRY_URL=http://schema-registry:8081 + - METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4 + - METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1 + - DATAHUB_SYSTEM_CLIENT_ID=__datahub_system + - DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing + - KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT + hostname: actions + image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} + restart: on-failure:5 + datahub-frontend-react: + container_name: datahub-frontend-react + depends_on: + - datahub-gms + environment: + - DATAHUB_GMS_HOST=datahub-gms + - DATAHUB_GMS_PORT=8080 + - DATAHUB_SECRET=YouKnowNothing + - DATAHUB_APP_VERSION=1.0 + - DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB + - JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf + -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml + -Dlogback.debug=false -Dpidfile.path=/dev/null + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1 + - ELASTIC_CLIENT_HOST=elasticsearch + - ELASTIC_CLIENT_PORT=9200 + hostname: datahub-frontend-react + image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + ports: + - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 + volumes: + - ${HOME}/.datahub/plugins:/etc/datahub/plugins + datahub-gms: + container_name: datahub-gms + depends_on: + - neo4j + - mysql + environment: + - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} + - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + - EBEAN_DATASOURCE_USERNAME=datahub + - EBEAN_DATASOURCE_PASSWORD=datahub + - EBEAN_DATASOURCE_HOST=mysql:3306 + - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2 + - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 + - ELASTICSEARCH_HOST=elasticsearch + - ELASTICSEARCH_PORT=9200 + - 
ES_BULK_REFRESH_POLICY=WAIT_UNTIL + - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true + - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true + - NEO4J_HOST=http://neo4j:7474 + - NEO4J_URI=bolt://neo4j + - NEO4J_USERNAME=neo4j + - NEO4J_PASSWORD=datahub + - JAVA_OPTS=-Xms1g -Xmx1g + - GRAPH_SERVICE_DIFF_MODE_ENABLED=true + - GRAPH_SERVICE_IMPL=neo4j + - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml + - ENTITY_SERVICE_ENABLE_RETENTION=true + - MAE_CONSUMER_ENABLED=true + - MCE_CONSUMER_ENABLED=true + - PE_CONSUMER_ENABLED=true + - UI_INGESTION_ENABLED=true + - METADATA_SERVICE_AUTH_ENABLED=false + hostname: datahub-gms + image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + ports: + - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 + volumes: + - ${HOME}/.datahub/plugins/:/etc/datahub/plugins + - ${HOME}/.datahub/plugins/auth/resources/:/etc/datahub/plugins/auth/resources + elasticsearch: + container_name: elasticsearch + environment: + - discovery.type=single-node + - xpack.security.enabled=false + - ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true + healthcheck: + retries: 4 + start_period: 2m + test: + - CMD-SHELL + - curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' + || exit 1 + hostname: elasticsearch + image: elasticsearch:7.9.3 + mem_limit: 1g + ports: + - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 + volumes: + - esdata:/usr/share/elasticsearch/data + elasticsearch-setup: + container_name: elasticsearch-setup + depends_on: + - elasticsearch + environment: + - ELASTICSEARCH_HOST=elasticsearch + - ELASTICSEARCH_PORT=9200 + - ELASTICSEARCH_PROTOCOL=http + hostname: elasticsearch-setup + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + mysql: + command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password + container_name: mysql + environment: + - MYSQL_DATABASE=datahub + - MYSQL_USER=datahub + - MYSQL_PASSWORD=datahub + - MYSQL_ROOT_PASSWORD=datahub + hostname: mysql + image: mariadb:10.5.8 + ports: + - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 + volumes: + - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql + - mysqldata:/var/lib/mysql + mysql-setup: + container_name: mysql-setup + depends_on: + - mysql + environment: + - MYSQL_HOST=mysql + - MYSQL_PORT=3306 + - MYSQL_USERNAME=datahub + - MYSQL_PASSWORD=datahub + - DATAHUB_DB_NAME=datahub + hostname: mysql-setup + image: acryldata/datahub-mysql-setup:${DATAHUB_VERSION:-head} + neo4j: + container_name: neo4j + environment: + - NEO4J_AUTH=neo4j/datahub + - NEO4J_dbms_default__database=graph.db + - NEO4J_dbms_allow__upgrade=true + hostname: neo4j + image: neo4j/neo4j-arm64-experimental:4.0.6-arm64 + ports: + - ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474 + - ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687 + volumes: + - neo4jdata:/data + schema-registry: + container_name: schema-registry + depends_on: + - broker + environment: + - SCHEMA_REGISTRY_HOST_NAME=schemaregistry + - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT + - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092 + hostname: schema-registry + image: confluentinc/cp-schema-registry:7.2.2 + ports: + - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 + zookeeper: + container_name: zookeeper + environment: + - ZOOKEEPER_CLIENT_PORT=2181 + - ZOOKEEPER_TICK_TIME=2000 + hostname: zookeeper + image: confluentinc/cp-zookeeper:7.2.2 + ports: + - 
${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 + volumes: + - zkdata:/var/lib/zookeeper +version: '2.3' +volumes: + broker: null + esdata: null + mysqldata: null + neo4jdata: null + zkdata: null diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index c549296d58064d..28d06b7ae2877a 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -21,28 +21,18 @@ services: - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 datahub-actions: depends_on: - - datahub-gms + - datahub-gms environment: - - DATAHUB_GMS_PROTOCOL=http - - DATAHUB_GMS_HOST=datahub-gms - - DATAHUB_GMS_PORT=8080 - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - SCHEMA_REGISTRY_URL=http://schema-registry:8081 - - METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4 - - METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1 - - DATAHUB_SYSTEM_CLIENT_ID=__datahub_system - - DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing - - KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT - - DATAHUB_ACTIONS_SLACK_ENABLED - - DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL - - DATAHUB_ACTIONS_SLACK_BOT_TOKEN - - DATAHUB_ACTIONS_SLACK_SIGNING_SECRET - - DATAHUB_ACTIONS_SLACK_CHANNEL - - DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY - - DATAHUB_ACTIONS_TEAMS_ENABLED - - DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL - - DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL - - DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY + - DATAHUB_GMS_PROTOCOL=http + - DATAHUB_GMS_HOST=datahub-gms + - DATAHUB_GMS_PORT=8080 + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - SCHEMA_REGISTRY_URL=http://schema-registry:8081 + - METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4 + - METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1 + - DATAHUB_SYSTEM_CLIENT_ID=__datahub_system + - DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing + - KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT hostname: actions image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} restart: on-failure:5 @@ -68,31 +58,33 @@ services: ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: - - ${HOME}/.datahub/plugins:/etc/datahub/plugins + - ${HOME}/.datahub/plugins:/etc/datahub/plugins datahub-gms: container_name: datahub-gms depends_on: - mysql environment: - - EBEAN_DATASOURCE_USERNAME=datahub - - EBEAN_DATASOURCE_PASSWORD=datahub - - EBEAN_DATASOURCE_HOST=mysql:3306 - - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8 - - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - ELASTICSEARCH_HOST=elasticsearch - - ELASTICSEARCH_PORT=9200 - - ES_BULK_REFRESH_POLICY=WAIT_UNTIL - - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true - - GRAPH_SERVICE_DIFF_MODE_ENABLED=true - - GRAPH_SERVICE_IMPL=elasticsearch + - ES_BULK_REFRESH_POLICY=WAIT_UNTIL + - EBEAN_DATASOURCE_USERNAME=datahub + - MCE_CONSUMER_ENABLED=true + - EBEAN_DATASOURCE_PASSWORD=datahub - JAVA_OPTS=-Xms1g -Xmx1g - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml + - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 + - UI_INGESTION_ENABLED=true + - ENTITY_SERVICE_ENABLE_RETENTION=true - MAE_CONSUMER_ENABLED=true - - MCE_CONSUMER_ENABLED=true + - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true + - 
EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver + - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8 + - GRAPH_SERVICE_IMPL=elasticsearch - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} + - ELASTICSEARCH_PORT=9200 + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - GRAPH_SERVICE_DIFF_MODE_ENABLED=true + - EBEAN_DATASOURCE_HOST=mysql:3306 - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} - PE_CONSUMER_ENABLED=true hostname: datahub-gms @@ -131,16 +123,6 @@ services: - ELASTICSEARCH_PROTOCOL=http hostname: elasticsearch-setup image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} - kafka-setup: - container_name: kafka-setup - depends_on: - - broker - - schema-registry - environment: - - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} mysql: command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password container_name: mysql @@ -151,7 +133,6 @@ services: - MYSQL_ROOT_PASSWORD=datahub hostname: mysql image: mariadb:10.5.8 - # image: mysql:8 ports: - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 volumes: @@ -172,13 +153,13 @@ services: schema-registry: container_name: schema-registry depends_on: - - zookeeper - broker environment: - SCHEMA_REGISTRY_HOST_NAME=schemaregistry - - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=PLAINTEXT://broker:29092 + - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT + - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092 hostname: schema-registry - image: confluentinc/cp-schema-registry:7.2.0 + image: confluentinc/cp-schema-registry:7.2.2 ports: - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 zookeeper: diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index e815ff60ca42b1..177cf7f52404ce 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -5,195 +5,172 @@ services: broker: container_name: broker depends_on: - - zookeeper + - zookeeper environment: - - KAFKA_BROKER_ID=1 - - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT - - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 - - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 - - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 - - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m - - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false + - KAFKA_BROKER_ID=1 + - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 + - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 + - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 + - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 + - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m + - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false hostname: broker - image: confluentinc/cp-kafka:5.4.0 + image: confluentinc/cp-kafka:7.2.2 ports: - - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 + - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 datahub-actions: depends_on: - - datahub-gms + - datahub-gms environment: - - DATAHUB_GMS_PROTOCOL=http - - DATAHUB_GMS_HOST=datahub-gms - - DATAHUB_GMS_PORT=8080 - - 
KAFKA_BOOTSTRAP_SERVER=broker:29092 - - SCHEMA_REGISTRY_URL=http://schema-registry:8081 - - METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4 - - METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1 - - DATAHUB_SYSTEM_CLIENT_ID=__datahub_system - - DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing - - KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT - - DATAHUB_ACTIONS_SLACK_ENABLED - - DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL - - DATAHUB_ACTIONS_SLACK_BOT_TOKEN - - DATAHUB_ACTIONS_SLACK_SIGNING_SECRET - - DATAHUB_ACTIONS_SLACK_CHANNEL - - DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY - - DATAHUB_ACTIONS_TEAMS_ENABLED - - DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL - - DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL - - DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY + - DATAHUB_GMS_PROTOCOL=http + - DATAHUB_GMS_HOST=datahub-gms + - DATAHUB_GMS_PORT=8080 + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - SCHEMA_REGISTRY_URL=http://schema-registry:8081 + - METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4 + - METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1 + - DATAHUB_SYSTEM_CLIENT_ID=__datahub_system + - DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing + - KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT hostname: actions image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} restart: on-failure:5 datahub-frontend-react: container_name: datahub-frontend-react depends_on: - - datahub-gms + - datahub-gms environment: - - DATAHUB_GMS_HOST=datahub-gms - - DATAHUB_GMS_PORT=8080 - - DATAHUB_SECRET=YouKnowNothing - - DATAHUB_APP_VERSION=1.0 - - DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB - - JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf - -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml - -Dlogback.debug=false -Dpidfile.path=/dev/null - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1 - - ELASTIC_CLIENT_HOST=elasticsearch - - ELASTIC_CLIENT_PORT=9200 + - DATAHUB_GMS_HOST=datahub-gms + - DATAHUB_GMS_PORT=8080 + - DATAHUB_SECRET=YouKnowNothing + - DATAHUB_APP_VERSION=1.0 + - DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB + - JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml -Dlogback.debug=false -Dpidfile.path=/dev/null + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1 + - ELASTIC_CLIENT_HOST=elasticsearch + - ELASTIC_CLIENT_PORT=9200 hostname: datahub-frontend-react image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 + - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: - - ${HOME}/.datahub/plugins:/etc/datahub/plugins + - ${HOME}/.datahub/plugins:/etc/datahub/plugins datahub-gms: container_name: datahub-gms depends_on: - - mysql + - mysql environment: - - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} - - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} - - EBEAN_DATASOURCE_USERNAME=datahub - - EBEAN_DATASOURCE_PASSWORD=datahub - - EBEAN_DATASOURCE_HOST=mysql:3306 - - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8 - - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - 
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - - ELASTICSEARCH_HOST=elasticsearch - - ELASTICSEARCH_PORT=9200 - - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true - - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true - - ES_BULK_REFRESH_POLICY=WAIT_UNTIL - - GRAPH_SERVICE_DIFF_MODE_ENABLED=true - - GRAPH_SERVICE_IMPL=elasticsearch - - JAVA_OPTS=-Xms1g -Xmx1g - - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - - MAE_CONSUMER_ENABLED=true - - MCE_CONSUMER_ENABLED=true - - PE_CONSUMER_ENABLED=true - - UI_INGESTION_ENABLED=true - - ENTITY_SERVICE_ENABLE_RETENTION=true + - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml + - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8 + - ES_BULK_REFRESH_POLICY=WAIT_UNTIL + - UI_INGESTION_ENABLED=true + - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true + - GRAPH_SERVICE_IMPL=elasticsearch + - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + - MCE_CONSUMER_ENABLED=true + - GRAPH_SERVICE_DIFF_MODE_ENABLED=true + - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} + - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 + - MAE_CONSUMER_ENABLED=true + - JAVA_OPTS=-Xms1g -Xmx1g + - EBEAN_DATASOURCE_HOST=mysql:3306 + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - ENTITY_SERVICE_ENABLE_RETENTION=true + - EBEAN_DATASOURCE_USERNAME=datahub + - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true + - ELASTICSEARCH_PORT=9200 + - ELASTICSEARCH_HOST=elasticsearch + - PE_CONSUMER_ENABLED=true + - EBEAN_DATASOURCE_PASSWORD=datahub + - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver hostname: datahub-gms image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} ports: - - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 + - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: - - ${HOME}/.datahub/plugins:/etc/datahub/plugins + - ${HOME}/.datahub/plugins:/etc/datahub/plugins elasticsearch: container_name: elasticsearch environment: - - discovery.type=single-node - - xpack.security.enabled=false - - ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true + - discovery.type=single-node + - xpack.security.enabled=false + - ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true healthcheck: retries: 4 start_period: 2m test: - - CMD-SHELL - - curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' - || exit 1 + - CMD-SHELL + - curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' || exit 1 hostname: elasticsearch image: elasticsearch:7.9.3 mem_limit: 1g ports: - - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 + - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 volumes: - - esdata:/usr/share/elasticsearch/data + - esdata:/usr/share/elasticsearch/data elasticsearch-setup: container_name: elasticsearch-setup depends_on: - - elasticsearch + - elasticsearch environment: - - ELASTICSEARCH_HOST=elasticsearch - - ELASTICSEARCH_PORT=9200 - - ELASTICSEARCH_PROTOCOL=http + - ELASTICSEARCH_HOST=elasticsearch + - ELASTICSEARCH_PORT=9200 + - ELASTICSEARCH_PROTOCOL=http hostname: elasticsearch-setup image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} - kafka-setup: - container_name: kafka-setup - depends_on: - - broker - - schema-registry - environment: - - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - hostname: kafka-setup - image: 
${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} mysql: - command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin + command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password container_name: mysql environment: - - MYSQL_DATABASE=datahub - - MYSQL_USER=datahub - - MYSQL_PASSWORD=datahub - - MYSQL_ROOT_PASSWORD=datahub + - MYSQL_DATABASE=datahub + - MYSQL_USER=datahub + - MYSQL_PASSWORD=datahub + - MYSQL_ROOT_PASSWORD=datahub hostname: mysql image: mysql:5.7 ports: - - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 + - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 volumes: - - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql - - mysqldata:/var/lib/mysql + - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql + - mysqldata:/var/lib/mysql mysql-setup: container_name: mysql-setup depends_on: - - mysql + - mysql environment: - - MYSQL_HOST=mysql - - MYSQL_PORT=3306 - - MYSQL_USERNAME=datahub - - MYSQL_PASSWORD=datahub - - DATAHUB_DB_NAME=datahub + - MYSQL_HOST=mysql + - MYSQL_PORT=3306 + - MYSQL_USERNAME=datahub + - MYSQL_PASSWORD=datahub + - DATAHUB_DB_NAME=datahub hostname: mysql-setup image: acryldata/datahub-mysql-setup:${DATAHUB_VERSION:-head} schema-registry: container_name: schema-registry depends_on: - - zookeeper - - broker + - broker environment: - - SCHEMA_REGISTRY_HOST_NAME=schemaregistry - - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181 + - SCHEMA_REGISTRY_HOST_NAME=schemaregistry + - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT + - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092 hostname: schema-registry - image: confluentinc/cp-schema-registry:5.4.0 + image: confluentinc/cp-schema-registry:7.2.2 ports: - - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 + - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 zookeeper: container_name: zookeeper environment: - - ZOOKEEPER_CLIENT_PORT=2181 - - ZOOKEEPER_TICK_TIME=2000 + - ZOOKEEPER_CLIENT_PORT=2181 + - ZOOKEEPER_TICK_TIME=2000 hostname: zookeeper - image: confluentinc/cp-zookeeper:5.4.0 + image: confluentinc/cp-zookeeper:7.2.2 ports: - - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 + - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 volumes: - - zkdata:/var/lib/zookeeper -version: '2.3' + - zkdata:/var/lib/zookeeper +version: "2.3" volumes: esdata: null mysqldata: null diff --git a/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml index eb645a6abae2f9..cee66989456e7f 100644 --- a/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml @@ -24,16 +24,16 @@ services: datahub-mce-consumer: container_name: datahub-mce-consumer environment: - - MCE_CONSUMER_ENABLED=true - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + - MCE_CONSUMER_ENABLED=true - EBEAN_DATASOURCE_USERNAME=datahub - EBEAN_DATASOURCE_PASSWORD=datahub - EBEAN_DATASOURCE_HOST=mysql:3306 - - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8 + - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2 - 
EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - ELASTICSEARCH_HOST=elasticsearch - ELASTICSEARCH_PORT=9200 - ES_BULK_REFRESH_POLICY=WAIT_UNTIL diff --git a/docker/quickstart/docker-compose.consumers.quickstart.yml b/docker/quickstart/docker-compose.consumers.quickstart.yml index 7edfd52026473c..8dbe776a183e2e 100644 --- a/docker/quickstart/docker-compose.consumers.quickstart.yml +++ b/docker/quickstart/docker-compose.consumers.quickstart.yml @@ -29,27 +29,29 @@ services: - 9091:9091 datahub-mce-consumer: container_name: datahub-mce-consumer + depends_on: + - neo4j environment: - - MCE_CONSUMER_ENABLED=true - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + - NEO4J_HOST=http://neo4j:7474 + - NEO4J_URI=bolt://neo4j + - NEO4J_USERNAME=neo4j + - NEO4J_PASSWORD=datahub + - GRAPH_SERVICE_IMPL=neo4j + - MCE_CONSUMER_ENABLED=true - EBEAN_DATASOURCE_USERNAME=datahub - EBEAN_DATASOURCE_PASSWORD=datahub - EBEAN_DATASOURCE_HOST=mysql:3306 - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2 - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - ELASTICSEARCH_HOST=elasticsearch - ELASTICSEARCH_PORT=9200 - ES_BULK_REFRESH_POLICY=WAIT_UNTIL - - NEO4J_HOST=http://neo4j:7474 - - NEO4J_URI=bolt://neo4j - - NEO4J_USERNAME=neo4j - - NEO4J_PASSWORD=datahub - - JAVA_OPTS=-Xms1g -Xmx1g - GRAPH_SERVICE_DIFF_MODE_ENABLED=true - - GRAPH_SERVICE_IMPL=neo4j + - JAVA_OPTS=-Xms1g -Xmx1g - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mce-consumer/resources/entity-registry.yml - DATAHUB_SYSTEM_CLIENT_ID=__datahub_system - DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing diff --git a/docker/quickstart/docker-compose.kafka-setup.quickstart.yml b/docker/quickstart/docker-compose.kafka-setup.quickstart.yml new file mode 100644 index 00000000000000..2818806e8d0b5e --- /dev/null +++ b/docker/quickstart/docker-compose.kafka-setup.quickstart.yml @@ -0,0 +1,12 @@ +services: + kafka-setup: + container_name: kafka-setup + depends_on: + - broker + - schema-registry + environment: + - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + hostname: kafka-setup + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} +version: '2.3' diff --git a/docker/quickstart/docker-compose.monitoring.quickstart.yml b/docker/quickstart/docker-compose.monitoring.quickstart.yml index 07966926057195..37ccd057a00ed8 100644 --- a/docker/quickstart/docker-compose.monitoring.quickstart.yml +++ b/docker/quickstart/docker-compose.monitoring.quickstart.yml @@ -1,47 +1,47 @@ services: datahub-frontend-react: environment: - - ENABLE_PROMETHEUS=true - - ENABLE_OTEL=true - - OTEL_TRACES_EXPORTER=jaeger - - OTEL_EXPORTER_JAEGER_ENDPOINT=http://jaeger-all-in-one:14250 - - OTEL_METRICS_EXPORTER=none - - OTEL_SERVICE_NAME=datahub-gms + - ENABLE_PROMETHEUS=true + - ENABLE_OTEL=true + - OTEL_TRACES_EXPORTER=jaeger + - OTEL_EXPORTER_JAEGER_ENDPOINT=http://jaeger-all-in-one:14250 + - OTEL_METRICS_EXPORTER=none + - OTEL_SERVICE_NAME=datahub-gms ports: - - '4318' + - "4318" datahub-gms: environment: - - ENABLE_PROMETHEUS=true - - ENABLE_OTEL=true - - 
OTEL_TRACES_EXPORTER=jaeger - - OTEL_EXPORTER_JAEGER_ENDPOINT=http://jaeger-all-in-one:14250 - - OTEL_METRICS_EXPORTER=none - - OTEL_SERVICE_NAME=datahub-gms + - ENABLE_PROMETHEUS=true + - ENABLE_OTEL=true + - OTEL_TRACES_EXPORTER=jaeger + - OTEL_EXPORTER_JAEGER_ENDPOINT=http://jaeger-all-in-one:14250 + - OTEL_METRICS_EXPORTER=none + - OTEL_SERVICE_NAME=datahub-gms ports: - - '4318' + - "4318" grafana: depends_on: - - prometheus + - prometheus image: grafana/grafana:9.1.4 ports: - - 3001:3000 + - 3001:3000 volumes: - - grafana-storage:/var/lib/grafana - - ../monitoring/grafana/datasources:/etc/grafana/provisioning/datasources - - ../monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards + - grafana-storage:/var/lib/grafana + - ../monitoring/grafana/datasources:/etc/grafana/provisioning/datasources + - ../monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards jaeger-all-in-one: image: jaegertracing/all-in-one:latest ports: - - 16686:16686 - - '14268' - - '14250' + - 16686:16686 + - "14268" + - "14250" prometheus: container_name: prometheus image: prom/prometheus:latest ports: - - 9089:9090 + - 9089:9090 volumes: - - ../monitoring/prometheus.yaml:/etc/prometheus/prometheus.yml -version: '2.3' + - ../monitoring/prometheus.yaml:/etc/prometheus/prometheus.yml +version: "2.3" volumes: grafana-storage: null diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index 01e3b799265a16..4524570c1cb486 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -5,216 +5,194 @@ services: broker: container_name: broker depends_on: - - zookeeper + - zookeeper environment: - - KAFKA_BROKER_ID=1 - - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT - - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 - - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 - - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 - - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m - - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false + - KAFKA_BROKER_ID=1 + - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 + - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 + - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 + - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 + - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m + - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false hostname: broker - image: confluentinc/cp-kafka:5.4.0 + image: confluentinc/cp-kafka:7.2.2 ports: - - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 + - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 volumes: - - broker:/var/lib/kafka/data/ + - broker:/var/lib/kafka/data/ datahub-actions: depends_on: - - datahub-gms + - datahub-gms environment: - - DATAHUB_GMS_PROTOCOL=http - - DATAHUB_GMS_HOST=datahub-gms - - DATAHUB_GMS_PORT=8080 - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - SCHEMA_REGISTRY_URL=http://schema-registry:8081 - - METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4 - - METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1 - - DATAHUB_SYSTEM_CLIENT_ID=__datahub_system - - DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing - - KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT - - DATAHUB_ACTIONS_SLACK_ENABLED - - DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL - - DATAHUB_ACTIONS_SLACK_BOT_TOKEN - - DATAHUB_ACTIONS_SLACK_SIGNING_SECRET - - DATAHUB_ACTIONS_SLACK_CHANNEL - - 
DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY - - DATAHUB_ACTIONS_TEAMS_ENABLED - - DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL - - DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL - - DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY + - DATAHUB_GMS_PROTOCOL=http + - DATAHUB_GMS_HOST=datahub-gms + - DATAHUB_GMS_PORT=8080 + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - SCHEMA_REGISTRY_URL=http://schema-registry:8081 + - METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4 + - METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1 + - DATAHUB_SYSTEM_CLIENT_ID=__datahub_system + - DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing + - KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT hostname: actions image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} restart: on-failure:5 datahub-frontend-react: container_name: datahub-frontend-react depends_on: - - datahub-gms + - datahub-gms environment: - - DATAHUB_GMS_HOST=datahub-gms - - DATAHUB_GMS_PORT=8080 - - DATAHUB_SECRET=YouKnowNothing - - DATAHUB_APP_VERSION=1.0 - - DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB - - JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf - -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml - -Dlogback.debug=false -Dpidfile.path=/dev/null - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1 - - ELASTIC_CLIENT_HOST=elasticsearch - - ELASTIC_CLIENT_PORT=9200 + - DATAHUB_GMS_HOST=datahub-gms + - DATAHUB_GMS_PORT=8080 + - DATAHUB_SECRET=YouKnowNothing + - DATAHUB_APP_VERSION=1.0 + - DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB + - JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml -Dlogback.debug=false -Dpidfile.path=/dev/null + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1 + - ELASTIC_CLIENT_HOST=elasticsearch + - ELASTIC_CLIENT_PORT=9200 hostname: datahub-frontend-react image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 + - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: - - ${HOME}/.datahub/plugins:/etc/datahub/plugins + - ${HOME}/.datahub/plugins:/etc/datahub/plugins datahub-gms: container_name: datahub-gms depends_on: - - mysql + - mysql + - neo4j environment: - - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} - - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} - - EBEAN_DATASOURCE_USERNAME=datahub - - EBEAN_DATASOURCE_PASSWORD=datahub - - EBEAN_DATASOURCE_HOST=mysql:3306 - - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2 - - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - - ELASTICSEARCH_HOST=elasticsearch - - ELASTICSEARCH_PORT=9200 - - ES_BULK_REFRESH_POLICY=WAIT_UNTIL - - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true - - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true - - NEO4J_HOST=http://neo4j:7474 - - NEO4J_URI=bolt://neo4j - - NEO4J_USERNAME=neo4j - - NEO4J_PASSWORD=datahub - - JAVA_OPTS=-Xms1g -Xmx1g - - GRAPH_SERVICE_DIFF_MODE_ENABLED=true - - GRAPH_SERVICE_IMPL=neo4j - - 
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - - ENTITY_SERVICE_ENABLE_RETENTION=true - - MAE_CONSUMER_ENABLED=true - - MCE_CONSUMER_ENABLED=true - - PE_CONSUMER_ENABLED=true - - UI_INGESTION_ENABLED=true - - METADATA_SERVICE_AUTH_ENABLED=false + - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} + - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + - EBEAN_DATASOURCE_USERNAME=datahub + - EBEAN_DATASOURCE_PASSWORD=datahub + - EBEAN_DATASOURCE_HOST=mysql:3306 + - EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2 + - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 + - ELASTICSEARCH_HOST=elasticsearch + - ELASTICSEARCH_PORT=9200 + - ES_BULK_REFRESH_POLICY=WAIT_UNTIL + - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true + - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true + - NEO4J_HOST=http://neo4j:7474 + - NEO4J_URI=bolt://neo4j + - NEO4J_USERNAME=neo4j + - NEO4J_PASSWORD=datahub + - JAVA_OPTS=-Xms1g -Xmx1g + - GRAPH_SERVICE_DIFF_MODE_ENABLED=true + - GRAPH_SERVICE_IMPL=neo4j + - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml + - ENTITY_SERVICE_ENABLE_RETENTION=true + - MAE_CONSUMER_ENABLED=true + - MCE_CONSUMER_ENABLED=true + - PE_CONSUMER_ENABLED=true + - UI_INGESTION_ENABLED=true + - METADATA_SERVICE_AUTH_ENABLED=false hostname: datahub-gms image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} ports: - - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 + - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: - - ${HOME}/.datahub/plugins/:/etc/datahub/plugins - - ${HOME}/.datahub/plugins/auth/resources/:/etc/datahub/plugins/auth/resources + - ${HOME}/.datahub/plugins/:/etc/datahub/plugins + - ${HOME}/.datahub/plugins/auth/resources/:/etc/datahub/plugins/auth/resources elasticsearch: container_name: elasticsearch environment: - - discovery.type=single-node - - xpack.security.enabled=false - - ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true + - discovery.type=single-node + - xpack.security.enabled=false + - ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true healthcheck: retries: 4 start_period: 2m test: - - CMD-SHELL - - curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' - || exit 1 + - CMD-SHELL + - curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s' || exit 1 hostname: elasticsearch image: elasticsearch:7.9.3 mem_limit: 1g ports: - - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 + - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 volumes: - - esdata:/usr/share/elasticsearch/data + - esdata:/usr/share/elasticsearch/data elasticsearch-setup: container_name: elasticsearch-setup depends_on: - - elasticsearch + - elasticsearch environment: - - ELASTICSEARCH_HOST=elasticsearch - - ELASTICSEARCH_PORT=9200 - - ELASTICSEARCH_PROTOCOL=http + - ELASTICSEARCH_HOST=elasticsearch + - ELASTICSEARCH_PORT=9200 + - ELASTICSEARCH_PROTOCOL=http hostname: elasticsearch-setup image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} - kafka-setup: - container_name: kafka-setup - depends_on: - - broker - - schema-registry - environment: - - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - hostname: kafka-setup - image: 
${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} mysql: - command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin + command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password container_name: mysql environment: - - MYSQL_DATABASE=datahub - - MYSQL_USER=datahub - - MYSQL_PASSWORD=datahub - - MYSQL_ROOT_PASSWORD=datahub + - MYSQL_DATABASE=datahub + - MYSQL_USER=datahub + - MYSQL_PASSWORD=datahub + - MYSQL_ROOT_PASSWORD=datahub hostname: mysql image: mysql:5.7 ports: - - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 + - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 volumes: - - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql - - mysqldata:/var/lib/mysql + - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql + - mysqldata:/var/lib/mysql mysql-setup: container_name: mysql-setup depends_on: - - mysql + - mysql environment: - - MYSQL_HOST=mysql - - MYSQL_PORT=3306 - - MYSQL_USERNAME=datahub - - MYSQL_PASSWORD=datahub - - DATAHUB_DB_NAME=datahub + - MYSQL_HOST=mysql + - MYSQL_PORT=3306 + - MYSQL_USERNAME=datahub + - MYSQL_PASSWORD=datahub + - DATAHUB_DB_NAME=datahub hostname: mysql-setup image: acryldata/datahub-mysql-setup:${DATAHUB_VERSION:-head} neo4j: container_name: neo4j environment: - - NEO4J_AUTH=neo4j/datahub - - NEO4J_dbms_default__database=graph.db - - NEO4J_dbms_allow__upgrade=true + - NEO4J_AUTH=neo4j/datahub + - NEO4J_dbms_default__database=graph.db + - NEO4J_dbms_allow__upgrade=true hostname: neo4j image: neo4j:4.4.9-community ports: - - ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474 - - ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687 + - ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474 + - ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687 volumes: - - neo4jdata:/data + - neo4jdata:/data schema-registry: container_name: schema-registry depends_on: - - zookeeper - - broker + - broker environment: - - SCHEMA_REGISTRY_HOST_NAME=schemaregistry - - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181 + - SCHEMA_REGISTRY_HOST_NAME=schemaregistry + - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT + - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092 hostname: schema-registry - image: confluentinc/cp-schema-registry:5.4.0 + image: confluentinc/cp-schema-registry:7.2.2 ports: - - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 + - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 zookeeper: container_name: zookeeper environment: - - ZOOKEEPER_CLIENT_PORT=2181 - - ZOOKEEPER_TICK_TIME=2000 + - ZOOKEEPER_CLIENT_PORT=2181 + - ZOOKEEPER_TICK_TIME=2000 hostname: zookeeper - image: confluentinc/cp-zookeeper:5.4.0 + image: confluentinc/cp-zookeeper:7.2.2 ports: - - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 + - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 volumes: - - zkdata:/var/lib/zookeeper -version: '2.3' + - zkdata:/var/lib/zookeeper +version: "2.3" volumes: broker: null esdata: null diff --git a/docker/quickstart/generate_and_compare.sh b/docker/quickstart/generate_and_compare.sh index 4e7c26b2655a24..587c9d6ec3bf11 100755 --- a/docker/quickstart/generate_and_compare.sh +++ b/docker/quickstart/generate_and_compare.sh @@ -14,13 +14,16 @@ source venv/bin/activate pip install -r requirements.txt python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml temp.quickstart.yml python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml temp-without-neo4j.quickstart.yml +python generate_docker_quickstart.py 
../docker-compose.yml ../docker-compose.override.yml ../docker-compose.m1.yml temp-m1.quickstart.yml +python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml ../docker-compose-without-neo4j.m1.yml temp-without-neo4j-m1.quickstart.yml python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml temp.monitoring.quickstart.yml python generate_docker_quickstart.py ../docker-compose.consumers.yml temp.consumers.quickstart.yml python generate_docker_quickstart.py ../docker-compose.consumers-without-neo4j.yml temp.consumers-without-neo4j.quickstart.yml for flavour in "${FLAVOURS[@]}" do - if cmp docker-compose$flavour.quickstart.yml temp$flavour.quickstart.yml; then + + if cmp <(yq -i -P 'sort_keys(..)' docker-compose$flavour.quickstart.yml) <(yq -i -P 'sort_keys(..)' temp$flavour.quickstart.yml); then echo "docker-compose$flavour.quickstart.yml is up to date." else echo "docker-compose$flavour.quickstart.yml is out of date." diff --git a/docker/quickstart/generate_docker_quickstart.py b/docker/quickstart/generate_docker_quickstart.py index 4888adda2d0382..5b31a8b7f6e009 100644 --- a/docker/quickstart/generate_docker_quickstart.py +++ b/docker/quickstart/generate_docker_quickstart.py @@ -1,5 +1,4 @@ import os -from collections import OrderedDict from collections.abc import Mapping import click @@ -27,10 +26,14 @@ def dict_merge(dct, merge_dct): for k, v in merge_dct.items(): if k in dct and isinstance(dct[k], dict) and isinstance(merge_dct[k], Mapping): dict_merge(dct[k], merge_dct[k]) + elif k in dct and isinstance(dct[k], list): + a = set(dct[k]) + b = set(merge_dct[k]) + if a != b: + dct[k] = list(a.union(b)) else: dct[k] = merge_dct[k] - def modify_docker_config(base_path, docker_yaml_config): # 0. Filter out services to be omitted. for key in list(docker_yaml_config["services"]): @@ -80,7 +83,7 @@ def modify_docker_config(base_path, docker_yaml_config): elif volumes[i].startswith("./"): volumes[i] = "." + volumes[i] - # 9. Set docker compose version to 2. + # 10. Set docker compose version to 2. # We need at least this version, since we use features like start_period for # healthchecks and shell-like variable interpolation. 
docker_yaml_config["version"] = "2.3" @@ -113,6 +116,9 @@ def generate(compose_files, output_file) -> None: for modified_file in modified_files: dict_merge(merged_docker_config, modified_file) + # Dedup env vars, last wins + dedup_env_vars(merged_docker_config) + # Write output file output_dir = os.path.dirname(output_file) if len(output_dir) and not os.path.exists(output_dir): @@ -127,5 +133,25 @@ def generate(compose_files, output_file) -> None: print(f"Successfully generated {output_file}.") +def dedup_env_vars(merged_docker_config): + for service in merged_docker_config['services']: + if 'environment' in merged_docker_config['services'][service]: + lst = merged_docker_config['services'][service]['environment'] + if lst is not None: + # use a set to cache duplicates + caches = set() + results = [] + for item in lst: + partitions = item.rpartition('=') + prefix = partitions[0] + suffix = partitions[1] + # check whether prefix already exists + if prefix not in caches and suffix != "": + results.append(item) + caches.add(prefix) + if set(lst) != set(results): + merged_docker_config['services'][service]['environment'] = results + + if __name__ == "__main__": generate() diff --git a/docker/quickstart/generate_docker_quickstart.sh b/docker/quickstart/generate_docker_quickstart.sh index aa3c767430df1c..3d8ac7de9aa4c6 100755 --- a/docker/quickstart/generate_docker_quickstart.sh +++ b/docker/quickstart/generate_docker_quickstart.sh @@ -11,6 +11,9 @@ source venv/bin/activate pip install -r requirements.txt python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml docker-compose.quickstart.yml python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml docker-compose-without-neo4j.quickstart.yml +python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml ../docker-compose.m1.yml docker-compose-m1.quickstart.yml +python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml ../docker-compose-without-neo4j.m1.yml docker-compose-without-neo4j-m1.quickstart.yml python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml docker-compose.monitoring.quickstart.yml python generate_docker_quickstart.py ../docker-compose.consumers.yml docker-compose.consumers.quickstart.yml python generate_docker_quickstart.py ../docker-compose.consumers-without-neo4j.yml docker-compose.consumers-without-neo4j.quickstart.yml +python generate_docker_quickstart.py ../docker-compose.kafka-setup.yml docker-compose.kafka-setup.quickstart.yml diff --git a/docker/schema-registry/env/docker.env b/docker/schema-registry/env/docker.env index fbf6840116b9d7..f829f418a6c8ef 100644 --- a/docker/schema-registry/env/docker.env +++ b/docker/schema-registry/env/docker.env @@ -1,9 +1,11 @@ SCHEMA_REGISTRY_HOST_NAME=schemaregistry -SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181 +SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT +SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092 -# Uncomment to customize the Schema Registry kafka store connection -# SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT -# SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092 +# Uncomment to customize the Schema Registry kafka store connection # ZOOKEEPER_SASL_ENABLED=false # KAFKA_OPTS=-Xms1g -Xmx1g # SCHEMA_REGISTRY_JMX_OPTS=-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false 
-Dcom.sun.management.jmxremote.ssl=false + +# Uncomment to use schema registry < v5.4.0 +# SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181 \ No newline at end of file diff --git a/metadata-ingestion/src/datahub/cli/docker_check.py b/metadata-ingestion/src/datahub/cli/docker_check.py index 5c7a86357118ed..81eae41869d683 100644 --- a/metadata-ingestion/src/datahub/cli/docker_check.py +++ b/metadata-ingestion/src/datahub/cli/docker_check.py @@ -9,7 +9,6 @@ "elasticsearch", "datahub-gms", "datahub-frontend-react", - "kafka-setup", "schema-registry", "broker", "zookeeper", diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index 7bbd7577b5880a..c1437ee600f361 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -43,7 +43,10 @@ ELASTIC_QUICKSTART_COMPOSE_FILE = ( "docker/quickstart/docker-compose-without-neo4j.quickstart.yml" ) -M1_QUICKSTART_COMPOSE_FILE = ( +NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE = ( + "docker/quickstart/docker-compose-m1.quickstart.yml" +) +ELASTIC_M1_QUICKSTART_COMPOSE_FILE = ( "docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml" ) CONSUMERS_QUICKSTART_COMPOSE_FILE = ( @@ -52,14 +55,21 @@ ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE = ( "docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml" ) - +KAFKA_SETUP_QUICKSTART_COMPOSE_FILE = ( + "docker/quickstart/docker-compose.kafka-setup.quickstart.yml" +) NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL = ( f"{DOCKER_COMPOSE_BASE}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}" ) ELASTIC_QUICKSTART_COMPOSE_URL = ( f"{DOCKER_COMPOSE_BASE}/{ELASTIC_QUICKSTART_COMPOSE_FILE}" ) -M1_QUICKSTART_COMPOSE_URL = f"{DOCKER_COMPOSE_BASE}/{M1_QUICKSTART_COMPOSE_FILE}" +NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_URL = ( + f"{DOCKER_COMPOSE_BASE}/{NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE}" +) +ELASTIC_M1_QUICKSTART_COMPOSE_URL = ( + f"{DOCKER_COMPOSE_BASE}/{ELASTIC_M1_QUICKSTART_COMPOSE_FILE}" +) class Architectures(Enum): @@ -166,7 +176,7 @@ def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> click.echo( "No Datahub Neo4j volume found, starting with elasticsearch as graph service.\n" "To use neo4j as a graph backend, run \n" - "`datahub docker quickstart --quickstart-compose-file ./docker/quickstart/docker-compose.quickstart.yml`" + "`datahub docker quickstart --graph-service-impl neo4j`" "\nfrom the root of the datahub repo\n" ) return False @@ -581,6 +591,13 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures: default=False, help="Launches MAE & MCE consumers as stand alone docker containers", ) +@click.option( + "--kafka-setup", + required=False, + is_flag=True, + default=False, + help="Launches Kafka setup job as part of the compose deployment", +) @click.option( "--arch", required=False, @@ -608,6 +625,7 @@ def quickstart( restore_indices: bool, no_restore_indices: bool, standalone_consumers: bool, + kafka_setup: bool, arch: Optional[str], ) -> None: """Start an instance of DataHub locally using docker-compose. 
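With the --kafka-setup option above, the Kafka topic-creation job becomes opt-in at quickstart time. A minimal usage sketch (assuming the flag and docker-compose.kafka-setup.quickstart.yml land exactly as declared in this patch; nothing else in the command below is part of the change):

    # Pull the default quickstart compose files plus the kafka-setup compose file,
    # then bring the stack up with the one-off topic-creation job included.
    datahub docker quickstart --kafka-setup

Without the flag, the kafka-setup container is simply not part of the deployment, which lines up with dropping it from the required-containers list in docker_check.py above.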
@@ -650,70 +668,21 @@ def quickstart(
     auth_resources_folder = Path(DATAHUB_ROOT_FOLDER) / "plugins/auth/resources"
     os.makedirs(auth_resources_folder, exist_ok=True)
 
-    default_quickstart_compose_file = _get_default_quickstart_compose_file()
+    quickstart_compose_file_name = _get_default_quickstart_compose_file()
     if stop:
         _attempt_stop(quickstart_compose_file)
         return
     elif not quickstart_compose_file:
-        # download appropriate quickstart file
-        should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
-        if should_use_neo4j and is_arch_m1(quickstart_arch):
-            click.secho(
-                "Running with neo4j on M1 is not currently supported, will be using elasticsearch as graph",
-                fg="red",
-            )
-        github_file = (
-            NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
-            if should_use_neo4j and not is_arch_m1(quickstart_arch)
-            else ELASTIC_QUICKSTART_COMPOSE_URL
-            if not is_arch_m1(quickstart_arch)
-            else M1_QUICKSTART_COMPOSE_URL
+        print("compose file name", quickstart_compose_file_name)
+        download_compose_files(
+            quickstart_compose_file_name,
+            quickstart_compose_file,
+            graph_service_impl,
+            kafka_setup,
+            quickstart_arch,
+            standalone_consumers,
         )
-        # also allow local files
-        request_session = requests.Session()
-        request_session.mount("file://", FileAdapter())
-
-        with open(
-            default_quickstart_compose_file, "wb"
-        ) if default_quickstart_compose_file else tempfile.NamedTemporaryFile(
-            suffix=".yml", delete=False
-        ) as tmp_file:
-            path = pathlib.Path(tmp_file.name)
-            quickstart_compose_file.append(path)
-            click.echo(f"Fetching docker-compose file {github_file} from GitHub")
-            # Download the quickstart docker-compose file from GitHub.
-            quickstart_download_response = request_session.get(github_file)
-            quickstart_download_response.raise_for_status()
-            tmp_file.write(quickstart_download_response.content)
-            logger.debug(f"Copied to {path}")
-
-        if standalone_consumers:
-            consumer_github_file = (
-                f"{DOCKER_COMPOSE_BASE}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
-                if should_use_neo4j
-                else f"{DOCKER_COMPOSE_BASE}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
-            )
-
-            default_consumer_compose_file = (
-                Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
-            )
-            with open(
-                default_consumer_compose_file, "wb"
-            ) if default_consumer_compose_file else tempfile.NamedTemporaryFile(
-                suffix=".yml", delete=False
-            ) as tmp_file:
-                path = pathlib.Path(tmp_file.name)
-                quickstart_compose_file.append(path)
-                click.echo(
-                    f"Fetching consumer docker-compose file {consumer_github_file} from GitHub"
-                )
-                # Download the quickstart docker-compose file from GitHub.
-                quickstart_download_response = request_session.get(consumer_github_file)
-                quickstart_download_response.raise_for_status()
-                tmp_file.write(quickstart_download_response.content)
-                logger.debug(f"Copied to {path}")
-
     # set version
     _set_environment_variables(
         version=version,
@@ -833,6 +802,94 @@ def quickstart(
     )
 
+
+def download_compose_files(
+    quickstart_compose_file_name,
+    quickstart_compose_file_list,
+    graph_service_impl,
+    kafka_setup,
+    quickstart_arch,
+    standalone_consumers,
+):
+    # download appropriate quickstart file
+    should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
+    if should_use_neo4j:
+        github_file = (
+            NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
+            if not is_arch_m1(quickstart_arch)
+            else NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_URL
+        )
+    else:
+        github_file = (
+            ELASTIC_QUICKSTART_COMPOSE_URL
+            if not is_arch_m1(quickstart_arch)
+            else ELASTIC_M1_QUICKSTART_COMPOSE_URL
+        )
+    # also allow local files
+    request_session = requests.Session()
+    request_session.mount("file://", FileAdapter())
+    with open(
+        quickstart_compose_file_name, "wb"
+    ) if quickstart_compose_file_name else tempfile.NamedTemporaryFile(
+        suffix=".yml", delete=False
+    ) as tmp_file:
+        path = pathlib.Path(tmp_file.name)
+        quickstart_compose_file_list.append(path)
+        click.echo(f"Fetching docker-compose file {github_file} from GitHub")
+        # Download the quickstart docker-compose file from GitHub.
+        quickstart_download_response = request_session.get(github_file)
+        quickstart_download_response.raise_for_status()
+        tmp_file.write(quickstart_download_response.content)
+        logger.debug(f"Copied to {path}")
+    if standalone_consumers:
+        consumer_github_file = (
+            f"{DOCKER_COMPOSE_BASE}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
+            if should_use_neo4j
+            else f"{DOCKER_COMPOSE_BASE}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
+        )
+
+        default_consumer_compose_file = (
+            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
+        )
+        with open(
+            default_consumer_compose_file, "wb"
+        ) if default_consumer_compose_file else tempfile.NamedTemporaryFile(
+            suffix=".yml", delete=False
+        ) as tmp_file:
+            path = pathlib.Path(tmp_file.name)
+            quickstart_compose_file_list.append(path)
+            click.echo(
+                f"Fetching consumer docker-compose file {consumer_github_file} from GitHub"
+            )
+            # Download the quickstart docker-compose file from GitHub.
+            quickstart_download_response = request_session.get(consumer_github_file)
+            quickstart_download_response.raise_for_status()
+            tmp_file.write(quickstart_download_response.content)
+            logger.debug(f"Copied to {path}")
+    if kafka_setup:
+        kafka_setup_github_file = (
+            f"{DOCKER_COMPOSE_BASE}/{KAFKA_SETUP_QUICKSTART_COMPOSE_FILE}"
+        )
+
+        default_consumer_compose_file = (
+            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
+        )
+        with open(
+            default_consumer_compose_file, "wb"
+        ) if default_consumer_compose_file else tempfile.NamedTemporaryFile(
+            suffix=".yml", delete=False
+        ) as tmp_file:
+            path = pathlib.Path(tmp_file.name)
+            quickstart_compose_file_list.append(path)
+            click.echo(
+                f"Fetching consumer docker-compose file {kafka_setup_github_file} from GitHub"
+            )
+            # Download the quickstart docker-compose file from GitHub.
+            quickstart_download_response = request_session.get(kafka_setup_github_file)
+            quickstart_download_response.raise_for_status()
+            tmp_file.write(quickstart_download_response.content)
+            logger.debug(f"Copied to {path}")
+
+
 def valid_restore_options(
     restore: bool, restore_indices: bool, no_restore_indices: bool
 ) -> bool: