From ad93a6276e9949ceb59c5dbfcf39080e5a8a0cc2 Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Fri, 3 May 2024 09:34:01 -0400 Subject: [PATCH 1/5] Fix flink versioning issues --- .../test-properties.json | 2 +- CHANGES.md | 2 ++ .../runner-concepts/description.md | 2 +- sdks/go/examples/wasm/README.md | 2 +- .../python/apache_beam/options/pipeline_options.py | 2 +- .../runners/interactive/interactive_beam.py | 2 +- sdks/typescript/src/apache_beam/runners/flink.ts | 2 +- .../site/content/en/documentation/runners/flink.md | 14 ++++++++------ .../en/documentation/sdks/java/testing/tpcds.md | 2 +- 9 files changed, 17 insertions(+), 13 deletions(-) diff --git a/.github/actions/setup-default-test-properties/test-properties.json b/.github/actions/setup-default-test-properties/test-properties.json index b53169ffef9c..96d649338e77 100644 --- a/.github/actions/setup-default-test-properties/test-properties.json +++ b/.github/actions/setup-default-test-properties/test-properties.json @@ -14,7 +14,7 @@ }, "JavaTestProperties": { "SUPPORTED_VERSIONS": ["8", "11", "17"], - "FLINK_VERSIONS": ["1.13", "1.14", "1.15"], + "FLINK_VERSIONS": ["1.14", "1.15", "1.16", "1.17"], "SPARK_VERSIONS": ["2", "3"] }, "GoTestProperties": { diff --git a/CHANGES.md b/CHANGES.md index e372b2090124..e2e70ce61960 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -127,6 +127,8 @@ ## Known Issues +* The beam interactive runner does not correctly run on flink, and support for Flink 1.17 may not be propogated to all non-Java contexts. + # [2.55.1] - 2024-04-08 ## Bugfixes diff --git a/learning/tour-of-beam/learning-content/introduction/introduction-concepts/runner-concepts/description.md b/learning/tour-of-beam/learning-content/introduction/introduction-concepts/runner-concepts/description.md index 600a1f904e84..47c0d92286cf 100644 --- a/learning/tour-of-beam/learning-content/introduction/introduction-concepts/runner-concepts/description.md +++ b/learning/tour-of-beam/learning-content/introduction/introduction-concepts/runner-concepts/description.md @@ -191,7 +191,7 @@ $ wordcount --input gs://dataflow-samples/shakespeare/kinglear.txt \ {{if (eq .Sdk "java")}} ##### Portable -1. Starting with Beam 2.18.0, pre-built Flink Job Service Docker images are available at Docker Hub: `Flink 1.10`, `Flink 1.11`, `Flink 1.12`, `Flink 1.13`, `Flink 1.14`. +1. Starting with Beam 2.18.0, pre-built Flink Job Service Docker images are available at Docker Hub: `Flink 1.14`, `Flink 1.15`, `Flink 1.16`, `Flink 1.17`. 2. Start the JobService endpoint: `docker run --net=host apache/beam_flink1.10_job_server:latest` 3. Submit the pipeline to the above endpoint by using the PortableRunner, job_endpoint set to localhost:8099 (this is the default address of the JobService). Optionally set environment_type set to LOOPBACK. For example: diff --git a/sdks/go/examples/wasm/README.md b/sdks/go/examples/wasm/README.md index aba113be3718..cb0816588502 100644 --- a/sdks/go/examples/wasm/README.md +++ b/sdks/go/examples/wasm/README.md @@ -68,7 +68,7 @@ cd $BEAM_HOME Expected output should include the following, from which you acquire the latest flink runner version. ```shell -'flink_versions: 1.12,1.13,1.14,1.15,1.16' +'flink_versions: 1.14,1.15,1.16,1.17' ``` #### 2. Set to the latest flink runner version i.e. 1.16 diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py index 65e7824f6891..93cde0cf2d40 100644 --- a/sdks/python/apache_beam/options/pipeline_options.py +++ b/sdks/python/apache_beam/options/pipeline_options.py @@ -1591,7 +1591,7 @@ def _add_argparse_args(cls, parser): class FlinkRunnerOptions(PipelineOptions): # These should stay in sync with gradle.properties. - PUBLISHED_FLINK_VERSIONS = ['1.12', '1.13', '1.14', '1.15', '1.16'] + PUBLISHED_FLINK_VERSIONS = ['1.14', '1.15', '1.16', '1.17'] @classmethod def _add_argparse_args(cls, parser): diff --git a/sdks/python/apache_beam/runners/interactive/interactive_beam.py b/sdks/python/apache_beam/runners/interactive/interactive_beam.py index 207b0f4e6451..5165ea3ac62d 100644 --- a/sdks/python/apache_beam/runners/interactive/interactive_beam.py +++ b/sdks/python/apache_beam/runners/interactive/interactive_beam.py @@ -408,7 +408,7 @@ class Clusters: # Explicitly set the Flink version here to ensure compatibility with 2.0 # Dataproc images: # https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-release-2.0 - DATAPROC_FLINK_VERSION = '1.12' + DATAPROC_FLINK_VERSION = '1.17' # The minimum worker number to create a Dataproc cluster. DATAPROC_MINIMUM_WORKER_NUM = 2 diff --git a/sdks/typescript/src/apache_beam/runners/flink.ts b/sdks/typescript/src/apache_beam/runners/flink.ts index c34a50c1939c..3b672b48105c 100644 --- a/sdks/typescript/src/apache_beam/runners/flink.ts +++ b/sdks/typescript/src/apache_beam/runners/flink.ts @@ -28,7 +28,7 @@ import { JavaJarService } from "../utils/service"; const MAGIC_HOST_NAMES = ["[local]", "[auto]"]; // These should stay in sync with gradle.properties. -const PUBLISHED_FLINK_VERSIONS = ["1.12", "1.13", "1.14"]; +const PUBLISHED_FLINK_VERSIONS = ["1.14", "1.15", "1.16", "1.17"]; const defaultOptions = { flinkMaster: "[local]", diff --git a/website/www/site/content/en/documentation/runners/flink.md b/website/www/site/content/en/documentation/runners/flink.md index 64a6e9bade9b..1e1a095aae33 100644 --- a/website/www/site/content/en/documentation/runners/flink.md +++ b/website/www/site/content/en/documentation/runners/flink.md @@ -196,13 +196,10 @@ The optional `flink_version` option may be required as well for older versions o {{< paragraph class="language-portable" >}} Starting with Beam 2.18.0, pre-built Flink Job Service Docker images are available at Docker Hub: -[Flink 1.10](https://hub.docker.com/r/apache/beam_flink1.10_job_server), -[Flink 1.11](https://hub.docker.com/r/apache/beam_flink1.11_job_server), -[Flink 1.12](https://hub.docker.com/r/apache/beam_flink1.12_job_server). -[Flink 1.13](https://hub.docker.com/r/apache/beam_flink1.13_job_server). [Flink 1.14](https://hub.docker.com/r/apache/beam_flink1.14_job_server). [Flink 1.15](https://hub.docker.com/r/apache/beam_flink1.15_job_server). [Flink 1.16](https://hub.docker.com/r/apache/beam_flink1.16_job_server). +[Flink 1.17](https://hub.docker.com/r/apache/beam_flink1.17_job_server). {{< /paragraph >}} @@ -329,6 +326,11 @@ To find out which version of Flink is compatible with Beam please see the table Artifact Id Supported Beam Versions + + 1.17.x + beam-runners-flink-1.16 + ≥ 2.56.0 + 1.16.x beam-runners-flink-1.16 @@ -347,12 +349,12 @@ To find out which version of Flink is compatible with Beam please see the table 1.13.x beam-runners-flink-1.13 - ≥ 2.31.0 + ≥ 2.31.0 - 2.55.0 1.12.x beam-runners-flink-1.12 - ≥ 2.27.0 + ≥ 2.27.0 - 2.55.0 1.11.x diff --git a/website/www/site/content/en/documentation/sdks/java/testing/tpcds.md b/website/www/site/content/en/documentation/sdks/java/testing/tpcds.md index de18bcc60c37..2211c05a6aae 100644 --- a/website/www/site/content/en/documentation/sdks/java/testing/tpcds.md +++ b/website/www/site/content/en/documentation/sdks/java/testing/tpcds.md @@ -138,7 +138,7 @@ When running via Gradle, the following two parameters control the execution: -P tpcds.runner The Gradle project name of the runner, such as ":runners:spark:3" or - ":runners:flink:1.13. The project names can be found in the root + ":runners:flink:1.17. The project names can be found in the root `settings.gradle.kts`. Test data has to be generated before running a suite and stored to accessible file system. The query results will be written into output files. From 55d2ef873e202d019c9a6d97fc963215ebf48ba6 Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Fri, 3 May 2024 09:37:09 -0400 Subject: [PATCH 2/5] Add pr reference --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index e2e70ce61960..d30f120625cb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -127,7 +127,7 @@ ## Known Issues -* The beam interactive runner does not correctly run on flink, and support for Flink 1.17 may not be propogated to all non-Java contexts. +* The beam interactive runner does not correctly run on flink, and support for Flink 1.17 may not be propogated to all non-Java contexts ([#31168](https://github.com/apache/beam/issues/31168)). # [2.55.1] - 2024-04-08 From 2a9bf30e2dba957122b609d147f0cc051a1f08f2 Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Fri, 3 May 2024 09:37:54 -0400 Subject: [PATCH 3/5] dataproc 2.2 --- .../apache_beam/runners/interactive/interactive_beam.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/runners/interactive/interactive_beam.py b/sdks/python/apache_beam/runners/interactive/interactive_beam.py index 5165ea3ac62d..f15506b36793 100644 --- a/sdks/python/apache_beam/runners/interactive/interactive_beam.py +++ b/sdks/python/apache_beam/runners/interactive/interactive_beam.py @@ -405,9 +405,9 @@ class Clusters: To configure a pipeline to run on a local FlinkRunner, explicitly set the default cluster metadata to None: ib.clusters.set_default_cluster(None). """ - # Explicitly set the Flink version here to ensure compatibility with 2.0 + # Explicitly set the Flink version here to ensure compatibility with 2.2 # Dataproc images: - # https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-release-2.0 + # https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-release-2.2 DATAPROC_FLINK_VERSION = '1.17' # The minimum worker number to create a Dataproc cluster. From d26d8d42896ce9b8ae4439a3edf5733c6bf67e2e Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Fri, 3 May 2024 09:42:30 -0400 Subject: [PATCH 4/5] 1.15 for dataproc version --- .../apache_beam/runners/interactive/interactive_beam.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/runners/interactive/interactive_beam.py b/sdks/python/apache_beam/runners/interactive/interactive_beam.py index f15506b36793..ee1b37cd7043 100644 --- a/sdks/python/apache_beam/runners/interactive/interactive_beam.py +++ b/sdks/python/apache_beam/runners/interactive/interactive_beam.py @@ -405,10 +405,10 @@ class Clusters: To configure a pipeline to run on a local FlinkRunner, explicitly set the default cluster metadata to None: ib.clusters.set_default_cluster(None). """ - # Explicitly set the Flink version here to ensure compatibility with 2.2 + # Explicitly set the Flink version here to ensure compatibility with 2.1 # Dataproc images: - # https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-release-2.2 - DATAPROC_FLINK_VERSION = '1.17' + # https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-release-2.1 + DATAPROC_FLINK_VERSION = '1.15' # The minimum worker number to create a Dataproc cluster. DATAPROC_MINIMUM_WORKER_NUM = 2 From 1e98d6c795081b93b7b5d8d0da235a6ef69ab45c Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Fri, 3 May 2024 10:32:51 -0400 Subject: [PATCH 5/5] More explicit changes --- CHANGES.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index d30f120625cb..23cefee43950 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -127,7 +127,8 @@ ## Known Issues -* The beam interactive runner does not correctly run on flink, and support for Flink 1.17 may not be propogated to all non-Java contexts ([#31168](https://github.com/apache/beam/issues/31168)). +* The beam interactive runner does not correctly run on flink ([#31168](https://github.com/apache/beam/issues/31168)). +* When using the Flink runner from Python, 1.17 is not supported and 1.12/13 do not work correctly. Support for 1.17 will be added in 2.57.0, and the ability to choose 1.12/13 will be cleaned up and fully removed in 2.57.0 as well ([#31168](https://github.com/apache/beam/issues/31168)). # [2.55.1] - 2024-04-08