From ff1afbf211271e39feda5d3e05d1d3b74918f297 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 28 Aug 2024 14:52:11 +0000 Subject: [PATCH 1/6] Update pinning --- conda/environments/all_cuda-118_arch-x86_64.yaml | 1 + conda/environments/all_cuda-125_arch-x86_64.yaml | 1 + conda/recipes/cudf/meta.yaml | 2 +- conda/recipes/pylibcudf/meta.yaml | 2 +- dependencies.yaml | 12 ++---------- python/cudf/pyproject.toml | 2 +- python/pylibcudf/pyproject.toml | 2 +- 7 files changed, 8 insertions(+), 14 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index c4c32da8af2..f39db3edf4a 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -67,6 +67,7 @@ dependencies: - pandoc - pre-commit - ptxcompiler +- pyarrow>=14.0.0 - pydata-sphinx-theme!=0.14.2 - pytest-benchmark - pytest-cases>=3.8.2 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 7439c9543a5..b97609070f3 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -64,6 +64,7 @@ dependencies: - pandas>=2.0,<2.2.3dev0 - pandoc - pre-commit +- pyarrow>=14.0.0 - pydata-sphinx-theme!=0.14.2 - pynvjitlink>=0.0.0a0 - pytest-benchmark diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 53f52a35651..0b8dc1bac38 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -82,7 +82,7 @@ requirements: - cupy >=12.0.0 - numba >=0.57 - numpy >=1.23,<3.0a0 - - pyarrow ==16.1.0.* + - pyarrow >=14.0.0 - libcudf ={{ version }} - pylibcudf ={{ version }} - {{ pin_compatible('rmm', max_pin='x.x') }} diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml index 67b9b76bb8c..3712c3b3bd2 100644 --- a/conda/recipes/pylibcudf/meta.yaml +++ b/conda/recipes/pylibcudf/meta.yaml @@ -79,7 +79,7 @@ requirements: - typing_extensions >=4.0.0 - pandas >=2.0,<2.2.3dev0 - numpy >=1.23,<3.0a0 - - pyarrow ==16.1.0.* + - pyarrow >=14.0.0 - {{ pin_compatible('rmm', max_pin='x.x') }} - fsspec >=0.6.0 {% if cuda_major == "11" %} diff --git a/dependencies.yaml b/dependencies.yaml index 5be291b3671..0824f799435 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -19,6 +19,7 @@ files: - docs - notebooks - py_version + - pyarrow_run - rapids_build_skbuild - rapids_build_setuptools - run_common @@ -46,7 +47,6 @@ files: includes: - cuda_version - py_version - - pyarrow_run - test_python_common - test_python_cudf - test_python_dask_cudf @@ -136,13 +136,6 @@ files: - build_base - build_cpp - depends_on_librmm - py_run_libcudf: - output: pyproject - pyproject_dir: python/libcudf - extras: - table: project - includes: - - pyarrow_run py_build_pylibcudf: output: pyproject pyproject_dir: python/pylibcudf @@ -390,8 +383,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - # Allow runtime version to float up to patch version - - pyarrow>=16.1.0,<16.2.0a0 + - pyarrow>=14.0.0 cuda_version: specific: - output_types: conda diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 0c1d5015078..a08d83a700c 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ "packaging", "pandas>=2.0,<2.2.3dev0", "ptxcompiler", - "pyarrow>=16.1.0,<16.2.0a0", + "pyarrow>=14.0.0", "pylibcudf==24.10.*,>=0.0.0a0", "rich", "rmm==24.10.*,>=0.0.0a0", diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index e4c6edc6141..b943e1f7a08 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ "libcudf==24.10.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", - "pyarrow>=16.1.0,<16.2.0a0", + "pyarrow>=14.0.0", "rmm==24.10.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From 0de696fae553600db51abd5f76bdf8619cc41a64 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 28 Aug 2024 14:52:49 +0000 Subject: [PATCH 2/6] Skip tests that require a newer arrow version --- python/cudf/cudf/tests/test_parquet.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index db4f1c9c8bd..dd8bffcc69b 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -2393,6 +2393,10 @@ def test_parquet_writer_list_large_mixed(tmpdir): @pytest.mark.parametrize("store_schema", [True, False]) def test_parquet_writer_list_chunked(tmpdir, store_schema): + if store_schema and version.parse(pa.__version__) < version.parse( + "15.0.0" + ): + pytest.skip("https://github.com/apache/arrow/pull/37792") table1 = cudf.DataFrame( { "a": list_gen(string_gen, 128, 80, 50), @@ -2578,6 +2582,10 @@ def normalized_equals(value1, value2): @pytest.mark.parametrize("add_nulls", [True, False]) @pytest.mark.parametrize("store_schema", [True, False]) def test_parquet_writer_statistics(tmpdir, pdf, add_nulls, store_schema): + if store_schema and version.parse(pa.__version__) < version.parse( + "15.0.0" + ): + pytest.skip("https://github.com/apache/arrow/pull/37792") file_path = tmpdir.join("cudf.parquet") if "col_category" in pdf.columns: pdf = pdf.drop(columns=["col_category", "col_bool"]) @@ -2957,6 +2965,10 @@ def test_per_column_options_string_col(tmpdir, encoding): assert encoding in fmd.row_group(0).column(0).encodings +@pytest.mark.skipif( + version.parse(pa.__version__) < version.parse("16.0.0"), + reason="https://github.com/apache/arrow/pull/39748", +) @pytest.mark.parametrize( "num_rows", [200, 10000], @@ -3557,6 +3569,10 @@ def test_parquet_reader_roundtrip_structs_with_arrow_schema(tmpdir, data): @pytest.mark.parametrize("index", [None, True, False]) +@pytest.mark.skipif( + version.parse(pa.__version__) < version.parse("15.0.0"), + reason="https://github.com/apache/arrow/pull/37792", +) def test_parquet_writer_roundtrip_with_arrow_schema(index): # Ensure that the concrete and nested types are faithfully being roundtripped # across Parquet with arrow schema @@ -3707,6 +3723,10 @@ def test_parquet_writer_int96_timestamps_and_arrow_schema(): ], ) @pytest.mark.parametrize("index", [None, True, False]) +@pytest.mark.skipif( + version.parse(pa.__version__) < version.parse("15.0.0"), + reason="https://github.com/apache/arrow/pull/37792", +) def test_parquet_writer_roundtrip_structs_with_arrow_schema( tmpdir, data, index ): From 87001733a478c69a53b21067b16f62b06c44e0d6 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 28 Aug 2024 14:54:10 +0000 Subject: [PATCH 3/6] Remove extremely outdated skip --- python/cudf/cudf/tests/test_parquet.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index dd8bffcc69b..879b2bd3d74 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -515,10 +515,6 @@ def test_parquet_read_filtered_multiple_files(tmpdir): ) -@pytest.mark.skipif( - version.parse(pa.__version__) < version.parse("1.0.1"), - reason="pyarrow 1.0.0 needed for various operators and operand types", -) @pytest.mark.parametrize( "predicate,expected_len", [ From 32fb65e97df3a072dee98373a3182e1ada448567 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 28 Aug 2024 17:48:04 +0000 Subject: [PATCH 4/6] Remove erroneous ChunkedArray overload of from_arrow --- python/pylibcudf/pylibcudf/interop.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/interop.pyx b/python/pylibcudf/pylibcudf/interop.pyx index d54e5b7ba1f..1a03fa5b45b 100644 --- a/python/pylibcudf/pylibcudf/interop.pyx +++ b/python/pylibcudf/pylibcudf/interop.pyx @@ -152,7 +152,6 @@ def _from_arrow_scalar(pyarrow_object, *, DataType data_type=None): @from_arrow.register(pa.Array) -@from_arrow.register(pa.ChunkedArray) def _from_arrow_column(pyarrow_object, *, DataType data_type=None): if data_type is not None: raise ValueError("data_type may not be passed for arrays") From 45b10cefbd11983283437431f1196393947700d9 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 28 Aug 2024 17:49:56 +0000 Subject: [PATCH 5/6] Update pinnings with upper bounds --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cudf/meta.yaml | 2 +- conda/recipes/pylibcudf/meta.yaml | 2 +- dependencies.yaml | 2 +- python/cudf/pyproject.toml | 2 +- python/pylibcudf/pyproject.toml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index f39db3edf4a..7f6967d7287 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -67,7 +67,7 @@ dependencies: - pandoc - pre-commit - ptxcompiler -- pyarrow>=14.0.0 +- pyarrow>=14.0.0,<18.0.0a0 - pydata-sphinx-theme!=0.14.2 - pytest-benchmark - pytest-cases>=3.8.2 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index b97609070f3..c1315e73f16 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -64,7 +64,7 @@ dependencies: - pandas>=2.0,<2.2.3dev0 - pandoc - pre-commit -- pyarrow>=14.0.0 +- pyarrow>=14.0.0,<18.0.0a0 - pydata-sphinx-theme!=0.14.2 - pynvjitlink>=0.0.0a0 - pytest-benchmark diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 0b8dc1bac38..e22b4a4eddc 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -82,7 +82,7 @@ requirements: - cupy >=12.0.0 - numba >=0.57 - numpy >=1.23,<3.0a0 - - pyarrow >=14.0.0 + - pyarrow>=14.0.0,<18.0.0a0 - libcudf ={{ version }} - pylibcudf ={{ version }} - {{ pin_compatible('rmm', max_pin='x.x') }} diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml index 3712c3b3bd2..7c1efa0176c 100644 --- a/conda/recipes/pylibcudf/meta.yaml +++ b/conda/recipes/pylibcudf/meta.yaml @@ -79,7 +79,7 @@ requirements: - typing_extensions >=4.0.0 - pandas >=2.0,<2.2.3dev0 - numpy >=1.23,<3.0a0 - - pyarrow >=14.0.0 + - pyarrow>=14.0.0,<18.0.0a0 - {{ pin_compatible('rmm', max_pin='x.x') }} - fsspec >=0.6.0 {% if cuda_major == "11" %} diff --git a/dependencies.yaml b/dependencies.yaml index 0824f799435..c6851d9cb90 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -383,7 +383,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - pyarrow>=14.0.0 + - pyarrow>=14.0.0,<18.0.0a0 cuda_version: specific: - output_types: conda diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index a08d83a700c..17d1292980b 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ "packaging", "pandas>=2.0,<2.2.3dev0", "ptxcompiler", - "pyarrow>=14.0.0", + "pyarrow>=14.0.0,<18.0.0a0", "pylibcudf==24.10.*,>=0.0.0a0", "rich", "rmm==24.10.*,>=0.0.0a0", diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index b943e1f7a08..bfade41353c 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ "libcudf==24.10.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", - "pyarrow>=14.0.0", + "pyarrow>=14.0.0,<18.0.0a0", "rmm==24.10.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From c5763332c72ccb359ed572242e0c58122243e94a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 28 Aug 2024 18:11:50 +0000 Subject: [PATCH 6/6] Remove unnecessary pyarrow dependency from libcudf dependencies --- python/libcudf/pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index 43878d0aec2..5f4b9957fd0 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -37,9 +37,6 @@ classifiers = [ "Programming Language :: C++", "Environment :: GPU :: NVIDIA CUDA", ] -dependencies = [ - "pyarrow>=16.1.0,<16.2.0a0", -] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] Homepage = "https://github.com/rapidsai/cudf"