From 02d53c5a17b4bb09adf26f4cee4abbbcfc07a3ba Mon Sep 17 00:00:00 2001
From: AlenkaF
Date: Tue, 30 Jan 2024 10:30:12 +0100
Subject: [PATCH 1/7] Initial commit

---
 ci/conda_env_python.txt                      |  3 +-
 dev/tasks/conda-recipes/arrow-cpp/meta.yaml  |  1 -
 python/pyarrow/tests/conftest.py             |  5 +-
 python/pyarrow/tests/parquet/test_dataset.py |  3 +-
 python/pyarrow/tests/test_array.py           | 12 ++--
 python/pyarrow/tests/test_compute.py         |  9 ++-
 python/pyarrow/tests/test_csv.py             | 12 ++--
 python/pyarrow/tests/test_dataset.py         | 72 +++++++++++++-------
 python/pyarrow/tests/test_extension_type.py  | 20 +++---
 python/pyarrow/tests/test_fs.py              | 60 +++++++++-------
 python/pyarrow/tests/test_io.py              | 15 ++--
 python/pyarrow/tests/test_ipc.py             |  7 +-
 python/pyarrow/tests/test_json.py            |  6 +-
 python/pyarrow/tests/test_scalars.py         | 15 ++--
 python/pyarrow/tests/test_schema.py          |  3 +-
 python/pyarrow/tests/test_table.py           |  9 ++-
 python/pyarrow/tests/test_types.py           |  3 +-
 python/requirements-test.txt                 |  3 +-
 python/requirements-wheel-test.txt           |  3 +-
 19 files changed, 160 insertions(+), 101 deletions(-)

diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt
index 5fdd21d2bd1f9..0389d538229e8 100644
--- a/ci/conda_env_python.txt
+++ b/ci/conda_env_python.txt
@@ -23,9 +23,8 @@ cloudpickle
 fsspec
 hypothesis
 numpy>=1.16.6
-pytest<8  # pytest-lazy-fixture broken on pytest 8.0.0
+pytest
 pytest-faulthandler
-pytest-lazy-fixture
 s3fs>=2023.10.0
 setuptools
 setuptools_scm<8.0.0
diff --git a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml
index b8ffbfdb715b6..367445c595c4b 100644
--- a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml
+++ b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml
@@ -340,7 +340,6 @@ outputs:
         # test_cpp_extension_in_python requires a compiler
         - {{ compiler("cxx") }}  # [linux]
         - pytest
-        - pytest-lazy-fixture
         - backports.zoneinfo     # [py<39]
         - boto3
         - cffi
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index a5941e8c8d1a8..23ac18c7dd27c 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -24,7 +24,6 @@
 import urllib.request
 
 import pytest
-from pytest_lazyfixture import lazy_fixture
 import hypothesis as h
 
 from ..conftest import groups, defaults
@@ -259,8 +258,8 @@ def gcs_server():
 
 @pytest.fixture(
     params=[
-        lazy_fixture('builtin_pickle'),
-        lazy_fixture('cloudpickle')
+        'builtin_pickle',
+        'cloudpickle'
     ],
     scope='session'
 )
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index b6e351bdef9a7..48a538251fb64 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1137,7 +1137,8 @@ def _make_dataset_for_pickling(tempdir, N=100):
 
 
 @pytest.mark.pandas
-def test_pickle_dataset(tempdir, pickle_module):
+def test_pickle_dataset(tempdir, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     def is_pickleable(obj):
         return obj == pickle_module.loads(pickle_module.dumps(obj))
 
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index f851d4e0b6c29..fc9fddf8d8932 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -2029,7 +2029,8 @@ def test_cast_identities(ty, values):
 
 
 @pickle_test_parametrize
-def test_array_pickle(data, typ, pickle_module):
+def test_array_pickle(data, typ, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     # Allocate here so that we don't have any Arrow data allocated.
     # This is needed to ensure that allocator tests can be reliable.
     array = pa.array(data, type=typ)
@@ -2038,7 +2039,8 @@ def test_array_pickle(data, typ, pickle_module):
     assert array.equals(result)
 
 
-def test_array_pickle_dictionary(pickle_module):
+def test_array_pickle_dictionary(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     # not included in the above as dictionary array cannot be created with
     # the pa.array function
     array = pa.DictionaryArray.from_arrays([0, 1, 2, 0, 1], ['a', 'b', 'c'])
@@ -2054,14 +2056,16 @@ def test_array_pickle_dictionary(pickle_module):
         size=st.integers(min_value=0, max_value=10)
     )
 )
-def test_pickling(pickle_module, arr):
+def test_pickling(arr, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     data = pickle_module.dumps(arr)
     restored = pickle_module.loads(data)
     assert arr.equals(restored)
 
 
 @pickle_test_parametrize
-def test_array_pickle_protocol5(data, typ, pickle_module):
+def test_array_pickle_protocol5(data, typ, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     # Test zero-copy pickling with protocol 5 (PEP 574)
     array = pa.array(data, type=typ)
     addresses = [buf.address if buf is not None else 0
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 4b58dc65bae9b..0d1dd0df9b514 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -277,7 +277,8 @@ def test_call_function_with_memory_pool():
     assert result3.equals(expected)
 
 
-def test_pickle_functions(pickle_module):
+def test_pickle_functions(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     # Pickle registered functions
     for name in pc.list_functions():
         func = pc.get_function(name)
@@ -288,7 +289,8 @@
         assert reconstructed.num_kernels == func.num_kernels
 
 
-def test_pickle_global_functions(pickle_module):
+def test_pickle_global_functions(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     # Pickle global wrappers (manual or automatic) of registered functions
     for name in pc.list_functions():
         try:
@@ -3392,7 +3394,8 @@ def create_sample_expressions():
 
 
 # Tests the Arrow-specific serialization mechanism
-def test_expression_serialization_arrow(pickle_module):
+def test_expression_serialization_arrow(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     for expr in create_sample_expressions()["all"]:
         assert isinstance(expr, pc.Expression)
         restored = pickle_module.loads(pickle_module.dumps(expr))
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 31f24187e3b37..99b9c2b1d6665 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -127,7 +127,8 @@ def __ne__(self, other):
                 other.result != self.result)
 
 
-def test_read_options(pickle_module):
+def test_read_options(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     cls = ReadOptions
 
     opts = cls()
@@ -182,7 +183,8 @@ def test_read_options(pickle_module, request):
     opts.validate()
 
 
-def test_parse_options(pickle_module):
+def test_parse_options(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     cls = ParseOptions
     skip_handler = InvalidRowHandler('skip')
 
@@ -242,7 +244,8 @@ def test_parse_options(pickle_module, request):
     opts.validate()
 
 
-def test_convert_options(pickle_module):
+def test_convert_options(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     cls = ConvertOptions
 
     opts = cls()
@@ -624,7 +627,8 @@ def format_msg(msg_format, row, *args):
                         read_options=read_options,
                         convert_options=convert_options)
 
-    def test_invalid_row_handler(self, pickle_module):
+    def test_invalid_row_handler(self, pickle_module, request):
+        pickle_module = request.getfixturevalue(pickle_module)
         rows = b"a,b\nc\nd,e\nf,g,h\ni,j\n"
         parse_opts = ParseOptions()
         with pytest.raises(
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index a4838d63a6b0b..49d5e271c52b8 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -100,7 +100,6 @@ def assert_dataset_fragment_convenience_methods(dataset):
 
 
 @pytest.fixture
-@pytest.mark.parquet
 def mockfs():
     mockfs = fs._MockFileSystem()
 
@@ -221,7 +220,6 @@ def multisourcefs(request):
 
 
 @pytest.fixture
-@pytest.mark.parquet
 def dataset(mockfs):
     format = ds.ParquetFileFormat()
     selector = fs.FileSelector('subdir', recursive=True)
@@ -713,7 +711,8 @@ def test_partitioning():
     assert load_back is None
 
 
-def test_partitioning_pickling(pickle_module):
+def test_partitioning_pickling(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     schema = pa.schema([
         pa.field('i64', pa.int64()),
         pa.field('f64', pa.float64())
@@ -845,7 +844,8 @@ def test_parquet_scan_options():
     assert opts7 != opts1
 
 
-def test_file_format_pickling(pickle_module):
+def test_file_format_pickling(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     formats = [
         ds.IpcFileFormat(),
         ds.CsvFileFormat(),
@@ -884,7 +884,8 @@ def test_file_format_pickling(pickle_module, request):
         assert pickle_module.loads(pickle_module.dumps(file_format)) == file_format
 
 
-def test_fragment_scan_options_pickling(pickle_module):
+def test_fragment_scan_options_pickling(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     options = [
         ds.CsvFragmentScanOptions(),
         ds.CsvFragmentScanOptions(
@@ -1067,7 +1068,8 @@ def test_make_fragment_with_size(s3_example_simple):
         table = dataset_with_size.to_table()
 
 
-def test_make_csv_fragment_from_buffer(dataset_reader, pickle_module):
+def test_make_csv_fragment_from_buffer(dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     content = textwrap.dedent("""
         alpha,num,animal
         a,12,dog
@@ -1092,7 +1094,8 @@ def test_make_csv_fragment_from_buffer(dataset_reader, pickle_module):
     assert dataset_reader.to_table(pickled).equals(fragment.to_table())
 
 
-def test_make_json_fragment_from_buffer(dataset_reader, pickle_module):
+def test_make_json_fragment_from_buffer(dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     content = '{"alpha" : "a", "num": 12, "animal" : "dog"}\n' + \
         '{"alpha" : "b", "num": 11, "animal" : "cat"}\n' + \
         '{"alpha" : "c", "num": 10, "animal" : "rabbit"}\n'
@@ -1115,7 +1118,8 @@ def test_make_json_fragment_from_buffer(dataset_reader, pickle_module):
 
 
 @pytest.mark.parquet
-def test_make_parquet_fragment_from_buffer(dataset_reader, pickle_module):
+def test_make_parquet_fragment_from_buffer(dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     arrays = [
         pa.array(['a', 'b', 'c']),
         pa.array([12, 11, 10]),
@@ -1218,7 +1222,8 @@ def test_fragments_implicit_cast(tempdir):
 
 
 @pytest.mark.parquet
-def test_fragments_reconstruct(tempdir, dataset_reader, pickle_module):
+def test_fragments_reconstruct(tempdir, dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     table, dataset = _create_dataset_for_fragments(tempdir)
 
     def assert_yields_projected(fragment, row_slice,
@@ -1342,7 +1347,8 @@ def test_fragments_parquet_row_groups_dictionary(tempdir, dataset_reader):
 
 
 @pytest.mark.parquet
-def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs, pickle_module):
+def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     fs, assert_opens = open_logging_fs
     _, dataset = _create_dataset_for_fragments(
         tempdir, chunk_size=2, filesystem=fs
     )
@@ -1383,7 +1389,8 @@ def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs, pickle_modu
 
 
 @pytest.mark.parquet
-def test_fragments_parquet_pickle_no_metadata(tempdir, open_logging_fs, pickle_module):
+def test_fragments_parquet_pickle_no_metadata(tempdir, open_logging_fs, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     # https://issues.apache.org/jira/browse/ARROW-15796
     fs, assert_opens = open_logging_fs
     _, dataset = _create_dataset_for_fragments(tempdir, filesystem=fs)
@@ -1548,7 +1555,8 @@ def test_fragments_parquet_row_groups_predicate(tempdir):
 
 @pytest.mark.parquet
 def test_fragments_parquet_row_groups_reconstruct(tempdir, dataset_reader,
-                                                  pickle_module):
+                                                  pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=2)
     fragment = list(dataset.get_fragments())[0]
 
@@ -1737,7 +1745,8 @@ def test_fragments_repr(tempdir, dataset):
 @pytest.mark.parquet
 @pytest.mark.parametrize(
     "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))])
-def test_partitioning_factory(mockfs, pickled, pickle_module):
+def test_partitioning_factory(mockfs, pickled, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     paths_or_selector = fs.FileSelector('subdir', recursive=True)
     format = ds.ParquetFileFormat()
 
@@ -1772,7 +1781,8 @@ def test_partitioning_factory(mockfs, pickled, pickle_module, request):
 @pytest.mark.parametrize(
     "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))])
 def test_partitioning_factory_dictionary(mockfs, infer_dictionary, pickled,
-                                         pickle_module):
+                                         pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     paths_or_selector = fs.FileSelector('subdir', recursive=True)
     format = ds.ParquetFileFormat()
     options = ds.FileSystemFactoryOptions('subdir')
@@ -1805,7 +1815,8 @@ def test_partitioning_factory_dictionary(mockfs, infer_dictionary, pickled,
 
 @pytest.mark.parametrize(
     "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))])
-def test_partitioning_factory_segment_encoding(pickled, pickle_module):
+def test_partitioning_factory_segment_encoding(pickled, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     mockfs = fs._MockFileSystem()
     format = ds.IpcFileFormat()
     schema = pa.schema([("i64", pa.int64())])
@@ -1907,7 +1918,8 @@
 
 @pytest.mark.parametrize(
     "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))])
-def test_partitioning_factory_hive_segment_encoding_key_encoded(pickled, pickle_module):
+def test_partitioning_factory_hive_segment_encoding_key_encoded(pickled, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     mockfs = fs._MockFileSystem()
     format = ds.IpcFileFormat()
     schema = pa.schema([("i64", pa.int64())])
@@ -2205,13 +2217,15 @@ def _check_dataset_from_path(path, table, dataset_reader, pickler, **kwargs):
 
 
 @pytest.mark.parquet
-def test_open_dataset_single_file(tempdir, dataset_reader, pickle_module):
+def test_open_dataset_single_file(tempdir, dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     table, path = _create_single_file(tempdir)
     _check_dataset_from_path(path, table, dataset_reader, pickle_module)
 
 
 @pytest.mark.parquet
-def test_deterministic_row_order(tempdir, dataset_reader, pickle_module):
+def test_deterministic_row_order(tempdir, dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     # ARROW-8447 Ensure that dataset.to_table (and Scanner::ToTable) returns a
     # deterministic row ordering. This is achieved by constructing a single
     # parquet file with one row per RowGroup.
@@ -2220,14 +2234,16 @@ def test_deterministic_row_order(tempdir, dataset_reader, pickle_module):
 
 
 @pytest.mark.parquet
-def test_open_dataset_directory(tempdir, dataset_reader, pickle_module):
+def test_open_dataset_directory(tempdir, dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     tables, _ = _create_directory_of_files(tempdir)
     table = pa.concat_tables(tables)
     _check_dataset_from_path(tempdir, table, dataset_reader, pickle_module)
 
 
 @pytest.mark.parquet
-def test_open_dataset_list_of_files(tempdir, dataset_reader, pickle_module):
+def test_open_dataset_list_of_files(tempdir, dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     tables, (path1, path2) = _create_directory_of_files(tempdir)
     table = pa.concat_tables(tables)
 
@@ -2266,7 +2282,8 @@ def test_open_dataset_filesystem_fspath(tempdir):
 
 
 @pytest.mark.parquet
-def test_construct_from_single_file(tempdir, dataset_reader, pickle_module):
+def test_construct_from_single_file(tempdir, dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     directory = tempdir / 'single-file'
     directory.mkdir()
     table, path = _create_single_file(directory)
@@ -2286,7 +2303,8 @@ def test_construct_from_single_file(tempdir, dataset_reader, pickle_module):
 
 
 @pytest.mark.parquet
-def test_construct_from_single_directory(tempdir, dataset_reader, pickle_module):
+def test_construct_from_single_directory(tempdir, dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     directory = tempdir / 'single-directory'
     directory.mkdir()
     tables, paths = _create_directory_of_files(directory)
@@ -2496,7 +2514,8 @@ def _create_partitioned_dataset(basedir):
 
 
 @pytest.mark.parquet
-def test_open_dataset_partitioned_directory(tempdir, dataset_reader, pickle_module):
+def test_open_dataset_partitioned_directory(tempdir, dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     full_table, path = _create_partitioned_dataset(tempdir)
 
     # no partitioning specified, just read all individual files
@@ -2563,7 +2582,8 @@ def test_open_dataset_unsupported_format(tempdir):
 
 
 @pytest.mark.parquet
-def test_open_union_dataset(tempdir, dataset_reader, pickle_module):
+def test_open_union_dataset(tempdir, dataset_reader, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     _, path = _create_single_file(tempdir)
     dataset = ds.dataset(path)
 
@@ -2661,7 +2681,8 @@ def expected_type(key):
 
 
 @pytest.mark.pandas
-def test_dataset_partitioned_dictionary_type_reconstruct(tempdir, pickle_module):
+def test_dataset_partitioned_dictionary_type_reconstruct(tempdir, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     # https://issues.apache.org/jira/browse/ARROW-11400
     table = pa.table({'part': np.repeat(['A', 'B'], 5), 'col': range(10)})
     part = ds.partitioning(table.select(['part']).schema, flavor="hive")
@@ -2692,7 +2713,6 @@ def test_dataset_partitioned_dictionary_type_reconstruct(tempdir, pickle_module)
 
 
 @pytest.fixture
-@pytest.mark.parquet
 def s3_example_simple(s3_server):
     from pyarrow.fs import FileSystem
 
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index a88e20eefe098..ae574f8fc1053 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -299,7 +299,8 @@ def test_ext_type_as_py():
     assert result.as_py() == expected
 
 
-def test_uuid_type_pickle(pickle_module):
+def test_uuid_type_pickle(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
         ty = UuidType()
         ser = pickle_module.dumps(ty, protocol=proto)
@@ -492,7 +493,8 @@ def test_ext_scalar_from_storage():
     assert s.value == pa.scalar(b"0123456789abcdef", ty.storage_type)
 
 
-def test_ext_array_pickling(pickle_module):
+def test_ext_array_pickling(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
         ty = ParamExtType(3)
         storage = pa.array([b"foo", b"bar"], type=pa.binary(3))
@@ -934,7 +936,8 @@ def test_generic_ext_type_equality():
     assert not period_type == period_type3
 
 
-def test_generic_ext_type_pickling(registered_period_type, pickle_module):
+def test_generic_ext_type_pickling(registered_period_type, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     # GH-36038
     for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
         period_type, _ = registered_period_type
@@ -943,7 +946,8 @@ def test_generic_ext_type_pickling(registered_period_type, pickle_module, reques
     assert period_type == period_type_pickled
 
 
-def test_generic_ext_array_pickling(registered_period_type, pickle_module):
+def test_generic_ext_array_pickling(registered_period_type, pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
         period_type, _ = registered_period_type
         storage = pa.array([1, 2, 3, 4], pa.int64())
@@ -1442,7 +1446,8 @@ def test_extension_to_pandas_storage_type(registered_period_type):
     assert isinstance(result["ext"].dtype, pd.ArrowDtype)
 
 
-def test_tensor_type_is_picklable(pickle_module):
+def test_tensor_type_is_picklable(pickle_module, request):
+    pickle_module = request.getfixturevalue(pickle_module)
     # GH-35599
     expected_type = pa.fixed_shape_tensor(pa.int32(), (2, 2))
 
@@ -1485,10 +1490,7 @@ def test_legacy_int_type():
     batch = pa.RecordBatch.from_arrays([ext_arr], names=['ext'])
     buf = ipc_write_batch(batch)
 
-    with pytest.warns(
-            RuntimeWarning,
-            match="pickle-based deserialization of pyarrow.PyExtensionType "
-                  "subclasses is disabled by default"):
+    with pytest.warns((RuntimeWarning,FutureWarning)):
         batch = ipc_read_batch(buf)
 
     assert isinstance(batch.column(0).type, pa.UnknownExtensionType)
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index d0fa253e314e9..b308b017835e5 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -362,54 +362,54 @@ def py_fsspec_s3fs(request, s3_server):
 
 @pytest.fixture(params=[
     pytest.param(
-        pytest.lazy_fixture('localfs'),
+        'localfs',
         id='LocalFileSystem()'
     ),
     pytest.param(
-        pytest.lazy_fixture('localfs_with_mmap'),
+        'localfs_with_mmap',
         id='LocalFileSystem(use_mmap=True)'
     ),
     pytest.param(
-        pytest.lazy_fixture('subtree_localfs'),
+        'subtree_localfs',
        id='SubTreeFileSystem(LocalFileSystem())'
     ),
     pytest.param(
-        pytest.lazy_fixture('s3fs'),
+        's3fs',
         id='S3FileSystem',
         marks=pytest.mark.s3
     ),
     pytest.param(
-        pytest.lazy_fixture('gcsfs'),
+        'gcsfs',
         id='GcsFileSystem',
         marks=pytest.mark.gcs
     ),
     pytest.param(
-        pytest.lazy_fixture('hdfs'),
+        'hdfs',
         id='HadoopFileSystem',
         marks=pytest.mark.hdfs
     ),
     pytest.param(
-        pytest.lazy_fixture('mockfs'),
+        'mockfs',
         id='_MockFileSystem()'
     ),
     pytest.param(
-        pytest.lazy_fixture('py_localfs'),
+        'py_localfs',
         id='PyFileSystem(ProxyHandler(LocalFileSystem()))'
     ),
     pytest.param(
-        pytest.lazy_fixture('py_mockfs'),
+        'py_mockfs',
         id='PyFileSystem(ProxyHandler(_MockFileSystem()))'
     ),
     pytest.param(
-        pytest.lazy_fixture('py_fsspec_localfs'),
+        'py_fsspec_localfs',
         id='PyFileSystem(FSSpecHandler(fsspec.LocalFileSystem()))'
     ),
     pytest.param(
-        pytest.lazy_fixture('py_fsspec_memoryfs'),
+        'py_fsspec_memoryfs',
         id='PyFileSystem(FSSpecHandler(fsspec.filesystem("memory")))'
     ),
     pytest.param(
-        pytest.lazy_fixture('py_fsspec_s3fs'),
+        'py_fsspec_s3fs',
         id='PyFileSystem(FSSpecHandler(s3fs.S3FileSystem()))',
         marks=pytest.mark.s3
     ),
 ])
@@ -419,22 +419,26 @@ def filesystem_config(request):
 @pytest.fixture
-def fs(request, filesystem_config):
+def fs(filesystem_config, request):
+    filesystem_config = request.getfixturevalue(filesystem_config)
     return filesystem_config['fs']
 
 
 @pytest.fixture
-def pathfn(request, filesystem_config):
+def pathfn(filesystem_config, request):
+    filesystem_config = request.getfixturevalue(filesystem_config)
     return filesystem_config['pathfn']
 
 
 @pytest.fixture
-def allow_move_dir(request, filesystem_config):
+def allow_move_dir(filesystem_config, request):
+    filesystem_config = request.getfixturevalue(filesystem_config)
     return filesystem_config['allow_move_dir']
 
 
 @pytest.fixture
-def allow_append_to_file(request, filesystem_config):
+def allow_append_to_file(filesystem_config, request):
+    filesystem_config = request.getfixturevalue(filesystem_config)
     return filesystem_config['allow_append_to_file']
 
 
@@ -542,7 +546,8 @@ def test_filesystem_equals():
     assert SubTreeFileSystem('/base', fs0) != SubTreeFileSystem('/other', fs0)
 
 
-def test_filesystem_equals_none(fs):
+def test_filesystem_equals_none(fs, request):
+    fs = request.getfixturevalue(fs)
     with pytest.raises(TypeError, match="got NoneType"):
         fs.equals(None)
 
@@ -565,7 +570,8 @@ def test_subtree_filesystem():
                                   ' base_fs=

Date: Tue, 30 Jan 2024 11:19:47 +0100
Subject: [PATCH 2/7] Fix long lines

---
 python/pyarrow/tests/parquet/test_dataset.py |  1 +
 python/pyarrow/tests/test_dataset.py         | 19 +++++++++++++------
 python/pyarrow/tests/test_extension_type.py  |  2 +-
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index 48a538251fb64..191c4a823a222 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1139,6 +1139,7 @@ def _make_dataset_for_pickling(tempdir, N=100):
 @pytest.mark.pandas
 def test_pickle_dataset(tempdir, pickle_module, request):
     pickle_module = request.getfixturevalue(pickle_module)
+
     def is_pickleable(obj):
         return obj == pickle_module.loads(pickle_module.dumps(obj))
 
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 49d5e271c52b8..d9b50a3c0e0dd 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -1347,7 +1347,8 @@ def test_fragments_parquet_row_groups_dictionary(tempdir, dataset_reader):
 
 
 @pytest.mark.parquet
-def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs, pickle_module, request):
+def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs,
+                                           pickle_module, request):
     pickle_module = request.getfixturevalue(pickle_module)
     fs, assert_opens = open_logging_fs
     _, dataset = _create_dataset_for_fragments(
@@ -1389,7 +1390,8 @@ def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs, pickle_modu
 
 
 @pytest.mark.parquet
-def test_fragments_parquet_pickle_no_metadata(tempdir, open_logging_fs, pickle_module, request):
+def test_fragments_parquet_pickle_no_metadata(tempdir, open_logging_fs,
+                                              pickle_module, request):
     pickle_module = request.getfixturevalue(pickle_module)
     # https://issues.apache.org/jira/browse/ARROW-15796
     fs, assert_opens = open_logging_fs
@@ -1918,7 +1920,9 @@ def test_partitioning_factory_segment_encoding(pickled, pickle_module, request):
 @pytest.mark.parametrize(
     "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))])
-def test_partitioning_factory_hive_segment_encoding_key_encoded(pickled, pickle_module, request):
+def test_partitioning_factory_hive_segment_encoding_key_encoded(pickled,
+                                                                pickle_module,
+                                                                request):
     pickle_module = request.getfixturevalue(pickle_module)
     mockfs = fs._MockFileSystem()
     format = ds.IpcFileFormat()
     schema = pa.schema([("i64", pa.int64())])
@@ -2303,7 +2307,8 @@ def test_construct_from_single_file(tempdir, dataset_reader, pickle_module, requ
 
 
 @pytest.mark.parquet
-def test_construct_from_single_directory(tempdir, dataset_reader, pickle_module, request):
+def test_construct_from_single_directory(tempdir, dataset_reader,
+                                         pickle_module, request):
     pickle_module = request.getfixturevalue(pickle_module)
     directory = tempdir / 'single-directory'
     directory.mkdir()
@@ -2514,7 +2519,8 @@ def _create_partitioned_dataset(basedir):
 
 
 @pytest.mark.parquet
-def test_open_dataset_partitioned_directory(tempdir, dataset_reader, pickle_module, request):
+def test_open_dataset_partitioned_directory(tempdir, dataset_reader,
+                                            pickle_module, request):
     pickle_module = request.getfixturevalue(pickle_module)
     full_table, path = _create_partitioned_dataset(tempdir)
 
@@ -2681,7 +2687,8 @@ def expected_type(key):
 
 
 @pytest.mark.pandas
-def test_dataset_partitioned_dictionary_type_reconstruct(tempdir, pickle_module, request):
+def test_dataset_partitioned_dictionary_type_reconstruct(tempdir, pickle_module,
+                                                         request):
     pickle_module = request.getfixturevalue(pickle_module)
     # https://issues.apache.org/jira/browse/ARROW-11400
     table = pa.table({'part': np.repeat(['A', 'B'], 5), 'col': range(10)})
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index ae574f8fc1053..82071509c15f8 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -1490,7 +1490,7 @@ def test_legacy_int_type():
     batch = pa.RecordBatch.from_arrays([ext_arr], names=['ext'])
     buf = ipc_write_batch(batch)
 
-    with pytest.warns((RuntimeWarning,FutureWarning)):
+    with pytest.warns((RuntimeWarning, FutureWarning)):
         batch = ipc_read_batch(buf)
 
     assert isinstance(batch.column(0).type, pa.UnknownExtensionType)

From bf87bc52e77a07a89d6fccae1a5e163ea64e6e95 Mon Sep 17 00:00:00 2001
From: AlenkaF
Date: Tue, 30 Jan 2024 11:43:12 +0100
Subject: [PATCH 3/7] Move getfixturevalue to the pickle_module fixture

---
 python/pyarrow/tests/conftest.py             |  2 +-
 python/pyarrow/tests/parquet/test_dataset.py |  3 +-
 python/pyarrow/tests/test_array.py           | 12 ++--
 python/pyarrow/tests/test_compute.py         |  9 +--
 python/pyarrow/tests/test_csv.py             | 12 ++--
 python/pyarrow/tests/test_dataset.py         | 71 +++++++-------------
 python/pyarrow/tests/test_extension_type.py  | 15 ++---
 python/pyarrow/tests/test_fs.py              | 21 ++----
 python/pyarrow/tests/test_io.py              | 15 ++---
 python/pyarrow/tests/test_json.py            |  6 +-
 python/pyarrow/tests/test_scalars.py         | 15 ++---
 python/pyarrow/tests/test_schema.py          |  3 +-
 python/pyarrow/tests/test_table.py           |  9 +--
 python/pyarrow/tests/test_types.py           |  3 +-
 14 files changed, 65 insertions(+), 131 deletions(-)

diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index 23ac18c7dd27c..0da757a4bc56e 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -264,7 +264,7 @@ def gcs_server():
     scope='session'
 )
 def pickle_module(request):
-    return request.param
+    return request.getfixturevalue(request.param)
 
 
 @pytest.fixture(scope='session')
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index 191c4a823a222..f05f5641634ea 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1137,8 +1137,7 @@ def _make_dataset_for_pickling(tempdir, N=100):
 
 
 @pytest.mark.pandas
-def test_pickle_dataset(tempdir, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_pickle_dataset(tempdir, pickle_module):
 
     def is_pickleable(obj):
         return obj == pickle_module.loads(pickle_module.dumps(obj))
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index fc9fddf8d8932..271c39a4555b9 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -2029,8 +2029,7 @@ def test_cast_identities(ty, values):
 
 
 @pickle_test_parametrize
-def test_array_pickle(data, typ, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_array_pickle(data, typ, pickle_module):
     # Allocate here so that we don't have any Arrow data allocated.
     # This is needed to ensure that allocator tests can be reliable.
     array = pa.array(data, type=typ)
@@ -2039,8 +2038,7 @@ def test_array_pickle(data, typ, pickle_module, request):
     assert array.equals(result)
 
 
-def test_array_pickle_dictionary(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_array_pickle_dictionary(pickle_module):
     # not included in the above as dictionary array cannot be created with
     # the pa.array function
     array = pa.DictionaryArray.from_arrays([0, 1, 2, 0, 1], ['a', 'b', 'c'])
@@ -2056,16 +2054,14 @@ def test_array_pickle_dictionary(pickle_module, request):
         size=st.integers(min_value=0, max_value=10)
     )
 )
-def test_pickling(arr, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_pickling(arr, pickle_module):
     data = pickle_module.dumps(arr)
     restored = pickle_module.loads(data)
     assert arr.equals(restored)
 
 
 @pickle_test_parametrize
-def test_array_pickle_protocol5(data, typ, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_array_pickle_protocol5(data, typ, pickle_module):
     # Test zero-copy pickling with protocol 5 (PEP 574)
     array = pa.array(data, type=typ)
     addresses = [buf.address if buf is not None else 0
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 0d1dd0df9b514..4b58dc65bae9b 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -277,8 +277,7 @@ def test_call_function_with_memory_pool():
     assert result3.equals(expected)
 
 
-def test_pickle_functions(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_pickle_functions(pickle_module):
     # Pickle registered functions
     for name in pc.list_functions():
         func = pc.get_function(name)
@@ -289,8 +288,7 @@ def test_pickle_functions(pickle_module, request):
         assert reconstructed.num_kernels == func.num_kernels
 
 
-def test_pickle_global_functions(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_pickle_global_functions(pickle_module):
     # Pickle global wrappers (manual or automatic) of registered functions
     for name in pc.list_functions():
         try:
@@ -3394,8 +3392,7 @@ def create_sample_expressions():
 
 
 # Tests the Arrow-specific serialization mechanism
-def test_expression_serialization_arrow(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_expression_serialization_arrow(pickle_module):
     for expr in create_sample_expressions()["all"]:
         assert isinstance(expr, pc.Expression)
         restored = pickle_module.loads(pickle_module.dumps(expr))
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 99b9c2b1d6665..31f24187e3b37 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -127,8 +127,7 @@ def __ne__(self, other):
                 other.result != self.result)
 
 
-def test_read_options(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_read_options(pickle_module):
     cls = ReadOptions
 
     opts = cls()
@@ -183,8 +182,7 @@ def test_read_options(pickle_module, request):
     opts.validate()
 
 
-def test_parse_options(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_parse_options(pickle_module):
     cls = ParseOptions
     skip_handler = InvalidRowHandler('skip')
 
@@ -244,8 +242,7 @@ def test_parse_options(pickle_module, request):
     opts.validate()
 
 
-def test_convert_options(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_convert_options(pickle_module):
     cls = ConvertOptions
 
     opts = cls()
@@ -627,8 +624,7 @@ def format_msg(msg_format, row, *args):
                         read_options=read_options,
                         convert_options=convert_options)
 
-    def test_invalid_row_handler(self, pickle_module, request):
-        pickle_module = request.getfixturevalue(pickle_module)
+    def test_invalid_row_handler(self, pickle_module):
         rows = b"a,b\nc\nd,e\nf,g,h\ni,j\n"
         parse_opts = ParseOptions()
         with pytest.raises(
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index d9b50a3c0e0dd..969c30aa59c1e 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -711,8 +711,7 @@ def test_partitioning():
     assert load_back is None
 
 
-def test_partitioning_pickling(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_partitioning_pickling(pickle_module):
     schema = pa.schema([
         pa.field('i64', pa.int64()),
         pa.field('f64', pa.float64())
@@ -844,8 +843,7 @@ def test_parquet_scan_options():
     assert opts7 != opts1
 
 
-def test_file_format_pickling(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_file_format_pickling(pickle_module):
     formats = [
         ds.IpcFileFormat(),
         ds.CsvFileFormat(),
@@ -884,8 +882,7 @@ def test_file_format_pickling(pickle_module, request):
     assert pickle_module.loads(pickle_module.dumps(file_format)) == file_format
 
 
-def test_fragment_scan_options_pickling(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_fragment_scan_options_pickling(pickle_module):
     options = [
         ds.CsvFragmentScanOptions(),
         ds.CsvFragmentScanOptions(
@@ -1068,8 +1065,7 @@ def test_make_fragment_with_size(s3_example_simple):
         table = dataset_with_size.to_table()
 
 
-def test_make_csv_fragment_from_buffer(dataset_reader, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_make_csv_fragment_from_buffer(dataset_reader, pickle_module):
     content = textwrap.dedent("""
         alpha,num,animal
         a,12,dog
@@ -1094,8 +1090,7 @@ def test_make_csv_fragment_from_buffer(dataset_reader, pickle_module, request):
     assert dataset_reader.to_table(pickled).equals(fragment.to_table())
 
 
-def test_make_json_fragment_from_buffer(dataset_reader, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_make_json_fragment_from_buffer(dataset_reader, pickle_module):
     content = '{"alpha" : "a", "num": 12, "animal" : "dog"}\n' + \
         '{"alpha" : "b", "num": 11, "animal" : "cat"}\n' + \
         '{"alpha" : "c", "num": 10, "animal" : "rabbit"}\n'
@@ -1118,8 +1113,7 @@ def test_make_json_fragment_from_buffer(dataset_reader, pickle_module, request):
 
 
 @pytest.mark.parquet
-def test_make_parquet_fragment_from_buffer(dataset_reader, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_make_parquet_fragment_from_buffer(dataset_reader, pickle_module):
     arrays = [
         pa.array(['a', 'b', 'c']),
         pa.array([12, 11, 10]),
@@ -1222,8 +1216,7 @@ def test_fragments_implicit_cast(tempdir):
 
 
 @pytest.mark.parquet
-def test_fragments_reconstruct(tempdir, dataset_reader, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_fragments_reconstruct(tempdir, dataset_reader, pickle_module):
     table, dataset = _create_dataset_for_fragments(tempdir)
 
     def assert_yields_projected(fragment, row_slice,
@@ -1348,8 +1341,7 @@ def test_fragments_parquet_row_groups_dictionary(tempdir, dataset_reader):
 
 @pytest.mark.parquet
 def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs,
-                                           pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+                                           pickle_module):
     fs, assert_opens = open_logging_fs
     _, dataset = _create_dataset_for_fragments(
         tempdir, chunk_size=2, filesystem=fs
@@ -1391,8 +1383,7 @@ def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs,
 
 @pytest.mark.parquet
 def test_fragments_parquet_pickle_no_metadata(tempdir, open_logging_fs,
-                                              pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+                                              pickle_module):
     # https://issues.apache.org/jira/browse/ARROW-15796
     fs, assert_opens = open_logging_fs
     _, dataset = _create_dataset_for_fragments(tempdir, filesystem=fs)
@@ -1557,8 +1548,7 @@ def test_fragments_parquet_row_groups_predicate(tempdir):
 
 @pytest.mark.parquet
 def test_fragments_parquet_row_groups_reconstruct(tempdir, dataset_reader,
-                                                  pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+                                                  pickle_module):
     table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=2)
     fragment = list(dataset.get_fragments())[0]
 
@@ -1747,8 +1737,7 @@ def test_fragments_repr(tempdir, dataset):
 @pytest.mark.parquet
 @pytest.mark.parametrize(
     "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))])
-def test_partitioning_factory(mockfs, pickled, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_partitioning_factory(mockfs, pickled, pickle_module):
     paths_or_selector = fs.FileSelector('subdir', recursive=True)
     format = ds.ParquetFileFormat()
 
@@ -1783,8 +1772,7 @@ def test_partitioning_factory(mockfs, pickled, pickle_module, request):
 @pytest.mark.parametrize(
     "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))])
 def test_partitioning_factory_dictionary(mockfs, infer_dictionary, pickled,
-                                         pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+                                         pickle_module):
     paths_or_selector = fs.FileSelector('subdir', recursive=True)
     format = ds.ParquetFileFormat()
     options = ds.FileSystemFactoryOptions('subdir')
@@ -1817,8 +1805,7 @@ def test_partitioning_factory_dictionary(mockfs, infer_dictionary, pickled,
 
 @pytest.mark.parametrize(
     "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))])
-def test_partitioning_factory_segment_encoding(pickled, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_partitioning_factory_segment_encoding(pickled, pickle_module):
     mockfs = fs._MockFileSystem()
     format = ds.IpcFileFormat()
     schema = pa.schema([("i64", pa.int64())])
@@ -1921,9 +1908,7 @@ def test_partitioning_factory_segment_encoding(pickled, pickle_module, request):
 @pytest.mark.parametrize(
     "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))])
 def test_partitioning_factory_hive_segment_encoding_key_encoded(pickled,
-                                                                pickle_module,
-                                                                request):
-    pickle_module = request.getfixturevalue(pickle_module)
+                                                                pickle_module):
     mockfs = fs._MockFileSystem()
     format = ds.IpcFileFormat()
     schema = pa.schema([("i64", pa.int64())])
@@ -2221,15 +2206,13 @@ def _check_dataset_from_path(path, table, dataset_reader, pickler, **kwargs):
 
 
 @pytest.mark.parquet
-def test_open_dataset_single_file(tempdir, dataset_reader, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_open_dataset_single_file(tempdir, dataset_reader, pickle_module):
     table, path = _create_single_file(tempdir)
     _check_dataset_from_path(path, table, dataset_reader, pickle_module)
 
 
 @pytest.mark.parquet
-def test_deterministic_row_order(tempdir, dataset_reader, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_deterministic_row_order(tempdir, dataset_reader, pickle_module):
     # ARROW-8447 Ensure that dataset.to_table (and Scanner::ToTable) returns a
     # deterministic row ordering. This is achieved by constructing a single
     # parquet file with one row per RowGroup.
@@ -2238,16 +2221,14 @@ def test_deterministic_row_order(tempdir, dataset_reader, pickle_module, request
 
 
 @pytest.mark.parquet
-def test_open_dataset_directory(tempdir, dataset_reader, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_open_dataset_directory(tempdir, dataset_reader, pickle_module):
     tables, _ = _create_directory_of_files(tempdir)
     table = pa.concat_tables(tables)
     _check_dataset_from_path(tempdir, table, dataset_reader, pickle_module)
 
 
 @pytest.mark.parquet
-def test_open_dataset_list_of_files(tempdir, dataset_reader, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_open_dataset_list_of_files(tempdir, dataset_reader, pickle_module):
     tables, (path1, path2) = _create_directory_of_files(tempdir)
     table = pa.concat_tables(tables)
 
@@ -2286,8 +2267,7 @@ def test_open_dataset_filesystem_fspath(tempdir):
 
 
 @pytest.mark.parquet
-def test_construct_from_single_file(tempdir, dataset_reader, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_construct_from_single_file(tempdir, dataset_reader, pickle_module):
     directory = tempdir / 'single-file'
     directory.mkdir()
     table, path = _create_single_file(directory)
@@ -2308,8 +2288,7 @@ def test_construct_from_single_file(tempdir, dataset_reader, pickle_module, requ
 
 
 @pytest.mark.parquet
 def test_construct_from_single_directory(tempdir, dataset_reader,
-                                         pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+                                         pickle_module):
     directory = tempdir / 'single-directory'
     directory.mkdir()
     tables, paths = _create_directory_of_files(directory)
@@ -2520,8 +2499,7 @@ def _create_partitioned_dataset(basedir):
 
 
 @pytest.mark.parquet
 def test_open_dataset_partitioned_directory(tempdir, dataset_reader,
-                                            pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+                                            pickle_module):
     full_table, path = _create_partitioned_dataset(tempdir)
 
     # no partitioning specified, just read all individual files
@@ -2588,8 +2566,7 @@ def test_open_dataset_unsupported_format(tempdir):
 
 
 @pytest.mark.parquet
-def test_open_union_dataset(tempdir, dataset_reader, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_open_union_dataset(tempdir, dataset_reader, pickle_module):
     _, path = _create_single_file(tempdir)
     dataset = ds.dataset(path)
 
@@ -2687,9 +2664,7 @@ def expected_type(key):
 
 
 @pytest.mark.pandas
-def test_dataset_partitioned_dictionary_type_reconstruct(tempdir, pickle_module,
-                                                         request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_dataset_partitioned_dictionary_type_reconstruct(tempdir, pickle_module):
     # https://issues.apache.org/jira/browse/ARROW-11400
     table = pa.table({'part': np.repeat(['A', 'B'], 5), 'col': range(10)})
     part = ds.partitioning(table.select(['part']).schema, flavor="hive")
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index 82071509c15f8..d8c792ef00c6b 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -299,8 +299,7 @@ def test_ext_type_as_py():
     assert result.as_py() == expected
 
 
-def test_uuid_type_pickle(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_uuid_type_pickle(pickle_module):
     for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
         ty = UuidType()
         ser = pickle_module.dumps(ty, protocol=proto)
@@ -493,8 +492,7 @@ def test_ext_scalar_from_storage():
     assert s.value == pa.scalar(b"0123456789abcdef", ty.storage_type)
 
 
-def test_ext_array_pickling(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_ext_array_pickling(pickle_module):
     for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
         ty = ParamExtType(3)
         storage = pa.array([b"foo", b"bar"], type=pa.binary(3))
@@ -936,8 +934,7 @@ def test_generic_ext_type_equality():
     assert not period_type == period_type3
 
 
-def test_generic_ext_type_pickling(registered_period_type, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_generic_ext_type_pickling(registered_period_type, pickle_module):
     # GH-36038
     for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
         period_type, _ = registered_period_type
@@ -946,8 +943,7 @@ def test_generic_ext_type_pickling(registered_period_type, pickle_module, reques
     assert period_type == period_type_pickled
 
 
-def test_generic_ext_array_pickling(registered_period_type, pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_generic_ext_array_pickling(registered_period_type, pickle_module):
     for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
         period_type, _ = registered_period_type
         storage = pa.array([1, 2, 3, 4], pa.int64())
@@ -1446,8 +1442,7 @@ def test_extension_to_pandas_storage_type(registered_period_type):
     assert isinstance(result["ext"].dtype, pd.ArrowDtype)
 
 
-def test_tensor_type_is_picklable(pickle_module, request):
-    pickle_module = request.getfixturevalue(pickle_module)
+def test_tensor_type_is_picklable(pickle_module):
     # GH-35599
     expected_type = pa.fixed_shape_tensor(pa.int32(), (2, 2))
 
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index b308b017835e5..3384867e44a45 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -570,8 +570,7 @@ def test_subtree_filesystem():
                                   ' base_fs=

Date: Tue, 30 Jan 2024 12:20:09 +0100
Subject: [PATCH 4/7] Add suggested changes

---
 python/pyarrow/tests/test_dataset.py | 15 +++++-----------
 python/pyarrow/tests/test_fs.py      | 17 ++++++-----------
 python/pyarrow/tests/test_ipc.py     |  5 ++---
 3 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 969c30aa59c1e..a9054f0b174aa 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -1340,8 +1340,7 @@ def test_fragments_parquet_row_groups_dictionary(tempdir, dataset_reader):
 
 
 @pytest.mark.parquet
-def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs,
-                                           pickle_module):
+def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs, pickle_module):
     fs, assert_opens = open_logging_fs
     _, dataset = _create_dataset_for_fragments(
         tempdir, chunk_size=2, filesystem=fs
@@ -1382,8 +1381,7 @@ def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs,
 
 
 @pytest.mark.parquet
-def test_fragments_parquet_pickle_no_metadata(tempdir, open_logging_fs,
-                                              pickle_module):
+def test_fragments_parquet_pickle_no_metadata(tempdir, open_logging_fs, pickle_module):
     # https://issues.apache.org/jira/browse/ARROW-15796
     fs, assert_opens = open_logging_fs
     _, dataset = _create_dataset_for_fragments(tempdir, filesystem=fs)
@@ -1905,8 +1905,7 @@ def test_partitioning_factory_segment_encoding(pickled, pickle_module):
 @pytest.mark.parametrize(
     "pickled", [lambda x, m: x, lambda x, m: m.loads(m.dumps(x))])
-def test_partitioning_factory_hive_segment_encoding_key_encoded(pickled,
-                                                                pickle_module):
+def test_partitioning_factory_hive_segment_encoding_key_encoded(pickled, pickle_module):
     mockfs = fs._MockFileSystem()
     format = ds.IpcFileFormat()
     schema = pa.schema([("i64", pa.int64())])
@@ -2287,8 +2284,7 @@ def test_construct_from_single_file(tempdir, dataset_reader, pickle_module):
 
 
 @pytest.mark.parquet
-def test_construct_from_single_directory(tempdir, dataset_reader,
-                                         pickle_module):
+def test_construct_from_single_directory(tempdir, dataset_reader, pickle_module):
     directory = tempdir / 'single-directory'
     directory.mkdir()
     tables, paths = _create_directory_of_files(directory)
@@ -2498,8 +2494,7 @@ def _create_partitioned_dataset(basedir):
 
 
 @pytest.mark.parquet
-def test_open_dataset_partitioned_directory(tempdir, dataset_reader,
-                                            pickle_module):
+def test_open_dataset_partitioned_directory(tempdir, dataset_reader, pickle_module):
     full_table, path = _create_partitioned_dataset(tempdir)
 
     # no partitioning specified, just read all individual files
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 3384867e44a45..ab10addfc3d4c 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -415,30 +415,26 @@ def py_fsspec_s3fs(request, s3_server):
     ),
 ])
 def filesystem_config(request):
-    return request.param
+    return request.getfixturevalue(request.param)
 
 
 @pytest.fixture
-def fs(filesystem_config, request):
-    filesystem_config = request.getfixturevalue(filesystem_config)
+def fs(filesystem_config):
     return filesystem_config['fs']
 
 
 @pytest.fixture
-def pathfn(filesystem_config, request):
-    filesystem_config = request.getfixturevalue(filesystem_config)
+def pathfn(filesystem_config):
     return filesystem_config['pathfn']
 
 
 @pytest.fixture
-def allow_move_dir(filesystem_config, request):
-    filesystem_config = request.getfixturevalue(filesystem_config)
+def allow_move_dir(filesystem_config):
     return filesystem_config['allow_move_dir']
 
 
 @pytest.fixture
-def allow_append_to_file(filesystem_config, request):
-    filesystem_config = request.getfixturevalue(filesystem_config)
+def allow_append_to_file(filesystem_config):
     return filesystem_config['allow_append_to_file']
 
 
@@ -546,8 +542,7 @@ def test_filesystem_equals():
     assert SubTreeFileSystem('/base', fs0) != SubTreeFileSystem('/other', fs0)
 
 
-def test_filesystem_equals_none(fs, request):
-    fs = request.getfixturevalue(fs)
+def test_filesystem_equals_none(fs):
     with pytest.raises(TypeError, match="got NoneType"):
         fs.equals(None)
 
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 20edaae2aa6c2..407011d90b734 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -151,7 +151,7 @@ def stream_fixture():
     )
 ])
 def format_fixture(request):
-    return request.param
+    return request.getfixturevalue(request.param)
 
 
 def test_empty_file():
@@ -590,8 +590,7 @@ def test_read_options_included_fields(stream_fixture):
     assert result1 == table
 
 
-def test_dictionary_delta(format_fixture, request):
-    format_fixture = request.getfixturevalue(format_fixture)
+def test_dictionary_delta(format_fixture):
     ty = pa.dictionary(pa.int8(), pa.utf8())
     data = [["foo", "foo", None],
             ["foo", "bar", "foo"],  # potential delta

From c740098c32c3b8ae5d4d989a0a0e945b4fc1b4c1 Mon Sep 17 00:00:00 2001
From: AlenkaF
Date: Tue, 30 Jan 2024 12:24:00 +0100
Subject: [PATCH 5/7] Remove minor leftover

---
 python/pyarrow/tests/parquet/test_dataset.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index f05f5641634ea..b6e351bdef9a7 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1138,7 +1138,6 @@ def _make_dataset_for_pickling(tempdir, N=100):
 
 @pytest.mark.pandas
 def test_pickle_dataset(tempdir, pickle_module):
-
     def is_pickleable(obj):
         return obj == pickle_module.loads(pickle_module.dumps(obj))
 

From e4146cb26a86c2f2696030bab3e367f4fea7c325 Mon Sep 17 00:00:00 2001
From: AlenkaF
Date: Thu, 1 Feb 2024 10:26:56 +0100
Subject: [PATCH 6/7] Keep the pin for pytest

---
 ci/conda_env_python.txt            | 2 +-
 python/requirements-test.txt       | 2 +-
 python/requirements-wheel-test.txt | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt
index 0389d538229e8..59e2def1bf339 100644
--- a/ci/conda_env_python.txt
+++ b/ci/conda_env_python.txt
@@ -23,7 +23,7 @@ cloudpickle
 fsspec
 hypothesis
 numpy>=1.16.6
-pytest
+pytest<8
 pytest-faulthandler
 s3fs>=2023.10.0
 setuptools
diff --git a/python/requirements-test.txt b/python/requirements-test.txt
index 975477c4223dd..2108d70a543f5 100644
--- a/python/requirements-test.txt
+++ b/python/requirements-test.txt
@@ -1,5 +1,5 @@
 cffi
 hypothesis
 pandas
-pytest
+pytest<8
 pytz
diff --git a/python/requirements-wheel-test.txt b/python/requirements-wheel-test.txt
index 46bedc13ba1a7..a1046bc18c704 100644
--- a/python/requirements-wheel-test.txt
+++ b/python/requirements-wheel-test.txt
@@ -1,7 +1,7 @@
 cffi
 cython
 hypothesis
-pytest
+pytest<8
 pytz
 tzdata; sys_platform == 'win32'

From 702b73f6e361274a8633e0382e464ca80ed1f474 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Thu, 1 Feb 2024 14:03:33 +0100
Subject: [PATCH 7/7] Update python/pyarrow/tests/test_array.py

---
 python/pyarrow/tests/test_array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 271c39a4555b9..f851d4e0b6c29 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -2054,7 +2054,7 @@ def test_array_pickle_dictionary(pickle_module):
         size=st.integers(min_value=0, max_value=10)
     )
 )
-def test_pickling(arr, pickle_module):
+def test_pickling(pickle_module, arr):
     data = pickle_module.dumps(arr)
     restored = pickle_module.loads(data)
     assert arr.equals(restored)