diff --git a/docs/redirects.json b/docs/redirects.json index a54ffb1d81..19d3482d8a 100644 --- a/docs/redirects.json +++ b/docs/redirects.json @@ -120,7 +120,7 @@ "_auto/ak.to_arrow.any-ext": "../reference/generated/ak.to_arrow.html", "_auto/ak.to_arrow_table.any-ext": "../reference/generated/ak.to_arrow_table.html", "_auto/ak.to_buffers.any-ext": "../reference/generated/ak.to_buffers.html", - "_auto/ak.to_categorical.any-ext": "../reference/generated/ak.to_categorical.html", + "_auto/ak.to_categorical.any-ext": "../reference/generated/ak.str.to_categorical.html", "_auto/ak.to_cupy.any-ext": "../reference/generated/ak.to_cupy.html", "_auto/ak.to_jax.any-ext": "../reference/generated/ak.to_jax.html", "_auto/ak.to_json.any-ext": "../reference/generated/ak.to_json.html", diff --git a/docs/reference/toctree.txt b/docs/reference/toctree.txt index 287471ba6e..4ae6c64722 100644 --- a/docs/reference/toctree.txt +++ b/docs/reference/toctree.txt @@ -232,10 +232,10 @@ .. toctree:: :caption: Arrays of categorical data - generated/ak.to_categorical generated/ak.from_categorical generated/ak.is_categorical generated/ak.categories + generated/ak.str.to_categorical .. toctree:: :caption: Indexing and grouping diff --git a/docs/user-guide/how-to-create-strings.md b/docs/user-guide/how-to-create-strings.md index 9cce330bb1..3e9b5ca4a5 100644 --- a/docs/user-guide/how-to-create-strings.md +++ b/docs/user-guide/how-to-create-strings.md @@ -95,7 +95,7 @@ Categorical strings A large set of strings with few unique values are more efficiently manipulated as integers than as strings. In Pandas, this is [categorical data](https://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html), in R, it's called a [factor](https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/factor), and in Arrow and Parquet, it's [dictionary encoding](https://arrow.apache.org/blog/2019/09/05/faster-strings-cpp-parquet/). 
-The {func}`ak.to_categorical` function makes Awkward Arrays categorical in this sense. {func}`ak.to_arrow` and {func}`ak.to_parquet` recognize categorical data and convert it to the corresponding Arrow and Parquet types. +The {func}`ak.str.to_categorical` function (which requires PyArrow) makes Awkward Arrays categorical in this sense. {func}`ak.to_arrow` and {func}`ak.to_parquet` recognize categorical data and convert it to the corresponding Arrow and Parquet types. ```{code-cell} ipython3 uncategorized = ak.Array(["three", "one", "two", "two", "three", "one", "one", "one"]) @@ -103,7 +103,7 @@ uncategorized ``` ```{code-cell} ipython3 -categorized = ak.to_categorical(uncategorized) +categorized = ak.str.to_categorical(uncategorized) categorized ``` diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py index 1e920ebc5d..0792544c32 100644 --- a/src/awkward/operations/__init__.py +++ b/src/awkward/operations/__init__.py @@ -78,7 +78,6 @@ from awkward.operations.ak_to_arrow_table import * from awkward.operations.ak_to_backend import * from awkward.operations.ak_to_buffers import * -from awkward.operations.ak_to_categorical import * from awkward.operations.ak_to_cupy import * from awkward.operations.ak_to_dataframe import * from awkward.operations.ak_to_feather import * diff --git a/src/awkward/operations/ak_categories.py b/src/awkward/operations/ak_categories.py index d507920c9d..7f36896b9e 100644 --- a/src/awkward/operations/ak_categories.py +++ b/src/awkward/operations/ak_categories.py @@ -18,7 +18,7 @@ def categories(array, highlevel=True): #ak.contents.IndexedOptionArray labeled with parameter `"__array__" = "categorical"`), then this function returns its categories. - See also #ak.is_categorical, #ak.to_categorical, #ak.from_categorical. + See also #ak.is_categorical, #ak.str.to_categorical, #ak.from_categorical. 
""" # Dispatch yield (array,) diff --git a/src/awkward/operations/ak_from_categorical.py b/src/awkward/operations/ak_from_categorical.py index c7f9640325..ed798cd3cc 100644 --- a/src/awkward/operations/ak_from_categorical.py +++ b/src/awkward/operations/ak_from_categorical.py @@ -23,8 +23,7 @@ def from_categorical(array, *, highlevel=True, behavior=None): size of the dataset. (Conversion to categorical is expensive; conversion from categorical is cheap.) - See also #ak.is_categorical, #ak.categories, #ak.to_categorical, - #ak.str.to_categorical, #ak.from_categorical. + See also #ak.is_categorical, #ak.categories, #ak.str.to_categorical. """ # Dispatch yield (array,) diff --git a/src/awkward/operations/ak_is_categorical.py b/src/awkward/operations/ak_is_categorical.py index 7c4e3f8e1e..d23c75a2b7 100644 --- a/src/awkward/operations/ak_is_categorical.py +++ b/src/awkward/operations/ak_is_categorical.py @@ -15,7 +15,7 @@ def is_categorical(array): `"__array__" = "categorical"`), then this function returns True; otherwise, it returns False. - See also #ak.categories, #ak.to_categorical, #ak.from_categorical. + See also #ak.categories, #ak.str.to_categorical, #ak.from_categorical. 
""" # Dispatch yield (array,) diff --git a/src/awkward/operations/ak_to_categorical.py b/src/awkward/operations/ak_to_categorical.py deleted file mode 100644 index 87eb7f0aff..0000000000 --- a/src/awkward/operations/ak_to_categorical.py +++ /dev/null @@ -1,160 +0,0 @@ -# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE -__all__ = ("to_categorical",) -import awkward as ak -from awkward._behavior import behavior_of -from awkward._categorical import as_hashable -from awkward._dispatch import high_level_function -from awkward._errors import deprecate -from awkward._layout import wrap_layout -from awkward._nplikes.numpy import Numpy -from awkward._nplikes.numpy_like import NumpyMetadata - -np = NumpyMetadata.instance() -numpy = Numpy.instance() - - -@high_level_function() -def to_categorical(array, *, highlevel=True, behavior=None): - """ - Args: - array: Array-like data (anything #ak.to_layout recognizes). - highlevel (bool): If True, return an #ak.Array; otherwise, return - a low-level #ak.contents.Content subclass. - behavior (None or dict): Custom #ak.behavior for the output array, if - high-level. - - Creates a categorical dataset, which has the following properties: - - * only distinct values (categories) are stored in their entirety, - * pointers to those distinct values are represented by integers - (an #ak.contents.IndexedArray or #ak.contents.IndexedOptionArray - labeled with parameter `"__array__" = "categorical"`. - - This is equivalent to R's "factor", Pandas's "categorical", and - Arrow/Parquet's "dictionary encoding." It differs from generic uses of - #ak.contents.IndexedArray and #ak.contents.IndexedOptionArray in Awkward - Arrays by the guarantee of no duplicate categories and the `"categorical"` - parameter. 
- - >>> array = ak.Array([["one", "two", "three"], [], ["three", "two"]]) - >>> categorical = ak.to_categorical(array) - >>> categorical - - >>> categorical.type.show() - 3 * var * categorical[type=string] - >>> categorical.to_list() == array.to_list() - True - >>> ak.categories(categorical) - - >>> ak.is_categorical(categorical) - True - >>> ak.from_categorical(categorical) - - - This function descends through nested lists, but not into the fields of - records, so records can be categories. To make categorical record - fields, split up the record, apply this function to each desired field, - and #ak.zip the results together. - - >>> records = ak.Array([ - ... {"x": 1.1, "y": "one"}, - ... {"x": 2.2, "y": "two"}, - ... {"x": 3.3, "y": "three"}, - ... {"x": 2.2, "y": "two"}, - ... {"x": 1.1, "y": "one"} - ... ]) - >>> records - - >>> categorical_records = ak.zip({ - ... "x": ak.to_categorical(records["x"]), - ... "y": ak.to_categorical(records["y"]), - ... }) - >>> categorical_records - - >>> categorical_records.type.show() - 5 * { - x: categorical[type=float64], - y: categorical[type=string] - } - >>> categorical_records.to_list() == records.to_list() - True - - The check for uniqueness is currently implemented in a Python loop, so - conversion to categorical should be regarded as expensive. (This can - change, but it would always be an _n log(n)_ operation.) - - See also #ak.is_categorical, #ak.categories, #ak.from_categorical. 
- """ - # Dispatch - yield (array,) - - # Implementation - return _impl(array, highlevel, behavior) - - -def _impl(array, highlevel, behavior): - deprecate( - "The general purpose `ak.to_categorical` has been replaced by `ak.str.to_categorical`", - "2.5.0", - ) - - def action(layout, **kwargs): - if layout.purelist_depth == 1: - if layout.is_indexed and layout.is_option: - content = layout.content - cls = ak.contents.IndexedOptionArray - elif layout.is_indexed: - content = layout.content - cls = ak.contents.IndexedArray - elif layout.is_option: - content = layout.content - cls = ak.contents.IndexedOptionArray - else: - content = layout - cls = ak.contents.IndexedArray - - content_list = ak.operations.to_list(content) - hashable = [as_hashable(x) for x in content_list] - - lookup = {} - is_first = numpy.empty(len(hashable), dtype=np.bool_) - mapping = numpy.empty(len(hashable), dtype=np.int64) - for i, x in enumerate(hashable): - if x in lookup: - is_first[i] = False - mapping[i] = lookup[x] - else: - is_first[i] = True - lookup[x] = j = len(lookup) - mapping[i] = j - - if layout.is_indexed and layout.is_option: - original_index = numpy.asarray(layout.index.data) - index = mapping[original_index] - index[original_index < 0] = -1 - index = ak.index.Index64(index) - - elif layout.is_indexed: - original_index = numpy.asarray(layout.index.data) - index = ak.index.Index64(mapping[original_index]) - - elif layout.is_option: - mask = numpy.asarray(layout.mask_as_bool(valid_when=False)) - mapping[mask.view(np.bool_)] = -1 - index = ak.index.Index64(mapping) - - else: - index = ak.index.Index64(mapping) - - out = cls(index, content[is_first], parameters={"__array__": "categorical"}) - return out - - else: - return None - - layout = ak.operations.to_layout( - array, allow_record=False, allow_unknown=False, primitive_policy="error" - ) - behavior = behavior_of(array, behavior=behavior) - out = ak._do.recursively_apply(layout, action, behavior) - return wrap_layout(out, 
behavior, highlevel) diff --git a/tests/test_0401_add_categorical_type_for_arrow_dictionary.py b/tests/test_0401_add_categorical_type_for_arrow_dictionary.py index 0419d5963f..6144439b5f 100644 --- a/tests/test_0401_add_categorical_type_for_arrow_dictionary.py +++ b/tests/test_0401_add_categorical_type_for_arrow_dictionary.py @@ -206,273 +206,6 @@ def test_option_two_extra(): ] -def test_to_categorical_numbers(): - array = ak.Array([1.1, 2.2, 3.3, 1.1, 2.2, 3.3, 1.1, 2.2, 3.3]) - assert not ak.operations.ak_is_categorical.is_categorical(array) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - categorical = ak.operations.ak_to_categorical.to_categorical(array) - assert ak.operations.ak_is_categorical.is_categorical(categorical) - assert to_list(array) == categorical.to_list() - assert to_list(categorical.layout.content) == [1.1, 2.2, 3.3] - not_categorical = ak.operations.ak_from_categorical.from_categorical(categorical) - assert not ak.operations.ak_is_categorical.is_categorical(not_categorical) - assert ak.operations.ak_categories.categories(categorical).to_list() == [ - 1.1, - 2.2, - 3.3, - ] - - -def test_to_categorical_nested(): - array = ak.Array([["one", "two", "three"], [], ["one", "two"], ["three"]]) - assert not ak.operations.ak_is_categorical.is_categorical(array) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - categorical = ak.operations.ak_to_categorical.to_categorical(array) - assert ak.operations.ak_is_categorical.is_categorical(categorical) - assert to_list(array) == categorical.to_list() - not_categorical = ak.operations.ak_from_categorical.from_categorical(categorical) - assert not ak.operations.ak_is_categorical.is_categorical(not_categorical) - assert ak.operations.ak_categories.categories(categorical).to_list() == [ - "one", - "two", - "three", - ] - - -def test_to_categorical(): - array = ak.Array( - ["one", "two", "three", "one", 
"two", "three", "one", "two", "three"] - ) - assert not ak.operations.ak_is_categorical.is_categorical(array) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - categorical = ak.operations.ak_to_categorical.to_categorical(array) - assert ak.operations.ak_is_categorical.is_categorical(categorical) - assert to_list(array) == categorical.to_list() - assert to_list(categorical.layout.content) == ["one", "two", "three"] - not_categorical = ak.operations.ak_from_categorical.from_categorical(categorical) - assert not ak.operations.ak_is_categorical.is_categorical(not_categorical) - assert ak.operations.ak_categories.categories(categorical).to_list() == [ - "one", - "two", - "three", - ] - - -def test_to_categorical_none(): - array = ak.Array( - [ - "one", - "two", - "three", - None, - "one", - "two", - "three", - None, - "one", - "two", - "three", - None, - ] - ) - assert not ak.operations.ak_is_categorical.is_categorical(array) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - categorical = ak.operations.ak_to_categorical.to_categorical(array) - assert ak.operations.ak_is_categorical.is_categorical(categorical) - assert to_list(array) == categorical.to_list() - assert to_list(categorical.layout.content) == ["one", "two", "three"] - not_categorical = ak.operations.ak_from_categorical.from_categorical(categorical) - assert not ak.operations.ak_is_categorical.is_categorical(not_categorical) - assert ak.operations.ak_categories.categories(categorical).to_list() == [ - "one", - "two", - "three", - ] - - -def test_to_categorical_masked(): - content = ak.Array( - [ - "one", - "two", - "three", - "one", - "one", - "two", - "three", - "two", - "one", - "two", - "three", - "three", - ] - ).layout - mask = ak.index.Index8( - np.array( - [ - False, - False, - False, - True, - False, - False, - False, - True, - False, - False, - False, - True, - ] - ) - ) - array = 
ak.Array(ak.contents.ByteMaskedArray(mask, content, valid_when=False)) - assert not ak.operations.ak_is_categorical.is_categorical(array) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - categorical = ak.operations.ak_to_categorical.to_categorical(array) - assert ak.operations.ak_is_categorical.is_categorical(categorical) - assert to_list(array) == categorical.to_list() - assert to_list(categorical.layout.content) == ["one", "two", "three"] - not_categorical = ak.operations.ak_from_categorical.from_categorical(categorical) - assert not ak.operations.ak_is_categorical.is_categorical(not_categorical) - assert ak.operations.ak_categories.categories(categorical).to_list() == [ - "one", - "two", - "three", - ] - - -def test_to_categorical_masked_again(): - content = ak.Array( - ["one", "two", "three", "one", "one", "two", "three", "two"] - ).layout - index = ak.index.Index64( - np.array([0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3], dtype=np.int64) - ) - indexedarray = ak.contents.IndexedArray(index, content) - mask = ak.index.Index8( - np.array( - [ - False, - False, - False, - True, - False, - False, - False, - True, - False, - False, - False, - True, - ] - ) - ) - array = ak.Array( - ak.contents.ByteMaskedArray.simplified(mask, indexedarray, valid_when=False) - ) - assert not ak.operations.ak_is_categorical.is_categorical(array) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - categorical = ak.operations.ak_to_categorical.to_categorical(array) - assert ak.operations.ak_is_categorical.is_categorical(categorical) - assert to_list(array) == categorical.to_list() - assert to_list(categorical.layout.content) == ["one", "two", "three"] - not_categorical = ak.operations.ak_from_categorical.from_categorical(categorical) - assert not ak.operations.ak_is_categorical.is_categorical(not_categorical) - assert ak.operations.ak_categories.categories(categorical).to_list() == [ - 
"one", - "two", - "three", - ] - - -@pytest.mark.skip(reason="Fix issues for categorical type") -def test_typestr(): - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - assert ( - str( - ak.operations.type( - ak.operations.ak_to_categorical.to_categorical( - ak.Array([1.1, 2.2, 2.2, 3.3]) - ) - ) - ) - == "4 * categorical[type=float64]" - ) - - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - assert ( - str( - ak.operations.type( - ak.operations.ak_to_categorical.to_categorical( - ak.Array([1.1, 2.2, None, 2.2, 3.3]) - ) - ) - ) - == "5 * categorical[type=?float64]" - ) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - assert ( - str( - ak.operations.type( - ak.operations.ak_to_categorical.to_categorical( - ak.Array(["one", "two", "two", "three"]) - ) - ) - ) - == "4 * categorical[type=string]" - ) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - assert ( - str( - ak.operations.type( - ak.operations.ak_to_categorical.to_categorical( - ak.Array(["one", "two", None, "two", "three"]) - ) - ) - ) - == "5 * categorical[type=?string]" - ) - - -def test_zip(): - x = ak.Array([1.1, 2.2, 3.3]) - y = ak.Array(["one", "two", "three"]) - assert ak.zip({"x": x, "y": y}).to_list() == [ - {"x": 1.1, "y": "one"}, - {"x": 2.2, "y": "two"}, - {"x": 3.3, "y": "three"}, - ] - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - y = ak.operations.ak_to_categorical.to_categorical(y) - assert ak.zip({"x": x, "y": y}).to_list() == [ - {"x": 1.1, "y": "one"}, - {"x": 2.2, "y": "two"}, - {"x": 3.3, "y": "three"}, - ] - - pyarrow = pytest.importorskip("pyarrow") diff --git a/tests/test_0404_array_validity_check.py b/tests/test_0404_array_validity_check.py index 1f2f7bae8f..d972e41f8d 100644 --- 
a/tests/test_0404_array_validity_check.py +++ b/tests/test_0404_array_validity_check.py @@ -212,11 +212,10 @@ def test_subranges_equal(): def test_categorical(): + pytest.importorskip("pyarrow") + array = ak.highlevel.Array(["1chchc", "1chchc", "2sss", "3", "4", "5"]) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - categorical = ak.operations.ak_to_categorical.to_categorical(array) + categorical = ak.str.to_categorical(array) assert ak.operations.is_valid(categorical) is True assert ak._do.is_unique(categorical.layout) is False diff --git a/tests/test_0674_categorical_validation.py b/tests/test_0674_categorical_validation.py index 7b04b715a7..99e1ccf4e6 100644 --- a/tests/test_0674_categorical_validation.py +++ b/tests/test_0674_categorical_validation.py @@ -11,10 +11,7 @@ def test_categorical_is_valid(): # validate a categorical array by its content arr = ak.Array([2019, 2020, 2021, 2020, 2019]) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - categorical = ak.operations.ak_to_categorical.to_categorical(arr) + categorical = ak.str.to_categorical(arr) assert ak.operations.is_valid(categorical) diff --git a/tests/test_0773_typeparser.py b/tests/test_0773_typeparser.py index aef56b3395..d95fc327e3 100644 --- a/tests/test_0773_typeparser.py +++ b/tests/test_0773_typeparser.py @@ -235,28 +235,22 @@ def test_arraytype_bytestring(): def test_arraytype_categorical_1(): - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - text = str( - ak.operations.ak_to_categorical.to_categorical( - ak.Array(["one", "one", "two", "three", "one", "three"]) - ).type - ) + pytest.importorskip("pyarrow") + + text = str( + ak.str.to_categorical( + ak.Array(["one", "one", "two", "three", "one", "three"]) + ).type + ) parsedtype = ak.types.from_datashape(text, highlevel=True) assert isinstance(parsedtype, ak.types.ArrayType) assert 
str(parsedtype) == text def test_arraytype_categorical_2(): - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - text = str( - ak.operations.ak_to_categorical.to_categorical( - ak.Array([1.1, 1.1, 2.2, 3.3, 1.1, 3.3]) - ).type - ) + pytest.importorskip("pyarrow") + + text = str(ak.str.to_categorical(ak.Array([1.1, 1.1, 2.2, 3.3, 1.1, 3.3])).type) parsedtype = ak.types.from_datashape(text, highlevel=True) assert isinstance(parsedtype, ak.types.ArrayType) assert str(parsedtype) == text diff --git a/tests/test_1671_categorical_type.py b/tests/test_1671_categorical_type.py index ea8f9def64..2eae11179e 100644 --- a/tests/test_1671_categorical_type.py +++ b/tests/test_1671_categorical_type.py @@ -6,11 +6,10 @@ def test_to_categorical(): + pytest.importorskip("pyarrow") + array1 = ak.Array(["one", "two", "one", "one"]) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - array2 = ak.operations.ak_to_categorical.to_categorical(array1) + array2 = ak.str.to_categorical(array1) assert array1.type != array2.type assert array2.type == ak.types.ArrayType( ak.types.ListType( @@ -23,11 +22,9 @@ def test_to_categorical(): def test_categorical_type(): pytest.importorskip("pyarrow") + array1 = ak.Array(["one", "two", "one", "one"]) - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - array2 = ak.to_categorical(array1) + array2 = ak.str.to_categorical(array1) assert array1.type != array2.type assert array2.type == ak.types.ArrayType( ak.types.ListType( diff --git a/tests/test_1688_pack_categorical.py b/tests/test_1688_pack_categorical.py index 73ad5d2c3a..c9f6ab46ea 100644 --- a/tests/test_1688_pack_categorical.py +++ b/tests/test_1688_pack_categorical.py @@ -9,10 +9,9 @@ def test(): - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - this = ak.to_categorical(["one", "two", 
"one", "three", "one", "four"]) + pytest.importorskip("pyarrow") + + this = ak.str.to_categorical(["one", "two", "one", "three", "one", "four"]) assert ak.is_categorical(this) # Ensure packing by itself doesn't change the type this_packed = ak.to_packed(this) diff --git a/tests/test_2071_unflatten_non_packed_counts.py b/tests/test_2071_unflatten_non_packed_counts.py index 04ecf2caee..470e164530 100644 --- a/tests/test_2071_unflatten_non_packed_counts.py +++ b/tests/test_2071_unflatten_non_packed_counts.py @@ -36,10 +36,9 @@ def test_option_counts(): def test_categorical_counts(): - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - assert ak.almost_equal( - ak.unflatten([1.1, 2.2, 3.3, 4.4, 5.5], ak.to_categorical([3, 0, 2])), - [[1.1, 2.2, 3.3], [], [4.4, 5.5]], - ) + pytest.importorskip("pyarrow") + + assert ak.almost_equal( + ak.unflatten([1.1, 2.2, 3.3, 4.4, 5.5], ak.str.to_categorical([3, 0, 2])), + [[1.1, 2.2, 3.3], [], [4.4, 5.5]], + ) diff --git a/tests/test_2425_forms_from_type.py b/tests/test_2425_forms_from_type.py index 9d9d61e35d..77a0d5f167 100644 --- a/tests/test_2425_forms_from_type.py +++ b/tests/test_2425_forms_from_type.py @@ -23,26 +23,16 @@ def test_regular(): def test_categorical(): - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - array = ak.to_categorical( - [ - 1, - 1, - 2, - 1, - 1, - ] - ) + pytest.importorskip("pyarrow") + + array = ak.str.to_categorical(["do", "re", "mi", "fa", "so"]) form_from_type = ak.forms.from_type(array.type.content) assert form_from_type == array.layout.form def test_categorical_option(): - with pytest.warns( - DeprecationWarning, match=r"has been replaced by.*ak\.str\.to_categorical" - ): - array = ak.to_categorical([1, 1, 2, 1, 1, None]) + pytest.importorskip("pyarrow") + + array = ak.str.to_categorical(["do", "re", "mi", "fa", "so", None]) form_from_type = ak.forms.from_type(array.type.content) assert 
form_from_type == array.layout.form diff --git a/tests/test_2630_akstr_to_categorical.py b/tests/test_2630_akstr_to_categorical.py index ac5999ff49..0cea79447d 100644 --- a/tests/test_2630_akstr_to_categorical.py +++ b/tests/test_2630_akstr_to_categorical.py @@ -8,11 +8,6 @@ pytest.importorskip("pyarrow") -# These tests are copied from `test_0401_add_categorical_type_for_arrow_dictionary.py` -# Once `ak.to_categorical` is no-longer deprecated, the old tests can be deleted -# Other tests that check categorical properties (but not to_categorical) should be converted to use `ak.str.to_categorical` - - def test_to_categorical_nested(): array = ak.Array([["one", "two", "three"], [], ["one", "two"], ["three"]]) assert not ak.operations.ak_is_categorical.is_categorical(array)