def __getattr__(name: str):
    """
    Module-level attribute hook (PEP 562) serving deprecated names.

    Python calls this only after normal lookup on the module has failed, so
    it never shadows names that are actually defined or imported here.

    Parameters
    ----------
    name : str
        The attribute that was requested on the module.

    Returns
    -------
    type
        ``ExtensionBlock`` when ``name`` is ``"CategoricalBlock"``.

    Raises
    ------
    AttributeError
        For every name other than ``"CategoricalBlock"``.

    Warns
    -----
    FutureWarning
        When ``"CategoricalBlock"`` is requested.
    """
    if name == "CategoricalBlock":
        # Imported inside the branch: unrelated failed lookups (hasattr probes,
        # ``import *`` looking for optional dunders) should not pay for it.
        import warnings

        warnings.warn(
            "CategoricalBlock is deprecated and will be removed in a future version. "
            "Use ExtensionBlock instead.",
            FutureWarning,
            # stacklevel=2 attributes the warning to the code that accessed the
            # attribute rather than to this hook.
            stacklevel=2,
        )
        return ExtensionBlock

    raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'")
it is a no-op. @@ -694,8 +712,6 @@ def replace( # replace_list instead of replace. return [self] if inplace else [self.copy()] - values = self.values - mask = missing.mask_missing(values, to_replace) if not mask.any(): # Note: we get here with test_replace_extension_other incorrectly @@ -720,7 +736,7 @@ def replace( else: # split so that we only upcast where necessary return self.split_and_operate( - type(self).replace, to_replace, value, inplace=inplace, regex=regex + type(self).replace, to_replace, value, inplace=True, regex=regex ) @final @@ -1223,7 +1239,7 @@ def take_nd( Take values according to indexer and return them as a block.bb """ - # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock + # algos.take_nd dispatches for DatetimeTZBlock # so need to preserve types # sparse is treated like an ndarray, but needs .get_values() shaping @@ -1422,7 +1438,7 @@ class ExtensionBlock(Block): Notes ----- This holds all 3rd-party extension array types. It's also the immediate - parent class for our internal extension types' blocks, CategoricalBlock. + parent class for our internal extension types' blocks. ExtensionArrays are limited to 1-D. """ @@ -1579,7 +1595,6 @@ def take_nd( def _can_hold_element(self, element: Any) -> bool: # TODO: We may need to think about pushing this onto the array. - # We're doing the same as CategoricalBlock here. 
return True def _slice(self, slicer): @@ -2019,41 +2034,6 @@ def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]: def _can_hold_element(self, element: Any) -> bool: return True - def replace( - self, - to_replace, - value, - inplace: bool = False, - regex: bool = False, - ) -> List[Block]: - # Note: the checks we do in NDFrame.replace ensure we never get - # here with listlike to_replace or value, as those cases - # go through _replace_list - - regex = should_use_regex(regex, to_replace) - - if regex: - return self._replace_regex(to_replace, value, inplace=inplace) - else: - return super().replace(to_replace, value, inplace=inplace, regex=False) - - -class CategoricalBlock(ExtensionBlock): - __slots__ = () - - def replace( - self, - to_replace, - value, - inplace: bool = False, - regex: bool = False, - ) -> List[Block]: - inplace = validate_bool_kwarg(inplace, "inplace") - result = self if inplace else self.copy() - - result.values.replace(to_replace, value, inplace=True) - return [result] - # ----------------------------------------------------------------- # Constructor Helpers @@ -2116,7 +2096,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None): # Need this first(ish) so that Sparse[datetime] is sparse cls = ExtensionBlock elif isinstance(dtype, CategoricalDtype): - cls = CategoricalBlock + cls = ExtensionBlock elif vtype is Timestamp: cls = DatetimeTZBlock elif vtype is Interval or vtype is Period: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ea264da4c7b5f..da78fc5dfba76 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -67,7 +67,6 @@ ) from pandas.core.internals.blocks import ( Block, - CategoricalBlock, DatetimeTZBlock, ExtensionBlock, ObjectValuesExtensionBlock, @@ -1867,13 +1866,6 @@ def _form_blocks( object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_) blocks.extend(object_blocks) - if 
len(items_dict["CategoricalBlock"]) > 0: - cat_blocks = [ - new_block(array, klass=CategoricalBlock, placement=i, ndim=2) - for i, array in items_dict["CategoricalBlock"] - ] - blocks.extend(cat_blocks) - if len(items_dict["ExtensionBlock"]): external_blocks = [ new_block(array, klass=ExtensionBlock, placement=i, ndim=2) diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py index d4630b20db85f..0665a07c482f9 100644 --- a/pandas/tests/internals/test_api.py +++ b/pandas/tests/internals/test_api.py @@ -26,7 +26,6 @@ def test_namespace(): ] expected = [ "Block", - "CategoricalBlock", "NumericBlock", "DatetimeBlock", "DatetimeTZBlock", diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index e530f3e37883a..21d55e40a07fb 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -258,6 +258,7 @@ def test_read_expands_user_home_dir( ), ], ) + @pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning") def test_read_fspath_all(self, reader, module, path, datapath): pytest.importorskip(module) path = datapath(*path) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index ab0b3b08a11e8..162094428dbc0 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -20,6 +20,7 @@ @filter_sparse @pytest.mark.single +@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning") class TestFeather: def check_error_on_write(self, df, exc, err_msg): # check that we are raising the exception diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 3ef77d2fbacd0..f3cfa033409cb 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -573,6 +573,7 @@ def test_write_column_index_nonstring(self, pa): self.check_error_on_write(df, engine, ValueError, msg) +@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning") class 
TestParquetPyArrow(Base): def test_basic(self, pa, df_full):