pandas-dev · cgangwar11 · Sep 1, 2020 · Sep 1, 2020 · Sep 1, 2020 · Sep 1, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -567,9 +567,11 @@ Sparse
 ExtensionArray
 ^^^^^^^^^^^^^^
 
+
 - Fixed bug where :class:`DataFrame` column set to scalar extension type via a dict instantiation was considered an object type rather than the extension type (:issue:`35965`)
 - Fixed bug where ``astype()`` with equal dtype and ``copy=False`` would return a new object (:issue:`28488`)
 - Fixed bug when applying a NumPy ufunc with multiple outputs to a :class:`pandas.arrays.IntegerArray` returning None (:issue:`36913`)
+- Fixed bug in :meth:`DataFrame.replace` not working with ``regex=True`` on columns of ``dtype='string'`` (:issue:`35977`)
 - Fixed an inconsistency in :class:`PeriodArray`'s ``__init__`` signature to those of :class:`DatetimeArray` and :class:`TimedeltaArray` (:issue:`37289`)
 - Reductions for :class:`BooleanArray`, :class:`Categorical`, :class:`DatetimeArray`, :class:`FloatingArray`, :class:`IntegerArray`, :class:`PeriodArray`, :class:`TimedeltaArray`, and :class:`PandasArray` are now keyword-only methods (:issue:`37541`)
 

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -2027,6 +2027,32 @@ def _unstack(self, unstacker, fill_value, new_placement):
         ]
         return blocks, mask
 
+    def replace(
+        self,
+        to_replace,
+        value,
+        inplace: bool = False,
+        regex: bool = False,
+    ):
+        """
+        Replace ``to_replace`` with ``value``.
+
+        The super class ``replace`` does not support regex, so when ``regex``
+        is True the values are cast to object, replaced via the object block's
+        ``replace``, and each resulting block is cast back to the original
+        dtype. Otherwise the call is delegated to the super class unchanged.
+        """
+        inplace = validate_bool_kwarg(inplace, "inplace")
+        regex = validate_bool_kwarg(regex, "regex")
+        if not regex:
+            return super().replace(to_replace, value, inplace, regex)
+
+        dtype = self.values.dtype
+        # Always return a list of blocks (inplace=True used to return a bare
+        # block) and use the blocks that the object-dtype ``replace`` returns.
+        blocks = self.astype(object).replace(to_replace, value, inplace, regex)
+        return [b.astype(dtype) for b in blocks]
+
 
 class ObjectValuesExtensionBlock(ExtensionBlock):
     """

diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py
@@ -71,4 +71,4 @@ def test_replace2(to_replace, value, result, expected_error_msg):
             # ensure non-inplace call does not affect original
             tm.assert_categorical_equal(cat, expected)
     cat.replace(to_replace, value, inplace=True)
-    tm.assert_categorical_equal(cat, expected)
+    tm.assert_categorical_equal(cat, expected)
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
@@ -1632,3 +1632,37 @@ def test_replace_unicode(self):
         result = df1.replace(columns_values_map)
         expected = DataFrame({"positive": np.ones(3)})
         tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("inplace", [False, True])
+    @pytest.mark.parametrize(
+        "to_replace,value,input_data,expected_data",
+        [
+            (r"^\s*$", pd.NA, ["d", "ee", "f", ""], ["d", "ee", "f", pd.NA]),
+            (r"e{2}", "replace", ["d", "ee", "f", ""], ["d", "replace", "f", ""]),
+            (r"f", "replace", ["d", "ee", "f", ""], ["d", "ee", "replace", ""]),
+        ],
+    )
+    def test_replace_regex(self, to_replace, value, input_data, expected_data, inplace):
+        # GH35977: regex replacement on a "string" dtype column, checked on
+        # the returned frame (inplace=False) and on the mutated frame
+        # (inplace=True).
+        frame = pd.DataFrame({"col1": input_data}, dtype="string")
+        expected = pd.DataFrame({"col1": expected_data}, dtype="string")
+        returned = frame.replace(to_replace, value, inplace=inplace, regex=True)
+        result = frame if inplace else returned
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "to_replace,value,input_data,expected_data",
+        [
+            ("", pd.NA, ["d", "ee", "f", ""], ["d", "ee", "f", pd.NA]),
+            ("ee", "replace", ["d", "ee", "f", ""], ["d", "replace", "f", ""]),
+            ("f", "replace", ["d", "ee", "f", ""], ["d", "ee", "replace", ""]),
+        ],
+    )
+    def test_replace_string(self, to_replace, value, input_data, expected_data):
+        # GH35977: literal (non-regex) replacement on a "string" dtype column
+        frame = pd.DataFrame({"col1": input_data}, dtype="string")
+        replaced = frame.replace(to_replace, value, inplace=False, regex=False)
+        expected = pd.DataFrame({"col1": expected_data}, dtype="string")
+        tm.assert_frame_equal(replaced, expected)