pandas-dev · jreback · May 11, 2021 · May 6, 2021 · May 11, 2021 · May 11, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -748,7 +748,7 @@ Strings
 ^^^^^^^
 
 - Bug in the conversion from ``pyarrow.ChunkedArray`` to :class:`~arrays.StringArray` when the original had zero chunks (:issue:`41040`)
--
+- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` ignoring replacements with ``regex=True`` for ``StringDtype`` data (:issue:`41333`, :issue:`35977`)
 
 Interval
 ^^^^^^^^

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -1153,6 +1153,27 @@ def object_dtype(request):
     return request.param
 
 
+@pytest.fixture(
+    params=[
+        "object",
+        "string",
+        pytest.param(
+            "arrow_string", marks=td.skip_if_no("pyarrow", min_version="1.0.0")
+        ),
+    ]
+)
+def any_string_dtype(request):
+    """
+    Parametrized fixture for string dtypes.
+    * 'object'
+    * 'string'
+    * 'arrow_string' (skipped unless pyarrow >= 1.0.0 is installed)
+    """
+    from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
+
+    return request.param
+
+
 @pytest.fixture(params=tm.DATETIME64_DTYPES)
 def datetime64_dtype(request):
     """

diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py
@@ -149,7 +149,7 @@ def re_replacer(s):
             else:
                 return s
 
-    f = np.vectorize(re_replacer, otypes=[values.dtype])
+    f = np.vectorize(re_replacer, otypes=[np.object_])
 
     if mask is None:
         values[:] = f(values)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -49,6 +49,7 @@
     is_extension_array_dtype,
     is_list_like,
     is_sparse,
+    is_string_dtype,
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
@@ -788,7 +789,7 @@ def _replace_list(
 
         src_len = len(pairs) - 1
 
-        if values.dtype == _dtype_obj:
+        if is_string_dtype(values):
             # Calculate the mask once, prior to the call of comp
             # in order to avoid repeating the same computations
             mask = ~isna(values)

diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
@@ -563,10 +563,11 @@ def test_regex_replace_dict_nested(self, mix_abc):
         tm.assert_frame_equal(res3, expec)
         tm.assert_frame_equal(res4, expec)
 
-    def test_regex_replace_dict_nested_non_first_character(self):
+    def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype):
         # GH 25259
-        df = DataFrame({"first": ["abc", "bca", "cab"]})
-        expected = DataFrame({"first": [".bc", "bc.", "c.b"]})
+        dtype = any_string_dtype
+        df = DataFrame({"first": ["abc", "bca", "cab"]}, dtype=dtype)
+        expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype)
         result = df.replace({"a": "."}, regex=True)
         tm.assert_frame_equal(result, expected)
 
@@ -685,6 +686,24 @@ def test_replace_regex_metachar(self, metachar):
         expected = DataFrame({"a": ["paren", "else"]})
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize(
+        "data,to_replace,expected",
+        [
+            (["xax", "xbx"], {"a": "c", "b": "d"}, ["xcx", "xdx"]),
+            (["d", "", ""], {r"^\s*$": pd.NA}, ["d", pd.NA, pd.NA]),
+        ],
+    )
+    def test_regex_replace_string_types(
+        self, data, to_replace, expected, frame_or_series, any_string_dtype
+    ):
+        # GH-41333, GH-35977: regex=True replace must work for any string dtype
+        dtype = any_string_dtype
+        obj = frame_or_series(data, dtype=dtype)
+        result = obj.replace(to_replace, regex=True)
+        expected = frame_or_series(expected, dtype=dtype)
+
+        tm.assert_equal(result, expected)
+
+
     def test_replace(self, datetime_frame):
         datetime_frame["A"][:5] = np.nan
         datetime_frame["A"][-5:] = np.nan

diff --git a/pandas/tests/strings/conftest.py b/pandas/tests/strings/conftest.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-import pandas.util._test_decorators as td
-
 from pandas import Series
 from pandas.core import strings as strings
 
@@ -175,24 +173,3 @@ def any_allowed_skipna_inferred_dtype(request):
 
     # correctness of inference tested in tests/dtypes/test_inference.py
     return inferred_dtype, values
-
-
-@pytest.fixture(
-    params=[
-        "object",
-        "string",
-        pytest.param(
-            "arrow_string", marks=td.skip_if_no("pyarrow", min_version="1.0.0")
-        ),
-    ]
-)
-def any_string_dtype(request):
-    """
-    Parametrized fixture for string dtypes.
-    * 'object'
-    * 'string'
-    * 'arrow_string'
-    """
-    from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
-
-    return request.param