Deprecate merge_sorted, change dask cudf usage to internal method (#…

…10713) This PR deprecates non-pandas conform method `cudf.merge_sorted` and change dask cudf usage to internal method `_merge_sorted`. I also updated msg keyword in pytest.skip in multiple tests to reason, this removes 1000+ test warnings. cc @vyasr @rjzamora Authors: - Michael Wang (https://github.com/isVoid) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Richard (Rick) Zamora (https://github.com/rjzamora) - Vyas Ramasubramani (https://github.com/vyasr) URL: #10713
rapidsai · May 2, 2022 · a9eb47c · a9eb47c
1 parent 0ddb3d9
commit a9eb47c
Show file tree

Hide file tree

Showing 3 changed files with 29 additions and 9 deletions.
diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
 import itertools
+import warnings
 from collections import abc
 from typing import Dict, Optional
 
@@ -791,6 +792,24 @@ def merge_sorted(
     A new, lexicographically sorted, DataFrame/Series.
     """
 
+    warnings.warn(
+        "merge_sorted is deprecated and will be removed in a "
+        "future release.",
+        FutureWarning,
+    )
+    return _merge_sorted(
+        objs, keys, by_index, ignore_index, ascending, na_position
+    )
+
+
+def _merge_sorted(
+    objs,
+    keys=None,
+    by_index=False,
+    ignore_index=False,
+    ascending=True,
+    na_position="last",
+):
     if not pd.api.types.is_list_like(objs):
         raise TypeError("objs must be a list-like of Frame-like objects")
 

diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py
@@ -24,7 +24,7 @@
 @pytest.mark.parametrize("nulls", ["none", "some", "all"])
 def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
     if dtype not in ["float32", "float64"] and nulls in ["some", "all"]:
-        pytest.skip(msg="nulls not supported in dtype: " + dtype)
+        pytest.skip(reason="nulls not supported in dtype: " + dtype)
 
     pdf = pd.DataFrame()
     id_vars = []
@@ -87,7 +87,7 @@ def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
 @pytest.mark.parametrize("nulls", ["none", "some"])
 def test_df_stack(nulls, num_cols, num_rows, dtype):
     if dtype not in ["float32", "float64"] and nulls in ["some"]:
-        pytest.skip(msg="nulls not supported in dtype: " + dtype)
+        pytest.skip(reason="nulls not supported in dtype: " + dtype)
 
     pdf = pd.DataFrame()
     for i in range(num_cols):
@@ -139,7 +139,7 @@ def test_df_stack_reset_index():
 def test_interleave_columns(nulls, num_cols, num_rows, dtype):
 
     if dtype not in ["float32", "float64"] and nulls in ["some"]:
-        pytest.skip(msg="nulls not supported in dtype: " + dtype)
+        pytest.skip(reason="nulls not supported in dtype: " + dtype)
 
     pdf = pd.DataFrame(dtype=dtype)
     for i in range(num_cols):
@@ -176,7 +176,7 @@ def test_interleave_columns(nulls, num_cols, num_rows, dtype):
 def test_tile(nulls, num_cols, num_rows, dtype, count):
 
     if dtype not in ["float32", "float64"] and nulls in ["some"]:
-        pytest.skip(msg="nulls not supported in dtype: " + dtype)
+        pytest.skip(reason="nulls not supported in dtype: " + dtype)
 
     pdf = pd.DataFrame(dtype=dtype)
     for i in range(num_cols):
@@ -269,7 +269,7 @@ def test_df_merge_sorted(nparts, keys, na_position, ascending):
     expect = df.sort_values(
         keys_1, na_position=na_position, ascending=ascending
     )
-    result = cudf.merge_sorted(
+    result = cudf.core.reshape._merge_sorted(
         dfs, keys=keys, na_position=na_position, ascending=ascending
     )
     if keys:
@@ -290,7 +290,8 @@ def test_df_merge_sorted_index(nparts, index, ascending):
     )
 
     expect = df.sort_index(ascending=ascending)
-    result = cudf.merge_sorted(dfs, by_index=True, ascending=ascending)
+    with pytest.warns(FutureWarning, match="deprecated and will be removed"):
+        result = cudf.merge_sorted(dfs, by_index=True, ascending=ascending)
 
     assert_eq(expect.index, result.index)
 
@@ -317,7 +318,7 @@ def test_df_merge_sorted_ignore_index(keys, na_position, ascending):
     expect = df.sort_values(
         keys_1, na_position=na_position, ascending=ascending
     )
-    result = cudf.merge_sorted(
+    result = cudf.core.reshape._merge_sorted(
         dfs,
         keys=keys,
         na_position=na_position,
@@ -347,7 +348,7 @@ def test_series_merge_sorted(nparts, key, na_position, ascending):
     )
 
     expect = df.sort_values(na_position=na_position, ascending=ascending)
-    result = cudf.merge_sorted(
+    result = cudf.core.reshape._merge_sorted(
         dfs, na_position=na_position, ascending=ascending
     )
 

diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py
@@ -85,7 +85,7 @@ def _append_counts(val, count):
         return val
 
     # Sort by calculated quantile values, then number of observations.
-    combined_vals_counts = gd.merge_sorted(
+    combined_vals_counts = gd.core.reshape._merge_sorted(
         [*map(_append_counts, vals, counts)]
     )
     combined_counts = cupy.asnumpy(combined_vals_counts["_counts"].values)