Skip to content

Commit

Permalink
Deprecate merge_sorted, change dask cudf usage to internal method (#…
Browse files Browse the repository at this point in the history
…10713)

This PR deprecates the non-pandas-conforming method `cudf.merge_sorted` and changes the dask_cudf usage to the internal method `_merge_sorted`.

I also updated the `msg` keyword of `pytest.skip` to `reason` in multiple tests; this removes 1000+ test warnings.

cc @vyasr @rjzamora

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Richard (Rick) Zamora (https://github.com/rjzamora)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #10713
  • Loading branch information
isVoid authored May 2, 2022
1 parent 0ddb3d9 commit a9eb47c
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 9 deletions.
19 changes: 19 additions & 0 deletions python/cudf/cudf/core/reshape.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

import itertools
import warnings
from collections import abc
from typing import Dict, Optional

Expand Down Expand Up @@ -791,6 +792,24 @@ def merge_sorted(
A new, lexicographically sorted, DataFrame/Series.
"""

warnings.warn(
"merge_sorted is deprecated and will be removed in a "
"future release.",
FutureWarning,
)
return _merge_sorted(
objs, keys, by_index, ignore_index, ascending, na_position
)


def _merge_sorted(
objs,
keys=None,
by_index=False,
ignore_index=False,
ascending=True,
na_position="last",
):
if not pd.api.types.is_list_like(objs):
raise TypeError("objs must be a list-like of Frame-like objects")

Expand Down
17 changes: 9 additions & 8 deletions python/cudf/cudf/tests/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
@pytest.mark.parametrize("nulls", ["none", "some", "all"])
def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
if dtype not in ["float32", "float64"] and nulls in ["some", "all"]:
pytest.skip(msg="nulls not supported in dtype: " + dtype)
pytest.skip(reason="nulls not supported in dtype: " + dtype)

pdf = pd.DataFrame()
id_vars = []
Expand Down Expand Up @@ -87,7 +87,7 @@ def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
@pytest.mark.parametrize("nulls", ["none", "some"])
def test_df_stack(nulls, num_cols, num_rows, dtype):
if dtype not in ["float32", "float64"] and nulls in ["some"]:
pytest.skip(msg="nulls not supported in dtype: " + dtype)
pytest.skip(reason="nulls not supported in dtype: " + dtype)

pdf = pd.DataFrame()
for i in range(num_cols):
Expand Down Expand Up @@ -139,7 +139,7 @@ def test_df_stack_reset_index():
def test_interleave_columns(nulls, num_cols, num_rows, dtype):

if dtype not in ["float32", "float64"] and nulls in ["some"]:
pytest.skip(msg="nulls not supported in dtype: " + dtype)
pytest.skip(reason="nulls not supported in dtype: " + dtype)

pdf = pd.DataFrame(dtype=dtype)
for i in range(num_cols):
Expand Down Expand Up @@ -176,7 +176,7 @@ def test_interleave_columns(nulls, num_cols, num_rows, dtype):
def test_tile(nulls, num_cols, num_rows, dtype, count):

if dtype not in ["float32", "float64"] and nulls in ["some"]:
pytest.skip(msg="nulls not supported in dtype: " + dtype)
pytest.skip(reason="nulls not supported in dtype: " + dtype)

pdf = pd.DataFrame(dtype=dtype)
for i in range(num_cols):
Expand Down Expand Up @@ -269,7 +269,7 @@ def test_df_merge_sorted(nparts, keys, na_position, ascending):
expect = df.sort_values(
keys_1, na_position=na_position, ascending=ascending
)
result = cudf.merge_sorted(
result = cudf.core.reshape._merge_sorted(
dfs, keys=keys, na_position=na_position, ascending=ascending
)
if keys:
Expand All @@ -290,7 +290,8 @@ def test_df_merge_sorted_index(nparts, index, ascending):
)

expect = df.sort_index(ascending=ascending)
result = cudf.merge_sorted(dfs, by_index=True, ascending=ascending)
with pytest.warns(FutureWarning, match="deprecated and will be removed"):
result = cudf.merge_sorted(dfs, by_index=True, ascending=ascending)

assert_eq(expect.index, result.index)

Expand All @@ -317,7 +318,7 @@ def test_df_merge_sorted_ignore_index(keys, na_position, ascending):
expect = df.sort_values(
keys_1, na_position=na_position, ascending=ascending
)
result = cudf.merge_sorted(
result = cudf.core.reshape._merge_sorted(
dfs,
keys=keys,
na_position=na_position,
Expand Down Expand Up @@ -347,7 +348,7 @@ def test_series_merge_sorted(nparts, key, na_position, ascending):
)

expect = df.sort_values(na_position=na_position, ascending=ascending)
result = cudf.merge_sorted(
result = cudf.core.reshape._merge_sorted(
dfs, na_position=na_position, ascending=ascending
)

Expand Down
2 changes: 1 addition & 1 deletion python/dask_cudf/dask_cudf/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def _append_counts(val, count):
return val

# Sort by calculated quantile values, then number of observations.
combined_vals_counts = gd.merge_sorted(
combined_vals_counts = gd.core.reshape._merge_sorted(
[*map(_append_counts, vals, counts)]
)
combined_counts = cupy.asnumpy(combined_vals_counts["_counts"].values)
Expand Down

0 comments on commit a9eb47c

Please sign in to comment.