Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enh: Deprecate native_namespace in favour of backend in from_dict #1931

Merged
merged 18 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,16 +754,16 @@ def unique(
def lazy(self: Self, *, backend: Implementation | None = None) -> CompliantLazyFrame:
from narwhals.utils import parse_version

pandas_df = self.to_pandas()
if backend is None:
return self
elif backend is Implementation.DUCKDB:
import duckdb # ignore-banned-import

from narwhals._duckdb.dataframe import DuckDBLazyFrame

df = self._native_frame # noqa: F841
return DuckDBLazyFrame(
df=duckdb.table("df"),
df=duckdb.table("pandas_df"),
backend_version=parse_version(duckdb.__version__),
version=self._version,
)
Expand All @@ -773,7 +773,7 @@ def lazy(self: Self, *, backend: Implementation | None = None) -> CompliantLazyF
from narwhals._polars.dataframe import PolarsLazyFrame

return PolarsLazyFrame(
df=pl.from_pandas(self._native_frame).lazy(),
df=pl.from_pandas(pandas_df).lazy(),
backend_version=parse_version(pl.__version__),
version=self._version,
)
Expand All @@ -784,7 +784,7 @@ def lazy(self: Self, *, backend: Implementation | None = None) -> CompliantLazyF
from narwhals._dask.dataframe import DaskLazyFrame

return DaskLazyFrame(
native_dataframe=dd.from_pandas(self._native_frame),
native_dataframe=dd.from_pandas(pandas_df),
backend_version=parse_version(dask.__version__),
version=self._version,
)
Expand Down
1 change: 0 additions & 1 deletion narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,6 @@ def __arrow_c_stream__(self: Self, requested_schema: object | None = None) -> ob

def lazy(
self: Self,
*,
backend: ModuleType | Implementation | str | None = None,
) -> LazyFrame[Any]:
"""Restrict available API methods to lazy-only ones.
Expand Down
75 changes: 51 additions & 24 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from narwhals.utils import flatten
from narwhals.utils import parse_version
from narwhals.utils import validate_laziness
from narwhals.utils import validate_native_namespace_and_backend

# Missing type parameters for generic type "DataFrame"
# However, trying to provide one results in mypy still complaining...
Expand Down Expand Up @@ -374,6 +375,7 @@ def from_dict(
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
*,
backend: ModuleType | Implementation | str | None = None,
native_namespace: ModuleType | None = None,
) -> DataFrame[Any]:
"""Instantiate DataFrame from dictionary.
Expand All @@ -388,9 +390,22 @@ def from_dict(
Arguments:
data: Dictionary to create DataFrame from.
schema: The DataFrame schema as Schema or dict of {name: type}.
native_namespace: The native library to use for DataFrame creation. Only
backend: Specifies which eager backend to instantiate to. Only
necessary if inputs are not Narwhals Series.

`backend` can be specified in various ways:

- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
native_namespace: The native library to use for DataFrame creation.

**Deprecated** (v1.26.0):
Please use `backend` instead. Note that `native_namespace` is still available
(and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
see [perfect backwards compatibility policy](../backcompat.md/).

Returns:
A new DataFrame.

Expand All @@ -400,24 +415,20 @@ def from_dict(
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}

Let's create a new dataframe of the same class as the dataframe we started with, from a dict of new data:
Let's create a new dataframe and specify the backend argument.

>>> def agnostic_from_dict(df_native: IntoFrameT) -> IntoFrameT:
... new_data = {"c": [5, 2], "d": [1, 4]}
... native_namespace = nw.get_native_namespace(df_native)
... return nw.from_dict(
... new_data, native_namespace=native_namespace
... ).to_native()
>>> def agnostic_from_dict(backend: str) -> IntoFrameT:
... data = {"c": [5, 2], "d": [1, 4]}
... return nw.from_dict(data, backend=backend).to_native()

Let's see what happens when passing pandas, Polars or PyArrow input:

>>> agnostic_from_dict(pd.DataFrame(data))
>>> agnostic_from_dict(backend="pandas")
c d
0 5 1
1 2 4
>>> agnostic_from_dict(pl.DataFrame(data))
>>> agnostic_from_dict(backend="polars")
shape: (2, 2)
┌─────┬─────┐
│ c   ┆ d   │
Expand All @@ -427,27 +438,30 @@ def from_dict(
│ 5   ┆ 1   │
│ 2   ┆ 4   │
└─────┴─────┘
>>> agnostic_from_dict(pa.table(data))
>>> agnostic_from_dict(backend="pyarrow")
pyarrow.Table
c: int64
d: int64
----
c: [[5,2]]
d: [[1,4]]
"""
backend = validate_native_namespace_and_backend(
backend, native_namespace, emit_deprecation_warning=True
)
return _from_dict_impl(
data,
schema,
native_namespace=native_namespace,
backend=backend,
version=Version.MAIN,
)


def _from_dict_impl(
def _from_dict_impl( # noqa: PLR0915
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
*,
native_namespace: ModuleType | None = None,
backend: ModuleType | Implementation | str | None = None,
version: Version,
) -> DataFrame[Any]:
from narwhals.series import Series
Expand All @@ -456,18 +470,31 @@ def _from_dict_impl(
if not data:
msg = "from_dict cannot be called with empty dictionary"
raise ValueError(msg)
if native_namespace is None:
if backend is None:
for val in data.values():
if isinstance(val, Series):
native_namespace = val.__native_namespace__()
break
else:
msg = "Calling `from_dict` without `native_namespace` is only supported if all input values are already Narwhals Series"
msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series"
raise TypeError(msg)
data = {key: to_native(value, pass_through=True) for key, value in data.items()}
implementation = Implementation.from_native_namespace(native_namespace)
eager_backend = Implementation.from_native_namespace(native_namespace)
else:
eager_backend = Implementation.from_backend(backend)
native_namespace = eager_backend.to_native_namespace()

if implementation is Implementation.POLARS:
supported_eager_backends = (
Implementation.POLARS,
Implementation.PANDAS,
Implementation.PYARROW,
Implementation.MODIN,
Implementation.CUDF,
)
if eager_backend is not None and eager_backend not in supported_eager_backends:
msg = f"Unsupported `backend` value.\nExpected one of {supported_eager_backends} or None, got: {eager_backend}."
raise ValueError(msg)
if eager_backend is Implementation.POLARS:
if schema:
from narwhals._polars.utils import (
narwhals_to_native_dtype as polars_narwhals_to_native_dtype,
Expand All @@ -481,11 +508,11 @@ def _from_dict_impl(
schema_pl = None

native_frame = native_namespace.from_dict(data, schema=schema_pl)
elif implementation in {
elif eager_backend in (
Implementation.PANDAS,
Implementation.MODIN,
Implementation.CUDF,
}:
):
aligned_data = {}
left_most_series = None
for key, native_series in data.items():
Expand Down Expand Up @@ -515,16 +542,16 @@ def _from_dict_impl(
schema = {
name: pandas_like_narwhals_to_native_dtype(
dtype=schema[name],
dtype_backend=get_dtype_backend(native_type, implementation),
implementation=implementation,
dtype_backend=get_dtype_backend(native_type, eager_backend),
implementation=eager_backend,
backend_version=backend_version,
version=version,
)
for name, native_type in native_frame.dtypes.items()
}
native_frame = native_frame.astype(schema)

elif implementation is Implementation.PYARROW:
elif eager_backend is Implementation.PYARROW:
if schema:
from narwhals._arrow.utils import (
narwhals_to_native_dtype as arrow_narwhals_to_native_dtype,
Expand Down
23 changes: 20 additions & 3 deletions narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from narwhals.utils import maybe_get_index
from narwhals.utils import maybe_reset_index
from narwhals.utils import maybe_set_index
from narwhals.utils import validate_native_namespace_and_backend
from narwhals.utils import validate_strict_and_pass_though

if TYPE_CHECKING:
Expand Down Expand Up @@ -169,7 +170,6 @@ def __getitem__(self: Self, item: Any) -> Any:

def lazy(
self: Self,
*,
backend: ModuleType | Implementation | str | None = None,
) -> LazyFrame[Any]:
"""Restrict available API methods to lazy-only ones.
Expand Down Expand Up @@ -2162,6 +2162,7 @@ def from_dict(
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
*,
backend: ModuleType | Implementation | str | None = None,
native_namespace: ModuleType | None = None,
) -> DataFrame[Any]:
"""Instantiate DataFrame from dictionary.
Expand All @@ -2176,17 +2177,33 @@ def from_dict(
Arguments:
data: Dictionary to create DataFrame from.
schema: The DataFrame schema as Schema or dict of {name: type}.
native_namespace: The native library to use for DataFrame creation. Only
backend: Specifies which eager backend to instantiate to. Only
necessary if inputs are not Narwhals Series.

`backend` can be specified in various ways:

- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
native_namespace: The native library to use for DataFrame creation.

**Deprecated** (v1.26.0):
Please use `backend` instead. Note that `native_namespace` is still available
(and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
see [perfect backwards compatibility policy](../backcompat.md/).

Returns:
A new DataFrame.
"""
backend = validate_native_namespace_and_backend(
backend, native_namespace, emit_deprecation_warning=False
)
return _stableify( # type: ignore[no-any-return]
_from_dict_impl(
data,
schema,
native_namespace=native_namespace,
backend=backend,
version=Version.V1,
)
)
Expand Down
66 changes: 56 additions & 10 deletions narwhals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,16 +161,41 @@ def to_native_namespace(self: Self) -> ModuleType:
Returns:
Native module.
"""
mapping = {
Implementation.PANDAS: get_pandas(),
Implementation.MODIN: get_modin(),
Implementation.CUDF: get_cudf(),
Implementation.PYARROW: get_pyarrow(),
Implementation.PYSPARK: get_pyspark_sql(),
Implementation.POLARS: get_polars(),
Implementation.DASK: get_dask_dataframe(),
}
return mapping[self] # type: ignore[no-any-return]
if self is Implementation.PANDAS:
import pandas as pd # ignore-banned-import

return pd # type: ignore[no-any-return]
if self is Implementation.MODIN:
import modin.pandas

return modin.pandas # type: ignore[no-any-return]
if self is Implementation.CUDF: # pragma: no cover
import cudf # ignore-banned-import

return cudf # type: ignore[no-any-return]
if self is Implementation.PYARROW:
import pyarrow as pa # ignore-banned-import

return pa # type: ignore[no-any-return]
if self is Implementation.PYSPARK: # pragma: no cover
import pyspark.sql

return pyspark.sql # type: ignore[no-any-return]
if self is Implementation.POLARS:
import polars as pl # ignore-banned-import

return pl
if self is Implementation.DASK:
import dask.dataframe # ignore-banned-import

return dask.dataframe # type: ignore[no-any-return]

if self is Implementation.DUCKDB:
import duckdb # ignore-banned-import

return duckdb # type: ignore[no-any-return]
msg = "Not supported Implementation" # pragma: no cover
raise AssertionError(msg)

def is_pandas(self: Self) -> bool:
"""Return whether implementation is pandas.
Expand Down Expand Up @@ -1041,6 +1066,27 @@ def validate_strict_and_pass_though(
return pass_through


def validate_native_namespace_and_backend(
    backend: ModuleType | Implementation | str | None = None,
    native_namespace: ModuleType | None = None,
    *,
    emit_deprecation_warning: bool,
) -> ModuleType | Implementation | str | None:
    """Reconcile the deprecated `native_namespace` argument with `backend`.

    Arguments:
        backend: The backend specifier passed by the caller, if any.
        native_namespace: The deprecated specifier passed by the caller, if any.
        emit_deprecation_warning: Whether to issue a deprecation warning when
            only `native_namespace` is given.

    Returns:
        `backend` if it was given, otherwise `native_namespace` (which may
        itself be `None`).

    Raises:
        ValueError: If both `native_namespace` and `backend` are given.
    """
    if native_namespace is not None and backend is not None:
        msg = "Can't pass both `native_namespace` and `backend`"
        raise ValueError(msg)

    if native_namespace is not None:  # pragma: no cover
        if emit_deprecation_warning:
            # The replacement argument is `backend`; `pass_through` is an
            # unrelated parameter and must not be suggested here.
            msg = (
                "`native_namespace` is deprecated, please use `backend` instead.\n\n"
                "Note: `native_namespace` will remain available in `narwhals.stable.v1`.\n"
                "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
            )
            # Kept in sync with the "Deprecated (v1.26.0)" note in the
            # `from_dict` docstrings.
            issue_deprecation_warning(msg, _version="1.26.0")
        # Fall back to the legacy argument so downstream code only sees `backend`.
        backend = native_namespace
    return backend


def _validate_rolling_arguments(
window_size: int, min_samples: int | None
) -> tuple[int, int]:
Expand Down
Loading
Loading