Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(python): Verify the integrity of pandas column names before implied string conversion #17433

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions py-polars/polars/_utils/construction/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,14 @@ def pandas_to_pydf(
include_index: bool = False,
) -> PyDataFrame:
"""Construct a PyDataFrame from a pandas DataFrame."""
stringified_cols = {str(col) for col in data.columns}
if len(stringified_cols) < len(data.columns):
msg = (
"Polars dataframes must have unique string column names. "
"Please check your pandas dataframe for duplicates."
)
raise ValueError(msg)

convert_index = include_index and not _pandas_has_default_index(data)
if not convert_index and all(
is_simple_numpy_backed_pandas_series(data[col]) for col in data.columns
Expand Down
8 changes: 7 additions & 1 deletion py-polars/tests/unit/interop/test_from_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,15 @@ def test_from_pandas_include_indexes() -> None:
assert df.to_dict(as_series=False) == data


def test_duplicate_cols_diff_types() -> None:
    """Column labels that only differ by type must be rejected by `from_pandas`.

    The labels "0"/0 and "1"/1 are distinct in pandas, but they collide once
    every label is passed through `str`, so the conversion is expected to
    raise a `ValueError` instead of producing duplicate column names.
    """
    df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["0", 0, "1", 1])
    with pytest.raises(ValueError, match="Polars dataframes must have unique string"):
        pl.from_pandas(df)


def test_from_pandas_duplicated_columns() -> None:
    """A pandas frame with a repeated column label must be rejected.

    Column "b" appears twice, so `from_pandas` is expected to raise a
    `ValueError` carrying the unique-string-column-names message.
    """
    df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["a", "b", "c", "b"])
    # Only the current message is checked. The former pattern
    # ("duplicate column names found: ") was the removed side of the diff;
    # keeping it as an outer `pytest.raises` would fail, because the inner
    # context already consumes the exception.
    with pytest.raises(ValueError, match="Polars dataframes must have unique string"):
        pl.from_pandas(df)


Expand Down