Skip to content

Commit

Permalink
refactor(python): Update test suite to explicitly use orient="row" in DataFrame constructor when applicable (#16977)
Browse files (browse the repository at this point in the history)
  • Loading branch information
stinodego authored Jun 16, 2024
1 parent 9f097ea commit b6cd77d
Show file tree
Hide file tree
Showing 22 changed files with 92 additions and 65 deletions.
4 changes: 3 additions & 1 deletion py-polars/tests/unit/constructors/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,7 @@ def test_init_numpy_scalars() -> None:
df_expected = pl.from_records(
data=[(True, 16, 1234), (False, 64, 9876)],
schema=OrderedDict([("bool", pl.Boolean), ("i8", pl.Int8), ("u32", pl.UInt32)]),
orient="row",
)
assert_frame_equal(df, df_expected)

Expand Down Expand Up @@ -837,13 +838,14 @@ def test_init_py_dtype_misc_float() -> None:

def test_init_seq_of_seq() -> None:
# List of lists
df = pl.DataFrame([[1, 2, 3], [4, 5, 6]], schema=["a", "b", "c"])
df = pl.DataFrame([[1, 2, 3], [4, 5, 6]], schema=["a", "b", "c"], orient="row")
expected = pl.DataFrame({"a": [1, 4], "b": [2, 5], "c": [3, 6]})
assert_frame_equal(df, expected)

df = pl.DataFrame(
[[1, 2, 3], [4, 5, 6]],
schema=[("a", pl.Int8), ("b", pl.Int16), ("c", pl.Int32)],
orient="row",
)
assert df.schema == {"a": pl.Int8, "b": pl.Int16, "c": pl.Int32}
assert df.rows() == [(1, 2, 3), (4, 5, 6)]
Expand Down
4 changes: 4 additions & 0 deletions py-polars/tests/unit/dataframe/test_describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def test_df_describe_nested() -> None:
],
schema=["statistic"] + df.columns,
schema_overrides={"struct": pl.Float64, "list": pl.Float64},
orient="row",
)
assert_frame_equal(result, expected)

Expand All @@ -154,6 +155,7 @@ def test_df_describe_custom_percentiles() -> None:
("max", 2.0),
],
schema=["statistic"] + df.columns,
orient="row",
)
assert_frame_equal(result, expected)

Expand All @@ -172,6 +174,7 @@ def test_df_describe_no_percentiles(pcts: list[float] | None) -> None:
("max", 2.0),
],
schema=["statistic"] + df.columns,
orient="row",
)
assert_frame_equal(result, expected)

Expand All @@ -192,6 +195,7 @@ def test_df_describe_empty_column() -> None:
("max", None),
],
schema=["statistic"] + df.columns,
orient="row",
)
assert_frame_equal(result, expected)

Expand Down
5 changes: 3 additions & 2 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,6 +998,7 @@ def test_literal_series() -> None:
(21.0, 2, "reg3", datetime(2022, 8, 18, 0), 3),
],
schema=expected_schema, # type: ignore[arg-type]
orient="row",
),
out,
atol=0.00001,
Expand Down Expand Up @@ -1107,7 +1108,7 @@ def __iter__(self) -> Iterator[Any]:


def test_from_rows() -> None:
df = pl.from_records([[1, 2, "foo"], [2, 3, "bar"]])
df = pl.from_records([[1, 2, "foo"], [2, 3, "bar"]], orient="row")
assert_frame_equal(
df,
pl.DataFrame(
Expand All @@ -1123,7 +1124,7 @@ def test_from_rows() -> None:

# auto-inference with same num rows/cols
data = [(1, 2, "foo"), (2, 3, "bar"), (3, 4, "baz")]
df = pl.from_records(data)
df = pl.from_records(data, orient="row")
assert data == df.rows()


Expand Down
4 changes: 3 additions & 1 deletion py-polars/tests/unit/datatypes/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ def test_decimal_convert_to_float_by_schema() -> None:


def test_df_constructor_convert_decimal_to_float_9873() -> None:
result = pl.DataFrame([[D("45.0000")], [D("45.0000")]], schema={"a": pl.Float64})
result = pl.DataFrame(
[[D("45.0000")], [D("45.0000")]], schema={"a": pl.Float64}, orient="row"
)
expected = pl.DataFrame({"a": [45.0, 45.0]})
assert_frame_equal(result, expected)

Expand Down
6 changes: 3 additions & 3 deletions py-polars/tests/unit/datatypes/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,9 +586,8 @@ def test_list_null_pickle() -> None:

def test_struct_with_nulls_as_list() -> None:
df = pl.DataFrame([[{"a": 1, "b": 2}], [{"c": 3, "d": None}]])
assert df.select(pl.concat_list(pl.all()).alias("as_list")).to_dict(
as_series=False
) == {
result = df.select(pl.concat_list(pl.all()).alias("as_list"))
assert result.to_dict(as_series=False) == {
"as_list": [
[
{"a": 1, "b": 2, "c": None, "d": None},
Expand Down Expand Up @@ -833,6 +832,7 @@ def test_null_list_categorical_16405() -> None:
"match": pl.List(pl.Categorical),
"what": pl.Categorical,
},
orient="row",
)

df = df.select(
Expand Down
96 changes: 48 additions & 48 deletions py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,56 +757,56 @@ def test_temporal_dtypes_map_elements(
PolarsInefficientMapWarning,
match=r"(?s)Replace this expression.*lambda x:",
):
assert_frame_equal(
df.with_columns(
# don't actually do this; native expressions are MUCH faster ;)
pl.col("timestamp")
.map_elements(
lambda x: const_dtm,
skip_nulls=skip_nulls,
return_dtype=pl.Datetime,
)
.alias("const_dtm"),
# note: the below now trigger a PolarsInefficientMapWarning
pl.col("timestamp")
.map_elements(
lambda x: x and x.date(),
skip_nulls=skip_nulls,
return_dtype=pl.Date,
)
.alias("date"),
pl.col("timestamp")
.map_elements(
lambda x: x and x.time(),
skip_nulls=skip_nulls,
return_dtype=pl.Time,
)
.alias("time"),
result = df.with_columns(
# don't actually do this; native expressions are MUCH faster ;)
pl.col("timestamp")
.map_elements(
lambda x: const_dtm,
skip_nulls=skip_nulls,
return_dtype=pl.Datetime,
)
.alias("const_dtm"),
# note: the below now trigger a PolarsInefficientMapWarning
pl.col("timestamp")
.map_elements(
lambda x: x and x.date(),
skip_nulls=skip_nulls,
return_dtype=pl.Date,
)
.alias("date"),
pl.col("timestamp")
.map_elements(
lambda x: x and x.time(),
skip_nulls=skip_nulls,
return_dtype=pl.Time,
)
.alias("time"),
)
expected = pl.DataFrame(
[
(
datetime(2010, 9, 12, 10, 19, 54),
datetime(2010, 9, 12, 0, 0),
date(2010, 9, 12),
time(10, 19, 54),
),
pl.DataFrame(
[
(
datetime(2010, 9, 12, 10, 19, 54),
datetime(2010, 9, 12, 0, 0),
date(2010, 9, 12),
time(10, 19, 54),
),
(None, expected_value, None, None),
(
datetime(2009, 2, 13, 23, 31, 30),
datetime(2010, 9, 12, 0, 0),
date(2009, 2, 13),
time(23, 31, 30),
),
],
schema={
"timestamp": pl.Datetime("ms"),
"const_dtm": pl.Datetime("us"),
"date": pl.Date,
"time": pl.Time,
},
(None, expected_value, None, None),
(
datetime(2009, 2, 13, 23, 31, 30),
datetime(2010, 9, 12, 0, 0),
date(2009, 2, 13),
time(23, 31, 30),
),
)
],
schema={
"timestamp": pl.Datetime("ms"),
"const_dtm": pl.Datetime("us"),
"date": pl.Date,
"time": pl.Time,
},
orient="row",
)
assert_frame_equal(result, expected)


def test_timelike_init() -> None:
Expand Down
4 changes: 3 additions & 1 deletion py-polars/tests/unit/functions/test_cum_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ def test_cum_count_multi_arg_reverse() -> None:


def test_cum_count() -> None:
df = pl.DataFrame([["a"], ["a"], ["a"], ["b"], ["b"], ["a"]], schema=["A"])
df = pl.DataFrame(
[["a"], ["a"], ["a"], ["b"], ["b"], ["a"]], schema=["A"], orient="row"
)

out = df.group_by("A", maintain_order=True).agg(
pl.col("A").cum_count().alias("foo")
Expand Down
4 changes: 3 additions & 1 deletion py-polars/tests/unit/interop/test_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,9 @@ def test_dataframe_from_repr() -> None:
)
assert_frame_equal(
df,
pl.DataFrame(data=[(None, None)], schema={"c1": pl.Int32, "c2": pl.Float64}),
pl.DataFrame(
data=[(None, None)], schema={"c1": pl.Int32, "c2": pl.Float64}, orient="row"
),
)

df = cast(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ def test_nan_inf_aggregation() -> None:
("inf and null", None),
],
schema=["group", "value"],
orient="row",
)

assert_frame_equal(
Expand All @@ -398,6 +399,7 @@ def test_nan_inf_aggregation() -> None:
("inf and null", np.inf, np.inf, np.inf),
],
schema=["group", "min", "max", "mean"],
orient="row",
),
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def test_all_any_horizontally() -> None:
[None, None, False],
],
schema=["var1", "var2", "var3"],
orient="row",
)
result = df.select(
any=pl.any_horizontal(pl.col("var2"), pl.col("var3")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ def test_str_find(strict: bool) -> None:
"pat": pl.String,
"lit": pl.String,
},
orient="row",
)
city, pop, pat, lit = (pl.col(c) for c in ("city", "population", "pat", "lit"))

Expand Down Expand Up @@ -791,6 +792,7 @@ def test_contains() -> None:
df = pl.DataFrame(
data=[(1, "some * * text"), (2, "(with) special\n * chars"), (3, "**etc...?$")],
schema=["idx", "text"],
orient="row",
)
for pattern, as_literal, expected in (
(r"\* \*", False, [True, False, False]),
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/operations/namespaces/test_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def test_contains() -> None:
(4, None),
],
schema=["idx", "bin"],
orient="row",
)
for pattern, expected in (
(b"e * ", [True, False, False, None]),
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/operations/test_ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def test_ewm_with_multiple_chunks() -> None:
("z", 3.0, 4.0),
],
schema=["a", "b", "c"],
orient="row",
).with_columns(
pl.col(pl.Float64).log().diff().name.prefix("ld_"),
)
Expand Down
2 changes: 2 additions & 0 deletions py-polars/tests/unit/operations/test_is_in.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ def test_cat_list_is_in_from_cat(dtype: pl.DataType) -> None:
(["a"], "d"),
],
schema={"li": pl.List(dtype), "x": dtype},
orient="row",
)
res = df.select(pl.col("li").list.contains(pl.col("x")))
expected_df = pl.DataFrame({"li": [False, True, True, False, False]})
Expand Down Expand Up @@ -371,6 +372,7 @@ def test_cat_list_is_in_from_str() -> None:
(["a"], "d"),
],
schema={"li": pl.List(pl.Categorical), "x": pl.String},
orient="row",
)
res = df.select(pl.col("li").list.contains(pl.col("x")))
expected_df = pl.DataFrame({"li": [False, True, True, False, False]})
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/operations/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def test_pivot() -> None:
("C", None, None, None, None, 2),
],
schema=["foo", "k", "l", "m", "n", "o"],
orient="row",
)
assert_frame_equal(result, expected)

Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/operations/test_shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def test_shift_fill_value_group_logicals() -> None:
(date(2001, 1, 4), "B"),
],
schema=["d", "s"],
orient="row",
)
result = df.select(pl.col("d").shift(fill_value=pl.col("d").max(), n=-1).over("s"))

Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/sql/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def test_extract_century_millennium(dt: date, expected: list[int]) -> None:
right=pl.DataFrame(
data=[expected + expected],
schema=["c1", "c2", "c3", "c4"],
orient="row",
).cast(pl.Int32),
)

Expand Down
7 changes: 5 additions & 2 deletions py-polars/tests/unit/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,15 @@ def by_column_dtypes(self) -> list[pl.LazyFrame]:

df1, df2 = (d.collect() for d in ldf.split.by_column_dtypes()) # type: ignore[attr-defined]
assert_frame_equal(
df1, pl.DataFrame([("xx",), ("xy",), ("yy",), ("yz",)], schema=["a1"])
df1,
pl.DataFrame([("xx",), ("xy",), ("yy",), ("yz",)], schema=["a1"], orient="row"),
)
assert_frame_equal(
df2,
pl.DataFrame(
[(2, 3, 4), (4, 5, 6), (5, 6, 7), (6, 7, 8)], schema=["a2", "b1", "b2"]
[(2, 3, 4), (4, 5, 6), (5, 6, 7), (6, 7, 8)],
schema=["a2", "b1", "b2"],
orient="row",
),
)

Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/test_cse.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ def test_windows_cse_excluded() -> None:
("b", "qqq", 0),
],
schema=["a", "b", "c"],
orient="row",
)

result = lf.select(
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ def test_group_by_agg_equals_zero_3535() -> None:
("val1", pl.Int16),
("val2", pl.Float32),
],
orient="row",
)
# group by the key, aggregating the two numeric cols
assert df.group_by(pl.col("key"), maintain_order=True).agg(
Expand Down
8 changes: 2 additions & 6 deletions py-polars/tests/unit/test_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,10 +234,7 @@ def test_iter_rows() -> None:
],
)
def test_row_constructor_schema(primitive: pl.DataType) -> None:
result = pl.DataFrame(
data=[[1], [2], [3]],
schema={"d": primitive},
)
result = pl.DataFrame(data=[[1], [2], [3]], schema={"d": primitive}, orient="row")

assert result.dtypes == [primitive]
assert result.to_dict(as_series=False) == {"d": [1, 2, 3]}
Expand All @@ -246,7 +243,6 @@ def test_row_constructor_schema(primitive: pl.DataType) -> None:
def test_row_constructor_uint64() -> None:
# validate init with a valid UInt64 that exceeds Int64 upper bound
df = pl.DataFrame(
data=[[0], [int(2**63) + 1]],
schema={"x": pl.UInt64},
data=[[0], [int(2**63) + 1]], schema={"x": pl.UInt64}, orient="row"
)
assert df.rows() == [(0,), (9223372036854775809,)]
1 change: 1 addition & 0 deletions py-polars/tests/unit/test_string_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def test_string_cache_eager_lazy() -> None:
):
df3 = pl.DataFrame( # type: ignore[arg-type]
data=[["reg1"], ["reg2"], ["reg3"], ["reg4"], ["reg5"]],
orient="row",
**params,
)
assert_frame_equal(df1, df3)

0 comments on commit b6cd77d

Please sign in to comment.