refactor(python): Update test suite to explicitly use orient="row" in DataFrame constructor when applicable (#16977)
pola-rs · Jun 16, 2024 · b6cd77d
1 parent 9f097ea
commit b6cd77d
Show file tree

Hide file tree

Showing 22 changed files with 92 additions and 65 deletions.
diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py
@@ -663,6 +663,7 @@ def test_init_numpy_scalars() -> None:
     df_expected = pl.from_records(
         data=[(True, 16, 1234), (False, 64, 9876)],
         schema=OrderedDict([("bool", pl.Boolean), ("i8", pl.Int8), ("u32", pl.UInt32)]),
+        orient="row",
     )
     assert_frame_equal(df, df_expected)
 
@@ -837,13 +838,14 @@ def test_init_py_dtype_misc_float() -> None:
 
 def test_init_seq_of_seq() -> None:
     # List of lists
-    df = pl.DataFrame([[1, 2, 3], [4, 5, 6]], schema=["a", "b", "c"])
+    df = pl.DataFrame([[1, 2, 3], [4, 5, 6]], schema=["a", "b", "c"], orient="row")
     expected = pl.DataFrame({"a": [1, 4], "b": [2, 5], "c": [3, 6]})
     assert_frame_equal(df, expected)
 
     df = pl.DataFrame(
         [[1, 2, 3], [4, 5, 6]],
         schema=[("a", pl.Int8), ("b", pl.Int16), ("c", pl.Int32)],
+        orient="row",
     )
     assert df.schema == {"a": pl.Int8, "b": pl.Int16, "c": pl.Int32}
     assert df.rows() == [(1, 2, 3), (4, 5, 6)]

diff --git a/py-polars/tests/unit/dataframe/test_describe.py b/py-polars/tests/unit/dataframe/test_describe.py
@@ -132,6 +132,7 @@ def test_df_describe_nested() -> None:
         ],
         schema=["statistic"] + df.columns,
         schema_overrides={"struct": pl.Float64, "list": pl.Float64},
+        orient="row",
     )
     assert_frame_equal(result, expected)
 
@@ -154,6 +155,7 @@ def test_df_describe_custom_percentiles() -> None:
             ("max", 2.0),
         ],
         schema=["statistic"] + df.columns,
+        orient="row",
     )
     assert_frame_equal(result, expected)
 
@@ -172,6 +174,7 @@ def test_df_describe_no_percentiles(pcts: list[float] | None) -> None:
             ("max", 2.0),
         ],
         schema=["statistic"] + df.columns,
+        orient="row",
     )
     assert_frame_equal(result, expected)
 
@@ -192,6 +195,7 @@ def test_df_describe_empty_column() -> None:
             ("max", None),
         ],
         schema=["statistic"] + df.columns,
+        orient="row",
     )
     assert_frame_equal(result, expected)
 

diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py
@@ -998,6 +998,7 @@ def test_literal_series() -> None:
                 (21.0, 2, "reg3", datetime(2022, 8, 18, 0), 3),
             ],
             schema=expected_schema,  # type: ignore[arg-type]
+            orient="row",
         ),
         out,
         atol=0.00001,
@@ -1107,7 +1108,7 @@ def __iter__(self) -> Iterator[Any]:
 
 
 def test_from_rows() -> None:
-    df = pl.from_records([[1, 2, "foo"], [2, 3, "bar"]])
+    df = pl.from_records([[1, 2, "foo"], [2, 3, "bar"]], orient="row")
     assert_frame_equal(
         df,
         pl.DataFrame(
@@ -1123,7 +1124,7 @@ def test_from_rows() -> None:
 
     # auto-inference with same num rows/cols
     data = [(1, 2, "foo"), (2, 3, "bar"), (3, 4, "baz")]
-    df = pl.from_records(data)
+    df = pl.from_records(data, orient="row")
     assert data == df.rows()
 
 

diff --git a/py-polars/tests/unit/datatypes/test_decimal.py b/py-polars/tests/unit/datatypes/test_decimal.py
@@ -128,7 +128,9 @@ def test_decimal_convert_to_float_by_schema() -> None:
 
 
 def test_df_constructor_convert_decimal_to_float_9873() -> None:
-    result = pl.DataFrame([[D("45.0000")], [D("45.0000")]], schema={"a": pl.Float64})
+    result = pl.DataFrame(
+        [[D("45.0000")], [D("45.0000")]], schema={"a": pl.Float64}, orient="row"
+    )
     expected = pl.DataFrame({"a": [45.0, 45.0]})
     assert_frame_equal(result, expected)
 

diff --git a/py-polars/tests/unit/datatypes/test_list.py b/py-polars/tests/unit/datatypes/test_list.py
@@ -586,9 +586,8 @@ def test_list_null_pickle() -> None:
 
 def test_struct_with_nulls_as_list() -> None:
     df = pl.DataFrame([[{"a": 1, "b": 2}], [{"c": 3, "d": None}]])
-    assert df.select(pl.concat_list(pl.all()).alias("as_list")).to_dict(
-        as_series=False
-    ) == {
+    result = df.select(pl.concat_list(pl.all()).alias("as_list"))
+    assert result.to_dict(as_series=False) == {
         "as_list": [
             [
                 {"a": 1, "b": 2, "c": None, "d": None},
@@ -833,6 +832,7 @@ def test_null_list_categorical_16405() -> None:
             "match": pl.List(pl.Categorical),
             "what": pl.Categorical,
         },
+        orient="row",
     )
 
     df = df.select(

diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py
@@ -757,56 +757,56 @@ def test_temporal_dtypes_map_elements(
         PolarsInefficientMapWarning,
         match=r"(?s)Replace this expression.*lambda x:",
     ):
-        assert_frame_equal(
-            df.with_columns(
-                # don't actually do this; native expressions are MUCH faster ;)
-                pl.col("timestamp")
-                .map_elements(
-                    lambda x: const_dtm,
-                    skip_nulls=skip_nulls,
-                    return_dtype=pl.Datetime,
-                )
-                .alias("const_dtm"),
-                # note: the below now trigger a PolarsInefficientMapWarning
-                pl.col("timestamp")
-                .map_elements(
-                    lambda x: x and x.date(),
-                    skip_nulls=skip_nulls,
-                    return_dtype=pl.Date,
-                )
-                .alias("date"),
-                pl.col("timestamp")
-                .map_elements(
-                    lambda x: x and x.time(),
-                    skip_nulls=skip_nulls,
-                    return_dtype=pl.Time,
-                )
-                .alias("time"),
+        result = df.with_columns(
+            # don't actually do this; native expressions are MUCH faster ;)
+            pl.col("timestamp")
+            .map_elements(
+                lambda x: const_dtm,
+                skip_nulls=skip_nulls,
+                return_dtype=pl.Datetime,
+            )
+            .alias("const_dtm"),
+            # note: the below now trigger a PolarsInefficientMapWarning
+            pl.col("timestamp")
+            .map_elements(
+                lambda x: x and x.date(),
+                skip_nulls=skip_nulls,
+                return_dtype=pl.Date,
+            )
+            .alias("date"),
+            pl.col("timestamp")
+            .map_elements(
+                lambda x: x and x.time(),
+                skip_nulls=skip_nulls,
+                return_dtype=pl.Time,
+            )
+            .alias("time"),
+        )
+    expected = pl.DataFrame(
+        [
+            (
+                datetime(2010, 9, 12, 10, 19, 54),
+                datetime(2010, 9, 12, 0, 0),
+                date(2010, 9, 12),
+                time(10, 19, 54),
             ),
-            pl.DataFrame(
-                [
-                    (
-                        datetime(2010, 9, 12, 10, 19, 54),
-                        datetime(2010, 9, 12, 0, 0),
-                        date(2010, 9, 12),
-                        time(10, 19, 54),
-                    ),
-                    (None, expected_value, None, None),
-                    (
-                        datetime(2009, 2, 13, 23, 31, 30),
-                        datetime(2010, 9, 12, 0, 0),
-                        date(2009, 2, 13),
-                        time(23, 31, 30),
-                    ),
-                ],
-                schema={
-                    "timestamp": pl.Datetime("ms"),
-                    "const_dtm": pl.Datetime("us"),
-                    "date": pl.Date,
-                    "time": pl.Time,
-                },
+            (None, expected_value, None, None),
+            (
+                datetime(2009, 2, 13, 23, 31, 30),
+                datetime(2010, 9, 12, 0, 0),
+                date(2009, 2, 13),
+                time(23, 31, 30),
             ),
-        )
+        ],
+        schema={
+            "timestamp": pl.Datetime("ms"),
+            "const_dtm": pl.Datetime("us"),
+            "date": pl.Date,
+            "time": pl.Time,
+        },
+        orient="row",
+    )
+    assert_frame_equal(result, expected)
 
 
 def test_timelike_init() -> None:

diff --git a/py-polars/tests/unit/functions/test_cum_count.py b/py-polars/tests/unit/functions/test_cum_count.py
@@ -62,7 +62,9 @@ def test_cum_count_multi_arg_reverse() -> None:
 
 
 def test_cum_count() -> None:
-    df = pl.DataFrame([["a"], ["a"], ["a"], ["b"], ["b"], ["a"]], schema=["A"])
+    df = pl.DataFrame(
+        [["a"], ["a"], ["a"], ["b"], ["b"], ["a"]], schema=["A"], orient="row"
+    )
 
     out = df.group_by("A", maintain_order=True).agg(
         pl.col("A").cum_count().alias("foo")

diff --git a/py-polars/tests/unit/interop/test_interop.py b/py-polars/tests/unit/interop/test_interop.py
@@ -708,7 +708,9 @@ def test_dataframe_from_repr() -> None:
     )
     assert_frame_equal(
         df,
-        pl.DataFrame(data=[(None, None)], schema={"c1": pl.Int32, "c2": pl.Float64}),
+        pl.DataFrame(
+            data=[(None, None)], schema={"c1": pl.Int32, "c2": pl.Float64}, orient="row"
+        ),
     )
 
     df = cast(

diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py
@@ -380,6 +380,7 @@ def test_nan_inf_aggregation() -> None:
             ("inf and null", None),
         ],
         schema=["group", "value"],
+        orient="row",
     )
 
     assert_frame_equal(
@@ -398,6 +399,7 @@ def test_nan_inf_aggregation() -> None:
                 ("inf and null", np.inf, np.inf, np.inf),
             ],
             schema=["group", "min", "max", "mean"],
+            orient="row",
         ),
     )
 

diff --git a/py-polars/tests/unit/operations/aggregation/test_horizontal.py b/py-polars/tests/unit/operations/aggregation/test_horizontal.py
@@ -24,6 +24,7 @@ def test_all_any_horizontally() -> None:
             [None, None, False],
         ],
         schema=["var1", "var2", "var3"],
+        orient="row",
     )
     result = df.select(
         any=pl.any_horizontal(pl.col("var2"), pl.col("var3")),

diff --git a/py-polars/tests/unit/operations/namespaces/string/test_string.py b/py-polars/tests/unit/operations/namespaces/string/test_string.py
@@ -265,6 +265,7 @@ def test_str_find(strict: bool) -> None:
             "pat": pl.String,
             "lit": pl.String,
         },
+        orient="row",
     )
     city, pop, pat, lit = (pl.col(c) for c in ("city", "population", "pat", "lit"))
 
@@ -791,6 +792,7 @@ def test_contains() -> None:
     df = pl.DataFrame(
         data=[(1, "some * * text"), (2, "(with) special\n * chars"), (3, "**etc...?$")],
         schema=["idx", "text"],
+        orient="row",
     )
     for pattern, as_literal, expected in (
         (r"\* \*", False, [True, False, False]),

diff --git a/py-polars/tests/unit/operations/namespaces/test_binary.py b/py-polars/tests/unit/operations/namespaces/test_binary.py
@@ -28,6 +28,7 @@ def test_contains() -> None:
             (4, None),
         ],
         schema=["idx", "bin"],
+        orient="row",
     )
     for pattern, expected in (
         (b"e * ", [True, False, False, None]),

diff --git a/py-polars/tests/unit/operations/test_ewm.py b/py-polars/tests/unit/operations/test_ewm.py
@@ -210,6 +210,7 @@ def test_ewm_with_multiple_chunks() -> None:
             ("z", 3.0, 4.0),
         ],
         schema=["a", "b", "c"],
+        orient="row",
     ).with_columns(
         pl.col(pl.Float64).log().diff().name.prefix("ld_"),
     )

diff --git a/py-polars/tests/unit/operations/test_is_in.py b/py-polars/tests/unit/operations/test_is_in.py
@@ -336,6 +336,7 @@ def test_cat_list_is_in_from_cat(dtype: pl.DataType) -> None:
             (["a"], "d"),
         ],
         schema={"li": pl.List(dtype), "x": dtype},
+        orient="row",
     )
     res = df.select(pl.col("li").list.contains(pl.col("x")))
     expected_df = pl.DataFrame({"li": [False, True, True, False, False]})
@@ -371,6 +372,7 @@ def test_cat_list_is_in_from_str() -> None:
             (["a"], "d"),
         ],
         schema={"li": pl.List(pl.Categorical), "x": pl.String},
+        orient="row",
     )
     res = df.select(pl.col("li").list.contains(pl.col("x")))
     expected_df = pl.DataFrame({"li": [False, True, True, False, False]})

diff --git a/py-polars/tests/unit/operations/test_pivot.py b/py-polars/tests/unit/operations/test_pivot.py
@@ -31,6 +31,7 @@ def test_pivot() -> None:
             ("C", None, None, None, None, 2),
         ],
         schema=["foo", "k", "l", "m", "n", "o"],
+        orient="row",
     )
     assert_frame_equal(result, expected)
 

diff --git a/py-polars/tests/unit/operations/test_shift.py b/py-polars/tests/unit/operations/test_shift.py
@@ -111,6 +111,7 @@ def test_shift_fill_value_group_logicals() -> None:
             (date(2001, 1, 4), "B"),
         ],
         schema=["d", "s"],
+        orient="row",
     )
     result = df.select(pl.col("d").shift(fill_value=pl.col("d").max(), n=-1).over("s"))
 

diff --git a/py-polars/tests/unit/sql/test_temporal.py b/py-polars/tests/unit/sql/test_temporal.py
@@ -174,6 +174,7 @@ def test_extract_century_millennium(dt: date, expected: list[int]) -> None:
             right=pl.DataFrame(
                 data=[expected + expected],
                 schema=["c1", "c2", "c3", "c4"],
+                orient="row",
             ).cast(pl.Int32),
         )
 

diff --git a/py-polars/tests/unit/test_api.py b/py-polars/tests/unit/test_api.py
@@ -93,12 +93,15 @@ def by_column_dtypes(self) -> list[pl.LazyFrame]:
 
     df1, df2 = (d.collect() for d in ldf.split.by_column_dtypes())  # type: ignore[attr-defined]
     assert_frame_equal(
-        df1, pl.DataFrame([("xx",), ("xy",), ("yy",), ("yz",)], schema=["a1"])
+        df1,
+        pl.DataFrame([("xx",), ("xy",), ("yy",), ("yz",)], schema=["a1"], orient="row"),
     )
     assert_frame_equal(
         df2,
         pl.DataFrame(
-            [(2, 3, 4), (4, 5, 6), (5, 6, 7), (6, 7, 8)], schema=["a2", "b1", "b2"]
+            [(2, 3, 4), (4, 5, 6), (5, 6, 7), (6, 7, 8)],
+            schema=["a2", "b1", "b2"],
+            orient="row",
         ),
     )
 

diff --git a/py-polars/tests/unit/test_cse.py b/py-polars/tests/unit/test_cse.py
@@ -242,6 +242,7 @@ def test_windows_cse_excluded() -> None:
             ("b", "qqq", 0),
         ],
         schema=["a", "b", "c"],
+        orient="row",
     )
 
     result = lf.select(

diff --git a/py-polars/tests/unit/test_queries.py b/py-polars/tests/unit/test_queries.py
@@ -157,6 +157,7 @@ def test_group_by_agg_equals_zero_3535() -> None:
             ("val1", pl.Int16),
             ("val2", pl.Float32),
         ],
+        orient="row",
     )
     # group by the key, aggregating the two numeric cols
     assert df.group_by(pl.col("key"), maintain_order=True).agg(

diff --git a/py-polars/tests/unit/test_rows.py b/py-polars/tests/unit/test_rows.py
@@ -234,10 +234,7 @@ def test_iter_rows() -> None:
     ],
 )
 def test_row_constructor_schema(primitive: pl.DataType) -> None:
-    result = pl.DataFrame(
-        data=[[1], [2], [3]],
-        schema={"d": primitive},
-    )
+    result = pl.DataFrame(data=[[1], [2], [3]], schema={"d": primitive}, orient="row")
 
     assert result.dtypes == [primitive]
     assert result.to_dict(as_series=False) == {"d": [1, 2, 3]}
@@ -246,7 +243,6 @@ def test_row_constructor_schema(primitive: pl.DataType) -> None:
 def test_row_constructor_uint64() -> None:
     # validate init with a valid UInt64 that exceeds Int64 upper bound
     df = pl.DataFrame(
-        data=[[0], [int(2**63) + 1]],
-        schema={"x": pl.UInt64},
+        data=[[0], [int(2**63) + 1]], schema={"x": pl.UInt64}, orient="row"
     )
     assert df.rows() == [(0,), (9223372036854775809,)]
diff --git a/py-polars/tests/unit/test_string_cache.py b/py-polars/tests/unit/test_string_cache.py
@@ -180,6 +180,7 @@ def test_string_cache_eager_lazy() -> None:
         ):
             df3 = pl.DataFrame(  # type: ignore[arg-type]
                 data=[["reg1"], ["reg2"], ["reg3"], ["reg4"], ["reg5"]],
+                orient="row",
                 **params,
             )
             assert_frame_equal(df1, df3)