From 834867d427bba1e53579fbab7dce940118d3dffb Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Tue, 11 Jun 2024 09:21:28 +0200
Subject: [PATCH 1/3] Move existing tests

---
 .../tests/unit/{ => datatypes}/test_schema.py | 23 -------------------
 .../unit/functions/as_datatype/test_struct.py | 19 +++++++++++++++
 .../operations/namespaces/list/test_list.py   |  5 ++++
 3 files changed, 24 insertions(+), 23 deletions(-)
 rename py-polars/tests/unit/{ => datatypes}/test_schema.py (97%)
 create mode 100644 py-polars/tests/unit/functions/as_datatype/test_struct.py

diff --git a/py-polars/tests/unit/test_schema.py b/py-polars/tests/unit/datatypes/test_schema.py
similarity index 97%
rename from py-polars/tests/unit/test_schema.py
rename to py-polars/tests/unit/datatypes/test_schema.py
index fd6b154f06cf..d85beeba346d 100644
--- a/py-polars/tests/unit/test_schema.py
+++ b/py-polars/tests/unit/datatypes/test_schema.py
@@ -667,29 +667,6 @@ def test_alias_prune_in_fold_15438() -> None:
     assert_frame_equal(df, expected)
 
 
-def test_resolved_names_15442() -> None:
-    df = pl.DataFrame(
-        {
-            "x": [206.0],
-            "y": [225.0],
-        }
-    )
-    center = pl.struct(
-        x=pl.col("x"),
-        y=pl.col("y"),
-    )
-
-    left = 0
-    right = 1000
-    in_x = (left < center.struct.field("x")) & (center.struct.field("x") <= right)
-    assert df.lazy().filter(in_x).collect().shape == (1, 2)
-
-
-def test_list_sum_bool_schema() -> None:
-    q = pl.LazyFrame({"x": [[True, True, False]]})
-    assert q.select(pl.col("x").list.sum()).schema["x"] == pl.UInt32
-
-
 @pytest.mark.parametrize("op", ["and_", "or_"])
 def test_bitwise_integral_schema(op: str) -> None:
     df = pl.LazyFrame({"a": [1, 2], "b": [3, 4]})
diff --git a/py-polars/tests/unit/functions/as_datatype/test_struct.py b/py-polars/tests/unit/functions/as_datatype/test_struct.py
new file mode 100644
index 000000000000..beeff23fb24c
--- /dev/null
+++ b/py-polars/tests/unit/functions/as_datatype/test_struct.py
@@ -0,0 +1,19 @@
+import polars as pl
+
+
+def test_resolved_names_15442() -> None:
+    """Filter a LazyFrame on a field taken from a keyword-built struct."""
+    frame = pl.DataFrame({"x": [206.0], "y": [225.0]})
+
+    # The struct is assembled from keyword arguments naming its two fields.
+    point = pl.struct(x=pl.col("x"), y=pl.col("y"))
+    point_x = point.struct.field("x")
+
+    lower = 0
+    upper = 1000
+    above_lower = lower < point_x
+    within_upper = point_x <= upper
+    predicate = above_lower & within_upper
+
+    filtered = frame.lazy().filter(predicate).collect()
+    assert filtered.shape == (1, 2)
diff --git a/py-polars/tests/unit/operations/namespaces/list/test_list.py b/py-polars/tests/unit/operations/namespaces/list/test_list.py
index 0b86f7eaca7f..86ed6b719f13 100644
--- a/py-polars/tests/unit/operations/namespaces/list/test_list.py
+++ b/py-polars/tests/unit/operations/namespaces/list/test_list.py
@@ -896,3 +896,8 @@ def test_list_eval_err_raise_15653() -> None:
     df = pl.DataFrame({"foo": [[]]})
     with pytest.raises(pl.StructFieldNotFoundError):
         df.with_columns(bar=pl.col("foo").list.eval(pl.element().struct.field("baz")))
+
+
+def test_list_sum_bool_schema() -> None:
+    summed = pl.LazyFrame({"x": [[True, True, False]]}).select(pl.col("x").list.sum())
+    assert summed.schema["x"] == pl.UInt32  # lazy schema of list.sum() on booleans

From 44d2ec3ffb9b565a0c1e5e8213aa11652200baba Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Tue, 11 Jun 2024 13:59:31 +0200
Subject: [PATCH 2/3] Move more tests

---
 .../tests/unit/constructors/test_dataframe.py |  34 +-
 .../tests/unit/constructors/test_series.py    |   6 +
 py-polars/tests/unit/datatypes/test_schema.py | 447 +-----------------
 py-polars/tests/unit/functions/test_concat.py |  39 ++
 py-polars/tests/unit/lazyframe/test_rename.py |   8 +
 .../operations/aggregation/test_horizontal.py |  33 ++
 .../operations/arithmetic/test_arithmetic.py  |  27 ++
 .../unit/operations/arithmetic/test_pow.py    |  59 +++
 .../unit/operations/namespaces/test_struct.py |  12 +
 py-polars/tests/unit/operations/test_cast.py  |  17 +
 .../tests/unit/operations/test_comparison.py  |   6 +
 py-polars/tests/unit/operations/test_diff.py  |  12 +
 .../tests/unit/operations/test_fill_null.py   |  37 ++
 .../tests/unit/operations/test_group_by.py    |  69 +++
 .../unit/operations/test_shrink_dtype.py      |  46 ++
 py-polars/tests/unit/test_convert.py          |  40 ++
 16 files changed, 445 insertions(+), 447 deletions(-)
 create mode 100644 py-polars/tests/unit/lazyframe/test_rename.py
 create mode 100644 py-polars/tests/unit/operations/arithmetic/test_pow.py
 create mode 100644 py-polars/tests/unit/operations/test_diff.py
 create mode 100644 py-polars/tests/unit/operations/test_fill_null.py
 create mode 100644 py-polars/tests/unit/operations/test_shrink_dtype.py
 create mode 100644 py-polars/tests/unit/test_convert.py

diff --git a/py-polars/tests/unit/constructors/test_dataframe.py b/py-polars/tests/unit/constructors/test_dataframe.py
index 9a03d3a2f2eb..9de21d42bbbe 100644
--- a/py-polars/tests/unit/constructors/test_dataframe.py
+++ b/py-polars/tests/unit/constructors/test_dataframe.py
@@ -2,7 +2,7 @@
 
 import sys
 from collections import OrderedDict
-from typing import Any
+from typing import Any, Iterator, Mapping
 
 import pytest
 
@@ -159,3 +159,35 @@ def test_unit_and_empty_construction_15896() -> None:
             A=pl.int_range("A"),  # creates empty series
         )
     )
+
+
+class CustomSchema(Mapping[str, Any]):
+    """Dummy schema object for testing compatibility with Mapping."""
+
+    _items: dict[str, Any]  # backing store; the attribute every method below uses
+
+    def __init__(self, **named_entries: Any) -> None:
+        self._items = OrderedDict(named_entries.items())
+
+    def __getitem__(self, key: str) -> Any:
+        return self._items[key]
+
+    def __len__(self) -> int:
+        return len(self._items)
+
+    def __iter__(self) -> Iterator[str]:
+        yield from self._items
+
+
+def test_custom_schema() -> None:
+    valid = CustomSchema(bool=pl.Boolean, misc=pl.UInt8)
+    expected = OrderedDict([("bool", pl.Boolean), ("misc", pl.UInt8)])
+    assert pl.DataFrame(schema=valid).schema == expected
+    with pytest.raises(ValueError):  # dtype names given as plain strings
+        pl.DataFrame(schema=CustomSchema(bool="boolean", misc="unsigned int"))
+
+
+def test_list_null_constructor_schema() -> None:
+    null_list = pl.List(pl.Null)
+    for frame in (pl.DataFrame({"a": [[]]}), pl.DataFrame(schema={"a": pl.List})):
+        assert frame.dtypes[0] == null_list
diff --git a/py-polars/tests/unit/constructors/test_series.py b/py-polars/tests/unit/constructors/test_series.py
index 464f6b494712..fd6dc683bda8 100644
--- a/py-polars/tests/unit/constructors/test_series.py
+++ b/py-polars/tests/unit/constructors/test_series.py
@@ -148,3 +148,9 @@ def test_series_init_np_2d_zero_zero_shape() -> None:
         match=re.escape("cannot reshape empty array into shape (0, 0)"),
     ):
         pl.Series(arr)
+
+
+def test_list_null_constructor_schema() -> None:
+    null_list = pl.List(pl.Null)
+    for series in (pl.Series([[]]), pl.Series([[]], dtype=pl.List)):
+        assert series.dtype == null_list
diff --git a/py-polars/tests/unit/datatypes/test_schema.py b/py-polars/tests/unit/datatypes/test_schema.py
index d85beeba346d..ab2260d71dbe 100644
--- a/py-polars/tests/unit/datatypes/test_schema.py
+++ b/py-polars/tests/unit/datatypes/test_schema.py
@@ -1,186 +1,11 @@
 from __future__ import annotations
 
-from collections import OrderedDict
 from datetime import date, timedelta
-from typing import TYPE_CHECKING, Any, Iterator, Mapping
 
 import pytest
 
 import polars as pl
-from polars.testing import assert_frame_equal, assert_series_equal
-
-if TYPE_CHECKING:
-    from polars.type_aliases import PolarsDataType
-
-
-class CustomSchema(Mapping[str, Any]):
-    """Dummy schema object for testing compatibility with Mapping."""
-
-    _entries: dict[str, Any]
-
-    def __init__(self, **named_entries: Any) -> None:
-        self._items = OrderedDict(named_entries.items())
-
-    def __getitem__(self, key: str) -> Any:
-        return self._items[key]
-
-    def __len__(self) -> int:
-        return len(self._items)
-
-    def __iter__(self) -> Iterator[str]:
-        yield from self._items
-
-
-def test_custom_schema() -> None:
-    df = pl.DataFrame(schema=CustomSchema(bool=pl.Boolean, misc=pl.UInt8))
-    assert df.schema == OrderedDict([("bool", pl.Boolean), ("misc", pl.UInt8)])
-
-    with pytest.raises(ValueError):
-        pl.DataFrame(schema=CustomSchema(bool="boolean", misc="unsigned int"))
-
-
-def test_schema_on_agg() -> None:
-    df = pl.DataFrame({"a": ["x", "x", "y", "n"], "b": [1, 2, 3, 4]})
-
-    assert (
-        df.lazy()
-        .group_by("a")
-        .agg(
-            [
-                pl.col("b").min().alias("min"),
-                pl.col("b").max().alias("max"),
-                pl.col("b").sum().alias("sum"),
-                pl.col("b").first().alias("first"),
-                pl.col("b").last().alias("last"),
-            ]
-        )
-    ).schema == {
-        "a": pl.String,
-        "min": pl.Int64,
-        "max": pl.Int64,
-        "sum": pl.Int64,
-        "first": pl.Int64,
-        "last": pl.Int64,
-    }
-
-
-def test_fill_null_minimal_upcast_4056() -> None:
-    df = pl.DataFrame({"a": [-1, 2, None]})
-    df = df.with_columns(pl.col("a").cast(pl.Int8))
-    assert df.with_columns(pl.col(pl.Int8).fill_null(-1)).dtypes[0] == pl.Int8
-    assert df.with_columns(pl.col(pl.Int8).fill_null(-1000)).dtypes[0] == pl.Int16
-
-
-def test_fill_enum_upcast() -> None:
-    dtype = pl.Enum(["a", "b"])
-    s = pl.Series(["a", "b", None], dtype=dtype)
-    s_filled = s.fill_null("b")
-    expected = pl.Series(["a", "b", "b"], dtype=dtype)
-    assert s_filled.dtype == dtype
-    assert_series_equal(s_filled, expected)
-
-
-def test_pow_dtype() -> None:
-    df = pl.DataFrame(
-        {
-            "foo": [1, 2, 3, 4, 5],
-            "a": [1, 2, 3, 4, 5],
-            "b": [1, 2, 3, 4, 5],
-            "c": [1, 2, 3, 4, 5],
-            "d": [1, 2, 3, 4, 5],
-            "e": [1, 2, 3, 4, 5],
-            "f": [1, 2, 3, 4, 5],
-            "g": [1, 2, 1, 2, 1],
-            "h": [1, 2, 1, 2, 1],
-        },
-        schema_overrides={
-            "a": pl.Int64,
-            "b": pl.UInt64,
-            "c": pl.Int32,
-            "d": pl.UInt32,
-            "e": pl.Int16,
-            "f": pl.UInt16,
-            "g": pl.Int8,
-            "h": pl.UInt8,
-        },
-    ).lazy()
-
-    df = (
-        df.with_columns([pl.col("foo").cast(pl.UInt32)])
-        .with_columns(
-            (pl.col("foo") * 2**2).alias("scaled_foo"),
-            (pl.col("foo") * 2**2.1).alias("scaled_foo2"),
-            (pl.col("a") ** pl.col("h")).alias("a_pow_h"),
-            (pl.col("b") ** pl.col("h")).alias("b_pow_h"),
-            (pl.col("c") ** pl.col("h")).alias("c_pow_h"),
-            (pl.col("d") ** pl.col("h")).alias("d_pow_h"),
-            (pl.col("e") ** pl.col("h")).alias("e_pow_h"),
-            (pl.col("f") ** pl.col("h")).alias("f_pow_h"),
-            (pl.col("g") ** pl.col("h")).alias("g_pow_h"),
-            (pl.col("h") ** pl.col("h")).alias("h_pow_h"),
-        )
-        .drop(["a", "b", "c", "d", "e", "f", "g", "h"])
-    )
-    expected = [
-        pl.UInt32,
-        pl.UInt32,
-        pl.Float64,
-        pl.Int64,
-        pl.UInt64,
-        pl.Int32,
-        pl.UInt32,
-        pl.Int16,
-        pl.UInt16,
-        pl.Int8,
-        pl.UInt8,
-    ]
-    assert df.collect().dtypes == expected
-    assert df.dtypes == expected
-
-
-def test_bool_numeric_supertype() -> None:
-    df = pl.DataFrame({"v": [1, 2, 3, 4, 5, 6]})
-    for dt in [
-        pl.UInt8,
-        pl.UInt16,
-        pl.UInt32,
-        pl.UInt64,
-        pl.Int8,
-        pl.Int16,
-        pl.Int32,
-        pl.Int64,
-    ]:
-        assert (
-            df.select([(pl.col("v") < 3).sum().cast(dt) / pl.len()]).item() - 0.3333333
-            <= 0.00001
-        )
-
-
-def test_from_dicts_nested_nulls() -> None:
-    assert pl.from_dicts([{"a": [None, None]}, {"a": [1, 2]}]).to_dict(
-        as_series=False
-    ) == {"a": [[None, None], [1, 2]]}
-
-
-def test_group_schema_err() -> None:
-    df = pl.DataFrame({"foo": [None, 1, 2], "bar": [1, 2, 3]}).lazy()
-    with pytest.raises(pl.ColumnNotFoundError):
-        df.group_by("not-existent").agg(pl.col("bar").max().alias("max_bar")).schema
-
-
-def test_schema_inference_from_rows() -> None:
-    # these have to upcast to float
-    result = pl.from_records([[1, 2.1, 3], [4, 5, 6.4]])
-    assert result.to_dict(as_series=False) == {
-        "column_0": [1.0, 2.1, 3.0],
-        "column_1": [4.0, 5.0, 6.4],
-    }
-
-    result = pl.from_dicts([{"a": 1, "b": 2}, {"a": 3.1, "b": 4.5}])
-    assert result.to_dict(as_series=False) == {
-        "a": [1.0, 3.1],
-        "b": [2.0, 4.5],
-    }
+from polars.testing import assert_frame_equal
 
 
 def test_lazy_map_schema() -> None:
@@ -264,114 +89,6 @@ def test_fold_all_schema() -> None:
     assert result.dtypes == [pl.UInt64]
 
 
-def test_fill_null_static_schema_4843() -> None:
-    df1 = pl.DataFrame(
-        {
-            "a": [1, 2, None],
-            "b": [1, None, 4],
-        }
-    ).lazy()
-
-    df2 = df1.select([pl.col(pl.Int64).fill_null(0)])
-    df3 = df2.select(pl.col(pl.Int64))
-    assert df3.schema == {"a": pl.Int64, "b": pl.Int64}
-
-
-def test_shrink_dtype() -> None:
-    out = pl.DataFrame(
-        {
-            "a": [1, 2, 3],
-            "b": [1, 2, 2 << 32],
-            "c": [-1, 2, 1 << 30],
-            "d": [-112, 2, 112],
-            "e": [-112, 2, 129],
-            "f": ["a", "b", "c"],
-            "g": [0.1, 1.32, 0.12],
-            "h": [True, None, False],
-            "i": pl.Series([None, None, None], dtype=pl.UInt64),
-            "j": pl.Series([None, None, None], dtype=pl.Int64),
-            "k": pl.Series([None, None, None], dtype=pl.Float64),
-        }
-    ).select(pl.all().shrink_dtype())
-    assert out.dtypes == [
-        pl.Int8,
-        pl.Int64,
-        pl.Int32,
-        pl.Int8,
-        pl.Int16,
-        pl.String,
-        pl.Float32,
-        pl.Boolean,
-        pl.UInt8,
-        pl.Int8,
-        pl.Float32,
-    ]
-
-    assert out.to_dict(as_series=False) == {
-        "a": [1, 2, 3],
-        "b": [1, 2, 8589934592],
-        "c": [-1, 2, 1073741824],
-        "d": [-112, 2, 112],
-        "e": [-112, 2, 129],
-        "f": ["a", "b", "c"],
-        "g": [0.10000000149011612, 1.3200000524520874, 0.11999999731779099],
-        "h": [True, None, False],
-        "i": [None, None, None],
-        "j": [None, None, None],
-        "k": [None, None, None],
-    }
-
-
-def test_diff_duration_dtype() -> None:
-    data = ["2022-01-01", "2022-01-02", "2022-01-03", "2022-01-03"]
-    df = pl.Series("date", data).str.to_date("%Y-%m-%d").to_frame()
-
-    result = df.select(pl.col("date").diff() < pl.duration(days=1))
-
-    expected = pl.Series("date", [None, False, False, True]).to_frame()
-    assert_frame_equal(result, expected)
-
-
-def test_schema_owned_arithmetic_5669() -> None:
-    df = (
-        pl.DataFrame({"A": [1, 2, 3]})
-        .lazy()
-        .filter(pl.col("A") >= 3)
-        .with_columns(-pl.col("A").alias("B"))
-        .collect()
-    )
-    assert df.columns == ["A", "B"]
-    assert df.rows() == [(3, -3)]
-
-
-def test_fill_null_f32_with_lit() -> None:
-    # ensure the literal integer does not upcast the f32 to an f64
-    df = pl.DataFrame({"a": [1.1, 1.2]}, schema=[("a", pl.Float32)])
-    assert df.fill_null(value=0).dtypes == [pl.Float32]
-
-
-def test_lazy_rename() -> None:
-    df = pl.DataFrame({"x": [1], "y": [2]})
-
-    assert (
-        df.lazy().rename({"y": "x", "x": "y"}).select(["x", "y"]).collect()
-    ).to_dict(as_series=False) == {"x": [2], "y": [1]}
-
-
-def test_all_null_cast_5826() -> None:
-    df = pl.DataFrame(data=[pl.Series("a", [None], dtype=pl.String)])
-    out = df.with_columns(pl.col("a").cast(pl.Boolean))
-    assert out.dtypes == [pl.Boolean]
-    assert out.item() is None
-
-
-def test_empty_list_eval_schema_5734() -> None:
-    df = pl.DataFrame({"a": [[{"b": 1, "c": 2}]]})
-    assert df.filter(False).select(
-        pl.col("a").list.eval(pl.element().struct.field("b"))
-    ).schema == {"a": pl.List(pl.Int64)}
-
-
 def test_list_eval_type_cast_11188() -> None:
     df = pl.DataFrame(
         [
@@ -384,27 +101,6 @@ def test_list_eval_type_cast_11188() -> None:
     ).schema == {"a_str": pl.List(pl.String)}
 
 
-def test_schema_true_divide_6643() -> None:
-    df = pl.DataFrame({"a": [1]})
-    a = pl.col("a")
-    assert df.lazy().select(a / 2).select(pl.col(pl.Int64)).collect().shape == (0, 0)
-
-
-def test_from_dicts_all_cols_6716() -> None:
-    dicts = [{"a": None} for _ in range(20)] + [{"a": "crash"}]
-
-    with pytest.raises(
-        pl.ComputeError, match="make sure that all rows have the same schema"
-    ):
-        pl.from_dicts(dicts, infer_schema_length=20)
-    assert pl.from_dicts(dicts, infer_schema_length=None).dtypes == [pl.String]
-
-
-def test_from_dicts_empty() -> None:
-    with pytest.raises(pl.NoDataError, match="no data, cannot infer schema"):
-        pl.from_dicts([])
-
-
 def test_duration_division_schema() -> None:
     df = pl.DataFrame({"a": [1]})
     q = (
@@ -472,101 +168,6 @@ def sub_col_min(column: str, min_column: str) -> pl.Expr:
     ]
 
 
-@pytest.mark.parametrize(
-    ("data", "expr", "expected_select", "expected_gb"),
-    [
-        (
-            {"x": ["x"], "y": ["y"]},
-            pl.coalesce(pl.col("x"), pl.col("y")),
-            {"x": pl.String},
-            {"x": pl.List(pl.String)},
-        ),
-        (
-            {"x": [True]},
-            pl.col("x").sum(),
-            {"x": pl.UInt32},
-            {"x": pl.UInt32},
-        ),
-        (
-            {"a": [[1, 2]]},
-            pl.col("a").list.sum(),
-            {"a": pl.Int64},
-            {"a": pl.List(pl.Int64)},
-        ),
-    ],
-)
-def test_schemas(
-    data: dict[str, list[Any]],
-    expr: pl.Expr,
-    expected_select: dict[str, pl.PolarsDataType],
-    expected_gb: dict[str, pl.PolarsDataType],
-) -> None:
-    df = pl.DataFrame(data)
-
-    # test selection schema
-    schema = df.select(expr).schema
-    for key, dtype in expected_select.items():
-        assert schema[key] == dtype
-
-    # test group_by schema
-    schema = df.group_by(pl.lit(1)).agg(expr).schema
-    for key, dtype in expected_gb.items():
-        assert schema[key] == dtype
-
-
-def test_list_null_constructor_schema() -> None:
-    expected = pl.List(pl.Null)
-    assert pl.Series([[]]).dtype == expected
-    assert pl.Series([[]], dtype=pl.List).dtype == expected
-    assert pl.DataFrame({"a": [[]]}).dtypes[0] == expected
-    assert pl.DataFrame(schema={"a": pl.List}).dtypes[0] == expected
-
-
-def test_schema_ne_missing_9256() -> None:
-    df = pl.DataFrame({"a": [0, 1, None], "b": [True, False, True]})
-
-    assert df.select(pl.col("a").ne_missing(0).or_(pl.col("b")))["a"].all()
-
-
-def test_concat_vertically_relaxed() -> None:
-    a = pl.DataFrame(
-        data={"a": [1, 2, 3], "b": [True, False, None]},
-        schema={"a": pl.Int8, "b": pl.Boolean},
-    )
-    b = pl.DataFrame(
-        data={"a": [43, 2, 3], "b": [32, 1, None]},
-        schema={"a": pl.Int16, "b": pl.Int64},
-    )
-    out = pl.concat([a, b], how="vertical_relaxed")
-    assert out.schema == {"a": pl.Int16, "b": pl.Int64}
-    assert out.to_dict(as_series=False) == {
-        "a": [1, 2, 3, 43, 2, 3],
-        "b": [1, 0, None, 32, 1, None],
-    }
-    out = pl.concat([b, a], how="vertical_relaxed")
-    assert out.schema == {"a": pl.Int16, "b": pl.Int64}
-    assert out.to_dict(as_series=False) == {
-        "a": [43, 2, 3, 1, 2, 3],
-        "b": [32, 1, None, 1, 0, None],
-    }
-
-    c = pl.DataFrame({"a": [1, 2], "b": [2, 1]})
-    d = pl.DataFrame({"a": [1.0, 0.2], "b": [None, 0.1]})
-
-    out = pl.concat([c, d], how="vertical_relaxed")
-    assert out.schema == {"a": pl.Float64, "b": pl.Float64}
-    assert out.to_dict(as_series=False) == {
-        "a": [1.0, 2.0, 1.0, 0.2],
-        "b": [2.0, 1.0, None, 0.1],
-    }
-    out = pl.concat([d, c], how="vertical_relaxed")
-    assert out.schema == {"a": pl.Float64, "b": pl.Float64}
-    assert out.to_dict(as_series=False) == {
-        "a": [1.0, 0.2, 1.0, 2.0],
-        "b": [None, 0.1, 2.0, 1.0],
-    }
-
-
 def test_lit_iter_schema() -> None:
     df = pl.DataFrame(
         {
@@ -602,52 +203,6 @@ def test_nested_binary_literal_super_type_12227() -> None:
     )
 
 
-def test_literal_subtract_schema_13284() -> None:
-    assert (
-        pl.LazyFrame({"a": [23, 30]}, schema={"a": pl.UInt8})
-        .with_columns(pl.col("a") - pl.lit(1))
-        .group_by("a")
-        .len()
-    ).schema == OrderedDict([("a", pl.UInt8), ("len", pl.UInt32)])
-
-
-def test_schema_boolean_sum_horizontal() -> None:
-    lf = pl.LazyFrame({"a": [True, False]}).select(pl.sum_horizontal("a"))
-    assert lf.schema == OrderedDict([("a", pl.UInt32)])
-
-
-@pytest.mark.parametrize(
-    ("in_dtype", "out_dtype"),
-    [
-        (pl.Boolean, pl.Float64),
-        (pl.UInt8, pl.Float64),
-        (pl.UInt16, pl.Float64),
-        (pl.UInt32, pl.Float64),
-        (pl.UInt64, pl.Float64),
-        (pl.Int8, pl.Float64),
-        (pl.Int16, pl.Float64),
-        (pl.Int32, pl.Float64),
-        (pl.Int64, pl.Float64),
-        (pl.Float32, pl.Float32),
-        (pl.Float64, pl.Float64),
-    ],
-)
-def test_schema_mean_horizontal_single_column(
-    in_dtype: PolarsDataType,
-    out_dtype: PolarsDataType,
-) -> None:
-    lf = pl.LazyFrame({"a": pl.Series([1, 0], dtype=in_dtype)}).select(
-        pl.mean_horizontal(pl.all())
-    )
-
-    assert lf.schema == OrderedDict([("a", out_dtype)])
-
-
-def test_struct_alias_prune_15401() -> None:
-    df = pl.DataFrame({"a": []}, schema={"a": pl.Struct({"b": pl.Int8})})
-    assert df.select(pl.col("a").alias("c").struct.field("b")).columns == ["b"]
-
-
 def test_alias_prune_in_fold_15438() -> None:
     df = pl.DataFrame({"x": [1, 2], "expected_result": ["first", "second"]}).select(
         actual_result=pl.fold(
diff --git a/py-polars/tests/unit/functions/test_concat.py b/py-polars/tests/unit/functions/test_concat.py
index dacd997d49f7..8e7c4c9f31e3 100644
--- a/py-polars/tests/unit/functions/test_concat.py
+++ b/py-polars/tests/unit/functions/test_concat.py
@@ -20,3 +20,42 @@ def test_concat_lf_stack_overflow() -> None:
     for i in range(n):
         bar = pl.concat([bar, pl.DataFrame({"a": i}).lazy()])
     assert bar.collect().shape == (1001, 1)
+
+
+def test_concat_vertically_relaxed() -> None:
+    """Check schema and values of `vertical_relaxed` concat in both input orders."""
+    narrow = pl.DataFrame(
+        data={"a": [1, 2, 3], "b": [True, False, None]},
+        schema={"a": pl.Int8, "b": pl.Boolean},
+    )
+    wide = pl.DataFrame(
+        data={"a": [43, 2, 3], "b": [32, 1, None]},
+        schema={"a": pl.Int16, "b": pl.Int64},
+    )
+    combined = pl.concat([narrow, wide], how="vertical_relaxed")
+    assert combined.schema == {"a": pl.Int16, "b": pl.Int64}
+    assert combined.to_dict(as_series=False) == {
+        "a": [1, 2, 3, 43, 2, 3],
+        "b": [1, 0, None, 32, 1, None],
+    }
+    combined = pl.concat([wide, narrow], how="vertical_relaxed")
+    assert combined.schema == {"a": pl.Int16, "b": pl.Int64}
+    assert combined.to_dict(as_series=False) == {
+        "a": [43, 2, 3, 1, 2, 3],
+        "b": [32, 1, None, 1, 0, None],
+    }
+
+    ints = pl.DataFrame({"a": [1, 2], "b": [2, 1]})
+    floats = pl.DataFrame({"a": [1.0, 0.2], "b": [None, 0.1]})
+    combined = pl.concat([ints, floats], how="vertical_relaxed")
+    assert combined.schema == {"a": pl.Float64, "b": pl.Float64}
+    assert combined.to_dict(as_series=False) == {
+        "a": [1.0, 2.0, 1.0, 0.2],
+        "b": [2.0, 1.0, None, 0.1],
+    }
+    combined = pl.concat([floats, ints], how="vertical_relaxed")
+    assert combined.schema == {"a": pl.Float64, "b": pl.Float64}
+    assert combined.to_dict(as_series=False) == {
+        "a": [1.0, 0.2, 1.0, 2.0],
+        "b": [None, 0.1, 2.0, 1.0],
+    }
diff --git a/py-polars/tests/unit/lazyframe/test_rename.py b/py-polars/tests/unit/lazyframe/test_rename.py
new file mode 100644
index 000000000000..2a7462d1edb2
--- /dev/null
+++ b/py-polars/tests/unit/lazyframe/test_rename.py
@@ -0,0 +1,8 @@
+import polars as pl
+
+
+def test_lazy_rename() -> None:
+    """Swap two column names in a single lazy `rename` call."""
+    swap = {"y": "x", "x": "y"}
+    swapped = pl.DataFrame({"x": [1], "y": [2]}).lazy().rename(swap).select(["x", "y"])
+    assert swapped.collect().to_dict(as_series=False) == {"x": [2], "y": [1]}
diff --git a/py-polars/tests/unit/operations/aggregation/test_horizontal.py b/py-polars/tests/unit/operations/aggregation/test_horizontal.py
index 0596b73417d2..4d474367af02 100644
--- a/py-polars/tests/unit/operations/aggregation/test_horizontal.py
+++ b/py-polars/tests/unit/operations/aggregation/test_horizontal.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import datetime
+from collections import OrderedDict
 from typing import Any
 
 import pytest
@@ -407,3 +408,35 @@ def test_mean_horizontal_all_null() -> None:
 
     expected = pl.LazyFrame({"a": [1.5, None]}, schema={"a": pl.Float64})
     assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    ("in_dtype", "out_dtype"),
+    [
+        (pl.Boolean, pl.Float64),
+        (pl.UInt8, pl.Float64),
+        (pl.UInt16, pl.Float64),
+        (pl.UInt32, pl.Float64),
+        (pl.UInt64, pl.Float64),
+        (pl.Int8, pl.Float64),
+        (pl.Int16, pl.Float64),
+        (pl.Int32, pl.Float64),
+        (pl.Int64, pl.Float64),
+        (pl.Float32, pl.Float32),
+        (pl.Float64, pl.Float64),
+    ],
+)
+def test_schema_mean_horizontal_single_column(
+    in_dtype: pl.PolarsDataType,
+    out_dtype: pl.PolarsDataType,
+) -> None:
+    """The lazy schema of `mean_horizontal` over one column has `out_dtype`."""
+    column = pl.Series([1, 0], dtype=in_dtype)
+    query = pl.LazyFrame({"a": column}).select(pl.mean_horizontal(pl.all()))
+    expected_schema = OrderedDict([("a", out_dtype)])
+    assert query.schema == expected_schema
+
+
+def test_schema_boolean_sum_horizontal() -> None:
+    summed = pl.LazyFrame({"a": [True, False]}).select(pl.sum_horizontal("a"))
+    assert summed.schema == OrderedDict(a=pl.UInt32)  # boolean input, UInt32 output
diff --git a/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py b/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
index 4097f304c082..1973f0483eae 100644
--- a/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
+++ b/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
@@ -1,4 +1,5 @@
 import operator
+from collections import OrderedDict
 from datetime import date, datetime, timedelta
 from typing import Any
 
@@ -583,3 +584,29 @@ def test_array_arithmetic_same_size(expected: Any, expr: pl.Expr) -> None:
         df.select(expr),
         pl.Series("a", expected).to_frame(),
     )
+
+
+def test_schema_owned_arithmetic_5669() -> None:
+    """Negate an aliased column after a filter and check names and rows."""
+    col_a = pl.col("A")
+    negated_b = -col_a.alias("B")  # unary minus applies to the aliased expression
+    lazy = pl.LazyFrame({"A": [1, 2, 3]}).filter(col_a >= 3).with_columns(negated_b)
+
+    collected = lazy.collect()
+    assert collected.columns == ["A", "B"]
+    assert collected.rows() == [(3, -3)]
+
+
+def test_schema_true_divide_6643() -> None:
+    halved = pl.DataFrame({"a": [1]}).lazy().select(pl.col("a") / 2)
+    # No Int64 column is left to select after the division, so the frame is empty.
+    assert halved.select(pl.col(pl.Int64)).collect().shape == (0, 0)
+
+
+def test_literal_subtract_schema_13284() -> None:
+    """Subtract a literal from a UInt8 column, group on it, and check the schema."""
+    source = pl.LazyFrame({"a": [23, 30]}, schema={"a": pl.UInt8})
+    shifted = source.with_columns(pl.col("a") - pl.lit(1))
+    counts = shifted.group_by("a").len()
+
+    assert counts.schema == OrderedDict([("a", pl.UInt8), ("len", pl.UInt32)])
diff --git a/py-polars/tests/unit/operations/arithmetic/test_pow.py b/py-polars/tests/unit/operations/arithmetic/test_pow.py
new file mode 100644
index 000000000000..cd606dce8afa
--- /dev/null
+++ b/py-polars/tests/unit/operations/arithmetic/test_pow.py
@@ -0,0 +1,59 @@
+import polars as pl
+
+
+def test_pow_dtype() -> None:
+    """Compare lazy and collected dtypes for literal scaling and column powers."""
+    source = pl.DataFrame(
+        {
+            "foo": [1, 2, 3, 4, 5],
+            "a": [1, 2, 3, 4, 5],
+            "b": [1, 2, 3, 4, 5],
+            "c": [1, 2, 3, 4, 5],
+            "d": [1, 2, 3, 4, 5],
+            "e": [1, 2, 3, 4, 5],
+            "f": [1, 2, 3, 4, 5],
+            "g": [1, 2, 1, 2, 1],
+            "h": [1, 2, 1, 2, 1],
+        },
+        schema_overrides={
+            "a": pl.Int64,
+            "b": pl.UInt64,
+            "c": pl.Int32,
+            "d": pl.UInt32,
+            "e": pl.Int16,
+            "f": pl.UInt16,
+            "g": pl.Int8,
+            "h": pl.UInt8,
+        },
+    ).lazy()
+
+    int_columns = ["a", "b", "c", "d", "e", "f", "g", "h"]
+    exponent = pl.col("h")
+    powers = [(pl.col(c) ** exponent).alias(f"{c}_pow_h") for c in int_columns]
+
+    query = (
+        source.with_columns(pl.col("foo").cast(pl.UInt32))
+        .with_columns(
+            (pl.col("foo") * 2**2).alias("scaled_foo"),
+            (pl.col("foo") * 2**2.1).alias("scaled_foo2"),
+            *powers,
+        )
+        .drop(int_columns)
+    )
+
+    # One dtype per remaining column: foo, the two scaled columns, then the powers.
+    expected_dtypes = [
+        pl.UInt32,
+        pl.UInt32,
+        pl.Float64,
+        pl.Int64,
+        pl.UInt64,
+        pl.Int32,
+        pl.UInt32,
+        pl.Int16,
+        pl.UInt16,
+        pl.Int8,
+        pl.UInt8,
+    ]
+    assert query.collect().dtypes == expected_dtypes
+    assert query.dtypes == expected_dtypes
diff --git a/py-polars/tests/unit/operations/namespaces/test_struct.py b/py-polars/tests/unit/operations/namespaces/test_struct.py
index ee4806c00188..a365b1cef1d1 100644
--- a/py-polars/tests/unit/operations/namespaces/test_struct.py
+++ b/py-polars/tests/unit/operations/namespaces/test_struct.py
@@ -83,3 +83,15 @@ def test_prefix_suffix_fields() -> None:
     assert suffix_df.schema == OrderedDict(
         [("x", pl.Struct({"a_f": pl.Int64, "b_f": pl.Int64}))]
     )
+
+
+def test_struct_alias_prune_15401() -> None:
+    empty = pl.DataFrame({"a": []}, schema={"a": pl.Struct({"b": pl.Int8})})
+    assert empty.select(pl.col("a").alias("c").struct.field("b")).columns == ["b"]
+
+
+def test_empty_list_eval_schema_5734() -> None:
+    no_rows = pl.DataFrame({"a": [[{"b": 1, "c": 2}]]}).filter(False)
+    field_b = pl.element().struct.field("b")
+    evaluated = no_rows.select(pl.col("a").list.eval(field_b))
+    assert evaluated.schema == {"a": pl.List(pl.Int64)}
diff --git a/py-polars/tests/unit/operations/test_cast.py b/py-polars/tests/unit/operations/test_cast.py
index 84336573c2e6..1f9bae881bca 100644
--- a/py-polars/tests/unit/operations/test_cast.py
+++ b/py-polars/tests/unit/operations/test_cast.py
@@ -672,3 +672,20 @@ def test_invalid_inner_type_cast_list() -> None:
         match=r"cannot cast List inner type: 'Int64' to Categorical",
     ):
         s.cast(pl.List(pl.Categorical))
+
+
+def test_all_null_cast_5826() -> None:
+    nulls = pl.Series("a", [None], dtype=pl.String)
+    casted = pl.DataFrame(data=[nulls]).with_columns(pl.col("a").cast(pl.Boolean))
+    assert casted.dtypes == [pl.Boolean]
+    assert casted.item() is None
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64, pl.Int8, pl.Int16, pl.Int32, pl.Int64],
+)
+def test_bool_numeric_supertype(dtype: pl.PolarsDataType) -> None:
+    df = pl.DataFrame({"v": [1, 2, 3, 4, 5, 6]})
+    result = df.select((pl.col("v") < 3).sum().cast(dtype) / pl.len())
+    assert abs(result.item() - 0.3333333) <= 0.00001  # two-sided tolerance check
diff --git a/py-polars/tests/unit/operations/test_comparison.py b/py-polars/tests/unit/operations/test_comparison.py
index 6316e39aafe2..ebbb4b4c46d5 100644
--- a/py-polars/tests/unit/operations/test_comparison.py
+++ b/py-polars/tests/unit/operations/test_comparison.py
@@ -373,3 +373,9 @@ def test_cat_compare_with_bool() -> None:
 
     with pytest.raises(pl.ComputeError, match="cannot compare categorical with bool"):
         data.filter(pl.col("col1") == True)  # noqa: E712
+
+
+def test_schema_ne_missing_9256() -> None:
+    df = pl.DataFrame({"a": [0, 1, None], "b": [True, False, True]})
+
+    assert df.select(pl.col("a").ne_missing(0).or_(pl.col("b")))["a"].all()
diff --git a/py-polars/tests/unit/operations/test_diff.py b/py-polars/tests/unit/operations/test_diff.py
new file mode 100644
index 000000000000..463716c69893
--- /dev/null
+++ b/py-polars/tests/unit/operations/test_diff.py
@@ -0,0 +1,12 @@
+import polars as pl
+from polars.testing import assert_frame_equal
+
+
+def test_diff_duration_dtype() -> None:
+    data = ["2022-01-01", "2022-01-02", "2022-01-03", "2022-01-03"]
+    df = pl.Series("date", data).str.to_date("%Y-%m-%d").to_frame()
+
+    result = df.select(pl.col("date").diff() < pl.duration(days=1))
+
+    expected = pl.Series("date", [None, False, False, True]).to_frame()
+    assert_frame_equal(result, expected)
diff --git a/py-polars/tests/unit/operations/test_fill_null.py b/py-polars/tests/unit/operations/test_fill_null.py
new file mode 100644
index 000000000000..ad2411e02d52
--- /dev/null
+++ b/py-polars/tests/unit/operations/test_fill_null.py
@@ -0,0 +1,37 @@
+import polars as pl
+from polars.testing import assert_series_equal
+
+
+def test_fill_null_minimal_upcast_4056() -> None:
+    df = pl.DataFrame({"a": [-1, 2, None]})
+    df = df.with_columns(pl.col("a").cast(pl.Int8))
+    assert df.with_columns(pl.col(pl.Int8).fill_null(-1)).dtypes[0] == pl.Int8
+    assert df.with_columns(pl.col(pl.Int8).fill_null(-1000)).dtypes[0] == pl.Int16
+
+
+def test_fill_enum_upcast() -> None:
+    dtype = pl.Enum(["a", "b"])
+    s = pl.Series(["a", "b", None], dtype=dtype)
+    s_filled = s.fill_null("b")
+    expected = pl.Series(["a", "b", "b"], dtype=dtype)
+    assert s_filled.dtype == dtype
+    assert_series_equal(s_filled, expected)
+
+
+def test_fill_null_static_schema_4843() -> None:
+    df1 = pl.DataFrame(
+        {
+            "a": [1, 2, None],
+            "b": [1, None, 4],
+        }
+    ).lazy()
+
+    df2 = df1.select([pl.col(pl.Int64).fill_null(0)])
+    df3 = df2.select(pl.col(pl.Int64))
+    assert df3.schema == {"a": pl.Int64, "b": pl.Int64}
+
+
+def test_fill_null_f32_with_lit() -> None:
+    # ensure the literal integer does not upcast the f32 to an f64
+    df = pl.DataFrame({"a": [1.1, 1.2]}, schema=[("a", pl.Float32)])
+    assert df.fill_null(value=0).dtypes == [pl.Float32]
diff --git a/py-polars/tests/unit/operations/test_group_by.py b/py-polars/tests/unit/operations/test_group_by.py
index e95ea2703970..8f6cce5a4b95 100644
--- a/py-polars/tests/unit/operations/test_group_by.py
+++ b/py-polars/tests/unit/operations/test_group_by.py
@@ -1002,3 +1002,72 @@ def test_partitioned_group_by_chunked(partition_limit: int) -> None:
         df.group_by(gps).sum().sort("oo"),
         df.rechunk().group_by(gps, maintain_order=True).sum(),
     )
+
+
+def test_schema_on_agg() -> None:
+    lf = pl.LazyFrame({"a": ["x", "x", "y", "n"], "b": [1, 2, 3, 4]})
+
+    result = lf.group_by("a").agg(
+        pl.col("b").min().alias("min"),
+        pl.col("b").max().alias("max"),
+        pl.col("b").sum().alias("sum"),
+        pl.col("b").first().alias("first"),
+        pl.col("b").last().alias("last"),
+    )
+    expected_schema = {
+        "a": pl.String,
+        "min": pl.Int64,
+        "max": pl.Int64,
+        "sum": pl.Int64,
+        "first": pl.Int64,
+        "last": pl.Int64,
+    }
+    assert result.schema == expected_schema
+
+
+def test_group_by_schema_err() -> None:
+    lf = pl.LazyFrame({"foo": [None, 1, 2], "bar": [1, 2, 3]})
+    with pytest.raises(pl.ColumnNotFoundError):
+        lf.group_by("not-existent").agg(pl.col("bar").max().alias("max_bar")).schema
+
+
+@pytest.mark.parametrize(
+    ("data", "expr", "expected_select", "expected_gb"),
+    [
+        (
+            {"x": ["x"], "y": ["y"]},
+            pl.coalesce(pl.col("x"), pl.col("y")),
+            {"x": pl.String},
+            {"x": pl.List(pl.String)},
+        ),
+        (
+            {"x": [True]},
+            pl.col("x").sum(),
+            {"x": pl.UInt32},
+            {"x": pl.UInt32},
+        ),
+        (
+            {"a": [[1, 2]]},
+            pl.col("a").list.sum(),
+            {"a": pl.Int64},
+            {"a": pl.List(pl.Int64)},
+        ),
+    ],
+)
+def test_schemas(
+    data: dict[str, list[Any]],
+    expr: pl.Expr,
+    expected_select: dict[str, pl.PolarsDataType],
+    expected_gb: dict[str, pl.PolarsDataType],
+) -> None:
+    df = pl.DataFrame(data)
+
+    # test selection schema
+    schema = df.select(expr).schema
+    for key, dtype in expected_select.items():
+        assert schema[key] == dtype
+
+    # test group_by schema
+    schema = df.group_by(pl.lit(1)).agg(expr).schema
+    for key, dtype in expected_gb.items():
+        assert schema[key] == dtype
diff --git a/py-polars/tests/unit/operations/test_shrink_dtype.py b/py-polars/tests/unit/operations/test_shrink_dtype.py
new file mode 100644
index 000000000000..443f55814ec1
--- /dev/null
+++ b/py-polars/tests/unit/operations/test_shrink_dtype.py
@@ -0,0 +1,46 @@
+import polars as pl
+
+
+def test_shrink_dtype() -> None:
+    out = pl.DataFrame(
+        {
+            "a": [1, 2, 3],
+            "b": [1, 2, 2 << 32],
+            "c": [-1, 2, 1 << 30],
+            "d": [-112, 2, 112],
+            "e": [-112, 2, 129],
+            "f": ["a", "b", "c"],
+            "g": [0.1, 1.32, 0.12],
+            "h": [True, None, False],
+            "i": pl.Series([None, None, None], dtype=pl.UInt64),
+            "j": pl.Series([None, None, None], dtype=pl.Int64),
+            "k": pl.Series([None, None, None], dtype=pl.Float64),
+        }
+    ).select(pl.all().shrink_dtype())
+    assert out.dtypes == [
+        pl.Int8,  # "a"
+        pl.Int64,  # "b": 2 << 32 does not fit in Int32
+        pl.Int32,  # "c": 1 << 30 does not fit in Int16
+        pl.Int8,  # "d"
+        pl.Int16,  # "e": 129 does not fit in Int8
+        pl.String,  # "f"
+        pl.Float32,  # "g"
+        pl.Boolean,  # "h"
+        pl.UInt8,  # "i": all-null UInt64
+        pl.Int8,  # "j": all-null Int64
+        pl.Float32,  # "k": all-null Float64
+    ]
+
+    assert out.to_dict(as_series=False) == {
+        "a": [1, 2, 3],
+        "b": [1, 2, 8589934592],
+        "c": [-1, 2, 1073741824],
+        "d": [-112, 2, 112],
+        "e": [-112, 2, 129],
+        "f": ["a", "b", "c"],
+        "g": [0.10000000149011612, 1.3200000524520874, 0.11999999731779099],
+        "h": [True, None, False],
+        "i": [None, None, None],
+        "j": [None, None, None],
+        "k": [None, None, None],
+    }
diff --git a/py-polars/tests/unit/test_convert.py b/py-polars/tests/unit/test_convert.py
new file mode 100644
index 000000000000..e74bd6f13024
--- /dev/null
+++ b/py-polars/tests/unit/test_convert.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+
+def test_schema_inference_from_rows() -> None:
+    # these have to upcast to float
+    result = pl.from_records([[1, 2.1, 3], [4, 5, 6.4]])
+    assert result.to_dict(as_series=False) == {
+        "column_0": [1.0, 2.1, 3.0],
+        "column_1": [4.0, 5.0, 6.4],
+    }
+
+    result = pl.from_dicts([{"a": 1, "b": 2}, {"a": 3.1, "b": 4.5}])
+    assert result.to_dict(as_series=False) == {
+        "a": [1.0, 3.1],
+        "b": [2.0, 4.5],
+    }
+
+
+def test_from_dicts_nested_nulls() -> None:
+    result = pl.from_dicts([{"a": [None, None]}, {"a": [1, 2]}])
+    assert result.to_dict(as_series=False) == {"a": [[None, None], [1, 2]]}
+
+
+def test_from_dicts_empty() -> None:
+    with pytest.raises(pl.NoDataError, match="no data, cannot infer schema"):
+        pl.from_dicts([])
+
+
+def test_from_dicts_all_cols_6716() -> None:
+    dicts = [{"a": None} for _ in range(20)] + [{"a": "crash"}]
+
+    with pytest.raises(
+        pl.ComputeError, match="make sure that all rows have the same schema"
+    ):
+        pl.from_dicts(dicts, infer_schema_length=20)
+    assert pl.from_dicts(dicts, infer_schema_length=None).dtypes == [pl.String]

From 3532e92fcef32e68bd3a172ca9805aa1cd6ee979 Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Tue, 11 Jun 2024 14:11:34 +0200
Subject: [PATCH 3/3] Move more tests

---
 py-polars/tests/unit/datatypes/test_schema.py | 229 ------------------
 py-polars/tests/unit/lazyframe/test_rename.py |   7 +
 .../{ => aggregation}/test_folds.py           |  19 ++
 .../operations/aggregation/test_horizontal.py |  15 ++
 .../operations/aggregation/test_vertical.py   |   8 +
 .../operations/arithmetic/test_arithmetic.py  |  22 ++
 .../unit/operations/map/test_map_batches.py   |  32 +++
 .../unit/operations/map/test_map_elements.py  |  20 ++
 .../operations/namespaces/list/test_list.py   |  12 +
 .../tests/unit/operations/test_bitwise.py     |  10 +
 .../tests/unit/operations/test_comparison.py  |   9 +
 .../tests/unit/operations/test_group_by.py    |  56 +++++
 .../tests/unit/operations/test_join_asof.py   |   7 +
 13 files changed, 217 insertions(+), 229 deletions(-)
 delete mode 100644 py-polars/tests/unit/datatypes/test_schema.py
 rename py-polars/tests/unit/operations/{ => aggregation}/test_folds.py (71%)
 create mode 100644 py-polars/tests/unit/operations/test_bitwise.py

diff --git a/py-polars/tests/unit/datatypes/test_schema.py b/py-polars/tests/unit/datatypes/test_schema.py
deleted file mode 100644
index ab2260d71dbe..000000000000
--- a/py-polars/tests/unit/datatypes/test_schema.py
+++ /dev/null
@@ -1,229 +0,0 @@
-from __future__ import annotations
-
-from datetime import date, timedelta
-
-import pytest
-
-import polars as pl
-from polars.testing import assert_frame_equal
-
-
-def test_lazy_map_schema() -> None:
-    df = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
-
-    # identity
-    assert_frame_equal(df.lazy().map_batches(lambda x: x).collect(), df)
-
-    def custom(df: pl.DataFrame) -> pl.Series:
-        return df["a"]
-
-    with pytest.raises(
-        pl.ComputeError,
-        match="Expected 'LazyFrame.map' to return a 'DataFrame', got a",
-    ):
-        df.lazy().map_batches(custom).collect()  # type: ignore[arg-type]
-
-    def custom2(
-        df: pl.DataFrame,
-    ) -> pl.DataFrame:
-        # changes schema
-        return df.select(pl.all().cast(pl.String))
-
-    with pytest.raises(
-        pl.ComputeError,
-        match="The output schema of 'LazyFrame.map' is incorrect. Expected",
-    ):
-        df.lazy().map_batches(custom2).collect()
-
-    assert df.lazy().map_batches(
-        custom2, validate_output_schema=False
-    ).collect().to_dict(as_series=False) == {"a": ["1", "2", "3"], "b": ["a", "b", "c"]}
-
-
-def test_join_as_of_by_schema() -> None:
-    a = pl.DataFrame({"a": [1], "b": [2], "c": [3]}).lazy()
-    b = pl.DataFrame({"a": [1], "b": [2], "d": [4]}).lazy()
-    q = a.join_asof(b, on=pl.col("a").set_sorted(), by="b")
-    assert q.collect().columns == q.columns
-
-
-def test_unknown_map_elements() -> None:
-    df = pl.DataFrame(
-        {
-            "Amount": [10, 1, 1, 5],
-            "Flour": ["1000g", "100g", "50g", "75g"],
-        }
-    )
-
-    q = df.lazy().select(
-        pl.col("Amount"),
-        pl.col("Flour").map_elements(lambda x: 100.0) / pl.col("Amount"),
-    )
-
-    assert q.collect().to_dict(as_series=False) == {
-        "Amount": [10, 1, 1, 5],
-        "Flour": [10.0, 100.0, 100.0, 20.0],
-    }
-    assert q.dtypes == [pl.Int64, pl.Unknown]
-
-
-def test_remove_redundant_mapping_4668() -> None:
-    df = pl.DataFrame([["a"]] * 2, ["A", "B "]).lazy()
-    clean_name_dict = {x: " ".join(x.split()) for x in df.columns}
-    df = df.rename(clean_name_dict)
-    assert df.columns == ["A", "B"]
-
-
-def test_fold_all_schema() -> None:
-    df = pl.DataFrame(
-        {
-            "A": [1, 2, 3, 4, 5],
-            "fruits": ["banana", "banana", "apple", "apple", "banana"],
-            "B": [5, 4, 3, 2, 1],
-            "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
-            "optional": [28, 300, None, 2, -30],
-        }
-    )
-    # divide because of overflow
-    result = df.select(pl.sum_horizontal(pl.all().hash(seed=1) // int(1e8)))
-    assert result.dtypes == [pl.UInt64]
-
-
-def test_list_eval_type_cast_11188() -> None:
-    df = pl.DataFrame(
-        [
-            {"a": None},
-        ],
-        schema={"a": pl.List(pl.Int64)},
-    )
-    assert df.select(
-        pl.col("a").list.eval(pl.element().cast(pl.String)).alias("a_str")
-    ).schema == {"a_str": pl.List(pl.String)}
-
-
-def test_duration_division_schema() -> None:
-    df = pl.DataFrame({"a": [1]})
-    q = (
-        df.lazy()
-        .with_columns(pl.col("a").cast(pl.Duration))
-        .select(pl.col("a") / pl.col("a"))
-    )
-
-    assert q.schema == {"a": pl.Float64}
-    assert q.collect().to_dict(as_series=False) == {"a": [1.0]}
-
-
-def test_int_operator_stability() -> None:
-    for dt in pl.datatypes.INTEGER_DTYPES:
-        s = pl.Series(values=[10], dtype=dt)
-        assert pl.select(pl.lit(s) // 2).dtypes == [dt]
-        assert pl.select(pl.lit(s) + 2).dtypes == [dt]
-        assert pl.select(pl.lit(s) - 2).dtypes == [dt]
-        assert pl.select(pl.lit(s) * 2).dtypes == [dt]
-        assert pl.select(pl.lit(s) / 2).dtypes == [pl.Float64]
-
-
-def test_deep_subexpression_f32_schema_7129() -> None:
-    df = pl.DataFrame({"a": [1.1, 2.3, 3.4, 4.5]}, schema={"a": pl.Float32()})
-    assert df.with_columns(pl.col("a") - pl.col("a").median()).dtypes == [pl.Float32]
-    assert df.with_columns(
-        (pl.col("a") - pl.col("a").mean()) / (pl.col("a").std() + 0.001)
-    ).dtypes == [pl.Float32]
-
-
-def test_absence_off_null_prop_8224() -> None:
-    # a reminder to self to not do null propagation
-    # it is inconsistent and makes output dtype
-    # dependent of the data, big no!
-
-    def sub_col_min(column: str, min_column: str) -> pl.Expr:
-        return pl.col(column).sub(pl.col(min_column).min())
-
-    df = pl.DataFrame(
-        {
-            "group": [1, 1, 2, 2],
-            "vals_num": [10.0, 11.0, 12.0, 13.0],
-            "vals_partial": [None, None, 12.0, 13.0],
-            "vals_null": [None, None, None, None],
-        }
-    )
-
-    q = (
-        df.lazy()
-        .group_by("group")
-        .agg(
-            [
-                sub_col_min("vals_num", "vals_num").alias("sub_num"),
-                sub_col_min("vals_num", "vals_partial").alias("sub_partial"),
-                sub_col_min("vals_num", "vals_null").alias("sub_null"),
-            ]
-        )
-    )
-
-    assert q.collect().dtypes == [
-        pl.Int64,
-        pl.List(pl.Float64),
-        pl.List(pl.Float64),
-        pl.List(pl.Float64),
-    ]
-
-
-def test_lit_iter_schema() -> None:
-    df = pl.DataFrame(
-        {
-            "key": ["A", "A", "A", "A"],
-            "dates": [
-                date(1970, 1, 1),
-                date(1970, 1, 1),
-                date(1970, 1, 2),
-                date(1970, 1, 3),
-            ],
-        }
-    )
-
-    result = df.group_by("key").agg(pl.col("dates").unique() + timedelta(days=1))
-    expected = {
-        "key": ["A"],
-        "dates": [[date(1970, 1, 2), date(1970, 1, 3), date(1970, 1, 4)]],
-    }
-    assert result.to_dict(as_series=False) == expected
-
-
-def test_nested_binary_literal_super_type_12227() -> None:
-    # The `.alias` is important here to trigger the bug.
-    assert (
-        pl.select(x=1).select((pl.lit(0) + ((pl.col("x") > 0) * 0.1)).alias("x")).item()
-        == 0.1
-    )
-    assert (
-        pl.select(
-            (pl.lit(0) + (pl.lit(0) == pl.lit(0)) * pl.lit(0.1)) + pl.lit(0)
-        ).item()
-        == 0.1
-    )
-
-
-def test_alias_prune_in_fold_15438() -> None:
-    df = pl.DataFrame({"x": [1, 2], "expected_result": ["first", "second"]}).select(
-        actual_result=pl.fold(
-            acc=pl.lit("other", dtype=pl.Utf8),
-            function=lambda acc, x: pl.when(x).then(pl.lit(x.name)).otherwise(acc),  # type: ignore[arg-type, return-value]
-            exprs=[
-                (pl.col("x") == 1).alias("first"),
-                (pl.col("x") == 2).alias("second"),
-            ],
-        )
-    )
-    expected = pl.DataFrame(
-        {
-            "actual_result": ["first", "second"],
-        }
-    )
-    assert_frame_equal(df, expected)
-
-
-@pytest.mark.parametrize("op", ["and_", "or_"])
-def test_bitwise_integral_schema(op: str) -> None:
-    df = pl.LazyFrame({"a": [1, 2], "b": [3, 4]})
-    q = df.select(getattr(pl.col("a"), op)(pl.col("b")))
-    assert q.schema["a"] == df.schema["a"]
diff --git a/py-polars/tests/unit/lazyframe/test_rename.py b/py-polars/tests/unit/lazyframe/test_rename.py
index 2a7462d1edb2..1638e32ce32c 100644
--- a/py-polars/tests/unit/lazyframe/test_rename.py
+++ b/py-polars/tests/unit/lazyframe/test_rename.py
@@ -6,3 +6,10 @@ def test_lazy_rename() -> None:
 
     result = df.lazy().rename({"y": "x", "x": "y"}).select(["x", "y"])
     assert result.collect().to_dict(as_series=False) == {"x": [2], "y": [1]}
+
+
+def test_remove_redundant_mapping_4668() -> None:
+    lf = pl.LazyFrame([["a"]] * 2, ["A", "B "])  # "B " has a trailing space
+    clean_name_dict = {x: " ".join(x.split()) for x in lf.columns}
+    lf = lf.rename(clean_name_dict)
+    assert lf.columns == ["A", "B"]
diff --git a/py-polars/tests/unit/operations/test_folds.py b/py-polars/tests/unit/operations/aggregation/test_folds.py
similarity index 71%
rename from py-polars/tests/unit/operations/test_folds.py
rename to py-polars/tests/unit/operations/aggregation/test_folds.py
index f1204cf92e15..f14bd7852347 100644
--- a/py-polars/tests/unit/operations/test_folds.py
+++ b/py-polars/tests/unit/operations/aggregation/test_folds.py
@@ -59,3 +59,22 @@ def test_cum_reduce() -> None:
         }
     )
     assert_frame_equal(result, expected)
+
+
+def test_alias_prune_in_fold_15438() -> None:
+    df = pl.DataFrame({"x": [1, 2], "expected_result": ["first", "second"]}).select(
+        actual_result=pl.fold(
+            acc=pl.lit("other", dtype=pl.Utf8),
+            function=lambda acc, x: pl.when(x).then(pl.lit(x.name)).otherwise(acc),  # type: ignore[arg-type, return-value]
+            exprs=[
+                (pl.col("x") == 1).alias("first"),
+                (pl.col("x") == 2).alias("second"),
+            ],
+        )
+    )
+    expected = pl.DataFrame(
+        {
+            "actual_result": ["first", "second"],
+        }
+    )
+    assert_frame_equal(df, expected)
diff --git a/py-polars/tests/unit/operations/aggregation/test_horizontal.py b/py-polars/tests/unit/operations/aggregation/test_horizontal.py
index 4d474367af02..5211ff2969c9 100644
--- a/py-polars/tests/unit/operations/aggregation/test_horizontal.py
+++ b/py-polars/tests/unit/operations/aggregation/test_horizontal.py
@@ -440,3 +440,18 @@ def test_schema_mean_horizontal_single_column(
 def test_schema_boolean_sum_horizontal() -> None:
     lf = pl.LazyFrame({"a": [True, False]}).select(pl.sum_horizontal("a"))
     assert lf.schema == OrderedDict([("a", pl.UInt32)])
+
+
+def test_fold_all_schema() -> None:
+    df = pl.DataFrame(
+        {
+            "A": [1, 2, 3, 4, 5],
+            "fruits": ["banana", "banana", "apple", "apple", "banana"],
+            "B": [5, 4, 3, 2, 1],
+            "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
+            "optional": [28, 300, None, 2, -30],
+        }
+    )
+    # divide because of overflow
+    result = df.select(pl.sum_horizontal(pl.all().hash(seed=1) // int(1e8)))
+    assert result.dtypes == [pl.UInt64]
diff --git a/py-polars/tests/unit/operations/aggregation/test_vertical.py b/py-polars/tests/unit/operations/aggregation/test_vertical.py
index 26f01dacc3d2..3f2dbe080c07 100644
--- a/py-polars/tests/unit/operations/aggregation/test_vertical.py
+++ b/py-polars/tests/unit/operations/aggregation/test_vertical.py
@@ -74,3 +74,11 @@ def test_mean_overflow() -> None:
 
     result = df.with_columns(pl.col("value").cast(pl.Int32)).get_column("value").mean()
     assert np.isclose(result, expected)
+
+
+def test_deep_subexpression_f32_schema_7129() -> None:
+    df = pl.DataFrame({"a": [1.1, 2.3, 3.4, 4.5]}, schema={"a": pl.Float32()})
+    assert df.with_columns(pl.col("a") - pl.col("a").median()).dtypes == [pl.Float32]
+    assert df.with_columns(
+        (pl.col("a") - pl.col("a").mean()) / (pl.col("a").std() + 0.001)
+    ).dtypes == [pl.Float32]
diff --git a/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py b/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
index 1973f0483eae..e505881c6542 100644
--- a/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
+++ b/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py
@@ -610,3 +610,25 @@ def test_literal_subtract_schema_13284() -> None:
         .group_by("a")
         .len()
     ).schema == OrderedDict([("a", pl.UInt8), ("len", pl.UInt32)])
+
+
+def test_int_operator_stability() -> None:
+    for dt in pl.datatypes.INTEGER_DTYPES:
+        s = pl.Series(values=[10], dtype=dt)
+        assert pl.select(pl.lit(s) // 2).dtypes == [dt]
+        assert pl.select(pl.lit(s) + 2).dtypes == [dt]
+        assert pl.select(pl.lit(s) - 2).dtypes == [dt]
+        assert pl.select(pl.lit(s) * 2).dtypes == [dt]
+        assert pl.select(pl.lit(s) / 2).dtypes == [pl.Float64]
+
+
+def test_duration_division_schema() -> None:
+    df = pl.DataFrame({"a": [1]})
+    q = (
+        df.lazy()
+        .with_columns(pl.col("a").cast(pl.Duration))
+        .select(pl.col("a") / pl.col("a"))
+    )
+
+    assert q.schema == {"a": pl.Float64}
+    assert q.collect().to_dict(as_series=False) == {"a": [1.0]}
diff --git a/py-polars/tests/unit/operations/map/test_map_batches.py b/py-polars/tests/unit/operations/map/test_map_batches.py
index 127f1079b060..c3240ba4fb7b 100644
--- a/py-polars/tests/unit/operations/map/test_map_batches.py
+++ b/py-polars/tests/unit/operations/map/test_map_batches.py
@@ -84,3 +84,35 @@ def test_ufunc_args() -> None:
     result = df.select(z=np.add(2, pl.col("a")))  # type: ignore[call-overload]
     expected = pl.DataFrame({"z": [3, 4, 5]})
     assert_frame_equal(result, expected)
+
+
+def test_lazy_map_schema() -> None:
+    df = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
+
+    # identity
+    assert_frame_equal(df.lazy().map_batches(lambda x: x).collect(), df)
+
+    def custom(df: pl.DataFrame) -> pl.Series:
+        return df["a"]
+
+    with pytest.raises(
+        pl.ComputeError,
+        match="Expected 'LazyFrame.map' to return a 'DataFrame', got a",
+    ):
+        df.lazy().map_batches(custom).collect()  # type: ignore[arg-type]
+
+    def custom2(
+        df: pl.DataFrame,
+    ) -> pl.DataFrame:
+        # changes schema
+        return df.select(pl.all().cast(pl.String))
+
+    with pytest.raises(
+        pl.ComputeError,
+        match="The output schema of 'LazyFrame.map' is incorrect. Expected",
+    ):
+        df.lazy().map_batches(custom2).collect()
+
+    assert df.lazy().map_batches(
+        custom2, validate_output_schema=False
+    ).collect().to_dict(as_series=False) == {"a": ["1", "2", "3"], "b": ["a", "b", "c"]}
diff --git a/py-polars/tests/unit/operations/map/test_map_elements.py b/py-polars/tests/unit/operations/map/test_map_elements.py
index 98bf9cf996e1..59bdfa39ea1f 100644
--- a/py-polars/tests/unit/operations/map/test_map_elements.py
+++ b/py-polars/tests/unit/operations/map/test_map_elements.py
@@ -344,3 +344,23 @@ def test_cabbage_strategy_14396() -> None:
         ValueError, match="strategy 'cabbage' is not supported"
     ), pytest.warns(PolarsInefficientMapWarning):
         df.select(pl.col("x").map_elements(lambda x: 2 * x, strategy="cabbage"))  # type: ignore[arg-type]
+
+
+def test_unknown_map_elements() -> None:
+    df = pl.DataFrame(
+        {
+            "Amount": [10, 1, 1, 5],
+            "Flour": ["1000g", "100g", "50g", "75g"],
+        }
+    )
+
+    q = df.lazy().select(
+        pl.col("Amount"),
+        pl.col("Flour").map_elements(lambda x: 100.0) / pl.col("Amount"),
+    )
+
+    assert q.collect().to_dict(as_series=False) == {
+        "Amount": [10, 1, 1, 5],
+        "Flour": [10.0, 100.0, 100.0, 20.0],
+    }
+    assert q.dtypes == [pl.Int64, pl.Unknown]
diff --git a/py-polars/tests/unit/operations/namespaces/list/test_list.py b/py-polars/tests/unit/operations/namespaces/list/test_list.py
index 86ed6b719f13..e3d238259f89 100644
--- a/py-polars/tests/unit/operations/namespaces/list/test_list.py
+++ b/py-polars/tests/unit/operations/namespaces/list/test_list.py
@@ -901,3 +901,15 @@ def test_list_eval_err_raise_15653() -> None:
 def test_list_sum_bool_schema() -> None:
     q = pl.LazyFrame({"x": [[True, True, False]]})
     assert q.select(pl.col("x").list.sum()).schema["x"] == pl.UInt32
+
+
+def test_list_eval_type_cast_11188() -> None:
+    df = pl.DataFrame(
+        [
+            {"a": None},
+        ],
+        schema={"a": pl.List(pl.Int64)},
+    )
+    assert df.select(
+        pl.col("a").list.eval(pl.element().cast(pl.String)).alias("a_str")
+    ).schema == {"a_str": pl.List(pl.String)}
diff --git a/py-polars/tests/unit/operations/test_bitwise.py b/py-polars/tests/unit/operations/test_bitwise.py
new file mode 100644
index 000000000000..674de1aef418
--- /dev/null
+++ b/py-polars/tests/unit/operations/test_bitwise.py
@@ -0,0 +1,10 @@
+import pytest
+
+import polars as pl
+
+
+@pytest.mark.parametrize("op", ["and_", "or_"])
+def test_bitwise_integral_schema(op: str) -> None:
+    df = pl.LazyFrame({"a": [1, 2], "b": [3, 4]})
+    q = df.select(getattr(pl.col("a"), op)(pl.col("b")))
+    assert q.schema["a"] == df.schema["a"]
diff --git a/py-polars/tests/unit/operations/test_comparison.py b/py-polars/tests/unit/operations/test_comparison.py
index ebbb4b4c46d5..ef1aed2ffcb9 100644
--- a/py-polars/tests/unit/operations/test_comparison.py
+++ b/py-polars/tests/unit/operations/test_comparison.py
@@ -379,3 +379,12 @@ def test_schema_ne_missing_9256() -> None:
     df = pl.DataFrame({"a": [0, 1, None], "b": [True, False, True]})
 
     assert df.select(pl.col("a").ne_missing(0).or_(pl.col("b")))["a"].all()
+
+
+def test_nested_binary_literal_super_type_12227() -> None:
+    # The `.alias` is important here to trigger the bug.
+    result = pl.select(x=1).select((pl.lit(0) + ((pl.col("x") > 0) * 0.1)).alias("x"))
+    assert result.item() == 0.1
+
+    result = pl.select((pl.lit(0) + (pl.lit(0) == pl.lit(0)) * pl.lit(0.1)) + pl.lit(0))
+    assert result.item() == 0.1
diff --git a/py-polars/tests/unit/operations/test_group_by.py b/py-polars/tests/unit/operations/test_group_by.py
index 8f6cce5a4b95..973e023fabb7 100644
--- a/py-polars/tests/unit/operations/test_group_by.py
+++ b/py-polars/tests/unit/operations/test_group_by.py
@@ -1071,3 +1071,59 @@ def test_schemas(
     schema = df.group_by(pl.lit(1)).agg(expr).schema
     for key, dtype in expected_gb.items():
         assert schema[key] == dtype
+
+
+def test_lit_iter_schema() -> None:
+    df = pl.DataFrame(
+        {
+            "key": ["A", "A", "A", "A"],
+            "dates": [
+                date(1970, 1, 1),
+                date(1970, 1, 1),
+                date(1970, 1, 2),
+                date(1970, 1, 3),
+            ],
+        }
+    )
+
+    result = df.group_by("key").agg(pl.col("dates").unique() + timedelta(days=1))
+    expected = {
+        "key": ["A"],
+        "dates": [[date(1970, 1, 2), date(1970, 1, 3), date(1970, 1, 4)]],
+    }
+    assert result.to_dict(as_series=False) == expected
+
+
+def test_absence_off_null_prop_8224() -> None:
+    # Reminder to self: do not do null propagation.
+    # It is inconsistent and makes the output dtype
+    # depend on the data, which is a big no!
+
+    def sub_col_min(column: str, min_column: str) -> pl.Expr:
+        return pl.col(column).sub(pl.col(min_column).min())
+
+    df = pl.DataFrame(
+        {
+            "group": [1, 1, 2, 2],
+            "vals_num": [10.0, 11.0, 12.0, 13.0],
+            "vals_partial": [None, None, 12.0, 13.0],
+            "vals_null": [None, None, None, None],
+        }
+    )
+
+    q = (
+        df.lazy()
+        .group_by("group")
+        .agg(
+            sub_col_min("vals_num", "vals_num").alias("sub_num"),
+            sub_col_min("vals_num", "vals_partial").alias("sub_partial"),
+            sub_col_min("vals_num", "vals_null").alias("sub_null"),
+        )
+    )
+
+    assert q.collect().dtypes == [
+        pl.Int64,  # "group"
+        pl.List(pl.Float64),  # "sub_num"
+        pl.List(pl.Float64),  # "sub_partial"
+        pl.List(pl.Float64),  # "sub_null": "vals_null" holds only nulls
+    ]
diff --git a/py-polars/tests/unit/operations/test_join_asof.py b/py-polars/tests/unit/operations/test_join_asof.py
index 3f3f9ad7745c..b10d857c3041 100644
--- a/py-polars/tests/unit/operations/test_join_asof.py
+++ b/py-polars/tests/unit/operations/test_join_asof.py
@@ -1165,3 +1165,10 @@ def test_join_asof_invalid_args() -> None:
         TypeError, match="expected `right_on` to be str or Expr, got 'list'"
     ):
         df1.join_asof(df2, left_on="a", right_on=["a"])  # type: ignore[arg-type]
+
+
+def test_join_as_of_by_schema() -> None:
+    a = pl.DataFrame({"a": [1], "b": [2], "c": [3]}).lazy()
+    b = pl.DataFrame({"a": [1], "b": [2], "d": [4]}).lazy()
+    q = a.join_asof(b, on=pl.col("a").set_sorted(), by="b")
+    assert q.collect().columns == q.columns