pola-rs · ritchie46 · Sep 23, 2024 · Jul 19, 2024 · Jul 23, 2024 · Jul 23, 2024
diff --git a/crates/polars-core/src/series/arithmetic/borrowed.rs b/crates/polars-core/src/series/arithmetic/borrowed.rs
@@ -171,6 +171,50 @@ impl NumOpsDispatchInner for FixedSizeListType {
     }
 }
 
+impl ListChunked {
+    /// Applies the binary operation `op` row by row to the sub-lists of `self` and `rhs`.
+    ///
+    /// `rhs` must be a list `Series` with as many rows as `self`, and the two
+    /// sub-lists of every row must have the same length. Each row result is
+    /// imploded back into a single-row list and appended to the output, which
+    /// starts out as an empty copy of `self`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `InvalidOperation` when the row counts differ, when either side
+    /// contains a null row, or when the sub-lists of a row differ in length.
+    /// Errors from `rhs.list()`, `op`, `implode` and `append` are propagated.
+    fn arithm_helper(
+        &self,
+        rhs: &Series,
+        op: &dyn Fn(&Series, &Series) -> PolarsResult<Series>,
+    ) -> PolarsResult<Series> {
+        polars_ensure!(
+            self.len() == rhs.len(),
+            InvalidOperation: "can only do arithmetic operations on Series of the same size; got {} and {}",
+            self.len(),
+            rhs.len()
+        );
+
+        let mut result = self.clear();
+        for (a, b) in self.amortized_iter().zip(rhs.list()?.amortized_iter()) {
+            // Equal row counts say nothing about null rows: the iterator yields
+            // `None` for those, so report them as an error instead of panicking.
+            polars_ensure!(
+                a.is_some() && b.is_some(),
+                InvalidOperation: "can only do arithmetic operations on lists without null values"
+            );
+            let (Some(a_owner), Some(b_owner)) = (a, b) else {
+                unreachable!("both rows were checked to be non-null above")
+            };
+            let a = a_owner.as_ref();
+            let b = b_owner.as_ref();
+            polars_ensure!(
+                a.len() == b.len(),
+                InvalidOperation: "can only do arithmetic operations on lists of the same size; got {} and {}",
+                a.len(),
+                b.len()
+            );
+            // Wrap the row result in a one-row list so it can be appended as a row.
+            result.append(&op(a, b)?.implode()?)?;
+        }
+        Ok(result.into())
+    }
+}
+
+// Arithmetic dispatch for list columns: every operator hands a closure that
+// performs the same operation on two `Series` to `ListChunked::arithm_helper`.
+impl NumOpsDispatchInner for ListType {
+    fn add_to(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
+        let add_rows = |a: &Series, b: &Series| a.add_to(b);
+        lhs.arithm_helper(rhs, &add_rows)
+    }
+
+    fn subtract(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
+        let subtract_rows = |a: &Series, b: &Series| a.subtract(b);
+        lhs.arithm_helper(rhs, &subtract_rows)
+    }
+
+    fn multiply(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
+        let multiply_rows = |a: &Series, b: &Series| a.multiply(b);
+        lhs.arithm_helper(rhs, &multiply_rows)
+    }
+
+    fn divide(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
+        let divide_rows = |a: &Series, b: &Series| a.divide(b);
+        lhs.arithm_helper(rhs, &divide_rows)
+    }
+
+    fn remainder(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
+        let remainder_rows = |a: &Series, b: &Series| a.remainder(b);
+        lhs.arithm_helper(rhs, &remainder_rows)
+    }
+}
+
 #[cfg(feature = "checked_arithmetic")]
 pub mod checked {
     use num_traits::{CheckedDiv, One, ToPrimitive, Zero};

@@ -47,6 +47,24 @@ impl private::PrivateSeries for SeriesWrap<ListChunked> {
     fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
         (&self.0).into_total_eq_inner()
     }
+
+    /// Forwards `add_to` to the wrapped `ListChunked` (`self.0`).
+    fn add_to(&self, rhs: &Series) -> PolarsResult<Series> {
+        self.0.add_to(rhs)
+    }
+
+    /// Forwards `subtract` to the wrapped `ListChunked` (`self.0`).
+    fn subtract(&self, rhs: &Series) -> PolarsResult<Series> {
+        self.0.subtract(rhs)
+    }
+
+    /// Forwards `multiply` to the wrapped `ListChunked` (`self.0`).
+    fn multiply(&self, rhs: &Series) -> PolarsResult<Series> {
+        self.0.multiply(rhs)
+    }
+    /// Forwards `divide` to the wrapped `ListChunked` (`self.0`).
+    fn divide(&self, rhs: &Series) -> PolarsResult<Series> {
+        self.0.divide(rhs)
+    }
+    /// Forwards `remainder` to the wrapped `ListChunked` (`self.0`).
+    fn remainder(&self, rhs: &Series) -> PolarsResult<Series> {
+        self.0.remainder(rhs)
+    }
 }
 
 impl SeriesTrait for SeriesWrap<ListChunked> {

@@ -72,6 +72,11 @@ pub fn apply_operator(left: &Series, right: &Series, op: Operator) -> PolarsResu
                 let right_dt = right.dtype().cast_leaf(Float64);
                 left.cast(&left_dt)? / right.cast(&right_dt)?
             },
+            dt @ List(_) => {
+                let left_dt = dt.cast_leaf(Float64);
+                let right_dt = right.dtype().cast_leaf(Float64);
+                left.cast(&left_dt)? / right.cast(&right_dt)?
+            },
             _ => {
                 if right.dtype().is_temporal() {
                     return left / right;

@@ -1061,6 +1061,22 @@ def __sub__(self, other: Any) -> Self | Expr:
             return F.lit(self) - other
         return self._arithmetic(other, "sub", "sub_<>")
 
+    def _recursive_cast_to_float64(self) -> Series:
+        """
+        Convert leaf dtypes to Float64 dtypes.
+
+        This is equivalent to logic in DataType::cast_leaf() in Rust.
+        """
+
+        def convert_to_float64(dtype: PolarsDataType) -> PolarsDataType:
+            if isinstance(dtype, Array):
+                return Array(convert_to_float64(dtype.inner), shape=dtype.shape)
+            if isinstance(dtype, List):
+                return List(convert_to_float64(dtype.inner))
+            return Float64()
+
+        return self.cast(convert_to_float64(self.dtype))
+
     @overload
     def __truediv__(self, other: Expr) -> Expr:  # type: ignore[overload-overlap]
         ...
@@ -1077,9 +1093,11 @@ def __truediv__(self, other: Any) -> Series | Expr:
 
         # this branch is exactly the floordiv function without rounding the floats
         if self.dtype.is_float() or self.dtype == Decimal:
-            return self._arithmetic(other, "div", "div_<>")
+            as_float = self
+        else:
+            as_float = self._recursive_cast_to_float64()
 
-        return self.cast(Float64) / other
+        return as_float._arithmetic(other, "div", "div_<>")
 
     @overload
     def __floordiv__(self, other: Expr) -> Expr:  # type: ignore[overload-overlap]

@@ -1,7 +1,9 @@
+from __future__ import annotations
+
 import operator
 from collections import OrderedDict
 from datetime import date, datetime, timedelta
-from typing import Any
+from typing import Any, Callable
 
 import numpy as np
 import pytest
@@ -558,33 +560,120 @@ def test_power_series() -> None:
 
 
 @pytest.mark.parametrize(
-    ("expected", "expr"),
+    ("expected", "expr", "column_names"),
     [
+        (np.array([[2, 4], [6, 8]], dtype=np.int64), lambda a, b: a + b, ("a", "a")),
+        (np.array([[0, 0], [0, 0]], dtype=np.int64), lambda a, b: a - b, ("a", "a")),
+        (np.array([[1, 4], [9, 16]], dtype=np.int64), lambda a, b: a * b, ("a", "a")),
         (
-            np.array([[2, 4], [6, 8]]),
-            pl.col("a") + pl.col("a"),
+            np.array([[1.0, 1.0], [1.0, 1.0]], dtype=np.float64),
+            lambda a, b: a / b,
+            ("a", "a"),
         ),
+        (np.array([[0, 0], [0, 0]], dtype=np.int64), lambda a, b: a % b, ("a", "a")),
         (
-            np.array([[0, 0], [0, 0]]),
-            pl.col("a") - pl.col("a"),
+            np.array([[3, 4], [7, 8]], dtype=np.int64),
+            lambda a, b: a + b,
+            ("a", "uint8"),
         ),
+        # This fails because the code is buggy, see
+        # https://github.com/pola-rs/polars/issues/17820
+        #
+        # (
+        #     np.array([[[2, 4]], [[6, 8]]], dtype=np.int64),
+        #     lambda a, b: a + b,
+        #     ("nested", "nested"),
+        # ),
+    ],
+)
+def test_array_arithmetic_same_size(
+    expected: Any,
+    expr: Callable[[pl.Series | pl.Expr, pl.Series | pl.Expr], pl.Series],
+    column_names: tuple[str, str],
+) -> None:
+    df = pl.DataFrame(
+        [
+            pl.Series("a", np.array([[1, 2], [3, 4]], dtype=np.int64)),
+            pl.Series("uint8", np.array([[2, 2], [4, 4]], dtype=np.uint8)),
+            pl.Series("nested", np.array([[[1, 2]], [[3, 4]]], dtype=np.int64)),
+        ]
+    )
+    print(df.select(expr(pl.col(column_names[0]), pl.col(column_names[1]))))
+    # Expr-based arithmetic:
+    assert_frame_equal(
+        df.select(expr(pl.col(column_names[0]), pl.col(column_names[1]))),
+        pl.Series(column_names[0], expected).to_frame(),
+    )
+    # Direct arithmetic on the Series:
+    assert_series_equal(
+        expr(df[column_names[0]], df[column_names[1]]),
+        pl.Series(column_names[0], expected),
+    )
+
+
+@pytest.mark.parametrize(
+    ("expected", "expr", "column_names"),
+    [
+        ([[2, 4], [6]], lambda a, b: a + b, ("a", "a")),
+        ([[0, 0], [0]], lambda a, b: a - b, ("a", "a")),
+        ([[1, 4], [9]], lambda a, b: a * b, ("a", "a")),
+        ([[1.0, 1.0], [1.0]], lambda a, b: a / b, ("a", "a")),
+        ([[0, 0], [0]], lambda a, b: a % b, ("a", "a")),
         (
-            np.array([[1, 4], [9, 16]]),
-            pl.col("a") * pl.col("a"),
+            [[3, 4], [7]],
+            lambda a, b: a + b,
+            ("a", "uint8"),
         ),
         (
-            np.array([[1.0, 1.0], [1.0, 1.0]]),
-            pl.col("a") / pl.col("a"),
+            [[[2, 4]], [[6]]],
+            lambda a, b: a + b,
+            ("nested", "nested"),
         ),
     ],
 )
-def test_array_arithmetic_same_size(expected: Any, expr: pl.Expr) -> None:
-    df = pl.Series("a", np.array([[1, 2], [3, 4]])).to_frame()
-
+def test_list_arithmetic_same_size(
+    expected: Any,
+    expr: Callable[[pl.Series | pl.Expr, pl.Series | pl.Expr], pl.Series],
+    column_names: tuple[str, str],
+) -> None:
+    print(expected)
+    df = pl.DataFrame(
+        [
+            pl.Series("a", [[1, 2], [3]]),
+            pl.Series("uint8", [[2, 2], [4]]),
+            pl.Series("nested", [[[1, 2]], [[3]]]),
+        ]
+    )
+    # Expr-based arithmetic:
     assert_frame_equal(
-        df.select(expr),
-        pl.Series("a", expected).to_frame(),
+        df.select(expr(pl.col(column_names[0]), pl.col(column_names[1]))),
+        pl.Series(column_names[0], expected).to_frame(),
     )
+    # Direct arithmetic on the Series:
+    assert_series_equal(
+        expr(df[column_names[0]], df[column_names[1]]),
+        pl.Series(column_names[0], expected),
+    )
+
+
+def test_list_arithmetic_error_cases() -> None:
+    # Different series length:
+    with pytest.raises(
+        InvalidOperationError, match="Series of the same size; got 1 and 2"
+    ):
+        _ = pl.Series("a", [[1, 2]]) / pl.Series("b", [[1, 2], [3, 4]])
+
+    # Different list length:
+    # Different series length:
+    with pytest.raises(
+        InvalidOperationError, match="lists of the same size; got 2 and 1"
+    ):
+        _ = pl.Series("a", [[1, 2]]) / pl.Series("b", [[1]])
+
+    # Wrong types:
+    # Different series length:
+    with pytest.raises(InvalidOperationError, match="cannot cast List type"):
+        _ = pl.Series("a", [[1, 2]]) + pl.Series("b", ["hello"])
 
 
 def test_schema_owned_arithmetic_5669() -> None: