Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EXPRESSIONS] Implement Expression.float.is_inf #2371

Merged
merged 1 commit into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions daft/daft.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1042,6 +1042,7 @@ class PyExpr:
def __hash__(self) -> int: ...
def __reduce__(self) -> tuple: ...
# Float predicates: each returns a new PyExpr wrapping the corresponding float function.
def is_nan(self) -> PyExpr: ...
def is_inf(self) -> PyExpr: ...
def dt_date(self) -> PyExpr: ...
def dt_day(self) -> PyExpr: ...
def dt_hour(self) -> PyExpr: ...
Expand Down Expand Up @@ -1207,6 +1208,7 @@ class PySeries:
def utf8_ilike(self, pattern: PySeries) -> PySeries: ...
def utf8_substr(self, start: PySeries, length: PySeries | None = None) -> PySeries: ...
# Float predicates: each returns a new PySeries holding the per-element result.
def is_nan(self) -> PySeries: ...
def is_inf(self) -> PySeries: ...
def dt_date(self) -> PySeries: ...
def dt_day(self) -> PySeries: ...
def dt_hour(self) -> PySeries: ...
Expand Down
15 changes: 15 additions & 0 deletions daft/expressions/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,21 @@ def is_nan(self) -> Expression:
"""
return Expression._from_pyexpr(self._expr.is_nan())

def is_inf(self) -> Expression:
    """Checks whether each value in the Expression is positive or negative infinity.

    .. NOTE::
        Nulls are propagated: a null input value yields a null output value.

    Example:
        >>> # [-float("inf"), 0., float("inf"), None] -> [True, False, True, None]
        >>> col("x").float.is_inf()

    Returns:
        Expression: Boolean Expression that is True where the value is infinite.
    """
    inf_expr = self._expr.is_inf()
    return Expression._from_pyexpr(inf_expr)


class ExpressionDatetimeNamespace(ExpressionNamespace):
def date(self) -> Expression:
Expand Down
3 changes: 3 additions & 0 deletions daft/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,9 @@ class SeriesFloatNamespace(SeriesNamespace):
def is_nan(self) -> Series:
    """Calls ``is_nan`` on the underlying series and wraps the result in a new Series."""
    nan_mask = self._series.is_nan()
    return Series._from_pyseries(nan_mask)

def is_inf(self) -> Series:
    """Calls ``is_inf`` on the underlying series and wraps the result in a new Series."""
    inf_mask = self._series.is_inf()
    return Series._from_pyseries(inf_mask)


class SeriesStringNamespace(SeriesNamespace):
def endswith(self, suffix: Series) -> Series:
Expand Down
14 changes: 14 additions & 0 deletions docs/source/api_docs/expressions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,20 @@ The following methods are available under the ``expr.str`` attribute.
Expression.str.ilike
Expression.str.substr

.. _api-float-expression-operations:

Floats
#######

The following methods are available under the ``expr.float`` attribute.

.. autosummary::
:nosignatures:
:toctree: doc_gen/expression_methods
:template: autosummary/accessor_method.rst

Expression.float.is_inf

.. _api-expressions-temporal:

Temporal
Expand Down
30 changes: 30 additions & 0 deletions src/daft-core/src/array/ops/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::{
use common_error::DaftResult;
use num_traits::Float;

use super::DaftIsInf;
use super::DaftIsNan;

use super::as_arrow::AsArrow;
Expand Down Expand Up @@ -38,3 +39,32 @@ impl DaftIsNan for DataArray<NullType> {
)))
}
}

impl<T> DaftIsInf for DataArray<T>
where
    T: DaftFloatType,
    <T as DaftNumericType>::Native: Float,
{
    type Output = DaftResult<DataArray<BooleanType>>;

    /// Builds a boolean array, named like `self`, that is `true` wherever the
    /// corresponding value is infinite.
    ///
    /// The validity bitmap of the input is carried over unchanged, so null
    /// slots in the input remain null in the output.
    fn is_inf(&self) -> Self::Output {
        let source = self.as_arrow();
        let null_mask = source.validity().cloned();
        let inf_flags = source.values_iter().map(|value| value.is_infinite());
        let flagged = arrow2::array::BooleanArray::from_trusted_len_values_iter(inf_flags)
            .with_validity(null_mask);
        Ok(BooleanArray::from((self.name(), flagged)))
    }
}

impl DaftIsInf for DataArray<NullType> {
    type Output = DaftResult<DataArray<BooleanType>>;

    /// Produces a boolean array of the same length and name as `self` in
    /// which every slot is null.
    fn is_inf(&self) -> Self::Output {
        let len = self.len();
        // All validity bits are unset, so the placeholder `false` values are never observed.
        let all_null = arrow2::bitmap::Bitmap::from(vec![false; len]);
        let placeholder =
            arrow2::array::BooleanArray::from_slice(vec![false; len]).with_validity(Some(all_null));
        Ok(BooleanArray::from((self.name(), placeholder)))
    }
}
5 changes: 5 additions & 0 deletions src/daft-core/src/array/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ pub trait DaftIsNan {
fn is_nan(&self) -> Self::Output;
}

/// Element-wise "is infinite" check for array types.
pub trait DaftIsInf {
/// Result type of the check; each implementor picks its own.
type Output;
/// Runs the infinity check over `self`.
fn is_inf(&self) -> Self::Output;
}

pub type VecIndices = Vec<u64>;
pub type GroupIndices = Vec<VecIndices>;
pub type GroupIndicesPair = (VecIndices, GroupIndices);
Expand Down
4 changes: 4 additions & 0 deletions src/daft-core/src/python/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,10 @@ impl PySeries {
Ok(self.series.is_nan()?.into())
}

/// Runs `is_inf` on the wrapped series and wraps the result for Python,
/// propagating any error from the series operation.
pub fn is_inf(&self) -> PyResult<Self> {
    let flagged = self.series.is_inf()?;
    Ok(flagged.into())
}

/// Runs `dt_date` on the wrapped series and wraps the result for Python,
/// propagating any error from the series operation.
pub fn dt_date(&self) -> PyResult<Self> {
    let dates = self.series.dt_date()?;
    Ok(dates.into())
}
Expand Down
7 changes: 7 additions & 0 deletions src/daft-core/src/series/ops/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,11 @@ impl Series {
Ok(DaftIsNan::is_nan(self.downcast::<<$T as DaftDataType>::ArrayType>()?)?.into_series())
})
}

/// Returns a series flagging which values of `self` are infinite.
///
/// Dispatches on `self.data_type()` through
/// `with_match_float_and_null_daft_types!`, downcasts to the matching
/// array type, and delegates to that array's `DaftIsInf` implementation.
pub fn is_inf(&self) -> DaftResult<Series> {
    use crate::array::ops::DaftIsInf;
    with_match_float_and_null_daft_types!(self.data_type(), |$T| {
        let typed = self.downcast::<<$T as DaftDataType>::ArrayType>()?;
        Ok(DaftIsInf::is_inf(typed)?.into_series())
    })
}
}
51 changes: 51 additions & 0 deletions src/daft-dsl/src/functions/float/is_inf.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
use daft_core::{
datatypes::{DataType, Field},
schema::Schema,
series::Series,
};

use crate::ExprRef;

use crate::functions::FunctionExpr;
use common_error::{DaftError, DaftResult};

use super::super::FunctionEvaluator;

/// Stateless evaluator backing the `is_inf` float function.
pub(super) struct IsInfEvaluator {}

impl FunctionEvaluator for IsInfEvaluator {
    /// Returns the function's name, `"is_inf"`.
    fn fn_name(&self) -> &'static str {
        "is_inf"
    }

    /// Resolves the output field for `is_inf`.
    ///
    /// # Errors
    /// - `DaftError::SchemaMismatch` unless exactly one input is supplied.
    /// - Any error raised while resolving the input's own field.
    /// - `DaftError::TypeError` if the input is not `Float32` or `Float64`.
    fn to_field(&self, inputs: &[ExprRef], schema: &Schema, _: &FunctionExpr) -> DaftResult<Field> {
        let data = match inputs {
            [data] => data,
            _ => {
                return Err(DaftError::SchemaMismatch(format!(
                    "Expected 1 input args, got {}",
                    inputs.len()
                )))
            }
        };

        let data_field = data.to_field(schema)?;
        // Only Float32 and Float64 are accepted; Float16 is not.
        if matches!(data_field.dtype, DataType::Float32 | DataType::Float64) {
            // The output keeps the input's name and is always Boolean.
            Ok(Field::new(data_field.name, DataType::Boolean))
        } else {
            Err(DaftError::TypeError(format!(
                "Expects input to is_inf to be float, but received {data_field}",
            )))
        }
    }

    /// Evaluates `is_inf` on the single input series.
    ///
    /// # Errors
    /// `DaftError::ValueError` unless exactly one input is supplied; otherwise
    /// whatever `Series::is_inf` returns.
    fn evaluate(&self, inputs: &[Series], _: &FunctionExpr) -> DaftResult<Series> {
        if let [data] = inputs {
            return data.is_inf();
        }
        Err(DaftError::ValueError(format!(
            "Expected 1 input args, got {}",
            inputs.len()
        )))
    }
}
12 changes: 12 additions & 0 deletions src/daft-dsl/src/functions/float/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
mod is_inf;
mod is_nan;

use is_inf::IsInfEvaluator;
use is_nan::IsNanEvaluator;
use serde::{Deserialize, Serialize};

Expand All @@ -10,6 +12,7 @@ use super::FunctionEvaluator;
/// Float-specific functions available in the expression DSL.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum FloatExpr {
/// Evaluated by `IsNanEvaluator`.
IsNan,
/// Evaluated by `IsInfEvaluator`.
IsInf,
}

impl FloatExpr {
Expand All @@ -18,6 +21,7 @@ impl FloatExpr {
use FloatExpr::*;
match self {
IsNan => &IsNanEvaluator {},
IsInf => &IsInfEvaluator {},
}
}
}
Expand All @@ -29,3 +33,11 @@ pub fn is_nan(data: ExprRef) -> ExprRef {
}
.into()
}

/// Builds a function expression applying `FloatExpr::IsInf` to `data`.
pub fn is_inf(data: ExprRef) -> ExprRef {
    let func = super::FunctionExpr::Float(FloatExpr::IsInf);
    let inputs = vec![data];
    Expr::Function { func, inputs }.into()
}
5 changes: 5 additions & 0 deletions src/daft-dsl/src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,11 @@ impl PyExpr {
Ok(is_nan(self.into()).into())
}

/// Wraps this expression in the `is_inf` float function and returns the
/// result as a new `PyExpr`.
pub fn is_inf(&self) -> PyResult<Self> {
    use functions::float::is_inf;
    let wrapped = is_inf(self.into());
    Ok(wrapped.into())
}

pub fn dt_date(&self) -> PyResult<Self> {
use functions::temporal::date;
Ok(date(self.into()).into())
Expand Down
7 changes: 7 additions & 0 deletions tests/expressions/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,13 @@ def test_float_is_nan() -> None:
assert output == "is_nan(col(a))"


def test_float_is_inf() -> None:
    """The repr of a ``float.is_inf()`` expression shows the function applied to its column."""
    expr = col("a").float.is_inf()
    assert repr(expr) == "is_inf(col(a))"


def test_date_lit_post_epoch() -> None:
d = lit(date(2022, 1, 1))
output = repr(d)
Expand Down
9 changes: 9 additions & 0 deletions tests/expressions/typing/test_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,12 @@ def test_float_is_nan(unary_data_fixture):
run_kernel=unary_data_fixture.float.is_nan,
resolvable=unary_data_fixture.datatype() in (DataType.float32(), DataType.float64()),
)


def test_float_is_inf(unary_data_fixture):
    """Type resolution of ``float.is_inf`` must agree with runtime behavior.

    The expression is expected to resolve only for float32 and float64 inputs.
    """
    series = unary_data_fixture
    float_dtypes = (DataType.float32(), DataType.float64())
    assert_typing_resolve_vs_runtime_behavior(
        data=[series],
        expr=col(series.name()).float.is_inf(),
        run_kernel=series.float.is_inf,
        resolvable=series.datatype() in float_dtypes,
    )
24 changes: 24 additions & 0 deletions tests/series/test_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,27 @@ def test_float_is_nan_all_null() -> None:
s = Series.from_arrow(pa.array([None, None, None]))
result = s.float.is_nan()
assert result.to_pylist() == [None, None, None]


def test_float_is_inf() -> None:
    """Both negative and positive infinity are flagged; a finite value is not."""
    values = pa.array([-float("inf"), 0.0, np.inf])
    flags = Series.from_arrow(values).float.is_inf()
    assert flags.to_pylist() == [True, False, True]


def test_float_is_inf_with_nulls() -> None:
    """Null inputs come back as null, interleaved with the infinity flags."""
    values = pa.array([-np.inf, None, 1.0, None, float("inf")])
    flags = Series.from_arrow(values).float.is_inf()
    assert flags.to_pylist() == [True, None, False, None, True]


def test_float_is_inf_empty() -> None:
    """An empty float64 series yields an empty result."""
    empty = pa.array([], type=pa.float64())
    flags = Series.from_arrow(empty).float.is_inf()
    assert flags.to_pylist() == []


def test_float_is_inf_all_null() -> None:
    """A series built from an untyped all-null array yields all nulls."""
    nulls = pa.array([None, None, None])
    flags = Series.from_arrow(nulls).float.is_inf()
    assert flags.to_pylist() == [None, None, None]
7 changes: 7 additions & 0 deletions tests/table/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,13 @@ def test_table_float_is_nan() -> None:
assert result_table.to_pydict() == {"a": [False, True, False, None, True]}


def test_table_float_is_inf() -> None:
    """Evaluating ``float.is_inf`` on a table column flags infinities and keeps nulls."""
    data = {"a": [-np.inf, 0.0, None, float("inf")]}
    table = MicroPartition.from_pydict(data)
    projected = table.eval_expression_list([col("a").float.is_inf()])
    # Note that null entries stay null; they are _not_ reported as infinite or finite.
    assert projected.to_pydict() == {"a": [True, False, None, True]}


def test_table_if_else() -> None:
table = MicroPartition.from_arrow(
pa.Table.from_pydict({"ones": [1, 1, 1], "zeros": [0, 0, 0], "pred": [True, False, None]})
Expand Down
Loading