Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use resource binaries on Series.to_list for strings #346

Merged
merged 3 commits into from
Sep 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions native/explorer/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion native/explorer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ anyhow = "1"
chrono = "0.4"
rand = { version = "0.8.4", features = ["alloc"] }
rand_pcg = "0.3.1"
rustler = "0.26.0"
rustler = { git = "https://github.com/rusterlium/rustler" }
thiserror = "1"

# MiMalloc won´t compile on Windows with the GCC compiler.
Expand Down
3 changes: 1 addition & 2 deletions native/explorer/src/datatypes.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use crate::atoms;
use chrono::prelude::*;
use polars::prelude::*;
use rustler::resource::ResourceArc;
use rustler::{Atom, NifStruct};
use rustler::{Atom, NifStruct, ResourceArc};
use std::convert::TryInto;

pub struct ExDataFrameRef(pub DataFrame);
Expand Down
114 changes: 59 additions & 55 deletions native/explorer/src/encoding.rs
Original file line number Diff line number Diff line change
@@ -1,39 +1,17 @@
use chrono::prelude::*;
use polars::prelude::*;
use rustler::{Binary, Encoder, Env, NewBinary, Term};
use rustler::{Encoder, Env, ResourceArc, Term};

use crate::atoms::{
self, calendar, day, hour, infinity, microsecond, minute, month, nan, neg_infinity, second,
year,
};
use crate::datatypes::{days_to_date, timestamp_to_datetime, ExSeriesRef};
use crate::datatypes::{days_to_date, timestamp_to_datetime, ExSeries, ExSeriesRef};

use rustler::types::atom;
use rustler::wrapper::list;
use rustler::wrapper::{map, NIF_TERM};
use rustler::wrapper::{binary, list, map, NIF_TERM};

pub fn term_from_value<'b>(v: AnyValue, env: Env<'b>) -> Term<'b> {
match v {
AnyValue::Null => None::<bool>.encode(env),
AnyValue::Boolean(v) => Some(v).encode(env),
AnyValue::Utf8(v) => Some(v).encode(env),
AnyValue::Int8(v) => Some(v).encode(env),
AnyValue::Int16(v) => Some(v).encode(env),
AnyValue::Int32(v) => Some(v).encode(env),
AnyValue::Int64(v) => Some(v).encode(env),
AnyValue::UInt8(v) => Some(v).encode(env),
AnyValue::UInt16(v) => Some(v).encode(env),
AnyValue::UInt32(v) => Some(v).encode(env),
AnyValue::UInt64(v) => Some(v).encode(env),
AnyValue::Float64(v) => Some(v).encode(env),
AnyValue::Float32(v) => Some(v).encode(env),
AnyValue::Date(v) => encode_date(v, env),
AnyValue::Datetime(v, time_unit, None) => encode_datetime(v, time_unit, env),
dt => panic!("get/2 not implemented for {:?}", dt),
}
}

// ExSeriesRef encoding
// Encoding helpers

// TODO: Implement this as a regular function or encapsulate it inside Rustler.
macro_rules! unsafe_iterator_to_list {
Expand Down Expand Up @@ -208,18 +186,23 @@ fn encode_datetime_series<'b>(s: &Series, time_unit: TimeUnit, env: Env<'b>) ->
}

#[inline]
fn encode_utf8_series<'b>(s: &Series, env: Env<'b>) -> Term<'b> {
fn encode_utf8_series<'b>(
resource: &ResourceArc<ExSeriesRef>,
s: &Series,
env: Env<'b>,
) -> Term<'b> {
let utf8 = s.utf8().unwrap();
let nil_atom = atom::nil().to_term(env);
let env_as_c_arg = env.as_c_arg();
let nil_as_c_arg = atom::nil().to_term(env).as_c_arg();
let acc = unsafe { list::make_list(env_as_c_arg, &[]) };

let list = utf8.downcast_iter().rfold(acc, |acc, array| {
// Create a binary per array buffer
let values = array.values();
let mut new_binary = NewBinary::new(env, values.len());
new_binary.copy_from_slice(values.as_slice());
let binary: Binary = new_binary.into();

let binary = unsafe { resource.make_binary_unsafe(env, |_| values) }
.to_term(env)
.as_c_arg();

// Offsets have one more element than values and validity,
// so we read the last one as the initial accumulator and skip it.
Expand All @@ -235,17 +218,16 @@ fn encode_utf8_series<'b>(s: &Series, env: Env<'b>) -> Term<'b> {
iter.rfold(acc, |acc, uncast_offset| {
let offset = *uncast_offset as NIF_TERM;

let term = if validity_iter.next_back().unwrap_or(true) {
binary
.make_subbinary(offset, last_offset - offset)
.unwrap()
.to_term(env)
let term_as_c_arg = if validity_iter.next_back().unwrap_or(true) {
unsafe {
binary::make_subbinary(env_as_c_arg, binary, offset, last_offset - offset)
}
} else {
nil_atom
nil_as_c_arg
};

last_offset = offset;
unsafe { list::make_list_cell(env_as_c_arg, term.as_c_arg(), acc) }
unsafe { list::make_list_cell(env_as_c_arg, term_as_c_arg, acc) }
})
});

Expand Down Expand Up @@ -315,23 +297,45 @@ macro_rules! encode_list {
};
}

impl Encoder for ExSeriesRef {
fn encode<'b>(&self, env: Env<'b>) -> Term<'b> {
let s = &self.0;
match s.dtype() {
DataType::Boolean => encode!(s, env, bool),
DataType::Int32 => encode!(s, env, i32),
DataType::Int64 => encode!(s, env, i64),
DataType::UInt8 => encode!(s, env, u8),
DataType::UInt32 => encode!(s, env, u32),
DataType::Utf8 => encode_utf8_series(s, env),
DataType::Float64 => encode_float64_series(s, env),
DataType::Date => encode_date_series(s, env),
DataType::Datetime(time_unit, None) => encode_datetime_series(s, *time_unit, env),
DataType::List(t) if t as &DataType == &DataType::UInt32 => {
encode_list!(s, env, u32, u32)
}
dt => panic!("to_list/1 not implemented for {:?}", dt),
// API

pub fn term_from_value<'b>(v: AnyValue, env: Env<'b>) -> Term<'b> {
match v {
AnyValue::Null => None::<bool>.encode(env),
AnyValue::Boolean(v) => Some(v).encode(env),
AnyValue::Utf8(v) => Some(v).encode(env),
AnyValue::Int8(v) => Some(v).encode(env),
AnyValue::Int16(v) => Some(v).encode(env),
AnyValue::Int32(v) => Some(v).encode(env),
AnyValue::Int64(v) => Some(v).encode(env),
AnyValue::UInt8(v) => Some(v).encode(env),
AnyValue::UInt16(v) => Some(v).encode(env),
AnyValue::UInt32(v) => Some(v).encode(env),
AnyValue::UInt64(v) => Some(v).encode(env),
AnyValue::Float64(v) => Some(v).encode(env),
AnyValue::Float32(v) => Some(v).encode(env),
AnyValue::Date(v) => encode_date(v, env),
AnyValue::Datetime(v, time_unit, None) => encode_datetime(v, time_unit, env),
dt => panic!("get/2 not implemented for {:?}", dt),
}
}

pub fn list_from_series(data: ExSeries, env: Env) -> Term {
let s = &data.resource.0;

match s.dtype() {
DataType::Boolean => encode!(s, env, bool),
DataType::Int32 => encode!(s, env, i32),
DataType::Int64 => encode!(s, env, i64),
DataType::UInt8 => encode!(s, env, u8),
DataType::UInt32 => encode!(s, env, u32),
DataType::Utf8 => encode_utf8_series(&data.resource, s, env),
DataType::Float64 => encode_float64_series(s, env),
DataType::Date => encode_date_series(s, env),
DataType::Datetime(time_unit, None) => encode_datetime_series(s, *time_unit, env),
DataType::List(t) if t as &DataType == &DataType::UInt32 => {
encode_list!(s, env, u32, u32)
}
dt => panic!("to_list/1 not implemented for {:?}", dt),
}
}
10 changes: 4 additions & 6 deletions native/explorer/src/series.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::{
datatypes::{ExDate, ExDateTime},
encoding::term_from_value,
ExDataFrame, ExSeries, ExSeriesRef, ExplorerError,
encoding, ExDataFrame, ExSeries, ExplorerError,
};

use polars::prelude::*;
Expand Down Expand Up @@ -443,8 +442,7 @@ pub fn rolling_opts(

#[rustler::nif(schedule = "DirtyCpu")]
pub fn s_to_list(env: Env, data: ExSeries) -> Result<Term, ExplorerError> {
let s = ExSeriesRef(data.resource.0.clone());
Ok(s.encode(env))
Ok(encoding::list_from_series(data, env))
}

#[rustler::nif(schedule = "DirtyCpu")]
Expand Down Expand Up @@ -574,7 +572,7 @@ pub fn s_std(env: Env, data: ExSeries) -> Result<Term, ExplorerError> {
#[rustler::nif]
pub fn s_get(env: Env, data: ExSeries, idx: usize) -> Result<Term, ExplorerError> {
let s = &data.resource.0;
Ok(term_from_value(s.get(idx), env))
Ok(encoding::term_from_value(s.get(idx), env))
}

#[rustler::nif(schedule = "DirtyCpu")]
Expand Down Expand Up @@ -616,7 +614,7 @@ pub fn s_quantile<'a>(
Some(microseconds) => Ok(ExDateTime::from(microseconds as i64).encode(env)),
}
}
_ => Ok(term_from_value(
_ => Ok(encoding::term_from_value(
s.quantile_as_series(quantile, strategy)?
.cast(dtype)?
.get(0),
Expand Down