Skip to content

Commit

Permalink
Configurable Duration Display (#4581)
Browse files Browse the repository at this point in the history
* Make FormatOptions const (#4580)

* Add non-ISO duration display (#4554)

* Review feedback
  • Loading branch information
tustvold authored Jul 30, 2023
1 parent 1e0f02f commit fb926a4
Show file tree
Hide file tree
Showing 2 changed files with 234 additions and 36 deletions.
10 changes: 10 additions & 0 deletions arrow-cast/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8959,4 +8959,14 @@ mod tests {
assert_eq!(formatted.value(0).to_string(), "[[1], [2], [3]]");
assert_eq!(formatted.value(1).to_string(), "[[4], [null], [6]]");
}

/// [`CastOptions`] value assembled in a `const` initializer.
///
/// The nested `format_options` field is produced by the `const fn`
/// [`FormatOptions::new`], so this item only compiles while that constructor
/// stays usable in constant contexts.
const CAST_OPTIONS: CastOptions<'static> = CastOptions {
safe: true,
format_options: FormatOptions::new(),
};

#[test]
fn test_const_options() {
    // Reading a field back is enough: the interesting part is that
    // `CAST_OPTIONS` could be built in a `const` initializer at all.
    assert!(CAST_OPTIONS.safe);
}
}
260 changes: 224 additions & 36 deletions arrow-cast/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ use lexical_core::FormattedSize;

type TimeFormat<'a> = Option<&'a str>;

/// Format for displaying durations
///
/// Selected with [`FormatOptions::with_duration_format`];
/// [`FormatOptions::new`] uses [`DurationFormat::ISO8601`].
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum DurationFormat {
/// ISO 8601 - `P198DT72932.972880S`
ISO8601,
/// A human readable representation - `198 days 16 hours 34 mins 15.407810000 secs`
Pretty,
}

/// Options for formatting arrays
///
/// By default nulls are formatted as `""` and temporal types formatted
Expand All @@ -56,10 +66,18 @@ pub struct FormatOptions<'a> {
timestamp_tz_format: TimeFormat<'a>,
/// Time format for time arrays
time_format: TimeFormat<'a>,
/// Duration format
duration_format: DurationFormat,
}

impl<'a> Default for FormatOptions<'a> {
fn default() -> Self {
Self::new()
}
}

impl<'a> FormatOptions<'a> {
pub const fn new() -> Self {
Self {
safe: true,
null: "",
Expand All @@ -68,64 +86,76 @@ impl<'a> Default for FormatOptions<'a> {
timestamp_format: None,
timestamp_tz_format: None,
time_format: None,
duration_format: DurationFormat::ISO8601,
}
}
}

impl<'a> FormatOptions<'a> {
// Builder-style setters: each one takes `self` by value, replaces a single
// field and returns the updated options.
//
// NOTE(review): several methods below show two signature lines, a `pub fn`
// immediately followed by a `pub const fn`. This looks like the pre- and
// post-change lines of a rendered diff; presumably only the `const fn` line
// belongs in the compiled file — confirm against the repository.

/// If set to `true` any formatting errors will be written to the output
/// instead of being converted into a [`std::fmt::Error`]
///
/// Defaults to `true`
pub fn with_display_error(mut self, safe: bool) -> Self {
pub const fn with_display_error(mut self, safe: bool) -> Self {
self.safe = safe;
self
}

/// Overrides the string used to represent a null
///
/// Defaults to `""`
pub fn with_null(self, null: &'a str) -> Self {
pub const fn with_null(self, null: &'a str) -> Self {
Self { null, ..self }
}

/// Overrides the format used for [`DataType::Date32`] columns
pub fn with_date_format(self, date_format: Option<&'a str>) -> Self {
pub const fn with_date_format(self, date_format: Option<&'a str>) -> Self {
Self {
date_format,
..self
}
}

/// Overrides the format used for [`DataType::Date64`] columns
pub fn with_datetime_format(self, datetime_format: Option<&'a str>) -> Self {
pub const fn with_datetime_format(self, datetime_format: Option<&'a str>) -> Self {
Self {
datetime_format,
..self
}
}

/// Overrides the format used for [`DataType::Timestamp`] columns without a timezone
///
/// Defaults to `None`
pub fn with_timestamp_format(self, timestamp_format: Option<&'a str>) -> Self {
pub const fn with_timestamp_format(self, timestamp_format: Option<&'a str>) -> Self {
Self {
timestamp_format,
..self
}
}

/// Overrides the format used for [`DataType::Timestamp`] columns with a timezone
///
/// Defaults to `None`
pub fn with_timestamp_tz_format(self, timestamp_tz_format: Option<&'a str>) -> Self {
pub const fn with_timestamp_tz_format(
self,
timestamp_tz_format: Option<&'a str>,
) -> Self {
Self {
timestamp_tz_format,
..self
}
}

/// Overrides the format used for [`DataType::Time32`] and [`DataType::Time64`] columns
///
/// Defaults to `None`
pub fn with_time_format(self, time_format: Option<&'a str>) -> Self {
pub const fn with_time_format(self, time_format: Option<&'a str>) -> Self {
Self {
time_format,
..self
}
}

/// Overrides the format used for duration columns
///
/// Defaults to [`DurationFormat::ISO8601`]
pub const fn with_duration_format(self, duration_format: DurationFormat) -> Self {
Self {
duration_format,
..self
}
}
}

/// Implements [`Display`] for a specific array value
Expand Down Expand Up @@ -534,20 +564,82 @@ temporal_display!(time64us_to_time, time_format, Time64MicrosecondType);
temporal_display!(time64ns_to_time, time_format, Time64NanosecondType);

// Generates the display implementation for one duration array type.
//
// * `$convert` - function applied to the raw value for ISO 8601 output
// * `$t`       - the primitive type parameter of the `PrimitiveArray`
// * `$scale`   - token forwarded to `duration_fmt!` for the pretty output
//
// NOTE(review): the first four lines of the body (the two-argument arm
// implementing `DisplayIndex`) are never closed before the three-argument arm
// starts. They look like the pre-change lines of a rendered diff — confirm
// against the repository.
macro_rules! duration_display {
($convert:ident, $t:ty) => {
impl<'a> DisplayIndex for &'a PrimitiveArray<$t> {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
write!(f, "{}", $convert(self.value(idx)))?;
($convert:ident, $t:ty, $scale:tt) => {
impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
// The only per-array state is the duration format that was requested.
type State = DurationFormat;

// Copies the configured duration format out of the options; never fails.
fn prepare(
&self,
options: &FormatOptions<'a>,
) -> Result<Self::State, ArrowError> {
Ok(options.duration_format)
}

// Writes the value at `idx` to `f` in the format chosen by `prepare`.
fn write(
&self,
fmt: &Self::State,
idx: usize,
f: &mut dyn Write,
) -> FormatResult {
let v = self.value(idx);
match fmt {
// ISO 8601: display whatever `$convert` returns for the raw value.
DurationFormat::ISO8601 => write!(f, "{}", $convert(v))?,
// Pretty: hand the raw value and its scale to `duration_fmt!`.
DurationFormat::Pretty => duration_fmt!(f, v, $scale)?,
}
Ok(())
}
}
};
}

// NOTE(review): these two-argument invocations are repeated further down with
// an extra scale argument; they look like the pre-change lines of a rendered
// diff — confirm against the repository.
duration_display!(duration_s_to_duration, DurationSecondType);
duration_display!(duration_ms_to_duration, DurationMillisecondType);
duration_display!(duration_us_to_duration, DurationMicrosecondType);
duration_display!(duration_ns_to_duration, DurationNanosecondType);
/// Writes a duration to `$f` in the human readable layout
/// `<days> days <hours> hours <mins> mins <secs> secs`.
///
/// * `$f` - identifier of the writer handed to `write!`
/// * `$v` - the raw duration, counted in units of `10^-$scale` seconds
/// * `$scale` - number of sub-second decimal digits carried by `$v`; the
///   literal `0` selects the whole-second arm, which prints no fraction
///
/// Expands to the value returned by `write!`.
///
/// Every component is reduced by the next larger unit, so `hours` stays within
/// a day, `mins` within an hour and `secs` within a minute. Integer division
/// truncates towards zero, so each non-zero component of a negative duration
/// is itself negative.
macro_rules! duration_fmt {
    ($f:ident, $v:expr, 0) => {{
        let secs = $v;
        let mins = secs / 60;
        let hours = mins / 60;
        let days = hours / 24;

        let secs = secs - (mins * 60);
        let mins = mins - (hours * 60);
        // Keep only the hours left over after whole days; without this step
        // the output repeats the days inside the hours (45 days -> 1094 hours).
        let hours = hours - (days * 24);
        write!($f, "{days} days {hours} hours {mins} mins {secs} secs")
    }};
    ($f:ident, $v:expr, $scale:tt) => {{
        let subsec = $v;
        let secs = subsec / 10_i64.pow($scale);
        let mins = secs / 60;
        let hours = mins / 60;
        let days = hours / 24;

        let subsec = subsec - (secs * 10_i64.pow($scale));
        let secs = secs - (mins * 60);
        let mins = mins - (hours * 60);
        // Same reduction as the whole-second arm: hours within the day only.
        let hours = hours - (days * 24);
        if subsec.is_negative() {
            // The sign is written once in front of the seconds and both parts
            // are printed as magnitudes; otherwise a value such as -1ns would
            // render as `0.-00000001`.
            write!(
                $f,
                concat!("{} days {} hours {} mins -{}.{:0", $scale, "} secs"),
                days,
                hours,
                mins,
                secs.abs(),
                subsec.abs()
            )
        } else {
            write!(
                $f,
                concat!("{} days {} hours {} mins {}.{:0", $scale, "} secs"),
                days, hours, mins, secs, subsec
            )
        }
    }};
}

// The trailing literal is the scale handed on to `duration_fmt!`: the raw
// value is divided by `10^scale` to obtain whole seconds, and `0` selects the
// arm that prints no fractional part.
duration_display!(duration_s_to_duration, DurationSecondType, 0);
duration_display!(duration_ms_to_duration, DurationMillisecondType, 3);
duration_display!(duration_us_to_duration, DurationMicrosecondType, 6);
duration_display!(duration_ns_to_duration, DurationNanosecondType, 9);

impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalYearMonthType> {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
Expand Down Expand Up @@ -866,8 +958,18 @@ pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String {
mod tests {
use super::*;

/// Test to verify options can be constant. See #4580
///
/// The value is built purely from [`FormatOptions::new`] and chained `with_*`
/// calls inside a `const` initializer, so it only compiles while those
/// functions are `const fn`.
const TEST_CONST_OPTIONS: FormatOptions<'static> = FormatOptions::new()
.with_date_format(Some("foo"))
.with_timestamp_format(Some("404"));

#[test]
fn test_const_options() {
    // The constant was configured with `with_date_format(Some("foo"))`;
    // read that field back to show the builder ran at compile time.
    let configured_date_format = TEST_CONST_OPTIONS.date_format;
    assert_eq!(configured_date_format, Some("foo"));
}

#[test]
fn test_map_arry_to_string() {
fn test_map_array_to_string() {
let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);

Expand All @@ -887,25 +989,111 @@ mod tests {
);
}

/// Formats every element of `array` with the given options and returns the
/// rendered strings in index order.
///
/// Panics if an [`ArrayFormatter`] cannot be created for `array`.
fn format_array(array: &dyn Array, fmt: &FormatOptions) -> Vec<String> {
    let formatter = ArrayFormatter::try_new(array, fmt).unwrap();
    let mut rendered = Vec::with_capacity(array.len());
    for row in 0..array.len() {
        rendered.push(formatter.value(row).to_string());
    }
    rendered
}

// Formats the same duration values at nanosecond, microsecond, millisecond and
// second resolution, once with the default (ISO 8601) options and once with
// `DurationFormat::Pretty`, and compares both renderings element by element.
//
// NOTE(review): the first group of assertions (those calling
// `array_value_to_string` on `ns_array` / `us_array` / `ms_array` / `s_array`)
// looks like the pre-change body of this test captured from a rendered diff —
// confirm against the repository.
//
// NOTE(review): the Pretty expectations list the hours component as the total
// number of hours (1094 = 45 * 24 + 14) next to `45 days`. These strings must
// be kept in sync with whatever `duration_fmt!` writes.
#[test]
fn test_array_value_to_string_duration() {
let ns_array = DurationNanosecondArray::from(vec![Some(1), None]);
assert_eq!(
array_value_to_string(&ns_array, 0).unwrap(),
"PT0.000000001S"
);
assert_eq!(array_value_to_string(&ns_array, 1).unwrap(), "");

let us_array = DurationMicrosecondArray::from(vec![Some(1), None]);
assert_eq!(array_value_to_string(&us_array, 0).unwrap(), "PT0.000001S");
assert_eq!(array_value_to_string(&us_array, 1).unwrap(), "");

let ms_array = DurationMillisecondArray::from(vec![Some(1), None]);
assert_eq!(array_value_to_string(&ms_array, 0).unwrap(), "PT0.001S");
assert_eq!(array_value_to_string(&ms_array, 1).unwrap(), "");

let s_array = DurationSecondArray::from(vec![Some(1), None]);
assert_eq!(array_value_to_string(&s_array, 0).unwrap(), "PT1S");
assert_eq!(array_value_to_string(&s_array, 1).unwrap(), "");
// Default options select ISO 8601; the second set switches to Pretty.
let iso_fmt = FormatOptions::new();
let pretty_fmt =
FormatOptions::new().with_duration_format(DurationFormat::Pretty);

// Nanosecond resolution: the smallest unit, 1000 units, and
// 45 days 14 h 2 min 34 s plus a fraction, each in both signs.
let array = DurationNanosecondArray::from(vec![
1,
-1,
1000,
-1000,
(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 + 123456789,
-(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 - 123456789,
]);
let iso = format_array(&array, &iso_fmt);
let pretty = format_array(&array, &pretty_fmt);

assert_eq!(iso[0], "PT0.000000001S");
assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000000001 secs");
assert_eq!(iso[1], "-PT0.000000001S");
assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000000001 secs");
assert_eq!(iso[2], "PT0.000001S");
assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.000001000 secs");
assert_eq!(iso[3], "-PT0.000001S");
assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.000001000 secs");
assert_eq!(iso[4], "P45DT50554.123456789S");
assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34.123456789 secs");
assert_eq!(iso[5], "-P45DT50554.123456789S");
assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34.123456789 secs");

// Microsecond resolution, same value pattern.
let array = DurationMicrosecondArray::from(vec![
1,
-1,
1000,
-1000,
(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 + 123456,
-(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 - 123456,
]);
let iso = format_array(&array, &iso_fmt);
let pretty = format_array(&array, &pretty_fmt);

assert_eq!(iso[0], "PT0.000001S");
assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000001 secs");
assert_eq!(iso[1], "-PT0.000001S");
assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000001 secs");
assert_eq!(iso[2], "PT0.001S");
assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.001000 secs");
assert_eq!(iso[3], "-PT0.001S");
assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.001000 secs");
assert_eq!(iso[4], "P45DT50554.123456S");
assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34.123456 secs");
assert_eq!(iso[5], "-P45DT50554.123456S");
assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34.123456 secs");

// Millisecond resolution, same value pattern.
let array = DurationMillisecondArray::from(vec![
1,
-1,
1000,
-1000,
(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 + 123,
-(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 - 123,
]);
let iso = format_array(&array, &iso_fmt);
let pretty = format_array(&array, &pretty_fmt);

assert_eq!(iso[0], "PT0.001S");
assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.001 secs");
assert_eq!(iso[1], "-PT0.001S");
assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.001 secs");
assert_eq!(iso[2], "PT1S");
assert_eq!(pretty[2], "0 days 0 hours 0 mins 1.000 secs");
assert_eq!(iso[3], "-PT1S");
assert_eq!(pretty[3], "0 days 0 hours 0 mins -1.000 secs");
assert_eq!(iso[4], "P45DT50554.123S");
assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34.123 secs");
assert_eq!(iso[5], "-P45DT50554.123S");
assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34.123 secs");

// Second resolution: no fractional part is printed in the Pretty form.
let array = DurationSecondArray::from(vec![
1,
-1,
1000,
-1000,
45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34,
-45 * 60 * 60 * 24 - 14 * 60 * 60 - 2 * 60 - 34,
]);
let iso = format_array(&array, &iso_fmt);
let pretty = format_array(&array, &pretty_fmt);

assert_eq!(iso[0], "PT1S");
assert_eq!(pretty[0], "0 days 0 hours 0 mins 1 secs");
assert_eq!(iso[1], "-PT1S");
assert_eq!(pretty[1], "0 days 0 hours 0 mins -1 secs");
assert_eq!(iso[2], "PT1000S");
assert_eq!(pretty[2], "0 days 0 hours 16 mins 40 secs");
assert_eq!(iso[3], "-PT1000S");
assert_eq!(pretty[3], "0 days 0 hours -16 mins -40 secs");
assert_eq!(iso[4], "P45DT50554S");
assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34 secs");
assert_eq!(iso[5], "-P45DT50554S");
assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34 secs");
}
}

0 comments on commit fb926a4

Please sign in to comment.