From fb926a4ff9f84fcfb5c853b6f3cb5d2d11bdf916 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Sun, 30 Jul 2023 16:20:19 +0100 Subject: [PATCH] Configurable Duration Display (#4581) * Make FormatOptions const (#4580) * Add non-ISO duration display (#4554) * Review feedback --- arrow-cast/src/cast.rs | 10 ++ arrow-cast/src/display.rs | 260 ++++++++++++++++++++++++++++++++------ 2 files changed, 234 insertions(+), 36 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 2ee8c51b0aa6..e7ca2d0ed4ca 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -8959,4 +8959,14 @@ mod tests { assert_eq!(formatted.value(0).to_string(), "[[1], [2], [3]]"); assert_eq!(formatted.value(1).to_string(), "[[4], [null], [6]]"); } + + const CAST_OPTIONS: CastOptions<'static> = CastOptions { + safe: true, + format_options: FormatOptions::new(), + }; + + #[test] + fn test_const_options() { + assert!(CAST_OPTIONS.safe) + } } diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs index 07e78f8984f9..b373891ecb2a 100644 --- a/arrow-cast/src/display.rs +++ b/arrow-cast/src/display.rs @@ -34,6 +34,16 @@ use lexical_core::FormattedSize; type TimeFormat<'a> = Option<&'a str>; +/// Format for displaying durations +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum DurationFormat { + /// ISO 8601 - `P198DT72932.972880S` + ISO8601, + /// A human readable representation - `198 days 16 hours 34 mins 15.407810000 secs` + Pretty, +} + /// Options for formatting arrays /// /// By default nulls are formatted as `""` and temporal types formatted @@ -56,10 +66,18 @@ pub struct FormatOptions<'a> { timestamp_tz_format: TimeFormat<'a>, /// Time format for time arrays time_format: TimeFormat<'a>, + /// Duration format + duration_format: DurationFormat, } impl<'a> Default for FormatOptions<'a> { fn default() -> Self { + Self::new() + } +} + +impl<'a> FormatOptions<'a> { + pub 
const fn new() -> Self { Self { safe: true, null: "", @@ -68,14 +86,13 @@ impl<'a> Default for FormatOptions<'a> { timestamp_format: None, timestamp_tz_format: None, time_format: None, + duration_format: DurationFormat::ISO8601, } } -} -impl<'a> FormatOptions<'a> { /// If set to `true` any formatting errors will be written to the output /// instead of being converted into a [`std::fmt::Error`] - pub fn with_display_error(mut self, safe: bool) -> Self { + pub const fn with_display_error(mut self, safe: bool) -> Self { self.safe = safe; self } @@ -83,12 +100,12 @@ impl<'a> FormatOptions<'a> { /// Overrides the string used to represent a null /// /// Defaults to `""` - pub fn with_null(self, null: &'a str) -> Self { + pub const fn with_null(self, null: &'a str) -> Self { Self { null, ..self } } /// Overrides the format used for [`DataType::Date32`] columns - pub fn with_date_format(self, date_format: Option<&'a str>) -> Self { + pub const fn with_date_format(self, date_format: Option<&'a str>) -> Self { Self { date_format, ..self @@ -96,7 +113,7 @@ impl<'a> FormatOptions<'a> { } /// Overrides the format used for [`DataType::Date64`] columns - pub fn with_datetime_format(self, datetime_format: Option<&'a str>) -> Self { + pub const fn with_datetime_format(self, datetime_format: Option<&'a str>) -> Self { Self { datetime_format, ..self @@ -104,7 +121,7 @@ impl<'a> FormatOptions<'a> { } /// Overrides the format used for [`DataType::Timestamp`] columns without a timezone - pub fn with_timestamp_format(self, timestamp_format: Option<&'a str>) -> Self { + pub const fn with_timestamp_format(self, timestamp_format: Option<&'a str>) -> Self { Self { timestamp_format, ..self @@ -112,7 +129,10 @@ impl<'a> FormatOptions<'a> { } /// Overrides the format used for [`DataType::Timestamp`] columns with a timezone - pub fn with_timestamp_tz_format(self, timestamp_tz_format: Option<&'a str>) -> Self { + pub const fn with_timestamp_tz_format( + self, + timestamp_tz_format: Option<&'a 
str>, + ) -> Self { Self { timestamp_tz_format, ..self @@ -120,12 +140,22 @@ impl<'a> FormatOptions<'a> { } /// Overrides the format used for [`DataType::Time32`] and [`DataType::Time64`] columns - pub fn with_time_format(self, time_format: Option<&'a str>) -> Self { + pub const fn with_time_format(self, time_format: Option<&'a str>) -> Self { Self { time_format, ..self } } + + /// Overrides the format used for duration columns + /// + /// Defaults to [`DurationFormat::ISO8601`] + pub const fn with_duration_format(self, duration_format: DurationFormat) -> Self { + Self { + duration_format, + ..self + } + } } /// Implements [`Display`] for a specific array value @@ -534,20 +564,82 @@ temporal_display!(time64us_to_time, time_format, Time64MicrosecondType); temporal_display!(time64ns_to_time, time_format, Time64NanosecondType); macro_rules! duration_display { - ($convert:ident, $t:ty) => { - impl<'a> DisplayIndex for &'a PrimitiveArray<$t> { - fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { - write!(f, "{}", $convert(self.value(idx)))?; + ($convert:ident, $t:ty, $scale:tt) => { + impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> { + type State = DurationFormat; + + fn prepare( + &self, + options: &FormatOptions<'a>, + ) -> Result<Self::State, ArrowError> { + Ok(options.duration_format) + } + + fn write( + &self, + fmt: &Self::State, + idx: usize, + f: &mut dyn Write, + ) -> FormatResult { + let v = self.value(idx); + match fmt { + DurationFormat::ISO8601 => write!(f, "{}", $convert(v))?, + DurationFormat::Pretty => duration_fmt!(f, v, $scale)?, + } Ok(()) } } }; } -duration_display!(duration_s_to_duration, DurationSecondType); -duration_display!(duration_ms_to_duration, DurationMillisecondType); -duration_display!(duration_us_to_duration, DurationMicrosecondType); -duration_display!(duration_ns_to_duration, DurationNanosecondType); +macro_rules!
duration_fmt { + ($f:ident, $v:expr, 0) => {{ + let secs = $v; + let mins = secs / 60; + let hours = mins / 60; + let days = hours / 24; + + let secs = secs - (mins * 60); + let mins = mins - (hours * 60); + write!($f, "{days} days {hours} hours {mins} mins {secs} secs") + }}; + ($f:ident, $v:expr, $scale:tt) => {{ + let subsec = $v; + let secs = subsec / 10_i64.pow($scale); + let mins = secs / 60; + let hours = mins / 60; + let days = hours / 24; + + let subsec = subsec - (secs * 10_i64.pow($scale)); + let secs = secs - (mins * 60); + let mins = mins - (hours * 60); + match subsec.is_negative() { + true => { + write!( + $f, + concat!("{} days {} hours {} mins -{}.{:0", $scale, "} secs"), + days, + hours, + mins, + secs.abs(), + subsec.abs() + ) + } + false => { + write!( + $f, + concat!("{} days {} hours {} mins {}.{:0", $scale, "} secs"), + days, hours, mins, secs, subsec + ) + } + } + }}; +} + +duration_display!(duration_s_to_duration, DurationSecondType, 0); +duration_display!(duration_ms_to_duration, DurationMillisecondType, 3); +duration_display!(duration_us_to_duration, DurationMicrosecondType, 6); +duration_display!(duration_ns_to_duration, DurationNanosecondType, 9); impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalYearMonthType> { fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { @@ -866,8 +958,18 @@ pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String { mod tests { use super::*; + /// Test to verify options can be constant.
See #4580 + const TEST_CONST_OPTIONS: FormatOptions<'static> = FormatOptions::new() + .with_date_format(Some("foo")) + .with_timestamp_format(Some("404")); + + #[test] + fn test_const_options() { + assert_eq!(TEST_CONST_OPTIONS.date_format, Some("foo")); + } + #[test] - fn test_map_arry_to_string() { + fn test_map_array_to_string() { let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"]; let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]); @@ -887,25 +989,111 @@ mod tests { ); } + fn format_array(array: &dyn Array, fmt: &FormatOptions) -> Vec<String> { + let fmt = ArrayFormatter::try_new(array, fmt).unwrap(); + (0..array.len()).map(|x| fmt.value(x).to_string()).collect() + } + #[test] fn test_array_value_to_string_duration() { - let ns_array = DurationNanosecondArray::from(vec![Some(1), None]); - assert_eq!( - array_value_to_string(&ns_array, 0).unwrap(), - "PT0.000000001S" - ); - assert_eq!(array_value_to_string(&ns_array, 1).unwrap(), ""); - - let us_array = DurationMicrosecondArray::from(vec![Some(1), None]); - assert_eq!(array_value_to_string(&us_array, 0).unwrap(), "PT0.000001S"); - assert_eq!(array_value_to_string(&us_array, 1).unwrap(), ""); - - let ms_array = DurationMillisecondArray::from(vec![Some(1), None]); - assert_eq!(array_value_to_string(&ms_array, 0).unwrap(), "PT0.001S"); - assert_eq!(array_value_to_string(&ms_array, 1).unwrap(), ""); - - let s_array = DurationSecondArray::from(vec![Some(1), None]); - assert_eq!(array_value_to_string(&s_array, 0).unwrap(), "PT1S"); - assert_eq!(array_value_to_string(&s_array, 1).unwrap(), ""); + let iso_fmt = FormatOptions::new(); + let pretty_fmt = + FormatOptions::new().with_duration_format(DurationFormat::Pretty); + + let array = DurationNanosecondArray::from(vec![ + 1, + -1, + 1000, + -1000, + (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 + 123456789, + -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 - 123456789, + ]); + let iso = format_array(&array,
&iso_fmt); + let pretty = format_array(&array, &pretty_fmt); + + assert_eq!(iso[0], "PT0.000000001S"); + assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000000001 secs"); + assert_eq!(iso[1], "-PT0.000000001S"); + assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000000001 secs"); + assert_eq!(iso[2], "PT0.000001S"); + assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.000001000 secs"); + assert_eq!(iso[3], "-PT0.000001S"); + assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.000001000 secs"); + assert_eq!(iso[4], "P45DT50554.123456789S"); + assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34.123456789 secs"); + assert_eq!(iso[5], "-P45DT50554.123456789S"); + assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34.123456789 secs"); + + let array = DurationMicrosecondArray::from(vec![ + 1, + -1, + 1000, + -1000, + (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 + 123456, + -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 - 123456, + ]); + let iso = format_array(&array, &iso_fmt); + let pretty = format_array(&array, &pretty_fmt); + + assert_eq!(iso[0], "PT0.000001S"); + assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000001 secs"); + assert_eq!(iso[1], "-PT0.000001S"); + assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000001 secs"); + assert_eq!(iso[2], "PT0.001S"); + assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.001000 secs"); + assert_eq!(iso[3], "-PT0.001S"); + assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.001000 secs"); + assert_eq!(iso[4], "P45DT50554.123456S"); + assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34.123456 secs"); + assert_eq!(iso[5], "-P45DT50554.123456S"); + assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34.123456 secs"); + + let array = DurationMillisecondArray::from(vec![ + 1, + -1, + 1000, + -1000, + (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 + 123, + -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 - 123, + ]); + let iso = format_array(&array, &iso_fmt); + let pretty = format_array(&array, 
&pretty_fmt); + + assert_eq!(iso[0], "PT0.001S"); + assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.001 secs"); + assert_eq!(iso[1], "-PT0.001S"); + assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.001 secs"); + assert_eq!(iso[2], "PT1S"); + assert_eq!(pretty[2], "0 days 0 hours 0 mins 1.000 secs"); + assert_eq!(iso[3], "-PT1S"); + assert_eq!(pretty[3], "0 days 0 hours 0 mins -1.000 secs"); + assert_eq!(iso[4], "P45DT50554.123S"); + assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34.123 secs"); + assert_eq!(iso[5], "-P45DT50554.123S"); + assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34.123 secs"); + + let array = DurationSecondArray::from(vec![ + 1, + -1, + 1000, + -1000, + 45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34, + -45 * 60 * 60 * 24 - 14 * 60 * 60 - 2 * 60 - 34, + ]); + let iso = format_array(&array, &iso_fmt); + let pretty = format_array(&array, &pretty_fmt); + + assert_eq!(iso[0], "PT1S"); + assert_eq!(pretty[0], "0 days 0 hours 0 mins 1 secs"); + assert_eq!(iso[1], "-PT1S"); + assert_eq!(pretty[1], "0 days 0 hours 0 mins -1 secs"); + assert_eq!(iso[2], "PT1000S"); + assert_eq!(pretty[2], "0 days 0 hours 16 mins 40 secs"); + assert_eq!(iso[3], "-PT1000S"); + assert_eq!(pretty[3], "0 days 0 hours -16 mins -40 secs"); + assert_eq!(iso[4], "P45DT50554S"); + assert_eq!(pretty[4], "45 days 1094 hours 2 mins 34 secs"); + assert_eq!(iso[5], "-P45DT50554S"); + assert_eq!(pretty[5], "-45 days -1094 hours -2 mins -34 secs"); } }