Skip to content

Commit

Permalink
preserve structure of timezone designation list
Browse files Browse the repository at this point in the history
Signed-off-by: Petros Angelatos <[email protected]>
  • Loading branch information
petrosagg committed Sep 23, 2024
1 parent 6eebd88 commit d4b9bd0
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 97 deletions.
14 changes: 14 additions & 0 deletions utils/tzif/src/data/tzif.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,17 @@ pub struct DataBlock {
/// A series of [`UtLocalIndicator`] objects.
pub ut_local_indicators: Vec<UtLocalIndicator>,
}

impl DataBlock {
    /// Retrieves the timezone designation at index `idx`.
    ///
    /// `idx` is a byte offset into the NUL-separated designation table, as
    /// stored in a local time type record. It may point into the middle of a
    /// designation, in which case the suffix starting at that offset is
    /// returned. An offset that lands on a terminator yields `Some("")`.
    ///
    /// Returns `None` when `idx` lies past the end of the table, or when it
    /// does not fall on a character boundary of a non-ASCII designation.
    pub fn time_zone_designation(&self, mut idx: usize) -> Option<&str> {
        for designation in &self.time_zone_designations {
            if idx <= designation.len() {
                // `get` is the checked form of `&designation[idx..]`: it
                // returns `None` instead of panicking when `idx` splits a
                // multi-byte character.
                return designation.get(idx..);
            }
            // Skip this designation plus the NUL terminator that followed it.
            // No underflow: this point is only reached when
            // `idx >= designation.len() + 1`.
            idx -= designation.len() + 1;
        }
        None
    }
}
161 changes: 64 additions & 97 deletions utils/tzif/src/parse/tzif.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,65 +376,40 @@ where
count_min_max(typecnt, typecnt, local_time_type_record(charcnt))
}

/// Reads the raw time zone designation table: exactly `charcnt` bytes
/// holding NUL-terminated (0x00) designation strings, where `charcnt` is
/// the count given by the header field of the same name.
///
/// The bytes are returned as a single [`String`], terminators included.
/// Invalid UTF-8 sequences are replaced rather than rejected.
fn raw_time_zone_designations<Input>(charcnt: usize) -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = u8>,
    Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
    // Both bounds are `charcnt`, so the parser consumes exactly that many bytes.
    let table_bytes = count_min_max(charcnt, charcnt, any());
    table_bytes.map(|raw: Vec<u8>| {
        let decoded = String::from_utf8_lossy(&raw);
        decoded.into_owned()
    })
}

/// A series of bytes constituting an array of
/// NUL-terminated (0x00) time zone designation strings. The total
/// number of bytes is specified by the "charcnt" field in the
/// header.
///
/// Splits each designation into a vector of [`String`] where each string
/// starts at an index defined by a local time type record and ends at a
/// NUL-terminator (0x00)
/// Splits the list of bytes by the NUL-terminator (0x00) character
/// and puts each designation into a [`String`].
///
/// > e.g.
/// > ```text
/// > "LMT\u{0}HMT\u{0}MMT\u{0}IST\u{0}+0630\u{0}"
/// > ```
///
/// Note that two designations MAY overlap if one is a suffix
/// of the other. The character encoding of time zone designation
/// strings is not specified.
/// Note that a local time record index might point in the middle of a
/// designation. In that case the record's designation is the specified
/// suffix. The [`DataBlock::time_zone_designation`] method can be used to
/// access the correct designation string given an index.
///
/// The character encoding of time zone designation strings is not specified.
/// However, time zone designations SHOULD consist of at least three (3) and no
/// more than six (6) ASCII characters from the set of alphanumerics,
/// '-', and '+'. This is for compatibility with POSIX requirements
/// for time zone abbreviations, so this parser enforces a UTF-8 ASCII encoding,
/// to ensure compatibility with Rust strings.
fn time_zone_designations<Input>(
charcnt: usize,
local_time_type_records: Vec<LocalTimeTypeRecord>,
) -> impl Parser<Input, Output = Vec<String>>
fn time_zone_designations<Input>(charcnt: usize) -> impl Parser<Input, Output = Vec<String>>
where
Input: Stream<Token = u8>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
raw_time_zone_designations(charcnt).map(move |raw_time_zone_designations| {
let mut time_zone_designations = Vec::with_capacity(local_time_type_records.len());
for record in &local_time_type_records {
for end_idx in record.idx..charcnt {
if raw_time_zone_designations.as_bytes()[end_idx] == b'\0' {
time_zone_designations.push(
String::from_utf8_lossy(
raw_time_zone_designations[record.idx..end_idx].as_bytes(),
)
.into_owned(),
);
break;
}
}
}
time_zone_designations
count_min_max(charcnt, charcnt, any()).map(|bytes: Vec<u8>| {
bytes
.split_inclusive(|b| *b == 0)
// Each chunk normally ends with its 0x00 terminator, which the slice below strips.
// NOTE(review): when the input does not end in NUL, `split_inclusive` yields a final
// chunk without a terminator, and `s.len() - 1` then drops that chunk's last data
// byte instead. Confirm whether unterminated tables need to be handled here.
.map(|s| String::from_utf8_lossy(&s[0..s.len() - 1]).into_owned())
.collect()
})
}

Expand Down Expand Up @@ -678,45 +653,17 @@ where
Input: Stream<Token = u8>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
(
historic_transition_times::<V, _>(header.timecnt),
transition_types(header.timecnt, header.typecnt),
local_time_type_records(header.typecnt, header.charcnt),
)
.then(
move |(transition_times, transition_types, local_time_type_records)| {
(
value(transition_times),
value(transition_types),
value(local_time_type_records.clone()),
time_zone_designations(header.charcnt, local_time_type_records),
leap_second_records::<V, _>(header.leapcnt),
standard_wall_indicators(header.isstdcnt),
)
},
)
.then(
move |(
transition_times,
transition_types,
local_time_type_records,
time_zone_designations,
leap_second_records,
standard_wall_indicators,
)| {
combine::struct_parser! {
DataBlock {
transition_times: value(transition_times),
transition_types: value(transition_types),
local_time_type_records: value(local_time_type_records),
time_zone_designations: value(time_zone_designations),
leap_second_records: value(leap_second_records),
standard_wall_indicators: value(standard_wall_indicators),
ut_local_indicators: ut_local_indicators(header.isutcnt),
}
}
},
)
combine::struct_parser! {
DataBlock {
transition_times: historic_transition_times::<V, _>(header.timecnt),
transition_types: transition_types(header.timecnt, header.typecnt),
local_time_type_records: local_time_type_records(header.typecnt, header.charcnt),
time_zone_designations: time_zone_designations(header.charcnt),
leap_second_records: leap_second_records::<V, _>(header.leapcnt),
standard_wall_indicators: standard_wall_indicators(header.isstdcnt),
ut_local_indicators: ut_local_indicators(header.isutcnt),
}
}
}

/// Parses a `TZif` footer.
Expand Down Expand Up @@ -1196,31 +1143,51 @@ mod test {
// Checks that a table of three NUL-terminated designations parses into the
// three designation strings, with the terminators removed.
#[test]
fn parse_time_zone_designations() {
assert_parse_eq!(
time_zone_designations(
14,
vec![
LocalTimeTypeRecord {
utoff: Seconds(35356),
is_dst: false,
idx: 0,
},
LocalTimeTypeRecord {
utoff: Seconds(39600),
is_dst: true,
idx: 4,
},
LocalTimeTypeRecord {
utoff: Seconds(36000),
is_dst: false,
idx: 9,
},
]
),
time_zone_designations(14),
// 3 + 1 + 4 + 1 + 4 + 1 = 14 bytes, matching the count passed above.
"LMT\0AEDT\0AEST\0",
vec!["LMT".to_owned(), "AEDT".to_owned(), "AEST".to_owned()],
);
}

#[test]
fn time_zone_designation_indexing() {
    // Four 6-byte local time type records followed by a 9-byte designation
    // table; the last byte of each record is its index into that table.
    let raw: &[u8] = &[
        0x00, 0x00, 0x00, 0x10, 0x01, 0x00, // local time record 0 -> index 0
        0x00, 0x00, 0x00, 0x10, 0x01, 0x03, // local time record 1 -> index 3
        0x00, 0x00, 0x00, 0x10, 0x01, 0x04, // local time record 2 -> index 4
        0x00, 0x00, 0x00, 0x10, 0x01, 0x05, // local time record 3 -> index 5
        b'L', b'M', b'T', 0x00, b'A', b'E', b'D', b'T', 0x00, // timezone designations
    ];
    let header = TzifHeader {
        version: 0,
        timecnt: 0,
        typecnt: 4,
        charcnt: 9,
        leapcnt: 0,
        isstdcnt: 0,
        isutcnt: 0,
    };
    let (parsed, _) = data_block::<1, _>(header).parse(raw).unwrap();

    // Record `i` must resolve to the `i`-th entry: a full designation, the
    // empty string at a terminator, or a suffix when the index points
    // mid-designation.
    let expected = ["LMT", "", "AEDT", "EDT"];
    for (i, want) in expected.iter().enumerate() {
        let idx = parsed.local_time_type_records[i].idx;
        assert_eq!(parsed.time_zone_designation(idx), Some(*want));
    }

    // The final terminator is still addressable; one past it is not.
    assert_eq!(parsed.time_zone_designation(8), Some(""));
    assert_eq!(parsed.time_zone_designation(9), None);
}

#[test]
fn parse_leap_second_occurrence() {
const FIVE: &[u8] = 5i64.to_be_bytes().as_slice();
Expand Down

0 comments on commit d4b9bd0

Please sign in to comment.