Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse timestamps with leap seconds (#3861) #3862

Merged
merged 2 commits into from
Mar 15, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions arrow-cast/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,21 @@ impl TimestampParser {
///
/// Returning the end byte offset
fn time(&self) -> Option<(NaiveTime, usize)> {
// Make a NaiveTime, handling leap seconds: a seconds value of 60 is encoded as second 59 plus an extra 1_000_000_000 nanoseconds
let time = |hour, min, sec, nano| match sec {
60 => {
let nano = 1_000_000_000 + nano;
NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
}
_ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
};

match (self.mask >> 11) & 0b11111111 {
// 09:26:56
0b11011011 if self.test(13, b':') && self.test(16, b':') => {
let hour = self.digits[11] * 10 + self.digits[12];
let minute = self.digits[14] * 10 + self.digits[15];
let second = self.digits[17] * 10 + self.digits[18];
let time = NaiveTime::from_hms_opt(hour as _, minute as _, second as _)?;

match self.test(19, b'.') {
true => {
Expand All @@ -112,17 +120,17 @@ impl TimestampParser {
8 => parse_nanos::<8>(&self.digits[20..28]),
_ => parse_nanos::<9>(&self.digits[20..29]),
};
Some((time.with_nanosecond(nanos)?, 20 + digits as usize))
Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
}
false => Some((time, 19)),
false => Some((time(hour, minute, second, 0)?, 19)),
}
}
// 092656
0b111111 => {
let hour = self.digits[11] * 10 + self.digits[12];
let minute = self.digits[13] * 10 + self.digits[14];
let second = self.digits[15] * 10 + self.digits[16];
let time = NaiveTime::from_hms_opt(hour as _, minute as _, second as _)?;
let time = time(hour, minute, second, 0)?;
Some((time, 17))
}
_ => None,
Expand Down Expand Up @@ -188,7 +196,7 @@ pub fn string_to_datetime<T: TimeZone>(
return Ok(DateTime::from_local(date.and_time(time), offset));
}

if !parser.test(10, b'T') && !parser.test(10, b' ') {
if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
return Err(err("invalid timestamp separator"));
}

Expand Down Expand Up @@ -1009,6 +1017,14 @@ mod tests {
"2020-09-08T12:00:12.123456789123+02:00",
"2020-09-08T12:00:12.12345678912345Z",
"2020-09-08T12:00:12.1234567891234567+02:00",
"2020-09-08T12:00:60Z",
"2020-09-08T12:00:60.123Z",
"2020-09-08T12:00:60.123456+02:00",
"2020-09-08T12:00:60.1234567891234567+02:00",
"2020-09-08T12:00:60.999999999+02:00",
"2020-09-08t12:00:12.12345678+00:00",
"2020-09-08t12:00:12+00:00",
"2020-09-08t12:00:12Z",
];

for case in cases {
Expand Down