From ba4917a134a875d90004714e0223d8a9d863b95c Mon Sep 17 00:00:00 2001 From: Kris Warner Date: Thu, 16 Mar 2023 08:45:50 -0400 Subject: [PATCH 1/7] Set up test cases for parse_with --- dateparser/src/lib.rs | 94 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/dateparser/src/lib.rs b/dateparser/src/lib.rs index 036653f..592cd28 100644 --- a/dateparser/src/lib.rs +++ b/dateparser/src/lib.rs @@ -828,4 +828,98 @@ mod tests { }; } } + + #[test] + fn parse_with() { + // Two sets of tests - one for EDT and one for EST + // TODO: add eastern hemisphere set of timezones? + + // Both will use these naive times + let midnight_naive = NaiveTime::from_hms_opt(0, 0, 0).unwrap(); + let before_midnight_naive = NaiveTime::from_hms_opt(23, 59, 59).unwrap(); + + // EDT + // Eastern Daylight Time is from (as of 2023) 2nd Sun in Mar to 1st Sun in Nov + // It is UTC -4 + let us_edt = &FixedOffset::west(4 * 3600); + + let edt_test_cases = vec![ + ("ymd", "2023-04-21"), + // ("ymd_z", "2023-04-21 EDT"), // not sure about this one + ("month_ymd", "2023-Apr-21"), + ("month_mdy", "April 21, 2023"), + ("month_dmy", "21 April 2023"), + ("slash_mdy", "04/21/23"), + ("slash_ymd", "2023/4/21"), + ("dot_mdy_or_ymd", "2023.04.21"), + // ( + // "chinese_ymd", + // "2014年04月08日", + // Utc.ymd(2014, 4, 8).and_time(Utc::now().time()).unwrap(), + // ), + ]; + + // test us_edt at midnight + let us_edt_midnight_as_utc = Utc.ymd(2023, 4, 21).and_hms(4, 0, 0); + + for &(test, input) in edt_test_cases.iter() { + assert_eq!( + super::parse_with(input, us_edt, midnight_naive).unwrap(), + us_edt_midnight_as_utc, + "parse_with/{test}/{input}", + ) + } + + // test us_edt at 23:59:59 - UTC will be one day ahead + let us_edt_before_midnight_as_utc = Utc.ymd(2023, 4, 22).and_hms(03, 59, 59); + for &(test, input) in edt_test_cases.iter() { + assert_eq!( + super::parse_with(input, us_edt, before_midnight_naive).unwrap(), + us_edt_before_midnight_as_utc, + "parse_with/{test}/{input}", + ) + } + + // EST + // Eastern Standard Time is from (as of 2023) 1st Sun in Nov to 2nd Sun in Mar + // It is UTC -5 + let us_est = &FixedOffset::west(5 * 3600); + + let est_test_cases = vec![ + ("ymd", "2023-12-21"), + // ("ymd_z", "2023-12-21 EST"), // not sure about this one + ("month_ymd", "2023-Dec-21"), + ("month_mdy", "December 21, 2023"), + ("month_dmy", "21 December 2023"), + ("slash_mdy", "12/21/23"), + ("slash_ymd", "2023/12/21"), + ("dot_mdy_or_ymd", "2023.12.21"), + // ( + // "chinese_ymd", + // "2014年04月08日", + // Utc.ymd(2014, 4, 8).and_time(Utc::now().time()).unwrap(), + // ), + ]; + + // test us_est at midnight + let us_est_midnight_as_utc = Utc.ymd(2023, 12, 21).and_hms(5, 0, 0); + + for &(test, input) in est_test_cases.iter() { + assert_eq!( + super::parse_with(input, us_est, midnight_naive).unwrap(), + us_est_midnight_as_utc, + "parse_with/{test}/{input}", + ) + } + + // test us_est at 23:59:59 - UTC will be one day ahead + let us_est_before_midnight_as_utc = Utc.ymd(2023, 12, 22).and_hms(04, 59, 59); + for &(test, input) in est_test_cases.iter() { + assert_eq!( + super::parse_with(input, us_est, before_midnight_naive).unwrap(), + us_est_before_midnight_as_utc, + "parse_with/{test}/{input}", + ) + } + } } From dccd51880b64f3570c988451f511c7f849658647 Mon Sep 17 00:00:00 2001 From: Kris Warner Date: Sat, 18 Mar 2023 12:27:10 -0400 Subject: [PATCH 2/7] Update to 2021 rust ed & chrono 0.4.24 --- Cargo.lock | 132 ++++++++++++++++++++++++++++++++++++++++-- dateparser/Cargo.toml | 4 +- 2 files changed, 129 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f84db18..b2fb943 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anyhow" version = "1.0.40" @@ -117,6 +126,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + [[package]] name = "cfg-if" version = "0.1.10" @@ -131,14 +146,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.19" +version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" dependencies = [ - "libc", + "iana-time-zone", + "js-sys", "num-integer", "num-traits", "time", + "wasm-bindgen", "winapi", ] @@ -217,6 +234,16 @@ dependencies = [ "syn", ] +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + [[package]] name = "colored" version = "2.0.0" @@ -245,6 +272,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" +[[package]] +name = "core-foundation-sys" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" + [[package]] name = "criterion" version = "0.3.6" @@ -348,6 +381,50 @@ dependencies = [ "memchr", ] +[[package]] +name = "cxx" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e599641dff337570f6aa9c304ecca92341d30bf72e1c50287869ed6a36615a6" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxx-build" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60e2434bc22249c056e12d2e87db46380730da0f2648471edea3e8e11834a892" +dependencies = [ + "cc", + "codespan-reporting", + "once_cell", + "proc-macro2", + "quote", + "scratch", + "syn", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3894ad0c6d517cb5a4ce8ec20b37cd0ea31b480fe582a104c5db67ae21270853" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34fa7e395dc1c001083c7eed28c8f0f0b5a225610f3b6284675f444af6fab86b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "dateparser" version = "0.1.8" @@ -456,6 +533,30 @@ dependencies = [ "libc", ] +[[package]] +name = "iana-time-zone" +version = "0.1.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "winapi", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +dependencies = [ + "cxx", + "cxx-build", +] + [[package]] name = "indexmap" version = "1.6.2" @@ -498,9 +599,18 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.94" +version = "0.2.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" + +[[package]] +name = "link-cplusplus" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18794a8ad5b29321f790b55d93dfba91e125cb1a9edbd4f8e3150acc771c1a5e" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +dependencies = [ + "cc", +] [[package]] name = "log" @@ -555,6 +665,12 @@ dependencies = [ "libc", ] +[[package]] +name = "once_cell" +version = "1.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" + [[package]] name = "oorandom" version = "11.1.3" @@ -865,6 +981,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "scratch" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" + [[package]] name = "serde" version = "1.0.125" diff --git a/dateparser/Cargo.toml b/dateparser/Cargo.toml index 820643f..154b4d2 100644 --- a/dateparser/Cargo.toml +++ b/dateparser/Cargo.toml @@ -8,11 +8,11 @@ homepage = "https://github.com/waltzofpearls/belt/tree/main/dateparser" repository = "https://github.com/waltzofpearls/belt/tree/main/dateparser" keywords = ["date", "time", "datetime", "parser", "parse"] license = "MIT" -edition = "2018" +edition = "2021" [dependencies] anyhow = "1.0.40" -chrono = "0.4" +chrono = "0.4.24" lazy_static = "1.4.0" regex = "1.6.0" From 3a56dbf66f9bb309dd33bb5d6218128547c11534 Mon Sep 17 00:00:00 2001 From: Kris Warner Date: Sat, 18 Mar 2023 13:52:27 -0400 Subject: [PATCH 3/7] Make default time an Option, fix doctests ymd_z() for parse_with still needs to be fixed - tests with it are failing and those are currently commented out. Ignore deprecation warnings for now - come back to that after finishing resolving issue ymd_z (as separate pull request). --- dateparser/src/datetime.rs | 212 ++++++++++++++++++++++++------------- dateparser/src/lib.rs | 171 +++++++++++++++++++++++------- 2 files changed, 273 insertions(+), 110 deletions(-) diff --git a/dateparser/src/datetime.rs b/dateparser/src/datetime.rs index 9a8fd7b..af73d46 100644 --- a/dateparser/src/datetime.rs +++ b/dateparser/src/datetime.rs @@ -1,3 +1,4 @@ +#![allow(deprecated)] use crate::timezone; use anyhow::{anyhow, Result}; use chrono::prelude::*; @@ -7,7 +8,7 @@ use regex::Regex; /// Parse struct has methods implemented parsers for accepted formats. pub struct Parse<'z, Tz2> { tz: &'z Tz2, - default_time: NaiveTime, + default_time: Option, } impl<'z, Tz2> Parse<'z, Tz2> @@ -16,7 +17,7 @@ where { /// Create a new instrance of [`Parse`] with a custom parsing timezone that handles the /// datetime string without time offset. - pub fn new(tz: &'z Tz2, default_time: NaiveTime) -> Self { + pub fn new(tz: &'z Tz2, default_time: Option) -> Self { Self { tz, default_time } } @@ -266,13 +267,21 @@ where if !RE.is_match(input) { return None; } - let now = Utc::now() - .date() - .and_time(self.default_time)? - .with_timezone(self.tz); + + // set time to use + let time = if let Some(v) = self.default_time { + v + } else { + Utc::now() + .date() + .and_time(Utc::now().time())? + .with_timezone(self.tz) + .time() + }; + NaiveDate::parse_from_str(input, "%Y-%m-%d") .ok() - .map(|parsed| parsed.and_time(now.time())) + .map(|parsed| parsed.and_time(time)) .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) .map(|at_tz| at_tz.with_timezone(&Utc)) .map(Ok) @@ -295,13 +304,19 @@ where if let Some(matched_tz) = caps.name("tz") { return match timezone::parse(matched_tz.as_str().trim()) { Ok(offset) => { - let now = Utc::now() - .date() - .and_time(self.default_time)? - .with_timezone(&offset); + // set time to use + let time = if let Some(v) = self.default_time { + Utc::now().date().and_time(v)?.with_timezone(&offset).time() + } else { + Utc::now() + .date() + .and_time(Utc::now().time())? + .with_timezone(&offset) + .time() + }; NaiveDate::parse_from_str(input, "%Y-%m-%d %Z") .ok() - .map(|parsed| parsed.and_time(now.time())) + .map(|parsed| parsed.and_time(time)) .and_then(|datetime| offset.from_local_datetime(&datetime).single()) .map(|at_tz| at_tz.with_timezone(&Utc)) .map(Ok) @@ -385,14 +400,21 @@ where return None; } - let now = Utc::now() - .date() - .and_time(self.default_time)? - .with_timezone(self.tz); + // set time to use + let time = if let Some(v) = self.default_time { + v + } else { + Utc::now() + .date() + .and_time(Utc::now().time())? + .with_timezone(self.tz) + .time() + }; + NaiveDate::parse_from_str(input, "%Y-%m-%d") .or_else(|_| NaiveDate::parse_from_str(input, "%Y-%b-%d")) .ok() - .map(|parsed| parsed.and_time(now.time())) + .map(|parsed| parsed.and_time(time)) .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) .map(|at_tz| at_tz.with_timezone(&Utc)) .map(Ok) @@ -500,15 +522,22 @@ where return None; } - let now = Utc::now() - .date() - .and_time(self.default_time)? - .with_timezone(self.tz); + // set time to use + let time = if let Some(v) = self.default_time { + v + } else { + Utc::now() + .date() + .and_time(Utc::now().time())? + .with_timezone(self.tz) + .time() + }; + let dt = input.replace(", ", " ").replace(". ", " "); NaiveDate::parse_from_str(&dt, "%B %d %y") .or_else(|_| NaiveDate::parse_from_str(&dt, "%B %d %Y")) .ok() - .map(|parsed| parsed.and_time(now.time())) + .map(|parsed| parsed.and_time(time)) .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) .map(|at_tz| at_tz.with_timezone(&Utc)) .map(Ok) @@ -554,14 +583,21 @@ where return None; } - let now = Utc::now() - .date() - .and_time(self.default_time)? - .with_timezone(self.tz); + // set time to use + let time = if let Some(v) = self.default_time { + v + } else { + Utc::now() + .date() + .and_time(Utc::now().time())? + .with_timezone(self.tz) + .time() + }; + NaiveDate::parse_from_str(input, "%d %B %y") .or_else(|_| NaiveDate::parse_from_str(input, "%d %B %Y")) .ok() - .map(|parsed| parsed.and_time(now.time())) + .map(|parsed| parsed.and_time(time)) .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) .map(|at_tz| at_tz.with_timezone(&Utc)) .map(Ok) @@ -620,14 +656,21 @@ where return None; } - let now = Utc::now() - .date() - .and_time(self.default_time)? - .with_timezone(self.tz); + // set time to use + let time = if let Some(v) = self.default_time { + v + } else { + Utc::now() + .date() + .and_time(Utc::now().time())? + .with_timezone(self.tz) + .time() + }; + NaiveDate::parse_from_str(input, "%m/%d/%y") .or_else(|_| NaiveDate::parse_from_str(input, "%m/%d/%Y")) .ok() - .map(|parsed| parsed.and_time(now.time())) + .map(|parsed| parsed.and_time(time)) .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) .map(|at_tz| at_tz.with_timezone(&Utc)) .map(Ok) @@ -673,13 +716,20 @@ where return None; } - let now = Utc::now() - .date() - .and_time(self.default_time)? - .with_timezone(self.tz); + // set time to use + let time = if let Some(v) = self.default_time { + v + } else { + Utc::now() + .date() + .and_time(Utc::now().time())? + .with_timezone(self.tz) + .time() + }; + NaiveDate::parse_from_str(input, "%Y/%m/%d") .ok() - .map(|parsed| parsed.and_time(now.time())) + .map(|parsed| parsed.and_time(time)) .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) .map(|at_tz| at_tz.with_timezone(&Utc)) .map(Ok) @@ -700,16 +750,25 @@ where return None; } - let now = Utc::now() - .date() - .and_time(self.default_time)? - .with_timezone(self.tz); + // set time to use + let time = if let Some(v) = self.default_time { + v + } else { + Utc::now() + .date() + .and_time(Utc::now().time())? + .with_timezone(self.tz) + .time() + }; + NaiveDate::parse_from_str(input, "%m.%d.%y") .or_else(|_| NaiveDate::parse_from_str(input, "%m.%d.%Y")) .or_else(|_| NaiveDate::parse_from_str(input, "%Y.%m.%d")) - .or_else(|_| NaiveDate::parse_from_str(&format!("{}.{}", input, now.day()), "%Y.%m.%d")) + .or_else(|_| { + NaiveDate::parse_from_str(&format!("{}.{}", input, Utc::now().day()), "%Y.%m.%d") + }) .ok() - .map(|parsed| parsed.and_time(now.time())) + .map(|parsed| parsed.and_time(time)) .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) .map(|at_tz| at_tz.with_timezone(&Utc)) .map(Ok) @@ -761,13 +820,20 @@ where return None; } - let now = Utc::now() - .date() - .and_time(self.default_time)? - .with_timezone(self.tz); + // set time to use + let time = if let Some(v) = self.default_time { + v + } else { + Utc::now() + .date() + .and_time(Utc::now().time())? + .with_timezone(self.tz) + .time() + }; + NaiveDate::parse_from_str(input, "%Y年%m月%d日") .ok() - .map(|parsed| parsed.and_time(now.time())) + .map(|parsed| parsed.and_time(time)) .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) .map(|at_tz| at_tz.with_timezone(&Utc)) .map(Ok) @@ -780,7 +846,7 @@ mod tests { #[test] fn unix_timestamp() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ("0000000000", Utc.ymd(1970, 1, 1).and_hms(0, 0, 0)), @@ -814,7 +880,7 @@ mod tests { #[test] fn rfc3339() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ( @@ -841,7 +907,7 @@ mod tests { #[test] fn rfc2822() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ( @@ -868,7 +934,7 @@ mod tests { #[test] fn postgres_timestamp() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ( @@ -910,7 +976,7 @@ mod tests { #[test] fn ymd_hms() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ("2021-04-30 21:14", Utc.ymd(2021, 4, 30).and_hms(21, 14, 0)), @@ -953,7 +1019,7 @@ mod tests { #[test] fn ymd_hms_z() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ( @@ -1003,7 +1069,7 @@ mod tests { #[test] fn ymd() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, Some(Utc::now().time())); let test_cases = vec![( "2021-02-21", @@ -1029,7 +1095,7 @@ mod tests { #[test] fn ymd_z() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, Some(Utc::now().time())); let now_at_pst = Utc::now().with_timezone(&FixedOffset::west(8 * 3600)); let now_at_cst = Utc::now().with_timezone(&FixedOffset::east(8 * 3600)); @@ -1076,7 +1142,7 @@ mod tests { #[test] fn hms() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ( @@ -1106,7 +1172,7 @@ mod tests { #[test] fn hms_z() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let now_at_pst = Utc::now().with_timezone(&FixedOffset::west(8 * 3600)); let test_cases = vec![ @@ -1153,7 +1219,7 @@ mod tests { #[test] fn month_ymd() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![( "2021-Feb-21", @@ -1179,7 +1245,7 @@ mod tests { #[test] fn month_md_hms() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ( @@ -1205,7 +1271,7 @@ mod tests { #[test] fn month_mdy_hms() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ( @@ -1235,7 +1301,7 @@ mod tests { #[test] fn month_mdy_hms_z() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ( @@ -1269,7 +1335,7 @@ mod tests { #[test] fn month_mdy() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ( @@ -1317,7 +1383,7 @@ mod tests { #[test] fn month_dmy_hms() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ( @@ -1344,7 +1410,7 @@ mod tests { #[test] fn month_dmy() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ("7 oct 70", Utc.ymd(1970, 10, 7).and_time(Utc::now().time())), @@ -1381,7 +1447,7 @@ mod tests { #[test] fn slash_mdy_hms() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ("4/8/2014 22:05", Utc.ymd(2014, 4, 8).and_hms(22, 5, 0)), @@ -1420,7 +1486,7 @@ mod tests { #[test] fn slash_mdy() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ( @@ -1454,7 +1520,7 @@ mod tests { #[test] fn slash_ymd_hms() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ ("2014/4/8 22:05", Utc.ymd(2014, 4, 8).and_hms(22, 5, 0)), @@ -1484,7 +1550,7 @@ mod tests { #[test] fn slash_ymd() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, Some(Utc::now().time())); let test_cases = vec![ ( @@ -1516,7 +1582,7 @@ mod tests { #[test] fn dot_mdy_or_ymd() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, Some(Utc::now().time())); let test_cases = vec![ // mm.dd.yyyy @@ -1560,7 +1626,7 @@ mod tests { #[test] fn mysql_log_timestamp() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![ // yymmdd hh:mm:ss mysql log @@ -1580,7 +1646,7 @@ mod tests { #[test] fn chinese_ymd_hms() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, None); let test_cases = vec![( "2014年04月08日11时25分18秒", @@ -1600,7 +1666,7 @@ mod tests { #[test] fn chinese_ymd() { - let parse = Parse::new(&Utc, Utc::now().time()); + let parse = Parse::new(&Utc, Some(Utc::now().time())); let test_cases = vec![( "2014年04月08日", diff --git a/dateparser/src/lib.rs b/dateparser/src/lib.rs index 592cd28..71d908a 100644 --- a/dateparser/src/lib.rs +++ b/dateparser/src/lib.rs @@ -1,3 +1,4 @@ +#![allow(deprecated)] //! A rust library for parsing date strings in commonly used formats. Parsed date will be returned //! as `chrono`'s `DateTime`. //! @@ -194,14 +195,13 @@ /// use std::error::Error; /// /// fn main() -> Result<(), Box> { -/// let utc_now_time = Utc::now().time(); -/// let parse_with_local = Parse::new(&Local, utc_now_time); +/// let parse_with_local = Parse::new(&Local, None); /// assert_eq!( /// parse_with_local.parse("2021-06-05 06:19 PM")?, /// Local.ymd(2021, 6, 5).and_hms(18, 19, 0).with_timezone(&Utc), /// ); /// -/// let parse_with_utc = Parse::new(&Utc, utc_now_time); +/// let parse_with_utc = Parse::new(&Utc, None); /// assert_eq!( /// parse_with_utc.parse("2021-06-05 06:19 PM")?, /// Utc.ymd(2021, 6, 5).and_hms(18, 19, 0), @@ -283,7 +283,7 @@ impl std::str::FromStr for DateTimeUtc { /// ); /// ``` pub fn parse(input: &str) -> Result> { - Parse::new(&Local, Utc::now().time()).parse(input) + Parse::new(&Local, None).parse(input) } /// Similar to [`parse()`], this function takes a datetime string and a custom [`chrono::TimeZone`], @@ -314,7 +314,7 @@ pub fn parse(input: &str) -> Result> { /// ); /// ``` pub fn parse_with_timezone(input: &str, tz: &Tz2) -> Result> { - Parse::new(tz, Utc::now().time()).parse(input) + Parse::new(tz, None).parse(input) } /// Similar to [`parse()`] and [`parse_with_timezone()`], this function takes a datetime string, a @@ -326,34 +326,41 @@ pub fn parse_with_timezone(input: &str, tz: &Tz2) -> Result( @@ -361,7 +368,7 @@ pub fn parse_with( tz: &Tz2, default_time: NaiveTime, ) -> Result> { - Parse::new(tz, default_time).parse(input) + Parse::new(tz, Some(default_time)).parse(input) } #[cfg(test)] @@ -829,23 +836,20 @@ mod tests { } } + // test parse_with() with various timezones and times + #[test] - fn parse_with() { - // Two sets of tests - one for EDT and one for EST - // TODO: add eastern hemisphere set of timezones? + fn parse_with_edt() { + // Eastern Daylight Time (EDT) is from (as of 2023) 2nd Sun in Mar to 1st Sun in Nov + // It is UTC -4 - // Both will use these naive times let midnight_naive = NaiveTime::from_hms_opt(0, 0, 0).unwrap(); let before_midnight_naive = NaiveTime::from_hms_opt(23, 59, 59).unwrap(); - - // EDT - // Eastern Daylight Time is from (as of 2023) 2nd Sun in Mar to 1st Sun in Nov - // It is UTC -4 - let us_edt = &FixedOffset::west(4 * 3600); + let us_edt = &FixedOffset::west_opt(4 * 3600).unwrap(); let edt_test_cases = vec![ ("ymd", "2023-04-21"), - // ("ymd_z", "2023-04-21 EDT"), // not sure about this one + // ("ymd_z", "2023-04-21 EDT"), // FIXME not sure about this one ("month_ymd", "2023-Apr-21"), ("month_mdy", "April 21, 2023"), ("month_dmy", "21 April 2023"), @@ -879,15 +883,20 @@ mod tests { "parse_with/{test}/{input}", ) } + } - // EST - // Eastern Standard Time is from (as of 2023) 1st Sun in Nov to 2nd Sun in Mar + #[test] + fn parse_with_est() { + // Eastern Standard Time (EST) is from (as of 2023) 1st Sun in Nov to 2nd Sun in Mar // It is UTC -5 + + let midnight_naive = NaiveTime::from_hms_opt(0, 0, 0).unwrap(); + let before_midnight_naive = NaiveTime::from_hms_opt(23, 59, 59).unwrap(); let us_est = &FixedOffset::west(5 * 3600); let est_test_cases = vec![ ("ymd", "2023-12-21"), - // ("ymd_z", "2023-12-21 EST"), // not sure about this one + // ("ymd_z", "2023-12-21 EST"), // FIXME not sure about this one ("month_ymd", "2023-Dec-21"), ("month_mdy", "December 21, 2023"), ("month_dmy", "21 December 2023"), @@ -922,4 +931,92 @@ mod tests { ) } } + + #[test] + fn parse_with_utc() { + let midnight_naive = NaiveTime::from_hms_opt(0, 0, 0).unwrap(); + let before_midnight_naive = NaiveTime::from_hms_opt(23, 59, 59).unwrap(); + let utc_test_cases = vec![ + ("ymd", "2023-12-21"), + // ("ymd_z", "2023-12-21 EST"), // FIXME not sure about this one + ("month_ymd", "2023-Dec-21"), + ("month_mdy", "December 21, 2023"), + ("month_dmy", "21 December 2023"), + ("slash_mdy", "12/21/23"), + ("slash_ymd", "2023/12/21"), + ("dot_mdy_or_ymd", "2023.12.21"), + // ( + // "chinese_ymd", + // "2014年04月08日", + // Utc.ymd(2014, 4, 8).and_time(Utc::now().time()).unwrap(), + // ), + ]; + + // test utc at midnight + let utc_midnight = Utc.ymd(2023, 12, 21).and_hms(0, 0, 0); + + for &(test, input) in utc_test_cases.iter() { + assert_eq!( + super::parse_with(input, &Utc, midnight_naive).unwrap(), + utc_midnight, + "parse_with/{test}/{input}", + ) + } + + // test utc at 23:59:59 + let utc_before_midnight = Utc.ymd(2023, 12, 21).and_hms(23, 59, 59); + for &(test, input) in utc_test_cases.iter() { + assert_eq!( + super::parse_with(input, &Utc, before_midnight_naive).unwrap(), + utc_before_midnight, + "parse_with/{test}/{input}", + ) + } + } + + #[test] + fn parse_with_local() { + let midnight_naive = NaiveTime::from_hms_opt(0, 0, 0).unwrap(); + let before_midnight_naive = NaiveTime::from_hms_opt(23, 59, 59).unwrap(); + let local_test_cases = vec![ + ("ymd", "2023-12-21"), + // ("ymd_z", "2023-12-21 EST"), // FIXME not sure about this one + ("month_ymd", "2023-Dec-21"), + ("month_mdy", "December 21, 2023"), + ("month_dmy", "21 December 2023"), + ("slash_mdy", "12/21/23"), + ("slash_ymd", "2023/12/21"), + ("dot_mdy_or_ymd", "2023.12.21"), + // ( + // "chinese_ymd", + // "2014年04月08日", + // Utc.ymd(2014, 4, 8).and_time(Utc::now().time()).unwrap(), + // ), + ]; + + // test local at midnight + let local_midnight_as_utc = Local.ymd(2023, 12, 21).and_hms(0, 0, 0).with_timezone(&Utc); + + for &(test, input) in local_test_cases.iter() { + assert_eq!( + super::parse_with(input, &Local, midnight_naive).unwrap(), + local_midnight_as_utc, + "parse_with/{test}/{input}", + ) + } + + // test local at 23:59:59 + let local_before_midnight_as_utc = Local + .ymd(2023, 12, 21) + .and_hms(23, 59, 59) + .with_timezone(&Utc); + + for &(test, input) in local_test_cases.iter() { + assert_eq!( + super::parse_with(input, &Local, before_midnight_naive).unwrap(), + local_before_midnight_as_utc, + "parse_with/{test}/{input}", + ) + } + } } From f1c04cb81fe33545461b81b6a17b576e49b6c85a Mon Sep 17 00:00:00 2001 From: Kris Warner Date: Sat, 18 Mar 2023 14:49:07 -0400 Subject: [PATCH 4/7] Simplify setting time --- dateparser/src/datetime.rs | 54 +++++++------------------------------- 1 file changed, 9 insertions(+), 45 deletions(-) diff --git a/dateparser/src/datetime.rs b/dateparser/src/datetime.rs index af73d46..98c1cb6 100644 --- a/dateparser/src/datetime.rs +++ b/dateparser/src/datetime.rs @@ -272,11 +272,7 @@ where let time = if let Some(v) = self.default_time { v } else { - Utc::now() - .date() - .and_time(Utc::now().time())? - .with_timezone(self.tz) - .time() + Utc::now().with_timezone(self.tz).time() }; NaiveDate::parse_from_str(input, "%Y-%m-%d") @@ -308,11 +304,7 @@ where let time = if let Some(v) = self.default_time { Utc::now().date().and_time(v)?.with_timezone(&offset).time() } else { - Utc::now() - .date() - .and_time(Utc::now().time())? - .with_timezone(&offset) - .time() + Utc::now().with_timezone(&offset).time() }; NaiveDate::parse_from_str(input, "%Y-%m-%d %Z") .ok() @@ -404,11 +396,7 @@ where let time = if let Some(v) = self.default_time { v } else { - Utc::now() - .date() - .and_time(Utc::now().time())? - .with_timezone(self.tz) - .time() + Utc::now().with_timezone(self.tz).time() }; NaiveDate::parse_from_str(input, "%Y-%m-%d") @@ -526,11 +514,7 @@ where let time = if let Some(v) = self.default_time { v } else { - Utc::now() - .date() - .and_time(Utc::now().time())? - .with_timezone(self.tz) - .time() + Utc::now().with_timezone(self.tz).time() }; let dt = input.replace(", ", " ").replace(". ", " "); @@ -587,11 +571,7 @@ where let time = if let Some(v) = self.default_time { v } else { - Utc::now() - .date() - .and_time(Utc::now().time())? - .with_timezone(self.tz) - .time() + Utc::now().with_timezone(self.tz).time() }; NaiveDate::parse_from_str(input, "%d %B %y") @@ -660,11 +640,7 @@ where let time = if let Some(v) = self.default_time { v } else { - Utc::now() - .date() - .and_time(Utc::now().time())? - .with_timezone(self.tz) - .time() + Utc::now().with_timezone(self.tz).time() }; NaiveDate::parse_from_str(input, "%m/%d/%y") @@ -720,11 +696,7 @@ where let time = if let Some(v) = self.default_time { v } else { - Utc::now() - .date() - .and_time(Utc::now().time())? - .with_timezone(self.tz) - .time() + Utc::now().with_timezone(self.tz).time() }; NaiveDate::parse_from_str(input, "%Y/%m/%d") @@ -754,11 +726,7 @@ where let time = if let Some(v) = self.default_time { v } else { - Utc::now() - .date() - .and_time(Utc::now().time())? - .with_timezone(self.tz) - .time() + Utc::now().with_timezone(self.tz).time() }; NaiveDate::parse_from_str(input, "%m.%d.%y") @@ -824,11 +792,7 @@ where let time = if let Some(v) = self.default_time { v } else { - Utc::now() - .date() - .and_time(Utc::now().time())? - .with_timezone(self.tz) - .time() + Utc::now().with_timezone(self.tz).time() }; NaiveDate::parse_from_str(input, "%Y年%m月%d日") From 3ccd63881d28968a5cbfdaf8ec07823d80a88cae Mon Sep 17 00:00:00 2001 From: Kris Warner Date: Sat, 18 Mar 2023 16:32:15 -0400 Subject: [PATCH 5/7] Simplify time in ymd_z; fix & include ymd_z tests --- dateparser/src/datetime.rs | 4 ++-- dateparser/src/lib.rs | 32 +++++++------------------------- 2 files changed, 9 insertions(+), 27 deletions(-) diff --git a/dateparser/src/datetime.rs b/dateparser/src/datetime.rs index 98c1cb6..8df9e0a 100644 --- a/dateparser/src/datetime.rs +++ b/dateparser/src/datetime.rs @@ -302,7 +302,7 @@ where Ok(offset) => { // set time to use let time = if let Some(v) = self.default_time { - Utc::now().date().and_time(v)?.with_timezone(&offset).time() + v } else { Utc::now().with_timezone(&offset).time() }; @@ -1059,7 +1059,7 @@ mod tests { #[test] fn ymd_z() { - let parse = Parse::new(&Utc, Some(Utc::now().time())); + let parse = Parse::new(&Utc, None); let now_at_pst = Utc::now().with_timezone(&FixedOffset::west(8 * 3600)); let now_at_cst = Utc::now().with_timezone(&FixedOffset::east(8 * 3600)); diff --git a/dateparser/src/lib.rs b/dateparser/src/lib.rs index 71d908a..3939190 100644 --- a/dateparser/src/lib.rs +++ b/dateparser/src/lib.rs @@ -849,18 +849,14 @@ mod tests { let edt_test_cases = vec![ ("ymd", "2023-04-21"), - // ("ymd_z", "2023-04-21 EDT"), // FIXME not sure about this one + ("ymd_z", "2023-04-21 EDT"), ("month_ymd", "2023-Apr-21"), ("month_mdy", "April 21, 2023"), ("month_dmy", "21 April 2023"), ("slash_mdy", "04/21/23"), ("slash_ymd", "2023/4/21"), ("dot_mdy_or_ymd", "2023.04.21"), - // ( - // "chinese_ymd", - // "2014年04月08日", - // Utc.ymd(2014, 4, 8).and_time(Utc::now().time()).unwrap(), - // ), + ("chinese_ymd", "2023年04月21日"), ]; // test us_edt at midnight @@ -896,18 +892,14 @@ mod tests { let est_test_cases = vec![ ("ymd", "2023-12-21"), - // ("ymd_z", "2023-12-21 EST"), // FIXME not sure about this one + ("ymd_z", "2023-12-21 EST"), ("month_ymd", "2023-Dec-21"), ("month_mdy", "December 21, 2023"), ("month_dmy", "21 December 2023"), ("slash_mdy", "12/21/23"), ("slash_ymd", "2023/12/21"), ("dot_mdy_or_ymd", "2023.12.21"), - // ( - // "chinese_ymd", - // "2014年04月08日", - // Utc.ymd(2014, 4, 8).and_time(Utc::now().time()).unwrap(), - // ), + ("chinese_ymd", "2023年12月21日"), ]; // test us_est at midnight @@ -938,20 +930,15 @@ mod tests { let before_midnight_naive = NaiveTime::from_hms_opt(23, 59, 59).unwrap(); let utc_test_cases = vec![ ("ymd", "2023-12-21"), - // ("ymd_z", "2023-12-21 EST"), // FIXME not sure about this one + ("ymd_z", "2023-12-21 UTC"), ("month_ymd", "2023-Dec-21"), ("month_mdy", "December 21, 2023"), ("month_dmy", "21 December 2023"), ("slash_mdy", "12/21/23"), ("slash_ymd", "2023/12/21"), ("dot_mdy_or_ymd", "2023.12.21"), - // ( - // "chinese_ymd", - // "2014年04月08日", - // Utc.ymd(2014, 4, 8).and_time(Utc::now().time()).unwrap(), - // ), + ("chinese_ymd", "2023年12月21日"), ]; - // test utc at midnight let utc_midnight = Utc.ymd(2023, 12, 21).and_hms(0, 0, 0); @@ -980,18 +967,13 @@ mod tests { let before_midnight_naive = NaiveTime::from_hms_opt(23, 59, 59).unwrap(); let local_test_cases = vec![ ("ymd", "2023-12-21"), - // ("ymd_z", "2023-12-21 EST"), // FIXME not sure about this one ("month_ymd", "2023-Dec-21"), ("month_mdy", "December 21, 2023"), ("month_dmy", "21 December 2023"), ("slash_mdy", "12/21/23"), ("slash_ymd", "2023/12/21"), ("dot_mdy_or_ymd", "2023.12.21"), - // ( - // "chinese_ymd", - // "2014年04月08日", - // Utc.ymd(2014, 4, 8).and_time(Utc::now().time()).unwrap(), - // ), + ("chinese_ymd", "2023年12月21日"), ]; // test local at midnight From 7c76cb4baf7964607436b538cfb50f705e52b88c Mon Sep 17 00:00:00 2001 From: Kris Warner Date: Sat, 18 Mar 2023 22:25:16 -0400 Subject: [PATCH 6/7] Use match instead of if/else --- dateparser/src/datetime.rs | 63 ++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/dateparser/src/datetime.rs b/dateparser/src/datetime.rs index 8df9e0a..725313b 100644 --- a/dateparser/src/datetime.rs +++ b/dateparser/src/datetime.rs @@ -269,10 +269,9 @@ where } // set time to use - let time = if let Some(v) = self.default_time { - v - } else { - Utc::now().with_timezone(self.tz).time() + let time = match self.default_time { + Some(v) => v, + None => Utc::now().with_timezone(self.tz).time(), }; NaiveDate::parse_from_str(input, "%Y-%m-%d") @@ -301,10 +300,9 @@ where return match timezone::parse(matched_tz.as_str().trim()) { Ok(offset) => { // set time to use - let time = if let Some(v) = self.default_time { - v - } else { - Utc::now().with_timezone(&offset).time() + let time = match self.default_time { + Some(v) => v, + None => Utc::now().with_timezone(&offset).time(), }; NaiveDate::parse_from_str(input, "%Y-%m-%d %Z") .ok() @@ -393,10 +391,9 @@ where } // set time to use - let time = if let Some(v) = self.default_time { - v - } else { - Utc::now().with_timezone(self.tz).time() + let time = match self.default_time { + Some(v) => v, + None => Utc::now().with_timezone(self.tz).time(), }; NaiveDate::parse_from_str(input, "%Y-%m-%d") @@ -511,10 +508,9 @@ where } // set time to use - let time = if let Some(v) = self.default_time { - v - } else { - Utc::now().with_timezone(self.tz).time() + let time = match self.default_time { + Some(v) => v, + None => Utc::now().with_timezone(self.tz).time(), }; let dt = input.replace(", ", " ").replace(". ", " "); @@ -568,10 +564,9 @@ where } // set time to use - let time = if let Some(v) = self.default_time { - v - } else { - Utc::now().with_timezone(self.tz).time() + let time = match self.default_time { + Some(v) => v, + None => Utc::now().with_timezone(self.tz).time(), }; NaiveDate::parse_from_str(input, "%d %B %y") @@ -637,10 +632,9 @@ where } // set time to use - let time = if let Some(v) = self.default_time { - v - } else { - Utc::now().with_timezone(self.tz).time() + let time = match self.default_time { + Some(v) => v, + None => Utc::now().with_timezone(self.tz).time(), }; NaiveDate::parse_from_str(input, "%m/%d/%y") @@ -693,10 +687,9 @@ where } // set time to use - let time = if let Some(v) = self.default_time { - v - } else { - Utc::now().with_timezone(self.tz).time() + let time = match self.default_time { + Some(v) => v, + None => Utc::now().with_timezone(self.tz).time(), }; NaiveDate::parse_from_str(input, "%Y/%m/%d") @@ -723,10 +716,9 @@ where } // set time to use - let time = if let Some(v) = self.default_time { - v - } else { - Utc::now().with_timezone(self.tz).time() + let time = match self.default_time { + Some(v) => v, + None => Utc::now().with_timezone(self.tz).time(), }; NaiveDate::parse_from_str(input, "%m.%d.%y") @@ -789,10 +781,9 @@ where } // set time to use - let time = if let Some(v) = self.default_time { - v - } else { - Utc::now().with_timezone(self.tz).time() + let time = match self.default_time { + Some(v) => v, + None => Utc::now().with_timezone(self.tz).time(), }; NaiveDate::parse_from_str(input, "%Y年%m月%d日") From 1c3b31d154a6b84880d8237d99718b95bba0cf51 Mon Sep 17 00:00:00 2001 From: Kris Warner Date: Fri, 24 Mar 2023 06:47:55 -0400 Subject: [PATCH 7/7] Fix clippy lints --- dateparser/src/lib.rs | 4 ++-- dateparser/src/timezone.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dateparser/src/lib.rs b/dateparser/src/lib.rs index 3939190..6c70411 100644 --- a/dateparser/src/lib.rs +++ b/dateparser/src/lib.rs @@ -871,7 +871,7 @@ mod tests { } // test us_edt at 23:59:59 - UTC will be one day ahead - let us_edt_before_midnight_as_utc = Utc.ymd(2023, 4, 22).and_hms(03, 59, 59); + let us_edt_before_midnight_as_utc = Utc.ymd(2023, 4, 22).and_hms(3, 59, 59); for &(test, input) in edt_test_cases.iter() { assert_eq!( super::parse_with(input, us_edt, before_midnight_naive).unwrap(), @@ -914,7 +914,7 @@ mod tests { } // test us_est at 23:59:59 - UTC will be one day ahead - let us_est_before_midnight_as_utc = Utc.ymd(2023, 12, 22).and_hms(04, 59, 59); + let us_est_before_midnight_as_utc = Utc.ymd(2023, 12, 22).and_hms(4, 59, 59); for &(test, input) in est_test_cases.iter() { assert_eq!( super::parse_with(input, us_est, before_midnight_naive).unwrap(), diff --git a/dateparser/src/timezone.rs b/dateparser/src/timezone.rs index 5f6cd24..ffd4064 100644 --- a/dateparser/src/timezone.rs +++ b/dateparser/src/timezone.rs @@ -20,7 +20,7 @@ fn parse_offset_2822(s: &str) -> Result { let upto = s .as_bytes() .iter() - .position(|&c| !matches!(c, b'a'..=b'z' | b'A'..=b'Z')) + .position(|&c| !c.is_ascii_alphabetic()) .unwrap_or(s.len()); if upto > 0 { let name = &s[..upto];