diff --git a/experimental/provider_static/src/lib.rs b/experimental/provider_static/src/lib.rs index 93580f393df..1bd1395b0e9 100644 --- a/experimental/provider_static/src/lib.rs +++ b/experimental/provider_static/src/lib.rs @@ -63,14 +63,14 @@ where } } -impl<'de> SerdeDeDataProvider<'de> for StaticDataProvider { +impl SerdeDeDataProvider for StaticDataProvider { fn load_to_receiver( &self, req: &DataRequest, - receiver: &mut dyn SerdeDeDataReceiver<'de>, + receiver: &mut dyn SerdeDeDataReceiver, ) -> Result { let file = self.get_file(req)?; - receiver.receive_deserializer(&mut erased_serde::Deserializer::erase( + receiver.receive_static(&mut erased_serde::Deserializer::erase( &mut serde_json::Deserializer::from_reader(file.as_bytes()), ))?; diff --git a/ffi/capi/src/provider.rs b/ffi/capi/src/provider.rs index f22c1c6e290..f8beca52181 100644 --- a/ffi/capi/src/provider.rs +++ b/ffi/capi/src/provider.rs @@ -42,7 +42,7 @@ impl ICU4XDataProvider { } /// Construct a [`ICU4XDataProvider`] this from a boxed [`SerdeDeDataProvider`] - pub fn from_boxed(x: Box>) -> Self { + pub fn from_boxed(x: Box) -> Self { unsafe { // If the layout changes this will error // Once Rust gets pointer metadata APIs we should switch to using those @@ -51,7 +51,7 @@ impl ICU4XDataProvider { } /// Obtain the original boxed Rust [`SerdeDeDataProvider`] for this - pub fn into_boxed(self) -> Box> { + pub fn into_boxed(self) -> Box { debug_assert!(self._field1 != 0); // If the layout changes this will error // Once Rust gets pointer metadata APIs we should switch to using those @@ -59,7 +59,7 @@ impl ICU4XDataProvider { } /// Convert a borrowed reference to a borrowed [`SerdeDeDataProvider`] - pub fn as_dyn_ref(&self) -> &dyn SerdeDeDataProvider<'static> { + pub fn as_dyn_ref(&self) -> &dyn SerdeDeDataProvider { debug_assert!(self._field1 != 0); unsafe { // &dyn Trait and Box have the same layout diff --git a/provider/core/src/data_provider.rs b/provider/core/src/data_provider.rs index 
f4df940f900..7a8d221675c 100644 --- a/provider/core/src/data_provider.rs +++ b/provider/core/src/data_provider.rs @@ -98,6 +98,7 @@ where Borrowed(Yoke), RcStruct(Yoke>), Owned(Yoke), + RcBuf(Yoke>), } /// A wrapper around the payload returned in a [`DataResponse`]. @@ -149,6 +150,7 @@ where Borrowed(yoke) => Borrowed(yoke.clone()), RcStruct(yoke) => RcStruct(yoke.clone()), Owned(yoke) => Owned(yoke.clone()), + RcBuf(yoke) => RcBuf(yoke.clone()), }; Self { inner: new_inner } } @@ -219,6 +221,68 @@ where } } +impl<'d, 's, M> DataPayload<'d, 's, M> +where + M: DataMarker<'s>, +{ + /// Convert a byte buffer into a [`DataPayload`]. A function must be provided to perform the + /// conversion. This can often be a Serde deserialization operation. + /// + /// Due to [compiler bug #84937](https://github.com/rust-lang/rust/issues/84937), call sites + /// for this function may not compile; if this happens, use + /// [`try_from_rc_buffer_badly()`](Self::try_from_rc_buffer_badly) instead. + #[inline] + pub fn try_from_rc_buffer( + rc_buffer: Rc<[u8]>, + f: impl for<'de> FnOnce(&'de [u8]) -> Result<>::Output, E>, + ) -> Result { + let yoke = Yoke::try_attach_to_cart(rc_buffer, f)?; + Ok(Self { + inner: DataPayloadInner::RcBuf(yoke), + }) + } + + /// Convert a byte buffer into a [`DataPayload`]. A function must be provided to perform the + /// conversion. This can often be a Serde deserialization operation. + /// + /// For a version of this function that takes a `FnOnce` instead of a raw function pointer, + /// see [`try_from_rc_buffer()`](Self::try_from_rc_buffer). 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "provider_serde")] { + /// use icu_provider::prelude::*; + /// use icu_provider::hello_world::*; + /// use std::rc::Rc; + /// use icu_provider::yoke::Yokeable; + /// + /// let json_text = "{\"message\":\"Hello World\"}"; + /// let json_rc_buffer: Rc<[u8]> = json_text.as_bytes().into(); + /// + /// let payload = DataPayload::::try_from_rc_buffer_badly( + /// json_rc_buffer.clone(), + /// |bytes| { + /// serde_json::from_slice(bytes) + /// } + /// ) + /// .expect("JSON is valid"); + /// + /// assert_eq!("Hello World", payload.get().message); + /// # } // feature = "provider_serde" + /// ``` + #[allow(clippy::type_complexity)] + pub fn try_from_rc_buffer_badly( + rc_buffer: Rc<[u8]>, + f: for<'de> fn(&'de [u8]) -> Result<>::Output, E>, + ) -> Result { + let yoke = Yoke::try_attach_to_cart_badly(rc_buffer, f)?; + Ok(Self { + inner: DataPayloadInner::RcBuf(yoke), + }) + } +} + impl<'d, 's, M> DataPayload<'d, 's, M> where M: DataMarker<'s>, @@ -289,6 +353,7 @@ where Borrowed(yoke) => yoke.with_mut(f), RcStruct(yoke) => yoke.with_mut(f), Owned(yoke) => yoke.with_mut(f), + RcBuf(yoke) => yoke.with_mut(f), } } @@ -314,6 +379,7 @@ where Borrowed(yoke) => yoke.get(), RcStruct(yoke) => yoke.get(), Owned(yoke) => yoke.get(), + RcBuf(yoke) => yoke.get(), } } } diff --git a/provider/core/src/dynutil.rs b/provider/core/src/dynutil.rs index 6bdcf1a3c9b..a90d289f528 100644 --- a/provider/core/src/dynutil.rs +++ b/provider/core/src/dynutil.rs @@ -37,6 +37,27 @@ where Self: Sized + crate::prelude::DataMarker<'s>, { /// Upcast a `DataPayload` to a `DataPayload` where `T` implements trait `S`. 
+ /// + /// # Examples + /// + /// Upcast and then downcast a data struct of type `Cow` (cart type `String`) via + /// [`ErasedDataStruct`](crate::erased::ErasedDataStruct): + /// + /// ``` + /// use icu_provider::prelude::*; + /// use icu_provider::erased::*; + /// use icu_provider::dynutil::UpcastDataPayload; + /// use icu_provider::marker::CowStringMarker; + /// use std::borrow::Cow; + /// + /// let data = "foo".to_string(); + /// let original = DataPayload::::from_owned(Cow::Owned(data)); + /// let upcasted = ErasedDataStructMarker::upcast(original); + /// let downcasted = upcasted + /// .downcast::() + /// .expect("Type conversion"); + /// assert_eq!(downcasted.get(), "foo"); + /// ``` fn upcast( other: crate::prelude::DataPayload<'d, 's, M>, ) -> crate::prelude::DataPayload<'d, 's, Self>; diff --git a/provider/core/src/erased.rs b/provider/core/src/erased.rs index dd134be48b8..02aa58b9e0c 100644 --- a/provider/core/src/erased.rs +++ b/provider/core/src/erased.rs @@ -109,6 +109,11 @@ where let cart: Rc = Rc::from(yoke); DataPayload::from_partial_owned(cart) } + RcBuf(yoke) => { + // Case 4: Cast the whole RcBuf Yoke to the trait object. + let cart: Rc = Rc::from(yoke); + DataPayload::from_partial_owned(cart) + } } } } @@ -203,15 +208,27 @@ impl<'d> DataPayload<'d, 'static, ErasedDataStructMarker> { }, Err(any_rc) => any_rc, }; + // Check for Case 4: an RcBuf Yoke. + let y2 = any_rc.downcast::>>(); + let any_rc = match y2 { + Ok(rc_yoke) => match Rc::try_unwrap(rc_yoke) { + Ok(yoke) => return Ok(DataPayload { inner: RcBuf(yoke) }), + // Note: We could consider cloning the Yoke instead of erroring out. + Err(_) => return Err(Error::MultipleReferences), + }, + Err(any_rc) => any_rc, + }; // None of the downcasts succeeded; return an error. 
Err(Error::MismatchedType { actual: Some(any_rc.type_id()), generic: Some(TypeId::of::()), }) } - // This is unreachable because ErasedDataStructMarker cannot be fully owned, since it + // This is unreachable because ErasedDataStruct cannot be fully owned, since it // contains a reference. Owned(_) => unreachable!(), + // This is unreachable because ErasedDataStruct needs to reference an object. + RcBuf(_) => unreachable!(), } } } @@ -284,3 +301,58 @@ where }) } } + +#[cfg(test)] +mod test { + use super::*; + use crate::dynutil::UpcastDataPayload; + use crate::marker::CowStringMarker; + use std::borrow::Cow; + + #[test] + fn test_erased_case_1() { + let data = "foo".to_string(); + let original = DataPayload::::from_borrowed(&data); + let upcasted = ErasedDataStructMarker::upcast(original); + let downcasted = upcasted + .downcast::() + .expect("Type conversion"); + assert_eq!(downcasted.get(), "foo"); + } + + #[test] + fn test_erased_case_2() { + let data = Rc::new("foo".to_string()); + let original = DataPayload::::from_partial_owned(data); + let upcasted = ErasedDataStructMarker::upcast(original); + let downcasted = upcasted + .downcast::() + .expect("Type conversion"); + assert_eq!(downcasted.get(), "foo"); + } + + #[test] + fn test_erased_case_3() { + let data = "foo".to_string(); + let original = DataPayload::::from_owned(Cow::Owned(data)); + let upcasted = ErasedDataStructMarker::upcast(original); + let downcasted = upcasted + .downcast::() + .expect("Type conversion"); + assert_eq!(downcasted.get(), "foo"); + } + + #[test] + fn test_erased_case_4() { + let data: Rc<[u8]> = "foo".as_bytes().into(); + let original = DataPayload::::try_from_rc_buffer_badly(data, |bytes| { + std::str::from_utf8(bytes).map(|s| Cow::Borrowed(s)) + }) + .expect("String is valid UTF-8"); + let upcasted = ErasedDataStructMarker::upcast(original); + let downcasted = upcasted + .downcast::() + .expect("Type conversion"); + assert_eq!(downcasted.get(), "foo"); + } +} diff --git 
a/provider/core/src/serde.rs b/provider/core/src/serde.rs index dd26c618b69..143a5bb3603 100644 --- a/provider/core/src/serde.rs +++ b/provider/core/src/serde.rs @@ -23,51 +23,121 @@ use crate::error::Error; use crate::prelude::*; use std::ops::Deref; use std::rc::Rc; +use yoke::trait_hack::YokeTraitHack; use yoke::*; -/// An object that receives data from a Serde Deserializer. Implemented by [`DataPayload`]. +/// An object that receives data from a Serde Deserializer. /// -/// Lifetimes: -/// -/// - `'de` = deserializer lifetime; can usually be `'_` -pub trait SerdeDeDataReceiver<'de> { - /// Consumes a Serde Deserializer into this SerdeDeDataReceiver as owned data. +/// Implemented by `Option<`[`DataPayload`]`>`. +pub trait SerdeDeDataReceiver { + /// Receives a reference-counted byte buffer. /// - /// This method results in an owned payload, but the payload could have non-static references - /// according to the deserializer lifetime. + /// Upon calling this function, the receiver sends byte buffer back to the caller as the first + /// argument of `f1`. The caller should then map the byte buffer to an + /// [`erased_serde::Deserializer`] and pass it back to the receiver via `f2`. 
/// /// # Examples /// + /// Deserialize from a reference-counted buffer: + /// /// ``` /// use icu_provider::prelude::*; + /// use icu_provider::hello_world::*; /// use icu_provider::serde::SerdeDeDataReceiver; + /// use std::rc::Rc; /// - /// const JSON: &'static str = "\"hello world\""; + /// let json_text = "{\"message\":\"Hello World\"}"; + /// let rc_buffer: Rc<[u8]> = json_text.as_bytes().into(); + /// let mut receiver: Option> = None; + /// receiver + /// .receive_rc_buffer(rc_buffer, |bytes, f2| { + /// let mut d = serde_json::Deserializer::from_slice(bytes); + /// f2(&mut erased_serde::Deserializer::erase(&mut d)) + /// }) + /// .expect("Well-formed data"); + /// let payload = receiver.expect("Data is present"); /// - /// let mut receiver: Option<&str> = None; - /// let mut d = serde_json::Deserializer::from_str(JSON); - /// receiver.receive_deserializer(&mut erased_serde::Deserializer::erase(&mut d)) - /// .expect("Deserialization should be successful"); + /// assert_eq!(payload.get().message, "Hello World"); + /// ``` + fn receive_rc_buffer( + &mut self, + rc_buffer: Rc<[u8]>, + f1: for<'de> fn( + bytes: &'de [u8], + f2: &mut dyn FnMut(&mut dyn erased_serde::Deserializer<'de>), + ), + ) -> Result<(), Error>; + + /// Receives a `&'static` byte buffer via an [`erased_serde::Deserializer`]. + /// + /// Note: Since the purpose of this function is to handle zero-copy deserialization of static + /// byte buffers, we want `Deserializer<'static>` as opposed to `DeserializeOwned`. 
+ /// + /// # Examples + /// + /// Deserialize from a string to create static references: /// - /// assert!(matches!(receiver, Some(_))); - /// assert_eq!(receiver, Some("hello world")); /// ``` - fn receive_deserializer( + /// use icu_provider::prelude::*; + /// use icu_provider::hello_world::*; + /// use icu_provider::serde::SerdeDeDataReceiver; + /// + /// let json_text = "{\"message\":\"Hello World\"}"; + /// let deserializer = &mut serde_json::Deserializer::from_str(json_text); + /// let mut receiver: Option> = None; + /// receiver + /// .receive_static(&mut erased_serde::Deserializer::erase(deserializer)) + /// .expect("Well-formed data"); + /// let payload = receiver.expect("Data is present"); + /// + /// assert_eq!(payload.get().message, "Hello World"); + /// ``` + fn receive_static( &mut self, - deserializer: &mut dyn erased_serde::Deserializer<'de>, + deserializer: &mut dyn erased_serde::Deserializer<'static>, ) -> Result<(), Error>; } -impl<'de, T> SerdeDeDataReceiver<'de> for Option +impl<'d, 's, M> SerdeDeDataReceiver for Option> where - T: serde::Deserialize<'de>, + M: DataMarker<'s>, + M::Yokeable: serde::de::Deserialize<'static>, + // Actual bound: + // for<'de> >::Output: serde::de::Deserialize<'de>, + // Necessary workaround bound (see `yoke::trait_hack` docs): + for<'de> YokeTraitHack<>::Output>: serde::de::Deserialize<'de>, { - fn receive_deserializer( + fn receive_rc_buffer( + &mut self, + rc_buffer: Rc<[u8]>, + f1: for<'de> fn( + bytes: &'de [u8], + f2: &mut dyn FnMut(&mut dyn erased_serde::Deserializer<'de>), + ), + ) -> Result<(), Error> { + self.replace(DataPayload::try_from_rc_buffer(rc_buffer, move |bytes| { + let mut holder = None; + f1(bytes, &mut |deserializer| { + holder.replace( + erased_serde::deserialize::::Output>>( + deserializer, + ) + .map(|w| w.0), + ); + }); + // The holder is guaranteed to be populated so long as the lambda function was invoked, + // which is in the contract of `receive_rc_buffer`. 
+ holder.unwrap() + })?); + Ok(()) + } + + fn receive_static( &mut self, - deserializer: &mut dyn erased_serde::Deserializer<'de>, + deserializer: &mut dyn erased_serde::Deserializer<'static>, ) -> Result<(), Error> { - let obj: T = erased_serde::deserialize(deserializer)?; - self.replace(obj); + let obj: M::Yokeable = erased_serde::deserialize(deserializer)?; + self.replace(DataPayload::from_owned(obj)); Ok(()) } } @@ -75,7 +145,7 @@ where /// A type-erased data provider that loads payloads from a Serde Deserializer. /// /// Uses [`erased_serde`] to allow the trait to be object-safe. -pub trait SerdeDeDataProvider<'de> { +pub trait SerdeDeDataProvider { /// Query the provider for data, loading it into a [`SerdeDeDataReceiver`]. /// /// Returns Ok if the request successfully loaded data. If data failed to load, returns an @@ -83,24 +153,24 @@ pub trait SerdeDeDataProvider<'de> { fn load_to_receiver( &self, req: &DataRequest, - receiver: &mut dyn SerdeDeDataReceiver<'de>, + receiver: &mut dyn SerdeDeDataReceiver, ) -> Result; } -impl<'d, 's, M> DataProvider<'d, 's, M> for dyn SerdeDeDataProvider<'s> + 'd +impl<'d, 's, M> DataProvider<'d, 's, M> for dyn SerdeDeDataProvider + 'd where M: DataMarker<'s>, - M::Cart: serde::Deserialize<'s>, - M::Yokeable: ZeroCopyFrom, + M::Yokeable: serde::de::Deserialize<'static>, + // Actual bound: + // for<'de> >::Output: serde::de::Deserialize<'de>, + // Necessary workaround bound (see `yoke::trait_hack` docs): + for<'de> YokeTraitHack<>::Output>: serde::de::Deserialize<'de>, { /// Serve objects implementing [`serde::Deserialize<'s>`] from a [`SerdeDeDataProvider`]. 
fn load_payload(&self, req: &DataRequest) -> Result, Error> { let mut payload = None; let metadata = self.load_to_receiver(req, &mut payload)?; - Ok(DataResponse { - metadata, - payload: payload.map(|obj| DataPayload::from_partial_owned(Rc::new(obj))), - }) + Ok(DataResponse { metadata, payload }) } } @@ -193,6 +263,7 @@ where Borrowed(_) => todo!("#752"), RcStruct(yoke) => Rc::from(yoke), Owned(yoke) => Rc::from(yoke), + RcBuf(yoke) => Rc::from(yoke), }; DataPayload::from_partial_owned(cart) } diff --git a/provider/core/tests/data_receiver.rs b/provider/core/tests/data_receiver.rs index 267f378724e..18be22de1d9 100644 --- a/provider/core/tests/data_receiver.rs +++ b/provider/core/tests/data_receiver.rs @@ -2,69 +2,54 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +use icu_provider::hello_world::*; +use icu_provider::prelude::*; use icu_provider::serde::SerdeDeDataReceiver; -use serde::{Deserialize, Serialize}; use std::borrow::Cow; - -#[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] -struct DataStruct<'a> { - #[serde(borrow)] - pub value: Cow<'a, str>, -} +use std::rc::Rc; #[allow(clippy::redundant_static_lifetimes)] const DATA_JSON: &'static str = r#"{ - "value": "abc" + "message": "abc" }"#; #[test] fn test_deserializer_static() { // Deserialize from a string to create static references. let deserializer = &mut serde_json::Deserializer::from_str(DATA_JSON); - let mut receiver = None; - receiver - .receive_deserializer(&mut erased_serde::Deserializer::erase(deserializer)) - .expect("Well-formed data"); - - assert!(matches!( - receiver, - Some(DataStruct { - value: Cow::Borrowed(_) - }) - )); -} - -#[test] -fn test_deserializer_borrowed() { - // Deserialize from a local string to create non-static references. 
- let local_data = DATA_JSON.to_string(); - let deserializer = &mut serde_json::Deserializer::from_str(&local_data); - let mut receiver = None; + let mut receiver: Option> = None; receiver - .receive_deserializer(&mut erased_serde::Deserializer::erase(deserializer)) + .receive_static(&mut erased_serde::Deserializer::erase(deserializer)) .expect("Well-formed data"); + let payload = receiver.expect("Data is present"); assert!(matches!( - receiver, - Some(DataStruct { - value: Cow::Borrowed(_) - }) + payload.get(), + &HelloWorldV1 { + // TODO(#667): This should be Borrowed once HelloWorldV1 supports it + message: Cow::Owned(_) + } )); } #[test] fn test_deserializer_owned() { - // Deserialize from a reader to create owned data. - let deserializer = &mut serde_json::Deserializer::from_reader(DATA_JSON.as_bytes()); - let mut receiver = None; + // Deserialize from a reference-counted buffer. + let rc_buffer: Rc<[u8]> = DATA_JSON.as_bytes().into(); + let mut receiver: Option> = None; receiver - .receive_deserializer(&mut erased_serde::Deserializer::erase(deserializer)) + .receive_rc_buffer(rc_buffer, |bytes, f2| { + let mut d = serde_json::Deserializer::from_slice(bytes); + f2(&mut erased_serde::Deserializer::erase(&mut d)) + }) .expect("Well-formed data"); + let payload = receiver.expect("Data is present"); assert!(matches!( - receiver, - Some(DataStruct { - value: Cow::Owned(_) - }) + payload.get(), + &HelloWorldV1 { + // TODO(#667): This should be Borrowed once HelloWorldV1 supports it + message: Cow::Owned(_) + } )); } diff --git a/provider/fs/src/deserializer.rs b/provider/fs/src/deserializer.rs index db9480f3555..e145309b873 100644 --- a/provider/fs/src/deserializer.rs +++ b/provider/fs/src/deserializer.rs @@ -4,9 +4,12 @@ use crate::manifest::SyntaxOption; use icu_provider::prelude::*; -use icu_provider::serde::SerdeDeDataReceiver; -use std::io::Read; +use icu_provider::serde::*; +use icu_provider::yoke::trait_hack::YokeTraitHack; +use icu_provider::yoke::Yokeable; 
+use serde::Deserialize; use std::path::Path; +use std::rc::Rc; use thiserror::Error; /// An Error type specifically for the [`Deserializer`](serde::Deserializer) that doesn't carry filenames @@ -44,70 +47,78 @@ impl Error { } } -/// Get a JSON Deserializer. Implemeted as a macro because the return type is complex/private. -macro_rules! get_json_deserializer { - ($rdr:tt) => { - serde_json::Deserializer::from_reader($rdr) +/// Get a JSON zero-copy deserializer. Implemeted as a macro because the return type is complex/private. +macro_rules! get_json_deserializer_zc { + ($bytes:tt) => { + serde_json::Deserializer::from_slice($bytes) }; } -/// Get a Bincode Deserializer. Implemeted as a macro because the return type is complex/private. +/// Get a Bincode zero-copy Deserializer. Implemeted as a macro because the return type is complex/private. #[cfg(feature = "bincode")] -macro_rules! get_bincode_deserializer { - ($rdr:tt) => {{ +macro_rules! get_bincode_deserializer_zc { + ($bytes:tt) => {{ use bincode::Options; let options = bincode::DefaultOptions::new() .with_fixint_encoding() .allow_trailing_bytes(); - bincode::de::Deserializer::with_reader($rdr, options) + bincode::de::Deserializer::from_slice($bytes, options) }}; } /// Deserialize into a generic type ([`DataProvider`]). Covers all supported data formats. 
-pub fn deserialize_into_type<'de, T>( - rdr: impl Read, +#[allow(clippy::type_complexity)] +pub fn deserialize_zero_copy<'s, M>( syntax_option: &SyntaxOption, -) -> Result +) -> for<'de> fn(bytes: &'de [u8]) -> Result<>::Output, Error> where - T: serde::Deserialize<'de>, + M: DataMarker<'s>, + // Actual bound: + // for<'de> >::Output: serde::de::Deserialize<'de>, + // Necessary workaround bound (see `yoke::trait_hack` docs): + for<'de> YokeTraitHack<>::Output>: serde::de::Deserialize<'de>, { match syntax_option { - SyntaxOption::Json => { - let mut d = get_json_deserializer!(rdr); - let data = T::deserialize(&mut d)?; - Ok(data) - } + SyntaxOption::Json => |bytes| { + let mut d = get_json_deserializer_zc!(bytes); + let data = YokeTraitHack::<::Output>::deserialize(&mut d)?; + Ok(data.0) + }, #[cfg(feature = "bincode")] - SyntaxOption::Bincode => { - let mut d = get_bincode_deserializer!(rdr); - let data = T::deserialize(&mut d)?; - Ok(data) - } + SyntaxOption::Bincode => |bytes| { + let mut d = get_bincode_deserializer_zc!(bytes); + let data = YokeTraitHack::<::Output>::deserialize(&mut d)?; + Ok(data.0) + }, #[cfg(not(feature = "bincode"))] - SyntaxOption::Bincode => Err(Error::UnknownSyntax(syntax_option.clone())), + SyntaxOption::Bincode => |_| Err(Error::UnknownSyntax(SyntaxOption::Bincode)), } } /// Deserialize into a receiver used by [`SerdeDeDataProvider`](icu_provider::serde::SerdeDeDataProvider). /// Covers all supported data formats. 
pub fn deserialize_into_receiver( - rdr: impl Read, + rc_buffer: Rc<[u8]>, syntax_option: &SyntaxOption, receiver: &mut dyn SerdeDeDataReceiver, ) -> Result<(), Error> { match syntax_option { SyntaxOption::Json => { - let mut d = get_json_deserializer!(rdr); - receiver.receive_deserializer(&mut ::erase(&mut d))?; + receiver.receive_rc_buffer(rc_buffer, |bytes, f2| { + let mut d = get_json_deserializer_zc!(bytes); + f2(&mut ::erase(&mut d)) + })?; Ok(()) } #[cfg(feature = "bincode")] SyntaxOption::Bincode => { - let mut d = get_bincode_deserializer!(rdr); - receiver.receive_deserializer(&mut ::erase(&mut d))?; + receiver.receive_rc_buffer(rc_buffer, |bytes, f2| { + let mut d = get_bincode_deserializer_zc!(bytes); + f2(&mut ::erase(&mut d)) + })?; Ok(()) } #[cfg(not(feature = "bincode"))] - SyntaxOption::Bincode => Err(Error::UnknownSyntax(syntax_option.clone())), + SyntaxOption::Bincode => Err(Error::UnknownSyntax(SyntaxOption::Bincode)), } } diff --git a/provider/fs/src/fs_data_provider.rs b/provider/fs/src/fs_data_provider.rs index eeca85853d6..d0e9d6f9e84 100644 --- a/provider/fs/src/fs_data_provider.rs +++ b/provider/fs/src/fs_data_provider.rs @@ -8,6 +8,8 @@ use crate::manifest::Manifest; use crate::manifest::MANIFEST_FILE; use icu_provider::prelude::*; use icu_provider::serde::*; +use icu_provider::yoke::trait_hack::YokeTraitHack; +use icu_provider::yoke::Yokeable; use std::fmt::Debug; use std::fs; @@ -15,6 +17,7 @@ use std::fs::File; use std::io::BufReader; use std::io::Read; use std::path::PathBuf; +use std::rc::Rc; /// A data provider that reads ICU4X data from a filesystem directory. 
/// @@ -79,35 +82,51 @@ impl FsDataProvider { }; Ok((BufReader::new(file), path_buf)) } + + fn get_rc_buffer(&self, req: &DataRequest) -> Result<(Rc<[u8]>, PathBuf), DataError> { + let (mut reader, path_buf) = self.get_reader(req)?; + let mut buffer = Vec::::new(); + reader + .read_to_end(&mut buffer) + .map_err(|e| DataError::Resource(Box::new(Error::Io(e, Some(path_buf.clone())))))?; + let rc_buffer: Rc<[u8]> = buffer.into(); + Ok((rc_buffer, path_buf)) + } } impl<'d, 's, M> DataProvider<'d, 's, M> for FsDataProvider where M: DataMarker<'s>, - // TODO(#667): Change this to Deserialize<'s> - M::Yokeable: serde::de::DeserializeOwned, + // Actual bound: + // for<'de> >::Output: serde::de::Deserialize<'de>, + // Necessary workaround bound (see `yoke::trait_hack` docs): + for<'de> YokeTraitHack<>::Output>: serde::de::Deserialize<'de>, { fn load_payload(&self, req: &DataRequest) -> Result, DataError> { - let (reader, path_buf) = self.get_reader(req)?; - let data = deserializer::deserialize_into_type(reader, &self.manifest.syntax) - .map_err(|err| err.into_resource_error(&path_buf))?; + let (rc_buffer, path_buf) = self.get_rc_buffer(req)?; Ok(DataResponse { metadata: DataResponseMetadata { data_langid: req.resource_path.options.langid.clone(), }, - payload: Some(DataPayload::from_owned(data)), + payload: Some( + DataPayload::try_from_rc_buffer( + rc_buffer, + deserializer::deserialize_zero_copy::(&self.manifest.syntax), + ) + .map_err(|e: deserializer::Error| e.into_resource_error(&path_buf))?, + ), }) } } -impl<'de> SerdeDeDataProvider<'de> for FsDataProvider { +impl SerdeDeDataProvider for FsDataProvider { fn load_to_receiver( &self, req: &DataRequest, - receiver: &mut dyn SerdeDeDataReceiver<'de>, + receiver: &mut dyn SerdeDeDataReceiver, ) -> Result { - let (reader, path_buf) = self.get_reader(req)?; - deserializer::deserialize_into_receiver(reader, &self.manifest.syntax, receiver) + let (rc_buffer, path_buf) = self.get_rc_buffer(req)?; + 
deserializer::deserialize_into_receiver(rc_buffer, &self.manifest.syntax, receiver) .map_err(|err| err.into_resource_error(&path_buf))?; Ok(DataResponseMetadata { data_langid: req.resource_path.options.langid.clone(), diff --git a/utils/yoke/src/lib.rs b/utils/yoke/src/lib.rs index f80140060c0..8c5b7df9c28 100644 --- a/utils/yoke/src/lib.rs +++ b/utils/yoke/src/lib.rs @@ -13,6 +13,7 @@ // them out is good even when redundant #![allow(clippy::needless_lifetimes)] +pub mod trait_hack; mod yoke; mod yokeable; mod zero_copy_from; diff --git a/utils/yoke/src/trait_hack.rs b/utils/yoke/src/trait_hack.rs new file mode 100644 index 00000000000..d6d5e78ca59 --- /dev/null +++ b/utils/yoke/src/trait_hack.rs @@ -0,0 +1,220 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Workarounds for adding trait bounds to `yoke` objects. +//! +//! # Trait bounds in Yoke +//! +//! [Compiler bug #85636](https://github.com/rust-lang/rust/issues/85636) makes it tricky to add +//! trait bounds involving `yoke` types. +//! +//! For example, you may want to write: +//! +//! `where for<'a> >::Output: MyTrait` +//! +//! The above trait bound will compile, but at call sites, you get errors such as: +//! +//! > the trait `for<'de> MyTrait` is not implemented for `>::Output` +//! +//! There are two known workarounds: +//! +//! 1. If the trait is well-defined on references, like `Debug`, bind the trait to a reference: +//! `where for<'a> &'a >::Output: MyTrait` +//! 2. If the trait involves `Self`, like `Clone`, use [`YokeTraitHack`]: +//! `where for<'a> YokeTraitHack<>::Output>: MyTrait` +//! +//! # Examples +//! +//! Code that does not compile: +//! +//! ```compile_fail +//! use yoke::Yoke; +//! use yoke::Yokeable; +//! +//! // Example trait and struct for illustration purposes: +//! trait MyTrait {} +//! struct MyStruct {} +//! 
impl MyTrait for MyStruct {} +//! unsafe impl<'a> Yokeable<'a> for MyStruct { +//! // (not shown; see `Yokeable` for examples) +//! # type Output = MyStruct; +//! # fn transform(&'a self) -> &'a Self::Output { +//! # self +//! # } +//! # unsafe fn make(from: Self::Output) -> Self { +//! # std::mem::transmute(from) +//! # } +//! # fn with_mut(&'a mut self, f: F) +//! # where +//! # F: 'static + for<'b> FnOnce(&'b mut Self::Output), +//! # { +//! # unsafe { +//! # f(std::mem::transmute::<&'a mut Self, &'a mut Self::Output>( +//! # self, +//! # )) +//! # } +//! # } +//! } +//! +//! impl MyTrait for Yoke +//! where +//! Y: for<'a> Yokeable<'a>, +//! for<'a> >::Output: MyTrait, +//! {} +//! +//! fn example() { +//! let y = Yoke::::new_always_owned(MyStruct {}); +//! // error[E0277]: the trait bound `for<'a> >::Output: MyTrait` is not satisfied +//! let _: &dyn MyTrait = &y; +//! } +//! ``` +//! +//! Example for binding the trait to a reference: +//! +//! ``` +//! use yoke::Yoke; +//! use yoke::Yokeable; +//! +//! // Example trait and struct for illustration purposes: +//! trait MyTrait { +//! fn demo(&self) -> u32; +//! } +//! struct MyStruct(u32); +//! impl MyTrait for MyStruct { +//! fn demo(&self) -> u32 { +//! self.0 +//! } +//! } +//! unsafe impl<'a> Yokeable<'a> for MyStruct { +//! // (not shown; see `Yokeable` for examples) +//! # type Output = MyStruct; +//! # fn transform(&'a self) -> &'a Self::Output { +//! # self +//! # } +//! # unsafe fn make(from: Self::Output) -> Self { +//! # std::mem::transmute(from) +//! # } +//! # fn with_mut(&'a mut self, f: F) +//! # where +//! # F: 'static + for<'b> FnOnce(&'b mut Self::Output), +//! # { +//! # unsafe { +//! # f(std::mem::transmute::<&'a mut Self, &'a mut Self::Output>( +//! # self, +//! # )) +//! # } +//! # } +//! } +//! +//! // The trait needs to be defined on references: +//! impl<'a, T> MyTrait for &'a T where T: MyTrait { +//! fn demo(&self) -> u32 { +//! self.demo() +//! } +//! } +//! +//! 
impl<Y, C> MyTrait for Yoke<Y, C>
+//! where
+//!     Y: for<'a> Yokeable<'a>,
+//!     for<'a> &'a <Y as Yokeable<'a>>::Output: MyTrait,
+//! {
+//!     fn demo(&self) -> u32 {
+//!         self.get().demo()
+//!     }
+//! }
+//!
+//! fn example() {
+//!     let y = Yoke::<MyStruct, ()>::new_always_owned(MyStruct(42));
+//!     let _: &dyn MyTrait = &y;
+//! }
+//! ```
+//!
+//! Example for using [`YokeTraitHack`]:
+//!
+//! ```
+//! use yoke::Yoke;
+//! use yoke::Yokeable;
+//! use yoke::trait_hack::YokeTraitHack;
+//! use std::rc::Rc;
+//!
+//! // Example trait and struct for illustration purposes:
+//! trait MyTrait {
+//!     fn demo(data: u32) -> Self;
+//! }
+//! struct MyStruct(u32);
+//! impl MyTrait for MyStruct {
+//!     fn demo(data: u32) -> Self {
+//!         Self(data)
+//!     }
+//! }
+//! unsafe impl<'a> Yokeable<'a> for MyStruct {
+//!     // (not shown; see `Yokeable` for examples)
+//! #     type Output = MyStruct;
+//! #     fn transform(&'a self) -> &'a Self::Output {
+//! #         self
+//! #     }
+//! #     unsafe fn make(from: Self::Output) -> Self {
+//! #         std::mem::transmute(from)
+//! #     }
+//! #     fn with_mut<F>(&'a mut self, f: F)
+//! #     where
+//! #         F: 'static + for<'b> FnOnce(&'b mut Self::Output),
+//! #     {
+//! #         unsafe {
+//! #             f(std::mem::transmute::<&'a mut Self, &'a mut Self::Output>(
+//! #                 self,
+//! #             ))
+//! #         }
+//! #     }
+//! }
+//!
+//! // The trait needs to be defined on YokeTraitHack:
+//! impl<'a, T> MyTrait for YokeTraitHack<T> where T: MyTrait {
+//!     fn demo(data: u32) -> Self {
+//!         YokeTraitHack(T::demo(data))
+//!     }
+//! }
+//!
+//! impl<Y> MyTrait for Yoke<Y, Rc<u32>>
+//! where
+//!     Y: for<'a> Yokeable<'a>,
+//!     for<'a> YokeTraitHack<<Y as Yokeable<'a>>::Output>: MyTrait,
+//! {
+//!     fn demo(data: u32) -> Self {
+//!         let rc_u32: Rc<u32> = Rc::new(data);
+//!         Yoke::attach_to_cart_badly(rc_u32, |u| {
+//!             YokeTraitHack::<<Y as Yokeable>::Output>::demo(*u).0
+//!         })
+//!     }
+//! }
+//!
+//! fn example() {
+//!     let _ = Yoke::<MyStruct, Rc<u32>>::demo(42);
+//! }
+//! ```
+
+/// A wrapper around a type `T`, forwarding trait calls down to the inner type.
+///
+/// `YokeTraitHack` supports [`Clone`] and [`serde::Deserialize`] out of the box. Other traits can
+/// be implemented by the caller.
+///
+/// For more information, see the module-level documentation.
+#[repr(transparent)]
+#[derive(Clone)]
+pub struct YokeTraitHack<T>(pub T);
+
+// This is implemented manually to avoid the serde derive dependency.
+#[cfg(feature = "serde")]
+impl<'de, T> serde::de::Deserialize<'de> for YokeTraitHack<T>
+where
+    T: serde::de::Deserialize<'de>,
+{
+    #[inline]
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::de::Deserializer<'de>,
+    {
+        T::deserialize(deserializer).map(YokeTraitHack)
+    }
+}
diff --git a/utils/yoke/src/yoke.rs b/utils/yoke/src/yoke.rs
index d573bf08cad..0c86dc24bd7 100644
--- a/utils/yoke/src/yoke.rs
+++ b/utils/yoke/src/yoke.rs
@@ -18,8 +18,10 @@ use std::sync::Arc;
 /// not the actual lifetime of the data, rather it is a convenient way to erase
 /// the lifetime and make it dynamic.
 ///
-/// `C` is the "cart", which `Y` may contain references to. A [`Yoke`] can be constructed
-/// with such references using [`Self::attach_to_cart()`].
+/// `C` is the "cart", which `Y` may contain references to.
+///
+/// The primary constructor for [`Yoke`] is [`Yoke::attach_to_cart()`]. Several variants of that
+/// constructor are provided to serve numerous types of call sites and `Yoke` signatures.
 ///
 /// # Example
 ///
@@ -55,11 +57,45 @@ pub struct Yoke<Y: for<'a> Yokeable<'a>, C> {
 }
 
 impl<Y: for<'a> Yokeable<'a>, C: StableDeref> Yoke<Y, C> {
-    /// Construct a [`Yoke`] by yokeing an object to a cart. This is the primary constructor
-    /// for [`Yoke`].
+    /// Construct a [`Yoke`] by yoking an object to a cart in a closure.
     ///
-    /// This method is currently unusable due to a [compiler bug](https://github.com/rust-lang/rust/issues/84937),
-    /// use [`Yoke::attach_to_cart_badly()`] instead
+    /// See also [`Yoke::try_attach_to_cart()`] to return a `Result` from the closure.
+    ///
+    /// Due to [compiler bug #84937](https://github.com/rust-lang/rust/issues/84937), call sites
+    /// for this function may not compile; if this happens, use
+    /// [`Yoke::attach_to_cart_badly()`] instead.
+    pub fn attach_to_cart<F>(cart: C, f: F) -> Self
+    where
+        F: for<'de> FnOnce(&'de <C as Deref>::Target) -> <Y as Yokeable<'de>>::Output,
+    {
+        let deserialized = f(cart.deref());
+        Self {
+            yokeable: unsafe { Y::make(deserialized) },
+            cart,
+        }
+    }
+
+    /// Construct a [`Yoke`] by yoking an object to a cart. If an error occurs in the
+    /// deserializer function, the error is passed up to the caller.
+    ///
+    /// Due to [compiler bug #84937](https://github.com/rust-lang/rust/issues/84937), call sites
+    /// for this function may not compile; if this happens, use
+    /// [`Yoke::try_attach_to_cart_badly()`] instead.
+    pub fn try_attach_to_cart<E, F>(cart: C, f: F) -> Result<Self, E>
+    where
+        F: for<'de> FnOnce(&'de <C as Deref>::Target) -> Result<<Y as Yokeable<'de>>::Output, E>,
+    {
+        let deserialized = f(cart.deref())?;
+        Ok(Self {
+            yokeable: unsafe { Y::make(deserialized) },
+            cart,
+        })
+    }
+
+    /// Construct a [`Yoke`] by yoking an object to a cart in a closure.
+    ///
+    /// For a version of this function that takes a `FnOnce` instead of a raw function pointer,
+    /// see [`Yoke::attach_to_cart()`].
     ///
     /// # Example
     ///
@@ -86,10 +122,10 @@ impl<Y: for<'a> Yokeable<'a>, C: StableDeref> Yoke<Y, C> {
     /// assert_eq!(&**yoke.get(), "hello");
     /// assert!(matches!(yoke.get(), &Cow::Borrowed(_)));
     /// ```
-    pub fn attach_to_cart<F>(cart: C, f: F) -> Self
-    where
-        F: for<'de> FnOnce(&'de <C as Deref>::Target) -> <Y as Yokeable<'de>>::Output,
-    {
+    pub fn attach_to_cart_badly(
+        cart: C,
+        f: for<'de> fn(&'de <C as Deref>::Target) -> <Y as Yokeable<'de>>::Output,
+    ) -> Self {
         let deserialized = f(cart.deref());
         Self {
             yokeable: unsafe { Y::make(deserialized) },
@@ -97,19 +133,36 @@ impl<Y: for<'a> Yokeable<'a>, C: StableDeref> Yoke<Y, C> {
         }
     }
 
-    /// Temporary version of [`Yoke::attach_to_cart()`]
-    /// that doesn't hit https://github.com/rust-lang/rust/issues/84937
+    /// Construct a [`Yoke`] by yoking an object to a cart.
If an error occurs in the
+    /// deserializer function, the error is passed up to the caller.
     ///
-    /// See its docs for more details
-    pub fn attach_to_cart_badly(
+    /// For a version of this function that takes a `FnOnce` instead of a raw function pointer,
+    /// see [`Yoke::try_attach_to_cart()`].
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use yoke::{Yoke, Yokeable};
+    /// # use std::rc::Rc;
+    /// # use std::borrow::Cow;
+    /// let rc = Rc::new([0xb, 0xa, 0xd]);
+    ///
+    /// let yoke_result: Result<Yoke<Cow<str>, Rc<[u8]>>, _> =
+    ///     Yoke::try_attach_to_cart_badly(rc, |data: &[u8]| {
+    ///         bincode::deserialize(data)
+    ///     });
+    ///
+    /// assert!(matches!(yoke_result, Err(_)));
+    /// ```
+    pub fn try_attach_to_cart_badly<E>(
         cart: C,
-        f: for<'de> fn(&'de <C as Deref>::Target) -> <Y as Yokeable<'de>>::Output,
-    ) -> Self {
-        let deserialized = f(cart.deref());
-        Self {
+        f: for<'de> fn(&'de <C as Deref>::Target) -> Result<<Y as Yokeable<'de>>::Output, E>,
+    ) -> Result<Self, E> {
+        let deserialized = f(cart.deref())?;
+        Ok(Self {
             yokeable: unsafe { Y::make(deserialized) },
             cart,
-        }
+        })
     }
 }
 
diff --git a/utils/yoke/src/yokeable.rs b/utils/yoke/src/yokeable.rs
index 3fec95ce110..5a01716770b 100644
--- a/utils/yoke/src/yokeable.rs
+++ b/utils/yoke/src/yokeable.rs
@@ -29,6 +29,12 @@ use std::{mem, ptr};
 ///
 /// There are further constraints on implementation safety on individual methods.
 ///
+/// # Trait bounds
+///
+/// [Compiler bug #85636](https://github.com/rust-lang/rust/issues/85636) makes it tricky to add
+/// trait bounds on `Yokeable::Output`. For more information and for workarounds, see
+/// [`crate::trait_hack`].
+///
 /// # Implementation example
 ///
 /// This crate will eventually expose a custom derive that makes it possible to implement this