diff --git a/CHANGELOG.md b/CHANGELOG.md index 84449b1c0..ce8e601dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add minimal support for `#[serde(flatten)]` with roundtripping through RON maps ([#455](https://github.com/ron-rs/ron/pull/455)) - Breaking: Bump `bitflags` dependency to 2.0, changes `serde` impls of `Extensions` ([#443](https://github.com/ron-rs/ron/pull/443)) - Bump MSRV to 1.64.0 and bump dependency: `indexmap` to 2.0 ([#459](https://github.com/ron-rs/ron/pull/459)) +- Add minimal roundtripping support for `#[serde(tag = "tag")]`, `#[serde(tag = "tag", content = "content")]`, and `#[serde(untagged)]` enums ([#451](https://github.com/ron-rs/ron/pull/451)) ## [0.8.0] - 2022-08-17 diff --git a/README.md b/README.md index 52b9e021a..0c10c9b08 100644 --- a/README.md +++ b/README.md @@ -104,13 +104,17 @@ Note the following advantages of RON over JSON: ## Limitations -RON is not designed to be a fully self-describing format (unlike JSON) and is thus not guaranteed to work when [`deserialize_any`](https://docs.rs/serde/latest/serde/trait.Deserializer.html#tymethod.deserialize_any) is used instead of its typed alternatives. In particular, the following Serde attributes are not yet supported: -- `#[serde(tag = "type")]`, i.e. internally tagged enums -- `#[serde(untagged)]`, i.e. untagged enums +RON is not designed to be a fully self-describing format (unlike JSON) and is thus not guaranteed to work when [`deserialize_any`](https://docs.rs/serde/latest/serde/trait.Deserializer.html#tymethod.deserialize_any) is used instead of its typed alternatives. In particular, the following Serde attributes only have limited support: -Furthermore, `#[serde(flatten)]` only has limited support and relies on a small hack [^serde-flatten-hack]. Specifically, flattened structs are only serialised as maps and deserialised from maps. However, this limited implementation supports full roundtripping. +- `#[serde(tag = "tag")]`, i.e. internally tagged enums [^serde-enum-hack] +- `#[serde(tag = "tag", content = "content")]`, i.e. adjacently tagged enums [^serde-enum-hack] +- `#[serde(untagged)]`, i.e. untagged enums [^serde-enum-hack] +- `#[serde(flatten)]`, i.e. flattening of structs into maps [^serde-flatten-hack] -[^serde-flatten-hack]: Deserialising a flattened struct from a map requires that the struct's [`Visitor::expecting`](https://docs.rs/serde/latest/serde/de/trait.Visitor.html#tymethod.expecting) implementation formats a string starting with `"struct "`. This is the case for automatically-derived [`Deserialize`](https://docs.rs/serde/latest/serde/de/trait.Deserialize.html) impls on structs. +While data structures with any of these attributes should roundtrip through RON, their textual representation may not always match your expectation. For instance, flattened structs are only serialised as maps and deserialised from maps. + +[^serde-enum-hack]: Deserialising an internally, adjacently, or un-tagged enum requires detecting `serde`'s internal `serde::__private::de::content::Content` content type so that RON can describe the deserialised data structure in serde's internal JSON-like format. This detection only works for the automatically-derived [`Deserialize`](https://docs.rs/serde/latest/serde/de/trait.Deserialize.html) impls on enums. See [#451](https://github.com/ron-rs/ron/pull/451) for more details. +[^serde-flatten-hack]: Deserialising a flattened struct from a map requires that the struct's [`Visitor::expecting`](https://docs.rs/serde/latest/serde/de/trait.Visitor.html#tymethod.expecting) implementation formats a string starting with `"struct "`. This is the case for automatically-derived [`Deserialize`](https://docs.rs/serde/latest/serde/de/trait.Deserialize.html) impls on structs. See [#455](https://github.com/ron-rs/ron/pull/455) for more details. ## RON syntax overview diff --git a/src/de/mod.rs b/src/de/mod.rs index daf2ba436..299c76a18 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -17,7 +17,7 @@ use crate::{ error::{Result, SpannedResult}, extensions::Extensions, options::Options, - parse::{AnyNum, Bytes, ParsedStr, BASE64_ENGINE}, + parse::{AnyNum, Bytes, ParsedStr, StructType, BASE64_ENGINE}, }; mod id; @@ -33,6 +33,7 @@ mod value; pub struct Deserializer<'de> { bytes: Bytes<'de>, newtype_variant: bool, + serde_content_newtype: bool, last_identifier: Option<&'de str>, recursion_limit: Option, } @@ -56,6 +57,7 @@ impl<'de> Deserializer<'de> { let mut deserializer = Deserializer { bytes: Bytes::new(input)?, newtype_variant: false, + serde_content_newtype: false, last_identifier: None, recursion_limit: options.recursion_limit, }; @@ -140,25 +142,49 @@ impl<'de> Deserializer<'de> { /// struct and deserializes it accordingly. /// /// This method assumes there is no identifier left. - fn handle_any_struct(&mut self, visitor: V) -> Result + fn handle_any_struct(&mut self, visitor: V, ident: Option<&str>) -> Result where V: Visitor<'de>, { - // Create a working copy - let mut bytes = self.bytes; + // HACK: switch to JSON enum semantics for JSON content + // Robust impl blocked on https://github.com/serde-rs/serde/pull/2420 + let is_serde_content = + std::any::type_name::() == "serde::__private::de::content::Content"; - if bytes.consume("(") { - bytes.skip_ws()?; + let old_serde_content_newtype = self.serde_content_newtype; + self.serde_content_newtype = false; - if bytes.check_tuple_struct()? { - // first argument is technically incorrect, but ignored anyway - self.deserialize_tuple(0, visitor) - } else { + match (self.bytes.check_struct_type()?, ident) { + (StructType::Unit, Some(ident)) if is_serde_content => { + // serde's Content type needs the ident for unit variants + visitor.visit_str(ident) + } + (StructType::Unit, _) => visitor.visit_unit(), + (_, Some(ident)) if is_serde_content => { + // serde's Content type uses a singleton map encoding for enums + visitor.visit_map(SerdeEnumContent { + de: self, + ident: Some(ident), + }) + } + (StructType::Named, _) => { // giving no name results in worse errors but is necessary here self.handle_struct_after_name("", visitor) } - } else { - visitor.visit_unit() + (StructType::NewtypeOrTuple, _) if old_serde_content_newtype => { + // deserialize a newtype struct or variant + self.bytes.consume("("); + self.bytes.skip_ws()?; + let result = self.deserialize_any(visitor); + self.bytes.skip_ws()?; + self.bytes.consume(")"); + + result + } + (StructType::Tuple | StructType::NewtypeOrTuple, _) => { + // first argument is technically incorrect, but ignored anyway + self.deserialize_tuple(0, visitor) + } } } @@ -243,14 +269,14 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { // `identifier` does not change state if it fails let ident = self.bytes.identifier().ok(); - if ident.is_some() { + if let Some(ident) = ident { self.bytes.skip_ws()?; - return self.handle_any_struct(visitor); + return self.handle_any_struct(visitor, Some(std::str::from_utf8(ident)?)); } match self.bytes.peek_or_eof()? { - b'(' => self.handle_any_struct(visitor), + b'(' => self.handle_any_struct(visitor, None), b'[' => self.deserialize_seq(visitor), b'{' => self.deserialize_map(visitor), b'0'..=b'9' | b'+' | b'-' => { @@ -925,3 +951,36 @@ fn struct_error_name(error: Error, name: Option<&str>) -> Error { e => e, } } + +struct SerdeEnumContent<'a, 'de: 'a> { + de: &'a mut Deserializer<'de>, + ident: Option<&'a str>, +} + +impl<'de, 'a> de::MapAccess<'de> for SerdeEnumContent<'a, 'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: DeserializeSeed<'de>, + { + self.ident + .take() + .map(|ident| seed.deserialize(serde::de::value::StrDeserializer::new(ident))) + .transpose() + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + self.de.bytes.skip_ws()?; + + let old_serde_content_newtype = self.de.serde_content_newtype; + self.de.serde_content_newtype = true; + let result = seed.deserialize(&mut *self.de); + self.de.serde_content_newtype = old_serde_content_newtype; + + result + } +} diff --git a/src/parse.rs b/src/parse.rs index 59047b341..e5747443a 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -427,17 +427,67 @@ impl<'a> Bytes<'a> { .map_or(false, |&b| is_ident_other_char(b)) } - /// Should only be used on a working copy - pub fn check_tuple_struct(mut self) -> Result { - if self.identifier().is_err() { - // if there's no field ident, this is a tuple struct - return Ok(true); + pub fn check_struct_type(&mut self) -> Result { + fn check_struct_type_inner(bytes: &mut Bytes) -> Result { + if !bytes.consume("(") { + return Ok(StructType::Unit); + } + + bytes.skip_ws()?; + + if bytes.identifier().is_ok() { + bytes.skip_ws()?; + + match bytes.peek() { + // Definitely a struct with named fields + Some(b':') => return Ok(StructType::Named), + // Definitely a tuple struct with fields + Some(b',') => return Ok(StructType::Tuple), + // Either a newtype or a tuple struct + Some(b')') => return Ok(StructType::NewtypeOrTuple), + // Something else, let's investigate further + _ => (), + }; + } + + let mut braces = 1; + let mut comma = false; + + // Skip ahead to see if the value is followed by a comma + while braces > 0 { + // Skip spurious braces in comments and strings + bytes.skip_ws()?; + let _ = bytes.string(); + + let c = bytes.eat_byte()?; + if c == b'(' || c == b'[' || c == b'{' { + braces += 1; + } else if c == b')' || c == b']' || c == b'}' { + braces -= 1; + } else if c == b',' && braces == 1 { + comma = true; + break; + } + } + + if comma { + Ok(StructType::Tuple) + } else { + Ok(StructType::NewtypeOrTuple) + } } - self.skip_ws()?; + // Create a temporary working copy + let mut bytes = *self; - // if there is no colon after the ident, this can only be a unit struct - self.eat_byte().map(|c| c != b':') + let result = check_struct_type_inner(&mut bytes); + + if result.is_err() { + // Adjust the error span to fit the working copy + *self = bytes; + } + + result } /// Only returns true if the char after `ident` cannot belong @@ -996,6 +1046,13 @@ pub enum ParsedStr<'a> { Slice(&'a str), } +pub enum StructType { + NewtypeOrTuple, + Tuple, + Named, + Unit, +} + #[cfg(test)] mod tests { use super::*; diff --git a/tests/117_untagged_tuple_variant.rs b/tests/117_untagged_tuple_variant.rs index 6d98deb65..0894694bd 100644 --- a/tests/117_untagged_tuple_variant.rs +++ b/tests/117_untagged_tuple_variant.rs @@ -23,7 +23,7 @@ fn test_ebkalderon_case() { flags: [ "--enable-thing", "--enable-other-thing", - If("some-conditional", ["--enable-third-thing"]), + ("some-conditional", ["--enable-third-thing"]), ] ) "#; diff --git a/tests/307_stack_overflow.rs b/tests/307_stack_overflow.rs index 7076e6e13..d58e46b60 100644 Binary files a/tests/307_stack_overflow.rs and b/tests/307_stack_overflow.rs differ diff --git a/tests/449_tagged_enum.rs b/tests/449_tagged_enum.rs new file mode 100644 index 000000000..abc111b7e --- /dev/null +++ b/tests/449_tagged_enum.rs @@ -0,0 +1,266 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] +enum InnerEnum { + Unit, + Newtype(bool), + Tuple(bool, i32), + Struct { field: char }, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +struct Container { + field: InnerEnum, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] +enum OuterEnum { + Variant(Container), + Sum { field: InnerEnum, value: i32 }, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] +#[serde(tag = "tag")] +enum OuterEnumInternal { + Variant(Container), + Sum { field: InnerEnum, value: i32 }, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] +#[serde(tag = "tag", content = "c")] +enum OuterEnumAdjacent { + Variant(Container), + Sum { field: InnerEnum, value: i32 }, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] +#[serde(untagged)] +enum OuterEnumUntagged { + Variant(Container), + Sum { field: InnerEnum, value: i32 }, +} + +#[test] +fn test_serde_content_hack() { + assert_eq!( + std::any::type_name::(), + "serde::__private::de::content::Content" + ) +} + +#[test] +fn test_enum_in_enum_roundtrip() { + let outer = OuterEnum::Variant(Container { + field: InnerEnum::Unit, + }); + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "Variant((field:Unit))"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnum::Sum { + field: InnerEnum::Newtype(true), + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "Sum(field:Newtype(true),value:42)"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnum::Sum { + field: InnerEnum::Tuple(true, 24), + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "Sum(field:Tuple(true,24),value:42)"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnum::Sum { + field: InnerEnum::Struct { field: '🦀' }, + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "Sum(field:Struct(field:'🦀'),value:42)"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); +} + +#[test] +fn test_enum_in_internally_tagged_roundtrip() { + let outer = OuterEnumInternal::Variant(Container { + field: InnerEnum::Unit, + }); + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(tag:\"Variant\",field:Unit)"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnumInternal::Sum { + field: InnerEnum::Newtype(true), + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(tag:\"Sum\",field:Newtype(true),value:42)"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnumInternal::Sum { + field: InnerEnum::Tuple(true, 24), + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(tag:\"Sum\",field:Tuple(true,24),value:42)"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnumInternal::Sum { + field: InnerEnum::Struct { field: '🦀' }, + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(tag:\"Sum\",field:Struct(field:'🦀'),value:42)"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); +} + +#[test] +fn test_enum_in_adjacently_tagged_roundtrip() { + let outer = OuterEnumAdjacent::Variant(Container { + field: InnerEnum::Unit, + }); + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(tag:\"Variant\",c:(field:Unit))"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnumAdjacent::Sum { + field: InnerEnum::Newtype(true), + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(tag:\"Sum\",c:(field:Newtype(true),value:42))"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnumAdjacent::Sum { + field: InnerEnum::Tuple(true, 24), + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(tag:\"Sum\",c:(field:Tuple(true,24),value:42))"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnumAdjacent::Sum { + field: InnerEnum::Struct { field: '🦀' }, + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(tag:\"Sum\",c:(field:Struct(field:'🦀'),value:42))"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); +} + +#[test] +fn test_enum_in_untagged_roundtrip() { + let outer = OuterEnumUntagged::Variant(Container { + field: InnerEnum::Unit, + }); + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(field:Unit)"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnumUntagged::Sum { + field: InnerEnum::Newtype(true), + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(field:Newtype(true),value:42)"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnumUntagged::Sum { + field: InnerEnum::Tuple(true, 24), + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(field:Tuple(true,24),value:42)"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); + + let outer = OuterEnumUntagged::Sum { + field: InnerEnum::Struct { field: '🦀' }, + value: 42, + }; + + let ron = ron::to_string(&outer).unwrap(); + + assert_eq!(ron, "(field:Struct(field:'🦀'),value:42)"); + + let de = ron::from_str::(&ron); + + assert_eq!(de, Ok(outer)); +} diff --git a/tests/value.rs b/tests/value.rs index 3919058c0..65838ab08 100644 --- a/tests/value.rs +++ b/tests/value.rs @@ -4,7 +4,7 @@ use ron::{ error::Error, value::{Map, Number, Value}, }; -use serde::Serialize; +use serde_derive::{Deserialize, Serialize}; #[test] fn bool() { @@ -117,10 +117,10 @@ fn unit() { ); } -#[derive(Serialize)] +#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)] struct Scene(Option<(u32, u32)>); -#[derive(Serialize)] +#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)] struct Scene2 { foo: Option<(u32, u32)>, } @@ -130,19 +130,33 @@ fn roundtrip() { use ron::{de::from_str, ser::to_string}; { - let s = to_string(&Scene2 { + let v = Scene2 { foo: Some((122, 13)), - }) - .unwrap(); + }; + let s = to_string(&v).unwrap(); + println!("{}", s); + let val: Value = from_str(&s).unwrap(); + println!("{:?}", val); + let v2 = val.into_rust::(); + assert_eq!(v2, Ok(v)); + } + { + let v = Scene(Some((13, 122))); + let s = to_string(&v).unwrap(); println!("{}", s); - let scene: Value = from_str(&s).unwrap(); - println!("{:?}", scene); + let val: Value = from_str(&s).unwrap(); + println!("{:?}", val); + let v2 = val.into_rust::(); + assert_eq!(v2, Ok(v)); } { - let s = to_string(&Scene(Some((13, 122)))).unwrap(); + let v = (42,); + let s = to_string(&v).unwrap(); println!("{}", s); - let scene: Value = from_str(&s).unwrap(); - println!("{:?}", scene); + let val: Value = from_str(&s).unwrap(); + println!("{:?}", val); + let v2 = val.into_rust::<(i32,)>(); + assert_eq!(v2, Ok(v)); } }