Skip to content

Commit

Permalink
Edge cases over edge cases
Browse files Browse the repository at this point in the history
  • Loading branch information
juntyr committed Jul 17, 2023
1 parent f74c9f8 commit ff44ec4
Show file tree
Hide file tree
Showing 5 changed files with 307 additions and 69 deletions.
77 changes: 46 additions & 31 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use crate::{
error::{Result, SpannedResult},
extensions::Extensions,
options::Options,
parse::{AnyNum, Bytes, ParsedStr, BASE64_ENGINE},
parse::{AnyNum, Bytes, ParsedStr, StructType, BASE64_ENGINE},
};

mod id;
Expand All @@ -33,6 +33,7 @@ mod value;
/// Deserializer state: the input being parsed plus flags that steer how
/// ambiguous syntax is interpreted.
pub struct Deserializer<'de> {
/// Parser over the input; constructed with `Bytes::new(input)`.
bytes: Bytes<'de>,
// NOTE(review): presumably set while the payload of a newtype enum variant
// is being read — the code that sets it is not visible here; confirm.
newtype_variant: bool,
/// Set to `true` by `SerdeEnumContent` while it deserializes its map value
/// and restored afterwards. `handle_any_struct` saves and clears it, and if
/// it was set, reads an ambiguous `( value )` as a newtype wrapper around a
/// single value instead of as a tuple.
serde_content_newtype: bool,
// NOTE(review): looks like the most recently parsed identifier — confirm
// against the code that writes it.
last_identifier: Option<&'de str>,
/// Initialised from `options.recursion_limit`.
// NOTE(review): `None` presumably means "no limit" — confirm.
recursion_limit: Option<usize>,
}
Expand All @@ -56,6 +57,7 @@ impl<'de> Deserializer<'de> {
let mut deserializer = Deserializer {
bytes: Bytes::new(input)?,
newtype_variant: false,
serde_content_newtype: false,
last_identifier: None,
recursion_limit: options.recursion_limit,
};
Expand Down Expand Up @@ -140,25 +142,49 @@ impl<'de> Deserializer<'de> {
/// struct and deserializes it accordingly.
///
/// This method assumes there is no identifier left.
fn handle_any_struct<V>(&mut self, visitor: V) -> Result<V::Value>
fn handle_any_struct<V>(&mut self, visitor: V, ident: Option<&str>) -> Result<V::Value>
where
V: Visitor<'de>,
{
// Create a working copy
let mut bytes = self.bytes;
// HACK: switch to JSON enum semantics for JSON content
// Robust impl blocked on https://github.com/serde-rs/serde/pull/2420
let is_serde_content =
std::any::type_name::<V::Value>() == "serde::__private::de::content::Content";

if bytes.consume("(") {
bytes.skip_ws()?;
let old_serde_content_newtype = self.serde_content_newtype;
self.serde_content_newtype = false;

if bytes.check_tuple_struct()? {
// first argument is technically incorrect, but ignored anyway
self.deserialize_tuple(0, visitor)
} else {
match (self.bytes.check_struct_type()?, ident) {
(StructType::Unit, Some(ident)) if is_serde_content => {
// serde's Content type needs the ident for unit variants
visitor.visit_str(ident)
}
(StructType::Unit, _) => visitor.visit_unit(),
(_, Some(ident)) if is_serde_content => {
// serde's Content type uses a singleton map encoding for enums
visitor.visit_map(SerdeEnumContent {
de: self,
ident: Some(ident),
})
}
(StructType::Named, _) => {
// giving no name results in worse errors but is necessary here
self.handle_struct_after_name("", visitor)
}
} else {
visitor.visit_unit()
(StructType::NewtypeOrTuple, _) if old_serde_content_newtype => {
// deserialize a newtype struct or variant
self.bytes.consume("(");
self.bytes.skip_ws()?;
let result = self.deserialize_any(visitor);
self.bytes.skip_ws()?;
self.bytes.consume(")");

result
}
(StructType::Tuple | StructType::NewtypeOrTuple, _) => {
// first argument is technically incorrect, but ignored anyway
self.deserialize_tuple(0, visitor)
}
}
}

Expand Down Expand Up @@ -246,27 +272,11 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
if let Some(ident) = ident {
self.bytes.skip_ws()?;

// HACK: switch to JSON enum semantics for JSON content
// Robust impl blocked on https://github.com/serde-rs/serde/pull/2420
return if std::any::type_name::<V::Value>() == "serde::__private::de::content::Content"
{
let ident = std::str::from_utf8(ident)?;

if self.bytes.peek() == Some(b'(') {
visitor.visit_map(SerdeEnumContent {
de: self,
ident: Some(ident),
})
} else {
visitor.visit_str(ident)
}
} else {
self.handle_any_struct(visitor)
};
return self.handle_any_struct(visitor, Some(std::str::from_utf8(ident)?));
}

match self.bytes.peek_or_eof()? {
b'(' => self.handle_any_struct(visitor),
b'(' => self.handle_any_struct(visitor, None),
b'[' => self.deserialize_seq(visitor),
b'{' => self.deserialize_map(visitor),
b'0'..=b'9' | b'+' | b'-' => {
Expand Down Expand Up @@ -966,6 +976,11 @@ impl<'de, 'a> de::MapAccess<'de> for SerdeEnumContent<'a, 'de> {
{
self.de.bytes.skip_ws()?;

seed.deserialize(&mut *self.de)
let old_serde_content_newtype = self.de.serde_content_newtype;
self.de.serde_content_newtype = true;
let result = seed.deserialize(&mut *self.de);
self.de.serde_content_newtype = old_serde_content_newtype;

result
}
}
73 changes: 65 additions & 8 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -427,17 +427,67 @@ impl<'a> Bytes<'a> {
.map_or(false, |&b| is_ident_other_char(b))
}

/// Should only be used on a working copy
pub fn check_tuple_struct(mut self) -> Result<bool> {
if self.identifier().is_err() {
// if there's no field ident, this is a tuple struct
return Ok(true);
pub fn check_struct_type(&mut self) -> Result<StructType> {
fn check_struct_type_inner(bytes: &mut Bytes) -> Result<StructType> {
if !bytes.consume("(") {
return Ok(StructType::Unit);
}

bytes.skip_ws()?;

if bytes.identifier().is_ok() {
bytes.skip_ws()?;

match bytes.peek() {
// Definitely a struct with named fields
Some(b':') => return Ok(StructType::Named),
// Definitely a tuple struct with fields
Some(b',') => return Ok(StructType::Tuple),
// Either a newtype or a tuple struct
Some(b')') => return Ok(StructType::NewtypeOrTuple),
// Something else, let's investigate further
_ => (),
};
}

let mut braces = 1;
let mut comma = false;

// Skip ahead to see if the value is followed by a comma
while braces > 0 {
// Skip spurious braces in comments and strings
bytes.skip_ws()?;
let _ = bytes.string();

let c = bytes.eat_byte()?;
if c == b'(' || c == b'[' || c == b'{' {
braces += 1;
} else if c == b')' || c == b']' || c == b'}' {
braces -= 1;
} else if c == b',' && braces == 1 {
comma = true;
break;
}
}

if comma {
Ok(StructType::Tuple)
} else {
Ok(StructType::NewtypeOrTuple)
}
}

self.skip_ws()?;
// Create a temporary working copy
let mut bytes = *self;

// if there is no colon after the ident, this can only be a unit struct
self.eat_byte().map(|c| c != b':')
let result = check_struct_type_inner(&mut bytes);

if result.is_err() {
// Adjust the error span to fit the working copy
*self = bytes;
}

result
}

/// Only returns true if the char after `ident` cannot belong
Expand Down Expand Up @@ -996,6 +1046,13 @@ pub enum ParsedStr<'a> {
Slice(&'a str),
}

/// Syntactic shape of a struct-like value, as classified by
/// `Bytes::check_struct_type` from a look-ahead over the input.
///
/// The derives make the classification cheap to pass around, comparable in
/// conditions and tests, and printable in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StructType {
    /// Parenthesised content with no comma at the top nesting level: this is
    /// either a newtype or a tuple struct, and syntax alone cannot tell which.
    NewtypeOrTuple,
    /// Parenthesised content with a comma at the top nesting level:
    /// definitely a tuple struct with fields.
    Tuple,
    /// The first item after `(` is an identifier followed by `:`:
    /// definitely a struct with named fields.
    Named,
    /// No opening `(` at all.
    Unit,
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Binary file modified tests/307_stack_overflow.rs
Binary file not shown.
Loading

0 comments on commit ff44ec4

Please sign in to comment.