diff --git a/src/error.rs b/src/error.rs index 470fcc0..ab879f3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -31,17 +31,17 @@ impl fmt::Display for InsertError { Self::Conflict { with } => { write!( f, - "insertion failed due to conflict with previously registered route: {}", + "Insertion failed due to conflict with previously registered route: {}", with ) } Self::InvalidParamSegment => { - write!(f, "only one parameter is allowed per path segment") + write!(f, "Only one parameter is allowed per path segment") } - Self::InvalidParam => write!(f, "parameters must be registered with a valid name"), + Self::InvalidParam => write!(f, "Parameters must be registered with a valid name"), Self::InvalidCatchAll => write!( f, - "catch-all parameters are only allowed at the end of a route" + "Catch-all parameters are only allowed at the end of a route" ), } } @@ -50,6 +50,9 @@ impl fmt::Display for InsertError { impl std::error::Error for InsertError {} impl InsertError { + /// Returns an error for a route conflict with the given node. + /// + /// This method attempts to find the full conflicting route. pub(crate) fn conflict( route: &UnescapedRoute, prefix: UnescapedRef<'_>, @@ -57,35 +60,39 @@ impl InsertError { ) -> Self { let mut route = route.clone(); - // The new route would have had to replace the current node in the tree. - if prefix.inner() == current.prefix.inner() { - denormalize_params(&mut route, ¤t.param_remapping); + // The route is conflicting with the current node. + if prefix.unescaped() == current.prefix.unescaped() { + denormalize_params(&mut route, ¤t.remapping); return InsertError::Conflict { - with: String::from_utf8(route.into_inner()).unwrap(), + with: String::from_utf8(route.into_unescaped()).unwrap(), }; } + // Remove the non-matching suffix from the route. route.truncate(route.len() - prefix.len()); + // Add the conflicting prefix. if !route.ends_with(¤t.prefix) { route.append(¤t.prefix); } + // Add the prefixes of any conflicting children. + let mut child = current.children.first(); + while let Some(node) = child { + route.append(&node.prefix); + child = node.children.first(); + } + + // Denormalize any route parameters. let mut last = current; while let Some(node) = last.children.first() { last = node; } + denormalize_params(&mut route, &last.remapping); - let mut current = current.children.first(); - while let Some(node) = current { - route.append(&node.prefix); - current = node.children.first(); - } - - denormalize_params(&mut route, &last.param_remapping); - + // Return the conflicting route. InsertError::Conflict { - with: String::from_utf8(route.into_inner()).unwrap(), + with: String::from_utf8(route.into_unescaped()).unwrap(), } } } @@ -113,7 +120,7 @@ pub enum MatchError { impl fmt::Display for MatchError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "matching route not found") + write!(f, "Matching route not found") } } diff --git a/src/escape.rs b/src/escape.rs index 91579f3..ef8ddbf 100644 --- a/src/escape.rs +++ b/src/escape.rs @@ -1,10 +1,11 @@ use std::{fmt, ops::Range}; -/// An uescaped route that keeps track of the position of escaped characters ('{{' or '}}'). +/// An unescaped route that keeps track of the position of escaped characters ('{{' or '}}'). /// /// Note that this type dereferences to `&[u8]`. #[derive(Clone, Default)] pub struct UnescapedRoute { + // The raw unescaped route. inner: Vec, escaped: Vec, } @@ -40,10 +41,10 @@ impl UnescapedRoute { range: Range, replace: Vec, ) -> impl Iterator + '_ { - // ignore any escaped characters in the range being replaced + // Ignore any escaped characters in the range being replaced. self.escaped.retain(|x| !range.contains(x)); - // update the escaped indices + // Update the escaped indices. let offset = (replace.len() as isize) - (range.len() as isize); for i in &mut self.escaped { if *i > range.end { @@ -78,13 +79,13 @@ impl UnescapedRoute { } } - /// Returns a reference to the inner slice. - pub fn inner(&self) -> &[u8] { + /// Returns a reference to the unescaped slice. + pub fn unescaped(&self) -> &[u8] { &self.inner } - /// Returns the inner slice. - pub fn into_inner(self) -> Vec { + /// Returns the unescaped route. + pub fn into_unescaped(self) -> Vec { self.inner } } @@ -131,7 +132,7 @@ impl<'a> UnescapedRef<'a> { } } - /// Returns true if the character at the given index was escaped. + /// Returns `true` if the character at the given index was escaped. pub fn is_escaped(&self, i: usize) -> bool { if let Some(i) = i.checked_add_signed(-self.offset) { return self.escaped.contains(&i); @@ -158,8 +159,8 @@ impl<'a> UnescapedRef<'a> { } } - /// Returns a reference to the inner slice. - pub fn inner(&self) -> &[u8] { + /// Returns a reference to the unescaped slice. + pub fn unescaped(&self) -> &[u8] { self.inner } } diff --git a/src/params.rs b/src/params.rs index ffb6846..625b544 100644 --- a/src/params.rs +++ b/src/params.rs @@ -3,6 +3,9 @@ use std::{fmt, iter, mem, slice}; /// A single URL parameter, consisting of a key and a value. #[derive(PartialEq, Eq, Ord, PartialOrd, Default, Copy, Clone)] struct Param<'k, 'v> { + // Keys and values are stored as byte slices internally by the router + // to avoid UTF8 checks when slicing, but UTF8 is still respected, + // so these slices are valid strings. key: &'k [u8], value: &'v [u8], } @@ -13,11 +16,12 @@ impl<'k, 'v> Param<'k, 'v> { value: b"", }; - // this could be from_utf8_unchecked, but we'll keep this safe for now + // Returns the parameter key as a string. fn key_str(&self) -> &'k str { std::str::from_utf8(self.key).unwrap() } + // Returns the parameter value as a string. fn value_str(&self) -> &'v str { std::str::from_utf8(self.value).unwrap() } @@ -31,12 +35,12 @@ impl<'k, 'v> Param<'k, 'v> { /// # router.insert("/users/{id}", true).unwrap(); /// let matched = router.at("/users/1")?; /// -/// // you can iterate through the keys and values +/// // Iterate through the keys and values. /// for (key, value) in matched.params.iter() { /// println!("key: {}, value: {}", key, value); /// } /// -/// // or get a specific value by key +/// // Get a specific value by name. /// let id = matched.params.get("id"); /// assert_eq!(id, Some("1")); /// # Ok(()) @@ -47,9 +51,11 @@ pub struct Params<'k, 'v> { kind: ParamsKind<'k, 'v>, } -// most routes have 1-3 dynamic parameters, so we can avoid a heap allocation in common cases. +// Most routes have a small number of dynamic parameters, so we can avoid +// heap allocations in the common case. const SMALL: usize = 3; +// A list of parameters, optimized to avoid allocations when possible. #[derive(PartialEq, Eq, Ord, PartialOrd, Clone)] enum ParamsKind<'k, 'v> { Small([Param<'k, 'v>; SMALL], usize), @@ -71,6 +77,7 @@ impl<'k, 'v> Params<'k, 'v> { } } + // Truncates the parameter list to the given length. pub(crate) fn truncate(&mut self, n: usize) { match &mut self.kind { ParamsKind::Small(_, len) => *len = n, @@ -133,7 +140,7 @@ impl<'k, 'v> Params<'k, 'v> { } } - // Transform each key. + // Applies a transformation function to each key. pub(crate) fn for_each_key_mut(&mut self, f: impl Fn((usize, &mut &'k [u8]))) { match &mut self.kind { ParamsKind::Small(arr, len) => arr @@ -191,6 +198,7 @@ impl<'ps, 'k, 'v> Iterator for ParamsIter<'ps, 'k, 'v> { } } } + impl ExactSizeIterator for ParamsIter<'_, '_, '_> { fn len(&self) -> usize { match self.kind { diff --git a/src/router.rs b/src/router.rs index b087659..529d20a 100644 --- a/src/router.rs +++ b/src/router.rs @@ -23,7 +23,7 @@ impl Router { Self::default() } - /// Insert a route. + /// Insert a route into the router. /// /// # Examples /// @@ -37,7 +37,7 @@ impl Router { /// # } /// ``` pub fn insert(&mut self, route: impl Into, value: T) -> Result<(), InsertError> { - self.root.insert(route, value) + self.root.insert(route.into(), value) } /// Tries to find a value in the router matching the given path. @@ -58,7 +58,7 @@ impl Router { pub fn at<'m, 'p>(&'m self, path: &'p str) -> Result, MatchError> { match self.root.at(path.as_bytes()) { Ok((value, params)) => Ok(Match { - // SAFETY: We only expose &mut T through &mut self + // Safety: We only expose `&mut T` through `&mut self` value: unsafe { &*value.get() }, params, }), @@ -88,7 +88,7 @@ impl Router { ) -> Result, MatchError> { match self.root.at(path.as_bytes()) { Ok((value, params)) => Ok(Match { - // SAFETY: We have &mut self + // Safety: We have `&mut self` value: unsafe { &mut *value.get() }, params, }), @@ -98,7 +98,8 @@ impl Router { /// Remove a given route from the router. /// - /// Returns the value stored under the route if it was found. If the route was not found or invalid, `None` is returned. + /// Returns the value stored under the route if it was found. + /// If the route was not found or invalid, `None` is returned. /// /// # Examples /// @@ -125,7 +126,7 @@ impl Router { /// assert_eq!(router.remove("/home/{id}/"), Some("Hello!")); /// ``` pub fn remove(&mut self, path: impl Into) -> Option { - self.root.remove(path) + self.root.remove(path.into()) } #[cfg(feature = "__test_helpers")] @@ -140,6 +141,7 @@ impl Router { pub struct Match<'k, 'v, V> { /// The value stored under the matched node. pub value: V, + /// The route parameters. See [parameters](crate#parameters) for more details. pub params: Params<'k, 'v>, } diff --git a/src/tree.rs b/src/tree.rs index dcde036..08e77cc 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -6,81 +6,95 @@ use std::cmp::min; use std::ops::Range; use std::{fmt, mem}; -/// The types of nodes the tree can hold -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone)] -pub(crate) enum NodeType { - /// The root path - Root, - /// A route parameter, ex: `/{id}`. - Param, - /// A catchall parameter, ex: `/*file` - CatchAll, - /// Anything else - Static, -} - /// A radix tree used for URL path matching. /// /// See [the crate documentation](crate) for details. pub struct Node { - priority: u32, - wild_child: bool, - indices: Vec, - // see `at` for why an unsafe cell is needed - value: Option>, - pub(crate) param_remapping: ParamRemapping, - pub(crate) node_type: NodeType, + // This node's prefix. pub(crate) prefix: UnescapedRoute, + // The priority of this node. + // + // Nodes with more children are higher priority and searched first. + pub(crate) priority: u32, + // Whether this node contains a wildcard child. + pub(crate) wild_child: bool, + // The first character of any static children, for fast linear search. + pub(crate) indices: Vec, + // The type of this node. + pub(crate) node_type: NodeType, pub(crate) children: Vec, + // The value stored at this node. + // + // See `Node::at` for why an `UnsafeCell` is necessary. + value: Option>, + // Parameter name remapping, stored at nodes that hold values. + pub(crate) remapping: ParamRemapping, } -// SAFETY: we expose `value` per rust's usual borrowing rules, so we can just delegate these traits +/// The types of nodes a tree can hold. +#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone)] +pub(crate) enum NodeType { + /// The root path. + Root, + /// A route parameter, e.g. `/{id}`. + Param, + /// A catch-all parameter, e.g. `/*file`. + CatchAll, + /// A static prefix, e.g. `/foo`. + Static, +} + +/// Safety: We expose `value` per Rust's usual borrowing rules, so we can just +/// delegate these traits. unsafe impl Send for Node {} unsafe impl Sync for Node {} impl Node { - pub fn insert(&mut self, route: impl Into, val: T) -> Result<(), InsertError> { - let route = route.into().into_bytes(); - let route = UnescapedRoute::new(route); - let (route, param_remapping) = normalize_params(route)?; - let mut prefix = route.as_ref(); + // Insert a route into the tree. + pub fn insert(&mut self, route: String, val: T) -> Result<(), InsertError> { + let route = UnescapedRoute::new(route.into_bytes()); + let (route, remapping) = normalize_params(route)?; + let mut remaining = route.as_ref(); self.priority += 1; - // the tree is empty + // If the tree is empty, insert the root node. if self.prefix.is_empty() && self.children.is_empty() { - let last = self.insert_child(prefix, val)?; - last.param_remapping = param_remapping; + let last = self.insert_route(remaining, val)?; + last.remapping = remapping; self.node_type = NodeType::Root; return Ok(()); } let mut current = self; - 'walk: loop { - // find the longest common prefix - let len = min(prefix.len(), current.prefix.len()); + // Find the common prefix between the route and the current node. + let len = min(remaining.len(), current.prefix.len()); let common_prefix = (0..len) .find(|&i| { - prefix[i] != current.prefix[i] - || prefix.is_escaped(i) != current.prefix.is_escaped(i) + remaining[i] != current.prefix[i] + // Make sure not confuse the start of a wildcard with an escaped `{`. + || remaining.is_escaped(i) != current.prefix.is_escaped(i) }) .unwrap_or(len); - // the common prefix is a substring of the current node's prefix, split the node - if common_prefix < current.prefix.len() { + // If this node has a longer prefix than we need, we have to fork and extract the + // common prefix into a shared parent. + if current.prefix.len() > common_prefix { + // Move the non-matching suffix into a child node. + let suffix = current.prefix.as_ref().slice_off(common_prefix).to_owned(); let child = Node { - prefix: current.prefix.as_ref().slice_off(common_prefix).to_owned(), - children: mem::take(&mut current.children), - wild_child: current.wild_child, - indices: current.indices.clone(), + prefix: suffix, value: current.value.take(), - param_remapping: mem::take(&mut current.param_remapping), + indices: current.indices.clone(), + wild_child: current.wild_child, + children: mem::take(&mut current.children), + remapping: mem::take(&mut current.remapping), priority: current.priority - 1, - ..Node::default() + node_type: NodeType::Static, }; - // the current node now holds only the common prefix + // The current node now only holds the common prefix. current.children = vec![child]; current.indices = vec![current.prefix[common_prefix]]; current.prefix = current @@ -89,154 +103,162 @@ impl Node { .slice_until(common_prefix) .to_owned(); current.wild_child = false; + continue; } - // the route has a common prefix, search deeper - if prefix.len() > common_prefix { - prefix = prefix.slice_off(common_prefix); - - let next = prefix[0]; - - // `/` after param - if current.node_type == NodeType::Param - && next == b'/' - && current.children.len() == 1 - { - current = &mut current.children[0]; - current.priority += 1; - - continue 'walk; + if remaining.len() == common_prefix { + // This node must not already contain a value. + if current.value.is_some() { + return Err(InsertError::conflict(&route, remaining, current)); } - // find a child that matches the next path byte - for mut i in 0..current.indices.len() { - // found a match - if next == current.indices[i] { - // the indice matches literally, but it's actually the start of a wildcard - if matches!(next, b'{' | b'}') && !prefix.is_escaped(0) { - continue; - } - - i = current.update_child_priority(i); - current = &mut current.children[i]; - continue 'walk; - } - } + // Insert the value. + current.value = Some(UnsafeCell::new(val)); + current.remapping = remapping; + return Ok(()); + } - // not a wildcard and there is no matching child node, create a new one - if (!matches!(next, b'{') || prefix.is_escaped(0)) - && current.node_type != NodeType::CatchAll - { - current.indices.push(next); - let mut child = current.add_child(Node::default()); - child = current.update_child_priority(child); - - // insert into the new node - let last = current.children[child].insert_child(prefix, val)?; - last.param_remapping = param_remapping; - return Ok(()); - } + // Otherwise, the route has a remaining non-matching suffix. + // + // We have to search deeper. + remaining = remaining.slice_off(common_prefix); + let next = remaining[0]; + + // After matching against a wildcard the next character is always `/`. + // + // Continue searching in the child node if it already exists. + if current.node_type == NodeType::Param && current.children.len() == 1 { + debug_assert_eq!(next, b'/'); + current = &mut current.children[0]; + current.priority += 1; + continue 'walk; + } - // inserting a wildcard, and this node already has a wildcard child - if current.wild_child { - // wildcards are always at the end - current = current.children.last_mut().unwrap(); - current.priority += 1; - - // make sure the wildcard matches - if prefix.len() < current.prefix.len() - || *current.prefix != prefix[..current.prefix.len()] - // catch-alls cannot have children - || current.node_type == NodeType::CatchAll - // check for longer wildcard, e.g. :name and :names - || (current.prefix.len() < prefix.len() - && prefix[current.prefix.len()] != b'/') - { - return Err(InsertError::conflict(&route, prefix, current)); + // Find a child node that matches the next character in the route. + for mut i in 0..current.indices.len() { + if next == current.indices[i] { + // Make sure not confuse the start of a wildcard with an escaped `{` or `}`. + if matches!(next, b'{' | b'}') && !remaining.is_escaped(0) { + continue; } + // Continue searching in the child. + i = current.update_child_priority(i); + current = &mut current.children[i]; continue 'walk; } + } - // otherwise, create the wildcard node - let last = current.insert_child(prefix, val)?; - last.param_remapping = param_remapping; + // We couldn't find a matching child. + // + // If we're not inserting a wildcard we have to create a child. + if (!matches!(next, b'{') || remaining.is_escaped(0)) + && current.node_type != NodeType::CatchAll + { + current.indices.push(next); + let mut child = current.add_child(Node::default()); + child = current.update_child_priority(child); + + // Insert into the newly created node. + let last = current.children[child].insert_route(remaining, val)?; + last.remapping = remapping; return Ok(()); } - // exact match, this node should be empty - if current.value.is_some() { - return Err(InsertError::conflict(&route, prefix, current)); - } + // We're trying to insert a wildcard. + // + // If this node already has a wildcard child, we have to make sure it matches. + if current.wild_child { + // Wildcards are always the last child. + current = current.children.last_mut().unwrap(); + current.priority += 1; - // add the value to current node - current.value = Some(UnsafeCell::new(val)); - current.param_remapping = param_remapping; + // Make sure the route parameter matches. + if let Some(wildcard) = remaining.get(..current.prefix.len()) { + if *wildcard != *current.prefix { + return Err(InsertError::conflict(&route, remaining, current)); + } + } + + // Catch-all routes cannot have children. + if current.node_type == NodeType::CatchAll { + return Err(InsertError::conflict(&route, remaining, current)); + } + // Continue with the wildcard node. + continue 'walk; + } + + // Otherwise, create a new node for the wildcard and insert the route. + let last = current.insert_route(remaining, val)?; + last.remapping = remapping; return Ok(()); } } - /// removes a route from the tree, returning the value if the route existed. - /// the provided path should be the same as the one used to insert the route (including wildcards). - pub fn remove(&mut self, full_path: impl Into) -> Option { - let mut current = self; - let unescaped = UnescapedRoute::new(full_path.into().into_bytes()); - let (full_path, param_remapping) = normalize_params(unescaped).ok()?; - let mut path: &[u8] = full_path.inner(); - - // specific case if we are removing the root node - if path == current.prefix.inner() { - let val = current.value.take().map(UnsafeCell::into_inner); - // if the root node has no children, we can just reset it - if current.children.is_empty() { - *current = Self::default(); + /// Removes a route from the tree, returning the value if the route already existed. + /// + /// The provided path should be the same as the one used to insert the route, including + /// wildcards. + pub fn remove(&mut self, route: String) -> Option { + let route = UnescapedRoute::new(route.into_bytes()); + let (route, remapping) = normalize_params(route).ok()?; + let mut remaining = route.unescaped(); + + // Check if we are removing the root node. + if remaining == self.prefix.unescaped() { + let value = self.value.take().map(UnsafeCell::into_inner); + + // If the root node has no children, we can reset it. + if self.children.is_empty() { + *self = Node::default(); } - return val; + return value; } + let mut current = self; 'walk: loop { - // the path is longer than this node's prefix, we are expecting a child node - if path.len() > current.prefix.len() { - let (prefix, rest) = path.split_at(current.prefix.len()); + // The path is longer than this node's prefix, search deeper. + if remaining.len() > current.prefix.len() { + let (prefix, rest) = remaining.split_at(current.prefix.len()); - // the prefix matches - if prefix == current.prefix.inner() { + // The prefix matches. + if prefix == current.prefix.unescaped() { let first = rest[0]; - path = rest; + remaining = rest; - // if there is only one child we can continue with the child node + // If there is a single child node, we can continue searching in the child. if current.children.len() == 1 { - if current.children[0].prefix.inner() == rest { - return current.remove_child(0, ¶m_remapping); + // The route matches, remove the node. + if current.children[0].prefix.unescaped() == remaining { + return current.remove_child(0, &remapping); } + // Otherwise, continue searching. current = &mut current.children[0]; continue 'walk; } - // if there are many we get the index of the child matching the first byte + // Find a child node that matches the next character in the route. if let Some(i) = current.indices.iter().position(|&c| c == first) { - // continue with the child node - if current.children[i].prefix.inner() == rest { - return current.remove_child(i, ¶m_remapping); + // The route matches, remove the node. + if current.children[i].prefix.unescaped() == remaining { + return current.remove_child(i, &remapping); } + // Otherwise, continue searching. current = &mut current.children[i]; continue 'walk; } - // if this node has a wildcard child and that it matches our standardized path - // we continue with that + // If the node has a matching wildcard child, continue searching in the child. if current.wild_child - && !current.children.is_empty() - && rest.first().zip(rest.get(2)) == Some((&b'{', &b'}')) + && remaining.first().zip(remaining.get(2)) == Some((&b'{', &b'}')) { - // continue with the wildcard child - if current.children.last_mut().unwrap().prefix.inner() == rest { - return current - .remove_child(current.children.len() - 1, ¶m_remapping); + // The route matches, remove the node. + if current.children.last_mut().unwrap().prefix.unescaped() == remaining { + return current.remove_child(current.children.len() - 1, &remapping); } current = current.children.last_mut().unwrap(); @@ -245,41 +267,51 @@ impl Node { } } + // Could not find a match. return None; } } - /// remove the i'th child of this node - fn remove_child(&mut self, i: usize, param_remapping: &ParamRemapping) -> Option { - if self.children[i].param_remapping != *param_remapping { + /// Remove the child node at the given index, if the route parameters match. + fn remove_child(&mut self, i: usize, remapping: &ParamRemapping) -> Option { + // Require an exact match to remove a route. + // + // For example, `/{a}` cannot be used to remove `/{b}`. + if self.children[i].remapping != *remapping { return None; } - // if the node we are dropping doesn't have any children, we can remove it - let val = if self.children[i].children.is_empty() { - // if the parent self only has one child there are no indices + // If the node does not have any children, we can remove it completely. + let value = if self.children[i].children.is_empty() { + // Removing a single child with no indices. if self.children.len() == 1 && self.indices.is_empty() { self.wild_child = false; self.children.remove(0).value.take() } else { + // Remove the child node. let child = self.children.remove(i); - // indices are only used for static selfs - if child.node_type == NodeType::Static { - self.indices.remove(i); - } else { - // it was a dynamic self, we remove the wildcard child flag - self.wild_child = false; + + match child.node_type { + // Remove the index if we removed a static prefix. + NodeType::Static => { + self.indices.remove(i); + } + // Otherwise, we removed a wildcard. + _ => self.wild_child = false, } + child.value } - } else { + } + // Otherwise, remove the value but preserve the node. + else { self.children[i].value.take() }; - val.map(UnsafeCell::into_inner) + value.map(UnsafeCell::into_inner) } - // add a child node, keeping wildcards at the end + // Adds a child to this node, keeping wildcards at the end. fn add_child(&mut self, child: Node) -> usize { let len = self.children.len(); @@ -292,22 +324,21 @@ impl Node { } } - // increments priority of the given child and reorders if necessary. + // Increments priority of the given child node, reordering the children if necessary. // - // returns the new index of the child + // Returns the new index of the node. fn update_child_priority(&mut self, i: usize) -> usize { self.children[i].priority += 1; let priority = self.children[i].priority; - // adjust position (move to front) + // Move the node to the front as necessary. let mut updated = i; while updated > 0 && self.children[updated - 1].priority < priority { - // swap node positions self.children.swap(updated - 1, updated); updated -= 1; } - // update the index position + // Update the position of the indices to match. if updated != i { self.indices[updated..=i].rotate_right(1); } @@ -315,8 +346,8 @@ impl Node { updated } - // insert a child node at this node - fn insert_child( + // Insert a route at this node. + fn insert_route( &mut self, mut prefix: UnescapedRef<'_>, val: T, @@ -324,10 +355,10 @@ impl Node { let mut current = self; loop { - // search for a wildcard segment + // Search for a wildcard segment. let wildcard = match find_wildcard(prefix)? { - Some(w) => w, - // no wildcard, simply use the current node + Some(wildcard) => wildcard, + // There is no wildcard, simply insert into the current node. None => { current.value = Some(UnsafeCell::new(val)); current.prefix = prefix.to_owned(); @@ -335,19 +366,20 @@ impl Node { } }; - // catch-all route + // Insering a catch-all route. if prefix[wildcard.clone()][1] == b'*' { - // "/foo/*x/bar" + // Ensure there is no suffix after the parameter, e.g. `/foo/{*x}/bar`. if wildcard.end != prefix.len() { return Err(InsertError::InvalidCatchAll); } - // insert prefix before the current wildcard + // Add the prefix before the wildcard into the current node. if wildcard.start > 0 { current.prefix = prefix.slice_until(wildcard.start).to_owned(); prefix = prefix.slice_off(wildcard.start); } + // Add the catch-all as a child node. let child = Self { prefix: prefix.to_owned(), node_type: NodeType::CatchAll, @@ -358,51 +390,59 @@ impl Node { let i = current.add_child(child); current.wild_child = true; - return Ok(&mut current.children[i]); - } else if prefix[wildcard.clone()][0] == b'{' { - // insert prefix before the current wildcard - if wildcard.start > 0 { - current.prefix = prefix.slice_until(wildcard.start).to_owned(); - prefix = prefix.slice_off(wildcard.start); - } + } + + // Otherwise, we're inserting a regular route parameter. + assert_eq!(prefix[wildcard.clone()][0], b'{'); + + // Add the prefix before the wildcard into the current node. + if wildcard.start > 0 { + current.prefix = prefix.slice_until(wildcard.start).to_owned(); + prefix = prefix.slice_off(wildcard.start); + } + // Add the parameter as a child node. + let child = Self { + node_type: NodeType::Param, + prefix: prefix.slice_until(wildcard.len()).to_owned(), + ..Self::default() + }; + + let child = current.add_child(child); + current.wild_child = true; + current = &mut current.children[child]; + current.priority += 1; + + // If the route doesn't end in the wildcard, we have to insert the suffix as a child. + if wildcard.len() < prefix.len() { + prefix = prefix.slice_off(wildcard.len()); let child = Self { - node_type: NodeType::Param, - prefix: prefix.slice_until(wildcard.len()).to_owned(), + priority: 1, ..Self::default() }; let child = current.add_child(child); - current.wild_child = true; current = &mut current.children[child]; - current.priority += 1; - - // if the route doesn't end with the wildcard, then there - // will be another non-wildcard subroute starting with '/' - if wildcard.len() < prefix.len() { - prefix = prefix.slice_off(wildcard.len()); - let child = Self { - priority: 1, - ..Self::default() - }; - - let child = current.add_child(child); - current = &mut current.children[child]; - continue; - } - - // otherwise we're done. Insert the value in the new leaf - current.value = Some(UnsafeCell::new(val)); - return Ok(current); + continue; } + + // Finally, insert the value. + current.value = Some(UnsafeCell::new(val)); + return Ok(current); } } } +/// A wildcard node that was skipped during a tree search. +/// +/// Contains the state necessary to backtrack to the given node. struct Skipped<'n, 'p, T> { - path: &'p [u8], + // The node that was skipped. node: &'n Node, + /// The path at the time we skipped this node. + path: &'p [u8], + // The number of parameters that were present. params: usize, } @@ -411,10 +451,11 @@ macro_rules! backtracker { ($skipped_nodes:ident, $path:ident, $current:ident, $params:ident, $backtracking:ident, $walk:lifetime) => { macro_rules! try_backtrack { () => { - // try backtracking to any matching wildcard nodes we skipped while traversing - // the tree + // Try backtracking to any matching wildcard nodes that we skipped while + // traversing the tree. while let Some(skipped) = $skipped_nodes.pop() { if skipped.path.ends_with($path) { + // Restore the search state. $path = skipped.path; $current = &skipped.node; $params.truncate(skipped.params); @@ -428,13 +469,14 @@ macro_rules! backtracker { } impl Node { - // it's a bit sad that we have to introduce unsafe here but rust doesn't really have a way - // to abstract over mutability, so `UnsafeCell` lets us avoid having to duplicate logic between - // `at` and `at_mut` - pub fn at<'n, 'p>( - &'n self, - full_path: &'p [u8], - ) -> Result<(&'n UnsafeCell, Params<'n, 'p>), MatchError> { + // Returns the node matching the given path. + // + // Returning an `UnsafeCell` allows us to avoid duplicating the logic between `Node::at` and + // `Node::at_mut`, as Rust doesn't have a great way of abstracting over mutability. + pub fn at<'node, 'path>( + &'node self, + full_path: &'path [u8], + ) -> Result<(&'node UnsafeCell, Params<'node, 'path>), MatchError> { let mut current = self; let mut path = full_path; let mut backtracking = false; @@ -444,22 +486,25 @@ impl Node { 'walk: loop { backtracker!(skipped_nodes, path, current, params, backtracking, 'walk); - // the path is longer than this node's prefix, we are expecting a child node + // The path is longer than this node's prefix, search deeper. if path.len() > current.prefix.len() { let (prefix, rest) = path.split_at(current.prefix.len()); - // the prefix matches + // Found a matching prefix. if *prefix == *current.prefix { let first = rest[0]; let consumed = path; path = rest; - // try searching for a matching static child unless we are currently - // backtracking, which would mean we already traversed them + // If we are currently backtracking, avoid searching static children + // that we already searched. if !backtracking { + // Find a child node that matches the next character in the path. if let Some(i) = current.indices.iter().position(|&c| c == first) { - // keep track of wildcard routes we skipped to backtrack to later if - // we don't find a math + // Keep track of wildcard routes that we skip. + // + // We may end up needing to backtrack later in case we do not find a + // match. if current.wild_child { skipped_nodes.push(Skipped { path: consumed, @@ -468,85 +513,93 @@ impl Node { }); } - // continue with the child node + // Continue searching. current = ¤t.children[i]; continue 'walk; } } - // we didn't find a match and there are no children with wildcards, there is no match + // We didn't find a matching static child. + // + // If there are no wildcards, then there are no matching routes in the tree. if !current.wild_child { - // try backtracking + // Try backtracking in case we skipped a wildcard that may match. try_backtrack!(); - - // nothing found return Err(MatchError::NotFound); } - // handle the wildcard child, which is always at the end of the list + // Continue searching in the wildcard, which are kept at the end of the list. current = current.children.last().unwrap(); - match current.node_type { + // Match against a route parameter. NodeType::Param => { - // check if there are more segments in the path other than this parameter + // Check for more path segments. match path.iter().position(|&c| c == b'/') { + // Found another segment. Some(i) => { let (param, rest) = path.split_at(i); + // If there is a static child, continue the search. if let [child] = current.children.as_slice() { - // store the parameter value + // Store the parameter value. + // + // Parameters are normalized so the key is irrelevant at this point. params.push(b"", param); - // continue with the child node + // Continue searching. path = rest; current = child; backtracking = false; continue 'walk; } - // try backtracking + // Try backtracking in case we skipped a wildcard that may match. try_backtrack!(); + // Otherwise, there are no matching routes in the tree. return Err(MatchError::NotFound); } - // this is the last path segment + // This is the last path segment. None => { - // store the parameter value - params.push(b"", path); - - // found the matching value + // Found the matching value. if let Some(ref value) = current.value { - // remap parameter keys + // Store the parameter value. + params.push(b"", path); + + // Remap the keys of any route parameters we accumulated during the search. params.for_each_key_mut(|(i, key)| { - *key = ¤t.param_remapping[i] + *key = ¤t.remapping[i] }); return Ok((value, params)); } - // no match, try backtracking + // Try backtracking in case we skipped a wildcard that may match. try_backtrack!(); - // this node doesn't have the value, no match + // Otherwise, there are no matching routes in the tree. return Err(MatchError::NotFound); } } } NodeType::CatchAll => { - // catch all segments are only allowed at the end of the route, - // either this node has the value or there is no match + // Catch-all segments are only allowed at the end of the route. + // + // This node must contain the value if there is a match. return match current.value { + // Found the value. Some(ref value) => { - // remap parameter keys - params.for_each_key_mut(|(i, key)| { - *key = ¤t.param_remapping[i] - }); + // Remap the keys of any route parameters we accumulated during the search. + params + .for_each_key_mut(|(i, key)| *key = ¤t.remapping[i]); - // store the final catch-all parameter - params.push(¤t.prefix[2..current.prefix.len() - 1], path); + // Store the final catch-all parameter (`{*...}`). + let key = ¤t.prefix[2..current.prefix.len() - 1]; + params.push(key, path); Ok((value, params)) } + // Otherwise, there are no matching routes in the tree. None => Err(MatchError::NotFound), }; } @@ -555,27 +608,25 @@ impl Node { } } - // this is it, we should have reached the node containing the value + // Check for an exact match. if *path == *current.prefix { + // Found the matching value. if let Some(ref value) = current.value { - // remap parameter keys - params.for_each_key_mut(|(i, key)| *key = ¤t.param_remapping[i]); + // Remap the keys of any route parameters we accumulated during the search. + params.for_each_key_mut(|(i, key)| *key = ¤t.remapping[i]); return Ok((value, params)); } - - // nope, try backtracking - try_backtrack!(); - - return Err(MatchError::NotFound); } - // last chance, try backtracking + // Try backtracking in case we skipped a wildcard that may match. try_backtrack!(); + // Otherwise, there are no matching routes in the tree. return Err(MatchError::NotFound); } } + /// Test helper that ensures route priorities are consistent. #[cfg(feature = "__test_helpers")] pub fn check_priorities(&self) -> Result { let mut priority: u32 = 0; @@ -595,11 +646,16 @@ impl Node { } } -/// An ordered list of route parameters keys for a specific route, stored at leaf nodes. +/// An ordered list of route parameters keys for a specific route. +/// +/// To support conflicting routes like `/{a}/foo` and `/{b}/bar`, route parameters +/// are normalized before being inserted into the tree. Parameter remapping are +/// stored at nodes containing values, containing the "true" names of all route parameters +/// for the given route. type ParamRemapping = Vec>; /// Returns `path` with normalized route parameters, and a parameter remapping -/// to store at the leaf node for this route. +/// to store at the node for this route. /// /// Note that the parameter remapping may contain unescaped characters. fn normalize_params( @@ -608,43 +664,46 @@ fn normalize_params( let mut start = 0; let mut original = ParamRemapping::new(); - // parameter names are normalized alphabetically + // Parameter names are normalized alphabetically. let mut next = b'a'; loop { + // Find a wildcard to normalize. let mut wildcard = match find_wildcard(path.as_ref().slice_off(start))? { - Some(w) => w, + Some(wildcard) => wildcard, + // No wildcard, we are done. None => return Ok((path, original)), }; wildcard.start += start; wildcard.end += start; - // makes sure the param has a valid name + // Ensure the parameter has a valid name. if wildcard.len() < 2 { return Err(InsertError::InvalidParam); } - // don't need to normalize catch-all parameters + // We don't need to normalize catch-all parameters, as they are always + // at the end of a route. if path[wildcard.clone()][1] == b'*' { start = wildcard.end; continue; } - // normalize the parameter + // Normalize the parameter. let removed = path.splice(wildcard.clone(), vec![b'{', next, b'}']); - // remember the original name for remappings + // Preserve the original name for remapping. let mut removed = removed.skip(1).collect::>(); removed.pop(); original.push(removed); - // get the next key next += 1; if next > b'z' { - panic!("too many route parameters"); + panic!("Too many route parameters."); } + // Continue the search after the parameter we just normalized. start = wildcard.start + 3; } } @@ -655,7 +714,7 @@ pub(crate) fn denormalize_params(route: &mut UnescapedRoute, params: &ParamRemap let mut i = 0; loop { - // find the next wildcard + // Find a wildcard to denormalize. let mut wildcard = match find_wildcard(route.as_ref().slice_off(start)).unwrap() { Some(w) => w, None => return, @@ -664,15 +723,15 @@ pub(crate) fn denormalize_params(route: &mut UnescapedRoute, params: &ParamRemap wildcard.start += start; wildcard.end += start; + // Get the corresponding parameter remapping. let mut next = match params.get(i) { Some(param) => param.clone(), None => return, }; + // Denormalize this parameter. next.insert(0, b'{'); next.push(b'}'); - - // denormalize this parameter let _ = route.splice(wildcard.clone(), next.clone()); i += 1; @@ -683,34 +742,37 @@ pub(crate) fn denormalize_params(route: &mut UnescapedRoute, params: &ParamRemap // Searches for a wildcard segment and checks the path for invalid characters. fn find_wildcard(path: UnescapedRef<'_>) -> Result>, InsertError> { for (start, &c) in path.iter().enumerate() { - // unescaped closing brace without opening brace + // Found an unescaped closing brace without a corresponding opening brace. if c == b'}' && !path.is_escaped(start) { return Err(InsertError::InvalidParam); } - // keep going till we find an unescaped opening brace + // Keep going until we find an unescaped opening brace. if c != b'{' || path.is_escaped(start) { continue; } - // empty '{}' without parameter name + // Ensure there is a non-empty parameter name. if path.get(start + 1) == Some(&b'}') { return Err(InsertError::InvalidParam); } + // Find the corresponding closing brace. for (i, &c) in path.iter().enumerate().skip(start + 2) { match c { b'}' => { + // This closing brace was escaped, keep searching. if path.is_escaped(i) { continue; } + // Ensure catch-all parameters have a non-empty name. if path.get(i - 1) == Some(&b'*') { return Err(InsertError::InvalidParam); } if let Some(&c) = path.get(i + 1) { - // prefixes after params are currently unsupported + // Prefixes after route parameters are not supported. if c != b'/' { return Err(InsertError::InvalidParamSegment); } @@ -718,11 +780,13 @@ fn find_wildcard(path: UnescapedRef<'_>) -> Result>, InsertE return Ok(Some(start..i + 1)); } + // `*` and `/` are invalid in parameter names. b'*' | b'/' => return Err(InsertError::InvalidParam), _ => {} } } + // Missing closing brace. return Err(InsertError::InvalidParam); } @@ -735,7 +799,7 @@ where { fn clone(&self) -> Self { let value = self.value.as_ref().map(|value| { - // safety: we only expose &mut T through &mut self + // Safety: We only expose `&mut T` through `&mut self`. let value = unsafe { &*value.get() }; UnsafeCell::new(value.clone()) }); @@ -747,7 +811,7 @@ where node_type: self.node_type.clone(), indices: self.indices.clone(), children: self.children.clone(), - param_remapping: self.param_remapping.clone(), + remapping: self.remapping.clone(), priority: self.priority, } } @@ -756,7 +820,7 @@ where impl Default for Node { fn default() -> Self { Self { - param_remapping: ParamRemapping::new(), + remapping: ParamRemapping::new(), prefix: UnescapedRoute::default(), wild_child: false, node_type: NodeType::Static, @@ -768,13 +832,12 @@ impl Default for Node { } } -// visualize the tree structure when debugging impl fmt::Debug for Node where T: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // safety: we only expose &mut T through &mut self + // Safety: We only expose `&mut T` through `&mut self`. let value = unsafe { self.value.as_ref().map(|x| &*x.get()) }; let mut f = f.debug_struct("Node"); @@ -783,6 +846,7 @@ where .field("node_type", &self.node_type) .field("children", &self.children); + // Extra information for debugging purposes. #[cfg(test)] { let indices = self @@ -792,7 +856,7 @@ where .collect::>(); let params = self - .param_remapping + .remapping .iter() .map(|x| std::str::from_utf8(x).unwrap()) .collect::>();