Skip to content

Commit

Permalink
Improved docs and more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
zrho committed Jun 21, 2024
1 parent 37bd41d commit 22f62a0
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 38 deletions.
24 changes: 22 additions & 2 deletions hugr-sexpr/src/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ enum EscapedToken {
#[token(r#"\\"#, |_| '\\')]
Escaped(char),

#[regex(r#"\\u\{[a-fA-F0-9]{1,4}\}"#, |lex| parse_unicode(lex.slice()))]
#[regex(r#"\\u\{[a-fA-F0-9]+\}"#, |lex| parse_unicode(lex.slice()))]
Unicode(char),

#[regex(r#"[^\\]"#)]
Expand Down Expand Up @@ -108,7 +108,7 @@ pub fn escape_symbol(str: &str) -> String {

#[cfg(test)]
mod test {
use super::escape_symbol;
use super::{escape_string, escape_symbol, unescape};
use rstest::rstest;

#[rstest]
Expand All @@ -125,7 +125,27 @@ mod test {
#[case(r#"""#, r#"|"|"#)]
#[case("+any", "+any")]
#[case("-any", "-any")]
#[case("#symbol", "|#symbol|")]
fn test_escape_symbol(#[case] symbol: &str, #[case] expected: &str) {
assert_eq!(expected, escape_symbol(symbol));
}

/// Checks, case by case, that `escape_string` turns the raw input into the
/// expected escaped text.
#[rstest]
#[case("string", "string")]
#[case("\n", r"\n")]
#[case(r"\", r"\\")]
#[case(r#"""#, r#"\""#)]
#[case("|", "|")]
#[case("", "")]
fn test_escape_string(#[case] raw: &str, #[case] expected: &str) {
    let escaped = escape_string(raw);
    assert_eq!(expected, escaped);
}

/// Checks, case by case, that `unescape` succeeds on the escaped input and
/// yields the expected text.
#[rstest]
#[case(r#"\""#, r#"""#)]
#[case(r"\|", "|")]
#[case(r"\u{1F60A}", "\u{1F60A}")]
fn test_unescape(#[case] input: &str, #[case] expected: &str) {
    // Every case here is a well-formed escape sequence, so a failure to
    // unescape is itself a test failure.
    let decoded = unescape(input).unwrap();
    assert_eq!(expected, decoded);
}
}
117 changes: 82 additions & 35 deletions hugr-sexpr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,43 @@
//! together with a reader and pretty printer.
//! Moreover, the crate provides derive macros to conveniently convert between
//! user defined types and s-expressions.
//! See [`Value`] for the data model and syntax.
//! See [`Value`] for the data model.
//!
//! # Syntax
//!
//! **Lists** are sequences of values, delimited on the outside by `(` and `)`
//! and separated by whitespace.
//!
//! **Strings** are delimited by double quotes `"` on both sides,
//! using the following escaping rules:
//!
//! - `\"` and `\\` are used to escape `"` and `\`.
//! - `\n`, `\r` and `\t` stand for the newline, carriage return and tab characters.
//! - `\u{HEX}` stands in for any Unicode character, where `HEX` is its code point in hexadecimal notation.
//!
//! **Symbols** appear verbatim without delimiters, as long as they satisfy all of the following conditions:
//!
//! - The symbol consists only of alphanumeric characters and of the special characters `!$%&*/:<=>?^_~+-.@`.
//! - The symbol does not begin with a digit.
//! - If the symbol begins with `+` or `-`, the following character (if any) is not a digit.
//!
//! Symbols that are not of this form are delimited by a pipe `|` on both sides.
//! For symbols that are delimited, the same escaping rules apply as for strings,
//! except that the double quote `"` is exchanged for the pipe `|`.
//! Notably the hash sign `#` is reserved and may not appear in a non-delimited symbol.
//! This is to allow for future extensibility if richer data types are required.
//!
//! **Booleans** are encoded by `#t` for true and `#f` for false.
//!
//! **Integers** are written in decimal with an optional sign,
//! following the format `[+-]?[0-9]+`.
//!
//! **Floats** that are finite follow the format
//! `[+-]?[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?`.
//! Positive and negative infinity are denoted by `#+inf` and `#-inf`,
//! while NaN is written as `#nan`.
//!
//! **Comments** begin with a `;` and extend to the end of the line.
//!
//! # Derive Macros
//!
Expand Down Expand Up @@ -65,66 +101,52 @@ pub use read::from_str;
/// A value that can be encoded as an s-expression.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Value {
/// Lists are sequences of zero or more values, delimited by `(` and `)`.
/// The elements of the list are separated by whitespace.
/// Lists are sequences of zero or more values.
List(Vec<Self>),

/// Strings can be any valid UTF-8 string.
/// In the text format, strings are delimited by double quotes `"` on both sides,
/// using the following escaping rules:
///
/// - `\"` and `\\` are used to escape `"` and `\`.
/// - `\n`, `\r` and `\t` stand for the newline, carriage return and tab characters.
/// - `\u{HEX}` stands in for any unicode character where `HEX` is a UTF-8 codepoint written in up to four hexadecimal digits.
String(SmolStr),

/// Symbols represent identifiers such as variables or field names.
/// A symbol can be any valid UTF-8 string.
///
/// In the textual representation, a symbol can appear verbatim without delimiters,
/// as long as it satisfies all of the following conditions:
/// - The symbol consists only of alphanumeric characters and of the special characters `!$%&*/:<=>?^_~+-.@`.
/// - The symbol does not begin with a digit.
/// - If the symbol begins with `+` or `-`, the following character (if any) is not a digit.
///
/// Symbols that are not of this form are delimited by a pipe `|` on both sides.
/// For symbols that are delimited, the same escaping rules apply as for strings,
/// except that the double quote `"` is exchanged for the pipe `|`.
///
/// Notably the hash sign `#` is reserved and may not appear in a non-delimited symbol.
/// This is to allow for future extensibility if richer data types are required.
/// Currently we make use of `#` for the encoding of booleans.
Symbol(Symbol),

/// Booleans represent truth values.
/// True is denoted by `#t` while false is denoted by `#f`.
/// Booleans.
Bool(bool),

/// Signed integers with 64-bit precision.
///
/// Integers are represented in text in decimal and with an optional sign,
/// following the format `[+-]?[0-9]+`.
Int(i64),

/// Floating point numbers with 64-bit precision.
///
/// Finite floats are represented in text in the format
/// `[+-]?[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?`.
/// Positive and negative infinity are denoted by `#+inf` and `#-inf`,
/// while NaN is written as `#nan`.
Float(OrderedFloat<f64>),
}

impl Value {
/// Attempts to cast this value into a list.
pub fn as_list(&self) -> Option<&[Self]> {
///
/// # Examples
///
/// ```
/// # use hugr_sexpr::Value;
/// assert_eq!(Value::List(vec![]).as_list(), Some(&vec![]));
/// assert_eq!(Value::Int(3).as_list(), None);
/// ```
pub fn as_list(&self) -> Option<&Vec<Value>> {
match self {
Value::List(list) => Some(list),
_ => None,
}
}

/// Attempts to cast this value into a symbol.
///
/// # Examples
///
/// ```
/// # use hugr_sexpr::{Value, Symbol};
/// assert_eq!(Value::Symbol("s".into()).as_symbol(), Some(&Symbol::new("s")));
/// assert_eq!(Value::String("s".into()).as_symbol(), None);
/// ```
pub fn as_symbol(&self) -> Option<&Symbol> {
match self {
Value::Symbol(symbol) => Some(symbol),
Expand All @@ -133,14 +155,31 @@ impl Value {
}

/// Attempts to cast this value into a string.
pub fn as_string(&self) -> Option<&str> {
///
/// # Examples
///
/// ```
/// # use hugr_sexpr::{Value, Symbol};
/// # use smol_str::SmolStr;
/// assert_eq!(Value::String("s".into()).as_string(), Some(&SmolStr::new("s")));
/// assert_eq!(Value::Symbol("s".into()).as_string(), None);
/// ```
pub fn as_string(&self) -> Option<&SmolStr> {
match self {
Value::String(string) => Some(string),
_ => None,
}
}

/// Attempts to cast this value into an integer.
///
/// # Examples
///
/// ```
/// # use hugr_sexpr::{Value, Symbol};
/// assert_eq!(Value::Int(12).as_int(), Some(12));
/// assert_eq!(Value::Float((12.5).into()).as_int(), None);
/// ```
pub fn as_int(&self) -> Option<i64> {
match self {
Value::Int(int) => Some(*int),
Expand All @@ -149,6 +188,14 @@ impl Value {
}

/// Attempts to cast this value into a float.
///
/// # Examples
///
/// ```
/// # use hugr_sexpr::{Value, Symbol};
/// assert_eq!(Value::Float((12.5).into()).as_float(), Some(12.5));
/// assert_eq!(Value::Int(12).as_float(), None);
/// ```
pub fn as_float(&self) -> Option<f64> {
match self {
Value::Float(float) => Some(float.into_inner()),
Expand Down
2 changes: 1 addition & 1 deletion hugr-sexpr/src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ enum Token {
CloseList,

// String literal token: a double-quoted run of unescaped characters and escape
// sequences. The callback strips the two delimiting quotes from the matched
// slice and passes the interior to `unescape`.
//
// The `\u{HEX}` alternative must start with an escaped backslash (`\\u`).
// Without it, the only backslash sequences the pattern accepts are `\"`, `\\`,
// `\t`, `\n` and `\r`, so a string containing a unicode escape would never be
// matched as a string token.
#[regex(
r#""([^"\\]|\\["\\tnr]|u\{[a-fA-F0-9]{1,4}\})*""#,
r#""([^"\\]|\\["\\tnr]|\\u\{[a-fA-F0-9]+\})*""#,
|lex| Some(unescape(&lex.slice()[1..lex.slice().len() - 1])?.into())
)]
String(SmolStr),
Expand Down

0 comments on commit 22f62a0

Please sign in to comment.