From 235b4799a8929519ef555ab14efba1f1599b2d2a Mon Sep 17 00:00:00 2001 From: Ian Hobson Date: Wed, 10 Jan 2024 10:59:52 +0100 Subject: [PATCH 1/8] Use the ConstantIndex type alias where appropriate in the Parser --- crates/parser/src/node.rs | 2 +- crates/parser/src/parser.rs | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/crates/parser/src/node.rs b/crates/parser/src/node.rs index 798b11699..b4c931b71 100644 --- a/crates/parser/src/node.rs +++ b/crates/parser/src/node.rs @@ -361,7 +361,7 @@ pub struct AstString { #[derive(Clone, Debug, PartialEq, Eq)] pub enum StringNode { /// A string literal - Literal(u32), + Literal(ConstantIndex), /// An expression that should be evaluated and inserted into the string Expr(AstIndex), } diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 1a10c7a1c..7b9128812 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -14,15 +14,15 @@ struct Frame { // If a frame contains yield then it represents a generator function contains_yield: bool, // IDs that have been assigned within the current frame - ids_assigned_in_frame: HashSet<u32>, + ids_assigned_in_frame: HashSet<ConstantIndex>, // IDs and lookup roots which were accessed when not locally assigned at the time of access - accessed_non_locals: HashSet<u32>, + accessed_non_locals: HashSet<ConstantIndex>, // While expressions are being parsed we keep track of lhs assignments and rhs accesses. // At the end of a multi-assignment expresson (see `finalize_id_accesses`), // accessed IDs that weren't locally assigned at the time of access are then counted as // non-local accesses. 
- pending_accesses: HashSet<u32>, - pending_assignments: HashSet<u32>, + pending_accesses: HashSet<ConstantIndex>, + pending_assignments: HashSet<ConstantIndex>, } impl Frame { @@ -43,12 +43,12 @@ impl Frame { } // Declare that an id has been accessed within the frame - fn add_id_access(&mut self, id: u32) { + fn add_id_access(&mut self, id: ConstantIndex) { self.pending_accesses.insert(id); } // Declare that an id is being assigned to within the frame - fn add_local_id_assignment(&mut self, id: u32) { + fn add_local_id_assignment(&mut self, id: ConstantIndex) { self.pending_assignments.insert(id); // While an assignment expression is being parsed, the LHS id is counted as an access // until the assignment operator is encountered. @@ -1005,7 +1005,7 @@ impl<'source> Parser<'source> { // # ^ ...or here fn parse_nested_function_args( &mut self, - arg_ids: &mut Vec<u32>, + arg_ids: &mut Vec<ConstantIndex>, ) -> Result, ParserError> { let mut nested_args = Vec::new(); @@ -1136,7 +1136,7 @@ impl<'source> Parser<'source> { fn parse_id( &mut self, context: &ExpressionContext, - ) -> Result, ParserError> { + ) -> Result, ParserError> { match self.peek_token_with_context(context) { Some(PeekInfo { token: Token::Id, .. 
@@ -1980,7 +1980,9 @@ impl<'source> Parser<'source> { } // Attempts to parse a meta key - fn parse_meta_key(&mut self) -> Result)>, ParserError> { + fn parse_meta_key( + &mut self, + ) -> Result)>, ParserError> { if self.peek_next_token_on_same_line() != Some(Token::At) { return Ok(None); } @@ -3013,7 +3015,7 @@ impl<'source> Parser<'source> { } } - fn add_string_constant(&mut self, s: &str) -> Result { + fn add_string_constant(&mut self, s: &str) -> Result { match self.constants.add_string(s) { Ok(result) => Ok(result), Err(_) => self.error(InternalError::ConstantPoolCapacityOverflow), @@ -3263,6 +3265,6 @@ struct PeekInfo { // Returned by Parser::parse_id_or_wildcard() enum IdOrWildcard { - Id(u32), - Wildcard(Option), + Id(ConstantIndex), + Wildcard(Option), } From 491546f7831609cc6741371a26d14c8946433271 Mon Sep 17 00:00:00 2001 From: Ian Hobson Date: Wed, 10 Jan 2024 11:50:03 +0100 Subject: [PATCH 2/8] Use 'consume' terminology for parsing functions that must consume input --- crates/parser/src/parser.rs | 62 ++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 7b9128812..bc5881070 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -243,7 +243,7 @@ impl<'source> Parser<'source> { frame_stack: Vec::new(), }; - let main_block = parser.parse_main_block()?; + let main_block = parser.consume_main_block()?; parser.ast.set_entry_point(main_block); parser.ast.set_constants(parser.constants.build()); @@ -251,7 +251,7 @@ impl<'source> Parser<'source> { } // Parses the main 'top-level' block - fn parse_main_block(&mut self) -> Result { + fn consume_main_block(&mut self) -> Result { self.frame_stack.push(Frame::default()); let start_span = self.current_span(); @@ -730,8 +730,8 @@ impl<'source> Parser<'source> { self.consume_token_with_context(context); self.push_node(BoolFalse) } - Token::RoundOpen => self.parse_tuple(context), - 
Token::Number => self.parse_number(false, context), + Token::RoundOpen => self.consume_tuple(context), + Token::Number => self.consume_number(false, context), Token::DoubleQuote | Token::SingleQuote => { let (string, span, string_context) = self.parse_string(context)?.unwrap(); @@ -742,8 +742,8 @@ impl<'source> Parser<'source> { self.check_for_lookup_after_node(string_node, &string_context) } } - Token::Id => self.parse_id_expression(context), - Token::Self_ => self.parse_self_expression(context), + Token::Id => self.consume_id_expression(context), + Token::Self_ => self.consume_self_expression(context), Token::At => { let map_block_allowed = context.allow_map_block || peeked.indent > start_indent; @@ -774,18 +774,18 @@ impl<'source> Parser<'source> { } } } - Token::Wildcard => self.parse_wildcard(context), - Token::SquareOpen => self.parse_list(context), + Token::Wildcard => self.consume_wildcard(context), + Token::SquareOpen => self.consume_list(context), Token::CurlyOpen => self.parse_map_with_braces(context), Token::If => self.parse_if_expression(context), Token::Match => self.parse_match_expression(context), Token::Switch => self.parse_switch_expression(context), - Token::Function => self.parse_function(context), + Token::Function => self.consume_function(context), Token::Subtract => match self.peek_token_n(peeked.peek_count + 1) { Some(token) if token.is_whitespace() || token.is_newline() => return Ok(None), Some(Token::Number) => { self.consume_token_with_context(context); // Token::Subtract - self.parse_number(true, context) + self.consume_number(true, context) } Some(_) => { self.consume_token_with_context(context); // Token::Subtract @@ -846,10 +846,10 @@ impl<'source> Parser<'source> { self.parse_expressions(&context.start_new_expression(), TempResult::No)?; self.push_node(Node::Return(return_value)) } - Token::Throw => self.parse_throw_expression(), - Token::Debug => self.parse_debug_expression(), + Token::Throw => self.consume_throw_expression(), + 
Token::Debug => self.consume_debug_expression(), Token::From | Token::Import => self.parse_import(context), - Token::Export => self.parse_export(context), + Token::Export => self.consume_export(context), Token::Try => self.parse_try_expression(context), Token::Error => self.consume_token_and_error(SyntaxError::LexerError), _ => return Ok(None), @@ -863,7 +863,7 @@ impl<'source> Parser<'source> { // e.g. // f = |x, y| x + y // # ^ You are here - fn parse_function(&mut self, context: &ExpressionContext) -> Result { + fn consume_function(&mut self, context: &ExpressionContext) -> Result { let start_indent = self.current_indent(); self.consume_token_with_context(context); // Token::Function @@ -1150,7 +1150,7 @@ impl<'source> Parser<'source> { } // Parses a single `_` wildcard, along with its optional following id - fn parse_wildcard(&mut self, context: &ExpressionContext) -> Result { + fn consume_wildcard(&mut self, context: &ExpressionContext) -> Result { self.consume_token_with_context(context); let slice = self.lexer.slice(); let maybe_id = if slice.len() > 1 { @@ -1193,7 +1193,7 @@ impl<'source> Parser<'source> { } } - fn parse_id_expression( + fn consume_id_expression( &mut self, context: &ExpressionContext, ) -> Result { @@ -1210,7 +1210,7 @@ impl<'source> Parser<'source> { let lookup_context = id_context.lookup_start(); if self.next_token_is_lookup_start(&lookup_context) { let id_index = self.push_node(Node::Id(constant_index))?; - self.parse_lookup(id_index, &lookup_context) + self.consume_lookup(id_index, &lookup_context) } else { let start_span = self.current_span(); let args = self.parse_call_args(&id_context)?; @@ -1230,7 +1230,7 @@ impl<'source> Parser<'source> { } } - fn parse_self_expression( + fn consume_self_expression( &mut self, context: &ExpressionContext, ) -> Result { @@ -1242,7 +1242,7 @@ impl<'source> Parser<'source> { let self_index = self.push_node(Node::Self_)?; if self.next_token_is_lookup_start(&lookup_context) { - 
self.parse_lookup(self_index, &lookup_context) + self.consume_lookup(self_index, &lookup_context) } else { Ok(self_index) } @@ -1258,7 +1258,7 @@ impl<'source> Parser<'source> { ) -> Result { let lookup_context = context.lookup_start(); if self.next_token_is_lookup_start(&lookup_context) { - self.parse_lookup(node, &lookup_context) + self.consume_lookup(node, &lookup_context) } else { Ok(node) } @@ -1298,7 +1298,7 @@ impl<'source> Parser<'source> { // e.g. // y = x[0][1].foo() // # ^ You are here - fn parse_lookup( + fn consume_lookup( &mut self, root: AstIndex, context: &ExpressionContext, @@ -1332,7 +1332,7 @@ impl<'source> Parser<'source> { Token::SquareOpen => { self.consume_token(); - let index_expression = self.parse_index_expression()?; + let index_expression = self.consume_index_expression()?; if let Some(Token::SquareClose) = self.consume_next_token_on_same_line() { lookup.push((LookupNode::Index(index_expression), node_start_span)); @@ -1445,7 +1445,7 @@ impl<'source> Parser<'source> { // e.g. // foo.bar[10..20] // # ^ You are here - fn parse_index_expression(&mut self) -> Result { + fn consume_index_expression(&mut self) -> Result { let index_context = ExpressionContext::restricted(); let result = if let Some(index_expression) = self.parse_expression(&index_context)? 
{ @@ -1595,7 +1595,7 @@ impl<'source> Parser<'source> { .map(Some) } - fn parse_export(&mut self, context: &ExpressionContext) -> Result { + fn consume_export(&mut self, context: &ExpressionContext) -> Result { self.consume_token_with_context(context); // Token::Export let start_span = self.current_span(); @@ -1607,7 +1607,7 @@ impl<'source> Parser<'source> { self.push_node_with_start_span(Node::Export(expression), start_span) } - fn parse_throw_expression(&mut self) -> Result { + fn consume_throw_expression(&mut self) -> Result { self.consume_next_token_on_same_line(); // Token::Throw let start_span = self.current_span(); @@ -1619,7 +1619,7 @@ impl<'source> Parser<'source> { self.push_node_with_start_span(Node::Throw(expression), start_span) } - fn parse_debug_expression(&mut self) -> Result { + fn consume_debug_expression(&mut self) -> Result { self.consume_next_token_on_same_line(); // Token::Debug let start_position = self.current_span().start; @@ -1650,7 +1650,7 @@ impl<'source> Parser<'source> { ) } - fn parse_number( + fn consume_number( &mut self, negate: bool, context: &ExpressionContext, @@ -1709,7 +1709,7 @@ impl<'source> Parser<'source> { // - e.g. `(1 + 1)` // - A comma-separated tuple // - e.g. `(,)`, `(x,)`, `(1, 2)` - fn parse_tuple(&mut self, context: &ExpressionContext) -> Result { + fn consume_tuple(&mut self, context: &ExpressionContext) -> Result { self.consume_token_with_context(context); // Token::RoundOpen let start_span = self.current_span(); @@ -1737,7 +1737,7 @@ impl<'source> Parser<'source> { } // Parses a list, e.g. 
`[1, 2, 3]` - fn parse_list(&mut self, context: &ExpressionContext) -> Result { + fn consume_list(&mut self, context: &ExpressionContext) -> Result { self.consume_token_with_context(context); // Token::SquareOpen let start_span = self.current_span(); @@ -2505,7 +2505,7 @@ impl<'source> Parser<'source> { let id_node = self.push_node(Node::Id(id))?; if self.next_token_is_lookup_start(&pattern_context) { self.frame_mut()?.add_id_access(id); - self.parse_lookup(id_node, &pattern_context)? + self.consume_lookup(id_node, &pattern_context)? } else { self.frame_mut()?.ids_assigned_in_frame.insert(id); id_node @@ -2515,7 +2515,7 @@ impl<'source> Parser<'source> { } None => return self.error(InternalError::IdParseFailure), }, - Wildcard => self.parse_wildcard(&pattern_context).map(Some)?, + Wildcard => self.consume_wildcard(&pattern_context).map(Some)?, SquareOpen => { self.consume_token_with_context(&pattern_context); From ab34848e3181ed6d5fe44ba35d816581776617a8 Mon Sep 17 00:00:00 2001 From: Ian Hobson Date: Wed, 10 Jan 2024 13:02:33 +0100 Subject: [PATCH 3/8] Extract a struct for the output of Parser::parse_string() --- crates/parser/src/parser.rs | 51 +++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index bc5881070..da36163f0 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -733,13 +733,17 @@ impl<'source> Parser<'source> { Token::RoundOpen => self.consume_tuple(context), Token::Number => self.consume_number(false, context), Token::DoubleQuote | Token::SingleQuote => { - let (string, span, string_context) = self.parse_string(context)?.unwrap(); + let string = self.parse_string(context)?.unwrap(); if self.peek_token() == Some(Token::Colon) { - self.parse_braceless_map_start(MapKey::Str(string), start_span, &string_context) + self.parse_braceless_map_start( + MapKey::Str(string.string), + start_span, + &string.context, + ) } else { - 
let string_node = self.push_node_with_span(Str(string), span)?; - self.check_for_lookup_after_node(string_node, &string_context) + let string_node = self.push_node_with_span(Str(string.string), string.span)?; + self.check_for_lookup_after_node(string_node, &string.context) } } Token::Id => self.consume_id_expression(context), @@ -1353,9 +1357,9 @@ impl<'source> Parser<'source> { } else if let Some((id, _)) = self.parse_id(&restricted)? { node_start_span = self.current_span(); lookup.push((LookupNode::Id(id), node_start_span)); - } else if let Some((lookup_string, span, _)) = self.parse_string(&restricted)? { - node_start_span = span; - lookup.push((LookupNode::Str(lookup_string), span)); + } else if let Some(lookup_string) = self.parse_string(&restricted)? { + node_start_span = lookup_string.span; + lookup.push((LookupNode::Str(lookup_string.string), lookup_string.span)); } else { return self.consume_token_and_error(SyntaxError::ExpectedMapKey); } @@ -1966,10 +1970,8 @@ impl<'source> Parser<'source> { fn parse_map_key(&mut self) -> Result, ParserError> { let result = if let Some((id, _)) = self.parse_id(&ExpressionContext::restricted())? { Some(MapKey::Id(id)) - } else if let Some((string_key, _span, _string_context)) = - self.parse_string(&ExpressionContext::restricted())? - { - Some(MapKey::Str(string_key)) + } else if let Some(string_key) = self.parse_string(&ExpressionContext::restricted())? { + Some(MapKey::Str(string_key.string)) } else if let Some((meta_key_id, meta_name)) = self.parse_meta_key()? { Some(MapKey::Meta(meta_key_id, meta_name)) } else { @@ -2643,9 +2645,7 @@ impl<'source> Parser<'source> { let item_root = match self.parse_id(&context)? { Some((id, _)) => ImportItemNode::Id(id), None => match self.parse_string(&context)? 
{ - Some((import_string, _span, _string_context)) => { - ImportItemNode::Str(import_string) - } + Some(import_string) => ImportItemNode::Str(import_string.string), None => break, }, }; @@ -2659,8 +2659,8 @@ impl<'source> Parser<'source> { match self.parse_id(&ExpressionContext::restricted())? { Some((id, _)) => item.push(ImportItemNode::Id(id)), None => match self.parse_string(&ExpressionContext::restricted())? { - Some((node_string, _span, _string_context)) => { - item.push(ImportItemNode::Str(node_string)); + Some(node_string) => { + item.push(ImportItemNode::Str(node_string.string)); } None => { return self @@ -2759,7 +2759,7 @@ impl<'source> Parser<'source> { fn parse_string( &mut self, context: &ExpressionContext, - ) -> Result, ParserError> { + ) -> Result, ParserError> { use SyntaxError::*; use Token::*; @@ -2907,14 +2907,14 @@ impl<'source> Parser<'source> { nodes.push(StringNode::Literal(self.add_string_constant("")?)); } - return Ok(Some(( - AstString { + return Ok(Some(ParseStringOutput { + string: AstString { quotation_mark, nodes, }, - self.span_with_start(start_span), - string_context, - ))); + span: self.span_with_start(start_span), + context: string_context, + })); } _ => return self.error(UnexpectedToken), } @@ -3268,3 +3268,10 @@ enum IdOrWildcard { Id(ConstantIndex), Wildcard(Option), } + +// Returned by Parser::parse_string() +struct ParseStringOutput { + string: AstString, + span: Span, + context: ExpressionContext, +} From d541cff0ac1a73af35b850e8848b728545842d16 Mon Sep 17 00:00:00 2001 From: Ian Hobson Date: Wed, 10 Jan 2024 17:00:25 +0100 Subject: [PATCH 4/8] Add initial support for raw strings This involved reworking the lexer API to ensure that peeked slices are always correct: - Token::NewLineIndented has been removed - The lexer's current line number and indent always refers to the previously outputted token. 
- The lexer peek API has been simplified --- CHANGELOG.md | 2 + crates/bytecode/src/compiler.rs | 174 ++-- crates/lexer/src/lexer.rs | 1250 +++++++++++++++------------ crates/lexer/src/lib.rs | 2 +- crates/parser/src/error.rs | 2 + crates/parser/src/node.rs | 19 +- crates/parser/src/parser.rs | 392 +++++---- crates/parser/tests/parser_tests.rs | 104 ++- crates/runtime/tests/vm_tests.rs | 42 + docs/language/strings.md | 9 + koto/tests/strings.koto | 4 + 11 files changed, 1150 insertions(+), 850 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b00c1ffe..f30d39468 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ The Koto project adheres to #### Language - The `+` operator has been reintroduced for tuples, lists, and maps. +- Raw strings are now supported. Any string prefixed with `r` will skip + character escaping and string interpolation. #### API diff --git a/crates/bytecode/src/compiler.rs b/crates/bytecode/src/compiler.rs index 444a51366..934399cb7 100644 --- a/crates/bytecode/src/compiler.rs +++ b/crates/bytecode/src/compiler.rs @@ -1,8 +1,8 @@ use crate::{DebugInfo, FunctionFlags, Op, TypeId}; use koto_parser::{ Ast, AstBinaryOp, AstFor, AstIf, AstIndex, AstNode, AstTry, AstUnaryOp, ConstantIndex, - Function, ImportItemNode, LookupNode, MapKey, MatchArm, MetaKeyId, Node, Span, StringNode, - SwitchArm, + Function, ImportItemNode, LookupNode, MapKey, MatchArm, MetaKeyId, Node, Span, StringContents, + StringNode, SwitchArm, }; use smallvec::SmallVec; use std::collections::HashSet; @@ -540,7 +540,7 @@ impl Compiler { } result } - Node::Str(string) => self.compile_string(result_register, &string.nodes, ast)?, + Node::Str(string) => self.compile_string(result_register, &string.contents, ast)?, Node::List(elements) => { self.compile_make_sequence(result_register, elements, Op::SequenceToList, ast)? 
} @@ -1491,7 +1491,7 @@ impl Compiler { self.compile_access_string( import_register, from_register, - &string.nodes, + &string.contents, ast, )?; @@ -1562,7 +1562,7 @@ impl Compiler { ImportItemNode::Str(string) => self.compile_access_string( result_register, result_register, - &string.nodes, + &string.contents, ast, )?, } @@ -1593,7 +1593,11 @@ impl Compiler { } } ImportItemNode::Str(string) => { - self.compile_string(ResultRegister::Fixed(result_register), &string.nodes, ast)?; + self.compile_string( + ResultRegister::Fixed(result_register), + &string.contents, + ast, + )?; } } @@ -2016,88 +2020,100 @@ impl Compiler { fn compile_string( &mut self, result_register: ResultRegister, - nodes: &[StringNode], + contents: &StringContents, ast: &Ast, ) -> CompileNodeResult { let result = self.get_result_register(result_register)?; - let size_hint = nodes.iter().fold(0, |result, node| { - match node { - StringNode::Literal(constant_index) => { - result + ast.constants().get_str(*constant_index).len() - } - StringNode::Expr(_) => { - // Q. Why use '1' here? - // A. The expression can result in a displayed string of any length, - // We can make an assumption that the expression will almost always produce - // at least 1 character to display, but it's unhealthy to over-allocate so - // let's leave it there for now until we have real-world practice that tells - // us otherwise. - result + 1 - } - } - }); - - match nodes { - [] => return self.error(ErrorKind::MissingStringNodes), - [StringNode::Literal(constant_index)] => { + match contents { + StringContents::Raw(constant_index) | StringContents::Literal(constant_index) => { if let Some(result) = result { self.compile_load_string_constant(result.register, *constant_index); } } - _ => { - if result.is_some() { - self.push_op(Op::StringStart, &[]); - // Limit the size hint to u32::MAX, u64 size hinting can be added later if - // it would be useful in practice. 
- self.push_var_u32(size_hint as u32); - } - - for node in nodes.iter() { + StringContents::Interpolated(nodes) => { + let size_hint = nodes.iter().fold(0, |result, node| { match node { StringNode::Literal(constant_index) => { - if result.is_some() { - let node_register = self.push_register()?; - - self.compile_load_string_constant(node_register, *constant_index); - self.push_op_without_span(Op::StringPush, &[node_register]); + result + ast.constants().get_str(*constant_index).len() + } + StringNode::Expr(_) => { + // Q. Why use '1' here? + // A. The expression can result in a displayed string of any length, + // We can make an assumption that the expression will almost always + // produce at least 1 character to display, but it's unhealthy to + // over-allocate so let's leave it there for now until we have + // real-world practice that tells us otherwise. + result + 1 + } + } + }); - self.pop_register()?; - } + match nodes.as_slice() { + [] => return self.error(ErrorKind::MissingStringNodes), + [StringNode::Literal(constant_index)] => { + if let Some(result) = result { + self.compile_load_string_constant(result.register, *constant_index); + } + } + _ => { + if result.is_some() { + self.push_op(Op::StringStart, &[]); + // Limit the size hint to u32::MAX, u64 size hinting can be added later if + // it would be useful in practice. + self.push_var_u32(size_hint as u32); } - StringNode::Expr(expression_node) => { - if result.is_some() { - let expression_result = self - .compile_node( - ResultRegister::Any, - ast.node(*expression_node), - ast, - )? 
- .unwrap(); - - self.push_op_without_span( - Op::StringPush, - &[expression_result.register], - ); - - if expression_result.is_temporary { - self.pop_register()?; + + for node in nodes.iter() { + match node { + StringNode::Literal(constant_index) => { + if result.is_some() { + let node_register = self.push_register()?; + + self.compile_load_string_constant( + node_register, + *constant_index, + ); + self.push_op_without_span(Op::StringPush, &[node_register]); + + self.pop_register()?; + } + } + StringNode::Expr(expression_node) => { + if result.is_some() { + let expression_result = self + .compile_node( + ResultRegister::Any, + ast.node(*expression_node), + ast, + )? + .unwrap(); + + self.push_op_without_span( + Op::StringPush, + &[expression_result.register], + ); + + if expression_result.is_temporary { + self.pop_register()?; + } + } else { + // Compile the expression even though we don't need the result, + // so that side-effects can take place. + self.compile_node( + ResultRegister::None, + ast.node(*expression_node), + ast, + )?; + } } - } else { - // Compile the expression even though we don't need the result, - // so that side-effects can take place. - self.compile_node( - ResultRegister::None, - ast.node(*expression_node), - ast, - )?; } } - } - } - if let Some(result) = result { - self.push_op(Op::StringFinish, &[result.register]); + if let Some(result) = result { + self.push_op(Op::StringFinish, &[result.register]); + } + } } } } @@ -2498,7 +2514,7 @@ impl Compiler { self.compile_access_string( node_register, parent_register, - &lookup_string.nodes, + &lookup_string.contents, ast, )?; } @@ -2585,7 +2601,7 @@ impl Compiler { let key_register = self.push_register()?; self.compile_string( ResultRegister::Fixed(key_register), - &lookup_string.nodes, + &lookup_string.contents, ast, )? 
} else { @@ -2777,7 +2793,7 @@ impl Compiler { } MapKey::Str(string) => { let key_register = self.push_register()?; - self.compile_string(ResultRegister::Fixed(key_register), &string.nodes, ast)?; + self.compile_string(ResultRegister::Fixed(key_register), &string.contents, ast)?; if let Some(map_register) = map_register { self.push_op_without_span( @@ -2837,11 +2853,15 @@ impl Compiler { &mut self, result_register: u8, value_register: u8, - key_string_nodes: &[StringNode], + key_string_contents: &StringContents, ast: &Ast, ) -> Result<(), CompilerError> { let key_register = self.push_register()?; - self.compile_string(ResultRegister::Fixed(key_register), key_string_nodes, ast)?; + self.compile_string( + ResultRegister::Fixed(key_register), + key_string_contents, + ast, + )?; self.push_op( Op::AccessString, &[result_register, value_register, key_register], diff --git a/crates/lexer/src/lexer.rs b/crates/lexer/src/lexer.rs index 6d2123a2b..b41578757 100644 --- a/crates/lexer/src/lexer.rs +++ b/crates/lexer/src/lexer.rs @@ -1,5 +1,5 @@ use crate::{Position, Span}; -use std::{iter::Peekable, str::Chars}; +use std::{collections::VecDeque, iter::Peekable, ops::Range, str::Chars}; use unicode_width::UnicodeWidthChar; use unicode_xid::UnicodeXID; @@ -10,7 +10,6 @@ pub enum Token { Error, Whitespace, NewLine, - NewLineIndented, CommentSingle, CommentMulti, Number, @@ -19,6 +18,8 @@ pub enum Token { SingleQuote, DoubleQuote, + RawStringStart, + RawStringEnd, StringLiteral, // Symbols @@ -101,10 +102,9 @@ impl Token { matches!(self, Whitespace | CommentMulti | CommentSingle) } - /// Returns true if the token represents a new line - pub fn is_newline(&self) -> bool { - use Token::*; - matches!(self, NewLine | NewLineIndented) + /// Returns true if the token should be counted as whitespace, including newlines + pub fn is_whitespace_including_newline(&self) -> bool { + self.is_whitespace() || *self == Token::NewLine } } @@ -113,6 +113,10 @@ impl Token { enum StringMode { // 
Inside a string literal, expecting an end quote Literal(char), + // The start of a raw string has just been encountered, raw string contents follow + RawStart(char), + // The contents of the raw string have just been consumed, the end delimiter should follow + RawEnd(char), // Just after a $ symbol, either an id or a '{' will follow TemplateStart, // Inside a string template, e.g. '${...}' @@ -156,20 +160,27 @@ impl<'a> TokenLexer<'a> { } } - // The slice associated with the token that was just emitted - fn slice(&self) -> &'a str { - &self.source[self.previous_byte..self.current_byte] + fn source_bytes(&self) -> Range { + self.previous_byte..self.current_byte } fn current_position(&self) -> Position { self.span.end } + // Advance along the current line by a number of bytes + // + // The characters being advanced over should all be ANSI, + // i.e. the byte count must match the character count. + // + // If the characters have been read as UTF-8 then advance_line_utf8 should be used instead. 
fn advance_line(&mut self, char_bytes: usize) { self.advance_line_utf8(char_bytes, char_bytes); } + // Advance along the current line by a number of bytes, with a UTF-8 character count fn advance_line_utf8(&mut self, char_bytes: usize, char_count: usize) { + // TODO, defer to advance_to_position self.previous_byte = self.current_byte; self.current_byte += char_bytes; @@ -197,11 +208,9 @@ impl<'a> TokenLexer<'a> { use Token::*; let mut consumed_bytes = 1; - let mut newline_bytes = 1; if chars.peek() == Some(&'\r') { consumed_bytes += 1; - newline_bytes += 1; chars.next(); } @@ -210,22 +219,15 @@ impl<'a> TokenLexer<'a> { _ => return Error, } - consumed_bytes += consume_and_count(&mut chars, is_whitespace); - - self.indent = consumed_bytes - newline_bytes; self.advance_to_position( consumed_bytes, Position { line: self.current_position().line + 1, - column: (consumed_bytes - newline_bytes + 1) as u32, // indexing from 1 for column + column: 1, }, ); - if self.indent == 0 { - NewLine - } else { - NewLineIndented - } + NewLine } fn consume_comment(&mut self, mut chars: Peekable) -> Token { @@ -357,6 +359,57 @@ impl<'a> TokenLexer<'a> { Error } + fn consume_raw_string_contents( + &mut self, + mut chars: Peekable, + end_quote: char, + ) -> Token { + let mut string_bytes = 0; + + let mut position = self.current_position(); + + while let Some(c) = chars.next() { + match c { + _ if c == end_quote => { + self.advance_to_position(string_bytes, position); + self.string_mode_stack.pop(); // StringMode::RawStart + self.string_mode_stack.push(StringMode::RawEnd(end_quote)); + return Token::StringLiteral; + } + '\r' => { + if chars.next() != Some('\n') { + return Token::Error; + } + string_bytes += 2; + position.line += 1; + position.column = 1; + } + '\n' => { + string_bytes += 1; + position.line += 1; + position.column = 1; + } + _ => { + string_bytes += c.len_utf8(); + position.column += c.width().unwrap_or(0) as u32; + } + } + } + + Token::Error + } + + fn 
consume_raw_string_end(&mut self, mut chars: Peekable, end_quote: char) -> Token { + match chars.next() { + Some(c) if c == end_quote => { + self.string_mode_stack.pop(); // StringMode::RawEnd + self.advance_line(1); + Token::RawStringEnd + } + _ => Token::Error, + } + } + fn consume_number(&mut self, mut chars: Peekable) -> Token { use Token::*; @@ -438,18 +491,29 @@ impl<'a> TokenLexer<'a> { let id = &self.source[self.current_byte..self.current_byte + char_bytes]; - if id == "else" { - if self - .source - .get(self.current_byte..self.current_byte + char_bytes + 3) - == Some("else if") - { - self.advance_line(7); - return ElseIf; - } else { - self.advance_line(4); - return Else; + match id { + "else" => { + if self + .source + .get(self.current_byte..self.current_byte + char_bytes + 3) + == Some("else if") + { + self.advance_line(7); + return ElseIf; + } else { + self.advance_line(4); + return Else; + } } + "r" => { + // look ahead and determine if this is the start of a raw string + if let Some(c @ '\'' | c @ '"') = chars.peek() { + self.advance_line(2); + self.string_mode_stack.push(StringMode::RawStart(*c)); + return RawStringStart; + } + } + _ => {} } macro_rules! check_keyword { @@ -568,6 +632,12 @@ impl<'a> TokenLexer<'a> { let result = match self.source.get(self.current_byte..) { Some(remaining) if !remaining.is_empty() => { + if self.previous_token == Some(Token::NewLine) { + // Reset the indent after a newline. + // If whitespace follows then the indent will be increased. 
+ self.indent = 0; + } + let mut chars = remaining.chars().peekable(); let next_char = *chars.peek().unwrap(); // At least one char is remaining @@ -592,6 +662,10 @@ impl<'a> TokenLexer<'a> { } _ => self.consume_string_literal(chars), }, + Some(StringMode::RawStart(quote)) => { + self.consume_raw_string_contents(chars, quote) + } + Some(StringMode::RawEnd(quote)) => self.consume_raw_string_end(chars, quote), Some(StringMode::TemplateStart) => match next_char { _ if is_id_start(next_char) => match self.consume_id_or_keyword(chars) { Id => { @@ -612,6 +686,9 @@ impl<'a> TokenLexer<'a> { c if is_whitespace(c) => { let count = consume_and_count(&mut chars, is_whitespace); self.advance_line(count); + if matches!(self.previous_token, Some(Token::NewLine) | None) { + self.indent = count; + } Whitespace } '\r' | '\n' => self.consume_newline(chars), @@ -742,13 +819,40 @@ fn consume_and_count_utf8( (char_bytes, char_count) } -#[derive(Clone)] -struct PeekedToken<'a> { - token: Option, - slice: &'a str, - span: Span, - indent: usize, - source_position: usize, +/// A [Token] along with additional metadata +#[derive(Clone, PartialEq, Debug)] +pub struct LexedToken { + /// The token + pub token: Token, + /// The byte positions in the source representing the token + pub source_bytes: Range, + /// The token's span + pub span: Span, + /// The indentation level of the token's starting line + pub indent: usize, +} + +impl LexedToken { + /// A helper for getting the token's starting line + pub fn line(&self) -> u32 { + self.span.start.line + } + + /// A helper for getting the token's string slice from the source + pub fn slice<'a>(&self, source: &'a str) -> &'a str { + &source[self.source_bytes.clone()] + } +} + +impl Default for LexedToken { + fn default() -> Self { + Self { + token: Token::Error, + source_bytes: Default::default(), + span: Default::default(), + indent: Default::default(), + } + } } /// The lexer used by the Koto parser @@ -757,8 +861,7 @@ struct PeekedToken<'a> { 
#[derive(Clone)] pub struct KotoLexer<'a> { lexer: TokenLexer<'a>, - peeked_tokens: Vec>, - current_peek_index: usize, + token_queue: VecDeque, } impl<'a> KotoLexer<'a> { @@ -766,8 +869,7 @@ impl<'a> KotoLexer<'a> { pub fn new(source: &'a str) -> Self { Self { lexer: TokenLexer::new(source), - peeked_tokens: Vec::new(), - current_peek_index: 0, + token_queue: VecDeque::new(), } } @@ -776,246 +878,190 @@ impl<'a> KotoLexer<'a> { self.lexer.source } - /// Returns the current position in the input source - pub fn source_position(&self) -> usize { - if self.peeked_tokens.is_empty() { - self.lexer.current_byte - } else { - self.peeked_tokens[self.current_peek_index].source_position - } - } - - /// Peeks the next token in the output stream - pub fn peek(&mut self) -> Option { - if self.peeked_tokens.is_empty() { - self.peek_n(0) - } else { - self.peeked_tokens[self.current_peek_index].token - } - } - /// Peeks the nth token that will appear in the output stream /// /// peek_n(0) is equivalent to calling peek(). /// peek_n(1) returns the token that will appear after that, and so forth. 
- pub fn peek_n(&mut self, n: usize) -> Option { - while self.peeked_tokens.len() - self.current_peek_index <= n { - let span = self.lexer.span; - let slice = self.lexer.slice(); - let indent = self.lexer.indent; - let source_position = self.lexer.current_byte; - // getting the token needs to happen after the other properties - let token = self.lexer.next(); - self.peeked_tokens.push(PeekedToken { - token, - slice, - span, - indent, - source_position, - }); - } - self.peeked_tokens[self.current_peek_index + n].token - } - - /// Returns the current span - pub fn span(&self) -> Span { - if self.peeked_tokens.is_empty() { - self.lexer.span - } else { - self.peeked_tokens[self.current_peek_index].span - } - } - - /// Returns the string slice of the input associated with the current token - pub fn slice(&self) -> &'a str { - if self.peeked_tokens.is_empty() { - self.lexer.slice() - } else { - self.peeked_tokens[self.current_peek_index].slice - } - } + pub fn peek(&mut self, n: usize) -> Option<&LexedToken> { + let token_queue_len = self.token_queue.len(); + let tokens_to_add = token_queue_len + 1 - n.max(token_queue_len); - /// Returns the indent associated with the current token - pub fn current_indent(&self) -> usize { - if self.peeked_tokens.is_empty() { - self.lexer.indent - } else { - self.peeked_tokens[self.current_peek_index].indent + for _ in 0..tokens_to_add { + if let Some(next) = self.next_token() { + self.token_queue.push_back(next); + } else { + break; + } } - } - - /// Returns the indent associated with the nth coming token in the output stream - pub fn peek_indent(&self, peek_index: usize) -> usize { - self.peeked_tokens[self.current_peek_index + peek_index].indent - } - /// Returns the line number associated with the current token - pub fn line_number(&self) -> u32 { - self.span().end.line + self.token_queue.get(n) } - /// Returns the line number associated with the nth coming token in the output stream - pub fn peek_line_number(&self, peek_index: usize) 
-> u32 { - self.peeked_tokens[self.current_peek_index + peek_index] - .span - .end - .line + fn next_token(&mut self) -> Option { + self.lexer.next().map(|token| LexedToken { + token, + source_bytes: self.lexer.source_bytes(), + span: self.lexer.span, + indent: self.lexer.indent, + }) } } impl<'a> Iterator for KotoLexer<'a> { - type Item = Token; + type Item = LexedToken; - fn next(&mut self) -> Option { - if self.peeked_tokens.is_empty() { - self.lexer.next() + fn next(&mut self) -> Option { + if let Some(next) = self.token_queue.pop_front() { + Some(next) } else { - let result = self.peeked_tokens[self.current_peek_index].token; - self.current_peek_index += 1; - if self.current_peek_index == self.peeked_tokens.len() { - self.peeked_tokens.clear(); - self.current_peek_index = 0; - } - result + self.next_token() } } } #[cfg(test)] mod tests { - use super::{Token::*, *}; - - fn check_lexer_output(source: &str, tokens: &[(Token, Option<&str>, u32)]) { - let mut lex = KotoLexer::new(source); - - for (i, (token, maybe_slice, line_number)) in tokens.iter().enumerate() { - loop { - match lex.next().expect("Expected token") { - Whitespace => continue, - output => { - assert_eq!(&output, token, "Token mismatch at position {i}"); - if let Some(slice) = maybe_slice { - assert_eq!(&lex.slice(), slice, "Slice mismatch at position {i}"); + use super::*; + + mod lexer_output { + use super::{Token::*, *}; + + fn check_lexer_output(source: &str, tokens: &[(Token, Option<&str>, u32)]) { + let mut lex = KotoLexer::new(source); + + for (i, (token, maybe_slice, line_number)) in tokens.iter().enumerate() { + loop { + match lex.next().expect("Expected token") { + LexedToken { + token: Whitespace, .. 
+ } => continue, + output => { + assert_eq!(output.token, *token, "Token mismatch at position {i}"); + if let Some(slice) = maybe_slice { + assert_eq!( + output.slice(source), + *slice, + "Slice mismatch at position {i}" + ); + } + assert_eq!( + output.line(), + *line_number, + "Line number mismatch at position {i}", + ); + break; } - assert_eq!( - lex.line_number(), - *line_number, - "Line number mismatch at position {i}", - ); - break; } } } - } - assert_eq!(lex.next(), None); - } + assert_eq!(lex.next(), None); + } - fn check_lexer_output_indented(source: &str, tokens: &[(Token, Option<&str>, u32, u32)]) { - let mut lex = KotoLexer::new(source); - - for (i, (token, maybe_slice, line_number, indent)) in tokens.iter().enumerate() { - loop { - match lex.next().expect("Expected token") { - Whitespace => continue, - output => { - assert_eq!(&output, token, "Mismatch at token {i}"); - if let Some(slice) = maybe_slice { - assert_eq!(&lex.slice(), slice, "Mismatch at token {i}"); + fn check_lexer_output_indented(source: &str, tokens: &[(Token, Option<&str>, u32, u32)]) { + let mut lex = KotoLexer::new(source); + + for (i, (token, maybe_slice, line_number, indent)) in tokens.iter().enumerate() { + loop { + match lex.next().expect("Expected token") { + LexedToken { + token: Whitespace, .. 
+ } => continue, + output => { + assert_eq!(output.token, *token, "Mismatch at token {i}"); + if let Some(slice) = maybe_slice { + assert_eq!(output.slice(source), *slice, "Mismatch at token {i}"); + } + assert_eq!( + output.line(), + *line_number, + "Line number - expected: {}, actual: {} - (token {i} - {token:?})", + *line_number, + output.line(), + ); + assert_eq!( + output.indent as u32, *indent, + "Indent (token {i} - {token:?})" + ); + break; } - assert_eq!( - lex.line_number(), - *line_number, - "Line number - expected: {}, actual: {} - (token {i} - {token:?})", - *line_number, - lex.line_number(), - ); - assert_eq!(lex.current_indent() as u32, *indent, "Indent (token {i})"); - break; } } } - } - assert_eq!(lex.next(), None); - } + assert_eq!(lex.next(), None); + } - #[test] - fn ids() { - let input = "id id1 id_2 i_d_3 ïd_ƒôûr if iff _ _foo"; - check_lexer_output( - input, - &[ - (Id, Some("id"), 1), - (Id, Some("id1"), 1), - (Id, Some("id_2"), 1), - (Id, Some("i_d_3"), 1), - (Id, Some("ïd_ƒôûr"), 1), - (If, None, 1), - (Id, Some("iff"), 1), - (Wildcard, Some("_"), 1), - (Wildcard, Some("_foo"), 1), - ], - ); - } + #[test] + fn ids() { + let input = "id id1 id_2 i_d_3 ïd_ƒôûr if iff _ _foo"; + check_lexer_output( + input, + &[ + (Id, Some("id"), 1), + (Id, Some("id1"), 1), + (Id, Some("id_2"), 1), + (Id, Some("i_d_3"), 1), + (Id, Some("ïd_ƒôûr"), 1), + (If, None, 1), + (Id, Some("iff"), 1), + (Wildcard, Some("_"), 1), + (Wildcard, Some("_foo"), 1), + ], + ); + } - #[test] - fn indent() { - let input = "\ + #[test] + fn indent() { + let input = "\ if true then foo 1 -bar 2 -x -y"; - check_lexer_output_indented( - input, - &[ - (If, None, 1, 0), - (True, None, 1, 0), - (Then, None, 1, 0), - (NewLineIndented, None, 2, 2), - (Id, Some("foo"), 2, 2), - (Number, Some("1"), 2, 2), - (NewLine, None, 3, 0), - (NewLine, None, 4, 0), - (Id, Some("bar"), 4, 0), - (Number, Some("2"), 4, 0), - (NewLine, None, 5, 0), - (Id, Some("x"), 5, 0), - (NewLine, None, 6, 0), - 
(Id, Some("y"), 6, 0), - ], - ); - } +bar 2"; + check_lexer_output_indented( + input, + &[ + (If, None, 1, 0), + (True, None, 1, 0), + (Then, None, 1, 0), + (NewLine, None, 1, 0), + (Id, Some("foo"), 2, 2), + (Number, Some("1"), 2, 2), + (NewLine, None, 2, 2), + (NewLine, None, 3, 0), + (Id, Some("bar"), 4, 0), + (Number, Some("2"), 4, 0), + ], + ); + } - #[test] - fn comments() { - let input = "\ + #[test] + fn comments() { + let input = "\ # single true #- multiline - false # -# true ()"; - check_lexer_output( - input, - &[ - (CommentSingle, Some("# single"), 1), - (NewLine, None, 2), - (True, None, 2), - (CommentMulti, Some("#-\nmultiline -\nfalse #\n-#"), 5), - (True, None, 5), - (NewLine, None, 6), - (RoundOpen, None, 6), - (RoundClose, None, 6), - ], - ); - } + check_lexer_output( + input, + &[ + (CommentSingle, Some("# single"), 1), + (NewLine, None, 1), + (True, None, 2), + (CommentMulti, Some("#-\nmultiline -\nfalse #\n-#"), 2), + (True, None, 5), + (NewLine, None, 5), + (RoundOpen, None, 6), + (RoundClose, None, 6), + ], + ); + } - #[test] - fn strings() { - let input = r#" + #[test] + fn strings() { + let input = r#" "hello, world!" 
"escaped \\\"\n\$ string" "double-\"quoted\" 'string'" @@ -1024,123 +1070,138 @@ false # "\\" "#; - check_lexer_output( - input, - &[ - (NewLine, None, 2), - (DoubleQuote, None, 2), - (StringLiteral, Some("hello, world!"), 2), - (DoubleQuote, None, 2), - (NewLine, None, 3), - (DoubleQuote, None, 3), - (StringLiteral, Some(r#"escaped \\\"\n\$ string"#), 3), - (DoubleQuote, None, 3), - (NewLine, None, 4), - (DoubleQuote, None, 4), - (StringLiteral, Some(r#"double-\"quoted\" 'string'"#), 4), - (DoubleQuote, None, 4), - (NewLine, None, 5), - (SingleQuote, None, 5), - (StringLiteral, Some(r#"single-\'quoted\' "string""#), 5), - (SingleQuote, None, 5), - (NewLine, None, 6), - (DoubleQuote, None, 6), - (DoubleQuote, None, 6), - (NewLine, None, 7), - (DoubleQuote, None, 7), - (StringLiteral, Some(r"\\"), 7), - (DoubleQuote, None, 7), - (NewLine, None, 8), - ], - ); - } + check_lexer_output( + input, + &[ + (NewLine, None, 1), + (DoubleQuote, None, 2), + (StringLiteral, Some("hello, world!"), 2), + (DoubleQuote, None, 2), + (NewLine, None, 2), + (DoubleQuote, None, 3), + (StringLiteral, Some(r#"escaped \\\"\n\$ string"#), 3), + (DoubleQuote, None, 3), + (NewLine, None, 3), + (DoubleQuote, None, 4), + (StringLiteral, Some(r#"double-\"quoted\" 'string'"#), 4), + (DoubleQuote, None, 4), + (NewLine, None, 4), + (SingleQuote, None, 5), + (StringLiteral, Some(r#"single-\'quoted\' "string""#), 5), + (SingleQuote, None, 5), + (NewLine, None, 5), + (DoubleQuote, None, 6), + (DoubleQuote, None, 6), + (NewLine, None, 6), + (DoubleQuote, None, 7), + (StringLiteral, Some(r"\\"), 7), + (DoubleQuote, None, 7), + (NewLine, None, 7), + ], + ); + } + + #[test] + fn raw_strings() { + let input = r#" +r'$foo' +"#; + + check_lexer_output( + input, + &[ + (NewLine, None, 1), + (RawStringStart, None, 2), + (StringLiteral, Some("$foo"), 2), + (RawStringEnd, None, 2), + (NewLine, None, 2), + ], + ); + } - #[test] - fn interpolated_string_ids() { - let input = r#" + #[test] + fn 
interpolated_string_ids() { + let input = r#" "hello $name, how are you?" '$foo$bar' "#; - check_lexer_output( - input, - &[ - (NewLine, None, 2), - (DoubleQuote, None, 2), - (StringLiteral, Some("hello "), 2), - (Dollar, None, 2), - (Id, Some("name"), 2), - (StringLiteral, Some(", how are you?"), 2), - (DoubleQuote, None, 2), - (NewLine, None, 3), - (SingleQuote, None, 3), - (Dollar, None, 3), - (Id, Some("foo"), 3), - (Dollar, None, 3), - (Id, Some("bar"), 3), - (SingleQuote, None, 3), - (NewLine, None, 4), - ], - ); - } + check_lexer_output( + input, + &[ + (NewLine, None, 1), + (DoubleQuote, None, 2), + (StringLiteral, Some("hello "), 2), + (Dollar, None, 2), + (Id, Some("name"), 2), + (StringLiteral, Some(", how are you?"), 2), + (DoubleQuote, None, 2), + (NewLine, None, 2), + (SingleQuote, None, 3), + (Dollar, None, 3), + (Id, Some("foo"), 3), + (Dollar, None, 3), + (Id, Some("bar"), 3), + (SingleQuote, None, 3), + (NewLine, None, 3), + ], + ); + } - #[test] - fn interpolated_string_expressions() { - let input = r#" + #[test] + fn interpolated_string_expressions() { + let input = r#" "x + y == ${x + y}" '${'{}'.format foo}' "#; - check_lexer_output( - input, - &[ - (NewLine, None, 2), - (DoubleQuote, None, 2), - (StringLiteral, Some("x + y == "), 2), - (Dollar, None, 2), - (CurlyOpen, None, 2), - (Id, Some("x"), 2), - (Add, None, 2), - (Id, Some("y"), 2), - (CurlyClose, None, 2), - (DoubleQuote, None, 2), - (NewLine, None, 3), - (SingleQuote, None, 3), - (Dollar, None, 3), - (CurlyOpen, None, 3), - (SingleQuote, None, 3), - (StringLiteral, Some("{}"), 3), - (SingleQuote, None, 3), - (Dot, None, 3), - (Id, Some("format"), 3), - (Id, Some("foo"), 3), - (CurlyClose, None, 3), - (SingleQuote, None, 3), - (NewLine, None, 4), - ], - ); - } + check_lexer_output( + input, + &[ + (NewLine, None, 1), + (DoubleQuote, None, 2), + (StringLiteral, Some("x + y == "), 2), + (Dollar, None, 2), + (CurlyOpen, None, 2), + (Id, Some("x"), 2), + (Add, None, 2), + (Id, Some("y"), 
2), + (CurlyClose, None, 2), + (DoubleQuote, None, 2), + (NewLine, None, 2), + (SingleQuote, None, 3), + (Dollar, None, 3), + (CurlyOpen, None, 3), + (SingleQuote, None, 3), + (StringLiteral, Some("{}"), 3), + (SingleQuote, None, 3), + (Dot, None, 3), + (Id, Some("format"), 3), + (Id, Some("foo"), 3), + (CurlyClose, None, 3), + (SingleQuote, None, 3), + (NewLine, None, 3), + ], + ); + } - #[test] - fn operators() { - let input = r#" -> >= >> < <= -"#; - check_lexer_output( - input, - &[ - (NewLine, None, 2), - (Greater, None, 2), - (GreaterOrEqual, None, 2), - (Pipe, None, 2), - (Less, None, 2), - (LessOrEqual, None, 2), - (NewLine, None, 3), - ], - ); - } + #[test] + fn operators() { + let input = "> >= >> < <="; + + check_lexer_output( + input, + &[ + (Greater, None, 1), + (GreaterOrEqual, None, 1), + (Pipe, None, 1), + (Less, None, 1), + (LessOrEqual, None, 1), + ], + ); + } - #[test] - fn numbers() { - let input = "\ + #[test] + fn numbers() { + let input = "\ 123 55.5 -1e-3 @@ -1150,260 +1211,309 @@ false # 0xABADCAFE 0o707606 0b1010101"; - check_lexer_output( - input, - &[ - (Number, Some("123"), 1), - (NewLine, None, 2), - (Number, Some("55.5"), 2), - (NewLine, None, 3), - (Subtract, None, 3), - (Number, Some("1e-3"), 3), - (NewLine, None, 4), - (Number, Some("0.5e+9"), 4), - (NewLine, None, 5), - (Subtract, None, 5), - (Number, Some("8e8"), 5), - (NewLine, None, 6), - (Number, Some("0xabadcafe"), 6), - (NewLine, None, 7), - (Number, Some("0xABADCAFE"), 7), - (NewLine, None, 8), - (Number, Some("0o707606"), 8), - (NewLine, None, 9), - (Number, Some("0b1010101"), 9), - ], - ); - } + check_lexer_output( + input, + &[ + (Number, Some("123"), 1), + (NewLine, None, 1), + (Number, Some("55.5"), 2), + (NewLine, None, 2), + (Subtract, None, 3), + (Number, Some("1e-3"), 3), + (NewLine, None, 3), + (Number, Some("0.5e+9"), 4), + (NewLine, None, 4), + (Subtract, None, 5), + (Number, Some("8e8"), 5), + (NewLine, None, 5), + (Number, Some("0xabadcafe"), 6), + (NewLine, 
None, 6), + (Number, Some("0xABADCAFE"), 7), + (NewLine, None, 7), + (Number, Some("0o707606"), 8), + (NewLine, None, 8), + (Number, Some("0b1010101"), 9), + ], + ); + } - #[test] - fn lookups_on_numbers() { - let input = "\ + #[test] + fn lookups_on_numbers() { + let input = "\ 1.0.sin() -1e-3.abs() 1.min x 9.exp()"; - check_lexer_output( - input, - &[ - (Number, Some("1.0"), 1), - (Dot, None, 1), - (Id, Some("sin"), 1), - (RoundOpen, None, 1), - (RoundClose, None, 1), - (NewLine, None, 2), - (Subtract, None, 2), - (Number, Some("1e-3"), 2), - (Dot, None, 2), - (Id, Some("abs"), 2), - (RoundOpen, None, 2), - (RoundClose, None, 2), - (NewLine, None, 3), - (Number, Some("1"), 3), - (Dot, None, 3), - (Id, Some("min"), 3), - (Id, Some("x"), 3), - (NewLine, None, 4), - (Number, Some("9"), 4), - (Dot, None, 4), - (Id, Some("exp"), 4), - (RoundOpen, None, 4), - (RoundClose, None, 4), - ], - ); - } + check_lexer_output( + input, + &[ + (Number, Some("1.0"), 1), + (Dot, None, 1), + (Id, Some("sin"), 1), + (RoundOpen, None, 1), + (RoundClose, None, 1), + (NewLine, None, 1), + (Subtract, None, 2), + (Number, Some("1e-3"), 2), + (Dot, None, 2), + (Id, Some("abs"), 2), + (RoundOpen, None, 2), + (RoundClose, None, 2), + (NewLine, None, 2), + (Number, Some("1"), 3), + (Dot, None, 3), + (Id, Some("min"), 3), + (Id, Some("x"), 3), + (NewLine, None, 3), + (Number, Some("9"), 4), + (Dot, None, 4), + (Id, Some("exp"), 4), + (RoundOpen, None, 4), + (RoundClose, None, 4), + ], + ); + } - #[test] - fn modify_assign() { - let input = "\ + #[test] + fn modify_assign() { + let input = "\ a += 1 b -= 2 c *= 3"; - check_lexer_output( - input, - &[ - (Id, Some("a"), 1), - (AddAssign, None, 1), - (Number, Some("1"), 1), - (NewLine, None, 2), - (Id, Some("b"), 2), - (SubtractAssign, None, 2), - (Number, Some("2"), 2), - (NewLine, None, 3), - (Id, Some("c"), 3), - (MultiplyAssign, None, 3), - (Number, Some("3"), 3), - ], - ); - } + check_lexer_output( + input, + &[ + (Id, Some("a"), 1), + 
(AddAssign, None, 1), + (Number, Some("1"), 1), + (NewLine, None, 1), + (Id, Some("b"), 2), + (SubtractAssign, None, 2), + (Number, Some("2"), 2), + (NewLine, None, 2), + (Id, Some("c"), 3), + (MultiplyAssign, None, 3), + (Number, Some("3"), 3), + ], + ); + } - #[test] - fn ranges() { - let input = "\ + #[test] + fn ranges() { + let input = "\ a[..=9] x = [i for i in 0..5]"; - check_lexer_output( - input, - &[ - (Id, Some("a"), 1), - (SquareOpen, None, 1), - (RangeInclusive, None, 1), - (Number, Some("9"), 1), - (SquareClose, None, 1), - (NewLine, None, 2), - (Id, Some("x"), 2), - (Assign, None, 2), - (SquareOpen, None, 2), - (Id, Some("i"), 2), - (For, None, 2), - (Id, Some("i"), 2), - (In, None, 2), - (Number, Some("0"), 2), - (Range, None, 2), - (Number, Some("5"), 2), - (SquareClose, None, 2), - ], - ); - } + check_lexer_output( + input, + &[ + (Id, Some("a"), 1), + (SquareOpen, None, 1), + (RangeInclusive, None, 1), + (Number, Some("9"), 1), + (SquareClose, None, 1), + (NewLine, None, 1), + (Id, Some("x"), 2), + (Assign, None, 2), + (SquareOpen, None, 2), + (Id, Some("i"), 2), + (For, None, 2), + (Id, Some("i"), 2), + (In, None, 2), + (Number, Some("0"), 2), + (Range, None, 2), + (Number, Some("5"), 2), + (SquareClose, None, 2), + ], + ); + } - #[test] - fn function() { - let input = "\ + #[test] + fn function() { + let input = "\ export f = |a, b...| c = a + b.size() c f()"; - check_lexer_output_indented( - input, - &[ - (Export, None, 1, 0), - (Id, Some("f"), 1, 0), - (Assign, None, 1, 0), - (Function, None, 1, 0), - (Id, Some("a"), 1, 0), - (Comma, None, 1, 0), - (Id, Some("b"), 1, 0), - (Ellipsis, None, 1, 0), - (Function, None, 1, 0), - (NewLineIndented, None, 2, 2), - (Id, Some("c"), 2, 2), - (Assign, None, 2, 2), - (Id, Some("a"), 2, 2), - (Add, None, 2, 2), - (Id, Some("b"), 2, 2), - (Dot, None, 2, 2), - (Id, Some("size"), 2, 2), - (RoundOpen, None, 2, 2), - (RoundClose, None, 2, 2), - (NewLineIndented, None, 3, 2), - (Id, Some("c"), 3, 2), - (NewLine, 
None, 4, 0), - (Id, Some("f"), 4, 0), - (RoundOpen, None, 4, 0), - (RoundClose, None, 4, 0), - ], - ); - } + check_lexer_output_indented( + input, + &[ + (Export, None, 1, 0), + (Id, Some("f"), 1, 0), + (Assign, None, 1, 0), + (Function, None, 1, 0), + (Id, Some("a"), 1, 0), + (Comma, None, 1, 0), + (Id, Some("b"), 1, 0), + (Ellipsis, None, 1, 0), + (Function, None, 1, 0), + (NewLine, None, 1, 0), + (Id, Some("c"), 2, 2), + (Assign, None, 2, 2), + (Id, Some("a"), 2, 2), + (Add, None, 2, 2), + (Id, Some("b"), 2, 2), + (Dot, None, 2, 2), + (Id, Some("size"), 2, 2), + (RoundOpen, None, 2, 2), + (RoundClose, None, 2, 2), + (NewLine, None, 2, 2), + (Id, Some("c"), 3, 2), + (NewLine, None, 3, 2), + (Id, Some("f"), 4, 0), + (RoundOpen, None, 4, 0), + (RoundClose, None, 4, 0), + ], + ); + } - #[test] - fn if_inline() { - let input = "1 + if true then 0 else 1"; - check_lexer_output( - input, - &[ - (Number, Some("1"), 1), - (Add, None, 1), - (If, None, 1), - (True, None, 1), - (Then, None, 1), - (Number, Some("0"), 1), - (Else, None, 1), - (Number, Some("1"), 1), - ], - ); - } + #[test] + fn if_inline() { + let input = "1 + if true then 0 else 1"; + check_lexer_output( + input, + &[ + (Number, Some("1"), 1), + (Add, None, 1), + (If, None, 1), + (True, None, 1), + (Then, None, 1), + (Number, Some("0"), 1), + (Else, None, 1), + (Number, Some("1"), 1), + ], + ); + } - #[test] - fn if_block() { - let input = "\ + #[test] + fn if_block() { + let input = "\ if true 0 else if false 1 else 0"; - check_lexer_output_indented( - input, - &[ - (If, None, 1, 0), - (True, None, 1, 0), - (NewLineIndented, None, 2, 2), - (Number, Some("0"), 2, 2), - (NewLine, None, 3, 0), - (ElseIf, None, 3, 0), - (False, None, 3, 0), - (NewLineIndented, None, 4, 2), - (Number, Some("1"), 4, 2), - (NewLine, None, 5, 0), - (Else, None, 5, 0), - (NewLineIndented, None, 6, 2), - (Number, Some("0"), 6, 2), - ], - ); - } + check_lexer_output_indented( + input, + &[ + (If, None, 1, 0), + (True, None, 1, 0), + 
(NewLine, None, 1, 0), + (Number, Some("0"), 2, 2), + (NewLine, None, 2, 2), + (ElseIf, None, 3, 0), + (False, None, 3, 0), + (NewLine, None, 3, 0), + (Number, Some("1"), 4, 2), + (NewLine, None, 4, 2), + (Else, None, 5, 0), + (NewLine, None, 5, 0), + (Number, Some("0"), 6, 2), + ], + ); + } - #[test] - fn map_lookup() { - let input = "m.检验.foo[1].bär()"; - - check_lexer_output( - input, - &[ - (Id, Some("m"), 1), - (Dot, None, 1), - (Id, Some("检验"), 1), - (Dot, None, 1), - (Id, Some("foo"), 1), - (SquareOpen, None, 1), - (Number, Some("1"), 1), - (SquareClose, None, 1), - (Dot, None, 1), - (Id, Some("bär"), 1), - (RoundOpen, None, 1), - (RoundClose, None, 1), - ], - ); - } + #[test] + fn map_lookup() { + let input = "m.检验.foo[1].bär()"; + + check_lexer_output( + input, + &[ + (Id, Some("m"), 1), + (Dot, None, 1), + (Id, Some("检验"), 1), + (Dot, None, 1), + (Id, Some("foo"), 1), + (SquareOpen, None, 1), + (Number, Some("1"), 1), + (SquareClose, None, 1), + (Dot, None, 1), + (Id, Some("bär"), 1), + (RoundOpen, None, 1), + (RoundClose, None, 1), + ], + ); + } - #[test] - fn map_lookup_with_keyword_as_key() { - let input = "foo.and()"; - - check_lexer_output( - input, - &[ - (Id, Some("foo"), 1), - (Dot, None, 1), - (Id, Some("and"), 1), - (RoundOpen, None, 1), - (RoundClose, None, 1), - ], - ); + #[test] + fn map_lookup_with_keyword_as_key() { + let input = "foo.and()"; + + check_lexer_output( + input, + &[ + (Id, Some("foo"), 1), + (Dot, None, 1), + (Id, Some("and"), 1), + (RoundOpen, None, 1), + (RoundClose, None, 1), + ], + ); + } + + #[test] + fn windows_line_endings() { + let input = "123\r\n456\r\n789"; + + check_lexer_output( + input, + &[ + (Number, Some("123"), 1), + (NewLine, None, 1), + (Number, Some("456"), 2), + (NewLine, None, 2), + (Number, Some("789"), 3), + ], + ); + } } - #[test] - fn windows_line_endings() { - let input = "123\r\n456\r\n789"; - - check_lexer_output( - input, - &[ - (Number, Some("123"), 1), - (NewLine, None, 2), - (Number, 
Some("456"), 2), - (NewLine, None, 3), - (Number, Some("789"), 3), - ], - ); + mod peek { + use super::*; + + #[test] + fn lookup_in_list() { + let source = " +[foo.bar] +"; + let mut lex = KotoLexer::new(source); + assert_eq!(lex.peek(0).unwrap().token, Token::NewLine); + assert_eq!(lex.peek(1).unwrap().token, Token::SquareOpen); + assert_eq!(lex.peek(2).unwrap().token, Token::Id); + assert_eq!(lex.peek(2).unwrap().slice(source), "foo"); + assert_eq!(lex.peek(3).unwrap().token, Token::Dot); + assert_eq!(lex.peek(4).unwrap().token, Token::Id); + assert_eq!(lex.peek(4).unwrap().slice(source), "bar"); + assert_eq!(lex.peek(5).unwrap().token, Token::SquareClose); + assert_eq!(lex.peek(6).unwrap().token, Token::NewLine); + assert_eq!(lex.peek(7), None); + } + + #[test] + fn multiline_lookup() { + let source = " +x.iter() + .skip 1 +"; + let mut lex = KotoLexer::new(source); + assert_eq!(lex.peek(0).unwrap().token, Token::NewLine); + assert_eq!(lex.peek(1).unwrap().token, Token::Id); + assert_eq!(lex.peek(1).unwrap().slice(source), "x"); + assert_eq!(lex.peek(2).unwrap().token, Token::Dot); + assert_eq!(lex.peek(3).unwrap().token, Token::Id); + assert_eq!(lex.peek(3).unwrap().slice(source), "iter"); + assert_eq!(lex.peek(4).unwrap().token, Token::RoundOpen); + assert_eq!(lex.peek(5).unwrap().token, Token::RoundClose); + assert_eq!(lex.peek(6).unwrap().token, Token::NewLine); + assert_eq!(lex.peek(7).unwrap().token, Token::Whitespace); + assert_eq!(lex.peek(8).unwrap().token, Token::Dot); + assert_eq!(lex.peek(9).unwrap().token, Token::Id); + assert_eq!(lex.peek(9).unwrap().slice(source), "skip"); + assert_eq!(lex.peek(10).unwrap().token, Token::Whitespace); + assert_eq!(lex.peek(11).unwrap().token, Token::Number); + assert_eq!(lex.peek(12).unwrap().token, Token::NewLine); + assert_eq!(lex.peek(13), None); + } } } diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs index 2d9478a42..675b696bb 100644 --- a/crates/lexer/src/lib.rs +++ b/crates/lexer/src/lib.rs @@ 
-6,6 +6,6 @@ mod lexer; mod span; pub use crate::{ - lexer::{is_id_continue, is_id_start, KotoLexer as Lexer, Token}, + lexer::{is_id_continue, is_id_start, KotoLexer as Lexer, LexedToken, Token}, span::{Position, Span}, }; diff --git a/crates/parser/src/error.rs b/crates/parser/src/error.rs index 6a04b4cc5..252b28f18 100644 --- a/crates/parser/src/error.rs +++ b/crates/parser/src/error.rs @@ -22,6 +22,8 @@ pub enum InternalError { MissingFrame, #[error("Failed to parse number")] NumberParseFailure, + #[error("Failed to parse raw string")] + RawStringParseFailure, #[error("Unexpected token")] UnexpectedToken, } diff --git a/crates/parser/src/node.rs b/crates/parser/src/node.rs index b4c931b71..690233753 100644 --- a/crates/parser/src/node.rs +++ b/crates/parser/src/node.rs @@ -350,15 +350,26 @@ pub struct Function { pub struct AstString { /// Indicates if single or double quotation marks were used pub quotation_mark: QuotationMark, - /// A series of string nodes + /// The string's contents + pub contents: StringContents, +} + +/// The contents of an [AstString] +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum StringContents { + /// A string literal + Literal(ConstantIndex), + /// A raw string literal + Raw(ConstantIndex), + /// An interpolated string /// - /// A string is made up of a series of literals and template expressions, + /// An interpolated string is made up of a series of literals and template expressions, /// which are then joined together using a string builder. 
- pub nodes: Vec, + Interpolated(Vec), } /// A node in a string definition -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum StringNode { /// A string literal Literal(ConstantIndex), diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index da36163f0..73408ea86 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -5,7 +5,7 @@ use crate::{ error::{ExpectedIndentation, InternalError, ParserError, ParserErrorKind, SyntaxError}, *, }; -use koto_lexer::{Lexer, Span, Token}; +use koto_lexer::{LexedToken, Lexer, Span, Token}; use std::{collections::HashSet, str::FromStr}; // Contains info about the current frame, representing either the module's top level or a function @@ -18,7 +18,7 @@ struct Frame { // IDs and lookup roots which were accessed when not locally assigned at the time of access accessed_non_locals: HashSet, // While expressions are being parsed we keep track of lhs assignments and rhs accesses. - // At the end of a multi-assignment expresson (see `finalize_id_accesses`), + // At the end of a multi-assignment expression (see `finalize_id_accesses`), // accessed IDs that weren't locally assigned at the time of access are then counted as // non-local accesses. pending_accesses: HashSet, @@ -115,7 +115,7 @@ struct ExpressionContext { // The indentation that should be expected on following lines for an expression to continue #[derive(Clone, Copy, Debug)] enum Indentation { - // Indentation isn't required + // Indentation isn't required on following lines // (e.g. 
in a comma separated braced expression) Flexible, // Indentation should match the expected indentation @@ -226,9 +226,12 @@ impl ExpressionContext { /// Koto's parser pub struct Parser<'source> { + source: &'source str, ast: Ast, constants: ConstantPoolBuilder, lexer: Lexer<'source>, + current_token: LexedToken, + current_line: u32, frame_stack: Vec, } @@ -237,9 +240,12 @@ impl<'source> Parser<'source> { pub fn parse(source: &'source str) -> Result { let capacity_guess = source.len() / 4; let mut parser = Parser { + source, ast: Ast::with_capacity(capacity_guess), constants: ConstantPoolBuilder::default(), lexer: Lexer::new(source), + current_token: LexedToken::default(), + current_line: 1, frame_stack: Vec::new(), }; @@ -270,7 +276,7 @@ impl<'source> Parser<'source> { body.push(expression); match self.peek_next_token_on_same_line() { - Some(Token::NewLine | Token::NewLineIndented) => continue, + Some(Token::NewLine) => continue, None => break, _ => return self.consume_token_and_error(SyntaxError::UnexpectedToken), } @@ -305,7 +311,7 @@ impl<'source> Parser<'source> { let start_indent = self.current_indent(); match self.peek_token_with_context(&block_context) { - Some(peeked) if peeked.indent > start_indent => {} + Some(peeked) if peeked.info.indent > start_indent => {} _ => return Ok(None), // No indented block found } @@ -329,7 +335,7 @@ impl<'source> Parser<'source> { match self.peek_next_token_on_same_line() { None => break, - Some(Token::NewLine | Token::NewLineIndented) => {} + Some(Token::NewLine) => {} _ => return self.consume_token_and_error(SyntaxError::UnexpectedToken), } @@ -367,13 +373,13 @@ impl<'source> Parser<'source> { context: &ExpressionContext, temp_result: TempResult, ) -> Result, ParserError> { - let start_line = self.current_line_number(); - let mut expression_context = ExpressionContext { allow_space_separated_call: true, ..*context }; + let start_line = self.current_line; + let Some(first) = self.parse_expression(&expression_context)? 
else { return Ok(None); }; @@ -387,7 +393,7 @@ impl<'source> Parser<'source> { encountered_comma = true; - if !encountered_linebreak && self.current_line_number() > start_line { + if !encountered_linebreak && self.current_line > start_line { // e.g. // x, y = // 1, # <- We're here, and want following values to have matching @@ -398,7 +404,6 @@ impl<'source> Parser<'source> { encountered_linebreak = true; } - // if let Some(next_expression) = self.parse_expression_start(&expressions, 0, &expression_context)? { @@ -472,59 +477,36 @@ impl<'source> Parser<'source> { min_precedence: u8, context: &ExpressionContext, ) -> Result, ParserError> { - let entry_indent = self.current_indent(); - let entry_line = self.current_line_number(); + let entry_line = self.current_line; - // Look ahead to get the indent of the first term in the expression. + // Look ahead to get the indent of the first token in the expression. // We need to look ahead here because the term may contain its own indentation, // so it may end with different indentation. - let expression_start_info = self.peek_token_with_context(context); + let Some(start_info) = self.peek_token_with_context(context) else { + return Ok(None); + }; let expression_start = match self.parse_term(context)? { Some(term) => term, None => return Ok(None), }; - // Safety: it's OK to unwrap here given that a term was successfully parsed - let expression_start_info = expression_start_info.unwrap(); - - let continuation_context = if self.current_line_number() > entry_line { - if expression_start_info.line == entry_line { - // The term started on the entry line and ended on a following line. - // - // e.g. - // foo = ( 1 - // + 2 ) - // + 3 - // # ^ entry indent - // # ^ expression start indent - // # ^ expression end indent - // # ^ continuation indent - // - // A continuation of the expression from here should then be greater than the entry - // indent, rather than greater than the current (expression end) indent. 
- context.with_expected_indentation(Indentation::GreaterThan(entry_indent)) - } else { - // The term started on a following line. - // - // An indent has already occurred for the start term, so then we can allow an - // expression to continue with greater or equal indentation. - // - // e.g. - // foo = - // ( 1 - // + 2 ) - // + 3 - // # ^ entry indent - // # ^ expression start indent - // # ^ expression end indent - // # ^ continuation indent - // - // A continuation of the expression from here should be allowed to match the - // expression start indent. - context.with_expected_indentation(Indentation::GreaterOrEqual( - expression_start_info.indent, - )) + let continuation_context = if self.current_line > entry_line { + match context.expected_indentation { + Indentation::Equal(indent) + | Indentation::GreaterThan(indent) + | Indentation::GreaterOrEqual(indent) => { + // If the context has a fixed indentation requirement, then allow the + // indentation for the continued expression to grow or stay the same + context.with_expected_indentation(Indentation::GreaterOrEqual(indent)) + } + Indentation::Greater | Indentation::Flexible => { + // Indentation within an arithmetic expression shouldn't be able to continue + // with decreased indentation + context.with_expected_indentation(Indentation::GreaterOrEqual( + start_info.info.indent, + )) + } } } else { *context @@ -549,40 +531,50 @@ impl<'source> Parser<'source> { min_precedence: u8, context: &ExpressionContext, ) -> Result, ParserError> { - let context = match context.expected_indentation { - Indentation::Equal(indent) => { - // If the context has fixed indentation (e.g. 
at the start of an indented block), - // allow the indentation to increase - context.with_expected_indentation(Indentation::GreaterOrEqual(indent)) - } - Indentation::Flexible => { - // Indentation within an arithmetic expression shouldn't be able to continue with - // decreased indentation - context - .with_expected_indentation(Indentation::GreaterOrEqual(self.current_indent())) - } - _ => *context, - }; + let start_line = self.current_line; + let start_indent = self.current_indent(); if let Some(assignment_expression) = - self.parse_assign_expression(expression_start, previous_expressions, &context)? + self.parse_assign_expression(expression_start, previous_expressions, context)? { return Ok(Some(assignment_expression)); - } else if let Some(next) = self.peek_token_with_context(&context) { + } else if let Some(next) = self.peek_token_with_context(context) { if let Some((left_priority, right_priority)) = operator_precedence(next.token) { if left_priority >= min_precedence { - let (op, context) = self.consume_token_with_context(&context).unwrap(); + let (op, _) = self.consume_token_with_context(context).unwrap(); let op_span = self.current_span(); // Move on to the token after the operator - if self.peek_token_with_context(&context).is_none() { + if self.peek_token_with_context(context).is_none() { return self.consume_token_on_same_line_and_error( ExpectedIndentation::RhsExpression, ); } - let context = self.consume_until_token_with_context(&context).unwrap(); - - let Some(rhs) = self.parse_expression_start(&[], right_priority, &context)? 
+ self.consume_until_token_with_context(context).unwrap(); + + let rhs_context = if self.current_line > start_line { + match context.expected_indentation { + Indentation::Equal(indent) + | Indentation::GreaterThan(indent) + | Indentation::GreaterOrEqual(indent) => { + // If the context has a fixed indentation requirement, then allow the + // indentation for the continued expression to grow or stay the same + context + .with_expected_indentation(Indentation::GreaterOrEqual(indent)) + } + Indentation::Greater | Indentation::Flexible => { + // Indentation within an arithmetic expression shouldn't be able to continue + // with decreased indentation + context.with_expected_indentation(Indentation::GreaterOrEqual( + start_indent, + )) + } + } + } else { + *context + }; + let Some(rhs) = + self.parse_expression_start(&[], right_priority, &rhs_context)? else { return self.consume_token_on_same_line_and_error( ExpectedIndentation::RhsExpression, @@ -629,7 +621,12 @@ impl<'source> Parser<'source> { op_span, )?; - return self.parse_expression_continued(op_node, &[], min_precedence, &context); + return self.parse_expression_continued( + op_node, + &[], + min_precedence, + &rhs_context, + ); } } } @@ -732,7 +729,7 @@ impl<'source> Parser<'source> { } Token::RoundOpen => self.consume_tuple(context), Token::Number => self.consume_number(false, context), - Token::DoubleQuote | Token::SingleQuote => { + Token::DoubleQuote | Token::SingleQuote | Token::RawStringStart => { let string = self.parse_string(context)?.unwrap(); if self.peek_token() == Some(Token::Colon) { @@ -749,7 +746,8 @@ impl<'source> Parser<'source> { Token::Id => self.consume_id_expression(context), Token::Self_ => self.consume_self_expression(context), Token::At => { - let map_block_allowed = context.allow_map_block || peeked.indent > start_indent; + let map_block_allowed = + context.allow_map_block || peeked.info.indent > start_indent; let meta_context = self.consume_until_token_with_context(context).unwrap(); // 
Safe to unwrap here, parse_meta_key would error on invalid key @@ -786,7 +784,7 @@ impl<'source> Parser<'source> { Token::Switch => self.parse_switch_expression(context), Token::Function => self.consume_function(context), Token::Subtract => match self.peek_token_n(peeked.peek_count + 1) { - Some(token) if token.is_whitespace() || token.is_newline() => return Ok(None), + Some(token) if token.is_whitespace_including_newline() => return Ok(None), Some(Token::Number) => { self.consume_token_with_context(context); // Token::Subtract self.consume_number(true, context) @@ -1103,14 +1101,14 @@ impl<'source> Parser<'source> { ..*context }; - let mut last_arg_line = self.current_line_number(); + let mut last_arg_line = self.current_line; while let Some(peeked) = self.peek_token_with_context(&arg_context) { - let new_line = peeked.line > last_arg_line; - last_arg_line = peeked.line; + let new_line = peeked.info.line() > last_arg_line; + last_arg_line = peeked.info.line(); if new_line { - arg_context.expected_indentation = Indentation::Equal(peeked.indent); + arg_context.expected_indentation = Indentation::Equal(peeked.info.indent); } else if self.peek_token() != Some(Token::Whitespace) { break; } @@ -1136,7 +1134,7 @@ impl<'source> Parser<'source> { // Parses a single id // - // See also: parse_id_or_wildcard(), parse_id_expression() + // See also: parse_id_or_wildcard(), consume_id_expression() fn parse_id( &mut self, context: &ExpressionContext, @@ -1146,7 +1144,7 @@ impl<'source> Parser<'source> { token: Token::Id, .. 
}) => { let (_, id_context) = self.consume_token_with_context(context).unwrap(); - let constant_index = self.add_string_constant(self.lexer.slice())?; + let constant_index = self.add_current_slice_as_string_constant()?; Ok(Some((constant_index, id_context))) } _ => Ok(None), @@ -1156,7 +1154,7 @@ impl<'source> Parser<'source> { // Parses a single `_` wildcard, along with its optional following id fn consume_wildcard(&mut self, context: &ExpressionContext) -> Result { self.consume_token_with_context(context); - let slice = self.lexer.slice(); + let slice = self.current_token.slice(self.source); let maybe_id = if slice.len() > 1 { Some(self.add_string_constant(&slice[1..])?) } else { @@ -1177,7 +1175,7 @@ impl<'source> Parser<'source> { token: Token::Id, .. }) => { self.consume_token_with_context(context); - self.add_string_constant(self.lexer.slice()) + self.add_current_slice_as_string_constant() .map(|result| Some(IdOrWildcard::Id(result))) } Some(PeekInfo { @@ -1185,7 +1183,7 @@ impl<'source> Parser<'source> { .. }) => { self.consume_token_with_context(context); - let slice = self.lexer.slice(); + let slice = self.current_token.slice(self.source); let maybe_id = if slice.len() > 1 { Some(self.add_string_constant(&slice[1..])?) } else { @@ -1308,7 +1306,7 @@ impl<'source> Parser<'source> { context: &ExpressionContext, ) -> Result { let mut lookup = Vec::new(); - let mut lookup_line = self.current_line_number(); + let mut lookup_line = self.current_line; let mut node_context = *context; let mut node_start_span = self.current_span(); @@ -1378,7 +1376,7 @@ impl<'source> Parser<'source> { .unwrap(); // Check that the next dot is on an indented line - if self.current_line_number() == lookup_line { + if self.current_line == lookup_line { // TODO Error here? 
break; } @@ -1407,7 +1405,7 @@ impl<'source> Parser<'source> { // ~~~~~~~ // Allow a map block if we're on an indented line - node_context.allow_map_block = peeked.line > lookup_line; + node_context.allow_map_block = peeked.info.line() > lookup_line; let args = self.parse_call_args(&node_context)?; @@ -1429,7 +1427,7 @@ impl<'source> Parser<'source> { } } - lookup_line = self.current_line_number(); + lookup_line = self.current_line; } } } @@ -1628,19 +1626,20 @@ impl<'source> Parser<'source> { let start_position = self.current_span().start; - self.consume_until_next_token_on_same_line(); - let context = ExpressionContext::permissive(); - let expression_source_start = self.lexer.source_position(); + let Some(expression_start_info) = self.peek_token_with_context(&context) else { + return self.consume_token_and_error(SyntaxError::ExpectedExpression); + }; + let expression_source_start = expression_start_info.info.source_bytes.start; + let Some(expression) = self.parse_expressions(&context, TempResult::No)? 
else { return self.consume_token_and_error(SyntaxError::ExpectedExpression); }; - let expression_source_end = self.lexer.source_position(); + let expression_source_end = self.current_token.source_bytes.end; - let expression_string = self.add_string_constant( - &self.lexer.source()[expression_source_start..expression_source_end], - )?; + let expression_string = + self.add_string_constant(&self.source[expression_source_start..expression_source_end])?; self.ast.push( Node::Debug { @@ -1663,7 +1662,7 @@ impl<'source> Parser<'source> { self.consume_token_with_context(context); // Token::Number - let slice = self.lexer.slice(); + let slice = self.current_token.slice(self.source); let maybe_integer = if let Some(hex) = slice.strip_prefix("0x") { i64::from_str_radix(hex, 16) @@ -2011,7 +2010,7 @@ impl<'source> Parser<'source> { Some(Token::Equal) => MetaKeyId::Equal, Some(Token::NotEqual) => MetaKeyId::NotEqual, Some(Token::Not) => MetaKeyId::Not, - Some(Token::Id) => match self.lexer.slice() { + Some(Token::Id) => match self.current_token.slice(self.source) { "display" => MetaKeyId::Display, "iterator" => MetaKeyId::Iterator, "next" => MetaKeyId::Next, @@ -2025,7 +2024,7 @@ impl<'source> Parser<'source> { "post_test" => MetaKeyId::PostTest, "test" => match self.consume_next_token_on_same_line() { Some(Token::Id) => { - let test_name = self.add_string_constant(self.lexer.slice())?; + let test_name = self.add_current_slice_as_string_constant()?; meta_name = Some(test_name); MetaKeyId::Test } @@ -2033,7 +2032,7 @@ impl<'source> Parser<'source> { }, "meta" => match self.consume_next_token_on_same_line() { Some(Token::Id) => { - let id = self.add_string_constant(self.lexer.slice())?; + let id = self.add_current_slice_as_string_constant()?; meta_name = Some(id); MetaKeyId::Named } @@ -2768,6 +2767,10 @@ impl<'source> Parser<'source> { token: SingleQuote | DoubleQuote, .. }) => {} + Some(PeekInfo { + token: RawStringStart, + .. 
+ }) => return self.consume_raw_string(context), _ => return Ok(None), } @@ -2778,7 +2781,7 @@ impl<'source> Parser<'source> { while let Some(next_token) = self.consume_token() { match next_token { StringLiteral => { - let string_literal = self.lexer.slice(); + let string_literal = self.current_token.slice(self.source); let mut literal = String::with_capacity(string_literal.len()); let mut chars = string_literal.chars().peekable(); @@ -2871,7 +2874,7 @@ impl<'source> Parser<'source> { Dollar => match self.peek_token() { Some(Id) => { self.consume_token(); - let id = self.add_string_constant(self.lexer.slice())?; + let id = self.add_current_slice_as_string_constant()?; self.frame_mut()?.add_id_access(id); let id_node = self.push_node(Node::Id(id))?; nodes.push(StringNode::Expr(id_node)); @@ -2903,14 +2906,16 @@ impl<'source> Parser<'source> { QuotationMark::Double }; - if nodes.is_empty() { - nodes.push(StringNode::Literal(self.add_string_constant("")?)); - } + let contents = match nodes.as_slice() { + [] => StringContents::Literal(self.add_string_constant("")?), + [StringNode::Literal(literal)] => StringContents::Literal(*literal), + _ => StringContents::Interpolated(nodes), + }; return Ok(Some(ParseStringOutput { string: AstString { quotation_mark, - nodes, + contents, }, span: self.span_with_start(start_span), context: string_context, @@ -2923,6 +2928,44 @@ impl<'source> Parser<'source> { self.error(UnterminatedString) } + fn consume_raw_string( + &mut self, + context: &ExpressionContext, + ) -> Result, ParserError> { + let Some((_, string_context)) = self.consume_token_with_context(context) else { + return self.error(InternalError::RawStringParseFailure); + }; // Token::RawStringDelimiter + + let start_span = self.current_span(); + let start_delimiter = self.current_token.slice(self.source); + let quotation_mark = match start_delimiter.chars().next_back() { + Some('\'') => QuotationMark::Single, + Some('"') => QuotationMark::Double, + _ => return 
self.error(InternalError::RawStringParseFailure), + }; + + let contents = match self.consume_token() { + Some(Token::StringLiteral) => { + let contents = self.add_string_constant(self.current_token.slice(self.source))?; + match self.consume_token() { + Some(Token::RawStringEnd) => contents, + _ => return self.error(SyntaxError::UnterminatedString), + } + } + Some(Token::RawStringEnd) => self.add_string_constant("")?, + _ => return self.error(SyntaxError::UnterminatedString), + }; + + Ok(Some(ParseStringOutput { + string: AstString { + quotation_mark, + contents: StringContents::Raw(contents), + }, + span: self.span_with_start(start_span), + context: string_context, + })) + } + //// Error helpers fn error(&mut self, error_type: E) -> Result @@ -2966,28 +3009,34 @@ impl<'source> Parser<'source> { //// Lexer getters - fn current_line_number(&self) -> u32 { - self.lexer.line_number() - } + fn consume_token(&mut self) -> Option { + if let Some(next) = self.lexer.next() { + self.current_token = next; - fn current_indent(&self) -> usize { - self.lexer.current_indent() - } + if self.current_token.token == Token::NewLine { + self.current_line += 1; + } - fn current_span(&self) -> Span { - self.lexer.span() + Some(self.current_token.token) + } else { + None + } } fn peek_token(&mut self) -> Option { - self.lexer.peek() + self.peek_token_n(0) } fn peek_token_n(&mut self, n: usize) -> Option { - self.lexer.peek_n(n) + self.lexer.peek(n).map(|peeked| peeked.token) } - fn consume_token(&mut self) -> Option { - self.lexer.next() + fn current_indent(&self) -> usize { + self.current_token.indent + } + + fn current_span(&self) -> Span { + self.current_token.span } //// Node push helpers @@ -3015,6 +3064,10 @@ impl<'source> Parser<'source> { } } + fn add_current_slice_as_string_constant(&mut self) -> Result { + self.add_string_constant(self.current_token.slice(self.source)) + } + fn add_string_constant(&mut self, s: &str) -> Result { match self.constants.add_string(s) { Ok(result) => 
Ok(result), @@ -3032,49 +3085,41 @@ impl<'source> Parser<'source> { use Token::*; let mut peek_count = 0; - let start_line = self.current_line_number(); + let mut same_line = true; let start_indent = self.current_indent(); - while let Some(peeked) = self.peek_token_n(peek_count) { - match peeked { - Whitespace | NewLine | NewLineIndented | CommentMulti | CommentSingle => {} + while let Some(peeked) = self.lexer.peek(peek_count) { + match peeked.token { + NewLine => same_line = false, + Whitespace | CommentMulti | CommentSingle => {} token => { - return match self.lexer.peek_line_number(peek_count) { - peeked_line if peeked_line == start_line => Some(PeekInfo { - token, - line: start_line, - indent: start_indent, - peek_count, - }), - peeked_line if context.allow_linebreaks => { - let peeked_indent = self.lexer.peek_indent(peek_count); - let peek_info = PeekInfo { - token, - line: peeked_line, - indent: peeked_indent, - peek_count, - }; - - use Indentation::*; - match context.expected_indentation { - GreaterThan(expected_indent) if peeked_indent > expected_indent => { - Some(peek_info) - } - GreaterOrEqual(expected_indent) - if peeked_indent >= expected_indent => - { - Some(peek_info) - } - Equal(expected_indent) if peeked_indent == expected_indent => { - Some(peek_info) - } - Greater if peeked_indent > start_indent => Some(peek_info), - Flexible => Some(peek_info), - _ => None, + let result = Some(PeekInfo { + token, + peek_count, + info: peeked.clone(), + }); + + let result = if same_line { + result + } else if context.allow_linebreaks { + use Indentation::*; + match context.expected_indentation { + GreaterThan(expected_indent) if peeked.indent > expected_indent => { + result } + GreaterOrEqual(expected_indent) if peeked.indent >= expected_indent => { + result + } + Equal(expected_indent) if peeked.indent == expected_indent => result, + Greater if peeked.indent > start_indent => result, + Flexible => result, + _ => None, } - _ => None, - } + } else { + None + }; + 
+ return result; } } @@ -3098,11 +3143,11 @@ impl<'source> Parser<'source> { &mut self, context: &ExpressionContext, ) -> Option<(Token, ExpressionContext)> { - let start_line = self.current_line_number(); + let start_line = self.current_line; - for token in &mut self.lexer { - if !(token.is_whitespace() || token.is_newline()) { - let is_indented_block = self.current_line_number() > start_line + while let Some(token) = self.consume_token() { + if !(token.is_whitespace_including_newline()) { + let is_indented_block = self.current_line > start_line && context.allow_linebreaks && matches!(context.expected_indentation, Indentation::Greater); @@ -3130,19 +3175,19 @@ impl<'source> Parser<'source> { &mut self, context: &ExpressionContext, ) -> Option { - let start_line = self.current_line_number(); + let start_line = self.current_line; - while let Some(peeked) = self.peek_token_n(0) { - if peeked.is_whitespace() || peeked.is_newline() { - self.lexer.next(); + while let Some(peeked) = self.lexer.peek(0) { + if peeked.token.is_whitespace_including_newline() { + self.consume_token(); } else { - let is_indented_block = self.lexer.peek_line_number(0) > start_line + let is_indented_block = peeked.span.start.line > start_line && context.allow_linebreaks && matches!(context.expected_indentation, Indentation::Greater); let new_context = if is_indented_block { ExpressionContext { - expected_indentation: Indentation::Equal(self.lexer.peek_indent(0)), + expected_indentation: Indentation::Equal(peeked.indent), allow_map_block: true, ..*context } @@ -3181,7 +3226,7 @@ impl<'source> Parser<'source> { _ => return, } - self.lexer.next(); + self.consume_token(); } } @@ -3190,10 +3235,10 @@ impl<'source> Parser<'source> { while let Some(peeked) = self.peek_token() { match peeked { token if token.is_whitespace() => {} - _ => return self.lexer.next(), + _ => return self.consume_token(), } - self.lexer.next(); + self.consume_token(); } None @@ -3258,9 +3303,8 @@ fn operator_precedence(op: 
Token) -> Option<(u8, u8)> { #[derive(Debug)] struct PeekInfo { token: Token, - line: u32, - indent: usize, peek_count: usize, + info: LexedToken, } // Returned by Parser::parse_id_or_wildcard() diff --git a/crates/parser/tests/parser_tests.rs b/crates/parser/tests/parser_tests.rs index 1211ab4fa..c6e53549a 100644 --- a/crates/parser/tests/parser_tests.rs +++ b/crates/parser/tests/parser_tests.rs @@ -46,22 +46,23 @@ mod parser { } } - fn constant(index: u8) -> u32 { + fn constant(index: u8) -> ConstantIndex { ConstantIndex::from(index) } - fn string_literal(literal_index: u8, quotation_mark: QuotationMark) -> Node { - Node::Str(AstString { + fn simple_string(literal_index: u8, quotation_mark: QuotationMark) -> AstString { + AstString { quotation_mark, - nodes: vec![StringNode::Literal(constant(literal_index))], - }) + contents: StringContents::Literal(constant(literal_index)), + } + } + + fn string_literal(literal_index: u8, quotation_mark: QuotationMark) -> Node { + Node::Str(simple_string(literal_index, quotation_mark)) } fn string_literal_map_key(literal_index: u8, quotation_mark: QuotationMark) -> MapKey { - MapKey::Str(AstString { - quotation_mark, - nodes: vec![StringNode::Literal(constant(literal_index))], - }) + MapKey::Str(simple_string(literal_index, quotation_mark)) } mod values { @@ -195,26 +196,26 @@ null"#; Id(constant(1)), Str(AstString { quotation_mark: QuotationMark::Single, - nodes: vec![ + contents: StringContents::Interpolated(vec![ StringNode::Literal(constant(0)), StringNode::Expr(0), StringNode::Literal(constant(2)), - ], + ]), }), Id(constant(3)), Str(AstString { quotation_mark: QuotationMark::Double, - nodes: vec![StringNode::Expr(2)], + contents: StringContents::Interpolated(vec![StringNode::Expr(2)]), }), Id(constant(4)), Id(constant(6)), // 5 Str(AstString { quotation_mark: QuotationMark::Single, - nodes: vec![ + contents: StringContents::Interpolated(vec![ StringNode::Expr(4), StringNode::Literal(constant(5)), StringNode::Expr(5), - ], 
+ ]), }), MainBlock { body: vec![1, 3, 6], @@ -250,7 +251,10 @@ null"#; }, Str(AstString { quotation_mark: QuotationMark::Single, - nodes: vec![StringNode::Expr(2), StringNode::Literal(constant(1))], + contents: StringContents::Interpolated(vec![ + StringNode::Expr(2), + StringNode::Literal(constant(1)), + ]), }), MainBlock { body: vec![3], @@ -261,9 +265,36 @@ null"#; ) } + #[test] + fn raw_strings() { + let source = r#" +r'$foo ${bar}' +r"[\r?\n]\" +"#; + + check_ast( + source, + &[ + Str(AstString { + quotation_mark: QuotationMark::Single, + contents: StringContents::Raw(constant(0)), + }), + Str(AstString { + quotation_mark: QuotationMark::Double, + contents: StringContents::Raw(constant(1)), + }), + MainBlock { + body: vec![0, 1], + local_count: 0, + }, + ], + Some(&[Constant::Str("$foo ${bar}"), Constant::Str(r"[\r?\n]\")]), + ) + } + #[test] fn negatives() { - let source = "\ + let source = " -12.0 -a -x[0] @@ -1180,6 +1211,34 @@ x %= 4"; Some(&[Constant::Str("x")]), ) } + + #[test] + fn list_with_lookup_as_first_element() { + let source = " +[foo.bar()] +"; + check_ast( + source, + &[ + Id(constant(0)), + Lookup(( + LookupNode::Call { + args: vec![], + with_parens: true, + }, + None, + )), + Lookup((LookupNode::Id(constant(1)), Some(1))), + Lookup((LookupNode::Root(0), Some(2))), + List(vec![3]), + MainBlock { + body: vec![4], + local_count: 0, + }, + ], + Some(&[Constant::Str("foo"), Constant::Str("bar")]), + ) + } } mod export { @@ -1446,7 +1505,7 @@ export } #[test] - fn arithmetic_assignment() { + fn arithmetic_assignment_chained() { let sources = [ " a = 1 + @@ -2315,7 +2374,7 @@ f(x, } #[test] - fn call_with_indentated_args() { + fn call_with_indented_args() { let source = " foo x, @@ -2339,7 +2398,7 @@ foo } #[test] - fn call_with_indentated_function_arg() { + fn call_with_indented_function_arg() { let source = " foo x, @@ -3390,7 +3449,7 @@ x.bar()."baz" = 1 Lookup(( LookupNode::Str(AstString { quotation_mark: QuotationMark::Double, - nodes: 
vec![StringNode::Literal(constant(2))], + contents: StringContents::Literal(constant(2)), }), None, )), @@ -4114,10 +4173,7 @@ assert_eq x, "hello" } fn import_string(literal_index: u8, quotation_mark: QuotationMark) -> ImportItemNode { - ImportItemNode::Str(AstString { - quotation_mark, - nodes: vec![StringNode::Literal(constant(literal_index))], - }) + ImportItemNode::Str(simple_string(literal_index, quotation_mark)) } #[test] diff --git a/crates/runtime/tests/vm_tests.rs b/crates/runtime/tests/vm_tests.rs index e7efe1f46..48799353f 100644 --- a/crates/runtime/tests/vm_tests.rs +++ b/crates/runtime/tests/vm_tests.rs @@ -2636,6 +2636,48 @@ x = ('foo', 'bar') } } + mod raw_strings { + use super::*; + + #[test] + fn unescaped_backslashes() { + let script = r" +r'\r\n\\\$\' +"; + test_script(script, string(r"\r\n\\\$\")); + } + + #[test] + fn uninterpolated_expressions() { + let script = r" +foo, bar = 42, 99 +r'$foo + $bar == ${foo + bar}' +"; + test_script(script, string(r"$foo + $bar == ${foo + bar}")); + } + + #[test] + fn multiline() { + let script = r#" +r" +$foo +\n +$bar +" +"#; + test_script( + script, + string( + r" +$foo +\n +$bar +", + ), + ); + } + } + mod iterators { use super::*; diff --git a/docs/language/strings.md b/docs/language/strings.md index c2420aaa9..6db447759 100644 --- a/docs/language/strings.md +++ b/docs/language/strings.md @@ -91,3 +91,12 @@ print 'This string has to escape its \'single quotes\'.' check! This string has to escape its 'single quotes'. ``` +## Raw Strings + +Sometimes it can be preferable to use a _raw string_, which provides the contents of the string without support for escaped characters or interpolated expressions. +Like normal strings, raw strings use single or double quotes, but prefixed with an `r`. + +```koto +print r'This string contains special characters: $foo\n\t' +check! 
This string contains special characters: $foo\n\t +``` diff --git a/koto/tests/strings.koto b/koto/tests/strings.koto index b1d7b18ab..31dda3581 100644 --- a/koto/tests/strings.koto +++ b/koto/tests/strings.koto @@ -52,6 +52,10 @@ baz" assert_eq x, "foo bar baz" + @test raw_strings: || + assert_eq r'\r\n', '\\r\\n' + assert_eq r'${1 + 1}', '\${1 + 1}' + @test bytes: || assert_eq "Hëy".bytes().to_tuple(), (72, 195, 171, 121) From 96d2d0761bcfdcf54f028c111b5e2a3295b2ddda Mon Sep 17 00:00:00 2001 From: Ian Hobson Date: Fri, 12 Jan 2024 14:35:58 +0100 Subject: [PATCH 5/8] Resolve a todo in the parser --- crates/parser/src/parser.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 73408ea86..64783fecc 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -1367,7 +1367,7 @@ impl<'source> Parser<'source> { break; }; if peeked.token == Token::Dot { - // Indented Dot on the next line? + // Indented Dot on a following line? // Consume up until the Dot, // which will be picked up on the next iteration @@ -1377,8 +1377,7 @@ impl<'source> Parser<'source> { // Check that the next dot is on an indented line if self.current_line == lookup_line { - // TODO Error here? 
- break; + return self.consume_token_and_error(SyntaxError::ExpectedMapKey); } // Starting a new line, so space separated calls are allowed From 4e7ec8b3bf0048103591c92cd8cbc7d2e21fac5a Mon Sep 17 00:00:00 2001 From: Ian Hobson Date: Fri, 12 Jan 2024 14:51:27 +0100 Subject: [PATCH 6/8] Remove unnecessary helper in parser_tests.rs --- crates/parser/tests/parser_tests.rs | 733 ++++++++++++++-------------- 1 file changed, 360 insertions(+), 373 deletions(-) diff --git a/crates/parser/tests/parser_tests.rs b/crates/parser/tests/parser_tests.rs index c6e53549a..56e403f0e 100644 --- a/crates/parser/tests/parser_tests.rs +++ b/crates/parser/tests/parser_tests.rs @@ -46,22 +46,21 @@ mod parser { } } - fn constant(index: u8) -> ConstantIndex { - ConstantIndex::from(index) - } - - fn simple_string(literal_index: u8, quotation_mark: QuotationMark) -> AstString { + fn simple_string(literal_index: ConstantIndex, quotation_mark: QuotationMark) -> AstString { AstString { quotation_mark, - contents: StringContents::Literal(constant(literal_index)), + contents: StringContents::Literal(literal_index), } } - fn string_literal(literal_index: u8, quotation_mark: QuotationMark) -> Node { + fn string_literal(literal_index: ConstantIndex, quotation_mark: QuotationMark) -> Node { Node::Str(simple_string(literal_index, quotation_mark)) } - fn string_literal_map_key(literal_index: u8, quotation_mark: QuotationMark) -> MapKey { + fn string_literal_map_key( + literal_index: ConstantIndex, + quotation_mark: QuotationMark, + ) -> MapKey { MapKey::Str(simple_string(literal_index, quotation_mark)) } @@ -85,10 +84,10 @@ null"#; BoolTrue, BoolFalse, SmallInt(1), - Float(constant(0)), + Float(0), string_literal(1, QuotationMark::Double), string_literal(2, QuotationMark::Single), - Id(constant(3)), + Id(3), Null, MainBlock { body: vec![0, 1, 2, 3, 4, 5, 6, 7], @@ -121,8 +120,8 @@ null"#; &[ SmallInt(1), SmallInt(1), - Int(constant(0)), - Int(constant(1)), + Int(0), + Int(1), SmallInt(1), SmallInt(64), 
SmallInt(1), @@ -193,27 +192,27 @@ null"#; check_ast( source, &[ - Id(constant(1)), + Id(1), Str(AstString { quotation_mark: QuotationMark::Single, contents: StringContents::Interpolated(vec![ - StringNode::Literal(constant(0)), + StringNode::Literal(0), StringNode::Expr(0), - StringNode::Literal(constant(2)), + StringNode::Literal(2), ]), }), - Id(constant(3)), + Id(3), Str(AstString { quotation_mark: QuotationMark::Double, contents: StringContents::Interpolated(vec![StringNode::Expr(2)]), }), - Id(constant(4)), - Id(constant(6)), // 5 + Id(4), + Id(6), // 5 Str(AstString { quotation_mark: QuotationMark::Single, contents: StringContents::Interpolated(vec![ StringNode::Expr(4), - StringNode::Literal(constant(5)), + StringNode::Literal(5), StringNode::Expr(5), ]), }), @@ -243,7 +242,7 @@ null"#; source, &[ SmallInt(123), - Int(constant(0)), + Int(0), BinaryOp { op: AstBinaryOp::Add, lhs: 0, @@ -253,7 +252,7 @@ null"#; quotation_mark: QuotationMark::Single, contents: StringContents::Interpolated(vec![ StringNode::Expr(2), - StringNode::Literal(constant(1)), + StringNode::Literal(1), ]), }), MainBlock { @@ -277,11 +276,11 @@ r"[\r?\n]\" &[ Str(AstString { quotation_mark: QuotationMark::Single, - contents: StringContents::Raw(constant(0)), + contents: StringContents::Raw(0), }), Str(AstString { quotation_mark: QuotationMark::Double, - contents: StringContents::Raw(constant(1)), + contents: StringContents::Raw(1), }), MainBlock { body: vec![0, 1], @@ -302,13 +301,13 @@ r"[\r?\n]\" check_ast( source, &[ - Float(constant(0)), - Id(constant(1)), + Float(0), + Id(1), UnaryOp { op: AstUnaryOp::Negate, value: 1, }, - Id(constant(2)), + Id(2), SmallInt(0), Lookup((LookupNode::Index(4), None)), // 5 Lookup((LookupNode::Root(3), Some(5))), @@ -351,9 +350,9 @@ r"[\r?\n]\" source, &[ SmallInt(0), - Id(constant(0)), + Id(0), string_literal(1, QuotationMark::Double), - Id(constant(0)), + Id(0), SmallInt(-1), List(vec![0, 1, 2, 3, 4]), List(vec![]), @@ -428,7 +427,7 @@ x = [ 
check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), + Id(0), SmallInt(0), SmallInt(1), SmallInt(0), @@ -480,14 +479,14 @@ x = &sources, &[ Map(vec![]), - Id(constant(0)), + Id(0), SmallInt(42), string_literal(4, QuotationMark::Single), SmallInt(99), Map(vec![ (string_literal_map_key(1, QuotationMark::Single), Some(2)), - (MapKey::Id(constant(2)), None), - (MapKey::Id(constant(3)), Some(3)), + (MapKey::Id(2), None), + (MapKey::Id(3), Some(3)), (MapKey::Meta(MetaKeyId::Add, None), Some(4)), ]), // 5 Assign { @@ -525,8 +524,8 @@ x = string_literal(3, QuotationMark::Double), Map(vec![ (string_literal_map_key(0, QuotationMark::Single), Some(0)), - (MapKey::Id(constant(1)), None), - (MapKey::Id(constant(2)), Some(1)), + (MapKey::Id(1), None), + (MapKey::Id(2), Some(1)), ]), MainBlock { body: vec![2], @@ -554,13 +553,13 @@ x"#; check_ast( source, &[ - Id(constant(0)), // x + Id(0), // x SmallInt(42), SmallInt(0), - Map(vec![(MapKey::Id(constant(1)), Some(2))]), // foo, 0 + Map(vec![(MapKey::Id(1), Some(2))]), // foo, 0 SmallInt(-1), Map(vec![ - (MapKey::Id(constant(1)), Some(1)), // foo: 42 + (MapKey::Id(1), Some(1)), // foo: 42 (string_literal_map_key(2, QuotationMark::Double), Some(3)), // "baz": nested map (MapKey::Meta(MetaKeyId::Subtract, None), Some(4)), // @-: -1 ]), // 5 @@ -568,7 +567,7 @@ x"#; target: 0, expression: 5, }, - Id(constant(0)), + Id(0), MainBlock { body: vec![6, 7], local_count: 1, @@ -591,7 +590,7 @@ x = check_ast( source, &[ - Id(constant(0)), // x + Id(0), // x SmallInt(42), Map(vec![( string_literal_map_key(1, QuotationMark::Double), @@ -620,13 +619,13 @@ x = check_ast( source, &[ - Id(constant(0)), // x + Id(0), // x SmallInt(42), Map(vec![ - (MapKey::Id(constant(2)), Some(1)), // bar: 42 + (MapKey::Id(2), Some(1)), // bar: 42 ]), Map(vec![ - (MapKey::Id(constant(1)), Some(2)), // foo: ... + (MapKey::Id(1), Some(2)), // foo: ... 
]), Assign { target: 0, @@ -656,14 +655,14 @@ x = check_ast( source, &[ - Id(constant(0)), // x + Id(0), // x SmallInt(0), SmallInt(1), SmallInt(0), Map(vec![ (MapKey::Meta(MetaKeyId::Add, None), Some(1)), (MapKey::Meta(MetaKeyId::Subtract, None), Some(2)), - (MapKey::Meta(MetaKeyId::Named, Some(constant(1))), Some(3)), + (MapKey::Meta(MetaKeyId::Named, Some(1)), Some(3)), ]), Assign { target: 0, @@ -696,7 +695,7 @@ x = Map(vec![ (MapKey::Meta(MetaKeyId::PreTest, None), Some(1)), (MapKey::Meta(MetaKeyId::PostTest, None), Some(2)), - (MapKey::Meta(MetaKeyId::Test, Some(constant(0))), Some(3)), + (MapKey::Meta(MetaKeyId::Test, Some(0)), Some(3)), ]), Assign { target: 0, @@ -791,20 +790,20 @@ min..max check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(0), Assign { target: 0, expression: 1, }, - Id(constant(1)), + Id(1), SmallInt(10), Assign { target: 3, expression: 4, }, // 5 - Id(constant(0)), - Id(constant(1)), + Id(0), + Id(1), Range { start: 6, end: 7, @@ -825,11 +824,11 @@ min..max check_ast( source, &[ - Id(constant(0)), - Lookup((LookupNode::Id(constant(1)), None)), + Id(0), + Lookup((LookupNode::Id(1), None)), Lookup((LookupNode::Root(0), Some(1))), - Id(constant(0)), - Lookup((LookupNode::Id(constant(2)), None)), + Id(0), + Lookup((LookupNode::Id(2), None)), Lookup((LookupNode::Root(3), Some(4))), // 5 Range { start: 2, @@ -1005,7 +1004,7 @@ min..max check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(1), Assign { target: 0, @@ -1026,7 +1025,7 @@ min..max check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(1), SmallInt(0), Tuple(vec![1, 2]), @@ -1049,7 +1048,7 @@ min..max check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(0), SmallInt(1), Tuple(vec![1, 2]), @@ -1076,8 +1075,8 @@ min..max check_ast( source, &[ - Id(constant(0)), - Id(constant(1)), + Id(0), + Id(1), SmallInt(0), Lookup((LookupNode::Index(2), None)), Lookup((LookupNode::Root(1), Some(3))), @@ -1107,8 +1106,8 @@ x"; check_ast( source, &[ - Id(constant(0)), - 
Id(constant(1)), + Id(0), + Id(1), SmallInt(1), SmallInt(0), TempTuple(vec![2, 3]), @@ -1116,7 +1115,7 @@ x"; targets: vec![0, 1], expression: 4, }, // 5 - Id(constant(0)), + Id(0), MainBlock { body: vec![5, 6], local_count: 2, @@ -1132,10 +1131,10 @@ x"; check_ast( source, &[ - Id(constant(0)), + Id(0), Wildcard(None), - Wildcard(Some(constant(1))), - Id(constant(2)), + Wildcard(Some(1)), + Id(2), Lookup(( LookupNode::Call { args: vec![], @@ -1168,35 +1167,35 @@ x %= 4"; check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(0), BinaryOp { op: AstBinaryOp::AddAssign, lhs: 0, rhs: 1, }, - Id(constant(0)), + Id(0), SmallInt(1), BinaryOp { op: AstBinaryOp::SubtractAssign, lhs: 3, rhs: 4, }, // 5 - Id(constant(0)), + Id(0), SmallInt(2), BinaryOp { op: AstBinaryOp::MultiplyAssign, lhs: 6, rhs: 7, }, - Id(constant(0)), + Id(0), SmallInt(3), // 10 BinaryOp { op: AstBinaryOp::DivideAssign, lhs: 9, rhs: 10, }, - Id(constant(0)), + Id(0), SmallInt(4), BinaryOp { op: AstBinaryOp::RemainderAssign, @@ -1220,7 +1219,7 @@ x %= 4"; check_ast( source, &[ - Id(constant(0)), + Id(0), Lookup(( LookupNode::Call { args: vec![], @@ -1228,7 +1227,7 @@ x %= 4"; }, None, )), - Lookup((LookupNode::Id(constant(1)), Some(1))), + Lookup((LookupNode::Id(1), Some(1))), Lookup((LookupNode::Root(0), Some(2))), List(vec![3]), MainBlock { @@ -1259,7 +1258,7 @@ export a = check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), + Id(0), SmallInt(1), SmallInt(1), BinaryOp { @@ -1295,8 +1294,8 @@ export SmallInt(123), SmallInt(99), Map(vec![ - (MapKey::Id(constant(0)), Some(0)), // a: 123 - (MapKey::Id(constant(1)), Some(1)), // b: 99 + (MapKey::Id(0), Some(0)), // a: 123 + (MapKey::Id(1), Some(1)), // b: 99 ]), Export(2), MainBlock { @@ -1458,7 +1457,7 @@ export source, &[ string_literal(0, QuotationMark::Single), - Id(constant(1)), + Id(1), BinaryOp { op: AstBinaryOp::Add, lhs: 0, @@ -1479,11 +1478,11 @@ export check_ast( source, &[ - Id(constant(0)), // x + Id(0), // x SmallInt(1), - 
Id(constant(2)), // y + Id(2), // y NamedCall { - id: constant(1), // f + id: 1, // f args: vec![2], }, BinaryOp { @@ -1533,7 +1532,7 @@ a = check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), + Id(0), SmallInt(1), SmallInt(2), SmallInt(3), @@ -1586,7 +1585,7 @@ a = (1 check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), + Id(0), SmallInt(1), SmallInt(2), BinaryOp { @@ -1758,7 +1757,7 @@ a", check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), + Id(0), BoolFalse, SmallInt(0), BoolTrue, @@ -1776,7 +1775,7 @@ a", target: 0, expression: 8, }, - Id(constant(0)), + Id(0), MainBlock { body: vec![9, 10], local_count: 1, @@ -1792,8 +1791,8 @@ a", check_ast( source, &[ - Id(constant(0)), - Id(constant(1)), + Id(0), + Id(1), BoolTrue, SmallInt(0), SmallInt(1), @@ -1840,12 +1839,12 @@ a", else_if_blocks: vec![], else_node: None, }), - Id(constant(0)), + Id(0), Block(vec![2, 3]), Function(koto_parser::Function { args: vec![], local_count: 0, - accessed_non_locals: vec![constant(0)], + accessed_non_locals: vec![0], body: 4, is_variadic: false, is_generator: false, @@ -1871,14 +1870,14 @@ for x, _, _y, z in foo check_ast( source, &[ - Id(constant(0)), // x + Id(0), // x Wildcard(None), - Wildcard(Some(constant(1))), // _y - Id(constant(2)), // z - Id(constant(3)), // foo - Id(constant(0)), // x - 5 + Wildcard(Some(1)), // _y + Id(2), // z + Id(3), // foo + Id(0), // x - 5 NamedCall { - id: constant(2), // z + id: 2, // z args: vec![5], }, For(AstFor { @@ -1908,16 +1907,16 @@ while x > y check_ast( source, &[ - Id(constant(0)), // x - Id(constant(1)), // y + Id(0), // x + Id(1), // y BinaryOp { op: AstBinaryOp::Greater, lhs: 0, rhs: 1, }, - Id(constant(0)), // x + Id(0), // x NamedCall { - id: constant(2), // f + id: 2, // f args: vec![3], }, While { @@ -1941,16 +1940,16 @@ until x < y check_ast( source, &[ - Id(constant(0)), // x - Id(constant(1)), // y + Id(0), // x + Id(1), // y BinaryOp { op: AstBinaryOp::Less, lhs: 0, rhs: 1, }, - 
Id(constant(1)), // y + Id(1), // y NamedCall { - id: constant(2), // f + id: 2, // f args: vec![3], }, Until { @@ -1978,9 +1977,9 @@ for x in y source, &[ List(vec![]), - Id(constant(0)), // x - Id(constant(1)), // y - Id(constant(0)), // x + Id(0), // x + Id(1), // y + Id(0), // x For(AstFor { args: vec![1], iterable: 2, @@ -2004,9 +2003,9 @@ for a in x.zip y check_ast( source, &[ - Id(constant(0)), // a - Id(constant(1)), // x - Id(constant(3)), // y + Id(0), // a + Id(1), // x + Id(3), // y Lookup(( LookupNode::Call { args: vec![2], @@ -2014,9 +2013,9 @@ for a in x.zip y }, None, )), - Lookup((LookupNode::Id(constant(2)), Some(3))), + Lookup((LookupNode::Id(2), Some(3))), Lookup((LookupNode::Root(1), Some(4))), // ast 5 - Id(constant(0)), // a + Id(0), // a For(AstFor { args: vec![0], iterable: 5, @@ -2048,7 +2047,7 @@ a()"; check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(42), Function(koto_parser::Function { args: vec![], @@ -2062,7 +2061,7 @@ a()"; target: 0, expression: 2, }, - Id(constant(0)), + Id(0), Lookup(( LookupNode::Call { args: vec![], @@ -2096,10 +2095,10 @@ a()"; check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), - Id(constant(1)), - Id(constant(0)), - Id(constant(1)), + Id(0), + Id(1), + Id(0), + Id(1), BinaryOp { op: AstBinaryOp::Add, lhs: 2, @@ -2128,10 +2127,10 @@ a()"; check_ast( source, &[ - Id(constant(0)), - Id(constant(1)), - Id(constant(0)), - Id(constant(1)), + Id(0), + Id(1), + Id(0), + Id(1), Lookup(( LookupNode::Call { args: vec![], @@ -2139,7 +2138,7 @@ a()"; }, None, )), - Lookup((LookupNode::Id(constant(2)), Some(4))), // 5 + Lookup((LookupNode::Id(2), Some(4))), // 5 Lookup((LookupNode::Root(3), Some(5))), BinaryOp { op: AstBinaryOp::Add, @@ -2177,15 +2176,15 @@ f 42"; check_ast( source, &[ - Id(constant(0)), // f - Id(constant(1)), // x - Id(constant(2)), // y - Id(constant(1)), // x + Id(0), // f + Id(1), // x + Id(2), // y + Id(1), // x Assign { target: 2, expression: 3, }, - Id(constant(2)), // 5 + 
Id(2), // 5 Block(vec![4, 5]), Function(koto_parser::Function { args: vec![1], @@ -2201,7 +2200,7 @@ f 42"; }, SmallInt(42), NamedCall { - id: constant(0), + id: 0, args: vec![9], }, // 10 MainBlock { @@ -2224,11 +2223,11 @@ f 42"; check_ast( source, &[ - Id(constant(0)), // f - Id(constant(1)), // x - Id(constant(2)), // y - Id(constant(3)), // z - Id(constant(3)), // z + Id(0), // f + Id(1), // x + Id(2), // y + Id(3), // z + Id(3), // z Function(koto_parser::Function { args: vec![3], local_count: 1, @@ -2241,9 +2240,9 @@ f 42"; target: 2, expression: 5, }, - Id(constant(1)), // x + Id(1), // x NamedCall { - id: constant(2), // y + id: 2, // y args: vec![7], }, Block(vec![6, 8]), @@ -2261,7 +2260,7 @@ f 42"; }, SmallInt(42), NamedCall { - id: constant(0), // f + id: 0, // f args: vec![12], }, MainBlock { @@ -2284,14 +2283,14 @@ f 42"; check_ast( source, &[ - Id(constant(1)), - Id(constant(1)), + Id(1), + Id(1), UnaryOp { op: AstUnaryOp::Negate, value: 1, }, NamedCall { - id: constant(0), // f + id: 0, // f args: vec![0, 2], }, MainBlock { @@ -2309,7 +2308,7 @@ f 42"; check_ast( source, &[ - Id(constant(1)), + Id(1), SmallInt(1), BinaryOp { op: AstBinaryOp::Subtract, @@ -2317,7 +2316,7 @@ f 42"; rhs: 1, }, NamedCall { - id: constant(0), // f + id: 0, // f args: vec![2], }, MainBlock { @@ -2349,9 +2348,9 @@ f(x, check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), - Id(constant(1)), - Id(constant(1)), + Id(0), + Id(1), + Id(1), UnaryOp { op: AstUnaryOp::Negate, value: 2, @@ -2382,10 +2381,10 @@ foo check_ast( source, &[ - Id(constant(1)), - Id(constant(2)), + Id(1), + Id(2), NamedCall { - id: constant(0), // foo + id: 0, // foo args: vec![0, 1], }, MainBlock { @@ -2406,9 +2405,9 @@ foo check_ast( source, &[ - Id(constant(1)), - Id(constant(2)), - Id(constant(2)), + Id(1), + Id(2), + Id(2), Function(koto_parser::Function { args: vec![1], local_count: 1, @@ -2418,7 +2417,7 @@ foo is_generator: false, }), NamedCall { - id: constant(0), // foo + id: 0, // 
foo args: vec![0, 3], }, MainBlock { @@ -2439,14 +2438,14 @@ f x"; check_ast( source, &[ - Id(constant(1)), + Id(1), NamedCall { - id: constant(0), + id: 0, args: vec![0], }, - Id(constant(1)), + Id(1), NamedCall { - id: constant(0), + id: 0, args: vec![2], }, // 5 MainBlock { @@ -2464,17 +2463,17 @@ f x"; check_ast( source, &[ - Id(constant(0)), // f - Id(constant(1)), // x - Id(constant(1)), // x + Id(0), // f + Id(1), // x + Id(1), // x NamedCall { - id: constant(0), // f + id: 0, // f args: vec![2], }, Function(koto_parser::Function { args: vec![1], local_count: 1, - accessed_non_locals: vec![constant(0)], + accessed_non_locals: vec![0], body: 3, is_variadic: false, is_generator: false, @@ -2498,33 +2497,33 @@ f x"; check_ast( source, &[ - Id(constant(0)), // f - Id(constant(1)), // g - Id(constant(2)), // x - Id(constant(2)), + Id(0), // f + Id(1), // g + Id(2), // x + Id(2), NamedCall { - id: constant(0), + id: 0, args: vec![3], }, Function(koto_parser::Function { args: vec![2], local_count: 1, - accessed_non_locals: vec![constant(0)], + accessed_non_locals: vec![0], body: 4, is_variadic: false, is_generator: false, }), // 5 Nested(5), - Id(constant(2)), // x - Id(constant(2)), // x + Id(2), // x + Id(2), // x NamedCall { - id: constant(1), // g + id: 1, // g args: vec![8], }, Function(koto_parser::Function { args: vec![7], local_count: 1, - accessed_non_locals: vec![constant(1)], + accessed_non_locals: vec![1], body: 9, is_variadic: false, is_generator: false, @@ -2550,18 +2549,18 @@ f x"; check_ast( source, &[ - Id(constant(1)), // x + Id(1), // x NamedCall { - id: constant(0), // f + id: 0, // f args: vec![0], }, - Id(constant(2)), // g + Id(2), // g BinaryOp { op: AstBinaryOp::Pipe, lhs: 1, rhs: 2, }, - Id(constant(3)), // h + Id(3), // h BinaryOp { op: AstBinaryOp::Pipe, lhs: 3, @@ -2591,8 +2590,8 @@ foo.bar x check_ast( source, &[ - Id(constant(0)), // foo - Id(constant(2)), // x + Id(0), // foo + Id(2), // x Lookup(( LookupNode::Call { args: vec![1], 
@@ -2600,15 +2599,15 @@ foo.bar x }, None, )), - Lookup((LookupNode::Id(constant(1)), Some(2))), + Lookup((LookupNode::Id(1), Some(2))), Lookup((LookupNode::Root(0), Some(3))), - Id(constant(3)), // 5 - y + Id(3), // 5 - y BinaryOp { op: AstBinaryOp::Pipe, lhs: 4, rhs: 5, }, - Id(constant(4)), // z + Id(4), // z BinaryOp { op: AstBinaryOp::Pipe, lhs: 6, @@ -2636,11 +2635,11 @@ foo.bar x source, &[ SmallInt(42), - Id(constant(2)), // x - Self_, // self - Lookup((LookupNode::Id(constant(0)), None)), + Id(2), // x + Self_, // self + Lookup((LookupNode::Id(0), None)), Lookup((LookupNode::Root(2), Some(3))), - Id(constant(2)), // 5 + Id(2), // 5 Assign { target: 4, expression: 5, @@ -2653,10 +2652,7 @@ foo.bar x is_variadic: false, is_generator: false, }), - Map(vec![ - (MapKey::Id(constant(0)), Some(0)), - (MapKey::Id(constant(1)), Some(7)), - ]), + Map(vec![(MapKey::Id(0), Some(0)), (MapKey::Id(1), Some(7))]), MainBlock { body: vec![8], local_count: 0, @@ -2680,17 +2676,14 @@ f = || check_ast( source, &[ - Id(constant(0)), - Id(constant(2)), + Id(0), + Id(2), SmallInt(0), - Map(vec![ - (MapKey::Id(constant(1)), Some(1)), - (MapKey::Id(constant(3)), Some(2)), - ]), + Map(vec![(MapKey::Id(1), Some(1)), (MapKey::Id(3), Some(2))]), Function(koto_parser::Function { args: vec![], local_count: 0, - accessed_non_locals: vec![constant(2)], + accessed_non_locals: vec![2], body: 3, is_variadic: false, is_generator: false, @@ -2724,20 +2717,20 @@ f = || check_ast( source, &[ - Id(constant(0)), // f - Id(constant(3)), // x + Id(0), // f + Id(3), // x Map(vec![ - (MapKey::Id(constant(2)), Some(1)), // bar: x + (MapKey::Id(2), Some(1)), // bar: x ]), SmallInt(0), Map(vec![ - (MapKey::Id(constant(1)), Some(2)), // foo: ... - (MapKey::Id(constant(4)), Some(3)), // baz: 0 + (MapKey::Id(1), Some(2)), // foo: ... 
+ (MapKey::Id(4), Some(3)), // baz: 0 ]), Function(koto_parser::Function { args: vec![], local_count: 0, - accessed_non_locals: vec![constant(3)], + accessed_non_locals: vec![3], body: 4, is_variadic: false, is_generator: false, @@ -2771,13 +2764,13 @@ f()"; check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(42), - Id(constant(3)), // x + Id(3), // x Self_, - Lookup((LookupNode::Id(constant(1)), None)), + Lookup((LookupNode::Id(1), None)), Lookup((LookupNode::Root(3), Some(4))), // 5 - Id(constant(3)), + Id(3), Assign { target: 5, expression: 6, @@ -2790,10 +2783,7 @@ f()"; is_variadic: false, is_generator: false, }), // 10 - Map(vec![ - (MapKey::Id(constant(1)), Some(1)), - (MapKey::Id(constant(2)), Some(8)), - ]), + Map(vec![(MapKey::Id(1), Some(1)), (MapKey::Id(2), Some(8))]), Function(koto_parser::Function { args: vec![], local_count: 0, @@ -2806,7 +2796,7 @@ f()"; target: 0, expression: 10, }, - Id(constant(0)), + Id(0), Lookup(( LookupNode::Call { args: vec![], @@ -2842,26 +2832,26 @@ f = |n| check_ast( source, &[ - Id(constant(0)), // f - Id(constant(1)), // n - Id(constant(2)), // f2 - Id(constant(1)), - Id(constant(3)), // i - SmallInt(0), // ast 5 + Id(0), // f + Id(1), // n + Id(2), // f2 + Id(1), + Id(3), // i + SmallInt(0), // ast 5 SmallInt(1), Range { start: 5, end: 6, inclusive: false, }, - Id(constant(3)), // i - Id(constant(1)), + Id(3), // i + Id(1), BinaryOp { op: AstBinaryOp::Equal, lhs: 8, rhs: 9, }, // ast 10 - Id(constant(3)), + Id(3), Return(Some(11)), If(AstIf { condition: 10, @@ -2886,7 +2876,7 @@ f = |n| target: 2, expression: 15, }, - Id(constant(2)), + Id(2), Block(vec![16, 17]), Function(koto_parser::Function { args: vec![1], @@ -2924,8 +2914,8 @@ f = |n| check_ast( source, &[ - Id(constant(0)), - Id(constant(0)), + Id(0), + Id(0), SmallInt(1), BinaryOp { op: AstBinaryOp::Add, @@ -2936,12 +2926,12 @@ f = |n| target: 0, expression: 3, }, - Id(constant(0)), // 5 + Id(0), // 5 Block(vec![4, 5]), Function(koto_parser::Function { 
args: vec![], local_count: 1, - accessed_non_locals: vec![constant(0)], // initial read of x via capture + accessed_non_locals: vec![0], // initial read of x via capture body: 6, is_variadic: false, is_generator: false, @@ -2964,15 +2954,15 @@ f = |n| check_ast( source, &[ - Id(constant(0)), - Id(constant(1)), + Id(0), + Id(1), SmallInt(1), Assign { target: 1, expression: 2, }, Nested(3), - Id(constant(1)), // 5 + Id(1), // 5 Tuple(vec![4, 5]), Assign { target: 0, @@ -3003,7 +2993,7 @@ f = |n| check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(1), BinaryOp { op: AstBinaryOp::AddAssign, @@ -3013,7 +3003,7 @@ f = |n| Function(koto_parser::Function { args: vec![], local_count: 0, - accessed_non_locals: vec![constant(0)], // initial read of x via capture + accessed_non_locals: vec![0], // initial read of x via capture body: 2, is_variadic: false, is_generator: false, @@ -3035,7 +3025,7 @@ y z"; check_ast( source, &[ - Id(constant(0)), // z + Id(0), // z SmallInt(0), SmallInt(20), Range { @@ -3044,8 +3034,8 @@ y z"; inclusive: false, }, List(vec![3]), - Id(constant(2)), // 5 - x - Id(constant(2)), + Id(2), // 5 - x + Id(2), SmallInt(1), BinaryOp { op: AstBinaryOp::Greater, @@ -3061,16 +3051,16 @@ y z"; is_generator: false, }), NamedCall { - id: constant(1), // y + id: 1, // y args: vec![4, 9], }, // 10 Assign { target: 0, expression: 10, }, - Id(constant(0)), // z + Id(0), // z NamedCall { - id: constant(1), // y + id: 1, // y args: vec![12], }, MainBlock { @@ -3145,7 +3135,7 @@ y z"; source, &[ SmallInt(42), - Map(vec![(MapKey::Id(constant(0)), Some(0))]), + Map(vec![(MapKey::Id(0), Some(0))]), Yield(1), Function(koto_parser::Function { args: vec![], @@ -3184,15 +3174,15 @@ y z"; check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), // a + Id(0), // a Wildcard(None), - Ellipsis(Some(constant(1))), // others - Id(constant(2)), // c - Wildcard(Some(constant(3))), // d - Tuple(vec![2, 3, 4]), // ast index 5 + Ellipsis(Some(1)), // others + Id(2), // c 
+ Wildcard(Some(3)), // d + Tuple(vec![2, 3, 4]), // ast index 5 Tuple(vec![1, 5]), - Wildcard(Some(constant(4))), // e - Id(constant(0)), + Wildcard(Some(4)), // e + Id(0), Function(koto_parser::Function { args: vec![0, 6, 7], local_count: 3, @@ -3236,15 +3226,15 @@ y z"; check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), // a + Id(0), // a Wildcard(None), - Id(constant(1)), // c - Wildcard(Some(constant(2))), // d - Ellipsis(None), // ... - List(vec![2, 3, 4]), // ast index 5 + Id(1), // c + Wildcard(Some(2)), // d + Ellipsis(None), // ... + List(vec![2, 3, 4]), // ast index 5 List(vec![1, 5]), - Id(constant(3)), // e - Id(constant(0)), + Id(3), // e + Id(0), Function(koto_parser::Function { args: vec![0, 6, 7], local_count: 3, @@ -3278,11 +3268,11 @@ y z"; check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(0), Lookup((LookupNode::Index(1), None)), Lookup((LookupNode::Root(0), Some(2))), - Id(constant(0)), + Id(0), SmallInt(1), // 5 Lookup((LookupNode::Index(5), None)), Lookup((LookupNode::Root(4), Some(6))), @@ -3305,7 +3295,7 @@ y z"; check_ast( source, &[ - Id(constant(0)), + Id(0), RangeFull, Lookup((LookupNode::Index(1), None)), Lookup((LookupNode::Root(0), Some(2))), @@ -3324,7 +3314,7 @@ y z"; check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(3), RangeTo { end: 1, @@ -3347,7 +3337,7 @@ y z"; check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(10), RangeFrom { start: 1 }, SmallInt(0), @@ -3369,8 +3359,8 @@ y z"; check_ast( source, &[ - Id(constant(0)), - Lookup((LookupNode::Id(constant(1)), None)), + Id(0), + Lookup((LookupNode::Id(1), None)), Lookup((LookupNode::Root(0), Some(1))), MainBlock { body: vec![2], @@ -3387,7 +3377,7 @@ y z"; check_ast( source, &[ - Id(constant(0)), + Id(0), Lookup(( LookupNode::Call { args: vec![], @@ -3395,7 +3385,7 @@ y z"; }, None, )), - Lookup((LookupNode::Id(constant(1)), Some(1))), + Lookup((LookupNode::Id(1), Some(1))), Lookup((LookupNode::Root(0), Some(2))), MainBlock { body: 
vec![3], @@ -3412,7 +3402,7 @@ y z"; check_ast( source, &[ - Id(constant(0)), + Id(0), Lookup(( LookupNode::Call { args: vec![], @@ -3420,7 +3410,7 @@ y z"; }, None, )), - Lookup((LookupNode::Id(constant(1)), Some(1))), + Lookup((LookupNode::Id(1), Some(1))), Lookup((LookupNode::Root(0), Some(2))), SmallInt(1), BinaryOp { @@ -3445,11 +3435,11 @@ x.bar()."baz" = 1 check_ast( source, &[ - Id(constant(0)), + Id(0), Lookup(( LookupNode::Str(AstString { quotation_mark: QuotationMark::Double, - contents: StringContents::Literal(constant(2)), + contents: StringContents::Literal(2), }), None, )), @@ -3460,7 +3450,7 @@ x.bar()."baz" = 1 }, Some(1), )), - Lookup((LookupNode::Id(constant(1)), Some(2))), + Lookup((LookupNode::Id(1), Some(2))), Lookup((LookupNode::Root(0), Some(3))), SmallInt(1), // 5 Assign { @@ -3486,7 +3476,7 @@ x.bar()."baz" = 1 check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(42), Lookup(( LookupNode::Call { @@ -3495,7 +3485,7 @@ x.bar()."baz" = 1 }, None, )), - Lookup((LookupNode::Id(constant(1)), Some(2))), + Lookup((LookupNode::Id(1), Some(2))), Lookup((LookupNode::Root(0), Some(3))), MainBlock { body: vec![4], @@ -3515,7 +3505,7 @@ x.foo check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(42), Lookup(( LookupNode::Call { @@ -3524,7 +3514,7 @@ x.foo }, None, )), - Lookup((LookupNode::Id(constant(1)), Some(2))), + Lookup((LookupNode::Id(1), Some(2))), Lookup((LookupNode::Root(0), Some(3))), MainBlock { body: vec![4], @@ -3544,10 +3534,10 @@ x.takes_a_map check_ast( source, &[ - Id(constant(0)), // x + Id(0), // x SmallInt(42), Map(vec![ - (MapKey::Id(constant(2)), Some(1)), // foo: 42 + (MapKey::Id(2), Some(1)), // foo: 42 ]), Lookup(( LookupNode::Call { @@ -3556,8 +3546,8 @@ x.takes_a_map }, None, )), - Lookup((LookupNode::Id(constant(1)), Some(3))), // takes_a_map - Lookup((LookupNode::Root(0), Some(4))), // @5 + Lookup((LookupNode::Id(1), Some(3))), // takes_a_map + Lookup((LookupNode::Root(0), Some(4))), // @5 MainBlock { body: 
vec![5], local_count: 0, @@ -3594,11 +3584,11 @@ x.takes_a_map check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), - Lookup((LookupNode::Id(constant(1)), None)), + Id(0), + Lookup((LookupNode::Id(1), None)), Lookup((LookupNode::Root(0), Some(1))), - Id(constant(0)), - Lookup((LookupNode::Id(constant(2)), None)), + Id(0), + Lookup((LookupNode::Id(2), None)), Lookup((LookupNode::Root(3), Some(4))), // 5 List(vec![2, 5]), MainBlock { @@ -3620,13 +3610,13 @@ x.takes_a_map check_ast( source, &[ - Id(constant(1)), // x + Id(1), // x NamedCall { - id: constant(0), // f + id: 0, // f args: vec![0], }, Nested(1), - Lookup((LookupNode::Id(constant(2)), None)), + Lookup((LookupNode::Id(2), None)), Lookup((LookupNode::Root(2), Some(3))), MainBlock { body: vec![4], @@ -3643,9 +3633,9 @@ x.takes_a_map check_ast( source, &[ - Id(constant(1)), // x + Id(1), // x NamedCall { - id: constant(0), // f + id: 0, // f args: vec![0], }, Nested(1), @@ -3667,13 +3657,13 @@ x.takes_a_map check_ast( source, &[ - Id(constant(1)), // x + Id(1), // x NamedCall { - id: constant(0), // f + id: 0, // f args: vec![0], }, Nested(1), - Id(constant(2)), // y + Id(2), // y Lookup(( LookupNode::Call { args: vec![3], @@ -3705,7 +3695,7 @@ x.takes_a_map }, None, )), - Lookup((LookupNode::Id(constant(0)), Some(1))), + Lookup((LookupNode::Id(0), Some(1))), Lookup((LookupNode::Root(0), Some(2))), MainBlock { body: vec![3], @@ -3723,7 +3713,7 @@ x.takes_a_map source, &[ string_literal(0, QuotationMark::Single), - Id(constant(2)), + Id(2), Lookup(( LookupNode::Call { args: vec![1], @@ -3731,7 +3721,7 @@ x.takes_a_map }, None, )), - Lookup((LookupNode::Id(constant(1)), Some(2))), + Lookup((LookupNode::Id(1), Some(2))), Lookup((LookupNode::Root(0), Some(3))), MainBlock { body: vec![4], @@ -3765,11 +3755,11 @@ x = ( 0 check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), + Id(0), SmallInt(0), SmallInt(1), Tuple(vec![1, 2]), - Id(constant(2)), + Id(2), Lookup(( LookupNode::Call { args: 
vec![4], @@ -3777,7 +3767,7 @@ x = ( 0 }, None, )), // 5 - Lookup((LookupNode::Id(constant(1)), Some(5))), + Lookup((LookupNode::Id(1), Some(5))), Lookup((LookupNode::Root(3), Some(6))), Assign { target: 0, @@ -3814,11 +3804,11 @@ x = [ 0 check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), + Id(0), SmallInt(0), SmallInt(1), List(vec![1, 2]), - Id(constant(2)), + Id(2), Lookup(( LookupNode::Call { args: vec![4], @@ -3826,7 +3816,7 @@ x = [ 0 }, None, )), // 5 - Lookup((LookupNode::Id(constant(1)), Some(5))), + Lookup((LookupNode::Id(1), Some(5))), Lookup((LookupNode::Root(3), Some(6))), Assign { target: 0, @@ -3869,11 +3859,8 @@ x = { y check_ast_for_equivalent_sources( &sources, &[ - Id(constant(0)), - Map(vec![ - (MapKey::Id(constant(1)), None), - (MapKey::Id(constant(2)), None), - ]), + Id(0), + Map(vec![(MapKey::Id(1), None), (MapKey::Id(2), None)]), Lookup(( LookupNode::Call { args: vec![], @@ -3881,7 +3868,7 @@ x = { y }, None, )), - Lookup((LookupNode::Id(constant(3)), Some(2))), + Lookup((LookupNode::Id(3), Some(2))), Lookup((LookupNode::Root(1), Some(3))), Assign { target: 0, @@ -3922,7 +3909,7 @@ x = { y }, None, )), - Lookup((LookupNode::Id(constant(0)), Some(4))), // 5 + Lookup((LookupNode::Id(0), Some(4))), // 5 Lookup((LookupNode::Root(3), Some(5))), MainBlock { body: vec![6], @@ -3956,7 +3943,7 @@ x = { y }, None, )), - Lookup((LookupNode::Id(constant(0)), Some(3))), + Lookup((LookupNode::Id(0), Some(3))), Lookup((LookupNode::Root(2), Some(4))), // 5 MainBlock { body: vec![5], @@ -3973,9 +3960,9 @@ x = { y check_ast( source, &[ - Id(constant(0)), + Id(0), Nested(0), - Id(constant(2)), + Id(2), Lookup(( LookupNode::Call { args: vec![2], @@ -3983,7 +3970,7 @@ x = { y }, None, )), - Lookup((LookupNode::Id(constant(1)), Some(3))), + Lookup((LookupNode::Id(1), Some(3))), Lookup((LookupNode::Root(1), Some(4))), // 5 Nested(5), MainBlock { @@ -4009,7 +3996,7 @@ x.iter() check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(1), Lookup(( 
LookupNode::Call { @@ -4018,7 +4005,7 @@ x.iter() }, None, )), - Lookup((LookupNode::Id(constant(3)), Some(2))), + Lookup((LookupNode::Id(3), Some(2))), Lookup(( LookupNode::Call { args: vec![1], @@ -4026,7 +4013,7 @@ x.iter() }, Some(3), )), - Lookup((LookupNode::Id(constant(2)), Some(4))), // 5 + Lookup((LookupNode::Id(2), Some(4))), // 5 Lookup(( LookupNode::Call { args: vec![], @@ -4034,7 +4021,7 @@ x.iter() }, Some(5), )), - Lookup((LookupNode::Id(constant(1)), Some(6))), + Lookup((LookupNode::Id(1), Some(6))), Lookup((LookupNode::Root(0), Some(7))), MainBlock { body: vec![8], @@ -4060,11 +4047,11 @@ foo.bar check_ast( source, &[ - Id(constant(0)), - Lookup((LookupNode::Id(constant(1)), None)), + Id(0), + Lookup((LookupNode::Id(1), None)), Lookup((LookupNode::Root(0), Some(1))), - Id(constant(0)), - Lookup((LookupNode::Id(constant(2)), None)), + Id(0), + Lookup((LookupNode::Id(2), None)), Lookup((LookupNode::Root(3), Some(4))), // 5 BinaryOp { op: AstBinaryOp::Or, @@ -4133,21 +4120,21 @@ assert_eq x, "hello" op: AstUnaryOp::Not, value: 0, }, - Id(constant(0)), - Id(constant(0)), + Id(0), + Id(0), BinaryOp { op: AstBinaryOp::Add, lhs: 2, rhs: 3, }, Debug { - expression_string: constant(1), + expression_string: 1, expression: 4, }, // 5 - Id(constant(0)), // x + Id(0), // x string_literal(3, QuotationMark::Double), NamedCall { - id: constant(2), + id: 2, args: vec![6, 7], }, MainBlock { @@ -4168,11 +4155,14 @@ assert_eq x, "hello" mod import { use super::*; - fn import_id(id: u8) -> ImportItemNode { - ImportItemNode::Id(constant(id)) + fn import_id(id: ConstantIndex) -> ImportItemNode { + ImportItemNode::Id(id) } - fn import_string(literal_index: u8, quotation_mark: QuotationMark) -> ImportItemNode { + fn import_string( + literal_index: ConstantIndex, + quotation_mark: QuotationMark, + ) -> ImportItemNode { ImportItemNode::Str(simple_string(literal_index, quotation_mark)) } @@ -4220,7 +4210,7 @@ assert_eq x, "hello" check_ast( source, &[ - Id(constant(0)), + 
Id(0), Import { from: vec![import_id(1)], items: vec![import_id(2)], @@ -4354,7 +4344,7 @@ catch e check_ast( source, &[ - Id(constant(0)), + Id(0), Lookup(( LookupNode::Call { args: vec![], @@ -4363,10 +4353,10 @@ catch e None, )), Lookup((LookupNode::Root(0), Some(1))), - Id(constant(1)), // e - Id(constant(1)), + Id(1), // e + Id(1), Debug { - expression_string: constant(1), + expression_string: 1, expression: 4, }, // ast 5 Try(AstTry { @@ -4395,9 +4385,9 @@ catch _ check_ast( source, &[ - Id(constant(0)), + Id(0), Wildcard(None), - Id(constant(1)), + Id(1), Try(AstTry { try_block: 0, catch_arg: 1, @@ -4424,9 +4414,9 @@ catch _error check_ast( source, &[ - Id(constant(0)), // x - Wildcard(Some(constant(1))), // error - Id(constant(2)), // y + Id(0), // x + Wildcard(Some(1)), // error + Id(2), // y Try(AstTry { try_block: 0, catch_arg: 1, @@ -4459,7 +4449,7 @@ finally check_ast( source, &[ - Id(constant(0)), + Id(0), Lookup(( LookupNode::Call { args: vec![], @@ -4468,10 +4458,10 @@ finally None, )), Lookup((LookupNode::Root(0), Some(1))), - Id(constant(1)), // e - Id(constant(1)), + Id(1), // e + Id(1), Debug { - expression_string: constant(1), + expression_string: 1, expression: 4, }, // ast 5 SmallInt(0), @@ -4496,7 +4486,7 @@ finally check_ast( source, &[ - Id(constant(0)), + Id(0), Throw(0), MainBlock { body: vec![1], @@ -4534,12 +4524,9 @@ throw check_ast( source, &[ - Id(constant(1)), + Id(1), string_literal(3, QuotationMark::Double), - Map(vec![ - (MapKey::Id(constant(0)), Some(0)), - (MapKey::Id(constant(2)), Some(1)), - ]), + Map(vec![(MapKey::Id(0), Some(0)), (MapKey::Id(2), Some(1))]), Throw(2), MainBlock { body: vec![3], @@ -4569,12 +4556,12 @@ x = match y check_ast( source, &[ - Id(constant(0)), - Id(constant(1)), + Id(0), + Id(1), SmallInt(0), SmallInt(1), SmallInt(42), - Id(constant(2)), // 5 + Id(2), // 5 SmallInt(-1), Match { expression: 1, @@ -4614,7 +4601,7 @@ match x check_ast( source, &[ - Id(constant(0)), + Id(0), string_literal(1, 
QuotationMark::Single), SmallInt(99), string_literal(2, QuotationMark::Double), @@ -4659,20 +4646,20 @@ match (x, y, z) check_ast( source, &[ - Id(constant(0)), - Id(constant(1)), - Id(constant(2)), + Id(0), + Id(1), + Id(2), Tuple(vec![0, 1, 2]), SmallInt(0), - Id(constant(3)), // 5 + Id(3), // 5 Wildcard(None), Tuple(vec![4, 5, 6]), - Id(constant(3)), + Id(3), Wildcard(None), SmallInt(0), // 10 - Id(constant(4)), + Id(4), Tuple(vec![10, 11]), - Wildcard(Some(constant(5))), + Wildcard(Some(5)), Tuple(vec![9, 12, 13]), SmallInt(0), // 15 Match { @@ -4716,7 +4703,7 @@ match x check_ast( source, &[ - Id(constant(0)), + Id(0), Ellipsis(None), SmallInt(0), Tuple(vec![1, 2]), @@ -4759,15 +4746,15 @@ match y check_ast( source, &[ - Id(constant(0)), - Ellipsis(Some(constant(1))), + Id(0), + Ellipsis(Some(1)), SmallInt(0), SmallInt(1), Tuple(vec![1, 2, 3]), SmallInt(0), // 5 SmallInt(1), SmallInt(0), - Ellipsis(Some(constant(2))), + Ellipsis(Some(2)), Tuple(vec![6, 7, 8]), SmallInt(1), // 10 Match { @@ -4811,9 +4798,9 @@ match x check_ast( source, &[ - Id(constant(0)), - Id(constant(1)), - Id(constant(1)), + Id(0), + Id(1), + Id(1), SmallInt(5), BinaryOp { op: AstBinaryOp::Greater, @@ -4821,8 +4808,8 @@ match x rhs: 3, }, SmallInt(0), // 5 - Id(constant(1)), - Id(constant(1)), + Id(1), + Id(1), SmallInt(10), BinaryOp { op: AstBinaryOp::Less, @@ -4830,7 +4817,7 @@ match x rhs: 8, }, SmallInt(1), // 10 - Id(constant(1)), + Id(1), SmallInt(-1), Match { expression: 0, @@ -4873,8 +4860,8 @@ match x, y check_ast( source, &[ - Id(constant(0)), - Id(constant(1)), + Id(0), + Id(1), TempTuple(vec![0, 1]), SmallInt(0), SmallInt(1), @@ -4882,12 +4869,12 @@ match x, y SmallInt(2), SmallInt(3), TempTuple(vec![6, 7]), - Id(constant(2)), + Id(2), SmallInt(0), // 10 - Id(constant(3)), + Id(3), Null, TempTuple(vec![11, 12]), - Id(constant(3)), + Id(3), SmallInt(0), // 15 Match { expression: 2, @@ -4933,7 +4920,7 @@ match x.foo 42 check_ast( source, &[ - Id(constant(0)), + Id(0), 
SmallInt(42), Lookup(( LookupNode::Call { @@ -4942,7 +4929,7 @@ match x.foo 42 }, None, )), - Lookup((LookupNode::Id(constant(1)), Some(2))), + Lookup((LookupNode::Id(1), Some(2))), Lookup((LookupNode::Root(0), Some(3))), Null, // 5 SmallInt(0), @@ -4980,9 +4967,9 @@ match x check_ast( source, &[ - Id(constant(0)), - Id(constant(1)), - Lookup((LookupNode::Id(constant(2)), None)), + Id(0), + Id(1), + Lookup((LookupNode::Id(2), None)), Lookup((LookupNode::Root(1), Some(2))), SmallInt(0), Match { @@ -5012,7 +4999,7 @@ match x check_ast( source, &[ - Id(constant(0)), + Id(0), SmallInt(0), SmallInt(1), string_literal(1, QuotationMark::Single), @@ -5060,15 +5047,15 @@ switch rhs: 1, }, SmallInt(0), - Id(constant(0)), - Id(constant(1)), // 5 + Id(0), + Id(1), // 5 BinaryOp { op: AstBinaryOp::Greater, lhs: 4, rhs: 5, }, SmallInt(1), - Id(constant(0)), + Id(0), Switch(vec![ SwitchArm { condition: Some(2), @@ -5104,9 +5091,9 @@ switch &[ BoolTrue, SmallInt(1), - Id(constant(0)), + Id(0), Debug { - expression_string: constant(0), + expression_string: 0, expression: 2, }, Switch(vec![ From ea1cf207b2763f8fa19b1fd52e064e90001f1546 Mon Sep 17 00:00:00 2001 From: Ian Hobson Date: Tue, 16 Jan 2024 11:46:11 +0100 Subject: [PATCH 7/8] Ensure that raw strings can be used in the same places as normal strings --- crates/lexer/src/lexer.rs | 148 +++++++++++++++++----------- crates/lexer/src/lib.rs | 2 +- crates/parser/src/lib.rs | 2 +- crates/parser/src/node.rs | 12 +-- crates/parser/src/parser.rs | 62 +++++------- crates/parser/tests/parser_tests.rs | 79 +++++++-------- crates/runtime/tests/vm_tests.rs | 3 +- 7 files changed, 161 insertions(+), 147 deletions(-) diff --git a/crates/lexer/src/lexer.rs b/crates/lexer/src/lexer.rs index b41578757..666b7c201 100644 --- a/crates/lexer/src/lexer.rs +++ b/crates/lexer/src/lexer.rs @@ -16,10 +16,8 @@ pub enum Token { Id, Wildcard, - SingleQuote, - DoubleQuote, - RawStringStart, - RawStringEnd, + StringStart { quote: StringQuote, raw: bool }, + 
StringEnd, StringLiteral, // Symbols @@ -108,15 +106,31 @@ impl Token { } } +/// The type of quotation mark used in string delimiters +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[allow(missing_docs)] +pub enum StringQuote { + Double, + Single, +} + +impl TryFrom for StringQuote { + type Error = (); + + fn try_from(c: char) -> Result { + match c { + '"' => Ok(Self::Double), + '\'' => Ok(Self::Single), + _ => Err(()), + } + } +} + // Used to keep track of different lexing modes while working through a string #[derive(Clone)] enum StringMode { - // Inside a string literal, expecting an end quote - Literal(char), - // The start of a raw string has just been encountered, raw string contents follow - RawStart(char), - // The contents of the raw string have just been consumed, the end delimiter should follow - RawEnd(char), + // Inside a string literal, expecting an end quote or the start of a template expression + Literal(StringQuote), // Just after a $ symbol, either an id or a '{' will follow TemplateStart, // Inside a string template, e.g. '${...}' @@ -124,6 +138,10 @@ enum StringMode { // Inside an inline map in a template expression, e.g. '${foo({bar: 42})}' // A closing '}' will end the map rather than the template expression. 
TemplateExpressionInlineMap, + // The start of a raw string has just been consumed, raw string contents follow + RawStart(StringQuote), + // The contents of the raw string have just been consumed, the end delimiter should follow + RawEnd(StringQuote), } // Separates the input source into Tokens @@ -297,7 +315,7 @@ impl<'a> TokenLexer<'a> { fn consume_string_literal(&mut self, mut chars: Peekable) -> Token { use Token::*; - let string_quote = match self.string_mode_stack.last() { + let end_quote = match self.string_mode_stack.last() { Some(StringMode::Literal(quote)) => *quote, _ => return Error, }; @@ -307,7 +325,7 @@ impl<'a> TokenLexer<'a> { while let Some(c) = chars.peek().cloned() { match c { - _ if c == string_quote => { + _ if c.try_into() == Ok(end_quote) => { self.advance_to_position(string_bytes, position); return StringLiteral; } @@ -323,7 +341,7 @@ impl<'a> TokenLexer<'a> { let skip_next_char = match chars.peek() { Some('$') => true, Some('\\') => true, - Some(c) if *c == string_quote => true, + Some(&c) if c.try_into() == Ok(end_quote) => true, _ => false, }; @@ -362,7 +380,7 @@ impl<'a> TokenLexer<'a> { fn consume_raw_string_contents( &mut self, mut chars: Peekable, - end_quote: char, + end_quote: StringQuote, ) -> Token { let mut string_bytes = 0; @@ -370,7 +388,7 @@ impl<'a> TokenLexer<'a> { while let Some(c) = chars.next() { match c { - _ if c == end_quote => { + _ if c.try_into() == Ok(end_quote) => { self.advance_to_position(string_bytes, position); self.string_mode_stack.pop(); // StringMode::RawStart self.string_mode_stack.push(StringMode::RawEnd(end_quote)); @@ -399,12 +417,16 @@ impl<'a> TokenLexer<'a> { Token::Error } - fn consume_raw_string_end(&mut self, mut chars: Peekable, end_quote: char) -> Token { + fn consume_raw_string_end( + &mut self, + mut chars: Peekable, + end_quote: StringQuote, + ) -> Token { match chars.next() { - Some(c) if c == end_quote => { + Some(c) if c.try_into() == Ok(end_quote) => { self.string_mode_stack.pop(); // 
StringMode::RawEnd self.advance_line(1); - Token::RawStringEnd + Token::StringEnd } _ => Token::Error, } @@ -507,10 +529,12 @@ impl<'a> TokenLexer<'a> { } "r" => { // look ahead and determine if this is the start of a raw string - if let Some(c @ '\'' | c @ '"') = chars.peek() { - self.advance_line(2); - self.string_mode_stack.push(StringMode::RawStart(*c)); - return RawStringStart; + if let Some(&c) = chars.peek() { + if let Ok(quote) = c.try_into() { + self.advance_line(2); + self.string_mode_stack.push(StringMode::RawStart(quote)); + return StringStart { quote, raw: true }; + } } } _ => {} @@ -645,15 +669,10 @@ impl<'a> TokenLexer<'a> { let result = match string_mode { Some(StringMode::Literal(quote)) => match next_char { - '"' if quote == '"' => { - self.advance_line(1); - self.string_mode_stack.pop(); - DoubleQuote - } - '\'' if quote == '\'' => { + c if c.try_into() == Ok(quote) => { self.advance_line(1); self.string_mode_stack.pop(); - SingleQuote + StringEnd } '$' => { self.advance_line(1); @@ -695,13 +714,21 @@ impl<'a> TokenLexer<'a> { '#' => self.consume_comment(chars), '"' => { self.advance_line(1); - self.string_mode_stack.push(StringMode::Literal('"')); - DoubleQuote + self.string_mode_stack + .push(StringMode::Literal(StringQuote::Double)); + StringStart { + quote: StringQuote::Double, + raw: false, + } } '\'' => { self.advance_line(1); - self.string_mode_stack.push(StringMode::Literal('\'')); - SingleQuote + self.string_mode_stack + .push(StringMode::Literal(StringQuote::Single)); + StringStart { + quote: StringQuote::Single, + raw: false, + } } '0'..='9' => self.consume_number(chars), c if is_id_start(c) => self.consume_id_or_keyword(chars), @@ -992,6 +1019,10 @@ mod tests { assert_eq!(lex.next(), None); } + fn string_start(quote: StringQuote, raw: bool) -> Token { + Token::StringStart { quote, raw } + } + #[test] fn ids() { let input = "id id1 id_2 i_d_3 ïd_ƒôûr if iff _ _foo"; @@ -1070,32 +1101,33 @@ false # "\\" "#; + use StringQuote::*; 
check_lexer_output( input, &[ (NewLine, None, 1), - (DoubleQuote, None, 2), + (string_start(Double, false), None, 2), (StringLiteral, Some("hello, world!"), 2), - (DoubleQuote, None, 2), + (StringEnd, None, 2), (NewLine, None, 2), - (DoubleQuote, None, 3), + (string_start(Double, false), None, 3), (StringLiteral, Some(r#"escaped \\\"\n\$ string"#), 3), - (DoubleQuote, None, 3), + (StringEnd, None, 3), (NewLine, None, 3), - (DoubleQuote, None, 4), + (string_start(Double, false), None, 4), (StringLiteral, Some(r#"double-\"quoted\" 'string'"#), 4), - (DoubleQuote, None, 4), + (StringEnd, None, 4), (NewLine, None, 4), - (SingleQuote, None, 5), + (string_start(Single, false), None, 5), (StringLiteral, Some(r#"single-\'quoted\' "string""#), 5), - (SingleQuote, None, 5), + (StringEnd, None, 5), (NewLine, None, 5), - (DoubleQuote, None, 6), - (DoubleQuote, None, 6), + (string_start(Double, false), None, 6), + (StringEnd, None, 6), (NewLine, None, 6), - (DoubleQuote, None, 7), + (string_start(Double, false), None, 7), (StringLiteral, Some(r"\\"), 7), - (DoubleQuote, None, 7), + (StringEnd, None, 7), (NewLine, None, 7), ], ); @@ -1111,9 +1143,9 @@ r'$foo' input, &[ (NewLine, None, 1), - (RawStringStart, None, 2), + (string_start(StringQuote::Single, true), None, 2), (StringLiteral, Some("$foo"), 2), - (RawStringEnd, None, 2), + (StringEnd, None, 2), (NewLine, None, 2), ], ); @@ -1125,23 +1157,24 @@ r'$foo' "hello $name, how are you?" 
'$foo$bar' "#; + use StringQuote::*; check_lexer_output( input, &[ (NewLine, None, 1), - (DoubleQuote, None, 2), + (string_start(Double, false), None, 2), (StringLiteral, Some("hello "), 2), (Dollar, None, 2), (Id, Some("name"), 2), (StringLiteral, Some(", how are you?"), 2), - (DoubleQuote, None, 2), + (StringEnd, None, 2), (NewLine, None, 2), - (SingleQuote, None, 3), + (string_start(Single, false), None, 3), (Dollar, None, 3), (Id, Some("foo"), 3), (Dollar, None, 3), (Id, Some("bar"), 3), - (SingleQuote, None, 3), + (StringEnd, None, 3), (NewLine, None, 3), ], ); @@ -1153,11 +1186,12 @@ r'$foo' "x + y == ${x + y}" '${'{}'.format foo}' "#; + use StringQuote::*; check_lexer_output( input, &[ (NewLine, None, 1), - (DoubleQuote, None, 2), + (string_start(Double, false), None, 2), (StringLiteral, Some("x + y == "), 2), (Dollar, None, 2), (CurlyOpen, None, 2), @@ -1165,19 +1199,19 @@ r'$foo' (Add, None, 2), (Id, Some("y"), 2), (CurlyClose, None, 2), - (DoubleQuote, None, 2), + (StringEnd, None, 2), (NewLine, None, 2), - (SingleQuote, None, 3), + (string_start(Single, false), None, 3), (Dollar, None, 3), (CurlyOpen, None, 3), - (SingleQuote, None, 3), + (string_start(Single, false), None, 3), (StringLiteral, Some("{}"), 3), - (SingleQuote, None, 3), + (StringEnd, None, 3), (Dot, None, 3), (Id, Some("format"), 3), (Id, Some("foo"), 3), (CurlyClose, None, 3), - (SingleQuote, None, 3), + (StringEnd, None, 3), (NewLine, None, 3), ], ); diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs index 675b696bb..b41e412e4 100644 --- a/crates/lexer/src/lib.rs +++ b/crates/lexer/src/lib.rs @@ -6,6 +6,6 @@ mod lexer; mod span; pub use crate::{ - lexer::{is_id_continue, is_id_start, KotoLexer as Lexer, LexedToken, Token}, + lexer::{is_id_continue, is_id_start, KotoLexer as Lexer, LexedToken, StringQuote, Token}, span::{Position, Span}, }; diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 35f98c7bc..a48a8edc6 100644 --- a/crates/parser/src/lib.rs +++ 
b/crates/parser/src/lib.rs @@ -15,4 +15,4 @@ pub use crate::{ node::*, parser::Parser, }; -pub use koto_lexer::{Position, Span}; +pub use koto_lexer::{Position, Span, StringQuote}; diff --git a/crates/parser/src/node.rs b/crates/parser/src/node.rs index 690233753..cffad8390 100644 --- a/crates/parser/src/node.rs +++ b/crates/parser/src/node.rs @@ -1,4 +1,4 @@ -use crate::{ast::AstIndex, constant_pool::ConstantIndex}; +use crate::{ast::AstIndex, constant_pool::ConstantIndex, StringQuote}; use std::fmt; /// A parsed node that can be included in the [AST](crate::Ast). @@ -349,7 +349,7 @@ pub struct Function { #[derive(Clone, Debug, PartialEq, Eq)] pub struct AstString { /// Indicates if single or double quotation marks were used - pub quotation_mark: QuotationMark, + pub quote: StringQuote, /// The string's contents pub contents: StringContents, } @@ -635,14 +635,6 @@ pub enum MapKey { Meta(MetaKeyId, Option), } -/// The type of quotation mark used in a string literal -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[allow(missing_docs)] -pub enum QuotationMark { - Double, - Single, -} - /// A node in an import item, see [Node::Import] #[derive(Clone, Debug, PartialEq, Eq)] pub enum ImportItemNode { diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 64783fecc..89b002ff0 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -729,7 +729,7 @@ impl<'source> Parser<'source> { } Token::RoundOpen => self.consume_tuple(context), Token::Number => self.consume_number(false, context), - Token::DoubleQuote | Token::SingleQuote | Token::RawStringStart => { + Token::StringStart { .. } => { let string = self.parse_string(context)?.unwrap(); if self.peek_token() == Some(Token::Colon) { @@ -1348,7 +1348,7 @@ impl<'source> Parser<'source> { if !matches!( self.peek_token(), - Some(Token::Id | Token::SingleQuote | Token::DoubleQuote) + Some(Token::Id | Token::StringStart { .. }) ) { // This check prevents detached dot accesses, e.g. `x. 
foo` return self.error(SyntaxError::ExpectedMapKey); @@ -2487,7 +2487,7 @@ impl<'source> Parser<'source> { let result = match self.peek_token_with_context(&pattern_context) { Some(peeked) => match peeked.token { - True | False | Null | Number | SingleQuote | DoubleQuote | Subtract => { + True | False | Null | Number | StringStart { .. } | Subtract => { return self.parse_term(&pattern_context) } Id => match self.parse_id(&pattern_context)? { @@ -2761,19 +2761,21 @@ impl<'source> Parser<'source> { use SyntaxError::*; use Token::*; - match self.peek_token_with_context(context) { - Some(PeekInfo { - token: SingleQuote | DoubleQuote, - .. - }) => {} + let quote = match self.peek_token_with_context(context) { Some(PeekInfo { - token: RawStringStart, + token: StringStart { quote, raw }, .. - }) => return self.consume_raw_string(context), + }) => { + if raw { + return self.consume_raw_string(context); + } else { + quote + } + } _ => return Ok(None), - } + }; - let (string_quote, string_context) = self.consume_token_with_context(context).unwrap(); + let (_, string_context) = self.consume_token_with_context(context).unwrap(); let start_span = self.current_span(); let mut nodes = Vec::new(); @@ -2898,13 +2900,7 @@ impl<'source> Parser<'source> { } None => break, }, - c if c == string_quote => { - let quotation_mark = if string_quote == SingleQuote { - QuotationMark::Single - } else { - QuotationMark::Double - }; - + StringEnd => { let contents = match nodes.as_slice() { [] => StringContents::Literal(self.add_string_constant("")?), [StringNode::Literal(literal)] => StringContents::Literal(*literal), @@ -2912,10 +2908,7 @@ impl<'source> Parser<'source> { }; return Ok(Some(ParseStringOutput { - string: AstString { - quotation_mark, - contents, - }, + string: AstString { quote, contents }, span: self.span_with_start(start_span), context: string_context, })); @@ -2931,33 +2924,30 @@ impl<'source> Parser<'source> { &mut self, context: &ExpressionContext, ) -> Result, ParserError> { 
- let Some((_, string_context)) = self.consume_token_with_context(context) else { - return self.error(InternalError::RawStringParseFailure); - }; // Token::RawStringDelimiter - - let start_span = self.current_span(); - let start_delimiter = self.current_token.slice(self.source); - let quotation_mark = match start_delimiter.chars().next_back() { - Some('\'') => QuotationMark::Single, - Some('"') => QuotationMark::Double, + let (quote, string_context) = match self.consume_token_with_context(context) { + Some((Token::StringStart { quote, raw }, string_context)) if raw => { + (quote, string_context) + } _ => return self.error(InternalError::RawStringParseFailure), }; + let start_span = self.current_span(); + let contents = match self.consume_token() { Some(Token::StringLiteral) => { - let contents = self.add_string_constant(self.current_token.slice(self.source))?; + let contents = self.add_current_slice_as_string_constant()?; match self.consume_token() { - Some(Token::RawStringEnd) => contents, + Some(Token::StringEnd) => contents, _ => return self.error(SyntaxError::UnterminatedString), } } - Some(Token::RawStringEnd) => self.add_string_constant("")?, + Some(Token::StringEnd) => self.add_string_constant("")?, _ => return self.error(SyntaxError::UnterminatedString), }; Ok(Some(ParseStringOutput { string: AstString { - quotation_mark, + quote, contents: StringContents::Raw(contents), }, span: self.span_with_start(start_span), diff --git a/crates/parser/tests/parser_tests.rs b/crates/parser/tests/parser_tests.rs index 56e403f0e..40b1bfaec 100644 --- a/crates/parser/tests/parser_tests.rs +++ b/crates/parser/tests/parser_tests.rs @@ -46,21 +46,18 @@ mod parser { } } - fn simple_string(literal_index: ConstantIndex, quotation_mark: QuotationMark) -> AstString { + fn simple_string(literal_index: ConstantIndex, quotation_mark: StringQuote) -> AstString { AstString { - quotation_mark, + quote: quotation_mark, contents: StringContents::Literal(literal_index), } } - fn 
string_literal(literal_index: ConstantIndex, quotation_mark: QuotationMark) -> Node { + fn string_literal(literal_index: ConstantIndex, quotation_mark: StringQuote) -> Node { Node::Str(simple_string(literal_index, quotation_mark)) } - fn string_literal_map_key( - literal_index: ConstantIndex, - quotation_mark: QuotationMark, - ) -> MapKey { + fn string_literal_map_key(literal_index: ConstantIndex, quotation_mark: StringQuote) -> MapKey { MapKey::Str(simple_string(literal_index, quotation_mark)) } @@ -85,8 +82,8 @@ null"#; BoolFalse, SmallInt(1), Float(0), - string_literal(1, QuotationMark::Double), - string_literal(2, QuotationMark::Single), + string_literal(1, StringQuote::Double), + string_literal(2, StringQuote::Single), Id(3), Null, MainBlock { @@ -148,8 +145,8 @@ null"#; check_ast( source, &[ - string_literal(0, QuotationMark::Double), - string_literal(1, QuotationMark::Double), + string_literal(0, StringQuote::Double), + string_literal(1, StringQuote::Double), MainBlock { body: vec![0, 1], local_count: 0, @@ -171,8 +168,8 @@ null"#; check_ast( source, &[ - string_literal(0, QuotationMark::Double), - string_literal(1, QuotationMark::Single), + string_literal(0, StringQuote::Double), + string_literal(1, StringQuote::Single), MainBlock { body: vec![0, 1], local_count: 0, @@ -194,7 +191,7 @@ null"#; &[ Id(1), Str(AstString { - quotation_mark: QuotationMark::Single, + quote: StringQuote::Single, contents: StringContents::Interpolated(vec![ StringNode::Literal(0), StringNode::Expr(0), @@ -203,13 +200,13 @@ null"#; }), Id(3), Str(AstString { - quotation_mark: QuotationMark::Double, + quote: StringQuote::Double, contents: StringContents::Interpolated(vec![StringNode::Expr(2)]), }), Id(4), Id(6), // 5 Str(AstString { - quotation_mark: QuotationMark::Single, + quote: StringQuote::Single, contents: StringContents::Interpolated(vec![ StringNode::Expr(4), StringNode::Literal(5), @@ -249,7 +246,7 @@ null"#; rhs: 1, }, Str(AstString { - quotation_mark: 
QuotationMark::Single, + quote: StringQuote::Single, contents: StringContents::Interpolated(vec![ StringNode::Expr(2), StringNode::Literal(1), @@ -275,11 +272,11 @@ r"[\r?\n]\" source, &[ Str(AstString { - quotation_mark: QuotationMark::Single, + quote: StringQuote::Single, contents: StringContents::Raw(0), }), Str(AstString { - quotation_mark: QuotationMark::Double, + quote: StringQuote::Double, contents: StringContents::Raw(1), }), MainBlock { @@ -351,7 +348,7 @@ r"[\r?\n]\" &[ SmallInt(0), Id(0), - string_literal(1, QuotationMark::Double), + string_literal(1, StringQuote::Double), Id(0), SmallInt(-1), List(vec![0, 1, 2, 3, 4]), @@ -481,10 +478,10 @@ x = Map(vec![]), Id(0), SmallInt(42), - string_literal(4, QuotationMark::Single), + string_literal(4, StringQuote::Single), SmallInt(99), Map(vec![ - (string_literal_map_key(1, QuotationMark::Single), Some(2)), + (string_literal_map_key(1, StringQuote::Single), Some(2)), (MapKey::Id(2), None), (MapKey::Id(3), Some(3)), (MapKey::Meta(MetaKeyId::Add, None), Some(4)), @@ -521,9 +518,9 @@ x = source, &[ SmallInt(42), - string_literal(3, QuotationMark::Double), + string_literal(3, StringQuote::Double), Map(vec![ - (string_literal_map_key(0, QuotationMark::Single), Some(0)), + (string_literal_map_key(0, StringQuote::Single), Some(0)), (MapKey::Id(1), None), (MapKey::Id(2), Some(1)), ]), @@ -559,9 +556,9 @@ x"#; Map(vec![(MapKey::Id(1), Some(2))]), // foo, 0 SmallInt(-1), Map(vec![ - (MapKey::Id(1), Some(1)), // foo: 42 - (string_literal_map_key(2, QuotationMark::Double), Some(3)), // "baz": nested map - (MapKey::Meta(MetaKeyId::Subtract, None), Some(4)), // @-: -1 + (MapKey::Id(1), Some(1)), // foo: 42 + (string_literal_map_key(2, StringQuote::Double), Some(3)), // "baz": nested map + (MapKey::Meta(MetaKeyId::Subtract, None), Some(4)), // @-: -1 ]), // 5 Assign { target: 0, @@ -593,7 +590,7 @@ x = Id(0), // x SmallInt(42), Map(vec![( - string_literal_map_key(1, QuotationMark::Double), + string_literal_map_key(1, 
StringQuote::Double), Some(1), )]), // "foo", 42 Assign { @@ -1456,7 +1453,7 @@ export check_ast( source, &[ - string_literal(0, QuotationMark::Single), + string_literal(0, StringQuote::Single), Id(1), BinaryOp { op: AstBinaryOp::Add, @@ -3438,7 +3435,7 @@ x.bar()."baz" = 1 Id(0), Lookup(( LookupNode::Str(AstString { - quotation_mark: QuotationMark::Double, + quote: StringQuote::Double, contents: StringContents::Literal(2), }), None, @@ -3712,7 +3709,7 @@ x.takes_a_map check_ast( source, &[ - string_literal(0, QuotationMark::Single), + string_literal(0, StringQuote::Single), Id(2), Lookup(( LookupNode::Call { @@ -4132,7 +4129,7 @@ assert_eq x, "hello" expression: 4, }, // 5 Id(0), // x - string_literal(3, QuotationMark::Double), + string_literal(3, StringQuote::Double), NamedCall { id: 2, args: vec![6, 7], @@ -4161,7 +4158,7 @@ assert_eq x, "hello" fn import_string( literal_index: ConstantIndex, - quotation_mark: QuotationMark, + quotation_mark: StringQuote, ) -> ImportItemNode { ImportItemNode::Str(simple_string(literal_index, quotation_mark)) } @@ -4255,7 +4252,7 @@ import foo, from: vec![], items: vec![ import_id(0), - import_string(1, QuotationMark::Single), + import_string(1, StringQuote::Single), import_id(2), ], }, @@ -4312,7 +4309,7 @@ from foo import bar, source, &[ Import { - from: vec![import_string(0, QuotationMark::Single), import_id(1)], + from: vec![import_string(0, StringQuote::Single), import_id(1)], items: vec![import_id(2), import_id(3)], }, MainBlock { @@ -4503,7 +4500,7 @@ finally check_ast( source, &[ - string_literal(0, QuotationMark::Single), + string_literal(0, StringQuote::Single), Throw(0), MainBlock { body: vec![1], @@ -4525,7 +4522,7 @@ throw source, &[ Id(1), - string_literal(3, QuotationMark::Double), + string_literal(3, StringQuote::Double), Map(vec![(MapKey::Id(0), Some(0)), (MapKey::Id(2), Some(1))]), Throw(2), MainBlock { @@ -4602,10 +4599,10 @@ match x source, &[ Id(0), - string_literal(1, QuotationMark::Single), + 
string_literal(1, StringQuote::Single), SmallInt(99), - string_literal(2, QuotationMark::Double), - string_literal(3, QuotationMark::Double), + string_literal(2, StringQuote::Double), + string_literal(3, StringQuote::Double), Break(None), // 5 Match { expression: 0, @@ -5002,7 +4999,7 @@ match x Id(0), SmallInt(0), SmallInt(1), - string_literal(1, QuotationMark::Single), + string_literal(1, StringQuote::Single), Throw(3), Match { expression: 0, diff --git a/crates/runtime/tests/vm_tests.rs b/crates/runtime/tests/vm_tests.rs index 48799353f..7ef9db498 100644 --- a/crates/runtime/tests/vm_tests.rs +++ b/crates/runtime/tests/vm_tests.rs @@ -639,6 +639,7 @@ match x % 3, x % 5 x = "hello" match x "goodbye" then 1 + r'byeeee' then 2 () then 99 y if y == "O_o" then -1 y if y == "hello" then @@ -1920,7 +1921,7 @@ m.baz"; fn string_keys() { let script = r#" foo, bar = 42, -1 -m = {foo, bar, 'baz': 99} +m = {foo, 'bar': bar, r'baz': 99} m.baz"#; test_script(script, 99); } From 8f56d3a04b16ea35e7e4e23bdbcd49446333b634 Mon Sep 17 00:00:00 2001 From: Ian Hobson Date: Tue, 16 Jan 2024 14:07:02 +0100 Subject: [PATCH 8/8] Add support for extended raw string delimiters --- crates/bytecode/src/compiler.rs | 6 +- crates/lexer/src/lexer.rs | 165 +++++++++++++++-------- crates/lexer/src/lib.rs | 5 +- crates/parser/src/lib.rs | 2 +- crates/parser/src/node.rs | 7 +- crates/parser/src/parser.rs | 27 ++-- crates/parser/tests/parser_tests.rs | 39 +++++- crates/runtime/tests/runtime_failures.rs | 21 +++ docs/language/strings.md | 14 +- koto/tests/strings.koto | 2 + 10 files changed, 210 insertions(+), 78 deletions(-) diff --git a/crates/bytecode/src/compiler.rs b/crates/bytecode/src/compiler.rs index 934399cb7..deee63250 100644 --- a/crates/bytecode/src/compiler.rs +++ b/crates/bytecode/src/compiler.rs @@ -2026,7 +2026,11 @@ impl Compiler { let result = self.get_result_register(result_register)?; match contents { - StringContents::Raw(constant_index) | StringContents::Literal(constant_index) 
=> { + StringContents::Raw { + constant: constant_index, + .. + } + | StringContents::Literal(constant_index) => { if let Some(result) = result { self.compile_load_string_constant(result.register, *constant_index); } diff --git a/crates/lexer/src/lexer.rs b/crates/lexer/src/lexer.rs index 666b7c201..959da74d8 100644 --- a/crates/lexer/src/lexer.rs +++ b/crates/lexer/src/lexer.rs @@ -16,7 +16,7 @@ pub enum Token { Id, Wildcard, - StringStart { quote: StringQuote, raw: bool }, + StringStart(StringType), StringEnd, StringLiteral, @@ -106,6 +106,24 @@ impl Token { } } +/// The string types that the lexer can produce +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum StringType { + /// A normal string + Normal(StringQuote), + /// A raw string + Raw(RawStringDelimiter), +} + +/// The delimiter used by a raw string +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct RawStringDelimiter { + /// The quotation mark used in the raw string delimiter + pub quote: StringQuote, + /// The number of hashes used in the raw string delimiter + pub hash_count: u8, +} + /// The type of quotation mark used in string delimiters #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[allow(missing_docs)] @@ -139,9 +157,9 @@ enum StringMode { // A closing '}' will end the map rather than the template expression. 
TemplateExpressionInlineMap, // The start of a raw string has just been consumed, raw string contents follow - RawStart(StringQuote), + RawStart(RawStringDelimiter), // The contents of the raw string have just been consumed, the end delimiter should follow - RawEnd(StringQuote), + RawEnd(RawStringDelimiter), } // Separates the input source into Tokens @@ -377,21 +395,71 @@ impl<'a> TokenLexer<'a> { Error } + fn parse_raw_string_start(&mut self, mut chars: Peekable) -> Option { + // look ahead and determine if this is the start of a raw string + let mut hash_count = 0; + loop { + match chars.next() { + Some('#') => { + hash_count += 1; + if hash_count == 256 { + break; + } + } + Some(c) => { + if let Ok(quote) = c.try_into() { + self.advance_line(2 + hash_count); + let hash_count = hash_count as u8; + self.string_mode_stack + .push(StringMode::RawStart(RawStringDelimiter { + quote, + hash_count, + })); + return Some(Token::StringStart(StringType::Raw(RawStringDelimiter { + quote, + hash_count, + }))); + } else { + break; + } + } + None => break, + } + } + + None + } + fn consume_raw_string_contents( &mut self, mut chars: Peekable, - end_quote: StringQuote, + delimiter: RawStringDelimiter, ) -> Token { let mut string_bytes = 0; let mut position = self.current_position(); - while let Some(c) = chars.next() { + 'outer: while let Some(c) = chars.next() { match c { - _ if c.try_into() == Ok(end_quote) => { + _ if c.try_into() == Ok(delimiter.quote) => { + // Is this the end delimiter? + for i in 0..delimiter.hash_count { + if chars.peek() == Some(&'#') { + chars.next(); + } else { + // Adjust for the quote and hashes that were consumed while checking if + // we were at the end delimiter + let not_the_end_delimiter_len = 1 + i as usize; + position.column += not_the_end_delimiter_len as u32; + string_bytes += not_the_end_delimiter_len; + // We haven't hit the required hash count, so keep consuming characters + // as part of the raw string's contents. 
+ continue 'outer; + } + } self.advance_to_position(string_bytes, position); self.string_mode_stack.pop(); // StringMode::RawStart - self.string_mode_stack.push(StringMode::RawEnd(end_quote)); + self.string_mode_stack.push(StringMode::RawEnd(delimiter)); return Token::StringLiteral; } '\r' => { @@ -417,19 +485,12 @@ impl<'a> TokenLexer<'a> { Token::Error } - fn consume_raw_string_end( - &mut self, - mut chars: Peekable, - end_quote: StringQuote, - ) -> Token { - match chars.next() { - Some(c) if c.try_into() == Ok(end_quote) => { - self.string_mode_stack.pop(); // StringMode::RawEnd - self.advance_line(1); - Token::StringEnd - } - _ => Token::Error, - } + fn consume_raw_string_end(&mut self, delimiter: RawStringDelimiter) -> Token { + // The end delimiter has already been matched in consume_raw_string_contents, + // so we can simply advance and return here. + self.advance_line(1 + delimiter.hash_count as usize); + self.string_mode_stack.pop(); // StringMode::RawEnd + Token::StringEnd } fn consume_number(&mut self, mut chars: Peekable) -> Token { @@ -528,13 +589,8 @@ impl<'a> TokenLexer<'a> { } } "r" => { - // look ahead and determine if this is the start of a raw string - if let Some(&c) = chars.peek() { - if let Ok(quote) = c.try_into() { - self.advance_line(2); - self.string_mode_stack.push(StringMode::RawStart(quote)); - return StringStart { quote, raw: true }; - } + if let Some(raw_string) = self.parse_raw_string_start(chars) { + return raw_string; } } _ => {} @@ -681,10 +737,10 @@ impl<'a> TokenLexer<'a> { } _ => self.consume_string_literal(chars), }, - Some(StringMode::RawStart(quote)) => { - self.consume_raw_string_contents(chars, quote) + Some(StringMode::RawStart(delimiter)) => { + self.consume_raw_string_contents(chars, delimiter) } - Some(StringMode::RawEnd(quote)) => self.consume_raw_string_end(chars, quote), + Some(StringMode::RawEnd(delimiter)) => self.consume_raw_string_end(delimiter), Some(StringMode::TemplateStart) => match next_char { _ if 
is_id_start(next_char) => match self.consume_id_or_keyword(chars) { Id => { @@ -716,19 +772,13 @@ impl<'a> TokenLexer<'a> { self.advance_line(1); self.string_mode_stack .push(StringMode::Literal(StringQuote::Double)); - StringStart { - quote: StringQuote::Double, - raw: false, - } + StringStart(StringType::Normal(StringQuote::Double)) } '\'' => { self.advance_line(1); self.string_mode_stack .push(StringMode::Literal(StringQuote::Single)); - StringStart { - quote: StringQuote::Single, - raw: false, - } + StringStart(StringType::Normal(StringQuote::Single)) } '0'..='9' => self.consume_number(chars), c if is_id_start(c) => self.consume_id_or_keyword(chars), @@ -1019,8 +1069,12 @@ mod tests { assert_eq!(lex.next(), None); } - fn string_start(quote: StringQuote, raw: bool) -> Token { - Token::StringStart { quote, raw } + fn normal_string(quote: StringQuote) -> Token { + Token::StringStart(StringType::Normal(quote)) + } + + fn raw_string(quote: StringQuote, hash_count: u8) -> Token { + Token::StringStart(StringType::Raw(RawStringDelimiter { quote, hash_count })) } #[test] @@ -1106,26 +1160,26 @@ false # input, &[ (NewLine, None, 1), - (string_start(Double, false), None, 2), + (normal_string(Double), None, 2), (StringLiteral, Some("hello, world!"), 2), (StringEnd, None, 2), (NewLine, None, 2), - (string_start(Double, false), None, 3), + (normal_string(Double), None, 3), (StringLiteral, Some(r#"escaped \\\"\n\$ string"#), 3), (StringEnd, None, 3), (NewLine, None, 3), - (string_start(Double, false), None, 4), + (normal_string(Double), None, 4), (StringLiteral, Some(r#"double-\"quoted\" 'string'"#), 4), (StringEnd, None, 4), (NewLine, None, 4), - (string_start(Single, false), None, 5), + (normal_string(Single), None, 5), (StringLiteral, Some(r#"single-\'quoted\' "string""#), 5), (StringEnd, None, 5), (NewLine, None, 5), - (string_start(Double, false), None, 6), + (normal_string(Double), None, 6), (StringEnd, None, 6), (NewLine, None, 6), - (string_start(Double, false), None, 
7), + (normal_string(Double), None, 7), (StringLiteral, Some(r"\\"), 7), (StringEnd, None, 7), (NewLine, None, 7), @@ -1136,17 +1190,22 @@ false # #[test] fn raw_strings() { let input = r#" -r'$foo' +r"$foo" +r#''bar''# "#; check_lexer_output( input, &[ (NewLine, None, 1), - (string_start(StringQuote::Single, true), None, 2), + (raw_string(StringQuote::Double, 0), None, 2), (StringLiteral, Some("$foo"), 2), (StringEnd, None, 2), (NewLine, None, 2), + (raw_string(StringQuote::Single, 1), None, 3), + (StringLiteral, Some("'bar'"), 3), + (StringEnd, None, 3), + (NewLine, None, 3), ], ); } @@ -1162,14 +1221,14 @@ r'$foo' input, &[ (NewLine, None, 1), - (string_start(Double, false), None, 2), + (normal_string(Double), None, 2), (StringLiteral, Some("hello "), 2), (Dollar, None, 2), (Id, Some("name"), 2), (StringLiteral, Some(", how are you?"), 2), (StringEnd, None, 2), (NewLine, None, 2), - (string_start(Single, false), None, 3), + (normal_string(Single), None, 3), (Dollar, None, 3), (Id, Some("foo"), 3), (Dollar, None, 3), @@ -1191,7 +1250,7 @@ r'$foo' input, &[ (NewLine, None, 1), - (string_start(Double, false), None, 2), + (normal_string(Double), None, 2), (StringLiteral, Some("x + y == "), 2), (Dollar, None, 2), (CurlyOpen, None, 2), @@ -1201,10 +1260,10 @@ r'$foo' (CurlyClose, None, 2), (StringEnd, None, 2), (NewLine, None, 2), - (string_start(Single, false), None, 3), + (normal_string(Single), None, 3), (Dollar, None, 3), (CurlyOpen, None, 3), - (string_start(Single, false), None, 3), + (normal_string(Single), None, 3), (StringLiteral, Some("{}"), 3), (StringEnd, None, 3), (Dot, None, 3), diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs index b41e412e4..2d03d1f93 100644 --- a/crates/lexer/src/lib.rs +++ b/crates/lexer/src/lib.rs @@ -6,6 +6,9 @@ mod lexer; mod span; pub use crate::{ - lexer::{is_id_continue, is_id_start, KotoLexer as Lexer, LexedToken, StringQuote, Token}, + lexer::{ + is_id_continue, is_id_start, KotoLexer as Lexer, LexedToken, 
RawStringDelimiter, + StringQuote, StringType, Token, + }, span::{Position, Span}, }; diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index a48a8edc6..08ffd3353 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -15,4 +15,4 @@ pub use crate::{ node::*, parser::Parser, }; -pub use koto_lexer::{Position, Span, StringQuote}; +pub use koto_lexer::{Position, RawStringDelimiter, Span, StringQuote, StringType}; diff --git a/crates/parser/src/node.rs b/crates/parser/src/node.rs index cffad8390..e777393fc 100644 --- a/crates/parser/src/node.rs +++ b/crates/parser/src/node.rs @@ -360,7 +360,12 @@ pub enum StringContents { /// A string literal Literal(ConstantIndex), /// A raw string literal - Raw(ConstantIndex), + Raw { + /// The literal's constant index + constant: ConstantIndex, + /// The number of hashes associated with the raw string's delimiter + hash_count: u8, + }, /// An interpolated string /// /// An interpolated string is made up of a series of literals and template expressions, diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 89b002ff0..6329b1d05 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -2763,15 +2763,13 @@ impl<'source> Parser<'source> { let quote = match self.peek_token_with_context(context) { Some(PeekInfo { - token: StringStart { quote, raw }, + token: StringStart(StringType::Normal(quote)), .. - }) => { - if raw { - return self.consume_raw_string(context); - } else { - quote - } - } + }) => quote, + Some(PeekInfo { + token: StringStart(StringType::Raw { .. }), + .. 
+ }) => return self.consume_raw_string(context), _ => return Ok(None), }; @@ -2924,9 +2922,9 @@ impl<'source> Parser<'source> { &mut self, context: &ExpressionContext, ) -> Result, ParserError> { - let (quote, string_context) = match self.consume_token_with_context(context) { - Some((Token::StringStart { quote, raw }, string_context)) if raw => { - (quote, string_context) + let (delimiter, string_context) = match self.consume_token_with_context(context) { + Some((Token::StringStart(StringType::Raw(delimiter)), string_context)) => { + (delimiter, string_context) } _ => return self.error(InternalError::RawStringParseFailure), }; @@ -2947,8 +2945,11 @@ impl<'source> Parser<'source> { Ok(Some(ParseStringOutput { string: AstString { - quote, - contents: StringContents::Raw(contents), + quote: delimiter.quote, + contents: StringContents::Raw { + constant: contents, + hash_count: delimiter.hash_count, + }, }, span: self.span_with_start(start_span), context: string_context, diff --git a/crates/parser/tests/parser_tests.rs b/crates/parser/tests/parser_tests.rs index 40b1bfaec..9f23bf4d7 100644 --- a/crates/parser/tests/parser_tests.rs +++ b/crates/parser/tests/parser_tests.rs @@ -263,28 +263,55 @@ null"#; #[test] fn raw_strings() { - let source = r#" + let source = r###" r'$foo ${bar}' r"[\r?\n]\" -"#; +r#''$foo''# +r##'#$bar'## +"###; check_ast( source, &[ Str(AstString { quote: StringQuote::Single, - contents: StringContents::Raw(0), + contents: StringContents::Raw { + constant: 0, + hash_count: 0, + }, }), Str(AstString { quote: StringQuote::Double, - contents: StringContents::Raw(1), + contents: StringContents::Raw { + constant: 1, + hash_count: 0, + }, + }), + Str(AstString { + quote: StringQuote::Single, + contents: StringContents::Raw { + constant: 2, + hash_count: 1, + }, + }), + Str(AstString { + quote: StringQuote::Single, + contents: StringContents::Raw { + constant: 3, + hash_count: 2, + }, }), MainBlock { - body: vec![0, 1], + body: vec![0, 1, 2, 3], 
local_count: 0, }, ], - Some(&[Constant::Str("$foo ${bar}"), Constant::Str(r"[\r?\n]\")]), + Some(&[ + Constant::Str("$foo ${bar}"), + Constant::Str(r"[\r?\n]\"), + Constant::Str("'$foo'"), + Constant::Str("#$bar"), + ]), ) } diff --git a/crates/runtime/tests/runtime_failures.rs b/crates/runtime/tests/runtime_failures.rs index bac7cd82c..da2f63a44 100644 --- a/crates/runtime/tests/runtime_failures.rs +++ b/crates/runtime/tests/runtime_failures.rs @@ -233,6 +233,27 @@ a, b = x x = @next_back: || 42 x.reversed().next() +"; + check_script_fails(script); + } + } + + mod strings { + use super::*; + + #[test] + fn missing_interpolated_id() { + let script = " +x = '$foo' +"; + check_script_fails(script); + } + + #[test] + fn invalid_raw_string_delimiter() { + // 256 #s in the delimiter is over the limit + let script = " +x = r################################################################################################################################################################################################################################################################'foo'################################################################################################################################################################################################################################################################ "; check_script_fails(script); } diff --git a/docs/language/strings.md b/docs/language/strings.md index 6db447759..896985701 100644 --- a/docs/language/strings.md +++ b/docs/language/strings.md @@ -97,6 +97,16 @@ Sometimes it can be preferable to use a _raw string_, which provides the content Like normal strings, raw strings use single or double quotes, but prefixed with an `r`. ```koto -print r'This string contains special characters: $foo\n\t' -check! This string contains special characters: $foo\n\t +print r'This string contains special characters: $foo\n\t.' +check! This string contains special characters: $foo\n\t. 
+``` + +The `r` prefix can optionally be followed by up to 255 `#`s to extend the sequence of characters that mark the end of the string. + +```koto +print r#'This string contains "both" 'quote' types.'# +check! This string contains "both" 'quote' types. + +print r##'This string also includes a '#' symbol.'## +check! This string also includes a '#' symbol. ``` diff --git a/koto/tests/strings.koto b/koto/tests/strings.koto index 31dda3581..a6fb4c4e2 100644 --- a/koto/tests/strings.koto +++ b/koto/tests/strings.koto @@ -55,6 +55,8 @@ @test raw_strings: || assert_eq r'\r\n', '\\r\\n' assert_eq r'${1 + 1}', '\${1 + 1}' + assert_eq r#''$foo''#, "'\$foo'" + assert_eq r##'#${2 * 2}'##, '#\${2 * 2}' @test bytes: || assert_eq "Hëy".bytes().to_tuple(), (72, 195, 171, 121)