diff --git a/.changeset/famous-falcons-lie.md b/.changeset/famous-falcons-lie.md new file mode 100644 index 0000000000..a9ef0ccd30 --- /dev/null +++ b/.changeset/famous-falcons-lie.md @@ -0,0 +1,5 @@ +--- +"changelog": minor +--- + +Record both character and byte offsets for input positions diff --git a/crates/codegen/syntax/src/rust_lib_code_generator.rs b/crates/codegen/syntax/src/rust_lib_code_generator.rs index 98565eca43..092cce14c8 100644 --- a/crates/codegen/syntax/src/rust_lib_code_generator.rs +++ b/crates/codegen/syntax/src/rust_lib_code_generator.rs @@ -93,7 +93,7 @@ impl CodeGenerator { let message = format!(\"ProductionKind {{production_kind}} is not valid in this version of {grammar_title}\"); ParseOutput {{ parse_tree: None, - errors: vec![ParseError::new(0, message)] + errors: vec![ParseError::new(Default::default(), message)] }} }}) }} diff --git a/crates/codegen/syntax_templates/src/rust/cst_visitor.rs b/crates/codegen/syntax_templates/src/rust/cst_visitor.rs index c126cba2c5..36996c13fd 100644 --- a/crates/codegen/syntax_templates/src/rust/cst_visitor.rs +++ b/crates/codegen/syntax_templates/src/rust/cst_visitor.rs @@ -1,14 +1,15 @@ -use std::{ops::Range, rc::Rc}; +use std::rc::Rc; use super::cst::*; use super::kinds::*; +use super::language::TextRange; #[allow(unused_variables)] pub trait Visitor { fn enter_rule( &mut self, kind: RuleKind, - range: &Range, + range: &TextRange, children: &Vec>, node: &Rc, path: &Vec>, @@ -19,7 +20,7 @@ pub trait Visitor { fn exit_rule( &mut self, kind: RuleKind, - range: &Range, + range: &TextRange, children: &Vec>, node: &Rc, path: &Vec>, @@ -30,7 +31,7 @@ pub trait Visitor { fn enter_token( &mut self, kind: TokenKind, - range: &Range, + range: &TextRange, trivia: &Vec>, node: &Rc, path: &Vec>, @@ -41,7 +42,7 @@ pub trait Visitor { fn exit_token( &mut self, kind: TokenKind, - range: &Range, + range: &TextRange, trivia: &Vec>, node: &Rc, path: &Vec>, diff --git a/crates/codegen/syntax_templates/src/rust/parser_output.rs b/crates/codegen/syntax_templates/src/rust/parser_output.rs index be57107bef..0b875efcb0 100644 --- a/crates/codegen/syntax_templates/src/rust/parser_output.rs +++ b/crates/codegen/syntax_templates/src/rust/parser_output.rs @@ -1,6 +1,9 @@ use std::{collections::BTreeSet, rc::Rc}; -use super::{cst, language::render_error_report}; +use super::{ + cst, + language::{render_error_report, TextPosition}, +}; #[derive(PartialEq)] pub struct ParseOutput { @@ -24,13 +27,13 @@ impl ParseOutput { #[derive(PartialEq)] pub struct ParseError { - pub(crate) position: usize, + pub(crate) position: TextPosition, pub(crate) expected: BTreeSet, } impl ParseError { - pub fn position(&self) -> usize { - return self.position; + pub fn position(&self) -> &TextPosition { + return &self.position; } pub fn expected(&self) -> &BTreeSet { diff --git a/crates/codegen/syntax_templates/src/shared/cst.rs b/crates/codegen/syntax_templates/src/shared/cst.rs index fba63871df..3f2f7f20cf 100644 --- a/crates/codegen/syntax_templates/src/shared/cst.rs +++ b/crates/codegen/syntax_templates/src/shared/cst.rs @@ -5,31 +5,32 @@ use std::rc::Rc; use serde::Serialize; use super::kinds::*; +use super::language::TextRange; #[derive(Clone, Debug, PartialEq, Eq, Serialize)] pub enum Node { Rule { kind: RuleKind, - range: Range, + range: TextRange, children: Vec>, }, Token { kind: TokenKind, - range: Range, + range: TextRange, #[serde(skip_serializing_if = "Vec::is_empty")] trivia: Vec>, }, } impl Node { - pub fn range(&self) -> Range { + pub fn range(&self) -> TextRange { match self { Self::Rule { range, .. } => range.clone(), Self::Token { range, .. } => range.clone(), } } - pub fn range_including_trivia(&self) -> Range { + pub fn range_including_trivia(&self) -> TextRange { match self { Self::Rule { range, .. } => range.clone(), Self::Token { range, trivia, .. } => { @@ -76,7 +77,7 @@ impl Node { } } let range = if flattened_children.is_empty() { - Range { start: 0, end: 0 } + Default::default() } else { Range { start: flattened_children @@ -101,7 +102,7 @@ impl Node { #[allow(dead_code)] pub(crate) fn token( kind: TokenKind, - range: Range, + range: TextRange, leading_trivia: Option>, trailing_trivia: Option>, ) -> Rc { diff --git a/crates/codegen/syntax_templates/src/shared/language.rs b/crates/codegen/syntax_templates/src/shared/language.rs index d4de7aec9c..417da25b4c 100644 --- a/crates/codegen/syntax_templates/src/shared/language.rs +++ b/crates/codegen/syntax_templates/src/shared/language.rs @@ -1,7 +1,9 @@ +use std::fmt::Display; pub use std::{collections::BTreeSet, ops::Range, rc::Rc}; #[allow(deprecated, unused_imports)] use semver::Version; +use serde::Serialize; pub use super::{ cst, @@ -12,7 +14,7 @@ pub use super::{ const DEBUG_ERROR_MERGING: bool = false; impl ParseError { - pub(crate) fn new>(position: usize, expected: T) -> Self { + pub(crate) fn new>(position: TextPosition, expected: T) -> Self { Self { position, expected: BTreeSet::from([expected.into()]), @@ -71,10 +73,36 @@ pub enum ParserResult { }, } +#[derive(Default, Copy, Clone, PartialEq, Eq, Debug, Serialize)] +pub struct TextPosition { + pub byte: usize, + pub char: usize, +} + +pub type TextRange = Range; + +impl PartialOrd for TextPosition { + fn partial_cmp(&self, other: &Self) -> Option { + self.char.partial_cmp(&other.char) + } +} + +impl Ord for TextPosition { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.byte.cmp(&other.byte) + } +} + +impl Display for TextPosition { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.char.fmt(f) + } +} + pub struct Stream<'s> { source: &'s str, - position: usize, - undo_position: usize, + position: TextPosition, + undo_position: TextPosition, has_undo: bool, } @@ -82,30 +110,31 @@ impl<'s> Stream<'s> { pub fn new(source: &'s str) -> Self { Self { source, - position: 0, - undo_position: 0, + position: Default::default(), + undo_position: Default::default(), has_undo: false, } } - pub fn position(&self) -> usize { + pub fn position(&self) -> TextPosition { self.position } - pub fn set_position(&mut self, position: usize) { + pub fn set_position(&mut self, position: TextPosition) { self.position = position; } pub fn peek(&self) -> Option { - self.source[self.position..].chars().next() + self.source[self.position.byte..].chars().next() } pub fn next(&mut self) -> Option { self.has_undo = true; self.undo_position = self.position; - let mut chars = self.source[self.position..].chars(); + let mut chars = self.source[self.position.byte..].chars(); if let Some(c) = chars.next() { - self.position += c.len_utf8(); + self.position.byte += c.len_utf8(); + self.position.char += 1; Some(c) } else { None @@ -146,22 +175,26 @@ pub(crate) fn render_error_report( ); if DEBUG_ERROR_MERGING { - format!("{position}: {message}", position = error.position) + format!("{position}: {message}", position = source_start.char) } else { message } }; if source.is_empty() { - return format!("{kind}: {message}\n ─[{source_id}:{source_start}:{source_end}]"); + return format!( + "{kind}: {message}\n ─[{source_id}:{source_start}:{source_end}]", + source_start = source_start.char, + source_end = source_end.char + ); } - let mut builder = Report::build(kind, source_id, source_start) + let mut builder = Report::build(kind, source_id, source_start.byte) .with_config(Config::default().with_color(with_color)) .with_message(message); builder.add_label( - Label::new((source_id, source_start..source_end)) + Label::new((source_id, source_start.char..source_end.char)) .with_color(color) .with_message("Error occurred here.".to_string()), ); @@ -196,7 +229,7 @@ where parse_tree: Some(cst::Node::token( kind, Range { - start: 0, + start: Default::default(), end: stream.position(), }, None, @@ -231,7 +264,7 @@ where parse_tree: Some(cst::Node::token( kind, Range { - start: 0, + start: Default::default(), end: stream.position(), }, None, diff --git a/crates/codegen/syntax_templates/src/typescript/cst_types.rs b/crates/codegen/syntax_templates/src/typescript/cst_types.rs index b19396c3b2..a1c02970b7 100644 --- a/crates/codegen/syntax_templates/src/typescript/cst_types.rs +++ b/crates/codegen/syntax_templates/src/typescript/cst_types.rs @@ -33,16 +33,28 @@ impl RuleNode { } } - #[napi(getter)] - pub fn range(&self) -> [usize; 2] { + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn byte_range(&self) -> [usize; 2] { let range = self.0.range(); - [range.start, range.end] + [range.start.byte, range.end.byte] } - #[napi(getter)] - pub fn range_including_trivia(&self) -> [usize; 2] { + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn char_range(&self) -> [usize; 2] { + let range = self.0.range(); + [range.start.char, range.end.char] + } + + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn byte_range_including_trivia(&self) -> [usize; 2] { + let range = self.0.range_including_trivia(); + [range.start.byte, range.end.byte] + } + + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn char_range_including_trivia(&self) -> [usize; 2] { let range = self.0.range_including_trivia(); - [range.start, range.end] + [range.start.char, range.end.char] } #[napi(ts_return_type = "(RuleNode | TokenNode)[]")] @@ -69,16 +81,28 @@ impl TokenNode { } } - #[napi(getter)] - pub fn range(&self) -> [usize; 2] { + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn byte_range(&self) -> [usize; 2] { let range = self.0.range(); - [range.start, range.end] + [range.start.byte, range.end.byte] } - #[napi(getter)] - pub fn range_including_trivia(&self) -> [usize; 2] { + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn char_range(&self) -> [usize; 2] { + let range = self.0.range(); + [range.start.char, range.end.char] + } + + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn byte_range_including_trivia(&self) -> [usize; 2] { + let range = self.0.range_including_trivia(); + [range.start.byte, range.end.byte] + } + + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn char_range_including_trivia(&self) -> [usize; 2] { let range = self.0.range_including_trivia(); - [range.start, range.end] + [range.start.char, range.end.char] } #[napi(ts_return_type = "(RuleNode | TokenNode)[]")] diff --git a/crates/codegen/syntax_templates/src/typescript/parser_output.rs b/crates/codegen/syntax_templates/src/typescript/parser_output.rs index b87e6434e4..adaa22a256 100644 --- a/crates/codegen/syntax_templates/src/typescript/parser_output.rs +++ b/crates/codegen/syntax_templates/src/typescript/parser_output.rs @@ -1,7 +1,9 @@ use std::{collections::BTreeSet, rc::Rc}; use super::{ - cst, cst_types::RcNodeExtensions as CSTRcNodeExtensions, language::render_error_report, + cst, + cst_types::RcNodeExtensions as CSTRcNodeExtensions, + language::{render_error_report, TextPosition}, }; use napi::bindgen_prelude::*; @@ -32,15 +34,20 @@ impl ParseOutput { #[napi] #[derive(PartialEq, Clone)] pub struct ParseError { - pub(crate) position: usize, + pub(crate) position: TextPosition, pub(crate) expected: BTreeSet, } #[napi] impl ParseError { #[napi(getter)] - pub fn position(&self) -> usize { - return self.position; + pub fn byte_position(&self) -> usize { + return self.position.byte; + } + + #[napi(getter)] + pub fn char_position(&self) -> usize { + return self.position.char; } #[napi] diff --git a/crates/solidity/outputs/cargo/crate/src/generated/cst.rs b/crates/solidity/outputs/cargo/crate/src/generated/cst.rs index 4eb4c22de5..6ef45822d8 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/cst.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/cst.rs @@ -7,31 +7,32 @@ use std::rc::Rc; use serde::Serialize; use super::kinds::*; +use super::language::TextRange; #[derive(Clone, Debug, PartialEq, Eq, Serialize)] pub enum Node { Rule { kind: RuleKind, - range: Range, + range: TextRange, children: Vec>, }, Token { kind: TokenKind, - range: Range, + range: TextRange, #[serde(skip_serializing_if = "Vec::is_empty")] trivia: Vec>, }, } impl Node { - pub fn range(&self) -> Range { + pub fn range(&self) -> TextRange { match self { Self::Rule { range, .. } => range.clone(), Self::Token { range, .. } => range.clone(), } } - pub fn range_including_trivia(&self) -> Range { + pub fn range_including_trivia(&self) -> TextRange { match self { Self::Rule { range, .. } => range.clone(), Self::Token { range, trivia, .. } => { @@ -78,7 +79,7 @@ impl Node { } } let range = if flattened_children.is_empty() { - Range { start: 0, end: 0 } + Default::default() } else { Range { start: flattened_children @@ -103,7 +104,7 @@ impl Node { #[allow(dead_code)] pub(crate) fn token( kind: TokenKind, - range: Range, + range: TextRange, leading_trivia: Option>, trailing_trivia: Option>, ) -> Rc { diff --git a/crates/solidity/outputs/cargo/crate/src/generated/cst_visitor.rs b/crates/solidity/outputs/cargo/crate/src/generated/cst_visitor.rs index dee25d2ac5..f6f8c0c735 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/cst_visitor.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/cst_visitor.rs @@ -1,16 +1,17 @@ // This file is generated automatically by infrastructure scripts. Please don't edit by hand. -use std::{ops::Range, rc::Rc}; +use std::rc::Rc; use super::cst::*; use super::kinds::*; +use super::language::TextRange; #[allow(unused_variables)] pub trait Visitor { fn enter_rule( &mut self, kind: RuleKind, - range: &Range, + range: &TextRange, children: &Vec>, node: &Rc, path: &Vec>, @@ -21,7 +22,7 @@ pub trait Visitor { fn exit_rule( &mut self, kind: RuleKind, - range: &Range, + range: &TextRange, children: &Vec>, node: &Rc, path: &Vec>, @@ -32,7 +33,7 @@ pub trait Visitor { fn enter_token( &mut self, kind: TokenKind, - range: &Range, + range: &TextRange, trivia: &Vec>, node: &Rc, path: &Vec>, @@ -43,7 +44,7 @@ pub trait Visitor { fn exit_token( &mut self, kind: TokenKind, - range: &Range, + range: &TextRange, trivia: &Vec>, node: &Rc, path: &Vec>, diff --git a/crates/solidity/outputs/cargo/crate/src/generated/language.rs b/crates/solidity/outputs/cargo/crate/src/generated/language.rs index b477f4d078..249e49de2a 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/language.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/language.rs @@ -1,9 +1,11 @@ // This file is generated automatically by infrastructure scripts. Please don't edit by hand. +use std::fmt::Display; pub use std::{collections::BTreeSet, ops::Range, rc::Rc}; #[allow(deprecated, unused_imports)] use semver::Version; +use serde::Serialize; pub use super::{ cst, @@ -14,7 +16,7 @@ pub use super::{ const DEBUG_ERROR_MERGING: bool = false; impl ParseError { - pub(crate) fn new>(position: usize, expected: T) -> Self { + pub(crate) fn new>(position: TextPosition, expected: T) -> Self { Self { position, expected: BTreeSet::from([expected.into()]), @@ -73,10 +75,36 @@ pub enum ParserResult { }, } +#[derive(Default, Copy, Clone, PartialEq, Eq, Debug, Serialize)] +pub struct TextPosition { + pub byte: usize, + pub char: usize, +} + +pub type TextRange = Range; + +impl PartialOrd for TextPosition { + fn partial_cmp(&self, other: &Self) -> Option { + self.char.partial_cmp(&other.char) + } +} + +impl Ord for TextPosition { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.byte.cmp(&other.byte) + } +} + +impl Display for TextPosition { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.char.fmt(f) + } +} + pub struct Stream<'s> { source: &'s str, - position: usize, - undo_position: usize, + position: TextPosition, + undo_position: TextPosition, has_undo: bool, } @@ -84,30 +112,31 @@ impl<'s> Stream<'s> { pub fn new(source: &'s str) -> Self { Self { source, - position: 0, - undo_position: 0, + position: Default::default(), + undo_position: Default::default(), has_undo: false, } } - pub fn position(&self) -> usize { + pub fn position(&self) -> TextPosition { self.position } - pub fn set_position(&mut self, position: usize) { + pub fn set_position(&mut self, position: TextPosition) { self.position = position; } pub fn peek(&self) -> Option { - self.source[self.position..].chars().next() + self.source[self.position.byte..].chars().next() } pub fn next(&mut self) -> Option { self.has_undo = true; self.undo_position = self.position; - let mut chars = self.source[self.position..].chars(); + let mut chars = self.source[self.position.byte..].chars(); if let Some(c) = chars.next() { - self.position += c.len_utf8(); + self.position.byte += c.len_utf8(); + self.position.char += 1; Some(c) } else { None @@ -148,22 +177,26 @@ pub(crate) fn render_error_report( ); if DEBUG_ERROR_MERGING { - format!("{position}: {message}", position = error.position) + format!("{position}: {message}", position = source_start.char) } else { message } }; if source.is_empty() { - return format!("{kind}: {message}\n ─[{source_id}:{source_start}:{source_end}]"); + return format!( + "{kind}: {message}\n ─[{source_id}:{source_start}:{source_end}]", + source_start = source_start.char, + source_end = source_end.char + ); } - let mut builder = Report::build(kind, source_id, source_start) + let mut builder = Report::build(kind, source_id, source_start.byte) .with_config(Config::default().with_color(with_color)) .with_message(message); builder.add_label( - Label::new((source_id, source_start..source_end)) + Label::new((source_id, source_start.char..source_end.char)) .with_color(color) .with_message("Error occurred here.".to_string()), ); @@ -198,7 +231,7 @@ where parse_tree: Some(cst::Node::token( kind, Range { - start: 0, + start: Default::default(), end: stream.position(), }, None, @@ -233,7 +266,7 @@ where parse_tree: Some(cst::Node::token( kind, Range { - start: 0, + start: Default::default(), end: stream.position(), }, None, @@ -1847,7 +1880,7 @@ impl Language { ); ParseOutput { parse_tree: None, - errors: vec![ParseError::new(0, message)], + errors: vec![ParseError::new(Default::default(), message)], } }) } diff --git a/crates/solidity/outputs/cargo/crate/src/generated/parser_output.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_output.rs index 19fe5e5dfd..68a12b35a7 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/parser_output.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/parser_output.rs @@ -2,7 +2,10 @@ use std::{collections::BTreeSet, rc::Rc}; -use super::{cst, language::render_error_report}; +use super::{ + cst, + language::{render_error_report, TextPosition}, +}; #[derive(PartialEq)] pub struct ParseOutput { @@ -26,13 +29,13 @@ impl ParseOutput { #[derive(PartialEq)] pub struct ParseError { - pub(crate) position: usize, + pub(crate) position: TextPosition, pub(crate) expected: BTreeSet, } impl ParseError { - pub fn position(&self) -> usize { - return self.position; + pub fn position(&self) -> &TextPosition { + return &self.position; } pub fn expected(&self) -> &BTreeSet { diff --git a/crates/solidity/outputs/cargo/crate/src/lib.rs b/crates/solidity/outputs/cargo/crate/src/lib.rs index f0c4250321..1d88bb9330 100644 --- a/crates/solidity/outputs/cargo/crate/src/lib.rs +++ b/crates/solidity/outputs/cargo/crate/src/lib.rs @@ -22,6 +22,7 @@ mod public_api { pub mod nodes { pub use crate::generated::cst::Node; pub use crate::generated::kinds::{RuleKind, TokenKind}; + pub use crate::generated::language::{TextPosition, TextRange}; } pub mod parser { diff --git a/crates/solidity/outputs/npm/crate/src/generated/cst.rs b/crates/solidity/outputs/npm/crate/src/generated/cst.rs index 4eb4c22de5..6ef45822d8 100644 --- a/crates/solidity/outputs/npm/crate/src/generated/cst.rs +++ b/crates/solidity/outputs/npm/crate/src/generated/cst.rs @@ -7,31 +7,32 @@ use std::rc::Rc; use serde::Serialize; use super::kinds::*; +use super::language::TextRange; #[derive(Clone, Debug, PartialEq, Eq, Serialize)] pub enum Node { Rule { kind: RuleKind, - range: Range, + range: TextRange, children: Vec>, }, Token { kind: TokenKind, - range: Range, + range: TextRange, #[serde(skip_serializing_if = "Vec::is_empty")] trivia: Vec>, }, } impl Node { - pub fn range(&self) -> Range { + pub fn range(&self) -> TextRange { match self { Self::Rule { range, .. } => range.clone(), Self::Token { range, .. } => range.clone(), } } - pub fn range_including_trivia(&self) -> Range { + pub fn range_including_trivia(&self) -> TextRange { match self { Self::Rule { range, .. } => range.clone(), Self::Token { range, trivia, .. } => { @@ -78,7 +79,7 @@ impl Node { } } let range = if flattened_children.is_empty() { - Range { start: 0, end: 0 } + Default::default() } else { Range { start: flattened_children @@ -103,7 +104,7 @@ impl Node { #[allow(dead_code)] pub(crate) fn token( kind: TokenKind, - range: Range, + range: TextRange, leading_trivia: Option>, trailing_trivia: Option>, ) -> Rc { diff --git a/crates/solidity/outputs/npm/crate/src/generated/cst_types.rs b/crates/solidity/outputs/npm/crate/src/generated/cst_types.rs index b498fa2a65..16453b8913 100644 --- a/crates/solidity/outputs/npm/crate/src/generated/cst_types.rs +++ b/crates/solidity/outputs/npm/crate/src/generated/cst_types.rs @@ -35,16 +35,28 @@ impl RuleNode { } } - #[napi(getter)] - pub fn range(&self) -> [usize; 2] { + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn byte_range(&self) -> [usize; 2] { let range = self.0.range(); - [range.start, range.end] + [range.start.byte, range.end.byte] } - #[napi(getter)] - pub fn range_including_trivia(&self) -> [usize; 2] { + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn char_range(&self) -> [usize; 2] { + let range = self.0.range(); + [range.start.char, range.end.char] + } + + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn byte_range_including_trivia(&self) -> [usize; 2] { + let range = self.0.range_including_trivia(); + [range.start.byte, range.end.byte] + } + + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn char_range_including_trivia(&self) -> [usize; 2] { let range = self.0.range_including_trivia(); - [range.start, range.end] + [range.start.char, range.end.char] } #[napi(ts_return_type = "(RuleNode | TokenNode)[]")] @@ -71,16 +83,28 @@ impl TokenNode { } } - #[napi(getter)] - pub fn range(&self) -> [usize; 2] { + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn byte_range(&self) -> [usize; 2] { let range = self.0.range(); - [range.start, range.end] + [range.start.byte, range.end.byte] } - #[napi(getter)] - pub fn range_including_trivia(&self) -> [usize; 2] { + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn char_range(&self) -> [usize; 2] { + let range = self.0.range(); + [range.start.char, range.end.char] + } + + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn byte_range_including_trivia(&self) -> [usize; 2] { + let range = self.0.range_including_trivia(); + [range.start.byte, range.end.byte] + } + + #[napi(getter, ts_return_type = "[ start: number, end: number ]")] + pub fn char_range_including_trivia(&self) -> [usize; 2] { let range = self.0.range_including_trivia(); - [range.start, range.end] + [range.start.char, range.end.char] } #[napi(ts_return_type = "(RuleNode | TokenNode)[]")] diff --git a/crates/solidity/outputs/npm/crate/src/generated/language.rs b/crates/solidity/outputs/npm/crate/src/generated/language.rs index f177e4a306..18f085e004 100644 --- a/crates/solidity/outputs/npm/crate/src/generated/language.rs +++ b/crates/solidity/outputs/npm/crate/src/generated/language.rs @@ -1,9 +1,11 @@ // This file is generated automatically by infrastructure scripts. Please don't edit by hand. +use std::fmt::Display; pub use std::{collections::BTreeSet, ops::Range, rc::Rc}; #[allow(deprecated, unused_imports)] use semver::Version; +use serde::Serialize; pub use super::{ cst, @@ -14,7 +16,7 @@ pub use super::{ const DEBUG_ERROR_MERGING: bool = false; impl ParseError { - pub(crate) fn new>(position: usize, expected: T) -> Self { + pub(crate) fn new>(position: TextPosition, expected: T) -> Self { Self { position, expected: BTreeSet::from([expected.into()]), @@ -73,10 +75,36 @@ pub enum ParserResult { }, } +#[derive(Default, Copy, Clone, PartialEq, Eq, Debug, Serialize)] +pub struct TextPosition { + pub byte: usize, + pub char: usize, +} + +pub type TextRange = Range; + +impl PartialOrd for TextPosition { + fn partial_cmp(&self, other: &Self) -> Option { + self.char.partial_cmp(&other.char) + } +} + +impl Ord for TextPosition { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.byte.cmp(&other.byte) + } +} + +impl Display for TextPosition { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.char.fmt(f) + } +} + pub struct Stream<'s> { source: &'s str, - position: usize, - undo_position: usize, + position: TextPosition, + undo_position: TextPosition, has_undo: bool, } @@ -84,30 +112,31 @@ impl<'s> Stream<'s> { pub fn new(source: &'s str) -> Self { Self { source, - position: 0, - undo_position: 0, + position: Default::default(), + undo_position: Default::default(), has_undo: false, } } - pub fn position(&self) -> usize { + pub fn position(&self) -> TextPosition { self.position } - pub fn set_position(&mut self, position: usize) { + pub fn set_position(&mut self, position: TextPosition) { self.position = position; } pub fn peek(&self) -> Option { - self.source[self.position..].chars().next() + self.source[self.position.byte..].chars().next() } pub fn next(&mut self) -> Option { self.has_undo = true; self.undo_position = self.position; - let mut chars = self.source[self.position..].chars(); + let mut chars = self.source[self.position.byte..].chars(); if let Some(c) = chars.next() { - self.position += c.len_utf8(); + self.position.byte += c.len_utf8(); + self.position.char += 1; Some(c) } else { None @@ -148,22 +177,26 @@ pub(crate) fn render_error_report( ); if DEBUG_ERROR_MERGING { - format!("{position}: {message}", position = error.position) + format!("{position}: {message}", position = source_start.char) } else { message } }; if source.is_empty() { - return format!("{kind}: {message}\n ─[{source_id}:{source_start}:{source_end}]"); + return format!( + "{kind}: {message}\n ─[{source_id}:{source_start}:{source_end}]", + source_start = source_start.char, + source_end = source_end.char + ); } - let mut builder = Report::build(kind, source_id, source_start) + let mut builder = Report::build(kind, source_id, source_start.byte) .with_config(Config::default().with_color(with_color)) .with_message(message); builder.add_label( - Label::new((source_id, source_start..source_end)) + Label::new((source_id, source_start.char..source_end.char)) .with_color(color) .with_message("Error occurred here.".to_string()), ); @@ -198,7 +231,7 @@ where parse_tree: Some(cst::Node::token( kind, Range { - start: 0, + start: Default::default(), end: stream.position(), }, None, @@ -233,7 +266,7 @@ where parse_tree: Some(cst::Node::token( kind, Range { - start: 0, + start: Default::default(), end: stream.position(), }, None, diff --git a/crates/solidity/outputs/npm/crate/src/generated/parser_output.rs b/crates/solidity/outputs/npm/crate/src/generated/parser_output.rs index 08af57224c..9660908a73 100644 --- a/crates/solidity/outputs/npm/crate/src/generated/parser_output.rs +++ b/crates/solidity/outputs/npm/crate/src/generated/parser_output.rs @@ -3,7 +3,9 @@ use std::{collections::BTreeSet, rc::Rc}; use super::{ - cst, cst_types::RcNodeExtensions as CSTRcNodeExtensions, language::render_error_report, + cst, + cst_types::RcNodeExtensions as CSTRcNodeExtensions, + language::{render_error_report, TextPosition}, }; use napi::bindgen_prelude::*; @@ -34,15 +36,20 @@ impl ParseOutput { #[napi] #[derive(PartialEq, Clone)] pub struct ParseError { - pub(crate) position: usize, + pub(crate) position: TextPosition, pub(crate) expected: BTreeSet, } #[napi] impl ParseError { #[napi(getter)] - pub fn position(&self) -> usize { - return self.position; + pub fn byte_position(&self) -> usize { + return self.position.byte; + } + + #[napi(getter)] + pub fn char_position(&self) -> usize { + return self.position.char; } #[napi] diff --git a/crates/solidity/outputs/npm/package/src/generated/index.d.ts b/crates/solidity/outputs/npm/package/src/generated/index.d.ts index c535bb6277..a50b13485f 100644 --- a/crates/solidity/outputs/npm/package/src/generated/index.d.ts +++ b/crates/solidity/outputs/npm/package/src/generated/index.d.ts @@ -622,15 +622,19 @@ export enum ProductionKind { export class RuleNode { get type(): NodeType.Rule; get kind(): RuleKind; - get range(): bigint[]; - get rangeIncludingTrivia(): bigint[]; + get byteRange(): [start: number, end: number]; + get charRange(): [start: number, end: number]; + get byteRangeIncludingTrivia(): [start: number, end: number]; + get charRangeIncludingTrivia(): [start: number, end: number]; children(): (RuleNode | TokenNode)[]; } export class TokenNode { get type(): NodeType.Token; get kind(): TokenKind; - get range(): bigint[]; - get rangeIncludingTrivia(): bigint[]; + get byteRange(): [start: number, end: number]; + get charRange(): [start: number, end: number]; + get byteRangeIncludingTrivia(): [start: number, end: number]; + get charRangeIncludingTrivia(): [start: number, end: number]; trivia(): (RuleNode | TokenNode)[]; } export class Language { @@ -644,7 +648,8 @@ export class ParseOutput { isValid(): boolean; } export class ParseError { - get position(): bigint; + get bytePosition(): bigint; + get charPosition(): bigint; expected(): Array; toErrorReport(sourceId: string, source: string, withColour: boolean): string; } diff --git a/crates/solidity/testing/utils/src/cst_snapshots/mod.rs b/crates/solidity/testing/utils/src/cst_snapshots/mod.rs index 0c26280725..ad9d731d5f 100644 --- a/crates/solidity/testing/utils/src/cst_snapshots/mod.rs +++ b/crates/solidity/testing/utils/src/cst_snapshots/mod.rs @@ -115,9 +115,18 @@ fn write_node( let (node_value, node_comment) = if let Some(range) = &node.range { let preview = node.render_preview(source, range)?; if node.children.is_empty() { - (format!(" {preview}"), format!("{range:?}")) + ( + format!(" {preview}"), + format!("{range:?}", range = range.start.byte..range.end.byte), + ) } else { - ("".to_owned(), format!("{range:?} {preview}")) + ( + "".to_owned(), + format!( + "{range:?} {preview}", + range = range.start.byte..range.end.byte + ), + ) } } else { (" \"\"".to_owned(), "".to_owned()) diff --git a/crates/solidity/testing/utils/src/cst_snapshots/test_nodes.rs b/crates/solidity/testing/utils/src/cst_snapshots/test_nodes.rs index 9061a2886b..474cabed49 100644 --- a/crates/solidity/testing/utils/src/cst_snapshots/test_nodes.rs +++ b/crates/solidity/testing/utils/src/cst_snapshots/test_nodes.rs @@ -1,7 +1,7 @@ -use std::{ops::Range, rc::Rc}; +use std::rc::Rc; use anyhow::Result; -use slang_solidity::syntax::nodes::{Node, RuleKind, TokenKind}; +use slang_solidity::syntax::nodes::{Node, RuleKind, TextRange, TokenKind}; #[derive(Debug)] pub enum TestNodeKind { @@ -13,7 +13,7 @@ pub enum TestNodeKind { pub struct TestNode { pub kind: TestNodeKind, - pub range: Option>, + pub range: Option, pub children: Vec, } @@ -47,8 +47,8 @@ impl TestNode { fn from_token( token_kind: &TokenKind, - token_range: &Range, - node_range: Range, + token_range: &TextRange, + node_range: TextRange, token_trivia: &Vec>, ) -> Self { let mut leading = vec![]; @@ -153,14 +153,14 @@ impl TestNode { }; } - pub fn render_preview(&self, source: &str, range: &Range) -> Result { + pub fn render_preview(&self, source: &str, range: &TextRange) -> Result { let max_length = 50; - let length = range.end - range.start; + let length = range.end.byte - range.start.byte; // Trim long values: let contents = source .bytes() - .skip(range.start) + .skip(range.start.byte) .take(length.clamp(0, max_length)) .collect(); diff --git a/crates/solidity/testing/utils/src/node_extensions/mod.rs b/crates/solidity/testing/utils/src/node_extensions/mod.rs index b7b99e88bc..7a3c513a4a 100644 --- a/crates/solidity/testing/utils/src/node_extensions/mod.rs +++ b/crates/solidity/testing/utils/src/node_extensions/mod.rs @@ -14,8 +14,8 @@ impl NodeExtensions for Node { let range = range; let result = source .bytes() - .skip(range.start) - .take(range.end - range.start) + .skip(range.start.byte) + .take(range.end.byte - range.start.byte) .collect(); return String::from_utf8(result).unwrap(); diff --git a/crates/solidity/testing/utils/src/version_pragmas/mod.rs b/crates/solidity/testing/utils/src/version_pragmas/mod.rs index 32b80de405..fe6fc5f211 100644 --- a/crates/solidity/testing/utils/src/version_pragmas/mod.rs +++ b/crates/solidity/testing/utils/src/version_pragmas/mod.rs @@ -1,13 +1,13 @@ #[cfg(test)] mod tests; -use std::{ops::Range, rc::Rc, str::FromStr}; +use std::{rc::Rc, str::FromStr}; use anyhow::{bail, Context, Error, Result}; use semver::{Comparator, Op, Version}; use slang_solidity::{ syntax::{ - nodes::{Node, RuleKind}, + nodes::{Node, RuleKind, TextRange}, parser::ProductionKind, visitors::{Visitable, Visitor, VisitorEntryResponse}, }, @@ -48,7 +48,7 @@ impl<'a> Visitor for PragmaCollector<'a> { fn enter_rule( &mut self, kind: RuleKind, - range: &Range, + range: &TextRange, children: &Vec>, node: &Rc, _path: &Vec>, @@ -61,6 +61,7 @@ impl<'a> Visitor for PragmaCollector<'a> { [child] => self.extract_pragma(child).with_context(|| { format!( "Failed to extract pragma at {range:?}: '{value}'", + range = range.start.byte..range.end.byte, value = child.extract_non_trivia(self.source) ) })?,