Skip to content

Commit

Permalink
Use both char and byte positions (#458)
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonyBlakey authored May 12, 2023
1 parent 6a05423 commit c0fc7e9
Show file tree
Hide file tree
Showing 22 changed files with 331 additions and 138 deletions.
5 changes: 5 additions & 0 deletions .changeset/famous-falcons-lie.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"changelog": minor
---

Record both character and byte offsets for input positions
2 changes: 1 addition & 1 deletion crates/codegen/syntax/src/rust_lib_code_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ impl CodeGenerator {
let message = format!(\"ProductionKind {{production_kind}} is not valid in this version of {grammar_title}\");
ParseOutput {{
parse_tree: None,
errors: vec![ParseError::new(0, message)]
errors: vec![ParseError::new(Default::default(), message)]
}}
}})
}}
Expand Down
11 changes: 6 additions & 5 deletions crates/codegen/syntax_templates/src/rust/cst_visitor.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
use std::{ops::Range, rc::Rc};
use std::rc::Rc;

use super::cst::*;
use super::kinds::*;
use super::language::TextRange;

#[allow(unused_variables)]
pub trait Visitor<E> {
fn enter_rule(
&mut self,
kind: RuleKind,
range: &Range<usize>,
range: &TextRange,
children: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand All @@ -19,7 +20,7 @@ pub trait Visitor<E> {
fn exit_rule(
&mut self,
kind: RuleKind,
range: &Range<usize>,
range: &TextRange,
children: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand All @@ -30,7 +31,7 @@ pub trait Visitor<E> {
fn enter_token(
&mut self,
kind: TokenKind,
range: &Range<usize>,
range: &TextRange,
trivia: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand All @@ -41,7 +42,7 @@ pub trait Visitor<E> {
fn exit_token(
&mut self,
kind: TokenKind,
range: &Range<usize>,
range: &TextRange,
trivia: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand Down
11 changes: 7 additions & 4 deletions crates/codegen/syntax_templates/src/rust/parser_output.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use std::{collections::BTreeSet, rc::Rc};

use super::{cst, language::render_error_report};
use super::{
cst,
language::{render_error_report, TextPosition},
};

#[derive(PartialEq)]
pub struct ParseOutput {
Expand All @@ -24,13 +27,13 @@ impl ParseOutput {

#[derive(PartialEq)]
pub struct ParseError {
pub(crate) position: usize,
pub(crate) position: TextPosition,
pub(crate) expected: BTreeSet<String>,
}

impl ParseError {
pub fn position(&self) -> usize {
return self.position;
pub fn position(&self) -> &TextPosition {
return &self.position;
}

pub fn expected(&self) -> &BTreeSet<String> {
Expand Down
13 changes: 7 additions & 6 deletions crates/codegen/syntax_templates/src/shared/cst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,32 @@ use std::rc::Rc;
use serde::Serialize;

use super::kinds::*;
use super::language::TextRange;

#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub enum Node {
Rule {
kind: RuleKind,
range: Range<usize>,
range: TextRange,
children: Vec<Rc<Node>>,
},
Token {
kind: TokenKind,
range: Range<usize>,
range: TextRange,
#[serde(skip_serializing_if = "Vec::is_empty")]
trivia: Vec<Rc<Node>>,
},
}

impl Node {
pub fn range(&self) -> Range<usize> {
pub fn range(&self) -> TextRange {
match self {
Self::Rule { range, .. } => range.clone(),
Self::Token { range, .. } => range.clone(),
}
}

pub fn range_including_trivia(&self) -> Range<usize> {
pub fn range_including_trivia(&self) -> TextRange {
match self {
Self::Rule { range, .. } => range.clone(),
Self::Token { range, trivia, .. } => {
Expand Down Expand Up @@ -76,7 +77,7 @@ impl Node {
}
}
let range = if flattened_children.is_empty() {
Range { start: 0, end: 0 }
Default::default()
} else {
Range {
start: flattened_children
Expand All @@ -101,7 +102,7 @@ impl Node {
#[allow(dead_code)]
pub(crate) fn token(
kind: TokenKind,
range: Range<usize>,
range: TextRange,
leading_trivia: Option<Rc<Self>>,
trailing_trivia: Option<Rc<Self>>,
) -> Rc<Self> {
Expand Down
65 changes: 49 additions & 16 deletions crates/codegen/syntax_templates/src/shared/language.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::fmt::Display;
pub use std::{collections::BTreeSet, ops::Range, rc::Rc};

#[allow(deprecated, unused_imports)]
use semver::Version;
use serde::Serialize;

pub use super::{
cst,
Expand All @@ -12,7 +14,7 @@ pub use super::{
const DEBUG_ERROR_MERGING: bool = false;

impl ParseError {
pub(crate) fn new<T: Into<String>>(position: usize, expected: T) -> Self {
pub(crate) fn new<T: Into<String>>(position: TextPosition, expected: T) -> Self {
Self {
position,
expected: BTreeSet::from([expected.into()]),
Expand Down Expand Up @@ -71,41 +73,68 @@ pub enum ParserResult {
},
}

/// A location in the input text, tracked in two units at once.
///
/// The derived `Default` is the start of the input (both offsets zero).
#[derive(Default, Copy, Clone, PartialEq, Eq, Debug, Serialize)]
pub struct TextPosition {
/// Offset in UTF-8 bytes; `Stream` uses this to slice the source `str`.
pub byte: usize,
/// Offset in `char`s; `Stream::next` advances it by one per `char` consumed.
pub char: usize,
}

/// A half-open span of input text (`std::ops::Range` over [`TextPosition`]).
pub type TextRange = Range<TextPosition>;

/// Partial ordering for [`TextPosition`].
///
/// Always delegates to [`Ord::cmp`] so that `<`, `<=`, `>`, `>=` and
/// `partial_cmp` can never disagree with `cmp`, `min`, `max` or sorting.
/// (Previously this compared `char` while `cmp` compared `byte`.)
impl PartialOrd for TextPosition {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

/// Total ordering for [`TextPosition`].
///
/// Positions are ordered by byte offset. The character offset is used only as
/// a tiebreak, so that `cmp` returns `Equal` exactly when the derived `Eq`
/// says the two positions are equal. For positions taken from the same input
/// both offsets grow together, so the tiebreak never changes the result there.
impl Ord for TextPosition {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.byte
            .cmp(&other.byte)
            .then_with(|| self.char.cmp(&other.char))
    }
}

/// Renders a [`TextPosition`] as its character offset alone; the byte offset
/// is not part of the textual form.
impl Display for TextPosition {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Hand the formatter straight to the integer's own `Display` impl so
        // any width / fill / alignment flags requested by the caller apply.
        let Self {
            char: char_offset, ..
        } = self;
        Display::fmt(char_offset, f)
    }
}

pub struct Stream<'s> {
source: &'s str,
position: usize,
undo_position: usize,
position: TextPosition,
undo_position: TextPosition,
has_undo: bool,
}

impl<'s> Stream<'s> {
pub fn new(source: &'s str) -> Self {
Self {
source,
position: 0,
undo_position: 0,
position: Default::default(),
undo_position: Default::default(),
has_undo: false,
}
}

pub fn position(&self) -> usize {
pub fn position(&self) -> TextPosition {
self.position
}

pub fn set_position(&mut self, position: usize) {
pub fn set_position(&mut self, position: TextPosition) {
self.position = position;
}

pub fn peek(&self) -> Option<char> {
self.source[self.position..].chars().next()
self.source[self.position.byte..].chars().next()
}

pub fn next(&mut self) -> Option<char> {
self.has_undo = true;
self.undo_position = self.position;
let mut chars = self.source[self.position..].chars();
let mut chars = self.source[self.position.byte..].chars();
if let Some(c) = chars.next() {
self.position += c.len_utf8();
self.position.byte += c.len_utf8();
self.position.char += 1;
Some(c)
} else {
None
Expand Down Expand Up @@ -146,22 +175,26 @@ pub(crate) fn render_error_report(
);

if DEBUG_ERROR_MERGING {
format!("{position}: {message}", position = error.position)
format!("{position}: {message}", position = source_start.char)
} else {
message
}
};

if source.is_empty() {
return format!("{kind}: {message}\n ─[{source_id}:{source_start}:{source_end}]");
return format!(
"{kind}: {message}\n ─[{source_id}:{source_start}:{source_end}]",
source_start = source_start.char,
source_end = source_end.char
);
}

let mut builder = Report::build(kind, source_id, source_start)
let mut builder = Report::build(kind, source_id, source_start.byte)
.with_config(Config::default().with_color(with_color))
.with_message(message);

builder.add_label(
Label::new((source_id, source_start..source_end))
Label::new((source_id, source_start.char..source_end.char))
.with_color(color)
.with_message("Error occurred here.".to_string()),
);
Expand Down Expand Up @@ -196,7 +229,7 @@ where
parse_tree: Some(cst::Node::token(
kind,
Range {
start: 0,
start: Default::default(),
end: stream.position(),
},
None,
Expand Down Expand Up @@ -231,7 +264,7 @@ where
parse_tree: Some(cst::Node::token(
kind,
Range {
start: 0,
start: Default::default(),
end: stream.position(),
},
None,
Expand Down
48 changes: 36 additions & 12 deletions crates/codegen/syntax_templates/src/typescript/cst_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,28 @@ impl RuleNode {
}
}

#[napi(getter)]
pub fn range(&self) -> [usize; 2] {
#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn byte_range(&self) -> [usize; 2] {
let range = self.0.range();
[range.start, range.end]
[range.start.byte, range.end.byte]
}

#[napi(getter)]
pub fn range_including_trivia(&self) -> [usize; 2] {
#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn char_range(&self) -> [usize; 2] {
let range = self.0.range();
[range.start.char, range.end.char]
}

#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn byte_range_including_trivia(&self) -> [usize; 2] {
let range = self.0.range_including_trivia();
[range.start.byte, range.end.byte]
}

#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn char_range_including_trivia(&self) -> [usize; 2] {
let range = self.0.range_including_trivia();
[range.start, range.end]
[range.start.char, range.end.char]
}

#[napi(ts_return_type = "(RuleNode | TokenNode)[]")]
Expand All @@ -69,16 +81,28 @@ impl TokenNode {
}
}

#[napi(getter)]
pub fn range(&self) -> [usize; 2] {
#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn byte_range(&self) -> [usize; 2] {
let range = self.0.range();
[range.start, range.end]
[range.start.byte, range.end.byte]
}

#[napi(getter)]
pub fn range_including_trivia(&self) -> [usize; 2] {
#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn char_range(&self) -> [usize; 2] {
let range = self.0.range();
[range.start.char, range.end.char]
}

#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn byte_range_including_trivia(&self) -> [usize; 2] {
let range = self.0.range_including_trivia();
[range.start.byte, range.end.byte]
}

#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn char_range_including_trivia(&self) -> [usize; 2] {
let range = self.0.range_including_trivia();
[range.start, range.end]
[range.start.char, range.end.char]
}

#[napi(ts_return_type = "(RuleNode | TokenNode)[]")]
Expand Down
15 changes: 11 additions & 4 deletions crates/codegen/syntax_templates/src/typescript/parser_output.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::{collections::BTreeSet, rc::Rc};

use super::{
cst, cst_types::RcNodeExtensions as CSTRcNodeExtensions, language::render_error_report,
cst,
cst_types::RcNodeExtensions as CSTRcNodeExtensions,
language::{render_error_report, TextPosition},
};
use napi::bindgen_prelude::*;

Expand Down Expand Up @@ -32,15 +34,20 @@ impl ParseOutput {
#[napi]
#[derive(PartialEq, Clone)]
pub struct ParseError {
pub(crate) position: usize,
pub(crate) position: TextPosition,
pub(crate) expected: BTreeSet<String>,
}

#[napi]
impl ParseError {
#[napi(getter)]
pub fn position(&self) -> usize {
return self.position;
pub fn byte_position(&self) -> usize {
return self.position.byte;
}

#[napi(getter)]
pub fn char_position(&self) -> usize {
return self.position.char;
}

#[napi]
Expand Down
Loading

0 comments on commit c0fc7e9

Please sign in to comment.