Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use both char and byte positions #458

Merged
merged 1 commit into from
May 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/famous-falcons-lie.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"changelog": minor
---

Record both character and byte offsets for input positions
2 changes: 1 addition & 1 deletion crates/codegen/syntax/src/rust_lib_code_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ impl CodeGenerator {
let message = format!(\"ProductionKind {{production_kind}} is not valid in this version of {grammar_title}\");
ParseOutput {{
parse_tree: None,
errors: vec![ParseError::new(0, message)]
errors: vec![ParseError::new(Default::default(), message)]
}}
}})
}}
Expand Down
11 changes: 6 additions & 5 deletions crates/codegen/syntax_templates/src/rust/cst_visitor.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
use std::{ops::Range, rc::Rc};
use std::rc::Rc;

use super::cst::*;
use super::kinds::*;
use super::language::TextRange;

#[allow(unused_variables)]
pub trait Visitor<E> {
fn enter_rule(
&mut self,
kind: RuleKind,
range: &Range<usize>,
range: &TextRange,
children: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand All @@ -19,7 +20,7 @@ pub trait Visitor<E> {
fn exit_rule(
&mut self,
kind: RuleKind,
range: &Range<usize>,
range: &TextRange,
children: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand All @@ -30,7 +31,7 @@ pub trait Visitor<E> {
fn enter_token(
&mut self,
kind: TokenKind,
range: &Range<usize>,
range: &TextRange,
trivia: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand All @@ -41,7 +42,7 @@ pub trait Visitor<E> {
fn exit_token(
&mut self,
kind: TokenKind,
range: &Range<usize>,
range: &TextRange,
trivia: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand Down
11 changes: 7 additions & 4 deletions crates/codegen/syntax_templates/src/rust/parser_output.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use std::{collections::BTreeSet, rc::Rc};

use super::{cst, language::render_error_report};
use super::{
cst,
language::{render_error_report, TextPosition},
};

#[derive(PartialEq)]
pub struct ParseOutput {
Expand All @@ -24,13 +27,13 @@ impl ParseOutput {

#[derive(PartialEq)]
pub struct ParseError {
pub(crate) position: usize,
pub(crate) position: TextPosition,
pub(crate) expected: BTreeSet<String>,
}

impl ParseError {
pub fn position(&self) -> usize {
return self.position;
pub fn position(&self) -> &TextPosition {
return &self.position;
}

pub fn expected(&self) -> &BTreeSet<String> {
Expand Down
13 changes: 7 additions & 6 deletions crates/codegen/syntax_templates/src/shared/cst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,32 @@ use std::rc::Rc;
use serde::Serialize;

use super::kinds::*;
use super::language::TextRange;

#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub enum Node {
Rule {
kind: RuleKind,
range: Range<usize>,
range: TextRange,
children: Vec<Rc<Node>>,
},
Token {
kind: TokenKind,
range: Range<usize>,
range: TextRange,
#[serde(skip_serializing_if = "Vec::is_empty")]
trivia: Vec<Rc<Node>>,
},
}

impl Node {
pub fn range(&self) -> Range<usize> {
pub fn range(&self) -> TextRange {
match self {
Self::Rule { range, .. } => range.clone(),
Self::Token { range, .. } => range.clone(),
}
}

pub fn range_including_trivia(&self) -> Range<usize> {
pub fn range_including_trivia(&self) -> TextRange {
match self {
Self::Rule { range, .. } => range.clone(),
Self::Token { range, trivia, .. } => {
Expand Down Expand Up @@ -76,7 +77,7 @@ impl Node {
}
}
let range = if flattened_children.is_empty() {
Range { start: 0, end: 0 }
Default::default()
} else {
Range {
start: flattened_children
Expand All @@ -101,7 +102,7 @@ impl Node {
#[allow(dead_code)]
pub(crate) fn token(
kind: TokenKind,
range: Range<usize>,
range: TextRange,
leading_trivia: Option<Rc<Self>>,
trailing_trivia: Option<Rc<Self>>,
) -> Rc<Self> {
Expand Down
65 changes: 49 additions & 16 deletions crates/codegen/syntax_templates/src/shared/language.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::fmt::Display;
pub use std::{collections::BTreeSet, ops::Range, rc::Rc};

#[allow(deprecated, unused_imports)]
use semver::Version;
use serde::Serialize;

pub use super::{
cst,
Expand All @@ -12,7 +14,7 @@ pub use super::{
const DEBUG_ERROR_MERGING: bool = false;

impl ParseError {
pub(crate) fn new<T: Into<String>>(position: usize, expected: T) -> Self {
pub(crate) fn new<T: Into<String>>(position: TextPosition, expected: T) -> Self {
Self {
position,
expected: BTreeSet::from([expected.into()]),
Expand Down Expand Up @@ -71,41 +73,68 @@ pub enum ParserResult {
},
}

/// A position in the input source, tracked in two units at once.
///
/// The default value has both offsets set to zero.
#[derive(Default, Copy, Clone, PartialEq, Eq, Debug, Serialize)]
pub struct TextPosition {
/// Offset in UTF-8 bytes; `Stream::next` advances it by `len_utf8()` of each consumed character.
pub byte: usize,
/// Offset in characters; `Stream::next` advances it by one per consumed `char`.
pub char: usize,
}

pub type TextRange = Range<TextPosition>;

impl PartialOrd for TextPosition {
    /// Compares two positions using the total order defined by [`Ord::cmp`].
    ///
    /// `partial_cmp` must agree with `cmp`; delegating guarantees that both
    /// orderings are derived from the same key instead of comparing one field
    /// here and a different field in the `Ord` implementation.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for TextPosition {
    /// Orders positions by their byte offset.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        Ord::cmp(&self.byte, &other.byte)
    }
}

impl Display for TextPosition {
    /// Renders the position as its character offset only.
    ///
    /// Formatting is forwarded to the `usize` implementation, so any width or
    /// fill flags on the formatter are applied to the character offset.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        Display::fmt(&self.char, f)
    }
}

pub struct Stream<'s> {
source: &'s str,
position: usize,
undo_position: usize,
position: TextPosition,
undo_position: TextPosition,
has_undo: bool,
}

impl<'s> Stream<'s> {
pub fn new(source: &'s str) -> Self {
Self {
source,
position: 0,
undo_position: 0,
position: Default::default(),
undo_position: Default::default(),
has_undo: false,
}
}

pub fn position(&self) -> usize {
pub fn position(&self) -> TextPosition {
self.position
}

pub fn set_position(&mut self, position: usize) {
pub fn set_position(&mut self, position: TextPosition) {
self.position = position;
}

pub fn peek(&self) -> Option<char> {
self.source[self.position..].chars().next()
self.source[self.position.byte..].chars().next()
}

pub fn next(&mut self) -> Option<char> {
self.has_undo = true;
self.undo_position = self.position;
let mut chars = self.source[self.position..].chars();
let mut chars = self.source[self.position.byte..].chars();
if let Some(c) = chars.next() {
self.position += c.len_utf8();
self.position.byte += c.len_utf8();
self.position.char += 1;
Some(c)
} else {
None
Expand Down Expand Up @@ -146,22 +175,26 @@ pub(crate) fn render_error_report(
);

if DEBUG_ERROR_MERGING {
format!("{position}: {message}", position = error.position)
format!("{position}: {message}", position = source_start.char)
} else {
message
}
};

if source.is_empty() {
return format!("{kind}: {message}\n ─[{source_id}:{source_start}:{source_end}]");
return format!(
"{kind}: {message}\n ─[{source_id}:{source_start}:{source_end}]",
source_start = source_start.char,
source_end = source_end.char
);
}

let mut builder = Report::build(kind, source_id, source_start)
let mut builder = Report::build(kind, source_id, source_start.byte)
.with_config(Config::default().with_color(with_color))
.with_message(message);

builder.add_label(
Label::new((source_id, source_start..source_end))
Label::new((source_id, source_start.char..source_end.char))
.with_color(color)
.with_message("Error occurred here.".to_string()),
);
Expand Down Expand Up @@ -196,7 +229,7 @@ where
parse_tree: Some(cst::Node::token(
kind,
Range {
start: 0,
start: Default::default(),
end: stream.position(),
},
None,
Expand Down Expand Up @@ -231,7 +264,7 @@ where
parse_tree: Some(cst::Node::token(
kind,
Range {
start: 0,
start: Default::default(),
end: stream.position(),
},
None,
Expand Down
48 changes: 36 additions & 12 deletions crates/codegen/syntax_templates/src/typescript/cst_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,28 @@ impl RuleNode {
}
}

#[napi(getter)]
pub fn range(&self) -> [usize; 2] {
#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn byte_range(&self) -> [usize; 2] {
let range = self.0.range();
[range.start, range.end]
[range.start.byte, range.end.byte]
}

#[napi(getter)]
pub fn range_including_trivia(&self) -> [usize; 2] {
#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn char_range(&self) -> [usize; 2] {
let range = self.0.range();
[range.start.char, range.end.char]
}

#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn byte_range_including_trivia(&self) -> [usize; 2] {
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
let range = self.0.range_including_trivia();
[range.start.byte, range.end.byte]
}

#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn char_range_including_trivia(&self) -> [usize; 2] {
let range = self.0.range_including_trivia();
[range.start, range.end]
[range.start.char, range.end.char]
}

#[napi(ts_return_type = "(RuleNode | TokenNode)[]")]
Expand All @@ -69,16 +81,28 @@ impl TokenNode {
}
}

#[napi(getter)]
pub fn range(&self) -> [usize; 2] {
#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn byte_range(&self) -> [usize; 2] {
let range = self.0.range();
[range.start, range.end]
[range.start.byte, range.end.byte]
}

#[napi(getter)]
pub fn range_including_trivia(&self) -> [usize; 2] {
#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn char_range(&self) -> [usize; 2] {
let range = self.0.range();
[range.start.char, range.end.char]
}

#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn byte_range_including_trivia(&self) -> [usize; 2] {
let range = self.0.range_including_trivia();
[range.start.byte, range.end.byte]
}

#[napi(getter, ts_return_type = "[ start: number, end: number ]")]
pub fn char_range_including_trivia(&self) -> [usize; 2] {
let range = self.0.range_including_trivia();
[range.start, range.end]
[range.start.char, range.end.char]
}

#[napi(ts_return_type = "(RuleNode | TokenNode)[]")]
Expand Down
15 changes: 11 additions & 4 deletions crates/codegen/syntax_templates/src/typescript/parser_output.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::{collections::BTreeSet, rc::Rc};

use super::{
cst, cst_types::RcNodeExtensions as CSTRcNodeExtensions, language::render_error_report,
cst,
cst_types::RcNodeExtensions as CSTRcNodeExtensions,
language::{render_error_report, TextPosition},
};
use napi::bindgen_prelude::*;

Expand Down Expand Up @@ -32,15 +34,20 @@ impl ParseOutput {
#[napi]
#[derive(PartialEq, Clone)]
pub struct ParseError {
pub(crate) position: usize,
pub(crate) position: TextPosition,
pub(crate) expected: BTreeSet<String>,
}

#[napi]
impl ParseError {
#[napi(getter)]
pub fn position(&self) -> usize {
return self.position;
pub fn byte_position(&self) -> usize {
return self.position.byte;
}

#[napi(getter)]
pub fn char_position(&self) -> usize {
return self.position.char;
}

#[napi]
Expand Down
Loading