Skip to content

Commit

Permalink
Use both char and byte positions
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonyBlakey committed May 11, 2023
1 parent b7aae2a commit ca821a6
Show file tree
Hide file tree
Showing 25 changed files with 310 additions and 135 deletions.
5 changes: 5 additions & 0 deletions .changeset/famous-falcons-lie.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"changelog": minor
---

Record both character and byte offsets for input positions
2 changes: 1 addition & 1 deletion crates/codegen/syntax/src/rust_lib_code_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ impl CodeGenerator {
let message = format!(\"ProductionKind {{production_kind}} is not valid in this version of {grammar_title}\");
ParseOutput {{
parse_tree: None,
errors: vec![ParseError::new(0, message)]
errors: vec![ParseError::new(Default::default(), message)]
}}
}})
}}
Expand Down
9 changes: 5 additions & 4 deletions crates/codegen/syntax_templates/src/rust/cst_visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ use std::{ops::Range, rc::Rc};

use super::cst::*;
use super::kinds::*;
use super::language::InputPosition;

#[allow(unused_variables)]
pub trait Visitor<E> {
fn enter_rule(
&mut self,
kind: RuleKind,
range: &Range<usize>,
range: &Range<InputPosition>,
children: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand All @@ -19,7 +20,7 @@ pub trait Visitor<E> {
fn exit_rule(
&mut self,
kind: RuleKind,
range: &Range<usize>,
range: &Range<InputPosition>,
children: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand All @@ -30,7 +31,7 @@ pub trait Visitor<E> {
fn enter_token(
&mut self,
kind: TokenKind,
range: &Range<usize>,
range: &Range<InputPosition>,
trivia: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand All @@ -41,7 +42,7 @@ pub trait Visitor<E> {
fn exit_token(
&mut self,
kind: TokenKind,
range: &Range<usize>,
range: &Range<InputPosition>,
trivia: &Vec<Rc<Node>>,
node: &Rc<Node>,
path: &Vec<Rc<Node>>,
Expand Down
11 changes: 7 additions & 4 deletions crates/codegen/syntax_templates/src/rust/parser_output.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use std::{collections::BTreeSet, rc::Rc};

use super::{cst, language::render_error_report};
use super::{
cst,
language::{render_error_report, InputPosition},
};

#[derive(PartialEq)]
pub struct ParseOutput {
Expand All @@ -24,13 +27,13 @@ impl ParseOutput {

#[derive(PartialEq)]
pub struct ParseError {
pub(crate) position: usize,
pub(crate) position: InputPosition,
pub(crate) expected: BTreeSet<String>,
}

impl ParseError {
pub fn position(&self) -> usize {
return self.position;
pub fn position(&self) -> &InputPosition {
return &self.position;
}

pub fn expected(&self) -> &BTreeSet<String> {
Expand Down
13 changes: 7 additions & 6 deletions crates/codegen/syntax_templates/src/shared/cst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,32 @@ use std::rc::Rc;
use serde::Serialize;

use super::kinds::*;
use super::language::InputPosition;

#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub enum Node {
Rule {
kind: RuleKind,
range: Range<usize>,
range: Range<InputPosition>,
children: Vec<Rc<Node>>,
},
Token {
kind: TokenKind,
range: Range<usize>,
range: Range<InputPosition>,
#[serde(skip_serializing_if = "Vec::is_empty")]
trivia: Vec<Rc<Node>>,
},
}

impl Node {
pub fn range(&self) -> Range<usize> {
pub fn range(&self) -> Range<InputPosition> {
match self {
Self::Rule { range, .. } => range.clone(),
Self::Token { range, .. } => range.clone(),
}
}

pub fn range_including_trivia(&self) -> Range<usize> {
pub fn range_including_trivia(&self) -> Range<InputPosition> {
match self {
Self::Rule { range, .. } => range.clone(),
Self::Token { range, trivia, .. } => {
Expand Down Expand Up @@ -76,7 +77,7 @@ impl Node {
}
}
let range = if flattened_children.is_empty() {
Range { start: 0, end: 0 }
Default::default()
} else {
Range {
start: flattened_children
Expand All @@ -101,7 +102,7 @@ impl Node {
#[allow(dead_code)]
pub(crate) fn token(
kind: TokenKind,
range: Range<usize>,
range: Range<InputPosition>,
leading_trivia: Option<Rc<Self>>,
trailing_trivia: Option<Rc<Self>>,
) -> Rc<Self> {
Expand Down
57 changes: 42 additions & 15 deletions crates/codegen/syntax_templates/src/shared/language.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::fmt::Display;
pub use std::{collections::BTreeSet, ops::Range, rc::Rc};

#[allow(deprecated, unused_imports)]
use semver::Version;
use serde::Serialize;

pub use super::{
cst,
Expand All @@ -12,7 +14,7 @@ pub use super::{
const DEBUG_ERROR_MERGING: bool = false;

impl ParseError {
pub(crate) fn new<T: Into<String>>(position: usize, expected: T) -> Self {
pub(crate) fn new<T: Into<String>>(position: InputPosition, expected: T) -> Self {
Self {
position,
expected: BTreeSet::from([expected.into()]),
Expand Down Expand Up @@ -71,41 +73,66 @@ pub enum ParserResult {
},
}

/// A position in the input, tracked in two units at once.
///
/// `Stream::next` advances `byte` by the UTF-8 length of each consumed
/// character and `char` by one, so both fields describe the same location.
/// The derived `Default` is the start of the input (both offsets zero).
///
/// Note that the derived equality compares both fields, whereas the
/// `PartialOrd`/`Ord` impls in this file compare `byte` only.
#[derive(Default, Copy, Clone, PartialEq, Eq, Debug, Serialize)]
pub struct InputPosition {
    /// Offset in bytes; this is the value used to slice the source `str`.
    pub byte: usize,
    /// Offset in characters (`char`s); this is the value shown by `Display`
    /// and used when rendering error reports.
    pub char: usize,
}

impl PartialOrd for InputPosition {
    /// Partial ordering of two positions.
    ///
    /// Always returns `Some`, delegating to [`Ord::cmp`] so the partial and
    /// total orderings cannot drift apart. Positions are therefore ordered by
    /// their `byte` offset alone.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for InputPosition {
    /// Total ordering of two positions, decided by the `byte` offset only;
    /// the `char` offset does not take part in the comparison.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        usize::cmp(&self.byte, &other.byte)
    }
}

impl Display for InputPosition {
    /// Renders the position as its `char` offset, forwarding to the
    /// `Display` impl of `usize` so width/fill/alignment flags are honoured.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        Display::fmt(&self.char, formatter)
    }
}

pub struct Stream<'s> {
source: &'s str,
position: usize,
undo_position: usize,
position: InputPosition,
undo_position: InputPosition,
has_undo: bool,
}

impl<'s> Stream<'s> {
pub fn new(source: &'s str) -> Self {
Self {
source,
position: 0,
undo_position: 0,
position: Default::default(),
undo_position: Default::default(),
has_undo: false,
}
}

pub fn position(&self) -> usize {
pub fn position(&self) -> InputPosition {
self.position
}

pub fn set_position(&mut self, position: usize) {
pub fn set_position(&mut self, position: InputPosition) {
self.position = position;
}

pub fn peek(&self) -> Option<char> {
self.source[self.position..].chars().next()
self.source[self.position.byte..].chars().next()
}

pub fn next(&mut self) -> Option<char> {
self.has_undo = true;
self.undo_position = self.position;
let mut chars = self.source[self.position..].chars();
let mut chars = self.source[self.position.byte..].chars();
if let Some(c) = chars.next() {
self.position += c.len_utf8();
self.position.byte += c.len_utf8();
self.position.char += 1;
Some(c)
} else {
None
Expand All @@ -131,8 +158,8 @@ pub(crate) fn render_error_report(

let kind = ReportKind::Error;
let color = if with_color { Color::Red } else { Color::Unset };
let source_start = error.position;
let source_end = error.position;
let source_start = error.position.char;
let source_end = error.position.char;

let message = {
let message = format!(
Expand All @@ -146,7 +173,7 @@ pub(crate) fn render_error_report(
);

if DEBUG_ERROR_MERGING {
format!("{position}: {message}", position = error.position)
format!("{position}: {message}", position = error.position.char)
} else {
message
}
Expand Down Expand Up @@ -196,7 +223,7 @@ where
parse_tree: Some(cst::Node::token(
kind,
Range {
start: 0,
start: Default::default(),
end: stream.position(),
},
None,
Expand Down Expand Up @@ -231,7 +258,7 @@ where
parse_tree: Some(cst::Node::token(
kind,
Range {
start: 0,
start: Default::default(),
end: stream.position(),
},
None,
Expand Down
40 changes: 32 additions & 8 deletions crates/codegen/syntax_templates/src/typescript/cst_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,27 @@ impl RuleNode {
}

#[napi(getter)]
pub fn range(&self) -> [usize; 2] {
pub fn byte_range(&self) -> [usize; 2] {
let range = self.0.range();
[range.start, range.end]
[range.start.byte, range.end.byte]
}

#[napi(getter)]
pub fn range_including_trivia(&self) -> [usize; 2] {
pub fn char_range(&self) -> [usize; 2] {
let range = self.0.range();
[range.start.char, range.end.char]
}

#[napi(getter)]
pub fn byte_range_including_trivia(&self) -> [usize; 2] {
    // `[start, end]` of the node's trivia-inclusive range, in byte offsets.
    let std::ops::Range { start, end } = self.0.range_including_trivia();
    [start.byte, end.byte]
}

#[napi(getter)]
pub fn char_range_including_trivia(&self) -> [usize; 2] {
let range = self.0.range_including_trivia();
[range.start, range.end]
[range.start.char, range.end.char]
}

#[napi(ts_return_type = "(RuleNode | TokenNode)[]")]
Expand Down Expand Up @@ -70,15 +82,27 @@ impl TokenNode {
}

#[napi(getter)]
pub fn range(&self) -> [usize; 2] {
pub fn byte_range(&self) -> [usize; 2] {
let range = self.0.range();
[range.start, range.end]
[range.start.byte, range.end.byte]
}

#[napi(getter)]
pub fn char_range(&self) -> [usize; 2] {
    // `[start, end]` of the node's own range, in character offsets.
    let std::ops::Range { start, end } = self.0.range();
    [start.char, end.char]
}

#[napi(getter)]
pub fn byte_range_including_trivia(&self) -> [usize; 2] {
    // `[start, end]` of the node's trivia-inclusive range, in byte offsets.
    let std::ops::Range { start, end } = self.0.range_including_trivia();
    [start.byte, end.byte]
}

#[napi(getter)]
pub fn range_including_trivia(&self) -> [usize; 2] {
pub fn char_range_including_trivia(&self) -> [usize; 2] {
let range = self.0.range_including_trivia();
[range.start, range.end]
[range.start.char, range.end.char]
}

#[napi(ts_return_type = "(RuleNode | TokenNode)[]")]
Expand Down
15 changes: 11 additions & 4 deletions crates/codegen/syntax_templates/src/typescript/parser_output.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::{collections::BTreeSet, rc::Rc};

use super::{
cst, cst_types::RcNodeExtensions as CSTRcNodeExtensions, language::render_error_report,
cst,
cst_types::RcNodeExtensions as CSTRcNodeExtensions,
language::{render_error_report, InputPosition},
};
use napi::bindgen_prelude::*;

Expand Down Expand Up @@ -32,15 +34,20 @@ impl ParseOutput {
#[napi]
#[derive(PartialEq, Clone)]
pub struct ParseError {
pub(crate) position: usize,
pub(crate) position: InputPosition,
pub(crate) expected: BTreeSet<String>,
}

#[napi]
impl ParseError {
#[napi(getter)]
pub fn position(&self) -> usize {
return self.position;
pub fn byte_position(&self) -> [usize; 2] {
return [self.position.byte, self.position.byte];
}

#[napi(getter)]
pub fn char_position(&self) -> [usize; 2] {
    // The error is reported at a single point, so both elements carry the
    // same character offset.
    // NOTE(review): the two-element shape presumably mirrors the
    // `[start, end]` range getters on the CST nodes — confirm.
    [self.position.char; 2]
}

#[napi]
Expand Down
Loading

0 comments on commit ca821a6

Please sign in to comment.