Skip to content

Commit

Permalink
fix: unknown token & unterminated string should be recognized by lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
caelansar committed Sep 7, 2024
1 parent 32958b4 commit 9983f9b
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 15 deletions.
3 changes: 2 additions & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ impl fmt::Display for Assign {
}
}
}

/// Represents different types of expressions in the abstract syntax tree.
#[derive(PartialEq, Clone, Debug)]
pub enum Expression {
Ident(Ident),
Expand Down Expand Up @@ -358,6 +358,7 @@ impl Display for Literal {
}
}

/// Represents different types of statements in the abstract syntax tree.
#[derive(PartialEq, Clone, Debug)]
pub enum Statement {
Let(Ident, Expression),
Expand Down
2 changes: 2 additions & 0 deletions src/compiler/scope.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use crate::{code, eval::object};

/// CompilationScope is used to manage the state of the compiler during
/// the compilation of a single block or expression.
#[derive(Debug, Default, Clone)]
pub(super) struct CompilationScope {
pub(super) instructions: object::Instructions,
Expand Down
64 changes: 55 additions & 9 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::token::{lookup_ident, Token};
use crate::token::{lookup_ident, Token, TokenError};
use std::str::FromStr;

#[derive(Default)]
Expand Down Expand Up @@ -73,15 +73,15 @@ impl<'a> Lexer<'a> {
self.input[pos..self.pos].to_string()
}

fn read_string(&mut self) -> String {
fn read_string(&mut self) -> Option<String> {
let pos = self.pos + 1;
loop {
self.read_char();
if self.ch.is_none() || self.ch.is_some_and(|x| x == '"') {
if self.ch? == '"' {
break;
}
}
self.input[pos..self.pos].to_string()
Some(self.input[pos..self.pos].to_string())
}

#[inline(always)]
Expand Down Expand Up @@ -144,7 +144,13 @@ impl<'a> Lexer<'a> {
}
_ => Token::Lt,
},
'"' => Token::String(self.read_string()),
'"' => {
if let Some(s) = self.read_string() {
Token::String(s)
} else {
Token::Illegal(TokenError::UnterminatedString)
}
}
'+' => {
if let Some('=') = self.peek_char() {
self.read_char();
Expand Down Expand Up @@ -228,7 +234,7 @@ impl<'a> Lexer<'a> {
',' | ';' | ':' | '(' | ')' | '{' | '}' | '[' | ']' => {
Token::from_str(token.to_string().as_str()).unwrap()
}
_ => {
other => {
if token.is_alphabetic() || token == '_' {
let literal = self.read_identifier();
let typ = lookup_ident(literal);
Expand All @@ -246,7 +252,7 @@ impl<'a> Lexer<'a> {
return Token::Int(value);
}
} else {
return Token::Illegal;
Token::Illegal(crate::token::TokenError::UnknowToken(other))
}
}
}
Expand Down Expand Up @@ -287,9 +293,49 @@ impl<'a, 'b> Iterator for Iter<'a, 'b> {

#[cfg(test)]
mod test {
use crate::token::Token;
use super::*;

#[test]
fn test_float_issue() {
    // "1.2.3." is not a single valid float literal. The lexer recovers by
    // emitting Float(1.2), then an Illegal token for the stray '.', then
    // Float(3.). Rejecting the Illegal token is the parser's job.
    let input = "1.2.3.";

    let mut lexer = Lexer::new(input);

    let mut tokens = Vec::new();
    loop {
        let token = lexer.next_token();
        // Check for Eof before pushing so we avoid cloning every token.
        let done = token == Token::Eof;
        tokens.push(token);
        if done {
            break;
        }
    }

    assert_eq!(tokens.len(), 4);
    assert_eq!(tokens[0], Token::Float(1.2));
    assert_eq!(tokens[1], Token::Illegal(TokenError::UnknowToken('.')));
    assert_eq!(tokens[2], Token::Float(3.));
    assert_eq!(tokens[3], Token::Eof);
}

#[test]
fn unterminated_string_should_failed() {
    // A string literal that hits EOF before its closing quote must surface
    // as Token::Illegal(TokenError::UnterminatedString) instead of a panic
    // or a silently truncated Token::String.
    let input = r#""This string is not terminated"#;
    let mut lexer = Lexer::new(input);

    // The redundant mid-function `use super::Lexer;` was removed: the test
    // module already imports everything via `use super::*;`.
    let mut tokens = Vec::new();
    loop {
        let token = lexer.next_token();
        // Check for Eof before pushing so we avoid cloning every token.
        let done = token == Token::Eof;
        tokens.push(token);
        if done {
            break;
        }
    }

    assert_eq!(tokens.len(), 2);
    assert_eq!(tokens[0], Token::Illegal(TokenError::UnterminatedString));
    assert_eq!(tokens[1], Token::Eof);
}

#[test]
fn next_token_should_work() {
Expand Down
17 changes: 14 additions & 3 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,8 @@ impl<'a> Parser<'a> {
}

fn is_illegal_token(&mut self) -> bool {
if self.current_token_is(&token::Token::Illegal) {
self.errors.push("illegal token".to_string());
if let token::Token::Illegal(e) = &self.current_token {
self.errors.push(e.to_string());
true
} else {
false
Expand Down Expand Up @@ -657,9 +657,10 @@ impl<'a> Parser<'a> {

pub fn parse_program(&mut self) -> Result<ast::Program, String> {
let mut stmts = Vec::new();

while self.current_token != token::Token::Eof {
if self.is_illegal_token() {
return Ok(ast::BlockStatement(stmts));
return Err(self.errors().join("\n"));
}
let stmt = self.parse_statement();
if let Some(stmt) = stmt {
Expand Down Expand Up @@ -821,6 +822,16 @@ mod test {
assert!(errs.is_empty())
}

#[test]
fn illegal_float_should_failed() {
    // "1.2.3." lexes to an Illegal token, so parse_program must return Err.
    let input = "1.2.3.";
    let lexer = lexer::Lexer::new(input);
    let mut parser = Parser::new(lexer);

    // Assert on the Result directly rather than #[should_panic] + unwrap():
    // should_panic would also pass on any unrelated panic inside the parser,
    // masking real bugs. This pins the exact failure mode we care about.
    assert!(parser.parse_program().is_err());
}

#[test]
fn assign_statement_should_work() {
let input = r#"
Expand Down
19 changes: 17 additions & 2 deletions src/token/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
use std::{fmt, mem, str::FromStr};

/// Reasons the lexer reports a token as `Token::Illegal`.
///
/// `PartialEq`/`Eq` are derived so errors can be compared directly in
/// assertions (the lexer tests use `assert_eq!` on `Token::Illegal(...)`).
///
/// NOTE(review): the variant name `UnknowToken` keeps the original
/// (misspelled) identifier because callers pattern-match on it; renaming
/// it to `UnknownToken` would be a breaking change.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenError {
    /// A character that does not begin any known token.
    UnknowToken(char),
    /// A string literal whose closing `"` was never found before EOF.
    UnterminatedString,
}

impl fmt::Display for TokenError {
    /// Human-readable message; this is what the parser pushes into its
    /// error list when it encounters an illegal token.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::UnknowToken(token) => write!(f, "Unknown token: '{}'", token),
            Self::UnterminatedString => write!(f, "Unterminated string"),
        }
    }
}

#[derive(Debug, Clone)]
pub enum Token {
Illegal,
Illegal(TokenError),
Eof,

Ident(String),
Expand Down Expand Up @@ -164,7 +179,7 @@ impl fmt::Display for Token {
Token::Else => write!(f, "else"),
Token::For => write!(f, "for"),
Token::Return => write!(f, "return"),
Token::Illegal => write!(f, "ILLEGAL"),
Token::Illegal(e) => write!(f, "{e}"),
Token::Break => write!(f, "break"),
Token::Continue => write!(f, "continue"),
Token::And => write!(f, "&&"),
Expand Down

0 comments on commit 9983f9b

Please sign in to comment.