From e4e4b18eff45cc627a65dcc34558e68fa293126e Mon Sep 17 00:00:00 2001 From: Peefy Date: Wed, 16 Aug 2023 10:37:00 +0800 Subject: [PATCH] feat: support format tool error recovery and error indent/dedent code format (#661) feat: support format tool error recovery and error indent/dedent code format. --- kclvm/api/src/service/service_impl.rs | 11 +++- kclvm/ast/src/token.rs | 6 ++ kclvm/cmd/src/fmt.rs | 1 + kclvm/parser/src/lexer/indent.rs | 9 ++- kclvm/parser/src/lib.rs | 34 +++++++++++ kclvm/parser/src/parser/expr.rs | 17 ++---- kclvm/tools/src/format/mod.rs | 18 ++++-- kclvm/tools/src/format/tests.rs | 83 ++++++++++++++++++++++++++- 8 files changed, 160 insertions(+), 19 deletions(-) diff --git a/kclvm/api/src/service/service_impl.rs b/kclvm/api/src/service/service_impl.rs index 572a77032..9f01b31cc 100644 --- a/kclvm/api/src/service/service_impl.rs +++ b/kclvm/api/src/service/service_impl.rs @@ -278,7 +278,15 @@ impl KclvmServiceImpl { /// assert_eq!(result.formatted, source.as_bytes().to_vec()); /// ``` pub fn format_code(&self, args: &FormatCodeArgs) -> anyhow::Result { - let (formatted, _) = format_source("", &args.source)?; + let (formatted, _) = format_source( + "", + &args.source, + &FormatOptions { + is_stdout: false, + recursively: false, + omit_errors: true, + }, + )?; Ok(FormatCodeResult { formatted: formatted.as_bytes().to_vec(), }) @@ -313,6 +321,7 @@ impl KclvmServiceImpl { &FormatOptions { recursively, is_stdout: false, + omit_errors: true, }, )?; Ok(FormatPathResult { changed_paths }) diff --git a/kclvm/ast/src/token.rs b/kclvm/ast/src/token.rs index 59b92eb2a..177416e33 100644 --- a/kclvm/ast/src/token.rs +++ b/kclvm/ast/src/token.rs @@ -340,6 +340,12 @@ impl Token { _ => false, } } + + /// Whether the token kind is in the recovery token set, when meets errors, drop it. + #[inline] + pub fn is_in_recovery_set(&self) -> bool { + matches!(self.kind, TokenKind::Indent | TokenKind::Dummy) + } } impl PartialEq for Token { diff --git a/kclvm/cmd/src/fmt.rs b/kclvm/cmd/src/fmt.rs index 78f2d3a83..aa8c4fadb 100644 --- a/kclvm/cmd/src/fmt.rs +++ b/kclvm/cmd/src/fmt.rs @@ -13,6 +13,7 @@ pub fn fmt_command(matches: &ArgMatches) -> Result<()> { &FormatOptions { is_stdout: bool_from_matches(matches, "std_output").unwrap_or_default(), recursively: bool_from_matches(matches, "recursive").unwrap_or_default(), + omit_errors: true, }, )?; Ok(()) diff --git a/kclvm/parser/src/lexer/indent.rs b/kclvm/parser/src/lexer/indent.rs index efff2ed28..a8f472f01 100644 --- a/kclvm/parser/src/lexer/indent.rs +++ b/kclvm/parser/src/lexer/indent.rs @@ -92,6 +92,7 @@ impl<'a> Lexer<'a> { } Ordering::Less => { let mut dedents = Vec::new(); + let mut indents = Vec::new(); loop { match ordering { @@ -99,7 +100,9 @@ impl<'a> Lexer<'a> { match order { Ordering::Less => { // Pop indents util we find an equal ident level - self.indent_cxt.indents.pop(); + if let Some(indent) = self.indent_cxt.indents.pop() { + indents.push(indent); + } // update pos & collect dedent // For dedent token, we ignore the length let dedent = Token::new( @@ -113,6 +116,10 @@ impl<'a> Lexer<'a> { break; } Ordering::Greater => { + if let Some(indent) = indents.pop() { + self.indent_cxt.indents.push(indent); + } + dedents.pop(); self.sess.struct_span_error( &format!("unindent {} does not match any outer indentation level", indent.spaces), self.span(self.pos, self.pos), diff --git a/kclvm/parser/src/lib.rs b/kclvm/parser/src/lib.rs index ea841c936..70c7d1d50 100644 --- a/kclvm/parser/src/lib.rs +++ b/kclvm/parser/src/lib.rs @@ -113,6 +113,40 @@ pub fn parse_file(filename: &str, code: Option) -> Result, +) -> (Option, String) { + let sess = Arc::new(ParseSession::default()); + match parse_file_with_global_session(sess.clone(), filename, code) { + Ok(module) => match sess.0.diag_handler.has_errors() { + Ok(has_error) => { + let get_err = || -> anyhow::Result { + let err = sess + .0 + .emit_nth_diag_into_string(0)? + .unwrap_or(Ok(ErrorKind::InvalidSyntax.name()))?; + Ok(err) + }; + if has_error { + match get_err() { + Ok(err) => (Some(module), err), + Err(err) => (Some(module), err.to_string()), + } + } else { + (Some(module), "".to_string()) + } + } + Err(err) => (Some(module), err.to_string()), + }, + Err(err) => (None, err), + } +} + /// Parse a KCL file to the AST module with the parse session . pub fn parse_file_with_session( sess: Arc, diff --git a/kclvm/parser/src/parser/expr.rs b/kclvm/parser/src/parser/expr.rs index c94a920c2..4b3c99266 100644 --- a/kclvm/parser/src/parser/expr.rs +++ b/kclvm/parser/src/parser/expr.rs @@ -49,6 +49,12 @@ impl<'a> Parser<'a> { /// Syntax: /// test: if_expr | simple_expr pub(crate) fn parse_expr(&mut self) -> NodeRef { + if self.token.is_in_recovery_set() { + self.sess + .struct_span_error("expected expression", self.token.span); + self.bump(); + } + let token = self.token; let operand = self.parse_simple_expr(); @@ -912,13 +918,6 @@ impl<'a> Parser<'a> { // If we don't find the indentation, skip and parse the next statement. self.sess .struct_token_error(&[TokenKind::Indent.into()], self.token); - return Box::new(Node::node( - Expr::List(ListExpr { - elts: vec![], - ctx: ExprContext::Load, - }), - self.sess.struct_token_loc(token, self.token), - )); } true } else { @@ -1256,10 +1255,6 @@ impl<'a> Parser<'a> { // If we don't find the indentation, skip and parse the next statement. self.sess .struct_token_error(&[TokenKind::Indent.into()], self.token); - return Box::new(Node::node( - Expr::Config(ConfigExpr { items: vec![] }), - self.sess.struct_token_loc(token, self.token), - )); } true } else { diff --git a/kclvm/tools/src/format/mod.rs b/kclvm/tools/src/format/mod.rs index 3467aec8d..a566f63f8 100644 --- a/kclvm/tools/src/format/mod.rs +++ b/kclvm/tools/src/format/mod.rs @@ -10,7 +10,7 @@ use kclvm_ast_pretty::print_ast_module; use kclvm_driver::get_kcl_files; use std::path::Path; -use kclvm_parser::parse_file; +use kclvm_parser::{parse_file, parse_file_with_errors}; #[cfg(test)] mod tests; @@ -18,10 +18,12 @@ mod tests; /// FormatOptions contains two options: /// - is_stdout: whether to output the formatted result to stdout. /// - recursively: whether to recursively traverse a folder and format all KCL files in it. +/// - omit_errors: whether to omit the parse errors when format the KCL code. #[derive(Debug, Default)] pub struct FormatOptions { pub is_stdout: bool, pub recursively: bool, + pub omit_errors: bool, } /// Formats kcl file or directory path contains kcl files and @@ -69,7 +71,7 @@ pub fn format>(path: P, opts: &FormatOptions) -> Result Result { let src = std::fs::read_to_string(file)?; - let (source, is_formatted) = format_source(file, &src)?; + let (source, is_formatted) = format_source(file, &src, opts)?; if opts.is_stdout { println!("{}", source); } else { @@ -80,8 +82,16 @@ pub fn format_file(file: &str, opts: &FormatOptions) -> Result { /// Formats a code source and returns the formatted source and /// whether the source is changed. -pub fn format_source(file: &str, src: &str) -> Result<(String, bool)> { - let module = parse_file(file, Some(src.to_string())).map_err(|err| anyhow::anyhow!(err))?; +pub fn format_source(file: &str, src: &str, opts: &FormatOptions) -> Result<(String, bool)> { + let module = if opts.omit_errors { + let (module, err) = parse_file_with_errors(file, Some(src.to_string())); + match module { + Some(module) => module, + None => return Err(anyhow::anyhow!(err)), + } + } else { + parse_file(file, Some(src.to_string())).map_err(|err| anyhow::anyhow!(err))? + }; let formatted_src = print_ast_module(&module); let is_formatted = src != formatted_src; Ok((formatted_src, is_formatted)) diff --git a/kclvm/tools/src/format/tests.rs b/kclvm/tools/src/format/tests.rs index 8c5728fbd..19cbb9744 100644 --- a/kclvm/tools/src/format/tests.rs +++ b/kclvm/tools/src/format/tests.rs @@ -34,7 +34,7 @@ fn read_data(data_name: &str) -> (String, String) { .unwrap(); ( - format_source("", &src).unwrap().0, + format_source("", &src, &Default::default()).unwrap().0, std::fs::read_to_string(&format!( "./src/format/test_data/format_data/{}{}", data_name, FILE_OUTPUT_SUFFIX @@ -76,6 +76,7 @@ fn test_format_with_stdout_option() { let opts = FormatOptions { is_stdout: true, recursively: false, + omit_errors: false, }; let changed_files = format("./src/format/test_data/format_path_data/if.k", &opts).unwrap(); assert_eq!(changed_files.len(), 1); @@ -84,11 +85,89 @@ fn test_format_with_stdout_option() { let opts = FormatOptions { is_stdout: true, recursively: true, + omit_errors: false, }; let changed_files = format("./src/format/test_data/format_path_data/", &opts).unwrap(); assert_eq!(changed_files.len(), 2); } +#[test] +fn test_format_with_omit_error_option() { + let opts = FormatOptions { + is_stdout: false, + recursively: false, + omit_errors: true, + }; + let cases = [ + ( + r#"x = { +a: { +b: 1 +c: 2 +} +d: 3 +} +"#, + r#"x = { + a: { + b: 1 + c: 2 + } + d: 3 +} +"#, + ), + ( + r#"x = { +a: { + b: 1 + c: 2 +} +} +"#, + r#"x = { + a: { + b: 1 + c: 2 + } +} +"#, + ), + ( + r#"x = { + a: 1 + b: 2 + c: 3 +} +"#, + r#"x = { + a: 1 + b: 2 + c: 3 +} +"#, + ), + ( + r#"x = { + a: 1 + b: 2 + c: 3 +} +"#, + r#"x = { + a: 1 + b: 2 + c: 3 +} +"#, + ), + ]; + for (code, expected_code) in cases { + let (actual_code, _) = format_source("error_indent.k", code, &opts).unwrap(); + assert_eq!(actual_code, expected_code); + } +} + #[test] fn test_format_integration_konfig() -> Result<()> { let konfig_path = Path::new(".") @@ -111,7 +190,7 @@ fn test_format_integration_konfig() -> Result<()> { file ); let src = std::fs::read_to_string(file)?; - let (formatted_src, _) = format_source("", &src)?; + let (formatted_src, _) = format_source("", &src, &Default::default())?; let parse_result = parse_file("test.k", Some(formatted_src.clone() + "\n")); assert!( parse_result.is_ok(),