Errors once (#152)

* [Perl] Change the token matcher to accept tokens and return errors at the same time. The current implementation of the parser calls the token matcher multiple times to identify the same input line (e.g. in a 'lookahead' or as part of the state machine ('match_token_at_*')). When an invocation causes an error (e.g. because there are tags with embedded spaces, or the selected language isn't available), the parser always stashes the error in the list of errors. Since the matcher may be invoked multiple times, the error may get stashed multiple times. To fix the user experience, Ruby, Perl, Python and others de-duplicate the reported errors by error message. This change moves stashing of errors to the point where the token is accepted into the AST, which means that multiples of the same error are discarded when that makes sense. E.g. the lookahead discards the errors and *only* looks at whether there is a match with the desired token type. Note: The structure of returning a boolean accepting the line and a second element indicating an error, adds the currently-missing capability of saying 'yes, this is what you're looking for, but there are problems with it'. * Further simplify the parser. Move the responsibility to check that there even *is* an input line to match, to the token matcher. Now that the parser largely isn't responsible for it anymore, we can do away with a whole slew of equally-named wrapper functions in the parser that already exist in the token matcher. Not having two sets of functions by the same name helps to navigate the code.
cucumber · Aug 17, 2023 · 59bc8e8 · 59bc8e8
1 parent a1e381a
commit 59bc8e8
Show file tree

Hide file tree

Showing 6 changed files with 1,379 additions and 655 deletions.
diff --git a/perl/gherkin-perl.razor b/perl/gherkin-perl.razor
@@ -16,7 +16,7 @@
 }
 @helper HandleParserError(IEnumerable<string> expectedTokens, State state)
 {<text>    $token->detach;
-    my $err = $self->_construct_parser_error(
+    $err = $self->_construct_parser_error(
         $token,
         ["@Raw(string.Join("\", \"", expectedTokens))"],
         "State: @state.Id - @Raw(state.Comment)",
@@ -26,7 +26,7 @@
     return @state.Id;
 </text>}
 @helper MatchToken(TokenType tokenType)
-{<text>match_@(tokenType)($context, $token)</text>}
+{<text>$context->token_matcher->match_@(tokenType)($token)</text>}
 package Gherkin::Generated::@(Model.ParserClassName);
 
 # This file is generated. Do not edit! Edit gherkin-perl.razor instead.
@@ -94,29 +94,17 @@ sub _construct_parser_error {
     return $error_class->new( $token, @@args );
 }
 
-@foreach(var rule in Model.RuleSet.TokenRules)
-{<text>
-sub match_@(rule.Name.Replace("#", "")) {
-    my ($self, $context, $token) = @@_;
-    @if (rule.Name != "#EOF")
-    {
-    @:return if $token->is_eof;
-    }
-    return $self->handle_external_error(
-        $context,
-        sub { $context->token_matcher->match_@(rule.Name.Replace("#", ""))( $token ) }
-    );
-}
-</text>}
-
 @foreach(var state in Model.States.Values.Where(s => !s.IsEndState)) //..
 {<text>
 # @Raw(state.Comment)
 sub match_token_at_@(state.Id) {
     my ( $self, $token, $context ) = @@_;
+    my ( $ok, $err );
     @foreach(var transition in state.Transitions)
     {
-    @:if ($self->@MatchToken(transition.TokenType)) {
+    @:($ok, $err) = @MatchToken(transition.TokenType);
+    @:if ($ok) {
+        @:$self->add_error( $context, $err ) if $err;
         if (transition.LookAheadHint != null)
         {
         @:if ($self->lookahead_@(transition.LookAheadHint.Id)($context, $token)) {
@@ -150,18 +138,19 @@ sub lookahead_@(lookAheadHint.Id) {
     my @@queue;
     my $match = 0;
 
+    my $ok;
     while (1) {
         $token = $context->read_token();
         $token->detach;
         push( @@queue, $token );
 
-        @foreach(var tokenType in lookAheadHint.ExpectedTokens) {
-        @:$match = 1 if $self->@MatchToken(tokenType);
-        }
-        last if $match;
+        @foreach(var tokenType in lookAheadHint.ExpectedTokens) {<text>
+        ($match) = @MatchToken(tokenType);
+        last if $match;</text>}
 
-        @foreach(var tokenType in lookAheadHint.Skip) {
-        @:next if $self->@MatchToken(tokenType);
+        @foreach(var tokenType in lookAheadHint.Skip) {<text>
+        ($ok) = @MatchToken(tokenType);
+        next if $ok;</text>
         }
 
         last;