Skip to content

Commit

Permalink
A number of bug fixes.
Browse files Browse the repository at this point in the history
- Some member variable init was missing.
- Fixed duplicated serialized ATN accessors (one implemented as a real getter, the other via the getSerializedATN method).
- Fixed token index access for non-writable tokens in BufferedTokenStream.
- Fixed some toString() methods that produced incompatible output.
- ParseCancellationException now keeps the original error for later examination.
- Fixed target state output of the profiling ATN simulator.

Signed-off-by: Mike Lischke <[email protected]>
  • Loading branch information
mike-lischke committed Dec 19, 2024
1 parent 28ccec5 commit 4f5bf49
Show file tree
Hide file tree
Showing 19 changed files with 88 additions and 122 deletions.
6 changes: 5 additions & 1 deletion src/BufferedTokenStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { Interval } from "./misc/Interval.js";
import { TokenStream } from "./TokenStream.js";
import { TokenSource } from "./TokenSource.js";
import { ParserRuleContext } from "./ParserRuleContext.js";
import { isWritableToken } from "./WritableToken.js";

/**
* This implementation of {@link TokenStream} loads tokens from a
Expand Down Expand Up @@ -151,7 +152,10 @@ export class BufferedTokenStream implements TokenStream {

for (let i = 0; i < n; i++) {
const t = this.tokenSource.nextToken();
t.tokenIndex = this.tokens.length;
if (isWritableToken(t)) {
t.tokenIndex = this.tokens.length;
}

this.tokens.push(t);
if (t.type === Token.EOF) {
this.fetchedEOF = true;
Expand Down
4 changes: 4 additions & 0 deletions src/LexerInterpreter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,8 @@ export class LexerInterpreter extends Lexer {
public get vocabulary(): Vocabulary {
return this.#vocabulary;
}

public get serializedATN(): number[] {
throw new Error("The LexerInterpreter does not support the serializedATN property.");
}
}
6 changes: 3 additions & 3 deletions src/ListTokenSource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,19 @@ export class ListTokenSource implements TokenSource {
/**
* The wrapped collection of {@link Token} objects to return.
*/
protected readonly tokens: Token[];
protected readonly tokens: Token[] = [];

/**
* The index into {@link tokens} of token to return by the next call to
* {@link #nextToken}. The end of the input is indicated by this value
* being greater than or equal to the number of items in {@link #tokens}.
*/
protected i: number;
protected i = 0;

/**
* This field caches the EOF token for the token source.
*/
protected eofToken: Token | null;
protected eofToken: Token | null = null;

/**
* Constructs a new {@link ListTokenSource} instance from the specified
Expand Down
4 changes: 2 additions & 2 deletions src/Parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ export abstract class Parser extends Recognizer<ParserATNSimulator> {

/**
* This field holds the deserialized {@link ATN} with bypass alternatives, created
* lazily upon first demand. In 4.10 I changed from map<serializedATNstring, ATN>
* lazily upon first demand. In 4.10 I changed from map<serializedATNString, ATN>
* since we only need one per parser object and also it complicates other targets
* that don't use ATN strings.
*
Expand Down Expand Up @@ -346,7 +346,7 @@ export abstract class Parser extends Recognizer<ParserATNSimulator> {
* implement the {@link getSerializedATN()} method.
*/
public getATNWithBypassAlts(): ATN {
const serializedAtn = this.getSerializedATN();
const serializedAtn = this.serializedATN;
if (serializedAtn === null) {
throw new Error("The current parser does not support an ATN with bypass alternatives.");
}
Expand Down
13 changes: 9 additions & 4 deletions src/ParserInterpreter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,14 @@ export class ParserInterpreter extends Parser {
this.rootContext = this.createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex);
if (startRuleStartState.isLeftRecursiveRule) {
this.enterRecursionRule(this.rootContext, startRuleStartState.stateNumber, startRuleIndex, 0);
}
else {
} else {
this.enterRule(this.rootContext, startRuleStartState.stateNumber, startRuleIndex);
}

while (true) {
const p = this.atnState;
switch ((p.constructor as typeof ATNState).stateType) {
case ATNState.RULE_STOP:
case ATNState.RULE_STOP: {
// pop; return from rule
if (this.context?.isEmpty()) {
if (startRuleStartState.isLeftRecursiveRule) {
Expand All @@ -138,8 +137,9 @@ export class ParserInterpreter extends Parser {

this.visitRuleStopState(p);
break;
}

default:
default: {
try {
this.visitState(p);
} catch (e) {
Expand All @@ -153,6 +153,7 @@ export class ParserInterpreter extends Parser {
}

break;
}
}
}
}
Expand All @@ -177,6 +178,10 @@ export class ParserInterpreter extends Parser {
super.enterRecursionRule(localctx, state, ruleIndex, precedence);
}

public get serializedATN(): number[] {
throw new Error("The ParserInterpreter does not support the serializedATN property.");
}

protected visitState(p: ATNState): void {
let predictedAlt = 1;
if (p instanceof DecisionState) {
Expand Down
6 changes: 2 additions & 4 deletions src/Recognizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,14 +154,12 @@ export abstract class Recognizer<ATNInterpreter extends ATNSimulator> {
this.stateNumber = state;
}

public getSerializedATN(): number[] {
throw new Error("there is no serialized ATN");
}

public getParseInfo(): ParseInfo | undefined {
return undefined;
}

public abstract get serializedATN(): number[];

// TODO: remove need for this: public abstract get literalNames(): Array<string | null>;
// TODO: remove need for this: public abstract get symbolicNames(): Array<string | null>;
public abstract get grammarFileName(): string;
Expand Down
8 changes: 4 additions & 4 deletions src/atn/ATNDeserializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -398,11 +398,11 @@ export class ATNDeserializer {
// all transitions leaving the rule start state need to leave blockStart
// instead
const ruleToStartState = atn.ruleToStartState[idx]!;
const count = ruleToStartState.transitions.length;
while (count > 0) {
bypassStart.addTransition(ruleToStartState.transitions[count - 1]);
ruleToStartState.transitions = ruleToStartState.transitions.slice(-1);
while (ruleToStartState.transitions.length > 0) {
const transition = ruleToStartState.removeTransition(ruleToStartState.transitions.length - 1);
bypassStart.addTransition(transition);
}

// link the new states
atn.ruleToStartState[idx]!.addTransition(new EpsilonTransition(bypassStart));
if (endState) {
Expand Down
37 changes: 17 additions & 20 deletions src/atn/ProfilingATNSimulator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,31 +114,28 @@ export class ProfilingATNSimulator extends ParserATNSimulator {
}

public override getExistingTargetState(previousD: DFAState, t: number): DFAState | undefined {
if (this.predictionState?.input) {
this.sllStopIndex = this.predictionState.input.index;
// This method is called after each time the input position advances during SLL prediction.
this.sllStopIndex = this.predictionState!.input!.index;

const existingTargetState = super.getExistingTargetState(previousD, t);
const existingTargetState = super.getExistingTargetState(previousD, t);

if (existingTargetState !== null) {
this.decisions[this.currentDecision].sllDFATransitions++;
if (existingTargetState === ATNSimulator.ERROR) {
this.decisions[this.currentDecision].errors.push({
decision: this.currentDecision,
configs: previousD.configs,
input: this.predictionState.input,
startIndex: this.predictionState.startIndex,
stopIndex: this.sllStopIndex,
fullCtx: false,
});
}
if (existingTargetState !== undefined) {
this.decisions[this.currentDecision].sllDFATransitions++; // Count only if we transition over a DFA state.
if (existingTargetState === ATNSimulator.ERROR) {
this.decisions[this.currentDecision].errors.push({
decision: this.currentDecision,
configs: previousD.configs,
input: this.predictionState!.input!,
startIndex: this.predictionState!.startIndex,
stopIndex: this.sllStopIndex,
fullCtx: false,
});
}

this.currentState = existingTargetState;

return existingTargetState;
}

return undefined;
this.currentState = existingTargetState;

return existingTargetState;
}

public override computeTargetState(dfa: DFA, previousD: DFAState, t: number): DFAState {
Expand Down
9 changes: 9 additions & 0 deletions src/misc/MultiMap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,13 @@ export class MultiMap<K extends string, V> extends Map<K, V[]> {

return pairs;
}

public override toString(): string {
const entries: string[] = [];
this.forEach((value, key) => {
entries.push(`${key}=[${value.join(", ")}]`);
});

return `{${entries.join(", ")}}`;
}
}
5 changes: 3 additions & 2 deletions src/misc/ParseCancellationException.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
*/
export class ParseCancellationException extends Error {

public constructor(_e: Error) {
public constructor(e: Error) {
super();
Error.captureStackTrace(this, ParseCancellationException);
this.cause = e;
//Error.captureStackTrace(this, ParseCancellationException);
}
}
2 changes: 1 addition & 1 deletion src/tree/ParseTree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ export interface ParseTree {
* Print out a whole tree, not just a node, in LISP format
* `(root child1 .. childN)`. Print just a node if this is a leaf.
*/
toStringTree(ruleNames: string[], recog: Parser): string;
toStringTree(recog?: Parser): string;

/**
* Return an {@link Interval} indicating the index in the
Expand Down
2 changes: 1 addition & 1 deletion src/tree/pattern/ParseTreePattern.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ export class ParseTreePattern {
*/

public findAll(tree: ParseTree, xpath: string): ParseTreeMatch[] {
const subtrees = XPath.findAll(tree, xpath, this.matcher.getParser());
const subtrees = XPath.findAll(tree, xpath, this.matcher.getParser()!);
const matches = new Array<ParseTreeMatch>();
for (const t of subtrees) {
const match = this.match(t);
Expand Down
48 changes: 24 additions & 24 deletions src/tree/pattern/ParseTreePatternMatcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,25 +90,25 @@ import { TokenTagToken } from "./TokenTagToken.js";
export class ParseTreePatternMatcher {
protected start = "<";
protected stop = ">";
protected escape = "\\";
protected escape = "\\"; // e.g., \< and \> must escape BOTH!

/**
* This is the backing field for {@link #getLexer()}.
*/
private readonly lexer: Lexer;
private readonly lexer: Lexer | null;

/**
* This is the backing field for {@link #getParser()}.
*/
private readonly parser: Parser; // e.g., \< and \> must escape BOTH!
private readonly parser: Parser | null;

/**
* Constructs a {@link ParseTreePatternMatcher} or from a {@link Lexer} and
* {@link Parser} object. The lexer input stream is altered for tokenizing
* the tree patterns. The parser is used as a convenient mechanism to get
* the grammar name, plus token, rule names.
*/
public constructor(lexer: Lexer, parser: Parser) {
public constructor(lexer: Lexer | null, parser: Parser | null) {
this.lexer = lexer;
this.parser = parser;
}
Expand Down Expand Up @@ -217,23 +217,23 @@ export class ParseTreePatternMatcher {
const tokenSrc = new ListTokenSource(tokenList);
const tokens = new CommonTokenStream(tokenSrc);

const parserInterp = new ParserInterpreter(this.parser.grammarFileName, this.parser.vocabulary,
this.parser.ruleNames, this.parser.getATNWithBypassAlts(), tokens);
const parserInterp = new ParserInterpreter(this.parser!.grammarFileName, this.parser!.vocabulary,
this.parser!.ruleNames, this.parser!.getATNWithBypassAlts(), tokens);
parserInterp.removeErrorListeners();

let tree = null;
try {
parserInterp.errorHandler = new BailErrorStrategy();
tree = parserInterp.parse(patternRuleIndex);
} catch (eOrRe) {
if (eOrRe instanceof ParseCancellationException) {
const e = eOrRe;
throw e.cause;
} else if (eOrRe instanceof RecognitionException) {
throw eOrRe;
} else if (eOrRe instanceof Error) {
throw new CannotInvokeStartRuleError(eOrRe);
} catch (error) {
if (error instanceof ParseCancellationException) {
throw error.cause;
} else if (error instanceof RecognitionException) {
throw error;
} else if (error instanceof Error) {
throw new CannotInvokeStartRuleError(error);
} else {
throw eOrRe;
throw error;
}
}

Expand All @@ -250,7 +250,7 @@ export class ParseTreePatternMatcher {
* input stream is reset.
*/

public getLexer(): Lexer {
public getLexer(): Lexer | null {
return this.lexer;
}

Expand All @@ -259,7 +259,7 @@ export class ParseTreePatternMatcher {
* used to parse the pattern into a parse tree.
*/

public getParser(): Parser {
public getParser(): Parser | null {
return this.parser;
}

Expand All @@ -277,19 +277,19 @@ export class ParseTreePatternMatcher {
// add special rule token or conjure up new token from name
const char = tagChunk.tag[0];
if (char === char.toUpperCase()) {
const ttype = this.parser.getTokenType(tagChunk.tag);
const ttype = this.parser!.getTokenType(tagChunk.tag);
if (ttype === Token.INVALID_TYPE) {
throw new Error("Unknown token " + tagChunk.tag + " in pattern: " + pattern);
}
const t = new TokenTagToken(tagChunk.tag, ttype, tagChunk.label);
tokens.push(t);
} else {
if (char === char.toLowerCase()) {
const ruleIndex = this.parser.getRuleIndex(tagChunk.tag);
const ruleIndex = this.parser!.getRuleIndex(tagChunk.tag);
if (ruleIndex === -1) {
throw new Error("Unknown rule " + tagChunk.tag + " in pattern: " + pattern);
}
const ruleImaginaryTokenType = this.parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex];
const ruleImaginaryTokenType = this.parser!.getATNWithBypassAlts().ruleToTokenType[ruleIndex];
tokens.push(new RuleTagToken(tagChunk.tag, ruleImaginaryTokenType, tagChunk.label));
} else {
throw new Error("invalid tag: " + tagChunk.tag + " in pattern: " + pattern);
Expand All @@ -298,11 +298,11 @@ export class ParseTreePatternMatcher {
} else {
const textChunk = chunk as TextChunk;
const input = CharStream.fromString(textChunk.text);
this.lexer.inputStream = input;
let t = this.lexer.nextToken();
this.lexer!.inputStream = input;
let t = this.lexer!.nextToken();
while (t.type !== Token.EOF) {
tokens.push(t);
t = this.lexer.nextToken();
t = this.lexer!.nextToken();
}
}
}
Expand Down Expand Up @@ -400,7 +400,7 @@ export class ParseTreePatternMatcher {
const c = chunks[i];
if (c instanceof TextChunk) {
const tc = c;
const unescaped = tc.text.replace(this.escape, ""); // TODO: do we need a copy of tc.text here?
const unescaped = tc.text.replaceAll(this.escape, ""); // TODO: do we need a copy of tc.text here?
if (unescaped.length < tc.text.length) {
chunks[i] = new TextChunk(unescaped);
}
Expand Down
2 changes: 1 addition & 1 deletion src/tree/pattern/RuleTagToken.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ export class RuleTagToken implements Token {
* delimiters.
*/
public get text(): string {
if (this.label !== null) {
if (this.label !== undefined) {
return "<" + this.label + ":" + this.ruleName + ">";
}

Expand Down
2 changes: 1 addition & 1 deletion src/tree/pattern/TagChunk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ export class TagChunk extends Chunk {
* returned as just the tag name.
*/
public override toString(): string {
if (this.label !== null) {
if (this.label !== undefined) {
return this.label + ":" + this.tag;
}

Expand Down
Loading

0 comments on commit 4f5bf49

Please sign in to comment.