Skip to content

Commit

Permalink
A number of bug fixes.
Browse files Browse the repository at this point in the history
- Some member variable init was missing.
- Fixed duplicated serialized ATN accessors (one implemented as a real getter, the other via the getSerializedATN method).
- Fixed token index access for non-writable tokens in BufferedTokenStream.
- Fixed some toString() methods that produced incompatible output.
- ParseCancellationException now keeps the original error for later examination.
- Fixed target state output of the profiling ATN simulator.

Signed-off-by: Mike Lischke <[email protected]>
  • Loading branch information
mike-lischke committed Dec 19, 2024
1 parent 28ccec5 commit 4f5bf49
Show file tree
Hide file tree
Showing 19 changed files with 88 additions and 122 deletions.
6 changes: 5 additions & 1 deletion src/BufferedTokenStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { Interval } from "./misc/Interval.js";
import { TokenStream } from "./TokenStream.js";
import { TokenSource } from "./TokenSource.js";
import { ParserRuleContext } from "./ParserRuleContext.js";
import { isWritableToken } from "./WritableToken.js";

/**
* This implementation of {@link TokenStream} loads tokens from a
Expand Down Expand Up @@ -151,7 +152,10 @@ export class BufferedTokenStream implements TokenStream {

for (let i = 0; i < n; i++) {
const t = this.tokenSource.nextToken();
t.tokenIndex = this.tokens.length;
if (isWritableToken(t)) {
t.tokenIndex = this.tokens.length;
}

this.tokens.push(t);
if (t.type === Token.EOF) {
this.fetchedEOF = true;
Expand Down
4 changes: 4 additions & 0 deletions src/LexerInterpreter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,8 @@ export class LexerInterpreter extends Lexer {
public get vocabulary(): Vocabulary {
return this.#vocabulary;
}

public get serializedATN(): number[] {
throw new Error("The LexerInterpreter does not support the serializedATN property.");
}
}
6 changes: 3 additions & 3 deletions src/ListTokenSource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,19 @@ export class ListTokenSource implements TokenSource {
/**
* The wrapped collection of {@link Token} objects to return.
*/
protected readonly tokens: Token[];
protected readonly tokens: Token[] = [];

/**
* The index into {@link tokens} of token to return by the next call to
* {@link #nextToken}. The end of the input is indicated by this value
* being greater than or equal to the number of items in {@link #tokens}.
*/
protected i: number;
protected i = 0;

/**
* This field caches the EOF token for the token source.
*/
protected eofToken: Token | null;
protected eofToken: Token | null = null;

/**
* Constructs a new {@link ListTokenSource} instance from the specified
Expand Down
4 changes: 2 additions & 2 deletions src/Parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ export abstract class Parser extends Recognizer<ParserATNSimulator> {

/**
* This field holds the deserialized {@link ATN} with bypass alternatives, created
* lazily upon first demand. In 4.10 I changed from map<serializedATNstring, ATN>
* lazily upon first demand. In 4.10 I changed from map<serializedATNString, ATN>
* since we only need one per parser object and also it complicates other targets
* that don't use ATN strings.
*
Expand Down Expand Up @@ -346,7 +346,7 @@ export abstract class Parser extends Recognizer<ParserATNSimulator> {
* implement the {@link getSerializedATN()} method.
*/
public getATNWithBypassAlts(): ATN {
const serializedAtn = this.getSerializedATN();
const serializedAtn = this.serializedATN;
if (serializedAtn === null) {
throw new Error("The current parser does not support an ATN with bypass alternatives.");
}
Expand Down
13 changes: 9 additions & 4 deletions src/ParserInterpreter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,14 @@ export class ParserInterpreter extends Parser {
this.rootContext = this.createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex);
if (startRuleStartState.isLeftRecursiveRule) {
this.enterRecursionRule(this.rootContext, startRuleStartState.stateNumber, startRuleIndex, 0);
}
else {
} else {
this.enterRule(this.rootContext, startRuleStartState.stateNumber, startRuleIndex);
}

while (true) {
const p = this.atnState;
switch ((p.constructor as typeof ATNState).stateType) {
case ATNState.RULE_STOP:
case ATNState.RULE_STOP: {
// pop; return from rule
if (this.context?.isEmpty()) {
if (startRuleStartState.isLeftRecursiveRule) {
Expand All @@ -138,8 +137,9 @@ export class ParserInterpreter extends Parser {

this.visitRuleStopState(p);
break;
}

default:
default: {
try {
this.visitState(p);
} catch (e) {
Expand All @@ -153,6 +153,7 @@ export class ParserInterpreter extends Parser {
}

break;
}
}
}
}
Expand All @@ -177,6 +178,10 @@ export class ParserInterpreter extends Parser {
super.enterRecursionRule(localctx, state, ruleIndex, precedence);
}

public get serializedATN(): number[] {
throw new Error("The ParserInterpreter does not support the serializedATN property.");
}

protected visitState(p: ATNState): void {
let predictedAlt = 1;
if (p instanceof DecisionState) {
Expand Down
6 changes: 2 additions & 4 deletions src/Recognizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,14 +154,12 @@ export abstract class Recognizer<ATNInterpreter extends ATNSimulator> {
this.stateNumber = state;
}

public getSerializedATN(): number[] {
throw new Error("there is no serialized ATN");
}

public getParseInfo(): ParseInfo | undefined {
return undefined;
}

public abstract get serializedATN(): number[];

// TODO: remove need for this: public abstract get literalNames(): Array<string | null>;
// TODO: remove need for this: public abstract get symbolicNames(): Array<string | null>;
public abstract get grammarFileName(): string;
Expand Down
8 changes: 4 additions & 4 deletions src/atn/ATNDeserializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -398,11 +398,11 @@ export class ATNDeserializer {
// all transitions leaving the rule start state need to leave blockStart
// instead
const ruleToStartState = atn.ruleToStartState[idx]!;
const count = ruleToStartState.transitions.length;
while (count > 0) {
bypassStart.addTransition(ruleToStartState.transitions[count - 1]);
ruleToStartState.transitions = ruleToStartState.transitions.slice(-1);
while (ruleToStartState.transitions.length > 0) {
const transition = ruleToStartState.removeTransition(ruleToStartState.transitions.length - 1);
bypassStart.addTransition(transition);
}

// link the new states
atn.ruleToStartState[idx]!.addTransition(new EpsilonTransition(bypassStart));
if (endState) {
Expand Down
37 changes: 17 additions & 20 deletions src/atn/ProfilingATNSimulator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,31 +114,28 @@ export class ProfilingATNSimulator extends ParserATNSimulator {
}

public override getExistingTargetState(previousD: DFAState, t: number): DFAState | undefined {
if (this.predictionState?.input) {
this.sllStopIndex = this.predictionState.input.index;
// This method is called after each time the input position advances during SLL prediction.
this.sllStopIndex = this.predictionState!.input!.index;

const existingTargetState = super.getExistingTargetState(previousD, t);
const existingTargetState = super.getExistingTargetState(previousD, t);

if (existingTargetState !== null) {
this.decisions[this.currentDecision].sllDFATransitions++;
if (existingTargetState === ATNSimulator.ERROR) {
this.decisions[this.currentDecision].errors.push({
decision: this.currentDecision,
configs: previousD.configs,
input: this.predictionState.input,
startIndex: this.predictionState.startIndex,
stopIndex: this.sllStopIndex,
fullCtx: false,
});
}
if (existingTargetState !== undefined) {
this.decisions[this.currentDecision].sllDFATransitions++; // Count only if we transition over a DFA state.
if (existingTargetState === ATNSimulator.ERROR) {
this.decisions[this.currentDecision].errors.push({
decision: this.currentDecision,
configs: previousD.configs,
input: this.predictionState!.input!,
startIndex: this.predictionState!.startIndex,
stopIndex: this.sllStopIndex,
fullCtx: false,
});
}

this.currentState = existingTargetState;

return existingTargetState;
}

return undefined;
this.currentState = existingTargetState;

return existingTargetState;
}

public override computeTargetState(dfa: DFA, previousD: DFAState, t: number): DFAState {
Expand Down
9 changes: 9 additions & 0 deletions src/misc/MultiMap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,13 @@ export class MultiMap<K extends string, V> extends Map<K, V[]> {

return pairs;
}

public override toString(): string {
const entries: string[] = [];
this.forEach((value, key) => {
entries.push(`${key}=[${value.join(", ")}]`);
});

return `{${entries.join(", ")}}`;
}
}
5 changes: 3 additions & 2 deletions src/misc/ParseCancellationException.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
*/
export class ParseCancellationException extends Error {

public constructor(_e: Error) {
public constructor(e: Error) {
super();
Error.captureStackTrace(this, ParseCancellationException);
this.cause = e;
//Error.captureStackTrace(this, ParseCancellationException);
}
}
2 changes: 1 addition & 1 deletion src/tree/ParseTree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ export interface ParseTree {
* Print out a whole tree, not just a node, in LISP format
* `(root child1 .. childN)`. Print just a node if this is a leaf.
*/
toStringTree(ruleNames: string[], recog: Parser): string;
toStringTree(recog?: Parser): string;

/**
* Return an {@link Interval} indicating the index in the
Expand Down
2 changes: 1 addition & 1 deletion src/tree/pattern/ParseTreePattern.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ export class ParseTreePattern {
*/

public findAll(tree: ParseTree, xpath: string): ParseTreeMatch[] {
const subtrees = XPath.findAll(tree, xpath, this.matcher.getParser());
const subtrees = XPath.findAll(tree, xpath, this.matcher.getParser()!);
const matches = new Array<ParseTreeMatch>();
for (const t of subtrees) {
const match = this.match(t);
Expand Down
48 changes: 24 additions & 24 deletions src/tree/pattern/ParseTreePatternMatcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,25 +90,25 @@ import { TokenTagToken } from "./TokenTagToken.js";
export class ParseTreePatternMatcher {
protected start = "<";
protected stop = ">";
protected escape = "\\";
protected escape = "\\"; // e.g., \< and \> must escape BOTH!

/**
* This is the backing field for {@link #getLexer()}.
*/
private readonly lexer: Lexer;
private readonly lexer: Lexer | null;

/**
* This is the backing field for {@link #getParser()}.
*/
private readonly parser: Parser; // e.g., \< and \> must escape BOTH!
private readonly parser: Parser | null;

/**
* Constructs a {@link ParseTreePatternMatcher} or from a {@link Lexer} and
* {@link Parser} object. The lexer input stream is altered for tokenizing
* the tree patterns. The parser is used as a convenient mechanism to get
* the grammar name, plus token, rule names.
*/
public constructor(lexer: Lexer, parser: Parser) {
public constructor(lexer: Lexer | null, parser: Parser | null) {
this.lexer = lexer;
this.parser = parser;
}
Expand Down Expand Up @@ -217,23 +217,23 @@ export class ParseTreePatternMatcher {
const tokenSrc = new ListTokenSource(tokenList);
const tokens = new CommonTokenStream(tokenSrc);

const parserInterp = new ParserInterpreter(this.parser.grammarFileName, this.parser.vocabulary,
this.parser.ruleNames, this.parser.getATNWithBypassAlts(), tokens);
const parserInterp = new ParserInterpreter(this.parser!.grammarFileName, this.parser!.vocabulary,
this.parser!.ruleNames, this.parser!.getATNWithBypassAlts(), tokens);
parserInterp.removeErrorListeners();

let tree = null;
try {
parserInterp.errorHandler = new BailErrorStrategy();
tree = parserInterp.parse(patternRuleIndex);
} catch (eOrRe) {
if (eOrRe instanceof ParseCancellationException) {
const e = eOrRe;
throw e.cause;
} else if (eOrRe instanceof RecognitionException) {
throw eOrRe;
} else if (eOrRe instanceof Error) {
throw new CannotInvokeStartRuleError(eOrRe);
} catch (error) {
if (error instanceof ParseCancellationException) {
throw error.cause;
} else if (error instanceof RecognitionException) {
throw error;
} else if (error instanceof Error) {
throw new CannotInvokeStartRuleError(error);
} else {
throw eOrRe;
throw error;
}
}

Expand All @@ -250,7 +250,7 @@ export class ParseTreePatternMatcher {
* input stream is reset.
*/

public getLexer(): Lexer {
public getLexer(): Lexer | null {
return this.lexer;
}

Expand All @@ -259,7 +259,7 @@ export class ParseTreePatternMatcher {
* used to parse the pattern into a parse tree.
*/

public getParser(): Parser {
public getParser(): Parser | null {
return this.parser;
}

Expand All @@ -277,19 +277,19 @@ export class ParseTreePatternMatcher {
// add special rule token or conjure up new token from name
const char = tagChunk.tag[0];
if (char === char.toUpperCase()) {
const ttype = this.parser.getTokenType(tagChunk.tag);
const ttype = this.parser!.getTokenType(tagChunk.tag);
if (ttype === Token.INVALID_TYPE) {
throw new Error("Unknown token " + tagChunk.tag + " in pattern: " + pattern);
}
const t = new TokenTagToken(tagChunk.tag, ttype, tagChunk.label);
tokens.push(t);
} else {
if (char === char.toLowerCase()) {
const ruleIndex = this.parser.getRuleIndex(tagChunk.tag);
const ruleIndex = this.parser!.getRuleIndex(tagChunk.tag);
if (ruleIndex === -1) {
throw new Error("Unknown rule " + tagChunk.tag + " in pattern: " + pattern);
}
const ruleImaginaryTokenType = this.parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex];
const ruleImaginaryTokenType = this.parser!.getATNWithBypassAlts().ruleToTokenType[ruleIndex];
tokens.push(new RuleTagToken(tagChunk.tag, ruleImaginaryTokenType, tagChunk.label));
} else {
throw new Error("invalid tag: " + tagChunk.tag + " in pattern: " + pattern);
Expand All @@ -298,11 +298,11 @@ export class ParseTreePatternMatcher {
} else {
const textChunk = chunk as TextChunk;
const input = CharStream.fromString(textChunk.text);
this.lexer.inputStream = input;
let t = this.lexer.nextToken();
this.lexer!.inputStream = input;
let t = this.lexer!.nextToken();
while (t.type !== Token.EOF) {
tokens.push(t);
t = this.lexer.nextToken();
t = this.lexer!.nextToken();
}
}
}
Expand Down Expand Up @@ -400,7 +400,7 @@ export class ParseTreePatternMatcher {
const c = chunks[i];
if (c instanceof TextChunk) {
const tc = c;
const unescaped = tc.text.replace(this.escape, ""); // TODO: do we need a copy of tc.text here?
const unescaped = tc.text.replaceAll(this.escape, ""); // TODO: do we need a copy of tc.text here?
if (unescaped.length < tc.text.length) {
chunks[i] = new TextChunk(unescaped);
}
Expand Down
2 changes: 1 addition & 1 deletion src/tree/pattern/RuleTagToken.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ export class RuleTagToken implements Token {
* delimiters.
*/
public get text(): string {
if (this.label !== null) {
if (this.label !== undefined) {
return "<" + this.label + ":" + this.ruleName + ">";
}

Expand Down
2 changes: 1 addition & 1 deletion src/tree/pattern/TagChunk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ export class TagChunk extends Chunk {
* returned as just the tag name.
*/
public override toString(): string {
if (this.label !== null) {
if (this.label !== undefined) {
return this.label + ":" + this.tag;
}

Expand Down
Loading

0 comments on commit 4f5bf49

Please sign in to comment.