From b4bd6f709f438a84fffc362634af6ef4c45fade8 Mon Sep 17 00:00:00 2001
From: Terence Parr
Date: Sun, 28 Jul 2024 14:25:17 -0700
Subject: [PATCH] * Count tree nodes; don't allow more than 50k nodes.
 * Don't return input back to js client; a waste.
 * Add helper main for testing

Signed-off-by: Terence Parr
---
 src/org/antlr/v4/server/GrammarProcessor.java | 102 ++++++++++++------
 1 file changed, 70 insertions(+), 32 deletions(-)

diff --git a/src/org/antlr/v4/server/GrammarProcessor.java b/src/org/antlr/v4/server/GrammarProcessor.java
index 393c5bd..1261357 100644
--- a/src/org/antlr/v4/server/GrammarProcessor.java
+++ b/src/org/antlr/v4/server/GrammarProcessor.java
@@ -23,20 +23,25 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
-import java.text.NumberFormat;
 import java.util.Arrays;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 public class GrammarProcessor {
 
-    public static final int MAX_PARSE_TIME_MS = 20 * 1000; // 20 seconds
+    public static final int MAX_PARSE_TIME_MS = 10 * 1000; // 10 seconds
+    public static final int MAX_TREE_SIZE_IN_NODES = 50_000;
 
     private static class KillableGrammarParserInterpreter extends GrammarParserInterpreter {
         private final long creationTime = System.currentTimeMillis();
+        protected String startRule;
 
-        public KillableGrammarParserInterpreter(Grammar g, ATN deserializedATN, TokenStream tokenStream) {
+        public KillableGrammarParserInterpreter(Grammar g,
+                                                ATN deserializedATN,
+                                                String startRule,
+                                                TokenStream tokenStream) {
             super(g, deserializedATN, tokenStream);
+            this.startRule = startRule;
         }
 
         @Override
@@ -44,19 +49,19 @@ protected void visitState(ATNState p) {
             super.visitState(p);
             long now = System.currentTimeMillis();
             long runTimeMs = now - creationTime;
-            if ( runTimeMs > MAX_PARSE_TIME_MS ) {
-                LOGGER.error("Parser timeout ("+MAX_PARSE_TIME_MS+"ms)");
-                throw new ParseCancellationException("Parser timeout ("+MAX_PARSE_TIME_MS+"ms)");
+            if (runTimeMs > MAX_PARSE_TIME_MS) {
+                String msg = "Parser timeout (" + MAX_PARSE_TIME_MS + "ms) in rule " + startRule;
+                throw new ParseCancellationException(msg);
             }
         }
     }
 
-    /** Interpret the input according to the grammar, starting at the start rule, and return a JSON object
-     *  with errors, tokens, rule names, and the parse tree.
+    /**
+     * Interpret the input according to the grammar, starting at the start rule, and return a JSON object
+     * with errors, tokens, rule names, and the parse tree.
      */
     public static JsonObject interp(String grammar, String lexGrammar, String input, String startRule)
-        throws IOException
-    {
+            throws IOException {
         startRule = startRule.strip();
         Grammar g = null;
         LexerGrammar lg = null;
@@ -67,7 +72,7 @@ public static JsonObject interp(String grammar, String lexGrammar, String input,
         CollectGrammarErrorsAndWarnings lexlistener = new CollectLexerGrammarErrorsAndWarnings(errMgr);
         final JsonArray warnings = new JsonArray();
         try {
-            if ( lexGrammar!=null && lexGrammar.strip().length()>0 ) {
+            if (lexGrammar != null && lexGrammar.strip().length() > 0) {
                 lg = new LexerGrammar(lexGrammar, lexlistener);
                 g = new IgnoreTokenVocabGrammar(null, grammar, lg, parselistener);
             }
@@ -108,7 +113,7 @@ public static JsonObject interp(String grammar, String lexGrammar, String input,
     }
 
     private static JsonObject parseAndGetJSON(Grammar g, LexerGrammar lg, String startRule, String input)
-        throws IOException
+            throws IOException
     {
 
         CharStream charStream = CharStreams.fromStream(new StringBufferInputStream(input));
@@ -124,7 +129,7 @@ private static JsonObject parseAndGetJSON(Grammar g, LexerGrammar lg, String sta
 
         tokens.fill();
 
-        KillableGrammarParserInterpreter parser = createGrammarParserInterpreter(g, tokens);
+        KillableGrammarParserInterpreter parser = createGrammarParserInterpreter(g, startRule, tokens);
 
         CollectLexOrParseSyntaxErrors parseListener = new CollectLexOrParseSyntaxErrors();
         parser.removeErrorListeners();
@@ -135,8 +140,14 @@ private static JsonObject parseAndGetJSON(Grammar g, LexerGrammar lg, String sta
         ParseTree t = parser.parse(r.index);
         ParseInfo parseInfo = parser.getParseInfo();
 
+        int n = nodeCount(t);
+        if ( n > MAX_TREE_SIZE_IN_NODES ) {
+            var msg = "Tree size "+n+" nodes > max of "+MAX_TREE_SIZE_IN_NODES;
+            throw new ParseCancellationException(msg);
+        }
+
         long now = System.currentTimeMillis();
-        LOGGER.info("PARSE TIME: "+(now - parser.creationTime)+"ms");
+//        LOGGER.info("PARSE TIME: "+(now - parser.creationTime)+"ms");
 
 //        System.out.println("lex msgs" + lexListener.msgs);
 //        System.out.println("parse msgs" + parseListener.msgs);
@@ -145,7 +156,8 @@ private static JsonObject parseAndGetJSON(Grammar g, LexerGrammar lg, String sta
         String[][] profileData = getProfilerTable(parser, parseInfo);
 
         TokenStream tokenStream = parser.getInputStream();
-        CharStream inputStream = tokenStream.getTokenSource().getInputStream();
+//        CharStream inputStream = tokenStream.getTokenSource().getInputStream();
+        CharStream inputStream = null; // don't send input back to client (they have it and it can be big)
         return JsonSerializer.toJSON(
                 t,
                 Arrays.asList(parser.getRuleNames()),
@@ -157,8 +169,12 @@ private static JsonObject parseAndGetJSON(Grammar g, LexerGrammar lg, String sta
                 profileData);
     }
 
-    /** Copy this function from {@link Grammar} so we can override {@link ParserInterpreter#visitState(ATNState)} */
-    public static KillableGrammarParserInterpreter createGrammarParserInterpreter(Grammar g, TokenStream tokenStream) {
+    /**
+     * Copy this function from {@link Grammar} so we can override {@link ParserInterpreter#visitState(ATNState)}
+     */
+    public static KillableGrammarParserInterpreter createGrammarParserInterpreter(Grammar g,
+                                                                                  String startRule,
+                                                                                  TokenStream tokenStream) {
         if (g.isLexer()) {
             throw new IllegalStateException("A parser interpreter can only be created for a parser or combined grammar.");
         }
@@ -166,13 +182,13 @@ public static KillableGrammarParserInterpreter createGrammarParserInterpreter(Gr
 
         IntegerList serialized = ATNSerializer.getSerialized(g.getATN());
         ATN deserializedATN = new ATNDeserializer().deserialize(serialized.toArray());
-        return new KillableGrammarParserInterpreter(g, deserializedATN, tokenStream);
+        return new KillableGrammarParserInterpreter(g, deserializedATN, startRule, tokenStream);
     }
 
     private static String[][] getProfilerTable(GrammarParserInterpreter parser, ParseInfo parseInfo) {
         String[] ruleNamesByDecision = new String[parser.getATN().decisionToState.size()];
 
-        for(int i = 0; i < ruleNamesByDecision .length; i++) {
+        for (int i = 0; i < ruleNamesByDecision.length; i++) {
             ruleNamesByDecision[i] = parser.getRuleNames()[parser.getATN().getDecisionState(i).ruleIndex];
         }
@@ -191,9 +207,9 @@ private static String[][] getProfilerTable(GrammarParserInterpreter parser, Pars
 
     public static String toSVG(Tree t, List<String> ruleNames) throws IOException {
         long id = Thread.currentThread().getId();
-        String psFileName = "temp-"+id+".ps";
-        String pdfFileName = "temp-"+id+".pdf";
-        String svgFileName = "temp-"+id+".svg";
+        String psFileName = "temp-" + id + ".ps";
+        String pdfFileName = "temp-" + id + ".pdf";
+        String svgFileName = "temp-" + id + ".svg";
 
         Trees.writePS(t, ruleNames, Path.of(IMAGES_DIR, psFileName).toAbsolutePath().toString());
         String ps = Files.readString(Path.of(IMAGES_DIR, psFileName));
@@ -204,37 +220,37 @@ public static String toSVG(Tree t, List<String> ruleNames) throws IOException {
 
         int width;
         int height;
-        if ( matcher.find()) {
+        if (matcher.find()) {
             width = Integer.valueOf(matcher.group(1));
             height = Integer.valueOf(matcher.group(2));
         }
         else {
-            LOGGER.error("Didn't match regex in PS: "+regex);
+            LOGGER.error("Didn't match regex in PS: " + regex);
             width = 1000;
             height = 1000;
         }
 
         String[] results =
-                execInDir(IMAGES_DIR, "ps2pdf",
-                        "-dDEVICEWIDTHPOINTS=" + width,
-                        "-dDEVICEHEIGHTPOINTS=" + height,
-                        psFileName, pdfFileName);
+            execInDir(IMAGES_DIR, "ps2pdf",
+                    "-dDEVICEWIDTHPOINTS=" + width,
+                    "-dDEVICEHEIGHTPOINTS=" + height,
+                    psFileName, pdfFileName);
         if (results[1].length() > 0) {
-            LOGGER.info("ps2pdf: "+results[1]);
+            LOGGER.info("ps2pdf: " + results[1]);
             String msg = results[1].strip();
             return "\n" +
                     "\n" +
-                    " Can't create SVG tree; ps2pdf says: "+msg+"\n" +
+                    " Can't create SVG tree; ps2pdf says: " + msg + "\n" +
                     "";
         }
 
         results = execInDir(IMAGES_DIR, "pdf2svg", pdfFileName, svgFileName);
         if (results[1].length() > 0) {
-            LOGGER.info("pdf2svg: "+results[1]);
+            LOGGER.info("pdf2svg: " + results[1]);
             String msg = results[1].strip();
             return "\n" +
-                    " Can't create SVG tree; pdf2svg says: "+msg+"\n" +
+                    " Can't create SVG tree; pdf2svg says: " + msg + "\n" +
                     "";
         }
@@ -242,4 +258,26 @@ public static String toSVG(Tree t, List<String> ruleNames) throws IOException {
         String svg = new String(Files.readAllBytes(Paths.get(svgfilename)));
         return svg;
     }
+
+    public static final int nodeCount(Tree t) {
+        if (t == null) {
+            return 0;
+        }
+        int n = 1;
+        for (int i = 0; i < t.getChildCount(); i++) {
+            n += nodeCount(t.getChild(i));
+        }
+        return n;
+    }
+
+    /** A test main program for the "big" dir grammar */
+    public static void main(String[] args) throws IOException {
+        new File(IMAGES_DIR).mkdirs();
+        var base = "/Users/parrt/antlr/code/antlr4-lab/big/";
+        String parserContent = Files.readString(Path.of(base + "TPSParser.g4"));
+        String lexerContent = Files.readString(Path.of(base + "TPSLexer.g4"));
+        String input = Files.readString(Path.of(base + "fonline.clc"));
+        var json = interp(parserContent, lexerContent, input, "program");
+//        System.out.println(json);
+    }
 }
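For reference, a minimal caller-side sketch of the revised interp() entry point (not part of the patch). The grammar/input file names and the "program" start rule are illustrative assumptions, and whether the ParseCancellationException raised by the 10-second timeout or the 50,000-node guard reaches the caller or is folded into the returned JSON depends on interp()'s error handling, which these hunks do not show.

import org.antlr.v4.runtime.misc.ParseCancellationException;
import org.antlr.v4.server.GrammarProcessor;

import java.nio.file.Files;
import java.nio.file.Path;

public class InterpSmokeTest {
    public static void main(String[] args) throws Exception {
        // Hypothetical inputs; substitute a real parser grammar, lexer grammar, and input file.
        String parserGrammar = Files.readString(Path.of("MyParser.g4"));
        String lexerGrammar = Files.readString(Path.of("MyLexer.g4"));
        String input = Files.readString(Path.of("input.txt"));
        try {
            // interp() returns a JSON object with errors, tokens, rule names, and the parse tree;
            // after this patch it no longer echoes the input text back to the client.
            var json = GrammarProcessor.interp(parserGrammar, lexerGrammar, input, "program");
            System.out.println(json);
        }
        catch (ParseCancellationException e) {
            // Fired by KillableGrammarParserInterpreter.visitState() after MAX_PARSE_TIME_MS,
            // or by the MAX_TREE_SIZE_IN_NODES check after parsing, if it escapes interp().
            System.err.println("Parse aborted: " + e.getMessage());
        }
    }
}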