From 10f9b8dd4d5689b96f15a80226ebe494d23c6170 Mon Sep 17 00:00:00 2001 From: Jeff Williams Date: Sun, 26 May 2013 12:01:34 -0700 Subject: [PATCH] add Rhino-to-SpiderMonkey/Esprima AST converter --- src/org/jsdoc/AstBuilder.java | 1099 +++++++++++++++++++++++++++++++++ 1 file changed, 1099 insertions(+) create mode 100644 src/org/jsdoc/AstBuilder.java diff --git a/src/org/jsdoc/AstBuilder.java b/src/org/jsdoc/AstBuilder.java new file mode 100644 index 0000000000..16bf9420ea --- /dev/null +++ b/src/org/jsdoc/AstBuilder.java @@ -0,0 +1,1099 @@ +package org.jsdoc; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Set; +import org.mozilla.javascript.CompilerEnvirons; +import org.mozilla.javascript.Context; +import org.mozilla.javascript.NativeArray; +import org.mozilla.javascript.NativeObject; +import org.mozilla.javascript.Node; +import org.mozilla.javascript.Parser; +import org.mozilla.javascript.Scriptable; +import org.mozilla.javascript.ScriptableObject; +import org.mozilla.javascript.Token; +import org.mozilla.javascript.Undefined; +import org.mozilla.javascript.ast.*; // we use almost every class + +/** + * Convert a Rhino-style AST to a SpiderMonkey/Esprima-style AST. The new AST is composed of + * native JavaScript datatypes, so it can be manipulated and traversed with the same code that is + * used for an AST generated in JavaScript. + * + * We handle this conversion in Java because an equivalent JavaScript implementation was more than + * four times slower (presumably because it required thousands of Java-to-JS-and-back type + * conversions). 
+ * @author Jeff Williams + */ +public class AstBuilder +{ + private static final String NODE_ID = HiddenProperties.NODE_ID.getPropertyName(); + private static final String RHINO_NODE = HiddenProperties.RHINO_NODE.getPropertyName(); + private static final String ROOT = HiddenProperties.ROOT.getPropertyName(); + private static final String TYPE = Properties.TYPE.getPropertyName(); + + private static Context cx; + private static ScriptableObject scope; + + private Parser parser; + private NativeObject ast; + private AstRoot root; + private List seenComments; + + private enum NodeTypes + { + ArrayComprehension, + ArrayComprehensionLoop, + ArrayLiteral, + Assignment, + AstRoot, + Block, + BreakStatement, + CatchClause, + Comment, + ConditionalExpression, + ContinueStatement, + DoLoop, + ElementGet, + EmptyExpression, + ExpressionStatement, + ForInLoop, + ForLoop, + FunctionCall, + FunctionNode, + IfStatement, + InfixExpression, + KeywordLiteral, + Label, + LabeledStatement, + LetNode, + Name, + NewExpression, + NumberLiteral, + ObjectLiteral, + ObjectProperty, + ParenthesizedExpression, + PropertyGet, + RegExpLiteral, + ReturnStatement, + Scope, + StringLiteral, + SwitchCase, + SwitchStatement, + ThrowStatement, + TryStatement, + UnaryExpression, + VariableDeclaration, + VariableInitializer, + WhileLoop, + WithStatement, + Yield; + } + + + public AstBuilder() + { + cx = Context.getCurrentContext(); + scope = cx.initStandardObjects(); + reset(); + } + + public void reset() + { + parser = null; + ast = null; + root = null; + seenComments = new ArrayList(); + } + + public NativeObject getAst() + { + return ast; + } + + public NativeObject build(String sourceCode, String sourceName) + { + // Reset the instance's state if necessary + if (ast != null) { + reset(); + } + + parser = getParser(); + + root = parser.parse(sourceCode, sourceName, 1); + + ast = processNode(root); + attachRemainingComments(); + + return ast; + } + + protected static Context getCurrentContext() + { + 
return cx; + } + + protected static ScriptableObject getCurrentScope() + { + return scope; + } + + protected static NativeObject newObject() { + return (NativeObject)cx.newObject(scope); + } + + protected static NativeArray newArray(List list) { + return (NativeArray)cx.newArray(scope, list.toArray()); + } + + protected static NativeArray newArray(int capacity) { + return (NativeArray)cx.newArray(scope, capacity); + } + + private static Parser getParser() + { + CompilerEnvirons ce = new CompilerEnvirons(); + + ce.setRecordingComments(true); + ce.setRecordingLocalJsDocComments(true); + ce.setLanguageVersion(180); + ce.initFromContext(cx); + + return new Parser(ce, ce.getErrorReporter()); + } + + private NativeArray getRange(AstNode rhinoNode) + { + List range = new ArrayList(); + + Integer start = rhinoNode.getAbsolutePosition(); + Integer end = start + rhinoNode.getLength(); + range.add(start); + range.add(end); + + return newArray(range); + } + + /** + * Provide the node's location in an Esprima-compatible format. Rhino doesn't store + * start.column, end.line, or end.column, but fortunately we don't really need them. + * @param rhinoNode + * @return Esprima-compatible location info. 
+     */
+    private NativeObject getLocation(AstNode rhinoNode)
+    {
+        NativeObject loc = newObject();
+        NativeObject start = newObject();
+
+        start.put("line", start, rhinoNode.getLineno());
+        loc.put("start", loc, start);
+        // "end" is deliberately left as an empty object
+        loc.put("end", loc, newObject());
+
+        return loc;
+    }
+
+    /**
+     * Find the position of the first child of the root that is not a comment.
+     * @return The absolute position of the first non-comment child, or Integer.MAX_VALUE if
+     * the root has no such child (so that every comment is treated as a leading comment).
+     */
+    private Integer getSyntaxStart()
+    {
+        Node node = root.getFirstChild();
+        while (node instanceof Comment) {
+            node = node.getNext();
+        }
+
+        if (node == null) {
+            // Empty or comment-only source: there is no syntax to compare against
+            return Integer.MAX_VALUE;
+        }
+
+        return ((AstNode)node).getAbsolutePosition();
+    }
+
+    /**
+     * @return true if Rhino classified the comment as a JSDoc comment.
+     */
+    private boolean isJsDocComment(Comment comment)
+    {
+        return comment.getCommentType() == Token.CommentType.JSDOC;
+    }
+
+    /**
+     * If Rhino attached a JSDoc comment to the node, convert the comment, store it in the
+     * node info as "leadingComments", and remember it as seen.
+     * @param rhinoNode The node whose JSDoc comment should be attached.
+     * @param info The node info to update.
+     */
+    private void attachLeadingComments(AstNode rhinoNode, Entry info)
+    {
+        Comment comment = rhinoNode.getJsDocNode();
+        if (comment == null) {
+            return;
+        }
+
+        seenComments.add(comment);
+
+        List<NativeObject> comments = new ArrayList<NativeObject>(1);
+        comments.add(processNode(comment));
+        info.put("leadingComments", newArray(comments));
+    }
+
+    /**
+     * Attach every JSDoc comment that was not attached to a node to the root of the new AST.
+     * Comments that start before the first syntax node become the root's "leadingComments";
+     * all others become its "trailingComments".
+     */
+    private void attachRemainingComments()
+    {
+        Set<Comment> allComments = root.getComments();
+        if (allComments == null) {
+            return;
+        }
+
+        int syntaxStart = getSyntaxStart();
+        List<NativeObject> leadingComments = new ArrayList<NativeObject>();
+        List<NativeObject> trailingComments = new ArrayList<NativeObject>();
+
+        for (Comment commentNode : allComments) {
+            if (seenComments.contains(commentNode) || !isJsDocComment(commentNode)) {
+                continue;
+            }
+
+            NativeObject comment = processNode(commentNode);
+            // "range" is the native array [start, end] that createNode() stored on the node
+            List<?> range = (List<?>)comment.get("range", comment);
+            int start = ((Number)range.get(0)).intValue();
+
+            if (start < syntaxStart) {
+                leadingComments.add(comment);
+            } else {
+                trailingComments.add(comment);
+            }
+        }
+
+        if (!leadingComments.isEmpty()) {
+            ast.put("leadingComments", ast, newArray(leadingComments));
+        }
+
+        if (!trailingComments.isEmpty()) {
+            ast.put("trailingComments", ast, newArray(trailingComments));
+        }
+    }
+
+    private NativeObject createNode(Entry info)
+    {
+        JsDocNode node;
+        Integer start;
+        Integer end;
+
+        AstNode rhinoNode = (AstNode)info.get(RHINO_NODE);
+
+
NativeArray range = getRange(rhinoNode);
+        info.put("range", range);
+        info.put("loc", getLocation(rhinoNode));
+
+        attachLeadingComments(rhinoNode, info);
+
+        node = new JsDocNode(info);
+
+        // Compare the type by value; reference equality only works if both strings happen to
+        // be the same object
+        if (this.ast == null && JsDocNode.PROGRAM.equals(info.get(TYPE))) {
+            node.put(ROOT, node.getNativeObject());
+        } else {
+            node.put(ROOT, this.ast);
+        }
+
+        return node.getNativeObject();
+    }
+
+    /**
+     * Build an ID for a Rhino node from its hash code.
+     * NOTE(review): hash codes are not guaranteed to be unique, so neither are these IDs.
+     */
+    private String getRhinoNodeId(Node rhinoNode)
+    {
+        return "astnode" + rhinoNode.hashCode();
+    }
+
+    /**
+     * Convert each Rhino node in the list, preserving order.
+     * @param nodes The Rhino nodes to convert.
+     * @return A native array of the converted nodes.
+     */
+    private NativeArray processNodeList(List<? extends AstNode> nodes)
+    {
+        List<NativeObject> newNodes = new ArrayList<NativeObject>(nodes.size());
+        for (AstNode node : nodes) {
+            newNodes.add(processNode(node));
+        }
+
+        return newArray(newNodes);
+    }
+
+    /**
+     * Convert all of the children of a Rhino node, in sibling order.
+     * @param rhinoNode The parent node.
+     * @return A native array of the converted children.
+     */
+    private NativeArray processNodeChildren(AstNode rhinoNode)
+    {
+        List<AstNode> kids = new ArrayList<AstNode>();
+        Node current = rhinoNode.getFirstChild();
+
+        while (current != null) {
+            kids.add((AstNode)current);
+            current = current.getNext();
+        }
+
+        return processNodeList(kids);
+    }
+
+    /**
+     * Convert a single Rhino node (and, recursively, its descendants) to a native object.
+     * @param rhinoNode The Rhino node to convert.
+     * @return The converted node.
+     * @throws IllegalArgumentException If the node's type is not recognized.
+     */
+    private NativeObject processNode(AstNode rhinoNode)
+    {
+        //System.out.println("new rhino node! shortName: " + rhinoNode.shortName() + ", source: " +
+        //    rhinoNode.toSource());
+
+        NativeObject node = null;
+        Entry info = new Entry();
+        NodeTypes type;
+        try {
+            type = NodeTypes.valueOf(rhinoNode.shortName());
+        } catch (IllegalArgumentException e) {
+            // valueOf() rejects unknown names before the switch's default branch can run, so
+            // report the same details here
+            throw new IllegalArgumentException("Unrecognized node type " +
+                rhinoNode.shortName() + " with source: " + rhinoNode.toSource(), e);
+        }
+
+        info.put(NODE_ID, getRhinoNodeId(rhinoNode));
+        info.put(RHINO_NODE, rhinoNode);
+
+        // surely there's a better way to do this...
+ switch (type) { + case ArrayComprehension: + processArrayComprehension((ArrayComprehension)rhinoNode, info); + break; + case ArrayComprehensionLoop: + processArrayComprehensionLoop((ArrayComprehensionLoop)rhinoNode, info); + break; + case ArrayLiteral: + processArrayLiteral((ArrayLiteral)rhinoNode, info); + break; + case Assignment: + processAssignment((Assignment)rhinoNode, info); + break; + case AstRoot: + processAstRoot((AstRoot)rhinoNode, info); + break; + case Block: + processBlock((Block)rhinoNode, info); + break; + case BreakStatement: + processBreakStatement((BreakStatement)rhinoNode, info); + break; + case CatchClause: + processCatchClause((CatchClause)rhinoNode, info); + break; + case Comment: + processComment((Comment)rhinoNode, info); + break; + case ConditionalExpression: + processConditionalExpression((ConditionalExpression)rhinoNode, info); + break; + case ContinueStatement: + processContinueStatement((ContinueStatement)rhinoNode, info); + break; + case DoLoop: + processDoLoop((DoLoop)rhinoNode, info); + break; + case ElementGet: + processElementGet((ElementGet)rhinoNode, info); + break; + case EmptyExpression: + processEmptyExpression((EmptyExpression)rhinoNode, info); + break; + case ExpressionStatement: + processExpressionStatement((ExpressionStatement)rhinoNode, info); + break; + case ForInLoop: + processForInLoop((ForInLoop)rhinoNode, info); + break; + case ForLoop: + processForLoop((ForLoop)rhinoNode, info); + break; + case FunctionCall: + processFunctionCall((FunctionCall)rhinoNode, info); + break; + case FunctionNode: + processFunctionNode((FunctionNode)rhinoNode, info); + break; + case IfStatement: + processIfStatement((IfStatement)rhinoNode, info); + break; + case InfixExpression: + processInfixExpression((InfixExpression)rhinoNode, info); + break; + case KeywordLiteral: + processKeywordLiteral((KeywordLiteral)rhinoNode, info); + break; + case Label: + processLabel((Label)rhinoNode, info); + break; + case LabeledStatement: + 
processLabeledStatement((LabeledStatement)rhinoNode, info); + break; + case LetNode: + processLetNode((LetNode)rhinoNode, info); + break; + case Name: + processName((Name)rhinoNode, info); + break; + case NewExpression: + processNewExpression((NewExpression)rhinoNode, info); + break; + case NumberLiteral: + processNumberLiteral((NumberLiteral)rhinoNode, info); + break; + case ObjectLiteral: + processObjectLiteral((ObjectLiteral)rhinoNode, info); + break; + case ObjectProperty: + processObjectProperty((ObjectProperty)rhinoNode, info); + break; + case ParenthesizedExpression: + // we need the expression, but not the node itself + ParenthesizedExpression expr = (ParenthesizedExpression)rhinoNode; + node = processNode(expr.getExpression()); + break; + case PropertyGet: + processPropertyGet((PropertyGet)rhinoNode, info); + break; + case RegExpLiteral: + processRegExpLiteral((RegExpLiteral)rhinoNode, info); + break; + case ReturnStatement: + processReturnStatement((ReturnStatement)rhinoNode, info); + break; + case Scope: + processScope((Scope)rhinoNode, info); + break; + case StringLiteral: + processStringLiteral((StringLiteral)rhinoNode, info); + break; + case SwitchCase: + processSwitchCase((SwitchCase)rhinoNode, info); + break; + case SwitchStatement: + processSwitchStatement((SwitchStatement)rhinoNode, info); + break; + case ThrowStatement: + processThrowStatement((ThrowStatement)rhinoNode, info); + break; + case TryStatement: + processTryStatement((TryStatement)rhinoNode, info); + break; + case UnaryExpression: + processUnaryExpression((UnaryExpression)rhinoNode, info); + break; + case VariableDeclaration: + processVariableDeclaration((VariableDeclaration)rhinoNode, info); + break; + case VariableInitializer: + processVariableInitializer((VariableInitializer)rhinoNode, info); + break; + case WhileLoop: + processWhileLoop((WhileLoop)rhinoNode, info); + break; + case WithStatement: + processWithStatement((WithStatement)rhinoNode, info); + break; + case Yield: + 
processYield((Yield)rhinoNode, info);
+                break;
+            default:
+                throw new IllegalArgumentException("Unrecognized node type " +
+                    rhinoNode.shortName() + " with source: " + rhinoNode.toSource());
+        }
+
+        // node is already set only for a ParenthesizedExpression, which is replaced by its
+        // inner expression; every other type is built from the collected info
+        if (node == null) {
+            node = createNode(info);
+        }
+
+        return node;
+    }
+
+    /**
+     * Describe an array comprehension: "body" is the result expression, "blocks" are the
+     * loops, and "filter" is the converted filter (or null if there is none).
+     */
+    private void processArrayComprehension(ArrayComprehension rhinoNode, Entry info)
+    {
+        AstNode filter = rhinoNode.getFilter();
+
+        info.put(TYPE, JsDocNode.COMPREHENSION_EXPRESSION);
+
+        info.put("body", processNode(rhinoNode.getResult()));
+        info.put("blocks", processNodeList(rhinoNode.getLoops()));
+        info.put("filter", filter == null ? filter : processNode(filter));
+    }
+
+    /**
+     * Describe one loop of an array comprehension: "left" is the iterator, "right" is the
+     * iterated object, and "each" is Rhino's isForEach() flag.
+     */
+    private void processArrayComprehensionLoop(ArrayComprehensionLoop rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.COMPREHENSION_BLOCK);
+
+        info.put("left", processNode(rhinoNode.getIterator()));
+        info.put("right", processNode(rhinoNode.getIteratedObject()));
+        info.put("each", rhinoNode.isForEach());
+    }
+
+    /**
+     * Describe an array literal. A destructuring literal is typed as an array pattern; any
+     * other literal is typed as an array expression.
+     */
+    private void processArrayLiteral(ArrayLiteral rhinoNode, Entry info)
+    {
+        if (rhinoNode.isDestructuring()) {
+            info.put(TYPE, JsDocNode.ARRAY_PATTERN);
+        } else {
+            info.put(TYPE, JsDocNode.ARRAY_EXPRESSION);
+        }
+
+        info.put("elements", processNodeList(rhinoNode.getElements()));
+    }
+
+    /**
+     * Describe an assignment: the operator as a string, plus the converted left and right
+     * operands.
+     */
+    private void processAssignment(Assignment rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.ASSIGNMENT_EXPRESSION);
+
+        info.put("operator", AstNode.operatorToString(rhinoNode.getOperator()));
+        info.put("left", processNode(rhinoNode.getLeft()));
+        info.put("right", processNode(rhinoNode.getRight()));
+    }
+
+    /**
+     * Describe the root of the AST as a program whose "body" is the root's converted children.
+     */
+    private void processAstRoot(AstRoot rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.PROGRAM);
+
+        info.put("body", processNodeChildren((AstNode)rhinoNode));
+    }
+
+    /**
+     * Describe a block statement whose "body" is the block's converted children.
+     */
+    private void processBlock(Block rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.BLOCK_STATEMENT);
+
+        info.put("body", processNodeChildren((AstNode)rhinoNode));
+    }
+
+    private void processBreakStatement(BreakStatement rhinoNode, Entry info)
+    {
+        AstNode label = rhinoNode.getBreakLabel();
+
+        info.put(TYPE, JsDocNode.BREAK_STATEMENT);
+
+        // an unlabeled break stores null
+        info.put("label", label == null ? label : processNode(label));
+    }
+
+    /**
+     * Describe a catch clause: "param" is the converted variable name and "body" is the
+     * converted body.
+     */
+    private void processCatchClause(CatchClause rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.CATCH_CLAUSE);
+
+        info.put("param", processNode(rhinoNode.getVarName()));
+        info.put("body", processNode(rhinoNode.getBody()));
+    }
+
+    /**
+     * Describe a comment. "value" is the comment text without its first two and last two
+     * characters, and "raw" is the unmodified comment text.
+     */
+    private void processComment(Comment rhinoNode, Entry info)
+    {
+        String comment = rhinoNode.getValue();
+        info.put(TYPE, JsDocNode.BLOCK);
+
+        // Esprima provides the comment value without delimiters, so we do too
+        info.put("value", comment.substring(2, comment.length() - 2));
+        // Esprima doesn't provide this, but it's useful
+        info.put("raw", rhinoNode.getValue());
+    }
+
+    /**
+     * Describe a conditional expression: the converted test, the expression used when the test
+     * is true ("consequent"), and the expression used when it is false ("alternate").
+     */
+    private void processConditionalExpression(ConditionalExpression rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.CONDITIONAL_EXPRESSION);
+
+        info.put("test", processNode(rhinoNode.getTestExpression()));
+        info.put("consequent", processNode(rhinoNode.getTrueExpression()));
+        info.put("alternate", processNode(rhinoNode.getFalseExpression()));
+    }
+
+    /**
+     * Describe a continue statement; "label" is the converted label, or null if there is none.
+     */
+    private void processContinueStatement(ContinueStatement rhinoNode, Entry info)
+    {
+        AstNode label = rhinoNode.getLabel();
+
+        info.put(TYPE, JsDocNode.CONTINUE_STATEMENT);
+
+        info.put("label", label == null ?
label : processNode(label));
+    }
+
+    /**
+     * Describe a do-while loop: the converted body and the converted condition ("test").
+     */
+    private void processDoLoop(DoLoop rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.DO_WHILE_STATEMENT);
+
+        info.put("body", processNode(rhinoNode.getBody()));
+        info.put("test", processNode(rhinoNode.getCondition()));
+    }
+
+    /**
+     * Describe an element access as a member expression with "computed" set to true; "object"
+     * is the converted target and "property" is the converted element.
+     */
+    private void processElementGet(ElementGet rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.MEMBER_EXPRESSION);
+
+        info.put("computed", true);
+        info.put("object", processNode(rhinoNode.getTarget()));
+        info.put("property", processNode(rhinoNode.getElement()));
+    }
+
+    /**
+     * Describe an empty expression as an empty statement; no other properties are set.
+     */
+    private void processEmptyExpression(EmptyExpression rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.EMPTY_STATEMENT);
+    }
+
+    /**
+     * Describe an expression statement that wraps the converted expression.
+     */
+    private void processExpressionStatement(ExpressionStatement rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.EXPRESSION_STATEMENT);
+
+        info.put("expression", processNode(rhinoNode.getExpression()));
+    }
+
+    /**
+     * Describe a for-in loop: "left" is the iterator, "right" is the iterated object, "body"
+     * is the loop body, and "each" is Rhino's isForEach() flag.
+     */
+    private void processForInLoop(ForInLoop rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.FOR_IN_STATEMENT);
+
+        info.put("left", processNode(rhinoNode.getIterator()));
+        info.put("right", processNode(rhinoNode.getIteratedObject()));
+        info.put("body", processNode(rhinoNode.getBody()));
+        info.put("each", rhinoNode.isForEach());
+    }
+
+    /**
+     * Describe a for loop: the converted initializer ("init"), condition ("test"), increment
+     * ("update"), and body.
+     */
+    private void processForLoop(ForLoop rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.FOR_STATEMENT);
+
+        info.put("init", processNode(rhinoNode.getInitializer()));
+        info.put("test", processNode(rhinoNode.getCondition()));
+        info.put("update", processNode(rhinoNode.getIncrement()));
+        info.put("body", processNode(rhinoNode.getBody()));
+    }
+
+    /**
+     * Describe a function call: "callee" is the converted target and "arguments" are the
+     * converted arguments.
+     */
+    private void processFunctionCall(FunctionCall rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.CALL_EXPRESSION);
+
+        info.put("callee", processNode(rhinoNode.getTarget()));
+        info.put("arguments", processNodeList(rhinoNode.getArguments()));
+    }
+
+    /**
+     * Describe a function as either a function expression or a function declaration,
+     * depending on Rhino's function type.
+     */
+    private void processFunctionNode(FunctionNode rhinoNode, Entry info)
+    {
+        AstNode id = rhinoNode.getFunctionName();
+
+        info.put(TYPE,
(rhinoNode.getFunctionType() == FunctionNode.FUNCTION_EXPRESSION) ?
+            JsDocNode.FUNCTION_EXPRESSION : JsDocNode.FUNCTION_DECLARATION);
+
+        // a function without a name stores null
+        info.put("id", id == null ? id : processNode(id));
+        info.put("params", processNodeList(rhinoNode.getParams()));
+        // "defaults" is always an empty array and "rest" is always null
+        info.put("defaults", newArray(0));
+        info.put("body", processNode(rhinoNode.getBody()));
+        info.put("rest", null);
+        info.put("generator", rhinoNode.isGenerator());
+        info.put("expression", rhinoNode.isExpressionClosure());
+    }
+
+    /**
+     * Describe an if statement: the converted condition ("test"), then-part ("consequent"),
+     * and else-part ("alternate", or null if there is no else-part).
+     */
+    private void processIfStatement(IfStatement rhinoNode, Entry info)
+    {
+        AstNode alternate = rhinoNode.getElsePart();
+
+        info.put(TYPE, JsDocNode.IF_STATEMENT);
+
+        info.put("test", processNode(rhinoNode.getCondition()));
+        info.put("consequent", processNode(rhinoNode.getThenPart()));
+        info.put("alternate", alternate == null ? alternate : processNode(alternate));
+    }
+
+    /**
+     * Describe an infix expression as a binary expression: the operator as a string, plus the
+     * converted left and right operands.
+     */
+    private void processInfixExpression(InfixExpression rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.BINARY_EXPRESSION);
+
+        info.put("operator", AstNode.operatorToString(rhinoNode.getOperator()));
+        info.put("left", processNode(rhinoNode.getLeft()));
+        info.put("right", processNode(rhinoNode.getRight()));
+    }
+
+    /**
+     * Describe a keyword literal. true, false, and null become literals with "value" and
+     * "raw" properties; debugger and this get their own node types and no other properties.
+     * @throws IllegalArgumentException If the keyword's token type is not one of the above.
+     */
+    private void processKeywordLiteral(KeywordLiteral rhinoNode, Entry info)
+    {
+        int tokenType = rhinoNode.getType();
+        // stays null for true/false/null, which are all typed as literals after the switch
+        String type = null;
+
+        switch (tokenType) {
+            case Token.TRUE:
+                info.put("value", true);
+                info.put("raw", "true");
+                break;
+            case Token.FALSE:
+                info.put("value", false);
+                info.put("raw", "false");
+                break;
+            case Token.NULL:
+                info.put("value", null);
+                info.put("raw", "null");
+                break;
+            case Token.DEBUGGER:
+                type = JsDocNode.DEBUGGER_STATEMENT;
+                break;
+            case Token.THIS:
+                type = JsDocNode.THIS_EXPRESSION;
+                break;
+            default:
+                throw new IllegalArgumentException("Unrecognized KeywordLiteral: " +
+                    rhinoNode.toSource() + " (token type: Token."
+ Token.typeToName(tokenType) +
+                    ")");
+        }
+
+        // no specific type was chosen in the switch, so this is a plain literal
+        if (type == null) {
+            type = JsDocNode.LITERAL;
+        }
+
+        info.put(TYPE, type);
+    }
+
+    /**
+     * Describe a label as an identifier whose "name" is the label's name.
+     */
+    private void processLabel(Label rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.IDENTIFIER);
+
+        info.put("name", rhinoNode.getName());
+    }
+
+    private void processLabeledStatement(LabeledStatement rhinoNode, Entry info)
+    {
+        info.put(TYPE, JsDocNode.LABELED_STATEMENT);
+
+        // does Rhino ever think that a node has multiple labels? if so, this may not work correctly
+        List