Implement updated grammar and improve trees

This change implements the updated grammar for traits and operations in #1800. WS and BR handling is now more explicit, including individual token trees for comments, spaces, and commas. NODE_OBJECT_KEY now contains an IDENTIFIER and QUOTED_TEXT tree rather than raw tokens.

This change also cleans up token trees and adds some new features:

* Token trees now implement FromSourceLocation.
* Unused methods were removed.
smithy-lang · Jun 2, 2023 · e08012c · e08012c
1 parent 70a6f47
commit e08012c
Show file tree

Hide file tree

Showing 9 changed files with 273 additions and 181 deletions.
diff --git a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturedToken.java b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturedToken.java
@@ -16,6 +16,8 @@
 package software.amazon.smithy.syntax;
 
 import java.util.function.Function;
+import software.amazon.smithy.model.FromSourceLocation;
+import software.amazon.smithy.model.SourceLocation;
 import software.amazon.smithy.model.loader.IdlToken;
 import software.amazon.smithy.model.loader.IdlTokenizer;
 
@@ -27,9 +29,10 @@
  * token. Because smithy-syntax needs to create a token-tree rather than go directly to an AST, it requires arbitrary
  * lookahead of tokens, which means it needs to persist tokens in memory, using this {@code CapturedToken}.
  */
-public final class CapturedToken {
+public final class CapturedToken implements FromSourceLocation {
 
     private final IdlToken token;
+    private final String filename;
     private final int position;
     private final int startLine;
     private final int startColumn;
@@ -42,6 +45,7 @@ public final class CapturedToken {
 
     private CapturedToken(
             IdlToken token,
+            String filename,
             int position,
             int startLine,
             int startColumn,
@@ -53,6 +57,7 @@ private CapturedToken(
             String errorMessage
     ) {
         this.token = token;
+        this.filename = filename;
         this.position = position;
         this.startLine = startLine;
         this.startColumn = startColumn;
@@ -90,6 +95,7 @@ public static CapturedToken from(IdlTokenizer tokenizer, Function<CharSequence,
         String errorMessage = token == IdlToken.ERROR ? tokenizer.getCurrentTokenError() : null;
         Number numberValue = token == IdlToken.NUMBER ? tokenizer.getCurrentTokenNumberValue() : null;
         return new CapturedToken(token,
+                                 tokenizer.getSourceFilename(),
                                  tokenizer.getCurrentTokenStart(),
                                  tokenizer.getCurrentTokenLine(),
                                  tokenizer.getCurrentTokenColumn(),
@@ -110,6 +116,15 @@ public IdlToken getIdlToken() {
         return token;
     }
 
+    @Override
+    public SourceLocation getSourceLocation() {
+        return new SourceLocation(getFilename(), getStartLine(), getStartColumn());
+    }
+
+    public String getFilename() {
+        return filename;
+    }
+
     public int getPosition() {
         return position;
     }

diff --git a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturingTokenizer.java b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturingTokenizer.java
@@ -32,10 +32,6 @@
  */
 final class CapturingTokenizer implements IdlTokenizer {
 
-    // For now, this also skips doc comments. We may later move doc comments out of WS.
-    private static final IdlToken[] WS_CHARS = {IdlToken.SPACE, IdlToken.NEWLINE, IdlToken.COMMA,
-                                                IdlToken.COMMENT, IdlToken.DOC_COMMENT};
-
     private final IdlTokenizer delegate;
     private final TokenTree root = TokenTree.of(TreeType.IDL);
     private final Deque<TokenTree> trees = new ArrayDeque<>();
@@ -188,29 +184,6 @@ TokenTree withState(TreeType state, Runnable errorRecovery, Runnable parser) {
         return tree;
     }
 
-    void expectWs() {
-        expect(WS_CHARS);
-        do {
-            next();
-        } while (isWs());
-    }
-
-    boolean isWs() {
-        return isToken(WS_CHARS);
-    }
-
-    private boolean isToken(IdlToken... tokens) {
-        IdlToken currentTokenType = getCurrentToken();
-
-        for (IdlToken token : tokens) {
-            if (currentTokenType == token) {
-                return true;
-            }
-        }
-
-        return false;
-    }
-
     // Performs basic error recovery by skipping tokens until a $, identifier, or @ is found at column 1.
     private void defaultErrorRecovery() {
         while (hasNext()) {

diff --git a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TokenTree.java b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TokenTree.java
@@ -17,6 +17,7 @@
 
 import java.util.List;
 import java.util.stream.Stream;
+import software.amazon.smithy.model.FromSourceLocation;
 import software.amazon.smithy.model.loader.IdlTokenizer;
 
 /**
@@ -25,7 +26,7 @@
  * <p>This abstraction is a kind of parse tree based on lexer tokens. Each consumed token is present in the tree,
  * and grouped together into nodes with labels defined by {@link TreeType}.
  */
-public interface TokenTree {
+public interface TokenTree extends FromSourceLocation {
 
     /**
      * Create a TokenTree from a {@link IdlTokenizer}.
@@ -83,6 +84,13 @@ static TokenTree error(String error) {
      */
     List<TokenTree> getChildren();
 
+    /**
+     * Detect if the tree is empty.
+     *
+     * @return Return true if the tree has no children or tokens.
+     */
+    boolean isEmpty();
+
     /**
      * Check if the tree has an immediate child of the given type.
      *

diff --git a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TokenTreeLeaf.java b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TokenTreeLeaf.java
@@ -19,6 +19,7 @@
 import java.util.List;
 import java.util.Objects;
 import java.util.stream.Stream;
+import software.amazon.smithy.model.SourceLocation;
 
 final class TokenTreeLeaf implements TokenTree {
 
@@ -28,6 +29,11 @@ final class TokenTreeLeaf implements TokenTree {
         this.token = token;
     }
 
+    @Override
+    public SourceLocation getSourceLocation() {
+        return token.getSourceLocation();
+    }
+
     @Override
     public Stream<CapturedToken> tokens() {
         return Stream.of(token);
@@ -43,6 +49,11 @@ public List<TokenTree> getChildren() {
         return Collections.emptyList();
     }
 
+    @Override
+    public boolean isEmpty() {
+        return false;
+    }
+
     @Override
     public void appendChild(TokenTree tree) {
         throw new UnsupportedOperationException("Cannot append a child to a leaf node");

diff --git a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TokenTreeNode.java b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TokenTreeNode.java
@@ -19,6 +19,7 @@
 import java.util.List;
 import java.util.Objects;
 import java.util.stream.Stream;
+import software.amazon.smithy.model.SourceLocation;
 
 class TokenTreeNode implements TokenTree {
 
@@ -29,6 +30,11 @@ class TokenTreeNode implements TokenTree {
         this.treeType = treeType;
     }
 
+    @Override
+    public SourceLocation getSourceLocation() {
+        return getChildren().isEmpty() ? SourceLocation.NONE : getChildren().get(0).getSourceLocation();
+    }
+
     @Override
     public final TreeType getType() {
         return treeType;
@@ -44,6 +50,11 @@ public final List<TokenTree> getChildren() {
         return children;
     }
 
+    @Override
+    public boolean isEmpty() {
+        return getChildren().isEmpty();
+    }
+
     @Override
     public final void appendChild(TokenTree tree) {
         children.add(tree);

diff --git a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TreeCursor.java b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TreeCursor.java
@@ -21,14 +21,15 @@
 import java.util.List;
 import java.util.Objects;
 import java.util.function.Predicate;
-import software.amazon.smithy.model.loader.IdlToken;
+import software.amazon.smithy.model.FromSourceLocation;
+import software.amazon.smithy.model.SourceLocation;
 
 /**
  * Externally traverses a {@link TokenTree} to provide access to parents and siblings.
  *
  * @see TokenTree#zipper()
  */
-public final class TreeCursor {
+public final class TreeCursor implements FromSourceLocation {
 
     private final TokenTree tree;
     private final TreeCursor parent;
@@ -48,6 +49,11 @@ public static TreeCursor fromRoot(TokenTree tree) {
         return new TreeCursor(tree, null);
     }
 
+    @Override
+    public SourceLocation getSourceLocation() {
+        return getTree().getSourceLocation();
+    }
+
     /**
      * Get the wrapped {@link TokenTree}.
      *
@@ -156,6 +162,24 @@ public TreeCursor next() {
         };
     }
 
+    /**
+     * Get direct children from the current tree of a specific type.
+     *
+     * @param types Types of children to get.
+     * @return Returns the collected children, or an empty list.
+     */
+    public List<TreeCursor> getChildrenByType(TreeType... types) {
+        List<TreeCursor> result = new ArrayList<>();
+        for (TreeCursor child : getChildren()) {
+            for (TreeType type : types) {
+                if (child.getTree().getType() == type) {
+                    result.add(child);
+                }
+            }
+        }
+        return result;
+    }
+
     /**
      * Get the first child of the wrapped tree.
      *
@@ -199,22 +223,19 @@ public TreeCursor getLastChild() {
     /**
      * Recursively find every node in the tree that has the given {@code TreeType}.
      *
-     * @param type Tree type to find and return.
+     * @param types Types of children to return.
      * @return Returns the matching tree cursors.
      */
-    public List<TreeCursor> findChildrenByType(TreeType type) {
-        return findChildren(c -> c.getTree().getType() == type);
-    }
-
-    /**
-     * Recursively find every TOKEN tree in the tree that has the given {@code token}.
-     *
-     * @param token Token to find.
-     * @return Returns the matching tree cursors.
-     */
-    public List<TreeCursor> findChildrenByToken(IdlToken token) {
-        return findChildren(c -> c.getTree().getType() == TreeType.TOKEN
-                                 && c.getTree().tokens().iterator().next().getIdlToken() == token);
+    public List<TreeCursor> findChildrenByType(TreeType... types) {
+        return findChildren(c -> {
+            TreeType treeType = c.getTree().getType();
+            for (TreeType type : types) {
+                if (treeType == type) {
+                    return true;
+                }
+            }
+            return false;
+        });
     }
 
     /**