Output call tree analysis as CSV for use in graph database

* Methods and virtual methods are represented as graph nodes. * Direct, virtual and overridden-by relationships have been mapped. * Bytecode indexes are part of the relationships. * A method can interact with others multiple times, with each bytecode index indicating the origin of the call within the origin method. * Create symbolic links to the latest reports/*.csv files, in order to provide path stability and make it easier to create scripts for graph db vendors.
oracle · Jun 29, 2021 · 8896e6f · 8896e6f
1 parent be4c9b1
commit 8896e6f
Show file tree

Hide file tree

Showing 3 changed files with 309 additions and 2 deletions.
diff --git a/docs/reference-manual/native-image/Reports.md b/docs/reference-manual/native-image/Reports.md
@@ -151,3 +151,16 @@ The call tree report name has the structure `call_tree_<image_name>_<date_time>.
 The object tree report name has the structure: `object_tree_<image_name>_<date_time>.txt`.
 The image name is the name of the generated image, which can be set with the `-H:Name=<name>` option.
 The `<date_time>` is in the `yyyyMMdd_HHmmss` format.
+
+#### CSV files
+
+The reports include a number of CSV files containing raw data for methods and their relationships.
+The aim of these files is to enable this raw data to be easily imported into graph databases.
+Graph databases can provide the following functionality:
+
+* Sophisticated graphical visualization of the call tree graph that provides a different perspective compared to text-based formats.
+* Ability to execute complex queries that can, for example, show a subset of the tree that causes a certain code path to be included in the call tree analysis.
+This querying functionality is crucial in making big analysis call trees manageable.
+
+The process to import the files into graph databases is specific to each database.
+Please follow the instructions provided by the graph database providers to find out how to import them.
diff --git a/.../src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/reports/CallTreePrinter.java b/.../src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/reports/CallTreePrinter.java
@@ -32,28 +32,46 @@
 import static com.oracle.graal.pointsto.reports.ReportUtils.methodComparator;
 
 import java.io.File;
+import java.io.IOException;
 import java.io.PrintWriter;
+import java.nio.file.Files;
+import java.nio.file.LinkOption;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Deque;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Consumer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import com.oracle.graal.pointsto.BigBang;
 import com.oracle.graal.pointsto.flow.InvokeTypeFlow;
 import com.oracle.graal.pointsto.meta.AnalysisMethod;
 
 import jdk.vm.ci.code.BytecodePosition;
+import jdk.vm.ci.meta.JavaKind;
 import jdk.vm.ci.meta.ResolvedJavaMethod;
+import jdk.vm.ci.meta.ResolvedJavaType;
 
 public final class CallTreePrinter {
 
+    /**
+     * Selects the characters that {@code display} keeps when shortening a qualified type name: a
+     * letter at a word boundary, any upper-case letter, or a literal dot.
+     */
+    public static final Pattern CAMEL_CASE_PATTERN = Pattern.compile(
+                    "\\b[a-zA-Z]|[A-Z]|\\.");
+
     public static void print(BigBang bigbang, String path, String reportName) {
         CallTreePrinter printer = new CallTreePrinter(bigbang);
         printer.buildCallTree();
@@ -66,6 +84,8 @@ public static void print(BigBang bigbang, String path, String reportName) {
                         writer -> printer.printClasses(writer, false));
         ReportUtils.report("list of used packages", path + File.separatorChar + "reports", "used_packages_" + reportName, "txt",
                         writer -> printer.printClasses(writer, true));
+
+        printCsvFiles(printer.methodToNode, path, reportName);
     }
 
     interface Node {
@@ -300,4 +320,276 @@ private static String packagePrefix(String name) {
         }
         return name.substring(0, lastDot);
     }
+
+    /**
+     * Walks the call tree from every entry point and writes it out as a set of CSV reports: the
+     * VM entry point, method nodes, virtual method nodes, entry point ids, and the direct,
+     * overriden-by and virtual edges.
+     *
+     * @param methodToNode the method nodes of the call tree, keyed by analysis method
+     * @param path base directory; the files go into its {@code reports} sub-directory
+     * @param reportName suffix appended to the file name prefix of each report
+     */
+    private static void printCsvFiles(Map<AnalysisMethod, MethodNode> methodToNode, String path, String reportName) {
+        final Map<Integer, Set<BciEndEdge>> directEdges = new HashMap<>();
+        final Map<Integer, Set<BciEndEdge>> virtualEdges = new HashMap<>();
+        final Map<Integer, Set<Integer>> overridenByEdges = new HashMap<>();
+
+        final Set<Integer> entryPointIds = new HashSet<>();
+        final Set<MethodNode> nonVirtualNodes = new HashSet<>();
+        final Map<List<String>, Integer> virtualNodes = new HashMap<>();
+
+        // Ids of virtual nodes start at the next available method id
+        final AtomicInteger nextVirtualNodeId = new AtomicInteger(MethodNode.methodId);
+
+        for (MethodNode node : methodToNode.values()) {
+            if (node.isEntryPoint) {
+                entryPointIds.add(node.id);
+                walkNodes(node, directEdges, virtualEdges, overridenByEdges, virtualNodes, nonVirtualNodes, nextVirtualNodeId);
+            }
+        }
+
+        toCsvFile("call tree for vm entry point", path, "csv_call_tree_vm", reportName, CallTreePrinter::printVMEntryPoint);
+        toCsvFile("call tree for methods", path, "csv_call_tree_methods", reportName, out -> printMethodNodes(methodToNode.values(), out));
+        toCsvFile("call tree for virtual methods", path, "csv_call_tree_virtual_methods", reportName, out -> printVirtualNodes(virtualNodes, out));
+        toCsvFile("call tree for entry points", path, "csv_call_tree_entry_points", reportName, out -> printEntryPointIds(entryPointIds, out));
+        toCsvFile("call tree for direct edges", path, "csv_call_tree_direct_edges", reportName, out -> printBciEdges(directEdges, out));
+        toCsvFile("call tree for overriden by edges", path, "csv_call_tree_override_by_edges", reportName, out -> printNonBciEdges(overridenByEdges, out));
+        toCsvFile("call tree for virtual edges", path, "csv_call_tree_virtual_edges", reportName, out -> printBciEdges(virtualEdges, out));
+    }
+
+    /**
+     * Writes one CSV report into {@code <path>/reports} and (re)creates a symbolic link named
+     * {@code <prefix>.csv} next to it that points at the freshly written file, so that the latest
+     * report is reachable under a stable name.
+     *
+     * @param description the description of the report
+     * @param path base directory; the report goes into its {@code reports} sub-directory
+     * @param prefix file name prefix of the report, also used as the name of the link
+     * @param reportName suffix appended to {@code prefix} to form the report name
+     * @param reporter a consumer that writes the CSV content to a PrintWriter
+     * @throws RuntimeException if the previous link cannot be removed or the new one cannot be
+     *             created
+     */
+    private static void toCsvFile(String description, String path, String prefix, String reportName, Consumer<PrintWriter> reporter) {
+        final Path reportsPath = Paths.get(path, "reports");
+        final String name = prefix + "_" + reportName;
+        final String csvFile = ReportUtils.report(description, reportsPath.toString(), name, "csv", reporter);
+        final Path csvLink = reportsPath.resolve(prefix + ".csv");
+
+        try {
+            // deleteIfExists removes a symbolic link itself rather than its target, so a stale or
+            // dangling link is replaced without a separate exists() check, and a failed delete is
+            // reported with its real cause instead of being ignored.
+            Files.deleteIfExists(csvLink);
+            // The value returned by ReportUtils.report is used as-is as the link target
+            Files.createSymbolicLink(csvLink, Paths.get(csvFile));
+        } catch (IOException e) {
+            throw new RuntimeException("Could not link " + csvLink + " to " + csvFile, e);
+        }
+    }
+
+    private static void printVMEntryPoint(PrintWriter writer) {
+        writer.println(convertToCSV("Id", "Name"));
+        writer.println(convertToCSV("0", "VM"));
+    }
+
+    private static void printMethodNodes(Collection<MethodNode> methods, PrintWriter writer) {
+        writer.println(convertToCSV("Id", "Name", "Type", "Parameters", "Return", "Display"));
+        methods.stream()
+                        .map(CallTreePrinter::methodNodeInfo)
+                        .map(CallTreePrinter::convertToCSV)
+                        .forEach(writer::println);
+    }
+
+    /** Builds the CSV columns for a method node from the node's id and its method. */
+    private static List<String> methodNodeInfo(MethodNode method) {
+        return resolvedJavaMethodInfo(method.id, method.method);
+    }
+
+    /**
+     * Recursively collects the CSV edges for the invokes of {@code methodNode}.
+     * <p>
+     * For a direct invoke only the first callee is considered: a direct edge to it is recorded
+     * and, when that callee is a {@code MethodNode}, the walk continues into it. For any other
+     * invoke a virtual node is looked up or created for the invoke's target method, a virtual
+     * edge from the caller to that node is recorded, and every callee gets an overriden-by edge
+     * from the virtual node before being walked in turn.
+     * <p>
+     * Callees that are not {@code MethodNode} instances are recorded as edge targets but are not
+     * descended into.
+     */
+    private static void walkNodes(MethodNode methodNode, Map<Integer, Set<BciEndEdge>> directEdges, Map<Integer, Set<BciEndEdge>> virtualEdges, Map<Integer, Set<Integer>> overridenByEdges,
+                    Map<List<String>, Integer> virtualNodes, Set<MethodNode> nonVirtualNodes, AtomicInteger virtualNodeId) {
+        for (InvokeNode invoke : methodNode.invokes) {
+            if (invoke.isDirectInvoke) {
+                // A direct invoke with no callees contributes nothing
+                if (invoke.callees.size() > 0) {
+                    Node calleeNode = invoke.callees.get(0);
+                    addDirectEdge(methodNode.id, invoke, calleeNode, directEdges, nonVirtualNodes);
+                    if (calleeNode instanceof MethodNode) {
+                        walkNodes((MethodNode) calleeNode, directEdges, virtualEdges, overridenByEdges, virtualNodes, nonVirtualNodes, virtualNodeId);
+                    }
+                }
+            } else {
+                // The virtual node sits between the caller and the concrete callees
+                final int nodeId = addVirtualNode(invoke, virtualNodes, virtualNodeId);
+                addVirtualMethodEdge(methodNode.id, invoke, nodeId, virtualEdges);
+                for (Node calleeNode : invoke.callees) {
+                    addOverridenByEdge(nodeId, calleeNode, overridenByEdges, nonVirtualNodes);
+                    if (calleeNode instanceof MethodNode) {
+                        walkNodes((MethodNode) calleeNode, directEdges, virtualEdges, overridenByEdges, virtualNodes, nonVirtualNodes, virtualNodeId);
+                    }
+                }
+            }
+        }
+    }
+
+    private static void addDirectEdge(int nodeId, InvokeNode invoke, Node calleeNode, Map<Integer, Set<BciEndEdge>> edges, Set<MethodNode> nodes) {
+        Set<BciEndEdge> nodeEdges = edges.computeIfAbsent(nodeId, k -> new HashSet<>());
+        MethodNode methodNode = calleeNode instanceof MethodNode
+                        ? (MethodNode) calleeNode
+                        : ((MethodNodeReference) calleeNode).methodNode;
+        nodes.add(methodNode);
+        nodeEdges.add(new BciEndEdge(methodNode.id, bytecodeIndexes(invoke)));
+    }
+
+    /** Returns the {@code bci} of every source reference of the invoke, in order. */
+    private static List<Integer> bytecodeIndexes(InvokeNode node) {
+        return Arrays.stream(node.sourceReferences)
+                        .map(reference -> reference.bci)
+                        .collect(Collectors.toList());
+    }
+
+    /**
+     * Returns the id of the virtual node for the invoke's target method, assigning the next id
+     * from {@code virtualNodeId} the first time that method description is seen.
+     */
+    private static int addVirtualNode(InvokeNode node, Map<List<String>, Integer> virtualNodes, AtomicInteger virtualNodeId) {
+        final List<String> key = virtualMethodInfo(node.targetMethod);
+        Integer id = virtualNodes.get(key);
+        if (id == null) {
+            id = virtualNodeId.getAndIncrement();
+            virtualNodes.put(key, id);
+        }
+        return id;
+    }
+
+    /**
+     * Records an edge from {@code startId} to the virtual node {@code endId}, tagged with the
+     * bytecode indexes of {@code invoke}.
+     */
+    private static void addVirtualMethodEdge(int startId, InvokeNode invoke, int endId, Map<Integer, Set<BciEndEdge>> edges) {
+        edges.computeIfAbsent(startId, k -> new HashSet<>())
+                        .add(new BciEndEdge(endId, bytecodeIndexes(invoke)));
+    }
+
+    private static void printVirtualNodes(Map<List<String>, Integer> virtualNodes, PrintWriter writer) {
+        writer.println(convertToCSV("Id", "Name", "Type", "Parameters", "Return", "Display"));
+        virtualNodes.entrySet().stream()
+                        .map(CallTreePrinter::virtualMethodAndIdInfo)
+                        .map(CallTreePrinter::convertToCSV)
+                        .forEach(writer::println);
+    }
+
+    /**
+     * Builds the CSV columns of a virtual node: the node id from the entry's value, followed by
+     * the entry key's columns from index 1 onwards (index 0 of the key is skipped).
+     */
+    private static List<String> virtualMethodAndIdInfo(Map.Entry<List<String>, Integer> entry) {
+        final List<String> info = entry.getKey();
+        final int columnCount = info.size();
+        final List<String> row = new ArrayList<>(columnCount + 1);
+        row.add(String.valueOf(entry.getValue()));
+        for (int column = 1; column < columnCount; column++) {
+            row.add(info.get(column));
+        }
+        return row;
+    }
+
+    /** Writes the entry points CSV: an {@code Id} header row followed by one id per row. */
+    private static void printEntryPointIds(Set<Integer> entryPoints, PrintWriter writer) {
+        writer.println(convertToCSV("Id"));
+        for (Integer entryPointId : entryPoints) {
+            writer.println(entryPointId);
+        }
+    }
+
+    private static void addOverridenByEdge(int nodeId, Node calleeNode, Map<Integer, Set<Integer>> edges, Set<MethodNode> nodes) {
+        Set<Integer> nodeEdges = edges.computeIfAbsent(nodeId, k -> new HashSet<>());
+        MethodNode methodNode = calleeNode instanceof MethodNode
+                        ? (MethodNode) calleeNode
+                        : ((MethodNodeReference) calleeNode).methodNode;
+        nodes.add(methodNode);
+        nodeEdges.add(methodNode.id);
+    }
+
+    /**
+     * Writes an edges CSV with bytecode indexes: a header row followed by one row per
+     * (start id, end edge) pair.
+     * <p>
+     * The rows are streamed straight from the map. {@code BciEdge} does not define
+     * {@code equals}/{@code hashCode}, so gathering the edges in a {@code Set} first would not
+     * remove anything and would only order the rows by identity hash.
+     *
+     * @param edges end edges grouped by the id of the node they start from
+     * @param writer destination of the CSV rows
+     */
+    private static void printBciEdges(Map<Integer, Set<BciEndEdge>> edges, PrintWriter writer) {
+        writer.println(convertToCSV("StartId", "EndId", "BytecodeIndexes"));
+        edges.entrySet().stream()
+                        .flatMap(entry -> entry.getValue().stream().map(endEdge -> new BciEdge(entry.getKey(), endEdge)))
+                        .map(edge -> convertToCSV(String.valueOf(edge.startId), String.valueOf(edge.endEdge.id), showBytecodeIndexes(edge.endEdge.bytecodeIndexes)))
+                        .forEach(writer::println);
+    }
+
+    private static String showBytecodeIndexes(List<Integer> bytecodeIndexes) {
+        return bytecodeIndexes.stream()
+                        .map(String::valueOf)
+                        .collect(Collectors.joining("->"));
+    }
+
+    /**
+     * Writes an edges CSV without bytecode indexes: a header row followed by one row per
+     * (start id, end id) pair.
+     * <p>
+     * The rows are streamed straight from the map. {@code NonBciEdge} does not define
+     * {@code equals}/{@code hashCode}, so gathering the edges in a {@code Set} first would not
+     * remove anything and would only order the rows by identity hash.
+     *
+     * @param edges end node ids grouped by the id of the node they start from
+     * @param writer destination of the CSV rows
+     */
+    private static void printNonBciEdges(Map<Integer, Set<Integer>> edges, PrintWriter writer) {
+        writer.println(convertToCSV("StartId", "EndId"));
+        edges.entrySet().stream()
+                        .flatMap(entry -> entry.getValue().stream().map(endId -> new NonBciEdge(entry.getKey(), endId)))
+                        .map(edge -> convertToCSV(String.valueOf(edge.startId), String.valueOf(edge.endId)))
+                        .forEach(writer::println);
+    }
+
+    /**
+     * Describes a virtual call target in the same column layout as a method node, with
+     * {@code null} in place of the id.
+     */
+    private static List<String> virtualMethodInfo(AnalysisMethod method) {
+        return resolvedJavaMethodInfo(null, method);
+    }
+
+    /**
+     * Builds the CSV columns of a method: id, name, declaring type, parameters, return type and
+     * display name.
+     *
+     * @param id the node id, or {@code null} to leave the id column as {@code null}
+     * @param method the method to describe
+     * @return a fixed-size list with the six columns, in the order given above
+     */
+    private static List<String> resolvedJavaMethodInfo(Integer id, ResolvedJavaMethod method) {
+        // TODO method parameter types are opaque, but could in the future be split out and link
+        // together
+        // e.g. each method could BELONG to a type, and a method could have PARAMETER relationships
+        // with N types
+        // see https://neo4j.com/developer/guide-import-csv/#_converting_data_values_with_load_csv
+        // for examples
+        final String parameters;
+        if (method.getSignature().getParameterCount(false) > 0) {
+            // Commas are dropped so that the parameter list stays within a single CSV column
+            parameters = method.format("%P").replace(",", "");
+        } else {
+            parameters = "empty";
+        }
+
+        final String idColumn = id == null ? null : Integer.toString(id);
+        final String name = method.getName();
+        final String declaringType = method.getDeclaringClass().toJavaName(true);
+        final String returnType = method.getSignature().getReturnType(null).toJavaName(true);
+
+        // Arrays.asList is used because the id column may be null
+        return Arrays.asList(idColumn, name, declaringType, parameters, returnType, display(method));
+    }
+
+    /**
+     * Returns a short display name of the form {@code <type>.<method name>}. When the declaring
+     * type's kind is {@code JavaKind.Object}, the type name is reduced to the characters matched
+     * by {@code CAMEL_CASE_PATTERN}; otherwise the full type name is used.
+     */
+    private static String display(ResolvedJavaMethod method) {
+        final ResolvedJavaType declaringType = method.getDeclaringClass();
+        final String qualifiedName = declaringType.toJavaName(true);
+        if (declaringType.getJavaKind() != JavaKind.Object) {
+            return qualifiedName + "." + method.getName();
+        }
+
+        // Concatenate every match of the pattern to form the abbreviated type name
+        final StringBuilder abbreviated = new StringBuilder();
+        final Matcher matcher = CAMEL_CASE_PATTERN.matcher(qualifiedName);
+        while (matcher.find()) {
+            abbreviated.append(matcher.group());
+        }
+        return abbreviated.append(".").append(method.getName()).toString();
+    }
+
+    private static String convertToCSV(String... data) {
+        return String.join(",", data);
+    }
+
+    private static String convertToCSV(List<String> data) {
+        return String.join(",", data);
+    }
+
+    /**
+     * An edge between two node ids that carries no bytecode indexes. It does not override
+     * {@code equals}/{@code hashCode}, so instances compare by identity.
+     */
+    private static final class NonBciEdge {
+
+        // Id of the node the edge starts from
+        final int startId;
+        // Id of the node the edge ends at
+        final int endId;
+
+        private NonBciEdge(int startId, int endId) {
+            this.startId = startId;
+            this.endId = endId;
+        }
+    }
+
+    /**
+     * An edge from a start node id to a {@code BciEndEdge}, i.e. to an end node id together with
+     * bytecode indexes. It does not override {@code equals}/{@code hashCode}, so instances
+     * compare by identity.
+     */
+    private static final class BciEdge {
+        // Id of the node the edge starts from
+        final int startId;
+        // End node id and bytecode indexes of the edge
+        final BciEndEdge endEdge;
+
+        private BciEdge(int startId, BciEndEdge endEdge) {
+            this.startId = startId;
+            this.endEdge = endEdge;
+        }
+    }
+
+    /**
+     * The end of an edge: the id of the node the edge points at plus the bytecode indexes attached
+     * to the edge. Two instances are equal when both the id and the bytecode indexes are equal,
+     * which lets sets of end edges drop duplicates.
+     */
+    private static final class BciEndEdge {
+        final int id;
+        final List<Integer> bytecodeIndexes;
+
+        private BciEndEdge(int id, List<Integer> bytecodeIndexes) {
+            this.id = id;
+            this.bytecodeIndexes = bytecodeIndexes;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (o == this) {
+                return true;
+            }
+            // The class is final, so an instanceof test accepts exactly the same objects as a
+            // getClass() comparison
+            if (!(o instanceof BciEndEdge)) {
+                return false;
+            }
+            final BciEndEdge that = (BciEndEdge) o;
+            if (id != that.id) {
+                return false;
+            }
+            return bytecodeIndexes.equals(that.bytecodeIndexes);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(id, bytecodeIndexes);
+        }
+    }
 }
diff --git a/...tevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/reports/ReportUtils.java b/...tevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/reports/ReportUtils.java
@@ -62,18 +62,20 @@ public class ReportUtils {
     /**
      * Print a report in the format: path/name_timeStamp.extension. The path is relative to the
      * working directory.
-     *
+     * 
      * @param description the description of the report
      * @param path the path (relative to the working directory if the argument represents a relative
      *            path)
      * @param name the name of the report
      * @param extension the extension of the report
      * @param reporter a consumer that writes to a PrintWriter
+     * @return the time-stamped file name of the created report (not including {@code path})
      */
-    public static void report(String description, String path, String name, String extension, Consumer<PrintWriter> reporter) {
+    public static String report(String description, String path, String name, String extension, Consumer<PrintWriter> reporter) {
         String fileName = timeStampedFileName(name, extension);
         Path reportDir = Paths.get(path);
         reportImpl(description, reportDir, fileName, reporter);
+        return fileName;
     }
 
     public static String timeStampedFileName(String name, String extension) {