Skip to content

Commit

Permalink
[GR-32234] Call tree analysis with graph databases (e.g. Neo4j) v2.
Browse files Browse the repository at this point in the history
PullRequest: graal/9207
  • Loading branch information
cstancu committed Sep 21, 2021
2 parents 56b98df + a641ae4 commit 477fb02
Show file tree
Hide file tree
Showing 2 changed files with 291 additions and 0 deletions.
13 changes: 13 additions & 0 deletions docs/reference-manual/native-image/Reports.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,16 @@ The call tree report name has the structure `call_tree_<image_name>_<date_time>.
The object tree report name has the structure: `object_tree_<image_name>_<date_time>.txt`.
The image name is the name of the generated image, which can be set with the `-H:Name=<name>` option.
The `<date_time>` is in the `yyyyMMdd_HHmmss` format.

#### CSV files

The reports include a number of CSV files containing raw data for methods and their relationships.
The aim of these files is to enable this raw data to be easily imported into graph databases.
Graph databases can provide the following functionality:

* Sophisticated graphical visualization of the call tree graph that provides a different perspective compared to text-based formats.
* Ability to execute complex queries that can, for example, show a subset of the tree that causes a certain code path to be included in the call tree analysis.
This querying functionality is crucial in making big analysis call trees manageable.

The process to import the files into graph databases is specific to each database.
Please follow the instructions provided by the graph database providers to find out how to import them.
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,36 @@
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.oracle.graal.pointsto.BigBang;
import com.oracle.graal.pointsto.flow.InvokeTypeFlow;
import com.oracle.graal.pointsto.meta.AnalysisMethod;

import jdk.vm.ci.code.BytecodePosition;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.ResolvedJavaMethod;
import jdk.vm.ci.meta.ResolvedJavaType;

public final class CallTreePrinter {

/**
 * Matches a single ASCII letter that follows a word boundary, any upper-case ASCII letter, or a
 * literal dot. The {@code display} method concatenates every match found in a declaring type
 * name to build the type part of a method's display string.
 */
public static final Pattern CAMEL_CASE_PATTERN = Pattern.compile(
"\\b[a-zA-Z]|[A-Z]|\\.");

public static void print(BigBang bb, String reportsPath, String reportName) {
CallTreePrinter printer = new CallTreePrinter(bb);
printer.buildCallTree();
Expand All @@ -65,6 +77,8 @@ public static void print(BigBang bb, String reportsPath, String reportName) {
writer -> printer.printClasses(writer, false));
ReportUtils.report("list of used packages", reportsPath, "used_packages_" + reportName, "txt",
writer -> printer.printClasses(writer, true));

printCsvFiles(printer.methodToNode, reportsPath, reportName);
}

interface Node {
Expand Down Expand Up @@ -299,4 +313,268 @@ private static String packagePrefix(String name) {
}
return name.substring(0, lastDot);
}

/**
 * Walks the call tree from every entry point and writes the collected nodes and edges as a set
 * of CSV reports (VM entry point, methods, virtual methods, entry points, direct edges,
 * overridden-by edges and virtual edges).
 *
 * @param methodToNode all method nodes of the call tree, keyed by analysis method
 * @param reportsPath directory handed to {@code ReportUtils.report}
 * @param reportName suffix appended to each report file name
 */
private static void printCsvFiles(Map<AnalysisMethod, MethodNode> methodToNode, String reportsPath, String reportName) {
    // Virtual node ids start at the current value of the method id counter.
    final AtomicInteger nextVirtualNodeId = new AtomicInteger(MethodNode.methodId);

    final Map<Integer, Set<BciEndEdge>> directEdges = new HashMap<>();
    final Map<Integer, Set<BciEndEdge>> virtualEdges = new HashMap<>();
    final Map<Integer, Set<Integer>> overridenByEdges = new HashMap<>();

    final Map<List<String>, Integer> virtualNodes = new HashMap<>();
    // Filled while walking; not written to any report by this method.
    final Set<MethodNode> nonVirtualNodes = new HashSet<>();
    final Set<Integer> entryPointIds = new HashSet<>();

    for (MethodNode candidate : methodToNode.values()) {
        if (!candidate.isEntryPoint) {
            continue;
        }
        entryPointIds.add(candidate.id);
        walkNodes(candidate, directEdges, virtualEdges, overridenByEdges, virtualNodes, nonVirtualNodes, nextVirtualNodeId);
    }

    ReportUtils.report("call tree for vm entry point", reportsPath, "csv_call_tree_vm_" + reportName, "csv",
                    CallTreePrinter::printVMEntryPoint);

    ReportUtils.report("call tree for methods", reportsPath, "csv_call_tree_methods_" + reportName, "csv",
                    out -> printMethodNodes(methodToNode.values(), out));

    ReportUtils.report("call tree for virtual methods", reportsPath, "csv_call_tree_virtual_methods_" + reportName, "csv",
                    out -> printVirtualNodes(virtualNodes, out));

    ReportUtils.report("call tree for entry points", reportsPath, "csv_call_tree_entry_points_" + reportName, "csv",
                    out -> printEntryPointIds(entryPointIds, out));

    ReportUtils.report("call tree for direct edges", reportsPath, "csv_call_tree_direct_edges_" + reportName, "csv",
                    out -> printBciEdges(directEdges, out));

    ReportUtils.report("call tree for overriden by edges", reportsPath, "csv_call_tree_override_by_edges_" + reportName, "csv",
                    out -> printNonBciEdges(overridenByEdges, out));

    ReportUtils.report("call tree for virtual edges", reportsPath, "csv_call_tree_virtual_edges_" + reportName, "csv",
                    out -> printBciEdges(virtualEdges, out));
}

/**
 * Writes the CSV describing the single synthetic "VM" node: a header row followed by one row
 * with id {@code 0}.
 *
 * @param writer destination of the CSV rows
 */
private static void printVMEntryPoint(PrintWriter writer) {
    final String header = convertToCSV("Id", "Name");
    final String vmRow = convertToCSV("0", "VM");
    writer.println(header);
    writer.println(vmRow);
}

/**
 * Writes one CSV row per method node, preceded by a header row.
 *
 * @param methods the method nodes to print, in iteration order
 * @param writer destination of the CSV rows
 */
private static void printMethodNodes(Collection<MethodNode> methods, PrintWriter writer) {
    writer.println(convertToCSV("Id", "Name", "Type", "Parameters", "Return", "Display"));
    for (MethodNode node : methods) {
        final List<String> columns = methodNodeInfo(node);
        writer.println(convertToCSV(columns));
    }
}

/**
 * Builds the CSV columns for a method node from its id and its method.
 *
 * @param method the call tree node to describe
 * @return the column values produced by {@code resolvedJavaMethodInfo}
 */
private static List<String> methodNodeInfo(MethodNode method) {
    final List<String> columns = resolvedJavaMethodInfo(method.id, method.method);
    return columns;
}

/**
 * Recursively collects the edges reachable from {@code methodNode}.
 * <p>
 * For a direct invoke only the first callee (if any) is used and a direct edge is recorded. For
 * any other invoke a virtual node is looked up or created for the target method, a virtual edge
 * is recorded from the caller to it, and an overridden-by edge is recorded from the virtual node
 * to every callee. The walk descends only into callees that are {@code MethodNode} instances.
 * <p>
 * NOTE(review): this is plain recursion, so stack depth follows the depth of the call tree.
 *
 * @param methodNode the node whose invokes are processed
 * @param directEdges direct call edges, keyed by caller id
 * @param virtualEdges caller-to-virtual-node edges, keyed by caller id
 * @param overridenByEdges virtual-node-to-callee edges, keyed by virtual node id
 * @param virtualNodes virtual node ids keyed by the target method's info columns
 * @param nonVirtualNodes receives every method node used as an edge target
 * @param virtualNodeId source of fresh virtual node ids
 */
private static void walkNodes(MethodNode methodNode, Map<Integer, Set<BciEndEdge>> directEdges, Map<Integer, Set<BciEndEdge>> virtualEdges, Map<Integer, Set<Integer>> overridenByEdges,
                Map<List<String>, Integer> virtualNodes, Set<MethodNode> nonVirtualNodes, AtomicInteger virtualNodeId) {
    for (InvokeNode invoke : methodNode.invokes) {
        if (!invoke.isDirectInvoke) {
            final int virtualId = addVirtualNode(invoke, virtualNodes, virtualNodeId);
            addVirtualMethodEdge(methodNode.id, invoke, virtualId, virtualEdges);
            for (Node callee : invoke.callees) {
                addOverridenByEdge(virtualId, callee, overridenByEdges, nonVirtualNodes);
                if (callee instanceof MethodNode) {
                    walkNodes((MethodNode) callee, directEdges, virtualEdges, overridenByEdges, virtualNodes, nonVirtualNodes, virtualNodeId);
                }
            }
            continue;
        }

        if (invoke.callees.isEmpty()) {
            continue;
        }

        // A direct invoke contributes only its first callee.
        final Node callee = invoke.callees.get(0);
        addDirectEdge(methodNode.id, invoke, callee, directEdges, nonVirtualNodes);
        if (callee instanceof MethodNode) {
            walkNodes((MethodNode) callee, directEdges, virtualEdges, overridenByEdges, virtualNodes, nonVirtualNodes, virtualNodeId);
        }
    }
}

/**
 * Records a direct call edge from {@code nodeId} to the method behind {@code calleeNode}.
 *
 * @param nodeId id of the calling method node
 * @param invoke the invoke whose bytecode indexes label the edge
 * @param calleeNode a {@code MethodNode}, or otherwise a {@code MethodNodeReference} whose
 *            referenced method node is used
 * @param edges direct edges keyed by caller id; updated in place
 * @param nodes receives the resolved callee method node
 */
private static void addDirectEdge(int nodeId, InvokeNode invoke, Node calleeNode, Map<Integer, Set<BciEndEdge>> edges, Set<MethodNode> nodes) {
    final Set<BciEndEdge> outgoing = edges.computeIfAbsent(nodeId, unused -> new HashSet<>());

    final MethodNode target;
    if (calleeNode instanceof MethodNode) {
        target = (MethodNode) calleeNode;
    } else {
        target = ((MethodNodeReference) calleeNode).methodNode;
    }

    nodes.add(target);
    final BciEndEdge edge = new BciEndEdge(target.id, bytecodeIndexes(invoke));
    outgoing.add(edge);
}

/**
 * Collects the {@code bci} of each source reference of an invoke, in order.
 *
 * @param node the invoke to read source references from
 * @return a new list with one bytecode index per source reference
 */
private static List<Integer> bytecodeIndexes(InvokeNode node) {
    final Stream<Integer> indexes = Stream.of(node.sourceReferences).map(reference -> reference.bci);
    return indexes.collect(Collectors.toList());
}

/**
 * Returns the virtual node id for the target method of {@code node}, allocating a new id the
 * first time a given target method info is seen.
 *
 * @param node the invoke whose target method identifies the virtual node
 * @param virtualNodes virtual node ids keyed by method info columns; updated in place
 * @param virtualNodeId counter that supplies fresh ids
 * @return the existing or newly allocated virtual node id
 */
private static int addVirtualNode(InvokeNode node, Map<List<String>, Integer> virtualNodes, AtomicInteger virtualNodeId) {
    final List<String> key = virtualMethodInfo(node.targetMethod);
    Integer id = virtualNodes.get(key);
    if (id == null) {
        id = virtualNodeId.getAndIncrement();
        virtualNodes.put(key, id);
    }
    return id;
}

/**
 * Records an edge from a calling method node to a virtual node.
 *
 * @param startId id of the calling method node
 * @param invoke the invoke whose bytecode indexes label the edge
 * @param endId id of the virtual node
 * @param edges virtual edges keyed by caller id; updated in place
 */
private static void addVirtualMethodEdge(int startId, InvokeNode invoke, int endId, Map<Integer, Set<BciEndEdge>> edges) {
    final BciEndEdge edge = new BciEndEdge(endId, bytecodeIndexes(invoke));
    edges.computeIfAbsent(startId, unused -> new HashSet<>()).add(edge);
}

/**
 * Writes one CSV row per virtual node, preceded by a header row.
 *
 * @param virtualNodes virtual node ids keyed by method info columns
 * @param writer destination of the CSV rows
 */
private static void printVirtualNodes(Map<List<String>, Integer> virtualNodes, PrintWriter writer) {
    writer.println(convertToCSV("Id", "Name", "Type", "Parameters", "Return", "Display"));
    for (Map.Entry<List<String>, Integer> virtualNode : virtualNodes.entrySet()) {
        final List<String> columns = virtualMethodAndIdInfo(virtualNode);
        writer.println(convertToCSV(columns));
    }
}

/**
 * Builds the CSV columns of a virtual node: the node id followed by every method info column
 * except the first one (the id placeholder).
 *
 * @param entry method info columns mapped to the virtual node id
 * @return a new list starting with the id as a string
 */
private static List<String> virtualMethodAndIdInfo(Map.Entry<List<String>, Integer> entry) {
    final List<String> info = entry.getKey();
    final List<String> columns = new ArrayList<>(info.size() + 1);
    columns.add(String.valueOf(entry.getValue()));
    // Skip the first info column; the id written above takes its place.
    info.stream().skip(1).forEach(columns::add);
    return columns;
}

/**
 * Writes the ids of all entry points, one per line, preceded by a header row.
 *
 * @param entryPoints ids of the entry point method nodes
 * @param writer destination of the CSV rows
 */
private static void printEntryPointIds(Set<Integer> entryPoints, PrintWriter writer) {
    final String header = convertToCSV("Id");
    writer.println(header);
    for (Integer entryPointId : entryPoints) {
        writer.println(entryPointId);
    }
}

/**
 * Records an overridden-by edge from a virtual node to the method behind {@code calleeNode}.
 *
 * @param nodeId id of the virtual node
 * @param calleeNode a {@code MethodNode}, or otherwise a {@code MethodNodeReference} whose
 *            referenced method node is used
 * @param edges overridden-by edges keyed by virtual node id; updated in place
 * @param nodes receives the resolved callee method node
 */
private static void addOverridenByEdge(int nodeId, Node calleeNode, Map<Integer, Set<Integer>> edges, Set<MethodNode> nodes) {
    final Set<Integer> implementations = edges.computeIfAbsent(nodeId, unused -> new HashSet<>());

    final MethodNode target;
    if (calleeNode instanceof MethodNode) {
        target = (MethodNode) calleeNode;
    } else {
        target = ((MethodNodeReference) calleeNode).methodNode;
    }

    nodes.add(target);
    implementations.add(target.id);
}

/**
 * Writes one CSV row per edge carrying bytecode indexes, preceded by a header row.
 * <p>
 * Rows are emitted directly in the iteration order of {@code edges} and of each edge set. They
 * are deliberately not gathered into an intermediate {@code Set<BciEdge>}: {@code BciEdge} does
 * not override {@code equals}/{@code hashCode}, so such a set would order rows by identity hash
 * code and the row order would change from run to run.
 *
 * @param edges end edges keyed by start node id
 * @param writer destination of the CSV rows
 */
private static void printBciEdges(Map<Integer, Set<BciEndEdge>> edges, PrintWriter writer) {
    writer.println(convertToCSV("StartId", "EndId", "BytecodeIndexes"));
    edges.entrySet().stream()
                    .flatMap(entry -> entry.getValue().stream().map(endEdge -> new BciEdge(entry.getKey(), endEdge)))
                    .map(edge -> convertToCSV(String.valueOf(edge.startId), String.valueOf(edge.endEdge.id), showBytecodeIndexes(edge.endEdge.bytecodeIndexes)))
                    .forEach(writer::println);
}

/**
 * Renders bytecode indexes as a single string with {@code "->"} between consecutive values.
 *
 * @param bytecodeIndexes the indexes to render, in order
 * @return the joined text, or an empty string for an empty list
 */
private static String showBytecodeIndexes(List<Integer> bytecodeIndexes) {
    final StringBuilder text = new StringBuilder();
    String separator = "";
    for (Integer index : bytecodeIndexes) {
        text.append(separator).append(String.valueOf(index));
        separator = "->";
    }
    return text.toString();
}

/**
 * Writes one CSV row per edge without bytecode indexes, preceded by a header row.
 * <p>
 * Rows are emitted directly in the iteration order of {@code edges} and of each id set. They are
 * deliberately not gathered into an intermediate {@code Set<NonBciEdge>}: {@code NonBciEdge}
 * does not override {@code equals}/{@code hashCode}, so such a set would order rows by identity
 * hash code and the row order would change from run to run.
 *
 * @param edges end node ids keyed by start node id
 * @param writer destination of the CSV rows
 */
private static void printNonBciEdges(Map<Integer, Set<Integer>> edges, PrintWriter writer) {
    writer.println(convertToCSV("StartId", "EndId"));
    edges.entrySet().stream()
                    .flatMap(entry -> entry.getValue().stream().map(endId -> new NonBciEdge(entry.getKey(), endId)))
                    .map(edge -> convertToCSV(String.valueOf(edge.startId), String.valueOf(edge.endId)))
                    .forEach(writer::println);
}

/**
 * Builds the info columns of a virtual method. No id is supplied, so the first column of the
 * result is {@code null}.
 *
 * @param method the target method of a non-direct invoke
 * @return the column values produced by {@code resolvedJavaMethodInfo}
 */
private static List<String> virtualMethodInfo(AnalysisMethod method) {
    final Integer noId = null;
    return resolvedJavaMethodInfo(noId, method);
}

/**
 * Builds the CSV columns describing a method: id, name, declaring type, parameters, return type
 * and display string.
 *
 * @param id the node id, or {@code null} to leave the first column {@code null}
 * @param method the method to describe
 * @return a fixed-size list with the six column values
 */
private static List<String> resolvedJavaMethodInfo(Integer id, ResolvedJavaMethod method) {
    // TODO method parameter types are opaque, but could in the future be split out and linked
    // together, e.g. each method could BELONG to a type, and a method could have PARAMETER
    // relationships with N types.
    // See https://neo4j.com/developer/guide-import-csv/#_converting_data_values_with_load_csv
    // for examples.
    final String parameters;
    if (method.getSignature().getParameterCount(false) > 0) {
        // Commas are stripped so that the parameter list stays within one CSV column.
        parameters = method.format("%P").replace(",", "");
    } else {
        parameters = "empty";
    }

    final String idColumn = (id != null) ? Integer.toString(id) : null;
    final String name = method.getName();
    final String declaringType = method.getDeclaringClass().toJavaName(true);
    final String returnType = method.getSignature().getReturnType(null).toJavaName(true);
    final String displayName = display(method);

    return Arrays.asList(idColumn, name, declaringType, parameters, returnType, displayName);
}

/**
 * Builds the display string of a method: a type part, a dot, and the method name.
 * <p>
 * When the declaring type's kind is {@code JavaKind.Object}, the type part is the concatenation
 * of all {@code CAMEL_CASE_PATTERN} matches in the type's Java name; otherwise it is the full
 * Java name.
 *
 * @param method the method to build the display string for
 * @return the display string
 */
private static String display(ResolvedJavaMethod method) {
    final ResolvedJavaType declaringType = method.getDeclaringClass();
    final String fullTypeName = declaringType.toJavaName(true);

    if (declaringType.getJavaKind() != JavaKind.Object) {
        return fullTypeName + "." + method.getName();
    }

    final StringBuilder shortened = new StringBuilder();
    final Matcher parts = CAMEL_CASE_PATTERN.matcher(fullTypeName);
    while (parts.find()) {
        shortened.append(parts.group());
    }
    return shortened.append(".").append(method.getName()).toString();
}

/**
 * Joins the given fields into one CSV row.
 *
 * @param data the field values, in column order
 * @return the comma-separated row
 * @see #convertToCSV(List)
 */
private static String convertToCSV(String... data) {
    return convertToCSV(Arrays.asList(data));
}

/**
 * Joins the given fields into one CSV row. A field that contains a comma, a double quote or a
 * line break is wrapped in double quotes with embedded quotes doubled, so that it cannot shift
 * or split columns. Fields without those characters are written unchanged.
 *
 * @param data the field values, in column order
 * @return the comma-separated row
 */
private static String convertToCSV(List<String> data) {
    return data.stream()
                    .map(field -> escapeCsvField(field))
                    .collect(Collectors.joining(","));
}

/**
 * Quotes a single CSV field when it contains a character that would otherwise break the row.
 */
private static String escapeCsvField(String field) {
    if (field == null) {
        // Same text that String.join produces for a null element.
        return "null";
    }
    final boolean needsQuoting = field.indexOf(',') >= 0 || field.indexOf('"') >= 0 || field.indexOf('\n') >= 0 || field.indexOf('\r') >= 0;
    if (!needsQuoting) {
        return field;
    }
    return "\"" + field.replace("\"", "\"\"") + "\"";
}

/**
 * An edge between two node ids that carries no bytecode index information.
 */
private static final class NonBciEdge {

    /** Id of the node the edge starts at. */
    final int startId;
    /** Id of the node the edge ends at. */
    final int endId;

    private NonBciEdge(int from, int to) {
        startId = from;
        endId = to;
    }
}

/**
 * An edge from a start node id to an end edge, which holds the end node id and the bytecode
 * indexes of the call.
 */
private static final class BciEdge {
    /** Id of the node the edge starts at. */
    final int startId;
    /** End node id together with the bytecode indexes. */
    final BciEndEdge endEdge;

    private BciEdge(int from, BciEndEdge end) {
        startId = from;
        endEdge = end;
    }
}

/**
 * The end of an edge: the target node id plus the bytecode indexes of the call. Instances are
 * compared by value (id and index list), which lets them be de-duplicated in hash sets.
 */
private static final class BciEndEdge {
    /** Id of the node the edge ends at. */
    final int id;
    /** Bytecode indexes attached to the edge. */
    final List<Integer> bytecodeIndexes;

    private BciEndEdge(int targetId, List<Integer> indexes) {
        id = targetId;
        bytecodeIndexes = indexes;
    }

    @Override
    public boolean equals(Object other) {
        if (other == this) {
            return true;
        }
        // The class is final, so instanceof is equivalent to an exact class comparison.
        if (!(other instanceof BciEndEdge)) {
            return false;
        }
        final BciEndEdge that = (BciEndEdge) other;
        if (id != that.id) {
            return false;
        }
        return bytecodeIndexes.equals(that.bytecodeIndexes);
    }

    @Override
    public int hashCode() {
        // Same value as Objects.hash(id, bytecodeIndexes).
        return 31 * (31 + Integer.hashCode(id)) + Objects.hashCode(bytecodeIndexes);
    }
}
}

0 comments on commit 477fb02

Please sign in to comment.