Skip to content

Commit

Permalink
Output call tree analysis as CSV for use in graph database
Browse files Browse the repository at this point in the history
* Methods and virtual methods are represented with graph nodes.
* Direct, virtual and overridden-by relationships have been mapped.
* Bytecode indexes are part of the relationships.
* A method can interact with others multiple times,
with each bytecode index indicating the origin of the call
within the origin method.
* Create symbolic links to latest reports/*.csv files,
in order to provide path stability,
and make it easier to create scripts for graph db vendors.
  • Loading branch information
galderz committed Jun 29, 2021
1 parent be4c9b1 commit 8896e6f
Show file tree
Hide file tree
Showing 3 changed files with 309 additions and 2 deletions.
13 changes: 13 additions & 0 deletions docs/reference-manual/native-image/Reports.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,16 @@ The call tree report name has the structure `call_tree_<image_name>_<date_time>.
The object tree report name has the structure: `object_tree_<image_name>_<date_time>.txt`.
The image name is the name of the generated image, which can be set with the `-H:Name=<name>` option.
The `<date_time>` is in the `yyyyMMdd_HHmmss` format.

#### CSV files

The reports include a number of CSV files containing raw data for methods and their relationships.
The aim of these files is to enable this raw data to be easily imported into graph databases.
Graph databases can provide the following functionality:

* Sophisticated graphical visualization of the call tree graph that provides a different perspective compared to text-based formats.
* Ability to execute complex queries that can, for example, show a subset of the tree that causes a certain code path to be included in the call tree analysis.
This querying functionality is crucial in making big analysis call trees manageable.

The process to import the files into graph databases is specific to each database.
Please follow the instructions provided by the graph database providers to find out how to import them.
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,46 @@
import static com.oracle.graal.pointsto.reports.ReportUtils.methodComparator;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.oracle.graal.pointsto.BigBang;
import com.oracle.graal.pointsto.flow.InvokeTypeFlow;
import com.oracle.graal.pointsto.meta.AnalysisMethod;

import jdk.vm.ci.code.BytecodePosition;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.ResolvedJavaMethod;
import jdk.vm.ci.meta.ResolvedJavaType;

public final class CallTreePrinter {

/**
 * Selects the characters kept when a qualified type name is abbreviated for display: a letter
 * that starts a word, any upper-case letter, or a dot. Concatenating all matches turns, for
 * example, {@code java.lang.String} into {@code j.l.S}.
 */
public static final Pattern CAMEL_CASE_PATTERN = Pattern.compile(
"\\b[a-zA-Z]|[A-Z]|\\.");

public static void print(BigBang bigbang, String path, String reportName) {
CallTreePrinter printer = new CallTreePrinter(bigbang);
printer.buildCallTree();
Expand All @@ -66,6 +84,8 @@ public static void print(BigBang bigbang, String path, String reportName) {
writer -> printer.printClasses(writer, false));
ReportUtils.report("list of used packages", path + File.separatorChar + "reports", "used_packages_" + reportName, "txt",
writer -> printer.printClasses(writer, true));

printCsvFiles(printer.methodToNode, path, reportName);
}

interface Node {
Expand Down Expand Up @@ -300,4 +320,276 @@ private static String packagePrefix(String name) {
}
return name.substring(0, lastDot);
}

/**
 * Walks the call tree from every entry point and writes the collected nodes and edges as a set
 * of CSV reports (VM node, methods, virtual methods, entry points, and the three edge kinds).
 *
 * @param methodToNode all method nodes of the call tree, keyed by their analysis method
 * @param path base directory; the reports are written below its {@code reports} sub-directory
 * @param reportName suffix appended to each report's file name prefix
 */
private static void printCsvFiles(Map<AnalysisMethod, MethodNode> methodToNode, String path, String reportName) {
    // Virtual method nodes are numbered starting at the next free method id.
    final AtomicInteger nextVirtualId = new AtomicInteger(MethodNode.methodId);

    final Set<Integer> entryPoints = new HashSet<>();
    final Set<MethodNode> concreteNodes = new HashSet<>();
    final Map<List<String>, Integer> virtualNodeIds = new HashMap<>();

    final Map<Integer, Set<BciEndEdge>> direct = new HashMap<>();
    final Map<Integer, Set<BciEndEdge>> virtual = new HashMap<>();
    final Map<Integer, Set<Integer>> overridenBy = new HashMap<>();

    for (MethodNode candidate : methodToNode.values()) {
        if (!candidate.isEntryPoint) {
            continue;
        }
        entryPoints.add(candidate.id);
        walkNodes(candidate, direct, virtual, overridenBy, virtualNodeIds, concreteNodes, nextVirtualId);
    }

    toCsvFile("call tree for vm entry point", path, "csv_call_tree_vm", reportName, CallTreePrinter::printVMEntryPoint);
    toCsvFile("call tree for methods", path, "csv_call_tree_methods", reportName, out -> printMethodNodes(methodToNode.values(), out));
    toCsvFile("call tree for virtual methods", path, "csv_call_tree_virtual_methods", reportName, out -> printVirtualNodes(virtualNodeIds, out));
    toCsvFile("call tree for entry points", path, "csv_call_tree_entry_points", reportName, out -> printEntryPointIds(entryPoints, out));
    toCsvFile("call tree for direct edges", path, "csv_call_tree_direct_edges", reportName, out -> printBciEdges(direct, out));
    toCsvFile("call tree for overriden by edges", path, "csv_call_tree_override_by_edges", reportName, out -> printNonBciEdges(overridenBy, out));
    toCsvFile("call tree for virtual edges", path, "csv_call_tree_virtual_edges", reportName, out -> printBciEdges(virtual, out));
}

/**
 * Writes one CSV report and (re)creates a stable symbolic link {@code <prefix>.csv} next to it,
 * so that scripts can refer to the latest report without knowing its time-stamped name.
 *
 * @param description human readable description passed on to {@link ReportUtils#report}
 * @param path base directory; the report is written below its {@code reports} sub-directory
 * @param prefix file name prefix of the report, also used as the name of the symbolic link
 * @param reportName suffix appended to {@code prefix} to form the report name
 * @param reporter callback that writes the CSV content
 * @throws RuntimeException if a stale link cannot be removed or the new link cannot be created
 */
private static void toCsvFile(String description, String path, String prefix, String reportName, Consumer<PrintWriter> reporter) {
    final Path reportsPath = Paths.get(path, "reports");
    final String name = prefix + "_" + reportName;
    final String csvFile = ReportUtils.report(description, reportsPath.toString(), name, "csv", reporter);
    final Path csvLink = reportsPath.resolve(prefix + ".csv");

    try {
        // NOFOLLOW_LINKS: a link left over from a previous run must be detected even when the
        // report it pointed to has been removed in the meantime.
        if (Files.exists(csvLink, LinkOption.NOFOLLOW_LINKS)) {
            // A failure here is reported instead of being swallowed; otherwise the link creation
            // below would fail with a misleading "file already exists" error.
            Files.delete(csvLink);
        }
        Files.createSymbolicLink(csvLink, Paths.get(csvFile));
    } catch (IOException e) {
        throw new RuntimeException("Unable to link " + csvLink + " to " + csvFile, e);
    }
}

/**
 * Writes the CSV report for the synthetic VM node: a header row followed by the single row
 * {@code 0,VM}.
 *
 * @param writer destination of the CSV rows
 */
private static void printVMEntryPoint(PrintWriter writer) {
    final String header = convertToCSV("Id", "Name");
    final String vmRow = convertToCSV("0", "VM");
    writer.println(header);
    writer.println(vmRow);
}

/**
 * Writes one CSV row per method node, preceded by a header row.
 *
 * @param methods the method nodes to print
 * @param writer destination of the CSV rows
 */
private static void printMethodNodes(Collection<MethodNode> methods, PrintWriter writer) {
    writer.println(convertToCSV("Id", "Name", "Type", "Parameters", "Return", "Display"));
    for (MethodNode node : methods) {
        final List<String> columns = methodNodeInfo(node);
        writer.println(convertToCSV(columns));
    }
}

/**
 * Returns the CSV columns describing a method node, starting with the node's id.
 */
private static List<String> methodNodeInfo(MethodNode method) {
    final List<String> columns = resolvedJavaMethodInfo(method.id, method.method);
    return columns;
}

/**
 * Recursively visits the invokes of {@code methodNode} and records what it finds.
 * <p>
 * A direct invoke contributes a direct edge to its first callee. Any other invoke is mapped to a
 * virtual node, gets a virtual edge from the caller to that node, and an overriden-by edge from
 * the node to each callee. Callees that are full {@code MethodNode}s are walked in turn;
 * other callee nodes only contribute edges.
 */
private static void walkNodes(MethodNode methodNode, Map<Integer, Set<BciEndEdge>> directEdges, Map<Integer, Set<BciEndEdge>> virtualEdges, Map<Integer, Set<Integer>> overridenByEdges,
                Map<List<String>, Integer> virtualNodes, Set<MethodNode> nonVirtualNodes, AtomicInteger virtualNodeId) {
    for (InvokeNode invoke : methodNode.invokes) {
        if (invoke.isDirectInvoke) {
            if (invoke.callees.size() == 0) {
                continue;
            }
            // Only the first callee is considered for a direct invoke.
            final Node target = invoke.callees.get(0);
            addDirectEdge(methodNode.id, invoke, target, directEdges, nonVirtualNodes);
            if (target instanceof MethodNode) {
                walkNodes((MethodNode) target, directEdges, virtualEdges, overridenByEdges, virtualNodes, nonVirtualNodes, virtualNodeId);
            }
            continue;
        }

        final int virtualId = addVirtualNode(invoke, virtualNodes, virtualNodeId);
        addVirtualMethodEdge(methodNode.id, invoke, virtualId, virtualEdges);
        for (Node implementation : invoke.callees) {
            addOverridenByEdge(virtualId, implementation, overridenByEdges, nonVirtualNodes);
            if (implementation instanceof MethodNode) {
                walkNodes((MethodNode) implementation, directEdges, virtualEdges, overridenByEdges, virtualNodes, nonVirtualNodes, virtualNodeId);
            }
        }
    }
}

/**
 * Records a direct call edge from {@code nodeId} to the method behind {@code calleeNode},
 * annotated with the bytecode indexes of {@code invoke}, and remembers the callee in
 * {@code nodes}.
 */
private static void addDirectEdge(int nodeId, InvokeNode invoke, Node calleeNode, Map<Integer, Set<BciEndEdge>> edges, Set<MethodNode> nodes) {
    final Set<BciEndEdge> outgoing = edges.computeIfAbsent(nodeId, unused -> new HashSet<>());

    // A callee is either the method node itself or a reference that points at one.
    final MethodNode callee;
    if (calleeNode instanceof MethodNode) {
        callee = (MethodNode) calleeNode;
    } else {
        callee = ((MethodNodeReference) calleeNode).methodNode;
    }

    nodes.add(callee);
    outgoing.add(new BciEndEdge(callee.id, bytecodeIndexes(invoke)));
}

/**
 * Returns the {@code bci} of every source reference of the given invoke, in order.
 */
private static List<Integer> bytecodeIndexes(InvokeNode node) {
    final Stream<Integer> indexes = Stream.of(node.sourceReferences).map(reference -> reference.bci);
    return indexes.collect(Collectors.toList());
}

/**
 * Returns the id of the virtual node for the invoke's target method, allocating the next id
 * from {@code virtualNodeId} the first time that target is seen.
 */
private static int addVirtualNode(InvokeNode node, Map<List<String>, Integer> virtualNodes, AtomicInteger virtualNodeId) {
    final List<String> key = virtualMethodInfo(node.targetMethod);
    Integer id = virtualNodes.get(key);
    if (id == null) {
        id = virtualNodeId.getAndIncrement();
        virtualNodes.put(key, id);
    }
    return id;
}

/**
 * Records an edge from the calling method {@code startId} to the virtual node {@code endId},
 * annotated with the bytecode indexes of {@code invoke}.
 */
private static void addVirtualMethodEdge(int startId, InvokeNode invoke, int endId, Map<Integer, Set<BciEndEdge>> edges) {
    edges.computeIfAbsent(startId, unused -> new HashSet<>())
                    .add(new BciEndEdge(endId, bytecodeIndexes(invoke)));
}

/**
 * Writes one CSV row per virtual method node, preceded by a header row.
 *
 * @param virtualNodes virtual method descriptions mapped to their assigned ids
 * @param writer destination of the CSV rows
 */
private static void printVirtualNodes(Map<List<String>, Integer> virtualNodes, PrintWriter writer) {
    writer.println(convertToCSV("Id", "Name", "Type", "Parameters", "Return", "Display"));
    for (Map.Entry<List<String>, Integer> virtualNode : virtualNodes.entrySet()) {
        final List<String> columns = virtualMethodAndIdInfo(virtualNode);
        writer.println(convertToCSV(columns));
    }
}

/**
 * Builds the CSV columns of a virtual node: the entry's value (the node id) as text, followed
 * by every element of the entry's key except the first one.
 */
private static List<String> virtualMethodAndIdInfo(Map.Entry<List<String>, Integer> entry) {
    final String idColumn = String.valueOf(entry.getValue());
    // The first key element is skipped; the id column takes its place.
    return Stream.concat(Stream.of(idColumn), entry.getKey().stream().skip(1))
                    .collect(Collectors.toList());
}

/**
 * Writes the ids of the entry point methods, one per row, preceded by a header row.
 *
 * @param entryPoints ids of the entry point method nodes
 * @param writer destination of the CSV rows
 */
private static void printEntryPointIds(Set<Integer> entryPoints, PrintWriter writer) {
    writer.println(convertToCSV("Id"));
    for (Integer entryPointId : entryPoints) {
        writer.println(entryPointId);
    }
}

/**
 * Records an overriden-by edge from the virtual node {@code nodeId} to the method behind
 * {@code calleeNode} and remembers that method in {@code nodes}.
 */
private static void addOverridenByEdge(int nodeId, Node calleeNode, Map<Integer, Set<Integer>> edges, Set<MethodNode> nodes) {
    final Set<Integer> implementations = edges.computeIfAbsent(nodeId, unused -> new HashSet<>());

    // A callee is either the method node itself or a reference that points at one.
    final MethodNode callee;
    if (calleeNode instanceof MethodNode) {
        callee = (MethodNode) calleeNode;
    } else {
        callee = ((MethodNodeReference) calleeNode).methodNode;
    }

    nodes.add(callee);
    implementations.add(callee.id);
}

/**
 * Writes one CSV row per edge that carries bytecode indexes, preceded by a header row.
 * <p>
 * The edges are flattened into a list rather than a set: {@code BciEdge} defines neither
 * {@code equals} nor {@code hashCode}, so a hash set could not remove duplicates and would only
 * make the row order depend on identity hash codes, i.e. differ from run to run. The input is
 * already free of duplicates because it maps each start id to a set of end edges.
 *
 * @param edges end edges grouped by the id of the node they start from
 * @param writer destination of the CSV rows
 */
private static void printBciEdges(Map<Integer, Set<BciEndEdge>> edges, PrintWriter writer) {
    final List<BciEdge> idEdges = edges.entrySet().stream()
                    .flatMap(entry -> entry.getValue().stream().map(endEdge -> new BciEdge(entry.getKey(), endEdge)))
                    .collect(Collectors.toList());

    writer.println(convertToCSV("StartId", "EndId", "BytecodeIndexes"));
    for (BciEdge edge : idEdges) {
        writer.println(convertToCSV(String.valueOf(edge.startId), String.valueOf(edge.endEdge.id), showBytecodeIndexes(edge.endEdge.bytecodeIndexes)));
    }
}

/**
 * Renders bytecode indexes as a single string, separated by {@code ->}; an empty list yields
 * an empty string.
 */
private static String showBytecodeIndexes(List<Integer> bytecodeIndexes) {
    final StringBuilder rendered = new StringBuilder();
    String separator = "";
    for (Integer index : bytecodeIndexes) {
        rendered.append(separator).append(index);
        separator = "->";
    }
    return rendered.toString();
}

/**
 * Writes one CSV row per edge without bytecode indexes, preceded by a header row.
 * <p>
 * The edges are flattened into a list rather than a set: {@code NonBciEdge} defines neither
 * {@code equals} nor {@code hashCode}, so a hash set could not remove duplicates and would only
 * make the row order depend on identity hash codes, i.e. differ from run to run. The input is
 * already free of duplicates because it maps each start id to a set of end ids.
 *
 * @param edges end node ids grouped by the id of the node they start from
 * @param writer destination of the CSV rows
 */
private static void printNonBciEdges(Map<Integer, Set<Integer>> edges, PrintWriter writer) {
    final List<NonBciEdge> idEdges = edges.entrySet().stream()
                    .flatMap(entry -> entry.getValue().stream().map(endId -> new NonBciEdge(entry.getKey(), endId)))
                    .collect(Collectors.toList());

    writer.println(convertToCSV("StartId", "EndId"));
    for (NonBciEdge edge : idEdges) {
        writer.println(convertToCSV(String.valueOf(edge.startId), String.valueOf(edge.endId)));
    }
}

/**
 * Returns the CSV columns describing a virtual method. The id column is left {@code null}
 * because no id is passed on to {@code resolvedJavaMethodInfo}.
 */
private static List<String> virtualMethodInfo(AnalysisMethod method) {
    final Integer noId = null;
    return resolvedJavaMethodInfo(noId, method);
}

/**
 * Builds the CSV columns for a method: id, name, declaring type, parameters, return type and
 * abbreviated display name.
 *
 * @param id the node id, or {@code null} to leave the id column {@code null}
 * @param method the method to describe
 * @return a fixed-size list with the six columns in the order given above
 */
private static List<String> resolvedJavaMethodInfo(Integer id, ResolvedJavaMethod method) {
    // TODO method parameter types are opaque, but could in the future be split out and link
    // together
    // e.g. each method could BELONG to a type, and a method could have PARAMETER relationships
    // with N types
    // see https://neo4j.com/developer/guide-import-csv/#_converting_data_values_with_load_csv
    // for examples
    final String parameters;
    if (method.getSignature().getParameterCount(false) > 0) {
        // Commas are dropped so the parameter list stays within a single CSV column.
        parameters = method.format("%P").replace(",", "");
    } else {
        parameters = "empty";
    }

    final String idColumn = id == null ? null : Integer.toString(id);
    final String name = method.getName();
    final String declaringType = method.getDeclaringClass().toJavaName(true);
    final String returnType = method.getSignature().getReturnType(null).toJavaName(true);
    final String displayName = display(method);

    return Arrays.asList(idColumn, name, declaringType, parameters, returnType, displayName);
}

/**
 * Returns a short display name of the form {@code <type>.<method>}. When the declaring type's
 * kind is {@code JavaKind.Object}, the type name is abbreviated to the characters selected by
 * {@code CAMEL_CASE_PATTERN}; otherwise the full type name is used.
 */
private static String display(ResolvedJavaMethod method) {
    final ResolvedJavaType declaringType = method.getDeclaringClass();
    final String qualifiedName = declaringType.toJavaName(true);
    if (declaringType.getJavaKind() != JavaKind.Object) {
        return qualifiedName + "." + method.getName();
    }

    final StringBuilder abbreviated = new StringBuilder();
    final Matcher pieces = CAMEL_CASE_PATTERN.matcher(qualifiedName);
    while (pieces.find()) {
        abbreviated.append(pieces.group());
    }
    return abbreviated.append(".").append(method.getName()).toString();
}

/**
 * Joins the given fields into one comma separated CSV record.
 *
 * @param data the fields of the record, in column order
 * @return the record, without a trailing line separator
 */
private static String convertToCSV(String... data) {
    return convertToCSV(Arrays.asList(data));
}

/**
 * Joins the given fields into one comma separated CSV record. Fields that contain a comma, a
 * double quote or a line break are quoted so that they cannot shift columns or split the
 * record; all other fields are written unchanged.
 *
 * @param data the fields of the record, in column order
 * @return the record, without a trailing line separator
 */
private static String convertToCSV(List<String> data) {
    return data.stream()
                    .map(field -> escapeCsvField(field))
                    .collect(Collectors.joining(","));
}

/**
 * Quotes a single CSV field when necessary: the field is wrapped in double quotes and embedded
 * double quotes are doubled. A {@code null} field is rendered as the text {@code null}.
 */
private static String escapeCsvField(String field) {
    if (field == null) {
        return "null";
    }
    final boolean needsQuoting = field.indexOf(',') >= 0 || field.indexOf('"') >= 0 || field.indexOf('\n') >= 0 || field.indexOf('\r') >= 0;
    if (!needsQuoting) {
        return field;
    }
    return "\"" + field.replace("\"", "\"\"") + "\"";
}

/**
 * A directed edge between two node ids that carries no bytecode index information.
 */
private static final class NonBciEdge {

    /** Id of the node the edge starts from. */
    final int startId;
    /** Id of the node the edge points to. */
    final int endId;

    private NonBciEdge(int from, int to) {
        startId = from;
        endId = to;
    }
}

/**
 * A directed edge from a node id to an end edge, which holds the target id together with its
 * bytecode indexes.
 */
private static final class BciEdge {
    /** Id of the node the edge starts from. */
    final int startId;
    /** Target id and bytecode indexes of the edge. */
    final BciEndEdge endEdge;

    private BciEdge(int from, BciEndEdge target) {
        startId = from;
        endEdge = target;
    }
}

/**
 * The target side of an edge: the id of the node pointed to plus a list of bytecode indexes.
 * Two instances are equal when both the id and the index list are equal, which lets them be
 * de-duplicated in hash sets.
 */
private static final class BciEndEdge {
    /** Id of the node the edge points to. */
    final int id;
    /** Bytecode indexes attached to the edge. */
    final List<Integer> bytecodeIndexes;

    private BciEndEdge(int id, List<Integer> bytecodeIndexes) {
        this.id = id;
        this.bytecodeIndexes = bytecodeIndexes;
    }

    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        // The class is final, so an instanceof test is equivalent to comparing classes.
        if (!(o instanceof BciEndEdge)) {
            return false;
        }
        final BciEndEdge other = (BciEndEdge) o;
        if (id != other.id) {
            return false;
        }
        return bytecodeIndexes.equals(other.bytecodeIndexes);
    }

    @Override
    public int hashCode() {
        return Objects.hash(id, bytecodeIndexes);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,20 @@ public class ReportUtils {
/**
* Print a report in the format: path/name_timeStamp.extension. The path is relative to the
* working directory.
*
*
* @param description the description of the report
* @param path the path (relative to the working directory if the argument represents a relative
* path)
* @param name the name of the report
* @param extension the extension of the report
* @param reporter a consumer that writes to a PrintWriter
* @return the time-stamped file name of the created report, without the directory given by {@code path}
*/
public static void report(String description, String path, String name, String extension, Consumer<PrintWriter> reporter) {
public static String report(String description, String path, String name, String extension, Consumer<PrintWriter> reporter) {
    final String timeStampedName = timeStampedFileName(name, extension);
    reportImpl(description, Paths.get(path), timeStampedName, reporter);
    // Only the file name is handed back; callers combine it with the directory themselves.
    return timeStampedName;
}

public static String timeStampedFileName(String name, String extension) {
Expand Down

0 comments on commit 8896e6f

Please sign in to comment.