Skip to content

Commit

Permalink
[symbolizer] Support symbol lookup
Browse files Browse the repository at this point in the history
Recent versions of GNU binutils, starting from 2.39, support symbol+offset
lookup in addition to the usual numeric address lookup. This change adds
symbol lookup to llvm-symbolizer and llvm-addr2line.

Now llvm-symbolizer behaves more like GNU addr2line: if the value specified
as an address on the command line or in the input stream is not a number, it is
treated as a symbol name. For example:

    llvm-symbolizer --obj=abc.so func_22
    llvm-symbolizer --obj=abc.so "CODE func_22"

Currently this lookup is supported only for functions. Specifying a symbol
with an offset is not supported yet.

This is a recommit of 2b27948, reverted
in 39fec54 because the test
llvm/test/Support/interrupts.test started failing on Windows. The test was
changed in 18f036d and is also updated in
this commit.

Differential Revision: https://reviews.llvm.org/D149759
  • Loading branch information
spavloff committed Nov 1, 2023
1 parent fd8be1e commit e144ae5
Show file tree
Hide file tree
Showing 24 changed files with 333 additions and 42 deletions.
13 changes: 12 additions & 1 deletion llvm/docs/CommandGuide/llvm-symbolizer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ DESCRIPTION
:program:`llvm-symbolizer` reads input names and addresses from the command-line
and prints corresponding source code locations to standard output. It can also
symbolize logs containing :doc:`Symbolizer Markup </SymbolizerMarkupFormat>` via
:option:`--filter-markup`.
:option:`--filter-markup`. Addresses may be specified as numbers or symbol names.

If no address is specified on the command-line, it reads the addresses from
standard input. If no input name is specified on the command-line, but addresses
Expand Down Expand Up @@ -196,6 +196,17 @@ shows --relativenames.
main
foo/test.cpp:15:0
Example 7 - Addresses as symbol names:

.. code-block:: console

  $ llvm-symbolizer --obj=test.elf main
  main
  /tmp/test.cpp:14:0
  $ llvm-symbolizer --obj=test.elf "CODE foz"
  foz
  /tmp/test.h:1:0

OPTIONS
-------

Expand Down
2 changes: 2 additions & 0 deletions llvm/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ Changes to the LLVM tools
* ``llvm-nm`` now supports the ``--line-numbers`` (``-l``) option to use
debugging information to print symbols' filenames and line numbers.

* ``llvm-symbolizer`` and ``llvm-addr2line`` now support addresses specified as symbol names.

Changes to LLDB
---------------------------------

Expand Down
7 changes: 7 additions & 0 deletions llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class SourceCode;
// Describes one query handed to the DIPrinter methods.
struct Request {
// Module the query refers to.
StringRef ModuleName;
// Numeric address of the query, when one was given.
std::optional<uint64_t> Address;
// Symbol name of the query; left empty when the query did not name a symbol.
StringRef Symbol;
};

class DIPrinter {
Expand All @@ -46,6 +47,8 @@ class DIPrinter {
virtual void print(const Request &Request, const DIGlobal &Global) = 0;
virtual void print(const Request &Request,
const std::vector<DILocal> &Locals) = 0;
// Print a list of source locations for Request.
// NOTE(review): presumably fed with the result of a symbol-name lookup
// (LLVMSymbolizer::findSymbol) - confirm against the callers.
virtual void print(const Request &Request,
const std::vector<DILineInfo> &Locations) = 0;

virtual bool printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) = 0;
Expand Down Expand Up @@ -91,6 +94,8 @@ class PlainPrinterBase : public DIPrinter {
void print(const Request &Request, const DIGlobal &Global) override;
void print(const Request &Request,
const std::vector<DILocal> &Locals) override;
// Print each location in turn followed by one footer; an empty list is
// printed as a single default-constructed DILineInfo.
void print(const Request &Request,
const std::vector<DILineInfo> &Locations) override;

bool printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) override;
Expand Down Expand Up @@ -141,6 +146,8 @@ class JSONPrinter : public DIPrinter {
void print(const Request &Request, const DIGlobal &Global) override;
void print(const Request &Request,
const std::vector<DILocal> &Locals) override;
// Emit the request as a JSON object whose "Loc" member is an array with one
// entry per location.
void print(const Request &Request,
const std::vector<DILineInfo> &Locations) override;

bool printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) override;
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ class SymbolizableModule {
virtual std::vector<DILocal>
symbolizeFrame(object::SectionedAddress ModuleOffset) const = 0;

// Return the addresses of the symbols in this module that are named Symbol.
// NOTE(review): the matching rule is up to the implementation; the
// SymbolizableObjectFile override compares names for exact equality.
virtual std::vector<object::SectionedAddress>
findSymbol(StringRef Symbol) const = 0;

// Return true if this is a 32-bit x86 PE COFF module.
virtual bool isWin32Module() const = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ class SymbolizableObjectFile : public SymbolizableModule {
DIGlobal symbolizeData(object::SectionedAddress ModuleOffset) const override;
std::vector<DILocal>
symbolizeFrame(object::SectionedAddress ModuleOffset) const override;
// Return the address, paired with its section index, of every entry in
// Symbols whose name equals Symbol. Empty when nothing matches.
std::vector<object::SectionedAddress>
findSymbol(StringRef Symbol) const override;

// Return true if this is a 32-bit x86 PE COFF module.
bool isWin32Module() const override;
Expand Down
11 changes: 11 additions & 0 deletions llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@ class LLVMSymbolizer {
Expected<std::vector<DILocal>>
symbolizeFrame(ArrayRef<uint8_t> BuildID,
object::SectionedAddress ModuleOffset);

// Look up Symbol by name in a module identified by an object file, a module
// name, or a build ID. On success the result holds one DILineInfo per
// location that could be resolved for the symbol; it may be empty.
Expected<std::vector<DILineInfo>> findSymbol(const ObjectFile &Obj,
StringRef Symbol);
Expected<std::vector<DILineInfo>> findSymbol(StringRef ModuleName,
StringRef Symbol);
Expected<std::vector<DILineInfo>> findSymbol(ArrayRef<uint8_t> BuildID,
StringRef Symbol);

void flush();

// Evict entries from the binary cache until it is under the maximum size
Expand Down Expand Up @@ -146,6 +154,9 @@ class LLVMSymbolizer {
Expected<std::vector<DILocal>>
symbolizeFrameCommon(const T &ModuleSpecifier,
object::SectionedAddress ModuleOffset);
// Shared implementation behind the findSymbol overloads. ModuleSpecifier is
// passed to getOrCreateModuleInfo to identify the module.
template <typename T>
Expected<std::vector<DILineInfo>> findSymbolCommon(const T &ModuleSpecifier,
StringRef Symbol);

Expected<SymbolizableModule *> getOrCreateModuleInfo(const ObjectFile &Obj);

Expand Down
26 changes: 26 additions & 0 deletions llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,17 @@ void PlainPrinterBase::print(const Request &Request,
printFooter();
}

/// Print every location in \p Locations for \p Request. If the list is empty,
/// fall back to printing a default-constructed DILineInfo for the request.
void PlainPrinterBase::print(const Request &Request,
                             const std::vector<DILineInfo> &Locations) {
  // Nothing resolved: delegate to the overload taking a single DILineInfo.
  if (Locations.empty()) {
    print(Request, DILineInfo());
    return;
  }

  // One entry per location, then a single footer for the whole group.
  for (const DILineInfo &Loc : Locations)
    print(Loc, false);
  printFooter();
}

bool PlainPrinterBase::printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) {
ErrHandler(ErrorInfo, Request.ModuleName);
Expand All @@ -273,6 +284,8 @@ static std::string toHex(uint64_t V) {

static json::Object toJSON(const Request &Request, StringRef ErrorMsg = "") {
json::Object Json({{"ModuleName", Request.ModuleName.str()}});
if (!Request.Symbol.empty())
Json["SymName"] = Request.Symbol.str();
if (Request.Address)
Json["Address"] = toHex(*Request.Address);
if (!ErrorMsg.empty())
Expand Down Expand Up @@ -362,6 +375,19 @@ void JSONPrinter::print(const Request &Request,
printJSON(std::move(Json));
}

/// Emit \p Locations for \p Request as one JSON object: the request fields
/// plus a "Loc" array holding one converted entry per location.
void JSONPrinter::print(const Request &Request,
                        const std::vector<DILineInfo> &Locations) {
  json::Array Definitions;
  for (const DILineInfo &Loc : Locations)
    Definitions.push_back(toJSON(Loc));

  json::Object Json = toJSON(Request);
  Json["Loc"] = std::move(Definitions);

  // Print immediately unless an object list is set, in which case append
  // the object to that list instead.
  if (!ObjectList) {
    printJSON(std::move(Json));
    return;
  }
  ObjectList->push_back(std::move(Json));
}

bool JSONPrinter::printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) {
json::Object Json = toJSON(Request, ErrorInfo.message());
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,19 @@ std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
return DebugInfoContext->getLocalsForAddress(ModuleOffset);
}

/// Collect the address of every entry in Symbols whose name is exactly
/// \p Symbol, paired with the section index looked up for that address.
/// Returns an empty vector when no name matches.
std::vector<object::SectionedAddress>
SymbolizableObjectFile::findSymbol(StringRef Symbol) const {
  std::vector<object::SectionedAddress> Matches;
  for (const SymbolDesc &Desc : Symbols) {
    if (Desc.Name != Symbol)
      continue;
    object::SectionedAddress Addr{Desc.Addr,
                                  getModuleSectionIndexForAddress(Desc.Addr)};
    Matches.push_back(Addr);
  }
  return Matches;
}

/// Search for the first occurrence of specified Address in ObjectFile.
uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
uint64_t Address) const {
Expand Down
44 changes: 44 additions & 0 deletions llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,50 @@ LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
return symbolizeFrameCommon(BuildID, ModuleOffset);
}

/// Resolve \p Symbol in the module identified by \p ModuleSpecifier and return
/// line information for each address the module reports for that name.
///
/// Returns the error from getOrCreateModuleInfo if the module cannot be
/// obtained, and an empty vector if the module is null. Addresses whose
/// symbolized FileName equals DILineInfo::BadString are omitted. Function
/// names are demangled when Opts.Demangle is set.
template <typename T>
Expected<std::vector<DILineInfo>>
LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol) {
  auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
  if (!InfoOrErr)
    return InfoOrErr.takeError();

  std::vector<DILineInfo> Locations;
  SymbolizableModule *Module = *InfoOrErr;

  // A null module means an error has already been reported. Return an empty
  // result.
  if (Module == nullptr)
    return Locations;

  // The same specifier is used for every address of the symbol.
  const DILineInfoSpecifier Specifier(Opts.PathStyle, Opts.PrintFunctions);
  for (object::SectionedAddress Addr : Module->findSymbol(Symbol)) {
    DILineInfo Info =
        Module->symbolizeCode(Addr, Specifier, Opts.UseSymbolTable);
    // Skip addresses for which no file name could be determined.
    if (Info.FileName == DILineInfo::BadString)
      continue;
    if (Opts.Demangle)
      Info.FunctionName = DemangleName(Info.FunctionName, Module);
    Locations.push_back(Info);
  }

  return Locations;
}

/// Look up \p Symbol in the module for object file \p Obj. Forwards to
/// findSymbolCommon with \p Obj as the module specifier.
Expected<std::vector<DILineInfo>>
LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol) {
return findSymbolCommon(Obj, Symbol);
}

/// Look up \p Symbol in the module named \p ModuleName. The name is converted
/// to std::string before being forwarded to findSymbolCommon.
Expected<std::vector<DILineInfo>>
LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol) {
return findSymbolCommon(ModuleName.str(), Symbol);
}

/// Look up \p Symbol in the module identified by \p BuildID. Forwards to
/// findSymbolCommon with \p BuildID as the module specifier.
Expected<std::vector<DILineInfo>>
LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol) {
return findSymbolCommon(BuildID, Symbol);
}

void LLVMSymbolizer::flush() {
ObjectForUBPathAndArch.clear();
LRUBinaries.clear();
Expand Down
5 changes: 4 additions & 1 deletion llvm/test/Support/interrupts.test
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
## Show that SIGINT and similar signals don't cause crash messages to be
## reported.
# RUN: %python %s wrapper llvm-symbolizer 2> %t.err
# RUN: count 0 < %t.err
# RUN: FileCheck --input-file=%t.err %s

# CHECK: {{.*}} error: 'foo': {{[Nn]}}o such file or directory
# CHECK-NOT: {{.+}}

import os
import signal
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-symbolizer/Inputs/addr.inp
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
some text
something not a valid address
0x40054d
some text2
some text possibly a symbol
2 changes: 1 addition & 1 deletion llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ some text
0x4005b9
0x4005ce
0x4005d4
some more text
another text
19 changes: 19 additions & 0 deletions llvm/test/tools/llvm-symbolizer/Inputs/symbols.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// This file is a part of sources used to build `symbols.so`, which is used to
// test symbol location search made by llvm-symbolizer.
//
// Build instructions:
// $ mkdir /tmp/dbginfo
// $ cp symbols.h symbols.part1.cpp symbols.part2.cpp symbols.part3.c symbols.part4.c /tmp/dbginfo/
// $ cd /tmp/dbginfo
// $ gcc -osymbols.so -shared -fPIC -g symbols.part1.cpp symbols.part2.cpp symbols.part3.c symbols.part4.c


extern "C" {
extern int global_01;
int func_01();
int func_02(int);
}

template<typename T> T func_03(T x) { // Returns x + T(1); called from both symbols.part1.cpp and symbols.part2.cpp.
return x + T(1);
}
25 changes: 25 additions & 0 deletions llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#include "symbols.h"

int global_01 = 22;

int static static_var = 0;

static int static_func_01(int x) { // File-local; symbols.part2.cpp defines another static function with this name.
static_var = x;
return global_01;
}

int func_01() { // Declared extern "C" in symbols.h.
int res = 1;
return res + static_func_01(22);
}

int func_04() { // Zero-argument overload of func_04.
static_var = 0;
return 22;
}

int func_04(int x) { // int overload of func_04; calls the func_03 template from symbols.h.
int res = static_var;
return res + func_03(x);
}
18 changes: 18 additions & 0 deletions llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include "symbols.h"

int static static_var = 4;

static int static_func_01(int x) { // File-local; symbols.part1.cpp defines another static function with this name.
static_var--;
return x;
}

int func_02(int x) { // Declared extern "C" in symbols.h.
static_var = x;
return static_func_01(x);
}

int func_05(int x) { // Calls the func_03 template from symbols.h.
int res = static_var;
return res + func_03(x);
}
12 changes: 12 additions & 0 deletions llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
static int static_func(int);
static int static_var = 0;

int static_func(int x) { // Internal linkage: the earlier declaration in this file is static. symbols.part4.c defines a same-named function.
static_var++;
return static_var + x;
}

int func_06(int x) { // Non-static wrapper around this file's static_func.
return static_func(x);
}

13 changes: 13 additions & 0 deletions llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
static int static_func(int);
static int static_var = 5;

int static_func(int x) { // Internal linkage: the earlier declaration in this file is static. symbols.part3.c defines a same-named function.
static_var++;
return static_var + x;
}

int func_07(int x) { // Non-static; bumps static_var, then calls this file's static_func.
static_var++;
return static_func(x);
}

Binary file not shown.
Loading

0 comments on commit e144ae5

Please sign in to comment.