From 328ca6a835ff39fa0c0634e41e47e9ff77354734 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Wed, 10 Jun 2020 01:31:25 -0400 Subject: [PATCH] Handle compressed debug sections in ELF files ELF files allow debug info sections to be compressed. The libbacktrace backend supported these compressed sections, but the Gimli backend did not. This commit adds that support to the Gimli backend. In my tests these debug info sections do not obey the alignment requirements that the object crate expects for the gABI compression header (nor can I find a source documenting any alignment requirements), so this commit additionally enables the "unaligned" feature in the upcoming version of the object crate. There is a bit of unsafe to ensure the lifetime of the decompressed sections matches the lifetime of the mmap'd file. I don't think there is a way around this unsafe code, unless we are willing to ditch Gimli's EndianSlice for an (apparently slower) EndianReader backed by a Cow<[u8]>. Fix #342. --- .github/workflows/main.yml | 4 ++ Cargo.toml | 7 +-- src/symbolize/gimli/elf.rs | 77 +++++++++++++++++++++++++------- src/symbolize/gimli/mmap_unix.rs | 23 +++++++++- 4 files changed, 92 insertions(+), 19 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 14a830dd6..e1e4eb59e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -78,6 +78,10 @@ jobs: - run: cargo test --features gimli-symbolize --manifest-path crates/without_debuginfo/Cargo.toml - run: cargo test --manifest-path crates/line-tables-only/Cargo.toml --features libbacktrace - run: cargo test --manifest-path crates/line-tables-only/Cargo.toml --features gimli-symbolize + - run: RUSTFLAGS="-C link-arg=-Wl,--compress-debug-sections=zlib-gabi" cargo test --features gimli-symbolize + if: contains(matrix.os == 'ubuntu') + - run: RUSTFLAGS="-C link-arg=-Wl,--compress-debug-sections=zlib-gnu" cargo test --features gimli-symbolize + if: contains(matrix.os == 'ubuntu') 
windows_arm64: name: Windows AArch64 diff --git a/Cargo.toml b/Cargo.toml index 66f431d9d..283821330 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,11 +39,12 @@ compiler_builtins = { version = '0.1.2', optional = true } # Optional dependencies enabled through the `gimli-symbolize` feature, do not # use these features directly. addr2line = { version = "0.12.0", optional = true, default-features = false } +flate2 = { version = "1.0.14", optional = true } [dependencies.object] -version = "0.19" +git = "https://github.com/gimli-rs/object.git" optional = true default-features = false -features = ['read_core', 'elf', 'macho', 'pe'] +features = ['read_core', 'elf', 'macho', 'pe', 'unaligned'] [target.'cfg(windows)'.dependencies] winapi = { version = "0.3.3", optional = true } @@ -71,7 +72,7 @@ std = [] # be affected by feature selection here. Also note that it's highly unlikely you # want to configure this. If you're having trouble getting backtraces it's # likely best to open an issue. -gimli-symbolize = ["addr2line", "object", "std"] +gimli-symbolize = ["addr2line", "flate2", "object", "std"] libbacktrace = ["backtrace-sys/backtrace-sys"] #======================================= diff --git a/src/symbolize/gimli/elf.rs b/src/symbolize/gimli/elf.rs index eb0ac88f5..75b7f0cdd 100644 --- a/src/symbolize/gimli/elf.rs +++ b/src/symbolize/gimli/elf.rs @@ -1,7 +1,8 @@ -use super::{Mapping, Path, Vec}; -use object::read::elf::{FileHeader, SectionHeader, SectionTable, Sym}; +use super::{Mapping, Mmap, Path, Vec}; +use object::elf::{ELFCOMPRESS_ZLIB, SHF_COMPRESSED}; +use object::read::elf::{CompressionHeader, FileHeader, SectionHeader, SectionTable, Sym}; use object::read::StringTable; -use object::{Bytes, NativeEndian}; +use object::{BigEndian, Bytes, NativeEndian}; #[cfg(target_pointer_width = "32")] type Elf = object::elf::FileHeader32; @@ -28,7 +29,7 @@ pub struct Object<'a> { /// We could use a literal instead, but this helps ensure correctness. 
endian: NativeEndian, /// The entire file data. - data: Bytes<'a>, + mmap: &'a Mmap, sections: SectionTable<'a, Elf>, strings: StringTable<'a>, /// List of pre-parsed and sorted symbols by base address. @@ -36,8 +37,8 @@ pub struct Object<'a> { } impl<'a> Object<'a> { - fn parse(data: &'a [u8]) -> Option> { - let data = object::Bytes(data); + fn parse(mmap: &'a Mmap) -> Option> { + let data = object::Bytes(mmap); let elf = Elf::parse(data).ok()?; let endian = elf.endian().ok()?; let sections = elf.sections(endian, data).ok()?; @@ -80,7 +81,7 @@ impl<'a> Object<'a> { syms.sort_unstable_by_key(|s| s.address); Some(Object { endian, - data, + mmap, sections, strings, syms, @@ -88,14 +89,51 @@ impl<'a> Object<'a> { } pub fn section(&self, name: &str) -> Option<&'a [u8]> { - Some( - self.sections - .section_by_name(self.endian, name.as_bytes())? - .1 - .data(self.endian, self.data) - .ok()? - .0, - ) + if let Some(section) = self.section_header(name) { + let mut data = section.data(self.endian, self.data()).ok()?; + + // Check for DWARF-standard (gABI) compression, i.e., as generated + // by ld's `--compress-debug-sections=zlib-gabi` flag. + let flags: u64 = section.sh_flags(self.endian).into(); + if (flags & u64::from(SHF_COMPRESSED)) == 0 { + // Not compressed. + return Some(data.0); + } + + let header = data.read::<::CompressionHeader>().ok()?; + if header.ch_type(self.endian) != ELFCOMPRESS_ZLIB { + // Zlib compression is the only known type. + return None; + } + let size = header.ch_size(self.endian) as usize; + let buf = decompress_zlib(data.0, size)?; + return Some(self.mmap.stash(buf)); + } + + // Check for the nonstandard GNU compression format, i.e., as generated + // by ld's `--compress-debug-sections=zlib-gnu` flag. 
+ let zdebug_name = format!(".zdebug_{}", &name[7..]); + if let Some(section) = self.section_header(&zdebug_name) { + let mut data = section.data(self.endian, self.data()).ok()?; + if data.read_bytes(8).ok()?.0 != b"ZLIB\0\0\0\0" { + return None; + } + let size = data.read::>().ok()?.get(BigEndian); + let buf = decompress_zlib(data.0, size as usize)?; + return Some(self.mmap.stash(buf)); + } + + None + } + + fn section_header(&self, name: &str) -> Option<&::SectionHeader> { + self.sections + .section_by_name(self.endian, name.as_bytes()) + .map(|(_index, section)| section) + } + + fn data(&self) -> Bytes<'a> { + object::Bytes(&self.mmap) } pub fn search_symtab<'b>(&'b self, addr: u64) -> Option<&'b [u8]> { @@ -112,3 +150,12 @@ impl<'a> Object<'a> { } } } + +fn decompress_zlib(data: &[u8], size: usize) -> Option> { + let mut buf = Vec::with_capacity(size); + let header_expected = true; + flate2::Decompress::new(header_expected) + .decompress_vec(data, &mut buf, flate2::FlushDecompress::Finish) + .ok()?; + Some(buf) +} diff --git a/src/symbolize/gimli/mmap_unix.rs b/src/symbolize/gimli/mmap_unix.rs index 348466288..750777420 100644 --- a/src/symbolize/gimli/mmap_unix.rs +++ b/src/symbolize/gimli/mmap_unix.rs @@ -1,12 +1,16 @@ +use std::cell::UnsafeCell; use std::fs::File; use std::ops::Deref; use std::os::unix::prelude::*; use std::ptr; use std::slice; +use std::vec::Vec; pub struct Mmap { ptr: *mut libc::c_void, len: usize, + /// Additional byte vectors that need to live as long as the mmap. + buffers: UnsafeCell>>, } impl Mmap { @@ -22,7 +26,24 @@ impl Mmap { if ptr == libc::MAP_FAILED { return None; } - Some(Mmap { ptr, len }) + Some(Mmap { + ptr, + len, + buffers: UnsafeCell::new(vec![]), + }) + } + + /// Takes ownership of `buf` and returns a reference to its contents that + /// lives as long as this `Mmap` does. + pub fn stash(&self, buf: Vec) -> &[u8] { + // SAFETY: this is the only function that ever constructs a mutable + // reference to `self.buffers`. 
+ let buffers = unsafe { &mut *self.buffers.get() }; + let i = buffers.len(); + buffers.push(buf); + // SAFETY: we never remove elements from `self.buffers`, so a reference + // to the data inside any buffer will live as long as `Mmap` does. + &buffers[i] } }