Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize LEB128 data reading #719

Merged
merged 1 commit into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions src/gsym/inline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ impl InlineInfo {
) -> Result<Option<InlineInfo>> {
let range_cnt = data
.read_u64_leb128()
.ok_or_invalid_data(|| "failed to read range count from inline information")?
.0;
.ok_or_invalid_data(|| "failed to read range count from inline information")?;
let range_cnt = usize::try_from(range_cnt)
.ok()
.ok_or_invalid_data(|| "range count ({}) is too big")?;
Expand All @@ -41,12 +40,10 @@ impl InlineInfo {
for i in 0..range_cnt {
let offset = data
.read_u64_leb128()
.ok_or_invalid_data(|| "failed to read offset from inline information")?
.0;
.ok_or_invalid_data(|| "failed to read offset from inline information")?;
let size = data
.read_u64_leb128()
.ok_or_invalid_data(|| "failed to read size from inline information")?
.0;
.ok_or_invalid_data(|| "failed to read size from inline information")?;

let start = base_addr
.checked_add(offset)
Expand Down Expand Up @@ -91,15 +88,13 @@ impl InlineInfo {
let (call_file, call_line) = if lookup_addr.is_some() {
let call_file = data
.read_u64_leb128()
.ok_or_invalid_data(|| "failed to read call file from inline information")?
.0;
.ok_or_invalid_data(|| "failed to read call file from inline information")?;
let call_file = u32::try_from(call_file)
.ok()
.ok_or_invalid_data(|| "call file index ({}) is too big")?;
let call_line = data
.read_u64_leb128()
.ok_or_invalid_data(|| "failed to read call line from inline information")?
.0;
.ok_or_invalid_data(|| "failed to read call line from inline information")?;
let call_line = u32::try_from(call_line).unwrap_or(u32::MAX);
(Some(call_file), Some(call_line))
} else {
Expand Down
12 changes: 6 additions & 6 deletions src/gsym/linetab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ impl LineTableHeader {
///
/// * `data` - is what [`AddrData::data`] is.
pub(super) fn parse(data: &mut &[u8]) -> Option<Self> {
let (min_delta, _bytes) = data.read_i64_leb128()?;
let (max_delta, _bytes) = data.read_i64_leb128()?;
let (first_line, _bytes) = data.read_u64_leb128()?;
let min_delta = data.read_i64_leb128()?;
let max_delta = data.read_i64_leb128()?;
let first_line = data.read_u64_leb128()?;

let header = Self {
min_delta,
Expand Down Expand Up @@ -108,17 +108,17 @@ pub(crate) fn run_op(
match op {
END_SEQUENCE => Some(RunResult::End),
SET_FILE => {
let (f, _bytes) = ops.read_u64_leb128()?;
let f = ops.read_u64_leb128()?;
row.file_idx = f as u32;
Some(RunResult::Ok)
}
ADVANCE_PC => {
let (adv, _bytes) = ops.read_u64_leb128()?;
let adv = ops.read_u64_leb128()?;
row.addr += adv as Addr;
Some(RunResult::NewRow)
}
ADVANCE_LINE => {
let (adv, _bytes) = ops.read_i64_leb128()?;
let adv = ops.read_i64_leb128()?;
row.file_line = (row.file_line as i64 + adv) as u32;
Some(RunResult::Ok)
}
Expand Down
88 changes: 58 additions & 30 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,8 @@ pub(crate) trait ReadRaw<'data> {
/// Consume and return `len` bytes.
fn read_slice(&mut self, len: usize) -> Option<&'data [u8]>;

/// Consume and return `N` bytes as a fixed-size array.
fn read_array<const N: usize>(&mut self) -> Option<[u8; N]>;

/// Read a NUL terminated string.
fn read_cstr(&mut self) -> Option<&'data CStr>;

Expand Down Expand Up @@ -470,36 +472,55 @@ pub(crate) trait ReadRaw<'data> {

/// Read a `u64` encoded as unsigned variable length little endian base 128
/// value.
///
/// The function returns the value read along with the number of bytes
/// consumed.
fn read_u64_leb128(&mut self) -> Option<(u64, u8)> {
let mut shift = 0;
let mut value = 0u64;
while let Some(bytes) = self.read_slice(1) {
if let [byte] = bytes {
value |= ((byte & 0b0111_1111) as u64) << shift;
shift += 7;
if (byte & 0b1000_0000) == 0 {
return Some((value, shift / 7))
}
//
// Slightly adjusted copy of `rustc` implementation:
// https://github.com/rust-lang/rust/blob/7ebd2bdbf6d798e6e711a0100981b0ff029abf5f/compiler/rustc_serialize/src/leb128.rs#L54
fn read_u64_leb128(&mut self) -> Option<u64> {
// The first iteration of this loop is unpeeled. This is a
// performance win because this code is hot and integer values less
// than 128 are very common, typically occurring 50-80% or more of
// the time, even for u64 and u128.
let [byte] = self.read_array::<1>()?;
if (byte & 0x80) == 0 {
return Some(byte as u64);
}
let mut result = (byte & 0x7F) as u64;
let mut shift = 7;
// NOTE(review): `shift` grows by 7 for every continuation byte and is
// never bounded. An encoding longer than ten bytes would push it to 70,
// which overflows the `u64` shifts below -- consider rejecting such
// input instead of shifting.
loop {
let [byte] = self.read_array::<1>()?;
if (byte & 0x80) == 0 {
result |= (byte as u64) << shift;
return Some(result);
} else {
unreachable!()
result |= ((byte & 0x7F) as u64) << shift;
}
shift += 7;
}
None
}

/// Read an `i64` encoded as signed variable length little endian base 128
/// value.
///
/// The function returns the value read along with the number of bytes
/// consumed.
fn read_i64_leb128(&mut self) -> Option<(i64, u8)> {
let (value, shift) = self.read_u64_leb128()?;
let sign_bits = u64::BITS as u8 - shift * 7;
let value = ((value as i64) << sign_bits) >> sign_bits;
Some((value, shift))
/// Decode a signed LEB128 value into an `i64`, consuming the bytes read.
///
/// Returns `None` if the input ends before the terminating byte (one with
/// the high bit clear) is seen, or if the encoding continues past the ten
/// bytes that a 64 bit value can occupy.
fn read_i64_leb128(&mut self) -> Option<i64> {
    let mut result = 0;
    let mut shift = 0;
    let mut byte;

    loop {
        [byte] = self.read_array::<1>()?;

        // Ten 7-bit groups already cover all 64 bits. An eleventh group
        // would require a shift of 70, which panics in overflow-checked
        // builds and silently wraps otherwise, so treat such over-long
        // (malformed) input as unreadable.
        if shift >= <i64>::BITS {
            return None;
        }

        // The low seven bits are payload, least significant group first.
        result |= <i64>::from(byte & 0x7F) << shift;
        shift += 7;

        // A clear high bit marks the final byte of the encoding.
        if (byte & 0x80) == 0 {
            break;
        }
    }

    // Bit 6 of the final byte is the sign bit of the encoded value. If the
    // groups read did not already fill the whole `i64`, replicate it into
    // the remaining high bits.
    if (shift < <i64>::BITS) && ((byte & 0x40) != 0) {
        // sign extend
        result |= !0 << shift;
    }

    Some(result)
}
}

Expand Down Expand Up @@ -527,6 +548,16 @@ impl<'data> ReadRaw<'data> for &'data [u8] {
Some(a)
}

#[inline]
fn read_array<const N: usize>(&mut self) -> Option<[u8; N]> {
    // Bail out early unless the `ensure` check for `N` bytes succeeds.
    self.ensure(N)?;

    // Detach the leading `N` bytes and advance the slice past them.
    let (head, tail) = self.split_at(N);
    *self = tail;

    // SAFETY: `split_at(N)` yields a first half of exactly `N` bytes, so
    //         the slice-to-array conversion cannot fail.
    Some(unsafe { <[u8; N]>::try_from(head).unwrap_unchecked() })
}

#[inline]
fn read_cstr(&mut self) -> Option<&'data CStr> {
let idx = self.iter().position(|byte| *byte == b'\0')?;
Expand Down Expand Up @@ -815,13 +846,11 @@ mod tests {
#[test]
fn leb128_reading() {
// The three bytes carry the 7-bit groups 0x74, 0x73 and 0x75 (least
// significant group first), which together form the 21-bit pattern
// 0x1d79f4.
let data = [0xf4, 0xf3, 0x75];
let (v, s) = data.as_slice().read_u64_leb128().unwrap();
let v = data.as_slice().read_u64_leb128().unwrap();
assert_eq!(v, 0x1d79f4);
assert_eq!(s, 3);

// Bit 6 of the final byte (0x75) is set, so the signed reading of the
// same bytes is sign-extended to a negative value.
let (v, s) = data.as_slice().read_i64_leb128().unwrap();
let v = data.as_slice().read_i64_leb128().unwrap();
assert_eq!(v, -165388);
assert_eq!(s, 3);
}

/// Check that we can read a NUL terminated string from a slice.
Expand Down Expand Up @@ -941,16 +970,15 @@ mod tests {
];

for (data, expected) in data {
let (v, _s) = data.as_slice().read_u64_leb128().unwrap();
let v = data.as_slice().read_u64_leb128().unwrap();
assert_eq!(v, expected);
}

let () = b.iter(|| {
for (data, _) in data {
let mut slice = black_box(data.as_slice());
let (v, s) = slice.read_u64_leb128().unwrap();
let v = slice.read_u64_leb128().unwrap();
black_box(v);
black_box(s);
}
});
}
Expand Down