From 4b63e08265544f237afd6ab9db871588152a0e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Wed, 5 Jun 2024 13:49:49 -0700 Subject: [PATCH] Optimize LEB128 data reading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As it turns out, the Rust compiler uses variable length LEB128 encoded integers internally. It so happens that they spent a fair amount of effort micro-optimizing the decoding functionality [0] [1], as it's in the hot path. With this change we replace our decoding routines with these optimized ones. To make that happen more easily (and to gain some baseline speedup), also remove the "shift" return from the respective methods. As a result of these changes, we see a respectable speedup: Before: test util::tests::bench_u64_leb128_reading ... bench: 128 ns/iter (+/- 10) After: test util::tests::bench_u64_leb128_reading ... bench: 103 ns/iter (+/- 5) Gsym decoding, which uses these routines, improved as follows: main/symbolize_gsym_multi_no_setup time: [146.26 µs 146.69 µs 147.18 µs] change: [−7.2075% −5.7106% −4.4870%] (p = 0.00 < 0.02) Performance has improved. [0] https://github.com/rust-lang/rust/pull/69050 [1] https://github.com/rust-lang/rust/pull/69157 Signed-off-by: Daniel Müller --- src/gsym/inline.rs | 15 +++----- src/gsym/linetab.rs | 12 +++---- src/util.rs | 88 +++++++++++++++++++++++++++++---------------- 3 files changed, 69 insertions(+), 46 deletions(-) diff --git a/src/gsym/inline.rs b/src/gsym/inline.rs index 3aada8ac..2a95c391 100644 --- a/src/gsym/inline.rs +++ b/src/gsym/inline.rs @@ -25,8 +25,7 @@ impl InlineInfo { ) -> Result> { let range_cnt = data .read_u64_leb128() - .ok_or_invalid_data(|| "failed to read range count from inline information")? 
- .0; + .ok_or_invalid_data(|| "failed to read range count from inline information")?; let range_cnt = usize::try_from(range_cnt) .ok() .ok_or_invalid_data(|| "range count ({}) is too big")?; @@ -41,12 +40,10 @@ impl InlineInfo { for i in 0..range_cnt { let offset = data .read_u64_leb128() - .ok_or_invalid_data(|| "failed to read offset from inline information")? - .0; + .ok_or_invalid_data(|| "failed to read offset from inline information")?; let size = data .read_u64_leb128() - .ok_or_invalid_data(|| "failed to read size from inline information")? - .0; + .ok_or_invalid_data(|| "failed to read size from inline information")?; let start = base_addr .checked_add(offset) @@ -91,15 +88,13 @@ impl InlineInfo { let (call_file, call_line) = if lookup_addr.is_some() { let call_file = data .read_u64_leb128() - .ok_or_invalid_data(|| "failed to read call file from inline information")? - .0; + .ok_or_invalid_data(|| "failed to read call file from inline information")?; let call_file = u32::try_from(call_file) .ok() .ok_or_invalid_data(|| "call file index ({}) is too big")?; let call_line = data .read_u64_leb128() - .ok_or_invalid_data(|| "failed to read call line from inline information")? - .0; + .ok_or_invalid_data(|| "failed to read call line from inline information")?; let call_line = u32::try_from(call_line).unwrap_or(u32::MAX); (Some(call_file), Some(call_line)) } else { diff --git a/src/gsym/linetab.rs b/src/gsym/linetab.rs index 44da892a..84618649 100644 --- a/src/gsym/linetab.rs +++ b/src/gsym/linetab.rs @@ -48,9 +48,9 @@ impl LineTableHeader { /// /// * `data` - is what [`AddrData::data`] is. 
pub(super) fn parse(data: &mut &[u8]) -> Option { - let (min_delta, _bytes) = data.read_i64_leb128()?; - let (max_delta, _bytes) = data.read_i64_leb128()?; - let (first_line, _bytes) = data.read_u64_leb128()?; + let min_delta = data.read_i64_leb128()?; + let max_delta = data.read_i64_leb128()?; + let first_line = data.read_u64_leb128()?; let header = Self { min_delta, @@ -108,17 +108,17 @@ pub(crate) fn run_op( match op { END_SEQUENCE => Some(RunResult::End), SET_FILE => { - let (f, _bytes) = ops.read_u64_leb128()?; + let f = ops.read_u64_leb128()?; row.file_idx = f as u32; Some(RunResult::Ok) } ADVANCE_PC => { - let (adv, _bytes) = ops.read_u64_leb128()?; + let adv = ops.read_u64_leb128()?; row.addr += adv as Addr; Some(RunResult::NewRow) } ADVANCE_LINE => { - let (adv, _bytes) = ops.read_i64_leb128()?; + let adv = ops.read_i64_leb128()?; row.file_line = (row.file_line as i64 + adv) as u32; Some(RunResult::Ok) } diff --git a/src/util.rs b/src/util.rs index 8090f86c..4bdfd374 100644 --- a/src/util.rs +++ b/src/util.rs @@ -387,6 +387,8 @@ pub(crate) trait ReadRaw<'data> { /// Consume and return `len` bytes. fn read_slice(&mut self, len: usize) -> Option<&'data [u8]>; + fn read_array(&mut self) -> Option<[u8; N]>; + /// Read a NUL terminated string. fn read_cstr(&mut self) -> Option<&'data CStr>; @@ -470,36 +472,55 @@ pub(crate) trait ReadRaw<'data> { /// Read a `u64` encoded as unsigned variable length little endian base 128 /// value. - /// - /// The function returns the value read along with the number of bytes - /// consumed. 
- fn read_u64_leb128(&mut self) -> Option<(u64, u8)> { - let mut shift = 0; - let mut value = 0u64; - while let Some(bytes) = self.read_slice(1) { - if let [byte] = bytes { - value |= ((byte & 0b0111_1111) as u64) << shift; - shift += 7; - if (byte & 0b1000_0000) == 0 { - return Some((value, shift / 7)) - } + // + // Slightly adjusted copy of `rustc` implementation: + // https://github.com/rust-lang/rust/blob/7ebd2bdbf6d798e6e711a0100981b0ff029abf5f/compiler/rustc_serialize/src/leb128.rs#L54 + fn read_u64_leb128(&mut self) -> Option { + // The first iteration of this loop is unpeeled. This is a + // performance win because this code is hot and integer values less + // than 128 are very common, typically occurring 50-80% or more of + // the time, even for u64 and u128. + let [byte] = self.read_array::<1>()?; + if (byte & 0x80) == 0 { + return Some(byte as u64); + } + let mut result = (byte & 0x7F) as u64; + let mut shift = 7; + loop { + let [byte] = self.read_array::<1>()?; + if (byte & 0x80) == 0 { + result |= (byte as u64) << shift; + return Some(result); } else { - unreachable!() + result |= ((byte & 0x7F) as u64) << shift; } + shift += 7; } - None } /// Read a `u64` encoded as signed variable length little endian base 128 /// value. - /// - /// The function returns the value read along with the number of bytes - /// consumed. 
- fn read_i64_leb128(&mut self) -> Option<(i64, u8)> { - let (value, shift) = self.read_u64_leb128()?; - let sign_bits = u64::BITS as u8 - shift * 7; - let value = ((value as i64) << sign_bits) >> sign_bits; - Some((value, shift)) + fn read_i64_leb128(&mut self) -> Option { + let mut result = 0; + let mut shift = 0; + let mut byte; + + loop { + [byte] = self.read_array::<1>()?; + result |= ::from(byte & 0x7F) << shift; + shift += 7; + + if (byte & 0x80) == 0 { + break; + } + } + + if (shift < ::BITS) && ((byte & 0x40) != 0) { + // sign extend + result |= !0 << shift; + } + + Some(result) } } @@ -527,6 +548,16 @@ impl<'data> ReadRaw<'data> for &'data [u8] { Some(a) } + #[inline] + fn read_array(&mut self) -> Option<[u8; N]> { + self.ensure(N)?; + let (a, b) = self.split_at(N); + *self = b; + // SAFETY: We *know* that `a` has length `N`. + let array = unsafe { <[u8; N]>::try_from(a).unwrap_unchecked() }; + Some(array) + } + #[inline] fn read_cstr(&mut self) -> Option<&'data CStr> { let idx = self.iter().position(|byte| *byte == b'\0')?; @@ -815,13 +846,11 @@ mod tests { #[test] fn leb128_reading() { let data = [0xf4, 0xf3, 0x75]; - let (v, s) = data.as_slice().read_u64_leb128().unwrap(); + let v = data.as_slice().read_u64_leb128().unwrap(); assert_eq!(v, 0x1d79f4); - assert_eq!(s, 3); - let (v, s) = data.as_slice().read_i64_leb128().unwrap(); + let v = data.as_slice().read_i64_leb128().unwrap(); assert_eq!(v, -165388); - assert_eq!(s, 3); } /// Check that we can read a NUL terminated string from a slice. @@ -941,16 +970,15 @@ mod tests { ]; for (data, expected) in data { - let (v, _s) = data.as_slice().read_u64_leb128().unwrap(); + let v = data.as_slice().read_u64_leb128().unwrap(); assert_eq!(v, expected); } let () = b.iter(|| { for (data, _) in data { let mut slice = black_box(data.as_slice()); - let (v, s) = slice.read_u64_leb128().unwrap(); + let v = slice.read_u64_leb128().unwrap(); black_box(v); - black_box(s); } }); }