Skip to content

Commit

Permalink
Optimize LEB128 data reading
Browse files Browse the repository at this point in the history
As it turns out, the Rust compiler uses variable length LEB128 encoded
integers internally. It so happens that they spent a fair amount of
effort micro-optimizing the decoding functionality [0] [1], as it's in
the hot path.
With this change we replace our decoding routines with these optimized
ones. To make that happen more easily (and to gain some baseline
speedup), also remove the "shift" return value from the respective
methods. As a result of these changes, we see a respectable speedup:

Before:
  test util::tests::bench_u64_leb128_reading  ... bench:  128 ns/iter (+/- 10)

After:
  test util::tests::bench_u64_leb128_reading  ... bench:  103 ns/iter (+/- 5)

Gsym decoding, which uses these routines, improved as follows:
  main/symbolize_gsym_multi_no_setup
    time:   [146.26 µs 146.69 µs 147.18 µs]
    change: [−7.2075% −5.7106% −4.4870%] (p = 0.00 < 0.02)
    Performance has improved.

[0] rust-lang/rust#69050
[1] rust-lang/rust#69157

Signed-off-by: Daniel Müller <deso@posteo.net>
  • Loading branch information
d-e-s-o committed Jun 5, 2024
1 parent 228ead0 commit 72419e0
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 46 deletions.
15 changes: 5 additions & 10 deletions src/gsym/inline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ impl InlineInfo {
) -> Result<Option<InlineInfo>> {
let range_cnt = data
.read_u64_leb128()
.ok_or_invalid_data(|| "failed to read range count from inline information")?
.0;
.ok_or_invalid_data(|| "failed to read range count from inline information")?;
let range_cnt = usize::try_from(range_cnt)
.ok()
.ok_or_invalid_data(|| "range count ({}) is too big")?;
Expand All @@ -41,12 +40,10 @@ impl InlineInfo {
for i in 0..range_cnt {
let offset = data
.read_u64_leb128()
.ok_or_invalid_data(|| "failed to read offset from inline information")?
.0;
.ok_or_invalid_data(|| "failed to read offset from inline information")?;
let size = data
.read_u64_leb128()
.ok_or_invalid_data(|| "failed to read size from inline information")?
.0;
.ok_or_invalid_data(|| "failed to read size from inline information")?;

let start = base_addr
.checked_add(offset)
Expand Down Expand Up @@ -91,15 +88,13 @@ impl InlineInfo {
let (call_file, call_line) = if lookup_addr.is_some() {
let call_file = data
.read_u64_leb128()
.ok_or_invalid_data(|| "failed to read call file from inline information")?
.0;
.ok_or_invalid_data(|| "failed to read call file from inline information")?;
let call_file = u32::try_from(call_file)
.ok()
.ok_or_invalid_data(|| "call file index ({}) is too big")?;
let call_line = data
.read_u64_leb128()
.ok_or_invalid_data(|| "failed to read call line from inline information")?
.0;
.ok_or_invalid_data(|| "failed to read call line from inline information")?;
let call_line = u32::try_from(call_line).unwrap_or(u32::MAX);
(Some(call_file), Some(call_line))
} else {
Expand Down
12 changes: 6 additions & 6 deletions src/gsym/linetab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ impl LineTableHeader {
///
/// * `data` - is what [`AddrData::data`] is.
pub(super) fn parse(data: &mut &[u8]) -> Option<Self> {
let (min_delta, _bytes) = data.read_i64_leb128()?;
let (max_delta, _bytes) = data.read_i64_leb128()?;
let (first_line, _bytes) = data.read_u64_leb128()?;
let min_delta = data.read_i64_leb128()?;
let max_delta = data.read_i64_leb128()?;
let first_line = data.read_u64_leb128()?;

let header = Self {
min_delta,
Expand Down Expand Up @@ -108,17 +108,17 @@ pub(crate) fn run_op(
match op {
END_SEQUENCE => Some(RunResult::End),
SET_FILE => {
let (f, _bytes) = ops.read_u64_leb128()?;
let f = ops.read_u64_leb128()?;
row.file_idx = f as u32;
Some(RunResult::Ok)
}
ADVANCE_PC => {
let (adv, _bytes) = ops.read_u64_leb128()?;
let adv = ops.read_u64_leb128()?;
row.addr += adv as Addr;
Some(RunResult::NewRow)
}
ADVANCE_LINE => {
let (adv, _bytes) = ops.read_i64_leb128()?;
let adv = ops.read_i64_leb128()?;
row.file_line = (row.file_line as i64 + adv) as u32;
Some(RunResult::Ok)
}
Expand Down
88 changes: 58 additions & 30 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,8 @@ pub(crate) trait ReadRaw<'data> {
/// Consume and return `len` bytes.
fn read_slice(&mut self, len: usize) -> Option<&'data [u8]>;

/// Consume and return `N` bytes as a fixed-size array.
fn read_array<const N: usize>(&mut self) -> Option<[u8; N]>;

/// Read a NUL terminated string.
fn read_cstr(&mut self) -> Option<&'data CStr>;

Expand Down Expand Up @@ -470,36 +472,55 @@ pub(crate) trait ReadRaw<'data> {

/// Read a `u64` encoded as unsigned variable length little endian base 128
/// value.
///
/// The function returns the value read along with the number of bytes
/// consumed.
fn read_u64_leb128(&mut self) -> Option<(u64, u8)> {
let mut shift = 0;
let mut value = 0u64;
while let Some(bytes) = self.read_slice(1) {
if let [byte] = bytes {
value |= ((byte & 0b0111_1111) as u64) << shift;
shift += 7;
if (byte & 0b1000_0000) == 0 {
return Some((value, shift / 7))
}
//
// Slightly adjusted copy of `rustc` implementation:
// https://github.com/rust-lang/rust/blob/7ebd2bdbf6d798e6e711a0100981b0ff029abf5f/compiler/rustc_serialize/src/leb128.rs#L54
fn read_u64_leb128(&mut self) -> Option<u64> {
// The first iteration of this loop is unpeeled. This is a
// performance win because this code is hot and integer values less
// than 128 are very common, typically occurring 50-80% or more of
// the time, even for u64 and u128.
let [byte] = self.read_array::<1>()?;
if (byte & 0x80) == 0 {
return Some(byte as u64);
}
let mut result = (byte & 0x7F) as u64;
let mut shift = 7;
loop {
let [byte] = self.read_array::<1>()?;
if (byte & 0x80) == 0 {
result |= (byte as u64) << shift;
return Some(result);
} else {
unreachable!()
result |= ((byte & 0x7F) as u64) << shift;
}
shift += 7;
}
None
}

/// Read an `i64` encoded as signed variable length little endian base 128
/// value.
///
/// The function returns the value read along with the number of bytes
/// consumed.
fn read_i64_leb128(&mut self) -> Option<(i64, u8)> {
let (value, shift) = self.read_u64_leb128()?;
let sign_bits = u64::BITS as u8 - shift * 7;
let value = ((value as i64) << sign_bits) >> sign_bits;
Some((value, shift))
fn read_i64_leb128(&mut self) -> Option<i64> {
    // Returns the decoded, sign-extended value. `None` is reported when
    // `read_array` cannot supply another byte before the terminating byte
    // (one without the 0x80 continuation bit) was seen, or when the
    // encoding is longer than an `i64` can hold.
    let mut result = 0i64;
    let mut shift = 0;

    loop {
        // An `i64` spans at most ten LEB128 bytes (shift amounts 0 through
        // 63). Malformed input with further continuation bytes would
        // otherwise overflow the shifts below, which panics in debug builds
        // and silently wraps the shift amount in release builds.
        if shift >= i64::BITS {
            return None;
        }

        let [byte] = self.read_array::<1>()?;
        result |= i64::from(byte & 0x7F) << shift;
        shift += 7;

        if (byte & 0x80) == 0 {
            // Bit 6 of the final byte carries the sign. Replicate it into
            // all higher bits unless the payload already filled the value.
            if shift < i64::BITS && (byte & 0x40) != 0 {
                result |= !0 << shift;
            }
            return Some(result);
        }
    }
}
}

Expand Down Expand Up @@ -527,6 +548,16 @@ impl<'data> ReadRaw<'data> for &'data [u8] {
Some(a)
}

#[inline]
fn read_array<const N: usize>(&mut self) -> Option<[u8; N]> {
    // Bail out early unless `ensure` accepts a read of `N` bytes.
    self.ensure(N)?;
    let (head, rest) = self.split_at(N);
    // SAFETY: `split_at(N)` returns a first half of exactly `N` bytes, so
    //         the slice-to-array conversion cannot fail.
    let bytes = unsafe { <[u8; N]>::try_from(head).unwrap_unchecked() };
    // Advance past the bytes that were just handed out.
    *self = rest;
    Some(bytes)
}

#[inline]
fn read_cstr(&mut self) -> Option<&'data CStr> {
let idx = self.iter().position(|byte| *byte == b'\0')?;
Expand Down Expand Up @@ -815,13 +846,11 @@ mod tests {
#[test]
fn leb128_reading() {
    // The same three bytes are decoded once as unsigned and once as signed
    // LEB128. Bit 6 of the final byte (0x75) is set, so the signed
    // interpretation is negative.
    let data = [0xf4, 0xf3, 0x75];
    let v = data.as_slice().read_u64_leb128().unwrap();
    assert_eq!(v, 0x1d79f4);

    let v = data.as_slice().read_i64_leb128().unwrap();
    assert_eq!(v, -165388);
}

/// Check that we can read a NUL terminated string from a slice.
Expand Down Expand Up @@ -941,16 +970,15 @@ mod tests {
];

for (data, expected) in data {
let (v, _s) = data.as_slice().read_u64_leb128().unwrap();
let v = data.as_slice().read_u64_leb128().unwrap();
assert_eq!(v, expected);
}

let () = b.iter(|| {
for (data, _) in data {
let mut slice = black_box(data.as_slice());
let (v, s) = slice.read_u64_leb128().unwrap();
let v = slice.read_u64_leb128().unwrap();
black_box(v);
black_box(s);
}
});
}
Expand Down

0 comments on commit 72419e0

Please sign in to comment.