From 3f7f5e8a2e3c9bb134d160241a6695bb379db647 Mon Sep 17 00:00:00 2001 From: Michael Bradshaw Date: Tue, 22 Mar 2022 20:21:56 -0600 Subject: [PATCH 1/3] Optimize RcInnerPtr::inc_strong instruction count Inspired by this internals thread: https://internals.rust-lang.org/t/rc-optimization-on-64-bit-targets/16362 [The generated assembly is a bit smaller](https://rust.godbolt.org/z/TeTnf6144) and is a more efficient usage of the CPU's instruction cache. `unlikely` doesn't impact any of the small artificial tests I've done, but I've included it in case it might help more complex scenarios when this is inlined. --- library/alloc/src/rc.rs | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index ea651c075d968..8e7946dfd971e 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -2512,14 +2512,21 @@ trait RcInnerPtr { fn inc_strong(&self) { let strong = self.strong(); + // We insert an `assume` here to hint LLVM at an otherwise + // missed optimization. + // SAFETY: The reference count will never be zero when this is + // called. + unsafe { core::intrinsics::assume(strong != 0); } + + let strong = strong.wrapping_add(1); + self.strong_ref().set(strong); + // We want to abort on overflow instead of dropping the value. - // The reference count will never be zero when this is called; - // nevertheless, we insert an abort here to hint LLVM at - // an otherwise missed optimization. - if strong == 0 || strong == usize::MAX { + // Checking after the store instead of before allows for + // slightly better code generation. + if core::intrinsics::unlikely(strong == 0) { abort(); } - self.strong_ref().set(strong + 1); } #[inline] @@ -2536,14 +2543,21 @@ trait RcInnerPtr { fn inc_weak(&self) { let weak = self.weak(); + // We insert an `assume` here to hint LLVM at an otherwise + // missed optimization. + // SAFETY: The reference count will never be zero when this is + // called. + unsafe { core::intrinsics::assume(weak != 0); } + + let weak = weak.wrapping_add(1); + self.weak_ref().set(weak); + // We want to abort on overflow instead of dropping the value. - // The reference count will never be zero when this is called; - // nevertheless, we insert an abort here to hint LLVM at - // an otherwise missed optimization. - if weak == 0 || weak == usize::MAX { + // Checking after the store instead of before allows for + // slightly better code generation. + if core::intrinsics::unlikely(weak == 0) { abort(); } - self.weak_ref().set(weak + 1); } #[inline] From f5dd42bce59bdd8ce5def70e2c87349434eb90a9 Mon Sep 17 00:00:00 2001 From: Michael Bradshaw Date: Tue, 22 Mar 2022 20:42:03 -0600 Subject: [PATCH 2/3] Format unsafe {} blocks --- library/alloc/src/rc.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index 8e7946dfd971e..858bbeb6bf3fa 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -2516,7 +2516,9 @@ trait RcInnerPtr { // missed optimization. // SAFETY: The reference count will never be zero when this is // called. - unsafe { core::intrinsics::assume(strong != 0); } + unsafe { + core::intrinsics::assume(strong != 0); + } let strong = strong.wrapping_add(1); self.strong_ref().set(strong); @@ -2547,7 +2549,9 @@ trait RcInnerPtr { // missed optimization. // SAFETY: The reference count will never be zero when this is // called. - unsafe { core::intrinsics::assume(weak != 0); } + unsafe { + core::intrinsics::assume(weak != 0); + } let weak = weak.wrapping_add(1); self.weak_ref().set(weak); From 8d14c03568d0ec99c586e17ce19a93cc0684ab9e Mon Sep 17 00:00:00 2001 From: Michael Bradshaw Date: Wed, 23 Mar 2022 08:14:53 -0600 Subject: [PATCH 3/3] Explicitly mention overflow is what we're checking --- library/alloc/src/rc.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index 858bbeb6bf3fa..6ec86047fe4de 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -2524,8 +2524,8 @@ trait RcInnerPtr { self.strong_ref().set(strong); // We want to abort on overflow instead of dropping the value. - // Checking after the store instead of before allows for - // slightly better code generation. + // Checking for overflow after the store instead of before + // allows for slightly better code generation. if core::intrinsics::unlikely(strong == 0) { abort(); } @@ -2557,8 +2557,8 @@ trait RcInnerPtr { self.weak_ref().set(weak); // We want to abort on overflow instead of dropping the value. - // Checking after the store instead of before allows for - // slightly better code generation. + // Checking for overflow after the store instead of before + // allows for slightly better code generation. if core::intrinsics::unlikely(weak == 0) { abort(); }