From a89894d5da589008ea39bade3953dd5a17058cec Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Mon, 19 Aug 2024 19:04:43 -0400 Subject: [PATCH 1/2] Don't alloca for unused locals --- compiler/rustc_codegen_ssa/src/mir/analyze.rs | 12 +- compiler/rustc_codegen_ssa/src/mir/mod.rs | 24 +-- compiler/rustc_middle/src/mir/terminator.rs | 9 + compiler/rustc_middle/src/mir/traversal.rs | 165 ++++++++++++++++++ 4 files changed, 195 insertions(+), 15 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/mir/analyze.rs b/compiler/rustc_codegen_ssa/src/mir/analyze.rs index c8cf341628c32..bcafba2cf2932 100644 --- a/compiler/rustc_codegen_ssa/src/mir/analyze.rs +++ b/compiler/rustc_codegen_ssa/src/mir/analyze.rs @@ -15,13 +15,18 @@ use crate::traits::*; pub fn non_ssa_locals<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( fx: &FunctionCx<'a, 'tcx, Bx>, + traversal_order: &[mir::BasicBlock], + reachable_locals: &BitSet, ) -> BitSet { let mir = fx.mir; let dominators = mir.basic_blocks.dominators(); let locals = mir .local_decls - .iter() - .map(|decl| { + .iter_enumerated() + .map(|(local, decl)| { + if !reachable_locals.contains(local) { + return LocalKind::Unused; + } let ty = fx.monomorphize(decl.ty); let layout = fx.cx.spanned_layout_of(ty, decl.source_info.span); if layout.is_zst() { @@ -44,7 +49,8 @@ pub fn non_ssa_locals<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( // If there exists a local definition that dominates all uses of that local, // the definition should be visited first. Traverse blocks in an order that // is a topological sort of dominance partial order. - for (bb, data) in traversal::reverse_postorder(mir) { + for bb in traversal_order.iter().copied() { + let data = &mir.basic_blocks[bb]; analyzer.visit_basic_block_data(bb, data); } diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs index de94d87bcea7a..8c6b320e8a9f6 100644 --- a/compiler/rustc_codegen_ssa/src/mir/mod.rs +++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs @@ -192,6 +192,9 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( }) .collect(); + let (traversal_order, reachable_locals) = + traversal::mono_reachable_reverse_postorder(mir, cx.tcx(), instance); + let mut fx = FunctionCx { instance, mir, @@ -218,7 +221,7 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( fx.per_local_var_debug_info = fx.compute_per_local_var_debug_info(&mut start_bx); - let memory_locals = analyze::non_ssa_locals(&fx); + let memory_locals = analyze::non_ssa_locals(&fx, &traversal_order, &reachable_locals); // Allocate variable and temp allocas let local_values = { @@ -277,17 +280,14 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( // So drop the builder of `start_llbb` to avoid having two at the same time. drop(start_bx); - let reachable_blocks = traversal::mono_reachable_as_bitset(mir, cx.tcx(), instance); - - // Codegen the body of each block using reverse postorder - for (bb, _) in traversal::reverse_postorder(mir) { - if reachable_blocks.contains(bb) { - fx.codegen_block(bb); - } else { - // We want to skip this block, because it's not reachable. But we still create - // the block so terminators in other blocks can reference it. - fx.codegen_block_as_unreachable(bb); - } + let mut unreached_blocks = BitSet::new_filled(mir.basic_blocks.len()); + // Codegen the body of each reachable block using our reverse postorder list. + for bb in traversal_order { + fx.codegen_block(bb); + unreached_blocks.remove(bb); + } + for bb in unreached_blocks.iter() { + fx.codegen_block_as_unreachable(bb); } } diff --git a/compiler/rustc_middle/src/mir/terminator.rs b/compiler/rustc_middle/src/mir/terminator.rs index 962b93a25aac6..7e274bb9cd524 100644 --- a/compiler/rustc_middle/src/mir/terminator.rs +++ b/compiler/rustc_middle/src/mir/terminator.rs @@ -413,6 +413,15 @@ mod helper { use super::*; pub type Successors<'a> = impl DoubleEndedIterator + 'a; pub type SuccessorsMut<'a> = impl DoubleEndedIterator + 'a; + + impl SwitchTargets { + #[inline] + pub fn successors_for_value(&self, value: u128) -> Successors<'_> { + let target = self.target_for_value(value); + (&[]).into_iter().copied().chain(Some(target)) + } + } + impl<'tcx> TerminatorKind<'tcx> { #[inline] pub fn successors(&self) -> Successors<'_> { diff --git a/compiler/rustc_middle/src/mir/traversal.rs b/compiler/rustc_middle/src/mir/traversal.rs index 245e9096bad47..e0bb4fd81bd27 100644 --- a/compiler/rustc_middle/src/mir/traversal.rs +++ b/compiler/rustc_middle/src/mir/traversal.rs @@ -232,6 +232,155 @@ pub fn postorder<'a, 'tcx>( reverse_postorder(body).rev() } +pub struct MonoReachablePostorder<'a, 'tcx> { + basic_blocks: &'a IndexSlice>, + visited: BitSet, + visit_stack: Vec<(BasicBlock, Successors<'a>)>, + locals: BitSet, + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, +} + +impl<'a, 'tcx> MonoReachablePostorder<'a, 'tcx> { + pub fn new( + body: &'a Body<'tcx>, + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + ) -> MonoReachablePostorder<'a, 'tcx> { + let basic_blocks = &body.basic_blocks; + let mut po = MonoReachablePostorder { + basic_blocks, + visited: BitSet::new_empty(basic_blocks.len()), + visit_stack: Vec::new(), + locals: BitSet::new_empty(body.local_decls.len()), + tcx, + instance, + }; + + let root = START_BLOCK; + let data = &po.basic_blocks[root]; + + UsedLocals { locals: &mut po.locals }.visit_basic_block_data(root, data); + if let Some(ref term) = data.terminator { + po.visited.insert(root); + + let successors = if let Some((bits, targets)) = + Body::try_const_mono_switchint(tcx, instance, data) + { + targets.successors_for_value(bits) + } else { + term.successors() + }; + + po.visit_stack.push((root, successors)); + po.traverse_successor(); + } + + po + } + + fn traverse_successor(&mut self) { + // This is quite a complex loop due to 1. the borrow checker not liking it much + // and 2. what exactly is going on is not clear + // + // It does the actual traversal of the graph, while the `next` method on the iterator + // just pops off of the stack. `visit_stack` is a stack containing pairs of nodes and + // iterators over the successors of those nodes. Each iteration attempts to get the next + // node from the top of the stack, then pushes that node and an iterator over the + // successors to the top of the stack. This loop only grows `visit_stack`, stopping when + // we reach a child that has no children that we haven't already visited. + // + // For a graph that looks like this: + // + // A + // / \ + // / \ + // B C + // | | + // | | + // | D + // \ / + // \ / + // E + // + // The state of the stack starts out with just the root node (`A` in this case); + // [(A, [B, C])] + // + // When the first call to `traverse_successor` happens, the following happens: + // + // [(C, [D]), // `C` taken from the successors of `A`, pushed to the + // // top of the stack along with the successors of `C` + // (A, [B])] + // + // [(D, [E]), // `D` taken from successors of `C`, pushed to stack + // (C, []), + // (A, [B])] + // + // [(E, []), // `E` taken from successors of `D`, pushed to stack + // (D, []), + // (C, []), + // (A, [B])] + // + // Now that the top of the stack has no successors we can traverse, each item will + // be popped off during iteration until we get back to `A`. This yields [E, D, C]. + // + // When we yield `C` and call `traverse_successor`, we push `B` to the stack, but + // since we've already visited `E`, that child isn't added to the stack. The last + // two iterations yield `B` and finally `A` for a final traversal of [E, D, C, B, A] + while let Some(bb) = self.visit_stack.last_mut().and_then(|(_, iter)| iter.next_back()) { + if self.visited.insert(bb) { + let data = &self.basic_blocks[bb]; + UsedLocals { locals: &mut self.locals }.visit_basic_block_data(bb, data); + + let Some(term) = &data.terminator else { + continue; + }; + + let successors = if let Some((bits, targets)) = + Body::try_const_mono_switchint(self.tcx, self.instance, data) + { + targets.successors_for_value(bits) + } else { + term.successors() + }; + + self.visit_stack.push((bb, successors)); + } + } + } +} + +impl<'tcx> Iterator for MonoReachablePostorder<'_, 'tcx> { + type Item = BasicBlock; + + fn next(&mut self) -> Option { + let (bb, _) = self.visit_stack.pop()?; + self.traverse_successor(); + + Some(bb) + } + + fn size_hint(&self) -> (usize, Option) { + // All the blocks, minus the number of blocks we've visited. + let remaining = self.basic_blocks.len() - self.visited.count(); + (remaining, Some(remaining)) + } +} + +pub fn mono_reachable_reverse_postorder<'a, 'tcx>( + body: &'a Body<'tcx>, + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, +) -> (Vec, BitSet) { + let mut iter = MonoReachablePostorder::new(body, tcx, instance); + let mut items = Vec::with_capacity(body.basic_blocks.len()); + while let Some(block) = iter.next() { + items.push(block); + } + items.reverse(); + (items, iter.locals) +} + /// Returns an iterator over all basic blocks reachable from the `START_BLOCK` in no particular /// order. /// @@ -320,6 +469,22 @@ pub struct MonoReachable<'a, 'tcx> { worklist: BitSet, } +struct UsedLocals<'a> { + locals: &'a mut BitSet, +} + +use crate::mir::visit::Visitor; +impl<'a, 'tcx> Visitor<'tcx> for UsedLocals<'a> { + fn visit_local( + &mut self, + local: Local, + _ctx: crate::mir::visit::PlaceContext, + _location: Location, + ) { + self.locals.insert(local); + } +} + impl<'a, 'tcx> MonoReachable<'a, 'tcx> { pub fn new( body: &'a Body<'tcx>, From 66653dcc8b1b4c54e37173e665d94752142efb20 Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Tue, 20 Aug 2024 21:15:51 -0400 Subject: [PATCH 2/2] Update codegen tests --- tests/codegen/constant-branch.rs | 4 +--- tests/codegen/no-alloca-inside-if-false.rs | 23 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 tests/codegen/no-alloca-inside-if-false.rs diff --git a/tests/codegen/constant-branch.rs b/tests/codegen/constant-branch.rs index a2710cc4b2586..8f8438b0b1a45 100644 --- a/tests/codegen/constant-branch.rs +++ b/tests/codegen/constant-branch.rs @@ -41,10 +41,8 @@ pub fn if_constant_match() { _ => 4, }; - // CHECK: br label %[[MINUS1:.+]] + // CHECK: br label %{{.+}} _ = match -1 { - // CHECK: [[MINUS1]]: - // CHECK: store i32 1 -1 => 1, _ => 0, } diff --git a/tests/codegen/no-alloca-inside-if-false.rs b/tests/codegen/no-alloca-inside-if-false.rs new file mode 100644 index 0000000000000..1bfbf71da4d03 --- /dev/null +++ b/tests/codegen/no-alloca-inside-if-false.rs @@ -0,0 +1,23 @@ +//@ compile-flags: -Cno-prepopulate-passes -Copt-level=0 + +#![crate_type = "lib"] + +#[inline(never)] +fn test() { + // CHECK-LABEL: no_alloca_inside_if_false::test + // CHECK: start: + // CHECK-NEXT: %0 = alloca + // CHECK-NEXT: %vec = alloca + // CHECK-NOT: %arr = alloca + if const { SIZE < 4096 } { + let arr = [0u8; SIZE]; + std::hint::black_box(&arr); + } else { + let vec = vec![0u8; SIZE]; + std::hint::black_box(&vec); + } +} + +pub fn main() { + test::<8192>(); +}