Skip to content

Commit

Permalink
pulley: Optimize bounds-checks for 1-byte loads/stores (#10100)
Browse files Browse the repository at this point in the history
These have a different pattern than N-byte loads/stores where the
condition being tested is `a >= b` which doesn't match the pattern for
N-byte loads/stores with `a > b - N`. This commit adds dedicated opcodes
to Pulley for this pattern to help optimize single-byte loads/stores.
  • Loading branch information
alexcrichton authored Jan 24, 2025
1 parent 4dd7cd6 commit 5dfccc0
Show file tree
Hide file tree
Showing 7 changed files with 210 additions and 25 deletions.
39 changes: 36 additions & 3 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,14 @@
;; Each of these translates to a single "xbc" (x-register bounds check)
;; instruction.
;;
;; Currently there's a 2x2 matrix here. One axis is 32-bit hosts and 64-bit
;; hosts while the other axis is `a < b` vs `a > b`. These all bottom out
;; in the `emit_xbc32` helper below.
;; Currently there's a 2x2x2 matrix here:
;;
;; * One axis is 32-bit hosts and 64-bit hosts
;; * One axis is `a < b` vs `b > a` (e.g. flipped arguments, same meaning)
;; * One axis is `a < b - N` vs `a <= b` - the condition for multi-byte memory
;; accesses vs single-byte accesses
;;
;; These all bottom out in either `emit_xbc32` or `emit_xbc32_strict` below.
(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan) a @ (value_type $I32) (isub b (u8_from_iconst size))) code))
(if-let (PointerWidth.PointerWidth32) (pointer_width))
(side_effect (emit_xbc32 a b size code)))
Expand All @@ -142,6 +147,23 @@
(if-let (PointerWidth.PointerWidth64) (pointer_width))
(side_effect (emit_xbc32 a b size code)))

(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThanOrEqual) a @ (value_type $I32) b) code))
(if-let (PointerWidth.PointerWidth32) (pointer_width))
(side_effect (emit_xbc32_strict a b code)))

(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThanOrEqual) (uextend a @ (value_type $I32)) b) code))
(if-let (PointerWidth.PointerWidth64) (pointer_width))
(side_effect (emit_xbc32_strict a b code)))

(rule 1 (lower (trapnz (icmp (IntCC.UnsignedLessThanOrEqual) b a @ (value_type $I32)) code))
(if-let (PointerWidth.PointerWidth32) (pointer_width))
(side_effect (emit_xbc32_strict a b code)))

(rule 1 (lower (trapnz (icmp (IntCC.UnsignedLessThanOrEqual) b (uextend a @ (value_type $I32))) code))
(if-let (PointerWidth.PointerWidth64) (pointer_width))
(side_effect (emit_xbc32_strict a b code)))


;; Helper to emit a bounds check which traps if the first value is greater than
;; the second value minus the immediate size provided here.
;;
Expand All @@ -159,6 +181,17 @@
(rule (emit_xbc32_sunk a (SunkLoad.Load _ bound_addr bound_off) size code)
(pulley_xbc32_boundne_trap a bound_addr bound_off size code))

(decl emit_xbc32_strict (Value Value TrapCode) SideEffectNoResult)
(rule 0 (emit_xbc32_strict a bound code)
(pulley_xbc32_strict_bound_trap a bound code))
(rule 1 (emit_xbc32_strict a bound code)
(if-let load (sinkable_load bound))
(emit_xbc32_strict_sunk a load code))

(decl emit_xbc32_strict_sunk (Value SunkLoad TrapCode) SideEffectNoResult)
(rule (emit_xbc32_strict_sunk a (SunkLoad.Load _ bound_addr bound_off) code)
(pulley_xbc32_strict_boundne_trap a bound_addr bound_off code))

;;;; Rules for `get_stack_pointer` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (get_stack_pointer))
Expand Down
60 changes: 60 additions & 0 deletions cranelift/filetests/filetests/isa/pulley32/xbc.clif
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,63 @@ block0(v0: i32, v1: i32, v2: i32):
; xbc32_bound_trap x1, x3, 24
; ret

function %one_byte(i32, i32) {
block0(v0: i32, v1: i32):
v2 = load.i32 v0+16
v5 = icmp uge v1, v2
trapnz v5, user1
return
}

; VCode:
; block0:
; xbc32_strict_boundne_trap x1, x0, 16 // trap=TrapCode(1)
; ret
;
; Disassembled:
; xbc32_strict_boundne_trap x1, x0, 16
; ret


function %one_byte_flip(i32, i32) {
block0(v0: i32, v1: i32):
v2 = load.i32 v0+16
v5 = icmp ule v2, v1
trapnz v5, user1
return
}

; VCode:
; block0:
; xbc32_strict_boundne_trap x1, x0, 16 // trap=TrapCode(1)
; ret
;
; Disassembled:
; xbc32_strict_boundne_trap x1, x0, 16
; ret

function %one_byte_twice(i32, i32, i32) {
block0(v0: i32, v1: i32, v2: i32):
v3 = load.i32 v0+16

v5 = icmp uge v1, v3
trapnz v5, user1

v6 = icmp uge v2, v3
trapnz v6, user1
return
}

; VCode:
; block0:
; x4 = xload32 x0+16 // flags =
; xbc32_strict_bound_trap x1, x4 // trap=TrapCode(1)
; xbc32_strict_bound_trap x2, x4 // trap=TrapCode(1)
; ret
;
; Disassembled:
; xload32le_offset8 x4, x0, 16
; xbc32_strict_bound_trap x1, x4
; xbc32_strict_bound_trap x2, x4
; ret

66 changes: 66 additions & 0 deletions cranelift/filetests/filetests/isa/pulley64/xbc.clif
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,69 @@ block0(v0: i64, v1: i32, v2: i32):
; ret
; trap

function %one_byte(i64, i32) {
block0(v0: i64, v1: i32):
v2 = load.i64 v0+16
v3 = uextend.i64 v1
v6 = icmp uge v3, v2
trapnz v6, user1
return
}

; VCode:
; block0:
; xbc32_strict_boundne_trap x1, x0, 16 // trap=TrapCode(1)
; ret
;
; Disassembled:
; xbc32_strict_boundne_trap x1, x0, 16
; ret


function %one_byte_flip(i64, i32) {
block0(v0: i64, v1: i32):
v2 = load.i64 v0+16
v3 = uextend.i64 v1
v6 = icmp ule v2, v3
trapnz v6, user1
return
}

; VCode:
; block0:
; xbc32_strict_boundne_trap x1, x0, 16 // trap=TrapCode(1)
; ret
;
; Disassembled:
; xbc32_strict_boundne_trap x1, x0, 16
; ret


function %one_byte_twice(i64, i32, i32) {
block0(v0: i64, v1: i32, v2: i32):
v3 = load.i64 v0+16

v4 = uextend.i64 v1
v6 = icmp ule v3, v4
trapnz v6, user1

v5 = uextend.i64 v2
v7 = icmp uge v5, v3
trapnz v6, user1

return
}

; VCode:
; block0:
; x4 = xload64 x0+16 // flags =
; xbc32_strict_bound_trap x1, x4 // trap=TrapCode(1)
; xbc32_strict_bound_trap x1, x4 // trap=TrapCode(1)
; ret
;
; Disassembled:
; xload64le_offset8 x4, x0, 16
; xbc32_strict_bound_trap x1, x4
; xbc32_strict_bound_trap x1, x4
; ret

25 changes: 25 additions & 0 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2564,6 +2564,31 @@ impl OpVisitor for Interpreter<'_> {
}
}

fn xbc32_strict_bound_trap(&mut self, addr: XReg, bound: XReg) -> ControlFlow<Done> {
let bound = self.state[bound].get_u64() as usize;
let addr = self.state[addr].get_u32() as usize;
if addr >= bound {
self.done_trap::<crate::XBc32StrictBoundTrap>()
} else {
ControlFlow::Continue(())
}
}

fn xbc32_strict_boundne_trap(
&mut self,
addr: XReg,
bound_ptr: XReg,
bound_off: u8,
) -> ControlFlow<Done> {
let bound = unsafe { self.load::<usize>(bound_ptr, bound_off.into()) };
let addr = self.state[addr].get_u32() as usize;
if addr >= bound {
self.done_trap::<crate::XBc32StrictBoundNeTrap>()
} else {
ControlFlow::Continue(())
}
}

fn xload8_u32_g32(
&mut self,
dst: XReg,
Expand Down
11 changes: 11 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,17 @@ macro_rules! for_each_op {
bound_off: u8,
size: u8
};
/// `trapif(addr >= bound_ptr)` (unsigned)
xbc32_strict_bound_trap = XBc32StrictBoundTrap {
addr: XReg,
bound: XReg
};
/// `trapif(addr >= *(bound_ptr + bound_off))` (unsigned)
xbc32_strict_boundne_trap = XBc32StrictBoundNeTrap {
addr: XReg,
bound_ptr: XReg,
bound_off: u8
};
}
};
}
Expand Down
16 changes: 6 additions & 10 deletions tests/disas/pulley/pulley32_memory32.wat
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,11 @@
)
;; wasm[0]::function[0]::load8:
;; push_frame
;; xload32le_offset8 x5, x0, 44
;; br_if_xulteq32 x5, x2, 0x12 // target = 0x17
;; c: xload32le_offset8 x6, x0, 40
;; xload8_u32_g32 x0, x2, x6, 0
;; xbc32_strict_boundne_trap x2, x0, 44
;; xload32le_offset8 x5, x0, 40
;; xload8_u32_g32 x0, x2, x5, 0
;; pop_frame
;; ret
;; 17: trap
;;
;; wasm[0]::function[1]::load16:
;; push_frame
Expand Down Expand Up @@ -88,13 +86,11 @@
;;
;; wasm[0]::function[4]::store8:
;; push_frame
;; xload32le_offset8 x5, x0, 44
;; br_if_xulteq32 x5, x2, 0x12 // target = 0x17
;; c: xload32le_offset8 x6, x0, 40
;; xstore8_g32 x2, x6, 0, x3
;; xbc32_strict_boundne_trap x2, x0, 44
;; xload32le_offset8 x5, x0, 40
;; xstore8_g32 x2, x5, 0, x3
;; pop_frame
;; ret
;; 17: trap
;;
;; wasm[0]::function[5]::store16:
;; push_frame
Expand Down
18 changes: 6 additions & 12 deletions tests/disas/pulley/pulley64_memory32.wat
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,11 @@
)
;; wasm[0]::function[0]::load8:
;; push_frame
;; xload64le_offset8 x6, x0, 88
;; zext32 x7, x2
;; br_if_xulteq64 x6, x7, 0x12 // target = 0x1a
;; f: xload64le_offset8 x7, x0, 80
;; xload8_u32_g32 x0, x7, x2, 0
;; xbc32_strict_boundne_trap x2, x0, 88
;; xload64le_offset8 x5, x0, 80
;; xload8_u32_g32 x0, x5, x2, 0
;; pop_frame
;; ret
;; 1a: trap
;;
;; wasm[0]::function[1]::load16:
;; push_frame
Expand Down Expand Up @@ -93,14 +90,11 @@
;;
;; wasm[0]::function[4]::store8:
;; push_frame
;; xload64le_offset8 x6, x0, 88
;; zext32 x7, x2
;; br_if_xulteq64 x6, x7, 0x12 // target = 0x1a
;; f: xload64le_offset8 x7, x0, 80
;; xstore8_g32 x7, x2, 0, x3
;; xbc32_strict_boundne_trap x2, x0, 88
;; xload64le_offset8 x5, x0, 80
;; xstore8_g32 x5, x2, 0, x3
;; pop_frame
;; ret
;; 1a: trap
;;
;; wasm[0]::function[5]::store16:
;; push_frame
Expand Down

0 comments on commit 5dfccc0

Please sign in to comment.