Skip to content

Commit

Permalink
riscv64: Fix regalloc panic with bor+bnot on floats (#5857)
Browse files Browse the repository at this point in the history
  • Loading branch information
afonso360 authored Mar 13, 2023
1 parent d03612c commit ad0bce3
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 19 deletions.
50 changes: 31 additions & 19 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -186,40 +186,44 @@
(alu_rrr (AluOPRRR.RemU) x y)))

;;;; Rules for `and` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_64 ty) (band x y)))
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (band x y)))
(alu_rrr (AluOPRRR.And) x y))

;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule 2 (lower (has_type (fits_in_64 ty) (band x (imm12_from_value y))))
(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (band x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Andi) x y))

(rule 1 (lower (has_type (fits_in_64 ty) (band (imm12_from_value x) y)))
(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (band (imm12_from_value x) y)))
(alu_rr_imm12 (AluOPRRI.Andi) y x))

(rule (lower (has_type $I128 (band x y)))
(lower_b128_binary (AluOPRRR.And) x y))

(rule (lower (has_type $F32 (band x y)))
(lower_float_binary (AluOPRRR.And) x y $F32))

(rule (lower (has_type $F64 (band x y)))
(lower_float_binary (AluOPRRR.And) x y $F64))

;; Specialized lowerings for `(band x (bnot y))` which is additionally produced
;; by Cranelift's `band_not` instruction that is legalized into the simpler
;; forms early on.

(rule 3 (lower (has_type (fits_in_64 ty) (band x (bnot y))))
(rule 3 (lower (has_type (fits_in_64 (ty_int ty)) (band x (bnot y))))
(if-let $true (has_zbb))
(gen_andn x y))
(rule 4 (lower (has_type (fits_in_64 ty) (band (bnot y) x)))

(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (band (bnot y) x)))
(if-let $true (has_zbb))
(gen_andn x y))

(rule 5 (lower (has_type $I128 (band x (bnot y))))
(if-let $true (has_zbb))
(let
((low Reg (gen_andn (value_regs_get x 0) (value_regs_get y 0)))
(high Reg (gen_andn (value_regs_get x 1) (value_regs_get y 1))))
(value_regs low high)))

(rule 6 (lower (has_type $I128 (band (bnot y) x)))
(if-let $true (has_zbb))
(let
Expand All @@ -229,30 +233,34 @@


;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_64 ty) (bor x y)))
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bor x y)))
(alu_rrr (AluOPRRR.Or) x y))

;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule 2 (lower (has_type (fits_in_64 ty) (bor x (imm12_from_value y))))
(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Ori) x y))

(rule 1 (lower (has_type (fits_in_64 ty) (bor (imm12_from_value x) y)))
(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bor (imm12_from_value x) y)))
(alu_rr_imm12 (AluOPRRI.Ori) y x))

(rule (lower (has_type $I128 (bor x y)))
(lower_b128_binary (AluOPRRR.Or) x y))

(rule (lower (has_type $F32 (bor x y)))
(lower_float_binary (AluOPRRR.Or) x y $F32))

(rule (lower (has_type $F64 (bor x y)))
(lower_float_binary (AluOPRRR.Or) x y $F64))

;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced
;; by Cranelift's `bor_not` instruction that is legalized into the simpler
;; forms early on.

(rule 3 (lower (has_type (fits_in_64 ty) (bor x (bnot y))))
(rule 3 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (bnot y))))
(if-let $true (has_zbb))
(gen_orn x y))
(rule 4 (lower (has_type (fits_in_64 ty) (bor (bnot y) x)))

(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (bor (bnot y) x)))
(if-let $true (has_zbb))
(gen_orn x y))

Expand All @@ -262,6 +270,7 @@
((low Reg (gen_orn (value_regs_get x 0) (value_regs_get y 0)))
(high Reg (gen_orn (value_regs_get x 1) (value_regs_get y 1))))
(value_regs low high)))

(rule 6 (lower (has_type $I128 (bor (bnot y) x)))
(if-let $true (has_zbb))
(let
Expand All @@ -271,40 +280,43 @@


;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_64 ty) (bxor x y)))
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y)))
(alu_rrr (AluOPRRR.Xor) x y))

;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule 2 (lower (has_type (fits_in_64 ty) (bxor x (imm12_from_value y))))
(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Xori) x y))

(rule 1 (lower (has_type (fits_in_64 ty) (bxor (imm12_from_value x) y)))
(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor (imm12_from_value x) y)))
(alu_rr_imm12 (AluOPRRI.Xori) y x))

(rule (lower (has_type $I128 (bxor x y)))
(lower_b128_binary (AluOPRRR.Xor) x y))

(rule (lower (has_type $F32 (bxor x y)))
(lower_float_binary (AluOPRRR.Xor) x y $F32))

(rule (lower (has_type $F64 (bxor x y)))
(lower_float_binary (AluOPRRR.Xor) x y $F64))


;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type fits_in_64 (bnot x)))
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bnot x)))
(alu_rr_imm12 (AluOPRRI.Xori) x (imm_from_neg_bits -1)))

(rule (lower (has_type $I128 (bnot x)))
(bnot_128 x))

(rule
(lower (has_type $F32 (bnot x)))
(lower_float_bnot x $F32)
)
(lower_float_bnot x $F32))

(rule
(lower (has_type $F64 (bnot x)))
(lower_float_bnot x $F64)
)
(lower_float_bnot x $F64))

;;;; Rules for `bit_reverse` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (bitrev x)))
(rule (lower (has_type (fits_in_64 (ty_int ty)) (bitrev x)))
(lower_bit_reverse x ty))

(rule 1 (lower (has_type $I128 (bitrev x)))
Expand Down
79 changes: 79 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/bitops-float.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
test compile precise-output
set unwind_info=false
target riscv64 has_zbb

;; This is a regression test for a bug in the RISC-V backend where,
;; when `Zbb` was enabled, the backend would try to use one of the
;; integer instructions (`orn`) to implement a float operation,
;; causing a regalloc panic.

;; Applies `bnot` to an f32 constant and then `bor`s the result with itself,
;; giving a float-typed `bor` whose operands are produced by `bnot`.
function %or_not_optimization_float() -> i32 system_v {
block0:
v0 = iconst.i32 0
v1 = f32const 0.0
;; v2 is the float `bnot` result; it is used as both operands of the `bor`.
v2 = bnot v1
v3 = bor v2, v2
;; Both float values are passed on as block arguments: v3 on the default
;; edge and v1 on the single jump-table entry.
;; NOTE(review): presumably this keeps v3 in use so the `bor` is actually
;; lowered -- confirm.
br_table v0, block1(v3), [block1(v1)]

block1(v4: f32):
return v0
}

; VCode:
; block0:
; li a1,0
; fmv.w.x ft9,a1
; li t1,0
; fmv.w.x fa6,t1
; fmv.x.w a1,fa6
; not a3,a1
; fmv.w.x ft1,a3
; fmv.x.w t1,ft1
; fmv.x.w a0,ft1
; or a2,t1,a0
; fmv.w.x fa2,a2
; li t2,0
; br_table t2,[MachLabel(1),MachLabel(2)]##tmp1=a1,tmp2=a2
; block1:
; j label3
; block2:
; fmv.d fa2,ft9
; j label3
; block3:
; li a0,0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mv a1, zero
; fmv.w.x ft9, a1
; mv t1, zero
; fmv.w.x fa6, t1
; fmv.x.w a1, fa6
; not a3, a1
; fmv.w.x ft1, a3
; fmv.x.w t1, ft1
; fmv.x.w a0, ft1
; or a2, t1, a0
; fmv.w.x fa2, a2
; mv t2, zero
; slli t6, t2, 0x20
; srli t6, t6, 0x20
; addi a2, zero, 1
; bltu t6, a2, 0xc
; auipc a2, 0
; jalr zero, a2, 0x28
; auipc a1, 0
; slli a2, t6, 3
; add a1, a1, a2
; jalr zero, a1, 0x10
; auipc a2, 0
; jalr zero, a2, 0xc
; block1: ; offset 0x60
; j 8
; block2: ; offset 0x64
; fmv.d fa2, ft9
; block3: ; offset 0x68
; mv a0, zero
; ret

0 comments on commit ad0bce3

Please sign in to comment.