-
Notifications
You must be signed in to change notification settings - Fork 12.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SVE][CodeGen] Bail out for scalable vectors in AArch64TargetLowering::ReconstructShuffle

Previously the code in AArch64TargetLowering::ReconstructShuffle assumed the input vectors were always fixed-width, however this is not always the case since you can extract elements from scalable vectors and insert into fixed-width ones. We were hitting crashes here for two different cases: 1. When lowering a fixed-length vector extract from a scalable vector with i1 element types. This happens due to the fact that the i1 elements get promoted to larger integer types for fixed-width vectors and leads to sequences of INSERT_VECTOR_ELT and EXTRACT_VECTOR_ELT nodes. In this case AArch64TargetLowering::ReconstructShuffle will still fail to make a transformation, but at least it no longer crashes. 2. When lowering a sequence of extractelement/insertelement operations on mixed fixed-width/scalable vectors. For now, I've just changed AArch64TargetLowering::ReconstructShuffle to bail out if it finds a scalable vector. Tests for both instances described above have been added here: (1) CodeGen/AArch64/sve-extract-fixed-vector.ll (2) CodeGen/AArch64/sve-fixed-length-reshuffle.ll Differential Revision: https://reviews.llvm.org/D116602 (cherry picked from commit a57a7f3)
- Loading branch information
Showing
3 changed files
with
145 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc < %s | FileCheck %s | ||
|
||
target triple = "aarch64-unknown-linux-gnu" | ||
|
||
; == Matching first N elements == | ||
|
||
define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) #0 { | ||
; CHECK-LABEL: reshuffle_v4i1_nxv4i1: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 | ||
; CHECK-NEXT: mov w8, v1.s[1] | ||
; CHECK-NEXT: mov w9, v1.s[2] | ||
; CHECK-NEXT: mov v0.16b, v1.16b | ||
; CHECK-NEXT: mov v0.h[1], w8 | ||
; CHECK-NEXT: mov w8, v1.s[3] | ||
; CHECK-NEXT: mov v0.h[2], w9 | ||
; CHECK-NEXT: mov v0.h[3], w8 | ||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 | ||
; CHECK-NEXT: ret | ||
%el0 = extractelement <vscale x 4 x i1> %a, i32 0 | ||
%el1 = extractelement <vscale x 4 x i1> %a, i32 1 | ||
%el2 = extractelement <vscale x 4 x i1> %a, i32 2 | ||
%el3 = extractelement <vscale x 4 x i1> %a, i32 3 | ||
%v0 = insertelement <4 x i1> undef, i1 %el0, i32 0 | ||
%v1 = insertelement <4 x i1> %v0, i1 %el1, i32 1 | ||
%v2 = insertelement <4 x i1> %v1, i1 %el2, i32 2 | ||
%v3 = insertelement <4 x i1> %v2, i1 %el3, i32 3 | ||
ret <4 x i1> %v3 | ||
} | ||
|
||
attributes #0 = { "target-features"="+sve" } |