Skip to content

Commit

Permalink
[SVE][CodeGen] Bail out for scalable vectors in AArch64TargetLowering…
Browse files Browse the repository at this point in the history
…::ReconstructShuffle

Previously the code in AArch64TargetLowering::ReconstructShuffle assumed
the input vectors were always fixed-width, however this is not always
the case since you can extract elements from scalable vectors and insert
into fixed-width ones. We were hitting crashes here for two different
cases:

1. When lowering a fixed-length vector extract from a scalable vector
with i1 element types. This happens because the i1 elements
get promoted to larger integer types for fixed-width vectors and leads
to sequences of INSERT_VECTOR_ELT and EXTRACT_VECTOR_ELT nodes. In this
case AArch64TargetLowering::ReconstructShuffle will still fail to make
a transformation, but at least it no longer crashes.
2. When lowering a sequence of extractelement/insertelement operations
on mixed fixed-width/scalable vectors.

For now, I've just changed AArch64TargetLowering::ReconstructShuffle to
bail out if it finds a scalable vector.

Tests for both instances described above have been added here:

  (1) CodeGen/AArch64/sve-extract-fixed-vector.ll
  (2) CodeGen/AArch64/sve-fixed-length-reshuffle.ll

Differential Revision: https://reviews.llvm.org/D116602

(cherry picked from commit a57a7f3)
  • Loading branch information
david-arm authored and tstellar committed Feb 22, 2022
1 parent 1362f8b commit 8c33ea3
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 7 deletions.
15 changes: 8 additions & 7 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8990,12 +8990,13 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (V.isUndef())
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(V.getOperand(1))) {
!isa<ConstantSDNode>(V.getOperand(1)) ||
V.getOperand(0).getValueType().isScalableVector()) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: "
"a shuffle can only come from building a vector from "
"various elements of other vectors, provided their "
"indices are constant\n");
"various elements of other fixed-width vectors, provided "
"their indices are constant\n");
return SDValue();
}

Expand Down Expand Up @@ -9039,8 +9040,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();

uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
if (SrcVTSize == VTSize)
TypeSize SrcVTSize = SrcVT.getSizeInBits();
if (SrcVTSize == TypeSize::Fixed(VTSize))
continue;

// This stage of the search produces a source with the same element type as
Expand All @@ -9049,7 +9050,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);

if (SrcVTSize < VTSize) {
if (SrcVTSize.getFixedValue() < VTSize) {
assert(2 * SrcVTSize == VTSize);
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
Expand All @@ -9059,7 +9060,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
continue;
}

if (SrcVTSize != 2 * VTSize) {
if (SrcVTSize.getFixedValue() != 2 * VTSize) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: result vector too small to extract\n");
return SDValue();
Expand Down
105 changes: 105 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,106 @@ define <16 x i8> @extract_v16i8_nxv2i8_idx16(<vscale x 2 x i8> %vec) nounwind #1
ret <16 x i8> %retval
}


; Predicates

; Extract the low <2 x i1> from a scalable <vscale x 2 x i1> predicate.
; Regression test for case 1 above: i1 elements get promoted to wider integer
; lanes for fixed-width vectors, which previously crashed in
; AArch64TargetLowering::ReconstructShuffle. The expected lowering expands the
; predicate into integer lanes and repacks them.
define <2 x i1> @extract_v2i1_nxv2i1(<vscale x 2 x i1> %inmask) {
; CHECK-LABEL: extract_v2i1_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  ; Extract starting at index 0 (the low elements) of the scalable mask.
%mask = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %inmask, i64 0)
ret <2 x i1> %mask
}

; Extract the low <4 x i1> from a scalable <vscale x 4 x i1> predicate.
; Same regression scenario as the v2i1 test: promoted i1 elements previously
; crashed ReconstructShuffle; this must now lower (element-by-element repack).
define <4 x i1> @extract_v4i1_nxv4i1(<vscale x 4 x i1> %inmask) {
; CHECK-LABEL: extract_v4i1_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
; CHECK-NEXT: mov w8, v1.s[1]
; CHECK-NEXT: mov w9, v1.s[2]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: mov v0.h[1], w8
; CHECK-NEXT: mov w8, v1.s[3]
; CHECK-NEXT: mov v0.h[2], w9
; CHECK-NEXT: mov v0.h[3], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  ; Extract starting at index 0 (the low elements) of the scalable mask.
%mask = call <4 x i1> @llvm.experimental.vector.extract.v4i1.nxv4i1(<vscale x 4 x i1> %inmask, i64 0)
ret <4 x i1> %mask
}

; Extract the low <8 x i1> from a scalable <vscale x 8 x i1> predicate.
; Same regression scenario: the expected lowering moves each promoted lane
; individually, so no shuffle reconstruction (and no crash) is involved.
define <8 x i1> @extract_v8i1_nxv8i1(<vscale x 8 x i1> %inmask) {
; CHECK-LABEL: extract_v8i1_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
; CHECK-NEXT: umov w8, v1.h[1]
; CHECK-NEXT: umov w9, v1.h[2]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: mov v0.b[1], w8
; CHECK-NEXT: umov w8, v1.h[3]
; CHECK-NEXT: mov v0.b[2], w9
; CHECK-NEXT: umov w9, v1.h[4]
; CHECK-NEXT: mov v0.b[3], w8
; CHECK-NEXT: umov w8, v1.h[5]
; CHECK-NEXT: mov v0.b[4], w9
; CHECK-NEXT: umov w9, v1.h[6]
; CHECK-NEXT: mov v0.b[5], w8
; CHECK-NEXT: umov w8, v1.h[7]
; CHECK-NEXT: mov v0.b[6], w9
; CHECK-NEXT: mov v0.b[7], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  ; Extract starting at index 0 (the low elements) of the scalable mask.
%mask = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> %inmask, i64 0)
ret <8 x i1> %mask
}

; Extract the low <16 x i1> from a scalable <vscale x 16 x i1> predicate.
; Largest of the predicate-extract regression tests; the full 16-lane repack
; fills a 128-bit q-register, so no trailing "kill" of the d-register is
; expected here, unlike the narrower variants.
define <16 x i1> @extract_v16i1_nxv16i1(<vscale x 16 x i1> %inmask) {
; CHECK-LABEL: extract_v16i1_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT: umov w8, v1.b[1]
; CHECK-NEXT: umov w9, v1.b[2]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: mov v0.b[1], w8
; CHECK-NEXT: umov w8, v1.b[3]
; CHECK-NEXT: mov v0.b[2], w9
; CHECK-NEXT: umov w9, v1.b[4]
; CHECK-NEXT: mov v0.b[3], w8
; CHECK-NEXT: umov w8, v1.b[5]
; CHECK-NEXT: mov v0.b[4], w9
; CHECK-NEXT: umov w9, v1.b[6]
; CHECK-NEXT: mov v0.b[5], w8
; CHECK-NEXT: umov w8, v1.b[7]
; CHECK-NEXT: mov v0.b[6], w9
; CHECK-NEXT: umov w9, v1.b[8]
; CHECK-NEXT: mov v0.b[7], w8
; CHECK-NEXT: umov w8, v1.b[9]
; CHECK-NEXT: mov v0.b[8], w9
; CHECK-NEXT: umov w9, v1.b[10]
; CHECK-NEXT: mov v0.b[9], w8
; CHECK-NEXT: umov w8, v1.b[11]
; CHECK-NEXT: mov v0.b[10], w9
; CHECK-NEXT: umov w9, v1.b[12]
; CHECK-NEXT: mov v0.b[11], w8
; CHECK-NEXT: umov w8, v1.b[13]
; CHECK-NEXT: mov v0.b[12], w9
; CHECK-NEXT: umov w9, v1.b[14]
; CHECK-NEXT: mov v0.b[13], w8
; CHECK-NEXT: umov w8, v1.b[15]
; CHECK-NEXT: mov v0.b[14], w9
; CHECK-NEXT: mov v0.b[15], w8
; CHECK-NEXT: ret
  ; Extract starting at index 0 (the low elements) of the scalable mask.
%mask = call <16 x i1> @llvm.experimental.vector.extract.v16i1.nxv16i1(<vscale x 16 x i1> %inmask, i64 0)
ret <16 x i1> %mask
}


; Fixed length clamping

define <2 x i64> @extract_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind #0 {
Expand Down Expand Up @@ -441,4 +541,9 @@ declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv8i8(<vscale x 8 x i
declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8>, i64)
declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8>, i64)

declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1>, i64)
declare <4 x i1> @llvm.experimental.vector.extract.v4i1.nxv4i1(<vscale x 4 x i1>, i64)
declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1>, i64)
declare <16 x i1> @llvm.experimental.vector.extract.v16i1.nxv16i1(<vscale x 16 x i1>, i64)

declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64>, i64)
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; == Matching first N elements ==

; Regression test for case 2 above: a chain of extractelement from a SCALABLE
; <vscale x 4 x i1> feeding insertelement into a FIXED <4 x i1>. This pattern
; reaches AArch64TargetLowering::ReconstructShuffle, which must now bail out
; for scalable sources instead of crashing; the fallback is a lane-by-lane
; repack, as checked below.
define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) #0 {
; CHECK-LABEL: reshuffle_v4i1_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
; CHECK-NEXT: mov w8, v1.s[1]
; CHECK-NEXT: mov w9, v1.s[2]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: mov v0.h[1], w8
; CHECK-NEXT: mov w8, v1.s[3]
; CHECK-NEXT: mov v0.h[2], w9
; CHECK-NEXT: mov v0.h[3], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  ; Pull the first four lanes out of the scalable predicate...
%el0 = extractelement <vscale x 4 x i1> %a, i32 0
%el1 = extractelement <vscale x 4 x i1> %a, i32 1
%el2 = extractelement <vscale x 4 x i1> %a, i32 2
%el3 = extractelement <vscale x 4 x i1> %a, i32 3
  ; ...and rebuild them as a fixed-width <4 x i1>.
%v0 = insertelement <4 x i1> undef, i1 %el0, i32 0
%v1 = insertelement <4 x i1> %v0, i1 %el1, i32 1
%v2 = insertelement <4 x i1> %v1, i1 %el2, i32 2
%v3 = insertelement <4 x i1> %v2, i1 %el3, i32 3
ret <4 x i1> %v3
}

attributes #0 = { "target-features"="+sve" }

0 comments on commit 8c33ea3

Please sign in to comment.