Skip to content

Commit

Permalink
[AArch64] Add tests for operations on vectors with 3 elements.
Browse files Browse the repository at this point in the history
(cherry-picked from 8336515)
  • Loading branch information
fhahn committed Jan 25, 2024
1 parent 562cad1 commit 7431631
Showing 1 changed file with 310 additions and 0 deletions.
310 changes: 310 additions & 0 deletions llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=arm64-apple-macosx -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck --check-prefix BE %s

define <16 x i8> @load_v3i8(ptr %src, ptr %dst) {
; CHECK-LABEL: load_v3i8:
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: strh w8, [sp, #12]
; CHECK-NEXT: ldr s0, [sp, #12]
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: umov.h w8, v0[0]
; CHECK-NEXT: umov.h w9, v0[1]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: add x8, x0, #2
; CHECK-NEXT: mov.b v0[1], w9
; CHECK-NEXT: ld1.b { v0 }[2], [x8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i8:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: umov w8, v0.h[0]
; BE-NEXT: umov w9, v0.h[1]
; BE-NEXT: fmov s0, w8
; BE-NEXT: add x8, x0, #2
; BE-NEXT: mov v0.b[1], w9
; BE-NEXT: ld1 { v0.b }[2], [x8]
; BE-NEXT: rev64 v0.16b, v0.16b
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i8>, ptr %src, align 1
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %s
}

define <4 x i32> @load_v3i8_to_4xi32(ptr %src, ptr %dst) {
; CHECK-LABEL: load_v3i8_to_4xi32:
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT: strh w8, [sp, #12]
; CHECK-NEXT: ldr s0, [sp, #12]
; CHECK-NEXT: ldrsb w8, [x0, #2]
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: mov.h v0[1], v0[1]
; CHECK-NEXT: mov.h v0[2], w8
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i8_to_4xi32:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: ldrsb w8, [x0, #2]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: mov v0.h[1], v0.h[1]
; BE-NEXT: mov v0.h[2], w8
; BE-NEXT: ushll v0.4s, v0.4h, #0
; BE-NEXT: and v0.16b, v0.16b, v1.16b
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i8>, ptr %src, align 1
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
%e = zext <4 x i8> %s to <4 x i32>
ret <4 x i32> %e
}

define <4 x i32> @volatile_load_v3i8_to_4xi32(ptr %src, ptr %dst) {
; CHECK-LABEL: volatile_load_v3i8_to_4xi32:
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT: strh w8, [sp, #12]
; CHECK-NEXT: ldr s0, [sp, #12]
; CHECK-NEXT: ldrsb w8, [x0, #2]
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: mov.h v0[1], v0[1]
; CHECK-NEXT: mov.h v0[2], w8
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
;
; BE-LABEL: volatile_load_v3i8_to_4xi32:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: ldrsb w8, [x0, #2]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: mov v0.h[1], v0.h[1]
; BE-NEXT: mov v0.h[2], w8
; BE-NEXT: ushll v0.4s, v0.4h, #0
; BE-NEXT: and v0.16b, v0.16b, v1.16b
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load volatile <3 x i8>, ptr %src, align 1
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
%e = zext <4 x i8> %s to <4 x i32>
ret <4 x i32> %e
}

define <3 x i32> @load_v3i32(ptr %src) {
; CHECK-LABEL: load_v3i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: add x8, x0, #8
; CHECK-NEXT: ld1.s { v0 }[2], [x8]
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i32:
; BE: // %bb.0:
; BE-NEXT: ldr d0, [x0]
; BE-NEXT: add x8, x0, #8
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ld1 { v0.s }[2], [x8]
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: ret
%l = load <3 x i32>, ptr %src, align 1
ret <3 x i32> %l
}

define void @store_trunc_from_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: store_trunc_from_64bits:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ldrh w8, [x0, #4]
; CHECK-NEXT: mov.h v0[2], w8
; CHECK-NEXT: xtn.8b v0, v0
; CHECK-NEXT: str s0, [sp, #12]
; CHECK-NEXT: ldrh w9, [sp, #12]
; CHECK-NEXT: strb w8, [x1, #2]
; CHECK-NEXT: strh w9, [x1]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
;
; BE-LABEL: store_trunc_from_64bits:
; BE: // %bb.0: // %entry
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldr s0, [x0]
; BE-NEXT: ldrh w8, [x0, #4]
; BE-NEXT: rev32 v0.4h, v0.4h
; BE-NEXT: mov v0.h[2], w8
; BE-NEXT: xtn v0.8b, v0.8h
; BE-NEXT: rev32 v0.16b, v0.16b
; BE-NEXT: str s0, [sp, #12]
; BE-NEXT: ldrh w9, [sp, #12]
; BE-NEXT: strb w8, [x1, #2]
; BE-NEXT: strh w9, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
entry:
%l = load <3 x i16>, ptr %src, align 1
%t = trunc <3 x i16> %l to <3 x i8>
store <3 x i8> %t, ptr %dst, align 1
ret void
}

define void @load_ext_to_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_to_64bits:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: strh w8, [sp, #12]
; CHECK-NEXT: add x8, x0, #2
; CHECK-NEXT: ldr s0, [sp, #12]
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ld1.b { v0 }[4], [x8]
; CHECK-NEXT: add x8, x1, #4
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: st1.h { v0 }[2], [x8]
; CHECK-NEXT: str s0, [x1]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
;
; BE-LABEL: load_ext_to_64bits:
; BE: // %bb.0: // %entry
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: add x8, x0, #2
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: ld1 { v0.b }[4], [x8]
; BE-NEXT: add x8, x1, #4
; BE-NEXT: bic v0.4h, #255, lsl #8
; BE-NEXT: rev32 v1.8h, v0.8h
; BE-NEXT: st1 { v0.h }[2], [x8]
; BE-NEXT: str s1, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
entry:
%l = load <3 x i8>, ptr %src, align 1
%e = zext <3 x i8> %l to <3 x i16>
store <3 x i16> %e, ptr %dst, align 1
ret void
}

define void @shift_trunc_store(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store:
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: shrn.4h v0, v0, #16
; CHECK-NEXT: xtn.8b v1, v0
; CHECK-NEXT: umov.h w8, v0[2]
; CHECK-NEXT: str s1, [sp, #12]
; CHECK-NEXT: ldrh w9, [sp, #12]
; CHECK-NEXT: strb w8, [x1, #2]
; CHECK-NEXT: strh w9, [x1]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
;
; BE-LABEL: shift_trunc_store:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ld1 { v0.4s }, [x0]
; BE-NEXT: shrn v0.4h, v0.4s, #16
; BE-NEXT: xtn v1.8b, v0.8h
; BE-NEXT: umov w8, v0.h[2]
; BE-NEXT: rev32 v1.16b, v1.16b
; BE-NEXT: str s1, [sp, #12]
; BE-NEXT: ldrh w9, [sp, #12]
; BE-NEXT: strb w8, [x1, #2]
; BE-NEXT: strh w9, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i32>, ptr %src
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
%t = trunc <3 x i32> %s to <3 x i8>
store <3 x i8> %t, ptr %dst, align 1
ret void
}

define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_volatile_store:
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: shrn.4h v0, v0, #16
; CHECK-NEXT: xtn.8b v1, v0
; CHECK-NEXT: umov.h w8, v0[2]
; CHECK-NEXT: str s1, [sp, #12]
; CHECK-NEXT: ldrh w9, [sp, #12]
; CHECK-NEXT: strb w8, [x1, #2]
; CHECK-NEXT: strh w9, [x1]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
;
; BE-LABEL: shift_trunc_volatile_store:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ld1 { v0.4s }, [x0]
; BE-NEXT: shrn v0.4h, v0.4s, #16
; BE-NEXT: xtn v1.8b, v0.8h
; BE-NEXT: umov w8, v0.h[2]
; BE-NEXT: rev32 v1.16b, v1.16b
; BE-NEXT: str s1, [sp, #12]
; BE-NEXT: ldrh w9, [sp, #12]
; BE-NEXT: strb w8, [x1, #2]
; BE-NEXT: strh w9, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i32>, ptr %src
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
%t = trunc <3 x i32> %s to <3 x i8>
store volatile <3 x i8> %t, ptr %dst, align 1
ret void
}

0 comments on commit 7431631

Please sign in to comment.