-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AtomicExpand: Preserve metadata when expanding partword RMW #89769
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesThis will be important for AMDGPU in a future patch. Full diff: https://github.com/llvm/llvm-project/pull/89769.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index e5496c0e31c1c0..c0053fb55676c0 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -950,7 +950,10 @@ AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
AI->getOrdering(), AI->getSyncScopeID());
- // TODO: Preserve metadata
+
+ // TODO: Do we need to drop noundef? We widened the operation and could be
+ // loading undefined bits.
+ NewAI->copyMetadata(*AI);
Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
AI->replaceAllUsesWith(FinalOldResult);
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll
index 3806159ab7303c..fe0df52dfa1e5b 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand %s | FileCheck %s
-; RUN: opt -mtriple=r600-mesa-mesa3d -S -passes=atomic-expand %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand %s | FileCheck -check-prefixes=CHECK,GCN %s
+; RUN: opt -mtriple=r600-mesa-mesa3d -S -passes=atomic-expand %s | FileCheck -check-prefixes=CHECK,R600 %s
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
@@ -163,6 +163,55 @@ define i16 @test_atomicrmw_and_i16_global_agent(ptr addrspace(1) %ptr, i16 %valu
ret i16 %res
}
+define i16 @test_atomicrmw_and_i16_global_agent_align4(ptr addrspace(1) %ptr, i16 %value) {
+; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
+; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
+; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
+; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
+; CHECK-NEXT: ret i16 [[EXTRACTED]]
+;
+ %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4
+ ret i16 %res
+}
+
+; Preserve unknown metadata, and drop poison generating metadata since
+; we widen to load unknown bytes.
+define i16 @test_atomicrmw_and_i16_global_agent_preserve_md(ptr addrspace(1) %ptr, i16 %value) {
+; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_preserve_md(
+; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
+; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
+; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
+; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
+; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
+; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
+; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
+; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
+; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0:![0-9]+]], !some.unknown.md [[META0]]
+; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
+; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
+; CHECK-NEXT: ret i16 [[EXTRACTED]]
+;
+ %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, !noundef !0, !some.unknown.md !0
+ ret i16 %res
+}
+
+; Preserve unknown metadata, and drop poison generating metadata since
+; we widen to load unknown bytes.
+define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_md(ptr addrspace(1) %ptr, i16 %value) {
+; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_md(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
+; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
+; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0]], !some.unknown.md [[META0]]
+; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
+; CHECK-NEXT: ret i16 [[EXTRACTED]]
+;
+ %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !noundef !0, !some.unknown.md !0
+ ret i16 %res
+}
+
define i16 @test_atomicrmw_nand_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_nand_i16_global_agent(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
@@ -1064,3 +1113,8 @@ define bfloat @test_atomicrmw_xchg_bf16_global_agent_align4(ptr addrspace(1) %pt
%res = atomicrmw xchg ptr addrspace(1) %ptr, bfloat %value syncscope("agent") seq_cst, align 4
ret bfloat %res
}
+
+!0 = !{}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
+; R600: {{.*}}
|
Is it legal to preserve aliasing metadata? What if we had a narrow rmw widened to a larger type where only the additional bits accessed aliased an unrelated access? |
I would hope it does not matter - the extra high bits are put back exactly as they were |
dc3f144
to
9f83267
Compare
Is it really a good idea to blindly preserve all unknown metadata? ISTM we have no way of knowing if the metadata will be valid on the modified instruction or not. Maybe better to preserve only known-to-be-OK metadata? |
The cases I really need to preserve are target string metadata, i.e. "unknown" |
So options are 1. hardcode the amdgpu prefixed metadata, or 2. promote the AMDGPU metadata to one of the builtin recognized metadata types? |
ping |
Which metadata are you interested in? Agree that blindly copying metadata is not a good idea -- as your own comment points out, this is already incorrect for !noundef. |
Primarily the new metadata in #85052 |
8081d91
to
bc3284d
Compare
IMO, it's OK to have an allow-list of metadata we copy which includes amdgpu-specific entries. |
bc3284d
to
367d4d5
Compare
The API for this is a bit awkward for the non-builtin enum metadata |
ping |
1 similar comment
ping |
This will be important for AMDGPU in a future patch.
367d4d5
to
3885f4c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. We can add more in the future if we think necessary.
This will be important for AMDGPU in a future patch.