-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MemCpyOpt] No need to create memcpy(a <- a)
#98321
Conversation
@llvm/pr-subscribers-llvm-transforms Author: DianQK (DianQK) ChangesWhen forwarding Full diff: https://github.com/llvm/llvm-project/pull/98321.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index b9efd9aaa28c5..1a4b3657cedae 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1161,6 +1161,14 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
return false;
+ // No need to create `memcpy(a <- a)`.
+ if (BAA.isMustAlias(M->getDest(), MDep->getRawSource())) {
+ // Remove the instruction we're replacing.
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
+
// If the dest of the second might alias the source of the first, then the
// source and dest might overlap. In addition, if the source of the first
// points to constant memory, they won't overlap by definition. Otherwise, we
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index 48698f25174e7..a19cfe6e75b70 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -139,6 +139,56 @@ define void @test6_memcpy(ptr %src, ptr %dest) nounwind {
ret void
}
+; FIXME: When forwarding to memcpy(arg+1, arg+1), we don't need to create this memcpy.
+define void @test6_memcpy_forward_back(ptr %arg) nounwind {
+; CHECK-LABEL: @test6_memcpy_forward_back(
+; CHECK-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca [16 x i8], align 1
+ %src = getelementptr inbounds i8, ptr %arg, i64 1
+ %dest = getelementptr inbounds i8, ptr %arg, i64 1
+ call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %src, i32 16, i1 false)
+ call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %dest, ptr align 1 %tmp, i32 16, i1 false)
+ ret void
+}
+
+; We have to retain this `memcpy(arg+2, arg+1)` forwarding.
+define void @test6_memcpy_forward_not_back(ptr %arg) nounwind {
+; CHECK-LABEL: @test6_memcpy_forward_not_back(
+; CHECK-NEXT: [[TMP:%.*]] = alloca [16 x i8], align 1
+; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
+; CHECK-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i32 16, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[DEST]], ptr align 1 [[TMP]], i32 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca [16 x i8], align 1
+ %src = getelementptr inbounds i8, ptr %arg, i64 1
+ %dest = getelementptr inbounds i8, ptr %arg, i64 2
+ call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %src, i32 16, i1 false)
+ call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %dest, ptr align 1 %tmp, i32 16, i1 false)
+ ret void
+}
+
+; There is data writing between the two memcpy operations.
+define void @test6_memcpy_forward_back_failed(ptr %arg) nounwind {
+; CHECK-LABEL: @test6_memcpy_forward_back_failed(
+; CHECK-NEXT: [[TMP:%.*]] = alloca [16 x i8], align 1
+; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
+; CHECK-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i32 16, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[DEST]], ptr align 1 [[TMP]], i32 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca [16 x i8], align 1
+ %src = getelementptr inbounds i8, ptr %arg, i64 1
+ %dest = getelementptr inbounds i8, ptr %arg, i64 1
+ call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %src, i32 16, i1 false)
+ store i8 1, ptr %dest, align 1
+ call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %dest, ptr align 1 %tmp, i32 16, i1 false)
+ ret void
+}
@x = external global %0
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thank you!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/109/builds/861 Here is the relevant piece of the build log for the reference:
|
When forwarding `memcpy`, we don't need to create `memcpy(a, a)`.
When forwarding
memcpy
, we don't need to creatememcpy(a, a)
.I fixed a related test case at 62b3e68.