-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc] Change the GPU to use builtin memory functions #68003
Conversation
@llvm/pr-subscribers-libc ChangesSummary: Full diff: https://github.com/llvm/llvm-project/pull/68003.diff 5 Files Affected:
diff --git a/libc/src/string/memory_utils/generic/builtin.h b/libc/src/string/memory_utils/generic/builtin.h
new file mode 100644
index 000000000000000..cfbd7e3ef361434
--- /dev/null
+++ b/libc/src/string/memory_utils/generic/builtin.h
@@ -0,0 +1,57 @@
+//===-- Trivial builtin implementations ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_BUILTIN_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_BUILTIN_H
+
+#include "src/__support/macros/config.h" // LIBC_INLINE
+#include "src/__support/macros/optimization.h" // LIBC_LOOP_NOUNROLL
+#include "src/string/memory_utils/utils.h" // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace LIBC_NAMESPACE {
+
+static_assert(LIBC_HAS_BUILTIN(__builtin_memcpy), "Builtin not defined");
+static_assert(LIBC_HAS_BUILTIN(__builtin_memset), "Builtin not defined");
+static_assert(LIBC_HAS_BUILTIN(__builtin_memmove), "Builtin not defined");
+static_assert(LIBC_HAS_BUILTIN(__builtin_bcmp), "Builtin not defined");
+
+[[maybe_unused]] LIBC_INLINE void
+inline_memcpy_builtin(Ptr dst, CPtr src, size_t count, size_t offset = 0) {
+ __builtin_memcpy(dst + offset, src + offset, count);
+}
+
+[[maybe_unused]] LIBC_INLINE void inline_memmove_builtin(Ptr dst, CPtr src,
+ size_t count) {
+ __builtin_memmove(dst, src, count);
+}
+
+[[maybe_unused]] LIBC_INLINE static void
+inline_memset_builtin(Ptr dst, uint8_t value, size_t count, size_t offset = 0) {
+ __builtin_memset(dst + offset, value, count);
+}
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_builtin(CPtr p1, CPtr p2, size_t count, size_t offset = 0) {
+ return __builtin_bcmp(p1 + offset, p2 + offset, count)
+ ? BcmpReturnType::NONZERO()
+ : BcmpReturnType::ZERO();
+}
+
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_builtin(CPtr p1, CPtr p2, size_t count, size_t offset = 0) {
+ int32_t diff = __builtin_memcmp(p1 + offset, p2 + offset, count);
+ if (diff)
+ return diff;
+ return MemcmpReturnType::ZERO();
+}
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_BUILTIN_H
diff --git a/libc/src/string/memory_utils/inline_bcmp.h b/libc/src/string/memory_utils/inline_bcmp.h
index b1c981d859e0223..1901f61e2daaeeb 100644
--- a/libc/src/string/memory_utils/inline_bcmp.h
+++ b/libc/src/string/memory_utils/inline_bcmp.h
@@ -23,9 +23,12 @@
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#include "src/string/memory_utils/riscv/inline_bcmp.h"
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_riscv
-#elif defined(LIBC_TARGET_ARCH_IS_ARM) || defined(LIBC_TARGET_ARCH_IS_GPU)
+#elif defined(LIBC_TARGET_ARCH_IS_ARM)
#include "src/string/memory_utils/generic/byte_per_byte.h"
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_byte_per_byte
+#elif defined(LIBC_TARGET_ARCH_IS_GPU)
+#include "src/string/memory_utils/generic/builtin.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_builtin
#else
#error "Unsupported architecture"
#endif
diff --git a/libc/src/string/memory_utils/inline_memcpy.h b/libc/src/string/memory_utils/inline_memcpy.h
index 0b8a7848da87b4a..a92bf4ddf881d54 100644
--- a/libc/src/string/memory_utils/inline_memcpy.h
+++ b/libc/src/string/memory_utils/inline_memcpy.h
@@ -28,9 +28,12 @@
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#include "src/string/memory_utils/riscv/inline_memcpy.h"
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY inline_memcpy_riscv
-#elif defined(LIBC_TARGET_ARCH_IS_ARM) || defined(LIBC_TARGET_ARCH_IS_GPU)
+#elif defined(LIBC_TARGET_ARCH_IS_ARM)
#include "src/string/memory_utils/generic/byte_per_byte.h"
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY inline_memcpy_byte_per_byte
+#elif defined(LIBC_TARGET_ARCH_IS_GPU)
+#include "src/string/memory_utils/generic/builtin.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY inline_memcpy_builtin
#else
#error "Unsupported architecture"
#endif
diff --git a/libc/src/string/memory_utils/inline_memmove.h b/libc/src/string/memory_utils/inline_memmove.h
index 0d31e10eaff28ed..f72ea24ab538d69 100644
--- a/libc/src/string/memory_utils/inline_memmove.h
+++ b/libc/src/string/memory_utils/inline_memmove.h
@@ -20,9 +20,12 @@
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#include "src/string/memory_utils/riscv/inline_memmove.h"
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE inline_memmove_riscv
-#elif defined(LIBC_TARGET_ARCH_IS_ARM) || defined(LIBC_TARGET_ARCH_IS_GPU)
+#elif defined(LIBC_TARGET_ARCH_IS_ARM)
#include "src/string/memory_utils/generic/byte_per_byte.h"
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE inline_memmove_byte_per_byte
+#elif defined(LIBC_TARGET_ARCH_IS_GPU)
+#include "src/string/memory_utils/generic/builtin.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_MEMMOVE inline_memmove_builtin
#else
#error "Unsupported architecture"
#endif
diff --git a/libc/src/string/memory_utils/inline_memset.h b/libc/src/string/memory_utils/inline_memset.h
index f20ae45fa753b44..1c07c1ca4bffc0e 100644
--- a/libc/src/string/memory_utils/inline_memset.h
+++ b/libc/src/string/memory_utils/inline_memset.h
@@ -24,9 +24,12 @@
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#include "src/string/memory_utils/riscv/inline_memset.h"
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_riscv
-#elif defined(LIBC_TARGET_ARCH_IS_ARM) || defined(LIBC_TARGET_ARCH_IS_GPU)
+#elif defined(LIBC_TARGET_ARCH_IS_ARM)
#include "src/string/memory_utils/generic/byte_per_byte.h"
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_byte_per_byte
+#elif defined(LIBC_TARGET_ARCH_IS_GPU)
+#include "src/string/memory_utils/generic/builtin.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_builtin
#else
#error "Unsupported architecture"
#endif
|
I like it. I let libc people chime in. |
Definitely wait for @gchatelet to approve, but it's theoretically fine. You need to be careful though, because often the inline memory functions just call the libc memory functions. If the GPUs have their own special builtins this is fine. |
Do we need a test? |
This should be covered by the existing test suite. But this actually reminds me that we don't run all of the memory function tests. I'll fix that in a separate patch. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Patch LGTM with the following comments:
- I don't see
inline_memcmp.h
but it should be modified as well - As-is this will probably fail to compile as we define a config wise
-ffreestanding
that in turn triggers-fno-builtins
. So you probably need to tweak the CMake configuration as well. Maybe we can just revert the-ffreestanding
for the GPU mem targets? Something likeCOMPILE_OPTIONS -fno-freestanding -fno-builtins -fbuiltin=memcpy
for the GPU version ofmemcpy
and so on and so forth.
I actually tried that, but it seemed to cause some bizarre error on AMDGPU. Was planning on leaving it unchanged until I filed a bug and figured out why it crashed.
|
Ok so maybe remove
Ha indeed! |
Addressed comments, tests are properly enabled now in #68111. |
Also it turns out that NVPTX has an issue with the |
Summary: The GPU build is special in the sense that we always know that up-to-date `clang` is always going to be the compiler. This allows us to rely directly on builtins, which allow us to push a lot of this complexity into the backend. Backend implementations are favored on the GPU because it allows us to do a lot more target specific optimizations. This patch changes over the common memory functions to use builtin versions when building for AMDGPU or NVPTX.
Summary:
The GPU build is special in the sense that we always know that
up-to-date
clang
is always going to be the compiler. This allows us torely directly on builtins, which allow us to push a lot of this
complexity into the backend. Backend implementations are favored on
the GPU because it allows us to do a lot more target specific
optimizations. This patch changes over the common memory functions to
use builtin versions when building for AMDGPU or NVPTX.