-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Libomptarget] Make the DeviceRTL configuration globals weak #68220
Conversation
@llvm/pr-subscribers-clang Changes
Full diff: https://github.com/llvm/llvm-project/pull/68220.diff 4 Files Affected:
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 632e37e3cac8fec..f95b0f8cb317c75 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -595,6 +595,7 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
SmallVector<OffloadFile, 4> BitcodeInputFiles;
+ DenseSet<StringRef> StrongResolutions;
DenseSet<StringRef> UsedInRegularObj;
DenseSet<StringRef> UsedInSharedLib;
BumpPtrAllocator Alloc;
@@ -608,6 +609,18 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
file_magic Type = identify_magic(Buffer.getBuffer());
switch (Type) {
case file_magic::bitcode: {
+ Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(Buffer);
+ if (!IRSymtabOrErr)
+ return IRSymtabOrErr.takeError();
+
+ // Check for any strong resolutions we need to preserve.
+ for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) {
+ for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) {
+ if (!Sym.isFormatSpecific() && Sym.isGlobal() && !Sym.isWeak() &&
+ !Sym.isUndefined())
+ StrongResolutions.insert(Saver.save(Sym.Name));
+ }
+ }
BitcodeInputFiles.emplace_back(std::move(File));
continue;
}
@@ -696,6 +709,7 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
// it is undefined or another definition has already been used.
Res.Prevailing =
!Sym.isUndefined() &&
+ !(Sym.isWeak() && StrongResolutions.contains(Sym.getName())) &&
PrevailingSymbols.insert(Saver.save(Sym.getName())).second;
// We need LTO to preseve the following global symbols:
diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
index 5deee9c53926e77..809c5f03886b048 100644
--- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -20,10 +20,10 @@ using namespace ompx;
#pragma omp begin declare target device_type(nohost)
-// defined by CGOpenMPRuntimeGPU
-extern uint32_t __omp_rtl_debug_kind;
-extern uint32_t __omp_rtl_assume_no_thread_state;
-extern uint32_t __omp_rtl_assume_no_nested_parallelism;
+// Weak definitions will be overridden by CGOpenmpRuntimeGPU if enabled.
+[[gnu::weak]] extern const uint32_t __omp_rtl_debug_kind = 0;
+[[gnu::weak]] extern const uint32_t __omp_rtl_assume_no_thread_state = 0;
+[[gnu::weak]] extern const uint32_t __omp_rtl_assume_no_nested_parallelism = 0;
// This variable should be visibile to the plugin so we override the default
// hidden visibility.
diff --git a/openmp/libomptarget/DeviceRTL/src/exports b/openmp/libomptarget/DeviceRTL/src/exports
index 2d13195aa7dc87c..fbcda3ce8f555ca 100644
--- a/openmp/libomptarget/DeviceRTL/src/exports
+++ b/openmp/libomptarget/DeviceRTL/src/exports
@@ -3,6 +3,10 @@ ompx_*
*llvm_*
__kmpc_*
+__omp_rtl_debug_kind
+__omp_rtl_assume_no_thread_state
+__omp_rtl_assume_no_nested_parallelism
+
_ZN4ompx*
IsSPMDMode
diff --git a/openmp/libomptarget/test/offloading/weak.c b/openmp/libomptarget/test/offloading/weak.c
new file mode 100644
index 000000000000000..ca81db958356b2e
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/weak.c
@@ -0,0 +1,33 @@
+// RUN: %libomptarget-compile-generic -DA -c -o %t-a.o
+// RUN: %libomptarget-compile-generic -DB -c -o %t-b.o
+// RUN: %libomptarget-compile-generic %t-a.o %t-b.o && \
+// RUN: %libomptarget-run-generic | %fcheck-generic
+
+#if defined(A)
+__attribute__((weak)) int x = 999;
+#pragma omp declare target to(x)
+#elif defined(B)
+int x = 42;
+#pragma omp declare target to(x)
+__attribute__((weak)) int y = 42;
+#pragma omp declare target to(y)
+#else
+
+#include <stdio.h>
+
+extern int x;
+#pragma omp declare target to(x)
+extern int y;
+#pragma omp declare target to(y)
+
+int main() {
+ x = 0;
+
+#pragma omp target update from(x)
+#pragma omp target update from(y)
+
+ // CHECK: PASS
+ if (x == 42 && y == 42)
+ printf("PASS\n");
+}
+#endif
|
Summary: This patch applies weak linkage to the config globals by the name `__omp_rtl...`. This is because when passing `-nogpulib` we will not link in or create these globals. This allows the OpenMP device RTL to be self contained without requiring the additional definitions from the `clang` compiler. In the standard case, this should not affect the current behavior, this is because the strong defintiion coming from the compiler should always override the weak definition we default to here. In the case that these are not defined by the compiler, these will remain weak. This will impact optimizations somewhat, but the previous behaviour was that it would not link so that is an improvement. Depends on: llvm#68215
Summary: We have tests that depend on two static libraries `libomptarget.devicertl.a` and `libcgpu.a`. These are currently implicitly picked up and searched through the standard path. This patch changes that to pass `-nogpulib` to disable implicit runtime path searches. We then explicitly passed the built libraries to the compilations so that we know exactly which libraries are being used. Depends on: llvm#68220
extern uint32_t __omp_rtl_debug_kind; | ||
extern uint32_t __omp_rtl_assume_no_thread_state; | ||
extern uint32_t __omp_rtl_assume_no_nested_parallelism; | ||
// Weak definitions will be overridden by CGOpenmpRuntimeGPU if enabled. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The variable names look sufficient for documentation. It's a bit oblique to have the compiler inject values for these but the behaviour is pretty obvious.
The defaults look legitimate. It's only an optimisation barrier if nothing specifies the value, the compiler or whatever defining a symbol with 0 or 1 will override this. And that path only exists when people are doing debugging things.
Hopefully the compiler tags them static. Shall we go with explicit protected visibility? We never want them to show up in the code object symbol table, whether default or not, as a language runtime deciding to write to them is bad.
…68225) Summary: We have tests that depend on two static libraries `libomptarget.devicertl.a` and `libcgpu.a`. These are currently implicitly picked up and searched through the standard path. This patch changes that to pass `-nogpulib` to disable implicit runtime path searches. We then explicitly passed the built libraries to the compilations so that we know exactly which libraries are being used. Depends on: #68220 Fixes #68141
) This patch applies weak linkage to the config globals by the name `__omp_rtl...`. This is because when passing `-nogpulib` we will not link in or create these globals. This allows the OpenMP device RTL to be self contained without requiring the additional definitions from the `clang` compiler. In the standard case, this should not affect the current behavior, this is because the strong definition coming from the compiler should always override the weak definition we default to here. In the case that these are not defined by the compiler, these will remain weak. This will impact optimizations somewhat, but the previous behavior was that it would not link so that is an improvement. Depends on: llvm#68215 Change-Id: I070aa3f58317347ecf7f35b947288709863c107f
This patch applies weak linkage to the config globals by the name
__omp_rtl...
. This is because when passing-nogpulib
we will not link in or create these globals. This allows the OpenMP device RTL to be self contained without requiring the additional definitions from theclang
compiler. In the standard case, this should not affect the current behavior, this is because the strong definition coming from the compiler should always override the weak definition we default to here. In the case that these are not defined by the compiler, these will remain weak. This will impact optimizations somewhat, but the previous behavior was that it would not link so that is an improvement.Depends on: #68215