llvm · jhuber6 · Oct 19, 2023 · Sep 12, 2023 · jdoerfert · Sep 25, 2023
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
@@ -63,6 +63,8 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdlib.lldiv
     libc.src.stdlib.qsort
     libc.src.stdlib.qsort_r
+    libc.src.stdlib.rand
+    libc.src.stdlib.srand
     libc.src.stdlib.strtod
     libc.src.stdlib.strtof
     libc.src.stdlib.strtol

diff --git a/libc/src/stdlib/rand.cpp b/libc/src/stdlib/rand.cpp
@@ -15,10 +15,12 @@ namespace LIBC_NAMESPACE {
 // An implementation of the xorshift64star pseudo random number generator. This
 // is a good general purpose generator for most non-cryptographics applications.
 LLVM_LIBC_FUNCTION(int, rand, (void)) {
-  rand_next ^= rand_next >> 12;
-  rand_next ^= rand_next << 25;
-  rand_next ^= rand_next >> 27;
-  return static_cast<int>((rand_next * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
+  unsigned long x = rand_next;
+  x ^= x >> 12;
+  x ^= x << 25;
+  x ^= x >> 27;
+  rand_next = x;
+  return static_cast<int>((x * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/rand_util.cpp b/libc/src/stdlib/rand_util.cpp
@@ -11,8 +11,14 @@
 
 namespace LIBC_NAMESPACE {
 
+#ifdef LIBC_TARGET_ARCH_IS_GPU
+// FIXME: Local GPU memory cannot be initialized, so we cannot currently
+// provide a standard-compliant default value.
+ThreadLocal<unsigned long> rand_next;
+#else
 // C standard 7.10p2: If 'rand' is called before 'srand' it is to proceed as if
 // the 'srand' function was called with a value of '1'.
 LIBC_THREAD_LOCAL unsigned long rand_next = 1;
+#endif
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/rand_util.h b/libc/src/stdlib/rand_util.h
@@ -9,11 +9,33 @@
 #ifndef LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
 #define LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
 
+#include "src/__support/GPU/utils.h"
 #include "src/__support/macros/attributes.h"
 
 namespace LIBC_NAMESPACE {
 
+#ifdef LIBC_TARGET_ARCH_IS_GPU
+// Implement thread-local storage on the GPU using local memory. Each thread
+// gets its own slot in a local memory array that is private to its group.
+// TODO: We need to implement the 'thread_local' keyword on the GPU. This is an
+// inefficient and incomplete stand-in until that is done.
+template <typename T> class ThreadLocal {
+private:
+  static constexpr long MAX_THREADS = 1024;
+  [[clang::loader_uninitialized]] static inline gpu::Local<T>
+      storage[MAX_THREADS];
+
+public:
+  LIBC_INLINE operator T() const { return storage[gpu::get_thread_id()]; }
+  LIBC_INLINE void operator=(const T &value) {
+    storage[gpu::get_thread_id()] = value;
+  }
+};
+
+extern ThreadLocal<unsigned long> rand_next;
+#else
 extern LIBC_THREAD_LOCAL unsigned long rand_next;
+#endif
 
 } // namespace LIBC_NAMESPACE
 

diff --git a/libc/test/src/stdlib/rand_test.cpp b/libc/test/src/stdlib/rand_test.cpp
@@ -23,12 +23,15 @@ TEST(LlvmLibcRandTest, UnsetSeed) {
     vals[i] = val;
   }
 
+  // FIXME: The GPU implementation cannot initialize the seed correctly.
+#ifndef LIBC_TARGET_ARCH_IS_GPU
   // The C standard specifies that if 'srand' is never called it should behave
   // as if 'srand' was called with a value of 1. If we seed the value with 1 we
   // should get the same sequence as the unseeded version.
   LIBC_NAMESPACE::srand(1);
   for (size_t i = 0; i < 1000; ++i)
     ASSERT_EQ(LIBC_NAMESPACE::rand(), vals[i]);
+#endif
 }
 
 TEST(LlvmLibcRandTest, SetSeed) {