From ddc30ff802eb135934fc7b785d33c05217ab9e39 Mon Sep 17 00:00:00 2001
From: Joseph Huber <35342157+jhuber6@users.noreply.github.com>
Date: Tue, 17 Oct 2023 14:02:31 -0400
Subject: [PATCH 01/15] [libc] Implement the 'ungetc' function on the GPU
 (#69248)

Summary:
This function closely follows the pattern of the other functions: add a
new RPC opcode and forward the call to the host. However, this also
required modifying the test somewhat. It seems that not all `libc`
implementations follow the same error rules that are tested here, and
the standard is not explicit about them, so we simply disable these EOF
checks when targeting the GPU.
---
 libc/config/gpu/entrypoints.txt              |  1 +
 libc/docs/gpu/support.rst                    |  1 +
 libc/include/llvm-libc-types/rpc_opcodes_t.h |  1 +
 libc/src/stdio/CMakeLists.txt                | 13 +--------
 libc/src/stdio/generic/CMakeLists.txt        | 12 ++++++++
 libc/src/stdio/{ => generic}/ungetc.cpp      |  0
 libc/src/stdio/gpu/CMakeLists.txt            | 11 ++++++++
 libc/src/stdio/gpu/ungetc.cpp                | 29 ++++++++++++++++++++
 libc/test/src/stdio/ungetc_test.cpp          |  8 ++++++
 libc/utils/gpu/server/rpc_server.cpp         |  7 +++++
 10 files changed, 71 insertions(+), 12 deletions(-)
 rename libc/src/stdio/{ => generic}/ungetc.cpp (100%)
 create mode 100644 libc/src/stdio/gpu/ungetc.cpp

diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index ad68216a76b942..731508088cb6f8 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -104,6 +104,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdio.fgetc
     libc.src.stdio.getc
     libc.src.stdio.getchar
+    libc.src.stdio.ungetc
     libc.src.stdio.stdin
     libc.src.stdio.stdout
     libc.src.stdio.stderr
diff --git a/libc/docs/gpu/support.rst b/libc/docs/gpu/support.rst
index fd27273ed562e4..806af5f219dfb4 100644
--- a/libc/docs/gpu/support.rst
+++ b/libc/docs/gpu/support.rst
@@ -134,6 +134,7 @@ ftell          |check|    |check|
 fflush         |check|    |check|
 fgetc          |check|    |check|
 fgets          |check|    |check|
+ungetc         |check|    |check|
 getc           |check|    |check|
 getchar        |check|    |check|
 puts           |check|    |check|
diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
index 61e17756fa6477..2fd318f06a7db1 100644
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -29,6 +29,7 @@ typedef enum {
   RPC_FSEEK,
   RPC_FTELL,
   RPC_FFLUSH,
+  RPC_UNGETC,
   RPC_LAST = 0xFFFF,
 } rpc_opcode_t;
 
diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt
index 169bc592dee488..380474ce271180 100644
--- a/libc/src/stdio/CMakeLists.txt
+++ b/libc/src/stdio/CMakeLists.txt
@@ -54,18 +54,6 @@ add_entrypoint_object(
     libc.src.__support.File.platform_file
 )
 
-add_entrypoint_object(
-  ungetc
-  SRCS
-    ungetc.cpp
-  HDRS
-    ungetc.h
-  DEPENDS
-    libc.include.stdio
-    libc.src.__support.File.file
-    libc.src.__support.File.platform_file
-)
-
 add_entrypoint_object(
   fopencookie
   SRCS
@@ -286,6 +274,7 @@ add_stdio_entrypoint_object(getc_unlocked)
 add_stdio_entrypoint_object(getchar)
 add_stdio_entrypoint_object(getchar_unlocked)
 add_stdio_entrypoint_object(fgets)
+add_stdio_entrypoint_object(ungetc)
 add_stdio_entrypoint_object(stdin)
 add_stdio_entrypoint_object(stdout)
 add_stdio_entrypoint_object(stderr)
diff --git a/libc/src/stdio/generic/CMakeLists.txt b/libc/src/stdio/generic/CMakeLists.txt
index 282d056bba7129..2ecef879eb4bbf 100644
--- a/libc/src/stdio/generic/CMakeLists.txt
+++ b/libc/src/stdio/generic/CMakeLists.txt
@@ -342,6 +342,18 @@ add_entrypoint_object(
     libc.src.__support.File.platform_file
 )
 
+add_entrypoint_object(
+  ungetc
+  SRCS
+    ungetc.cpp
+  HDRS
+    ../ungetc.h
+  DEPENDS
+    libc.include.stdio
+    libc.src.__support.File.file
+    libc.src.__support.File.platform_file
+)
+
 add_entrypoint_object(
   stdin
   SRCS
diff --git a/libc/src/stdio/ungetc.cpp b/libc/src/stdio/generic/ungetc.cpp
similarity index 100%
rename from libc/src/stdio/ungetc.cpp
rename to libc/src/stdio/generic/ungetc.cpp
diff --git a/libc/src/stdio/gpu/CMakeLists.txt b/libc/src/stdio/gpu/CMakeLists.txt
index 047b68931bce5c..1b1e2a903cc0b9 100644
--- a/libc/src/stdio/gpu/CMakeLists.txt
+++ b/libc/src/stdio/gpu/CMakeLists.txt
@@ -251,6 +251,17 @@ add_entrypoint_object(
     .ferror
 )
 
+add_entrypoint_object(
+  ungetc
+  SRCS
+    ungetc.cpp
+  HDRS
+    ../ungetc.h
+  DEPENDS
+    libc.include.stdio
+    .gpu_file
+)
+
 add_entrypoint_object(
   stdin
   SRCS
diff --git a/libc/src/stdio/gpu/ungetc.cpp b/libc/src/stdio/gpu/ungetc.cpp
new file mode 100644
index 00000000000000..373164a0c53a32
--- /dev/null
+++ b/libc/src/stdio/gpu/ungetc.cpp
@@ -0,0 +1,29 @@
+//===-- Implementation of ungetc ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/ungetc.h"
+#include "file.h"
+
+#include <stdio.h>
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(int, ungetc, (int c, ::FILE *stream)) {
+  int ret; // Assigned by the second callback passed to send_and_recv.
+  rpc::Client::Port port = rpc::client.open<RPC_UNGETC>();
+  port.send_and_recv(
+      [=](rpc::Buffer *buffer) {
+        buffer->data[0] = c; // Character to push back.
+        buffer->data[1] = file::from_stream(stream);
+      },
+      [&](rpc::Buffer *buffer) { ret = static_cast<int>(buffer->data[0]); });
+  port.close();
+  return ret; // Whatever the other side of the port stored in data[0].
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/stdio/ungetc_test.cpp b/libc/test/src/stdio/ungetc_test.cpp
index 75eecc87ef265f..c98995ff0811bb 100644
--- a/libc/test/src/stdio/ungetc_test.cpp
+++ b/libc/test/src/stdio/ungetc_test.cpp
@@ -24,12 +24,16 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) {
   constexpr size_t CONTENT_SIZE = sizeof(CONTENT);
   ASSERT_EQ(CONTENT_SIZE,
             LIBC_NAMESPACE::fwrite(CONTENT, 1, CONTENT_SIZE, file));
+#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations.
   // Cannot unget to an un-readable file.
   ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc('1', file));
+#endif
   ASSERT_EQ(0, LIBC_NAMESPACE::fclose(file));
 
   file = LIBC_NAMESPACE::fopen(FILENAME, "r+");
   ASSERT_FALSE(file == nullptr);
+  // Calling with an EOF should always return EOF without doing anything.
+  ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc(EOF, file));
   char c;
   ASSERT_EQ(LIBC_NAMESPACE::fread(&c, 1, 1, file), size_t(1));
   ASSERT_EQ(c, CONTENT[0]);
@@ -43,8 +47,10 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) {
   // ungetc should not fail after a seek operation.
   int unget_char = 'z';
   ASSERT_EQ(unget_char, LIBC_NAMESPACE::ungetc(unget_char, file));
+#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations.
   // Another unget should fail.
   ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc(unget_char, file));
+#endif
   // ungetting a char at the beginning of the file will allow us to fetch
   // one additional character.
   char new_data[CONTENT_SIZE + 1];
@@ -53,8 +59,10 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) {
   ASSERT_STREQ("zabcdef", new_data);
 
   ASSERT_EQ(size_t(1), LIBC_NAMESPACE::fwrite("x", 1, 1, file));
+#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations.
   // unget should fail after a write operation.
   ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc('1', file));
+#endif
 
   ASSERT_EQ(0, LIBC_NAMESPACE::fclose(file));
 }
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 1c1c9f1ae9e6b5..0550115f7cd1a1 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -186,6 +186,13 @@ struct Server {
       });
       break;
     }
+    case RPC_UNGETC: {
+      port->recv_and_send([](rpc::Buffer *buffer) {
+        buffer->data[0] = ungetc(static_cast<int>(buffer->data[0]),
+                                 file::to_stream(buffer->data[1]));
+      });
+      break;
+    }
     case RPC_NOOP: {
       port->recv([](rpc::Buffer *) {});
       break;

From b33723710f5194080e8bfab9f21c8445647c976b Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra@google.com>
Date: Tue, 17 Oct 2023 11:06:11 -0700
Subject: [PATCH 02/15] [NVPTX] Fixed few more corner cases for v4i8 lowering.
 (#69263)

Fixes https://github.com/llvm/llvm-project/issues/69124
---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   |  22 ++-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h     |   6 +
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td       |   3 +
 llvm/test/CodeGen/NVPTX/f16x2-instructions.ll |   6 +-
 llvm/test/CodeGen/NVPTX/i8x4-instructions.ll  | 154 ++++++++++++++++++
 llvm/test/CodeGen/NVPTX/param-load-store.ll   |   5 -
 6 files changed, 180 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 36da2e7b40efaa..a935c0e16a5523 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -504,13 +504,21 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   // Only logical ops can be done on v4i8 directly, others must be done
   // elementwise.
   setOperationAction(
-      {ISD::ADD,       ISD::MUL,        ISD::ABS,        ISD::SMIN,
-       ISD::SMAX,      ISD::UMIN,       ISD::UMAX,       ISD::CTPOP,
-       ISD::CTLZ,      ISD::ADD,        ISD::SUB,        ISD::MUL,
-       ISD::SHL,       ISD::SREM,       ISD::UREM,       ISD::SDIV,
-       ISD::UDIV,      ISD::SRA,        ISD::SRL,        ISD::MULHS,
-       ISD::MULHU,     ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP,
-       ISD::UINT_TO_FP},
+      {ISD::ABS,         ISD::ADD,        ISD::ADDC,        ISD::ADDE,
+       ISD::BITREVERSE,  ISD::CTLZ,       ISD::CTPOP,       ISD::CTTZ,
+       ISD::FP_TO_SINT,  ISD::FP_TO_UINT, ISD::FSHL,        ISD::FSHR,
+       ISD::MUL,         ISD::MULHS,      ISD::MULHU,       ISD::PARITY,
+       ISD::ROTL,        ISD::ROTR,       ISD::SADDO,       ISD::SADDO_CARRY,
+       ISD::SADDSAT,     ISD::SDIV,       ISD::SDIVREM,     ISD::SELECT_CC,
+       ISD::SETCC,       ISD::SHL,        ISD::SINT_TO_FP,  ISD::SMAX,
+       ISD::SMIN,        ISD::SMULO,      ISD::SMUL_LOHI,   ISD::SRA,
+       ISD::SREM,        ISD::SRL,        ISD::SSHLSAT,     ISD::SSUBO,
+       ISD::SSUBO_CARRY, ISD::SSUBSAT,    ISD::SUB,         ISD::SUBC,
+       ISD::SUBE,        ISD::UADDO,      ISD::UADDO_CARRY, ISD::UADDSAT,
+       ISD::UDIV,        ISD::UDIVREM,    ISD::UINT_TO_FP,  ISD::UMAX,
+       ISD::UMIN,        ISD::UMULO,      ISD::UMUL_LOHI,   ISD::UREM,
+       ISD::USHLSAT,     ISD::USUBO,      ISD::USUBO_CARRY, ISD::VSELECT,
+       ISD::USUBSAT},
       MVT::v4i8, Expand);
 
   // Operations not directly supported by NVPTX.
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 5c7c10965e2f2c..f6932db2aeb0b9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -586,6 +586,12 @@ class NVPTXTargetLowering : public TargetLowering {
   AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
+  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
+    // There's rarely any point in packing something into a vector type if we
+    // already have the source data.
+    return true;
+  }
+
 private:
   const NVPTXSubtarget &STI; // cache the subtarget here
   SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 84ed953ad18a9b..b0b96b94a12575 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3485,6 +3485,9 @@ def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
 def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))),
           (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
 
+def: Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))),
+         (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
+
 // Count leading zeros
 let hasSideEffects = false in {
   def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 18788c776ffbd7..464b3a754804fe 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -1319,10 +1319,8 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
 ; CHECK-DAG:  and.b16         [[BX1:%rs[0-9]+]], [[B1]], -32768;
 ; CHECK-DAG:  or.b16          [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
 ; CHECK-DAG:  or.b16          [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
-; CHECK-DAG:  mov.b32         [[R:%r[0-9]+]], {[[R0]], [[R1]]}
-; CHECK:      mov.b32         {[[RX0:%rs[0-9]+]], [[RX1:%rs[0-9]+]]}, [[R]]
-; CHECK-DAG:  cvt.f32.f16     [[XR0:%f[0-9]+]], [[RX0]];
-; CHECK-DAG:  cvt.f32.f16     [[XR1:%f[0-9]+]], [[RX1]];
+; CHECK-DAG:  cvt.f32.f16     [[XR0:%f[0-9]+]], [[R0]];
+; CHECK-DAG:  cvt.f32.f16     [[XR1:%f[0-9]+]], [[R1]];
 ; CHECK:      st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]};
 ; CHECK:      ret;
 define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 {
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index fd48313ad68484..ddad374a4dc119 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -1269,4 +1269,158 @@ define <4 x i8> @test_fptoui_2xhalf_to_2xi8(<4 x half> %a) #0 {
   ret <4 x i8> %r
 }
 
+define void @test_srem_v4i8(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: test_srem_v4i8(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<13>;
+; CHECK-NEXT:    .reg .b32 %r<18>;
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    ld.param.u64 %rd3, [test_srem_v4i8_param_2];
+; CHECK-NEXT:    ld.param.u64 %rd2, [test_srem_v4i8_param_1];
+; CHECK-NEXT:    ld.param.u64 %rd1, [test_srem_v4i8_param_0];
+; CHECK-NEXT:    ld.u32 %r1, [%rd1];
+; CHECK-NEXT:    ld.u32 %r2, [%rd2];
+; CHECK-NEXT:    bfe.s32 %r3, %r2, 0, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs1, %r3;
+; CHECK-NEXT:    bfe.s32 %r4, %r1, 0, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs2, %r4;
+; CHECK-NEXT:    rem.s16 %rs3, %rs2, %rs1;
+; CHECK-NEXT:    cvt.u32.u16 %r5, %rs3;
+; CHECK-NEXT:    bfe.s32 %r6, %r2, 8, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs4, %r6;
+; CHECK-NEXT:    bfe.s32 %r7, %r1, 8, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs5, %r7;
+; CHECK-NEXT:    rem.s16 %rs6, %rs5, %rs4;
+; CHECK-NEXT:    cvt.u32.u16 %r8, %rs6;
+; CHECK-NEXT:    bfi.b32 %r9, %r8, %r5, 8, 8;
+; CHECK-NEXT:    bfe.s32 %r10, %r2, 16, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs7, %r10;
+; CHECK-NEXT:    bfe.s32 %r11, %r1, 16, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs8, %r11;
+; CHECK-NEXT:    rem.s16 %rs9, %rs8, %rs7;
+; CHECK-NEXT:    cvt.u32.u16 %r12, %rs9;
+; CHECK-NEXT:    bfi.b32 %r13, %r12, %r9, 16, 8;
+; CHECK-NEXT:    bfe.s32 %r14, %r2, 24, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs10, %r14;
+; CHECK-NEXT:    bfe.s32 %r15, %r1, 24, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs11, %r15;
+; CHECK-NEXT:    rem.s16 %rs12, %rs11, %rs10;
+; CHECK-NEXT:    cvt.u32.u16 %r16, %rs12;
+; CHECK-NEXT:    bfi.b32 %r17, %r16, %r13, 24, 8;
+; CHECK-NEXT:    st.u32 [%rd3], %r17;
+; CHECK-NEXT:    ret;
+entry:
+  %t57 = load <4 x i8>, ptr %a, align 4
+  %t59 = load <4 x i8>, ptr %b, align 4
+  %x = srem <4 x i8> %t57, %t59
+  store <4 x i8> %x, ptr %c, align 4
+  ret void
+}
+
+;; v3i8 lowering, especially for unaligned loads, is terrible. We end up doing
+;; tons of pointless scalar_to_vector/bitcast/extract_elt on v2i16/v4i8, which
+;; is further complicated by LLVM trying to use i16 as an intermediate type,
+;; because we don't have i8 registers. It's a mess.
+;; Ideally we want to split it into element-wise ops, but the legalizer can't
+;; handle odd-sized vectors. TL;DR: don't use odd-sized vectors of i8.
+define void @test_srem_v3i8(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: test_srem_v3i8(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<20>;
+; CHECK-NEXT:    .reg .b32 %r<16>;
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    ld.param.u64 %rd3, [test_srem_v3i8_param_2];
+; CHECK-NEXT:    ld.param.u64 %rd2, [test_srem_v3i8_param_1];
+; CHECK-NEXT:    ld.param.u64 %rd1, [test_srem_v3i8_param_0];
+; CHECK-NEXT:    ld.u8 %rs1, [%rd1];
+; CHECK-NEXT:    ld.u8 %rs2, [%rd1+1];
+; CHECK-NEXT:    shl.b16 %rs3, %rs2, 8;
+; CHECK-NEXT:    or.b16 %rs4, %rs3, %rs1;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs4;
+; CHECK-NEXT:    ld.s8 %rs5, [%rd1+2];
+; CHECK-NEXT:    ld.u8 %rs6, [%rd2];
+; CHECK-NEXT:    ld.u8 %rs7, [%rd2+1];
+; CHECK-NEXT:    shl.b16 %rs8, %rs7, 8;
+; CHECK-NEXT:    or.b16 %rs9, %rs8, %rs6;
+; CHECK-NEXT:    cvt.u32.u16 %r3, %rs9;
+; CHECK-NEXT:    ld.s8 %rs10, [%rd2+2];
+; CHECK-NEXT:    bfe.s32 %r5, %r3, 0, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs11, %r5;
+; CHECK-NEXT:    bfe.s32 %r6, %r1, 0, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs12, %r6;
+; CHECK-NEXT:    rem.s16 %rs13, %rs12, %rs11;
+; CHECK-NEXT:    cvt.u32.u16 %r7, %rs13;
+; CHECK-NEXT:    bfe.s32 %r8, %r3, 8, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs14, %r8;
+; CHECK-NEXT:    bfe.s32 %r9, %r1, 8, 8;
+; CHECK-NEXT:    cvt.s8.s32 %rs15, %r9;
+; CHECK-NEXT:    rem.s16 %rs16, %rs15, %rs14;
+; CHECK-NEXT:    cvt.u32.u16 %r10, %rs16;
+; CHECK-NEXT:    bfi.b32 %r11, %r10, %r7, 8, 8;
+; CHECK-NEXT:    // implicit-def: %r13
+; CHECK-NEXT:    bfi.b32 %r12, %r13, %r11, 16, 8;
+; CHECK-NEXT:    // implicit-def: %r15
+; CHECK-NEXT:    bfi.b32 %r14, %r15, %r12, 24, 8;
+; CHECK-NEXT:    rem.s16 %rs17, %rs5, %rs10;
+; CHECK-NEXT:    cvt.u16.u32 %rs18, %r14;
+; CHECK-NEXT:    st.u8 [%rd3], %rs18;
+; CHECK-NEXT:    shr.u16 %rs19, %rs18, 8;
+; CHECK-NEXT:    st.u8 [%rd3+1], %rs19;
+; CHECK-NEXT:    st.u8 [%rd3+2], %rs17;
+; CHECK-NEXT:    ret;
+entry:
+  %t57 = load <3 x i8>, ptr %a, align 1
+  %t59 = load <3 x i8>, ptr %b, align 1
+  %x = srem <3 x i8> %t57, %t59
+  store <3 x i8> %x, ptr %c, align 1
+  ret void
+}
+
+define void @test_sext_v4i1_to_v4i8(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: test_sext_v4i1_to_v4i8(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<5>;
+; CHECK-NEXT:    .reg .b32 %r<18>;
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    ld.param.u64 %rd3, [test_sext_v4i1_to_v4i8_param_2];
+; CHECK-NEXT:    ld.param.u64 %rd2, [test_sext_v4i1_to_v4i8_param_1];
+; CHECK-NEXT:    ld.param.u64 %rd1, [test_sext_v4i1_to_v4i8_param_0];
+; CHECK-NEXT:    ld.u32 %r1, [%rd1];
+; CHECK-NEXT:    ld.u32 %r2, [%rd2];
+; CHECK-NEXT:    bfe.s32 %r3, %r2, 24, 8;
+; CHECK-NEXT:    bfe.s32 %r4, %r1, 24, 8;
+; CHECK-NEXT:    setp.hi.u32 %p1, %r4, %r3;
+; CHECK-NEXT:    bfe.s32 %r5, %r2, 16, 8;
+; CHECK-NEXT:    bfe.s32 %r6, %r1, 16, 8;
+; CHECK-NEXT:    setp.hi.u32 %p2, %r6, %r5;
+; CHECK-NEXT:    bfe.s32 %r7, %r2, 8, 8;
+; CHECK-NEXT:    bfe.s32 %r8, %r1, 8, 8;
+; CHECK-NEXT:    setp.hi.u32 %p3, %r8, %r7;
+; CHECK-NEXT:    bfe.s32 %r9, %r2, 0, 8;
+; CHECK-NEXT:    bfe.s32 %r10, %r1, 0, 8;
+; CHECK-NEXT:    setp.hi.u32 %p4, %r10, %r9;
+; CHECK-NEXT:    selp.s32 %r11, -1, 0, %p4;
+; CHECK-NEXT:    selp.s32 %r12, -1, 0, %p3;
+; CHECK-NEXT:    bfi.b32 %r13, %r12, %r11, 8, 8;
+; CHECK-NEXT:    selp.s32 %r14, -1, 0, %p2;
+; CHECK-NEXT:    bfi.b32 %r15, %r14, %r13, 16, 8;
+; CHECK-NEXT:    selp.s32 %r16, -1, 0, %p1;
+; CHECK-NEXT:    bfi.b32 %r17, %r16, %r15, 24, 8;
+; CHECK-NEXT:    st.u32 [%rd3], %r17;
+; CHECK-NEXT:    ret;
+entry:
+  %t1 = load <4 x i8>, ptr %a, align 4
+  %t2 = load <4 x i8>, ptr %b, align 4
+  %t5 = icmp ugt <4 x i8> %t1, %t2
+  %t6 = sext <4 x i1> %t5 to <4 x i8>
+  store <4 x i8> %t6, ptr %c, align 4
+  ret void
+}
+
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll
index b4208c691c91df..c14dc88431d316 100644
--- a/llvm/test/CodeGen/NVPTX/param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll
@@ -364,10 +364,6 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) {
 ; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
 ; CHECK-DAG:  ld.param.u16    [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
 ; CHECK-DAG:  ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
-; CHECK-DAG:	mov.b32 	[[R0:%r[0-9]+]], {[[E0]], [[E1]]};
-; CHECK-DAG:	mov.b32 	{[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[R0]];
-; CHECK-DAG:	mov.b32 	[[R1:%r[0-9]+]], {[[E2]], [[E3]]};
-; CHECK-DAG:	mov.b32 	{[[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [[R1]];
 ; CHECK:      .param .align 16 .b8 param0[16];
 ; CHECK-DAG:  st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
 ; CHECK-DAG:  st.param.b16    [param0+8], [[E4]];
@@ -496,7 +492,6 @@ define <4 x half> @test_v4f16(<4 x half> %a) {
 ; CHECK-LABEL: test_v5f16(
 ; CHECK:      .param .align 16 .b8 test_v5f16_param_0[16]
 ; CHECK-DAG:  ld.param.v4.b16  {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0];
-; CHECK-DAG:  mov.b32         {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]];
 ; CHECK-DAG:  ld.param.b16    [[E4:%rs[0-9]+]], [test_v5f16_param_0+8];
 ; CHECK:      .param .align 16 .b8 param0[16];
 ; CHECK-DAG:  st.param.v4.b16 [param0+0],

From dd64c82cbc9c69924b5c6df059e5b065fa29d185 Mon Sep 17 00:00:00 2001
From: Haowei <haowei@google.com>
Date: Tue, 17 Oct 2023 11:15:46 -0700
Subject: [PATCH 03/15] [unittest] Allow LLVM unit test to run under a wrapper
 program. (#66821)

This patch adds the CMake option "LLVM_GTEST_RUN_UNDER" to the LLVM
unittest configuration. When specified, LLVM unit tests are run under the
wrapper program specified by this option. This feature can simplify the
setup needed to run LLVM unit tests on a target platform that differs
from the host.
---
 llvm/CMakeLists.txt               | 3 +++
 llvm/test/Unit/lit.cfg.py         | 6 +++++-
 llvm/test/Unit/lit.site.cfg.py.in | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index ef2f2146a03644..82d4beea91e346 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -1219,6 +1219,9 @@ if( LLVM_INCLUDE_EXAMPLES )
 endif()
 
 if( LLVM_INCLUDE_TESTS )
+  set(LLVM_GTEST_RUN_UNDER
+    "" CACHE STRING
+    "Define the wrapper program that LLVM unit tests should be run under.")
   if(EXISTS ${LLVM_MAIN_SRC_DIR}/projects/test-suite AND TARGET clang)
     include(LLVMExternalProjectUtils)
     llvm_ExternalProject_Add(test-suite ${LLVM_MAIN_SRC_DIR}/projects/test-suite
diff --git a/llvm/test/Unit/lit.cfg.py b/llvm/test/Unit/lit.cfg.py
index f15c30dbcdb0aa..61296d7ea0032e 100644
--- a/llvm/test/Unit/lit.cfg.py
+++ b/llvm/test/Unit/lit.cfg.py
@@ -19,7 +19,11 @@
 config.test_source_root = config.test_exec_root
 
 # testFormat: The test format to use to interpret tests.
-config.test_format = lit.formats.GoogleTest(config.llvm_build_mode, "Tests")
+config.test_format = lit.formats.GoogleTest(
+    config.llvm_build_mode,
+    "Tests",
+    run_under=config.gtest_run_under,
+)
 
 # Propagate the temp directory. Windows requires this because it uses \Windows\
 # if none of these are present.
diff --git a/llvm/test/Unit/lit.site.cfg.py.in b/llvm/test/Unit/lit.site.cfg.py.in
index 1d7d7658014949..3536a34f796a28 100644
--- a/llvm/test/Unit/lit.site.cfg.py.in
+++ b/llvm/test/Unit/lit.site.cfg.py.in
@@ -7,6 +7,7 @@ config.llvm_obj_root = path(r"@LLVM_BINARY_DIR@")
 config.llvm_tools_dir = lit_config.substitute(path(r"@LLVM_TOOLS_DIR@"))
 config.llvm_build_mode = lit_config.substitute("@LLVM_BUILD_MODE@")
 config.shlibdir = lit_config.substitute(path(r"@SHLIBDIR@"))
+config.gtest_run_under = lit_config.substitute(r"@LLVM_GTEST_RUN_UNDER@")
 
 # Let the main config do the real work.
 lit_config.load_config(

From fd311126349b8fe1684d62154a9fa5a7bbb0b713 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Tue, 17 Oct 2023 19:17:40 +0100
Subject: [PATCH 04/15] [VPlan] Insert Trunc/Exts for reductions directly in
 VPlan.

Update the code to create Trunc/Ext recipes directly in
adjustRecipesForReductions instead of fixing them up later in
fixReduction.

This explicitly models the required conversions and also makes sure they
are generated at the right place (instead of after the exit condition),
hence the changes in a few tests.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 67 ++++++++++---------
 .../epilog-vectorization-reductions.ll        |  8 +--
 .../LoopVectorize/reduction-small-size.ll     |  8 +--
 .../scalable-reduction-inloop.ll              |  8 +--
 4 files changed, 47 insertions(+), 44 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index aa435b0d47aa59..14c5c0d18a4db6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3792,8 +3792,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
     State.setDebugLocFrom(I->getDebugLoc());
 
   VPValue *LoopExitInstDef = PhiR->getBackedgeValue();
-  // This is the vector-clone of the value that leaves the loop.
-  Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
 
   // Before each round, move the insertion point right between
   // the PHIs and the values we are going to write.
@@ -3805,10 +3803,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
   State.setDebugLocFrom(LoopExitInst->getDebugLoc());
 
   Type *PhiTy = OrigPhi->getType();
-
-  VPBasicBlock *LatchVPBB =
-      PhiR->getParent()->getEnclosingLoopRegion()->getExitingBasicBlock();
-  BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB];
   // If tail is folded by masking, the vector value to leave the loop should be
   // a Select choosing between the vectorized LoopExitInst and vectorized Phi,
   // instead of the former. For an inloop reduction the reduction will already
@@ -3834,23 +3828,12 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
   // then extend the loop exit value to enable InstCombine to evaluate the
   // entire expression in the smaller type.
   if (VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
-    assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
-    Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
-    Builder.SetInsertPoint(VectorLoopLatch->getTerminator());
-    for (unsigned Part = 0; Part < UF; ++Part) {
-      Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
-      Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
-                                        : Builder.CreateZExt(Trunc, VecTy);
-      for (User *U : llvm::make_early_inc_range(RdxParts[Part]->users()))
-        if (U != Trunc) {
-          U->replaceUsesOfWith(RdxParts[Part], Extnd);
-          RdxParts[Part] = Extnd;
-        }
-    }
     Builder.SetInsertPoint(LoopMiddleBlock,
                            LoopMiddleBlock->getFirstInsertionPt());
-    for (unsigned Part = 0; Part < UF; ++Part)
+    Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
+    for (unsigned Part = 0; Part < UF; ++Part) {
       RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
+    }
   }
 
   // Reduce all of the unrolled parts into a single vector.
@@ -9155,18 +9138,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       PreviousLink = RedRecipe;
     }
   }
-
-  // If tail is folded by masking, introduce selects between the phi
-  // and the live-out instruction of each reduction, at the beginning of the
-  // dedicated latch block.
-  if (CM.foldTailByMasking()) {
     Builder.setInsertPoint(&*LatchVPBB->begin());
     for (VPRecipeBase &R :
          Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
-      VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
-      if (!PhiR || PhiR->isInLoop())
-        continue;
-      const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+    VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
+    if (!PhiR || PhiR->isInLoop())
+      continue;
+
+    const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+    auto *Result = PhiR->getBackedgeValue()->getDefiningRecipe();
+    // If tail is folded by masking, introduce selects between the phi
+    // and the live-out instruction of each reduction, at the beginning of the
+    // dedicated latch block.
+    if (CM.foldTailByMasking()) {
       VPValue *Cond =
           RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), *Plan);
       VPValue *Red = PhiR->getBackedgeValue();
@@ -9174,16 +9158,35 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
              "reduction recipe must be defined before latch");
       FastMathFlags FMFs = RdxDesc.getFastMathFlags();
       Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
-      auto *Select =
+      Result =
           PhiTy->isFloatingPointTy()
               ? new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs)
               : new VPInstruction(Instruction::Select, {Cond, Red, PhiR});
-      Select->insertBefore(&*Builder.getInsertPoint());
+      Result->insertBefore(&*Builder.getInsertPoint());
       if (PreferPredicatedReductionSelect ||
           TTI.preferPredicatedReductionSelect(
               PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy,
               TargetTransformInfo::ReductionFlags()))
-        PhiR->setOperand(1, Select);
+        PhiR->setOperand(1, Result->getVPSingleValue());
+    }
+    // If the vector reduction can be performed in a smaller type, we truncate
+    // then extend the loop exit value to enable InstCombine to evaluate the
+    // entire expression in the smaller type.
+    Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType();
+    if (PhiTy != RdxDesc.getRecurrenceType()) {
+      assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
+      Type *RdxTy = RdxDesc.getRecurrenceType();
+      auto *Trunc = new VPWidenCastRecipe(Instruction::Trunc,
+                                          Result->getVPSingleValue(), RdxTy);
+      auto *Extnd =
+          RdxDesc.isSigned()
+              ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy)
+              : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy);
+
+      Trunc->insertAfter(Result);
+      Extnd->insertAfter(Trunc);
+      Result->getVPSingleValue()->replaceAllUsesWith(Extnd);
+      Trunc->setOperand(0, Result->getVPSingleValue());
     }
   }
 
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index 7a3c7d6fbfea71..03903d80cfd6ec 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -207,10 +207,10 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i32> [[TMP1]], [[TMP4]]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    [[TMP7:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i16>
 ; CHECK-NEXT:    [[TMP8]] = zext <4 x i16> [[TMP7]] to <4 x i32>
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP9:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16>
@@ -234,10 +234,10 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i16>, ptr [[TMP16]], align 2
 ; CHECK-NEXT:    [[TMP17:%.*]] = zext <4 x i16> [[WIDE_LOAD4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP18:%.*]] = or <4 x i32> [[TMP14]], [[TMP17]]
-; CHECK-NEXT:    [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 4
-; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 256
 ; CHECK-NEXT:    [[TMP20:%.*]] = trunc <4 x i32> [[TMP18]] to <4 x i16>
 ; CHECK-NEXT:    [[TMP21]] = zext <4 x i16> [[TMP20]] to <4 x i32>
+; CHECK-NEXT:    [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 4
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 256
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
 ; CHECK-NEXT:    [[TMP22:%.*]] = trunc <4 x i32> [[TMP21]] to <4 x i16>
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
index 837d663f4a9263..a4a075463b1b0b 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
@@ -22,10 +22,10 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8>
 ; CHECK-NEXT:    [[TMP5]] = zext <4 x i8> [[TMP4]] to <4 x i32>
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8>
@@ -99,10 +99,10 @@ define i32 @PR35734(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP3]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i1>
 ; CHECK-NEXT:    [[TMP7]] = sext <4 x i1> [[TMP6]] to <4 x i32>
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1>
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
index 3cc6e5fa7b8d5f..afe16c71f7f9ca 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -17,14 +17,14 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
 ; CHECK-NEXT:    [[TMP28:%.*]] = add <vscale x 8 x i32> [[TMP14]], [[TMP26]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = add <vscale x 8 x i32> [[TMP15]], [[TMP27]]
+; CHECK-NEXT:    [[TMP33:%.*]] = trunc <vscale x 8 x i32> [[TMP28]] to <vscale x 8 x i8>
+; CHECK-NEXT:    [[TMP35:%.*]] = trunc <vscale x 8 x i32> [[TMP29]] to <vscale x 8 x i8>
+; CHECK-NEXT:    [[TMP34]] = zext <vscale x 8 x i8> [[TMP33]] to <vscale x 8 x i32>
+; CHECK-NEXT:    [[TMP36]] = zext <vscale x 8 x i8> [[TMP35]] to <vscale x 8 x i32>
 ; CHECK-NEXT:    [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP31:%.*]] = mul i32 [[TMP30]], 16
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}}
-; CHECK-NEXT:    [[TMP33:%.*]] = trunc <vscale x 8 x i32> [[TMP28]] to <vscale x 8 x i8>
-; CHECK-NEXT:    [[TMP34]] = zext <vscale x 8 x i8> [[TMP33]] to <vscale x 8 x i32>
-; CHECK-NEXT:    [[TMP35:%.*]] = trunc <vscale x 8 x i32> [[TMP29]] to <vscale x 8 x i8>
-; CHECK-NEXT:    [[TMP36]] = zext <vscale x 8 x i8> [[TMP35]] to <vscale x 8 x i32>
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP37:%.*]] = trunc <vscale x 8 x i32> [[TMP34]] to <vscale x 8 x i8>
 ; CHECK-NEXT:    [[TMP38:%.*]] = trunc <vscale x 8 x i32> [[TMP36]] to <vscale x 8 x i8>

From 71c97c735c10dd8040f721f93a0b7be0cc58d3ef Mon Sep 17 00:00:00 2001
From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com>
Date: Tue, 17 Oct 2023 11:34:06 -0700
Subject: [PATCH 05/15] =?UTF-8?q?[mlir][sparse]=20avoid=20tensor=20to=20me?=
 =?UTF-8?q?mref=20conversion=20in=20sparse=20tensor=20rewri=E2=80=A6=20(#6?=
 =?UTF-8?q?9362)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…ting rules.
---
 .../Transforms/SparseTensorRewriting.cpp      | 107 +++++--------
 .../SparseTensor/convert_sparse2dense.mlir    |  35 ++---
 .../Dialect/SparseTensor/sparse_concat.mlir   | 148 +++++++++---------
 3 files changed, 132 insertions(+), 158 deletions(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index 1bfee3aa1d7ee8..e50b14975e83d6 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -829,47 +829,40 @@ struct ReshapeRewriter : public OpRewritePattern<ReshapeOp> {
   }
 };
 
+// A trivial wrapper to help generate different operations for dense/sparse
+// tensors.
 struct TensorLike {
   TensorLike(OpBuilder &builder, Location loc, RankedTensorType rtt,
-             ValueRange sizes)
-      : isSparse(rtt.getEncoding() != nullptr) {
+             ValueRange sizes) {
     SmallVector<Value> dynSzs;
     getDynamicSizes(rtt, sizes, dynSzs);
 
-    if (isSparse)
-      val = builder.create<AllocTensorOp>(loc, rtt, dynSzs);
-    else
-      val = allocDenseTensor(builder, loc, rtt, sizes);
-  };
-
-  void insertOrStore(OpBuilder &builder, Location loc, Value v,
-                     ValueRange crds) {
-    if (isSparse)
-      val = builder.create<InsertOp>(loc, v, val, crds);
-    else
-      builder.create<memref::StoreOp>(loc, v, val, crds);
+    val = builder.create<AllocTensorOp>(loc, rtt, dynSzs);
+    if (!isSparse()) {
+      Value c0 = constantZero(builder, loc, rtt.getElementType());
+      val = builder.create<linalg::FillOp>(loc, c0, val).getResult(0);
+    }
   }
 
-  Value getSSA() const {
-    // We don't need to maintain the SSA chain for a memref value.
-    return isSparse ? val : nullptr;
+  void insert(OpBuilder &builder, Location loc, Value v, ValueRange crds) {
+    // TODO: Unify these two.
+    if (isSparse())
+      val = builder.create<sparse_tensor::InsertOp>(loc, v, val, crds);
+    else
+      val = builder.create<tensor::InsertOp>(loc, v, val, crds);
   }
 
   Value finalize(OpBuilder &builder, Location loc, RankedTensorType rtp) const {
-    if (isSparse)
+    if (isSparse())
       return builder.create<LoadOp>(loc, val, true);
-    return builder.create<bufferization::ToTensorOp>(loc, rtp, val);
+    return val;
   }
 
-  void updateSSA(Value v) {
-    // Dense memref is a non-SSA value.
-    assert(isSparse);
-    val = v;
+  bool isSparse() const {
+    return getSparseTensorEncoding(val.getType()) != nullptr;
   }
 
-private:
-  bool isSparse;
-  Value val; // either a memref (for dense tensor) or a sparse tensor.
+  Value val;
 };
 
 struct ConcatenateRewriter : public OpRewritePattern<ConcatenateOp> {
@@ -901,14 +894,14 @@ struct ConcatenateRewriter : public OpRewritePattern<ConcatenateOp> {
 
     TensorLike dstBuf(rewriter, loc, dstTp.getRankedTensorType(), sizes);
     Value offset = constantIndex(rewriter, loc, 0);
-    Value iterArg = dstBuf.getSSA();
+    Value iterArg = dstBuf.val;
 
     ForeachOp foreachOp;
     for (Value input : op.getInputs()) {
       // Builds a for op for each input tensor to append new values into the
       // output tensor.
       foreachOp = rewriter.create<ForeachOp>(
-          loc, input, iterArg ? ValueRange{iterArg} : ValueRange{},
+          loc, input, iterArg,
           [&](OpBuilder &builder, Location loc, ValueRange dcvs, Value v,
               ValueRange reduc) {
             SmallVector<Value> dstLcvs(dstTp.getLvlRank());
@@ -920,32 +913,26 @@ struct ConcatenateRewriter : public OpRewritePattern<ConcatenateOp> {
               // FIXME: `toStoredDim` is deprecated
               dstLcvs[toStoredDim(dstTp.getEncoding(), d)] = crd;
             }
-
-            if (!reduc.empty())
-              dstBuf.updateSSA(reduc.front());
-
+            // Enters foreach, updates the SSA chain.
+            dstBuf.val = reduc.front();
             if (!dstTp.isAllDense()) {
               Value cond = genIsNonzero(builder, loc, v);
               auto ifOp = builder.create<scf::IfOp>(loc, reduc.getTypes(), cond,
                                                     /*else*/ true);
               builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-              builder.create<scf::YieldOp>(loc, dstBuf.getSSA());
+              builder.create<scf::YieldOp>(loc, dstBuf.val);
 
               builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-              dstBuf.insertOrStore(builder, loc, v, dstLcvs);
-              builder.create<scf::YieldOp>(loc, dstBuf.getSSA());
+              dstBuf.insert(builder, loc, v, dstLcvs);
+              builder.create<scf::YieldOp>(loc, dstBuf.val);
 
               // Exits the ifOp, update the sparse tensor SSA value.
               builder.setInsertionPointAfter(ifOp);
-              assert(!reduc.empty());
-              dstBuf.updateSSA(ifOp.getResult(0));
+              dstBuf.val = ifOp.getResult(0);
             } else {
-              dstBuf.insertOrStore(builder, loc, v, dstLcvs);
+              dstBuf.insert(builder, loc, v, dstLcvs);
             }
-            if (reduc.empty())
-              builder.create<sparse_tensor::YieldOp>(loc);
-            else
-              builder.create<sparse_tensor::YieldOp>(loc, dstBuf.getSSA());
+            builder.create<sparse_tensor::YieldOp>(loc, dstBuf.val);
           });
       // Accumulates the offset. Note that only static-shaped inputs are allowed
       // by concatenate op verifier, which saves us from computing the offset
@@ -955,15 +942,11 @@ struct ConcatenateRewriter : public OpRewritePattern<ConcatenateOp> {
       offset = rewriter.create<arith::AddIOp>(
           loc, offset, constantIndex(rewriter, loc, *sh));
 
-      if (!foreachOp.getResults().empty()) {
-        iterArg = foreachOp.getResult(0);
-        dstBuf.updateSSA(iterArg);
-      }
+      iterArg = foreachOp.getResult(0);
+      dstBuf.val = iterArg;
     }
 
-    if (!foreachOp.getResults().empty())
-      dstBuf.updateSSA(iterArg);
-
+    dstBuf.val = iterArg;
     Value ret = dstBuf.finalize(rewriter, loc, dstTp.getRankedTensorType());
     rewriter.replaceOp(op, ret);
     return success();
@@ -1010,15 +993,12 @@ struct DirectConvertRewriter : public OpRewritePattern<ConvertOp> {
     ValueRange vs;
     TensorLike dstBuf(rewriter, loc, dstStt.getRankedTensorType(), sizes);
 
-    Value iterArg = dstBuf.getSSA();
     auto foreachOp = rewriter.create<ForeachOp>(
-        loc, src, iterArg ? ValueRange{iterArg} : ValueRange{}, foreachOrder,
+        loc, src, dstBuf.val, foreachOrder,
         [&](OpBuilder &builder, Location loc, ValueRange dcvs, Value v,
             ValueRange reduc) {
           // Enters the loop, update the SSA value for insertion chain.
-          if (!reduc.empty())
-            dstBuf.updateSSA(reduc.front());
-
+          dstBuf.val = reduc.front();
           const Dimension dimRank = dstStt.getDimRank();
           const Level lvlRank = dstStt.getLvlRank();
           SmallVector<Value> lcvs(lvlRank);
@@ -1028,34 +1008,29 @@ struct DirectConvertRewriter : public OpRewritePattern<ConvertOp> {
           }
 
           if (!skipZeroCheck) {
-            assert(!reduc.empty());
             Value cond = genIsNonzero(builder, loc, v);
             auto ifOp = builder.create<scf::IfOp>(loc, reduc.getTypes(), cond,
                                                   /*else*/ true);
             builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-            builder.create<scf::YieldOp>(loc, dstBuf.getSSA());
+            builder.create<scf::YieldOp>(loc, dstBuf.val);
 
             builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-            dstBuf.insertOrStore(builder, loc, v, lcvs);
-            builder.create<scf::YieldOp>(loc, dstBuf.getSSA());
+            dstBuf.insert(builder, loc, v, lcvs);
+            builder.create<scf::YieldOp>(loc, dstBuf.val);
 
             // Exits the ifOp, update the sparse tensor SSA value.
             builder.setInsertionPointAfter(ifOp);
-            dstBuf.updateSSA(ifOp.getResult(0));
+            dstBuf.val = ifOp.getResult(0);
           } else {
-            dstBuf.insertOrStore(builder, loc, v, lcvs);
+            dstBuf.insert(builder, loc, v, lcvs);
           }
-          if (reduc.empty())
-            builder.create<sparse_tensor::YieldOp>(loc);
-          else
-            builder.create<sparse_tensor::YieldOp>(loc, dstBuf.getSSA());
+          builder.create<sparse_tensor::YieldOp>(loc, dstBuf.val);
         });
 
     rewriter.setInsertionPointAfter(foreachOp);
 
     // Exits the for loop, links the SSA chain.
-    if (!foreachOp.getResults().empty())
-      dstBuf.updateSSA(foreachOp.getResult(0));
+    dstBuf.val = foreachOp.getResult(0);
 
     Value ret = dstBuf.finalize(rewriter, loc, dstStt.getRankedTensorType());
     rewriter.replaceOp(op, ret);
diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
index c22f051a0d5854..e2dcb068e11851 100644
--- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
@@ -14,11 +14,10 @@
 
 // CHECK-LABEL:  func.func @sparse_convert_1d
 // CHECK-NOT:      sparse_tensor.reorder_coo
-// CHECK:          memref.alloc
+// CHECK:          bufferization.alloc_tensor
 // CHECK:          linalg.fill
 // CHECK:          sparse_tensor.foreach
-// CHECK:            memref.store
-// CHECK:          bufferization.to_tensor
+// CHECK:            tensor.insert
 func.func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13xi32> {
   %0 = sparse_tensor.convert %arg0 : tensor<13xi32, #SparseVector> to tensor<13xi32>
   return %0 : tensor<13xi32>
@@ -26,11 +25,10 @@ func.func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13x
 
 // CHECK-LABEL:  func.func @sparse_convert_1d_dyn
 // CHECK-NOT:      sparse_tensor.reorder_coo
-// CHECK:          memref.alloc
+// CHECK:          bufferization.alloc_tensor
 // CHECK:          linalg.fill
 // CHECK:          sparse_tensor.foreach
-// CHECK:            memref.store
-// CHECK:          bufferization.to_tensor
+// CHECK:            tensor.insert
 func.func @sparse_convert_1d_dyn(%arg0: tensor<?xi32, #SparseVector>) -> tensor<?xi32> {
   %0 = sparse_tensor.convert %arg0 : tensor<?xi32, #SparseVector> to tensor<?xi32>
   return %0 : tensor<?xi32>
@@ -38,11 +36,10 @@ func.func @sparse_convert_1d_dyn(%arg0: tensor<?xi32, #SparseVector>) -> tensor<
 
 // CHECK-LABEL:  func.func @sparse_convert_2d
 // CHECK-NOT:      sparse_tensor.reorder_coo
-// CHECK:          memref.alloc
+// CHECK:          bufferization.alloc_tensor
 // CHECK:          linalg.fill
 // CHECK:          sparse_tensor.foreach
-// CHECK:            memref.store
-// CHECK:          bufferization.to_tensor
+// CHECK:            tensor.insert
 func.func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x4xf64> {
   %0 = sparse_tensor.convert %arg0 : tensor<2x4xf64, #SparseMatrix> to tensor<2x4xf64>
   return %0 : tensor<2x4xf64>
@@ -50,11 +47,10 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x
 
 // CHECK-LABEL:  func.func @sparse_convert_2d_dyn
 // CHECK-NOT:      sparse_tensor.reorder_coo
-// CHECK:          memref.alloc
+// CHECK:          bufferization.alloc_tensor
 // CHECK:          linalg.fill
 // CHECK:          sparse_tensor.foreach
-// CHECK:            memref.store
-// CHECK:          bufferization.to_tensor
+// CHECK:            tensor.insert
 func.func @sparse_convert_2d_dyn0(%arg0: tensor<?x4xf64, #SparseMatrix>) -> tensor<?x4xf64> {
   %0 = sparse_tensor.convert %arg0 : tensor<?x4xf64, #SparseMatrix> to tensor<?x4xf64>
   return %0 : tensor<?x4xf64>
@@ -62,11 +58,10 @@ func.func @sparse_convert_2d_dyn0(%arg0: tensor<?x4xf64, #SparseMatrix>) -> tens
 
 // CHECK-LABEL:  func.func @sparse_convert_2d_dyn1
 // CHECK-NOT:      sparse_tensor.reorder_coo
-// CHECK:          memref.alloc
+// CHECK:          bufferization.alloc_tensor
 // CHECK:          linalg.fill
 // CHECK:          sparse_tensor.foreach
-// CHECK:            memref.store
-// CHECK:          bufferization.to_tensor
+// CHECK:            tensor.insert
 func.func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tensor<2x?xf64> {
   %0 = sparse_tensor.convert %arg0 : tensor<2x?xf64, #SparseMatrix> to tensor<2x?xf64>
   return %0 : tensor<2x?xf64>
@@ -74,11 +69,10 @@ func.func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tens
 
 // CHECK-LABEL:  func.func @sparse_convert_2d_dyn2
 // CHECK-NOT:      sparse_tensor.reorder_coo
-// CHECK:          memref.alloc
+// CHECK:          bufferization.alloc_tensor
 // CHECK:          linalg.fill
 // CHECK:          sparse_tensor.foreach
-// CHECK:            memref.store
-// CHECK:          bufferization.to_tensor
+// CHECK:            tensor.insert
 func.func @sparse_convert_2d_dyn2(%arg0: tensor<?x?xf64, #SparseMatrix>) -> tensor<?x?xf64> {
   %0 = sparse_tensor.convert %arg0 : tensor<?x?xf64, #SparseMatrix> to tensor<?x?xf64>
   return %0 : tensor<?x?xf64>
@@ -86,11 +80,10 @@ func.func @sparse_convert_2d_dyn2(%arg0: tensor<?x?xf64, #SparseMatrix>) -> tens
 
 // CHECK-LABEL:  func.func @sparse_convert_3d
 // CHECK-NOT:      sparse_tensor.reorder_coo
-// CHECK:          memref.alloc
+// CHECK:          bufferization.alloc_tensor
 // CHECK:          linalg.fill
 // CHECK:          sparse_tensor.foreach
-// CHECK:            memref.store
-// CHECK:          bufferization.to_tensor
+// CHECK:            tensor.insert
 func.func @sparse_convert_3d(%arg0: tensor<2x3x4xf64, #SparseTensor>) -> tensor<2x3x4xf64> {
   %0 = sparse_tensor.convert %arg0 : tensor<2x3x4xf64, #SparseTensor> to tensor<2x3x4xf64>
   return %0 : tensor<2x3x4xf64>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir
index bdfab54dc6daeb..f3d3dd28563e89 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir
@@ -176,77 +176,83 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>,
     return %0 : tensor<?x?xf64, #DCSR>
 }
 
-// CHECK-LABEL: @concat_sparse_sparse_dense(
-//  CHECK-SAME:  %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor
-//  CHECK-SAME:  %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor
-//  CHECK-SAME:  %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor
-//   CHECK-DAG:  %[[TMP_c0:.*]] = arith.constant 0 : index
-//   CHECK-DAG:  %[[TMP_c1:.*]] = arith.constant 1 : index
-//   CHECK-DAG:  %[[TMP_c5:.*]] = arith.constant 5 : index
-//   CHECK-DAG:  %[[TMP_c2:.*]] = arith.constant 2 : index
-//   CHECK-DAG:  %[[TMP_c9:.*]] = arith.constant 9 : index
-//   CHECK-DAG:  %[[TMP_c4:.*]] = arith.constant 4 : index
-//   CHECK-DAG:  %[[TMP_d0:.*]] = arith.constant 0.000000e+00 : f64
-//       CHECK:  %[[A:.*]] = memref.alloc(%[[TMP_c9]], %[[TMP_c4]]) : memref<?x?xf64>
-//       CHECK:  linalg.fill ins(%[[TMP_d0]] : f64) outs(%[[A]] : memref<?x?xf64>)
-//       CHECK:  %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref<?xindex>
-//       CHECK:  %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref<?xindex>
-//       CHECK:  scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]]
-//       CHECK:    %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref<?xindex>
-//   CHECK-DAG:    %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref<?xindex>
-//   CHECK-DAG:    %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
-//       CHECK:    %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref<?xindex>
-//       CHECK:    scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]]
-//       CHECK:      %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref<?xindex>
-//       CHECK:      %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref<?xf64>
-//       CHECK:      memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_23]], %[[TMP_27]]] : memref<?x?xf64>
-//       CHECK:    }
-//       CHECK:  }
-//       CHECK:  %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<?xindex>
-//       CHECK:  %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<?xindex>
-//       CHECK:  scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]]
-//       CHECK:    %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref<?xindex>
-//   CHECK-DAG:    %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref<?xindex>
-//   CHECK-DAG:    %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
-//       CHECK:    %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref<?xindex>
-//       CHECK:    scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]]
-//       CHECK:      %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref<?xindex>
-//       CHECK:      %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref<?xf64>
-//       CHECK:      %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index
-//       CHECK:      memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_29]], %[[TMP_27]]] : memref<?x?xf64>
-//       CHECK:    }
-//       CHECK:  }
-//       CHECK:  %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor
-//       CHECK:  %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref<?xindex>
-//       CHECK:  %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref<?xindex>
-//       CHECK:  scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]]
-//       CHECK:    %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref<?xindex>
-//       CHECK:    %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref<?xindex>
-//       CHECK:    %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
-//       CHECK:    %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref<?xindex>
-//       CHECK:    scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]]
-//       CHECK:      %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref<?xindex>
-//       CHECK:      %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref<?xf64>
-//       CHECK:      %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index
-//       CHECK:      memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_29]], %[[TMP_27]]] : memref<?x?xf64>
-//       CHECK:    }
-//       CHECK:  }
-//       CHECK:  %[[R:.*]] = bufferization.to_tensor %[[A]] : memref<?x?xf64>
-//       CHECK:  return %[[R]] : tensor<?x?xf64>
+// CHECK-LABEL:   func.func @concat_sparse_sparse_dense(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<2x4xf64, #sparse_tensor
+// CHECK-SAME:      %[[VAL_1:.*]]: tensor<3x4xf64, #sparse_tensor
+// CHECK-SAME:      %[[VAL_2:.*]]: tensor<4x4xf64, #sparse_tensor
+// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 4 : index
+// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 9 : index
+// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 5 : index
+// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 0 : index
+// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[VAL_9:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_10:.*]] = bufferization.alloc_tensor(%[[VAL_4]], %[[VAL_3]]) : tensor<?x?xf64>
+// CHECK:           %[[VAL_11:.*]] = linalg.fill ins(%[[VAL_6]] : f64) outs(%[[VAL_10]] : tensor<?x?xf64>) -> tensor<?x?xf64>
+// CHECK:           %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_17:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<?xindex>
+// CHECK:           %[[VAL_18:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_8]]] : memref<?xindex>
+// CHECK:           %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_8]] iter_args(%[[VAL_21:.*]] = %[[VAL_11]]) -> (tensor<?x?xf64>) {
+// CHECK:             %[[VAL_22:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_20]]] : memref<?xindex>
+// CHECK:             %[[VAL_23:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_20]]] : memref<?xindex>
+// CHECK:             %[[VAL_24:.*]] = arith.addi %[[VAL_20]], %[[VAL_8]] : index
+// CHECK:             %[[VAL_25:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_24]]] : memref<?xindex>
+// CHECK:             %[[VAL_26:.*]] = scf.for %[[VAL_27:.*]] = %[[VAL_23]] to %[[VAL_25]] step %[[VAL_8]] iter_args(%[[VAL_28:.*]] = %[[VAL_21]]) -> (tensor<?x?xf64>) {
+// CHECK:               %[[VAL_29:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_27]]] : memref<?xindex>
+// CHECK:               %[[VAL_30:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_27]]] : memref<?xf64>
+// CHECK:               %[[VAL_31:.*]] = tensor.insert %[[VAL_30]] into %[[VAL_28]]{{\[}}%[[VAL_22]], %[[VAL_29]]] : tensor<?x?xf64>
+// CHECK:               scf.yield %[[VAL_31]] : tensor<?x?xf64>
+// CHECK:             }
+// CHECK:             scf.yield %[[VAL_26]] : tensor<?x?xf64>
+// CHECK:           }
+// CHECK:           %[[VAL_32:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_33:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_34:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_35:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_36:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_37:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_7]]] : memref<?xindex>
+// CHECK:           %[[VAL_38:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_8]]] : memref<?xindex>
+// CHECK:           %[[VAL_39:.*]] = scf.for %[[VAL_40:.*]] = %[[VAL_37]] to %[[VAL_38]] step %[[VAL_8]] iter_args(%[[VAL_41:.*]] = %[[VAL_19]]) -> (tensor<?x?xf64>) {
+// CHECK:             %[[VAL_42:.*]] = memref.load %[[VAL_33]]{{\[}}%[[VAL_40]]] : memref<?xindex>
+// CHECK:             %[[VAL_43:.*]] = memref.load %[[VAL_34]]{{\[}}%[[VAL_40]]] : memref<?xindex>
+// CHECK:             %[[VAL_44:.*]] = arith.addi %[[VAL_40]], %[[VAL_8]] : index
+// CHECK:             %[[VAL_45:.*]] = memref.load %[[VAL_34]]{{\[}}%[[VAL_44]]] : memref<?xindex>
+// CHECK:             %[[VAL_46:.*]] = scf.for %[[VAL_47:.*]] = %[[VAL_43]] to %[[VAL_45]] step %[[VAL_8]] iter_args(%[[VAL_48:.*]] = %[[VAL_41]]) -> (tensor<?x?xf64>) {
+// CHECK:               %[[VAL_49:.*]] = memref.load %[[VAL_35]]{{\[}}%[[VAL_47]]] : memref<?xindex>
+// CHECK:               %[[VAL_50:.*]] = memref.load %[[VAL_36]]{{\[}}%[[VAL_47]]] : memref<?xf64>
+// CHECK:               %[[VAL_51:.*]] = arith.addi %[[VAL_42]], %[[VAL_9]] : index
+// CHECK:               %[[VAL_52:.*]] = tensor.insert %[[VAL_50]] into %[[VAL_48]]{{\[}}%[[VAL_51]], %[[VAL_49]]] : tensor<?x?xf64>
+// CHECK:               scf.yield %[[VAL_52]] : tensor<?x?xf64>
+// CHECK:             }
+// CHECK:             scf.yield %[[VAL_46]] : tensor<?x?xf64>
+// CHECK:           }
+// CHECK:           %[[VAL_53:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_54:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_55:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_56:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_57:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<4x4xf64, #sparse_tensor
+// CHECK:           %[[VAL_58:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_7]]] : memref<?xindex>
+// CHECK:           %[[VAL_59:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_8]]] : memref<?xindex>
+// CHECK:           %[[VAL_60:.*]] = scf.for %[[VAL_61:.*]] = %[[VAL_58]] to %[[VAL_59]] step %[[VAL_8]] iter_args(%[[VAL_62:.*]] = %[[VAL_39]]) -> (tensor<?x?xf64>) {
+// CHECK:             %[[VAL_63:.*]] = memref.load %[[VAL_54]]{{\[}}%[[VAL_61]]] : memref<?xindex>
+// CHECK:             %[[VAL_64:.*]] = memref.load %[[VAL_55]]{{\[}}%[[VAL_61]]] : memref<?xindex>
+// CHECK:             %[[VAL_65:.*]] = arith.addi %[[VAL_61]], %[[VAL_8]] : index
+// CHECK:             %[[VAL_66:.*]] = memref.load %[[VAL_55]]{{\[}}%[[VAL_65]]] : memref<?xindex>
+// CHECK:             %[[VAL_67:.*]] = scf.for %[[VAL_68:.*]] = %[[VAL_64]] to %[[VAL_66]] step %[[VAL_8]] iter_args(%[[VAL_69:.*]] = %[[VAL_62]]) -> (tensor<?x?xf64>) {
+// CHECK:               %[[VAL_70:.*]] = memref.load %[[VAL_56]]{{\[}}%[[VAL_68]]] : memref<?xindex>
+// CHECK:               %[[VAL_71:.*]] = memref.load %[[VAL_57]]{{\[}}%[[VAL_68]]] : memref<?xf64>
+// CHECK:               %[[VAL_72:.*]] = arith.addi %[[VAL_63]], %[[VAL_5]] : index
+// CHECK:               %[[VAL_73:.*]] = tensor.insert %[[VAL_71]] into %[[VAL_69]]{{\[}}%[[VAL_72]], %[[VAL_70]]] : tensor<?x?xf64>
+// CHECK:               scf.yield %[[VAL_73]] : tensor<?x?xf64>
+// CHECK:             }
+// CHECK:             scf.yield %[[VAL_67]] : tensor<?x?xf64>
+// CHECK:           }
+// CHECK:           return %[[VAL_60]] : tensor<?x?xf64>
+// CHECK:         }
 func.func @concat_sparse_sparse_dense(%arg0: tensor<2x4xf64, #DCSR>,
                                 %arg1: tensor<3x4xf64, #DCSR>,
                                 %arg2: tensor<4x4xf64, #DCSR>)

From 31512811b8c0f8fd328fba585640992c39218f1e Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usaxena95@gmail.com>
Date: Tue, 17 Oct 2023 20:46:01 +0200
Subject: [PATCH 06/15] [clang-tidy] Add check to diagnose coroutine-hostile
 RAII objects (#68738)

This check detects **hostile-RAII** objects which should not **persist
across a suspension point in a coroutine**.

Some objects require that they be destroyed on the same thread that
created them. Traditionally this requirement was often phrased as "must
be a local variable", under the assumption that local variables always
work this way. However, this is incorrect with **C++20 coroutines**,
since an intervening `co_await` may cause the coroutine to suspend and
later be resumed on another thread.

The lifetime of an object that requires being destroyed on the same
thread must not encompass a `co_await` or `co_yield` point. If you
create/destroy an object, you must do so without allowing the coroutine
to suspend in the meantime.

The check considers the following types as hostile:

- **Scoped-lockable types**: A scoped-lockable object persisting across
  a suspension point is problematic as the lock held by this object could
  be unlocked by a different thread. This would be undefined behaviour.

- Types belonging to a configurable **denylist**.

```cpp
// Call some async API while holding a lock.
const my::MutexLock l(&mu_);

// Oops! The async Bar function may finish on a different
// thread from the one that created the MutexLock object and therefore called
// Mutex::Lock -- now Mutex::Unlock will be called on the wrong thread.
co_await Bar();
```
---
 .../clang-tidy/misc/CMakeLists.txt            |   1 +
 .../misc/CoroutineHostileRAIICheck.cpp        |  98 +++++++++
 .../misc/CoroutineHostileRAIICheck.h          |  50 +++++
 .../clang-tidy/misc/MiscTidyModule.cpp        |   3 +
 clang-tools-extra/docs/ReleaseNotes.rst       |   7 +
 .../docs/clang-tidy/checks/list.rst           |   1 +
 .../checks/misc/coroutine-hostile-raii.rst    |  50 +++++
 .../checkers/misc/coroutine-hostile-raii.cpp  | 192 ++++++++++++++++++
 8 files changed, 402 insertions(+)
 create mode 100644 clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp
 create mode 100644 clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h
 create mode 100644 clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp

diff --git a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
index 2e88e68a544782..d9ec268650c053 100644
--- a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
@@ -18,6 +18,7 @@ add_custom_target(genconfusable DEPENDS Confusables.inc)
 
 add_clang_library(clangTidyMiscModule
   ConstCorrectnessCheck.cpp
+  CoroutineHostileRAIICheck.cpp
   DefinitionsInHeadersCheck.cpp
   ConfusableIdentifierCheck.cpp
   HeaderIncludeCycleCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp
new file mode 100644
index 00000000000000..e820cd39d83d21
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp
@@ -0,0 +1,98 @@
+//===--- CoroutineHostileRAIICheck.cpp - clang-tidy -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CoroutineHostileRAIICheck.h"
+#include "../utils/OptionsUtils.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/Type.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/ASTMatchers/ASTMatchersInternal.h"
+#include "clang/Basic/AttrKinds.h"
+#include "clang/Basic/DiagnosticIDs.h"
+
+using namespace clang::ast_matchers;
+namespace clang::tidy::misc {
+namespace {
+using clang::ast_matchers::internal::BoundNodesTreeBuilder;
+
+AST_MATCHER_P(Stmt, forEachPrevStmt, ast_matchers::internal::Matcher<Stmt>,
+              InnerMatcher) {
+  DynTypedNode P;
+  bool IsHostile = false;
+  for (const Stmt *Child = &Node; Child; Child = P.get<Stmt>()) {
+    auto Parents = Finder->getASTContext().getParents(*Child);
+    if (Parents.empty())
+      break;
+    P = *Parents.begin();
+    auto *PCS = P.get<CompoundStmt>();
+    if (!PCS)
+      continue;
+    for (const auto &Sibling : PCS->children()) {
+      // Child contains suspension. Siblings after Child do not persist across
+      // this suspension.
+      if (Sibling == Child)
+        break;
+      // In case of a match, add the bindings as a separate match. Also don't
+      // clear the bindings if a match is not found (unlike Matcher::matches).
+      BoundNodesTreeBuilder SiblingBuilder;
+      if (InnerMatcher.matches(*Sibling, Finder, &SiblingBuilder)) {
+        Builder->addMatch(SiblingBuilder);
+        IsHostile = true;
+      }
+    }
+  }
+  return IsHostile;
+}
+} // namespace
+
+CoroutineHostileRAIICheck::CoroutineHostileRAIICheck(StringRef Name,
+                                                     ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context),
+      RAIITypesList(utils::options::parseStringList(
+          Options.get("RAIITypesList", "std::lock_guard;std::scoped_lock"))) {}
+
+void CoroutineHostileRAIICheck::registerMatchers(MatchFinder *Finder) {
+  // A suspension happens with co_await or co_yield.
+  auto ScopedLockable = varDecl(hasType(hasCanonicalType(hasDeclaration(
+                                    hasAttr(attr::Kind::ScopedLockable)))))
+                            .bind("scoped-lockable");
+  auto OtherRAII = varDecl(hasType(hasCanonicalType(hasDeclaration(
+                               namedDecl(hasAnyName(RAIITypesList))))))
+                       .bind("raii");
+  Finder->addMatcher(expr(anyOf(coawaitExpr(), coyieldExpr()),
+                          forEachPrevStmt(declStmt(forEach(
+                              varDecl(anyOf(ScopedLockable, OtherRAII))))))
+                         .bind("suspension"),
+                     this);
+}
+
+void CoroutineHostileRAIICheck::check(const MatchFinder::MatchResult &Result) {
+  if (const auto *VD = Result.Nodes.getNodeAs<VarDecl>("scoped-lockable"))
+    diag(VD->getLocation(),
+         "%0 holds a lock across a suspension point of coroutine and could be "
+         "unlocked by a different thread")
+        << VD;
+  if (const auto *VD = Result.Nodes.getNodeAs<VarDecl>("raii"))
+    diag(VD->getLocation(),
+         "%0 persists across a suspension point of coroutine")
+        << VD;
+  if (const auto *Suspension = Result.Nodes.getNodeAs<Expr>("suspension"))
+    diag(Suspension->getBeginLoc(), "suspension point is here",
+         DiagnosticIDs::Note);
+}
+
+void CoroutineHostileRAIICheck::storeOptions(
+    ClangTidyOptions::OptionMap &Opts) {
+  Options.store(Opts, "RAIITypesList",
+                utils::options::serializeStringList(RAIITypesList));
+}
+} // namespace clang::tidy::misc
diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h
new file mode 100644
index 00000000000000..a5e9cb89ef6769
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h
@@ -0,0 +1,50 @@
+//===--- CoroutineHostileRAIICheck.h - clang-tidy ----------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H
+
+#include "../ClangTidyCheck.h"
+#include "clang/AST/ASTTypeTraits.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "llvm/ADT/StringRef.h"
+#include <vector>
+
+namespace clang::tidy::misc {
+
+/// Detects when objects of certain hostile RAII types persist across
+/// suspension points in a coroutine. Such hostile types include scoped-lockable
+/// types and types belonging to a configurable denylist.
+///
+///  For the user-facing documentation see:
+///  http://clang.llvm.org/extra/clang-tidy/checks/misc/coroutine-hostile-raii.html
+class CoroutineHostileRAIICheck : public ClangTidyCheck {
+public:
+  CoroutineHostileRAIICheck(llvm::StringRef Name, ClangTidyContext *Context);
+
+  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
+    return LangOpts.CPlusPlus20;
+  }
+
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+
+  std::optional<TraversalKind> getCheckTraversalKind() const override {
+    return TK_AsIs;
+  }
+
+private:
+  // List of fully qualified types which should not persist across a suspension
+  // point in a coroutine.
+  std::vector<StringRef> RAIITypesList;
+};
+
+} // namespace clang::tidy::misc
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
index 92590506e1ec1e..d8a88324ee63e0 100644
--- a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
@@ -11,6 +11,7 @@
 #include "../ClangTidyModuleRegistry.h"
 #include "ConfusableIdentifierCheck.h"
 #include "ConstCorrectnessCheck.h"
+#include "CoroutineHostileRAIICheck.h"
 #include "DefinitionsInHeadersCheck.h"
 #include "HeaderIncludeCycleCheck.h"
 #include "IncludeCleanerCheck.h"
@@ -41,6 +42,8 @@ class MiscModule : public ClangTidyModule {
         "misc-confusable-identifiers");
     CheckFactories.registerCheck<ConstCorrectnessCheck>(
         "misc-const-correctness");
+    CheckFactories.registerCheck<CoroutineHostileRAIICheck>(
+        "misc-coroutine-hostile-raii");
     CheckFactories.registerCheck<DefinitionsInHeadersCheck>(
         "misc-definitions-in-headers");
     CheckFactories.registerCheck<HeaderIncludeCycleCheck>(
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index af164d0462d52c..3e1fbe091c9ff6 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -163,6 +163,13 @@ New checks
   Flags coroutines that suspend while a lock guard is in scope at the
   suspension point.
 
+- New :doc:`misc-coroutine-hostile-raii
+  <clang-tidy/checks/misc/coroutine-hostile-raii>` check.
+
+  Detects when objects of certain hostile RAII types persist across suspension
+  points in a coroutine. Such hostile types include scoped-lockable types and
+  types belonging to a configurable denylist.
+
 - New :doc:`modernize-use-constraints
   <clang-tidy/checks/modernize/use-constraints>` check.
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 2125ebd7a213c1..819e3974e3f133 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -241,6 +241,7 @@ Clang-Tidy Checks
    :doc:`llvmlibc-restrict-system-libc-headers <llvmlibc/restrict-system-libc-headers>`, "Yes"
    :doc:`misc-confusable-identifiers <misc/confusable-identifiers>`,
    :doc:`misc-const-correctness <misc/const-correctness>`, "Yes"
+   :doc:`misc-coroutine-hostile-raii <misc/coroutine-hostile-raii>`,
    :doc:`misc-definitions-in-headers <misc/definitions-in-headers>`, "Yes"
    :doc:`misc-header-include-cycle <misc/header-include-cycle>`,
    :doc:`misc-include-cleaner <misc/include-cleaner>`, "Yes"
diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst
new file mode 100644
index 00000000000000..dcb9f399774cba
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst
@@ -0,0 +1,50 @@
+.. title:: clang-tidy - misc-coroutine-hostile-raii
+
+misc-coroutine-hostile-raii
+===========================
+
+Detects when objects of certain hostile RAII types persist across suspension
+points in a coroutine. Such hostile types include scoped-lockable types and
+types belonging to a configurable denylist.
+
+Some objects require that they be destroyed on the same thread that created them. 
+Traditionally this requirement was often phrased as "must be a local variable",
+under the assumption that local variables always work this way. However this is
+incorrect with C++20 coroutines, since an intervening ``co_await`` may cause the
+coroutine to suspend and later be resumed on another thread.
+
+The lifetime of an object that requires being destroyed on the same thread must 
+not encompass a ``co_await`` or ``co_yield`` point. If you create/destroy an object,
+you must do so without allowing the coroutine to suspend in the meantime.
+
+The following types are considered hostile:
+
+ - Scoped-lockable types: A scoped-lockable object persisting across a suspension
+   point is problematic as the lock held by this object could be unlocked by a
+   different thread. This would be undefined behaviour.
+   This includes all types annotated with the ``scoped_lockable`` attribute.
+
+ - Types belonging to a configurable denylist.
+
+.. code-block:: c++
+
+  // Call some async API while holding a lock.
+  {
+    const my::MutexLock l(&mu_);
+
+    // Oops! The async Bar function may finish on a different
+    // thread from the one that created the MutexLock object and therefore called
+    // Mutex::Lock -- now Mutex::Unlock will be called on the wrong thread.
+    co_await Bar();
+  }
+
+
+Options
+-------
+
+.. option:: RAIITypesList
+
+    A semicolon-separated list of qualified types which should not be allowed to
+    persist across suspension points.
+    E.g.: ``my::lockable; a::b;::my::other::lockable;``
+    The default value of this option is ``"std::lock_guard;std::scoped_lock"``.
\ No newline at end of file
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp
new file mode 100644
index 00000000000000..2d022e21c85d56
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp
@@ -0,0 +1,192 @@
+// RUN: %check_clang_tidy -std=c++20 %s misc-coroutine-hostile-raii %t \
+// RUN:   -config="{CheckOptions: \
+// RUN:             {misc-coroutine-hostile-raii.RAIITypesList: \
+// RUN:               'my::Mutex; ::my::other::Mutex'}}"
+
+namespace std {
+
+template <typename R, typename...> struct coroutine_traits {
+  using promise_type = typename R::promise_type;
+};
+
+template <typename Promise = void> struct coroutine_handle;
+
+template <> struct coroutine_handle<void> {
+  static coroutine_handle from_address(void *addr) noexcept {
+    coroutine_handle me;
+    me.ptr = addr;
+    return me;
+  }
+  void operator()() { resume(); }
+  void *address() const noexcept { return ptr; }
+  void resume() const {  }
+  void destroy() const { }
+  bool done() const { return true; }
+  coroutine_handle &operator=(decltype(nullptr)) {
+    ptr = nullptr;
+    return *this;
+  }
+  coroutine_handle(decltype(nullptr)) : ptr(nullptr) {}
+  coroutine_handle() : ptr(nullptr) {}
+  //  void reset() { ptr = nullptr; } // add to P0057?
+  explicit operator bool() const { return ptr; }
+
+protected:
+  void *ptr;
+};
+
+template <typename Promise> struct coroutine_handle : coroutine_handle<> {
+  using coroutine_handle<>::operator=;
+
+  static coroutine_handle from_address(void *addr) noexcept {
+    coroutine_handle me;
+    me.ptr = addr;
+    return me;
+  }
+
+  Promise &promise() const {
+    return *reinterpret_cast<Promise *>(
+        __builtin_coro_promise(ptr, alignof(Promise), false));
+  }
+  static coroutine_handle from_promise(Promise &promise) {
+    coroutine_handle p;
+    p.ptr = __builtin_coro_promise(&promise, alignof(Promise), true);
+    return p;
+  }
+};
+
+struct suspend_always {
+  bool await_ready() noexcept { return false; }
+  void await_suspend(std::coroutine_handle<>) noexcept {}
+  void await_resume() noexcept {}
+};
+} // namespace std
+
+struct ReturnObject {
+    struct promise_type {
+        ReturnObject get_return_object() { return {}; }
+        std::suspend_always initial_suspend() { return {}; }
+        std::suspend_always final_suspend() noexcept { return {}; }
+        void unhandled_exception() {}
+        std::suspend_always yield_value(int value) { return {}; }
+    };
+};
+
+#define SCOPED_LOCKABLE __attribute__ ((scoped_lockable))
+
+namespace absl {
+class SCOPED_LOCKABLE Mutex {};
+using Mutex2 = Mutex;
+} // namespace absl
+
+ReturnObject BasicWarning() {
+  absl::Mutex mtx;
+  // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: 'mtx' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii]
+  int no_warning;
+  {
+    co_yield 1;
+    // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here
+  }
+}
+
+ReturnObject BasicNoWarning() {
+  co_yield 1;
+  {  absl::Mutex no_warning; }
+  int no_warning;
+  {
+    co_yield 1;
+    absl::Mutex no_warning;
+  }
+  co_yield 1;
+}
+
+ReturnObject scopedLockableTest() {
+    co_yield 0;
+    absl::Mutex a;
+    // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'a' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii]
+    absl::Mutex2 b;
+    // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: 'b' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii]
+    {
+        absl::Mutex no_warning_1;
+        { absl::Mutex no_warning_2; }
+    }
+
+    co_yield 1;
+    // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here
+    absl::Mutex c;
+    // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'c' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii]
+    co_await std::suspend_always{};
+    // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here
+    for(int i=1; i<=10; ++i ) {
+      absl::Mutex d;
+      // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 'd' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii]
+      co_await std::suspend_always{};
+      // CHECK-MESSAGES: :[[@LINE-1]]:7: note: suspension point is here
+      co_yield 1;
+      absl::Mutex no_warning_3;
+    }
+    if (true) {
+      absl::Mutex e;
+      // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 'e' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii]
+      co_yield 1;
+      // CHECK-MESSAGES: :[[@LINE-1]]:7: note: suspension point is here
+      absl::Mutex no_warning_4;
+    }
+    absl::Mutex no_warning_5;
+}
+
+void lambda() {
+  absl::Mutex no_warning;
+  auto lambda = []() -> ReturnObject {
+    co_await std::suspend_always{};
+    absl::Mutex a;
+    // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'a' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii]
+    co_yield 1;
+    // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here
+    co_await std::suspend_always{};
+    co_yield 1;
+  };
+  absl::Mutex no_warning_2;
+}
+
+template<class T>
+ReturnObject raii_in_template(){
+  T a;
+  // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'a' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii]
+  co_yield 1;
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: note: suspension point is here
+}
+void foo_template() { raii_in_template<absl::Mutex>(); }
+
+namespace my {
+class Mutex{};
+namespace other {
+class Mutex{};
+} // namespace other
+
+using Mutex2 = Mutex;
+} // namespace my
+
+ReturnObject denyListTest() {
+    my::Mutex a;
+    // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: 'a' persists across a suspension point of coroutine [misc-coroutine-hostile-raii]
+    my::other::Mutex b;
+    // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: 'b' persists across a suspension point of coroutine [misc-coroutine-hostile-raii]
+    my::Mutex2 c;
+    // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: 'c' persists across a suspension point of coroutine [misc-coroutine-hostile-raii]
+    co_yield 1;
+    // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here
+}
+
+ReturnObject referenceTest(my::Mutex& ref) {
+    my::Mutex& a = ref;
+    co_yield 1;
+}
+ReturnObject pointerTest(my::Mutex* ref) {
+    my::Mutex* a = ref;
+    co_yield 1;
+}
+
+ReturnObject functionArgTest(my::Mutex ref) {
+    co_yield 1;
+}

From e6d0b126c824222fca2f31a2ba571c2ee2bb4760 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usaxena95@gmail.com>
Date: Tue, 17 Oct 2023 20:53:42 +0200
Subject: [PATCH 07/15] Correctly compute conversion seq for args to fn with
 reversed param order (#68999)

We associated conversion seq for args (when reversed) to the wrong
index.
This led clang to believe that the reversed `operator==` was a worse
overload candidate than the `operator==` without reversed args, when in
fact the two candidates were ambiguous.

Fixes https://github.com/llvm/llvm-project/issues/53954
---
 clang/docs/ReleaseNotes.rst                   |  2 ++
 clang/lib/Sema/SemaOverload.cpp               |  2 +-
 .../over.match.oper/p3-2a.cpp                 | 35 +++++++++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 81cbfd90155fe0..443325bb0d1e17 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -117,6 +117,8 @@ C++ Language Changes
 
 C++20 Feature Support
 ^^^^^^^^^^^^^^^^^^^^^
+- Fix a bug in conversion sequence of arguments to a function with reversed parameter order.
+  Fixes `#53954 <https://github.com/llvm/llvm-project/issues/53954>`_.
 
 C++23 Feature Support
 ^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index ce78994e655381..c271cebb9eb638 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -7688,7 +7688,7 @@ bool Sema::CheckNonDependentConversions(
     QualType ParamType = ParamTypes[I + Offset];
     if (!ParamType->isDependentType()) {
       unsigned ConvIdx = PO == OverloadCandidateParamOrder::Reversed
-                             ? 0
+                             ? Args.size() - 1 - (ThisConversions + I)
                              : (ThisConversions + I);
       Conversions[ConvIdx]
         = TryCopyInitialization(*this, Args[I], ParamType,
diff --git a/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp b/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp
index 5c6804eb7726b5..02fe37dc1be505 100644
--- a/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp
+++ b/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp
@@ -324,6 +324,41 @@ bool x = X() == X(); // expected-warning {{ambiguous}}
 }
 } // namespace P2468R2
 
+namespace GH53954{
+namespace test1 {
+struct P {
+    template <class T>
+    friend bool operator==(const P&, const T&); // expected-note {{candidate}} \
+                                                  // expected-note {{reversed parameter order}}
+};
+struct A : public P {};
+struct B : public P {};
+bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}}
+}
+
+namespace test2 {
+struct P {
+    template <class T>
+    friend bool operator==(const T&, const P&); // expected-note {{candidate}} \
+                                                // expected-note {{reversed parameter order}}
+};
+struct A : public P {};
+struct B : public P {};
+bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}}
+}
+
+namespace test3 {
+struct P {
+  template<class S>
+  bool operator==(const S &) const; // expected-note {{candidate}} \
+                                    // expected-note {{reversed parameter order}}
+};
+struct A : public P {};
+struct B : public P {};
+bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}}
+}
+}
+
 #else // NO_ERRORS
 
 namespace problem_cases {

From fbf0a77e80f18a6d0fd8a28833b0bc87a99b1b2f Mon Sep 17 00:00:00 2001
From: Bill Wendling <5993918+bwendling@users.noreply.github.com>
Date: Tue, 17 Oct 2023 12:03:26 -0700
Subject: [PATCH 08/15] [CodeGen] Avoid potential sideeffects from XOR (#67193)

XOR may change flag values (e.g. for X86 gprs). In the case where that's
not desirable, specify that buildClearRegister() should use MOV instead.
---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h  |  7 +++--
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp |  6 ++--
 llvm/lib/Target/AArch64/AArch64InstrInfo.h   |  4 +--
 llvm/lib/Target/X86/X86InstrInfo.cpp         | 33 ++++++++++++++------
 llvm/lib/Target/X86/X86InstrInfo.h           |  4 +--
 5 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 14e27abe882b03..6c3e02b2f59405 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2093,10 +2093,13 @@ class TargetInstrInfo : public MCInstrInfo {
         "Target didn't implement TargetInstrInfo::insertOutlinedCall!");
   }
 
-  /// Insert an architecture-specific instruction to clear a register.
+  /// Insert an architecture-specific instruction to clear a register. If you
+  /// need to avoid side effects (e.g. avoid XOR on x86, which sets EFLAGS), set
+  /// \p AllowSideEffects to \p false.
   virtual void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator Iter,
-                                  DebugLoc &DL) const {
+                                  DebugLoc &DL,
+                                  bool AllowSideEffects = true) const {
     llvm_unreachable(
         "Target didn't implement TargetInstrInfo::buildClearRegister!");
   }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 05c79b610cb36c..7dcf24c26e124a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -9134,13 +9134,15 @@ bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
 
 void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator Iter,
-                                          DebugLoc &DL) const {
+                                          DebugLoc &DL,
+                                          bool AllowSideEffects) const {
   const MachineFunction &MF = *MBB.getParent();
   const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
   const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
 
   if (TRI.isGeneralPurposeRegister(MF, Reg)) {
-    BuildMI(MBB, Iter, DL, get(AArch64::MOVi64imm), Reg)
+    BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg)
+      .addImm(0)
       .addImm(0);
   } else if (STI.hasSVE()) {
     BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 4a40b2fa122159..a934103c90cbf9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -333,8 +333,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
 
   void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator Iter,
-                          DebugLoc &DL) const override;
+                          MachineBasicBlock::iterator Iter, DebugLoc &DL,
+                          bool AllowSideEffects = true) const override;
 
   /// Returns the vector element size (B, H, S or D) of an SVE opcode.
   uint64_t getElementSizeForOpcode(unsigned Opc) const;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index f0c46419ab3516..4c6854da0ada3d 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10130,27 +10130,36 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
   return It;
 }
 
-void X86InstrInfo::buildClearRegister(Register Reg,
-                                      MachineBasicBlock &MBB,
+void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator Iter,
-                                      DebugLoc &DL) const {
+                                      DebugLoc &DL,
+                                      bool AllowSideEffects) const {
   const MachineFunction &MF = *MBB.getParent();
   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
   const TargetRegisterInfo &TRI = getRegisterInfo();
 
   if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
-    // FIXME: Ignore MMX registers?
+    // FIXME: Should we ignore MMX registers?
     return;
 
   if (TRI.isGeneralPurposeRegister(MF, Reg)) {
-    BuildMI(MBB, Iter, DL, get(X86::XOR32rr), Reg)
-      .addReg(Reg, RegState::Undef)
-      .addReg(Reg, RegState::Undef);
+    // Convert register to the 32-bit version. Both 'movl' and 'xorl' clear the
+    // upper bits of a 64-bit register automagically.
+    Reg = getX86SubSuperRegister(Reg, 32);
+
+    if (!AllowSideEffects)
+      // XOR affects flags, so use a MOV instead.
+      BuildMI(MBB, Iter, DL, get(X86::MOV32ri), Reg).addImm(0);
+    else
+      BuildMI(MBB, Iter, DL, get(X86::XOR32rr), Reg)
+          .addReg(Reg, RegState::Undef)
+          .addReg(Reg, RegState::Undef);
   } else if (X86::VR128RegClass.contains(Reg)) {
     // XMM#
     if (!ST.hasSSE1())
       return;
 
+    // PXOR is safe to use because it doesn't affect flags.
     BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg)
       .addReg(Reg, RegState::Undef)
       .addReg(Reg, RegState::Undef);
@@ -10159,6 +10168,7 @@ void X86InstrInfo::buildClearRegister(Register Reg,
     if (!ST.hasAVX())
       return;
 
+    // VPXOR is safe to use because it doesn't affect flags.
     BuildMI(MBB, Iter, DL, get(X86::VPXORrr), Reg)
       .addReg(Reg, RegState::Undef)
       .addReg(Reg, RegState::Undef);
@@ -10167,6 +10177,7 @@ void X86InstrInfo::buildClearRegister(Register Reg,
     if (!ST.hasAVX512())
       return;
 
+    // VPXORY is safe to use because it doesn't affect flags.
     BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg)
       .addReg(Reg, RegState::Undef)
       .addReg(Reg, RegState::Undef);
@@ -10178,9 +10189,11 @@ void X86InstrInfo::buildClearRegister(Register Reg,
     if (!ST.hasVLX())
       return;
 
-    BuildMI(MBB, Iter, DL, get(ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr), Reg)
-      .addReg(Reg, RegState::Undef)
-      .addReg(Reg, RegState::Undef);
+    // KXOR is safe to use because it doesn't affect flags.
+    unsigned Op = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
+    BuildMI(MBB, Iter, DL, get(Op), Reg)
+        .addReg(Reg, RegState::Undef)
+        .addReg(Reg, RegState::Undef);
   }
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 4d261a803421c1..e1199e20c318e2 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -583,8 +583,8 @@ class X86InstrInfo final : public X86GenInstrInfo {
                      outliner::Candidate &C) const override;
 
   void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator Iter,
-                          DebugLoc &DL) const override;
+                          MachineBasicBlock::iterator Iter, DebugLoc &DL,
+                          bool AllowSideEffects = true) const override;
 
   bool verifyInstruction(const MachineInstr &MI,
                          StringRef &ErrInfo) const override;

From ab91e05e48d9ea47b60858dc259bdbf00dfde7fa Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin@google.com>
Date: Tue, 17 Oct 2023 12:16:45 -0700
Subject: [PATCH 09/15] [mlgo] Fix tests post 760e7d0

---
 .../MLRegAlloc/Inputs/reference-log-noml.txt  | 72 +++++++++----------
 .../Inputs/reference-prio-log-noml.txt        | 12 ++--
 .../MLRegAlloc/dev-mode-prio-logging.ll       |  2 +-
 3 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt
index 0c024ad2b2e1bf..a5ccdde751ed56 100644
--- a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt
+++ b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt
@@ -16,8 +16,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7265065908432007,0.0,
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.23831403255462646,0.07943800836801529,0.07943800836801529,0.07943800836801529,0.9912577867507935,0.07069581001996994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7940031290054321,0.7908878326416016,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7352024912834167
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.01417447254061699,0.014231426641345024,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4279724359512329
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7939082384109497,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.014218696393072605,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -40,8 +40,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.0,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2404157966375351,0.08013860136270523,0.0,0.08013860136270523,1.0,0.07131929695606232,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08013860136270523
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.0,0.7908878326416016,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7940031290054321
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.0,0.014231426641345024,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01417447254061699
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.0,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7939082384109497
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.0,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014218696393072605
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -64,8 +64,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0,
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.01989283785223961,0.02235277369618416,0.2813863754272461,0.02235277369618416,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9061526656150818,0.9591121673583984,0.7352024912834167,0.7908878326416016,0.7379283308982849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6725077629089355
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0737093985080719,0.01772311143577099,0.4279724359512329,0.014231426641345024,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4858442544937134
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9173259735107422,0.9647942781448364,0.7436708807945251,0.7907436490058899,0.7401107549667358,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6831487417221069
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07275574654340744,0.017619721591472626,0.4243086874485016,0.014276761561632156,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.47955840826034546
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -88,8 +88,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0,
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.0,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.0,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.23831403255462646,0.07943800836801529,1.0,0.0,0.9912577867507935,0.07069581001996994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07943800836801529
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.0,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7908878326416016
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.0,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014231426641345024
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.0,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7907436490058899
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.0,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014276761561632156
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -112,8 +112,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0,
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01117638684809208
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6693925261497498
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00449750293046236
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6799841523170471
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004439314361661673
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -136,8 +136,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0,
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01822916604578495
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6662772297859192
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008109557442367077
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6768196225166321
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008004635572433472
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -160,8 +160,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688586473465,0.0,
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.01989283785223961,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9061526656150818,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6631619930267334
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0737093985080719,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07601386308670044
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9173259735107422,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.673655092716217
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07275574654340744,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07503040134906769
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -184,8 +184,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0,
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.2724486181679993e-10
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.0,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9591121673583984
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01772311143577099
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9647942781448364
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.0,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017619721591472626
 max_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -208,8 +208,8 @@ hint_weights_by_max: 1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,
 start_bb_freq_by_max: 0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204
 end_bb_freq_by_max: 0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5110765099525452
 hottest_bb_freq_by_max: 0.2813863754272461,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.2631579041481018,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.27892643213272095
-liverange_size: 0.7352024912834167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.6631619930267334,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.46105918288230896
-use_def_density: 0.42606985569000244,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05081497132778168,0.07567594200372696,0.48368439078330994,0.9955543875694275,0.07338171452283859,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
+liverange_size: 0.7436708807945251,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.673655092716217,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4683544337749481
+use_def_density: 0.4243086874485016,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07503040134906769,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99146968126297
 max_stage: 1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -232,8 +232,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7265065908432007,
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.2631579041481018,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.6631619930267334,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7352024912834167
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.07601386308670044,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4279724359512329
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.673655092716217,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07503040134906769,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016
 max_stage: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -256,8 +256,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204,0.0,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948,0.0,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018,0.0,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6631619930267334,0.0,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07601386308670044,0.0,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.673655092716217,0.0,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07503040134906769,0.0,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038
 max_stage: 0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -280,8 +280,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,0.35764437913894653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.482131917196483e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.811345100402832,0.7318435907363892,0.5088096261024475,0.7421572804450989,0.8143532276153564,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2819080352783203
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,0.05657143518328667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8106942772865295,0.7343682646751404,0.510564923286438,0.744717538356781,0.8068132996559143,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2828805446624756
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,0.05657143518328667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -304,8 +304,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.5,0.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.5236390233039856,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3283064365386963e-10
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.0,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01989283785223961
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.811345100402832,0.0,0.7318435907363892,0.5088096261024475,0.7421572804450989,0.8143532276153564,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.42606985569000244,0.0,0.07567594200372696,1.0,0.48368439078330994,0.9955543875694275,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07338171452283859
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8106942772865295,0.0,0.7343682646751404,0.510564923286438,0.744717538356781,0.8068132996559143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016,0.0,0.07503040134906769,0.99146968126297,0.47955840826034546,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07275574654340744
 max_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.7777777910232544
@@ -328,8 +328,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9982500076293
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6522180438041687,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015647225081920624
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.8219112157821655,0.5714285969734192,0.8334941864013672,0.9145752787590027,0.31660231947898865,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9652509689331055
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016097404062747955
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.8283073902130127,0.575875461101532,0.8399805426597595,0.9100194573402405,0.3190661370754242,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9688715934753418
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016164302825927734
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
 progress: 0.2222222238779068
@@ -352,8 +352,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.9
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.778997310048936e-10
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8797762989997864,0.7935694456100464,0.5517241358757019,0.8047530055046082,0.8830382227897644,0.30568498373031616,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008228360675275326
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8826290965080261,0.7995305061340332,0.5558685660362244,0.8107981085777283,0.8784037828445435,0.3079812228679657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00826177466660738
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
 progress: 0.1944444477558136
@@ -376,8 +376,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9434669613838196,
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,1.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,1.0,1.482131917196483e-10,0.3333333432674408,0.6365708112716675,0.6365708112716675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.778997310048936e-10
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8342907428741455,0.2898806929588318,1.0,0.5231993198394775,0.7631462812423706,0.8373839855194092,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9253203868865967
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,1.0,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015127303078770638
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8337028622627258,0.290909081697464,1.0,0.5250554084777832,0.7658536434173584,0.8297117352485657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9250554442405701
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,1.0,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015127303078770638
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
 progress: 0.1388888955116272
@@ -400,8 +400,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.07419288158416748
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9451456069946289,0.5747572779655457,0.8383495211601257,0.9199029207229614,0.3184466063976288,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9839805960655212
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006958406884223223
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9521695971488953,0.5838264226913452,0.8515779376029968,0.922583818435669,0.32347139716148376,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9837278127670288
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007072303909808397
 max_stage: 0,0,0,0,0,0,0,0,0,4,4,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
 progress: 0.1111111119389534
@@ -424,8 +424,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.07419288158416748
 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931
 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675
 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416
-liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9451456069946289,0.5747572779655457,0.8383495211601257,0.9199029207229614,0.3184466063976288,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9849514365196228
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006951410323381424
+liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9521695971488953,0.5838264226913452,0.8515779376029968,0.922583818435669,0.32347139716148376,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9847140312194824
+use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007065076380968094
 max_stage: 0,0,0,0,0,0,0,0,0,4,4,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
 progress: 0.0833333358168602
diff --git a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt
index beb0c5205979c0..01b4a3835c978c 100644
--- a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt
+++ b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt
@@ -171,7 +171,7 @@ observation: 28
 li_size: 0
 stage: 0
 weight: 0.0
-priority: 2147485184.0
+priority: 2147484928.0
 reward: 0.0
 observation: 29
 li_size: 0
@@ -237,7 +237,7 @@ observation: 39
 li_size: 0
 stage: 0
 weight: 0.0
-priority: 3598.0
+priority: 3534.0
 reward: 0.0
 observation: 40
 li_size: 0
@@ -249,7 +249,7 @@ observation: 41
 li_size: 0
 stage: 0
 weight: 0.0
-priority: 3582.0
+priority: 3518.0
 reward: 0.0
 observation: 42
 li_size: 0
@@ -273,7 +273,7 @@ observation: 45
 li_size: 0
 stage: 0
 weight: 0.0
-priority: 4078.0
+priority: 4046.0
 reward: 0.0
 observation: 46
 li_size: 0
@@ -291,7 +291,7 @@ observation: 48
 li_size: 0
 stage: 0
 weight: 0.0
-priority: 4384.0
+priority: 4304.0
 reward: 0.0
 observation: 49
 li_size: 0
@@ -309,7 +309,7 @@ observation: 51
 li_size: 0
 stage: 0
 weight: 0.0
-priority: 2684358144.0
+priority: 2684357888.0
 reward: 0.0
 observation: 52
 li_size: 0
diff --git a/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll b/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll
index 21bb75278874a5..6b013b55df77ad 100644
--- a/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll
+++ b/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll
@@ -24,5 +24,5 @@
 ; CHECK-NOT: nan
 ; CHECK-LABEL: priority:
 ; NOML-SAME: 2684358144.0
-; ML-SAME: 3599
+; ML-SAME: 3535
 ; CHECK-LABEL: reward:

From f781508d319438d2c6d6bb264328b018c15b6946 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Tue, 17 Oct 2023 15:21:06 -0400
Subject: [PATCH 10/15] [gn] port dd64c82cbc9c6

---
 llvm/utils/gn/secondary/llvm/test/BUILD.gn | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn
index f859af249faf5f..dd9fd0c10d53ef 100644
--- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn
@@ -222,7 +222,10 @@ write_lit_config("lit_site_cfg") {
 write_lit_config("lit_unit_site_cfg") {
   input = "//llvm/test/Unit/lit.site.cfg.py.in"
   output = llvm_lit_unit_site_cfg_file
-  extra_values = [ "LLVM_BUILD_MODE=." ]
+  extra_values = [
+    "LLVM_BUILD_MODE=.",
+    "LLVM_GTEST_RUN_UNDER=",
+  ]
 }
 
 # This target should contain all dependencies of check-llvm.

From c0f3478934bec4a585cd1ed973a0ee39e0ceb7be Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Tue, 17 Oct 2023 19:22:25 +0000
Subject: [PATCH 11/15] [gn build] Port 31512811b8c0

---
 .../gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn      | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn
index 8a811bc990d41b..36957f502c3231 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn
@@ -34,6 +34,7 @@ static_library("misc") {
   sources = [
     "ConfusableIdentifierCheck.cpp",
     "ConstCorrectnessCheck.cpp",
+    "CoroutineHostileRAIICheck.cpp",
     "DefinitionsInHeadersCheck.cpp",
     "HeaderIncludeCycleCheck.cpp",
     "IncludeCleanerCheck.cpp",

From 7dc644fc463a8f42f54d63a99c3a4579df2c3859 Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Tue, 17 Oct 2023 12:31:34 -0700
Subject: [PATCH 12/15] [CodeGen] Temporarily disable the unreachable

It should be there, but we need all platforms that use stack protectors
to implement it first.
---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 6c3e02b2f59405..8e7499ac626a74 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2100,8 +2100,12 @@ class TargetInstrInfo : public MCInstrInfo {
                                   MachineBasicBlock::iterator Iter,
                                   DebugLoc &DL,
                                   bool AllowSideEffects = true) const {
+#if 0
+    // FIXME: This should exist once all platforms that use stack protectors
+    // implements it.
     llvm_unreachable(
         "Target didn't implement TargetInstrInfo::buildClearRegister!");
+#endif
   }
 
   /// Return true if the function can safely be outlined from.

From 389958a9f67ae35dde9c46205bb032842f0cad6a Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Tue, 17 Oct 2023 12:35:30 -0700
Subject: [PATCH 13/15] [CodeGen][NFC] Fix formatting

This fixes the formatting introduced by
fbf0a77e80f18a6d0fd8a28833b0bc87a99b1b2f.
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 7dcf24c26e124a..7f1421549b1492 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -9141,9 +9141,7 @@ void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
   const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
 
   if (TRI.isGeneralPurposeRegister(MF, Reg)) {
-    BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg)
-      .addImm(0)
-      .addImm(0);
+    BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
   } else if (STI.hasSVE()) {
     BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
       .addImm(0)

From 0996ceece605ccba3f4c0079e0204e3c0b068d0e Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 17 Oct 2023 12:49:17 -0700
Subject: [PATCH 14/15] [ELF][test] Improve relocatable link & /DISCARD/ test

Check that #69295 will fix symbols referenced by relocations that are
defined in discarded sections.
---
 lld/test/ELF/linkerscript/discard-section.s | 22 ++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/lld/test/ELF/linkerscript/discard-section.s b/lld/test/ELF/linkerscript/discard-section.s
index 9e021ac83f563a..0ede36c7351f29 100644
--- a/lld/test/ELF/linkerscript/discard-section.s
+++ b/lld/test/ELF/linkerscript/discard-section.s
@@ -6,7 +6,27 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64 b.s -o b.o
 # RUN: ld.lld -T a.lds a.o b.o -z undefs -o /dev/null 2>&1 | count 0
 # RUN: ld.lld -T a.lds a.o b.o -o /dev/null 2>&1 | count 0
-# RUN: ld.lld -r -T a.lds a.o b.o -o /dev/null 2>&1 | count 0
+# RUN: ld.lld -r -T a.lds a.o b.o -o a.ro 2>&1 | count 0
+# RUN: llvm-readelf -r -s a.ro | FileCheck %s --check-prefix=RELOC
+
+# RELOC:      Relocation section '.rela.bbb' at offset {{.*}} contains 1 entries:
+# RELOC-NEXT:     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+# RELOC-NEXT: 0000000000000000  0000000000000000 R_X86_64_NONE                             0
+# RELOC-EMPTY:
+# RELOC-NEXT: Relocation section '.rela.data' at offset {{.*}} contains 4 entries:
+# RELOC-NEXT:     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+# RELOC-NEXT: 0000000000000000  0000000000000001 R_X86_64_64                               0
+# RELOC-NEXT: 0000000000000008  0000000000000001 R_X86_64_64                               0
+# RELOC-NEXT: 0000000000000010  0000000000000001 R_X86_64_64                               0
+# RELOC-NEXT: 0000000000000018  0000000000000001 R_X86_64_64                               0
+
+# RELOC:      Num:    Value          Size Type    Bind   Vis      Ndx Name
+# RELOC-NEXT:   0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND
+# RELOC-NEXT:   1: 0000000000000000     0 SECTION LOCAL  DEFAULT    1 .text
+# RELOC-NEXT:   2: 0000000000000000     0 SECTION LOCAL  DEFAULT    2 .bbb
+# RELOC-NEXT:   3: 0000000000000000     0 SECTION LOCAL  DEFAULT    4 .data
+# RELOC-NEXT:   4: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT    1 _start
+# RELOC-EMPTY:
 
 #--- a.s
 .globl _start

From 122064a6303eb9c06e0af231f5a4ce145d9a2e67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Tue, 17 Oct 2023 22:49:52 +0300
Subject: [PATCH 15/15] [libcxx] [test] Add a test parameter for disabling
 memory intensive tests (#68214)

Specifically, the test std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp
allocates a std::string with INT_MAX-1 elements, and then writes this to
a std::stringstream. On Linux, running this test consumes around 5.0 GB
of memory; on Windows, it ends up using up to 6.8 GB of memory.

This limits whether such tests can run on e.g. GitHub Actions runners,
where the free runners are limited to 8 GB of memory.

This is somewhat similar to, but still notably different from, the
existing test parameter long_tests.
---
 .../stringstream/stringstream.members/gcount.pass.cpp     | 1 +
 libcxx/utils/libcxx/test/params.py                        | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp
index 3a5edac6c58b4f..8dc74421e78959 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: 32-bit-pointer
+// REQUIRES: large_tests
 
 // Test that tellp() does not break the stringstream after INT_MAX, due to use
 // of pbump() that accept int.
diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py
index c3732560f5e469..e34fd0387f4f5b 100644
--- a/libcxx/utils/libcxx/test/params.py
+++ b/libcxx/utils/libcxx/test/params.py
@@ -276,6 +276,14 @@ def getStdFlag(cfg, std):
         help="Whether to enable tests that take longer to run. This can be useful when running on a very slow device.",
         actions=lambda enabled: [] if not enabled else [AddFeature("long_tests")],
     ),
+    Parameter(
+        name="large_tests",
+        choices=[True, False],
+        type=bool,
+        default=True,
+        help="Whether to enable tests that use a lot of memory. This can be useful when running on a device with limited amounts of memory.",
+        actions=lambda enabled: [] if not enabled else [AddFeature("large_tests")],
+    ),
     Parameter(
         name="hardening_mode",
         choices=["unchecked", "hardened", "safe", "debug"],