From ddc30ff802eb135934fc7b785d33c05217ab9e39 Mon Sep 17 00:00:00 2001 From: Joseph Huber <35342157+jhuber6@users.noreply.github.com> Date: Tue, 17 Oct 2023 14:02:31 -0400 Subject: [PATCH 01/15] [libc] Implement the 'ungetc' function on the GPU (#69248) Summary: This function follows closely with the pattern of all the other functions. That is, making a new opcode and forwarding the call to the host. However, this also required modifying the test somewhat. It seems that not all `libc` implementations follow the same error rules as are tested here, and it is not explicit in the standard, so we simply disable these EOF checks when targeting the GPU. --- libc/config/gpu/entrypoints.txt | 1 + libc/docs/gpu/support.rst | 1 + libc/include/llvm-libc-types/rpc_opcodes_t.h | 1 + libc/src/stdio/CMakeLists.txt | 13 +-------- libc/src/stdio/generic/CMakeLists.txt | 12 ++++++++ libc/src/stdio/{ => generic}/ungetc.cpp | 0 libc/src/stdio/gpu/CMakeLists.txt | 11 ++++++++ libc/src/stdio/gpu/ungetc.cpp | 29 ++++++++++++++++++++ libc/test/src/stdio/ungetc_test.cpp | 8 ++++++ libc/utils/gpu/server/rpc_server.cpp | 7 +++++ 10 files changed, 71 insertions(+), 12 deletions(-) rename libc/src/stdio/{ => generic}/ungetc.cpp (100%) create mode 100644 libc/src/stdio/gpu/ungetc.cpp diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index ad68216a76b942..731508088cb6f8 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -104,6 +104,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.fgetc libc.src.stdio.getc libc.src.stdio.getchar + libc.src.stdio.ungetc libc.src.stdio.stdin libc.src.stdio.stdout libc.src.stdio.stderr diff --git a/libc/docs/gpu/support.rst b/libc/docs/gpu/support.rst index fd27273ed562e4..806af5f219dfb4 100644 --- a/libc/docs/gpu/support.rst +++ b/libc/docs/gpu/support.rst @@ -134,6 +134,7 @@ ftell |check| |check| fflush |check| |check| fgetc |check| |check| fgets |check| |check| +ungetc |check| |check| getc |check| |check| getchar |check| |check| puts |check| |check| diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h index 61e17756fa6477..2fd318f06a7db1 100644 --- a/libc/include/llvm-libc-types/rpc_opcodes_t.h +++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h @@ -29,6 +29,7 @@ typedef enum { RPC_FSEEK, RPC_FTELL, RPC_FFLUSH, + RPC_UNGETC, RPC_LAST = 0xFFFF, } rpc_opcode_t; diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 169bc592dee488..380474ce271180 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -54,18 +54,6 @@ add_entrypoint_object( libc.src.__support.File.platform_file ) -add_entrypoint_object( - ungetc - SRCS - ungetc.cpp - HDRS - ungetc.h - DEPENDS - libc.include.stdio - libc.src.__support.File.file - libc.src.__support.File.platform_file -) - add_entrypoint_object( fopencookie SRCS @@ -286,6 +274,7 @@ add_stdio_entrypoint_object(getc_unlocked) add_stdio_entrypoint_object(getchar) add_stdio_entrypoint_object(getchar_unlocked) add_stdio_entrypoint_object(fgets) +add_stdio_entrypoint_object(ungetc) add_stdio_entrypoint_object(stdin) add_stdio_entrypoint_object(stdout) add_stdio_entrypoint_object(stderr) diff --git a/libc/src/stdio/generic/CMakeLists.txt b/libc/src/stdio/generic/CMakeLists.txt index 282d056bba7129..2ecef879eb4bbf 100644 --- a/libc/src/stdio/generic/CMakeLists.txt +++ b/libc/src/stdio/generic/CMakeLists.txt @@ -342,6 +342,18 @@ add_entrypoint_object( libc.src.__support.File.platform_file ) +add_entrypoint_object( + ungetc + SRCS + ungetc.cpp + HDRS + ../ungetc.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + add_entrypoint_object( stdin SRCS diff --git a/libc/src/stdio/ungetc.cpp b/libc/src/stdio/generic/ungetc.cpp similarity index 100% rename from libc/src/stdio/ungetc.cpp rename to libc/src/stdio/generic/ungetc.cpp diff --git a/libc/src/stdio/gpu/CMakeLists.txt b/libc/src/stdio/gpu/CMakeLists.txt index 047b68931bce5c..1b1e2a903cc0b9 100644 --- a/libc/src/stdio/gpu/CMakeLists.txt +++ b/libc/src/stdio/gpu/CMakeLists.txt @@ -251,6 +251,17 @@ add_entrypoint_object( .ferror ) +add_entrypoint_object( + ungetc + SRCS + ungetc.cpp + HDRS + ../ungetc.h + DEPENDS + libc.include.stdio + .gpu_file +) + add_entrypoint_object( stdin SRCS diff --git a/libc/src/stdio/gpu/ungetc.cpp b/libc/src/stdio/gpu/ungetc.cpp new file mode 100644 index 00000000000000..373164a0c53a32 --- /dev/null +++ b/libc/src/stdio/gpu/ungetc.cpp @@ -0,0 +1,29 @@ +//===-- Implementation of ungetc ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/ungetc.h" +#include "file.h" + +#include + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, ungetc, (int c, ::FILE *stream)) { + int ret; + rpc::Client::Port port = rpc::client.open(); + port.send_and_recv( + [=](rpc::Buffer *buffer) { + buffer->data[0] = c; + buffer->data[1] = file::from_stream(stream); + }, + [&](rpc::Buffer *buffer) { ret = static_cast(buffer->data[0]); }); + port.close(); + return ret; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/stdio/ungetc_test.cpp b/libc/test/src/stdio/ungetc_test.cpp index 75eecc87ef265f..c98995ff0811bb 100644 --- a/libc/test/src/stdio/ungetc_test.cpp +++ b/libc/test/src/stdio/ungetc_test.cpp @@ -24,12 +24,16 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) { constexpr size_t CONTENT_SIZE = sizeof(CONTENT); ASSERT_EQ(CONTENT_SIZE, LIBC_NAMESPACE::fwrite(CONTENT, 1, CONTENT_SIZE, file)); +#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations. // Cannot unget to an un-readable file. ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc('1', file)); +#endif ASSERT_EQ(0, LIBC_NAMESPACE::fclose(file)); file = LIBC_NAMESPACE::fopen(FILENAME, "r+"); ASSERT_FALSE(file == nullptr); + // Calling with an EOF should always return EOF without doing anything. + ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc(EOF, file)); char c; ASSERT_EQ(LIBC_NAMESPACE::fread(&c, 1, 1, file), size_t(1)); ASSERT_EQ(c, CONTENT[0]); @@ -43,8 +47,10 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) { // ungetc should not fail after a seek operation. int unget_char = 'z'; ASSERT_EQ(unget_char, LIBC_NAMESPACE::ungetc(unget_char, file)); +#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations. // Another unget should fail. ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc(unget_char, file)); +#endif // ungetting a char at the beginning of the file will allow us to fetch // one additional character. char new_data[CONTENT_SIZE + 1]; @@ -53,8 +59,10 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) { ASSERT_STREQ("zabcdef", new_data); ASSERT_EQ(size_t(1), LIBC_NAMESPACE::fwrite("x", 1, 1, file)); +#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations. // unget should fail after a write operation. ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc('1', file)); +#endif ASSERT_EQ(0, LIBC_NAMESPACE::fclose(file)); } diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index 1c1c9f1ae9e6b5..0550115f7cd1a1 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -186,6 +186,13 @@ struct Server { }); break; } + case RPC_UNGETC: { + port->recv_and_send([](rpc::Buffer *buffer) { + buffer->data[0] = ungetc(static_cast(buffer->data[0]), + file::to_stream(buffer->data[1])); + }); + break; + } case RPC_NOOP: { port->recv([](rpc::Buffer *) {}); break; From b33723710f5194080e8bfab9f21c8445647c976b Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Tue, 17 Oct 2023 11:06:11 -0700 Subject: [PATCH 02/15] [NVPTX] Fixed few more corner cases for v4i8 lowering. (#69263) Fixes https://github.com/llvm/llvm-project/issues/69124 --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 22 ++- llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 6 + llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 3 + llvm/test/CodeGen/NVPTX/f16x2-instructions.ll | 6 +- llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 154 ++++++++++++++++++ llvm/test/CodeGen/NVPTX/param-load-store.ll | 5 - 6 files changed, 180 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 36da2e7b40efaa..a935c0e16a5523 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -504,13 +504,21 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // Only logical ops can be done on v4i8 directly, others must be done // elementwise. setOperationAction( - {ISD::ADD, ISD::MUL, ISD::ABS, ISD::SMIN, - ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::CTPOP, - ISD::CTLZ, ISD::ADD, ISD::SUB, ISD::MUL, - ISD::SHL, ISD::SREM, ISD::UREM, ISD::SDIV, - ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS, - ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, - ISD::UINT_TO_FP}, + {ISD::ABS, ISD::ADD, ISD::ADDC, ISD::ADDE, + ISD::BITREVERSE, ISD::CTLZ, ISD::CTPOP, ISD::CTTZ, + ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FSHL, ISD::FSHR, + ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::PARITY, + ISD::ROTL, ISD::ROTR, ISD::SADDO, ISD::SADDO_CARRY, + ISD::SADDSAT, ISD::SDIV, ISD::SDIVREM, ISD::SELECT_CC, + ISD::SETCC, ISD::SHL, ISD::SINT_TO_FP, ISD::SMAX, + ISD::SMIN, ISD::SMULO, ISD::SMUL_LOHI, ISD::SRA, + ISD::SREM, ISD::SRL, ISD::SSHLSAT, ISD::SSUBO, + ISD::SSUBO_CARRY, ISD::SSUBSAT, ISD::SUB, ISD::SUBC, + ISD::SUBE, ISD::UADDO, ISD::UADDO_CARRY, ISD::UADDSAT, + ISD::UDIV, ISD::UDIVREM, ISD::UINT_TO_FP, ISD::UMAX, + ISD::UMIN, ISD::UMULO, ISD::UMUL_LOHI, ISD::UREM, + ISD::USHLSAT, ISD::USUBO, ISD::USUBO_CARRY, ISD::VSELECT, + ISD::USUBSAT}, MVT::v4i8, Expand); // Operations not directly supported by NVPTX. diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 5c7c10965e2f2c..f6932db2aeb0b9 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -586,6 +586,12 @@ class NVPTXTargetLowering : public TargetLowering { AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override { + // There's rarely any point of packing something into a vector type if we + // already have the source data. + return true; + } + private: const NVPTXSubtarget &STI; // cache the subtarget here SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 84ed953ad18a9b..b0b96b94a12575 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -3485,6 +3485,9 @@ def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; +def: Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))), + (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; + // Count leading zeros let hasSideEffects = false in { def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index 18788c776ffbd7..464b3a754804fe 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -1319,10 +1319,8 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768; ; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]]; ; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]]; -; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: mov.b32 {[[RX0:%rs[0-9]+]], [[RX1:%rs[0-9]+]]}, [[R]] -; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]]; -; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]]; +; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]]; +; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]]; ; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]}; ; CHECK: ret; define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index fd48313ad68484..ddad374a4dc119 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -1269,4 +1269,158 @@ define <4 x i8> @test_fptoui_2xhalf_to_2xi8(<4 x half> %a) #0 { ret <4 x i8> %r } +define void @test_srem_v4i8(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: test_srem_v4i8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<18>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ld.param.u64 %rd3, [test_srem_v4i8_param_2]; +; CHECK-NEXT: ld.param.u64 %rd2, [test_srem_v4i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_srem_v4i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: ld.u32 %r2, [%rd2]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs1, %r3; +; CHECK-NEXT: bfe.s32 %r4, %r1, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs2, %r4; +; CHECK-NEXT: rem.s16 %rs3, %rs2, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs3; +; CHECK-NEXT: bfe.s32 %r6, %r2, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs4, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs5, %r7; +; CHECK-NEXT: rem.s16 %rs6, %rs5, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r5, 8, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 16, 8; +; CHECK-NEXT: cvt.s8.s32 %rs7, %r10; +; CHECK-NEXT: bfe.s32 %r11, %r1, 16, 8; +; CHECK-NEXT: cvt.s8.s32 %rs8, %r11; +; CHECK-NEXT: rem.s16 %rs9, %rs8, %rs7; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs9; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r9, 16, 8; +; CHECK-NEXT: bfe.s32 %r14, %r2, 24, 8; +; CHECK-NEXT: cvt.s8.s32 %rs10, %r14; +; CHECK-NEXT: bfe.s32 %r15, %r1, 24, 8; +; CHECK-NEXT: cvt.s8.s32 %rs11, %r15; +; CHECK-NEXT: rem.s16 %rs12, %rs11, %rs10; +; CHECK-NEXT: cvt.u32.u16 %r16, %rs12; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8; +; CHECK-NEXT: st.u32 [%rd3], %r17; +; CHECK-NEXT: ret; +entry: + %t57 = load <4 x i8>, ptr %a, align 4 + %t59 = load <4 x i8>, ptr %b, align 4 + %x = srem <4 x i8> %t57, %t59 + store <4 x i8> %x, ptr %c, align 4 + ret void +} + +;; v3i8 lowering, especially for unaligned loads is terrible. We end up doing +;; tons of pointless scalar_to_vector/bitcast/extract_elt on v2i16/v4i8, which +;; is further complicated by LLVM trying to use i16 as an intermediate type, +;; because we don't have i8 registers. It's a mess. +;; Ideally we want to split it into element-wise ops, but legalizer can't handle +;; odd-sized vectors. TL;DR; don't use odd-sized vectors of v8. +define void @test_srem_v3i8(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: test_srem_v3i8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<20>; +; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ld.param.u64 %rd3, [test_srem_v3i8_param_2]; +; CHECK-NEXT: ld.param.u64 %rd2, [test_srem_v3i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_srem_v3i8_param_0]; +; CHECK-NEXT: ld.u8 %rs1, [%rd1]; +; CHECK-NEXT: ld.u8 %rs2, [%rd1+1]; +; CHECK-NEXT: shl.b16 %rs3, %rs2, 8; +; CHECK-NEXT: or.b16 %rs4, %rs3, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs4; +; CHECK-NEXT: ld.s8 %rs5, [%rd1+2]; +; CHECK-NEXT: ld.u8 %rs6, [%rd2]; +; CHECK-NEXT: ld.u8 %rs7, [%rd2+1]; +; CHECK-NEXT: shl.b16 %rs8, %rs7, 8; +; CHECK-NEXT: or.b16 %rs9, %rs8, %rs6; +; CHECK-NEXT: cvt.u32.u16 %r3, %rs9; +; CHECK-NEXT: ld.s8 %rs10, [%rd2+2]; +; CHECK-NEXT: bfe.s32 %r5, %r3, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs11, %r5; +; CHECK-NEXT: bfe.s32 %r6, %r1, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs12, %r6; +; CHECK-NEXT: rem.s16 %rs13, %rs12, %rs11; +; CHECK-NEXT: cvt.u32.u16 %r7, %rs13; +; CHECK-NEXT: bfe.s32 %r8, %r3, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs14, %r8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs15, %r9; +; CHECK-NEXT: rem.s16 %rs16, %rs15, %rs14; +; CHECK-NEXT: cvt.u32.u16 %r10, %rs16; +; CHECK-NEXT: bfi.b32 %r11, %r10, %r7, 8, 8; +; CHECK-NEXT: // implicit-def: %r13 +; CHECK-NEXT: bfi.b32 %r12, %r13, %r11, 16, 8; +; CHECK-NEXT: // implicit-def: %r15 +; CHECK-NEXT: bfi.b32 %r14, %r15, %r12, 24, 8; +; CHECK-NEXT: rem.s16 %rs17, %rs5, %rs10; +; CHECK-NEXT: cvt.u16.u32 %rs18, %r14; +; CHECK-NEXT: st.u8 [%rd3], %rs18; +; CHECK-NEXT: shr.u16 %rs19, %rs18, 8; +; CHECK-NEXT: st.u8 [%rd3+1], %rs19; +; CHECK-NEXT: st.u8 [%rd3+2], %rs17; +; CHECK-NEXT: ret; +entry: + %t57 = load <3 x i8>, ptr %a, align 1 + %t59 = load <3 x i8>, ptr %b, align 1 + %x = srem <3 x i8> %t57, %t59 + store <3 x i8> %x, ptr %c, align 1 + ret void +} + +define void @test_sext_v4i1_to_v4i8(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: test_sext_v4i1_to_v4i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<18>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ld.param.u64 %rd3, [test_sext_v4i1_to_v4i8_param_2]; +; CHECK-NEXT: ld.param.u64 %rd2, [test_sext_v4i1_to_v4i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_sext_v4i1_to_v4i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: ld.u32 %r2, [%rd2]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 24, 8; +; CHECK-NEXT: bfe.s32 %r4, %r1, 24, 8; +; CHECK-NEXT: setp.hi.u32 %p1, %r4, %r3; +; CHECK-NEXT: bfe.s32 %r5, %r2, 16, 8; +; CHECK-NEXT: bfe.s32 %r6, %r1, 16, 8; +; CHECK-NEXT: setp.hi.u32 %p2, %r6, %r5; +; CHECK-NEXT: bfe.s32 %r7, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r8, %r1, 8, 8; +; CHECK-NEXT: setp.hi.u32 %p3, %r8, %r7; +; CHECK-NEXT: bfe.s32 %r9, %r2, 0, 8; +; CHECK-NEXT: bfe.s32 %r10, %r1, 0, 8; +; CHECK-NEXT: setp.hi.u32 %p4, %r10, %r9; +; CHECK-NEXT: selp.s32 %r11, -1, 0, %p4; +; CHECK-NEXT: selp.s32 %r12, -1, 0, %p3; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; +; CHECK-NEXT: selp.s32 %r14, -1, 0, %p2; +; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8; +; CHECK-NEXT: selp.s32 %r16, -1, 0, %p1; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8; +; CHECK-NEXT: st.u32 [%rd3], %r17; +; CHECK-NEXT: ret; +entry: + %t1 = load <4 x i8>, ptr %a, align 4 + %t2 = load <4 x i8>, ptr %b, align 4 + %t5 = icmp ugt <4 x i8> %t1, %t2 + %t6 = sext <4 x i1> %t5 to <4 x i8> + store <4 x i8> %t6, ptr %c, align 4 + ret void +} + attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll index b4208c691c91df..c14dc88431d316 100644 --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -364,10 +364,6 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) { ; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16] ; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8]; ; CHECK-DAG: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0] -; CHECK-DAG: mov.b32 [[R0:%r[0-9]+]], {[[E0]], [[E1]]}; -; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[R0]]; -; CHECK-DAG: mov.b32 [[R1:%r[0-9]+]], {[[E2]], [[E3]]}; -; CHECK-DAG: mov.b32 {[[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [[R1]]; ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK-DAG: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; @@ -496,7 +492,6 @@ define <4 x half> @test_v4f16(<4 x half> %a) { ; CHECK-LABEL: test_v5f16( ; CHECK: .param .align 16 .b8 test_v5f16_param_0[16] ; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0]; -; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]]; ; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8]; ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK-DAG: st.param.v4.b16 [param0+0], From dd64c82cbc9c69924b5c6df059e5b065fa29d185 Mon Sep 17 00:00:00 2001 From: Haowei Date: Tue, 17 Oct 2023 11:15:46 -0700 Subject: [PATCH 03/15] [unittest] Allow LLVM unit test to run under a wrapper program. (#66821) This patch add CMake option "LLVM_GTEST_RUN_UNDER" to LLVM unittest configuration. When specified, LLVM unittest will be run under the wrapper program specified by this option. This feature can simplify the setup to run LLVM unittest on a target platform that is different than host. --- llvm/CMakeLists.txt | 3 +++ llvm/test/Unit/lit.cfg.py | 6 +++++- llvm/test/Unit/lit.site.cfg.py.in | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index ef2f2146a03644..82d4beea91e346 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -1219,6 +1219,9 @@ if( LLVM_INCLUDE_EXAMPLES ) endif() if( LLVM_INCLUDE_TESTS ) + set(LLVM_GTEST_RUN_UNDER + "" CACHE STRING + "Define the wrapper program that LLVM unit tests should be run under.") if(EXISTS ${LLVM_MAIN_SRC_DIR}/projects/test-suite AND TARGET clang) include(LLVMExternalProjectUtils) llvm_ExternalProject_Add(test-suite ${LLVM_MAIN_SRC_DIR}/projects/test-suite diff --git a/llvm/test/Unit/lit.cfg.py b/llvm/test/Unit/lit.cfg.py index f15c30dbcdb0aa..61296d7ea0032e 100644 --- a/llvm/test/Unit/lit.cfg.py +++ b/llvm/test/Unit/lit.cfg.py @@ -19,7 +19,11 @@ config.test_source_root = config.test_exec_root # testFormat: The test format to use to interpret tests. -config.test_format = lit.formats.GoogleTest(config.llvm_build_mode, "Tests") +config.test_format = lit.formats.GoogleTest( + config.llvm_build_mode, + "Tests", + run_under=config.gtest_run_under, +) # Propagate the temp directory. Windows requires this because it uses \Windows\ # if none of these are present. diff --git a/llvm/test/Unit/lit.site.cfg.py.in b/llvm/test/Unit/lit.site.cfg.py.in index 1d7d7658014949..3536a34f796a28 100644 --- a/llvm/test/Unit/lit.site.cfg.py.in +++ b/llvm/test/Unit/lit.site.cfg.py.in @@ -7,6 +7,7 @@ config.llvm_obj_root = path(r"@LLVM_BINARY_DIR@") config.llvm_tools_dir = lit_config.substitute(path(r"@LLVM_TOOLS_DIR@")) config.llvm_build_mode = lit_config.substitute("@LLVM_BUILD_MODE@") config.shlibdir = lit_config.substitute(path(r"@SHLIBDIR@")) +config.gtest_run_under = lit_config.substitute(r"@LLVM_GTEST_RUN_UNDER@") # Let the main config do the real work. lit_config.load_config( From fd311126349b8fe1684d62154a9fa5a7bbb0b713 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 17 Oct 2023 19:17:40 +0100 Subject: [PATCH 04/15] [VPlan] Insert Trunc/Exts for reductions directly in VPlan. Update the code to create Trunc/Ext recipes directly in adjustRecipesForReductions instead of fixing it up later in fixReductions. This explicitly models the required conversions and also makes sure they are generated at the right place (instead of after the exit condition), hence the changes in a few tests. --- .../Transforms/Vectorize/LoopVectorize.cpp | 67 ++++++++++--------- .../epilog-vectorization-reductions.ll | 8 +-- .../LoopVectorize/reduction-small-size.ll | 8 +-- .../scalable-reduction-inloop.ll | 8 +-- 4 files changed, 47 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index aa435b0d47aa59..14c5c0d18a4db6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3792,8 +3792,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, State.setDebugLocFrom(I->getDebugLoc()); VPValue *LoopExitInstDef = PhiR->getBackedgeValue(); - // This is the vector-clone of the value that leaves the loop. - Type *VecTy = State.get(LoopExitInstDef, 0)->getType(); // Before each round, move the insertion point right between // the PHIs and the values we are going to write. @@ -3805,10 +3803,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, State.setDebugLocFrom(LoopExitInst->getDebugLoc()); Type *PhiTy = OrigPhi->getType(); - - VPBasicBlock *LatchVPBB = - PhiR->getParent()->getEnclosingLoopRegion()->getExitingBasicBlock(); - BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB]; // If tail is folded by masking, the vector value to leave the loop should be // a Select choosing between the vectorized LoopExitInst and vectorized Phi, // instead of the former. For an inloop reduction the reduction will already @@ -3834,23 +3828,12 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, // then extend the loop exit value to enable InstCombine to evaluate the // entire expression in the smaller type. if (VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { - assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); - Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); - Builder.SetInsertPoint(VectorLoopLatch->getTerminator()); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); - Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy) - : Builder.CreateZExt(Trunc, VecTy); - for (User *U : llvm::make_early_inc_range(RdxParts[Part]->users())) - if (U != Trunc) { - U->replaceUsesOfWith(RdxParts[Part], Extnd); - RdxParts[Part] = Extnd; - } - } Builder.SetInsertPoint(LoopMiddleBlock, LoopMiddleBlock->getFirstInsertionPt()); - for (unsigned Part = 0; Part < UF; ++Part) + Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); + for (unsigned Part = 0; Part < UF; ++Part) { RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); + } } // Reduce all of the unrolled parts into a single vector. @@ -9155,18 +9138,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( PreviousLink = RedRecipe; } } - - // If tail is folded by masking, introduce selects between the phi - // and the live-out instruction of each reduction, at the beginning of the - // dedicated latch block. - if (CM.foldTailByMasking()) { Builder.setInsertPoint(&*LatchVPBB->begin()); for (VPRecipeBase &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { - VPReductionPHIRecipe *PhiR = dyn_cast(&R); - if (!PhiR || PhiR->isInLoop()) - continue; - const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + VPReductionPHIRecipe *PhiR = dyn_cast(&R); + if (!PhiR || PhiR->isInLoop()) + continue; + + const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + auto *Result = PhiR->getBackedgeValue()->getDefiningRecipe(); + // If tail is folded by masking, introduce selects between the phi + // and the live-out instruction of each reduction, at the beginning of the + // dedicated latch block. + if (CM.foldTailByMasking()) { VPValue *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), *Plan); VPValue *Red = PhiR->getBackedgeValue(); @@ -9174,16 +9158,35 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( "reduction recipe must be defined before latch"); FastMathFlags FMFs = RdxDesc.getFastMathFlags(); Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType(); - auto *Select = + Result = PhiTy->isFloatingPointTy() ? new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs) : new VPInstruction(Instruction::Select, {Cond, Red, PhiR}); - Select->insertBefore(&*Builder.getInsertPoint()); + Result->insertBefore(&*Builder.getInsertPoint()); if (PreferPredicatedReductionSelect || TTI.preferPredicatedReductionSelect( PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy, TargetTransformInfo::ReductionFlags())) - PhiR->setOperand(1, Select); + PhiR->setOperand(1, Result->getVPSingleValue()); + } + // If the vector reduction can be performed in a smaller type, we truncate + // then extend the loop exit value to enable InstCombine to evaluate the + // entire expression in the smaller type. + Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType(); + if (PhiTy != RdxDesc.getRecurrenceType()) { + assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); + Type *RdxTy = RdxDesc.getRecurrenceType(); + auto *Trunc = new VPWidenCastRecipe(Instruction::Trunc, + Result->getVPSingleValue(), RdxTy); + auto *Extnd = + RdxDesc.isSigned() + ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy) + : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy); + + Trunc->insertAfter(Result); + Extnd->insertAfter(Trunc); + Result->getVPSingleValue()->replaceAllUsesWith(Extnd); + Trunc->setOperand(0, Result->getVPSingleValue()); } } diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index 7a3c7d6fbfea71..03903d80cfd6ec 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -207,10 +207,10 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i16> ; CHECK-NEXT: [[TMP8]] = zext <4 x i16> [[TMP7]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16> @@ -234,10 +234,10 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i16>, ptr [[TMP16]], align 2 ; CHECK-NEXT: [[TMP17:%.*]] = zext <4 x i16> [[WIDE_LOAD4]] to <4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP14]], [[TMP17]] -; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 4 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 256 ; CHECK-NEXT: [[TMP20:%.*]] = trunc <4 x i32> [[TMP18]] to <4 x i16> ; CHECK-NEXT: [[TMP21]] = zext <4 x i16> [[TMP20]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 4 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 256 ; CHECK-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP22:%.*]] = trunc <4 x i32> [[TMP21]] to <4 x i16> diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 837d663f4a9263..a4a075463b1b0b 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -22,10 +22,10 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8> ; CHECK-NEXT: [[TMP5]] = zext <4 x i8> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8> @@ -99,10 +99,10 @@ define i32 @PR35734(i32 %x, i32 %y) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i1> ; CHECK-NEXT: [[TMP7]] = sext <4 x i1> [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1> diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll index 3cc6e5fa7b8d5f..afe16c71f7f9ca 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll @@ -17,14 +17,14 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP27:%.*]] = zext [[WIDE_LOAD2]] to ; CHECK-NEXT: [[TMP28:%.*]] = add [[TMP14]], [[TMP26]] ; CHECK-NEXT: [[TMP29:%.*]] = add [[TMP15]], [[TMP27]] +; CHECK-NEXT: [[TMP33:%.*]] = trunc [[TMP28]] to +; CHECK-NEXT: [[TMP35:%.*]] = trunc [[TMP29]] to +; CHECK-NEXT: [[TMP34]] = zext [[TMP33]] to +; CHECK-NEXT: [[TMP36]] = zext [[TMP35]] to ; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 16 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]] ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}} -; CHECK-NEXT: [[TMP33:%.*]] = trunc [[TMP28]] to -; CHECK-NEXT: [[TMP34]] = zext [[TMP33]] to -; CHECK-NEXT: [[TMP35:%.*]] = trunc [[TMP29]] to -; CHECK-NEXT: [[TMP36]] = zext [[TMP35]] to ; CHECK: middle.block: ; CHECK-NEXT: [[TMP37:%.*]] = trunc [[TMP34]] to ; CHECK-NEXT: [[TMP38:%.*]] = trunc [[TMP36]] to From 71c97c735c10dd8040f721f93a0b7be0cc58d3ef Mon Sep 17 00:00:00 2001 From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com> Date: Tue, 17 Oct 2023 11:34:06 -0700 Subject: [PATCH 05/15] =?UTF-8?q?[mlir][sparse]=20avoid=20tensor=20to=20me?= =?UTF-8?q?mref=20conversion=20in=20sparse=20tensor=20rewri=E2=80=A6=20(#6?= =?UTF-8?q?9362)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ting rules. --- .../Transforms/SparseTensorRewriting.cpp | 107 +++++-------- .../SparseTensor/convert_sparse2dense.mlir | 35 ++--- .../Dialect/SparseTensor/sparse_concat.mlir | 148 +++++++++--------- 3 files changed, 132 insertions(+), 158 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 1bfee3aa1d7ee8..e50b14975e83d6 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -829,47 +829,40 @@ struct ReshapeRewriter : public OpRewritePattern { } }; +// A trivial wrapper to help generate different operations for dense/sparse +// tensors. struct TensorLike { TensorLike(OpBuilder &builder, Location loc, RankedTensorType rtt, - ValueRange sizes) - : isSparse(rtt.getEncoding() != nullptr) { + ValueRange sizes) { SmallVector dynSzs; getDynamicSizes(rtt, sizes, dynSzs); - if (isSparse) - val = builder.create(loc, rtt, dynSzs); - else - val = allocDenseTensor(builder, loc, rtt, sizes); - }; - - void insertOrStore(OpBuilder &builder, Location loc, Value v, - ValueRange crds) { - if (isSparse) - val = builder.create(loc, v, val, crds); - else - builder.create(loc, v, val, crds); + val = builder.create(loc, rtt, dynSzs); + if (!isSparse()) { + Value c0 = constantZero(builder, loc, rtt.getElementType()); + val = builder.create(loc, c0, val).getResult(0); + } } - Value getSSA() const { - // We don't need to maintain the SSA chain for a memref value. - return isSparse ? val : nullptr; + void insert(OpBuilder &builder, Location loc, Value v, ValueRange crds) { + // TODO: Unify these two. + if (isSparse()) + val = builder.create(loc, v, val, crds); + else + val = builder.create(loc, v, val, crds); } Value finalize(OpBuilder &builder, Location loc, RankedTensorType rtp) const { - if (isSparse) + if (isSparse()) return builder.create(loc, val, true); - return builder.create(loc, rtp, val); + return val; } - void updateSSA(Value v) { - // Dense memref is a non-SSA value. - assert(isSparse); - val = v; + bool isSparse() const { + return getSparseTensorEncoding(val.getType()) != nullptr; } -private: - bool isSparse; - Value val; // either a memref (for dense tensor) or a sparse tensor. + Value val; }; struct ConcatenateRewriter : public OpRewritePattern { @@ -901,14 +894,14 @@ struct ConcatenateRewriter : public OpRewritePattern { TensorLike dstBuf(rewriter, loc, dstTp.getRankedTensorType(), sizes); Value offset = constantIndex(rewriter, loc, 0); - Value iterArg = dstBuf.getSSA(); + Value iterArg = dstBuf.val; ForeachOp foreachOp; for (Value input : op.getInputs()) { // Builds a for op for each input tensor to append new values into the // output tensor. foreachOp = rewriter.create( - loc, input, iterArg ? ValueRange{iterArg} : ValueRange{}, + loc, input, iterArg, [&](OpBuilder &builder, Location loc, ValueRange dcvs, Value v, ValueRange reduc) { SmallVector dstLcvs(dstTp.getLvlRank()); @@ -920,32 +913,26 @@ struct ConcatenateRewriter : public OpRewritePattern { // FIXME: `toStoredDim` is deprecated dstLcvs[toStoredDim(dstTp.getEncoding(), d)] = crd; } - - if (!reduc.empty()) - dstBuf.updateSSA(reduc.front()); - + // Enters foreach, updates the SSA chain. + dstBuf.val = reduc.front(); if (!dstTp.isAllDense()) { Value cond = genIsNonzero(builder, loc, v); auto ifOp = builder.create(loc, reduc.getTypes(), cond, /*else*/ true); builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - dstBuf.insertOrStore(builder, loc, v, dstLcvs); - builder.create(loc, dstBuf.getSSA()); + dstBuf.insert(builder, loc, v, dstLcvs); + builder.create(loc, dstBuf.val); // Exits the ifOp, update the sparse tensor SSA value. builder.setInsertionPointAfter(ifOp); - assert(!reduc.empty()); - dstBuf.updateSSA(ifOp.getResult(0)); + dstBuf.val = ifOp.getResult(0); } else { - dstBuf.insertOrStore(builder, loc, v, dstLcvs); + dstBuf.insert(builder, loc, v, dstLcvs); } - if (reduc.empty()) - builder.create(loc); - else - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); }); // Accumulates the offset. Note that only static-shaped inputs are allowed // by concatenate op verifier, which saves us from computing the offset @@ -955,15 +942,11 @@ struct ConcatenateRewriter : public OpRewritePattern { offset = rewriter.create( loc, offset, constantIndex(rewriter, loc, *sh)); - if (!foreachOp.getResults().empty()) { - iterArg = foreachOp.getResult(0); - dstBuf.updateSSA(iterArg); - } + iterArg = foreachOp.getResult(0); + dstBuf.val = iterArg; } - if (!foreachOp.getResults().empty()) - dstBuf.updateSSA(iterArg); - + dstBuf.val = iterArg; Value ret = dstBuf.finalize(rewriter, loc, dstTp.getRankedTensorType()); rewriter.replaceOp(op, ret); return success(); @@ -1010,15 +993,12 @@ struct DirectConvertRewriter : public OpRewritePattern { ValueRange vs; TensorLike dstBuf(rewriter, loc, dstStt.getRankedTensorType(), sizes); - Value iterArg = dstBuf.getSSA(); auto foreachOp = rewriter.create( - loc, src, iterArg ? ValueRange{iterArg} : ValueRange{}, foreachOrder, + loc, src, dstBuf.val, foreachOrder, [&](OpBuilder &builder, Location loc, ValueRange dcvs, Value v, ValueRange reduc) { // Enters the loop, update the SSA value for insertion chain. - if (!reduc.empty()) - dstBuf.updateSSA(reduc.front()); - + dstBuf.val = reduc.front(); const Dimension dimRank = dstStt.getDimRank(); const Level lvlRank = dstStt.getLvlRank(); SmallVector lcvs(lvlRank); @@ -1028,34 +1008,29 @@ struct DirectConvertRewriter : public OpRewritePattern { } if (!skipZeroCheck) { - assert(!reduc.empty()); Value cond = genIsNonzero(builder, loc, v); auto ifOp = builder.create(loc, reduc.getTypes(), cond, /*else*/ true); builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - dstBuf.insertOrStore(builder, loc, v, lcvs); - builder.create(loc, dstBuf.getSSA()); + dstBuf.insert(builder, loc, v, lcvs); + builder.create(loc, dstBuf.val); // Exits the ifOp, update the sparse tensor SSA value. builder.setInsertionPointAfter(ifOp); - dstBuf.updateSSA(ifOp.getResult(0)); + dstBuf.val = ifOp.getResult(0); } else { - dstBuf.insertOrStore(builder, loc, v, lcvs); + dstBuf.insert(builder, loc, v, lcvs); } - if (reduc.empty()) - builder.create(loc); - else - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); }); rewriter.setInsertionPointAfter(foreachOp); // Exits the for loop, links the SSA chain. - if (!foreachOp.getResults().empty()) - dstBuf.updateSSA(foreachOp.getResult(0)); + dstBuf.val = foreachOp.getResult(0); Value ret = dstBuf.finalize(rewriter, loc, dstStt.getRankedTensorType()); rewriter.replaceOp(op, ret); diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir index c22f051a0d5854..e2dcb068e11851 100644 --- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir @@ -14,11 +14,10 @@ // CHECK-LABEL: func.func @sparse_convert_1d // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13xi32> { %0 = sparse_tensor.convert %arg0 : tensor<13xi32, #SparseVector> to tensor<13xi32> return %0 : tensor<13xi32> @@ -26,11 +25,10 @@ func.func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13x // CHECK-LABEL: func.func @sparse_convert_1d_dyn // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_1d_dyn(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor @@ -38,11 +36,10 @@ func.func @sparse_convert_1d_dyn(%arg0: tensor) -> tensor< // CHECK-LABEL: func.func @sparse_convert_2d // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x4xf64> { %0 = sparse_tensor.convert %arg0 : tensor<2x4xf64, #SparseMatrix> to tensor<2x4xf64> return %0 : tensor<2x4xf64> @@ -50,11 +47,10 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x // CHECK-LABEL: func.func @sparse_convert_2d_dyn // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d_dyn0(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor @@ -62,11 +58,10 @@ func.func @sparse_convert_2d_dyn0(%arg0: tensor) -> tens // CHECK-LABEL: func.func @sparse_convert_2d_dyn1 // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tensor<2x?xf64> { %0 = sparse_tensor.convert %arg0 : tensor<2x?xf64, #SparseMatrix> to tensor<2x?xf64> return %0 : tensor<2x?xf64> @@ -74,11 +69,10 @@ func.func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tens // CHECK-LABEL: func.func @sparse_convert_2d_dyn2 // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d_dyn2(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor @@ -86,11 +80,10 @@ func.func @sparse_convert_2d_dyn2(%arg0: tensor) -> tens // CHECK-LABEL: func.func @sparse_convert_3d // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_3d(%arg0: tensor<2x3x4xf64, #SparseTensor>) -> tensor<2x3x4xf64> { %0 = sparse_tensor.convert %arg0 : tensor<2x3x4xf64, #SparseTensor> to tensor<2x3x4xf64> return %0 : tensor<2x3x4xf64> diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir index bdfab54dc6daeb..f3d3dd28563e89 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -176,77 +176,83 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>, return %0 : tensor } -// CHECK-LABEL: @concat_sparse_sparse_dense( -// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor -// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index -// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[TMP_c9:.*]] = arith.constant 9 : index -// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[TMP_d0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK: %[[A:.*]] = memref.alloc(%[[TMP_c9]], %[[TMP_c4]]) : memref -// CHECK: linalg.fill ins(%[[TMP_d0]] : f64) outs(%[[A]] : memref) -// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref -// CHECK: memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_23]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index -// CHECK: memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_29]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref -// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref -// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index -// CHECK: memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_29]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[R:.*]] = bufferization.to_tensor %[[A]] : memref -// CHECK: return %[[R]] : tensor +// CHECK-LABEL: func.func @concat_sparse_sparse_dense( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64, #sparse_tensor +// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x4xf64, #sparse_tensor +// CHECK-SAME: %[[VAL_2:.*]]: tensor<4x4xf64, #sparse_tensor +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 9 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_10:.*]] = bufferization.alloc_tensor(%[[VAL_4]], %[[VAL_3]]) : tensor +// CHECK: %[[VAL_11:.*]] = linalg.fill ins(%[[VAL_6]] : f64) outs(%[[VAL_10]] : tensor) -> tensor +// CHECK: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_8]] iter_args(%[[VAL_21:.*]] = %[[VAL_11]]) -> (tensor) { +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_20]], %[[VAL_8]] : index +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_26:.*]] = scf.for %[[VAL_27:.*]] = %[[VAL_23]] to %[[VAL_25]] step %[[VAL_8]] iter_args(%[[VAL_28:.*]] = %[[VAL_21]]) -> (tensor) { +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_31:.*]] = tensor.insert %[[VAL_30]] into %[[VAL_28]]{{\[}}%[[VAL_22]], %[[VAL_29]]] : tensor +// CHECK: scf.yield %[[VAL_31]] : tensor +// CHECK: } +// CHECK: scf.yield %[[VAL_26]] : tensor +// CHECK: } +// CHECK: %[[VAL_32:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_33:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_34:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_35:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_36:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_39:.*]] = scf.for %[[VAL_40:.*]] = %[[VAL_37]] to %[[VAL_38]] step %[[VAL_8]] iter_args(%[[VAL_41:.*]] = %[[VAL_19]]) -> (tensor) { +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_33]]{{\[}}%[[VAL_40]]] : memref +// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_34]]{{\[}}%[[VAL_40]]] : memref +// CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_40]], %[[VAL_8]] : index +// CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_34]]{{\[}}%[[VAL_44]]] : memref +// CHECK: %[[VAL_46:.*]] = scf.for %[[VAL_47:.*]] = %[[VAL_43]] to %[[VAL_45]] step %[[VAL_8]] iter_args(%[[VAL_48:.*]] = %[[VAL_41]]) -> (tensor) { +// CHECK: %[[VAL_49:.*]] = memref.load %[[VAL_35]]{{\[}}%[[VAL_47]]] : memref +// CHECK: %[[VAL_50:.*]] = memref.load %[[VAL_36]]{{\[}}%[[VAL_47]]] : memref +// CHECK: %[[VAL_51:.*]] = arith.addi %[[VAL_42]], %[[VAL_9]] : index +// CHECK: %[[VAL_52:.*]] = tensor.insert %[[VAL_50]] into %[[VAL_48]]{{\[}}%[[VAL_51]], %[[VAL_49]]] : tensor +// CHECK: scf.yield %[[VAL_52]] : tensor +// CHECK: } +// CHECK: scf.yield %[[VAL_46]] : tensor +// CHECK: } +// CHECK: %[[VAL_53:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_54:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_55:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_56:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_57:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_59:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_60:.*]] = scf.for %[[VAL_61:.*]] = %[[VAL_58]] to %[[VAL_59]] step %[[VAL_8]] iter_args(%[[VAL_62:.*]] = %[[VAL_39]]) -> (tensor) { +// CHECK: %[[VAL_63:.*]] = memref.load %[[VAL_54]]{{\[}}%[[VAL_61]]] : memref +// CHECK: %[[VAL_64:.*]] = memref.load %[[VAL_55]]{{\[}}%[[VAL_61]]] : memref +// CHECK: %[[VAL_65:.*]] = arith.addi %[[VAL_61]], %[[VAL_8]] : index +// CHECK: %[[VAL_66:.*]] = memref.load %[[VAL_55]]{{\[}}%[[VAL_65]]] : memref +// CHECK: %[[VAL_67:.*]] = scf.for %[[VAL_68:.*]] = %[[VAL_64]] to %[[VAL_66]] step %[[VAL_8]] iter_args(%[[VAL_69:.*]] = %[[VAL_62]]) -> (tensor) { +// CHECK: %[[VAL_70:.*]] = memref.load %[[VAL_56]]{{\[}}%[[VAL_68]]] : memref +// CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_57]]{{\[}}%[[VAL_68]]] : memref +// CHECK: %[[VAL_72:.*]] = arith.addi %[[VAL_63]], %[[VAL_5]] : index +// CHECK: %[[VAL_73:.*]] = tensor.insert %[[VAL_71]] into %[[VAL_69]]{{\[}}%[[VAL_72]], %[[VAL_70]]] : tensor +// CHECK: scf.yield %[[VAL_73]] : tensor +// CHECK: } +// CHECK: scf.yield %[[VAL_67]] : tensor +// CHECK: } +// CHECK: return %[[VAL_60]] : tensor +// CHECK: } func.func @concat_sparse_sparse_dense(%arg0: tensor<2x4xf64, #DCSR>, %arg1: tensor<3x4xf64, #DCSR>, %arg2: tensor<4x4xf64, #DCSR>) From 31512811b8c0f8fd328fba585640992c39218f1e Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 17 Oct 2023 20:46:01 +0200 Subject: [PATCH 06/15] [clang-tidy] Add check to diagnose coroutine-hostile RAII objects (#68738) This check detects **hostile-RAII** objects which should not **persist across a suspension point in a coroutine**. Some objects require that they be destroyed on the same thread that created them. Traditionally this requirement was often phrased as "must be a local variable", under the assumption that local variables always work this way. However this is incorrect with **C++20 coroutines**, since an intervening `co_await` may cause the coroutine to suspend and later be resumed on another thread. The lifetime of an object that requires being destroyed on the same thread must not encompass a `co_await` or `co_yield` point. If you create/destroy an object, you must do so without allowing the coroutine to suspend in the meantime. The check considers the following type as hostile: - **Scoped-lockable types**: A scoped-lockable object persisting across a suspension point is problematic as the lock held by this object could be unlocked by a different thread. This would be undefined behaviour. - Types belonging to a configurable **denylist**. ```cpp // Call some async API while holding a lock. const my::MutexLock l(&mu_); // Oops! The async Bar function may finish on a different // thread from the one that created the MutexLock object and therefore called // Mutex::Lock -- now Mutex::Unlock will be called on the wrong thread. co_await Bar(); ``` --- .../clang-tidy/misc/CMakeLists.txt | 1 + .../misc/CoroutineHostileRAIICheck.cpp | 98 +++++++++ .../misc/CoroutineHostileRAIICheck.h | 50 +++++ .../clang-tidy/misc/MiscTidyModule.cpp | 3 + clang-tools-extra/docs/ReleaseNotes.rst | 7 + .../docs/clang-tidy/checks/list.rst | 1 + .../checks/misc/coroutine-hostile-raii.rst | 50 +++++ .../checkers/misc/coroutine-hostile-raii.cpp | 192 ++++++++++++++++++ 8 files changed, 402 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp create mode 100644 clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp diff --git a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt index 2e88e68a544782..d9ec268650c053 100644 --- a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt @@ -18,6 +18,7 @@ add_custom_target(genconfusable DEPENDS Confusables.inc) add_clang_library(clangTidyMiscModule ConstCorrectnessCheck.cpp + CoroutineHostileRAIICheck.cpp DefinitionsInHeadersCheck.cpp ConfusableIdentifierCheck.cpp HeaderIncludeCycleCheck.cpp diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp new file mode 100644 index 00000000000000..e820cd39d83d21 --- /dev/null +++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp @@ -0,0 +1,98 @@ +//===--- CoroutineHostileRAII.cpp - clang-tidy ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CoroutineHostileRAIICheck.h" +#include "../utils/OptionsUtils.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchersInternal.h" +#include "clang/Basic/AttrKinds.h" +#include "clang/Basic/DiagnosticIDs.h" + +using namespace clang::ast_matchers; +namespace clang::tidy::misc { +namespace { +using clang::ast_matchers::internal::BoundNodesTreeBuilder; + +AST_MATCHER_P(Stmt, forEachPrevStmt, ast_matchers::internal::Matcher, + InnerMatcher) { + DynTypedNode P; + bool IsHostile = false; + for (const Stmt *Child = &Node; Child; Child = P.get()) { + auto Parents = Finder->getASTContext().getParents(*Child); + if (Parents.empty()) + break; + P = *Parents.begin(); + auto *PCS = P.get(); + if (!PCS) + continue; + for (const auto &Sibling : PCS->children()) { + // Child contains suspension. Siblings after Child do not persist across + // this suspension. + if (Sibling == Child) + break; + // In case of a match, add the bindings as a separate match. Also don't + // clear the bindings if a match is not found (unlike Matcher::matches). + BoundNodesTreeBuilder SiblingBuilder; + if (InnerMatcher.matches(*Sibling, Finder, &SiblingBuilder)) { + Builder->addMatch(SiblingBuilder); + IsHostile = true; + } + } + } + return IsHostile; +} +} // namespace + +CoroutineHostileRAIICheck::CoroutineHostileRAIICheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + RAIITypesList(utils::options::parseStringList( + Options.get("RAIITypesList", "std::lock_guard;std::scoped_lock"))) {} + +void CoroutineHostileRAIICheck::registerMatchers(MatchFinder *Finder) { + // A suspension happens with co_await or co_yield. + auto ScopedLockable = varDecl(hasType(hasCanonicalType(hasDeclaration( + hasAttr(attr::Kind::ScopedLockable))))) + .bind("scoped-lockable"); + auto OtherRAII = varDecl(hasType(hasCanonicalType(hasDeclaration( + namedDecl(hasAnyName(RAIITypesList)))))) + .bind("raii"); + Finder->addMatcher(expr(anyOf(coawaitExpr(), coyieldExpr()), + forEachPrevStmt(declStmt(forEach( + varDecl(anyOf(ScopedLockable, OtherRAII)))))) + .bind("suspension"), + this); +} + +void CoroutineHostileRAIICheck::check(const MatchFinder::MatchResult &Result) { + if (const auto *VD = Result.Nodes.getNodeAs("scoped-lockable")) + diag(VD->getLocation(), + "%0 holds a lock across a suspension point of coroutine and could be " + "unlocked by a different thread") + << VD; + if (const auto *VD = Result.Nodes.getNodeAs("raii")) + diag(VD->getLocation(), + "%0 persists across a suspension point of coroutine") + << VD; + if (const auto *Suspension = Result.Nodes.getNodeAs("suspension")) + diag(Suspension->getBeginLoc(), "suspension point is here", + DiagnosticIDs::Note); +} + +void CoroutineHostileRAIICheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "RAIITypesList", + utils::options::serializeStringList(RAIITypesList)); +} +} // namespace clang::tidy::misc diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h new file mode 100644 index 00000000000000..a5e9cb89ef6769 --- /dev/null +++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h @@ -0,0 +1,50 @@ +//===--- CoroutineHostileRAIICheck.h - clang-tidy ----------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H + +#include "../ClangTidyCheck.h" +#include "clang/AST/ASTTypeTraits.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "llvm/ADT/StringRef.h" +#include + +namespace clang::tidy::misc { + +/// Detects when objects of certain hostile RAII types persists across +/// suspension points in a coroutine. Such hostile types include scoped-lockable +/// types and types belonging to a configurable denylist. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/misc/coroutine-hostile-raii.html +class CoroutineHostileRAIICheck : public ClangTidyCheck { +public: + CoroutineHostileRAIICheck(llvm::StringRef Name, ClangTidyContext *Context); + + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus20; + } + + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + + std::optional getCheckTraversalKind() const override { + return TK_AsIs; + } + +private: + // List of fully qualified types which should not persist across a suspension + // point in a coroutine. + std::vector RAIITypesList; +}; + +} // namespace clang::tidy::misc + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H diff --git a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp index 92590506e1ec1e..d8a88324ee63e0 100644 --- a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp @@ -11,6 +11,7 @@ #include "../ClangTidyModuleRegistry.h" #include "ConfusableIdentifierCheck.h" #include "ConstCorrectnessCheck.h" +#include "CoroutineHostileRAIICheck.h" #include "DefinitionsInHeadersCheck.h" #include "HeaderIncludeCycleCheck.h" #include "IncludeCleanerCheck.h" @@ -41,6 +42,8 @@ class MiscModule : public ClangTidyModule { "misc-confusable-identifiers"); CheckFactories.registerCheck( "misc-const-correctness"); + CheckFactories.registerCheck( + "misc-coroutine-hostile-raii"); CheckFactories.registerCheck( "misc-definitions-in-headers"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index af164d0462d52c..3e1fbe091c9ff6 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -163,6 +163,13 @@ New checks Flags coroutines that suspend while a lock guard is in scope at the suspension point. +- New :doc:`misc-coroutine-hostile-raii + ` check. + + Detects when objects of certain hostile RAII types persists across suspension + points in a coroutine. Such hostile types include scoped-lockable types and + types belonging to a configurable denylist. + - New :doc:`modernize-use-constraints ` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 2125ebd7a213c1..819e3974e3f133 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -241,6 +241,7 @@ Clang-Tidy Checks :doc:`llvmlibc-restrict-system-libc-headers `, "Yes" :doc:`misc-confusable-identifiers `, :doc:`misc-const-correctness `, "Yes" + :doc:`misc-coroutine-hostile-raii `_, :doc:`misc-definitions-in-headers `, "Yes" :doc:`misc-header-include-cycle `, :doc:`misc-include-cleaner `, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst new file mode 100644 index 00000000000000..dcb9f399774cba --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst @@ -0,0 +1,50 @@ +.. title:: clang-tidy - misc-coroutine-hostile-raii + +misc-coroutine-hostile-raii +==================== + +Detects when objects of certain hostile RAII types persists across suspension +points in a coroutine. Such hostile types include scoped-lockable types and +types belonging to a configurable denylist. + +Some objects require that they be destroyed on the same thread that created them. +Traditionally this requirement was often phrased as "must be a local variable", +under the assumption that local variables always work this way. However this is +incorrect with C++20 coroutines, since an intervening ``co_await`` may cause the +coroutine to suspend and later be resumed on another thread. + +The lifetime of an object that requires being destroyed on the same thread must +not encompass a ``co_await`` or ``co_yield`` point. If you create/destroy an object, +you must do so without allowing the coroutine to suspend in the meantime. + +Following types are considered as hostile: + + - Scoped-lockable types: A scoped-lockable object persisting across a suspension + point is problematic as the lock held by this object could be unlocked by a + different thread. This would be undefined behaviour. + This includes all types annotated with the ``scoped_lockable`` attribute. + + - Types belonging to a configurable denylist. + +.. code-block:: c++ + + // Call some async API while holding a lock. + { + const my::MutexLock l(&mu_); + + // Oops! The async Bar function may finish on a different + // thread from the one that created the MutexLock object and therefore called + // Mutex::Lock -- now Mutex::Unlock will be called on the wrong thread. + co_await Bar(); + } + + +Options +------- + +.. option:: RAIITypesList + + A semicolon-separated list of qualified types which should not be allowed to + persist across suspension points. + Eg: ``my::lockable; a::b;::my::other::lockable;`` + The default value of this option is `"std::lock_guard;std::scoped_lock"`. \ No newline at end of file diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp new file mode 100644 index 00000000000000..2d022e21c85d56 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp @@ -0,0 +1,192 @@ +// RUN: %check_clang_tidy -std=c++20 %s misc-coroutine-hostile-raii %t \ +// RUN: -config="{CheckOptions: \ +// RUN: {misc-coroutine-hostile-raii.RAIITypesList: \ +// RUN: 'my::Mutex; ::my::other::Mutex'}}" + +namespace std { + +template struct coroutine_traits { + using promise_type = typename R::promise_type; +}; + +template struct coroutine_handle; + +template <> struct coroutine_handle { + static coroutine_handle from_address(void *addr) noexcept { + coroutine_handle me; + me.ptr = addr; + return me; + } + void operator()() { resume(); } + void *address() const noexcept { return ptr; } + void resume() const { } + void destroy() const { } + bool done() const { return true; } + coroutine_handle &operator=(decltype(nullptr)) { + ptr = nullptr; + return *this; + } + coroutine_handle(decltype(nullptr)) : ptr(nullptr) {} + coroutine_handle() : ptr(nullptr) {} + // void reset() { ptr = nullptr; } // add to P0057? + explicit operator bool() const { return ptr; } + +protected: + void *ptr; +}; + +template struct coroutine_handle : coroutine_handle<> { + using coroutine_handle<>::operator=; + + static coroutine_handle from_address(void *addr) noexcept { + coroutine_handle me; + me.ptr = addr; + return me; + } + + Promise &promise() const { + return *reinterpret_cast( + __builtin_coro_promise(ptr, alignof(Promise), false)); + } + static coroutine_handle from_promise(Promise &promise) { + coroutine_handle p; + p.ptr = __builtin_coro_promise(&promise, alignof(Promise), true); + return p; + } +}; + +struct suspend_always { + bool await_ready() noexcept { return false; } + void await_suspend(std::coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; +} // namespace std + +struct ReturnObject { + struct promise_type { + ReturnObject get_return_object() { return {}; } + std::suspend_always initial_suspend() { return {}; } + std::suspend_always final_suspend() noexcept { return {}; } + void unhandled_exception() {} + std::suspend_always yield_value(int value) { return {}; } + }; +}; + +#define SCOPED_LOCKABLE __attribute__ ((scoped_lockable)) + +namespace absl { +class SCOPED_LOCKABLE Mutex {}; +using Mutex2 = Mutex; +} // namespace absl + +ReturnObject BasicWarning() { + absl::Mutex mtx; + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: 'mtx' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + int no_warning; + { + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + } +} + +ReturnObject BasicNoWarning() { + co_yield 1; + { absl::Mutex no_warning; } + int no_warning; + { + co_yield 1; + absl::Mutex no_warning; + } + co_yield 1; +} + +ReturnObject scopedLockableTest() { + co_yield 0; + absl::Mutex a; + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'a' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + absl::Mutex2 b; + // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: 'b' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + { + absl::Mutex no_warning_1; + { absl::Mutex no_warning_2; } + } + + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + absl::Mutex c; + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'c' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_await std::suspend_always{}; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + for(int i=1; i<=10; ++i ) { + absl::Mutex d; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 'd' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_await std::suspend_always{}; + // CHECK-MESSAGES: :[[@LINE-1]]:7: note: suspension point is here + co_yield 1; + absl::Mutex no_warning_3; + } + if (true) { + absl::Mutex e; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 'e' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:7: note: suspension point is here + absl::Mutex no_warning_4; + } + absl::Mutex no_warning_5; +} + +void lambda() { + absl::Mutex no_warning; + auto lambda = []() -> ReturnObject { + co_await std::suspend_always{}; + absl::Mutex a; + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'a' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + co_await std::suspend_always{}; + co_yield 1; + }; + absl::Mutex no_warning_2; +} + +template +ReturnObject raii_in_template(){ + T a; + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'a' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:3: note: suspension point is here +} +void foo_template() { raii_in_template(); } + +namespace my { +class Mutex{}; +namespace other { +class Mutex{}; +} // namespace other + +using Mutex2 = Mutex; +} // namespace my + +ReturnObject denyListTest() { + my::Mutex a; + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: 'a' persists across a suspension point of coroutine [misc-coroutine-hostile-raii] + my::other::Mutex b; + // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: 'b' persists across a suspension point of coroutine [misc-coroutine-hostile-raii] + my::Mutex2 c; + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: 'c' persists across a suspension point of coroutine [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here +} + +ReturnObject referenceTest(my::Mutex& ref) { + my::Mutex& a = ref; + co_yield 1; +} +ReturnObject pointerTest(my::Mutex* ref) { + my::Mutex* a = ref; + co_yield 1; +} + +ReturnObject functionArgTest(my::Mutex ref) { + co_yield 1; +} From e6d0b126c824222fca2f31a2ba571c2ee2bb4760 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 17 Oct 2023 20:53:42 +0200 Subject: [PATCH 07/15] Correctly compute conversion seq for args to fn with reversed param order (#68999) We associated conversion seq for args (when reversed) to the wrong index. This lead to clang believing reversed `operator==` a worse overload candidate than the `operator==` without reversed args when both these candidate were ambiguous. Fixes https://github.com/llvm/llvm-project/issues/53954 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/Sema/SemaOverload.cpp | 2 +- .../over.match.oper/p3-2a.cpp | 35 +++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 81cbfd90155fe0..443325bb0d1e17 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -117,6 +117,8 @@ C++ Language Changes C++20 Feature Support ^^^^^^^^^^^^^^^^^^^^^ +- Fix a bug in conversion sequence of arguments to a function with reversed parameter order. + Fixes `GH `_. C++23 Feature Support ^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index ce78994e655381..c271cebb9eb638 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -7688,7 +7688,7 @@ bool Sema::CheckNonDependentConversions( QualType ParamType = ParamTypes[I + Offset]; if (!ParamType->isDependentType()) { unsigned ConvIdx = PO == OverloadCandidateParamOrder::Reversed - ? 0 + ? Args.size() - 1 - (ThisConversions + I) : (ThisConversions + I); Conversions[ConvIdx] = TryCopyInitialization(*this, Args[I], ParamType, diff --git a/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp b/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp index 5c6804eb7726b5..02fe37dc1be505 100644 --- a/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp +++ b/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp @@ -324,6 +324,41 @@ bool x = X() == X(); // expected-warning {{ambiguous}} } } // namespace P2468R2 +namespace GH53954{ +namespace test1 { +struct P { + template + friend bool operator==(const P&, const T&); // expected-note {{candidate}} \ + // expected-note {{reversed parameter order}} +}; +struct A : public P {}; +struct B : public P {}; +bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}} +} + +namespace test2 { +struct P { + template + friend bool operator==(const T&, const P&); // expected-note {{candidate}} \ + // expected-note {{reversed parameter order}} +}; +struct A : public P {}; +struct B : public P {}; +bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}} +} + +namespace test3 { +struct P { + template + bool operator==(const S &) const; // expected-note {{candidate}} \ + // expected-note {{reversed parameter order}} +}; +struct A : public P {}; +struct B : public P {}; +bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}} +} +} + #else // NO_ERRORS namespace problem_cases { From fbf0a77e80f18a6d0fd8a28833b0bc87a99b1b2f Mon Sep 17 00:00:00 2001 From: Bill Wendling <5993918+bwendling@users.noreply.github.com> Date: Tue, 17 Oct 2023 12:03:26 -0700 Subject: [PATCH 08/15] [CodeGen] Avoid potential sideeffects from XOR (#67193) XOR may change flag values (e.g. for X86 gprs). In the case where that's not desirable, specify that buildClearRegister() should use MOV instead. --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 7 +++-- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 6 ++-- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 4 +-- llvm/lib/Target/X86/X86InstrInfo.cpp | 33 ++++++++++++++------ llvm/lib/Target/X86/X86InstrInfo.h | 4 +-- 5 files changed, 36 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 14e27abe882b03..6c3e02b2f59405 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2093,10 +2093,13 @@ class TargetInstrInfo : public MCInstrInfo { "Target didn't implement TargetInstrInfo::insertOutlinedCall!"); } - /// Insert an architecture-specific instruction to clear a register. + /// Insert an architecture-specific instruction to clear a register. If you + /// need to avoid sideeffects (e.g. avoid XOR on x86, which sets EFLAGS), set + /// \p AllowSideEffects to \p false. virtual void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, - DebugLoc &DL) const { + DebugLoc &DL, + bool AllowSideEffects = true) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::buildClearRegister!"); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 05c79b610cb36c..7dcf24c26e124a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -9134,13 +9134,15 @@ bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault( void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, - DebugLoc &DL) const { + DebugLoc &DL, + bool AllowSideEffects) const { const MachineFunction &MF = *MBB.getParent(); const AArch64Subtarget &STI = MF.getSubtarget(); const AArch64RegisterInfo &TRI = *STI.getRegisterInfo(); if (TRI.isGeneralPurposeRegister(MF, Reg)) { - BuildMI(MBB, Iter, DL, get(AArch64::MOVi64imm), Reg) + BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg) + .addImm(0) .addImm(0); } else if (STI.hasSVE()) { BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 4a40b2fa122159..a934103c90cbf9 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -333,8 +333,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; void buildClearRegister(Register Reg, MachineBasicBlock &MBB, - MachineBasicBlock::iterator Iter, - DebugLoc &DL) const override; + MachineBasicBlock::iterator Iter, DebugLoc &DL, + bool AllowSideEffects = true) const override; /// Returns the vector element size (B, H, S or D) of an SVE opcode. uint64_t getElementSizeForOpcode(unsigned Opc) const; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index f0c46419ab3516..4c6854da0ada3d 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -10130,27 +10130,36 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, return It; } -void X86InstrInfo::buildClearRegister(Register Reg, - MachineBasicBlock &MBB, +void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, - DebugLoc &DL) const { + DebugLoc &DL, + bool AllowSideEffects) const { const MachineFunction &MF = *MBB.getParent(); const X86Subtarget &ST = MF.getSubtarget(); const TargetRegisterInfo &TRI = getRegisterInfo(); if (ST.hasMMX() && X86::VR64RegClass.contains(Reg)) - // FIXME: Ignore MMX registers? + // FIXME: Should we ignore MMX registers? return; if (TRI.isGeneralPurposeRegister(MF, Reg)) { - BuildMI(MBB, Iter, DL, get(X86::XOR32rr), Reg) - .addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + // Convert register to the 32-bit version. Both 'movl' and 'xorl' clear the + // upper bits of a 64-bit register automagically. + Reg = getX86SubSuperRegister(Reg, 32); + + if (!AllowSideEffects) + // XOR affects flags, so use a MOV instead. + BuildMI(MBB, Iter, DL, get(X86::MOV32ri), Reg).addImm(0); + else + BuildMI(MBB, Iter, DL, get(X86::XOR32rr), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); } else if (X86::VR128RegClass.contains(Reg)) { // XMM# if (!ST.hasSSE1()) return; + // PXOR is safe to use because it doesn't affect flags. BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -10159,6 +10168,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, if (!ST.hasAVX()) return; + // VPXOR is safe to use because it doesn't affect flags. BuildMI(MBB, Iter, DL, get(X86::VPXORrr), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -10167,6 +10177,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, if (!ST.hasAVX512()) return; + // VPXORY is safe to use because it doesn't affect flags. BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -10178,9 +10189,11 @@ void X86InstrInfo::buildClearRegister(Register Reg, if (!ST.hasVLX()) return; - BuildMI(MBB, Iter, DL, get(ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr), Reg) - .addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + // KXOR is safe to use because it doesn't affect flags. + unsigned Op = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr; + BuildMI(MBB, Iter, DL, get(Op), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); } } diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 4d261a803421c1..e1199e20c318e2 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -583,8 +583,8 @@ class X86InstrInfo final : public X86GenInstrInfo { outliner::Candidate &C) const override; void buildClearRegister(Register Reg, MachineBasicBlock &MBB, - MachineBasicBlock::iterator Iter, - DebugLoc &DL) const override; + MachineBasicBlock::iterator Iter, DebugLoc &DL, + bool AllowSideEffects = true) const override; bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; From ab91e05e48d9ea47b60858dc259bdbf00dfde7fa Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 17 Oct 2023 12:16:45 -0700 Subject: [PATCH 09/15] [mlgo] Fix tests post 760e7d0 --- .../MLRegAlloc/Inputs/reference-log-noml.txt | 72 +++++++++---------- .../Inputs/reference-prio-log-noml.txt | 12 ++-- .../MLRegAlloc/dev-mode-prio-logging.ll | 2 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt index 0c024ad2b2e1bf..a5ccdde751ed56 100644 --- a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt +++ b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt @@ -16,8 +16,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7265065908432007,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.23831403255462646,0.07943800836801529,0.07943800836801529,0.07943800836801529,0.9912577867507935,0.07069581001996994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7940031290054321,0.7908878326416016,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7352024912834167 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.01417447254061699,0.014231426641345024,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4279724359512329 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7939082384109497,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.014218696393072605,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -40,8 +40,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.0,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2404157966375351,0.08013860136270523,0.0,0.08013860136270523,1.0,0.07131929695606232,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08013860136270523 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.0,0.7908878326416016,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7940031290054321 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.0,0.014231426641345024,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01417447254061699 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.0,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7939082384109497 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.0,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014218696393072605 max_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -64,8 +64,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.01989283785223961,0.02235277369618416,0.2813863754272461,0.02235277369618416,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9061526656150818,0.9591121673583984,0.7352024912834167,0.7908878326416016,0.7379283308982849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6725077629089355 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0737093985080719,0.01772311143577099,0.4279724359512329,0.014231426641345024,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4858442544937134 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9173259735107422,0.9647942781448364,0.7436708807945251,0.7907436490058899,0.7401107549667358,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6831487417221069 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07275574654340744,0.017619721591472626,0.4243086874485016,0.014276761561632156,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.47955840826034546 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -88,8 +88,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.0,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.0,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.23831403255462646,0.07943800836801529,1.0,0.0,0.9912577867507935,0.07069581001996994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07943800836801529 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.0,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7908878326416016 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.0,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014231426641345024 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.0,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7907436490058899 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.0,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014276761561632156 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -112,8 +112,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01117638684809208 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6693925261497498 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00449750293046236 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6799841523170471 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004439314361661673 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -136,8 +136,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01822916604578495 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6662772297859192 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008109557442367077 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6768196225166321 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008004635572433472 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -160,8 +160,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688586473465,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.01989283785223961,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9061526656150818,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6631619930267334 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0737093985080719,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07601386308670044 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9173259735107422,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.673655092716217 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07275574654340744,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07503040134906769 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -184,8 +184,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.2724486181679993e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.0,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9591121673583984 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01772311143577099 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9647942781448364 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.0,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017619721591472626 max_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -208,8 +208,8 @@ hint_weights_by_max: 1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853, start_bb_freq_by_max: 0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5110765099525452 hottest_bb_freq_by_max: 0.2813863754272461,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.2631579041481018,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.27892643213272095 -liverange_size: 0.7352024912834167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.6631619930267334,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.46105918288230896 -use_def_density: 0.42606985569000244,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05081497132778168,0.07567594200372696,0.48368439078330994,0.9955543875694275,0.07338171452283859,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +liverange_size: 0.7436708807945251,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.673655092716217,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4683544337749481 +use_def_density: 0.4243086874485016,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07503040134906769,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99146968126297 max_stage: 1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -232,8 +232,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7265065908432007, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.2631579041481018,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.6631619930267334,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7352024912834167 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.07601386308670044,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4279724359512329 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.673655092716217,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07503040134906769,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016 max_stage: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -256,8 +256,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204,0.0,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948,0.0,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018,0.0,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6631619930267334,0.0,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07601386308670044,0.0,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.673655092716217,0.0,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07503040134906769,0.0,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038 max_stage: 0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -280,8 +280,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,0.35764437913894653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.482131917196483e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.811345100402832,0.7318435907363892,0.5088096261024475,0.7421572804450989,0.8143532276153564,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2819080352783203 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,0.05657143518328667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8106942772865295,0.7343682646751404,0.510564923286438,0.744717538356781,0.8068132996559143,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2828805446624756 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,0.05657143518328667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -304,8 +304,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.5,0.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.5236390233039856,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3283064365386963e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.0,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01989283785223961 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.811345100402832,0.0,0.7318435907363892,0.5088096261024475,0.7421572804450989,0.8143532276153564,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.42606985569000244,0.0,0.07567594200372696,1.0,0.48368439078330994,0.9955543875694275,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07338171452283859 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8106942772865295,0.0,0.7343682646751404,0.510564923286438,0.744717538356781,0.8068132996559143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016,0.0,0.07503040134906769,0.99146968126297,0.47955840826034546,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07275574654340744 max_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -328,8 +328,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9982500076293 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6522180438041687,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015647225081920624 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.8219112157821655,0.5714285969734192,0.8334941864013672,0.9145752787590027,0.31660231947898865,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9652509689331055 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016097404062747955 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.8283073902130127,0.575875461101532,0.8399805426597595,0.9100194573402405,0.3190661370754242,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9688715934753418 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016164302825927734 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.2222222238779068 @@ -352,8 +352,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.9 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.778997310048936e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8797762989997864,0.7935694456100464,0.5517241358757019,0.8047530055046082,0.8830382227897644,0.30568498373031616,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008228360675275326 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8826290965080261,0.7995305061340332,0.5558685660362244,0.8107981085777283,0.8784037828445435,0.3079812228679657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00826177466660738 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.1944444477558136 @@ -376,8 +376,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9434669613838196, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,1.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,1.0,1.482131917196483e-10,0.3333333432674408,0.6365708112716675,0.6365708112716675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.778997310048936e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8342907428741455,0.2898806929588318,1.0,0.5231993198394775,0.7631462812423706,0.8373839855194092,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9253203868865967 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,1.0,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015127303078770638 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8337028622627258,0.290909081697464,1.0,0.5250554084777832,0.7658536434173584,0.8297117352485657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9250554442405701 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,1.0,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015127303078770638 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.1388888955116272 @@ -400,8 +400,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.07419288158416748 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9451456069946289,0.5747572779655457,0.8383495211601257,0.9199029207229614,0.3184466063976288,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9839805960655212 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006958406884223223 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9521695971488953,0.5838264226913452,0.8515779376029968,0.922583818435669,0.32347139716148376,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9837278127670288 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007072303909808397 max_stage: 0,0,0,0,0,0,0,0,0,4,4,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.1111111119389534 @@ -424,8 +424,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.07419288158416748 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9451456069946289,0.5747572779655457,0.8383495211601257,0.9199029207229614,0.3184466063976288,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9849514365196228 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006951410323381424 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9521695971488953,0.5838264226913452,0.8515779376029968,0.922583818435669,0.32347139716148376,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9847140312194824 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007065076380968094 max_stage: 0,0,0,0,0,0,0,0,0,4,4,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.0833333358168602 diff --git a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt index beb0c5205979c0..01b4a3835c978c 100644 --- a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt +++ b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt @@ -171,7 +171,7 @@ observation: 28 li_size: 0 stage: 0 weight: 0.0 -priority: 2147485184.0 +priority: 2147484928.0 reward: 0.0 observation: 29 li_size: 0 @@ -237,7 +237,7 @@ observation: 39 li_size: 0 stage: 0 weight: 0.0 -priority: 3598.0 +priority: 3534.0 reward: 0.0 observation: 40 li_size: 0 @@ -249,7 +249,7 @@ observation: 41 li_size: 0 stage: 0 weight: 0.0 -priority: 3582.0 +priority: 3518.0 reward: 0.0 observation: 42 li_size: 0 @@ -273,7 +273,7 @@ observation: 45 li_size: 0 stage: 0 weight: 0.0 -priority: 4078.0 +priority: 4046.0 reward: 0.0 observation: 46 li_size: 0 @@ -291,7 +291,7 @@ observation: 48 li_size: 0 stage: 0 weight: 0.0 -priority: 4384.0 +priority: 4304.0 reward: 0.0 observation: 49 li_size: 0 @@ -309,7 +309,7 @@ observation: 51 li_size: 0 stage: 0 weight: 0.0 -priority: 2684358144.0 +priority: 2684357888.0 reward: 0.0 observation: 52 li_size: 0 diff --git a/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll b/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll index 21bb75278874a5..6b013b55df77ad 100644 --- a/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll +++ b/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll @@ -24,5 +24,5 @@ ; CHECK-NOT: nan ; CHECK-LABEL: priority: ; NOML-SAME: 2684358144.0 -; ML-SAME: 3599 +; ML-SAME: 3535 ; CHECK-LABEL: reward: From f781508d319438d2c6d6bb264328b018c15b6946 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 17 Oct 2023 15:21:06 -0400 Subject: [PATCH 10/15] [gn] port dd64c82cbc9c6 --- llvm/utils/gn/secondary/llvm/test/BUILD.gn | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index f859af249faf5f..dd9fd0c10d53ef 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -222,7 +222,10 @@ write_lit_config("lit_site_cfg") { write_lit_config("lit_unit_site_cfg") { input = "//llvm/test/Unit/lit.site.cfg.py.in" output = llvm_lit_unit_site_cfg_file - extra_values = [ "LLVM_BUILD_MODE=." ] + extra_values = [ + "LLVM_BUILD_MODE=.", + "LLVM_GTEST_RUN_UNDER=", + ] } # This target should contain all dependencies of check-llvm. From c0f3478934bec4a585cd1ed973a0ee39e0ceb7be Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 17 Oct 2023 19:22:25 +0000 Subject: [PATCH 11/15] [gn build] Port 31512811b8c0 --- .../gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn index 8a811bc990d41b..36957f502c3231 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn @@ -34,6 +34,7 @@ static_library("misc") { sources = [ "ConfusableIdentifierCheck.cpp", "ConstCorrectnessCheck.cpp", + "CoroutineHostileRAIICheck.cpp", "DefinitionsInHeadersCheck.cpp", "HeaderIncludeCycleCheck.cpp", "IncludeCleanerCheck.cpp", From 7dc644fc463a8f42f54d63a99c3a4579df2c3859 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 17 Oct 2023 12:31:34 -0700 Subject: [PATCH 12/15] [CodeGen] Temporary disable the unreachable It should be there, but we need all platforms that use stack protectors to implement it first. --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 6c3e02b2f59405..8e7499ac626a74 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2100,8 +2100,12 @@ class TargetInstrInfo : public MCInstrInfo { MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects = true) const { +#if 0 + // FIXME: This should exist once all platforms that use stack protectors + // implements it. llvm_unreachable( "Target didn't implement TargetInstrInfo::buildClearRegister!"); +#endif } /// Return true if the function can safely be outlined from. From 389958a9f67ae35dde9c46205bb032842f0cad6a Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 17 Oct 2023 12:35:30 -0700 Subject: [PATCH 13/15] [CodeGen][NFC] Fix formatting This fixes the formatting introduced by fbf0a77e80f18a6d0fd8a28833b0bc87a99b1b2f. --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 7dcf24c26e124a..7f1421549b1492 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -9141,9 +9141,7 @@ void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB, const AArch64RegisterInfo &TRI = *STI.getRegisterInfo(); if (TRI.isGeneralPurposeRegister(MF, Reg)) { - BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg) - .addImm(0) - .addImm(0); + BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0); } else if (STI.hasSVE()) { BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg) .addImm(0) From 0996ceece605ccba3f4c0079e0204e3c0b068d0e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 17 Oct 2023 12:49:17 -0700 Subject: [PATCH 14/15] [ELF][test] Improve relocatable link & /DISCARD/ test Check that #69295 will fix symbols referenced by relocations that are defined in discarded sections. --- lld/test/ELF/linkerscript/discard-section.s | 22 ++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/lld/test/ELF/linkerscript/discard-section.s b/lld/test/ELF/linkerscript/discard-section.s index 9e021ac83f563a..0ede36c7351f29 100644 --- a/lld/test/ELF/linkerscript/discard-section.s +++ b/lld/test/ELF/linkerscript/discard-section.s @@ -6,7 +6,27 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64 b.s -o b.o # RUN: ld.lld -T a.lds a.o b.o -z undefs -o /dev/null 2>&1 | count 0 # RUN: ld.lld -T a.lds a.o b.o -o /dev/null 2>&1 | count 0 -# RUN: ld.lld -r -T a.lds a.o b.o -o /dev/null 2>&1 | count 0 +# RUN: ld.lld -r -T a.lds a.o b.o -o a.ro 2>&1 | count 0 +# RUN: llvm-readelf -r -s a.ro | FileCheck %s --check-prefix=RELOC + +# RELOC: Relocation section '.rela.bbb' at offset {{.*}} contains 1 entries: +# RELOC-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# RELOC-NEXT: 0000000000000000 0000000000000000 R_X86_64_NONE 0 +# RELOC-EMPTY: +# RELOC-NEXT: Relocation section '.rela.data' at offset {{.*}} contains 4 entries: +# RELOC-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# RELOC-NEXT: 0000000000000000 0000000000000001 R_X86_64_64 0 +# RELOC-NEXT: 0000000000000008 0000000000000001 R_X86_64_64 0 +# RELOC-NEXT: 0000000000000010 0000000000000001 R_X86_64_64 0 +# RELOC-NEXT: 0000000000000018 0000000000000001 R_X86_64_64 0 + +# RELOC: Num: Value Size Type Bind Vis Ndx Name +# RELOC-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# RELOC-NEXT: 1: 0000000000000000 0 SECTION LOCAL DEFAULT 1 .text +# RELOC-NEXT: 2: 0000000000000000 0 SECTION LOCAL DEFAULT 2 .bbb +# RELOC-NEXT: 3: 0000000000000000 0 SECTION LOCAL DEFAULT 4 .data +# RELOC-NEXT: 4: 0000000000000000 0 NOTYPE GLOBAL DEFAULT 1 _start +# RELOC-EMPTY: #--- a.s .globl _start From 122064a6303eb9c06e0af231f5a4ce145d9a2e67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 17 Oct 2023 22:49:52 +0300 Subject: [PATCH 15/15] [libcxx] [test] Add a test parameter for disabling memory intensive tests (#68214) Specifically, the test std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp allocates a std::string with INT_MAX-1 elements, and then writes this to a std::stringstream. On Linux, running this test consumes around 5.0 GB of memory; on Windows, it ends up using up to 6.8 GB of memory. This limits whether such tests can run on e.g. GitHub Actions runners, where the free runners are limited to 8 GB of memory. This is somewhat similar to, but still notably different, from the existing test parameter long_tests. --- .../stringstream/stringstream.members/gcount.pass.cpp | 1 + libcxx/utils/libcxx/test/params.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp index 3a5edac6c58b4f..8dc74421e78959 100644 --- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: 32-bit-pointer +// REQUIRES: large_tests // Test that tellp() does not break the stringstream after INT_MAX, due to use // of pbump() that accept int. diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index c3732560f5e469..e34fd0387f4f5b 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -276,6 +276,14 @@ def getStdFlag(cfg, std): help="Whether to enable tests that take longer to run. This can be useful when running on a very slow device.", actions=lambda enabled: [] if not enabled else [AddFeature("long_tests")], ), + Parameter( + name="large_tests", + choices=[True, False], + type=bool, + default=True, + help="Whether to enable tests that use a lot of memory. This can be useful when running on a device with limited amounts of memory.", + actions=lambda enabled: [] if not enabled else [AddFeature("large_tests")], + ), Parameter( name="hardening_mode", choices=["unchecked", "hardened", "safe", "debug"],