diff --git a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt index 2e88e68a544782..d9ec268650c053 100644 --- a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt @@ -18,6 +18,7 @@ add_custom_target(genconfusable DEPENDS Confusables.inc) add_clang_library(clangTidyMiscModule ConstCorrectnessCheck.cpp + CoroutineHostileRAIICheck.cpp DefinitionsInHeadersCheck.cpp ConfusableIdentifierCheck.cpp HeaderIncludeCycleCheck.cpp diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp new file mode 100644 index 00000000000000..e820cd39d83d21 --- /dev/null +++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp @@ -0,0 +1,98 @@ +//===--- CoroutineHostileRAIICheck.cpp - clang-tidy -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CoroutineHostileRAIICheck.h" +#include "../utils/OptionsUtils.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchersInternal.h" +#include "clang/Basic/AttrKinds.h" +#include "clang/Basic/DiagnosticIDs.h" + +using namespace clang::ast_matchers; +namespace clang::tidy::misc { +namespace { +using clang::ast_matchers::internal::BoundNodesTreeBuilder; + +AST_MATCHER_P(Stmt, forEachPrevStmt, ast_matchers::internal::Matcher, + InnerMatcher) { + DynTypedNode P; + bool IsHostile = false; + for (const Stmt *Child = &Node; Child; Child = P.get()) { + auto Parents = Finder->getASTContext().getParents(*Child); + if (Parents.empty()) + break; + P = *Parents.begin(); + auto *PCS = P.get(); + if (!PCS) + continue; + for (const auto &Sibling : PCS->children()) { + // Child contains suspension. Siblings after Child do not persist across + // this suspension. + if (Sibling == Child) + break; + // In case of a match, add the bindings as a separate match. Also don't + // clear the bindings if a match is not found (unlike Matcher::matches). 
+ BoundNodesTreeBuilder SiblingBuilder; + if (InnerMatcher.matches(*Sibling, Finder, &SiblingBuilder)) { + Builder->addMatch(SiblingBuilder); + IsHostile = true; + } + } + } + return IsHostile; +} +} // namespace + +CoroutineHostileRAIICheck::CoroutineHostileRAIICheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + RAIITypesList(utils::options::parseStringList( + Options.get("RAIITypesList", "std::lock_guard;std::scoped_lock"))) {} + +void CoroutineHostileRAIICheck::registerMatchers(MatchFinder *Finder) { + // A suspension happens with co_await or co_yield. + auto ScopedLockable = varDecl(hasType(hasCanonicalType(hasDeclaration( + hasAttr(attr::Kind::ScopedLockable))))) + .bind("scoped-lockable"); + auto OtherRAII = varDecl(hasType(hasCanonicalType(hasDeclaration( + namedDecl(hasAnyName(RAIITypesList)))))) + .bind("raii"); + Finder->addMatcher(expr(anyOf(coawaitExpr(), coyieldExpr()), + forEachPrevStmt(declStmt(forEach( + varDecl(anyOf(ScopedLockable, OtherRAII)))))) + .bind("suspension"), + this); +} + +void CoroutineHostileRAIICheck::check(const MatchFinder::MatchResult &Result) { + if (const auto *VD = Result.Nodes.getNodeAs("scoped-lockable")) + diag(VD->getLocation(), + "%0 holds a lock across a suspension point of coroutine and could be " + "unlocked by a different thread") + << VD; + if (const auto *VD = Result.Nodes.getNodeAs("raii")) + diag(VD->getLocation(), + "%0 persists across a suspension point of coroutine") + << VD; + if (const auto *Suspension = Result.Nodes.getNodeAs("suspension")) + diag(Suspension->getBeginLoc(), "suspension point is here", + DiagnosticIDs::Note); +} + +void CoroutineHostileRAIICheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "RAIITypesList", + utils::options::serializeStringList(RAIITypesList)); +} +} // namespace clang::tidy::misc diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h 
b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h new file mode 100644 index 00000000000000..a5e9cb89ef6769 --- /dev/null +++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h @@ -0,0 +1,50 @@ +//===--- CoroutineHostileRAIICheck.h - clang-tidy ----------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H + +#include "../ClangTidyCheck.h" +#include "clang/AST/ASTTypeTraits.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "llvm/ADT/StringRef.h" +#include + +namespace clang::tidy::misc { + +/// Detects when objects of certain hostile RAII types persist across +/// suspension points in a coroutine. Such hostile types include scoped-lockable +/// types and types belonging to a configurable denylist. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/misc/coroutine-hostile-raii.html +class CoroutineHostileRAIICheck : public ClangTidyCheck { +public: + CoroutineHostileRAIICheck(llvm::StringRef Name, ClangTidyContext *Context); + + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus20; + } + + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + + std::optional getCheckTraversalKind() const override { + return TK_AsIs; + } + +private: + // List of fully qualified types which should not persist across a suspension + // point in a coroutine. 
+ std::vector RAIITypesList; +}; + +} // namespace clang::tidy::misc + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H diff --git a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp index 92590506e1ec1e..d8a88324ee63e0 100644 --- a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp @@ -11,6 +11,7 @@ #include "../ClangTidyModuleRegistry.h" #include "ConfusableIdentifierCheck.h" #include "ConstCorrectnessCheck.h" +#include "CoroutineHostileRAIICheck.h" #include "DefinitionsInHeadersCheck.h" #include "HeaderIncludeCycleCheck.h" #include "IncludeCleanerCheck.h" @@ -41,6 +42,8 @@ class MiscModule : public ClangTidyModule { "misc-confusable-identifiers"); CheckFactories.registerCheck( "misc-const-correctness"); + CheckFactories.registerCheck( + "misc-coroutine-hostile-raii"); CheckFactories.registerCheck( "misc-definitions-in-headers"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index af164d0462d52c..3e1fbe091c9ff6 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -163,6 +163,13 @@ New checks Flags coroutines that suspend while a lock guard is in scope at the suspension point. +- New :doc:`misc-coroutine-hostile-raii + ` check. + + Detects when objects of certain hostile RAII types persist across suspension + points in a coroutine. Such hostile types include scoped-lockable types and + types belonging to a configurable denylist. + - New :doc:`modernize-use-constraints ` check. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 2125ebd7a213c1..819e3974e3f133 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -241,6 +241,7 @@ Clang-Tidy Checks :doc:`llvmlibc-restrict-system-libc-headers `, "Yes" :doc:`misc-confusable-identifiers `, :doc:`misc-const-correctness `, "Yes" + :doc:`misc-coroutine-hostile-raii `, :doc:`misc-definitions-in-headers `, "Yes" :doc:`misc-header-include-cycle `, :doc:`misc-include-cleaner `, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst new file mode 100644 index 00000000000000..dcb9f399774cba --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst @@ -0,0 +1,50 @@ +.. title:: clang-tidy - misc-coroutine-hostile-raii + +misc-coroutine-hostile-raii +=========================== + +Detects when objects of certain hostile RAII types persist across suspension +points in a coroutine. Such hostile types include scoped-lockable types and +types belonging to a configurable denylist. + +Some objects require that they be destroyed on the same thread that created them. +Traditionally this requirement was often phrased as "must be a local variable", +under the assumption that local variables always work this way. However this is +incorrect with C++20 coroutines, since an intervening ``co_await`` may cause the +coroutine to suspend and later be resumed on another thread. + +The lifetime of an object that requires being destroyed on the same thread must +not encompass a ``co_await`` or ``co_yield`` point. If you create/destroy an object, +you must do so without allowing the coroutine to suspend in the meantime. 
+ +The following types are considered hostile: + + - Scoped-lockable types: A scoped-lockable object persisting across a suspension + point is problematic as the lock held by this object could be unlocked by a + different thread. This would be undefined behaviour. + This includes all types annotated with the ``scoped_lockable`` attribute. + + - Types belonging to a configurable denylist. + +.. code-block:: c++ + + // Call some async API while holding a lock. + { + const my::MutexLock l(&mu_); + + // Oops! The async Bar function may finish on a different + // thread from the one that created the MutexLock object and therefore called + // Mutex::Lock -- now Mutex::Unlock will be called on the wrong thread. + co_await Bar(); + } + + +Options +------- + +.. option:: RAIITypesList + + A semicolon-separated list of qualified types which should not be allowed to + persist across suspension points. + E.g.: ``my::lockable; a::b;::my::other::lockable;`` + The default value of this option is ``"std::lock_guard;std::scoped_lock"``. 
\ No newline at end of file diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp new file mode 100644 index 00000000000000..2d022e21c85d56 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp @@ -0,0 +1,192 @@ +// RUN: %check_clang_tidy -std=c++20 %s misc-coroutine-hostile-raii %t \ +// RUN: -config="{CheckOptions: \ +// RUN: {misc-coroutine-hostile-raii.RAIITypesList: \ +// RUN: 'my::Mutex; ::my::other::Mutex'}}" + +namespace std { + +template struct coroutine_traits { + using promise_type = typename R::promise_type; +}; + +template struct coroutine_handle; + +template <> struct coroutine_handle { + static coroutine_handle from_address(void *addr) noexcept { + coroutine_handle me; + me.ptr = addr; + return me; + } + void operator()() { resume(); } + void *address() const noexcept { return ptr; } + void resume() const { } + void destroy() const { } + bool done() const { return true; } + coroutine_handle &operator=(decltype(nullptr)) { + ptr = nullptr; + return *this; + } + coroutine_handle(decltype(nullptr)) : ptr(nullptr) {} + coroutine_handle() : ptr(nullptr) {} + // void reset() { ptr = nullptr; } // add to P0057? 
+ explicit operator bool() const { return ptr; } + +protected: + void *ptr; +}; + +template struct coroutine_handle : coroutine_handle<> { + using coroutine_handle<>::operator=; + + static coroutine_handle from_address(void *addr) noexcept { + coroutine_handle me; + me.ptr = addr; + return me; + } + + Promise &promise() const { + return *reinterpret_cast( + __builtin_coro_promise(ptr, alignof(Promise), false)); + } + static coroutine_handle from_promise(Promise &promise) { + coroutine_handle p; + p.ptr = __builtin_coro_promise(&promise, alignof(Promise), true); + return p; + } +}; + +struct suspend_always { + bool await_ready() noexcept { return false; } + void await_suspend(std::coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; +} // namespace std + +struct ReturnObject { + struct promise_type { + ReturnObject get_return_object() { return {}; } + std::suspend_always initial_suspend() { return {}; } + std::suspend_always final_suspend() noexcept { return {}; } + void unhandled_exception() {} + std::suspend_always yield_value(int value) { return {}; } + }; +}; + +#define SCOPED_LOCKABLE __attribute__ ((scoped_lockable)) + +namespace absl { +class SCOPED_LOCKABLE Mutex {}; +using Mutex2 = Mutex; +} // namespace absl + +ReturnObject BasicWarning() { + absl::Mutex mtx; + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: 'mtx' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + int no_warning; + { + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + } +} + +ReturnObject BasicNoWarning() { + co_yield 1; + { absl::Mutex no_warning; } + int no_warning; + { + co_yield 1; + absl::Mutex no_warning; + } + co_yield 1; +} + +ReturnObject scopedLockableTest() { + co_yield 0; + absl::Mutex a; + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'a' holds a lock across a suspension point of coroutine and could be unlocked by a different thread 
[misc-coroutine-hostile-raii] + absl::Mutex2 b; + // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: 'b' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + { + absl::Mutex no_warning_1; + { absl::Mutex no_warning_2; } + } + + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + absl::Mutex c; + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'c' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_await std::suspend_always{}; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + for(int i=1; i<=10; ++i ) { + absl::Mutex d; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 'd' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_await std::suspend_always{}; + // CHECK-MESSAGES: :[[@LINE-1]]:7: note: suspension point is here + co_yield 1; + absl::Mutex no_warning_3; + } + if (true) { + absl::Mutex e; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 'e' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:7: note: suspension point is here + absl::Mutex no_warning_4; + } + absl::Mutex no_warning_5; +} + +void lambda() { + absl::Mutex no_warning; + auto lambda = []() -> ReturnObject { + co_await std::suspend_always{}; + absl::Mutex a; + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 'a' holds a lock across a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here + co_await std::suspend_always{}; + co_yield 1; + }; + absl::Mutex no_warning_2; +} + +template +ReturnObject raii_in_template(){ + T a; + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'a' holds a lock across 
a suspension point of coroutine and could be unlocked by a different thread [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:3: note: suspension point is here +} +void foo_template() { raii_in_template(); } + +namespace my { +class Mutex{}; +namespace other { +class Mutex{}; +} // namespace other + +using Mutex2 = Mutex; +} // namespace my + +ReturnObject denyListTest() { + my::Mutex a; + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: 'a' persists across a suspension point of coroutine [misc-coroutine-hostile-raii] + my::other::Mutex b; + // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: 'b' persists across a suspension point of coroutine [misc-coroutine-hostile-raii] + my::Mutex2 c; + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: 'c' persists across a suspension point of coroutine [misc-coroutine-hostile-raii] + co_yield 1; + // CHECK-MESSAGES: :[[@LINE-1]]:5: note: suspension point is here +} + +ReturnObject referenceTest(my::Mutex& ref) { + my::Mutex& a = ref; + co_yield 1; +} +ReturnObject pointerTest(my::Mutex* ref) { + my::Mutex* a = ref; + co_yield 1; +} + +ReturnObject functionArgTest(my::Mutex ref) { + co_yield 1; +} diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 81cbfd90155fe0..443325bb0d1e17 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -117,6 +117,8 @@ C++ Language Changes C++20 Feature Support ^^^^^^^^^^^^^^^^^^^^^ +- Fix a bug in conversion sequence of arguments to a function with reversed parameter order. + Fixes `GH `_. C++23 Feature Support ^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index ce78994e655381..c271cebb9eb638 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -7688,7 +7688,7 @@ bool Sema::CheckNonDependentConversions( QualType ParamType = ParamTypes[I + Offset]; if (!ParamType->isDependentType()) { unsigned ConvIdx = PO == OverloadCandidateParamOrder::Reversed - ? 
0 + ? Args.size() - 1 - (ThisConversions + I) : (ThisConversions + I); Conversions[ConvIdx] = TryCopyInitialization(*this, Args[I], ParamType, diff --git a/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp b/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp index 5c6804eb7726b5..02fe37dc1be505 100644 --- a/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp +++ b/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p3-2a.cpp @@ -324,6 +324,41 @@ bool x = X() == X(); // expected-warning {{ambiguous}} } } // namespace P2468R2 +namespace GH53954{ +namespace test1 { +struct P { + template + friend bool operator==(const P&, const T&); // expected-note {{candidate}} \ + // expected-note {{reversed parameter order}} +}; +struct A : public P {}; +struct B : public P {}; +bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}} +} + +namespace test2 { +struct P { + template + friend bool operator==(const T&, const P&); // expected-note {{candidate}} \ + // expected-note {{reversed parameter order}} +}; +struct A : public P {}; +struct B : public P {}; +bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}} +} + +namespace test3 { +struct P { + template + bool operator==(const S &) const; // expected-note {{candidate}} \ + // expected-note {{reversed parameter order}} +}; +struct A : public P {}; +struct B : public P {}; +bool check(A a, B b) { return a == b; } // expected-error {{ '==' is ambiguous}} +} +} + #else // NO_ERRORS namespace problem_cases { diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index ad68216a76b942..731508088cb6f8 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -104,6 +104,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.fgetc libc.src.stdio.getc libc.src.stdio.getchar + libc.src.stdio.ungetc libc.src.stdio.stdin libc.src.stdio.stdout libc.src.stdio.stderr 
diff --git a/libc/docs/gpu/support.rst b/libc/docs/gpu/support.rst index fd27273ed562e4..806af5f219dfb4 100644 --- a/libc/docs/gpu/support.rst +++ b/libc/docs/gpu/support.rst @@ -134,6 +134,7 @@ ftell |check| |check| fflush |check| |check| fgetc |check| |check| fgets |check| |check| +ungetc |check| |check| getc |check| |check| getchar |check| |check| puts |check| |check| diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h index 61e17756fa6477..2fd318f06a7db1 100644 --- a/libc/include/llvm-libc-types/rpc_opcodes_t.h +++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h @@ -29,6 +29,7 @@ typedef enum { RPC_FSEEK, RPC_FTELL, RPC_FFLUSH, + RPC_UNGETC, RPC_LAST = 0xFFFF, } rpc_opcode_t; diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 169bc592dee488..380474ce271180 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -54,18 +54,6 @@ add_entrypoint_object( libc.src.__support.File.platform_file ) -add_entrypoint_object( - ungetc - SRCS - ungetc.cpp - HDRS - ungetc.h - DEPENDS - libc.include.stdio - libc.src.__support.File.file - libc.src.__support.File.platform_file -) - add_entrypoint_object( fopencookie SRCS @@ -286,6 +274,7 @@ add_stdio_entrypoint_object(getc_unlocked) add_stdio_entrypoint_object(getchar) add_stdio_entrypoint_object(getchar_unlocked) add_stdio_entrypoint_object(fgets) +add_stdio_entrypoint_object(ungetc) add_stdio_entrypoint_object(stdin) add_stdio_entrypoint_object(stdout) add_stdio_entrypoint_object(stderr) diff --git a/libc/src/stdio/generic/CMakeLists.txt b/libc/src/stdio/generic/CMakeLists.txt index 282d056bba7129..2ecef879eb4bbf 100644 --- a/libc/src/stdio/generic/CMakeLists.txt +++ b/libc/src/stdio/generic/CMakeLists.txt @@ -342,6 +342,18 @@ add_entrypoint_object( libc.src.__support.File.platform_file ) +add_entrypoint_object( + ungetc + SRCS + ungetc.cpp + HDRS + ../ungetc.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file 
+ libc.src.__support.File.platform_file +) + add_entrypoint_object( stdin SRCS diff --git a/libc/src/stdio/ungetc.cpp b/libc/src/stdio/generic/ungetc.cpp similarity index 100% rename from libc/src/stdio/ungetc.cpp rename to libc/src/stdio/generic/ungetc.cpp diff --git a/libc/src/stdio/gpu/CMakeLists.txt b/libc/src/stdio/gpu/CMakeLists.txt index 047b68931bce5c..1b1e2a903cc0b9 100644 --- a/libc/src/stdio/gpu/CMakeLists.txt +++ b/libc/src/stdio/gpu/CMakeLists.txt @@ -251,6 +251,17 @@ add_entrypoint_object( .ferror ) +add_entrypoint_object( + ungetc + SRCS + ungetc.cpp + HDRS + ../ungetc.h + DEPENDS + libc.include.stdio + .gpu_file +) + add_entrypoint_object( stdin SRCS diff --git a/libc/src/stdio/gpu/ungetc.cpp b/libc/src/stdio/gpu/ungetc.cpp new file mode 100644 index 00000000000000..373164a0c53a32 --- /dev/null +++ b/libc/src/stdio/gpu/ungetc.cpp @@ -0,0 +1,29 @@ +//===-- Implementation of ungetc ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/ungetc.h" +#include "file.h" + +#include + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, ungetc, (int c, ::FILE *stream)) { + int ret; + rpc::Client::Port port = rpc::client.open(); + port.send_and_recv( + [=](rpc::Buffer *buffer) { + buffer->data[0] = c; + buffer->data[1] = file::from_stream(stream); + }, + [&](rpc::Buffer *buffer) { ret = static_cast(buffer->data[0]); }); + port.close(); + return ret; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/stdio/ungetc_test.cpp b/libc/test/src/stdio/ungetc_test.cpp index 75eecc87ef265f..c98995ff0811bb 100644 --- a/libc/test/src/stdio/ungetc_test.cpp +++ b/libc/test/src/stdio/ungetc_test.cpp @@ -24,12 +24,16 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) { constexpr size_t CONTENT_SIZE = sizeof(CONTENT); ASSERT_EQ(CONTENT_SIZE, LIBC_NAMESPACE::fwrite(CONTENT, 1, CONTENT_SIZE, file)); +#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations. // Cannot unget to an un-readable file. ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc('1', file)); +#endif ASSERT_EQ(0, LIBC_NAMESPACE::fclose(file)); file = LIBC_NAMESPACE::fopen(FILENAME, "r+"); ASSERT_FALSE(file == nullptr); + // Calling with an EOF should always return EOF without doing anything. + ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc(EOF, file)); char c; ASSERT_EQ(LIBC_NAMESPACE::fread(&c, 1, 1, file), size_t(1)); ASSERT_EQ(c, CONTENT[0]); @@ -43,8 +47,10 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) { // ungetc should not fail after a seek operation. int unget_char = 'z'; ASSERT_EQ(unget_char, LIBC_NAMESPACE::ungetc(unget_char, file)); +#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations. // Another unget should fail. 
ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc(unget_char, file)); +#endif // ungetting a char at the beginning of the file will allow us to fetch // one additional character. char new_data[CONTENT_SIZE + 1]; @@ -53,8 +59,10 @@ TEST(LlvmLibcUngetcTest, UngetAndReadBack) { ASSERT_STREQ("zabcdef", new_data); ASSERT_EQ(size_t(1), LIBC_NAMESPACE::fwrite("x", 1, 1, file)); +#ifndef LIBC_TARGET_ARCH_IS_GPU // Behavior varies between libc implementations. // unget should fail after a write operation. ASSERT_EQ(EOF, LIBC_NAMESPACE::ungetc('1', file)); +#endif ASSERT_EQ(0, LIBC_NAMESPACE::fclose(file)); } diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index 1c1c9f1ae9e6b5..0550115f7cd1a1 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -186,6 +186,13 @@ struct Server { }); break; } + case RPC_UNGETC: { + port->recv_and_send([](rpc::Buffer *buffer) { + buffer->data[0] = ungetc(static_cast(buffer->data[0]), + file::to_stream(buffer->data[1])); + }); + break; + } case RPC_NOOP: { port->recv([](rpc::Buffer *) {}); break; diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp index 3a5edac6c58b4f..8dc74421e78959 100644 --- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: 32-bit-pointer +// REQUIRES: large_tests // Test that tellp() does not break the stringstream after INT_MAX, due to use // of pbump() that accept int. 
diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index c3732560f5e469..e34fd0387f4f5b 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -276,6 +276,14 @@ def getStdFlag(cfg, std): help="Whether to enable tests that take longer to run. This can be useful when running on a very slow device.", actions=lambda enabled: [] if not enabled else [AddFeature("long_tests")], ), + Parameter( + name="large_tests", + choices=[True, False], + type=bool, + default=True, + help="Whether to enable tests that use a lot of memory. This can be useful when running on a device with limited amounts of memory.", + actions=lambda enabled: [] if not enabled else [AddFeature("large_tests")], + ), Parameter( name="hardening_mode", choices=["unchecked", "hardened", "safe", "debug"], diff --git a/lld/test/ELF/linkerscript/discard-section.s b/lld/test/ELF/linkerscript/discard-section.s index 9e021ac83f563a..0ede36c7351f29 100644 --- a/lld/test/ELF/linkerscript/discard-section.s +++ b/lld/test/ELF/linkerscript/discard-section.s @@ -6,7 +6,27 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64 b.s -o b.o # RUN: ld.lld -T a.lds a.o b.o -z undefs -o /dev/null 2>&1 | count 0 # RUN: ld.lld -T a.lds a.o b.o -o /dev/null 2>&1 | count 0 -# RUN: ld.lld -r -T a.lds a.o b.o -o /dev/null 2>&1 | count 0 +# RUN: ld.lld -r -T a.lds a.o b.o -o a.ro 2>&1 | count 0 +# RUN: llvm-readelf -r -s a.ro | FileCheck %s --check-prefix=RELOC + +# RELOC: Relocation section '.rela.bbb' at offset {{.*}} contains 1 entries: +# RELOC-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# RELOC-NEXT: 0000000000000000 0000000000000000 R_X86_64_NONE 0 +# RELOC-EMPTY: +# RELOC-NEXT: Relocation section '.rela.data' at offset {{.*}} contains 4 entries: +# RELOC-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# RELOC-NEXT: 0000000000000000 0000000000000001 R_X86_64_64 0 +# RELOC-NEXT: 0000000000000008 0000000000000001 R_X86_64_64 0 +# 
RELOC-NEXT: 0000000000000010 0000000000000001 R_X86_64_64 0 +# RELOC-NEXT: 0000000000000018 0000000000000001 R_X86_64_64 0 + +# RELOC: Num: Value Size Type Bind Vis Ndx Name +# RELOC-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# RELOC-NEXT: 1: 0000000000000000 0 SECTION LOCAL DEFAULT 1 .text +# RELOC-NEXT: 2: 0000000000000000 0 SECTION LOCAL DEFAULT 2 .bbb +# RELOC-NEXT: 3: 0000000000000000 0 SECTION LOCAL DEFAULT 4 .data +# RELOC-NEXT: 4: 0000000000000000 0 NOTYPE GLOBAL DEFAULT 1 _start +# RELOC-EMPTY: #--- a.s .globl _start diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index ef2f2146a03644..82d4beea91e346 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -1219,6 +1219,9 @@ if( LLVM_INCLUDE_EXAMPLES ) endif() if( LLVM_INCLUDE_TESTS ) + set(LLVM_GTEST_RUN_UNDER + "" CACHE STRING + "Define the wrapper program that LLVM unit tests should be run under.") if(EXISTS ${LLVM_MAIN_SRC_DIR}/projects/test-suite AND TARGET clang) include(LLVMExternalProjectUtils) llvm_ExternalProject_Add(test-suite ${LLVM_MAIN_SRC_DIR}/projects/test-suite diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index fad074ff90ee19..d07c3bf8a8fe86 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2093,12 +2093,19 @@ class TargetInstrInfo : public MCInstrInfo { "Target didn't implement TargetInstrInfo::insertOutlinedCall!"); } - /// Insert an architecture-specific instruction to clear a register. + /// Insert an architecture-specific instruction to clear a register. If you + /// need to avoid sideeffects (e.g. avoid XOR on x86, which sets EFLAGS), set + /// \p AllowSideEffects to \p false. 
virtual void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, - DebugLoc &DL) const { + DebugLoc &DL, + bool AllowSideEffects = true) const { +#if 0 + // FIXME: This should exist once all platforms that use stack protectors + // implements it. llvm_unreachable( "Target didn't implement TargetInstrInfo::buildClearRegister!"); +#endif } /// Return true if the function can safely be outlined from. diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index f5d4cb314f56ee..d33eaf7bcb7b2f 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -16,7 +16,7 @@ /* Indicate that this is LLVM compiled from the amd-gfx branch. */ #define LLVM_HAVE_BRANCH_AMD_GFX -#define LLVM_MAIN_REVISION 477794 +#define LLVM_MAIN_REVISION 477809 /* Define if LLVM_ENABLE_DUMP is enabled */ #cmakedefine LLVM_ENABLE_DUMP diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 05c79b610cb36c..7f1421549b1492 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -9134,14 +9134,14 @@ bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault( void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, - DebugLoc &DL) const { + DebugLoc &DL, + bool AllowSideEffects) const { const MachineFunction &MF = *MBB.getParent(); const AArch64Subtarget &STI = MF.getSubtarget(); const AArch64RegisterInfo &TRI = *STI.getRegisterInfo(); if (TRI.isGeneralPurposeRegister(MF, Reg)) { - BuildMI(MBB, Iter, DL, get(AArch64::MOVi64imm), Reg) - .addImm(0); + BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0); } else if (STI.hasSVE()) { BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg) .addImm(0) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 4a40b2fa122159..a934103c90cbf9 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -333,8 +333,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; void buildClearRegister(Register Reg, MachineBasicBlock &MBB, - MachineBasicBlock::iterator Iter, - DebugLoc &DL) const override; + MachineBasicBlock::iterator Iter, DebugLoc &DL, + bool AllowSideEffects = true) const override; /// Returns the vector element size (B, H, S or D) of an SVE opcode. uint64_t getElementSizeForOpcode(unsigned Opc) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 36da2e7b40efaa..a935c0e16a5523 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -504,13 +504,21 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // Only logical ops can be done on v4i8 directly, others must be done // elementwise. 
setOperationAction( - {ISD::ADD, ISD::MUL, ISD::ABS, ISD::SMIN, - ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::CTPOP, - ISD::CTLZ, ISD::ADD, ISD::SUB, ISD::MUL, - ISD::SHL, ISD::SREM, ISD::UREM, ISD::SDIV, - ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS, - ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, - ISD::UINT_TO_FP}, + {ISD::ABS, ISD::ADD, ISD::ADDC, ISD::ADDE, + ISD::BITREVERSE, ISD::CTLZ, ISD::CTPOP, ISD::CTTZ, + ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FSHL, ISD::FSHR, + ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::PARITY, + ISD::ROTL, ISD::ROTR, ISD::SADDO, ISD::SADDO_CARRY, + ISD::SADDSAT, ISD::SDIV, ISD::SDIVREM, ISD::SELECT_CC, + ISD::SETCC, ISD::SHL, ISD::SINT_TO_FP, ISD::SMAX, + ISD::SMIN, ISD::SMULO, ISD::SMUL_LOHI, ISD::SRA, + ISD::SREM, ISD::SRL, ISD::SSHLSAT, ISD::SSUBO, + ISD::SSUBO_CARRY, ISD::SSUBSAT, ISD::SUB, ISD::SUBC, + ISD::SUBE, ISD::UADDO, ISD::UADDO_CARRY, ISD::UADDSAT, + ISD::UDIV, ISD::UDIVREM, ISD::UINT_TO_FP, ISD::UMAX, + ISD::UMIN, ISD::UMULO, ISD::UMUL_LOHI, ISD::UREM, + ISD::USHLSAT, ISD::USUBO, ISD::USUBO_CARRY, ISD::VSELECT, + ISD::USUBSAT}, MVT::v4i8, Expand); // Operations not directly supported by NVPTX. diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 5c7c10965e2f2c..f6932db2aeb0b9 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -586,6 +586,12 @@ class NVPTXTargetLowering : public TargetLowering { AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override { + // There's rarely any point of packing something into a vector type if we + // already have the source data. 
+ return true; + } + private: const NVPTXSubtarget &STI; // cache the subtarget here SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 84ed953ad18a9b..b0b96b94a12575 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -3485,6 +3485,9 @@ def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; +def: Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))), + (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; + // Count leading zeros let hasSideEffects = false in { def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index f0c46419ab3516..4c6854da0ada3d 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -10130,27 +10130,36 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, return It; } -void X86InstrInfo::buildClearRegister(Register Reg, - MachineBasicBlock &MBB, +void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, - DebugLoc &DL) const { + DebugLoc &DL, + bool AllowSideEffects) const { const MachineFunction &MF = *MBB.getParent(); const X86Subtarget &ST = MF.getSubtarget(); const TargetRegisterInfo &TRI = getRegisterInfo(); if (ST.hasMMX() && X86::VR64RegClass.contains(Reg)) - // FIXME: Ignore MMX registers? + // FIXME: Should we ignore MMX registers? return; if (TRI.isGeneralPurposeRegister(MF, Reg)) { - BuildMI(MBB, Iter, DL, get(X86::XOR32rr), Reg) - .addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + // Convert register to the 32-bit version. Both 'movl' and 'xorl' clear the + // upper bits of a 64-bit register automagically. 
+ Reg = getX86SubSuperRegister(Reg, 32); + + if (!AllowSideEffects) + // XOR affects flags, so use a MOV instead. + BuildMI(MBB, Iter, DL, get(X86::MOV32ri), Reg).addImm(0); + else + BuildMI(MBB, Iter, DL, get(X86::XOR32rr), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); } else if (X86::VR128RegClass.contains(Reg)) { // XMM# if (!ST.hasSSE1()) return; + // PXOR is safe to use because it doesn't affect flags. BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -10159,6 +10168,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, if (!ST.hasAVX()) return; + // VPXOR is safe to use because it doesn't affect flags. BuildMI(MBB, Iter, DL, get(X86::VPXORrr), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -10167,6 +10177,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, if (!ST.hasAVX512()) return; + // VPXORY is safe to use because it doesn't affect flags. BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -10178,9 +10189,11 @@ void X86InstrInfo::buildClearRegister(Register Reg, if (!ST.hasVLX()) return; - BuildMI(MBB, Iter, DL, get(ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr), Reg) - .addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + // KXOR is safe to use because it doesn't affect flags. + unsigned Op = ST.hasBWI() ? 
X86::KXORQrr : X86::KXORWrr; + BuildMI(MBB, Iter, DL, get(Op), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); } } diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 4d261a803421c1..e1199e20c318e2 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -583,8 +583,8 @@ class X86InstrInfo final : public X86GenInstrInfo { outliner::Candidate &C) const override; void buildClearRegister(Register Reg, MachineBasicBlock &MBB, - MachineBasicBlock::iterator Iter, - DebugLoc &DL) const override; + MachineBasicBlock::iterator Iter, DebugLoc &DL, + bool AllowSideEffects = true) const override; bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index aa435b0d47aa59..14c5c0d18a4db6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3792,8 +3792,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, State.setDebugLocFrom(I->getDebugLoc()); VPValue *LoopExitInstDef = PhiR->getBackedgeValue(); - // This is the vector-clone of the value that leaves the loop. - Type *VecTy = State.get(LoopExitInstDef, 0)->getType(); // Before each round, move the insertion point right between // the PHIs and the values we are going to write. @@ -3805,10 +3803,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, State.setDebugLocFrom(LoopExitInst->getDebugLoc()); Type *PhiTy = OrigPhi->getType(); - - VPBasicBlock *LatchVPBB = - PhiR->getParent()->getEnclosingLoopRegion()->getExitingBasicBlock(); - BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB]; // If tail is folded by masking, the vector value to leave the loop should be // a Select choosing between the vectorized LoopExitInst and vectorized Phi, // instead of the former. 
For an inloop reduction the reduction will already @@ -3834,23 +3828,12 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, // then extend the loop exit value to enable InstCombine to evaluate the // entire expression in the smaller type. if (VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { - assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); - Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); - Builder.SetInsertPoint(VectorLoopLatch->getTerminator()); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); - Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy) - : Builder.CreateZExt(Trunc, VecTy); - for (User *U : llvm::make_early_inc_range(RdxParts[Part]->users())) - if (U != Trunc) { - U->replaceUsesOfWith(RdxParts[Part], Extnd); - RdxParts[Part] = Extnd; - } - } Builder.SetInsertPoint(LoopMiddleBlock, LoopMiddleBlock->getFirstInsertionPt()); - for (unsigned Part = 0; Part < UF; ++Part) + Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); + for (unsigned Part = 0; Part < UF; ++Part) { RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); + } } // Reduce all of the unrolled parts into a single vector. @@ -9155,18 +9138,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( PreviousLink = RedRecipe; } } - - // If tail is folded by masking, introduce selects between the phi - // and the live-out instruction of each reduction, at the beginning of the - // dedicated latch block. 
- if (CM.foldTailByMasking()) { Builder.setInsertPoint(&*LatchVPBB->begin()); for (VPRecipeBase &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { - VPReductionPHIRecipe *PhiR = dyn_cast(&R); - if (!PhiR || PhiR->isInLoop()) - continue; - const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + VPReductionPHIRecipe *PhiR = dyn_cast(&R); + if (!PhiR || PhiR->isInLoop()) + continue; + + const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + auto *Result = PhiR->getBackedgeValue()->getDefiningRecipe(); + // If tail is folded by masking, introduce selects between the phi + // and the live-out instruction of each reduction, at the beginning of the + // dedicated latch block. + if (CM.foldTailByMasking()) { VPValue *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), *Plan); VPValue *Red = PhiR->getBackedgeValue(); @@ -9174,16 +9158,35 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( "reduction recipe must be defined before latch"); FastMathFlags FMFs = RdxDesc.getFastMathFlags(); Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType(); - auto *Select = + Result = PhiTy->isFloatingPointTy() ? new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs) : new VPInstruction(Instruction::Select, {Cond, Red, PhiR}); - Select->insertBefore(&*Builder.getInsertPoint()); + Result->insertBefore(&*Builder.getInsertPoint()); if (PreferPredicatedReductionSelect || TTI.preferPredicatedReductionSelect( PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy, TargetTransformInfo::ReductionFlags())) - PhiR->setOperand(1, Select); + PhiR->setOperand(1, Result->getVPSingleValue()); + } + // If the vector reduction can be performed in a smaller type, we truncate + // then extend the loop exit value to enable InstCombine to evaluate the + // entire expression in the smaller type. 
+ Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType(); + if (PhiTy != RdxDesc.getRecurrenceType()) { + assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); + Type *RdxTy = RdxDesc.getRecurrenceType(); + auto *Trunc = new VPWidenCastRecipe(Instruction::Trunc, + Result->getVPSingleValue(), RdxTy); + auto *Extnd = + RdxDesc.isSigned() + ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy) + : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy); + + Trunc->insertAfter(Result); + Extnd->insertAfter(Trunc); + Result->getVPSingleValue()->replaceAllUsesWith(Extnd); + Trunc->setOperand(0, Result->getVPSingleValue()); } } diff --git a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt index 0c024ad2b2e1bf..a5ccdde751ed56 100644 --- a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt +++ b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt @@ -16,8 +16,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7265065908432007,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.23831403255462646,0.07943800836801529,0.07943800836801529,0.07943800836801529,0.9912577867507935,0.07069581001996994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7940031290054321,0.7908878326416016,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7352024912834167 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.01417447254061699,0.014231426641345024,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4279724359512329 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7939082384109497,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.014218696393072605,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -40,8 +40,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.0,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2404157966375351,0.08013860136270523,0.0,0.08013860136270523,1.0,0.07131929695606232,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08013860136270523 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.0,0.7908878326416016,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7940031290054321 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.0,0.014231426641345024,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01417447254061699 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.0,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7939082384109497 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.0,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014218696393072605 max_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -64,8 +64,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.01989283785223961,0.02235277369618416,0.2813863754272461,0.02235277369618416,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9061526656150818,0.9591121673583984,0.7352024912834167,0.7908878326416016,0.7379283308982849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6725077629089355 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0737093985080719,0.01772311143577099,0.4279724359512329,0.014231426641345024,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4858442544937134 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9173259735107422,0.9647942781448364,0.7436708807945251,0.7907436490058899,0.7401107549667358,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6831487417221069 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07275574654340744,0.017619721591472626,0.4243086874485016,0.014276761561632156,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.47955840826034546 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -88,8 +88,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.0,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.0,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.23831403255462646,0.07943800836801529,1.0,0.0,0.9912577867507935,0.07069581001996994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07943800836801529 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.0,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7908878326416016 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.0,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014231426641345024 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.0,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7907436490058899 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.0,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014276761561632156 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -112,8 +112,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01117638684809208 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6693925261497498 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00449750293046236 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6799841523170471 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004439314361661673 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -136,8 +136,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01822916604578495 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6662772297859192 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008109557442367077 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6768196225166321 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008004635572433472 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -160,8 +160,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688586473465,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.3333333432674408,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2724486181679993e-10,2.2724486181679993e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.01989283785223961,0.02235277369618416,0.2813863754272461,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9061526656150818,0.9591121673583984,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6631619930267334 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0737093985080719,0.01772311143577099,0.4279724359512329,0.4858442544937134,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07601386308670044 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9173259735107422,0.9647942781448364,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.673655092716217 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07275574654340744,0.017619721591472626,0.4243086874485016,0.47955840826034546,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07503040134906769 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -184,8 +184,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.3333333432674408,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.2724486181679993e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.0,0.2813863754272461,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.7352024912834167,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9591121673583984 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.0,0.4279724359512329,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01772311143577099 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.7436708807945251,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9647942781448364 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.0,0.4243086874485016,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017619721591472626 max_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -208,8 +208,8 @@ hint_weights_by_max: 1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2421688437461853, start_bb_freq_by_max: 0.3333333432674408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204 end_bb_freq_by_max: 0.9760092496871948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5110765099525452 hottest_bb_freq_by_max: 0.2813863754272461,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.2631579041481018,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.27892643213272095 -liverange_size: 
0.7352024912834167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.6631619930267334,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.46105918288230896 -use_def_density: 0.42606985569000244,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05081497132778168,0.07567594200372696,0.48368439078330994,0.9955543875694275,0.07338171452283859,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +liverange_size: 0.7436708807945251,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.673655092716217,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4683544337749481 +use_def_density: 0.4243086874485016,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07503040134906769,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99146968126297 max_stage: 1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -232,8 +232,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7265065908432007, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1666666716337204,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218,0.2631579041481018,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.6631619930267334,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7352024912834167 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129,0.07601386308670044,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4279724359512329 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.673655092716217,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.07503040134906769,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016 max_stage: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -256,8 +256,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1666666716337204,0.0,0.1666666716337204,0.3333333432674408,0.1666666716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948,0.0,0.9760092496871948,0.9760092496871948,2.2724486181679993e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018,0.0,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06705831736326218 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6631619930267334,0.0,0.6725077629089355,0.7379283308982849,0.9061526656150818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07601386308670044,0.0,0.4858442544937134,1.0,0.0737093985080719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05104188248515129 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.673655092716217,0.0,0.6831487417221069,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07503040134906769,0.0,0.47955840826034546,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038 max_stage: 0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -280,8 +280,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,0.35764437913894653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.482131917196483e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.01989283785223961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2631579041481018 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.811345100402832,0.7318435907363892,0.5088096261024475,0.7421572804450989,0.8143532276153564,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2819080352783203 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,0.05657143518328667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8106942772865295,0.7343682646751404,0.510564923286438,0.744717538356781,0.8068132996559143,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2828805446624756 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,0.05657143518328667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -304,8 +304,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.5,0.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.5236390233039856,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3283064365386963e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.0,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01989283785223961 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.811345100402832,0.0,0.7318435907363892,0.5088096261024475,0.7421572804450989,0.8143532276153564,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.42606985569000244,0.0,0.07567594200372696,1.0,0.48368439078330994,0.9955543875694275,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07338171452283859 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8106942772865295,0.0,0.7343682646751404,0.510564923286438,0.744717538356781,0.8068132996559143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016,0.0,0.07503040134906769,0.99146968126297,0.47955840826034546,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07275574654340744 max_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.7777777910232544 @@ -328,8 +328,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9982500076293 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6522180438041687,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015647225081920624 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.8219112157821655,0.5714285969734192,0.8334941864013672,0.9145752787590027,0.31660231947898865,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9652509689331055 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016097404062747955 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.8283073902130127,0.575875461101532,0.8399805426597595,0.9100194573402405,0.3190661370754242,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9688715934753418 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016164302825927734 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.2222222238779068 @@ -352,8 +352,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.9 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.778997310048936e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8797762989997864,0.7935694456100464,0.5517241358757019,0.8047530055046082,0.8830382227897644,0.30568498373031616,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008228360675275326 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8826290965080261,0.7995305061340332,0.5558685660362244,0.8107981085777283,0.8784037828445435,0.3079812228679657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00826177466660738 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.1944444477558136 @@ -376,8 +376,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9434669613838196, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931,1.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,1.0,1.482131917196483e-10,0.3333333432674408,0.6365708112716675,0.6365708112716675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.778997310048936e-10 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8342907428741455,0.2898806929588318,1.0,0.5231993198394775,0.7631462812423706,0.8373839855194092,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9253203868865967 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,1.0,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7674928903579712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015127303078770638 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8337028622627258,0.290909081697464,1.0,0.5250554084777832,0.7658536434173584,0.8297117352485657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9250554442405701 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,1.0,0.058340102434158325,0.7709200978279114,0.37288200855255127,0.7775528430938721,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015127303078770638 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.1388888955116272 @@ -400,8 +400,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.07419288158416748 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9451456069946289,0.5747572779655457,0.8383495211601257,0.9199029207229614,0.3184466063976288,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9839805960655212 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006958406884223223 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9521695971488953,0.5838264226913452,0.8515779376029968,0.922583818435669,0.32347139716148376,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9837278127670288 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007072303909808397 max_stage: 0,0,0,0,0,0,0,0,0,4,4,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.1111111119389534 @@ -424,8 +424,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.07419288158416748 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.35764437913894653,0.7152887582778931,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7152887582778931 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675,0.6365708112716675,0.3333333432674408,0.6365708112716675,0.6365708112716675,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6365708112716675 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2813863754272461,0.2631579041481018,0.27892643213272095,1.0,0.27892643213272095,0.2631579041481018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02235277369618416 -liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9451456069946289,0.5747572779655457,0.8383495211601257,0.9199029207229614,0.3184466063976288,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9849514365196228 -use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3284657895565033,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7674928903579712,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006951410323381424 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9521695971488953,0.5838264226913452,0.8515779376029968,0.922583818435669,0.32347139716148376,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9847140312194824 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3299224376678467,0.07900823652744293,0.7709200978279114,0.37288200855255127,0.7775528430938721,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007065076380968094 max_stage: 0,0,0,0,0,0,0,0,0,4,4,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 progress: 0.0833333358168602 diff --git a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt index beb0c5205979c0..01b4a3835c978c 100644 --- a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt +++ b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt @@ -171,7 +171,7 @@ observation: 28 li_size: 0 stage: 0 weight: 0.0 -priority: 2147485184.0 +priority: 2147484928.0 reward: 0.0 observation: 29 li_size: 0 @@ -237,7 +237,7 @@ observation: 39 li_size: 0 stage: 0 weight: 0.0 -priority: 3598.0 +priority: 3534.0 reward: 0.0 observation: 40 li_size: 0 @@ -249,7 +249,7 @@ observation: 41 li_size: 0 stage: 0 weight: 0.0 -priority: 3582.0 +priority: 3518.0 reward: 0.0 observation: 42 li_size: 0 @@ -273,7 +273,7 @@ observation: 45 li_size: 0 stage: 0 weight: 0.0 -priority: 4078.0 +priority: 
4046.0 reward: 0.0 observation: 46 li_size: 0 @@ -291,7 +291,7 @@ observation: 48 li_size: 0 stage: 0 weight: 0.0 -priority: 4384.0 +priority: 4304.0 reward: 0.0 observation: 49 li_size: 0 @@ -309,7 +309,7 @@ observation: 51 li_size: 0 stage: 0 weight: 0.0 -priority: 2684358144.0 +priority: 2684357888.0 reward: 0.0 observation: 52 li_size: 0 diff --git a/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll b/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll index 21bb75278874a5..6b013b55df77ad 100644 --- a/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll +++ b/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll @@ -24,5 +24,5 @@ ; CHECK-NOT: nan ; CHECK-LABEL: priority: ; NOML-SAME: 2684358144.0 -; ML-SAME: 3599 +; ML-SAME: 3535 ; CHECK-LABEL: reward: diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index 18788c776ffbd7..464b3a754804fe 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -1319,10 +1319,8 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768; ; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]]; ; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]]; -; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: mov.b32 {[[RX0:%rs[0-9]+]], [[RX1:%rs[0-9]+]]}, [[R]] -; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]]; -; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]]; +; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]]; +; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]]; ; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]}; ; CHECK: ret; define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index fd48313ad68484..ddad374a4dc119 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ 
b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -1269,4 +1269,158 @@ define <4 x i8> @test_fptoui_2xhalf_to_2xi8(<4 x half> %a) #0 { ret <4 x i8> %r } +define void @test_srem_v4i8(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: test_srem_v4i8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<18>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ld.param.u64 %rd3, [test_srem_v4i8_param_2]; +; CHECK-NEXT: ld.param.u64 %rd2, [test_srem_v4i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_srem_v4i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: ld.u32 %r2, [%rd2]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs1, %r3; +; CHECK-NEXT: bfe.s32 %r4, %r1, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs2, %r4; +; CHECK-NEXT: rem.s16 %rs3, %rs2, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs3; +; CHECK-NEXT: bfe.s32 %r6, %r2, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs4, %r6; +; CHECK-NEXT: bfe.s32 %r7, %r1, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs5, %r7; +; CHECK-NEXT: rem.s16 %rs6, %rs5, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r8, %rs6; +; CHECK-NEXT: bfi.b32 %r9, %r8, %r5, 8, 8; +; CHECK-NEXT: bfe.s32 %r10, %r2, 16, 8; +; CHECK-NEXT: cvt.s8.s32 %rs7, %r10; +; CHECK-NEXT: bfe.s32 %r11, %r1, 16, 8; +; CHECK-NEXT: cvt.s8.s32 %rs8, %r11; +; CHECK-NEXT: rem.s16 %rs9, %rs8, %rs7; +; CHECK-NEXT: cvt.u32.u16 %r12, %rs9; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r9, 16, 8; +; CHECK-NEXT: bfe.s32 %r14, %r2, 24, 8; +; CHECK-NEXT: cvt.s8.s32 %rs10, %r14; +; CHECK-NEXT: bfe.s32 %r15, %r1, 24, 8; +; CHECK-NEXT: cvt.s8.s32 %rs11, %r15; +; CHECK-NEXT: rem.s16 %rs12, %rs11, %rs10; +; CHECK-NEXT: cvt.u32.u16 %r16, %rs12; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8; +; CHECK-NEXT: st.u32 [%rd3], %r17; +; CHECK-NEXT: ret; +entry: + %t57 = load <4 x i8>, ptr %a, align 4 + %t59 = load <4 x i8>, ptr %b, align 4 + %x = srem <4 x i8> %t57, %t59 + store <4 x i8> %x, ptr %c, align 4 + ret void +} + +;; v3i8 lowering, especially for 
unaligned loads is terrible. We end up doing +;; tons of pointless scalar_to_vector/bitcast/extract_elt on v2i16/v4i8, which +;; is further complicated by LLVM trying to use i16 as an intermediate type, +;; because we don't have i8 registers. It's a mess. +;; Ideally we want to split it into element-wise ops, but legalizer can't handle +;; odd-sized vectors. TL;DR; don't use odd-sized vectors of v8. +define void @test_srem_v3i8(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: test_srem_v3i8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<20>; +; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ld.param.u64 %rd3, [test_srem_v3i8_param_2]; +; CHECK-NEXT: ld.param.u64 %rd2, [test_srem_v3i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_srem_v3i8_param_0]; +; CHECK-NEXT: ld.u8 %rs1, [%rd1]; +; CHECK-NEXT: ld.u8 %rs2, [%rd1+1]; +; CHECK-NEXT: shl.b16 %rs3, %rs2, 8; +; CHECK-NEXT: or.b16 %rs4, %rs3, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs4; +; CHECK-NEXT: ld.s8 %rs5, [%rd1+2]; +; CHECK-NEXT: ld.u8 %rs6, [%rd2]; +; CHECK-NEXT: ld.u8 %rs7, [%rd2+1]; +; CHECK-NEXT: shl.b16 %rs8, %rs7, 8; +; CHECK-NEXT: or.b16 %rs9, %rs8, %rs6; +; CHECK-NEXT: cvt.u32.u16 %r3, %rs9; +; CHECK-NEXT: ld.s8 %rs10, [%rd2+2]; +; CHECK-NEXT: bfe.s32 %r5, %r3, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs11, %r5; +; CHECK-NEXT: bfe.s32 %r6, %r1, 0, 8; +; CHECK-NEXT: cvt.s8.s32 %rs12, %r6; +; CHECK-NEXT: rem.s16 %rs13, %rs12, %rs11; +; CHECK-NEXT: cvt.u32.u16 %r7, %rs13; +; CHECK-NEXT: bfe.s32 %r8, %r3, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs14, %r8; +; CHECK-NEXT: bfe.s32 %r9, %r1, 8, 8; +; CHECK-NEXT: cvt.s8.s32 %rs15, %r9; +; CHECK-NEXT: rem.s16 %rs16, %rs15, %rs14; +; CHECK-NEXT: cvt.u32.u16 %r10, %rs16; +; CHECK-NEXT: bfi.b32 %r11, %r10, %r7, 8, 8; +; CHECK-NEXT: // implicit-def: %r13 +; CHECK-NEXT: bfi.b32 %r12, %r13, %r11, 16, 8; +; CHECK-NEXT: // implicit-def: %r15 +; CHECK-NEXT: bfi.b32 %r14, %r15, %r12, 24, 8; +; CHECK-NEXT: rem.s16 
%rs17, %rs5, %rs10; +; CHECK-NEXT: cvt.u16.u32 %rs18, %r14; +; CHECK-NEXT: st.u8 [%rd3], %rs18; +; CHECK-NEXT: shr.u16 %rs19, %rs18, 8; +; CHECK-NEXT: st.u8 [%rd3+1], %rs19; +; CHECK-NEXT: st.u8 [%rd3+2], %rs17; +; CHECK-NEXT: ret; +entry: + %t57 = load <3 x i8>, ptr %a, align 1 + %t59 = load <3 x i8>, ptr %b, align 1 + %x = srem <3 x i8> %t57, %t59 + store <3 x i8> %x, ptr %c, align 1 + ret void +} + +define void @test_sext_v4i1_to_v4i8(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: test_sext_v4i1_to_v4i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<18>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ld.param.u64 %rd3, [test_sext_v4i1_to_v4i8_param_2]; +; CHECK-NEXT: ld.param.u64 %rd2, [test_sext_v4i1_to_v4i8_param_1]; +; CHECK-NEXT: ld.param.u64 %rd1, [test_sext_v4i1_to_v4i8_param_0]; +; CHECK-NEXT: ld.u32 %r1, [%rd1]; +; CHECK-NEXT: ld.u32 %r2, [%rd2]; +; CHECK-NEXT: bfe.s32 %r3, %r2, 24, 8; +; CHECK-NEXT: bfe.s32 %r4, %r1, 24, 8; +; CHECK-NEXT: setp.hi.u32 %p1, %r4, %r3; +; CHECK-NEXT: bfe.s32 %r5, %r2, 16, 8; +; CHECK-NEXT: bfe.s32 %r6, %r1, 16, 8; +; CHECK-NEXT: setp.hi.u32 %p2, %r6, %r5; +; CHECK-NEXT: bfe.s32 %r7, %r2, 8, 8; +; CHECK-NEXT: bfe.s32 %r8, %r1, 8, 8; +; CHECK-NEXT: setp.hi.u32 %p3, %r8, %r7; +; CHECK-NEXT: bfe.s32 %r9, %r2, 0, 8; +; CHECK-NEXT: bfe.s32 %r10, %r1, 0, 8; +; CHECK-NEXT: setp.hi.u32 %p4, %r10, %r9; +; CHECK-NEXT: selp.s32 %r11, -1, 0, %p4; +; CHECK-NEXT: selp.s32 %r12, -1, 0, %p3; +; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 8, 8; +; CHECK-NEXT: selp.s32 %r14, -1, 0, %p2; +; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8; +; CHECK-NEXT: selp.s32 %r16, -1, 0, %p1; +; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8; +; CHECK-NEXT: st.u32 [%rd3], %r17; +; CHECK-NEXT: ret; +entry: + %t1 = load <4 x i8>, ptr %a, align 4 + %t2 = load <4 x i8>, ptr %b, align 4 + %t5 = icmp ugt <4 x i8> %t1, %t2 + %t6 = sext <4 x i1> %t5 to <4 x i8> + store <4 x i8> %t6, ptr %c, align 4 + ret 
void +} + attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll index b4208c691c91df..c14dc88431d316 100644 --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -364,10 +364,6 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) { ; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16] ; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8]; ; CHECK-DAG: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0] -; CHECK-DAG: mov.b32 [[R0:%r[0-9]+]], {[[E0]], [[E1]]}; -; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[R0]]; -; CHECK-DAG: mov.b32 [[R1:%r[0-9]+]], {[[E2]], [[E3]]}; -; CHECK-DAG: mov.b32 {[[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [[R1]]; ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK-DAG: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; @@ -496,7 +492,6 @@ define <4 x half> @test_v4f16(<4 x half> %a) { ; CHECK-LABEL: test_v5f16( ; CHECK: .param .align 16 .b8 test_v5f16_param_0[16] ; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0]; -; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]]; ; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8]; ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK-DAG: st.param.v4.b16 [param0+0], diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index 7a3c7d6fbfea71..03903d80cfd6ec 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -207,10 +207,10 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr 
[[TMP3]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i16> ; CHECK-NEXT: [[TMP8]] = zext <4 x i16> [[TMP7]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16> @@ -234,10 +234,10 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i16>, ptr [[TMP16]], align 2 ; CHECK-NEXT: [[TMP17:%.*]] = zext <4 x i16> [[WIDE_LOAD4]] to <4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP14]], [[TMP17]] -; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 4 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 256 ; CHECK-NEXT: [[TMP20:%.*]] = trunc <4 x i32> [[TMP18]] to <4 x i16> ; CHECK-NEXT: [[TMP21]] = zext <4 x i16> [[TMP20]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 4 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 256 ; CHECK-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP22:%.*]] = trunc <4 x i32> [[TMP21]] to <4 x i16> diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 837d663f4a9263..a4a075463b1b0b 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -22,10 +22,10 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) { ; 
CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8> ; CHECK-NEXT: [[TMP5]] = zext <4 x i8> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8> @@ -99,10 +99,10 @@ define i32 @PR35734(i32 %x, i32 %y) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i1> ; CHECK-NEXT: [[TMP7]] = sext <4 x i1> [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1> diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll index 3cc6e5fa7b8d5f..afe16c71f7f9ca 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll @@ -17,14 +17,14 @@ 
define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP27:%.*]] = zext [[WIDE_LOAD2]] to ; CHECK-NEXT: [[TMP28:%.*]] = add [[TMP14]], [[TMP26]] ; CHECK-NEXT: [[TMP29:%.*]] = add [[TMP15]], [[TMP27]] +; CHECK-NEXT: [[TMP33:%.*]] = trunc [[TMP28]] to +; CHECK-NEXT: [[TMP35:%.*]] = trunc [[TMP29]] to +; CHECK-NEXT: [[TMP34]] = zext [[TMP33]] to +; CHECK-NEXT: [[TMP36]] = zext [[TMP35]] to ; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 16 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]] ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}} -; CHECK-NEXT: [[TMP33:%.*]] = trunc [[TMP28]] to -; CHECK-NEXT: [[TMP34]] = zext [[TMP33]] to -; CHECK-NEXT: [[TMP35:%.*]] = trunc [[TMP29]] to -; CHECK-NEXT: [[TMP36]] = zext [[TMP35]] to ; CHECK: middle.block: ; CHECK-NEXT: [[TMP37:%.*]] = trunc [[TMP34]] to ; CHECK-NEXT: [[TMP38:%.*]] = trunc [[TMP36]] to diff --git a/llvm/test/Unit/lit.cfg.py b/llvm/test/Unit/lit.cfg.py index f15c30dbcdb0aa..61296d7ea0032e 100644 --- a/llvm/test/Unit/lit.cfg.py +++ b/llvm/test/Unit/lit.cfg.py @@ -19,7 +19,11 @@ config.test_source_root = config.test_exec_root # testFormat: The test format to use to interpret tests. -config.test_format = lit.formats.GoogleTest(config.llvm_build_mode, "Tests") +config.test_format = lit.formats.GoogleTest( + config.llvm_build_mode, + "Tests", + run_under=config.gtest_run_under, +) # Propagate the temp directory. Windows requires this because it uses \Windows\ # if none of these are present. 
diff --git a/llvm/test/Unit/lit.site.cfg.py.in b/llvm/test/Unit/lit.site.cfg.py.in index 1d7d7658014949..3536a34f796a28 100644 --- a/llvm/test/Unit/lit.site.cfg.py.in +++ b/llvm/test/Unit/lit.site.cfg.py.in @@ -7,6 +7,7 @@ config.llvm_obj_root = path(r"@LLVM_BINARY_DIR@") config.llvm_tools_dir = lit_config.substitute(path(r"@LLVM_TOOLS_DIR@")) config.llvm_build_mode = lit_config.substitute("@LLVM_BUILD_MODE@") config.shlibdir = lit_config.substitute(path(r"@SHLIBDIR@")) +config.gtest_run_under = lit_config.substitute(r"@LLVM_GTEST_RUN_UNDER@") # Let the main config do the real work. lit_config.load_config( diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn index 8a811bc990d41b..36957f502c3231 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/misc/BUILD.gn @@ -34,6 +34,7 @@ static_library("misc") { sources = [ "ConfusableIdentifierCheck.cpp", "ConstCorrectnessCheck.cpp", + "CoroutineHostileRAIICheck.cpp", "DefinitionsInHeadersCheck.cpp", "HeaderIncludeCycleCheck.cpp", "IncludeCleanerCheck.cpp", diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index f859af249faf5f..dd9fd0c10d53ef 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -222,7 +222,10 @@ write_lit_config("lit_site_cfg") { write_lit_config("lit_unit_site_cfg") { input = "//llvm/test/Unit/lit.site.cfg.py.in" output = llvm_lit_unit_site_cfg_file - extra_values = [ "LLVM_BUILD_MODE=." ] + extra_values = [ + "LLVM_BUILD_MODE=.", + "LLVM_GTEST_RUN_UNDER=", + ] } # This target should contain all dependencies of check-llvm. 
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 1bfee3aa1d7ee8..e50b14975e83d6 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -829,47 +829,40 @@ struct ReshapeRewriter : public OpRewritePattern { } }; +// A trivial wrapper to help generate different operations for dense/sparse +// tensors. struct TensorLike { TensorLike(OpBuilder &builder, Location loc, RankedTensorType rtt, - ValueRange sizes) - : isSparse(rtt.getEncoding() != nullptr) { + ValueRange sizes) { SmallVector dynSzs; getDynamicSizes(rtt, sizes, dynSzs); - if (isSparse) - val = builder.create(loc, rtt, dynSzs); - else - val = allocDenseTensor(builder, loc, rtt, sizes); - }; - - void insertOrStore(OpBuilder &builder, Location loc, Value v, - ValueRange crds) { - if (isSparse) - val = builder.create(loc, v, val, crds); - else - builder.create(loc, v, val, crds); + val = builder.create(loc, rtt, dynSzs); + if (!isSparse()) { + Value c0 = constantZero(builder, loc, rtt.getElementType()); + val = builder.create(loc, c0, val).getResult(0); + } } - Value getSSA() const { - // We don't need to maintain the SSA chain for a memref value. - return isSparse ? val : nullptr; + void insert(OpBuilder &builder, Location loc, Value v, ValueRange crds) { + // TODO: Unify these two. + if (isSparse()) + val = builder.create(loc, v, val, crds); + else + val = builder.create(loc, v, val, crds); } Value finalize(OpBuilder &builder, Location loc, RankedTensorType rtp) const { - if (isSparse) + if (isSparse()) return builder.create(loc, val, true); - return builder.create(loc, rtp, val); + return val; } - void updateSSA(Value v) { - // Dense memref is a non-SSA value. 
- assert(isSparse); - val = v; + bool isSparse() const { + return getSparseTensorEncoding(val.getType()) != nullptr; } -private: - bool isSparse; - Value val; // either a memref (for dense tensor) or a sparse tensor. + Value val; }; struct ConcatenateRewriter : public OpRewritePattern { @@ -901,14 +894,14 @@ struct ConcatenateRewriter : public OpRewritePattern { TensorLike dstBuf(rewriter, loc, dstTp.getRankedTensorType(), sizes); Value offset = constantIndex(rewriter, loc, 0); - Value iterArg = dstBuf.getSSA(); + Value iterArg = dstBuf.val; ForeachOp foreachOp; for (Value input : op.getInputs()) { // Builds a for op for each input tensor to append new values into the // output tensor. foreachOp = rewriter.create( - loc, input, iterArg ? ValueRange{iterArg} : ValueRange{}, + loc, input, iterArg, [&](OpBuilder &builder, Location loc, ValueRange dcvs, Value v, ValueRange reduc) { SmallVector dstLcvs(dstTp.getLvlRank()); @@ -920,32 +913,26 @@ struct ConcatenateRewriter : public OpRewritePattern { // FIXME: `toStoredDim` is deprecated dstLcvs[toStoredDim(dstTp.getEncoding(), d)] = crd; } - - if (!reduc.empty()) - dstBuf.updateSSA(reduc.front()); - + // Enters foreach, updates the SSA chain. + dstBuf.val = reduc.front(); if (!dstTp.isAllDense()) { Value cond = genIsNonzero(builder, loc, v); auto ifOp = builder.create(loc, reduc.getTypes(), cond, /*else*/ true); builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - dstBuf.insertOrStore(builder, loc, v, dstLcvs); - builder.create(loc, dstBuf.getSSA()); + dstBuf.insert(builder, loc, v, dstLcvs); + builder.create(loc, dstBuf.val); // Exits the ifOp, update the sparse tensor SSA value. 
builder.setInsertionPointAfter(ifOp); - assert(!reduc.empty()); - dstBuf.updateSSA(ifOp.getResult(0)); + dstBuf.val = ifOp.getResult(0); } else { - dstBuf.insertOrStore(builder, loc, v, dstLcvs); + dstBuf.insert(builder, loc, v, dstLcvs); } - if (reduc.empty()) - builder.create(loc); - else - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); }); // Accumulates the offset. Note that only static-shaped inputs are allowed // by concatenate op verifier, which saves us from computing the offset @@ -955,15 +942,11 @@ struct ConcatenateRewriter : public OpRewritePattern { offset = rewriter.create( loc, offset, constantIndex(rewriter, loc, *sh)); - if (!foreachOp.getResults().empty()) { - iterArg = foreachOp.getResult(0); - dstBuf.updateSSA(iterArg); - } + iterArg = foreachOp.getResult(0); + dstBuf.val = iterArg; } - if (!foreachOp.getResults().empty()) - dstBuf.updateSSA(iterArg); - + dstBuf.val = iterArg; Value ret = dstBuf.finalize(rewriter, loc, dstTp.getRankedTensorType()); rewriter.replaceOp(op, ret); return success(); @@ -1010,15 +993,12 @@ struct DirectConvertRewriter : public OpRewritePattern { ValueRange vs; TensorLike dstBuf(rewriter, loc, dstStt.getRankedTensorType(), sizes); - Value iterArg = dstBuf.getSSA(); auto foreachOp = rewriter.create( - loc, src, iterArg ? ValueRange{iterArg} : ValueRange{}, foreachOrder, + loc, src, dstBuf.val, foreachOrder, [&](OpBuilder &builder, Location loc, ValueRange dcvs, Value v, ValueRange reduc) { // Enters the loop, update the SSA value for insertion chain. 
- if (!reduc.empty()) - dstBuf.updateSSA(reduc.front()); - + dstBuf.val = reduc.front(); const Dimension dimRank = dstStt.getDimRank(); const Level lvlRank = dstStt.getLvlRank(); SmallVector lcvs(lvlRank); @@ -1028,34 +1008,29 @@ struct DirectConvertRewriter : public OpRewritePattern { } if (!skipZeroCheck) { - assert(!reduc.empty()); Value cond = genIsNonzero(builder, loc, v); auto ifOp = builder.create(loc, reduc.getTypes(), cond, /*else*/ true); builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - dstBuf.insertOrStore(builder, loc, v, lcvs); - builder.create(loc, dstBuf.getSSA()); + dstBuf.insert(builder, loc, v, lcvs); + builder.create(loc, dstBuf.val); // Exits the ifOp, update the sparse tensor SSA value. builder.setInsertionPointAfter(ifOp); - dstBuf.updateSSA(ifOp.getResult(0)); + dstBuf.val = ifOp.getResult(0); } else { - dstBuf.insertOrStore(builder, loc, v, lcvs); + dstBuf.insert(builder, loc, v, lcvs); } - if (reduc.empty()) - builder.create(loc); - else - builder.create(loc, dstBuf.getSSA()); + builder.create(loc, dstBuf.val); }); rewriter.setInsertionPointAfter(foreachOp); // Exits the for loop, links the SSA chain. 
- if (!foreachOp.getResults().empty()) - dstBuf.updateSSA(foreachOp.getResult(0)); + dstBuf.val = foreachOp.getResult(0); Value ret = dstBuf.finalize(rewriter, loc, dstStt.getRankedTensorType()); rewriter.replaceOp(op, ret); diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir index c22f051a0d5854..e2dcb068e11851 100644 --- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir @@ -14,11 +14,10 @@ // CHECK-LABEL: func.func @sparse_convert_1d // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13xi32> { %0 = sparse_tensor.convert %arg0 : tensor<13xi32, #SparseVector> to tensor<13xi32> return %0 : tensor<13xi32> @@ -26,11 +25,10 @@ func.func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13x // CHECK-LABEL: func.func @sparse_convert_1d_dyn // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_1d_dyn(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor @@ -38,11 +36,10 @@ func.func @sparse_convert_1d_dyn(%arg0: tensor) -> tensor< // CHECK-LABEL: func.func @sparse_convert_2d // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x4xf64> { 
%0 = sparse_tensor.convert %arg0 : tensor<2x4xf64, #SparseMatrix> to tensor<2x4xf64> return %0 : tensor<2x4xf64> @@ -50,11 +47,10 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x // CHECK-LABEL: func.func @sparse_convert_2d_dyn // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d_dyn0(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor @@ -62,11 +58,10 @@ func.func @sparse_convert_2d_dyn0(%arg0: tensor) -> tens // CHECK-LABEL: func.func @sparse_convert_2d_dyn1 // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tensor<2x?xf64> { %0 = sparse_tensor.convert %arg0 : tensor<2x?xf64, #SparseMatrix> to tensor<2x?xf64> return %0 : tensor<2x?xf64> @@ -74,11 +69,10 @@ func.func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tens // CHECK-LABEL: func.func @sparse_convert_2d_dyn2 // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_2d_dyn2(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor return %0 : tensor @@ -86,11 +80,10 @@ func.func @sparse_convert_2d_dyn2(%arg0: tensor) -> tens // CHECK-LABEL: func.func @sparse_convert_3d // CHECK-NOT: sparse_tensor.reorder_coo -// CHECK: memref.alloc +// CHECK: bufferization.alloc_tensor // CHECK: linalg.fill // CHECK: 
sparse_tensor.foreach -// CHECK: memref.store -// CHECK: bufferization.to_tensor +// CHECK: tensor.insert func.func @sparse_convert_3d(%arg0: tensor<2x3x4xf64, #SparseTensor>) -> tensor<2x3x4xf64> { %0 = sparse_tensor.convert %arg0 : tensor<2x3x4xf64, #SparseTensor> to tensor<2x3x4xf64> return %0 : tensor<2x3x4xf64> diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir index bdfab54dc6daeb..f3d3dd28563e89 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -176,77 +176,83 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>, return %0 : tensor } -// CHECK-LABEL: @concat_sparse_sparse_dense( -// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor -// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index -// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[TMP_c9:.*]] = arith.constant 9 : index -// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[TMP_d0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK: %[[A:.*]] = memref.alloc(%[[TMP_c9]], %[[TMP_c4]]) : memref -// CHECK: linalg.fill ins(%[[TMP_d0]] : f64) outs(%[[A]] : memref) -// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_5:.*]] = 
sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref -// CHECK: memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_23]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref -// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] 
= memref.load %[[TMP_10]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index -// CHECK: memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_29]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref -// CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] -// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref -// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref -// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] -// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index -// CHECK: memref.store %[[TMP_28]], %[[A]]{{\[}}%[[TMP_29]], %[[TMP_27]]] : memref -// CHECK: } -// CHECK: } -// CHECK: 
%[[R:.*]] = bufferization.to_tensor %[[A]] : memref -// CHECK: return %[[R]] : tensor +// CHECK-LABEL: func.func @concat_sparse_sparse_dense( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64, #sparse_tensor +// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x4xf64, #sparse_tensor +// CHECK-SAME: %[[VAL_2:.*]]: tensor<4x4xf64, #sparse_tensor +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 9 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_10:.*]] = bufferization.alloc_tensor(%[[VAL_4]], %[[VAL_3]]) : tensor +// CHECK: %[[VAL_11:.*]] = linalg.fill ins(%[[VAL_6]] : f64) outs(%[[VAL_10]] : tensor) -> tensor +// CHECK: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_8]] iter_args(%[[VAL_21:.*]] = %[[VAL_11]]) -> (tensor) { +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_20]], 
%[[VAL_8]] : index +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_26:.*]] = scf.for %[[VAL_27:.*]] = %[[VAL_23]] to %[[VAL_25]] step %[[VAL_8]] iter_args(%[[VAL_28:.*]] = %[[VAL_21]]) -> (tensor) { +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_31:.*]] = tensor.insert %[[VAL_30]] into %[[VAL_28]]{{\[}}%[[VAL_22]], %[[VAL_29]]] : tensor +// CHECK: scf.yield %[[VAL_31]] : tensor +// CHECK: } +// CHECK: scf.yield %[[VAL_26]] : tensor +// CHECK: } +// CHECK: %[[VAL_32:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_33:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_34:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_35:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_36:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_39:.*]] = scf.for %[[VAL_40:.*]] = %[[VAL_37]] to %[[VAL_38]] step %[[VAL_8]] iter_args(%[[VAL_41:.*]] = %[[VAL_19]]) -> (tensor) { +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_33]]{{\[}}%[[VAL_40]]] : memref +// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_34]]{{\[}}%[[VAL_40]]] : memref +// CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_40]], %[[VAL_8]] : index +// CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_34]]{{\[}}%[[VAL_44]]] : memref +// CHECK: %[[VAL_46:.*]] = scf.for %[[VAL_47:.*]] = %[[VAL_43]] to %[[VAL_45]] step %[[VAL_8]] iter_args(%[[VAL_48:.*]] = %[[VAL_41]]) -> (tensor) { +// CHECK: %[[VAL_49:.*]] = memref.load 
%[[VAL_35]]{{\[}}%[[VAL_47]]] : memref +// CHECK: %[[VAL_50:.*]] = memref.load %[[VAL_36]]{{\[}}%[[VAL_47]]] : memref +// CHECK: %[[VAL_51:.*]] = arith.addi %[[VAL_42]], %[[VAL_9]] : index +// CHECK: %[[VAL_52:.*]] = tensor.insert %[[VAL_50]] into %[[VAL_48]]{{\[}}%[[VAL_51]], %[[VAL_49]]] : tensor +// CHECK: scf.yield %[[VAL_52]] : tensor +// CHECK: } +// CHECK: scf.yield %[[VAL_46]] : tensor +// CHECK: } +// CHECK: %[[VAL_53:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_54:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_55:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_56:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_57:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_59:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_60:.*]] = scf.for %[[VAL_61:.*]] = %[[VAL_58]] to %[[VAL_59]] step %[[VAL_8]] iter_args(%[[VAL_62:.*]] = %[[VAL_39]]) -> (tensor) { +// CHECK: %[[VAL_63:.*]] = memref.load %[[VAL_54]]{{\[}}%[[VAL_61]]] : memref +// CHECK: %[[VAL_64:.*]] = memref.load %[[VAL_55]]{{\[}}%[[VAL_61]]] : memref +// CHECK: %[[VAL_65:.*]] = arith.addi %[[VAL_61]], %[[VAL_8]] : index +// CHECK: %[[VAL_66:.*]] = memref.load %[[VAL_55]]{{\[}}%[[VAL_65]]] : memref +// CHECK: %[[VAL_67:.*]] = scf.for %[[VAL_68:.*]] = %[[VAL_64]] to %[[VAL_66]] step %[[VAL_8]] iter_args(%[[VAL_69:.*]] = %[[VAL_62]]) -> (tensor) { +// CHECK: %[[VAL_70:.*]] = memref.load %[[VAL_56]]{{\[}}%[[VAL_68]]] : memref +// CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_57]]{{\[}}%[[VAL_68]]] : memref +// CHECK: %[[VAL_72:.*]] = arith.addi %[[VAL_63]], %[[VAL_5]] : index +// CHECK: %[[VAL_73:.*]] = 
tensor.insert %[[VAL_71]] into %[[VAL_69]]{{\[}}%[[VAL_72]], %[[VAL_70]]] : tensor +// CHECK: scf.yield %[[VAL_73]] : tensor +// CHECK: } +// CHECK: scf.yield %[[VAL_67]] : tensor +// CHECK: } +// CHECK: return %[[VAL_60]] : tensor +// CHECK: } func.func @concat_sparse_sparse_dense(%arg0: tensor<2x4xf64, #DCSR>, %arg1: tensor<3x4xf64, #DCSR>, %arg2: tensor<4x4xf64, #DCSR>)