Skip to content

Commit

Permalink
[Codegen][Tuner] Add support for default tuning specs (#19394)
Browse files Browse the repository at this point in the history
These default specs are target architecture-specific and will be shipped
with the compiler.

* Default specs belong to target plugins and get embedded in
`libIREECompiler.so`, just like ukernels.
* Plugins then register their default tuning specs with the default
embedded directory.
* We store them as mlir text. We can't easily assemble them as mlir
bytecode without taking a circular dependency on iree-opt. We can
revisit this in the future and add a new tool `iree-as` that will only
link with dialects.
* After the initial loading, we cache the default specs in the IREE
codegen dialect transform library manager.
* Add a placeholder spec for gfx942.
* Document and test the inclusion order. User specs come before default
specs.

Issue: #19214
  • Loading branch information
kuhar authored Dec 9, 2024
1 parent b2c5f3b commit c62c3d0
Show file tree
Hide file tree
Showing 19 changed files with 464 additions and 40 deletions.
1 change: 1 addition & 0 deletions compiler/plugins/target/ROCM/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ iree_compiler_cc_library(
"ROCMTargetUtils.h",
],
deps = [
"//compiler/plugins/target/ROCM/builtins/tuning:iree_default_tuning_specs_amdgpu",
"//compiler/plugins/target/ROCM/builtins/ukernel:iree_uk_amdgpu_bitcode",
"//compiler/src/iree/compiler/Codegen/Common",
"//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect",
Expand Down
1 change: 1 addition & 0 deletions compiler/plugins/target/ROCM/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ iree_cc_library(
iree::compiler::Dialect::HAL::Utils::LLVMLinkerUtils
iree::compiler::PluginAPI
iree::compiler::Utils
iree::compiler::plugins::target::ROCM::builtins::tuning::iree_default_tuning_specs_amdgpu
iree::compiler::plugins::target::ROCM::builtins::ukernel::iree_uk_amdgpu_bitcode
iree::schemas::amdgpu_executable_def_c_fbs
iree::schemas::executable_debug_info_c_fbs
Expand Down
27 changes: 21 additions & 6 deletions compiler/plugins/target/ROCM/ROCMTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include <cstdint>

#include "compiler/plugins/target/ROCM/builtins/tuning/iree_default_tuning_specs_amdgpu.h"
#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_bitcode.h"
#include "iree/compiler/Codegen/Common/Passes.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h"
Expand Down Expand Up @@ -900,14 +901,9 @@ struct ROCMSession final
}
};

} // namespace

} // namespace mlir::iree_compiler::IREE::HAL

// Iterate over ukernel bitcode embedded-data files, and insert them into the
// EmbeddedDataDirectory singleton.
static void addAMDGPUUkernelBitcodeToGlobalEmbeddedDataDirectory() {
using mlir::iree_compiler::EmbeddedDataDirectory;
EmbeddedDataDirectory::withGlobal([](EmbeddedDataDirectory &dir) {
const iree_file_toc_t *toc = iree_uk_amdgpu_bitcode_create();
for (size_t i = 0; i < iree_uk_amdgpu_bitcode_size(); ++i) {
Expand All @@ -916,11 +912,30 @@ static void addAMDGPUUkernelBitcodeToGlobalEmbeddedDataDirectory() {
});
}

// Iterate over default tuning spec embedded-data files, and insert them into
// the EmbeddedDataDirectory singleton.
static void addAMDGPUDefaultTuningSpecsToGlobalEmbeddedDataDirectory() {
EmbeddedDataDirectory::withGlobal([](EmbeddedDataDirectory &dir) {
const iree_file_toc_t *toc = iree_default_tuning_specs_amdgpu_create();
for (size_t i = 0, e = iree_default_tuning_specs_amdgpu_size(); i != e;
++i) {
dir.addFile(toc[i].name, llvm::StringRef{toc[i].data, toc[i].size});
}
});
}

} // namespace

} // namespace mlir::iree_compiler::IREE::HAL

// Plugin registration entry point for the ROCM HAL target. Registers
// ROCMSession with the given registrar under the id "hal_target_rocm", then
// adds the embedded ukernel bitcode files and the embedded default tuning spec
// files to the global EmbeddedDataDirectory. Always returns true.
extern "C" bool iree_register_compiler_plugin_hal_target_rocm(
mlir::iree_compiler::PluginRegistrar *registrar) {
registrar->registerPlugin<mlir::iree_compiler::IREE::HAL::ROCMSession>(
"hal_target_rocm");
// NOTE(review): the unqualified call directly below and the
// namespace-qualified call after it name the same helper. This looks like the
// pre- and post-change lines of a diff hunk rendered together -- confirm that
// only the qualified call is meant to remain.
addAMDGPUUkernelBitcodeToGlobalEmbeddedDataDirectory();
mlir::iree_compiler::IREE::HAL::
addAMDGPUUkernelBitcodeToGlobalEmbeddedDataDirectory();
// The default tuning specs are registered after the ukernel bitcode.
mlir::iree_compiler::IREE::HAL::
addAMDGPUDefaultTuningSpecsToGlobalEmbeddedDataDirectory();
return true;
}

Expand Down
53 changes: 53 additions & 0 deletions compiler/plugins/target/ROCM/builtins/tuning/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright 2024 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

load("//build_tools/bazel:build_defs.oss.bzl", "iree_cmake_extra_content")
load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
load("//build_tools/embed_data:build_defs.bzl", "iree_c_embed_data")

# Package-wide defaults: every target here is publicly visible and built with
# the layering_check feature enabled.
package(
default_visibility = ["//visibility:public"],
features = ["layering_check"],
licenses = ["notice"], # Apache 2.0
)

# CMake-only content copied into the generated CMakeLists.txt: skip the rest of
# this directory when the ROCM target backend is not enabled.
iree_cmake_extra_content(
content = """
if(NOT IREE_TARGET_BACKEND_ROCM)
return()
endif()
""",
inline = True,
)

# Target archs for tuning specs. https://llvm.org/docs/AMDGPUUsage.html#processors
# Every arch listed here needs a matching
# `iree_default_tuning_spec_<arch>.mlir` source file.
gpu_archs = [
"gfx942",
]

# One spec file name per arch, e.g. "iree_default_tuning_spec_gfx942.mlir".
# Used as the sources of both the embed-data target and the verification test
# suite in this file.
tuning_spec_mlir_files = [
"iree_default_tuning_spec_%s.mlir" % gpu_arch
for gpu_arch in gpu_archs
]

# Embeds the default tuning spec files as C data and generates the
# iree_default_tuning_specs_amdgpu.c / .h pair. The specs are embedded as MLIR
# text; they are not assembled at build time.
# NOTE(review): `flatten = True` presumably drops directory components from the
# embedded file names -- confirm against the iree_c_embed_data rule.
iree_c_embed_data(
name = "iree_default_tuning_specs_amdgpu",
srcs = tuning_spec_mlir_files,
c_file_output = "iree_default_tuning_specs_amdgpu.c",
flatten = True,
h_file_output = "iree_default_tuning_specs_amdgpu.h",
)

# Verify that the tuning specs are valid. We need this here because we do not
# assemble the tuning spec mlir and invalid specs do not lead to build errors.
# The spec files themselves are the test sources: each one carries its own
# `// RUN: iree-opt %s` line, so the only tool required is iree-opt.
iree_lit_test_suite(
name = "verify_default_tuning_specs_amdgpu",
srcs = tuning_spec_mlir_files,
cfg = "//compiler:lit.cfg.py",
tools = [
"//tools:iree-opt",
],
)
39 changes: 39 additions & 0 deletions compiler/plugins/target/ROCM/builtins/tuning/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
################################################################################
# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
# compiler/plugins/target/ROCM/builtins/tuning/BUILD.bazel #
# #
# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
# CMake-only content. #
# #
# To disable autogeneration for this file entirely, delete this header. #
################################################################################

iree_add_all_subdirs()

if(NOT IREE_TARGET_BACKEND_ROCM)
return()
endif()

iree_c_embed_data(
NAME
iree_default_tuning_specs_amdgpu
SRCS
"iree_default_tuning_spec_gfx942.mlir"
C_FILE_OUTPUT
"iree_default_tuning_specs_amdgpu.c"
H_FILE_OUTPUT
"iree_default_tuning_specs_amdgpu.h"
FLATTEN
PUBLIC
)

iree_lit_test_suite(
NAME
verify_default_tuning_specs_amdgpu
SRCS
"iree_default_tuning_spec_gfx942.mlir"
TOOLS
iree-opt
)

### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// RUN: iree-opt %s

// This is just an initial tuning spec for gfx942 and is not intended for
// production use.
// TODO(https://github.com/iree-org/iree/issues/19214): Add missing
// configurations to this spec.

// Default tuning spec module for gfx942. It holds a single named sequence.
module @iree_default_tuning_spec_gfx942 attributes { transform.with_named_sequence } {

// Placeholder entry point: takes the variant op as a read-only handle and
// yields immediately without applying any transformation.
transform.named_sequence @__kernel_config(%variant_op: !transform.any_op {transform.readonly}) -> ()
attributes { iree_codegen.tuning_spec_entrypoint } {
transform.yield
}

}
35 changes: 35 additions & 0 deletions compiler/plugins/target/ROCM/builtins/tuning/test/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright 2024 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

load("//build_tools/bazel:build_defs.oss.bzl", "iree_cmake_extra_content")
load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")

# Package-wide defaults: every target here is publicly visible and built with
# the layering_check feature enabled.
package(
default_visibility = ["//visibility:public"],
features = ["layering_check"],
licenses = ["notice"], # Apache 2.0
)

# CMake-only content copied into the generated CMakeLists.txt: skip these tests
# when the ROCM target backend is not enabled.
iree_cmake_extra_content(
content = """
if(NOT IREE_TARGET_BACKEND_ROCM)
return()
endif()
""",
inline = True,
)

# Lit tests for the default tuning specs. spec_gfx942.mlir runs iree-opt with
# default tuning specs enabled and expects a remark naming the gfx942 default
# spec's @__kernel_config entry point; its output is piped through FileCheck.
iree_lit_test_suite(
name = "lit",
srcs = [
"spec_gfx942.mlir",
],
cfg = "//compiler:lit.cfg.py",
tools = [
"//tools:iree-opt",
"@llvm-project//llvm:FileCheck",
],
)
27 changes: 27 additions & 0 deletions compiler/plugins/target/ROCM/builtins/tuning/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
################################################################################
# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
# compiler/plugins/target/ROCM/builtins/tuning/test/BUILD.bazel #
# #
# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
# CMake-only content. #
# #
# To disable autogeneration for this file entirely, delete this header. #
################################################################################

iree_add_all_subdirs()

if(NOT IREE_TARGET_BACKEND_ROCM)
return()
endif()

iree_lit_test_suite(
NAME
lit
SRCS
"spec_gfx942.mlir"
TOOLS
FileCheck
iree-opt
)

### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
28 changes: 28 additions & 0 deletions compiler/plugins/target/ROCM/builtins/tuning/test/spec_gfx942.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx942 \
// RUN: --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-configure-target-executable-variants{target=rocm})))" \
// RUN: --iree-codegen-enable-default-tuning-specs \
// RUN: --iree-codegen-notify-transform-strategy-application \
// RUN: --verify-diagnostics %s | FileCheck %s

// CHECK-LABEL: func.func @placeholder

// Minimal ROCM executable wrapping an empty function. The only expectation in
// the body is the remark on @placeholder stating that the gfx942 default
// tuning spec's @__kernel_config strategy was applied.
#pipeline_layout = #hal.pipeline.layout<bindings = [
#hal.pipeline.binding<storage_buffer>,
#hal.pipeline.binding<storage_buffer>,
#hal.pipeline.binding<storage_buffer>
]>
hal.executable public @main {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
hal.executable.export public @placeholder ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device):
%x, %y, %z = flow.dispatch.workgroup_count_from_slice
hal.return %x, %y, %z : index, index, index
}
builtin.module {
// expected-remark@+1 {{Applied transform configuration strategy @iree_default_tuning_spec_gfx942::@__kernel_config}}
func.func @placeholder() {
return
}
}
}
}
1 change: 1 addition & 0 deletions compiler/plugins/target/ROCM/test/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package(
iree_lit_test_suite(
name = "lit",
srcs = [
"default_tuning_specs_amdgpu.mlir",
"gpu_lower_to_ukernels.mlir",
"lowering_strategy_from_tuning_spec.mlir",
"ukernel_pipeline_transform.mlir",
Expand Down
1 change: 1 addition & 0 deletions compiler/plugins/target/ROCM/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ iree_lit_test_suite(
NAME
lit
SRCS
"default_tuning_specs_amdgpu.mlir"
"gpu_lower_to_ukernels.mlir"
"lowering_strategy_from_tuning_spec.mlir"
"ukernel_pipeline_transform.mlir"
Expand Down
55 changes: 55 additions & 0 deletions compiler/plugins/target/ROCM/test/default_tuning_specs_amdgpu.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-codegen-materialize-tuning-specs)' \
// RUN: --iree-codegen-enable-default-tuning-specs \
// RUN: --iree-codegen-dump-tuning-specs-to=- \
// RUN: --iree-gpu-test-target=gfx942 --mlir-disable-threading \
// RUN: --no-implicit-module %s | FileCheck %s --check-prefix=DEFAULT

// RUN: iree-opt --pass-pipeline='builtin.module(iree-codegen-materialize-tuning-specs)' \
// RUN: --iree-codegen-tuning-spec-path=%p/tuning_spec_mmt_tile_and_fuse.mlir \
// RUN: --iree-codegen-enable-default-tuning-specs \
// RUN: --iree-codegen-dump-tuning-specs-to=- \
// RUN: --iree-gpu-test-target=gfx942 --mlir-disable-threading \
// RUN: --no-implicit-module %s | FileCheck %s --check-prefix=BOTH

// Note: This test needs to be in the plugin subdirectory because it depends
// on the default spec that's only embedded in the compiler library when the
// ROCM plugin is built.

// ============================================================================

// Check that the default tuning spec gets materialized without linking.

// DEFAULT-LABEL: module @iree_default_tuning_spec_gfx942 attributes {transform.with_named_sequence}
// DEFAULT-LABEL: transform.named_sequence @__kernel_config
// DEFAULT-SAME: attributes {iree_codegen.tuning_spec_entrypoint}

// Check that the default tuning spec gets materialized as a module attribute.
// DEFAULT: module attributes
// DEFAULT-SAME: iree_codegen.tuning_spec_mlirbc = dense<{{.+}}> : vector<{{[0-9]+}}xi8>
// DEFAULT-LABEL: func.func @main_0

// ============================================================================

// Check that both the user tuning spec and the default spec get linked and
// materialized. The user spec should have precedence over the default one.

// BOTH-LABEL: module @iree_linked_tuning_spec attributes {transform.with_named_sequence}
// BOTH-LABEL: module @mmt_tile_and_fuse_spec_0 attributes {transform.with_named_sequence}
// BOTH-LABEL: transform.named_sequence @main
// BOTH-SAME: attributes {iree_codegen.tuning_spec_entrypoint}
// BOTH-LABEL: module @iree_default_tuning_spec_gfx942_1 attributes {transform.with_named_sequence}
// BOTH: transform.named_sequence @__kernel_config
// BOTH-SAME: attributes {iree_codegen.tuning_spec_entrypoint}
// BOTH: transform.named_sequence @__kernel_config
// BOTH: @mmt_tile_and_fuse_spec_0::@main
// BOTH: @iree_default_tuning_spec_gfx942_1::@__kernel_config

// BOTH: module attributes
// BOTH-SAME: iree_codegen.tuning_spec_mlirbc = dense<{{.+}}> : vector<{{[0-9]+}}xi8>
// BOTH-LABEL: func.func @main_0

// Minimal input module shared by both RUN configurations. The function body is
// empty; the expectations above match on the dumped tuning specs, the module
// attribute, and the @main_0 symbol.
module {
func.func @main_0() {
return
}
}
Loading

0 comments on commit c62c3d0

Please sign in to comment.