[LLVMCPU] Add LLVMCPU support for winograd.filter_transform op (#17105)
This PR adds a tiling configuration for the `winograd.filter_transform`
op in the LLVMCPU winograd pipeline.
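
For orientation, `winograd.filter_transform` moves a convolution filter into the Winograd domain. With `output_tile_size(6)` and `kernel_size(3)`, each 3x3 kernel slice expands to an 8x8 tile, since the transformed tile size is 6 + 3 - 1 = 8. Below is a minimal sketch of the op after this configuration runs, with SSA names and shapes reused from the new test case in this commit and attribute values taken from its CHECK lines (the [32, 64] distribution tiles are heuristic and target-dependent; the [1, 1] vector tiles are the fixed part):

#config = #iree_codegen.lowering_config<tile_sizes = [[32, 64], [1, 1]]>
%4 = iree_linalg_ext.winograd.filter_transform {lowering_config = #config}
       output_tile_size(6) kernel_size(3) kernel_dimensions([0, 1])
       ins(%2 : tensor<3x3x64x128xf32>)
       outs(%3 : tensor<8x8x64x128xf32>) -> tensor<8x8x64x128xf32>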
Max191 authored Apr 26, 2024
1 parent 1ac066a commit ab54a60
Showing 2 changed files with 51 additions and 32 deletions.
58 changes: 26 additions & 32 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -1672,45 +1672,37 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
       entryPointFn, fftOp, tileSizes, DispatchLoweringPassPipeline::CPUDefault);
 }
 
-/// Sets the lowering configuration for dispatch region for
-/// linalg_ext.winograd.input_transform root op.
+/// Sets the lowering configuration for dispatch region for winograd ops:
+///   linalg_ext.winograd.filter_transform
+///   linalg_ext.winograd.input_transform
+///   linalg_ext.winograd.output_transform
+/// The vector tile sizes should be 1 for each dim here, because
+/// the winograd decomposition relies on these unit dimensions.
+template <typename WinogradOp>
 static LogicalResult
-setRootConfig(mlir::FunctionOpInterface entryPointFn,
-              IREE::LinalgExt::WinogradInputTransformOp inputOp) {
-  assert(!getLoweringConfig(inputOp) && "expected lowering_config is not set");
-  auto iterationRank = inputOp.getIterationDomainRank();
-  SmallVector<int64_t> vecSizeHints(iterationRank, 1);
-  DistributionHeuristicConfig distConfig;
-  distConfig.vectorSizeHints = vecSizeHints;
-  SmallVector<int64_t> distTileSizes =
-      getDefaultDistributedLevelTileSizes(inputOp, distConfig);
-  TileSizesListType tileSizes;
-  tileSizes.push_back(distTileSizes);
-  SmallVector<int64_t> vecTileSizes(iterationRank, 1);
-  tileSizes.push_back(vecTileSizes);
-  return setOpConfigAndEntryPointFnTranslation(
-      entryPointFn, inputOp, tileSizes,
-      DispatchLoweringPassPipeline::CPULinalgExtTileAndVectorize);
-}
-
-/// Sets the lowering configuration for dispatch region for
-/// linalg_ext.winograd.input_transform root op.
-static LogicalResult
-setRootConfig(mlir::FunctionOpInterface entryPointFn,
-              IREE::LinalgExt::WinogradOutputTransformOp outputOp) {
-  assert(!getLoweringConfig(outputOp) && "expected lowering_config is not set");
-  auto iterationRank = outputOp.getIterationDomainRank();
+setWinogradRootConfig(mlir::FunctionOpInterface entryPointFn,
+                      WinogradOp winogradOp) {
+  static_assert(
+      std::is_same<WinogradOp, IREE::LinalgExt::WinogradInputTransformOp>() ||
+          std::is_same<WinogradOp,
+                       IREE::LinalgExt::WinogradOutputTransformOp>() ||
+          std::is_same<WinogradOp,
+                       IREE::LinalgExt::WinogradFilterTransformOp>(),
+      "op expected to be a winograd op");
+  assert(!getLoweringConfig(winogradOp) &&
+         "expected lowering_config is not set");
+  auto iterationRank = winogradOp.getIterationDomainRank();
   SmallVector<int64_t> vecSizeHints(iterationRank, 1);
   DistributionHeuristicConfig distConfig;
   distConfig.vectorSizeHints = vecSizeHints;
   SmallVector<int64_t> distTileSizes =
-      getDefaultDistributedLevelTileSizes(outputOp, distConfig);
+      getDefaultDistributedLevelTileSizes(winogradOp, distConfig);
   TileSizesListType tileSizes;
   tileSizes.push_back(distTileSizes);
   SmallVector<int64_t> vecTileSizes(iterationRank, 1);
   tileSizes.push_back(vecTileSizes);
   return setOpConfigAndEntryPointFnTranslation(
-      entryPointFn, outputOp, tileSizes,
+      entryPointFn, winogradOp, tileSizes,
       DispatchLoweringPassPipeline::CPULinalgExtTileAndVectorize);
 }
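
The templated helper above folds what were two near-identical overloads for the input and output transforms into one and extends the same recipe to the filter transform. Its observable result is a two-level tile-size list plus a pipeline selection; a sketch with the values the new test expects (level-0 distribution tiles come from getDefaultDistributedLevelTileSizes and vary by target; the level-1 unit tiles are what the winograd decomposition relies on):

// Level 0 distributes the iteration domain across workgroups; for the
// 3x3x64x128 filter in the new test this is the 2-D 64x128 C x F space.
// Level 1 holds the unit vector tiles.
#iree_codegen.lowering_config<tile_sizes = [[32, 64], [1, 1]]>
// Pipeline set by setOpConfigAndEntryPointFnTranslation:
#iree_codegen.translation_info<CPULinalgExtTileAndVectorize>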

@@ -2301,11 +2293,13 @@ setRootConfigImpl(mlir::FunctionOpInterface entryPointFn, Operation *op,
                              targetMLTransInfo);
         })
         .Case<IREE::LinalgExt::AttentionOp, IREE::LinalgExt::FftOp,
-              IREE::LinalgExt::WinogradInputTransformOp,
-              IREE::LinalgExt::WinogradOutputTransformOp, tensor::PackOp,
-              tensor::PadOp, tensor::UnPackOp, linalg::Mmt4DOp,
+              tensor::PackOp, tensor::PadOp, tensor::UnPackOp, linalg::Mmt4DOp,
               linalg::BatchMmt4DOp>(
             [&](auto op) { return setRootConfig(entryPointFn, op); })
+        .Case<IREE::LinalgExt::WinogradFilterTransformOp,
+              IREE::LinalgExt::WinogradInputTransformOp,
+              IREE::LinalgExt::WinogradOutputTransformOp>(
+            [&](auto op) { return setWinogradRootConfig(entryPointFn, op); })
         .Case<linalg::Conv2DNhwcHwcfOp, linalg::Conv2DNchwFchwOp,
               linalg::PoolingNhwcSumOp, linalg::PoolingNhwcMaxOp,
               linalg::PoolingNhwcMaxUnsignedOp, linalg::PoolingNhwcMinOp,
25 changes: 25 additions & 0 deletions in the second changed file (an LLVMCPU lowering-strategy lit test)
@@ -1587,6 +1587,31 @@ module {
 
 // -----
 
+#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
+  cpu = "generic", cpu_features = "",
+  data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
+  native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
+module {
+  func.func @winograd_filter_transform() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
+    %c0 = arith.constant 0 : index
+    %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x64x128xf32>>
+    %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x64x128xf32>>
+    %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [3, 3, 64, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x64x128xf32>> -> tensor<3x3x64x128xf32>
+    %3 = tensor.empty() : tensor<8x8x64x128xf32>
+    %4 = iree_linalg_ext.winograd.filter_transform output_tile_size(6) kernel_size(3) kernel_dimensions([0, 1]) ins(%2 : tensor<3x3x64x128xf32>) outs(%3 : tensor<8x8x64x128xf32>) -> tensor<8x8x64x128xf32>
+    flow.dispatch.tensor.store %4, %1, offsets = [0, 0, 0, 0], sizes = [8, 8, 64, 128], strides = [1, 1, 1, 1] : tensor<8x8x64x128xf32> -> !flow.dispatch.tensor<writeonly:tensor<8x8x64x128xf32>>
+    return
+  }
+}
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[32, 64], [1, 1]]>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPULinalgExtTileAndVectorize>
+// CHECK: func.func @winograd_filter_transform()
+// CHECK-SAME: translation_info = #[[TRANSLATION]]
+// CHECK: iree_linalg_ext.winograd.filter_transform
+// CHECK-SAME: {lowering_config = #[[CONFIG]]}
+
+// -----
+
 #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
   cpu = "generic", cpu_features = "",
   data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
