From fa505cfad846dfb4935fe46c15bb524f05f614b8 Mon Sep 17 00:00:00 2001 From: hanhanW Date: Thu, 2 Nov 2023 13:59:43 -0700 Subject: [PATCH 1/4] enable data tiling by default --- .../Codegen/LLVMCPU/KernelDispatch.cpp | 7 +++++-- .../compiler/GlobalOptimization/Passes.cpp | 21 +++++++++++-------- .../src/iree/compiler/Pipelines/Options.h | 2 +- .../regression/trace_dispatch_tensors.mlir | 1 + tests/e2e/tosa_ops/BUILD.bazel | 3 +++ tests/e2e/tosa_ops/CMakeLists.txt | 1 + tests/transform_dialect/cpu/matmul.mlir | 1 + .../cpu/matmul_library_call.mlir | 2 ++ 8 files changed, 26 insertions(+), 12 deletions(-) diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp index 2ad5bc5f62257..32109f5f36bb2 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp @@ -10,6 +10,7 @@ #include "iree-dialects/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "iree/compiler/Codegen/Common/TileSizeSelection.h" +#include "iree/compiler/Codegen/Dialect/IREECodegenAttrs.h" #include "iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h" #include "iree/compiler/Codegen/LLVMCPU/Utils.h" #include "iree/compiler/Codegen/TransformStrategies/CPU/Common.h" @@ -2591,13 +2592,15 @@ setTranslationInfoAndRootConfig(func::FuncOp entryPointFn, // Ignore the tile sizes adjustment. auto pipeline = getTranslationInfo(entryPointFn).getPassPipeline().getValue(); if (pipeline != DispatchLoweringPassPipeline::TransformDialectCodegen) { - if (failed(adjustTileSizesForUnPackOp(entryPointFn, rootOperation))) { + if (failed(adjustTileSizesForUnPackOp(entryPointFn, rootOperation)) && + !hasMicrokernels(targetAttr)) { return failure(); } // Set vector level tile sizes for other operations individually. if (failed(setLoweringConfigForComputeOps(entryPointFn, computeOps, - rootOperation))) { + rootOperation)) && + !hasMicrokernels(targetAttr)) { return failure(); } } diff --git a/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp b/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp index f56b0c35e32bf..850c683027908 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp +++ b/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp @@ -79,16 +79,17 @@ void buildGlobalOptimizationPassPipeline( clEnableQuantizedMatmulReassociation); }) .addPass(mlir::createCanonicalizerPass) - .addPass(mlir::createCSEPass) - // Expand all vectors in vecmat/matvec ops into matrices for tiling. - .addPredicatedPass(transformOptions.options.dataTiling, - createExpandVectorsPass) - // Enable data tiling after they are in a canonical form. - .addPredicatedPass(transformOptions.options.dataTiling, - createSetEncodingPass) - .addPass(mlir::createCanonicalizerPass) .addPass(mlir::createCSEPass); - mainPassManager.addPass(createMaterializeHomogeneousEncodingsPass()); + + // Enable data tiling after they are in a canonical form. + if (transformOptions.options.dataTiling) { + // Expand all vectors in vecmat/matvec ops into matrices for tiling. + mainPassManager.addPass(createExpandVectorsPass()); + mainPassManager.addPass(createSetEncodingPass()); + mainPassManager.addPass(createMaterializeHomogeneousEncodingsPass()); + mainPassManager.addPass(createCanonicalizerPass()); + mainPassManager.addPass(createCSEPass()); + } OpPassManager pipeline(ModuleOp::getOperationName()); FunctionLikeNest(pipeline) @@ -101,6 +102,8 @@ void buildGlobalOptimizationPassPipeline( pipeline.addPass(IREE::Util::createApplyPatternsPass()); pipeline.addPass(IREE::Util::createFoldGlobalsPass()); pipeline.addPass(IREE::Util::createIPOPass()); + pipeline.addPass(createCanonicalizerPass()); + pipeline.addPass(createCSEPass()); if (transformOptions.options.constExprHoisting) { pipeline.addPass(IREE::Util::createHoistIntoGlobalsPass( diff --git a/compiler/src/iree/compiler/Pipelines/Options.h b/compiler/src/iree/compiler/Pipelines/Options.h index 0909ab8bb82ec..2827ac0000629 100644 --- a/compiler/src/iree/compiler/Pipelines/Options.h +++ b/compiler/src/iree/compiler/Pipelines/Options.h @@ -80,7 +80,7 @@ struct GlobalOptimizationOptions { bool demoteI64ToI32 = false; // Enables data tiling. - bool dataTiling = false; + bool dataTiling = true; // Enables const-expr hoisting into globals. bool constExprHoisting = true; diff --git a/tests/e2e/regression/trace_dispatch_tensors.mlir b/tests/e2e/regression/trace_dispatch_tensors.mlir index e60c63435d9ae..ffdf365bd471c 100644 --- a/tests/e2e/regression/trace_dispatch_tensors.mlir +++ b/tests/e2e/regression/trace_dispatch_tensors.mlir @@ -2,6 +2,7 @@ // RUN: --Xcompiler,iree-input-type=stablehlo \ // RUN: --Xcompiler,iree-hal-target-backends=vmvx \ // RUN: --Xcompiler,iree-flow-trace-dispatch-tensors \ +// RUN: --Xcompiler,iree-opt-data-tiling=false \ // RUN: %s 2>&1 | FileCheck %s func.func @two_dispatch() -> (tensor<5x5xf32>, tensor<3x5xf32>) { diff --git a/tests/e2e/tosa_ops/BUILD.bazel b/tests/e2e/tosa_ops/BUILD.bazel index a554c080acb84..d3dcf3f324020 100644 --- a/tests/e2e/tosa_ops/BUILD.bazel +++ b/tests/e2e/tosa_ops/BUILD.bazel @@ -181,6 +181,9 @@ iree_check_single_backend_test_suite( name = "check_vmvx_local-sync_microkernels", srcs = VMVX_MICROKERNELS_SRCS, compiler_flags = [ + # TODO(15314): Remove the flag once vmvx supports batch_matmul on + # ukernel path. + "--iree-opt-data-tiling=false", "--iree-vmvx-enable-microkernels", ], # Sync has more strict runtime error checking for mis-compiled programs. diff --git a/tests/e2e/tosa_ops/CMakeLists.txt b/tests/e2e/tosa_ops/CMakeLists.txt index 97aebe2e58e8b..c8d3cf0d816ef 100644 --- a/tests/e2e/tosa_ops/CMakeLists.txt +++ b/tests/e2e/tosa_ops/CMakeLists.txt @@ -165,6 +165,7 @@ iree_check_single_backend_test_suite( DRIVER "local-sync" COMPILER_FLAGS + "--iree-opt-data-tiling=false" "--iree-vmvx-enable-microkernels" INPUT_TYPE "tosa" diff --git a/tests/transform_dialect/cpu/matmul.mlir b/tests/transform_dialect/cpu/matmul.mlir index 63d059e630457..1af272fe62185 100644 --- a/tests/transform_dialect/cpu/matmul.mlir +++ b/tests/transform_dialect/cpu/matmul.mlir @@ -26,6 +26,7 @@ func.func @matmul_static( // CODEGEN-DEFAULT: hal.return %[[C2]], %[[C1]], %[[C1]] // RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu \ +// RUN: --iree-opt-data-tiling=false \ // RUN: --iree-codegen-use-transform-dialect-strategy=%p/matmul_codegen_default_spec.mlir | \ // RUN: iree-run-module --module=- --function=matmul_static \ // RUN: --input="3x5xf32=1" \ diff --git a/tests/transform_dialect/cpu/matmul_library_call.mlir b/tests/transform_dialect/cpu/matmul_library_call.mlir index 211a598899449..e2f066ad54198 100644 --- a/tests/transform_dialect/cpu/matmul_library_call.mlir +++ b/tests/transform_dialect/cpu/matmul_library_call.mlir @@ -13,6 +13,7 @@ module { } // RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu \ +// RUN: --iree-opt-data-tiling=false \ // RUN: --iree-codegen-use-transform-dialect-strategy=custom_matmul \ // RUN: --iree-codegen-transform-dialect-library=%p/transform_library.mlir \ // RUN: --compile-to=executable-targets | \ @@ -24,6 +25,7 @@ module { // CODEGEN-DEFAULT: hal.return %[[C2]], %[[C1]], %[[C1]] // RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu \ +// RUN: --iree-opt-data-tiling=false \ // RUN: --iree-codegen-transform-dialect-library=%p/transform_library.mlir \ // RUN: --iree-codegen-use-transform-dialect-strategy=custom_matmul | \ // RUN: iree-run-module --module=- --function=matmul_static \ From bc154737c128eb1f65140165f218cb5fee30a954 Mon Sep 17 00:00:00 2001 From: hanhanW Date: Fri, 20 Oct 2023 16:13:05 -0700 Subject: [PATCH 2/4] enable microkernels by default --- .../iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp index cbf49e25a91c5..da3c32c620c03 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp @@ -49,7 +49,7 @@ static llvm::cl::opt clEnableCPUMicrokernels( "iree-llvmcpu-enable-microkernels", llvm::cl::desc( "Enables microkernel lowering for llvmcpu backend (experimental)"), - llvm::cl::init(false)); + llvm::cl::init(true)); static llvm::cl::opt clNativeVectorWidthInBytes( "iree-llvmcpu-native-vector-width-in-bytes", From 94df92a387d9743e45ef0c1fdd2217c6e807d24b Mon Sep 17 00:00:00 2001 From: hanhanW Date: Sat, 11 Nov 2023 03:31:10 +0000 Subject: [PATCH 3/4] remove unneeded changes --- tests/e2e/regression/trace_dispatch_tensors.mlir | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/e2e/regression/trace_dispatch_tensors.mlir b/tests/e2e/regression/trace_dispatch_tensors.mlir index b948ca0da5a64..b16d938820a6d 100644 --- a/tests/e2e/regression/trace_dispatch_tensors.mlir +++ b/tests/e2e/regression/trace_dispatch_tensors.mlir @@ -1,7 +1,6 @@ // RUN: iree-run-mlir \ // RUN: --Xcompiler,iree-hal-target-backends=vmvx \ // RUN: --Xcompiler,iree-flow-trace-dispatch-tensors \ -// RUN: --Xcompiler,iree-opt-data-tiling=false \ // RUN: %s 2>&1 | FileCheck %s func.func @two_dispatch() -> (tensor<15xf32>) { From d8fa65ff2cc47ecdd46791282d312553314d2a56 Mon Sep 17 00:00:00 2001 From: hanhanW Date: Sat, 11 Nov 2023 04:44:45 +0000 Subject: [PATCH 4/4] disable ukernel --- .../iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp index 00ab66b34cc64..346b2c3290a1b 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp @@ -49,7 +49,7 @@ static llvm::cl::opt clEnableCPUMicrokernels( "iree-llvmcpu-enable-microkernels", llvm::cl::desc( "Enables microkernel lowering for llvmcpu backend (experimental)"), - llvm::cl::init(true)); + llvm::cl::init(false)); static llvm::cl::opt clLinkCPUUKernelBitcode( "iree-llvmcpu-link-ukernel-bitcode",