iree-org · erman-gurses · Feb 4, 2025 · Feb 5, 2025 · Feb 5, 2025 · Feb 5, 2025
@@ -45,11 +45,14 @@ verifyLoweringConfiguration(FunctionOpInterface funcOp,
                             IREE::Codegen::TranslationInfoAttr translationInfo,
                             ArrayRef<int64_t> workgroupSize, F verificationFn) {
   auto walkResult = funcOp.walk([&](Operation *op) -> WalkResult {
-    auto loweringConfig =
+    auto codegenLoweringConfig =
         getLoweringConfig<IREE::Codegen::LoweringConfigAttr>(op);
-    if (!loweringConfig)
+    auto gpuLoweringConfig =
+        getLoweringConfig<IREE::GPU::LoweringConfigAttr>(op);
+    if (!codegenLoweringConfig && !gpuLoweringConfig)
       return WalkResult::advance();
-    return verificationFn(op, loweringConfig, translationInfo, workgroupSize);
+    return verificationFn(op, codegenLoweringConfig, translationInfo,
+                          gpuLoweringConfig, workgroupSize);
   });
   return failure(walkResult.wasInterrupted());
 }

@@ -94,8 +94,9 @@ void buildLLVMGPUCodegenPassPipeline(OpPassManager &variantPassManagery,
 /// Lowering calling vectorization patterns.
 ///
 /// Verifies the lowering configuration attached to `op` for the GPU matmul
 /// pipelines. Returns success without checking anything when the pipeline
 /// in `translationInfo` is not LLVMGPUMatmulTensorCore,
 /// LLVMGPUMatmulTensorCoreMmaSync or LLVMGPUVectorDistribute, or when `op`
 /// is not a linalg matmul / batch matmul. Otherwise emits an op error and
 /// fails if a check is violated.
 ///
 /// `codegenloweringConfig` supplies the workgroup-level tile sizes,
 /// `gpuloweringConfig` the reduction tiling level that is checked against
 /// the op's iterator types, and `workgroupSize` must be non-empty for the
 /// Tensor Core pipelines.
 /// NOTE(review): the definition names the GPU config parameter
 /// `gpuLoweringConfig`; consider using the same spelling here.
 LogicalResult
 verifyGPUMatmulPipeline(Operation *op,
-                        IREE::Codegen::LoweringConfigAttr loweringConfig,
+                        IREE::Codegen::LoweringConfigAttr codegenloweringConfig,
                         IREE::Codegen::TranslationInfoAttr translationInfo,
+                        IREE::GPU::LoweringConfigAttr gpuloweringConfig,
                         ArrayRef<int64_t> workgroupSize);
 
 //----------------------------------------------------------------------------//

@@ -72,20 +72,65 @@ getInstructionShape(Operation *op, CodeGenPipeline pipeline,
 /// and Tensor Core pipelines.
 LogicalResult
 verifyGPUMatmulPipeline(Operation *op,
-                        IREE::Codegen::LoweringConfigAttr loweringConfig,
+                        IREE::Codegen::LoweringConfigAttr codegenloweringConfig,
                         IREE::Codegen::TranslationInfoAttr translationInfo,
+                        IREE::GPU::LoweringConfigAttr gpuLoweringConfig,
                         ArrayRef<int64_t> workgroupSize) {
-  // This verifier only applies to matmul.
   CodeGenPipeline pipeline = translationInfo.getDispatchLoweringPassPipeline();
+
   if (pipeline != CodeGenPipeline::LLVMGPUMatmulTensorCore &&
-      pipeline != CodeGenPipeline::LLVMGPUMatmulTensorCoreMmaSync) {
+      pipeline != CodeGenPipeline::LLVMGPUMatmulTensorCoreMmaSync &&
+      pipeline != CodeGenPipeline::LLVMGPUVectorDistribute) {
     return success();
   }
+
   // Only verify batched and unbatched matmul.
   if (!isa<linalg::MatmulOp, linalg::BatchMatmulOp>(op)) {
     return success();
   }
 
+  uint32_t reduction = static_cast<uint32_t>(IREE::GPU::TilingLevel::Reduction);
+  uint numLoops = llvm::cast<linalg::MatmulOp>(op).getNumLoops();
+  size_t size = 0;
+  if (gpuLoweringConfig.hasTilingLevel(reduction)) {
+    SmallVector<int64_t> reductionTileSizes =
+        gpuLoweringConfig.getStaticTilingLevelSizes(reduction, op);
+
+    if (!reductionTileSizes.empty()) {
+      size = reductionTileSizes.size();
+    }
+
+    if (size > numLoops) {
+      // return op->emitOpError(
+      //     "expected number of reduction tile size is equal or "
+      //     "less than number of loops");
+    }
+    for (size_t i = 0; i < size; ++i) {
+      if (reductionTileSizes[i] > 0 &&
+          llvm::cast<linalg::MatmulOp>(op).getIteratorTypesArray()[i] !=
+              utils::IteratorType::reduction) {
+        return op->emitOpError(
+            "expected to non-zero reduction tile has reduction iterator");
+      }
+    }
+  }
+  // SmallVector<int64_t> workgroupTileSizes =
+  //     gpuLoweringConfig.getWorkgroupTileSizes();
+  // size = workgroupTileSizes.size();
+
+  // for (size_t i = 0; i < size; ++i) {
+  // if (workgroupTileSizes[i] > 0 &&
+  //     llvm::cast<linalg::MatmulOp>(op).getIteratorTypesArray()[i] !=
+  //         utils::IteratorType::parallel) {
+  //   return op->emitOpError(
+  //       "expected to non-zero workgroup tile has parallel iterator");
+  // }
+  // }
+
+  if (pipeline == CodeGenPipeline::LLVMGPUVectorDistribute) {
+    return success();
+  }
+
   // Early exit if the workgroup size is not set.
   if (workgroupSize.empty()) {
     return op->emitOpError("expected workgroup size for GPU pipelines");
@@ -123,7 +168,7 @@ verifyGPUMatmulPipeline(Operation *op,
 
   // Tile shapes in number of elements.
   SmallVector<int64_t> tileShape =
-      loweringConfig.getTileSizeVals(kWorkgroupTileLevel);
+      codegenloweringConfig.getTileSizeVals(kWorkgroupTileLevel);
   SmallVector<int64_t> threadBlockShape{tileShape};
 
   if (auto batchMatmulOp = dyn_cast<linalg::BatchMatmulOp>(op)) {