triton-lang · peterbell10 · Nov 5, 2024 · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024
@@ -522,7 +522,7 @@ void init_triton_ir(py::module &&m) {
              auto findTMA = [](ArrayRef<NamedAttribute> dictVals) {
                for (auto attr : dictVals) {
                  if (auto intAttr = dyn_cast<IntegerAttr>(attr.getValue())) {
-                   if (intAttr.getInt() == 1)
+                   if (attr.getName() == "tt.nv_tma_desc" && intAttr.getInt() == 1)
                      return true;
                  }
                }

diff --git a/python/test/unit/runtime/test_irsource.py b/python/test/unit/runtime/test_irsource.py
@@ -5,6 +5,7 @@
 
 target = triton.runtime.driver.active.get_current_target()
 
+
 def test_mlir_attribute_parsing() -> None:
     '''
     Tests that MLIR attributes are parsed correctly from input ttir/ttgir.
@@ -30,7 +31,7 @@ def test_mlir_attribute_parsing() -> None:
                                 %arg5: i32 {tt.divisibility = 16 : i32},
                                 %arg6: i32 {tt.divisibility = 16 : i32},
                                 %arg7: i32 {tt.divisibility = 16 : i32},
-                                %arg8: i32 {tt.divisibility = 16 : i32},
+                                %arg8: i32 {tt.divisibility = 16 : i32, tt.nv_tma_desc = 0 : i32},
                                 %desc: !tt.ptr<i8, 0> {tt.nv_tma_desc = 1 : i32}) attributes {noinline = false} {
     tt.return
   }
@@ -39,9 +40,7 @@ def test_mlir_attribute_parsing() -> None:
     with tempfile.NamedTemporaryFile(mode='w', suffix='.ttgir') as f:
         f.write(sample_ttgir)
         f.flush()
-        context = ir.context()
-        ir.load_dialects(context)
-        src = IRSource(f.name, context)
+        src = IRSource(f.name)
 
         # check name and type signature
         # should match ty_to_cpp(...)
@@ -53,5 +52,40 @@ def test_mlir_attribute_parsing() -> None:
         # check num warps
         assert src.parse_options()['num_warps'] == 8
 
+    sample_ttgir_vector_add = r"""
+    #blocked = #triton_gpu.blocked<{sizePerThread = [4], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
+    module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 4 : i32, triton_gpu.target = "cuda:90", "triton_gpu.threads-per-warp" = 32 : i32} {
+       tt.func public @add_kernel(%arg0: !tt.ptr<i32> {tt.divisibility = 16 : i32},
+       %arg1: !tt.ptr<i32> {tt.divisibility = 16 : i32},
+       %arg2: !tt.ptr<i32> {tt.divisibility = 16 : i32},
+       %arg3: i32 {tt.divisibility = 16 : i32})
+        attributes {noinline = false} {
+         %c1024_i32 = arith.constant 1024 : i32
+         %0 = tt.get_program_id x : i32
+         %1 = arith.muli %0, %c1024_i32 : i32
+         %2 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32, #blocked>
+         %3 = tt.splat %1 : i32 -> tensor<1024xi32, #blocked>
+         %4 = arith.addi %3, %2 : tensor<1024xi32, #blocked>
+         %5 = tt.splat %arg3 : i32 -> tensor<1024xi32, #blocked>
+         %6 = arith.cmpi slt, %4, %5 : tensor<1024xi32, #blocked>
+         %7 = tt.splat %arg0 : !tt.ptr<i32> -> tensor<1024x!tt.ptr<i32>, #blocked>
+         %8 = tt.addptr %7, %4 : tensor<1024x!tt.ptr<i32>, #blocked>, tensor<1024xi32, #blocked>
+         %9 = tt.load %8, %6 : tensor<1024x!tt.ptr<i32>, #blocked>
+         %10 = tt.splat %arg1 : !tt.ptr<i32> -> tensor<1024x!tt.ptr<i32>, #blocked>
+         %11 = tt.addptr %10, %4 : tensor<1024x!tt.ptr<i32>, #blocked>, tensor<1024xi32, #blocked>
+         %12 = tt.load %11, %6 : tensor<1024x!tt.ptr<i32>, #blocked>
+         %13 = arith.addi %9, %12 : tensor<1024xi32, #blocked>
+         %14 = tt.splat %arg2 : !tt.ptr<i32> -> tensor<1024x!tt.ptr<i32>, #blocked>
+         %15 = tt.addptr %14, %4 : tensor<1024x!tt.ptr<i32>, #blocked>, tensor<1024xi32, #blocked>
+         tt.store %15, %13, %6 : tensor<1024x!tt.ptr<i32>, #blocked>
+         tt.return
+       }
+     }
+    """
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.ttgir') as f:
+        f.write(sample_ttgir_vector_add)
+        f.flush()
+        src = IRSource(f.name)
+
         # now test compilation
         triton.compile(f.name, target=target)
diff --git a/python/test/unit/runtime/test_subproc.py b/python/test/unit/runtime/test_subproc.py
@@ -1,12 +1,10 @@
 import multiprocessing
 import shutil
-import tempfile
 
 import triton
 import triton.language as tl
 from triton.backends.compiler import AttrsDescriptor
-from triton.compiler import ASTSource, IRSource
-from triton._C.libtriton import ir
+from triton.compiler import ASTSource
 
 target = triton.runtime.driver.active.get_current_target()
 

@@ -90,11 +90,13 @@ def parse_options(self):
 
 class IRSource:
 
-    def __init__(self, path, context):
+    def __init__(self, path):
         self.path = path
         path = Path(path)
         self.ext = path.suffix[1:]
         self.src = path.read_text()
+        self.context = ir.context()
+        ir.load_dialects(self.context)
 
         # We don't have a easy-to-use PTX parser that we can use, so keep that regex for now.
         # TODO - replace with a proper parser
@@ -105,7 +107,7 @@ def __init__(self, path, context):
             types = re.findall(arg_type_pattern[self.ext], signature)
             self.signature = {k: convert_type_repr(ty) for k, ty in enumerate(types)}
         else:
-            self.module = ir.parse_mlir_module(self.path, context)
+            self.module = ir.parse_mlir_module(self.path, self.context)
             fn_name = self.module.get_first_func_name()
             self.name = "@" + fn_name
             funcOp = self.module.get_function(fn_name)
@@ -116,7 +118,7 @@ def hash(self):
         return hashlib.sha256(self.src.encode("utf-8")).hexdigest()
 
     def make_ir(self, options, codegen_fns, module_map, context):
-        self.module.context = context
+        self.module.context = self.context
         return self.module
 
     def parse_options(self):
@@ -217,10 +219,7 @@ def compile(src, target=None, options=None):
     # create backend
     if ir_source:
         assert isinstance(src, str), "source must be either AST or a filepath"
-        # Do an early init, since we use the MLIR parser which needs the context
-        context = ir.context()
-        ir.load_dialects(context)
-        src = IRSource(src, context)
+        src = IRSource(src)
 
     extra_options = src.parse_options()
     options = backend.parse_options(dict(options or dict(), **extra_options))
@@ -264,10 +263,14 @@ def compile(src, target=None, options=None):
         first_stage += 1
 
     # We initialize these
-    if not ir_source:
+    if not isinstance(src, IRSource):
         context = ir.context()
         ir.load_dialects(context)
+    else:
+        # IRSource parsed its module inside its own context; reuse that one.
+        context = src.context
+    # Backend dialects are loaded for both source kinds, as before this change.
     backend.load_dialects(context)
     codegen_fns = backend.get_codegen_implementation()
     module_map = backend.get_module_map()
     try: