Support code emission of nvptx on Apple #660

wsmoses · 2025-01-30T12:57:50Z

When cross-compiling a CUDA kernel (for eventual CPU execution) on Apple, GPUCompiler currently fails with the following error:

Square Kernel: Error During Test at /Users/wmoses/git/Reactant.jl/test/integration/cuda.jl:22
  Got exception outside of a @test
  ArgumentError: No available targets are compatible with triple "nvptx64-nvidia-cuda"
  Stacktrace:
    [1] LLVM.Target(; name::Nothing, triple::String)
      @ LLVM ~/.julia/packages/LLVM/b3kFs/src/target.jl:33
    [2] Target
      @ ~/.julia/packages/LLVM/b3kFs/src/target.jl:23 [inlined]
    [3] llvm_machine(target::GPUCompiler.PTXCompilerTarget)
      @ GPUCompiler ~/.julia/packages/GPUCompiler/Nxf8r/src/ptx.jl:49
    [4] macro expansion
      @ ~/.julia/packages/GPUCompiler/Nxf8r/src/ptx.jl:140 [inlined]
    [5] macro expansion
      @ ~/.julia/packages/LLVM/b3kFs/src/base.jl:97 [inlined]
    [6] finish_module!(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget}, mod::LLVM.Module, entry::LLVM.Function)
      @ GPUCompiler ~/.julia/packages/GPUCompiler/Nxf8r/src/ptx.jl:137
    [7] finish_module!(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, mod::LLVM.Module, entry::LLVM.Function)
      @ CUDA ~/.julia/packages/CUDA/1kIOw/src/compiler/compilation.jl:58
    [8] macro expansion
      @ ~/.julia/packages/GPUCompiler/Nxf8r/src/driver.jl:183 [inlined]
    [9] emit_llvm(job::GPUCompiler.CompilerJob; toplevel::Bool, libraries::Bool, optimize::Bool, cleanup::Bool, validate::Bool, only_entry::Bool)
      @ GPUCompiler ~/.julia/packages/GPUCompiler/Nxf8r/src/utils.jl:108
   [10] emit_llvm
      @ ~/.julia/packages/GPUCompiler/Nxf8r/src/utils.jl:106 [inlined]
   [11] codegen(output::Symbol, job::GPUCompiler.CompilerJob; toplevel::Bool, libraries::Bool, optimize::Bool, cleanup::Bool, validate::Bool, strip::Bool, only_entry::Bool, parent_job::Nothing)
      @ GPUCompiler ~/.julia/packages/GPUCompiler/Nxf8r/src/driver.jl:100
   [12] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{optimize::Bool, cleanup::Bool, validate::Bool, libraries::Bool})
      @ GPUCompiler ~/.julia/packages/GPUCompiler/Nxf8r/src/driver.jl:79
   [13] compile
      @ ~/.julia/packages/GPUCompiler/Nxf8r/src/driver.jl:74 [inlined]
   [14] (::ReactantCUDAExt.var"#7#10"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}})(ctx::LLVM.Context)
      @ ReactantCUDAExt ~/git/Reactant.jl/ext/ReactantCUDAExt.jl:344
   [15] JuliaContext(f::ReactantCUDAExt.var"#7#10"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
      @ GPUCompiler ~/.julia/packages/GPUCompiler/Nxf8r/src/driver.jl:34
   [16] JuliaContext
      @ ~/.julia/packages/GPUCompiler/Nxf8r/src/driver.jl:25 [inlined]
   [17] compile(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams})
      @ ReactantCUDAExt ~/git/Reactant.jl/ext/ReactantCUDAExt.jl:343
   [18] actual_compilation(cache::Dict{Any, ReactantCUDAExt.LLVMFunc}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(ReactantCUDAExt.compile), linker::typeof(ReactantCUDAExt.link))
      @ GPUCompiler ~/.julia/packages/GPUCompiler/Nxf8r/src/execution.jl:237
   [19] cached_compilation(cache::Dict{Any, ReactantCUDAExt.LLVMFunc}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
      @ GPUCompiler ~/.julia/packages/GPUCompiler/Nxf8r/src/execution.jl:151
   [20] macro expansion
      @ ~/git/Reactant.jl/ext/ReactantCUDAExt.jl:772 [inlined]
   [21] macro expansion
      @ ./lock.jl:267 [inlined]
   [22] cufunction(f::typeof(Main.var"##CUDA#241".square_kernel!), tt::Type{Tuple{ReactantCUDAExt.CuTracedArray{Int64, 1, 1, (64,)}, ReactantCUDAExt.CuTracedArray{Int64, 1, 1, (64,)}}}; kwargs::@Kwargs{})
      @ ReactantCUDAExt ~/git/Reactant.jl/ext/ReactantCUDAExt.jl:750
   [23] #cufunction
      @ ~/git/Reactant.jl/ext/ReactantCUDAExt.jl:747 [inlined]
   [24] cufunction(none::typeof(Main.var"##CUDA#241".square_kernel!), none::Type{Tuple{ReactantCUDAExt.CuTracedArray{Int64, 1, 1, (64,)}, ReactantCUDAExt.CuTracedArray{Int64, 1, 1, (64,)}}})
      @ Reactant ./<missing>:0
   [25] #cufunction
      @ ~/git/Reactant.jl/ext/ReactantCUDAExt.jl:747 [inlined]
   [26] call_with_reactant(::typeof(CUDA.cufunction), ::typeof(Main.var"##CUDA#241".square_kernel!), ::Type{Tuple{ReactantCUDAExt.CuTracedArray{Int64, 1, 1, (64,)}, ReactantCUDAExt.CuTracedArray{Int64, 1, 1, (64,)}}})
      @ Reactant ~/git/Reactant.jl/src/utils.jl:0
   [27] macro expansion
      @ ~/.julia/packages/CUDA/1kIOw/src/compiler/execution.jl:112 [inlined]
   [28] square!
      @ ~/git/Reactant.jl/test/integration/cuda.jl:15 [inlined]
   [29] square!(none::Reactant.TracedRArray{Int64, 1}, none::Reactant.TracedRArray{Int64, 1})
      @ Reactant ./<missing>:0
   [30] macro expansion
      @ ~/.julia/packages/CUDA/1kIOw/src/compiler/execution.jl:108 [inlined]
   [31] square!
      @ ~/git/Reactant.jl/test/integration/cuda.jl:15 [inlined]
   [32] call_with_reactant(::typeof(Main.var"##CUDA#241".square!), ::Reactant.TracedRArray{Int64, 1}, ::Reactant.TracedRArray{Int64, 1})
      @ Reactant ~/git/Reactant.jl/src/utils.jl:0
   [33] make_mlir_fn(f::Function, args::Tuple{ConcreteRArray{Int64, 1}, ConcreteRArray{Int64, 1}}, kwargs::Tuple{}, name::String, concretein::Bool; toscalar::Bool, return_dialect::Symbol, do_transpose::Bool, no_args_in_result::Bool)
      @ Reactant.TracedUtils ~/git/Reactant.jl/src/TracedUtils.jl:216
   [34] make_mlir_fn
      @ ~/git/Reactant.jl/src/TracedUtils.jl:129 [inlined]
   [35] compile_mlir!(mod::Reactant.MLIR.IR.Module, f::Function, args::Tuple{ConcreteRArray{Int64, 1}, ConcreteRArray{Int64, 1}}; optimize::Bool, no_nan::Bool, backend::String)
      @ Reactant.Compiler ~/git/Reactant.jl/src/Compiler.jl:448
   [36] compile_mlir!
      @ ~/git/Reactant.jl/src/Compiler.jl:437 [inlined]
   [37] compile_xla(f::Function, args::Tuple{ConcreteRArray{Int64, 1}, ConcreteRArray{Int64, 1}}; client::Nothing, optimize::Bool, no_nan::Bool, device::Nothing)
      @ Reactant.Compiler ~/git/Reactant.jl/src/Compiler.jl:1005
   [38] compile_xla
      @ ~/git/Reactant.jl/src/Compiler.jl:984 [inlined]
   [39] compile(f::Function, args::Tuple{ConcreteRArray{Int64, 1}, ConcreteRArray{Int64, 1}}; sync::Bool, kwargs::@Kwargs{client::Nothing, no_nan::Bool, device::Nothing, optimize::Bool})
      @ Reactant.Compiler ~/git/Reactant.jl/src/Compiler.jl:1057
   [40] macro expansion
      @ ~/git/Reactant.jl/src/Compiler.jl:707 [inlined]
   [41] macro expansion
      @ ~/git/Reactant.jl/test/integration/cuda.jl:26 [inlined]
   [42] macro expansion
      @ ~/.julia/juliaup/julia-1.10.8+0.aarch64.apple.darwin14/share/julia/stdlib/v1.10/Test/src/Test.jl:1577 [inlined]
   [43] top-level scope
      @ ~/git/Reactant.jl/test/integration/cuda.jl:23
   [44] include(mod::Module, _path::String)
      @ Base ./Base.jl:495
   [45] include(x::String)
      @ Main.var"##CUDA#241" ~/.julia/packages/SafeTestsets/raUNr/src/SafeTestsets.jl:28
   [46] macro expansion
      @ ~/.julia/packages/SafeTestsets/raUNr/src/SafeTestsets.jl:24 [inlined]
   [47] macro expansion
      @ ~/.julia/juliaup/julia-1.10.8+0.aarch64.apple.darwin14/share/julia/stdlib/v1.10/Test/src/Test.jl:1577 [inlined]
   [48] top-level scope
      @ ~/.julia/packages/SafeTestsets/raUNr/src/SafeTestsets.jl:24
   [49] eval(m::Module, e::Any)
      @ Core ./boot.jl:385
   [50] macro expansion
      @ ~/.julia/packages/SafeTestsets/raUNr/src/SafeTestsets.jl:28 [inlined]
   [51] macro expansion
      @ ~/git/Reactant.jl/test/runtests.jl:67 [inlined]
   [52] macro expansion
      @ ~/.julia/juliaup/julia-1.10.8+0.aarch64.apple.darwin14/share/julia/stdlib/v1.10/Test/src/Test.jl:1577 [inlined]
   [53] top-level scope
      @ ~/git/Reactant.jl/test/runtests.jl:45
   [54] include(fname::String)
      @ Base.MainInclude ./client.jl:494
   [55] top-level scope
      @ none:6
   [56] eval
      @ ./boot.jl:385 [inlined]
   [57] exec_options(opts::Base.JLOptions)
      @ Base ./client.jl:296

github-actions · 2025-01-30T12:58:42Z

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic master) to apply these changes.

Click here to view the suggested changes.

diff --git a/test/gcn.jl b/test/gcn.jl
index c16bc5d..7a31089 100644
--- a/test/gcn.jl
+++ b/test/gcn.jl
@@ -14,7 +14,7 @@ end
 
 end
 
-############################################################################################
+    ############################################################################################
 @testset "assembly" begin
 
 @testset "skip scalar trap" begin

codecov · 2025-01-30T13:01:03Z

Codecov Report

Attention: Patch coverage is 75.00000% with 1 line in your changes missing coverage. Please review.

Project coverage is 73.39%. Comparing base (47a4a8b) to head (0069186).

Files with missing lines	Patch %	Lines
src/gcn.jl	50.00%	1 Missing ⚠️

Additional details and impacted files

@@            Coverage Diff             @@
##           master     #660      +/-   ##
==========================================
+ Coverage   71.76%   73.39%   +1.62%     
==========================================
  Files          24       24              
  Lines        3326     3330       +4     
==========================================
+ Hits         2387     2444      +57     
+ Misses        939      886      -53

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

src/ptx.jl

maleadt · 2025-02-03T08:24:18Z

Just disable those tests? I don't see why we should include obviously broken code to test a non-realistic code path.

vchuravy · 2025-02-03T09:04:37Z

The right answer is that the LLVM you are using needs to be built with nvptx as a target.

wsmoses · 2025-02-03T13:05:04Z

So this is more than just a test; we actually want to use this end to end. In particular, through Polygeist's GPU-to-CPU pipeline, this lets us run CUDA code on macOS … if we can ask CUDA.jl to get us the LLVM IR for it (for which this is a prerequisite).

src/ptx.jl

wsmoses · 2025-02-04T14:32:56Z

@vchuravy this now fails in GCN?

wsmoses force-pushed the nvapple branch from 90aa3dc to 8c24ea9 Compare January 30, 2025 20:32

wsmoses requested review from maleadt and vchuravy January 31, 2025 16:42

This was referenced Jan 31, 2025

KA ext EnzymeAD/Reactant.jl#667

Merged

[Test] Skip (again) CUDA integration tests when they are known to fail EnzymeAD/Reactant.jl#674

Closed

vchuravy reviewed Feb 2, 2025

View reviewed changes

src/ptx.jl Show resolved Hide resolved

vchuravy reviewed Feb 3, 2025

View reviewed changes

src/ptx.jl Show resolved Hide resolved

wsmoses force-pushed the nvapple branch from 2709ba7 to d3db05f Compare February 4, 2025 00:21

vchuravy reviewed Feb 4, 2025

View reviewed changes

src/ptx.jl Outdated Show resolved Hide resolved

wsmoses and others added 9 commits February 4, 2025 14:02

nvapple

297d150

fix

31d86be

fix

1dd1243

Update ptx.jl

162ecb9

Update ptx.jl

34ba23a

Update runtests.jl

941f4b2

Update ptx.jl

dbb4465

Use LLVM.backends instead of Sys.apple

c3e36e0

Update src/ptx.jl

3c7db5f

vchuravy force-pushed the nvapple branch from 0e29640 to 3c7db5f Compare February 4, 2025 13:02

maleadt removed their request for review February 4, 2025 13:08

Update gcn.jl

0069186

wsmoses enabled auto-merge February 5, 2025 01:24

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Support code emission of nvptx on Apple #660

Support code emission of nvptx on Apple #660

wsmoses commented Jan 30, 2025

github-actions bot commented Jan 30, 2025 •

edited

Loading

codecov bot commented Jan 30, 2025 •

edited

Loading

maleadt commented Feb 3, 2025

vchuravy commented Feb 3, 2025

wsmoses commented Feb 3, 2025

wsmoses commented Feb 4, 2025

Support code emission of nvptx on Apple #660

Are you sure you want to change the base?

Support code emission of nvptx on Apple #660

Conversation

wsmoses commented Jan 30, 2025

github-actions bot commented Jan 30, 2025 • edited Loading

codecov bot commented Jan 30, 2025 • edited Loading

Codecov Report

maleadt commented Feb 3, 2025

vchuravy commented Feb 3, 2025

wsmoses commented Feb 3, 2025

wsmoses commented Feb 4, 2025

github-actions bot commented Jan 30, 2025 •

edited

Loading

codecov bot commented Jan 30, 2025 •

edited

Loading