-
Notifications
You must be signed in to change notification settings - Fork 41
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add `nextafter` intrinsic
#529
base: main
Are you sure you want to change the base?
Conversation
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.
diff --git a/test/device/intrinsics.jl b/test/device/intrinsics.jl
index ecd5e44d..ca30165b 100644
--- a/test/device/intrinsics.jl
+++ b/test/device/intrinsics.jl
@@ -354,45 +354,45 @@ end
end
- let # nextafter
- function nextafter_test(X, y)
- idx = thread_position_in_grid_1d()
- X[idx] = Metal.nextafter(X[idx], y)
- return nothing
- end
+ let # nextafter
+ function nextafter_test(X, y)
+ idx = thread_position_in_grid_1d()
+ X[idx] = Metal.nextafter(X[idx], y)
+ return nothing
+ end
- # Check the code is generated as expected
- outval = T(0)
- function nextafter_out_test()
- Metal.nextafter(outval, outval)
- return
- end
+ # Check the code is generated as expected
+ outval = T(0)
+ function nextafter_out_test()
+ Metal.nextafter(outval, outval)
+ return
+ end
- N = 4
- arr = rand(T, N)
+ N = 4
+ arr = rand(T, N)
- # test the intrinsic (macOS >= v14)
- if metal_support() >= v"3.1"
- buffer1 = MtlArray(arr)
- Metal.@sync @metal threads = N nextafter_test(buffer1, typemax(T))
- @test Array(buffer1) == nextfloat.(arr)
- Metal.@sync @metal threads = N nextafter_test(buffer1, typemin(T))
- @test Array(buffer1) == arr
+ # test the intrinsic (macOS >= v14)
+ if metal_support() >= v"3.1"
+ buffer1 = MtlArray(arr)
+ Metal.@sync @metal threads = N nextafter_test(buffer1, typemax(T))
+ @test Array(buffer1) == nextfloat.(arr)
+ Metal.@sync @metal threads = N nextafter_test(buffer1, typemin(T))
+ @test Array(buffer1) == arr
- ir = sprint(io->(@device_code_llvm io=io dump_module=true @metal nextafter_out_test()))
- @test occursin(Regex("@air\\.nextafter\\.f$(8*sizeof(T))"), ir)
- end
+ ir = sprint(io -> (@device_code_llvm io = io dump_module = true @metal nextafter_out_test()))
+ @test occursin(Regex("@air\\.nextafter\\.f$(8 * sizeof(T))"), ir)
+ end
- # test for metal < 3.1
- buffer2 = MtlArray(arr)
- Metal.@sync @metal threads = N metal = v"3.0" nextafter_test(buffer2, typemax(T))
- @test Array(buffer2) == nextfloat.(arr)
- Metal.@sync @metal threads = N metal = v"3.0" nextafter_test(buffer2, typemin(T))
- @test Array(buffer2) == arr
+ # test for metal < 3.1
+ buffer2 = MtlArray(arr)
+ Metal.@sync @metal threads = N metal = v"3.0" nextafter_test(buffer2, typemax(T))
+ @test Array(buffer2) == nextfloat.(arr)
+ Metal.@sync @metal threads = N metal = v"3.0" nextafter_test(buffer2, typemin(T))
+ @test Array(buffer2) == arr
- ir = sprint(io->(@device_code_llvm io=io dump_module=true @metal metal = v"3.0" nextafter_out_test()))
- @test occursin(Regex("@air\\.sign\\.f$(8*sizeof(T))"), ir)
- end
+ ir = sprint(io -> (@device_code_llvm io = io dump_module = true @metal metal = v"3.0" nextafter_out_test()))
+ @test occursin(Regex("@air\\.sign\\.f$(8 * sizeof(T))"), ir)
+ end
end
end
|
2569fe1
to
18279c9
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Metal Benchmarks
Benchmark suite | Current: e581e2b | Previous: 6654291 | Ratio |
---|---|---|---|
private array/construct |
26298.666666666668 ns |
26250 ns |
1.00 |
private array/broadcast |
460208 ns |
462084 ns |
1.00 |
private array/random/randn/Float32 |
804853.5 ns |
826208 ns |
0.97 |
private array/random/randn!/Float32 |
634500 ns |
627791.5 ns |
1.01 |
private array/random/rand!/Int64 |
561750 ns |
558250 ns |
1.01 |
private array/random/rand!/Float32 |
419417 ns |
587500 ns |
0.71 |
private array/random/rand/Int64 |
804083 ns |
777229 ns |
1.03 |
private array/random/rand/Float32 |
574625.5 ns |
634042 ns |
0.91 |
private array/copyto!/gpu_to_gpu |
384145.5 ns |
658833 ns |
0.58 |
private array/copyto!/cpu_to_gpu |
801541.5 ns |
709208 ns |
1.13 |
private array/copyto!/gpu_to_cpu |
689896 ns |
665625 ns |
1.04 |
private array/accumulate/1d |
1351333.5 ns |
1338542 ns |
1.01 |
private array/accumulate/2d |
1389687.5 ns |
1395208 ns |
1.00 |
private array/iteration/findall/int |
2086563 ns |
2084458 ns |
1.00 |
private array/iteration/findall/bool |
1844000 ns |
1817562.5 ns |
1.01 |
private array/iteration/findfirst/int |
1707333.5 ns |
1711563 ns |
1.00 |
private array/iteration/findfirst/bool |
1673500 ns |
1674417 ns |
1.00 |
private array/iteration/scalar |
3773500 ns |
3782854 ns |
1.00 |
private array/iteration/logical |
3191083 ns |
3254708.5 ns |
0.98 |
private array/iteration/findmin/1d |
1787500 ns |
1778417 ns |
1.01 |
private array/iteration/findmin/2d |
1351292 ns |
1351834 ns |
1.00 |
private array/reductions/reduce/1d |
1032771 ns |
1034625 ns |
1.00 |
private array/reductions/reduce/2d |
672625 ns |
663541 ns |
1.01 |
private array/reductions/mapreduce/1d |
1033542 ns |
1036542 ns |
1.00 |
private array/reductions/mapreduce/2d |
663750 ns |
667125 ns |
0.99 |
private array/permutedims/4d |
2540125 ns |
2546416 ns |
1.00 |
private array/permutedims/2d |
1025437.5 ns |
1022500 ns |
1.00 |
private array/permutedims/3d |
1599000 ns |
1618208 ns |
0.99 |
private array/copy |
592896 ns |
582708 ns |
1.02 |
latency/precompile |
8956606708 ns |
8868498250 ns |
1.01 |
latency/ttfp |
3607131084 ns |
3613257333 ns |
1.00 |
latency/import |
1237545375 ns |
1235355708 ns |
1.00 |
integration/metaldevrt |
732375 ns |
721583 ns |
1.01 |
integration/byval/slices=1 |
1565437.5 ns |
1558729.5 ns |
1.00 |
integration/byval/slices=3 |
9880625 ns |
9618396 ns |
1.03 |
integration/byval/reference |
1607312.5 ns |
1546854.5 ns |
1.04 |
integration/byval/slices=2 |
2677750 ns |
2557458 ns |
1.05 |
kernel/indexing |
458020.5 ns |
476250 ns |
0.96 |
kernel/indexing_checked |
474500 ns |
466417 ns |
1.02 |
kernel/launch |
10145.666666666666 ns |
8125 ns |
1.25 |
metal/synchronization/stream |
14750 ns |
14667 ns |
1.01 |
metal/synchronization/context |
14958 ns |
14834 ns |
1.01 |
shared array/construct |
24708.333333333332 ns |
24854.166666666668 ns |
0.99 |
shared array/broadcast |
455708 ns |
455833 ns |
1.00 |
shared array/random/randn/Float32 |
776021 ns |
820500 ns |
0.95 |
shared array/random/randn!/Float32 |
643375 ns |
639041 ns |
1.01 |
shared array/random/rand!/Int64 |
561625 ns |
549729.5 ns |
1.02 |
shared array/random/rand!/Float32 |
597625 ns |
601250 ns |
0.99 |
shared array/random/rand/Int64 |
782249.5 ns |
751833.5 ns |
1.04 |
shared array/random/rand/Float32 |
583166 ns |
608854 ns |
0.96 |
shared array/copyto!/gpu_to_gpu |
83666 ns |
84542 ns |
0.99 |
shared array/copyto!/cpu_to_gpu |
82792 ns |
81417 ns |
1.02 |
shared array/copyto!/gpu_to_cpu |
83125 ns |
85333.5 ns |
0.97 |
shared array/accumulate/1d |
1339375 ns |
1347916.5 ns |
0.99 |
shared array/accumulate/2d |
1392917 ns |
1396125 ns |
1.00 |
shared array/iteration/findall/int |
1814667 ns |
1836562.5 ns |
0.99 |
shared array/iteration/findall/bool |
1612604 ns |
1595583 ns |
1.01 |
shared array/iteration/findfirst/int |
1406959 ns |
1408417 ns |
1.00 |
shared array/iteration/findfirst/bool |
1374958 ns |
1371458 ns |
1.00 |
shared array/iteration/scalar |
155333 ns |
154834 ns |
1.00 |
shared array/iteration/logical |
3128667 ns |
2978125 ns |
1.05 |
shared array/iteration/findmin/1d |
1473833.5 ns |
1471042 ns |
1.00 |
shared array/iteration/findmin/2d |
1372542 ns |
1369917 ns |
1.00 |
shared array/reductions/reduce/1d |
729625 ns |
738208 ns |
0.99 |
shared array/reductions/reduce/2d |
667334 ns |
670417 ns |
1.00 |
shared array/reductions/mapreduce/1d |
738875 ns |
732854 ns |
1.01 |
shared array/reductions/mapreduce/2d |
668083 ns |
666916.5 ns |
1.00 |
shared array/permutedims/4d |
2560458.5 ns |
2545729.5 ns |
1.01 |
shared array/permutedims/2d |
1027833.5 ns |
1007791 ns |
1.02 |
shared array/permutedims/3d |
1597812.5 ns |
1583166 ns |
1.01 |
shared array/copy |
247542 ns |
244917 ns |
1.01 |
This comment was automatically generated by workflow using github-action-benchmark.
54e1adc
to
43147a3
Compare
Error doesn't seem related:
|
43147a3
to
987b73b
Compare
9eff7d2
to
f4b4a58
Compare
bb6e7fa
to
5354003
Compare
This is still ready for review. |
Tests should still pass when run on macOS 13
e581e2b
to
78ba579
Compare
#373