-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcopy_log_l2.txt
800 lines (800 loc) · 33.8 KB
/
copy_log_l2.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
Shader Clock Mode: 0 Memory Clock Mode: 0
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 28.7636
foo_256_8_32_256_asm 256 8 32 28.0393
foo_256_4_64_256_asm 256 4 64 27.8378
foo_256_2_128_256_asm 256 2 128 21.8674
foo_128_16_8_512_asm 128 16 8 28.0293
foo_128_8_16_512_asm 128 8 16 31.9245
foo_128_4_32_512_asm 128 4 32 27.8086
foo_128_2_64_512_asm 128 2 64 27.851
foo_64_8_8_1024_asm 64 8 8 26.4445
foo_64_4_16_1024_asm 64 4 16 28.0276
foo_64_2_32_1024_asm 64 2 32 27.3579
foo_256_16_16_256_hip 256 16 16 7.99965
foo_256_8_32_256_hip 256 8 32 8.07856
foo_256_4_64_256_hip 256 4 64 8.09229
foo_256_2_128_256_hip 256 2 128 7.77185
foo_128_16_8_512_hip 128 16 8 13.6194
foo_128_8_16_512_hip 128 8 16 13.7394
foo_128_4_32_512_hip 128 4 32 13.8394
foo_128_2_64_512_hip 128 2 64 13.953
foo_64_8_8_1024_hip 64 8 8 24.5559
foo_64_4_16_1024_hip 64 4 16 22.6673
foo_64_2_32_1024_hip 64 2 32 23.6485
Shader Clock Mode: 0 Memory Clock Mode: 1
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 21.7168
foo_256_8_32_256_asm 256 8 32 31.532
foo_256_4_64_256_asm 256 4 64 28.0176
foo_256_2_128_256_asm 256 2 128 21.6112
foo_128_16_8_512_asm 128 16 8 28.4677
foo_128_8_16_512_asm 128 8 16 28.0054
foo_128_4_32_512_asm 128 4 32 27.9603
foo_128_2_64_512_asm 128 2 64 27.6728
foo_64_8_8_1024_asm 64 8 8 28.0853
foo_64_4_16_1024_asm 64 4 16 28.1994
foo_64_2_32_1024_asm 64 2 32 27.188
foo_256_16_16_256_hip 256 16 16 8.19598
foo_256_8_32_256_hip 256 8 32 8.48976
foo_256_4_64_256_hip 256 4 64 8.29078
foo_256_2_128_256_hip 256 2 128 8.03674
foo_128_16_8_512_hip 128 16 8 13.6517
foo_128_8_16_512_hip 128 8 16 13.7782
foo_128_4_32_512_hip 128 4 32 14.995
foo_128_2_64_512_hip 128 2 64 13.6409
foo_64_8_8_1024_hip 64 8 8 25.7966
foo_64_4_16_1024_hip 64 4 16 22.458
foo_64_2_32_1024_hip 64 2 32 21.4903
Shader Clock Mode: 0 Memory Clock Mode: 2
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 28.7725
foo_256_8_32_256_asm 256 8 32 28.0755
foo_256_4_64_256_asm 256 4 64 26.2217
foo_256_2_128_256_asm 256 2 128 22.1515
foo_128_16_8_512_asm 128 16 8 28.2633
foo_128_8_16_512_asm 128 8 16 34.0542
foo_128_4_32_512_asm 128 4 32 28.3261
foo_128_2_64_512_asm 128 2 64 26.2288
foo_64_8_8_1024_asm 64 8 8 34.052
foo_64_4_16_1024_asm 64 4 16 27.9015
foo_64_2_32_1024_asm 64 2 32 28.229
foo_256_16_16_256_hip 256 16 16 8.23047
foo_256_8_32_256_hip 256 8 32 7.84902
foo_256_4_64_256_hip 256 4 64 8.32985
foo_256_2_128_256_hip 256 2 128 7.73673
foo_128_16_8_512_hip 128 16 8 13.7057
foo_128_8_16_512_hip 128 8 16 13.6718
foo_128_4_32_512_hip 128 4 32 14.8599
foo_128_2_64_512_hip 128 2 64 13.5371
foo_64_8_8_1024_hip 64 8 8 22.559
foo_64_4_16_1024_hip 64 4 16 23.5791
foo_64_2_32_1024_hip 64 2 32 23.09
Shader Clock Mode: 0 Memory Clock Mode: 3
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 23.2193
foo_256_8_32_256_asm 256 8 32 34.054
foo_256_4_64_256_asm 256 4 64 28.5744
foo_256_2_128_256_asm 256 2 128 21.9282
foo_128_16_8_512_asm 128 16 8 28.2532
foo_128_8_16_512_asm 128 8 16 28.0866
foo_128_4_32_512_asm 128 4 32 28.1437
foo_128_2_64_512_asm 128 2 64 33.7857
foo_64_8_8_1024_asm 64 8 8 28.1566
foo_64_4_16_1024_asm 64 4 16 28.0108
foo_64_2_32_1024_asm 64 2 32 27.3049
foo_256_16_16_256_hip 256 16 16 8.02959
foo_256_8_32_256_hip 256 8 32 8.0932
foo_256_4_64_256_hip 256 4 64 8.19114
foo_256_2_128_256_hip 256 2 128 7.46888
foo_128_16_8_512_hip 128 16 8 13.602
foo_128_8_16_512_hip 128 8 16 14.987
foo_128_4_32_512_hip 128 4 32 13.7312
foo_128_2_64_512_hip 128 2 64 13.671
foo_64_8_8_1024_hip 64 8 8 22.6776
foo_64_4_16_1024_hip 64 4 16 22.4537
foo_64_2_32_1024_hip 64 2 32 22.1301
Shader Clock Mode: 1 Memory Clock Mode: 0
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 30.0823
foo_256_8_32_256_asm 256 8 32 31.0998
foo_256_4_64_256_asm 256 4 64 29.7231
foo_256_2_128_256_asm 256 2 128 24.6948
foo_128_16_8_512_asm 128 16 8 32.2587
foo_128_8_16_512_asm 128 8 16 26.5746
foo_128_4_32_512_asm 128 4 32 32.3322
foo_128_2_64_512_asm 128 2 64 31.7556
foo_64_8_8_1024_asm 64 8 8 29.2945
foo_64_4_16_1024_asm 64 4 16 31.345
foo_64_2_32_1024_asm 64 2 32 32.8448
foo_256_16_16_256_hip 256 16 16 9.33631
foo_256_8_32_256_hip 256 8 32 9.77768
foo_256_4_64_256_hip 256 4 64 9.3995
foo_256_2_128_256_hip 256 2 128 8.83031
foo_128_16_8_512_hip 128 16 8 17.3202
foo_128_8_16_512_hip 128 8 16 16.293
foo_128_4_32_512_hip 128 4 32 15.5955
foo_128_2_64_512_hip 128 2 64 15.4692
foo_64_8_8_1024_hip 64 8 8 25.3807
foo_64_4_16_1024_hip 64 4 16 25.4977
foo_64_2_32_1024_hip 64 2 32 24.3373
Shader Clock Mode: 1 Memory Clock Mode: 1
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 30.8894
foo_256_8_32_256_asm 256 8 32 34.0745
foo_256_4_64_256_asm 256 4 64 31.796
foo_256_2_128_256_asm 256 2 128 24.8224
foo_128_16_8_512_asm 128 16 8 32.3502
foo_128_8_16_512_asm 128 8 16 31.6422
foo_128_4_32_512_asm 128 4 32 31.4498
foo_128_2_64_512_asm 128 2 64 34.5209
foo_64_8_8_1024_asm 64 8 8 31.7399
foo_64_4_16_1024_asm 64 4 16 31.4847
foo_64_2_32_1024_asm 64 2 32 30.614
foo_256_16_16_256_hip 256 16 16 9.11792
foo_256_8_32_256_hip 256 8 32 9.32124
foo_256_4_64_256_hip 256 4 64 9.32365
foo_256_2_128_256_hip 256 2 128 8.99551
foo_128_16_8_512_hip 128 16 8 15.5265
foo_128_8_16_512_hip 128 8 16 15.523
foo_128_4_32_512_hip 128 4 32 15.7451
foo_128_2_64_512_hip 128 2 64 15.3969
foo_64_8_8_1024_hip 64 8 8 25.5383
foo_64_4_16_1024_hip 64 4 16 25.1966
foo_64_2_32_1024_hip 64 2 32 28.6257
Shader Clock Mode: 1 Memory Clock Mode: 2
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 29.6139
foo_256_8_32_256_asm 256 8 32 29.0859
foo_256_4_64_256_asm 256 4 64 31.7777
foo_256_2_128_256_asm 256 2 128 24.8304
foo_128_16_8_512_asm 128 16 8 31.9881
foo_128_8_16_512_asm 128 8 16 32.0413
foo_128_4_32_512_asm 128 4 32 30.1515
foo_128_2_64_512_asm 128 2 64 31.8007
foo_64_8_8_1024_asm 64 8 8 38.2893
foo_64_4_16_1024_asm 64 4 16 31.2042
foo_64_2_32_1024_asm 64 2 32 32.6053
foo_256_16_16_256_hip 256 16 16 9.28062
foo_256_8_32_256_hip 256 8 32 9.1205
foo_256_4_64_256_hip 256 4 64 9.42709
foo_256_2_128_256_hip 256 2 128 8.73514
foo_128_16_8_512_hip 128 16 8 15.5991
foo_128_8_16_512_hip 128 8 16 15.5129
foo_128_4_32_512_hip 128 4 32 16.2467
foo_128_2_64_512_hip 128 2 64 15.4275
foo_64_8_8_1024_hip 64 8 8 25.5006
foo_64_4_16_1024_hip 64 4 16 24.45
foo_64_2_32_1024_hip 64 2 32 24.3422
Shader Clock Mode: 1 Memory Clock Mode: 3
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 30.3531
foo_256_8_32_256_asm 256 8 32 31.6766
foo_256_4_64_256_asm 256 4 64 31.4687
foo_256_2_128_256_asm 256 2 128 28.6505
foo_128_16_8_512_asm 128 16 8 32.1545
foo_128_8_16_512_asm 128 8 16 32.0014
foo_128_4_32_512_asm 128 4 32 31.8557
foo_128_2_64_512_asm 128 2 64 31.7669
foo_64_8_8_1024_asm 64 8 8 32.0777
foo_64_4_16_1024_asm 64 4 16 31.5911
foo_64_2_32_1024_asm 64 2 32 30.9455
foo_256_16_16_256_hip 256 16 16 9.21966
foo_256_8_32_256_hip 256 8 32 9.23721
foo_256_4_64_256_hip 256 4 64 9.3332
foo_256_2_128_256_hip 256 2 128 9.13615
foo_128_16_8_512_hip 128 16 8 15.989
foo_128_8_16_512_hip 128 8 16 15.7892
foo_128_4_32_512_hip 128 4 32 15.6474
foo_128_2_64_512_hip 128 2 64 15.4952
foo_64_8_8_1024_hip 64 8 8 25.4106
foo_64_4_16_1024_hip 64 4 16 28.5143
foo_64_2_32_1024_hip 64 2 32 24.8839
Shader Clock Mode: 2 Memory Clock Mode: 0
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 28.1151
foo_256_8_32_256_asm 256 8 32 33.5514
foo_256_4_64_256_asm 256 4 64 38.5473
foo_256_2_128_256_asm 256 2 128 26.7343
foo_128_16_8_512_asm 128 16 8 34.427
foo_128_8_16_512_asm 128 8 16 40.4115
foo_128_4_32_512_asm 128 4 32 34.2557
foo_128_2_64_512_asm 128 2 64 34.1014
foo_64_8_8_1024_asm 64 8 8 34.279
foo_64_4_16_1024_asm 64 4 16 34.1588
foo_64_2_32_1024_asm 64 2 32 33.1625
foo_256_16_16_256_hip 256 16 16 10.0992
foo_256_8_32_256_hip 256 8 32 10.0735
foo_256_4_64_256_hip 256 4 64 10.2777
foo_256_2_128_256_hip 256 2 128 9.37744
foo_128_16_8_512_hip 128 16 8 19.0686
foo_128_8_16_512_hip 128 8 16 17.0333
foo_128_4_32_512_hip 128 4 32 17.1708
foo_128_2_64_512_hip 128 2 64 16.5661
foo_64_8_8_1024_hip 64 8 8 27.5723
foo_64_4_16_1024_hip 64 4 16 27.3583
foo_64_2_32_1024_hip 64 2 32 26.2255
Shader Clock Mode: 2 Memory Clock Mode: 1
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 29.9094
foo_256_8_32_256_asm 256 8 32 37.2894
foo_256_4_64_256_asm 256 4 64 34.0191
foo_256_2_128_256_asm 256 2 128 26.5746
foo_128_16_8_512_asm 128 16 8 36.8798
foo_128_8_16_512_asm 128 8 16 34.4348
foo_128_4_32_512_asm 128 4 32 34.0977
foo_128_2_64_512_asm 128 2 64 34.1642
foo_64_8_8_1024_asm 64 8 8 34.2752
foo_64_4_16_1024_asm 64 4 16 33.6429
foo_64_2_32_1024_asm 64 2 32 34.2174
foo_256_16_16_256_hip 256 16 16 9.92878
foo_256_8_32_256_hip 256 8 32 9.94763
foo_256_4_64_256_hip 256 4 64 10.7682
foo_256_2_128_256_hip 256 2 128 9.66099
foo_128_16_8_512_hip 128 16 8 17.5
foo_128_8_16_512_hip 128 8 16 17.1497
foo_128_4_32_512_hip 128 4 32 16.838
foo_128_2_64_512_hip 128 2 64 16.6041
foo_64_8_8_1024_hip 64 8 8 30.8756
foo_64_4_16_1024_hip 64 4 16 31.6833
foo_64_2_32_1024_hip 64 2 32 26.1127
Shader Clock Mode: 2 Memory Clock Mode: 2
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 30.0909
foo_256_8_32_256_asm 256 8 32 34.4724
foo_256_4_64_256_asm 256 4 64 30.5365
foo_256_2_128_256_asm 256 2 128 26.3785
foo_128_16_8_512_asm 128 16 8 37.028
foo_128_8_16_512_asm 128 8 16 34.2713
foo_128_4_32_512_asm 128 4 32 34.202
foo_128_2_64_512_asm 128 2 64 34.526
foo_64_8_8_1024_asm 64 8 8 34.4679
foo_64_4_16_1024_asm 64 4 16 34.1851
foo_64_2_32_1024_asm 64 2 32 33.9855
foo_256_16_16_256_hip 256 16 16 9.80857
foo_256_8_32_256_hip 256 8 32 10.1609
foo_256_4_64_256_hip 256 4 64 10.2488
foo_256_2_128_256_hip 256 2 128 9.51791
foo_128_16_8_512_hip 128 16 8 16.5764
foo_128_8_16_512_hip 128 8 16 16.8183
foo_128_4_32_512_hip 128 4 32 16.4326
foo_128_2_64_512_hip 128 2 64 16.6854
foo_64_8_8_1024_hip 64 8 8 27.1545
foo_64_4_16_1024_hip 64 4 16 26.2358
foo_64_2_32_1024_hip 64 2 32 26.0521
Shader Clock Mode: 2 Memory Clock Mode: 3
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 30.2801
foo_256_8_32_256_asm 256 8 32 33.7622
foo_256_4_64_256_asm 256 4 64 30.5106
foo_256_2_128_256_asm 256 2 128 26.3971
foo_128_16_8_512_asm 128 16 8 35.4356
foo_128_8_16_512_asm 128 8 16 34.7141
foo_128_4_32_512_asm 128 4 32 34.1306
foo_128_2_64_512_asm 128 2 64 33.9519
foo_64_8_8_1024_asm 64 8 8 34.2636
foo_64_4_16_1024_asm 64 4 16 34.1631
foo_64_2_32_1024_asm 64 2 32 36.8137
foo_256_16_16_256_hip 256 16 16 9.67622
foo_256_8_32_256_hip 256 8 32 10.0704
foo_256_4_64_256_hip 256 4 64 10.8034
foo_256_2_128_256_hip 256 2 128 9.61102
foo_128_16_8_512_hip 128 16 8 18.5641
foo_128_8_16_512_hip 128 8 16 18.413
foo_128_4_32_512_hip 128 4 32 17.8314
foo_128_2_64_512_hip 128 2 64 17.3847
foo_64_8_8_1024_hip 64 8 8 27.3056
foo_64_4_16_1024_hip 64 4 16 28.727
foo_64_2_32_1024_hip 64 2 32 27.8475
Shader Clock Mode: 3 Memory Clock Mode: 0
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 30.2667
foo_256_8_32_256_asm 256 8 32 35.0162
foo_256_4_64_256_asm 256 4 64 38.1662
foo_256_2_128_256_asm 256 2 128 27.8728
foo_128_16_8_512_asm 128 16 8 35.8121
foo_128_8_16_512_asm 128 8 16 38.8967
foo_128_4_32_512_asm 128 4 32 35.5309
foo_128_2_64_512_asm 128 2 64 35.693
foo_64_8_8_1024_asm 64 8 8 35.5707
foo_64_4_16_1024_asm 64 4 16 35.5076
foo_64_2_32_1024_asm 64 2 32 34.4224
foo_256_16_16_256_hip 256 16 16 10.3076
foo_256_8_32_256_hip 256 8 32 10.2214
foo_256_4_64_256_hip 256 4 64 10.7077
foo_256_2_128_256_hip 256 2 128 9.92052
foo_128_16_8_512_hip 128 16 8 17.4566
foo_128_8_16_512_hip 128 8 16 17.3738
foo_128_4_32_512_hip 128 4 32 17.6339
foo_128_2_64_512_hip 128 2 64 17.3206
foo_64_8_8_1024_hip 64 8 8 35.1144
foo_64_4_16_1024_hip 64 4 16 32.3151
foo_64_2_32_1024_hip 64 2 32 27.8347
Shader Clock Mode: 3 Memory Clock Mode: 1
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 41.2278
foo_256_8_32_256_asm 256 8 32 43.6259
foo_256_4_64_256_asm 256 4 64 41.1857
foo_256_2_128_256_asm 256 2 128 31.3954
foo_128_16_8_512_asm 128 16 8 43.6318
foo_128_8_16_512_asm 128 8 16 41.4457
foo_128_4_32_512_asm 128 4 32 41.2901
foo_128_2_64_512_asm 128 2 64 44.1973
foo_64_8_8_1024_asm 64 8 8 41.6363
foo_64_4_16_1024_asm 64 4 16 41.2166
foo_64_2_32_1024_asm 64 2 32 39.76
foo_256_16_16_256_hip 256 16 16 10.1318
foo_256_8_32_256_hip 256 8 32 10.2571
foo_256_4_64_256_hip 256 4 64 10.419
foo_256_2_128_256_hip 256 2 128 10.2128
foo_128_16_8_512_hip 128 16 8 18.8603
foo_128_8_16_512_hip 128 8 16 19.1053
foo_128_4_32_512_hip 128 4 32 19.6183
foo_128_2_64_512_hip 128 2 64 19.3379
foo_64_8_8_1024_hip 64 8 8 35.2618
foo_64_4_16_1024_hip 64 4 16 32.3274
foo_64_2_32_1024_hip 64 2 32 31.4462
Shader Clock Mode: 3 Memory Clock Mode: 2
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 41.327
foo_256_8_32_256_asm 256 8 32 45.4691
foo_256_4_64_256_asm 256 4 64 41.0049
foo_256_2_128_256_asm 256 2 128 31.7807
foo_128_16_8_512_asm 128 16 8 41.765
foo_128_8_16_512_asm 128 8 16 41.468
foo_128_4_32_512_asm 128 4 32 41.2357
foo_128_2_64_512_asm 128 2 64 41.1159
foo_64_8_8_1024_asm 64 8 8 41.5511
foo_64_4_16_1024_asm 64 4 16 41.2415
foo_64_2_32_1024_asm 64 2 32 39.8973
foo_256_16_16_256_hip 256 16 16 10.0867
foo_256_8_32_256_hip 256 8 32 10.1622
foo_256_4_64_256_hip 256 4 64 10.6341
foo_256_2_128_256_hip 256 2 128 9.82642
foo_128_16_8_512_hip 128 16 8 19.4036
foo_128_8_16_512_hip 128 8 16 18.9199
foo_128_4_32_512_hip 128 4 32 19.1957
foo_128_2_64_512_hip 128 2 64 19.1648
foo_64_8_8_1024_hip 64 8 8 34.0936
foo_64_4_16_1024_hip 64 4 16 32.1041
foo_64_2_32_1024_hip 64 2 32 33.2612
Shader Clock Mode: 3 Memory Clock Mode: 3
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 45.4564
foo_256_8_32_256_asm 256 8 32 41.6166
foo_256_4_64_256_asm 256 4 64 42.3572
foo_256_2_128_256_asm 256 2 128 33.9154
foo_128_16_8_512_asm 128 16 8 42.6925
foo_128_8_16_512_asm 128 8 16 41.5797
foo_128_4_32_512_asm 128 4 32 41.2883
foo_128_2_64_512_asm 128 2 64 41.2707
foo_64_8_8_1024_asm 64 8 8 41.5616
foo_64_4_16_1024_asm 64 4 16 41.6296
foo_64_2_32_1024_asm 64 2 32 39.8316
foo_256_16_16_256_hip 256 16 16 9.72693
foo_256_8_32_256_hip 256 8 32 10.363
foo_256_4_64_256_hip 256 4 64 10.5394
foo_256_2_128_256_hip 256 2 128 9.97363
foo_128_16_8_512_hip 128 16 8 19.4693
foo_128_8_16_512_hip 128 8 16 19.2385
foo_128_4_32_512_hip 128 4 32 19.3148
foo_128_2_64_512_hip 128 2 64 18.8446
foo_64_8_8_1024_hip 64 8 8 32.5342
foo_64_4_16_1024_hip 64 4 16 32.2757
foo_64_2_32_1024_hip 64 2 32 30.7703
Shader Clock Mode: 4 Memory Clock Mode: 0
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 43.3512
foo_256_8_32_256_asm 256 8 32 44.4053
foo_256_4_64_256_asm 256 4 64 43.477
foo_256_2_128_256_asm 256 2 128 33.0178
foo_128_16_8_512_asm 128 16 8 43.6284
foo_128_8_16_512_asm 128 8 16 43.6256
foo_128_4_32_512_asm 128 4 32 43.3635
foo_128_2_64_512_asm 128 2 64 43.2459
foo_64_8_8_1024_asm 64 8 8 44.5248
foo_64_4_16_1024_asm 64 4 16 43.2275
foo_64_2_32_1024_asm 64 2 32 45.8971
foo_256_16_16_256_hip 256 16 16 10.7234
foo_256_8_32_256_hip 256 8 32 11.8576
foo_256_4_64_256_hip 256 4 64 11.4898
foo_256_2_128_256_hip 256 2 128 10.2819
foo_128_16_8_512_hip 128 16 8 19.7407
foo_128_8_16_512_hip 128 8 16 20.3669
foo_128_4_32_512_hip 128 4 32 20.0124
foo_128_2_64_512_hip 128 2 64 19.7767
foo_64_8_8_1024_hip 64 8 8 33.8255
foo_64_4_16_1024_hip 64 4 16 36.3852
foo_64_2_32_1024_hip 64 2 32 32.3029
Shader Clock Mode: 4 Memory Clock Mode: 1
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 43.2247
foo_256_8_32_256_asm 256 8 32 43.2795
foo_256_4_64_256_asm 256 4 64 44.466
foo_256_2_128_256_asm 256 2 128 32.9393
foo_128_16_8_512_asm 128 16 8 43.6634
foo_128_8_16_512_asm 128 8 16 43.674
foo_128_4_32_512_asm 128 4 32 43.0996
foo_128_2_64_512_asm 128 2 64 46.8008
foo_64_8_8_1024_asm 64 8 8 43.7005
foo_64_4_16_1024_asm 64 4 16 47.3426
foo_64_2_32_1024_asm 64 2 32 41.488
foo_256_16_16_256_hip 256 16 16 11.3939
foo_256_8_32_256_hip 256 8 32 11.0342
foo_256_4_64_256_hip 256 4 64 10.8249
foo_256_2_128_256_hip 256 2 128 10.3495
foo_128_16_8_512_hip 128 16 8 20.477
foo_128_8_16_512_hip 128 8 16 20.2525
foo_128_4_32_512_hip 128 4 32 20.0984
foo_128_2_64_512_hip 128 2 64 19.342
foo_64_8_8_1024_hip 64 8 8 33.9075
foo_64_4_16_1024_hip 64 4 16 33.8736
foo_64_2_32_1024_hip 64 2 32 31.3617
Shader Clock Mode: 4 Memory Clock Mode: 2
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 43.4321
foo_256_8_32_256_asm 256 8 32 43.8039
foo_256_4_64_256_asm 256 4 64 44.651
foo_256_2_128_256_asm 256 2 128 32.9025
foo_128_16_8_512_asm 128 16 8 43.9744
foo_128_8_16_512_asm 128 8 16 43.5793
foo_128_4_32_512_asm 128 4 32 44.8048
foo_128_2_64_512_asm 128 2 64 46.6593
foo_64_8_8_1024_asm 64 8 8 43.6848
foo_64_4_16_1024_asm 64 4 16 44.2621
foo_64_2_32_1024_asm 64 2 32 41.4785
foo_256_16_16_256_hip 256 16 16 10.4434
foo_256_8_32_256_hip 256 8 32 10.9097
foo_256_4_64_256_hip 256 4 64 11.7004
foo_256_2_128_256_hip 256 2 128 10.4196
foo_128_16_8_512_hip 128 16 8 19.7707
foo_128_8_16_512_hip 128 8 16 20.3289
foo_128_4_32_512_hip 128 4 32 20.2061
foo_128_2_64_512_hip 128 2 64 20.3654
foo_64_8_8_1024_hip 64 8 8 34.0094
foo_64_4_16_1024_hip 64 4 16 38.1943
foo_64_2_32_1024_hip 64 2 32 36.3818
Shader Clock Mode: 4 Memory Clock Mode: 3
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 43.4939
foo_256_8_32_256_asm 256 8 32 47.4022
foo_256_4_64_256_asm 256 4 64 46.7195
foo_256_2_128_256_asm 256 2 128 33.1241
foo_128_16_8_512_asm 128 16 8 43.8099
foo_128_8_16_512_asm 128 8 16 43.6825
foo_128_4_32_512_asm 128 4 32 43.4088
foo_128_2_64_512_asm 128 2 64 43.1301
foo_64_8_8_1024_asm 64 8 8 43.7547
foo_64_4_16_1024_asm 64 4 16 43.3206
foo_64_2_32_1024_asm 64 2 32 42.3748
foo_256_16_16_256_hip 256 16 16 10.167
foo_256_8_32_256_hip 256 8 32 11.0194
foo_256_4_64_256_hip 256 4 64 10.9406
foo_256_2_128_256_hip 256 2 128 10.0831
foo_128_16_8_512_hip 128 16 8 19.8458
foo_128_8_16_512_hip 128 8 16 19.8851
foo_128_4_32_512_hip 128 4 32 20.6185
foo_128_2_64_512_hip 128 2 64 18.7019
foo_64_8_8_1024_hip 64 8 8 34.4267
foo_64_4_16_1024_hip 64 4 16 33.7269
foo_64_2_32_1024_hip 64 2 32 32.325
Shader Clock Mode: 5 Memory Clock Mode: 0
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 49.6568
foo_256_8_32_256_asm 256 8 32 52.0945
foo_256_4_64_256_asm 256 4 64 49.4921
foo_256_2_128_256_asm 256 2 128 37.0957
foo_128_16_8_512_asm 128 16 8 49.4549
foo_128_8_16_512_asm 128 8 16 49.3856
foo_128_4_32_512_asm 128 4 32 48.7562
foo_128_2_64_512_asm 128 2 64 48.4434
foo_64_8_8_1024_asm 64 8 8 47.006
foo_64_4_16_1024_asm 64 4 16 48.6814
foo_64_2_32_1024_asm 64 2 32 48.0662
foo_256_16_16_256_hip 256 16 16 12.8503
foo_256_8_32_256_hip 256 8 32 13.0366
foo_256_4_64_256_hip 256 4 64 13.2047
foo_256_2_128_256_hip 256 2 128 12.1377
foo_128_16_8_512_hip 128 16 8 22.1951
foo_128_8_16_512_hip 128 8 16 22.4086
foo_128_4_32_512_hip 128 4 32 23.9925
foo_128_2_64_512_hip 128 2 64 22.1024
foo_64_8_8_1024_hip 64 8 8 38.0401
foo_64_4_16_1024_hip 64 4 16 41.2868
foo_64_2_32_1024_hip 64 2 32 36.4117
Shader Clock Mode: 5 Memory Clock Mode: 1
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 48.3797
foo_256_8_32_256_asm 256 8 32 50.002
foo_256_4_64_256_asm 256 4 64 48.7579
foo_256_2_128_256_asm 256 2 128 36.8135
foo_128_16_8_512_asm 128 16 8 50.073
foo_128_8_16_512_asm 128 8 16 48.8974
foo_128_4_32_512_asm 128 4 32 50.1953
foo_128_2_64_512_asm 128 2 64 49.0316
foo_64_8_8_1024_asm 64 8 8 49.3678
foo_64_4_16_1024_asm 64 4 16 48.6559
foo_64_2_32_1024_asm 64 2 32 47.456
foo_256_16_16_256_hip 256 16 16 12.9035
foo_256_8_32_256_hip 256 8 32 13.0232
foo_256_4_64_256_hip 256 4 64 13.0747
foo_256_2_128_256_hip 256 2 128 12.4089
foo_128_16_8_512_hip 128 16 8 22.1202
foo_128_8_16_512_hip 128 8 16 23.0155
foo_128_4_32_512_hip 128 4 32 23.238
foo_128_2_64_512_hip 128 2 64 23.0136
foo_64_8_8_1024_hip 64 8 8 38.5107
foo_64_4_16_1024_hip 64 4 16 36.2676
foo_64_2_32_1024_hip 64 2 32 35.8104
Shader Clock Mode: 5 Memory Clock Mode: 2
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 49.2049
foo_256_8_32_256_asm 256 8 32 49.5089
foo_256_4_64_256_asm 256 4 64 49.1032
foo_256_2_128_256_asm 256 2 128 37.2324
foo_128_16_8_512_asm 128 16 8 49.4819
foo_128_8_16_512_asm 128 8 16 50.0588
foo_128_4_32_512_asm 128 4 32 48.7214
foo_128_2_64_512_asm 128 2 64 48.2511
foo_64_8_8_1024_asm 64 8 8 49.605
foo_64_4_16_1024_asm 64 4 16 48.8315
foo_64_2_32_1024_asm 64 2 32 47.416
foo_256_16_16_256_hip 256 16 16 12.7624
foo_256_8_32_256_hip 256 8 32 10.989
foo_256_4_64_256_hip 256 4 64 12.9687
foo_256_2_128_256_hip 256 2 128 12.4947
foo_128_16_8_512_hip 128 16 8 22.1283
foo_128_8_16_512_hip 128 8 16 22.1522
foo_128_4_32_512_hip 128 4 32 22.4593
foo_128_2_64_512_hip 128 2 64 22.0875
foo_64_8_8_1024_hip 64 8 8 37.9681
foo_64_4_16_1024_hip 64 4 16 38.1528
foo_64_2_32_1024_hip 64 2 32 36.8064
Shader Clock Mode: 5 Memory Clock Mode: 3
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 48.1779
foo_256_8_32_256_asm 256 8 32 45.6059
foo_256_4_64_256_asm 256 4 64 50.5598
foo_256_2_128_256_asm 256 2 128 37.3214
foo_128_16_8_512_asm 128 16 8 49.5492
foo_128_8_16_512_asm 128 8 16 49.5804
foo_128_4_32_512_asm 128 4 32 48.7196
foo_128_2_64_512_asm 128 2 64 48.6164
foo_64_8_8_1024_asm 64 8 8 49.5035
foo_64_4_16_1024_asm 64 4 16 48.7164
foo_64_2_32_1024_asm 64 2 32 47.5088
foo_256_16_16_256_hip 256 16 16 10.723
foo_256_8_32_256_hip 256 8 32 13.0906
foo_256_4_64_256_hip 256 4 64 12.6982
foo_256_2_128_256_hip 256 2 128 11.899
foo_128_16_8_512_hip 128 16 8 22.8792
foo_128_8_16_512_hip 128 8 16 22.2558
foo_128_4_32_512_hip 128 4 32 23.4296
foo_128_2_64_512_hip 128 2 64 22.6085
foo_64_8_8_1024_hip 64 8 8 37.9665
foo_64_4_16_1024_hip 64 4 16 37.9257
foo_64_2_32_1024_hip 64 2 32 36.2078
Shader Clock Mode: 6 Memory Clock Mode: 0
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 56.2099
foo_256_8_32_256_asm 256 8 32 57.3951
foo_256_4_64_256_asm 256 4 64 55.4569
foo_256_2_128_256_asm 256 2 128 41.2756
foo_128_16_8_512_asm 128 16 8 57.1089
foo_128_8_16_512_asm 128 8 16 56.127
foo_128_4_32_512_asm 128 4 32 56.4229
foo_128_2_64_512_asm 128 2 64 56.2217
foo_64_8_8_1024_asm 64 8 8 59.4802
foo_64_4_16_1024_asm 64 4 16 55.5572
foo_64_2_32_1024_asm 64 2 32 54.2364
foo_256_16_16_256_hip 256 16 16 14.1658
foo_256_8_32_256_hip 256 8 32 14.1554
foo_256_4_64_256_hip 256 4 64 14.369
foo_256_2_128_256_hip 256 2 128 13.6906
foo_128_16_8_512_hip 128 16 8 24.916
foo_128_8_16_512_hip 128 8 16 24.9177
foo_128_4_32_512_hip 128 4 32 25.4236
foo_128_2_64_512_hip 128 2 64 25.7741
foo_64_8_8_1024_hip 64 8 8 43.273
foo_64_4_16_1024_hip 64 4 16 47.0031
foo_64_2_32_1024_hip 64 2 32 41.118
Shader Clock Mode: 6 Memory Clock Mode: 1
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 56.1863
foo_256_8_32_256_asm 256 8 32 55.947
foo_256_4_64_256_asm 256 4 64 55.8415
foo_256_2_128_256_asm 256 2 128 42.0598
foo_128_16_8_512_asm 128 16 8 60.2057
foo_128_8_16_512_asm 128 8 16 56.6194
foo_128_4_32_512_asm 128 4 32 56.1401
foo_128_2_64_512_asm 128 2 64 56.2803
foo_64_8_8_1024_asm 64 8 8 56.7437
foo_64_4_16_1024_asm 64 4 16 56.3793
foo_64_2_32_1024_asm 64 2 32 53.9138
foo_256_16_16_256_hip 256 16 16 14.6374
foo_256_8_32_256_hip 256 8 32 14.3081
foo_256_4_64_256_hip 256 4 64 15.095
foo_256_2_128_256_hip 256 2 128 13.8832
foo_128_16_8_512_hip 128 16 8 24.8205
foo_128_8_16_512_hip 128 8 16 25.9067
foo_128_4_32_512_hip 128 4 32 25.3147
foo_128_2_64_512_hip 128 2 64 24.8343
foo_64_8_8_1024_hip 64 8 8 43.0608
foo_64_4_16_1024_hip 64 4 16 43.471
foo_64_2_32_1024_hip 64 2 32 40.9613
Shader Clock Mode: 6 Memory Clock Mode: 2
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 59.6959
foo_256_8_32_256_asm 256 8 32 56.2845
foo_256_4_64_256_asm 256 4 64 56.4182
foo_256_2_128_256_asm 256 2 128 41.5353
foo_128_16_8_512_asm 128 16 8 57.1416
foo_128_8_16_512_asm 128 8 16 56.6156
foo_128_4_32_512_asm 128 4 32 55.6158
foo_128_2_64_512_asm 128 2 64 56.1434
foo_64_8_8_1024_asm 64 8 8 56.7461
foo_64_4_16_1024_asm 64 4 16 55.5987
foo_64_2_32_1024_asm 64 2 32 54.3275
foo_256_16_16_256_hip 256 16 16 14.2308
foo_256_8_32_256_hip 256 8 32 14.518
foo_256_4_64_256_hip 256 4 64 14.4723
foo_256_2_128_256_hip 256 2 128 13.9899
foo_128_16_8_512_hip 128 16 8 24.8532
foo_128_8_16_512_hip 128 8 16 25.1085
foo_128_4_32_512_hip 128 4 32 25.1167
foo_128_2_64_512_hip 128 2 64 24.914
foo_64_8_8_1024_hip 64 8 8 42.8723
foo_64_4_16_1024_hip 64 4 16 43.1057
foo_64_2_32_1024_hip 64 2 32 40.7676
Shader Clock Mode: 6 Memory Clock Mode: 3
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 56.127
foo_256_8_32_256_asm 256 8 32 56.7302
foo_256_4_64_256_asm 256 4 64 55.7858
foo_256_2_128_256_asm 256 2 128 43.402
foo_128_16_8_512_asm 128 16 8 57.1669
foo_128_8_16_512_asm 128 8 16 56.5392
foo_128_4_32_512_asm 128 4 32 58.9282
foo_128_2_64_512_asm 128 2 64 60.8621
foo_64_8_8_1024_asm 64 8 8 51.9406
foo_64_4_16_1024_asm 64 4 16 57.3843
foo_64_2_32_1024_asm 64 2 32 54.0801
foo_256_16_16_256_hip 256 16 16 14.4613
foo_256_8_32_256_hip 256 8 32 14.3575
foo_256_4_64_256_hip 256 4 64 14.5986
foo_256_2_128_256_hip 256 2 128 13.7027
foo_128_16_8_512_hip 128 16 8 24.8432
foo_128_8_16_512_hip 128 8 16 24.916
foo_128_4_32_512_hip 128 4 32 25.2033
foo_128_2_64_512_hip 128 2 64 24.8006
foo_64_8_8_1024_hip 64 8 8 43.0245
foo_64_4_16_1024_hip 64 4 16 43.604
foo_64_2_32_1024_hip 64 2 32 36.4932
Shader Clock Mode: 7 Memory Clock Mode: 0
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 63.9706
foo_256_8_32_256_asm 256 8 32 64.3671
foo_256_4_64_256_asm 256 4 64 60.0933
foo_256_2_128_256_asm 256 2 128 44.2317
foo_128_16_8_512_asm 128 16 8 60.4846
foo_128_8_16_512_asm 128 8 16 60.2718
foo_128_4_32_512_asm 128 4 32 59.4849
foo_128_2_64_512_asm 128 2 64 59.6571
foo_64_8_8_1024_asm 64 8 8 60.3282
foo_64_4_16_1024_asm 64 4 16 56.2609
foo_64_2_32_1024_asm 64 2 32 57.1343
foo_256_16_16_256_hip 256 16 16 15.4658
foo_256_8_32_256_hip 256 8 32 15.0604
foo_256_4_64_256_hip 256 4 64 15.5503
foo_256_2_128_256_hip 256 2 128 14.689
foo_128_16_8_512_hip 128 16 8 28.8541
foo_128_8_16_512_hip 128 8 16 26.448
foo_128_4_32_512_hip 128 4 32 26.669
foo_128_2_64_512_hip 128 2 64 27.6992
foo_64_8_8_1024_hip 64 8 8 50.4653
foo_64_4_16_1024_hip 64 4 16 50.0936
foo_64_2_32_1024_hip 64 2 32 47.9081
Shader Clock Mode: 7 Memory Clock Mode: 1
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 59.4353
foo_256_8_32_256_asm 256 8 32 58.0211
foo_256_4_64_256_asm 256 4 64 59.849
foo_256_2_128_256_asm 256 2 128 45.4913
foo_128_16_8_512_asm 128 16 8 60.9701
foo_128_8_16_512_asm 128 8 16 59.6827
foo_128_4_32_512_asm 128 4 32 59.7492
foo_128_2_64_512_asm 128 2 64 55.4174
foo_64_8_8_1024_asm 64 8 8 61.2008
foo_64_4_16_1024_asm 64 4 16 59.7332
foo_64_2_32_1024_asm 64 2 32 57.4804
foo_256_16_16_256_hip 256 16 16 15.3231
foo_256_8_32_256_hip 256 8 32 15.3286
foo_256_4_64_256_hip 256 4 64 15.2471
foo_256_2_128_256_hip 256 2 128 14.5306
foo_128_16_8_512_hip 128 16 8 26.2311
foo_128_8_16_512_hip 128 8 16 26.3118
foo_128_4_32_512_hip 128 4 32 26.5301
foo_128_2_64_512_hip 128 2 64 26.1591
foo_64_8_8_1024_hip 64 8 8 45.8011
foo_64_4_16_1024_hip 64 4 16 45.8597
foo_64_2_32_1024_hip 64 2 32 43.6987
Shader Clock Mode: 7 Memory Clock Mode: 2
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 60.0368
foo_256_8_32_256_asm 256 8 32 63.4832
foo_256_4_64_256_asm 256 4 64 59.5908
foo_256_2_128_256_asm 256 2 128 43.8131
foo_128_16_8_512_asm 128 16 8 60.7126
foo_128_8_16_512_asm 128 8 16 60.0562
foo_128_4_32_512_asm 128 4 32 59.6885
foo_128_2_64_512_asm 128 2 64 59.429
foo_64_8_8_1024_asm 64 8 8 60.3309
foo_64_4_16_1024_asm 64 4 16 59.1597
foo_64_2_32_1024_asm 64 2 32 59.6938
foo_256_16_16_256_hip 256 16 16 15.2384
foo_256_8_32_256_hip 256 8 32 15.1847
foo_256_4_64_256_hip 256 4 64 15.5662
foo_256_2_128_256_hip 256 2 128 14.689
foo_128_16_8_512_hip 128 16 8 26.2406
foo_128_8_16_512_hip 128 8 16 27.2585
foo_128_4_32_512_hip 128 4 32 26.5438
foo_128_2_64_512_hip 128 2 64 26.1103
foo_64_8_8_1024_hip 64 8 8 49.9524
foo_64_4_16_1024_hip 64 4 16 45.3195
foo_64_2_32_1024_hip 64 2 32 43.5032
Shader Clock Mode: 7 Memory Clock Mode: 3
Length: 262144
Kernel Name Total Loops Unroll Factor Num Loops Bandwidth
foo_256_16_16_256_asm 256 16 16 59.3716
foo_256_8_32_256_asm 256 8 32 60.3804
foo_256_4_64_256_asm 256 4 64 59.5526
foo_256_2_128_256_asm 256 2 128 44.1315
foo_128_16_8_512_asm 128 16 8 60.4627
foo_128_8_16_512_asm 128 8 16 61.6373
foo_128_4_32_512_asm 128 4 32 59.6736
foo_128_2_64_512_asm 128 2 64 59.7721
foo_64_8_8_1024_asm 64 8 8 62.0857
foo_64_4_16_1024_asm 64 4 16 59.9695
foo_64_2_32_1024_asm 64 2 32 57.3078
foo_256_16_16_256_hip 256 16 16 15.3933
foo_256_8_32_256_hip 256 8 32 15.4444
foo_256_4_64_256_hip 256 4 64 15.3971
foo_256_2_128_256_hip 256 2 128 14.4963
foo_128_16_8_512_hip 128 16 8 26.3634
foo_128_8_16_512_hip 128 8 16 26.2662
foo_128_4_32_512_hip 128 4 32 26.5901
foo_128_2_64_512_hip 128 2 64 26.1038
foo_64_8_8_1024_hip 64 8 8 45.6933
foo_64_4_16_1024_hip 64 4 16 46.8397
foo_64_2_32_1024_hip 64 2 32 44.985