Skip to content

Commit d4aa159

Browse files
committed
fix interpolation and at_unroll
1 parent d4712f1 commit d4aa159

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

examples/performance.jl

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
using KernelAbstractions, CUDA, Test
2+
using KernelAbstractions.Extras: @unroll
23

34
has_cuda_gpu() || exit()
45
CUDA.allowscalar(false)
@@ -156,7 +157,7 @@ for (name, kernel) in (
156157
("transpose", lmem_transpose_kernel!(CUDADevice(), (TILE_DIM, TILE_DIM))),
157158
)
158159
for bank in (true, false)
159-
NVTX.@range "Localmem $name $(TILE_DIM, TILE_DIM) bank=$bank" let
160+
NVTX.@range "Localmem $name ($TILE_DIM, $TILE_DIM) bank=$bank" let
160161
input = CUDA.rand(T, (N, N))
161162
output = similar(input)
162163

@@ -178,7 +179,7 @@ for (name, kernel) in (
178179
("transpose", coalesced_transpose_kernel!(CUDADevice(), (TILE_DIM, BLOCK_ROWS))),
179180
)
180181
for bank in (true, false)
181-
NVTX.@range "Localmem + multiple elements $name $(TILE_DIM, BLOCK_ROWS) bank=$bank" let
182+
NVTX.@range "Localmem + multiple elements $name ($TILE_DIM, $BLOCK_ROWS) bank=$bank" let
182183
input = CUDA.rand(T, (N, N))
183184
output = similar(input)
184185

0 commit comments

Comments
 (0)