File tree Expand file tree Collapse file tree 2 files changed +29
-2
lines changed Expand file tree Collapse file tree 2 files changed +29
-2
lines changed Original file line number Diff line number Diff line change @@ -7,7 +7,15 @@ function __kernel(expr)
7
7
body = expr. args[2 ]
8
8
9
9
# parse decl
10
- @assert isexpr (decl, :call )
10
+ # `@kernel fname(::T) where {T}`
11
+ if isexpr (decl, :where )
12
+ iswhere = true
13
+ whereargs = decl. args[2 : end ]
14
+ decl = decl. args[1 ]
15
+ else
16
+ iswhere = false
17
+ end
18
+ @assert isexpr (decl, :call )
11
19
name = decl. args[1 ]
12
20
13
21
# List of tuple (Symbol, Bool) where the bool
@@ -37,6 +45,11 @@ function __kernel(expr)
37
45
gpu_decl = Expr (:call , gpu_name, arglist... )
38
46
cpu_decl = Expr (:call , cpu_name, arglist... )
39
47
48
+ if iswhere
49
+ gpu_decl = Expr (:where , gpu_decl, whereargs... )
50
+ cpu_decl = Expr (:where , cpu_decl, whereargs... )
51
+ end
52
+
40
53
# Without the deepcopy we might accidentally modify expr shared between CPU and GPU
41
54
gpu_body = transform_gpu (deepcopy (body), args)
42
55
gpu_function = Expr (:function , gpu_decl, gpu_body)
Original file line number Diff line number Diff line change 167
167
@test occursin (" @llvm.nvvm.ldg" , IR)
168
168
end
169
169
end
170
- end
170
+ end
171
+
172
+ @kernel function kernel_val! (a, :: Val{m} ) where {m}
173
+ I = @index (Global)
174
+ @inbounds a[I] = m
175
+ end
176
+
177
+ A = zeros (Int64, 1024 )
178
+ wait (kernel_val! (CPU ())(A,Val (3 ), ndrange= size (A)))
179
+ @test all ((a)-> a== 3 , A)
180
+ if has_cuda_gpu ()
181
+ A = CuArrays. zeros (Int64, 1024 )
182
+ wait (kernel_val! (CUDA ())(A,Val (3 ), ndrange= size (A)))
183
+ @test all ((a)-> a== 3 , A)
184
+ end
You can’t perform that action at this time.
0 commit comments