From 47a1d8fcc22e9a88edc8cc333bfae18bb9828212 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 21 Oct 2024 17:17:44 +0200 Subject: [PATCH 1/3] add N for hardware indices --- src/KernelAbstractions.jl | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 55d5d465e..f6b13b754 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -16,13 +16,15 @@ using StaticArrays using Adapt """ - @kernel function f(args) end + @kernel [N] function f(args) end Takes a function definition and generates a [`Kernel`](@ref) constructor from it. The enclosed function is allowed to contain kernel language constructs. In order to call it the kernel has first to be specialized on the backend and then invoked on the arguments. +The optional `N` parameter can be used to fix the number of dimensions used for the ndrange. + # Kernel language - [`@Const`](@ref) @@ -54,7 +56,7 @@ macro kernel(expr) end """ - @kernel config function f(args) end + @kernel [N] config function f(args) end This allows for two different configurations: @@ -584,17 +586,17 @@ in a workgroup. ``` As well as the on-device functionality. 
""" -struct Kernel{Backend, WorkgroupSize <: _Size, NDRange <: _Size, Fun} +struct Kernel{Backend, N, WorkgroupSize <: _Size, NDRange <: _Size, Fun} backend::Backend f::Fun end -function Base.similar(kernel::Kernel{D, WS, ND}, f::F) where {D, WS, ND, F} - Kernel{D, WS, ND, F}(kernel.backend, f) +function Base.similar(kernel::Kernel{D, N, WS, ND}, f::F) where {D, N, WS, ND, F} + Kernel{D, N, WS, ND, F}(kernel.backend, f) end -workgroupsize(::Kernel{D, WorkgroupSize}) where {D, WorkgroupSize} = WorkgroupSize -ndrange(::Kernel{D, WorkgroupSize, NDRange}) where {D, WorkgroupSize, NDRange} = NDRange +workgroupsize(::Kernel{D, N, WorkgroupSize}) where {D, WorkgroupSize} = WorkgroupSize +ndrange(::Kernel{D, N, WorkgroupSize, NDRange}) where {D, WorkgroupSize, NDRange} = NDRange backend(kernel::Kernel) = kernel.backend """ @@ -657,8 +659,8 @@ Partition a kernel for the given ndrange and workgroupsize. return iterspace, dynamic end -function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: Union{CPU, GPU}, S <: _Size, NDRange <: _Size, XPUName} - return Kernel{Backend, S, NDRange, XPUName}(backend, xpu_name) +function construct(backend::Backend, ::Val{N}, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: Union{CPU, GPU}, N, S <: _Size, NDRange <: _Size, XPUName} + return Kernel{Backend, N, S, NDRange, XPUName}(backend, xpu_name) end ### From d7833d42efa318fad781072e058a6f106b93d983 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 7 Jan 2025 10:46:38 +0100 Subject: [PATCH 2/3] fixup N support --- src/KernelAbstractions.jl | 22 +++++++++++++++------- src/macros.jl | 6 +++--- test/test.jl | 4 ++-- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index f6b13b754..c50d62bfd 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -52,7 +52,7 @@ synchronize(backend) ``` """ macro kernel(expr) - __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# 
false) + __kernel(DynamicSize(), expr, #=generate_cpu=# true, #=force_inbounds=# false) end """ @@ -70,10 +70,11 @@ This allows for two different configurations: """ macro kernel(ex...) if length(ex) == 1 - __kernel(ex[1], true, false) + __kernel(DynamicSize(), ex[1], true, false) else generate_cpu = true force_inbounds = false + N = DynamicSize() # TODO parse N for i in 1:(length(ex) - 1) if ex[i] isa Expr && ex[i].head == :(=) && ex[i].args[1] == :cpu && ex[i].args[2] isa Bool @@ -90,7 +91,7 @@ macro kernel(ex...) ) end end - __kernel(ex[end], generate_cpu, force_inbounds) + __kernel(N, ex[end], generate_cpu, force_inbounds) end end @@ -586,7 +587,7 @@ in a workgroup. ``` As well as the on-device functionality. """ -struct Kernel{Backend, N, WorkgroupSize <: _Size, NDRange <: _Size, Fun} +struct Kernel{Backend, N <: _Size, WorkgroupSize <: _Size, NDRange <: _Size, Fun} backend::Backend f::Fun end @@ -595,8 +596,9 @@ function Base.similar(kernel::Kernel{D, N, WS, ND}, f::F) where {D, N, WS, ND, F Kernel{D, N, WS, ND, F}(kernel.backend, f) end -workgroupsize(::Kernel{D, N, WorkgroupSize}) where {D, WorkgroupSize} = WorkgroupSize -ndrange(::Kernel{D, N, WorkgroupSize, NDRange}) where {D, WorkgroupSize, NDRange} = NDRange +workgroupsize(::Kernel{D, N, WorkgroupSize}) where {D, N, WorkgroupSize} = WorkgroupSize +ndrange(::Kernel{D, N, WorkgroupSize, NDRange}) where {D, N, WorkgroupSize, NDRange} = NDRange +ndims(::Kernel{D, N}) where {D, N} = N backend(kernel::Kernel) = kernel.backend """ @@ -605,6 +607,7 @@ Partition a kernel for the given ndrange and workgroupsize. 
@inline function partition(kernel, ndrange, workgroupsize) static_ndrange = KernelAbstractions.ndrange(kernel) static_workgroupsize = KernelAbstractions.workgroupsize(kernel) + static_ndims = KernelAbstractions.ndims(kernel) if ndrange === nothing && static_ndrange <: DynamicSize || workgroupsize === nothing && static_workgroupsize <: DynamicSize @@ -655,11 +658,16 @@ Partition a kernel for the given ndrange and workgroupsize. workgroupsize = CartesianIndices(workgroupsize) end + if static_ndims <: StaticSize + @assert get(static_ndims) == length(ndrange) + end + + # TODO: Add static_ndims iterspace = NDRange{length(ndrange), static_blocks, static_workgroupsize}(blocks, workgroupsize) return iterspace, dynamic end -function construct(backend::Backend, ::Val{N}, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: Union{CPU, GPU}, N, S <: _Size, NDRange <: _Size, XPUName} +function construct(backend::Backend, ::N, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: Union{CPU, GPU}, N <: _Size, S <: _Size, NDRange <: _Size, XPUName} return Kernel{Backend, N, S, NDRange, XPUName}(backend, xpu_name) end diff --git a/src/macros.jl b/src/macros.jl index a511758dc..3fc7d48b4 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -10,7 +10,7 @@ function find_return(stmt) end # XXX: Proper errors -function __kernel(expr, generate_cpu = true, force_inbounds = false) +function __kernel(N, expr, generate_cpu = true, force_inbounds = false) def = splitdef(expr) name = def[:name] args = def[:args] @@ -57,10 +57,10 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false) $name(dev, size, range) = $name(dev, $StaticSize(size), $StaticSize(range)) function $name(dev::Dev, sz::S, range::NDRange) where {Dev, S <: $_Size, NDRange <: $_Size} if $isgpu(dev) - return $construct(dev, sz, range, $gpu_name) + return $construct(dev, $(N), sz, range, $gpu_name) else if $generate_cpu - return $construct(dev, sz, range, $cpu_name) + return $construct(dev, $(N), sz, range, $cpu_name) 
else error("This kernel is unavailable for backend CPU") end diff --git a/test/test.jl b/test/test.jl index 337af10d4..aa7875448 100644 --- a/test/test.jl +++ b/test/test.jl @@ -10,7 +10,7 @@ identity(x) = x function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; skip_tests = Set{String}()) @conditional_testset "partition" skip_tests begin backend = Backend() - let kernel = KernelAbstractions.Kernel{typeof(backend), StaticSize{(64,)}, DynamicSize, typeof(identity)}(backend, identity) + let kernel = KernelAbstractions.Kernel{typeof(backend), DynamicSize, StaticSize{(64,)}, DynamicSize, typeof(identity)}(backend, identity) iterspace, dynamic = KernelAbstractions.partition(kernel, (128,), nothing) @test length(blocks(iterspace)) == 2 @test dynamic isa NoDynamicCheck @@ -26,7 +26,7 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk @test_throws ErrorException KernelAbstractions.partition(kernel, (129,), (65,)) @test KernelAbstractions.backend(kernel) == backend end - let kernel = KernelAbstractions.Kernel{typeof(backend), StaticSize{(64,)}, StaticSize{(128,)}, typeof(identity)}(backend, identity) + let kernel = KernelAbstractions.Kernel{typeof(backend), DynamicSize, StaticSize{(64,)}, StaticSize{(128,)}, typeof(identity)}(backend, identity) iterspace, dynamic = KernelAbstractions.partition(kernel, (128,), nothing) @test length(blocks(iterspace)) == 2 @test dynamic isa NoDynamicCheck From 2afdea1881607fa071727f05dfc9cabc61e8a75a Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 7 Jan 2025 15:46:47 +0100 Subject: [PATCH 3/3] fixup N support --- src/KernelAbstractions.jl | 7 ++++++- test/runtests.jl | 11 +++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index c50d62bfd..4acdbab9a 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -82,6 +82,8 @@ macro kernel(ex...) 
elseif ex[i] isa Expr && ex[i].head == :(=) && ex[i].args[1] == :inbounds && ex[i].args[2] isa Bool force_inbounds = ex[i].args[2] + elseif ex[i] isa Int + N = StaticSize(ex[i]) else error( "Configuration should be of form:\n" * @@ -659,7 +661,10 @@ Partition a kernel for the given ndrange and workgroupsize. end if static_ndims <: StaticSize - @assert get(static_ndims) == length(ndrange) + N = only(get(static_ndims)) + if N !== length(ndrange) + error("Mismatch between static kernel dimension (N=$N) and ndrange=$ndrange") + end end # TODO: Add static_ndims diff --git a/test/runtests.jl b/test/runtests.jl index f992afad2..9c4c06e55 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -21,6 +21,17 @@ kern_static(CPU(static = true), (1,))(A, ndrange = length(A)) end @test_throws ErrorException("This kernel is unavailable for backend CPU") my_no_cpu_kernel(CPU()) +@kernel 1 function OneD() +end + +@kernel 2 function TwoD() +end + +@test OneD(CPU())(ndrange=1024) === nothing +@test_throws ErrorException("Mismatch between static kernel dimension (N=1) and ndrange=(1024, 1)") OneD(CPU())(ndrange=(1024, 1)) +@test_throws ErrorException("Mismatch between static kernel dimension (N=2) and ndrange=(1024,)") TwoD(CPU())(ndrange=1024) +@test TwoD(CPU())(ndrange=(1024,1)) === nothing + # testing multiple configurations at the same time @kernel cpu = false inbounds = false function my_no_cpu_kernel2(a) end