Skip to content

Commit cfc99dd

Browse files
committed
Update comment.
1 parent 7cb79cc commit cfc99dd

File tree

1 file changed

+16
-17
lines changed

1 file changed

+16
-17
lines changed

src/compiler/execution.jl

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -162,28 +162,27 @@ struct HostKernel{F,TT} <: AbstractKernel{F,TT}
162162
end
163163

164164
function launch_configuration(kernel::HostKernel{F,TT}) where {F,TT}
165-
# XXX: have the user pass in a global size to clamp against
166-
# maxGroupSizeX/Y/Z?
167-
168-
# XXX: shrink until a multiple of preferredGroupSize?
165+
# Level Zero's zeKernelSuggestGroupSize provides a launch configuration
166+
# that exactly covers the input size. This can result in very awkward
167+
# configurations, so roll our own version that behaves like CUDA's
168+
# occupancy API and assumes the kernel still does bounds checking.
169169

170170
# once the MAX_GROUP_SIZE extension is implemented, we can use it here
171171
kernel_props = oneL0.properties(kernel.fun)
172-
if kernel_props.maxGroupSize !== missing
173-
return kernel_props.maxGroupSize
172+
group_size = if kernel_props.maxGroupSize !== missing
173+
kernel_props.maxGroupSize
174+
else
175+
dev = kernel.fun.mod.device
176+
compute_props = oneL0.compute_properties(dev)
177+
max_size = compute_props.maxTotalGroupSize
178+
179+
## when the kernel uses many registers (which we can't query without
180+
## extensions that landed _after_ MAX_GROUP_SIZE, so don't bother)
181+
## the groupsize should be halved
182+
group_size = max_size ÷ 2
174183
end
175184

176-
# otherwise, we'd use `zeKernelSuggestGroupSize` but it's been observed
177-
# to return really bad configs (JuliaGPU/oneAPI.jl#430)
178-
179-
# so instead, calculate it ourselves based on the device properties
180-
dev = kernel.fun.mod.device
181-
compute_props = oneL0.compute_properties(dev)
182-
max_size = compute_props.maxTotalGroupSize
183-
## when the kernel uses many registers (which we can't query without
184-
## extensions that landed _after_ MAX_GROUP_SIZE, so don't bother)
185-
## the groupsize should be halved
186-
group_size = max_size ÷ 2
185+
# TODO: align the group size based on preferredGroupSize
187186

188187
return group_size
189188
end

0 commit comments

Comments
 (0)