Skip to content

Commit 9dd8cdf

Browse files
committed
Work around NEO not reporting MAX_GROUP_SIZE.
1 parent a241418 commit 9dd8cdf

File tree

2 files changed

+5 -2 lines changed

2 files changed

+5 -2 lines changed

lib/level-zero/module.jl

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -237,7 +237,10 @@ function properties(kernel::ZeKernel)
237237
preferred_group_size_props_ref = Ref(ze_kernel_preferred_group_size_properties_t())
238238
link_extensions(props_ref, preferred_group_size_props_ref)
239239
if haskey(oneL0.extension_properties(kernel.mod.context.driver),
240-
"ZE_extension_kernel_max_group_size_properties")
240+
"ZE_extension_kernel_max_group_size_properties") ||
241+
# intel/compute-runtime#733
242+
(properties(kernel.mod.device).vendorId == 0x8086 &&
243+
properties(kernel.mod.context.driver).driverVersion >= v"1.3.29138")
241244
# TODO: memoize
242245
max_group_size_props_ref = Ref(ze_kernel_max_group_size_properties_ext_t())
243246
link_extensions(preferred_group_size_props_ref, max_group_size_props_ref)

src/compiler/execution.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -167,11 +167,11 @@ function launch_configuration(kernel::HostKernel{F,TT}) where {F,TT}
167167
# configurations, so roll our own version that behaves like CUDA's
168168
# occupancy API and assumes the kernel still does bounds checking.
169169

170-
# once the MAX_GROUP_SIZE extension is implemented, we can use it here
171170
kernel_props = oneL0.properties(kernel.fun)
172171
group_size = if kernel_props.maxGroupSize !== missing
173172
kernel_props.maxGroupSize
174173
else
174+
# without the MAX_GROUP_SIZE extension, we need to be conservative
175175
dev = kernel.fun.mod.device
176176
compute_props = oneL0.compute_properties(dev)
177177
max_size = compute_props.maxTotalGroupSize

0 commit comments

Comments
 (0)