Skip to content

Commit 7472406

Browse files
authored
Merge branch 'main' into num_compute_units
2 parents b8216ab + 1918293 commit 7472406

27 files changed

+646
-204
lines changed

include/ur_api.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5189,7 +5189,6 @@ typedef struct ur_kernel_arg_pointer_properties_t {
51895189
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
51905190
/// + `NULL == hKernel`
51915191
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
5192-
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
51935192
UR_APIEXPORT ur_result_t UR_APICALL
51945193
urKernelSetArgPointer(
51955194
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object

scripts/core/kernel.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,6 @@ params:
352352
desc: "[in][optional] Pointer obtained by USM allocation or virtual memory mapping operation. If null then argument value is considered null."
353353
returns:
354354
- $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
355-
- $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
356355
--- #--------------------------------------------------------------------------
357356
type: struct
358357
desc: "Properties for for $xKernelSetExecInfo."

source/adapters/level_zero/kernel.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -813,12 +813,13 @@ ur_result_t urKernelGetGroupInfo(
813813
(ZeKernelDevice, &kernelProperties));
814814
if (ZeResult || workGroupProperties.maxGroupSize == 0) {
815815
return ReturnValue(
816-
uint64_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
816+
size_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
817817
}
818-
return ReturnValue(workGroupProperties.maxGroupSize);
818+
// Specification states this returns a size_t.
819+
return ReturnValue(size_t{workGroupProperties.maxGroupSize});
819820
} else {
820821
return ReturnValue(
821-
uint64_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
822+
size_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
822823
}
823824
}
824825
case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: {
@@ -830,12 +831,12 @@ ur_result_t urKernelGetGroupInfo(
830831
return ReturnValue(WgSize);
831832
}
832833
case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE:
833-
return ReturnValue(uint32_t{Kernel->ZeKernelProperties->localMemSize});
834+
return ReturnValue(size_t{Kernel->ZeKernelProperties->localMemSize});
834835
case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
835836
return ReturnValue(size_t{Device->ZeDeviceProperties->physicalEUSimdWidth});
836837
}
837838
case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: {
838-
return ReturnValue(uint32_t{Kernel->ZeKernelProperties->privateMemSize});
839+
return ReturnValue(size_t{Kernel->ZeKernelProperties->privateMemSize});
839840
}
840841
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE:
841842
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE:

source/adapters/level_zero/v2/kernel.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -512,10 +512,11 @@ ur_result_t urKernelGetGroupInfo(
512512
ZE_CALL_NOCHECK(zeKernelGetProperties, (zeDevice, &kernelProperties));
513513
if (zeResult == ZE_RESULT_SUCCESS &&
514514
workGroupProperties.maxGroupSize != 0) {
515-
return returnValue(workGroupProperties.maxGroupSize);
515+
// Specification states this returns a size_t.
516+
return returnValue(size_t{workGroupProperties.maxGroupSize});
516517
}
517518
return returnValue(
518-
uint64_t{hDevice->ZeDeviceComputeProperties->maxTotalGroupSize});
519+
size_t{hDevice->ZeDeviceComputeProperties->maxTotalGroupSize});
519520
}
520521
case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: {
521522
auto props = hKernel->getProperties(hDevice);
@@ -527,15 +528,15 @@ ur_result_t urKernelGetGroupInfo(
527528
}
528529
case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: {
529530
auto props = hKernel->getProperties(hDevice);
530-
return returnValue(uint32_t{props.localMemSize});
531+
return returnValue(size_t{props.localMemSize});
531532
}
532533
case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
533534
return returnValue(
534535
size_t{hDevice->ZeDeviceProperties->physicalEUSimdWidth});
535536
}
536537
case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: {
537538
auto props = hKernel->getProperties(hDevice);
538-
return returnValue(uint32_t{props.privateMemSize});
539+
return returnValue(size_t{props.privateMemSize});
539540
}
540541
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE:
541542
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE:

source/adapters/native_cpu/enqueue.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,12 +138,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
138138
#else
139139
bool isLocalSizeOne =
140140
ndr.LocalSize[0] == 1 && ndr.LocalSize[1] == 1 && ndr.LocalSize[2] == 1;
141-
if (isLocalSizeOne && ndr.GlobalSize[0] > numParallelThreads) {
141+
if (isLocalSizeOne && ndr.GlobalSize[0] > numParallelThreads &&
142+
!hKernel->hasLocalArgs()) {
142143
// If the local size is one, we make the assumption that we are running a
143144
// parallel_for over a sycl::range.
144-
// Todo: we could add compiler checks and
145-
// kernel properties for this (e.g. check that no barriers are called, no
146-
// local memory args).
145+
// Todo: we could add more compiler checks and
146+
// kernel properties for this (e.g. check that no barriers are called).
147147

148148
// Todo: this assumes that dim 0 is the best dimension over which we want to
149149
// parallelize

source/adapters/native_cpu/kernel.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,9 @@ struct ur_kernel_handle_t_ : RefCounted {
142142
_localMemPoolSize = reqSize;
143143
}
144144

145-
// To be called before executing a work group
145+
bool hasLocalArgs() const { return !_localArgInfo.empty(); }
146+
147+
// To be called before executing a work group if local args are present
146148
void handleLocalArgs(size_t numParallelThread, size_t threadId) {
147149
// For each local argument we have size*numthreads
148150
size_t offset = 0;

source/loader/ur_libapi.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3983,7 +3983,6 @@ ur_result_t UR_APICALL urKernelRelease(
39833983
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
39843984
/// + `NULL == hKernel`
39853985
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
3986-
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
39873986
ur_result_t UR_APICALL urKernelSetArgPointer(
39883987
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
39893988
uint32_t argIndex, ///< [in] argument index in range [0, num args - 1]

source/ur_api.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3395,7 +3395,6 @@ ur_result_t UR_APICALL urKernelRelease(
33953395
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
33963396
/// + `NULL == hKernel`
33973397
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
3398-
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
33993398
ur_result_t UR_APICALL urKernelSetArgPointer(
34003399
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
34013400
uint32_t argIndex, ///< [in] argument index in range [0, num args - 1]

test/conformance/device_code/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,9 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy.cpp)
159159
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm.cpp)
160160
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/indexers_usm.cpp)
161161
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/build_failure.cpp)
162+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fixed_sg_size.cpp)
162163
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fixed_wg_size.cpp)
164+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/max_wg_size.cpp)
163165
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/sequence.cpp)
164166
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/standard_types.cpp)
165167
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/subgroup.cpp)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <sycl/sycl.hpp>
7+
8+
struct KernelFunctor {
9+
void operator()(sycl::nd_item<3>) const {}
10+
void operator()(sycl::item<3>) const {}
11+
12+
auto get(sycl::ext::oneapi::experimental::properties_tag) {
13+
return sycl::ext::oneapi::experimental::properties{
14+
sycl::ext::oneapi::experimental::sub_group_size<8>};
15+
}
16+
};
17+
18+
int main() {
19+
sycl::queue myQueue;
20+
myQueue.submit([&](sycl::handler &cgh) {
21+
cgh.parallel_for<class FixedSgSize>(sycl::range<3>(8, 8, 8),
22+
KernelFunctor{});
23+
});
24+
25+
myQueue.wait();
26+
return 0;
27+
}

0 commit comments

Comments
 (0)