Skip to content

Commit cc27555

Browse files
authored
Merge branch 'main' into num_compute_units
2 parents 7472406 + e6d4355 commit cc27555

File tree

23 files changed

+436
-363
lines changed

23 files changed

+436
-363
lines changed

scripts/core/enqueue.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -782,7 +782,9 @@ desc: "Enqueue a command to fill an image object with specified color"
782782
class: $xEnqueue
783783
name: MemImageFill
784784
ordinal: "0"
785-
version: "9999.0" # see #50
785+
# Will not be generated
786+
# https://github.com/oneapi-src/unified-runtime/issues/50
787+
version: "9999.0"
786788
details:
787789
- "Currently not implemented in Level Zero"
788790
- "TODO: add a driver function in Level Zero?"
@@ -919,7 +921,9 @@ type: function
919921
desc: "Enqueue a command to map a region of the image object into the host address space and return a pointer to the mapped region"
920922
class: $xEnqueue
921923
name: MemImageMap
922-
version: "9999.0" # See #50
924+
# Will not be generated
925+
# https://github.com/oneapi-src/unified-runtime/issues/50
926+
version: "9999.0"
923927
ordinal: "0"
924928
details:
925929
- "Input parameter blockingMap indicates if the map is blocking or non-blocking."

scripts/core/event.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,18 @@ etors:
4343
desc: Event created by $xEnqueueMemImageWrite
4444
- name: MEM_IMAGE_COPY
4545
desc: Event created by $xEnqueueMemImageCopy
46+
# Will not be generated
47+
# https://github.com/oneapi-src/unified-runtime/issues/50
4648
- name: MEM_IMAGE_FILL
4749
desc: Event created by $xEnqueueMemImageFill
48-
version: "9999.0" # See #50
50+
version: "9999.0"
4951
- name: MEM_BUFFER_MAP
5052
desc: Event created by $xEnqueueMemBufferMap
53+
# Will not be generated
54+
# https://github.com/oneapi-src/unified-runtime/issues/50
5155
- name: MEM_IMAGE_MAP
5256
desc: Event created by $xEnqueueMemImageMap
53-
version: "9999.0" # See #50
57+
version: "9999.0"
5458
- name: MEM_UNMAP
5559
desc: Event created by $xEnqueueMemUnmap
5660
- name: USM_FILL

scripts/templates/ldrddi.cpp.mako

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -273,11 +273,17 @@ namespace ur_loader
273273
274274
%endif
275275
%endif
276-
## Before we can re-enable the releases we will need ref-counted object_t.
277-
## See unified-runtime github issue #1784
278-
##%if item['release']:
279-
##// release loader handle
280-
##${item['factory']}.release( ${item['name']} );
276+
## Possibly handle release/retain ref counting - there are no ur_exp-image factories
277+
%if 'factory' in item and '_exp_image_' not in item['factory']:
278+
%if item['release']:
279+
// release loader handle
280+
context->factories.${item['factory']}.release( ${item['name']} );
281+
%endif
282+
%if item['retain']:
283+
// increment refcount of handle
284+
context->factories.${item['factory']}.retain( ${item['name']} );
285+
%endif
286+
%endif
281287
%if not item['release'] and not item['retain'] and not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle':
282288
try
283289
{

source/adapters/cuda/adapter.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ class ur_legacy_sink : public logger::Sink {
3838
};
3939

4040
// FIXME: Remove the default log level when querying logging info is supported
41-
// through UR entry points. See #1330.
41+
// through UR entry points.
42+
// https://github.com/oneapi-src/unified-runtime/issues/1330
4243
ur_adapter_handle_t_::ur_adapter_handle_t_()
4344
: logger(logger::get_logger("cuda",
4445
/*default_log_level*/ logger::Level::ERR)) {

source/adapters/cuda/memory.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717

1818
/// Creates a UR Memory object using a CUDA memory allocation.
1919
/// Can trigger a manual copy depending on the mode.
20-
/// \TODO Implement USE_HOST_PTR using cuHostRegister - See #9789
20+
/// \TODO Implement USE_HOST_PTR using cuHostRegister
21+
/// https://github.com/intel/llvm/issues/9789
2122
///
2223
UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
2324
ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size,

source/adapters/hip/adapter.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ class ur_legacy_sink : public logger::Sink {
3737
};
3838

3939
// FIXME: Remove the default log level when querying logging info is supported
40-
// through UR entry points. See #1330.
40+
// through UR entry points.
41+
// https://github.com/oneapi-src/unified-runtime/issues/1330
4142
ur_adapter_handle_t_::ur_adapter_handle_t_()
4243
: logger(
4344
logger::get_logger("hip", /*default_log_level*/ logger::Level::ERR)) {

source/adapters/hip/memory.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) {
9090

9191
/// Creates a UR Memory object using a HIP memory allocation.
9292
/// Can trigger a manual copy depending on the mode.
93-
/// \TODO Implement USE_HOST_PTR using hipHostRegister - See #9789
93+
/// \TODO Implement USE_HOST_PTR using hipHostRegister
94+
/// https://github.com/intel/llvm/issues/9789
9495
UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
9596
ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size,
9697
const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) {

source/adapters/level_zero/v2/api.cpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -103,16 +103,6 @@ ur_result_t urKernelSetSpecializationConstants(
103103
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
104104
}
105105

106-
ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel,
107-
ur_queue_handle_t hQueue,
108-
uint32_t numWorkDim,
109-
const size_t *pGlobalWorkOffset,
110-
const size_t *pGlobalWorkSize,
111-
size_t *pSuggestedLocalWorkSize) {
112-
logger::error("{} function not implemented!", __FUNCTION__);
113-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
114-
}
115-
116106
ur_result_t urEventSetCallback(ur_event_handle_t hEvent,
117107
ur_execution_info_t execStatus,
118108
ur_event_callback_t pfnNotify, void *pUserData) {
@@ -484,14 +474,6 @@ ur_result_t urCommandBufferCommandGetInfoExp(
484474
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
485475
}
486476

487-
ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
488-
ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t workDim,
489-
const size_t *pLocalWorkSize, size_t dynamicSharedMemorySize,
490-
uint32_t *pGroupCountRet) {
491-
logger::error("{} function not implemented!", __FUNCTION__);
492-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
493-
}
494-
495477
ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem,
496478
size_t size) {
497479
logger::error("{} function not implemented!", __FUNCTION__);

source/adapters/level_zero/v2/kernel.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "context.hpp"
1414
#include "kernel.hpp"
1515
#include "memory.hpp"
16+
#include "queue_api.hpp"
1617

1718
#include "../device.hpp"
1819
#include "../helpers/kernel_helpers.hpp"
@@ -624,4 +625,48 @@ ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel,
624625
} catch (...) {
625626
return exceptionToResult(std::current_exception());
626627
}
628+
629+
/// Computes a suggested local (work-group) size for launching `hKernel`.
///
/// The device is obtained from `hQueue` via UR_QUEUE_INFO_DEVICE, the global
/// size is padded to three dimensions with 1s, and the suggestion is produced
/// by getSuggestedLocalWorkSize(). Only the first `workDim` entries of the
/// result are written back to the caller.
///
/// \param hKernel kernel whose Level Zero handle for the queue's device is used
/// \param hQueue queue used to look up the target device
/// \param workDim number of work dimensions; must be 1, 2 or 3
/// \param pGlobalWorkOffset not used by this implementation
/// \param pGlobalWorkSize array of `workDim` global work sizes; must not be null
/// \param pSuggestedLocalWorkSize output array receiving `workDim` suggested
///        local sizes; must not be null
/// \return UR_RESULT_SUCCESS on success,
///         UR_RESULT_ERROR_INVALID_WORK_DIMENSION for an out-of-range workDim,
///         UR_RESULT_ERROR_INVALID_NULL_POINTER for a null size array, or the
///         result forwarded by UR_CALL from queueGetInfo /
///         getSuggestedLocalWorkSize.
ur_result_t urKernelGetSuggestedLocalWorkSize(
    ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t workDim,
    [[maybe_unused]] const size_t *pGlobalWorkOffset,
    const size_t *pGlobalWorkSize, size_t *pSuggestedLocalWorkSize) {
  UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
  UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
  // pGlobalWorkSize is read by std::copy below, so reject null up front
  // instead of dereferencing it.
  UR_ASSERT(pGlobalWorkSize != nullptr, UR_RESULT_ERROR_INVALID_NULL_POINTER);
  UR_ASSERT(pSuggestedLocalWorkSize != nullptr,
            UR_RESULT_ERROR_INVALID_NULL_POINTER);

  uint32_t localWorkSize[3];
  // Unused trailing dimensions keep a global size of 1.
  size_t globalWorkSize3D[3]{1, 1, 1};
  std::copy(pGlobalWorkSize, pGlobalWorkSize + workDim, globalWorkSize3D);

  ur_device_handle_t hDevice;
  UR_CALL(hQueue->queueGetInfo(UR_QUEUE_INFO_DEVICE, sizeof(hDevice),
                               reinterpret_cast<void *>(&hDevice), nullptr));

  UR_CALL(getSuggestedLocalWorkSize(hDevice, hKernel->getZeHandle(hDevice),
                                    globalWorkSize3D, localWorkSize));

  // Report only as many dimensions as the caller asked about.
  std::copy(localWorkSize, localWorkSize + workDim, pSuggestedLocalWorkSize);
  return UR_RESULT_SUCCESS;
}
652+
653+
/// Queries Level Zero for the suggested maximum cooperative group count of
/// `hKernel` on `hDevice` for the given local work size.
///
/// The local size is expanded to three dimensions (missing dimensions are 1)
/// and applied to the kernel with zeKernelSetGroupSize before calling
/// zeKernelSuggestMaxCooperativeGroupCount.
///
/// \param hKernel kernel to query
/// \param hDevice device selecting which Level Zero kernel handle is used
/// \param workDim number of valid entries in `pLocalWorkSize`
/// \param pLocalWorkSize local work size per dimension; must not be null
/// \param dynamicSharedMemorySize not used by this implementation
/// \param pGroupCountRet output for the suggested group count; must not be
///        null
/// \return UR_RESULT_SUCCESS on success,
///         UR_RESULT_ERROR_INVALID_NULL_POINTER for a null pointer argument,
///         or the result produced by ZE2UR_CALL for a failing Level Zero call.
ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
    ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t workDim,
    const size_t *pLocalWorkSize, size_t dynamicSharedMemorySize,
    uint32_t *pGroupCountRet) {
  (void)dynamicSharedMemorySize;

  // Both pointers are dereferenced unconditionally below.
  UR_ASSERT(pLocalWorkSize != nullptr, UR_RESULT_ERROR_INVALID_NULL_POINTER);
  UR_ASSERT(pGroupCountRet != nullptr, UR_RESULT_ERROR_INVALID_NULL_POINTER);

  uint32_t wg[3];
  wg[0] = ur_cast<uint32_t>(pLocalWorkSize[0]);
  wg[1] = workDim >= 2 ? ur_cast<uint32_t>(pLocalWorkSize[1]) : 1;
  wg[2] = workDim == 3 ? ur_cast<uint32_t>(pLocalWorkSize[2]) : 1;

  // Look the Level Zero handle up once; both calls target the same kernel.
  auto zeKernel = hKernel->getZeHandle(hDevice);
  ZE2UR_CALL(zeKernelSetGroupSize, (zeKernel, wg[0], wg[1], wg[2]));

  uint32_t totalGroupCount = 0;
  ZE2UR_CALL(zeKernelSuggestMaxCooperativeGroupCount,
             (zeKernel, &totalGroupCount));
  *pGroupCountRet = totalGroupCount;
  return UR_RESULT_SUCCESS;
}
627672
} // namespace ur::level_zero

source/common/ur_singleton.hpp

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,26 @@
1111
#ifndef UR_SINGLETON_H
1212
#define UR_SINGLETON_H 1
1313

14+
#include <cassert>
1415
#include <memory>
1516
#include <mutex>
1617
#include <unordered_map>
1718

1819
//////////////////////////////////////////////////////////////////////////
1920
/// an abstract factory for the creation of singleton objects
2021
template <typename singleton_tn, typename key_tn> class singleton_factory_t {
22+
struct entry_t {
23+
std::unique_ptr<singleton_tn> ptr;
24+
size_t ref_count;
25+
};
26+
2127
protected:
2228
using singleton_t = singleton_tn;
2329
using key_t = typename std::conditional<std::is_pointer<key_tn>::value,
2430
size_t, key_tn>::type;
2531

2632
using ptr_t = std::unique_ptr<singleton_t>;
27-
using map_t = std::unordered_map<key_t, ptr_t>;
33+
using map_t = std::unordered_map<key_t, entry_t>;
2834

2935
std::mutex mut; ///< lock for thread-safety
3036
map_t map; ///< single instance of singleton for each unique key
@@ -60,16 +66,31 @@ template <typename singleton_tn, typename key_tn> class singleton_factory_t {
6066
if (map.end() == iter) {
6167
auto ptr =
6268
std::make_unique<singleton_t>(std::forward<Ts>(params)...);
63-
iter = map.emplace(key, std::move(ptr)).first;
69+
iter = map.emplace(key, entry_t{std::move(ptr), 0}).first;
70+
} else {
71+
iter->second.ref_count++;
6472
}
65-
return iter->second.get();
73+
return iter->second.ptr.get();
74+
}
75+
76+
//////////////////////////////////////////////////////////////////////////
/// add one reference to the singleton registered under `key`
///
/// The lookup and the increment happen under the factory mutex. Retaining a
/// key that is not in the map is a caller error: it trips the assert when
/// asserts are enabled and is otherwise ignored, so the end iterator is never
/// dereferenced.
void retain(key_tn key) {
    std::lock_guard<std::mutex> lk(mut);
    auto iter = map.find(getKey(key));
    assert(iter != map.end());
    if (iter == map.end()) {
        return; // assert compiled out: nothing to retain for an unknown key
    }
    iter->second.ref_count++;
}
6782

6883
//////////////////////////////////////////////////////////////////////////
6984
/// once the key is no longer valid, release the singleton
7085
void release(key_tn key) {
7186
std::lock_guard<std::mutex> lk(mut);
72-
map.erase(getKey(key));
87+
auto iter = map.find(getKey(key));
88+
assert(iter != map.end());
89+
if (iter->second.ref_count == 0) {
90+
map.erase(iter);
91+
} else {
92+
iter->second.ref_count--;
93+
}
7394
}
7495

7596
void clear() {

0 commit comments

Comments
 (0)