Skip to content

Commit 9f7bf16

Browse files
committed
Merge branch 'main' into detect-memory-leak
2 parents 8e3732f + f404f4d commit 9f7bf16

File tree

10 files changed

+878
-708
lines changed

10 files changed

+878
-708
lines changed

source/adapters/hip/device.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
549549
// native asserts are in progress
550550
std::string SupportedExtensions = "";
551551
SupportedExtensions += "pi_ext_intel_devicelib_assert ";
552+
SupportedExtensions += "ur_exp_usm_p2p ";
552553

553554
int RuntimeVersion = 0;
554555
UR_CHECK_ERROR(hipRuntimeGetVersion(&RuntimeVersion));

source/adapters/level_zero/command_buffer.cpp

Lines changed: 819 additions & 662 deletions
Large diffs are not rendered by default.

source/adapters/level_zero/command_buffer.hpp

Lines changed: 36 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -30,25 +30,40 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
3030
ze_command_list_handle_t CommandList,
3131
ze_command_list_handle_t CommandListTranslated,
3232
ze_command_list_handle_t CommandListResetEvents,
33-
ze_command_list_handle_t CopyCommandList,
34-
ZeStruct<ze_command_list_desc_t> ZeDesc,
35-
ZeStruct<ze_command_list_desc_t> ZeCopyDesc,
33+
ze_command_list_handle_t CopyCommandList, ur_event_handle_t SignalEvent,
34+
ur_event_handle_t WaitEvent, ur_event_handle_t AllResetEvent,
3635
const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList);
3736

3837
~ur_exp_command_buffer_handle_t_();
3938

40-
void RegisterSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint,
41-
ur_event_handle_t Event) {
42-
SyncPoints[SyncPoint] = Event;
43-
NextSyncPoint++;
44-
}
39+
void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint,
40+
ur_event_handle_t Event);
4541

46-
ur_exp_command_buffer_sync_point_t GetNextSyncPoint() const {
42+
ur_exp_command_buffer_sync_point_t getNextSyncPoint() const {
4743
return NextSyncPoint;
4844
}
4945

5046
// Indicates if a copy engine is available for use
51-
bool UseCopyEngine() const { return ZeCopyCommandList != nullptr; }
47+
bool useCopyEngine() const { return ZeCopyCommandList != nullptr; }
48+
49+
/**
50+
* Obtains a fence for a specific L0 queue. If there is already an available
51+
* fence for this queue, it will be reused.
52+
* @param[in] ZeCommandQueue The L0 queue associated with the fence.
53+
* @param[out] ZeFence The fence.
54+
* @return UR_RESULT_SUCCESS or an error code on failure
55+
*/
56+
ur_result_t getFenceForQueue(ze_command_queue_handle_t &ZeCommandQueue,
57+
ze_fence_handle_t &ZeFence);
58+
59+
/**
60+
* Chooses which command list to use when appending a command to this command
61+
* buffer.
62+
* @param[in] PreferCopyEngine If true, will try to choose a copy engine
63+
* command-list. Will choose a compute command-list otherwise.
64+
* @return The chosen command list.
65+
*/
66+
ze_command_list_handle_t chooseCommandList(bool PreferCopyEngine);
5267

5368
// UR context associated with this command-buffer
5469
ur_context_handle_t Context;
@@ -61,12 +76,17 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
6176
ze_command_list_handle_t ZeComputeCommandListTranslated;
6277
// Level Zero command list handle
6378
ze_command_list_handle_t ZeCommandListResetEvents;
64-
// Level Zero command list descriptor
65-
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
6679
// Level Zero Copy command list handle
6780
ze_command_list_handle_t ZeCopyCommandList;
68-
// Level Zero Copy command list descriptor
69-
ZeStruct<ze_command_list_desc_t> ZeCopyCommandListDesc;
81+
// Event which signals that the most recent execution of the command-buffer
82+
// has finished
83+
ur_event_handle_t SignalEvent = nullptr;
84+
// Event which a command-buffer waits on until the wait-list dependencies
85+
// passed to a command-buffer enqueue have been satisfied.
86+
ur_event_handle_t WaitEvent = nullptr;
87+
// Event which a command-buffer waits on until the main command-list events
88+
// have been reset.
89+
ur_event_handle_t AllResetEvent = nullptr;
7090
// This flag must be set to false if at least one copy command has been
7191
// added to `ZeCopyCommandList`
7292
bool MCopyCommandListEmpty = true;
@@ -77,26 +97,15 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
7797
// Must be an element in ZeFencesMap, so is not required to be destroyed
7898
// itself.
7999
ze_fence_handle_t ZeActiveFence;
80-
// Queue properties from command-buffer descriptor
81-
// TODO: Do we need these?
82-
ur_queue_properties_t QueueProperties;
83100
// Map of sync_points to ur_events
84101
std::unordered_map<ur_exp_command_buffer_sync_point_t, ur_event_handle_t>
85102
SyncPoints;
86103
// Next sync_point value (may need to consider ways to reuse values if 32-bits
87104
// is not enough)
88105
ur_exp_command_buffer_sync_point_t NextSyncPoint;
89-
// List of Level Zero events associated to submitted commands.
106+
// List of Level Zero events associated with submitted commands.
90107
std::vector<ze_event_handle_t> ZeEventsList;
91-
// Event which will signals the most recent execution of the command-buffer
92-
// has finished
93-
ur_event_handle_t SignalEvent = nullptr;
94-
// Event which a command-buffer waits on until the wait-list dependencies
95-
// passed to a command-buffer enqueue have been satisfied.
96-
ur_event_handle_t WaitEvent = nullptr;
97-
// Event which a command-buffer waits on until the main command-list event
98-
// have been reset.
99-
ur_event_handle_t AllResetEvent = nullptr;
108+
100109
// Indicates if command-buffer commands can be updated after it is closed.
101110
bool IsUpdatable = false;
102111
// Indicates if command buffer was finalized.

source/adapters/level_zero/device.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
247247
SupportedExtensions += ("ur_exp_command_buffer ");
248248
// Return supported for the UR multi-device compile experimental feature
249249
SupportedExtensions += ("ur_exp_multi_device_compile ");
250+
SupportedExtensions += ("ur_exp_usm_p2p ");
250251

251252
return ReturnValue(SupportedExtensions.c_str());
252253
}
@@ -803,7 +804,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
803804
return ReturnValue(uint32_t{Device->ZeDeviceProperties->numThreadsPerEU});
804805
case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH:
805806
// currently not supported in level zero runtime
806-
return UR_RESULT_ERROR_INVALID_VALUE;
807+
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
807808
case UR_DEVICE_INFO_BFLOAT16: {
808809
// bfloat16 math functions are not yet supported on Intel GPUs.
809810
return ReturnValue(ur_bool_t{false});
@@ -996,7 +997,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
996997
logger::error("Unsupported ParamName in urGetDeviceInfo");
997998
logger::error("ParamNameParamName={}(0x{})", ParamName,
998999
logger::toHex(ParamName));
999-
return UR_RESULT_ERROR_INVALID_VALUE;
1000+
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
10001001
}
10011002

10021003
return UR_RESULT_SUCCESS;
@@ -1554,8 +1555,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(
15541555
) {
15551556
const uint64_t &ZeTimerResolution =
15561557
Device->ZeDeviceProperties->timerResolution;
1557-
const uint64_t TimestampMaxCount =
1558-
((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL);
1558+
const uint64_t TimestampMaxCount = Device->getTimestampMask();
15591559
uint64_t DeviceClockCount, Dummy;
15601560

15611561
ZE2UR_CALL(zeDeviceGetGlobalTimestamps,

source/adapters/level_zero/device.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,12 @@ struct ur_device_handle_t_ : _ur_object {
198198
.ZeIndex >= 0;
199199
}
200200

201+
uint64_t getTimestampMask() {
202+
auto ValidBits = ZeDeviceProperties->kernelTimestampValidBits;
203+
assert(ValidBits <= 64);
204+
return ValidBits == 64 ? ~0ULL : (1ULL << ValidBits) - 1ULL;
205+
}
206+
201207
// Cache of the immutable device properties.
202208
ZeCache<ZeStruct<ze_device_properties_t>> ZeDeviceProperties;
203209
ZeCache<ZeStruct<ze_device_compute_properties_t>> ZeDeviceComputeProperties;

source/adapters/level_zero/event.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -494,8 +494,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
494494
: Event->Context->Devices[0];
495495

496496
uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution;
497-
const uint64_t TimestampMaxValue =
498-
((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL);
497+
const uint64_t TimestampMaxValue = Device->getTimestampMask();
499498

500499
UrReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet);
501500

source/adapters/level_zero/kernel.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,10 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch(
130130
*OutEvent ///< [in,out][optional] return an event object that identifies
131131
///< this particular kernel execution instance.
132132
) {
133-
auto Queue = this;
133+
UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
134+
UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
134135

136+
auto Queue = this;
135137
ze_kernel_handle_t ZeKernel{};
136138
UR_CALL(getZeKernel(Queue, Kernel, &ZeKernel));
137139

@@ -337,6 +339,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp(
337339
*OutEvent ///< [in,out][optional] return an event object that identifies
338340
///< this particular kernel execution instance.
339341
) {
342+
UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
343+
UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
344+
340345
auto Queue = this;
341346
auto ZeDevice = Queue->Device->ZeDevice;
342347

source/adapters/level_zero/queue.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1550,8 +1550,7 @@ ur_result_t ur_queue_handle_legacy_t_::active_barriers::clear() {
15501550

15511551
void ur_queue_handle_legacy_t_::clearEndTimeRecordings() {
15521552
uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution;
1553-
const uint64_t TimestampMaxValue =
1554-
((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL);
1553+
const uint64_t TimestampMaxValue = Device->getTimestampMask();
15551554

15561555
for (auto Entry : EndTimeRecordings) {
15571556
auto &Event = Entry.first;

source/adapters/level_zero/usm_p2p.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp(
1717
std::ignore = commandDevice;
1818
std::ignore = peerDevice;
1919

20-
logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"),
21-
"{} function not implemented!", __FUNCTION__);
22-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
20+
// L0 has peer devices enabled by default
21+
return UR_RESULT_SUCCESS;
2322
}
2423

2524
UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp(
@@ -28,9 +27,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp(
2827
std::ignore = commandDevice;
2928
std::ignore = peerDevice;
3029

31-
logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"),
32-
"{} function not implemented!", __FUNCTION__);
33-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
30+
// L0 has peer devices enabled by default
31+
return UR_RESULT_SUCCESS;
3432
}
3533

3634
UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp(
Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
11
urDeviceCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle
22
{{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime
33
urDeviceGetInfoTest.Success/UR_DEVICE_INFO_GLOBAL_MEM_FREE
4-
urDeviceGetInfoTest.Success/UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH
5-
urDeviceGetInfoTest.Success/UR_DEVICE_INFO_ASYNC_BARRIER
6-
urDeviceGetInfoTest.Success/UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED
7-
urDeviceGetInfoTest.Success/UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP

0 commit comments

Comments
 (0)