Skip to content

Commit 1678894

Browse files
authored
Merge branch 'main' into steffen/virtual_mem_adapters
2 parents 0563259 + 67e4d1b commit 1678894

21 files changed

+83
-96
lines changed

source/adapters/cuda/command_buffer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121

2222
ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
2323
ur_context_handle_t hContext, ur_device_handle_t hDevice)
24-
: Context(hContext),
25-
Device(hDevice), CudaGraph{nullptr}, CudaGraphExec{nullptr}, RefCount{1} {
24+
: Context(hContext), Device(hDevice), CudaGraph{nullptr},
25+
CudaGraphExec{nullptr}, RefCount{1}, NextSyncPoint{0} {
2626
urContextRetain(hContext);
2727
urDeviceRetain(hDevice);
2828
}

source/adapters/cuda/command_buffer.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ struct ur_exp_command_buffer_handle_t_ {
184184

185185
void RegisterSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint,
186186
std::shared_ptr<CUgraphNode> CuNode) {
187-
SyncPoints[SyncPoint] = CuNode;
187+
SyncPoints[SyncPoint] = std::move(CuNode);
188188
NextSyncPoint++;
189189
}
190190

@@ -193,12 +193,12 @@ struct ur_exp_command_buffer_handle_t_ {
193193
}
194194

195195
// Helper to register next sync point
196-
// @param CuNode Node to register as next sycn point
196+
// @param CuNode Node to register as next sync point
197197
// @return Sync point under which the Node was registered
198198
ur_exp_command_buffer_sync_point_t
199199
AddSyncPoint(std::shared_ptr<CUgraphNode> CuNode) {
200200
ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint;
201-
RegisterSyncPoint(SyncPoint, CuNode);
201+
RegisterSyncPoint(SyncPoint, std::move(CuNode));
202202
return SyncPoint;
203203
}
204204

source/adapters/cuda/device.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,17 +1144,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
11441144
if (Result != UR_RESULT_SUCCESS)
11451145
return Result;
11461146

1147-
ur_platform_handle_t *Plat = static_cast<ur_platform_handle_t *>(
1148-
malloc(NumPlatforms * sizeof(ur_platform_handle_t)));
1149-
Result = urPlatformGet(&AdapterHandle, 1, NumPlatforms, Plat, nullptr);
1147+
std::vector<ur_platform_handle_t> Platforms(NumPlatforms);
1148+
1149+
Result =
1150+
urPlatformGet(&AdapterHandle, 1, NumPlatforms, Platforms.data(), nullptr);
11501151
if (Result != UR_RESULT_SUCCESS)
11511152
return Result;
11521153

11531154
// Iterate through platforms to find device that matches nativeHandle
1154-
for (uint32_t j = 0; j < NumPlatforms; ++j) {
1155-
auto SearchRes =
1156-
std::find_if(begin(Plat[j]->Devices), end(Plat[j]->Devices), IsDevice);
1157-
if (SearchRes != end(Plat[j]->Devices)) {
1155+
for (const auto Platform : Platforms) {
1156+
auto SearchRes = std::find_if(std::begin(Platform->Devices),
1157+
std::end(Platform->Devices), IsDevice);
1158+
if (SearchRes != end(Platform->Devices)) {
11581159
*phDevice = static_cast<ur_device_handle_t>((*SearchRes).get());
11591160
return UR_RESULT_SUCCESS;
11601161
}

source/adapters/cuda/event.cpp

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include "event.hpp"
12-
#include "common.hpp"
1312
#include "context.hpp"
1413
#include "device.hpp"
1514
#include "queue.hpp"
@@ -19,35 +18,25 @@
1918

2019
ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type,
2120
ur_context_handle_t Context,
22-
ur_queue_handle_t Queue, CUstream Stream,
21+
ur_queue_handle_t Queue,
22+
native_type EvEnd, native_type EvQueued,
23+
native_type EvStart, CUstream Stream,
2324
uint32_t StreamToken)
2425
: CommandType{Type}, RefCount{1}, HasOwnership{true},
2526
HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false},
26-
StreamToken{StreamToken}, EvEnd{nullptr}, EvStart{nullptr},
27-
EvQueued{nullptr}, Queue{Queue}, Stream{Stream}, Context{Context} {
28-
29-
bool ProfilingEnabled = Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE;
30-
31-
UR_CHECK_ERROR(cuEventCreate(
32-
&EvEnd, ProfilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING));
33-
34-
if (ProfilingEnabled) {
35-
UR_CHECK_ERROR(cuEventCreate(&EvQueued, CU_EVENT_DEFAULT));
36-
UR_CHECK_ERROR(cuEventCreate(&EvStart, CU_EVENT_DEFAULT));
37-
}
38-
39-
if (Queue != nullptr) {
40-
urQueueRetain(Queue);
41-
}
27+
StreamToken{StreamToken}, EventID{0}, EvEnd{EvEnd}, EvStart{EvStart},
28+
EvQueued{EvQueued}, Queue{Queue}, Stream{Stream}, Context{Context} {
29+
urQueueRetain(Queue);
4230
urContextRetain(Context);
4331
}
4432

4533
ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context,
4634
CUevent EventNative)
4735
: CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false},
4836
HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false},
49-
StreamToken{std::numeric_limits<uint32_t>::max()}, EvEnd{EventNative},
50-
EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, Context{Context} {
37+
StreamToken{std::numeric_limits<uint32_t>::max()}, EventID{0},
38+
EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr},
39+
Stream{nullptr}, Context{Context} {
5140
urContextRetain(Context);
5241
}
5342

source/adapters/cuda/event.hpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <cuda.h>
1313
#include <ur/ur.hpp>
1414

15+
#include "common.hpp"
1516
#include "queue.hpp"
1617

1718
/// UR Event mapping to CUevent
@@ -82,8 +83,18 @@ struct ur_event_handle_t_ {
8283
static ur_event_handle_t
8384
makeNative(ur_command_t Type, ur_queue_handle_t Queue, CUstream Stream,
8485
uint32_t StreamToken = std::numeric_limits<uint32_t>::max()) {
85-
return new ur_event_handle_t_(Type, Queue->getContext(), Queue, Stream,
86-
StreamToken);
86+
const bool ProfilingEnabled =
87+
Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE;
88+
native_type EvEnd = nullptr, EvQueued = nullptr, EvStart = nullptr;
89+
UR_CHECK_ERROR(cuEventCreate(
90+
&EvEnd, ProfilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING));
91+
92+
if (ProfilingEnabled) {
93+
UR_CHECK_ERROR(cuEventCreate(&EvQueued, CU_EVENT_DEFAULT));
94+
UR_CHECK_ERROR(cuEventCreate(&EvStart, CU_EVENT_DEFAULT));
95+
}
96+
return new ur_event_handle_t_(Type, Queue->getContext(), Queue, EvEnd,
97+
EvQueued, EvStart, Stream, StreamToken);
8798
}
8899

89100
static ur_event_handle_t makeWithNative(ur_context_handle_t context,
@@ -99,7 +110,8 @@ struct ur_event_handle_t_ {
99110
// This constructor is private to force programmers to use the makeNative /
100111
// makeWithNative static members in order to create a UR event for CUDA.
101112
ur_event_handle_t_(ur_command_t Type, ur_context_handle_t Context,
102-
ur_queue_handle_t Queue, CUstream Stream,
113+
ur_queue_handle_t Queue, native_type EvEnd,
114+
native_type EvQueued, native_type EvStart, CUstream Stream,
103115
uint32_t StreamToken);
104116

105117
// This constructor is private to force programmers to use the

source/adapters/cuda/image.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ cudaToUrImageChannelFormat(CUarray_format cuda_format,
234234

235235
ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
236236
const ur_image_desc_t *pImageDesc,
237-
CUDA_RESOURCE_DESC ResourceDesc,
237+
const CUDA_RESOURCE_DESC &ResourceDesc,
238238
ur_exp_image_handle_t *phRetImage) {
239239

240240
try {

source/adapters/cuda/memory.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,15 +190,15 @@ struct ur_mem_handle_t_ {
190190
/// Constructs the UR allocation for an unsampled image object
191191
ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array,
192192
CUsurfObject Surf, ur_mem_type_t ImageType)
193-
: Context{Context}, RefCount{1}, MemType{Type::Surface},
193+
: Context{Context}, RefCount{1}, MemType{Type::Surface}, MemFlags{0},
194194
Mem{ImageMem{Array, (void *)Surf, ImageType, nullptr}} {
195195
urContextRetain(Context);
196196
}
197197

198198
/// Constructs the UR allocation for a sampled image object
199199
ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array, CUtexObject Tex,
200200
ur_sampler_handle_t Sampler, ur_mem_type_t ImageType)
201-
: Context{Context}, RefCount{1}, MemType{Type::Texture},
201+
: Context{Context}, RefCount{1}, MemType{Type::Texture}, MemFlags{0},
202202
Mem{ImageMem{Array, (void *)Tex, ImageType, Sampler}} {
203203
urContextRetain(Context);
204204
}

source/adapters/cuda/program.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) {
137137

138138
if (!this->BuildOptions.empty()) {
139139
unsigned int MaxRegs;
140-
bool Valid = getMaxRegistersJitOptionValue(BuildOptions, MaxRegs);
140+
const bool Valid =
141+
getMaxRegistersJitOptionValue(this->BuildOptions, MaxRegs);
141142
if (Valid) {
142143
Options.push_back(CU_JIT_MAX_REGISTERS);
143144
OptionVals.push_back(reinterpret_cast<void *>(MaxRegs));

source/adapters/cuda/sampler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc,
1717
std::unique_ptr<ur_sampler_handle_t_> Sampler{
1818
new ur_sampler_handle_t_(hContext)};
1919

20-
if (pDesc && pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) {
20+
if (pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) {
2121
Sampler->Props |= pDesc->normalizedCoords;
2222
Sampler->Props |= pDesc->filterMode << 1;
2323
Sampler->Props |= pDesc->addressingMode << 2;

source/adapters/hip/device.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ struct ur_device_handle_t_ {
3333
: HIPDevice(HipDevice), RefCount{1}, Platform(Platform),
3434
HIPContext(Context), DeviceIndex(DeviceIndex) {}
3535

36-
~ur_device_handle_t_() {
36+
~ur_device_handle_t_() noexcept(false) {
3737
UR_CHECK_ERROR(hipDevicePrimaryCtxRelease(HIPDevice));
3838
}
3939

0 commit comments

Comments
 (0)