Skip to content

Commit 975313c

Browse files
authored
Merge pull request #1774 from hdelan/readytomerge-branch
[UR][HIP][CUDA] Master Branch
2 parents ded4b88 + 836056e commit 975313c

29 files changed

+287
-212
lines changed

source/adapters/cuda/adapter.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,12 @@ class ur_legacy_sink : public logger::Sink {
3636

3737
~ur_legacy_sink() = default;
3838
};
39+
40+
// FIXME: Remove the default log level when querying logging info is supported
41+
// through UR entry points. See #1330.
3942
ur_adapter_handle_t_::ur_adapter_handle_t_()
40-
: logger(logger::get_logger("cuda")) {
43+
: logger(logger::get_logger("cuda",
44+
/*default_log_level*/ logger::Level::ERR)) {
4145

4246
if (std::getenv("UR_LOG_CUDA") != nullptr)
4347
return;

source/adapters/cuda/context.cpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -142,16 +142,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle(
142142
}
143143

144144
UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle(
145-
ur_native_handle_t hNativeContext, uint32_t numDevices,
146-
const ur_device_handle_t *phDevices,
147-
const ur_context_native_properties_t *pProperties,
148-
ur_context_handle_t *phContext) {
149-
std::ignore = hNativeContext;
150-
std::ignore = numDevices;
151-
std::ignore = phDevices;
152-
std::ignore = pProperties;
153-
std::ignore = phContext;
154-
145+
[[maybe_unused]] ur_native_handle_t hNativeContext,
146+
[[maybe_unused]] uint32_t numDevices,
147+
[[maybe_unused]] const ur_device_handle_t *phDevices,
148+
[[maybe_unused]] const ur_context_native_properties_t *pProperties,
149+
[[maybe_unused]] ur_context_handle_t *phContext) {
155150
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
156151
}
157152

source/adapters/cuda/event.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context,
3636
CUevent EventNative)
3737
: CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false},
3838
HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false},
39-
StreamToken{std::numeric_limits<uint32_t>::max()}, EventID{0},
40-
EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr},
41-
Stream{nullptr}, Context{Context} {
39+
IsInterop{true}, StreamToken{std::numeric_limits<uint32_t>::max()},
40+
EventID{0}, EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr},
41+
Queue{nullptr}, Stream{nullptr}, Context{Context} {
4242
urContextRetain(Context);
4343
}
4444

source/adapters/cuda/event.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ struct ur_event_handle_t_ {
4545

4646
bool isCompleted() const noexcept;
4747

48+
bool isInterop() const noexcept { return IsInterop; };
49+
4850
uint32_t getExecutionStatus() const noexcept {
4951

5052
if (!isRecorded()) {
@@ -141,6 +143,8 @@ struct ur_event_handle_t_ {
141143
bool IsStarted; // Signifies whether the operation associated with the
142144
// UR event has started or not
143145

146+
const bool IsInterop{false}; // Made with urEventCreateWithNativeHandle
147+
144148
uint32_t StreamToken;
145149
uint32_t EventID; // Queue identifier of the event.
146150

@@ -195,7 +199,8 @@ ur_result_t forLatestEvents(const ur_event_handle_t *EventWaitList,
195199
CUstream LastSeenStream = 0;
196200
for (size_t i = 0; i < Events.size(); i++) {
197201
auto Event = Events[i];
198-
if (!Event || (i != 0 && Event->getStream() == LastSeenStream)) {
202+
if (!Event || (i != 0 && !Event->isInterop() &&
203+
Event->getStream() == LastSeenStream)) {
199204
continue;
200205
}
201206

source/adapters/cuda/memory.cpp

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
5656

5757
auto URMemObj = std::unique_ptr<ur_mem_handle_t_>(
5858
new ur_mem_handle_t_{hContext, flags, AllocMode, HostPtr, size});
59-
if (URMemObj == nullptr) {
60-
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
61-
}
6259

6360
// First allocation will be made at urMemBufferCreate if context only
6461
// has one device
@@ -74,6 +71,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
7471
MemObj = URMemObj.release();
7572
} catch (ur_result_t Err) {
7673
return Err;
74+
} catch (std::bad_alloc &) {
75+
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
7776
} catch (...) {
7877
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
7978
}
@@ -102,15 +101,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) {
102101
return UR_RESULT_SUCCESS;
103102
}
104103

105-
// make sure hMem is released in case checkErrorUR throws
104+
// Call destructor
106105
std::unique_ptr<ur_mem_handle_t_> MemObjPtr(hMem);
107106

108-
if (hMem->isSubBuffer()) {
109-
return UR_RESULT_SUCCESS;
110-
}
111-
112-
UR_CHECK_ERROR(hMem->clear());
113-
114107
} catch (ur_result_t Err) {
115108
Result = Err;
116109
} catch (...) {
@@ -230,13 +223,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate(
230223
UR_ASSERT(pImageFormat->channelOrder == UR_IMAGE_CHANNEL_ORDER_RGBA,
231224
UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT);
232225

233-
auto URMemObj = std::unique_ptr<ur_mem_handle_t_>(
234-
new ur_mem_handle_t_{hContext, flags, *pImageFormat, *pImageDesc, pHost});
235-
236-
UR_ASSERT(std::get<SurfaceMem>(URMemObj->Mem).PixelTypeSizeBytes,
237-
UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT);
238-
239226
try {
227+
auto URMemObj = std::unique_ptr<ur_mem_handle_t_>(new ur_mem_handle_t_{
228+
hContext, flags, *pImageFormat, *pImageDesc, pHost});
229+
UR_ASSERT(std::get<SurfaceMem>(URMemObj->Mem).PixelTypeSizeBytes,
230+
UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT);
231+
240232
if (PerformInitialCopy) {
241233
for (const auto &Device : hContext->getDevices()) {
242234
// Synchronous behaviour is best in this case
@@ -248,16 +240,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate(
248240
}
249241
}
250242

251-
if (URMemObj == nullptr) {
252-
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
253-
}
254-
255243
*phMem = URMemObj.release();
256244
} catch (ur_result_t Err) {
257-
(*phMem)->clear();
258245
return Err;
246+
} catch (std::bad_alloc &) {
247+
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
259248
} catch (...) {
260-
(*phMem)->clear();
261249
return UR_RESULT_ERROR_UNKNOWN;
262250
}
263251
return UR_RESULT_SUCCESS;

source/adapters/cuda/memory.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ struct ur_mem_handle_t_ {
394394
}
395395

396396
~ur_mem_handle_t_() {
397+
clear();
397398
if (isBuffer() && isSubBuffer()) {
398399
urMemRelease(std::get<BufferMem>(Mem).Parent);
399400
return;

source/adapters/cuda/physical_mem.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate(
3232
default:
3333
UR_CHECK_ERROR(Result);
3434
}
35-
*phPhysicalMem = new ur_physical_mem_handle_t_(ResHandle, hContext, hDevice);
36-
35+
try {
36+
*phPhysicalMem =
37+
new ur_physical_mem_handle_t_(ResHandle, hContext, hDevice);
38+
} catch (std::bad_alloc &) {
39+
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
40+
} catch (...) {
41+
return UR_RESULT_ERROR_UNKNOWN;
42+
}
3743
return UR_RESULT_SUCCESS;
3844
}
3945

@@ -53,10 +59,10 @@ urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) {
5359

5460
ScopedContext Active(hPhysicalMem->getDevice());
5561
UR_CHECK_ERROR(cuMemRelease(hPhysicalMem->get()));
56-
return UR_RESULT_SUCCESS;
5762
} catch (ur_result_t err) {
5863
return err;
5964
} catch (...) {
6065
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
6166
}
67+
return UR_RESULT_SUCCESS;
6268
}

source/adapters/cuda/program.cpp

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -187,23 +187,30 @@ ur_result_t createProgram(ur_context_handle_t hContext,
187187
UR_RESULT_ERROR_INVALID_CONTEXT);
188188
UR_ASSERT(size, UR_RESULT_ERROR_INVALID_SIZE);
189189

190-
std::unique_ptr<ur_program_handle_t_> RetProgram{
191-
new ur_program_handle_t_{hContext, hDevice}};
192-
193-
if (pProperties) {
194-
if (pProperties->count > 0 && pProperties->pMetadatas == nullptr) {
195-
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
196-
} else if (pProperties->count == 0 && pProperties->pMetadatas != nullptr) {
197-
return UR_RESULT_ERROR_INVALID_SIZE;
190+
try {
191+
std::unique_ptr<ur_program_handle_t_> RetProgram{
192+
new ur_program_handle_t_{hContext, hDevice}};
193+
194+
if (pProperties) {
195+
if (pProperties->count > 0 && pProperties->pMetadatas == nullptr) {
196+
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
197+
} else if (pProperties->count == 0 &&
198+
pProperties->pMetadatas != nullptr) {
199+
return UR_RESULT_ERROR_INVALID_SIZE;
200+
}
201+
UR_CHECK_ERROR(
202+
RetProgram->setMetadata(pProperties->pMetadatas, pProperties->count));
198203
}
199-
UR_CHECK_ERROR(
200-
RetProgram->setMetadata(pProperties->pMetadatas, pProperties->count));
201-
}
202204

203-
auto pBinary_string = reinterpret_cast<const char *>(pBinary);
205+
auto pBinary_string = reinterpret_cast<const char *>(pBinary);
204206

205-
UR_CHECK_ERROR(RetProgram->setBinary(pBinary_string, size));
206-
*phProgram = RetProgram.release();
207+
UR_CHECK_ERROR(RetProgram->setBinary(pBinary_string, size));
208+
*phProgram = RetProgram.release();
209+
} catch (std::bad_alloc &) {
210+
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
211+
} catch (...) {
212+
return UR_RESULT_ERROR_UNKNOWN;
213+
}
207214

208215
return UR_RESULT_SUCCESS;
209216
}
@@ -317,6 +324,8 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count,
317324

318325
} catch (ur_result_t Err) {
319326
Result = Err;
327+
} catch (std::bad_alloc &) {
328+
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
320329
}
321330
return Result;
322331
}
@@ -345,16 +354,24 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice,
345354
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
346355

347356
switch (propName) {
348-
case UR_PROGRAM_BUILD_INFO_STATUS: {
357+
case UR_PROGRAM_BUILD_INFO_STATUS:
349358
return ReturnValue(hProgram->BuildStatus);
350-
}
351359
case UR_PROGRAM_BUILD_INFO_OPTIONS:
352360
return ReturnValue(hProgram->BuildOptions.c_str());
353-
case UR_PROGRAM_BUILD_INFO_LOG:
354-
return ReturnValue(hProgram->InfoLog, hProgram->MaxLogSize);
355-
case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: {
356-
return ReturnValue(hProgram->BinaryType);
361+
case UR_PROGRAM_BUILD_INFO_LOG: {
362+
// We only know the maximum log length, which CUDA guarantees will include
363+
// the null terminator.
364+
// To determine the actual length of the log, search for the first
365+
// null terminator, not searching past the known maximum. If that does find
366+
// one, it will return the length excluding the null terminator, so remember
367+
// to include that.
368+
auto LogLen =
369+
std::min(hProgram->MaxLogSize,
370+
strnlen(hProgram->InfoLog, hProgram->MaxLogSize) + 1);
371+
return ReturnValue(hProgram->InfoLog, LogLen);
357372
}
373+
case UR_PROGRAM_BUILD_INFO_BINARY_TYPE:
374+
return ReturnValue(hProgram->BinaryType);
358375
default:
359376
break;
360377
}

source/adapters/cuda/queue.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,12 +167,11 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice,
167167

168168
return UR_RESULT_SUCCESS;
169169
} catch (ur_result_t Err) {
170-
171170
return Err;
172-
171+
} catch (std::bad_alloc &) {
172+
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
173173
} catch (...) {
174-
175-
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
174+
return UR_RESULT_ERROR_UNKNOWN;
176175
}
177176
}
178177

source/adapters/cuda/sampler.cpp

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,46 +14,55 @@
1414
UR_APIEXPORT ur_result_t UR_APICALL
1515
urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc,
1616
ur_sampler_handle_t *phSampler) {
17-
std::unique_ptr<ur_sampler_handle_t_> Sampler{
18-
new ur_sampler_handle_t_(hContext)};
17+
try {
18+
std::unique_ptr<ur_sampler_handle_t_> Sampler{
19+
new ur_sampler_handle_t_(hContext)};
1920

20-
if (pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) {
21-
Sampler->Props |= static_cast<uint32_t>(pDesc->normalizedCoords);
22-
Sampler->Props |= pDesc->filterMode << 1;
23-
Sampler->Props |= pDesc->addressingMode << 2;
24-
} else {
25-
// Set default values
26-
Sampler->Props |= true; // Normalized Coords
27-
Sampler->Props |= UR_SAMPLER_ADDRESSING_MODE_CLAMP << 2;
28-
}
21+
if (pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) {
22+
Sampler->Props |= static_cast<uint32_t>(pDesc->normalizedCoords);
23+
Sampler->Props |= pDesc->filterMode << 1;
24+
Sampler->Props |= pDesc->addressingMode << 2;
25+
} else {
26+
// Set default values
27+
Sampler->Props |= true; // Normalized Coords
28+
Sampler->Props |= UR_SAMPLER_ADDRESSING_MODE_CLAMP << 2;
29+
}
2930

30-
void *pNext = const_cast<void *>(pDesc->pNext);
31-
while (pNext != nullptr) {
32-
const ur_base_desc_t *BaseDesc =
33-
reinterpret_cast<const ur_base_desc_t *>(pNext);
34-
if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES) {
35-
const ur_exp_sampler_mip_properties_t *SamplerMipProperties =
36-
reinterpret_cast<const ur_exp_sampler_mip_properties_t *>(pNext);
37-
Sampler->MaxMipmapLevelClamp = SamplerMipProperties->maxMipmapLevelClamp;
38-
Sampler->MinMipmapLevelClamp = SamplerMipProperties->minMipmapLevelClamp;
39-
Sampler->MaxAnisotropy = SamplerMipProperties->maxAnisotropy;
40-
Sampler->Props |= SamplerMipProperties->mipFilterMode << 11;
41-
} else if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES) {
42-
const ur_exp_sampler_addr_modes_t *SamplerAddrModes =
43-
reinterpret_cast<const ur_exp_sampler_addr_modes_t *>(pNext);
44-
Sampler->Props |= SamplerAddrModes->addrModes[0] << 2;
45-
Sampler->Props |= SamplerAddrModes->addrModes[1] << 5;
46-
Sampler->Props |= SamplerAddrModes->addrModes[2] << 8;
47-
} else if (BaseDesc->stype ==
48-
UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES) {
49-
const ur_exp_sampler_cubemap_properties_t *SamplerCubemapProperties =
50-
reinterpret_cast<const ur_exp_sampler_cubemap_properties_t *>(pNext);
51-
Sampler->Props |= SamplerCubemapProperties->cubemapFilterMode << 12;
31+
void *pNext = const_cast<void *>(pDesc->pNext);
32+
while (pNext != nullptr) {
33+
const ur_base_desc_t *BaseDesc =
34+
reinterpret_cast<const ur_base_desc_t *>(pNext);
35+
if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES) {
36+
const ur_exp_sampler_mip_properties_t *SamplerMipProperties =
37+
reinterpret_cast<const ur_exp_sampler_mip_properties_t *>(pNext);
38+
Sampler->MaxMipmapLevelClamp =
39+
SamplerMipProperties->maxMipmapLevelClamp;
40+
Sampler->MinMipmapLevelClamp =
41+
SamplerMipProperties->minMipmapLevelClamp;
42+
Sampler->MaxAnisotropy = SamplerMipProperties->maxAnisotropy;
43+
Sampler->Props |= SamplerMipProperties->mipFilterMode << 11;
44+
} else if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES) {
45+
const ur_exp_sampler_addr_modes_t *SamplerAddrModes =
46+
reinterpret_cast<const ur_exp_sampler_addr_modes_t *>(pNext);
47+
Sampler->Props |= SamplerAddrModes->addrModes[0] << 2;
48+
Sampler->Props |= SamplerAddrModes->addrModes[1] << 5;
49+
Sampler->Props |= SamplerAddrModes->addrModes[2] << 8;
50+
} else if (BaseDesc->stype ==
51+
UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES) {
52+
const ur_exp_sampler_cubemap_properties_t *SamplerCubemapProperties =
53+
reinterpret_cast<const ur_exp_sampler_cubemap_properties_t *>(
54+
pNext);
55+
Sampler->Props |= SamplerCubemapProperties->cubemapFilterMode << 12;
56+
}
57+
pNext = const_cast<void *>(BaseDesc->pNext);
5258
}
53-
pNext = const_cast<void *>(BaseDesc->pNext);
54-
}
5559

56-
*phSampler = Sampler.release();
60+
*phSampler = Sampler.release();
61+
} catch (std::bad_alloc &) {
62+
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
63+
} catch (...) {
64+
return UR_RESULT_ERROR_UNKNOWN;
65+
}
5766
return UR_RESULT_SUCCESS;
5867
}
5968

0 commit comments

Comments
 (0)