Skip to content

Commit 4ea7916

Browse files
authored
Merge pull request #1653 from hdelan/fix-hip-context-assumptions
[HIP][CUDA] Don't assume ctx contains all devs in platform
2 parents 7627337 + 82d4d82 commit 4ea7916

File tree

6 files changed

+104
-84
lines changed

6 files changed

+104
-84
lines changed

source/adapters/cuda/context.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ struct ur_context_handle_t_ {
116116
return Devices;
117117
}
118118

119+
// Gets the index of the device relative to other devices in the context
120+
size_t getDeviceIndex(ur_device_handle_t hDevice) {
121+
auto It = std::find(Devices.begin(), Devices.end(), hDevice);
122+
assert(It != Devices.end());
123+
return std::distance(Devices.begin(), It);
124+
}
125+
119126
uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
120127

121128
uint32_t decrementReferenceCount() noexcept { return --RefCount; }

source/adapters/cuda/memory.cpp

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -429,11 +429,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition(
429429
ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
430430
const ur_device_handle_t hDevice) {
431431
ScopedContext Active(hDevice);
432+
auto DeviceIdx = Mem->getContext()->getDeviceIndex(hDevice);
432433
ur_lock LockGuard(Mem->MemoryAllocationMutex);
433434

434435
if (Mem->isBuffer()) {
435436
auto &Buffer = std::get<BufferMem>(Mem->Mem);
436-
auto &DevPtr = Buffer.Ptrs[hDevice->getIndex() % Buffer.Ptrs.size()];
437+
auto &DevPtr = Buffer.Ptrs[DeviceIdx];
437438

438439
// Allocation has already been made
439440
if (DevPtr != BufferMem::native_type{0}) {
@@ -456,11 +457,11 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
456457
try {
457458
auto &Image = std::get<SurfaceMem>(Mem->Mem);
458459
// Allocation has already been made
459-
if (Image.Arrays[hDevice->getIndex() % Image.Arrays.size()]) {
460+
if (Image.Arrays[DeviceIdx]) {
460461
return UR_RESULT_SUCCESS;
461462
}
462463
UR_CHECK_ERROR(cuArray3DCreate(&ImageArray, &Image.ArrayDesc));
463-
Image.Arrays[hDevice->getIndex() % Image.Arrays.size()] = ImageArray;
464+
Image.Arrays[DeviceIdx] = ImageArray;
464465

465466
// CUDA_RESOURCE_DESC is a union of different structs, shown here
466467
// https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXOBJECT.html
@@ -475,7 +476,7 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
475476
ImageResDesc.flags = 0;
476477

477478
UR_CHECK_ERROR(cuSurfObjectCreate(&Surface, &ImageResDesc));
478-
Image.SurfObjs[hDevice->getIndex() % Image.SurfObjs.size()] = Surface;
479+
Image.SurfObjs[DeviceIdx] = Surface;
479480
} catch (ur_result_t Err) {
480481
if (ImageArray) {
481482
UR_CHECK_ERROR(cuArrayDestroy(ImageArray));
@@ -590,9 +591,8 @@ ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t Mem,
590591
UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
591592
// Device allocation has already been initialized with most up to date
592593
// data in buffer
593-
if (Mem->HaveMigratedToDeviceSinceLastWrite
594-
[hDevice->getIndex() %
595-
Mem->HaveMigratedToDeviceSinceLastWrite.size()]) {
594+
if (Mem->HaveMigratedToDeviceSinceLastWrite[Mem->getContext()->getDeviceIndex(
595+
hDevice)]) {
596596
return UR_RESULT_SUCCESS;
597597
}
598598

@@ -603,8 +603,35 @@ ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t Mem,
603603
UR_CHECK_ERROR(migrateImageToDevice(Mem, hDevice));
604604
}
605605

606-
Mem->HaveMigratedToDeviceSinceLastWrite
607-
[hDevice->getIndex() % Mem->HaveMigratedToDeviceSinceLastWrite.size()] =
608-
true;
606+
Mem->HaveMigratedToDeviceSinceLastWrite[Mem->getContext()->getDeviceIndex(
607+
hDevice)] = true;
609608
return UR_RESULT_SUCCESS;
610609
}
610+
611+
BufferMem::native_type
612+
BufferMem::getPtrWithOffset(const ur_device_handle_t Device, size_t Offset) {
613+
if (ur_result_t Err = allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
614+
Err != UR_RESULT_SUCCESS) {
615+
throw Err;
616+
}
617+
return reinterpret_cast<native_type>(
618+
reinterpret_cast<uint8_t *>(
619+
Ptrs[OuterMemStruct->getContext()->getDeviceIndex(Device)]) +
620+
Offset);
621+
}
622+
623+
CUarray SurfaceMem::getArray(const ur_device_handle_t Device) {
624+
if (ur_result_t Err = allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
625+
Err != UR_RESULT_SUCCESS) {
626+
throw Err;
627+
}
628+
return Arrays[OuterMemStruct->getContext()->getDeviceIndex(Device)];
629+
}
630+
631+
CUsurfObject SurfaceMem::getSurface(const ur_device_handle_t Device) {
632+
if (ur_result_t Err = allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
633+
Err != UR_RESULT_SUCCESS) {
634+
throw Err;
635+
}
636+
return SurfObjs[OuterMemStruct->getContext()->getDeviceIndex(Device)];
637+
}

source/adapters/cuda/memory.hpp

Lines changed: 9 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,6 @@
2020
#include "device.hpp"
2121
#include "event.hpp"
2222

23-
ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t,
24-
const ur_device_handle_t);
25-
ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t,
26-
const ur_device_handle_t);
27-
2823
// Handler for plain, pointer-based CUDA allocations
2924
struct BufferMem {
3025

@@ -97,16 +92,7 @@ struct BufferMem {
9792

9893
BufferMem(const BufferMem &Buffer) = default;
9994

100-
native_type getPtrWithOffset(const ur_device_handle_t Device, size_t Offset) {
101-
if (ur_result_t Err =
102-
allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
103-
Err != UR_RESULT_SUCCESS) {
104-
throw Err;
105-
}
106-
return reinterpret_cast<native_type>(
107-
reinterpret_cast<uint8_t *>(Ptrs[Device->getIndex() % Ptrs.size()]) +
108-
Offset);
109-
}
95+
native_type getPtrWithOffset(const ur_device_handle_t Device, size_t Offset);
11096

11197
native_type getPtr(const ur_device_handle_t Device) {
11298
return getPtrWithOffset(Device, 0);
@@ -269,23 +255,10 @@ struct SurfaceMem {
269255
}
270256

271257
// Will allocate a new array on device if not already allocated
272-
CUarray getArray(const ur_device_handle_t Device) {
273-
if (ur_result_t Err =
274-
allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
275-
Err != UR_RESULT_SUCCESS) {
276-
throw Err;
277-
}
278-
return Arrays[Device->getIndex() % Arrays.size()];
279-
}
258+
CUarray getArray(const ur_device_handle_t Device);
259+
280260
// Will allocate a new surface on device if not already allocated
281-
CUsurfObject getSurface(const ur_device_handle_t Device) {
282-
if (ur_result_t Err =
283-
allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
284-
Err != UR_RESULT_SUCCESS) {
285-
throw Err;
286-
}
287-
return SurfObjs[Device->getIndex() % SurfObjs.size()];
288-
}
261+
CUsurfObject getSurface(const ur_device_handle_t Device);
289262

290263
ur_mem_type_t getType() { return ImageDesc.type; }
291264

@@ -515,9 +488,11 @@ struct ur_mem_handle_t_ {
515488
for (const auto &Device : Context->getDevices()) {
516489
// This event is never an interop event so will always have an associated
517490
// queue
518-
HaveMigratedToDeviceSinceLastWrite
519-
[Device->getIndex() % HaveMigratedToDeviceSinceLastWrite.size()] =
520-
Device == NewEvent->getQueue()->getDevice();
491+
HaveMigratedToDeviceSinceLastWrite[Context->getDeviceIndex(Device)] =
492+
Device == NewEvent->getQueue()->getDevice();
521493
}
522494
}
523495
};
496+
497+
ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t,
498+
const ur_device_handle_t);

source/adapters/hip/context.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,13 @@ struct ur_context_handle_t_ {
112112
return Devices;
113113
}
114114

115+
// Gets the index of the device relative to other devices in the context
116+
size_t getDeviceIndex(ur_device_handle_t hDevice) {
117+
auto It = std::find(Devices.begin(), Devices.end(), hDevice);
118+
assert(It != Devices.end());
119+
return std::distance(Devices.begin(), It);
120+
}
121+
115122
uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
116123

117124
uint32_t decrementReferenceCount() noexcept { return --RefCount; }

source/adapters/hip/memory.cpp

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -455,11 +455,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) {
455455
ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
456456
const ur_device_handle_t hDevice) {
457457
ScopedContext Active(hDevice);
458+
auto DeviceIdx = Mem->getContext()->getDeviceIndex(hDevice);
458459
ur_lock LockGuard(Mem->MemoryAllocationMutex);
459460

460461
if (Mem->isBuffer()) {
461462
auto &Buffer = std::get<BufferMem>(Mem->Mem);
462-
hipDeviceptr_t &DevPtr = Buffer.Ptrs[hDevice->getIndex()];
463+
hipDeviceptr_t &DevPtr = Buffer.Ptrs[DeviceIdx];
463464

464465
// Allocation has already been made
465466
if (DevPtr != BufferMem::native_type{0}) {
@@ -482,12 +483,12 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
482483
try {
483484
auto &Image = std::get<SurfaceMem>(Mem->Mem);
484485
// Allocation has already been made
485-
if (Image.Arrays[hDevice->getIndex()]) {
486+
if (Image.Arrays[DeviceIdx]) {
486487
return UR_RESULT_SUCCESS;
487488
}
488489
UR_CHECK_ERROR(hipArray3DCreate(
489490
reinterpret_cast<hipCUarray *>(&ImageArray), &Image.ArrayDesc));
490-
Image.Arrays[hDevice->getIndex()] = ImageArray;
491+
Image.Arrays[DeviceIdx] = ImageArray;
491492
// HIP_RESOURCE_DESC is a union of different structs, shown here
492493
// We need to fill it as described here to use it for a surface or texture
493494
// HIP_RESOURCE_DESC::resType must be HIP_RESOURCE_TYPE_ARRAY and
@@ -499,7 +500,7 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
499500
ImageResDesc.resType = hipResourceTypeArray;
500501

501502
UR_CHECK_ERROR(hipCreateSurfaceObject(&Surface, &ImageResDesc));
502-
Image.SurfObjs[hDevice->getIndex()] = Surface;
503+
Image.SurfObjs[DeviceIdx] = Surface;
503504
} catch (ur_result_t Err) {
504505
if (ImageArray) {
505506
UR_CHECK_ERROR(hipFreeArray(ImageArray));
@@ -608,11 +609,11 @@ inline ur_result_t migrateImageToDevice(ur_mem_handle_t Mem,
608609
ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t Mem,
609610
const ur_device_handle_t hDevice) {
610611
UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
612+
auto DeviceIdx = Mem->getContext()->getDeviceIndex(hDevice);
611613
// Device allocation has already been initialized with most up to date
612614
// data in buffer
613-
if (Mem->HaveMigratedToDeviceSinceLastWrite[hDevice->getIndex()]) {
615+
if (Mem->HaveMigratedToDeviceSinceLastWrite[DeviceIdx])
614616
return UR_RESULT_SUCCESS;
615-
}
616617

617618
ScopedContext Active(hDevice);
618619
if (Mem->isBuffer()) {
@@ -621,6 +622,34 @@ ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t Mem,
621622
UR_CHECK_ERROR(migrateImageToDevice(Mem, hDevice));
622623
}
623624

624-
Mem->HaveMigratedToDeviceSinceLastWrite[hDevice->getIndex()] = true;
625+
Mem->HaveMigratedToDeviceSinceLastWrite[DeviceIdx] = true;
625626
return UR_RESULT_SUCCESS;
626627
}
628+
629+
BufferMem::native_type
630+
BufferMem::getPtrWithOffset(const ur_device_handle_t Device, size_t Offset) {
631+
if (ur_result_t Err = allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
632+
Err != UR_RESULT_SUCCESS) {
633+
throw Err;
634+
}
635+
return reinterpret_cast<native_type>(
636+
reinterpret_cast<uint8_t *>(
637+
Ptrs[OuterMemStruct->getContext()->getDeviceIndex(Device)]) +
638+
Offset);
639+
}
640+
641+
hipArray *SurfaceMem::getArray(const ur_device_handle_t Device) {
642+
if (ur_result_t Err = allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
643+
Err != UR_RESULT_SUCCESS) {
644+
throw Err;
645+
}
646+
return Arrays[OuterMemStruct->getContext()->getDeviceIndex(Device)];
647+
}
648+
649+
hipSurfaceObject_t SurfaceMem::getSurface(const ur_device_handle_t Device) {
650+
if (ur_result_t Err = allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
651+
Err != UR_RESULT_SUCCESS) {
652+
throw Err;
653+
}
654+
return SurfObjs[OuterMemStruct->getContext()->getDeviceIndex(Device)];
655+
}

source/adapters/hip/memory.hpp

Lines changed: 8 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,14 @@
99
//===----------------------------------------------------------------------===//
1010
#pragma once
1111

12+
#include "common.hpp"
1213
#include "context.hpp"
1314
#include "event.hpp"
1415
#include <cassert>
1516
#include <memory>
1617
#include <unordered_map>
1718
#include <variant>
1819

19-
#include "common.hpp"
20-
21-
ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t,
22-
const ur_device_handle_t);
23-
ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t,
24-
const ur_device_handle_t);
25-
2620
// Handler for plain, pointer-based HIP allocations
2721
struct BufferMem {
2822
struct BufferMap {
@@ -95,15 +89,7 @@ struct BufferMem {
9589

9690
// This will allocate memory on device with index Index if there isn't already
9791
// an active allocation on the device
98-
native_type getPtrWithOffset(const ur_device_handle_t Device, size_t Offset) {
99-
if (ur_result_t Err =
100-
allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
101-
Err != UR_RESULT_SUCCESS) {
102-
throw Err;
103-
}
104-
return reinterpret_cast<native_type>(
105-
reinterpret_cast<uint8_t *>(Ptrs[Device->getIndex()]) + Offset);
106-
}
92+
native_type getPtrWithOffset(const ur_device_handle_t Device, size_t Offset);
10793

10894
// This will allocate memory on device if there isn't already an active
10995
// allocation on the device
@@ -260,24 +246,10 @@ struct SurfaceMem {
260246
}
261247

262248
// Will allocate a new array on device if not already allocated
263-
hipArray *getArray(const ur_device_handle_t Device) {
264-
if (ur_result_t Err =
265-
allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
266-
Err != UR_RESULT_SUCCESS) {
267-
throw Err;
268-
}
269-
return Arrays[Device->getIndex()];
270-
}
249+
hipArray *getArray(const ur_device_handle_t Device);
271250

272251
// Will allocate a new surface on device if not already allocated
273-
hipSurfaceObject_t getSurface(const ur_device_handle_t Device) {
274-
if (ur_result_t Err =
275-
allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device);
276-
Err != UR_RESULT_SUCCESS) {
277-
throw Err;
278-
}
279-
return SurfObjs[Device->getIndex()];
280-
}
252+
hipSurfaceObject_t getSurface(const ur_device_handle_t Device);
281253

282254
ur_mem_type_t getImageType() const noexcept { return ImageDesc.type; }
283255

@@ -510,8 +482,11 @@ struct ur_mem_handle_t_ {
510482
urEventRetain(NewEvent);
511483
LastEventWritingToMemObj = NewEvent;
512484
for (const auto &Device : Context->getDevices()) {
513-
HaveMigratedToDeviceSinceLastWrite[Device->getIndex()] =
485+
HaveMigratedToDeviceSinceLastWrite[Context->getDeviceIndex(Device)] =
514486
Device == NewEvent->getQueue()->getDevice();
515487
}
516488
}
517489
};
490+
491+
ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t,
492+
const ur_device_handle_t);

0 commit comments

Comments
 (0)