@@ -28,26 +28,26 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
28
28
// /
29
29
// / One of the main differences between the UR API and the HIP driver API is
30
30
// / that the second modifies the state of the threads by assigning
31
- // / ` hipCtx_t` objects to threads. ` hipCtx_t` objects store data associated
31
+ // / \c hipCtx_t objects to threads. \c hipCtx_t objects store data associated
32
32
// / with a given device and control access to said device from the user side.
33
33
// / UR API contexts are objects that are passed to functions, and not bound
34
34
// / to threads.
35
- // / The ur_context_handle_t_ object doesn't implement this behavior. It only
36
- // / holds the HIP context data. The RAII object \ref ScopedContext implements
37
- // / the active context behavior.
38
35
// /
39
- // / <b> Primary vs UserDefined context </b>
36
+ // / Since the \c ur_context_handle_t can contain multiple devices, and a \c
37
+ // / hipCtx_t refers to only a single device, the \c hipCtx_t is more tightly
38
+ // / coupled to a \c ur_device_handle_t than a \c ur_context_handle_t. In order
39
+ // / to remove some ambiguities about the different semantics of \c
40
+ // / ur_context_handle_t and native \c hipCtx_t, we access the native \c
41
+ // / hipCtx_t solely through the \c ur_device_handle_t class, by using the object
42
+ // / \ref ScopedContext, which sets the active device (by setting the active
43
+ // / native \c hipCtx_t).
40
44
// /
41
- // / HIP has two different types of context, the Primary context,
42
- // / which is usable by all threads on a given process for a given device, and
43
- // / the aforementioned custom contexts.
44
- // / The HIP documentation, and performance analysis, suggest using the Primary
45
- // / context whenever possible. The Primary context is also used by the HIP
46
- // / Runtime API. For UR applications to interop with HIP Runtime API, they have
47
- // / to use the primary context - and make that active in the thread. The
48
- // / `ur_context_handle_t_` object can be constructed with a `kind` parameter
49
- // / that allows to construct a Primary or `UserDefined` context, so that
50
- // / the UR object interface is always the same.
45
+ // / <b> Primary vs User-defined \c hipCtx_t </b>
46
+ // /
47
+ // / HIP has two different types of \c hipCtx_t, the Primary context, which is
48
+ // / usable by all threads on a given process for a given device, and the
49
+ // / aforementioned custom \c hipCtx_t s. The HIP documentation, confirmed with
50
+ // / performance analysis, suggests using the Primary context whenever possible.
51
51
// /
52
52
// / <b> Destructor callback </b>
53
53
// /
@@ -57,6 +57,16 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
57
57
// / See proposal for details.
58
58
// / https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md
59
59
// /
60
+ // / <b> Memory Management for Devices in a Context </b>
61
+ // /
62
+ // / A \c ur_mem_handle_t is associated with a \c ur_context_handle_t_, which
63
+ // / may refer to multiple devices. Therefore the \c ur_mem_handle_t must
64
+ // / handle a native allocation for each device in the context. UR is
65
+ // / responsible for automatically handling event dependencies for kernels
66
+ // / writing to or reading from the same \c ur_mem_handle_t and migrating memory
67
+ // / between native allocations for devices in the same \c ur_context_handle_t_
68
+ // / if necessary.
69
+ // /
60
70
struct ur_context_handle_t_ {
61
71
62
72
struct deleter_data {
@@ -68,15 +78,22 @@ struct ur_context_handle_t_ {
68
78
69
79
using native_type = hipCtx_t;
70
80
71
- ur_device_handle_t DeviceId;
81
+ std::vector<ur_device_handle_t > Devices;
82
+
72
83
std::atomic_uint32_t RefCount;
73
84
74
- ur_context_handle_t_ (ur_device_handle_t DevId)
75
- : DeviceId{DevId}, RefCount{1 } {
76
- urDeviceRetain (DeviceId);
85
+ ur_context_handle_t_ (const ur_device_handle_t *Devs, uint32_t NumDevices)
86
+ : Devices{Devs, Devs + NumDevices}, RefCount{1 } {
87
+ for (auto &Dev : Devices) {
88
+ urDeviceRetain (Dev);
89
+ }
77
90
};
78
91
79
- ~ur_context_handle_t_ () { urDeviceRelease (DeviceId); }
92
+ ~ur_context_handle_t_ () {
93
+ for (auto &Dev : Devices) {
94
+ urDeviceRelease (Dev);
95
+ }
96
+ }
80
97
81
98
void invokeExtendedDeleters () {
82
99
std::lock_guard<std::mutex> Guard (Mutex);
@@ -91,7 +108,9 @@ struct ur_context_handle_t_ {
91
108
ExtendedDeleters.emplace_back (deleter_data{Function, UserData});
92
109
}
93
110
94
- ur_device_handle_t getDevice () const noexcept { return DeviceId; }
111
+ const std::vector<ur_device_handle_t > &getDevices () const noexcept {
112
+ return Devices;
113
+ }
95
114
96
115
uint32_t incrementReferenceCount () noexcept { return ++RefCount; }
97
116
0 commit comments