@@ -68,22 +68,33 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
68
68
{
69
69
std::lock_guard<std::mutex> Lock (NewAlloc.MInitEventMutex );
70
70
ur_event_handle_t InitEvent;
71
- // C++ guarantees members appear in memory in the order they are declared,
72
- // so since the member variable that contains the initial contents of the
73
- // device_global is right after the usm_ptr member variable we can do
74
- // some pointer arithmetic to memcopy over this value to the usm_ptr. This
75
- // value inside of the device_global will be zero-initialized if it was not
76
- // given a value on construction.
77
-
78
- MemoryManager::copy_usm (reinterpret_cast <const void *>(
79
- reinterpret_cast <uintptr_t >(MDeviceGlobalPtr) +
80
- sizeof (MDeviceGlobalPtr)),
81
- QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr ,
82
- std::vector<ur_event_handle_t >{}, &InitEvent);
71
+ if (MDeviceGlobalPtr) {
72
+ // C++ guarantees members appear in memory in the order they are declared,
73
+ // so since the member variable that contains the initial contents of the
74
+ // device_global is right after the usm_ptr member variable we can do
75
+ // some pointer arithmetic to memcopy over this value to the usm_ptr. This
76
+ // value inside of the device_global will be zero-initialized if it was
77
+ // not given a value on construction.
78
+ MemoryManager::copy_usm (
79
+ reinterpret_cast <const void *>(
80
+ reinterpret_cast <uintptr_t >(MDeviceGlobalPtr) +
81
+ sizeof (MDeviceGlobalPtr)),
82
+ QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr ,
83
+ std::vector<ur_event_handle_t >{}, &InitEvent);
84
+ } else {
85
+ // For SYCLBIN device globals we do not have a host pointer to copy from,
86
+ // so instead we fill the USM memory with 0's.
87
+ MemoryManager::fill_usm (NewAlloc.MPtr , QueueImpl, MDeviceGlobalTSize,
88
+ {static_cast <unsigned char >(0 )}, {}, &InitEvent);
89
+ }
83
90
NewAlloc.MInitEvent = InitEvent;
84
91
}
85
92
86
- CtxImpl.addAssociatedDeviceGlobal (MDeviceGlobalPtr);
93
+ // Only device globals with host variables need to be registered with the
94
+ // context. The rest will be managed by their kernel bundles and cleaned up
95
+ // accordingly.
96
+ if (MDeviceGlobalPtr)
97
+ CtxImpl.addAssociatedDeviceGlobal (MDeviceGlobalPtr);
87
98
return NewAlloc;
88
99
}
89
100
@@ -111,19 +122,32 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) {
111
122
" USM allocation for device and context already happened." );
112
123
DeviceGlobalUSMMem &NewAlloc = NewAllocIt.first ->second ;
113
124
114
- // C++ guarantees members appear in memory in the order they are declared,
115
- // so since the member variable that contains the initial contents of the
116
- // device_global is right after the usm_ptr member variable we can do
117
- // some pointer arithmetic to memcopy over this value to the usm_ptr. This
118
- // value inside of the device_global will be zero-initialized if it was not
119
- // given a value on construction.
120
- MemoryManager::context_copy_usm (
121
- reinterpret_cast <const void *>(
122
- reinterpret_cast <uintptr_t >(MDeviceGlobalPtr) +
123
- sizeof (MDeviceGlobalPtr)),
124
- &CtxImpl, MDeviceGlobalTSize, NewAlloc.MPtr );
125
-
126
- CtxImpl.addAssociatedDeviceGlobal (MDeviceGlobalPtr);
125
+ if (MDeviceGlobalPtr) {
126
+ // C++ guarantees members appear in memory in the order they are declared,
127
+ // so since the member variable that contains the initial contents of the
128
+ // device_global is right after the usm_ptr member variable we can do
129
+ // some pointer arithmetic to memcopy over this value to the usm_ptr. This
130
+ // value inside of the device_global will be zero-initialized if it was not
131
+ // given a value on construction.
132
+ MemoryManager::context_copy_usm (
133
+ reinterpret_cast <const void *>(
134
+ reinterpret_cast <uintptr_t >(MDeviceGlobalPtr) +
135
+ sizeof (MDeviceGlobalPtr)),
136
+ &CtxImpl, MDeviceGlobalTSize, NewAlloc.MPtr );
137
+ } else {
138
+ // For SYCLBIN device globals we do not have a host pointer to copy from,
139
+ // so instead we fill the USM memory with 0's.
140
+ std::vector<unsigned char > ImmBuff (MDeviceGlobalTSize,
141
+ static_cast <unsigned char >(0 ));
142
+ MemoryManager::context_copy_usm (ImmBuff.data (), &CtxImpl,
143
+ MDeviceGlobalTSize, NewAlloc.MPtr );
144
+ }
145
+
146
+ // Only device globals with host variables need to be registered with the
147
+ // context. The rest will be managed by their kernel bundles and cleaned up
148
+ // accordingly.
149
+ if (MDeviceGlobalPtr)
150
+ CtxImpl.addAssociatedDeviceGlobal (MDeviceGlobalPtr);
127
151
return NewAlloc;
128
152
}
129
153
@@ -150,6 +174,30 @@ void DeviceGlobalMapEntry::removeAssociatedResources(
150
174
}
151
175
}
152
176
177
+ void DeviceGlobalMapEntry::cleanup () {
178
+ std::lock_guard<std::mutex> Lock{MDeviceToUSMPtrMapMutex};
179
+ assert (MDeviceGlobalPtr == nullptr &&
180
+ " Entry has host variable, so it should be associated with a context "
181
+ " and should be cleaned up by its dtor." );
182
+ for (auto &USMPtrIt : MDeviceToUSMPtrMap) {
183
+ // The context should be alive through the kernel_bundle owning these
184
+ // device_global entries.
185
+ const context_impl *CtxImpl = USMPtrIt.first .second ;
186
+ DeviceGlobalUSMMem &USMMem = USMPtrIt.second ;
187
+ detail::usm::freeInternal (USMMem.MPtr , CtxImpl);
188
+ if (USMMem.MInitEvent .has_value ())
189
+ CtxImpl->getAdapter ()->call <UrApiKind::urEventRelease>(
190
+ *USMMem.MInitEvent );
191
+ #ifndef NDEBUG
192
+ // For debugging we set the event and memory to some recognizable values
193
+ // to allow us to check that this cleanup happens before erasure.
194
+ USMMem.MPtr = nullptr ;
195
+ USMMem.MInitEvent = {};
196
+ #endif
197
+ }
198
+ MDeviceToUSMPtrMap.clear ();
199
+ }
200
+
153
201
} // namespace detail
154
202
} // namespace _V1
155
203
} // namespace sycl
0 commit comments