Skip to content

Commit 15b3ece

Browse files
author
Georgi Mirazchiyski
authored
[Bindless][SYCL][UR] Create a sampled image with a single UR API (#18384)
We no longer need to create a UR sampler object in order to create a sampled image. The backends can simply use the sampler description to create the sampled image. Note this is an ABI break in the currently still experimental bindless images extension for Unified Runtime. Addresses the following issue: oneapi-src/unified-runtime#1463 Relevant discussion from oneapi-src/unified-runtime#2640 (comment) Additionally, avoiding the allocation of a sampler object itself fixes memory leak(s) in all adapters where the UR sampler handle was never released which was caused by the use of the API in the SYCL RT only calling urSamplerCreate and not Release for the sampler associated with the image. Also, while HIP and Cuda do not further create backend specific sampler objects in the respective adapters, Level Zero was creating one that never got released upon freeing the image memory which lead to leaking the Level Zero sampler memory as well. Now that Level Zero can create a sampled image from a descriptor only, the sampler object is not needed altogether, allowing to rid of the above mentioned leaking memory allocations and simplify the runtime.
1 parent 5fc8472 commit 15b3ece

File tree

22 files changed

+375
-242
lines changed

22 files changed

+375
-242
lines changed

sycl/source/detail/bindless_images.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ __SYCL_EXPORT sampled_image_handle
265265
create_image(image_mem_handle memHandle, const bindless_image_sampler &sampler,
266266
const image_descriptor &desc, const sycl::device &syclDevice,
267267
const sycl::context &syclContext) {
268-
return create_image(reinterpret_cast<void*>(memHandle.raw_handle),
268+
return create_image(reinterpret_cast<void *>(memHandle.raw_handle),
269269
0 /*pitch*/, sampler, desc, syclDevice, syclContext);
270270
}
271271

@@ -280,14 +280,14 @@ __SYCL_EXPORT sampled_image_handle
280280
create_image(image_mem &imgMem, const bindless_image_sampler &sampler,
281281
const image_descriptor &desc, const sycl::device &syclDevice,
282282
const sycl::context &syclContext) {
283-
return create_image(reinterpret_cast<void*>(imgMem.get_handle().raw_handle),
283+
return create_image(reinterpret_cast<void *>(imgMem.get_handle().raw_handle),
284284
0 /*pitch*/, sampler, desc, syclDevice, syclContext);
285285
}
286286

287287
__SYCL_EXPORT sampled_image_handle
288288
create_image(image_mem &imgMem, const bindless_image_sampler &sampler,
289289
const image_descriptor &desc, const sycl::queue &syclQueue) {
290-
return create_image(reinterpret_cast<void*>(imgMem.get_handle().raw_handle),
290+
return create_image(reinterpret_cast<void *>(imgMem.get_handle().raw_handle),
291291
0 /*pitch*/, sampler, desc, syclQueue.get_device(),
292292
syclQueue.get_context());
293293
}
@@ -367,10 +367,6 @@ create_image(void *devPtr, size_t pitch, const bindless_image_sampler &sampler,
367367
translate_cubemap_filter_mode(sampler.cubemap_filtering)};
368368
UrAddrModes.pNext = &UrCubemapProps;
369369

370-
ur_sampler_handle_t urSampler = nullptr;
371-
Adapter->call<sycl::errc::runtime, sycl::detail::UrApiKind::urSamplerCreate>(
372-
urCtx, &UrSamplerProps, &urSampler);
373-
374370
ur_image_desc_t urDesc;
375371
ur_image_format_t urFormat;
376372
populate_ur_structs(desc, urDesc, urFormat, pitch);
@@ -381,7 +377,7 @@ create_image(void *devPtr, size_t pitch, const bindless_image_sampler &sampler,
381377
sycl::detail::UrApiKind::urBindlessImagesSampledImageCreateExp>(
382378
urCtx, urDevice,
383379
reinterpret_cast<ur_exp_image_mem_native_handle_t>(devPtr), &urFormat,
384-
&urDesc, urSampler, &urImageHandle);
380+
&urDesc, &UrSamplerProps, &urImageHandle);
385381

386382
return sampled_image_handle{urImageHandle};
387383
}
@@ -650,7 +646,7 @@ __SYCL_EXPORT external_semaphore import_external_semaphore(
650646
urCtx, urDevice, urHandleType, &urExternalSemDesc, &urExternalSemaphore);
651647

652648
return external_semaphore{urExternalSemaphore,
653-
externalSemaphoreDesc.handle_type};
649+
externalSemaphoreDesc.handle_type};
654650
}
655651

656652
template <>

unified-runtime/include/ur_api.h

Lines changed: 9 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/include/ur_ddi.h

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/include/ur_print.hpp

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/scripts/core/EXP-BINDLESS-IMAGES.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,9 @@ Changelog
290290
+----------+-------------------------------------------------------------+
291291
| 23.0 | Added BindlessImagesFreeMappedLinearMemory function. |
292292
+----------+-------------------------------------------------------------+
293+
| 24.0 || Update the ${x}BindlessImagesSampledImageCreateExp API |
294+
| || to take a sampler description instead of sampler handle. |
295+
+----------+-------------------------------------------------------------+
293296

294297
Contributors
295298
--------------------------------------------------------------------------------
@@ -300,3 +303,4 @@ Contributors
300303
* Chedy Najjar `chedy.najjar@codeplay.com <chedy.najjar@codeplay.com>`_
301304
* Sean Stirling `sean.stirling@codeplay.com <sean.stirling@codeplay.com>`_
302305
* Peter Zuzek `peter@codeplay.com peter@codeplay.com <peter@codeplay.com>`_
306+
* Georgi Mirazchiyski `georgi.mirazchiyski@codeplay.com <georgi.mirazchiyski@codeplay.com>`_

unified-runtime/scripts/core/exp-bindless-images.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ name: SampledImageCreateExp
527527
ordinal: "0"
528528
analogue:
529529
- "**cuTexObjectCreate**"
530+
- "**hipTexObjectCreate**"
530531
params:
531532
- type: $x_context_handle_t
532533
name: hContext
@@ -543,9 +544,9 @@ params:
543544
- type: "const $x_image_desc_t*"
544545
name: pImageDesc
545546
desc: "[in] pointer to image description"
546-
- type: $x_sampler_handle_t
547-
name: hSampler
548-
desc: "[in] sampler to be used"
547+
- type: const $x_sampler_desc_t*
548+
name: pSamplerDesc
549+
desc: "[in] pointer to sampler description to be used"
549550
- type: $x_exp_image_native_handle_t*
550551
name: phImage
551552
desc: "[out][alloc] pointer to handle of image object created"

unified-runtime/source/adapters/cuda/image.cpp

Lines changed: 112 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -154,75 +154,127 @@ cudaToUrImageChannelFormat(CUarray_format cuda_format,
154154
}
155155
}
156156

157-
ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
157+
ur_result_t urToCudaFilterMode(ur_sampler_filter_mode_t FilterMode,
158+
CUfilter_mode &CudaFilterMode) {
159+
switch (FilterMode) {
160+
case UR_SAMPLER_FILTER_MODE_NEAREST:
161+
CudaFilterMode = CU_TR_FILTER_MODE_POINT;
162+
break;
163+
case UR_SAMPLER_FILTER_MODE_LINEAR:
164+
CudaFilterMode = CU_TR_FILTER_MODE_LINEAR;
165+
break;
166+
default:
167+
setErrorMessage("Invalid filter mode was requested for CUDA.",
168+
UR_RESULT_ERROR_INVALID_VALUE);
169+
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
170+
}
171+
172+
return UR_RESULT_SUCCESS;
173+
}
174+
175+
ur_result_t urToCudaAddressingMode(ur_sampler_addressing_mode_t AddressMode,
176+
CUaddress_mode &CudaAddressMode) {
177+
switch (AddressMode) {
178+
case UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE:
179+
CudaAddressMode = CU_TR_ADDRESS_MODE_CLAMP;
180+
break;
181+
case UR_SAMPLER_ADDRESSING_MODE_CLAMP:
182+
CudaAddressMode = CU_TR_ADDRESS_MODE_BORDER;
183+
break;
184+
case UR_SAMPLER_ADDRESSING_MODE_REPEAT:
185+
CudaAddressMode = CU_TR_ADDRESS_MODE_WRAP;
186+
break;
187+
case UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT:
188+
CudaAddressMode = CU_TR_ADDRESS_MODE_MIRROR;
189+
break;
190+
default:
191+
setErrorMessage("Invalid addressing mode was requested for CUDA.",
192+
UR_RESULT_ERROR_INVALID_VALUE);
193+
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
194+
}
195+
196+
return UR_RESULT_SUCCESS;
197+
}
198+
199+
ur_result_t urTextureCreate(const ur_sampler_desc_t *pSamplerDesc,
158200
const ur_image_desc_t *pImageDesc,
159201
const CUDA_RESOURCE_DESC &ResourceDesc,
160202
const unsigned int normalized_dtype_flag,
161203
ur_exp_image_native_handle_t *phRetImage) {
162-
163204
try {
164-
/// pi_sampler_properties
165-
/// Layout of UR samplers for CUDA
166-
///
167-
/// Sampler property layout:
168-
/// | <bits> | <usage>
169-
/// -----------------------------------
170-
/// | 31 30 ... 13 | N/A
171-
/// | 12 | cubemap filter mode
172-
/// | 11 | mip filter mode
173-
/// | 10 9 8 | addressing mode 3
174-
/// | 7 6 5 | addressing mode 2
175-
/// | 4 3 2 | addressing mode 1
176-
/// | 1 | filter mode
177-
/// | 0 | normalize coords
178205
CUDA_TEXTURE_DESC ImageTexDesc = {};
179-
CUaddress_mode AddrMode[3] = {};
180-
for (size_t i = 0; i < 3; i++) {
181-
ur_sampler_addressing_mode_t AddrModeProp =
182-
hSampler->getAddressingModeDim(i);
183-
if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE -
184-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
185-
AddrMode[i] = CU_TR_ADDRESS_MODE_CLAMP;
186-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP -
187-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
188-
AddrMode[i] = CU_TR_ADDRESS_MODE_BORDER;
189-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_REPEAT -
190-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
191-
AddrMode[i] = CU_TR_ADDRESS_MODE_WRAP;
192-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT -
193-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
194-
AddrMode[i] = CU_TR_ADDRESS_MODE_MIRROR;
206+
// Enumarate to linked properties (extension-specific structures).
207+
void *pNext = const_cast<void *>(pSamplerDesc->pNext);
208+
while (pNext != nullptr) {
209+
const ur_base_desc_t *BaseDesc =
210+
reinterpret_cast<const ur_base_desc_t *>(pNext);
211+
if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES) {
212+
// UR Mipmap properties
213+
const ur_exp_sampler_mip_properties_t *SamplerMipProperties =
214+
reinterpret_cast<const ur_exp_sampler_mip_properties_t *>(pNext);
215+
ImageTexDesc.maxMipmapLevelClamp =
216+
SamplerMipProperties->maxMipmapLevelClamp;
217+
ImageTexDesc.minMipmapLevelClamp =
218+
SamplerMipProperties->minMipmapLevelClamp;
219+
ImageTexDesc.maxAnisotropy = SamplerMipProperties->maxAnisotropy;
220+
// Cuda Mipmap attributes
221+
CUfilter_mode MipFilterMode;
222+
ur_sampler_filter_mode_t MipFilterModeProp =
223+
SamplerMipProperties->mipFilterMode;
224+
UR_CALL(urToCudaFilterMode(MipFilterModeProp, MipFilterMode));
225+
ImageTexDesc.mipmapFilterMode = MipFilterMode;
226+
} else if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES) {
227+
// UR Addressing modes
228+
const ur_exp_sampler_addr_modes_t *SamplerAddrModes =
229+
reinterpret_cast<const ur_exp_sampler_addr_modes_t *>(pNext);
230+
// Cuda Addressing modes
231+
CUaddress_mode AddrMode[3] = {};
232+
for (size_t i = 0; i < 3; i++) {
233+
ur_sampler_addressing_mode_t AddrModeProp =
234+
SamplerAddrModes->addrModes[i];
235+
UR_CALL(urToCudaAddressingMode(AddrModeProp, AddrMode[i]));
236+
}
237+
// The address modes can interfere with other dimensions
238+
// e.g. 1D texture sampling can be interfered with when setting other
239+
// dimension address modes despite their nonexistence.
240+
ImageTexDesc.addressMode[0] = AddrMode[0]; // 1D
241+
ImageTexDesc.addressMode[1] = pImageDesc->height > 0
242+
? AddrMode[1]
243+
: ImageTexDesc.addressMode[1]; // 2D
244+
ImageTexDesc.addressMode[2] = pImageDesc->depth > 0
245+
? AddrMode[2]
246+
: ImageTexDesc.addressMode[2]; // 3D
247+
} else if (BaseDesc->stype ==
248+
UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES) {
249+
// UR Cubemap properties
250+
const ur_exp_sampler_cubemap_properties_t *SamplerCubemapProperties =
251+
reinterpret_cast<const ur_exp_sampler_cubemap_properties_t *>(
252+
pNext);
253+
ur_exp_sampler_cubemap_filter_mode_t CubemapFilterModeProp =
254+
SamplerCubemapProperties->cubemapFilterMode;
255+
// Cuda Cubemap attributes
256+
if (CubemapFilterModeProp ==
257+
UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS) {
258+
#if CUDA_VERSION >= 11060
259+
ImageTexDesc.flags |= CU_TRSF_SEAMLESS_CUBEMAP;
260+
#else
261+
setErrorMessage("The UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS "
262+
"feature requires cuda 11.6 or later.",
263+
UR_RESULT_ERROR_UNSUPPORTED_FEATURE);
264+
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
265+
#endif
266+
}
195267
}
268+
pNext = const_cast<void *>(BaseDesc->pNext);
196269
}
197270

198-
CUfilter_mode FilterMode;
199-
ur_sampler_filter_mode_t FilterModeProp = hSampler->getFilterMode();
200-
FilterMode =
201-
FilterModeProp ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT;
271+
CUfilter_mode FilterMode = pSamplerDesc->filterMode
272+
? CU_TR_FILTER_MODE_LINEAR
273+
: CU_TR_FILTER_MODE_POINT;
202274
ImageTexDesc.filterMode = FilterMode;
203275

204-
// Mipmap attributes
205-
CUfilter_mode MipFilterMode;
206-
ur_sampler_filter_mode_t MipFilterModeProp = hSampler->getMipFilterMode();
207-
MipFilterMode =
208-
MipFilterModeProp ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT;
209-
ImageTexDesc.mipmapFilterMode = MipFilterMode;
210-
ImageTexDesc.maxMipmapLevelClamp = hSampler->MaxMipmapLevelClamp;
211-
ImageTexDesc.minMipmapLevelClamp = hSampler->MinMipmapLevelClamp;
212-
ImageTexDesc.maxAnisotropy = static_cast<unsigned>(hSampler->MaxAnisotropy);
213-
214-
// The address modes can interfere with other dimensions
215-
// e.g. 1D texture sampling can be interfered with when setting other
216-
// dimension address modes despite their nonexistence.
217-
ImageTexDesc.addressMode[0] = AddrMode[0]; // 1D
218-
ImageTexDesc.addressMode[1] = pImageDesc->height > 0
219-
? AddrMode[1]
220-
: ImageTexDesc.addressMode[1]; // 2D
221-
ImageTexDesc.addressMode[2] =
222-
pImageDesc->depth > 0 ? AddrMode[2] : ImageTexDesc.addressMode[2]; // 3D
223-
224276
// flags takes the normalized coordinates setting -- unnormalized is default
225-
ImageTexDesc.flags = (hSampler->isNormalizedCoords())
277+
ImageTexDesc.flags = (pSamplerDesc->normalizedCoords)
226278
? CU_TRSF_NORMALIZED_COORDINATES
227279
: ImageTexDesc.flags;
228280

@@ -231,20 +283,6 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
231283
if (!normalized_dtype_flag) {
232284
ImageTexDesc.flags |= CU_TRSF_READ_AS_INTEGER;
233285
}
234-
// Cubemap attributes
235-
ur_exp_sampler_cubemap_filter_mode_t CubemapFilterModeProp =
236-
hSampler->getCubemapFilterMode();
237-
if (CubemapFilterModeProp == UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS) {
238-
#if CUDA_VERSION >= 11060
239-
ImageTexDesc.flags |= CU_TRSF_SEAMLESS_CUBEMAP;
240-
#else
241-
setErrorMessage("The UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS "
242-
"feature requires cuda 11.6 or later.",
243-
UR_RESULT_ERROR_UNSUPPORTED_FEATURE);
244-
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
245-
#endif
246-
}
247-
248286
CUtexObject Texture;
249287
UR_CHECK_ERROR(
250288
cuTexObjectCreate(&Texture, &ResourceDesc, &ImageTexDesc, nullptr));
@@ -506,7 +544,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
506544
ur_context_handle_t hContext, ur_device_handle_t hDevice,
507545
ur_exp_image_mem_native_handle_t hImageMem,
508546
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
509-
ur_sampler_handle_t hSampler, ur_exp_image_native_handle_t *phImage) {
547+
const ur_sampler_desc_t *pSamplerDesc,
548+
ur_exp_image_native_handle_t *phImage) {
510549
UR_ASSERT(std::find(hContext->getDevices().begin(),
511550
hContext->getDevices().end(),
512551
hDevice) != hContext->getDevices().end(),
@@ -573,7 +612,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
573612
return UR_RESULT_ERROR_INVALID_VALUE;
574613
}
575614

576-
UR_CHECK_ERROR(urTextureCreate(hSampler, pImageDesc, image_res_desc,
615+
UR_CHECK_ERROR(urTextureCreate(pSamplerDesc, pImageDesc, image_res_desc,
577616
normalized_dtype_flag, phImage));
578617

579618
} catch (ur_result_t Err) {

unified-runtime/source/adapters/cuda/image.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,12 @@ ur_result_t
2828
cudaToUrImageChannelFormat(CUarray_format cuda_format,
2929
ur_image_channel_type_t *return_image_channel_type);
3030

31-
ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
31+
ur_result_t urToCudaFilterMode(ur_sampler_filter_mode_t FilterMode,
32+
CUfilter_mode &CudaFilterMode);
33+
ur_result_t urToCudaAddressingMode(ur_sampler_addressing_mode_t AddressMode,
34+
CUaddress_mode &CudaAddressMode);
35+
36+
ur_result_t urTextureCreate(const ur_sampler_desc_t *pSamplerDesc,
3237
const ur_image_desc_t *pImageDesc,
3338
const CUDA_RESOURCE_DESC &ResourceDesc,
3439
const unsigned int normalized_dtype_flag,

0 commit comments

Comments
 (0)