Skip to content

Commit 3910b5b

Browse files
committed
Merge branch 'main' into sanitizer-pr-cpu-local
2 parents 42d5d10 + 5d5c810 commit 3910b5b

File tree

78 files changed

+1104
-427
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+1104
-427
lines changed

.github/workflows/cmake.yml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ jobs:
6868
if: matrix.os == 'ubuntu-22.04'
6969
run: |
7070
sudo apt install libncurses5
71-
wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2023-09-21/sycl_linux.tar.gz
71+
wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-01-29/sycl_linux.tar.gz
7272
mkdir -p ${{github.workspace}}/dpcpp_compiler
7373
tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C ${{github.workspace}}/dpcpp_compiler
7474
@@ -169,7 +169,8 @@ jobs:
169169
{name: CUDA, platform: ""},
170170
{name: HIP, platform: ""},
171171
{name: L0, platform: ""},
172-
{name: OPENCL, platform: "Intel(R) OpenCL"}
172+
{name: OPENCL, platform: "Intel(R) OpenCL"},
173+
{name: NATIVE_CPU, platform: ""}
173174
]
174175
build_type: [Debug, Release]
175176
compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}]
@@ -184,7 +185,7 @@ jobs:
184185

185186
- name: Download DPC++
186187
run: |
187-
wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2023-09-21/sycl_linux.tar.gz
188+
wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-01-29/sycl_linux.tar.gz
188189
mkdir dpcpp_compiler
189190
tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler
190191
@@ -230,7 +231,8 @@ jobs:
230231

231232
examples-build-hw:
232233
name: Build - examples on HW
233-
if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW
234+
# if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW
235+
if: false # temporarily disabled due to conda env setup issues
234236
strategy:
235237
matrix:
236238
adapter: [

.github/workflows/nightly.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626

2727
- name: Download DPC++
2828
run: |
29-
wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2023-08-31/sycl_linux.tar.gz
29+
wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-01-29/sycl_linux.tar.gz
3030
mkdir dpcpp_compiler
3131
tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler
3232

cmake/FindRocmAgentEnumerator.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# rocm_agent_enumerator is found.
1010
#
1111

12-
find_program(ROCM_AGENT_ENUMERATOR NAMES not_rocm_agent_enumerator)
12+
find_program(ROCM_AGENT_ENUMERATOR NAMES rocm_agent_enumerator)
1313

1414
if(ROCM_AGENT_ENUMERATOR)
1515
set(ROCM_AGENT_ENUMERATOR_FOUND TRUE)

include/ur_api.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2723,13 +2723,15 @@ urMemBufferPartition(
27232723
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
27242724
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
27252725
/// + `NULL == hMem`
2726+
/// + `NULL == hDevice`
27262727
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
27272728
/// + `NULL == phNativeMem`
27282729
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
27292730
/// + If the adapter has no underlying equivalent handle.
27302731
UR_APIEXPORT ur_result_t UR_APICALL
27312732
urMemGetNativeHandle(
27322733
ur_mem_handle_t hMem, ///< [in] handle of the mem.
2734+
ur_device_handle_t hDevice, ///< [in] handle of the device that the native handle will be resident on.
27332735
ur_native_handle_t *phNativeMem ///< [out] a pointer to the native handle of the mem.
27342736
);
27352737

@@ -9488,6 +9490,7 @@ typedef struct ur_mem_buffer_partition_params_t {
94889490
/// allowing the callback the ability to modify the parameter's value
94899491
typedef struct ur_mem_get_native_handle_params_t {
94909492
ur_mem_handle_t *phMem;
9493+
ur_device_handle_t *phDevice;
94919494
ur_native_handle_t **pphNativeMem;
94929495
} ur_mem_get_native_handle_params_t;
94939496

include/ur_ddi.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,6 +770,7 @@ typedef ur_result_t(UR_APICALL *ur_pfnMemBufferPartition_t)(
770770
/// @brief Function-pointer for urMemGetNativeHandle
771771
typedef ur_result_t(UR_APICALL *ur_pfnMemGetNativeHandle_t)(
772772
ur_mem_handle_t,
773+
ur_device_handle_t,
773774
ur_native_handle_t *);
774775

775776
///////////////////////////////////////////////////////////////////////////////

include/ur_print.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11174,6 +11174,12 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
1117411174
ur::details::printPtr(os,
1117511175
*(params->phMem));
1117611176

11177+
os << ", ";
11178+
os << ".hDevice = ";
11179+
11180+
ur::details::printPtr(os,
11181+
*(params->phDevice));
11182+
1117711183
os << ", ";
1117811184
os << ".phNativeMem = ";
1117911185

scripts/core/memory.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,10 @@ params:
432432
name: hMem
433433
desc: |
434434
[in] handle of the mem.
435+
- type: $x_device_handle_t
436+
name: hDevice
437+
desc: |
438+
[in] handle of the device that the native handle will be resident on.
435439
- type: $x_native_handle_t*
436440
name: phNativeMem
437441
desc: |

source/adapters/cuda/enqueue.cpp

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -862,7 +862,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
862862
}
863863
}
864864

865-
/// General ND memory copy operation for images (where N > 1).
865+
/// General ND memory copy operation for images.
866866
/// This function requires the corresponding CUDA context to be at the top of
867867
/// the context stack
868868
/// If the source and/or destination is an array, SrcPtr and/or DstPtr
@@ -877,27 +877,27 @@ static ur_result_t commonEnqueueMemImageNDCopy(
877877
UR_ASSERT(DstType == CU_MEMORYTYPE_ARRAY || DstType == CU_MEMORYTYPE_HOST,
878878
UR_RESULT_ERROR_INVALID_MEM_OBJECT);
879879

880-
if (ImgType == UR_MEM_TYPE_IMAGE2D) {
880+
if (ImgType == UR_MEM_TYPE_IMAGE1D || ImgType == UR_MEM_TYPE_IMAGE2D) {
881881
CUDA_MEMCPY2D CpyDesc;
882882
memset(&CpyDesc, 0, sizeof(CpyDesc));
883883
CpyDesc.srcMemoryType = SrcType;
884884
if (SrcType == CU_MEMORYTYPE_ARRAY) {
885885
CpyDesc.srcArray = *static_cast<const CUarray *>(SrcPtr);
886886
CpyDesc.srcXInBytes = SrcOffset.x;
887-
CpyDesc.srcY = SrcOffset.y;
887+
CpyDesc.srcY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : SrcOffset.y;
888888
} else {
889889
CpyDesc.srcHost = SrcPtr;
890890
}
891891
CpyDesc.dstMemoryType = DstType;
892892
if (DstType == CU_MEMORYTYPE_ARRAY) {
893893
CpyDesc.dstArray = *static_cast<CUarray *>(DstPtr);
894894
CpyDesc.dstXInBytes = DstOffset.x;
895-
CpyDesc.dstY = DstOffset.y;
895+
CpyDesc.dstY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : DstOffset.y;
896896
} else {
897897
CpyDesc.dstHost = DstPtr;
898898
}
899899
CpyDesc.WidthInBytes = Region.width;
900-
CpyDesc.Height = Region.height;
900+
CpyDesc.Height = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 1 : Region.height;
901901
UR_CHECK_ERROR(cuMemcpy2DAsync(&CpyDesc, CuStream));
902902
return UR_RESULT_SUCCESS;
903903
}
@@ -1124,21 +1124,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
11241124
}
11251125

11261126
ur_mem_type_t ImgType = std::get<SurfaceMem>(hImageSrc->Mem).getImageType();
1127-
if (ImgType == UR_MEM_TYPE_IMAGE1D) {
1128-
UR_CHECK_ERROR(cuMemcpyAtoA(DstArray, DstByteOffsetX, SrcArray,
1129-
SrcByteOffsetX, BytesToCopy));
1130-
} else {
1131-
ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height,
1132-
region.depth};
1133-
ur_rect_offset_t SrcOffset = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z};
1134-
ur_rect_offset_t DstOffset = {DstByteOffsetX, dstOrigin.y, dstOrigin.z};
11351127

1136-
Result = commonEnqueueMemImageNDCopy(
1137-
CuStream, ImgType, AdjustedRegion, &SrcArray, CU_MEMORYTYPE_ARRAY,
1138-
SrcOffset, &DstArray, CU_MEMORYTYPE_ARRAY, DstOffset);
1139-
if (Result != UR_RESULT_SUCCESS) {
1140-
return Result;
1141-
}
1128+
ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height,
1129+
region.depth};
1130+
ur_rect_offset_t SrcOffset = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z};
1131+
ur_rect_offset_t DstOffset = {DstByteOffsetX, dstOrigin.y, dstOrigin.z};
1132+
1133+
Result = commonEnqueueMemImageNDCopy(
1134+
CuStream, ImgType, AdjustedRegion, &SrcArray, CU_MEMORYTYPE_ARRAY,
1135+
SrcOffset, &DstArray, CU_MEMORYTYPE_ARRAY, DstOffset);
1136+
if (Result != UR_RESULT_SUCCESS) {
1137+
return Result;
11421138
}
11431139

11441140
if (phEvent) {

source/adapters/cuda/image.cpp

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -239,29 +239,38 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
239239

240240
try {
241241
/// pi_sampler_properties
242+
/// Layout of UR samplers for CUDA
243+
///
244+
/// Sampler property layout:
242245
/// | <bits> | <usage>
243246
/// -----------------------------------
244-
/// | 31 30 ... 6 | N/A
245-
/// | 5 | mip filter mode
246-
/// | 4 3 2 | addressing mode
247+
/// | 31 30 ... 12 | N/A
248+
/// | 11 | mip filter mode
249+
/// | 10 9 8 | addressing mode 3
250+
/// | 7 6 5 | addressing mode 2
251+
/// | 4 3 2 | addressing mode 1
247252
/// | 1 | filter mode
248253
/// | 0 | normalize coords
249254
CUDA_TEXTURE_DESC ImageTexDesc = {};
250-
CUaddress_mode AddrMode = {};
251-
ur_sampler_addressing_mode_t AddrModeProp = hSampler->getAddressingMode();
252-
if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE -
253-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
254-
AddrMode = CU_TR_ADDRESS_MODE_CLAMP;
255-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP -
256-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
257-
AddrMode = CU_TR_ADDRESS_MODE_BORDER;
258-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_REPEAT -
259-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
260-
AddrMode = CU_TR_ADDRESS_MODE_WRAP;
261-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT -
262-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
263-
AddrMode = CU_TR_ADDRESS_MODE_MIRROR;
255+
CUaddress_mode AddrMode[3];
256+
for (size_t i = 0; i < 3; i++) {
257+
ur_sampler_addressing_mode_t AddrModeProp =
258+
hSampler->getAddressingModeDim(i);
259+
if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE -
260+
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
261+
AddrMode[i] = CU_TR_ADDRESS_MODE_CLAMP;
262+
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP -
263+
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
264+
AddrMode[i] = CU_TR_ADDRESS_MODE_BORDER;
265+
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_REPEAT -
266+
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
267+
AddrMode[i] = CU_TR_ADDRESS_MODE_WRAP;
268+
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT -
269+
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
270+
AddrMode[i] = CU_TR_ADDRESS_MODE_MIRROR;
271+
}
264272
}
273+
265274
CUfilter_mode FilterMode;
266275
ur_sampler_filter_mode_t FilterModeProp = hSampler->getFilterMode();
267276
FilterMode =
@@ -278,14 +287,15 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
278287
ImageTexDesc.minMipmapLevelClamp = hSampler->MinMipmapLevelClamp;
279288
ImageTexDesc.maxAnisotropy = static_cast<unsigned>(hSampler->MaxAnisotropy);
280289

281-
// The address modes can interfere with other dimensionsenqueueEventsWait
290+
// The address modes can interfere with other dimensions
282291
// e.g. 1D texture sampling can be interfered with when setting other
283292
// dimension address modes despite their nonexistence.
284-
ImageTexDesc.addressMode[0] = AddrMode; // 1D
285-
ImageTexDesc.addressMode[1] =
286-
pImageDesc->height > 0 ? AddrMode : ImageTexDesc.addressMode[1]; // 2D
293+
ImageTexDesc.addressMode[0] = AddrMode[0]; // 1D
294+
ImageTexDesc.addressMode[1] = pImageDesc->height > 0
295+
? AddrMode[1]
296+
: ImageTexDesc.addressMode[1]; // 2D
287297
ImageTexDesc.addressMode[2] =
288-
pImageDesc->depth > 0 ? AddrMode : ImageTexDesc.addressMode[2]; // 3D
298+
pImageDesc->depth > 0 ? AddrMode[2] : ImageTexDesc.addressMode[2]; // 3D
289299

290300
// flags takes the normalized coordinates setting -- unnormalized is default
291301
ImageTexDesc.flags = (hSampler->isNormalizedCoords())

source/adapters/cuda/memory.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) {
161161
/// \param[out] phNativeMem Set to the native handle of the UR mem object.
162162
///
163163
/// \return UR_RESULT_SUCCESS
164-
UR_APIEXPORT ur_result_t UR_APICALL
165-
urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) {
164+
UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle(
165+
ur_mem_handle_t hMem, ur_device_handle_t, ur_native_handle_t *phNativeMem) {
166166
*phNativeMem = reinterpret_cast<ur_native_handle_t>(
167167
std::get<BufferMem>(hMem->Mem).get());
168168
return UR_RESULT_SUCCESS;

0 commit comments

Comments
 (0)