Skip to content

Commit b944210

Browse files
committed
Merge branch 'main' into sanitizer-buffer
2 parents 1ba7d3c + fb3cbd1 commit b944210

27 files changed

+514
-212
lines changed

CMakeLists.txt

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF)
4747
option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
4848
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
4949
option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF)
50+
option(UR_BUILD_XPTI_LIBS "Build the XPTI libraries when tracing is enabled" ON)
5051
set(UR_DPCXX "" CACHE FILEPATH "Path of the DPC++ compiler executable")
5152
set(UR_DPCXX_BUILD_FLAGS "" CACHE STRING "Build flags to pass to DPC++ when compiling device programs")
5253
set(UR_SYCL_LIBRARY_DIR "" CACHE PATH
@@ -109,31 +110,34 @@ endif()
109110
if(UR_ENABLE_TRACING)
110111
add_compile_definitions(UR_ENABLE_TRACING)
111112

112-
# fetch xpti proxy library for the tracing layer
113-
FetchContentSparse_Declare(xpti https://github.com/intel/llvm.git "sycl-nightly/20230703" "xpti")
114-
FetchContent_MakeAvailable(xpti)
113+
if (UR_BUILD_XPTI_LIBS)
114+
# fetch xpti proxy library for the tracing layer
115+
FetchContentSparse_Declare(xpti https://github.com/intel/llvm.git "sycl-nightly/20230703" "xpti")
116+
FetchContent_MakeAvailable(xpti)
115117

116-
# set -fPIC for xpti since we are linking it with a shared library
117-
set_target_properties(xpti PROPERTIES POSITION_INDEPENDENT_CODE ON)
118+
# set -fPIC for xpti since we are linking it with a shared library
119+
set_target_properties(xpti PROPERTIES POSITION_INDEPENDENT_CODE ON)
118120

119-
# fetch the xptifw dispatcher, mostly used for testing
120-
# these variables need to be set for xptifw to compile
121-
set(XPTI_SOURCE_DIR ${xpti_SOURCE_DIR})
122-
set(XPTI_DIR ${xpti_SOURCE_DIR})
123-
set(XPTI_ENABLE_TESTS OFF CACHE INTERNAL "Turn off xptifw tests")
121+
# fetch the xptifw dispatcher, mostly used for testing
122+
# these variables need to be set for xptifw to compile
123+
set(XPTI_SOURCE_DIR ${xpti_SOURCE_DIR})
124+
set(XPTI_DIR ${xpti_SOURCE_DIR})
125+
set(XPTI_ENABLE_TESTS OFF CACHE INTERNAL "Turn off xptifw tests")
124126

125-
FetchContentSparse_Declare(xptifw https://github.com/intel/llvm.git "sycl-nightly/20230703" "xptifw")
127+
FetchContentSparse_Declare(xptifw https://github.com/intel/llvm.git "sycl-nightly/20230703" "xptifw")
126128

127-
FetchContent_MakeAvailable(xptifw)
129+
FetchContent_MakeAvailable(xptifw)
128130

129-
check_cxx_compiler_flag("-Wno-error=maybe-uninitialized" HAS_MAYBE_UNINIT)
130-
if (HAS_MAYBE_UNINIT)
131-
target_compile_options(xptifw PRIVATE -Wno-error=maybe-uninitialized)
131+
check_cxx_compiler_flag("-Wno-error=maybe-uninitialized" HAS_MAYBE_UNINIT)
132+
if (HAS_MAYBE_UNINIT)
133+
target_compile_options(xptifw PRIVATE -Wno-error=maybe-uninitialized)
134+
endif()
135+
136+
set_target_properties(xptifw PROPERTIES
137+
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}
138+
)
132139
endif()
133140

134-
set_target_properties(xptifw PROPERTIES
135-
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}
136-
)
137141
if (MSVC)
138142
set(TARGET_XPTI $<IF:$<CONFIG:Release>,xpti,xptid>)
139143
else()

include/ur_api.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4772,7 +4772,8 @@ typedef enum ur_kernel_group_info_t {
47724772
UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE = 0, ///< [size_t[3]] Return Work Group maximum global size
47734773
UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE = 1, ///< [size_t] Return maximum Work Group size
47744774
UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE = 2, ///< [size_t[3]] Return Work Group size required by the source code, such
4775-
///< as __attribute__((required_work_group_size(X,Y,Z))
4775+
///< as __attribute__((required_work_group_size(X,Y,Z))), or (0, 0, 0) if
4776+
///< unspecified
47764777
UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = 3, ///< [size_t] Return local memory required by the Kernel
47774778
UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 4, ///< [size_t] Return preferred multiple of Work Group size for launch
47784779
UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = 5, ///< [size_t] Return minimum amount of private memory in bytes used by each
@@ -4788,7 +4789,8 @@ typedef enum ur_kernel_group_info_t {
47884789
typedef enum ur_kernel_sub_group_info_t {
47894790
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE = 0, ///< [uint32_t] Return maximum SubGroup size
47904791
UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS = 1, ///< [uint32_t] Return maximum number of SubGroup
4791-
UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS = 2, ///< [uint32_t] Return number of SubGroup required by the source code
4792+
UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS = 2, ///< [uint32_t] Return number of SubGroup required by the source code or 0
4793+
///< if unspecified
47924794
UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL = 3, ///< [uint32_t] Return SubGroup size required by Intel
47934795
/// @cond
47944796
UR_KERNEL_SUB_GROUP_INFO_FORCE_UINT32 = 0x7fffffff
@@ -5989,6 +5991,7 @@ urEventSetCallback(
59895991
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
59905992
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
59915993
/// - ::UR_RESULT_ERROR_INVALID_VALUE
5994+
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values have not been specified."
59925995
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
59935996
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
59945997
UR_APIEXPORT ur_result_t UR_APICALL

scripts/core/enqueue.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ returns:
6565
- $X_RESULT_ERROR_INVALID_WORK_DIMENSION
6666
- $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
6767
- $X_RESULT_ERROR_INVALID_VALUE
68+
- $X_RESULT_ERROR_INVALID_KERNEL_ARGS
69+
- "The kernel argument values have not been specified."
6870
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
6971
- $X_RESULT_ERROR_OUT_OF_RESOURCES
7072
--- #--------------------------------------------------------------------------

scripts/core/kernel.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ etors:
135135
- name: WORK_GROUP_SIZE
136136
desc: "[size_t] Return maximum Work Group size"
137137
- name: COMPILE_WORK_GROUP_SIZE
138-
desc: "[size_t[3]] Return Work Group size required by the source code, such as __attribute__((required_work_group_size(X,Y,Z))"
138+
desc: "[size_t[3]] Return Work Group size required by the source code, such as __attribute__((required_work_group_size(X,Y,Z))), or (0, 0, 0) if unspecified"
139139
- name: LOCAL_MEM_SIZE
140140
desc: "[size_t] Return local memory required by the Kernel"
141141
- name: PREFERRED_WORK_GROUP_SIZE_MULTIPLE
@@ -154,7 +154,7 @@ etors:
154154
- name: MAX_NUM_SUB_GROUPS
155155
desc: "[uint32_t] Return maximum number of SubGroup"
156156
- name: COMPILE_NUM_SUB_GROUPS
157-
desc: "[uint32_t] Return number of SubGroup required by the source code"
157+
desc: "[uint32_t] Return number of SubGroup required by the source code or 0 if unspecified"
158158
- name: SUB_GROUP_SIZE_INTEL
159159
desc: "[uint32_t] Return SubGroup size required by Intel"
160160
--- #--------------------------------------------------------------------------

source/adapters/hip/enqueue.cpp

Lines changed: 50 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "kernel.hpp"
1616
#include "memory.hpp"
1717
#include "queue.hpp"
18+
#include "ur_api.h"
1819

1920
#include <ur/ur.hpp>
2021

@@ -1239,49 +1240,42 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
12391240
UR_ASSERT(offset + size <= BufferImpl.getSize(),
12401241
UR_RESULT_ERROR_INVALID_SIZE);
12411242

1242-
ur_result_t Result = UR_RESULT_ERROR_INVALID_OPERATION;
1243-
const bool IsPinned =
1244-
BufferImpl.MemAllocMode == BufferMem::AllocMode::AllocHostPtr;
1245-
1246-
// Currently no support for overlapping regions
1247-
if (BufferImpl.getMapPtr() != nullptr) {
1248-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
1243+
auto MapPtr = BufferImpl.mapToPtr(size, offset, mapFlags);
1244+
if (!MapPtr) {
1245+
return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
12491246
}
12501247

1251-
// Allocate a pointer in the host to store the mapped information
1252-
auto HostPtr = BufferImpl.mapToPtr(size, offset, mapFlags);
1253-
*ppRetMap = std::get<BufferMem>(hBuffer->Mem).getMapPtr();
1254-
if (HostPtr) {
1255-
Result = UR_RESULT_SUCCESS;
1256-
}
1248+
const bool IsPinned =
1249+
BufferImpl.MemAllocMode == BufferMem::AllocMode::AllocHostPtr;
12571250

1258-
if (!IsPinned &&
1259-
((mapFlags & UR_MAP_FLAG_READ) || (mapFlags & UR_MAP_FLAG_WRITE))) {
1260-
// Pinned host memory is already on host so it doesn't need to be read.
1261-
Result = urEnqueueMemBufferRead(hQueue, hBuffer, blockingMap, offset, size,
1262-
HostPtr, numEventsInWaitList,
1263-
phEventWaitList, phEvent);
1264-
} else {
1265-
ScopedContext Active(hQueue->getDevice());
1251+
try {
1252+
if (!IsPinned && (mapFlags & (UR_MAP_FLAG_READ | UR_MAP_FLAG_WRITE))) {
1253+
// Pinned host memory is already on host so it doesn't need to be read.
1254+
UR_CHECK_ERROR(urEnqueueMemBufferRead(
1255+
hQueue, hBuffer, blockingMap, offset, size, MapPtr,
1256+
numEventsInWaitList, phEventWaitList, phEvent));
1257+
} else {
1258+
ScopedContext Active(hQueue->getDevice());
12661259

1267-
if (IsPinned) {
1268-
Result = urEnqueueEventsWait(hQueue, numEventsInWaitList, phEventWaitList,
1269-
nullptr);
1270-
}
1260+
if (IsPinned) {
1261+
UR_CHECK_ERROR(urEnqueueEventsWait(hQueue, numEventsInWaitList,
1262+
phEventWaitList, nullptr));
1263+
}
12711264

1272-
if (phEvent) {
1273-
try {
1265+
if (phEvent) {
12741266
*phEvent = ur_event_handle_t_::makeNative(
12751267
UR_COMMAND_MEM_BUFFER_MAP, hQueue, hQueue->getNextTransferStream());
12761268
UR_CHECK_ERROR((*phEvent)->start());
12771269
UR_CHECK_ERROR((*phEvent)->record());
1278-
} catch (ur_result_t Error) {
1279-
Result = Error;
12801270
}
12811271
}
1272+
} catch (ur_result_t Error) {
1273+
return Error;
12821274
}
12831275

1284-
return Result;
1276+
*ppRetMap = MapPtr;
1277+
1278+
return UR_RESULT_SUCCESS;
12851279
}
12861280

12871281
/// Implements the unmap from the host, using a BufferWrite operation.
@@ -1292,47 +1286,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
12921286
ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr,
12931287
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
12941288
ur_event_handle_t *phEvent) {
1295-
ur_result_t Result = UR_RESULT_SUCCESS;
12961289
UR_ASSERT(hMem->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT);
1297-
UR_ASSERT(std::get<BufferMem>(hMem->Mem).getMapPtr() != nullptr,
1298-
UR_RESULT_ERROR_INVALID_MEM_OBJECT);
1299-
UR_ASSERT(std::get<BufferMem>(hMem->Mem).getMapPtr() == pMappedPtr,
1300-
UR_RESULT_ERROR_INVALID_MEM_OBJECT);
1290+
auto &BufferImpl = std::get<BufferMem>(hMem->Mem);
13011291

1302-
const bool IsPinned = std::get<BufferMem>(hMem->Mem).MemAllocMode ==
1303-
BufferMem::AllocMode::AllocHostPtr;
1304-
1305-
if (!IsPinned &&
1306-
((std::get<BufferMem>(hMem->Mem).getMapFlags() & UR_MAP_FLAG_WRITE) ||
1307-
(std::get<BufferMem>(hMem->Mem).getMapFlags() &
1308-
UR_MAP_FLAG_WRITE_INVALIDATE_REGION))) {
1309-
// Pinned host memory is only on host so it doesn't need to be written to.
1310-
Result = urEnqueueMemBufferWrite(
1311-
hQueue, hMem, true, std::get<BufferMem>(hMem->Mem).getMapOffset(),
1312-
std::get<BufferMem>(hMem->Mem).getMapSize(), pMappedPtr,
1313-
numEventsInWaitList, phEventWaitList, phEvent);
1314-
} else {
1315-
ScopedContext Active(hQueue->getDevice());
1292+
auto *Map = BufferImpl.getMapDetails(pMappedPtr);
1293+
UR_ASSERT(Map != nullptr, UR_RESULT_ERROR_INVALID_MEM_OBJECT);
13161294

1317-
if (IsPinned) {
1318-
Result = urEnqueueEventsWait(hQueue, numEventsInWaitList, phEventWaitList,
1319-
nullptr);
1320-
}
1295+
const bool IsPinned =
1296+
BufferImpl.MemAllocMode == BufferMem::AllocMode::AllocHostPtr;
13211297

1322-
if (phEvent) {
1323-
try {
1298+
try {
1299+
if (!IsPinned &&
1300+
(Map->getMapFlags() &
1301+
(UR_MAP_FLAG_WRITE | UR_MAP_FLAG_WRITE_INVALIDATE_REGION))) {
1302+
// Pinned host memory is only on host so it doesn't need to be written to.
1303+
UR_CHECK_ERROR(urEnqueueMemBufferWrite(
1304+
hQueue, hMem, true, Map->getMapOffset(), Map->getMapSize(),
1305+
pMappedPtr, numEventsInWaitList, phEventWaitList, phEvent));
1306+
} else {
1307+
ScopedContext Active(hQueue->getDevice());
1308+
1309+
if (IsPinned) {
1310+
UR_CHECK_ERROR(urEnqueueEventsWait(hQueue, numEventsInWaitList,
1311+
phEventWaitList, nullptr));
1312+
}
1313+
1314+
if (phEvent) {
13241315
*phEvent = ur_event_handle_t_::makeNative(
13251316
UR_COMMAND_MEM_UNMAP, hQueue, hQueue->getNextTransferStream());
13261317
UR_CHECK_ERROR((*phEvent)->start());
13271318
UR_CHECK_ERROR((*phEvent)->record());
1328-
} catch (ur_result_t Error) {
1329-
Result = Error;
13301319
}
13311320
}
1321+
} catch (ur_result_t Error) {
1322+
return Error;
13321323
}
13331324

1334-
std::get<BufferMem>(hMem->Mem).unmap(pMappedPtr);
1335-
return Result;
1325+
BufferImpl.unmap(pMappedPtr);
1326+
return UR_RESULT_SUCCESS;
13361327
}
13371328

13381329
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(

0 commit comments

Comments
 (0)