Skip to content

Commit 9433a80

Browse files
authored
[SYCL][NativeCPU] Process nativecpu_utils with prepare_builtins (#17850)
This ensures that functions have the right linkage. Several functions are marked as used to prevent them from being removed as dead code before the work item loop pass and `PrepareSYCLNativeCPUPass` run.
1 parent f531ef3 commit 9433a80

File tree

2 files changed

+30
-13
lines changed

2 files changed

+30
-13
lines changed

libdevice/cmake/modules/SYCLLibdevice.cmake

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -315,13 +315,25 @@ if("native_cpu" IN_LIST SYCL_ENABLE_BACKENDS)
315315
endif()
316316
# Include NativeCPU UR adapter path to enable finding header file with state struct.
317317
# libsycl-nativecpu_utils is only needed as BC file by NativeCPU.
318-
# Todo: add versions for other targets (for cross-compilation)
319-
compile_lib(libsycl-nativecpu_utils
320-
FILETYPE bc
321-
SRC nativecpu_utils.cpp
322-
DEPENDENCIES ${itt_obj_deps}
323-
EXTRA_OPTS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu -fsycl-device-only
324-
-fsycl-device-obj=llvmir)
318+
add_custom_command(
319+
OUTPUT ${bc_binary_dir}/nativecpu_utils.bc
320+
COMMAND ${clang_exe} ${compile_opts} ${bc_device_compile_opts} -fsycl-targets=native_cpu
321+
-I ${NATIVE_CPU_DIR}
322+
${CMAKE_CURRENT_SOURCE_DIR}/nativecpu_utils.cpp
323+
-o ${bc_binary_dir}/nativecpu_utils.bc
324+
MAIN_DEPENDENCY nativecpu_utils.cpp
325+
DEPENDS ${sycl-compiler_deps}
326+
VERBATIM)
327+
add_custom_target(nativecpu_utils-bc DEPENDS ${bc_binary_dir}/nativecpu_utils.bc)
328+
process_bc(libsycl-nativecpu_utils.bc
329+
LIB_TGT libsycl-nativecpu_utils
330+
IN_FILE ${bc_binary_dir}/nativecpu_utils.bc
331+
OUT_DIR ${bc_binary_dir})
332+
add_custom_target(libsycl-nativecpu_utils-bc DEPENDS ${bc_binary_dir}/libsycl-nativecpu_utils.bc)
333+
add_dependencies(libsycldevice-bc libsycl-nativecpu_utils-bc)
334+
install(FILES ${bc_binary_dir}/libsycl-nativecpu_utils.bc
335+
DESTINATION ${install_dest_bc}
336+
COMPONENT libsycldevice)
325337
endif()
326338

327339
# Add all device libraries for each filetype except for the Intel math function

libdevice/nativecpu_utils.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ using __nativecpu_state = native_cpu::state;
2929
#define DEVICE_EXTERNAL_C DEVICE_EXTERN_C __attribute__((always_inline))
3030
#define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((always_inline))
3131

32+
// Several functions are used implicitly by WorkItemLoopsPass and
33+
// PrepareSYCLNativeCPUPass and need to be marked as used to prevent them being
34+
// removed early.
35+
#define USED __attribute__((used))
36+
3237
#define OCL_LOCAL __attribute__((opencl_local))
3338
#define OCL_GLOBAL __attribute__((opencl_global))
3439
#define OCL_PRIVATE __attribute__((opencl_private))
@@ -360,7 +365,7 @@ using MakeGlobalType = typename sycl::detail::DecoratedType<
360365
T, sycl::access::address_space::global_space>::type;
361366

362367
#define DefStateSetWithType(name, field, type) \
363-
DEVICE_EXTERNAL_C void __dpcpp_nativecpu_##name( \
368+
DEVICE_EXTERNAL_C USED void __dpcpp_nativecpu_##name( \
364369
type value, MakeGlobalType<__nativecpu_state> *s) { \
365370
s->field = value; \
366371
} \
@@ -372,7 +377,7 @@ DefStateSetWithType(set_sub_group_id, SubGroup_id, uint32_t);
372377
DefStateSetWithType(set_max_sub_group_size, SubGroup_size, uint32_t);
373378

374379
#define DefineStateGetWithType(name, field, type) \
375-
DEVICE_EXTERNAL_C GET_PROPS type __dpcpp_nativecpu_##name( \
380+
DEVICE_EXTERNAL_C GET_PROPS USED type __dpcpp_nativecpu_##name( \
376381
MakeGlobalType<const __nativecpu_state> *s) { \
377382
return s->field; \
378383
} \
@@ -388,7 +393,7 @@ DefineStateGet_U32(get_max_sub_group_size, SubGroup_size);
388393
DefineStateGet_U32(get_num_sub_groups, NumSubGroups);
389394

390395
#define DefineStateGetWithType2(name, field, rtype, ptype) \
391-
DEVICE_EXTERNAL_C GET_PROPS rtype __dpcpp_nativecpu_##name( \
396+
DEVICE_EXTERNAL_C GET_PROPS USED rtype __dpcpp_nativecpu_##name( \
392397
ptype dim, MakeGlobalType<const __nativecpu_state> *s) { \
393398
return s->field[dim]; \
394399
} \
@@ -406,9 +411,9 @@ DefineStateGet_U64(get_num_groups, MNumGroups);
406411
DefineStateGet_U64(get_wg_size, MWorkGroup_size);
407412
DefineStateGet_U64(get_wg_id, MWorkGroup_id);
408413

409-
DEVICE_EXTERNAL_C
410-
void __dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value,
411-
MakeGlobalType<__nativecpu_state> *s) {
414+
DEVICE_EXTERNAL_C USED void
415+
__dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value,
416+
MakeGlobalType<__nativecpu_state> *s) {
412417
s->MLocal_id[dim] = value;
413418
s->MGlobal_id[dim] = s->MWorkGroup_size[dim] * s->MWorkGroup_id[dim] +
414419
s->MLocal_id[dim] + s->MGlobalOffset[dim];

0 commit comments

Comments
 (0)