Skip to content

Commit 26da8ed

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (7 commits)
2 parents 2d2300d + 7b9490b commit 26da8ed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+821
-209
lines changed

libclc/libspirv/lib/generic/integer/clz.cl

Lines changed: 5 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -6,46 +6,11 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include <clc/clcmacro.h>
9+
#include <clc/integer/clc_clz.h>
1010
#include <libspirv/spirv.h>
1111

12-
_CLC_OVERLOAD _CLC_DEF char __spirv_ocl_clz(char x) {
13-
return __spirv_ocl_clz((ushort)(uchar)x) - 8;
14-
}
12+
#define FUNCTION __spirv_ocl_clz
13+
#define __CLC_FUNCTION(x) __clc_clz
14+
#define __CLC_BODY <clc/shared/unary_def.inc>
1515

16-
_CLC_OVERLOAD _CLC_DEF uchar __spirv_ocl_clz(uchar x) {
17-
return __spirv_ocl_clz((ushort)x) - 8;
18-
}
19-
20-
_CLC_OVERLOAD _CLC_DEF short __spirv_ocl_clz(short x) {
21-
return x ? __builtin_clzs(x) : 16;
22-
}
23-
24-
_CLC_OVERLOAD _CLC_DEF ushort __spirv_ocl_clz(ushort x) {
25-
return x ? __builtin_clzs(x) : 16;
26-
}
27-
28-
_CLC_OVERLOAD _CLC_DEF int __spirv_ocl_clz(int x) {
29-
return x ? __builtin_clz(x) : 32;
30-
}
31-
32-
_CLC_OVERLOAD _CLC_DEF uint __spirv_ocl_clz(uint x) {
33-
return x ? __builtin_clz(x) : 32;
34-
}
35-
36-
_CLC_OVERLOAD _CLC_DEF long __spirv_ocl_clz(long x) {
37-
return x ? __builtin_clzl(x) : 64;
38-
}
39-
40-
_CLC_OVERLOAD _CLC_DEF ulong __spirv_ocl_clz(ulong x) {
41-
return x ? __builtin_clzl(x) : 64;
42-
}
43-
44-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, __spirv_ocl_clz, char)
45-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, __spirv_ocl_clz, uchar)
46-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, __spirv_ocl_clz, short)
47-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, __spirv_ocl_clz, ushort)
48-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, __spirv_ocl_clz, int)
49-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, __spirv_ocl_clz, uint)
50-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, __spirv_ocl_clz, long)
51-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, __spirv_ocl_clz, ulong)
16+
#include <clc/integer/gentype.inc>

libdevice/sanitizer/tsan_rtl.cpp

Lines changed: 101 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ static const __SYCL_CONSTANT__ char __tsan_print_generic_to[] =
2222
"[kernel] %p(4) - %p(%d)\n";
2323

2424
static const __SYCL_CONSTANT__ char __tsan_print_raw_shadow[] =
25-
"[kernel] %p(%d) -> %p: {%x, %x, %x, %x}\n";
25+
"[kernel] %p(%d) -> %p: {%x, %x}\n";
2626

2727
static const __SYCL_CONSTANT__ char __tsan_print_shadow_value[] =
2828
"[kernel] %p(%d) : {size: %d, access: %x, sid: %d, clock: %d, is_write: "
@@ -90,26 +90,36 @@ inline __SYCL_GLOBAL__ RawShadow *MemToShadow_PVC(uptr addr, uint32_t as) {
9090
ConvertGenericPointer(addr, as);
9191
}
9292

93-
if (as != ADDRESS_SPACE_GLOBAL)
94-
return nullptr;
95-
9693
addr = RoundDownTo(addr, kShadowCell);
9794

98-
if (addr & 0xff00'0000'0000'0000ULL) {
99-
// device usm
100-
return addr < TsanLaunchInfo->GlobalShadowOffset
101-
? reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
102-
addr + (TsanLaunchInfo->GlobalShadowOffset +
103-
0x200'0000'0000ULL - 0xff00'0000'0000'0000ULL))
104-
: reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
105-
addr - (0xff00'ffff'ffff'ffffULL -
106-
TsanLaunchInfo->GlobalShadowOffsetEnd + 1));
107-
} else {
108-
// host & shared usm
109-
return reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
110-
(addr & 0xffffffffffULL) + TsanLaunchInfo->GlobalShadowOffset +
111-
((addr & 0x800000000000ULL) >> 7));
95+
if (as == ADDRESS_SPACE_GLOBAL) {
96+
if (addr & 0xff00'0000'0000'0000ULL) {
97+
// device usm
98+
return addr < TsanLaunchInfo->GlobalShadowOffset
99+
? reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
100+
addr + (TsanLaunchInfo->GlobalShadowOffset +
101+
0x200'0000'0000ULL - 0xff00'0000'0000'0000ULL))
102+
: reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
103+
addr - (0xff00'ffff'ffff'ffffULL -
104+
TsanLaunchInfo->GlobalShadowOffsetEnd + 1));
105+
} else {
106+
// host & shared usm
107+
return reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
108+
(addr & 0xffffffffffULL) + TsanLaunchInfo->GlobalShadowOffset +
109+
((addr & 0x800000000000ULL) >> 7));
110+
}
111+
} else if (as == ADDRESS_SPACE_LOCAL) {
112+
const auto shadow_offset = TsanLaunchInfo->LocalShadowOffset;
113+
if (shadow_offset != 0) {
114+
// The size of SLM is 128KB on PVC
115+
constexpr unsigned SLM_SIZE = 128 * 1024;
116+
const size_t wid = WorkGroupLinearId();
117+
return reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
118+
shadow_offset + (wid * SLM_SIZE) + (addr & (SLM_SIZE - 1)));
119+
}
112120
}
121+
122+
return nullptr;
113123
}
114124

115125
inline __SYCL_GLOBAL__ RawShadow *MemToShadow(uptr addr, uint32_t as) {
@@ -151,7 +161,7 @@ inline void StoreShadow(__SYCL_GLOBAL__ RawShadow *p, RawShadow s) {
151161
}
152162

153163
inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type,
154-
uptr addr, uint32_t size,
164+
uptr addr, uint32_t size, uint32_t as,
155165
const char __SYCL_CONSTANT__ *file, uint32_t line,
156166
const char __SYCL_CONSTANT__ *func) {
157167
// This prevents trapping on this address in future.
@@ -167,6 +177,11 @@ inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type,
167177
return;
168178
}
169179

180+
if (as == ADDRESS_SPACE_GENERIC &&
181+
TsanLaunchInfo->DeviceTy != DeviceType::CPU) {
182+
ConvertGenericPointer(addr, as);
183+
}
184+
170185
// Check whether the current address has already been recorded.
171186
for (uint32_t i = 0; i < TsanLaunchInfo->RecordedReportCount; i++) {
172187
auto &SanitizerReport = TsanLaunchInfo->Report[i];
@@ -180,7 +195,8 @@ inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type,
180195
TsanLaunchInfo->Report[TsanLaunchInfo->RecordedReportCount++];
181196

182197
SanitizerReport.Address = addr;
183-
SanitizerReport.Type = type;
198+
SanitizerReport.Type =
199+
type | (as == ADDRESS_SPACE_LOCAL ? kAccessLocal : 0);
184200
SanitizerReport.AccessSize = size;
185201

186202
int FileLength = 0;
@@ -224,7 +240,7 @@ inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type,
224240
}
225241

226242
inline bool CheckRace(__SYCL_GLOBAL__ RawShadow *s, Shadow cur, AccessType type,
227-
uptr addr, uint32_t size,
243+
uptr addr, uint32_t size, uint32_t as,
228244
const char __SYCL_CONSTANT__ *file, uint32_t line,
229245
const char __SYCL_CONSTANT__ *func) {
230246
bool stored = false;
@@ -258,7 +274,7 @@ inline bool CheckRace(__SYCL_GLOBAL__ RawShadow *s, Shadow cur, AccessType type,
258274
if (TsanLaunchInfo->Clock[cur.sid()].clk_[old.sid()] >= old.clock())
259275
continue;
260276

261-
DoReportRace(s, type, addr, size, file, line, func);
277+
DoReportRace(s, type, addr, size, as, file, line, func);
262278
return true;
263279
}
264280

@@ -301,17 +317,17 @@ inline bool ContainsSameAccess(__SYCL_GLOBAL__ RawShadow *s, Shadow cur,
301317
return; \
302318
Sid sid = GetCurrentSid(); \
303319
uint16_t current_clock = IncrementEpoch(sid) + 1; \
304-
TSAN_DEBUG(__spirv_ocl_printf( \
305-
__tsan_print_raw_shadow, (void *)addr, as, (void *)shadow_mem, \
306-
shadow_mem[0], shadow_mem[1], shadow_mem[2], shadow_mem[3])); \
320+
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_raw_shadow, (void *)addr, as, \
321+
(void *)shadow_mem, shadow_mem[0], \
322+
shadow_mem[1])); \
307323
AccessType type = is_write ? kAccessWrite : kAccessRead; \
308324
Shadow cur(addr, size, current_clock, sid, type); \
309325
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_shadow_value, (void *)addr, as, \
310326
size, cur.access(), cur.sid(), cur.clock(), \
311327
is_write)); \
312328
if (ContainsSameAccess(shadow_mem, cur, type)) \
313329
return; \
314-
CheckRace(shadow_mem, cur, type, addr, size, file, line, func); \
330+
CheckRace(shadow_mem, cur, type, addr, size, as, file, line, func); \
315331
}
316332

317333
TSAN_CHECK(read, false, 1)
@@ -349,16 +365,16 @@ __tsan_read16(uptr addr, uint32_t as, const char __SYCL_CONSTANT__ *file,
349365
AccessType type = is_write ? kAccessWrite : kAccessRead; \
350366
uptr size1 = Min(size, RoundUpTo(addr + 1, kShadowCell) - addr); \
351367
{ \
352-
TSAN_DEBUG(__spirv_ocl_printf( \
353-
__tsan_print_raw_shadow, (void *)addr, as, (void *)shadow_mem, \
354-
shadow_mem[0], shadow_mem[1], shadow_mem[2], shadow_mem[3])); \
368+
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_raw_shadow, (void *)addr, as, \
369+
(void *)shadow_mem, shadow_mem[0], \
370+
shadow_mem[1])); \
355371
Shadow cur(addr, size1, current_clock, sid, type); \
356372
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_shadow_value, (void *)addr, \
357373
as, size1, cur.access(), cur.sid(), \
358374
cur.clock(), is_write)); \
359375
if (ContainsSameAccess(shadow_mem, cur, type)) \
360376
goto SECOND; \
361-
if (CheckRace(shadow_mem, cur, type, addr, size1, file, line, func)) \
377+
if (CheckRace(shadow_mem, cur, type, addr, size1, as, file, line, func)) \
362378
return; \
363379
} \
364380
SECOND: \
@@ -367,17 +383,17 @@ __tsan_read16(uptr addr, uint32_t as, const char __SYCL_CONSTANT__ *file,
367383
return; \
368384
shadow_mem += kShadowCnt; \
369385
{ \
370-
TSAN_DEBUG( \
371-
__spirv_ocl_printf(__tsan_print_raw_shadow, (void *)(addr + size1), \
372-
as, (void *)shadow_mem, shadow_mem[0], \
373-
shadow_mem[1], shadow_mem[2], shadow_mem[3])); \
386+
TSAN_DEBUG(__spirv_ocl_printf( \
387+
__tsan_print_raw_shadow, (void *)(addr + size1), as, \
388+
(void *)shadow_mem, shadow_mem[0], shadow_mem[1])); \
374389
Shadow cur(0, size2, current_clock, sid, type); \
375390
TSAN_DEBUG(__spirv_ocl_printf( \
376391
__tsan_print_shadow_value, (void *)(addr + size1), as, size2, \
377392
cur.access(), cur.sid(), cur.clock(), is_write)); \
378393
if (ContainsSameAccess(shadow_mem, cur, type)) \
379394
return; \
380-
CheckRace(shadow_mem, cur, type, addr + size1, size2, file, line, func); \
395+
CheckRace(shadow_mem, cur, type, addr + size1, size2, as, file, line, \
396+
func); \
381397
} \
382398
}
383399

@@ -420,7 +436,7 @@ static inline void __tsan_cleanup_private_cpu_impl(uptr addr, uint32_t size) {
420436
}
421437
}
422438

423-
DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, uint32_t size) {
439+
DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, size_t size) {
424440
#if defined(__LIBDEVICE_CPU__)
425441
__tsan_cleanup_private_cpu_impl(addr, size);
426442
#elif defined(__LIBDEVICE_PVC__)
@@ -433,6 +449,55 @@ DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, uint32_t size) {
433449
#endif
434450
}
435451

452+
static __SYCL_CONSTANT__ const char __tsan_print_cleanup_local[] =
453+
"[kernel] cleanup shadow (%p ~ %p) for local %p\n";
454+
455+
DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_static_local(uptr addr,
456+
size_t size) {
457+
// Update shadow memory of local memory only on first work-item
458+
if (__spirv_LocalInvocationId_x() + __spirv_LocalInvocationId_y() +
459+
__spirv_LocalInvocationId_z() ==
460+
0) {
461+
if (TsanLaunchInfo->LocalShadowOffset == 0)
462+
return;
463+
464+
addr = RoundDownTo(addr, kShadowCell);
465+
size = RoundUpTo(size, kShadowCell);
466+
467+
RawShadow *Begin = MemToShadow(addr, ADDRESS_SPACE_LOCAL);
468+
for (uptr i = 0; i < size / kShadowCell * kShadowCnt; i++)
469+
Begin[i] = 0;
470+
471+
TSAN_DEBUG(__spirv_ocl_printf(
472+
__tsan_print_cleanup_local, addr, Begin,
473+
(uptr)Begin + size / kShadowCell * kShadowCnt * kShadowSize - 1));
474+
}
475+
}
476+
477+
static __SYCL_CONSTANT__ const char __tsan_print_report_arg_count_incorrect[] =
478+
"[kernel] ERROR: The number of local args is incorrect, expect %d, actual "
479+
"%d\n";
480+
481+
DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_dynamic_local(uptr ptr,
482+
uint32_t num_args) {
483+
if (!TsanLaunchInfo->LocalShadowOffset)
484+
return;
485+
486+
if (num_args != TsanLaunchInfo->NumLocalArgs) {
487+
__spirv_ocl_printf(__tsan_print_report_arg_count_incorrect, num_args,
488+
TsanLaunchInfo->NumLocalArgs);
489+
return;
490+
}
491+
492+
uptr *args = (uptr *)ptr;
493+
494+
for (uint32_t i = 0; i < num_args; ++i) {
495+
auto *local_arg = &TsanLaunchInfo->LocalArgs[i];
496+
497+
__tsan_cleanup_static_local(args[i], local_arg->Size);
498+
}
499+
}
500+
436501
DEVICE_EXTERN_C_INLINE void __tsan_device_barrier() {
437502
Sid sid = GetCurrentSid();
438503

llvm/include/llvm/Transforms/Instrumentation/SPIRVSanitizerCommonUtils.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SPIRVSANITIZERCOMMONUTILS_H
1313
#define LLVM_TRANSFORMS_INSTRUMENTATION_SPIRVSANITIZERCOMMONUTILS_H
1414

15+
#include "llvm/IR/Constants.h"
1516
#include "llvm/IR/DerivedTypes.h"
1617
#include "llvm/IR/Type.h"
1718
#include "llvm/IR/Value.h"
@@ -24,8 +25,17 @@ constexpr unsigned kSpirOffloadConstantAS = 2;
2425
constexpr unsigned kSpirOffloadLocalAS = 3;
2526
constexpr unsigned kSpirOffloadGenericAS = 4;
2627

28+
// If the type is or has a target extension type, just return the type;
29+
// otherwise return nullptr.
2730
TargetExtType *getTargetExtType(Type *Ty);
31+
32+
// Check if it's a joint matrix access operation.
2833
bool isJointMatrixAccess(Value *V);
34+
35+
// If the User is an instruction or a constant expr, try to get the functions
36+
// in which it is used.
37+
void getFunctionsOfUser(User *User, SmallVectorImpl<Function *> &Functions);
38+
2939
} // namespace llvm
3040

3141
#endif // LLVM_TRANSFORMS_INSTRUMENTATION_SPIRVSANITIZERCOMMONUTILS_H

llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2968,15 +2968,6 @@ void ModuleAddressSanitizer::instrumentDeviceGlobal(IRBuilder<> &IRB) {
29682968
G->eraseFromParent();
29692969
}
29702970

2971-
static void getFunctionsOfUser(User *User, DenseSet<Function *> &Functions) {
2972-
if (Instruction *Inst = dyn_cast<Instruction>(User)) {
2973-
Functions.insert(Inst->getFunction());
2974-
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(User)) {
2975-
for (auto *U : CE->users())
2976-
getFunctionsOfUser(U, Functions);
2977-
}
2978-
}
2979-
29802971
void ModuleAddressSanitizer::initializeRetVecMap(Function *F) {
29812972
if (KernelToRetVecMap.find(F) != KernelToRetVecMap.end())
29822973
return;
@@ -3109,19 +3100,23 @@ void ModuleAddressSanitizer::instrumentSyclStaticLocalMemory(IRBuilder<> &IRB) {
31093100
// We only instrument on spir_kernel, because local variables are
31103101
// a kind of global variable
31113102
for (auto *G : LocalGlobals) {
3112-
DenseSet<Function *> InstrumentedFunc;
3103+
SmallVector<Function *> WorkList;
3104+
DenseSet<Function *> InstrumentedKernel;
31133105
for (auto *User : G->users())
3114-
getFunctionsOfUser(User, InstrumentedFunc);
3115-
for (Function *F : InstrumentedFunc) {
3106+
getFunctionsOfUser(User, WorkList);
3107+
while (!WorkList.empty()) {
3108+
Function *F = WorkList.pop_back_val();
31163109
if (F->getCallingConv() == CallingConv::SPIR_KERNEL) {
3117-
Instrument(G, F);
3110+
if (!InstrumentedKernel.contains(F)) {
3111+
Instrument(G, F);
3112+
InstrumentedKernel.insert(F);
3113+
}
31183114
continue;
31193115
}
31203116
// Get root spir_kernel of spir_func
31213117
initializeKernelCallerMap(F);
3122-
for (Function *Kernel : FuncToKernelCallerMap[F])
3123-
if (!InstrumentedFunc.contains(Kernel))
3124-
Instrument(G, Kernel);
3118+
for (auto *F : FuncToKernelCallerMap[F])
3119+
WorkList.push_back(F);
31253120
}
31263121
}
31273122
}

0 commit comments

Comments
 (0)