Skip to content

Commit bd2d590

Browse files
committed
Merge commit 'a485cbf0d953' into llvmspirv_pulldown
2 parents 70cc5ea + a485cbf commit bd2d590

File tree

12 files changed

+131
-129
lines changed

12 files changed

+131
-129
lines changed

devops/dependencies-igc-dev.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"linux": {
33
"igc_dev": {
4-
"github_tag": "igc-dev-81b4de2",
5-
"version": "81b4de2",
6-
"updated_at": "2025-03-29T17:38:03Z",
7-
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2844658814/zip",
4+
"github_tag": "igc-dev-b74b7ab",
5+
"version": "b74b7ab",
6+
"updated_at": "2025-04-02T18:41:33Z",
7+
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2869865189/zip",
88
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
99
}
1010
}

devops/scripts/benchmarks/utils/compute_runtime.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def build_igc(self, repo, commit):
9797
options.workdir,
9898
"vc-intrinsics",
9999
"https://github.com/intel/vc-intrinsics",
100-
"facb2076a2ce6cd6527c1e16570ba0fbaa2f1dba",
100+
"b980474c99859f7e4eb157828c5e80202b062177",
101101
)
102102
self.llvm_project = git_clone(
103103
options.workdir,

libdevice/nativecpu_utils.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -318,9 +318,10 @@ DefShuffleINTEL_All(_Float16, f16, _Float16);
318318

319319
Define2ArgForward(uint64_t, __spirv_ocl_u_min, std::min);
320320

321+
#define GET_PROPS __attribute__((pure))
321322
#define GEN_u32(bname, muxname) \
322-
DEVICE_EXTERN_C uint32_t muxname(); \
323-
DEVICE_EXTERNAL uint32_t bname() { return muxname(); } \
323+
DEVICE_EXTERN_C GET_PROPS uint32_t muxname(); \
324+
DEVICE_EXTERNAL GET_PROPS uint32_t bname() { return muxname(); } \
324325
static_assert(true)
325326
// subgroup
326327
GEN_u32(__spirv_SubgroupLocalInvocationId, __mux_get_sub_group_local_id);
@@ -331,8 +332,8 @@ GEN_u32(__spirv_SubgroupSize, __mux_get_sub_group_size);
331332

332333
// I64_I32
333334
#define GEN_p(bname, muxname, arg) \
334-
DEVICE_EXTERN_C uint64_t muxname(uint32_t); \
335-
DEVICE_EXTERNAL uint64_t bname() { return muxname(arg); } \
335+
DEVICE_EXTERN_C GET_PROPS uint64_t muxname(uint32_t); \
336+
DEVICE_EXTERNAL GET_PROPS uint64_t bname() { return muxname(arg); } \
336337
static_assert(true)
337338

338339
#define GEN_xyz(bname, ncpu_name) \
@@ -365,8 +366,8 @@ DefStateSetWithType(set_sub_group_id, SubGroup_id, uint32_t);
365366
DefStateSetWithType(set_max_sub_group_size, SubGroup_size, uint32_t);
366367

367368
#define DefineStateGetWithType(name, field, type) \
368-
DEVICE_EXTERNAL_C type __dpcpp_nativecpu_##name( \
369-
MakeGlobalType<__nativecpu_state> *s) { \
369+
DEVICE_EXTERNAL_C GET_PROPS type __dpcpp_nativecpu_##name( \
370+
MakeGlobalType<const __nativecpu_state> *s) { \
370371
return s->field; \
371372
} \
372373
static_assert(true)
@@ -381,8 +382,8 @@ DefineStateGet_U32(get_max_sub_group_size, SubGroup_size);
381382
DefineStateGet_U32(get_num_sub_groups, NumSubGroups);
382383

383384
#define DefineStateGetWithType2(name, field, rtype, ptype) \
384-
DEVICE_EXTERNAL_C rtype __dpcpp_nativecpu_##name( \
385-
ptype dim, MakeGlobalType<__nativecpu_state> *s) { \
385+
DEVICE_EXTERNAL_C GET_PROPS rtype __dpcpp_nativecpu_##name( \
386+
ptype dim, MakeGlobalType<const __nativecpu_state> *s) { \
386387
return s->field[dim]; \
387388
} \
388389
static_assert(true)

llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -301,29 +301,20 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
301301

302302
llvm::Constant *CurrentStatePointerTLS = nullptr;
303303

304-
// check if any of the kernels is called by some other function.
305-
// This can happen e.g. with OCK, where wrapper functions are
306-
// created around the original kernel.
307-
bool KernelIsCalled = false;
308-
for (auto &K : OldKernels) {
309-
for (auto &U : K->uses()) {
310-
if (isa<CallBase>(U.getUser())) {
311-
KernelIsCalled = true;
312-
}
313-
}
314-
}
304+
// Contains the used builtins and kernels that need to be processed to
305+
// receive a pointer to the state struct.
306+
llvm::SmallVector<std::pair<llvm::Function *, StringRef>>
307+
UsedBuiltinsAndKernels;
315308

316309
// Then we iterate over all the supported builtins, find the used ones
317-
llvm::SmallVector<std::pair<llvm::Function *, StringRef>> UsedBuiltins;
318310
for (const auto &Entry : BuiltinNamesMap) {
319311
auto *Glob = M.getFunction(Entry.first);
320312
if (!Glob)
321313
continue;
322314
if (CurrentStatePointerTLS == nullptr) {
323315
for (const auto &Use : Glob->uses()) {
324316
auto *I = cast<CallBase>(Use.getUser());
325-
if (KernelIsCalled ||
326-
IsNonKernelCalledByNativeCPUKernel(I->getFunction())) {
317+
if (IsNonKernelCalledByNativeCPUKernel(I->getFunction())) {
327318
// only use the threadlocal if we have kernels calling builtins
328319
// indirectly, or if the kernel is called by some other func.
329320
CurrentStatePointerTLS = M.getOrInsertGlobal(
@@ -344,7 +335,7 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
344335
}
345336
}
346337
}
347-
UsedBuiltins.push_back({Glob, Entry.second});
338+
UsedBuiltinsAndKernels.push_back({Glob, Entry.second});
348339
}
349340

350341
#ifdef NATIVECPU_USE_OCK
@@ -407,6 +398,11 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
407398
OldF->replaceAllUsesWith(NewF);
408399
OldF->eraseFromParent();
409400
NewKernels.push_back(NewF);
401+
if (!CurrentStatePointerTLS && NewF->getNumUses() > 0)
402+
// If a thread_local is not used we need to keep track of the called
403+
// kernel so we can update its call sites with the pointer to the state
404+
// struct like we do for the called builtins.
405+
UsedBuiltinsAndKernels.push_back({NewF, ""});
410406
ModuleChanged = true;
411407
}
412408

@@ -419,13 +415,25 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
419415

420416
// Then we iterate over all used builtins and
421417
// replace them with calls to our Native CPU functions.
422-
for (const auto &Entry : UsedBuiltins) {
418+
// For the used kernels we need to replace calls to them
419+
// with calls receiving the state pointer argument.
420+
for (const auto &Entry : UsedBuiltinsAndKernels) {
423421
SmallVector<std::pair<Instruction *, Instruction *>> ToRemove;
424422
SmallVector<Function *> ToRemove2;
425423
Function *const Glob = Entry.first;
426424
Function *ReplaceFunc = nullptr;
427425
for (const auto &Use : Glob->uses()) {
428426
auto I = cast<CallBase>(Use.getUser());
427+
if (Entry.second == "") {
428+
if (const Function *CF = I->getCalledFunction()) {
429+
unsigned numParams = CF->getFunctionType()->getNumParams();
430+
auto numArgs = I->arg_size();
431+
if (numArgs == numParams)
432+
continue;
433+
assert(numArgs + 1 == numParams);
434+
}
435+
ReplaceFunc = Entry.first;
436+
}
429437
Function *const C = I->getFunction();
430438
if (IsUnusedBuiltinOrPrivateDef(*C)) {
431439
ToRemove2.push_back(C);
@@ -464,6 +472,10 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
464472
for (auto Temp : ToRemove2)
465473
Temp->eraseFromParent();
466474

475+
// Don't erase if it's not a builtin
476+
if (Entry.second == "")
477+
continue;
478+
467479
// Finally, we erase the builtin from the module
468480
Glob->eraseFromParent();
469481
}

sycl/source/detail/queue_impl.hpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -870,10 +870,7 @@ class queue_impl {
870870
event finalizeHandlerPostProcess(
871871
HandlerType &Handler,
872872
const optional<SubmitPostProcessF> &PostProcessorFunc) {
873-
auto HandlerImpl = detail::getSyclObjImpl(Handler);
874-
const CGType Type = HandlerImpl->MCGType;
875-
876-
bool IsKernel = Type == CGType::Kernel;
873+
bool IsKernel = Handler.getType() == CGType::Kernel;
877874
bool KernelUsesAssert = false;
878875

879876
if (IsKernel)
Lines changed: 75 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
// REQUIRES: aspect-ext_intel_legacy_image
2-
// RUN: %{build} -o %t.out
2+
3+
// %O0 added because of GSD-10960. Without it, IGC will fail with
4+
// an access violation error.
5+
// RUN: %{build} %O0 -o %t.out
36
// RUN: %{run} %t.out
47

58
// UNSUPPORTED: cuda
@@ -13,100 +16,88 @@
1316
#include <sycl/detail/core.hpp>
1417
using namespace sycl;
1518

16-
void init(uint32_t *A, uint32_t *B, size_t NumI32Elts) {
17-
for (int I = 0; I < NumI32Elts; I++) {
18-
A[I] = I;
19-
B[I] = 0;
20-
}
21-
}
19+
template <int Dimensions> class CopyKernel;
2220

23-
int check(uint32_t *B, size_t NumI32Elts) {
24-
for (int I = 0; I < NumI32Elts; I++) {
25-
if (B[I] != I) {
26-
std::cout << "Failed" << std::endl;
27-
std::cerr << "Error for the index: " << I << ", computed: " << B[I]
28-
<< std::endl;
29-
return 1;
30-
}
31-
}
32-
std::cout << "Passed" << std::endl;
33-
return 0;
34-
}
21+
template <int Dimensions>
22+
bool testND(queue &Q, size_t XSize, size_t YSize, size_t ZSize = 1) {
3523

36-
int test2D(queue &Q, size_t XSize, size_t YSize) {
37-
std::cout << "Starting the test with size = {" << XSize << ", " << YSize
38-
<< "} ... ";
39-
size_t NumI32Elts = XSize * YSize * 4;
40-
uint32_t *A = (uint32_t *)malloc(NumI32Elts * sizeof(uint32_t));
41-
uint32_t *B = (uint32_t *)malloc(NumI32Elts * sizeof(uint32_t));
42-
init(A, B, NumI32Elts);
24+
static_assert(Dimensions == 2 || Dimensions == 3,
25+
"Only 2D and 3D images are supported.");
4326

44-
try {
45-
image<2> ImgA(A, image_channel_order::rgba,
46-
image_channel_type::unsigned_int32, range<2>{XSize, YSize});
47-
image<2> ImgB(B, image_channel_order::rgba,
48-
image_channel_type::unsigned_int32, range<2>{XSize, YSize});
27+
if constexpr (Dimensions == 2)
28+
std::cout << "Starting the test with size = {" << XSize << ", " << YSize
29+
<< "} ... ";
30+
else
31+
std::cout << "Starting the test with size = {" << XSize << ", " << YSize
32+
<< ", " << ZSize << "} ... ";
4933

50-
Q.submit([&](handler &CGH) {
51-
auto AAcc = ImgA.get_access<uint4, access::mode::read>(CGH);
52-
auto BAcc = ImgB.get_access<uint4, access::mode::write>(CGH);
53-
CGH.parallel_for<class I2D>(range<2>{XSize, YSize}, [=](id<2> Id) {
54-
sycl::int2 Coord(Id[0], Id[1]);
55-
BAcc.write(Coord, AAcc.read(Coord));
56-
});
57-
}).wait();
58-
} catch (exception const &e) {
59-
std::cout << "Failed" << std::endl;
60-
std::cerr << "SYCL Exception caught: " << e.what();
61-
return 1;
62-
}
34+
const size_t NumI32Elts = XSize * YSize * ZSize * 4;
35+
range<Dimensions> ImgRange;
36+
if constexpr (Dimensions == 2)
37+
ImgRange = range<Dimensions>{XSize, YSize};
38+
else
39+
ImgRange = range<Dimensions>{XSize, YSize, ZSize};
6340

64-
int NumErrors = check(B, NumI32Elts);
65-
free(A);
66-
free(B);
67-
return NumErrors;
68-
}
41+
// Allocate input buffer and initialize it with some values.
42+
uint32_t *Input = (uint32_t *)malloc(NumI32Elts * sizeof(uint32_t));
43+
for (int i = 0; i < NumI32Elts; i++)
44+
Input[i] = i;
6945

70-
int test3D(queue &Q, size_t XSize, size_t YSize, size_t ZSize) {
71-
std::cout << "Starting the test with size = {" << XSize << ", " << YSize
72-
<< ", " << ZSize << "} ... ";
73-
size_t NumI32Elts = XSize * YSize * ZSize * 4;
74-
uint32_t *A = (uint32_t *)malloc(NumI32Elts * sizeof(uint32_t));
75-
uint32_t *B = (uint32_t *)malloc(NumI32Elts * sizeof(uint32_t));
76-
init(A, B, NumI32Elts);
46+
// calloc to ensure that the output buffer is initialized to zero.
47+
uint32_t *Output = (uint32_t *)calloc(NumI32Elts, sizeof(uint32_t));
7748

49+
// Create the image and submit the copy kernel.
7850
try {
79-
image<3> ImgA(A, image_channel_order::rgba,
80-
image_channel_type::unsigned_int32,
81-
range<3>{XSize, YSize, ZSize});
82-
image<3> ImgB(B, image_channel_order::rgba,
83-
image_channel_type::unsigned_int32,
84-
range<3>{XSize, YSize, ZSize});
51+
image<Dimensions> ImgA(Input, image_channel_order::rgba,
52+
image_channel_type::unsigned_int32, ImgRange);
53+
image<Dimensions> ImgB(Output, image_channel_order::rgba,
54+
image_channel_type::unsigned_int32, ImgRange);
8555

8656
Q.submit([&](handler &CGH) {
87-
auto AAcc = ImgA.get_access<uint4, access::mode::read>(CGH);
88-
auto BAcc = ImgB.get_access<uint4, access::mode::write>(CGH);
89-
CGH.parallel_for<class I3D>(range<3>{XSize, YSize, ZSize},
90-
[=](id<3> Id) {
91-
sycl::int4 Coord(Id[0], Id[1], Id[2], 0);
92-
BAcc.write(Coord, AAcc.read(Coord));
93-
});
57+
auto AAcc = ImgA.template get_access<uint4, access::mode::read>(CGH);
58+
auto BAcc = ImgB.template get_access<uint4, access::mode::write>(CGH);
59+
CGH.parallel_for<CopyKernel<Dimensions>>(
60+
ImgRange, [=](id<Dimensions> Id) {
61+
// Use int2 for 2D and int4 for 3D images.
62+
if constexpr (Dimensions == 3) {
63+
sycl::int4 Coord(Id[0], Id[1], Id[2], 0);
64+
BAcc.write(Coord, AAcc.read(Coord));
65+
} else {
66+
sycl::int2 Coord(Id[0], Id[1]);
67+
BAcc.write(Coord, AAcc.read(Coord));
68+
}
69+
});
9470
}).wait();
9571
} catch (exception const &e) {
72+
9673
std::cout << "Failed" << std::endl;
9774
std::cerr << "SYCL Exception caught: " << e.what();
75+
free(Input);
76+
free(Output);
9877
return 1;
9978
}
10079

101-
int NumErrors = check(B, NumI32Elts);
102-
free(A);
103-
free(B);
104-
return NumErrors;
80+
// Check the output buffer.
81+
bool HasError = false;
82+
for (int i = 0; i < NumI32Elts; i++) {
83+
if (Output[i] != i) {
84+
HasError = true;
85+
break;
86+
}
87+
}
88+
89+
if (!HasError) {
90+
std::cout << "Passed" << std::endl;
91+
} else {
92+
std::cout << "Failed" << std::endl;
93+
}
94+
95+
free(Input);
96+
free(Output);
97+
return HasError;
10598
}
10699

107100
int main() {
108-
int NumErrors = 0;
109-
110101
queue Q;
111102
device Dev = Q.get_device();
112103
std::cout << "Running on " << Dev.get_info<info::device::name>()
@@ -127,17 +118,18 @@ int main() {
127118

128119
// Using max sizes in one image may require too much memory.
129120
// Check them one by one.
130-
NumErrors += test2D(Q, MaxWidth2D, 2);
131-
NumErrors += test2D(Q, 2, MaxHeight2D);
121+
bool HasError = false;
122+
HasError |= testND<2>(Q, MaxWidth2D, 2);
123+
HasError |= testND<2>(Q, 2, MaxHeight2D);
132124

133-
NumErrors += test3D(Q, MaxWidth3D, 2, 3);
134-
NumErrors += test3D(Q, 2, MaxHeight3D, 3);
135-
NumErrors += test3D(Q, 2, 3, MaxDepth3D);
125+
HasError |= testND<3>(Q, MaxWidth3D, 2, 3);
126+
HasError |= testND<3>(Q, 2, MaxHeight3D, 3);
127+
HasError |= testND<3>(Q, 2, 3, MaxDepth3D);
136128

137-
if (NumErrors)
138-
std::cerr << "Test failed." << std::endl;
129+
if (HasError)
130+
std::cout << "Test failed." << std::endl;
139131
else
140132
std::cout << "Test passed." << std::endl;
141133

142-
return NumErrors;
134+
return HasError ? 1 : 0;
143135
}

sycl/test-e2e/Graph/Profiling/event_profiling_info.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
// Extra run to check for immediate-command-list in Level Zero
77
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
88

9+
// UNSUPPORTED: level_zero_v2_adapter
10+
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17760
11+
912
// This test checks the profiling of an event returned
1013
// from graph submission with event::get_profiling_info().
1114
// It first tests a graph made exclusively of memory operations,

sycl/test-e2e/Graph/Profiling/event_profiling_info_usm.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
// Extra run to check for immediate-command-list in Level Zero
77
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
88

9+
// UNSUPPORTED: level_zero_v2_adapter
10+
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17760
11+
912
// This test checks the profiling of an event returned
1013
// from graph submission with event::get_profiling_info().
1114
// It first tests a graph made exclusively of memory operations,
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
config.required_features += ['aspect-ext_oneapi_graph']
2+
# V2 does not have support for MCL yet
3+
config.unsupported_features += ['level_zero_v2_adapter']

0 commit comments

Comments
 (0)