Skip to content

Commit 04e2883

Browse files
authored
[SYCL] Cover offload-compress for bfloat16 devicelib tests (#18192)
Signed-off-by: jinge90 <ge.jin@intel.com>
1 parent fffc5db commit 04e2883

File tree

3 files changed

+131
-111
lines changed

3 files changed

+131
-111
lines changed

sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp

Lines changed: 1 addition & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -24,114 +24,4 @@
2424
// UNSUPPORTED: target-nvidia || target-amd
2525
// UNSUPPORTED-INTENDED: bfloat16 device library is not used on AMD and Nvidia.
2626

27-
#include <sycl/detail/core.hpp>
28-
#include <sycl/ext/oneapi/bfloat16.hpp>
29-
#include <sycl/kernel_bundle.hpp>
30-
31-
#include <dlfcn.h>
32-
#include <iostream>
33-
34-
// File-scope conveniences shared by both halves of the test (BUILD_LIB and
// the main executable): the sycl namespace is opened so queue/buffer/handler
// can be written unqualified, sycl_read/sycl_write name the two accessor
// modes used below, and BFP abbreviates the bfloat16 type under test.
using namespace sycl;
35-
36-
constexpr access::mode sycl_read = access::mode::read;
37-
constexpr access::mode sycl_write = access::mode::write;
38-
39-
using BFP = sycl::ext::oneapi::bfloat16;
40-
41-
#ifdef BUILD_LIB
42-
// FOO_KERN is the kernel name used by foo(); main() later searches the
// registered kernel ids for the substring "FOO_KERN".
//
// foo() is only compiled when BUILD_LIB is defined (the shared-library half
// of the test). It creates its own default queue, runs a single_task that
// converts the float 16.5f to bfloat16 on the device, waits for completion,
// and prints the result prefixed with "In foo: ".
// The inner brace scope ends the lifetime of both buffers before bf16_v is
// printed; per SYCL buffer semantics that is when the device result is
// expected to be copied back into bf16_v.
class FOO_KERN;
43-
void foo() {
44-
queue deviceQueue;
45-
BFP bf16_v;
46-
float fp32_v = 16.5f;
47-
{
48-
buffer<float, 1> fp32_buffer{&fp32_v, 1};
49-
buffer<BFP, 1> bf16_buffer{&bf16_v, 1};
50-
deviceQueue
51-
.submit([&](handler &cgh) {
52-
auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh);
53-
auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh);
54-
cgh.single_task<FOO_KERN>([=]() { bf16_acc[0] = BFP{fp32_acc[0]}; });
55-
})
56-
.wait();
57-
}
58-
std::cout << "In foo: " << bf16_v << std::endl;
59-
}
60-
#else
61-
62-
// MAINRUN is the kernel name used by main_run().
//
// main_run() belongs to the executable half of the test (BUILD_LIB not
// defined). On the caller-supplied queue it runs a single_task that adds
// 0.5f to the float 16.5f and converts the sum to bfloat16, waits for
// completion, and prints the result prefixed with "In run: ".
// The inner brace scope ends the lifetime of both buffers before bf16_v is
// printed; per SYCL buffer semantics that is when the device result is
// expected to be copied back into bf16_v.
class MAINRUN;
63-
void main_run(queue &deviceQueue) {
64-
BFP bf16_v;
65-
float fp32_v = 16.5f;
66-
{
67-
buffer<float, 1> fp32_buffer{&fp32_v, 1};
68-
buffer<BFP, 1> bf16_buffer{&bf16_v, 1};
69-
deviceQueue
70-
.submit([&](handler &cgh) {
71-
auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh);
72-
auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh);
73-
cgh.single_task<class MAINRUN>(
74-
[=]() { bf16_acc[0] = BFP{fp32_acc[0] + 0.5f}; });
75-
})
76-
.wait();
77-
}
78-
std::cout << "In run: " << bf16_v << std::endl;
79-
}
80-
81-
// Builds the shared-library file name passed to dlopen() in main().
// STRINGIFY goes through STRINGIFY_HELPER so that its argument is
// macro-expanded before being turned into a string literal; SO_FNAME then
// concatenates "lib", the stringified FNAME and ".so".
// FNAME is not defined in this file; it is expected to be supplied on the
// compiler command line (-DFNAME=...).
#define STRINGIFY_HELPER(A) #A
82-
#define STRINGIFY(A) STRINGIFY_HELPER(A)
83-
#define SO_FNAME "lib" STRINGIFY(FNAME) ".so"
84-
85-
// Test driver for the executable half (BUILD_LIB not defined). Steps:
//   1. Run main_run() so the executable's own bfloat16 kernel is used first.
//   2. dlopen() the library named by SO_FNAME, look up "_Z3foov" (the Itanium
//      C++ ABI mangling of foo()) and call it.
//   3. Query sycl::get_kernel_ids() and require that some kernel name
//      contains "FOO_KERN" while the library is loaded.
//   4. dlclose() the library, query again and require that no kernel name
//      contains "FOO_KERN" any more.
//   5. Run an unnamed single_task converting three floats to bfloat16 on the
//      same queue after the unload, and print them prefixed with "In main: ".
// Returns 0; failures surface only through assert().
//
// NOTE(review): the results of dlopen() and dlsym() are not checked, so a
// missing library or symbol ends in a call through a null function pointer
// instead of a diagnostic (dlerror() is never consulted).
// NOTE(review): both checks use assert(), so they vanish if the test is ever
// built with NDEBUG.
// NOTE(review): std::strstr, assert and std::vector are used without a
// visible #include of <cstring>, <cassert> or <vector>; presumably they
// arrive transitively through the SYCL headers -- confirm.
// NOTE(review): `for (auto k : ...)` copies each kernel_id; a const reference
// would avoid that.
int main() {
86-
BFP bf16_array[3];
87-
float fp32_array[3] = {7.0f, 8.5f, 0.5f};
88-
queue deviceQueue;
89-
std::vector<sycl::kernel_id> all_kernel_ids;
90-
bool dynlib_kernel_available = false;
91-
bool dynlib_kernel_unavailable = true;
92-
main_run(deviceQueue);
93-
94-
void *handle = dlopen(SO_FNAME, RTLD_LAZY);
95-
void (*func)();
96-
*(void **)(&func) = dlsym(handle, "_Z3foov");
97-
func();
98-
all_kernel_ids = sycl::get_kernel_ids();
99-
for (auto k : all_kernel_ids) {
100-
if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN"))
101-
dynlib_kernel_available = true;
102-
}
103-
104-
// Before dlclose, the FOO_KERN from sycl dynamic library must exist.
105-
assert(dynlib_kernel_available);
106-
107-
dlclose(handle);
108-
109-
all_kernel_ids = sycl::get_kernel_ids();
110-
for (auto k : all_kernel_ids) {
111-
if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN"))
112-
dynlib_kernel_unavailable = false;
113-
}
114-
115-
assert(dynlib_kernel_unavailable);
116-
117-
{
118-
buffer<float, 1> fp32_buffer{fp32_array, 3};
119-
buffer<BFP, 1> bf16_buffer{bf16_array, 3};
120-
deviceQueue
121-
.submit([&](handler &cgh) {
122-
auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh);
123-
auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh);
124-
cgh.single_task([=]() {
125-
bf16_acc[0] = BFP{fp32_acc[0]};
126-
bf16_acc[1] = BFP{fp32_acc[1]};
127-
bf16_acc[2] = BFP{fp32_acc[2]};
128-
});
129-
})
130-
.wait();
131-
}
132-
std::cout << "In main: " << bf16_array[0] << " " << bf16_array[1] << " "
133-
<< bf16_array[2] << std::endl;
134-
135-
return 0;
136-
}
137-
#endif
27+
#include "bfloat16_conversion_dlopen_test.hpp"
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#include <sycl/detail/core.hpp>
2+
#include <sycl/ext/oneapi/bfloat16.hpp>
3+
#include <sycl/kernel_bundle.hpp>
4+
5+
#include <dlfcn.h>
6+
#include <iostream>
7+
8+
// File-scope conveniences shared by both halves of the test (BUILD_LIB and
// the main executable): the sycl namespace is opened so queue/buffer/handler
// can be written unqualified, sycl_read/sycl_write name the two accessor
// modes used below, and BFP abbreviates the bfloat16 type under test.
// NOTE(review): this now lives in a header with no include guard and a
// file-scope using-directive; acceptable only while each test translation
// unit includes it exactly once.
using namespace sycl;
9+
10+
constexpr access::mode sycl_read = access::mode::read;
11+
constexpr access::mode sycl_write = access::mode::write;
12+
13+
using BFP = sycl::ext::oneapi::bfloat16;
14+
15+
#ifdef BUILD_LIB
16+
// FOO_KERN is the kernel name used by foo(); main() later searches the
// registered kernel ids for the substring "FOO_KERN".
//
// foo() is only compiled when BUILD_LIB is defined (the shared-library half
// of the test). It creates its own default queue, runs a single_task that
// converts the float 16.5f to bfloat16 on the device, waits for completion,
// and prints the result prefixed with "In foo: ".
// The inner brace scope ends the lifetime of both buffers before bf16_v is
// printed; per SYCL buffer semantics that is when the device result is
// expected to be copied back into bf16_v.
class FOO_KERN;
17+
void foo() {
18+
queue deviceQueue;
19+
BFP bf16_v;
20+
float fp32_v = 16.5f;
21+
{
22+
buffer<float, 1> fp32_buffer{&fp32_v, 1};
23+
buffer<BFP, 1> bf16_buffer{&bf16_v, 1};
24+
deviceQueue
25+
.submit([&](handler &cgh) {
26+
auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh);
27+
auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh);
28+
cgh.single_task<FOO_KERN>([=]() { bf16_acc[0] = BFP{fp32_acc[0]}; });
29+
})
30+
.wait();
31+
}
32+
std::cout << "In foo: " << bf16_v << std::endl;
33+
}
34+
#else
35+
36+
// MAINRUN is the kernel name used by main_run().
//
// main_run() belongs to the executable half of the test (BUILD_LIB not
// defined). On the caller-supplied queue it runs a single_task that adds
// 0.5f to the float 16.5f and converts the sum to bfloat16, waits for
// completion, and prints the result prefixed with "In run: ".
// The inner brace scope ends the lifetime of both buffers before bf16_v is
// printed; per SYCL buffer semantics that is when the device result is
// expected to be copied back into bf16_v.
class MAINRUN;
37+
void main_run(queue &deviceQueue) {
38+
BFP bf16_v;
39+
float fp32_v = 16.5f;
40+
{
41+
buffer<float, 1> fp32_buffer{&fp32_v, 1};
42+
buffer<BFP, 1> bf16_buffer{&bf16_v, 1};
43+
deviceQueue
44+
.submit([&](handler &cgh) {
45+
auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh);
46+
auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh);
47+
cgh.single_task<class MAINRUN>(
48+
[=]() { bf16_acc[0] = BFP{fp32_acc[0] + 0.5f}; });
49+
})
50+
.wait();
51+
}
52+
std::cout << "In run: " << bf16_v << std::endl;
53+
}
54+
55+
// Builds the shared-library file name passed to dlopen() in main().
// STRINGIFY goes through STRINGIFY_HELPER so that its argument is
// macro-expanded before being turned into a string literal; SO_FNAME then
// concatenates "lib", the stringified FNAME and ".so".
// FNAME is not defined in this file; it is expected to be supplied on the
// compiler command line (-DFNAME=...).
#define STRINGIFY_HELPER(A) #A
56+
#define STRINGIFY(A) STRINGIFY_HELPER(A)
57+
#define SO_FNAME "lib" STRINGIFY(FNAME) ".so"
58+
59+
// Test driver for the executable half (BUILD_LIB not defined). Steps:
//   1. Run main_run() so the executable's own bfloat16 kernel is used first.
//   2. dlopen() the library named by SO_FNAME, look up "_Z3foov" (the Itanium
//      C++ ABI mangling of foo()) and call it.
//   3. Query sycl::get_kernel_ids() and require that some kernel name
//      contains "FOO_KERN" while the library is loaded.
//   4. dlclose() the library, query again and require that no kernel name
//      contains "FOO_KERN" any more.
//   5. Run an unnamed single_task converting three floats to bfloat16 on the
//      same queue after the unload, and print them prefixed with "In main: ".
// Returns 0; failures surface only through assert().
//
// NOTE(review): the results of dlopen() and dlsym() are not checked, so a
// missing library or symbol ends in a call through a null function pointer
// instead of a diagnostic (dlerror() is never consulted).
// NOTE(review): both checks use assert(), so they vanish if the test is ever
// built with NDEBUG.
// NOTE(review): std::strstr, assert and std::vector are used without a
// visible #include of <cstring>, <cassert> or <vector>; presumably they
// arrive transitively through the SYCL headers -- confirm.
// NOTE(review): `for (auto k : ...)` copies each kernel_id; a const reference
// would avoid that.
int main() {
60+
BFP bf16_array[3];
61+
float fp32_array[3] = {7.0f, 8.5f, 0.5f};
62+
queue deviceQueue;
63+
std::vector<sycl::kernel_id> all_kernel_ids;
64+
bool dynlib_kernel_available = false;
65+
bool dynlib_kernel_unavailable = true;
66+
main_run(deviceQueue);
67+
68+
void *handle = dlopen(SO_FNAME, RTLD_LAZY);
69+
void (*func)();
70+
*(void **)(&func) = dlsym(handle, "_Z3foov");
71+
func();
72+
all_kernel_ids = sycl::get_kernel_ids();
73+
for (auto k : all_kernel_ids) {
74+
if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN"))
75+
dynlib_kernel_available = true;
76+
}
77+
78+
// Before dlclose, the FOO_KERN from sycl dynamic library must exist.
79+
assert(dynlib_kernel_available);
80+
81+
dlclose(handle);
82+
83+
all_kernel_ids = sycl::get_kernel_ids();
84+
for (auto k : all_kernel_ids) {
85+
if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN"))
86+
dynlib_kernel_unavailable = false;
87+
}
88+
89+
assert(dynlib_kernel_unavailable);
90+
91+
{
92+
buffer<float, 1> fp32_buffer{fp32_array, 3};
93+
buffer<BFP, 1> bf16_buffer{bf16_array, 3};
94+
deviceQueue
95+
.submit([&](handler &cgh) {
96+
auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh);
97+
auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh);
98+
cgh.single_task([=]() {
99+
bf16_acc[0] = BFP{fp32_acc[0]};
100+
bf16_acc[1] = BFP{fp32_acc[1]};
101+
bf16_acc[2] = BFP{fp32_acc[2]};
102+
});
103+
})
104+
.wait();
105+
}
106+
std::cout << "In main: " << bf16_array[0] << " " << bf16_array[1] << " "
107+
<< bf16_array[2] << std::endl;
108+
109+
return 0;
110+
}
111+
#endif
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//==---------- bfloat16 devicelib dlopen_test_compress for SYCL JIT --------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Check bfloat16 devicelib device image compression.
10+
11+
// REQUIRES: linux, zstd
12+
// RUN: %{build} --offload-compress -DBUILD_LIB -fPIC -shared -o %T/lib%basename_t_compress.so
13+
// RUN: %{build} --offload-compress -DFNAME=%basename_t_compress -ldl -o %t1.out -Wl,-rpath=%T
14+
// RUN: %{run} %t1.out
15+
16+
// UNSUPPORTED: target-nvidia || target-amd
17+
// UNSUPPORTED-INTENDED: bfloat16 device library is not used on AMD and Nvidia.
18+
19+
#include "bfloat16_conversion_dlopen_test.hpp"

0 commit comments

Comments
 (0)