|
| 1 | +/*************************************************************************** |
| 2 | + * |
| 3 | + * Copyright (C) Codeplay Software Ltd. |
| 4 | + * |
| 5 | + * Part of the LLVM Project, under the Apache License v2.0 with LLVM |
| 6 | + * Exceptions. See https://llvm.org/LICENSE.txt for license information. |
| 7 | + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 8 | + * |
| 9 | + * Unless required by applicable law or agreed to in writing, software |
| 10 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | + * See the License for the specific language governing permissions and |
| 13 | + * limitations under the License. |
| 14 | + * |
| 15 | + * SYCLcompat API |
| 16 | + * |
| 17 | + * launch_inlining.cpp |
| 18 | + * |
| 19 | + * Description: |
| 20 | + * Ensure kernels are inlined |
| 21 | + **************************************************************************/ |
| 22 | +// RUN: %clangxx -fsycl -fgpu-inline-threshold=0 %if cl_options %{/clang:-S /clang:-emit-llvm%} %else %{-S -emit-llvm%} %s -o - | FileCheck %s |
| 23 | +// We set -fgpu-inline-threshold=0 to disable heuristic inlining for the |
| 24 | +// purposes of the test |
| 25 | +#include <sycl/detail/core.hpp> |
| 26 | +#include <sycl/group_barrier.hpp> |
| 27 | +#include <syclcompat/launch.hpp> |
| 28 | +#include <syclcompat/memory.hpp> |
| 29 | + |
| 30 | +namespace compat_exp = syclcompat::experimental; |
| 31 | +namespace sycl_exp = sycl::ext::oneapi::experimental; |
| 32 | +namespace sycl_intel_exp = sycl::ext::intel::experimental; |
| 33 | + |
| 34 | +static constexpr int LOCAL_MEM_SIZE = 1024; |
| 35 | + |
| 36 | +// CHECK: define {{.*}}spir_kernel{{.*}}write_mem_kernel{{.*}} { |
| 37 | +// CHECK-NOT: call {{.*}}write_mem_kernel |
| 38 | +// CHECK: } |
| 39 | + |
| 40 | +template <typename T> void write_mem_kernel(T *data, int num_elements) { |
| 41 | + const int id = |
| 42 | + sycl::ext::oneapi::this_work_item::get_nd_item<1>().get_global_id(0); |
| 43 | + if (id < num_elements) { |
| 44 | + data[id] = static_cast<T>(id); |
| 45 | + } |
| 46 | +}; |
| 47 | + |
| 48 | +// CHECK: define {{.*}}spir_kernel{{.*}}dynamic_local_mem_typed_kernel{{.*}} { |
| 49 | +// CHECK-NOT: call {{.*}}dynamic_local_mem_typed_kernel |
| 50 | +// CHECK: } |
| 51 | +template <typename T> |
| 52 | +void dynamic_local_mem_typed_kernel(T *data, char *local_mem) { |
| 53 | + constexpr size_t num_elements = LOCAL_MEM_SIZE / sizeof(T); |
| 54 | + T *typed_local_mem = reinterpret_cast<T *>(local_mem); |
| 55 | + |
| 56 | + const int id = |
| 57 | + sycl::ext::oneapi::this_work_item::get_nd_item<1>().get_global_id(0); |
| 58 | + if (id < num_elements) { |
| 59 | + typed_local_mem[id] = static_cast<T>(id); |
| 60 | + } |
| 61 | + sycl::group_barrier(sycl::ext::oneapi::this_work_item::get_work_group<1>()); |
| 62 | + if (id < num_elements) { |
| 63 | + data[id] = typed_local_mem[num_elements - id - 1]; |
| 64 | + } |
| 65 | +}; |
| 66 | + |
| 67 | +int test_write_mem() { |
| 68 | + compat_exp::launch_policy my_dim3_config(syclcompat::dim3{32}, |
| 69 | + syclcompat::dim3{32}); |
| 70 | + |
| 71 | + const int memsize = 1024; |
| 72 | + int *d_a = (int *)syclcompat::malloc(memsize); |
| 73 | + compat_exp::launch<write_mem_kernel<int>>(my_dim3_config, d_a, |
| 74 | + memsize / sizeof(int)) |
| 75 | + .wait(); |
| 76 | + |
| 77 | + syclcompat::free(d_a); |
| 78 | + return 0; |
| 79 | +} |
| 80 | + |
| 81 | +int test_lmem_launch() { |
| 82 | + int local_mem_size = LOCAL_MEM_SIZE; |
| 83 | + |
| 84 | + size_t num_elements = local_mem_size / sizeof(int); |
| 85 | + int *d_a = (int *)syclcompat::malloc(local_mem_size); |
| 86 | + |
| 87 | + compat_exp::launch_policy my_config( |
| 88 | + sycl::nd_range<1>{{256}, {256}}, |
| 89 | + compat_exp::local_mem_size(local_mem_size)); |
| 90 | + |
| 91 | + compat_exp::launch<dynamic_local_mem_typed_kernel<int>>(my_config, d_a) |
| 92 | + .wait(); |
| 93 | + |
| 94 | + syclcompat::free(d_a); |
| 95 | + |
| 96 | + return 0; |
| 97 | +} |
0 commit comments