Skip to content

Commit 25ba5bc

Browse files
authored
[SYCL] Implement SYCL/device-requirements (#6956)
As part of optional kernel features implementation we need to create the "SYCL/device-requirements" property set and populate it with the "aspects" property. Since we haven't implemented device code split per-aspect part yet, this PR only covers "aspects" part of "SYCL/device-requirements" property set. Co-authored-by: @AlexeySachkov
1 parent 56554d7 commit 25ba5bc

File tree

7 files changed

+223
-0
lines changed

7 files changed

+223
-0
lines changed

llvm/include/llvm/Support/PropertySetIO.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ class PropertySetRegistry {
193193
static constexpr char SYCL_ASSERT_USED[] = "SYCL/assert used";
194194
static constexpr char SYCL_EXPORTED_SYMBOLS[] = "SYCL/exported symbols";
195195
static constexpr char SYCL_DEVICE_GLOBALS[] = "SYCL/device globals";
196+
static constexpr char SYCL_DEVICE_REQUIREMENTS[] = "SYCL/device requirements";
196197

197198
// Function for bulk addition of an entire property set under given category
198199
// (property set name).

llvm/lib/Support/PropertySetIO.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ constexpr char PropertySetRegistry::SYCL_MISC_PROP[];
202202
constexpr char PropertySetRegistry::SYCL_ASSERT_USED[];
203203
constexpr char PropertySetRegistry::SYCL_EXPORTED_SYMBOLS[];
204204
constexpr char PropertySetRegistry::SYCL_DEVICE_GLOBALS[];
205+
constexpr char PropertySetRegistry::SYCL_DEVICE_REQUIREMENTS[];
205206

206207
} // namespace util
207208
} // namespace llvm
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
; Original code:
2+
; #include <sycl/sycl.hpp>
3+
4+
; [[__sycl_detail__::__uses_aspects__(sycl::aspect::fp64, sycl::aspect::cpu)]] void foo() {}
5+
6+
; [[__sycl_detail__::__uses_aspects__(sycl::aspect::queue_profiling, sycl::aspect::host, sycl::aspect::image)]] void bar() {}
7+
8+
; int main() {
9+
; sycl::queue q;
10+
; q.submit([&](sycl::handler &cgh) {
11+
; cgh.single_task([=]() { foo(); });
12+
; cgh.single_task([=]() { bar(); });
13+
; });
14+
; }
15+
16+
; RUN: sycl-post-link -split=auto %s -o %t.files.table
17+
; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP-AUTO-SPLIT
18+
19+
; RUN: sycl-post-link -split=kernel %s -o %t.files.table
20+
; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP-KERNEL-SPLIT-0
21+
; RUN: FileCheck %s -input-file=%t.files_1.prop --check-prefix CHECK-PROP-KERNEL-SPLIT-1
22+
23+
; CHECK-PROP-AUTO-SPLIT: [SYCL/device requirements]
24+
; CHECK-PROP-AUTO-SPLIT-NEXT: aspects=2|gCAAAAAAAAAAAAAABAAAAYAAAAQCAAAAMAAAAA
25+
26+
; CHECK-PROP-KERNEL-SPLIT-0: [SYCL/device requirements]
27+
; CHECK-PROP-KERNEL-SPLIT-0-NEXT: aspects=2|gBAAAAAAAAAAAAAAJAAAAwAAAAA
28+
29+
; CHECK-PROP-KERNEL-SPLIT-1: [SYCL/device requirements]
30+
; CHECK-PROP-KERNEL-SPLIT-1-NEXT: aspects=2|ABAAAAAAAAQAAAAAGAAAAA
31+
32+
source_filename = "llvm-link"
33+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
34+
target triple = "spir64-unknown-unknown"
35+
36+
%class.anon = type { i8 }
37+
38+
$_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_ = comdat any
39+
40+
$_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE0_ = comdat any
41+
42+
; Function Attrs: convergent mustprogress noinline norecurse optnone
43+
define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_() #0 comdat !kernel_arg_buffer_location !43 {
44+
entry:
45+
%__SYCLKernel = alloca %class.anon, align 1
46+
%__SYCLKernel.ascast = addrspacecast %class.anon* %__SYCLKernel to %class.anon addrspace(4)*
47+
call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(%class.anon addrspace(4)* noundef align 1 dereferenceable_or_null(1) %__SYCLKernel.ascast) #3
48+
ret void
49+
}
50+
51+
; Function Attrs: convergent mustprogress noinline norecurse optnone
52+
define internal spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(%class.anon addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #1 align 2 {
53+
entry:
54+
%this.addr = alloca %class.anon addrspace(4)*, align 8
55+
%this.addr.ascast = addrspacecast %class.anon addrspace(4)** %this.addr to %class.anon addrspace(4)* addrspace(4)*
56+
store %class.anon addrspace(4)* %this, %class.anon addrspace(4)* addrspace(4)* %this.addr.ascast, align 8
57+
%this1 = load %class.anon addrspace(4)*, %class.anon addrspace(4)* addrspace(4)* %this.addr.ascast, align 8
58+
call spir_func void @_Z3foov() #3
59+
ret void
60+
}
61+
62+
; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
63+
define dso_local spir_func void @_Z3foov() #2 !sycl_used_aspects !44 {
64+
entry:
65+
ret void
66+
}
67+
68+
; Function Attrs: convergent mustprogress noinline norecurse optnone
69+
define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE0_() #0 comdat !kernel_arg_buffer_location !43 {
70+
entry:
71+
%__SYCLKernel = alloca %class.anon, align 1
72+
%__SYCLKernel.ascast = addrspacecast %class.anon* %__SYCLKernel to %class.anon addrspace(4)*
73+
call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE0_clEv(%class.anon addrspace(4)* noundef align 1 dereferenceable_or_null(1) %__SYCLKernel.ascast) #3
74+
ret void
75+
}
76+
77+
; Function Attrs: convergent mustprogress noinline norecurse optnone
78+
define internal spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE0_clEv(%class.anon addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #1 align 2 {
79+
entry:
80+
%this.addr = alloca %class.anon addrspace(4)*, align 8
81+
%this.addr.ascast = addrspacecast %class.anon addrspace(4)** %this.addr to %class.anon addrspace(4)* addrspace(4)*
82+
store %class.anon addrspace(4)* %this, %class.anon addrspace(4)* addrspace(4)* %this.addr.ascast, align 8
83+
%this1 = load %class.anon addrspace(4)*, %class.anon addrspace(4)* addrspace(4)* %this.addr.ascast, align 8
84+
call spir_func void @_Z3barv() #3
85+
ret void
86+
}
87+
88+
; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
89+
define dso_local spir_func void @_Z3barv() #2 !sycl_used_aspects !45 {
90+
entry:
91+
ret void
92+
}
93+
94+
attributes #0 = { convergent mustprogress noinline norecurse optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="main2.cpp" "uniform-work-group-size"="true" }
95+
attributes #1 = { convergent mustprogress noinline norecurse optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
96+
attributes #2 = { convergent mustprogress noinline norecurse nounwind optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
97+
attributes #3 = { convergent }
98+
99+
!opencl.spir.version = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0}
100+
!spirv.Source = !{!1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1}
101+
!sycl_aspects = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39}
102+
!llvm.ident = !{!40, !40, !40, !40, !40, !40, !40, !40, !40, !40, !40, !40, !40, !40, !40, !40}
103+
!llvm.module.flags = !{!41, !42}
104+
105+
!0 = !{i32 1, i32 2}
106+
!1 = !{i32 4, i32 100000}
107+
!2 = !{!"host", i32 0}
108+
!3 = !{!"cpu", i32 1}
109+
!4 = !{!"gpu", i32 2}
110+
!5 = !{!"accelerator", i32 3}
111+
!6 = !{!"custom", i32 4}
112+
!7 = !{!"fp16", i32 5}
113+
!8 = !{!"fp64", i32 6}
114+
!9 = !{!"int64_base_atomics", i32 7}
115+
!10 = !{!"int64_extended_atomics", i32 8}
116+
!11 = !{!"image", i32 9}
117+
!12 = !{!"online_compiler", i32 10}
118+
!13 = !{!"online_linker", i32 11}
119+
!14 = !{!"queue_profiling", i32 12}
120+
!15 = !{!"usm_device_allocations", i32 13}
121+
!16 = !{!"usm_host_allocations", i32 14}
122+
!17 = !{!"usm_shared_allocations", i32 15}
123+
!18 = !{!"usm_restricted_shared_allocations", i32 16}
124+
!19 = !{!"usm_system_allocations", i32 17}
125+
!20 = !{!"usm_system_allocator", i32 17}
126+
!21 = !{!"ext_intel_pci_address", i32 18}
127+
!22 = !{!"ext_intel_gpu_eu_count", i32 19}
128+
!23 = !{!"ext_intel_gpu_eu_simd_width", i32 20}
129+
!24 = !{!"ext_intel_gpu_slices", i32 21}
130+
!25 = !{!"ext_intel_gpu_subslices_per_slice", i32 22}
131+
!26 = !{!"ext_intel_gpu_eu_count_per_subslice", i32 23}
132+
!27 = !{!"ext_intel_max_mem_bandwidth", i32 24}
133+
!28 = !{!"ext_intel_mem_channel", i32 25}
134+
!29 = !{!"usm_atomic_host_allocations", i32 26}
135+
!30 = !{!"usm_atomic_shared_allocations", i32 27}
136+
!31 = !{!"atomic64", i32 28}
137+
!32 = !{!"ext_intel_device_info_uuid", i32 29}
138+
!33 = !{!"ext_oneapi_srgb", i32 30}
139+
!34 = !{!"ext_oneapi_native_assert", i32 31}
140+
!35 = !{!"host_debuggable", i32 32}
141+
!36 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33}
142+
!37 = !{!"ext_oneapi_cuda_async_barrier", i32 34}
143+
!38 = !{!"ext_oneapi_bfloat16", i32 35}
144+
!39 = !{!"ext_intel_free_memory", i32 36}
145+
!40 = !{!"clang version 16.0.0"}
146+
!41 = !{i32 1, !"wchar_size", i32 4}
147+
!42 = !{i32 7, !"frame-pointer", i32 2}
148+
!43 = !{}
149+
!44 = !{i32 6, i32 1}
150+
!45 = !{i32 12, i32 0, i32 9}

llvm/tools/sycl-post-link/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ add_llvm_tool(sycl-post-link
2828
SpecConstants.cpp
2929
SYCLDeviceLibReqMask.cpp
3030
SYCLKernelParamOptInfo.cpp
31+
SYCLDeviceRequirements.cpp
3132
ADDITIONAL_HEADER_DIRS
3233
${LLVMGenXIntrinsics_SOURCE_DIR}/GenXIntrinsics/include
3334
${LLVMGenXIntrinsics_BINARY_DIR}/GenXIntrinsics/include
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//===----- SYCLDeviceRequirements.cpp - collect data for used aspects ----=-==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "SYCLDeviceRequirements.h"
10+
11+
#include "llvm/ADT/StringRef.h"
12+
#include "llvm/IR/Module.h"
13+
14+
#include <set>
15+
#include <vector>
16+
17+
using namespace llvm;
18+
19+
std::map<StringRef, std::vector<uint32_t>>
20+
llvm::getSYCLDeviceRequirements(const Module &M) {
21+
std::map<StringRef, std::vector<uint32_t>> Result;
22+
auto ExtractIntegerFromMDNodeOperand = [=](const MDNode *N,
23+
unsigned OpNo) -> unsigned {
24+
Constant *C =
25+
cast<ConstantAsMetadata>(N->getOperand(OpNo).get())->getValue();
26+
return static_cast<uint32_t>(C->getUniqueInteger().getZExtValue());
27+
};
28+
std::set<uint32_t> Aspects;
29+
for (const Function &F : M) {
30+
if (!F.hasMetadata("sycl_used_aspects"))
31+
continue;
32+
33+
const MDNode *MD = F.getMetadata("sycl_used_aspects");
34+
for (size_t I = 0, E = MD->getNumOperands(); I < E; ++I) {
35+
Aspects.insert(ExtractIntegerFromMDNodeOperand(MD, I));
36+
}
37+
}
38+
39+
Result["aspects"] = std::vector<uint32_t>(Aspects.begin(), Aspects.end());
40+
return Result;
41+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===----- SYCLDeviceRequirements.h - collect data for used aspects ------=-==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <cstdint>
12+
#include <map>
13+
#include <vector>
14+
15+
namespace llvm {
16+
17+
class Module;
18+
class StringRef;
19+
20+
std::map<StringRef, std::vector<uint32_t>>
21+
getSYCLDeviceRequirements(const Module &M);
22+
23+
} // namespace llvm

llvm/tools/sycl-post-link/sycl-post-link.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "DeviceGlobals.h"
1818
#include "ModuleSplitter.h"
1919
#include "SYCLDeviceLibReqMask.h"
20+
#include "SYCLDeviceRequirements.h"
2021
#include "SYCLKernelParamOptInfo.h"
2122
#include "SpecConstants.h"
2223
#include "Support.h"
@@ -362,6 +363,11 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD,
362363
std::map<StringRef, uint32_t> RMEntry = {{"DeviceLibReqMask", MRMask}};
363364
PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry);
364365
}
366+
{
367+
std::map<StringRef, std::vector<uint32_t>> Requirements =
368+
getSYCLDeviceRequirements(M);
369+
PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, Requirements);
370+
}
365371
if (MD.Props.SpecConstsMet) {
366372
// extract spec constant maps per each module
367373
SpecIDMapTy TmpSpecIDMap;

0 commit comments

Comments
 (0)