Skip to content

Commit 6aa97b5

Browse files
[sycl-post-link] Add padding to default values of structs (#4855)
Some structs may require additional padding at the end of their default values to correctly fill the size they are reported to have. These changes enforce additional padding on default values for structs when the added bytes do not correspond to the reported size of the struct. This ensures correct offset of values, corresponding to those expected by both the DPC++ runtime and the generated SYCL kernels.
1 parent 7fa0569 commit 6aa97b5

File tree

2 files changed

+123
-1
lines changed

2 files changed

+123
-1
lines changed
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
; RUN: sycl-post-link -spec-const=default %s -o %t.files.table
2+
; RUN: FileCheck %s -input-file=%t.files_0.prop
3+
;
4+
; This test checks that composite specialization constants with padding gets the
5+
; correct padding in their default values to prevent values being inserted at
6+
; incorrect offsets.
7+
8+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
9+
target triple = "spir64_x86_64-unknown-unknown"
10+
11+
%"class.cl::sycl::specialization_id.7" = type { i8 }
12+
%"class.cl::sycl::range" = type { %"class.cl::sycl::detail::array" }
13+
%"class.cl::sycl::detail::array" = type { [1 x i64] }
14+
%struct.TestStruct = type <{ i32, i8, [3 x i8] }>
15+
16+
$_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_EUlNS0_14kernel_handlerEE_ = comdat any
17+
18+
@__usid_str = private unnamed_addr constant [37 x i8] c"9d329ad59055e972____ZL12StructSpecId\00", align 1
19+
@_ZL12StructSpecId = internal addrspace(1) constant { { i32, i8 } } { { i32, i8 } { i32 20, i8 99 } }, align 4
20+
@__usid_str.1 = private unnamed_addr constant [35 x i8] c"9d329ad59055e972____ZL10BoolSpecId\00", align 1
21+
@_ZL10BoolSpecId = internal addrspace(1) constant %"class.cl::sycl::specialization_id.7" { i8 1 }, align 1
22+
23+
; Function Attrs: convergent norecurse
24+
define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_EUlNS0_14kernel_handlerEE_(%struct.TestStruct addrspace(1)* %_arg_, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_1, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_2, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_3, i8 addrspace(1)* %_arg_4, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_6, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_7, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_8, i8 addrspace(1)* %_arg__specialization_constants_buffer) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !5 {
25+
entry:
26+
%ref.tmp.i = alloca %struct.TestStruct, align 4
27+
%0 = getelementptr inbounds %"class.cl::sycl::range", %"class.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0
28+
%1 = addrspacecast i64* %0 to i64 addrspace(4)*
29+
%2 = load i64, i64 addrspace(4)* %1, align 8
30+
%add.ptr.i = getelementptr inbounds %struct.TestStruct, %struct.TestStruct addrspace(1)* %_arg_, i64 %2
31+
%3 = getelementptr inbounds %"class.cl::sycl::range", %"class.cl::sycl::range"* %_arg_8, i64 0, i32 0, i32 0, i64 0
32+
%4 = addrspacecast i64* %3 to i64 addrspace(4)*
33+
%5 = load i64, i64 addrspace(4)* %4, align 8
34+
%add.ptr.i33 = getelementptr inbounds i8, i8 addrspace(1)* %_arg_4, i64 %5
35+
%6 = addrspacecast i8 addrspace(1)* %_arg__specialization_constants_buffer to i8 addrspace(4)*
36+
%ref.tmp.ascast.i = addrspacecast %struct.TestStruct* %ref.tmp.i to %struct.TestStruct addrspace(4)*
37+
%7 = bitcast %struct.TestStruct* %ref.tmp.i to i8*
38+
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %7) #5
39+
call spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI10TestStructET_PKcPKvS5_(%struct.TestStruct addrspace(4)* sret(%struct.TestStruct) align 4 %ref.tmp.ascast.i, i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([37 x i8], [37 x i8]* @__usid_str, i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ { i32, i8 } } addrspace(1)* @_ZL12StructSpecId to i8 addrspace(1)*) to i8 addrspace(4)*), i8 addrspace(4)* %6) #4
40+
%8 = bitcast %struct.TestStruct addrspace(1)* %add.ptr.i to i8 addrspace(1)*
41+
%9 = addrspacecast i8 addrspace(1)* %8 to i8 addrspace(4)*
42+
%10 = addrspacecast i8* %7 to i8 addrspace(4)*
43+
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* noundef align 4 dereferenceable(5) %9, i8 addrspace(4)* noundef align 4 dereferenceable(5) %10, i64 5, i1 false), !tbaa.struct !6
44+
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %7) #5
45+
%call.i.i.i = call spir_func zeroext i1 @_Z37__sycl_getScalar2020SpecConstantValueIbET_PKcPKvS4_(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([35 x i8], [35 x i8]* @__usid_str.1, i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds (%"class.cl::sycl::specialization_id.7", %"class.cl::sycl::specialization_id.7" addrspace(1)* @_ZL10BoolSpecId, i64 0, i32 0) to i8 addrspace(4)*), i8 addrspace(4)* %6) #4
46+
%arrayidx.ascast.i.i = addrspacecast i8 addrspace(1)* %add.ptr.i33 to i8 addrspace(4)*
47+
%frombool.i = zext i1 %call.i.i.i to i8
48+
store i8 %frombool.i, i8 addrspace(4)* %arrayidx.ascast.i.i, align 1, !tbaa !12
49+
ret void
50+
}
51+
52+
; Function Attrs: argmemonly nofree nosync nounwind willreturn
53+
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
54+
55+
; Function Attrs: convergent
56+
declare dso_local spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI10TestStructET_PKcPKvS5_(%struct.TestStruct addrspace(4)* sret(%struct.TestStruct) align 4, i8 addrspace(4)*, i8 addrspace(4)*, i8 addrspace(4)*) local_unnamed_addr #2
57+
58+
; Function Attrs: argmemonly nofree nounwind willreturn
59+
declare void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* noalias nocapture writeonly, i8 addrspace(4)* noalias nocapture readonly, i64, i1 immarg) #3
60+
61+
; Function Attrs: argmemonly nofree nosync nounwind willreturn
62+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
63+
64+
; Function Attrs: convergent
65+
declare dso_local spir_func zeroext i1 @_Z37__sycl_getScalar2020SpecConstantValueIbET_PKcPKvS4_(i8 addrspace(4)*, i8 addrspace(4)*, i8 addrspace(4)*) local_unnamed_addr #2
66+
67+
attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cpp" "uniform-work-group-size"="true" }
68+
attributes #1 = { argmemonly nofree nosync nounwind willreturn }
69+
attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
70+
attributes #3 = { argmemonly nofree nounwind willreturn }
71+
attributes #4 = { convergent }
72+
attributes #5 = { nounwind }
73+
74+
!opencl.spir.version = !{!0}
75+
!spirv.Source = !{!1}
76+
!llvm.ident = !{!2}
77+
!llvm.module.flags = !{!3, !4}
78+
79+
!0 = !{i32 1, i32 2}
80+
!1 = !{i32 4, i32 100000}
81+
!2 = !{!"clang version 14.0.0 (https://github.com/intel/llvm)"}
82+
!3 = !{i32 1, !"wchar_size", i32 4}
83+
!4 = !{i32 7, !"frame-pointer", i32 2}
84+
!5 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
85+
!6 = !{i64 0, i64 4, !7, i64 4, i64 1, !11}
86+
!7 = !{!8, !8, i64 0}
87+
!8 = !{!"int", !9, i64 0}
88+
!9 = !{!"omnipotent char", !10, i64 0}
89+
!10 = !{!"Simple C++ TBAA"}
90+
!11 = !{!9, !9, i64 0}
91+
!12 = !{!13, !13, i64 0}
92+
!13 = !{!"bool", !9, i64 0}
93+
94+
; Make sure the specialization constants occur in the order with the padded
95+
; struct first followed by the boolean specialization constant.
96+
; Most important information from the corresponding encoded data is the size of
97+
; the specialization constants, i.e. 8 and 1 bytes respectively.
98+
; CHECK: [SYCL/specialization constants]
99+
; CHECK-NEXT: 9d329ad59055e972____ZL12StructSpecId=2|gBAAAAAAAAAAAAAAAAAAAgAAAAA
100+
; CHECK-NEXT: 9d329ad59055e972____ZL10BoolSpecId=2|gBAAAAAAAAQAAAAAAAAAAEAAAAA
101+
102+
; Ensure that the default values are correct.
103+
; IBAAAAAAAAAFAAAAjBAAAEA is decoded to "0x48 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x14
104+
; 0x0 0x0 0x0 0x63 0x0 0x0 0x0 0x1" which consists of:
105+
; 1. 8 bytes denoting the bit-size of the byte array, here 72 bits or 9 bytes.
106+
; 2. 4 bytes with the default value of the 32-bit integer member of
107+
; %struct.TestStruct. Its value being 20.
108+
; 3. 1 byte with the default value of the char member of %struct.TestStruct.
109+
; Its value being 'c'.
110+
; 4. 3 bytes of padding for %struct.TestStruct.
111+
; 5. 1 byte with the default value of the boolean specialization constant. Its
112+
; value being true.
113+
; CHECK: [SYCL/specialization constants default values]
114+
; CHECK-NEXT: all=2|IBAAAAAAAAAFAAAAjBAAAEA

llvm/tools/sycl-post-link/SpecConstants.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,7 @@ void collectCompositeElementsDefaultValuesRecursive(
310310
}
311311
} else if (auto *StructTy = dyn_cast<StructType>(Ty)) {
312312
const StructLayout *SL = M.getDataLayout().getStructLayout(StructTy);
313+
const size_t BaseDefaultValueOffset = DefaultValues.size();
313314
for (size_t I = 0, E = StructTy->getNumElements(); I < E; ++I) {
314315
Constant *El = nullptr;
315316
if (C->isZeroValue())
@@ -330,9 +331,16 @@ void collectCompositeElementsDefaultValuesRecursive(
330331
collectCompositeElementsDefaultValuesRecursive(M, El, LocalOffset,
331332
DefaultValues);
332333
}
334+
const size_t SLSize = SL->getSizeInBytes();
335+
336+
// Additional padding may be needed at the end of the struct if size does
337+
// not match the number of bytes inserted.
338+
if (DefaultValues.size() < BaseDefaultValueOffset + SLSize)
339+
DefaultValues.resize(BaseDefaultValueOffset + SLSize);
340+
333341
// Update "global" offset according to the total size of a handled struct
334342
// type.
335-
Offset += SL->getSizeInBytes();
343+
Offset += SLSize;
336344
} else { // Assume that we encountered some scalar element
337345
int NumBytes = M.getDataLayout().getTypeStoreSize(Ty);
338346

0 commit comments

Comments
 (0)