Skip to content

Commit 12d7c1f

Browse files
[SYCL][sycl-post-link] Add post-struct padding in descs and fix blob size calculations (#5376)
These changes fix two related issues: 1. The runtime does not correctly handle the offset in specialization constant descriptors as it does not take into account the size of the previous element in composite types, effectively always adding padding between each element equal to at least the size of the previous element. This is fixed by changing it to keep track of the local offset into the current composite type and subtracting that from the offset of the descriptor to find the required padding. 2. Composite types may have padding at the end. sycl-post-link does not currently generate enough information for the runtime to take the additional padding into account when calculating the size of the specialization constant default value blob. This is fixed by inserting an additional descriptor after the last element of a composite type with end-padding. This "padding-descriptor" will have the offset right after the last element of the composite type and a size corresponding to the size of the required padding. padding-descriptors all have the same ID, namely the maximum value of the (unsigned) ID. Signed-off-by: Steffen Larsen <steffen.larsen@intel.com>
1 parent 88123c1 commit 12d7c1f

File tree

3 files changed

+125
-19
lines changed

3 files changed

+125
-19
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; RUN: sycl-post-link -spec-const=rt %s -o %t.files.table
2+
; RUN: FileCheck %s -input-file=%t.files_0.prop
3+
;
4+
; This test checks that composite specialization constants with implicit padding
5+
; at the end of the composite type will have an additional padding descriptor at
6+
; the end of the descriptor list.
7+
8+
; ModuleID = 'test.bc'
9+
source_filename = "llvm-link"
10+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
11+
target triple = "spir64-unknown-unknown"
12+
13+
%"class.cl::sycl::specialization_id" = type { %struct.TestStruct }
14+
%struct.TestStruct = type { i32, i8 }
15+
16+
$_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E10KernelName = comdat any
17+
18+
@__usid_str = private unnamed_addr constant [33 x i8] c"fb86570d411366d1____ZL9SpecConst\00", align 1
19+
@_ZL9SpecConst = internal addrspace(1) constant %"class.cl::sycl::specialization_id" zeroinitializer, align 4
20+
21+
; Function Attrs: convergent norecurse
22+
define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E10KernelName() local_unnamed_addr #0 comdat !kernel_arg_buffer_location !5 !sycl_kernel_omit_args !6 {
23+
entry:
24+
%tmp.i = alloca %struct.TestStruct, align 4
25+
%tmp.ascast.i = addrspacecast %struct.TestStruct* %tmp.i to %struct.TestStruct addrspace(4)*
26+
%0 = bitcast %struct.TestStruct* %tmp.i to i8*
27+
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) #3
28+
call spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI10TestStructET_PKcPKvS5_(%struct.TestStruct addrspace(4)* sret(%struct.TestStruct) align 4 %tmp.ascast.i, i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([33 x i8], [33 x i8]* @__usid_str, i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast (%"class.cl::sycl::specialization_id" addrspace(1)* @_ZL9SpecConst to i8 addrspace(1)*) to i8 addrspace(4)*), i8 addrspace(4)* null) #4
29+
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #3
30+
ret void
31+
}
32+
33+
; Function Attrs: argmemonly nofree nosync nounwind willreturn
34+
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
35+
36+
; Function Attrs: convergent
37+
declare dso_local spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI10TestStructET_PKcPKvS5_(%struct.TestStruct addrspace(4)* sret(%struct.TestStruct) align 4, i8 addrspace(4)*, i8 addrspace(4)*, i8 addrspace(4)*) local_unnamed_addr #2
38+
39+
; Function Attrs: argmemonly nofree nosync nounwind willreturn
40+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
41+
42+
attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cpp" "uniform-work-group-size"="true" }
43+
attributes #1 = { argmemonly nofree nosync nounwind willreturn }
44+
attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
45+
attributes #3 = { nounwind }
46+
attributes #4 = { convergent }
47+
48+
!opencl.spir.version = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0}
49+
!spirv.Source = !{!1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1}
50+
!llvm.ident = !{!2, !2, !2, !2, !2, !2, !2, !2, !2, !2, !2, !2}
51+
!llvm.module.flags = !{!3, !4}
52+
53+
!0 = !{i32 1, i32 2}
54+
!1 = !{i32 4, i32 100000}
55+
!2 = !{!"clang version 14.0.0"}
56+
!3 = !{i32 1, !"wchar_size", i32 4}
57+
!4 = !{i32 7, !"frame-pointer", i32 2}
58+
!5 = !{i32 -1}
59+
!6 = !{i1 true}
60+
61+
; We expect the following in the descriptor list for SpecConst:
62+
; First 8 bytes are the size of the list.
63+
; Each 12 bytes after the size comprise a descriptor consisting of 3 32-bit
64+
; unsigned integers. For SpecConst these are:
65+
; ID | Composite offset | Size
66+
; 0x00 0x00 0x00 0x00 (0) | 0x00 0x00 0x00 0x00 (0) | 0x04 0x00 0x00 0x00 (4)
67+
; 0x01 0x00 0x00 0x00 (1) | 0x04 0x00 0x00 0x00 (4) | 0x01 0x00 0x00 0x00 (1)
68+
; 0xff 0xff 0xff 0xff (max) | 0x05 0x00 0x00 0x00 (5) | 0x03 0x00 0x00 0x00 (3)
69+
; Most important for this test is the last descriptor which represents 3-bytes
70+
; implicit padding at the end of the composite type of the spec constant.
71+
;
72+
; CHECK: [SYCL/specialization constants]
73+
; CHECK-NEXT: fb86570d411366d1____ZL9SpecConst=2|gEAAAAAAAAAAAAAAAAAAAQAAAAQAAAAAEAAAAEAAAAw/////FAAAAMAAAAA

llvm/tools/sycl-post-link/SpecConstants.cpp

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -236,29 +236,44 @@ MDNode *generateSpecConstDefaultValueMetadata(StringRef SymID, Value *Default) {
236236
/// Recursively iterates over a composite type in order to collect information
237237
/// about its scalar elements.
238238
void collectCompositeElementsInfoRecursive(
239-
const Module &M, Type *Ty, unsigned &Index, unsigned &Offset,
239+
const Module &M, Type *Ty, const unsigned *&IDIter, unsigned &Offset,
240240
std::vector<SpecConstantDescriptor> &Result) {
241241
if (auto *ArrTy = dyn_cast<ArrayType>(Ty)) {
242242
for (size_t I = 0; I < ArrTy->getNumElements(); ++I) {
243243
// TODO: this is a spot for potential optimization: for arrays we could
244244
// just make a single recursive call here and use it to populate Result
245245
// in a loop.
246-
collectCompositeElementsInfoRecursive(M, ArrTy->getElementType(), Index,
246+
collectCompositeElementsInfoRecursive(M, ArrTy->getElementType(), IDIter,
247247
Offset, Result);
248248
}
249249
} else if (auto *StructTy = dyn_cast<StructType>(Ty)) {
250250
const StructLayout *SL = M.getDataLayout().getStructLayout(StructTy);
251+
const unsigned BaseOffset = Offset;
252+
unsigned LocalOffset = Offset;
251253
for (size_t I = 0, E = StructTy->getNumElements(); I < E; ++I) {
252254
auto *ElTy = StructTy->getElementType(I);
253255
// When handling elements of a structure, we do not use manually
254256
// calculated offsets (which are sum of sizes of all previously
255257
// encountered elements), but instead rely on data provided for us by
256258
// DataLayout, because the structure can be unpacked, i.e. padded in
257259
// order to ensure particular alignment of its elements.
258-
unsigned LocalOffset = Offset + SL->getElementOffset(I);
259-
collectCompositeElementsInfoRecursive(M, ElTy, Index, LocalOffset,
260+
LocalOffset = Offset + SL->getElementOffset(I);
261+
collectCompositeElementsInfoRecursive(M, ElTy, IDIter, LocalOffset,
260262
Result);
261263
}
264+
265+
// Add a special descriptor if the struct has padding at the end.
266+
const unsigned PostStructPadding =
267+
BaseOffset + SL->getSizeInBytes() - LocalOffset;
268+
if (PostStructPadding > 0) {
269+
SpecConstantDescriptor Desc;
270+
// ID of padding descriptors is the max value possible.
271+
Desc.ID = std::numeric_limits<unsigned>::max();
272+
Desc.Offset = LocalOffset;
273+
Desc.Size = PostStructPadding;
274+
Result.push_back(Desc);
275+
}
276+
262277
// Update "global" offset according to the total size of a handled struct
263278
// type.
264279
Offset += SL->getSizeInBytes();
@@ -267,15 +282,18 @@ void collectCompositeElementsInfoRecursive(
267282
// TODO: this is a spot for potential optimization: for vectors we could
268283
// just make a single recursive call here and use it to populate Result
269284
// in a loop.
270-
collectCompositeElementsInfoRecursive(M, VecTy->getElementType(), Index,
285+
collectCompositeElementsInfoRecursive(M, VecTy->getElementType(), IDIter,
271286
Offset, Result);
272287
}
273288
} else { // Assume that we encountered some scalar element
274289
SpecConstantDescriptor Desc;
275-
Desc.ID = 0; // To be filled later
290+
Desc.ID = *IDIter;
276291
Desc.Offset = Offset;
277292
Desc.Size = M.getDataLayout().getTypeStoreSize(Ty);
278-
Result[Index++] = Desc;
293+
Result.push_back(Desc);
294+
295+
// Move current ID and offset
296+
++IDIter;
279297
Offset += Desc.Size;
280298
}
281299
}
@@ -392,13 +410,19 @@ MDNode *generateSpecConstantMetadata(const Module &M, StringRef SymbolicID,
392410
MDOps.push_back(MDString::get(Ctx, SymbolicID));
393411

394412
if (IsNativeSpecConstant) {
395-
std::vector<SpecConstantDescriptor> Result(IDs.size());
396-
unsigned Index = 0, Offset = 0;
397-
collectCompositeElementsInfoRecursive(M, SCTy, Index, Offset, Result);
413+
std::vector<SpecConstantDescriptor> Result;
414+
Result.reserve(IDs.size());
415+
unsigned Offset = 0;
416+
const unsigned *IDPtr = IDs.data();
417+
collectCompositeElementsInfoRecursive(M, SCTy, IDPtr, Offset, Result);
418+
419+
// We may have padding elements so size should be at least the same size as
420+
// the ID vector.
421+
assert(Result.size() >= IDs.size());
398422

399423
for (unsigned I = 0; I < Result.size(); ++I) {
400424
MDOps.push_back(ConstantAsMetadata::get(
401-
Constant::getIntegerValue(Int32Ty, APInt(32, IDs[I]))));
425+
Constant::getIntegerValue(Int32Ty, APInt(32, Result[I].ID))));
402426
MDOps.push_back(ConstantAsMetadata::get(
403427
Constant::getIntegerValue(Int32Ty, APInt(32, Result[I].Offset))));
404428
MDOps.push_back(ConstantAsMetadata::get(

sycl/source/detail/device_image_impl.hpp

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -267,16 +267,22 @@ class device_image_impl {
267267
auto *It = reinterpret_cast<const std::uint32_t *>(&Descriptors[8]);
268268
auto *End = reinterpret_cast<const std::uint32_t *>(&Descriptors[0] +
269269
Descriptors.size());
270-
unsigned PrevOffset = 0;
270+
unsigned LocalOffset = 0;
271271
while (It != End) {
272272
// Make sure that alignment is correct in blob.
273-
BlobOffset += /*Offset*/ It[1] - PrevOffset;
274-
PrevOffset = It[1];
275-
// The map is not locked here because updateSpecConstSymMap() is only
276-
// supposed to be called from c'tor.
277-
MSpecConstSymMap[std::string{SCName}].push_back(
278-
SpecConstDescT{/*ID*/ It[0], /*CompositeOffset*/ It[1],
279-
/*Size*/ It[2], BlobOffset});
273+
const unsigned OffsetFromLast = /*Offset*/ It[1] - LocalOffset;
274+
BlobOffset += OffsetFromLast;
275+
// Composites may have a special padding element at the end which
276+
// should not have a descriptor. These padding elements all have max
277+
// ID value.
278+
if (It[0] != std::numeric_limits<std::uint32_t>::max()) {
279+
// The map is not locked here because updateSpecConstSymMap() is
280+
// only supposed to be called from c'tor.
281+
MSpecConstSymMap[std::string{SCName}].push_back(
282+
SpecConstDescT{/*ID*/ It[0], /*CompositeOffset*/ It[1],
283+
/*Size*/ It[2], BlobOffset});
284+
}
285+
LocalOffset += OffsetFromLast + /*Size*/ It[2];
280286
BlobOffset += /*Size*/ It[2];
281287
It += NumElements;
282288
}
@@ -288,6 +294,9 @@ class device_image_impl {
288294
if (HasDefaultValues) {
289295
pi::ByteArray DefValDescriptors =
290296
pi::DeviceBinaryProperty(*SCDefValRange.begin()).asByteArray();
297+
assert(DefValDescriptors.size() - 8 == MSpecConstsBlob.size() &&
298+
"Specialization constant default value blob do not have the "
299+
"expected size.");
291300
std::uninitialized_copy(&DefValDescriptors[8],
292301
&DefValDescriptors[8] + MSpecConstsBlob.size(),
293302
MSpecConstsBlob.data());

0 commit comments

Comments
 (0)