Skip to content

Commit fd7e54b

Browse files
authored
Pix: Cope with group shared AS->MS payload (#6619)
This change copes with the AS->MS payload being placed in group-shared memory by the application (and MSFT's samples do indeed do this). (TIL, thanks to pow2clk, that the spec says that the payload counts against the group-shared total, implying, if not explicitly stating, that on at least some platforms the payload will live in group-shared memory anyway.) The MS pass needs to be given data from the AS about the AS's thread group topology, and this is done by extending the payload struct with three uints. That can't be done in place when the payload is resident in group-shared memory, of course, because it would change the layout of group-shared memory. So the new approach is to create a new alloca (in the default address space) whose struct type has the members of the original payload struct plus the extended data the MS needs, and to copy the original payload into it piece-wise, because llvm.memcpy isn't appropriate for copies from group-shared memory to the default address space.
1 parent d9caef5 commit fd7e54b

File tree

5 files changed

+326
-122
lines changed

5 files changed

+326
-122
lines changed

lib/DxilPIXPasses/DxilPIXAddTidToAmplificationShaderPayload.cpp

Lines changed: 127 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -45,143 +45,154 @@ void DxilPIXAddTidToAmplificationShaderPayload::applyOptions(PassOptions O) {
4545
}
4646

4747
// Stores `value` into field number `expandedValueIndex` of the struct that
// NewStructAlloca points to. The field address is formed with an in-bounds
// GEP using the index pair {0, expandedValueIndex}, and the new instructions
// are emitted through the builder B.
void AddValueToExpandedPayload(OP *HlslOP, llvm::IRBuilder<> &B,
48-
ExpandedStruct &expanded,
4948
AllocaInst *NewStructAlloca,
5049
unsigned int expandedValueIndex, Value *value) {
5150
Constant *Zero32Arg = HlslOP->GetU32Const(0);
5251
SmallVector<Value *, 2> IndexToAppendedValue;
5352
IndexToAppendedValue.push_back(Zero32Arg);
5453
IndexToAppendedValue.push_back(HlslOP->GetU32Const(expandedValueIndex));
5554
auto *PointerToEmbeddedNewValue = B.CreateInBoundsGEP(
56-
expanded.ExpandedPayloadStructType, NewStructAlloca, IndexToAppendedValue,
55+
NewStructAlloca, IndexToAppendedValue,
5756
"PointerToEmbeddedNewValue" + std::to_string(expandedValueIndex));
5857
B.CreateStore(value, PointerToEmbeddedNewValue);
5958
}
6059

61-
bool DxilPIXAddTidToAmplificationShaderPayload::runOnModule(Module &M) {
60+
// Recursively copies a value of type Ty from Source to Dest, one leaf at a
// time. Structs and arrays are descended into; every other type is treated as
// a leaf and copied with a single load/store pair. This lets the copy cross
// address spaces, where a single memcpy of the whole aggregate is not
// appropriate.
//
//   B          - builder used to emit the GEP/load/store instructions.
//   Ty         - type of the (sub-)object addressed by GEPIndices.
//   Source     - pointer to the outermost aggregate being read.
//   Dest       - pointer to the outermost aggregate being written; the same
//                GEP indices are applied to it as to Source.
//   GEPIndices - index path from the outermost pointer to the current
//                (sub-)object. Callers start with a single zero index.
void CopyAggregate(IRBuilder<> &B, Type *Ty, Value *Source, Value *Dest,
                   ArrayRef<Value *> GEPIndices) {
  StructType *ST = dyn_cast<StructType>(Ty);
  ArrayType *AT = ST ? nullptr : dyn_cast<ArrayType>(Ty);

  if (!ST && !AT) {
    // Leaf: copy this one element.
    auto *SourceGEP = B.CreateGEP(Source, GEPIndices, "CopyStructSourceGEP");
    Value *Val = B.CreateLoad(SourceGEP, "CopyStructLoad");
    auto *DestGEP = B.CreateGEP(Dest, GEPIndices, "CopyStructDestGEP");
    // CreateStore's third parameter is `bool isVolatile`, not a name. This
    // code previously passed the string literal "CopyStructStore" there,
    // which converted to `true`. The store therefore has always been
    // volatile, so the flag is spelled out here to keep the emitted IR
    // identical.
    // NOTE(review): confirm whether a volatile store is actually wanted.
    B.CreateStore(Val, DestGEP, /*isVolatile=*/true);
    return;
  }

  // Aggregate: extend the index path by one slot and visit each element.
  const uint64_t NumElements =
      ST ? ST->getNumElements() : AT->getNumElements();
  SmallVector<Value *, 16> ElementIndices;
  ElementIndices.append(GEPIndices.begin(), GEPIndices.end());
  ElementIndices.push_back(nullptr);
  for (unsigned j = 0; j < NumElements; ++j) {
    ElementIndices.back() = B.getInt32(j);
    // Struct members may each have a different type; array elements share one.
    Type *ElementTy = ST ? ST->getElementType(j) : AT->getElementType();
    CopyAggregate(B, ElementTy, Source, Dest, ElementIndices);
  }
}
6285

86+
// Rewrites the DispatchMesh call in the module's entry function so that it
// passes an expanded payload. For the first DispatchMesh call found, this:
//   - creates an alloca of the expanded payload struct type,
//   - copies the original payload into it element by element,
//   - computes a flat thread id from GroupId, the dispatch arguments, the
//     thread-group size and FlattenedThreadIdInGroup,
//   - stores that id plus the call's threadGroupCountY and threadGroupCountZ
//     into the last three fields of the expanded struct,
//   - emits a new DispatchMesh call that takes the new alloca and removes the
//     original call.
// Returns true if a DispatchMesh call was rewritten, false otherwise.
bool DxilPIXAddTidToAmplificationShaderPayload::runOnModule(Module &M) {
6387
DxilModule &DM = M.GetOrCreateDxilModule();
6488
LLVMContext &Ctx = M.getContext();
6589
OP *HlslOP = DM.GetOP();
66-
67-
Type *OriginalPayloadStructPointerType = nullptr;
68-
Type *OriginalPayloadStructType = nullptr;
69-
ExpandedStruct expanded;
7090
llvm::Function *entryFunction = PIXPassHelpers::GetEntryFunction(DM);
7191
for (inst_iterator I = inst_begin(entryFunction), E = inst_end(entryFunction);
7292
I != E; ++I) {
73-
if (auto *Instr = llvm::cast<Instruction>(&*I)) {
74-
if (hlsl::OP::IsDxilOpFuncCallInst(Instr,
75-
hlsl::OP::OpCode::DispatchMesh)) {
76-
DxilInst_DispatchMesh DispatchMesh(Instr);
77-
OriginalPayloadStructPointerType =
78-
DispatchMesh.get_payload()->getType();
79-
OriginalPayloadStructType =
80-
OriginalPayloadStructPointerType->getPointerElementType();
81-
expanded = ExpandStructType(Ctx, OriginalPayloadStructType);
82-
}
83-
}
84-
}
85-
86-
AllocaInst *OldStructAlloca = nullptr;
87-
AllocaInst *NewStructAlloca = nullptr;
88-
std::vector<AllocaInst *> allocasOfPayloadType;
89-
for (inst_iterator I = inst_begin(entryFunction), E = inst_end(entryFunction);
90-
I != E; ++I) {
91-
auto *Inst = &*I;
92-
if (llvm::isa<AllocaInst>(Inst)) {
93-
auto *Alloca = llvm::cast<AllocaInst>(Inst);
94-
if (Alloca->getType() == OriginalPayloadStructPointerType) {
95-
allocasOfPayloadType.push_back(Alloca);
96-
}
93+
if (hlsl::OP::IsDxilOpFuncCallInst(&*I, hlsl::OP::OpCode::DispatchMesh)) {
94+
DxilInst_DispatchMesh DispatchMesh(&*I);
95+
Type *OriginalPayloadStructPointerType =
96+
DispatchMesh.get_payload()->getType();
97+
Type *OriginalPayloadStructType =
98+
OriginalPayloadStructPointerType->getPointerElementType();
99+
ExpandedStruct expanded =
100+
ExpandStructType(Ctx, OriginalPayloadStructType);
101+
102+
// Every instruction created through B is inserted immediately before the
// original DispatchMesh call.
llvm::IRBuilder<> B(&*I);
103+
104+
auto *NewStructAlloca =
105+
B.CreateAlloca(expanded.ExpandedPayloadStructType,
106+
HlslOP->GetU32Const(1), "NewPayload");
107+
NewStructAlloca->setAlignment(4);
108+
// NOTE(review): this is the same type as OriginalPayloadStructPointerType
// above, and the dyn_cast result is dereferenced below without a null check.
auto PayloadType =
109+
llvm::dyn_cast<PointerType>(DispatchMesh.get_payload()->getType());
110+
SmallVector<Value *, 16> GEPIndices;
111+
GEPIndices.push_back(B.getInt32(0));
112+
// Element-wise copy of the original payload into the new alloca.
CopyAggregate(B, PayloadType->getPointerElementType(),
113+
DispatchMesh.get_payload(), NewStructAlloca, GEPIndices);
114+
115+
Constant *Zero32Arg = HlslOP->GetU32Const(0);
116+
Constant *One32Arg = HlslOP->GetU32Const(1);
117+
Constant *Two32Arg = HlslOP->GetU32Const(2);
118+
119+
auto GroupIdFunc =
120+
HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(Ctx));
121+
Constant *GroupIdOpcode =
122+
HlslOP->GetU32Const((unsigned)DXIL::OpCode::GroupId);
123+
auto *GroupIdX =
124+
B.CreateCall(GroupIdFunc, {GroupIdOpcode, Zero32Arg}, "GroupIdX");
125+
auto *GroupIdY =
126+
B.CreateCall(GroupIdFunc, {GroupIdOpcode, One32Arg}, "GroupIdY");
127+
auto *GroupIdZ =
128+
B.CreateCall(GroupIdFunc, {GroupIdOpcode, Two32Arg}, "GroupIdZ");
129+
130+
// FlatGroupID = z + y*numZ + x*numY*numZ
131+
// Where x,y,z are the group ID components, and numZ and numY are the
132+
// corresponding AS group-count arguments to the DispatchMesh Direct3D API
133+
auto *GroupYxNumZ = B.CreateMul(
134+
GroupIdY, HlslOP->GetU32Const(m_DispatchArgumentZ), "GroupYxNumZ");
135+
auto *FlatGroupNumZY =
136+
B.CreateAdd(GroupIdZ, GroupYxNumZ, "FlatGroupNumZY");
137+
auto *GroupXxNumYZ = B.CreateMul(
138+
GroupIdX,
139+
HlslOP->GetU32Const(m_DispatchArgumentY * m_DispatchArgumentZ),
140+
"GroupXxNumYZ");
141+
auto *FlatGroupID =
142+
B.CreateAdd(GroupXxNumYZ, FlatGroupNumZY, "FlatGroupID");
143+
144+
// The ultimate goal is a single unique thread ID for this AS thread.
145+
// So take the flat group number, multiply it by the number of
146+
// threads per group...
147+
auto *FlatGroupIDWithSpaceForThreadInGroupId = B.CreateMul(
148+
FlatGroupID,
149+
HlslOP->GetU32Const(DM.GetNumThreads(0) * DM.GetNumThreads(1) *
150+
DM.GetNumThreads(2)),
151+
"FlatGroupIDWithSpaceForThreadInGroupId");
152+
153+
auto *FlattenedThreadIdInGroupFunc = HlslOP->GetOpFunc(
154+
DXIL::OpCode::FlattenedThreadIdInGroup, Type::getInt32Ty(Ctx));
155+
Constant *FlattenedThreadIdInGroupOpcode =
156+
HlslOP->GetU32Const((unsigned)DXIL::OpCode::FlattenedThreadIdInGroup);
157+
auto FlatThreadIdInGroup = B.CreateCall(FlattenedThreadIdInGroupFunc,
158+
{FlattenedThreadIdInGroupOpcode},
159+
"FlattenedThreadIdInGroup");
160+
161+
// ...and add the flat thread id:
162+
auto *FlatId = B.CreateAdd(FlatGroupIDWithSpaceForThreadInGroupId,
163+
FlatThreadIdInGroup, "FlatId");
164+
165+
// The three extra values go into the last three fields of the expanded
// struct, in the order FlatId, threadGroupCountY, threadGroupCountZ.
AddValueToExpandedPayload(
166+
HlslOP, B, NewStructAlloca,
167+
expanded.ExpandedPayloadStructType->getStructNumElements() - 3,
168+
FlatId);
169+
AddValueToExpandedPayload(
170+
HlslOP, B, NewStructAlloca,
171+
expanded.ExpandedPayloadStructType->getStructNumElements() - 2,
172+
DispatchMesh.get_threadGroupCountY());
173+
AddValueToExpandedPayload(
174+
HlslOP, B, NewStructAlloca,
175+
expanded.ExpandedPayloadStructType->getStructNumElements() - 1,
176+
DispatchMesh.get_threadGroupCountZ());
177+
178+
auto DispatchMeshFn = HlslOP->GetOpFunc(
179+
DXIL::OpCode::DispatchMesh, expanded.ExpandedPayloadStructPtrType);
180+
Constant *DispatchMeshOpcode =
181+
HlslOP->GetU32Const((unsigned)DXIL::OpCode::DispatchMesh);
182+
B.CreateCall(DispatchMeshFn,
183+
{DispatchMeshOpcode, DispatchMesh.get_threadGroupCountX(),
184+
DispatchMesh.get_threadGroupCountY(),
185+
DispatchMesh.get_threadGroupCountZ(), NewStructAlloca});
186+
// The call created above replaces the original one, so unlink the original
// from its block and free it. The iterator I is not used again before the
// return below.
// NOTE(review): I->eraseFromParent() would presumably do both steps at once.
I->removeFromParent();
187+
delete &*I;
188+
// Validation requires exactly one DispatchMesh in an AS, so we can exit
189+
// after the first one:
190+
DM.ReEmitDxilResources();
191+
return true;
97192
}
98193
}
99-
for (auto &Alloca : allocasOfPayloadType) {
100-
OldStructAlloca = Alloca;
101-
llvm::IRBuilder<> B(Alloca->getContext());
102-
NewStructAlloca = B.CreateAlloca(expanded.ExpandedPayloadStructType,
103-
HlslOP->GetU32Const(1), "NewPayload");
104-
NewStructAlloca->setAlignment(Alloca->getAlignment());
105-
NewStructAlloca->insertAfter(Alloca);
106-
107-
ReplaceAllUsesOfInstructionWithNewValueAndDeleteInstruction(
108-
Alloca, NewStructAlloca, expanded.ExpandedPayloadStructType);
109-
}
110-
111-
auto F = HlslOP->GetOpFunc(DXIL::OpCode::DispatchMesh,
112-
expanded.ExpandedPayloadStructPtrType);
113-
for (auto FI = F->user_begin(); FI != F->user_end();) {
114-
auto *FunctionUser = *FI++;
115-
auto *UserInstruction = llvm::cast<Instruction>(FunctionUser);
116-
DxilInst_DispatchMesh DispatchMesh(UserInstruction);
117-
118-
llvm::IRBuilder<> B(UserInstruction);
119-
120-
Constant *Zero32Arg = HlslOP->GetU32Const(0);
121-
Constant *One32Arg = HlslOP->GetU32Const(1);
122-
Constant *Two32Arg = HlslOP->GetU32Const(2);
123-
124-
auto GroupIdFunc =
125-
HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(Ctx));
126-
Constant *GroupIdOpcode =
127-
HlslOP->GetU32Const((unsigned)DXIL::OpCode::GroupId);
128-
auto *GroupIdX =
129-
B.CreateCall(GroupIdFunc, {GroupIdOpcode, Zero32Arg}, "GroupIdX");
130-
auto *GroupIdY =
131-
B.CreateCall(GroupIdFunc, {GroupIdOpcode, One32Arg}, "GroupIdY");
132-
auto *GroupIdZ =
133-
B.CreateCall(GroupIdFunc, {GroupIdOpcode, Two32Arg}, "GroupIdZ");
134-
135-
// FlatGroupID = z + y*numZ + x*numY*numZ
136-
// Where x,y,z are the group ID components, and numZ and numY are the
137-
// corresponding AS group-count arguments to the DispatchMesh Direct3D API
138-
auto *GroupYxNumZ = B.CreateMul(
139-
GroupIdY, HlslOP->GetU32Const(m_DispatchArgumentZ), "GroupYxNumZ");
140-
auto *FlatGroupNumZY = B.CreateAdd(GroupIdZ, GroupYxNumZ, "FlatGroupNumZY");
141-
auto *GroupXxNumYZ = B.CreateMul(
142-
GroupIdX,
143-
HlslOP->GetU32Const(m_DispatchArgumentY * m_DispatchArgumentZ),
144-
"GroupXxNumYZ");
145-
auto *FlatGroupID =
146-
B.CreateAdd(GroupXxNumYZ, FlatGroupNumZY, "FlatGroFlatGroupIDupNum");
147-
148-
// The ultimate goal is a single unique thread ID for this AS thread.
149-
// So take the flat group number, multiply it by the number of
150-
// threads per group...
151-
auto *FlatGroupIDWithSpaceForThreadInGroupId = B.CreateMul(
152-
FlatGroupID,
153-
HlslOP->GetU32Const(DM.GetNumThreads(0) * DM.GetNumThreads(1) *
154-
DM.GetNumThreads(2)),
155-
"FlatGroupIDWithSpaceForThreadInGroupId");
156-
157-
auto *FlattenedThreadIdInGroupFunc = HlslOP->GetOpFunc(
158-
DXIL::OpCode::FlattenedThreadIdInGroup, Type::getInt32Ty(Ctx));
159-
Constant *FlattenedThreadIdInGroupOpcode =
160-
HlslOP->GetU32Const((unsigned)DXIL::OpCode::FlattenedThreadIdInGroup);
161-
auto FlatThreadIdInGroup = B.CreateCall(FlattenedThreadIdInGroupFunc,
162-
{FlattenedThreadIdInGroupOpcode},
163-
"FlattenedThreadIdInGroup");
164-
165-
// ...and add the flat thread id:
166-
auto *FlatId = B.CreateAdd(FlatGroupIDWithSpaceForThreadInGroupId,
167-
FlatThreadIdInGroup, "FlatId");
168-
169-
AddValueToExpandedPayload(HlslOP, B, expanded, NewStructAlloca,
170-
OriginalPayloadStructType->getStructNumElements(),
171-
FlatId);
172-
AddValueToExpandedPayload(
173-
HlslOP, B, expanded, NewStructAlloca,
174-
OriginalPayloadStructType->getStructNumElements() + 1,
175-
DispatchMesh.get_threadGroupCountY());
176-
AddValueToExpandedPayload(
177-
HlslOP, B, expanded, NewStructAlloca,
178-
OriginalPayloadStructType->getStructNumElements() + 2,
179-
DispatchMesh.get_threadGroupCountZ());
180-
}
181-
182-
DM.ReEmitDxilResources();
183194

184-
return true;
195+
// No DispatchMesh call was found in the entry function, so nothing was
// rewritten.
return false;
185196
}
186197

187198
char DxilPIXAddTidToAmplificationShaderPayload::ID = 0;
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// RUN: %dxc -enable-16bit-types -Od -Emain -Tas_6_6 %s | %opt -S -hlsl-dxil-PIX-add-tid-to-as-payload,dispatchArgY=3,dispatchArgZ=7 | %FileCheck %s
2+
3+
// Check that the payload was piece-wise copied into a local copy from group-shared:
4+
// There are 28 elements:
5+
6+
// CHECK: [[LOAD0:%.*]] = load [[TYPE0:.*]], [[TYPE0]] addrspace(3)* getelementptr inbounds
7+
// CHECK:store volatile [[TYPE0]] [[LOAD0]]
8+
// CHECK: [[LOAD1:%.*]] = load [[TYPE1:.*]], [[TYPE1]] addrspace(3)* getelementptr inbounds
9+
// CHECK:store volatile [[TYPE1]] [[LOAD1]]
10+
// CHECK: [[LOAD2:%.*]] = load [[TYPE2:.*]], [[TYPE2]] addrspace(3)* getelementptr inbounds
11+
// CHECK:store volatile [[TYPE2]] [[LOAD2]]
12+
// CHECK: [[LOAD3:%.*]] = load [[TYPE3:.*]], [[TYPE3]] addrspace(3)* getelementptr inbounds
13+
// CHECK:store volatile [[TYPE3]] [[LOAD3]]
14+
// CHECK: [[LOAD4:%.*]] = load [[TYPE4:.*]], [[TYPE4]] addrspace(3)* getelementptr inbounds
15+
// CHECK:store volatile [[TYPE4]] [[LOAD4]]
16+
// CHECK: [[LOAD5:%.*]] = load [[TYPE5:.*]], [[TYPE5]] addrspace(3)* getelementptr inbounds
17+
// CHECK:store volatile [[TYPE5]] [[LOAD5]]
18+
// CHECK: [[LOAD6:%.*]] = load [[TYPE6:.*]], [[TYPE6]] addrspace(3)* getelementptr inbounds
19+
// CHECK:store volatile [[TYPE6]] [[LOAD6]]
20+
// CHECK: [[LOAD7:%.*]] = load [[TYPE7:.*]], [[TYPE7]] addrspace(3)* getelementptr inbounds
21+
// CHECK:store volatile [[TYPE7]] [[LOAD7]]
22+
// CHECK: [[LOAD8:%.*]] = load [[TYPE8:.*]], [[TYPE8]] addrspace(3)* getelementptr inbounds
23+
// CHECK:store volatile [[TYPE8]] [[LOAD8]]
24+
// CHECK: [[LOAD9:%.*]] = load [[TYPE9:.*]], [[TYPE9]] addrspace(3)* getelementptr inbounds
25+
// CHECK:store volatile [[TYPE9]] [[LOAD9]]
26+
27+
// CHECK: [[LOAD10:%.*]] = load [[TYPE10:.*]], [[TYPE10]] addrspace(3)* getelementptr inbounds
28+
// CHECK:store volatile [[TYPE10]] [[LOAD10]]
29+
// CHECK: [[LOAD11:%.*]] = load [[TYPE11:.*]], [[TYPE11]] addrspace(3)* getelementptr inbounds
30+
// CHECK:store volatile [[TYPE11]] [[LOAD11]]
31+
// CHECK: [[LOAD12:%.*]] = load [[TYPE12:.*]], [[TYPE12]] addrspace(3)* getelementptr inbounds
32+
// CHECK:store volatile [[TYPE12]] [[LOAD12]]
33+
// CHECK: [[LOAD13:%.*]] = load [[TYPE13:.*]], [[TYPE13]] addrspace(3)* getelementptr inbounds
34+
// CHECK:store volatile [[TYPE13]] [[LOAD13]]
35+
// CHECK: [[LOAD14:%.*]] = load [[TYPE14:.*]], [[TYPE14]] addrspace(3)* getelementptr inbounds
36+
// CHECK:store volatile [[TYPE14]] [[LOAD14]]
37+
// CHECK: [[LOAD15:%.*]] = load [[TYPE15:.*]], [[TYPE15]] addrspace(3)* getelementptr inbounds
38+
// CHECK:store volatile [[TYPE15]] [[LOAD15]]
39+
// CHECK: [[LOAD16:%.*]] = load [[TYPE16:.*]], [[TYPE16]] addrspace(3)* getelementptr inbounds
40+
// CHECK:store volatile [[TYPE16]] [[LOAD16]]
41+
// CHECK: [[LOAD17:%.*]] = load [[TYPE17:.*]], [[TYPE17]] addrspace(3)* getelementptr inbounds
42+
// CHECK:store volatile [[TYPE17]] [[LOAD17]]
43+
// CHECK: [[LOAD18:%.*]] = load [[TYPE18:.*]], [[TYPE18]] addrspace(3)* getelementptr inbounds
44+
// CHECK:store volatile [[TYPE18]] [[LOAD18]]
45+
// CHECK: [[LOAD19:%.*]] = load [[TYPE19:.*]], [[TYPE19]] addrspace(3)* getelementptr inbounds
46+
// CHECK:store volatile [[TYPE19]] [[LOAD19]]
47+
48+
// CHECK: [[LOAD20:%.*]] = load [[TYPE20:.*]], [[TYPE20]] addrspace(3)* getelementptr inbounds
49+
// CHECK:store volatile [[TYPE20]] [[LOAD20]]
50+
// CHECK: [[LOAD21:%.*]] = load [[TYPE21:.*]], [[TYPE21]] addrspace(3)* getelementptr inbounds
51+
// CHECK:store volatile [[TYPE21]] [[LOAD21]]
52+
// CHECK: [[LOAD22:%.*]] = load [[TYPE22:.*]], [[TYPE22]] addrspace(3)* getelementptr inbounds
53+
// CHECK:store volatile [[TYPE22]] [[LOAD22]]
54+
// CHECK: [[LOAD23:%.*]] = load [[TYPE23:.*]], [[TYPE23]] addrspace(3)* getelementptr inbounds
55+
// CHECK:store volatile [[TYPE23]] [[LOAD23]]
56+
// CHECK: [[LOAD24:%.*]] = load [[TYPE24:.*]], [[TYPE24]] addrspace(3)* getelementptr inbounds
57+
// CHECK:store volatile [[TYPE24]] [[LOAD24]]
58+
// CHECK: [[LOAD25:%.*]] = load [[TYPE25:.*]], [[TYPE25]] addrspace(3)* getelementptr inbounds
59+
// CHECK:store volatile [[TYPE25]] [[LOAD25]]
60+
// CHECK: [[LOAD26:%.*]] = load [[TYPE26:.*]], [[TYPE26]] addrspace(3)* getelementptr inbounds
61+
// CHECK:store volatile [[TYPE26]] [[LOAD26]]
62+
// CHECK: [[LOAD27:%.*]] = load [[TYPE27:.*]], [[TYPE27]] addrspace(3)* getelementptr inbounds
63+
// CHECK:store volatile [[TYPE27]] [[LOAD27]]
64+
65+
// And no more:
66+
// CHECK-NOT: [[LOAD28:%.*]] = load [[TYPE28:.*]], [[TYPE28]] addrspace(3)* getelementptr inbounds
67+
68+
// Innermost aggregate: 1 uint + 3 floats = 4 scalar elements.
struct Contained {
69+
uint j;
70+
float af[3];
71+
};
72+
73+
// 1 half + 2 * Contained (4 each) = 9 scalar elements.
struct Bigger {
74+
half h;
75+
Contained a[2];
76+
};
77+
78+
// 1 uint + 3 * Bigger (9 each) = 28 scalar elements, which is the number of
// load/store pairs the directives above expect.
struct MyPayload {
79+
uint i;
80+
Bigger big[3];
81+
};
82+
83+
// The payload lives in group-shared memory and is handed to DispatchMesh
// directly, without a local copy.
groupshared MyPayload payload;
84+
85+
[numthreads(1, 1, 1)] void main(uint gid
86+
: SV_GroupID) {
87+
DispatchMesh(1, 1, 1, payload);
88+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// RUN: %dxc -Od -Emain -Tas_6_6 %s | %opt -S -hlsl-dxil-PIX-add-tid-to-as-payload,dispatchArgY=3,dispatchArgZ=7 | %FileCheck %s
2+
3+
// Check that the payload was piece-wise copied into a local copy
4+
// CHECK: [[LOADGEP:%.*]] = getelementptr %struct.MyPayload
5+
// CHECK: [[LOAD:%.*]] = load i32, i32* [[LOADGEP]]
6+
// CHECK: store volatile i32 [[LOAD]]
7+
8+
// Minimal payload: a single uint, so one load/store pair is expected.
struct MyPayload
9+
{
10+
uint i;
11+
};
12+
13+
groupshared MyPayload payload;
14+
15+
[numthreads(1, 1, 1)]
16+
void main(uint gid : SV_GroupID)
17+
{
18+
// Here the shader itself copies the group-shared payload into a local
// variable, so DispatchMesh receives the local copy rather than the
// group-shared one.
MyPayload copy;
19+
copy = payload;
20+
DispatchMesh(1, 1, 1, copy);
21+
}

0 commit comments

Comments
 (0)