Skip to content

Commit 1707158

Browse files
committed
Fix compilation problems
1 parent d9ddf41 commit 1707158

File tree

2 files changed

+33
-37
lines changed

2 files changed

+33
-37
lines changed

include/nbl/builtin/hlsl/property_pool/copy.comp.hlsl

Lines changed: 32 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,6 @@
22
#include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
33
#include "nbl/builtin/hlsl/property_pool/transfer.hlsl"
44

5-
// https://github.com/microsoft/DirectXShaderCompiler/issues/6144
6-
// template<typename capability_traits=nbl::hlsl::jit::device_capabilities_traits>
7-
// uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() {
8-
// return uint32_t3(capability_traits::maxOptimallyResidentWorkgroupInvocations, 1, 1);
9-
// }
10-
11-
[[numthreads(1, 1, 1)]
12-
void main(uint32_t3 dispatchId : SV_DispatchThreadID)
13-
{
14-
nbl::hlsl::property_pool::main(dispatchId);
15-
}
16-
175
namespace nbl
186
{
197
namespace hlsl
@@ -28,14 +16,14 @@ struct TransferLoop
2816
{
2917
void iteration(uint propertyId, uint64_t propertySize, uint64_t srcAddr, uint64_t dstAddr, uint invocationIndex)
3018
{
31-
const uint srcOffset = uint64_t(invocationIndex) * (uint64_t(1) << SrcIndexSizeLog2) * propertySize;
32-
const uint dstOffset = uint64_t(invocationIndex) * (uint64_t(1) << DstIndexSizeLog2) * propertySize;
19+
const uint64_t srcOffset = uint64_t(invocationIndex) * (uint64_t(1) << SrcIndexSizeLog2) * propertySize;
20+
const uint64_t dstOffset = uint64_t(invocationIndex) * (uint64_t(1) << DstIndexSizeLog2) * propertySize;
3321

34-
const uint srcIndexAddress = Fill ? srcAddr + srcOffset : srcAddr;
35-
const uint dstIndexAddress = Fill ? dstAddr + dstOffset : dstAddr;
22+
const uint64_t srcIndexAddress = Fill ? srcAddr + srcOffset : srcAddr;
23+
const uint64_t dstIndexAddress = Fill ? dstAddr + dstOffset : dstAddr;
3624

37-
const uint srcAddressMapped = SrcIndexIota ? srcIndexAddress : vk::RawBufferLoad<uint64_t>(srcIndexAddress);
38-
const uint dstAddressMapped = DstIndexIota ? dstIndexAddress : vk::RawBufferLoad<uint64_t>(dstIndexAddress);
25+
const uint64_t srcAddressMapped = SrcIndexIota ? srcIndexAddress : vk::RawBufferLoad<uint64_t>(srcIndexAddress);
26+
const uint64_t dstAddressMapped = DstIndexIota ? dstIndexAddress : vk::RawBufferLoad<uint64_t>(dstIndexAddress);
3927

4028
if (SrcIndexSizeLog2 == 0) {} // we can't write individual bytes
4129
else if (SrcIndexSizeLog2 == 1) vk::RawBufferStore<uint16_t>(dstAddressMapped, vk::RawBufferLoad<uint16_t>(srcAddressMapped));
@@ -51,21 +39,21 @@ struct TransferLoop
5139
iteration(propertyId, transferRequest.propertySize, transferRequest.srcAddr, transferRequest.dstAddr, invocationIndex);
5240
}
5341
}
54-
};
42+
};
5543

5644
// For creating permutations of the functions based on parameters that are constant over the transfer request
57-
// These branches should all be scalar, and because of how templates work, the loops shouldn't have any
45+
// These branches should all be scalar, and because template arguments are resolved at compile time, the loops shouldn't have any
5846
// branching within them
5947

6048
template<bool Fill, bool SrcIndexIota, bool DstIndexIota, uint64_t SrcIndexSizeLog2>
6149
struct TransferLoopPermutationSrcIndexSizeLog
6250
{
6351
void copyLoop(uint baseInvocationIndex, uint propertyId, TransferRequest transferRequest, uint dispatchSize)
6452
{
65-
if (transferRequest.dstIndexSizeLog2 == 0) TransferLoop<Fill, SrcIndexIota, DstIndexIota, SrcIndexSizeLog2, 0>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
66-
else if (transferRequest.dstIndexSizeLog2 == 1) TransferLoop<Fill, SrcIndexIota, DstIndexIota, SrcIndexSizeLog2, 1>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
67-
else if (transferRequest.dstIndexSizeLog2 == 2) TransferLoop<Fill, SrcIndexIota, DstIndexIota, SrcIndexSizeLog2, 2>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
68-
else /*if (transferRequest.dstIndexSizeLog2 == 3)*/ TransferLoop<Fill, SrcIndexIota, DstIndexIota, SrcIndexSizeLog2, 3>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
53+
if (transferRequest.dstIndexSizeLog2 == 0) { TransferLoop<Fill, SrcIndexIota, DstIndexIota, SrcIndexSizeLog2, 0> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
54+
else if (transferRequest.dstIndexSizeLog2 == 1) { TransferLoop<Fill, SrcIndexIota, DstIndexIota, SrcIndexSizeLog2, 1> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
55+
else if (transferRequest.dstIndexSizeLog2 == 2) { TransferLoop<Fill, SrcIndexIota, DstIndexIota, SrcIndexSizeLog2, 2> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
56+
else /*if (transferRequest.dstIndexSizeLog2 == 3)*/ { TransferLoop<Fill, SrcIndexIota, DstIndexIota, SrcIndexSizeLog2, 3> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
6957
}
7058
};
7159

@@ -74,10 +62,10 @@ struct TransferLoopPermutationDstIota
7462
{
7563
void copyLoop(uint baseInvocationIndex, uint propertyId, TransferRequest transferRequest, uint dispatchSize)
7664
{
77-
if (transferRequest.srcIndexSizeLog2 == 0) TransferLoopPermutationSrcIndexSizeLog<Fill, SrcIndexIota, DstIndexIota, 0>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
78-
else if (transferRequest.srcIndexSizeLog2 == 1) TransferLoopPermutationSrcIndexSizeLog<Fill, SrcIndexIota, DstIndexIota, 1>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
79-
else if (transferRequest.srcIndexSizeLog2 == 2) TransferLoopPermutationSrcIndexSizeLog<Fill, SrcIndexIota, DstIndexIota, 2>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
80-
else /*if (transferRequest.srcIndexSizeLog2 == 3)*/ TransferLoopPermutationSrcIndexSizeLog<Fill, SrcIndexIota, DstIndexIota, 3>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
65+
if (transferRequest.srcIndexSizeLog2 == 0) { TransferLoopPermutationSrcIndexSizeLog<Fill, SrcIndexIota, DstIndexIota, 0> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
66+
else if (transferRequest.srcIndexSizeLog2 == 1) { TransferLoopPermutationSrcIndexSizeLog<Fill, SrcIndexIota, DstIndexIota, 1> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
67+
else if (transferRequest.srcIndexSizeLog2 == 2) { TransferLoopPermutationSrcIndexSizeLog<Fill, SrcIndexIota, DstIndexIota, 2> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
68+
else /*if (transferRequest.srcIndexSizeLog2 == 3)*/ { TransferLoopPermutationSrcIndexSizeLog<Fill, SrcIndexIota, DstIndexIota, 3> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
8169
}
8270
};
8371

@@ -87,8 +75,8 @@ struct TransferLoopPermutationSrcIota
8775
void copyLoop(uint baseInvocationIndex, uint propertyId, TransferRequest transferRequest, uint dispatchSize)
8876
{
8977
bool dstIota = transferRequest.dstAddr == 0;
90-
if (dstIota) TransferLoopPermutationDstIota<Fill, SrcIndexIota, true>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
91-
else TransferLoopPermutationDstIota<Fill, SrcIndexIota, false>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
78+
if (dstIota) { TransferLoopPermutationDstIota<Fill, SrcIndexIota, true> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
79+
else { TransferLoopPermutationDstIota<Fill, SrcIndexIota, false> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
9280
}
9381
};
9482

@@ -98,19 +86,20 @@ struct TransferLoopPermutationFill
9886
void copyLoop(uint baseInvocationIndex, uint propertyId, TransferRequest transferRequest, uint dispatchSize)
9987
{
10088
bool srcIota = transferRequest.srcAddr == 0;
101-
if (srcIota) TransferLoopPermutationSrcIota<Fill, true>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
102-
else TransferLoopPermutationSrcIota<Fill, false>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
89+
if (srcIota) { TransferLoopPermutationSrcIota<Fill, true> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
90+
else { TransferLoopPermutationSrcIota<Fill, false> loop; loop.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize); }
10391
}
10492
};
10593

94+
template<typename device_capabilities>
10695
void main(uint32_t3 dispatchId)
10796
{
10897
const uint propertyId = dispatchId.y;
10998
const uint invocationIndex = dispatchId.x;
11099

111100
// Load the transfer request field by field from its buffer address (a whole struct
112101
// can't be loaded through BDA when compiling HLSL to SPIR-V)
113-
const TransferRequest transferRequest;
102+
TransferRequest transferRequest;
114103
transferRequest.srcAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress);
115104
transferRequest.dstAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t));
116105
transferRequest.srcIndexAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 2);
@@ -124,13 +113,20 @@ void main(uint32_t3 dispatchId)
124113
transferRequest.srcIndexSizeLog2 = bitfieldType >> (35 + 24 + 1);
125114
transferRequest.dstIndexSizeLog2 = bitfieldType >> (35 + 24 + 1 + 2);
126115

127-
const uint dispatchSize = capability_traits::maxOptimallyResidentWorkgroupInvocations;
116+
const uint dispatchSize = nbl::hlsl::device_capabilities_traits<device_capabilities>::maxOptimallyResidentWorkgroupInvocations;
128117
const bool fill = transferRequest.fill == 1;
129118

130-
if (fill) TransferLoopPermutationFill<true>.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize);
131-
else TransferLoopPermutationFill<false>.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize);
119+
if (fill) { TransferLoopPermutationFill<true> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
120+
else { TransferLoopPermutationFill<false> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
132121
}
133122

134123
}
135124
}
136125
}
126+
127+
[numthreads(1,1,1)]
128+
void main(uint32_t3 dispatchId : SV_DispatchThreadID)
129+
{
130+
nbl::hlsl::property_pools::main<nbl::hlsl::jit::device_capabilities>(dispatchId);
131+
}
132+

include/nbl/builtin/hlsl/property_pool/transfer.hlsl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ struct TransferRequest
1717
uint64_t dstIndexAddr; // IOTA default
1818
uint64_t elementCount : 35; // allow up to 64GB IGPUBuffers
1919
uint64_t propertySize : 24; // all the leftover bits (just use bytes now)
20-
uint64_t fill;
20+
uint64_t fill : 1;
2121
// 0=uint8, 1=uint16, 2=uint32, 3=uint64
2222
uint64_t srcIndexSizeLog2 : 2;
2323
uint64_t dstIndexSizeLog2 : 2;

0 commit comments

Comments
 (0)