Skip to content

Commit adc4d57

Browse files
committed
Fix compilation issues with shader
1 parent a1747c6 commit adc4d57

File tree

3 files changed

+38
-25
lines changed

3 files changed

+38
-25
lines changed

include/nbl/builtin/hlsl/property_pool/copy.hlsl renamed to include/nbl/builtin/hlsl/property_pool/copy.comp.hlsl

Lines changed: 28 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,25 @@
11
#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl"
2+
#include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
23
#include "nbl/builtin/hlsl/property_pool/transfer.hlsl"
34

5+
// https://github.com/microsoft/DirectXShaderCompiler/issues/6144
6+
template<typename capability_traits=nbl::hlsl::jit::device_capabilities_traits>
7+
uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() {
8+
return uint32_t3(capability_traits::maxOptimallyResidentWorkgroupInvocations, 1, 1);
9+
}
10+
11+
[[numthreads(1, 1, 1)]
12+
void main(uint32_t3 dispatchId : SV_DispatchThreadID)
13+
{
14+
nbl::hlsl::property_pool::main(dispatchId);
15+
}
16+
417
namespace nbl
518
{
619
namespace hlsl
720
{
821
namespace property_pools
922
{
10-
// https://github.com/microsoft/DirectXShaderCompiler/issues/6144
11-
template<typename capability_traits=nbl::hlsl::jit::device_capabilities_traits>
12-
uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() {
13-
return uint32_t3(capability_traits::maxOptimallyResidentWorkgroupInvocations, 1, 1);
14-
}
1523

1624
[[vk::push_constant]] GlobalPushContants globals;
1725

@@ -37,13 +45,13 @@ struct TransferLoop
3745

3846
void copyLoop(uint baseInvocationIndex, uint propertyId, TransferRequest transferRequest, uint dispatchSize)
3947
{
40-
uint lastInvocation = min(transferRequest.elementCount, gloabls.endOffset);
48+
uint lastInvocation = min(transferRequest.elementCount, globals.endOffset);
4149
for (uint invocationIndex = globals.beginOffset + baseInvocationIndex; invocationIndex < lastInvocation; invocationIndex += dispatchSize)
4250
{
4351
iteration(propertyId, transferRequest.propertySize, transferRequest.srcAddr, transferRequest.dstAddr, invocationIndex);
4452
}
4553
}
46-
}
54+
};
4755

4856
// For creating permutations of the functions based on parameters that are constant over the transfer request
4957
// These branches should all be scalar, and because of how templates work, the loops shouldn't have any
@@ -59,7 +67,7 @@ struct TransferLoopPermutationSrcIndexSizeLog
5967
else if (transferRequest.dstIndexSizeLog2 == 2) TransferLoop<Fill, SrcIndexIota, DstIndexIota, SrcIndexSizeLog2, 2>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
6068
else /*if (transferRequest.dstIndexSizeLog2 == 3)*/ TransferLoop<Fill, SrcIndexIota, DstIndexIota, SrcIndexSizeLog2, 3>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
6169
}
62-
}
70+
};
6371

6472
template<bool Fill, bool SrcIndexIota, bool DstIndexIota>
6573
struct TransferLoopPermutationDstIota
@@ -71,7 +79,7 @@ struct TransferLoopPermutationDstIota
7179
else if (transferRequest.srcIndexSizeLog2 == 2) TransferLoopPermutationSrcIndexSizeLog<Fill, SrcIndexIota, DstIndexIota, 2>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
7280
else /*if (transferRequest.srcIndexSizeLog2 == 3)*/ TransferLoopPermutationSrcIndexSizeLog<Fill, SrcIndexIota, DstIndexIota, 3>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
7381
}
74-
}
82+
};
7583

7684
template<bool Fill, bool SrcIndexIota>
7785
struct TransferLoopPermutationSrcIota
@@ -82,7 +90,7 @@ struct TransferLoopPermutationSrcIota
8290
if (dstIota) TransferLoopPermutationDstIota<Fill, SrcIndexIota, true>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
8391
else TransferLoopPermutationDstIota<Fill, SrcIndexIota, false>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
8492
}
85-
}
93+
};
8694

8795
template<bool Fill>
8896
struct TransferLoopPermutationFill
@@ -93,9 +101,9 @@ struct TransferLoopPermutationFill
93101
if (srcIota) TransferLoopPermutationSrcIota<Fill, true>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
94102
else TransferLoopPermutationSrcIota<Fill, false>.copyLoop(baseInvocationIndex, propertyId, transferRequest, dispatchSize);
95103
}
96-
}
104+
};
97105

98-
void main(uint32_t3 dispatchId : SV_DispatchThreadID)
106+
void main(uint32_t3 dispatchId)
99107
{
100108
const uint propertyId = dispatchId.y;
101109
const uint invocationIndex = dispatchId.x;
@@ -107,12 +115,14 @@ void main(uint32_t3 dispatchId : SV_DispatchThreadID)
107115
transferRequest.dstAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t));
108116
transferRequest.srcIndexAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 2);
109117
transferRequest.dstIndexAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 3);
110-
// TODO: These are all part of the same bitfield and should be read with a single RawBufferLoad
111-
transferRequest.elementCount = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 4);
112-
transferRequest.propertySize = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 5);
113-
transferRequest.fill = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 6);
114-
transferRequest.srcIndexSizeLog2 = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 7);
115-
transferRequest.dstIndexSizeLog2 = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 8);
118+
// Remaining elements are part of the same bitfield
119+
// TODO: Do this only using raw buffer load?
120+
uint64_t bitfieldType = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 4);
121+
transferRequest.elementCount = bitfieldType;
122+
transferRequest.propertySize = bitfieldType >> 35;
123+
transferRequest.fill = bitfieldType >> (35 + 24);
124+
transferRequest.srcIndexSizeLog2 = bitfieldType >> (35 + 24 + 1);
125+
transferRequest.dstIndexSizeLog2 = bitfieldType >> (35 + 24 + 1 + 2);
116126

117127
const uint dispatchSize = capability_traits::maxOptimallyResidentWorkgroupInvocations;
118128
const bool fill = transferRequest.fill == 1;
@@ -124,4 +134,3 @@ void main(uint32_t3 dispatchId : SV_DispatchThreadID)
124134
}
125135
}
126136
}
127-

include/nbl/builtin/hlsl/property_pool/transfer.hlsl

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,14 @@ struct TransferRequest
1010
// This represents a transfer command/request
1111
uint64_t srcAddr;
1212
uint64_t dstAddr;
13-
uint64_t srcIndexAddr = 0; // IOTA default
14-
uint64_t dstIndexAddr = 0; // IOTA default
13+
uint64_t srcIndexAddr; // IOTA default
14+
uint64_t dstIndexAddr; // IOTA default
1515
uint64_t elementCount : 35; // allow up to 64GB IGPUBuffers
1616
uint64_t propertySize : 24; // all the leftover bits (just use bytes now)
17-
uint64_t fill : 1 = false;
17+
uint64_t fill;
1818
// 0=uint8, 1=uint16, 2=uint32, 3=uint64
19-
uint64_t srcIndexSizeLog2 : 2 = 1;
20-
uint64_t dstIndexSizeLog2 : 2 = 1;
19+
uint64_t srcIndexSizeLog2 : 2;
20+
uint64_t dstIndexSizeLog2 : 2;
2121
};
2222

2323
struct GlobalPushContants

src/nbl/builtin/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -300,4 +300,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/workgroup/broadcast.hlsl")
300300
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/workgroup/scratch_size.hlsl")
301301
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/workgroup/shared_scan.hlsl")
302302

303-
ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL")
303+
# property pools
304+
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/property_pool/transfer.hlsl")
305+
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/property_pool/copy.comp.hlsl")
306+
307+
ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL")

0 commit comments

Comments
 (0)