Skip to content

Commit 52d6972

Browse files
committed
WIP testing
1 parent 88d1d00 commit 52d6972

File tree

3 files changed

+23
-17
lines changed

3 files changed

+23
-17
lines changed

include/nbl/builtin/hlsl/property_pool/copy.comp.hlsl

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ namespace property_pools
1414
template<bool Fill, bool SrcIndexIota, bool DstIndexIota, uint64_t SrcIndexSizeLog2, uint64_t DstIndexSizeLog2>
1515
struct TransferLoop
1616
{
17-
void iteration(uint propertyId, TransferRequest transferRequest, uint invocationIndex)
17+
void iteration(uint propertyId, TransferRequest transferRequest, uint64_t invocationIndex)
1818
{
1919
const uint64_t srcIndexSize = uint64_t(1) << SrcIndexSizeLog2;
2020
const uint64_t dstIndexSize = uint64_t(1) << DstIndexSizeLog2;
2121

22-
const uint64_t srcOffset = uint64_t(invocationIndex) * srcIndexSize * transferRequest.propertySize;
23-
const uint64_t dstOffset = uint64_t(invocationIndex) * dstIndexSize * transferRequest.propertySize;
22+
const uint64_t srcOffset = invocationIndex * srcIndexSize * transferRequest.propertySize;
23+
const uint64_t dstOffset = invocationIndex * dstIndexSize * transferRequest.propertySize;
2424

2525
const uint64_t srcIndexAddress = Fill ? transferRequest.srcIndexAddr + srcOffset : transferRequest.srcIndexAddr;
2626
const uint64_t dstIndexAddress = Fill ? transferRequest.dstIndexAddr + dstOffset : transferRequest.dstIndexAddr;
@@ -112,26 +112,28 @@ void main(uint32_t3 dispatchId)
112112
// Loading transfer request from the pointer (can't use struct
113113
// with BDA on HLSL SPIRV)
114114
TransferRequest transferRequest;
115-
transferRequest.srcAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress);
115+
transferRequest.srcAddr = vk::RawBufferLoad<uint>(globals.transferCommandsAddress) | vk::RawBufferLoad<uint>(globals.transferCommandsAddress + sizeof(uint)) << 32;
116116
transferRequest.dstAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t));
117117
transferRequest.srcIndexAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 2);
118118
transferRequest.dstIndexAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 3);
119119
// Remaining elements are part of the same bitfield
120120
// TODO: Do this only using raw buffer load?
121-
uint2 bitfieldType = vk::RawBufferLoad<uint2>(globals.transferCommandsAddress + sizeof(uint64_t) * 4);
122-
transferRequest.elementCount32 = bitfieldType;
123-
transferRequest.elementCountExtra = bitfieldType;
124-
transferRequest.propertySize = bitfieldType >> 3;
125-
transferRequest.fill = bitfieldType >> (3 + 24);
126-
transferRequest.srcIndexSizeLog2 = bitfieldType >> (3 + 24 + 1);
127-
transferRequest.dstIndexSizeLog2 = bitfieldType >> (3 + 24 + 1 + 2);
121+
uint64_t bitfieldType = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof(uint64_t) * 4);
122+
transferRequest.elementCount32 = uint32_t(bitfieldType);
123+
transferRequest.elementCountExtra = uint32_t(bitfieldType);
124+
transferRequest.propertySize = uint32_t(bitfieldType >> 3);
125+
transferRequest.fill = uint32_t(bitfieldType >> (3 + 24));
126+
transferRequest.srcIndexSizeLog2 = uint32_t(bitfieldType >> (3 + 24 + 1));
127+
transferRequest.dstIndexSizeLog2 = uint32_t(bitfieldType >> (3 + 24 + 1 + 2));
128128

129129
const uint dispatchSize = nbl::hlsl::device_capabilities_traits<device_capabilities>::maxOptimallyResidentWorkgroupInvocations;
130130
const bool fill = transferRequest.fill == 1;
131131

132-
vk::RawBufferStore<uint32_t>(transferRequest.dstAddr, 69);
133-
// if (fill) { TransferLoopPermutationFill<true> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
134-
// else { TransferLoopPermutationFill<false> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
132+
vk::RawBufferStore<uint64_t>(globals.transferCommandsAddress + 40 * 3, transferRequest.srcAddr);
133+
vk::RawBufferStore<uint64_t>(globals.transferCommandsAddress + 40 * 4, transferRequest.dstAddr);
134+
vk::RawBufferStore<uint>(globals.transferCommandsAddress + 40 * 5, vk::RawBufferLoad<uint>(transferRequest.srcAddr + sizeof(uint16_t) * 3));
135+
//if (fill) { TransferLoopPermutationFill<true> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
136+
//else { TransferLoopPermutationFill<false> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
135137
}
136138

137139
}

include/nbl/builtin/hlsl/property_pool/transfer.hlsl

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ struct TransferRequest
1818
uint64_t srcIndexAddr; // IOTA default
1919
uint64_t dstIndexAddr; // IOTA default
2020
// TODO: go back to this ideal layout when things work
21+
// (Getting a fatal error from DXC when using 64-bit bitfields:)
22+
// fatal error: generated SPIR-V is invalid: [VUID-StandaloneSpirv-Base-04781] Expected 32-bit int type for Base operand: BitFieldInsert
23+
// %58 = OpBitFieldInsert %ulong %42 %57 %uint_0 %uint_35
24+
//
2125
//uint64_t elementCount : 35; // allow up to 64GB IGPUBuffers
2226
//uint64_t propertySize : 24; // all the leftover bits (just use bytes now)
2327
//uint64_t fill : 1;
@@ -34,13 +38,13 @@ struct TransferRequest
3438

3539
struct GlobalPushContants
3640
{
41+
// BDA address (GPU pointer) into the transfer commands buffer
42+
uint64_t transferCommandsAddress;
3743
// Define the range of invocations (X axis) that will be transferred over in this dispatch
3844
// May be sectioned off in the case of overflow or any other situation that doesn't allow
3945
// for a full transfer
4046
uint64_t beginOffset;
4147
uint64_t endOffset;
42-
// BDA address (GPU pointer) into the transfer commands buffer
43-
uint64_t transferCommandsAddress;
4448
};
4549

4650
NBL_CONSTEXPR uint32_t MaxPropertiesPerDispatch = 128;

src/nbl/video/utilities/CPropertyPoolHandler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ bool CPropertyPoolHandler::transferProperties(
147147
pushConstants.endOffset = endDWORD;
148148
pushConstants.transferCommandsAddress = scratchBufferDeviceAddr;
149149
}
150-
cmdbuf->pushConstants(m_pipeline->getLayout(), asset::IShader::ESS_COMPUTE, 0u, sizeof(nbl::hlsl::property_pools::GlobalPushContants), &pushConstants);
150+
cmdbuf->pushConstants(m_pipeline->getLayout(), asset::IShader::ESS_COMPUTE, 0u, sizeof(pushConstants), &pushConstants);
151151

152152
// dispatch
153153
{

0 commit comments

Comments
 (0)