@@ -14,13 +14,13 @@ namespace property_pools
14
14
template<bool Fill, bool SrcIndexIota, bool DstIndexIota, uint64_t SrcIndexSizeLog2, uint64_t DstIndexSizeLog2>
15
15
struct TransferLoop
16
16
{
17
- void iteration (uint propertyId, TransferRequest transferRequest, uint invocationIndex)
17
+ void iteration (uint propertyId, TransferRequest transferRequest, uint64_t invocationIndex)
18
18
{
19
19
const uint64_t srcIndexSize = uint64_t (1 ) << SrcIndexSizeLog2;
20
20
const uint64_t dstIndexSize = uint64_t (1 ) << DstIndexSizeLog2;
21
21
22
- const uint64_t srcOffset = uint64_t ( invocationIndex) * srcIndexSize * transferRequest.propertySize;
23
- const uint64_t dstOffset = uint64_t ( invocationIndex) * dstIndexSize * transferRequest.propertySize;
22
+ const uint64_t srcOffset = invocationIndex * srcIndexSize * transferRequest.propertySize;
23
+ const uint64_t dstOffset = invocationIndex * dstIndexSize * transferRequest.propertySize;
24
24
25
25
const uint64_t srcIndexAddress = Fill ? transferRequest.srcIndexAddr + srcOffset : transferRequest.srcIndexAddr;
26
26
const uint64_t dstIndexAddress = Fill ? transferRequest.dstIndexAddr + dstOffset : transferRequest.dstIndexAddr;
@@ -112,26 +112,28 @@ void main(uint32_t3 dispatchId)
112
112
// Loading transfer request from the pointer (can't use struct
113
113
// with BDA on HLSL SPIRV)
114
114
TransferRequest transferRequest;
115
- transferRequest.srcAddr = vk::RawBufferLoad<uint64_t >(globals.transferCommandsAddress);
115
+ transferRequest.srcAddr = uint64_t(vk::RawBufferLoad<uint>(globals.transferCommandsAddress)) | (uint64_t(vk::RawBufferLoad<uint>(globals.transferCommandsAddress + sizeof(uint))) << 32); // widen each 32-bit half to 64 bits before combining: shifting a 32-bit uint left by 32 cannot produce the high word of the address
116
116
transferRequest.dstAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof (uint64_t));
117
117
transferRequest.srcIndexAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof (uint64_t) * 2 );
118
118
transferRequest.dstIndexAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof (uint64_t) * 3 );
119
119
// Remaining elements are part of the same bitfield
120
120
// TODO: Do this only using raw buffer load?
121
- uint2 bitfieldType = vk::RawBufferLoad<uint2 >(globals.transferCommandsAddress + sizeof (uint64_t) * 4 );
122
- transferRequest.elementCount32 = bitfieldType;
123
- transferRequest.elementCountExtra = bitfieldType;
124
- transferRequest.propertySize = bitfieldType >> 3 ;
125
- transferRequest.fill = bitfieldType >> (3 + 24 );
126
- transferRequest.srcIndexSizeLog2 = bitfieldType >> (3 + 24 + 1 );
127
- transferRequest.dstIndexSizeLog2 = bitfieldType >> (3 + 24 + 1 + 2 );
121
+ uint64_t bitfieldType = vk::RawBufferLoad<uint64_t >(globals.transferCommandsAddress + sizeof (uint64_t) * 4 );
122
+ transferRequest.elementCount32 = uint32_t ( bitfieldType) ;
123
+ transferRequest.elementCountExtra = uint32_t ( bitfieldType) ;
124
+ transferRequest.propertySize = uint32_t ( bitfieldType >> 3 ) ;
125
+ transferRequest.fill = uint32_t ( bitfieldType >> (3 + 24 ) );
126
+ transferRequest.srcIndexSizeLog2 = uint32_t ( bitfieldType >> (3 + 24 + 1 ) );
127
+ transferRequest.dstIndexSizeLog2 = uint32_t ( bitfieldType >> (3 + 24 + 1 + 2 ) );
128
128
129
129
const uint dispatchSize = nbl::hlsl::device_capabilities_traits<device_capabilities>::maxOptimallyResidentWorkgroupInvocations;
130
130
const bool fill = transferRequest.fill == 1 ;
131
131
132
- vk::RawBufferStore<uint32_t>(transferRequest.dstAddr, 69 );
133
- // if (fill) { TransferLoopPermutationFill<true> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
134
- // else { TransferLoopPermutationFill<false> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
132
+ vk::RawBufferStore<uint64_t>(globals.transferCommandsAddress + 40 * 3 , transferRequest.srcAddr);
133
+ vk::RawBufferStore<uint64_t>(globals.transferCommandsAddress + 40 * 4 , transferRequest.dstAddr);
134
+ vk::RawBufferStore<uint >(globals.transferCommandsAddress + 40 * 5 , vk::RawBufferLoad<uint >(transferRequest.srcAddr + sizeof (uint16_t) * 3 ));
135
+ //if (fill) { TransferLoopPermutationFill<true> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
136
+ //else { TransferLoopPermutationFill<false> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
135
137
}
136
138
137
139
}
0 commit comments