Skip to content

Commit 7ac728b

Browse files
committed
PR reviews
1 parent 61604ee commit 7ac728b

File tree

2 files changed

+23
-9
lines changed

2 files changed

+23
-9
lines changed

include/nbl/builtin/hlsl/property_pool/copy.comp.hlsl

Lines changed: 21 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -25,18 +25,31 @@ struct TransferLoop
2525

2626
// IOTA: Use the index as the fetching offset
2727
// Non IOTA: Read the address buffer ("index buffer") to select fetching offset
28-
const uint64_t srcAddressBufferOffset = SrcIndexIota ? srcOffset : vk::RawBufferLoad<uint32_t>(transferRequest.srcIndexAddr + srcOffset * sizeof(uint32_t));
29-
const uint64_t dstAddressBufferOffset = DstIndexIota ? dstOffset : vk::RawBufferLoad<uint32_t>(transferRequest.dstIndexAddr + dstOffset * sizeof(uint32_t));
28+
uint64_t srcAddressBufferOffset;
29+
uint64_t dstAddressBufferOffset;
30+
31+
if (SrcIndexIota) srcAddressBufferOffset = srcOffset;
32+
else
33+
{
34+
if (SrcIndexSizeLog2 == 0) {} // we can't read individual byte
35+
else if (SrcIndexSizeLog2 == 1) srcAddressBufferOffset = vk::RawBufferLoad<uint16_t>(transferRequest.srcIndexAddr + srcOffset * sizeof(uint16_t));
36+
else if (SrcIndexSizeLog2 == 2) srcAddressBufferOffset = vk::RawBufferLoad<uint32_t>(transferRequest.srcIndexAddr + srcOffset * sizeof(uint32_t));
37+
else if (SrcIndexSizeLog2 == 3) srcAddressBufferOffset = vk::RawBufferLoad<uint64_t>(transferRequest.srcIndexAddr + srcOffset * sizeof(uint64_t));
38+
}
39+
40+
if (DstIndexIota) dstAddressBufferOffset = dstOffset;
41+
else
42+
{
43+
if (DstIndexSizeLog2 == 0) {} // we can't read individual byte
44+
else if (DstIndexSizeLog2 == 1) dstAddressBufferOffset = vk::RawBufferLoad<uint16_t>(transferRequest.dstIndexAddr + dstOffset * sizeof(uint16_t));
45+
else if (DstIndexSizeLog2 == 2) dstAddressBufferOffset = vk::RawBufferLoad<uint32_t>(transferRequest.dstIndexAddr + dstOffset * sizeof(uint32_t));
46+
else if (DstIndexSizeLog2 == 3) dstAddressBufferOffset = vk::RawBufferLoad<uint64_t>(transferRequest.dstIndexAddr + dstOffset * sizeof(uint64_t));
47+
}
3048

3149
const uint64_t srcAddressMapped = transferRequest.srcAddr + srcAddressBufferOffset * srcIndexSize;
3250
const uint64_t dstAddressMapped = transferRequest.dstAddr + dstAddressBufferOffset * dstIndexSize;
3351

34-
//vk::RawBufferStore<uint64_t>(transferRequest.dstAddr + invocationIndex * sizeof(uint64_t) * 2, srcAddressMapped,8);
35-
//vk::RawBufferStore<uint64_t>(transferRequest.dstAddr + invocationIndex * sizeof(uint64_t) * 2 + sizeof(uint64_t), dstAddressMapped,8);
36-
if (SrcIndexSizeLog2 == 0) {} // we can't write individual bytes
37-
else if (SrcIndexSizeLog2 == 1) vk::RawBufferStore<uint16_t>(dstAddressMapped, vk::RawBufferLoad<uint16_t>(srcAddressMapped));
38-
else if (SrcIndexSizeLog2 == 2) vk::RawBufferStore<uint32_t>(dstAddressMapped, vk::RawBufferLoad<uint32_t>(srcAddressMapped));
39-
else if (SrcIndexSizeLog2 == 3) vk::RawBufferStore<uint64_t>(dstAddressMapped, vk::RawBufferLoad<uint64_t>(srcAddressMapped));
52+
vk::RawBufferStore<uint32_t>(dstAddressMapped, vk::RawBufferLoad<uint32_t>(srcAddressMapped));
4053
}
4154

4255
void copyLoop(NBL_CONST_REF_ARG(TransferDispatchInfo) dispatchInfo, uint baseInvocationIndex, uint propertyId, TransferRequest transferRequest, uint dispatchSize)

src/nbl/video/utilities/CPropertyPoolHandler.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -193,7 +193,8 @@ bool CPropertyPoolHandler::transferProperties(
193193
{
194194
const auto& limits = m_device->getPhysicalDevice()->getLimits();
195195
const auto invocationCoarseness = limits.maxOptimallyResidentWorkgroupInvocations * requestsThisPass;
196-
cmdbuf->dispatch((maxElements - 1) / nbl::hlsl::property_pools::OptimalDispatchSize + 1, requestsThisPass, 1u);
196+
const auto dispatchElements = (maxElements - 1) / requestsThisPass + 1;
197+
cmdbuf->dispatch(limits.computeOptimalPersistentWorkgroupDispatchSize(dispatchElements,invocationCoarseness), requestsThisPass, 1u);
197198
}
198199
// TODO: pipeline barrier
199200
}

0 commit comments

Comments
 (0)