@@ -25,18 +25,31 @@ struct TransferLoop
25
25
26
26
// IOTA: use the index itself as the fetch offset
27
27
// Non-IOTA: read the address buffer ("index buffer") to select the fetch offset
28
- const uint64_t srcAddressBufferOffset = SrcIndexIota ? srcOffset : vk::RawBufferLoad<uint32_t>(transferRequest.srcIndexAddr + srcOffset * sizeof (uint32_t));
29
- const uint64_t dstAddressBufferOffset = DstIndexIota ? dstOffset : vk::RawBufferLoad<uint32_t>(transferRequest.dstIndexAddr + dstOffset * sizeof (uint32_t));
28
+ uint64_t srcAddressBufferOffset;
29
+ uint64_t dstAddressBufferOffset;
30
+
31
+ if (SrcIndexIota) srcAddressBufferOffset = srcOffset;
32
+ else
33
+ {
34
+ if (SrcIndexSizeLog2 == 0 ) {} // we can't read individual byte
35
+ else if (SrcIndexSizeLog2 == 1 ) srcAddressBufferOffset = vk::RawBufferLoad<uint16_t>(transferRequest.srcIndexAddr + srcOffset * sizeof (uint16_t));
36
+ else if (SrcIndexSizeLog2 == 2 ) srcAddressBufferOffset = vk::RawBufferLoad<uint32_t>(transferRequest.srcIndexAddr + srcOffset * sizeof (uint32_t));
37
+ else if (SrcIndexSizeLog2 == 3 ) srcAddressBufferOffset = vk::RawBufferLoad<uint64_t>(transferRequest.srcIndexAddr + srcOffset * sizeof (uint64_t));
38
+ }
39
+
40
+ if (DstIndexIota) dstAddressBufferOffset = dstOffset;
41
+ else
42
+ {
43
+ if (DstIndexSizeLog2 == 0 ) {} // we can't read individual byte
44
+ else if (DstIndexSizeLog2 == 1 ) dstAddressBufferOffset = vk::RawBufferLoad<uint16_t>(transferRequest.dstIndexAddr + dstOffset * sizeof (uint16_t));
45
+ else if (DstIndexSizeLog2 == 2 ) dstAddressBufferOffset = vk::RawBufferLoad<uint32_t>(transferRequest.dstIndexAddr + dstOffset * sizeof (uint32_t));
46
+ else if (DstIndexSizeLog2 == 3 ) dstAddressBufferOffset = vk::RawBufferLoad<uint64_t>(transferRequest.dstIndexAddr + dstOffset * sizeof (uint64_t));
47
+ }
30
48
31
49
const uint64_t srcAddressMapped = transferRequest.srcAddr + srcAddressBufferOffset * srcIndexSize;
32
50
const uint64_t dstAddressMapped = transferRequest.dstAddr + dstAddressBufferOffset * dstIndexSize;
33
51
34
- //vk::RawBufferStore<uint64_t>(transferRequest.dstAddr + invocationIndex * sizeof(uint64_t) * 2, srcAddressMapped,8);
35
- //vk::RawBufferStore<uint64_t>(transferRequest.dstAddr + invocationIndex * sizeof(uint64_t) * 2 + sizeof(uint64_t), dstAddressMapped,8);
36
- if (SrcIndexSizeLog2 == 0 ) {} // we can't write individual bytes
37
- else if (SrcIndexSizeLog2 == 1 ) vk::RawBufferStore<uint16_t>(dstAddressMapped, vk::RawBufferLoad<uint16_t>(srcAddressMapped));
38
- else if (SrcIndexSizeLog2 == 2 ) vk::RawBufferStore<uint32_t>(dstAddressMapped, vk::RawBufferLoad<uint32_t>(srcAddressMapped));
39
- else if (SrcIndexSizeLog2 == 3 ) vk::RawBufferStore<uint64_t>(dstAddressMapped, vk::RawBufferLoad<uint64_t>(srcAddressMapped));
52
+ vk::RawBufferStore<uint32_t>(dstAddressMapped, vk::RawBufferLoad<uint32_t>(srcAddressMapped));
40
53
}
41
54
42
55
void copyLoop (NBL_CONST_REF_ARG (TransferDispatchInfo) dispatchInfo, uint baseInvocationIndex, uint propertyId, TransferRequest transferRequest, uint dispatchSize)
0 commit comments