Skip to content

Commit b2a3db6

Browse files
committed
Fix bitscan operations
Subgroup invocations beyond the first 32 were using the wrong component of the subgroup Lt/Le mask: they always read the first DWORD (`[0]`) when they should read the DWORD that matches their own subgroup invocation ID.
1 parent 2d87fb6 commit b2a3db6

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

include/nbl/builtin/hlsl/workgroup/arithmetic.hlsl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ template<bool Exclusive, uint16_t ItemCount, class BallotAccessor, class Arithme
9898
uint16_t ballotScanBitCount(NBL_REF_ARG(BallotAccessor) ballotAccessor, NBL_REF_ARG(ArithmeticAccessor) arithmeticAccessor)
9999
{
100100
const uint16_t subgroupIndex = SubgroupContiguousIndex();
101-
const uint16_t bitfieldIndex = impl::getDWORD(subgroupIndex);
101+
const uint16_t bitfieldIndex = getDWORD(subgroupIndex);
102102
const uint32_t localBitfield = ballotAccessor.get(bitfieldIndex);
103103

104104
static const uint16_t DWORDCount = impl::ballot_dword_count<ItemCount>::value;
@@ -111,7 +111,7 @@ uint16_t ballotScanBitCount(NBL_REF_ARG(BallotAccessor) ballotAccessor, NBL_REF_
111111
arithmeticAccessor.set(subgroupIndex,count);
112112
arithmeticAccessor.workgroupExecutionAndMemoryBarrier();
113113
count = arithmeticAccessor.get(bitfieldIndex);
114-
return uint16_t(countbits(localBitfield&(Exclusive ? glsl::gl_SubgroupLtMask():glsl::gl_SubgroupLeMask())[0])+count);
114+
return uint16_t(countbits(localBitfield&(Exclusive ? glsl::gl_SubgroupLtMask():glsl::gl_SubgroupLeMask())[getDWORD(glsl::gl_SubgroupInvocationID())])+count);
115115
}
116116
}
117117

0 commit comments

Comments
 (0)