Skip to content

Commit e40b162

Browse files
committed
[libspirv] Restore missing builtin removed in 1b6070c
1 parent 2b1e0ae commit e40b162

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

libclc/libspirv/lib/ptx-nvidiacl/group/group_non_uniform.cl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "membermask.h"
1010

11+
#include <clc/integer/clc_popcount.h>
1112
#include <libspirv/spirv.h>
1213
#include <libspirv/spirv_types.h>
1314

@@ -33,4 +34,19 @@ _Z29__spirv_GroupNonUniformBallotjb(unsigned flag, bool predicate) {
3334
res[0] = __nvvm_vote_ballot_sync(threads, predicate);
3435

3536
return res;
37+
}
38+
39+
_CLC_DEF _CLC_CONVERGENT uint
40+
_Z37__spirv_GroupNonUniformBallotBitCountN5__spv5Scope4FlagEiDv4_j(
41+
uint scope, uint flag, __clc_vec4_uint32_t mask) {
42+
// here we assume scope == __spv::Scope::Subgroup
43+
// flag == InclusiveScan is not yet implemented
44+
if (flag == Reduce) {
45+
return __clc_popcount(mask[0]);
46+
} else if (flag == ExclusiveScan) {
47+
return __clc_popcount(__nvvm_read_ptx_sreg_lanemask_lt() & mask[0]);
48+
} else {
49+
__builtin_trap();
50+
__builtin_unreachable();
51+
}
3652
}

0 commit comments

Comments
 (0)