Skip to content

Commit 370b25e

Browse files
committed
Implemented runtime macro free code
1 parent b1cecd5 commit 370b25e

File tree

2 files changed

+37
-25
lines changed

2 files changed

+37
-25
lines changed

include/nbl/builtin/hlsl/subgroup/arithmetic_portability.hlsl

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "nbl/builtin/hlsl/subgroup/basic.hlsl"
99

1010
#include "nbl/builtin/hlsl/subgroup/arithmetic_portability_impl.hlsl"
11+
#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl"
1112

1213

1314
namespace nbl
@@ -17,20 +18,16 @@ namespace hlsl
1718
namespace subgroup
1819
{
1920

20-
#ifdef NBL_GLSL_LIMIT_SHADER_SUBGROUP_ARITHMETIC
21-
#define IMPL native
22-
#else
23-
#define IMPL portability
24-
#endif
21+
// TODO: change to alias using template when DXC finally fixes SPIR-V codegen imparity
22+
//template<class Binop, class device_capability_traits/*=TODO: Atil give us the traits so we can default with `void`!*/>
23+
//struct reduction : impl::reduction<Binop,device_capability_traits::subgroupArithmetic> {};
2524

2625
template<class Binop>
27-
struct reduction : IMPL::reduction<Binop> {};
26+
struct reduction : impl::reduction<Binop,nbl::hlsl::jit::device_capabilities::subgroupArithmetic> {};
2827
template<class Binop>
29-
struct inclusive_scan : IMPL::inclusive_scan<Binop> {};
28+
struct inclusive_scan : impl::inclusive_scan<Binop,nbl::hlsl::jit::device_capabilities::subgroupArithmetic> {};
3029
template<class Binop>
31-
struct exclusive_scan : IMPL::exclusive_scan<Binop> {};
32-
33-
#undef IMPL
30+
struct exclusive_scan : impl::exclusive_scan<Binop,nbl::hlsl::jit::device_capabilities::subgroupArithmetic> {};
3431

3532
}
3633
}

include/nbl/builtin/hlsl/subgroup/arithmetic_portability_impl.hlsl

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,34 @@ namespace hlsl
2222
namespace subgroup
2323
{
2424

25-
namespace native
25+
namespace impl
2626
{
27-
28-
template<class Binop, typename T=typename Binop::type_t>
27+
template<class Binop, bool native> // might need a 3rd default param `typename T=typename Binop::type_t`
2928
struct reduction;
30-
template<class Binop, typename T=typename Binop::type_t>
29+
template<class Binop, bool native>
3130
struct inclusive_scan;
32-
template<class Binop, typename T=typename Binop::type_t>
31+
template<class Binop, bool native>
3332
struct exclusive_scan;
3433

35-
#define SPECIALIZE(NAME,BINOP,SUBGROUP_OP) template<typename T> struct NAME<BINOP<T>,T> \
34+
// native
35+
template<class Binop>
36+
struct reduction<Binop, true>;
37+
template<class Binop>
38+
struct inclusive_scan<Binop, true>;
39+
template<class Binop>
40+
struct exclusive_scan<Binop, true>;
41+
42+
// portability
43+
template<class Binop>
44+
struct reduction<Binop, false>;
45+
template<class Binop>
46+
struct inclusive_scan<Binop, false>;
47+
template<class Binop>
48+
struct exclusive_scan<Binop, false>;
49+
50+
// specialize native
51+
52+
#define SPECIALIZE(NAME,BINOP,SUBGROUP_OP) template<typename T> struct NAME<BINOP<T>,true> \
3653
{ \
3754
using type_t = T; \
3855
\
@@ -56,19 +73,16 @@ SPECIALIZE_ALL(maximum,Max);
5673
#undef SPECIALIZE_ALL
5774
#undef SPECIALIZE
5875

59-
}
76+
// specialize portability
6077

61-
namespace portability
62-
{
63-
6478
// WARNING
6579
// THIS PORTABILITY IMPLEMENTATION USES SHUFFLE OPS
6680
// Shuffles that attempt to read an inactive lane return garbage,
6781
// which means that our portability reductions and prefix sums will also return garbage/UB/UV
6882
// Always use the native subgroup_arithmetic extensions if supported
6983

7084
template<class Binop>
71-
struct inclusive_scan
85+
struct inclusive_scan<Binop, false>
7286
{
7387
using type_t = typename Binop::type_t;
7488

@@ -97,13 +111,13 @@ struct inclusive_scan
97111
};
98112

99113
template<class Binop>
100-
struct exclusive_scan
114+
struct exclusive_scan<Binop, false>
101115
{
102116
using type_t = typename Binop::type_t;
103117

104118
type_t operator()(type_t value)
105119
{
106-
value = inclusive_scan<Binop>::__call(value);
120+
value = inclusive_scan<Binop,false>::__call(value);
107121
// can't risk getting short-circuited, need to store to a var
108122
type_t left = glsl::subgroupShuffleUp<type_t>(value,1);
109123
// the first invocation doesn't have anything to its left so we set to the binop's identity value for exclusive scan
@@ -112,16 +126,17 @@ struct exclusive_scan
112126
};
113127

114128
template<class Binop>
115-
struct reduction
129+
struct reduction<Binop, false>
116130
{
117131
using type_t = typename Binop::type_t;
118132

119133
type_t operator()(NBL_CONST_REF_ARG(type_t) value)
120134
{
121135
// take the last subgroup invocation's value for the reduction
122-
return BroadcastLast<type_t>(inclusive_scan<Binop>::__call(value));
136+
return BroadcastLast<type_t>(inclusive_scan<Binop,false>::__call(value));
123137
}
124138
};
139+
125140
}
126141

127142
}

0 commit comments

Comments
 (0)