@@ -22,17 +22,34 @@ namespace hlsl
22
22
namespace subgroup
23
23
{
24
24
25
- namespace native
25
+ namespace impl
26
26
{
27
-
28
- template<class Binop, typename T=typename Binop::type_t>
27
+ template<class Binop, bool native> // might need a 3rd default param `typename T=typename Binop::type_t`
29
28
struct reduction;
30
- template<class Binop, typename T=typename Binop::type_t >
29
+ template<class Binop, bool native >
31
30
struct inclusive_scan;
32
- template<class Binop, typename T=typename Binop::type_t >
31
+ template<class Binop, bool native >
33
32
struct exclusive_scan;
34
33
35
- #define SPECIALIZE (NAME,BINOP,SUBGROUP_OP) template<typename T> struct NAME<BINOP<T>,T> \
34
+ // native
35
+ template<class Binop>
36
+ struct reduction<Binop, true >;
37
+ template<class Binop>
38
+ struct inclusive_scan<Binop, true >;
39
+ template<class Binop>
40
+ struct exclusive_scan<Binop, true >;
41
+
42
+ // portability
43
+ template<class Binop>
44
+ struct reduction<Binop, false >;
45
+ template<class Binop>
46
+ struct inclusive_scan<Binop, false >;
47
+ template<class Binop>
48
+ struct exclusive_scan<Binop, false >;
49
+
50
+ // specialize native
51
+
52
+ #define SPECIALIZE (NAME,BINOP,SUBGROUP_OP) template<typename T> struct NAME<BINOP<T>,true > \
36
53
{ \
37
54
using type_t = T; \
38
55
\
@@ -56,19 +73,16 @@ SPECIALIZE_ALL(maximum,Max);
56
73
#undef SPECIALIZE_ALL
57
74
#undef SPECIALIZE
58
75
59
- }
76
+ // specialize portability
60
77
61
- namespace portability
62
- {
63
-
64
78
// WARNING
65
79
// THIS PORTABILITY IMPLEMENTATION USES SHUFFLE OPS
66
80
// Shuffles where you attempt to read an inactive lane return garbage,
67
81
// which means that our portability reductions and prefix sums will also return garbage/UB/UV
68
82
// Always use the native subgroup_arithmetic extensions if supported
69
83
70
84
template<class Binop>
71
- struct inclusive_scan
85
+ struct inclusive_scan<Binop, false >
72
86
{
73
87
using type_t = typename Binop::type_t;
74
88
@@ -97,13 +111,13 @@ struct inclusive_scan
97
111
};
98
112
99
113
template<class Binop>
100
- struct exclusive_scan
114
+ struct exclusive_scan<Binop, false >
101
115
{
102
116
using type_t = typename Binop::type_t;
103
117
104
118
type_t operator ()(type_t value)
105
119
{
106
- value = inclusive_scan<Binop>::__call (value);
120
+ value = inclusive_scan<Binop, false >::__call (value);
107
121
// can't risk getting short-circuited, need to store to a var
108
122
type_t left = glsl::subgroupShuffleUp<type_t>(value,1 );
109
123
// the first invocation doesn't have anything to its left, so we set it to the binop's identity value for the exclusive scan
@@ -112,16 +126,17 @@ struct exclusive_scan
112
126
};
113
127
114
128
template<class Binop>
115
- struct reduction
129
+ struct reduction<Binop, false >
116
130
{
117
131
using type_t = typename Binop::type_t;
118
132
119
133
type_t operator ()(NBL_CONST_REF_ARG (type_t) value)
120
134
{
121
135
// take the last subgroup invocation's value for the reduction
122
- return BroadcastLast<type_t>(inclusive_scan<Binop>::__call (value));
136
+ return BroadcastLast<type_t>(inclusive_scan<Binop, false >::__call (value));
123
137
}
124
138
};
139
+
125
140
}
126
141
127
142
}
0 commit comments