5
5
#define _NBL_BUILTIN_HLSL_MEMORY_ACCESSOR_INCLUDED_
6
6
7
7
#include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
8
+ #include "nbl/builtin/hlsl/member_test_macros.hlsl"
8
9
9
10
// weird namespace placing, see the comment where the macro is defined
10
11
GENERATE_METHOD_TESTER (atomicExchange)
@@ -33,104 +34,189 @@ struct pair
33
34
second_type second;
34
35
};
35
36
37
+ namespace accessor_adaptors
38
+ {
39
+ namespace impl
40
+ {
41
// Supplies the `offset` term used by the accessor adaptors.
// This is the only base that takes an `integral_constant`-like type instead of a plain value:
// every numeric value is a valid offset, so `void` is used as the marker for "offset is dynamic".
//
// Primary template: `offset` is a compile-time constant read from `OffsetConstant::value`.
template<typename IndexType, typename OffsetConstant>
struct OffsetBase
{
    NBL_CONSTEXPR IndexType offset = OffsetConstant::value;
};

// `void` specialization: `offset` is an ordinary data member that the user assigns at runtime.
template<typename IndexType>
struct OffsetBase<IndexType,void>
{
    IndexType offset;
};
52
+
53
// Holds the two strides used for index computation and pulls in `offset` via `OffsetBase`.
//
// Primary template: both strides are compile-time constants taken from the template arguments.
// It derives from `OffsetBase` exactly like the specializations do, so that `offset` is a member
// of the object even when `Offset` is `void` (runtime offset) and the strides are static.
template<typename IndexType, uint64_t ElementStride, uint64_t SubElementStride, typename Offset>
struct StructureOfArraysStrides : OffsetBase<IndexType,Offset>
{
    NBL_CONSTEXPR IndexType elementStride = ElementStride;
    NBL_CONSTEXPR IndexType subElementStride = SubElementStride;

    //static_assert(elementStride>0 && subElementStride>0);
};

// Both stride arguments equal to 0: the strides are runtime data members instead.
template<typename IndexType, typename Offset>
struct StructureOfArraysStrides<IndexType,0,0,Offset> : OffsetBase<IndexType,Offset>
{
    IndexType elementStride;
    IndexType subElementStride;
};

// Disabled: mixed configurations (one static stride, one runtime stride).
#if 0 // don't seem to be able to specialize one at a time
template<typename IndexType, uint64_t ElementStride, typename Offset>
struct StructureOfArraysStrides<IndexType,ElementStride,0,Offset> : OffsetBase<IndexType,Offset>
{
    NBL_CONSTEXPR IndexType elementStride = ElementStride;
    IndexType subElementStride;
};
template<typename IndexType, uint64_t SubElementStride, typename Offset>
struct StructureOfArraysStrides<IndexType,0,SubElementStride,Offset> : OffsetBase<IndexType,Offset>
{
    IndexType elementStride;
    NBL_CONSTEXPR IndexType subElementStride = SubElementStride;
};
#endif
81
+
82
+
83
// Adds the index computation on top of the stride/offset storage.
template<typename IndexType, uint64_t ElementStride, uint64_t SubElementStride, typename Offset>
struct StructureOfArraysBase : StructureOfArraysStrides<IndexType,ElementStride,SubElementStride,Offset>
{
    // Returns `elementStride*ix + subElementStride*el + offset`, i.e. the index handed to the
    // underlying accessor for sub-element `el` of element `ix`.
    IndexType getIx(const IndexType ix, const IndexType el)
    {
        using strides_t = StructureOfArraysStrides<IndexType,ElementStride,SubElementStride,Offset>;
        using offset_t = OffsetBase<IndexType,Offset>;

        const IndexType elementTerm = strides_t::elementStride*ix;
        const IndexType subElementTerm = strides_t::subElementStride*el;
        return elementTerm+subElementTerm+offset_t::offset;
    }
};
92
+
93
// Minimal fixed-size aggregate wrapping a C-style array of `Count` elements of `T`
// (maybe we should have our own std::array).
template<typename T, uint64_t Count>
struct array
{
    T data[Count];
};
99
+ }
36
100
37
- // TODO: find some cool way to SFINAE the default into `_NBL_HLSL_WORKGROUP_SIZE_` if defined, and something like 1 otherwise
38
- template<class BaseAccessor, typename AccessType, typename IndexType=uint32_t, typename Strides=pair<integral_constant<IndexType,1 >,integral_constant<IndexType,_NBL_HLSL_WORKGROUP_SIZE_> > >
39
- struct MemoryAdaptor // TODO: rename to something nicer like StructureOfArrays and add a `namespace accessor_adaptors`
101
+ // TODO: some CRTP thing to forward through atomics and barriers
102
+
103
// Adapts a `BaseAccessor` that gets/sets single `AccessType` values into one that gets/sets any
// `T` whose size is a multiple of `sizeof(AccessType)`, by splitting `T` into sub-elements.
// Sub-element `el` of element `ix` is accessed at the index computed by
// `impl::StructureOfArraysBase::getIx(ix,el)`.
//
// Strides: leave `ElementStride` and `SubElementStride` both at 0 to get runtime
// `elementStride`/`subElementStride` members; pass both as non-zero values to make them
// compile-time constants. Making only one of them dynamic is not supported (a lone 0 stays a
// compile-time stride of 0).
// Offset: `_Offset` is an `integral_constant`-like type for a compile-time offset, or `void`
// for a runtime `offset` member.
template<class BaseAccessor, typename AccessType, typename IndexType=uint32_t, uint64_t ElementStride=0, uint64_t SubElementStride=0, typename _Offset=integral_constant<IndexType,0> >
struct StructureOfArrays : impl::StructureOfArraysBase<IndexType,ElementStride,SubElementStride,_Offset>
{
    using base_t = impl::StructureOfArraysBase<IndexType,ElementStride,SubElementStride,_Offset>;
    // Question: should the `BaseAccessor` let us know what this is?
    using access_t = AccessType;
    using index_t = IndexType;

    BaseAccessor accessor;

    // Reads element `ix` into `value`, one `access_t` sub-element at a time.
    // Question: shall we go back to requiring a `access_t get(index_t)` on the `BaseAccessor`, then we could `enable_if` check the return type (via `has_method_get`) matches and we won't get Nasty HLSL copy-in copy-out conversions
    template<typename T>
    enable_if_t<sizeof(T)%sizeof(access_t)==0,void> get(const index_t ix, NBL_REF_ARG(T) value)
    {
        NBL_CONSTEXPR uint64_t SubElementCount = sizeof(T)/sizeof(access_t);
        // `vector` for now, we'll use `array` later
        vector<access_t,SubElementCount> aux;
        for (index_t i=0; i<SubElementCount; i++)
            accessor.get(base_t::getIx(ix,i),aux[i]);
        value = bit_cast<T,vector<access_t,SubElementCount> >(aux);
    }

    // Writes `value` as element `ix`, one `access_t` sub-element at a time.
    template<typename T>
    enable_if_t<sizeof(T)%sizeof(access_t)==0,void> set(const index_t ix, NBL_CONST_REF_ARG(T) value)
    {
        NBL_CONSTEXPR uint64_t SubElementCount = sizeof(T)/sizeof(access_t);
        // `vector` for now, we'll use `array` later
        vector<access_t,SubElementCount> aux;
        aux = bit_cast<vector<access_t,SubElementCount>,T>(value);
        for (index_t i=0; i<SubElementCount; i++)
            accessor.set(base_t::getIx(ix,i),aux[i]);
    }

    // The atomics below operate on exactly one `access_t`-sized value, so they always address
    // sub-element 0 of element `ix`. `getIx` takes both the element and the sub-element index.

    template<typename T, typename S=BaseAccessor>
    enable_if_t<
        sizeof(T)==sizeof(access_t) && is_same_v<S,BaseAccessor> && is_same_v<typename has_method_atomicExchange<S,index_t,access_t>::return_type,access_t>,void
    > atomicExchange(const index_t ix, const T value, NBL_REF_ARG(T) orig)
    {
        orig = bit_cast<T,access_t>(accessor.atomicExchange(base_t::getIx(ix,index_t(0)),bit_cast<access_t,T>(value)));
    }
    template<typename T, typename S=BaseAccessor>
    enable_if_t<
        sizeof(T)==sizeof(access_t) && is_same_v<S,BaseAccessor> && is_same_v<typename has_method_atomicCompSwap<S,index_t,access_t,access_t>::return_type,access_t>,void
    > atomicCompSwap(const index_t ix, const T value, const T comp, NBL_REF_ARG(T) orig)
    {
        orig = bit_cast<T,access_t>(accessor.atomicCompSwap(base_t::getIx(ix,index_t(0)),bit_cast<access_t,T>(comp),bit_cast<access_t,T>(value)));
    }

    template<typename T, typename S=BaseAccessor>
    enable_if_t<
        sizeof(T)==sizeof(access_t) && is_same_v<S,BaseAccessor> && is_same_v<typename has_method_atomicAnd<S,index_t,access_t>::return_type,access_t>,void
    > atomicAnd(const index_t ix, const T value, NBL_REF_ARG(T) orig)
    {
        orig = bit_cast<T,access_t>(accessor.atomicAnd(base_t::getIx(ix,index_t(0)),bit_cast<access_t,T>(value)));
    }
    template<typename T, typename S=BaseAccessor>
    enable_if_t<
        sizeof(T)==sizeof(access_t) && is_same_v<S,BaseAccessor> && is_same_v<typename has_method_atomicOr<S,index_t,access_t>::return_type,access_t>,void
    > atomicOr(const index_t ix, const T value, NBL_REF_ARG(T) orig)
    {
        orig = bit_cast<T,access_t>(accessor.atomicOr(base_t::getIx(ix,index_t(0)),bit_cast<access_t,T>(value)));
    }
    template<typename T, typename S=BaseAccessor>
    enable_if_t<
        sizeof(T)==sizeof(access_t) && is_same_v<S,BaseAccessor> && is_same_v<typename has_method_atomicXor<S,index_t,access_t>::return_type,access_t>,void
    > atomicXor(const index_t ix, const T value, NBL_REF_ARG(T) orig)
    {
        orig = bit_cast<T,access_t>(accessor.atomicXor(base_t::getIx(ix,index_t(0)),bit_cast<access_t,T>(value)));
    }

    // This has the upside of never calling a `(uint32_t)(uint32_t,uint32_t)` overload of `atomicAdd` because it checks the return type!
    // If someone makes a `(float)(uint32_t,uint32_t)` they will break this detection code, but oh well.
    template<typename T>
    enable_if_t<is_same_v<typename has_method_atomicAdd<BaseAccessor,index_t,T>::return_type,T>,void> atomicAdd(const index_t ix, const T value, NBL_REF_ARG(T) orig)
    {
        orig = accessor.atomicAdd(base_t::getIx(ix,index_t(0)),value);
    }
    template<typename T>
    enable_if_t<is_same_v<typename has_method_atomicMin<BaseAccessor,index_t,T>::return_type,T>,void> atomicMin(const index_t ix, const T value, NBL_REF_ARG(T) orig)
    {
        orig = accessor.atomicMin(base_t::getIx(ix,index_t(0)),value);
    }
    template<typename T>
    enable_if_t<is_same_v<typename has_method_atomicMax<BaseAccessor,index_t,T>::return_type,T>,void> atomicMax(const index_t ix, const T value, NBL_REF_ARG(T) orig)
    {
        orig = accessor.atomicMax(base_t::getIx(ix,index_t(0)),value);
    }

    // Forwards the barrier to the wrapped accessor; only available when it provides one.
    template<typename S=BaseAccessor>
    enable_if_t<
        is_same_v<S,BaseAccessor> && is_same_v<typename has_method_workgroupExecutionAndMemoryBarrier<S>::return_type,void>,void
    > workgroupExecutionAndMemoryBarrier()
    {
        accessor.workgroupExecutionAndMemoryBarrier();
    }
};
201
+
202
+ // ---------------------------------------------- Offset Accessor ----------------------------------------------------
203
+
204
+ template<class BaseAccessor, typename IndexType=uint32_t, typename _Offset=void >
205
+ struct Offset : impl::OffsetBase<IndexType,_Offset>
206
+ {
207
+ using base_t = impl::OffsetBase<IndexType,_Offset>;
208
+
209
+ BaseAccessor accessor;
210
+
211
+ template <typename T>
212
+ void set (uint32_t idx, T value) {accessor.set (idx+base_t::offset,value); }
213
+
214
+ template <typename T>
215
+ void get (uint32_t idx, NBL_REF_ARG (T) value) {accessor.get (idx+base_t::offset,value);}
130
216
131
217
template<typename S=BaseAccessor>
132
218
enable_if_t<
133
- is_same_v<S,BaseAccessor> && is_same_v<has_method_workgroupExecutionAndMemoryBarrier<S>::return_type,void >,void
219
+ is_same_v<S,BaseAccessor> && is_same_v<typename has_method_workgroupExecutionAndMemoryBarrier<S>::return_type,void >,void
134
220
> workgroupExecutionAndMemoryBarrier ()
135
221
{
136
222
accessor.workgroupExecutionAndMemoryBarrier ();
@@ -139,5 +225,5 @@ struct MemoryAdaptor // TODO: rename to something nicer like StructureOfArrays a
139
225
140
226
}
141
227
}
142
-
228
+ }
143
229
#endif
0 commit comments