Skip to content

Commit de27e51

Browse files
Merge remote-tracking branch 'origin/member_test'
Also get rid of NBL_REPEAT
2 parents c9a8053 + d8ca1b5 commit de27e51

File tree

7 files changed

+86
-102
lines changed

7 files changed

+86
-102
lines changed

examples_tests

include/nbl/builtin/hlsl/device_capabilities_traits.hlsl

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,44 @@
77
#include <nbl/builtin/hlsl/member_test_macros.hlsl>
88

99
#ifdef __HLSL_VERSION
10+
11+
NBL_GENERATE_MEMBER_TESTER(shaderFloat64);
12+
NBL_GENERATE_MEMBER_TESTER(shaderDrawParameters);
13+
NBL_GENERATE_MEMBER_TESTER(subgroupArithmetic);
14+
NBL_GENERATE_MEMBER_TESTER(fragmentShaderPixelInterlock);
15+
NBL_GENERATE_MEMBER_TESTER(maxOptimallyResidentWorkgroupInvocations);
16+
17+
// Defines `get_or_default_<field><S>`: an `integral_constant` of type `ty` whose value is
// `S::field` when `has_member_<field><S>::value` converts to true, and `default` otherwise.
// Relies on `has_member_<field>`, which NBL_GENERATE_MEMBER_TESTER(field) generates.
// NOTE(review): `has_member_<field>` is also non-zero for a non-static member, in which case
// `S::field` cannot serve as the constant's value - presumably capability structs only expose
// static constants; confirm against the callers.
#define NBL_GENERATE_GET_OR_DEFAULT(field, ty, default) \
18+
template<typename S, bool = has_member_##field<S>::value> struct get_or_default_##field : integral_constant<ty,S::field> {}; \
19+
template<typename S> struct get_or_default_##field<S,false> : integral_constant<ty,default> {};
20+
1021
namespace nbl
1122
{
1223
namespace hlsl
1324
{
25+
26+
namespace impl
27+
{
28+
NBL_GENERATE_GET_OR_DEFAULT(shaderFloat64, bool, false);
29+
NBL_GENERATE_GET_OR_DEFAULT(shaderDrawParameters, bool, false);
30+
NBL_GENERATE_GET_OR_DEFAULT(subgroupArithmetic, bool, false);
31+
NBL_GENERATE_GET_OR_DEFAULT(fragmentShaderPixelInterlock, bool, false);
32+
NBL_GENERATE_GET_OR_DEFAULT(maxOptimallyResidentWorkgroupInvocations, uint16_t, 0);
33+
}
34+
35+
1436
// Uniform compile-time view of a device-capabilities struct.
// Each constant takes the value of the same-named member of `device_capabilities` when the
// member tester detects one, and otherwise the default given to NBL_GENERATE_GET_OR_DEFAULT
// (`false` for the four bool flags, `0` for `maxOptimallyResidentWorkgroupInvocations`).
template<typename device_capabilities>
1537
struct device_capabilities_traits
1638
{
17-
// TODO: check for members and default them to sane things, only do the 5 members in CJITIncludeLoader.cpp struct, we'll do the rest on `vulkan_1_3` branch with Nahim
39+
NBL_CONSTEXPR_STATIC_INLINE bool shaderFloat64 = impl::get_or_default_shaderFloat64<device_capabilities>::value;
40+
NBL_CONSTEXPR_STATIC_INLINE bool shaderDrawParameters = impl::get_or_default_shaderDrawParameters<device_capabilities>::value;
41+
NBL_CONSTEXPR_STATIC_INLINE bool subgroupArithmetic = impl::get_or_default_subgroupArithmetic<device_capabilities>::value;
42+
NBL_CONSTEXPR_STATIC_INLINE bool fragmentShaderPixelInterlock = impl::get_or_default_fragmentShaderPixelInterlock<device_capabilities>::value;
43+
NBL_CONSTEXPR_STATIC_INLINE uint16_t maxOptimallyResidentWorkgroupInvocations = impl::get_or_default_maxOptimallyResidentWorkgroupInvocations<device_capabilities>::value;
1844
};
45+
46+
#undef NBL_GENERATE_GET_OR_DEFAULT
47+
1948
}
2049
}
2150
#endif

include/nbl/builtin/hlsl/member_test_macros.hlsl

Lines changed: 28 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#define _NBL_BUILTIN_HLSL_MEMBER_TEST_MACROS_INCLUDED_
66

77
#include <nbl/builtin/hlsl/type_traits.hlsl>
8+
#include <boost/preprocessor.hpp>
89

910
#ifdef __HLSL_VERSION
1011

@@ -16,19 +17,11 @@ namespace hlsl
1617
namespace impl
1718
{
1819

19-
template<class T, bool C>
20-
struct is_const_helper : bool_constant<C>
21-
{
22-
using type = T;
23-
NBL_CONSTEXPR_STATIC_INLINE bool is_constant = is_const<T>::value;
24-
};
25-
2620
// Result type of the `has_member_<a>` testers. After this change the enumerators are bit flags
// (present / static / const) rather than four mutually exclusive states; the tester macro
// builds the value as is_member + 2*is_static_member + 4*is_const<member type>.
// NOTE(review): the enumerator `is_const` has the same name as the `is_const` type trait the
// tester macro uses; within this namespace the enumerator would hide the trait for unqualified
// lookup - confirm nothing declared after this point in `impl` needs the trait unqualified.
enum e_member_presence
2721
{
28-
absent = 0,
29-
non_static = 1,
30-
as_static = 2,
31-
static_constexpr = 3,
22+
is_present = 1<<0,
23+
is_static = 1<<1,
24+
is_const = 1<<2,
3225
};
3326

3427
template<class T>
@@ -54,16 +47,17 @@ namespace hlsl \
5447
{ \
5548
namespace impl { \
5649
template<class T, class=void> \
57-
struct is_static_member_##a: false_type {NBL_CONSTEXPR_STATIC_INLINE bool is_constant = false; }; \
50+
struct is_static_member_##a: false_type { }; \
5851
template<class T> \
59-
struct is_static_member_##a<T,typename enable_if<!is_same<decltype(T::a),void>::value,void>::type>: is_const_helper<decltype(T::a), true> {}; \
52+
struct is_static_member_##a<T,typename enable_if<!is_same<decltype(T::a),void>::value,void>::type> : true_type { }; \
6053
template<class T, class=void> \
61-
struct is_member_##a: false_type {NBL_CONSTEXPR_STATIC_INLINE bool is_constant = false;}; \
54+
struct is_member_##a: false_type { using type = void; }; \
6255
template<class T> \
63-
struct is_member_##a<T,typename enable_if<!is_same<decltype(declval<T>().a),void>::value,void>::type> : is_const_helper<decltype(declval<T>().a), true>{}; \
56+
struct is_member_##a<T,typename enable_if<!is_same<decltype(declval<T>().a),void>::value,void>::type> : true_type { using type = decltype(declval<T>().a); }; \
6457
} \
6558
template<class T> \
66-
struct has_member_##a { NBL_CONSTEXPR_STATIC_INLINE e_member_presence value = (e_member_presence)(impl::is_member_##a<T>::value + impl::is_static_member_##a<T>::value + impl::is_static_member_##a<T>::is_constant); }; \
59+
struct has_member_##a { NBL_CONSTEXPR_STATIC_INLINE e_member_presence value = (e_member_presence)(impl::is_member_##a<T>::value + 2*impl::is_static_member_##a<T>::value + 4*is_const<typename impl::is_member_##a<T>::type>::value); }; \
60+
template<class T, class F> struct has_member_##a##_with_type : bool_constant<has_member_##a<T>::value && is_same<typename impl::is_member_##a<T>::type, F>::value> {}; \
6761
} \
6862
}
6963

@@ -74,60 +68,30 @@ NBL_GENERATE_MEMBER_TESTER(z)
7468
NBL_GENERATE_MEMBER_TESTER(w)
7569

7670

77-
// Even though it should work for some reason tests fail
78-
// proof it works : https://godbolt.org/z/EzPWGnTPb
7971

80-
#define CAT(x, y) x##y
81-
#define TYPE_DECLARE(n) typename Arg##n
82-
#define TYPE_DECLARE_DEFAULT(n) TYPE_DECLARE(n)=void
83-
#define TYPE_FWD(n) Arg##n
84-
#define DECLVAL_DECLARE(n) impl::declval<Arg##n>()
85-
86-
#define FOR_EACH0(fn)
87-
#define FOR_EACH1(fn) fn(1)
88-
#define FOR_EACH2(fn) fn(2), FOR_EACH1(fn)
89-
#define FOR_EACH3(fn) fn(3), FOR_EACH2(fn)
90-
#define FOR_EACH4(fn) fn(4), FOR_EACH3(fn)
91-
#define FOR_EACH(fn, n) CAT(FOR_EACH, n)(fn)
92-
93-
#define GENERATE_STATIC_METHOD_TESTER_SPEC0(x) \
94-
template<class T> \
95-
struct has_static_method_##x<T, typename make_void<decltype(T::x())>::type> : true_type \
96-
{ \
97-
using return_type = decltype(T::x()); \
98-
NBL_CONSTEXPR_STATIC_INLINE uint arg_count = 0; \
99-
};
72+
#define NBL_TYPE_DECLARE(z, n, x) BOOST_PP_COMMA_IF(x) typename Arg##n
73+
#define NBL_TYPE_DECLARE_DEFAULT(z, n, x) BOOST_PP_COMMA_IF(x) typename Arg##n=void
74+
#define NBL_TYPE_FWD(z, n, x) BOOST_PP_COMMA_IF(x) Arg##n
75+
#define NBL_DECLVAL_DECLARE(z, n, x) impl::declval<Arg##n>() BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(BOOST_PP_INC(n), x))
10076

101-
#define GENERATE_STATIC_METHOD_TESTER_SPEC(x, n) \
102-
template<class T, FOR_EACH(TYPE_DECLARE, n)> \
103-
struct has_static_method_##x<T, FOR_EACH(TYPE_FWD, n), typename make_void<decltype(T::x(FOR_EACH(DECLVAL_DECLARE, n)))>::type> : true_type \
77+
#define GENERATE_STATIC_METHOD_TESTER_SPEC(z, n, x) \
78+
template<class T BOOST_PP_REPEAT(n, NBL_TYPE_DECLARE, n)> \
79+
struct has_static_method_##x<T BOOST_PP_REPEAT(n, NBL_TYPE_FWD, n), typename make_void<decltype(T::x(BOOST_PP_REPEAT(n, NBL_DECLVAL_DECLARE, n)))>::type> : true_type \
10480
{ \
105-
using return_type = decltype(T::x(FOR_EACH(DECLVAL_DECLARE, n))); \
81+
using return_type = decltype(T::x(BOOST_PP_REPEAT(n, NBL_DECLVAL_DECLARE, n))); \
10682
NBL_CONSTEXPR_STATIC_INLINE uint arg_count = n; \
10783
};
10884

109-
#define GENERATE_STATIC_METHOD_TESTER(x) \
110-
template<typename T, FOR_EACH(TYPE_DECLARE_DEFAULT, 4), class=void> \
85+
#define GENERATE_STATIC_METHOD_TESTER(x, n) \
86+
template<typename T BOOST_PP_REPEAT(n, NBL_TYPE_DECLARE_DEFAULT, n), class=void> \
11187
struct has_static_method_##x : false_type {}; \
112-
GENERATE_STATIC_METHOD_TESTER_SPEC0(x) \
113-
GENERATE_STATIC_METHOD_TESTER_SPEC(x, 1) \
114-
GENERATE_STATIC_METHOD_TESTER_SPEC(x, 2) \
115-
GENERATE_STATIC_METHOD_TESTER_SPEC(x, 3) \
116-
GENERATE_STATIC_METHOD_TESTER_SPEC(x, 4)
117-
118-
#define GENERATE_METHOD_TESTER_SPEC0(x) \
119-
template<class T> \
120-
struct has_method_##x<T, typename make_void<decltype(impl::declval<T>().x())>::type> : impl::if_2_else_1<impl::has_static_method_##x<T>::value> \
121-
{ \
122-
using return_type = decltype(impl::declval<T>().x()); \
123-
NBL_CONSTEXPR_STATIC_INLINE uint arg_count = 0; \
124-
};
88+
BOOST_PP_REPEAT(n, GENERATE_STATIC_METHOD_TESTER_SPEC, x)
12589

126-
#define GENERATE_METHOD_TESTER_SPEC(x, n) \
127-
template<class T, FOR_EACH(TYPE_DECLARE, n)> \
128-
struct has_method_##x<T, FOR_EACH(TYPE_FWD, n), typename make_void<decltype(impl::declval<T>().x(FOR_EACH(DECLVAL_DECLARE, n)))>::type> : impl::if_2_else_1<impl::has_static_method_##x<T,FOR_EACH(TYPE_FWD, n)>::value> \
90+
#define GENERATE_METHOD_TESTER_SPEC(z, n, x) \
91+
template<class T BOOST_PP_REPEAT(n, NBL_TYPE_DECLARE, n)> \
92+
struct has_method_##x<T BOOST_PP_REPEAT(n, NBL_TYPE_FWD, n), typename make_void<decltype(impl::declval<T>().x(BOOST_PP_REPEAT(n, NBL_DECLVAL_DECLARE, n)))>::type> : impl::if_2_else_1<impl::has_static_method_##x<T BOOST_PP_REPEAT(n, NBL_TYPE_FWD, n)>::value> \
12993
{ \
130-
using return_type = decltype(impl::declval<T>().x(FOR_EACH(DECLVAL_DECLARE, n))); \
94+
using return_type = decltype(impl::declval<T>().x(BOOST_PP_REPEAT(n, NBL_DECLVAL_DECLARE, n))); \
13195
NBL_CONSTEXPR_STATIC_INLINE uint arg_count = n; \
13296
};
13397

@@ -147,14 +111,10 @@ struct has_method_##x<T, FOR_EACH(TYPE_FWD, n), typename make_void<decltype(impl
147111
// Generates `nbl::hlsl::has_method_<x><T, Args...>` together with its helper
// `nbl::hlsl::impl::has_static_method_<x>`. The primary template is `false_type`;
// GENERATE_METHOD_TESTER_SPEC then adds one partial specialization per argument count.
// NOTE(review): BOOST_PP_REPEAT(4, ...) expands its macro for indices 0..3 only, whereas the
// primary template still declares four defaulted argument slots and the removed lines
// instantiated SPEC0 plus SPEC 1..4 - confirm that dropping the 4-argument case is intended.
#define GENERATE_METHOD_TESTER(x) \
148112
namespace nbl { \
149113
namespace hlsl { \
150-
namespace impl { GENERATE_STATIC_METHOD_TESTER(x) } \
151-
template<typename T, FOR_EACH(TYPE_DECLARE_DEFAULT, 4), class=void> \
114+
namespace impl { GENERATE_STATIC_METHOD_TESTER(x, 4) } \
115+
template<typename T BOOST_PP_REPEAT(4, NBL_TYPE_DECLARE_DEFAULT, 4), class=void> \
152116
struct has_method_##x : false_type {}; \
153-
GENERATE_METHOD_TESTER_SPEC0(x) \
154-
GENERATE_METHOD_TESTER_SPEC(x, 1) \
155-
GENERATE_METHOD_TESTER_SPEC(x, 2) \
156-
GENERATE_METHOD_TESTER_SPEC(x, 3) \
157-
GENERATE_METHOD_TESTER_SPEC(x, 4) \
117+
BOOST_PP_REPEAT(4, GENERATE_METHOD_TESTER_SPEC, x) \
158118
}}
159119

160120

include/nbl/builtin/hlsl/spirv_intrinsics/subgroup_shuffle.hlsl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ template<typename T>
1818
[[vk::ext_instruction( spv::OpGroupNonUniformShuffle )]]
1919
T groupShuffle(uint32_t executionScope, T value, uint32_t invocationId);
2020

21-
#ifdef NBL_GL_KHR_shader_subgroup_shuffle_relative
2221
template<typename T>
2322
[[vk::ext_capability( spv::CapabilityGroupNonUniformShuffleRelative )]]
2423
[[vk::ext_instruction( spv::OpGroupNonUniformShuffleUp )]]
@@ -28,7 +27,7 @@ template<typename T>
2827
[[vk::ext_capability( spv::CapabilityGroupNonUniformShuffleRelative )]]
2928
[[vk::ext_instruction( spv::OpGroupNonUniformShuffleDown )]]
3029
T groupShuffleDown(uint32_t executionScope, T value, uint32_t delta);
31-
#endif
30+
3231
}
3332
}
3433
}

include/nbl/builtin/hlsl/subgroup/arithmetic_portability.hlsl

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
#define _NBL_BUILTIN_HLSL_SUBGROUP_ARITHMETIC_PORTABILITY_INCLUDED_
66

77

8-
#include "nbl/builtin/hlsl/subgroup/basic.hlsl"
8+
#include "nbl/builtin/hlsl/device_capabilities_traits.hlsl"
99

10+
#include "nbl/builtin/hlsl/subgroup/basic.hlsl"
1011
#include "nbl/builtin/hlsl/subgroup/arithmetic_portability_impl.hlsl"
11-
#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl"
1212

1313

1414
namespace nbl
@@ -18,16 +18,12 @@ namespace hlsl
1818
namespace subgroup
1919
{
2020

21-
// TODO: change to alias using template when DXC finally fixes SPIR-V codegen impartity
22-
//template<class Binop, class device_capability_traits/*=TODO: Atil give us the traits so we can default with `void`!*/>
23-
//struct reduction : impl::reduction<Binop,device_capability_traits::subgroupArithmetic> {};
24-
25-
template<class Binop>
26-
struct reduction : impl::reduction<Binop,nbl::hlsl::jit::device_capabilities::subgroupArithmetic> {};
27-
template<class Binop>
28-
struct inclusive_scan : impl::inclusive_scan<Binop,nbl::hlsl::jit::device_capabilities::subgroupArithmetic> {};
29-
template<class Binop>
30-
struct exclusive_scan : impl::exclusive_scan<Binop,nbl::hlsl::jit::device_capabilities::subgroupArithmetic> {};
21+
// Subgroup reduction and scans. Each struct inherits the `impl::` variant chosen by the
// compile-time `subgroupArithmetic` flag of `device_capabilities_traits<device_capabilities>`.
// NOTE(review): with the default `device_capabilities=void` the flag presumably resolves to the
// traits' `false` default; what the flag selects inside `impl::` is not visible here - confirm
// in arithmetic_portability_impl.hlsl.
template<class Binop, class device_capabilities=void>
22+
struct reduction : impl::reduction<Binop,device_capabilities_traits<device_capabilities>::subgroupArithmetic> {};
23+
template<class Binop, class device_capabilities=void>
24+
struct inclusive_scan : impl::inclusive_scan<Binop,device_capabilities_traits<device_capabilities>::subgroupArithmetic> {};
25+
template<class Binop, class device_capabilities=void>
26+
struct exclusive_scan : impl::exclusive_scan<Binop,device_capabilities_traits<device_capabilities>::subgroupArithmetic> {};
3127

3228
}
3329
}

include/nbl/builtin/hlsl/workgroup/arithmetic.hlsl

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,43 +22,43 @@ namespace workgroup
2222
//#define NBL_ALIAS_CALL_OPERATOR_TO_STATIC_IMPL(OPTIONAL_TEMPLATE,RETURN_TYPE,/*tuples of argument types and names*/...)
2323
//#define NBL_ALIAS_TEMPLATED_CALL_OPERATOR_TO_IMPL(TEMPLATE,RETURN_TYPE,/*tuples of argument types and names*/...)
2424

25-
template<class BinOp, uint16_t ItemCount>
25+
template<class BinOp, uint16_t ItemCount, class device_capabilities=void>
2626
struct reduction
2727
{
2828
using type_t = typename BinOp::type_t;
2929

3030
template<class Accessor>
3131
static type_t __call(NBL_CONST_REF_ARG(type_t) value, NBL_REF_ARG(Accessor) accessor)
3232
{
33-
impl::reduce<BinOp,ItemCount> fn;
33+
impl::reduce<BinOp,ItemCount,device_capabilities> fn;
3434
fn.template __call<Accessor>(value,accessor);
3535
accessor.workgroupExecutionAndMemoryBarrier();
3636
return Broadcast<type_t,Accessor>(fn.lastLevelScan,accessor,fn.lastInvocationInLevel);
3737
}
3838
};
3939

40-
template<class BinOp, uint16_t ItemCount>
40+
template<class BinOp, uint16_t ItemCount, class device_capabilities=void>
4141
struct inclusive_scan
4242
{
4343
using type_t = typename BinOp::type_t;
4444

4545
template<class Accessor>
4646
static type_t __call(NBL_CONST_REF_ARG(type_t) value, NBL_REF_ARG(Accessor) accessor)
4747
{
48-
impl::scan<BinOp,false,ItemCount> fn;
48+
impl::scan<BinOp,false,ItemCount,device_capabilities> fn;
4949
return fn.template __call<Accessor>(value,accessor);
5050
}
5151
};
5252

53-
template<class BinOp, uint16_t ItemCount>
53+
template<class BinOp, uint16_t ItemCount, class device_capabilities=void>
5454
struct exclusive_scan
5555
{
5656
using type_t = typename BinOp::type_t;
5757

5858
template<class Accessor>
5959
static type_t __call(NBL_CONST_REF_ARG(type_t) value, NBL_REF_ARG(Accessor) accessor)
6060
{
61-
impl::scan<BinOp,true,ItemCount> fn;
61+
impl::scan<BinOp,true,ItemCount,device_capabilities> fn;
6262
return fn.template __call<Accessor>(value,accessor);
6363
}
6464
};
@@ -94,15 +94,15 @@ uint16_t ballotCountedBitDWORD(NBL_REF_ARG(BallotAccessor) ballotAccessor)
9494
return 0;
9595
}
9696

97-
template<bool Exclusive, uint16_t ItemCount, class BallotAccessor, class ArithmeticAccessor>
97+
template<bool Exclusive, uint16_t ItemCount, class BallotAccessor, class ArithmeticAccessor, class device_capabilities>
9898
uint16_t ballotScanBitCount(NBL_REF_ARG(BallotAccessor) ballotAccessor, NBL_REF_ARG(ArithmeticAccessor) arithmeticAccessor)
9999
{
100100
const uint16_t subgroupIndex = SubgroupContiguousIndex();
101101
const uint16_t bitfieldIndex = impl::getDWORD(subgroupIndex);
102102
const uint32_t localBitfield = ballotAccessor.get(bitfieldIndex);
103103

104104
static const uint16_t DWORDCount = impl::ballot_dword_count<ItemCount>::value;
105-
uint32_t count = exclusive_scan<plus<uint32_t>,DWORDCount>::template __call<ArithmeticAccessor>(
105+
uint32_t count = exclusive_scan<plus<uint32_t>,DWORDCount,device_capabilities>::template __call<ArithmeticAccessor>(
106106
ballotCountedBitDWORD<ItemCount,BallotAccessor>(ballotAccessor),
107107
arithmeticAccessor
108108
);
@@ -115,26 +115,26 @@ uint16_t ballotScanBitCount(NBL_REF_ARG(BallotAccessor) ballotAccessor, NBL_REF_
115115
}
116116
}
117117

118-
template<uint16_t ItemCount, class BallotAccessor, class ArithmeticAccessor>
118+
template<uint16_t ItemCount, class BallotAccessor, class ArithmeticAccessor, class device_capabilities=void>
119119
uint16_t ballotBitCount(NBL_REF_ARG(BallotAccessor) ballotAccessor, NBL_REF_ARG(ArithmeticAccessor) arithmeticAccessor)
120120
{
121121
static const uint16_t DWORDCount = impl::ballot_dword_count<ItemCount>::value;
122-
return uint16_t(reduction<plus<uint32_t>,DWORDCount>::template __call<ArithmeticAccessor>(
122+
return uint16_t(reduction<plus<uint32_t>,DWORDCount,device_capabilities>::template __call<ArithmeticAccessor>(
123123
impl::ballotCountedBitDWORD<ItemCount,BallotAccessor>(ballotAccessor),
124124
arithmeticAccessor
125125
));
126126
}
127127

128-
template<uint16_t ItemCount, class BallotAccessor, class ArithmeticAccessor>
128+
template<uint16_t ItemCount, class BallotAccessor, class ArithmeticAccessor, class device_capabilities=void>
129129
uint16_t ballotInclusiveBitCount(NBL_REF_ARG(BallotAccessor) ballotAccessor, NBL_REF_ARG(ArithmeticAccessor) arithmeticAccessor)
130130
{
131-
return impl::ballotScanBitCount<false,ItemCount,BallotAccessor,ArithmeticAccessor>(ballotAccessor,arithmeticAccessor);
131+
return impl::ballotScanBitCount<false,ItemCount,BallotAccessor,ArithmeticAccessor,device_capabilities>(ballotAccessor,arithmeticAccessor);
132132
}
133133

134-
template<uint16_t ItemCount, class BallotAccessor, class ArithmeticAccessor>
134+
template<uint16_t ItemCount, class BallotAccessor, class ArithmeticAccessor, class device_capabilities=void>
135135
uint16_t ballotExclusiveBitCount(NBL_REF_ARG(BallotAccessor) ballotAccessor, NBL_REF_ARG(ArithmeticAccessor) arithmeticAccessor)
136136
{
137-
return impl::ballotScanBitCount<true,ItemCount,BallotAccessor,ArithmeticAccessor>(ballotAccessor,arithmeticAccessor);
137+
return impl::ballotScanBitCount<true,ItemCount,BallotAccessor,ArithmeticAccessor,device_capabilities>(ballotAccessor,arithmeticAccessor);
138138
}
139139

140140
}

include/nbl/builtin/hlsl/workgroup/shared_scan.hlsl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ namespace workgroup
1818

1919
namespace impl
2020
{
21-
template<class BinOp, uint16_t ItemCount>
21+
template<class BinOp, uint16_t ItemCount, class device_capabilities>
2222
struct reduce
2323
{
2424
using type_t = typename BinOp::type_t;
@@ -29,7 +29,7 @@ struct reduce
2929
const uint16_t lastInvocation = ItemCount-1;
3030
const uint16_t subgroupMask = uint16_t(glsl::gl_SubgroupSize()-1u);
3131

32-
subgroup::inclusive_scan<BinOp> subgroupOp;
32+
subgroup::inclusive_scan<BinOp,device_capabilities> subgroupOp;
3333

3434
lastInvocationInLevel = lastInvocation;
3535
scanLoadIndex = SubgroupContiguousIndex();
@@ -78,10 +78,10 @@ struct reduce
7878
bool participate;
7979
};
8080

81-
template<class BinOp, bool Exclusive, uint16_t ItemCount>
81+
template<class BinOp, bool Exclusive, uint16_t ItemCount, class device_capabilities>
8282
struct scan// : reduce<BinOp,ItemCount> https://github.com/microsoft/DirectXShaderCompiler/issues/5966
8383
{
84-
using base_t = reduce<BinOp,ItemCount>;
84+
using base_t = reduce<BinOp,ItemCount,device_capabilities>;
8585
base_t __base;
8686
using type_t = typename base_t::type_t;
8787

0 commit comments

Comments
 (0)