@@ -13,6 +13,7 @@ NBL_GENERATE_MEMBER_TESTER(shaderDrawParameters);
13
13
// One member tester is instantiated per optional capability field.
// NOTE(review): the macro's definition is outside this view; presumably each invocation
// produces the `has_member_<field>` trait that NBL_GENERATE_GET_OR_DEFAULT uses as its
// defaulted bool template argument - confirm against the macro definition.
NBL_GENERATE_MEMBER_TESTER (subgroupArithmetic);
14
14
NBL_GENERATE_MEMBER_TESTER (fragmentShaderPixelInterlock);
15
15
NBL_GENERATE_MEMBER_TESTER (maxOptimallyResidentWorkgroupInvocations);
16
+ NBL_GENERATE_MEMBER_TESTER (maxResidentInvocations);
16
17
17
18
#define NBL_GENERATE_GET_OR_DEFAULT (field, ty, default ) \
18
19
template<typename S, bool = has_member_##field<S>::value> struct get_or_default_##field : integral_constant<ty,S::field> {}; \
@@ -30,17 +31,29 @@ NBL_GENERATE_GET_OR_DEFAULT(shaderDrawParameters, bool, false);
30
31
// Each invocation passes (field, type, default) and generates `get_or_default_<field>`,
// whose primary template exposes `S::field` as an integral_constant of the given type.
// NOTE(review): the fallback that supplies `default` when the field is absent is not
// visible in this view - confirm in the macro body. Both invocation-count limits use 0
// as their default.
NBL_GENERATE_GET_OR_DEFAULT (subgroupArithmetic, bool , false );
31
32
NBL_GENERATE_GET_OR_DEFAULT (fragmentShaderPixelInterlock, bool , false );
32
33
NBL_GENERATE_GET_OR_DEFAULT (maxOptimallyResidentWorkgroupInvocations, uint16_t, 0 );
34
+ NBL_GENERATE_GET_OR_DEFAULT (maxResidentInvocations, uint32_t, 0 );
33
35
}
34
36
35
37
36
38
template<typename device_capabilities>
37
39
struct device_capabilities_traits
38
40
{
41
+ uint32_t computeOptimalPersistentWorkgroupDispatchSize (const uint32_t elementCount, const uint32_t workgroupSize, const uint32_t workgroupSpinningProtection)
42
+ {
43
+ const uint32_t infinitelyWideDeviceWGCount = (elementCount - 1u) / (workgroupSize * workgroupSpinningProtection) + 1u; // need thtat divAndRoundUp
44
+ return min (infinitelyWideDeviceWGCount,maxResidentInvocations/maxOptimallyResidentWorkgroupInvocations);
45
+ }
46
+ uint32_t computeOptimalPersistentWorkgroupDispatchSize (const uint32_t elementCount, const uint32_t workgroupSize)
47
+ {
48
+ return computeOptimalPersistentWorkgroupDispatchSize (elementCount,workgroupSize,1u);
49
+ }
50
+
39
51
NBL_CONSTEXPR_STATIC_INLINE bool shaderFloat64 = impl::get_or_default_shaderFloat64<device_capabilities>::value;
40
52
NBL_CONSTEXPR_STATIC_INLINE bool shaderDrawParameters = impl::get_or_default_shaderDrawParameters<device_capabilities>::value;
41
53
NBL_CONSTEXPR_STATIC_INLINE bool subgroupArithmetic = impl::get_or_default_subgroupArithmetic<device_capabilities>::value;
42
54
NBL_CONSTEXPR_STATIC_INLINE bool fragmentShaderPixelInterlock = impl::get_or_default_fragmentShaderPixelInterlock<device_capabilities>::value;
43
55
NBL_CONSTEXPR_STATIC_INLINE uint16_t maxOptimallyResidentWorkgroupInvocations = impl::get_or_default_maxOptimallyResidentWorkgroupInvocations<device_capabilities>::value;
56
+ NBL_CONSTEXPR_STATIC_INLINE uint32_t maxResidentInvocations = impl::get_or_default_maxResidentInvocations<device_capabilities>::value;
44
57
};
45
58
46
59
#undef NBL_GENERATE_GET_OR_DEFAULT
0 commit comments