Devsh-Graphics-Programming
diff --git a/‎include/nbl/builtin/glsl/utils/indirect_commands.glsl
Lines changed: 0 additions & 44 deletions b/‎include/nbl/builtin/glsl/utils/indirect_commands.glsl
Lines changed: 0 additions & 44 deletions
diff --git a/‎include/nbl/builtin/glsl/utils/surface_transform.glsl
Lines changed: 0 additions & 149 deletions b/‎include/nbl/builtin/glsl/utils/surface_transform.glsl
Lines changed: 0 additions & 149 deletions
diff --git a/‎include/nbl/builtin/glsl/utils/surface_transform_e.h
Lines changed: 0 additions & 13 deletions b/‎include/nbl/builtin/glsl/utils/surface_transform_e.h
Lines changed: 0 additions & 13 deletions
diff --git a/‎include/nbl/builtin/glsl/utils/surface_transform_transformedDerivatives.glsl
Lines changed: 0 additions & 21 deletions b/‎include/nbl/builtin/glsl/utils/surface_transform_transformedDerivatives.glsl
Lines changed: 0 additions & 21 deletions
diff --git a/‎include/nbl/builtin/glsl/utils/transform.glsl
Lines changed: 0 additions & 62 deletions b/‎include/nbl/builtin/glsl/utils/transform.glsl
Lines changed: 0 additions & 62 deletions
diff --git a/‎include/nbl/builtin/hlsl/device_capabilities_traits.hlsl
Lines changed: 13 additions & 0 deletions b/‎include/nbl/builtin/hlsl/device_capabilities_traits.hlsl
Lines changed: 13 additions & 0 deletions
@@ -34,66 +34,4 @@ mat3 nbl_glsl_mul_with_bounds(out mat3 error, in mat3 a, in mat3 b, in float b_r
     return retval;
 }
 
-
-vec4 nbl_glsl_pseudoMul4x4with3x1(in mat4 m, in vec3 v)
-{
-    return m[0] * v.x + m[1] * v.y + m[2] * v.z + m[3];
-}
-vec3 nbl_glsl_pseudoMul3x4with3x1(in mat4x3 m, in vec3 v)
-{
-    return m[0] * v.x + m[1] * v.y + m[2] * v.z + m[3];
-}
-mat4x3 nbl_glsl_pseudoMul4x3with4x3(in mat4x3 lhs, in mat4x3 rhs) // TODO: change name to 3x4with3x4
-{
-    mat4x3 result;
-    for (int i = 0; i < 4; i++)
-        result[i] = lhs[0] * rhs[i][0] + lhs[1] * rhs[i][1] + lhs[2] * rhs[i][2];
-    result[3] += lhs[3];
-    return result;
-}
-mat4 nbl_glsl_pseudoMul4x4with4x3(in mat4 proj, in mat4x3 tform)
-{
-    mat4 result;
-    for (int i = 0; i < 4; i++)
-        result[i] = proj[0] * tform[i][0] + proj[1] * tform[i][1] + proj[2] * tform[i][2];
-    result[3] += proj[3];
-    return result;
-}
-
-// useful for fast computation of a Normal Matrix (you just need to remember to normalize the transformed normal because of the missing divide by the determinant)
-mat3 nbl_glsl_sub3x3TransposeCofactors(in mat3 sub3x3)
-{
-    return mat3(
-        cross(sub3x3[1],sub3x3[2]),
-        cross(sub3x3[2],sub3x3[0]),
-        cross(sub3x3[0],sub3x3[1])
-    );
-}
-// returns a signflip mask
-uint nbl_glsl_sub3x3TransposeCofactors(in mat3 sub3x3, out mat3 sub3x3TransposeCofactors)
-{
-    sub3x3TransposeCofactors = nbl_glsl_sub3x3TransposeCofactors(sub3x3);
-    return floatBitsToUint(dot(sub3x3[0],sub3x3TransposeCofactors[0]))&0x80000000u;
-}
-
-// use this if you anticipate flipped/mirrored models
-vec3 nbl_glsl_fastNormalTransform(in uint signFlipMask, in mat3 sub3x3TransposeCofactors, in vec3 normal)
-{
-    vec3 tmp = sub3x3TransposeCofactors*normal;
-    const float tmpLenRcp = inversesqrt(dot(tmp,tmp));
-    return tmp*uintBitsToFloat(floatBitsToUint(tmpLenRcp)^signFlipMask);
-}
-
-//
-mat4x3 nbl_glsl_pseudoInverse3x4(in mat4x3 tform)
-{
-    const mat3 sub3x3Inv = inverse(mat3(tform));
-    mat4x3 retval;
-    retval[0] = sub3x3Inv[0];
-    retval[1] = sub3x3Inv[1];
-    retval[2] = sub3x3Inv[2];
-    retval[3] = -sub3x3Inv*tform[3];
-    return retval;
-}
-
 #endif
@@ -13,6 +13,7 @@ NBL_GENERATE_MEMBER_TESTER(shaderDrawParameters);
 NBL_GENERATE_MEMBER_TESTER(subgroupArithmetic);
 NBL_GENERATE_MEMBER_TESTER(fragmentShaderPixelInterlock);
 NBL_GENERATE_MEMBER_TESTER(maxOptimallyResidentWorkgroupInvocations);
+NBL_GENERATE_MEMBER_TESTER(maxResidentInvocations);
 
 #define NBL_GENERATE_GET_OR_DEFAULT(field, ty, default) \
 template<typename S, bool = has_member_##field<S>::value> struct get_or_default_##field : integral_constant<ty,S::field> {}; \
@@ -30,17 +31,29 @@ NBL_GENERATE_GET_OR_DEFAULT(shaderDrawParameters, bool, false);
 NBL_GENERATE_GET_OR_DEFAULT(subgroupArithmetic, bool, false);
 NBL_GENERATE_GET_OR_DEFAULT(fragmentShaderPixelInterlock, bool, false);
 NBL_GENERATE_GET_OR_DEFAULT(maxOptimallyResidentWorkgroupInvocations, uint16_t, 0);
+NBL_GENERATE_GET_OR_DEFAULT(maxResidentInvocations, uint32_t, 0);
 }
 
 
 template<typename device_capabilities>
 struct device_capabilities_traits
 {
+    // Computes how many workgroups to dispatch for a persistent-workgroup kernel.
+    //
+    // elementCount                - number of elements to process
+    // workgroupSize               - invocations per workgroup
+    // workgroupSpinningProtection - multiplier on `workgroupSize`, i.e. each workgroup is
+    //                               budgeted `workgroupSize*workgroupSpinningProtection` elements
+    //
+    // Returns the smaller of:
+    //  - ceil(elementCount / (workgroupSize*workgroupSpinningProtection)), and
+    //  - maxResidentInvocations / maxOptimallyResidentWorkgroupInvocations.
+    // Returns 0 when `elementCount` is 0 or when the per-workgroup element budget is 0.
+    //
+    // NOTE(review): the resident workgroup count divides by `maxOptimallyResidentWorkgroupInvocations`
+    // and not by `workgroupSize` - confirm that is the intended denominator.
+    // NOTE(review): reads only static members, so it could probably be declared `static` - confirm against callers.
+    uint32_t computeOptimalPersistentWorkgroupDispatchSize(const uint32_t elementCount, const uint32_t workgroupSize, const uint32_t workgroupSpinningProtection)
+    {
+        const uint32_t elementsPerWorkgroup = workgroupSize * workgroupSpinningProtection;
+        // `elementCount-1u` would wrap around for an empty input, and a zero budget would divide by zero
+        if (elementCount == 0u || elementsPerWorkgroup == 0u)
+            return 0u;
+        // rounded-up division; written as ((n-1)/d)+1 so the numerator cannot overflow
+        const uint32_t infinitelyWideDeviceWGCount = (elementCount - 1u) / elementsPerWorkgroup + 1u;
+        // `maxOptimallyResidentWorkgroupInvocations` falls back to 0 when the capability struct does not declare it,
+        // so guard the division; an unknown residency yields 0 workgroups instead of an undefined quotient
+        const uint32_t maxResidentWorkgroups = maxOptimallyResidentWorkgroupInvocations != 0 ? (maxResidentInvocations / maxOptimallyResidentWorkgroupInvocations) : 0u;
+        return min(infinitelyWideDeviceWGCount, maxResidentWorkgroups);
+    }
+    // Convenience overload: same as above with `workgroupSpinningProtection` equal to 1.
+    uint32_t computeOptimalPersistentWorkgroupDispatchSize(const uint32_t elementCount, const uint32_t workgroupSize)
+    {
+        return computeOptimalPersistentWorkgroupDispatchSize(elementCount,workgroupSize,1u);
+    }
+
     NBL_CONSTEXPR_STATIC_INLINE bool shaderFloat64                                = impl::get_or_default_shaderFloat64<device_capabilities>::value;
     NBL_CONSTEXPR_STATIC_INLINE bool shaderDrawParameters                         = impl::get_or_default_shaderDrawParameters<device_capabilities>::value;
     NBL_CONSTEXPR_STATIC_INLINE bool subgroupArithmetic                           = impl::get_or_default_subgroupArithmetic<device_capabilities>::value;
     NBL_CONSTEXPR_STATIC_INLINE bool fragmentShaderPixelInterlock                 = impl::get_or_default_fragmentShaderPixelInterlock<device_capabilities>::value;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t maxOptimallyResidentWorkgroupInvocations = impl::get_or_default_maxOptimallyResidentWorkgroupInvocations<device_capabilities>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t maxResidentInvocations                   = impl::get_or_default_maxResidentInvocations<device_capabilities>::value;
 };
 
 #undef NBL_GENERATE_GET_OR_DEFAULT