Skip to content

Commit 9ac426c

Browse files
port Swapchain GLSL to HLSL
1 parent 3d3b6d9 commit 9ac426c

File tree

10 files changed

+281
-296
lines changed

10 files changed

+281
-296
lines changed

include/nbl/builtin/glsl/utils/indirect_commands.glsl

Lines changed: 0 additions & 44 deletions
This file was deleted.

include/nbl/builtin/glsl/utils/surface_transform.glsl

Lines changed: 0 additions & 149 deletions
This file was deleted.

include/nbl/builtin/glsl/utils/surface_transform_e.h

Lines changed: 0 additions & 13 deletions
This file was deleted.

include/nbl/builtin/glsl/utils/surface_transform_transformedDerivatives.glsl

Lines changed: 0 additions & 21 deletions
This file was deleted.

include/nbl/builtin/glsl/utils/transform.glsl

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -34,66 +34,4 @@ mat3 nbl_glsl_mul_with_bounds(out mat3 error, in mat3 a, in mat3 b, in float b_r
3434
return retval;
3535
}
3636

37-
38-
vec4 nbl_glsl_pseudoMul4x4with3x1(in mat4 m, in vec3 v)
39-
{
40-
return m[0] * v.x + m[1] * v.y + m[2] * v.z + m[3];
41-
}
42-
vec3 nbl_glsl_pseudoMul3x4with3x1(in mat4x3 m, in vec3 v)
43-
{
44-
return m[0] * v.x + m[1] * v.y + m[2] * v.z + m[3];
45-
}
46-
mat4x3 nbl_glsl_pseudoMul4x3with4x3(in mat4x3 lhs, in mat4x3 rhs) // TODO: change name to 3x4with3x4
47-
{
48-
mat4x3 result;
49-
for (int i = 0; i < 4; i++)
50-
result[i] = lhs[0] * rhs[i][0] + lhs[1] * rhs[i][1] + lhs[2] * rhs[i][2];
51-
result[3] += lhs[3];
52-
return result;
53-
}
54-
mat4 nbl_glsl_pseudoMul4x4with4x3(in mat4 proj, in mat4x3 tform)
55-
{
56-
mat4 result;
57-
for (int i = 0; i < 4; i++)
58-
result[i] = proj[0] * tform[i][0] + proj[1] * tform[i][1] + proj[2] * tform[i][2];
59-
result[3] += proj[3];
60-
return result;
61-
}
62-
63-
// useful for fast computation of a Normal Matrix (you just need to remember to normalize the transformed normal because of the missing divide by the determinant)
64-
mat3 nbl_glsl_sub3x3TransposeCofactors(in mat3 sub3x3)
65-
{
66-
return mat3(
67-
cross(sub3x3[1],sub3x3[2]),
68-
cross(sub3x3[2],sub3x3[0]),
69-
cross(sub3x3[0],sub3x3[1])
70-
);
71-
}
72-
// returns a signflip mask
73-
uint nbl_glsl_sub3x3TransposeCofactors(in mat3 sub3x3, out mat3 sub3x3TransposeCofactors)
74-
{
75-
sub3x3TransposeCofactors = nbl_glsl_sub3x3TransposeCofactors(sub3x3);
76-
return floatBitsToUint(dot(sub3x3[0],sub3x3TransposeCofactors[0]))&0x80000000u;
77-
}
78-
79-
// use this if you anticipate flipped/mirrored models
80-
vec3 nbl_glsl_fastNormalTransform(in uint signFlipMask, in mat3 sub3x3TransposeCofactors, in vec3 normal)
81-
{
82-
vec3 tmp = sub3x3TransposeCofactors*normal;
83-
const float tmpLenRcp = inversesqrt(dot(tmp,tmp));
84-
return tmp*uintBitsToFloat(floatBitsToUint(tmpLenRcp)^signFlipMask);
85-
}
86-
87-
//
88-
mat4x3 nbl_glsl_pseudoInverse3x4(in mat4x3 tform)
89-
{
90-
const mat3 sub3x3Inv = inverse(mat3(tform));
91-
mat4x3 retval;
92-
retval[0] = sub3x3Inv[0];
93-
retval[1] = sub3x3Inv[1];
94-
retval[2] = sub3x3Inv[2];
95-
retval[3] = -sub3x3Inv*tform[3];
96-
return retval;
97-
}
98-
9937
#endif

include/nbl/builtin/hlsl/device_capabilities_traits.hlsl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ NBL_GENERATE_MEMBER_TESTER(shaderDrawParameters);
1313
NBL_GENERATE_MEMBER_TESTER(subgroupArithmetic);
1414
NBL_GENERATE_MEMBER_TESTER(fragmentShaderPixelInterlock);
1515
NBL_GENERATE_MEMBER_TESTER(maxOptimallyResidentWorkgroupInvocations);
16+
NBL_GENERATE_MEMBER_TESTER(maxResidentInvocations);
1617

1718
#define NBL_GENERATE_GET_OR_DEFAULT(field, ty, default) \
1819
template<typename S, bool = has_member_##field<S>::value> struct get_or_default_##field : integral_constant<ty,S::field> {}; \
@@ -30,17 +31,29 @@ NBL_GENERATE_GET_OR_DEFAULT(shaderDrawParameters, bool, false);
3031
NBL_GENERATE_GET_OR_DEFAULT(subgroupArithmetic, bool, false);
3132
NBL_GENERATE_GET_OR_DEFAULT(fragmentShaderPixelInterlock, bool, false);
3233
NBL_GENERATE_GET_OR_DEFAULT(maxOptimallyResidentWorkgroupInvocations, uint16_t, 0);
34+
NBL_GENERATE_GET_OR_DEFAULT(maxResidentInvocations, uint32_t, 0);
3335
}
3436

3537

3638
// Compile-time view over a `device_capabilities` type: every constant below is read from the
// matching `impl::get_or_default_<field><device_capabilities>::value` helper.
// NOTE(review): presumably the helper yields the default passed to NBL_GENERATE_GET_OR_DEFAULT when
// `device_capabilities` has no such member — the fallback specialisation is not visible here, confirm.
template<typename device_capabilities>
3739
struct device_capabilities_traits
3840
{
41+
// Returns the workgroup count to dispatch for `elementCount` elements: the rounded-up quotient of
// `elementCount` by `workgroupSize * workgroupSpinningProtection`, clamped from above by
// `maxResidentInvocations / maxOptimallyResidentWorkgroupInvocations`.
// NOTE(review): `elementCount == 0` makes `elementCount - 1u` wrap around (unsigned arithmetic), so the
// unclamped count becomes large instead of 0 — confirm callers never pass 0.
// NOTE(review): both trait constants are generated with a default of 0, so the clamp divides by zero
// if `maxOptimallyResidentWorkgroupInvocations` ends up 0 — confirm the capability struct always provides it.
// NOTE(review): declared as a non-static member although it only reads the struct's constants — confirm intent.
uint32_t computeOptimalPersistentWorkgroupDispatchSize(const uint32_t elementCount, const uint32_t workgroupSize, const uint32_t workgroupSpinningProtection)
42+
{
43+
const uint32_t infinitelyWideDeviceWGCount = (elementCount - 1u) / (workgroupSize * workgroupSpinningProtection) + 1u; // TODO: needs a divAndRoundUp helper (manual round-up division; valid for elementCount >= 1)
44+
return min(infinitelyWideDeviceWGCount,maxResidentInvocations/maxOptimallyResidentWorkgroupInvocations);
45+
}
46+
// Convenience overload: same computation with `workgroupSpinningProtection` fixed to 1.
uint32_t computeOptimalPersistentWorkgroupDispatchSize(const uint32_t elementCount, const uint32_t workgroupSize)
47+
{
48+
return computeOptimalPersistentWorkgroupDispatchSize(elementCount,workgroupSize,1u);
49+
}
50+
3951
// Capability constants, one per member tester / get_or_default helper generated above.
NBL_CONSTEXPR_STATIC_INLINE bool shaderFloat64 = impl::get_or_default_shaderFloat64<device_capabilities>::value;
4052
NBL_CONSTEXPR_STATIC_INLINE bool shaderDrawParameters = impl::get_or_default_shaderDrawParameters<device_capabilities>::value;
4153
NBL_CONSTEXPR_STATIC_INLINE bool subgroupArithmetic = impl::get_or_default_subgroupArithmetic<device_capabilities>::value;
4254
NBL_CONSTEXPR_STATIC_INLINE bool fragmentShaderPixelInterlock = impl::get_or_default_fragmentShaderPixelInterlock<device_capabilities>::value;
4355
NBL_CONSTEXPR_STATIC_INLINE uint16_t maxOptimallyResidentWorkgroupInvocations = impl::get_or_default_maxOptimallyResidentWorkgroupInvocations<device_capabilities>::value;
56+
NBL_CONSTEXPR_STATIC_INLINE uint32_t maxResidentInvocations = impl::get_or_default_maxResidentInvocations<device_capabilities>::value;
4457
};
4558

4659
#undef NBL_GENERATE_GET_OR_DEFAULT

0 commit comments

Comments
 (0)