Skip to content

Commit b1bb802

Browse files
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla into vulkan_1_3
2 parents 5a6783d + 1cc6a20 commit b1bb802

17 files changed

+196
-199
lines changed

include/nbl/asset/format/EFormat.h

Lines changed: 52 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1762,18 +1762,28 @@ inline value_type getFormatMaxValue(E_FORMAT format, uint32_t channel)
17621762
{
17631763
switch (format)
17641764
{
1765-
case EF_BC6H_SFLOAT_BLOCK: return 32767;
1766-
case EF_BC6H_UFLOAT_BLOCK: return 65504;
1767-
default: break;
1765+
case EF_B10G11R11_UFLOAT_PACK32:
1766+
if (channel<=1)
1767+
return 65520;
1768+
else if (channel==2)
1769+
return 65504;
1770+
break;
1771+
case EF_E5B9G9R9_UFLOAT_PACK32:
1772+
if (channel<3)
1773+
return 32704;
1774+
break;
1775+
case EF_BC6H_SFLOAT_BLOCK: return 32767;
1776+
case EF_BC6H_UFLOAT_BLOCK: return 65504;
1777+
default: break;
17681778
}
17691779

17701780
auto bytesPerChannel = (getBytesPerPixel(format)*core::rational(1,getFormatChannelCount(format))).getIntegerApprox();
17711781
switch (bytesPerChannel)
17721782
{
1773-
case 2u: return 65504;
1774-
case 4u: return FLT_MAX;
1775-
case 8u: return DBL_MAX;
1776-
default: break;
1783+
case 2u: return 65504;
1784+
case 4u: return FLT_MAX;
1785+
case 8u: return DBL_MAX;
1786+
default: break;
17771787
}
17781788
}
17791789
return 0;
@@ -1882,44 +1892,47 @@ inline value_type getFormatPrecision(E_FORMAT format, uint32_t channel, value_ty
18821892
else if (isFloatingPointFormat(format))
18831893
{
18841894
switch (format)
1885-
{
1886-
case EF_B10G11R11_UFLOAT_PACK32:
18871895
{
18881896
// unsigned values are always ordered as + 1
1889-
float f = std::abs(static_cast<float>(value));
1890-
int bitshft = channel == 2u ? 6 : 5;
1891-
1892-
uint16_t f16 = core::Float16Compressor::compress(f);
1893-
uint16_t enc = f16 >> bitshft;
1894-
uint16_t next_f16 = (enc + 1) << bitshft;
1895-
1896-
return core::Float16Compressor::decompress(next_f16) - f;
1897-
}
1898-
case EF_E5B9G9R9_UFLOAT_PACK32:
1899-
return 0; //TODO
1900-
default: break;
1897+
case EF_B10G11R11_UFLOAT_PACK32: [[fallthrough]];
1898+
case EF_E5B9G9R9_UFLOAT_PACK32: // TODO: probably need to change signature and take all values?
1899+
{
1900+
float f = std::abs(static_cast<float>(value));
1901+
int bitshift;
1902+
if (format==EF_B10G11R11_UFLOAT_PACK32)
1903+
bitshift = channel==2u ? 6:5;
1904+
else
1905+
bitshift = 4;
1906+
1907+
uint16_t f16 = core::Float16Compressor::compress(f);
1908+
uint16_t enc = f16 >> bitshift;
1909+
uint16_t next_f16 = (enc + 1) << bitshift;
1910+
1911+
return core::Float16Compressor::decompress(next_f16) - f;
1912+
}
1913+
default: break;
19011914
}
19021915
auto bytesPerChannel = (getBytesPerPixel(format)*core::rational(1,getFormatChannelCount(format))).getIntegerApprox();
19031916
switch (bytesPerChannel)
19041917
{
1905-
case 2u:
1906-
{
1907-
float f = std::abs(static_cast<float>(value));
1908-
uint16_t f16 = core::Float16Compressor::compress(f);
1909-
uint16_t dir = core::Float16Compressor::compress(2.f*(f+1.f));
1910-
return core::Float16Compressor::decompress( core::nextafter16(f16, dir) ) - f;
1911-
}
1912-
case 4u:
1913-
{
1914-
float f32 = std::abs(static_cast<float>(value));
1915-
return core::nextafter32(f32,2.f*(f32+1.f))-f32;
1916-
}
1917-
case 8u:
1918-
{
1919-
double f64 = std::abs(static_cast<double>(value));
1920-
return core::nextafter64(f64,2.0*(f64+1.0))-f64;
1921-
}
1922-
default: break;
1918+
case 2u:
1919+
{
1920+
float f = std::abs(static_cast<float>(value));
1921+
uint16_t f16 = core::Float16Compressor::compress(f);
1922+
uint16_t dir = core::Float16Compressor::compress(2.f*(f+1.f));
1923+
return core::Float16Compressor::decompress( core::nextafter16(f16, dir) ) - f;
1924+
}
1925+
case 4u:
1926+
{
1927+
float f32 = std::abs(static_cast<float>(value));
1928+
return core::nextafter32(f32,2.f*(f32+1.f))-f32;
1929+
}
1930+
case 8u:
1931+
{
1932+
double f64 = std::abs(static_cast<double>(value));
1933+
return core::nextafter64(f64,2.0*(f64+1.0))-f64;
1934+
}
1935+
default: break;
19231936
}
19241937
}
19251938

include/nbl/asset/format/encodePixels.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2488,6 +2488,7 @@ namespace asset
24882488
inp >>= 52;
24892489
inp &= 0x7ffull;
24902490
inp -= (1023ull - 15ull);
2491+
// TODO: this is wrong, need to get maximum exponent across all 3 input values
24912492
exp = (static_cast<uint32_t>(inp) << 27);
24922493
}
24932494
for (uint32_t i = 0u; i < 3u; ++i)

include/nbl/builtin/hlsl/device_capabilities_traits.hlsl

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,44 @@
77
#include <nbl/builtin/hlsl/member_test_macros.hlsl>
88

99
#ifdef __HLSL_VERSION
10+
11+
NBL_GENERATE_MEMBER_TESTER(shaderFloat64);
12+
NBL_GENERATE_MEMBER_TESTER(shaderDrawParameters);
13+
NBL_GENERATE_MEMBER_TESTER(subgroupArithmetic);
14+
NBL_GENERATE_MEMBER_TESTER(fragmentShaderPixelInterlock);
15+
NBL_GENERATE_MEMBER_TESTER(maxOptimallyResidentWorkgroupInvocations);
16+
17+
#define NBL_GENERATE_GET_OR_DEFAULT(field, ty, default) \
18+
template<typename S, bool = has_member_##field<S>::value> struct get_or_default_##field : integral_constant<ty,S::field> {}; \
19+
template<typename S> struct get_or_default_##field<S,false> : integral_constant<ty,default> {};
20+
1021
namespace nbl
1122
{
1223
namespace hlsl
1324
{
25+
26+
namespace impl
27+
{
28+
NBL_GENERATE_GET_OR_DEFAULT(shaderFloat64, bool, false);
29+
NBL_GENERATE_GET_OR_DEFAULT(shaderDrawParameters, bool, false);
30+
NBL_GENERATE_GET_OR_DEFAULT(subgroupArithmetic, bool, false);
31+
NBL_GENERATE_GET_OR_DEFAULT(fragmentShaderPixelInterlock, bool, false);
32+
NBL_GENERATE_GET_OR_DEFAULT(maxOptimallyResidentWorkgroupInvocations, uint16_t, 0);
33+
}
34+
35+
1436
template<typename device_capabilities>
1537
struct device_capabilities_traits
1638
{
17-
// TODO: check for members and default them to sane things, only do the 5 members in CJITIncludeLoader.cpp struct, we'll do the rest on `vulkan_1_3` branch with Nahim
39+
NBL_CONSTEXPR_STATIC_INLINE bool shaderFloat64 = impl::get_or_default_shaderFloat64<device_capabilities>::value;
40+
NBL_CONSTEXPR_STATIC_INLINE bool shaderDrawParameters = impl::get_or_default_shaderDrawParameters<device_capabilities>::value;
41+
NBL_CONSTEXPR_STATIC_INLINE bool subgroupArithmetic = impl::get_or_default_subgroupArithmetic<device_capabilities>::value;
42+
NBL_CONSTEXPR_STATIC_INLINE bool fragmentShaderPixelInterlock = impl::get_or_default_fragmentShaderPixelInterlock<device_capabilities>::value;
43+
NBL_CONSTEXPR_STATIC_INLINE uint16_t maxOptimallyResidentWorkgroupInvocations = impl::get_or_default_maxOptimallyResidentWorkgroupInvocations<device_capabilities>::value;
1844
};
45+
46+
#undef NBL_GENERATE_GET_OR_DEFAULT
47+
1948
}
2049
}
2150
#endif

include/nbl/builtin/hlsl/glsl_compat/core.hlsl

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,14 @@ T atomicCompSwap(NBL_REF_ARG(T) ptr, T comparator, T value)
6060
* For Compute Shaders
6161
*/
6262

63-
// TODO (Future): Its annoying we have to forward declare those, but accessing gl_NumSubgroups and other gl_* values is not yet possible due to https://github.com/microsoft/DirectXShaderCompiler/issues/4217
64-
// also https://github.com/microsoft/DirectXShaderCompiler/issues/5280
65-
uint32_t gl_LocalInvocationIndex();
63+
// TODO: Extremely annoying that HLSL doesn't have references, so we can't transparently alias the variables as `const&` :(
64+
uint32_t3 gl_NumWorkGroups() {return spirv::NumWorkGroups;}
65+
// TODO: DXC BUG prevents us from defining this!
6666
uint32_t3 gl_WorkGroupSize();
67-
uint32_t3 gl_GlobalInvocationID();
68-
uint32_t3 gl_WorkGroupID();
67+
uint32_t3 gl_WorkGroupID() {return spirv::WorkgroupId;}
68+
uint32_t3 gl_LocalInvocationID() {return spirv::LocalInvocationId;}
69+
uint32_t3 gl_GlobalInvocationID() {return spirv::GlobalInvocationId;}
70+
uint32_t gl_LocalInvocationIndex() {return spirv::LocalInvocationIndex;}
6971

7072
void barrier() {
7173
spirv::controlBarrier(spv::ScopeWorkgroup, spv::ScopeWorkgroup, spv::MemorySemanticsAcquireReleaseMask | spv::MemorySemanticsWorkgroupMemoryMask);

include/nbl/builtin/hlsl/glsl_compat/subgroup_ballot.hlsl

Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -14,38 +14,12 @@ namespace hlsl
1414
namespace glsl
1515
{
1616

17-
uint32_t4 gl_SubgroupEqMask()
18-
{
19-
const uint32_t comp = gl_SubgroupInvocationID()>>5;
20-
uint32_t4 retval = uint32_t4(0,0,0,0);
21-
retval[comp] = 0x1u<<(gl_SubgroupInvocationID()&31u);
22-
return retval;
23-
}
24-
25-
uint32_t4 gl_SubgroupGeMask()
26-
{
27-
const uint32_t FullBits = 0xffffffffu;
28-
const uint32_t comp = gl_SubgroupInvocationID()>>5;
29-
uint32_t4 retval = uint32_t4(comp>0 ? 0u:FullBits,comp>1 ? 0u:FullBits,comp>2 ? 0u:FullBits,0u);
30-
retval[comp] = FullBits<<(gl_SubgroupInvocationID()&31u);
31-
return retval;
32-
}
33-
34-
uint32_t4 gl_SubgroupGtMask()
35-
{
36-
uint32_t4 retval = gl_SubgroupGeMask();
37-
const uint32_t comp = gl_SubgroupInvocationID()>>5;
38-
retval[comp] = 0xfffffffeu<<(gl_SubgroupInvocationID()&31u);
39-
return retval;
40-
}
41-
42-
uint32_t4 gl_SubgroupLeMask() {
43-
return ~gl_SubgroupGtMask();
44-
}
45-
46-
uint32_t4 gl_SubgroupLtMask() {
47-
return ~gl_SubgroupGeMask();
48-
}
17+
// TODO: Extremely annoying that HLSL doesn't have references, so we can't transparently alias the variables as `const&` :(
18+
uint32_t4 gl_SubgroupEqMask() {return spirv::BuiltInSubgroupEqMask;}
19+
uint32_t4 gl_SubgroupGeMask() {return spirv::BuiltInSubgroupGeMask;}
20+
uint32_t4 gl_SubgroupGtMask() {return spirv::BuiltInSubgroupGtMask;}
21+
uint32_t4 gl_SubgroupLeMask() {return spirv::BuiltInSubgroupLeMask;}
22+
uint32_t4 gl_SubgroupLtMask() {return spirv::BuiltInSubgroupLtMask;}
4923

5024
template<typename T>
5125
T subgroupBroadcastFirst(T value)

include/nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,25 +13,15 @@ namespace hlsl
1313
{
1414
namespace glsl
1515
{
16-
1716
#ifdef __HLSL_VERSION
18-
uint32_t gl_SubgroupSize() {
19-
return WaveGetLaneCount();
20-
}
21-
22-
uint32_t gl_SubgroupSizeLog2() {
23-
return firstbithigh(gl_SubgroupSize());
24-
}
25-
26-
uint32_t gl_SubgroupInvocationID() {
27-
return WaveGetLaneIndex();
28-
}
17+
// TODO: Extremely annoying that HLSL doesn't have references, so we can't transparently alias the variables as `const&` :(
18+
uint32_t gl_SubgroupSize() {return spirv::SubgroupSize;}
19+
uint32_t gl_SubgroupSizeLog2() {return firstbithigh(spirv::SubgroupSize);}
20+
uint32_t gl_SubgroupInvocationID() {return spirv::SubgroupLocalInvocationId;}
2921

3022
// only available in compute
31-
uint32_t gl_SubgroupID() {
32-
// TODO (PentaKon): This is not always correct (subgroup IDs aren't always aligned with invocation index per the spec)
33-
return gl_LocalInvocationIndex() >> gl_SubgroupSizeLog2();
34-
}
23+
uint32_t gl_NumSubgroups() {return spirv::NumSubgroups;}
24+
uint32_t gl_SubgroupID() {return spirv::SubgroupId;}
3525

3626
bool subgroupElect() {
3727
return spirv::subgroupElect(spv::ScopeSubgroup);
@@ -57,7 +47,6 @@ void subgroupMemoryBarrierImage() {
5747
spirv::memoryBarrier(spv::ScopeSubgroup, spv::MemorySemanticsAcquireReleaseMask | spv::MemorySemanticsImageMemoryMask);
5848
}
5949
#endif
60-
6150
}
6251
}
6352
}

include/nbl/builtin/hlsl/macros.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
#define NBL_ARG_125(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17,a18,a19,a20,a21,a22,a23,a24,a25,a26,a27,a28,a29,a30,a31,a32,a33,a34,a35,a36,a37,a38,a39,a40,a41,a42,a43,a44,a45,a46,a47,a48,a49,a50,a51,a52,a53,a54,a55,a56,a57,a58,a59,a60,a61,a62,a63,a64,a65,a66,a67,a68,a69,a70,a71,a72,a73,a74,a75,a76,a77,a78,a79,a80,a81,a82,a83,a84,a85,a86,a87,a88,a89,a90,a91,a92,a93,a94,a95,a96,a97,a98,a99,a100,a101,a102,a103,a104,a105,a106,a107,a108,a109,a110,a111,a112,a113,a114,a115,a116,a117,a118,a119,a120,a121,a122,a123,a124,a125, ... ) a125
3030
#define NBL_VA_ARGS_COUNT( ... ) NBL_EVAL(NBL_ARG_125(__VA_ARGS__,125,124,123,122,121,120,119,118,117,116,115,114,113,112,111,110,109,108,107,106,105,104,103,102,101,100,99,98,97,96,95,94,93,92,91,90,89,88,87,86,85,84,83,82,81,80,79,78,77,76,75,74,73,72,71,70,69,68,67,66,65,64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0))
3131

32-
//
32+
// TODO: Use BOOST_PP!
3333
#define NBL_FOREACH_0(WHAT)
3434
#define NBL_FOREACH_1(WHAT, X) NBL_EVAL(WHAT(X))
3535
#define NBL_FOREACH_2(WHAT, X, ...) NBL_EVAL(WHAT(X)NBL_FOREACH_1(WHAT, __VA_ARGS__))

0 commit comments

Comments
 (0)