Devsh-Graphics-Programming
diff --git a/‎examples_tests b/‎examples_tests
diff --git a/‎include/nbl/asset/format/EFormat.h
Lines changed: 52 additions & 39 deletions b/‎include/nbl/asset/format/EFormat.h
Lines changed: 52 additions & 39 deletions
diff --git a/‎include/nbl/asset/format/encodePixels.h
Lines changed: 1 addition & 0 deletions b/‎include/nbl/asset/format/encodePixels.h
Lines changed: 1 addition & 0 deletions
diff --git a/‎include/nbl/builtin/hlsl/device_capabilities_traits.hlsl
Lines changed: 30 additions & 1 deletion b/‎include/nbl/builtin/hlsl/device_capabilities_traits.hlsl
Lines changed: 30 additions & 1 deletion
diff --git a/‎include/nbl/builtin/hlsl/glsl_compat/core.hlsl
Lines changed: 7 additions & 5 deletions b/‎include/nbl/builtin/hlsl/glsl_compat/core.hlsl
Lines changed: 7 additions & 5 deletions
diff --git a/‎include/nbl/builtin/hlsl/glsl_compat/subgroup_ballot.hlsl
Lines changed: 6 additions & 32 deletions b/‎include/nbl/builtin/hlsl/glsl_compat/subgroup_ballot.hlsl
Lines changed: 6 additions & 32 deletions
diff --git a/‎include/nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl
Lines changed: 6 additions & 17 deletions b/‎include/nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl
Lines changed: 6 additions & 17 deletions
diff --git a/‎include/nbl/builtin/hlsl/macros.h
Lines changed: 1 addition & 1 deletion b/‎include/nbl/builtin/hlsl/macros.h
Lines changed: 1 addition & 1 deletion
@@ -1762,18 +1762,28 @@ inline value_type getFormatMaxValue(E_FORMAT format, uint32_t channel)
     {
         switch (format)
         {
-        case EF_BC6H_SFLOAT_BLOCK: return 32767;
-        case EF_BC6H_UFLOAT_BLOCK: return 65504;
-        default: break;
+            case EF_B10G11R11_UFLOAT_PACK32:
+                if (channel<=1)
+                    return 65520;
+                else if (channel==2)
+                    return 65504;
+                break;
+            case EF_E5B9G9R9_UFLOAT_PACK32:
+                if (channel<3)
+                    return 32704;
+                break;
+            case EF_BC6H_SFLOAT_BLOCK: return 32767;
+            case EF_BC6H_UFLOAT_BLOCK: return 65504;
+            default: break;
         }
 
         auto bytesPerChannel = (getBytesPerPixel(format)*core::rational(1,getFormatChannelCount(format))).getIntegerApprox();
         switch (bytesPerChannel)
         {
-        case 2u: return 65504;
-        case 4u: return FLT_MAX;
-        case 8u: return DBL_MAX;
-        default: break;
+            case 2u: return 65504;
+            case 4u: return FLT_MAX;
+            case 8u: return DBL_MAX;
+            default: break;
         }
     }
     return 0;
@@ -1882,44 +1892,47 @@ inline value_type getFormatPrecision(E_FORMAT format, uint32_t channel, value_ty
     else if (isFloatingPointFormat(format))
     {
         switch (format)
-        {
-        case EF_B10G11R11_UFLOAT_PACK32:
         {
             // unsigned values are always ordered as + 1
-            float f = std::abs(static_cast<float>(value));
-            int bitshft = channel == 2u ? 6 : 5;
-
-            uint16_t f16 = core::Float16Compressor::compress(f);
-            uint16_t enc = f16 >> bitshft;
-            uint16_t next_f16 = (enc + 1) << bitshft;
-
-            return core::Float16Compressor::decompress(next_f16) - f;
-        }
-        case EF_E5B9G9R9_UFLOAT_PACK32:
-            return 0; //TODO
-        default: break;
+            case EF_B10G11R11_UFLOAT_PACK32: [[fallthrough]];
+            case EF_E5B9G9R9_UFLOAT_PACK32: // TODO: probably need to change signature and take all values?
+            {
+                float f = std::abs(static_cast<float>(value));
+                int bitshift;
+                if (format==EF_B10G11R11_UFLOAT_PACK32)
+                    bitshift = channel==2u ? 6:5;
+                else
+                    bitshift = 4;
+
+                uint16_t f16 = core::Float16Compressor::compress(f);
+                uint16_t enc = f16 >> bitshift;
+                uint16_t next_f16 = (enc + 1) << bitshift;
+
+                return core::Float16Compressor::decompress(next_f16) - f;
+            }
+            default: break;
         }
         auto bytesPerChannel = (getBytesPerPixel(format)*core::rational(1,getFormatChannelCount(format))).getIntegerApprox();
         switch (bytesPerChannel)
         {
-        case 2u:
-        {
-            float f = std::abs(static_cast<float>(value));
-            uint16_t f16 = core::Float16Compressor::compress(f);
-            uint16_t dir = core::Float16Compressor::compress(2.f*(f+1.f));
-            return core::Float16Compressor::decompress( core::nextafter16(f16, dir) ) - f;
-        }
-        case 4u:
-        {
-            float f32 = std::abs(static_cast<float>(value));
-            return core::nextafter32(f32,2.f*(f32+1.f))-f32;
-        }
-        case 8u:
-        {
-            double f64 = std::abs(static_cast<double>(value));
-            return core::nextafter64(f64,2.0*(f64+1.0))-f64;
-        }
-        default: break;
+            case 2u:
+            {
+                float f = std::abs(static_cast<float>(value));
+                uint16_t f16 = core::Float16Compressor::compress(f);
+                uint16_t dir = core::Float16Compressor::compress(2.f*(f+1.f));
+                return core::Float16Compressor::decompress( core::nextafter16(f16, dir) ) - f;
+            }
+            case 4u:
+            {
+                float f32 = std::abs(static_cast<float>(value));
+                return core::nextafter32(f32,2.f*(f32+1.f))-f32;
+            }
+            case 8u:
+            {
+                double f64 = std::abs(static_cast<double>(value));
+                return core::nextafter64(f64,2.0*(f64+1.0))-f64;
+            }
+            default: break;
         }
     }
 
 
@@ -2488,6 +2488,7 @@ namespace asset
             inp >>= 52;
             inp &= 0x7ffull;
             inp -= (1023ull - 15ull);
+            // TODO: this is wrong, need to get maximum exponent across all 3 input values
             exp = (static_cast<uint32_t>(inp) << 27);
         }
         for (uint32_t i = 0u; i < 3u; ++i)
 
@@ -7,15 +7,44 @@
 #include <nbl/builtin/hlsl/member_test_macros.hlsl>
 
 #ifdef __HLSL_VERSION
+
+NBL_GENERATE_MEMBER_TESTER(shaderFloat64);
+NBL_GENERATE_MEMBER_TESTER(shaderDrawParameters);
+NBL_GENERATE_MEMBER_TESTER(subgroupArithmetic);
+NBL_GENERATE_MEMBER_TESTER(fragmentShaderPixelInterlock);
+NBL_GENERATE_MEMBER_TESTER(maxOptimallyResidentWorkgroupInvocations);
+
+#define NBL_GENERATE_GET_OR_DEFAULT(field, ty, default) \
+template<typename S, bool = has_member_##field<S>::value> struct get_or_default_##field : integral_constant<ty,S::field> {}; \
+template<typename S> struct get_or_default_##field<S,false> : integral_constant<ty,default> {};
+
 namespace nbl
 {
 namespace hlsl
 {
+
+namespace impl
+{
+NBL_GENERATE_GET_OR_DEFAULT(shaderFloat64, bool, false);
+NBL_GENERATE_GET_OR_DEFAULT(shaderDrawParameters, bool, false);
+NBL_GENERATE_GET_OR_DEFAULT(subgroupArithmetic, bool, false);
+NBL_GENERATE_GET_OR_DEFAULT(fragmentShaderPixelInterlock, bool, false);
+NBL_GENERATE_GET_OR_DEFAULT(maxOptimallyResidentWorkgroupInvocations, uint16_t, 0);
+}
+
+
 template<typename device_capabilities>
 struct device_capabilities_traits
 {
-    // TODO: check for members and default them to sane things, only do the 5 members in CJITIncludeLoader.cpp struct, we'll do the rest on `vulkan_1_3` branch with Nahim
+    NBL_CONSTEXPR_STATIC_INLINE bool shaderFloat64                                = impl::get_or_default_shaderFloat64<device_capabilities>::value;
+    NBL_CONSTEXPR_STATIC_INLINE bool shaderDrawParameters                         = impl::get_or_default_shaderDrawParameters<device_capabilities>::value;
+    NBL_CONSTEXPR_STATIC_INLINE bool subgroupArithmetic                           = impl::get_or_default_subgroupArithmetic<device_capabilities>::value;
+    NBL_CONSTEXPR_STATIC_INLINE bool fragmentShaderPixelInterlock                 = impl::get_or_default_fragmentShaderPixelInterlock<device_capabilities>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t maxOptimallyResidentWorkgroupInvocations = impl::get_or_default_maxOptimallyResidentWorkgroupInvocations<device_capabilities>::value;
 };
+
+#undef NBL_GENERATE_GET_OR_DEFAULT
+
 }
 }
 #endif
 
@@ -60,12 +60,14 @@ T atomicCompSwap(NBL_REF_ARG(T) ptr, T comparator, T value)
  * For Compute Shaders
  */
 
-// TODO (Future): Its annoying we have to forward declare those, but accessing gl_NumSubgroups and other gl_* values is not yet possible due to https://github.com/microsoft/DirectXShaderCompiler/issues/4217
-// also https://github.com/microsoft/DirectXShaderCompiler/issues/5280
-uint32_t gl_LocalInvocationIndex();
+// TODO: Extremely annoying that HLSL doesn't have references, so we can't transparently alias the variables as `const&` :(
+uint32_t3 gl_NumWorkGroups() {return spirv::NumWorkGroups;}
+// TODO: DXC BUG prevents us from defining this!
 uint32_t3 gl_WorkGroupSize();
-uint32_t3 gl_GlobalInvocationID();
-uint32_t3 gl_WorkGroupID();
+uint32_t3 gl_WorkGroupID() {return spirv::WorkgroupId;}
+uint32_t3 gl_LocalInvocationID() {return spirv::LocalInvocationId;}
+uint32_t3 gl_GlobalInvocationID() {return spirv::GlobalInvocationId;}
+uint32_t gl_LocalInvocationIndex() {return spirv::LocalInvocationIndex;}
 
 void barrier() {
     spirv::controlBarrier(spv::ScopeWorkgroup, spv::ScopeWorkgroup, spv::MemorySemanticsAcquireReleaseMask | spv::MemorySemanticsWorkgroupMemoryMask);
 
@@ -14,38 +14,12 @@ namespace hlsl
 namespace glsl
 {
 
-uint32_t4 gl_SubgroupEqMask()
-{
-    const uint32_t comp = gl_SubgroupInvocationID()>>5;
-    uint32_t4 retval = uint32_t4(0,0,0,0);
-    retval[comp] = 0x1u<<(gl_SubgroupInvocationID()&31u);
-    return retval;
-}
-
-uint32_t4 gl_SubgroupGeMask()
-{
-    const uint32_t FullBits = 0xffffffffu;
-    const uint32_t comp = gl_SubgroupInvocationID()>>5;
-    uint32_t4 retval = uint32_t4(comp>0 ? 0u:FullBits,comp>1 ? 0u:FullBits,comp>2 ? 0u:FullBits,0u);
-    retval[comp] = FullBits<<(gl_SubgroupInvocationID()&31u);
-    return retval;
-}
-
-uint32_t4 gl_SubgroupGtMask()
-{
-    uint32_t4 retval = gl_SubgroupGeMask();
-    const uint32_t comp = gl_SubgroupInvocationID()>>5;
-    retval[comp] = 0xfffffffeu<<(gl_SubgroupInvocationID()&31u);
-    return retval;
-}
-
-uint32_t4 gl_SubgroupLeMask() {
-    return ~gl_SubgroupGtMask();
-}
-
-uint32_t4 gl_SubgroupLtMask() {
-    return ~gl_SubgroupGeMask();
-}
+// TODO: Extremely annoying that HLSL doesn't have references, so we can't transparently alias the variables as `const&` :(
+uint32_t4 gl_SubgroupEqMask() {return spirv::BuiltInSubgroupEqMask;}
+uint32_t4 gl_SubgroupGeMask() {return spirv::BuiltInSubgroupGeMask;}
+uint32_t4 gl_SubgroupGtMask() {return spirv::BuiltInSubgroupGtMask;}
+uint32_t4 gl_SubgroupLeMask()  {return spirv::BuiltInSubgroupLeMask;}
+uint32_t4 gl_SubgroupLtMask()  {return spirv::BuiltInSubgroupLtMask;}
 
 template<typename T>
 T subgroupBroadcastFirst(T value)
 
@@ -13,25 +13,15 @@ namespace hlsl
 {
 namespace glsl
 {
-
 #ifdef __HLSL_VERSION
-uint32_t gl_SubgroupSize() {
-    return WaveGetLaneCount();
-}
-
-uint32_t gl_SubgroupSizeLog2() {
-    return firstbithigh(gl_SubgroupSize());
-}
-
-uint32_t gl_SubgroupInvocationID() {
-    return WaveGetLaneIndex();
-}
+// TODO: Extremely annoying that HLSL doesn't have references, so we can't transparently alias the variables as `const&` :(
+uint32_t gl_SubgroupSize() {return spirv::SubgroupSize;}
+uint32_t gl_SubgroupSizeLog2() {return firstbithigh(spirv::SubgroupSize);}
+uint32_t gl_SubgroupInvocationID() {return spirv::SubgroupLocalInvocationId;}
 
 // only available in compute
-uint32_t gl_SubgroupID() {
-    // TODO (PentaKon): This is not always correct (subgroup IDs aren't always aligned with invocation index per the spec)
-    return gl_LocalInvocationIndex() >> gl_SubgroupSizeLog2();
-}
+uint32_t gl_NumSubgroups() {return spirv::NumSubgroups;}
+uint32_t gl_SubgroupID() {return spirv::SubgroupId;}
 
 bool subgroupElect() {
     return spirv::subgroupElect(spv::ScopeSubgroup);
@@ -57,7 +47,6 @@ void subgroupMemoryBarrierImage() {
     spirv::memoryBarrier(spv::ScopeSubgroup, spv::MemorySemanticsAcquireReleaseMask | spv::MemorySemanticsImageMemoryMask);
 }
 #endif
-
 }
 }
 }
 
@@ -29,7 +29,7 @@
 #define NBL_ARG_125(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17,a18,a19,a20,a21,a22,a23,a24,a25,a26,a27,a28,a29,a30,a31,a32,a33,a34,a35,a36,a37,a38,a39,a40,a41,a42,a43,a44,a45,a46,a47,a48,a49,a50,a51,a52,a53,a54,a55,a56,a57,a58,a59,a60,a61,a62,a63,a64,a65,a66,a67,a68,a69,a70,a71,a72,a73,a74,a75,a76,a77,a78,a79,a80,a81,a82,a83,a84,a85,a86,a87,a88,a89,a90,a91,a92,a93,a94,a95,a96,a97,a98,a99,a100,a101,a102,a103,a104,a105,a106,a107,a108,a109,a110,a111,a112,a113,a114,a115,a116,a117,a118,a119,a120,a121,a122,a123,a124,a125, ... ) a125
 #define NBL_VA_ARGS_COUNT( ... ) NBL_EVAL(NBL_ARG_125(__VA_ARGS__,125,124,123,122,121,120,119,118,117,116,115,114,113,112,111,110,109,108,107,106,105,104,103,102,101,100,99,98,97,96,95,94,93,92,91,90,89,88,87,86,85,84,83,82,81,80,79,78,77,76,75,74,73,72,71,70,69,68,67,66,65,64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0))
 
-//
+// TODO: Use BOOST_PP!
 #define NBL_FOREACH_0(WHAT)
 #define NBL_FOREACH_1(WHAT, X) NBL_EVAL(WHAT(X))
 #define NBL_FOREACH_2(WHAT, X, ...) NBL_EVAL(WHAT(X)NBL_FOREACH_1(WHAT, __VA_ARGS__))
Original file line number	Diff line number	Diff line change
`@@ -2488,6 +2488,7 @@ namespace asset`
`2488`	`2488`	`inp >>= 52;`
`2489`	`2489`	`inp &= 0x7ffull;`
`2490`	`2490`	`inp -= (1023ull - 15ull);`
	`2491`	`+ // TODO: this is wrong, need to get maximum exponent across all 3 input values`
`2491`	`2492`	`exp = (static_cast<uint32_t>(inp) << 27);`
`2492`	`2493`	`}`
`2493`	`2494`	`for (uint32_t i = 0u; i < 3u; ++i)`