From da885753a0b3b8dd345abab81015b153a44b6be6 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 21 Oct 2024 13:14:06 -0700 Subject: [PATCH 01/27] Saving work --- examples_tests | 2 +- .../transformation_matrix_utils.hlsl | 112 ++++++++++++++++++ .../builtin/hlsl/projection/projection.hlsl | 33 ++++++ include/quaternion.h | 3 +- 4 files changed, 148 insertions(+), 2 deletions(-) create mode 100644 include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl create mode 100644 include/nbl/builtin/hlsl/projection/projection.hlsl diff --git a/examples_tests b/examples_tests index a565619cd2..fedb9c138f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit a565619cd23300181e48df34562f329bd93c538d +Subproject commit fedb9c138fdd41f5713b9b75fc4eeac19dd5d203 diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl new file mode 100644 index 0000000000..d50bc16869 --- /dev/null +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -0,0 +1,112 @@ +#ifndef _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +template +matrix getMatrix3x4As4x4(const matrix& mat) +{ + matrix output; + for (int i = 0; i < 3; ++i) + output[i] = mat[i]; + output[3] = float32_t4(0.0f, 0.0f, 0.0f, 1.0f); + + return output; +} + +// TODO: use portable_float when merged +//! multiplies matrices a and b, 3x4 matrices are treated as 4x4 matrices with 4th row set to (0, 0, 0 ,1) +template +inline matrix concatenateBFollowedByA(const matrix& a, const matrix& b) +{ + const matrix a4x4 = getMatrix3x4As4x4(a); + const matrix b4x4 = getMatrix3x4As4x4(b); + return matrix(mul(a4x4, b4x4)); +} + +template +inline matrix buildCameraLookAtMatrixLH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = core::normalize(target - position); + const vector xaxis = core::normalize(core::cross(upVector, zaxis)); + const vector yaxis = core::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -dot(xaxis, position)); + r[1] = vector(yaxis, -dot(yaxis, position)); + r[2] = vector(zaxis, -dot(zaxis, position)); + + return r; +} + +float32_t3x4 buildCameraLookAtMatrixRH( + const float32_t3& position, + const float32_t3& target, + const float32_t3& upVector) +{ + const float32_t3 zaxis = core::normalize(position - target); + const float32_t3 xaxis = core::normalize(core::cross(upVector, zaxis)); + const float32_t3 yaxis = core::cross(zaxis, xaxis); + + float32_t3x4 r; + r[0] = float32_t4(xaxis, -dot(xaxis, position)); + r[1] = float32_t4(yaxis, -dot(yaxis, position)); + r[2] = float32_t4(zaxis, -dot(zaxis, position)); + + return r; +} + +// TODO: test, check if there is better implementation +// TODO: move quaternion to nbl::hlsl +// TODO: why NBL_REF_ARG(MatType) doesn't work????? + +//! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged +template +inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(core::quaternion) quat) +{ + static_assert(N == 3 || N == 4); + + outMat[0] = vector( + 1 - 2 * (quat.y * quat.y + quat.z * quat.z), + 2 * (quat.x * quat.y - quat.z * quat.w), + 2 * (quat.x * quat.z + quat.y * quat.w), + outMat[0][3] + ); + + outMat[1] = vector( + 2 * (quat.x * quat.y + quat.z * quat.w), + 1 - 2 * (quat.x * quat.x + quat.z * quat.z), + 2 * (quat.y * quat.z - quat.x * quat.w), + outMat[1][3] + ); + + outMat[2] = vector( + 2 * (quat.x * quat.z - quat.y * quat.w), + 2 * (quat.y * quat.z + quat.x * quat.w), + 1 - 2 * (quat.x * quat.x + quat.y * quat.y), + outMat[2][3] + ); +} + +template +inline void setTranslation(matrix& outMat, NBL_CONST_REF_ARG(vector) translation) +{ + static_assert(N == 3 || N == 4); + + outMat[0].w = translation.x; + outMat[1].w = translation.y; + outMat[2].w = translation.z; +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/projection/projection.hlsl b/include/nbl/builtin/hlsl/projection/projection.hlsl new file mode 100644 index 0000000000..d973fda25d --- /dev/null +++ b/include/nbl/builtin/hlsl/projection/projection.hlsl @@ -0,0 +1,33 @@ +#ifndef _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +// TODO: use glm instead for c++ +inline float32_t4x4 buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +{ +#ifndef __HLSL_VERSION + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero +#endif + + const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); + const float w = h / aspectRatio; + + float32_t4x4 m; + m[0] = float32_t4(w, 0.f, 0.f, 0.f); + m[1] = float32_t4(0.f, -h, 0.f, 0.f); + m[2] = float32_t4(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = float32_t4(0.f, 0.f, 1.f, 0.f); + + return m; +} + +} +} + +#endif \ No newline at end of file diff --git a/include/quaternion.h b/include/quaternion.h index c1867235db..a0fd689dc6 100644 --- a/include/quaternion.h +++ b/include/quaternion.h @@ -23,7 +23,8 @@ class matrix3x4SIMD; //! Quaternion class for representing rotations. /** It provides cheap combinations and avoids gimbal locks. Also useful for interpolations. */ -class quaternion : private vectorSIMDf +// TODO: move this to nbl::hlsl +class quaternion : public vectorSIMDf { public: //! Default Constructor From ab77ca41a310ecbc105302172ee95bc21d13c02e Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 23 Oct 2024 13:51:08 -0700 Subject: [PATCH 02/27] Removed SIMD matrix classes --- examples_tests | 2 +- include/matrix3x4SIMD.h | 263 ---------- include/matrix3x4SIMD_impl.h | 470 ------------------ include/matrix4SIMD.h | 385 -------------- include/matrix4SIMD_impl.h | 299 ----------- .../nbl/builtin/hlsl/glsl_compat/math.hlsl | 65 +++ .../hlsl/math/quaternion/quaternion.hlsl | 74 +++ .../hlsl/math/quaternion/quaternion_impl.hlsl | 25 + 8 files changed, 165 insertions(+), 1418 deletions(-) delete mode 100644 include/matrix3x4SIMD.h delete mode 100644 include/matrix3x4SIMD_impl.h delete mode 100644 include/matrix4SIMD.h delete mode 100644 include/matrix4SIMD_impl.h create mode 100644 include/nbl/builtin/hlsl/glsl_compat/math.hlsl create mode 100644 include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl create mode 100644 include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl diff --git a/examples_tests b/examples_tests index fedb9c138f..6b45fecf8c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit fedb9c138fdd41f5713b9b75fc4eeac19dd5d203 +Subproject commit 6b45fecf8c64e8b11fef8c2b3a7e5618edaca68d diff --git a/include/matrix3x4SIMD.h b/include/matrix3x4SIMD.h deleted file mode 100644 index d52f305cec..0000000000 --- a/include/matrix3x4SIMD.h +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX3X4SIMD_H_INCLUDED__ -#define __NBL_MATRIX3X4SIMD_H_INCLUDED__ - -#include "vectorSIMD.h" -#include "quaternion.h" - -namespace nbl::core -{ - -class matrix4x3; - -#define _NBL_MATRIX_ALIGNMENT _NBL_SIMD_ALIGNMENT -static_assert(_NBL_MATRIX_ALIGNMENT>=_NBL_VECTOR_ALIGNMENT,"Matrix must be equally or more aligned than vector!"); - -//! Equivalent of GLSL's mat4x3 -class matrix3x4SIMD// : private AllocationOverrideBase<_NBL_MATRIX_ALIGNMENT> EBO inheritance problem w.r.t `rows[3]` -{ - public: - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VectorCount = 3u; - vectorSIMDf rows[VectorCount]; - - explicit matrix3x4SIMD( const vectorSIMDf& _r0 = vectorSIMDf(1.f, 0.f, 0.f, 0.f), - const vectorSIMDf& _r1 = vectorSIMDf(0.f, 1.f, 0.f, 0.f), - const vectorSIMDf& _r2 = vectorSIMDf(0.f, 0.f, 1.f, 0.f)) : rows{_r0, _r1, _r2} - { - } - - matrix3x4SIMD( float _a00, float _a01, float _a02, float _a03, - float _a10, float _a11, float _a12, float _a13, - float _a20, float _a21, float _a22, float _a23) - : matrix3x4SIMD(vectorSIMDf(_a00, _a01, _a02, _a03), - vectorSIMDf(_a10, _a11, _a12, _a13), - vectorSIMDf(_a20, _a21, _a22, _a23)) - { - } - - explicit matrix3x4SIMD(const float* const _data) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4*i); - } - matrix3x4SIMD(const float* const _data, bool ALIGNED) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4*i, ALIGNED); - } - - float* pointer() { return rows[0].pointer; } - const float* pointer() const { return rows[0].pointer; } - - inline matrix3x4SIMD& set(const matrix4x3& _retarded); - inline matrix4x3 getAsRetardedIrrlichtMatrix() const; - - static inline matrix3x4SIMD concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b); - - static inline matrix3x4SIMD concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b); - - inline matrix3x4SIMD& concatenateAfter(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByA(*this, _other); - } - - inline matrix3x4SIMD& concatenateBefore(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByA(_other, *this); - } - - inline matrix3x4SIMD& concatenateAfterPrecisely(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByAPrecisely(*this, _other); - } - - inline matrix3x4SIMD& concatenateBeforePrecisely(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByAPrecisely(_other, *this); - } - - inline bool operator==(const matrix3x4SIMD& _other) - { - return !(*this != _other); - } - - inline bool operator!=(const matrix3x4SIMD& _other); - - - inline matrix3x4SIMD operator-() const - { - matrix3x4SIMD retval; - retval.rows[0] = -rows[0]; - retval.rows[1] = -rows[1]; - retval.rows[2] = -rows[2]; - return retval; - } - - - inline matrix3x4SIMD& operator+=(const matrix3x4SIMD& _other); - inline matrix3x4SIMD operator+(const matrix3x4SIMD& _other) const - { - matrix3x4SIMD retval(*this); - return retval += _other; - } - - inline matrix3x4SIMD& operator-=(const matrix3x4SIMD& _other); - inline matrix3x4SIMD operator-(const matrix3x4SIMD& _other) const - { - matrix3x4SIMD retval(*this); - return retval -= _other; - } - - inline matrix3x4SIMD& operator*=(float _scalar); - inline matrix3x4SIMD operator*(float _scalar) const - { - matrix3x4SIMD retval(*this); - return retval *= _scalar; - } - - inline matrix3x4SIMD& setTranslation(const vectorSIMDf& _translation) - { - // no faster way of doing it? - rows[0].w = _translation.x; - rows[1].w = _translation.y; - rows[2].w = _translation.z; - return *this; - } - inline vectorSIMDf getTranslation() const; - inline vectorSIMDf getTranslation3D() const; - - inline matrix3x4SIMD& setScale(const vectorSIMDf& _scale); - - inline vectorSIMDf getScale() const; - - inline void transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void transformVect(vectorSIMDf& _in_out) const - { - transformVect(_in_out, _in_out); - } - - inline void pseudoMulWith4x1(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void pseudoMulWith4x1(vectorSIMDf& _in_out) const - { - pseudoMulWith4x1(_in_out,_in_out); - } - - inline void mulSub3x3WithNx1(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void mulSub3x3WithNx1(vectorSIMDf& _in_out) const - { - mulSub3x3WithNx1(_in_out, _in_out); - } - - inline static matrix3x4SIMD buildCameraLookAtMatrixLH( - const vectorSIMDf& position, - const vectorSIMDf& target, - const vectorSIMDf& upVector); - inline static matrix3x4SIMD buildCameraLookAtMatrixRH( - const vectorSIMDf& position, - const vectorSIMDf& target, - const vectorSIMDf& upVector); - - inline matrix3x4SIMD& setRotation(const quaternion& _quat); - - inline matrix3x4SIMD& setScaleRotationAndTranslation( const vectorSIMDf& _scale, - const quaternion& _quat, - const vectorSIMDf& _translation); - - inline vectorSIMDf getPseudoDeterminant() const - { - vectorSIMDf tmp; - return determinant_helper(tmp); - } - - inline bool getInverse(matrix3x4SIMD& _out) const; - bool makeInverse() - { - matrix3x4SIMD tmp; - - if (getInverse(tmp)) - { - *this = tmp; - return true; - } - return false; - } - - // - inline bool getSub3x3InverseTranspose(matrix3x4SIMD& _out) const; - - // - inline bool getSub3x3InverseTransposePacked(float outRows[9]) const - { - matrix3x4SIMD tmp; - if (!getSub3x3InverseTranspose(tmp)) - return false; - - float* _out = outRows; - for (auto i=0; i<3; i++) - { - const auto& row = tmp.rows[i]; - for (auto j=0; j<3; j++) - *(_out++) = row[j]; - } - - return true; - } - - // - inline core::matrix3x4SIMD getSub3x3TransposeCofactors() const; - - // - inline void setTransformationCenter(const vectorSIMDf& _center, const vectorSIMDf& _translation); - - // - static inline matrix3x4SIMD buildAxisAlignedBillboard( - const vectorSIMDf& camPos, - const vectorSIMDf& center, - const vectorSIMDf& translation, - const vectorSIMDf& axis, - const vectorSIMDf& from); - - - // - float& operator()(size_t _i, size_t _j) { return rows[_i].pointer[_j]; } - const float& operator()(size_t _i, size_t _j) const { return rows[_i].pointer[_j]; } - - // - inline const vectorSIMDf& operator[](size_t _rown) const { return rows[_rown]; } - inline vectorSIMDf& operator[](size_t _rown) { return rows[_rown]; } - - private: - static inline vectorSIMDf doJob(const __m128& a, const matrix3x4SIMD& _mtx); - - // really need that dvec<2> or wider - inline __m128d halfRowAsDouble(size_t _n, bool _0) const; - static inline __m128d doJob_d(const __m128d& _a0, const __m128d& _a1, const matrix3x4SIMD& _mtx, bool _xyHalf); - - vectorSIMDf determinant_helper(vectorSIMDf& r1crossr2) const - { - r1crossr2 = core::cross(rows[1], rows[2]); - return core::dot(rows[0], r1crossr2); - } -}; - -inline matrix3x4SIMD concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - return matrix3x4SIMD::concatenateBFollowedByA(_a, _b); -} -/* -inline matrix3x4SIMD concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - return matrix3x4SIMD::concatenateBFollowedByAPrecisely(_a, _b); -} -*/ - -} - -#endif diff --git a/include/matrix3x4SIMD_impl.h b/include/matrix3x4SIMD_impl.h deleted file mode 100644 index 0e9022efd0..0000000000 --- a/include/matrix3x4SIMD_impl.h +++ /dev/null @@ -1,470 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_MATRIX3X4SIMD_IMPL_H_INCLUDED_ -#define _NBL_MATRIX3X4SIMD_IMPL_H_INCLUDED_ - -#include "matrix3x4SIMD.h" -#include "nbl/core/math/glslFunctions.tcc" - -namespace nbl::core -{ - -// TODO: move to another implementation header -inline quaternion::quaternion(const matrix3x4SIMD& m) -{ - const vectorSIMDf one(1.f); - auto Qx = m.rows[0].xxxx()^vectorSIMDu32(0,0,0x80000000u,0x80000000u); - auto Qy = m.rows[1].yyyy()^vectorSIMDu32(0,0x80000000u,0,0x80000000u); - auto Qz = m.rows[2].zzzz()^vectorSIMDu32(0,0x80000000u,0x80000000u,0); - - auto tmp = one+Qx+Qy+Qz; - auto invscales = inversesqrt(tmp)*0.5f; - auto scales = tmp*invscales*0.5f; - - // TODO: speed this up - if (tmp.x > 0.0f) - { - X = (m(2, 1) - m(1, 2)) * invscales.x; - Y = (m(0, 2) - m(2, 0)) * invscales.x; - Z = (m(1, 0) - m(0, 1)) * invscales.x; - W = scales.x; - } - else - { - if (tmp.y>0.f) - { - X = scales.y; - Y = (m(0, 1) + m(1, 0)) * invscales.y; - Z = (m(2, 0) + m(0, 2)) * invscales.y; - W = (m(2, 1) - m(1, 2)) * invscales.y; - } - else if (tmp.z>0.f) - { - X = (m(0, 1) + m(1, 0)) * invscales.z; - Y = scales.z; - Z = (m(1, 2) + m(2, 1)) * invscales.z; - W = (m(0, 2) - m(2, 0)) * invscales.z; - } - else - { - X = (m(0, 2) + m(2, 0)) * invscales.w; - Y = (m(1, 2) + m(2, 1)) * invscales.w; - Z = scales.w; - W = (m(1, 0) - m(0, 1)) * invscales.w; - } - } - - *this = normalize(*this); -} - -inline bool matrix3x4SIMD::operator!=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - if ((rows[i] != _other.rows[i]).any()) - return true; - return false; -} - -inline matrix3x4SIMD& matrix3x4SIMD::operator+=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] += _other.rows[i]; - return *this; -} -inline matrix3x4SIMD& matrix3x4SIMD::operator-=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] -= _other.rows[i]; - return *this; -} -inline matrix3x4SIMD& matrix3x4SIMD::operator*=(float _scalar) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] *= _scalar; - return *this; -} - -#ifdef __NBL_COMPILE_WITH_SSE3 -#define BROADCAST32(fpx) _MM_SHUFFLE(fpx, fpx, fpx, fpx) -#define BUILD_XORMASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_ ? 0x80000000u:0x0u, _y_ ? 0x80000000u:0x0u, _z_ ? 0x80000000u:0x0u, _w_ ? 0x80000000u:0x0u) -#define BUILD_MASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_*0xffffffff, _y_*0xffffffff, _z_*0xffffffff, _w_*0xffffffff) - -inline matrix3x4SIMD matrix3x4SIMD::concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ -#ifdef _NBL_DEBUG - assert(is_aligned_to(&_a, _NBL_SIMD_ALIGNMENT)); - assert(is_aligned_to(&_b, _NBL_SIMD_ALIGNMENT)); -#endif // _NBL_DEBUG - __m128 r0 = _a.rows[0].getAsRegister(); - __m128 r1 = _a.rows[1].getAsRegister(); - __m128 r2 = _a.rows[2].getAsRegister(); - - matrix3x4SIMD out; - out.rows[0] = matrix3x4SIMD::doJob(r0, _b); - out.rows[1] = matrix3x4SIMD::doJob(r1, _b); - out.rows[2] = matrix3x4SIMD::doJob(r2, _b); - - return out; -} - -inline matrix3x4SIMD matrix3x4SIMD::concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - __m128d r00 = _a.halfRowAsDouble(0u, true); - __m128d r01 = _a.halfRowAsDouble(0u, false); - __m128d r10 = _a.halfRowAsDouble(1u, true); - __m128d r11 = _a.halfRowAsDouble(1u, false); - __m128d r20 = _a.halfRowAsDouble(2u, true); - __m128d r21 = _a.halfRowAsDouble(2u, false); - - matrix3x4SIMD out; - - const __m128i mask0011 = BUILD_MASKF(0, 0, 1, 1); - - __m128 second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r00, r01, _b, false)); - out.rows[0] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r00, r01, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r10, r11, _b, false)); - out.rows[1] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r10, r11, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r20, r21, _b, false)); - out.rows[2] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r20, r21, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - return out; -} - -inline vectorSIMDf matrix3x4SIMD::getTranslation() const -{ - __m128 xmm0 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 xmm1 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setr_ps(0.f, 0.f, 0.f, 1.f)); // (2z,3z,2w,3w) - __m128 xmm2 = _mm_movehl_ps(xmm1, xmm0);// (0w,1w,2w,3w) - - return xmm2; -} -inline vectorSIMDf matrix3x4SIMD::getTranslation3D() const -{ - __m128 xmm0 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 xmm1 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setzero_ps()); // (2z,0,2w,0) - __m128 xmm2 = _mm_movehl_ps(xmm1, xmm0);// (0w,1w,2w,0) - - return xmm2; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setScale(const core::vectorSIMDf& _scale) -{ - const vectorSIMDu32 mask0001 = vectorSIMDu32(BUILD_MASKF(0, 0, 0, 1)); - const vectorSIMDu32 mask0010 = vectorSIMDu32(BUILD_MASKF(0, 0, 1, 0)); - const vectorSIMDu32 mask0100 = vectorSIMDu32(BUILD_MASKF(0, 1, 0, 0)); - const vectorSIMDu32 mask1000 = vectorSIMDu32(BUILD_MASKF(1, 0, 0, 0)); - - const vectorSIMDu32& scaleAlias = reinterpret_cast(_scale); - - vectorSIMDu32& rowAlias0 = reinterpret_cast(rows[0]); - vectorSIMDu32& rowAlias1 = reinterpret_cast(rows[1]); - vectorSIMDu32& rowAlias2 = reinterpret_cast(rows[2]); - rowAlias0 = (scaleAlias & reinterpret_cast(mask1000)) | (rowAlias0 & reinterpret_cast(mask0001)); - rowAlias1 = (scaleAlias & reinterpret_cast(mask0100)) | (rowAlias1 & reinterpret_cast(mask0001)); - rowAlias2 = (scaleAlias & reinterpret_cast(mask0010)) | (rowAlias2 & reinterpret_cast(mask0001)); - - return *this; -} - -inline core::vectorSIMDf matrix3x4SIMD::getScale() const -{ - // xmm4-7 will now become columuns of B - __m128 xmm4 = rows[0].getAsRegister(); - __m128 xmm5 = rows[1].getAsRegister(); - __m128 xmm6 = rows[2].getAsRegister(); - __m128 xmm7 = _mm_setzero_ps(); - // g==0 - __m128 xmm0 = _mm_unpacklo_ps(xmm4, xmm5); - __m128 xmm1 = _mm_unpacklo_ps(xmm6, xmm7); // (2x,g,2y,g) - __m128 xmm2 = _mm_unpackhi_ps(xmm4, xmm5); - __m128 xmm3 = _mm_unpackhi_ps(xmm6, xmm7); // (2z,g,2w,g) - xmm4 = _mm_movelh_ps(xmm1, xmm0); //(0x,1x,2x,g) - xmm5 = _mm_movehl_ps(xmm1, xmm0); - xmm6 = _mm_movelh_ps(xmm3, xmm2); //(0z,1z,2z,g) - - // See http://www.robertblum.com/articles/2005/02/14/decomposing-matrices - // We have to do the full calculation. - xmm0 = _mm_mul_ps(xmm4, xmm4);// column 0 squared - xmm1 = _mm_mul_ps(xmm5, xmm5);// column 1 squared - xmm2 = _mm_mul_ps(xmm6, xmm6);// column 2 squared - xmm4 = _mm_hadd_ps(xmm0, xmm1); - xmm5 = _mm_hadd_ps(xmm2, xmm7); - xmm6 = _mm_hadd_ps(xmm4, xmm5); - - return _mm_sqrt_ps(xmm6); -} - -inline void matrix3x4SIMD::transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - vectorSIMDf r0 = rows[0] * _in, - r1 = rows[1] * _in, - r2 = rows[2] * _in; - - _out = - _mm_hadd_ps( - _mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), - _mm_hadd_ps(r2.getAsRegister(), _mm_set1_ps(0.25f)) - ); -} - -inline void matrix3x4SIMD::pseudoMulWith4x1(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - _out = (_in & mask1110) | _mm_castps_si128(vectorSIMDf(0.f, 0.f, 0.f, 1.f).getAsRegister()); - transformVect(_out); -} - -inline void matrix3x4SIMD::mulSub3x3WithNx1(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - auto maskedIn = _in & BUILD_MASKF(1, 1, 1, 0); - vectorSIMDf r0 = rows[0] * maskedIn, - r1 = rows[1] * maskedIn, - r2 = rows[2] * maskedIn; - - _out = - _mm_hadd_ps( - _mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), - _mm_hadd_ps(r2.getAsRegister(), _mm_setzero_ps()) - ); -} - - -inline matrix3x4SIMD matrix3x4SIMD::buildCameraLookAtMatrixLH( - const core::vectorSIMDf& position, - const core::vectorSIMDf& target, - const core::vectorSIMDf& upVector) -{ - const core::vectorSIMDf zaxis = core::normalize(target - position); - const core::vectorSIMDf xaxis = core::normalize(core::cross(upVector, zaxis)); - const core::vectorSIMDf yaxis = core::cross(zaxis, xaxis); - - matrix3x4SIMD r; - r.rows[0] = xaxis; - r.rows[1] = yaxis; - r.rows[2] = zaxis; - r.rows[0].w = -dot(xaxis, position)[0]; - r.rows[1].w = -dot(yaxis, position)[0]; - r.rows[2].w = -dot(zaxis, position)[0]; - - return r; -} -inline matrix3x4SIMD matrix3x4SIMD::buildCameraLookAtMatrixRH( - const core::vectorSIMDf& position, - const core::vectorSIMDf& target, - const core::vectorSIMDf& upVector) -{ - const core::vectorSIMDf zaxis = core::normalize(position - target); - const core::vectorSIMDf xaxis = core::normalize(core::cross(upVector, zaxis)); - const core::vectorSIMDf yaxis = core::cross(zaxis, xaxis); - - matrix3x4SIMD r; - r.rows[0] = xaxis; - r.rows[1] = yaxis; - r.rows[2] = zaxis; - r.rows[0].w = -dot(xaxis, position)[0]; - r.rows[1].w = -dot(yaxis, position)[0]; - r.rows[2].w = -dot(zaxis, position)[0]; - - return r; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setRotation(const core::quaternion& _quat) -{ - const vectorSIMDu32 mask0001 = vectorSIMDu32(BUILD_MASKF(0, 0, 0, 1)); - const __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - - const core::vectorSIMDf& quat = reinterpret_cast(_quat); - rows[0] = ((quat.yyyy() * ((quat.yxwx() & mask1110) * vectorSIMDf(2.f))) + (quat.zzzz() * (quat.zwxx() & mask1110) * vectorSIMDf(2.f, -2.f, 2.f, 0.f))) | (reinterpret_cast(rows[0]) & (mask0001)); - rows[0].x = 1.f - rows[0].x; - - rows[1] = ((quat.zzzz() * ((quat.wzyx() & mask1110) * vectorSIMDf(2.f))) + (quat.xxxx() * (quat.yxwx() & mask1110) * vectorSIMDf(2.f, 2.f, -2.f, 0.f))) | (reinterpret_cast(rows[1]) & (mask0001)); - rows[1].y = 1.f - rows[1].y; - - rows[2] = ((quat.xxxx() * ((quat.zwxx() & mask1110) * vectorSIMDf(2.f))) + (quat.yyyy() * (quat.wzyx() & mask1110) * vectorSIMDf(-2.f, 2.f, 2.f, 0.f))) | (reinterpret_cast(rows[2]) & (mask0001)); - rows[2].z = 1.f - rows[2].z; - - return *this; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setScaleRotationAndTranslation(const vectorSIMDf& _scale, const core::quaternion& _quat, const vectorSIMDf& _translation) -{ - const __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - - const vectorSIMDf& quat = reinterpret_cast(_quat); - const vectorSIMDf dblScale = (_scale * 2.f) & mask1110; - - vectorSIMDf mlt = dblScale ^ BUILD_XORMASKF(0, 1, 0, 0); - rows[0] = ((quat.yyyy() * ((quat.yxwx() & mask1110) * dblScale)) + (quat.zzzz() * (quat.zwxx() & mask1110) * mlt)); - rows[0].x = _scale.x - rows[0].x; - - mlt = dblScale ^ BUILD_XORMASKF(0, 0, 1, 0); - rows[1] = ((quat.zzzz() * ((quat.wzyx() & mask1110) * dblScale)) + (quat.xxxx() * (quat.yxwx() & mask1110) * mlt)); - rows[1].y = _scale.y - rows[1].y; - - mlt = dblScale ^ BUILD_XORMASKF(1, 0, 0, 0); - rows[2] = ((quat.xxxx() * ((quat.zwxx() & mask1110) * dblScale)) + (quat.yyyy() * (quat.wzyx() & mask1110) * mlt)); - rows[2].z = _scale.z - rows[2].z; - - setTranslation(_translation); - - return *this; -} - - -inline bool matrix3x4SIMD::getInverse(matrix3x4SIMD& _out) const //! SUBOPTIMAL - OPTIMIZE! -{ - auto translation = getTranslation(); - // `tmp` will have columns in its `rows` - core::matrix4SIMD tmp; - auto* cols = tmp.rows; - if (!getSub3x3InverseTranspose(reinterpret_cast(tmp))) - return false; - - // find inverse post-translation - cols[3] = -cols[0]*translation.xxxx()-cols[1]*translation.yyyy()-cols[2]*translation.zzzz(); - - // columns into rows - _out = transpose(tmp).extractSub3x4(); - - return true; -} - -inline bool matrix3x4SIMD::getSub3x3InverseTranspose(core::matrix3x4SIMD& _out) const -{ - vectorSIMDf r1crossr2; - const vectorSIMDf d = determinant_helper(r1crossr2); - if (core::iszero(d.x, FLT_MIN)) - return false; - auto rcp = core::reciprocal(d); - - // matrix of cofactors * 1/det - _out = getSub3x3TransposeCofactors(); - _out.rows[0] *= rcp; - _out.rows[1] *= rcp; - _out.rows[2] *= rcp; - - return true; -} - -inline core::matrix3x4SIMD matrix3x4SIMD::getSub3x3TransposeCofactors() const -{ - core::matrix3x4SIMD _out; - _out.rows[0] = core::cross(rows[1], rows[2]); - _out.rows[1] = core::cross(rows[2], rows[0]); - _out.rows[2] = core::cross(rows[0], rows[1]); - return _out; -} - -// TODO: Double check this!- -inline void matrix3x4SIMD::setTransformationCenter(const core::vectorSIMDf& _center, const core::vectorSIMDf& _translation) -{ - core::vectorSIMDf r0 = rows[0] * _center; - core::vectorSIMDf r1 = rows[1] * _center; - core::vectorSIMDf r2 = rows[2] * _center; - core::vectorSIMDf r3(0.f, 0.f, 0.f, 1.f); - - __m128 col3 = _mm_hadd_ps(_mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), _mm_hadd_ps(r2.getAsRegister(), r3.getAsRegister())); - const vectorSIMDf vcol3 = _center - _translation - col3; - - for (size_t i = 0u; i < VectorCount; ++i) - rows[i].w = vcol3.pointer[i]; -} - - -// TODO: Double check this! -inline matrix3x4SIMD matrix3x4SIMD::buildAxisAlignedBillboard( - const core::vectorSIMDf& camPos, - const core::vectorSIMDf& center, - const core::vectorSIMDf& translation, - const core::vectorSIMDf& axis, - const core::vectorSIMDf& from) -{ - // axis of rotation - const core::vectorSIMDf up = core::normalize(axis); - const core::vectorSIMDf forward = core::normalize(camPos - center); - const core::vectorSIMDf right = core::normalize(core::cross(up, forward)); - - // correct look vector - const core::vectorSIMDf look = core::cross(right, up); - - // rotate from to - // axis multiplication by sin - const core::vectorSIMDf vs = core::cross(look, from); - - // cosinus angle - const core::vectorSIMDf ca = core::cross(from, look); - - const core::vectorSIMDf vt(up * (core::vectorSIMDf(1.f) - ca)); - const core::vectorSIMDf wt = vt * up.yzxx(); - const core::vectorSIMDf vtuppca = vt * up + ca; - - matrix3x4SIMD mat; - core::vectorSIMDf& row0 = mat.rows[0]; - core::vectorSIMDf& row1 = mat.rows[1]; - core::vectorSIMDf& row2 = mat.rows[2]; - - row0 = vtuppca & BUILD_MASKF(1, 0, 0, 0); - row1 = vtuppca & BUILD_MASKF(0, 1, 0, 0); - row2 = vtuppca & BUILD_MASKF(0, 0, 1, 0); - - row0 += (wt.xxzx() + vs.xzyx() * core::vectorSIMDf(1.f, 1.f, -1.f, 1.f)) & BUILD_MASKF(0, 1, 1, 0); - row1 += (wt.xxyx() + vs.zxxx() * core::vectorSIMDf(-1.f, 1.f, 1.f, 1.f)) & BUILD_MASKF(1, 0, 1, 0); - row2 += (wt.zyxx() + vs.yxxx() * core::vectorSIMDf(1.f, -1.f, 1.f, 1.f)) & BUILD_MASKF(1, 1, 0, 0); - - mat.setTransformationCenter(center, translation); - return mat; -} - - - -inline vectorSIMDf matrix3x4SIMD::doJob(const __m128& a, const matrix3x4SIMD& _mtx) -{ - __m128 r0 = _mtx.rows[0].getAsRegister(); - __m128 r1 = _mtx.rows[1].getAsRegister(); - __m128 r2 = _mtx.rows[2].getAsRegister(); - - const __m128i mask = _mm_setr_epi32(0, 0, 0, 0xffffffff); - - vectorSIMDf res; - res = _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(0)), r0); - res += _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(1)), r1); - res += _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(2)), r2); - res += vectorSIMDf(a) & mask; // always 0 0 0 a3 -- no shuffle needed - return res; - } - -inline __m128d matrix3x4SIMD::halfRowAsDouble(size_t _n, bool _0) const -{ - return _mm_cvtps_pd(_0 ? rows[_n].xyxx().getAsRegister() : rows[_n].zwxx().getAsRegister()); -} -inline __m128d matrix3x4SIMD::doJob_d(const __m128d& _a0, const __m128d& _a1, const matrix3x4SIMD& _mtx, bool _xyHalf) -{ - __m128d r0 = _mtx.halfRowAsDouble(0u, _xyHalf); - __m128d r1 = _mtx.halfRowAsDouble(1u, _xyHalf); - __m128d r2 = _mtx.halfRowAsDouble(2u, _xyHalf); - - const __m128d mask01 = _mm_castsi128_pd(_mm_setr_epi32(0, 0, 0xffffffff, 0xffffffff)); - - __m128d res; - res = _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 0), r0); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 3), r1)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 0), r2)); - if (!_xyHalf) - res = _mm_add_pd(res, _mm_and_pd(_a1, mask01)); - return res; -} - -#undef BUILD_MASKF -#undef BUILD_XORMASKF -#undef BROADCAST32 -#else -#error "no implementation" -#endif - -} // nbl::core - -#endif diff --git a/include/matrix4SIMD.h b/include/matrix4SIMD.h deleted file mode 100644 index 03126c61f7..0000000000 --- a/include/matrix4SIMD.h +++ /dev/null @@ -1,385 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX4SIMD_H_INCLUDED__ -#define __NBL_MATRIX4SIMD_H_INCLUDED__ - -#include "matrix3x4SIMD.h" - -namespace nbl -{ -namespace core -{ - -template -class aabbox3d; - - -class matrix4SIMD// : public AlignedBase<_NBL_SIMD_ALIGNMENT> don't inherit from AlignedBase (which is empty) because member `rows[4]` inherits from it as well -{ - public: - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VectorCount = 4u; - vectorSIMDf rows[VectorCount]; - - inline explicit matrix4SIMD(const vectorSIMDf& _r0 = vectorSIMDf(1.f, 0.f, 0.f, 0.f), - const vectorSIMDf& _r1 = vectorSIMDf(0.f, 1.f, 0.f, 0.f), - const vectorSIMDf& _r2 = vectorSIMDf(0.f, 0.f, 1.f, 0.f), - const vectorSIMDf& _r3 = vectorSIMDf(0.f, 0.f, 0.f, 1.f)) - : rows{ _r0, _r1, _r2, _r3 } - { - } - - inline matrix4SIMD( float _a00, float _a01, float _a02, float _a03, - float _a10, float _a11, float _a12, float _a13, - float _a20, float _a21, float _a22, float _a23, - float _a30, float _a31, float _a32, float _a33) - : matrix4SIMD( vectorSIMDf(_a00, _a01, _a02, _a03), - vectorSIMDf(_a10, _a11, _a12, _a13), - vectorSIMDf(_a20, _a21, _a22, _a23), - vectorSIMDf(_a30, _a31, _a32, _a33)) - { - } - - inline explicit matrix4SIMD(const float* const _data) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4 * i); - } - inline matrix4SIMD(const float* const _data, bool ALIGNED) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4 * i, ALIGNED); - } - - inline explicit matrix4SIMD(const matrix3x4SIMD& smallMat) - { - *reinterpret_cast(this) = smallMat; - rows[3].set(0.f,0.f,0.f,1.f); - } - - inline matrix3x4SIMD extractSub3x4() const - { - return matrix3x4SIMD(rows[0],rows[1],rows[2]); - } - - //! Access by row - inline const vectorSIMDf& getRow(size_t _rown) const{ return rows[_rown]; } - inline vectorSIMDf& getRow(size_t _rown) { return rows[_rown]; } - - //! Access by element - inline float operator()(size_t _i, size_t _j) const { return rows[_i].pointer[_j]; } - inline float& operator()(size_t _i, size_t _j) { return rows[_i].pointer[_j]; } - - //! Access for memory - inline const float* pointer() const {return rows[0].pointer;} - inline float* pointer() {return rows[0].pointer;} - - - inline bool operator==(const matrix4SIMD& _other) const - { - return !(*this != _other); - } - inline bool operator!=(const matrix4SIMD& _other) const; - - inline matrix4SIMD& operator+=(const matrix4SIMD& _other); - inline matrix4SIMD operator+(const matrix4SIMD& _other) const - { - matrix4SIMD r{*this}; - return r += _other; - } - - inline matrix4SIMD& operator-=(const matrix4SIMD& _other); - inline matrix4SIMD operator-(const matrix4SIMD& _other) const - { - matrix4SIMD r{*this}; - return r -= _other; - } - - inline matrix4SIMD& operator*=(float _scalar); - inline matrix4SIMD operator*(float _scalar) const - { - matrix4SIMD r{*this}; - return r *= _scalar; - } - - static inline matrix4SIMD concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b); - static inline matrix4SIMD concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b); - - inline bool isIdentity() const - { - return *this == matrix4SIMD(); - } - inline bool isIdentity(float _tolerance) const; - - inline bool isOrthogonal() const - { - return concatenateBFollowedByA(transpose(*this), *this).isIdentity(); - } - inline bool isOrthogonal(float _tolerance) const - { - return concatenateBFollowedByA(transpose(*this), *this).isIdentity(_tolerance); - } - - inline matrix4SIMD& setScale(const core::vectorSIMDf& _scale); - inline matrix4SIMD& setScale(float _scale) - { - return setScale(vectorSIMDf(_scale)); - } - - inline void setTranslation(const float* _t) - { - for (size_t i = 0u; i < 3u; ++i) - rows[i].w = _t[i]; - } - //! Takes into account only x,y,z components of _t - inline void setTranslation(const vectorSIMDf& _t) - { - setTranslation(_t.pointer); - } - inline void setTranslation(const vector3d& _t) - { - setTranslation(&_t.X); - } - - //! Returns last column of the matrix. - inline vectorSIMDf getTranslation() const; - - //! Returns translation part of the matrix (w component is always 0). - inline vectorSIMDf getTranslation3D() const; - - enum class E_MATRIX_INVERSE_PRECISION - { - EMIP_FAST_RECIPROCAL, - EMIP_32BIT, - EMIP_64BBIT - }; - - template - inline bool getInverseTransform(matrix4SIMD& _out) const - { - if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_64BBIT) - { - double a = rows[0][0], b = rows[0][1], c = rows[0][2], d = rows[0][3]; - double e = rows[1][0], f = rows[1][1], g = rows[1][2], h = rows[1][3]; - double i = rows[2][0], j = rows[2][1], k = rows[2][2], l = rows[2][3]; - double m = rows[3][0], n = rows[3][1], o = rows[3][2], p = rows[3][3]; - - double kp_lo = k * p - l * o; - double jp_ln = j * p - l * n; - double jo_kn = j * o - k * n; - double ip_lm = i * p - l * m; - double io_km = i * o - k * m; - double in_jm = i * n - j * m; - - double a11 = +(f * kp_lo - g * jp_ln + h * jo_kn); - double a12 = -(e * kp_lo - g * ip_lm + h * io_km); - double a13 = +(e * jp_ln - f * ip_lm + h * in_jm); - double a14 = -(e * jo_kn - f * io_km + g * in_jm); - - double det = a * a11 + b * a12 + c * a13 + d * a14; - - if (core::iszero(det, DBL_MIN)) - return false; - - double invDet = 1.0 / det; - - _out.rows[0][0] = a11 * invDet; - _out.rows[1][0] = a12 * invDet; - _out.rows[2][0] = a13 * invDet; - _out.rows[3][0] = a14 * invDet; - - _out.rows[0][1] = -(b * kp_lo - c * jp_ln + d * jo_kn) * invDet; - _out.rows[1][1] = +(a * kp_lo - c * ip_lm + d * io_km) * invDet; - _out.rows[2][1] = -(a * jp_ln - b * ip_lm + d * in_jm) * invDet; - _out.rows[3][1] = +(a * jo_kn - b * io_km + c * in_jm) * invDet; - - double gp_ho = g * p - h * o; - double fp_hn = f * p - h * n; - double fo_gn = f * o - g * n; - double ep_hm = e * p - h * m; - double eo_gm = e * o - g * m; - double en_fm = e * n - f * m; - - _out.rows[0][2] = +(b * gp_ho - c * fp_hn + d * fo_gn) * invDet; - _out.rows[1][2] = -(a * gp_ho - c * ep_hm + d * eo_gm) * invDet; - _out.rows[2][2] = +(a * fp_hn - b * ep_hm + d * en_fm) * invDet; - _out.rows[3][2] = -(a * fo_gn - b * eo_gm + c * en_fm) * invDet; - - double gl_hk = g * l - h * k; - double fl_hj = f * l - h * j; - double fk_gj = f * k - g * j; - double el_hi = e * l - h * i; - double ek_gi = e * k - g * i; - double ej_fi = e * j - f * i; - - _out.rows[0][3] = -(b * gl_hk - c * fl_hj + d * fk_gj) * invDet; - _out.rows[1][3] = +(a * gl_hk - c * el_hi + d * ek_gi) * invDet; - _out.rows[2][3] = -(a * fl_hj - b * el_hi + d * ej_fi) * invDet; - _out.rows[3][3] = +(a * fk_gj - b * ek_gi + c * ej_fi) * invDet; - - return true; - } - else - { - auto mat2mul = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A*_B.xwxw()+_A.yxwz()*_B.zyzy(); - }; - auto mat2adjmul = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A.wwxx()*_B-_A.yyzz()*_B.zwxy(); - }; - auto mat2muladj = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A*_B.wxwx()-_A.yxwz()*_B.zyzy(); - }; - - vectorSIMDf A = _mm_movelh_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); - vectorSIMDf B = _mm_movehl_ps(rows[1].getAsRegister(), rows[0].getAsRegister()); - vectorSIMDf C = _mm_movelh_ps(rows[2].getAsRegister(), rows[3].getAsRegister()); - vectorSIMDf D = _mm_movehl_ps(rows[3].getAsRegister(), rows[2].getAsRegister()); - - vectorSIMDf allDets = vectorSIMDf(_mm_shuffle_ps(rows[0].getAsRegister(),rows[2].getAsRegister(),_MM_SHUFFLE(2,0,2,0)))* - vectorSIMDf(_mm_shuffle_ps(rows[1].getAsRegister(),rows[3].getAsRegister(),_MM_SHUFFLE(3,1,3,1))) - - - vectorSIMDf(_mm_shuffle_ps(rows[0].getAsRegister(),rows[2].getAsRegister(),_MM_SHUFFLE(3,1,3,1)))* - vectorSIMDf(_mm_shuffle_ps(rows[1].getAsRegister(),rows[3].getAsRegister(),_MM_SHUFFLE(2,0,2,0))); - - auto detA = allDets.xxxx(); - auto detB = allDets.yyyy(); - auto detC = allDets.zzzz(); - auto detD = allDets.wwww(); - - // https://lxjk.github.io/2017/09/03/Fast-4x4-Matrix-Inverse-with-SSE-SIMD-Explained.html - auto D_C = mat2adjmul(D, C); - // A#B - auto A_B = mat2adjmul(A, B); - // X# = |D|A - B(D#C) - auto X_ = detD*A - mat2mul(B, D_C); - // W# = |A|D - C(A#B) - auto W_ = detA*D - mat2mul(C, A_B); - - // |M| = |A|*|D| + ... (continue later) - auto detM = detA*detD; - - // Y# = |B|C - D(A#B)# - auto Y_ = detB*C - mat2muladj(D, A_B); - // Z# = |C|B - A(D#C)# - auto Z_ = detC*B - mat2muladj(A, D_C); - - // |M| = |A|*|D| + |B|*|C| ... (continue later) - detM += detB*detC; - - // tr((A#B)(D#C)) - __m128 tr = (A_B*D_C.xzyw()).getAsRegister(); - tr = _mm_hadd_ps(tr, tr); - tr = _mm_hadd_ps(tr, tr); - // |M| = |A|*|D| + |B|*|C| - tr((A#B)(D#C) - detM -= tr; - - if (core::iszero(detM.x, FLT_MIN)) - return false; - - vectorSIMDf rDetM; - - // (1/|M|, -1/|M|, -1/|M|, 1/|M|) - if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_FAST_RECIPROCAL) - rDetM = vectorSIMDf(1.f, -1.f, -1.f, 1.f)*core::reciprocal(detM); - else if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_32BIT) - rDetM = vectorSIMDf(1.f, -1.f, -1.f, 1.f).preciseDivision(detM); - - X_ *= rDetM; - Y_ *= rDetM; - Z_ *= rDetM; - W_ *= rDetM; - - // apply adjugate and store, here we combine adjugate shuffle and store shuffle - _out.rows[0] = _mm_shuffle_ps(X_.getAsRegister(), Y_.getAsRegister(), _MM_SHUFFLE(1, 3, 1, 3)); - _out.rows[1] = _mm_shuffle_ps(X_.getAsRegister(), Y_.getAsRegister(), _MM_SHUFFLE(0, 2, 0, 2)); - _out.rows[2] = _mm_shuffle_ps(Z_.getAsRegister(), W_.getAsRegister(), _MM_SHUFFLE(1, 3, 1, 3)); - _out.rows[3] = _mm_shuffle_ps(Z_.getAsRegister(), W_.getAsRegister(), _MM_SHUFFLE(0, 2, 0, 2)); - - return true; - } - } - - inline vectorSIMDf sub3x3TransformVect(const vectorSIMDf& _in) const; - - inline void transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void transformVect(vectorSIMDf& _vector) const - { - transformVect(_vector, _vector); - } - - inline void translateVect(vectorSIMDf& _vect) const - { - _vect += getTranslation(); - } - - bool isBoxInFrustum(const aabbox3d& bbox); - - bool perspectiveTransformVect(core::vectorSIMDf& inOutVec) - { - transformVect(inOutVec); - const bool inFront = inOutVec[3] > 0.f; - inOutVec /= inOutVec.wwww(); - return inFront; - } - - core::vector2di fragCoordTransformVect(const core::vectorSIMDf& _in, const core::dimension2du& viewportDimensions) - { - core::vectorSIMDf pos(_in); - pos.w = 1.f; - if (perspectiveTransformVect(pos)) - core::vector2di(-0x80000000, -0x80000000); - - pos[0] *= 0.5f; - pos[1] *= 0.5f; - pos[0] += 0.5f; - pos[1] += 0.5f; - - return core::vector2di(pos[0] * float(viewportDimensions.Width), pos[1] * float(viewportDimensions.Height)); - } - - static inline matrix4SIMD buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar); - static inline matrix4SIMD buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar); - - static inline matrix4SIMD buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar); - static inline matrix4SIMD buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar); - - //! Access by row - inline const vectorSIMDf& operator[](size_t _rown) const { return rows[_rown]; } - //! Access by row - inline vectorSIMDf& operator[](size_t _rown) { return rows[_rown]; } - - private: - //! TODO: implement a dvec<2> - inline __m128d halfRowAsDouble(size_t _n, bool _firstHalf) const; - static inline __m128d concat64_helper(const __m128d& _a0, const __m128d& _a1, const matrix4SIMD& _mtx, bool _firstHalf); -}; - -inline matrix4SIMD operator*(float _scalar, const matrix4SIMD& _mtx) -{ - return _mtx * _scalar; -} - -inline matrix4SIMD concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - return matrix4SIMD::concatenateBFollowedByA(_a, _b); -} -/* -inline matrix4SIMD concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - return matrix4SIMD::concatenateBFollowedByAPrecisely(_a, _b); -} -*/ - - -}} // nbl::core - -#endif diff --git a/include/matrix4SIMD_impl.h b/include/matrix4SIMD_impl.h deleted file mode 100644 index 02484e7a4c..0000000000 --- a/include/matrix4SIMD_impl.h +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX4SIMD_IMPL_H_INCLUDED__ -#define __NBL_MATRIX4SIMD_IMPL_H_INCLUDED__ - -#include "matrix4SIMD.h" -#include "nbl/core/math/glslFunctions.tcc" -#include "aabbox3d.h" - -namespace nbl -{ -namespace core -{ - - -inline bool matrix4SIMD::operator!=(const matrix4SIMD& _other) const -{ - for (size_t i = 0u; i < VectorCount; ++i) - if ((rows[i] != _other.rows[i]).any()) - return true; - return false; -} - -inline matrix4SIMD& matrix4SIMD::operator+=(const matrix4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] += _other.rows[i]; - return *this; -} - -inline matrix4SIMD& matrix4SIMD::operator-=(const matrix4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] -= _other.rows[i]; - return *this; -} - -inline matrix4SIMD& matrix4SIMD::operator*=(float _scalar) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] *= _scalar; - return *this; -} - -inline bool matrix4SIMD::isIdentity(float _tolerance) const -{ - return core::equals(*this, matrix4SIMD(), core::ROUNDING_ERROR()); -} - -#ifdef __NBL_COMPILE_WITH_SSE3 -#define BROADCAST32(fpx) _MM_SHUFFLE(fpx, fpx, fpx, fpx) -#define BUILD_MASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_*0xffffffff, _y_*0xffffffff, _z_*0xffffffff, _w_*0xffffffff) -inline matrix4SIMD matrix4SIMD::concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - auto calcRow = [](const __m128& _row, const matrix4SIMD& _mtx) - { - __m128 r0 = _mtx.rows[0].getAsRegister(); - __m128 r1 = _mtx.rows[1].getAsRegister(); - __m128 r2 = _mtx.rows[2].getAsRegister(); - __m128 r3 = _mtx.rows[3].getAsRegister(); - - __m128 res; - res = _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(0)), r0); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(1)), r1)); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(2)), r2)); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(3)), r3)); - return res; - }; - - matrix4SIMD r; - for (size_t i = 0u; i < 4u; ++i) - r.rows[i] = calcRow(_a.rows[i].getAsRegister(), _b); - - return r; -} -inline matrix4SIMD matrix4SIMD::concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - matrix4SIMD out; - - __m128i mask0011 = BUILD_MASKF(0, 0, 1, 1); - __m128 second; - - { - __m128d r00 = _a.halfRowAsDouble(0u, true); - __m128d r01 = _a.halfRowAsDouble(0u, false); - second = _mm_cvtpd_ps(concat64_helper(r00, r01, _b, false)); - out.rows[0] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r00, r01, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r10 = _a.halfRowAsDouble(1u, true); - __m128d r11 = _a.halfRowAsDouble(1u, false); - second = _mm_cvtpd_ps(concat64_helper(r10, r11, _b, false)); - out.rows[1] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r10, r11, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r20 = _a.halfRowAsDouble(2u, true); - __m128d r21 = _a.halfRowAsDouble(2u, false); - second = _mm_cvtpd_ps(concat64_helper(r20, r21, _b, false)); - out.rows[2] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r20, r21, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r30 = _a.halfRowAsDouble(3u, true); - __m128d r31 = _a.halfRowAsDouble(3u, false); - second = _mm_cvtpd_ps(concat64_helper(r30, r31, _b, false)); - out.rows[3] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r30, r31, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - return out; -} - -inline matrix4SIMD& matrix4SIMD::setScale(const core::vectorSIMDf& _scale) -{ - const __m128i mask0001 = BUILD_MASKF(0, 0, 0, 1); - - rows[0] = (_scale & BUILD_MASKF(1, 0, 0, 0)) | _mm_castps_si128((rows[0] & mask0001).getAsRegister()); - rows[1] = (_scale & BUILD_MASKF(0, 1, 0, 0)) | _mm_castps_si128((rows[1] & mask0001).getAsRegister()); - rows[2] = (_scale & BUILD_MASKF(0, 0, 1, 0)) | _mm_castps_si128((rows[2] & mask0001).getAsRegister()); - rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return *this; -} - -//! Returns last column of the matrix. -inline vectorSIMDf matrix4SIMD::getTranslation() const -{ - __m128 tmp1 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 tmp2 = _mm_unpackhi_ps(rows[2].getAsRegister(), rows[3].getAsRegister()); // (2z,3z,2w,3w) - __m128 col3 = _mm_movehl_ps(tmp1, tmp2);// (0w,1w,2w,3w) - - return col3; -} -//! Returns translation part of the matrix (w component is always 0). -inline vectorSIMDf matrix4SIMD::getTranslation3D() const -{ - __m128 tmp1 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 tmp2 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setzero_ps()); // (2z,0,2w,0) - __m128 transl = _mm_movehl_ps(tmp1, tmp2);// (0w,1w,2w,0) - - return transl; -} - -inline vectorSIMDf matrix4SIMD::sub3x3TransformVect(const vectorSIMDf& _in) const -{ - matrix4SIMD cp{*this}; - vectorSIMDf out = _in & BUILD_MASKF(1, 1, 1, 0); - transformVect(out); - return out; -} - -inline void matrix4SIMD::transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - vectorSIMDf r[4]; - for (size_t i = 0u; i < VectorCount; ++i) - r[i] = rows[i] * _in; - - _out = _mm_hadd_ps( - _mm_hadd_ps(r[0].getAsRegister(), r[1].getAsRegister()), - _mm_hadd_ps(r[2].getAsRegister(), r[3].getAsRegister()) - ); -} - -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians*0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(w, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -h, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, -zFar/(zFar-zNear), -zNear*zFar/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, -1.f, 0.f); - - return m; -} -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians*0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(w, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -h, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, zFar/(zFar-zNear), -zNear*zFar/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 1.f, 0.f); - - return m; -} - -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(2.f/widthOfViewVolume, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -2.f/heightOfViewVolume, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, -1.f/(zFar-zNear), -zNear/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return m; -} -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(2.f/widthOfViewVolume, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -2.f/heightOfViewVolume, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, 1.f/(zFar-zNear), -zNear/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return m; -} - - - -inline __m128d matrix4SIMD::halfRowAsDouble(size_t _n, bool _firstHalf) const -{ - return _mm_cvtps_pd(_firstHalf ? rows[_n].xyxx().getAsRegister() : rows[_n].zwxx().getAsRegister()); -} -inline __m128d matrix4SIMD::concat64_helper(const __m128d& _a0, const __m128d& _a1, const matrix4SIMD& _mtx, bool _firstHalf) -{ - __m128d r0 = _mtx.halfRowAsDouble(0u, _firstHalf); - __m128d r1 = _mtx.halfRowAsDouble(1u, _firstHalf); - __m128d r2 = _mtx.halfRowAsDouble(2u, _firstHalf); - __m128d r3 = _mtx.halfRowAsDouble(3u, _firstHalf); - - //const __m128d mask01 = _mm_castsi128_pd(_mm_setr_epi32(0, 0, 0xffffffff, 0xffffffff)); - - __m128d res; - res = _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 0), r0); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 3/*0b11*/), r1)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 0), r2)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 3/*0b11*/), r3)); - return res; -} - -#undef BUILD_MASKF -#undef BROADCAST32 -#else -#error "no implementation" -#endif - -inline bool matrix4SIMD::isBoxInFrustum(const aabbox3d& bbox) -{ - vectorSIMDf MinEdge, MaxEdge; - MinEdge.set(bbox.MinEdge); - MaxEdge.set(bbox.MaxEdge); - MinEdge.w = 1.f; - MaxEdge.w = 1.f; - - - auto getClosestDP = [&MinEdge,&MaxEdge](const vectorSIMDf& toDot) -> float - { - return dot(mix(MaxEdge,MinEdge,toDot +#endif + +namespace nbl +{ +namespace hlsl +{ +namespace glsl +{ + +template +inline T radians(NBL_CONST_REF_ARG(T) degrees) +{ + static_assert( + is_floating_point::value, + "This code expects the type to be either a double or a float." + ); + + return degrees * PI() / T(180); +} + +template +inline T degrees(NBL_CONST_REF_ARG(T) radians) +{ + static_assert( + is_floating_point::value, + "This code expects the type to be either a double or a float." + ); + + return radians * T(180) / PI(); +} + +template +inline bool equals(NBL_CONST_REF_ARG(T) a, NBL_CONST_REF_ARG(T) b, NBL_CONST_REF_ARG(T) tolerance) +{ + return (a + tolerance >= b) && (a - tolerance <= b); +} + +#ifndef __HLSL_VERSION + +NBL_FORCE_INLINE bool equals(const core::vector3df& a, const core::vector3df& b, const core::vector3df& tolerance) +{ + auto ha = a + tolerance; + auto la = a - tolerance; + return ha.X >= b.X && ha.Y >= b.Y && ha.Z >= b.Z && la.X <= b.X && la.Y <= b.Y && la.Z <= b.Z; +} + +#endif + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl new file mode 100644 index 0000000000..4979e35ef6 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl @@ -0,0 +1,74 @@ +// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" +// For conditions of distribution and use, see copyright notice in nabla.h +// See the original file in irrlicht source for authors + +#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNION_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_QUATERNION_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + + +//! Quaternion class for representing rotations. +/** It provides cheap combinations and avoids gimbal locks. +Also useful for interpolations. */ + +template +struct quaternion +{ + // i*data[0] + j*data[1] + k*data[2] + data[3] + vector data; + + //! creates identity quaternion + static inline quaternion create() + { + quaternion q; + q.data = vector(0.0f, 0.0f, 0.0f, 1.0f); + + return q; + } + + static inline quaternion create(T x, T y, T z, T w) + { + quaternion q; + q.data = vector(x, y, z, z); + + return q; + } + +#define DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(TYPE)\ + inline quaternion operator*(float scalar)\ + {\ + quaternion output;\ + output.data = data * scalar;\ + return output;\ + }\ + + DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(uint32_t) + DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(uint64_t) + DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(float32_t) + DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(float64_t) + +#undef DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR + + inline quaternion operator*(NBL_CONST_REF_ARG(quaternion) other) + { + return quaternion::create( + w * q.w - x * q.x - y * q.y - z * q.z, + w * q.x + x * q.w + y * q.z - z * q.y, + w * q.y - x * q.z + y * q.w + z * q.x, + w * q.z + x * q.y - y * q.x + z * q.w + ); + } +} + +} // end namespace core +} // nbl + +#endif + diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl new file mode 100644 index 0000000000..d00d9ce2c4 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl @@ -0,0 +1,25 @@ +// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" +// For conditions of distribution and use, see copyright notice in nabla.h +// See the original file in irrlicht source for authors + +#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNION_IMPL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_QUATERNION_IMPL_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +namespace quaternion_impl +{ + +} + +} // end namespace core +} // nbl + +#endif + From cdf867d71095743f37c1198aad7cafd8aaa29082 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 25 Oct 2024 08:38:52 +0200 Subject: [PATCH 03/27] add operator= for glm:: matrices corresponding to our at home matrices - we were never be able to do ourMatrixT = glmMatrixT and its important since a lot of utils create glmMatrixT actually --- include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index b1d33f097b..9a8ce9e51f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -28,6 +28,12 @@ struct matrix final : private glm::mat return *this; } + matrix& operator=(Base const& rhs) + { + Base::operator=(rhs); + return *this; + } + friend matrix operator+(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) + reinterpret_cast(rhs)); } friend matrix operator-(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) - reinterpret_cast(rhs)); } From 6ef4f89383e012e8309e52f2cd7ade7fddf26599 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 25 Oct 2024 09:04:26 +0200 Subject: [PATCH 04/27] I'm stupid, I have explicit at home matrix constructor taking glm:: matrix xD remove the = operator glm -> to ours --- include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index 9a8ce9e51f..cc89f9d003 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -27,13 +27,7 @@ struct matrix final : private glm::mat Base::operator=(rhs); return *this; } - - matrix& operator=(Base const& rhs) - { - Base::operator=(rhs); - return *this; - } - + friend matrix operator+(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) + reinterpret_cast(rhs)); } friend matrix operator-(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) - reinterpret_cast(rhs)); } From 22f505c8c7a976aa1b47e90e20bfca71fb2571ca Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 25 Oct 2024 10:33:28 +0200 Subject: [PATCH 05/27] steal include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl from https://github.com/Devsh-Graphics-Programming/Nabla/pull/760 --- .../transformation_matrix_utils.hlsl | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl new file mode 100644 index 0000000000..d50bc16869 --- /dev/null +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -0,0 +1,112 @@ +#ifndef _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +template +matrix getMatrix3x4As4x4(const matrix& mat) +{ + matrix output; + for (int i = 0; i < 3; ++i) + output[i] = mat[i]; + output[3] = float32_t4(0.0f, 0.0f, 0.0f, 1.0f); + + return output; +} + +// TODO: use portable_float when merged +//! multiplies matrices a and b, 3x4 matrices are treated as 4x4 matrices with 4th row set to (0, 0, 0 ,1) +template +inline matrix concatenateBFollowedByA(const matrix& a, const matrix& b) +{ + const matrix a4x4 = getMatrix3x4As4x4(a); + const matrix b4x4 = getMatrix3x4As4x4(b); + return matrix(mul(a4x4, b4x4)); +} + +template +inline matrix buildCameraLookAtMatrixLH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = core::normalize(target - position); + const vector xaxis = core::normalize(core::cross(upVector, zaxis)); + const vector yaxis = core::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -dot(xaxis, position)); + r[1] = vector(yaxis, -dot(yaxis, position)); + r[2] = vector(zaxis, -dot(zaxis, position)); + + return r; +} + +float32_t3x4 buildCameraLookAtMatrixRH( + const float32_t3& position, + const float32_t3& target, + const float32_t3& upVector) +{ + const float32_t3 zaxis = core::normalize(position - target); + const float32_t3 xaxis = core::normalize(core::cross(upVector, zaxis)); + const float32_t3 yaxis = core::cross(zaxis, xaxis); + + float32_t3x4 r; + r[0] = float32_t4(xaxis, -dot(xaxis, position)); + r[1] = float32_t4(yaxis, -dot(yaxis, position)); + r[2] = float32_t4(zaxis, -dot(zaxis, position)); + + return r; +} + +// TODO: test, check if there is better implementation +// TODO: move quaternion to nbl::hlsl +// TODO: why NBL_REF_ARG(MatType) doesn't work????? + +//! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged +template +inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(core::quaternion) quat) +{ + static_assert(N == 3 || N == 4); + + outMat[0] = vector( + 1 - 2 * (quat.y * quat.y + quat.z * quat.z), + 2 * (quat.x * quat.y - quat.z * quat.w), + 2 * (quat.x * quat.z + quat.y * quat.w), + outMat[0][3] + ); + + outMat[1] = vector( + 2 * (quat.x * quat.y + quat.z * quat.w), + 1 - 2 * (quat.x * quat.x + quat.z * quat.z), + 2 * (quat.y * quat.z - quat.x * quat.w), + outMat[1][3] + ); + + outMat[2] = vector( + 2 * (quat.x * quat.z - quat.y * quat.w), + 2 * (quat.y * quat.z + quat.x * quat.w), + 1 - 2 * (quat.x * quat.x + quat.y * quat.y), + outMat[2][3] + ); +} + +template +inline void setTranslation(matrix& outMat, NBL_CONST_REF_ARG(vector) translation) +{ + static_assert(N == 3 || N == 4); + + outMat[0].w = translation.x; + outMat[1].w = translation.y; + outMat[2].w = translation.z; +} + +} +} + +#endif \ No newline at end of file From 51f4cedff116b7777c6bcb922f30ed00e88c77b4 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 25 Oct 2024 11:06:23 +0200 Subject: [PATCH 06/27] fix getMatrix3x4As4x4 (return type issues) & buildCameraLookAtMatrixRH (ambiguity dependent type issues), reference https://github.com/Devsh-Graphics-Programming/Nabla/pull/760 --- .../transformation_matrix_utils.hlsl | 31 ++++++++++--------- include/nbl/core/math/glslFunctions.h | 1 + 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index d50bc16869..3c5b2adcd0 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -9,7 +9,7 @@ namespace hlsl { template -matrix getMatrix3x4As4x4(const matrix& mat) +matrix getMatrix3x4As4x4(const matrix& mat) { matrix output; for (int i = 0; i < 3; ++i) @@ -24,8 +24,8 @@ matrix getMatrix3x4As4x4(const matrix& mat) template inline matrix concatenateBFollowedByA(const matrix& a, const matrix& b) { - const matrix a4x4 = getMatrix3x4As4x4(a); - const matrix b4x4 = getMatrix3x4As4x4(b); + const auto a4x4 = getMatrix3x4As4x4(a); + const auto b4x4 = getMatrix3x4As4x4(b); return matrix(mul(a4x4, b4x4)); } @@ -47,19 +47,20 @@ inline matrix buildCameraLookAtMatrixLH( return r; } -float32_t3x4 buildCameraLookAtMatrixRH( - const float32_t3& position, - const float32_t3& target, - const float32_t3& upVector) +template +inline matrix buildCameraLookAtMatrixRH( + const vector& position, + const vector& target, + const vector& upVector) { - const float32_t3 zaxis = core::normalize(position - target); - const float32_t3 xaxis = core::normalize(core::cross(upVector, zaxis)); - const float32_t3 yaxis = core::cross(zaxis, xaxis); - - float32_t3x4 r; - r[0] = float32_t4(xaxis, -dot(xaxis, position)); - r[1] = float32_t4(yaxis, -dot(yaxis, position)); - r[2] = float32_t4(zaxis, -dot(zaxis, position)); + const vector zaxis = core::normalize(position - target); + const vector xaxis = core::normalize(core::cross(upVector, zaxis)); + const vector yaxis = core::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -dot(xaxis, position)); + r[1] = vector(yaxis, -dot(yaxis, position)); + r[2] = vector(zaxis, -dot(zaxis, position)); return r; } diff --git a/include/nbl/core/math/glslFunctions.h b/include/nbl/core/math/glslFunctions.h index 2bd17cd642..3c9cc98850 100644 --- a/include/nbl/core/math/glslFunctions.h +++ b/include/nbl/core/math/glslFunctions.h @@ -362,6 +362,7 @@ NBL_FORCE_INLINE vectorSIMDf cross(const vectorSIMDf& a, const vect template NBL_FORCE_INLINE T normalize(const T& v) { + // TODO: THIS CREATES AMGIGUITY WITH GLM:: NAMESPACE! auto d = dot(v, v); #ifdef __NBL_FAST_MATH return v * core::inversesqrt(d); From 1397756bb8e218b1f7a39cb151f545a84f71b390 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 25 Oct 2024 03:10:59 -0700 Subject: [PATCH 07/27] Saving work --- examples_tests | 2 +- include/nabla.h | 3 +- .../nbl/asset/utils/CQuantQuaternionCache.h | 2 +- .../nbl/builtin/hlsl/cpp_compat/intrinsics.h | 2 + .../nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 1 + .../hlsl/math/quaternion/quaternion.hlsl | 12 ++--- .../transformation_matrix_utils.hlsl | 35 ++++++++++--- include/nbl/core/declarations.h | 1 - include/nbl/core/definitions.h | 4 -- include/nbl/core/math/floatutil.tcc | 12 +---- include/nbl/core/math/glslFunctions.h | 22 ++++----- include/nbl/core/math/glslFunctions.tcc | 49 +++++++------------ include/nbl/core/math/matrixutil.h | 29 ----------- include/nbl/core/math/plane3dSIMD.h | 18 ++++--- 14 files changed, 80 insertions(+), 112 deletions(-) delete mode 100644 include/nbl/core/math/matrixutil.h diff --git a/examples_tests b/examples_tests index 6b45fecf8c..13df456b76 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 6b45fecf8c64e8b11fef8c2b3a7e5618edaca68d +Subproject commit 13df456b76c4e38996502fc71d9878334fd5ea18 diff --git a/include/nabla.h b/include/nabla.h index dac3155e5e..c3981635ea 100644 --- a/include/nabla.h +++ b/include/nabla.h @@ -53,9 +53,8 @@ #include "vector3d.h" #include "vectorSIMD.h" #include "line3d.h" -#include "matrix4SIMD.h" #include "position2d.h" -#include "quaternion.h" +#include "nbl/builtin/hlsl/math/quaternion/quaternion.hlsl" #include "rect.h" #include "dimension2d.h" diff --git a/include/nbl/asset/utils/CQuantQuaternionCache.h b/include/nbl/asset/utils/CQuantQuaternionCache.h index 8e46dffb0a..a51549d24d 100644 --- a/include/nbl/asset/utils/CQuantQuaternionCache.h +++ b/include/nbl/asset/utils/CQuantQuaternionCache.h @@ -60,7 +60,7 @@ class CQuantQuaternionCache : public CDirQuantCacheBase - value_type_t quantize(const core::quaternion& quat) + value_type_t quantize(const hlsl::quaternion& quat) { return Base::quantize<4u,CacheFormat>(reinterpret_cast(quat)); } diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h index 3cc0bc7dab..a8f745fbae 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h @@ -4,6 +4,7 @@ #include #include +#include // this is a C++ only header, hence the `.h` extension, it only implements HLSL's built-in functions #ifndef __HLSL_VERSION @@ -62,6 +63,7 @@ NBL_BIT_OP_GLM_PASSTHROUGH(findMSB,findMSB) template inline matrix inverse(const matrix& m) { + static_assert(!(N == 3 && M == 4)); return reinterpret_cast&>(glm::inverse(reinterpret_cast::Base const&>(m))); } diff --git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index b1d33f097b..455f8ef0c7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_MATRIX_INCLUDED_ #include +#include namespace nbl { diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl index 4979e35ef6..2991a79e35 100644 --- a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl @@ -42,7 +42,7 @@ struct quaternion } #define DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(TYPE)\ - inline quaternion operator*(float scalar)\ + inline quaternion operator*(TYPE scalar)\ {\ quaternion output;\ output.data = data * scalar;\ @@ -59,13 +59,13 @@ struct quaternion inline quaternion operator*(NBL_CONST_REF_ARG(quaternion) other) { return quaternion::create( - w * q.w - x * q.x - y * q.y - z * q.z, - w * q.x + x * q.w + y * q.z - z * q.y, - w * q.y - x * q.z + y * q.w + z * q.x, - w * q.z + x * q.y - y * q.x + z * q.w + data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z, + data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y, + data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x, + data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w ); } -} +}; } // end namespace core } // nbl diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index d50bc16869..9b3ce2f602 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -2,14 +2,33 @@ #define _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ #include +#include +// TODO: remove this header when deleting vectorSIMDf.hlsl +#include namespace nbl { namespace hlsl { +// TODO: this is temporary function, delete when removing vectorSIMD template -matrix getMatrix3x4As4x4(const matrix& mat) +core::vectorSIMDf transformVector(const matrix& mat, const core::vectorSIMDf& vec) +{ + core::vectorSIMDf output; + float32_t4 tmp; + for (int i = 0; i < 4; ++i) // rather do that that reinterpret_cast for safety + tmp[i] = output[i]; + + for (int i = 0; i < 4; ++i) + output[i] = dot(mat[i], tmp[i]); + + return output; +} + +// TODO: another idea is to create `getTransformationMatrixAs4x4` function, which will add 4th row to a 3x4 matrix and do nothing to 4x4 matrix, this way we will not have to deal with partial specialization later +template +matrix getMatrix3x4As4x4(matrix mat) { matrix output; for (int i = 0; i < 3; ++i) @@ -35,9 +54,9 @@ inline matrix buildCameraLookAtMatrixLH( const vector& target, const vector& upVector) { - const vector zaxis = core::normalize(target - position); - const vector xaxis = core::normalize(core::cross(upVector, zaxis)); - const vector yaxis = core::cross(zaxis, xaxis); + const vector zaxis = normalize(target - position); + const vector xaxis = normalize(cross(upVector, zaxis)); + const vector yaxis = cross(zaxis, xaxis); matrix r; r[0] = vector(xaxis, -dot(xaxis, position)); @@ -52,9 +71,9 @@ float32_t3x4 buildCameraLookAtMatrixRH( const float32_t3& target, const float32_t3& upVector) { - const float32_t3 zaxis = core::normalize(position - target); - const float32_t3 xaxis = core::normalize(core::cross(upVector, zaxis)); - const float32_t3 yaxis = core::cross(zaxis, xaxis); + const float32_t3 zaxis = normalize(position - target); + const float32_t3 xaxis = normalize(cross(upVector, zaxis)); + const float32_t3 yaxis = cross(zaxis, xaxis); float32_t3x4 r; r[0] = float32_t4(xaxis, -dot(xaxis, position)); @@ -70,7 +89,7 @@ float32_t3x4 buildCameraLookAtMatrixRH( //! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged template -inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(core::quaternion) quat) +inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(nbl::hlsl::quaternion) quat) { static_assert(N == 3 || N == 4); diff --git a/include/nbl/core/declarations.h b/include/nbl/core/declarations.h index 9aa708a793..466ea988aa 100644 --- a/include/nbl/core/declarations.h +++ b/include/nbl/core/declarations.h @@ -50,7 +50,6 @@ #include "nbl/core/math/colorutil.h" #include "nbl/core/math/rational.h" #include "nbl/core/math/plane3dSIMD.h" -#include "nbl/core/math/matrixutil.h" // memory #include "nbl/core/memory/memory.h" #include "nbl/core/memory/new_delete.h" diff --git a/include/nbl/core/definitions.h b/include/nbl/core/definitions.h index c08af6ad74..5913c2c8f2 100644 --- a/include/nbl/core/definitions.h +++ b/include/nbl/core/definitions.h @@ -15,8 +15,4 @@ #include "nbl/core/math/floatutil.tcc" #include "nbl/core/math/glslFunctions.tcc" -// implementations [deprecated] -#include "matrix3x4SIMD_impl.h" -#include "matrix4SIMD_impl.h" - #endif \ No newline at end of file diff --git a/include/nbl/core/math/floatutil.tcc b/include/nbl/core/math/floatutil.tcc index 71c8bd2da7..86e42c7e73 100644 --- a/include/nbl/core/math/floatutil.tcc +++ b/include/nbl/core/math/floatutil.tcc @@ -7,7 +7,7 @@ #include "nbl/core/math/floatutil.h" -#include "matrix4SIMD.h" +#include "vectorSIMD.h" namespace nbl { @@ -29,16 +29,6 @@ NBL_FORCE_INLINE vectorSIMDf ROUNDING_ERROR() { return vectorSIMDf(ROUNDING_ERROR()); } -template<> -NBL_FORCE_INLINE matrix3x4SIMD ROUNDING_ERROR() -{ - return matrix3x4SIMD(ROUNDING_ERROR(),ROUNDING_ERROR(),ROUNDING_ERROR()); -} -template<> -NBL_FORCE_INLINE matrix4SIMD ROUNDING_ERROR() -{ - return matrix4SIMD(ROUNDING_ERROR(),ROUNDING_ERROR(),ROUNDING_ERROR(),ROUNDING_ERROR()); -} template NBL_FORCE_INLINE T ROUNDING_ERROR() { diff --git a/include/nbl/core/math/glslFunctions.h b/include/nbl/core/math/glslFunctions.h index 2bd17cd642..82402a0ae5 100644 --- a/include/nbl/core/math/glslFunctions.h +++ b/include/nbl/core/math/glslFunctions.h @@ -10,6 +10,7 @@ #include "nbl/type_traits.h" #include "nbl/core/math/floatutil.h" +#include "nbl/builtin/hlsl/cpp_compat.hlsl" namespace nbl { @@ -21,8 +22,6 @@ class vectorSIMDBool; template class vectorSIMD_32; class vectorSIMDf; -class matrix4SIMD; -class matrix3x4SIMD; template @@ -317,7 +316,6 @@ NBL_FORCE_INLINE T lerp(const T& a, const T& b, const U& t) return core::mix(a,b,t); } - // TODO : step,smoothstep,isnan,isinf,floatBitsToInt,floatBitsToUint,intBitsToFloat,uintBitsToFloat,frexp,ldexp // extra note, GCC breaks isfinite, isinf, isnan, isnormal, signbit in -ffast-math so need to implement ourselves // TODO : packUnorm2x16, packSnorm2x16, packUnorm4x8, packSnorm4x8, unpackUnorm2x16, unpackSnorm2x16, unpackUnorm4x8, unpackSnorm4x8, packHalf2x16, unpackHalf2x16, packDouble2x32, unpackDouble2x32 @@ -331,7 +329,6 @@ NBL_FORCE_INLINE vectorSIMD_32 dot>(const vector template<> NBL_FORCE_INLINE vectorSIMD_32 dot>(const vectorSIMD_32& a, const vectorSIMD_32& b); - template NBL_FORCE_INLINE T lengthsquared(const T& v) { @@ -355,14 +352,22 @@ NBL_FORCE_INLINE T distancesquared(const T& a, const T& b) } template -NBL_FORCE_INLINE T cross(const T& a, const T& b); +NBL_FORCE_INLINE T cross(const T& a, const T& b) +{ + hlsl::float32_t3 output; + output.x = a[1] * b[2] - a[2] * b[1]; + output.y = a[2] * b[0] - a[0] * b[2]; + output.y = a[0] * b[1] - a[1] * b[0]; + + return output; +} template<> NBL_FORCE_INLINE vectorSIMDf cross(const vectorSIMDf& a, const vectorSIMDf& b); template NBL_FORCE_INLINE T normalize(const T& v) { - auto d = dot(v, v); + auto d = core::dot(v, v); #ifdef __NBL_FAST_MATH return v * core::inversesqrt(d); #else @@ -373,11 +378,6 @@ NBL_FORCE_INLINE T normalize(const T& v) // TODO : matrixCompMult, outerProduct, inverse template NBL_FORCE_INLINE T transpose(const T& m); -template<> -NBL_FORCE_INLINE matrix4SIMD transpose(const matrix4SIMD& m); - - - // Extras diff --git a/include/nbl/core/math/glslFunctions.tcc b/include/nbl/core/math/glslFunctions.tcc index 205585965b..c25464b876 100644 --- a/include/nbl/core/math/glslFunctions.tcc +++ b/include/nbl/core/math/glslFunctions.tcc @@ -8,7 +8,6 @@ #include "nbl/core/declarations.h" #include "nbl/core/math/floatutil.tcc" -#include "matrix4SIMD.h" #include #include @@ -266,6 +265,23 @@ NBL_FORCE_INLINE vectorSIMDu32 dot(const vectorSIMDu32& a, const #endif } +// these functions will be removed with vectorSIMDf, would't worry about them +template<> +NBL_FORCE_INLINE hlsl::float32_t3 dot(const hlsl::float32_t3& a, const hlsl::float32_t3& b) +{ + hlsl::float32_t3 output = a * b; + output.x = output.x + output.y + output.z; + return output; +} + +template<> +NBL_FORCE_INLINE hlsl::float32_t4 dot(const hlsl::float32_t4& a, const hlsl::float32_t4& b) +{ + hlsl::float32_t4 output = a * b; + output.x = output.x + output.y + output.z + output.w; + return output; +} + template<> NBL_FORCE_INLINE vectorSIMDf cross(const vectorSIMDf& a, const vectorSIMDf& b) { @@ -280,21 +296,6 @@ NBL_FORCE_INLINE vectorSIMDf cross(const vectorSIMDf& a, const vect #endif } -template<> -NBL_FORCE_INLINE matrix4SIMD transpose(const matrix4SIMD& m) -{ - core::matrix4SIMD retval; - __m128 a0 = m.rows[0].getAsRegister(), a1 = m.rows[1].getAsRegister(), a2 = m.rows[2].getAsRegister(), a3 = m.rows[3].getAsRegister(); - _MM_TRANSPOSE4_PS(a0, a1, a2, a3); - retval.rows[0] = a0; - retval.rows[1] = a1; - retval.rows[2] = a2; - retval.rows[3] = a3; - return retval; -} - - - template<> NBL_FORCE_INLINE bool equals(const vectorSIMDf& a, const vectorSIMDf& b, const vectorSIMDf& tolerance) { @@ -307,22 +308,6 @@ NBL_FORCE_INLINE bool equals(const core::vector3df& a, const core::vector3df& b, auto la = a-tolerance; return ha.X>=b.X&&ha.Y>=b.Y&&ha.Z>=b.Z && la.X<=b.X&&la.Y<=b.Y&&la.Z<=b.Z; } -template<> -NBL_FORCE_INLINE bool equals(const matrix4SIMD& a, const matrix4SIMD& b, const matrix4SIMD& tolerance) -{ - for (size_t i = 0u; i(a.rows[i], b.rows[i], tolerance.rows[i])) - return false; - return true; -} -template<> -NBL_FORCE_INLINE bool equals(const matrix3x4SIMD& a, const matrix3x4SIMD& b, const matrix3x4SIMD& tolerance) -{ - for (size_t i = 0u; i(a.rows[i], b.rows[i], tolerance[i])) - return false; - return true; -} template NBL_FORCE_INLINE bool equals(const T& a, const T& b, const T& tolerance) { diff --git a/include/nbl/core/math/matrixutil.h b/include/nbl/core/math/matrixutil.h deleted file mode 100644 index afe7955c9b..0000000000 --- a/include/nbl/core/math/matrixutil.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_MATRIX_UTIL_H_INCLUDED_ -#define _NBL_MATRIX_UTIL_H_INCLUDED_ - -#include "matrix4SIMD.h" -#include "matrix3x4SIMD.h" - -namespace nbl::core -{ - - -//! TODO: OPTIMIZE THIS, DON'T PROMOTE THE MATRIX IF DON'T HAVE TO -inline matrix4SIMD concatenateBFollowedByA(const matrix4SIMD& _a, const matrix3x4SIMD& _b) -{ - return concatenateBFollowedByA(_a, matrix4SIMD(_b)); -} -/* -inline matrix4SIMD concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix3x4SIMD& _b) -{ - return concatenateBFollowedByAPrecisely(_a, matrix4SIMD(_b)); -} -*/ - -} - -#endif diff --git a/include/nbl/core/math/plane3dSIMD.h b/include/nbl/core/math/plane3dSIMD.h index 891ed1300c..f6afafd8de 100644 --- a/include/nbl/core/math/plane3dSIMD.h +++ b/include/nbl/core/math/plane3dSIMD.h @@ -6,7 +6,7 @@ #ifndef __NBL_CORE_PLANE_3D_H_INCLUDED__ #define __NBL_CORE_PLANE_3D_H_INCLUDED__ -#include "matrix3x4SIMD.h" +#include namespace nbl { @@ -99,14 +99,20 @@ class plane3dSIMDf : private vectorSIMDf } //! - static inline plane3dSIMDf transform(const plane3dSIMDf& _in, const matrix3x4SIMD& _mat) + static inline plane3dSIMDf transform(const plane3dSIMDf& in, const hlsl::float32_t3x4& mat) { - matrix3x4SIMD inv; - _mat.getInverse(inv); + hlsl::float32_t3x4 a = mat; + hlsl::float32_t4x4 inv = hlsl::getMatrix3x4As4x4(a); + hlsl::inverse(inv); - vectorSIMDf normal(_in.getNormal()); + vectorSIMDf normal(in.getNormal()); // transform by inverse transpose - return plane3dSIMDf(inv.rows[0]*normal.xxxx()+inv.rows[1]*normal.yyyy()+inv.rows[2]*normal.zzzz()+(normal.wwww()&BUILD_MASKF(0,0,0,1))); + hlsl::float32_t4 planeEq = inv[0] * hlsl::float32_t4(normal.x) + inv[1] * hlsl::float32_t4(normal.y) + inv[2] * hlsl::float32_t4(normal.z) + (hlsl::float32_t4(0, 0, 0, normal.w)); + vectorSIMDf planeEqSIMD; + for (int i = 0; i < 4; ++i) + planeEqSIMD[i] = planeEq[i]; + + return plane3dSIMDf(planeEqSIMD); #undef BUILD_MASKF } From 7f2c0857be1555d591971ec84639576bcb895926 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 25 Oct 2024 14:38:27 +0200 Subject: [PATCH 08/27] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index c5f12f0075..038c5d7992 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c5f12f0075f34e5dca36079b09b75aef19f07681 +Subproject commit 038c5d799269a026ffa71e48f5963013632e4634 From f1daa257e1d2d097a77c986ef15065acb874332c Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 25 Oct 2024 16:24:30 +0200 Subject: [PATCH 09/27] actually *this* addresses https://github.com/Devsh-Graphics-Programming/Nabla/pull/760/files#r1816728485 for https://github.com/Devsh-Graphics-Programming/Nabla/pull/760 PR, update examples_tests submodule --- examples_tests | 2 +- .../transformation_matrix_utils.hlsl | 26 ++++++++++--------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/examples_tests b/examples_tests index 038c5d7992..0415999a70 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 038c5d799269a026ffa71e48f5963013632e4634 +Subproject commit 0415999a7035f56f3d9cca7255329916c697ab36 diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index 3c5b2adcd0..b2534f9ac8 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -29,20 +29,22 @@ inline matrix concatenateBFollowedByA(const matrix& a, const m return matrix(mul(a4x4, b4x4)); } +// /Arek: glm:: for normalize till dot product is fixed (ambiguity with glm namespace + linker issues) + template inline matrix buildCameraLookAtMatrixLH( const vector& position, const vector& target, const vector& upVector) { - const vector zaxis = core::normalize(target - position); - const vector xaxis = core::normalize(core::cross(upVector, zaxis)); - const vector yaxis = core::cross(zaxis, xaxis); + const vector zaxis = glm::normalize(target - position); + const vector xaxis = glm::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); matrix r; - r[0] = vector(xaxis, -dot(xaxis, position)); - r[1] = vector(yaxis, -dot(yaxis, position)); - r[2] = vector(zaxis, -dot(zaxis, position)); + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); return r; } @@ -53,14 +55,14 @@ inline matrix buildCameraLookAtMatrixRH( const vector& target, const vector& upVector) { - const vector zaxis = core::normalize(position - target); - const vector xaxis = core::normalize(core::cross(upVector, zaxis)); - const vector yaxis = core::cross(zaxis, xaxis); + const vector zaxis = glm::normalize(position - target); + const vector xaxis = glm::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); matrix r; - r[0] = vector(xaxis, -dot(xaxis, position)); - r[1] = vector(yaxis, -dot(yaxis, position)); - r[2] = vector(zaxis, -dot(zaxis, position)); + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); return r; } From 2192e99e91d359bf7f8c544990d03fdef56a7b4f Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 25 Oct 2024 16:56:07 +0200 Subject: [PATCH 10/27] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 0415999a70..84f35e5d62 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 0415999a7035f56f3d9cca7255329916c697ab36 +Subproject commit 84f35e5d6259d0c814af72ccefcba0d00818668e From bc8b2a43265e03e90eb33c8df6ad89efcca530f1 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 26 Oct 2024 09:12:46 -0700 Subject: [PATCH 11/27] Saving work --- examples_tests | 2 +- include/nbl/asset/IAccelerationStructure.h | 9 +-- include/nbl/asset/IAnimationLibrary.h | 6 +- include/nbl/asset/ICPUSkeleton.h | 8 +-- include/nbl/asset/asset_utils.h | 2 +- include/nbl/asset/metadata/IMeshMetadata.h | 2 +- include/nbl/asset/utils/IMeshManipulator.h | 2 +- .../nbl/builtin/hlsl/glsl_compat/math.hlsl | 65 ------------------- .../hlsl/math/quaternion/quaternion.hlsl | 39 ++++++++++- .../transformation_matrix_utils.hlsl | 12 ++++ include/nbl/core/math/glslFunctions.h | 26 +++----- include/nbl/core/math/glslFunctions.tcc | 20 ------ include/nbl/core/math/intutil.h | 4 +- include/nbl/ext/DebugDraw/CDraw3DLine.h | 2 +- include/nbl/ext/MitsubaLoader/CElementShape.h | 2 +- include/nbl/scene/ILevelOfDetailLibrary.h | 6 +- include/vector3d.h | 6 +- src/nbl/asset/interchange/CGLTFLoader.cpp | 2 +- src/nbl/asset/utils/CGeometryCreator.cpp | 6 +- src/nbl/asset/utils/CMeshManipulator.cpp | 17 ++--- 20 files changed, 95 insertions(+), 143 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/glsl_compat/math.hlsl diff --git a/examples_tests b/examples_tests index 13df456b76..d1c0e9f7e6 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 13df456b76c4e38996502fc71d9878334fd5ea18 +Subproject commit d1c0e9f7e6bb05dbacd2318da86909d3a93e08f3 diff --git a/include/nbl/asset/IAccelerationStructure.h b/include/nbl/asset/IAccelerationStructure.h index 94e53238b2..9dc07b2675 100644 --- a/include/nbl/asset/IAccelerationStructure.h +++ b/include/nbl/asset/IAccelerationStructure.h @@ -10,6 +10,7 @@ #include #include "nbl/builtin/hlsl/cpp_compat/matrix.hlsl" +#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" #include "nbl/asset/ECommonEnums.h" #include "nbl/asset/IDescriptor.h" @@ -181,7 +182,7 @@ class ITopLevelAccelerationStructure : public AccelerationStructure FORCE_OPACITY_MICROMAP_2_STATE_BIT = 0x1u<<4u, FORCE_DISABLE_OPACITY_MICROMAPS_BIT = 0x1u<<5u, }; - // Note: `core::matrix3x4SIMD` is equvalent to VkTransformMatrixKHR, 4x3 row_major matrix + // Note: `hlsl::float32_t3x4` is equvalent to VkTransformMatrixKHR, 4x3 row_major matrix template struct Instance final { @@ -197,18 +198,18 @@ class ITopLevelAccelerationStructure : public AccelerationStructure template struct StaticInstance final { - core::matrix3x4SIMD transform = core::matrix3x4SIMD(); + hlsl::float32_t3x4 transform = hlsl::IdentityFloat32_t3x4; Instance base = {}; }; template struct MatrixMotionInstance final { - core::matrix3x4SIMD transform[2] = {core::matrix3x4SIMD(),core::matrix3x4SIMD()}; + hlsl::float32_t3x4 transform[2] = { hlsl::IdentityFloat32_t3x4, hlsl::IdentityFloat32_t3x4 }; Instance base = {}; }; struct SRT { - // TODO: some operators to convert back and forth from `core::matrix3x4SIMD + // TODO: some operators to convert back and forth from `hlsl::float32_t3x4 float sx; float a; diff --git a/include/nbl/asset/IAnimationLibrary.h b/include/nbl/asset/IAnimationLibrary.h index 9665349103..d650cb25d9 100644 --- a/include/nbl/asset/IAnimationLibrary.h +++ b/include/nbl/asset/IAnimationLibrary.h @@ -34,7 +34,7 @@ class IAnimationLibrary : public virtual core::IReferenceCounted translation[2] = translation[1] = translation[0] = 0.f; quat = core::vectorSIMDu32(128u,128u,128u,255u); // should be (0,0,0,1) encoded } - Keyframe(const core::vectorSIMDf& _scale, const core::quaternion& _quat, const CQuantQuaternionCache* quantCache, const core::vectorSIMDf& _translation) + Keyframe(const core::vectorSIMDf& _scale, const hlsl::quaternion& _quat, const CQuantQuaternionCache* quantCache, const core::vectorSIMDf& _translation) { std::copy(_translation.pointer,_translation.pointer+3,translation); quat = quantCache->template quantize(_quat); @@ -42,13 +42,13 @@ class IAnimationLibrary : public virtual core::IReferenceCounted //scale = ; } - inline core::quaternion getRotation() const + inline hlsl::quaternion getRotation() const { const void* _pix[4] = {&quat,nullptr,nullptr,nullptr}; double out[4]; decodePixels(_pix,out,0u,0u); auto q = core::normalize(core::vectorSIMDf(out[0],out[1],out[2],out[3])); - return reinterpret_cast(&q)[0]; + return reinterpret_cast*>(&q)[0]; } inline core::vectorSIMDf getScale() const diff --git a/include/nbl/asset/ICPUSkeleton.h b/include/nbl/asset/ICPUSkeleton.h index 6f1c576ed8..53d7e66be0 100644 --- a/include/nbl/asset/ICPUSkeleton.h +++ b/include/nbl/asset/ICPUSkeleton.h @@ -42,15 +42,15 @@ class ICPUSkeleton final : public ISkeleton, public IAsset } //! - inline const core::matrix3x4SIMD& getDefaultTransformMatrix(base_t::joint_id_t jointID) const + inline const hlsl::float32_t3x4& getDefaultTransformMatrix(base_t::joint_id_t jointID) const { const uint8_t* ptr = reinterpret_cast(m_defaultTransforms.buffer->getPointer()); - return reinterpret_cast(ptr+m_defaultTransforms.offset)[jointID]; + return reinterpret_cast(ptr+m_defaultTransforms.offset)[jointID]; } - inline core::matrix3x4SIMD& getDefaultTransformMatrix(base_t::joint_id_t jointID) + inline hlsl::float32_t3x4& getDefaultTransformMatrix(base_t::joint_id_t jointID) { assert(isMutable()); - return const_cast(const_cast(this)->getDefaultTransformMatrix(jointID)); + return const_cast(const_cast(this)->getDefaultTransformMatrix(jointID)); } //! diff --git a/include/nbl/asset/asset_utils.h b/include/nbl/asset/asset_utils.h index 8e4e35a733..84c8a8df45 100644 --- a/include/nbl/asset/asset_utils.h +++ b/include/nbl/asset/asset_utils.h @@ -31,7 +31,7 @@ inline void fillBufferWithDeadBeef(ICPUBuffer* _buf) #include "nbl/nblpack.h" //! Designed for use with interface blocks declared with `layout (row_major, std140)` -// TODO: change members to core::matrix3x4SIMD and core::matrix4SIMD +// TODO: change members to hlsl::float32_t3x4 and hlsl::float32_t4x4 struct SBasicViewParameters { float MVP[4*4]; diff --git a/include/nbl/asset/metadata/IMeshMetadata.h b/include/nbl/asset/metadata/IMeshMetadata.h index 5ce3c12980..25f72e05c0 100644 --- a/include/nbl/asset/metadata/IMeshMetadata.h +++ b/include/nbl/asset/metadata/IMeshMetadata.h @@ -18,7 +18,7 @@ class IMeshMetadata : public core::Interface public: struct SInstance { - core::matrix3x4SIMD worldTform; + hlsl::float32_t3x4 worldTform; }; core::SRange m_instances; diff --git a/include/nbl/asset/utils/IMeshManipulator.h b/include/nbl/asset/utils/IMeshManipulator.h index dfdd44e475..27569e4f60 100644 --- a/include/nbl/asset/utils/IMeshManipulator.h +++ b/include/nbl/asset/utils/IMeshManipulator.h @@ -351,7 +351,7 @@ class NBL_API2 IMeshManipulator : public virtual core::IReferenceCounted static float DistanceToLine(core::vectorSIMDf P0, core::vectorSIMDf P1, core::vectorSIMDf InPoint); static float DistanceToPlane(core::vectorSIMDf InPoint, core::vectorSIMDf PlanePoint, core::vectorSIMDf PlaneNormal); - static core::matrix3x4SIMD calculateOBB(const nbl::asset::ICPUMeshBuffer* meshbuffer); + static hlsl::float32_t3x4 calculateOBB(const nbl::asset::ICPUMeshBuffer* meshbuffer); //! Calculates bounding box of the meshbuffer static inline core::aabbox3df calculateBoundingBox( diff --git a/include/nbl/builtin/hlsl/glsl_compat/math.hlsl b/include/nbl/builtin/hlsl/glsl_compat/math.hlsl deleted file mode 100644 index 22d6314d0d..0000000000 --- a/include/nbl/builtin/hlsl/glsl_compat/math.hlsl +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (C) 2023 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_BUILTIN_HLSL_GLSL_COMPAT_CORE_INCLUDED_ -#define _NBL_BUILTIN_HLSL_GLSL_COMPAT_CORE_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" -#include "nbl/builtin/hlsl/type_traits.hlsl" - -#ifndef __HLSL_VERSION -#include -#endif - -namespace nbl -{ -namespace hlsl -{ -namespace glsl -{ - -template -inline T radians(NBL_CONST_REF_ARG(T) degrees) -{ - static_assert( - is_floating_point::value, - "This code expects the type to be either a double or a float." - ); - - return degrees * PI() / T(180); -} - -template -inline T degrees(NBL_CONST_REF_ARG(T) radians) -{ - static_assert( - is_floating_point::value, - "This code expects the type to be either a double or a float." - ); - - return radians * T(180) / PI(); -} - -template -inline bool equals(NBL_CONST_REF_ARG(T) a, NBL_CONST_REF_ARG(T) b, NBL_CONST_REF_ARG(T) tolerance) -{ - return (a + tolerance >= b) && (a - tolerance <= b); -} - -#ifndef __HLSL_VERSION - -NBL_FORCE_INLINE bool equals(const core::vector3df& a, const core::vector3df& b, const core::vector3df& tolerance) -{ - auto ha = a + tolerance; - auto la = a - tolerance; - return ha.X >= b.X && ha.Y >= b.Y && ha.Z >= b.Z && la.X <= b.X && la.Y <= b.Y && la.Z <= b.Z; -} - -#endif - -} -} -} - -#endif diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl index 2991a79e35..2f24e52635 100644 --- a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl @@ -13,7 +13,6 @@ namespace nbl namespace hlsl { - //! Quaternion class for representing rotations. /** It provides cheap combinations and avoids gimbal locks. Also useful for interpolations. */ @@ -22,6 +21,7 @@ template struct quaternion { // i*data[0] + j*data[1] + k*data[2] + data[3] + using vec_t = vector; vector data; //! creates identity quaternion @@ -36,11 +36,46 @@ struct quaternion static inline quaternion create(T x, T y, T z, T w) { quaternion q; - q.data = vector(x, y, z, z); + q.data = vector(x, y, z, w); return q; } + static inline quaternion create(NBL_CONST_REF_ARG(quaternion) other) + { + return other; + } + + static inline quaternion create(T pitch, T yaw, T roll) + { + float angle; + + angle = roll * 0.5f; + const float sr = sinf(angle); + const float cr = cosf(angle); + + angle = pitch * 0.5f; + const float sp = sinf(angle); + const float cp = cos(angle); + + angle = yaw * 0.5f; + const float sy = sinf(angle); + const float cy = cosf(angle); + + const float cpcy = cp * cy; + const float spcy = sp * cy; + const float cpsy = cp * sy; + const float spsy = sp * sy; + + quaternion output; + output.data = float32_t4(sr, cr, cr, cr) * float32_t4(cpcy, spcy, cpsy, cpcy) + float32_t4(-cr, sr, -sr, sr) * float32_t4(spsy, cpsy, spcy, spsy); + + return output; + } + + // TODO: + //explicit quaternion(NBL_CONST_REF_ARG(float32_t3x4) m) {} + #define DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(TYPE)\ inline quaternion operator*(TYPE scalar)\ {\ diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index 9b3ce2f602..cb3391c16f 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -5,12 +5,15 @@ #include // TODO: remove this header when deleting vectorSIMDf.hlsl #include +#include "vectorSIMD.h" namespace nbl { namespace hlsl { +NBL_CONSTEXPR hlsl::float32_t3x4 IdentityFloat32_t3x4 = hlsl::float32_t3x4(hlsl::float32_t4(1, 0, 0, 0), hlsl::float32_t4(0, 0, 1, 0), hlsl::float32_t4(0, 0, 1, 0)); + // TODO: this is temporary function, delete when removing vectorSIMD template core::vectorSIMDf transformVector(const matrix& mat, const core::vectorSIMDf& vec) @@ -125,6 +128,15 @@ inline void setTranslation(matrix& outMat, NBL_CONST_REF_ARG(vector +inline void setTranslation_tmp(matrix& outMat, NBL_CONST_REF_ARG(core::vectorSIMDf) translation) +{ + outMat[0].w = translation.x; + outMat[1].w = translation.y; + outMat[2].w = translation.z; +} + } } diff --git a/include/nbl/core/math/glslFunctions.h b/include/nbl/core/math/glslFunctions.h index 82402a0ae5..7fa7385dad 100644 --- a/include/nbl/core/math/glslFunctions.h +++ b/include/nbl/core/math/glslFunctions.h @@ -122,17 +122,17 @@ NBL_FORCE_INLINE T mix(const T & a, const T & b, const U & t) } else { - if constexpr(nbl::is_any_of::value) + if constexpr(nbl::is_any_of::value) { for (uint32_t i=0u; i::value) + if constexpr(nbl::is_any_of::value) { - retval[i] = core::mix(a.rows[i], b.rows[i], t.rows[i]); + retval[i] = core::mix(a[i], b[i], t[i]); } else { - retval[i] = core::mix(a.rows[i], b.rows[i], t); + retval[i] = core::mix(a[i], b[i], t); } } @@ -316,6 +316,7 @@ NBL_FORCE_INLINE T lerp(const T& a, const T& b, const U& t) return core::mix(a,b,t); } + // TODO : step,smoothstep,isnan,isinf,floatBitsToInt,floatBitsToUint,intBitsToFloat,uintBitsToFloat,frexp,ldexp // extra note, GCC breaks isfinite, isinf, isnan, isnormal, signbit in -ffast-math so need to implement ourselves // TODO : packUnorm2x16, packSnorm2x16, packUnorm4x8, packSnorm4x8, unpackUnorm2x16, unpackSnorm2x16, unpackUnorm4x8, unpackSnorm4x8, packHalf2x16, unpackHalf2x16, packDouble2x32, unpackDouble2x32 @@ -329,6 +330,7 @@ NBL_FORCE_INLINE vectorSIMD_32 dot>(const vector template<> NBL_FORCE_INLINE vectorSIMD_32 dot>(const vectorSIMD_32& a, const vectorSIMD_32& b); + template NBL_FORCE_INLINE T lengthsquared(const T& v) { @@ -352,22 +354,14 @@ NBL_FORCE_INLINE T distancesquared(const T& a, const T& b) } template -NBL_FORCE_INLINE T cross(const T& a, const T& b) -{ - hlsl::float32_t3 output; - output.x = a[1] * b[2] - a[2] * b[1]; - output.y = a[2] * b[0] - a[0] * b[2]; - output.y = a[0] * b[1] - a[1] * b[0]; - - return output; -} +NBL_FORCE_INLINE T cross(const T& a, const T& b); template<> NBL_FORCE_INLINE vectorSIMDf cross(const vectorSIMDf& a, const vectorSIMDf& b); template NBL_FORCE_INLINE T normalize(const T& v) { - auto d = core::dot(v, v); + auto d = dot(v, v); #ifdef __NBL_FAST_MATH return v * core::inversesqrt(d); #else @@ -424,10 +418,6 @@ template NBL_FORCE_INLINE bool equals(const T& a, const T& b, const T& tolerance); template<> NBL_FORCE_INLINE bool equals(const vectorSIMDf& a, const vectorSIMDf& b, const vectorSIMDf& tolerance); -template<> -NBL_FORCE_INLINE bool equals(const matrix4SIMD& a, const matrix4SIMD& b, const matrix4SIMD& tolerance); -template<> -NBL_FORCE_INLINE bool equals(const matrix3x4SIMD& a, const matrix3x4SIMD& b, const matrix3x4SIMD& tolerance); //! returns if a equals zero, taking rounding errors into account diff --git a/include/nbl/core/math/glslFunctions.tcc b/include/nbl/core/math/glslFunctions.tcc index c25464b876..d2343f0322 100644 --- a/include/nbl/core/math/glslFunctions.tcc +++ b/include/nbl/core/math/glslFunctions.tcc @@ -265,23 +265,6 @@ NBL_FORCE_INLINE vectorSIMDu32 dot(const vectorSIMDu32& a, const #endif } -// these functions will be removed with vectorSIMDf, would't worry about them -template<> -NBL_FORCE_INLINE hlsl::float32_t3 dot(const hlsl::float32_t3& a, const hlsl::float32_t3& b) -{ - hlsl::float32_t3 output = a * b; - output.x = output.x + output.y + output.z; - return output; -} - -template<> -NBL_FORCE_INLINE hlsl::float32_t4 dot(const hlsl::float32_t4& a, const hlsl::float32_t4& b) -{ - hlsl::float32_t4 output = a * b; - output.x = output.x + output.y + output.z + output.w; - return output; -} - template<> NBL_FORCE_INLINE vectorSIMDf cross(const vectorSIMDf& a, const vectorSIMDf& b) { @@ -314,7 +297,6 @@ NBL_FORCE_INLINE bool equals(const T& a, const T& b, const T& tolerance) return (a + tolerance >= b) && (a - tolerance <= b); } - template<> NBL_FORCE_INLINE vectorSIMDf sin(const vectorSIMDf& a) { @@ -327,8 +309,6 @@ NBL_FORCE_INLINE T sin(const T& a) return std::sin(a); } - - // extras diff --git a/include/nbl/core/math/intutil.h b/include/nbl/core/math/intutil.h index d365835163..768883f2b0 100644 --- a/include/nbl/core/math/intutil.h +++ b/include/nbl/core/math/intutil.h @@ -3,8 +3,8 @@ // For conditions of distribution and use, see copyright notice in nabla.h // TODO: kill this file -#ifndef __NBL_MATH_H_INCLUDED__ -#define __NBL_MATH_H_INCLUDED__ +#ifndef __NBL_INTUTIL_H_INCLUDED__ +#define __NBL_INTUTIL_H_INCLUDED__ #include "BuildConfigOptions.h" diff --git a/include/nbl/ext/DebugDraw/CDraw3DLine.h b/include/nbl/ext/DebugDraw/CDraw3DLine.h index 68cd64e9c1..2437ce4bc5 100644 --- a/include/nbl/ext/DebugDraw/CDraw3DLine.h +++ b/include/nbl/ext/DebugDraw/CDraw3DLine.h @@ -91,7 +91,7 @@ class CDraw3DLine : public core::IReferenceCounted */ void recordToCommandBuffer(video::IGPUCommandBuffer* cmdBuffer, video::IGPUGraphicsPipeline* graphics_pipeline); - inline void addBox(const core::aabbox3df& box, float r, float g, float b, float a, const core::matrix3x4SIMD& tform=core::matrix3x4SIMD()) + inline void addBox(const core::aabbox3df& box, float r, float g, float b, float a, const hlsl::float32_t3x4& tform=hlsl::float32_t3x4()) { auto addLine = [&](auto s, auto e) -> void { diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index 205023afea..c1725963b2 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -225,7 +225,7 @@ class CElementShape : public IElement std::string getLogName() const override { return "shape"; } - inline core::matrix3x4SIMD getAbsoluteTransform() const + inline hlsl::float32_t3x4 getAbsoluteTransform() const { auto local = transform.matrix.extractSub3x4(); // TODO restore at some point (and make it actually work??) diff --git a/include/nbl/scene/ILevelOfDetailLibrary.h b/include/nbl/scene/ILevelOfDetailLibrary.h index 12fbf9dabf..6b68189e7d 100644 --- a/include/nbl/scene/ILevelOfDetailLibrary.h +++ b/include/nbl/scene/ILevelOfDetailLibrary.h @@ -26,11 +26,11 @@ class ILevelOfDetailLibrary : public virtual core::IReferenceCounted return distanceSqAtReferenceFoV(); - return abs(proj.rows[0].x*proj.rows[1].y-proj.rows[0].y*proj.rows[1].x)/dot(proj.rows[3],proj.rows[3]).x; + return abs(proj[0].x*proj[1].y-proj[0].y*proj[1].x)/core::dot(proj[3],proj[3]).x; } }; template class container=core::vector> diff --git a/include/vector3d.h b/include/vector3d.h index 327eaacdb2..e5a4905da3 100644 --- a/include/vector3d.h +++ b/include/vector3d.h @@ -6,7 +6,7 @@ #ifndef __NBL_POINT_3D_H_INCLUDED__ #define __NBL_POINT_3D_H_INCLUDED__ -#include "nbl/core/math/glslFunctions.h" +#include namespace nbl { @@ -63,7 +63,7 @@ namespace core //! use weak float compare bool operator==(const vector3d& other) const { - return core::equals >(*this,other,vector3d(core::ROUNDING_ERROR())); + return core::equals >(*this, other, vector3d(core::ROUNDING_ERROR())); } bool operator!=(const vector3d& other) const @@ -72,7 +72,6 @@ namespace core } // functions - vector3d& set(const T nx, const T ny, const T nz) {X=nx; Y=ny; Z=nz; return *this;} vector3d& set(const vector3d& p) {X=p.X; Y=p.Y; Z=p.Z;return *this;} @@ -275,7 +274,6 @@ namespace core template vector3d operator*(const S scalar, const vector3d& vector) { return vector*scalar; } - } // end namespace core } // end namespace nbl diff --git a/src/nbl/asset/interchange/CGLTFLoader.cpp b/src/nbl/asset/interchange/CGLTFLoader.cpp index 7382bd2f08..63fd42c552 100644 --- a/src/nbl/asset/interchange/CGLTFLoader.cpp +++ b/src/nbl/asset/interchange/CGLTFLoader.cpp @@ -2398,7 +2398,7 @@ using namespace nbl::asset; { core::vector3df_SIMD translation = {}; //!< The node's translation along the x, y, and z axes. core::vector3df_SIMD scale = core::vector3df_SIMD(1.f,1.f,1.f); //!< The node's non-uniform scale, given as the scaling factors along the x, y, and z axes. - core::quaternion rotation = {}; //!< The node's unit quaternion rotation in the order (x, y, z, w), where w is the scalar. + hlsl::quaternion rotation = {}; //!< The node's unit quaternion rotation in the order (x, y, z, w), where w is the scalar. } trs; if (translation.error() != simdjson::error_code::NO_SUCH_FIELD) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 50898be56f..c740b85d97 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -680,7 +680,7 @@ CGeometryCreator::return_type CGeometryCreator::createDiskMesh(float radius, uin DiskVertex* ptr = (DiskVertex*)vertices->getPointer(); const core::vectorSIMDf v0(0.0f, radius, 0.0f, 1.0f); - core::matrix3x4SIMD rotation; + hlsl::float32_t3x4 rotation; //center ptr[0] = DiskVertex(core::vector3df_SIMD(0.0f), video::SColor(0xFFFFFFFFu), @@ -697,8 +697,8 @@ CGeometryCreator::return_type CGeometryCreator::createDiskMesh(float radius, uin for (int i = 2; i < vertexCount-1; i++) { core::vectorSIMDf vn; - core::matrix3x4SIMD rotMatrix; - rotMatrix.setRotation(core::quaternion(0.0f, 0.0f, core::radians((i-1)*angle))); + hlsl::float32_t3x4 rotMatrix; + rotMatrix.setRotation(hlsl::quaternion(0.0f, 0.0f, core::radians((i-1)*angle))); rotMatrix.transformVect(vn, v0); ptr[i] = DiskVertex(vn, video::SColor(0xFFFFFFFFu), diff --git a/src/nbl/asset/utils/CMeshManipulator.cpp b/src/nbl/asset/utils/CMeshManipulator.cpp index 8996972a98..86ead8d302 100644 --- a/src/nbl/asset/utils/CMeshManipulator.cpp +++ b/src/nbl/asset/utils/CMeshManipulator.cpp @@ -11,6 +11,7 @@ #include "nbl/asset/asset.h" +#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" #include "nbl/asset/IRenderpassIndependentPipeline.h" #include "nbl/asset/utils/CMeshManipulator.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" @@ -1526,7 +1527,7 @@ float IMeshManipulator::DistanceToPlane(core::vectorSIMDf InPoint, core::vectorS return (core::dot(PointToPlane, PlaneNormal).x >= 0) ? core::abs(core::dot(PointToPlane, PlaneNormal).x) : 0; } -core::matrix3x4SIMD IMeshManipulator::calculateOBB(const nbl::asset::ICPUMeshBuffer* meshbuffer) +hlsl::float32_t3x4 IMeshManipulator::calculateOBB(const nbl::asset::ICPUMeshBuffer* meshbuffer) { auto FindMinMaxProj = [&](const core::vectorSIMDf& Dir, const core::vectorSIMDf Extrema[]) -> core::vectorSIMDf { @@ -1682,7 +1683,7 @@ core::matrix3x4SIMD IMeshManipulator::calculateOBB(const nbl::asset::ICPUMeshBuf ComputeAxis(P2, P1, Q1, BestAxis, BestQuality, Extrema); ComputeAxis(Q1, P2, P1, BestAxis, BestQuality, Extrema); - core::matrix3x4SIMD TransMat = core::matrix3x4SIMD( + hlsl::float32_t3x4 TransMat = hlsl::float32_t3x4( BestAxis[0].x, BestAxis[1].x, BestAxis[2].x, 0, BestAxis[0].y, BestAxis[1].y, BestAxis[2].y, 0, BestAxis[0].z, BestAxis[1].z, BestAxis[2].z, 0); @@ -1705,12 +1706,12 @@ core::matrix3x4SIMD IMeshManipulator::calculateOBB(const nbl::asset::ICPUMeshBuf core::vectorSIMDf ABBDiff = AABBMax - AABBMin; float ABBQuality = ABBDiff.x * ABBDiff.y + ABBDiff.y * ABBDiff.z + ABBDiff.z * ABBDiff.x; - core::matrix3x4SIMD scaleMat; - core::matrix3x4SIMD translationMat; - translationMat.setTranslation(-(MinPoint) / OBBDiff); - scaleMat.setScale(OBBDiff); - TransMat = core::concatenateBFollowedByA(TransMat, scaleMat); - TransMat = core::concatenateBFollowedByA(TransMat, translationMat); + hlsl::float32_t3x4 scaleMat; + hlsl::float32_t3x4 translationMat; + hlsl::setTranslation(translationMat, -(MinPoint) / OBBDiff); + hlsl::setScale(scaleMat, OBBDiff); + TransMat = hlsl::concatenateBFollowedByA(TransMat, scaleMat); + TransMat = hlsl::concatenateBFollowedByA(TransMat, translationMat); if (ABBQuality < OBBQuality) { translationMat.setTranslation(-(AABBMin) / ABBDiff); scaleMat.setScale(ABBDiff); From 520b1f5d8702098505bfc111f481d07814158a97 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Mon, 28 Oct 2024 11:44:40 +0100 Subject: [PATCH 12/27] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 84f35e5d62..0574d725b2 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 84f35e5d6259d0c814af72ccefcba0d00818668e +Subproject commit 0574d725b2d0ae02a0d0e409e642a14981bc1e97 From fda286ad51311fcbe7430b6b460090727a93bdf2 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 29 Oct 2024 10:04:39 -0700 Subject: [PATCH 13/27] Saving work --- examples_tests | 2 +- include/ICameraSceneNode.h | 8 +- include/nbl/asset/ICPUMeshBuffer.h | 8 +- include/nbl/asset/IMeshBuffer.h | 4 +- include/nbl/asset/ISkeleton.h | 2 +- .../IRenderpassIndependentPipelineMetadata.h | 30 +- include/nbl/asset/utils/IMeshManipulator.h | 6 +- .../nbl/builtin/glsl/math/quaternions.glsl | 5 - .../nbl/builtin/hlsl/cpp_compat/intrinsics.h | 2 - .../nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 1 - .../nbl/builtin/hlsl/cpp_compat/vector.hlsl | 3 +- .../transformation_matrix_utils.hlsl | 83 ++-- include/nbl/core/math/floatutil.h | 6 +- include/nbl/core/math/floatutil.tcc | 1 - include/nbl/core/math/glslFunctions.h | 9 +- include/nbl/core/math/glslFunctions.tcc | 1 - include/nbl/core/math/intutil.h | 2 +- include/nbl/core/math/plane3dSIMD.h | 2 + .../nbl/ext/MitsubaLoader/CMitsubaLoader.h | 10 +- include/nbl/ext/MitsubaLoader/SContext.h | 4 +- include/nbl/video/IGPUAccelerationStructure.h | 2 +- include/quaternion.h | 463 ------------------ include/vectorSIMD.h | 18 + src/nbl/asset/IAssetManager.cpp | 2 +- src/nbl/asset/utils/CGeometryCreator.cpp | 5 +- src/nbl/asset/utils/CMeshManipulator.cpp | 17 +- 26 files changed, 125 insertions(+), 571 deletions(-) delete mode 100644 include/quaternion.h diff --git a/examples_tests b/examples_tests index d1c0e9f7e6..6cbd85caa3 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit d1c0e9f7e6bb05dbacd2318da86909d3a93e08f3 +Subproject commit 6cbd85caa3d646969d7724ca7f07b0b6a72e0957 diff --git a/include/ICameraSceneNode.h b/include/ICameraSceneNode.h index e3975e3802..014c063bf2 100644 --- a/include/ICameraSceneNode.h +++ b/include/ICameraSceneNode.h @@ -46,17 +46,17 @@ class ICameraSceneNode : public ISceneNode The function will figure it out if you've set an orthogonal matrix. \param projection The new projection matrix of the camera. */ - virtual void setProjectionMatrix(const core::matrix4SIMD& projection) =0; + virtual void setProjectionMatrix(const hlsl::float32_t4x4& projection) =0; //! Gets the current projection matrix of the camera. /** \return The current projection matrix of the camera. */ - inline const core::matrix4SIMD& getProjectionMatrix() const { return projMatrix; } + inline const hlsl::float32_t4x4& getProjectionMatrix() const { return projMatrix; } //! Gets the current view matrix of the camera. /** \return The current view matrix of the camera. */ - virtual const core::matrix3x4SIMD& getViewMatrix() const =0; + virtual const hlsl::float32_t3x4& getViewMatrix() const =0; - virtual const core::matrix4SIMD& getConcatenatedMatrix() const =0; + virtual const hlsl::float32_t4x4& getConcatenatedMatrix() const =0; #if 0 //! It is possible to send mouse and key events to the camera. /** Most cameras may ignore this input, but camera scene nodes diff --git a/include/nbl/asset/ICPUMeshBuffer.h b/include/nbl/asset/ICPUMeshBuffer.h index 4ecc0f35ba..edfae1be39 100644 --- a/include/nbl/asset/ICPUMeshBuffer.h +++ b/include/nbl/asset/ICPUMeshBuffer.h @@ -582,18 +582,18 @@ class ICPUMeshBuffer final : public IMeshBuffer(m_inverseBindPoseBufferBinding.buffer->getPointer()); - return reinterpret_cast(ptr+m_inverseBindPoseBufferBinding.offset); + return reinterpret_cast(ptr+m_inverseBindPoseBufferBinding.offset); } - inline core::matrix3x4SIMD* getInverseBindPoses() + inline hlsl::float32_t3x4* getInverseBindPoses() { assert(isMutable()); - return const_cast(const_cast(this)->getInverseBindPoses()); + return const_cast(const_cast(this)->getInverseBindPoses()); } //! diff --git a/include/nbl/asset/IMeshBuffer.h b/include/nbl/asset/IMeshBuffer.h index abf5d156dd..cbcfa9c8f6 100644 --- a/include/nbl/asset/IMeshBuffer.h +++ b/include/nbl/asset/IMeshBuffer.h @@ -211,7 +211,7 @@ class IMeshBuffer : public virtual core::IReferenceCounted virtual inline bool isSkinned() const { return jointCount>0u && maxJointsPerVx>0u && m_inverseBindPoseBufferBinding.buffer && - m_inverseBindPoseBufferBinding.offset+jointCount*sizeof(core::matrix3x4SIMD)<=m_inverseBindPoseBufferBinding.buffer->getSize(); + m_inverseBindPoseBufferBinding.offset+jointCount*sizeof(hlsl::float32_t3x4)<=m_inverseBindPoseBufferBinding.buffer->getSize(); } //! @@ -228,7 +228,7 @@ class IMeshBuffer : public virtual core::IReferenceCounted if (_maxJointsPerVx==0u || _maxJointsPerVx>4u) return false; - if (_inverseBindPoseBufferBinding.offset+_jointCount*sizeof(core::matrix3x4SIMD)>_inverseBindPoseBufferBinding.buffer->getSize()) + if (_inverseBindPoseBufferBinding.offset+_jointCount*sizeof(hlsl::float32_t3x4)>_inverseBindPoseBufferBinding.buffer->getSize()) return false; m_inverseBindPoseBufferBinding = std::move(_inverseBindPoseBufferBinding); diff --git a/include/nbl/asset/ISkeleton.h b/include/nbl/asset/ISkeleton.h index 7960ca4eef..03ba3af4ea 100644 --- a/include/nbl/asset/ISkeleton.h +++ b/include/nbl/asset/ISkeleton.h @@ -62,7 +62,7 @@ class ISkeleton : public virtual core::IReferenceCounted return; assert(m_parentJointIDs.buffer->getSize()>=m_parentJointIDs.offset+sizeof(joint_id_t)*m_jointCount); - assert(m_defaultTransforms.buffer->getSize()>=m_defaultTransforms.offset+sizeof(core::matrix3x4SIMD)*m_jointCount); + assert(m_defaultTransforms.buffer->getSize()>=m_defaultTransforms.offset+sizeof(hlsl::float32_t3x4)*m_jointCount); } virtual ~ISkeleton() { diff --git a/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h b/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h index 416c04823b..7d0b63a141 100644 --- a/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h +++ b/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h @@ -140,35 +140,35 @@ class IRenderpassIndependentPipelineMetadata : public core::Interface //! A non exhaustive list of commonly used shader input semantics enum E_COMMON_SHADER_INPUT { - //! core::matrix4SIMD giving the total projection onto the screen from model-space coordinates + //! hlsl::float32_t4x4 giving the total projection onto the screen from model-space coordinates ECSI_WORLD_VIEW_PROJ, - //! core::matrix4SIMD giving the mapping from view-space into the pre-divide NDC space + //! hlsl::float32_t4x4 giving the mapping from view-space into the pre-divide NDC space ECSI_PROJ, - //! core::matrix3x4SIMD giving the view-space transformation from model-space coordinates + //! hlsl::float32_t3x4 giving the view-space transformation from model-space coordinates ECSI_WORLD_VIEW, - //! core::matrix3x4SIMD giving the view-space transformation from world-space + //! hlsl::float32_t3x4 giving the view-space transformation from world-space ECSI_VIEW, - //! core::matrix3x4SIMD giving the world-space transformation from model-space (last column is object world-space-position) + //! hlsl::float32_t3x4 giving the world-space transformation from model-space (last column is object world-space-position) ECSI_WORLD, - //! core::matrix4SIMD giving the total projection to model-space coordinates from screen-space + //! hlsl::float32_t4x4 giving the total projection to model-space coordinates from screen-space ECSI_WORLD_VIEW_PROJ_INVERSE, - //! core::matrix4SIMD giving the mapping from the pre-divide NDC space into view-space + //! hlsl::float32_t4x4 giving the mapping from the pre-divide NDC space into view-space ECSI_PROJ_INVERSE, - //! core::matrix3x4SIMD giving the model-space transformation from view-space coordinates + //! hlsl::float32_t3x4 giving the model-space transformation from view-space coordinates ECSI_WORLD_VIEW_INVERSE, - //! core::matrix3x4SIMD giving the world-space transformation from view-space (last column is camera world-space-position) + //! hlsl::float32_t3x4 giving the world-space transformation from view-space (last column is camera world-space-position) ECSI_VIEW_INVERSE, - //! core::matrix3x4SIMD giving the model-space transformation from world-space + //! hlsl::float32_t3x4 giving the model-space transformation from world-space ECSI_WORLD_INVERSE, - //! transpose of core::matrix4SIMD giving the total projection to model-space coordinates from screen-space + //! transpose of hlsl::float32_t4x4 giving the total projection to model-space coordinates from screen-space ECSI_WORLD_VIEW_PROJ_INVERSE_TRANSPOSE, - //! transpose of core::matrix4SIMD giving the mapping from the pre-divide NDC space into view-space + //! transpose of hlsl::float32_t4x4 giving the mapping from the pre-divide NDC space into view-space ECSI_PROJ_INVERSE_TRANSPOSE, - //! transpose of core::matrix3x4SIMD giving the model-space transformation from view-space coordinates (upper 3x3 matrix can be used instead of `gl_NormalMatrix`) + //! transpose of hlsl::float32_t3x4 giving the model-space transformation from view-space coordinates (upper 3x3 matrix can be used instead of `gl_NormalMatrix`) ECSI_WORLD_VIEW_INVERSE_TRANSPOSE, - //! transpose of core::matrix3x4SIMD giving the world-space transformation from view-space (last row is camera world-space-position) + //! transpose of hlsl::float32_t3x4 giving the world-space transformation from view-space (last row is camera world-space-position) ECSI_VIEW_INVERSE_TRANSPOSE, - //! transpose of core::matrix3x4SIMD giving the model-space transformation from world-space (upper 3x3 matrix can transform model space normals to world space) + //! transpose of hlsl::float32_t3x4 giving the model-space transformation from world-space (upper 3x3 matrix can transform model space normals to world space) ECSI_WORLD_INVERSE_TRANSPOSE, //! a simple non-filtered environment map as a cubemap diff --git a/include/nbl/asset/utils/IMeshManipulator.h b/include/nbl/asset/utils/IMeshManipulator.h index 27569e4f60..0a5bf12313 100644 --- a/include/nbl/asset/utils/IMeshManipulator.h +++ b/include/nbl/asset/utils/IMeshManipulator.h @@ -18,6 +18,9 @@ #include "nbl/asset/utils/CQuantNormalCache.h" #include "nbl/asset/utils/CQuantQuaternionCache.h" +#include +#include + namespace nbl { namespace asset @@ -408,8 +411,7 @@ class NBL_API2 IMeshManipulator : public virtual core::IReferenceCounted if (jointIDFLT_MIN) { - core::vectorSIMDf boneSpacePos; - inverseBindPoses[jointID].transformVect(boneSpacePos,pos); + core::vectorSIMDf boneSpacePos = hlsl::transformVector(hlsl::getMatrix3x4As4x4(inverseBindPoses[jointID]), pos); jointAABBs[jointID].addInternalPoint(boneSpacePos.getAsVector3df()); noJointInfluence = false; } diff --git a/include/nbl/builtin/glsl/math/quaternions.glsl b/include/nbl/builtin/glsl/math/quaternions.glsl index 7dc6ca0279..d94d48ecc9 100644 --- a/include/nbl/builtin/glsl/math/quaternions.glsl +++ b/include/nbl/builtin/glsl/math/quaternions.glsl @@ -1,18 +1,13 @@ #ifndef _NBL_BUILTIN_GLSL_MATH_QUATERNIONS_INCLUDED_ #define _NBL_BUILTIN_GLSL_MATH_QUATERNIONS_INCLUDED_ - - #include - - struct nbl_glsl_quaternion_t { vec4 data; }; - nbl_glsl_quaternion_t nbl_glsl_quaternion_t_constructFromTruncated(in vec3 first3Components) { nbl_glsl_quaternion_t quat; diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h index a8f745fbae..4d10a669c3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h @@ -1,10 +1,8 @@ #ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_INTRINSICS_INCLUDED_ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_INTRINSICS_INCLUDED_ - #include #include -#include // this is a C++ only header, hence the `.h` extension, it only implements HLSL's built-in functions #ifndef __HLSL_VERSION diff --git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index 455f8ef0c7..b1d33f097b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -2,7 +2,6 @@ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_MATRIX_INCLUDED_ #include -#include namespace nbl { diff --git a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl index 0407cf95b6..c83b3893a6 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl @@ -2,7 +2,7 @@ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ // stuff for C++ -#ifndef __HLSL_VERSION +#ifndef __HLSL_VERSION #include #include @@ -90,5 +90,6 @@ struct blake3_hasher::update_impl,Dummy> }; } #endif + } #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index cb3391c16f..8f687fb97a 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -1,5 +1,5 @@ -#ifndef _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ -#define _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ #include #include @@ -16,7 +16,7 @@ NBL_CONSTEXPR hlsl::float32_t3x4 IdentityFloat32_t3x4 = hlsl::float32_t3x4(hlsl: // TODO: this is temporary function, delete when removing vectorSIMD template -core::vectorSIMDf transformVector(const matrix& mat, const core::vectorSIMDf& vec) +inline core::vectorSIMDf transformVector(const matrix& mat, const core::vectorSIMDf& vec) { core::vectorSIMDf output; float32_t4 tmp; @@ -24,14 +24,13 @@ core::vectorSIMDf transformVector(const matrix& mat, const core::vector tmp[i] = output[i]; for (int i = 0; i < 4; ++i) - output[i] = dot(mat[i], tmp[i]); + output[i] = hlsl::dot(mat[i], tmp); return output; } -// TODO: another idea is to create `getTransformationMatrixAs4x4` function, which will add 4th row to a 3x4 matrix and do nothing to 4x4 matrix, this way we will not have to deal with partial specialization later template -matrix getMatrix3x4As4x4(matrix mat) +inline matrix getMatrix3x4As4x4(matrix mat) { matrix output; for (int i = 0; i < 3; ++i) @@ -41,6 +40,16 @@ matrix getMatrix3x4As4x4(matrix mat) return output; } +template +inline matrix getAs64BitPrecisionMatrix(matrix mat) +{ + matrix output; + for (int i = 0; i < N; ++i) + output[i] = mat[i]; + + return output; +} + // TODO: use portable_float when merged //! multiplies matrices a and b, 3x4 matrices are treated as 4x4 matrices with 4th row set to (0, 0, 0 ,1) template @@ -62,26 +71,27 @@ inline matrix buildCameraLookAtMatrixLH( const vector yaxis = cross(zaxis, xaxis); matrix r; - r[0] = vector(xaxis, -dot(xaxis, position)); - r[1] = vector(yaxis, -dot(yaxis, position)); - r[2] = vector(zaxis, -dot(zaxis, position)); + r[0] = vector(xaxis, -dot(xaxis, position)); + r[1] = vector(yaxis, -dot(yaxis, position)); + r[2] = vector(zaxis, -dot(zaxis, position)); return r; } -float32_t3x4 buildCameraLookAtMatrixRH( - const float32_t3& position, - const float32_t3& target, - const float32_t3& upVector) +template +inline matrix buildCameraLookAtMatrixRH( + const vector& position, + const vector& target, + const vector& upVector) { - const float32_t3 zaxis = normalize(position - target); - const float32_t3 xaxis = normalize(cross(upVector, zaxis)); - const float32_t3 yaxis = cross(zaxis, xaxis); + const vector zaxis = normalize(position - target); + const vector xaxis = normalize(cross(upVector, zaxis)); + const vector yaxis = cross(zaxis, xaxis); float32_t3x4 r; - r[0] = float32_t4(xaxis, -dot(xaxis, position)); - r[1] = float32_t4(yaxis, -dot(yaxis, position)); - r[2] = float32_t4(zaxis, -dot(zaxis, position)); + r[0] = vector(xaxis, -dot(xaxis, position)); + r[1] = vector(yaxis, -dot(yaxis, position)); + r[2] = vector(zaxis, -dot(zaxis, position)); return r; } @@ -90,6 +100,14 @@ float32_t3x4 buildCameraLookAtMatrixRH( // TODO: move quaternion to nbl::hlsl // TODO: why NBL_REF_ARG(MatType) doesn't work????? +template +inline void setScale(matrix& outMat, NBL_CONST_REF_ARG(vector) scale) +{ + outMat[0][0] = scale[0]; + outMat[1][1] = scale[1]; + outMat[2][2] = scale[2]; +} + //! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged template inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(nbl::hlsl::quaternion) quat) @@ -97,23 +115,23 @@ inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(nbl::hlsl::qu static_assert(N == 3 || N == 4); outMat[0] = vector( - 1 - 2 * (quat.y * quat.y + quat.z * quat.z), - 2 * (quat.x * quat.y - quat.z * quat.w), - 2 * (quat.x * quat.z + quat.y * quat.w), + 1 - 2 * (quat.data.y * quat.data.y + quat.data.z * quat.data.z), + 2 * (quat.data.x * quat.data.y - quat.data.z * quat.data.w), + 2 * (quat.data.x * quat.data.z + quat.data.y * quat.data.w), outMat[0][3] ); outMat[1] = vector( - 2 * (quat.x * quat.y + quat.z * quat.w), - 1 - 2 * (quat.x * quat.x + quat.z * quat.z), - 2 * (quat.y * quat.z - quat.x * quat.w), + 2 * (quat.data.x * quat.data.y + quat.data.z * quat.data.w), + 1 - 2 * (quat.data.x * quat.data.x + quat.data.z * quat.data.z), + 2 * (quat.data.y * quat.data.z - quat.data.x * quat.data.w), outMat[1][3] ); outMat[2] = vector( - 2 * (quat.x * quat.z - quat.y * quat.w), - 2 * (quat.y * quat.z + quat.x * quat.w), - 1 - 2 * (quat.x * quat.x + quat.y * quat.y), + 2 * (quat.data.x * quat.data.z - quat.data.y * quat.data.w), + 2 * (quat.data.y * quat.data.z + quat.data.x * quat.data.w), + 1 - 2 * (quat.data.x * quat.data.x + quat.data.y * quat.data.y), outMat[2][3] ); } @@ -128,15 +146,6 @@ inline void setTranslation(matrix& outMat, NBL_CONST_REF_ARG(vector -inline void setTranslation_tmp(matrix& outMat, NBL_CONST_REF_ARG(core::vectorSIMDf) translation) -{ - outMat[0].w = translation.x; - outMat[1].w = translation.y; - outMat[2].w = translation.z; -} - } } diff --git a/include/nbl/core/math/floatutil.h b/include/nbl/core/math/floatutil.h index b37b2cc463..9f310ba147 100644 --- a/include/nbl/core/math/floatutil.h +++ b/include/nbl/core/math/floatutil.h @@ -27,8 +27,6 @@ namespace nbl::core { class vectorSIMDf; -class matrix3x4SIMD; -class matrix4SIMD; //! Rounding error constant often used when comparing values. (TODO: remove) template @@ -40,9 +38,9 @@ NBL_FORCE_INLINE double ROUNDING_ERROR(); template<> NBL_FORCE_INLINE vectorSIMDf ROUNDING_ERROR(); template<> -NBL_FORCE_INLINE matrix3x4SIMD ROUNDING_ERROR(); +NBL_FORCE_INLINE hlsl::float32_t3x4 ROUNDING_ERROR(); template<> -NBL_FORCE_INLINE matrix4SIMD ROUNDING_ERROR(); +NBL_FORCE_INLINE hlsl::float32_t4x4 ROUNDING_ERROR(); #ifdef PI // make sure we don't collide with a define #undef PI diff --git a/include/nbl/core/math/floatutil.tcc b/include/nbl/core/math/floatutil.tcc index 86e42c7e73..e2b43a25bb 100644 --- a/include/nbl/core/math/floatutil.tcc +++ b/include/nbl/core/math/floatutil.tcc @@ -5,7 +5,6 @@ #ifndef __NBL_CORE_FLOAT_UTIL_TCC_INCLUDED__ #define __NBL_CORE_FLOAT_UTIL_TCC_INCLUDED__ - #include "nbl/core/math/floatutil.h" #include "vectorSIMD.h" diff --git a/include/nbl/core/math/glslFunctions.h b/include/nbl/core/math/glslFunctions.h index 7fa7385dad..5fabd1126f 100644 --- a/include/nbl/core/math/glslFunctions.h +++ b/include/nbl/core/math/glslFunctions.h @@ -10,7 +10,7 @@ #include "nbl/type_traits.h" #include "nbl/core/math/floatutil.h" -#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/cpp_compat/matrix.hlsl" namespace nbl { @@ -23,7 +23,6 @@ template class vectorSIMD_32; class vectorSIMDf; - template NBL_FORCE_INLINE T radians(const T& degrees) { @@ -126,7 +125,7 @@ NBL_FORCE_INLINE T mix(const T & a, const T & b, const U & t) { for (uint32_t i=0u; i::value) + if constexpr(nbl::is_any_of::value) { retval[i] = core::mix(a[i], b[i], t[i]); } @@ -316,13 +315,13 @@ NBL_FORCE_INLINE T lerp(const T& a, const T& b, const U& t) return core::mix(a,b,t); } - // TODO : step,smoothstep,isnan,isinf,floatBitsToInt,floatBitsToUint,intBitsToFloat,uintBitsToFloat,frexp,ldexp // extra note, GCC breaks isfinite, isinf, isnan, isnormal, signbit in -ffast-math so need to implement ourselves // TODO : packUnorm2x16, packSnorm2x16, packUnorm4x8, packSnorm4x8, unpackUnorm2x16, unpackSnorm2x16, unpackUnorm4x8, unpackSnorm4x8, packHalf2x16, unpackHalf2x16, packDouble2x32, unpackDouble2x32 // MOVE : faceforward, reflect, refract, any, all, not template NBL_FORCE_INLINE T dot(const T& a, const T& b); + template<> NBL_FORCE_INLINE vectorSIMDf dot(const vectorSIMDf& a, const vectorSIMDf& b); template<> @@ -361,7 +360,7 @@ NBL_FORCE_INLINE vectorSIMDf cross(const vectorSIMDf& a, const vect template NBL_FORCE_INLINE T normalize(const T& v) { - auto d = dot(v, v); + auto d = core::dot(v, v); #ifdef __NBL_FAST_MATH return v * core::inversesqrt(d); #else diff --git a/include/nbl/core/math/glslFunctions.tcc b/include/nbl/core/math/glslFunctions.tcc index d2343f0322..5c86d4c6c1 100644 --- a/include/nbl/core/math/glslFunctions.tcc +++ b/include/nbl/core/math/glslFunctions.tcc @@ -228,7 +228,6 @@ NBL_FORCE_INLINE T max(const T& a, const T& b) return core::mix(a,vb,asmaller); } - template<> NBL_FORCE_INLINE vectorSIMDf dot(const vectorSIMDf& a, const vectorSIMDf& b) { diff --git a/include/nbl/core/math/intutil.h b/include/nbl/core/math/intutil.h index 768883f2b0..8e6bead24d 100644 --- a/include/nbl/core/math/intutil.h +++ b/include/nbl/core/math/intutil.h @@ -8,9 +8,9 @@ #include "BuildConfigOptions.h" +#include "nbl/builtin/hlsl/cpp_compat/intrinsics.h" #include "nbl/macros.h" #include "nbl/core/math/glslFunctions.h" -#include "nbl/builtin/hlsl/cpp_compat.hlsl" #include #include // For INT_MAX / UINT_MAX diff --git a/include/nbl/core/math/plane3dSIMD.h b/include/nbl/core/math/plane3dSIMD.h index f6afafd8de..2686c0a821 100644 --- a/include/nbl/core/math/plane3dSIMD.h +++ b/include/nbl/core/math/plane3dSIMD.h @@ -7,6 +7,8 @@ #define __NBL_CORE_PLANE_3D_H_INCLUDED__ #include +#include +#include namespace nbl { diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h index 5deb0982db..3021cc42ec 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h @@ -13,12 +13,10 @@ #include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" #include "nbl/ext/MitsubaLoader/CElementShape.h" #include "nbl/ext/MitsubaLoader/SContext.h" - +#include namespace nbl::ext::MitsubaLoader { - - class CElementBSDF; class CMitsubaMaterialCompilerFrontend; @@ -27,7 +25,7 @@ class CMitsubaMaterialCompilerFrontend; //#include "nbl/builtin/glsl/ext/MitsubaLoader/instance_data_struct.glsl" #define uint uint32_t #define uvec2 uint64_t -#define mat4x3 nbl::core::matrix3x4SIMD +#define mat4x3 nbl::hlsl::float32_t3x4 #define nbl_glsl_MC_material_data_t asset::material_compiler::material_data_t struct nbl_glsl_ext_Mitsuba_Loader_instance_data_t { @@ -70,8 +68,8 @@ class CMitsubaLoader : public asset::IRenderpassIndependentPipelineLoader // core::vector getMesh(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape, const system::logger_opt_ptr& logger); - core::vector loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const core::matrix3x4SIMD& relTform, const system::logger_opt_ptr& _logger); - SContext::shape_ass_type loadBasicShape(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape, const core::matrix3x4SIMD& relTform, const system::logger_opt_ptr& logger); + core::vector loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const hlsl::float32_t3x4& relTform, const system::logger_opt_ptr& _logger); + SContext::shape_ass_type loadBasicShape(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape, const hlsl::float32_t3x4& relTform, const system::logger_opt_ptr& logger); void cacheTexture(SContext& ctx, uint32_t hierarchyLevel, const CElementTexture* texture, const CMitsubaMaterialCompilerFrontend::E_IMAGE_VIEW_SEMANTIC semantic); diff --git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index 4069b03583..88129918cb 100644 --- a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -190,7 +190,7 @@ namespace MitsubaLoader struct SInstanceData { - SInstanceData(core::matrix3x4SIMD _tform, SContext::bsdf_type _bsdf, const std::string& _id, const CElementEmitter& _emitterFront, const CElementEmitter& _emitterBack) : + SInstanceData(hlsl::float32_t3x4 _tform, SContext::bsdf_type _bsdf, const std::string& _id, const CElementEmitter& _emitterFront, const CElementEmitter& _emitterBack) : tform(_tform), bsdf(_bsdf), #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) bsdf_id(_id), @@ -198,7 +198,7 @@ namespace MitsubaLoader emitter{_emitterFront, _emitterBack} {} - core::matrix3x4SIMD tform; + hlsl::float32_t3x4 tform; SContext::bsdf_type bsdf; #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) std::string bsdf_id; diff --git a/include/nbl/video/IGPUAccelerationStructure.h b/include/nbl/video/IGPUAccelerationStructure.h index 8e451b7a8c..20b350be0b 100644 --- a/include/nbl/video/IGPUAccelerationStructure.h +++ b/include/nbl/video/IGPUAccelerationStructure.h @@ -279,7 +279,7 @@ class IGPUBottomLevelAccelerationStructure : public asset::IBottomLevelAccelerat // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdBuildAccelerationStructuresIndirectKHR-pInfos-03809 // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdBuildAccelerationStructuresIndirectKHR-pInfos-03810 // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkBuildAccelerationStructuresKHR-pInfos-03773 - if (Base::invalidInputBuffer(geometry.transform,buildRangeInfo.transformByteOffset,1u,sizeof(core::matrix3x4SIMD),sizeof(core::vectorSIMDf))) + if (Base::invalidInputBuffer(geometry.transform,buildRangeInfo.transformByteOffset,1u,sizeof(hlsl::float32_t3x4),sizeof(core::vectorSIMDf))) return false; } else diff --git a/include/quaternion.h b/include/quaternion.h deleted file mode 100644 index a0fd689dc6..0000000000 --- a/include/quaternion.h +++ /dev/null @@ -1,463 +0,0 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" -// For conditions of distribution and use, see copyright notice in nabla.h -// See the original file in irrlicht source for authors - -#ifndef __NBL_QUATERNION_H_INCLUDED__ -#define __NBL_QUATERNION_H_INCLUDED__ - - -#include "vectorSIMD.h" - -#include "nbl/core/math/glslFunctions.h" - - -namespace nbl -{ -namespace core -{ - -class matrix3x4SIMD; - - -//! Quaternion class for representing rotations. -/** It provides cheap combinations and avoids gimbal locks. -Also useful for interpolations. */ -// TODO: move this to nbl::hlsl -class quaternion : public vectorSIMDf -{ - public: - //! Default Constructor - inline quaternion() : vectorSIMDf(0,0,0,1) {} - - inline quaternion(const quaternion& other) : vectorSIMDf(static_cast(other)) {} - - inline quaternion(const float* data) : vectorSIMDf(data) {} - - //! Constructor - inline quaternion(const float& x, const float& y, const float& z, const float& w) : vectorSIMDf(x,y,z,w) { } - - //! Constructor which converts euler angles (radians) to a quaternion - inline quaternion(const float& pitch, const float& yaw, const float& roll) {set(pitch,yaw,roll);} - - //! Constructor which converts a matrix to a quaternion - explicit quaternion(const matrix3x4SIMD& m); - - inline float* getPointer() {return pointer;} - - //! Equalilty operator - inline vector4db_SIMD operator==(const quaternion& other) const {return vectorSIMDf::operator==(other);} - - //! inequality operator - inline vector4db_SIMD operator!=(const quaternion& other) const {return vectorSIMDf::operator!=(other);} - - //! Assignment operator - inline quaternion& operator=(const quaternion& other) {return reinterpret_cast(vectorSIMDf::operator=(other));} - - //! Multiplication operator with scalar - inline quaternion operator*(const float& s) const - { - quaternion tmp; - reinterpret_cast(tmp) = reinterpret_cast(this)->operator*(s); - return tmp; - } - - //! Multiplication operator with scalar - inline quaternion& operator*=(const float& s) - { - *this = (*this)*s; - return *this; - } - - //! Multiplication operator - inline quaternion& operator*=(const quaternion& other) - { - *this = (*this)*other; - return *this; - } - - //! Multiplication operator - //http://momchil-velikov.blogspot.fr/2013/10/fast-sse-quternion-multiplication.html - inline quaternion operator*(const quaternion& other) const - { - __m128 xyzw = vectorSIMDf::getAsRegister(); - __m128 abcd = reinterpret_cast(other).getAsRegister(); - - __m128 t0 = FAST_FLOAT_SHUFFLE(abcd, _MM_SHUFFLE (3, 3, 3, 3)); /* 1, 0.5 */ - __m128 t1 = FAST_FLOAT_SHUFFLE(xyzw, _MM_SHUFFLE (2, 3, 0, 1)); /* 1, 0.5 */ - - __m128 t3 = FAST_FLOAT_SHUFFLE(abcd, _MM_SHUFFLE (0, 0, 0, 0)); /* 1, 0.5 */ - __m128 t4 = FAST_FLOAT_SHUFFLE(xyzw, _MM_SHUFFLE (1, 0, 3, 2)); /* 1, 0.5 */ - - __m128 t5 = FAST_FLOAT_SHUFFLE(abcd, _MM_SHUFFLE (1, 1, 1, 1)); /* 1, 0.5 */ - __m128 t6 = FAST_FLOAT_SHUFFLE(xyzw, _MM_SHUFFLE (2, 0, 3, 1)); /* 1, 0.5 */ - - /* [d,d,d,d]*[z,w,x,y] = [dz,dw,dx,dy] */ - __m128 m0 = _mm_mul_ps (t0, t1); /* 5/4, 1 */ - - /* [a,a,a,a]*[y,x,w,z] = [ay,ax,aw,az]*/ - __m128 m1 = _mm_mul_ps (t3, t4); /* 5/4, 1 */ - - /* [b,b,b,b]*[z,x,w,y] = [bz,bx,bw,by]*/ - __m128 m2 = _mm_mul_ps (t5, t6); /* 5/4, 1 */ - - /* [c,c,c,c]*[w,z,x,y] = [cw,cz,cx,cy] */ - __m128 t7 = FAST_FLOAT_SHUFFLE(abcd, _MM_SHUFFLE (2, 2, 2, 2)); /* 1, 0.5 */ - __m128 t8 = FAST_FLOAT_SHUFFLE(xyzw, _MM_SHUFFLE (3, 2, 0, 1)); /* 1, 0.5 */ - - __m128 m3 = _mm_mul_ps (t7, t8); /* 5/4, 1 */ - - /* 1 */ - /* [dz,dw,dx,dy]+-[ay,ax,aw,az] = [dz+ay,dw-ax,dx+aw,dy-az] */ - __m128 e = _mm_addsub_ps (m0, m1); /* 3, 1 */ - - /* 2 */ - /* [dx+aw,dz+ay,dy-az,dw-ax] */ - e = FAST_FLOAT_SHUFFLE(e, _MM_SHUFFLE (1, 3, 0, 2)); /* 1, 0.5 */ - - /* [dx+aw,dz+ay,dy-az,dw-ax]+-[bz,bx,bw,by] = [dx+aw+bz,dz+ay-bx,dy-az+bw,dw-ax-by]*/ - e = _mm_addsub_ps (e, m2); /* 3, 1 */ - - /* 2 */ - /* [dz+ay-bx,dw-ax-by,dy-az+bw,dx+aw+bz] */ - e = FAST_FLOAT_SHUFFLE(e, _MM_SHUFFLE (2, 0, 1, 3)); /* 1, 0.5 */ - - /* [dz+ay-bx,dw-ax-by,dy-az+bw,dx+aw+bz]+-[cw,cz,cx,cy] - = [dz+ay-bx+cw,dw-ax-by-cz,dy-az+bw+cx,dx+aw+bz-cy] */ - e = _mm_addsub_ps (e, m3); /* 3, 1 */ - - /* 2 */ - /* [dw-ax-by-cz,dz+ay-bx+cw,dy-az+bw+cx,dx+aw+bz-cy] */ - quaternion tmp; - reinterpret_cast(tmp) = FAST_FLOAT_SHUFFLE(e, _MM_SHUFFLE (2, 3, 1, 0)); /* 1, 0.5 */ - return tmp; - } - - inline vectorSIMDf transformVect(const vectorSIMDf& vec) - { - vectorSIMDf direction = *reinterpret_cast(this); - vectorSIMDf scale = core::length(direction); - direction.makeSafe3D(); - - return scale*vec+cross(direction,vec*W+cross(direction,vec))*2.f; - } - - //! Sets new quaternion - inline quaternion& set(const vectorSIMDf& xyzw) - { - *this = reinterpret_cast(xyzw); - return *this; - } - - //! Sets new quaternion based on euler angles (radians) - inline quaternion& set(const float& roll, const float& pitch, const float& yaw); - - //! Sets new quaternion from other quaternion - inline quaternion& set(const quaternion& quat) - { - *this = quat; - return *this; - } - - //! Inverts this quaternion - inline void makeInverse() - { - reinterpret_cast(*this) ^= _mm_set_epi32(0x0u,0x80000000u,0x80000000u,0x80000000u); - } - - //! Fills an angle (radians) around an axis (unit vector) - void toAngleAxis(float& angle, vector3df_SIMD& axis) const; - - //! Output this quaternion to an euler angle (radians) - void toEuler(vector3df_SIMD& euler) const; - - //! Set quaternion to identity - inline void makeIdentity() {vectorSIMDf::set(0,0,0,1);} - - - vectorSIMDf& getData() {return *((vectorSIMDf*)this);} - -//statics - inline static quaternion normalize(const quaternion& in) - { - quaternion tmp; - reinterpret_cast(tmp) = core::normalize(reinterpret_cast(in)); - return tmp; - } - - //! Helper func - static quaternion lerp(const quaternion &q1, const quaternion &q2, const float& interpolant, const bool& wrongDoubleCover); - - //! Set this quaternion to the linear interpolation between two quaternions - /** \param q1 First quaternion to be interpolated. - \param q2 Second quaternion to be interpolated. - \param interpolant Progress of interpolation. For interpolant=0 the result is - q1, for interpolant=1 the result is q2. Otherwise interpolation - between q1 and q2. - */ - static quaternion lerp(const quaternion &q1, const quaternion &q2, const float& interpolant); - - //! Helper func - static inline void flerp_interpolant_terms(float& interpolantPrecalcTerm2, float& interpolantPrecalcTerm3, const float& interpolant) - { - interpolantPrecalcTerm2 = (interpolant - 0.5f) * (interpolant - 0.5f); - interpolantPrecalcTerm3 = interpolant * (interpolant - 0.5f) * (interpolant - 1.f); - } - - static float flerp_adjustedinterpolant(const float& angle, const float& interpolant, const float& interpolantPrecalcTerm2, const float& interpolantPrecalcTerm3); - - //! Set this quaternion to the approximate slerp between two quaternions - /** \param q1 First quaternion to be interpolated. - \param q2 Second quaternion to be interpolated. - \param interpolant Progress of interpolation. For interpolant=0 the result is - q1, for interpolant=1 the result is q2. Otherwise interpolation - between q1 and q2. - */ - static quaternion flerp(const quaternion &q1, const quaternion &q2, const float& interpolant); - - //! Set this quaternion to the result of the spherical interpolation between two quaternions - /** \param q1 First quaternion to be interpolated. - \param q2 Second quaternion to be interpolated. - \param time Progress of interpolation. For interpolant=0 the result is - q1, for interpolant=1 the result is q2. Otherwise interpolation - between q1 and q2. - \param threshold To avoid inaccuracies the - interpolation switches to linear interpolation at some point. - This value defines how much of the interpolation will - be calculated with lerp. - */ - static quaternion slerp(const quaternion& q1, const quaternion& q2, - const float& interpolant, const float& threshold=.05f); - - inline static quaternion fromEuler(const vector3df_SIMD& euler) - { - quaternion tmp; - tmp.set(euler.X,euler.Y,euler.Z); - return tmp; - } - - inline static quaternion fromEuler(const vector3df& euler) - { - quaternion tmp; - tmp.set(euler.X,euler.Y,euler.Z); - return tmp; - } - - //! Set quaternion to represent a rotation from one vector to another. - static quaternion rotationFromTo(const vector3df_SIMD& from, const vector3df_SIMD& to); - - //! Create quaternion from rotation angle and rotation axis. - /** Axis must be unit length. - The quaternion representing the rotation is - q = cos(A/2)+sin(A/2)*(x*i+y*j+z*k). - \param angle Rotation Angle in radians. - \param axis Rotation axis. */ - static quaternion fromAngleAxis(const float& angle, const vector3df_SIMD& axis); -}; -static_assert(sizeof(quaternion) == sizeof(vectorSIMDf), "Quaternion not same size as vec4"); - - -// set this quaternion to the result of the linear interpolation between two quaternions -inline quaternion quaternion::lerp(const quaternion &q1, const quaternion &q2, const float& interpolant, const bool& wrongDoubleCover) -{ - vectorSIMDf retval; - if (wrongDoubleCover) - retval = mix(reinterpret_cast(q1),-reinterpret_cast(q2),vectorSIMDf(interpolant)); - else - retval = mix(reinterpret_cast(q1), reinterpret_cast(q2),vectorSIMDf(interpolant)); - return reinterpret_cast(retval); -} - -// set this quaternion to the result of the linear interpolation between two quaternions -inline quaternion quaternion::lerp(const quaternion &q1, const quaternion &q2, const float& interpolant) -{ - const float angle = dot(q1,q2)[0]; - return lerp(q1,q2,interpolant,angle < 0.0f); -} - -// Arseny Kapoulkine -inline float quaternion::flerp_adjustedinterpolant(const float& angle, const float& interpolant, const float& interpolantPrecalcTerm2, const float& interpolantPrecalcTerm3) -{ - float A = 1.0904f + angle * (-3.2452f + angle * (3.55645f - angle * 1.43519f)); - float B = 0.848013f + angle * (-1.06021f + angle * 0.215638f); - float k = A * interpolantPrecalcTerm2 + B; - float ot = interpolant + interpolantPrecalcTerm3 * k; - return ot; -} - -// set this quaternion to the result of an approximate slerp -inline quaternion quaternion::flerp(const quaternion &q1, const quaternion &q2, const float& interpolant) -{ - const float angle = dot(q1,q2)[0]; - return lerp(q1,q2,flerp_adjustedinterpolant(fabsf(angle),interpolant,(interpolant - 0.5f) * (interpolant - 0.5f),interpolant * (interpolant - 0.5f) * (interpolant - 1.f)),angle < 0.0f); -} - - -// set this quaternion to the result of the interpolation between two quaternions -inline quaternion quaternion::slerp(const quaternion &q1, const quaternion &q2, const float& interpolant, const float& threshold) -{ - float angle = dot(q1,q2)[0]; - - // make sure we use the short rotation - bool wrongDoubleCover = angle < 0.0f; - if (wrongDoubleCover) - angle *= -1.f; - - if (angle <= (1.f-threshold)) // spherical interpolation - { // acosf + sinf - vectorSIMDf retval; - - const float sinARcp = inversesqrt(1.f-angle*angle); - const float sinAt = sinf(acosf(angle) * interpolant); // could this line be optimized? - //1sqrt 3min/add 5mul from now on - const float sinAt_over_sinA = sinAt*sinARcp; - - const float scale = core::sqrt(1.f-sinAt*sinAt)-angle*sinAt_over_sinA; //cosAt-cos(A)sin(tA)/sin(A) = (sin(A)cos(tA)-cos(A)sin(tA))/sin(A) - if (wrongDoubleCover) // make sure we use the short rotation - retval = reinterpret_cast(q1)*scale - reinterpret_cast(q2)*sinAt_over_sinA; - else - retval = reinterpret_cast(q1)*scale + reinterpret_cast(q2)*sinAt_over_sinA; - - return reinterpret_cast(retval); - } - else - return normalize(lerp(q1,q2,interpolant,wrongDoubleCover)); -} - - -#if !NBL_TEST_BROKEN_QUATERNION_USE -//! axis must be unit length, angle in radians -inline quaternion quaternion::fromAngleAxis(const float& angle, const vector3df_SIMD& axis) -{ - const float fHalfAngle = 0.5f*angle; - const float fSin = sinf(fHalfAngle); - quaternion retval; - reinterpret_cast(retval) = axis*fSin; - reinterpret_cast(retval).W = cosf(fHalfAngle); - return retval; -} - - -inline void quaternion::toAngleAxis(float& angle, vector3df_SIMD &axis) const -{ - vectorSIMDf scale = core::length(*reinterpret_cast(this)); - - if (scale.X==0.f) - { - angle = 0.0f; - axis.X = 0.0f; - axis.Y = 1.0f; - axis.Z = 0.0f; - } - else - { - axis = reinterpret_cast(this)->operator/(scale); - angle = 2.f * acosf(axis.W); - - axis.makeSafe3D(); - } -} - -inline void quaternion::toEuler(vector3df_SIMD& euler) const -{ - vectorSIMDf sqr = *reinterpret_cast(this); - sqr *= sqr; - const double test = 2.0 * (Y*W - X*Z); - - if (core::equals(test, 1.0, 0.000001)) - { - // heading = rotation about z-axis - euler.Z = (float) (-2.0*atan2(X, W)); - // bank = rotation about x-axis - euler.X = 0; - // attitude = rotation about y-axis - euler.Y = core::HALF_PI(); - } - else if (core::equals(test, -1.0, 0.000001)) - { - // heading = rotation about z-axis - euler.Z = (float) (2.0*atan2(X, W)); - // bank = rotation about x-axis - euler.X = 0; - // attitude = rotation about y-axis - euler.Y = -core::HALF_PI(); - } - else - { - // heading = rotation about z-axis - euler.Z = (float) atan2(2.0 * (X*Y +Z*W),(sqr.X - sqr.Y - sqr.Z + sqr.W)); - // bank = rotation about x-axis - euler.X = (float) atan2(2.0 * (Y*Z +X*W),(-sqr.X - sqr.Y + sqr.Z + sqr.W)); - // attitude = rotation about y-axis - euler.Y = (float) asin( core::clamp(test, -1.0, 1.0) ); - } -} - -inline quaternion quaternion::rotationFromTo(const vector3df_SIMD& from, const vector3df_SIMD& to) -{ - // Based on Stan Melax's article in Game Programming Gems - // Copy, since cannot modify local - vector3df_SIMD v0 = from; - vector3df_SIMD v1 = to; - v0 = core::normalize(v0); - v1 = core::normalize(v1); - - const vectorSIMDf dddd = core::dot(v0,v1); - quaternion tmp; - if (dddd.X >= 1.0f) // If dot == 1, vectors are the same - { - return tmp; - } - else if (dddd.X <= -1.0f) // exactly opposite - { - vector3df_SIMD axis(1.0f, 0.f, 0.f); - axis = cross(axis,v0); - if (length(axis)[0]==0.f) - { - axis.set(0.f,1.f,0.f); - axis = cross(axis,v0); - } - // same as fromAngleAxis(PI, axis).normalize(); - reinterpret_cast(tmp) = axis; - return normalize(tmp); - } - - vectorSIMDf s = core::sqrt(vectorSIMDf(2.f,2.f,2.f,0.f)+dddd*2.f); - reinterpret_cast(tmp) = cross(v0,v1)*reciprocal_approxim(s); - tmp.W = s.X*0.5f; - return normalize(tmp); -} -#endif - -// sets new quaternion based on euler angles -inline quaternion& quaternion::set(const float& roll, const float& pitch, const float& yaw) -{ - float angle; - - angle = roll * 0.5f; - const float sr = sinf(angle); - const float cr = cosf(angle); - - angle = pitch * 0.5f; - const float sp = sinf(angle); - const float cp = cos(angle); - - angle = yaw * 0.5f; - const float sy = sinf(angle); - const float cy = cosf(angle); - - const float cpcy = cp * cy; - const float spcy = sp * cy; - const float cpsy = cp * sy; - const float spsy = sp * sy; - - *reinterpret_cast(this) = vectorSIMDf(sr,cr,cr,cr)*vectorSIMDf(cpcy,spcy,cpsy,cpcy)+vectorSIMDf(-cr,sr,-sr,sr)*vectorSIMDf(spsy,cpsy,spcy,spsy); - - return *this; -} - -} // end namespace core -} // end namespace nbl - -#endif - diff --git a/include/vectorSIMD.h b/include/vectorSIMD.h index 4c9c90d236..7d0dc8d966 100644 --- a/include/vectorSIMD.h +++ b/include/vectorSIMD.h @@ -887,6 +887,24 @@ namespace core } }; + inline hlsl::float32_t4 getAsVec4(const vectorSIMDf& vec) + { + hlsl::float32_t4 output; + for (int i = 0; i < 4; ++i) + output[i] = vec[i]; + + return output; + } + + inline hlsl::float32_t3 getAsVec3(const vectorSIMDf& vec) + { + hlsl::float32_t3 output; + for (int i = 0; i < 3; ++i) + output[i] = vec[i]; + + return output; + } + } // end namespace core } // end namespace nbl diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index e6a23fdb44..8b65544136 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -297,7 +297,7 @@ void IAssetManager::insertBuiltinAssets() addBuiltInToCaches(ds3Layout, "nbl/builtin/material/lambertian/singletexture/descriptor_set_layout/3"); // TODO find everything what has been using it so far constexpr uint32_t pcCount = 1u; - asset::SPushConstantRange pcRanges[pcCount] = {asset::IShader::E_SHADER_STAGE::ESS_VERTEX,0u,sizeof(core::matrix4SIMD)}; + asset::SPushConstantRange pcRanges[pcCount] = {asset::IShader::E_SHADER_STAGE::ESS_VERTEX,0u,sizeof(hlsl::float32_t3x4)}; auto pLayout = core::make_smart_refctd_ptr( std::span(pcRanges,pcCount), nullptr,core::smart_refctd_ptr(ds1Layout),nullptr,core::smart_refctd_ptr(ds3Layout) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index c740b85d97..ac065a8c97 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -696,10 +696,9 @@ CGeometryCreator::return_type CGeometryCreator::createDiskMesh(float radius, uin //v1, v2, ..., vn-1 for (int i = 2; i < vertexCount-1; i++) { - core::vectorSIMDf vn; hlsl::float32_t3x4 rotMatrix; - rotMatrix.setRotation(hlsl::quaternion(0.0f, 0.0f, core::radians((i-1)*angle))); - rotMatrix.transformVect(vn, v0); + hlsl::setRotation(rotMatrix, hlsl::quaternion::create(0.0f, 0.0f, core::radians((i - 1) * angle))); + core::vectorSIMDf vn = hlsl::transformVector(hlsl::getMatrix3x4As4x4(rotMatrix), v0); ptr[i] = DiskVertex(vn, video::SColor(0xFFFFFFFFu), core::vector2du32_SIMD(0u, 1u), core::vector3df_SIMD(0.0f, 0.0f, 1.0f)); diff --git a/src/nbl/asset/utils/CMeshManipulator.cpp b/src/nbl/asset/utils/CMeshManipulator.cpp index 86ead8d302..a76b1ed9d8 100644 --- a/src/nbl/asset/utils/CMeshManipulator.cpp +++ b/src/nbl/asset/utils/CMeshManipulator.cpp @@ -1708,19 +1708,20 @@ hlsl::float32_t3x4 IMeshManipulator::calculateOBB(const nbl::asset::ICPUMeshBuff float ABBQuality = ABBDiff.x * ABBDiff.y + ABBDiff.y * ABBDiff.z + ABBDiff.z * ABBDiff.x; hlsl::float32_t3x4 scaleMat; hlsl::float32_t3x4 translationMat; - hlsl::setTranslation(translationMat, -(MinPoint) / OBBDiff); - hlsl::setScale(scaleMat, OBBDiff); + hlsl::setTranslation(translationMat, core::getAsVec3(-(MinPoint) / OBBDiff)); + hlsl::setScale(scaleMat, core::getAsVec3(OBBDiff)); TransMat = hlsl::concatenateBFollowedByA(TransMat, scaleMat); TransMat = hlsl::concatenateBFollowedByA(TransMat, translationMat); - if (ABBQuality < OBBQuality) { - translationMat.setTranslation(-(AABBMin) / ABBDiff); - scaleMat.setScale(ABBDiff); - TransMat = core::matrix3x4SIMD( + if (ABBQuality < OBBQuality) + { + hlsl::setTranslation(translationMat, core::getAsVec3(-(AABBMin) / ABBDiff)); + hlsl::setScale(scaleMat, core::getAsVec3(ABBDiff)); + TransMat = hlsl::float32_t3x4( 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0); - TransMat = core::concatenateBFollowedByA(TransMat, scaleMat); - TransMat = core::concatenateBFollowedByA(TransMat, translationMat); + TransMat = hlsl::concatenateBFollowedByA(TransMat, scaleMat); + TransMat = hlsl::concatenateBFollowedByA(TransMat, translationMat); } return TransMat; From 6f9f099ae6d737d710a8a096562641421b5f4a80 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 29 Oct 2024 14:40:52 -0700 Subject: [PATCH 14/27] Removed core::matrixSIMD --- examples_tests | 2 +- .../nbl/builtin/hlsl/cpp_compat/intrinsics.h | 2 ++ .../transformation_matrix_utils.hlsl | 24 +++++++++---------- .../hlsl/vector_utils/vector_utils.hlsl | 21 ++++++++++++++++ include/nbl/core/math/glslFunctions.h | 8 +++++-- include/nbl/scene/ILevelOfDetailLibrary.h | 3 ++- 6 files changed, 44 insertions(+), 16 deletions(-) create mode 100644 include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl diff --git a/examples_tests b/examples_tests index 6cbd85caa3..b69152cb35 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 6cbd85caa3d646969d7724ca7f07b0b6a72e0957 +Subproject commit b69152cb350ddd994c5d7fde9a89a1d989fef61b diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h index 4d10a669c3..f3f0e9e3eb 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h @@ -41,6 +41,8 @@ NBL_BIT_OP_GLM_PASSTHROUGH(bitCount,bitCount) NBL_SIMPLE_GLM_PASSTHROUGH(cross,cross) NBL_SIMPLE_GLM_PASSTHROUGH(clamp,clamp) +NBL_SIMPLE_GLM_PASSTHROUGH(normalize, normalize) +NBL_SIMPLE_GLM_PASSTHROUGH(length, length) template inline typename scalar_type::type dot(const T& lhs, const T& rhs) {return glm::dot(lhs,rhs);} diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index 8f687fb97a..63d764693d 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -66,14 +66,14 @@ inline matrix buildCameraLookAtMatrixLH( const vector& target, const vector& upVector) { - const vector zaxis = normalize(target - position); - const vector xaxis = normalize(cross(upVector, zaxis)); - const vector yaxis = cross(zaxis, xaxis); + const vector zaxis = hlsl::normalize(target - position); + const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); matrix r; - r[0] = vector(xaxis, -dot(xaxis, position)); - r[1] = vector(yaxis, -dot(yaxis, position)); - r[2] = vector(zaxis, -dot(zaxis, position)); + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); return r; } @@ -84,14 +84,14 @@ inline matrix buildCameraLookAtMatrixRH( const vector& target, const vector& upVector) { - const vector zaxis = normalize(position - target); - const vector xaxis = normalize(cross(upVector, zaxis)); - const vector yaxis = cross(zaxis, xaxis); + const vector zaxis = hlsl::normalize(position - target); + const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); float32_t3x4 r; - r[0] = vector(xaxis, -dot(xaxis, position)); - r[1] = vector(yaxis, -dot(yaxis, position)); - r[2] = vector(zaxis, -dot(zaxis, position)); + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); return r; } diff --git a/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl b/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl new file mode 100644 index 0000000000..e1fa9dd3a0 --- /dev/null +++ b/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl @@ -0,0 +1,21 @@ +#ifndef _NBL_BUILTIN_HLSL_VECTOR_UTILS_VECTOR_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_VECTOR_UTILS_VECTOR_UTILS_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +// TODO: why cant I NBL_CONST_REF_ARG(vector) +template +inline T lengthsquared(vector vec) +{ + return dot(vec, vec); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/core/math/glslFunctions.h b/include/nbl/core/math/glslFunctions.h index 5fabd1126f..30a0344501 100644 --- a/include/nbl/core/math/glslFunctions.h +++ b/include/nbl/core/math/glslFunctions.h @@ -320,7 +320,12 @@ NBL_FORCE_INLINE T lerp(const T& a, const T& b, const U& t) // TODO : packUnorm2x16, packSnorm2x16, packUnorm4x8, packSnorm4x8, unpackUnorm2x16, unpackSnorm2x16, unpackUnorm4x8, unpackSnorm4x8, packHalf2x16, unpackHalf2x16, packDouble2x32, unpackDouble2x32 // MOVE : faceforward, reflect, refract, any, all, not template -NBL_FORCE_INLINE T dot(const T& a, const T& b); +NBL_FORCE_INLINE T dot(const T& a, const T& b) +{ + static_assert(!(std::is_same_v || std::is_same_v || std::is_same_v)); + + return T(0); +} template<> NBL_FORCE_INLINE vectorSIMDf dot(const vectorSIMDf& a, const vectorSIMDf& b); @@ -329,7 +334,6 @@ NBL_FORCE_INLINE vectorSIMD_32 dot>(const vector template<> NBL_FORCE_INLINE vectorSIMD_32 dot>(const vectorSIMD_32& a, const vectorSIMD_32& b); - template NBL_FORCE_INLINE T lengthsquared(const T& v) { diff --git a/include/nbl/scene/ILevelOfDetailLibrary.h b/include/nbl/scene/ILevelOfDetailLibrary.h index 6b68189e7d..be3794f966 100644 --- a/include/nbl/scene/ILevelOfDetailLibrary.h +++ b/include/nbl/scene/ILevelOfDetailLibrary.h @@ -7,6 +7,7 @@ #include "nbl/video/ILogicalDevice.h" #include "nbl/video/utilities/IDrawIndirectAllocator.h" +#include "nbl/builtin/hlsl/cpp_compat/intrinsics.h" namespace nbl::scene { @@ -30,7 +31,7 @@ class ILevelOfDetailLibrary : public virtual core::IReferenceCounted { if (proj[3].w!=0.f) return core::nan(); - return abs(proj[0].x*proj[1].y-proj[0].y*proj[1].x)/core::dot(proj[3],proj[3]).x; + return abs(proj[0].x*proj[1].y-proj[0].y*proj[1].x)/hlsl::dot(proj[3],proj[3]); } }; template class container=core::vector> From 339ec8e33f1dabc6484a3bad1ae2a977bb06c773 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 29 Oct 2024 17:46:33 -0700 Subject: [PATCH 15/27] Fixed `length` function --- examples_tests | 2 +- include/nbl/builtin/hlsl/cpp_compat/intrinsics.h | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 9213b8933d..b1d968927d 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 9213b8933dee9fcdd415587bf1ca1c44ae1e9e70 +Subproject commit b1d968927dc75a83f8f2292ed03f48cdb9d10847 diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h index f56c3553cf..357b89b619 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h @@ -43,7 +43,12 @@ NBL_BIT_OP_GLM_PASSTHROUGH(bitCount,bitCount) NBL_SIMPLE_GLM_PASSTHROUGH(cross,cross) NBL_SIMPLE_GLM_PASSTHROUGH(clamp,clamp) NBL_SIMPLE_GLM_PASSTHROUGH(normalize, normalize) -NBL_SIMPLE_GLM_PASSTHROUGH(length, length) + +template +inline scalar_type_t length(const T& vec) +{ + return glm::length(vec); +} template inline scalar_type_t dot(const T& lhs, const T& rhs) From bfdbdbcd9a1284f5360eb20f4a95af2c351c8c2d Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 29 Oct 2024 12:51:27 -0700 Subject: [PATCH 16/27] Refactor --- examples_tests | 2 +- .../hlsl/math/quaternion/quaternion.hlsl | 36 ++++------- .../transformation_matrix_utils.hlsl | 64 ++++++++++++++++++- .../builtin/hlsl/projection/projection.hlsl | 51 +++++++++++++++ 4 files changed, 129 insertions(+), 24 deletions(-) diff --git a/examples_tests b/examples_tests index b1d968927d..1a694ebbd2 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit b1d968927dc75a83f8f2292ed03f48cdb9d10847 +Subproject commit 1a694ebbd26cd62a92d791fed4ac1b35f176d531 diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl index 2f24e52635..bc0286e778 100644 --- a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl @@ -17,26 +17,26 @@ namespace hlsl /** It provides cheap combinations and avoids gimbal locks. Also useful for interpolations. */ -template +template struct quaternion { // i*data[0] + j*data[1] + k*data[2] + data[3] - using vec_t = vector; - vector data; + using vec_t = vector; + vector data; //! creates identity quaternion static inline quaternion create() { quaternion q; - q.data = vector(0.0f, 0.0f, 0.0f, 1.0f); + q.data = vector(0.0f, 0.0f, 0.0f, 1.0f); return q; } - static inline quaternion create(T x, T y, T z, T w) + static inline quaternion create(float_t x, float_t y, float_t z, float_t w) { quaternion q; - q.data = vector(x, y, z, w); + q.data = vector(x, y, z, w); return q; } @@ -46,7 +46,7 @@ struct quaternion return other; } - static inline quaternion create(T pitch, T yaw, T roll) + static inline quaternion create(float_t pitch, float_t yaw, float_t roll) { float angle; @@ -67,7 +67,7 @@ struct quaternion const float cpsy = cp * sy; const float spsy = sp * sy; - quaternion output; + quaternion output; output.data = float32_t4(sr, cr, cr, cr) * float32_t4(cpcy, spcy, cpsy, cpcy) + float32_t4(-cr, sr, -sr, sr) * float32_t4(spsy, cpsy, spcy, spsy); return output; @@ -76,20 +76,12 @@ struct quaternion // TODO: //explicit quaternion(NBL_CONST_REF_ARG(float32_t3x4) m) {} -#define DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(TYPE)\ - inline quaternion operator*(TYPE scalar)\ - {\ - quaternion output;\ - output.data = data * scalar;\ - return output;\ - }\ - - DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(uint32_t) - DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(uint64_t) - DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(float32_t) - DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR(float64_t) - -#undef DEFINE_MUL_QUATERNION_BY_SCALAR_OPERATOR + inline quaternion operator*(float_t scalar) + { + quaternion output; + output.data = data * scalar; + return output; + } inline quaternion operator*(NBL_CONST_REF_ARG(quaternion) other) { diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index 63d764693d..8fcccdafda 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -12,7 +12,16 @@ namespace nbl namespace hlsl { -NBL_CONSTEXPR hlsl::float32_t3x4 IdentityFloat32_t3x4 = hlsl::float32_t3x4(hlsl::float32_t4(1, 0, 0, 0), hlsl::float32_t4(0, 0, 1, 0), hlsl::float32_t4(0, 0, 1, 0)); +// TODO: if `IdentityFloat32_t3x4` and `IdentityFloat32_t3x4` constexprs are ok, then I can expand them into templated struct, not doing it untill the concept is approved +//template +//struct IdentityMatrix +//{ +// +//}; +NBL_CONSTEXPR hlsl::float32_t3x4 IdentityFloat32_t3x4 = + hlsl::float32_t3x4(hlsl::float32_t4(1, 0, 0, 0), hlsl::float32_t4(0, 0, 1, 0), hlsl::float32_t4(0, 0, 1, 0)); +NBL_CONSTEXPR hlsl::float32_t4x4 IdentityFloat32_t4x4 = + hlsl::float32_t4x4(hlsl::float32_t4(1, 0, 0, 0), hlsl::float32_t4(0, 0, 1, 0), hlsl::float32_t4(0, 0, 1, 0), hlsl::float32_t4(0, 0, 0, 1)); // TODO: this is temporary function, delete when removing vectorSIMD template @@ -40,6 +49,12 @@ inline matrix getMatrix3x4As4x4(matrix mat) return output; } +template +inline matrix getSub3x3(matrix mat) +{ + return matrix(mat); +} + template inline matrix getAs64BitPrecisionMatrix(matrix mat) { @@ -50,6 +65,53 @@ inline matrix getAs64BitPrecisionMatrix(matrix return output; } +namespace transformation_matrix_utils_impl +{ + template + inline T determinant_helper(matrix mat, vector& r1crossr2) + { + r1crossr2 = hlsl::cross(mat[1], mat[2]); + return hlsl::dot(mat[0], r1crossr2); + } +} + +template +inline matrix getSub3x3TransposeCofactors(const matrix& mat) +{ + static_assert(N >= 3 && M >= 3); + + matrix output; + vector row0 = vector(mat[0]); + vector row1 = vector(mat[1]); + vector row2 = vector(mat[2]); + output[0] = hlsl::cross(row1, row2); + output[1] = hlsl::cross(row2, row0); + output[2] = hlsl::cross(row0, row1); + + output[0] = hlsl::cross(row0, row1); + + return output; +} + +template +inline bool getSub3x3InverseTranspose(const matrix& matIn, matrix& matOut) +{ + matrix matIn3x3 = getSub3x3(matIn); + vector r1crossr2; + T d = transformation_matrix_utils_impl::determinant_helper(matIn3x3, r1crossr2); + if (core::iszero(d, FLT_MIN)) + return false; + auto rcp = core::reciprocal(d); + + // matrix of cofactors * 1/det + matOut = getSub3x3TransposeCofactors(matIn3x3); + matOut[0] *= rcp; + matOut[1] *= rcp; + matOut[2] *= rcp; + + return true; +} + // TODO: use portable_float when merged //! multiplies matrices a and b, 3x4 matrices are treated as 4x4 matrices with 4th row set to (0, 0, 0 ,1) template diff --git a/include/nbl/builtin/hlsl/projection/projection.hlsl b/include/nbl/builtin/hlsl/projection/projection.hlsl index d973fda25d..ecf180b5c6 100644 --- a/include/nbl/builtin/hlsl/projection/projection.hlsl +++ b/include/nbl/builtin/hlsl/projection/projection.hlsl @@ -27,6 +27,57 @@ inline float32_t4x4 buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadia return m; } +inline float32_t4x4 buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +{ + const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); +#ifndef __HLSL_VERSION + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero +#endif + const float w = h / aspectRatio; + + float32_t4x4 m; + m[0] = hlsl::float32_t4(w, 0.f, 0.f, 0.f); + m[1] = hlsl::float32_t4(0.f, -h, 0.f, 0.f); + m[2] = hlsl::float32_t4(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = hlsl::float32_t4(0.f, 0.f, -1.f, 0.f); + + return m; +} + +inline float32_t4x4 buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +{ +#ifndef __HLSL_VERSION + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero +#endif + + float32_t4x4 m; + m[0] = hlsl::float32_t4(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = hlsl::float32_t4(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = hlsl::float32_t4(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = hlsl::float32_t4(0.f, 0.f, 0.f, 1.f); + + return m; +} +inline float32_t4x4 buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +{ +#ifndef __HLSL_VERSION + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero +#endif + + float32_t4x4 m; + m[0] = hlsl::float32_t4(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = hlsl::float32_t4(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = hlsl::float32_t4(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = hlsl::float32_t4(0.f, 0.f, 0.f, 1.f); + + return m; +} + } } From f00262d5d3375720b5ed3ad7f2c7b32f209566a7 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 31 Oct 2024 15:45:05 -0700 Subject: [PATCH 17/27] Refactor --- .../hlsl/matrix_utils/transformation_matrix_utils.hlsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index 8fcccdafda..6938709e78 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -39,7 +39,7 @@ inline core::vectorSIMDf transformVector(const matrix& mat, const core: } template -inline matrix getMatrix3x4As4x4(matrix mat) +inline matrix getMatrix3x4As4x4(const matrix& mat) { matrix output; for (int i = 0; i < 3; ++i) @@ -50,13 +50,13 @@ inline matrix getMatrix3x4As4x4(matrix mat) } template -inline matrix getSub3x3(matrix mat) +inline matrix getSub3x3(const matrix& mat) { return matrix(mat); } template -inline matrix getAs64BitPrecisionMatrix(matrix mat) +inline matrix getAs64BitPrecisionMatrix(const matrix& mat) { matrix output; for (int i = 0; i < N; ++i) @@ -68,7 +68,7 @@ inline matrix getAs64BitPrecisionMatrix(matrix namespace transformation_matrix_utils_impl { template - inline T determinant_helper(matrix mat, vector& r1crossr2) + inline T determinant_helper(const matrix& mat, vector& r1crossr2) { r1crossr2 = hlsl::cross(mat[1], mat[2]); return hlsl::dot(mat[0], r1crossr2); From a0890f0df957e0d989d956ec41c41fd3be72791a Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Sat, 2 Nov 2024 08:52:21 +0100 Subject: [PATCH 18/27] forgot to commit projection build methods, update examples_tests submodule --- examples_tests | 2 +- .../transformation_matrix_utils.hlsl | 68 +++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 0574d725b2..ccbb37cf2b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 0574d725b2d0ae02a0d0e409e642a14981bc1e97 +Subproject commit ccbb37cf2bf150dec0c9ad8ea5d328fbb3806745 diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index b2534f9ac8..aee84405a2 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -109,6 +109,74 @@ inline void setTranslation(matrix& outMat, NBL_CONST_REF_ARG(vector +inline matrix buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +{ + const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + const float w = h / aspectRatio; + + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, -1.f, 0.f); + + return m; +} +template +inline matrix buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +{ + const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + const float w = h / aspectRatio; + + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 1.f, 0.f); + + return m; +} + +template +inline matrix buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +{ + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); + + return m; +} + +template +inline matrix buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +{ + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); + + return m; +} + } } From 0109b22b397ef99053e7b87ff028ca49a8ab8e2d Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Sat, 2 Nov 2024 15:50:37 +0100 Subject: [PATCH 19/27] add is_smart_refctd_ptr_v, update examples_tests submodule --- examples_tests | 2 +- .../hlsl/matrix_utils/transformation_matrix_utils.hlsl | 8 ++++---- include/nbl/core/decl/smart_refctd_ptr.h | 9 +++++++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/examples_tests b/examples_tests index ccbb37cf2b..d3f325d6d2 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ccbb37cf2bf150dec0c9ad8ea5d328fbb3806745 +Subproject commit d3f325d6d2b4c9c623b8e70a0882a1bae17471fb diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index aee84405a2..22bc82ea13 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -110,7 +110,7 @@ inline void setTranslation(matrix& outMat, NBL_CONST_REF_ARG(vector +template inline matrix buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) { const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); @@ -127,7 +127,7 @@ inline matrix buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRa return m; } -template +template inline matrix buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) { const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); @@ -145,7 +145,7 @@ inline matrix buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRa return m; } -template +template inline matrix buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) { _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero @@ -161,7 +161,7 @@ inline matrix buildProjectionMatrixOrthoRH(float widthOfViewVolume, flo return m; } -template +template inline matrix buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) { _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero diff --git a/include/nbl/core/decl/smart_refctd_ptr.h b/include/nbl/core/decl/smart_refctd_ptr.h index 7c231fea4b..74fdf61693 100644 --- a/include/nbl/core/decl/smart_refctd_ptr.h +++ b/include/nbl/core/decl/smart_refctd_ptr.h @@ -144,6 +144,15 @@ smart_refctd_ptr move_and_dynamic_cast(smart_refctd_ptr& smart_ptr); template< class U, class T > smart_refctd_ptr move_and_dynamic_cast(smart_refctd_ptr&& smart_ptr) {return move_and_dynamic_cast(smart_ptr);} +template +struct is_smart_refctd_ptr : std::false_type {}; + +template +struct is_smart_refctd_ptr> : std::true_type {}; + +template +inline constexpr bool is_smart_refctd_ptr_v = is_smart_refctd_ptr::value; + } // end namespace nbl::core /* From 2a29a2f970ea114476cec1fa741075773ad1dbf0 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Sun, 3 Nov 2024 19:30:28 +0100 Subject: [PATCH 20/27] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index d3f325d6d2..f9fce56971 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit d3f325d6d2b4c9c623b8e70a0882a1bae17471fb +Subproject commit f9fce56971dd80264eb107c413ba0b038f4ebf96 From 7673769d878a6a12034d2a97fd51a49abb077a6c Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Sun, 3 Nov 2024 20:12:10 +0100 Subject: [PATCH 21/27] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index f9fce56971..2f44640ec4 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f9fce56971dd80264eb107c413ba0b038f4ebf96 +Subproject commit 2f44640ec4e823c5d97b427562b291fd0e3eb62f From 3fee9c1ee3bc730ea2160f6b021a23bbcfed52ee Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 4 Nov 2024 15:30:49 -0800 Subject: [PATCH 22/27] Updated examples_tests --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 88c3d34a86..424a71a55a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 88c3d34a866c96a7037e9b916a71b37b7abe0b33 +Subproject commit 424a71a55a457231aa782a6fe6083a2fb9f082f4 From 9746e6adbbd3a83f01a22b1721c44e463377df1f Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 4 Nov 2024 15:38:50 -0800 Subject: [PATCH 23/27] Refactor --- examples_tests | 2 +- .../nbl/builtin/hlsl/camera/view_matrix.hlsl | 51 +++++++++ .../transformation_matrix_utils.hlsl | 107 ------------------ .../builtin/hlsl/projection/projection.hlsl | 73 ++++++------ 4 files changed, 87 insertions(+), 146 deletions(-) create mode 100644 include/nbl/builtin/hlsl/camera/view_matrix.hlsl diff --git a/examples_tests b/examples_tests index 424a71a55a..1946d5b2da 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 424a71a55a457231aa782a6fe6083a2fb9f082f4 +Subproject commit 1946d5b2dab693c54dd16eaaf93cc77b8de2ddf2 diff --git a/include/nbl/builtin/hlsl/camera/view_matrix.hlsl b/include/nbl/builtin/hlsl/camera/view_matrix.hlsl new file mode 100644 index 0000000000..27b2c63239 --- /dev/null +++ b/include/nbl/builtin/hlsl/camera/view_matrix.hlsl @@ -0,0 +1,51 @@ +#ifndef _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +// /Arek: glm:: for normalize till dot product is fixed (ambiguity with glm namespace + linker issues) +template +inline matrix buildCameraLookAtMatrixLH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = hlsl::normalize(target - position); + const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +template +inline matrix buildCameraLookAtMatrixRH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = hlsl::normalize(position - target); + const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index dbbfe5c2df..add1471bb5 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -122,45 +122,6 @@ inline matrix concatenateBFollowedByA(const matrix& a, const m return matrix(mul(a4x4, b4x4)); } -// /Arek: glm:: for normalize till dot product is fixed (ambiguity with glm namespace + linker issues) -template -inline matrix buildCameraLookAtMatrixLH( - const vector& position, - const vector& target, - const vector& upVector) -{ - const vector zaxis = hlsl::normalize(target - position); - const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); - const vector yaxis = hlsl::cross(zaxis, xaxis); - - matrix r; - r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); - r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); - r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); - - return r; -} - -template -inline matrix buildCameraLookAtMatrixRH( - const vector& position, - const vector& target, - const vector& upVector) -{ - const vector zaxis = hlsl::normalize(position - target); - const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); - const vector yaxis = hlsl::cross(zaxis, xaxis); - - matrix r; - r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); - r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); - r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); - - return r; -} - -// TODO: test, check if there is better implementation -// TODO: move quaternion to nbl::hlsl // TODO: why NBL_REF_ARG(MatType) doesn't work????? template @@ -210,74 +171,6 @@ inline void setTranslation(matrix& outMat, NBL_CONST_REF_ARG(vector -inline matrix buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(w, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -h, 0.f, 0.f); - m[2] = vector(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); - m[3] = vector(0.f, 0.f, -1.f, 0.f); - - return m; -} -template -inline matrix buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(w, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -h, 0.f, 0.f); - m[2] = vector(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 1.f, 0.f); - - return m; -} - -template -inline matrix buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); - m[2] = vector(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 0.f, 1.f); - - return m; -} - -template -inline matrix buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); - m[2] = vector(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 0.f, 1.f); - - return m; -} - } } diff --git a/include/nbl/builtin/hlsl/projection/projection.hlsl b/include/nbl/builtin/hlsl/projection/projection.hlsl index ecf180b5c6..22d2872fde 100644 --- a/include/nbl/builtin/hlsl/projection/projection.hlsl +++ b/include/nbl/builtin/hlsl/projection/projection.hlsl @@ -8,72 +8,69 @@ namespace nbl namespace hlsl { // TODO: use glm instead for c++ -inline float32_t4x4 buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +template +inline matrix buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) { -#ifndef __HLSL_VERSION + const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + const float w = h / aspectRatio; + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero -#endif - const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); - const float w = h / aspectRatio; - - float32_t4x4 m; - m[0] = float32_t4(w, 0.f, 0.f, 0.f); - m[1] = float32_t4(0.f, -h, 0.f, 0.f); - m[2] = float32_t4(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); - m[3] = float32_t4(0.f, 0.f, 1.f, 0.f); - + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, -1.f, 0.f); + return m; } - -inline float32_t4x4 buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +template +inline matrix buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) { const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); -#ifndef __HLSL_VERSION _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero -#endif const float w = h / aspectRatio; - float32_t4x4 m; - m[0] = hlsl::float32_t4(w, 0.f, 0.f, 0.f); - m[1] = hlsl::float32_t4(0.f, -h, 0.f, 0.f); - m[2] = hlsl::float32_t4(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); - m[3] = hlsl::float32_t4(0.f, 0.f, -1.f, 0.f); + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 1.f, 0.f); return m; } -inline float32_t4x4 buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +template +inline matrix buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) { -#ifndef __HLSL_VERSION _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero -#endif - float32_t4x4 m; - m[0] = hlsl::float32_t4(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); - m[1] = hlsl::float32_t4(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); - m[2] = hlsl::float32_t4(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); - m[3] = hlsl::float32_t4(0.f, 0.f, 0.f, 1.f); + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); return m; } -inline float32_t4x4 buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) + +template +inline matrix buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) { -#ifndef __HLSL_VERSION _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero -#endif - float32_t4x4 m; - m[0] = hlsl::float32_t4(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); - m[1] = hlsl::float32_t4(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); - m[2] = hlsl::float32_t4(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); - m[3] = hlsl::float32_t4(0.f, 0.f, 0.f, 1.f); + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); return m; } From d6c9716d62b8d7881220e904b947e375b226d08b Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 4 Nov 2024 17:56:11 -0800 Subject: [PATCH 24/27] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 1946d5b2da..d3d7bc4e3e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 1946d5b2dab693c54dd16eaaf93cc77b8de2ddf2 +Subproject commit d3d7bc4e3ecf37b117a34f130a827bafeba139fa From cfc8dc9c9a386e65f78bd3f2998040fca5a5992c Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 13 Nov 2024 15:06:36 -0800 Subject: [PATCH 25/27] Saving work --- examples_tests | 2 +- .../hlsl/matrix_utils/transformation_matrix_utils.hlsl | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/examples_tests b/examples_tests index 2f26a62337..7cb41f7f6b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2f26a6233740f0c7cd8128373a14ccdb2a9bb0ab +Subproject commit 7cb41f7f6b6eec116305dffe748d70d9ca07a1c3 diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index add1471bb5..d3d7d7fa04 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -12,12 +12,6 @@ namespace nbl namespace hlsl { -// TODO: if `IdentityFloat32_t3x4` and `IdentityFloat32_t3x4` constexprs are ok, then I can expand them into templated struct, not doing it untill the concept is approved -//template -//struct IdentityMatrix -//{ -// -//}; NBL_CONSTEXPR hlsl::float32_t3x4 IdentityFloat32_t3x4 = hlsl::float32_t3x4(hlsl::float32_t4(1, 0, 0, 0), hlsl::float32_t4(0, 0, 1, 0), hlsl::float32_t4(0, 0, 1, 0)); NBL_CONSTEXPR hlsl::float32_t4x4 IdentityFloat32_t4x4 = @@ -75,6 +69,7 @@ namespace transformation_matrix_utils_impl } } +//! returs adjugate of the cofactor (sub 3x3) matrix template inline matrix getSub3x3TransposeCofactors(const matrix& mat) { From 22630d0ba1a8a36a1a3265b9b27b9c15b0c83718 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 14 Nov 2024 15:06:50 -0800 Subject: [PATCH 26/27] Identity matrices refactor --- include/nbl/asset/IAccelerationStructure.h | 4 +- .../hlsl/matrix_utils/matrix_traits.hlsl | 30 +++++++++ .../transformation_matrix_utils.hlsl | 66 +++++++++++++++++-- 3 files changed, 94 insertions(+), 6 deletions(-) create mode 100644 include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl diff --git a/include/nbl/asset/IAccelerationStructure.h b/include/nbl/asset/IAccelerationStructure.h index 9dc07b2675..b3fb583796 100644 --- a/include/nbl/asset/IAccelerationStructure.h +++ b/include/nbl/asset/IAccelerationStructure.h @@ -198,13 +198,13 @@ class ITopLevelAccelerationStructure : public AccelerationStructure template struct StaticInstance final { - hlsl::float32_t3x4 transform = hlsl::IdentityFloat32_t3x4; + hlsl::float32_t3x4 transform = hlsl::float32_t3x4_identity; Instance base = {}; }; template struct MatrixMotionInstance final { - hlsl::float32_t3x4 transform[2] = { hlsl::IdentityFloat32_t3x4, hlsl::IdentityFloat32_t3x4 }; + hlsl::float32_t3x4 transform[2] = { hlsl::float32_t3x4_identity, hlsl::float32_t3x4_identity }; Instance base = {}; }; struct SRT diff --git a/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl b/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl new file mode 100644 index 0000000000..5a40089c86 --- /dev/null +++ b/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl @@ -0,0 +1,30 @@ +#ifndef _NBL_BUILTIN_HLSL_MATRIX_UTILS_MATRIX_TRAITS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATRIX_UTILS_MATRIX_TRAITS_INCLUDED_ + +#include +#include +// TODO: remove this header when deleting vectorSIMDf.hlsl +#include +#include "vectorSIMD.h" +#include + +namespace nbl +{ +namespace hlsl +{ + +template +struct matrix_traits; +// partial spec for matrices +template +struct matrix_traits > +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t RowCount = N; + NBL_CONSTEXPR_STATIC_INLINE uint32_t ColumnCount = M; + NBL_CONSTEXPR_STATIC_INLINE bool Square = N == M; +}; + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index d3d7d7fa04..8858ada6d2 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -6,16 +6,74 @@ // TODO: remove this header when deleting vectorSIMDf.hlsl #include #include "vectorSIMD.h" +#include +#include +#include namespace nbl { namespace hlsl { + // goal: identity::value + // plan: diagonal + // identity + // + // partial spec: + // template + // identity + +namespace transfromation_matrix_utils_impl +{ +template +matrix diagonal(float diagonal) +{ + using MatT = matrix; + MatT output; + + for (int i = 0; i < matrix_traits::RowCount; ++i) + for (int j = 0; j < matrix_traits::ColumnCount; ++j) + output[i][j] = 0; + + auto a = matrix_traits::RowCount; + auto b = matrix_traits::ColumnCount; + + for (int diag = 0; diag < matrix_traits::RowCount; ++diag) + output[diag][diag] = diagonal; + + return output; +}; +} -NBL_CONSTEXPR hlsl::float32_t3x4 IdentityFloat32_t3x4 = - hlsl::float32_t3x4(hlsl::float32_t4(1, 0, 0, 0), hlsl::float32_t4(0, 0, 1, 0), hlsl::float32_t4(0, 0, 1, 0)); -NBL_CONSTEXPR hlsl::float32_t4x4 IdentityFloat32_t4x4 = - hlsl::float32_t4x4(hlsl::float32_t4(1, 0, 0, 0), hlsl::float32_t4(0, 0, 1, 0), hlsl::float32_t4(0, 0, 1, 0), hlsl::float32_t4(0, 0, 0, 1)); +template +struct identity; + +template +struct identity > +{ + static matrix get() + { + return transfromation_matrix_utils_impl::diagonal(1); + } +}; + +#define IDENTITY_MATRIX(TYPE, N, M)\ +const matrix TYPE ## N ## x ## M ## _identity = identity >::get(); + +#define DEFINE_IDENTITY_MATRICES(TYPE)\ +IDENTITY_MATRIX(TYPE, 2, 2)\ +IDENTITY_MATRIX(TYPE, 3, 3)\ +IDENTITY_MATRIX(TYPE, 4, 4)\ +IDENTITY_MATRIX(TYPE, 3, 4) + +DEFINE_IDENTITY_MATRICES(float32_t) +DEFINE_IDENTITY_MATRICES(float64_t) +DEFINE_IDENTITY_MATRICES(int32_t) +DEFINE_IDENTITY_MATRICES(int64_t) +DEFINE_IDENTITY_MATRICES(uint32_t) +DEFINE_IDENTITY_MATRICES(uint64_t) + +#undef DEFINE_IDENTITY_MATRICES +#undef IDENTITY_MATRIX // TODO: this is temporary function, delete when removing vectorSIMD template From 2617c164ad5ea6c948496aa573282e9ff6ce2018 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 19 Nov 2024 14:25:46 -0800 Subject: [PATCH 27/27] Fixed templated functions --- examples_tests | 2 +- include/nbl/asset/IAccelerationStructure.h | 4 +- include/nbl/asset/utils/IMeshManipulator.h | 3 +- include/nbl/builtin/hlsl/concepts.hlsl | 1 - .../nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 1 + .../nbl/builtin/hlsl/cpp_compat/unroll.hlsl | 12 ++ .../hlsl/matrix_utils/matrix_traits.hlsl | 36 +++-- .../transformation_matrix_utils.hlsl | 131 +++++++----------- include/nbl/core/math/plane3dSIMD.h | 2 +- src/nbl/asset/utils/CGeometryCreator.cpp | 4 +- src/nbl/asset/utils/CMeshManipulator.cpp | 18 ++- 11 files changed, 106 insertions(+), 108 deletions(-) create mode 100644 include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl diff --git a/examples_tests b/examples_tests index f56c121641..2ef03b660f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f56c121641cae50eb7eb555e42549b6736a390c3 +Subproject commit 2ef03b660f2a08a1b196a1e11ffdc520091b43b0 diff --git a/include/nbl/asset/IAccelerationStructure.h b/include/nbl/asset/IAccelerationStructure.h index b3fb583796..889a5ce626 100644 --- a/include/nbl/asset/IAccelerationStructure.h +++ b/include/nbl/asset/IAccelerationStructure.h @@ -198,13 +198,13 @@ class ITopLevelAccelerationStructure : public AccelerationStructure template struct StaticInstance final { - hlsl::float32_t3x4 transform = hlsl::float32_t3x4_identity; + hlsl::float32_t3x4 transform = hlsl::diagonal(1.0f); Instance base = {}; }; template struct MatrixMotionInstance final { - hlsl::float32_t3x4 transform[2] = { hlsl::float32_t3x4_identity, hlsl::float32_t3x4_identity }; + hlsl::float32_t3x4 transform[2] = { hlsl::diagonal(1.0f), hlsl::diagonal(1.0f) }; Instance base = {}; }; struct SRT diff --git a/include/nbl/asset/utils/IMeshManipulator.h b/include/nbl/asset/utils/IMeshManipulator.h index 0a5bf12313..3c2fc6ccf4 100644 --- a/include/nbl/asset/utils/IMeshManipulator.h +++ b/include/nbl/asset/utils/IMeshManipulator.h @@ -411,7 +411,8 @@ class NBL_API2 IMeshManipulator : public virtual core::IReferenceCounted if (jointIDFLT_MIN) { - core::vectorSIMDf boneSpacePos = hlsl::transformVector(hlsl::getMatrix3x4As4x4(inverseBindPoses[jointID]), pos); + const hlsl::float32_t4x4 transformationMatrix = hlsl::getMatrix3x4As4x4(inverseBindPoses[jointID]); + core::vectorSIMDf boneSpacePos = hlsl::transformVector(transformationMatrix, pos); jointAABBs[jointID].addInternalPoint(boneSpacePos.getAsVector3df()); noJointInfluence = false; } diff --git a/include/nbl/builtin/hlsl/concepts.hlsl b/include/nbl/builtin/hlsl/concepts.hlsl index 29660b8b45..943004045c 100644 --- a/include/nbl/builtin/hlsl/concepts.hlsl +++ b/include/nbl/builtin/hlsl/concepts.hlsl @@ -4,7 +4,6 @@ #ifndef _NBL_BUILTIN_HLSL_CONCEPTS_INCLUDED_ #define _NBL_BUILTIN_HLSL_CONCEPTS_INCLUDED_ - #include #include #include diff --git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index cc89f9d003..b04bd6c7e0 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -14,6 +14,7 @@ struct matrix final : private glm::mat using Base = glm::mat; using Base::Base; using Base::operator[]; + //using type = matrix; // For assigning to same dimension use implicit ctor, and even then only allow for dimension truncation template requires ((X!=N || Y!=M) && X>=N && Y>=M) diff --git a/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl b/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl new file mode 100644 index 0000000000..36bcd944c6 --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl @@ -0,0 +1,12 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_UNROLL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_UNROLL_INCLUDED_ + +#ifdef __HLSL_VERSION +#define NBL_UNROLL [unroll] +#define NBL_UNROLL_LIMITED(LIMIT) [unroll(LIMIT)] +#else +#define NBL_UNROLL // can't be bothered / TODO +#define NBL_UNROLL_LIMITED(LIMIT) +#endif + +#endif diff --git a/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl b/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl index 5a40089c86..28142af3ac 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl @@ -2,11 +2,6 @@ #define _NBL_BUILTIN_HLSL_MATRIX_UTILS_MATRIX_TRAITS_INCLUDED_ #include -#include -// TODO: remove this header when deleting vectorSIMDf.hlsl -#include -#include "vectorSIMD.h" -#include namespace nbl { @@ -15,14 +10,33 @@ namespace hlsl template struct matrix_traits; -// partial spec for matrices -template -struct matrix_traits > + +// i choose to implement it this way because of this DXC bug: https://github.com/microsoft/DirectXShaderCompiler/issues/7007 +#define DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(ROW_COUNT, COLUMN_COUNT) \ +template \ +struct matrix_traits > \ +{ \ + using ComponentType = T; \ + NBL_CONSTEXPR_STATIC_INLINE uint32_t RowCount = ROW_COUNT; \ + NBL_CONSTEXPR_STATIC_INLINE uint32_t ColumnCount = COLUMN_COUNT; \ + NBL_CONSTEXPR_STATIC_INLINE bool Square = RowCount == ColumnCount; \ +}; + +DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(2, 2) +DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(3, 3) +DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(4, 4) +DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(3, 4) + +// TODO: when this bug: https://github.com/microsoft/DirectXShaderCompiler/issues/7007 is fixed, uncomment and delete template specializations +/*template +struct matrix_traits > { - NBL_CONSTEXPR_STATIC_INLINE uint32_t RowCount = N; - NBL_CONSTEXPR_STATIC_INLINE uint32_t ColumnCount = M; - NBL_CONSTEXPR_STATIC_INLINE bool Square = N == M; + using ComponentType = T; + NBL_CONSTEXPR_STATIC_INLINE uint32_t RowCount = ROW_COUNT; + NBL_CONSTEXPR_STATIC_INLINE uint32_t ColumnCount = COLUMN_COUNT; + NBL_CONSTEXPR_STATIC_INLINE bool Square = RowCount == ColumnCount; }; +*/ } } diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index 8858ada6d2..d1a628ccc0 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -1,83 +1,49 @@ #ifndef _NBL_BUILTIN_HLSL_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ #define _NBL_BUILTIN_HLSL_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ - -#include #include // TODO: remove this header when deleting vectorSIMDf.hlsl +#ifndef __HLSL_VERSION #include #include "vectorSIMD.h" -#include +#endif #include -#include +#include "nbl/builtin/hlsl/cpp_compat/unroll.hlsl" namespace nbl { namespace hlsl { - // goal: identity::value - // plan: diagonal - // identity - // - // partial spec: - // template - // identity - -namespace transfromation_matrix_utils_impl -{ -template -matrix diagonal(float diagonal) + +template +MatT diagonal(float diagonal = 1) { - using MatT = matrix; MatT output; - for (int i = 0; i < matrix_traits::RowCount; ++i) - for (int j = 0; j < matrix_traits::ColumnCount; ++j) + NBL_UNROLL_LIMITED(4) + for (uint32_t i = 0; i < matrix_traits::RowCount; ++i) + NBL_UNROLL_LIMITED(4) + for (uint32_t j = 0; j < matrix_traits::ColumnCount; ++j) output[i][j] = 0; - auto a = matrix_traits::RowCount; - auto b = matrix_traits::ColumnCount; - - for (int diag = 0; diag < matrix_traits::RowCount; ++diag) + NBL_UNROLL_LIMITED(4) + for (uint32_t diag = 0; diag < matrix_traits::RowCount; ++diag) output[diag][diag] = diagonal; return output; -}; } template -struct identity; - -template -struct identity > +MatT identity() { - static matrix get() - { - return transfromation_matrix_utils_impl::diagonal(1); - } -}; - -#define IDENTITY_MATRIX(TYPE, N, M)\ -const matrix TYPE ## N ## x ## M ## _identity = identity >::get(); - -#define DEFINE_IDENTITY_MATRICES(TYPE)\ -IDENTITY_MATRIX(TYPE, 2, 2)\ -IDENTITY_MATRIX(TYPE, 3, 3)\ -IDENTITY_MATRIX(TYPE, 4, 4)\ -IDENTITY_MATRIX(TYPE, 3, 4) - -DEFINE_IDENTITY_MATRICES(float32_t) -DEFINE_IDENTITY_MATRICES(float64_t) -DEFINE_IDENTITY_MATRICES(int32_t) -DEFINE_IDENTITY_MATRICES(int64_t) -DEFINE_IDENTITY_MATRICES(uint32_t) -DEFINE_IDENTITY_MATRICES(uint64_t) - -#undef DEFINE_IDENTITY_MATRICES -#undef IDENTITY_MATRIX + // TODO + // static_assert(MatT::Square); + return diagonal(1); +} // TODO: this is temporary function, delete when removing vectorSIMD +#ifndef __HLSL_VERSION template -inline core::vectorSIMDf transformVector(const matrix& mat, const core::vectorSIMDf& vec) +inline core::vectorSIMDf transformVector(NBL_CONST_REF_ARG(matrix) mat, NBL_CONST_REF_ARG(core::vectorSIMDf) vec) { core::vectorSIMDf output; float32_t4 tmp; @@ -89,9 +55,9 @@ inline core::vectorSIMDf transformVector(const matrix& mat, const core: return output; } - +#endif template -inline matrix getMatrix3x4As4x4(const matrix& mat) +inline matrix getMatrix3x4As4x4(NBL_CONST_REF_ARG(matrix) mat) { matrix output; for (int i = 0; i < 3; ++i) @@ -101,14 +67,14 @@ inline matrix getMatrix3x4As4x4(const matrix& mat) return output; } -template -inline matrix getSub3x3(const matrix& mat) +template +inline matrix getSub3x3(NBL_CONST_REF_ARG(matrix) mat) { return matrix(mat); } -template -inline matrix getAs64BitPrecisionMatrix(const matrix& mat) +template +inline matrix getAs64BitPrecisionMatrix(NBL_CONST_REF_ARG(matrix) mat) { matrix output; for (int i = 0; i < N; ++i) @@ -119,8 +85,9 @@ inline matrix getAs64BitPrecisionMatrix(const matrix - inline T determinant_helper(const matrix& mat, vector& r1crossr2) + inline T determinant_helper(NBL_CONST_REF_ARG(matrix) mat, NBL_REF_ARG(vector) r1crossr2) { r1crossr2 = hlsl::cross(mat[1], mat[2]); return hlsl::dot(mat[0], r1crossr2); @@ -128,8 +95,8 @@ namespace transformation_matrix_utils_impl } //! returs adjugate of the cofactor (sub 3x3) matrix -template -inline matrix getSub3x3TransposeCofactors(const matrix& mat) +template +inline matrix getSub3x3TransposeCofactors(NBL_CONST_REF_ARG(matrix) mat) { static_assert(N >= 3 && M >= 3); @@ -146,15 +113,15 @@ inline matrix getSub3x3TransposeCofactors(const matrix& mat) return output; } -template -inline bool getSub3x3InverseTranspose(const matrix& matIn, matrix& matOut) +template +inline bool getSub3x3InverseTranspose(NBL_CONST_REF_ARG(matrix) matIn, NBL_CONST_REF_ARG(matrix) matOut) { matrix matIn3x3 = getSub3x3(matIn); vector r1crossr2; T d = transformation_matrix_utils_impl::determinant_helper(matIn3x3, r1crossr2); - if (core::iszero(d, FLT_MIN)) + if (abs(d) <= FLT_MIN) return false; - auto rcp = core::reciprocal(d); + auto rcp = T(1.0f)/d; // matrix of cofactors * 1/det matOut = getSub3x3TransposeCofactors(matIn3x3); @@ -168,28 +135,33 @@ inline bool getSub3x3InverseTranspose(const matrix& matIn, matrix -inline matrix concatenateBFollowedByA(const matrix& a, const matrix& b) +inline matrix concatenateBFollowedByA(NBL_CONST_REF_ARG(matrix) a, NBL_CONST_REF_ARG(const matrix) b) { - const matrix a4x4 = getMatrix3x4As4x4(a); - const matrix b4x4 = getMatrix3x4As4x4(b); + // TODO + // static_assert(N == 3 || N == 4); + + const matrix a4x4 = getMatrix3x4As4x4(a); + const matrix b4x4 = getMatrix3x4As4x4(b); return matrix(mul(a4x4, b4x4)); } -// TODO: why NBL_REF_ARG(MatType) doesn't work????? - -template -inline void setScale(matrix& outMat, NBL_CONST_REF_ARG(vector) scale) +template +inline void setScale(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(vector) scale) { + // TODO + // static_assert(N == 3 || N == 4); + outMat[0][0] = scale[0]; outMat[1][1] = scale[1]; outMat[2][2] = scale[2]; } //! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged -template -inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(nbl::hlsl::quaternion) quat) +template +inline void setRotation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(nbl::hlsl::quaternion) quat) { - static_assert(N == 3 || N == 4); + // TODO + //static_assert(N == 3 || N == 4); outMat[0] = vector( 1 - 2 * (quat.data.y * quat.data.y + quat.data.z * quat.data.z), @@ -214,10 +186,11 @@ inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(nbl::hlsl::qu ); } -template -inline void setTranslation(matrix& outMat, NBL_CONST_REF_ARG(vector) translation) +template +inline void setTranslation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(vector) translation) { - static_assert(N == 3 || N == 4); + // TODO + // static_assert(N == 3 || N == 4); outMat[0].w = translation.x; outMat[1].w = translation.y; diff --git a/include/nbl/core/math/plane3dSIMD.h b/include/nbl/core/math/plane3dSIMD.h index 2686c0a821..0a9f163208 100644 --- a/include/nbl/core/math/plane3dSIMD.h +++ b/include/nbl/core/math/plane3dSIMD.h @@ -104,7 +104,7 @@ class plane3dSIMDf : private vectorSIMDf static inline plane3dSIMDf transform(const plane3dSIMDf& in, const hlsl::float32_t3x4& mat) { hlsl::float32_t3x4 a = mat; - hlsl::float32_t4x4 inv = hlsl::getMatrix3x4As4x4(a); + hlsl::float32_t4x4 inv = hlsl::getMatrix3x4As4x4(a); hlsl::inverse(inv); vectorSIMDf normal(in.getNormal()); diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index ac065a8c97..a15b52cfca 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -697,8 +697,8 @@ CGeometryCreator::return_type CGeometryCreator::createDiskMesh(float radius, uin for (int i = 2; i < vertexCount-1; i++) { hlsl::float32_t3x4 rotMatrix; - hlsl::setRotation(rotMatrix, hlsl::quaternion::create(0.0f, 0.0f, core::radians((i - 1) * angle))); - core::vectorSIMDf vn = hlsl::transformVector(hlsl::getMatrix3x4As4x4(rotMatrix), v0); + hlsl::setRotation(rotMatrix, hlsl::quaternion::create(0.0f, 0.0f, core::radians((i - 1) * angle))); + core::vectorSIMDf vn = hlsl::transformVector(hlsl::getMatrix3x4As4x4(rotMatrix), v0); ptr[i] = DiskVertex(vn, video::SColor(0xFFFFFFFFu), core::vector2du32_SIMD(0u, 1u), core::vector3df_SIMD(0.0f, 0.0f, 1.0f)); diff --git a/src/nbl/asset/utils/CMeshManipulator.cpp b/src/nbl/asset/utils/CMeshManipulator.cpp index a76b1ed9d8..ed1e37cbdd 100644 --- a/src/nbl/asset/utils/CMeshManipulator.cpp +++ b/src/nbl/asset/utils/CMeshManipulator.cpp @@ -9,9 +9,7 @@ #include #include - #include "nbl/asset/asset.h" -#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" #include "nbl/asset/IRenderpassIndependentPipeline.h" #include "nbl/asset/utils/CMeshManipulator.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" @@ -1708,20 +1706,20 @@ hlsl::float32_t3x4 IMeshManipulator::calculateOBB(const nbl::asset::ICPUMeshBuff float ABBQuality = ABBDiff.x * ABBDiff.y + ABBDiff.y * ABBDiff.z + ABBDiff.z * ABBDiff.x; hlsl::float32_t3x4 scaleMat; hlsl::float32_t3x4 translationMat; - hlsl::setTranslation(translationMat, core::getAsVec3(-(MinPoint) / OBBDiff)); - hlsl::setScale(scaleMat, core::getAsVec3(OBBDiff)); - TransMat = hlsl::concatenateBFollowedByA(TransMat, scaleMat); - TransMat = hlsl::concatenateBFollowedByA(TransMat, translationMat); + hlsl::setTranslation(translationMat, core::getAsVec3(-(MinPoint) / OBBDiff)); + hlsl::setScale(scaleMat, core::getAsVec3(OBBDiff)); + TransMat = hlsl::concatenateBFollowedByA(TransMat, scaleMat); + TransMat = hlsl::concatenateBFollowedByA(TransMat, translationMat); if (ABBQuality < OBBQuality) { - hlsl::setTranslation(translationMat, core::getAsVec3(-(AABBMin) / ABBDiff)); - hlsl::setScale(scaleMat, core::getAsVec3(ABBDiff)); + hlsl::setTranslation(translationMat, core::getAsVec3(-(AABBMin) / ABBDiff)); + hlsl::setScale(scaleMat, core::getAsVec3(ABBDiff)); TransMat = hlsl::float32_t3x4( 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0); - TransMat = hlsl::concatenateBFollowedByA(TransMat, scaleMat); - TransMat = hlsl::concatenateBFollowedByA(TransMat, translationMat); + TransMat = hlsl::concatenateBFollowedByA(TransMat, scaleMat); + TransMat = hlsl::concatenateBFollowedByA(TransMat, translationMat); } return TransMat;