diff --git a/67_RayQueryGeometry/app_resources/common.hlsl b/67_RayQueryGeometry/app_resources/common.hlsl index e39e7192b..9110cd4a1 100644 --- a/67_RayQueryGeometry/app_resources/common.hlsl +++ b/67_RayQueryGeometry/app_resources/common.hlsl @@ -10,8 +10,9 @@ struct SGeomInfo { uint64_t vertexBufferAddress; uint64_t indexBufferAddress; + uint64_t normalBufferAddress; - uint32_t vertexStride : 29; + uint32_t objType : 29; uint32_t indexType : 2; // 16 bit, 32 bit or none uint32_t smoothNormals : 1; // flat for cube, rectangle, disk uint32_t padding; @@ -35,8 +36,6 @@ enum ObjectType : uint32_t // matches c++ OT_SPHERE, OT_CYLINDER, OT_RECTANGLE, - OT_DISK, - OT_ARROW, OT_CONE, OT_ICOSPHERE, diff --git a/67_RayQueryGeometry/app_resources/render.comp.hlsl b/67_RayQueryGeometry/app_resources/render.comp.hlsl index 657d0bbf0..937273767 100644 --- a/67_RayQueryGeometry/app_resources/render.comp.hlsl +++ b/67_RayQueryGeometry/app_resources/render.comp.hlsl @@ -28,10 +28,11 @@ float3 unpackNormals3x10(uint32_t v) float3 calculateSmoothNormals(int instID, int primID, SGeomInfo geom, float2 bary) { const uint indexType = geom.indexType; - const uint vertexStride = geom.vertexStride; + const uint objType = geom.objType; const uint64_t vertexBufferAddress = geom.vertexBufferAddress; const uint64_t indexBufferAddress = geom.indexBufferAddress; + const uint64_t normalBufferAddress = geom.normalBufferAddress; uint32_t3 indices; switch (indexType) @@ -51,42 +52,30 @@ float3 calculateSmoothNormals(int instID, int primID, SGeomInfo geom, float2 bar } float3 n0, n1, n2; - switch (instID) + switch (objType) { case OT_CUBE: - { - // TODO: document why the alignment is 2 here and nowhere else? isnt the `vertexStride` aligned to more than 2 anyway? 
- uint32_t v0 = vk::RawBufferLoad(vertexBufferAddress + indices[0] * vertexStride, 2u); - uint32_t v1 = vk::RawBufferLoad(vertexBufferAddress + indices[1] * vertexStride, 2u); - uint32_t v2 = vk::RawBufferLoad(vertexBufferAddress + indices[2] * vertexStride, 2u); - - n0 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v0).xyz); - n1 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v1).xyz); - n2 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v2).xyz); - } - break; case OT_SPHERE: + case OT_RECTANGLE: case OT_CYLINDER: - case OT_ARROW: + //case OT_ARROW: case OT_CONE: { - uint32_t v0 = vk::RawBufferLoad(vertexBufferAddress + indices[0] * vertexStride); - uint32_t v1 = vk::RawBufferLoad(vertexBufferAddress + indices[1] * vertexStride); - uint32_t v2 = vk::RawBufferLoad(vertexBufferAddress + indices[2] * vertexStride); + uint32_t v0 = vk::RawBufferLoad(normalBufferAddress + indices[0] * 4); + uint32_t v1 = vk::RawBufferLoad(normalBufferAddress + indices[1] * 4); + uint32_t v2 = vk::RawBufferLoad(normalBufferAddress + indices[2] * 4); - n0 = normalize(unpackNormals3x10(v0)); - n1 = normalize(unpackNormals3x10(v1)); - n2 = normalize(unpackNormals3x10(v2)); + n0 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v0).xyz); + n1 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v1).xyz); + n2 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v2).xyz); } break; - case OT_RECTANGLE: - case OT_DISK: case OT_ICOSPHERE: default: { - n0 = normalize(vk::RawBufferLoad(vertexBufferAddress + indices[0] * vertexStride)); - n1 = normalize(vk::RawBufferLoad(vertexBufferAddress + indices[1] * vertexStride)); - n2 = normalize(vk::RawBufferLoad(vertexBufferAddress + indices[2] * vertexStride)); + n0 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[0] * 12)); + n1 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[1] * 12)); + n2 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[2] * 12)); } } diff --git a/67_RayQueryGeometry/include/common.hpp b/67_RayQueryGeometry/include/common.hpp 
index bcf896f55..b1759e9e3 100644 --- a/67_RayQueryGeometry/include/common.hpp +++ b/67_RayQueryGeometry/include/common.hpp @@ -15,4 +15,70 @@ using namespace nbl::examples; #include "app_resources/common.hlsl" +namespace nbl::scene +{ +enum ObjectType : uint8_t +{ + OT_CUBE, + OT_SPHERE, + OT_CYLINDER, + OT_RECTANGLE, + OT_CONE, + OT_ICOSPHERE, + + OT_COUNT, + OT_UNKNOWN = std::numeric_limits::max() +}; + +static constexpr uint32_t s_smoothNormals[OT_COUNT] = { 0, 1, 1, 0, 1, 1 }; + +struct ObjectMeta +{ + ObjectType type = OT_UNKNOWN; + std::string_view name = "Unknown"; +}; + +struct ObjectDrawHookCpu +{ + nbl::core::matrix3x4SIMD model; + ObjectMeta meta; +}; + +enum GeometryShader +{ + GP_BASIC = 0, + GP_CONE, + GP_ICO, + + GP_COUNT +}; + +struct ReferenceObjectCpu +{ + ObjectMeta meta; + core::matrix3x4SIMD transform; + core::smart_refctd_ptr data; +}; + +struct ReferenceObjectGpu +{ + struct Bindings + { + nbl::asset::SBufferBinding vertex, index; + }; + + ObjectMeta meta; + Bindings bindings; + uint32_t vertexStride; + nbl::asset::E_INDEX_TYPE indexType = nbl::asset::E_INDEX_TYPE::EIT_UNKNOWN; + uint32_t indexCount = {}; + + const bool useIndex() const + { + return bindings.index.buffer && (indexType != E_INDEX_TYPE::EIT_UNKNOWN); + } +}; +} + + #endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/67_RayQueryGeometry/main.cpp b/67_RayQueryGeometry/main.cpp index 495f3a3e2..76a4819e0 100644 --- a/67_RayQueryGeometry/main.cpp +++ b/67_RayQueryGeometry/main.cpp @@ -3,10 +3,10 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" -class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoAssetManagerAndBuiltinResourceApplication +class RayQueryGeometryApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { using device_base_t = SimpleWindowedApplication; - using asset_base_t = MonoAssetManagerAndBuiltinResourceApplication; + using 
asset_base_t = BuiltinResourcesApplication; using clock_t = std::chrono::steady_clock; constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; @@ -279,11 +279,11 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA { IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; imageBarriers[0].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS } }; imageBarriers[0].image = outHDRImage.get(); @@ -319,11 +319,11 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA { IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[2]; imageBarriers[0].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT } }; imageBarriers[0].image = outHDRImage.get(); @@ -338,11 +338,11 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA imageBarriers[0].newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL; imageBarriers[1].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + .dep = { + .srcStageMask = 
PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT } }; imageBarriers[1].image = m_surface->getSwapchainResources()->getImage(m_currentImageAcquire.imageIndex); @@ -384,11 +384,11 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA { IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; imageBarriers[0].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::NONE, - .dstAccessMask = ACCESS_FLAGS::NONE + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::NONE, + .dstAccessMask = ACCESS_FLAGS::NONE } }; imageBarriers[0].image = m_surface->getSwapchainResources()->getImage(m_currentImageAcquire.imageIndex); @@ -486,62 +486,48 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA smart_refctd_ptr createAccelerationStructureDS(video::CThreadSafeQueueAdapter* queue) { - // get geometries in ICPUBuffers -#if 1 - return nullptr; -#else - std::array objectsCpu; - objectsCpu[OT_CUBE] = ReferenceObjectCpu{ .meta = {.type = OT_CUBE, .name = "Cube Mesh" }, .shadersType = GP_BASIC, .data = gc->createCubeMesh(nbl::core::vector3df(1.f, 1.f, 1.f)) }; - objectsCpu[OT_SPHERE] = ReferenceObjectCpu{ .meta = {.type = OT_SPHERE, .name = "Sphere Mesh" }, .shadersType = GP_BASIC, .data = gc->createSphereMesh(2, 16, 16) }; - objectsCpu[OT_CYLINDER] = ReferenceObjectCpu{ .meta = {.type = OT_CYLINDER, .name = "Cylinder Mesh" }, .shadersType = GP_BASIC, .data = gc->createCylinderMesh(2, 2, 20) }; - objectsCpu[OT_RECTANGLE] = ReferenceObjectCpu{ .meta = {.type = OT_RECTANGLE, .name = "Rectangle Mesh" }, .shadersType = GP_BASIC, .data = 
gc->createRectangleMesh(nbl::core::vector2df_SIMD(1.5, 3)) }; - objectsCpu[OT_DISK] = ReferenceObjectCpu{ .meta = {.type = OT_DISK, .name = "Disk Mesh" }, .shadersType = GP_BASIC, .data = gc->createDiskMesh(2, 30) }; - objectsCpu[OT_ARROW] = ReferenceObjectCpu{ .meta = {.type = OT_ARROW, .name = "Arrow Mesh" }, .shadersType = GP_BASIC, .data = gc->createArrowMesh() }; - objectsCpu[OT_CONE] = ReferenceObjectCpu{ .meta = {.type = OT_CONE, .name = "Cone Mesh" }, .shadersType = GP_CONE, .data = gc->createConeMesh(2, 3, 10) }; - objectsCpu[OT_ICOSPHERE] = ReferenceObjectCpu{ .meta = {.type = OT_ICOSPHERE, .name = "Icosphere Mesh" }, .shadersType = GP_ICO, .data = gc->createIcoSphere(1, 3, true) }; - - auto geomInfoBuffer = ICPUBuffer::create({ OT_COUNT * sizeof(SGeomInfo) }); + using namespace nbl::scene; + + // triangles geometries + auto gc = make_smart_refctd_ptr(); + + auto transform_i = 0; + auto nextTransform = [&transform_i]() + { + core::matrix3x4SIMD transform; + transform.setTranslation(nbl::core::vectorSIMDf(5.f * transform_i, 0, 0, 0)); + transform_i++; + return transform; + }; + + std::vector cpuObjects; + cpuObjects.push_back(ReferenceObjectCpu{ .meta = {.type = OT_CUBE, .name = "Cube Mesh" }, .transform = nextTransform(), .data = gc->createCube({1.f, 1.f, 1.f})}); + cpuObjects.push_back(ReferenceObjectCpu{ .meta = {.type = OT_SPHERE, .name = "Sphere Mesh" }, .transform = nextTransform(), .data = gc->createSphere(2, 16, 16)}); + cpuObjects.push_back(ReferenceObjectCpu{ .meta = {.type = OT_CYLINDER, .name = "Cylinder Mesh" }, .transform = nextTransform(), .data = gc->createCylinder(2, 2, 20)}); + cpuObjects.push_back(ReferenceObjectCpu{ .meta = {.type = OT_RECTANGLE, .name = "Rectangle Mesh" }, .transform = nextTransform(), .data = gc->createRectangle({1.5, 3})}); + cpuObjects.push_back(ReferenceObjectCpu{ .meta = {.type = OT_CONE, .name = "Cone Mesh" }, .transform = nextTransform(), .data = gc->createCone(2, 3, 10)}); + 
cpuObjects.push_back(ReferenceObjectCpu{ .meta = {.type = OT_ICOSPHERE, .name = "Icosphere Mesh" }, .transform = nextTransform(), .data = gc->createIcoSphere(1, 3, true)}); + const auto arrowPolygons = gc->createArrow(); + const auto arrowTransform = nextTransform(); + cpuObjects.push_back(ReferenceObjectCpu{ .meta = {.type = OT_CYLINDER, .name = "Arrow Mesh" }, .transform = arrowTransform, .data = arrowPolygons[0]}); + cpuObjects.push_back(ReferenceObjectCpu{ .meta = {.type = OT_CONE, .name = "Arrow Mesh" }, .transform = arrowTransform, .data = arrowPolygons[1]}); + auto geomInfoBuffer = ICPUBuffer::create({ cpuObjects.size() * sizeof(SGeomInfo) }); SGeomInfo* geomInfos = reinterpret_cast(geomInfoBuffer->getPointer()); - const uint32_t byteOffsets[OT_COUNT] = { 18, 24, 24, 20, 20, 24, 16, 12 }; // based on normals data position - const uint32_t smoothNormals[OT_COUNT] = { 0, 1, 1, 0, 0, 1, 1, 1 }; // get ICPUBuffers into ICPUBottomLevelAccelerationStructures - std::array, OT_COUNT> cpuBlas; + std::vector> cpuBlas(cpuObjects.size()); for (uint32_t i = 0; i < cpuBlas.size(); i++) { auto triangles = make_refctd_dynamic_array>>(1u); auto primitiveCounts = make_refctd_dynamic_array>(1u); auto& tri = triangles->front(); - auto& primCount = primitiveCounts->front(); - const auto& geom = objectsCpu[i]; - - const bool useIndex = geom.data.indexType != EIT_UNKNOWN; - const uint32_t vertexStride = geom.data.inputParams.bindings[0].stride; - const uint32_t numVertices = (geom.data.bindings[0].buffer->getSize()-geom.data.bindings[0].offset) / vertexStride; - if (useIndex) - primCount = geom.data.indexCount / 3; - else - primCount = numVertices / 3; + auto& primCount = primitiveCounts->front(); + primCount = cpuObjects[i].data->getPrimitiveCount(); - geomInfos[i].indexType = geom.data.indexType; - geomInfos[i].vertexStride = vertexStride; - geomInfos[i].smoothNormals = smoothNormals[i]; - - 
geom.data.bindings[0].buffer->setContentHash(geom.data.bindings[0].buffer->computeContentHash()); - tri.vertexData[0] = geom.data.bindings[0]; - if (useIndex) - { - geom.data.indexBuffer.buffer->setContentHash(geom.data.indexBuffer.buffer->computeContentHash()); - tri.indexData = geom.data.indexBuffer; - } - tri.maxVertex = numVertices - 1; - tri.vertexStride = vertexStride; - tri.vertexFormat = static_cast(geom.data.inputParams.attributes[0].format); - tri.indexType = geom.data.indexType; - tri.geometryFlags = IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::OPAQUE_BIT; + tri = cpuObjects[i].data->exportForBLAS(); auto& blas = cpuBlas[i]; blas = make_smart_refctd_ptr(); @@ -556,7 +542,7 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA } // get ICPUBottomLevelAccelerationStructure into ICPUTopLevelAccelerationStructure - auto geomInstances = make_refctd_dynamic_array>(OT_COUNT); + auto geomInstances = make_refctd_dynamic_array>(cpuObjects.size()); { uint32_t i = 0; for (auto instance = geomInstances->begin(); instance != geomInstances->end(); instance++, i++) @@ -567,11 +553,7 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA inst.base.instanceCustomIndex = i; inst.base.instanceShaderBindingTableRecordOffset = 0; inst.base.mask = 0xFF; - - core::matrix3x4SIMD transform; - transform.setTranslation(nbl::core::vectorSIMDf(5.f * i, 0, 0, 0)); - inst.transform = transform; - + inst.transform = cpuObjects[i].transform; instance->instance = inst; } } @@ -638,29 +620,26 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA CAssetConverter::patch_t tlasPatch = {}; tlasPatch.compactAfterBuild = true; - std::array,OT_COUNT> tmpBLASPatches = {}; - std::array tmpBuffers; - std::array, OT_COUNT * 2u> tmpBufferPatches; + std::vector> tmpBLASPatches(cpuObjects.size()); + std::vector tmpGeometries(cpuObjects.size()); + std::vector> tmpGeometryPatches(cpuObjects.size()); { 
tmpBLASPatches.front().compactAfterBuild = true; std::fill(tmpBLASPatches.begin(),tmpBLASPatches.end(),tmpBLASPatches.front()); // - for (uint32_t i = 0; i < objectsCpu.size(); i++) + for (uint32_t i = 0; i < cpuObjects.size(); i++) { - tmpBuffers[2 * i + 0] = cpuBlas[i]->getTriangleGeometries().front().vertexData[0].buffer.get(); - tmpBuffers[2 * i + 1] = cpuBlas[i]->getTriangleGeometries().front().indexData.buffer.get(); + tmpGeometries[i] = cpuObjects[i].data.get(); + tmpGeometryPatches[i].indexBufferUsages= IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; } - // make sure all buffers are BDA-readable - for (auto& patch : tmpBufferPatches) - patch.usage |= asset::IBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; std::get>(inputs.assets) = {&descriptorSet.get(),1}; std::get>(inputs.assets) = {&cpuTlas.get(),1}; std::get>(inputs.patches) = {&tlasPatch,1}; std::get>(inputs.assets) = {&cpuBlas.data()->get(),cpuBlas.size()}; std::get>(inputs.patches) = tmpBLASPatches; - std::get>(inputs.assets) = tmpBuffers; - std::get>(inputs.patches) = tmpBufferPatches; + std::get>(inputs.assets) = tmpGeometries; + std::get>(inputs.patches) = tmpGeometryPatches; } auto reservation = converter->reserve(inputs); @@ -783,18 +762,37 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA return {}; } + auto&& tlases = reservation.getGPUObjects(); + m_gpuTlas = tlases[0].value; + + auto&& gpuPolygonGeometries = reservation.getGPUObjects(); + m_gpuPolygons.resize(gpuPolygonGeometries.size()); + // assign gpu objects to output - for (const auto& buffer : reservation.getGPUObjects()) - retainedBuffers.push_back(buffer.value); - for (uint32_t i = 0; i < objectsCpu.size(); i++) + for (uint32_t i = 0; i < gpuPolygonGeometries.size(); i++) { - auto vBuffer = retainedBuffers[2 * i + 0].get(); - auto iBuffer = retainedBuffers[2 * i + 1].get(); - const auto& geom = objectsCpu[i]; - const bool useIndex = geom.data.indexType != EIT_UNKNOWN; + const auto& 
cpuObject = cpuObjects[i]; + const auto& gpuPolygon = gpuPolygonGeometries[i].value; + const auto gpuTriangles = gpuPolygon->exportForBLAS(); + + const auto& vertexBufferBinding = gpuTriangles.vertexData[0]; + const uint64_t vertexBufferAddress = vertexBufferBinding.buffer->getDeviceAddress() + vertexBufferBinding.offset; + + const auto& normalView = gpuPolygon->getNormalView(); + const uint64_t normalBufferAddress = normalView ? normalView.src.buffer->getDeviceAddress() + normalView.src.offset : 0; + + const auto& indexBufferBinding = gpuTriangles.indexData; + auto& geomInfo = geomInfos[i]; + geomInfo = { + .vertexBufferAddress = vertexBufferAddress, + .indexBufferAddress = indexBufferBinding.buffer ? indexBufferBinding.buffer->getDeviceAddress() + indexBufferBinding.offset : vertexBufferAddress, + .normalBufferAddress = normalBufferAddress, + .objType = cpuObject.meta.type, + .indexType = gpuTriangles.indexType, + .smoothNormals = s_smoothNormals[cpuObject.meta.type], + }; - geomInfos[i].vertexBufferAddress = vBuffer->getDeviceAddress() + byteOffsets[i]; - geomInfos[i].indexBufferAddress = useIndex ? 
iBuffer->getDeviceAddress():0x0ull; + m_gpuPolygons[i] = gpuPolygon; } } @@ -802,7 +800,7 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA { IGPUBuffer::SCreationParams params; params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - params.size = OT_COUNT * sizeof(SGeomInfo); + params.size = cpuObjects.size() * sizeof(SGeomInfo); m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = gQueue }, std::move(params), geomInfos).move_into(geometryInfoBuffer); } @@ -892,7 +890,6 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA m_api->endCapture(); return reservation.getGPUObjects().front().value; -#endif } @@ -911,11 +908,13 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public MonoA video::CDumbPresentationOracle oracle; smart_refctd_ptr geometryInfoBuffer; - core::vector> retainedBuffers; smart_refctd_ptr outHDRImage; + core::vector> m_gpuPolygons; + smart_refctd_ptr m_gpuTlas; smart_refctd_ptr renderPipeline; smart_refctd_ptr renderDs; + }; NBL_MAIN_FUNC(RayQueryGeometryApp) \ No newline at end of file diff --git a/71_RayTracingPipeline/app_resources/common.hlsl b/71_RayTracingPipeline/app_resources/common.hlsl index 18b67085a..8f7a06a33 100644 --- a/71_RayTracingPipeline/app_resources/common.hlsl +++ b/71_RayTracingPipeline/app_resources/common.hlsl @@ -92,6 +92,7 @@ struct STriangleGeomInfo MaterialPacked material; uint64_t vertexBufferAddress; uint64_t indexBufferAddress; + uint64_t normalBufferAddress; uint32_t vertexStride : 26; uint32_t objType: 3; @@ -238,8 +239,6 @@ enum ObjectType : uint32_t // matches c++ OT_COUNT }; -static uint32_t s_offsetsToNormalBytes[OT_COUNT] = { 18, 24, 24, 20, 20, 24, 16, 12 }; // based on normals data position - float32_t3 computeDiffuse(Material mat, float32_t3 light_dir, float32_t3 normal) { float32_t dotNL = max(dot(normal, light_dir), 0.0); 
@@ -271,85 +270,6 @@ float3 unpackNormals3x10(uint32_t v) return clamp(float3(pn) / 511.0, -1.0, 1.0); } -float32_t3 fetchVertexNormal(int instID, int primID, STriangleGeomInfo geom, float2 bary) -{ - uint idxOffset = primID * 3; - - const uint indexType = geom.indexType; - const uint vertexStride = geom.vertexStride; - - const uint32_t objType = geom.objType; - const uint64_t indexBufferAddress = geom.indexBufferAddress; - - uint i0, i1, i2; - switch (indexType) - { - case 0: // EIT_16BIT - { - i0 = uint32_t(vk::RawBufferLoad < uint16_t > (indexBufferAddress + (idxOffset + 0) * sizeof(uint16_t), 2u)); - i1 = uint32_t(vk::RawBufferLoad < uint16_t > (indexBufferAddress + (idxOffset + 1) * sizeof(uint16_t), 2u)); - i2 = uint32_t(vk::RawBufferLoad < uint16_t > (indexBufferAddress + (idxOffset + 2) * sizeof(uint16_t), 2u)); - } - break; - case 1: // EIT_32BIT - { - i0 = vk::RawBufferLoad < uint32_t > (indexBufferAddress + (idxOffset + 0) * sizeof(uint32_t)); - i1 = vk::RawBufferLoad < uint32_t > (indexBufferAddress + (idxOffset + 1) * sizeof(uint32_t)); - i2 = vk::RawBufferLoad < uint32_t > (indexBufferAddress + (idxOffset + 2) * sizeof(uint32_t)); - } - break; - default: // EIT_NONE - { - i0 = idxOffset; - i1 = idxOffset + 1; - i2 = idxOffset + 2; - } - } - - const uint64_t normalVertexBufferAddress = geom.vertexBufferAddress + s_offsetsToNormalBytes[objType]; - float3 n0, n1, n2; - switch (objType) - { - case OT_CUBE: - { - uint32_t v0 = vk::RawBufferLoad < uint32_t > (normalVertexBufferAddress + i0 * vertexStride, 2u); - uint32_t v1 = vk::RawBufferLoad < uint32_t > (normalVertexBufferAddress + i1 * vertexStride, 2u); - uint32_t v2 = vk::RawBufferLoad < uint32_t > (normalVertexBufferAddress + i2 * vertexStride, 2u); - - n0 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v0).xyz); - n1 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v1).xyz); - n2 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v2).xyz); - } - break; - case OT_SPHERE: - case OT_CYLINDER: - case OT_ARROW: - 
case OT_CONE: - { - uint32_t v0 = vk::RawBufferLoad < uint32_t > (normalVertexBufferAddress + i0 * vertexStride); - uint32_t v1 = vk::RawBufferLoad < uint32_t > (normalVertexBufferAddress + i1 * vertexStride); - uint32_t v2 = vk::RawBufferLoad < uint32_t > (normalVertexBufferAddress + i2 * vertexStride); - - n0 = normalize(unpackNormals3x10(v0)); - n1 = normalize(unpackNormals3x10(v1)); - n2 = normalize(unpackNormals3x10(v2)); - } - break; - case OT_RECTANGLE: - case OT_DISK: - case OT_ICOSPHERE: - default: - { - n0 = vk::RawBufferLoad < float3 > (normalVertexBufferAddress + i0 * vertexStride); - n1 = vk::RawBufferLoad < float3 > (normalVertexBufferAddress + i1 * vertexStride); - n2 = vk::RawBufferLoad < float3 > (normalVertexBufferAddress + i2 * vertexStride); - } - } - - float3 barycentrics = float3(0.0, bary); - barycentrics.x = 1.0 - barycentrics.y - barycentrics.z; - return normalize(barycentrics.x * n0 + barycentrics.y * n1 + barycentrics.z * n2); -} #endif namespace nbl diff --git a/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl index cf68e52eb..b513d5958 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl @@ -2,6 +2,77 @@ [[vk::push_constant]] SPushConstants pc; +float32_t3 fetchVertexNormal(int instID, int primID, STriangleGeomInfo geom, float2 bary) +{ + uint idxOffset = primID * 3; + + const uint indexType = geom.indexType; + const uint vertexStride = geom.vertexStride; + + const uint32_t objType = geom.objType; + const uint64_t indexBufferAddress = geom.indexBufferAddress; + + uint i0, i1, i2; + switch (indexType) + { + case 0: // EIT_16BIT + { + i0 = uint32_t(vk::RawBufferLoad < uint16_t > (indexBufferAddress + (idxOffset + 0) * sizeof(uint16_t), 2u)); + i1 = uint32_t(vk::RawBufferLoad < uint16_t > (indexBufferAddress + (idxOffset + 1) * sizeof(uint16_t), 2u)); + i2 = uint32_t(vk::RawBufferLoad < uint16_t > 
(indexBufferAddress + (idxOffset + 2) * sizeof(uint16_t), 2u)); + } + break; + case 1: // EIT_32BIT + { + i0 = vk::RawBufferLoad < uint32_t > (indexBufferAddress + (idxOffset + 0) * sizeof(uint32_t)); + i1 = vk::RawBufferLoad < uint32_t > (indexBufferAddress + (idxOffset + 1) * sizeof(uint32_t)); + i2 = vk::RawBufferLoad < uint32_t > (indexBufferAddress + (idxOffset + 2) * sizeof(uint32_t)); + } + break; + default: // EIT_NONE + { + i0 = idxOffset; + i1 = idxOffset + 1; + i2 = idxOffset + 2; + } + } + + const uint64_t normalBufferAddress = geom.normalBufferAddress; + + // per-vertex normals of the hit triangle, filled in by the format-specific branch below + float3 n0, n1, n2; + switch (objType) + { + case OT_CUBE: + case OT_SPHERE: + case OT_RECTANGLE: + case OT_CYLINDER: + //case OT_ARROW: + case OT_CONE: + { + // normals are read as one 4-byte word per vertex (stride 4) and decoded as snorm4x8 + uint32_t v0 = vk::RawBufferLoad(normalBufferAddress + i0 * 4); + uint32_t v1 = vk::RawBufferLoad(normalBufferAddress + i1 * 4); + uint32_t v2 = vk::RawBufferLoad(normalBufferAddress + i2 * 4); + + n0 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v0).xyz); + n1 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v1).xyz); + n2 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v2).xyz); + } + break; + case OT_ICOSPHERE: + default: + { + n0 = normalize(vk::RawBufferLoad<float3>(normalBufferAddress + i0 * 12)); + n1 = normalize(vk::RawBufferLoad<float3>(normalBufferAddress + i1 * 12)); + n2 = normalize(vk::RawBufferLoad<float3>(normalBufferAddress + i2 * 12)); + } + } + + float3 barycentrics = float3(0.0, bary); + barycentrics.x = 1.0 - barycentrics.y - barycentrics.z; + return normalize(barycentrics.x * n0 + barycentrics.y * n1 + barycentrics.z * n2); +} [shader("closesthit")] void main(inout PrimaryPayload payload, in BuiltInTriangleIntersectionAttributes attribs) diff --git a/71_RayTracingPipeline/include/common.hpp b/71_RayTracingPipeline/include/common.hpp index 184d424c7..479b7fff6 100644 --- a/71_RayTracingPipeline/include/common.hpp 
+++ b/71_RayTracingPipeline/include/common.hpp @@ -45,40 +45,15 @@ struct ObjectMeta std::string_view name = "Unknown"; }; -struct ObjectDrawHookCpu -{ - nbl::core::matrix3x4SIMD model; - ObjectMeta meta; -}; - struct ReferenceObjectCpu { ObjectMeta meta; core::smart_refctd_ptr data; Material material; core::matrix3x4SIMD transform; -}; -struct ReferenceObjectGpu -{ - struct Bindings - { - nbl::asset::SBufferBinding vertex, index; - }; - - ObjectMeta meta; - Bindings bindings; - uint32_t vertexStride; - nbl::asset::E_INDEX_TYPE indexType = nbl::asset::E_INDEX_TYPE::EIT_UNKNOWN; - uint32_t indexCount = {}; - MaterialPacked material; - core::matrix3x4SIMD transform; - - const bool useIndex() const - { - return bindings.index.buffer && (indexType != E_INDEX_TYPE::EIT_UNKNOWN); - } }; + } #endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index 382e5cccb..c47eea1c4 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -6,11 +6,13 @@ #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/indirect_commands.hlsl" +#include "nbl/examples/common/BuiltinResourcesApplication.hpp" -class RaytracingPipelineApp final : public SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication + +class RaytracingPipelineApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { using device_base_t = SimpleWindowedApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using asset_base_t = BuiltinResourcesApplication; using clock_t = std::chrono::steady_clock; constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; @@ -1220,7 +1222,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public app } else { - auto triangles = make_refctd_dynamic_array>>(cpuObjects[i].data->exportForBLAS()); + auto triangles = 
make_refctd_dynamic_array>>(1u); auto primitiveCounts = make_refctd_dynamic_array>(1u); auto& tri = triangles->front(); @@ -1228,6 +1230,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public app auto& primCount = primitiveCounts->front(); primCount = cpuObjects[i].data->getPrimitiveCount(); + tri = cpuObjects[i].data->exportForBLAS(); tri.geometryFlags = cpuObjects[i].material.isTransparent() ? IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::NO_DUPLICATE_ANY_HIT_INVOCATION_BIT : IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::OPAQUE_BIT; @@ -1257,7 +1260,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public app inst.base.blas = cpuBlasList[i]; inst.base.flags = static_cast(IGPUTopLevelAccelerationStructure::INSTANCE_FLAGS::TRIANGLE_FACING_CULL_DISABLE_BIT); inst.base.instanceCustomIndex = i; - inst.base.instanceShaderBindingTableRecordOffset = isProceduralInstance ? 2 : 0;; + inst.base.instanceShaderBindingTableRecordOffset = isProceduralInstance ? 2 : 0; inst.base.mask = 0xFF; inst.transform = isProceduralInstance ? 
matrix3x4SIMD() : cpuObjects[i].transform; @@ -1305,19 +1308,22 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public app inputs.allocator = &myalloc; std::array tmpTlas; - std::array tmpGeometries; std::array tmpBuffers; + std::array tmpGeometries; + std::array, std::size(cpuObjects)> tmpGeometryPatches; { tmpTlas[0] = cpuTlas.get(); tmpBuffers[0] = cpuProcBuffer.get(); for (uint32_t i = 0; i < cpuObjects.size(); i++) { tmpGeometries[i] = cpuObjects[i].data.get(); + tmpGeometryPatches[i].indexBufferUsages= IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; } std::get>(inputs.assets) = tmpTlas; std::get>(inputs.assets) = tmpBuffers; std::get>(inputs.assets) = tmpGeometries; + std::get>(inputs.patches) = tmpGeometryPatches; } auto reservation = converter->reserve(inputs); @@ -1346,6 +1352,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public app prepass.template operator() < ICPUTopLevelAccelerationStructure > (tmpTlas); prepass.template operator() < ICPUBuffer > (tmpBuffers); + prepass.template operator() < ICPUPolygonGeometry > (tmpGeometries); } constexpr auto CompBufferCount = 2; @@ -1425,25 +1432,37 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public app auto&& tlases = reservation.getGPUObjects(); m_gpuTlas = tlases[0].value; auto&& buffers = reservation.getGPUObjects(); + m_proceduralAabbBuffer = buffers[0].value; - m_proceduralAabbBuffer = buffers[2 * proceduralBlasIdx].value; + auto&& gpuPolygonGeometries = reservation.getGPUObjects(); + m_gpuPolygons.resize(gpuPolygonGeometries.size()); - for (uint32_t i = 0; i < cpuObjects.size(); i++) + for (uint32_t i = 0; i < gpuPolygonGeometries.size(); i++) { const auto& cpuObject = cpuObjects[i]; - const auto& cpuBlas = cpuBlasList[i]; - const auto& geometry = cpuBlas->getTriangleGeometries()[0]; - const uint64_t vertexBufferAddress = buffers[2 * i].value->getDeviceAddress(); - const uint64_t indexBufferAddress = buffers[(2 * i) 
+ 1].value->getDeviceAddress(); - geomInfos[i] = { + const auto& gpuPolygon = gpuPolygonGeometries[i].value; + const auto gpuTriangles = gpuPolygon->exportForBLAS(); + + const auto& vertexBufferBinding = gpuTriangles.vertexData[0]; + const uint64_t vertexBufferAddress = vertexBufferBinding.buffer->getDeviceAddress() + vertexBufferBinding.offset; + + const auto& normalView = gpuPolygon->getNormalView(); + const uint64_t normalBufferAddress = normalView ? normalView.src.buffer->getDeviceAddress() + normalView.src.offset : 0; + + const auto& indexBufferBinding = gpuTriangles.indexData; + auto& geomInfo = geomInfos[i]; + geomInfo = { .material = hlsl::_static_cast(cpuObject.material), .vertexBufferAddress = vertexBufferAddress, - .indexBufferAddress = geometry.indexData.buffer ? indexBufferAddress : vertexBufferAddress, - .vertexStride = geometry.vertexStride, + .indexBufferAddress = indexBufferBinding.buffer ? indexBufferBinding.buffer->getDeviceAddress() + indexBufferBinding.offset : vertexBufferAddress, + .normalBufferAddress = normalBufferAddress, + .vertexStride = gpuTriangles.vertexStride, .objType = cpuObject.meta.type, - .indexType = geometry.indexType, + .indexType = gpuTriangles.indexType, .smoothNormals = scene::s_smoothNormals[cpuObject.meta.type], }; + + m_gpuPolygons[i] = gpuPolygon; } } @@ -1508,6 +1527,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public app core::vector m_gpuIntersectionSpheres; uint32_t m_intersectionHitGroupIdx; + core::vector> m_gpuPolygons; smart_refctd_ptr m_gpuTlas; smart_refctd_ptr m_instanceBuffer; diff --git a/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp index 2798cfed7..2993725a0 100644 --- a/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp +++ b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp @@ -66,6 +66,10 @@ class CGeometryCreatorScene : public core::IReferenceCounted 
addGeometry("Cube",creator->createCube({1.f,1.f,1.f})); addGeometry("Rectangle",creator->createRectangle({1.5f,3.f})); addGeometry("Disk",creator->createDisk(2.f,30)); + addGeometry("Sphere", creator->createSphere(2, 16, 16)); + addGeometry("Cylinder", creator->createCylinder(2, 2, 20)); + addGeometry("Cone", creator->createCone(2, 3, 10)); + addGeometry("Icosphere", creator->createIcoSphere(1, 4, true)); } init.geometries.reserve(init.geometryNames.size());