From 59a8ab0357687ef7593aecf54c99a270110ab314 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 18 Feb 2025 13:39:44 +1100 Subject: [PATCH 01/20] Add rs_allocate_closure free function. Signed-off-by: Curtis Black --- src/include/OSL/rs_free_function.h | 6 + src/liboslexec/CMakeLists.txt | 3 +- src/liboslexec/llvm_gen.cpp | 22 ++- src/liboslexec/opclosure.cpp | 64 +++++++-- src/liboslexec/oslexec_pvt.h | 42 +----- src/liboslexec/rs_fallback.cpp | 12 ++ src/testrender/cuda/rend_lib.cu | 208 ----------------------------- src/testshade/rs_simplerend.cpp | 14 ++ testsuite/example-cuda/rend_lib.cu | 188 -------------------------- 9 files changed, 97 insertions(+), 462 deletions(-) diff --git a/src/include/OSL/rs_free_function.h b/src/include/OSL/rs_free_function.h index de9b3316f..a3e2b0336 100644 --- a/src/include/OSL/rs_free_function.h +++ b/src/include/OSL/rs_free_function.h @@ -316,6 +316,12 @@ OSL_RSOP OSL_HOSTDEVICE bool rs_trace_get(OSL::OpaqueExecContextPtr oec, OSL::ustringhash name, OSL::TypeDesc type, void* val, bool derivatives); +/// Allocates memory for a closure color. May return null if no memory could +/// be allocated. +OSL_RSOP OSL_HOSTDEVICE void* +rs_allocate_closure(OSL::OpaqueExecContextPtr oec, size_t size, + size_t alignment); + /// Report errors, warnings, printf, and fprintf. /// Fmtlib style format specifier is used (vs. printf style) /// Arguments are represented as EncodedTypes (encodedtypes.h) and diff --git a/src/liboslexec/CMakeLists.txt b/src/liboslexec/CMakeLists.txt index 57cb7d59b..986b6026f 100644 --- a/src/liboslexec/CMakeLists.txt +++ b/src/liboslexec/CMakeLists.txt @@ -194,7 +194,7 @@ if (USE_LLVM_BITCODE) EMBED_LLVM_BITCODE_IN_CPP ( "${llvm_ops_srcs}" "_host" "osl_llvm_compiled_ops" lib_src "" "${include_dirs}") set (rs_dependent_ops_srcs - opmatrix.cpp opfmt.cpp optexture.cpp pointcloud.cpp + opmatrix.cpp opfmt.cpp optexture.cpp pointcloud.cpp opclosure.cpp ) # Achieve the effect of absorbing osl_llvm_compiled_ops by adding its # sources to rs_dependent_ops_srcs which avoids having to do it at runtime. @@ -217,6 +217,7 @@ if (USE_LLVM_BITCODE) ${CMAKE_SOURCE_DIR}/src/liboslexec/opmatrix.cpp ${CMAKE_SOURCE_DIR}/src/liboslexec/optexture.cpp ${CMAKE_SOURCE_DIR}/src/liboslexec/pointcloud.cpp + ${CMAKE_SOURCE_DIR}/src/liboslexec/opclosure.cpp ${CMAKE_SOURCE_DIR}/src/liboslnoise/gabornoise.cpp ${CMAKE_SOURCE_DIR}/src/liboslnoise/simplexnoise.cpp ) diff --git a/src/liboslexec/llvm_gen.cpp b/src/liboslexec/llvm_gen.cpp index 2e7c4bb7e..cef146b2a 100644 --- a/src/liboslexec/llvm_gen.cpp +++ b/src/liboslexec/llvm_gen.cpp @@ -3850,19 +3850,15 @@ LLVMGEN(llvm_gen_closure) id_int, size_int); llvm::Value* comp_void_ptr = return_ptr; - // For the weighted closures, we need a surrounding "if" so that it's safe - // for osl_allocate_weighted_closure_component to return NULL (unless we - // know for sure that it's constant weighted and that the weight is - // not zero). - llvm::BasicBlock* next_block = NULL; - if (weighted && !(weight->is_constant() && !rop.is_zero(*weight))) { - llvm::BasicBlock* notnull_block = rop.ll.new_basic_block( - "non_null_closure"); - next_block = rop.ll.new_basic_block(""); - llvm::Value* cond = rop.ll.op_ne(return_ptr, rop.ll.void_ptr_null()); - rop.ll.op_branch(cond, notnull_block, next_block); - // new insert point is nonnull_block - } + // We need a surrounding "if" so that it's safe for closure allocation to + // return NULL, either because it has zero weight, or renderer services ran + // out of memory in the closure pool. + llvm::BasicBlock* notnull_block = rop.ll.new_basic_block( + "non_null_closure"); + llvm::BasicBlock* next_block = rop.ll.new_basic_block(""); + llvm::Value* cond = rop.ll.op_ne(return_ptr, rop.ll.void_ptr_null()); + rop.ll.op_branch(cond, notnull_block, next_block); + // new insert point is nonnull_block llvm::Value* comp_ptr = rop.ll.ptr_cast(comp_void_ptr, rop.llvm_type_closure_component_ptr()); diff --git a/src/liboslexec/opclosure.cpp b/src/liboslexec/opclosure.cpp index 6aa32a072..ff047037c 100644 --- a/src/liboslexec/opclosure.cpp +++ b/src/liboslexec/opclosure.cpp @@ -7,6 +7,7 @@ #include "oslexec_pvt.h" #include +#include OSL_NAMESPACE_BEGIN @@ -14,7 +15,7 @@ namespace pvt { -OSL_SHADEOP const ClosureColor* +OSL_SHADEOP OSL_HOSTDEVICE const ClosureColor* osl_add_closure_closure(ShaderGlobals* sg, const ClosureColor* a, const ClosureColor* b) { @@ -22,12 +23,19 @@ osl_add_closure_closure(ShaderGlobals* sg, const ClosureColor* a, return b; if (b == NULL) return a; - return sg->context->closure_add_allot(a, b); + ClosureAdd* add = (ClosureAdd*)rs_allocate_closure(sg, sizeof(ClosureAdd), + alignof(ClosureAdd)); + if (add) { + add->id = ClosureColor::ADD; + add->closureA = a; + add->closureB = b; + } + return add; } -OSL_SHADEOP const ClosureColor* -osl_mul_closure_color(ShaderGlobals* sg, ClosureColor* a, const Color3* w) +OSL_SHADEOP OSL_HOSTDEVICE const ClosureColor* +osl_mul_closure_color(ShaderGlobals* sg, const ClosureColor* a, const Color3* w) { if (a == NULL) return NULL; @@ -35,12 +43,19 @@ osl_mul_closure_color(ShaderGlobals* sg, ClosureColor* a, const Color3* w) return NULL; if (w->x == 1.0f && w->y == 1.0f && w->z == 1.0f) return a; - return sg->context->closure_mul_allot(*w, a); + ClosureMul* mul = (ClosureMul*)rs_allocate_closure(sg, sizeof(ClosureMul), + alignof(ClosureMul)); + if (mul) { + mul->id = ClosureColor::MUL; + mul->weight = *w; + mul->closure = a; + } + return mul; } -OSL_SHADEOP const ClosureColor* -osl_mul_closure_float(ShaderGlobals* sg, ClosureColor* a, float w) +OSL_SHADEOP OSL_HOSTDEVICE const ClosureColor* +osl_mul_closure_float(ShaderGlobals* sg, const ClosureColor* a, float w) { if (a == NULL) return NULL; @@ -48,25 +63,50 @@ osl_mul_closure_float(ShaderGlobals* sg, ClosureColor* a, float w) return NULL; if (w == 1.0f) return a; - return sg->context->closure_mul_allot(w, a); + ClosureMul* mul = (ClosureMul*)rs_allocate_closure(sg, sizeof(ClosureMul), + alignof(ClosureMul)); + if (mul) { + mul->id = ClosureColor::MUL; + mul->weight = Color3(w); + mul->closure = a; + } + return mul; } -OSL_SHADEOP ClosureComponent* +OSL_SHADEOP OSL_HOSTDEVICE ClosureComponent* osl_allocate_closure_component(ShaderGlobals* sg, int id, int size) { - return sg->context->closure_component_allot(id, size, Color3(1.0f)); + // Allocate the component and the mul back to back + const size_t needed = sizeof(ClosureComponent) + size; + ClosureComponent* comp + = (ClosureComponent*)rs_allocate_closure(sg, needed, + alignof(ClosureComponent)); + if (comp) { + comp->id = id; + comp->w = Color3(1.0f); + } + return comp; } -OSL_SHADEOP ClosureColor* +OSL_SHADEOP OSL_HOSTDEVICE ClosureColor* osl_allocate_weighted_closure_component(ShaderGlobals* sg, int id, int size, const Color3* w) { if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) return NULL; - return sg->context->closure_component_allot(id, size, *w); + // Allocate the component and the mul back to back + const size_t needed = sizeof(ClosureComponent) + size; + ClosureComponent* comp + = (ClosureComponent*)rs_allocate_closure(sg, needed, + alignof(ClosureComponent)); + if (comp) { + comp->id = id; + comp->w = *w; + } + return comp; } // Deprecated, remove when conversion from ustring to ustringhash is finished diff --git a/src/liboslexec/oslexec_pvt.h b/src/liboslexec/oslexec_pvt.h index fa2a95628..cc6c1c47a 100644 --- a/src/liboslexec/oslexec_pvt.h +++ b/src/liboslexec/oslexec_pvt.h @@ -2242,49 +2242,11 @@ class OSLEXECPUBLIC ShadingContext { } #endif - ClosureComponent* closure_component_allot(int id, size_t prim_size, - const Color3& w) + void* allocate_closure(size_t size, size_t alignment) { - // Allocate the component and the mul back to back - size_t needed = sizeof(ClosureComponent) + prim_size; - ClosureComponent* comp = (ClosureComponent*)m_closure_pool.alloc( - needed, alignof(ClosureComponent)); - comp->id = id; - comp->w = w; - return comp; + return m_closure_pool.alloc(size, alignment); } - ClosureMul* closure_mul_allot(const Color3& w, const ClosureColor* c) - { - ClosureMul* mul = (ClosureMul*)m_closure_pool.alloc(sizeof(ClosureMul), - alignof(ClosureMul)); - mul->id = ClosureColor::MUL; - mul->weight = w; - mul->closure = c; - return mul; - } - - ClosureMul* closure_mul_allot(float w, const ClosureColor* c) - { - ClosureMul* mul = (ClosureMul*)m_closure_pool.alloc(sizeof(ClosureMul), - alignof(ClosureMul)); - mul->id = ClosureColor::MUL; - mul->weight.setValue(w, w, w); - mul->closure = c; - return mul; - } - - ClosureAdd* closure_add_allot(const ClosureColor* a, const ClosureColor* b) - { - ClosureAdd* add = (ClosureAdd*)m_closure_pool.alloc(sizeof(ClosureAdd), - alignof(ClosureAdd)); - add->id = ClosureColor::ADD; - add->closureA = a; - add->closureB = b; - return add; - } - - /// Find the named symbol in the (already-executed!) stack of shaders of /// the given use. If a layer is given, search just that layer. If no /// layer is specified, priority is given to later laters over earlier diff --git a/src/liboslexec/rs_fallback.cpp b/src/liboslexec/rs_fallback.cpp index 374a8c365..2aeefdebd 100644 --- a/src/liboslexec/rs_fallback.cpp +++ b/src/liboslexec/rs_fallback.cpp @@ -316,6 +316,18 @@ rs_trace_get(OSL::OpaqueExecContextPtr exec_ctx, OSL::ustringhash name, #endif } +OSL_RSOP OSL_HOSTDEVICE void* +rs_allocate_closure(OSL::OpaqueExecContextPtr exec_ctx, size_t size, + size_t alignment) +{ +#ifndef __CUDA_ARCH__ + auto sg = get_sg(exec_ctx); + return sg->context->allocate_closure(size, alignment); +#else + return nullptr; +#endif +} + OSL_RSOP OSL_HOSTDEVICE void rs_errorfmt(OSL::OpaqueExecContextPtr exec_ctx, OSL::ustringhash fmt_specification, int32_t count, diff --git a/src/testrender/cuda/rend_lib.cu b/src/testrender/cuda/rend_lib.cu index 4b36ab6be..d44b68b99 100644 --- a/src/testrender/cuda/rend_lib.cu +++ b/src/testrender/cuda/rend_lib.cu @@ -30,17 +30,6 @@ extern __device__ CUdeviceptr xform_buffer; } // namespace pvt OSL_NAMESPACE_END - -// Taken from the SimplePool class -__device__ static inline size_t -alignment_offset_calc(void* ptr, size_t alignment) -{ - uintptr_t ptrbits = reinterpret_cast(ptr); - uintptr_t offset = ((ptrbits + alignment - 1) & -alignment) - ptrbits; - return offset; -} - - // These functions are declared extern to prevent name mangling. extern "C" { @@ -50,203 +39,6 @@ __direct_callable__dummy_rend_lib() { } - -__device__ void* -closure_component_allot(void* pool, int id, size_t prim_size, - const OSL::Color3& w) -{ - ((OSL::ClosureComponent*)pool)->id = id; - ((OSL::ClosureComponent*)pool)->w = w; - - size_t needed = (sizeof(OSL::ClosureComponent) + prim_size - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return (void*)&char_ptr[needed]; -} - - -__device__ void* -closure_mul_allot(void* pool, const OSL::Color3& w, OSL::ClosureColor* c) -{ - ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; - ((OSL::ClosureMul*)pool)->weight = w; - ((OSL::ClosureMul*)pool)->closure = c; - - size_t needed = (sizeof(OSL::ClosureMul) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; -} - - -__device__ void* -closure_mul_float_allot(void* pool, const float& w, OSL::ClosureColor* c) -{ - ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; - ((OSL::ClosureMul*)pool)->weight.x = w; - ((OSL::ClosureMul*)pool)->weight.y = w; - ((OSL::ClosureMul*)pool)->weight.z = w; - ((OSL::ClosureMul*)pool)->closure = c; - - size_t needed = (sizeof(OSL::ClosureMul) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; -} - - -__device__ void* -closure_add_allot(void* pool, OSL::ClosureColor* a, OSL::ClosureColor* b) -{ - ((OSL::ClosureAdd*)pool)->id = OSL::ClosureColor::ADD; - ((OSL::ClosureAdd*)pool)->closureA = a; - ((OSL::ClosureAdd*)pool)->closureB = b; - - size_t needed = (sizeof(OSL::ClosureAdd) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; -} - - -__device__ void* -osl_allocate_closure_component(void* sg_, int id, int size) -{ - OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; - - OSL::Color3 w = OSL::Color3(1, 1, 1); - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - - size = max(4, size); - - sg_ptr->renderstate = closure_component_allot(ret, id, size, w); - - return ret; -} - - - -__device__ void* -osl_allocate_weighted_closure_component(void* sg_, int id, int size, - const void* w) -{ - OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; - - const OSL::Color3* wc - = (const OSL::Color3*)__builtin_assume_aligned(w, alignof(float)); - - if (wc->x == 0.0f && wc->y == 0.0f && wc->z == 0.0f) { - return NULL; - } - - size = max(4, size); - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_component_allot(ret, id, size, *wc); - - return ret; -} - - - -__device__ void* -osl_mul_closure_color(void* sg_, void* a, const void* w) -{ - OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; - const OSL::Color3* wc - = (const OSL::Color3*)__builtin_assume_aligned(w, alignof(float)); - - if (a == NULL) { - return NULL; - } - - if (wc->x == 0.0f && wc->y == 0.0f && wc->z == 0.0f) { - return NULL; - } - - if (wc->x == 1.0f && wc->y == 1.0f && wc->z == 1.0f) { - return a; - } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_mul_allot(ret, *wc, (OSL::ClosureColor*)a); - - return ret; -} - - - -__device__ void* -osl_mul_closure_float(void* sg_, void* a, float w) -{ - a = __builtin_assume_aligned(a, alignof(float)); - - OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; - - if (a == NULL || w == 0.0f) { - return NULL; - } - - if (w == 1.0f) { - return a; - } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_mul_float_allot(ret, w, - (OSL::ClosureColor*)a); - - return ret; -} - - - -__device__ void* -osl_add_closure_closure(void* sg_, void* a, void* b) -{ - a = __builtin_assume_aligned(a, alignof(float)); - b = __builtin_assume_aligned(b, alignof(float)); - - OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; - - if (a == NULL) { - return b; - } - - if (b == NULL) { - return a; - } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_add_allot(ret, (OSL::ClosureColor*)a, - (OSL::ClosureColor*)b); - - return ret; -} - - #define IS_STRING(type) (type.basetype == OSL::TypeDesc::STRING) #define IS_PTR(type) (type.basetype == OSL::TypeDesc::PTR) #define IS_COLOR(type) (type.vecsemantics == OSL::TypeDesc::COLOR) diff --git a/src/testshade/rs_simplerend.cpp b/src/testshade/rs_simplerend.cpp index 13c3ce45c..6fa866c24 100644 --- a/src/testshade/rs_simplerend.cpp +++ b/src/testshade/rs_simplerend.cpp @@ -368,6 +368,20 @@ rs_trace_get(OSL::OpaqueExecContextPtr ec, OSL::ustringhash name, #endif } +OSL_RSOP OSL_HOSTDEVICE void* +rs_allocate_closure(OSL::OpaqueExecContextPtr ec, size_t size, size_t alignment) +{ + auto sg = (OSL::ShaderGlobals*)ec; +#ifndef __CUDA_ARCH__ + return sg->context->allocate_closure(size, alignment); +#else + uintptr_t ptr = OIIO::round_to_multiple_of_pow2((uintptr_t)sg.renderstate, + alignment); + sg.renderstate = (void*)(ptr + size); + return (void*)ptr; +#endif +} + OSL_RSOP OSL_HOSTDEVICE bool rs_get_attribute_constant_string(OSL::ustringhash value, void* result) { diff --git a/testsuite/example-cuda/rend_lib.cu b/testsuite/example-cuda/rend_lib.cu index 01e75bb37..bac46e936 100644 --- a/testsuite/example-cuda/rend_lib.cu +++ b/testsuite/example-cuda/rend_lib.cu @@ -14,197 +14,9 @@ extern __device__ char* s_color_system; } OSL_NAMESPACE_END -// Taken from the SimplePool class -__device__ static inline size_t -alignment_offset_calc(void* ptr, size_t alignment) -{ - uintptr_t ptrbits = reinterpret_cast(ptr); - uintptr_t offset = ((ptrbits + alignment - 1) & -alignment) - ptrbits; - return offset; -} - // These functions are declared extern to prevent name mangling. extern "C" { -__device__ void* -closure_component_allot(void* pool, int id, size_t prim_size, - const OSL::Color3& w) -{ - ((OSL::ClosureComponent*)pool)->id = id; - ((OSL::ClosureComponent*)pool)->w = w; - - size_t needed = (sizeof(OSL::ClosureComponent) - sizeof(void*) + prim_size - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return (void*)&char_ptr[needed]; -} - - -__device__ void* -closure_mul_allot(void* pool, const OSL::Color3& w, OSL::ClosureColor* c) -{ - ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; - ((OSL::ClosureMul*)pool)->weight = w; - ((OSL::ClosureMul*)pool)->closure = c; - - size_t needed = (sizeof(OSL::ClosureMul) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; -} - - -__device__ void* -closure_mul_float_allot(void* pool, const float& w, OSL::ClosureColor* c) -{ - ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; - ((OSL::ClosureMul*)pool)->weight.x = w; - ((OSL::ClosureMul*)pool)->weight.y = w; - ((OSL::ClosureMul*)pool)->weight.z = w; - ((OSL::ClosureMul*)pool)->closure = c; - - size_t needed = (sizeof(OSL::ClosureMul) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; -} - - -__device__ void* -closure_add_allot(void* pool, OSL::ClosureColor* a, OSL::ClosureColor* b) -{ - ((OSL::ClosureAdd*)pool)->id = OSL::ClosureColor::ADD; - ((OSL::ClosureAdd*)pool)->closureA = a; - ((OSL::ClosureAdd*)pool)->closureB = b; - - size_t needed = (sizeof(OSL::ClosureAdd) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; -} - - -__device__ void* -osl_allocate_closure_component(void* sg_, int id, int size) -{ - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; - - OSL::Color3 w = OSL::Color3(1, 1, 1); - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - - size = max(4, size); - - sg_ptr->renderstate = closure_component_allot(ret, id, size, w); - - return ret; -} - - -__device__ void* -osl_allocate_weighted_closure_component(void* sg_, int id, int size, - const OSL::Color3* w) -{ - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; - - if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) { - return NULL; - } - - size = max(4, size); - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_component_allot(ret, id, size, *w); - - return ret; -} - - -__device__ void* -osl_mul_closure_color(void* sg_, OSL::ClosureColor* a, const OSL::Color3* w) -{ - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; - - if (a == NULL) { - return NULL; - } - - if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) { - return NULL; - } - - if (w->x == 1.0f && w->y == 1.0f && w->z == 1.0f) { - return a; - } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_mul_allot(ret, *w, a); - - return ret; -} - - -__device__ void* -osl_mul_closure_float(void* sg_, OSL::ClosureColor* a, float w) -{ - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; - - if (a == NULL || w == 0.0f) { - return NULL; - } - - if (w == 1.0f) { - return a; - } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_mul_float_allot(ret, w, a); - - return ret; -} - - -__device__ void* -osl_add_closure_closure(void* sg_, OSL::ClosureColor* a, OSL::ClosureColor* b) -{ - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; - - if (a == NULL) { - return b; - } - - if (b == NULL) { - return a; - } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_add_allot(ret, a, b); - - return ret; -} - #define IS_STRING(type) (type.basetype == OSL::TypeDesc::STRING) #define IS_PTR(type) (type.basetype == OSL::TypeDesc::PTR) #define IS_COLOR(type) (type.vecsemantics == OSL::TypeDesc::COLOR) From dd273dd0a19fc133f57328100616c13597325619 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 18 Feb 2025 13:59:32 +1100 Subject: [PATCH 02/20] Add missing include Signed-off-by: Curtis Black --- src/liboslexec/rs_fallback.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/liboslexec/rs_fallback.cpp b/src/liboslexec/rs_fallback.cpp index 2aeefdebd..2526a81d0 100644 --- a/src/liboslexec/rs_fallback.cpp +++ b/src/liboslexec/rs_fallback.cpp @@ -8,6 +8,7 @@ #include +#include "oslexec_pvt.h" // Fallback is to reroute calls back through the virtual function // based RendererServices from ShaderGlobals. From b4a17e20833b3de84a4560544b18dcef2b3ca03c Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 18 Feb 2025 14:30:07 +1100 Subject: [PATCH 03/20] Add missing include Signed-off-by: Curtis Black --- src/testshade/rs_simplerend.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/testshade/rs_simplerend.cpp b/src/testshade/rs_simplerend.cpp index 6fa866c24..c89d39261 100644 --- a/src/testshade/rs_simplerend.cpp +++ b/src/testshade/rs_simplerend.cpp @@ -17,6 +17,8 @@ #include "render_state.h" +#include "oslexec_pvt.h" + // Keep free functions in sync with virtual function based SimpleRenderer. OSL_RSOP OSL_HOSTDEVICE bool From 097f5a7720df2de3cb718ed0b9688217878c2f82 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 18 Feb 2025 14:53:50 +1100 Subject: [PATCH 04/20] simplerend use rs_fallback Signed-off-by: Curtis Black --- src/testshade/rs_simplerend.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/testshade/rs_simplerend.cpp b/src/testshade/rs_simplerend.cpp index c89d39261..ead267367 100644 --- a/src/testshade/rs_simplerend.cpp +++ b/src/testshade/rs_simplerend.cpp @@ -17,8 +17,6 @@ #include "render_state.h" -#include "oslexec_pvt.h" - // Keep free functions in sync with virtual function based SimpleRenderer. OSL_RSOP OSL_HOSTDEVICE bool @@ -370,19 +368,17 @@ rs_trace_get(OSL::OpaqueExecContextPtr ec, OSL::ustringhash name, #endif } +#ifdef __CUDA_ARCH__ // Host side uses rs_fallback implementation. OSL_RSOP OSL_HOSTDEVICE void* rs_allocate_closure(OSL::OpaqueExecContextPtr ec, size_t size, size_t alignment) { auto sg = (OSL::ShaderGlobals*)ec; -#ifndef __CUDA_ARCH__ - return sg->context->allocate_closure(size, alignment); -#else uintptr_t ptr = OIIO::round_to_multiple_of_pow2((uintptr_t)sg.renderstate, alignment); sg.renderstate = (void*)(ptr + size); return (void*)ptr; -#endif } +#endif OSL_RSOP OSL_HOSTDEVICE bool rs_get_attribute_constant_string(OSL::ustringhash value, void* result) From 6b0add9de47f488627a84775f1b4148594038542 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 18 Feb 2025 15:24:16 +1100 Subject: [PATCH 05/20] clang format Signed-off-by: Curtis Black --- src/testshade/rs_simplerend.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/testshade/rs_simplerend.cpp b/src/testshade/rs_simplerend.cpp index ead267367..640f1ac46 100644 --- a/src/testshade/rs_simplerend.cpp +++ b/src/testshade/rs_simplerend.cpp @@ -368,14 +368,14 @@ rs_trace_get(OSL::OpaqueExecContextPtr ec, OSL::ustringhash name, #endif } -#ifdef __CUDA_ARCH__ // Host side uses rs_fallback implementation. +#ifdef __CUDA_ARCH__ // Host side uses rs_fallback implementation. OSL_RSOP OSL_HOSTDEVICE void* rs_allocate_closure(OSL::OpaqueExecContextPtr ec, size_t size, size_t alignment) { - auto sg = (OSL::ShaderGlobals*)ec; - uintptr_t ptr = OIIO::round_to_multiple_of_pow2((uintptr_t)sg.renderstate, - alignment); - sg.renderstate = (void*)(ptr + size); + auto sg = (OSL::ShaderGlobals*)ec; + uintptr_t ptr = OIIO::round_to_multiple_of_pow2((uintptr_t)sg->renderstate, + alignment); + sg->renderstate = (void*)(ptr + size); return (void*)ptr; } #endif From 8332615128caf06f099866487c4717106eb88d0f Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 18 Feb 2025 16:15:39 +1100 Subject: [PATCH 06/20] match host/device function decls Signed-off-by: Curtis Black --- src/liboslexec/builtindecl.h | 17 +-------------- src/liboslexec/opclosure.cpp | 41 ++++++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/liboslexec/builtindecl.h b/src/liboslexec/builtindecl.h index c08b44db7..ae84b5236 100644 --- a/src/liboslexec/builtindecl.h +++ b/src/liboslexec/builtindecl.h @@ -109,21 +109,6 @@ DECL(osl_##name##_dvdvv, "xXXX") - -#ifndef __CUDA_ARCH__ -DECL(osl_add_closure_closure, "CXCC") -DECL(osl_mul_closure_float, "CXCf") -DECL(osl_mul_closure_color, "CXCc") -DECL(osl_allocate_closure_component, "CXii") -DECL(osl_allocate_weighted_closure_component, "CXiiX") -DECL(osl_closure_to_string, "sXC") -DECL(osl_closure_to_ustringhash, "hXC") -#else -// TODO: Figure out why trying to match the signatures between host and device -// definitions fails with 'LLVM had to make a cast' assertion failure. -// -// In the meantime, use a signature that matches the definitions in rend_lib.cu, -// where void* is used instead of ClosureColor* and ShaderGlobals*. DECL(osl_add_closure_closure, "XXXX") DECL(osl_mul_closure_float, "XXXf") DECL(osl_mul_closure_color, "XXXc") @@ -131,7 +116,7 @@ DECL(osl_allocate_closure_component, "XXii") DECL(osl_allocate_weighted_closure_component, "XXiiX") DECL(osl_closure_to_string, "sXX") DECL(osl_closure_to_ustringhash, "hXX") -#endif + DECL(osl_format, "hh*") DECL(osl_gen_ustringhash_pod, "hs") DECL(osl_gen_ustring, "sh") diff --git a/src/liboslexec/opclosure.cpp b/src/liboslexec/opclosure.cpp index ff047037c..e943ee900 100644 --- a/src/liboslexec/opclosure.cpp +++ b/src/liboslexec/opclosure.cpp @@ -15,10 +15,13 @@ namespace pvt { -OSL_SHADEOP OSL_HOSTDEVICE const ClosureColor* -osl_add_closure_closure(ShaderGlobals* sg, const ClosureColor* a, - const ClosureColor* b) +OSL_SHADEOP OSL_HOSTDEVICE const void* +osl_add_closure_closure(OpaqueExecContextPtr oec, const void* a_, + const void* b_) { + ShaderGlobals* sg = (ShaderGlobals*)oec; + const ClosureColor* a = (const ClosureColor*)a_; + const ClosureColor* b = (const ClosureColor*)b_; if (a == NULL) return b; if (b == NULL) @@ -34,9 +37,11 @@ osl_add_closure_closure(ShaderGlobals* sg, const ClosureColor* a, } -OSL_SHADEOP OSL_HOSTDEVICE const ClosureColor* -osl_mul_closure_color(ShaderGlobals* sg, const ClosureColor* a, const Color3* w) +OSL_SHADEOP OSL_HOSTDEVICE const void* +osl_mul_closure_color(OpaqueExecContextPtr oec, const void* a_, const Color3* w) { + ShaderGlobals* sg = (ShaderGlobals*)oec; + const ClosureColor* a = (const ClosureColor*)a_; if (a == NULL) return NULL; if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) @@ -54,9 +59,11 @@ osl_mul_closure_color(ShaderGlobals* sg, const ClosureColor* a, const Color3* w) } -OSL_SHADEOP OSL_HOSTDEVICE const ClosureColor* -osl_mul_closure_float(ShaderGlobals* sg, const ClosureColor* a, float w) +OSL_SHADEOP OSL_HOSTDEVICE const void* +osl_mul_closure_float(OpaqueExecContextPtr oec, const void* a_, float w) { + ShaderGlobals* sg = (ShaderGlobals*)oec; + const ClosureColor* a = (const ClosureColor*)a_; if (a == NULL) return NULL; if (w == 0.0f) @@ -74,9 +81,10 @@ osl_mul_closure_float(ShaderGlobals* sg, const ClosureColor* a, float w) } -OSL_SHADEOP OSL_HOSTDEVICE ClosureComponent* -osl_allocate_closure_component(ShaderGlobals* sg, int id, int size) +OSL_SHADEOP OSL_HOSTDEVICE void* +osl_allocate_closure_component(OpaqueExecContextPtr oec, int id, int size) { + ShaderGlobals* sg = (ShaderGlobals*)oec; // Allocate the component and the mul back to back const size_t needed = sizeof(ClosureComponent) + size; ClosureComponent* comp @@ -91,10 +99,11 @@ osl_allocate_closure_component(ShaderGlobals* sg, int id, int size) -OSL_SHADEOP OSL_HOSTDEVICE ClosureColor* -osl_allocate_weighted_closure_component(ShaderGlobals* sg, int id, int size, - const Color3* w) +OSL_SHADEOP OSL_HOSTDEVICE void* +osl_allocate_weighted_closure_component(OpaqueExecContextPtr oec, int id, + int size, const Color3* w) { + ShaderGlobals* sg = (ShaderGlobals*)oec; if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) return NULL; // Allocate the component and the mul back to back @@ -111,8 +120,10 @@ osl_allocate_weighted_closure_component(ShaderGlobals* sg, int id, int size, // Deprecated, remove when conversion from ustring to ustringhash is finished OSL_SHADEOP const char* -osl_closure_to_string(ShaderGlobals* sg, ClosureColor* c) +osl_closure_to_string(OpaqueExecContextPtr oec, const void* c_) { + ShaderGlobals* sg = (ShaderGlobals*)oec; + const ClosureColor* c = (const ClosureColor*)c_; // Special case for printing closures std::ostringstream stream; stream.imbue(std::locale::classic()); // force C locale @@ -122,8 +133,10 @@ osl_closure_to_string(ShaderGlobals* sg, ClosureColor* c) } OSL_SHADEOP ustringhash_pod -osl_closure_to_ustringhash(ShaderGlobals* sg, ClosureColor* c) +osl_closure_to_ustringhash(OpaqueExecContextPtr oec, const void* c_) { + ShaderGlobals* sg = (ShaderGlobals*)oec; + const ClosureColor* c = (const ClosureColor*)c_; // Special case for printing closures std::ostringstream stream; stream.imbue(std::locale::classic()); // force C locale From 00e3d5e361d1eee5e7154b4689f696030c67d7b2 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 18 Feb 2025 16:54:28 +1100 Subject: [PATCH 07/20] match host/device function decls Signed-off-by: Curtis Black --- src/liboslexec/builtindecl.h | 2 +- src/liboslexec/opclosure.cpp | 21 +++++++++------------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/liboslexec/builtindecl.h b/src/liboslexec/builtindecl.h index ae84b5236..8795845a4 100644 --- a/src/liboslexec/builtindecl.h +++ b/src/liboslexec/builtindecl.h @@ -111,7 +111,7 @@ DECL(osl_add_closure_closure, "XXXX") DECL(osl_mul_closure_float, "XXXf") -DECL(osl_mul_closure_color, "XXXc") +DECL(osl_mul_closure_color, "XXXX") DECL(osl_allocate_closure_component, "XXii") DECL(osl_allocate_weighted_closure_component, "XXiiX") DECL(osl_closure_to_string, "sXX") diff --git a/src/liboslexec/opclosure.cpp b/src/liboslexec/opclosure.cpp index e943ee900..e90e2f1ea 100644 --- a/src/liboslexec/opclosure.cpp +++ b/src/liboslexec/opclosure.cpp @@ -19,14 +19,13 @@ OSL_SHADEOP OSL_HOSTDEVICE const void* osl_add_closure_closure(OpaqueExecContextPtr oec, const void* a_, const void* b_) { - ShaderGlobals* sg = (ShaderGlobals*)oec; const ClosureColor* a = (const ClosureColor*)a_; const ClosureColor* b = (const ClosureColor*)b_; if (a == NULL) return b; if (b == NULL) return a; - ClosureAdd* add = (ClosureAdd*)rs_allocate_closure(sg, sizeof(ClosureAdd), + ClosureAdd* add = (ClosureAdd*)rs_allocate_closure(oec, sizeof(ClosureAdd), alignof(ClosureAdd)); if (add) { add->id = ClosureColor::ADD; @@ -38,17 +37,17 @@ osl_add_closure_closure(OpaqueExecContextPtr oec, const void* a_, OSL_SHADEOP OSL_HOSTDEVICE const void* -osl_mul_closure_color(OpaqueExecContextPtr oec, const void* a_, const Color3* w) +osl_mul_closure_color(OpaqueExecContextPtr oec, const void* a_, const void* w_) { - ShaderGlobals* sg = (ShaderGlobals*)oec; const ClosureColor* a = (const ClosureColor*)a_; + const Color3* w = (const Color3*)w_; if (a == NULL) return NULL; if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) return NULL; if (w->x == 1.0f && w->y == 1.0f && w->z == 1.0f) return a; - ClosureMul* mul = (ClosureMul*)rs_allocate_closure(sg, sizeof(ClosureMul), + ClosureMul* mul = (ClosureMul*)rs_allocate_closure(oec, sizeof(ClosureMul), alignof(ClosureMul)); if (mul) { mul->id = ClosureColor::MUL; @@ -62,7 +61,6 @@ osl_mul_closure_color(OpaqueExecContextPtr oec, const void* a_, const Color3* w) OSL_SHADEOP OSL_HOSTDEVICE const void* osl_mul_closure_float(OpaqueExecContextPtr oec, const void* a_, float w) { - ShaderGlobals* sg = (ShaderGlobals*)oec; const ClosureColor* a = (const ClosureColor*)a_; if (a == NULL) return NULL; @@ -70,7 +68,7 @@ osl_mul_closure_float(OpaqueExecContextPtr oec, const void* a_, float w) return NULL; if (w == 1.0f) return a; - ClosureMul* mul = (ClosureMul*)rs_allocate_closure(sg, sizeof(ClosureMul), + ClosureMul* mul = (ClosureMul*)rs_allocate_closure(oec, sizeof(ClosureMul), alignof(ClosureMul)); if (mul) { mul->id = ClosureColor::MUL; @@ -84,11 +82,10 @@ osl_mul_closure_float(OpaqueExecContextPtr oec, const void* a_, float w) OSL_SHADEOP OSL_HOSTDEVICE void* osl_allocate_closure_component(OpaqueExecContextPtr oec, int id, int size) { - ShaderGlobals* sg = (ShaderGlobals*)oec; // Allocate the component and the mul back to back const size_t needed = sizeof(ClosureComponent) + size; ClosureComponent* comp - = (ClosureComponent*)rs_allocate_closure(sg, needed, + = (ClosureComponent*)rs_allocate_closure(oec, needed, alignof(ClosureComponent)); if (comp) { comp->id = id; @@ -101,15 +98,15 @@ osl_allocate_closure_component(OpaqueExecContextPtr oec, int id, int size) OSL_SHADEOP OSL_HOSTDEVICE void* osl_allocate_weighted_closure_component(OpaqueExecContextPtr oec, int id, - int size, const Color3* w) + int size, const void* w_) { - ShaderGlobals* sg = (ShaderGlobals*)oec; + const Color3* w = (const Color3*)w_; if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) return NULL; // Allocate the component and the mul back to back const size_t needed = sizeof(ClosureComponent) + size; ClosureComponent* comp - = (ClosureComponent*)rs_allocate_closure(sg, needed, + = (ClosureComponent*)rs_allocate_closure(oec, needed, alignof(ClosureComponent)); if (comp) { comp->id = id; From b112bb548297c548db4d243f769b0326c6087cc9 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Fri, 21 Feb 2025 10:27:46 +1100 Subject: [PATCH 08/20] restore cuda code Signed-off-by: Curtis Black --- src/include/OSL/rs_free_function.h | 3 +- src/testrender/cuda/rend_lib.cu | 208 +++++++++++++++++++++++++++++ testsuite/example-cuda/rend_lib.cu | 188 ++++++++++++++++++++++++++ 3 files changed, 398 insertions(+), 1 deletion(-) diff --git a/src/include/OSL/rs_free_function.h b/src/include/OSL/rs_free_function.h index a3e2b0336..0abf3fcf8 100644 --- a/src/include/OSL/rs_free_function.h +++ b/src/include/OSL/rs_free_function.h @@ -317,7 +317,8 @@ rs_trace_get(OSL::OpaqueExecContextPtr oec, OSL::ustringhash name, OSL::TypeDesc type, void* val, bool derivatives); /// Allocates memory for a closure color. May return null if no memory could -/// be allocated. +/// be allocated. It is the renderers responsibility to clean up these +/// allocations after a shader is run and the closures have been processed. OSL_RSOP OSL_HOSTDEVICE void* rs_allocate_closure(OSL::OpaqueExecContextPtr oec, size_t size, size_t alignment); diff --git a/src/testrender/cuda/rend_lib.cu b/src/testrender/cuda/rend_lib.cu index d44b68b99..4b36ab6be 100644 --- a/src/testrender/cuda/rend_lib.cu +++ b/src/testrender/cuda/rend_lib.cu @@ -30,6 +30,17 @@ extern __device__ CUdeviceptr xform_buffer; } // namespace pvt OSL_NAMESPACE_END + +// Taken from the SimplePool class +__device__ static inline size_t +alignment_offset_calc(void* ptr, size_t alignment) +{ + uintptr_t ptrbits = reinterpret_cast(ptr); + uintptr_t offset = ((ptrbits + alignment - 1) & -alignment) - ptrbits; + return offset; +} + + // These functions are declared extern to prevent name mangling. extern "C" { @@ -39,6 +50,203 @@ __direct_callable__dummy_rend_lib() { } + +__device__ void* +closure_component_allot(void* pool, int id, size_t prim_size, + const OSL::Color3& w) +{ + ((OSL::ClosureComponent*)pool)->id = id; + ((OSL::ClosureComponent*)pool)->w = w; + + size_t needed = (sizeof(OSL::ClosureComponent) + prim_size + + (alignof(OSL::ClosureComponent) - 1)) + & ~(alignof(OSL::ClosureComponent) - 1); + char* char_ptr = (char*)pool; + + return (void*)&char_ptr[needed]; +} + + +__device__ void* +closure_mul_allot(void* pool, const OSL::Color3& w, OSL::ClosureColor* c) +{ + ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; + ((OSL::ClosureMul*)pool)->weight = w; + ((OSL::ClosureMul*)pool)->closure = c; + + size_t needed = (sizeof(OSL::ClosureMul) + + (alignof(OSL::ClosureComponent) - 1)) + & ~(alignof(OSL::ClosureComponent) - 1); + char* char_ptr = (char*)pool; + + return &char_ptr[needed]; +} + + +__device__ void* +closure_mul_float_allot(void* pool, const float& w, OSL::ClosureColor* c) +{ + ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; + ((OSL::ClosureMul*)pool)->weight.x = w; + ((OSL::ClosureMul*)pool)->weight.y = w; + ((OSL::ClosureMul*)pool)->weight.z = w; + ((OSL::ClosureMul*)pool)->closure = c; + + size_t needed = (sizeof(OSL::ClosureMul) + + (alignof(OSL::ClosureComponent) - 1)) + & ~(alignof(OSL::ClosureComponent) - 1); + char* char_ptr = (char*)pool; + + return &char_ptr[needed]; +} + + +__device__ void* +closure_add_allot(void* pool, OSL::ClosureColor* a, OSL::ClosureColor* b) +{ + ((OSL::ClosureAdd*)pool)->id = OSL::ClosureColor::ADD; + ((OSL::ClosureAdd*)pool)->closureA = a; + ((OSL::ClosureAdd*)pool)->closureB = b; + + size_t needed = (sizeof(OSL::ClosureAdd) + + (alignof(OSL::ClosureComponent) - 1)) + & ~(alignof(OSL::ClosureComponent) - 1); + char* char_ptr = (char*)pool; + + return &char_ptr[needed]; +} + + +__device__ void* +osl_allocate_closure_component(void* sg_, int id, int size) +{ + OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; + + OSL::Color3 w = OSL::Color3(1, 1, 1); + // Fix up the alignment + void* ret = ((char*)sg_ptr->renderstate) + + alignment_offset_calc(sg_ptr->renderstate, + alignof(OSL::ClosureComponent)); + + size = max(4, size); + + sg_ptr->renderstate = closure_component_allot(ret, id, size, w); + + return ret; +} + + + +__device__ void* +osl_allocate_weighted_closure_component(void* sg_, int id, int size, + const void* w) +{ + OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; + + const OSL::Color3* wc + = (const OSL::Color3*)__builtin_assume_aligned(w, alignof(float)); + + if (wc->x == 0.0f && wc->y == 0.0f && wc->z == 0.0f) { + return NULL; + } + + size = max(4, size); + + // Fix up the alignment + void* ret = ((char*)sg_ptr->renderstate) + + alignment_offset_calc(sg_ptr->renderstate, + alignof(OSL::ClosureComponent)); + sg_ptr->renderstate = closure_component_allot(ret, id, size, *wc); + + return ret; +} + + + +__device__ void* +osl_mul_closure_color(void* sg_, void* a, const void* w) +{ + OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; + const OSL::Color3* wc + = (const OSL::Color3*)__builtin_assume_aligned(w, alignof(float)); + + if (a == NULL) { + return NULL; + } + + if (wc->x == 0.0f && wc->y == 0.0f && wc->z == 0.0f) { + return NULL; + } + + if (wc->x == 1.0f && wc->y == 1.0f && wc->z == 1.0f) { + return a; + } + + // Fix up the alignment + void* ret = ((char*)sg_ptr->renderstate) + + alignment_offset_calc(sg_ptr->renderstate, + alignof(OSL::ClosureComponent)); + sg_ptr->renderstate = closure_mul_allot(ret, *wc, (OSL::ClosureColor*)a); + + return ret; +} + + + +__device__ void* +osl_mul_closure_float(void* sg_, void* a, float w) +{ + a = __builtin_assume_aligned(a, alignof(float)); + + OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; + + if (a == NULL || w == 0.0f) { + return NULL; + } + + if (w == 1.0f) { + return a; + } + + // Fix up the alignment + void* ret = ((char*)sg_ptr->renderstate) + + alignment_offset_calc(sg_ptr->renderstate, + alignof(OSL::ClosureComponent)); + sg_ptr->renderstate = closure_mul_float_allot(ret, w, + (OSL::ClosureColor*)a); + + return ret; +} + + + +__device__ void* +osl_add_closure_closure(void* sg_, void* a, void* b) +{ + a = __builtin_assume_aligned(a, alignof(float)); + b = __builtin_assume_aligned(b, alignof(float)); + + OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; + + if (a == NULL) { + return b; + } + + if (b == NULL) { + return a; + } + + // Fix up the alignment + void* ret = ((char*)sg_ptr->renderstate) + + alignment_offset_calc(sg_ptr->renderstate, + alignof(OSL::ClosureComponent)); + sg_ptr->renderstate = closure_add_allot(ret, (OSL::ClosureColor*)a, + (OSL::ClosureColor*)b); + + return ret; +} + + #define IS_STRING(type) (type.basetype == OSL::TypeDesc::STRING) #define IS_PTR(type) (type.basetype == OSL::TypeDesc::PTR) #define IS_COLOR(type) (type.vecsemantics == OSL::TypeDesc::COLOR) diff --git a/testsuite/example-cuda/rend_lib.cu b/testsuite/example-cuda/rend_lib.cu index bac46e936..01e75bb37 100644 --- a/testsuite/example-cuda/rend_lib.cu +++ b/testsuite/example-cuda/rend_lib.cu @@ -14,9 +14,197 @@ extern __device__ char* s_color_system; } OSL_NAMESPACE_END +// Taken from the SimplePool class +__device__ static inline size_t +alignment_offset_calc(void* ptr, size_t alignment) +{ + uintptr_t ptrbits = reinterpret_cast(ptr); + uintptr_t offset = ((ptrbits + alignment - 1) & -alignment) - ptrbits; + return offset; +} + // These functions are declared extern to prevent name mangling. extern "C" { +__device__ void* +closure_component_allot(void* pool, int id, size_t prim_size, + const OSL::Color3& w) +{ + ((OSL::ClosureComponent*)pool)->id = id; + ((OSL::ClosureComponent*)pool)->w = w; + + size_t needed = (sizeof(OSL::ClosureComponent) - sizeof(void*) + prim_size + + (alignof(OSL::ClosureComponent) - 1)) + & ~(alignof(OSL::ClosureComponent) - 1); + char* char_ptr = (char*)pool; + + return (void*)&char_ptr[needed]; +} + + +__device__ void* +closure_mul_allot(void* pool, const OSL::Color3& w, OSL::ClosureColor* c) +{ + ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; + ((OSL::ClosureMul*)pool)->weight = w; + ((OSL::ClosureMul*)pool)->closure = c; + + size_t needed = (sizeof(OSL::ClosureMul) + + (alignof(OSL::ClosureComponent) - 1)) + & ~(alignof(OSL::ClosureComponent) - 1); + char* char_ptr = (char*)pool; + + return &char_ptr[needed]; +} + + +__device__ void* +closure_mul_float_allot(void* pool, const float& w, OSL::ClosureColor* c) +{ + ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; + ((OSL::ClosureMul*)pool)->weight.x = w; + ((OSL::ClosureMul*)pool)->weight.y = w; + ((OSL::ClosureMul*)pool)->weight.z = w; + ((OSL::ClosureMul*)pool)->closure = c; + + size_t needed = (sizeof(OSL::ClosureMul) + + (alignof(OSL::ClosureComponent) - 1)) + & ~(alignof(OSL::ClosureComponent) - 1); + char* char_ptr = (char*)pool; + + return &char_ptr[needed]; +} + + +__device__ void* +closure_add_allot(void* pool, OSL::ClosureColor* a, OSL::ClosureColor* b) +{ + ((OSL::ClosureAdd*)pool)->id = OSL::ClosureColor::ADD; + ((OSL::ClosureAdd*)pool)->closureA = a; + ((OSL::ClosureAdd*)pool)->closureB = b; + + size_t needed = (sizeof(OSL::ClosureAdd) + + (alignof(OSL::ClosureComponent) - 1)) + & ~(alignof(OSL::ClosureComponent) - 1); + char* char_ptr = (char*)pool; + + return &char_ptr[needed]; +} + + +__device__ void* +osl_allocate_closure_component(void* sg_, int id, int size) +{ + ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + + OSL::Color3 w = OSL::Color3(1, 1, 1); + // Fix up the alignment + void* ret = ((char*)sg_ptr->renderstate) + + alignment_offset_calc(sg_ptr->renderstate, + alignof(OSL::ClosureComponent)); + + size = max(4, size); + + sg_ptr->renderstate = closure_component_allot(ret, id, size, w); + + return ret; +} + + +__device__ void* +osl_allocate_weighted_closure_component(void* sg_, int id, int size, + const OSL::Color3* w) +{ + ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + + if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) { + return NULL; + } + + size = max(4, size); + + // Fix up the alignment + void* ret = ((char*)sg_ptr->renderstate) + + alignment_offset_calc(sg_ptr->renderstate, + alignof(OSL::ClosureComponent)); + sg_ptr->renderstate = closure_component_allot(ret, id, size, *w); + + return ret; +} + + +__device__ void* +osl_mul_closure_color(void* sg_, OSL::ClosureColor* a, const OSL::Color3* w) +{ + ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + + if (a == NULL) { + return NULL; + } + + if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) { + return NULL; + } + + if (w->x == 1.0f && w->y == 1.0f && w->z == 1.0f) { + return a; + } + + // Fix up the alignment + void* ret = ((char*)sg_ptr->renderstate) + + alignment_offset_calc(sg_ptr->renderstate, + alignof(OSL::ClosureComponent)); + sg_ptr->renderstate = closure_mul_allot(ret, *w, a); + + return ret; +} + + +__device__ void* +osl_mul_closure_float(void* sg_, OSL::ClosureColor* a, float w) +{ + ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + + if (a == NULL || w == 0.0f) { + return NULL; + } + + if (w == 1.0f) { + return a; + } + + // Fix up the alignment + void* ret = ((char*)sg_ptr->renderstate) + + alignment_offset_calc(sg_ptr->renderstate, + alignof(OSL::ClosureComponent)); + sg_ptr->renderstate = closure_mul_float_allot(ret, w, a); + + return ret; +} + + +__device__ void* +osl_add_closure_closure(void* sg_, OSL::ClosureColor* a, OSL::ClosureColor* b) +{ + ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + + if (a == NULL) { + return b; + } + + if (b == NULL) { + return a; + } + + // Fix up the alignment + void* ret = ((char*)sg_ptr->renderstate) + + alignment_offset_calc(sg_ptr->renderstate, + alignof(OSL::ClosureComponent)); + sg_ptr->renderstate = closure_add_allot(ret, a, b); + + return ret; +} + #define IS_STRING(type) (type.basetype == OSL::TypeDesc::STRING) #define IS_PTR(type) (type.basetype == OSL::TypeDesc::PTR) #define IS_COLOR(type) (type.vecsemantics == OSL::TypeDesc::COLOR) From c0f846288370ca50afaf2d1315337407c133e355 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Fri, 21 Feb 2025 14:06:18 +1100 Subject: [PATCH 09/20] Move closure pool to render state. Signed-off-by: Curtis Black --- src/testrender/cuda/optix_raytracer.cu | 10 ++++--- src/testrender/simpleraytracer.cpp | 6 ++-- src/testshade/cuda/optix_grid_renderer.cu | 9 ++++-- src/testshade/render_state.h | 31 +++++++++++++++++++- src/testshade/rs_simplerend.cpp | 19 +++++------- src/testshade/simplerend.cpp | 10 +++---- src/testshade/simplerend.h | 2 +- src/testshade/testshade.cpp | 18 +++++++----- testsuite/example-cuda/cuda_grid_renderer.cu | 9 ++++-- 9 files changed, 76 insertions(+), 38 deletions(-) diff --git a/src/testrender/cuda/optix_raytracer.cu b/src/testrender/cuda/optix_raytracer.cu index a2880c730..13a2dcd8f 100644 --- a/src/testrender/cuda/optix_raytracer.cu +++ b/src/testrender/cuda/optix_raytracer.cu @@ -48,16 +48,18 @@ __device__ __constant__ RenderParams render_params; static inline __device__ void -execute_shader(ShaderGlobalsType& sg, const int shader_id, char* closure_pool) +execute_shader(ShaderGlobalsType& sg, const int shader_id, StackClosurePool& closure_pool) { if (shader_id < 0) { // TODO: should probably never get here ... return; } - // Pack the "closure pool" into one of the ShaderGlobals pointers - *(int*)&closure_pool[0] = 0; - sg.renderstate = &closure_pool[0]; + closure_pool.reset(); + RenderState renderState; + // TODO: renderState.context = ... + renderState.closure_pool = &closure_pool; + sg.renderstate = &renderState; // Pack the pointers to the options structs in a faux "context", // which is a rough stand-in for the host ShadingContext. diff --git a/src/testrender/simpleraytracer.cpp b/src/testrender/simpleraytracer.cpp index 2318021fa..400ded296 100644 --- a/src/testrender/simpleraytracer.cpp +++ b/src/testrender/simpleraytracer.cpp @@ -945,7 +945,7 @@ SimpleRaytracer::eval_background(const Dual2& dir, ShadingContext* ctx, #ifndef __CUDACC__ shadingsys->execute(*ctx, *m_shaders[backgroundShaderID].surf, sg); #else - alignas(8) char closure_pool[256]; + StackClosurePool closure_pool; execute_shader(sg, render_params.bg_id, closure_pool); #endif return process_background_closure((const ClosureColor*)sg.Ci); @@ -957,8 +957,8 @@ SimpleRaytracer::subpixel_radiance(float x, float y, Sampler& sampler, { #ifdef __CUDACC__ // Scratch space for the output closures - alignas(8) char closure_pool[256]; - alignas(8) char light_closure_pool[256]; + StackClosurePool closure_pool; + StackClosurePool light_closure_pool; #endif constexpr float inf = std::numeric_limits::infinity(); diff --git a/src/testshade/cuda/optix_grid_renderer.cu b/src/testshade/cuda/optix_grid_renderer.cu index f3657efcd..fe5d45a5e 100644 --- a/src/testshade/cuda/optix_grid_renderer.cu +++ b/src/testshade/cuda/optix_grid_renderer.cu @@ -98,7 +98,7 @@ __raygen__() // networks, so there should be (at least) some mechanism to issue a // warning or error if the closure or param storage can possibly be // exceeded. - alignas(8) char closure_pool[256]; + StackClosurePool closure_pool; alignas(8) char params[256]; OSL_CUDA::ShaderGlobals sg; @@ -137,8 +137,11 @@ __raygen__() sg.object2common = reinterpret_cast(render_params.object2common); // Pack the "closure pool" into one of the ShaderGlobals pointers - *(int*)&closure_pool[0] = 0; - sg.renderstate = &closure_pool[0]; + closure_pool.reset(); + RenderState renderState; + // TODO: renderState.context = ... + renderState.closure_pool = &closure_pool; + sg.renderstate = &renderState; // Run the OSL group and init functions if (render_params.fused_callable) diff --git a/src/testshade/render_state.h b/src/testshade/render_state.h index 8aec4954f..88a051d2f 100644 --- a/src/testshade/render_state.h +++ b/src/testshade/render_state.h @@ -10,7 +10,7 @@ // All the the state free functions in rs_simplerend.cpp will need to do their job // NOTE: Additional data is here that will be used by rs_simplerend.cpp in future PR's // procedurally generating ShaderGlobals. -struct RenderState { +struct RenderContext { int xres; int yres; OSL::Matrix44 world_to_camera; @@ -24,6 +24,35 @@ struct RenderState { void* journal_buffer; }; +class StackClosurePool { + alignas(8) char buffer[256]; + void* ptr; + +public: + StackClosurePool() { reset(); } + + void reset() + { + ptr = &buffer[0]; + *(int*)ptr = 0; + } + + void* allocate(size_t size, size_t alignment) + { + uintptr_t p = OIIO::round_to_multiple_of_pow2((uintptr_t)ptr, + alignment); + ptr = (void*)(p + size); + if (ptr <= &buffer[256]) + return p; + return nullptr; + } +} + +struct RenderState { + RenderContext* context; + StackClosurePool* closure_pool; +}; + // Create constexpr hashes for all strings used by the free function renderer services. // NOTE: Actually ustring's should also be instantiated in host code someplace as well diff --git a/src/testshade/rs_simplerend.cpp b/src/testshade/rs_simplerend.cpp index 640f1ac46..4ff850b48 100644 --- a/src/testshade/rs_simplerend.cpp +++ b/src/testshade/rs_simplerend.cpp @@ -66,7 +66,7 @@ rs_get_inverse_matrix_space_time(OSL::OpaqueExecContextPtr ec, using OSL::Matrix44; - auto rs = OSL::get_rs(ec); + auto rs = OSL::get_rs(ec)->context; if (to == OSL::Hashes::camera || to == OSL::Hashes::screen || to == OSL::Hashes::NDC || to == RS::Hashes::raster) { Matrix44 M { rs->world_to_camera }; @@ -372,11 +372,8 @@ rs_trace_get(OSL::OpaqueExecContextPtr ec, OSL::ustringhash name, OSL_RSOP OSL_HOSTDEVICE void* rs_allocate_closure(OSL::OpaqueExecContextPtr ec, size_t size, size_t alignment) { - auto sg = (OSL::ShaderGlobals*)ec; - uintptr_t ptr = OIIO::round_to_multiple_of_pow2((uintptr_t)sg->renderstate, - alignment); - sg->renderstate = (void*)(ptr + size); - return (void*)ptr; + auto rs = OSL::get_rs(ec); + return rs->closure_pool->allocate(size, alignment); } #endif @@ -503,7 +500,7 @@ rs_get_attribute(OSL::OpaqueExecContextPtr oec, OSL::ustringhash_pod object_, auto object = OSL::ustringhash_from(object_); auto name = OSL::ustringhash_from(name_); const OSL::TypeDesc type = OSL::TypeDesc_from(_type); - auto rs = OSL::get_rs(oec); + auto rs = OSL::get_rs(oec)->context; // The many branches in the code below handle the case where we don't know // the attribute name at compile time. In the case it is known, dead-code @@ -648,7 +645,7 @@ rs_errorfmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash fmt_specification, int32_t arg_count, const OSL::EncodedType* argTypes, uint32_t argValuesSize, uint8_t* argValues) { - auto rs = OSL::get_rs(ec); + auto rs = OSL::get_rs(ec)->context; OSL::journal::Writer jw { rs->journal_buffer }; jw.record_errorfmt(OSL::get_thread_index(ec), OSL::get_shade_index(ec), @@ -661,7 +658,7 @@ rs_warningfmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash fmt_specification, int32_t arg_count, const OSL::EncodedType* argTypes, uint32_t argValuesSize, uint8_t* argValues) { - auto rs = OSL::get_rs(ec); + auto rs = OSL::get_rs(ec)->context; OSL::journal::Writer jw { rs->journal_buffer }; jw.record_warningfmt(OSL::get_max_warnings_per_thread(ec), @@ -676,7 +673,7 @@ rs_printfmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash fmt_specification, int32_t arg_count, const OSL::EncodedType* argTypes, uint32_t argValuesSize, uint8_t* argValues) { - auto rs = OSL::get_rs(ec); + auto rs = OSL::get_rs(ec)->context; OSL::journal::Writer jw { rs->journal_buffer }; jw.record_printfmt(OSL::get_thread_index(ec), OSL::get_shade_index(ec), @@ -691,7 +688,7 @@ rs_filefmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash filename_hash, const OSL::EncodedType* argTypes, uint32_t argValuesSize, uint8_t* argValues) { - auto rs = OSL::get_rs(ec); + auto rs = OSL::get_rs(ec)->context; OSL::journal::Writer jw { rs->journal_buffer }; jw.record_filefmt(OSL::get_thread_index(ec), OSL::get_shade_index(ec), diff --git a/src/testshade/simplerend.cpp b/src/testshade/simplerend.cpp index 7e67ab063..9d0b12e13 100644 --- a/src/testshade/simplerend.cpp +++ b/src/testshade/simplerend.cpp @@ -1049,7 +1049,7 @@ SimpleRenderer::add_output(string_view varname_, string_view filename, void -SimpleRenderer::export_state(RenderState& state) const +SimpleRenderer::export_state(RenderContext& state) const { state.xres = m_xres; state.yres = m_yres; @@ -1073,7 +1073,7 @@ SimpleRenderer::errorfmt(OSL::ShaderGlobals* sg, const EncodedType* arg_types, uint32_t arg_values_size, uint8_t* argValues) { - RenderState* rs = reinterpret_cast(sg->renderstate); + RenderContext* rs = reinterpret_cast(sg->renderstate)->context; OSL::journal::Writer jw { rs->journal_buffer }; jw.record_errorfmt(OSL::get_thread_index(sg), OSL::get_shade_index(sg), fmt_specification, arg_count, arg_types, arg_values_size, @@ -1086,7 +1086,7 @@ SimpleRenderer::warningfmt(OSL::ShaderGlobals* sg, int32_t arg_count, const EncodedType* arg_types, uint32_t arg_values_size, uint8_t* argValues) { - RenderState* rs = reinterpret_cast(sg->renderstate); + RenderContext* rs = reinterpret_cast(sg->renderstate)->context; OSL::journal::Writer jw { rs->journal_buffer }; jw.record_warningfmt(OSL::get_max_warnings_per_thread(sg), OSL::get_thread_index(sg), OSL::get_shade_index(sg), @@ -1102,7 +1102,7 @@ SimpleRenderer::printfmt(OSL::ShaderGlobals* sg, const EncodedType* arg_types, uint32_t arg_values_size, uint8_t* argValues) { - RenderState* rs = reinterpret_cast(sg->renderstate); + RenderContext* rs = reinterpret_cast(sg->renderstate)->context; OSL::journal::Writer jw { rs->journal_buffer }; jw.record_printfmt(OSL::get_thread_index(sg), OSL::get_shade_index(sg), fmt_specification, arg_count, arg_types, arg_values_size, @@ -1115,7 +1115,7 @@ SimpleRenderer::filefmt(OSL::ShaderGlobals* sg, OSL::ustringhash filename_hash, const EncodedType* arg_types, uint32_t arg_values_size, uint8_t* argValues) { - RenderState* rs = reinterpret_cast(sg->renderstate); + RenderContext* rs = reinterpret_cast(sg->renderstate)->context; OSL::journal::Writer jw { rs->journal_buffer }; jw.record_filefmt(OSL::get_thread_index(sg), OSL::get_shade_index(sg), filename_hash, fmt_specification, arg_count, arg_types, diff --git a/src/testshade/simplerend.h b/src/testshade/simplerend.h index bb6a0e426..a09e029e3 100644 --- a/src/testshade/simplerend.h +++ b/src/testshade/simplerend.h @@ -145,7 +145,7 @@ class SimpleRenderer : public RendererServices { size_t noutputs() const { return m_outputbufs.size(); } virtual void init_shadingsys(ShadingSystem* ss) { shadingsys = ss; } - virtual void export_state(RenderState&) const; + virtual void export_state(RenderContext&) const; virtual void prepare_render() {} virtual void warmup() {} virtual void render(int /*xres*/, int /*yres*/) {} diff --git a/src/testshade/testshade.cpp b/src/testshade/testshade.cpp index 51f3e001b..6a4db292c 100644 --- a/src/testshade/testshade.cpp +++ b/src/testshade/testshade.cpp @@ -945,21 +945,23 @@ setup_transformations(SimpleRenderer& rend, OSL::Matrix44& Mshad, rend.name_transform("myspace", Mmyspace); } -// NOTE: each host thread could end up with its own RenderState. -// Starting simple with a single instance for now -static RenderState theRenderState; +// A single render context shared by all render threads. +static RenderContext theRenderState; // Set up the ShaderGlobals fields for pixel (x,y). static void -setup_shaderglobals(ShaderGlobals& sg, ShadingSystem* shadingsys, int x, int y) +setup_shaderglobals(ShaderGlobals& sg, ShadingSystem* shadingsys, + RenderState& renderState, int x, int y) { // Just zero the whole thing out to start memset((char*)&sg, 0, sizeof(ShaderGlobals)); // Any state data needed by SimpleRenderer or its free function equivalent // will need to be passed here the ShaderGlobals. - sg.renderstate = &theRenderState; + renderState.context = &theRenderState; + renderState.closure_pool = nullptr; // Use inbuilt closure pool. + sg.renderstate = &renderState; // Set "shader" space to be Mshad. In a real renderer, this may be // different for each shader group. @@ -1182,7 +1184,8 @@ setup_output_images(SimpleRenderer* rend, ShadingSystem* shadingsys, ShadingContext* ctx = shadingsys->get_context(thread_info); raytype_bit = shadingsys->raytype_bit(ustring(raytype_name)); ShaderGlobals sg; - setup_shaderglobals(sg, shadingsys, 0, 0); + RenderState renderState; + setup_shaderglobals(sg, shadingsys, renderState, 0, 0); #if OSL_USE_BATCHED if (batched) { @@ -1586,6 +1589,7 @@ shade_region(SimpleRenderer* rend, ShaderGroup* shadergroup, OIIO::ROI roi, // Set up shader globals and a little test grid of points to shade. ShaderGlobals shaderglobals; + RenderState renderState; raytype_bit = shadingsys->raytype_bit(ustring(raytype_name)); @@ -1606,7 +1610,7 @@ shade_region(SimpleRenderer* rend, ShaderGroup* shadergroup, OIIO::ROI roi, // set it up rigged to look like we're rendering a single // quadrilateral that exactly fills the viewport, and that // setup is done in the following function call: - setup_shaderglobals(shaderglobals, shadingsys, x, y); + setup_shaderglobals(shaderglobals, shadingsys, renderState, x, y); if (this_threads_index == uninitialized_thread_index) { this_threads_index = next_thread_index.fetch_add(1u); diff --git a/testsuite/example-cuda/cuda_grid_renderer.cu b/testsuite/example-cuda/cuda_grid_renderer.cu index cb53427f8..d2d4426b3 100644 --- a/testsuite/example-cuda/cuda_grid_renderer.cu +++ b/testsuite/example-cuda/cuda_grid_renderer.cu @@ -74,7 +74,7 @@ shade(float3* Cout, int w, int h) // networks, so there should be (at least) some mechanism to issue a // warning or error if the closure or param storage can possibly be // exceeded. - alignas(8) char closure_pool[256]; + StackClosurePool closure_pool; alignas(8) char params[256]; const float invw = 1.0 / w; @@ -115,8 +115,11 @@ shade(float3* Cout, int w, int h) sg.flipHandedness = 0; // Pack the "closure pool" into one of the ShaderGlobals pointers - *(int*)&closure_pool[0] = 0; - sg.renderstate = &closure_pool[0]; + closure_pool.reset(); + RenderState renderState; + // TODO: renderState.context = ... + renderState.closure_pool = &closure_pool; + sg.renderstate = &renderState; // Run the shader __osl__init(&sg, params); From d73a310590aef0b8a0a4a38ce449f75240104b4d Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Fri, 21 Feb 2025 14:32:37 +1100 Subject: [PATCH 10/20] Update optix code to use closure pool. Signed-off-by: Curtis Black --- src/testrender/cuda/rend_lib.cu | 261 ++++++++++------------------- src/testshade/render_state.h | 6 +- testsuite/example-cuda/rend_lib.cu | 237 ++++++++++---------------- 3 files changed, 176 insertions(+), 328 deletions(-) diff --git a/src/testrender/cuda/rend_lib.cu b/src/testrender/cuda/rend_lib.cu index 4b36ab6be..3720f1f76 100644 --- a/src/testrender/cuda/rend_lib.cu +++ b/src/testrender/cuda/rend_lib.cu @@ -30,17 +30,6 @@ extern __device__ CUdeviceptr xform_buffer; } // namespace pvt OSL_NAMESPACE_END - -// Taken from the SimplePool class -__device__ static inline size_t -alignment_offset_calc(void* ptr, size_t alignment) -{ - uintptr_t ptrbits = reinterpret_cast(ptr); - uintptr_t offset = ((ptrbits + alignment - 1) & -alignment) - ptrbits; - return offset; -} - - // These functions are declared extern to prevent name mangling. extern "C" { @@ -50,200 +39,120 @@ __direct_callable__dummy_rend_lib() { } - -__device__ void* -closure_component_allot(void* pool, int id, size_t prim_size, - const OSL::Color3& w) -{ - ((OSL::ClosureComponent*)pool)->id = id; - ((OSL::ClosureComponent*)pool)->w = w; - - size_t needed = (sizeof(OSL::ClosureComponent) + prim_size - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return (void*)&char_ptr[needed]; -} - - -__device__ void* -closure_mul_allot(void* pool, const OSL::Color3& w, OSL::ClosureColor* c) -{ - ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; - ((OSL::ClosureMul*)pool)->weight = w; - ((OSL::ClosureMul*)pool)->closure = c; - - size_t needed = (sizeof(OSL::ClosureMul) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; -} - - -__device__ void* -closure_mul_float_allot(void* pool, const float& w, OSL::ClosureColor* c) -{ - ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; - ((OSL::ClosureMul*)pool)->weight.x = w; - ((OSL::ClosureMul*)pool)->weight.y = w; - ((OSL::ClosureMul*)pool)->weight.z = w; - ((OSL::ClosureMul*)pool)->closure = c; - - size_t needed = (sizeof(OSL::ClosureMul) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; -} - - __device__ void* -closure_add_allot(void* pool, OSL::ClosureColor* a, OSL::ClosureColor* b) +osl_add_closure_closure(void* sg_, const void* a_, const void* b_) { - ((OSL::ClosureAdd*)pool)->id = OSL::ClosureColor::ADD; - ((OSL::ClosureAdd*)pool)->closureA = a; - ((OSL::ClosureAdd*)pool)->closureB = b; - - size_t needed = (sizeof(OSL::ClosureAdd) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; + a_ = __builtin_assume_aligned(a_, alignof(float)); + b_ = __builtin_assume_aligned(b_, alignof(float)); + ShaderGlobals* sg = (ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + const OSL::ClosureColor* b = (const OSL::ClosureColor*)b_; + if (a == NULL) + return b; + if (b == NULL) + return a; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + OSL::ClosureAdd* add + = (OSL::ClosureAdd*)closure_pool->allocate(sizeof(OSL::ClosureAdd), + alignof(OSL::ClosureAdd)); + if (add) { + add->id = OSL::ClosureColor::ADD; + add->closureA = a; + add->closureB = b; + } + return add; } - __device__ void* -osl_allocate_closure_component(void* sg_, int id, int size) +osl_mul_closure_color(void* sg_, const void* a_, const void* w_) { - OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; - - OSL::Color3 w = OSL::Color3(1, 1, 1); - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - - size = max(4, size); - - sg_ptr->renderstate = closure_component_allot(ret, id, size, w); + a_ = __builtin_assume_aligned(a_, alignof(float)); + w_ = __builtin_assume_aligned(w_, alignof(float)); - return ret; -} - - - -__device__ void* -osl_allocate_weighted_closure_component(void* sg_, int id, int size, - const void* w) -{ - OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; - - const OSL::Color3* wc - = (const OSL::Color3*)__builtin_assume_aligned(w, alignof(float)); - - if (wc->x == 0.0f && wc->y == 0.0f && wc->z == 0.0f) { + ShaderGlobals* sg = (ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + const OSL::Color3* w = (const OSL::Color3*)w_; + if (a == NULL) + return NULL; + if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) return NULL; + if (w->x == 1.0f && w->y == 1.0f && w->z == 1.0f) + return a; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + OSL::ClosureMul* mul + = (OSL::ClosureMul*)closure_pool->allocate(sizeof(OSL::ClosureMul), + alignof(OSL::ClosureMul)); + if (mul) { + mul->id = OSL::ClosureColor::MUL; + mul->weight = *w; + mul->closure = a; } - - size = max(4, size); - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_component_allot(ret, id, size, *wc); - - return ret; + return mul; } - - __device__ void* -osl_mul_closure_color(void* sg_, void* a, const void* w) +osl_mul_closure_float(void* sg_, const void* a_, float w) { - OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; - const OSL::Color3* wc - = (const OSL::Color3*)__builtin_assume_aligned(w, alignof(float)); + a_ = __builtin_assume_aligned(a_, alignof(float)); - if (a == NULL) { + ShaderGlobals* sg = (ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + if (a == NULL) return NULL; - } - - if (wc->x == 0.0f && wc->y == 0.0f && wc->z == 0.0f) { + if (w == 0.0f) return NULL; - } - - if (wc->x == 1.0f && wc->y == 1.0f && wc->z == 1.0f) { + if (w == 1.0f) return a; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + OSL::ClosureMul* mul + = (OSL::ClosureMul*)closure_pool->allocate(sizeof(OSL::ClosureMul), + alignof(OSL::ClosureMul)); + if (mul) { + mul->id = OSL::ClosureColor::MUL; + mul->weight = OSL::Color3(w); + mul->closure = a; } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_mul_allot(ret, *wc, (OSL::ClosureColor*)a); - - return ret; + return mul; } - - __device__ void* -osl_mul_closure_float(void* sg_, void* a, float w) +osl_allocate_closure_component(void* sg_, int id, int size) { - a = __builtin_assume_aligned(a, alignof(float)); - - OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; - - if (a == NULL || w == 0.0f) { - return NULL; - } - - if (w == 1.0f) { - return a; + ShaderGlobals* sg = (ShaderGlobals*)sg_; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + // Allocate the component and the mul back to back + const size_t needed = sizeof(OSL::ClosureComponent) + size; + OSL::ClosureComponent* comp + = (OSL::ClosureComponent*) + closure_pool->allocate(needed, alignof(OSL::ClosureComponent)); + if (comp) { + comp->id = id; + comp->w = OSL::Color3(1.0f); } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_mul_float_allot(ret, w, - (OSL::ClosureColor*)a); - - return ret; + return comp; } - - __device__ void* -osl_add_closure_closure(void* sg_, void* a, void* b) +osl_allocate_weighted_closure_component(void* sg_, int id, int size, + const void* w_) { - a = __builtin_assume_aligned(a, alignof(float)); - b = __builtin_assume_aligned(b, alignof(float)); - - OSL_CUDA::ShaderGlobals* sg_ptr = (OSL_CUDA::ShaderGlobals*)sg_; - - if (a == NULL) { - return b; - } + w_ = __builtin_assume_aligned(w_, alignof(float)); - if (b == NULL) { - return a; + ShaderGlobals* sg = (ShaderGlobals*)sg_; + const OSL::Color3* w = (const OSL::Color3*)w_; + if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) + return NULL; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + // Allocate the component and the mul back to back + const size_t needed = sizeof(OSL::ClosureComponent) + size; + OSL::ClosureComponent* comp + = (OSL::ClosureComponent*) + closure_pool->allocate(needed, alignof(OSL::ClosureComponent)); + if (comp) { + comp->id = id; + comp->w = *w; } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_add_allot(ret, (OSL::ClosureColor*)a, - (OSL::ClosureColor*)b); - - return ret; + return comp; } diff --git a/src/testshade/render_state.h b/src/testshade/render_state.h index 88a051d2f..1c0d83ff8 100644 --- a/src/testshade/render_state.h +++ b/src/testshade/render_state.h @@ -33,8 +33,8 @@ class StackClosurePool { void reset() { - ptr = &buffer[0]; - *(int*)ptr = 0; + ptr = &buffer[0]; + *(int*)ptr = 0; } void* allocate(size_t size, size_t alignment) @@ -43,7 +43,7 @@ class StackClosurePool { alignment); ptr = (void*)(p + size); if (ptr <= &buffer[256]) - return p; + return (void*)p; return nullptr; } } diff --git a/testsuite/example-cuda/rend_lib.cu b/testsuite/example-cuda/rend_lib.cu index 01e75bb37..fa9714aa4 100644 --- a/testsuite/example-cuda/rend_lib.cu +++ b/testsuite/example-cuda/rend_lib.cu @@ -26,185 +26,124 @@ alignment_offset_calc(void* ptr, size_t alignment) // These functions are declared extern to prevent name mangling. extern "C" { -__device__ void* -closure_component_allot(void* pool, int id, size_t prim_size, - const OSL::Color3& w) -{ - ((OSL::ClosureComponent*)pool)->id = id; - ((OSL::ClosureComponent*)pool)->w = w; - - size_t needed = (sizeof(OSL::ClosureComponent) - sizeof(void*) + prim_size - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return (void*)&char_ptr[needed]; -} - __device__ void* -closure_mul_allot(void* pool, const OSL::Color3& w, OSL::ClosureColor* c) +osl_add_closure_closure(void* sg_, const void* a_, const void* b_) { - ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; - ((OSL::ClosureMul*)pool)->weight = w; - ((OSL::ClosureMul*)pool)->closure = c; - - size_t needed = (sizeof(OSL::ClosureMul) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; -} - - -__device__ void* -closure_mul_float_allot(void* pool, const float& w, OSL::ClosureColor* c) -{ - ((OSL::ClosureMul*)pool)->id = OSL::ClosureColor::MUL; - ((OSL::ClosureMul*)pool)->weight.x = w; - ((OSL::ClosureMul*)pool)->weight.y = w; - ((OSL::ClosureMul*)pool)->weight.z = w; - ((OSL::ClosureMul*)pool)->closure = c; - - size_t needed = (sizeof(OSL::ClosureMul) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; -} - - -__device__ void* -closure_add_allot(void* pool, OSL::ClosureColor* a, OSL::ClosureColor* b) -{ - ((OSL::ClosureAdd*)pool)->id = OSL::ClosureColor::ADD; - ((OSL::ClosureAdd*)pool)->closureA = a; - ((OSL::ClosureAdd*)pool)->closureB = b; - - size_t needed = (sizeof(OSL::ClosureAdd) - + (alignof(OSL::ClosureComponent) - 1)) - & ~(alignof(OSL::ClosureComponent) - 1); - char* char_ptr = (char*)pool; - - return &char_ptr[needed]; -} - - -__device__ void* -osl_allocate_closure_component(void* sg_, int id, int size) -{ - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; - - OSL::Color3 w = OSL::Color3(1, 1, 1); - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - - size = max(4, size); - - sg_ptr->renderstate = closure_component_allot(ret, id, size, w); - - return ret; + a_ = __builtin_assume_aligned(a_, alignof(float)); + b_ = __builtin_assume_aligned(b_, alignof(float)); + ShaderGlobals* sg = (ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + const OSL::ClosureColor* b = (const OSL::ClosureColor*)b_; + if (a == NULL) + return b; + if (b == NULL) + return a; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + OSL::ClosureAdd* add + = (OSL::ClosureAdd*)closure_pool->allocate(sizeof(OSL::ClosureAdd), + alignof(OSL::ClosureAdd)); + if (add) { + add->id = OSL::ClosureColor::ADD; + add->closureA = a; + add->closureB = b; + } + return add; } - __device__ void* -osl_allocate_weighted_closure_component(void* sg_, int id, int size, - const OSL::Color3* w) +osl_mul_closure_color(void* sg_, const void* a_, const void* w_) { - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + a_ = __builtin_assume_aligned(a_, alignof(float)); + w_ = __builtin_assume_aligned(w_, alignof(float)); - if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) { + ShaderGlobals* sg = (ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + const OSL::Color3* w = (const OSL::Color3*)w_; + if (a == NULL) + return NULL; + if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) return NULL; + if (w->x == 1.0f && w->y == 1.0f && w->z == 1.0f) + return a; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + OSL::ClosureMul* mul + = (OSL::ClosureMul*)closure_pool->allocate(sizeof(OSL::ClosureMul), + alignof(OSL::ClosureMul)); + if (mul) { + mul->id = OSL::ClosureColor::MUL; + mul->weight = *w; + mul->closure = a; } - - size = max(4, size); - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_component_allot(ret, id, size, *w); - - return ret; + return mul; } - __device__ void* -osl_mul_closure_color(void* sg_, OSL::ClosureColor* a, const OSL::Color3* w) +osl_mul_closure_float(void* sg_, const void* a_, float w) { - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + a_ = __builtin_assume_aligned(a_, alignof(float)); - if (a == NULL) { + ShaderGlobals* sg = (ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + if (a == NULL) return NULL; - } - - if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) { + if (w == 0.0f) return NULL; - } - - if (w->x == 1.0f && w->y == 1.0f && w->z == 1.0f) { + if (w == 1.0f) return a; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + OSL::ClosureMul* mul + = (OSL::ClosureMul*)closure_pool->allocate(sizeof(OSL::ClosureMul), + alignof(OSL::ClosureMul)); + if (mul) { + mul->id = OSL::ClosureColor::MUL; + mul->weight = OSL::Color3(w); + mul->closure = a; } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_mul_allot(ret, *w, a); - - return ret; + return mul; } - __device__ void* -osl_mul_closure_float(void* sg_, OSL::ClosureColor* a, float w) +osl_allocate_closure_component(void* sg_, int id, int size) { - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; - - if (a == NULL || w == 0.0f) { - return NULL; - } - - if (w == 1.0f) { - return a; + ShaderGlobals* sg = (ShaderGlobals*)sg_; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + // Allocate the component and the mul back to back + const size_t needed = sizeof(OSL::ClosureComponent) + size; + OSL::ClosureComponent* comp + = (OSL::ClosureComponent*) + closure_pool->allocate(needed, alignof(OSL::ClosureComponent)); + if (comp) { + comp->id = id; + comp->w = OSL::Color3(1.0f); } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_mul_float_allot(ret, w, a); - - return ret; + return comp; } - __device__ void* -osl_add_closure_closure(void* sg_, OSL::ClosureColor* a, OSL::ClosureColor* b) +osl_allocate_weighted_closure_component(void* sg_, int id, int size, + const void* w_) { - ShaderGlobals* sg_ptr = (ShaderGlobals*)sg_; + w_ = __builtin_assume_aligned(w_, alignof(float)); - if (a == NULL) { - return b; - } - - if (b == NULL) { - return a; + ShaderGlobals* sg = (ShaderGlobals*)sg_; + const OSL::Color3* w = (const OSL::Color3*)w_; + if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) + return NULL; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + // Allocate the component and the mul back to back + const size_t needed = sizeof(OSL::ClosureComponent) + size; + OSL::ClosureComponent* comp + = (OSL::ClosureComponent*) + closure_pool->allocate(needed, alignof(OSL::ClosureComponent)); + if (comp) { + comp->id = id; + comp->w = *w; } - - // Fix up the alignment - void* ret = ((char*)sg_ptr->renderstate) - + alignment_offset_calc(sg_ptr->renderstate, - alignof(OSL::ClosureComponent)); - sg_ptr->renderstate = closure_add_allot(ret, a, b); - - return ret; + return comp; } + #define IS_STRING(type) (type.basetype == OSL::TypeDesc::STRING) #define IS_PTR(type) (type.basetype == OSL::TypeDesc::PTR) #define IS_COLOR(type) (type.vecsemantics == OSL::TypeDesc::COLOR) @@ -230,8 +169,8 @@ osl_bind_interpolated_param(void* sg_, OSL::ustring_pod name, long long type, char status = *userdata_initialized; if (status == 0) { bool ok = rend_get_userdata(HDSTR(name), userdata_data, - symbol_data_size, (*(OSL::TypeDesc*)&type), - userdata_index); + symbol_data_size, (*(OSL::TypeDesc*)&type), + userdata_index); *userdata_initialized = status = 1 + ok; } if (status == 2) { From aef2a80b3655239aec30f1c0dedf5871d05c37ce Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Fri, 21 Feb 2025 14:51:37 +1100 Subject: [PATCH 11/20] clang format Signed-off-by: Curtis Black --- src/testrender/cuda/optix_raytracer.cu | 3 ++- src/testshade/render_state.h | 2 +- src/testshade/testshade.cpp | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/testrender/cuda/optix_raytracer.cu b/src/testrender/cuda/optix_raytracer.cu index 13a2dcd8f..c102d25dc 100644 --- a/src/testrender/cuda/optix_raytracer.cu +++ b/src/testrender/cuda/optix_raytracer.cu @@ -48,7 +48,8 @@ __device__ __constant__ RenderParams render_params; static inline __device__ void -execute_shader(ShaderGlobalsType& sg, const int shader_id, StackClosurePool& closure_pool) +execute_shader(ShaderGlobalsType& sg, const int shader_id, + StackClosurePool& closure_pool) { if (shader_id < 0) { // TODO: should probably never get here ... diff --git a/src/testshade/render_state.h b/src/testshade/render_state.h index 1c0d83ff8..391b0a5fd 100644 --- a/src/testshade/render_state.h +++ b/src/testshade/render_state.h @@ -46,7 +46,7 @@ class StackClosurePool { return (void*)p; return nullptr; } -} +}; struct RenderState { RenderContext* context; diff --git a/src/testshade/testshade.cpp b/src/testshade/testshade.cpp index 6a4db292c..541d22a48 100644 --- a/src/testshade/testshade.cpp +++ b/src/testshade/testshade.cpp @@ -959,9 +959,9 @@ setup_shaderglobals(ShaderGlobals& sg, ShadingSystem* shadingsys, // Any state data needed by SimpleRenderer or its free function equivalent // will need to be passed here the ShaderGlobals. - renderState.context = &theRenderState; - renderState.closure_pool = nullptr; // Use inbuilt closure pool. - sg.renderstate = &renderState; + renderState.context = &theRenderState; + renderState.closure_pool = nullptr; // Use inbuilt closure pool. + sg.renderstate = &renderState; // Set "shader" space to be Mshad. In a real renderer, this may be // different for each shader group. From ae990c38688e42b0e1881a82f670197cfa2621c1 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Fri, 21 Feb 2025 15:35:09 +1100 Subject: [PATCH 12/20] cleanup Signed-off-by: Curtis Black --- src/testrender/cuda/rend_lib.cu | 31 ++++++++++----------- src/testshade/render_state.h | 3 +++ testsuite/example-cuda/rend_lib.cu | 43 ++++++++++++------------------ 3 files changed, 36 insertions(+), 41 deletions(-) diff --git a/src/testrender/cuda/rend_lib.cu b/src/testrender/cuda/rend_lib.cu index 3720f1f76..aa26eef43 100644 --- a/src/testrender/cuda/rend_lib.cu +++ b/src/testrender/cuda/rend_lib.cu @@ -39,14 +39,15 @@ __direct_callable__dummy_rend_lib() { } -__device__ void* + +__device__ const void* osl_add_closure_closure(void* sg_, const void* a_, const void* b_) { - a_ = __builtin_assume_aligned(a_, alignof(float)); - b_ = __builtin_assume_aligned(b_, alignof(float)); - ShaderGlobals* sg = (ShaderGlobals*)sg_; - const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; - const OSL::ClosureColor* b = (const OSL::ClosureColor*)b_; + a_ = __builtin_assume_aligned(a_, alignof(float)); + b_ = __builtin_assume_aligned(b_, alignof(float)); + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + const OSL::ClosureColor* b = (const OSL::ClosureColor*)b_; if (a == NULL) return b; if (b == NULL) @@ -63,15 +64,15 @@ osl_add_closure_closure(void* sg_, const void* a_, const void* b_) return add; } -__device__ void* +__device__ const void* osl_mul_closure_color(void* sg_, const void* a_, const void* w_) { a_ = __builtin_assume_aligned(a_, alignof(float)); w_ = __builtin_assume_aligned(w_, alignof(float)); - ShaderGlobals* sg = (ShaderGlobals*)sg_; - const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; - const OSL::Color3* w = (const OSL::Color3*)w_; + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + const OSL::Color3* w = (const OSL::Color3*)w_; if (a == NULL) return NULL; if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) @@ -90,13 +91,13 @@ osl_mul_closure_color(void* sg_, const void* a_, const void* w_) return mul; } -__device__ void* +__device__ const void* osl_mul_closure_float(void* sg_, const void* a_, float w) { a_ = __builtin_assume_aligned(a_, alignof(float)); - ShaderGlobals* sg = (ShaderGlobals*)sg_; - const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; if (a == NULL) return NULL; if (w == 0.0f) @@ -118,8 +119,8 @@ osl_mul_closure_float(void* sg_, const void* a_, float w) __device__ void* osl_allocate_closure_component(void* sg_, int id, int size) { - ShaderGlobals* sg = (ShaderGlobals*)sg_; - auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; // Allocate the component and the mul back to back const size_t needed = sizeof(OSL::ClosureComponent) + size; OSL::ClosureComponent* comp diff --git a/src/testshade/render_state.h b/src/testshade/render_state.h index 391b0a5fd..22d6e37af 100644 --- a/src/testshade/render_state.h +++ b/src/testshade/render_state.h @@ -29,14 +29,17 @@ class StackClosurePool { void* ptr; public: + OSL_HOSTDEVICE StackClosurePool() { reset(); } + OSL_HOSTDEVICE void reset() { ptr = &buffer[0]; *(int*)ptr = 0; } + OSL_HOSTDEVICE void* allocate(size_t size, size_t alignment) { uintptr_t p = OIIO::round_to_multiple_of_pow2((uintptr_t)ptr, diff --git a/testsuite/example-cuda/rend_lib.cu b/testsuite/example-cuda/rend_lib.cu index fa9714aa4..2d235dc19 100644 --- a/testsuite/example-cuda/rend_lib.cu +++ b/testsuite/example-cuda/rend_lib.cu @@ -14,27 +14,18 @@ extern __device__ char* s_color_system; } OSL_NAMESPACE_END -// Taken from the SimplePool class -__device__ static inline size_t -alignment_offset_calc(void* ptr, size_t alignment) -{ - uintptr_t ptrbits = reinterpret_cast(ptr); - uintptr_t offset = ((ptrbits + alignment - 1) & -alignment) - ptrbits; - return offset; -} - // These functions are declared extern to prevent name mangling. extern "C" { -__device__ void* +__device__ const void* osl_add_closure_closure(void* sg_, const void* a_, const void* b_) { - a_ = __builtin_assume_aligned(a_, alignof(float)); - b_ = __builtin_assume_aligned(b_, alignof(float)); - ShaderGlobals* sg = (ShaderGlobals*)sg_; - const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; - const OSL::ClosureColor* b = (const OSL::ClosureColor*)b_; + a_ = __builtin_assume_aligned(a_, alignof(float)); + b_ = __builtin_assume_aligned(b_, alignof(float)); + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + const OSL::ClosureColor* b = (const OSL::ClosureColor*)b_; if (a == NULL) return b; if (b == NULL) @@ -51,15 +42,15 @@ osl_add_closure_closure(void* sg_, const void* a_, const void* b_) return add; } -__device__ void* +__device__ const void* osl_mul_closure_color(void* sg_, const void* a_, const void* w_) { a_ = __builtin_assume_aligned(a_, alignof(float)); w_ = __builtin_assume_aligned(w_, alignof(float)); - ShaderGlobals* sg = (ShaderGlobals*)sg_; - const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; - const OSL::Color3* w = (const OSL::Color3*)w_; + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + const OSL::Color3* w = (const OSL::Color3*)w_; if (a == NULL) return NULL; if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) @@ -78,13 +69,13 @@ osl_mul_closure_color(void* sg_, const void* a_, const void* w_) return mul; } -__device__ void* +__device__ const void* osl_mul_closure_float(void* sg_, const void* a_, float w) { a_ = __builtin_assume_aligned(a_, alignof(float)); - ShaderGlobals* sg = (ShaderGlobals*)sg_; - const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; + const OSL::ClosureColor* a = (const OSL::ClosureColor*)a_; if (a == NULL) return NULL; if (w == 0.0f) @@ -106,8 +97,8 @@ osl_mul_closure_float(void* sg_, const void* a_, float w) __device__ void* osl_allocate_closure_component(void* sg_, int id, int size) { - ShaderGlobals* sg = (ShaderGlobals*)sg_; - auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; + auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; // Allocate the component and the mul back to back const size_t needed = sizeof(OSL::ClosureComponent) + size; OSL::ClosureComponent* comp @@ -126,8 +117,8 @@ osl_allocate_weighted_closure_component(void* sg_, int id, int size, { w_ = __builtin_assume_aligned(w_, alignof(float)); - ShaderGlobals* sg = (ShaderGlobals*)sg_; - const OSL::Color3* w = (const OSL::Color3*)w_; + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; + const OSL::Color3* w = (const OSL::Color3*)w_; if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) return NULL; auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; From dd091cabde0d897e9da6abb627cf040939f5d8a0 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 3 Jun 2025 12:05:06 +1000 Subject: [PATCH 13/20] Rename variables Signed-off-by: Curtis Black --- src/testshade/rs_simplerend.cpp | 66 ++++++++++++++++----------------- src/testshade/simplerend.cpp | 44 +++++++++++----------- src/testshade/simplerend.h | 2 +- src/testshade/testshade.cpp | 8 ++-- 4 files changed, 60 insertions(+), 60 deletions(-) diff --git a/src/testshade/rs_simplerend.cpp b/src/testshade/rs_simplerend.cpp index f7fd5ba74..d9d644247 100644 --- a/src/testshade/rs_simplerend.cpp +++ b/src/testshade/rs_simplerend.cpp @@ -66,24 +66,24 @@ rs_get_inverse_matrix_space_time(OSL::OpaqueExecContextPtr ec, using OSL::Matrix44; - auto rs = OSL::get_rs(ec)->context; + auto rc = OSL::get_rs(ec)->context; if (to == OSL::Hashes::camera || to == OSL::Hashes::screen || to == OSL::Hashes::NDC || to == RS::Hashes::raster) { - Matrix44 M { rs->world_to_camera }; + Matrix44 M { rc->world_to_camera }; if (to == OSL::Hashes::screen || to == OSL::Hashes::NDC || to == RS::Hashes::raster) { - float depthrange = (double)rs->yon - (double)rs->hither; - OSL::ustringhash proj = rs->projection; + float depthrange = (double)rc->yon - (double)rc->hither; + OSL::ustringhash proj = rc->projection; if (proj == RS::Hashes::perspective) { - float tanhalffov = OIIO::fast_tan(0.5f * rs->fov * M_PI + float tanhalffov = OIIO::fast_tan(0.5f * rc->fov * M_PI / 180.0); // clang-format off Matrix44 camera_to_screen (1/tanhalffov, 0, 0, 0, 0, 1/tanhalffov, 0, 0, - 0, 0, rs->yon/depthrange, 1, - 0, 0, -(rs->yon*rs->hither)/depthrange, 0); + 0, 0, rc->yon/depthrange, 1, + 0, 0, -(rc->yon*rc->hither)/depthrange, 0); // clang-format on M = M * camera_to_screen; } else { @@ -91,7 +91,7 @@ rs_get_inverse_matrix_space_time(OSL::OpaqueExecContextPtr ec, Matrix44 camera_to_screen (1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1/depthrange, 0, - 0, 0, -(rs->hither)/depthrange, 1); + 0, 0, -(rc->hither)/depthrange, 1); // clang-format on M = M * camera_to_screen; } @@ -107,8 +107,8 @@ rs_get_inverse_matrix_space_time(OSL::OpaqueExecContextPtr ec, M = M * screen_to_ndc; if (to == RS::Hashes::raster) { // clang-format off - Matrix44 ndc_to_raster (rs->xres, 0, 0, 0, - 0, rs->yres, 0, 0, + Matrix44 ndc_to_raster (rc->xres, 0, 0, 0, + 0, rc->yres, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); M = M * ndc_to_raster; @@ -500,7 +500,7 @@ rs_get_attribute(OSL::OpaqueExecContextPtr oec, OSL::ustringhash_pod object_, auto object = OSL::ustringhash_from(object_); auto name = OSL::ustringhash_from(name_); const OSL::TypeDesc type = OSL::TypeDesc_from(_type); - auto rs = OSL::get_rs(oec)->context; + auto rc = OSL::get_rs(oec)->context; // The many branches in the code below handle the case where we don't know // the attribute name at compile time. In the case it is known, dead-code @@ -509,38 +509,38 @@ rs_get_attribute(OSL::OpaqueExecContextPtr oec, OSL::ustringhash_pod object_, return rs_get_attribute_constant_int(OSL_VERSION, result); if (name == RS::Hashes::camera_resolution && type == OSL::TypeDesc(OSL::TypeDesc::INT, 2)) - return rs_get_attribute_constant_int2(rs->xres, rs->yres, result); + return rs_get_attribute_constant_int2(rc->xres, rc->yres, result); if (name == RS::Hashes::camera_projection && type == OSL::TypeString) - return rs_get_attribute_constant_string(rs->projection, result); + return rs_get_attribute_constant_string(rc->projection, result); if (name == RS::Hashes::camera_pixelaspect && type == OSL::TypeFloat) - return rs_get_attribute_constant_float(rs->pixelaspect, derivatives, + return rs_get_attribute_constant_float(rc->pixelaspect, derivatives, result); if (name == RS::Hashes::camera_screen_window && type == OSL::TypeDesc(OSL::TypeDesc::FLOAT, 4)) - return rs_get_attribute_constant_float4(rs->screen_window[0], - rs->screen_window[1], - rs->screen_window[2], - rs->screen_window[3], + return rs_get_attribute_constant_float4(rc->screen_window[0], + rc->screen_window[1], + rc->screen_window[2], + rc->screen_window[3], derivatives, result); if (name == RS::Hashes::camera_fov && type == OSL::TypeFloat) - return rs_get_attribute_constant_float(rs->fov, derivatives, result); + return rs_get_attribute_constant_float(rc->fov, derivatives, result); if (name == RS::Hashes::camera_clip && type == OSL::TypeDesc(OSL::TypeDesc::FLOAT, 2)) - return rs_get_attribute_constant_float2(rs->hither, rs->yon, + return rs_get_attribute_constant_float2(rc->hither, rc->yon, derivatives, result); if (name == RS::Hashes::camera_clip_near && type == OSL::TypeFloat) - return rs_get_attribute_constant_float(rs->hither, derivatives, result); + return rs_get_attribute_constant_float(rc->hither, derivatives, result); if (name == RS::Hashes::camera_clip_far && type == OSL::TypeFloat) - return rs_get_attribute_constant_float(rs->yon, derivatives, result); + return rs_get_attribute_constant_float(rc->yon, derivatives, result); if (name == RS::Hashes::camera_shutter && type == OSL::TypeDesc(OSL::TypeDesc::FLOAT, 2)) - return rs_get_attribute_constant_float2(rs->shutter[0], rs->shutter[1], + return rs_get_attribute_constant_float2(rc->shutter[0], rc->shutter[1], derivatives, result); if (name == RS::Hashes::camera_shutter_open && type == OSL::TypeFloat) - return rs_get_attribute_constant_float(rs->shutter[0], derivatives, + return rs_get_attribute_constant_float(rc->shutter[0], derivatives, result); if (name == RS::Hashes::camera_shutter_close && type == OSL::TypeFloat) - return rs_get_attribute_constant_float(rs->shutter[1], derivatives, + return rs_get_attribute_constant_float(rc->shutter[1], derivatives, result); if (name == RS::Hashes::shading_index && type == OSL::TypeInt) @@ -652,9 +652,9 @@ rs_errorfmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash fmt_specification, int32_t arg_count, const OSL::EncodedType* argTypes, uint32_t argValuesSize, uint8_t* argValues) { - auto rs = OSL::get_rs(ec)->context; + auto rc = OSL::get_rs(ec)->context; - OSL::journal::Writer jw { rs->journal_buffer }; + OSL::journal::Writer jw { rc->journal_buffer }; jw.record_errorfmt(OSL::get_thread_index(ec), OSL::get_shade_index(ec), fmt_specification, arg_count, argTypes, argValuesSize, argValues); @@ -665,9 +665,9 @@ rs_warningfmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash fmt_specification, int32_t arg_count, const OSL::EncodedType* argTypes, uint32_t argValuesSize, uint8_t* argValues) { - auto rs = OSL::get_rs(ec)->context; + auto rc = OSL::get_rs(ec)->context; - OSL::journal::Writer jw { rs->journal_buffer }; + OSL::journal::Writer jw { rc->journal_buffer }; jw.record_warningfmt(OSL::get_max_warnings_per_thread(ec), OSL::get_thread_index(ec), OSL::get_shade_index(ec), fmt_specification, arg_count, argTypes, argValuesSize, @@ -680,9 +680,9 @@ rs_printfmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash fmt_specification, int32_t arg_count, const OSL::EncodedType* argTypes, uint32_t argValuesSize, uint8_t* argValues) { - auto rs = OSL::get_rs(ec)->context; + auto rc = OSL::get_rs(ec)->context; - OSL::journal::Writer jw { rs->journal_buffer }; + OSL::journal::Writer jw { rc->journal_buffer }; jw.record_printfmt(OSL::get_thread_index(ec), OSL::get_shade_index(ec), fmt_specification, arg_count, argTypes, argValuesSize, argValues); @@ -695,9 +695,9 @@ rs_filefmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash filename_hash, const OSL::EncodedType* argTypes, uint32_t argValuesSize, uint8_t* argValues) { - auto rs = OSL::get_rs(ec)->context; + auto rc = OSL::get_rs(ec)->context; - OSL::journal::Writer jw { rs->journal_buffer }; + OSL::journal::Writer jw { rc->journal_buffer }; jw.record_filefmt(OSL::get_thread_index(ec), OSL::get_shade_index(ec), filename_hash, fmt_specification, arg_count, argTypes, argValuesSize, argValues); diff --git a/src/testshade/simplerend.cpp b/src/testshade/simplerend.cpp index 044f40f50..5b56ec809 100644 --- a/src/testshade/simplerend.cpp +++ b/src/testshade/simplerend.cpp @@ -1058,22 +1058,22 @@ SimpleRenderer::add_output(string_view varname_, string_view filename, void -SimpleRenderer::export_state(RenderContext& state) const +SimpleRenderer::export_context(RenderContext& context) const { - state.xres = m_xres; - state.yres = m_yres; - state.fov = m_fov; - state.hither = m_hither; - state.yon = m_yon; - - state.world_to_camera = OSL::Matrix44(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, - 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, - 0.0, 1.0); + context.xres = m_xres; + context.yres = m_yres; + context.fov = m_fov; + context.hither = m_hither; + context.yon = m_yon; + + context.world_to_camera = OSL::Matrix44(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, + 0.0, 1.0); //perspective is not a member of StringParams (i.e not in strdecls.h) - state.projection = RS::Hashes::perspective; - state.pixelaspect = m_pixelaspect; - std::copy_n(m_screen_window, 4, state.screen_window); - std::copy_n(m_shutter, 2, state.shutter); + context.projection = RS::Hashes::perspective; + context.pixelaspect = m_pixelaspect; + std::copy_n(m_screen_window, 4, context.screen_window); + std::copy_n(m_shutter, 2, context.shutter); } void @@ -1082,8 +1082,8 @@ SimpleRenderer::errorfmt(OSL::ShaderGlobals* sg, const EncodedType* arg_types, uint32_t arg_values_size, uint8_t* argValues) { - RenderContext* rs = reinterpret_cast(sg->renderstate)->context; - OSL::journal::Writer jw { rs->journal_buffer }; + RenderContext* rc = reinterpret_cast(sg->renderstate)->context; + OSL::journal::Writer jw { rc->journal_buffer }; jw.record_errorfmt(OSL::get_thread_index(sg), OSL::get_shade_index(sg), fmt_specification, arg_count, arg_types, arg_values_size, argValues); @@ -1095,8 +1095,8 @@ SimpleRenderer::warningfmt(OSL::ShaderGlobals* sg, int32_t arg_count, const EncodedType* arg_types, uint32_t arg_values_size, uint8_t* argValues) { - RenderContext* rs = reinterpret_cast(sg->renderstate)->context; - OSL::journal::Writer jw { rs->journal_buffer }; + RenderContext* rc = reinterpret_cast(sg->renderstate)->context; + OSL::journal::Writer jw { rc->journal_buffer }; jw.record_warningfmt(OSL::get_max_warnings_per_thread(sg), OSL::get_thread_index(sg), OSL::get_shade_index(sg), fmt_specification, arg_count, arg_types, @@ -1111,8 +1111,8 @@ SimpleRenderer::printfmt(OSL::ShaderGlobals* sg, const EncodedType* arg_types, uint32_t arg_values_size, uint8_t* argValues) { - RenderContext* rs = reinterpret_cast(sg->renderstate)->context; - OSL::journal::Writer jw { rs->journal_buffer }; + RenderContext* rc = reinterpret_cast(sg->renderstate)->context; + OSL::journal::Writer jw { rc->journal_buffer }; jw.record_printfmt(OSL::get_thread_index(sg), OSL::get_shade_index(sg), fmt_specification, arg_count, arg_types, arg_values_size, argValues); @@ -1124,8 +1124,8 @@ SimpleRenderer::filefmt(OSL::ShaderGlobals* sg, OSL::ustringhash filename_hash, const EncodedType* arg_types, uint32_t arg_values_size, uint8_t* argValues) { - RenderContext* rs = reinterpret_cast(sg->renderstate)->context; - OSL::journal::Writer jw { rs->journal_buffer }; + RenderContext* rc = reinterpret_cast(sg->renderstate)->context; + OSL::journal::Writer jw { rc->journal_buffer }; jw.record_filefmt(OSL::get_thread_index(sg), OSL::get_shade_index(sg), filename_hash, fmt_specification, arg_count, arg_types, arg_values_size, argValues); diff --git a/src/testshade/simplerend.h b/src/testshade/simplerend.h index a09e029e3..39ff3f5a9 100644 --- a/src/testshade/simplerend.h +++ b/src/testshade/simplerend.h @@ -145,7 +145,7 @@ class SimpleRenderer : public RendererServices { size_t noutputs() const { return m_outputbufs.size(); } virtual void init_shadingsys(ShadingSystem* ss) { shadingsys = ss; } - virtual void export_state(RenderContext&) const; + virtual void export_context(RenderContext&) const; virtual void prepare_render() {} virtual void warmup() {} virtual void render(int /*xres*/, int /*yres*/) {} diff --git a/src/testshade/testshade.cpp b/src/testshade/testshade.cpp index 541d22a48..64710e0e3 100644 --- a/src/testshade/testshade.cpp +++ b/src/testshade/testshade.cpp @@ -946,7 +946,7 @@ setup_transformations(SimpleRenderer& rend, OSL::Matrix44& Mshad, } // A single render context shared by all render threads. -static RenderContext theRenderState; +static RenderContext theRenderContext; // Set up the ShaderGlobals fields for pixel (x,y). @@ -959,7 +959,7 @@ setup_shaderglobals(ShaderGlobals& sg, ShadingSystem* shadingsys, // Any state data needed by SimpleRenderer or its free function equivalent // will need to be passed here the ShaderGlobals. - renderState.context = &theRenderState; + renderState.context = &theRenderContext; renderState.closure_pool = nullptr; // Use inbuilt closure pool. sg.renderstate = &renderState; @@ -2143,7 +2143,7 @@ test_shade(int argc, const char* argv[]) rend->prepare_render(); if (use_rs_bitcode) { // SimpleRend to supply the required state for render service free functions - rend->export_state(theRenderState); + rend->export_context(theRenderContext); } double setuptime = timer.lap(); @@ -2171,7 +2171,7 @@ test_shade(int argc, const char* argv[]) //Send the populated Journal Buffer to the renderer - theRenderState.journal_buffer = jbuffer.get(); + theRenderContext.journal_buffer = jbuffer.get(); // Allow a settable number of iterations to "render" the whole image, From 6cc829b9ddebcd0ade66ac240404a5d5e363a109 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 3 Jun 2025 14:41:03 +1000 Subject: [PATCH 14/20] Use stack closure pool for test shade cpu bitcode Signed-off-by: Curtis Black --- src/liboslexec/shadeimage.cpp | 3 --- src/osltoy/osltoyrenderer.cpp | 3 --- src/testrender/simpleraytracer.cpp | 7 ------- src/testshade/rs_simplerend.cpp | 2 -- src/testshade/testshade.cpp | 14 ++++++++++---- 5 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/liboslexec/shadeimage.cpp b/src/liboslexec/shadeimage.cpp index da982670e..5a3db57ea 100644 --- a/src/liboslexec/shadeimage.cpp +++ b/src/liboslexec/shadeimage.cpp @@ -116,9 +116,6 @@ shade_image(ShadingSystem& shadingsys, ShaderGroup& group, // That also implies that our normal points to (0,0,1) sg.N = Vec3(0, 0, 1); sg.Ng = Vec3(0, 0, 1); - // In our SimpleRenderer, the "renderstate" itself just a pointer to - // the ShaderGlobals. - // sg.renderstate = &sg; } // Loop over all pixels in the image (in x and y)... diff --git a/src/osltoy/osltoyrenderer.cpp b/src/osltoy/osltoyrenderer.cpp index 48d55855c..932cee26b 100644 --- a/src/osltoy/osltoyrenderer.cpp +++ b/src/osltoy/osltoyrenderer.cpp @@ -115,9 +115,6 @@ OSLToyRenderer::OSLToyRenderer() // That also implies that our normal points to (0,0,1) sg.N = Vec3(0, 0, 1); sg.Ng = Vec3(0, 0, 1); - // In our SimpleRenderer, the "renderstate" itself just a pointer to - // the ShaderGlobals. - // sg.renderstate = &sg; } diff --git a/src/testrender/simpleraytracer.cpp b/src/testrender/simpleraytracer.cpp index b7795c98c..7508410a8 100644 --- a/src/testrender/simpleraytracer.cpp +++ b/src/testrender/simpleraytracer.cpp @@ -921,12 +921,6 @@ SimpleRaytracer::globals_from_hit(ShaderGlobalsType& sg, const Ray& r, } sg.raytype = r.raytype; sg.flipHandedness = sg.dPdx.cross(sg.dPdy).dot(sg.N) < 0; - -#ifndef __CUDACC__ - // In our SimpleRaytracer, the "renderstate" itself just a pointer to - // the ShaderGlobals. - sg.renderstate = &sg; -#endif } @@ -1349,7 +1343,6 @@ SimpleRaytracer::prepare_geometry() sg.v = uv[i].y; sg.I = (p[i] - camera.eye).normalize(); sg.surfacearea = area; - sg.renderstate = &sg; shadingsys->execute(*ctx, *m_shaders[shaderID].disp, sg); diff --git a/src/testshade/rs_simplerend.cpp b/src/testshade/rs_simplerend.cpp index d9d644247..33ede018c 100644 --- a/src/testshade/rs_simplerend.cpp +++ b/src/testshade/rs_simplerend.cpp @@ -368,14 +368,12 @@ rs_trace_get(OSL::OpaqueExecContextPtr ec, OSL::ustringhash name, #endif } -#ifdef __CUDA_ARCH__ // Host side uses rs_fallback implementation. OSL_RSOP OSL_HOSTDEVICE void* rs_allocate_closure(OSL::OpaqueExecContextPtr ec, size_t size, size_t alignment) { auto rs = OSL::get_rs(ec); return rs->closure_pool->allocate(size, alignment); } -#endif OSL_RSOP OSL_HOSTDEVICE bool rs_get_attribute_constant_string(OSL::ustringhash value, void* result) diff --git a/src/testshade/testshade.cpp b/src/testshade/testshade.cpp index 64710e0e3..5309d892d 100644 --- a/src/testshade/testshade.cpp +++ b/src/testshade/testshade.cpp @@ -952,7 +952,8 @@ static RenderContext theRenderContext; // Set up the ShaderGlobals fields for pixel (x,y). static void setup_shaderglobals(ShaderGlobals& sg, ShadingSystem* shadingsys, - RenderState& renderState, int x, int y) + RenderState& renderState, StackClosurePool* closure_pool, + int x, int y) { // Just zero the whole thing out to start memset((char*)&sg, 0, sizeof(ShaderGlobals)); @@ -960,8 +961,10 @@ setup_shaderglobals(ShaderGlobals& sg, ShadingSystem* shadingsys, // Any state data needed by SimpleRenderer or its free function equivalent // will need to be passed here the ShaderGlobals. renderState.context = &theRenderContext; - renderState.closure_pool = nullptr; // Use inbuilt closure pool. + renderState.closure_pool = closure_pool; sg.renderstate = &renderState; + if (closure_pool) + closure_pool->reset(); // Set "shader" space to be Mshad. In a real renderer, this may be // different for each shader group. @@ -1185,7 +1188,8 @@ setup_output_images(SimpleRenderer* rend, ShadingSystem* shadingsys, raytype_bit = shadingsys->raytype_bit(ustring(raytype_name)); ShaderGlobals sg; RenderState renderState; - setup_shaderglobals(sg, shadingsys, renderState, 0, 0); + StackClosurePool closure_pool; + setup_shaderglobals(sg, shadingsys, renderState, &closure_pool, 0, 0); #if OSL_USE_BATCHED if (batched) { @@ -1590,6 +1594,7 @@ shade_region(SimpleRenderer* rend, ShaderGroup* shadergroup, OIIO::ROI roi, // Set up shader globals and a little test grid of points to shade. ShaderGlobals shaderglobals; RenderState renderState; + StackClosurePool closure_pool; raytype_bit = shadingsys->raytype_bit(ustring(raytype_name)); @@ -1610,7 +1615,8 @@ shade_region(SimpleRenderer* rend, ShaderGroup* shadergroup, OIIO::ROI roi, // set it up rigged to look like we're rendering a single // quadrilateral that exactly fills the viewport, and that // setup is done in the following function call: - setup_shaderglobals(shaderglobals, shadingsys, renderState, x, y); + setup_shaderglobals(shaderglobals, shadingsys, renderState, + &closure_pool, x, y); if (this_threads_index == uninitialized_thread_index) { this_threads_index = next_thread_index.fetch_add(1u); From 606099782a4c7dad6fc4f71d054d208f8f7f8677 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 3 Jun 2025 15:03:27 +1000 Subject: [PATCH 15/20] Add missing include Signed-off-by: Curtis Black --- src/testrender/raytracer.h | 2 +- src/testshade/render_state.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/testrender/raytracer.h b/src/testrender/raytracer.h index 4be865678..12516fe97 100644 --- a/src/testrender/raytracer.h +++ b/src/testrender/raytracer.h @@ -14,7 +14,7 @@ #include #include #include "bvh.h" - +#include "../testshade/render_state.h" #if OSL_USE_OPTIX # include diff --git a/src/testshade/render_state.h b/src/testshade/render_state.h index 22d6e37af..f59b35e3a 100644 --- a/src/testshade/render_state.h +++ b/src/testshade/render_state.h @@ -47,6 +47,7 @@ class StackClosurePool { ptr = (void*)(p + size); if (ptr <= &buffer[256]) return (void*)p; + assert(false); return nullptr; } }; From b634e3cc32a4fae99e3b04990f528b8e8cb4f2b0 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 3 Jun 2025 15:17:25 +1000 Subject: [PATCH 16/20] cleanup Signed-off-by: Curtis Black --- src/testrender/cuda/rend_lib.cu | 2 +- src/testrender/raytracer.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/testrender/cuda/rend_lib.cu b/src/testrender/cuda/rend_lib.cu index 4e656f75a..d0a0d7bc8 100644 --- a/src/testrender/cuda/rend_lib.cu +++ b/src/testrender/cuda/rend_lib.cu @@ -139,7 +139,7 @@ osl_allocate_weighted_closure_component(void* sg_, int id, int size, { w_ = __builtin_assume_aligned(w_, alignof(float)); - ShaderGlobals* sg = (ShaderGlobals*)sg_; + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; const OSL::Color3* w = (const OSL::Color3*)w_; if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) return NULL; diff --git a/src/testrender/raytracer.h b/src/testrender/raytracer.h index 12516fe97..73dcad562 100644 --- a/src/testrender/raytracer.h +++ b/src/testrender/raytracer.h @@ -9,12 +9,12 @@ #include +#include "../testshade/render_state.h" #include "optix_compat.h" #include "render_params.h" #include #include #include "bvh.h" -#include "../testshade/render_state.h" #if OSL_USE_OPTIX # include From 8c613276d021bfa6899b7536901ab2a0746e4677 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 3 Jun 2025 15:42:19 +1000 Subject: [PATCH 17/20] cleanup Signed-off-by: Curtis Black --- src/testrender/cuda/rend_lib.cu | 4 ++-- src/testshade/render_state.h | 15 --------------- src/testshade/rs_simplerend.cpp | 16 ++++++++++++++++ src/testshade/simplerend.cpp | 15 +++++++++++++++ 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/testrender/cuda/rend_lib.cu b/src/testrender/cuda/rend_lib.cu index d0a0d7bc8..75de931cc 100644 --- a/src/testrender/cuda/rend_lib.cu +++ b/src/testrender/cuda/rend_lib.cu @@ -139,8 +139,8 @@ osl_allocate_weighted_closure_component(void* sg_, int id, int size, { w_ = __builtin_assume_aligned(w_, alignof(float)); - OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; - const OSL::Color3* w = (const OSL::Color3*)w_; + OSL_CUDA::ShaderGlobals* sg = (OSL_CUDA::ShaderGlobals*)sg_; + const OSL::Color3* w = (const OSL::Color3*)w_; if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f) return NULL; auto* closure_pool = ((RenderState*)sg->renderstate)->closure_pool; diff --git a/src/testshade/render_state.h b/src/testshade/render_state.h index f59b35e3a..a014cebec 100644 --- a/src/testshade/render_state.h +++ b/src/testshade/render_state.h @@ -56,18 +56,3 @@ struct RenderState { RenderContext* context; StackClosurePool* closure_pool; }; - - -// Create constexpr hashes for all strings used by the free function renderer services. -// NOTE: Actually ustring's should also be instantiated in host code someplace as well -// to allow the reverse mapping of hash->string to work when processing messages -namespace RS { -namespace { -namespace Hashes { -#define RS_STRDECL(str, var_name) \ - constexpr OSL::ustringhash var_name(OSL::strhash(str)); -#include "rs_strdecls.h" -#undef RS_STRDECL -}; //namespace Hashes -} // unnamed namespace -}; //namespace RS diff --git a/src/testshade/rs_simplerend.cpp b/src/testshade/rs_simplerend.cpp index 33ede018c..b8c69597e 100644 --- a/src/testshade/rs_simplerend.cpp +++ b/src/testshade/rs_simplerend.cpp @@ -17,6 +17,22 @@ #include "render_state.h" + +// Create constexpr hashes for all strings used by the free function renderer services. +// NOTE: Actually ustring's should also be instantiated in host code someplace as well +// to allow the reverse mapping of hash->string to work when processing messages +namespace RS { +namespace { +namespace Hashes { +#define RS_STRDECL(str, var_name) \ + constexpr OSL::ustringhash var_name(OSL::strhash(str)); +#include "rs_strdecls.h" +#undef RS_STRDECL +}; //namespace Hashes +} // unnamed namespace +}; //namespace RS + + // Keep free functions in sync with virtual function based SimpleRenderer. OSL_RSOP OSL_HOSTDEVICE bool diff --git a/src/testshade/simplerend.cpp b/src/testshade/simplerend.cpp index 5b56ec809..1563caa6d 100644 --- a/src/testshade/simplerend.cpp +++ b/src/testshade/simplerend.cpp @@ -25,6 +25,21 @@ namespace Strings { } // namespace RS +// Create constexpr hashes for all strings used by the free function renderer services. +// NOTE: Actually ustring's should also be instantiated in host code someplace as well +// to allow the reverse mapping of hash->string to work when processing messages +namespace RS { +namespace { +namespace Hashes { +#define RS_STRDECL(str, var_name) \ + constexpr OSL::ustringhash var_name(OSL::strhash(str)); +#include "rs_strdecls.h" +#undef RS_STRDECL +}; //namespace Hashes +} // unnamed namespace +}; //namespace RS + + using namespace OSL; From 3c3481d1eebfd4fe1e82c31427e78486cff5e051 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 3 Jun 2025 16:16:47 +1000 Subject: [PATCH 18/20] Increase stack closure pool size Signed-off-by: Curtis Black --- src/testshade/render_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/testshade/render_state.h b/src/testshade/render_state.h index a014cebec..53ba0047f 100644 --- a/src/testshade/render_state.h +++ b/src/testshade/render_state.h @@ -25,7 +25,7 @@ struct RenderContext { }; class StackClosurePool { - alignas(8) char buffer[256]; + alignas(8) char buffer[512]; void* ptr; public: From e51890d8bc39dc332f3bd1f14260502d57be0a35 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 3 Jun 2025 16:33:32 +1000 Subject: [PATCH 19/20] define closure pool size Signed-off-by: Curtis Black --- src/testshade/render_state.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/testshade/render_state.h b/src/testshade/render_state.h index 53ba0047f..a7c9c1c8e 100644 --- a/src/testshade/render_state.h +++ b/src/testshade/render_state.h @@ -25,7 +25,8 @@ struct RenderContext { }; class StackClosurePool { - alignas(8) char buffer[512]; + static constexpr size_t capacity = 512; + alignas(8) char buffer[capacity]; void* ptr; public: @@ -45,7 +46,7 @@ class StackClosurePool { uintptr_t p = OIIO::round_to_multiple_of_pow2((uintptr_t)ptr, alignment); ptr = (void*)(p + size); - if (ptr <= &buffer[256]) + if (ptr <= &buffer[capacity]) return (void*)p; assert(false); return nullptr; From 3954a14f1bcb23b0200b6dd2bb2ad4dcf1d087c4 Mon Sep 17 00:00:00 2001 From: Curtis Black Date: Tue, 3 Jun 2025 16:51:38 +1000 Subject: [PATCH 20/20] Increase stack closure pool size Signed-off-by: Curtis Black --- src/testshade/render_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/testshade/render_state.h b/src/testshade/render_state.h index a7c9c1c8e..29079f324 100644 --- a/src/testshade/render_state.h +++ b/src/testshade/render_state.h @@ -25,7 +25,7 @@ struct RenderContext { }; class StackClosurePool { - static constexpr size_t capacity = 512; + static constexpr size_t capacity = 1024; alignas(8) char buffer[capacity]; void* ptr;