diff --git a/vendor/nvapi/NvApiDriverSettings.c b/vendor/nvapi/NvApiDriverSettings.c new file mode 100644 index 0000000000..2fb52dd755 --- /dev/null +++ b/vendor/nvapi/NvApiDriverSettings.c @@ -0,0 +1,970 @@ +/***************************************************************************\ +|* *| +|* Copyright NVIDIA Corporation. All rights reserved. *| +|* *| +|* NOTICE TO USER: *| +|* *| +|* This source code is subject to NVIDIA ownership rights under U.S. *| +|* and international Copyright laws. Users and possessors of this *| +|* source code are hereby granted a nonexclusive, royalty-free *| +|* license to use this code in individual and commercial software. *| +|* *| +|* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE *| +|* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR *| +|* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH *| +|* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF *| +|* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR *| +|* PURPOSE. IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, *| +|* INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES *| +|* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN *| +|* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING *| +|* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOURCE *| +|* CODE. *| +|* *| +|* U.S. Government End Users. This source code is a "commercial item" *| +|* as that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting *| +|* of "commercial computer software" and "commercial computer software *| +|* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) *| +|* and is provided to the U.S. Government only as a commercial end item. *| +|* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through *| +|* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the *| +|* source code with only those rights set forth herein. *| +|* *| +|* Any use of this source code in individual and commercial software must *| +|* include, in the user documentation and internal comments to the code, *| +|* the above Disclaimer and U.S. Government End Users Notice. 
*| +|* *| +|* *| +\***************************************************************************/ + +#include "NvApiDriverSettings.h" + +EValues_OGL_AA_LINE_GAMMA g_valuesOGL_AA_LINE_GAMMA[OGL_AA_LINE_GAMMA_NUM_VALUES] = +{ + OGL_AA_LINE_GAMMA_DISABLED, + OGL_AA_LINE_GAMMA_ENABLED, + OGL_AA_LINE_GAMMA_MIN, + OGL_AA_LINE_GAMMA_MAX, +}; + +EValues_OGL_CPL_GDI_COMPATIBILITY g_valuesOGL_CPL_GDI_COMPATIBILITY[OGL_CPL_GDI_COMPATIBILITY_NUM_VALUES] = +{ + OGL_CPL_GDI_COMPATIBILITY_PREFER_DISABLED, + OGL_CPL_GDI_COMPATIBILITY_PREFER_ENABLED, + OGL_CPL_GDI_COMPATIBILITY_AUTO, +}; + +EValues_OGL_CPL_PREFER_DXPRESENT g_valuesOGL_CPL_PREFER_DXPRESENT[OGL_CPL_PREFER_DXPRESENT_NUM_VALUES] = +{ + OGL_CPL_PREFER_DXPRESENT_PREFER_DISABLED, + OGL_CPL_PREFER_DXPRESENT_PREFER_ENABLED, + OGL_CPL_PREFER_DXPRESENT_AUTO, +}; + +EValues_OGL_DEEP_COLOR_SCANOUT g_valuesOGL_DEEP_COLOR_SCANOUT[OGL_DEEP_COLOR_SCANOUT_NUM_VALUES] = +{ + OGL_DEEP_COLOR_SCANOUT_DISABLE, + OGL_DEEP_COLOR_SCANOUT_ENABLE, +}; + +EValues_OGL_DEFAULT_SWAP_INTERVAL g_valuesOGL_DEFAULT_SWAP_INTERVAL[OGL_DEFAULT_SWAP_INTERVAL_NUM_VALUES] = +{ + OGL_DEFAULT_SWAP_INTERVAL_TEAR, + OGL_DEFAULT_SWAP_INTERVAL_VSYNC_ONE, + OGL_DEFAULT_SWAP_INTERVAL_VSYNC, + OGL_DEFAULT_SWAP_INTERVAL_VALUE_MASK, + OGL_DEFAULT_SWAP_INTERVAL_FORCE_MASK, + OGL_DEFAULT_SWAP_INTERVAL_FORCE_OFF, + OGL_DEFAULT_SWAP_INTERVAL_FORCE_ON, + OGL_DEFAULT_SWAP_INTERVAL_APP_CONTROLLED, + OGL_DEFAULT_SWAP_INTERVAL_DISABLE, +}; + +EValues_OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL g_valuesOGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL[OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_NUM_VALUES] = +{ + OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_ZERO_SCANLINES, + OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_ONE_FULL_FRAME_OF_SCANLINES, +}; + +EValues_OGL_DEFAULT_SWAP_INTERVAL_SIGN g_valuesOGL_DEFAULT_SWAP_INTERVAL_SIGN[OGL_DEFAULT_SWAP_INTERVAL_SIGN_NUM_VALUES] = +{ + OGL_DEFAULT_SWAP_INTERVAL_SIGN_POSITIVE, + OGL_DEFAULT_SWAP_INTERVAL_SIGN_NEGATIVE, +}; + +EValues_OGL_EVENT_LOG_SEVERITY_THRESHOLD g_valuesOGL_EVENT_LOG_SEVERITY_THRESHOLD[OGL_EVENT_LOG_SEVERITY_THRESHOLD_NUM_VALUES] = +{ + OGL_EVENT_LOG_SEVERITY_THRESHOLD_DISABLE, + OGL_EVENT_LOG_SEVERITY_THRESHOLD_CRITICAL, + OGL_EVENT_LOG_SEVERITY_THRESHOLD_WARNING, + OGL_EVENT_LOG_SEVERITY_THRESHOLD_INFORMATION, + OGL_EVENT_LOG_SEVERITY_THRESHOLD_ALL, +}; + +EValues_OGL_FORCE_BLIT g_valuesOGL_FORCE_BLIT[OGL_FORCE_BLIT_NUM_VALUES] = +{ + OGL_FORCE_BLIT_ON, + OGL_FORCE_BLIT_OFF, +}; + +EValues_OGL_FORCE_STEREO g_valuesOGL_FORCE_STEREO[OGL_FORCE_STEREO_NUM_VALUES] = +{ + OGL_FORCE_STEREO_OFF, + OGL_FORCE_STEREO_ON, +}; + +const wchar_t * g_valuesOGL_IMPLICIT_GPU_AFFINITY[OGL_IMPLICIT_GPU_AFFINITY_NUM_VALUES] = +{ + OGL_IMPLICIT_GPU_AFFINITY_AUTOSELECT +}; + +EValues_OGL_OVERLAY_PIXEL_TYPE g_valuesOGL_OVERLAY_PIXEL_TYPE[OGL_OVERLAY_PIXEL_TYPE_NUM_VALUES] = +{ + OGL_OVERLAY_PIXEL_TYPE_NONE, + OGL_OVERLAY_PIXEL_TYPE_CI, + OGL_OVERLAY_PIXEL_TYPE_RGBA, + OGL_OVERLAY_PIXEL_TYPE_CI_AND_RGBA, +}; + +EValues_OGL_OVERLAY_SUPPORT g_valuesOGL_OVERLAY_SUPPORT[OGL_OVERLAY_SUPPORT_NUM_VALUES] = +{ + OGL_OVERLAY_SUPPORT_OFF, + OGL_OVERLAY_SUPPORT_ON, + OGL_OVERLAY_SUPPORT_FORCE_SW, +}; + +EValues_OGL_QUALITY_ENHANCEMENTS g_valuesOGL_QUALITY_ENHANCEMENTS[OGL_QUALITY_ENHANCEMENTS_NUM_VALUES] = +{ + OGL_QUALITY_ENHANCEMENTS_HQUAL, + OGL_QUALITY_ENHANCEMENTS_QUAL, + OGL_QUALITY_ENHANCEMENTS_PERF, + OGL_QUALITY_ENHANCEMENTS_HPERF, +}; + +EValues_OGL_SINGLE_BACKDEPTH_BUFFER g_valuesOGL_SINGLE_BACKDEPTH_BUFFER[OGL_SINGLE_BACKDEPTH_BUFFER_NUM_VALUES] = +{ + OGL_SINGLE_BACKDEPTH_BUFFER_DISABLE, + 
OGL_SINGLE_BACKDEPTH_BUFFER_ENABLE, + OGL_SINGLE_BACKDEPTH_BUFFER_USE_HW_DEFAULT, +}; + +EValues_OGL_SLI_CFR_MODE g_valuesOGL_SLI_CFR_MODE[OGL_SLI_CFR_MODE_NUM_VALUES] = +{ + OGL_SLI_CFR_MODE_DISABLE, + OGL_SLI_CFR_MODE_ENABLE, + OGL_SLI_CFR_MODE_CLASSIC_SFR, +}; + +EValues_OGL_SLI_MULTICAST g_valuesOGL_SLI_MULTICAST[OGL_SLI_MULTICAST_NUM_VALUES] = +{ + OGL_SLI_MULTICAST_DISABLE, + OGL_SLI_MULTICAST_ENABLE, + OGL_SLI_MULTICAST_FORCE_DISABLE, + OGL_SLI_MULTICAST_ALLOW_MOSAIC, +}; + +EValues_OGL_THREAD_CONTROL g_valuesOGL_THREAD_CONTROL[OGL_THREAD_CONTROL_NUM_VALUES] = +{ + OGL_THREAD_CONTROL_ENABLE, + OGL_THREAD_CONTROL_DISABLE, +}; + +EValues_OGL_TMON_LEVEL g_valuesOGL_TMON_LEVEL[OGL_TMON_LEVEL_NUM_VALUES] = +{ + OGL_TMON_LEVEL_DISABLE, + OGL_TMON_LEVEL_CRITICAL, + OGL_TMON_LEVEL_WARNING, + OGL_TMON_LEVEL_INFORMATION, + OGL_TMON_LEVEL_MOST, + OGL_TMON_LEVEL_VERBOSE, +}; + +EValues_OGL_TRIPLE_BUFFER g_valuesOGL_TRIPLE_BUFFER[OGL_TRIPLE_BUFFER_NUM_VALUES] = +{ + OGL_TRIPLE_BUFFER_DISABLED, + OGL_TRIPLE_BUFFER_ENABLED, +}; + +EValues_AA_BEHAVIOR_FLAGS g_valuesAA_BEHAVIOR_FLAGS[AA_BEHAVIOR_FLAGS_NUM_VALUES] = +{ + AA_BEHAVIOR_FLAGS_NONE, + AA_BEHAVIOR_FLAGS_TREAT_OVERRIDE_AS_APP_CONTROLLED, + AA_BEHAVIOR_FLAGS_TREAT_OVERRIDE_AS_ENHANCE, + AA_BEHAVIOR_FLAGS_DISABLE_OVERRIDE, + AA_BEHAVIOR_FLAGS_TREAT_ENHANCE_AS_APP_CONTROLLED, + AA_BEHAVIOR_FLAGS_TREAT_ENHANCE_AS_OVERRIDE, + AA_BEHAVIOR_FLAGS_DISABLE_ENHANCE, + AA_BEHAVIOR_FLAGS_MAP_VCAA_TO_MULTISAMPLING, + AA_BEHAVIOR_FLAGS_SLI_DISABLE_TRANSPARENCY_SUPERSAMPLING, + AA_BEHAVIOR_FLAGS_DISABLE_CPLAA, + AA_BEHAVIOR_FLAGS_SKIP_RT_DIM_CHECK_FOR_ENHANCE, + AA_BEHAVIOR_FLAGS_DISABLE_SLIAA, + AA_BEHAVIOR_FLAGS_DEFAULT, + AA_BEHAVIOR_FLAGS_AA_RT_BPP_DIV_4, + AA_BEHAVIOR_FLAGS_AA_RT_BPP_DIV_4_SHIFT, + AA_BEHAVIOR_FLAGS_NON_AA_RT_BPP_DIV_4, + AA_BEHAVIOR_FLAGS_NON_AA_RT_BPP_DIV_4_SHIFT, + AA_BEHAVIOR_FLAGS_MASK, +}; + +EValues_AA_MODE_ALPHATOCOVERAGE g_valuesAA_MODE_ALPHATOCOVERAGE[AA_MODE_ALPHATOCOVERAGE_NUM_VALUES] = +{ + AA_MODE_ALPHATOCOVERAGE_MODE_MASK, + AA_MODE_ALPHATOCOVERAGE_MODE_OFF, + AA_MODE_ALPHATOCOVERAGE_MODE_ON, + AA_MODE_ALPHATOCOVERAGE_MODE_MAX, +}; + +EValues_AA_MODE_GAMMACORRECTION g_valuesAA_MODE_GAMMACORRECTION[AA_MODE_GAMMACORRECTION_NUM_VALUES] = +{ + AA_MODE_GAMMACORRECTION_MASK, + AA_MODE_GAMMACORRECTION_OFF, + AA_MODE_GAMMACORRECTION_ON_IF_FOS, + AA_MODE_GAMMACORRECTION_ON_ALWAYS, + AA_MODE_GAMMACORRECTION_MAX, + AA_MODE_GAMMACORRECTION_DEFAULT, + AA_MODE_GAMMACORRECTION_DEFAULT_TESLA, + AA_MODE_GAMMACORRECTION_DEFAULT_FERMI, +}; + +EValues_AA_MODE_METHOD g_valuesAA_MODE_METHOD[AA_MODE_METHOD_NUM_VALUES] = +{ + AA_MODE_METHOD_NONE, + AA_MODE_METHOD_SUPERSAMPLE_2X_H, + AA_MODE_METHOD_SUPERSAMPLE_2X_V, + AA_MODE_METHOD_SUPERSAMPLE_1_5X1_5, + AA_MODE_METHOD_FREE_0x03, + AA_MODE_METHOD_FREE_0x04, + AA_MODE_METHOD_SUPERSAMPLE_4X, + AA_MODE_METHOD_SUPERSAMPLE_4X_BIAS, + AA_MODE_METHOD_SUPERSAMPLE_4X_GAUSSIAN, + AA_MODE_METHOD_FREE_0x08, + AA_MODE_METHOD_FREE_0x09, + AA_MODE_METHOD_SUPERSAMPLE_9X, + AA_MODE_METHOD_SUPERSAMPLE_9X_BIAS, + AA_MODE_METHOD_SUPERSAMPLE_16X, + AA_MODE_METHOD_SUPERSAMPLE_16X_BIAS, + AA_MODE_METHOD_MULTISAMPLE_2X_DIAGONAL, + AA_MODE_METHOD_MULTISAMPLE_2X_QUINCUNX, + AA_MODE_METHOD_MULTISAMPLE_4X, + AA_MODE_METHOD_FREE_0x11, + AA_MODE_METHOD_MULTISAMPLE_4X_GAUSSIAN, + AA_MODE_METHOD_MIXEDSAMPLE_4X_SKEWED_4TAP, + AA_MODE_METHOD_FREE_0x14, + AA_MODE_METHOD_FREE_0x15, + AA_MODE_METHOD_MIXEDSAMPLE_6X, + AA_MODE_METHOD_MIXEDSAMPLE_6X_SKEWED_6TAP, + AA_MODE_METHOD_MIXEDSAMPLE_8X, + 
AA_MODE_METHOD_MIXEDSAMPLE_8X_SKEWED_8TAP, + AA_MODE_METHOD_MIXEDSAMPLE_16X, + AA_MODE_METHOD_MULTISAMPLE_4X_GAMMA, + AA_MODE_METHOD_MULTISAMPLE_16X, + AA_MODE_METHOD_VCAA_32X_8v24, + AA_MODE_METHOD_CORRUPTION_CHECK, + AA_MODE_METHOD_6X_CT, + AA_MODE_METHOD_MULTISAMPLE_2X_DIAGONAL_GAMMA, + AA_MODE_METHOD_SUPERSAMPLE_4X_GAMMA, + AA_MODE_METHOD_MULTISAMPLE_4X_FOSGAMMA, + AA_MODE_METHOD_MULTISAMPLE_2X_DIAGONAL_FOSGAMMA, + AA_MODE_METHOD_SUPERSAMPLE_4X_FOSGAMMA, + AA_MODE_METHOD_MULTISAMPLE_8X, + AA_MODE_METHOD_VCAA_8X_4v4, + AA_MODE_METHOD_VCAA_16X_4v12, + AA_MODE_METHOD_VCAA_16X_8v8, + AA_MODE_METHOD_MIXEDSAMPLE_32X, + AA_MODE_METHOD_SUPERVCAA_64X_4v12, + AA_MODE_METHOD_SUPERVCAA_64X_8v8, + AA_MODE_METHOD_MIXEDSAMPLE_64X, + AA_MODE_METHOD_MIXEDSAMPLE_128X, + AA_MODE_METHOD_COUNT, + AA_MODE_METHOD_METHOD_MASK, + AA_MODE_METHOD_METHOD_MAX, +}; + +EValues_AA_MODE_REPLAY g_valuesAA_MODE_REPLAY[AA_MODE_REPLAY_NUM_VALUES] = +{ + AA_MODE_REPLAY_SAMPLES_MASK, + AA_MODE_REPLAY_SAMPLES_ONE, + AA_MODE_REPLAY_SAMPLES_TWO, + AA_MODE_REPLAY_SAMPLES_FOUR, + AA_MODE_REPLAY_SAMPLES_EIGHT, + AA_MODE_REPLAY_SAMPLES_MAX, + AA_MODE_REPLAY_MODE_MASK, + AA_MODE_REPLAY_MODE_OFF, + AA_MODE_REPLAY_MODE_ALPHA_TEST, + AA_MODE_REPLAY_MODE_PIXEL_KILL, + AA_MODE_REPLAY_MODE_DYN_BRANCH, + AA_MODE_REPLAY_MODE_OPTIMAL, + AA_MODE_REPLAY_MODE_ALL, + AA_MODE_REPLAY_MODE_MAX, + AA_MODE_REPLAY_TRANSPARENCY, + AA_MODE_REPLAY_DISALLOW_TRAA, + AA_MODE_REPLAY_TRANSPARENCY_DEFAULT, + AA_MODE_REPLAY_TRANSPARENCY_DEFAULT_TESLA, + AA_MODE_REPLAY_TRANSPARENCY_DEFAULT_FERMI, + AA_MODE_REPLAY_MASK, +}; + +EValues_AA_MODE_SELECTOR g_valuesAA_MODE_SELECTOR[AA_MODE_SELECTOR_NUM_VALUES] = +{ + AA_MODE_SELECTOR_MASK, + AA_MODE_SELECTOR_APP_CONTROL, + AA_MODE_SELECTOR_OVERRIDE, + AA_MODE_SELECTOR_ENHANCE, + AA_MODE_SELECTOR_MAX, +}; + +EValues_AA_MODE_SELECTOR_SLIAA g_valuesAA_MODE_SELECTOR_SLIAA[AA_MODE_SELECTOR_SLIAA_NUM_VALUES] = +{ + AA_MODE_SELECTOR_SLIAA_DISABLED, + AA_MODE_SELECTOR_SLIAA_ENABLED, +}; + +EValues_ANISO_MODE_LEVEL g_valuesANISO_MODE_LEVEL[ANISO_MODE_LEVEL_NUM_VALUES] = +{ + ANISO_MODE_LEVEL_MASK, + ANISO_MODE_LEVEL_NONE_POINT, + ANISO_MODE_LEVEL_NONE_LINEAR, + ANISO_MODE_LEVEL_MAX, + ANISO_MODE_LEVEL_DEFAULT, +}; + +EValues_ANISO_MODE_SELECTOR g_valuesANISO_MODE_SELECTOR[ANISO_MODE_SELECTOR_NUM_VALUES] = +{ + ANISO_MODE_SELECTOR_MASK, + ANISO_MODE_SELECTOR_APP, + ANISO_MODE_SELECTOR_USER, + ANISO_MODE_SELECTOR_COND, + ANISO_MODE_SELECTOR_MAX, + ANISO_MODE_SELECTOR_DEFAULT, +}; + +EValues_ANSEL_ALLOW g_valuesANSEL_ALLOW[ANSEL_ALLOW_NUM_VALUES] = +{ + ANSEL_ALLOW_DISALLOWED, + ANSEL_ALLOW_ALLOWED, +}; + +EValues_ANSEL_ALLOWLISTED g_valuesANSEL_ALLOWLISTED[ANSEL_ALLOWLISTED_NUM_VALUES] = +{ + ANSEL_ALLOWLISTED_DISALLOWED, + ANSEL_ALLOWLISTED_ALLOWED, +}; + +EValues_ANSEL_ENABLE g_valuesANSEL_ENABLE[ANSEL_ENABLE_NUM_VALUES] = +{ + ANSEL_ENABLE_OFF, + ANSEL_ENABLE_ON, +}; + +EValues_APPLICATION_PROFILE_NOTIFICATION_TIMEOUT g_valuesAPPLICATION_PROFILE_NOTIFICATION_TIMEOUT[APPLICATION_PROFILE_NOTIFICATION_TIMEOUT_NUM_VALUES] = +{ + APPLICATION_PROFILE_NOTIFICATION_TIMEOUT_DISABLED, + APPLICATION_PROFILE_NOTIFICATION_TIMEOUT_NINE_SECONDS, + APPLICATION_PROFILE_NOTIFICATION_TIMEOUT_FIFTEEN_SECONDS, + APPLICATION_PROFILE_NOTIFICATION_TIMEOUT_THIRTY_SECONDS, + APPLICATION_PROFILE_NOTIFICATION_TIMEOUT_ONE_MINUTE, + APPLICATION_PROFILE_NOTIFICATION_TIMEOUT_TWO_MINUTES, +}; + +EValues_BATTERY_BOOST_APP_FPS g_valuesBATTERY_BOOST_APP_FPS[BATTERY_BOOST_APP_FPS_NUM_VALUES] = +{ + BATTERY_BOOST_APP_FPS_MIN, + BATTERY_BOOST_APP_FPS_MAX, + 
BATTERY_BOOST_APP_FPS_NO_OVERRIDE, +}; + +EValues_CPL_HIDDEN_PROFILE g_valuesCPL_HIDDEN_PROFILE[CPL_HIDDEN_PROFILE_NUM_VALUES] = +{ + CPL_HIDDEN_PROFILE_DISABLED, + CPL_HIDDEN_PROFILE_ENABLED, +}; + +const wchar_t * g_valuesCUDA_EXCLUDED_GPUS[CUDA_EXCLUDED_GPUS_NUM_VALUES] = +{ + CUDA_EXCLUDED_GPUS_NONE +}; + +const wchar_t * g_valuesD3DOGL_GPU_MAX_POWER[D3DOGL_GPU_MAX_POWER_NUM_VALUES] = +{ + D3DOGL_GPU_MAX_POWER_DEFAULTPOWER +}; + +EValues_EXPORT_PERF_COUNTERS g_valuesEXPORT_PERF_COUNTERS[EXPORT_PERF_COUNTERS_NUM_VALUES] = +{ + EXPORT_PERF_COUNTERS_OFF, + EXPORT_PERF_COUNTERS_ON, +}; + +EValues_EXTERNAL_QUIET_MODE g_valuesEXTERNAL_QUIET_MODE[EXTERNAL_QUIET_MODE_NUM_VALUES] = +{ + EXTERNAL_QUIET_MODE_ON, + EXTERNAL_QUIET_MODE_OFF, +}; + +EValues_FRL_FPS g_valuesFRL_FPS[FRL_FPS_NUM_VALUES] = +{ + FRL_FPS_DISABLED, + FRL_FPS_MIN, + FRL_FPS_MAX, +}; + +EValues_FXAA_ALLOW g_valuesFXAA_ALLOW[FXAA_ALLOW_NUM_VALUES] = +{ + FXAA_ALLOW_DISALLOWED, + FXAA_ALLOW_ALLOWED, +}; + +EValues_FXAA_ENABLE g_valuesFXAA_ENABLE[FXAA_ENABLE_NUM_VALUES] = +{ + FXAA_ENABLE_OFF, + FXAA_ENABLE_ON, +}; + +EValues_FXAA_INDICATOR_ENABLE g_valuesFXAA_INDICATOR_ENABLE[FXAA_INDICATOR_ENABLE_NUM_VALUES] = +{ + FXAA_INDICATOR_ENABLE_OFF, + FXAA_INDICATOR_ENABLE_ON, +}; + +EValues_LATENCY_INDICATOR_AUTOALIGN g_valuesLATENCY_INDICATOR_AUTOALIGN[LATENCY_INDICATOR_AUTOALIGN_NUM_VALUES] = +{ + LATENCY_INDICATOR_AUTOALIGN_DISABLED, + LATENCY_INDICATOR_AUTOALIGN_ENABLED, +}; + +EValues_MCSFRSHOWSPLIT g_valuesMCSFRSHOWSPLIT[MCSFRSHOWSPLIT_NUM_VALUES] = +{ + MCSFRSHOWSPLIT_DISABLED, + MCSFRSHOWSPLIT_ENABLED, +}; + +EValues_NV_QUALITY_UPSCALING g_valuesNV_QUALITY_UPSCALING[NV_QUALITY_UPSCALING_NUM_VALUES] = +{ + NV_QUALITY_UPSCALING_OFF, + NV_QUALITY_UPSCALING_ON, +}; + +EValues_OPTIMUS_MAXAA g_valuesOPTIMUS_MAXAA[OPTIMUS_MAXAA_NUM_VALUES] = +{ + OPTIMUS_MAXAA_MIN, + OPTIMUS_MAXAA_MAX, +}; + +EValues_PHYSXINDICATOR g_valuesPHYSXINDICATOR[PHYSXINDICATOR_NUM_VALUES] = +{ + PHYSXINDICATOR_DISABLED, + PHYSXINDICATOR_ENABLED, +}; + +EValues_PREFERRED_PSTATE g_valuesPREFERRED_PSTATE[PREFERRED_PSTATE_NUM_VALUES] = +{ + PREFERRED_PSTATE_ADAPTIVE, + PREFERRED_PSTATE_PREFER_MAX, + PREFERRED_PSTATE_DRIVER_CONTROLLED, + PREFERRED_PSTATE_PREFER_CONSISTENT_PERFORMANCE, + PREFERRED_PSTATE_PREFER_MIN, + PREFERRED_PSTATE_OPTIMAL_POWER, + PREFERRED_PSTATE_MIN, + PREFERRED_PSTATE_MAX, +}; + +EValues_PREVENT_UI_AF_OVERRIDE g_valuesPREVENT_UI_AF_OVERRIDE[PREVENT_UI_AF_OVERRIDE_NUM_VALUES] = +{ + PREVENT_UI_AF_OVERRIDE_OFF, + PREVENT_UI_AF_OVERRIDE_ON, +}; + +EValues_SHIM_MCCOMPAT g_valuesSHIM_MCCOMPAT[SHIM_MCCOMPAT_NUM_VALUES] = +{ + SHIM_MCCOMPAT_INTEGRATED, + SHIM_MCCOMPAT_ENABLE, + SHIM_MCCOMPAT_USER_EDITABLE, + SHIM_MCCOMPAT_MASK, + SHIM_MCCOMPAT_VIDEO_MASK, + SHIM_MCCOMPAT_VARYING_BIT, + SHIM_MCCOMPAT_AUTO_SELECT, + SHIM_MCCOMPAT_OVERRIDE_BIT, +}; + +EValues_SHIM_RENDERING_MODE g_valuesSHIM_RENDERING_MODE[SHIM_RENDERING_MODE_NUM_VALUES] = +{ + SHIM_RENDERING_MODE_INTEGRATED, + SHIM_RENDERING_MODE_ENABLE, + SHIM_RENDERING_MODE_USER_EDITABLE, + SHIM_RENDERING_MODE_MASK, + SHIM_RENDERING_MODE_VIDEO_MASK, + SHIM_RENDERING_MODE_VARYING_BIT, + SHIM_RENDERING_MODE_AUTO_SELECT, + SHIM_RENDERING_MODE_OVERRIDE_BIT, +}; + +EValues_SHIM_RENDERING_OPTIONS g_valuesSHIM_RENDERING_OPTIONS[SHIM_RENDERING_OPTIONS_NUM_VALUES] = +{ + SHIM_RENDERING_OPTIONS_DEFAULT_RENDERING_MODE, + SHIM_RENDERING_OPTIONS_DISABLE_ASYNC_PRESENT, + SHIM_RENDERING_OPTIONS_EHSHELL_DETECT, + SHIM_RENDERING_OPTIONS_FLASHPLAYER_HOST_DETECT, + SHIM_RENDERING_OPTIONS_VIDEO_DRM_APP_DETECT, + 
SHIM_RENDERING_OPTIONS_IGNORE_OVERRIDES, + SHIM_RENDERING_OPTIONS_RESERVED1, + SHIM_RENDERING_OPTIONS_ENABLE_DWM_ASYNC_PRESENT, + SHIM_RENDERING_OPTIONS_RESERVED2, + SHIM_RENDERING_OPTIONS_ALLOW_INHERITANCE, + SHIM_RENDERING_OPTIONS_DISABLE_WRAPPERS, + SHIM_RENDERING_OPTIONS_DISABLE_DXGI_WRAPPERS, + SHIM_RENDERING_OPTIONS_PRUNE_UNSUPPORTED_FORMATS, + SHIM_RENDERING_OPTIONS_ENABLE_ALPHA_FORMAT, + SHIM_RENDERING_OPTIONS_IGPU_TRANSCODING, + SHIM_RENDERING_OPTIONS_DISABLE_CUDA, + SHIM_RENDERING_OPTIONS_ALLOW_CP_CAPS_FOR_VIDEO, + SHIM_RENDERING_OPTIONS_IGPU_TRANSCODING_FWD_OPTIMUS, + SHIM_RENDERING_OPTIONS_DISABLE_DURING_SECURE_BOOT, + SHIM_RENDERING_OPTIONS_INVERT_FOR_QUADRO, + SHIM_RENDERING_OPTIONS_INVERT_FOR_MSHYBRID, + SHIM_RENDERING_OPTIONS_REGISTER_PROCESS_ENABLE_GOLD, + SHIM_RENDERING_OPTIONS_HANDLE_WINDOWED_MODE_PERF_OPT, + SHIM_RENDERING_OPTIONS_HANDLE_WIN7_ASYNC_RUNTIME_BUG, + SHIM_RENDERING_OPTIONS_EXPLICIT_ADAPTER_OPTED_BY_APP, + SHIM_RENDERING_OPTIONS_ALLOW_DYNAMIC_DISPLAY_MUX_SWITCH, + SHIM_RENDERING_OPTIONS_DISALLOW_DYNAMIC_DISPLAY_MUX_SWITCH, + SHIM_RENDERING_OPTIONS_DISABLE_TURING_POWER_POLICY, +}; + +EValues_SLI_GPU_COUNT g_valuesSLI_GPU_COUNT[SLI_GPU_COUNT_NUM_VALUES] = +{ + SLI_GPU_COUNT_AUTOSELECT, + SLI_GPU_COUNT_ONE, + SLI_GPU_COUNT_TWO, + SLI_GPU_COUNT_THREE, + SLI_GPU_COUNT_FOUR, +}; + +EValues_SLI_PREDEFINED_GPU_COUNT g_valuesSLI_PREDEFINED_GPU_COUNT[SLI_PREDEFINED_GPU_COUNT_NUM_VALUES] = +{ + SLI_PREDEFINED_GPU_COUNT_AUTOSELECT, + SLI_PREDEFINED_GPU_COUNT_ONE, + SLI_PREDEFINED_GPU_COUNT_TWO, + SLI_PREDEFINED_GPU_COUNT_THREE, + SLI_PREDEFINED_GPU_COUNT_FOUR, +}; + +EValues_SLI_PREDEFINED_GPU_COUNT_DX10 g_valuesSLI_PREDEFINED_GPU_COUNT_DX10[SLI_PREDEFINED_GPU_COUNT_DX10_NUM_VALUES] = +{ + SLI_PREDEFINED_GPU_COUNT_DX10_AUTOSELECT, + SLI_PREDEFINED_GPU_COUNT_DX10_ONE, + SLI_PREDEFINED_GPU_COUNT_DX10_TWO, + SLI_PREDEFINED_GPU_COUNT_DX10_THREE, + SLI_PREDEFINED_GPU_COUNT_DX10_FOUR, +}; + +EValues_SLI_PREDEFINED_MODE g_valuesSLI_PREDEFINED_MODE[SLI_PREDEFINED_MODE_NUM_VALUES] = +{ + SLI_PREDEFINED_MODE_AUTOSELECT, + SLI_PREDEFINED_MODE_FORCE_SINGLE, + SLI_PREDEFINED_MODE_FORCE_AFR, + SLI_PREDEFINED_MODE_FORCE_AFR2, + SLI_PREDEFINED_MODE_FORCE_SFR, + SLI_PREDEFINED_MODE_FORCE_AFR_OF_SFR__FALLBACK_3AFR, +}; + +EValues_SLI_PREDEFINED_MODE_DX10 g_valuesSLI_PREDEFINED_MODE_DX10[SLI_PREDEFINED_MODE_DX10_NUM_VALUES] = +{ + SLI_PREDEFINED_MODE_DX10_AUTOSELECT, + SLI_PREDEFINED_MODE_DX10_FORCE_SINGLE, + SLI_PREDEFINED_MODE_DX10_FORCE_AFR, + SLI_PREDEFINED_MODE_DX10_FORCE_AFR2, + SLI_PREDEFINED_MODE_DX10_FORCE_SFR, + SLI_PREDEFINED_MODE_DX10_FORCE_AFR_OF_SFR__FALLBACK_3AFR, +}; + +EValues_SLI_RENDERING_MODE g_valuesSLI_RENDERING_MODE[SLI_RENDERING_MODE_NUM_VALUES] = +{ + SLI_RENDERING_MODE_AUTOSELECT, + SLI_RENDERING_MODE_FORCE_SINGLE, + SLI_RENDERING_MODE_FORCE_AFR, + SLI_RENDERING_MODE_FORCE_AFR2, + SLI_RENDERING_MODE_FORCE_SFR, + SLI_RENDERING_MODE_FORCE_AFR_OF_SFR__FALLBACK_3AFR, +}; + +EValues_VRPRERENDERLIMIT g_valuesVRPRERENDERLIMIT[VRPRERENDERLIMIT_NUM_VALUES] = +{ + VRPRERENDERLIMIT_MIN, + VRPRERENDERLIMIT_MAX, + VRPRERENDERLIMIT_APP_CONTROLLED, + VRPRERENDERLIMIT_DEFAULT, +}; + +EValues_VRRFEATUREINDICATOR g_valuesVRRFEATUREINDICATOR[VRRFEATUREINDICATOR_NUM_VALUES] = +{ + VRRFEATUREINDICATOR_DISABLED, + VRRFEATUREINDICATOR_ENABLED, +}; + +EValues_VRROVERLAYINDICATOR g_valuesVRROVERLAYINDICATOR[VRROVERLAYINDICATOR_NUM_VALUES] = +{ + VRROVERLAYINDICATOR_DISABLED, + VRROVERLAYINDICATOR_ENABLED, +}; + +EValues_VRRREQUESTSTATE g_valuesVRRREQUESTSTATE[VRRREQUESTSTATE_NUM_VALUES] 
= +{ + VRRREQUESTSTATE_DISABLED, + VRRREQUESTSTATE_FULLSCREEN_ONLY, + VRRREQUESTSTATE_FULLSCREEN_AND_WINDOWED, +}; + +EValues_VRR_APP_OVERRIDE g_valuesVRR_APP_OVERRIDE[VRR_APP_OVERRIDE_NUM_VALUES] = +{ + VRR_APP_OVERRIDE_ALLOW, + VRR_APP_OVERRIDE_FORCE_OFF, + VRR_APP_OVERRIDE_DISALLOW, + VRR_APP_OVERRIDE_ULMB, + VRR_APP_OVERRIDE_FIXED_REFRESH, +}; + +EValues_VRR_APP_OVERRIDE_REQUEST_STATE g_valuesVRR_APP_OVERRIDE_REQUEST_STATE[VRR_APP_OVERRIDE_REQUEST_STATE_NUM_VALUES] = +{ + VRR_APP_OVERRIDE_REQUEST_STATE_ALLOW, + VRR_APP_OVERRIDE_REQUEST_STATE_FORCE_OFF, + VRR_APP_OVERRIDE_REQUEST_STATE_DISALLOW, + VRR_APP_OVERRIDE_REQUEST_STATE_ULMB, + VRR_APP_OVERRIDE_REQUEST_STATE_FIXED_REFRESH, +}; + +EValues_VRR_MODE g_valuesVRR_MODE[VRR_MODE_NUM_VALUES] = +{ + VRR_MODE_DISABLED, + VRR_MODE_FULLSCREEN_ONLY, + VRR_MODE_FULLSCREEN_AND_WINDOWED, +}; + +EValues_VSYNCSMOOTHAFR g_valuesVSYNCSMOOTHAFR[VSYNCSMOOTHAFR_NUM_VALUES] = +{ + VSYNCSMOOTHAFR_OFF, + VSYNCSMOOTHAFR_ON, +}; + +EValues_VSYNCVRRCONTROL g_valuesVSYNCVRRCONTROL[VSYNCVRRCONTROL_NUM_VALUES] = +{ + VSYNCVRRCONTROL_DISABLE, + VSYNCVRRCONTROL_ENABLE, + VSYNCVRRCONTROL_NOTSUPPORTED, +}; + +EValues_VSYNC_BEHAVIOR_FLAGS g_valuesVSYNC_BEHAVIOR_FLAGS[VSYNC_BEHAVIOR_FLAGS_NUM_VALUES] = +{ + VSYNC_BEHAVIOR_FLAGS_NONE, + VSYNC_BEHAVIOR_FLAGS_DEFAULT, + VSYNC_BEHAVIOR_FLAGS_IGNORE_FLIPINTERVAL_MULTIPLE, +}; + +EValues_WKS_API_STEREO_EYES_EXCHANGE g_valuesWKS_API_STEREO_EYES_EXCHANGE[WKS_API_STEREO_EYES_EXCHANGE_NUM_VALUES] = +{ + WKS_API_STEREO_EYES_EXCHANGE_OFF, + WKS_API_STEREO_EYES_EXCHANGE_ON, +}; + +EValues_WKS_API_STEREO_MODE g_valuesWKS_API_STEREO_MODE[WKS_API_STEREO_MODE_NUM_VALUES] = +{ + WKS_API_STEREO_MODE_SHUTTER_GLASSES, + WKS_API_STEREO_MODE_VERTICAL_INTERLACED, + WKS_API_STEREO_MODE_TWINVIEW, + WKS_API_STEREO_MODE_NV17_SHUTTER_GLASSES_AUTO, + WKS_API_STEREO_MODE_NV17_SHUTTER_GLASSES_DAC0, + WKS_API_STEREO_MODE_NV17_SHUTTER_GLASSES_DAC1, + WKS_API_STEREO_MODE_COLOR_LINE, + WKS_API_STEREO_MODE_COLOR_INTERLEAVED, + WKS_API_STEREO_MODE_ANAGLYPH, + WKS_API_STEREO_MODE_HORIZONTAL_INTERLACED, + WKS_API_STEREO_MODE_SIDE_FIELD, + WKS_API_STEREO_MODE_SUB_FIELD, + WKS_API_STEREO_MODE_CHECKERBOARD, + WKS_API_STEREO_MODE_INVERSE_CHECKERBOARD, + WKS_API_STEREO_MODE_TRIDELITY_SL, + WKS_API_STEREO_MODE_TRIDELITY_MV, + WKS_API_STEREO_MODE_SEEFRONT, + WKS_API_STEREO_MODE_STEREO_MIRROR, + WKS_API_STEREO_MODE_FRAME_SEQUENTIAL, + WKS_API_STEREO_MODE_AUTODETECT_PASSIVE_MODE, + WKS_API_STEREO_MODE_AEGIS_DT_FRAME_SEQUENTIAL, + WKS_API_STEREO_MODE_OEM_EMITTER_FRAME_SEQUENTIAL, + WKS_API_STEREO_MODE_DP_INBAND, + WKS_API_STEREO_MODE_USE_HW_DEFAULT, + WKS_API_STEREO_MODE_DEFAULT_GL, +}; + +EValues_WKS_MEMORY_ALLOCATION_POLICY g_valuesWKS_MEMORY_ALLOCATION_POLICY[WKS_MEMORY_ALLOCATION_POLICY_NUM_VALUES] = +{ + WKS_MEMORY_ALLOCATION_POLICY_AS_NEEDED, + WKS_MEMORY_ALLOCATION_POLICY_MODERATE_PRE_ALLOCATION, + WKS_MEMORY_ALLOCATION_POLICY_AGGRESSIVE_PRE_ALLOCATION, +}; + +EValues_WKS_STEREO_DONGLE_SUPPORT g_valuesWKS_STEREO_DONGLE_SUPPORT[WKS_STEREO_DONGLE_SUPPORT_NUM_VALUES] = +{ + WKS_STEREO_DONGLE_SUPPORT_OFF, + WKS_STEREO_DONGLE_SUPPORT_DAC, + WKS_STEREO_DONGLE_SUPPORT_DLP, +}; + +EValues_WKS_STEREO_SUPPORT g_valuesWKS_STEREO_SUPPORT[WKS_STEREO_SUPPORT_NUM_VALUES] = +{ + WKS_STEREO_SUPPORT_OFF, + WKS_STEREO_SUPPORT_ON, +}; + +EValues_WKS_STEREO_SWAP_MODE g_valuesWKS_STEREO_SWAP_MODE[WKS_STEREO_SWAP_MODE_NUM_VALUES] = +{ + WKS_STEREO_SWAP_MODE_APPLICATION_CONTROL, + WKS_STEREO_SWAP_MODE_PER_EYE, + WKS_STEREO_SWAP_MODE_PER_EYE_PAIR, + 
WKS_STEREO_SWAP_MODE_LEGACY_BEHAVIOR, + WKS_STEREO_SWAP_MODE_PER_EYE_FOR_SWAP_GROUP, +}; + +EValues_AO_MODE g_valuesAO_MODE[AO_MODE_NUM_VALUES] = +{ + AO_MODE_OFF, + AO_MODE_LOW, + AO_MODE_MEDIUM, + AO_MODE_HIGH, +}; + +EValues_AO_MODE_ACTIVE g_valuesAO_MODE_ACTIVE[AO_MODE_ACTIVE_NUM_VALUES] = +{ + AO_MODE_ACTIVE_DISABLED, + AO_MODE_ACTIVE_ENABLED, +}; + +EValues_AUTO_LODBIASADJUST g_valuesAUTO_LODBIASADJUST[AUTO_LODBIASADJUST_NUM_VALUES] = +{ + AUTO_LODBIASADJUST_OFF, + AUTO_LODBIASADJUST_ON, +}; + +EValues_EXPORT_PERF_COUNTERS_DX9_ONLY g_valuesEXPORT_PERF_COUNTERS_DX9_ONLY[EXPORT_PERF_COUNTERS_DX9_ONLY_NUM_VALUES] = +{ + EXPORT_PERF_COUNTERS_DX9_ONLY_OFF, + EXPORT_PERF_COUNTERS_DX9_ONLY_ON, +}; + +EValues_LODBIASADJUST g_valuesLODBIASADJUST[LODBIASADJUST_NUM_VALUES] = +{ + LODBIASADJUST_MIN, + LODBIASADJUST_MAX, +}; + +EValues_MAXWELL_B_SAMPLE_INTERLEAVE g_valuesMAXWELL_B_SAMPLE_INTERLEAVE[MAXWELL_B_SAMPLE_INTERLEAVE_NUM_VALUES] = +{ + MAXWELL_B_SAMPLE_INTERLEAVE_OFF, + MAXWELL_B_SAMPLE_INTERLEAVE_ON, +}; + +EValues_PRERENDERLIMIT g_valuesPRERENDERLIMIT[PRERENDERLIMIT_NUM_VALUES] = +{ + PRERENDERLIMIT_MIN, + PRERENDERLIMIT_MAX, + PRERENDERLIMIT_APP_CONTROLLED, +}; + +EValues_PS_SHADERDISKCACHE g_valuesPS_SHADERDISKCACHE[PS_SHADERDISKCACHE_NUM_VALUES] = +{ + PS_SHADERDISKCACHE_OFF, + PS_SHADERDISKCACHE_ON, +}; + +EValues_PS_SHADERDISKCACHE_MAX_SIZE g_valuesPS_SHADERDISKCACHE_MAX_SIZE[PS_SHADERDISKCACHE_MAX_SIZE_NUM_VALUES] = +{ + PS_SHADERDISKCACHE_MAX_SIZE_MIN, + PS_SHADERDISKCACHE_MAX_SIZE_MAX, +}; + +EValues_PS_TEXFILTER_ANISO_OPTS2 g_valuesPS_TEXFILTER_ANISO_OPTS2[PS_TEXFILTER_ANISO_OPTS2_NUM_VALUES] = +{ + PS_TEXFILTER_ANISO_OPTS2_OFF, + PS_TEXFILTER_ANISO_OPTS2_ON, +}; + +EValues_PS_TEXFILTER_BILINEAR_IN_ANISO g_valuesPS_TEXFILTER_BILINEAR_IN_ANISO[PS_TEXFILTER_BILINEAR_IN_ANISO_NUM_VALUES] = +{ + PS_TEXFILTER_BILINEAR_IN_ANISO_OFF, + PS_TEXFILTER_BILINEAR_IN_ANISO_ON, +}; + +EValues_PS_TEXFILTER_DISABLE_TRILIN_SLOPE g_valuesPS_TEXFILTER_DISABLE_TRILIN_SLOPE[PS_TEXFILTER_DISABLE_TRILIN_SLOPE_NUM_VALUES] = +{ + PS_TEXFILTER_DISABLE_TRILIN_SLOPE_OFF, + PS_TEXFILTER_DISABLE_TRILIN_SLOPE_ON, +}; + +EValues_PS_TEXFILTER_NO_NEG_LODBIAS g_valuesPS_TEXFILTER_NO_NEG_LODBIAS[PS_TEXFILTER_NO_NEG_LODBIAS_NUM_VALUES] = +{ + PS_TEXFILTER_NO_NEG_LODBIAS_OFF, + PS_TEXFILTER_NO_NEG_LODBIAS_ON, +}; + +EValues_QUALITY_ENHANCEMENTS g_valuesQUALITY_ENHANCEMENTS[QUALITY_ENHANCEMENTS_NUM_VALUES] = +{ + QUALITY_ENHANCEMENTS_HIGHQUALITY, + QUALITY_ENHANCEMENTS_QUALITY, + QUALITY_ENHANCEMENTS_PERFORMANCE, + QUALITY_ENHANCEMENTS_HIGHPERFORMANCE, +}; + +EValues_QUALITY_ENHANCEMENT_SUBSTITUTION g_valuesQUALITY_ENHANCEMENT_SUBSTITUTION[QUALITY_ENHANCEMENT_SUBSTITUTION_NUM_VALUES] = +{ + QUALITY_ENHANCEMENT_SUBSTITUTION_NO_SUBSTITUTION, + QUALITY_ENHANCEMENT_SUBSTITUTION_HIGHQUALITY_BECOMES_QUALITY, +}; + +EValues_REFRESH_RATE_OVERRIDE g_valuesREFRESH_RATE_OVERRIDE[REFRESH_RATE_OVERRIDE_NUM_VALUES] = +{ + REFRESH_RATE_OVERRIDE_APPLICATION_CONTROLLED, + REFRESH_RATE_OVERRIDE_HIGHEST_AVAILABLE, + REFRESH_RATE_OVERRIDE_LOW_LATENCY_RR_MASK, +}; + +EValues_SET_POWER_THROTTLE_FOR_PCIe_COMPLIANCE g_valuesSET_POWER_THROTTLE_FOR_PCIe_COMPLIANCE[SET_POWER_THROTTLE_FOR_PCIe_COMPLIANCE_NUM_VALUES] = +{ + SET_POWER_THROTTLE_FOR_PCIe_COMPLIANCE_OFF, + SET_POWER_THROTTLE_FOR_PCIe_COMPLIANCE_ON, +}; + +EValues_SET_VAB_DATA g_valuesSET_VAB_DATA[SET_VAB_DATA_NUM_VALUES] = +{ + SET_VAB_DATA_ZERO, + SET_VAB_DATA_UINT_ONE, + SET_VAB_DATA_FLOAT_ONE, + SET_VAB_DATA_FLOAT_POS_INF, + SET_VAB_DATA_FLOAT_NAN, + 
SET_VAB_DATA_USE_API_DEFAULTS, +}; + +EValues_VSYNCMODE g_valuesVSYNCMODE[VSYNCMODE_NUM_VALUES] = +{ + VSYNCMODE_PASSIVE, + VSYNCMODE_FORCEOFF, + VSYNCMODE_FORCEON, + VSYNCMODE_FLIPINTERVAL2, + VSYNCMODE_FLIPINTERVAL3, + VSYNCMODE_FLIPINTERVAL4, + VSYNCMODE_VIRTUAL, +}; + +EValues_VSYNCTEARCONTROL g_valuesVSYNCTEARCONTROL[VSYNCTEARCONTROL_NUM_VALUES] = +{ + VSYNCTEARCONTROL_DISABLE, + VSYNCTEARCONTROL_ENABLE, +}; + + +SettingDWORDNameString mapSettingDWORD[TOTAL_DWORD_SETTING_NUM] = +{ + {OGL_AA_LINE_GAMMA_ID, OGL_AA_LINE_GAMMA_STRING, 4, (NvU32 *)g_valuesOGL_AA_LINE_GAMMA, OGL_AA_LINE_GAMMA_DISABLED}, + {OGL_CPL_GDI_COMPATIBILITY_ID, OGL_CPL_GDI_COMPATIBILITY_STRING, 3, (NvU32 *)g_valuesOGL_CPL_GDI_COMPATIBILITY, OGL_CPL_GDI_COMPATIBILITY_AUTO}, + {OGL_CPL_PREFER_DXPRESENT_ID, OGL_CPL_PREFER_DXPRESENT_STRING, 3, (NvU32 *)g_valuesOGL_CPL_PREFER_DXPRESENT, OGL_CPL_PREFER_DXPRESENT_AUTO}, + {OGL_DEEP_COLOR_SCANOUT_ID, OGL_DEEP_COLOR_SCANOUT_STRING, 2, (NvU32 *)g_valuesOGL_DEEP_COLOR_SCANOUT, OGL_DEEP_COLOR_SCANOUT_ENABLE}, + {OGL_DEFAULT_SWAP_INTERVAL_ID, OGL_DEFAULT_SWAP_INTERVAL_STRING, 9, (NvU32 *)g_valuesOGL_DEFAULT_SWAP_INTERVAL, OGL_DEFAULT_SWAP_INTERVAL_VSYNC_ONE}, + {OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_ID, OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_STRING, 2, (NvU32 *)g_valuesOGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL, 0x00000000}, + {OGL_DEFAULT_SWAP_INTERVAL_SIGN_ID, OGL_DEFAULT_SWAP_INTERVAL_SIGN_STRING, 2, (NvU32 *)g_valuesOGL_DEFAULT_SWAP_INTERVAL_SIGN, OGL_DEFAULT_SWAP_INTERVAL_SIGN_POSITIVE}, + {OGL_EVENT_LOG_SEVERITY_THRESHOLD_ID, OGL_EVENT_LOG_SEVERITY_THRESHOLD_STRING, 5, (NvU32 *)g_valuesOGL_EVENT_LOG_SEVERITY_THRESHOLD, OGL_EVENT_LOG_SEVERITY_THRESHOLD_ALL}, + {OGL_EXTENSION_STRING_VERSION_ID, OGL_EXTENSION_STRING_VERSION_STRING, 0, NULL, 0x00000000}, + {OGL_FORCE_BLIT_ID, OGL_FORCE_BLIT_STRING, 2, (NvU32 *)g_valuesOGL_FORCE_BLIT, OGL_FORCE_BLIT_OFF}, + {OGL_FORCE_STEREO_ID, OGL_FORCE_STEREO_STRING, 2, (NvU32 *)g_valuesOGL_FORCE_STEREO, OGL_FORCE_STEREO_OFF}, + {OGL_MAX_FRAMES_ALLOWED_ID, OGL_MAX_FRAMES_ALLOWED_STRING, 0, NULL, 0x00000002}, + {OGL_OVERLAY_PIXEL_TYPE_ID, OGL_OVERLAY_PIXEL_TYPE_STRING, 4, (NvU32 *)g_valuesOGL_OVERLAY_PIXEL_TYPE, OGL_OVERLAY_PIXEL_TYPE_CI}, + {OGL_OVERLAY_SUPPORT_ID, OGL_OVERLAY_SUPPORT_STRING, 3, (NvU32 *)g_valuesOGL_OVERLAY_SUPPORT, OGL_OVERLAY_SUPPORT_OFF}, + {OGL_QUALITY_ENHANCEMENTS_ID, OGL_QUALITY_ENHANCEMENTS_STRING, 4, (NvU32 *)g_valuesOGL_QUALITY_ENHANCEMENTS, OGL_QUALITY_ENHANCEMENTS_QUAL}, + {OGL_SINGLE_BACKDEPTH_BUFFER_ID, OGL_SINGLE_BACKDEPTH_BUFFER_STRING, 3, (NvU32 *)g_valuesOGL_SINGLE_BACKDEPTH_BUFFER, OGL_SINGLE_BACKDEPTH_BUFFER_DISABLE}, + {OGL_SLI_CFR_MODE_ID, OGL_SLI_CFR_MODE_STRING, 3, (NvU32 *)g_valuesOGL_SLI_CFR_MODE, OGL_SLI_CFR_MODE_DISABLE}, + {OGL_SLI_MULTICAST_ID, OGL_SLI_MULTICAST_STRING, 4, (NvU32 *)g_valuesOGL_SLI_MULTICAST, OGL_SLI_MULTICAST_DISABLE}, + {OGL_THREAD_CONTROL_ID, OGL_THREAD_CONTROL_STRING, 2, (NvU32 *)g_valuesOGL_THREAD_CONTROL, 0x00000000}, + {OGL_TMON_LEVEL_ID, OGL_TMON_LEVEL_STRING, 6, (NvU32 *)g_valuesOGL_TMON_LEVEL, OGL_TMON_LEVEL_MOST}, + {OGL_TRIPLE_BUFFER_ID, OGL_TRIPLE_BUFFER_STRING, 2, (NvU32 *)g_valuesOGL_TRIPLE_BUFFER, OGL_TRIPLE_BUFFER_DISABLED}, + {AA_BEHAVIOR_FLAGS_ID, AA_BEHAVIOR_FLAGS_STRING, 18, (NvU32 *)g_valuesAA_BEHAVIOR_FLAGS, AA_BEHAVIOR_FLAGS_DEFAULT}, + {AA_MODE_ALPHATOCOVERAGE_ID, AA_MODE_ALPHATOCOVERAGE_STRING, 4, (NvU32 *)g_valuesAA_MODE_ALPHATOCOVERAGE, 0x00000000}, + {AA_MODE_GAMMACORRECTION_ID, AA_MODE_GAMMACORRECTION_STRING, 8, (NvU32 *)g_valuesAA_MODE_GAMMACORRECTION, 
0x00000000}, + {AA_MODE_METHOD_ID, AA_MODE_METHOD_STRING, 50, (NvU32 *)g_valuesAA_MODE_METHOD, AA_MODE_METHOD_NONE}, + {AA_MODE_REPLAY_ID, AA_MODE_REPLAY_STRING, 20, (NvU32 *)g_valuesAA_MODE_REPLAY, 0x00000000}, + {AA_MODE_SELECTOR_ID, AA_MODE_SELECTOR_STRING, 5, (NvU32 *)g_valuesAA_MODE_SELECTOR, AA_MODE_SELECTOR_APP_CONTROL}, + {AA_MODE_SELECTOR_SLIAA_ID, AA_MODE_SELECTOR_SLIAA_STRING, 2, (NvU32 *)g_valuesAA_MODE_SELECTOR_SLIAA, AA_MODE_SELECTOR_SLIAA_DISABLED}, + {ANISO_MODE_LEVEL_ID, ANISO_MODE_LEVEL_STRING, 5, (NvU32 *)g_valuesANISO_MODE_LEVEL, ANISO_MODE_LEVEL_DEFAULT}, + {ANISO_MODE_SELECTOR_ID, ANISO_MODE_SELECTOR_STRING, 6, (NvU32 *)g_valuesANISO_MODE_SELECTOR, ANISO_MODE_SELECTOR_DEFAULT}, + {ANSEL_ALLOW_ID, ANSEL_ALLOW_STRING, 2, (NvU32 *)g_valuesANSEL_ALLOW, ANSEL_ALLOW_ALLOWED}, + {ANSEL_ALLOWLISTED_ID, ANSEL_ALLOWLISTED_STRING, 2, (NvU32 *)g_valuesANSEL_ALLOWLISTED, ANSEL_ALLOWLISTED_DISALLOWED}, + {ANSEL_ENABLE_ID, ANSEL_ENABLE_STRING, 2, (NvU32 *)g_valuesANSEL_ENABLE, ANSEL_ENABLE_ON}, + {APPLICATION_PROFILE_NOTIFICATION_TIMEOUT_ID, APPLICATION_PROFILE_NOTIFICATION_TIMEOUT_STRING, 6, (NvU32 *)g_valuesAPPLICATION_PROFILE_NOTIFICATION_TIMEOUT, APPLICATION_PROFILE_NOTIFICATION_TIMEOUT_DISABLED}, + {APPLICATION_STEAM_ID_ID, APPLICATION_STEAM_ID_STRING, 0, NULL, 0x00000000}, + {BATTERY_BOOST_APP_FPS_ID, BATTERY_BOOST_APP_FPS_STRING, 3, (NvU32 *)g_valuesBATTERY_BOOST_APP_FPS, BATTERY_BOOST_APP_FPS_NO_OVERRIDE}, + {CPL_HIDDEN_PROFILE_ID, CPL_HIDDEN_PROFILE_STRING, 2, (NvU32 *)g_valuesCPL_HIDDEN_PROFILE, CPL_HIDDEN_PROFILE_DISABLED}, + {EXPORT_PERF_COUNTERS_ID, EXPORT_PERF_COUNTERS_STRING, 2, (NvU32 *)g_valuesEXPORT_PERF_COUNTERS, EXPORT_PERF_COUNTERS_OFF}, + {EXTERNAL_QUIET_MODE_ID, EXTERNAL_QUIET_MODE_STRING, 2, (NvU32 *)g_valuesEXTERNAL_QUIET_MODE, EXTERNAL_QUIET_MODE_OFF}, + {FRL_FPS_ID, FRL_FPS_STRING, 3, (NvU32 *)g_valuesFRL_FPS, FRL_FPS_DISABLED}, + {FXAA_ALLOW_ID, FXAA_ALLOW_STRING, 2, (NvU32 *)g_valuesFXAA_ALLOW, FXAA_ALLOW_ALLOWED}, + {FXAA_ENABLE_ID, FXAA_ENABLE_STRING, 2, (NvU32 *)g_valuesFXAA_ENABLE, FXAA_ENABLE_OFF}, + {FXAA_INDICATOR_ENABLE_ID, FXAA_INDICATOR_ENABLE_STRING, 2, (NvU32 *)g_valuesFXAA_INDICATOR_ENABLE, FXAA_INDICATOR_ENABLE_OFF}, + {LATENCY_INDICATOR_AUTOALIGN_ID, LATENCY_INDICATOR_AUTOALIGN_STRING, 2, (NvU32 *)g_valuesLATENCY_INDICATOR_AUTOALIGN, LATENCY_INDICATOR_AUTOALIGN_ENABLED}, + {MCSFRSHOWSPLIT_ID, MCSFRSHOWSPLIT_STRING, 2, (NvU32 *)g_valuesMCSFRSHOWSPLIT, MCSFRSHOWSPLIT_DISABLED}, + {NV_QUALITY_UPSCALING_ID, NV_QUALITY_UPSCALING_STRING, 2, (NvU32 *)g_valuesNV_QUALITY_UPSCALING, NV_QUALITY_UPSCALING_OFF}, + {OPTIMUS_MAXAA_ID, OPTIMUS_MAXAA_STRING, 2, (NvU32 *)g_valuesOPTIMUS_MAXAA, 0x00000000}, + {PHYSXINDICATOR_ID, PHYSXINDICATOR_STRING, 2, (NvU32 *)g_valuesPHYSXINDICATOR, PHYSXINDICATOR_DISABLED}, + {PREFERRED_PSTATE_ID, PREFERRED_PSTATE_STRING, 8, (NvU32 *)g_valuesPREFERRED_PSTATE, PREFERRED_PSTATE_OPTIMAL_POWER}, + {PREVENT_UI_AF_OVERRIDE_ID, PREVENT_UI_AF_OVERRIDE_STRING, 2, (NvU32 *)g_valuesPREVENT_UI_AF_OVERRIDE, PREVENT_UI_AF_OVERRIDE_OFF}, + {SHIM_MAXRES_ID, SHIM_MAXRES_STRING, 0, NULL, 0x00000000}, + {SHIM_MCCOMPAT_ID, SHIM_MCCOMPAT_STRING, 8, (NvU32 *)g_valuesSHIM_MCCOMPAT, SHIM_MCCOMPAT_AUTO_SELECT}, + {SHIM_RENDERING_MODE_ID, SHIM_RENDERING_MODE_STRING, 8, (NvU32 *)g_valuesSHIM_RENDERING_MODE, SHIM_RENDERING_MODE_AUTO_SELECT}, + {SHIM_RENDERING_OPTIONS_ID, SHIM_RENDERING_OPTIONS_STRING, 28, (NvU32 *)g_valuesSHIM_RENDERING_OPTIONS, 0x00000000}, + {SLI_GPU_COUNT_ID, SLI_GPU_COUNT_STRING, 5, (NvU32 *)g_valuesSLI_GPU_COUNT, 
SLI_GPU_COUNT_AUTOSELECT}, + {SLI_PREDEFINED_GPU_COUNT_ID, SLI_PREDEFINED_GPU_COUNT_STRING, 5, (NvU32 *)g_valuesSLI_PREDEFINED_GPU_COUNT, SLI_PREDEFINED_GPU_COUNT_AUTOSELECT}, + {SLI_PREDEFINED_GPU_COUNT_DX10_ID, SLI_PREDEFINED_GPU_COUNT_DX10_STRING, 5, (NvU32 *)g_valuesSLI_PREDEFINED_GPU_COUNT_DX10, SLI_PREDEFINED_GPU_COUNT_DX10_AUTOSELECT}, + {SLI_PREDEFINED_MODE_ID, SLI_PREDEFINED_MODE_STRING, 6, (NvU32 *)g_valuesSLI_PREDEFINED_MODE, SLI_PREDEFINED_MODE_AUTOSELECT}, + {SLI_PREDEFINED_MODE_DX10_ID, SLI_PREDEFINED_MODE_DX10_STRING, 6, (NvU32 *)g_valuesSLI_PREDEFINED_MODE_DX10, SLI_PREDEFINED_MODE_DX10_AUTOSELECT}, + {SLI_RENDERING_MODE_ID, SLI_RENDERING_MODE_STRING, 6, (NvU32 *)g_valuesSLI_RENDERING_MODE, SLI_RENDERING_MODE_AUTOSELECT}, + {VRPRERENDERLIMIT_ID, VRPRERENDERLIMIT_STRING, 4, (NvU32 *)g_valuesVRPRERENDERLIMIT, VRPRERENDERLIMIT_DEFAULT}, + {VRRFEATUREINDICATOR_ID, VRRFEATUREINDICATOR_STRING, 2, (NvU32 *)g_valuesVRRFEATUREINDICATOR, VRRFEATUREINDICATOR_ENABLED}, + {VRROVERLAYINDICATOR_ID, VRROVERLAYINDICATOR_STRING, 2, (NvU32 *)g_valuesVRROVERLAYINDICATOR, VRROVERLAYINDICATOR_ENABLED}, + {VRRREQUESTSTATE_ID, VRRREQUESTSTATE_STRING, 3, (NvU32 *)g_valuesVRRREQUESTSTATE, VRRREQUESTSTATE_FULLSCREEN_ONLY}, + {VRR_APP_OVERRIDE_ID, VRR_APP_OVERRIDE_STRING, 5, (NvU32 *)g_valuesVRR_APP_OVERRIDE, VRR_APP_OVERRIDE_ALLOW}, + {VRR_APP_OVERRIDE_REQUEST_STATE_ID, VRR_APP_OVERRIDE_REQUEST_STATE_STRING, 5, (NvU32 *)g_valuesVRR_APP_OVERRIDE_REQUEST_STATE, VRR_APP_OVERRIDE_REQUEST_STATE_ALLOW}, + {VRR_MODE_ID, VRR_MODE_STRING, 3, (NvU32 *)g_valuesVRR_MODE, VRR_MODE_FULLSCREEN_ONLY}, + {VSYNCSMOOTHAFR_ID, VSYNCSMOOTHAFR_STRING, 2, (NvU32 *)g_valuesVSYNCSMOOTHAFR, VSYNCSMOOTHAFR_OFF}, + {VSYNCVRRCONTROL_ID, VSYNCVRRCONTROL_STRING, 3, (NvU32 *)g_valuesVSYNCVRRCONTROL, VSYNCVRRCONTROL_ENABLE}, + {VSYNC_BEHAVIOR_FLAGS_ID, VSYNC_BEHAVIOR_FLAGS_STRING, 3, (NvU32 *)g_valuesVSYNC_BEHAVIOR_FLAGS, VSYNC_BEHAVIOR_FLAGS_DEFAULT}, + {WKS_API_STEREO_EYES_EXCHANGE_ID, WKS_API_STEREO_EYES_EXCHANGE_STRING, 2, (NvU32 *)g_valuesWKS_API_STEREO_EYES_EXCHANGE, WKS_API_STEREO_EYES_EXCHANGE_OFF}, + {WKS_API_STEREO_MODE_ID, WKS_API_STEREO_MODE_STRING, 25, (NvU32 *)g_valuesWKS_API_STEREO_MODE, WKS_API_STEREO_MODE_SHUTTER_GLASSES}, + {WKS_MEMORY_ALLOCATION_POLICY_ID, WKS_MEMORY_ALLOCATION_POLICY_STRING, 3, (NvU32 *)g_valuesWKS_MEMORY_ALLOCATION_POLICY, WKS_MEMORY_ALLOCATION_POLICY_AS_NEEDED}, + {WKS_STEREO_DONGLE_SUPPORT_ID, WKS_STEREO_DONGLE_SUPPORT_STRING, 3, (NvU32 *)g_valuesWKS_STEREO_DONGLE_SUPPORT, WKS_STEREO_DONGLE_SUPPORT_DAC}, + {WKS_STEREO_SUPPORT_ID, WKS_STEREO_SUPPORT_STRING, 2, (NvU32 *)g_valuesWKS_STEREO_SUPPORT, WKS_STEREO_SUPPORT_OFF}, + {WKS_STEREO_SWAP_MODE_ID, WKS_STEREO_SWAP_MODE_STRING, 5, (NvU32 *)g_valuesWKS_STEREO_SWAP_MODE, WKS_STEREO_SWAP_MODE_APPLICATION_CONTROL}, + {AO_MODE_ID, AO_MODE_STRING, 4, (NvU32 *)g_valuesAO_MODE, AO_MODE_OFF}, + {AO_MODE_ACTIVE_ID, AO_MODE_ACTIVE_STRING, 2, (NvU32 *)g_valuesAO_MODE_ACTIVE, AO_MODE_ACTIVE_DISABLED}, + {AUTO_LODBIASADJUST_ID, AUTO_LODBIASADJUST_STRING, 2, (NvU32 *)g_valuesAUTO_LODBIASADJUST, AUTO_LODBIASADJUST_ON}, + {EXPORT_PERF_COUNTERS_DX9_ONLY_ID, EXPORT_PERF_COUNTERS_DX9_ONLY_STRING, 2, (NvU32 *)g_valuesEXPORT_PERF_COUNTERS_DX9_ONLY, EXPORT_PERF_COUNTERS_DX9_ONLY_OFF}, + {LODBIASADJUST_ID, LODBIASADJUST_STRING, 2, (NvU32 *)g_valuesLODBIASADJUST, 0x00000000}, + {MAXWELL_B_SAMPLE_INTERLEAVE_ID, MAXWELL_B_SAMPLE_INTERLEAVE_STRING, 2, (NvU32 *)g_valuesMAXWELL_B_SAMPLE_INTERLEAVE, MAXWELL_B_SAMPLE_INTERLEAVE_OFF}, + {PRERENDERLIMIT_ID, 
PRERENDERLIMIT_STRING, 3, (NvU32 *)g_valuesPRERENDERLIMIT, PRERENDERLIMIT_APP_CONTROLLED}, + {PS_SHADERDISKCACHE_ID, PS_SHADERDISKCACHE_STRING, 2, (NvU32 *)g_valuesPS_SHADERDISKCACHE, PS_SHADERDISKCACHE_ON}, + {PS_SHADERDISKCACHE_MAX_SIZE_ID, PS_SHADERDISKCACHE_MAX_SIZE_STRING, 2, (NvU32 *)g_valuesPS_SHADERDISKCACHE_MAX_SIZE, 0x00000000}, + {PS_TEXFILTER_ANISO_OPTS2_ID, PS_TEXFILTER_ANISO_OPTS2_STRING, 2, (NvU32 *)g_valuesPS_TEXFILTER_ANISO_OPTS2, PS_TEXFILTER_ANISO_OPTS2_OFF}, + {PS_TEXFILTER_BILINEAR_IN_ANISO_ID, PS_TEXFILTER_BILINEAR_IN_ANISO_STRING, 2, (NvU32 *)g_valuesPS_TEXFILTER_BILINEAR_IN_ANISO, PS_TEXFILTER_BILINEAR_IN_ANISO_OFF}, + {PS_TEXFILTER_DISABLE_TRILIN_SLOPE_ID, PS_TEXFILTER_DISABLE_TRILIN_SLOPE_STRING, 2, (NvU32 *)g_valuesPS_TEXFILTER_DISABLE_TRILIN_SLOPE, PS_TEXFILTER_DISABLE_TRILIN_SLOPE_OFF}, + {PS_TEXFILTER_NO_NEG_LODBIAS_ID, PS_TEXFILTER_NO_NEG_LODBIAS_STRING, 2, (NvU32 *)g_valuesPS_TEXFILTER_NO_NEG_LODBIAS, PS_TEXFILTER_NO_NEG_LODBIAS_OFF}, + {QUALITY_ENHANCEMENTS_ID, QUALITY_ENHANCEMENTS_STRING, 4, (NvU32 *)g_valuesQUALITY_ENHANCEMENTS, QUALITY_ENHANCEMENTS_QUALITY}, + {QUALITY_ENHANCEMENT_SUBSTITUTION_ID, QUALITY_ENHANCEMENT_SUBSTITUTION_STRING, 2, (NvU32 *)g_valuesQUALITY_ENHANCEMENT_SUBSTITUTION, QUALITY_ENHANCEMENT_SUBSTITUTION_NO_SUBSTITUTION}, + {REFRESH_RATE_OVERRIDE_ID, REFRESH_RATE_OVERRIDE_STRING, 3, (NvU32 *)g_valuesREFRESH_RATE_OVERRIDE, REFRESH_RATE_OVERRIDE_APPLICATION_CONTROLLED}, + {SET_POWER_THROTTLE_FOR_PCIe_COMPLIANCE_ID, SET_POWER_THROTTLE_FOR_PCIe_COMPLIANCE_STRING, 2, (NvU32 *)g_valuesSET_POWER_THROTTLE_FOR_PCIe_COMPLIANCE, SET_POWER_THROTTLE_FOR_PCIe_COMPLIANCE_OFF}, + {SET_VAB_DATA_ID, SET_VAB_DATA_STRING, 6, (NvU32 *)g_valuesSET_VAB_DATA, SET_VAB_DATA_USE_API_DEFAULTS}, + {VSYNCMODE_ID, VSYNCMODE_STRING, 7, (NvU32 *)g_valuesVSYNCMODE, VSYNCMODE_PASSIVE}, + {VSYNCTEARCONTROL_ID, VSYNCTEARCONTROL_STRING, 2, (NvU32 *)g_valuesVSYNCTEARCONTROL, VSYNCTEARCONTROL_DISABLE}, +}; + +SettingWSTRINGNameString mapSettingWSTRING[TOTAL_WSTRING_SETTING_NUM] = +{ + {OGL_IMPLICIT_GPU_AFFINITY_ID, OGL_IMPLICIT_GPU_AFFINITY_STRING, 1, (const wchar_t **)g_valuesOGL_IMPLICIT_GPU_AFFINITY, L"autoselect"}, + {CUDA_EXCLUDED_GPUS_ID, CUDA_EXCLUDED_GPUS_STRING, 1, (const wchar_t **)g_valuesCUDA_EXCLUDED_GPUS, L"none"}, + {D3DOGL_GPU_MAX_POWER_ID, D3DOGL_GPU_MAX_POWER_STRING, 1, (const wchar_t **)g_valuesD3DOGL_GPU_MAX_POWER, L"0"}, + {ICAFE_LOGO_CONFIG_ID, ICAFE_LOGO_CONFIG_STRING, 0, NULL, L""}, +}; + diff --git a/vendor/nvapi/NvApiDriverSettings.h b/vendor/nvapi/NvApiDriverSettings.h index fc001dca5b..100f9c8faa 100644 --- a/vendor/nvapi/NvApiDriverSettings.h +++ b/vendor/nvapi/NvApiDriverSettings.h @@ -41,6 +41,8 @@ #define _NVAPI_DRIVER_SETTINGS_H_ #define OGL_AA_LINE_GAMMA_STRING L"Antialiasing - Line gamma" +#define OGL_CPL_GDI_COMPATIBILITY_STRING L"OpenGL GDI compatibility" +#define OGL_CPL_PREFER_DXPRESENT_STRING L"Vulkan/OpenGL present method" #define OGL_DEEP_COLOR_SCANOUT_STRING L"Deep color for 3D applications" #define OGL_DEFAULT_SWAP_INTERVAL_STRING L"OpenGL default swap interval" #define OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_STRING L"OpenGL default swap interval fraction" @@ -141,6 +143,8 @@ enum ESetting { OGL_AA_LINE_GAMMA_ID = 0x2089BF6C, + OGL_CPL_GDI_COMPATIBILITY_ID = 0x2072C5A3, + OGL_CPL_PREFER_DXPRESENT_ID = 0x20D690F8, OGL_DEEP_COLOR_SCANOUT_ID = 0x2097C2F6, OGL_DEFAULT_SWAP_INTERVAL_ID = 0x206A6582, OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_ID = 0x206C4581, @@ -238,9 +242,9 @@ enum ESetting { SET_VAB_DATA_ID = 0x00AB8687, VSYNCMODE_ID = 
0x00A879CF, VSYNCTEARCONTROL_ID = 0x005A375C, - TOTAL_DWORD_SETTING_NUM = 94, + TOTAL_DWORD_SETTING_NUM = 96, TOTAL_WSTRING_SETTING_NUM = 4, - TOTAL_SETTING_NUM = 98, + TOTAL_SETTING_NUM = 100, INVALID_SETTING_ID = 0xFFFFFFFF }; @@ -253,6 +257,22 @@ enum EValues_OGL_AA_LINE_GAMMA { OGL_AA_LINE_GAMMA_DEFAULT = OGL_AA_LINE_GAMMA_DISABLED }; +enum EValues_OGL_CPL_GDI_COMPATIBILITY { + OGL_CPL_GDI_COMPATIBILITY_PREFER_DISABLED = 0x00000000, + OGL_CPL_GDI_COMPATIBILITY_PREFER_ENABLED = 0x00000001, + OGL_CPL_GDI_COMPATIBILITY_AUTO = 0x00000002, + OGL_CPL_GDI_COMPATIBILITY_NUM_VALUES = 3, + OGL_CPL_GDI_COMPATIBILITY_DEFAULT = OGL_CPL_GDI_COMPATIBILITY_AUTO +}; + +enum EValues_OGL_CPL_PREFER_DXPRESENT { + OGL_CPL_PREFER_DXPRESENT_PREFER_DISABLED = 0x00000000, + OGL_CPL_PREFER_DXPRESENT_PREFER_ENABLED = 0x00000001, + OGL_CPL_PREFER_DXPRESENT_AUTO = 0x00000002, + OGL_CPL_PREFER_DXPRESENT_NUM_VALUES = 3, + OGL_CPL_PREFER_DXPRESENT_DEFAULT = OGL_CPL_PREFER_DXPRESENT_AUTO +}; + enum EValues_OGL_DEEP_COLOR_SCANOUT { OGL_DEEP_COLOR_SCANOUT_DISABLE = 0, OGL_DEEP_COLOR_SCANOUT_ENABLE = 1, @@ -278,7 +298,7 @@ enum EValues_OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL { OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_ZERO_SCANLINES = 0, OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_ONE_FULL_FRAME_OF_SCANLINES = 100, OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_NUM_VALUES = 2, - OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_DEFAULT = 0 + OGL_DEFAULT_SWAP_INTERVAL_FRACTIONAL_DEFAULT = 0U }; enum EValues_OGL_DEFAULT_SWAP_INTERVAL_SIGN { @@ -372,7 +392,7 @@ enum EValues_OGL_THREAD_CONTROL { OGL_THREAD_CONTROL_ENABLE = 0x00000001, OGL_THREAD_CONTROL_DISABLE = 0x00000002, OGL_THREAD_CONTROL_NUM_VALUES = 2, - OGL_THREAD_CONTROL_DEFAULT = 0 + OGL_THREAD_CONTROL_DEFAULT = 0U }; enum EValues_OGL_TMON_LEVEL { @@ -706,62 +726,62 @@ enum EValues_PREVENT_UI_AF_OVERRIDE { }; enum EValues_SHIM_MCCOMPAT { - SHIM_MCCOMPAT_INTEGRATED = 0x00000000, - SHIM_MCCOMPAT_ENABLE = 0x00000001, - SHIM_MCCOMPAT_USER_EDITABLE = 0x00000002, - SHIM_MCCOMPAT_MASK = 0x00000003, - SHIM_MCCOMPAT_VIDEO_MASK = 0x00000004, - SHIM_MCCOMPAT_VARYING_BIT = 0x00000008, - SHIM_MCCOMPAT_AUTO_SELECT = 0x00000010, - SHIM_MCCOMPAT_OVERRIDE_BIT = 0x80000000, + SHIM_MCCOMPAT_INTEGRATED = 0x00000000U, + SHIM_MCCOMPAT_ENABLE = 0x00000001U, + SHIM_MCCOMPAT_USER_EDITABLE = 0x00000002U, + SHIM_MCCOMPAT_MASK = 0x00000003U, + SHIM_MCCOMPAT_VIDEO_MASK = 0x00000004U, + SHIM_MCCOMPAT_VARYING_BIT = 0x00000008U, + SHIM_MCCOMPAT_AUTO_SELECT = 0x00000010U, + SHIM_MCCOMPAT_OVERRIDE_BIT = 0x80000000U, SHIM_MCCOMPAT_NUM_VALUES = 8, SHIM_MCCOMPAT_DEFAULT = SHIM_MCCOMPAT_AUTO_SELECT }; enum EValues_SHIM_RENDERING_MODE { - SHIM_RENDERING_MODE_INTEGRATED = 0x00000000, - SHIM_RENDERING_MODE_ENABLE = 0x00000001, - SHIM_RENDERING_MODE_USER_EDITABLE = 0x00000002, - SHIM_RENDERING_MODE_MASK = 0x00000003, - SHIM_RENDERING_MODE_VIDEO_MASK = 0x00000004, - SHIM_RENDERING_MODE_VARYING_BIT = 0x00000008, - SHIM_RENDERING_MODE_AUTO_SELECT = 0x00000010, - SHIM_RENDERING_MODE_OVERRIDE_BIT = 0x80000000, + SHIM_RENDERING_MODE_INTEGRATED = 0x00000000U, + SHIM_RENDERING_MODE_ENABLE = 0x00000001U, + SHIM_RENDERING_MODE_USER_EDITABLE = 0x00000002U, + SHIM_RENDERING_MODE_MASK = 0x00000003U, + SHIM_RENDERING_MODE_VIDEO_MASK = 0x00000004U, + SHIM_RENDERING_MODE_VARYING_BIT = 0x00000008U, + SHIM_RENDERING_MODE_AUTO_SELECT = 0x00000010U, + SHIM_RENDERING_MODE_OVERRIDE_BIT = 0x80000000U, SHIM_RENDERING_MODE_NUM_VALUES = 8, SHIM_RENDERING_MODE_DEFAULT = SHIM_RENDERING_MODE_AUTO_SELECT }; enum EValues_SHIM_RENDERING_OPTIONS { - 
SHIM_RENDERING_OPTIONS_DEFAULT_RENDERING_MODE = 0x00000000, - SHIM_RENDERING_OPTIONS_DISABLE_ASYNC_PRESENT = 0x00000001, - SHIM_RENDERING_OPTIONS_EHSHELL_DETECT = 0x00000002, - SHIM_RENDERING_OPTIONS_FLASHPLAYER_HOST_DETECT = 0x00000004, - SHIM_RENDERING_OPTIONS_VIDEO_DRM_APP_DETECT = 0x00000008, - SHIM_RENDERING_OPTIONS_IGNORE_OVERRIDES = 0x00000010, - SHIM_RENDERING_OPTIONS_RESERVED1 = 0x00000020, - SHIM_RENDERING_OPTIONS_ENABLE_DWM_ASYNC_PRESENT = 0x00000040, - SHIM_RENDERING_OPTIONS_RESERVED2 = 0x00000080, - SHIM_RENDERING_OPTIONS_ALLOW_INHERITANCE = 0x00000100, - SHIM_RENDERING_OPTIONS_DISABLE_WRAPPERS = 0x00000200, - SHIM_RENDERING_OPTIONS_DISABLE_DXGI_WRAPPERS = 0x00000400, - SHIM_RENDERING_OPTIONS_PRUNE_UNSUPPORTED_FORMATS = 0x00000800, - SHIM_RENDERING_OPTIONS_ENABLE_ALPHA_FORMAT = 0x00001000, - SHIM_RENDERING_OPTIONS_IGPU_TRANSCODING = 0x00002000, - SHIM_RENDERING_OPTIONS_DISABLE_CUDA = 0x00004000, - SHIM_RENDERING_OPTIONS_ALLOW_CP_CAPS_FOR_VIDEO = 0x00008000, - SHIM_RENDERING_OPTIONS_IGPU_TRANSCODING_FWD_OPTIMUS = 0x00010000, - SHIM_RENDERING_OPTIONS_DISABLE_DURING_SECURE_BOOT = 0x00020000, - SHIM_RENDERING_OPTIONS_INVERT_FOR_QUADRO = 0x00040000, - SHIM_RENDERING_OPTIONS_INVERT_FOR_MSHYBRID = 0x00080000, - SHIM_RENDERING_OPTIONS_REGISTER_PROCESS_ENABLE_GOLD = 0x00100000, - SHIM_RENDERING_OPTIONS_HANDLE_WINDOWED_MODE_PERF_OPT = 0x00200000, - SHIM_RENDERING_OPTIONS_HANDLE_WIN7_ASYNC_RUNTIME_BUG = 0x00400000, - SHIM_RENDERING_OPTIONS_EXPLICIT_ADAPTER_OPTED_BY_APP = 0x00800000, - SHIM_RENDERING_OPTIONS_ALLOW_DYNAMIC_DISPLAY_MUX_SWITCH = 0x01000000, - SHIM_RENDERING_OPTIONS_DISALLOW_DYNAMIC_DISPLAY_MUX_SWITCH = 0x02000000, - SHIM_RENDERING_OPTIONS_DISABLE_TURING_POWER_POLICY = 0x04000000, + SHIM_RENDERING_OPTIONS_DEFAULT_RENDERING_MODE = 0x00000000U, + SHIM_RENDERING_OPTIONS_DISABLE_ASYNC_PRESENT = 0x00000001U, + SHIM_RENDERING_OPTIONS_EHSHELL_DETECT = 0x00000002U, + SHIM_RENDERING_OPTIONS_FLASHPLAYER_HOST_DETECT = 0x00000004U, + SHIM_RENDERING_OPTIONS_VIDEO_DRM_APP_DETECT = 0x00000008U, + SHIM_RENDERING_OPTIONS_IGNORE_OVERRIDES = 0x00000010U, + SHIM_RENDERING_OPTIONS_RESERVED1 = 0x00000020U, + SHIM_RENDERING_OPTIONS_ENABLE_DWM_ASYNC_PRESENT = 0x00000040U, + SHIM_RENDERING_OPTIONS_RESERVED2 = 0x00000080U, + SHIM_RENDERING_OPTIONS_ALLOW_INHERITANCE = 0x00000100U, + SHIM_RENDERING_OPTIONS_DISABLE_WRAPPERS = 0x00000200U, + SHIM_RENDERING_OPTIONS_DISABLE_DXGI_WRAPPERS = 0x00000400U, + SHIM_RENDERING_OPTIONS_PRUNE_UNSUPPORTED_FORMATS = 0x00000800U, + SHIM_RENDERING_OPTIONS_ENABLE_ALPHA_FORMAT = 0x00001000U, + SHIM_RENDERING_OPTIONS_IGPU_TRANSCODING = 0x00002000U, + SHIM_RENDERING_OPTIONS_DISABLE_CUDA = 0x00004000U, + SHIM_RENDERING_OPTIONS_ALLOW_CP_CAPS_FOR_VIDEO = 0x00008000U, + SHIM_RENDERING_OPTIONS_IGPU_TRANSCODING_FWD_OPTIMUS = 0x00010000U, + SHIM_RENDERING_OPTIONS_DISABLE_DURING_SECURE_BOOT = 0x00020000U, + SHIM_RENDERING_OPTIONS_INVERT_FOR_QUADRO = 0x00040000U, + SHIM_RENDERING_OPTIONS_INVERT_FOR_MSHYBRID = 0x00080000U, + SHIM_RENDERING_OPTIONS_REGISTER_PROCESS_ENABLE_GOLD = 0x00100000U, + SHIM_RENDERING_OPTIONS_HANDLE_WINDOWED_MODE_PERF_OPT = 0x00200000U, + SHIM_RENDERING_OPTIONS_HANDLE_WIN7_ASYNC_RUNTIME_BUG = 0x00400000U, + SHIM_RENDERING_OPTIONS_EXPLICIT_ADAPTER_OPTED_BY_APP = 0x00800000U, + SHIM_RENDERING_OPTIONS_ALLOW_DYNAMIC_DISPLAY_MUX_SWITCH = 0x01000000U, + SHIM_RENDERING_OPTIONS_DISALLOW_DYNAMIC_DISPLAY_MUX_SWITCH = 0x02000000U, + SHIM_RENDERING_OPTIONS_DISABLE_TURING_POWER_POLICY = 0x04000000U, SHIM_RENDERING_OPTIONS_NUM_VALUES = 28, - SHIM_RENDERING_OPTIONS_DEFAULT = 
0x00000000 + SHIM_RENDERING_OPTIONS_DEFAULT = 0x00000000U }; enum EValues_SLI_GPU_COUNT { diff --git a/vendor/nvapi/amd64/nvapi64.lib b/vendor/nvapi/amd64/nvapi64.lib index e36e9f1092..e54fdf958d 100644 Binary files a/vendor/nvapi/amd64/nvapi64.lib and b/vendor/nvapi/amd64/nvapi64.lib differ diff --git a/vendor/nvapi/nvHLSLExtns.h b/vendor/nvapi/nvHLSLExtns.h new file mode 100644 index 0000000000..9394036dad --- /dev/null +++ b/vendor/nvapi/nvHLSLExtns.h @@ -0,0 +1,2206 @@ + /************************************************************************************************************************************\ +|* *| +|* Copyright © 2012 NVIDIA Corporation. All rights reserved. *| +|* *| +|* NOTICE TO USER: *| +|* *| +|* This software is subject to NVIDIA ownership rights under U.S. and international Copyright laws. *| +|* *| +|* This software and the information contained herein are PROPRIETARY and CONFIDENTIAL to NVIDIA *| +|* and are being provided solely under the terms and conditions of an NVIDIA software license agreement. *| +|* Otherwise, you have no rights to use or access this software in any manner. *| +|* *| +|* If not covered by the applicable NVIDIA software license agreement: *| +|* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOFTWARE FOR ANY PURPOSE. *| +|* IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. *| +|* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, *| +|* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. *| +|* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, *| +|* OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, *| +|* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOURCE CODE. *| +|* *| +|* U.S. Government End Users. *| +|* This software is a "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 1995), *| +|* consisting of "commercial computer software" and "commercial computer software documentation" *| +|* as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government only as a commercial end item. *| +|* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), *| +|* all U.S. Government End Users acquire the software with only those rights set forth herein. *| +|* *| +|* Any use of this software in individual and commercial software must include, *| +|* in the user documentation and internal comments to the code, *| +|* the above Disclaimer (as applicable) and U.S. Government End Users Notice. *| +|* *| + \************************************************************************************************************************************/ + +////////////////////////// NVIDIA SHADER EXTENSIONS ///////////////// + +// this file is to be #included in the app HLSL shader code to make +// use of nvidia shader extensions + + +#include "nvHLSLExtnsInternal.h" + +//----------------------------------------------------------------------------// +//------------------------- Warp Shuffle Functions ---------------------------// +//----------------------------------------------------------------------------// + +// all functions have variants with width parameter which permits sub-division +// of the warp into segments - for example to exchange data between 4 groups of +// 8 lanes in a SIMD manner. 
If width is less than warpSize then each subsection +// of the warp behaves as a separate entity with a starting logical lane ID of 0. +// A thread may only exchange data with others in its own subsection. Width must +// have a value which is a power of 2 so that the warp can be subdivided equally; +// results are undefined if width is not a power of 2, or is a number greater +// than warpSize. + +// +// simple variant of SHFL instruction +// returns val from the specified lane +// optional width parameter must be a power of two and width <= 32 +// +int NvShfl(int val, uint srcLane, int width = NV_WARP_SIZE) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = val; // variable to be shuffled + g_NvidiaExt[index].src0u.y = srcLane; // source lane + g_NvidiaExt[index].src0u.z = __NvGetShflMaskFromWidth(width); + g_NvidiaExt[index].opcode = NV_EXTN_OP_SHFL; + + // result is returned as the return value of IncrementCounter on fake UAV slot + return g_NvidiaExt.IncrementCounter(); +} + +int2 NvShfl(int2 val, uint srcLane, int width = NV_WARP_SIZE) +{ + int x = NvShfl(val.x, srcLane, width); + int y = NvShfl(val.y, srcLane, width); + return int2(x, y); +} + +int4 NvShfl(int4 val, uint srcLane, int width = NV_WARP_SIZE) +{ + int x = NvShfl(val.x, srcLane, width); + int y = NvShfl(val.y, srcLane, width); + int z = NvShfl(val.z, srcLane, width); + int w = NvShfl(val.w, srcLane, width); + return int4(x, y, z, w); +} + +// +// Copy from a lane with lower ID relative to caller +// +int NvShflUp(int val, uint delta, int width = NV_WARP_SIZE) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = val; // variable to be shuffled + g_NvidiaExt[index].src0u.y = delta; // relative lane offset + g_NvidiaExt[index].src0u.z = (NV_WARP_SIZE - width) << 8; // minIndex = maxIndex for shfl_up (src2[4:0] is expected to be 0) + g_NvidiaExt[index].opcode = NV_EXTN_OP_SHFL_UP; + return g_NvidiaExt.IncrementCounter(); +} + +// +// Copy from a lane with higher ID relative to caller +// +int NvShflDown(int val, uint delta, int width = NV_WARP_SIZE) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = val; // variable to be shuffled + g_NvidiaExt[index].src0u.y = delta; // relative lane offset + g_NvidiaExt[index].src0u.z = __NvGetShflMaskFromWidth(width); + g_NvidiaExt[index].opcode = NV_EXTN_OP_SHFL_DOWN; + return g_NvidiaExt.IncrementCounter(); +} + +// +// Copy from a lane based on bitwise XOR of own lane ID +// +int NvShflXor(int val, uint laneMask, int width = NV_WARP_SIZE) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = val; // variable to be shuffled + g_NvidiaExt[index].src0u.y = laneMask; // laneMask to be XOR'ed with current laneId to get the source lane id + g_NvidiaExt[index].src0u.z = __NvGetShflMaskFromWidth(width); + g_NvidiaExt[index].opcode = NV_EXTN_OP_SHFL_XOR; + return g_NvidiaExt.IncrementCounter(); +} + + +//----------------------------------------------------------------------------// +//----------------------------- Warp Vote Functions---------------------------// +//----------------------------------------------------------------------------// + +// returns 0xFFFFFFFF if the predicate is true for any thread in the warp, returns 0 otherwise +uint NvAny(int predicate) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = predicate; + g_NvidiaExt[index].opcode = NV_EXTN_OP_VOTE_ANY; + return g_NvidiaExt.IncrementCounter(); +} + +// returns 0xFFFFFFFF if the 
predicate is true for ALL threads in the warp, returns 0 otherwise
+uint NvAll(int predicate)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = predicate;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_VOTE_ALL;
+    return g_NvidiaExt.IncrementCounter();
+}
+
+// returns a mask of all threads in the warp with bits set for threads that have the predicate true
+uint NvBallot(int predicate)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = predicate;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_VOTE_BALLOT;
+    return g_NvidiaExt.IncrementCounter();
+}
+
+
+//----------------------------------------------------------------------------//
+//----------------------------- Utility Functions ----------------------------//
+//----------------------------------------------------------------------------//
+
+// returns the lane index of the current thread (thread index in warp)
+int NvGetLaneId()
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_GET_LANE_ID;
+    return g_NvidiaExt.IncrementCounter();
+}
+
+// returns the value of a special register - specify the subopcode using one of the NV_SPECIALOP_* values in nvShaderExtnEnums.h; other subopcodes produce undefined behavior
+uint NvGetSpecial(uint subOpCode)
+{
+    return __NvGetSpecial(subOpCode);
+}
+
+//----------------------------------------------------------------------------//
+//----------------------------- FP16 Atomic Functions ------------------------//
+//----------------------------------------------------------------------------//
+
+// The functions below perform atomic operations on two consecutive fp16
+// values in the given raw UAV.
+// The uint parameter 'fp16x2Val' is treated as two fp16 values; byteAddress must be a multiple of 4.
+// The returned value is the two fp16 values packed into a single uint.
+
+uint NvInterlockedAddFp16x2(RWByteAddressBuffer uav, uint byteAddress, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, byteAddress, fp16x2Val, NV_EXTN_ATOM_ADD);
+}
+
+uint NvInterlockedMinFp16x2(RWByteAddressBuffer uav, uint byteAddress, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, byteAddress, fp16x2Val, NV_EXTN_ATOM_MIN);
+}
+
+uint NvInterlockedMaxFp16x2(RWByteAddressBuffer uav, uint byteAddress, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, byteAddress, fp16x2Val, NV_EXTN_ATOM_MAX);
+}
+
+
+// versions of the above functions taking two fp32 values (internally converted to fp16 values)
+uint NvInterlockedAddFp16x2(RWByteAddressBuffer uav, uint byteAddress, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, byteAddress, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD);
+}
+
+uint NvInterlockedMinFp16x2(RWByteAddressBuffer uav, uint byteAddress, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, byteAddress, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN);
+}
+
+uint NvInterlockedMaxFp16x2(RWByteAddressBuffer uav, uint byteAddress, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, byteAddress, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX);
+}
+
+
+//----------------------------------------------------------------------------//
+
+// The functions below perform an atomic operation on a R16G16_FLOAT UAV at the given address:
+// the uint parameter 'fp16x2Val' is treated as two fp16 values, and
+// the returned value is the two fp16 values (.x and .y components) packed into a single uint.
+// Warning: Behavior of this set of functions is undefined if the UAV is not
+// of R16G16_FLOAT format (might result in an app crash or TDR)
+
+//----------------------------------------------------------------------------//
+
+// The functions below perform atomic operations on a R16G16_FLOAT UAV at the given address
+// the uint parameter 'fp16x2Val' is treated as two fp16 values
+// the returned value is the two fp16 values (.x and .y components) packed into a single uint
+// Warning: Behaviour of this set of functions is undefined if the UAV is not
+// of R16G16_FLOAT format (might result in app crash or TDR)
+
+uint NvInterlockedAddFp16x2(RWTexture1D<float2> uav, uint address, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
+}
+
+uint NvInterlockedMinFp16x2(RWTexture1D<float2> uav, uint address, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
+}
+
+uint NvInterlockedMaxFp16x2(RWTexture1D<float2> uav, uint address, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
+}
+
+uint NvInterlockedAddFp16x2(RWTexture2D<float2> uav, uint2 address, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
+}
+
+uint NvInterlockedMinFp16x2(RWTexture2D<float2> uav, uint2 address, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
+}
+
+uint NvInterlockedMaxFp16x2(RWTexture2D<float2> uav, uint2 address, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
+}
+
+uint NvInterlockedAddFp16x2(RWTexture3D<float2> uav, uint3 address, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
+}
+
+uint NvInterlockedMinFp16x2(RWTexture3D<float2> uav, uint3 address, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
+}
+
+uint NvInterlockedMaxFp16x2(RWTexture3D<float2> uav, uint3 address, uint fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
+}
+
+
+// versions taking two fp32 values (internally converted to fp16)
+uint NvInterlockedAddFp16x2(RWTexture1D<float2> uav, uint address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD);
+}
+
+uint NvInterlockedMinFp16x2(RWTexture1D<float2> uav, uint address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN);
+}
+
+uint NvInterlockedMaxFp16x2(RWTexture1D<float2> uav, uint address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX);
+}
+
+uint NvInterlockedAddFp16x2(RWTexture2D<float2> uav, uint2 address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD);
+}
+
+uint NvInterlockedMinFp16x2(RWTexture2D<float2> uav, uint2 address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN);
+}
+
+uint NvInterlockedMaxFp16x2(RWTexture2D<float2> uav, uint2 address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX);
+}
+
+uint NvInterlockedAddFp16x2(RWTexture3D<float2> uav, uint3 address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD);
+}
+
+uint NvInterlockedMinFp16x2(RWTexture3D<float2> uav, uint3 address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN);
+}
+
+uint NvInterlockedMaxFp16x2(RWTexture3D<float2> uav, uint3 address, float2 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX);
+}
+
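+// Illustrative sketch (not part of the original header): atomically accumulating
+// a two-channel value into a texture. 'flowAccum' and 'velocity' are hypothetical
+// names; per the warning above, the UAV must really be R16G16_FLOAT.
+void AccumulateVelocity(RWTexture2D<float2> flowAccum, uint2 pixel, float2 velocity)
+{
+    // the float2 overload converts to fp16 internally before the atomic add
+    NvInterlockedAddFp16x2(flowAccum, pixel, velocity);
+}
+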
+//----------------------------------------------------------------------------//
+
+// The functions below perform atomic operations on a R16G16B16A16_FLOAT UAV at the given address
+// the uint2 parameter 'fp16x2Val' is treated as four fp16 values
+// i.e., fp16x2Val.x = uav.xy and fp16x2Val.y = uav.zw
+// The returned value is the four fp16 values (.xyzw components) packed into a uint2
+// Warning: Behaviour of this set of functions is undefined if the UAV is not
+// of R16G16B16A16_FLOAT format (might result in app crash or TDR)
+
+uint2 NvInterlockedAddFp16x4(RWTexture1D<float4> uav, uint address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture1D<float4> uav, uint address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture1D<float4> uav, uint address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedAddFp16x4(RWTexture2D<float4> uav, uint2 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture2D<float4> uav, uint2 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture2D<float4> uav, uint2 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedAddFp16x4(RWTexture3D<float4> uav, uint3 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture3D<float4> uav, uint3 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture3D<float4> uav, uint3 address, uint2 fp16x2Val)
+{
+    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
+}
+
+// versions taking four fp32 values (internally converted to fp16)
+uint2 NvInterlockedAddFp16x4(RWTexture1D<float4> uav, uint address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture1D<float4> uav, uint address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture1D<float4> uav, uint address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedAddFp16x4(RWTexture2D<float4> uav, uint2 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture2D<float4> uav, uint2 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture2D<float4> uav, uint2 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedAddFp16x4(RWTexture3D<float4> uav, uint3 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMinFp16x4(RWTexture3D<float4> uav, uint3 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedMaxFp16x4(RWTexture3D<float4> uav, uint3 address, float4 val)
+{
+    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MAX);
+}
+
+
+//----------------------------------------------------------------------------//
+//----------------------------- FP32 Atomic Functions-------------------------//
+//----------------------------------------------------------------------------//
+
+// The functions below perform an atomic add on the given UAV, treating the value as float
+// byteAddress must be a multiple of 4
+// The returned value is the value present in the memory location before the atomic add
+
+float NvInterlockedAddFp32(RWByteAddressBuffer uav, uint byteAddress, float val)
+{
+    return __NvAtomicAddFP32(uav, byteAddress, val);
+}
+
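+// Illustrative sketch (not part of the original header): reducing per-thread light
+// contributions into a single float total. 'totals' and 'slot' are hypothetical
+// names; any raw UAV with 4-byte-aligned offsets works the same way.
+float AddContribution(RWByteAddressBuffer totals, uint slot, float contribution)
+{
+    // the return value is the total as it was before this thread's add
+    return NvInterlockedAddFp32(totals, slot * 4, contribution);
+}
+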
+//----------------------------------------------------------------------------//
+
+// The functions below perform an atomic add on a R32_FLOAT UAV at the given address
+// the returned value is the value before performing the atomic add
+// Warning: Behaviour of this set of functions is undefined if the UAV is not
+// of R32_FLOAT format (might result in app crash or TDR)
+
+float NvInterlockedAddFp32(RWTexture1D<float> uav, uint address, float val)
+{
+    return __NvAtomicAddFP32(uav, address, val);
+}
+
+float NvInterlockedAddFp32(RWTexture2D<float> uav, uint2 address, float val)
+{
+    return __NvAtomicAddFP32(uav, address, val);
+}
+
+float NvInterlockedAddFp32(RWTexture3D<float> uav, uint3 address, float val)
+{
+    return __NvAtomicAddFP32(uav, address, val);
+}
+
+
+//----------------------------------------------------------------------------//
+//--------------------------- UINT64 Atomic Functions-------------------------//
+//----------------------------------------------------------------------------//
+
+// The functions below perform atomic operations on the given UAV, treating the value as uint64
+// byteAddress must be a multiple of 8
+// The returned value is the value present in the memory location before the atomic operation
+// uint2 vector type is used to represent a single uint64 value, with the x component containing the low 32 bits and the y component the high 32 bits.
+
+uint2 NvInterlockedAddUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMaxUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedMinUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedAndUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_AND);
+}
+
+uint2 NvInterlockedOrUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_OR);
+}
+
+uint2 NvInterlockedXorUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_XOR);
+}
+
+uint2 NvInterlockedCompareExchangeUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 compare_value, uint2 value)
+{
+    return __NvAtomicCompareExchangeUINT64(uav, byteAddress, compare_value, value);
+}
+
+uint2 NvInterlockedExchangeUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_SWAP);
+}
+
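+// Illustrative sketch (not part of the original header): maintaining a 64-bit
+// counter in a raw buffer without 32-bit wraparound. 'counters' and 'counterIndex'
+// are hypothetical names; byte offsets must be 8-byte aligned.
+uint2 NextTicket64(RWByteAddressBuffer counters, uint counterIndex)
+{
+    // increment by one: low word = 1, high word = 0; the carry is handled atomically
+    // and the pre-increment 64-bit value is returned as (low, high)
+    return NvInterlockedAddUint64(counters, counterIndex * 8, uint2(1, 0));
+}
+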
+//----------------------------------------------------------------------------//
+
+// The functions below perform atomic operations on a R32G32_UINT UAV at the given address, treating the value as uint64
+// the returned value is the value before performing the atomic operation
+// uint2 vector type is used to represent a single uint64 value, with the x component containing the low 32 bits and the y component the high 32 bits.
+// Warning: Behaviour of this set of functions is undefined if the UAV is not of R32G32_UINT format (might result in app crash or TDR)
+
+uint2 NvInterlockedAddUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMaxUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedMinUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedAndUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_AND);
+}
+
+uint2 NvInterlockedOrUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_OR);
+}
+
+uint2 NvInterlockedXorUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_XOR);
+}
+
+uint2 NvInterlockedCompareExchangeUint64(RWTexture1D<uint2> uav, uint address, uint2 compare_value, uint2 value)
+{
+    return __NvAtomicCompareExchangeUINT64(uav, address, compare_value, value);
+}
+
+uint2 NvInterlockedExchangeUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_SWAP);
+}
+
+uint2 NvInterlockedAddUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMaxUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedMinUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedAndUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_AND);
+}
+
+uint2 NvInterlockedOrUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_OR);
+}
+
+uint2 NvInterlockedXorUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_XOR);
+}
+
+uint2 NvInterlockedCompareExchangeUint64(RWTexture2D<uint2> uav, uint2 address, uint2 compare_value, uint2 value)
+{
+    return __NvAtomicCompareExchangeUINT64(uav, address, compare_value, value);
+}
+
+uint2 NvInterlockedExchangeUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_SWAP);
+}
+
+uint2 NvInterlockedAddUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_ADD);
+}
+
+uint2 NvInterlockedMaxUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MAX);
+}
+
+uint2 NvInterlockedMinUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MIN);
+}
+
+uint2 NvInterlockedAndUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_AND);
+}
+
+uint2 NvInterlockedOrUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_OR);
+}
+
+uint2 NvInterlockedXorUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_XOR);
+}
+
+uint2 NvInterlockedCompareExchangeUint64(RWTexture3D<uint2> uav, uint3 address, uint2 compare_value, uint2 value)
+{
+    return __NvAtomicCompareExchangeUINT64(uav, address, compare_value, value);
+}
+
+uint2 NvInterlockedExchangeUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
+{
+    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_SWAP);
+}
+
+//----------------------------------------------------------------------------//
+//--------------------------- VPRS functions ---------------------------------//
+//----------------------------------------------------------------------------//
+
+// Returns the shading rate and the number of per-pixel shading passes for the current VPRS pixel
+uint3 NvGetShadingRate()
+{
+    uint3 shadingRate = (uint3)0;
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_GET_SHADING_RATE;
+    g_NvidiaExt[index].numOutputsForIncCounter = 3;
+    shadingRate.x = g_NvidiaExt.IncrementCounter();
+    shadingRate.y = g_NvidiaExt.IncrementCounter();
+    shadingRate.z = g_NvidiaExt.IncrementCounter();
+    return shadingRate;
+}
+
+float NvEvaluateAttributeAtSampleForVPRS(float attrib, uint sampleIndex, int2 pixelOffset)
+{
+    float value = (float)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+float2 NvEvaluateAttributeAtSampleForVPRS(float2 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    float2 value = (float2)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    value.y = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+float3 NvEvaluateAttributeAtSampleForVPRS(float3 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    float3 value = (float3)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    value.y = asfloat(g_NvidiaExt.IncrementCounter());
+    value.z = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+float4 NvEvaluateAttributeAtSampleForVPRS(float4 attrib, uint sampleIndex, int2 pixelOffset)
+{
+    float4 value = (float4)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
+    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
+    g_NvidiaExt[ext].src1u.x = sampleIndex;
+    g_NvidiaExt[ext].src2u.xy = pixelOffset;
+    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
+    value.x = asfloat(g_NvidiaExt.IncrementCounter());
+    value.y = asfloat(g_NvidiaExt.IncrementCounter());
+    value.z = asfloat(g_NvidiaExt.IncrementCounter());
+    value.w = asfloat(g_NvidiaExt.IncrementCounter());
+    return value;
+}
+
+int NvEvaluateAttributeAtSampleForVPRS(int attrib, uint sampleIndex, int2 pixelOffset)
+{
+    int value = (int)0;
+    uint ext = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[ext].opcode 
= NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE; + g_NvidiaExt[ext].src0u.x = asuint(attrib.x); + g_NvidiaExt[ext].src1u.x = sampleIndex; + g_NvidiaExt[ext].src2u.xy = pixelOffset; + g_NvidiaExt[ext].numOutputsForIncCounter = 1; + value.x = asint(g_NvidiaExt.IncrementCounter()); + return value; +} + +int2 NvEvaluateAttributeAtSampleForVPRS(int2 attrib, uint sampleIndex, int2 pixelOffset) +{ + int2 value = (int2)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE; + g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy); + g_NvidiaExt[ext].src1u.x = sampleIndex; + g_NvidiaExt[ext].src2u.xy = pixelOffset; + g_NvidiaExt[ext].numOutputsForIncCounter = 2; + value.x = asint(g_NvidiaExt.IncrementCounter()); + value.y = asint(g_NvidiaExt.IncrementCounter()); + return value; +} + +int3 NvEvaluateAttributeAtSampleForVPRS(int3 attrib, uint sampleIndex, int2 pixelOffset) +{ + int3 value = (int3)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE; + g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz); + g_NvidiaExt[ext].src1u.x = sampleIndex; + g_NvidiaExt[ext].src2u.xy = pixelOffset; + g_NvidiaExt[ext].numOutputsForIncCounter = 3; + value.x = asint(g_NvidiaExt.IncrementCounter()); + value.y = asint(g_NvidiaExt.IncrementCounter()); + value.z = asint(g_NvidiaExt.IncrementCounter()); + return value; +} + +int4 NvEvaluateAttributeAtSampleForVPRS(int4 attrib, uint sampleIndex, int2 pixelOffset) +{ + int4 value = (int4)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE; + g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw); + g_NvidiaExt[ext].src1u.x = sampleIndex; + g_NvidiaExt[ext].src2u.xy = pixelOffset; + g_NvidiaExt[ext].numOutputsForIncCounter = 4; + value.x = asint(g_NvidiaExt.IncrementCounter()); + value.y = asint(g_NvidiaExt.IncrementCounter()); + value.z = asint(g_NvidiaExt.IncrementCounter()); + value.w = asint(g_NvidiaExt.IncrementCounter()); + return value; +} + +uint NvEvaluateAttributeAtSampleForVPRS(uint attrib, uint sampleIndex, int2 pixelOffset) +{ + uint value = (uint)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE; + g_NvidiaExt[ext].src0u.x = asuint(attrib.x); + g_NvidiaExt[ext].src1u.x = sampleIndex; + g_NvidiaExt[ext].src2u.xy = pixelOffset; + g_NvidiaExt[ext].numOutputsForIncCounter = 1; + value.x = asuint(g_NvidiaExt.IncrementCounter()); + return value; +} + +uint2 NvEvaluateAttributeAtSampleForVPRS(uint2 attrib, uint sampleIndex, int2 pixelOffset) +{ + uint2 value = (uint2)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE; + g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy); + g_NvidiaExt[ext].src1u.x = sampleIndex; + g_NvidiaExt[ext].src2u.xy = pixelOffset; + g_NvidiaExt[ext].numOutputsForIncCounter = 2; + value.x = asuint(g_NvidiaExt.IncrementCounter()); + value.y = asuint(g_NvidiaExt.IncrementCounter()); + return value; +} + +uint3 NvEvaluateAttributeAtSampleForVPRS(uint3 attrib, uint sampleIndex, int2 pixelOffset) +{ + uint3 value = (uint3)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE; + g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz); + g_NvidiaExt[ext].src1u.x = sampleIndex; + g_NvidiaExt[ext].src2u.xy = pixelOffset; + g_NvidiaExt[ext].numOutputsForIncCounter = 3; + value.x = asuint(g_NvidiaExt.IncrementCounter()); + value.y 
= asuint(g_NvidiaExt.IncrementCounter()); + value.z = asuint(g_NvidiaExt.IncrementCounter()); + return value; +} + +uint4 NvEvaluateAttributeAtSampleForVPRS(uint4 attrib, uint sampleIndex, int2 pixelOffset) +{ + uint4 value = (uint4)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE; + g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw); + g_NvidiaExt[ext].src1u.x = sampleIndex; + g_NvidiaExt[ext].src2u.xy = pixelOffset; + g_NvidiaExt[ext].numOutputsForIncCounter = 4; + value.x = asuint(g_NvidiaExt.IncrementCounter()); + value.y = asuint(g_NvidiaExt.IncrementCounter()); + value.z = asuint(g_NvidiaExt.IncrementCounter()); + value.w = asuint(g_NvidiaExt.IncrementCounter()); + return value; +} + + +float NvEvaluateAttributeSnappedForVPRS(float attrib, uint2 offset) +{ + float value = (float)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.x = asuint(attrib.x); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 1; + value.x = asfloat(g_NvidiaExt.IncrementCounter()); + return value; +} + +float2 NvEvaluateAttributeSnappedForVPRS(float2 attrib, uint2 offset) +{ + float2 value = (float2)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 2; + value.x = asfloat(g_NvidiaExt.IncrementCounter()); + value.y = asfloat(g_NvidiaExt.IncrementCounter()); + return value; +} + +float3 NvEvaluateAttributeSnappedForVPRS(float3 attrib, uint2 offset) +{ + float3 value = (float3)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 3; + value.x = asfloat(g_NvidiaExt.IncrementCounter()); + value.y = asfloat(g_NvidiaExt.IncrementCounter()); + value.z = asfloat(g_NvidiaExt.IncrementCounter()); + return value; +} + +float4 NvEvaluateAttributeSnappedForVPRS(float4 attrib, uint2 offset) +{ + float4 value = (float4)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 4; + value.x = asfloat(g_NvidiaExt.IncrementCounter()); + value.y = asfloat(g_NvidiaExt.IncrementCounter()); + value.z = asfloat(g_NvidiaExt.IncrementCounter()); + value.w = asfloat(g_NvidiaExt.IncrementCounter()); + return value; +} + +int NvEvaluateAttributeSnappedForVPRS(int attrib, uint2 offset) +{ + int value = (int)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.x = asuint(attrib.x); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 1; + value.x = asint(g_NvidiaExt.IncrementCounter()); + return value; +} + +int2 NvEvaluateAttributeSnappedForVPRS(int2 attrib, uint2 offset) +{ + int2 value = (int2)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 2; + value.x = 
asint(g_NvidiaExt.IncrementCounter()); + value.y = asint(g_NvidiaExt.IncrementCounter()); + return value; +} + +int3 NvEvaluateAttributeSnappedForVPRS(int3 attrib, uint2 offset) +{ + int3 value = (int3)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 3; + value.x = asint(g_NvidiaExt.IncrementCounter()); + value.y = asint(g_NvidiaExt.IncrementCounter()); + value.z = asint(g_NvidiaExt.IncrementCounter()); + return value; +} + +int4 NvEvaluateAttributeSnappedForVPRS(int4 attrib, uint2 offset) +{ + int4 value = (int4)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 4; + value.x = asint(g_NvidiaExt.IncrementCounter()); + value.y = asint(g_NvidiaExt.IncrementCounter()); + value.z = asint(g_NvidiaExt.IncrementCounter()); + value.w = asint(g_NvidiaExt.IncrementCounter()); + return value; +} + +uint NvEvaluateAttributeSnappedForVPRS(uint attrib, uint2 offset) +{ + uint value = (uint)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.x = asuint(attrib.x); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 1; + value.x = asuint(g_NvidiaExt.IncrementCounter()); + return value; +} + +uint2 NvEvaluateAttributeSnappedForVPRS(uint2 attrib, uint2 offset) +{ + uint2 value = (uint2)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 2; + value.x = asuint(g_NvidiaExt.IncrementCounter()); + value.y = asuint(g_NvidiaExt.IncrementCounter()); + return value; +} + +uint3 NvEvaluateAttributeSnappedForVPRS(uint3 attrib, uint2 offset) +{ + uint3 value = (uint3)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 3; + value.x = asuint(g_NvidiaExt.IncrementCounter()); + value.y = asuint(g_NvidiaExt.IncrementCounter()); + value.z = asuint(g_NvidiaExt.IncrementCounter()); + return value; +} + +uint4 NvEvaluateAttributeSnappedForVPRS(uint4 attrib, uint2 offset) +{ + uint4 value = (uint4)0; + uint ext = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED; + g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw); + g_NvidiaExt[ext].src1u.xy = offset; + g_NvidiaExt[ext].numOutputsForIncCounter = 4; + value.x = asuint(g_NvidiaExt.IncrementCounter()); + value.y = asuint(g_NvidiaExt.IncrementCounter()); + value.z = asuint(g_NvidiaExt.IncrementCounter()); + value.w = asuint(g_NvidiaExt.IncrementCounter()); + return value; +} + +// MATCH instruction variants +uint NvWaveMatch(uint value) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].src0u.x = value; + g_NvidiaExt[index].src1u.x = 1; + g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY; + // result is returned as the return value of IncrementCounter on fake UAV slot + return g_NvidiaExt.IncrementCounter(); +} + +uint 
NvWaveMatch(uint2 value)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xy = value.xy;
+    g_NvidiaExt[index].src1u.x = 2;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    return g_NvidiaExt.IncrementCounter();
+}
+
+uint NvWaveMatch(uint4 value)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u = value;
+    g_NvidiaExt[index].src1u.x = 4;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    return g_NvidiaExt.IncrementCounter();
+}
+
+uint NvWaveMatch(float value)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = asuint(value);
+    g_NvidiaExt[index].src1u.x = 1;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    return g_NvidiaExt.IncrementCounter();
+}
+
+uint NvWaveMatch(float2 value)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xy = asuint(value);
+    g_NvidiaExt[index].src1u.x = 2;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    return g_NvidiaExt.IncrementCounter();
+}
+
+uint NvWaveMatch(float4 value)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u = asuint(value);
+    g_NvidiaExt[index].src1u.x = 4;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    return g_NvidiaExt.IncrementCounter();
+}
+
+
+//----------------------------------------------------------------------------//
+//------------------------------ Footprint functions -------------------------//
+//----------------------------------------------------------------------------//
+// texSpace and smpSpace must be immediates, texIndex and smpIndex can be variable
+// offset must be immediate
+// the required components of the location and offset fields can be filled depending on the dimension/type of the texture
+// texType should be one of 2D or 3D as defined in nvShaderExtnEnums.h and should be an immediate literal
+// if the above restrictions are not met, the behaviour of this instruction is undefined
+
+uint4 NvFootprintFine(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, offset);
+}
+
+uint4 NvFootprintCoarse(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, offset);
+}
+
+
+
+uint4 NvFootprintFineBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float bias, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, bias, offset);
+}
+
+uint4 NvFootprintCoarseBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float bias, int3 offset = int3(0, 0, 0))
+{
+    return __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, 
gran, bias, offset); +} + + + +uint4 NvFootprintFineLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float lodLevel, int3 offset = int3(0, 0, 0)) +{ + return __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, lodLevel, offset); +} + +uint4 NvFootprintCoarseLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float lodLevel, int3 offset = int3(0, 0, 0)) +{ + return __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, lodLevel, offset); +} + + + +uint4 NvFootprintFineGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, int3 offset = int3(0, 0, 0)) +{ + return __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, ddx, ddy, offset); +} + +uint4 NvFootprintCoarseGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, int3 offset = int3(0, 0, 0)) +{ + return __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, ddx, ddy, offset); +} + +uint NvFootprintExtractLOD(uint4 blob) +{ + return ((blob.w & 0xF000) >> 12); +} + +uint NvFootprintExtractReturnGran(uint4 blob) +{ + return ((blob.z & 0xF000000) >> 24); +} + +uint2 NvFootprintExtractAnchorTileLoc2D(uint4 blob) +{ + uint2 loc; + loc.x = (blob.w & 0xFFF); + loc.y = (blob.z & 0xFFF); + return loc; +} + +uint3 NvFootprintExtractAnchorTileLoc3D(uint4 blob) +{ + uint3 loc; + loc.x = (blob.w & 0xFFF); + loc.y = ((blob.w & 0xFFF0000) >> 16); + loc.z = (blob.z & 0x1FFF); + return loc; +} + +uint2 NvFootprintExtractOffset2D(uint4 blob) +{ + uint2 loc; + loc.x = ((blob.z & 0x070000) >> 16); + loc.y = ((blob.z & 0x380000) >> 19); + return loc; +} + +uint3 NvFootprintExtractOffset3D(uint4 blob) +{ + uint3 loc; + loc.x = ((blob.z & 0x030000) >> 16); + loc.y = ((blob.z & 0x0C0000) >> 18); + loc.z = ((blob.z & 0x300000) >> 20); + return loc; +} + +uint2 NvFootprintExtractBitmask(uint4 blob) +{ + return blob.xy; +} + + +// Variant of Footprint extensions which returns isSingleLod (out parameter) +// isSingleLod = true -> This footprint request touched the texels from only single LOD. 
+uint4 NvFootprintFine(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, out uint isSingleLod, int3 offset = int3(0, 0, 0)) +{ + uint4 res = __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, offset); + isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED); + return res; +} + +uint4 NvFootprintCoarse(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, out uint isSingleLod, int3 offset = int3(0, 0, 0)) +{ + uint4 res = __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, offset); + isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED); + return res; +} + + + +uint4 NvFootprintFineBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float bias, out uint isSingleLod, int3 offset = int3(0, 0, 0)) +{ + uint4 res = __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, bias, offset); + isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED); + return res; +} + +uint4 NvFootprintCoarseBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float bias, out uint isSingleLod, int3 offset = int3(0, 0, 0)) +{ + uint4 res = __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, bias, offset); + isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED); + return res; +} + + + +uint4 NvFootprintFineLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float lodLevel, out uint isSingleLod, int3 offset = int3(0, 0, 0)) +{ + uint4 res = __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, lodLevel, offset); + isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED); + return res; +} + +uint4 NvFootprintCoarseLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float lodLevel, out uint isSingleLod, int3 offset = int3(0, 0, 0)) +{ + uint4 res = __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, lodLevel, offset); + isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED); + return res; +} + + + +uint4 NvFootprintFineGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, out uint isSingleLod, int3 offset = int3(0, 0, 0)) +{ + uint4 res = __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, ddx, ddy, offset); + isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED); + return res; +} + +uint4 NvFootprintCoarseGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, out uint isSingleLod, int3 offset = int3(0, 0, 0)) +{ + uint4 res = __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, ddx, ddy, offset); + isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED); + return res; +} + + +uint NvActiveThreads() +{ + return NvBallot(1); +} + + 
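+// Illustrative sketch (not part of the original header): a full-warp butterfly
+// reduction built from NvShflXor, in the same spirit as the prefix routines below.
+// 'WarpReduceSum' is a hypothetical helper name. After log2(32) = 5 passes every
+// active lane holds the warp-wide sum (all 32 lanes are assumed active).
+int WarpReduceSum(int v)
+{
+    for (uint offset = NV_WARP_SIZE / 2; offset > 0; offset >>= 1)
+        v += NvShflXor(v, offset);   // exchange with the lane 'offset' away
+    return v;
+}
+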
+//----------------------------------------------------------------------------//
+//------------------------------ WaveMultiPrefix functions -------------------//
+//----------------------------------------------------------------------------//
+
+// Following are the WaveMultiPrefix functions for different operations (Add, BitAnd, BitOr, BitXor) and different datatypes (uint, uint2, uint4).
+// This is a set of functions which implement multi-prefix operations among the set of active lanes in the current wave (WARP).
+// A multi-prefix operation comprises a set of prefix operations, executed in parallel within subsets of lanes identified with the provided bitmasks.
+// These bitmasks represent partitioning of the set of active lanes in the current wave into N groups (where N is the number of unique masks across all lanes in the wave).
+// N prefix operations are then performed, each within its corresponding group.
+// The groups are assumed to be non-intersecting (that is, a given lane can be a member of one and only one group),
+// and bitmasks in all lanes belonging to the same group are required to be the same.
+// There are two types of functions - exclusive and inclusive prefix operations.
+// e.g. for the NvWaveMultiPrefixInclusiveAdd(val, mask) operation, the expected output for each group (lanes whose mask input is the same) is:
+// the i^th thread in a group has value = sum(values of threads 0 to i)
+// For the exclusive version of the same operation,
+// the i^th thread in a group has value = sum(values of threads 0 to i-1), and the 0th thread in the group has value 0
+
+// Extensions for Add
+uint NvWaveMultiPrefixInclusiveAdd(uint val, uint mask)
+{
+    uint temp;
+    uint a = NvActiveThreads();
+    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
+    uint nextLane = firstbithigh(remainingThreads);
+    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
+    {
+        temp = NvShfl(val, nextLane);
+        uint laneValid;
+        // As remainingThreads only has threads in the group with smaller thread ids than its own, nextLane can never be 31 for any thread in the group except the smallest one.
+        // For the smallest thread in the group, remainingThreads is 0 --> nextLane is ~0 (i.e. considering the last 5 bits, it is 31).
+        // So, passing maskClampValue=30 to __NvShflGeneric makes it return laneValid=false for the smallest thread in the group; update val and nextLane based on laneValid.
+        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
+        if (laneValid) // if nextLane's nextLane is valid
+        {
+            val = val + temp;
+            nextLane = newLane;
+        }
+    }
+    return val;
+}
+
+uint NvWaveMultiPrefixExclusiveAdd(uint val, uint mask)
+{
+    uint temp;
+    uint a = NvActiveThreads();
+    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
+    uint lane = firstbithigh(remainingThreads);
+    temp = NvShfl(val, lane);
+    val = remainingThreads != 0 ? 
temp : 0; + return NvWaveMultiPrefixInclusiveAdd(val, mask); +} + +uint2 NvWaveMultiPrefixInclusiveAdd(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val + temp; + nextLane = newLane; + } + } + return val; +} + +uint2 NvWaveMultiPrefixExclusiveAdd(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint2(0, 0); + return NvWaveMultiPrefixInclusiveAdd(val, mask); +} + +uint4 NvWaveMultiPrefixInclusiveAdd(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val + temp; + nextLane = newLane; + } + } + return val; +} + +uint4 NvWaveMultiPrefixExclusiveAdd(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint4(0, 0, 0, 0); + return NvWaveMultiPrefixInclusiveAdd(val, mask); +} + +// MultiPrefix extensions for Bitand +uint NvWaveMultiPrefixInclusiveAnd(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val & temp; + nextLane = newLane; + } + } + return val; +} + +uint NvWaveMultiPrefixExclusiveAnd(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? 
temp : ~0; + return NvWaveMultiPrefixInclusiveAnd(val, mask); +} + +uint2 NvWaveMultiPrefixInclusiveAnd(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val & temp; + nextLane = newLane; + } + } + return val; +} + +uint2 NvWaveMultiPrefixExclusiveAnd(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint2(~0, ~0); + return NvWaveMultiPrefixInclusiveAnd(val, mask); +} + + +uint4 NvWaveMultiPrefixInclusiveAnd(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val & temp; + nextLane = newLane; + } + } + return val; +} + +uint4 NvWaveMultiPrefixExclusiveAnd(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint4(~0, ~0, ~0, ~0); + return NvWaveMultiPrefixInclusiveAnd(val, mask); +} + + +// MultiPrefix extensions for BitOr +uint NvWaveMultiPrefixInclusiveOr(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val | temp; + nextLane = newLane; + } + } + return val; +} + +uint NvWaveMultiPrefixExclusiveOr(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? 
temp : 0; + return NvWaveMultiPrefixInclusiveOr(val, mask); +} + +uint2 NvWaveMultiPrefixInclusiveOr(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val | temp; + nextLane = newLane; + } + } + return val; +} + +uint2 NvWaveMultiPrefixExclusiveOr(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint2(0, 0); + return NvWaveMultiPrefixInclusiveOr(val, mask); +} + + +uint4 NvWaveMultiPrefixInclusiveOr(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val | temp; + nextLane = newLane; + } + } + return val; +} + +uint4 NvWaveMultiPrefixExclusiveOr(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint4(0, 0, 0, 0); + return NvWaveMultiPrefixInclusiveOr(val, mask); +} + + +// MultiPrefix extensions for BitXOr +uint NvWaveMultiPrefixInclusiveXOr(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val ^ temp; + nextLane = newLane; + } + } + return val; +} + +uint NvWaveMultiPrefixExclusiveXOr(uint val, uint mask) +{ + uint temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? 
temp : 0; + return NvWaveMultiPrefixInclusiveXOr(val, mask); +} + +uint2 NvWaveMultiPrefixInclusiveXOr(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val ^ temp; + nextLane = newLane; + } + } + return val; +} + +uint2 NvWaveMultiPrefixExclusiveXOr(uint2 val, uint mask) +{ + uint2 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint2(0, 0); + return NvWaveMultiPrefixInclusiveXOr(val, mask); +} + + +uint4 NvWaveMultiPrefixInclusiveXOr(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint nextLane = firstbithigh(remainingThreads); + for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++) + { + temp = NvShfl(val, nextLane); + uint laneValid; + uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid)); + if (laneValid) // if nextLane's nextLane is valid + { + val = val ^ temp; + nextLane = newLane; + } + } + return val; +} + +uint4 NvWaveMultiPrefixExclusiveXOr(uint4 val, uint mask) +{ + uint4 temp; + uint a = NvActiveThreads(); + uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask; + uint lane = firstbithigh(remainingThreads); + temp = NvShfl(val, lane); + val = remainingThreads != 0 ? temp : uint4(0, 0, 0, 0); + return NvWaveMultiPrefixInclusiveXOr(val, mask); +} + +//----------------------------------------------------------------------------// +//------------------------- DXR HitObject Extension --------------------------// +//----------------------------------------------------------------------------// + +// Support for templates in HLSL requires HLSL 2021+. When using dxc, +// use the -HV 2021 command line argument to enable these versions. 
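+// Illustrative usage sketch (not part of the original header), assuming a raygen
+// shader with a scene TLAS, a ray, and a payload struct; all names here are
+// hypothetical:
+//
+//   MyPayload payload = (MyPayload)0;
+//   NvHitObject hit = NvTraceRayHitObject(sceneTLAS, RAY_FLAG_NONE, 0xFF,
+//                                         0, 1, 0, ray, payload);
+//   NvReorderThread(hit);                        // regroup threads by hit for coherence
+//   NvInvokeHitObject(sceneTLAS, hit, payload);  // then run the hit/miss shading
+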
+#if defined(__HLSL_VERSION) && (__HLSL_VERSION >= 2021) && !defined(NV_HITOBJECT_USE_MACRO_API)
+
+struct NvHitObject {
+    uint _handle;
+
+    bool IsMiss()
+    {
+        uint index = g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_MISS;
+        g_NvidiaExt[index].src0u.x = _handle;
+        uint ret = g_NvidiaExt.IncrementCounter();
+        return ret != 0;
+    }
+
+    bool IsHit()
+    {
+        uint index = g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_HIT;
+        g_NvidiaExt[index].src0u.x = _handle;
+        uint ret = g_NvidiaExt.IncrementCounter();
+        return ret != 0;
+    }
+
+    bool IsNop()
+    {
+        uint index = g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_NOP;
+        g_NvidiaExt[index].src0u.x = _handle;
+        uint ret = g_NvidiaExt.IncrementCounter();
+        return ret != 0;
+    }
+
+    uint GetInstanceID()
+    {
+        uint index = g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_ID;
+        g_NvidiaExt[index].src0u.x = _handle;
+        return g_NvidiaExt.IncrementCounter();
+    }
+
+    uint GetInstanceIndex()
+    {
+        uint index = g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_INDEX;
+        g_NvidiaExt[index].src0u.x = _handle;
+        return g_NvidiaExt.IncrementCounter();
+    }
+
+    uint GetPrimitiveIndex()
+    {
+        uint index = g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_PRIMITIVE_INDEX;
+        g_NvidiaExt[index].src0u.x = _handle;
+        return g_NvidiaExt.IncrementCounter();
+    }
+
+    uint GetGeometryIndex()
+    {
+        uint index = g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_GEOMETRY_INDEX;
+        g_NvidiaExt[index].src0u.x = _handle;
+        return g_NvidiaExt.IncrementCounter();
+    }
+
+    uint GetHitKind()
+    {
+        uint index = g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_HIT_KIND;
+        g_NvidiaExt[index].src0u.x = _handle;
+        return g_NvidiaExt.IncrementCounter();
+    }
+
+    RayDesc GetRayDesc()
+    {
+        uint index = g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_RAY_DESC;
+        g_NvidiaExt[index].src0u.x = _handle;
+
+        uint tmin = g_NvidiaExt.IncrementCounter();
+        uint tmax = g_NvidiaExt.IncrementCounter();
+        uint rayOrgX = g_NvidiaExt.IncrementCounter();
+        uint rayOrgY = g_NvidiaExt.IncrementCounter();
+        uint rayOrgZ = g_NvidiaExt.IncrementCounter();
+        uint rayDirX = g_NvidiaExt.IncrementCounter();
+        uint rayDirY = g_NvidiaExt.IncrementCounter();
+        uint rayDirZ = g_NvidiaExt.IncrementCounter();
+
+        RayDesc ray;
+        ray.TMin = asfloat(tmin);
+        ray.TMax = asfloat(tmax);
+        ray.Origin.x = asfloat(rayOrgX);
+        ray.Origin.y = asfloat(rayOrgY);
+        ray.Origin.z = asfloat(rayOrgZ);
+        ray.Direction.x = asfloat(rayDirX);
+        ray.Direction.y = asfloat(rayDirY);
+        ray.Direction.z = asfloat(rayDirZ);
+
+        return ray;
+    }
+
+    template <typename T>
+    T GetAttributes()
+    {
+        uint index = g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_ATTRIBUTES;
+        g_NvidiaExt[index].src0u.x = _handle;
+        uint callHandle = g_NvidiaExt.IncrementCounter();
+
+        T attrs;
+        CallShader(callHandle, attrs);
+        return attrs;
+    }
+
+    uint GetShaderTableIndex()
+    {
+        uint index = g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_SHADER_TABLE_INDEX;
+        g_NvidiaExt[index].src0u.x = _handle;
+        return g_NvidiaExt.IncrementCounter();
+    }
+
+    uint LoadLocalRootTableConstant(uint RootConstantOffsetInBytes)
+    {
+        uint index = 
g_NvidiaExt.IncrementCounter();
+        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_LOAD_LOCAL_ROOT_TABLE_CONSTANT;
+        g_NvidiaExt[index].src0u.x = _handle;
+        g_NvidiaExt[index].src0u.y = RootConstantOffsetInBytes;
+        return g_NvidiaExt.IncrementCounter();
+    }
+};
+
+template <typename T>
+NvHitObject NvTraceRayHitObject(
+    RaytracingAccelerationStructure AccelerationStructure,
+    uint RayFlags,
+    uint InstanceInclusionMask,
+    uint RayContributionToHitGroupIndex,
+    uint MultiplierForGeometryContributionToHitGroupIndex,
+    uint MissShaderIndex,
+    RayDesc Ray,
+    inout T Payload)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_TRACE_RAY;
+    g_NvidiaExt[index].numOutputsForIncCounter = 2;
+    g_NvidiaExt[index].src0u.x = MissShaderIndex;
+    uint hitHandle = g_NvidiaExt.IncrementCounter();
+    uint traceHandle = g_NvidiaExt.IncrementCounter();
+
+    TraceRay(AccelerationStructure, RayFlags, InstanceInclusionMask, RayContributionToHitGroupIndex, MultiplierForGeometryContributionToHitGroupIndex, traceHandle, Ray, Payload);
+
+    NvHitObject hitObj;
+    hitObj._handle = hitHandle;
+    return hitObj;
+}
+
+template <typename T>
+NvHitObject NvMakeHit(
+    RaytracingAccelerationStructure AccelerationStructure,
+    uint InstanceIndex,
+    uint GeometryIndex,
+    uint PrimitiveIndex,
+    uint HitKind,
+    uint RayContributionToHitGroupIndex,
+    uint MultiplierForGeometryContributionToHitGroupIndex,
+    RayDesc Ray,
+    T Attributes)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT;
+    g_NvidiaExt[index].numOutputsForIncCounter = 2;
+    g_NvidiaExt[index].src0u.x = InstanceIndex;
+    g_NvidiaExt[index].src0u.y = GeometryIndex;
+    g_NvidiaExt[index].src0u.z = PrimitiveIndex;
+    g_NvidiaExt[index].src0u.w = HitKind;
+    g_NvidiaExt[index].src1u.x = RayContributionToHitGroupIndex;
+    g_NvidiaExt[index].src1u.y = MultiplierForGeometryContributionToHitGroupIndex;
+    uint hitHandle = g_NvidiaExt.IncrementCounter();
+    uint traceHandle = g_NvidiaExt.IncrementCounter();
+
+    struct AttrWrapper { T Attrs; };
+    AttrWrapper wrapper;
+    wrapper.Attrs = Attributes;
+    CallShader(traceHandle, wrapper);
+
+    struct DummyPayload { int a; };
+    DummyPayload payload;
+    TraceRay(AccelerationStructure, 0, 0, 0, 0, traceHandle, Ray, payload);
+
+    NvHitObject hitObj;
+    hitObj._handle = hitHandle;
+    return hitObj;
+}
+
+template <typename T>
+NvHitObject NvMakeHitWithRecordIndex(
+    uint HitGroupRecordIndex,
+    RaytracingAccelerationStructure AccelerationStructure,
+    uint InstanceIndex,
+    uint GeometryIndex,
+    uint PrimitiveIndex,
+    uint HitKind,
+    RayDesc Ray,
+    T Attributes)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT_WITH_RECORD_INDEX;
+    g_NvidiaExt[index].numOutputsForIncCounter = 2;
+    g_NvidiaExt[index].src0u.x = InstanceIndex;
+    g_NvidiaExt[index].src0u.y = GeometryIndex;
+    g_NvidiaExt[index].src0u.z = PrimitiveIndex;
+    g_NvidiaExt[index].src0u.w = HitKind;
+    g_NvidiaExt[index].src1u.x = HitGroupRecordIndex;
+    uint hitHandle = g_NvidiaExt.IncrementCounter();
+    uint traceHandle = g_NvidiaExt.IncrementCounter();
+
+    struct AttrWrapper { T Attrs; };
+    AttrWrapper wrapper;
+    wrapper.Attrs = Attributes;
+    CallShader(traceHandle, wrapper);
+
+    struct DummyPayload { int a; };
+    DummyPayload payload;
+    TraceRay(AccelerationStructure, 0, 0, 0, 0, traceHandle, Ray, payload);
+
+    NvHitObject hitObj;
+    hitObj._handle = hitHandle;
+    return hitObj;
+}
+
+NvHitObject NvMakeMiss(
+    uint MissShaderIndex,
+    RayDesc Ray)
+{
+    uint index = 
g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_MISS; + g_NvidiaExt[index].src0u.x = MissShaderIndex; + g_NvidiaExt[index].src0u.y = asuint(Ray.TMin); + g_NvidiaExt[index].src0u.z = asuint(Ray.TMax); + g_NvidiaExt[index].src1u.x = asuint(Ray.Origin.x); + g_NvidiaExt[index].src1u.y = asuint(Ray.Origin.y); + g_NvidiaExt[index].src1u.z = asuint(Ray.Origin.z); + g_NvidiaExt[index].src2u.x = asuint(Ray.Direction.x); + g_NvidiaExt[index].src2u.y = asuint(Ray.Direction.y); + g_NvidiaExt[index].src2u.z = asuint(Ray.Direction.z); + uint hitHandle = g_NvidiaExt.IncrementCounter(); + + NvHitObject hitObj; + hitObj._handle = hitHandle; + return hitObj; +} + +NvHitObject NvMakeNop() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_NOP; + uint hitHandle = g_NvidiaExt.IncrementCounter(); + + NvHitObject hitObj; + hitObj._handle = hitHandle; + return hitObj; +} + +void NvReorderThread(uint CoherenceHint, uint NumCoherenceHintBits) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD; + g_NvidiaExt[index].src0u.x = 0; + g_NvidiaExt[index].src0u.y = 0; + g_NvidiaExt[index].src0u.z = CoherenceHint; + g_NvidiaExt[index].src0u.w = NumCoherenceHintBits; + g_NvidiaExt.IncrementCounter(); +} + +void NvReorderThread(NvHitObject HitObj, uint CoherenceHint, uint NumCoherenceHintBits) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD; + g_NvidiaExt[index].src0u.x = 1; + g_NvidiaExt[index].src0u.y = HitObj._handle; + g_NvidiaExt[index].src0u.z = CoherenceHint; + g_NvidiaExt[index].src0u.w = NumCoherenceHintBits; + g_NvidiaExt.IncrementCounter(); +} + +void NvReorderThread(NvHitObject HitObj) +{ + NvReorderThread(HitObj, 0, 0); +} + +template +void NvInvokeHitObject( + RaytracingAccelerationStructure AccelerationStructure, + NvHitObject HitObj, + inout T Payload) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_INVOKE; + g_NvidiaExt[index].src0u.x = HitObj._handle; + uint handle = g_NvidiaExt.IncrementCounter(); + + TraceRay(AccelerationStructure, 0, 0, 0, 0, handle, (RayDesc)0, Payload); +} + +// Macro-based version of the HitObject API. Use this when HLSL 2021 is not available. +// Enable by specifying #define NV_HITOBJECT_USE_MACRO_API before including this header. 
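+//
+// Editorial sketch (illustrative only, not part of the NVIDIA header): a
+// hypothetical raygen shader driving the macro API. 'accel', 'ray', and
+// 'payload' are assumed to be declared by the application, and the instance
+// mask / shader-table indices below are placeholders.
+//
+//   #define NV_HITOBJECT_USE_MACRO_API
+//   #include "nvHLSLExtns.h"
+//   ...
+//   NvHitObject hitObj;
+//   NvTraceRayHitObject(accel, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload, hitObj);
+//   NvReorderThread(hitObj);                    // improve coherence before shading
+//   NvInvokeHitObject(accel, hitObj, payload);  // run the hit/miss shader recorded in hitObj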
+#elif defined(NV_HITOBJECT_USE_MACRO_API) + +struct NvHitObject { + uint _handle; + + bool IsMiss() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_MISS; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + bool IsHit() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_HIT; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + bool IsNop() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_NOP; + g_NvidiaExt[index].src0u.x = _handle; + uint ret = g_NvidiaExt.IncrementCounter(); + return ret != 0; + } + + uint GetInstanceID() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_ID; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint GetInstanceIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint GetPrimitiveIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_PRIMITIVE_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint GetGeometryIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_GEOMETRY_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint GetHitKind() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_HIT_KIND; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + RayDesc GetRayDesc() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_RAY_DESC; + g_NvidiaExt[index].src0u.x = _handle; + + uint tmin = g_NvidiaExt.IncrementCounter(); + uint tmax = g_NvidiaExt.IncrementCounter(); + uint rayOrgX = g_NvidiaExt.IncrementCounter(); + uint rayOrgY = g_NvidiaExt.IncrementCounter(); + uint rayOrgZ = g_NvidiaExt.IncrementCounter(); + uint rayDirX = g_NvidiaExt.IncrementCounter(); + uint rayDirY = g_NvidiaExt.IncrementCounter(); + uint rayDirZ = g_NvidiaExt.IncrementCounter(); + + RayDesc ray; + ray.TMin = asfloat(tmin); + ray.TMax = asfloat(tmax); + ray.Origin.x = asfloat(rayOrgX); + ray.Origin.y = asfloat(rayOrgY); + ray.Origin.z = asfloat(rayOrgZ); + ray.Direction.x = asfloat(rayDirX); + ray.Direction.y = asfloat(rayDirY); + ray.Direction.z = asfloat(rayDirZ); + + return ray; + } + + uint GetShaderTableIndex() + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_SHADER_TABLE_INDEX; + g_NvidiaExt[index].src0u.x = _handle; + return g_NvidiaExt.IncrementCounter(); + } + + uint LoadLocalRootTableConstant(uint RootConstantOffsetInBytes) + { + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_LOAD_LOCAL_ROOT_TABLE_CONSTANT; + g_NvidiaExt[index].src0u.x = _handle; + g_NvidiaExt[index].src0u.y = RootConstantOffsetInBytes; + return g_NvidiaExt.IncrementCounter(); + } +}; + +#define 
NvTraceRayHitObject(AccelerationStructure,RayFlags,InstanceInclusionMask,RayContributionToHitGroupIndex,MultiplierForGeometryContributionToHitGroupIndex,MissShaderIndex,Ray,Payload,ResultHitObj) \ +do { \ + uint _rayFlags = RayFlags; \ + uint _instanceInclusionMask = InstanceInclusionMask; \ + uint _rayContributionToHitGroupIndex = RayContributionToHitGroupIndex; \ + uint _multiplierForGeometryContributionToHitGroupIndex = MultiplierForGeometryContributionToHitGroupIndex; \ + uint _missShaderIndex = MissShaderIndex; \ + RayDesc _ray = Ray; \ + uint _index = g_NvidiaExt.IncrementCounter(); \ + g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_TRACE_RAY; \ + g_NvidiaExt[_index].numOutputsForIncCounter = 2; \ + g_NvidiaExt[_index].src0u.x = _missShaderIndex; \ + uint _hitHandle = g_NvidiaExt.IncrementCounter(); \ + uint _traceHandle = g_NvidiaExt.IncrementCounter(); \ + TraceRay(AccelerationStructure, _rayFlags, _instanceInclusionMask, _rayContributionToHitGroupIndex, _multiplierForGeometryContributionToHitGroupIndex, _traceHandle, _ray, Payload); \ + ResultHitObj._handle = _hitHandle; \ +} while(0) + +struct NvHitObjectMacroDummyPayloadType { int a; }; + +#define NvMakeHit(AccelerationStructure,InstanceIndex,GeometryIndex,PrimitiveIndex,HitKind,RayContributionToHitGroupIndex,MultiplierForGeometryContributionToHitGroupIndex,Ray,Attributes,ResultHitObj) \ +do { \ + uint _instanceIndex = InstanceIndex; \ + uint _geometryIndex = GeometryIndex; \ + uint _primitiveIndex = PrimitiveIndex; \ + uint _hitKind = HitKind; \ + uint _rayContributionToHitGroupIndex = RayContributionToHitGroupIndex; \ + uint _multiplierForGeometryContributionToHitGroupIndex = MultiplierForGeometryContributionToHitGroupIndex; \ + RayDesc _ray = Ray; \ + uint _index = g_NvidiaExt.IncrementCounter(); \ + g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT; \ + g_NvidiaExt[_index].numOutputsForIncCounter = 2; \ + g_NvidiaExt[_index].src0u.x = _instanceIndex; \ + g_NvidiaExt[_index].src0u.y = _geometryIndex; \ + g_NvidiaExt[_index].src0u.z = _primitiveIndex; \ + g_NvidiaExt[_index].src0u.w = _hitKind; \ + g_NvidiaExt[_index].src1u.x = _rayContributionToHitGroupIndex; \ + g_NvidiaExt[_index].src1u.y = _multiplierForGeometryContributionToHitGroupIndex; \ + uint _hitHandle = g_NvidiaExt.IncrementCounter(); \ + uint _traceHandle = g_NvidiaExt.IncrementCounter(); \ + CallShader(_traceHandle, Attributes); \ + NvHitObjectMacroDummyPayloadType _payload; \ + TraceRay(AccelerationStructure, 0, 0, 0, 0, _traceHandle, _ray, _payload); \ + ResultHitObj._handle = _hitHandle; \ +} while(0) + +#define NvMakeHitWithRecordIndex(HitGroupRecordIndex,AccelerationStructure,InstanceIndex,GeometryIndex,PrimitiveIndex,HitKind,Ray,Attributes,ResultHitObj) \ +do { \ + uint _hitGroupRecordIndex = HitGroupRecordIndex; \ + uint _instanceIndex = InstanceIndex; \ + uint _geometryIndex = GeometryIndex; \ + uint _primitiveIndex = PrimitiveIndex; \ + uint _hitKind = HitKind; \ + RayDesc _ray = Ray; \ + uint _index = g_NvidiaExt.IncrementCounter(); \ + g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT_WITH_RECORD_INDEX; \ + g_NvidiaExt[_index].numOutputsForIncCounter = 2; \ + g_NvidiaExt[_index].src0u.x = _instanceIndex; \ + g_NvidiaExt[_index].src0u.y = _geometryIndex; \ + g_NvidiaExt[_index].src0u.z = _primitiveIndex; \ + g_NvidiaExt[_index].src0u.w = _hitKind; \ + g_NvidiaExt[_index].src1u.x = _hitGroupRecordIndex; \ + uint _hitHandle = g_NvidiaExt.IncrementCounter(); \ + uint _traceHandle = g_NvidiaExt.IncrementCounter(); \ + 
CallShader(_traceHandle, Attributes); \ + NvHitObjectMacroDummyPayloadType _payload; \ + TraceRay(AccelerationStructure, 0, 0, 0, 0, _traceHandle, _ray, _payload); \ + ResultHitObj._handle = _hitHandle; \ +} while(0) + +NvHitObject NvMakeMiss( + uint MissShaderIndex, + RayDesc Ray) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_MISS; + g_NvidiaExt[index].src0u.x = MissShaderIndex; + g_NvidiaExt[index].src0u.y = asuint(Ray.TMin); + g_NvidiaExt[index].src0u.z = asuint(Ray.TMax); + g_NvidiaExt[index].src1u.x = asuint(Ray.Origin.x); + g_NvidiaExt[index].src1u.y = asuint(Ray.Origin.y); + g_NvidiaExt[index].src1u.z = asuint(Ray.Origin.z); + g_NvidiaExt[index].src2u.x = asuint(Ray.Direction.x); + g_NvidiaExt[index].src2u.y = asuint(Ray.Direction.y); + g_NvidiaExt[index].src2u.z = asuint(Ray.Direction.z); + uint hitHandle = g_NvidiaExt.IncrementCounter(); + + NvHitObject hitObj; + hitObj._handle = hitHandle; + return hitObj; +} + +NvHitObject NvMakeNop() +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_NOP; + uint hitHandle = g_NvidiaExt.IncrementCounter(); + + NvHitObject hitObj; + hitObj._handle = hitHandle; + return hitObj; +} + +#define NvGetAttributesFromHitObject(HitObj,ResultAttributes) \ +do { \ + uint _index = g_NvidiaExt.IncrementCounter(); \ + g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_ATTRIBUTES; \ + g_NvidiaExt[_index].src0u.x = HitObj._handle; \ + uint _callHandle = g_NvidiaExt.IncrementCounter(); \ + CallShader(_callHandle, ResultAttributes); \ +} while(0) + +void NvReorderThread(uint CoherenceHint, uint NumCoherenceHintBits) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD; + g_NvidiaExt[index].src0u.x = 0; + g_NvidiaExt[index].src0u.y = 0; + g_NvidiaExt[index].src0u.z = CoherenceHint; + g_NvidiaExt[index].src0u.w = NumCoherenceHintBits; + g_NvidiaExt.IncrementCounter(); +} + +void NvReorderThread(NvHitObject HitObj, uint CoherenceHint, uint NumCoherenceHintBits) +{ + uint index = g_NvidiaExt.IncrementCounter(); + g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD; + g_NvidiaExt[index].src0u.x = 1; + g_NvidiaExt[index].src0u.y = HitObj._handle; + g_NvidiaExt[index].src0u.z = CoherenceHint; + g_NvidiaExt[index].src0u.w = NumCoherenceHintBits; + g_NvidiaExt.IncrementCounter(); +} + +void NvReorderThread(NvHitObject HitObj) +{ + NvReorderThread(HitObj, 0, 0); +} + +#define NvInvokeHitObject(AccelerationStructure,HitObj,Payload) \ +do { \ + uint _index = g_NvidiaExt.IncrementCounter(); \ + g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_INVOKE; \ + g_NvidiaExt[_index].src0u.x = HitObj._handle; \ + uint _handle = g_NvidiaExt.IncrementCounter(); \ + TraceRay(AccelerationStructure, 0, 0, 0, 0, _handle, (RayDesc)0, Payload); \ +} while(0) + +#endif diff --git a/vendor/nvapi/nvHLSLExtnsInternal.h b/vendor/nvapi/nvHLSLExtnsInternal.h new file mode 100644 index 0000000000..c5936a5e0d --- /dev/null +++ b/vendor/nvapi/nvHLSLExtnsInternal.h @@ -0,0 +1,767 @@ + /************************************************************************************************************************************\ +|* *| +|* Copyright © 2012 NVIDIA Corporation. All rights reserved. *| +|* *| +|* NOTICE TO USER: *| +|* *| +|* This software is subject to NVIDIA ownership rights under U.S. and international Copyright laws. 
*|
+|*                                                                                                                                    *|
+|*  This software and the information contained herein are PROPRIETARY and CONFIDENTIAL to NVIDIA                                    *|
+|*  and are being provided solely under the terms and conditions of an NVIDIA software license agreement.                            *|
+|*  Otherwise, you have no rights to use or access this software in any manner.                                                      *|
+|*                                                                                                                                    *|
+|*  If not covered by the applicable NVIDIA software license agreement:                                                              *|
+|*  NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOFTWARE FOR ANY PURPOSE.                                           *|
+|*  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.                                                          *|
+|*  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,                                                                    *|
+|*  INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.                      *|
+|*  IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,                              *|
+|*  OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,                         *|
+|*  NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOURCE CODE.            *|
+|*                                                                                                                                    *|
+|*  U.S. Government End Users.                                                                                                        *|
+|*  This software is a "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 1995),                                      *|
+|*  consisting of "commercial computer software" and "commercial computer software documentation"                                    *|
+|*  as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government only as a commercial end item.     *|
+|*  Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995),                                         *|
+|*  all U.S. Government End Users acquire the software with only those rights set forth herein.                                      *|
+|*                                                                                                                                    *|
+|*  Any use of this software in individual and commercial software must include,                                                     *|
+|*  in the user documentation and internal comments to the code,                                                                     *|
+|*  the above Disclaimer (as applicable) and U.S. Government End Users Notice.                                                       *|
+|*                                                                                                                                    *|
+ \************************************************************************************************************************************/
+
+////////////////////////// NVIDIA SHADER EXTENSIONS /////////////////
+// internal functions
+// Functions in this file are not expected to be called by apps directly
+
+#include "nvShaderExtnEnums.h"
+
+struct NvShaderExtnStruct
+{
+    uint   opcode;                  // opcode
+    uint   rid;                     // resource ID
+    uint   sid;                     // sampler ID
+
+    uint4  dst1u;                   // destination operand 1 (for instructions that need extra destination operands)
+    uint4  src3u;                   // source operand 3
+    uint4  src4u;                   // source operand 4
+    uint4  src5u;                   // source operand 5
+
+    uint4  src0u;                   // uint source operand 0
+    uint4  src1u;                   // uint source operand 1
+    uint4  src2u;                   // uint source operand 2
+    uint4  dst0u;                   // uint destination operand
+
+    uint   markUavRef;              // the next store to UAV is fake and is used only to identify the uav slot
+    uint   numOutputsForIncCounter; // Used for output to IncrementCounter
+    float  padding1[27];            // struct size: 256 bytes
+};
+
+// RW structured buffer for Nvidia shader extensions
+
+// Application needs to define NV_SHADER_EXTN_SLOT as an unused slot, which should be
+// set using the NvAPI_D3D11_SetNvShaderExtnSlot() call before creating the first shader that
+// uses nvidia shader extensions. E.g. before including this file in shader define it as:
+// #define NV_SHADER_EXTN_SLOT u7
+
+// For SM5.1, application needs to define NV_SHADER_EXTN_REGISTER_SPACE as register space
+// E.g. before including this file in shader define it as:
+// #define NV_SHADER_EXTN_REGISTER_SPACE space2
+
+// Note that other operations on this UAV will be ignored, so the application
+// should bind a null resource
+
+#ifdef NV_SHADER_EXTN_REGISTER_SPACE
+RWStructuredBuffer<NvShaderExtnStruct> g_NvidiaExt : register( NV_SHADER_EXTN_SLOT, NV_SHADER_EXTN_REGISTER_SPACE );
+#else
+RWStructuredBuffer<NvShaderExtnStruct> g_NvidiaExt : register( NV_SHADER_EXTN_SLOT );
+#endif
+
+//----------------------------------------------------------------------------//
+// the exposed SHFL instructions accept a mask parameter in src2
+// To compute lane mask from width of segment:
+// minLaneID : currentLaneId & src2[12:8]
+// maxLaneID : minLaneId | (src2[4:0] & ~src2[12:8])
+// where [minLaneId, maxLaneId] defines the segment where currentLaneId belongs
+// we always set src2[4:0] to 11111 (0x1F), and set src2[12:8] as (32 - width)
+int __NvGetShflMaskFromWidth(uint width)
+{
+    return ((NV_WARP_SIZE - width) << 8) | 0x1F;
+}
+
+//----------------------------------------------------------------------------//
+
+void __NvReferenceUAVForOp(RWByteAddressBuffer uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav.Store(index, 0);
+}
+
+void __NvReferenceUAVForOp(RWTexture1D<float2> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[index] = float2(0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture2D<float2> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint2(index,index)] = float2(0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture3D<float2> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint3(index,index,index)] = float2(0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture1D<float4> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[index] = float4(0,0,0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture2D<float4> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint2(index,index)] = float4(0,0,0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture3D<float4> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint3(index,index,index)] = float4(0,0,0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture1D<float> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[index] = 0.0f;
+}
+
+void __NvReferenceUAVForOp(RWTexture2D<float> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint2(index,index)] = 0.0f;
+}
+
+void __NvReferenceUAVForOp(RWTexture3D<float> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint3(index,index,index)] = 0.0f;
+}
+
+
+void __NvReferenceUAVForOp(RWTexture1D<uint2> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[index] = uint2(0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture2D<uint2> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint2(index,index)] = uint2(0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture3D<uint2> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint3(index,index,index)] = uint2(0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture1D<uint4> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[index] = uint4(0,0,0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture2D<uint4> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint2(index,index)] = uint4(0,0,0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture3D<uint4> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint3(index,index,index)] = uint4(0,0,0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture1D<uint> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[index] = 0;
+}
+
+void __NvReferenceUAVForOp(RWTexture2D<uint> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint2(index,index)] = 0;
+}
+
+void __NvReferenceUAVForOp(RWTexture3D<uint> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint3(index,index,index)] = 0;
+}
+
+void __NvReferenceUAVForOp(RWTexture1D<int2> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[index] = int2(0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture2D<int2> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint2(index,index)] = int2(0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture3D<int2> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint3(index,index,index)] = int2(0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture1D<int4> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[index] = int4(0,0,0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture2D<int4> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint2(index,index)] = int4(0,0,0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture3D<int4> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint3(index,index,index)] = int4(0,0,0,0);
+}
+
+void __NvReferenceUAVForOp(RWTexture1D<int> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[index] = 0;
+}
+
+void __NvReferenceUAVForOp(RWTexture2D<int> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint2(index,index)] = 0;
+}
+
+void __NvReferenceUAVForOp(RWTexture3D<int> uav)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].markUavRef = 1;
+    uav[uint3(index,index,index)] = 0;
+}
+
+//----------------------------------------------------------------------------//
+// ATOMIC op sub-opcodes
+#define NV_EXTN_ATOM_AND 0
+#define NV_EXTN_ATOM_OR  1
+#define NV_EXTN_ATOM_XOR 2
+
+#define NV_EXTN_ATOM_ADD 3
+#define NV_EXTN_ATOM_MAX 6
+#define NV_EXTN_ATOM_MIN 7
+
+#define NV_EXTN_ATOM_SWAP 8
+#define NV_EXTN_ATOM_CAS  9
+
+//----------------------------------------------------------------------------//
+
+// performs Atomic operation on two consecutive fp16 values in the given UAV
+// the uint parameter 'fp16x2Val' is treated as two fp16 values
+// the passed sub-opcode 'atomicOpType' should be an immediate constant
+// byteAddress must be multiple of 4
+// the return value is the two fp16 values packed into a single uint
+uint __NvAtomicOpFP16x2(RWByteAddressBuffer uav, uint byteAddress, uint fp16x2Val, uint atomicOpType)
+{
+    __NvReferenceUAVForOp(uav);
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = byteAddress;
+    g_NvidiaExt[index].src1u.x = fp16x2Val;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.x;
+}
+
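+// Editorial note (illustrative only, not part of the NVIDIA header): the fp16x2
+// atomics above operate on a pair of half-precision values packed into one uint,
+// high 16 bits = .y, low 16 bits = .x (see __fp32x2Tofp16x2 further below).
+// 'gBuf' is a hypothetical application-defined RWByteAddressBuffer UAV; apps
+// normally reach this entry point through the public NvInterlocked*Fp16x2 wrappers.
+//
+//   uint packed = (f32tof16(2.0f) << 16) | f32tof16(0.5f);                // pack (x=0.5, y=2.0)
+//   uint prev   = __NvAtomicOpFP16x2(gBuf, 16, packed, NV_EXTN_ATOM_ADD); // byteAddress multiple of 4
+//   float2 old  = float2(f16tof32(prev & 0xFFFF), f16tof32(prev >> 16));  // values before the add
+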
+//----------------------------------------------------------------------------//
+
+// performs Atomic operation on a R16G16_FLOAT UAV at the given address
+// the uint parameter 'fp16x2Val' is treated as two fp16 values
+// the passed sub-opcode 'atomicOpType' should be an immediate constant
+// the return value is the two fp16 values (.x and .y components) packed into a single uint
+// Warning: Behavior of this set of functions is undefined if the UAV is not
+// of R16G16_FLOAT format (might result in app crash or TDR)
+
+uint __NvAtomicOpFP16x2(RWTexture1D<float2> uav, uint address, uint fp16x2Val, uint atomicOpType)
+{
+    __NvReferenceUAVForOp(uav);
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = address;
+    g_NvidiaExt[index].src1u.x = fp16x2Val;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.x;
+}
+
+uint __NvAtomicOpFP16x2(RWTexture2D<float2> uav, uint2 address, uint fp16x2Val, uint atomicOpType)
+{
+    __NvReferenceUAVForOp(uav);
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xy = address;
+    g_NvidiaExt[index].src1u.x = fp16x2Val;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.x;
+}
+
+uint __NvAtomicOpFP16x2(RWTexture3D<float2> uav, uint3 address, uint fp16x2Val, uint atomicOpType)
+{
+    __NvReferenceUAVForOp(uav);
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xyz = address;
+    g_NvidiaExt[index].src1u.x = fp16x2Val;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.x;
+}
+
+//----------------------------------------------------------------------------//
+
+// performs Atomic operation on a R16G16B16A16_FLOAT UAV at the given address
+// the uint2 parameter 'fp16x2Val' is treated as four fp16 values
+// i.e., fp16x2Val.x = uav.xy and fp16x2Val.y = uav.zw
+// the passed sub-opcode 'atomicOpType' should be an immediate constant
+// the return value is the four fp16 values (.xyzw components) packed into uint2
+// Warning: Behavior of this set of functions is undefined if the UAV is not
+// of R16G16B16A16_FLOAT format (might result in app crash or TDR)
+
+uint2 __NvAtomicOpFP16x2(RWTexture1D<float4> uav, uint address, uint2 fp16x2Val, uint atomicOpType)
+{
+    __NvReferenceUAVForOp(uav);
+
+    // break it down into two fp16x2 atomic ops
+    uint2 retVal;
+
+    // first op has x-coordinate = x * 2
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = address * 2;
+    g_NvidiaExt[index].src1u.x = fp16x2Val.x;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC;
+    retVal.x = g_NvidiaExt[index].dst0u.x;
+
+    // second op has x-coordinate = x * 2 + 1
+    index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = address * 2 + 1;
+    g_NvidiaExt[index].src1u.x = fp16x2Val.y;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC;
+    retVal.y = g_NvidiaExt[index].dst0u.x;
+
+    return retVal;
+}
+
+uint2 __NvAtomicOpFP16x2(RWTexture2D<float4> uav, uint2 address, uint2 fp16x2Val, uint atomicOpType)
+{
+    __NvReferenceUAVForOp(uav);
+
+    // break it down into two fp16x2 atomic ops
+    uint2 retVal;
+
+    // first op has x-coordinate = x * 2
+    uint2 addressTemp = uint2(address.x * 2, address.y);
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xy = addressTemp;
+    g_NvidiaExt[index].src1u.x = fp16x2Val.x;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC;
+    retVal.x = g_NvidiaExt[index].dst0u.x;
+
+    // second op has x-coordinate = x * 2 + 1
+    addressTemp.x++;
+    index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xy = addressTemp;
+    g_NvidiaExt[index].src1u.x = fp16x2Val.y;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC;
+    retVal.y = g_NvidiaExt[index].dst0u.x;
+
+    return retVal;
+}
+
+uint2 __NvAtomicOpFP16x2(RWTexture3D<float4> uav, uint3 address, uint2 fp16x2Val, uint atomicOpType)
+{
+    __NvReferenceUAVForOp(uav);
+
+    // break it down into two fp16x2 atomic ops
+    uint2 retVal;
+
+    // first op has x-coordinate = x * 2
+    uint3 addressTemp = uint3(address.x * 2, address.y, address.z);
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xyz = addressTemp;
+    g_NvidiaExt[index].src1u.x = fp16x2Val.x;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC;
+    retVal.x = g_NvidiaExt[index].dst0u.x;
+
+    // second op has x-coordinate = x * 2 + 1
+    addressTemp.x++;
+    index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xyz = addressTemp;
+    g_NvidiaExt[index].src1u.x = fp16x2Val.y;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP16_ATOMIC;
+    retVal.y = g_NvidiaExt[index].dst0u.x;
+
+    return retVal;
+}
+
+uint __fp32x2Tofp16x2(float2 val)
+{
+    return (f32tof16(val.y)<<16) | f32tof16(val.x) ;
+}
+
+uint2 __fp32x4Tofp16x4(float4 val)
+{
+    return uint2( (f32tof16(val.y)<<16) | f32tof16(val.x), (f32tof16(val.w)<<16) | f32tof16(val.z) ) ;
+}
+
+//----------------------------------------------------------------------------//
+
+// FP32 Atomic functions
+// performs Atomic operation treating the uav as float (fp32) values
+// the passed sub-opcode 'op' should be an immediate constant
+// byteAddress must be multiple of 4
+float __NvAtomicAddFP32(RWByteAddressBuffer uav, uint byteAddress, float val)
+{
+    __NvReferenceUAVForOp(uav);
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = byteAddress;
+    g_NvidiaExt[index].src1u.x = asuint(val); // passing as uint to make it more convenient for the driver to translate
+    g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_ADD;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP32_ATOMIC;
+
+    return asfloat(g_NvidiaExt[index].dst0u.x);
+}
+
+float __NvAtomicAddFP32(RWTexture1D<float> uav, uint address, float val)
+{
+    __NvReferenceUAVForOp(uav);
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = address;
+    g_NvidiaExt[index].src1u.x = asuint(val);
+    g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_ADD;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP32_ATOMIC;
+
+    return asfloat(g_NvidiaExt[index].dst0u.x);
+}
+
+float __NvAtomicAddFP32(RWTexture2D<float> uav, uint2 address, float val)
+{
+    __NvReferenceUAVForOp(uav);
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xy = address;
+    g_NvidiaExt[index].src1u.x = asuint(val);
+    g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_ADD;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP32_ATOMIC;
+
+    return asfloat(g_NvidiaExt[index].dst0u.x);
+}
+
+float __NvAtomicAddFP32(RWTexture3D<float> uav, uint3 address, float val)
+{
+    __NvReferenceUAVForOp(uav);
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xyz = address;
+    g_NvidiaExt[index].src1u.x = asuint(val);
+    g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_ADD;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FP32_ATOMIC;
+
+    return asfloat(g_NvidiaExt[index].dst0u.x);
+}
+
+//----------------------------------------------------------------------------//
+
+// UINT64 Atomic Functions
+// The functions below perform an atomic operation on the given UAV treating the value as uint64
+// byteAddress must be multiple of 8
+// The returned value is the value present in memory location before the atomic operation
+// uint2 vector type is used to represent a single uint64 value with the x component containing the low 32 bits and y component the high 32 bits.
+
+uint2 __NvAtomicCompareExchangeUINT64(RWByteAddressBuffer uav, uint byteAddress, uint2 compareValue, uint2 value)
+{
+    __NvReferenceUAVForOp(uav);
+
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = byteAddress;
+    g_NvidiaExt[index].src1u.xy = compareValue;
+    g_NvidiaExt[index].src1u.zw = value;
+    g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_CAS;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.xy;
+}
+
+uint2 __NvAtomicOpUINT64(RWByteAddressBuffer uav, uint byteAddress, uint2 value, uint atomicOpType)
+{
+    __NvReferenceUAVForOp(uav);
+
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = byteAddress;
+    g_NvidiaExt[index].src1u.xy = value;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.xy;
+}
+
+uint2 __NvAtomicCompareExchangeUINT64(RWTexture1D<uint2> uav, uint address, uint2 compareValue, uint2 value)
+{
+    __NvReferenceUAVForOp(uav);
+
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = address;
+    g_NvidiaExt[index].src1u.xy = compareValue;
+    g_NvidiaExt[index].src1u.zw = value;
+    g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_CAS;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.xy;
+}
+
+uint2 __NvAtomicOpUINT64(RWTexture1D<uint2> uav, uint address, uint2 value, uint atomicOpType)
+{
+    __NvReferenceUAVForOp(uav);
+
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = address;
+    g_NvidiaExt[index].src1u.xy = value;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.xy;
+}
+
+uint2 __NvAtomicCompareExchangeUINT64(RWTexture2D<uint2> uav, uint2 address, uint2 compareValue, uint2 value)
+{
+    __NvReferenceUAVForOp(uav);
+
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xy = address;
+    g_NvidiaExt[index].src1u.xy = compareValue;
+    g_NvidiaExt[index].src1u.zw = value;
+    g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_CAS;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.xy;
+}
+
+uint2 __NvAtomicOpUINT64(RWTexture2D<uint2> uav, uint2 address, uint2 value, uint atomicOpType)
+{
+    __NvReferenceUAVForOp(uav);
+
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xy = address;
+    g_NvidiaExt[index].src1u.xy = value;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.xy;
+}
+
+uint2 __NvAtomicCompareExchangeUINT64(RWTexture3D<uint2> uav, uint3 address, uint2 compareValue, uint2 value)
+{
+    __NvReferenceUAVForOp(uav);
+
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xyz = address;
+    g_NvidiaExt[index].src1u.xy = compareValue;
+    g_NvidiaExt[index].src1u.zw = value;
+    g_NvidiaExt[index].src2u.x = NV_EXTN_ATOM_CAS;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.xy;
+}
+
+uint2 __NvAtomicOpUINT64(RWTexture3D<uint2> uav, uint3 address, uint2 value, uint atomicOpType)
+{
+    __NvReferenceUAVForOp(uav);
+
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.xyz = address;
+    g_NvidiaExt[index].src1u.xy = value;
+    g_NvidiaExt[index].src2u.x = atomicOpType;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_UINT64_ATOMIC;
+
+    return g_NvidiaExt[index].dst0u.xy;
+}
+
+
+uint4 __NvFootprint(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint footprintmode, uint gran, int3 offset = int3(0, 0, 0))
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = texIndex;
+    g_NvidiaExt[index].src0u.y = smpIndex;
+    g_NvidiaExt[index].src1u.xyz = asuint(location);
+    g_NvidiaExt[index].src1u.w = gran;
+    g_NvidiaExt[index].src3u.x = texSpace;
+    g_NvidiaExt[index].src3u.y = smpSpace;
+    g_NvidiaExt[index].src3u.z = texType;
+    g_NvidiaExt[index].src3u.w = footprintmode;
+    g_NvidiaExt[index].src4u.xyz = asuint(offset);
+
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FOOTPRINT;
+    g_NvidiaExt[index].numOutputsForIncCounter = 4;
+
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    uint4 op;
+    op.x = g_NvidiaExt.IncrementCounter();
+    op.y = g_NvidiaExt.IncrementCounter();
+    op.z = g_NvidiaExt.IncrementCounter();
+    op.w = g_NvidiaExt.IncrementCounter();
+    return op;
+}
+
+uint4 __NvFootprintBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint footprintmode, uint gran, float bias, int3 offset = int3(0, 0, 0))
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = texIndex;
+    g_NvidiaExt[index].src0u.y = smpIndex;
+    g_NvidiaExt[index].src1u.xyz = asuint(location);
+    g_NvidiaExt[index].src1u.w = gran;
+    g_NvidiaExt[index].src2u.x = asuint(bias);
+    g_NvidiaExt[index].src3u.x = texSpace;
+    g_NvidiaExt[index].src3u.y = smpSpace;
+    g_NvidiaExt[index].src3u.z = texType;
+    g_NvidiaExt[index].src3u.w = footprintmode;
+    g_NvidiaExt[index].src4u.xyz = asuint(offset);
+
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FOOTPRINT_BIAS;
+    g_NvidiaExt[index].numOutputsForIncCounter = 4;
+
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    uint4 op;
+    op.x = g_NvidiaExt.IncrementCounter();
+    op.y = g_NvidiaExt.IncrementCounter();
+    op.z = g_NvidiaExt.IncrementCounter();
+    op.w = g_NvidiaExt.IncrementCounter();
+    return op;
+}
+
+uint4 __NvFootprintLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint footprintmode, uint gran, float lodLevel, int3 offset = int3(0, 0, 0))
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = texIndex;
+    g_NvidiaExt[index].src0u.y = smpIndex;
+    g_NvidiaExt[index].src1u.xyz = asuint(location);
+    g_NvidiaExt[index].src1u.w = gran;
+    g_NvidiaExt[index].src2u.x = asuint(lodLevel);
+    g_NvidiaExt[index].src3u.x = texSpace;
+    g_NvidiaExt[index].src3u.y = smpSpace;
+    g_NvidiaExt[index].src3u.z = texType;
+    g_NvidiaExt[index].src3u.w = footprintmode;
+    g_NvidiaExt[index].src4u.xyz = asuint(offset);
+
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FOOTPRINT_LEVEL;
+    g_NvidiaExt[index].numOutputsForIncCounter = 4;
+
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    uint4 op;
+    op.x = g_NvidiaExt.IncrementCounter();
+    op.y = g_NvidiaExt.IncrementCounter();
+    op.z = g_NvidiaExt.IncrementCounter();
+    op.w = g_NvidiaExt.IncrementCounter();
+    return op;
+}
+
+uint4 __NvFootprintGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint footprintmode, uint gran, float3 ddx, float3 ddy, int3 offset = int3(0, 0, 0))
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = texIndex;
+    g_NvidiaExt[index].src0u.y = smpIndex;
+    g_NvidiaExt[index].src1u.xyz = asuint(location);
+    g_NvidiaExt[index].src1u.w = gran;
+    g_NvidiaExt[index].src2u.xyz = asuint(ddx);
+    g_NvidiaExt[index].src5u.xyz = asuint(ddy);
+    g_NvidiaExt[index].src3u.x = texSpace;
+    g_NvidiaExt[index].src3u.y = smpSpace;
+    g_NvidiaExt[index].src3u.z = texType;
+    g_NvidiaExt[index].src3u.w = footprintmode;
+    g_NvidiaExt[index].src4u.xyz = asuint(offset);
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_FOOTPRINT_GRAD;
+    g_NvidiaExt[index].numOutputsForIncCounter = 4;
+
+    // result is returned as the return value of IncrementCounter on fake UAV slot
+    uint4 op;
+    op.x = g_NvidiaExt.IncrementCounter();
+    op.y = g_NvidiaExt.IncrementCounter();
+    op.z = g_NvidiaExt.IncrementCounter();
+    op.w = g_NvidiaExt.IncrementCounter();
+    return op;
+}
+
+// returns the value of a special register - specify the subopcode from any of the NV_SPECIALOP_* values
+// in nvShaderExtnEnums.h; other subopcodes give undefined behavior
+uint __NvGetSpecial(uint subOpCode)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_GET_SPECIAL;
+    g_NvidiaExt[index].src0u.x = subOpCode;
+    return g_NvidiaExt.IncrementCounter();
+}
+
+// a predicate is returned in laneValid indicating whether srcLane is in range; val from the specified lane is returned.
+int __NvShflGeneric(int val, uint srcLane, uint maskClampVal, out uint laneValid)
+{
+    uint index = g_NvidiaExt.IncrementCounter();
+    g_NvidiaExt[index].src0u.x = val;          // variable to be shuffled
+    g_NvidiaExt[index].src0u.y = srcLane;      // source lane
+    g_NvidiaExt[index].src0u.z = maskClampVal;
+    g_NvidiaExt[index].opcode = NV_EXTN_OP_SHFL_GENERIC;
+    g_NvidiaExt[index].numOutputsForIncCounter = 2;
+
+    laneValid = asuint(g_NvidiaExt.IncrementCounter());
+    return g_NvidiaExt.IncrementCounter();
+}
\ No newline at end of file
diff --git a/vendor/nvapi/nvShaderExtnEnums.h b/vendor/nvapi/nvShaderExtnEnums.h
new file mode 100644
index 0000000000..cfa918b3e7
--- /dev/null
+++ b/vendor/nvapi/nvShaderExtnEnums.h
@@ -0,0 +1,141 @@
+ /************************************************************************************************************************************\
+|*                                                                                                                                    *|
+|*  Copyright © 2012 NVIDIA Corporation. All rights reserved.                                                                         *|
+|*                                                                                                                                    *|
+|*  NOTICE TO USER:                                                                                                                   *|
+|*                                                                                                                                    *|
+|*  This software is subject to NVIDIA ownership rights under U.S. and international Copyright laws.                                  *|
+|*                                                                                                                                    *|
+|*  This software and the information contained herein are PROPRIETARY and CONFIDENTIAL to NVIDIA                                    *|
+|*  and are being provided solely under the terms and conditions of an NVIDIA software license agreement.                            *|
+|*  Otherwise, you have no rights to use or access this software in any manner.                                                      *|
+|*                                                                                                                                    *|
+|*  If not covered by the applicable NVIDIA software license agreement:                                                              *|
+|*  NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOFTWARE FOR ANY PURPOSE.                                           *|
+|*  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.                                                          *|
+|*  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,                                                                    *|
+|*  INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
*| +|* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, *| +|* OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, *| +|* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOURCE CODE. *| +|* *| +|* U.S. Government End Users. *| +|* This software is a "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 1995), *| +|* consisting of "commercial computer software" and "commercial computer software documentation" *| +|* as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government only as a commercial end item. *| +|* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), *| +|* all U.S. Government End Users acquire the software with only those rights set forth herein. *| +|* *| +|* Any use of this software in individual and commercial software must include, *| +|* in the user documentation and internal comments to the code, *| +|* the above Disclaimer (as applicable) and U.S. Government End Users Notice. *| +|* *| + \************************************************************************************************************************************/ + +//////////////////////////////////////////////////////////////////////////////// +////////////////////////// NVIDIA SHADER EXTENSIONS //////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +// This file can be included both from HLSL shader code as well as C++ code. +// The app should call NvAPI_D3D11_IsNvShaderExtnOpCodeSupported() / NvAPI_D3D12_IsNvShaderExtnOpCodeSupported() +// to check for support for every nv shader extension opcode it plans to use + + + +//----------------------------------------------------------------------------// +//---------------------------- NV Shader Extn Version -----------------------// +//----------------------------------------------------------------------------// +#define NV_SHADER_EXTN_VERSION 1 + +//----------------------------------------------------------------------------// +//---------------------------- Misc constants --------------------------------// +//----------------------------------------------------------------------------// +#define NV_WARP_SIZE 32 +#define NV_WARP_SIZE_LOG2 5 + +//----------------------------------------------------------------------------// +//---------------------------- opCode constants ------------------------------// +//----------------------------------------------------------------------------// + + +#define NV_EXTN_OP_SHFL 1 +#define NV_EXTN_OP_SHFL_UP 2 +#define NV_EXTN_OP_SHFL_DOWN 3 +#define NV_EXTN_OP_SHFL_XOR 4 + +#define NV_EXTN_OP_VOTE_ALL 5 +#define NV_EXTN_OP_VOTE_ANY 6 +#define NV_EXTN_OP_VOTE_BALLOT 7 + +#define NV_EXTN_OP_GET_LANE_ID 8 +#define NV_EXTN_OP_FP16_ATOMIC 12 +#define NV_EXTN_OP_FP32_ATOMIC 13 + +#define NV_EXTN_OP_GET_SPECIAL 19 + +#define NV_EXTN_OP_UINT64_ATOMIC 20 + +#define NV_EXTN_OP_MATCH_ANY 21 + +// FOOTPRINT - For Sample and SampleBias +#define NV_EXTN_OP_FOOTPRINT 28 +#define NV_EXTN_OP_FOOTPRINT_BIAS 29 + +#define NV_EXTN_OP_GET_SHADING_RATE 30 + +// FOOTPRINT - For SampleLevel and SampleGrad +#define NV_EXTN_OP_FOOTPRINT_LEVEL 31 +#define NV_EXTN_OP_FOOTPRINT_GRAD 32 + +// SHFL Generic +#define NV_EXTN_OP_SHFL_GENERIC 33 + +#define NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE 51 +#define NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED 52 + +// HitObject API 
+#define NV_EXTN_OP_HIT_OBJECT_TRACE_RAY 67 +#define NV_EXTN_OP_HIT_OBJECT_MAKE_HIT 68 +#define NV_EXTN_OP_HIT_OBJECT_MAKE_HIT_WITH_RECORD_INDEX 69 +#define NV_EXTN_OP_HIT_OBJECT_MAKE_MISS 70 +#define NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD 71 +#define NV_EXTN_OP_HIT_OBJECT_INVOKE 72 +#define NV_EXTN_OP_HIT_OBJECT_IS_MISS 73 +#define NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_ID 74 +#define NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_INDEX 75 +#define NV_EXTN_OP_HIT_OBJECT_GET_PRIMITIVE_INDEX 76 +#define NV_EXTN_OP_HIT_OBJECT_GET_GEOMETRY_INDEX 77 +#define NV_EXTN_OP_HIT_OBJECT_GET_HIT_KIND 78 +#define NV_EXTN_OP_HIT_OBJECT_GET_RAY_DESC 79 +#define NV_EXTN_OP_HIT_OBJECT_GET_ATTRIBUTES 80 +#define NV_EXTN_OP_HIT_OBJECT_GET_SHADER_TABLE_INDEX 81 +#define NV_EXTN_OP_HIT_OBJECT_LOAD_LOCAL_ROOT_TABLE_CONSTANT 82 +#define NV_EXTN_OP_HIT_OBJECT_IS_HIT 83 +#define NV_EXTN_OP_HIT_OBJECT_IS_NOP 84 +#define NV_EXTN_OP_HIT_OBJECT_MAKE_NOP 85 + +//----------------------------------------------------------------------------// +//-------------------- GET_SPECIAL subOpCode constants -----------------------// +//----------------------------------------------------------------------------// +#define NV_SPECIALOP_THREADLTMASK 4 +#define NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED 5 +#define NV_SPECIALOP_GLOBAL_TIMER_LO 9 +#define NV_SPECIALOP_GLOBAL_TIMER_HI 10 + +//----------------------------------------------------------------------------// +//----------------------------- Texture Types -------------------------------// +//----------------------------------------------------------------------------// +#define NV_EXTN_TEXTURE_1D 2 +#define NV_EXTN_TEXTURE_1D_ARRAY 3 +#define NV_EXTN_TEXTURE_2D 4 +#define NV_EXTN_TEXTURE_2D_ARRAY 5 +#define NV_EXTN_TEXTURE_3D 6 +#define NV_EXTN_TEXTURE_CUBE 7 +#define NV_EXTN_TEXTURE_CUBE_ARRAY 8 + + +//---------------------------------------------------------------------------// +//----------------FOOTPRINT Enums for NvFootprint* extns---------------------// +//---------------------------------------------------------------------------// +#define NV_EXTN_FOOTPRINT_MODE_FINE 0 +#define NV_EXTN_FOOTPRINT_MODE_COARSE 1 diff --git a/vendor/nvapi/nvapi.h b/vendor/nvapi/nvapi.h index ffdfe03a19..e5354f6b83 100644 --- a/vendor/nvapi/nvapi.h +++ b/vendor/nvapi/nvapi.h @@ -41,7 +41,7 @@ /////////////////////////////////////////////////////////////////////////////// // -// Date: Jan 6, 2022 +// Date: Nov 4, 2022 // File: nvapi.h // // NvAPI provides an interface to NVIDIA devices. This file contains the @@ -717,7 +717,6 @@ typedef struct //! \ingroup dispcontrol #define NV_VIEW_TARGET_INFO_VER MAKE_NVAPI_VERSION(NV_VIEW_TARGET_INFO,2) - //! \ingroup dispcontrol __nvapi_deprecated_function("Do not use this function - it is deprecated in release 290. Instead, use NvAPI_DISP_SetDisplayConfig.") NVAPI_INTERFACE NvAPI_SetView(NvDisplayHandle hNvDisplay, NV_VIEW_TARGET_INFO *pTargetInfo, NV_TARGET_VIEW_MODE targetView); @@ -1768,10 +1767,10 @@ NVAPI_INTERFACE NvAPI_GetGPUIDfromPhysicalGPU(NvPhysicalGpuHandle hPhysicalGpu, //! //! \since Release: 170 //! -//! RETURN STATUS: NVAPI_INVALID_ARGUMENT: pCount is NULL -//! NVAPI_OK: *pCount is set -//! NVAPI_NVIDIA_DEVICE_NOT_FOUND: no NVIDIA GPU driving a display was found -//! NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE: hPhysicalGpu was not a physical GPU handle +//! \retval NVAPI_INVALID_ARGUMENT: pCount is NULL +//! \retval NVAPI_OK: *pCount is set +//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND: no NVIDIA GPU driving a display was found +//! 
\retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE: hPhysicalGpu was not a physical GPU handle
 //!
 //! \ingroup gpu
 ///////////////////////////////////////////////////////////////////////////////
@@ -1920,7 +1919,7 @@ typedef struct _NV_GPU_DISPLAYIDS
     NvU32    isWFD : 1;                  //!< Deprecated. Will always return 0.
     NvU32    isConnected : 1;            //!< if bit is set, then this display is connected
-    NvU32    reservedInternal :10;       //!< Do not use
+    NvU32    reservedInternal : 10;      //!< Do not use
     NvU32    isPhysicallyConnected : 1;  //!< if bit is set, then this display is a phycially connected display; Valid only when isConnected bit is set
     NvU32    reserved : 14;              //!< must be zero
 } NV_GPU_DISPLAYIDS;
@@ -1936,29 +1935,31 @@
 //
 // FUNCTION NAME: NvAPI_GPU_GetConnectedDisplayIds
 //
-//! \code
+//!
 //! DESCRIPTION: Due to space limitation NvAPI_GPU_GetConnectedOutputs can return maximum 32 devices, but
 //!              this is no longer true for DPMST. NvAPI_GPU_GetConnectedDisplayIds will return all
 //!              the connected display devices in the form of displayIds for the associated hPhysicalGpu.
 //!              This function can accept set of flags to request cached, uncached, sli and lid to get the connected devices.
 //!              Default value for flags will be cached .
+//!
 //! HOW TO USE: 1) for each PhysicalGpu, make a call to get the number of connected displayId's
 //!                using NvAPI_GPU_GetConnectedDisplayIds by passing the pDisplayIds as NULL
 //!                On call success:
+//!
 //!             2) If pDisplayIdCount is greater than 0, allocate memory based on pDisplayIdCount. Then make a call NvAPI_GPU_GetConnectedDisplayIds to populate DisplayIds.
 //!                However, if pDisplayIdCount is 0, do not make this call.
 //! SUPPORTED OS: Windows 7 and higher
 //!
-//! PARAMETERS:     hPhysicalGpu (IN)  - GPU selection
-//!                 flags        (IN)  - One or more defines from NV_GPU_CONNECTED_IDS_FLAG_* as valid flags.
-//!                 pDisplayIds  (IN/OUT) - Pointer to an NV_GPU_DISPLAYIDS struct, each entry represents a one displayID and its attributes
-//!                 pDisplayIdCount(OUT)- Number of displayId's.
+//! \param [in]     hPhysicalGpu - GPU selection
+//! \param [in]     flags - One or more defines from NV_GPU_CONNECTED_IDS_FLAG_* as valid flags.
+//! \param [in,out] pDisplayIds - Pointer to an NV_GPU_DISPLAYIDS struct, each entry represents one displayID and its attributes
+//! \param [in,out] pDisplayIdCount - Number of displayIds.
 //!
-//! RETURN STATUS: NVAPI_INVALID_ARGUMENT: hPhysicalGpu or pDisplayIds or pDisplayIdCount is NULL
-//!                NVAPI_OK: *pDisplayIds contains a set of GPU-output identifiers
-//!                NVAPI_NVIDIA_DEVICE_NOT_FOUND: no NVIDIA GPU driving a display was found
-//!                NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE: hPhysicalGpu was not a physical GPU handle
-//! \endcode
+//! \retval NVAPI_INVALID_ARGUMENT: hPhysicalGpu or pDisplayIds or pDisplayIdCount is NULL
+//! \retval NVAPI_OK: *pDisplayIds contains a set of GPU-output identifiers
+//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND: no NVIDIA GPU driving a display was found
+//! \retval NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE: hPhysicalGpu was not a physical GPU handle
+//!
+//! 
\ingroup gpu /////////////////////////////////////////////////////////////////////////////// NVAPI_INTERFACE NvAPI_GPU_GetConnectedDisplayIds(__in NvPhysicalGpuHandle hPhysicalGpu, __inout_ecount_part_opt(*pDisplayIdCount, *pDisplayIdCount) NV_GPU_DISPLAYIDS* pDisplayIds, __inout NvU32* pDisplayIdCount, __in NvU32 flags); @@ -2644,6 +2645,26 @@ NVAPI_INTERFACE NvAPI_GPU_GetBoardInfo(NvPhysicalGpuHandle hPhysicalGpu, NV_BOAR +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_GPU_GetRamBusWidth +// +//! This function returns the width of the GPU's RAM memory bus. +//! +//! SUPPORTED OS: Windows 7 and higher +//! +//! +//! TCC_SUPPORTED +//! +//! \since Release: 100 +//! +//! \return NVAPI_ERROR or NVAPI_OK +//! \ingroup gpu +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_GPU_GetRamBusWidth(NvPhysicalGpuHandle hPhysicalGpu, NvU32 *pBusWidth); + + + //! Used in NvAPI_GPU_GetArchInfo() typedef struct { @@ -2681,6 +2702,7 @@ typedef enum _NV_GPU_ARCHITECTURE_ID NV_GPU_ARCHITECTURE_GV110 = 0x00000150, NV_GPU_ARCHITECTURE_TU100 = 0x00000160, NV_GPU_ARCHITECTURE_GA100 = 0x00000170, + NV_GPU_ARCHITECTURE_AD100 = 0x00000190, }NV_GPU_ARCHITECTURE_ID; @@ -2779,6 +2801,10 @@ typedef enum _NV_GPU_ARCH_IMPLEMENTATION_ID NV_GPU_ARCH_IMPLEMENTATION_GA102 = 0x00000002, NV_GPU_ARCH_IMPLEMENTATION_GA104 = 0x00000004, + NV_GPU_ARCH_IMPLEMENTATION_AD102 = 0x00000002, + NV_GPU_ARCH_IMPLEMENTATION_AD103 = 0x00000003, + NV_GPU_ARCH_IMPLEMENTATION_AD104 = 0x00000004, + }NV_GPU_ARCH_IMPLEMENTATION_ID; typedef enum _NV_GPU_CHIP_REVISION @@ -2854,7 +2880,7 @@ NVAPI_INTERFACE NvAPI_GPU_GetArchInfo(NvPhysicalGpuHandle hPhysicalGpu, NV_GPU_A // These APIs allow I2C access only to DDC monitors -//! \addtogroup i2capi +//! \ingroup i2capi //! @{ #define NVAPI_MAX_SIZEOF_I2C_DATA_BUFFER 4096 #define NVAPI_MAX_SIZEOF_I2C_REG_ADDRESS 4 @@ -3167,7 +3193,7 @@ NVAPI_INTERFACE NvAPI_GPU_GetHDCPSupportStatus(NvPhysicalGpuHandle hPhysicalGpu, -//! \addtogroup gpucuda +//! \ingroup gpucuda //! @{ //! defgroup nvcomp_gpu_top NVIDIA Compute GPU Topology Flags @@ -3235,7 +3261,7 @@ typedef NV_COMPUTE_GPU_TOPOLOGY_V2 NV_COMPUTE_GPU_TOPOLOGY; //! //! \since Release: 180 //! -//! \param [inout] pComputeTopo Pointer to the structure NV_COMPUTE_GPU_TOPOLOGY. +//! \param [in,out] pComputeTopo Pointer to the structure NV_COMPUTE_GPU_TOPOLOGY. //! //! \retval ::NVAPI_OK The request was completed successfully. //! The gpuCount indicates if one or more compute-capable GPUs are found. @@ -3544,7 +3570,7 @@ typedef struct void *callbackParam; //!< This value will be passed back to the callback function when an event occurs union { - NVAPI_CALLBACK_QSYNCEVENT nvQSYNCEventCallback; //!< Callback function pointer for QSYNC events + NVAPI_CALLBACK_QSYNCEVENT nvQSYNCEventCallback; //!< Callback function pointer for QSYNC events }nvCallBackFunc; } NV_EVENT_REGISTER_CALLBACK, *PNV_EVENT_REGISTER_CALLBACK; @@ -3903,7 +3929,10 @@ NVAPI_INTERFACE NvAPI_GPU_GetScanoutConfigurationEx(__in NvU32 displayId, __inou // //! DESCRIPTION: This API returns the OS-AdapterID from physicalGpu Handle. OS-AdapterID //! is the Adapter ID that is used by Win7 CCD APIs. +//! This API is deprecated. Please use NvAPI_GPU_GetLogicalGpuInfo to get the OS-AdapterID. +//! NvAPI_GetLogicalGPUFromPhysicalGPU can be used to get the logical GPU handle associated with specified physical GPU handle. //! +//! 
\deprecated Do not use this function - it is deprecated in release 520. Instead, use NvAPI_GPU_GetLogicalGpuInfo. //! SUPPORTED OS: Windows 7 and higher //! //! @@ -3917,6 +3946,7 @@ NVAPI_INTERFACE NvAPI_GPU_GetScanoutConfigurationEx(__in NvU32 displayId, __inou //! //! \ingroup gpu /////////////////////////////////////////////////////////////////////////////// +__nvapi_deprecated_function("Do not use this function - it is deprecated in release 520. Instead, use NvAPI_GPU_GetLogicalGpuInfo.") NVAPI_INTERFACE NvAPI_GPU_GetAdapterIdFromPhysicalGpu(NvPhysicalGpuHandle hPhysicalGpu, void *pOSAdapterId); @@ -4006,7 +4036,7 @@ typedef NV_LOGICAL_GPU_DATA_V1 NV_LOGICAL_GPU_DATA; //! \since Release: 421 //! //! \param [in] hLogicalGpu logical GPU Handle. -//! \param [inout] pLogicalGpuData Pointer to NV_LOGICAL_GPU_DATA structure. +//! \param [in,out] pLogicalGpuData Pointer to NV_LOGICAL_GPU_DATA structure. //! //! \return This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with //! specific meaning for this API, they are listed below. @@ -4016,7 +4046,7 @@ typedef NV_LOGICAL_GPU_DATA_V1 NV_LOGICAL_GPU_DATA; NVAPI_INTERFACE NvAPI_GPU_GetLogicalGpuInfo(__in NvLogicalGpuHandle hLogicalGpu, __inout NV_LOGICAL_GPU_DATA *pLogicalGpuData); -//! \addtogroup gridlicense +//! \ingroup gridlicense //! @{ //! Maximum number of supported Feature License @@ -4039,6 +4069,7 @@ typedef enum _NV_LICENSE_FEATURE_TYPE NV_LICENSE_FEATURE_NVIDIA_RTX = 2, NV_LICENSE_FEATURE_QUADRO = NV_LICENSE_FEATURE_NVIDIA_RTX, //!< DEPRECATED name - do not use NV_LICENSE_FEATURE_GAMING = 3, + NV_LICENSE_FEATURE_COMPUTE = 4, } NV_LICENSE_FEATURE_TYPE; //! Used in NV_LICENSE_FEATURE_DETAILS @@ -4174,6 +4205,8 @@ typedef NV_LICENSABLE_FEATURES_V4 NV_LICENSABLE_FEATURES; //! SUPPORTED OS: Windows 7 and higher //! //! +//! TCC_SUPPORTED +//! //! \param [in] hPhysicalGpu GPU selection //! \param [in,out] pLicensableFeatures Licensable features information. //! @@ -4210,7 +4243,7 @@ typedef NV_GPU_VR_READY_V1 NV_GPU_VR_READY; //! //! \since Release: 465 //! -//! \param [inout] pGpuVrReadyData - This structure will be filled with required information. +//! \param [in,out] pGpuVrReadyData - This structure will be filled with required information. //! //! \return This API can return any of the error codes enumerated in //! #NvAPI_Status. If there are return error codes with specific @@ -4219,6 +4252,7 @@ typedef NV_GPU_VR_READY_V1 NV_GPU_VR_READY; //! \ingroup gpu /////////////////////////////////////////////////////////////////////////////// NVAPI_INTERFACE NvAPI_GPU_GetVRReadyData(__in NvPhysicalGpuHandle hPhysicalGpu, __inout NV_GPU_VR_READY *pGpuVrReadyData); + //! Used in NvAPI_GPU_GetPerfDecreaseInfo. //! Bit masks for knowing the exact reason for performance decrease typedef enum _NVAPI_GPU_PERF_DECREASE @@ -7139,10 +7173,85 @@ typedef struct _NV_HDR_CAPABILITIES_V2 } NV_HDR_CAPABILITIES_V2; +typedef struct _NV_HDR_CAPABILITIES_V3 +{ + NvU32 version; //!< Version of this structure + + NvU32 isST2084EotfSupported :1; //!< HDMI2.0a UHDA HDR with ST2084 EOTF (CEA861.3). Boolean: 0 = not supported, 1 = supported; + NvU32 isTraditionalHdrGammaSupported :1; //!< HDMI2.0a traditional HDR gamma (CEA861.3). Boolean: 0 = not supported, 1 = supported; + NvU32 isEdrSupported :1; //!< Extended Dynamic Range on SDR displays. 
+    NvU32 driverExpandDefaultHdrParameters :1;    //!< If set, driver will expand default (=zero) HDR capabilities parameters contained in display's EDID.
+                                                  //!< Boolean: 0 = report actual HDR parameters, 1 = expand default HDR parameters;
+    NvU32 isTraditionalSdrGammaSupported :1;      //!< HDMI2.0a traditional SDR gamma (CEA861.3). Boolean: 0 = not supported, 1 = supported;
+    NvU32 isDolbyVisionSupported         :1;      //!< Dolby Vision Support. Boolean: 0 = not supported, 1 = supported;
+    NvU32 isHdr10PlusSupported           :1;      //!< HDR10+ (Sink Side Tonemapping) is supported
+    NvU32 isHdr10PlusGamingSupported     :1;      //!< HDR10+ Gaming, a.k.a. HDR10+ Source Side Tonemapping (SSTM), is supported
+    NvU32 reserved                       :24;
+
+    NV_STATIC_METADATA_DESCRIPTOR_ID static_metadata_descriptor_id;    //!< Static Metadata Descriptor Id (0 for static metadata type 1)
+
+    struct    //!< Static Metadata Descriptor Type 1, CEA-861.3, SMPTE ST2086
+    {
+        NvU16 displayPrimary_x0;    //!< x coordinate of color primary 0 (e.g. Red) of the display ([0x0000-0xC350] = [0.0 - 1.0])
+        NvU16 displayPrimary_y0;    //!< y coordinate of color primary 0 (e.g. Red) of the display ([0x0000-0xC350] = [0.0 - 1.0])
+
+        NvU16 displayPrimary_x1;    //!< x coordinate of color primary 1 (e.g. Green) of the display ([0x0000-0xC350] = [0.0 - 1.0])
+        NvU16 displayPrimary_y1;    //!< y coordinate of color primary 1 (e.g. Green) of the display ([0x0000-0xC350] = [0.0 - 1.0])
+
+        NvU16 displayPrimary_x2;    //!< x coordinate of color primary 2 (e.g. Blue) of the display ([0x0000-0xC350] = [0.0 - 1.0])
+        NvU16 displayPrimary_y2;    //!< y coordinate of color primary 2 (e.g. Blue) of the display ([0x0000-0xC350] = [0.0 - 1.0])
+
+        NvU16 displayWhitePoint_x;  //!< x coordinate of white point of the display ([0x0000-0xC350] = [0.0 - 1.0])
+        NvU16 displayWhitePoint_y;  //!< y coordinate of white point of the display ([0x0000-0xC350] = [0.0 - 1.0])
+
+        NvU16 desired_content_max_luminance;               //!< Maximum display luminance = desired max luminance of HDR content ([0x0000-0xFFFF] = [0.0 - 65535.0] cd/m^2, in units of 1 cd/m^2)
+        NvU16 desired_content_min_luminance;               //!< Minimum display luminance = desired min luminance of HDR content ([0x0000-0xFFFF] = [0.0 - 6.55350] cd/m^2, in units of 0.0001 cd/m^2)
+        NvU16 desired_content_max_frame_average_luminance; //!< Desired maximum Frame-Average Light Level (MaxFALL) of HDR content ([0x0000-0xFFFF] = [0.0 - 65535.0] cd/m^2, in units of 1 cd/m^2)
+    }display_data;
+
+    struct
+    {
+        NvU32 VSVDB_version              : 3;    //!< Version of the Vendor Specific Video Data Block. Version 0: 25 bytes; Version 1: 14 bytes.
+        NvU32 dm_version                 : 8;    //!< Upper nibble represents the major version of Display Management (DM); the lower nibble represents the minor version of DM.
+        NvU32 supports_2160p60hz         : 1;    //!< If set, sink is capable of 4Kx2K @ 60 Hz.
+        NvU32 supports_YUV422_12bit      : 1;    //!< If set, sink is capable of YUV422 12-bit.
+        NvU32 supports_global_dimming    : 1;    //!< Indicates if sink supports global dimming.
+        NvU32 colorimetry                : 1;    //!< If set, indicates sink supports DCI-P3 colorimetry; Rec709 otherwise.
+        NvU32 supports_backlight_control : 2;    //!< Set when the sink is using the low-latency interface and can control its backlight.
+        NvU32 backlt_min_luma            : 2;    //!< Level of backlight minimum luminance (reserved = 0x3 in latest DV spec).
+        NvU32 interface_supported_by_sink : 2;   //!< Indicates the interface (standard or low latency) supported by the sink.
+        NvU32 supports_10b_12b_444      : 2;     //!< Set when the supported interface is low latency; it indicates whether the sink supports 10-bit or 12-bit RGB 4:4:4 or YCbCr 4:4:4, or both.
+        NvU32 parity                    : 1;     //!< Indicates the resolution and frame-rate relationship between Dolby Vision and other video processing.
+        NvU32 reserved                  : 8;     //!< Should be set to zero.
+
+        //!< All values below are encoded; use the Dolby Vision HDMI Transmission Specification document to decode them.
+        NvU16 target_min_luminance;              //!< Represents min luminance level of sink
+        NvU16 target_max_luminance;              //!< Represents max luminance level of sink
+        NvU16 cc_red_x;                          //!< Red primary chromaticity coordinate x
+        NvU16 cc_red_y;                          //!< Red primary chromaticity coordinate y
+        NvU16 cc_green_x;                        //!< Green primary chromaticity coordinate x
+        NvU16 cc_green_y;                        //!< Green primary chromaticity coordinate y
+        NvU16 cc_blue_x;                         //!< Blue primary chromaticity coordinate x
+        NvU16 cc_blue_y;                         //!< Blue primary chromaticity coordinate y
+        NvU16 cc_white_x;                        //!< White primary chromaticity coordinate x
+        NvU16 cc_white_y;                        //!< White primary chromaticity coordinate y
+    }dv_static_metadata;
+
+    struct
+    {
+        NvU16 application_version             : 2;    //!< Application version of HDR10+ Vendor Specific Video Data Block
+        NvU16 full_frame_peak_luminance_index : 2;    //!< Full frame peak luminance index
+        NvU16 peak_luminance_index            : 4;    //!< Peak luminance index
+        NvU16 reserved                        : 8;
+    }hdr10plus_vsvdb;
+
+} NV_HDR_CAPABILITIES_V3;
+
 #define NV_HDR_CAPABILITIES_VER1  MAKE_NVAPI_VERSION(NV_HDR_CAPABILITIES_V1, 1)
 #define NV_HDR_CAPABILITIES_VER2  MAKE_NVAPI_VERSION(NV_HDR_CAPABILITIES_V2, 2)
-#define NV_HDR_CAPABILITIES_VER   NV_HDR_CAPABILITIES_VER2
-typedef NV_HDR_CAPABILITIES_V2    NV_HDR_CAPABILITIES;
+#define NV_HDR_CAPABILITIES_VER3  MAKE_NVAPI_VERSION(NV_HDR_CAPABILITIES_V3, 3)
+#define NV_HDR_CAPABILITIES_VER   NV_HDR_CAPABILITIES_VER3
+typedef NV_HDR_CAPABILITIES_V3    NV_HDR_CAPABILITIES;
 //! \ingroup dispcontrol
 //! @{
@@ -7284,8 +7393,220 @@ typedef NV_HDR_COLOR_DATA_V2 NV_HDR_COLOR_DATA;
 ///////////////////////////////////////////////////////////////////////////////
 NVAPI_INTERFACE NvAPI_Disp_HdrColorControl(__in NvU32 displayId, __inout NV_HDR_COLOR_DATA *pHdrColorData);
-//! @}
+typedef enum _NV_COLORSPACE_TYPE
+{
+    NV_COLORSPACE_sRGB    = 0,     //!< sRGB IEC 61966-2-1:1999 == DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709
+    NV_COLORSPACE_xRGB    = 1,     //!< FP16 linear with sRGB color primaries == DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709
+    NV_COLORSPACE_REC2100 = 12,    //!< ITU-R Rec BT.2100 (HDR10) == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020
+} NV_COLORSPACE_TYPE;
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME:   NvAPI_Disp_SetSourceColorSpace
+//
+//! \fn NvAPI_Disp_SetSourceColorSpace(__in NvU32 displayId, __in NV_COLORSPACE_TYPE colorSpaceType)
+//! DESCRIPTION:    This API sets the colorspace of the source identified by the process id of the caller.
+//!
+//! SUPPORTED OS: Windows 7 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]    displayId         Monitor Identifier
+//! \param [in]    colorSpaceType    Source colorspace type
+//!
+//! \return  This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!          specific meaning for this API, they are listed below.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_SetSourceColorSpace(__in NvU32 displayId, __in NV_COLORSPACE_TYPE colorSpaceType);
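+
+//! Illustrative usage (a hypothetical sketch, not part of the official header):
+//! tag the calling process as producing HDR10 content on a given monitor.
+//! 'displayId' is assumed to have been obtained elsewhere, e.g. via
+//! NvAPI_DISP_GetDisplayIdByDisplayName():
+//! \code
+//!     NvAPI_Status status = NvAPI_Disp_SetSourceColorSpace(displayId, NV_COLORSPACE_REC2100);
+//!     if (status != NVAPI_OK)
+//!     {
+//!         // Handle the error; no colorspace override is in effect.
+//!     }
+//! \endcode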
+
+#define NV_SOURCE_PID_CURRENT  0
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME:   NvAPI_Disp_GetSourceColorSpace
+//
+//! \fn NvAPI_Disp_GetSourceColorSpace(__in NvU32 displayId, __inout NV_COLORSPACE_TYPE* pColorSpaceType, NvU64 sourcePID)
+//! DESCRIPTION:    This API gets the colorspace of the source identified by the process id.
+//!                 Set sourcePID = NV_SOURCE_PID_CURRENT to use the process id of the caller.
+//! SUPPORTED OS: Windows 7 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]    displayId          Monitor Identifier
+//! \param [out]   pColorSpaceType    Source colorspace type
+//! \param [in]    sourcePID          Source process id (PID)
+//!
+//! \return  This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!          specific meaning for this API, they are listed below.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_GetSourceColorSpace(__in NvU32 displayId, __inout NV_COLORSPACE_TYPE* pColorSpaceType, NvU64 sourcePID);
+
+typedef struct _NV_HDR_METADATA_V1
+{
+    NvU32 version;                          //!< Version of this structure
+
+    NvU16 displayPrimary_x0;                //!< x coordinate of color primary 0 (e.g. Red) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+    NvU16 displayPrimary_y0;                //!< y coordinate of color primary 0 (e.g. Red) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+
+    NvU16 displayPrimary_x1;                //!< x coordinate of color primary 1 (e.g. Green) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+    NvU16 displayPrimary_y1;                //!< y coordinate of color primary 1 (e.g. Green) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+
+    NvU16 displayPrimary_x2;                //!< x coordinate of color primary 2 (e.g. Blue) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+    NvU16 displayPrimary_y2;                //!< y coordinate of color primary 2 (e.g. Blue) of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+
+    NvU16 displayWhitePoint_x;              //!< x coordinate of white point of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+    NvU16 displayWhitePoint_y;              //!< y coordinate of white point of mastering display ([0x0000-0xC350] = [0.0 - 1.0])
+
+    NvU16 max_display_mastering_luminance;  //!< Maximum display mastering luminance ([0x0000-0xFFFF] = [0.0 - 65535.0] cd/m^2, in units of 1 cd/m^2)
+    NvU16 min_display_mastering_luminance;  //!< Minimum display mastering luminance ([0x0000-0xFFFF] = [0.0 - 6.55350] cd/m^2, in units of 0.0001 cd/m^2)
+
+    NvU16 max_content_light_level;          //!< Maximum Content Light Level (MaxCLL) ([0x0000-0xFFFF] = [0.0 - 65535.0] cd/m^2, in units of 1 cd/m^2)
+    NvU16 max_frame_average_light_level;    //!< Maximum Frame-Average Light Level (MaxFALL) ([0x0000-0xFFFF] = [0.0 - 65535.0] cd/m^2, in units of 1 cd/m^2)
+} NV_HDR_METADATA_V1;
+
+#define NV_HDR_METADATA_VER1  MAKE_NVAPI_VERSION(NV_HDR_METADATA_V1, 1)
+#define NV_HDR_METADATA_VER   NV_HDR_METADATA_VER1
+typedef NV_HDR_METADATA_V1    NV_HDR_METADATA;
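+
+//! Illustrative usage (a hypothetical sketch, not part of the official header):
+//! fill in mastering-display metadata for typical HDR10 content. The numbers
+//! below are placeholders; real content should supply its actual mastering
+//! data. The structure can then be passed to the source-HDR-metadata setter
+//! declared below:
+//! \code
+//!     NV_HDR_METADATA metadata = {};
+//!     metadata.version                         = NV_HDR_METADATA_VER;
+//!     metadata.max_display_mastering_luminance = 1000;  // 1000 cd/m^2
+//!     metadata.min_display_mastering_luminance = 500;   // 0.05 cd/m^2, in units of 0.0001 cd/m^2
+//!     metadata.max_content_light_level         = 1000;  // MaxCLL, in cd/m^2
+//!     metadata.max_frame_average_light_level   = 400;   // MaxFALL, in cd/m^2
+//! \endcode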
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME:   NvAPI_Disp_SetSourceHdrMetadata
+//
+//! \fn NvAPI_Disp_SetSourceHdrMetadata(__in NvU32 displayId, __in NV_HDR_METADATA* pMetadata)
+//! DESCRIPTION:    This API sets the HDR metadata of the source identified by the process id of the caller.
+//!
+//! SUPPORTED OS: Windows 7 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]    displayId    Monitor Identifier
+//! \param [in]    pMetadata    HDR metadata
+//!
+//! \return  This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!          specific meaning for this API, they are listed below.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_SetSourceHdrMetadata(__in NvU32 displayId, __in NV_HDR_METADATA* pMetadata);
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME:   NvAPI_Disp_GetSourceHdrMetadata
+//
+//! \fn NvAPI_Disp_GetSourceHdrMetadata(__in NvU32 displayId, __inout NV_HDR_METADATA* pMetadata, NvU64 sourcePID)
+//! DESCRIPTION:    This API gets the HDR metadata of the source identified by the process id.
+//!                 Set sourcePID = NV_SOURCE_PID_CURRENT to use the process id of the caller.
+//!
+//! SUPPORTED OS: Windows 7 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]    displayId    Monitor Identifier
+//! \param [out]   pMetadata    HDR metadata
+//! \param [in]    sourcePID    Source process id (PID)
+//!
+//! \return  This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!          specific meaning for this API, they are listed below.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_GetSourceHdrMetadata(__in NvU32 displayId, __inout NV_HDR_METADATA* pMetadata, NvU64 sourcePID);
+
+typedef enum _NV_DISPLAY_OUTPUT_MODE
+{
+    NV_DISPLAY_OUTPUT_MODE_SDR              = 0,
+    NV_DISPLAY_OUTPUT_MODE_HDR10            = 1,
+    NV_DISPLAY_OUTPUT_MODE_HDR10PLUS_GAMING = 2
+} NV_DISPLAY_OUTPUT_MODE;
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME:   NvAPI_Disp_SetOutputMode
+//
+//! \fn NvAPI_Disp_SetOutputMode(__in NvU32 displayId, __inout NV_DISPLAY_OUTPUT_MODE* pDisplayMode)
+//! DESCRIPTION:    This API sets the display output mode and returns the display output mode used by the OS before the API call.
+//!                 Only one application at a time can override the OS display output mode.
+//!
+//! SUPPORTED OS: Windows 7 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]      displayId       Display identifier
+//! \param [in,out]  pDisplayMode    New/original display output mode
+//!
+//! \return  This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!          specific meaning for this API, they are listed below.
+//!
+//! \retval ::NVAPI_RESOURCE_IN_USE    The output mode can't be changed as it is already overridden by another application.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_SetOutputMode(__in NvU32 displayId, __inout NV_DISPLAY_OUTPUT_MODE* pDisplayMode);
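+
+//! Illustrative usage (a hypothetical sketch, not part of the official header):
+//! switch a display to HDR10 while remembering the OS mode that was active
+//! before the call so it can be restored on shutdown:
+//! \code
+//!     NV_DISPLAY_OUTPUT_MODE mode = NV_DISPLAY_OUTPUT_MODE_HDR10;
+//!     if (NvAPI_Disp_SetOutputMode(displayId, &mode) == NVAPI_OK)
+//!     {
+//!         // 'mode' now holds the previous OS output mode.
+//!         NV_DISPLAY_OUTPUT_MODE restoreMode = mode;
+//!         // ... render ...
+//!         NvAPI_Disp_SetOutputMode(displayId, &restoreMode);  // restore on exit
+//!     }
+//! \endcode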
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME:   NvAPI_Disp_GetOutputMode
+//
+//! \fn NvAPI_Disp_GetOutputMode(__in NvU32 displayId, __inout NV_DISPLAY_OUTPUT_MODE* pDisplayMode)
+//! DESCRIPTION:    This API gets the display output mode.
+//!
+//! SUPPORTED OS: Windows 7 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]    displayId       Display identifier
+//! \param [out]   pDisplayMode    Current display output mode
+//!
+//! \return  This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!          specific meaning for this API, they are listed below.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_GetOutputMode(__in NvU32 displayId, __inout NV_DISPLAY_OUTPUT_MODE* pDisplayMode);
+
+typedef enum _NV_HDR_TONEMAPPING_METHOD
+{
+    NV_HDR_TONEMAPPING_APP = 0,
+    NV_HDR_TONEMAPPING_GPU = 1
+} NV_HDR_TONEMAPPING_METHOD;
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME:   NvAPI_Disp_SetHdrToneMapping
+//
+//! \fn NvAPI_Disp_SetHdrToneMapping(__in NvU32 displayId, __in NV_HDR_TONEMAPPING_METHOD hdrTonemapping)
+//! DESCRIPTION:    This API sets the HDR tonemapping method for the display.
+//!
+//! SUPPORTED OS: Windows 7 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]    displayId         Display identifier
+//! \param [in]    hdrTonemapping    HDR tonemapping method
+//!
+//! \return  This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!          specific meaning for this API, they are listed below.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_SetHdrToneMapping(__in NvU32 displayId, __in NV_HDR_TONEMAPPING_METHOD hdrTonemapping);
+
+///////////////////////////////////////////////////////////////////////////////
+// FUNCTION NAME:   NvAPI_Disp_GetHdrToneMapping
+//
+//! \fn NvAPI_Disp_GetHdrToneMapping(__in NvU32 displayId, __inout NV_HDR_TONEMAPPING_METHOD* pHdrTonemapping)
+//! DESCRIPTION:    This API gets the HDR tonemapping method for the display.
+//!
+//! SUPPORTED OS: Windows 7 and higher
+//!
+//!
+//! \since Release: 525
+//!
+//! \param [in]    displayId          Display identifier
+//! \param [out]   pHdrTonemapping    HDR tonemapping method
+//!
+//! \return  This API can return any of the error codes enumerated in #NvAPI_Status. If there are return error codes with
+//!          specific meaning for this API, they are listed below.
+//
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_Disp_GetHdrToneMapping(__in NvU32 displayId, __inout NV_HDR_TONEMAPPING_METHOD* pHdrTonemapping);
+//! @}
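+
+//! Illustrative usage (a hypothetical sketch, not part of the official header):
+//! ask the driver to let the GPU perform HDR tonemapping, keeping tonemapping
+//! in the application if the request is rejected:
+//! \code
+//!     if (NvAPI_Disp_SetHdrToneMapping(displayId, NV_HDR_TONEMAPPING_GPU) != NVAPI_OK)
+//!     {
+//!         // GPU tonemapping is unavailable; fall back to app-side tonemapping.
+//!         NvAPI_Disp_SetHdrToneMapping(displayId, NV_HDR_TONEMAPPING_APP);
+//!     }
+//! \endcode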
 //! \ingroup dispcontrol
 //! Used in NvAPI_DISP_GetTiming().
@@ -7851,7 +8172,7 @@ NVAPI_INTERFACE NvAPI_DISP_GetDisplayConfig(__inout NvU32 *pathInfoCount, __out_
 //!
 //! \param [in]    pathInfoCount    Number of supplied elements in pathInfo
 //! \param [in]    pathInfo         Array of path information
-//! \param [in]    flags            Flags for applying settings
+//! \param [in]    flags            A bitwise OR of supported flags from NV_DISPLAYCONFIG_FLAGS.
 //!
 //! \retval ::NVAPI_OK                  - completed request
 //! \retval ::NVAPI_API_NOT_INITIALIZED - NVAPI not initialized
@@ -12944,10 +13265,21 @@ typedef struct
     NvU64 gpuVAStart;       //!< [OUT] gpu virtual address where resource starts
     NvU64 gpuVASize;        //!< [OUT] virtual memory size
 } NVAPI_UAV_INFO_V1;
+
+typedef struct
+{
+    NvU32 version;          //!< Structure version
+    NvU32 surfaceHandle;    //!< [OUT] driver handle for a UAV (that can be used as a cudaSurfaceObject_t)
+    NvU64 gpuVAStart;       //!< [OUT] gpu virtual address where resource starts
+    NvU64 gpuVASize;        //!< [OUT] virtual memory size
+    NvU64 outFlags;
+} NVAPI_UAV_INFO_V2;
+
 #define NVAPI_UAV_INFO_VER1 1
+#define NVAPI_UAV_INFO_VER2 MAKE_NVAPI_VERSION(NVAPI_UAV_INFO_V2, 2)
-#define NVAPI_UAV_INFO_VER NVAPI_UAV_INFO_VER1
-typedef NVAPI_UAV_INFO_V1 NVAPI_UAV_INFO;
+#define NVAPI_UAV_INFO_VER NVAPI_UAV_INFO_VER2
+typedef NVAPI_UAV_INFO_V2 NVAPI_UAV_INFO;
 NVAPI_INTERFACE NvAPI_D3D12_CaptureUAVInfo(__in ID3D12Device* pDevice, __out NVAPI_UAV_INFO *pUAVInfo);
@@ -13595,7 +13927,7 @@ NVAPI_INTERFACE NvAPI_D3D11_EnumerateMetaCommands(__in
 //!                #NvAPI_Status. If there are return error codes with specific
 //!                meaning for this API, they are listed below.
 //!
-//!                DXGI_ERROR_NOT_SUPPORTED - The requested Metacommand is not supported.
+//!                NVAPI_NOT_SUPPORTED - The requested Metacommand is not supported.
 //! \endcode
 //! \ingroup dx
 ///////////////////////////////////////////////////////////////////////////////
@@ -13819,7 +14151,7 @@ NVAPI_INTERFACE NvAPI_D3D12_EnumerateMetaCommands(__in
 //!                #NvAPI_Status. If there are return error codes with specific
 //!                meaning for this API, they are listed below.
 //!
-//!                DXGI_ERROR_NOT_SUPPORTED - The requested Metacommand is not supported.
+//!                NVAPI_NOT_SUPPORTED - The requested Metacommand is not supported.
 //! \endcode
 //! \ingroup dx
 ///////////////////////////////////////////////////////////////////////////////
@@ -16701,14 +17033,19 @@ NVAPI_INTERFACE NvAPI_D3D_GetLatency(__in IUnknown *pDev, __out NV_LATENCY_RESUL
 //! \ingroup dx
 typedef enum
 {
-    SIMULATION_START = 0,
-    SIMULATION_END = 1,
-    RENDERSUBMIT_START = 2,
-    RENDERSUBMIT_END = 3,
-    PRESENT_START = 4,
-    PRESENT_END = 5,
-    INPUT_SAMPLE = 6,
-    TRIGGER_FLASH = 7,
+    SIMULATION_START               = 0,
+    SIMULATION_END                 = 1,
+    RENDERSUBMIT_START             = 2,
+    RENDERSUBMIT_END               = 3,
+    PRESENT_START                  = 4,
+    PRESENT_END                    = 5,
+    INPUT_SAMPLE                   = 6,
+    TRIGGER_FLASH                  = 7,
+    PC_LATENCY_PING                = 8,
+    OUT_OF_BAND_RENDERSUBMIT_START = 9,
+    OUT_OF_BAND_RENDERSUBMIT_END   = 10,
+    OUT_OF_BAND_PRESENT_START      = 11,
+    OUT_OF_BAND_PRESENT_END        = 12,
 } NV_LATENCY_MARKER_TYPE;
 //! SUPPORTED OS: Windows 7 and higher
@@ -16760,6 +17097,65 @@ typedef NV_LATENCY_MARKER_PARAMS_V1 NV_LATENCY_MARKER_PARAMS;
 NVAPI_INTERFACE NvAPI_D3D_SetLatencyMarker(__in IUnknown *pDev, __in NV_LATENCY_MARKER_PARAMS* pSetLatencyMarkerParams);
 #endif //defined(__cplusplus) && (defined(_D3D9_H_) || defined(__d3d10_h__) || defined(__d3d10_1_h__) || defined(__d3d11_h__) || defined(__d3d12_h__))
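+
+//! Illustrative usage (a hypothetical sketch, not part of the official header):
+//! bracket the CPU simulation stage of one frame with latency markers. 'pDev'
+//! and 'frameID' are assumed to come from the application's frame loop:
+//! \code
+//!     NV_LATENCY_MARKER_PARAMS markerParams = {};
+//!     markerParams.version    = NV_LATENCY_MARKER_PARAMS_VER;
+//!     markerParams.frameID    = frameID;
+//!     markerParams.markerType = SIMULATION_START;
+//!     NvAPI_D3D_SetLatencyMarker(pDev, &markerParams);
+//!     // ... run the simulation stage of this frame ...
+//!     markerParams.markerType = SIMULATION_END;
+//!     NvAPI_D3D_SetLatencyMarker(pDev, &markerParams);
+//! \endcode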
+//! Used in NvAPI_D3D12_SetAsyncFrameMarker
+//! \ingroup dx
+typedef NV_LATENCY_MARKER_PARAMS_V1 NV_ASYNC_FRAME_MARKER_PARAMS_V1;
+typedef NV_ASYNC_FRAME_MARKER_PARAMS_V1 NV_ASYNC_FRAME_MARKER_PARAMS;
+#define NV_ASYNC_FRAME_MARKER_PARAMS_VER1 NV_LATENCY_MARKER_PARAMS_VER1
+#define NV_ASYNC_FRAME_MARKER_PARAMS_VER  NV_LATENCY_MARKER_PARAMS_VER1
+
+#if defined(__cplusplus) && (defined(__d3d12_h__))
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_SetAsyncFrameMarker
+//
+//! DESCRIPTION: Set an async frame marker for present and out-of-band render tracking.
+//!
+//! \since Release: 520
+//! \param [in] pCommandQueue               The D3D12CommandQueue
+//! \param [in] pSetAsyncFrameMarkerParams  The async frame marker structure
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_SetAsyncFrameMarker(__in ID3D12CommandQueue *pCommandQueue, __in NV_ASYNC_FRAME_MARKER_PARAMS* pSetAsyncFrameMarkerParams);
+#endif //defined(__cplusplus) && (defined(__d3d12_h__))
+
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//! Used in NvAPI_D3D12_NotifyOutOfBandCommandQueue
+//! \ingroup dx
+typedef enum
+{
+    OUT_OF_BAND_RENDER  = 0,
+    OUT_OF_BAND_PRESENT = 1,
+} NV_OUT_OF_BAND_CQ_TYPE;
+
+#if defined(__cplusplus) && defined(__d3d12_h__)
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_NotifyOutOfBandCommandQueue
+//
+//! DESCRIPTION: Notifies the driver that this command queue runs out of band
+//!              from the application's frame cadence.
+//!
+//! \since Release: 520
+//! \param [in] pCommandQueue  The D3D12CommandQueue
+//! \param [in] cqType         The type of out-of-band command queue
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_NotifyOutOfBandCommandQueue(__in ID3D12CommandQueue *pCommandQueue, __in NV_OUT_OF_BAND_CQ_TYPE cqType);
+#endif //defined(__cplusplus) && defined(__d3d12_h__)
 #if defined (__cplusplus) && defined(__d3d12_h__)
@@ -16838,7 +17234,67 @@ NVAPI_INTERFACE NvAPI_D3D12_GetCudaSurfaceObject(__in ID3D12Device*
 NVAPI_INTERFACE NvAPI_D3D12_IsFatbinPTXSupported(__in ID3D12Device *pDevice, __out bool *pSupported);
-#endif //if defined (__cplusplus) && defined(__d3d12_h__)
+// Experimental API for internal use. DO NOT USE!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+NVAPI_INTERFACE NvAPI_D3D12_CreateCuModule(__in  ID3D12Device* pDevice,
+                                           __in  const void* pBlob,
+                                           __in  NvU32 size,
+                                           __out NVDX_ObjectHandle* phModule);
+
+// Experimental API for internal use. DO NOT USE!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+NVAPI_INTERFACE NvAPI_D3D12_EnumFunctionsInModule(__in    ID3D12Device* pDevice,
+                                                  __in    NVDX_ObjectHandle hModule,
+                                                  __inout NvU32* pArraySize,
+                                                  __out   const char** const pFunctionNames);
+
+// Experimental API for internal use. DO NOT USE!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+NVAPI_INTERFACE NvAPI_D3D12_CreateCuFunction(__in  ID3D12Device* pDevice,
+                                             __in  NVDX_ObjectHandle hModule,
+                                             __in  const char* pName,
+                                             __out NVDX_ObjectHandle* phFunction);
+
+// Experimental API for internal use. DO NOT USE!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+
+typedef struct _NVAPI_DIM3
+{
+    NvU32 x;
+    NvU32 y;
+    NvU32 z;
+} NVAPI_DIM3;
+
+typedef struct _NVAPI_CU_KERNEL_LAUNCH_PARAMS
+{
+    NVDX_ObjectHandle hFunction;
+    NVAPI_DIM3        gridDim;
+    NVAPI_DIM3        blockDim;
+    NvU32             dynSharedMemBytes;
+    void const *      pParams;
+    NvU32             paramSize;
+} NVAPI_CU_KERNEL_LAUNCH_PARAMS;
+
+NVAPI_INTERFACE NvAPI_D3D12_LaunchCuKernelChain(__in ID3D12GraphicsCommandList* pCommandList,
+                                                __in const NVAPI_CU_KERNEL_LAUNCH_PARAMS* pKernels,
+                                                __in NvU32 numKernels);
+
+// Experimental API for internal use. DO NOT USE!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+NVAPI_INTERFACE NvAPI_D3D12_DestroyCuModule(__in ID3D12Device* pDevice,
+                                            __in NVDX_ObjectHandle hModule);
+
+// Experimental API for internal use. DO NOT USE!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+NVAPI_INTERFACE NvAPI_D3D12_DestroyCuFunction(__in ID3D12Device* pDevice,
+                                              __in NVDX_ObjectHandle hFunction);
+#endif //if defined (__cplusplus) && defined(__d3d12_h__)
 #if defined (__cplusplus) && defined(__d3d11_h__)
@@ -16950,6 +17406,791 @@ NVAPI_INTERFACE NvAPI_D3D11_GetResourceGPUVirtualAddress(__in ID3D11Device*
 #endif //defined(__cplusplus) && defined(__d3d11_h__)
+#if defined(__cplusplus) && defined(__d3d12_h__)
+//! Flags specifying raytracing thread reordering hardware support.
+//! Additional flags will be added as support becomes available.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS
+{
+    NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAP_NONE     = 0x0,        //!< Thread reordering acts as a no-op
+    NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAP_STANDARD = NV_BIT(0)   //!< Standard thread reordering is supported
+} NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS;
+
+//! Flags specifying raytracing Opacity Micromap support.
+//! Additional flags will be added as support becomes available.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS
+{
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE     = 0x0,         //!< Opacity Micromap support is not available.
+                                                                        //!< The application must not attempt to use any OMM entrypoints or flags.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_STANDARD = NV_BIT(0)    //!< Standard Opacity Micromap support is available
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS;
+
+//! List of Raytracing CAPS types that can be queried.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_CAPS_TYPE
+{
+    NVAPI_D3D12_RAYTRACING_CAPS_TYPE_THREAD_REORDERING = 0,
+    NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP  = 1,
+    NVAPI_D3D12_RAYTRACING_CAPS_TYPE_INVALID           = -1
+} NVAPI_D3D12_RAYTRACING_CAPS_TYPE;
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_GetRaytracingCaps
+//
+//! DESCRIPTION: Query raytracing capabilities of a device.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in]  pDevice   Device from which raytracing caps should be queried.
+//! \param [in]  type      Raytracing caps type requested.
+//! \param [out] pData     Memory to write raytracing caps to.
+//! \param [in]  dataSize  Size in bytes of the memory pointed to by pData; must match the size of the raytracing caps type requested.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \retval ::NVAPI_OK                Completed request
+//! \retval ::NVAPI_INVALID_POINTER   A null pointer was passed as an argument
+//! \retval ::NVAPI_INVALID_ARGUMENT  At least one of the arguments is invalid
+//! \retval ::NVAPI_ERROR             Error occurred
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingCaps(
+    __in  ID3D12Device* pDevice,
+    __in  NVAPI_D3D12_RAYTRACING_CAPS_TYPE type,
+    __out void* pData,
+    __in  size_t dataSize);
+#endif // defined(__cplusplus) && defined(__d3d12_h__)
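+
+//! Illustrative usage (a hypothetical sketch, not part of the official header):
+//! check whether Opacity Micromaps may be used on a device before calling any
+//! OMM entrypoints:
+//! \code
+//!     NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS ommCaps =
+//!         NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE;
+//!     bool ommSupported =
+//!         NvAPI_D3D12_GetRaytracingCaps(pDevice,
+//!                                       NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP,
+//!                                       &ommCaps,
+//!                                       sizeof(ommCaps)) == NVAPI_OK &&
+//!         ommCaps != NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE;
+//! \endcode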
+//! SUPPORTED OS: Windows 10 and higher
+//!
+#if defined(__cplusplus) && defined(__d3d12_h__) && (defined(__ID3D12Device5_INTERFACE_DEFINED__) || defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__))
+
+// Types used by both device and command list functions.
+
+//! Flags specifying building instructions and hints when constructing an OMM Array.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAGS
+{
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_NONE              = 0x0,        //!< No options specified for the OMM Array build.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_TRACE = NV_BIT(0),  //!< Allow the OMM Array build to take a little longer in order to optimize for traversal performance.
+                                                                                             //!< This flag is incompatible with #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_BUILD.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_BUILD = NV_BIT(1)   //!< Spend as little time as possible on the OMM Array build with some potential loss to traversal performance.
+                                                                                             //!< This flag is incompatible with #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_TRACE.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAGS;
+
+//! Specifies the input Opacity Micromap formats.
+//! The OC1 (Opacity Compression 1) format follows the space-filling curve in barycentric space over the uniformly tessellated micro-triangles.
+//!
+//! \note This is a 16-bit value when used in #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT
+{
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT_OC1_2_STATE = 0x1,    //!< 2-state (Transparent/Opaque) format.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT_OC1_4_STATE = 0x2     //!< 4-state (Transparent/Opaque, Known/Unknown) format.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT;
+
+//! Number of OMMs of a specific configuration in an OMM Array.
+//! Used to compute conservative buffer size estimates for OMM Array builds.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT
+{
+    NvU32                                          count;              //!< Total number of OMMs in the OMM Array with the particular \p subdivisionLevel and \p format specified in this descriptor.
+    NvU32                                          subdivisionLevel;   //!< Number of subdivisions for the OMM; valid inputs are [0, 12] (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_OC1_MAX_SUBDIVISION_LEVEL).
+                                                                       //!< The total number of micro-triangles is 4^subdivisionLevel.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT format;             //!< Opacity Micromap format.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT;
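+
+//! Illustrative usage (a hypothetical sketch, not part of the official header):
+//! declare an OMM Array holding 1024 OMMs, all at subdivision level 9 in the
+//! 2-state OC1 format. An array like this would feed the \c pOMMUsageCounts
+//! field of the build-inputs structure declared further down:
+//! \code
+//!     NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT usageCounts[1] = {};
+//!     usageCounts[0].count            = 1024;
+//!     usageCounts[0].subdivisionLevel = 9;    // 4^9 micro-triangles per OMM
+//!     usageCounts[0].format           = NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT_OC1_2_STATE;
+//! \endcode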
+//! Describes one Opacity Micromap.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC
+{
+    NvU32 byteOffset;          //!< Byte offset from the \c inputBuffer, specified in the input structure #NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS, to where the input OMM data is located.
+    NvU16 subdivisionLevel;    //!< Number of subdivisions for the OMM; valid inputs are [0, 12] (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_OC1_MAX_SUBDIVISION_LEVEL).
+                               //!< The total number of micro-triangles is 4^subdivisionLevel.
+    NvU16 format;              //!< Format of the OMM of type #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC;
+
+//! Input structure to OMM Array construction.
+//! Individual OMMs are accessed via indices when used in bottom-level acceleration structure (BLAS) construction.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS
+{
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAGS  flags;              //!< Flags which apply to all OMMs in the array.
+    NvU32                                                      numOMMUsageCounts;  //!< Number of OMM usage count entries in the \p pOMMUsageCounts array.
+    const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT* pOMMUsageCounts;    //!< Usage counts for each subdivision level and format combination across all the OMM entries in the build.
+    D3D12_GPU_VIRTUAL_ADDRESS                                  inputBuffer;        //!< Address for raw OMM input data; it must be 256-byte aligned.
+                                                                                   //!< It is recommended to try to organize OMMs together in memory that are expected to be used close together spatially.
+    D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE                       perOMMDescs;        //!< GPU array with one #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC entry per OMM.
+} NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS;
+
+#endif // defined(__cplusplus) && defined(__d3d12_h__) && (defined(__ID3D12Device5_INTERFACE_DEFINED__) || defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__))
+
+#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__)
+
+//! Conservative memory requirements for building an OMM Array.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO
+{
+    NvU64 resultDataMaxSizeInBytes;    //!< Size required to hold the result of an OMM Array build based on the specified inputs.
+    NvU64 scratchDataSizeInBytes;      //!< Scratch storage on GPU required during OMM Array build based on the specified inputs.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO;
+
+//! Parameters given to NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1
+{
+    NvU32                                                             version;    //!< [in]  Structure version; it should be set to #NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER.
+    const NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS* pDesc;      //!< [in]  Description of the OMM Array build.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO*      pInfo;      //!< [out] Result of the query.
+} NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1; +#define NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1, 1) +typedef NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1 NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS; +#define NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo +// +//! DESCRIPTION: Query conservative memory requirements for building an OMM (Opacity Micromap) Array. +//! The returned size is conservative for OMM Array builds containing +//! a lower or equal number of entries for each resolution and format combination. +//! +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 520 +//! +//! \param [in] pDevice Device on which the OMM Array will be built. +//! \param [in,out] pParams Wrapper around the inputs and outputs of the function. +//! +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo( + __in ID3D12Device5* pDevice, + __inout NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +//! Pipeline creation state flags. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS +{ + NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_NONE = 0, //!< [in] No pipeline flags. + NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_OMM_SUPPORT = NV_BIT(0), //!< [in] Change whether raytracing pipelines are created with support for Opacity Micromaps. + //!< If a triangle with an OMM is encountered during traversal and the pipeline was not created with support for them, behavior is undefined. + //!< Support should only be enabled if there are OMMs present, since it may incur a small penalty on traversal performance overall. +} NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS; + +//! State used when creating new pipelines. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER. + NvU32 flags; //!< [in] A bitwise OR of one or more #NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS flags for raytracing pipeline creation. 
+} NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1; +#define NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1, 1) +typedef NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1 NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS; +#define NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER1 + +/////////////////////////////////////////////////////////////////////////////// +// +// FUNCTION NAME: NvAPI_D3D12_SetCreatePipelineStateOptions +// +//! DESCRIPTION: Globally change the state affecting pipeline creations. +//! This affects all pipelines created after this call, and until this function is called again. +//! +//! \note Only supported on GPUs capable of DXR. +//! Some of the flags and fields have further restrictions, in which case their description will include a note with more details. +//! +//! SUPPORTED OS: Windows 10 and higher +//! +//! +//! \since Release: 520 +//! +//! \param [in] pDevice Device on which the pipelines will be created. +//! \param [in] pState State to be applied to all future pipeline creations. + +//! \return This API can return any of the error codes enumerated in #NvAPI_Status. +//! If there are return error codes with specific meaning for this API, they are listed below. +//! +//! \ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_SetCreatePipelineStateOptions( + __in ID3D12Device5* pDevice, + __in const NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS* pState); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +//! Type of serialized data. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX +{ + // D3D12_SERIALIZED_DATA_TYPE flags + NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_ACCELERATION_STRUCTURE_EX = 0x0, //!< Serialized data contains a raytracing acceleration structure. + //!< Starting from offset 0, the first bytes of the serialized acceleration structure can be reinterpreted as \c D3D12_SERIALIZED_RAYTRACING_ACCELERATION_STRUCTURE_HEADER. + //!< That structure contains the identifier to be passed along to NvAPI_D3D12_CheckDriverMatchingIdentifierEx(). + + // NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX specific flags + NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_OPACITY_MICROMAP_ARRAY_EX = 0x1, //!< Data blob contains an OMM Array. + //!< Starting from offset 0, the first bytes of the OMM Array can be reinterpreted as \c D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER. + +} NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX; + +//! Parameters given to NvAPI_D3D12_CheckDriverMatchingIdentifierEx(). +//! +//! \ingroup dx +typedef struct _NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1 +{ + NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER. + NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX serializedDataType; //!< [in] Type of data to be deserialized; see #NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX. + const D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER* pIdentifierToCheck; //!< [in] Identifier from the header of the serialized data to check with the driver; see \c D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER. 
+                                                                             //!< Information about how to retrieve that identifier can be found in the description of each #NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX enum.
+    D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS                  checkStatus;    //!< [out] Result of the check; see \c D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS.
+} NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1;
+#define NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1, 1)
+typedef NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1 NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS;
+#define NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_CheckDriverMatchingIdentifierEx
+//
+//! DESCRIPTION: This function is an extension of ID3D12Device5::CheckDriverMatchingIdentifier() with additional serialized data types.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in]     pDevice  Device on which the data will be deserialized.
+//! \param [in,out] pParams  Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_CheckDriverMatchingIdentifierEx(
+    __in    ID3D12Device5* pDevice,
+    __inout NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS* pParams);
+
+#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__)
+
+#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__)
+
+//! This enum extends \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS with modified and additional values.
+//! Only modified/new values are fully described; for more information on the other values, please check Microsoft's DirectX Raytracing Specification.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX
+{
+    // D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS flags
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE_EX              = 0x0,        //!< No options specified for the acceleration structure build.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_UPDATE_EX      = NV_BIT(0),  //!< Allow the acceleration structure to later be updated (via the flag #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE_EX), rather than always requiring a full rebuild.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_COMPACTION_EX  = NV_BIT(1),  //!< Allow for the acceleration structure to later be compacted.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE_EX = NV_BIT(2),  //!< Favor higher raytracing performance at the cost of longer build times.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD_EX = NV_BIT(3),  //!< Favor faster build times at the cost of lower raytracing performance.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_MINIMIZE_MEMORY_EX   = NV_BIT(4),  //!< Minimize the memory footprint of the produced acceleration structure, potentially at the cost of longer build time or lower raytracing performance.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE_EX    = NV_BIT(5),  //!< Instead of rebuilding the acceleration structure from scratch, the existing acceleration structure will be updated.
+                                                                                                //!< Added behavior: If #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX is specified, OMM references may be changed along with positions when an update is performed.
+
+    // NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX specific flags
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX               = NV_BIT(6),  //!< The acceleration structure (AS) supports updating OMM contents (base OMM Array and/or indices).
+                                                                                                             //!< Specifying this flag may result in larger AS size and may reduce traversal performance.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DISABLE_OMMS_EX             = NV_BIT(7),  //!< Only applicable for BLAS builds. If enabled, any instances referencing this BLAS are allowed to disable the OMM test through the #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX flag.
+                                                                                                             //!< Specifying this build flag may result in some reductions in traversal performance.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_OPACITY_STATES_UPDATE_EX = NV_BIT(8), //!< The acceleration structure (AS) supports updating OMM data (encoded opacity values).
+                                                                                                             //!< Specifying this flag may reduce traversal performance.
+
+} NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX;
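+
+//! Illustrative usage (a hypothetical sketch, not part of the official header):
+//! a flag combination for a BLAS that favors traversal speed and allows its
+//! OMM references to be changed during later updates. As with the underlying
+//! D3D12 flags, PREFER_FAST_TRACE and PREFER_FAST_BUILD should not be combined:
+//! \code
+//!     NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX buildFlags =
+//!         (NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX)
+//!         (NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE_EX |
+//!          NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_UPDATE_EX |
+//!          NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX);
+//! \endcode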
+//! This enum extends \c D3D12_RAYTRACING_GEOMETRY_TYPE with additional values.
+//! Only new values are fully described below; for more information on the other values, please check Microsoft's DirectX Raytracing Specification.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX
+{
+    // D3D12_RAYTRACING_GEOMETRY_TYPE flags
+    NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES_EX                  = 0x0,    //!< This geometry is made of basic triangles.
+    NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS_EX = 0x1,    //!< This geometry is made of axis-aligned bounding boxes (AABBs).
+
+    // NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX specific flags
+    NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX              = 0x2     //!< Shares most fields with the basic triangle geometry type, but allows an OMM Array to be attached to the geometry.
+                                                                                 //!< Geometries of the basic triangle type and of this OMM-enabled type may be mixed in the same BLAS build.
+
+} NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX;
+
+//! If a triangle has a uniform OMM state in a BLAS build, it is preferable to signal this explicitly rather than attaching a single-state OMM.
+//! This can be accomplished by supplying these special indices as entries in \c opacityMicromapIndexBuffer, in #NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX
+{
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_TRANSPARENT         = -1,    //!< Uniform transparent OMM state.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_OPAQUE              = -2,    //!< Uniform opaque OMM state.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_UNKNOWN_TRANSPARENT = -3,    //!< Uniform unknown-transparent OMM state.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_UNKNOWN_OPAQUE      = -4     //!< Uniform unknown-opaque OMM state.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX;
+
+//! Geometry descriptor attachment with Opacity Micromaps.
+//!
+//!
\ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_ATTACHMENT_DESC +{ + D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE opacityMicromapIndexBuffer; //!< Optional buffer specifying which OMM index to use for each triangle; if \c NULL, there is a 1:1 mapping between input triangles and OMM Array entries. + //!< Special values can be used to encode OMMs with uniform state for individual triangles (see #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX). + //!< For BLAS updates, this input buffer must match that of the original build if the #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX build flag is not set. + DXGI_FORMAT opacityMicromapIndexFormat; //!< Format of \c opacityMicromapIndexBuffer, either \c DXGI_FORMAT_R32_UINT or \c DXGI_FORMAT_R16_UINT. + NvU32 opacityMicromapBaseLocation; //!< Constant added to all non-negative OMM indices in \p opacityMicromapIndexBuffer. + D3D12_GPU_VIRTUAL_ADDRESS opacityMicromapArray; //!< Pointer to an OMM Array used by this geometry; it may be set to \c NULL if no non-uniform OMMs are used. + //!< Unlike vertex, index, and transform buffers, this resource is dereferenced during raytracing. + + NvU32 numOMMUsageCounts; //!< Number of OMM usage count entries in the \p pOMMUsageCounts array. + const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT* pOMMUsageCounts; //!< Usage counts for each subdivision level and format combination across all the OMM entries referred-to by the OMM index buffer specified by this geometry. + +} NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_ATTACHMENT_DESC; + +//! Geometry triangle descriptor with attached augmented Opacity Micromaps. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC +{ + D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC triangles; //!< Triangle mesh descriptor. + NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_ATTACHMENT_DESC ommAttachment; //!< Opacity Micromap attachment descriptor. +} NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC; + +//! This structure extends \c D3D12_RAYTRACING_GEOMETRY_DESC by supporting additional geometry types. +//! Only new members are fully described below; for more information on the other members, please check Microsoft's DirectX Raytracing Specification. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX +{ + NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX type; //!< The type of geometry stored in the union of this structure. + D3D12_RAYTRACING_GEOMETRY_FLAGS flags; //!< Flags affecting how this geometry is processed by the raytracing pipeline. + union + { + D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC triangles; //!< Describes triangle geometry if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES_EX. + //!< Otherwise, this parameter is unused (space repurposed in a union). + D3D12_RAYTRACING_GEOMETRY_AABBS_DESC aabbs; //!< Describes AABB geometry if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS_EX. + //!< Otherwise, this parameter is unused (space repurposed in a union). + NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC ommTriangles; //!< Describes triangle geometry which may optionally use Opacity Micromaps, if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX. + //!< Otherwise, this parameter is unused (space repurposed in a union). + }; +} NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX; + +//! This enum extends \c D3D12_RAYTRACING_INSTANCE_FLAGS with additional values. +//! 
Only new values are fully described below; for more information on the other values, please check Microsoft's DirectX Raytracing Specification. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX +{ + // D3D12_RAYTRACING_INSTANCE_FLAGS flags + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_NONE_EX = 0x0, //!< No options specified for this instance. + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE_EX = NV_BIT(0), //!< Disable triangle culling for this instance. + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE_EX = NV_BIT(1), //!< Use counter-clockwise winding for defining front faces, instead of the default of clockwise winding. + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_OPAQUE_EX = NV_BIT(2), //!< Force all geometries in this instance to be opaque. + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_NON_OPAQUE_EX = NV_BIT(3), //!< All geometries in this instance will be processed as if they never had the \c D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE flag applied to them. + + // NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX specific flags + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_OMM_2_STATE_EX = NV_BIT(4), //!< Ignore the Unknown state and only consider the Transparent/Opaque bit for all 4-state OMMs encountered during traversal. + //!< This flag has no effect if #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX is set. + NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX = NV_BIT(5) //!< Disable OMMs for all triangles, and revert to using geometry opaque/non-opaque state instead (legacy behavior). + //!< This flag is only valid if the referenced BLAS was built with the #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DISABLE_OMMS_EX flag; omitting that flag during BLAS build will result in undefined behavior. +} NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX; + +//! This structure extends \c D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS by supporting additional geometry types. +//! Only modified members are fully described below; for more information on the other members, please check Microsoft's DirectX Raytracing Specification. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX +{ + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE type; //!< Whether a top-level acceleration structure (TLAS) or bottom-level acceleration structure (BLAS) will be built using this information. + NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX flags; //!< Options influencing how the acceleration structure is built and which of its features can be used. + NvU32 numDescs; //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TOP_LEVEL, it represents the number of descriptions stored in \c instanceDescs. + //!< Otherwise, it contains the number of geometry descriptions stored in \c pGeometryDescs or \c ppGeometryDescs. + D3D12_ELEMENTS_LAYOUT descsLayout; //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BOTTOM_LEVEL, it specifies which of \c pGeometryDescs and \c ppGeometryDescs to use. + //!< Otherwise, this parameter is unused. + NvU32 geometryDescStrideInBytes; //!< Stride between consecutive geometry descriptors. Should typically be set to sizeof(NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX). + //!< Only used if \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL and \c descLayout is \c D3D12_ELEMENTS_LAYOUT_ARRAY. + //!< This field guarantees backwards compatibility, even if the geometry descriptor size increases in future NVAPI versions. 
+    union
+    {
+        D3D12_GPU_VIRTUAL_ADDRESS                             instanceDescs;      //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TOP_LEVEL, the referenced instance structures can use the extended set of flags #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX in place of the \c D3D12_RAYTRACING_INSTANCE_FLAGS mentioned in \c D3D12_RAYTRACING_INSTANCE_DESC.
+                                                                                  //!< Otherwise, this parameter is unused (space repurposed in a union).
+        const NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX*        pGeometryDescs;     //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BOTTOM_LEVEL and \c descLayout is \c D3D12_ELEMENTS_LAYOUT_ARRAY, it contains the descriptions of all geometries to be built into a BLAS.
+                                                                                  //!< Otherwise, this parameter is unused (space repurposed in a union).
+        const NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX*const*  ppGeometryDescs;    //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BOTTOM_LEVEL and \c descLayout is \c D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS, it contains the addresses of descriptions for all geometries to be built into a BLAS.
+                                                                                  //!< Otherwise, this parameter is unused (space repurposed in a union).
+    };
+} NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX;
+
+//! Parameters given to NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1
+{
+    NvU32                                                                version;    //!< [in]  Structure version; it should be set to #NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER.
+    const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX* pDesc;      //!< [in]  Description of the acceleration-structure build.
+    D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO*               pInfo;      //!< [out] Result of the query.
+} NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1;
+#define NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1, 1)
+typedef NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1 NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS;
+#define NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx
+//
+//! DESCRIPTION: This function is an extension of ID3D12Device5::GetRaytracingAccelerationStructurePrebuildInfo() with additional input types.
+//!
+//! \note Only supported on GPUs capable of DXR.
+//!       Some of the flags and fields have further restrictions, in which case their description will include a note with more details.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in]     pDevice  Device on which the acceleration structure will be built.
+//! \param [in,out] pParams  Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//!
\ingroup dx +/////////////////////////////////////////////////////////////////////////////// +NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx( + __in ID3D12Device5* pDevice, + __inout NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS* pParams); + +#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__) + +#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__) + +//! Description of the inputs and memory areas used during the building of OMM Arrays. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_DESC +{ + D3D12_GPU_VIRTUAL_ADDRESS destOpacityMicromapArrayData; //!< Output location for the OMM Array build. + //!< NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo() reports the amount of memory required for the result given a set of input parameters. + //!< The address must be aligned to 256 bytes (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT). + NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS inputs; //!< Description of the input data for the OMM Array build. + D3D12_GPU_VIRTUAL_ADDRESS scratchOpacityMicromapArrayData; //!< Location where the build will store temporary data. + //!< NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo() reports the amount of scratch memory the implementation will need for a given set of input parameters. + //!< The address must be aligned to 256 bytes (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT). + //!< Contents of this memory going into a build on the GPU timeline are irrelevant and will not be preserved. + //!< After the build is complete on the GPU timeline, the memory is left with whatever undefined contents the build finished with. + //!< The memory pointed to must be in state \c D3D12_RESOURCE_STATE_UNORDERED_ACCESS. +} NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_DESC; + +//! Structure emitted by NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo(), and optionally NvAPI_D3D12_BuildRaytracingOpacityMicromapArray(), when \c type equals #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE. +//! +//! \ingroup dx +typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC +{ + NvU64 currentSizeInBytes; //!< Size of the OMM Array buffer. + //!< The queried size may be smaller than the size reported by NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo(). + //!< This allows the application to move and relocate the OMM Array to a smaller buffer to reclaim any unused memory after the OMM Array build is complete. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC; + +//! Type of postbuild info to emit after an OMM Array build. +//! +//! \ingroup dx +typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE +{ + NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE = 0x0 //!< Size of the current OMM Array. May be smaller than reported by the NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo() call. + //!< Unused memory can be reclaimed by copying the OMM Array into a new resource; see #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC. +} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE; + +//! Description of the postbuild information to generate from an OMM Array. +//! +//! 
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC
+{
+    D3D12_GPU_VIRTUAL_ADDRESS destBuffer;  //!< Result storage.
+                                           //!< The size required and the layout of the contents written by the system depend on \p infoType.
+                                           //!< The memory pointed to must be in state \c D3D12_RESOURCE_STATE_UNORDERED_ACCESS.
+                                           //!< The memory must be aligned to the natural alignment for the members of the particular output structure being generated (e.g. 8 bytes for a struct with the largest member being \c NvU64).
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE infoType;  //!< Type of postbuild information to retrieve.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC;
+
+//! Parameters given to NvAPI_D3D12_BuildRaytracingOpacityMicromapArray().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1
+{
+    NvU32 version;  //!< [in] Structure version; it should be set to #NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER.
+    const NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_DESC* pDesc;  //!< [in] Description of the OMM Array build.
+    NvU32 numPostbuildInfoDescs;  //!< [in] Size of postbuild info desc array. Set to 0 if none are needed.
+    const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC* pPostbuildInfoDescs;  //!< [in] Optional array of descriptions for postbuild info to generate describing properties of the OMM Arrays that were built.
+                                                                                                   //!< Any given postbuild info type, \c NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE, can only be selected for output by at most one array entry.
+} NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1;
+#define NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1, 1)
+typedef NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1 NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS;
+#define NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_BuildRaytracingOpacityMicromapArray
+//
+//! DESCRIPTION: Constructs an OMM Array for a collection of OMMs on the GPU.
+//!              The CPU-side input buffers are not referenced after this call.
+//!              The GPU-side input resources are no longer referenced once the build has concluded after ExecuteCommandList().
+//!              Additionally, the application may optionally output postbuild information immediately after the build.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in] pCommandList  Command list on which the command will execute.
+//! \param [in] pParams       Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \retval NVAPI_INVALID_COMBINATION  pParams->pPostbuildInfoDescs was set to \c NULL while pParams->numPostbuildInfoDescs is non-zero.
+//!
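+//! \par Example
+//! A minimal sketch of recording an OMM Array build; \c ommInputs (an
+//! NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS filled elsewhere) and the
+//! \c destBuf / \c scratchBuf resources are placeholders sized via
+//! NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo():
+//! \code{.cpp}
+//!     NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_DESC desc = {};
+//!     desc.destOpacityMicromapArrayData    = destBuf->GetGPUVirtualAddress();     // 256-byte aligned
+//!     desc.inputs                          = ommInputs;
+//!     desc.scratchOpacityMicromapArrayData = scratchBuf->GetGPUVirtualAddress();  // UAV state
+//!
+//!     NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS params = {};
+//!     params.version               = NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER;
+//!     params.pDesc                 = &desc;
+//!     params.numPostbuildInfoDescs = 0;        // no postbuild info requested here
+//!     params.pPostbuildInfoDescs   = nullptr;
+//!     NvAPI_Status status = NvAPI_D3D12_BuildRaytracingOpacityMicromapArray(pCommandList, &params);
+//! \endcode
+//!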
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_BuildRaytracingOpacityMicromapArray(
+    __in ID3D12GraphicsCommandList4* pCommandList,
+    __in NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* pParams);
+
+#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+//! Parameters given to NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1
+{
+    NvU32 version;  //!< [in] Structure version; it should be set to #NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER.
+    D3D12_GPU_VIRTUAL_ADDRESS opacityMicromapArray;  //!< [in] OMM Array current memory address; it must be 256-byte aligned (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT).
+} NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1;
+#define NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1, 1)
+typedef NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1 NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS;
+#define NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray
+//
+//! DESCRIPTION: Makes the OMM Array usable at its current location in memory.
+//!              An OMM Array that has been copied to a new location must be relocated using this function before it may be attached to any BLAS.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in] pCommandList  Command list on which the command will execute.
+//! \param [in] pParams       Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
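+//! \par Example
+//! A minimal sketch; \c newBuf is a placeholder resource into which an OMM Array
+//! was previously copied:
+//! \code{.cpp}
+//!     NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS params = {};
+//!     params.version              = NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER;
+//!     params.opacityMicromapArray = newBuf->GetGPUVirtualAddress();  // 256-byte aligned
+//!     NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray(pCommandList, &params);
+//!     // Only after this command has executed may the OMM Array be attached to a BLAS.
+//! \endcode
+//!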
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray(
+    __in ID3D12GraphicsCommandList4* pCommandList,
+    __in const NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* pParams);
+
+#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+//! Parameters given to NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1
+{
+    NvU32 version;  //!< [in] Structure version; it should be set to #NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER.
+    const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC* pDesc;  //!< [in] Description of which postbuild info to emit.
+    NvU32 numSources;  //!< [in] Number of OMM Arrays in \p pSources.
+    const D3D12_GPU_VIRTUAL_ADDRESS* pSources;  //!< [in] List of OMM Arrays for which postbuild info should be emitted.
+} NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1;
+#define NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1, 1)
+typedef NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1 NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS;
+#define NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo
+//
+//! DESCRIPTION: Emits information about one or more OMM Arrays; this information only becomes available once the OMM Array builds have finished.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in] pCommandList  Command list on which the command will execute.
+//! \param [in] pParams       Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
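+//! \par Example
+//! A minimal sketch that queries the post-build size of one OMM Array so it can be
+//! compacted into a smaller buffer; \c infoBuf and \c ommArrayBuf are placeholder resources:
+//! \code{.cpp}
+//!     NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC info = {};
+//!     info.destBuffer = infoBuf->GetGPUVirtualAddress();  // 8-byte aligned, UAV state
+//!     info.infoType   = NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE;
+//!
+//!     D3D12_GPU_VIRTUAL_ADDRESS source = ommArrayBuf->GetGPUVirtualAddress();
+//!
+//!     NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS params = {};
+//!     params.version    = NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER;
+//!     params.pDesc      = &info;
+//!     params.numSources = 1;
+//!     params.pSources   = &source;
+//!     NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo(pCommandList, &params);
+//!     // infoBuf then receives an NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC.
+//! \endcode
+//!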
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo(
+    __in ID3D12GraphicsCommandList4* pCommandList,
+    __in const NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS* pParams);
+
+#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+//! This structure extends \c D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC by supporting additional geometry types as inputs.
+//! For more information on the different members, please check Microsoft's DirectX Raytracing Specification.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX
+{
+    D3D12_GPU_VIRTUAL_ADDRESS destAccelerationStructureData;     //!< Memory where the resulting acceleration structure will be stored.
+    NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX inputs;  //!< The inputs to the build process.
+    D3D12_GPU_VIRTUAL_ADDRESS sourceAccelerationStructureData;   //!< The acceleration structure to be updated, when an update is requested with \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE.
+                                                                 //!< If the acceleration structure is instead rebuilt entirely, this value must be \c NULL.
+    D3D12_GPU_VIRTUAL_ADDRESS scratchAccelerationStructureData;  //!< Memory that will be temporarily used during the building process.
+} NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX;
+
+//! Parameters given to NvAPI_D3D12_BuildRaytracingAccelerationStructureEx().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1
+{
+    NvU32 version;  //!< [in] Structure version; it should be set to #NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER.
+    const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX* pDesc;  //!< [in] Description of the acceleration structure to build.
+    NvU32 numPostbuildInfoDescs;  //!< [in] Size of postbuild info desc array. Set to 0 if none are needed.
+    const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC* pPostbuildInfoDescs;  //!< [in] Optional array of descriptions for postbuild info to generate describing properties of the acceleration structure that was built.
+                                                                                             //!< Any given postbuild info type, \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_TYPE, can only be selected for output by at most one array entry.
+} NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1;
+#define NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1, 1)
+typedef NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1 NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS;
+#define NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_BuildRaytracingAccelerationStructureEx
+//
+//! DESCRIPTION: Performs an acceleration structure build on the GPU.
+//!              Also optionally outputs postbuild information immediately after the build.
+//!              This function is an extension of ID3D12GraphicsCommandList4::BuildRaytracingAccelerationStructure() with additional input types.
+//!
+//! \note Only supported on GPUs capable of DXR.
+//!       Some of the flags and fields have further restrictions, in which case their description will include a note with more details.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in] pCommandList  Command list on which the command will execute.
+//! \param [in] pParams       Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \retval NVAPI_INVALID_COMBINATION  pParams->pPostbuildInfoDescs was set to \c NULL while pParams->numPostbuildInfoDescs is non-zero.
+//!
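+//! \par Example
+//! A minimal sketch of a full BLAS build from extended inputs; \c inputsEx (an
+//! NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX) and the \c blasBuf /
+//! \c scratchBuf resources are placeholders sized from
+//! NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx():
+//! \code{.cpp}
+//!     NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX desc = {};
+//!     desc.destAccelerationStructureData    = blasBuf->GetGPUVirtualAddress();
+//!     desc.inputs                           = inputsEx;
+//!     desc.sourceAccelerationStructureData  = 0;  // full rebuild: no update source
+//!     desc.scratchAccelerationStructureData = scratchBuf->GetGPUVirtualAddress();
+//!
+//!     NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS params = {};
+//!     params.version = NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER;
+//!     params.pDesc   = &desc;
+//!     NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(pCommandList, &params);
+//! \endcode
+//!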
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(
+    __in ID3D12GraphicsCommandList4* pCommandList,
+    __in const NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS* pParams);
+
+#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+#if defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Miscellaneous
+//
+///////////////////////////////////////////////////////////////////////////////
+
+//! Opacity Micromap micro-triangle states.
+//! Not part of any input, but listed here for convenience.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE
+{
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_TRANSPARENT         = 0,  //!< Transparent OMM state: hit is ignored.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_OPAQUE              = 1,  //!< Opaque OMM state: hit is committed.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT = 2,  //!< Unknown-transparent OMM state.
+                                                                            //!< * If operating in 2-state mode, ignore hit.
+                                                                            //!< * If operating in 4-state mode, invoke any-hit shader.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE      = 3   //!< Unknown-opaque OMM state.
+                                                                            //!< * If operating in 2-state mode, commit hit.
+                                                                            //!< * If operating in 4-state mode, invoke any-hit shader.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE;
+
+//! Mandatory alignment for the address of an OMM Array.
+//!
+//! \ingroup dx
+#define NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT 256
+
+//! Highest subdivision level allowed with OC1.
+//!
+//! \ingroup dx
+#define NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_OC1_MAX_SUBDIVISION_LEVEL 12
+
+//! A list of flags that can be given to the \c TraceRay() function in HLSL.
+//! Only new or modified values are fully described below; for more information on the other values, please check Microsoft's DirectX Raytracing Specification.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_RAY_FLAGS_EX
+{
+    // RAY_FLAGS flags
+    NVAPI_RAY_FLAG_NONE_EX                            = 0x0,        //!< No flag specified.
+    NVAPI_RAY_FLAG_FORCE_OPAQUE_EX                    = NV_BIT( 0), //!< Consider all intersected geometries to be opaque, regardless of the flags specified at the geometry and instance level.
+    NVAPI_RAY_FLAG_FORCE_NON_OPAQUE_EX                = NV_BIT( 1), //!< Consider all intersected geometries to be non-opaque, regardless of the flags specified at the geometry and instance level.
+    NVAPI_RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH_EX = NV_BIT( 2), //!< End the traversal as soon as a geometry is hit, and that hit is not ignored by the any-hit shader.
+    NVAPI_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER_EX         = NV_BIT( 3), //!< Do not invoke the closest-hit shader once the traversal ends.
+    NVAPI_RAY_FLAG_CULL_BACK_FACING_TRIANGLES_EX      = NV_BIT( 4), //!< Never intersect triangle geometries that are back-facing with regard to the ray.
+    NVAPI_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES_EX     = NV_BIT( 5), //!< Never intersect triangle geometries that are front-facing with regard to the ray.
+    NVAPI_RAY_FLAG_CULL_OPAQUE_EX                     = NV_BIT( 6), //!< Never intersect geometries that were flagged as opaque.
+    NVAPI_RAY_FLAG_CULL_NON_OPAQUE_EX                 = NV_BIT( 7), //!< Never intersect geometries that were not flagged as opaque.
+    NVAPI_RAY_FLAG_SKIP_TRIANGLES_EX                  = NV_BIT( 8), //!< Never intersect triangle geometries.
+    NVAPI_RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES_EX      = NV_BIT( 9), //!< Never intersect AABB geometries.
+
+    // NVAPI_RAY_FLAGS_EX specific flags
+    NVAPI_RAY_FLAG_FORCE_OMM_2_STATE_EX               = NV_BIT(10), //!< Treat unknown-opaque and unknown-transparent as opaque and transparent, respectively, during traversal.
+                                                                    //!< If an instance is flagged with #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX, that flag takes precedence over this one.
+} NVAPI_RAY_FLAG_EX;
+
+#endif // defined(__cplusplus) && defined(__d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
 //! \ingroup vidio
@@ -20244,9 +21485,32 @@ typedef struct _NV_DISPLAY_DRIVER_INFO
     NvU32 reserved : 27;                      //!< Reserved for future use.
 } NV_DISPLAY_DRIVER_INFO_V1;
+typedef struct _NV_DISPLAY_DRIVER_INFO_V2
+{
+    NvU32 version;                    //!< Structure version.
+    NvU32 driverVersion;              //!< Contains the driver version after successful return.
+    NvAPI_ShortString szBuildBranch;  //!< Contains the driver-branch string after successful return.
+    NvU32 bIsDCHDriver : 1;           //!< Contains the driver DCH status after successful return.
+                                      //!< Value of 1 means that this is a DCH driver.
+                                      //!< Value of 0 means that this is not a DCH driver. (NVAPI may be unable to query the DCH status of the driver due to registry API errors; in that case the API returns NVAPI_ERROR.)
+    NvU32 bIsNVIDIAStudioPackage : 1; //!< On successful return, this field provides information about whether the installed driver is from an NVIDIA Studio Driver package.
+                                      //!< Value of 1 means that this driver is from the NVIDIA Studio Driver package.
+    NvU32 bIsNVIDIAGameReadyPackage : 1;  //!< On successful return, this field provides information about whether the installed driver is from an NVIDIA Game Ready Driver package.
+                                      //!< Value of 1 means that this driver is from the NVIDIA Game Ready Driver package.
+    NvU32 bIsNVIDIARTXProductionBranchPackage : 1;  //!< On successful return, this field confirms whether the installed driver package is from an NVIDIA RTX Enterprise Production Branch, which offers ISV certifications, long life-cycle support, regular security updates, and access to the same functionality as the corresponding NVIDIA Studio Driver package (i.e., of the same driver version number).
+                                      //!< Value of 1 means that this driver is from the NVIDIA RTX Enterprise Production Branch package.
+    NvU32 bIsNVIDIARTXNewFeatureBranchPackage : 1;  //!< On successful return, this field confirms whether the installed driver package is from an NVIDIA RTX New Feature Branch.
+                                      //!< This driver typically gives access to new features, bug fixes, new operating system support, and other driver enhancements offered between NVIDIA RTX Enterprise Production Branch releases. Support duration for NVIDIA RTX New Feature Branches is shorter than that for NVIDIA RTX Enterprise Production Branches.
+                                      //!< Value of 1 means that this driver is from the NVIDIA RTX New Feature Branch package.
+    NvU32 reserved : 27;              //!< Reserved for future use.
+    NvAPI_ShortString szBuildBaseBranch;  //!< Contains the driver base-branch string after successful return.
+    NvU32 reservedEx;                 //!< Reserved for future use.
+} NV_DISPLAY_DRIVER_INFO_V2;
+
 #define NV_DISPLAY_DRIVER_INFO_VER1  MAKE_NVAPI_VERSION(NV_DISPLAY_DRIVER_INFO_V1, 1)
-typedef NV_DISPLAY_DRIVER_INFO_V1 NV_DISPLAY_DRIVER_INFO;
-#define NV_DISPLAY_DRIVER_INFO_VER NV_DISPLAY_DRIVER_INFO_VER1
+#define NV_DISPLAY_DRIVER_INFO_VER2  MAKE_NVAPI_VERSION(NV_DISPLAY_DRIVER_INFO_V2, 2)
+typedef NV_DISPLAY_DRIVER_INFO_V2 NV_DISPLAY_DRIVER_INFO;
+#define NV_DISPLAY_DRIVER_INFO_VER NV_DISPLAY_DRIVER_INFO_VER2
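+
+//! \par Example
+//! A minimal usage sketch, assuming the NvAPI_SYS_GetDisplayDriverInfo() entry point
+//! that consumes this structure:
+//! \code{.cpp}
+//!     NV_DISPLAY_DRIVER_INFO driverInfo = {};
+//!     driverInfo.version = NV_DISPLAY_DRIVER_INFO_VER;
+//!     if (NvAPI_SYS_GetDisplayDriverInfo(&driverInfo) == NVAPI_OK)
+//!     {
+//!         // driverInfo.driverVersion encodes the version as an integer, e.g. 52056 for 520.56;
+//!         // driverInfo.bIsNVIDIAStudioPackage distinguishes Studio from Game Ready packages.
+//!     }
+//! \endcode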
 
 ///////////////////////////////////////////////////////////////////////////////
 //
diff --git a/vendor/nvapi/nvapi_lite_common.h b/vendor/nvapi/nvapi_lite_common.h
index 7af056bda0..21263566c1 100644
--- a/vendor/nvapi/nvapi_lite_common.h
+++ b/vendor/nvapi/nvapi_lite_common.h
@@ -88,6 +88,7 @@ typedef unsigned short   NvU16;
 typedef unsigned char    NvU8;
 typedef signed   char    NvS8;
 typedef float            NvF32;
+typedef double           NvF64;
 
 /*!
  * Macro to convert NvU32 to NvF32.
@@ -140,6 +141,7 @@ NV_DECLARE_HANDLE(NvAudioHandle);            //!< NVIDIA HD Audio Device
 NV_DECLARE_HANDLE(Nv3DVPContextHandle);      //!< A handle for a 3D Vision Pro (3DVP) context
 NV_DECLARE_HANDLE(Nv3DVPTransceiverHandle);  //!< A handle for a 3DVP RF transceiver
 NV_DECLARE_HANDLE(Nv3DVPGlassesHandle);      //!< A handle for a pair of 3DVP RF shutter glasses
+NV_DECLARE_HANDLE(NvPcfClientHandle);        //!< A handle for NVPCF clients
 
 typedef void* StereoHandle;                  //!< A stereo handle, that corresponds to the device interface
 
@@ -396,6 +398,17 @@ typedef enum _NvAPI_Status
     NVAPI_NO_VULKAN                   = -229,    //!< OpenGL does not export Vulkan fake extensions
     NVAPI_REQUEST_PENDING             = -230,    //!< A request for NvTOPPs telemetry CData has already been made and is pending a response.
     NVAPI_RESOURCE_IN_USE             = -231,    //!< Operation cannot be performed because the resource is in use.
+    NVAPI_INVALID_IMAGE               = -232,    //!< The device kernel image is invalid.
+    NVAPI_INVALID_PTX                 = -233,    //!< PTX JIT compilation failed.
+    NVAPI_NVLINK_UNCORRECTABLE        = -234,    //!< An uncorrectable NVLink error was detected during execution.
+    NVAPI_JIT_COMPILER_NOT_FOUND      = -235,    //!< The PTX JIT compiler library was not found.
+    NVAPI_INVALID_SOURCE              = -236,    //!< The device kernel source is invalid.
+    NVAPI_ILLEGAL_INSTRUCTION         = -237,    //!< While executing a kernel, the device encountered an illegal instruction.
+    NVAPI_INVALID_PC                  = -238,    //!< While executing a kernel, the device program counter wrapped its address space.
+    NVAPI_LAUNCH_FAILED               = -239,    //!< An exception occurred on the device while executing a kernel.
+    NVAPI_NOT_PERMITTED               = -240,    //!< The attempted operation is not permitted.
+    NVAPI_CALLBACK_ALREADY_REGISTERED = -241,    //!< The callback function has already been registered.
+    NVAPI_CALLBACK_NOT_FOUND          = -242,    //!< The callback function was not found or not registered.
 } NvAPI_Status;
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -487,6 +500,7 @@ typedef NV_DISPLAY_DRIVER_MEMORY_INFO_V3 NV_DISPLAY_DRIVER_MEMORY_INFO;
 //! DESCRIPTION: This function retrieves the available driver memory footprint for the specified GPU.
 //!              If the GPU is in TCC Mode, only dedicatedVideoMemory will be returned in pMemoryInfo (NV_DISPLAY_DRIVER_MEMORY_INFO).
 //!
+//! \deprecated  Do not use this function - it is deprecated in release 520. Instead, use NvAPI_GPU_GetMemoryInfoEx.
 //! SUPPORTED OS: Windows 7 and higher
 //!
 //!
@@ -504,7 +518,64 @@ typedef NV_DISPLAY_DRIVER_MEMORY_INFO_V3 NV_DISPLAY_DRIVER_MEMORY_INFO;
 //!
 //! \ingroup driverapi
 ///////////////////////////////////////////////////////////////////////////////
+__nvapi_deprecated_function("Do not use this function - it is deprecated in release 520. Instead, use NvAPI_GPU_GetMemoryInfoEx.")
 NVAPI_INTERFACE NvAPI_GPU_GetMemoryInfo(NvPhysicalGpuHandle hPhysicalGpu, NV_DISPLAY_DRIVER_MEMORY_INFO *pMemoryInfo);
+
+
+//! \ingroup driverapi
+//! Used in NvAPI_GPU_GetMemoryInfoEx().
+typedef struct
+{
+    NvU32 version;                             //!< Structure version.
+    NvU64 dedicatedVideoMemory;                //!< Size (in bytes) of the physical framebuffer.
+    NvU64 availableDedicatedVideoMemory;       //!< Size (in bytes) of the available physical framebuffer for allocating video memory surfaces.
+    NvU64 systemVideoMemory;                   //!< Size (in bytes) of system memory the driver allocates at load time.
+    NvU64 sharedSystemMemory;                  //!< Size (in bytes) of shared system memory that the driver is allowed to commit for surfaces across all allocations.
+    NvU64 curAvailableDedicatedVideoMemory;    //!< Size (in bytes) of the physical framebuffer currently available for allocating video memory surfaces.
+    NvU64 dedicatedVideoMemoryEvictionsSize;   //!< Total size (in bytes) of memory released as a result of evictions.
+    NvU64 dedicatedVideoMemoryEvictionCount;   //!< Number of eviction events that caused an allocation to be removed from dedicated video memory to free GPU
+                                               //!< video memory to make room for other allocations.
+    NvU64 dedicatedVideoMemoryPromotionsSize;  //!< Total size (in bytes) of memory allocated as a result of promotions.
+    NvU64 dedicatedVideoMemoryPromotionCount;  //!< Number of promotion events that caused an allocation to be promoted to dedicated video memory.
+} NV_GPU_MEMORY_INFO_EX_V1;
+
+//! \ingroup driverapi
+typedef NV_GPU_MEMORY_INFO_EX_V1 NV_GPU_MEMORY_INFO_EX;
+
+//! \ingroup driverapi
+//! Macro for constructing the version field of NV_GPU_MEMORY_INFO_EX_V1.
+#define NV_GPU_MEMORY_INFO_EX_VER_1 MAKE_NVAPI_VERSION(NV_GPU_MEMORY_INFO_EX_V1,1)
+
+//! \ingroup driverapi
+#define NV_GPU_MEMORY_INFO_EX_VER NV_GPU_MEMORY_INFO_EX_VER_1
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_GPU_GetMemoryInfoEx
+//
+//! DESCRIPTION: This function retrieves the available driver memory footprint for the specified GPU.
+//!              If the GPU is in TCC Mode, only dedicatedVideoMemory will be returned in pMemoryInfo (NV_GPU_MEMORY_INFO_EX).
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! TCC_SUPPORTED
+//!
+//! \since Release: 520
+//!
+//! \param [in]  hPhysicalGpu  Handle of the physical GPU for which the memory information is to be extracted.
+//! \param [out] pMemoryInfo   The memory footprint available in the driver. See NV_GPU_MEMORY_INFO_EX.
+//!
+//! \retval NVAPI_INVALID_ARGUMENT             pMemoryInfo is NULL.
+//! \retval NVAPI_OK                           Call successful.
+//! \retval NVAPI_NVIDIA_DEVICE_NOT_FOUND      No NVIDIA GPU driving a display was found.
+//! \retval NVAPI_INCOMPATIBLE_STRUCT_VERSION  NV_GPU_MEMORY_INFO_EX structure version mismatch.
+//!
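+//! \par Example
+//! A minimal usage sketch (illustrative only):
+//! \code{.cpp}
+//!     NvPhysicalGpuHandle gpus[NVAPI_MAX_PHYSICAL_GPUS];
+//!     NvU32 gpuCount = 0;
+//!     if (NvAPI_EnumPhysicalGPUs(gpus, &gpuCount) == NVAPI_OK && gpuCount > 0)
+//!     {
+//!         NV_GPU_MEMORY_INFO_EX memInfo = {};
+//!         memInfo.version = NV_GPU_MEMORY_INFO_EX_VER;
+//!         if (NvAPI_GPU_GetMemoryInfoEx(gpus[0], &memInfo) == NVAPI_OK)
+//!         {
+//!             NvU64 freeDedicatedBytes = memInfo.curAvailableDedicatedVideoMemory;
+//!         }
+//!     }
+//! \endcode
+//!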
+//! \ingroup driverapi
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_GPU_GetMemoryInfoEx(NvPhysicalGpuHandle hPhysicalGpu, NV_GPU_MEMORY_INFO_EX *pMemoryInfo);
 
 ///////////////////////////////////////////////////////////////////////////////
 //
 // FUNCTION NAME: NvAPI_EnumPhysicalGPUs
diff --git a/vendor/nvapi/x86/nvapi.lib b/vendor/nvapi/x86/nvapi.lib
index e6ebdf106c..d0e4c62874 100644
Binary files a/vendor/nvapi/x86/nvapi.lib and b/vendor/nvapi/x86/nvapi.lib differ