@@ -95,6 +95,8 @@ RWByteAddressBuffer AmdExtD3DShaderIntrinsicsUAV : register(u0, AmdExtD3DShaderI
95
95
#define AmdExtD3DShaderIntrinsicsOpcode_GetWaveSize 0x19
96
96
#define AmdExtD3DShaderIntrinsicsOpcode_BaseInstance 0x1a
97
97
#define AmdExtD3DShaderIntrinsicsOpcode_BaseVertex 0x1b
98
+ #define AmdExtD3DShaderIntrinsicsOpcode_FloatConversion 0x1c
99
+ #define AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt 0x1d
98
100
99
101
/**
100
102
***********************************************************************************************************************
@@ -253,6 +255,17 @@ RWByteAddressBuffer AmdExtD3DShaderIntrinsicsUAV : register(u0, AmdExtD3DShaderI
253
255
#define AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64 0x07
254
256
#define AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64 0x08
255
257
258
+ /**
259
+ ***********************************************************************************************************************
260
+ * AmdExtD3DShaderIntrinsicsFloatConversionOp defines the supported rounding modes for float to float16 conversions:
261
+ * nearest (0x01), -inf (0x02) and +inf (0x03). To be used as an input to the AmdExtD3DShaderIntrinsicsOpcode_FloatConversion instruction.
262
+ ***********************************************************************************************************************
263
+ */
264
+ #define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16Near 0x01
265
+ #define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16NegInf 0x02
266
+ #define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16PlusInf 0x03
267
+
268
+
256
269
/**
257
270
***********************************************************************************************************************
258
271
* MakeAmdShaderIntrinsicsInstruction
@@ -1315,6 +1328,133 @@ uint AmdExtD3DShaderIntrinsics_GetBaseVertex()
1315
1328
return retVal;
1316
1329
}
1317
1330
1331
+
1332
+
1333
+ /**
1334
+ ***********************************************************************************************************************
1335
+ * AmdExtD3DShaderIntrinsics_ReadlaneAt : uint
1336
+ *
1337
+ * The following function is available if CheckSupport(AmdExtD3DShaderIntrinsicsSupport_ReadlaneAt) returned S_OK.
1338
+ *
1339
+ * Returns the value of the source for the given lane index within the wave. The lane index
1340
+ * can be non-uniform across the wave.
1341
+ * src is the source value; laneId is the index of the lane whose source value is returned.
1342
+ ***********************************************************************************************************************
1343
+ */
1344
+ uint AmdExtD3DShaderIntrinsics_ReadlaneAt (uint src, uint laneId)
1345
+ {
1346
+ uint retVal;
1347
+
1348
+ uint instruction;
1349
+ instruction = MakeAmdShaderIntrinsicsInstruction (AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt,
1350
+ AmdExtD3DShaderIntrinsicsOpcodePhase_0,
1351
+ 0 ); // ReadlaneAt opcode, phase 0; NOTE(review): meaning of the trailing 0 is set by MakeAmdShaderIntrinsicsInstruction (not shown here)
1352
+ AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange (instruction, src, laneId, retVal); // passes instruction, src and laneId through the intrinsics UAV; the result is written to retVal
1353
+
1354
+ return retVal;
1355
+ }
1356
+
1357
+ /**
1357
+ ***********************************************************************************************************************
1358
+ * AmdExtD3DShaderIntrinsics_ReadlaneAt : int - same call as the uint overload; src is reinterpreted with asuint on the way in and the result with asint on the way out
1359
+ ***********************************************************************************************************************
1360
+ */
1361
+ int AmdExtD3DShaderIntrinsics_ReadlaneAt (int src, uint laneId)
1362
+ {
1363
+ uint retVal;
1364
+
1365
+ uint instruction;
1366
+ instruction = MakeAmdShaderIntrinsicsInstruction (AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt,
1367
+ AmdExtD3DShaderIntrinsicsOpcodePhase_0,
1368
+ 0 ); // ReadlaneAt opcode, phase 0; NOTE(review): meaning of the trailing 0 is set by MakeAmdShaderIntrinsicsInstruction (not shown here)
1369
+ AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange (instruction, asuint (src), laneId, retVal); // the raw bits of src are passed, not a numeric conversion
1370
+
1371
+ return asint (retVal); // reinterpret the returned bits as int
1372
+ }
1374
+
1375
+ /**
1375
+ ***********************************************************************************************************************
1376
+ * AmdExtD3DShaderIntrinsics_ReadlaneAt : float - same call as the uint overload; src is reinterpreted with asuint on the way in and the result with asfloat on the way out
1377
+ ***********************************************************************************************************************
1378
+ */
1379
+ float AmdExtD3DShaderIntrinsics_ReadlaneAt (float src, uint laneId)
1380
+ {
1381
+ uint retVal;
1382
+
1383
+ uint instruction;
1384
+ instruction = MakeAmdShaderIntrinsicsInstruction (AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt,
1385
+ AmdExtD3DShaderIntrinsicsOpcodePhase_0,
1386
+ 0 ); // ReadlaneAt opcode, phase 0; NOTE(review): meaning of the trailing 0 is set by MakeAmdShaderIntrinsicsInstruction (not shown here)
1387
+ AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange (instruction, asuint (src), laneId, retVal); // the raw bits of src are passed, not a numeric conversion
1388
+
1389
+ return asfloat (retVal); // reinterpret the returned bits as float
1390
+ }
1392
+
1393
+ /**
1394
+ ***********************************************************************************************************************
1395
+ * AmdExtD3DShaderIntrinsics_ConvertF32toF16
1396
+ *
1397
+ * The following functions are available if CheckSupport(AmdExtD3DShaderIntrinsicsSupport_FloatConversion) returned
1398
+ * S_OK.
1399
+ *
1400
+ * Converts 32bit floating point numbers into 16bit floating point numbers using a specified rounding mode.
1401
+ *
1402
+ * Available in all shader stages.
1403
+ *
1404
+ ***********************************************************************************************************************
1405
+ */
1406
+
1407
+ /**
1407
+ ***********************************************************************************************************************
1408
+ * AmdExtD3DShaderIntrinsics_ConvertF32toF16 - helper that converts each component of a float3 from f32 to f16 using the rounding mode selected by convOp
1409
+ ***********************************************************************************************************************
1410
+ */
1411
+ uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16 (in uint convOp, in float3 val)
1412
+ {
1413
+ uint instruction = MakeAmdShaderIntrinsicsInstruction (AmdExtD3DShaderIntrinsicsOpcode_FloatConversion,
1414
+ AmdExtD3DShaderIntrinsicsOpcodePhase_0,
1415
+ convOp); // convOp is one of the AmdExtD3DShaderIntrinsicsFloatConversionOp_* values
1416
+
1417
+ uint3 retVal;
1418
+ AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange (instruction, asuint (val.x), 0 , retVal.x); // x, y and z are converted by three separate calls, each passing the component's raw bits and a constant 0
1419
+ AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange (instruction, asuint (val.y), 0 , retVal.y);
1420
+ AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange (instruction, asuint (val.z), 0 , retVal.z);
1421
+
1422
+ return retVal; // NOTE(review): presumably each uint holds the f16 bit pattern of the matching component - confirm
1423
+ }
1425
+
1426
+ /**
1426
+ ***********************************************************************************************************************
1427
+ * AmdExtD3DShaderIntrinsics_ConvertF32toF16Near - convert each component of inVec from f32 to f16 using the nearest rounding mode
1428
+ ***********************************************************************************************************************
1429
+ */
1430
+ uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16Near (in float3 inVec)
1431
+ {
1432
+ return AmdExtD3DShaderIntrinsics_ConvertF32toF16 (AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16Near, inVec); // thin wrapper: fixes the rounding mode and forwards inVec
1433
+ }
1435
+
1436
+ /**
1436
+ ***********************************************************************************************************************
1437
+ * AmdExtD3DShaderIntrinsics_ConvertF32toF16NegInf - convert each component of inVec from f32 to f16 using the -inf rounding mode
1438
+ ***********************************************************************************************************************
1439
+ */
1440
+ uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16NegInf (in float3 inVec)
1441
+ {
1442
+ return AmdExtD3DShaderIntrinsics_ConvertF32toF16 (AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16NegInf, inVec); // thin wrapper: fixes the rounding mode and forwards inVec
1443
+ }
1445
+
1446
+ /**
1446
+ ***********************************************************************************************************************
1447
+ * AmdExtD3DShaderIntrinsics_ConvertF32toF16PosInf - convert each component of inVec from f32 to f16 using the +inf rounding mode
1448
+ ***********************************************************************************************************************
1449
+ */
1450
+ uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16PosInf (in float3 inVec)
1451
+ {
1452
+ return AmdExtD3DShaderIntrinsics_ConvertF32toF16 (AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16PlusInf, inVec); // thin wrapper; note the op constant is spelled PlusInf while this function is spelled PosInf
1453
+ }
1455
+
1456
+
1457
+
1318
1458
/**
1319
1459
***********************************************************************************************************************
1320
1460
* AmdExtD3DShaderIntrinsics_MakeAtomicInstructions
@@ -3756,4 +3896,98 @@ uint4 AmdExtD3DShaderIntrinsics_WavePostfixMax(uint4 src)
3756
3896
}
3757
3897
3758
3898
3899
+ #if defined (AGS_RAY_HIT_TOKEN)
3900
+
3901
+ //=====================================================================================================================
3901
+ struct AmdExtRtHitToken // base type of AmdExtRtHitTokenIn and AmdExtRtHitTokenOut
3902
+ {
3903
+ uint dword [2 ]; // the two token dwords; exposed to shaders as a uint2 by AmdGetLastHitToken / AmdSetHitToken
3904
+ };
3906
+
3907
+ /**
3907
+ ***********************************************************************************************************************
3908
+ * @brief
3909
+ * AmdExtD3DShaderIntrinsicsRT structure which, when included in a Ray Tracing payload, indicates to the driver
3910
+ * that the dwords are already supplied in AmdExtRtHitTokenIn and that only a call to intersect
3911
+ * ray is required, bypassing the traversal of the acceleration structure.
3912
+ ***********************************************************************************************************************
3913
+ */
3914
+ struct AmdExtRtHitTokenIn : AmdExtRtHitToken { }; // adds no members of its own; inherits dword[2] from AmdExtRtHitToken
3916
+
3917
+ /**
3917
+ ***********************************************************************************************************************
3918
+ * @brief
3919
+ * AmdExtD3DShaderIntrinsicsRT structure which, when included in a Ray Tracing payload, indicates to the driver
3920
+ * that the dwords must be patched into the payload after traversal. The application can store this
3921
+ * data in a buffer which can then be used for hit group sorting so that shading divergence can be avoided.
3922
+ ***********************************************************************************************************************
3923
+ */
3924
+ struct AmdExtRtHitTokenOut : AmdExtRtHitToken { }; // adds no members of its own; inherits dword[2] from AmdExtRtHitToken
3926
+
3927
+ /**
3927
+ ***********************************************************************************************************************
3928
+ * @brief
3929
+ * Group shared memory reserved for temporary storage of hit tokens. Not intended to be touched by the app shader.
3930
+ * Application shaders must only use the extension functions defined below to access the hit tokens.
3931
+ *
3932
+ ***********************************************************************************************************************
3933
+ */
3934
+ groupshared AmdExtRtHitToken AmdHitToken;
3936
+
3937
+ /**
3937
+ ***********************************************************************************************************************
3938
+ * @brief
3939
+ * Accessor function to obtain the hit tokens from the last call to TraceRay(). The data returned by this
3940
+ * function only guarantees valid values for the last call to TraceRay() prior to calling this function.
3941
+ * Returns the two dwords of the groupshared AmdHitToken as a uint2 (x = dword[0], y = dword[1]).
3942
+ ***********************************************************************************************************************
3943
+ */
3944
+ uint2 AmdGetLastHitToken ()
3945
+ {
3946
+ return uint2 (AmdHitToken.dword [0 ], AmdHitToken.dword [1 ]); // plain read of the shared token; nothing is modified
3947
+ }
3949
+
3950
+ /**
3950
+ ***********************************************************************************************************************
3951
+ * @brief
3952
+ * This function initialises hit tokens for a subsequent TraceRay() call. Note, any TraceRay() that intends to use
3953
+ * these hit tokens must include this function call in the same basic block. Applications can use the AmdTraceRay
3954
+ * convenience macro defined below to enforce that.
3955
+ * token.x is stored in dword[0] and token.y in dword[1] of the groupshared AmdHitToken.
3956
+ ***********************************************************************************************************************
3957
+ */
3958
+ void AmdSetHitToken (uint2 token)
3959
+ {
3960
+ AmdHitToken.dword [0 ] = token.x; // overwrite the shared token, one dword per component
3961
+ AmdHitToken.dword [1 ] = token.y;
3962
+ }
3964
+
3965
+ /**
3965
+ ***********************************************************************************************************************
3966
+ * @brief
3967
+ * Convenience macro for calling TraceRay that uses the hit token: calls AmdSetHitToken(token), then TraceRay with the
3968
+ * remaining arguments. It expands to two statements, so wrap the invocation in braces when it forms the body of an
3969
+ * if/else/loop. The macro name must be followed directly by '(' for it to be a function-like macro.
3970
+ ***********************************************************************************************************************
3971
+ */
3972
+ #define AmdTraceRay(accelStruct, \
3973
+ rayFlags, \
3974
+ instanceInclusionMask, \
3975
+ rayContributionToHitGroupIndex, \
3976
+ geometryMultiplier, \
3977
+ missShaderIndex, \
3978
+ ray, \
3979
+ payload, \
3980
+ token) \
3981
+ AmdSetHitToken (token); \
3982
+ TraceRay (accelStruct, \
3983
+ rayFlags, \
3984
+ instanceInclusionMask, \
3985
+ rayContributionToHitGroupIndex, \
3986
+ geometryMultiplier, \
3987
+ missShaderIndex, \
3988
+ ray, \
3989
+ payload);
3990
+
3991
+ #endif // AGS_RAY_HIT_TOKEN
3992
+
3759
3993
#endif // _AMDEXTD3DSHADERINTRINICS_HLSL
0 commit comments