Skip to content

Commit 4f2442c

Browse files
committed
AGS 6.0
1 parent 4c48321 commit 4f2442c

38 files changed

+513
-234
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ In addition to the library itself, the AGS SDK includes several samples to demon
1111
<a href="https://github.com/GPUOpen-LibrariesAndSDKs/AGS_SDK/releases/latest/"><img src="http://gpuopen-librariesandsdks.github.io/media/latest-release-button.svg" alt="Latest release" title="Latest release"></a>
1212
</div>
1313

14+
### What's new in AGS 6.0
15+
Version 6.0 introduces several new shader intrinsics, namely a DX12 ray tracing hit token for RDNA2 hardware for ray tracing optimisation, ReadLaneAt and explicit float conversions. There is also a change to the initialization API to make sure the AGS dll matches the header and calling code.
16+
1417
### What's new in AGS 5.4.2
1518
Version 5.4.2 reinstates the sharedMemoryInBytes field which is required when calculating the memory available on APUs.
1619

ags_lib/doc/amd_ags.chm

38.5 KB
Binary file not shown.

ags_lib/hlsl/ags_shader_intrinsics_dx12.hlsl

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ RWByteAddressBuffer AmdExtD3DShaderIntrinsicsUAV : register(u0, AmdExtD3DShaderI
9595
#define AmdExtD3DShaderIntrinsicsOpcode_GetWaveSize 0x19
9696
#define AmdExtD3DShaderIntrinsicsOpcode_BaseInstance 0x1a
9797
#define AmdExtD3DShaderIntrinsicsOpcode_BaseVertex 0x1b
98+
#define AmdExtD3DShaderIntrinsicsOpcode_FloatConversion 0x1c
99+
#define AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt 0x1d
98100

99101
/**
100102
***********************************************************************************************************************
@@ -253,6 +255,17 @@ RWByteAddressBuffer AmdExtD3DShaderIntrinsicsUAV : register(u0, AmdExtD3DShaderI
253255
#define AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64 0x07
254256
#define AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64 0x08
255257

258+
/**
259+
***********************************************************************************************************************
260+
* AmdExtD3DShaderIntrinsicsFloatConversion defines for supported rounding modes from float to float16 conversions.
261+
* To be used as an input to the AmdExtD3DShaderIntrinsicsOpcode_FloatConversion instruction
262+
***********************************************************************************************************************
263+
*/
264+
// Rounding-mode selectors, one per AmdExtD3DShaderIntrinsics_ConvertF32toF16* wrapper.
#define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16Near     0x01  // nearest rounding mode
#define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16NegInf   0x02  // -inf rounding mode
#define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16PlusInf  0x03  // +inf rounding mode
267+
268+
256269
/**
257270
***********************************************************************************************************************
258271
* MakeAmdShaderIntrinsicsInstruction
@@ -1315,6 +1328,133 @@ uint AmdExtD3DShaderIntrinsics_GetBaseVertex()
13151328
return retVal;
13161329
}
13171330

1331+
1332+
1333+
/**
1334+
***********************************************************************************************************************
1335+
* AmdExtD3DShaderIntrinsics_ReadlaneAt : uint
1336+
*
1337+
* The following function is available if CheckSupport(AmdExtD3DShaderIntrinsicsSupport_ReadlaneAt) returned S_OK.
1338+
*
1339+
* Returns the value of the source for the given lane index within the specified wave. The lane index
1340+
* can be non-uniform across the wave.
1341+
*
1342+
***********************************************************************************************************************
1343+
*/
1344+
uint AmdExtD3DShaderIntrinsics_ReadlaneAt(uint src, uint laneId)
{
    // Build the instruction word from the ReadlaneAt opcode, phase 0 and a trailing argument of 0.
    const uint opcode = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt,
                                                           AmdExtD3DShaderIntrinsicsOpcodePhase_0,
                                                           0);

    // src and laneId are passed as the two value operands; the result comes back through the last argument.
    uint laneValue;
    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opcode, src, laneId, laneValue);

    return laneValue;
}
1356+
1357+
/**
1358+
***********************************************************************************************************************
1359+
* AmdExtD3DShaderIntrinsics_ReadlaneAt : int
1360+
***********************************************************************************************************************
1361+
*/
1362+
int AmdExtD3DShaderIntrinsics_ReadlaneAt(int src, uint laneId)
{
    // Build the instruction word from the ReadlaneAt opcode, phase 0 and a trailing argument of 0.
    const uint opcode = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt,
                                                           AmdExtD3DShaderIntrinsicsOpcodePhase_0,
                                                           0);

    // The signed source is reinterpreted as raw bits on the way in and back to int on the way out.
    const uint srcBits = asuint(src);
    uint laneBits;
    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opcode, srcBits, laneId, laneBits);

    return asint(laneBits);
}
1374+
1375+
/**
1376+
***********************************************************************************************************************
1377+
* AmdExtD3DShaderIntrinsics_ReadlaneAt : float
1378+
***********************************************************************************************************************
1379+
*/
1380+
float AmdExtD3DShaderIntrinsics_ReadlaneAt(float src, uint laneId)
{
    // Build the instruction word from the ReadlaneAt opcode, phase 0 and a trailing argument of 0.
    const uint opcode = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt,
                                                           AmdExtD3DShaderIntrinsicsOpcodePhase_0,
                                                           0);

    // The float source is reinterpreted as raw bits on the way in and back to float on the way out.
    const uint srcBits = asuint(src);
    uint laneBits;
    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opcode, srcBits, laneId, laneBits);

    return asfloat(laneBits);
}
1392+
1393+
/**
1394+
***********************************************************************************************************************
1395+
* AmdExtD3DShaderIntrinsics_ConvertF32toF16
1396+
*
1397+
* The following functions are available if CheckSupport(AmdExtD3DShaderIntrinsicsSupport_FloatConversion) returned
1398+
* S_OK.
1399+
*
1400+
* Converts 32-bit floating point numbers into 16-bit floating point numbers using the specified rounding mode
1401+
*
1402+
* Available in all shader stages.
1403+
*
1404+
***********************************************************************************************************************
1405+
*/
1406+
1407+
/**
1408+
***********************************************************************************************************************
1409+
* AmdExtD3DShaderIntrinsics_ConvertF32toF16 - helper to convert f32 to f16 number
1410+
***********************************************************************************************************************
1411+
*/
1412+
uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16(in uint convOp, in float3 val)
{
    // One instruction word is shared by all three components; convOp selects the rounding mode.
    const uint opcode = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_FloatConversion,
                                                           AmdExtD3DShaderIntrinsicsOpcodePhase_0,
                                                           convOp);

    // Reinterpret the float components as raw bits once, then issue one call per component in x, y, z order.
    const uint3 srcBits = asuint(val);

    uint3 converted;
    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opcode, srcBits.x, 0, converted.x);
    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opcode, srcBits.y, 0, converted.y);
    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(opcode, srcBits.z, 0, converted.z);

    return converted;
}
1425+
1426+
/**
1427+
***********************************************************************************************************************
1428+
* AmdExtD3DShaderIntrinsics_ConvertF32toF16Near - convert f32 to f16 number using nearest rounding mode
1429+
***********************************************************************************************************************
1430+
*/
1431+
uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16Near(in float3 inVec)
{
    // Forward to the shared helper with the nearest-rounding conversion op.
    const uint roundingOp = AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16Near;
    return AmdExtD3DShaderIntrinsics_ConvertF32toF16(roundingOp, inVec);
}
1435+
1436+
/**
1437+
***********************************************************************************************************************
1438+
* AmdExtD3DShaderIntrinsics_ConvertF32toF16NegInf - convert f32 to f16 number using -inf rounding mode
1439+
***********************************************************************************************************************
1440+
*/
1441+
uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16NegInf(in float3 inVec)
{
    // Forward to the shared helper with the -inf rounding conversion op.
    const uint roundingOp = AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16NegInf;
    return AmdExtD3DShaderIntrinsics_ConvertF32toF16(roundingOp, inVec);
}
1445+
1446+
/**
1447+
***********************************************************************************************************************
1448+
* AmdExtD3DShaderIntrinsics_ConvertF32toF16PosInf - convert f32 to f16 number using +inf rounding mode
1449+
***********************************************************************************************************************
1450+
*/
1451+
uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16PosInf(in float3 inVec)
{
    // Forward to the shared helper with the +inf rounding conversion op.
    const uint roundingOp = AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16PlusInf;
    return AmdExtD3DShaderIntrinsics_ConvertF32toF16(roundingOp, inVec);
}
1455+
1456+
1457+
13181458
/**
13191459
***********************************************************************************************************************
13201460
* AmdExtD3DShaderIntrinsics_MakeAtomicInstructions
@@ -3756,4 +3896,98 @@ uint4 AmdExtD3DShaderIntrinsics_WavePostfixMax(uint4 src)
37563896
}
37573897

37583898

3899+
#if defined (AGS_RAY_HIT_TOKEN)
3900+
3901+
//=====================================================================================================================
3902+
// Base hit-token record shared by AmdExtRtHitTokenIn and AmdExtRtHitTokenOut.
struct AmdExtRtHitToken
{
    uint dword[2];  // Two raw dwords; AmdGetLastHitToken() exposes them as a uint2.
};
3906+
3907+
/**
3908+
***********************************************************************************************************************
3909+
* @brief
3910+
* AmdExtD3DShaderIntrinsicsRT structure when included in a Ray Tracing payload will indicate to the driver
3911+
* that the dwords are already supplied in AmdExtRtHitTokenIn and only requires a call to intersect
3912+
* ray, bypassing the traversal of the acceleration structure.
3913+
***********************************************************************************************************************
3914+
*/
3915+
// Derives from AmdExtRtHitToken and adds no members of its own.
struct AmdExtRtHitTokenIn : AmdExtRtHitToken
{
};
3916+
3917+
/**
3918+
***********************************************************************************************************************
3919+
* @brief
3920+
* AmdExtD3DShaderIntrinsicsRT structure when included in a Ray Tracing payload will indicate to the driver
3921+
* that the dwords must be patched into the payload after traversal. The application can store this
3922+
* data in a buffer which can then be used for hit group sorting so shading divergence can be avoided.
3923+
***********************************************************************************************************************
3924+
*/
3925+
// Derives from AmdExtRtHitToken and adds no members of its own.
struct AmdExtRtHitTokenOut : AmdExtRtHitToken
{
};
3926+
3927+
/**
3928+
***********************************************************************************************************************
3929+
* @brief
3930+
* Group shared memory reserved for temporary storage of hit tokens. Not intended to be touched by the app shader.
3931+
* Application shader must only use the extension functions defined below to access the hit tokens
3932+
*
3933+
***********************************************************************************************************************
3934+
*/
3935+
// Single groupshared hit-token slot: written by AmdSetHitToken() and read by AmdGetLastHitToken().
groupshared AmdExtRtHitToken AmdHitToken;
3936+
3937+
/**
3938+
***********************************************************************************************************************
3939+
* @brief
3940+
* Accessor function to obtain the hit tokens from the last call to TraceRays(). The data returned by this
3941+
* function only guarantees valid values for the last call to TraceRays() prior to calling this function.
3942+
*
3943+
***********************************************************************************************************************
3944+
*/
3945+
uint2 AmdGetLastHitToken()
{
    // Pack the two stored dwords into a uint2: dword[0] -> x, dword[1] -> y.
    uint2 token;
    token.x = AmdHitToken.dword[0];
    token.y = AmdHitToken.dword[1];
    return token;
}
3949+
3950+
/**
3951+
***********************************************************************************************************************
3952+
* @brief
3953+
* This function initialises the hit token for a subsequent TraceRay() call. Note, any TraceRay() that intends to use
3954+
* these hit tokens must include this function call in the same basic block. Applications can use a convenience macro
3955+
* defined below to enforce that.
3956+
*
3957+
***********************************************************************************************************************
3958+
*/
3959+
void AmdSetHitToken(uint2 token)
{
    // Unpack the uint2 into the stored dwords: x -> dword[0], y -> dword[1].
    const uint firstDword  = token.x;
    const uint secondDword = token.y;

    AmdHitToken.dword[0] = firstDword;
    AmdHitToken.dword[1] = secondDword;
}
3964+
3965+
/**
3966+
***********************************************************************************************************************
3967+
* @brief
3968+
* Convenience macro for calling TraceRays that uses the hit token
3969+
*
3970+
***********************************************************************************************************************
3971+
*/
3972+
// Expands to ONE braced compound statement: AmdSetHitToken(token) immediately followed by TraceRay(...).
// The braces keep both calls together when the macro is the body of an unbraced if/for/while; without them
// only AmdSetHitToken() would be guarded and TraceRay() would run unconditionally.
// A trailing ';' after the invocation is optional. As with any braced macro, put the call in its own { } when
// it is the 'if' branch of an if/else.
// Parameters are forwarded unchanged to TraceRay(); 'token' is the uint2 passed to AmdSetHitToken().
#define AmdTraceRay(accelStruct,                        \
                    rayFlags,                           \
                    instanceInclusionMask,              \
                    rayContributionToHitGroupIndex,     \
                    geometryMultiplier,                 \
                    missShaderIndex,                    \
                    ray,                                \
                    payload,                            \
                    token)                              \
    {                                                   \
        AmdSetHitToken(token);                          \
        TraceRay(accelStruct,                           \
                 rayFlags,                              \
                 instanceInclusionMask,                 \
                 rayContributionToHitGroupIndex,        \
                 geometryMultiplier,                    \
                 missShaderIndex,                       \
                 ray,                                   \
                 payload);                              \
    }
3990+
3991+
#endif // AGS_RAY_HIT_TOKEN
3992+
37593993
#endif // _AMDEXTD3DSHADERINTRINICS_HLSL

0 commit comments

Comments
 (0)