Skip to content

Commit 949d817

Browse files
srishanm authored and alexdeucher committed
drm/amdgpu/gfx11: Add cleaner shader for GFX11.0.3
This commit adds the cleaner shader microcode for GFX11.0.3 GPUs. The cleaner shader is a piece of GPU code that is used to clear or initialize certain GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Clearing these resources is important for ensuring data isolation between different workloads running on the GPU. Without the cleaner shader, residual data from a previous workload could potentially be accessed by a subsequent workload, leading to data leaks and incorrect computation results. The cleaner shader microcode is represented as an array of 32-bit words (`gfx_11_0_3_cleaner_shader_hex`). This array is the binary representation of the cleaner shader code, which is written in a low-level GPU instruction set. When the cleaner shader feature is enabled, the AMDGPU driver loads this array into a specific location in the GPU memory. The GPU then reads this memory location to fetch and execute the cleaner shader instructions. The cleaner shader is executed automatically by the GPU at the end of each workload, before the next workload starts. This ensures that all GPU resources are in a clean state before the start of each workload. This addition is part of the cleaner shader feature implementation. The cleaner shader feature helps resource utilization by cleaning up GPU resources after they are used. It also enhances security and reliability by preventing data leaks between workloads. Cc: Christian König <christian.koenig@amd.com> Cc: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Suggested-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 6bfe777 commit 949d817

File tree

3 files changed

+191
-0
lines changed

3 files changed

+191
-0
lines changed

drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "clearstate_gfx11.h"
4747
#include "v11_structs.h"
4848
#include "gfx_v11_0.h"
49+
#include "gfx_v11_0_cleaner_shader.h"
4950
#include "gfx_v11_0_3.h"
5051
#include "nbio_v4_3.h"
5152
#include "mes_v11_0.h"
@@ -1579,8 +1580,24 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
15791580
}
15801581

15811582
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1583+
case IP_VERSION(11, 0, 3):
1584+
adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1585+
adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1586+
if (adev->gfx.me_fw_version >= 2280 &&
1587+
adev->gfx.pfp_fw_version >= 2370 &&
1588+
adev->gfx.mec_fw_version >= 2450 &&
1589+
adev->mes.fw_version[0] >= 99) {
1590+
adev->gfx.enable_cleaner_shader = true;
1591+
r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1592+
if (r) {
1593+
adev->gfx.enable_cleaner_shader = false;
1594+
dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1595+
}
1596+
}
1597+
break;
15821598
default:
15831599
adev->gfx.enable_cleaner_shader = false;
1600+
break;
15841601
}
15851602

15861603
/* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/* SPDX-License-Identifier: MIT */
2+
/*
3+
* Copyright 2024 Advanced Micro Devices, Inc.
4+
*
5+
* Permission is hereby granted, free of charge, to any person obtaining a
6+
* copy of this software and associated documentation files (the "Software"),
7+
* to deal in the Software without restriction, including without limitation
8+
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9+
* and/or sell copies of the Software, and to permit persons to whom the
10+
* Software is furnished to do so, subject to the following conditions:
11+
*
12+
* The above copyright notice and this permission notice shall be included in
13+
* all copies or substantial portions of the Software.
14+
*
15+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18+
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19+
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20+
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21+
* OTHER DEALINGS IN THE SOFTWARE.
22+
*/
23+
24+
// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 dwords (256 bytes) of the 192-dword cleaner shader.
25+
// To turn this shader program on for compilation, change this to main and the lower shader main to main_1
26+
27+
// Navi3 : Clear SGPRs, VGPRs and LDS
28+
// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
29+
// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
30+
// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
31+
// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
32+
// Each wave clears SGPRs 0 - 107
33+
// Each wave clears VGPRs 0 - 63
34+
// The first wave of the workgroup clears its 64KB of LDS
35+
// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
36+
// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
37+
38+
// Cleaner shader entry point: zeroes VGPRs, then (first wave of the workgroup only) LDS,
// then SGPRs and the remaining special registers, in that order.
shader main
39+
asic(GFX11)
40+
type(CS)
41+
wave_size(32)
42+
// Note: original source code from SQ team
43+
44+
// Takes about 2500 clocks to run.
45+
// (theoretical fastest = 1024clks vgpr + 640lds = ~1660 clks)
46+
//
47+
S_BARRIER
48+
49+
//
50+
// CLEAR VGPRs
51+
//
52+
s_mov_b32 m0, 0x00000058 // m0 starts at 0x58 (88): loop 96/8=12 times (loop unrolled for performance)
53+
54+
label_0005:
55+
v_movreld_b32 v0, 0
56+
v_movreld_b32 v1, 0
57+
v_movreld_b32 v2, 0
58+
v_movreld_b32 v3, 0
59+
v_movreld_b32 v4, 0
60+
v_movreld_b32 v5, 0
61+
v_movreld_b32 v6, 0
62+
v_movreld_b32 v7, 0
63+
s_sub_u32 m0, m0, 8 // Step the relative-index register down by the 8 registers just written
64+
s_cbranch_scc0 label_0005
65+
//
66+
//
67+
68+
s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
69+
s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
70+
s_cbranch_scc0 label_0023 // Skip the LDS clear unless this is the first wave of the ThreadGroup/WorkGroup
71+
// CLEAR LDS
72+
//
73+
s_mov_b32 exec_lo, 0xffffffff
74+
s_mov_b32 exec_hi, 0xffffffff
75+
// NOTE(review): the two comments below say thread-ID 0..63, but this shader declares
// wave_size(32) -- confirm the intended range.
v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
76+
v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
77+
v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
78+
s_mov_b32 s2, 0x00000003f // 64 loop iterations
79+
s_mov_b32 m0, 0xffffffff
80+
// Clear all of LDS space
81+
// Each FirstWave of WorkGroup clears 64kbyte block
82+
83+
label_001F:
// The data operands v2..v5 are presumably still zero from the CLEAR VGPRs pass above,
// so these stores write zeros -- verify against the ISA semantics of v_movreld_b32.
84+
ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
85+
ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
86+
v_add_co_u32 v1, vcc, 0x00000400, v1 // Advance the per-thread address in v1 by 0x400 for the next pass
87+
s_sub_u32 s2, s2, 1
88+
s_cbranch_scc0 label_001F
89+
//
90+
// CLEAR SGPRs
91+
//
92+
label_0023:
93+
s_mov_b32 m0, 0x00000068 // m0 starts at 0x68 (104): loop 108/4=27 times (loop unrolled for performance)
94+
label_sgpr_loop:
95+
s_movreld_b32 s0, 0
96+
s_movreld_b32 s1, 0
97+
s_movreld_b32 s2, 0
98+
s_movreld_b32 s3, 0
99+
s_sub_u32 m0, m0, 4 // Step the relative-index register down by the 4 registers just written
100+
s_cbranch_scc0 label_sgpr_loop
101+
102+
// Clear the remaining special registers: vcc, flat_scratch and ttmp0-ttmp15
103+
s_mov_b64 vcc, 0 //clear vcc
104+
s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
105+
s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
106+
s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
107+
s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
108+
s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
109+
s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
110+
s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
111+
s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
112+
s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
113+
s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
114+
115+
s_endpgm
116+
117+
end
118+
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/* SPDX-License-Identifier: MIT */
2+
/*
3+
* Copyright 2024 Advanced Micro Devices, Inc.
4+
*
5+
* Permission is hereby granted, free of charge, to any person obtaining a
6+
* copy of this software and associated documentation files (the "Software"),
7+
* to deal in the Software without restriction, including without limitation
8+
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9+
* and/or sell copies of the Software, and to permit persons to whom the
10+
* Software is furnished to do so, subject to the following conditions:
11+
*
12+
* The above copyright notice and this permission notice shall be included in
13+
* all copies or substantial portions of the Software.
14+
*
15+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18+
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19+
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20+
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21+
* OTHER DEALINGS IN THE SOFTWARE.
22+
*/
23+
24+
/*
 * Cleaner shader microcode for GFX 11.0.3, stored as raw 32-bit instruction
 * words. As listed here the table holds 60 words (240 bytes).
 *
 * gfx_v11_0_sw_init() points adev->gfx.cleaner_shader_ptr at this table and
 * stores sizeof() of it in adev->gfx.cleaner_shader_size, which is then
 * passed to amdgpu_gfx_cleaner_shader_sw_init().
 *
 * NOTE(review): the accompanying assembly header describes the shader as
 * 64 dwords (256 bytes) while this table lists 60 words - confirm that the
 * difference is intentional.
 * NOTE(review): the trailing 0xbf9f0000 words look like padding after the
 * end-of-program instruction - confirm against the GFX11 ISA.
 */
25+
static const u32 gfx_11_0_3_cleaner_shader_hex[] = {
26+
0xb0804006, 0xbe8200ff,
27+
0x00000058, 0xbefd0080,
28+
0x7e008480, 0x7e028480,
29+
0x7e048480, 0x7e068480,
30+
0x7e088480, 0x7e0a8480,
31+
0x7e0c8480, 0x7e0e8480,
32+
0xbefd0002, 0x80828802,
33+
0xbfa1fff5, 0xbe8200ff,
34+
0x80000000, 0x8b020002,
35+
0xbfa10012, 0xbefe00c1,
36+
0xbeff00c1, 0xd71f0001,
37+
0x0001007f, 0xd7200001,
38+
0x0002027e, 0x16020288,
39+
0xbe8200bf, 0xbefd00c1,
40+
0xd9382000, 0x00020201,
41+
0xd9386040, 0x00040401,
42+
0xd7006a01, 0x000202ff,
43+
0x00000400, 0x80828102,
44+
0xbfa1fff7, 0xbefd00ff,
45+
0x00000068, 0xbe804280,
46+
0xbe814280, 0xbe824280,
47+
0xbe834280, 0x80fd847d,
48+
0xbfa1fffa, 0xbeea0180,
49+
0xbeec0180, 0xbeee0180,
50+
0xbef00180, 0xbef20180,
51+
0xbef40180, 0xbef60180,
52+
0xbef80180, 0xbefa0180,
53+
0xbfb00000, 0xbf9f0000,
54+
0xbf9f0000, 0xbf9f0000,
55+
0xbf9f0000, 0xbf9f0000,
56+
};

0 commit comments

Comments
 (0)