Skip to content

Commit 8bc7558

Browse files
ChristianKoenigAMD and alexdeucher
authored and committed
drm/amdgpu: workaround to avoid SET_Q_MODE packets v2
It turned out that executing the SET_Q_MODE packet on every submission creates too much overhead. Implement a workaround which allows skipping the SET_Q_MODE packet if subsequent submissions all use the same parameters. v2: add a NULL check for ring_obj Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent c68cbbf commit 8bc7558

File tree

2 files changed

+93
-16
lines changed

2 files changed

+93
-16
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,9 @@ struct amdgpu_ring {
285285
unsigned cond_exe_offs;
286286
u64 cond_exe_gpu_addr;
287287
volatile u32 *cond_exe_cpu_addr;
288+
unsigned int set_q_mode_offs;
289+
volatile u32 *set_q_mode_ptr;
290+
u64 set_q_mode_token;
288291
unsigned vm_hub;
289292
unsigned vm_inv_eng;
290293
struct dma_fence *vmid_wait;

drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

Lines changed: 90 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5461,6 +5461,11 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
54615461
amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
54625462
amdgpu_ring_write(ring, 0x0);
54635463
}
5464+
5465+
/* Make sure that we can't skip the SET_Q_MODE packets when the VM
5466+
* changed in any way.
5467+
*/
5468+
ring->set_q_mode_ptr = NULL;
54645469
}
54655470

54665471
static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
@@ -5510,16 +5515,81 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
55105515
amdgpu_ring_write(ring, 0);
55115516
}
55125517

5518+
static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5519+
uint64_t addr)
5520+
{
5521+
unsigned ret;
5522+
5523+
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5524+
amdgpu_ring_write(ring, lower_32_bits(addr));
5525+
amdgpu_ring_write(ring, upper_32_bits(addr));
5526+
/* discard following DWs if *cond_exec_gpu_addr==0 */
5527+
amdgpu_ring_write(ring, 0);
5528+
ret = ring->wptr & ring->buf_mask;
5529+
/* patch dummy value later */
5530+
amdgpu_ring_write(ring, 0);
5531+
5532+
return ret;
5533+
}
5534+
55135535
static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
55145536
u64 shadow_va, u64 csa_va,
55155537
u64 gds_va, bool init_shadow,
55165538
int vmid)
55175539
{
55185540
struct amdgpu_device *adev = ring->adev;
5541+
unsigned int offs, end;
55195542

5520-
if (!adev->gfx.cp_gfx_shadow)
5543+
if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
55215544
return;
55225545

5546+
/*
5547+
* The logic here isn't easy to understand because we need to keep state
5548+
* across multiple executions of the function as well as between the
5549+
* CPU and GPU. The general idea is that the newly written GPU command
5550+
* has a condition on the previous one and is only executed if really
5551+
* necessary.
5552+
*/
5553+
5554+
/*
5555+
* The dw in the NOP controls if the next SET_Q_MODE packet should be
5556+
* executed or not. Reserve 64 bits just to be on the safe side.
5557+
*/
5558+
amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
5559+
offs = ring->wptr & ring->buf_mask;
5560+
5561+
/*
5562+
* We start with skipping the prefix SET_Q_MODE and always executing
5563+
* the postfix SET_Q_MODE packet. This is changed below with a
5564+
* WRITE_DATA command when the postfix is executed.
5565+
*/
5566+
amdgpu_ring_write(ring, shadow_va ? 1 : 0);
5567+
amdgpu_ring_write(ring, 0);
5568+
5569+
if (ring->set_q_mode_offs) {
5570+
uint64_t addr;
5571+
5572+
addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5573+
addr += ring->set_q_mode_offs << 2;
5574+
end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
5575+
}
5576+
5577+
/*
5578+
* When the postfix SET_Q_MODE packet executes we need to make sure that the
5579+
* next prefix SET_Q_MODE packet executes as well.
5580+
*/
5581+
if (!shadow_va) {
5582+
uint64_t addr;
5583+
5584+
addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5585+
addr += offs << 2;
5586+
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5587+
amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5588+
amdgpu_ring_write(ring, lower_32_bits(addr));
5589+
amdgpu_ring_write(ring, upper_32_bits(addr));
5590+
amdgpu_ring_write(ring, 0x1);
5591+
}
5592+
55235593
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
55245594
amdgpu_ring_write(ring, lower_32_bits(shadow_va));
55255595
amdgpu_ring_write(ring, upper_32_bits(shadow_va));
@@ -5531,23 +5601,26 @@ static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
55315601
PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
55325602
amdgpu_ring_write(ring, init_shadow ?
55335603
PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
5534-
}
55355604

5536-
static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5537-
uint64_t addr)
5538-
{
5539-
unsigned ret;
5605+
if (ring->set_q_mode_offs)
5606+
amdgpu_ring_patch_cond_exec(ring, end);
55405607

5541-
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5542-
amdgpu_ring_write(ring, lower_32_bits(addr));
5543-
amdgpu_ring_write(ring, upper_32_bits(addr));
5544-
/* discard following DWs if *cond_exec_gpu_addr==0 */
5545-
amdgpu_ring_write(ring, 0);
5546-
ret = ring->wptr & ring->buf_mask;
5547-
/* patch dummy value later */
5548-
amdgpu_ring_write(ring, 0);
5608+
if (shadow_va) {
5609+
uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
55495610

5550-
return ret;
5611+
/*
5612+
* If the tokens match try to skip the last postfix SET_Q_MODE
5613+
* packet to avoid saving/restoring the state all the time.
5614+
*/
5615+
if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
5616+
*ring->set_q_mode_ptr = 0;
5617+
5618+
ring->set_q_mode_token = token;
5619+
} else {
5620+
ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
5621+
}
5622+
5623+
ring->set_q_mode_offs = offs;
55515624
}
55525625

55535626
static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
@@ -6114,7 +6187,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
61146187
.emit_frame_size = /* totally 247 maximum if 16 IBs */
61156188
5 + /* update_spm_vmid */
61166189
5 + /* COND_EXEC */
6117-
9 + /* SET_Q_PREEMPTION_MODE */
6190+
22 + /* SET_Q_PREEMPTION_MODE */
61186191
7 + /* PIPELINE_SYNC */
61196192
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
61206193
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
@@ -6127,6 +6200,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
61276200
31 + /* DE_META */
61286201
3 + /* CNTX_CTRL */
61296202
5 + /* HDP_INVL */
6203+
22 + /* SET_Q_PREEMPTION_MODE */
61306204
8 + 8 + /* FENCE x2 */
61316205
8, /* gfx_v11_0_emit_mem_sync */
61326206
.emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */

0 commit comments

Comments
 (0)