@@ -5461,6 +5461,11 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 		amdgpu_ring_write(ring, 0x0);
 	}
+
+	/* Make sure that we can't skip the SET_Q_MODE packets when the VM
+	 * changed in any way.
+	 */
+	ring->set_q_mode_ptr = NULL;
 }
 
 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
@@ -5510,16 +5515,81 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, 0);
 }
 
+static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+						   uint64_t addr)
+{
+	unsigned ret;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	/* discard following DWs if *cond_exec_gpu_addr==0 */
+	amdgpu_ring_write(ring, 0);
+	ret = ring->wptr & ring->buf_mask;
+	/* patch dummy value later */
+	amdgpu_ring_write(ring, 0);
+
+	return ret;
+}
+
 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
 					   u64 shadow_va, u64 csa_va,
 					   u64 gds_va, bool init_shadow,
 					   int vmid)
 {
 	struct amdgpu_device *adev = ring->adev;
+	unsigned int offs, end;
 
-	if (!adev->gfx.cp_gfx_shadow)
+	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
 		return;
 
+	/*
+	 * The logic here isn't easy to understand because we need to keep state
+	 * across multiple executions of the function as well as between the
+	 * CPU and GPU. The general idea is that the newly written GPU command
+	 * has a condition on the previous one and is only executed if really
+	 * necessary.
+	 */
+
+	/*
+	 * The dw in the NOP controls whether the next SET_Q_MODE packet should
+	 * be executed or not. Reserve 64 bits just to be on the safe side.
+	 */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
+	offs = ring->wptr & ring->buf_mask;
+
+	/*
+	 * We start with skipping the prefix SET_Q_MODE and always executing
+	 * the postfix SET_Q_MODE packet. This is changed below with a
+	 * WRITE_DATA command when the postfix is executed.
+	 */
+	amdgpu_ring_write(ring, shadow_va ? 1 : 0);
+	amdgpu_ring_write(ring, 0);
+
+	if (ring->set_q_mode_offs) {
+		uint64_t addr;
+
+		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+		addr += ring->set_q_mode_offs << 2;
+		end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
+	}
+
+	/*
+	 * When the postfix SET_Q_MODE packet executes, we need to make sure
+	 * that the next prefix SET_Q_MODE packet executes as well.
+	 */
+	if (!shadow_va) {
+		uint64_t addr;
+
+		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+		addr += offs << 2;
+		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+		amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
+		amdgpu_ring_write(ring, lower_32_bits(addr));
+		amdgpu_ring_write(ring, upper_32_bits(addr));
+		amdgpu_ring_write(ring, 0x1);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
 	amdgpu_ring_write(ring, lower_32_bits(shadow_va));
 	amdgpu_ring_write(ring, upper_32_bits(shadow_va));
@@ -5531,23 +5601,26 @@ static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
 			  PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
 	amdgpu_ring_write(ring, init_shadow ?
 			  PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
-}
 
-static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
-						   uint64_t addr)
-{
-	unsigned ret;
+	if (ring->set_q_mode_offs)
+		amdgpu_ring_patch_cond_exec(ring, end);
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
-	amdgpu_ring_write(ring, lower_32_bits(addr));
-	amdgpu_ring_write(ring, upper_32_bits(addr));
-	/* discard following DWs if *cond_exec_gpu_addr==0 */
-	amdgpu_ring_write(ring, 0);
-	ret = ring->wptr & ring->buf_mask;
-	/* patch dummy value later */
-	amdgpu_ring_write(ring, 0);
+	if (shadow_va) {
+		uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
 
-	return ret;
+		/*
+		 * If the tokens match, try to skip the last postfix SET_Q_MODE
+		 * packet to avoid saving/restoring the state all the time.
+		 */
+		if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
+			*ring->set_q_mode_ptr = 0;
+
+		ring->set_q_mode_token = token;
+	} else {
+		ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
+	}
+
+	ring->set_q_mode_offs = offs;
 }
 
 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
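
The hunks above implement a gate-and-patch pattern: the driver writes a gate dword into the ring buffer, keeps a CPU-side pointer to it, and on a later submission patches that dword so the GPU skips the SET_Q_MODE packets behind it when the shadow state has not changed; clearing the pointer (as the VM-flush hunk at the top does) forces the next packets to run again. Below is a minimal, stand-alone user-space sketch of that idea only. All names (toy_ring, ring_write, emit_gate) are invented for illustration and are not amdgpu APIs, and the prefix/postfix SET_Q_MODE handling is deliberately collapsed into a single gate.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RING_DWORDS 64

/* Hypothetical stand-in for the ring state the patch adds. */
struct toy_ring {
	uint32_t buf[RING_DWORDS];
	unsigned int wptr;
	unsigned int set_q_mode_offs;	/* ring offset of the last gate dword */
	uint32_t *set_q_mode_ptr;	/* CPU pointer to it, NULL = must execute */
	uint64_t set_q_mode_token;	/* identifies the last emitted state */
};

static void ring_write(struct toy_ring *r, uint32_t v)
{
	r->buf[r->wptr++ % RING_DWORDS] = v;
}

/* Emit a gate dword; a later call may patch it to 0 so the packets behind
 * it are skipped. */
static void emit_gate(struct toy_ring *r, uint64_t token)
{
	ring_write(r, 0xdeadbeef);		/* stand-in for the NOP header */
	r->set_q_mode_offs = r->wptr % RING_DWORDS;
	ring_write(r, 1);			/* 1 = execute the gated packet */

	/* Same state as last time: patch the previous gate so it is skipped. */
	if (r->set_q_mode_ptr && r->set_q_mode_token == token)
		*r->set_q_mode_ptr = 0;

	r->set_q_mode_token = token;
	r->set_q_mode_ptr = &r->buf[r->set_q_mode_offs];
}

int main(void)
{
	struct toy_ring r;

	memset(&r, 0, sizeof(r));
	emit_gate(&r, 0x1234);		/* first submission, gate stays 1 */
	emit_gate(&r, 0x1234);		/* same token, previous gate patched to 0 */
	r.set_q_mode_ptr = NULL;	/* "VM flush": forget the pointer */
	emit_gate(&r, 0x1234);		/* previous gate is NOT patched */

	printf("%" PRIu32 " %" PRIu32 " %" PRIu32 "\n",
	       r.buf[1], r.buf[3], r.buf[5]);	/* prints: 0 1 1 */
	return 0;
}

Run as-is, the program prints "0 1 1": the first gate was patched away because the second submission carried the same token, while the third gate stays armed because the pointer was cleared in between, which mirrors why the VM-flush path resets set_q_mode_ptr.
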
@@ -6114,7 +6187,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
 	.emit_frame_size = /* totally 247 maximum if 16 IBs */
 		5 + /* update_spm_vmid */
 		5 + /* COND_EXEC */
-		9 + /* SET_Q_PREEMPTION_MODE */
+		22 + /* SET_Q_PREEMPTION_MODE */
 		7 + /* PIPELINE_SYNC */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
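
The new 22-dword budget can plausibly be read off the packets emitted by gfx_v11_0_ring_emit_gfx_shadow above (a sanity check against the emitted writes, not a figure taken from the commit itself):

  3 (NOP gate: header + two reserved dwords)
+ 5 (COND_EXEC via gfx_v11_0_ring_emit_init_cond_exec)
+ 5 (WRITE_DATA)
+ 9 (SET_Q_PREEMPTION_MODE, the previous budget)
= 22 dwords

The same figure is added as a second SET_Q_PREEMPTION_MODE entry in the next hunk.
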
@@ -6127,6 +6200,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
 		31 + /* DE_META */
 		3 + /* CNTX_CTRL */
 		5 + /* HDP_INVL */
+		22 + /* SET_Q_PREEMPTION_MODE */
 		8 + 8 + /* FENCE x2 */
 		8, /* gfx_v11_0_emit_mem_sync */
 	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */