Skip to content

Commit 07e0618

Browse files
committed
Merge tag 'amd-drm-fixes-6.10-2024-06-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-6.10-2024-06-19:

amdgpu:
- Fix display idle optimization race
- Fix GPUVM TLB flush locking scope
- IPS fix
- GFX 9.4.3 harvesting fix
- Runtime pm fix for shared buffers
- DCN 3.5.x fixes
- USB4 fix
- RISC-V clang fix
- Silence UBSAN warnings
- MES11 fix
- PSP 14.0.x fix

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240619223233.3116457-1-alexander.deucher@amd.com
2 parents: a808013 + ed5a448; commit: 07e0618

File tree

16 files changed: +192 / -120 lines changed (192 additions, 120 deletions)

drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

Lines changed: 0 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,6 @@
4141
#include <linux/dma-buf.h>
4242
#include <linux/dma-fence-array.h>
4343
#include <linux/pci-p2pdma.h>
44-
#include <linux/pm_runtime.h>
45-
#include "amdgpu_trace.h"
4644

4745
/**
4846
* amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
@@ -58,42 +56,11 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
5856
struct drm_gem_object *obj = dmabuf->priv;
5957
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
6058
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
61-
int r;
6259

6360
if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
6461
attach->peer2peer = false;
6562

66-
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
67-
trace_amdgpu_runpm_reference_dumps(1, __func__);
68-
if (r < 0)
69-
goto out;
70-
7163
return 0;
72-
73-
out:
74-
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
75-
trace_amdgpu_runpm_reference_dumps(0, __func__);
76-
return r;
77-
}
78-
79-
/**
80-
* amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
81-
*
82-
* @dmabuf: DMA-buf where we remove the attachment from
83-
* @attach: the attachment to remove
84-
*
85-
* Called when an attachment is removed from the DMA-buf.
86-
*/
87-
static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
88-
struct dma_buf_attachment *attach)
89-
{
90-
struct drm_gem_object *obj = dmabuf->priv;
91-
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
92-
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
93-
94-
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
95-
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
96-
trace_amdgpu_runpm_reference_dumps(0, __func__);
9764
}
9865

9966
/**
@@ -267,7 +234,6 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
267234

268235
const struct dma_buf_ops amdgpu_dmabuf_ops = {
269236
.attach = amdgpu_dma_buf_attach,
270-
.detach = amdgpu_dma_buf_detach,
271237
.pin = amdgpu_dma_buf_pin,
272238
.unpin = amdgpu_dma_buf_unpin,
273239
.map_dma_buf = amdgpu_dma_buf_map,

drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -181,7 +181,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
181181
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
182182
seq, flags | AMDGPU_FENCE_FLAG_INT);
183183
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
184-
trace_amdgpu_runpm_reference_dumps(1, __func__);
185184
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
186185
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
187186
struct dma_fence *old;
@@ -309,7 +308,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
309308
dma_fence_put(fence);
310309
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
311310
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
312-
trace_amdgpu_runpm_reference_dumps(0, __func__);
313311
} while (last_seq != seq);
314312

315313
return true;

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c

Lines changed: 34 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -684,12 +684,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
684684
struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
685685
struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
686686
unsigned int ndw;
687-
signed long r;
687+
int r;
688688
uint32_t seq;
689689

690-
if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
691-
!down_read_trylock(&adev->reset_domain->sem)) {
690+
/*
691+
* A GPU reset should flush all TLBs anyway, so no need to do
692+
* this while one is ongoing.
693+
*/
694+
if (!down_read_trylock(&adev->reset_domain->sem))
695+
return 0;
692696

697+
if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
693698
if (adev->gmc.flush_tlb_needs_extra_type_2)
694699
adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
695700
2, all_hub,
@@ -703,43 +708,40 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
703708
adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
704709
flush_type, all_hub,
705710
inst);
706-
return 0;
707-
}
711+
r = 0;
712+
} else {
713+
/* 2 dwords flush + 8 dwords fence */
714+
ndw = kiq->pmf->invalidate_tlbs_size + 8;
708715

709-
/* 2 dwords flush + 8 dwords fence */
710-
ndw = kiq->pmf->invalidate_tlbs_size + 8;
716+
if (adev->gmc.flush_tlb_needs_extra_type_2)
717+
ndw += kiq->pmf->invalidate_tlbs_size;
711718

712-
if (adev->gmc.flush_tlb_needs_extra_type_2)
713-
ndw += kiq->pmf->invalidate_tlbs_size;
719+
if (adev->gmc.flush_tlb_needs_extra_type_0)
720+
ndw += kiq->pmf->invalidate_tlbs_size;
714721

715-
if (adev->gmc.flush_tlb_needs_extra_type_0)
716-
ndw += kiq->pmf->invalidate_tlbs_size;
722+
spin_lock(&adev->gfx.kiq[inst].ring_lock);
723+
amdgpu_ring_alloc(ring, ndw);
724+
if (adev->gmc.flush_tlb_needs_extra_type_2)
725+
kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
717726

718-
spin_lock(&adev->gfx.kiq[inst].ring_lock);
719-
amdgpu_ring_alloc(ring, ndw);
720-
if (adev->gmc.flush_tlb_needs_extra_type_2)
721-
kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
727+
if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
728+
kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
722729

723-
if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
724-
kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
730+
kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
731+
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
732+
if (r) {
733+
amdgpu_ring_undo(ring);
734+
spin_unlock(&adev->gfx.kiq[inst].ring_lock);
735+
goto error_unlock_reset;
736+
}
725737

726-
kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
727-
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
728-
if (r) {
729-
amdgpu_ring_undo(ring);
738+
amdgpu_ring_commit(ring);
730739
spin_unlock(&adev->gfx.kiq[inst].ring_lock);
731-
goto error_unlock_reset;
732-
}
733-
734-
amdgpu_ring_commit(ring);
735-
spin_unlock(&adev->gfx.kiq[inst].ring_lock);
736-
r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
737-
if (r < 1) {
738-
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
739-
r = -ETIME;
740-
goto error_unlock_reset;
740+
if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) {
741+
dev_err(adev->dev, "timeout waiting for kiq fence\n");
742+
r = -ETIME;
743+
}
741744
}
742-
r = 0;
743745

744746
error_unlock_reset:
745747
up_read(&adev->reset_domain->sem);

drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -554,21 +554,6 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
554554
__entry->value)
555555
);
556556

557-
TRACE_EVENT(amdgpu_runpm_reference_dumps,
558-
TP_PROTO(uint32_t index, const char *func),
559-
TP_ARGS(index, func),
560-
TP_STRUCT__entry(
561-
__field(uint32_t, index)
562-
__string(func, func)
563-
),
564-
TP_fast_assign(
565-
__entry->index = index;
566-
__assign_str(func);
567-
),
568-
TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n",
569-
__entry->index,
570-
__get_str(func))
571-
);
572557
#undef AMDGPU_JOB_GET_TIMELINE_NAME
573558
#endif
574559

drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4195,9 +4195,10 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_i
41954195
static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
41964196
struct amdgpu_cu_info *cu_info)
41974197
{
4198-
int i, j, k, counter, xcc_id, active_cu_number = 0;
4199-
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
4198+
int i, j, k, prev_counter, counter, xcc_id, active_cu_number = 0;
4199+
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0, tmp;
42004200
unsigned disable_masks[4 * 4];
4201+
bool is_symmetric_cus;
42014202

42024203
if (!adev || !cu_info)
42034204
return -EINVAL;
@@ -4215,6 +4216,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
42154216

42164217
mutex_lock(&adev->grbm_idx_mutex);
42174218
for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
4219+
is_symmetric_cus = true;
42184220
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
42194221
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
42204222
mask = 1;
@@ -4242,6 +4244,15 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
42424244
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
42434245
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
42444246
}
4247+
if (i && is_symmetric_cus && prev_counter != counter)
4248+
is_symmetric_cus = false;
4249+
prev_counter = counter;
4250+
}
4251+
if (is_symmetric_cus) {
4252+
tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG);
4253+
tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_RELAUNCH_DISABLE, 1);
4254+
tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_DISPATCH_DISABLE, 1);
4255+
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG, tmp);
42454256
}
42464257
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
42474258
xcc_id);

drivers/gpu/drm/amd/amdgpu/mes_v11_0.c

Lines changed: 48 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -154,18 +154,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
154154
void *pkt, int size,
155155
int api_status_off)
156156
{
157-
int ndw = size / 4;
158-
signed long r;
159-
union MESAPI__MISC *x_pkt = pkt;
160-
struct MES_API_STATUS *api_status;
157+
union MESAPI__QUERY_MES_STATUS mes_status_pkt;
158+
signed long timeout = 3000000; /* 3000 ms */
161159
struct amdgpu_device *adev = mes->adev;
162160
struct amdgpu_ring *ring = &mes->ring;
163-
unsigned long flags;
164-
signed long timeout = 3000000; /* 3000 ms */
161+
struct MES_API_STATUS *api_status;
162+
union MESAPI__MISC *x_pkt = pkt;
165163
const char *op_str, *misc_op_str;
166-
u32 fence_offset;
167-
u64 fence_gpu_addr;
168-
u64 *fence_ptr;
164+
unsigned long flags;
165+
u64 status_gpu_addr;
166+
u32 status_offset;
167+
u64 *status_ptr;
168+
signed long r;
169169
int ret;
170170

171171
if (x_pkt->header.opcode >= MES_SCH_API_MAX)
@@ -177,44 +177,55 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
177177
/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
178178
timeout = 15 * 600 * 1000;
179179
}
180-
BUG_ON(size % 4 != 0);
181180

182-
ret = amdgpu_device_wb_get(adev, &fence_offset);
181+
ret = amdgpu_device_wb_get(adev, &status_offset);
183182
if (ret)
184183
return ret;
185-
fence_gpu_addr =
186-
adev->wb.gpu_addr + (fence_offset * 4);
187-
fence_ptr = (u64 *)&adev->wb.wb[fence_offset];
188-
*fence_ptr = 0;
184+
185+
status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
186+
status_ptr = (u64 *)&adev->wb.wb[status_offset];
187+
*status_ptr = 0;
189188

190189
spin_lock_irqsave(&mes->ring_lock, flags);
191-
if (amdgpu_ring_alloc(ring, ndw)) {
192-
spin_unlock_irqrestore(&mes->ring_lock, flags);
193-
amdgpu_device_wb_free(adev, fence_offset);
194-
return -ENOMEM;
195-
}
190+
r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
191+
if (r)
192+
goto error_unlock_free;
196193

197194
api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
198-
api_status->api_completion_fence_addr = fence_gpu_addr;
195+
api_status->api_completion_fence_addr = status_gpu_addr;
199196
api_status->api_completion_fence_value = 1;
200197

201-
amdgpu_ring_write_multiple(ring, pkt, ndw);
198+
amdgpu_ring_write_multiple(ring, pkt, size / 4);
199+
200+
memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
201+
mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
202+
mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
203+
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
204+
mes_status_pkt.api_status.api_completion_fence_addr =
205+
ring->fence_drv.gpu_addr;
206+
mes_status_pkt.api_status.api_completion_fence_value =
207+
++ring->fence_drv.sync_seq;
208+
209+
amdgpu_ring_write_multiple(ring, &mes_status_pkt,
210+
sizeof(mes_status_pkt) / 4);
211+
202212
amdgpu_ring_commit(ring);
203213
spin_unlock_irqrestore(&mes->ring_lock, flags);
204214

205215
op_str = mes_v11_0_get_op_string(x_pkt);
206216
misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
207217

208218
if (misc_op_str)
209-
dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
219+
dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
220+
misc_op_str);
210221
else if (op_str)
211222
dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
212223
else
213-
dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
224+
dev_dbg(adev->dev, "MES msg=%d was emitted\n",
225+
x_pkt->header.opcode);
214226

215-
r = amdgpu_mes_fence_wait_polling(fence_ptr, (u64)1, timeout);
216-
amdgpu_device_wb_free(adev, fence_offset);
217-
if (r < 1) {
227+
r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
228+
if (r < 1 || !*status_ptr) {
218229

219230
if (misc_op_str)
220231
dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
@@ -229,10 +240,19 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
229240
while (halt_if_hws_hang)
230241
schedule();
231242

232-
return -ETIMEDOUT;
243+
r = -ETIMEDOUT;
244+
goto error_wb_free;
233245
}
234246

247+
amdgpu_device_wb_free(adev, status_offset);
235248
return 0;
249+
250+
error_unlock_free:
251+
spin_unlock_irqrestore(&mes->ring_lock, flags);
252+
253+
error_wb_free:
254+
amdgpu_device_wb_free(adev, status_offset);
255+
return r;
236256
}
237257

238258
static int convert_to_mes_queue_type(int queue_type)

drivers/gpu/drm/amd/amdgpu/psp_v14_0.c

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,9 @@
3232
#include "mp/mp_14_0_2_sh_mask.h"
3333

3434
MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin");
35+
MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin");
3536
MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin");
37+
MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin");
3638

3739
/* For large FW files the time to complete can be very long */
3840
#define USBC_PD_POLLING_LIMIT_S 240
@@ -64,6 +66,9 @@ static int psp_v14_0_init_microcode(struct psp_context *psp)
6466
case IP_VERSION(14, 0, 2):
6567
case IP_VERSION(14, 0, 3):
6668
err = psp_init_sos_microcode(psp, ucode_prefix);
69+
if (err)
70+
return err;
71+
err = psp_init_ta_microcode(psp, ucode_prefix);
6772
if (err)
6873
return err;
6974
break;

drivers/gpu/drm/amd/display/Kconfig

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ config DRM_AMD_DC
88
depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64
99
select SND_HDA_COMPONENT if SND_HDA_CORE
1010
# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
11-
select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && (!ARM64 || !CC_IS_CLANG)
11+
select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || RISCV))
1212
help
1313
Choose this option if you want to use the new display engine
1414
support for AMDGPU. This adds required support for Vega and

0 commit comments

Comments
 (0)