Skip to content

Commit a639525

Browse files
committed
Merge tag 'amd-drm-fixes-6.8-2024-02-01' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-6.8-2024-02-01:

amdgpu:
- Fix reboot issue seen on some 7000 series dGPUs
- Fix client init order for KFD
- Misc display fixes
- USB-C fix
- DCN 3.5 fixes
- Fix issues with GPU scheduler and GPU reset
- GPU firmware loading fix
- Misc fixes
- GC 11.5 fix
- VCN 4.0.5 fix
- IH overflow fix

amdkfd:
- SVM fixes
- Trap handler fix
- Fix device permission lookup
- Properly reserve BO before validating it

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240201184108.4923-1-alexander.deucher@amd.com
2 parents 111a3f0 + 6813cdc commit a639525

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+217
-225
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
141141
static const struct drm_client_funcs kfd_client_funcs = {
142142
.unregister = drm_client_release,
143143
};
144+
145+
int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
146+
{
147+
int ret;
148+
149+
if (!adev->kfd.init_complete)
150+
return 0;
151+
152+
ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
153+
&kfd_client_funcs);
154+
if (ret) {
155+
dev_err(adev->dev, "Failed to init DRM client: %d\n",
156+
ret);
157+
return ret;
158+
}
159+
160+
drm_client_register(&adev->kfd.client);
161+
162+
return 0;
163+
}
164+
144165
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
145166
{
146167
int i;
147168
int last_valid_bit;
148-
int ret;
149169

150170
amdgpu_amdkfd_gpuvm_init_mem_limits();
151171

@@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
164184
.enable_mes = adev->enable_mes,
165185
};
166186

167-
ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs);
168-
if (ret) {
169-
dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
170-
return;
171-
}
172-
173187
/* this is going to have a few of the MSBs set that we need to
174188
* clear
175189
*/
@@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
208222

209223
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
210224
&gpu_resources);
211-
if (adev->kfd.init_complete)
212-
drm_client_register(&adev->kfd.client);
213-
else
214-
drm_client_release(&adev->kfd.client);
215225

216226
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
217227

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
182182
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
183183
struct mm_struct *mm,
184184
struct svm_range_bo *svm_bo);
185+
186+
int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
185187
#if defined(CONFIG_DEBUG_FS)
186188
int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
187189
#endif
@@ -301,7 +303,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
301303
struct kgd_mem *mem, void *drm_priv);
302304
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
303305
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
304-
void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
306+
int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
305307
int amdgpu_amdkfd_gpuvm_sync_memory(
306308
struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
307309
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus
290290
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
291291
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
292292

293-
if (!(ring && drm_sched_wqueue_ready(&ring->sched)))
293+
if (!amdgpu_ring_sched_ready(ring))
294294
continue;
295295

296296
/* stop scheduler and drain ring. */

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2085,21 +2085,35 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
20852085
return ret;
20862086
}
20872087

2088-
void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
2088+
int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
20892089
{
20902090
struct kfd_mem_attachment *entry;
20912091
struct amdgpu_vm *vm;
2092+
int ret;
20922093

20932094
vm = drm_priv_to_vm(drm_priv);
20942095

20952096
mutex_lock(&mem->lock);
20962097

2098+
ret = amdgpu_bo_reserve(mem->bo, true);
2099+
if (ret)
2100+
goto out;
2101+
20972102
list_for_each_entry(entry, &mem->attachments, list) {
2098-
if (entry->bo_va->base.vm == vm)
2099-
kfd_mem_dmaunmap_attachment(mem, entry);
2103+
if (entry->bo_va->base.vm != vm)
2104+
continue;
2105+
if (entry->bo_va->base.bo->tbo.ttm &&
2106+
!entry->bo_va->base.bo->tbo.ttm->sg)
2107+
continue;
2108+
2109+
kfd_mem_dmaunmap_attachment(mem, entry);
21002110
}
21012111

2112+
amdgpu_bo_unreserve(mem->bo);
2113+
out:
21022114
mutex_unlock(&mem->lock);
2115+
2116+
return ret;
21032117
}
21042118

21052119
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(

drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1678,7 +1678,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
16781678
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
16791679
struct amdgpu_ring *ring = adev->rings[i];
16801680

1681-
if (!ring || !drm_sched_wqueue_ready(&ring->sched))
1681+
if (!amdgpu_ring_sched_ready(ring))
16821682
continue;
16831683
drm_sched_wqueue_stop(&ring->sched);
16841684
}
@@ -1694,7 +1694,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
16941694
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
16951695
struct amdgpu_ring *ring = adev->rings[i];
16961696

1697-
if (!ring || !drm_sched_wqueue_ready(&ring->sched))
1697+
if (!amdgpu_ring_sched_ready(ring))
16981698
continue;
16991699
drm_sched_wqueue_start(&ring->sched);
17001700
}
@@ -1916,8 +1916,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
19161916

19171917
ring = adev->rings[val];
19181918

1919-
if (!ring || !ring->funcs->preempt_ib ||
1920-
!drm_sched_wqueue_ready(&ring->sched))
1919+
if (!amdgpu_ring_sched_ready(ring) ||
1920+
!ring->funcs->preempt_ib)
19211921
return -EINVAL;
19221922

19231923
/* the last preemption failed */

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 13 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -4121,23 +4121,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
41214121
}
41224122
}
41234123
} else {
4124-
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
4125-
case IP_VERSION(13, 0, 0):
4126-
case IP_VERSION(13, 0, 7):
4127-
case IP_VERSION(13, 0, 10):
4128-
r = psp_gpu_reset(adev);
4129-
break;
4130-
default:
4131-
tmp = amdgpu_reset_method;
4132-
/* It should do a default reset when loading or reloading the driver,
4133-
* regardless of the module parameter reset_method.
4134-
*/
4135-
amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4136-
r = amdgpu_asic_reset(adev);
4137-
amdgpu_reset_method = tmp;
4138-
break;
4139-
}
4140-
4124+
tmp = amdgpu_reset_method;
4125+
/* It should do a default reset when loading or reloading the driver,
4126+
* regardless of the module parameter reset_method.
4127+
*/
4128+
amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4129+
r = amdgpu_asic_reset(adev);
4130+
amdgpu_reset_method = tmp;
41414131
if (r) {
41424132
dev_err(adev->dev, "asic reset on init failed\n");
41434133
goto failed;
@@ -5031,7 +5021,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
50315021
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
50325022
struct amdgpu_ring *ring = adev->rings[i];
50335023

5034-
if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5024+
if (!amdgpu_ring_sched_ready(ring))
50355025
continue;
50365026

50375027
spin_lock(&ring->sched.job_list_lock);
@@ -5170,7 +5160,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
51705160
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
51715161
struct amdgpu_ring *ring = adev->rings[i];
51725162

5173-
if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5163+
if (!amdgpu_ring_sched_ready(ring))
51745164
continue;
51755165

51765166
/* Clear job fence from fence drv to avoid force_completion
@@ -5637,7 +5627,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
56375627
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
56385628
struct amdgpu_ring *ring = tmp_adev->rings[i];
56395629

5640-
if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5630+
if (!amdgpu_ring_sched_ready(ring))
56415631
continue;
56425632

56435633
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -5706,7 +5696,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
57065696
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
57075697
struct amdgpu_ring *ring = tmp_adev->rings[i];
57085698

5709-
if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5699+
if (!amdgpu_ring_sched_ready(ring))
57105700
continue;
57115701

57125702
drm_sched_start(&ring->sched, true);
@@ -6061,7 +6051,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
60616051
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
60626052
struct amdgpu_ring *ring = adev->rings[i];
60636053

6064-
if (!ring || !drm_sched_wqueue_ready(&ring->sched))
6054+
if (!amdgpu_ring_sched_ready(ring))
60656055
continue;
60666056

60676057
drm_sched_stop(&ring->sched, NULL);
@@ -6189,7 +6179,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
61896179
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
61906180
struct amdgpu_ring *ring = adev->rings[i];
61916181

6192-
if (!ring || !drm_sched_wqueue_ready(&ring->sched))
6182+
if (!amdgpu_ring_sched_ready(ring))
61936183
continue;
61946184

61956185
drm_sched_start(&ring->sched, true);

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2255,6 +2255,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
22552255
if (ret)
22562256
goto err_pci;
22572257

2258+
ret = amdgpu_amdkfd_drm_client_create(adev);
2259+
if (ret)
2260+
goto err_pci;
2261+
22582262
/*
22592263
* 1. don't init fbdev on hw without DCE
22602264
* 2. don't init fbdev if there are no connectors

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -635,6 +635,7 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
635635
ring->name);
636636

637637
ring->sched.ready = !r;
638+
638639
return r;
639640
}
640641

@@ -717,3 +718,14 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
717718
if (ring->is_sw_ring)
718719
amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
719720
}
721+
722+
bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
723+
{
724+
if (!ring)
725+
return false;
726+
727+
if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched))
728+
return false;
729+
730+
return true;
731+
}

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -450,5 +450,5 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
450450
int amdgpu_ib_pool_init(struct amdgpu_device *adev);
451451
void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
452452
int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
453-
453+
bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
454454
#endif

drivers/gpu/drm/amd/amdgpu/cik_ih.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
204204
tmp = RREG32(mmIH_RB_CNTL);
205205
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
206206
WREG32(mmIH_RB_CNTL, tmp);
207+
208+
/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
209+
* can be detected.
210+
*/
211+
tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
212+
WREG32(mmIH_RB_CNTL, tmp);
207213
}
208214
return (wptr & ih->ptr_mask);
209215
}

0 commit comments

Comments
 (0)