Skip to content

Commit 3666ed8

Browse files
jcornwallAMD authored and alexdeucher committed
drm/amdgpu: Increase KIQ invalidate_tlbs timeout
The KIQ invalidate_tlbs request has been seen to marginally exceed the configured 100 ms timeout on systems under load. All other KIQ requests in the driver use a 10 second timeout. Use a similar timeout implementation on the invalidate_tlbs path.

v2: Poll once before msleep
v3: Fix return value

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Cc: Kent Russell <kent.russell@amd.com>
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent b3862d6 commit 3666ed8

File tree

2 files changed

+14
-6
lines changed

2 files changed

+14
-6
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -353,7 +353,6 @@ enum amdgpu_kiq_irq {
353353
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
354354
AMDGPU_CP_KIQ_IRQ_LAST
355355
};
356-
#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
357356
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
358357
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
359358
#define MAX_KIQ_REG_TRY 1000

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -699,12 +699,10 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
699699
uint32_t flush_type, bool all_hub,
700700
uint32_t inst)
701701
{
702-
u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT :
703-
adev->usec_timeout;
704702
struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
705703
struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
706704
unsigned int ndw;
707-
int r;
705+
int r, cnt = 0;
708706
uint32_t seq;
709707

710708
/*
@@ -761,10 +759,21 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
761759

762760
amdgpu_ring_commit(ring);
763761
spin_unlock(&adev->gfx.kiq[inst].ring_lock);
764-
if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) {
762+
763+
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
764+
765+
might_sleep();
766+
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
767+
!amdgpu_reset_pending(adev->reset_domain)) {
768+
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
769+
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
770+
}
771+
772+
if (cnt > MAX_KIQ_REG_TRY) {
765773
dev_err(adev->dev, "timeout waiting for kiq fence\n");
766774
r = -ETIME;
767-
}
775+
} else
776+
r = 0;
768777
}
769778

770779
error_unlock_reset:

0 commit comments

Comments
 (0)