Skip to content

Commit 4a56c0e

Browse files
committed
Merge tag 'amd-drm-next-6.10-2024-04-26' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.10-2024-04-26:

amdgpu:
- Misc code cleanups and refactors
- Support setting reset method at runtime
- Report OD status
- SMU 14.0.1 fixes
- SDMA 4.4.2 fixes
- VPE fixes
- MES fixes
- Update BO eviction priorities
- UMSCH fixes
- Reset fixes
- Freesync fixes
- GFXIP 9.4.3 fixes
- SDMA 5.2 fixes
- MES UAF fix
- RAS updates
- Devcoredump updates for dumping IP state
- DSC fixes
- JPEG fix
- Fix VRAM memory accounting
- VCN 5.0 fixes
- MES fixes
- UMC 12.0 updates
- Modify contiguous flags handling
- Initial support for mapping kernel queues via MES

amdkfd:
- Fix rescheduling of restore worker
- VRAM accounting for SVM migrations
- mGPU fix
- Enable SQ watchpoint for gfx10

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240426221245.1613332-1-alexander.deucher@amd.com
2 parents 68b89e2 + b77bef3 commit 4a56c0e

File tree

185 files changed

+1650
-286
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

185 files changed

+1650
-286
lines changed

drivers/gpu/drm/amd/amdgpu/aldebaran.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev)
9797
adev->ip_blocks[i].status.hw = false;
9898
}
9999

100-
return r;
100+
return 0;
101101
}
102102

103103
static int

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,14 @@ enum amdgpu_ss {
139139
AMDGPU_SS_DRV_UNLOAD
140140
};
141141

142+
struct amdgpu_hwip_reg_entry {
143+
u32 hwip;
144+
u32 inst;
145+
u32 seg;
146+
u32 reg_offset;
147+
const char *reg_name;
148+
};
149+
142150
struct amdgpu_watchdog_timer {
143151
bool timeout_fatal_disable;
144152
uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */
@@ -494,6 +502,7 @@ struct amdgpu_wb {
494502
uint64_t gpu_addr;
495503
u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */
496504
unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
505+
spinlock_t lock;
497506
};
498507

499508
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);

drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,8 @@ static const struct amd_ip_funcs acp_ip_funcs = {
637637
.soft_reset = acp_soft_reset,
638638
.set_clockgating_state = acp_set_clockgating_state,
639639
.set_powergating_state = acp_set_powergating_state,
640+
.dump_ip_state = NULL,
641+
.print_ip_state = NULL,
640642
};
641643

642644
const struct amdgpu_ip_block_version acp_ip_block = {

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -747,10 +747,17 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
747747
return amdgpu_ras_get_fed_status(adev);
748748
}
749749

750+
void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
751+
enum amdgpu_ras_block block, uint16_t pasid,
752+
pasid_notify pasid_fn, void *data, uint32_t reset)
753+
{
754+
amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
755+
}
756+
750757
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
751758
enum amdgpu_ras_block block, uint32_t reset)
752759
{
753-
amdgpu_umc_poison_handler(adev, block, reset);
760+
amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
754761
}
755762

756763
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,11 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
337337
struct tile_config *config);
338338
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
339339
enum amdgpu_ras_block block, uint32_t reset);
340+
341+
void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
342+
enum amdgpu_ras_block block, uint16_t pasid,
343+
pasid_notify pasid_fn, void *data, uint32_t reset);
344+
340345
bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
341346
bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
342347
void amdgpu_amdkfd_block_mmu_notifications(void *p);

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,7 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
881881
}
882882

883883
#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
884+
#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H - mmSQ_WATCH0_ADDR_H)
884885
uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
885886
uint64_t watch_address,
886887
uint32_t watch_address_mask,
@@ -889,55 +890,93 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
889890
uint32_t debug_vmid,
890891
uint32_t inst)
891892
{
893+
/* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed with the
894+
* same values.
895+
*/
892896
uint32_t watch_address_high;
893897
uint32_t watch_address_low;
894-
uint32_t watch_address_cntl;
895-
896-
watch_address_cntl = 0;
898+
uint32_t tcp_watch_address_cntl;
899+
uint32_t sq_watch_address_cntl;
897900

898901
watch_address_low = lower_32_bits(watch_address);
899902
watch_address_high = upper_32_bits(watch_address) & 0xffff;
900903

901-
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
904+
tcp_watch_address_cntl = 0;
905+
tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
902906
TCP_WATCH0_CNTL,
903907
VMID,
904908
debug_vmid);
905-
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
909+
tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
906910
TCP_WATCH0_CNTL,
907911
MODE,
908912
watch_mode);
909-
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
913+
tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
910914
TCP_WATCH0_CNTL,
911915
MASK,
912916
watch_address_mask >> 7);
913917

918+
sq_watch_address_cntl = 0;
919+
sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
920+
SQ_WATCH0_CNTL,
921+
VMID,
922+
debug_vmid);
923+
sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
924+
SQ_WATCH0_CNTL,
925+
MODE,
926+
watch_mode);
927+
sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
928+
SQ_WATCH0_CNTL,
929+
MASK,
930+
watch_address_mask >> 6);
931+
914932
/* Turning off this watch point until we set all the registers */
915-
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
933+
tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
916934
TCP_WATCH0_CNTL,
917935
VALID,
918936
0);
919-
920937
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
921938
(watch_id * TCP_WATCH_STRIDE)),
922-
watch_address_cntl);
939+
tcp_watch_address_cntl);
940+
941+
sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
942+
SQ_WATCH0_CNTL,
943+
VALID,
944+
0);
945+
WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
946+
(watch_id * SQ_WATCH_STRIDE)),
947+
sq_watch_address_cntl);
923948

949+
/* Program {TCP,SQ}_WATCH?_ADDR* */
924950
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
925951
(watch_id * TCP_WATCH_STRIDE)),
926952
watch_address_high);
927-
928953
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
929954
(watch_id * TCP_WATCH_STRIDE)),
930955
watch_address_low);
931956

957+
WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_H) +
958+
(watch_id * SQ_WATCH_STRIDE)),
959+
watch_address_high);
960+
WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_L) +
961+
(watch_id * SQ_WATCH_STRIDE)),
962+
watch_address_low);
963+
932964
/* Enable the watch point */
933-
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
965+
tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
934966
TCP_WATCH0_CNTL,
935967
VALID,
936968
1);
937-
938969
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
939970
(watch_id * TCP_WATCH_STRIDE)),
940-
watch_address_cntl);
971+
tcp_watch_address_cntl);
972+
973+
sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
974+
SQ_WATCH0_CNTL,
975+
VALID,
976+
1);
977+
WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
978+
(watch_id * SQ_WATCH_STRIDE)),
979+
sq_watch_address_cntl);
941980

942981
return 0;
943982
}
@@ -953,8 +992,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
953992
(watch_id * TCP_WATCH_STRIDE)),
954993
watch_address_cntl);
955994

995+
WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
996+
(watch_id * SQ_WATCH_STRIDE)),
997+
watch_address_cntl);
998+
956999
return 0;
9571000
}
1001+
#undef TCP_WATCH_STRIDE
1002+
#undef SQ_WATCH_STRIDE
9581003

9591004

9601005
/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
220220
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
221221
kfd_mem_limit.max_ttm_mem_limit) ||
222222
(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
223-
vram_size - reserved_for_pt)) {
223+
vram_size - reserved_for_pt - atomic64_read(&adev->vram_pin_size))) {
224224
ret = -ENOMEM;
225225
goto release;
226226
}

drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
3939
for (i = 0; i < n; i++) {
4040
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
4141
r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
42-
false, false, false);
42+
false, false, 0);
4343
if (r)
4444
goto exit_do_move;
4545
r = dma_fence_wait(fence, false);

drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2065,12 +2065,13 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
20652065
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
20662066
char reg_offset[11];
20672067
uint32_t *new = NULL, *tmp = NULL;
2068-
int ret, i = 0, len = 0;
2068+
unsigned int len = 0;
2069+
int ret, i = 0;
20692070

20702071
do {
20712072
memset(reg_offset, 0, 11);
20722073
if (copy_from_user(reg_offset, buf + len,
2073-
min(10, ((int)size-len)))) {
2074+
min(10, (size-len)))) {
20742075
ret = -EFAULT;
20752076
goto error_free;
20762077
}

drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,20 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
262262
drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr);
263263
drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status);
264264

265+
/* dump the ip state for each ip */
266+
drm_printf(&p, "IP Dump\n");
267+
for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
268+
if (coredump->adev->ip_blocks[i].version->funcs->print_ip_state) {
269+
drm_printf(&p, "IP: %s\n",
270+
coredump->adev->ip_blocks[i]
271+
.version->funcs->name);
272+
coredump->adev->ip_blocks[i]
273+
.version->funcs->print_ip_state(
274+
(void *)coredump->adev, &p);
275+
drm_printf(&p, "\n");
276+
}
277+
}
278+
265279
/* Add ring buffer information */
266280
drm_printf(&p, "Ring buffer information\n");
267281
for (int i = 0; i < coredump->adev->num_rings; i++) {

0 commit comments

Comments
 (0)