Skip to content

Commit 425b848

Browse files
committed
Merge tag 'amd-drm-next-6.15-2025-02-21' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.15-2025-02-21:

amdgpu:
- Add OEM i2c support for RGB lights, etc.
- Add support for GC 11.5.3
- Add support for GC 11.5.2
- Add support for SDMA 6.1.3
- Add support for NBIO 7.11.2
- Add support for NBIO 7.9.1
- Add support for MMHUB 3.3.2
- Add support for MMHUB 1.8.1
- Add support for SMU 14.0.5
- Add support for SMUIO 13.0.11
- Add support for PSP 14.0.5
- Add support for UMC 12.5.0
- Add support for DCN 3.6.0
- JPEG 4.0.3 updates
- Add dynamic workload profile switching for GC 10-12
- support larger vbios sizes
- GC 9.5.0 updates
- SMU 13.0.12 updates
- SMU 13.0.6 updates
- IP discovery updates
- GC 10 queue reset updates
- DCN 4.0.1 updates
- UHBR link rate fixes
- Aborted suspend fix
- Mark gttsize parameter as deprecated
- GC 10 cleaner shader updates
- PSR-SU fixes
- Clean up PM4 headers
- Cursor fixes
- Enable devcoredump for JPEG
- Misc cleanups
- Runpm cleanups
- MES updates
- GC 9 gfxoff fixes
- Vbios fetching cleanups
- Documentation updates
- Update secondary plane handling
- DML2 updates
- SDMA fixes for MI
- Cleaner shader fixes for GC 11/12
- ACA updates
- Initial JPEG queue reset support
- RAS updates
- Initial RAS CPER support
- DCN/DCE panic screen handling cleanup
- BT2020 fixes
- SR-IOV fixes

amdkfd:
- synchronize pasid values between KGD and KFD
- Misc cleanups
- Improve GTT/VRAM handling for APUs
- Topology updates
- Fix user queue validation on GC 7/8

UAPI:
- Enable "Broadcast RGB" drm property
- Add INFO IOCTL query for virtualization mode
  Proposed userspace: ROCm/amdsmi@e663bed

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250221213651.4176031-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
2 parents fb51bf0 + 3521276 commit 425b848

File tree

361 files changed

+87610
-16758
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

361 files changed

+87610
-16758
lines changed

drivers/gpu/drm/amd/amdgpu/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
6565
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
6666
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
6767
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
68-
amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o
68+
amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
69+
amdgpu_cper.o
6970

7071
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
7172

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
#include "amdgpu_mca.h"
110110
#include "amdgpu_aca.h"
111111
#include "amdgpu_ras.h"
112+
#include "amdgpu_cper.h"
112113
#include "amdgpu_xcp.h"
113114
#include "amdgpu_seq64.h"
114115
#include "amdgpu_reg_state.h"
@@ -415,6 +416,7 @@ bool amdgpu_get_bios(struct amdgpu_device *adev);
415416
bool amdgpu_read_bios(struct amdgpu_device *adev);
416417
bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
417418
u8 *bios, u32 length_bytes);
419+
void amdgpu_bios_release(struct amdgpu_device *adev);
418420
/*
419421
* Clocks
420422
*/
@@ -1090,6 +1092,9 @@ struct amdgpu_device {
10901092
/* ACA */
10911093
struct amdgpu_aca aca;
10921094

1095+
/* CPER */
1096+
struct amdgpu_cper cper;
1097+
10931098
struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
10941099
uint32_t harvest_ip_mask;
10951100
int num_ip_blocks;
@@ -1149,6 +1154,7 @@ struct amdgpu_device {
11491154
struct ratelimit_state throttling_logging_rs;
11501155
uint32_t ras_hw_enabled;
11511156
uint32_t ras_enabled;
1157+
bool ras_default_ecc_enabled;
11521158

11531159
bool no_hw_access;
11541160
struct pci_saved_state *pci_state;
@@ -1192,6 +1198,11 @@ struct amdgpu_device {
11921198
struct mutex enforce_isolation_mutex;
11931199

11941200
struct amdgpu_init_level *init_lvl;
1201+
1202+
/* This flag is used to determine how VRAM allocations are handled for APUs
1203+
* in KFD: VRAM or GTT.
1204+
*/
1205+
bool apu_prefer_gtt;
11951206
};
11961207

11971208
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,

drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,6 @@
3030

3131
typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
3232

33-
struct aca_banks {
34-
int nr_banks;
35-
struct list_head list;
36-
};
37-
38-
struct aca_hwip {
39-
int hwid;
40-
int mcatype;
41-
};
42-
4333
static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = {
4434
ACA_BANK_HWID(SMU, 0x01, 0x01),
4535
ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00),
@@ -111,7 +101,7 @@ static struct aca_regs_dump {
111101
{"STATUS", ACA_REG_IDX_STATUS},
112102
{"ADDR", ACA_REG_IDX_ADDR},
113103
{"MISC", ACA_REG_IDX_MISC0},
114-
{"CONFIG", ACA_REG_IDX_CONFG},
104+
{"CONFIG", ACA_REG_IDX_CONFIG},
115105
{"IPID", ACA_REG_IDX_IPID},
116106
{"SYND", ACA_REG_IDX_SYND},
117107
{"DESTAT", ACA_REG_IDX_DESTAT},
@@ -168,7 +158,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
168158
if (ret)
169159
return ret;
170160

171-
bank.type = type;
161+
bank.smu_err_type = type;
172162

173163
aca_smu_bank_dump(adev, i, count, &bank, qctx);
174164

@@ -394,6 +384,36 @@ static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type
394384
return ret;
395385
}
396386

387+
static void aca_banks_generate_cper(struct amdgpu_device *adev,
388+
enum aca_smu_type type,
389+
struct aca_banks *banks,
390+
int count)
391+
{
392+
struct aca_bank_node *node;
393+
struct aca_bank *bank;
394+
395+
if (!banks || !count) {
396+
dev_warn(adev->dev, "fail to generate cper records\n");
397+
return;
398+
}
399+
400+
/* UEs must be encoded into separate CPER entries */
401+
if (type == ACA_SMU_TYPE_UE) {
402+
list_for_each_entry(node, &banks->list, node) {
403+
bank = &node->bank;
404+
if (amdgpu_cper_generate_ue_record(adev, bank))
405+
dev_warn(adev->dev, "fail to generate ue cper records\n");
406+
}
407+
} else {
408+
/*
409+
* SMU_TYPE_CE banks are combined into 1 CPER entry,
410+
* they could be CEs or DEs or both
411+
*/
412+
if (amdgpu_cper_generate_ce_records(adev, banks, count))
413+
dev_warn(adev->dev, "fail to generate ce cper records\n");
414+
}
415+
}
416+
397417
static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
398418
bank_handler_t handler, struct ras_query_context *qctx, void *data)
399419
{
@@ -431,6 +451,8 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
431451
if (ret)
432452
goto err_release_banks;
433453

454+
aca_banks_generate_cper(adev, type, &banks, count);
455+
434456
err_release_banks:
435457
aca_banks_release(&banks);
436458

drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ enum aca_reg_idx {
8181
ACA_REG_IDX_STATUS = 1,
8282
ACA_REG_IDX_ADDR = 2,
8383
ACA_REG_IDX_MISC0 = 3,
84-
ACA_REG_IDX_CONFG = 4,
84+
ACA_REG_IDX_CONFIG = 4,
8585
ACA_REG_IDX_IPID = 5,
8686
ACA_REG_IDX_SYND = 6,
8787
ACA_REG_IDX_DESTAT = 8,
@@ -108,13 +108,20 @@ enum aca_error_type {
108108
};
109109

110110
enum aca_smu_type {
111+
ACA_SMU_TYPE_INVALID = -1,
111112
ACA_SMU_TYPE_UE = 0,
112113
ACA_SMU_TYPE_CE,
113114
ACA_SMU_TYPE_COUNT,
114115
};
115116

117+
struct aca_hwip {
118+
int hwid;
119+
int mcatype;
120+
};
121+
116122
struct aca_bank {
117-
enum aca_smu_type type;
123+
enum aca_error_type aca_err_type;
124+
enum aca_smu_type smu_err_type;
118125
u64 regs[ACA_MAX_REGS_COUNT];
119126
};
120127

@@ -123,6 +130,11 @@ struct aca_bank_node {
123130
struct list_head node;
124131
};
125132

133+
struct aca_banks {
134+
int nr_banks;
135+
struct list_head list;
136+
};
137+
126138
struct aca_bank_info {
127139
int die_id;
128140
int socket_id;

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
459459
else
460460
mem_info->local_mem_size_private =
461461
KFD_XCP_MEMORY_SIZE(adev, xcp->id);
462-
} else if (adev->flags & AMD_IS_APU) {
462+
} else if (adev->apu_prefer_gtt) {
463463
mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT);
464464
mem_info->local_mem_size_private = 0;
465465
} else {
@@ -818,7 +818,7 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
818818
}
819819
do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
820820
return ALIGN_DOWN(tmp, PAGE_SIZE);
821-
} else if (adev->flags & AMD_IS_APU) {
821+
} else if (adev->apu_prefer_gtt) {
822822
return (ttm_tt_pages_limit() << PAGE_SHIFT);
823823
} else {
824824
return adev->gmc.real_vram_size;

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ enum TLB_FLUSH_TYPE {
4747
};
4848

4949
struct amdgpu_device;
50+
struct kfd_process_device;
5051
struct amdgpu_reset_context;
5152

5253
enum kfd_mem_attachment_type {
@@ -299,14 +300,10 @@ bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id);
299300
(&((struct amdgpu_fpriv *) \
300301
((struct drm_file *)(drm_priv))->driver_priv)->vm)
301302

302-
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
303-
struct amdgpu_vm *avm, u32 pasid);
304303
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
305304
struct amdgpu_vm *avm,
306305
void **process_info,
307306
struct dma_fence **ef);
308-
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
309-
void *drm_priv);
310307
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
311308
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
312309
uint8_t xcp_id);

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 8 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
197197
return -EINVAL;
198198

199199
vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
200-
if (adev->flags & AMD_IS_APU) {
200+
if (adev->apu_prefer_gtt) {
201201
system_mem_needed = size;
202202
ttm_mem_needed = size;
203203
}
@@ -234,7 +234,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
234234
if (adev && xcp_id >= 0) {
235235
adev->kfd.vram_used[xcp_id] += vram_needed;
236236
adev->kfd.vram_used_aligned[xcp_id] +=
237-
(adev->flags & AMD_IS_APU) ?
237+
adev->apu_prefer_gtt ?
238238
vram_needed :
239239
ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
240240
}
@@ -262,7 +262,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
262262

263263
if (adev) {
264264
adev->kfd.vram_used[xcp_id] -= size;
265-
if (adev->flags & AMD_IS_APU) {
265+
if (adev->apu_prefer_gtt) {
266266
adev->kfd.vram_used_aligned[xcp_id] -= size;
267267
kfd_mem_limit.system_mem_used -= size;
268268
kfd_mem_limit.ttm_mem_used -= size;
@@ -890,7 +890,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
890890
* if peer device has large BAR. In contrast, access over xGMI is
891891
* allowed for both small and large BAR configurations of peer device
892892
*/
893-
if ((adev != bo_adev && !(adev->flags & AMD_IS_APU)) &&
893+
if ((adev != bo_adev && !adev->apu_prefer_gtt) &&
894894
((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
895895
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
896896
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -1529,27 +1529,6 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
15291529
amdgpu_bo_unreserve(bo);
15301530
}
15311531

1532-
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
1533-
struct amdgpu_vm *avm, u32 pasid)
1534-
1535-
{
1536-
int ret;
1537-
1538-
/* Free the original amdgpu allocated pasid,
1539-
* will be replaced with kfd allocated pasid.
1540-
*/
1541-
if (avm->pasid) {
1542-
amdgpu_pasid_free(avm->pasid);
1543-
amdgpu_vm_set_pasid(adev, avm, 0);
1544-
}
1545-
1546-
ret = amdgpu_vm_set_pasid(adev, avm, pasid);
1547-
if (ret)
1548-
return ret;
1549-
1550-
return 0;
1551-
}
1552-
15531532
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
15541533
struct amdgpu_vm *avm,
15551534
void **process_info,
@@ -1607,27 +1586,6 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
16071586
}
16081587
}
16091588

1610-
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
1611-
void *drm_priv)
1612-
{
1613-
struct amdgpu_vm *avm;
1614-
1615-
if (WARN_ON(!adev || !drm_priv))
1616-
return;
1617-
1618-
avm = drm_priv_to_vm(drm_priv);
1619-
1620-
pr_debug("Releasing process vm %p\n", avm);
1621-
1622-
/* The original pasid of amdgpu vm has already been
1623-
* released during making a amdgpu vm to a compute vm
1624-
* The current pasid is managed by kfd and will be
1625-
* released on kfd process destroy. Set amdgpu pasid
1626-
* to 0 to avoid duplicate release.
1627-
*/
1628-
amdgpu_vm_release_compute(adev, avm);
1629-
}
1630-
16311589
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
16321590
{
16331591
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1688,7 +1646,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
16881646
- reserved_for_pt
16891647
- reserved_for_ras;
16901648

1691-
if (adev->flags & AMD_IS_APU) {
1649+
if (adev->apu_prefer_gtt) {
16921650
system_mem_available = no_system_mem_limit ?
16931651
kfd_mem_limit.max_system_mem_limit :
16941652
kfd_mem_limit.max_system_mem_limit -
@@ -1736,7 +1694,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
17361694
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
17371695
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
17381696

1739-
if (adev->flags & AMD_IS_APU) {
1697+
if (adev->apu_prefer_gtt) {
17401698
domain = AMDGPU_GEM_DOMAIN_GTT;
17411699
alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
17421700
alloc_flags = 0;
@@ -1987,7 +1945,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
19871945
if (size) {
19881946
if (!is_imported &&
19891947
(mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
1990-
((adev->flags & AMD_IS_APU) &&
1948+
(adev->apu_prefer_gtt &&
19911949
mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
19921950
*size = bo_size;
19931951
else
@@ -2414,7 +2372,7 @@ static int import_obj_create(struct amdgpu_device *adev,
24142372
(*mem)->bo = bo;
24152373
(*mem)->va = va;
24162374
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
2417-
!(adev->flags & AMD_IS_APU) ?
2375+
!adev->apu_prefer_gtt ?
24182376
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
24192377

24202378
(*mem)->mapped_to_gpu_memory = 0;

0 commit comments

Comments
 (0)