Skip to content

Commit 400a39f

Browse files
jameszhu-amd authored and alexdeucher committed
drm/amdgpu: skip xcp drm device allocation when out of drm resource
Return 0 when drm device allocation fails with -ENOSPC, in order to allow the amdgpu driver to load. However, an xcp without a drm device node assigned won't be visible in user space. This helps the amdgpu driver load on systems which have more than 64 nodes, the current limit. A proposal to add more drm nodes, which would support up to 2^20 nodes in total, is being discussed publicly. kernel drm: https://lore.kernel.org/lkml/20230724211428.3831636-1-michal.winiarski@intel.com/T/ libdrm: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/305 Signed-off-by: James Zhu <James.Zhu@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 1d02ae4 commit 400a39f

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,13 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
239239

240240
for (i = 1; i < MAX_XCP; i++) {
241241
ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
242-
if (ret)
242+
if (ret == -ENOSPC) {
243+
dev_warn(adev->dev,
244+
"Skip xcp node #%d when out of drm node resource.", i);
245+
return 0;
246+
} else if (ret) {
243247
return ret;
248+
}
244249

245250
/* Redirect all IOCTLs to the primary device */
246251
adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
@@ -328,6 +333,9 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
328333
return 0;
329334

330335
for (i = 1; i < MAX_XCP; i++) {
336+
if (!adev->xcp_mgr->xcp[i].ddev)
337+
break;
338+
331339
ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
332340
if (ret)
333341
return ret;
@@ -345,6 +353,9 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
345353
return;
346354

347355
for (i = 1; i < MAX_XCP; i++) {
356+
if (!adev->xcp_mgr->xcp[i].ddev)
357+
break;
358+
348359
p_ddev = adev->xcp_mgr->xcp[i].ddev;
349360
drm_dev_unplug(p_ddev);
350361
p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;

drivers/gpu/drm/amd/amdkfd/kfd_topology.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1915,7 +1915,14 @@ int kfd_topology_add_device(struct kfd_node *gpu)
19151915
const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
19161916

19171917
gpu_id = kfd_generate_gpu_id(gpu);
1918-
pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
1918+
if (gpu->xcp && !gpu->xcp->ddev) {
1919+
dev_warn(gpu->adev->dev,
1920+
"Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.",
1921+
gpu_id);
1922+
return 0;
1923+
} else {
1924+
pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
1925+
}
19191926

19201927
/* Check to see if this gpu device exists in the topology_device_list.
19211928
* If so, assign the gpu to that device,

0 commit comments

Comments
 (0)