Skip to content

Commit 142262a

Browse files
fdavid-amd authored and alexdeucher committed
drm/amdgpu: Add EXT_COHERENT support for APU and NUMA systems
On gfx943 APU, EXT_COHERENT should give MTYPE_CC for local and MTYPE_UC for nonlocal memory.

On NUMA systems, local memory gets the local mtype, set by an override callback. If EXT_COHERENT is set, memory will be set as MTYPE_UC by default, with local memory MTYPE_CC.

Add an option in the override function for this case, and add a check to ensure it is not used on UNCACHED memory.

V2: Combined APU and NUMA code into one patch
V3: Fixed a potential nullptr in amdgpu_vm_bo_update

Signed-off-by: David Francis <David.Francis@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent a395f7f commit 142262a

File tree

5 files changed

+45
-23
lines changed

5 files changed

+45
-23
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
844844
* @immediate: immediate submission in a page fault
845845
* @unlocked: unlocked invalidation during MM callback
846846
* @flush_tlb: trigger tlb invalidation after update completed
847+
* @allow_override: change MTYPE for local NUMA nodes
847848
* @resv: fences we need to sync to
848849
* @start: start of mapped range
849850
* @last: last mapped entry
@@ -860,7 +861,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
860861
* 0 for success, negative error code for failure.
861862
*/
862863
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
863-
bool immediate, bool unlocked, bool flush_tlb,
864+
bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
864865
struct dma_resv *resv, uint64_t start, uint64_t last,
865866
uint64_t flags, uint64_t offset, uint64_t vram_base,
866867
struct ttm_resource *res, dma_addr_t *pages_addr,
@@ -898,6 +899,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
898899
params.immediate = immediate;
899900
params.pages_addr = pages_addr;
900901
params.unlocked = unlocked;
902+
params.allow_override = allow_override;
901903

902904
/* Implicitly sync to command submissions in the same VM before
903905
* unmapping. Sync to moving fences before mapping.
@@ -1073,6 +1075,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
10731075
struct ttm_resource *mem;
10741076
struct dma_fence **last_update;
10751077
bool flush_tlb = clear;
1078+
bool uncached;
10761079
struct dma_resv *resv;
10771080
uint64_t vram_base;
10781081
uint64_t flags;
@@ -1110,9 +1113,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
11101113

11111114
bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
11121115
vram_base = bo_adev->vm_manager.vram_base_offset;
1116+
uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0;
11131117
} else {
11141118
flags = 0x0;
11151119
vram_base = 0;
1120+
uncached = false;
11161121
}
11171122

11181123
if (clear || (bo && bo->tbo.base.resv ==
@@ -1146,7 +1151,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
11461151
trace_amdgpu_vm_bo_update(mapping);
11471152

11481153
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
1149-
resv, mapping->start, mapping->last,
1154+
!uncached, resv, mapping->start, mapping->last,
11501155
update_flags, mapping->offset,
11511156
vram_base, mem, pages_addr,
11521157
last_update);
@@ -1341,8 +1346,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
13411346
mapping->start < AMDGPU_GMC_HOLE_START)
13421347
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
13431348

1344-
r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
1345-
mapping->start, mapping->last,
1349+
r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
1350+
resv, mapping->start, mapping->last,
13461351
init_pte_value, 0, 0, NULL, NULL,
13471352
&f);
13481353
amdgpu_vm_free_mapping(adev, vm, mapping, f);
@@ -2618,8 +2623,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
26182623
goto error_unlock;
26192624
}
26202625

2621-
r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
2622-
addr, flags, value, 0, NULL, NULL, NULL);
2626+
r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
2627+
NULL, addr, addr, flags, value, 0, NULL, NULL, NULL);
26232628
if (r)
26242629
goto error_unlock;
26252630

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,12 @@ struct amdgpu_vm_update_params {
246246
* @table_freed: return true if page table is freed when updating
247247
*/
248248
bool table_freed;
249+
250+
/**
251+
* @allow_override: true for memory that is not uncached: allows MTYPE
252+
* to be overridden for NUMA local memory.
253+
*/
254+
bool allow_override;
249255
};
250256

251257
struct amdgpu_vm_update_funcs {
@@ -441,7 +447,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
441447
void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
442448
struct amdgpu_vm *vm, struct amdgpu_bo *bo);
443449
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
444-
bool immediate, bool unlocked, bool flush_tlb,
450+
bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
445451
struct dma_resv *resv, uint64_t start, uint64_t last,
446452
uint64_t flags, uint64_t offset, uint64_t vram_base,
447453
struct ttm_resource *res, dma_addr_t *pages_addr,

drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -843,7 +843,7 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
843843
*/
844844
if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
845845
adev->gmc.gmc_funcs->override_vm_pte_flags &&
846-
num_possible_nodes() > 1 && !params->pages_addr)
846+
num_possible_nodes() > 1 && !params->pages_addr && params->allow_override)
847847
amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags);
848848

849849
params->vm->update_funcs->update(params, pt, pe, addr, count, incr,

drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,12 +1251,15 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
12511251
return;
12521252
}
12531253

1254-
/* Only override mappings with MTYPE_NC, which is the safe default for
1255-
* cacheable memory.
1254+
/* MTYPE_NC is the safe default and can be overridden.
1255+
* MTYPE_UC will be present if the memory is extended-coherent
1256+
* and can also be overridden.
12561257
*/
12571258
if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
1258-
AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
1259-
dev_dbg_ratelimited(adev->dev, "MTYPE is not NC\n");
1259+
AMDGPU_PTE_MTYPE_VG10(MTYPE_NC) &&
1260+
(*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
1261+
AMDGPU_PTE_MTYPE_VG10(MTYPE_UC)) {
1262+
dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n");
12601263
return;
12611264
}
12621265

@@ -1283,15 +1286,23 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
12831286
vm->mem_id, local_node, nid);
12841287
if (nid == local_node) {
12851288
uint64_t old_flags = *flags;
1286-
unsigned int mtype_local = MTYPE_RW;
1289+
if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) ==
1290+
AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
1291+
unsigned int mtype_local = MTYPE_RW;
12871292

1288-
if (amdgpu_mtype_local == 1)
1289-
mtype_local = MTYPE_NC;
1290-
else if (amdgpu_mtype_local == 2)
1291-
mtype_local = MTYPE_CC;
1293+
if (amdgpu_mtype_local == 1)
1294+
mtype_local = MTYPE_NC;
1295+
else if (amdgpu_mtype_local == 2)
1296+
mtype_local = MTYPE_CC;
1297+
1298+
*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
1299+
AMDGPU_PTE_MTYPE_VG10(mtype_local);
1300+
} else {
1301+
/* MTYPE_UC case */
1302+
*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
1303+
AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
1304+
}
12921305

1293-
*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
1294-
AMDGPU_PTE_MTYPE_VG10(mtype_local);
12951306
dev_dbg_ratelimited(adev->dev, "flags updated from %llx to %llx\n",
12961307
old_flags, *flags);
12971308
}

drivers/gpu/drm/amd/amdkfd/kfd_svm.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,7 +1282,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
12821282
if (num_possible_nodes() <= 1)
12831283
mapping_flags |= mtype_local;
12841284
else
1285-
mapping_flags |= AMDGPU_VM_MTYPE_NC;
1285+
mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
12861286
/* system memory accessed by the dGPU */
12871287
} else {
12881288
mapping_flags |= AMDGPU_VM_MTYPE_UC;
@@ -1317,7 +1317,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
13171317

13181318
pr_debug("[0x%llx 0x%llx]\n", start, last);
13191319

1320-
return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
1320+
return amdgpu_vm_update_range(adev, vm, false, true, true, false, NULL, start,
13211321
last, init_pte_value, 0, 0, NULL, NULL,
13221322
fence);
13231323
}
@@ -1424,8 +1424,8 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
14241424
* different memory partition based on fpfn/lpfn, we should use
14251425
* same vm_manager.vram_base_offset regardless memory partition.
14261426
*/
1427-
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
1428-
last_start, prange->start + i,
1427+
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, true,
1428+
NULL, last_start, prange->start + i,
14291429
pte_flags,
14301430
(last_start - prange->start) << PAGE_SHIFT,
14311431
bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,

0 commit comments

Comments
 (0)