Skip to content

Commit 289e685

Browse files
hkasivis authored and
alexdeucher committed
drm/amdkfd: Set per-process flags only once cik/vi
Set the per-process static sh_mem config only once, during process initialization. Move all static changes from update_qpd(), which is called each time a queue is created, to set_cache_memory_policy(), which is called once during process initialization.

set_cache_memory_policy() is currently defined only for the cik and vi families, so this commit focuses only on these two. A separate commit will address other ASICs.

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 68bfdc8 commit 289e685

File tree

3 files changed

+94
-85
lines changed

3 files changed

+94
-85
lines changed

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

Lines changed: 3 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2591,14 +2591,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
25912591
return retval;
25922592
}
25932593

2594-
/*
2595-
* Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
2596-
* stay in user mode.
2597-
*/
2598-
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
2599-
/* APE1 limit is inclusive and 64K aligned. */
2600-
#define APE1_LIMIT_ALIGNMENT 0xFFFF
2601-
26022594
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
26032595
struct qcm_process_device *qpd,
26042596
enum cache_policy default_policy,
@@ -2613,34 +2605,6 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
26132605

26142606
dqm_lock(dqm);
26152607

2616-
if (alternate_aperture_size == 0) {
2617-
/* base > limit disables APE1 */
2618-
qpd->sh_mem_ape1_base = 1;
2619-
qpd->sh_mem_ape1_limit = 0;
2620-
} else {
2621-
/*
2622-
* In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
2623-
* SH_MEM_APE1_BASE[31:0], 0x0000 }
2624-
* APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
2625-
* SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
2626-
* Verify that the base and size parameters can be
2627-
* represented in this format and convert them.
2628-
* Additionally restrict APE1 to user-mode addresses.
2629-
*/
2630-
2631-
uint64_t base = (uintptr_t)alternate_aperture_base;
2632-
uint64_t limit = base + alternate_aperture_size - 1;
2633-
2634-
if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
2635-
(limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
2636-
retval = false;
2637-
goto out;
2638-
}
2639-
2640-
qpd->sh_mem_ape1_base = base >> 16;
2641-
qpd->sh_mem_ape1_limit = limit >> 16;
2642-
}
2643-
26442608
retval = dqm->asic_ops.set_cache_memory_policy(
26452609
dqm,
26462610
qpd,
@@ -2649,6 +2613,9 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
26492613
alternate_aperture_base,
26502614
alternate_aperture_size);
26512615

2616+
if (retval)
2617+
goto out;
2618+
26522619
if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
26532620
program_sh_mem_settings(dqm, qpd);
26542621

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c

Lines changed: 46 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@
2727
#include "oss/oss_2_4_sh_mask.h"
2828
#include "gca/gfx_7_2_sh_mask.h"
2929

30+
/*
31+
* Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
32+
* stay in user mode.
33+
*/
34+
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
35+
/* APE1 limit is inclusive and 64K aligned. */
36+
#define APE1_LIMIT_ALIGNMENT 0xFFFF
37+
3038
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
3139
struct qcm_process_device *qpd,
3240
enum cache_policy default_policy,
@@ -84,6 +92,36 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
8492
{
8593
uint32_t default_mtype;
8694
uint32_t ape1_mtype;
95+
unsigned int temp;
96+
bool retval = true;
97+
98+
if (alternate_aperture_size == 0) {
99+
/* base > limit disables APE1 */
100+
qpd->sh_mem_ape1_base = 1;
101+
qpd->sh_mem_ape1_limit = 0;
102+
} else {
103+
/*
104+
* In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
105+
* SH_MEM_APE1_BASE[31:0], 0x0000 }
106+
* APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
107+
* SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
108+
* Verify that the base and size parameters can be
109+
* represented in this format and convert them.
110+
* Additionally restrict APE1 to user-mode addresses.
111+
*/
112+
113+
uint64_t base = (uintptr_t)alternate_aperture_base;
114+
uint64_t limit = base + alternate_aperture_size - 1;
115+
116+
if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
117+
(limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
118+
retval = false;
119+
goto out;
120+
}
121+
122+
qpd->sh_mem_ape1_base = base >> 16;
123+
qpd->sh_mem_ape1_limit = limit >> 16;
124+
}
87125

88126
default_mtype = (default_policy == cache_policy_coherent) ?
89127
MTYPE_NONCACHED :
@@ -97,37 +135,22 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
97135
| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
98136
| DEFAULT_MTYPE(default_mtype)
99137
| APE1_MTYPE(ape1_mtype);
100-
101-
return true;
102-
}
103-
104-
static int update_qpd_cik(struct device_queue_manager *dqm,
105-
struct qcm_process_device *qpd)
106-
{
107-
struct kfd_process_device *pdd;
108-
unsigned int temp;
109-
110-
pdd = qpd_to_pdd(qpd);
111-
112-
/* check if sh_mem_config register already configured */
113-
if (qpd->sh_mem_config == 0) {
114-
qpd->sh_mem_config =
115-
ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
116-
DEFAULT_MTYPE(MTYPE_NONCACHED) |
117-
APE1_MTYPE(MTYPE_NONCACHED);
118-
qpd->sh_mem_ape1_limit = 0;
119-
qpd->sh_mem_ape1_base = 0;
120-
}
121-
122138
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
123139
* aperture addresses.
124140
*/
125-
temp = get_sh_mem_bases_nybble_64(pdd);
141+
temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
126142
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
127143

128144
pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
129145
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
130146

147+
out:
148+
return retval;
149+
}
150+
151+
static int update_qpd_cik(struct device_queue_manager *dqm,
152+
struct qcm_process_device *qpd)
153+
{
131154
return 0;
132155
}
133156

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c

Lines changed: 45 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@
2727
#include "gca/gfx_8_0_sh_mask.h"
2828
#include "oss/oss_3_0_sh_mask.h"
2929

30+
/*
31+
* Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
32+
* stay in user mode.
33+
*/
34+
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
35+
/* APE1 limit is inclusive and 64K aligned. */
36+
#define APE1_LIMIT_ALIGNMENT 0xFFFF
37+
3038
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
3139
struct qcm_process_device *qpd,
3240
enum cache_policy default_policy,
@@ -85,6 +93,36 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
8593
{
8694
uint32_t default_mtype;
8795
uint32_t ape1_mtype;
96+
unsigned int temp;
97+
bool retval = true;
98+
99+
if (alternate_aperture_size == 0) {
100+
/* base > limit disables APE1 */
101+
qpd->sh_mem_ape1_base = 1;
102+
qpd->sh_mem_ape1_limit = 0;
103+
} else {
104+
/*
105+
* In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
106+
* SH_MEM_APE1_BASE[31:0], 0x0000 }
107+
* APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
108+
* SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
109+
* Verify that the base and size parameters can be
110+
* represented in this format and convert them.
111+
* Additionally restrict APE1 to user-mode addresses.
112+
*/
113+
114+
uint64_t base = (uintptr_t)alternate_aperture_base;
115+
uint64_t limit = base + alternate_aperture_size - 1;
116+
117+
if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
118+
(limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
119+
retval = false;
120+
goto out;
121+
}
122+
123+
qpd->sh_mem_ape1_base = base >> 16;
124+
qpd->sh_mem_ape1_limit = limit >> 16;
125+
}
88126

89127
default_mtype = (default_policy == cache_policy_coherent) ?
90128
MTYPE_UC :
@@ -100,40 +138,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
100138
default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
101139
ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
102140

103-
return true;
104-
}
105-
106-
static int update_qpd_vi(struct device_queue_manager *dqm,
107-
struct qcm_process_device *qpd)
108-
{
109-
struct kfd_process_device *pdd;
110-
unsigned int temp;
111-
112-
pdd = qpd_to_pdd(qpd);
113-
114-
/* check if sh_mem_config register already configured */
115-
if (qpd->sh_mem_config == 0) {
116-
qpd->sh_mem_config =
117-
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
118-
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
119-
MTYPE_UC <<
120-
SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
121-
MTYPE_UC <<
122-
SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
123-
124-
qpd->sh_mem_ape1_limit = 0;
125-
qpd->sh_mem_ape1_base = 0;
126-
}
127-
128141
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
129142
* aperture addresses.
130143
*/
131-
temp = get_sh_mem_bases_nybble_64(pdd);
144+
temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
132145
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
133146

134147
pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
135148
temp, qpd->sh_mem_bases);
149+
out:
150+
return retval;
151+
}
136152

153+
static int update_qpd_vi(struct device_queue_manager *dqm,
154+
struct qcm_process_device *qpd)
155+
{
137156
return 0;
138157
}
139158

0 commit comments

Comments
 (0)