Commit d1703dc

Mr-Bossman authored and palmer-dabbelt committed
RISC-V: Detect unaligned vector accesses supported
Run an unaligned vector access to test if the system supports vector unaligned access. Add the result to a new key in hwprobe. This is useful for usermode to know if vector misaligned accesses are supported and if they are faster or slower than equivalent byte accesses.

Signed-off-by: Jesse Taube <jesse@rivosinc.com>
Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Link: https://lore.kernel.org/r/20241017-jesse_unaligned_vector-v10-4-5b33500160f8@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
1 parent c05a62c commit d1703dc
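For context, here is a minimal usermode sketch (not part of this commit) that queries the new key through the raw riscv_hwprobe syscall. It assumes kernel UAPI headers recent enough to provide __NR_riscv_hwprobe, struct riscv_hwprobe, and RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF:

/*
 * Hedged usermode sketch: query the vector misaligned perf key for all
 * online CPUs (cpusetsize = 0, cpus = NULL). Keys the running kernel does
 * not recognise are reported back with key set to -1.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
        struct riscv_hwprobe pair = {
                .key = RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
        };

        /* args: pairs, pair_count, cpusetsize, cpus, flags */
        if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
                return 1;

        printf("RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF = %lld\n",
               (long long)pair.value);
        return 0;
}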

11 files changed: +222 additions, -30 deletions

arch/riscv/Kconfig

Lines changed: 36 additions & 0 deletions
@@ -784,12 +784,26 @@ config THREAD_SIZE_ORDER
           Specify the Pages of thread stack size (from 4KB to 64KB), which also
           affects irq stack size, which is equal to thread stack size.
 
+config RISCV_MISALIGNED
+        bool
+        help
+          Embed support for detecting and emulating misaligned
+          scalar or vector loads and stores.
+
 config RISCV_SCALAR_MISALIGNED
         bool
+        select RISCV_MISALIGNED
         select SYSCTL_ARCH_UNALIGN_ALLOW
         help
           Embed support for emulating misaligned loads and stores.
 
+config RISCV_VECTOR_MISALIGNED
+        bool
+        select RISCV_MISALIGNED
+        depends on RISCV_ISA_V
+        help
+          Enable detecting support for vector misaligned loads and stores.
+
 choice
         prompt "Unaligned Accesses Support"
         default RISCV_PROBE_UNALIGNED_ACCESS
@@ -841,6 +855,28 @@ config RISCV_EFFICIENT_UNALIGNED_ACCESS
 
 endchoice
 
+choice
+        prompt "Vector unaligned Accesses Support"
+        depends on RISCV_ISA_V
+        default RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+        help
+          This determines the level of support for vector unaligned accesses. This
+          information is used by the kernel to perform optimizations. It is also
+          exposed to user space via the hwprobe syscall. The hardware will be
+          probed at boot by default.
+
+config RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+        bool "Probe speed of vector unaligned accesses"
+        select RISCV_VECTOR_MISALIGNED
+        depends on RISCV_ISA_V
+        help
+          During boot, the kernel will run a series of tests to determine the
+          speed of vector unaligned accesses if they are supported. This probing
+          will dynamically determine the speed of vector unaligned accesses on
+          the underlying system if they are supported.
+
+endchoice
+
 source "arch/riscv/Kconfig.vendor"
 
 endmenu # "Platform type"

arch/riscv/include/asm/cpufeature.h

Lines changed: 7 additions & 1 deletion
@@ -59,8 +59,8 @@ void riscv_user_isa_enable(void);
 #define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \
         _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
 
-#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
 bool check_unaligned_access_emulated_all_cpus(void);
+#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
 void check_unaligned_access_emulated(struct work_struct *work __always_unused);
 void unaligned_emulation_finish(void);
 bool unaligned_ctl_available(void);
@@ -72,6 +72,12 @@ static inline bool unaligned_ctl_available(void)
 }
 #endif
 
+bool check_vector_unaligned_access_emulated_all_cpus(void);
+#if defined(CONFIG_RISCV_VECTOR_MISALIGNED)
+void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused);
+DECLARE_PER_CPU(long, vector_misaligned_access);
+#endif
+
 #if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
 DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);

arch/riscv/include/asm/entry-common.h

Lines changed: 0 additions & 11 deletions
@@ -25,18 +25,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 void handle_page_fault(struct pt_regs *regs);
 void handle_break(struct pt_regs *regs);
 
-#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
 int handle_misaligned_load(struct pt_regs *regs);
 int handle_misaligned_store(struct pt_regs *regs);
-#else
-static inline int handle_misaligned_load(struct pt_regs *regs)
-{
-        return -1;
-}
-static inline int handle_misaligned_store(struct pt_regs *regs)
-{
-        return -1;
-}
-#endif
 
 #endif /* _ASM_RISCV_ENTRY_COMMON_H */

arch/riscv/include/asm/hwprobe.h

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
 
 #include <uapi/asm/hwprobe.h>
 
-#define RISCV_HWPROBE_MAX_KEY 9
+#define RISCV_HWPROBE_MAX_KEY 10
 
 static inline bool riscv_hwprobe_key_is_valid(__s64 key)
 {

arch/riscv/include/asm/vector.h

Lines changed: 2 additions & 0 deletions
@@ -21,6 +21,7 @@
 
 extern unsigned long riscv_v_vsize;
 int riscv_v_setup_vsize(void);
+bool insn_is_vector(u32 insn_buf);
 bool riscv_v_first_use_handler(struct pt_regs *regs);
 void kernel_vector_begin(void);
 void kernel_vector_end(void);
@@ -268,6 +269,7 @@ struct pt_regs;
 
 static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; }
 static __always_inline bool has_vector(void) { return false; }
+static __always_inline bool insn_is_vector(u32 insn_buf) { return false; }
 static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }

arch/riscv/include/uapi/asm/hwprobe.h

Lines changed: 5 additions & 0 deletions
@@ -88,6 +88,11 @@ struct riscv_hwprobe {
 #define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW 2
 #define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST 3
 #define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED 4
+#define RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF 10
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN 0
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW 2
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4
 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
 
 /* Flags */
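As a hedged illustration (not from the patch), usermode code could map the value returned for RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF onto these constants roughly as follows; the wording mirrors the commit message's "faster or slower than equivalent byte accesses":

/* Hypothetical helper: describe a value read back for the new key. */
static const char *vector_misaligned_desc(unsigned long long value)
{
        switch (value) {
        case RISCV_HWPROBE_MISALIGNED_VECTOR_FAST:
                return "supported, faster than equivalent byte accesses";
        case RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW:
                return "supported, slower than equivalent byte accesses";
        case RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED:
                return "not supported";
        case RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN:
        default:
                return "unknown";
        }
}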

arch/riscv/kernel/Makefile

Lines changed: 2 additions & 2 deletions
@@ -68,8 +68,8 @@ obj-y += probes/
 obj-y += tests/
 obj-$(CONFIG_MMU) += vdso.o vdso/
 
-obj-$(CONFIG_RISCV_SCALAR_MISALIGNED) += traps_misaligned.o
-obj-$(CONFIG_RISCV_SCALAR_MISALIGNED) += unaligned_access_speed.o
+obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
+obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o
 obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o
 
 obj-$(CONFIG_FPU) += fpu.o

arch/riscv/kernel/sys_hwprobe.c

Lines changed: 35 additions & 0 deletions
@@ -201,6 +201,37 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
 }
 #endif
 
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+        int cpu;
+        u64 perf = -1ULL;
+
+        /* Return if supported or not even if speed wasn't probed */
+        for_each_cpu(cpu, cpus) {
+                int this_perf = per_cpu(vector_misaligned_access, cpu);
+
+                if (perf == -1ULL)
+                        perf = this_perf;
+
+                if (perf != this_perf) {
+                        perf = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+                        break;
+                }
+        }
+
+        if (perf == -1ULL)
+                return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
+        return perf;
+}
+#else
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+        return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+}
+#endif
+
 static void hwprobe_one_pair(struct riscv_hwprobe *pair,
                              const struct cpumask *cpus)
 {
@@ -229,6 +260,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
                 pair->value = hwprobe_misaligned(cpus);
                 break;
 
+        case RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF:
+                pair->value = hwprobe_vec_misaligned(cpus);
+                break;
+
         case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
                 pair->value = 0;
                 if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
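Because hwprobe_vec_misaligned() collapses differing per-CPU answers into RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN, a caller interested in one particular hart can restrict the query with the syscall's cpu-set arguments. A hedged sketch, under the same header assumptions as the earlier example:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
        struct riscv_hwprobe pair = {
                .key = RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
        };
        cpu_set_t cpus;

        /* Ask about CPU 0 only, so the answer cannot be blurred to UNKNOWN
         * by other CPUs reporting a different value. */
        CPU_ZERO(&cpus);
        CPU_SET(0, &cpus);

        if (syscall(__NR_riscv_hwprobe, &pair, 1, sizeof(cpus), &cpus, 0))
                return 1;

        printf("cpu0: %lld\n", (long long)pair.value);
        return 0;
}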

arch/riscv/kernel/traps_misaligned.c

Lines changed: 121 additions & 4 deletions
@@ -16,6 +16,7 @@
 #include <asm/entry-common.h>
 #include <asm/hwprobe.h>
 #include <asm/cpufeature.h>
+#include <asm/vector.h>
 
 #define INSN_MATCH_LB 0x3
 #define INSN_MASK_LB 0x707f
@@ -322,12 +323,37 @@ union reg_data {
         u64 data_u64;
 };
 
-static bool unaligned_ctl __read_mostly;
-
 /* sysctl hooks */
 int unaligned_enabled __read_mostly = 1;        /* Enabled by default */
 
-int handle_misaligned_load(struct pt_regs *regs)
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+static int handle_vector_misaligned_load(struct pt_regs *regs)
+{
+        unsigned long epc = regs->epc;
+        unsigned long insn;
+
+        if (get_insn(regs, epc, &insn))
+                return -1;
+
+        /* Only return 0 when in check_vector_unaligned_access_emulated */
+        if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
+                *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+                regs->epc = epc + INSN_LEN(insn);
+                return 0;
+        }
+
+        /* If vector instruction we don't emulate it yet */
+        regs->epc = epc;
+        return -1;
+}
+#else
+static int handle_vector_misaligned_load(struct pt_regs *regs)
+{
+        return -1;
+}
+#endif
+
+static int handle_scalar_misaligned_load(struct pt_regs *regs)
 {
         union reg_data val;
         unsigned long epc = regs->epc;
@@ -435,7 +461,7 @@ int handle_misaligned_load(struct pt_regs *regs)
         return 0;
 }
 
-int handle_misaligned_store(struct pt_regs *regs)
+static int handle_scalar_misaligned_store(struct pt_regs *regs)
 {
         union reg_data val;
         unsigned long epc = regs->epc;
@@ -526,6 +552,91 @@ int handle_misaligned_store(struct pt_regs *regs)
         return 0;
 }
 
+int handle_misaligned_load(struct pt_regs *regs)
+{
+        unsigned long epc = regs->epc;
+        unsigned long insn;
+
+        if (IS_ENABLED(CONFIG_RISCV_VECTOR_MISALIGNED)) {
+                if (get_insn(regs, epc, &insn))
+                        return -1;
+
+                if (insn_is_vector(insn))
+                        return handle_vector_misaligned_load(regs);
+        }
+
+        if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
+                return handle_scalar_misaligned_load(regs);
+
+        return -1;
+}
+
+int handle_misaligned_store(struct pt_regs *regs)
+{
+        if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
+                return handle_scalar_misaligned_store(regs);
+
+        return -1;
+}
+
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused)
+{
+        long *mas_ptr = this_cpu_ptr(&vector_misaligned_access);
+        unsigned long tmp_var;
+
+        *mas_ptr = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
+        kernel_vector_begin();
+        /*
+         * In pre-13.0.0 versions of GCC, vector registers cannot appear in
+         * the clobber list. This inline asm clobbers v0, but since we do not
+         * currently build the kernel with V enabled, the v0 clobber arg is not
+         * needed (as the compiler will not emit vector code itself). If the kernel
+         * is changed to build with V enabled, the clobber arg will need to be
+         * added here.
+         */
+        __asm__ __volatile__ (
+                ".balign 4\n\t"
+                ".option push\n\t"
+                ".option arch, +zve32x\n\t"
+                "       vsetivli zero, 1, e16, m1, ta, ma\n\t"  // Vectors of 16b
+                "       vle16.v v0, (%[ptr])\n\t"               // Load bytes
+                ".option pop\n\t"
+                : : [ptr] "r" ((u8 *)&tmp_var + 1));
+        kernel_vector_end();
+}
+
+bool check_vector_unaligned_access_emulated_all_cpus(void)
+{
+        int cpu;
+
+        if (!has_vector()) {
+                for_each_online_cpu(cpu)
+                        per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+                return false;
+        }
+
+        schedule_on_each_cpu(check_vector_unaligned_access_emulated);
+
+        for_each_online_cpu(cpu)
+                if (per_cpu(vector_misaligned_access, cpu)
+                    == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
+                        return false;
+
+        return true;
+}
+#else
+bool check_vector_unaligned_access_emulated_all_cpus(void)
+{
+        return false;
+}
+#endif
+
+#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
+
+static bool unaligned_ctl __read_mostly;
+
 void check_unaligned_access_emulated(struct work_struct *work __always_unused)
 {
         int cpu = smp_processor_id();
@@ -574,3 +685,9 @@ bool unaligned_ctl_available(void)
 {
         return unaligned_ctl;
 }
+#else
+bool check_unaligned_access_emulated_all_cpus(void)
+{
+        return false;
+}
+#endif
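The kernel probe above simply issues one misaligned vle16.v and lets the trap handler record the result. A loosely equivalent usermode sketch (not part of the patch, and assuming a toolchain that accepts ".option arch, +zve32x" and a kernel that makes the vector unit available to the process) would be:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t tmp = 0;

        /*
         * One misaligned 16-bit vector element load. On hardware without
         * vector misaligned support (and no emulation) this is expected to
         * terminate the process with SIGBUS. v0 is clobbered, which is
         * acceptable here only because nothing else in this tiny program
         * uses vector state.
         */
        __asm__ __volatile__ (
                ".option push\n\t"
                ".option arch, +zve32x\n\t"
                "vsetivli zero, 1, e16, m1, ta, ma\n\t"
                "vle16.v v0, (%[ptr])\n\t"
                ".option pop\n\t"
                : : [ptr] "r" ((uint8_t *)&tmp + 1) : "memory");

        puts("misaligned vector load completed");
        return 0;
}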

arch/riscv/kernel/unaligned_access_speed.c

Lines changed: 12 additions & 10 deletions
@@ -19,7 +19,8 @@
 #define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
 #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
 
-DEFINE_PER_CPU(long, misaligned_access_speed);
+DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
+DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
 
 #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
 static cpumask_t fast_misaligned_access;
@@ -260,23 +261,24 @@ static int check_unaligned_access_speed_all_cpus(void)
         kfree(bufs);
         return 0;
 }
+#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
+static int check_unaligned_access_speed_all_cpus(void)
+{
+        return 0;
+}
+#endif
 
 static int check_unaligned_access_all_cpus(void)
 {
-        bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+        bool all_cpus_emulated;
+
+        all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+        check_vector_unaligned_access_emulated_all_cpus();
 
         if (!all_cpus_emulated)
                 return check_unaligned_access_speed_all_cpus();
 
         return 0;
 }
-#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
-static int check_unaligned_access_all_cpus(void)
-{
-        check_unaligned_access_emulated_all_cpus();
-
-        return 0;
-}
-#endif
 
 arch_initcall(check_unaligned_access_all_cpus);