
Commit d5c2e4b

dcpleung authored and kartben committed
xtensa: userspace: workaround return PC calc with loops
When the syscall assembly executes, EPC points at the syscall instruction itself, so we have to advance it manually in order to return to the instruction after syscall and continue execution. However, when syscall is the last instruction in the body of a zero-overhead loop, this simple addition does not work: the incremented PC points past the loop end, so the remaining iterations would be skipped. Correct handling would require the syscall entrance code to inspect the loop registers and set the PC back to the beginning of the loop if we are still looping. Since most syscalls are not issued inside loops, that extra handling consumes quite a few cycles on every entry. To work around this, simply add a nop after syscall so the entrance code no longer has to deal with loops at all; a single nop is faster than all the code needed to manipulate the loop registers.

Signed-off-by: Daniel Leung <daniel.leung@intel.com>
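To make the hazard concrete, here is a minimal, hypothetical sketch (not code from this commit) of a zero-overhead loop on a core with XCHAL_HAVE_LOOPS whose body ends with syscall; the function name, iteration count, and clobber list are assumptions for illustration:

    /* Hypothetical illustration only: the `loop` instruction sets up
     * LBEG/LEND/LCOUNT. syscall is 3 bytes, so EPC1 + 3 equals LEND,
     * and a naive "return to EPC1 + 3" falls out of the loop even
     * when LCOUNT says more iterations remain.
     */
    void syscall_in_loop(unsigned int n)
    {
        __asm__ volatile(
            "loop   %0, 1f\n\t" /* hardware loop over the body, LCOUNT = n - 1 */
            "syscall\n\t"       /* EPC1 points here; EPC1 + 3 == LEND (label 1) */
            "1:\n"              /* a naive PC bump resumes here, past the loop */
            :: "r" (n) : "a2", "memory");
    }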
1 parent bdb5723 commit d5c2e4b

File tree: 3 files changed (+33 −34 lines)


arch/xtensa/core/syscall_helper.c

Lines changed: 3 additions & 3 deletions
@@ -25,7 +25,7 @@ uintptr_t xtensa_syscall_helper_args_6(uintptr_t arg1, uintptr_t arg2,
 	register uintptr_t a8 __asm__("%a8") = arg5;
 	register uintptr_t a9 __asm__("%a9") = arg6;
 
-	__asm__ volatile("syscall\n\t"
+	__asm__ volatile(XTENSA_SYSCALL_ASM
 			 : "=r" (a2)
 			 : "r" (a2), "r" (a6), "r" (a3), "r" (a4),
 			   "r" (a5), "r" (a8), "r" (a9)
@@ -45,7 +45,7 @@ uintptr_t xtensa_syscall_helper_args_5(uintptr_t arg1, uintptr_t arg2,
 	register uintptr_t a5 __asm__("%a5") = arg4;
 	register uintptr_t a8 __asm__("%a8") = arg5;
 
-	__asm__ volatile("syscall\n\t"
+	__asm__ volatile(XTENSA_SYSCALL_ASM
 			 : "=r" (a2)
 			 : "r" (a2), "r" (a6), "r" (a3), "r" (a4),
 			   "r" (a5), "r" (a8)
@@ -64,7 +64,7 @@ uintptr_t xtensa_syscall_helper_args_4(uintptr_t arg1, uintptr_t arg2,
 	register uintptr_t a4 __asm__("%a4") = arg3;
 	register uintptr_t a5 __asm__("%a5") = arg4;
 
-	__asm__ volatile("syscall\n\t"
+	__asm__ volatile(XTENSA_SYSCALL_ASM
 			 : "=r" (a2)
 			 : "r" (a2), "r" (a6), "r" (a3), "r" (a4),
 			   "r" (a5)

arch/xtensa/core/userspace.S

Lines changed: 3 additions & 24 deletions
@@ -78,33 +78,12 @@ _not_checking_user_context:
 	/* Manipulate PC where we will return to after syscall.
 	 * This is needed as syscall will stash the PC where
 	 * the syscall instruction locates, instead of
-	 * the instruction after it.
+	 * the instruction after it. We need to increment it to
+	 * execute the next instruction when we return.
+	 * The instruction size is 3 bytes, so lets just add it.
 	 */
 	rsr.epc1 a3
-#if XCHAL_HAVE_LOOPS
-	/* If the syscall instruction was the last instruction in the body of
-	 * a zero-overhead loop, and the loop will execute again, decrement
-	 * the loop count and resume execution at the head of the loop.
-	 */
-	rsr.lend a2
 	addi a3, a3, 3
-	bne a2, a3, end_loop
-	rsr.lcount a2
-	beqz a2, end_loop
-	addi a2, a2, -1
-	wsr.lcount a2
-	rsr.lbeg a3
-
-	/* Make sure WSR above is synced before RSR in ODD_REG_SAVE. */
-	isync
-end_loop:
-#else
-	/* EPC1 (and now a3) contains the address that invoked syscall.
-	 * We need to increment it to execute the next instruction when
-	 * we return. The instruction size is 3 bytes, so lets just add it.
-	 */
-	addi a3, a3, 3
-#endif
 	s32i a3, a0, ___xtensa_irq_bsa_t_pc_OFFSET
 
 	/* Need to setup PS so we can spill all registers.
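For readers less familiar with Xtensa assembly, the deleted XCHAL_HAVE_LOOPS branch above implemented roughly the following logic; this C rendering is for illustration only, with epc1, lend, lcount, lbeg, and frame standing in for the special registers and the saved interrupt frame:

    /* Rough C equivalent of the removed loop fixup (illustration only) */
    uintptr_t return_pc = epc1 + 3;    /* instruction after the 3-byte syscall */

    if (return_pc == lend && lcount != 0) {
        lcount--;                      /* consume one loop iteration */
        return_pc = lbeg;              /* resume at the head of the loop */
    }

    frame->pc = return_pc;             /* the real code stores via s32i */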

include/zephyr/arch/xtensa/syscall.h

Lines changed: 27 additions & 7 deletions
@@ -30,6 +30,26 @@
 extern "C" {
 #endif
 
+/* When syscall assembly is executed, the EPC points to the syscall
+ * instruction, and we have to manually advance it so we will
+ * return to the instruction after syscall to continue execution.
+ * However, with zero-overhead loops and the syscall instruction is
+ * the last instruction, this simple addition does not work as it
+ * would point past the loop and would have skipped the loop.
+ * Because of this, syscall entrance would need to look at the loop
+ * registers and set the PC back to the beginning of loop if we are
+ * still looping. Assuming most of the syscalls are not inside
+ * loops, the extra handling code consumes quite a few cycles.
+ * To workaround this, simply adds a nop after syscall so we no
+ * longer have to deal with loops at syscall entrance, and that
+ * a nop is faster than all the code to manipulate loop registers.
+ */
+#ifdef XCHAL_HAVE_LOOPS
+#define XTENSA_SYSCALL_ASM "syscall; nop;"
+#else
+#define XTENSA_SYSCALL_ASM "syscall"
+#endif
+
 #ifdef CONFIG_XTENSA_SYSCALL_USE_HELPER
 uintptr_t xtensa_syscall_helper_args_6(uintptr_t arg1, uintptr_t arg2,
 				       uintptr_t arg3, uintptr_t arg4,
@@ -75,7 +95,7 @@ static SYSINL uintptr_t arch_syscall_invoke6(uintptr_t arg1, uintptr_t arg2,
 	register uintptr_t a8 __asm__("%a8") = arg5;
 	register uintptr_t a9 __asm__("%a9") = arg6;
 
-	__asm__ volatile("syscall\n\t"
+	__asm__ volatile(XTENSA_SYSCALL_ASM
 			 : "=r" (a2)
 			 : "r" (a2), "r" (a6), "r" (a3), "r" (a4),
 			   "r" (a5), "r" (a8), "r" (a9)
@@ -99,7 +119,7 @@ static SYSINL uintptr_t arch_syscall_invoke5(uintptr_t arg1, uintptr_t arg2,
 	register uintptr_t a5 __asm__("%a5") = arg4;
 	register uintptr_t a8 __asm__("%a8") = arg5;
 
-	__asm__ volatile("syscall\n\t"
+	__asm__ volatile(XTENSA_SYSCALL_ASM
 			 : "=r" (a2)
 			 : "r" (a2), "r" (a6), "r" (a3), "r" (a4),
 			   "r" (a5), "r" (a8)
@@ -122,7 +142,7 @@ static SYSINL uintptr_t arch_syscall_invoke4(uintptr_t arg1, uintptr_t arg2,
 	register uintptr_t a4 __asm__("%a4") = arg3;
 	register uintptr_t a5 __asm__("%a5") = arg4;
 
-	__asm__ volatile("syscall\n\t"
+	__asm__ volatile(XTENSA_SYSCALL_ASM
 			 : "=r" (a2)
 			 : "r" (a2), "r" (a6), "r" (a3), "r" (a4),
 			   "r" (a5)
@@ -140,7 +160,7 @@ static inline uintptr_t arch_syscall_invoke3(uintptr_t arg1, uintptr_t arg2,
 	register uintptr_t a3 __asm__("%a3") = arg2;
 	register uintptr_t a4 __asm__("%a4") = arg3;
 
-	__asm__ volatile("syscall\n\t"
+	__asm__ volatile(XTENSA_SYSCALL_ASM
 			 : "=r" (a2)
 			 : "r" (a2), "r" (a6), "r" (a3), "r" (a4)
 			 : "memory");
@@ -155,7 +175,7 @@ static inline uintptr_t arch_syscall_invoke2(uintptr_t arg1, uintptr_t arg2,
 	register uintptr_t a6 __asm__("%a6") = arg1;
 	register uintptr_t a3 __asm__("%a3") = arg2;
 
-	__asm__ volatile("syscall\n\t"
+	__asm__ volatile(XTENSA_SYSCALL_ASM
 			 : "=r" (a2)
 			 : "r" (a2), "r" (a6), "r" (a3)
 			 : "memory");
@@ -168,7 +188,7 @@ static inline uintptr_t arch_syscall_invoke1(uintptr_t arg1, uintptr_t call_id)
 	register uintptr_t a2 __asm__("%a2") = call_id;
 	register uintptr_t a6 __asm__("%a6") = arg1;
 
-	__asm__ volatile("syscall\n\t"
+	__asm__ volatile(XTENSA_SYSCALL_ASM
 			 : "=r" (a2)
 			 : "r" (a2), "r" (a6)
 			 : "memory");
@@ -180,7 +200,7 @@ static inline uintptr_t arch_syscall_invoke0(uintptr_t call_id)
 {
 	register uintptr_t a2 __asm__("%a2") = call_id;
 
-	__asm__ volatile("syscall\n\t"
+	__asm__ volatile(XTENSA_SYSCALL_ASM
 			 : "=r" (a2)
 			 : "r" (a2)
 			 : "memory");
