Commit beb0cd1

lib: sbi: Simplify wait_for_coldboot() implementation
On the QEMU virt machine with a large number of HARTs, some of the HARTs randomly fail to come out of wait_for_coldboot() due to one of the following race conditions:

1) The failing HARTs are not able to acquire the coldboot_lock and update the coldboot_hartmask in wait_for_coldboot() before the coldboot HART acquires the coldboot_lock and sends the IPI in wake_coldboot_harts(), hence the failing HARTs never receive an IPI from the coldboot HART.

2) The failing HARTs acquire the coldboot_lock and update the coldboot_hartmask before the coldboot HART does sbi_scratch_init(), so sbi_hartmask_set_hartid() does not update the coldboot_hartmask for the failing HARTs, hence they never receive an IPI from the coldboot HART.

To address this, use a simple busy-loop in wait_for_coldboot() to poll on the coldboot_done flag.

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
1 parent f5375bc commit beb0cd1
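Note for readers: after this change, coldboot synchronization reduces to a single release/acquire handshake on the coldboot_done flag, with waiting HARTs spinning via cpu_relax(). The sketch below is not OpenSBI code; it models the same pattern with standard C11 atomics and pthreads (the relax() helper and the thread setup are purely illustrative).

/* Minimal sketch of the release/acquire flag handshake (not OpenSBI code). */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong coldboot_done;

/* Stand-in for cpu_relax(); the real RISC-V macro executes a slow div. */
static void relax(void)
{
	__asm__ __volatile__ ("" ::: "memory");	/* compiler barrier only */
}

static void *waiting_hart(void *arg)
{
	/* Corresponds to the simplified wait_for_coldboot(): spin on the flag. */
	while (!atomic_load_explicit(&coldboot_done, memory_order_acquire))
		relax();
	printf("hart %ld released\n", (long)arg);
	return NULL;
}

int main(void)
{
	pthread_t harts[4];

	for (long i = 0; i < 4; i++)
		pthread_create(&harts[i], NULL, waiting_hart, (void *)i);

	/* Corresponds to wake_coldboot_harts(): publish the flag with release. */
	atomic_store_explicit(&coldboot_done, 1, memory_order_release);

	for (long i = 0; i < 4; i++)
		pthread_join(harts[i], NULL);
	return 0;
}

Build with "cc -pthread" to see all waiter threads exit once the flag is published; the release store pairs with the acquire loads so everything written before the store is visible to the spinning threads.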

2 files changed: +8 -68 lines changed

include/sbi/riscv_barrier.h

Lines changed: 5 additions & 1 deletion
@@ -40,7 +40,11 @@
 #define smp_wmb() RISCV_FENCE(w,w)
 
 /* CPU relax for busy loop */
-#define cpu_relax() asm volatile ("" : : : "memory")
+#define cpu_relax()						\
+	do {							\
+		unsigned long __t;				\
+		__asm__ __volatile__ ("div %0, %0, zero" : "=r" (__t)); \
+	} while (0)
 
 /* clang-format on */
 
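The new cpu_relax() deliberately issues a div with a zero divisor: per the RISC-V specification, integer division by zero does not trap (the quotient is simply all ones), so the instruction only burns a handful of cycles and throttles the polling loop. That rationale is an inference, not stated in the commit; the standalone snippet below just checks the no-trap behaviour and assumes a RISC-V toolchain (e.g. riscv64-linux-gnu-gcc).

/* Standalone check (not OpenSBI code): division by zero on RISC-V does not
 * trap, which is what makes the div-based cpu_relax() safe to execute. */
#include <stdio.h>

#define cpu_relax()						\
do {								\
	unsigned long __t;					\
	__asm__ __volatile__ ("div %0, %0, zero" : "=r" (__t));\
} while (0)

int main(void)
{
	unsigned long q = 42;

	/* Per the RISC-V spec, DIV with a zero divisor returns all ones
	 * and raises no exception. */
	__asm__ __volatile__ ("div %0, %0, zero" : "+r" (q));
	printf("42 / 0 -> %lu (no trap taken)\n", q);

	cpu_relax();	/* burns a few cycles; no architectural side effect */
	return 0;
}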

lib/sbi/sbi_init.c

Lines changed: 3 additions & 67 deletions
@@ -10,7 +10,6 @@
 #include <sbi/riscv_asm.h>
 #include <sbi/riscv_atomic.h>
 #include <sbi/riscv_barrier.h>
-#include <sbi/riscv_locks.h>
 #include <sbi/sbi_console.h>
 #include <sbi/sbi_cppc.h>
 #include <sbi/sbi_domain.h>
@@ -191,82 +190,19 @@ static void sbi_boot_print_hart(struct sbi_scratch *scratch, u32 hartid)
 	sbi_hart_delegation_dump(scratch, "Boot HART ", " ");
 }
 
-static spinlock_t coldboot_lock = SPIN_LOCK_INITIALIZER;
-static struct sbi_hartmask coldboot_wait_hmask = { 0 };
-
 static unsigned long coldboot_done;
 
 static void wait_for_coldboot(struct sbi_scratch *scratch, u32 hartid)
 {
-	unsigned long saved_mie, cmip;
-
-	if (__smp_load_acquire(&coldboot_done))
-		return;
-
-	/* Save MIE CSR */
-	saved_mie = csr_read(CSR_MIE);
-
-	/* Set MSIE and MEIE bits to receive IPI */
-	csr_set(CSR_MIE, MIP_MSIP | MIP_MEIP);
-
-	/* Acquire coldboot lock */
-	spin_lock(&coldboot_lock);
-
-	/* Mark current HART as waiting */
-	sbi_hartmask_set_hartid(hartid, &coldboot_wait_hmask);
-
-	/* Release coldboot lock */
-	spin_unlock(&coldboot_lock);
-
-	/* Wait for coldboot to finish using WFI */
-	while (!__smp_load_acquire(&coldboot_done)) {
-		do {
-			wfi();
-			cmip = csr_read(CSR_MIP);
-		} while (!(cmip & (MIP_MSIP | MIP_MEIP)));
-	}
-
-	/* Acquire coldboot lock */
-	spin_lock(&coldboot_lock);
-
-	/* Unmark current HART as waiting */
-	sbi_hartmask_clear_hartid(hartid, &coldboot_wait_hmask);
-
-	/* Release coldboot lock */
-	spin_unlock(&coldboot_lock);
-
-	/* Restore MIE CSR */
-	csr_write(CSR_MIE, saved_mie);
-
-	/*
-	 * The wait for coldboot is common for both warm startup and
-	 * warm resume path so clearing IPI here would result in losing
-	 * an IPI in warm resume path.
-	 *
-	 * Also, the sbi_platform_ipi_init() called from sbi_ipi_init()
-	 * will automatically clear IPI for current HART.
-	 */
+	/* Wait for coldboot to finish */
+	while (!__smp_load_acquire(&coldboot_done))
+		cpu_relax();
 }
 
 static void wake_coldboot_harts(struct sbi_scratch *scratch, u32 hartid)
 {
-	u32 i, hartindex = sbi_hartid_to_hartindex(hartid);
-
 	/* Mark coldboot done */
 	__smp_store_release(&coldboot_done, 1);
-
-	/* Acquire coldboot lock */
-	spin_lock(&coldboot_lock);
-
-	/* Send an IPI to all HARTs waiting for coldboot */
-	sbi_hartmask_for_each_hartindex(i, &coldboot_wait_hmask) {
-		if (i == hartindex)
-			continue;
-		sbi_ipi_raw_send(i);
-	}
-
-	/* Release coldboot lock */
-	spin_unlock(&coldboot_lock);
 }
 
 static unsigned long entry_count_offset;
