From ba970a21ae800c1d35f55786f1e661cc3bc5b800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sandro=20R=C3=BCegge?= Date: Mon, 27 Jan 2025 08:12:22 +0100 Subject: [PATCH 1/2] pmm: fix total_free_frames initial count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sandro Rüegge --- mm/pmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/pmm.c b/mm/pmm.c index 72620d35..e503ad6a 100644 --- a/mm/pmm.c +++ b/mm/pmm.c @@ -95,6 +95,7 @@ static inline void init_frames_array(frames_array_t *array) { for (unsigned i = 0; i < ARRAY_SIZE(array->frames); i++) init_frame(&array->frames[i]); list_add(&array->list, &frames); + total_free_frames += array->meta.free_count; } static frames_array_t *new_frames_array(void) { @@ -117,7 +118,6 @@ static frames_array_t *new_frames_array(void) { init_frames_array(array); - total_free_frames += array->meta.free_count; return array; error: panic("PMM: Unable to allocate new page for frame array"); From 41767f584df3539e20880e17cddcdbc45589a9f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sandro=20R=C3=BCegge?= Date: Mon, 27 Jan 2025 09:02:31 +0100 Subject: [PATCH 2/2] pagetables,pmm: fix deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. paging gets exclusive pmm access during array refill. 2. pmm can call back into paging while holding the paging lock. 3. paging performs pmm refill if needed to ensure reserved frames. 
Signed-off-by: Sandro Rüegge --- arch/x86/pagetables.c | 18 ++++- include/arch/x86/pagetable.h | 8 ++ include/mm/pmm.h | 2 + mm/pmm.c | 144 +++++++++++++++++++++++++++++------ 4 files changed, 147 insertions(+), 25 deletions(-) diff --git a/arch/x86/pagetables.c b/arch/x86/pagetables.c index e095146f..f4e78cc4 100644 --- a/arch/x86/pagetables.c +++ b/arch/x86/pagetables.c @@ -200,7 +200,7 @@ static mfn_t get_cr3_mfn(cr3_t *cr3_entry) { void *cr3_mapped = NULL; if (mfn_invalid(cr3_entry->mfn)) { - frame_t *frame = get_free_frame(); + frame_t *frame = get_free_frame_norefill(); BUG_ON(!frame); frame->flags.pagetable = 1; @@ -243,7 +243,7 @@ static mfn_t get_pgentry_mfn(mfn_t tab_mfn, pt_index_t index, unsigned long flag mfn = mfn_from_pgentry(*entry); if (mfn_invalid(mfn)) { - frame_t *frame = get_free_frame(); + frame_t *frame = get_free_frame_norefill(); BUG_ON(!frame); frame->flags.pagetable = 1; @@ -308,6 +308,7 @@ static void *_vmap(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned int order, invlpg(va); done: + refill_from_paging(); return va; } @@ -420,6 +421,19 @@ void *vmap_4k(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l1_flags, return va; } +void *vmap_4k_nolock(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l1_flags, + bool propagate_user) { + unsigned long _va = _ul(va) & PAGE_ORDER_TO_MASK(PAGE_ORDER_4K); + + dprintk("%s: va: 0x%p mfn: 0x%lx\n", __func__, va, mfn); + + ASSERT(vmap_lock); + va = _vmap_4k(&cr3, _ptr(_va), mfn, l1_flags, propagate_user); + ASSERT(vmap_lock); + + return va; +} + static inline void init_tmp_mapping(void) { pte_t *tab = get_l1_table(_tmp_mapping); _tmp_mapping_entry = (pgentry_t *) l1_table_entry(tab, _tmp_mapping); diff --git a/include/arch/x86/pagetable.h b/include/arch/x86/pagetable.h index b0dbd7e1..a5489454 100644 --- a/include/arch/x86/pagetable.h +++ b/include/arch/x86/pagetable.h @@ -178,6 +178,9 @@ extern void *vmap_2m(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l2_flags bool propagate_user); extern 
void *vmap_4k(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l1_flags, bool propagate_user); +/* this function may only be called while already holding the vmap_lock */ +extern void *vmap_4k_nolock(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l1_flags, + bool propagate_user); extern int vunmap_kern(void *va, mfn_t *mfn, unsigned int *order); extern int vunmap_user(void *va, mfn_t *mfn, unsigned int *order); @@ -360,6 +363,11 @@ static inline void *vmap_kern_4k(void *va, mfn_t mfn, unsigned long l1_flags) { return vmap_4k(&cr3, va, mfn, l1_flags, false); } +/* Same as vmap_kern_4k but does not take vmap_lock. Callers must hold vmap_lock! */ +static inline void *vmap_kern_4k_nolock(void *va, mfn_t mfn, unsigned long l1_flags) { + return vmap_4k_nolock(&cr3, va, mfn, l1_flags, false); +} + static inline void *vmap_user(void *va, mfn_t mfn, unsigned int order, #if defined(__x86_64__) unsigned long l4_flags, diff --git a/include/mm/pmm.h b/include/mm/pmm.h index 246c225c..f1c020a0 100644 --- a/include/mm/pmm.h +++ b/include/mm/pmm.h @@ -73,6 +73,7 @@ extern void init_pmm(void); extern frame_t *get_free_frames_cond(free_frames_cond_t cb); extern frame_t *get_free_frames(unsigned int order); +extern frame_t *get_free_frame_norefill(void); extern void put_free_frames(mfn_t mfn, unsigned int order); extern void reclaim_frame(mfn_t mfn, unsigned int order); @@ -84,6 +85,7 @@ extern frame_t *find_busy_paddr_frame(paddr_t paddr); extern frame_t *find_paddr_frame(paddr_t paddr); extern void map_frames_array(void); +extern void refill_from_paging(void); /* Static definitions */ diff --git a/mm/pmm.c b/mm/pmm.c index e503ad6a..fc39bc5d 100644 --- a/mm/pmm.c +++ b/mm/pmm.c @@ -41,10 +41,39 @@ static frames_array_t early_frames; static list_head_t free_frames[MAX_PAGE_ORDER + 1]; static list_head_t busy_frames[MAX_PAGE_ORDER + 1]; -#define MIN_NUM_4K_FRAMES 2 +/* + * Worst case: pmm wants to refill while paging just started on another thread + * 1 for pmm refill attempt 
(new_frames_array) + * 3 for paging currently ongoing + * 4 for refill_from_paging (1 array frame, 3 for mapping it) + */ +#define MIN_NUM_4K_FRAMES (1 + 3 + (1 + 3)) +/* enough array frames to refill 4K frames in the worst case without needing refill */ +#define MIN_NOREFILL_FREE_FRAMES_THRESHOLD \ + (MIN_FREE_FRAMES_THRESHOLD + MIN_NUM_4K_FRAMES + (MAX_PAGE_ORDER - PAGE_ORDER_4K)) static size_t frames_count[MAX_PAGE_ORDER + 1]; +/* first lock to serialize normal access to pmm (i.e. not array frame refill) */ static spinlock_t lock = SPINLOCK_INIT; +/** + * second lock to give paging priority access to pmm during array frame refill + * + * Ensure that get_free_frame_norefill and refill_from_paging are only called while + * holding the paging lock. + */ +static spinlock_t priority_lock = SPINLOCK_INIT; + +static void try_create_4k_frames(void); + +static inline void pmm_lock() { + spin_lock(&lock); + spin_lock(&priority_lock); +} + +static inline void pmm_unlock() { + spin_unlock(&priority_lock); + spin_unlock(&lock); +} void display_frames_count(void) { printk("Avail memory frames: (total size: %lu MB)\n", total_phys_memory / MB(1)); @@ -98,7 +127,7 @@ static inline void init_frames_array(frames_array_t *array) { total_free_frames += array->meta.free_count; } -static frames_array_t *new_frames_array(void) { +static frames_array_t *_new_frames_array(bool nolock) { frames_array_t *array; frame_t *frame; @@ -109,7 +138,21 @@ static frames_array_t *new_frames_array(void) { if (!boot_flags.virt) array = (frames_array_t *) mfn_to_virt_kern(frame->mfn); else { - array = vmap_kern_4k(mfn_to_virt_map(frame->mfn), frame->mfn, L1_PROT); + /* switch to special refilling mode to avoid deadlock with paging */ + spin_unlock(&priority_lock); + + /* if we are coming from paging then we have to do this mapping without taking + * the lock again */ + if (nolock) { + array = vmap_kern_4k_nolock(mfn_to_virt_map(frame->mfn), frame->mfn, L1_PROT); + } + else { + array = 
vmap_kern_4k(mfn_to_virt_map(frame->mfn), frame->mfn, L1_PROT); + } + + /* switch back to normal mode */ + spin_lock(&priority_lock); + if (!array) goto error; } @@ -124,6 +167,14 @@ static frames_array_t *new_frames_array(void) { UNREACHABLE(); } +static inline frames_array_t *new_frames_array() { + return _new_frames_array(false); +} + +static inline frames_array_t *new_frames_array_nolock() { + return _new_frames_array(true); +} + static void del_frames_array(frames_array_t *array) { ASSERT(array); @@ -426,9 +477,9 @@ static frame_t *_find_mfn_frame(list_head_t *list, mfn_t mfn, unsigned int order frame_t *find_free_mfn_frame(mfn_t mfn, unsigned int order) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = _find_mfn_frame(free_frames, mfn, order); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -436,9 +487,9 @@ frame_t *find_free_mfn_frame(mfn_t mfn, unsigned int order) { frame_t *find_busy_mfn_frame(mfn_t mfn, unsigned int order) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = _find_mfn_frame(busy_frames, mfn, order); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -446,11 +497,11 @@ frame_t *find_busy_mfn_frame(mfn_t mfn, unsigned int order) { frame_t *find_mfn_frame(mfn_t mfn, unsigned int order) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = _find_mfn_frame(busy_frames, mfn, order); if (!frame) frame = _find_mfn_frame(free_frames, mfn, order); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -471,9 +522,9 @@ static frame_t *_find_paddr_frame(list_head_t *list, paddr_t paddr) { frame_t *find_free_paddr_frame(paddr_t paddr) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = _find_paddr_frame(free_frames, paddr); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -481,9 +532,9 @@ frame_t *find_free_paddr_frame(paddr_t paddr) { frame_t *find_busy_paddr_frame(paddr_t paddr) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = _find_paddr_frame(busy_frames, paddr); - 
spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -491,11 +542,11 @@ frame_t *find_busy_paddr_frame(paddr_t paddr) { frame_t *find_paddr_frame(paddr_t paddr) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = _find_paddr_frame(busy_frames, paddr); if (!frame) frame = _find_paddr_frame(free_frames, paddr); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -599,7 +650,7 @@ static void try_create_4k_frames(void) { * This function does not split larger frames. */ frame_t *get_free_frames_cond(free_frames_cond_t cb) { - spin_lock(&lock); + pmm_lock(); try_create_4k_frames(); for_each_order (order) { frame_t *frame; @@ -607,12 +658,12 @@ frame_t *get_free_frames_cond(free_frames_cond_t cb) { list_for_each_entry (frame, &free_frames[order], list) { if (cb(frame)) { reserve_frame(frame); - spin_unlock(&lock); + pmm_unlock(); return frame; } } } - spin_unlock(&lock); + pmm_unlock(); return NULL; } @@ -623,7 +674,7 @@ frame_t *get_free_frames(unsigned int order) { if (order > MAX_PAGE_ORDER) return NULL; - spin_lock(&lock); + pmm_lock(); if (order == PAGE_ORDER_4K) try_create_4k_frames(); @@ -631,14 +682,14 @@ frame_t *get_free_frames(unsigned int order) { BUG_ON(order == PAGE_ORDER_4K); frame = find_larger_frame(free_frames, order); if (!frame) { - spin_unlock(&lock); + pmm_unlock(); return NULL; } split_frame(frame); } frame = reserve_frame(get_first_frame(free_frames, order)); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -648,7 +699,7 @@ void put_free_frames(mfn_t mfn, unsigned int order) { ASSERT(order <= MAX_PAGE_ORDER); - spin_lock(&lock); + pmm_lock(); frame = _find_mfn_frame(busy_frames, mfn, order); if (!frame) { warning("PMM: unable to find frame: %lx, order: %u among busy frames", mfn, @@ -660,7 +711,7 @@ void put_free_frames(mfn_t mfn, unsigned int order) { merge_frames(frame); unlock: - spin_unlock(&lock); + pmm_unlock(); } void map_frames_array(void) { @@ -674,3 +725,50 @@ void map_frames_array(void) { BUG_ON(!vmap_kern_4k(va, 
mfn, L1_PROT)); } } + +/* functions for paging to avoid deadlocks */ + +frame_t *get_free_frame_norefill(void) { + frame_t *frame; + + spin_lock(&priority_lock); + frame = reserve_frame(get_first_frame(free_frames, PAGE_ORDER_4K)); + spin_unlock(&priority_lock); + + /* we ran out of reserved frames. increase MIN_NUM_4K_FRAMES */ + BUG_ON(!frame); + + return frame; +} + +/** + * Function to refill 4k pages after using get_free_frame_norefill. Caller + * needs to hold the vmap_lock! + */ +void refill_from_paging(void) { + /* avoid recursive refilling after paging; variable protected by vmap_lock */ + static bool refill_from_paging_ongoing = false; + + /* avoid refill_from_paging being called as a result of refill_from_paging */ + if (refill_from_paging_ongoing) + return; + + refill_from_paging_ongoing = true; + + spin_lock(&priority_lock); + + /* ensure enough space to refill 4K frames without frame array allocation */ + if (total_free_frames < MIN_NOREFILL_FREE_FRAMES_THRESHOLD) + new_frames_array_nolock(); + /* if this fails, increase MIN_NUM_4K_FRAMES to allow for multiple array refills + * and change the "if" to a "while" above. + */ + BUG_ON(total_free_frames < MIN_NOREFILL_FREE_FRAMES_THRESHOLD); + + /* refill the 4K frames */ + try_create_4k_frames(); + + spin_unlock(&priority_lock); + + refill_from_paging_ongoing = false; +}