diff --git a/arch/x86/pagetables.c b/arch/x86/pagetables.c index e095146f..f4e78cc4 100644 --- a/arch/x86/pagetables.c +++ b/arch/x86/pagetables.c @@ -200,7 +200,7 @@ static mfn_t get_cr3_mfn(cr3_t *cr3_entry) { void *cr3_mapped = NULL; if (mfn_invalid(cr3_entry->mfn)) { - frame_t *frame = get_free_frame(); + frame_t *frame = get_free_frame_norefill(); BUG_ON(!frame); frame->flags.pagetable = 1; @@ -243,7 +243,7 @@ static mfn_t get_pgentry_mfn(mfn_t tab_mfn, pt_index_t index, unsigned long flag mfn = mfn_from_pgentry(*entry); if (mfn_invalid(mfn)) { - frame_t *frame = get_free_frame(); + frame_t *frame = get_free_frame_norefill(); BUG_ON(!frame); frame->flags.pagetable = 1; @@ -308,6 +308,7 @@ static void *_vmap(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned int order, invlpg(va); done: + refill_from_paging(); return va; } @@ -420,6 +421,19 @@ void *vmap_4k(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l1_flags, return va; } +void *vmap_4k_nolock(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l1_flags, + bool propagate_user) { + unsigned long _va = _ul(va) & PAGE_ORDER_TO_MASK(PAGE_ORDER_4K); + + dprintk("%s: va: 0x%p mfn: 0x%lx\n", __func__, va, mfn); + + ASSERT(vmap_lock); + va = _vmap_4k(&cr3, _ptr(_va), mfn, l1_flags, propagate_user); + ASSERT(vmap_lock); + + return va; +} + static inline void init_tmp_mapping(void) { pte_t *tab = get_l1_table(_tmp_mapping); _tmp_mapping_entry = (pgentry_t *) l1_table_entry(tab, _tmp_mapping); diff --git a/include/arch/x86/pagetable.h b/include/arch/x86/pagetable.h index b0dbd7e1..a5489454 100644 --- a/include/arch/x86/pagetable.h +++ b/include/arch/x86/pagetable.h @@ -178,6 +178,9 @@ extern void *vmap_2m(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l2_flags bool propagate_user); extern void *vmap_4k(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l1_flags, bool propagate_user); +/* this function may only be called while already holding the vmap_lock */ +extern void *vmap_4k_nolock(cr3_t *cr3_ptr, void *va, 
mfn_t mfn, unsigned long l1_flags, + bool propagate_user); extern int vunmap_kern(void *va, mfn_t *mfn, unsigned int *order); extern int vunmap_user(void *va, mfn_t *mfn, unsigned int *order); @@ -360,6 +363,11 @@ static inline void *vmap_kern_4k(void *va, mfn_t mfn, unsigned long l1_flags) { return vmap_4k(&cr3, va, mfn, l1_flags, false); } +/* Same as vmap_kern_4k but does not take vmap_lock. Callers must hold vmap_lock! */ +static inline void *vmap_kern_4k_nolock(void *va, mfn_t mfn, unsigned long l1_flags) { + return vmap_4k_nolock(&cr3, va, mfn, l1_flags, false); +} + static inline void *vmap_user(void *va, mfn_t mfn, unsigned int order, #if defined(__x86_64__) unsigned long l4_flags, diff --git a/include/mm/pmm.h b/include/mm/pmm.h index 246c225c..f1c020a0 100644 --- a/include/mm/pmm.h +++ b/include/mm/pmm.h @@ -73,6 +73,7 @@ extern void init_pmm(void); extern frame_t *get_free_frames_cond(free_frames_cond_t cb); extern frame_t *get_free_frames(unsigned int order); +extern frame_t *get_free_frame_norefill(void); extern void put_free_frames(mfn_t mfn, unsigned int order); extern void reclaim_frame(mfn_t mfn, unsigned int order); @@ -84,6 +85,7 @@ extern frame_t *find_busy_paddr_frame(paddr_t paddr); extern frame_t *find_paddr_frame(paddr_t paddr); extern void map_frames_array(void); +extern void refill_from_paging(void); /* Static definitions */ diff --git a/mm/pmm.c b/mm/pmm.c index 72620d35..fc39bc5d 100644 --- a/mm/pmm.c +++ b/mm/pmm.c @@ -41,10 +41,39 @@ static frames_array_t early_frames; static list_head_t free_frames[MAX_PAGE_ORDER + 1]; static list_head_t busy_frames[MAX_PAGE_ORDER + 1]; -#define MIN_NUM_4K_FRAMES 2 +/* + * Worst case: pmm wants to refill while paging just started on another thread + * 1 for pmm refill attempt (new_frames_array) + * 3 for paging currently ongoing + * 4 for refill_from_paging (1 array frame, 3 for mapping it) + */ +#define MIN_NUM_4K_FRAMES (1 + 3 + (1 + 3)) +/* enough array frames to refill 4K frames in the worst case 
without needing refill */ +#define MIN_NOREFILL_FREE_FRAMES_THRESHOLD \ + (MIN_FREE_FRAMES_THRESHOLD + MIN_NUM_4K_FRAMES + (MAX_PAGE_ORDER - PAGE_ORDER_4K)) static size_t frames_count[MAX_PAGE_ORDER + 1]; +/* first lock to serialize normal access to pmm (i.e. not array frame refill) */ static spinlock_t lock = SPINLOCK_INIT; +/** + * second lock to give paging priority access to pmm during array frame refill + * + * Ensure that get_free_frame_norefill and refill_from_paging are only called while + * holding the paging lock. + */ +static spinlock_t priority_lock = SPINLOCK_INIT; + +static void try_create_4k_frames(void); + +static inline void pmm_lock() { + spin_lock(&lock); + spin_lock(&priority_lock); +} + +static inline void pmm_unlock() { + spin_unlock(&priority_lock); + spin_unlock(&lock); +} void display_frames_count(void) { printk("Avail memory frames: (total size: %lu MB)\n", total_phys_memory / MB(1)); @@ -95,9 +124,10 @@ static inline void init_frames_array(frames_array_t *array) { for (unsigned i = 0; i < ARRAY_SIZE(array->frames); i++) init_frame(&array->frames[i]); list_add(&array->list, &frames); + total_free_frames += array->meta.free_count; } -static frames_array_t *new_frames_array(void) { +static frames_array_t *_new_frames_array(bool nolock) { frames_array_t *array; frame_t *frame; @@ -108,7 +138,21 @@ static frames_array_t *new_frames_array(void) { if (!boot_flags.virt) array = (frames_array_t *) mfn_to_virt_kern(frame->mfn); else { - array = vmap_kern_4k(mfn_to_virt_map(frame->mfn), frame->mfn, L1_PROT); + /* switch to special refilling mode to avoid deadlock with paging */ + spin_unlock(&priority_lock); + + /* if we are coming from paging then we have to do this mapping without taking + * the lock again */ + if (nolock) { + array = vmap_kern_4k_nolock(mfn_to_virt_map(frame->mfn), frame->mfn, L1_PROT); + } + else { + array = vmap_kern_4k(mfn_to_virt_map(frame->mfn), frame->mfn, L1_PROT); + } + + /* switch back to normal mode */ + 
spin_lock(&priority_lock); + if (!array) goto error; } @@ -117,13 +161,20 @@ static frames_array_t *new_frames_array(void) { init_frames_array(array); - total_free_frames += array->meta.free_count; return array; error: panic("PMM: Unable to allocate new page for frame array"); UNREACHABLE(); } +static inline frames_array_t *new_frames_array() { + return _new_frames_array(false); +} + +static inline frames_array_t *new_frames_array_nolock() { + return _new_frames_array(true); +} + static void del_frames_array(frames_array_t *array) { ASSERT(array); @@ -426,9 +477,9 @@ static frame_t *_find_mfn_frame(list_head_t *list, mfn_t mfn, unsigned int order frame_t *find_free_mfn_frame(mfn_t mfn, unsigned int order) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = _find_mfn_frame(free_frames, mfn, order); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -436,9 +487,9 @@ frame_t *find_free_mfn_frame(mfn_t mfn, unsigned int order) { frame_t *find_busy_mfn_frame(mfn_t mfn, unsigned int order) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = _find_mfn_frame(busy_frames, mfn, order); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -446,11 +497,11 @@ frame_t *find_busy_mfn_frame(mfn_t mfn, unsigned int order) { frame_t *find_mfn_frame(mfn_t mfn, unsigned int order) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = _find_mfn_frame(busy_frames, mfn, order); if (!frame) frame = _find_mfn_frame(free_frames, mfn, order); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -471,9 +522,9 @@ static frame_t *_find_paddr_frame(list_head_t *list, paddr_t paddr) { frame_t *find_free_paddr_frame(paddr_t paddr) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = _find_paddr_frame(free_frames, paddr); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -481,9 +532,9 @@ frame_t *find_free_paddr_frame(paddr_t paddr) { frame_t *find_busy_paddr_frame(paddr_t paddr) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = 
_find_paddr_frame(busy_frames, paddr); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -491,11 +542,11 @@ frame_t *find_busy_paddr_frame(paddr_t paddr) { frame_t *find_paddr_frame(paddr_t paddr) { frame_t *frame; - spin_lock(&lock); + pmm_lock(); frame = _find_paddr_frame(busy_frames, paddr); if (!frame) frame = _find_paddr_frame(free_frames, paddr); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -599,7 +650,7 @@ static void try_create_4k_frames(void) { * This function does not split larger frames. */ frame_t *get_free_frames_cond(free_frames_cond_t cb) { - spin_lock(&lock); + pmm_lock(); try_create_4k_frames(); for_each_order (order) { frame_t *frame; @@ -607,12 +658,12 @@ frame_t *get_free_frames_cond(free_frames_cond_t cb) { list_for_each_entry (frame, &free_frames[order], list) { if (cb(frame)) { reserve_frame(frame); - spin_unlock(&lock); + pmm_unlock(); return frame; } } } - spin_unlock(&lock); + pmm_unlock(); return NULL; } @@ -623,7 +674,7 @@ frame_t *get_free_frames(unsigned int order) { if (order > MAX_PAGE_ORDER) return NULL; - spin_lock(&lock); + pmm_lock(); if (order == PAGE_ORDER_4K) try_create_4k_frames(); @@ -631,14 +682,14 @@ frame_t *get_free_frames(unsigned int order) { BUG_ON(order == PAGE_ORDER_4K); frame = find_larger_frame(free_frames, order); if (!frame) { - spin_unlock(&lock); + pmm_unlock(); return NULL; } split_frame(frame); } frame = reserve_frame(get_first_frame(free_frames, order)); - spin_unlock(&lock); + pmm_unlock(); return frame; } @@ -648,7 +699,7 @@ void put_free_frames(mfn_t mfn, unsigned int order) { ASSERT(order <= MAX_PAGE_ORDER); - spin_lock(&lock); + pmm_lock(); frame = _find_mfn_frame(busy_frames, mfn, order); if (!frame) { warning("PMM: unable to find frame: %lx, order: %u among busy frames", mfn, @@ -660,7 +711,7 @@ void put_free_frames(mfn_t mfn, unsigned int order) { merge_frames(frame); unlock: - spin_unlock(&lock); + pmm_unlock(); } void map_frames_array(void) { @@ -674,3 +725,50 @@ void 
map_frames_array(void) { BUG_ON(!vmap_kern_4k(va, mfn, L1_PROT)); } } + +/* functions for paging to avoid deadlocks */ + +frame_t *get_free_frame_norefill(void) { + frame_t *frame; + + spin_lock(&priority_lock); + frame = reserve_frame(get_first_frame(free_frames, PAGE_ORDER_4K)); + spin_unlock(&priority_lock); + + /* we ran out of reserved frames. increase MIN_NUM_4K_FRAMES */ + BUG_ON(!frame); + + return frame; +} + +/** + * Function to refill 4k pages after using get_free_frame_norefill. Caller + * needs to hold the vmap_lock! + */ +void refill_from_paging(void) { + /* avoid recursive refilling after paging; variable protected by vmap_lock */ + static bool refill_from_paging_ongoing = false; + + /* avoid refill_from_paging being called as a result of refill_from_paging */ + if (refill_from_paging_ongoing) + return; + + refill_from_paging_ongoing = true; + + spin_lock(&priority_lock); + + /* ensure enough space to refill 4K frames without frame array allocation */ + if (total_free_frames < MIN_NOREFILL_FREE_FRAMES_THRESHOLD) + new_frames_array_nolock(); + /* if this fails, increase MIN_NUM_4K_FRAMES to allow for multiple array refills + * and change the "if" to a "while" above. + */ + BUG_ON(total_free_frames < MIN_NOREFILL_FREE_FRAMES_THRESHOLD); + + /* refill the 4K frames */ + try_create_4k_frames(); + + spin_unlock(&priority_lock); + + refill_from_paging_ongoing = false; +}