Skip to content

Fix pmm deadlock #345

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions arch/x86/pagetables.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ static mfn_t get_cr3_mfn(cr3_t *cr3_entry) {
void *cr3_mapped = NULL;

if (mfn_invalid(cr3_entry->mfn)) {
frame_t *frame = get_free_frame();
frame_t *frame = get_free_frame_norefill();
BUG_ON(!frame);
frame->flags.pagetable = 1;

Expand Down Expand Up @@ -243,7 +243,7 @@ static mfn_t get_pgentry_mfn(mfn_t tab_mfn, pt_index_t index, unsigned long flag

mfn = mfn_from_pgentry(*entry);
if (mfn_invalid(mfn)) {
frame_t *frame = get_free_frame();
frame_t *frame = get_free_frame_norefill();
BUG_ON(!frame);
frame->flags.pagetable = 1;

Expand Down Expand Up @@ -308,6 +308,7 @@ static void *_vmap(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned int order,
invlpg(va);

done:
refill_from_paging();
return va;
}

Expand Down Expand Up @@ -420,6 +421,19 @@ void *vmap_4k(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l1_flags,
return va;
}

void *vmap_4k_nolock(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l1_flags,
bool propagate_user) {
unsigned long _va = _ul(va) & PAGE_ORDER_TO_MASK(PAGE_ORDER_4K);

dprintk("%s: va: 0x%p mfn: 0x%lx\n", __func__, va, mfn);

ASSERT(vmap_lock);
va = _vmap_4k(&cr3, _ptr(_va), mfn, l1_flags, propagate_user);
ASSERT(vmap_lock);

return va;
}

static inline void init_tmp_mapping(void) {
pte_t *tab = get_l1_table(_tmp_mapping);
_tmp_mapping_entry = (pgentry_t *) l1_table_entry(tab, _tmp_mapping);
Expand Down
8 changes: 8 additions & 0 deletions include/arch/x86/pagetable.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ extern void *vmap_2m(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l2_flags
bool propagate_user);
extern void *vmap_4k(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l1_flags,
bool propagate_user);
/* this function may only be called while already holding the vmap_lock */
extern void *vmap_4k_nolock(cr3_t *cr3_ptr, void *va, mfn_t mfn, unsigned long l1_flags,
bool propagate_user);

extern int vunmap_kern(void *va, mfn_t *mfn, unsigned int *order);
extern int vunmap_user(void *va, mfn_t *mfn, unsigned int *order);
Expand Down Expand Up @@ -360,6 +363,11 @@ static inline void *vmap_kern_4k(void *va, mfn_t mfn, unsigned long l1_flags) {
return vmap_4k(&cr3, va, mfn, l1_flags, false);
}

/* Same as vmap_kern_4k but does not take vmap_lock. Callers must hold vmap_lock! */
static inline void *vmap_kern_4k_nolock(void *va, mfn_t mfn, unsigned long l1_flags) {
return vmap_4k_nolock(&cr3, va, mfn, l1_flags, false);
}

static inline void *vmap_user(void *va, mfn_t mfn, unsigned int order,
#if defined(__x86_64__)
unsigned long l4_flags,
Expand Down
2 changes: 2 additions & 0 deletions include/mm/pmm.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ extern void init_pmm(void);

extern frame_t *get_free_frames_cond(free_frames_cond_t cb);
extern frame_t *get_free_frames(unsigned int order);
extern frame_t *get_free_frame_norefill(void);
extern void put_free_frames(mfn_t mfn, unsigned int order);
extern void reclaim_frame(mfn_t mfn, unsigned int order);

Expand All @@ -84,6 +85,7 @@ extern frame_t *find_busy_paddr_frame(paddr_t paddr);
extern frame_t *find_paddr_frame(paddr_t paddr);

extern void map_frames_array(void);
extern void refill_from_paging(void);

/* Static definitions */

Expand Down
146 changes: 122 additions & 24 deletions mm/pmm.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,39 @@ static frames_array_t early_frames;
static list_head_t free_frames[MAX_PAGE_ORDER + 1];
static list_head_t busy_frames[MAX_PAGE_ORDER + 1];

#define MIN_NUM_4K_FRAMES 2
/*
* Worst case: pmm wants to refill while paging just started on another thread
* 1 for pmm refill attempt (new_frames_array)
* 3 for paging currently ongoing
* 4 for refill_from_paging (1 array frame, 3 for mapping it)
*/
#define MIN_NUM_4K_FRAMES (1 + 3 + (1 + 3))
/* enough array frames to refill 4K frames in the worst case without needing refill */
#define MIN_NOREFILL_FREE_FRAMES_THRESHOLD \
(MIN_FREE_FRAMES_THRESHOLD + MIN_NUM_4K_FRAMES + (MAX_PAGE_ORDER - PAGE_ORDER_4K))
static size_t frames_count[MAX_PAGE_ORDER + 1];

/* first lock to serialize normal access to pmm (i.e. not array frame refill) */
static spinlock_t lock = SPINLOCK_INIT;
/**
* second lock to give paging priority access to pmm during array frame refill
*
* Ensure that get_free_frame_norefill and refill_from_paging are only called while
* holding the paging lock.
*/
static spinlock_t priority_lock = SPINLOCK_INIT;

static void try_create_4k_frames(void);

static inline void pmm_lock() {
spin_lock(&lock);
spin_lock(&priority_lock);
}

static inline void pmm_unlock() {
spin_unlock(&priority_lock);
spin_unlock(&lock);
}

void display_frames_count(void) {
printk("Avail memory frames: (total size: %lu MB)\n", total_phys_memory / MB(1));
Expand Down Expand Up @@ -95,9 +124,10 @@ static inline void init_frames_array(frames_array_t *array) {
for (unsigned i = 0; i < ARRAY_SIZE(array->frames); i++)
init_frame(&array->frames[i]);
list_add(&array->list, &frames);
total_free_frames += array->meta.free_count;
}

static frames_array_t *new_frames_array(void) {
static frames_array_t *_new_frames_array(bool nolock) {
frames_array_t *array;
frame_t *frame;

Expand All @@ -108,7 +138,21 @@ static frames_array_t *new_frames_array(void) {
if (!boot_flags.virt)
array = (frames_array_t *) mfn_to_virt_kern(frame->mfn);
else {
array = vmap_kern_4k(mfn_to_virt_map(frame->mfn), frame->mfn, L1_PROT);
/* switch to special refilling mode to avoid deadlock with paging */
spin_unlock(&priority_lock);

/* if we are comming from paging then we have to to this mapping without taking
* the lock again */
if (nolock) {
array = vmap_kern_4k_nolock(mfn_to_virt_map(frame->mfn), frame->mfn, L1_PROT);
}
else {
array = vmap_kern_4k(mfn_to_virt_map(frame->mfn), frame->mfn, L1_PROT);
}

/* switch back to normal mode */
spin_lock(&priority_lock);

if (!array)
goto error;
}
Expand All @@ -117,13 +161,20 @@ static frames_array_t *new_frames_array(void) {

init_frames_array(array);

total_free_frames += array->meta.free_count;
return array;
error:
panic("PMM: Unable to allocate new page for frame array");
UNREACHABLE();
}

static inline frames_array_t *new_frames_array() {
return _new_frames_array(false);
}

static inline frames_array_t *new_frames_array_nolock() {
return _new_frames_array(true);
}

static void del_frames_array(frames_array_t *array) {
ASSERT(array);

Expand Down Expand Up @@ -426,31 +477,31 @@ static frame_t *_find_mfn_frame(list_head_t *list, mfn_t mfn, unsigned int order
frame_t *find_free_mfn_frame(mfn_t mfn, unsigned int order) {
frame_t *frame;

spin_lock(&lock);
pmm_lock();
frame = _find_mfn_frame(free_frames, mfn, order);
spin_unlock(&lock);
pmm_unlock();

return frame;
}

frame_t *find_busy_mfn_frame(mfn_t mfn, unsigned int order) {
frame_t *frame;

spin_lock(&lock);
pmm_lock();
frame = _find_mfn_frame(busy_frames, mfn, order);
spin_unlock(&lock);
pmm_unlock();

return frame;
}

frame_t *find_mfn_frame(mfn_t mfn, unsigned int order) {
frame_t *frame;

spin_lock(&lock);
pmm_lock();
frame = _find_mfn_frame(busy_frames, mfn, order);
if (!frame)
frame = _find_mfn_frame(free_frames, mfn, order);
spin_unlock(&lock);
pmm_unlock();

return frame;
}
Expand All @@ -471,31 +522,31 @@ static frame_t *_find_paddr_frame(list_head_t *list, paddr_t paddr) {
frame_t *find_free_paddr_frame(paddr_t paddr) {
frame_t *frame;

spin_lock(&lock);
pmm_lock();
frame = _find_paddr_frame(free_frames, paddr);
spin_unlock(&lock);
pmm_unlock();

return frame;
}

frame_t *find_busy_paddr_frame(paddr_t paddr) {
frame_t *frame;

spin_lock(&lock);
pmm_lock();
frame = _find_paddr_frame(busy_frames, paddr);
spin_unlock(&lock);
pmm_unlock();

return frame;
}

frame_t *find_paddr_frame(paddr_t paddr) {
frame_t *frame;

spin_lock(&lock);
pmm_lock();
frame = _find_paddr_frame(busy_frames, paddr);
if (!frame)
frame = _find_paddr_frame(free_frames, paddr);
spin_unlock(&lock);
pmm_unlock();

return frame;
}
Expand Down Expand Up @@ -599,20 +650,20 @@ static void try_create_4k_frames(void) {
* This function does not split larger frames.
*/
frame_t *get_free_frames_cond(free_frames_cond_t cb) {
spin_lock(&lock);
pmm_lock();
try_create_4k_frames();
for_each_order (order) {
frame_t *frame;

list_for_each_entry (frame, &free_frames[order], list) {
if (cb(frame)) {
reserve_frame(frame);
spin_unlock(&lock);
pmm_unlock();
return frame;
}
}
}
spin_unlock(&lock);
pmm_unlock();

return NULL;
}
Expand All @@ -623,22 +674,22 @@ frame_t *get_free_frames(unsigned int order) {
if (order > MAX_PAGE_ORDER)
return NULL;

spin_lock(&lock);
pmm_lock();
if (order == PAGE_ORDER_4K)
try_create_4k_frames();

while (list_is_empty(&free_frames[order])) {
BUG_ON(order == PAGE_ORDER_4K);
frame = find_larger_frame(free_frames, order);
if (!frame) {
spin_unlock(&lock);
pmm_unlock();
return NULL;
}
split_frame(frame);
}

frame = reserve_frame(get_first_frame(free_frames, order));
spin_unlock(&lock);
pmm_unlock();

return frame;
}
Expand All @@ -648,7 +699,7 @@ void put_free_frames(mfn_t mfn, unsigned int order) {

ASSERT(order <= MAX_PAGE_ORDER);

spin_lock(&lock);
pmm_lock();
frame = _find_mfn_frame(busy_frames, mfn, order);
if (!frame) {
warning("PMM: unable to find frame: %lx, order: %u among busy frames", mfn,
Expand All @@ -660,7 +711,7 @@ void put_free_frames(mfn_t mfn, unsigned int order) {
merge_frames(frame);

unlock:
spin_unlock(&lock);
pmm_unlock();
}

void map_frames_array(void) {
Expand All @@ -674,3 +725,50 @@ void map_frames_array(void) {
BUG_ON(!vmap_kern_4k(va, mfn, L1_PROT));
}
}

/* functions for paging to avoid deadlocks */

frame_t *get_free_frame_norefill(void) {
frame_t *frame;

spin_lock(&priority_lock);
frame = reserve_frame(get_first_frame(free_frames, PAGE_ORDER_4K));
spin_unlock(&priority_lock);

/* we ran out of reserved frames. increase MIN_NUM_4K_FRAMES */
BUG_ON(!frame);

return frame;
}

/**
* Function to refill 4k pages after using get_free_frame_norefill. Caller
* needs to hold the vmap_lock!
*/
void refill_from_paging(void) {
/* avoid recursive refilling after paging; variable protected by vmap_lock */
static bool refill_from_paging_ongoing = false;

/* avoid refill_from_paging being called as a result of refill_from_paging */
if (refill_from_paging_ongoing)
return;

refill_from_paging_ongoing = true;

spin_lock(&priority_lock);

/* ensure enough space to refill 4K frames without frame array allocation */
if (total_free_frames < MIN_NOREFILL_FREE_FRAMES_THRESHOLD)
new_frames_array_nolock();
/* if this fails, increase MIN_NUM_4K_FRAMES to allow for multiple array refills
* and change the "if" to a "while" above.
*/
BUG_ON(total_free_frames < MIN_NOREFILL_FREE_FRAMES_THRESHOLD);

/* refill the 4K frames */
try_create_4k_frames();

spin_unlock(&priority_lock);

refill_from_paging_ongoing = false;
}