
Commit f156a7d

vdonnefort authored and Marc Zyngier committed
KVM: arm64: Remove size-order align in the nVHE hyp private VA range
Commit f922c13 ("KVM: arm64: Introduce pkvm_alloc_private_va_range()") and commit 92abe0f ("KVM: arm64: Introduce hyp_alloc_private_va_range()") added an alignment for the start address of any allocation into the nVHE hypervisor private VA range. This alignment (the order of the allocation size) is intended to enable efficient stack verification: if the PAGE_SHIFT bit is zero, the stack pointer is on the guard page and a stack overflow occurred.

But this is only necessary for stack allocations and can waste a lot of VA space. So instead, add stack-specific functions that handle the guard-page requirements, while other users (e.g. the fixmap) only get page alignment.

Reviewed-by: Kalesh Singh <kaleshsingh@google.com>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230811112037.1147863-1-vdonnefort@google.com
1 parent a6b33d0 commit f156a7d
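The check the commit message alludes to relies on the stack page and its guard page forming a naturally aligned 2 * PAGE_SIZE pair, with the guard page at the lower address. A minimal sketch of the resulting overflow test, using a hypothetical helper name (the real check is performed on the stack pointer in the nVHE hyp exception entry path):

	static inline bool hyp_stack_overflowed(unsigned long sp)
	{
		/*
		 * Any valid stack address has BIT(PAGE_SHIFT) set; if the
		 * bit is clear, sp has dropped into the unbacked guard page
		 * below the stack, i.e. the stack overflowed.
		 */
		return !(sp & PAGE_SIZE);
	}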

File tree

6 files changed: +138 −85 lines


arch/arm64/include/asm/kvm_mmu.h

Lines changed: 1 addition & 0 deletions

@@ -168,6 +168,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
 			   void __iomem **haddr);
 int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
 			     void **haddr);
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
 void __init free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);

arch/arm64/kvm/arm.c

Lines changed: 1 addition & 25 deletions

@@ -2283,30 +2283,8 @@ static int __init init_hyp_mode(void)
 	for_each_possible_cpu(cpu) {
 		struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
 		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
-		unsigned long hyp_addr;
 
-		/*
-		 * Allocate a contiguous HYP private VA range for the stack
-		 * and guard page. The allocation is also aligned based on
-		 * the order of its size.
-		 */
-		err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
-		if (err) {
-			kvm_err("Cannot allocate hyp stack guard page\n");
-			goto out_err;
-		}
-
-		/*
-		 * Since the stack grows downwards, map the stack to the page
-		 * at the higher address and leave the lower guard page
-		 * unbacked.
-		 *
-		 * Any valid stack address now has the PAGE_SHIFT bit as 1
-		 * and addresses corresponding to the guard page have the
-		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
-		 */
-		err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
-					    __pa(stack_page), PAGE_HYP);
+		err = create_hyp_stack(__pa(stack_page), &params->stack_hyp_va);
 		if (err) {
 			kvm_err("Cannot map hyp stack\n");
 			goto out_err;
@@ -2319,8 +2297,6 @@ static int __init init_hyp_mode(void)
 		 * has been mapped in the flexible private VA space.
 		 */
 		params->stack_pa = __pa(stack_page);
-
-		params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
 	}
 
 	for_each_possible_cpu(cpu) {

arch/arm64/kvm/hyp/include/nvhe/mm.h

Lines changed: 1 addition & 0 deletions

@@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot
 int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
 				  enum kvm_pgtable_prot prot,
 				  unsigned long *haddr);
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
 int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
 
 #endif /* __KVM_HYP_MM_H */

arch/arm64/kvm/hyp/nvhe/mm.c

Lines changed: 66 additions & 17 deletions

@@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
 	return err;
 }
 
+static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
+{
+	unsigned long cur;
+
+	hyp_assert_lock_held(&pkvm_pgd_lock);
+
+	if (!start || start < __io_map_base)
+		return -EINVAL;
+
+	/* The allocated size is always a multiple of PAGE_SIZE */
+	cur = start + PAGE_ALIGN(size);
+
+	/* Are we overflowing on the vmemmap ? */
+	if (cur > __hyp_vmemmap)
+		return -ENOMEM;
+
+	__io_map_base = cur;
+
+	return 0;
+}
+
 /**
  * pkvm_alloc_private_va_range - Allocates a private VA range.
  * @size:	The size of the VA range to reserve.
@@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
  */
 int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
 {
-	unsigned long base, addr;
-	int ret = 0;
+	unsigned long addr;
+	int ret;
 
 	hyp_spin_lock(&pkvm_pgd_lock);
-
-	/* Align the allocation based on the order of its size */
-	addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
-
-	/* The allocated size is always a multiple of PAGE_SIZE */
-	base = addr + PAGE_ALIGN(size);
-
-	/* Are we overflowing on the vmemmap ? */
-	if (!addr || base > __hyp_vmemmap)
-		ret = -ENOMEM;
-	else {
-		__io_map_base = base;
-		*haddr = addr;
-	}
-
+	addr = __io_map_base;
+	ret = __pkvm_alloc_private_va_range(addr, size);
 	hyp_spin_unlock(&pkvm_pgd_lock);
 
+	*haddr = addr;
+
 	return ret;
 }
 
@@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits)
 	return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
 }
 
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
+{
+	unsigned long addr, prev_base;
+	size_t size;
+	int ret;
+
+	hyp_spin_lock(&pkvm_pgd_lock);
+
+	prev_base = __io_map_base;
+	/*
+	 * Efficient stack verification using the PAGE_SHIFT bit implies
+	 * an alignment of our allocation on the order of the size.
+	 */
+	size = PAGE_SIZE * 2;
+	addr = ALIGN(__io_map_base, size);
+
+	ret = __pkvm_alloc_private_va_range(addr, size);
+	if (!ret) {
+		/*
+		 * Since the stack grows downwards, map the stack to the page
+		 * at the higher address and leave the lower guard page
+		 * unbacked.
+		 *
+		 * Any valid stack address now has the PAGE_SHIFT bit as 1
+		 * and addresses corresponding to the guard page have the
+		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+		 */
		ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
+					  PAGE_SIZE, phys, PAGE_HYP);
+		if (ret)
+			__io_map_base = prev_base;
+	}
+	hyp_spin_unlock(&pkvm_pgd_lock);
+
+	*haddr = addr + size;
+
+	return ret;
+}
+
 static void *admit_host_page(void *arg)
 {
 	struct kvm_hyp_memcache *host_mc = arg;
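For a rough feel of the arithmetic in pkvm_create_stack() (the addresses below are made up for illustration, not taken from the patch): with 4 KiB pages the hyp-side allocator grows __io_map_base upwards, and the stack allocation is pre-aligned to its two-page size before being handed to __pkvm_alloc_private_va_range():

	/* Hypothetical values, PAGE_SIZE == 0x1000 */
	__io_map_base = 0x5000;				/* current allocator cursor */
	addr = ALIGN(__io_map_base, 2 * PAGE_SIZE);	/* 0x6000 */
	/* guard page: 0x6000-0x6fff, left unbacked (BIT(PAGE_SHIFT) clear) */
	/* stack page: 0x7000-0x7fff, mapped to phys (BIT(PAGE_SHIFT) set)  */
	*haddr = addr + 2 * PAGE_SIZE;			/* 0x8000, the stack's initial top */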

arch/arm64/kvm/hyp/nvhe/setup.c

Lines changed: 1 addition & 26 deletions

@@ -113,41 +113,16 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 
 	for (i = 0; i < hyp_nr_cpus; i++) {
 		struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
-		unsigned long hyp_addr;
 
 		start = (void *)kern_hyp_va(per_cpu_base[i]);
 		end = start + PAGE_ALIGN(hyp_percpu_size);
 		ret = pkvm_create_mappings(start, end, PAGE_HYP);
 		if (ret)
 			return ret;
 
-		/*
-		 * Allocate a contiguous HYP private VA range for the stack
-		 * and guard page. The allocation is also aligned based on
-		 * the order of its size.
-		 */
-		ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+		ret = pkvm_create_stack(params->stack_pa, &params->stack_hyp_va);
 		if (ret)
 			return ret;
-
-		/*
-		 * Since the stack grows downwards, map the stack to the page
-		 * at the higher address and leave the lower guard page
-		 * unbacked.
-		 *
-		 * Any valid stack address now has the PAGE_SHIFT bit as 1
-		 * and addresses corresponding to the guard page have the
-		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
-		 */
-		hyp_spin_lock(&pkvm_pgd_lock);
-		ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
-					  PAGE_SIZE, params->stack_pa, PAGE_HYP);
-		hyp_spin_unlock(&pkvm_pgd_lock);
-		if (ret)
-			return ret;
-
-		/* Update stack_hyp_va to end of the stack's private VA range */
-		params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
 	}
 
 	/*

arch/arm64/kvm/mmu.c

Lines changed: 68 additions & 17 deletions

@@ -592,6 +592,25 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
 	return 0;
 }
 
+static int __hyp_alloc_private_va_range(unsigned long base)
+{
+	lockdep_assert_held(&kvm_hyp_pgd_mutex);
+
+	if (!PAGE_ALIGNED(base))
+		return -EINVAL;
+
+	/*
+	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
+	 * allocating the new area, as it would indicate we've
+	 * overflowed the idmap/IO address range.
+	 */
+	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
+		return -ENOMEM;
+
+	io_map_base = base;
+
+	return 0;
+}
 
 /**
  * hyp_alloc_private_va_range - Allocates a private VA range.
@@ -612,26 +631,16 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
 
 	/*
 	 * This assumes that we have enough space below the idmap
-	 * page to allocate our VAs. If not, the check below will
-	 * kick. A potential alternative would be to detect that
-	 * overflow and switch to an allocation above the idmap.
+	 * page to allocate our VAs. If not, the check in
+	 * __hyp_alloc_private_va_range() will kick. A potential
+	 * alternative would be to detect that overflow and switch
+	 * to an allocation above the idmap.
 	 *
 	 * The allocated size is always a multiple of PAGE_SIZE.
 	 */
-	base = io_map_base - PAGE_ALIGN(size);
-
-	/* Align the allocation based on the order of its size */
-	base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
-
-	/*
-	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
-	 * allocating the new area, as it would indicate we've
-	 * overflowed the idmap/IO address range.
-	 */
-	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
-		ret = -ENOMEM;
-	else
-		*haddr = io_map_base = base;
+	size = PAGE_ALIGN(size);
+	base = io_map_base - size;
+	ret = __hyp_alloc_private_va_range(base);
 
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 
@@ -668,6 +677,48 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
 	return ret;
 }
 
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
+{
+	unsigned long base;
+	size_t size;
+	int ret;
+
+	mutex_lock(&kvm_hyp_pgd_mutex);
+	/*
+	 * Efficient stack verification using the PAGE_SHIFT bit implies
+	 * an alignment of our allocation on the order of the size.
+	 */
+	size = PAGE_SIZE * 2;
+	base = ALIGN_DOWN(io_map_base - size, size);
+
+	ret = __hyp_alloc_private_va_range(base);
+
+	mutex_unlock(&kvm_hyp_pgd_mutex);
+
+	if (ret) {
+		kvm_err("Cannot allocate hyp stack guard page\n");
+		return ret;
+	}
+
+	/*
+	 * Since the stack grows downwards, map the stack to the page
+	 * at the higher address and leave the lower guard page
+	 * unbacked.
+	 *
+	 * Any valid stack address now has the PAGE_SHIFT bit as 1
+	 * and addresses corresponding to the guard page have the
+	 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+	 */
+	ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
+				    PAGE_HYP);
+	if (ret)
+		kvm_err("Cannot map hyp stack\n");
+
+	*haddr = base + size;
+
+	return ret;
+}
+
 /**
  * create_hyp_io_mappings - Map IO into both kernel and HYP
  * @phys_addr:	The physical start address which gets mapped
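The kernel-side create_hyp_stack() mirrors the pKVM variant but allocates downwards from io_map_base, so it aligns down instead of up. A sketch with made-up numbers (illustrative only, not from the patch):

	/* Hypothetical values, PAGE_SIZE == 0x1000 */
	io_map_base = 0xb000;				/* allocator grows downwards */
	size = 2 * PAGE_SIZE;
	base = ALIGN_DOWN(io_map_base - size, size);	/* 0x8000 */
	/* guard page: 0x8000-0x8fff, unbacked; stack page: 0x9000-0x9fff, mapped */
	*haddr = base + size;				/* 0xa000, initial hyp stack pointer */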
