Skip to content

Commit 94f0038

Browse files
Merge patch series "RISC-V: mm: Make SV48 the default address space"
Charlie Jenkins <charlie@rivosinc.com> says: Make sv48 the default address space for mmap as some applications currently depend on this assumption. Users can now select a desired address space using a non-zero hint address to mmap. Previously, requesting the default address space from mmap by passing zero as the hint address would result in using the largest address space possible. Some applications depend on empty bits in the virtual address space, like Go and Java, so this patch provides more flexibility for application developers. * b4-shazam-merge: RISC-V: mm: Document mmap changes RISC-V: mm: Update pgtable comment documentation RISC-V: mm: Add tests for RISC-V mm RISC-V: mm: Restrict address space for sv39,sv48,sv57 Link: https://lore.kernel.org/r/20230809232218.849726-1-charlie@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2 parents 52b77c2 + 7998abe commit 94f0038

File tree

11 files changed

+261
-13
lines changed

11 files changed

+261
-13
lines changed

Documentation/riscv/vm-layout.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,25 @@ RISC-V Linux Kernel SV57
133133
ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF
134134
ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel
135135
__________________|____________|__________________|_________|____________________________________________________________
136+
137+
138+
Userspace VAs
139+
--------------------
140+
To maintain compatibility with software that relies on the VA space with a
141+
maximum of 48 bits, the kernel will, by default, return virtual addresses to
142+
userspace from a 48-bit range (sv48). This default behavior is achieved by
143+
passing 0 into the hint address parameter of mmap. On CPUs with an address space
144+
smaller than sv48, the CPU maximum supported address space will be the default.
145+
146+
Software can "opt-in" to receiving VAs from another VA space by providing
147+
a hint address to mmap. A hint address passed to mmap will cause the largest
148+
address space that fits entirely into the hint to be used, unless there is no
149+
space left in the address space. If there is no space available in the requested
150+
address space, an address in the next smallest available address space will be
151+
returned.
152+
153+
For example, in order to obtain 48-bit VA space, a hint address greater than or equal to
154+
:code:`1 << 47` must be provided. Note that this is 47 due to sv48 userspace
155+
ending at :code:`1 << 47` and the addresses beyond this are reserved for the
156+
kernel. Similarly, to obtain 57-bit VA space addresses, a hint address greater
157+
than or equal to :code:`1 << 56` must be provided.

arch/riscv/include/asm/elf.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
5050
* the loader. We need to make sure that it is out of the way of the program
5151
* that it will "exec", and that there is sufficient room for the brk.
5252
*/
53-
#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2)
53+
#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2)
5454

5555
#ifdef CONFIG_64BIT
5656
#ifdef CONFIG_COMPAT

arch/riscv/include/asm/pgtable.h

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,16 @@
6262
* struct pages to map half the virtual address space. Then
6363
* position vmemmap directly below the VMALLOC region.
6464
*/
65+
#define VA_BITS_SV32 32
6566
#ifdef CONFIG_64BIT
67+
#define VA_BITS_SV39 39
68+
#define VA_BITS_SV48 48
69+
#define VA_BITS_SV57 57
70+
6671
#define VA_BITS (pgtable_l5_enabled ? \
67-
57 : (pgtable_l4_enabled ? 48 : 39))
72+
VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
6873
#else
69-
#define VA_BITS 32
74+
#define VA_BITS VA_BITS_SV32
7075
#endif
7176

7277
#define VMEMMAP_SHIFT \
@@ -111,11 +116,27 @@
111116
#include <asm/page.h>
112117
#include <asm/tlbflush.h>
113118
#include <linux/mm_types.h>
119+
#include <asm/compat.h>
114120

115121
#define __page_val_to_pfn(_val) (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT)
116122

117123
#ifdef CONFIG_64BIT
118124
#include <asm/pgtable-64.h>
125+
126+
#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1))
127+
#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
128+
#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
129+
130+
#ifdef CONFIG_COMPAT
131+
#define MMAP_VA_BITS_64 ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
132+
#define MMAP_MIN_VA_BITS_64 (VA_BITS_SV39)
133+
#define MMAP_VA_BITS (is_compat_task() ? VA_BITS_SV32 : MMAP_VA_BITS_64)
134+
#define MMAP_MIN_VA_BITS (is_compat_task() ? VA_BITS_SV32 : MMAP_MIN_VA_BITS_64)
135+
#else
136+
#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
137+
#define MMAP_MIN_VA_BITS (VA_BITS_SV39)
138+
#endif /* CONFIG_COMPAT */
139+
119140
#else
120141
#include <asm/pgtable-32.h>
121142
#endif /* CONFIG_64BIT */
@@ -830,14 +851,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
830851
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
831852
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
832853
* Task size is:
833-
* - 0x9fc00000 (~2.5GB) for RV32.
834-
* - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
835-
* - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
854+
* - 0x9fc00000 (~2.5GB) for RV32.
855+
* - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
856+
* - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
857+
* - 0x100000000000000 ( 64PB) for RV64 using SV57 mmu
836858
*
837859
* Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
838860
* Instruction Set Manual Volume II: Privileged Architecture" states that
839861
* "load and store effective addresses, which are 64bits, must have bits
840862
* 63–48 all equal to bit 47, or else a page-fault exception will occur."
863+
* Similarly for SV57, bits 63–57 must be equal to bit 56.
841864
*/
842865
#ifdef CONFIG_64BIT
843866
#define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2)

arch/riscv/include/asm/processor.h

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,59 @@
1313

1414
#include <asm/ptrace.h>
1515

16+
#ifdef CONFIG_64BIT
17+
#define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1))
18+
#define STACK_TOP_MAX TASK_SIZE_64
19+
20+
/*
 * arch_get_mmap_end() - upper limit used when searching for a mapping.
 * @addr:  hint address handed to mmap; evaluated exactly once.
 * @len:   not used by this macro.
 * @flags: not used by this macro.
 *
 * Evaluates to an unsigned long:
 *  - STACK_TOP_MAX when no hint is given (@addr == 0), when CONFIG_COMPAT is
 *    enabled and the current task is a compat task, or when
 *    @addr >= VA_USER_SV57;
 *  - VA_USER_SV48 when @addr >= VA_USER_SV48 and VA_BITS >= VA_BITS_SV48;
 *  - VA_USER_SV39 in every other case.
 */
#define arch_get_mmap_end(addr, len, flags)				\
({									\
	typeof(addr) _hint = (addr);					\
	unsigned long _mmap_end = VA_USER_SV39;				\
									\
	if (_hint == 0 ||						\
	    (IS_ENABLED(CONFIG_COMPAT) && is_compat_task()) ||		\
	    _hint >= VA_USER_SV57)					\
		_mmap_end = STACK_TOP_MAX;				\
	else if (_hint >= VA_USER_SV48 && VA_BITS >= VA_BITS_SV48)	\
		_mmap_end = VA_USER_SV48;				\
	_mmap_end;							\
})
34+
35+
/*
 * arch_get_mmap_base() - base address to start the mapping search from.
 * @addr: hint address handed to mmap; evaluated exactly once.
 * @base: the base computed for the default window; evaluated exactly once.
 *
 * The distance between DEFAULT_MAP_WINDOW and @base is carried over to the
 * window selected by the hint.  Evaluates to an unsigned long:
 *  - @base unchanged when no hint is given (@addr == 0) or when
 *    CONFIG_COMPAT is enabled and the current task is a compat task;
 *  - VA_USER_SV57 minus that distance when @addr >= VA_USER_SV57 and
 *    VA_BITS >= VA_BITS_SV57;
 *  - VA_USER_SV48 minus that distance when @addr >= VA_USER_SV48 and
 *    VA_BITS >= VA_BITS_SV48;
 *  - VA_USER_SV39 minus that distance in every other case.
 */
#define arch_get_mmap_base(addr, base)					\
({									\
	typeof(addr) _hint = (addr);					\
	typeof(base) _dflt_base = (base);				\
	unsigned long _gap = DEFAULT_MAP_WINDOW - (_dflt_base);		\
	unsigned long _new_base;					\
									\
	if (_hint == 0 ||						\
	    (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) {		\
		_new_base = (_dflt_base);				\
	} else {							\
		unsigned long _window = VA_USER_SV39;			\
									\
		if (_hint >= VA_USER_SV57 && VA_BITS >= VA_BITS_SV57)	\
			_window = VA_USER_SV57;				\
		else if (_hint >= VA_USER_SV48 &&			\
			 VA_BITS >= VA_BITS_SV48)			\
			_window = VA_USER_SV48;				\
		_new_base = _window - _gap;				\
	}								\
	_new_base;							\
})
51+
52+
#else
53+
#define DEFAULT_MAP_WINDOW TASK_SIZE
54+
#define STACK_TOP_MAX TASK_SIZE
55+
#endif
56+
#define STACK_ALIGN 16
57+
58+
#define STACK_TOP DEFAULT_MAP_WINDOW
59+
1660
/*
1761
* This decides where the kernel will search for a free chunk of vm
1862
* space during mmap's.
1963
*/
20-
#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
21-
22-
#define STACK_TOP TASK_SIZE
2364
#ifdef CONFIG_64BIT
24-
#define STACK_TOP_MAX TASK_SIZE_64
65+
#define TASK_UNMAPPED_BASE PAGE_ALIGN((UL(1) << MMAP_MIN_VA_BITS) / 3)
2566
#else
26-
#define STACK_TOP_MAX TASK_SIZE
67+
#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
2768
#endif
28-
#define STACK_ALIGN 16
2969

3070
#ifndef __ASSEMBLY__
3171

tools/testing/selftests/riscv/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
66

77
ifneq (,$(filter $(ARCH),riscv))
8-
RISCV_SUBTARGETS ?= hwprobe vector
8+
RISCV_SUBTARGETS ?= hwprobe vector mm
99
else
1010
RISCV_SUBTARGETS :=
1111
endif
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
mmap_bottomup
2+
mmap_default
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# SPDX-License-Identifier: GPL-2.0
2+
# Copyright (C) 2021 ARM Limited
3+
# Originally tools/testing/arm64/abi/Makefile
4+
5+
# Additional include paths needed by kselftest.h and local headers
6+
CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.
7+
8+
TEST_GEN_FILES := testcases/mmap_default testcases/mmap_bottomup
9+
10+
TEST_PROGS := testcases/run_mmap.sh
11+
12+
include ../../lib.mk
13+
14+
# The shared header is testcases/mmap_test.h, the name both test sources
# include; the prerequisite list must use that same file name.
# NOTE(review): nothing in this Makefile names $(OUTPUT)/mm as a goal
# (TEST_GEN_FILES lists the two testcases/ binaries) - confirm this rule is
# still wanted.
$(OUTPUT)/mm: testcases/mmap_default.c testcases/mmap_bottomup.c testcases/mmap_test.h
	$(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
#include <sys/mman.h>
3+
#include <testcases/mmap_test.h>
4+
5+
#include "../../kselftest_harness.h"
6+
7+
/*
 * Check the addresses mmap() hands back for every hint placed by do_mmaps()
 * when the process uses the bottom-up layout (verified first through
 * memory_layout()).  The body is only compiled when __riscv_xlen is 64.
 * NOTE(review): the test name suggests it is meant to run with an unlimited
 * stack rlimit - confirm against the script that launches it.
 */
TEST(infinite_rlimit)
8+
{
9+
// Only works on 64 bit
10+
#if __riscv_xlen == 64
11+
struct addresses mmap_addresses;
12+
13+
/* Later mappings must be placed above earlier ones. */
EXPECT_EQ(BOTTOM_UP, memory_layout());
14+
15+
do_mmaps(&mmap_addresses);
16+
17+
/* None of the mmap() calls may have failed. */
EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint);
18+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr);
19+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr);
20+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr);
21+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr);
22+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr);
23+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr);
24+
25+
/*
 * Each returned address is compared against a bound: 1 << 47 without a
 * hint, 1 << 38 for the bit-37, bit-38 and bit-46 hints, 1 << 47 for the
 * bit-47 and bit-55 hints, and 1 << 56 for the bit-56 hint.
 */
EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint);
26+
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr);
27+
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr);
28+
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr);
29+
EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr);
30+
EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr);
31+
EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr);
32+
#endif
33+
}
34+
35+
TEST_HARNESS_MAIN
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
#include <sys/mman.h>
3+
#include <testcases/mmap_test.h>
4+
5+
#include "../../kselftest_harness.h"
6+
7+
/*
 * Check the addresses mmap() hands back for every hint placed by do_mmaps()
 * when the process uses the top-down layout (verified first through
 * memory_layout()).  The body is only compiled when __riscv_xlen is 64.
 * NOTE(review): the test name suggests it is meant to run with the default
 * stack rlimit - confirm against the script that launches it.
 */
TEST(default_rlimit)
8+
{
9+
// Only works on 64 bit
10+
#if __riscv_xlen == 64
11+
struct addresses mmap_addresses;
12+
13+
/* Later mappings must be placed below earlier ones. */
EXPECT_EQ(TOP_DOWN, memory_layout());
14+
15+
do_mmaps(&mmap_addresses);
16+
17+
/* None of the mmap() calls may have failed. */
EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint);
18+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr);
19+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr);
20+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr);
21+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr);
22+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr);
23+
EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr);
24+
25+
/*
 * Each returned address is compared against a bound: 1 << 47 without a
 * hint, 1 << 38 for the bit-37, bit-38 and bit-46 hints, 1 << 47 for the
 * bit-47 and bit-55 hints, and 1 << 56 for the bit-56 hint.
 */
EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint);
26+
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr);
27+
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr);
28+
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr);
29+
EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr);
30+
EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr);
31+
EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr);
32+
#endif
33+
}
34+
35+
TEST_HARNESS_MAIN
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
#ifndef _TESTCASES_MMAP_TEST_H
3+
#define _TESTCASES_MMAP_TEST_H
4+
#include <sys/mman.h>
5+
#include <sys/resource.h>
6+
#include <stddef.h>
7+
8+
#define TOP_DOWN 0
9+
#define BOTTOM_UP 1
10+
11+
/*
 * Addresses filled in by do_mmaps(): one mapping requested without a hint
 * and one for each hint address with only bit 37, 38, 46, 47, 55 or 56 set.
 * Every member holds exactly what mmap() returned, so it is MAP_FAILED when
 * the corresponding call failed.
 */
struct addresses {
	int *no_hint;
	int *on_37_addr;
	int *on_38_addr;
	int *on_46_addr;
	int *on_47_addr;
	int *on_55_addr;
	int *on_56_addr;
};

/*
 * Build a hint address with only bit @bit set.
 *
 * The shift is carried out on a 64-bit value and then narrowed, so it stays
 * well defined when this header is compiled for a target whose unsigned long
 * is narrower than the requested bit position (the hint is then NULL).
 */
static inline void *mmap_test_hint(unsigned int bit)
{
	return (void *)(unsigned long)(1ULL << bit);
}

/*
 * Request a small private, anonymous, read/write mapping with @hint as the
 * address hint.  Returns what mmap() returned (MAP_FAILED on error).
 */
static inline int *mmap_test_region(void *hint)
{
	const int prot = PROT_READ | PROT_WRITE;
	const int flags = MAP_PRIVATE | MAP_ANONYMOUS;

	/* Anonymous mappings have no backing file: pass -1 as the fd. */
	return mmap(hint, 5 * sizeof(int), prot, flags, -1, 0);
}

/*
 * Perform one mmap() without a hint and one per boundary hint, storing each
 * result in @mmap_addresses.  No result is checked here; callers compare the
 * members against MAP_FAILED themselves.
 */
static inline void do_mmaps(struct addresses *mmap_addresses)
{
	/*
	 * Place all of the hint addresses on the boundaries of mmap
	 * sv39, sv48, sv57
	 * User addresses end at 1<<38, 1<<47, 1<<56 respectively
	 */
	mmap_addresses->no_hint = mmap_test_region(NULL);
	mmap_addresses->on_37_addr = mmap_test_region(mmap_test_hint(37));
	mmap_addresses->on_38_addr = mmap_test_region(mmap_test_hint(38));
	mmap_addresses->on_46_addr = mmap_test_region(mmap_test_hint(46));
	mmap_addresses->on_47_addr = mmap_test_region(mmap_test_hint(47));
	mmap_addresses->on_55_addr = mmap_test_region(mmap_test_hint(55));
	mmap_addresses->on_56_addr = mmap_test_region(mmap_test_hint(56));
}
53+
54+
/*
 * Probe the direction in which the kernel places successive hint-less
 * mappings.
 *
 * Two small private anonymous mappings are created back to back and their
 * addresses compared.  Both probes are unmapped again before returning.
 *
 * Return: 1 (BOTTOM_UP) when the second mapping lies above the first,
 *         0 (TOP_DOWN) when it does not, and -1 when either mmap() call
 *         failed, so that a failed probe never matches either layout.
 */
static inline int memory_layout(void)
{
	const int prot = PROT_READ | PROT_WRITE;
	const int flags = MAP_PRIVATE | MAP_ANONYMOUS;
	void *first;
	void *second;
	int layout;

	/* Anonymous mappings have no backing file: pass -1 as the fd. */
	first = mmap(NULL, sizeof(int), prot, flags, -1, 0);
	if (first == MAP_FAILED)
		return -1;

	second = mmap(NULL, sizeof(int), prot, flags, -1, 0);
	if (second == MAP_FAILED) {
		munmap(first, sizeof(int));
		return -1;
	}

	/* Compare as integers: the two pointers refer to unrelated objects. */
	layout = (unsigned long)second > (unsigned long)first;

	munmap(second, sizeof(int));
	munmap(first, sizeof(int));

	return layout;
}
64+
#endif /* _TESTCASES_MMAP_TEST_H */

0 commit comments

Comments
 (0)