|
| 1 | +From 138b7a4c83c43b42851cb8fec2bbdbaadd960241 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Patrick Roy <roypat@amazon.co.uk> |
| 3 | +Date: Fri, 7 Feb 2025 11:16:06 +0000 |
| 4 | +Subject: [PATCH 1/2] mm: introduce AS_NO_DIRECT_MAP |
| 5 | + |
| 6 | +Add AS_NO_DIRECT_MAP for mappings where direct map entries of folios are |
| 7 | +set to not present. Currently, mappings that match this description are |
| 8 | +secretmem mappings (memfd_secret()). Later, some guest_memfd |
| 9 | +configurations will also fall into this category. |
| 10 | + |
| 11 | +Reject this new type of mappings in all locations that currently reject |
| 12 | +secretmem mappings, on the assumption that if secretmem mappings are |
| 13 | +rejected somewhere, it is precisely because of an inability to deal with |
| 14 | +folios without direct map entries, and then make memfd_secret() use |
| 15 | +AS_NO_DIRECT_MAP on its address_space to drop its special |
| 16 | +vma_is_secretmem()/secretmem_mapping() checks. |
| 17 | + |
| 18 | +This drops an optimization in gup_fast_folio_allowed() where |
| 19 | +secretmem_mapping() was only called if CONFIG_SECRETMEM=y. secretmem is |
| 20 | +enabled by default since commit b758fe6df50d ("mm/secretmem: make it on |
| 21 | +by default"), so the secretmem check did not actually end up elided in |
| 22 | +most cases anymore anyway. |
| 23 | + |
| 24 | +Use a new flag instead of overloading AS_INACCESSIBLE (which is already |
| 25 | +set by guest_memfd) because not all guest_memfd mappings will end up |
| 26 | +being direct map removed (e.g. in pKVM setups, parts of guest_memfd that |
| 27 | +can be mapped to userspace should also be GUP-able, and generally not |
| 28 | +have restrictions on who can access it). |
| 29 | + |
| 30 | +Signed-off-by: Patrick Roy <roypat@amazon.co.uk> |
| 31 | +--- |
| 32 | + include/linux/pagemap.h | 16 ++++++++++++++++ |
| 33 | + include/linux/secretmem.h | 18 ------------------ |
| 34 | + lib/buildid.c | 4 ++-- |
| 35 | + mm/gup.c | 14 +++----------- |
| 36 | + mm/mlock.c | 2 +- |
| 37 | + mm/secretmem.c | 6 +----- |
| 38 | + 6 files changed, 23 insertions(+), 37 deletions(-) |
| 39 | + |
| 40 | +diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h |
| 41 | +index 47bfc6b1b632..903b41e89cf8 100644 |
| 42 | +--- a/include/linux/pagemap.h |
| 43 | ++++ b/include/linux/pagemap.h |
| 44 | +@@ -210,6 +210,7 @@ enum mapping_flags { |
| 45 | + AS_STABLE_WRITES = 7, /* must wait for writeback before modifying |
| 46 | + folio contents */ |
| 47 | + AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ |
| 48 | ++ AS_NO_DIRECT_MAP = 9, /* Folios in the mapping are not in the direct map */ |
| 49 | + /* Bits 16-25 are used for FOLIO_ORDER */ |
| 50 | + AS_FOLIO_ORDER_BITS = 5, |
| 51 | + AS_FOLIO_ORDER_MIN = 16, |
| 52 | +@@ -335,6 +336,21 @@ static inline bool mapping_inaccessible(struct address_space *mapping) |
| 53 | + return test_bit(AS_INACCESSIBLE, &mapping->flags); |
| 54 | + } |
| 55 | + |
| 56 | ++static inline void mapping_set_no_direct_map(struct address_space *mapping) |
| 57 | ++{ |
| 58 | ++ set_bit(AS_NO_DIRECT_MAP, &mapping->flags); |
| 59 | ++} |
| 60 | ++ |
| 61 | ++static inline bool mapping_no_direct_map(struct address_space *mapping) |
| 62 | ++{ |
| 63 | ++ return test_bit(AS_NO_DIRECT_MAP, &mapping->flags); |
| 64 | ++} |
| 65 | ++ |
| 66 | ++static inline bool vma_is_no_direct_map(const struct vm_area_struct *vma) |
| 67 | ++{ |
| 68 | ++ return vma->vm_file && mapping_no_direct_map(vma->vm_file->f_mapping); |
| 69 | ++} |
| 70 | ++ |
| 71 | + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) |
| 72 | + { |
| 73 | + return mapping->gfp_mask; |
| 74 | +diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h |
| 75 | +index e918f96881f5..0ae1fb057b3d 100644 |
| 76 | +--- a/include/linux/secretmem.h |
| 77 | ++++ b/include/linux/secretmem.h |
| 78 | +@@ -4,28 +4,10 @@ |
| 79 | + |
| 80 | + #ifdef CONFIG_SECRETMEM |
| 81 | + |
| 82 | +-extern const struct address_space_operations secretmem_aops; |
| 83 | +- |
| 84 | +-static inline bool secretmem_mapping(struct address_space *mapping) |
| 85 | +-{ |
| 86 | +- return mapping->a_ops == &secretmem_aops; |
| 87 | +-} |
| 88 | +- |
| 89 | +-bool vma_is_secretmem(struct vm_area_struct *vma); |
| 90 | + bool secretmem_active(void); |
| 91 | + |
| 92 | + #else |
| 93 | + |
| 94 | +-static inline bool vma_is_secretmem(struct vm_area_struct *vma) |
| 95 | +-{ |
| 96 | +- return false; |
| 97 | +-} |
| 98 | +- |
| 99 | +-static inline bool secretmem_mapping(struct address_space *mapping) |
| 100 | +-{ |
| 101 | +- return false; |
| 102 | +-} |
| 103 | +- |
| 104 | + static inline bool secretmem_active(void) |
| 105 | + { |
| 106 | + return false; |
| 107 | +diff --git a/lib/buildid.c b/lib/buildid.c |
| 108 | +index c4b0f376fb34..33f173a607ad 100644 |
| 109 | +--- a/lib/buildid.c |
| 110 | ++++ b/lib/buildid.c |
| 111 | +@@ -65,8 +65,8 @@ static int freader_get_folio(struct freader *r, loff_t file_off) |
| 112 | + |
| 113 | + freader_put_folio(r); |
| 114 | + |
| 115 | +- /* reject secretmem folios created with memfd_secret() */ |
| 116 | +- if (secretmem_mapping(r->file->f_mapping)) |
| 117 | ++ /* reject secretmem folios created with memfd_secret() or guest_memfd() */ |
| 118 | ++ if (mapping_no_direct_map(r->file->f_mapping)) |
| 119 | + return -EFAULT; |
| 120 | + |
| 121 | + r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT); |
| 122 | +diff --git a/mm/gup.c b/mm/gup.c |
| 123 | +index 3883b307780e..b1483a876740 100644 |
| 124 | +--- a/mm/gup.c |
| 125 | ++++ b/mm/gup.c |
| 126 | +@@ -1283,7 +1283,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) |
| 127 | + if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma)) |
| 128 | + return -EOPNOTSUPP; |
| 129 | + |
| 130 | +- if (vma_is_secretmem(vma)) |
| 131 | ++ if (vma_is_no_direct_map(vma)) |
| 132 | + return -EFAULT; |
| 133 | + |
| 134 | + if (write) { |
| 135 | +@@ -2786,7 +2786,6 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) |
| 136 | + { |
| 137 | + bool reject_file_backed = false; |
| 138 | + struct address_space *mapping; |
| 139 | +- bool check_secretmem = false; |
| 140 | + unsigned long mapping_flags; |
| 141 | + |
| 142 | + /* |
| 143 | +@@ -2798,14 +2797,6 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) |
| 144 | + reject_file_backed = true; |
| 145 | + |
| 146 | + /* We hold a folio reference, so we can safely access folio fields. */ |
| 147 | +- |
| 148 | +- /* secretmem folios are always order-0 folios. */ |
| 149 | +- if (IS_ENABLED(CONFIG_SECRETMEM) && !folio_test_large(folio)) |
| 150 | +- check_secretmem = true; |
| 151 | +- |
| 152 | +- if (!reject_file_backed && !check_secretmem) |
| 153 | +- return true; |
| 154 | +- |
| 155 | + if (WARN_ON_ONCE(folio_test_slab(folio))) |
| 156 | + return false; |
| 157 | + |
| 158 | +@@ -2847,8 +2838,9 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) |
| 159 | + * At this point, we know the mapping is non-null and points to an |
| 160 | + * address_space object. |
| 161 | + */ |
| 162 | +- if (check_secretmem && secretmem_mapping(mapping)) |
| 163 | ++ if (mapping_no_direct_map(mapping)) |
| 164 | + return false; |
| 165 | ++ |
| 166 | + /* The only remaining allowed file system is shmem. */ |
| 167 | + return !reject_file_backed || shmem_mapping(mapping); |
| 168 | + } |
| 169 | +diff --git a/mm/mlock.c b/mm/mlock.c |
| 170 | +index cde076fa7d5e..a43f308be70d 100644 |
| 171 | +--- a/mm/mlock.c |
| 172 | ++++ b/mm/mlock.c |
| 173 | +@@ -474,7 +474,7 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, |
| 174 | + |
| 175 | + if (newflags == oldflags || (oldflags & VM_SPECIAL) || |
| 176 | + is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || |
| 177 | +- vma_is_dax(vma) || vma_is_secretmem(vma) || (oldflags & VM_DROPPABLE)) |
| 178 | ++ vma_is_dax(vma) || vma_is_no_direct_map(vma) || (oldflags & VM_DROPPABLE)) |
| 179 | + /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ |
| 180 | + goto out; |
| 181 | + |
| 182 | +diff --git a/mm/secretmem.c b/mm/secretmem.c |
| 183 | +index 1b0a214ee558..ea4c04d469b1 100644 |
| 184 | +--- a/mm/secretmem.c |
| 185 | ++++ b/mm/secretmem.c |
| 186 | +@@ -136,11 +136,6 @@ static int secretmem_mmap(struct file *file, struct vm_area_struct *vma) |
| 187 | + return 0; |
| 188 | + } |
| 189 | + |
| 190 | +-bool vma_is_secretmem(struct vm_area_struct *vma) |
| 191 | +-{ |
| 192 | +- return vma->vm_ops == &secretmem_vm_ops; |
| 193 | +-} |
| 194 | +- |
| 195 | + static const struct file_operations secretmem_fops = { |
| 196 | + .release = secretmem_release, |
| 197 | + .mmap = secretmem_mmap, |
| 198 | +@@ -214,6 +209,7 @@ static struct file *secretmem_file_create(unsigned long flags) |
| 199 | + |
| 200 | + mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); |
| 201 | + mapping_set_unevictable(inode->i_mapping); |
| 202 | ++ mapping_set_no_direct_map(inode->i_mapping); |
| 203 | + |
| 204 | + inode->i_op = &secretmem_iops; |
| 205 | + inode->i_mapping->a_ops = &secretmem_aops; |
| 206 | +-- |
| 207 | +2.48.1 |
| 208 | + |
0 commit comments