Skip to content

Commit ef0c8ef

Browse files
author
Claudio Imbrenda
committed
KVM: s390: stop using lists to keep track of used dat tables
Until now, every dat table allocated to map a guest was put in a linked list. The page->lru field of struct page was used to keep track of which pages were being used, and when the gmap is torn down, the list was walked and all pages freed. This patch gets rid of the usage of page->lru. Page tables are now freed by recursively walking the dat table tree. Since s390_unlist_old_asce() becomes useless now, remove it. Acked-by: Steffen Eiden <seiden@linux.ibm.com> Reviewed-by: Janosch Frank <frankja@linux.ibm.com> Reviewed-by: Christoph Schlameuss <schlameuss@linux.ibm.com> Link: https://lore.kernel.org/r/20250123144627.312456-12-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> Message-ID: <20250123144627.312456-12-imbrenda@linux.ibm.com>
1 parent 37d1b5d commit ef0c8ef

File tree

3 files changed

+23
-86
lines changed

3 files changed

+23
-86
lines changed

arch/s390/include/asm/gmap.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
/**
2424
* struct gmap_struct - guest address space
2525
* @list: list head for the mm->context gmap list
26-
* @crst_list: list of all crst tables used in the guest address space
2726
* @mm: pointer to the parent mm_struct
2827
* @guest_to_host: radix tree with guest to host address translation
2928
* @host_to_guest: radix tree with pointer to segment table entries
@@ -35,7 +34,6 @@
3534
* @guest_handle: protected virtual machine handle for the ultravisor
3635
* @host_to_rmap: radix tree with gmap_rmap lists
3736
* @children: list of shadow gmap structures
38-
* @pt_list: list of all page tables used in the shadow guest address space
3937
* @shadow_lock: spinlock to protect the shadow gmap list
4038
* @parent: pointer to the parent gmap for shadow guest address spaces
4139
* @orig_asce: ASCE for which the shadow page table has been created
@@ -45,7 +43,6 @@
4543
*/
4644
struct gmap {
4745
struct list_head list;
48-
struct list_head crst_list;
4946
struct mm_struct *mm;
5047
struct radix_tree_root guest_to_host;
5148
struct radix_tree_root host_to_guest;
@@ -61,7 +58,6 @@ struct gmap {
6158
/* Additional data for shadow guest address spaces */
6259
struct radix_tree_root host_to_rmap;
6360
struct list_head children;
64-
struct list_head pt_list;
6561
spinlock_t shadow_lock;
6662
struct gmap *parent;
6763
unsigned long orig_asce;
@@ -141,7 +137,6 @@ int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned
141137
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
142138
unsigned long gaddr, unsigned long vmaddr);
143139
int s390_disable_cow_sharing(void);
144-
void s390_unlist_old_asce(struct gmap *gmap);
145140
int s390_replace_asce(struct gmap *gmap);
146141
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
147142
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,

arch/s390/mm/gmap.c

Lines changed: 23 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,7 @@ struct gmap *gmap_alloc(unsigned long limit)
8282
gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
8383
if (!gmap)
8484
goto out;
85-
INIT_LIST_HEAD(&gmap->crst_list);
8685
INIT_LIST_HEAD(&gmap->children);
87-
INIT_LIST_HEAD(&gmap->pt_list);
8886
INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
8987
INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
9088
INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
@@ -94,7 +92,6 @@ struct gmap *gmap_alloc(unsigned long limit)
9492
page = gmap_alloc_crst();
9593
if (!page)
9694
goto out_free;
97-
list_add(&page->lru, &gmap->crst_list);
9895
table = page_to_virt(page);
9996
crst_table_init(table, etype);
10097
gmap->table = table;
@@ -197,6 +194,27 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
197194
} while (nr > 0);
198195
}
199196

197+
static void gmap_free_crst(unsigned long *table, bool free_ptes)
198+
{
199+
bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
200+
int i;
201+
202+
if (is_segment) {
203+
if (!free_ptes)
204+
goto out;
205+
for (i = 0; i < _CRST_ENTRIES; i++)
206+
if (!(table[i] & _SEGMENT_ENTRY_INVALID))
207+
page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
208+
} else {
209+
for (i = 0; i < _CRST_ENTRIES; i++)
210+
if (!(table[i] & _REGION_ENTRY_INVALID))
211+
gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
212+
}
213+
214+
out:
215+
free_pages((unsigned long)table, CRST_ALLOC_ORDER);
216+
}
217+
200218
/**
201219
* gmap_free - free a guest address space
202220
* @gmap: pointer to the guest address space structure
@@ -205,24 +223,17 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
205223
*/
206224
void gmap_free(struct gmap *gmap)
207225
{
208-
struct page *page, *next;
209-
210226
/* Flush tlb of all gmaps (if not already done for shadows) */
211227
if (!(gmap_is_shadow(gmap) && gmap->removed))
212228
gmap_flush_tlb(gmap);
213229
/* Free all segment & region tables. */
214-
list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
215-
__free_pages(page, CRST_ALLOC_ORDER);
230+
gmap_free_crst(gmap->table, gmap_is_shadow(gmap));
231+
216232
gmap_radix_tree_free(&gmap->guest_to_host);
217233
gmap_radix_tree_free(&gmap->host_to_guest);
218234

219235
/* Free additional data for a shadow gmap */
220236
if (gmap_is_shadow(gmap)) {
221-
struct ptdesc *ptdesc, *n;
222-
223-
/* Free all page tables. */
224-
list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
225-
page_table_free_pgste(ptdesc);
226237
gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
227238
/* Release reference to the parent */
228239
gmap_put(gmap->parent);
@@ -311,7 +322,6 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
311322
crst_table_init(new, init);
312323
spin_lock(&gmap->guest_table_lock);
313324
if (*table & _REGION_ENTRY_INVALID) {
314-
list_add(&page->lru, &gmap->crst_list);
315325
*table = __pa(new) | _REGION_ENTRY_LENGTH |
316326
(*table & _REGION_ENTRY_TYPE_MASK);
317327
page = NULL;
@@ -1243,7 +1253,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
12431253
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
12441254
/* Free page table */
12451255
ptdesc = page_ptdesc(phys_to_page(pgt));
1246-
list_del(&ptdesc->pt_list);
12471256
page_table_free_pgste(ptdesc);
12481257
}
12491258

@@ -1271,7 +1280,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
12711280
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
12721281
/* Free page table */
12731282
ptdesc = page_ptdesc(phys_to_page(pgt));
1274-
list_del(&ptdesc->pt_list);
12751283
page_table_free_pgste(ptdesc);
12761284
}
12771285
}
@@ -1301,7 +1309,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
13011309
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
13021310
/* Free segment table */
13031311
page = phys_to_page(sgt);
1304-
list_del(&page->lru);
13051312
__free_pages(page, CRST_ALLOC_ORDER);
13061313
}
13071314

@@ -1329,7 +1336,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
13291336
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
13301337
/* Free segment table */
13311338
page = phys_to_page(sgt);
1332-
list_del(&page->lru);
13331339
__free_pages(page, CRST_ALLOC_ORDER);
13341340
}
13351341
}
@@ -1359,7 +1365,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
13591365
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
13601366
/* Free region 3 table */
13611367
page = phys_to_page(r3t);
1362-
list_del(&page->lru);
13631368
__free_pages(page, CRST_ALLOC_ORDER);
13641369
}
13651370

@@ -1387,7 +1392,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
13871392
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
13881393
/* Free region 3 table */
13891394
page = phys_to_page(r3t);
1390-
list_del(&page->lru);
13911395
__free_pages(page, CRST_ALLOC_ORDER);
13921396
}
13931397
}
@@ -1417,7 +1421,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
14171421
__gmap_unshadow_r2t(sg, raddr, __va(r2t));
14181422
/* Free region 2 table */
14191423
page = phys_to_page(r2t);
1420-
list_del(&page->lru);
14211424
__free_pages(page, CRST_ALLOC_ORDER);
14221425
}
14231426

@@ -1449,7 +1452,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
14491452
r1t[i] = _REGION1_ENTRY_EMPTY;
14501453
/* Free region 2 table */
14511454
page = phys_to_page(r2t);
1452-
list_del(&page->lru);
14531455
__free_pages(page, CRST_ALLOC_ORDER);
14541456
}
14551457
}
@@ -1544,7 +1546,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
15441546
_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
15451547
if (sg->edat_level >= 1)
15461548
*table |= (r2t & _REGION_ENTRY_PROTECT);
1547-
list_add(&page->lru, &sg->crst_list);
15481549
if (fake) {
15491550
/* nothing to protect for fake tables */
15501551
*table &= ~_REGION_ENTRY_INVALID;
@@ -1628,7 +1629,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
16281629
_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
16291630
if (sg->edat_level >= 1)
16301631
*table |= (r3t & _REGION_ENTRY_PROTECT);
1631-
list_add(&page->lru, &sg->crst_list);
16321632
if (fake) {
16331633
/* nothing to protect for fake tables */
16341634
*table &= ~_REGION_ENTRY_INVALID;
@@ -1712,7 +1712,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
17121712
_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
17131713
if (sg->edat_level >= 1)
17141714
*table |= sgt & _REGION_ENTRY_PROTECT;
1715-
list_add(&page->lru, &sg->crst_list);
17161715
if (fake) {
17171716
/* nothing to protect for fake tables */
17181717
*table &= ~_REGION_ENTRY_INVALID;
@@ -1833,7 +1832,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
18331832
/* mark as invalid as long as the parent table is not protected */
18341833
*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
18351834
(pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
1836-
list_add(&ptdesc->pt_list, &sg->pt_list);
18371835
if (fake) {
18381836
/* nothing to protect for fake tables */
18391837
*table &= ~_SEGMENT_ENTRY_INVALID;
@@ -2623,49 +2621,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
26232621
}
26242622
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
26252623

2626-
/**
2627-
* s390_unlist_old_asce - Remove the topmost level of page tables from the
2628-
* list of page tables of the gmap.
2629-
* @gmap: the gmap whose table is to be removed
2630-
*
2631-
* On s390x, KVM keeps a list of all pages containing the page tables of the
2632-
* gmap (the CRST list). This list is used at tear down time to free all
2633-
* pages that are now not needed anymore.
2634-
*
2635-
* This function removes the topmost page of the tree (the one pointed to by
2636-
* the ASCE) from the CRST list.
2637-
*
2638-
* This means that it will not be freed when the VM is torn down, and needs
2639-
* to be handled separately by the caller, unless a leak is actually
2640-
* intended. Notice that this function will only remove the page from the
2641-
* list, the page will still be used as a top level page table (and ASCE).
2642-
*/
2643-
void s390_unlist_old_asce(struct gmap *gmap)
2644-
{
2645-
struct page *old;
2646-
2647-
old = virt_to_page(gmap->table);
2648-
spin_lock(&gmap->guest_table_lock);
2649-
list_del(&old->lru);
2650-
/*
2651-
* Sometimes the topmost page might need to be "removed" multiple
2652-
* times, for example if the VM is rebooted into secure mode several
2653-
* times concurrently, or if s390_replace_asce fails after calling
2654-
* s390_remove_old_asce and is attempted again later. In that case
2655-
* the old asce has been removed from the list, and therefore it
2656-
* will not be freed when the VM terminates, but the ASCE is still
2657-
* in use and still pointed to.
2658-
* A subsequent call to replace_asce will follow the pointer and try
2659-
* to remove the same page from the list again.
2660-
* Therefore it's necessary that the page of the ASCE has valid
2661-
* pointers, so list_del can work (and do nothing) without
2662-
* dereferencing stale or invalid pointers.
2663-
*/
2664-
INIT_LIST_HEAD(&old->lru);
2665-
spin_unlock(&gmap->guest_table_lock);
2666-
}
2667-
EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
2668-
26692624
/**
26702625
* s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
26712626
* @gmap: the gmap whose ASCE needs to be replaced
@@ -2685,8 +2640,6 @@ int s390_replace_asce(struct gmap *gmap)
26852640
struct page *page;
26862641
void *table;
26872642

2688-
s390_unlist_old_asce(gmap);
2689-
26902643
/* Replacing segment type ASCEs would cause serious issues */
26912644
if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
26922645
return -EINVAL;
@@ -2697,15 +2650,6 @@ int s390_replace_asce(struct gmap *gmap)
26972650
table = page_to_virt(page);
26982651
memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
26992652

2700-
/*
2701-
* The caller has to deal with the old ASCE, but here we make sure
2702-
* the new one is properly added to the CRST list, so that
2703-
* it will be freed when the VM is torn down.
2704-
*/
2705-
spin_lock(&gmap->guest_table_lock);
2706-
list_add(&page->lru, &gmap->crst_list);
2707-
spin_unlock(&gmap->guest_table_lock);
2708-
27092653
/* Set new table origin while preserving existing ASCE control bits */
27102654
asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
27112655
WRITE_ONCE(gmap->asce, asce);

arch/s390/mm/pgalloc.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,6 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
176176
}
177177
table = ptdesc_to_virt(ptdesc);
178178
__arch_set_page_dat(table, 1);
179-
/* pt_list is used by gmap only */
180-
INIT_LIST_HEAD(&ptdesc->pt_list);
181179
memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
182180
memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
183181
return table;

0 commit comments

Comments (0)