@@ -82,9 +82,7 @@ struct gmap *gmap_alloc(unsigned long limit)
 	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
 	if (!gmap)
 		goto out;
-	INIT_LIST_HEAD(&gmap->crst_list);
 	INIT_LIST_HEAD(&gmap->children);
-	INIT_LIST_HEAD(&gmap->pt_list);
 	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
 	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
 	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
@@ -94,7 +92,6 @@ struct gmap *gmap_alloc(unsigned long limit)
 	page = gmap_alloc_crst();
 	if (!page)
 		goto out_free;
-	list_add(&page->lru, &gmap->crst_list);
 	table = page_to_virt(page);
 	crst_table_init(table, etype);
 	gmap->table = table;
@@ -197,6 +194,27 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
 	} while (nr > 0);
 }
 
+static void gmap_free_crst(unsigned long *table, bool free_ptes)
+{
+	bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
+	int i;
+
+	if (is_segment) {
+		if (!free_ptes)
+			goto out;
+		for (i = 0; i < _CRST_ENTRIES; i++)
+			if (!(table[i] & _SEGMENT_ENTRY_INVALID))
+				page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
+	} else {
+		for (i = 0; i < _CRST_ENTRIES; i++)
+			if (!(table[i] & _REGION_ENTRY_INVALID))
+				gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
+	}
+
+out:
+	free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+}
+
 /**
  * gmap_free - free a guest address space
  * @gmap: pointer to the guest address space structure
@@ -205,24 +223,17 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
  */
 void gmap_free(struct gmap *gmap)
 {
-	struct page *page, *next;
-
 	/* Flush tlb of all gmaps (if not already done for shadows) */
 	if (!(gmap_is_shadow(gmap) && gmap->removed))
 		gmap_flush_tlb(gmap);
 	/* Free all segment & region tables. */
-	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
-		__free_pages(page, CRST_ALLOC_ORDER);
+	gmap_free_crst(gmap->table, gmap_is_shadow(gmap));
+
 	gmap_radix_tree_free(&gmap->guest_to_host);
 	gmap_radix_tree_free(&gmap->host_to_guest);
 
 	/* Free additional data for a shadow gmap */
 	if (gmap_is_shadow(gmap)) {
-		struct ptdesc *ptdesc, *n;
-
-		/* Free all page tables. */
-		list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
-			page_table_free_pgste(ptdesc);
 		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
 		/* Release reference to the parent */
 		gmap_put(gmap->parent);
@@ -311,7 +322,6 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	crst_table_init(new, init);
 	spin_lock(&gmap->guest_table_lock);
 	if (*table & _REGION_ENTRY_INVALID) {
-		list_add(&page->lru, &gmap->crst_list);
 		*table = __pa(new) | _REGION_ENTRY_LENGTH |
 			(*table & _REGION_ENTRY_TYPE_MASK);
 		page = NULL;
@@ -1243,7 +1253,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
 	__gmap_unshadow_pgt(sg, raddr, __va(pgt));
 	/* Free page table */
 	ptdesc = page_ptdesc(phys_to_page(pgt));
-	list_del(&ptdesc->pt_list);
 	page_table_free_pgste(ptdesc);
 }
@@ -1271,7 +1280,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
 		__gmap_unshadow_pgt(sg, raddr, __va(pgt));
 		/* Free page table */
 		ptdesc = page_ptdesc(phys_to_page(pgt));
-		list_del(&ptdesc->pt_list);
 		page_table_free_pgste(ptdesc);
 	}
 }
@@ -1301,7 +1309,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
 	__gmap_unshadow_sgt(sg, raddr, __va(sgt));
 	/* Free segment table */
 	page = phys_to_page(sgt);
-	list_del(&page->lru);
 	__free_pages(page, CRST_ALLOC_ORDER);
 }
@@ -1329,7 +1336,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
 		__gmap_unshadow_sgt(sg, raddr, __va(sgt));
 		/* Free segment table */
 		page = phys_to_page(sgt);
-		list_del(&page->lru);
 		__free_pages(page, CRST_ALLOC_ORDER);
 	}
 }
@@ -1359,7 +1365,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
 	__gmap_unshadow_r3t(sg, raddr, __va(r3t));
 	/* Free region 3 table */
 	page = phys_to_page(r3t);
-	list_del(&page->lru);
 	__free_pages(page, CRST_ALLOC_ORDER);
 }
@@ -1387,7 +1392,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
 		__gmap_unshadow_r3t(sg, raddr, __va(r3t));
 		/* Free region 3 table */
 		page = phys_to_page(r3t);
-		list_del(&page->lru);
 		__free_pages(page, CRST_ALLOC_ORDER);
 	}
 }
@@ -1417,7 +1421,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
 	__gmap_unshadow_r2t(sg, raddr, __va(r2t));
 	/* Free region 2 table */
 	page = phys_to_page(r2t);
-	list_del(&page->lru);
 	__free_pages(page, CRST_ALLOC_ORDER);
 }
@@ -1449,7 +1452,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
 		r1t[i] = _REGION1_ENTRY_EMPTY;
 		/* Free region 2 table */
 		page = phys_to_page(r2t);
-		list_del(&page->lru);
 		__free_pages(page, CRST_ALLOC_ORDER);
 	}
 }
@@ -1544,7 +1546,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
 	if (sg->edat_level >= 1)
 		*table |= (r2t & _REGION_ENTRY_PROTECT);
-	list_add(&page->lru, &sg->crst_list);
 	if (fake) {
 		/* nothing to protect for fake tables */
 		*table &= ~_REGION_ENTRY_INVALID;
@@ -1628,7 +1629,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
 	if (sg->edat_level >= 1)
 		*table |= (r3t & _REGION_ENTRY_PROTECT);
-	list_add(&page->lru, &sg->crst_list);
 	if (fake) {
 		/* nothing to protect for fake tables */
 		*table &= ~_REGION_ENTRY_INVALID;
@@ -1712,7 +1712,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
 	if (sg->edat_level >= 1)
 		*table |= sgt & _REGION_ENTRY_PROTECT;
-	list_add(&page->lru, &sg->crst_list);
 	if (fake) {
 		/* nothing to protect for fake tables */
 		*table &= ~_REGION_ENTRY_INVALID;
@@ -1833,7 +1832,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 	/* mark as invalid as long as the parent table is not protected */
 	*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
 		 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
-	list_add(&ptdesc->pt_list, &sg->pt_list);
 	if (fake) {
 		/* nothing to protect for fake tables */
 		*table &= ~_SEGMENT_ENTRY_INVALID;
@@ -2623,49 +2621,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
 }
 EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
 
-/**
- * s390_unlist_old_asce - Remove the topmost level of page tables from the
- * list of page tables of the gmap.
- * @gmap: the gmap whose table is to be removed
- *
- * On s390x, KVM keeps a list of all pages containing the page tables of the
- * gmap (the CRST list). This list is used at tear down time to free all
- * pages that are now not needed anymore.
- *
- * This function removes the topmost page of the tree (the one pointed to by
- * the ASCE) from the CRST list.
- *
- * This means that it will not be freed when the VM is torn down, and needs
- * to be handled separately by the caller, unless a leak is actually
- * intended. Notice that this function will only remove the page from the
- * list, the page will still be used as a top level page table (and ASCE).
- */
-void s390_unlist_old_asce(struct gmap *gmap)
-{
-	struct page *old;
-
-	old = virt_to_page(gmap->table);
-	spin_lock(&gmap->guest_table_lock);
-	list_del(&old->lru);
-	/*
-	 * Sometimes the topmost page might need to be "removed" multiple
-	 * times, for example if the VM is rebooted into secure mode several
-	 * times concurrently, or if s390_replace_asce fails after calling
-	 * s390_remove_old_asce and is attempted again later. In that case
-	 * the old asce has been removed from the list, and therefore it
-	 * will not be freed when the VM terminates, but the ASCE is still
-	 * in use and still pointed to.
-	 * A subsequent call to replace_asce will follow the pointer and try
-	 * to remove the same page from the list again.
-	 * Therefore it's necessary that the page of the ASCE has valid
-	 * pointers, so list_del can work (and do nothing) without
-	 * dereferencing stale or invalid pointers.
-	 */
-	INIT_LIST_HEAD(&old->lru);
-	spin_unlock(&gmap->guest_table_lock);
-}
-EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
-
 /**
  * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
  * @gmap: the gmap whose ASCE needs to be replaced
@@ -2685,8 +2640,6 @@ int s390_replace_asce(struct gmap *gmap)
 	struct page *page;
 	void *table;
 
-	s390_unlist_old_asce(gmap);
-
 	/* Replacing segment type ASCEs would cause serious issues */
 	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
 		return -EINVAL;
@@ -2697,15 +2650,6 @@ int s390_replace_asce(struct gmap *gmap)
 	table = page_to_virt(page);
 	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
 
-	/*
-	 * The caller has to deal with the old ASCE, but here we make sure
-	 * the new one is properly added to the CRST list, so that
-	 * it will be freed when the VM is torn down.
-	 */
-	spin_lock(&gmap->guest_table_lock);
-	list_add(&page->lru, &gmap->crst_list);
-	spin_unlock(&gmap->guest_table_lock);
-
 	/* Set new table origin while preserving existing ASCE control bits */
 	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
 	WRITE_ONCE(gmap->asce, asce);