Skip to content

Commit 3844818

Browse files
hnaz authored and akpm00 committed
mm: vmscan: restore high-cpu watermark safety in kswapd
Vlastimil points out that commit a211c65 ("mm: page_alloc: defrag_mode kswapd/kcompactd watermarks") switched kswapd from zone_watermark_ok_safe() to the standard, percpu-cached version of reading free pages, thus dropping the watermark safety precautions for systems with high CPU counts (e.g. >212 cpus on 64G). Restore them. Since zone_watermark_ok_safe() is no longer the right interface, and this was the last caller of the function anyway, open-code the zone_page_state_snapshot() conditional and delete the function. Link: https://lkml.kernel.org/r/20250416135142.778933-2-hannes@cmpxchg.org Fixes: a211c65 ("mm: page_alloc: defrag_mode kswapd/kcompactd watermarks") Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reported-by: Vlastimil Babka <vbabka@suse.cz> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Cc: Brendan Jackman <jackmanb@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 2db93a8 commit 3844818

File tree

3 files changed

+19
-16
lines changed

3 files changed

+19
-16
lines changed

include/linux/mmzone.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1502,8 +1502,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
15021502
bool zone_watermark_ok(struct zone *z, unsigned int order,
15031503
unsigned long mark, int highest_zoneidx,
15041504
unsigned int alloc_flags);
1505-
bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
1506-
unsigned long mark, int highest_zoneidx);
15071505
/*
15081506
* Memory initialization context, use to differentiate memory added by
15091507
* the platform statically or via memory hotplug interface.

mm/page_alloc.c

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3470,18 +3470,6 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
34703470
return false;
34713471
}
34723472

3473-
bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
3474-
unsigned long mark, int highest_zoneidx)
3475-
{
3476-
long free_pages = zone_page_state(z, NR_FREE_PAGES);
3477-
3478-
if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
3479-
free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
3480-
3481-
return __zone_watermark_ok(z, order, mark, highest_zoneidx, 0,
3482-
free_pages);
3483-
}
3484-
34853473
#ifdef CONFIG_NUMA
34863474
int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
34873475

mm/vmscan.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6736,6 +6736,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
67366736
* meet watermarks.
67376737
*/
67386738
for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) {
6739+
enum zone_stat_item item;
67396740
unsigned long free_pages;
67406741

67416742
if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
@@ -6748,9 +6749,25 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
67486749
* blocks to avoid polluting allocator fallbacks.
67496750
*/
67506751
if (defrag_mode)
6751-
free_pages = zone_page_state(zone, NR_FREE_PAGES_BLOCKS);
6752+
item = NR_FREE_PAGES_BLOCKS;
67526753
else
6753-
free_pages = zone_page_state(zone, NR_FREE_PAGES);
6754+
item = NR_FREE_PAGES;
6755+
6756+
/*
6757+
* When there is a high number of CPUs in the system,
6758+
* the cumulative error from the vmstat per-cpu cache
6759+
* can blur the line between the watermarks. In that
6760+
* case, be safe and get an accurate snapshot.
6761+
*
6762+
* TODO: NR_FREE_PAGES_BLOCKS moves in steps of
6763+
* pageblock_nr_pages, while the vmstat pcp threshold
6764+
* is limited to 125. On many configurations that
6765+
* counter won't actually be per-cpu cached. But keep
6766+
* things simple for now; revisit when somebody cares.
6767+
*/
6768+
free_pages = zone_page_state(zone, item);
6769+
if (zone->percpu_drift_mark && free_pages < zone->percpu_drift_mark)
6770+
free_pages = zone_page_state_snapshot(zone, item);
67546771

67556772
if (__zone_watermark_ok(zone, order, mark, highest_zoneidx,
67566773
0, free_pages))

0 commit comments

Comments (0)