
Commit 88c91dc

Hugh Dickins authored and akpm00 committed
mempolicy: migration attempt to match interleave nodes
Improve alloc_migration_target_by_mpol()'s treatment of MPOL_INTERLEAVE.

Make an effort in do_mbind(), to identify the correct interleave index
for the first page to be migrated, so that it and all subsequent pages
from the same vma will be targeted to precisely their intended nodes.
Pages from following vmas will still be interleaved from the requested
nodemask, but perhaps starting from a different base.

Whether this is worth doing at all, or worth improving further, is
arguable: queue_folio_required() is right not to care about the precise
placement on interleaved nodes; but this little effort seems appropriate.

[hughd@google.com: do vma_iter search under mmap_write_unlock()]
Link: https://lkml.kernel.org/r/3311d544-fb05-a7f1-1b74-16aa0f6cd4fe@google.com
Link: https://lkml.kernel.org/r/77954a5-9c9b-1c11-7d5c-3262c01b895f@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
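The arithmetic behind the fix is easy to check in isolation. Below is a
small userspace sketch, not kernel code: pick_node() is a plain-modulo
stand-in for the kernel's walk of the interleave nodemask, the index
values are invented, and it assumes (as the "Set base" comment in the
diff suggests) that the allocation callback adds each folio's own scaled
index back onto the stored base:

```c
#include <stdio.h>

/* Stand-in for the kernel's interleave selection over a nodemask:
 * a plain modulo is enough to show the base-plus-index arithmetic. */
static int pick_node(long ilx, int nr_nodes)
{
	/* C's % can go negative; normalize as a ring walk would */
	return (int)(((ilx % nr_nodes) + nr_nodes) % nr_nodes);
}

int main(void)
{
	int nr_nodes = 4;	/* nodes in the interleave nodemask */
	long first_index = 10;	/* page->index >> order of first page */
	long vma_ilx = 2;	/* ilx that get_vma_policy() reported */

	/* do_mbind(): "Set base from which to increment by index" */
	long base = vma_ilx - first_index;

	/* Allocation callback: base + this folio's scaled index
	 * reproduces each page's intended interleave slot. */
	for (long idx = first_index; idx < first_index + 6; idx++)
		printf("page index %ld -> node %d\n",
		       idx, pick_node(base + idx, nr_nodes));
	return 0;
}
```

With vma_ilx = 2 and first_index = 10 the base is -8, so the first page
lands on node 2 exactly as get_vma_policy() intended, and its neighbours
from the same vma follow in sequence.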
1 parent 72e315f commit 88c91dc

File tree

1 file changed: +50 −5 lines changed


mm/mempolicy.c

Lines changed: 50 additions & 5 deletions
@@ -428,6 +428,11 @@ static bool strictly_unmovable(unsigned long flags)
 			MPOL_MF_STRICT;
 }
 
+struct migration_mpol {		/* for alloc_migration_target_by_mpol() */
+	struct mempolicy *pol;
+	pgoff_t ilx;
+};
+
 struct queue_pages {
 	struct list_head *pagelist;
 	unsigned long flags;
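Why a struct at all: migrate_pages() passes its allocation callback only
a single unsigned long of private data, so once the callback needs both
the policy and an interleave base, the two must be bundled and passed by
address. A minimal sketch of the pattern under assumed names (ctx,
walk() and get_target() are hypothetical, standing in for
migration_mpol, migrate_pages() and alloc_migration_target_by_mpol()):

```c
#include <stdio.h>

/* Two values the callback needs, bundled like struct migration_mpol */
struct ctx {
	const char *pol;
	long ilx;
};

/* Callback receiving one opaque word of private data */
static void get_target(int item, unsigned long private)
{
	struct ctx *c = (struct ctx *)private;

	printf("item %d: pol=%s ilx=%ld\n", item, c->pol, c->ilx + item);
}

/* Driver taking (callback, private), like migrate_pages() */
static void walk(void (*fn)(int, unsigned long), unsigned long private)
{
	for (int i = 0; i < 3; i++)
		fn(i, private);
}

int main(void)
{
	struct ctx c = { .pol = "interleave", .ilx = -8 };

	/* The struct lives on the caller's stack for the whole walk */
	walk(get_target, (unsigned long)&c);
	return 0;
}
```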
@@ -1156,8 +1161,9 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 static struct folio *alloc_migration_target_by_mpol(struct folio *src,
 						    unsigned long private)
 {
-	struct mempolicy *pol = (struct mempolicy *)private;
-	pgoff_t ilx = 0;	/* improve on this later */
+	struct migration_mpol *mmpol = (struct migration_mpol *)private;
+	struct mempolicy *pol = mmpol->pol;
+	pgoff_t ilx = mmpol->ilx;
 	struct page *page;
 	unsigned int order;
 	int nid = numa_node_id();
@@ -1212,6 +1218,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev;
 	struct vma_iterator vmi;
+	struct migration_mpol mmpol;
 	struct mempolicy *new;
 	unsigned long end;
 	long err;
@@ -1284,17 +1291,55 @@ static long do_mbind(unsigned long start, unsigned long len,
 		}
 	}
 
-	mmap_write_unlock(mm);
-
 	if (!err && !list_empty(&pagelist)) {
 		/* Convert MPOL_DEFAULT's NULL to task or default policy */
 		if (!new) {
 			new = get_task_policy(current);
 			mpol_get(new);
 		}
+		mmpol.pol = new;
+		mmpol.ilx = 0;
+
+		/*
+		 * In the interleaved case, attempt to allocate on exactly the
+		 * targeted nodes, for the first VMA to be migrated; for later
+		 * VMAs, the nodes will still be interleaved from the targeted
+		 * nodemask, but one by one may be selected differently.
+		 */
+		if (new->mode == MPOL_INTERLEAVE) {
+			struct page *page;
+			unsigned int order;
+			unsigned long addr = -EFAULT;
+
+			list_for_each_entry(page, &pagelist, lru) {
+				if (!PageKsm(page))
+					break;
+			}
+			if (!list_entry_is_head(page, &pagelist, lru)) {
+				vma_iter_init(&vmi, mm, start);
+				for_each_vma_range(vmi, vma, end) {
+					addr = page_address_in_vma(page, vma);
+					if (addr != -EFAULT)
+						break;
+				}
+			}
+			if (addr != -EFAULT) {
+				order = compound_order(page);
+				/* We already know the pol, but not the ilx */
+				mpol_cond_put(get_vma_policy(vma, addr, order,
+							     &mmpol.ilx));
+				/* Set base from which to increment by index */
+				mmpol.ilx -= page->index >> order;
+			}
+		}
+	}
+
+	mmap_write_unlock(mm);
+
+	if (!err && !list_empty(&pagelist)) {
 		nr_failed |= migrate_pages(&pagelist,
 				alloc_migration_target_by_mpol, NULL,
-				(unsigned long)new, MIGRATE_SYNC,
+				(unsigned long)&mmpol, MIGRATE_SYNC,
 				MR_MEMPOLICY_MBIND, NULL);
 	}
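One idiom in the new do_mbind() code is worth spelling out: if
list_for_each_entry() completes without hitting break, its cursor ends
up as a bogus entry computed from the list head itself, and
list_entry_is_head() is how the code detects that no non-KSM page was
found. A self-contained userspace re-creation of that scan, using local
macros rather than the kernel's <linux/list.h>:

```c
#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

/* Local stand-ins for the kernel's list helpers */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_for_each_entry(pos, head, member)				  \
	for (pos = container_of((head)->next, __typeof__(*pos), member); \
	     &pos->member != (head);					  \
	     pos = container_of(pos->member.next, __typeof__(*pos), member))
#define list_entry_is_head(pos, head, member) (&(pos)->member == (head))

struct page { int ksm; struct list_head lru; };

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

int main(void)
{
	struct list_head pagelist = { &pagelist, &pagelist };
	struct page pages[3] = { { .ksm = 1 }, { .ksm = 1 }, { .ksm = 0 } };
	struct page *page;

	for (int i = 0; i < 3; i++)
		list_add_tail(&pages[i].lru, &pagelist);

	/* Same shape as do_mbind(): find the first non-KSM page */
	list_for_each_entry(page, &pagelist, lru) {
		if (!page->ksm)
			break;
	}
	if (!list_entry_is_head(page, &pagelist, lru))
		printf("found page %ld\n", (long)(page - pages));
	else
		printf("no suitable page on the list\n");
	return 0;
}
```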