Skip to content

Commit 516e5bd

Browse files
committed
cxl: Add mce notifier to emit aliased address for extended linear cache
Below is an example of an extended linear cache configuration, presented as a single memory region of 256G that consists of 128G of DRAM and 128G of CXL memory. The kernel sees a region of total 256G of system memory. 128G DRAM 128G CXL memory |-----------------------------------|-------------------------------------| Data resides in either DRAM or far memory (FM) with no replication. Hot data is swapped into DRAM by the hardware behind the scenes. When an error is detected in one location, it is possible that the error also resides in the aliased location. Therefore, when a memory location that is flagged by an MCE is part of the special region, the aliased memory location needs to be offlined as well. Add an MCE notifier callback to identify whether the MCE address location is part of an extended linear cache region and handle it accordingly. Add a symbol export for set_mce_nospec() in the x86 code in order to call set_mce_nospec() from the CXL MCE notifier callback. Link: https://lore.kernel.org/linux-cxl/668333b17e4b2_5639294fd@dwillia2-xfh.jf.intel.com.notmuch/ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Li Ming <ming.li@zohomail.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Link: https://patch.msgid.link/20250226162224.3633792-5-dave.jiang@intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
1 parent 8c520c5 commit 516e5bd

File tree

10 files changed

+134
-0
lines changed

10 files changed

+134
-0
lines changed

arch/x86/mm/pat/set_memory.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2081,6 +2081,7 @@ int set_mce_nospec(unsigned long pfn)
20812081
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
20822082
return rc;
20832083
}
2084+
EXPORT_SYMBOL_GPL(set_mce_nospec);
20842085

20852086
/* Restore full speculative operation to the pfn. */
20862087
int clear_mce_nospec(unsigned long pfn)

drivers/cxl/Kconfig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,8 @@ config CXL_REGION_INVALIDATION_TEST
146146
If unsure, or if this kernel is meant for production environments,
147147
say N.
148148

149+
config CXL_MCE
150+
def_bool y
151+
depends on X86_MCE && MEMORY_FAILURE
152+
149153
endif

drivers/cxl/core/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ cxl_core-y += cdat.o
1717
cxl_core-y += acpi.o
1818
cxl_core-$(CONFIG_TRACING) += trace.o
1919
cxl_core-$(CONFIG_CXL_REGION) += region.o
20+
cxl_core-$(CONFIG_CXL_MCE) += mce.o

drivers/cxl/core/mbox.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "core.h"
1313
#include "trace.h"
14+
#include "mce.h"
1415

1516
static bool cxl_raw_allow_all;
1617

@@ -1444,6 +1445,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL");
14441445
struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
14451446
{
14461447
struct cxl_memdev_state *mds;
1448+
int rc;
14471449

14481450
mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL);
14491451
if (!mds) {
@@ -1459,6 +1461,10 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
14591461
mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
14601462
mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;
14611463

1464+
rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier);
1465+
if (rc)
1466+
return ERR_PTR(rc);
1467+
14621468
return mds;
14631469
}
14641470
EXPORT_SYMBOL_NS_GPL(cxl_memdev_state_create, "CXL");

drivers/cxl/core/mce.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/* Copyright(c) 2024 Intel Corporation. All rights reserved. */
3+
#include <linux/mm.h>
4+
#include <linux/notifier.h>
5+
#include <linux/set_memory.h>
6+
#include <asm/mce.h>
7+
#include <cxlmem.h>
8+
#include "mce.h"
9+
10+
static int cxl_handle_mce(struct notifier_block *nb, unsigned long val,
11+
void *data)
12+
{
13+
struct cxl_memdev_state *mds = container_of(nb, struct cxl_memdev_state,
14+
mce_notifier);
15+
struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
16+
struct cxl_port *endpoint = cxlmd->endpoint;
17+
struct mce *mce = data;
18+
u64 spa, spa_alias;
19+
unsigned long pfn;
20+
21+
if (!mce || !mce_usable_address(mce))
22+
return NOTIFY_DONE;
23+
24+
if (!endpoint)
25+
return NOTIFY_DONE;
26+
27+
spa = mce->addr & MCI_ADDR_PHYSADDR;
28+
29+
pfn = spa >> PAGE_SHIFT;
30+
if (!pfn_valid(pfn))
31+
return NOTIFY_DONE;
32+
33+
spa_alias = cxl_port_get_spa_cache_alias(endpoint, spa);
34+
if (spa_alias == ~0ULL)
35+
return NOTIFY_DONE;
36+
37+
pfn = spa_alias >> PAGE_SHIFT;
38+
39+
/*
40+
* Take down the aliased memory page. The original memory page flagged
41+
* by the MCE will be taken cared of by the standard MCE handler.
42+
*/
43+
dev_emerg(mds->cxlds.dev, "Offlining aliased SPA address0: %#llx\n",
44+
spa_alias);
45+
if (!memory_failure(pfn, 0))
46+
set_mce_nospec(pfn);
47+
48+
return NOTIFY_OK;
49+
}
50+
51+
/* Release callback: drop the given notifier block from the MCE decode chain. */
static void cxl_unregister_mce_notifier(void *mce_notifier)
{
	struct notifier_block *nb = mce_notifier;

	mce_unregister_decode_chain(nb);
}
55+
56+
int devm_cxl_register_mce_notifier(struct device *dev,
57+
struct notifier_block *mce_notifier)
58+
{
59+
mce_notifier->notifier_call = cxl_handle_mce;
60+
mce_notifier->priority = MCE_PRIO_UC;
61+
mce_register_decode_chain(mce_notifier);
62+
63+
return devm_add_action_or_reset(dev, cxl_unregister_mce_notifier,
64+
mce_notifier);
65+
}

drivers/cxl/core/mce.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
/* Copyright(c) 2024 Intel Corporation. All rights reserved. */
3+
#ifndef _CXL_CORE_MCE_H_
4+
#define _CXL_CORE_MCE_H_
5+
6+
#include <linux/notifier.h>
7+
8+
/* Only pointers to these types are used by the declarations below. */
struct device;
struct notifier_block;

#ifdef CONFIG_CXL_MCE
/*
 * Register @mce_notifier on the MCE decode chain; the registration is
 * undone through a devm action attached to @dev.
 */
int devm_cxl_register_mce_notifier(struct device *dev,
				   struct notifier_block *mce_notifier);
#else
/*
 * Without CONFIG_CXL_MCE there is no MCE decode chain to hook into.
 * Report success so that callers which treat any non-zero return as a
 * fatal setup error keep working on kernels built without MCE support.
 */
static inline int
devm_cxl_register_mce_notifier(struct device *dev,
			       struct notifier_block *mce_notifier)
{
	return 0;
}
#endif
19+
20+
#endif

drivers/cxl/core/region.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3447,6 +3447,34 @@ int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
34473447
}
34483448
EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL");
34493449

3450+
u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
3451+
{
3452+
struct cxl_region_ref *iter;
3453+
unsigned long index;
3454+
3455+
if (!endpoint)
3456+
return ~0ULL;
3457+
3458+
guard(rwsem_write)(&cxl_region_rwsem);
3459+
3460+
xa_for_each(&endpoint->regions, index, iter) {
3461+
struct cxl_region_params *p = &iter->region->params;
3462+
3463+
if (p->res->start <= spa && spa <= p->res->end) {
3464+
if (!p->cache_size)
3465+
return ~0ULL;
3466+
3467+
if (spa > p->res->start + p->cache_size)
3468+
return spa - p->cache_size;
3469+
3470+
return spa + p->cache_size;
3471+
}
3472+
}
3473+
3474+
return ~0ULL;
3475+
}
3476+
EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL");
3477+
34503478
static int is_system_ram(struct resource *res, void *arg)
34513479
{
34523480
struct cxl_region *cxlr = arg;

drivers/cxl/cxl.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,7 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
877877
int cxl_add_to_region(struct cxl_port *root,
878878
struct cxl_endpoint_decoder *cxled);
879879
struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
880+
u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa);
880881
#else
881882
static inline bool is_cxl_pmem_region(struct device *dev)
882883
{
@@ -895,6 +896,11 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
895896
{
896897
return NULL;
897898
}
899+
/*
 * Stub used when region support is not built: there are no regions and
 * therefore no aliases. Return ~0ULL, the same "no alias" value the real
 * implementation uses, rather than 0, which is a valid physical address.
 */
static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint,
					       u64 spa)
{
	return ~0ULL;
}
898904
#endif
899905

900906
void cxl_endpoint_parse_cdat(struct cxl_port *port);

drivers/cxl/cxlmem.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,7 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
477477
* @poison: poison driver state info
478478
* @security: security driver state info
479479
* @fw: firmware upload / activation state
480+
* @mce_notifier: MCE notifier
480481
*
481482
* See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
482483
* details on capacity parameters.
@@ -503,6 +504,7 @@ struct cxl_memdev_state {
503504
struct cxl_poison_state poison;
504505
struct cxl_security_state security;
505506
struct cxl_fw_state fw;
507+
struct notifier_block mce_notifier;
506508
};
507509

508510
static inline struct cxl_memdev_state *

tools/testing/cxl/Kbuild

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ cxl_core-y += $(CXL_CORE_SRC)/cdat.o
6464
cxl_core-y += $(CXL_CORE_SRC)/acpi.o
6565
cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
6666
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
67+
cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
6768
cxl_core-y += config_check.o
6869
cxl_core-y += cxl_core_test.o
6970
cxl_core-y += cxl_core_exports.o

0 commit comments

Comments
 (0)