Skip to content

Commit 7d0ecc0

Browse files
Davidlohr Bueso authored and davejiang committed
cxl/pmem: Export dirty shutdown count via sysfs
Similar to how the acpi_nfit driver exports Optane dirty shutdown count,
introduce:

  /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown

Under the conditions that 1) dirty shutdown can be set, 2) Device GPF
DVSEC exists, and 3) the count itself can be retrieved.

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://patch.msgid.link/20250220220235.276831-4-dave@stgolabs.net
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
1 parent 86349aa commit 7d0ecc0

File tree

6 files changed

+117
-9
lines changed

6 files changed

+117
-9
lines changed

Documentation/ABI/testing/sysfs-bus-cxl

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -586,3 +586,15 @@ Description:
586586
See Documentation/ABI/stable/sysfs-devices-node. access0 provides
587587
the number to the closest initiator and access1 provides the
588588
number to the closest CPU.
589+
590+
591+
What: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown
592+
Date: Feb, 2025
593+
KernelVersion: v6.15
594+
Contact: linux-cxl@vger.kernel.org
595+
Description:
596+
(RO) The device dirty shutdown count value, which is the number
597+
of times the device could have incurred potential data loss.
598+
The count is persistent across power loss and wraps back to 0
599+
upon overflow. If this file is not present, the device does not
600+
have the necessary support for dirty tracking.

Documentation/driver-api/cxl/maturity-map.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,7 @@ Mailbox commands
130130
* [0] Switch CCI
131131
* [3] Timestamp
132132
* [1] PMEM labels
133-
* [1] PMEM GPF / Dirty Shutdown
133+
* [3] PMEM GPF / Dirty Shutdown
134134
* [0] Scan Media
135135

136136
PMU

drivers/cxl/core/mbox.c

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1308,6 +1308,27 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds)
13081308
}
13091309
EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL");
13101310

1311+
int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count)
1312+
{
1313+
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
1314+
struct cxl_mbox_get_health_info_out hi;
1315+
struct cxl_mbox_cmd mbox_cmd;
1316+
int rc;
1317+
1318+
mbox_cmd = (struct cxl_mbox_cmd) {
1319+
.opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
1320+
.size_out = sizeof(hi),
1321+
.payload_out = &hi,
1322+
};
1323+
1324+
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
1325+
if (!rc)
1326+
*count = le32_to_cpu(hi.dirty_shutdown_cnt);
1327+
1328+
return rc;
1329+
}
1330+
EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL");
1331+
13111332
int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds)
13121333
{
13131334
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;

drivers/cxl/cxl.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -563,6 +563,7 @@ struct cxl_nvdimm {
563563
struct device dev;
564564
struct cxl_memdev *cxlmd;
565565
u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */
566+
u64 dirty_shutdowns;
566567
};
567568

568569
struct cxl_pmem_region_mapping {

drivers/cxl/cxlmem.h

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -693,6 +693,18 @@ struct cxl_mbox_set_partition_info {
693693

694694
#define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0)
695695

696+
/* Get Health Info Output Payload CXL 3.2 Spec 8.2.10.9.3.1 Table 8-148 */
697+
struct cxl_mbox_get_health_info_out {
698+
u8 health_status;
699+
u8 media_status;
700+
u8 additional_status;
701+
u8 life_used;
702+
__le16 device_temperature;
703+
__le32 dirty_shutdown_cnt;
704+
__le32 corrected_volatile_error_cnt;
705+
__le32 corrected_persistent_error_cnt;
706+
} __packed;
707+
696708
/* Set Shutdown State Input Payload CXL 3.2 Spec 8.2.10.9.3.5 Table 8-152 */
697709
struct cxl_mbox_set_shutdown_state_in {
698710
u8 state;
@@ -834,6 +846,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
834846
enum cxl_event_log_type type,
835847
enum cxl_event_type event_type,
836848
const uuid_t *uuid, union cxl_event *evt);
849+
int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count);
837850
int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds);
838851
int cxl_set_timestamp(struct cxl_memdev_state *mds);
839852
int cxl_poison_state_init(struct cxl_memdev_state *mds);

drivers/cxl/pmem.c

Lines changed: 69 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -42,22 +42,83 @@ static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *
4242
}
4343
static DEVICE_ATTR_RO(id);
4444

45+
static ssize_t dirty_shutdown_show(struct device *dev,
46+
struct device_attribute *attr, char *buf)
47+
{
48+
struct nvdimm *nvdimm = to_nvdimm(dev);
49+
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
50+
51+
return sysfs_emit(buf, "%llu\n", cxl_nvd->dirty_shutdowns);
52+
}
53+
static DEVICE_ATTR_RO(dirty_shutdown);
54+
4555
static struct attribute *cxl_dimm_attributes[] = {
4656
&dev_attr_id.attr,
4757
&dev_attr_provider.attr,
58+
&dev_attr_dirty_shutdown.attr,
4859
NULL
4960
};
5061

62+
#define CXL_INVALID_DIRTY_SHUTDOWN_COUNT ULLONG_MAX
63+
static umode_t cxl_dimm_visible(struct kobject *kobj,
64+
struct attribute *a, int n)
65+
{
66+
if (a == &dev_attr_dirty_shutdown.attr) {
67+
struct device *dev = kobj_to_dev(kobj);
68+
struct nvdimm *nvdimm = to_nvdimm(dev);
69+
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
70+
71+
if (cxl_nvd->dirty_shutdowns ==
72+
CXL_INVALID_DIRTY_SHUTDOWN_COUNT)
73+
return 0;
74+
}
75+
76+
return a->mode;
77+
}
78+
5179
static const struct attribute_group cxl_dimm_attribute_group = {
5280
.name = "cxl",
5381
.attrs = cxl_dimm_attributes,
82+
.is_visible = cxl_dimm_visible
5483
};
5584

5685
static const struct attribute_group *cxl_dimm_attribute_groups[] = {
5786
&cxl_dimm_attribute_group,
5887
NULL
5988
};
6089

90+
static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd)
91+
{
92+
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
93+
struct cxl_dev_state *cxlds = cxlmd->cxlds;
94+
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
95+
struct device *dev = &cxl_nvd->dev;
96+
u32 count;
97+
98+
/*
99+
* Dirty tracking is enabled and exposed to the user, only when:
100+
* - dirty shutdown on the device can be set, and,
101+
* - the device has a Device GPF DVSEC (albeit unused), and,
102+
* - the Get Health Info cmd can retrieve the device's dirty count.
103+
*/
104+
cxl_nvd->dirty_shutdowns = CXL_INVALID_DIRTY_SHUTDOWN_COUNT;
105+
106+
if (cxl_arm_dirty_shutdown(mds)) {
107+
dev_warn(dev, "GPF: could not set dirty shutdown state\n");
108+
return;
109+
}
110+
111+
if (!cxl_gpf_get_dvsec(cxlds->dev, false))
112+
return;
113+
114+
if (cxl_get_dirty_count(mds, &count)) {
115+
dev_warn(dev, "GPF: could not retrieve dirty count\n");
116+
return;
117+
}
118+
119+
cxl_nvd->dirty_shutdowns = count;
120+
}
121+
61122
static int cxl_nvdimm_probe(struct device *dev)
62123
{
63124
struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
@@ -78,20 +139,20 @@ static int cxl_nvdimm_probe(struct device *dev)
78139
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
79140
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
80141
set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
81-
nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd,
82-
cxl_dimm_attribute_groups, flags,
83-
cmd_mask, 0, NULL, cxl_nvd->dev_id,
84-
cxl_security_ops, NULL);
85-
if (!nvdimm)
86-
return -ENOMEM;
87142

88143
/*
89144
* Set dirty shutdown now, with the expectation that the device
90145
* clear it upon a successful GPF flow. The exception to this
91146
* is upon Viral detection, per CXL 3.2 section 12.4.2.
92147
*/
93-
if (cxl_arm_dirty_shutdown(mds))
94-
dev_warn(dev, "GPF: could not dirty shutdown state\n");
148+
cxl_nvdimm_arm_dirty_shutdown_tracking(cxl_nvd);
149+
150+
nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd,
151+
cxl_dimm_attribute_groups, flags,
152+
cmd_mask, 0, NULL, cxl_nvd->dev_id,
153+
cxl_security_ops, NULL);
154+
if (!nvdimm)
155+
return -ENOMEM;
95156

96157
dev_set_drvdata(dev, nvdimm);
97158
return devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);

0 commit comments

Comments
 (0)