
Commit d781a45

Merge branch 'for-6.15/dirty-shutdown' into cxl-for-next2
Add support for Global Persistent Flush (GPF) and dirty shutdown accounting.
2 parents b6faa9c + 6eb52f6 commit d781a45

11 files changed: +274, -1 lines changed

Documentation/ABI/testing/sysfs-bus-cxl

Lines changed: 12 additions & 0 deletions
@@ -604,3 +604,15 @@ Description:
 		See Documentation/ABI/stable/sysfs-devices-node. access0 provides
 		the number to the closest initiator and access1 provides the
 		number to the closest CPU.
+
+
+What:		/sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown
+Date:		Feb, 2025
+KernelVersion:	v6.15
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(RO) The device dirty shutdown count value, which is the number
+		of times the device could have incurred potential data loss.
+		The count is persistent across power loss and wraps back to 0
+		upon overflow. If this file is not present, the device does not
+		have the necessary support for dirty tracking.
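
For reference, a userspace consumer could read the new attribute along these lines (a hypothetical sketch, not part of the commit; the ndbus0/nmem0 instance names are made up and depend on how the system enumerates):

/* Hypothetical userspace sketch: read a CXL nvdimm's dirty shutdown
 * count via the sysfs attribute documented above.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long count;
	FILE *f = fopen("/sys/bus/cxl/devices/nvdimm-bridge0/ndbus0/nmem0/cxl/dirty_shutdown", "r");

	if (!f)
		return 1; /* attribute absent: no dirty tracking support */
	if (fscanf(f, "%llu", &count) == 1)
		printf("dirty shutdowns: %llu\n", count);
	fclose(f);
	return 0;
}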

Documentation/driver-api/cxl/maturity-map.rst

Lines changed: 1 addition & 1 deletion
@@ -130,7 +130,7 @@ Mailbox commands
 * [0] Switch CCI
 * [3] Timestamp
 * [1] PMEM labels
-* [0] PMEM GPF / Dirty Shutdown
+* [3] PMEM GPF / Dirty Shutdown
 * [0] Scan Media

 PMU

drivers/cxl/core/core.h

Lines changed: 1 addition & 0 deletions
@@ -117,5 +117,6 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,

 int cxl_ras_init(void);
 void cxl_ras_exit(void);
+int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port);

 #endif /* __CXL_CORE_H__ */

drivers/cxl/core/mbox.c

Lines changed: 39 additions & 0 deletions
@@ -1282,6 +1282,45 @@ int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL");

+int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count)
+{
+	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+	struct cxl_mbox_get_health_info_out hi;
+	struct cxl_mbox_cmd mbox_cmd;
+	int rc;
+
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
+		.size_out = sizeof(hi),
+		.payload_out = &hi,
+	};
+
+	rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+	if (!rc)
+		*count = le32_to_cpu(hi.dirty_shutdown_cnt);
+
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL");
+
+int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds)
+{
+	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+	struct cxl_mbox_cmd mbox_cmd;
+	struct cxl_mbox_set_shutdown_state_in in = {
+		.state = 1
+	};
+
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE,
+		.size_in = sizeof(in),
+		.payload_in = &in,
+	};
+
+	return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL");
+
 int cxl_set_timestamp(struct cxl_memdev_state *mds)
 {
 	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
drivers/cxl/core/pci.c

Lines changed: 97 additions & 0 deletions
@@ -1054,3 +1054,100 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)

 	return 0;
 }
+
+/*
+ * Set max timeout such that platforms will optimize GPF flow to avoid
+ * the implied worst-case scenario delays. On a sane platform, all
+ * devices should always complete GPF within the energy budget of
+ * the GPF flow. The kernel does not have enough information to pick
+ * anything better than "maximize timeouts and hope it works".
+ *
+ * A misbehaving device could block forward progress of GPF for all
+ * the other devices, exhausting the energy budget of the platform.
+ * However, the spec seems to assume that moving on from slow-to-respond
+ * devices is a virtue. It is not possible to know whether, in actuality,
+ * the slow-to-respond device is *the* most critical device in the
+ * system to wait for.
+ */
+#define GPF_TIMEOUT_BASE_MAX 2
+#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */
+
+u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port)
+{
+	u16 dvsec;
+
+	if (!dev_is_pci(dev))
+		return 0;
+
+	dvsec = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL,
+			is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF);
+	if (!dvsec)
+		dev_warn(dev, "%s GPF DVSEC not present\n",
+			 is_port ? "Port" : "Device");
+	return dvsec;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL");
+
+static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase)
+{
+	u64 base, scale;
+	int rc, offset;
+	u16 ctrl;
+
+	switch (phase) {
+	case 1:
+		offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET;
+		base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK;
+		scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK;
+		break;
+	case 2:
+		offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET;
+		base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK;
+		scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rc = pci_read_config_word(pdev, dvsec + offset, &ctrl);
+	if (rc)
+		return rc;
+
+	if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX &&
+	    FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX)
+		return 0;
+
+	ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX);
+	ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX);
+
+	rc = pci_write_config_word(pdev, dvsec + offset, ctrl);
+	if (!rc)
+		pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n",
+			phase, GPF_TIMEOUT_BASE_MAX);
+
+	return rc;
+}
+
+int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port)
+{
+	struct pci_dev *pdev;
+
+	if (!port)
+		return -EINVAL;
+
+	if (!port->gpf_dvsec) {
+		int dvsec;
+
+		dvsec = cxl_gpf_get_dvsec(dport_dev, true);
+		if (!dvsec)
+			return -EINVAL;
+
+		port->gpf_dvsec = dvsec;
+	}
+
+	pdev = to_pci_dev(dport_dev);
+	update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1);
+	update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2);
+
+	return 0;
+}
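
As a worked example of the encoding above (hypothetical, not part of the commit): assuming the scale field selects a power-of-ten microsecond unit, with 7 meaning the 10-second unit noted in the GPF_TIMEOUT_SCALE_MAX comment, a decoder would look like this. With base = 2 and scale = 7 it yields 20 seconds, matching the "%d0 secs" debug print in update_gpf_port_dvsec().

/* Decoding sketch: convert a GPF phase control value into a duration.
 * Scale values above 7 are treated as reserved here.
 */
static u64 gpf_timeout_us(u16 ctrl)
{
	u8 base = ctrl & 0xf;		/* TMO base, bits 3:0 */
	u8 scale = (ctrl >> 8) & 0xf;	/* TMO scale, bits 11:8 */
	u64 unit_us = 1;

	if (scale > 7)
		return 0;		/* reserved encoding */

	while (scale--)
		unit_us *= 10;		/* unit = 10^scale microseconds */

	return base * unit_us;
}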

drivers/cxl/core/port.c

Lines changed: 2 additions & 0 deletions
@@ -1678,6 +1678,8 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
 		if (rc && rc != -EBUSY)
 			return rc;

+		cxl_gpf_port_setup(dport_dev, port);
+
 		/* Any more ports to add between this one and the root? */
 		if (!dev_is_cxl_root_child(&port->dev))
 			continue;

drivers/cxl/cxl.h

Lines changed: 5 additions & 0 deletions
@@ -542,6 +542,7 @@ struct cxl_nvdimm {
 	struct device dev;
 	struct cxl_memdev *cxlmd;
 	u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */
+	u64 dirty_shutdowns;
 };

 struct cxl_pmem_region_mapping {
@@ -589,6 +590,7 @@ struct cxl_dax_region {
  * @cdat: Cached CDAT data
  * @cdat_available: Should a CDAT attribute be available in sysfs
  * @pci_latency: Upstream latency in picoseconds
+ * @gpf_dvsec: Cached GPF port DVSEC
  */
 struct cxl_port {
 	struct device dev;
@@ -612,6 +614,7 @@ struct cxl_port {
 	} cdat;
 	bool cdat_available;
 	long pci_latency;
+	int gpf_dvsec;
 };

 /**
@@ -899,4 +902,6 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
 #define __mock static
 #endif

+u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port);
+
 #endif /* __CXL_H__ */

drivers/cxl/cxlmem.h

Lines changed: 19 additions & 0 deletions
@@ -721,6 +721,23 @@ struct cxl_mbox_set_partition_info {

 #define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0)

+/* Get Health Info Output Payload CXL 3.2 Spec 8.2.10.9.3.1 Table 8-148 */
+struct cxl_mbox_get_health_info_out {
+	u8 health_status;
+	u8 media_status;
+	u8 additional_status;
+	u8 life_used;
+	__le16 device_temperature;
+	__le32 dirty_shutdown_cnt;
+	__le32 corrected_volatile_error_cnt;
+	__le32 corrected_persistent_error_cnt;
+} __packed;
+
+/* Set Shutdown State Input Payload CXL 3.2 Spec 8.2.10.9.3.5 Table 8-152 */
+struct cxl_mbox_set_shutdown_state_in {
+	u8 state;
+} __packed;
+
 /* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */
 struct cxl_mbox_set_timestamp_in {
 	__le64 timestamp;
@@ -857,6 +874,8 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
 			    enum cxl_event_log_type type,
 			    enum cxl_event_type event_type,
 			    const uuid_t *uuid, union cxl_event *evt);
+int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count);
+int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds);
 int cxl_set_timestamp(struct cxl_memdev_state *mds);
 int cxl_poison_state_init(struct cxl_memdev_state *mds);
 int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
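
A possible sanity check on the payload structs added above (hypothetical, not part of the commit): the field widths imply fixed sizes that could be asserted at compile time.

/* 4 x u8 + __le16 + 3 x __le32 = 18 bytes for Get Health Info output,
 * and a single u8 for Set Shutdown State input.
 */
static_assert(sizeof(struct cxl_mbox_get_health_info_out) == 18);
static_assert(sizeof(struct cxl_mbox_set_shutdown_state_in) == 1);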

drivers/cxl/cxlpci.h

Lines changed: 6 additions & 0 deletions
@@ -40,6 +40,12 @@

 /* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */
 #define CXL_DVSEC_PORT_GPF 4
+#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET	0x0C
+#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK	GENMASK(3, 0)
+#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK	GENMASK(11, 8)
+#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET	0xE
+#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK	GENMASK(3, 0)
+#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK	GENMASK(11, 8)

 /* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */
 #define CXL_DVSEC_DEVICE_GPF 5
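
A usage sketch for these register definitions (hypothetical, not part of the commit; the write side is update_gpf_port_dvsec() in drivers/cxl/core/pci.c above):

/* Sketch: read the Phase 1 timeout fields out of a Port GPF DVSEC
 * control word using the masks defined above.
 */
static void show_gpf_phase1_timeout(struct pci_dev *pdev, u16 dvsec)
{
	u16 ctrl;

	if (pci_read_config_word(pdev,
				 dvsec + CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET,
				 &ctrl))
		return;

	pci_dbg(pdev, "GPF phase 1: base=%lu scale=%lu\n",
		FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK, ctrl),
		FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK, ctrl));
}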

drivers/cxl/pmem.c

Lines changed: 69 additions & 0 deletions
@@ -42,22 +42,83 @@ static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *
 }
 static DEVICE_ATTR_RO(id);

+static ssize_t dirty_shutdown_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+
+	return sysfs_emit(buf, "%llu\n", cxl_nvd->dirty_shutdowns);
+}
+static DEVICE_ATTR_RO(dirty_shutdown);
+
 static struct attribute *cxl_dimm_attributes[] = {
 	&dev_attr_id.attr,
 	&dev_attr_provider.attr,
+	&dev_attr_dirty_shutdown.attr,
 	NULL
 };

+#define CXL_INVALID_DIRTY_SHUTDOWN_COUNT ULLONG_MAX
+static umode_t cxl_dimm_visible(struct kobject *kobj,
+				struct attribute *a, int n)
+{
+	if (a == &dev_attr_dirty_shutdown.attr) {
+		struct device *dev = kobj_to_dev(kobj);
+		struct nvdimm *nvdimm = to_nvdimm(dev);
+		struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+
+		if (cxl_nvd->dirty_shutdowns ==
+		    CXL_INVALID_DIRTY_SHUTDOWN_COUNT)
+			return 0;
+	}
+
+	return a->mode;
+}
+
 static const struct attribute_group cxl_dimm_attribute_group = {
 	.name = "cxl",
 	.attrs = cxl_dimm_attributes,
+	.is_visible = cxl_dimm_visible,
 };

 static const struct attribute_group *cxl_dimm_attribute_groups[] = {
 	&cxl_dimm_attribute_group,
 	NULL
 };

+static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd)
+{
+	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+	struct device *dev = &cxl_nvd->dev;
+	u32 count;
+
+	/*
+	 * Dirty tracking is enabled and exposed to the user only when:
+	 * - dirty shutdown on the device can be set, and,
+	 * - the device has a Device GPF DVSEC (albeit unused), and,
+	 * - the Get Health Info cmd can retrieve the device's dirty count.
+	 */
+	cxl_nvd->dirty_shutdowns = CXL_INVALID_DIRTY_SHUTDOWN_COUNT;
+
+	if (cxl_arm_dirty_shutdown(mds)) {
+		dev_warn(dev, "GPF: could not set dirty shutdown state\n");
+		return;
+	}
+
+	if (!cxl_gpf_get_dvsec(cxlds->dev, false))
+		return;
+
+	if (cxl_get_dirty_count(mds, &count)) {
+		dev_warn(dev, "GPF: could not retrieve dirty count\n");
+		return;
+	}
+
+	cxl_nvd->dirty_shutdowns = count;
+}
+
 static int cxl_nvdimm_probe(struct device *dev)
 {
 	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
@@ -78,6 +139,14 @@ static int cxl_nvdimm_probe(struct device *dev)
 	set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
 	set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
 	set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
+
+	/*
+	 * Set dirty shutdown now, with the expectation that the device
+	 * clear it upon a successful GPF flow. The exception to this
+	 * is upon Viral detection, per CXL 3.2 section 12.4.2.
+	 */
+	cxl_nvdimm_arm_dirty_shutdown_tracking(cxl_nvd);
+
 	nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd,
 				 cxl_dimm_attribute_groups, flags,
 				 cmd_mask, 0, NULL, cxl_nvd->dev_id,
