Skip to content

Commit d074228

Browse files
qzhuo2 authored and aegl committed
EDAC/ie31200: Add Intel Raptor Lake-S SoCs support
The Intel Raptor Lake-S SoC contains two memory controllers with DDR5 memory type and out-of-band ECC capability. The resource definitions of the memory controller are different from previous generations. One notable difference is that the PCI ERRSTS register is deprecated and is not used to indicate the presence of errors or to clear the MMIO-mapped ECC error log registers. Extend the ie31200_edac driver to support multiple memory controllers, add a resource configuration table and use an MSR register to clear the ECC error log registers to provide EDAC support for Raptor Lake-S SoCs. Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com> Tested-by: Gary Wang <gary.c.wang@intel.com> Link: https://lore.kernel.org/r/20250310011411.31685-11-qiuxu.zhuo@intel.com
1 parent 498550e commit d074228

File tree

1 file changed

+149
-33
lines changed

1 file changed

+149
-33
lines changed

drivers/edac/ie31200_edac.c

Lines changed: 149 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,15 @@
8484
#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6
8585
#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca
8686

87-
#define IE31200_RANKS_PER_CHANNEL 4
87+
/* Raptor Lake-S */
88+
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703
89+
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640
90+
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630
91+
92+
#define IE31200_RANKS_PER_CHANNEL 8
8893
#define IE31200_DIMMS_PER_CHANNEL 2
8994
#define IE31200_CHANNELS 2
95+
#define IE31200_IMC_NUM 2
9096

9197
/* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */
9298
#define IE31200_MCHBAR_LOW 0x48
@@ -117,15 +123,20 @@ static int ie31200_registered = 1;
117123

118124
struct res_config {
119125
enum mem_type mtype;
126+
int imc_num;
120127
/* Host MMIO configuration register */
121128
u64 reg_mchbar_mask;
122129
u64 reg_mchbar_window_size;
123130
/* ECC error log register */
124131
u64 reg_eccerrlog_offset[IE31200_CHANNELS];
125132
u64 reg_eccerrlog_ce_mask;
133+
u64 reg_eccerrlog_ce_ovfl_mask;
126134
u64 reg_eccerrlog_ue_mask;
135+
u64 reg_eccerrlog_ue_ovfl_mask;
127136
u64 reg_eccerrlog_rank_mask;
128137
u64 reg_eccerrlog_syndrome_mask;
138+
/* MSR to clear ECC error log register */
139+
u32 msr_clear_eccerrlog_offset;
129140
/* DIMM characteristics register */
130141
u64 reg_mad_dimm_size_granularity;
131142
u64 reg_mad_dimm_offset[IE31200_CHANNELS];
@@ -139,10 +150,18 @@ struct ie31200_priv {
139150
void __iomem *c0errlog;
140151
void __iomem *c1errlog;
141152
struct res_config *cfg;
153+
struct mem_ctl_info *mci;
154+
struct pci_dev *pdev;
155+
struct device dev;
142156
};
143157

158+
static struct ie31200_pvt {
159+
struct ie31200_priv *priv[IE31200_IMC_NUM];
160+
} ie31200_pvt;
161+
144162
enum ie31200_chips {
145163
IE31200 = 0,
164+
IE31200_1 = 1,
146165
};
147166

148167
struct ie31200_dev_info {
@@ -159,6 +178,9 @@ static const struct ie31200_dev_info ie31200_devs[] = {
159178
[IE31200] = {
160179
.ctl_name = "IE31200"
161180
},
181+
[IE31200_1] = {
182+
.ctl_name = "IE31200_1"
183+
},
162184
};
163185

164186
struct dimm_data {
@@ -202,23 +224,54 @@ static bool ecc_capable(struct pci_dev *pdev)
202224
return true;
203225
}
204226

227+
#define mci_to_pci_dev(mci) (((struct ie31200_priv *)(mci)->pvt_info)->pdev)
228+
205229
static void ie31200_clear_error_info(struct mem_ctl_info *mci)
206230
{
231+
struct ie31200_priv *priv = mci->pvt_info;
232+
struct res_config *cfg = priv->cfg;
233+
234+
/*
235+
* The PCI ERRSTS register is deprecated. Write the MSR to clear
236+
* the ECC error log registers in all memory controllers.
237+
*/
238+
if (cfg->msr_clear_eccerrlog_offset) {
239+
if (wrmsr_safe(cfg->msr_clear_eccerrlog_offset,
240+
cfg->reg_eccerrlog_ce_mask |
241+
cfg->reg_eccerrlog_ce_ovfl_mask |
242+
cfg->reg_eccerrlog_ue_mask |
243+
cfg->reg_eccerrlog_ue_ovfl_mask, 0) < 0)
244+
ie31200_printk(KERN_ERR, "Failed to wrmsr.\n");
245+
246+
return;
247+
}
248+
207249
/*
208250
* Clear any error bits.
209251
* (Yes, we really clear bits by writing 1 to them.)
210252
*/
211-
pci_write_bits16(to_pci_dev(mci->pdev), IE31200_ERRSTS,
253+
pci_write_bits16(mci_to_pci_dev(mci), IE31200_ERRSTS,
212254
IE31200_ERRSTS_BITS, IE31200_ERRSTS_BITS);
213255
}
214256

215257
static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
216258
struct ie31200_error_info *info)
217259
{
218-
struct pci_dev *pdev;
260+
struct pci_dev *pdev = mci_to_pci_dev(mci);
219261
struct ie31200_priv *priv = mci->pvt_info;
220262

221-
pdev = to_pci_dev(mci->pdev);
263+
/*
264+
* The PCI ERRSTS register is deprecated, directly read the
265+
* MMIO-mapped ECC error log registers.
266+
*/
267+
if (priv->cfg->msr_clear_eccerrlog_offset) {
268+
info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
269+
if (nr_channels == 2)
270+
info->eccerrlog[1] = lo_hi_readq(priv->c1errlog);
271+
272+
ie31200_clear_error_info(mci);
273+
return;
274+
}
222275

223276
/*
224277
* This is a mess because there is no atomic way to read all the
@@ -259,13 +312,15 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci,
259312
int channel;
260313
u64 log;
261314

262-
if (!(info->errsts & IE31200_ERRSTS_BITS))
263-
return;
315+
if (!cfg->msr_clear_eccerrlog_offset) {
316+
if (!(info->errsts & IE31200_ERRSTS_BITS))
317+
return;
264318

265-
if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
266-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
267-
-1, -1, -1, "UE overwrote CE", "");
268-
info->errsts = info->errsts2;
319+
if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
320+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
321+
-1, -1, -1, "UE overwrote CE", "");
322+
info->errsts = info->errsts2;
323+
}
269324
}
270325

271326
for (channel = 0; channel < nr_channels; channel++) {
@@ -295,7 +350,7 @@ static void ie31200_check(struct mem_ctl_info *mci)
295350
ie31200_process_error_info(mci, &info);
296351
}
297352

298-
static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg)
353+
static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc)
299354
{
300355
union {
301356
u64 mchbar;
@@ -309,6 +364,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config
309364
pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low);
310365
pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high);
311366
u.mchbar &= cfg->reg_mchbar_mask;
367+
u.mchbar += cfg->reg_mchbar_window_size * mc;
312368

313369
if (u.mchbar != (resource_size_t)u.mchbar) {
314370
ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n",
@@ -333,7 +389,7 @@ static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm,
333389
}
334390

335391
static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window,
336-
struct res_config *cfg)
392+
struct res_config *cfg, int mc)
337393
{
338394
struct dimm_data dimm_info;
339395
struct dimm_info *dimm;
@@ -347,8 +403,8 @@ static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *wind
347403

348404
for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
349405
populate_dimm_info(&dimm_info, addr_decode, j, cfg);
350-
edac_dbg(0, "channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n",
351-
i, j, dimm_info.size >> 20,
406+
edac_dbg(0, "mc: %d, channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n",
407+
mc, i, j, dimm_info.size >> 20,
352408
dimm_info.ranks,
353409
dimm_info.dtype);
354410

@@ -370,7 +426,7 @@ static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *wind
370426
}
371427
}
372428

373-
static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg)
429+
static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, int mc)
374430
{
375431
struct edac_mc_layer layers[2];
376432
struct ie31200_priv *priv;
@@ -385,24 +441,23 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg)
385441
layers[1].type = EDAC_MC_LAYER_CHANNEL;
386442
layers[1].size = nr_channels;
387443
layers[1].is_virt_csrow = false;
388-
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
444+
mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers,
389445
sizeof(struct ie31200_priv));
390446
if (!mci)
391447
return -ENOMEM;
392448

393-
window = ie31200_map_mchbar(pdev, cfg);
449+
window = ie31200_map_mchbar(pdev, cfg, mc);
394450
if (!window) {
395451
ret = -ENODEV;
396452
goto fail_free;
397453
}
398454

399455
edac_dbg(3, "MC: init mci\n");
400-
mci->pdev = &pdev->dev;
401456
mci->mtype_cap = BIT(cfg->mtype);
402457
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
403458
mci->edac_cap = EDAC_FLAG_SECDED;
404459
mci->mod_name = EDAC_MOD_STR;
405-
mci->ctl_name = ie31200_devs[0].ctl_name;
460+
mci->ctl_name = ie31200_devs[mc].ctl_name;
406461
mci->dev_name = pci_name(pdev);
407462
mci->edac_check = ie31200_check;
408463
mci->ctl_page_to_phys = NULL;
@@ -411,8 +466,22 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg)
411466
priv->c0errlog = window + cfg->reg_eccerrlog_offset[0];
412467
priv->c1errlog = window + cfg->reg_eccerrlog_offset[1];
413468
priv->cfg = cfg;
469+
priv->mci = mci;
470+
priv->pdev = pdev;
471+
device_initialize(&priv->dev);
472+
/*
473+
* The EDAC core uses mci->pdev (pointer to the structure device)
474+
* as the memory controller ID. The SoCs attach one or more memory
475+
* controllers to a single pci_dev (a single pci_dev->dev can
476+
* correspond to multiple memory controllers).
477+
*
478+
* To make mci->pdev unique, assign pci_dev->dev to mci->pdev
479+
* for the first memory controller and assign a unique priv->dev
480+
* to mci->pdev for each additional memory controller.
481+
*/
482+
mci->pdev = mc ? &priv->dev : &pdev->dev;
414483

415-
ie31200_get_dimm_config(mci, window, cfg);
484+
ie31200_get_dimm_config(mci, window, cfg, mc);
416485
ie31200_clear_error_info(mci);
417486

418487
if (edac_mc_add_mc(mci)) {
@@ -421,6 +490,7 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg)
421490
goto fail_unmap;
422491
}
423492

493+
ie31200_pvt.priv[mc] = priv;
424494
return 0;
425495
fail_unmap:
426496
iounmap(window);
@@ -429,9 +499,27 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg)
429499
return ret;
430500
}
431501

502+
static void ie31200_unregister_mcis(void)
503+
{
504+
struct ie31200_priv *priv;
505+
struct mem_ctl_info *mci;
506+
int i;
507+
508+
for (i = 0; i < IE31200_IMC_NUM; i++) {
509+
priv = ie31200_pvt.priv[i];
510+
if (!priv)
511+
continue;
512+
513+
mci = priv->mci;
514+
edac_mc_del_mc(mci->pdev);
515+
iounmap(priv->window);
516+
edac_mc_free(mci);
517+
}
518+
}
519+
432520
static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg)
433521
{
434-
int ret;
522+
int i, ret;
435523

436524
edac_dbg(0, "MC:\n");
437525

@@ -440,13 +528,19 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg)
440528
return -ENODEV;
441529
}
442530

443-
ret = ie31200_register_mci(pdev, cfg);
444-
if (ret)
445-
return ret;
531+
for (i = 0; i < cfg->imc_num; i++) {
532+
ret = ie31200_register_mci(pdev, cfg, i);
533+
if (ret)
534+
goto fail_register;
535+
}
446536

447537
/* get this far and it's successful. */
448538
edac_dbg(3, "MC: success\n");
449539
return 0;
540+
541+
fail_register:
542+
ie31200_unregister_mcis();
543+
return ret;
450544
}
451545

452546
static int ie31200_init_one(struct pci_dev *pdev,
@@ -466,22 +560,15 @@ static int ie31200_init_one(struct pci_dev *pdev,
466560

467561
static void ie31200_remove_one(struct pci_dev *pdev)
468562
{
469-
struct mem_ctl_info *mci;
470-
struct ie31200_priv *priv;
471-
472563
edac_dbg(0, "\n");
473564
pci_dev_put(mci_pdev);
474565
mci_pdev = NULL;
475-
mci = edac_mc_del_mc(&pdev->dev);
476-
if (!mci)
477-
return;
478-
priv = mci->pvt_info;
479-
iounmap(priv->window);
480-
edac_mc_free(mci);
566+
ie31200_unregister_mcis();
481567
}
482568

483569
static struct res_config snb_cfg = {
484570
.mtype = MEM_DDR3,
571+
.imc_num = 1,
485572
.reg_mchbar_mask = GENMASK_ULL(38, 15),
486573
.reg_mchbar_window_size = BIT_ULL(15),
487574
.reg_eccerrlog_offset[0] = 0x40c8,
@@ -503,6 +590,7 @@ static struct res_config snb_cfg = {
503590

504591
static struct res_config skl_cfg = {
505592
.mtype = MEM_DDR4,
593+
.imc_num = 1,
506594
.reg_mchbar_mask = GENMASK_ULL(38, 15),
507595
.reg_mchbar_window_size = BIT_ULL(15),
508596
.reg_eccerrlog_offset[0] = 0x4048,
@@ -522,6 +610,31 @@ static struct res_config skl_cfg = {
522610
.reg_mad_dimm_width_mask[1] = GENMASK(25, 24),
523611
};
524612

613+
struct res_config rpl_s_cfg = {
614+
.mtype = MEM_DDR5,
615+
.imc_num = 2,
616+
.reg_mchbar_mask = GENMASK_ULL(41, 17),
617+
.reg_mchbar_window_size = BIT_ULL(16),
618+
.reg_eccerrlog_offset[0] = 0xe048,
619+
.reg_eccerrlog_offset[1] = 0xe848,
620+
.reg_eccerrlog_ce_mask = BIT_ULL(0),
621+
.reg_eccerrlog_ce_ovfl_mask = BIT_ULL(1),
622+
.reg_eccerrlog_ue_mask = BIT_ULL(2),
623+
.reg_eccerrlog_ue_ovfl_mask = BIT_ULL(3),
624+
.reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27),
625+
.reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16),
626+
.msr_clear_eccerrlog_offset = 0x791,
627+
.reg_mad_dimm_offset[0] = 0xd80c,
628+
.reg_mad_dimm_offset[1] = 0xd810,
629+
.reg_mad_dimm_size_granularity = BIT_ULL(29),
630+
.reg_mad_dimm_size_mask[0] = GENMASK(6, 0),
631+
.reg_mad_dimm_size_mask[1] = GENMASK(22, 16),
632+
.reg_mad_dimm_rank_mask[0] = GENMASK(10, 9),
633+
.reg_mad_dimm_rank_mask[1] = GENMASK(27, 26),
634+
.reg_mad_dimm_width_mask[0] = GENMASK(8, 7),
635+
.reg_mad_dimm_width_mask[1] = GENMASK(25, 24),
636+
};
637+
525638
static const struct pci_device_id ie31200_pci_tbl[] = {
526639
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg },
527640
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg },
@@ -545,6 +658,9 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
545658
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg },
546659
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg },
547660
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg },
661+
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg},
662+
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg},
663+
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg},
548664
{ 0, } /* 0 terminated list. */
549665
};
550666
MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl);

0 commit comments

Comments
 (0)