Skip to content

Commit eeed3e0

Browse files
qzhuo2 authored and aegl committed
EDAC/{skx_common,i10nm}: Fix the loss of saved RRL for HBM pseudo channel 0
When enabling the retry_rd_err_log (RRL) feature during the loading of the i10nm_edac driver with the module parameter retry_rd_err_log=2 (Linux RRL control mode), the default values of the control bits of RRL are saved so that they can be restored during the unloading of the driver.

In the current code, the RRL of pseudo channel 1 of HBM overwrites pseudo channel 0 during the loading of the driver, resulting in the loss of saved RRL for pseudo channel 0. This causes the RRL of pseudo channel 0 of HBM to be wrongly restored with the values from pseudo channel 1 when unloading the driver.

Fix this issue by creating two separate groups of RRL control registers per channel to save default RRL settings of two {sub-,pseudo-}channels.

Fixes: acd4cf6 ("EDAC/i10nm: Retrieve and print retry_rd_err_log registers for HBM")
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Tested-by: Feng Xu <feng.f.xu@intel.com>
Link: https://lore.kernel.org/r/20250417150724.1170168-3-qiuxu.zhuo@intel.com
1 parent 20d2d47 commit eeed3e0

File tree

2 files changed

+27
-19
lines changed

2 files changed

+27
-19
lines changed

drivers/edac/i10nm_base.c

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64,
9595
static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
9696
static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
9797

98-
static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
98+
static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, u32 *rrl_ctl,
9999
u32 *offsets_scrub, u32 *offsets_demand,
100100
u32 *offsets_demand2)
101101
{
@@ -108,10 +108,10 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
108108

109109
if (enable) {
110110
/* Save default configurations */
111-
imc->chan[chan].retry_rd_err_log_s = s;
112-
imc->chan[chan].retry_rd_err_log_d = d;
111+
rrl_ctl[0] = s;
112+
rrl_ctl[1] = d;
113113
if (offsets_demand2)
114-
imc->chan[chan].retry_rd_err_log_d2 = d2;
114+
rrl_ctl[2] = d2;
115115

116116
s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
117117
s |= RETRY_RD_ERR_LOG_EN;
@@ -125,25 +125,25 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
125125
}
126126
} else {
127127
/* Restore default configurations */
128-
if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
128+
if (rrl_ctl[0] & RETRY_RD_ERR_LOG_UC)
129129
s |= RETRY_RD_ERR_LOG_UC;
130-
if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
130+
if (rrl_ctl[0] & RETRY_RD_ERR_LOG_NOOVER)
131131
s |= RETRY_RD_ERR_LOG_NOOVER;
132-
if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
132+
if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN))
133133
s &= ~RETRY_RD_ERR_LOG_EN;
134-
if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
134+
if (rrl_ctl[1] & RETRY_RD_ERR_LOG_UC)
135135
d |= RETRY_RD_ERR_LOG_UC;
136-
if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
136+
if (rrl_ctl[1] & RETRY_RD_ERR_LOG_NOOVER)
137137
d |= RETRY_RD_ERR_LOG_NOOVER;
138-
if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
138+
if (!(rrl_ctl[1] & RETRY_RD_ERR_LOG_EN))
139139
d &= ~RETRY_RD_ERR_LOG_EN;
140140

141141
if (offsets_demand2) {
142-
if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC)
142+
if (rrl_ctl[2] & RETRY_RD_ERR_LOG_UC)
143143
d2 |= RETRY_RD_ERR_LOG_UC;
144-
if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER))
144+
if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_NOOVER))
145145
d2 &= ~RETRY_RD_ERR_LOG_NOOVER;
146-
if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN))
146+
if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_EN))
147147
d2 &= ~RETRY_RD_ERR_LOG_EN;
148148
}
149149
}
@@ -157,6 +157,7 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
157157
static void enable_retry_rd_err_log(bool enable)
158158
{
159159
int i, j, imc_num, chan_num;
160+
struct skx_channel *chan;
160161
struct skx_imc *imc;
161162
struct skx_dev *d;
162163

@@ -171,8 +172,9 @@ static void enable_retry_rd_err_log(bool enable)
171172
if (!imc->mbase)
172173
continue;
173174

175+
chan = d->imc[i].chan;
174176
for (j = 0; j < chan_num; j++)
175-
__enable_retry_rd_err_log(imc, j, enable,
177+
__enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0],
176178
res_cfg->offsets_scrub,
177179
res_cfg->offsets_demand,
178180
res_cfg->offsets_demand2);
@@ -186,12 +188,13 @@ static void enable_retry_rd_err_log(bool enable)
186188
if (!imc->mbase || !imc->hbm_mc)
187189
continue;
188190

191+
chan = d->imc[i].chan;
189192
for (j = 0; j < chan_num; j++) {
190-
__enable_retry_rd_err_log(imc, j, enable,
193+
__enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0],
191194
res_cfg->offsets_scrub_hbm0,
192195
res_cfg->offsets_demand_hbm0,
193196
NULL);
194-
__enable_retry_rd_err_log(imc, j, enable,
197+
__enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[1],
195198
res_cfg->offsets_scrub_hbm1,
196199
res_cfg->offsets_demand_hbm1,
197200
NULL);

drivers/edac/skx_common.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@
7979
*/
8080
#define MCACOD_EXT_MEM_ERR 0x280
8181

82+
/* Max RRL register sets per {,sub-,pseudo-}channel. */
83+
#define NUM_RRL_SET 3
84+
8285
/*
8386
* Each cpu socket contains some pci devices that provide global
8487
* information, and also some that are local to each of the two
@@ -117,9 +120,11 @@ struct skx_dev {
117120
struct skx_channel {
118121
struct pci_dev *cdev;
119122
struct pci_dev *edev;
120-
u32 retry_rd_err_log_s;
121-
u32 retry_rd_err_log_d;
122-
u32 retry_rd_err_log_d2;
123+
/*
124+
* Two groups of RRL control registers per channel to save default RRL
125+
* settings of two {sub-,pseudo-}channels in Linux RRL control mode.
126+
*/
127+
u32 rrl_ctl[2][NUM_RRL_SET];
123128
struct skx_dimm {
124129
u8 close_pg;
125130
u8 bank_xor_enable;

0 commit comments

Comments
 (0)