Skip to content

Commit 4240e2e

Browse files
yghannam authored and bp3tk0v committed
x86/MCE: Always save CS register on AMD Zen IF Poison errors
The Instruction Fetch (IF) units on current AMD Zen-based systems do not guarantee a synchronous #MC is delivered for poison consumption errors. Therefore, MCG_STATUS[EIPV|RIPV] will not be set. However, the microarchitecture does guarantee that the exception is delivered within the same context. In other words, the exact rIP is not known, but the context is known to not have changed.

There is no architecturally-defined method to determine this behavior.

The Code Segment (CS) register is always valid on such IF unit poison errors regardless of the value of MCG_STATUS[EIPV|RIPV].

Add a quirk to save the CS register for poison consumption from the IF unit banks.

This is needed to properly determine the context of the error. Otherwise, the severity grading function will assume the context is IN_KERNEL due to the m->cs value being 0 (the initialized value). This leads to unnecessary kernel panics on data poison errors due to the kernel believing the poison consumption occurred in kernel context.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20230814200853.29258-1-yazen.ghannam@amd.com
1 parent c3629dd commit 4240e2e

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

arch/x86/kernel/cpu/mce/core.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,26 @@ static noinstr bool quirk_skylake_repmov(void)
842842
return false;
843843
}
844844

845+
/*
846+
* Some Zen-based Instruction Fetch Units set EIPV=RIPV=0 on poison consumption
847+
* errors. This means mce_gather_info() will not save the "ip" and "cs" registers.
848+
*
849+
* However, the context is still valid, so save the "cs" register for later use.
850+
*
851+
* The "ip" register is truly unknown, so don't save it or fixup EIPV/RIPV.
852+
*
853+
* The Instruction Fetch Unit is at MCA bank 1 for all affected systems.
854+
*/
855+
static __always_inline void quirk_zen_ifu(int bank, struct mce *m, struct pt_regs *regs)
856+
{
857+
if (bank != 1)
858+
return;
859+
if (!(m->status & MCI_STATUS_POISON))
860+
return;
861+
862+
m->cs = regs->cs;
863+
}
864+
845865
/*
846866
* Do a quick check if any of the events requires a panic.
847867
* This decides if we keep the events around or clear them.
@@ -861,6 +881,9 @@ static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned lo
861881
if (mce_flags.snb_ifu_quirk)
862882
quirk_sandybridge_ifu(i, m, regs);
863883

884+
if (mce_flags.zen_ifu_quirk)
885+
quirk_zen_ifu(i, m, regs);
886+
864887
m->bank = i;
865888
if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) {
866889
mce_read_aux(m, i);
@@ -1849,6 +1872,9 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
18491872
if (c->x86 == 0x15 && c->x86_model <= 0xf)
18501873
mce_flags.overflow_recov = 1;
18511874

1875+
if (c->x86 >= 0x17 && c->x86 <= 0x1A)
1876+
mce_flags.zen_ifu_quirk = 1;
1877+
18521878
}
18531879

18541880
if (c->x86_vendor == X86_VENDOR_INTEL) {

arch/x86/kernel/cpu/mce/internal.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,9 @@ struct mce_vendor_flags {
157157
*/
158158
smca : 1,
159159

160+
/* Zen IFU quirk */
161+
zen_ifu_quirk : 1,
162+
160163
/* AMD-style error thresholding banks present. */
161164
amd_threshold : 1,
162165

@@ -172,7 +175,7 @@ struct mce_vendor_flags {
172175
/* Skylake, Cascade Lake, Cooper Lake REP;MOVS* quirk */
173176
skx_repmov_quirk : 1,
174177

175-
__reserved_0 : 56;
178+
__reserved_0 : 55;
176179
};
177180

178181
extern struct mce_vendor_flags mce_flags;

0 commit comments

Comments
 (0)