Skip to content

Commit 773ac53

Browse files
committed
Merge tag 'x86_urgent_for_v5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Borislav Petkov: "A bunch of x86/urgent stuff accumulated for the last two weeks so lemme unload it to you. It should be all totally risk-free, of course. :-) - Fix out-of-spec hardware (1st gen Hygon) which does not implement MSR_AMD64_SEV even though the spec clearly states so, and check CPUID bits first. - Send only one signal to a task when it is a SEGV_PKUERR si_code type. - Do away with all the wankery of reserving X amount of memory in the first megabyte to prevent BIOS corrupting it and simply and unconditionally reserve the whole first megabyte. - Make alternatives NOP optimization work at an arbitrary position within the patched sequence because the compiler can put single-byte NOPs for alignment anywhere in the sequence (32-bit retpoline), vs our previous assumption that the NOPs are only appended. - Force-disable ENQCMD[S] instructions support and remove update_pasid() because of insufficient protection against FPU state modification in an interrupt context, among other xstate horrors which are being addressed at the moment. This one limits the fallout until proper enablement. - Use cpu_feature_enabled() in the idxd driver so that it can be build-time disabled through the defines in disabled-features.h. - Fix LVT thermal setup for SMI delivery mode by making sure the APIC LVT value is read before APIC initialization so that softlockups during boot do not happen at least on one machine. 
- Mark all legacy interrupts as legacy vectors when the IO-APIC is disabled and when all legacy interrupts are routed through the PIC" * tag 'x86_urgent_for_v5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/sev: Check SME/SEV support in CPUID first x86/fault: Don't send SIGSEGV twice on SEGV_PKUERR x86/setup: Always reserve the first 1M of RAM x86/alternative: Optimize single-byte NOPs at an arbitrary position x86/cpufeatures: Force disable X86_FEATURE_ENQCMD and remove update_pasid() dmaengine: idxd: Use cpu_feature_enabled() x86/thermal: Fix LVT thermal setup for SMI delivery mode x86/apic: Mark _all_ legacy interrupts when IO/APIC is missing
2 parents f5b6eb1 + 009767d commit 773ac53

File tree

16 files changed

+145
-126
lines changed

16 files changed

+145
-126
lines changed

arch/x86/include/asm/apic.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(void)
174174
extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
175175
extern void lapic_assign_system_vectors(void);
176176
extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
177+
extern void lapic_update_legacy_vectors(void);
177178
extern void lapic_online(void);
178179
extern void lapic_offline(void);
179180
extern bool apic_needs_pit(void);

arch/x86/include/asm/disabled-features.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,8 @@
5656
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
5757
#endif
5858

59-
#ifdef CONFIG_IOMMU_SUPPORT
60-
# define DISABLE_ENQCMD 0
61-
#else
62-
# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
63-
#endif
59+
/* Force disable because it's broken beyond repair */
60+
#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
6461

6562
#ifdef CONFIG_X86_SGX
6663
# define DISABLE_SGX 0

arch/x86/include/asm/fpu/api.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,6 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
106106
*/
107107
#define PASID_DISABLED 0
108108

109-
#ifdef CONFIG_IOMMU_SUPPORT
110-
/* Update current's PASID MSR/state by mm's PASID. */
111-
void update_pasid(void);
112-
#else
113109
static inline void update_pasid(void) { }
114-
#endif
110+
115111
#endif /* _ASM_X86_FPU_API_H */

arch/x86/include/asm/fpu/internal.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -584,13 +584,6 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
584584
pkru_val = pk->pkru;
585585
}
586586
__write_pkru(pkru_val);
587-
588-
/*
589-
* Expensive PASID MSR write will be avoided in update_pasid() because
590-
* TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated
591-
* unless it's different from mm->pasid to reduce overhead.
592-
*/
593-
update_pasid();
594587
}
595588

596589
#endif /* _ASM_X86_FPU_INTERNAL_H */

arch/x86/include/asm/thermal.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
#define _ASM_X86_THERMAL_H
44

55
#ifdef CONFIG_X86_THERMAL_VECTOR
6+
void therm_lvt_init(void);
67
void intel_init_thermal(struct cpuinfo_x86 *c);
78
bool x86_thermal_enabled(void);
89
void intel_thermal_interrupt(void);
910
#else
10-
static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
11+
static inline void therm_lvt_init(void) { }
12+
static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
1113
#endif
1214

1315
#endif /* _ASM_X86_THERMAL_H */

arch/x86/kernel/alternative.c

Lines changed: 46 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -182,42 +182,70 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
182182
n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
183183
}
184184

185+
/*
186+
* optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
187+
*
188+
* @instr: instruction byte stream
189+
* @instrlen: length of the above
190+
* @off: offset within @instr where the first NOP has been detected
191+
*
192+
* Return: number of NOPs found (and replaced).
193+
*/
194+
static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
195+
{
196+
unsigned long flags;
197+
int i = off, nnops;
198+
199+
while (i < instrlen) {
200+
if (instr[i] != 0x90)
201+
break;
202+
203+
i++;
204+
}
205+
206+
nnops = i - off;
207+
208+
if (nnops <= 1)
209+
return nnops;
210+
211+
local_irq_save(flags);
212+
add_nops(instr + off, nnops);
213+
local_irq_restore(flags);
214+
215+
DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
216+
217+
return nnops;
218+
}
219+
185220
/*
186221
* "noinline" to cause control flow change and thus invalidate I$ and
187222
* cause refetch after modification.
188223
*/
189224
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
190225
{
191-
unsigned long flags;
192226
struct insn insn;
193-
int nop, i = 0;
227+
int i = 0;
194228

195229
/*
196-
* Jump over the non-NOP insns, the remaining bytes must be single-byte
197-
* NOPs, optimize them.
230+
* Jump over the non-NOP insns and optimize single-byte NOPs into bigger
231+
* ones.
198232
*/
199233
for (;;) {
200234
if (insn_decode_kernel(&insn, &instr[i]))
201235
return;
202236

237+
/*
238+
* See if this and any potentially following NOPs can be
239+
* optimized.
240+
*/
203241
if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
204-
break;
205-
206-
if ((i += insn.length) >= a->instrlen)
207-
return;
208-
}
242+
i += optimize_nops_range(instr, a->instrlen, i);
243+
else
244+
i += insn.length;
209245

210-
for (nop = i; i < a->instrlen; i++) {
211-
if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
246+
if (i >= a->instrlen)
212247
return;
213248
}
214-
215-
local_irq_save(flags);
216-
add_nops(instr + nop, i - nop);
217-
local_irq_restore(flags);
218-
219-
DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
220-
instr, nop, a->instrlen);
221249
}
222250

223251
/*

arch/x86/kernel/apic/apic.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2604,6 +2604,7 @@ static void __init apic_bsp_setup(bool upmode)
26042604
end_local_APIC_setup();
26052605
irq_remap_enable_fault_handling();
26062606
setup_IO_APIC();
2607+
lapic_update_legacy_vectors();
26072608
}
26082609

26092610
#ifdef CONFIG_UP_LATE_INIT

arch/x86/kernel/apic/vector.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,26 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace)
738738
irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
739739
}
740740

741+
void __init lapic_update_legacy_vectors(void)
742+
{
743+
unsigned int i;
744+
745+
if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0)
746+
return;
747+
748+
/*
749+
* If the IO/APIC is disabled via config, kernel command line or
750+
* lack of enumeration then all legacy interrupts are routed
751+
* through the PIC. Make sure that they are marked as legacy
752+
* vectors. PIC_CASCADE_IRQ has already been marked in
753+
* lapic_assign_system_vectors().
754+
*/
755+
for (i = 0; i < nr_legacy_irqs(); i++) {
756+
if (i != PIC_CASCADE_IR)
757+
lapic_assign_legacy_vector(i, true);
758+
}
759+
}
760+
741761
void __init lapic_assign_system_vectors(void)
742762
{
743763
unsigned int i, vector = 0;

arch/x86/kernel/fpu/xstate.c

Lines changed: 0 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1402,60 +1402,3 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
14021402
return 0;
14031403
}
14041404
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
1405-
1406-
#ifdef CONFIG_IOMMU_SUPPORT
1407-
void update_pasid(void)
1408-
{
1409-
u64 pasid_state;
1410-
u32 pasid;
1411-
1412-
if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
1413-
return;
1414-
1415-
if (!current->mm)
1416-
return;
1417-
1418-
pasid = READ_ONCE(current->mm->pasid);
1419-
/* Set the valid bit in the PASID MSR/state only for valid pasid. */
1420-
pasid_state = pasid == PASID_DISABLED ?
1421-
pasid : pasid | MSR_IA32_PASID_VALID;
1422-
1423-
/*
1424-
* No need to hold fregs_lock() since the task's fpstate won't
1425-
* be changed by others (e.g. ptrace) while the task is being
1426-
* switched to or is in IPI.
1427-
*/
1428-
if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
1429-
/* The MSR is active and can be directly updated. */
1430-
wrmsrl(MSR_IA32_PASID, pasid_state);
1431-
} else {
1432-
struct fpu *fpu = &current->thread.fpu;
1433-
struct ia32_pasid_state *ppasid_state;
1434-
struct xregs_state *xsave;
1435-
1436-
/*
1437-
* The CPU's xstate registers are not currently active. Just
1438-
* update the PASID state in the memory buffer here. The
1439-
* PASID MSR will be loaded when returning to user mode.
1440-
*/
1441-
xsave = &fpu->state.xsave;
1442-
xsave->header.xfeatures |= XFEATURE_MASK_PASID;
1443-
ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
1444-
/*
1445-
* Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state
1446-
* won't be NULL and no need to check its value.
1447-
*
1448-
* Only update the task's PASID state when it's different
1449-
* from the mm's pasid.
1450-
*/
1451-
if (ppasid_state->pasid != pasid_state) {
1452-
/*
1453-
* Invalid fpregs so that state restoring will pick up
1454-
* the PASID state.
1455-
*/
1456-
__fpu_invalidate_fpregs_state(fpu);
1457-
ppasid_state->pasid = pasid_state;
1458-
}
1459-
}
1460-
}
1461-
#endif /* CONFIG_IOMMU_SUPPORT */

arch/x86/kernel/setup.c

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include <asm/pci-direct.h>
4545
#include <asm/prom.h>
4646
#include <asm/proto.h>
47+
#include <asm/thermal.h>
4748
#include <asm/unwind.h>
4849
#include <asm/vsyscall.h>
4950
#include <linux/vmalloc.h>
@@ -637,11 +638,11 @@ static void __init trim_snb_memory(void)
637638
* them from accessing certain memory ranges, namely anything below
638639
* 1M and in the pages listed in bad_pages[] above.
639640
*
640-
* To avoid these pages being ever accessed by SNB gfx devices
641-
* reserve all memory below the 1 MB mark and bad_pages that have
642-
* not already been reserved at boot time.
641+
* To avoid these pages being ever accessed by SNB gfx devices reserve
642+
* bad_pages that have not already been reserved at boot time.
643+
* All memory below the 1 MB mark is anyway reserved later during
644+
* setup_arch(), so there is no need to reserve it here.
643645
*/
644-
memblock_reserve(0, 1<<20);
645646

646647
for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
647648
if (memblock_reserve(bad_pages[i], PAGE_SIZE))
@@ -733,14 +734,14 @@ static void __init early_reserve_memory(void)
733734
* The first 4Kb of memory is a BIOS owned area, but generally it is
734735
* not listed as such in the E820 table.
735736
*
736-
* Reserve the first memory page and typically some additional
737-
* memory (64KiB by default) since some BIOSes are known to corrupt
738-
* low memory. See the Kconfig help text for X86_RESERVE_LOW.
737+
* Reserve the first 64K of memory since some BIOSes are known to
738+
* corrupt low memory. After the real mode trampoline is allocated the
739+
* rest of the memory below 640k is reserved.
739740
*
740741
* In addition, make sure page 0 is always reserved because on
741742
* systems with L1TF its contents can be leaked to user processes.
742743
*/
743-
memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
744+
memblock_reserve(0, SZ_64K);
744745

745746
early_reserve_initrd();
746747

@@ -751,6 +752,7 @@ static void __init early_reserve_memory(void)
751752

752753
reserve_ibft_region();
753754
reserve_bios_regions();
755+
trim_snb_memory();
754756
}
755757

756758
/*
@@ -1081,14 +1083,20 @@ void __init setup_arch(char **cmdline_p)
10811083
(max_pfn_mapped<<PAGE_SHIFT) - 1);
10821084
#endif
10831085

1084-
reserve_real_mode();
1085-
10861086
/*
1087-
* Reserving memory causing GPU hangs on Sandy Bridge integrated
1088-
* graphics devices should be done after we allocated memory under
1089-
* 1M for the real mode trampoline.
1087+
* Find free memory for the real mode trampoline and place it
1088+
* there.
1089+
* If there is not enough free memory under 1M, on EFI-enabled
1090+
* systems there will be additional attempt to reclaim the memory
1091+
* for the real mode trampoline at efi_free_boot_services().
1092+
*
1093+
* Unconditionally reserve the entire first 1M of RAM because
1094+
* BIOSes are known to corrupt low memory and several
1095+
* hundred kilobytes are not worth complex detection of what memory gets
1096+
* clobbered. Moreover, on machines with SandyBridge graphics or in
1097+
* setups that use crashkernel the entire 1M is reserved anyway.
10901098
*/
1091-
trim_snb_memory();
1099+
reserve_real_mode();
10921100

10931101
init_mem_mapping();
10941102

@@ -1226,6 +1234,14 @@ void __init setup_arch(char **cmdline_p)
12261234

12271235
x86_init.timers.wallclock_init();
12281236

1237+
/*
1238+
* This needs to run before setup_local_APIC() which soft-disables the
1239+
* local APIC temporarily and that masks the thermal LVT interrupt,
1240+
* leading to softlockups on machines which have configured SMI
1241+
* interrupt delivery.
1242+
*/
1243+
therm_lvt_init();
1244+
12291245
mcheck_init();
12301246

12311247
register_refined_jiffies(CLOCK_TICK_RATE);

0 commit comments

Comments
 (0)