Skip to content

Commit 9776dd3

Browse files
committed
Merge tag 'x86-irq-2024-05-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 interrupt handling updates from Thomas Gleixner: "Add support for posted interrupts on bare metal. Posted interrupts is a virtualization feature which allows injecting interrupts directly into a guest without host interaction. The VT-d interrupt remapping hardware sets the bit which corresponds to the interrupt vector in a vector bitmap which is either used to inject the interrupt directly into the guest via a virtualized APIC or, in case the guest is scheduled out, provides a host side notification interrupt which informs the host that an interrupt has been marked pending in the bitmap. This can be utilized on bare metal for scenarios where multiple devices, e.g. NVMe storage, raise interrupts with a high frequency. In the default mode these interrupts are handled independently and therefore require a full roundtrip of interrupt entry/exit. By utilizing posted interrupts, this roundtrip overhead can be avoided by coalescing these interrupt entries to a single entry for the posted interrupt notification. The notification interrupt then demultiplexes the pending bits in a memory based bitmap and invokes the corresponding device specific handlers. Depending on the usage scenario and device utilization, throughput improvements between 10% and 130% have been measured.
As this is only relevant for high end servers with multiple device queues per CPU attached and counterproductive for situations where interrupts are arriving at distinct times, the functionality is opt-in via a kernel command line parameter" * tag 'x86-irq-2024-05-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/irq: Use existing helper for pending vector check iommu/vt-d: Enable posted mode for device MSIs iommu/vt-d: Make posted MSI an opt-in command line option x86/irq: Extend checks for pending vectors to posted interrupts x86/irq: Factor out common code for checking pending interrupts x86/irq: Install posted MSI notification handler x86/irq: Factor out handler invocation from common_interrupt() x86/irq: Set up per host CPU posted interrupt descriptors x86/irq: Reserve a per CPU IDT vector for posted MSIs x86/irq: Add a Kconfig option for posted MSI x86/irq: Remove bitfields in posted interrupt descriptor x86/irq: Unionize PID.PIR for 64bit access w/o casting KVM: VMX: Move posted interrupt descriptor out of VMX code
2 parents 6bfd2d4 + 6ecc2e7 commit 9776dd3

File tree

19 files changed

+450
-119
lines changed

19 files changed

+450
-119
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2251,6 +2251,8 @@
22512251
no_x2apic_optout
22522252
BIOS x2APIC opt-out request will be ignored
22532253
nopost disable Interrupt Posting
2254+
posted_msi
2255+
enable MSIs delivered as posted interrupts
22542256

22552257
iomem= Disable strict checking of access to MMIO memory
22562258
strict regions from userspace.

arch/x86/Kconfig

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,17 @@ config X86_X2APIC
466466

467467
If you don't know what to do here, say N.
468468

469+
config X86_POSTED_MSI
470+
bool "Enable MSI and MSI-x delivery by posted interrupts"
471+
depends on X86_64 && IRQ_REMAP
472+
help
473+
This enables MSIs that are under interrupt remapping to be delivered as
474+
posted interrupts to the host kernel. Interrupt throughput can
475+
potentially be improved by coalescing CPU notifications during high
476+
frequency bursts.
477+
478+
If you don't know what to do here, say N.
479+
469480
config X86_MPPARSE
470481
bool "Enable MPS table" if ACPI
471482
default y

arch/x86/entry/entry_fred.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ static idtentry_t sysvec_table[NR_SYSTEM_VECTORS] __ro_after_init = {
117117
SYSVEC(POSTED_INTR_VECTOR, kvm_posted_intr_ipi),
118118
SYSVEC(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi),
119119
SYSVEC(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi),
120+
121+
SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, posted_msi_notification),
120122
};
121123

122124
static bool fred_setup_done __initdata;

arch/x86/include/asm/apic.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <asm/msr.h>
1515
#include <asm/hardirq.h>
1616
#include <asm/io.h>
17+
#include <asm/posted_intr.h>
1718

1819
#define ARCH_APICTIMER_STOPS_ON_C3 1
1920

@@ -500,6 +501,11 @@ static inline bool lapic_vector_set_in_irr(unsigned int vector)
500501
return !!(irr & (1U << (vector % 32)));
501502
}
502503

504+
static inline bool is_vector_pending(unsigned int vector)
505+
{
506+
return lapic_vector_set_in_irr(vector) || pi_pending_this_cpu(vector);
507+
}
508+
503509
/*
504510
* Warm reset vector position:
505511
*/

arch/x86/include/asm/hardirq.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,16 @@ typedef struct {
4444
unsigned int irq_hv_reenlightenment_count;
4545
unsigned int hyperv_stimer0_count;
4646
#endif
47+
#ifdef CONFIG_X86_POSTED_MSI
48+
unsigned int posted_msi_notification_count;
49+
#endif
4750
} ____cacheline_aligned irq_cpustat_t;
4851

4952
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
5053

54+
#ifdef CONFIG_X86_POSTED_MSI
55+
DECLARE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
56+
#endif
5157
#define __ARCH_IRQ_STAT
5258

5359
#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)

arch/x86/include/asm/idtentry.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,12 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested
751751
# define fred_sysvec_kvm_posted_intr_nested_ipi NULL
752752
#endif
753753

754+
/*
 * Posted MSI notification vector: declare the sysvec entry when the feature
 * is built in, otherwise provide a NULL FRED handler placeholder.
 */
#ifdef CONFIG_X86_POSTED_MSI
DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR,	sysvec_posted_msi_notification);
#else
# define fred_sysvec_posted_msi_notification		NULL
#endif
759+
754760
#if IS_ENABLED(CONFIG_HYPERV)
755761
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
756762
DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);

arch/x86/include/asm/irq_remapping.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,13 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void)
5050
return x86_vector_domain;
5151
}
5252

53+
extern bool enable_posted_msi;
54+
55+
static inline bool posted_msi_supported(void)
56+
{
57+
return enable_posted_msi && irq_remapping_cap(IRQ_POSTING_CAP);
58+
}
59+
5360
#else /* CONFIG_IRQ_REMAP */
5461

5562
static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; }

arch/x86/include/asm/irq_vectors.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,16 @@
9797

9898
#define LOCAL_TIMER_VECTOR 0xec
9999

100+
/*
101+
* Posted interrupt notification vector for all device MSIs delivered to
102+
* the host kernel.
103+
*/
104+
#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb
105+
100106
#define NR_VECTORS 256
101107

102108
#ifdef CONFIG_X86_LOCAL_APIC
103-
#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR
109+
#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR
104110
#else
105111
#define FIRST_SYSTEM_VECTOR NR_VECTORS
106112
#endif

arch/x86/include/asm/posted_intr.h

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#ifndef _X86_POSTED_INTR_H
3+
#define _X86_POSTED_INTR_H
4+
#include <asm/irq_vectors.h>
5+
6+
#define POSTED_INTR_ON 0
7+
#define POSTED_INTR_SN 1
8+
9+
#define PID_TABLE_ENTRY_VALID 1
10+
11+
/* Posted-Interrupt Descriptor */
12+
struct pi_desc {
13+
union {
14+
u32 pir[8]; /* Posted interrupt requested */
15+
u64 pir64[4];
16+
};
17+
union {
18+
struct {
19+
u16 notifications; /* Suppress and outstanding bits */
20+
u8 nv;
21+
u8 rsvd_2;
22+
u32 ndst;
23+
};
24+
u64 control;
25+
};
26+
u32 rsvd[6];
27+
} __aligned(64);
28+
29+
static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
30+
{
31+
return test_and_set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
32+
}
33+
34+
static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
35+
{
36+
return test_and_clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
37+
}
38+
39+
static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc)
40+
{
41+
return test_and_clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
42+
}
43+
44+
static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
45+
{
46+
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
47+
}
48+
49+
static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
50+
{
51+
return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
52+
}
53+
54+
static inline void pi_set_sn(struct pi_desc *pi_desc)
55+
{
56+
set_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
57+
}
58+
59+
static inline void pi_set_on(struct pi_desc *pi_desc)
60+
{
61+
set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
62+
}
63+
64+
static inline void pi_clear_on(struct pi_desc *pi_desc)
65+
{
66+
clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
67+
}
68+
69+
static inline void pi_clear_sn(struct pi_desc *pi_desc)
70+
{
71+
clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
72+
}
73+
74+
static inline bool pi_test_on(struct pi_desc *pi_desc)
75+
{
76+
return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
77+
}
78+
79+
static inline bool pi_test_sn(struct pi_desc *pi_desc)
80+
{
81+
return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
82+
}
83+
84+
/* Non-atomic helpers */
85+
static inline void __pi_set_sn(struct pi_desc *pi_desc)
86+
{
87+
pi_desc->notifications |= BIT(POSTED_INTR_SN);
88+
}
89+
90+
static inline void __pi_clear_sn(struct pi_desc *pi_desc)
91+
{
92+
pi_desc->notifications &= ~BIT(POSTED_INTR_SN);
93+
}
94+
95+
#ifdef CONFIG_X86_POSTED_MSI
96+
/*
97+
* Not all external vectors are subject to interrupt remapping, e.g. IOMMU's
98+
* own interrupts. Here we do not distinguish them since those vector bits in
99+
* PIR will always be zero.
100+
*/
101+
static inline bool pi_pending_this_cpu(unsigned int vector)
102+
{
103+
struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc);
104+
105+
if (WARN_ON_ONCE(vector > NR_VECTORS || vector < FIRST_EXTERNAL_VECTOR))
106+
return false;
107+
108+
return test_bit(vector, (unsigned long *)pid->pir);
109+
}
110+
111+
extern void intel_posted_msi_init(void);
112+
#else
113+
/* Stub for builds without CONFIG_X86_POSTED_MSI: always reports false. */
static inline bool pi_pending_this_cpu(unsigned int vector)
{
	return false;
}
114+
115+
/* Stub for builds without CONFIG_X86_POSTED_MSI: does nothing. */
static inline void intel_posted_msi_init(void) {}
116+
#endif /* X86_POSTED_MSI */
117+
118+
#endif /* _X86_POSTED_INTR_H */

arch/x86/kernel/apic/vector.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -965,7 +965,7 @@ static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
965965
lockdep_assert_held(&vector_lock);
966966

967967
hlist_for_each_entry_safe(apicd, tmp, &cl->head, clist) {
968-
unsigned int irr, vector = apicd->prev_vector;
968+
unsigned int vector = apicd->prev_vector;
969969

970970
/*
971971
* Paranoia: Check if the vector that needs to be cleaned
@@ -979,8 +979,7 @@ static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
979979
* fixup_irqs() was just called to scan IRR for set bits and
980980
* forward them to new destination CPUs via IPIs.
981981
*/
982-
irr = check_irr ? apic_read(APIC_IRR + (vector / 32 * 0x10)) : 0;
983-
if (irr & (1U << (vector % 32))) {
982+
if (check_irr && is_vector_pending(vector)) {
984983
pr_warn_once("Moved interrupt pending in old target APIC %u\n", apicd->irq);
985984
rearm = true;
986985
continue;

0 commit comments

Comments
 (0)