Skip to content

Commit c405e18

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini: "ARM: - Single fix for broken usage of 'multi-MIDR' infrastructure in PI code, adding an open-coded erratum check for everyone's favorite pile of sand: Cavium ThunderX x86: - Bugfixes from a planned posted interrupt rework - Do not use kvm_rip_read() unconditionally to cater for guests with inaccessible register state" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86: Do not use kvm_rip_read() unconditionally for KVM_PROFILING KVM: x86: Do not use kvm_rip_read() unconditionally in KVM tracepoints KVM: SVM: WARN if an invalid posted interrupt IRTE entry is added iommu/amd: WARN if KVM attempts to set vCPU affinity without posted interrupts iommu/amd: Return an error if vCPU affinity is set for non-vCPU IRTE KVM: x86: Take irqfds.lock when adding/deleting IRQ bypass producer KVM: x86: Explicitly treat routing entry type changes as changes KVM: x86: Reset IRTE to host control if *new* route isn't postable KVM: SVM: Allocate IR data using atomic allocation KVM: SVM: Don't update IRTEs if APICv/AVIC is disabled KVM: arm64, x86: make kvm_arch_has_irq_bypass() inline arm64: Rework checks for broken Cavium HW in the PI code
2 parents 7deea56 + 2d71249 commit c405e18

File tree

12 files changed

+116
-94
lines changed

12 files changed

+116
-94
lines changed

arch/arm64/include/asm/kvm_host.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1588,4 +1588,9 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
15881588
#define kvm_has_s1poe(k) \
15891589
(kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
15901590

1591+
static inline bool kvm_arch_has_irq_bypass(void)
1592+
{
1593+
return true;
1594+
}
1595+
15911596
#endif /* __ARM64_KVM_HOST_H__ */

arch/arm64/include/asm/mmu.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -94,17 +94,6 @@ static inline bool kaslr_requires_kpti(void)
9494
return false;
9595
}
9696

97-
/*
98-
* Systems affected by Cavium erratum 27456 are incompatible
99-
* with KPTI.
100-
*/
101-
if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
102-
extern const struct midr_range cavium_erratum_27456_cpus[];
103-
104-
if (is_midr_in_range_list(cavium_erratum_27456_cpus))
105-
return false;
106-
}
107-
10897
return true;
10998
}
11099

arch/arm64/kernel/cpu_errata.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ static const struct midr_range cavium_erratum_23154_cpus[] = {
335335
#endif
336336

337337
#ifdef CONFIG_CAVIUM_ERRATUM_27456
338-
const struct midr_range cavium_erratum_27456_cpus[] = {
338+
static const struct midr_range cavium_erratum_27456_cpus[] = {
339339
/* Cavium ThunderX, T88 pass 1.x - 2.1 */
340340
MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
341341
/* Cavium ThunderX, T81 pass 1.0 */

arch/arm64/kernel/image-vars.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,6 @@ PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override);
4747
PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override);
4848
PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override);
4949
PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings);
50-
#ifdef CONFIG_CAVIUM_ERRATUM_27456
51-
PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus);
52-
PROVIDE(__pi_is_midr_in_range_list = is_midr_in_range_list);
53-
#endif
5450
PROVIDE(__pi__ctype = _ctype);
5551
PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed);
5652

arch/arm64/kernel/pi/map_kernel.c

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,29 @@ static void __init map_fdt(u64 fdt)
207207
dsb(ishst);
208208
}
209209

210+
/*
211+
* PI version of the Cavium Erratum 27456 detection, which makes it
212+
* impossible to use non-global mappings.
213+
*/
214+
static bool __init ng_mappings_allowed(void)
215+
{
216+
static const struct midr_range cavium_erratum_27456_cpus[] __initconst = {
217+
/* Cavium ThunderX, T88 pass 1.x - 2.1 */
218+
MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
219+
/* Cavium ThunderX, T81 pass 1.0 */
220+
MIDR_REV(MIDR_THUNDERX_81XX, 0, 0),
221+
{},
222+
};
223+
224+
for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) {
225+
if (midr_is_cpu_model_range(read_cpuid_id(), r->model,
226+
r->rv_min, r->rv_max))
227+
return false;
228+
}
229+
230+
return true;
231+
}
232+
210233
asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
211234
{
212235
static char const chosen_str[] __initconst = "/chosen";
@@ -246,7 +269,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
246269
u64 kaslr_seed = kaslr_early_init(fdt, chosen);
247270

248271
if (kaslr_seed && kaslr_requires_kpti())
249-
arm64_use_ng_mappings = true;
272+
arm64_use_ng_mappings = ng_mappings_allowed();
250273

251274
kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
252275
}

arch/arm64/kvm/arm.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2743,11 +2743,6 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
27432743
return irqchip_in_kernel(kvm);
27442744
}
27452745

2746-
bool kvm_arch_has_irq_bypass(void)
2747-
{
2748-
return true;
2749-
}
2750-
27512746
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
27522747
struct irq_bypass_producer *prod)
27532748
{

arch/x86/include/asm/kvm_host.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include <asm/mtrr.h>
3636
#include <asm/msr-index.h>
3737
#include <asm/asm.h>
38+
#include <asm/irq_remapping.h>
3839
#include <asm/kvm_page_track.h>
3940
#include <asm/kvm_vcpu_regs.h>
4041
#include <asm/reboot.h>
@@ -2423,4 +2424,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
24232424
*/
24242425
#define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1)
24252426

2427+
static inline bool kvm_arch_has_irq_bypass(void)
2428+
{
2429+
return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
2430+
}
2431+
24262432
#endif /* _ASM_X86_KVM_HOST_H */

arch/x86/kvm/svm/avic.c

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -796,12 +796,15 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
796796
struct amd_svm_iommu_ir *ir;
797797
u64 entry;
798798

799+
if (WARN_ON_ONCE(!pi->ir_data))
800+
return -EINVAL;
801+
799802
/**
800803
* In some cases, the existing irte is updated and re-set,
801804
* so we need to check here if it's already been added
802805
* to the ir_list.
803806
*/
804-
if (pi->ir_data && (pi->prev_ga_tag != 0)) {
807+
if (pi->prev_ga_tag) {
805808
struct kvm *kvm = svm->vcpu.kvm;
806809
u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
807810
struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
@@ -820,7 +823,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
820823
* Allocate a new amd_svm_iommu_ir, which will get
821824
* added to the per-vcpu ir_list.
822825
*/
823-
ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
826+
ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT);
824827
if (!ir) {
825828
ret = -ENOMEM;
826829
goto out;
@@ -896,10 +899,10 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
896899
{
897900
struct kvm_kernel_irq_routing_entry *e;
898901
struct kvm_irq_routing_table *irq_rt;
902+
bool enable_remapped_mode = true;
899903
int idx, ret = 0;
900904

901-
if (!kvm_arch_has_assigned_device(kvm) ||
902-
!irq_remapping_cap(IRQ_POSTING_CAP))
905+
if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass())
903906
return 0;
904907

905908
pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
@@ -933,6 +936,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
933936
kvm_vcpu_apicv_active(&svm->vcpu)) {
934937
struct amd_iommu_pi_data pi;
935938

939+
enable_remapped_mode = false;
940+
936941
/* Try to enable guest_mode in IRTE */
937942
pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
938943
AVIC_HPA_MASK);
@@ -951,33 +956,6 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
951956
*/
952957
if (!ret && pi.is_guest_mode)
953958
svm_ir_list_add(svm, &pi);
954-
} else {
955-
/* Use legacy mode in IRTE */
956-
struct amd_iommu_pi_data pi;
957-
958-
/**
959-
* Here, pi is used to:
960-
* - Tell IOMMU to use legacy mode for this interrupt.
961-
* - Retrieve ga_tag of prior interrupt remapping data.
962-
*/
963-
pi.prev_ga_tag = 0;
964-
pi.is_guest_mode = false;
965-
ret = irq_set_vcpu_affinity(host_irq, &pi);
966-
967-
/**
968-
* Check if the posted interrupt was previously
969-
* set up with the guest_mode by checking if the ga_tag
970-
* was cached. If so, we need to clean up the per-vcpu
971-
* ir_list.
972-
*/
973-
if (!ret && pi.prev_ga_tag) {
974-
int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
975-
struct kvm_vcpu *vcpu;
976-
977-
vcpu = kvm_get_vcpu_by_id(kvm, id);
978-
if (vcpu)
979-
svm_ir_list_del(to_svm(vcpu), &pi);
980-
}
981959
}
982960

983961
if (!ret && svm) {
@@ -993,6 +971,34 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
993971
}
994972

995973
ret = 0;
974+
if (enable_remapped_mode) {
975+
/* Use legacy mode in IRTE */
976+
struct amd_iommu_pi_data pi;
977+
978+
/**
979+
* Here, pi is used to:
980+
* - Tell IOMMU to use legacy mode for this interrupt.
981+
* - Retrieve ga_tag of prior interrupt remapping data.
982+
*/
983+
pi.prev_ga_tag = 0;
984+
pi.is_guest_mode = false;
985+
ret = irq_set_vcpu_affinity(host_irq, &pi);
986+
987+
/**
988+
* Check if the posted interrupt was previously
989+
* set up with the guest_mode by checking if the ga_tag
990+
* was cached. If so, we need to clean up the per-vcpu
991+
* ir_list.
992+
*/
993+
if (!ret && pi.prev_ga_tag) {
994+
int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
995+
struct kvm_vcpu *vcpu;
996+
997+
vcpu = kvm_get_vcpu_by_id(kvm, id);
998+
if (vcpu)
999+
svm_ir_list_del(to_svm(vcpu), &pi);
1000+
}
1001+
}
9961002
out:
9971003
srcu_read_unlock(&kvm->irq_srcu, idx);
9981004
return ret;

arch/x86/kvm/trace.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@
1111
#undef TRACE_SYSTEM
1212
#define TRACE_SYSTEM kvm
1313

14+
#ifdef CREATE_TRACE_POINTS
15+
#define tracing_kvm_rip_read(vcpu) ({ \
16+
typeof(vcpu) __vcpu = vcpu; \
17+
__vcpu->arch.guest_state_protected ? 0 : kvm_rip_read(__vcpu); \
18+
})
19+
#endif
20+
1421
/*
1522
* Tracepoint for guest mode entry.
1623
*/
@@ -28,7 +35,7 @@ TRACE_EVENT(kvm_entry,
2835

2936
TP_fast_assign(
3037
__entry->vcpu_id = vcpu->vcpu_id;
31-
__entry->rip = kvm_rip_read(vcpu);
38+
__entry->rip = tracing_kvm_rip_read(vcpu);
3239
__entry->immediate_exit = force_immediate_exit;
3340

3441
kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info,
@@ -319,7 +326,7 @@ TRACE_EVENT(name, \
319326
), \
320327
\
321328
TP_fast_assign( \
322-
__entry->guest_rip = kvm_rip_read(vcpu); \
329+
__entry->guest_rip = tracing_kvm_rip_read(vcpu); \
323330
__entry->isa = isa; \
324331
__entry->vcpu_id = vcpu->vcpu_id; \
325332
__entry->requests = READ_ONCE(vcpu->requests); \
@@ -423,7 +430,7 @@ TRACE_EVENT(kvm_page_fault,
423430

424431
TP_fast_assign(
425432
__entry->vcpu_id = vcpu->vcpu_id;
426-
__entry->guest_rip = kvm_rip_read(vcpu);
433+
__entry->guest_rip = tracing_kvm_rip_read(vcpu);
427434
__entry->fault_address = fault_address;
428435
__entry->error_code = error_code;
429436
),

arch/x86/kvm/vmx/posted_intr.c

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
297297
{
298298
struct kvm_kernel_irq_routing_entry *e;
299299
struct kvm_irq_routing_table *irq_rt;
300+
bool enable_remapped_mode = true;
300301
struct kvm_lapic_irq irq;
301302
struct kvm_vcpu *vcpu;
302303
struct vcpu_data vcpu_info;
@@ -335,40 +336,31 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
335336

336337
kvm_set_msi_irq(kvm, e, &irq);
337338
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
338-
!kvm_irq_is_postable(&irq)) {
339-
/*
340-
* Make sure the IRTE is in remapped mode if
341-
* we don't handle it in posted mode.
342-
*/
343-
ret = irq_set_vcpu_affinity(host_irq, NULL);
344-
if (ret < 0) {
345-
printk(KERN_INFO
346-
"failed to back to remapped mode, irq: %u\n",
347-
host_irq);
348-
goto out;
349-
}
350-
339+
!kvm_irq_is_postable(&irq))
351340
continue;
352-
}
353341

354342
vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
355343
vcpu_info.vector = irq.vector;
356344

357345
trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
358346
vcpu_info.vector, vcpu_info.pi_desc_addr, set);
359347

360-
if (set)
361-
ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
362-
else
363-
ret = irq_set_vcpu_affinity(host_irq, NULL);
348+
if (!set)
349+
continue;
364350

351+
enable_remapped_mode = false;
352+
353+
ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
365354
if (ret < 0) {
366355
printk(KERN_INFO "%s: failed to update PI IRTE\n",
367356
__func__);
368357
goto out;
369358
}
370359
}
371360

361+
if (enable_remapped_mode)
362+
ret = irq_set_vcpu_affinity(host_irq, NULL);
363+
372364
ret = 0;
373365
out:
374366
srcu_read_unlock(&kvm->irq_srcu, idx);

0 commit comments

Comments
 (0)