Skip to content
This repository was archived by the owner on Nov 8, 2023. It is now read-only.

Commit 89a5881

Browse files
sean-jcbonzini
authored andcommitted
KVM: x86: Drop support for hand tuning APIC timer advancement from userspace
Remove support for specifying a static local APIC timer advancement value, and instead present a read-only boolean parameter to let userspace enable or disable KVM's dynamic APIC timer advancement. Realistically, it's all but impossible for userspace to specify an advancement that is more precise than what KVM's adaptive tuning can provide. E.g. a static value needs to be tuned for the exact hardware and kernel, and if KVM is using hrtimers, likely requires additional tuning for the exact configuration of the entire system. Dropping support for a userspace provided value also fixes several flaws in the interface. E.g. KVM interprets a negative value other than -1 as a large advancement, toggling between a negative and positive value yields unpredictable behavior as vCPUs will switch from dynamic to static advancement, changing the advancement in the middle of VM creation can result in different values for vCPUs within a VM, etc. Those flaws are mostly fixable, but there's almost no justification for taking on yet more complexity (it's minimal complexity, but still non-zero). The only arguments against using KVM's adaptive tuning is if a setup needs a higher maximum, or if the adjustments are too reactive, but those are arguments for letting userspace control the absolute max advancement and the granularity of each adjustment, e.g. similar to how KVM provides knobs for halt polling. Link: https://lore.kernel.org/all/20240520115334.852510-1-zhoushuling@huawei.com Cc: Shuling Zhou <zhoushuling@huawei.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Sean Christopherson <seanjc@google.com> Message-ID: <20240522010304.1650603-1-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent b7e4be0 commit 89a5881

File tree

3 files changed

+23
-29
lines changed

3 files changed

+23
-29
lines changed

arch/x86/kvm/lapic.c

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,17 @@
5959
#define MAX_APIC_VECTOR 256
6060
#define APIC_VECTORS_PER_REG 32
6161

62-
static bool lapic_timer_advance_dynamic __read_mostly;
62+
/*
63+
* Enable local APIC timer advancement (tscdeadline mode only) with adaptive
64+
* tuning. When enabled, KVM programs the host timer event to fire early, i.e.
65+
* before the deadline expires, to account for the delay between taking the
66+
* VM-Exit (to inject the guest event) and the subsequent VM-Enter to resume
67+
* the guest, i.e. so that the interrupt arrives in the guest with minimal
68+
* latency relative to the deadline programmed by the guest.
69+
*/
70+
static bool lapic_timer_advance __read_mostly = true;
71+
module_param(lapic_timer_advance, bool, 0444);
72+
6373
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */
6474
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */
6575
#define LAPIC_TIMER_ADVANCE_NS_INIT 1000
@@ -1854,16 +1864,14 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
18541864
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
18551865
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
18561866

1857-
if (lapic_timer_advance_dynamic) {
1858-
adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
1859-
/*
1860-
* If the timer fired early, reread the TSC to account for the
1861-
* overhead of the above adjustment to avoid waiting longer
1862-
* than is necessary.
1863-
*/
1864-
if (guest_tsc < tsc_deadline)
1865-
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1866-
}
1867+
adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
1868+
1869+
/*
1870+
* If the timer fired early, reread the TSC to account for the overhead
1871+
* of the above adjustment to avoid waiting longer than is necessary.
1872+
*/
1873+
if (guest_tsc < tsc_deadline)
1874+
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
18671875

18681876
if (guest_tsc < tsc_deadline)
18691877
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
@@ -2812,7 +2820,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
28122820
return HRTIMER_NORESTART;
28132821
}
28142822

2815-
int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2823+
int kvm_create_lapic(struct kvm_vcpu *vcpu)
28162824
{
28172825
struct kvm_lapic *apic;
28182826

@@ -2845,13 +2853,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
28452853
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
28462854
HRTIMER_MODE_ABS_HARD);
28472855
apic->lapic_timer.timer.function = apic_timer_fn;
2848-
if (timer_advance_ns == -1) {
2856+
if (lapic_timer_advance)
28492857
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2850-
lapic_timer_advance_dynamic = true;
2851-
} else {
2852-
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2853-
lapic_timer_advance_dynamic = false;
2854-
}
28552858

28562859
/*
28572860
* Stuff the APIC ENABLE bit in lieu of temporarily incrementing

arch/x86/kvm/lapic.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ struct kvm_lapic {
8585

8686
struct dest_map;
8787

88-
int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns);
88+
int kvm_create_lapic(struct kvm_vcpu *vcpu);
8989
void kvm_free_lapic(struct kvm_vcpu *vcpu);
9090

9191
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);

arch/x86/kvm/x86.c

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -164,15 +164,6 @@ module_param(kvmclock_periodic_sync, bool, 0444);
164164
static u32 __read_mostly tsc_tolerance_ppm = 250;
165165
module_param(tsc_tolerance_ppm, uint, 0644);
166166

167-
/*
168-
* lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables
169-
* adaptive tuning starting from default advancement of 1000ns. '0' disables
170-
* advancement entirely. Any other value is used as-is and disables adaptive
171-
* tuning, i.e. allows privileged userspace to set an exact advancement time.
172-
*/
173-
static int __read_mostly lapic_timer_advance_ns = -1;
174-
module_param(lapic_timer_advance_ns, int, 0644);
175-
176167
static bool __read_mostly vector_hashing = true;
177168
module_param(vector_hashing, bool, 0444);
178169

@@ -12169,7 +12160,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
1216912160
if (r < 0)
1217012161
return r;
1217112162

12172-
r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
12163+
r = kvm_create_lapic(vcpu);
1217312164
if (r < 0)
1217412165
goto fail_mmu_destroy;
1217512166

0 commit comments

Comments
 (0)