@@ -134,9 +134,23 @@ static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
 {
 	struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu,
 					     arch.xen.timer);
+	struct kvm_xen_evtchn e;
+	int rc;
+
 	if (atomic_read(&vcpu->arch.xen.timer_pending))
 		return HRTIMER_NORESTART;
 
+	e.vcpu_id = vcpu->vcpu_id;
+	e.vcpu_idx = vcpu->vcpu_idx;
+	e.port = vcpu->arch.xen.timer_virq;
+	e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+	rc = kvm_xen_set_evtchn_fast(&e, vcpu->kvm);
+	if (rc != -EWOULDBLOCK) {
+		vcpu->arch.xen.timer_expires = 0;
+		return HRTIMER_NORESTART;
+	}
+
 	atomic_inc(&vcpu->arch.xen.timer_pending);
 	kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
 	kvm_vcpu_kick(vcpu);
@@ -146,6 +160,14 @@ static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
 
 static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_ns)
 {
+	/*
+	 * Avoid races with the old timer firing. Checking timer_expires
+	 * to avoid calling hrtimer_cancel() will only have false positives
+	 * so is fine.
+	 */
+	if (vcpu->arch.xen.timer_expires)
+		hrtimer_cancel(&vcpu->arch.xen.timer);
+
 	atomic_set(&vcpu->arch.xen.timer_pending, 0);
 	vcpu->arch.xen.timer_expires = guest_abs;
 
@@ -1019,9 +1041,36 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		break;
 
 	case KVM_XEN_VCPU_ATTR_TYPE_TIMER:
+		/*
+		 * Ensure a consistent snapshot of state is captured, with a
+		 * timer either being pending, or the event channel delivered
+		 * to the corresponding bit in the shared_info. Not still
+		 * lurking in the timer_pending flag for deferred delivery.
+		 * Purely as an optimisation, if the timer_expires field is
+		 * zero, that means the timer isn't active (or even in the
+		 * timer_pending flag) and there is no need to cancel it.
+		 */
+		if (vcpu->arch.xen.timer_expires) {
+			hrtimer_cancel(&vcpu->arch.xen.timer);
+			kvm_xen_inject_timer_irqs(vcpu);
+		}
+
 		data->u.timer.port = vcpu->arch.xen.timer_virq;
 		data->u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
 		data->u.timer.expires_ns = vcpu->arch.xen.timer_expires;
+
+		/*
+		 * The hrtimer may trigger and raise the IRQ immediately,
+		 * while the returned state causes it to be set up and
+		 * raised again on the destination system after migration.
+		 * That's fine, as the guest won't even have had a chance
+		 * to run and handle the interrupt. Asserting an already
+		 * pending event channel is idempotent.
+		 */
+		if (vcpu->arch.xen.timer_expires)
+			hrtimer_start_expires(&vcpu->arch.xen.timer,
+					      HRTIMER_MODE_ABS_HARD);
+
 		r = 0;
 		break;
 
@@ -1374,12 +1423,8 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
 			return true;
 		}
 
+		/* A delta <= 0 results in an immediate callback, which is what we want */
 		delta = oneshot.timeout_abs_ns - get_kvmclock_ns(vcpu->kvm);
-		if ((oneshot.flags & VCPU_SSHOTTMR_future) && delta < 0) {
-			*r = -ETIME;
-			return true;
-		}
-
 		kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, delta);
 		*r = 0;
 		return true;
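
Below is a minimal userspace sketch, not part of the commit itself, showing how a VMM might use the KVM_XEN_VCPU_ATTR_TYPE_TIMER attribute that the kvm_xen_vcpu_get_attr() hunk above makes safe to read: snapshot the Xen PV timer on the source and restore it on the destination after migration. The vcpu_fd descriptor and the save/restore helper names are assumptions for illustration; the ioctls and fields (KVM_XEN_VCPU_GET_ATTR, KVM_XEN_VCPU_SET_ATTR, struct kvm_xen_vcpu_attr, u.timer.port/priority/expires_ns) are the existing KVM Xen vCPU attribute interface referenced in the diff.

/* Illustrative sketch only; vcpu_fd is assumed to be an open KVM vCPU fd
 * for a VM with Xen HVM support enabled. */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int save_xen_timer(int vcpu_fd, struct kvm_xen_vcpu_attr *out)
{
	memset(out, 0, sizeof(*out));
	out->type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;

	/*
	 * With the change above, GET_ATTR cancels the hrtimer, delivers any
	 * already-pending timer event and then re-arms the timer, so the
	 * port/priority/expires_ns snapshot returned here is self-consistent.
	 */
	return ioctl(vcpu_fd, KVM_XEN_VCPU_GET_ATTR, out);
}

static int restore_xen_timer(int vcpu_fd, struct kvm_xen_vcpu_attr *in)
{
	/* On the destination, setting the same attribute re-arms the timer. */
	return ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, in);
}

A non-zero expires_ns in the saved attribute means the timer was armed; feeding the same attribute back through SET_ATTR recreates that state, and re-asserting an event channel that was already delivered is harmless, as the comment in the hunk notes.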