@@ -4111,48 +4111,48 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
 
 	mutex_lock(&kvm->lock);
 
-#ifdef CONFIG_LOCKDEP
-	/* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */
-	mutex_lock(&vcpu->mutex);
-	mutex_unlock(&vcpu->mutex);
-#endif
-
 	if (kvm_get_vcpu_by_id(kvm, id)) {
 		r = -EEXIST;
 		goto unlock_vcpu_destroy;
 	}
 
 	vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
-	r = xa_reserve(&kvm->vcpu_array, vcpu->vcpu_idx, GFP_KERNEL_ACCOUNT);
+	r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
+	WARN_ON_ONCE(r == -EBUSY);
 	if (r)
 		goto unlock_vcpu_destroy;
 
-	/* Now it's all set up, let userspace reach it */
+	/*
+	 * Now it's all set up, let userspace reach it.  Grab the vCPU's mutex
+	 * so that userspace can't invoke vCPU ioctl()s until the vCPU is fully
+	 * visible (per online_vcpus), e.g. so that KVM doesn't get tricked
+	 * into a NULL-pointer dereference because KVM thinks the _current_
+	 * vCPU doesn't exist.  As a bonus, taking vcpu->mutex ensures lockdep
+	 * knows it's taken *inside* kvm->lock.
+	 */
+	mutex_lock(&vcpu->mutex);
 	kvm_get_kvm(kvm);
 	r = create_vcpu_fd(vcpu);
 	if (r < 0)
-		goto kvm_put_xa_release;
-
-	if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
-		r = -EINVAL;
-		goto kvm_put_xa_release;
-	}
+		goto kvm_put_xa_erase;
 
 	/*
 	 * Pairs with smp_rmb() in kvm_get_vcpu.  Store the vcpu
 	 * pointer before kvm->online_vcpu's incremented value.
 	 */
 	smp_wmb();
 	atomic_inc(&kvm->online_vcpus);
+	mutex_unlock(&vcpu->mutex);
 
 	mutex_unlock(&kvm->lock);
 	kvm_arch_vcpu_postcreate(vcpu);
 	kvm_create_vcpu_debugfs(vcpu);
 	return r;
 
-kvm_put_xa_release:
+kvm_put_xa_erase:
+	mutex_unlock(&vcpu->mutex);
 	kvm_put_kvm_no_destroy(kvm);
-	xa_release(&kvm->vcpu_array, vcpu->vcpu_idx);
+	xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx);
 unlock_vcpu_destroy:
 	mutex_unlock(&kvm->lock);
 	kvm_dirty_ring_free(&vcpu->dirty_ring);
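
For context, the smp_wmb() above pairs with an smp_rmb() on the lookup side. The following is a rough paraphrase of kvm_get_vcpu() (a sketch from memory, not the verbatim kernel source) showing how the reader orders the online_vcpus check against the vcpu_array load:

static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
	/* Read the count first; an index at or above it is not yet online. */
	int num_vcpus = atomic_read(&kvm->online_vcpus);

	if (i >= num_vcpus)
		return NULL;

	/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu(). */
	smp_rmb();

	/* Because of the pairing above, the stored vcpu pointer is visible. */
	return xa_load(&kvm->vcpu_array, i);
}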
@@ -4277,6 +4277,33 @@ static int kvm_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
 }
 #endif
 
+static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	/*
+	 * In practice, this happy path will always be taken, as a well-behaved
+	 * VMM will never invoke a vCPU ioctl() before KVM_CREATE_VCPU returns.
+	 */
+	if (likely(vcpu->vcpu_idx < atomic_read(&kvm->online_vcpus)))
+		return 0;
+
+	/*
+	 * Acquire and release the vCPU's mutex to wait for vCPU creation to
+	 * complete (kvm_vm_ioctl_create_vcpu() holds the mutex until the vCPU
+	 * is fully online).
+	 */
+	if (mutex_lock_killable(&vcpu->mutex))
+		return -EINTR;
+
+	mutex_unlock(&vcpu->mutex);
+
+	if (WARN_ON_ONCE(!kvm_get_vcpu(kvm, vcpu->vcpu_idx)))
+		return -EIO;
+
+	return 0;
+}
+
 static long kvm_vcpu_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
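
The hold-the-mutex-across-publication scheme shared by kvm_vm_ioctl_create_vcpu() and kvm_wait_for_vcpu_online() can be illustrated outside the kernel. Below is a minimal userspace analogy (all names are hypothetical, not KVM code): the creator publishes an object and bumps an "online" counter while still holding the object's own lock, so a consumer that sees the object before it is counted as online simply waits on that lock instead of touching a half-initialized slot.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct obj {
	pthread_mutex_t lock;
	int idx;
	int ready;
};

#define MAX_OBJS 8
static struct obj *table[MAX_OBJS];
static atomic_int online;
static struct obj *_Atomic handed_off;	/* analogous to installing the fd */

static void *creator(void *arg)
{
	struct obj *o = calloc(1, sizeof(*o));

	(void)arg;
	pthread_mutex_init(&o->lock, NULL);
	o->idx = atomic_load(&online);

	/* Hold the object's lock across publication, as in vCPU creation. */
	pthread_mutex_lock(&o->lock);
	table[o->idx] = o;		/* analogous to xa_insert() */
	atomic_store(&handed_off, o);	/* consumer can now reach the object */
	usleep(10000);			/* widen the race window */
	o->ready = 1;			/* remaining setup finishes here */
	atomic_fetch_add(&online, 1);	/* analogous to online_vcpus++ */
	pthread_mutex_unlock(&o->lock);
	return NULL;
}

static int wait_for_online(struct obj *o)
{
	/* Fast path: the object is already counted as online. */
	if (o->idx < atomic_load(&online))
		return 0;

	/* Slow path: block until the creator drops the object's lock. */
	pthread_mutex_lock(&o->lock);
	pthread_mutex_unlock(&o->lock);
	return o->idx < atomic_load(&online) ? 0 : -1;
}

int main(void)
{
	pthread_t t;
	struct obj *o;

	pthread_create(&t, NULL, creator, NULL);
	while (!(o = atomic_load(&handed_off)))
		;	/* wait for the "fd" to be handed to us */

	if (!wait_for_online(o))
		printf("obj %d online, ready=%d\n", o->idx, o->ready);

	pthread_join(t, NULL);
	free(o);
	return 0;
}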
@@ -4292,6 +4319,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
 		return -EINVAL;
 
+	/*
+	 * Wait for the vCPU to be online before handling the ioctl(), as KVM
+	 * assumes the vCPU is reachable via vcpu_array, i.e. may dereference
+	 * a NULL pointer if userspace invokes an ioctl() before KVM is ready.
+	 */
+	r = kvm_wait_for_vcpu_online(vcpu);
+	if (r)
+		return r;
+
 	/*
 	 * Some architectures have vcpu ioctls that are asynchronous to vcpu
 	 * execution; mutex_lock() would break them.
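
From userspace nothing changes for a well-behaved VMM; the new check only matters for a vCPU ioctl() that races with KVM_CREATE_VCPU in another thread, which now waits instead of hitting a not-yet-online vCPU. A minimal sketch of the calling sequence (hypothetical, error handling omitted; KVM_GET_MP_STATE is just an arbitrary vCPU ioctl() and is not supported on every architecture):

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
	struct kvm_mp_state mp;

	/*
	 * A vCPU ioctl(); with the patch applied this path goes through
	 * kvm_wait_for_vcpu_online() before KVM touches vcpu_array.
	 */
	if (ioctl(vcpu, KVM_GET_MP_STATE, &mp) == 0)
		printf("mp_state = %u\n", mp.mp_state);

	close(vcpu);
	close(vm);
	close(kvm);
	return 0;
}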