26
26
/* Initial target buffer size */
27
27
#define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M
28
28
29
+ static int
30
+ virtiovf_read_device_context_chunk (struct virtiovf_migration_file * migf ,
31
+ u32 ctx_size );
32
+
29
33
static struct page *
30
34
virtiovf_get_migration_page (struct virtiovf_data_buffer * buf ,
31
35
unsigned long offset )
@@ -159,6 +163,41 @@ virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id)
159
163
VIRTIO_RESOURCE_OBJ_DEV_PARTS , obj_id );
160
164
}
161
165
166
+ static struct virtiovf_data_buffer *
167
+ virtiovf_get_data_buffer (struct virtiovf_migration_file * migf , size_t length )
168
+ {
169
+ struct virtiovf_data_buffer * buf , * temp_buf ;
170
+ struct list_head free_list ;
171
+
172
+ INIT_LIST_HEAD (& free_list );
173
+
174
+ spin_lock_irq (& migf -> list_lock );
175
+ list_for_each_entry_safe (buf , temp_buf , & migf -> avail_list , buf_elm ) {
176
+ list_del_init (& buf -> buf_elm );
177
+ if (buf -> allocated_length >= length ) {
178
+ spin_unlock_irq (& migf -> list_lock );
179
+ goto found ;
180
+ }
181
+ /*
182
+ * Prevent holding redundant buffers. Put in a free
183
+ * list and call at the end not under the spin lock
184
+ * (&migf->list_lock) to minimize its scope usage.
185
+ */
186
+ list_add (& buf -> buf_elm , & free_list );
187
+ }
188
+ spin_unlock_irq (& migf -> list_lock );
189
+ buf = virtiovf_alloc_data_buffer (migf , length );
190
+
191
+ found :
192
+ while ((temp_buf = list_first_entry_or_null (& free_list ,
193
+ struct virtiovf_data_buffer , buf_elm ))) {
194
+ list_del (& temp_buf -> buf_elm );
195
+ virtiovf_free_data_buffer (temp_buf );
196
+ }
197
+
198
+ return buf ;
199
+ }
200
+
162
201
static void virtiovf_clean_migf_resources (struct virtiovf_migration_file * migf )
163
202
{
164
203
struct virtiovf_data_buffer * entry ;
@@ -345,6 +384,7 @@ static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t le
345
384
{
346
385
struct virtiovf_migration_file * migf = filp -> private_data ;
347
386
struct virtiovf_data_buffer * vhca_buf ;
387
+ bool first_loop_call = true;
348
388
bool end_of_data ;
349
389
ssize_t done = 0 ;
350
390
@@ -362,6 +402,19 @@ static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t le
362
402
ssize_t count ;
363
403
364
404
vhca_buf = virtiovf_get_data_buff_from_pos (migf , * pos , & end_of_data );
405
+ if (first_loop_call ) {
406
+ first_loop_call = false;
407
+ /* Temporary end of file as part of PRE_COPY */
408
+ if (end_of_data && migf -> state == VIRTIOVF_MIGF_STATE_PRECOPY ) {
409
+ done = - ENOMSG ;
410
+ goto out_unlock ;
411
+ }
412
+ if (end_of_data && migf -> state != VIRTIOVF_MIGF_STATE_COMPLETE ) {
413
+ done = - EINVAL ;
414
+ goto out_unlock ;
415
+ }
416
+ }
417
+
365
418
if (end_of_data )
366
419
goto out_unlock ;
367
420
@@ -383,9 +436,101 @@ static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t le
383
436
return done ;
384
437
}
385
438
439
+ static long virtiovf_precopy_ioctl (struct file * filp , unsigned int cmd ,
440
+ unsigned long arg )
441
+ {
442
+ struct virtiovf_migration_file * migf = filp -> private_data ;
443
+ struct virtiovf_pci_core_device * virtvdev = migf -> virtvdev ;
444
+ struct vfio_precopy_info info = {};
445
+ loff_t * pos = & filp -> f_pos ;
446
+ bool end_of_data = false;
447
+ unsigned long minsz ;
448
+ u32 ctx_size = 0 ;
449
+ int ret ;
450
+
451
+ if (cmd != VFIO_MIG_GET_PRECOPY_INFO )
452
+ return - ENOTTY ;
453
+
454
+ minsz = offsetofend (struct vfio_precopy_info , dirty_bytes );
455
+ if (copy_from_user (& info , (void __user * )arg , minsz ))
456
+ return - EFAULT ;
457
+
458
+ if (info .argsz < minsz )
459
+ return - EINVAL ;
460
+
461
+ mutex_lock (& virtvdev -> state_mutex );
462
+ if (virtvdev -> mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
463
+ virtvdev -> mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P ) {
464
+ ret = - EINVAL ;
465
+ goto err_state_unlock ;
466
+ }
467
+
468
+ /*
469
+ * The virtio specification does not include a PRE_COPY concept.
470
+ * Since we can expect the data to remain the same for a certain period,
471
+ * we use a rate limiter mechanism before making a call to the device.
472
+ */
473
+ if (__ratelimit (& migf -> pre_copy_rl_state )) {
474
+
475
+ ret = virtio_pci_admin_dev_parts_metadata_get (virtvdev -> core_device .pdev ,
476
+ VIRTIO_RESOURCE_OBJ_DEV_PARTS , migf -> obj_id ,
477
+ VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE ,
478
+ & ctx_size );
479
+ if (ret )
480
+ goto err_state_unlock ;
481
+ }
482
+
483
+ mutex_lock (& migf -> lock );
484
+ if (migf -> state == VIRTIOVF_MIGF_STATE_ERROR ) {
485
+ ret = - ENODEV ;
486
+ goto err_migf_unlock ;
487
+ }
488
+
489
+ if (migf -> pre_copy_initial_bytes > * pos ) {
490
+ info .initial_bytes = migf -> pre_copy_initial_bytes - * pos ;
491
+ } else {
492
+ info .dirty_bytes = migf -> max_pos - * pos ;
493
+ if (!info .dirty_bytes )
494
+ end_of_data = true;
495
+ info .dirty_bytes += ctx_size ;
496
+ }
497
+
498
+ if (!end_of_data || !ctx_size ) {
499
+ mutex_unlock (& migf -> lock );
500
+ goto done ;
501
+ }
502
+
503
+ mutex_unlock (& migf -> lock );
504
+ /*
505
+ * We finished transferring the current state and the device has a
506
+ * dirty state, read a new state.
507
+ */
508
+ ret = virtiovf_read_device_context_chunk (migf , ctx_size );
509
+ if (ret )
510
+ /*
511
+ * The machine is running, and context size could be grow, so no reason to mark
512
+ * the device state as VIRTIOVF_MIGF_STATE_ERROR.
513
+ */
514
+ goto err_state_unlock ;
515
+
516
+ done :
517
+ virtiovf_state_mutex_unlock (virtvdev );
518
+ if (copy_to_user ((void __user * )arg , & info , minsz ))
519
+ return - EFAULT ;
520
+ return 0 ;
521
+
522
+ err_migf_unlock :
523
+ mutex_unlock (& migf -> lock );
524
+ err_state_unlock :
525
+ virtiovf_state_mutex_unlock (virtvdev );
526
+ return ret ;
527
+ }
528
+
386
529
/* File operations for the saving (source-side) migration file. */
static const struct file_operations virtiovf_save_fops = {
	.owner = THIS_MODULE,
	.read = virtiovf_save_read,
	/* PRE_COPY progress queries via VFIO_MIG_GET_PRECOPY_INFO */
	.unlocked_ioctl = virtiovf_precopy_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.release = virtiovf_release_file,
};
391
536
@@ -429,7 +574,7 @@ virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
429
574
int nent ;
430
575
int ret ;
431
576
432
- buf = virtiovf_alloc_data_buffer (migf , ctx_size );
577
+ buf = virtiovf_get_data_buffer (migf , ctx_size );
433
578
if (IS_ERR (buf ))
434
579
return PTR_ERR (buf );
435
580
@@ -464,7 +609,7 @@ virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
464
609
goto out ;
465
610
466
611
buf -> length = res_size ;
467
- header_buf = virtiovf_alloc_data_buffer (migf ,
612
+ header_buf = virtiovf_get_data_buffer (migf ,
468
613
sizeof (struct virtiovf_migration_header ));
469
614
if (IS_ERR (header_buf )) {
470
615
ret = PTR_ERR (header_buf );
@@ -489,8 +634,43 @@ virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
489
634
return ret ;
490
635
}
491
636
637
+ static int
638
+ virtiovf_pci_save_device_final_data (struct virtiovf_pci_core_device * virtvdev )
639
+ {
640
+ struct virtiovf_migration_file * migf = virtvdev -> saving_migf ;
641
+ u32 ctx_size ;
642
+ int ret ;
643
+
644
+ if (migf -> state == VIRTIOVF_MIGF_STATE_ERROR )
645
+ return - ENODEV ;
646
+
647
+ ret = virtio_pci_admin_dev_parts_metadata_get (virtvdev -> core_device .pdev ,
648
+ VIRTIO_RESOURCE_OBJ_DEV_PARTS , migf -> obj_id ,
649
+ VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE ,
650
+ & ctx_size );
651
+ if (ret )
652
+ goto err ;
653
+
654
+ if (!ctx_size ) {
655
+ ret = - EINVAL ;
656
+ goto err ;
657
+ }
658
+
659
+ ret = virtiovf_read_device_context_chunk (migf , ctx_size );
660
+ if (ret )
661
+ goto err ;
662
+
663
+ migf -> state = VIRTIOVF_MIGF_STATE_COMPLETE ;
664
+ return 0 ;
665
+
666
+ err :
667
+ migf -> state = VIRTIOVF_MIGF_STATE_ERROR ;
668
+ return ret ;
669
+ }
670
+
492
671
static struct virtiovf_migration_file *
493
- virtiovf_pci_save_device_data (struct virtiovf_pci_core_device * virtvdev )
672
+ virtiovf_pci_save_device_data (struct virtiovf_pci_core_device * virtvdev ,
673
+ bool pre_copy )
494
674
{
495
675
struct virtiovf_migration_file * migf ;
496
676
u32 ctx_size ;
@@ -541,6 +721,18 @@ virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev)
541
721
if (ret )
542
722
goto out_clean ;
543
723
724
+ if (pre_copy ) {
725
+ migf -> pre_copy_initial_bytes = migf -> max_pos ;
726
+ /* Arbitrarily set the pre-copy rate limit to 1-second intervals */
727
+ ratelimit_state_init (& migf -> pre_copy_rl_state , 1 * HZ , 1 );
728
+ /* Prevent any rate messages upon its usage */
729
+ ratelimit_set_flags (& migf -> pre_copy_rl_state ,
730
+ RATELIMIT_MSG_ON_RELEASE );
731
+ migf -> state = VIRTIOVF_MIGF_STATE_PRECOPY ;
732
+ } else {
733
+ migf -> state = VIRTIOVF_MIGF_STATE_COMPLETE ;
734
+ }
735
+
544
736
return migf ;
545
737
546
738
out_clean :
@@ -950,15 +1142,17 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
950
1142
return NULL ;
951
1143
}
952
1144
953
- if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P ) {
1145
+ if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P ) ||
1146
+ (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P )) {
954
1147
ret = virtio_pci_admin_mode_set (virtvdev -> core_device .pdev ,
955
1148
BIT (VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED ));
956
1149
if (ret )
957
1150
return ERR_PTR (ret );
958
1151
return NULL ;
959
1152
}
960
1153
961
- if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING ) {
1154
+ if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING ) ||
1155
+ (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY )) {
962
1156
ret = virtio_pci_admin_mode_set (virtvdev -> core_device .pdev , 0 );
963
1157
if (ret )
964
1158
return ERR_PTR (ret );
@@ -968,15 +1162,17 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
968
1162
if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY ) {
969
1163
struct virtiovf_migration_file * migf ;
970
1164
971
- migf = virtiovf_pci_save_device_data (virtvdev );
1165
+ migf = virtiovf_pci_save_device_data (virtvdev , false );
972
1166
if (IS_ERR (migf ))
973
1167
return ERR_CAST (migf );
974
1168
get_file (migf -> filp );
975
1169
virtvdev -> saving_migf = migf ;
976
1170
return migf -> filp ;
977
1171
}
978
1172
979
- if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP ) {
1173
+ if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP ) ||
1174
+ (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING ) ||
1175
+ (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P )) {
980
1176
virtiovf_disable_fds (virtvdev );
981
1177
return NULL ;
982
1178
}
@@ -997,6 +1193,24 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
997
1193
return NULL ;
998
1194
}
999
1195
1196
+ if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY ) ||
1197
+ (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
1198
+ new == VFIO_DEVICE_STATE_PRE_COPY_P2P )) {
1199
+ struct virtiovf_migration_file * migf ;
1200
+
1201
+ migf = virtiovf_pci_save_device_data (virtvdev , true);
1202
+ if (IS_ERR (migf ))
1203
+ return ERR_CAST (migf );
1204
+ get_file (migf -> filp );
1205
+ virtvdev -> saving_migf = migf ;
1206
+ return migf -> filp ;
1207
+ }
1208
+
1209
+ if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY ) {
1210
+ ret = virtiovf_pci_save_device_final_data (virtvdev );
1211
+ return ret ? ERR_PTR (ret ) : NULL ;
1212
+ }
1213
+
1000
1214
/*
1001
1215
* vfio_mig_get_next_state() does not use arcs other than the above
1002
1216
*/
@@ -1101,7 +1315,8 @@ void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev)
1101
1315
spin_lock_init (& virtvdev -> reset_lock );
1102
1316
virtvdev -> core_device .vdev .migration_flags =
1103
1317
VFIO_MIGRATION_STOP_COPY |
1104
- VFIO_MIGRATION_P2P ;
1318
+ VFIO_MIGRATION_P2P |
1319
+ VFIO_MIGRATION_PRE_COPY ;
1105
1320
virtvdev -> core_device .vdev .mig_ops = & virtvdev_pci_mig_ops ;
1106
1321
}
1107
1322
0 commit comments