Skip to content

Commit 821b8f6

Browse files
Yishai Hadas authored and Alex Williamson (awilliam) committed
vfio/mlx5: Enforce PRE_COPY support
Enable live migration only once the firmware supports PRE_COPY. PRE_COPY has been supported by the firmware for a long time already [1] and is required to achieve a low downtime upon live migration. This lets us clean up some old code that is no longer applicable now that PRE_COPY is fully supported by the firmware. [1] The minimum firmware version that supports PRE_COPY is 28.36.1010; it was released in January 2023. No firmware without PRE_COPY support was ever made available to users. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://lore.kernel.org/r/20240306105624.114830-1-yishaih@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
1 parent 626f534 commit 821b8f6

File tree

3 files changed

+71
-127
lines changed

3 files changed

+71
-127
lines changed

drivers/vfio/pci/mlx5/cmd.c

Lines changed: 63 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
233233
if (!MLX5_CAP_GEN(mvdev->mdev, migration))
234234
goto end;
235235

236+
if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
237+
MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)))
238+
goto end;
239+
236240
mvdev->vf_id = pci_iov_vf_id(pdev);
237241
if (mvdev->vf_id < 0)
238242
goto end;
@@ -262,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
262266
mvdev->migrate_cap = 1;
263267
mvdev->core_device.vdev.migration_flags =
264268
VFIO_MIGRATION_STOP_COPY |
265-
VFIO_MIGRATION_P2P;
269+
VFIO_MIGRATION_P2P |
270+
VFIO_MIGRATION_PRE_COPY;
271+
266272
mvdev->core_device.vdev.mig_ops = mig_ops;
267273
init_completion(&mvdev->tracker_comp);
268274
if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
269275
mvdev->core_device.vdev.log_ops = log_ops;
270276

271-
if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
272-
MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))
273-
mvdev->core_device.vdev.migration_flags |=
274-
VFIO_MIGRATION_PRE_COPY;
275-
276277
if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
277278
mvdev->chunk_mode = 1;
278279

@@ -414,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
414415
kfree(buf);
415416
}
416417

418+
static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
419+
unsigned int npages)
420+
{
421+
unsigned int to_alloc = npages;
422+
struct page **page_list;
423+
unsigned long filled;
424+
unsigned int to_fill;
425+
int ret;
426+
427+
to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
428+
page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
429+
if (!page_list)
430+
return -ENOMEM;
431+
432+
do {
433+
filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
434+
page_list);
435+
if (!filled) {
436+
ret = -ENOMEM;
437+
goto err;
438+
}
439+
to_alloc -= filled;
440+
ret = sg_alloc_append_table_from_pages(
441+
&buf->table, page_list, filled, 0,
442+
filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
443+
GFP_KERNEL_ACCOUNT);
444+
445+
if (ret)
446+
goto err;
447+
buf->allocated_length += filled * PAGE_SIZE;
448+
/* clean input for another bulk allocation */
449+
memset(page_list, 0, filled * sizeof(*page_list));
450+
to_fill = min_t(unsigned int, to_alloc,
451+
PAGE_SIZE / sizeof(*page_list));
452+
} while (to_alloc > 0);
453+
454+
kvfree(page_list);
455+
return 0;
456+
457+
err:
458+
kvfree(page_list);
459+
return ret;
460+
}
461+
417462
struct mlx5_vhca_data_buffer *
418463
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
419464
size_t length,
@@ -680,22 +725,20 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
680725
goto err_out;
681726
}
682727

683-
if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
684-
if (async_data->stop_copy_chunk) {
685-
u8 header_idx = buf->stop_copy_chunk_num ?
686-
buf->stop_copy_chunk_num - 1 : 0;
728+
if (async_data->stop_copy_chunk) {
729+
u8 header_idx = buf->stop_copy_chunk_num ?
730+
buf->stop_copy_chunk_num - 1 : 0;
687731

688-
header_buf = migf->buf_header[header_idx];
689-
migf->buf_header[header_idx] = NULL;
690-
}
732+
header_buf = migf->buf_header[header_idx];
733+
migf->buf_header[header_idx] = NULL;
734+
}
691735

692-
if (!header_buf) {
693-
header_buf = mlx5vf_get_data_buffer(migf,
694-
sizeof(struct mlx5_vf_migration_header), DMA_NONE);
695-
if (IS_ERR(header_buf)) {
696-
err = PTR_ERR(header_buf);
697-
goto err_free;
698-
}
736+
if (!header_buf) {
737+
header_buf = mlx5vf_get_data_buffer(migf,
738+
sizeof(struct mlx5_vf_migration_header), DMA_NONE);
739+
if (IS_ERR(header_buf)) {
740+
err = PTR_ERR(header_buf);
741+
goto err_free;
699742
}
700743
}
701744

drivers/vfio/pci/mlx5/cmd.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
#include <linux/mlx5/cq.h>
1414
#include <linux/mlx5/qp.h>
1515

16-
#define MLX5VF_PRE_COPY_SUPP(mvdev) \
17-
((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY)
18-
1916
enum mlx5_vf_migf_state {
2017
MLX5_MIGF_STATE_ERROR = 1,
2118
MLX5_MIGF_STATE_PRE_COPY_ERROR,
@@ -25,7 +22,6 @@ enum mlx5_vf_migf_state {
2522
};
2623

2724
enum mlx5_vf_load_state {
28-
MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
2925
MLX5_VF_LOAD_STATE_READ_HEADER,
3026
MLX5_VF_LOAD_STATE_PREP_HEADER_DATA,
3127
MLX5_VF_LOAD_STATE_READ_HEADER_DATA,
@@ -228,8 +224,6 @@ struct mlx5_vhca_data_buffer *
228224
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
229225
size_t length, enum dma_data_direction dma_dir);
230226
void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
231-
int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
232-
unsigned int npages);
233227
struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
234228
unsigned long offset);
235229
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);

drivers/vfio/pci/mlx5/main.c

Lines changed: 8 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
6565
return NULL;
6666
}
6767

68-
int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
69-
unsigned int npages)
70-
{
71-
unsigned int to_alloc = npages;
72-
struct page **page_list;
73-
unsigned long filled;
74-
unsigned int to_fill;
75-
int ret;
76-
77-
to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
78-
page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
79-
if (!page_list)
80-
return -ENOMEM;
81-
82-
do {
83-
filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
84-
page_list);
85-
if (!filled) {
86-
ret = -ENOMEM;
87-
goto err;
88-
}
89-
to_alloc -= filled;
90-
ret = sg_alloc_append_table_from_pages(
91-
&buf->table, page_list, filled, 0,
92-
filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
93-
GFP_KERNEL_ACCOUNT);
94-
95-
if (ret)
96-
goto err;
97-
buf->allocated_length += filled * PAGE_SIZE;
98-
/* clean input for another bulk allocation */
99-
memset(page_list, 0, filled * sizeof(*page_list));
100-
to_fill = min_t(unsigned int, to_alloc,
101-
PAGE_SIZE / sizeof(*page_list));
102-
} while (to_alloc > 0);
103-
104-
kvfree(page_list);
105-
return 0;
106-
107-
err:
108-
kvfree(page_list);
109-
return ret;
110-
}
111-
11268
static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
11369
{
11470
mutex_lock(&migf->lock);
@@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
777733
return 0;
778734
}
779735

780-
static int
781-
mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
782-
loff_t requested_length,
783-
const char __user **buf, size_t *len,
784-
loff_t *pos, ssize_t *done)
785-
{
786-
int ret;
787-
788-
if (requested_length > MAX_LOAD_SIZE)
789-
return -ENOMEM;
790-
791-
if (vhca_buf->allocated_length < requested_length) {
792-
ret = mlx5vf_add_migration_pages(
793-
vhca_buf,
794-
DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
795-
PAGE_SIZE));
796-
if (ret)
797-
return ret;
798-
}
799-
800-
while (*len) {
801-
ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
802-
done);
803-
if (ret)
804-
return ret;
805-
}
806-
807-
return 0;
808-
}
809-
810736
static ssize_t
811737
mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
812738
struct mlx5_vhca_data_buffer *vhca_buf,
@@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
1038964
migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
1039965
break;
1040966
}
1041-
case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
1042-
ret = mlx5vf_resume_read_image_no_header(vhca_buf,
1043-
requested_length,
1044-
&buf, &len, pos, &done);
1045-
if (ret)
1046-
goto out_unlock;
1047-
break;
1048967
case MLX5_VF_LOAD_STATE_READ_IMAGE:
1049968
ret = mlx5vf_resume_read_image(migf, vhca_buf,
1050969
migf->record_size,
@@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
11141033
}
11151034

11161035
migf->buf[0] = buf;
1117-
if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
1118-
buf = mlx5vf_alloc_data_buffer(migf,
1119-
sizeof(struct mlx5_vf_migration_header), DMA_NONE);
1120-
if (IS_ERR(buf)) {
1121-
ret = PTR_ERR(buf);
1122-
goto out_buf;
1123-
}
1124-
1125-
migf->buf_header[0] = buf;
1126-
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
1127-
} else {
1128-
/* Initial state will be to read the image */
1129-
migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
1036+
buf = mlx5vf_alloc_data_buffer(migf,
1037+
sizeof(struct mlx5_vf_migration_header), DMA_NONE);
1038+
if (IS_ERR(buf)) {
1039+
ret = PTR_ERR(buf);
1040+
goto out_buf;
11301041
}
11311042

1043+
migf->buf_header[0] = buf;
1044+
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
1045+
11321046
stream_open(migf->filp->f_inode, migf->filp);
11331047
mutex_init(&migf->lock);
11341048
INIT_LIST_HEAD(&migf->buf_list);
@@ -1262,13 +1176,6 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
12621176
}
12631177

12641178
if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
1265-
if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
1266-
ret = mlx5vf_cmd_load_vhca_state(mvdev,
1267-
mvdev->resuming_migf,
1268-
mvdev->resuming_migf->buf[0]);
1269-
if (ret)
1270-
return ERR_PTR(ret);
1271-
}
12721179
mlx5vf_disable_fds(mvdev, NULL);
12731180
return NULL;
12741181
}

0 commit comments

Comments (0)