Commit 821b8f6b authored by Yishai Hadas's avatar Yishai Hadas Committed by Alex Williamson
Browse files

vfio/mlx5: Enforce PRE_COPY support



Enable live migration only once the firmware supports PRE_COPY.

PRE_COPY has been supported by the firmware for a long time already [1]
and is required to achieve a low downtime upon live migration.

This lets us clean up some old code that is not applicable those days
while PRE_COPY is fully supported by the firmware.

[1] The minimum firmware version that supports PRE_COPY is 28.36.1010,
it was released in January 2023.

No firmware without PRE_COPY support ever available to users.

Signed-off-by: default avatarYishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20240306105624.114830-1-yishaih@nvidia.com


Signed-off-by: default avatarAlex Williamson <alex.williamson@redhat.com>
parent 626f534d
Loading
Loading
Loading
Loading
+63 −20
Original line number Diff line number Diff line
@@ -233,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
	if (!MLX5_CAP_GEN(mvdev->mdev, migration))
		goto end;

	if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
	      MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)))
		goto end;

	mvdev->vf_id = pci_iov_vf_id(pdev);
	if (mvdev->vf_id < 0)
		goto end;
@@ -262,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
	mvdev->migrate_cap = 1;
	mvdev->core_device.vdev.migration_flags =
		VFIO_MIGRATION_STOP_COPY |
		VFIO_MIGRATION_P2P;
		VFIO_MIGRATION_P2P |
		VFIO_MIGRATION_PRE_COPY;

	mvdev->core_device.vdev.mig_ops = mig_ops;
	init_completion(&mvdev->tracker_comp);
	if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
		mvdev->core_device.vdev.log_ops = log_ops;

	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
	    MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))
		mvdev->core_device.vdev.migration_flags |=
			VFIO_MIGRATION_PRE_COPY;

	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
		mvdev->chunk_mode = 1;

@@ -414,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
	kfree(buf);
}

static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
				      unsigned int npages)
{
	unsigned int to_alloc = npages;
	struct page **page_list;
	unsigned long filled;
	unsigned int to_fill;
	int ret;

	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
	if (!page_list)
		return -ENOMEM;

	do {
		filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
						page_list);
		if (!filled) {
			ret = -ENOMEM;
			goto err;
		}
		to_alloc -= filled;
		ret = sg_alloc_append_table_from_pages(
			&buf->table, page_list, filled, 0,
			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
			GFP_KERNEL_ACCOUNT);

		if (ret)
			goto err;
		buf->allocated_length += filled * PAGE_SIZE;
		/* clean input for another bulk allocation */
		memset(page_list, 0, filled * sizeof(*page_list));
		to_fill = min_t(unsigned int, to_alloc,
				PAGE_SIZE / sizeof(*page_list));
	} while (to_alloc > 0);

	kvfree(page_list);
	return 0;

err:
	kvfree(page_list);
	return ret;
}

struct mlx5_vhca_data_buffer *
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
			 size_t length,
@@ -680,7 +725,6 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
		goto err_out;
	}

	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
	if (async_data->stop_copy_chunk) {
		u8 header_idx = buf->stop_copy_chunk_num ?
			buf->stop_copy_chunk_num - 1 : 0;
@@ -697,7 +741,6 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			goto err_free;
		}
	}
	}

	if (async_data->stop_copy_chunk)
		migf->state = MLX5_MIGF_STATE_SAVE_STOP_COPY_CHUNK;
+0 −6
Original line number Diff line number Diff line
@@ -13,9 +13,6 @@
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>

#define MLX5VF_PRE_COPY_SUPP(mvdev) \
	((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY)

enum mlx5_vf_migf_state {
	MLX5_MIGF_STATE_ERROR = 1,
	MLX5_MIGF_STATE_PRE_COPY_ERROR,
@@ -25,7 +22,6 @@ enum mlx5_vf_migf_state {
};

enum mlx5_vf_load_state {
	MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
	MLX5_VF_LOAD_STATE_READ_HEADER,
	MLX5_VF_LOAD_STATE_PREP_HEADER_DATA,
	MLX5_VF_LOAD_STATE_READ_HEADER_DATA,
@@ -228,8 +224,6 @@ struct mlx5_vhca_data_buffer *
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
		       size_t length, enum dma_data_direction dma_dir);
void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
			       unsigned int npages);
struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
				       unsigned long offset);
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
+8 −101
Original line number Diff line number Diff line
@@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
	return NULL;
}

int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
			       unsigned int npages)
{
	unsigned int to_alloc = npages;
	struct page **page_list;
	unsigned long filled;
	unsigned int to_fill;
	int ret;

	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
	if (!page_list)
		return -ENOMEM;

	do {
		filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
						page_list);
		if (!filled) {
			ret = -ENOMEM;
			goto err;
		}
		to_alloc -= filled;
		ret = sg_alloc_append_table_from_pages(
			&buf->table, page_list, filled, 0,
			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
			GFP_KERNEL_ACCOUNT);

		if (ret)
			goto err;
		buf->allocated_length += filled * PAGE_SIZE;
		/* clean input for another bulk allocation */
		memset(page_list, 0, filled * sizeof(*page_list));
		to_fill = min_t(unsigned int, to_alloc,
				PAGE_SIZE / sizeof(*page_list));
	} while (to_alloc > 0);

	kvfree(page_list);
	return 0;

err:
	kvfree(page_list);
	return ret;
}

static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
{
	mutex_lock(&migf->lock);
@@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
	return 0;
}

static int
mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
				   loff_t requested_length,
				   const char __user **buf, size_t *len,
				   loff_t *pos, ssize_t *done)
{
	int ret;

	if (requested_length > MAX_LOAD_SIZE)
		return -ENOMEM;

	if (vhca_buf->allocated_length < requested_length) {
		ret = mlx5vf_add_migration_pages(
			vhca_buf,
			DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
				     PAGE_SIZE));
		if (ret)
			return ret;
	}

	while (*len) {
		ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
						    done);
		if (ret)
			return ret;
	}

	return 0;
}

static ssize_t
mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
			 struct mlx5_vhca_data_buffer *vhca_buf,
@@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
			migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
			break;
		}
		case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
			ret = mlx5vf_resume_read_image_no_header(vhca_buf,
						requested_length,
						&buf, &len, pos, &done);
			if (ret)
				goto out_unlock;
			break;
		case MLX5_VF_LOAD_STATE_READ_IMAGE:
			ret = mlx5vf_resume_read_image(migf, vhca_buf,
						migf->record_size,
@@ -1114,7 +1033,6 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
	}

	migf->buf[0] = buf;
	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
	buf = mlx5vf_alloc_data_buffer(migf,
		sizeof(struct mlx5_vf_migration_header), DMA_NONE);
	if (IS_ERR(buf)) {
@@ -1124,10 +1042,6 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)

	migf->buf_header[0] = buf;
	migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
	} else {
		/* Initial state will be to read the image */
		migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
	}

	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);
@@ -1262,13 +1176,6 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
	}

	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
		if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
			ret = mlx5vf_cmd_load_vhca_state(mvdev,
							 mvdev->resuming_migf,
							 mvdev->resuming_migf->buf[0]);
			if (ret)
				return ERR_PTR(ret);
		}
		mlx5vf_disable_fds(mvdev, NULL);
		return NULL;
	}