Commit 089803c4 authored by Leon Romanovsky's avatar Leon Romanovsky Committed by Alex Williamson
Browse files

vfio/mlx5: Enable the DMA link API



Remove intermediate scatter-gather table completely and
enable new DMA link API.

Tested-by: default avatarJens Axboe <axboe@kernel.dk>
Reviewed-by: default avatarJason Gunthorpe <jgg@nvidia.com>
Signed-off-by: default avatarLeon Romanovsky <leonro@nvidia.com>
Acked-by: default avatarYishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/f71638d50c9c79a462f2e0423501b1de77617656.1747747694.git.leon@kernel.org


Signed-off-by: default avatarAlex Williamson <alex.williamson@redhat.com>
parent ac6c973a
Loading
Loading
Loading
Loading
+129 −165
Original line number Diff line number Diff line
@@ -345,26 +345,78 @@ static u32 *alloc_mkey_in(u32 npages, u32 pdn)
	return in;
}

static int create_mkey(struct mlx5_core_dev *mdev, u32 npages,
		       struct mlx5_vhca_data_buffer *buf, u32 *mkey_in,
static int create_mkey(struct mlx5_core_dev *mdev, u32 npages, u32 *mkey_in,
		       u32 *mkey)
{
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(__be64) * round_up(npages, 2);

	return mlx5_core_create_mkey(mdev, mkey, mkey_in, inlen);
}

static void unregister_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
				 u32 *mkey_in, struct dma_iova_state *state,
				 enum dma_data_direction dir)
{
	dma_addr_t addr;
	__be64 *mtt;
	int inlen;
	int i;

	if (dma_use_iova(state)) {
		dma_iova_destroy(mdev->device, state, npages * PAGE_SIZE, dir,
				 0);
	} else {
		mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in,
					     klm_pas_mtt);
		for (i = npages - 1; i >= 0; i--) {
			addr = be64_to_cpu(mtt[i]);
			dma_unmap_page(mdev->device, addr, PAGE_SIZE, dir);
		}
	}
}

static int register_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
			      struct page **page_list, u32 *mkey_in,
			      struct dma_iova_state *state,
			      enum dma_data_direction dir)
{
	dma_addr_t addr;
	size_t mapped = 0;
	__be64 *mtt;
	int i, err;

	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);
	if (buf) {
		struct sg_dma_page_iter dma_iter;

		for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0)
			*mtt++ = cpu_to_be64(
				sg_page_iter_dma_address(&dma_iter));
	if (dma_iova_try_alloc(mdev->device, state, 0, npages * PAGE_SIZE)) {
		addr = state->addr;
		for (i = 0; i < npages; i++) {
			err = dma_iova_link(mdev->device, state,
					    page_to_phys(page_list[i]), mapped,
					    PAGE_SIZE, dir, 0);
			if (err)
				goto error;
			*mtt++ = cpu_to_be64(addr);
			addr += PAGE_SIZE;
			mapped += PAGE_SIZE;
		}
		err = dma_iova_sync(mdev->device, state, 0, mapped);
		if (err)
			goto error;
	} else {
		for (i = 0; i < npages; i++) {
			addr = dma_map_page(mdev->device, page_list[i], 0,
					    PAGE_SIZE, dir);
			err = dma_mapping_error(mdev->device, addr);
			if (err)
				goto error;
			*mtt++ = cpu_to_be64(addr);
		}
	}
	return 0;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(__be64) * round_up(npages, 2);

	return mlx5_core_create_mkey(mdev, mkey, mkey_in, inlen);
error:
	unregister_dma_pages(mdev, i, mkey_in, state, dir);
	return err;
}

static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf)
@@ -380,98 +432,90 @@ static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf)
	if (buf->mkey_in || !buf->npages)
		return -EINVAL;

	ret = dma_map_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
	if (ret)
		return ret;

	buf->mkey_in = alloc_mkey_in(buf->npages, buf->migf->pdn);
	if (!buf->mkey_in) {
		ret = -ENOMEM;
		goto err;
	}
	if (!buf->mkey_in)
		return -ENOMEM;

	ret = create_mkey(mdev, buf->npages, buf, buf->mkey_in, &buf->mkey);
	ret = register_dma_pages(mdev, buf->npages, buf->page_list,
				 buf->mkey_in, &buf->state, buf->dma_dir);
	if (ret)
		goto err_register_dma;

	ret = create_mkey(mdev, buf->npages, buf->mkey_in, &buf->mkey);
	if (ret)
		goto err_create_mkey;

	return 0;

err_create_mkey:
	unregister_dma_pages(mdev, buf->npages, buf->mkey_in, &buf->state,
			     buf->dma_dir);
err_register_dma:
	kvfree(buf->mkey_in);
	buf->mkey_in = NULL;
err:
	dma_unmap_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
	return ret;
}

static void free_page_list(u32 npages, struct page **page_list)
{
	int i;

	/* Undo alloc_pages_bulk() */
	for (i = npages - 1; i >= 0; i--)
		__free_page(page_list[i]);

	kvfree(page_list);
}

void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
	struct mlx5_vf_migration_file *migf = buf->migf;
	struct sg_page_iter sg_iter;
	struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev;
	struct mlx5_core_dev *mdev = mvdev->mdev;

	lockdep_assert_held(&migf->mvdev->state_mutex);
	WARN_ON(migf->mvdev->mdev_detach);
	lockdep_assert_held(&mvdev->state_mutex);
	WARN_ON(mvdev->mdev_detach);

	if (buf->mkey_in) {
		mlx5_core_destroy_mkey(migf->mvdev->mdev, buf->mkey);
		mlx5_core_destroy_mkey(mdev, buf->mkey);
		unregister_dma_pages(mdev, buf->npages, buf->mkey_in,
				     &buf->state, buf->dma_dir);
		kvfree(buf->mkey_in);
		dma_unmap_sgtable(migf->mvdev->mdev->device, &buf->table.sgt,
				  buf->dma_dir, 0);
	}

	/* Undo alloc_pages_bulk() */
	for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
		__free_page(sg_page_iter_page(&sg_iter));
	sg_free_append_table(&buf->table);
	free_page_list(buf->npages, buf->page_list);
	kfree(buf);
}

static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
				      unsigned int npages)
static int mlx5vf_add_pages(struct page ***page_list, unsigned int npages)
{
	unsigned int to_alloc = npages;
	struct page **page_list;
	unsigned long filled;
	unsigned int to_fill;
	int ret;
	unsigned int filled, done = 0;
	int i;

	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
	if (!page_list)
	*page_list =
		kvcalloc(npages, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
	if (!*page_list)
		return -ENOMEM;

	do {
		filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill,
					  page_list);
		if (!filled) {
			ret = -ENOMEM;
	for (;;) {
		filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, npages - done,
					  *page_list + done);
		if (!filled)
			goto err;
		}
		to_alloc -= filled;
		ret = sg_alloc_append_table_from_pages(
			&buf->table, page_list, filled, 0,
			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
			GFP_KERNEL_ACCOUNT);

		if (ret)
			goto err_append;
		buf->npages += filled;
		/* clean input for another bulk allocation */
		memset(page_list, 0, filled * sizeof(*page_list));
		to_fill = min_t(unsigned int, to_alloc,
				PAGE_SIZE / sizeof(*page_list));
	} while (to_alloc > 0);
		done += filled;
		if (done == npages)
			break;
	}

	kvfree(page_list);
	return 0;

err_append:
	for (i = filled - 1; i >= 0; i--)
		__free_page(page_list[i]);
err:
	kvfree(page_list);
	return ret;
	for (i = 0; i < done; i++)
		__free_page(*page_list[i]);

	kvfree(*page_list);
	*page_list = NULL;
	return -ENOMEM;
}

struct mlx5_vhca_data_buffer *
@@ -488,10 +532,12 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
	buf->dma_dir = dma_dir;
	buf->migf = migf;
	if (npages) {
		ret = mlx5vf_add_migration_pages(buf, npages);
		ret = mlx5vf_add_pages(&buf->page_list, npages);
		if (ret)
			goto end;

		buf->npages = npages;

		if (dma_dir != DMA_NONE) {
			ret = mlx5vf_dma_data_buffer(buf);
			if (ret)
@@ -1350,101 +1396,16 @@ static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
	kfree(qp);
}

static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
{
	int i;

	/* Undo alloc_pages_bulk() */
	for (i = 0; i < recv_buf->npages; i++)
		__free_page(recv_buf->page_list[i]);

	kvfree(recv_buf->page_list);
}

static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
			    unsigned int npages)
{
	unsigned int filled = 0, done = 0;
	int i;

	recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
				       GFP_KERNEL_ACCOUNT);
	if (!recv_buf->page_list)
		return -ENOMEM;

	for (;;) {
		filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT,
					  npages - done,
					  recv_buf->page_list + done);
		if (!filled)
			goto err;

		done += filled;
		if (done == npages)
			break;
	}

	recv_buf->npages = npages;
	return 0;

err:
	for (i = 0; i < npages; i++) {
		if (recv_buf->page_list[i])
			__free_page(recv_buf->page_list[i]);
	}

	kvfree(recv_buf->page_list);
	return -ENOMEM;
}
static void unregister_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
				 u32 *mkey_in)
{
	dma_addr_t addr;
	__be64 *mtt;
	int i;

	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);
	for (i = npages - 1; i >= 0; i--) {
		addr = be64_to_cpu(mtt[i]);
		dma_unmap_single(mdev->device, addr, PAGE_SIZE,
				DMA_FROM_DEVICE);
	}
}

static int register_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
			      struct page **page_list, u32 *mkey_in)
{
	dma_addr_t addr;
	__be64 *mtt;
	int i;

	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);

	for (i = 0; i < npages; i++) {
		addr = dma_map_page(mdev->device, page_list[i], 0, PAGE_SIZE,
				    DMA_FROM_DEVICE);
		if (dma_mapping_error(mdev->device, addr))
			goto error;

		*mtt++ = cpu_to_be64(addr);
	}

	return 0;

error:
	unregister_dma_pages(mdev, i, mkey_in);
	return -ENOMEM;
}

static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
					  struct mlx5_vhca_qp *qp)
{
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;

	mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
	unregister_dma_pages(mdev, recv_buf->npages, recv_buf->mkey_in);
	unregister_dma_pages(mdev, recv_buf->npages, recv_buf->mkey_in,
			     &recv_buf->state, DMA_FROM_DEVICE);
	kvfree(recv_buf->mkey_in);
	free_recv_pages(&qp->recv_buf);
	free_page_list(recv_buf->npages, recv_buf->page_list);
}

static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
@@ -1455,10 +1416,12 @@ static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
	int err;

	err = alloc_recv_pages(recv_buf, npages);
	if (err < 0)
	err = mlx5vf_add_pages(&recv_buf->page_list, npages);
	if (err)
		return err;

	recv_buf->npages = npages;

	recv_buf->mkey_in = alloc_mkey_in(npages, pdn);
	if (!recv_buf->mkey_in) {
		err = -ENOMEM;
@@ -1466,24 +1429,25 @@ static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
	}

	err = register_dma_pages(mdev, npages, recv_buf->page_list,
				 recv_buf->mkey_in);
				 recv_buf->mkey_in, &recv_buf->state,
				 DMA_FROM_DEVICE);
	if (err)
		goto err_register_dma;

	err = create_mkey(mdev, npages, NULL, recv_buf->mkey_in,
			  &recv_buf->mkey);
	err = create_mkey(mdev, npages, recv_buf->mkey_in, &recv_buf->mkey);
	if (err)
		goto err_create_mkey;

	return 0;

err_create_mkey:
	unregister_dma_pages(mdev, npages, recv_buf->mkey_in);
	unregister_dma_pages(mdev, npages, recv_buf->mkey_in, &recv_buf->state,
			     DMA_FROM_DEVICE);
err_register_dma:
	kvfree(recv_buf->mkey_in);
	recv_buf->mkey_in = NULL;
end:
	free_recv_pages(recv_buf);
	free_page_list(npages, recv_buf->page_list);
	return err;
}

+14 −7
Original line number Diff line number Diff line
@@ -53,7 +53,8 @@ struct mlx5_vf_migration_header {
};

struct mlx5_vhca_data_buffer {
	struct sg_append_table table;
	struct page **page_list;
	struct dma_iova_state state;
	loff_t start_pos;
	u64 length;
	u32 npages;
@@ -63,10 +64,6 @@ struct mlx5_vhca_data_buffer {
	u8 stop_copy_chunk_num;
	struct list_head buf_elm;
	struct mlx5_vf_migration_file *migf;
	/* Optimize mlx5vf_get_migration_page() for sequential access */
	struct scatterlist *last_offset_sg;
	unsigned int sg_last_entry;
	unsigned long last_offset;
};

struct mlx5vf_async_data {
@@ -133,6 +130,7 @@ struct mlx5_vhca_cq {
struct mlx5_vhca_recv_buf {
	u32 npages;
	struct page **page_list;
	struct dma_iova_state state;
	u32 next_rq_offset;
	u32 *mkey_in;
	u32 mkey;
@@ -224,8 +222,17 @@ struct mlx5_vhca_data_buffer *
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
		       enum dma_data_direction dma_dir);
void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
				       unsigned long offset);
static inline struct page *
mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
			  unsigned long offset)
{
	int page_entry = offset / PAGE_SIZE;

	if (page_entry >= buf->npages)
		return NULL;

	return buf->page_list[page_entry];
}
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev,
			enum mlx5_vf_migf_state *last_save_state);
+0 −31
Original line number Diff line number Diff line
@@ -34,37 +34,6 @@ static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
			    core_device);
}

struct page *
mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
			  unsigned long offset)
{
	unsigned long cur_offset = 0;
	struct scatterlist *sg;
	unsigned int i;

	/* All accesses are sequential */
	if (offset < buf->last_offset || !buf->last_offset_sg) {
		buf->last_offset = 0;
		buf->last_offset_sg = buf->table.sgt.sgl;
		buf->sg_last_entry = 0;
	}

	cur_offset = buf->last_offset;

	for_each_sg(buf->last_offset_sg, sg,
			buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
		if (offset < sg->length + cur_offset) {
			buf->last_offset_sg = sg;
			buf->sg_last_entry += i;
			buf->last_offset = cur_offset;
			return nth_page(sg_page(sg),
					(offset - cur_offset) / PAGE_SIZE);
		}
		cur_offset += sg->length;
	}
	return NULL;
}

static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
{
	mutex_lock(&migf->lock);