Commit 35360498 authored by Linus Torvalds
Browse files

Merge tag 'vfio-v6.16-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Remove an outdated DMA unmap optimization that relies on a feature
   only implemented in AMDv1 page tables. (Jason Gunthorpe)

 - Fix various migration issues in the hisi_acc_vfio_pci variant driver,
   including use of a wrong DMA address requiring an update to the
   migration data structure, resending task completion interrupt after
   migration to re-sync queues, fixing a write-back cache sequencing
   issue, fixing a driver unload issue, behaving correctly when the
   guest driver is not loaded, and no longer squashing errors returned
   by sub-functions. (Longfang Liu)

 - mlx5-vfio-pci variant driver update to make use of the new two-step
   DMA API for migration, using a page array directly rather than using
   a page list mapped across a scatter list. (Leon Romanovsky)

 - Fix an incorrect loop index used when unwinding allocation of dirty
   page bitmaps on error, resulting in temporary failure in freeing
   unused bitmaps. (Li RongQing)

* tag 'vfio-v6.16-rc1' of https://github.com/awilliam/linux-vfio:
  vfio/type1: Fix error unwind in migration dirty bitmap allocation
  vfio/mlx5: Enable the DMA link API
  vfio/mlx5: Rewrite create mkey flow to allow better code reuse
  vfio/mlx5: Explicitly use number of pages instead of allocated length
  hisi_acc_vfio_pci: update function return values.
  hisi_acc_vfio_pci: bugfix live migration function without VF device driver
  hisi_acc_vfio_pci: bugfix the problem of uninstalling driver
  hisi_acc_vfio_pci: bugfix cache write-back issue
  hisi_acc_vfio_pci: add eq and aeq interruption restore
  hisi_acc_vfio_pci: fix XQE dma address error
  vfio/type1: Remove Fine Grained Superpages detection
parents 02897f5e 4518e5a6
Loading
Loading
Loading
Loading
+89 −32
Original line number Diff line number Diff line
@@ -190,9 +190,10 @@ static int qm_set_regs(struct hisi_qm *qm, struct acc_vf_data *vf_data)
	int ret;

	/* Check VF state */
	if (unlikely(hisi_qm_wait_mb_ready(qm))) {
	ret = hisi_qm_wait_mb_ready(qm);
	if (unlikely(ret)) {
		dev_err(&qm->pdev->dev, "QM device is not ready to write\n");
		return -EBUSY;
		return ret;
	}

	ret = qm_write_regs(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1);
@@ -325,13 +326,15 @@ static void qm_dev_cmd_init(struct hisi_qm *qm)
static int vf_qm_cache_wb(struct hisi_qm *qm)
{
	unsigned int val;
	int ret;

	writel(0x1, qm->io_base + QM_CACHE_WB_START);
	if (readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE,
	ret = readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE,
				       val, val & BIT(0), MB_POLL_PERIOD_US,
				       MB_POLL_TIMEOUT_US)) {
				       MB_POLL_TIMEOUT_US);
	if (ret) {
		dev_err(&qm->pdev->dev, "vf QM writeback sqc cache fail\n");
		return -EINVAL;
		return ret;
	}

	return 0;
@@ -350,6 +353,32 @@ static int vf_qm_func_stop(struct hisi_qm *qm)
	return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
}

/*
 * Validate the magic/version header carried in incoming migration data.
 *
 * - ACC_DEV_MAGIC_V2: accepted only when the recorded major version equals
 *   ACC_DRV_MAJOR_VER; a mismatch is logged and rejected.
 * - ACC_DEV_MAGIC_V1: always accepted, but eqe_dma and aeqe_dma are first
 *   rebuilt from the saved EQC/AEQC dwords.
 * - Any other magic value is rejected.
 *
 * Returns 0 when the data is acceptable, -EINVAL otherwise.
 */
static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device *dev)
{
	if (vf_data->acc_magic == ACC_DEV_MAGIC_V2) {
		if (vf_data->major_ver == ACC_DRV_MAJOR_VER)
			return 0;

		dev_info(dev, "migration driver version<%u.%u> not match!\n",
			 vf_data->major_ver, vf_data->minor_ver);
		return -EINVAL;
	}

	if (vf_data->acc_magic != ACC_DEV_MAGIC_V1)
		return -EINVAL;

	/*
	 * V1 data: recompute each address as
	 * (HIGH dword << QM_XQC_ADDR_OFFSET) | LOW dword.
	 */
	vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
	vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
	vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];

	vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
	vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
	vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];

	return 0;
}

static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
			     struct hisi_acc_vf_migration_file *migf)
{
@@ -363,9 +392,10 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
	if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done)
		return 0;

	if (vf_data->acc_magic != ACC_DEV_MAGIC) {
	ret = vf_qm_version_check(vf_data, dev);
	if (ret) {
		dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
		return -EINVAL;
		return ret;
	}

	if (vf_data->dev_id != hisi_acc_vdev->vf_dev->device) {
@@ -377,7 +407,7 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
	ret = qm_get_vft(vf_qm, &vf_qm->qp_base);
	if (ret <= 0) {
		dev_err(dev, "failed to get vft qp nums\n");
		return -EINVAL;
		return ret;
	}

	if (ret != vf_data->qp_num) {
@@ -399,13 +429,6 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
		return -EINVAL;
	}

	ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
	if (ret) {
		dev_err(dev, "failed to write QM_VF_STATE\n");
		return ret;
	}

	hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
	hisi_acc_vdev->match_done = true;
	return 0;
}
@@ -418,7 +441,9 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
	int vf_id = hisi_acc_vdev->vf_id;
	int ret;

	vf_data->acc_magic = ACC_DEV_MAGIC;
	vf_data->acc_magic = ACC_DEV_MAGIC_V2;
	vf_data->major_ver = ACC_DRV_MAJOR_VER;
	vf_data->minor_ver = ACC_DRV_MINOR_VER;
	/* Save device id */
	vf_data->dev_id = hisi_acc_vdev->vf_dev->device;

@@ -441,6 +466,19 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
	return 0;
}

/*
 * Ring the EQ doorbell and then the AEQ doorbell of @qm.
 *
 * The value passed with each doorbell is the low 16 bits of dword 0 of the
 * EQC / AEQC contents held in @migf->vf_data.
 * NOTE(review): the original locals call this value the queue "head" -
 * confirm against the hardware documentation.
 */
static void vf_qm_xeqc_save(struct hisi_qm *qm,
			    struct hisi_acc_vf_migration_file *migf)
{
	u16 eqc_head = migf->vf_data.qm_eqc_dw[0] & 0xFFFF;
	u16 aeqc_head = migf->vf_data.qm_aeqc_dw[0] & 0xFFFF;

	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, eqc_head, 0);
	qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, aeqc_head, 0);
}

static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
			   struct hisi_acc_vf_migration_file *migf)
{
@@ -456,6 +494,20 @@ static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
	if (migf->total_length < sizeof(struct acc_vf_data))
		return -EINVAL;

	if (!vf_data->eqe_dma || !vf_data->aeqe_dma ||
	    !vf_data->sqc_dma || !vf_data->cqc_dma) {
		dev_info(dev, "resume dma addr is NULL!\n");
		hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
		return 0;
	}

	ret = qm_write_regs(qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
	if (ret) {
		dev_err(dev, "failed to write QM_VF_STATE\n");
		return ret;
	}
	hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;

	qm->eqe_dma = vf_data->eqe_dma;
	qm->aeqe_dma = vf_data->aeqe_dma;
	qm->sqc_dma = vf_data->sqc_dma;
@@ -493,27 +545,27 @@ static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data)

	ret = qm_get_regs(vf_qm, vf_data);
	if (ret)
		return -EINVAL;
		return ret;

	/* Every reg is 32 bit, the dma address is 64 bit. */
	vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
	vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
	vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
	vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
	vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
	vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
	vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
	vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
	vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
	vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];

	/* Through SQC_BT/CQC_BT to get sqc and cqc address */
	ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
	if (ret) {
		dev_err(dev, "failed to read SQC addr!\n");
		return -EINVAL;
		return ret;
	}

	ret = qm_get_cqc(vf_qm, &vf_data->cqc_dma);
	if (ret) {
		dev_err(dev, "failed to read CQC addr!\n");
		return -EINVAL;
		return ret;
	}

	return 0;
@@ -524,7 +576,6 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
{
	struct acc_vf_data *vf_data = &migf->vf_data;
	struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
	struct device *dev = &vf_qm->pdev->dev;
	int ret;

	if (unlikely(qm_wait_dev_not_ready(vf_qm))) {
@@ -538,17 +589,14 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
	vf_data->vf_qm_state = QM_READY;
	hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;

	ret = vf_qm_cache_wb(vf_qm);
	if (ret) {
		dev_err(dev, "failed to writeback QM Cache!\n");
		return ret;
	}

	ret = vf_qm_read_data(vf_qm, vf_data);
	if (ret)
		return -EINVAL;
		return ret;

	migf->total_length = sizeof(struct acc_vf_data);
	/* Save eqc and aeqc interrupt information */
	vf_qm_xeqc_save(vf_qm, migf);

	return 0;
}

@@ -967,6 +1015,13 @@ static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev
		dev_err(dev, "failed to check QM INT state!\n");
		return ret;
	}

	ret = vf_qm_cache_wb(vf_qm);
	if (ret) {
		dev_err(dev, "failed to writeback QM cache!\n");
		return ret;
	}

	return 0;
}

@@ -1327,7 +1382,7 @@ static int hisi_acc_vf_debug_check(struct seq_file *seq, struct vfio_device *vde
	ret = qm_wait_dev_not_ready(vf_qm);
	if (ret) {
		seq_puts(seq, "VF device not ready!\n");
		return -EBUSY;
		return ret;
	}

	return 0;
@@ -1463,6 +1518,7 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
	struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
	struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;

	hisi_acc_vf_disable_fds(hisi_acc_vdev);
	mutex_lock(&hisi_acc_vdev->open_mutex);
	hisi_acc_vdev->dev_opened = false;
	iounmap(vf_qm->io_base);
@@ -1485,6 +1541,7 @@ static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev)
	hisi_acc_vdev->vf_id = pci_iov_vf_id(pdev) + 1;
	hisi_acc_vdev->pf_qm = pf_qm;
	hisi_acc_vdev->vf_dev = pdev;
	hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
	mutex_init(&hisi_acc_vdev->state_mutex);
	mutex_init(&hisi_acc_vdev->open_mutex);

+12 −2
Original line number Diff line number Diff line
@@ -39,6 +39,9 @@
#define QM_REG_ADDR_OFFSET	0x0004

#define QM_XQC_ADDR_OFFSET	32U
#define QM_XQC_ADDR_LOW	0x1
#define QM_XQC_ADDR_HIGH	0x2

#define QM_VF_AEQ_INT_MASK	0x0004
#define QM_VF_EQ_INT_MASK	0x000c
#define QM_IFC_INT_SOURCE_V	0x0020
@@ -50,10 +53,15 @@
#define QM_EQC_DW0		0X8000
#define QM_AEQC_DW0		0X8020

#define ACC_DRV_MAJOR_VER 1
#define ACC_DRV_MINOR_VER 0

#define ACC_DEV_MAGIC_V1	0XCDCDCDCDFEEDAACC
#define ACC_DEV_MAGIC_V2	0xAACCFEEDDECADEDE

struct acc_vf_data {
#define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
	/* QM match information */
#define ACC_DEV_MAGIC	0XCDCDCDCDFEEDAACC
	u64 acc_magic;
	u32 qp_num;
	u32 dev_id;
@@ -61,7 +69,9 @@ struct acc_vf_data {
	u32 qp_base;
	u32 vf_qm_state;
	/* QM reserved match information */
	u32 qm_rsv_state[3];
	u16 major_ver;
	u16 minor_ver;
	u32 qm_rsv_state[2];

	/* QM RW regs */
	u32 aeq_int_mask;
+178 −193
Original line number Diff line number Diff line
@@ -313,40 +313,21 @@ static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
	return ret;
}

static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
			struct mlx5_vhca_data_buffer *buf,
			struct mlx5_vhca_recv_buf *recv_buf,
			u32 *mkey)
static u32 *alloc_mkey_in(u32 npages, u32 pdn)
{
	size_t npages = buf ? DIV_ROUND_UP(buf->allocated_length, PAGE_SIZE) :
				recv_buf->npages;
	int err = 0, inlen;
	__be64 *mtt;
	int inlen;
	void *mkc;
	u32 *in;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(*mtt) * round_up(npages, 2);
		sizeof(__be64) * round_up(npages, 2);

	in = kvzalloc(inlen, GFP_KERNEL);
	in = kvzalloc(inlen, GFP_KERNEL_ACCOUNT);
	if (!in)
		return -ENOMEM;
		return NULL;

	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 DIV_ROUND_UP(npages, 2));
	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);

	if (buf) {
		struct sg_dma_page_iter dma_iter;

		for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0)
			*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
	} else {
		int i;

		for (i = 0; i < npages; i++)
			*mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]);
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
@@ -360,8 +341,81 @@ static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
	MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
	MLX5_SET64(mkc, mkc, len, npages * PAGE_SIZE);
	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
	kvfree(in);

	return in;
}

/*
 * Hand a prepared create_mkey_in command buffer to mlx5_core_create_mkey().
 *
 * @mdev:    device the command is issued on
 * @npages:  number of pages described by @mkey_in
 * @mkey_in: command buffer (fixed create_mkey_in part followed by the
 *           translation entries)
 * @mkey:    receives the created key
 *
 * The command length is the fixed create_mkey_in size plus one __be64 entry
 * per page, with the page count rounded up to a multiple of two.
 *
 * Returns whatever mlx5_core_create_mkey() returns.
 */
static int create_mkey(struct mlx5_core_dev *mdev, u32 npages, u32 *mkey_in,
		       u32 *mkey)
{
	size_t mtt_bytes = sizeof(__be64) * round_up(npages, 2);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + mtt_bytes;

	return mlx5_core_create_mkey(mdev, mkey, mkey_in, inlen);
}

/*
 * Undo the DMA mappings of the first @npages pages described by @mkey_in.
 *
 * When @state reports IOVA use, the whole range of @npages * PAGE_SIZE bytes
 * is torn down with a single dma_iova_destroy() call.  Otherwise the DMA
 * addresses are read back from the translation entries stored in @mkey_in
 * and unmapped one page at a time, last entry first.
 */
static void unregister_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
				 u32 *mkey_in, struct dma_iova_state *state,
				 enum dma_data_direction dir)
{
	__be64 *mtt;
	int idx;

	if (dma_use_iova(state)) {
		dma_iova_destroy(mdev->device, state, npages * PAGE_SIZE, dir,
				 0);
		return;
	}

	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);
	for (idx = npages - 1; idx >= 0; idx--) {
		dma_addr_t dma = be64_to_cpu(mtt[idx]);

		dma_unmap_page(mdev->device, dma, PAGE_SIZE, dir);
	}
}

/*
 * DMA-map @npages pages from @page_list and record their DMA addresses as
 * big-endian 64-bit translation entries in the klm_pas_mtt area of @mkey_in.
 *
 * Two paths exist:
 *  - if dma_iova_try_alloc() succeeds (returns non-zero) for a range of
 *    @npages * PAGE_SIZE bytes, every page is linked into that range at
 *    consecutive PAGE_SIZE offsets and the range is synced once at the end;
 *  - otherwise every page is mapped individually with dma_map_page().
 *
 * On failure the pages handled so far are unmapped again through
 * unregister_dma_pages().
 *
 * Returns 0 on success, or the error reported by dma_iova_link(),
 * dma_iova_sync() or dma_mapping_error().
 */
static int register_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
			      struct page **page_list, u32 *mkey_in,
			      struct dma_iova_state *state,
			      enum dma_data_direction dir)
{
	dma_addr_t addr;
	size_t mapped = 0;
	__be64 *mtt;
	int i, err;

	/* Translation entries live inside the command buffer itself. */
	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);

	if (dma_iova_try_alloc(mdev->device, state, 0, npages * PAGE_SIZE)) {
		/* Entry i is state->addr + i * PAGE_SIZE. */
		addr = state->addr;
		for (i = 0; i < npages; i++) {
			err = dma_iova_link(mdev->device, state,
					    page_to_phys(page_list[i]), mapped,
					    PAGE_SIZE, dir, 0);
			if (err)
				goto error;
			*mtt++ = cpu_to_be64(addr);
			addr += PAGE_SIZE;
			mapped += PAGE_SIZE;
		}
		/* If this fails, i == npages, so the unwind covers every page. */
		err = dma_iova_sync(mdev->device, state, 0, mapped);
		if (err)
			goto error;
	} else {
		for (i = 0; i < npages; i++) {
			addr = dma_map_page(mdev->device, page_list[i], 0,
					    PAGE_SIZE, dir);
			err = dma_mapping_error(mdev->device, addr);
			if (err)
				goto error;
			*mtt++ = cpu_to_be64(addr);
		}
	}
	return 0;

error:
	/* i is the number of pages that were successfully linked/mapped. */
	unregister_dma_pages(mdev, i, mkey_in, state, dir);
	return err;
}

@@ -375,97 +429,97 @@ static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf)
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	if (buf->dmaed || !buf->allocated_length)
	if (buf->mkey_in || !buf->npages)
		return -EINVAL;

	ret = dma_map_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
	if (ret)
		return ret;
	buf->mkey_in = alloc_mkey_in(buf->npages, buf->migf->pdn);
	if (!buf->mkey_in)
		return -ENOMEM;

	ret = _create_mkey(mdev, buf->migf->pdn, buf, NULL, &buf->mkey);
	ret = register_dma_pages(mdev, buf->npages, buf->page_list,
				 buf->mkey_in, &buf->state, buf->dma_dir);
	if (ret)
		goto err;
		goto err_register_dma;

	buf->dmaed = true;
	ret = create_mkey(mdev, buf->npages, buf->mkey_in, &buf->mkey);
	if (ret)
		goto err_create_mkey;

	return 0;
err:
	dma_unmap_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);

err_create_mkey:
	unregister_dma_pages(mdev, buf->npages, buf->mkey_in, &buf->state,
			     buf->dma_dir);
err_register_dma:
	kvfree(buf->mkey_in);
	buf->mkey_in = NULL;
	return ret;
}

/*
 * Release the @npages pages referenced by @page_list (last entry first) and
 * then free the pointer array itself.
 */
static void free_page_list(u32 npages, struct page **page_list)
{
	int idx = npages - 1;

	/* Undo alloc_pages_bulk() */
	while (idx >= 0) {
		__free_page(page_list[idx]);
		idx--;
	}

	kvfree(page_list);
}

void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
	struct mlx5_vf_migration_file *migf = buf->migf;
	struct sg_page_iter sg_iter;
	struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev;
	struct mlx5_core_dev *mdev = mvdev->mdev;

	lockdep_assert_held(&migf->mvdev->state_mutex);
	WARN_ON(migf->mvdev->mdev_detach);
	lockdep_assert_held(&mvdev->state_mutex);
	WARN_ON(mvdev->mdev_detach);

	if (buf->dmaed) {
		mlx5_core_destroy_mkey(migf->mvdev->mdev, buf->mkey);
		dma_unmap_sgtable(migf->mvdev->mdev->device, &buf->table.sgt,
				  buf->dma_dir, 0);
	if (buf->mkey_in) {
		mlx5_core_destroy_mkey(mdev, buf->mkey);
		unregister_dma_pages(mdev, buf->npages, buf->mkey_in,
				     &buf->state, buf->dma_dir);
		kvfree(buf->mkey_in);
	}

	/* Undo alloc_pages_bulk() */
	for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
		__free_page(sg_page_iter_page(&sg_iter));
	sg_free_append_table(&buf->table);
	free_page_list(buf->npages, buf->page_list);
	kfree(buf);
}

static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
				      unsigned int npages)
static int mlx5vf_add_pages(struct page ***page_list, unsigned int npages)
{
	unsigned int to_alloc = npages;
	struct page **page_list;
	unsigned long filled;
	unsigned int to_fill;
	int ret;
	unsigned int filled, done = 0;
	int i;

	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
	if (!page_list)
	*page_list =
		kvcalloc(npages, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
	if (!*page_list)
		return -ENOMEM;

	do {
		filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill,
					  page_list);
		if (!filled) {
			ret = -ENOMEM;
	for (;;) {
		filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, npages - done,
					  *page_list + done);
		if (!filled)
			goto err;
		}
		to_alloc -= filled;
		ret = sg_alloc_append_table_from_pages(
			&buf->table, page_list, filled, 0,
			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
			GFP_KERNEL_ACCOUNT);

		if (ret)
			goto err_append;
		buf->allocated_length += filled * PAGE_SIZE;
		/* clean input for another bulk allocation */
		memset(page_list, 0, filled * sizeof(*page_list));
		to_fill = min_t(unsigned int, to_alloc,
				PAGE_SIZE / sizeof(*page_list));
	} while (to_alloc > 0);
		done += filled;
		if (done == npages)
			break;
	}

	kvfree(page_list);
	return 0;

err_append:
	for (i = filled - 1; i >= 0; i--)
		__free_page(page_list[i]);
err:
	kvfree(page_list);
	return ret;
	for (i = 0; i < done; i++)
		__free_page(*page_list[i]);

	kvfree(*page_list);
	*page_list = NULL;
	return -ENOMEM;
}

struct mlx5_vhca_data_buffer *
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
			 size_t length,
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
			 enum dma_data_direction dma_dir)
{
	struct mlx5_vhca_data_buffer *buf;
@@ -477,12 +531,13 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,

	buf->dma_dir = dma_dir;
	buf->migf = migf;
	if (length) {
		ret = mlx5vf_add_migration_pages(buf,
				DIV_ROUND_UP_ULL(length, PAGE_SIZE));
	if (npages) {
		ret = mlx5vf_add_pages(&buf->page_list, npages);
		if (ret)
			goto end;

		buf->npages = npages;

		if (dma_dir != DMA_NONE) {
			ret = mlx5vf_dma_data_buffer(buf);
			if (ret)
@@ -505,8 +560,8 @@ void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
}

struct mlx5_vhca_data_buffer *
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
		       size_t length, enum dma_data_direction dma_dir)
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
		       enum dma_data_direction dma_dir)
{
	struct mlx5_vhca_data_buffer *buf, *temp_buf;
	struct list_head free_list;
@@ -521,7 +576,7 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
	list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
		if (buf->dma_dir == dma_dir) {
			list_del_init(&buf->buf_elm);
			if (buf->allocated_length >= length) {
			if (buf->npages >= npages) {
				spin_unlock_irq(&migf->list_lock);
				goto found;
			}
@@ -535,7 +590,7 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
		}
	}
	spin_unlock_irq(&migf->list_lock);
	buf = mlx5vf_alloc_data_buffer(migf, length, dma_dir);
	buf = mlx5vf_alloc_data_buffer(migf, npages, dma_dir);

found:
	while ((temp_buf = list_first_entry_or_null(&free_list,
@@ -716,7 +771,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
	MLX5_SET(save_vhca_state_in, in, op_mod, 0);
	MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey);
	MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length);
	MLX5_SET(save_vhca_state_in, in, size, buf->npages * PAGE_SIZE);
	MLX5_SET(save_vhca_state_in, in, incremental, inc);
	MLX5_SET(save_vhca_state_in, in, set_track, track);

@@ -738,8 +793,11 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
	}

	if (!header_buf) {
		header_buf = mlx5vf_get_data_buffer(migf,
			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
		header_buf = mlx5vf_get_data_buffer(
			migf,
			DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header),
				     PAGE_SIZE),
			DMA_NONE);
		if (IS_ERR(header_buf)) {
			err = PTR_ERR(header_buf);
			goto err_free;
@@ -783,7 +841,7 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	if (!buf->dmaed) {
	if (!buf->mkey_in) {
		err = mlx5vf_dma_data_buffer(buf);
		if (err)
			return err;
@@ -1338,103 +1396,16 @@ static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
	kfree(qp);
}

/*
 * Free every page referenced by @recv_buf->page_list (recv_buf->npages
 * entries, first to last) and then the pointer array itself.
 */
static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
{
	int idx = 0;

	/* Undo alloc_pages_bulk() */
	while (idx < recv_buf->npages) {
		__free_page(recv_buf->page_list[idx]);
		idx++;
	}

	kvfree(recv_buf->page_list);
}

/*
 * Allocate a zeroed array of @npages page pointers in @recv_buf->page_list
 * and fill it through repeated alloc_pages_bulk() calls.
 *
 * On success @recv_buf->npages is set to @npages and 0 is returned.
 * If a bulk call yields no page, every non-NULL entry of the array is freed,
 * the array is freed and -ENOMEM is returned; @recv_buf->npages is left
 * untouched in that case.
 */
static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
			    unsigned int npages)
{
	unsigned int got, total = 0;
	int idx;

	recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
				       GFP_KERNEL_ACCOUNT);
	if (!recv_buf->page_list)
		return -ENOMEM;

	do {
		got = alloc_pages_bulk(GFP_KERNEL_ACCOUNT,
				       npages - total,
				       recv_buf->page_list + total);
		if (!got)
			goto free_partial;

		total += got;
	} while (total != npages);

	recv_buf->npages = npages;
	return 0;

free_partial:
	/* The array was zero-initialised, so unfilled slots are NULL. */
	for (idx = 0; idx < npages; idx++) {
		if (!recv_buf->page_list[idx])
			continue;
		__free_page(recv_buf->page_list[idx]);
	}

	kvfree(recv_buf->page_list);
	return -ENOMEM;
}

static int register_dma_recv_pages(struct mlx5_core_dev *mdev,
				   struct mlx5_vhca_recv_buf *recv_buf)
{
	int i, j;

	recv_buf->dma_addrs = kvcalloc(recv_buf->npages,
				       sizeof(*recv_buf->dma_addrs),
				       GFP_KERNEL_ACCOUNT);
	if (!recv_buf->dma_addrs)
		return -ENOMEM;

	for (i = 0; i < recv_buf->npages; i++) {
		recv_buf->dma_addrs[i] = dma_map_page(mdev->device,
						      recv_buf->page_list[i],
						      0, PAGE_SIZE,
						      DMA_FROM_DEVICE);
		if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i]))
			goto error;
	}
	return 0;

error:
	for (j = 0; j < i; j++)
		dma_unmap_single(mdev->device, recv_buf->dma_addrs[j],
				 PAGE_SIZE, DMA_FROM_DEVICE);

	kvfree(recv_buf->dma_addrs);
	return -ENOMEM;
}

/*
 * Undo the per-page DMA mappings recorded in @recv_buf->dma_addrs
 * (recv_buf->npages entries) and free the address array.
 */
static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev,
				      struct mlx5_vhca_recv_buf *recv_buf)
{
	int i;

	/*
	 * dma_unmap_page() is the counterpart of dma_map_page(); the other
	 * mapping code in this file uses that pair for page mappings.
	 */
	for (i = 0; i < recv_buf->npages; i++)
		dma_unmap_page(mdev->device, recv_buf->dma_addrs[i],
			       PAGE_SIZE, DMA_FROM_DEVICE);

	kvfree(recv_buf->dma_addrs);
}

static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
					  struct mlx5_vhca_qp *qp)
{
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;

	mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
	unregister_dma_recv_pages(mdev, recv_buf);
	free_recv_pages(&qp->recv_buf);
	unregister_dma_pages(mdev, recv_buf->npages, recv_buf->mkey_in,
			     &recv_buf->state, DMA_FROM_DEVICE);
	kvfree(recv_buf->mkey_in);
	free_page_list(recv_buf->npages, recv_buf->page_list);
}

static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
@@ -1445,24 +1416,38 @@ static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
	int err;

	err = alloc_recv_pages(recv_buf, npages);
	if (err < 0)
	err = mlx5vf_add_pages(&recv_buf->page_list, npages);
	if (err)
		return err;

	err = register_dma_recv_pages(mdev, recv_buf);
	if (err)
	recv_buf->npages = npages;

	recv_buf->mkey_in = alloc_mkey_in(npages, pdn);
	if (!recv_buf->mkey_in) {
		err = -ENOMEM;
		goto end;
	}

	err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey);
	err = register_dma_pages(mdev, npages, recv_buf->page_list,
				 recv_buf->mkey_in, &recv_buf->state,
				 DMA_FROM_DEVICE);
	if (err)
		goto err_register_dma;

	err = create_mkey(mdev, npages, recv_buf->mkey_in, &recv_buf->mkey);
	if (err)
		goto err_create_mkey;

	return 0;

err_create_mkey:
	unregister_dma_recv_pages(mdev, recv_buf);
	unregister_dma_pages(mdev, npages, recv_buf->mkey_in, &recv_buf->state,
			     DMA_FROM_DEVICE);
err_register_dma:
	kvfree(recv_buf->mkey_in);
	recv_buf->mkey_in = NULL;
end:
	free_recv_pages(recv_buf);
	free_page_list(npages, recv_buf->page_list);
	return err;
}

+21 −14
Original line number Diff line number Diff line
@@ -53,20 +53,17 @@ struct mlx5_vf_migration_header {
};

struct mlx5_vhca_data_buffer {
	struct sg_append_table table;
	struct page **page_list;
	struct dma_iova_state state;
	loff_t start_pos;
	u64 length;
	u64 allocated_length;
	u32 npages;
	u32 mkey;
	u32 *mkey_in;
	enum dma_data_direction dma_dir;
	u8 dmaed:1;
	u8 stop_copy_chunk_num;
	struct list_head buf_elm;
	struct mlx5_vf_migration_file *migf;
	/* Optimize mlx5vf_get_migration_page() for sequential access */
	struct scatterlist *last_offset_sg;
	unsigned int sg_last_entry;
	unsigned long last_offset;
};

struct mlx5vf_async_data {
@@ -133,8 +130,9 @@ struct mlx5_vhca_cq {
struct mlx5_vhca_recv_buf {
	u32 npages;
	struct page **page_list;
	dma_addr_t *dma_addrs;
	struct dma_iova_state state;
	u32 next_rq_offset;
	u32 *mkey_in;
	u32 mkey;
};

@@ -217,15 +215,24 @@ int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf);
void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf);
void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf);
struct mlx5_vhca_data_buffer *
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
			 size_t length, enum dma_data_direction dma_dir);
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
			 enum dma_data_direction dma_dir);
void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf);
struct mlx5_vhca_data_buffer *
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
		       size_t length, enum dma_data_direction dma_dir);
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
		       enum dma_data_direction dma_dir);
void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
				       unsigned long offset);
/*
 * Return the page of @buf that contains byte @offset, or NULL when @offset
 * lies at or beyond the end of the buffer's npages pages.
 */
static inline struct page *
mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
			  unsigned long offset)
{
	/*
	 * Keep the index as wide as @offset: narrowing it to int could wrap a
	 * very large offset to a small in-range value and slip past the
	 * bounds check below.
	 */
	unsigned long page_entry = offset / PAGE_SIZE;

	if (page_entry >= buf->npages)
		return NULL;

	return buf->page_list[page_entry];
}
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev,
			enum mlx5_vf_migf_state *last_save_state);
+36 −51

File changed.

Preview size limit exceeded, changes collapsed.

Loading