Commit 4d674c47 authored by Boshi Yu's avatar Boshi Yu Committed by Leon Romanovsky
Browse files

RDMA/erdma: Use dma_map_page to map scatter MTT buffer



Each high-level indirect MTT entry is assumed to point to exactly one page
of the low-level MTT buffer, but dma_map_sg may merge contiguous physical
pages when mapping. To avoid extra overhead from splitting merged regions,
use dma_map_page to map the scatter MTT buffer page by page.

Reviewed-by: default avatarCheng Xu <chengyou@linux.alibaba.com>
Signed-off-by: default avatarBoshi Yu <boshiyu@linux.alibaba.com>
Link: https://patch.msgid.link/20250725055410.67520-2-boshiyu@linux.alibaba.com


Signed-off-by: default avatarLeon Romanovsky <leon@kernel.org>
parent a3c9d0fc
Loading
Loading
Loading
Loading
+69 −41
Original line number Diff line number Diff line
@@ -149,7 +149,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
			req.phy_addr[0] = mr->mem.mtt->buf_dma;
			mtt_level = ERDMA_MR_MTT_1LEVEL;
		} else {
			req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist);
			req.phy_addr[0] = mr->mem.mtt->dma_addrs[0];
			mtt_level = mr->mem.mtt->level;
		}
	} else if (mr->type != ERDMA_MR_TYPE_DMA) {
@@ -626,18 +626,27 @@ static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev,
	return ERR_PTR(-ENOMEM);
}

static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev,
static void erdma_unmap_page_list(struct erdma_dev *dev, dma_addr_t *pg_dma,
				  u32 npages)
{
	u32 i;

	for (i = 0; i < npages; i++)
		dma_unmap_page(&dev->pdev->dev, pg_dma[i], PAGE_SIZE,
			       DMA_TO_DEVICE);
}

static void erdma_destroy_mtt_buf_dma_addrs(struct erdma_dev *dev,
					    struct erdma_mtt *mtt)
{
	dma_unmap_sg(&dev->pdev->dev, mtt->sglist,
		     DIV_ROUND_UP(mtt->size, PAGE_SIZE), DMA_TO_DEVICE);
	vfree(mtt->sglist);
	erdma_unmap_page_list(dev, mtt->dma_addrs, mtt->npages);
	vfree(mtt->dma_addrs);
}

static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
				      struct erdma_mtt *mtt)
{
	erdma_destroy_mtt_buf_sg(dev, mtt);
	erdma_destroy_mtt_buf_dma_addrs(dev, mtt);
	vfree(mtt->buf);
	kfree(mtt);
}
@@ -645,50 +654,69 @@ static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
static void erdma_init_middle_mtt(struct erdma_mtt *mtt,
				  struct erdma_mtt *low_mtt)
{
	struct scatterlist *sg;
	u32 idx = 0, i;
	dma_addr_t *pg_addr = mtt->buf;
	u32 i;

	for_each_sg(low_mtt->sglist, sg, low_mtt->nsg, i)
		mtt->buf[idx++] = sg_dma_address(sg);
	for (i = 0; i < low_mtt->npages; i++)
		pg_addr[i] = low_mtt->dma_addrs[i];
}

static int erdma_create_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt)
static u32 vmalloc_to_dma_addrs(struct erdma_dev *dev, dma_addr_t **dma_addrs,
				void *buf, u64 len)
{
	struct scatterlist *sglist;
	void *buf = mtt->buf;
	u32 npages, i, nsg;
	dma_addr_t *pg_dma;
	struct page *pg;
	u32 npages, i;
	void *addr;

	/* Failed if buf is not page aligned */
	if ((uintptr_t)buf & ~PAGE_MASK)
		return -EINVAL;

	npages = DIV_ROUND_UP(mtt->size, PAGE_SIZE);
	sglist = vzalloc(npages * sizeof(*sglist));
	if (!sglist)
		return -ENOMEM;
	npages = (PAGE_ALIGN((u64)buf + len) - PAGE_ALIGN_DOWN((u64)buf)) >>
		 PAGE_SHIFT;
	pg_dma = vzalloc(npages * sizeof(dma_addr_t));
	if (!pg_dma)
		return 0;

	sg_init_table(sglist, npages);
	addr = buf;
	for (i = 0; i < npages; i++) {
		pg = vmalloc_to_page(buf);
		pg = vmalloc_to_page(addr);
		if (!pg)
			goto err;
		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
		buf += PAGE_SIZE;
	}

	nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_TO_DEVICE);
	if (!nsg)
		pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE,
					 DMA_TO_DEVICE);
		if (dma_mapping_error(&dev->pdev->dev, pg_dma[i]))
			goto err;

	mtt->sglist = sglist;
	mtt->nsg = nsg;
		addr += PAGE_SIZE;
	}

	return 0;
	*dma_addrs = pg_dma;

	return npages;
err:
	vfree(sglist);
	erdma_unmap_page_list(dev, pg_dma, i);
	vfree(pg_dma);

	return 0;
}

static int erdma_create_mtt_buf_dma_addrs(struct erdma_dev *dev,
					  struct erdma_mtt *mtt)
{
	dma_addr_t *addrs;
	u32 npages;

	/* Failed if buf is not page aligned */
	if ((uintptr_t)mtt->buf & ~PAGE_MASK)
		return -EINVAL;

	npages = vmalloc_to_dma_addrs(dev, &addrs, mtt->buf, mtt->size);
	if (!npages)
		return -ENOMEM;

	mtt->dma_addrs = addrs;
	mtt->npages = npages;

	return 0;
}

static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
@@ -707,12 +735,12 @@ static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
	if (!mtt->buf)
		goto err_free_mtt;

	ret = erdma_create_mtt_buf_sg(dev, mtt);
	ret = erdma_create_mtt_buf_dma_addrs(dev, mtt);
	if (ret)
		goto err_free_mtt_buf;

	ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, nsg:%u\n",
		  mtt->size, mtt->nsg);
	ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, npages:%u\n",
		  mtt->size, mtt->npages);

	return mtt;

@@ -746,8 +774,8 @@ static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
	level = 1;

	/* convergence the mtt table. */
	while (mtt->nsg != 1 && level <= 3) {
		tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->nsg));
	while (mtt->npages != 1 && level <= 3) {
		tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->npages));
		if (IS_ERR(tmp_mtt)) {
			ret = PTR_ERR(tmp_mtt);
			goto err_free_mtt;
@@ -765,7 +793,7 @@ static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,

	mtt->level = level;
	ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n",
		  mtt->level, mtt->sglist[0].dma_address);
		  mtt->level, mtt->dma_addrs[0]);

	return mtt;
err_free_mtt:
+2 −2
Original line number Diff line number Diff line
@@ -99,8 +99,8 @@ struct erdma_mtt {
	union {
		dma_addr_t buf_dma;
		struct {
			struct scatterlist *sglist;
			u32 nsg;
			dma_addr_t *dma_addrs;
			u32 npages;
			u32 level;
		};
	};