Commit a5c98e94 authored by Pavel Begunkov, committed by Jens Axboe
Browse files

io_uring/zcrx: dmabuf backed zerocopy receive



Add support for dmabuf backed zcrx areas. To use it, the user should
pass IORING_ZCRX_AREA_DMABUF in the struct io_uring_zcrx_area_reg flags
field and pass a dmabuf fd in the dmabuf_fd field.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20bb1890e60a82ec945ab36370d1fd54be414ab6.1746097431.git.asml.silence@gmail.com
Link: https://lore.kernel.org/io-uring/6e37db97303212bbd8955f9501cf99b579f8aece.1746547722.git.asml.silence@gmail.com


[axboe: fold in fixup]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 8a628042
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -990,12 +990,16 @@ struct io_uring_zcrx_offsets {
	__u64	__resv[2];
};

/* Bits accepted in the flags field of struct io_uring_zcrx_area_reg. */
enum io_uring_zcrx_area_flags {
	/* The area is backed by a dmabuf rather than by user memory. */
	IORING_ZCRX_AREA_DMABUF		= 1 << 0,
};

/*
 * Registration descriptor for a zcrx area, filled in by userspace.
 *
 * NOTE(review): both __resv1 and dmabuf_fd are listed below. This text looks
 * like a rendered diff in which dmabuf_fd replaces __resv1 - confirm against
 * the real header. Keeping both would put three __u32 fields in front of the
 * __u64 array.
 */
struct io_uring_zcrx_area_reg {
	/*
	 * Start of the area. Must be page aligned. For dmabuf areas the
	 * import code uses this value as a byte offset into the dmabuf.
	 */
	__u64	addr;
	/* Length of the area in bytes. Must be page aligned. */
	__u64	len;
	/* Rejected with -EINVAL at area creation if non-zero. */
	__u64	rq_area_token;
	/* enum io_uring_zcrx_area_flags bits; unknown bits are rejected. */
	__u32	flags;
	__u32	__resv1;
	/*
	 * dmabuf file descriptor, used when IORING_ZCRX_AREA_DMABUF is set.
	 * The user-memory import path rejects a non-zero value.
	 */
	__u32	dmabuf_fd;
	/* Reserved; both entries must be zero. */
	__u64	__resv2[2];
};

+147 −16
Original line number Diff line number Diff line
@@ -47,30 +47,118 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
	return area->mem.pages[net_iov_idx(niov)];
}

static void io_release_area_mem(struct io_zcrx_mem *mem)
static void io_release_dmabuf(struct io_zcrx_mem *mem)
{
	if (mem->pages) {
		unpin_user_pages(mem->pages, mem->nr_folios);
		kvfree(mem->pages);
	if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
		return;

	if (mem->sgt)
		dma_buf_unmap_attachment_unlocked(mem->attach, mem->sgt,
						  DMA_FROM_DEVICE);
	if (mem->attach)
		dma_buf_detach(mem->dmabuf, mem->attach);
	if (mem->dmabuf)
		dma_buf_put(mem->dmabuf);

	mem->sgt = NULL;
	mem->attach = NULL;
	mem->dmabuf = NULL;
}

static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
			    struct io_zcrx_mem *mem,
			    struct io_uring_zcrx_area_reg *area_reg)
{
	unsigned long off = (unsigned long)area_reg->addr;
	unsigned long len = (unsigned long)area_reg->len;
	unsigned long total_size = 0;
	struct scatterlist *sg;
	int dmabuf_fd = area_reg->dmabuf_fd;
	int i, ret;

	if (WARN_ON_ONCE(!ifq->dev))
		return -EFAULT;
	if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
		return -EINVAL;

	mem->is_dmabuf = true;
	mem->dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(mem->dmabuf)) {
		ret = PTR_ERR(mem->dmabuf);
		mem->dmabuf = NULL;
		goto err;
	}

static int io_import_area(struct io_zcrx_ifq *ifq,
	mem->attach = dma_buf_attach(mem->dmabuf, ifq->dev);
	if (IS_ERR(mem->attach)) {
		ret = PTR_ERR(mem->attach);
		mem->attach = NULL;
		goto err;
	}

	mem->sgt = dma_buf_map_attachment_unlocked(mem->attach, DMA_FROM_DEVICE);
	if (IS_ERR(mem->sgt)) {
		ret = PTR_ERR(mem->sgt);
		mem->sgt = NULL;
		goto err;
	}

	for_each_sgtable_dma_sg(mem->sgt, sg, i)
		total_size += sg_dma_len(sg);

	if (total_size < off + len)
		return -EINVAL;

	mem->dmabuf_offset = off;
	mem->size = len;
	return 0;
err:
	io_release_dmabuf(mem);
	return ret;
}

/*
 * Assign a DMA address to every niov of a dmabuf-backed area.
 *
 * Walks the DMA-mapped segments of area->mem.sgt, skips the first
 * area->mem.dmabuf_offset bytes, then hands out one PAGE_SIZE chunk per niov
 * in index order until area->nia.num_niovs entries are populated or the
 * table is exhausted.
 *
 * Returns the number of niovs that received an address; the caller treats
 * any value other than area->nia.num_niovs as a mapping failure. Returns 0
 * when net_mp_niov_set_dma_addr() rejects an address or when a segment has
 * less than a full page left for the next niov, and -EINVAL when
 * CONFIG_DMA_SHARED_BUFFER is disabled.
 */
static int io_zcrx_map_area_dmabuf(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
{
	unsigned long off = area->mem.dmabuf_offset;
	struct scatterlist *sg;
	unsigned i, niov_idx = 0;

	if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
		return -EINVAL;

	for_each_sgtable_dma_sg(area->mem.sgt, sg, i) {
		dma_addr_t dma = sg_dma_address(sg);
		unsigned long sg_len = sg_dma_len(sg);
		unsigned long sg_off = min(sg_len, off);

		/* Consume the requested offset before handing out pages. */
		off -= sg_off;
		sg_len -= sg_off;
		dma += sg_off;

		while (sg_len && niov_idx < area->nia.num_niovs) {
			struct net_iov *niov = &area->nia.niovs[niov_idx];

			/*
			 * A partial page cannot back a niov, and subtracting
			 * PAGE_SIZE from it would wrap the unsigned length
			 * and run past the end of this segment.
			 */
			if (sg_len < PAGE_SIZE)
				return 0;
			if (net_mp_niov_set_dma_addr(niov, dma))
				return 0;
			sg_len -= PAGE_SIZE;
			dma += PAGE_SIZE;
			niov_idx++;
		}
	}
	return niov_idx;
}

static int io_import_umem(struct io_zcrx_ifq *ifq,
			  struct io_zcrx_mem *mem,
			  struct io_uring_zcrx_area_reg *area_reg)
{
	struct page **pages;
	int nr_pages;
	int ret;

	ret = io_validate_user_buf_range(area_reg->addr, area_reg->len);
	if (ret)
		return ret;
	if (area_reg->dmabuf_fd)
		return -EINVAL;
	if (!area_reg->addr)
		return -EFAULT;
	if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK)
		return -EINVAL;

	pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len,
				   &nr_pages);
	if (IS_ERR(pages))
@@ -82,6 +170,35 @@ static int io_import_area(struct io_zcrx_ifq *ifq,
	return 0;
}

/*
 * Release the memory backing a zcrx area.
 *
 * dmabuf-backed memory is handed to io_release_dmabuf(). Otherwise, if a
 * page array is present, its pages are unpinned and the array is freed.
 */
static void io_release_area_mem(struct io_zcrx_mem *mem)
{
	if (!mem->is_dmabuf) {
		if (!mem->pages)
			return;

		unpin_user_pages(mem->pages, mem->nr_folios);
		kvfree(mem->pages);
		return;
	}

	io_release_dmabuf(mem);
}

/*
 * Validate an area registration and import its backing memory into @mem.
 *
 * The addr/len pair must pass io_validate_user_buf_range() and both values
 * must be page aligned. The import is then dispatched on
 * IORING_ZCRX_AREA_DMABUF: dmabuf import when the flag is set, user-memory
 * import otherwise.
 *
 * Returns 0 on success or a negative errno.
 */
static int io_import_area(struct io_zcrx_ifq *ifq,
			  struct io_zcrx_mem *mem,
			  struct io_uring_zcrx_area_reg *area_reg)
{
	int err = io_validate_user_buf_range(area_reg->addr, area_reg->len);

	if (err)
		return err;

	/* Neither the start nor the length may have sub-page bits set. */
	if ((area_reg->addr | area_reg->len) & ~PAGE_MASK)
		return -EINVAL;

	return (area_reg->flags & IORING_ZCRX_AREA_DMABUF) ?
		io_import_dmabuf(ifq, mem, area_reg) :
		io_import_umem(ifq, mem, area_reg);
}

static void io_zcrx_unmap_umem(struct io_zcrx_ifq *ifq,
				struct io_zcrx_area *area, int nr_mapped)
{
@@ -101,6 +218,9 @@ static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
{
	int i;

	if (area->mem.is_dmabuf)
		io_release_dmabuf(&area->mem);
	else
		io_zcrx_unmap_umem(ifq, area, nr_mapped);

	for (i = 0; i < area->nia.num_niovs; i++)
@@ -145,7 +265,11 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
	if (area->is_mapped)
		return 0;

	if (area->mem.is_dmabuf)
		nr = io_zcrx_map_area_dmabuf(ifq, area);
	else
		nr = io_zcrx_map_area_umem(ifq, area);

	if (nr != area->nia.num_niovs) {
		__io_zcrx_unmap_area(ifq, area, nr);
		return -EINVAL;
@@ -251,6 +375,8 @@ static void io_zcrx_free_area(struct io_zcrx_area *area)
	kfree(area);
}

/*
 * Mask of area_reg->flags bits accepted at area creation; a registration
 * with any other bit set is rejected with -EINVAL.
 */
#define IO_ZCRX_AREA_SUPPORTED_FLAGS	(IORING_ZCRX_AREA_DMABUF)

static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
			       struct io_zcrx_area **res,
			       struct io_uring_zcrx_area_reg *area_reg)
@@ -259,9 +385,11 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
	unsigned nr_iovs;
	int i, ret;

	if (area_reg->flags || area_reg->rq_area_token)
	if (area_reg->flags & ~IO_ZCRX_AREA_SUPPORTED_FLAGS)
		return -EINVAL;
	if (area_reg->rq_area_token)
		return -EINVAL;
	if (area_reg->__resv1 || area_reg->__resv2[0] || area_reg->__resv2[1])
	if (area_reg->__resv2[0] || area_reg->__resv2[1])
		return -EINVAL;

	ret = -ENOMEM;
@@ -819,6 +947,9 @@ static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
	size_t copied = 0;
	int ret = 0;

	if (area->mem.is_dmabuf)
		return -EFAULT;

	while (len) {
		size_t copy_size = min_t(size_t, PAGE_SIZE, len);
		const int dst_off = 0;
+7 −0
Original line number Diff line number Diff line
@@ -3,15 +3,22 @@
#define IOU_ZC_RX_H

#include <linux/io_uring_types.h>
#include <linux/dma-buf.h>
#include <linux/socket.h>
#include <net/page_pool/types.h>
#include <net/net_trackers.h>

/*
 * Backing memory of a zcrx area. It is either a set of pinned user pages or
 * an imported dmabuf; is_dmabuf tells which group of fields is in use.
 */
struct io_zcrx_mem {
	/* Area length in bytes; the dmabuf import sets it to the registered len. */
	unsigned long			size;
	/* True once a dmabuf import has been started for this area. */
	bool				is_dmabuf;

	/* User-memory backing: page array and the count passed to unpin_user_pages(). */
	struct page			**pages;
	unsigned long			nr_folios;

	/* dmabuf backing: attachment to the interface's device. */
	struct dma_buf_attachment	*attach;
	/* dmabuf backing: reference obtained with dma_buf_get(). */
	struct dma_buf			*dmabuf;
	/* dmabuf backing: DMA mapping of the attachment (DMA_FROM_DEVICE). */
	struct sg_table			*sgt;
	/* Byte offset into the dmabuf at which the area starts. */
	unsigned long			dmabuf_offset;
};

struct io_zcrx_area {