Commit 81a4058e authored by Pavel Begunkov, committed by Jens Axboe
Browse files

io_uring: use region api for CQ



Convert internal parts of the CQ/SQ array management to the region API.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/46fc3c801290d6b1ac16023d78f6b8e685c87fd6.1732886067.git.asml.silence@gmail.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 8078486e
Loading
Loading
Loading
Loading
+1 −7
Original line number Diff line number Diff line
@@ -427,14 +427,8 @@ struct io_ring_ctx {
	 */
	struct mutex			mmap_lock;

	/*
	 * If IORING_SETUP_NO_MMAP is used, then the below holds
	 * the gup'ed pages for the two rings, and the sqes.
	 */
	unsigned short			n_ring_pages;
	struct page			**ring_pages;

	struct io_mapped_region		sq_region;
	struct io_mapped_region		ring_region;
	/* used for optimised request parameter and wait argument passing  */
	struct io_mapped_region		param_region;
};
+11 −25
Original line number Diff line number Diff line
@@ -2634,26 +2634,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
}

/*
 * Map the user memory at @uaddr (@size bytes) for the rings.
 *
 * Thin wrapper around __io_uaddr_map() that records the resulting page
 * array and page count in ctx->ring_pages / ctx->n_ring_pages. The return
 * value of __io_uaddr_map() is passed through unchanged.
 *
 * NOTE(review): this helper looks like it is on the removed side of the
 * diff (its only visible caller is replaced by io_create_region()) -
 * confirm against the original patch.
 */
static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr,
			  size_t size)
{
	return __io_uaddr_map(&ctx->ring_pages, &ctx->n_ring_pages, uaddr,
				size);
}

/*
 * Release the ring and SQE mappings held by @ctx and clear the cached
 * ctx->rings / ctx->sq_sqes pointers.
 *
 * NOTE(review): this text comes from a diff rendered without +/- markers.
 * The ring_pages based branch and the io_free_region(&ctx->ring_region)
 * call are presumably the old and the new side of the same change rather
 * than code that runs together - confirm against the original patch.
 */
static void io_rings_free(struct io_ring_ctx *ctx)
{
	/*
	 * Without IORING_SETUP_NO_MMAP the rings are released through
	 * io_pages_unmap(); with it, the page array is released through
	 * io_pages_free() and the kernel mapping is dropped with vunmap().
	 */
	if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
		io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages,
				true);
	} else {
		io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
		ctx->n_ring_pages = 0;
		vunmap(ctx->rings);
	}

	io_free_region(ctx, &ctx->sq_region);

	io_free_region(ctx, &ctx->ring_region);
	/* the mappings are gone, do not leave stale pointers behind */
	ctx->rings = NULL;
	ctx->sq_sqes = NULL;
}
@@ -3485,15 +3469,17 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
		rings = io_pages_map(&ctx->ring_pages, &ctx->n_ring_pages, size);
	else
		rings = io_rings_map(ctx, p->cq_off.user_addr, size);

	if (IS_ERR(rings))
		return PTR_ERR(rings);
	memset(&rd, 0, sizeof(rd));
	rd.size = PAGE_ALIGN(size);
	if (ctx->flags & IORING_SETUP_NO_MMAP) {
		rd.user_addr = p->cq_off.user_addr;
		rd.flags |= IORING_MEM_REGION_TYPE_USER;
	}
	ret = io_create_region(ctx, &ctx->ring_region, &rd, IORING_OFF_CQ_RING);
	if (ret)
		return ret;
	ctx->rings = rings = io_region_get_ptr(&ctx->ring_region);

	ctx->rings = rings;
	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
		ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
	rings->sq_ring_mask = p->sq_entries - 1;
+8 −47
Original line number Diff line number Diff line
@@ -120,18 +120,6 @@ void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
	*npages = 0;
}

/*
 * Unpin and free a page array.
 *
 * @pages:  address of the page array pointer; reset to NULL on return
 * @npages: number of entries in the array to unpin
 *
 * Does nothing if *@pages is already NULL, so calling it again on the same
 * pointer after a successful call is a no-op.
 */
void io_pages_free(struct page ***pages, int npages)
{
	struct page **page_array = *pages;

	if (!page_array)
		return;

	unpin_user_pages(page_array, npages);
	kvfree(page_array);
	*pages = NULL;
}

struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
{
	unsigned long start, end, nr_pages;
@@ -174,34 +162,6 @@ struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
	return ERR_PTR(ret);
}

/*
 * Pin the user memory at @uaddr and map it into kernel address space.
 *
 * @pages:  on success, receives the pinned page array
 * @npages: always zeroed on entry; on success, receives the page count
 * @uaddr:  user address, must be page aligned
 * @size:   length in bytes, must be non-zero
 *
 * Returns the vmap()'ed kernel address on success. On failure returns
 * ERR_PTR(-EINVAL) for a misaligned @uaddr or zero @size, the error
 * pointer from io_pin_pages() if pinning fails, or ERR_PTR(-ENOMEM) if
 * vmap() fails (the pinned pages are released again in that case).
 *
 * NOTE(review): nr_pages is an unsigned int that is stored into an
 * unsigned short *@npages; a mapping of more than 65535 pages would be
 * truncated - confirm that callers bound @size.
 */
void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
		     unsigned long uaddr, size_t size)
{
	struct page **page_array;
	unsigned int nr_pages;
	void *page_addr;

	*npages = 0;

	if (uaddr & (PAGE_SIZE - 1) || !size)
		return ERR_PTR(-EINVAL);

	nr_pages = 0;
	page_array = io_pin_pages(uaddr, size, &nr_pages);
	if (IS_ERR(page_array))
		return page_array;

	page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
	if (page_addr) {
		/* only publish the pages once the mapping exists */
		*pages = page_array;
		*npages = nr_pages;
		return page_addr;
	}

	/* vmap() failed: drop the pins taken by io_pin_pages() */
	io_pages_free(&page_array, nr_pages);
	return ERR_PTR(-ENOMEM);
}

enum {
	/* memory was vmap'ed for the kernel, freeing the region vunmap's it */
	IO_REGION_F_VMAP			= 1,
@@ -446,9 +406,10 @@ int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,

static int io_region_mmap(struct io_ring_ctx *ctx,
			  struct io_mapped_region *mr,
			  struct vm_area_struct *vma)
			  struct vm_area_struct *vma,
			  unsigned max_pages)
{
	unsigned long nr_pages = mr->nr_pages;
	unsigned long nr_pages = min(mr->nr_pages, max_pages);

	vm_flags_set(vma, VM_DONTEXPAND);
	return vm_insert_pages(vma, vma->vm_start, mr->pages, &nr_pages);
@@ -459,7 +420,7 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
	struct io_ring_ctx *ctx = file->private_data;
	size_t sz = vma->vm_end - vma->vm_start;
	long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned int npages;
	unsigned int page_limit;
	void *ptr;

	guard(mutex)(&ctx->mmap_lock);
@@ -471,14 +432,14 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
	switch (offset & IORING_OFF_MMAP_MASK) {
	case IORING_OFF_SQ_RING:
	case IORING_OFF_CQ_RING:
		npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
		return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
		page_limit = (sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
		return io_region_mmap(ctx, &ctx->ring_region, vma, page_limit);
	case IORING_OFF_SQES:
		return io_region_mmap(ctx, &ctx->sq_region, vma);
		return io_region_mmap(ctx, &ctx->sq_region, vma, UINT_MAX);
	case IORING_OFF_PBUF_RING:
		return io_pbuf_mmap(file, vma);
	case IORING_MAP_OFF_PARAM_REGION:
		return io_region_mmap(ctx, &ctx->param_region, vma);
		return io_region_mmap(ctx, &ctx->param_region, vma, UINT_MAX);
	}

	return -EINVAL;
+0 −4
Original line number Diff line number Diff line
@@ -4,7 +4,6 @@
#define IORING_MAP_OFF_PARAM_REGION		0x20000000ULL

struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
void io_pages_free(struct page ***pages, int npages);
int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
			struct page **pages, int npages);

@@ -13,9 +12,6 @@ void *io_pages_map(struct page ***out_pages, unsigned short *npages,
void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
		    bool put_pages);

void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
		     unsigned long uaddr, size_t size);

#ifndef CONFIG_MMU
unsigned int io_uring_nommu_mmap_capabilities(struct file *file);
#endif
+16 −19
Original line number Diff line number Diff line
@@ -367,26 +367,19 @@ static int io_register_clock(struct io_ring_ctx *ctx,
 * either mapping or freeing.
 */
struct io_ring_ctx_rings {
	/*
	 * NOTE(review): n_ring_pages/ring_pages look like the removed side of
	 * the diff, with ring_region below as their replacement - confirm
	 * against the original patch.
	 */
	unsigned short n_ring_pages;
	struct page **ring_pages;
	/* kernel pointer to the mapped rings */
	struct io_rings *rings;

	/* SQE array pointer; published as ctx->sq_sqes after a resize */
	struct io_uring_sqe *sq_sqes;

	/* regions released with io_free_region() in io_register_free_rings() */
	struct io_mapped_region sq_region;
	struct io_mapped_region ring_region;
};

/*
 * Release the mappings described by @r.
 *
 * @ctx: ring context, passed through to io_free_region()
 * @p:   parameters; p->flags selects the IORING_SETUP_NO_MMAP teardown path
 * @r:   set of ring mappings to release
 *
 * NOTE(review): this text comes from a diff rendered without +/- markers.
 * The ring_pages based branch and the io_free_region(&r->ring_region) call
 * are presumably the old and the new side of the same change - confirm
 * against the original patch.
 */
static void io_register_free_rings(struct io_ring_ctx *ctx,
				   struct io_uring_params *p,
				   struct io_ring_ctx_rings *r)
{
	if (!(p->flags & IORING_SETUP_NO_MMAP)) {
		io_pages_unmap(r->rings, &r->ring_pages, &r->n_ring_pages,
				true);
	} else {
		io_pages_free(&r->ring_pages, r->n_ring_pages);
		vunmap(r->rings);
	}
	io_free_region(ctx, &r->sq_region);
	io_free_region(ctx, &r->ring_region);
}

#define swap_old(ctx, o, n, field)		\
@@ -439,13 +432,18 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	if (!(p.flags & IORING_SETUP_NO_MMAP))
		n.rings = io_pages_map(&n.ring_pages, &n.n_ring_pages, size);
	else
		n.rings = __io_uaddr_map(&n.ring_pages, &n.n_ring_pages,
						p.cq_off.user_addr, size);
	if (IS_ERR(n.rings))
		return PTR_ERR(n.rings);
	memset(&rd, 0, sizeof(rd));
	rd.size = PAGE_ALIGN(size);
	if (p.flags & IORING_SETUP_NO_MMAP) {
		rd.user_addr = p.cq_off.user_addr;
		rd.flags |= IORING_MEM_REGION_TYPE_USER;
	}
	ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
	if (ret) {
		io_register_free_rings(ctx, &p, &n);
		return ret;
	}
	n.rings = io_region_get_ptr(&n.ring_region);

	n.rings->sq_ring_mask = p.sq_entries - 1;
	n.rings->cq_ring_mask = p.cq_entries - 1;
@@ -555,8 +553,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)

	ctx->rings = n.rings;
	ctx->sq_sqes = n.sq_sqes;
	swap_old(ctx, o, n, n_ring_pages);
	swap_old(ctx, o, n, ring_pages);
	swap_old(ctx, o, n, ring_region);
	swap_old(ctx, o, n, sq_region);
	to_free = &o;
	ret = 0;