Commit e270bfd2 authored by Jens Axboe's avatar Jens Axboe
Browse files

io_uring/kbuf: vmap pinned buffer ring



This avoids needing to care about HIGHMEM, and it makes the buffer
indexing easier as both ring provided buffer methods are now virtually
mapped in a contigious fashion.

Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 1943f96b
Loading
Loading
Loading
Loading
+15 −24
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/vmalloc.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>
@@ -146,15 +147,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
		req->flags |= REQ_F_BL_EMPTY;

	head &= bl->mask;
	/* mmaped buffers are always contig */
	if (bl->is_mmap || head < IO_BUFFER_LIST_BUF_PER_PAGE) {
	buf = &br->bufs[head];
	} else {
		int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
		int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
		buf = page_address(bl->buf_pages[index]);
		buf += off;
	}
	if (*len == 0 || *len > buf->len)
		*len = buf->len;
	req->flags |= REQ_F_BUFFER_RING;
@@ -241,6 +234,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
			for (j = 0; j < bl->buf_nr_pages; j++)
				unpin_user_page(bl->buf_pages[j]);
			kvfree(bl->buf_pages);
			vunmap(bl->buf_ring);
			bl->buf_pages = NULL;
			bl->buf_nr_pages = 0;
		}
@@ -498,9 +492,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
			    struct io_buffer_list *bl)
{
	struct io_uring_buf_ring *br;
	struct io_uring_buf_ring *br = NULL;
	int nr_pages, ret, i;
	struct page **pages;
	int i, nr_pages;

	pages = io_pin_pages(reg->ring_addr,
			     flex_array_size(br, bufs, reg->ring_entries),
@@ -508,18 +502,12 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/*
	 * Apparently some 32-bit boxes (ARM) will return highmem pages,
	 * which then need to be mapped. We could support that, but it'd
	 * complicate the code and slowdown the common cases quite a bit.
	 * So just error out, returning -EINVAL just like we did on kernels
	 * that didn't support mapped buffer rings.
	 */
	for (i = 0; i < nr_pages; i++)
		if (PageHighMem(pages[i]))
	br = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!br) {
		ret = -ENOMEM;
		goto error_unpin;
	}

	br = page_address(pages[0]);
#ifdef SHM_COLOUR
	/*
	 * On platforms that have specific aliasing requirements, SHM_COLOUR
@@ -530,8 +518,10 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
	 * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
	 * this transparently.
	 */
	if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1))
	if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) {
		ret = -EINVAL;
		goto error_unpin;
	}
#endif
	bl->buf_pages = pages;
	bl->buf_nr_pages = nr_pages;
@@ -543,7 +533,8 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
	for (i = 0; i < nr_pages; i++)
		unpin_user_page(pages[i]);
	kvfree(pages);
	return -EINVAL;
	vunmap(br);
	return ret;
}

/*