Commit 3fc3068e authored by Sheng Zhao's avatar Sheng Zhao Committed by Michael S. Tsirkin
Browse files

vduse: Use fixed 4KB bounce pages for non-4KB page size



The allocation granularity of bounce pages is PAGE_SIZE. This may cause
even small IO requests to occupy an entire bounce page exclusively. The
kind of memory waste will be more significant when PAGE_SIZE is larger
than 4KB (e.g. arm64 with 64KB pages).

So, optimize it by using fixed 4KB bounce maps and iova allocation
granularity. A single IO request occupies at least a 4KB bounce page
instead of the entire memory page of PAGE_SIZE.

Signed-off-by: default avatarSheng Zhao <sheng.zhao@bytedance.com>
Message-Id: <20250925113516.60305-1-sheng.zhao@bytedance.com>
Signed-off-by: default avatarMichael S. Tsirkin <mst@redhat.com>
parent 1c14b0e4
Loading
Loading
Loading
Loading
+90 −42
Original line number Diff line number Diff line
@@ -103,19 +103,38 @@ void vduse_domain_clear_map(struct vduse_iova_domain *domain,
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					 u64 iova, u64 size, u64 paddr)
{
	struct vduse_bounce_map *map;
	struct vduse_bounce_map *map, *head_map;
	struct page *tmp_page;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		/*
		 * When PAGE_SIZE is larger than 4KB, multiple adjacent bounce_maps will
		 * point to the same memory page of PAGE_SIZE. Since bounce_maps originate
		 * from IO requests, we may not be able to guarantee that the orig_phys
		 * values of all IO requests within the same 64KB memory page are contiguous.
		 * Therefore, we need to store them separately.
		 *
		 * Bounce pages are allocated on demand. As a result, it may occur that
		 * multiple bounce pages corresponding to the same 64KB memory page attempt
		 * to allocate memory simultaneously, so we use cmpxchg to handle this
		 * concurrency.
		 */
		map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
		if (!map->bounce_page) {
			map->bounce_page = alloc_page(GFP_ATOMIC);
			if (!map->bounce_page)
			head_map = &domain->bounce_maps[(iova & PAGE_MASK) >> BOUNCE_MAP_SHIFT];
			if (!head_map->bounce_page) {
				tmp_page = alloc_page(GFP_ATOMIC);
				if (!tmp_page)
					return -ENOMEM;
				if (cmpxchg(&head_map->bounce_page, NULL, tmp_page))
					__free_page(tmp_page);
			}
			map->bounce_page = head_map->bounce_page;
		}
		map->orig_phys = paddr;
		paddr += PAGE_SIZE;
		iova += PAGE_SIZE;
		paddr += BOUNCE_MAP_SIZE;
		iova += BOUNCE_MAP_SIZE;
	}
	return 0;
}
@@ -127,12 +146,17 @@ static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
		map->orig_phys = INVALID_PHYS_ADDR;
		iova += PAGE_SIZE;
		iova += BOUNCE_MAP_SIZE;
	}
}

static unsigned int offset_in_bounce_page(dma_addr_t addr)
{
	return (addr & ~BOUNCE_MAP_MASK);
}

static void do_bounce(phys_addr_t orig, void *addr, size_t size,
		      enum dma_data_direction dir)
{
@@ -163,7 +187,7 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
{
	struct vduse_bounce_map *map;
	struct page *page;
	unsigned int offset;
	unsigned int offset, head_offset;
	void *addr;
	size_t sz;

@@ -171,9 +195,10 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
		return;

	while (size) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		offset = offset_in_page(iova);
		sz = min_t(size_t, PAGE_SIZE - offset, size);
		map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
		head_offset = offset_in_page(iova);
		offset = offset_in_bounce_page(iova);
		sz = min_t(size_t, BOUNCE_MAP_SIZE - offset, size);

		if (WARN_ON(!map->bounce_page ||
			    map->orig_phys == INVALID_PHYS_ADDR))
@@ -183,7 +208,7 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
		       map->user_bounce_page : map->bounce_page;

		addr = kmap_local_page(page);
		do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
		do_bounce(map->orig_phys + offset, addr + head_offset, sz, dir);
		kunmap_local(addr);
		size -= sz;
		iova += sz;
@@ -218,7 +243,7 @@ vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
	struct page *page = NULL;

	read_lock(&domain->bounce_lock);
	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
	map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
	if (domain->user_bounce_pages || !map->bounce_page)
		goto out;

@@ -236,7 +261,7 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;

	bounce_pfns = domain->bounce_size >> PAGE_SHIFT;
	bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
@@ -246,6 +271,7 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
		if (!map->bounce_page)
			continue;

		if (!((pfn << BOUNCE_MAP_SHIFT) & ~PAGE_MASK))
			__free_page(map->bounce_page);
		map->bounce_page = NULL;
	}
@@ -254,8 +280,12 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
				       struct page **pages, int count)
{
	struct vduse_bounce_map *map;
	int i, ret;
	struct vduse_bounce_map *map, *head_map;
	int i, j, ret;
	int inner_pages = PAGE_SIZE / BOUNCE_MAP_SIZE;
	int bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;
	struct page *head_page = NULL;
	bool need_copy;

	/* Now we don't support partial mapping */
	if (count != (domain->bounce_size >> PAGE_SHIFT))
@@ -267,16 +297,23 @@ int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
		goto out;

	for (i = 0; i < count; i++) {
		map = &domain->bounce_maps[i];
		if (map->bounce_page) {
		need_copy = false;
		head_map = &domain->bounce_maps[(i * inner_pages)];
		head_page = head_map->bounce_page;
		for (j = 0; j < inner_pages; j++) {
			if ((i * inner_pages + j) >= bounce_pfns)
				break;
			map = &domain->bounce_maps[(i * inner_pages + j)];
			/* Copy kernel page to user page if it's in use */
			if (map->orig_phys != INVALID_PHYS_ADDR)
				memcpy_to_page(pages[i], 0,
					       page_address(map->bounce_page),
					       PAGE_SIZE);
		}
			if ((head_page) && (map->orig_phys != INVALID_PHYS_ADDR))
				need_copy = true;
			map->user_bounce_page = pages[i];
		}
		get_page(pages[i]);
		if ((head_page) && (need_copy))
			memcpy_to_page(pages[i], 0,
				       page_address(head_page),
				       PAGE_SIZE);
	}
	domain->user_bounce_pages = true;
	ret = 0;
@@ -288,8 +325,12 @@ int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,

void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long i, count;
	struct vduse_bounce_map *map, *head_map;
	unsigned long i, j, count;
	int inner_pages = PAGE_SIZE / BOUNCE_MAP_SIZE;
	int bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;
	struct page *head_page = NULL;
	bool need_copy;

	write_lock(&domain->bounce_lock);
	if (!domain->user_bounce_pages)
@@ -297,21 +338,28 @@ void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)

	count = domain->bounce_size >> PAGE_SHIFT;
	for (i = 0; i < count; i++) {
		struct page *page = NULL;
		need_copy = false;
		head_map = &domain->bounce_maps[(i * inner_pages)];
		if (WARN_ON(!head_map->user_bounce_page))
			continue;
		head_page = head_map->user_bounce_page;

		map = &domain->bounce_maps[i];
		for (j = 0; j < inner_pages; j++) {
			if ((i * inner_pages + j) >= bounce_pfns)
				break;
			map = &domain->bounce_maps[(i * inner_pages + j)];
			if (WARN_ON(!map->user_bounce_page))
				continue;

			/* Copy user page to kernel page if it's in use */
		if (map->orig_phys != INVALID_PHYS_ADDR) {
			page = map->bounce_page;
			memcpy_from_page(page_address(page),
					 map->user_bounce_page, 0, PAGE_SIZE);
		}
		put_page(map->user_bounce_page);
			if ((map->orig_phys != INVALID_PHYS_ADDR) && (head_map->bounce_page))
				need_copy = true;
			map->user_bounce_page = NULL;
		}
		if (need_copy)
			memcpy_from_page(page_address(head_map->bounce_page),
					 head_page, 0, PAGE_SIZE);
		put_page(head_page);
	}
	domain->user_bounce_pages = false;
out:
	write_unlock(&domain->bounce_lock);
@@ -581,7 +629,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
	unsigned long pfn, bounce_pfns;
	int ret;

	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
	bounce_pfns = PAGE_ALIGN(bounce_size) >> BOUNCE_MAP_SHIFT;
	if (iova_limit <= bounce_size)
		return NULL;

@@ -613,7 +661,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
	rwlock_init(&domain->bounce_lock);
	spin_lock_init(&domain->iotlb_lock);
	init_iova_domain(&domain->stream_iovad,
			PAGE_SIZE, IOVA_START_PFN);
			BOUNCE_MAP_SIZE, IOVA_START_PFN);
	ret = iova_domain_init_rcaches(&domain->stream_iovad);
	if (ret)
		goto err_iovad_stream;
+5 −0
Original line number Diff line number Diff line
@@ -19,6 +19,11 @@

#define INVALID_PHYS_ADDR (~(phys_addr_t)0)

#define BOUNCE_MAP_SHIFT	12
#define BOUNCE_MAP_SIZE	(1 << BOUNCE_MAP_SHIFT)
#define BOUNCE_MAP_MASK	(~(BOUNCE_MAP_SIZE - 1))
#define BOUNCE_MAP_ALIGN(addr)	(((addr) + BOUNCE_MAP_SIZE - 1) & ~(BOUNCE_MAP_SIZE - 1))

struct vduse_bounce_map {
	struct page *bounce_page;
	struct page *user_bounce_page;