Commit 360c35f8 authored by Puranjay Mohan, committed by Alexei Starovoitov
Browse files

bpf: arena: use kmalloc_nolock() in place of kvcalloc()



To make arena_alloc_pages() safe to be called from any context, replace
kvcalloc() with kmalloc_nolock() so that it doesn't sleep or take any
locks. kmalloc_nolock() returns NULL for allocations larger than
KMALLOC_MAX_CACHE_SIZE, which is (PAGE_SIZE * 2) = 8KB on systems with
4KB pages. So, cap the allocation done by kmalloc_nolock() at 1024 * 8
bytes and reuse the array in a loop.

Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Link: https://lore.kernel.org/r/20251222195022.431211-3-puranjay@kernel.org


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent c336b0b3
Loading
Loading
Loading
Loading
+55 −29
Original line number Diff line number Diff line
@@ -44,6 +44,8 @@
#define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
#define KERN_VM_SZ (SZ_4G + GUARD_SZ)

static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt);

struct bpf_arena {
	struct bpf_map map;
	u64 user_vm_start;
@@ -500,8 +502,10 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
	/* user_vm_end/start are fixed before bpf prog runs */
	long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
	u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
	struct apply_range_data data;
	struct page **pages = NULL;
	long mapped = 0;
	long remaining, mapped = 0;
	long alloc_pages;
	long pgoff = 0;
	u32 uaddr32;
	int ret, i;
@@ -518,17 +522,19 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
			return 0;
	}

	/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
	pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
	/* Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed. */
	alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
	pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
	if (!pages)
		return 0;
	data.pages = pages;

	mutex_lock(&arena->lock);

	if (uaddr) {
		ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
		if (ret)
			goto out_free_pages;
			goto out_unlock_free_pages;
		ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
	} else {
		ret = pgoff = range_tree_find(&arena->rt, page_cnt);
@@ -536,40 +542,60 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
			ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
	}
	if (ret)
		goto out_free_pages;
		goto out_unlock_free_pages;

	remaining = page_cnt;
	uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);

	struct apply_range_data data = { .pages = pages, .i = 0 };
	ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
	while (remaining) {
		long this_batch = min(remaining, alloc_pages);

		/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
		memset(pages, 0, this_batch * sizeof(struct page *));

		ret = bpf_map_alloc_pages(&arena->map, node_id, this_batch, pages);
		if (ret)
			goto out;

	uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
	/* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
		/*
		 * Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
		 * will not overflow 32-bit. Lower 32-bit need to represent
		 * contiguous user address range.
		 * Map these pages at kern_vm_start base.
		 * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
		 * lower 32-bit and it's ok.
		 */
	apply_to_page_range(&init_mm, kern_vm_start + uaddr32,
			    page_cnt << PAGE_SHIFT, apply_range_set_cb, &data);
	mapped = data.i;
	flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
	if (mapped < page_cnt) {
		for (i = mapped; i < page_cnt; i++)
		data.i = 0;
		ret = apply_to_page_range(&init_mm,
					  kern_vm_start + uaddr32 + (mapped << PAGE_SHIFT),
					  this_batch << PAGE_SHIFT, apply_range_set_cb, &data);
		if (ret) {
			/* data.i pages were mapped, account them and free the remaining */
			mapped += data.i;
			for (i = data.i; i < this_batch; i++)
				__free_page(pages[i]);
			goto out;
		}

		mapped += this_batch;
		remaining -= this_batch;
	}
	flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
	mutex_unlock(&arena->lock);
	kvfree(pages);
	kfree_nolock(pages);
	return clear_lo32(arena->user_vm_start) + uaddr32;
out:
	range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
out_free_pages:
	mutex_unlock(&arena->lock);
	if (mapped)
	if (mapped) {
		flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
		arena_free_pages(arena, uaddr32, mapped);
	kvfree(pages);
	}
	goto out_free_pages;
out_unlock_free_pages:
	mutex_unlock(&arena->lock);
out_free_pages:
	kfree_nolock(pages);
	return 0;
}