Commit 309e02dc authored by Ming Lei, committed by Jens Axboe
Browse files

ublk: avoid unpinning pages under maple tree spinlock



ublk_shmem_remove_ranges() calls unpin_user_pages() while holding the
maple tree spinlock (mas_lock). Although unpin_user_pages() is safe in
atomic context, holding the spinlock across potentially many page
unpinning operations is not ideal.

Split into __ublk_shmem_remove_ranges() which erases up to 64 ranges
under mas_lock, collecting base_pfn and nr_pages into a temporary
xarray. Then drop the lock and unpin pages outside spinlock context.
ublk_shmem_remove_ranges() loops until all matching ranges are
processed.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Link: https://patch.msgid.link/20260423033058.2805135-4-tom.leiming@gmail.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent ea1db795
Loading
Loading
Loading
Loading
+46 −10
Original line number Diff line number Diff line
@@ -5441,32 +5441,68 @@ static void ublk_unpin_range_pages(unsigned long base_pfn,
}

/*
 * Remove ranges from the maple tree matching buf_index, unpin pages
 * and free range structs. If buf_index < 0, remove all ranges.
 * Inner loop: erase up to UBLK_REMOVE_BATCH matching ranges under
 * mas_lock, collecting them into an xarray. Then drop the lock and
 * unpin pages + free ranges outside spinlock context.
 *
 * Returns true if the tree walk completed, false if more ranges remain.
 * Xarray key is the base PFN, value encodes nr_pages via xa_mk_value().
 */
static int ublk_shmem_remove_ranges(struct ublk_device *ub, int buf_index)
#define UBLK_REMOVE_BATCH	64

static bool __ublk_shmem_remove_ranges(struct ublk_device *ub,
					int buf_index, int *ret)
{
	MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
	struct ublk_buf_range *range;
	int ret = -ENOENT;
	struct xarray to_unpin;
	unsigned long idx;
	unsigned int count = 0;
	bool done = false;
	void *entry;

	xa_init(&to_unpin);

	mas_lock(&mas);
	mas_for_each(&mas, range, ULONG_MAX) {
		unsigned long base, nr;
		unsigned long nr;

		if (buf_index >= 0 && range->buf_index != buf_index)
			continue;

		ret = 0;
		base = mas.index;
		nr = mas.last - base + 1;
		*ret = 0;
		nr = mas.last - mas.index + 1;
		if (xa_err(xa_store(&to_unpin, mas.index,
				    xa_mk_value(nr), GFP_ATOMIC)))
			goto unlock;
		mas_erase(&mas);

		ublk_unpin_range_pages(base, nr);
		kfree(range);
		if (++count >= UBLK_REMOVE_BATCH)
			goto unlock;
	}
	done = true;
unlock:
	mas_unlock(&mas);

	xa_for_each(&to_unpin, idx, entry)
		ublk_unpin_range_pages(idx, xa_to_value(entry));
	xa_destroy(&to_unpin);

	return done;
}

/*
 * Drop every range in the maple tree whose buf_index matches (or all
 * ranges when buf_index < 0), unpinning the pages and freeing the
 * range structs.
 *
 * The work is delegated to __ublk_shmem_remove_ranges() one batch at a
 * time so the maple tree spinlock is never held across a long run of
 * page unpinning; the CPU is offered up between batches.
 *
 * Returns 0 if at least one matching range was found, -ENOENT otherwise.
 */
static int ublk_shmem_remove_ranges(struct ublk_device *ub, int buf_index)
{
	int ret = -ENOENT;

	for (;;) {
		if (__ublk_shmem_remove_ranges(ub, buf_index, &ret))
			break;
		/* More ranges remain: yield before taking the lock again. */
		cond_resched();
	}

	return ret;
}