Commit 4cbee026 authored by Puranjay Mohan's avatar Puranjay Mohan Committed by Alexei Starovoitov
Browse files

bpf: return VMA snapshot from task_vma iterator



Holding the per-VMA lock across the BPF program body creates a lock
ordering problem when helpers acquire locks that depend on mmap_lock:

  vm_lock -> i_rwsem -> mmap_lock -> vm_lock

Snapshot the VMA under the per-VMA lock in _next() via memcpy(), then
drop the lock before returning. The BPF program accesses only the
snapshot.

The verifier only trusts vm_mm and vm_file pointers (see
BTF_TYPE_SAFE_TRUSTED_OR_NULL in verifier.c). vm_file is reference-
counted with get_file() under the lock and released via fput() on the
next iteration or in _destroy(). vm_mm is already correct because
lock_vma_under_rcu() verifies vma->vm_mm == mm. All other pointers
are left as-is by memcpy() since the verifier treats them as untrusted.

Fixes: 4ac45468 ("bpf: Introduce task_vma open-coded iterator kfuncs")
Signed-off-by: default avatarPuranjay Mohan <puranjay@kernel.org>
Acked-by: default avatarAndrii Nakryiko <andrii@kernel.org>
Acked-by: default avatarMykyta Yatsenko <yatsenko@meta.com>
Link: https://lore.kernel.org/r/20260408154539.3832150-4-puranjay@kernel.org


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent bee9ef4a
Loading
Loading
Loading
Loading
+30 −12
Original line number Diff line number Diff line
@@ -808,7 +808,7 @@ static inline void bpf_iter_mmput_async(struct mm_struct *mm)
struct bpf_iter_task_vma_kern_data {
	struct task_struct *task;
	struct mm_struct *mm;
	struct vm_area_struct *locked_vma;
	struct vm_area_struct snapshot;
	u64 next_addr;
};

@@ -842,7 +842,7 @@ __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,

	/*
	 * Reject irqs-disabled contexts including NMI. Operations used
	 * by _next() and _destroy() (vma_end_read, bpf_iter_mmput_async)
	 * by _next() and _destroy() (vma_end_read, fput, bpf_iter_mmput_async)
	 * can take spinlocks with IRQs disabled (pi_lock, pool->lock).
	 * Running from NMI or from a tracepoint that fires with those
	 * locks held could deadlock.
@@ -885,7 +885,7 @@ __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
		goto err_cleanup_iter;
	}

	kit->data->locked_vma = NULL;
	kit->data->snapshot.vm_file = NULL;
	kit->data->next_addr = addr;
	return 0;

@@ -947,26 +947,45 @@ bpf_iter_task_vma_find_next(struct bpf_iter_task_vma_kern_data *data)
	return vma;
}

static void bpf_iter_task_vma_snapshot_reset(struct vm_area_struct *snap)
{
	if (snap->vm_file) {
		fput(snap->vm_file);
		snap->vm_file = NULL;
	}
}

__bpf_kfunc struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it)
{
	struct bpf_iter_task_vma_kern *kit = (void *)it;
	struct vm_area_struct *vma;
	struct vm_area_struct *snap, *vma;

	if (!kit->data) /* bpf_iter_task_vma_new failed */
		return NULL;

	if (kit->data->locked_vma) {
		vma_end_read(kit->data->locked_vma);
		kit->data->locked_vma = NULL;
	}
	snap = &kit->data->snapshot;

	bpf_iter_task_vma_snapshot_reset(snap);

	vma = bpf_iter_task_vma_find_next(kit->data);
	if (!vma)
		return NULL;

	kit->data->locked_vma = vma;
	memcpy(snap, vma, sizeof(*snap));

	/*
	 * The verifier only trusts vm_mm and vm_file (see
	 * BTF_TYPE_SAFE_TRUSTED_OR_NULL in verifier.c). Take a reference
	 * on vm_file; vm_mm is already correct because lock_vma_under_rcu()
	 * verifies vma->vm_mm == mm. All other pointers are untrusted by
	 * the verifier and left as-is.
	 */
	if (snap->vm_file)
		get_file(snap->vm_file);

	kit->data->next_addr = vma->vm_end;
	return vma;
	vma_end_read(vma);
	return snap;
}

__bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it)
@@ -974,8 +993,7 @@ __bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it)
	struct bpf_iter_task_vma_kern *kit = (void *)it;

	if (kit->data) {
		if (kit->data->locked_vma)
			vma_end_read(kit->data->locked_vma);
		bpf_iter_task_vma_snapshot_reset(&kit->data->snapshot);
		put_task_struct(kit->data->task);
		bpf_iter_mmput_async(kit->data->mm);
		bpf_mem_free(&bpf_global_ma, kit->data);