Commit c69df06e authored by Peter Zijlstra's avatar Peter Zijlstra
Browse files

perf/core: Fix deadlock in perf_mmap() failure path



Ian noted that commit 77de62ad ("perf/core: Fix refcount bug and
potential UAF in perf_mmap") would cause a deadlock due to
event->mmap_mutex recursion.

This happens because we're now calling perf_mmap_close() under
mmap_mutex, while that function itself can also take mmap_mutex.

Solve this by noting that perf_mmap_close() is far more complicated
than we need at this particular point, since it deals with scenarios
that cannot happen in this particular case.

Replace the call to perf_mmap_close() with a very narrow undo for the
case of first-exposure. If this is not the first mmap(), there is no
race and it is fine to drop the lock and call perf_mmap_close() to
handle to more complicated scenarios.

Note: move the rb->mmap_user (namespace) handling into the rb
init/free code such that it does not complicate the mmap handling.

Fixes: 77de62ad ("perf/core: Fix refcount bug and potential UAF in perf_mmap")
Reported-by: default avatarIan Rogers <irogers@google.com>
Closes: https://patch.msgid.link/CAP-5%3DfVJyVMZw%3DDqP53Kxg58nUmJ_0bxoaeOKAbC03BVc11HaA%40mail.gmail.com


Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260326112821.GK3738786@noisy.programming.kicks-ass.net
parent 7fd2df20
Loading
Loading
Loading
Loading
+55 −15
Original line number Diff line number Diff line
@@ -7006,6 +7006,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
}

static void perf_pmu_output_stop(struct perf_event *event);
static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb);

/*
 * A buffer can be mmap()ed multiple times; either directly through the same
@@ -7021,8 +7022,6 @@ static void perf_mmap_close(struct vm_area_struct *vma)
	mapped_f unmapped = get_mapped(event, event_unmapped);
	struct perf_buffer *rb = ring_buffer_get(event);
	struct user_struct *mmap_user = rb->mmap_user;
	int mmap_locked = rb->mmap_locked;
	unsigned long size = perf_data_size(rb);
	bool detach_rest = false;

	/* FIXIES vs perf_pmu_unregister() */
@@ -7117,11 +7116,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
	 * Aside from that, this buffer is 'fully' detached and unmapped,
	 * undo the VM accounting.
	 */

	atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
			&mmap_user->locked_vm);
	atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
	free_uid(mmap_user);
	perf_mmap_unaccount(vma, rb);

out_put:
	ring_buffer_put(rb); /* could be last */
@@ -7261,6 +7256,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long
	atomic64_add(extra, &vma->vm_mm->pinned_vm);
}

static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb)
{
	struct user_struct *user = rb->mmap_user;

	atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked,
			&user->locked_vm);
	atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm);
}

static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
			unsigned long nr_pages)
{
@@ -7323,8 +7327,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
	if (!rb)
		return -ENOMEM;

	refcount_set(&rb->mmap_count, 1);
	rb->mmap_user = get_current_user();
	rb->mmap_locked = extra;

	ring_buffer_attach(event, rb);
@@ -7474,16 +7476,54 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
			mapped(event, vma->vm_mm);

		/*
		 * Try to map it into the page table. On fail, invoke
		 * perf_mmap_close() to undo the above, as the callsite expects
		 * full cleanup in this case and therefore does not invoke
		 * vmops::close().
		 * Try to map it into the page table. On fail undo the above,
		 * as the callsite expects full cleanup in this case and
		 * therefore does not invoke vmops::close().
		 */
		ret = map_range(event->rb, vma);
		if (ret)
			perf_mmap_close(vma);
		if (likely(!ret))
			return 0;

		/* Error path */

		/*
		 * If this is the first mmap(), then event->mmap_count should
		 * be stable at 1. It is only modified by:
		 * perf_mmap_{open,close}() and perf_mmap().
		 *
		 * The former are not possible because this mmap() hasn't been
		 * successful yet, and the latter is serialized by
		 * event->mmap_mutex which we still hold (note that mmap_lock
		 * is not strictly sufficient here, because the event fd can
		 * be passed to another process through trivial means like
		 * fork(), leading to concurrent mmap() from different mm).
		 *
		 * Make sure to remove event->rb before releasing
		 * event->mmap_mutex, such that any concurrent mmap() will not
		 * attempt use this failed buffer.
		 */
		if (refcount_read(&event->mmap_count) == 1) {
			/*
			 * Minimal perf_mmap_close(); there can't be AUX or
			 * other events on account of this being the first.
			 */
			mapped = get_mapped(event, event_unmapped);
			if (mapped)
				mapped(event, vma->vm_mm);
			perf_mmap_unaccount(vma, event->rb);
			ring_buffer_attach(event, NULL);	/* drops last rb->refcount */
			refcount_set(&event->mmap_count, 0);
			return ret;
		}

		/*
		 * Otherwise this is an already existing buffer, and there is
		 * no race vs first exposure, so fall-through and call
		 * perf_mmap_close().
		 */
	}

	perf_mmap_close(vma);
	return ret;
}

+1 −0
Original line number Diff line number Diff line
@@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
	struct perf_buffer *rb;

	rb = container_of(rcu_head, struct perf_buffer, rcu_head);
	free_uid(rb->mmap_user);
	rb_free(rb);
}

+2 −0
Original line number Diff line number Diff line
@@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
		rb->paused = 1;

	mutex_init(&rb->aux_mutex);
	rb->mmap_user = get_current_user();
	refcount_set(&rb->mmap_count, 1);
}

void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)