Commit cc483b32, authored by Suren Baghdasaryan, committed by Andrew Morton
Browse files

mm: limit the scope of vma_start_read()

Limit the scope of vma_start_read() as it is used only as a helper for
higher-level locking functions implemented inside mmap_lock.c and we are
about to introduce more complex RCU rules for this function.  The change
is pure code refactoring and has no functional changes.

Link: https://lkml.kernel.org/r/20250804233349.1278678-1-surenb@google.com


Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 35edbaa0
Loading
Loading
Loading
Loading
+0 −85
Original line number Diff line number Diff line
@@ -147,91 +147,6 @@ static inline void vma_refcount_put(struct vm_area_struct *vma)
	}
}

/*
 * Try to read-lock a vma. The function is allowed to occasionally yield false
 * locked result to avoid performance overhead, in which case we fall back to
 * using mmap_lock. The function should never yield false unlocked result.
 * False locked result is possible if mm_lock_seq overflows or if vma gets
 * reused and attached to a different mm before we lock it.
 *
 * @mm:  the mm the caller expects @vma to belong to.
 * @vma: the vma to read-lock.
 *
 * Returns the vma on success, NULL on failure to lock and ERR_PTR(-EAGAIN) if
 * vma got detached. On success the reference taken on vma->vm_refcnt is kept;
 * on every failure path taken after the reference was obtained it is dropped
 * with vma_refcount_put() before returning.
 *
 * The caller is expected to hold the RCU read lock; it is held again when the
 * function returns, including on error.
 *
 * WARNING! The vma passed to this function cannot be used if the function
 * fails to lock it because in certain cases RCU lock is dropped and then
 * reacquired. Once RCU lock is dropped the vma can be concurrently freed.
 */
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
						    struct vm_area_struct *vma)
{
	/* vm_refcnt value observed by the increment attempt below */
	int oldcnt;

	/*
	 * Check before locking. A race might cause false locked result.
	 * We can use READ_ONCE() for the mm_lock_seq here, and don't need
	 * ACQUIRE semantics, because this is just a lockless check whose result
	 * we don't rely on for anything - the mm_lock_seq read against which we
	 * need ordering is below.
	 */
	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
		return NULL;

	/*
	 * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
	 * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
	 * Acquire fence is required here to avoid reordering against later
	 * vm_lock_seq check and checks inside lock_vma_under_rcu().
	 */
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT))) {
		/*
		 * Return -EAGAIN if vma got detached from under us (the old
		 * refcount was zero); any other failure is reported as NULL.
		 */
		return oldcnt ? NULL : ERR_PTR(-EAGAIN);
	}

	/* Annotate the read-side acquisition on the vma's lockdep map. */
	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);

	/*
	 * If vma got attached to another mm from under us, that mm is not
	 * stable and can be freed in the narrow window after vma->vm_refcnt
	 * is dropped and before rcuwait_wake_up(mm) is called. Grab it before
	 * releasing vma->vm_refcnt.
	 */
	if (unlikely(vma->vm_mm != mm)) {
		/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
		struct mm_struct *other_mm = vma->vm_mm;

		/*
		 * __mmdrop() is a heavy operation and we don't need RCU
		 * protection here. Release RCU lock during these operations.
		 * We reinstate the RCU read lock as the caller expects it to
		 * be held when this function returns even on error.
		 *
		 * The order matters: other_mm is pinned with mmgrab() before
		 * the vma reference is dropped, and unpinned only afterwards.
		 */
		rcu_read_unlock();
		mmgrab(other_mm);
		vma_refcount_put(vma);
		mmdrop(other_mm);
		rcu_read_lock();
		return NULL;
	}

	/*
	 * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
	 * False unlocked result is impossible because we modify and check
	 * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
	 * modification invalidates all existing locks.
	 *
	 * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
	 * racing with vma_end_write_all(), we only start reading from the VMA
	 * after it has been unlocked.
	 * This pairs with RELEASE semantics in vma_end_write_all().
	 */
	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
		/* The sequence numbers match: report locked and drop our reference. */
		vma_refcount_put(vma);
		return NULL;
	}

	/* Success: the vma is returned with the vm_refcnt reference still held. */
	return vma;
}

/*
 * Use only while holding mmap read lock which guarantees that locking will not
 * fail (nobody can concurrently write-lock the vma). vma_start_read() should
+85 −0
Original line number Diff line number Diff line
@@ -127,6 +127,91 @@ void vma_mark_detached(struct vm_area_struct *vma)
	}
}

/*
 * Try to read-lock a vma. The function is allowed to occasionally yield false
 * locked result to avoid performance overhead, in which case we fall back to
 * using mmap_lock. The function should never yield false unlocked result.
 * False locked result is possible if mm_lock_seq overflows or if vma gets
 * reused and attached to a different mm before we lock it.
 *
 * @mm:  the mm the caller expects @vma to belong to.
 * @vma: the vma to read-lock.
 *
 * Returns the vma on success, NULL on failure to lock and ERR_PTR(-EAGAIN) if
 * vma got detached. On success the reference taken on vma->vm_refcnt is kept;
 * on every failure path taken after the reference was obtained it is dropped
 * with vma_refcount_put() before returning.
 *
 * The caller is expected to hold the RCU read lock; it is held again when the
 * function returns, including on error.
 *
 * WARNING! The vma passed to this function cannot be used if the function
 * fails to lock it because in certain cases RCU lock is dropped and then
 * reacquired. Once RCU lock is dropped the vma can be concurrently freed.
 */
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
						    struct vm_area_struct *vma)
{
	/* vm_refcnt value observed by the increment attempt below */
	int oldcnt;

	/*
	 * Check before locking. A race might cause false locked result.
	 * We can use READ_ONCE() for the mm_lock_seq here, and don't need
	 * ACQUIRE semantics, because this is just a lockless check whose result
	 * we don't rely on for anything - the mm_lock_seq read against which we
	 * need ordering is below.
	 */
	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
		return NULL;

	/*
	 * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
	 * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
	 * Acquire fence is required here to avoid reordering against later
	 * vm_lock_seq check and checks inside lock_vma_under_rcu().
	 */
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT))) {
		/*
		 * Return -EAGAIN if vma got detached from under us (the old
		 * refcount was zero); any other failure is reported as NULL.
		 */
		return oldcnt ? NULL : ERR_PTR(-EAGAIN);
	}

	/* Annotate the read-side acquisition on the vma's lockdep map. */
	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);

	/*
	 * If vma got attached to another mm from under us, that mm is not
	 * stable and can be freed in the narrow window after vma->vm_refcnt
	 * is dropped and before rcuwait_wake_up(mm) is called. Grab it before
	 * releasing vma->vm_refcnt.
	 */
	if (unlikely(vma->vm_mm != mm)) {
		/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
		struct mm_struct *other_mm = vma->vm_mm;

		/*
		 * __mmdrop() is a heavy operation and we don't need RCU
		 * protection here. Release RCU lock during these operations.
		 * We reinstate the RCU read lock as the caller expects it to
		 * be held when this function returns even on error.
		 *
		 * The order matters: other_mm is pinned with mmgrab() before
		 * the vma reference is dropped, and unpinned only afterwards.
		 */
		rcu_read_unlock();
		mmgrab(other_mm);
		vma_refcount_put(vma);
		mmdrop(other_mm);
		rcu_read_lock();
		return NULL;
	}

	/*
	 * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
	 * False unlocked result is impossible because we modify and check
	 * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
	 * modification invalidates all existing locks.
	 *
	 * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
	 * racing with vma_end_write_all(), we only start reading from the VMA
	 * after it has been unlocked.
	 * This pairs with RELEASE semantics in vma_end_write_all().
	 */
	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
		/* The sequence numbers match: report locked and drop our reference. */
		vma_refcount_put(vma);
		return NULL;
	}

	/* Success: the vma is returned with the vm_refcnt reference still held. */
	return vma;
}

/*
 * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
 * stable and not isolated. If the VMA is not found or is being modified the