mm: limit the scope of vma_start_read() (cc483b32) · Commits · git / linux-net

include/linux/mmap_lock.h

+0 −85

Original line number	Diff line number	Diff line
		@@ -147,91 +147,6 @@ static inline void vma_refcount_put(struct vm_area_struct *vma)
		}
		}

		/*
		* Try to read-lock a vma. The function is allowed to occasionally yield false
		* locked result to avoid performance overhead, in which case we fall back to
		* using mmap_lock. The function should never yield false unlocked result.
		* False locked result is possible if mm_lock_seq overflows or if vma gets
		* reused and attached to a different mm before we lock it.
		* Returns the vma on success, NULL on failure to lock and EAGAIN if vma got
		* detached.
		*
		* WARNING! The vma passed to this function cannot be used if the function
		* fails to lock it because in certain cases RCU lock is dropped and then
		* reacquired. Once RCU lock is dropped the vma can be concurently freed.
		*/
		static inline struct vm_area_struct vma_start_read(struct mm_struct mm,
		struct vm_area_struct *vma)
		{
		int oldcnt;

		/*
		* Check before locking. A race might cause false locked result.
		* We can use READ_ONCE() for the mm_lock_seq here, and don't need
		* ACQUIRE semantics, because this is just a lockless check whose result
		* we don't rely on for anything - the mm_lock_seq read against which we
		* need ordering is below.
		*/
		if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
		return NULL;

		/*
		* If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
		* will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
		* Acquire fence is required here to avoid reordering against later
		* vm_lock_seq check and checks inside lock_vma_under_rcu().
		*/
		if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
		VMA_REF_LIMIT))) {
		/* return EAGAIN if vma got detached from under us */
		return oldcnt ? NULL : ERR_PTR(-EAGAIN);
		}

		rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);

		/*
		* If vma got attached to another mm from under us, that mm is not
		* stable and can be freed in the narrow window after vma->vm_refcnt
		* is dropped and before rcuwait_wake_up(mm) is called. Grab it before
		* releasing vma->vm_refcnt.
		*/
		if (unlikely(vma->vm_mm != mm)) {
		/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
		struct mm_struct *other_mm = vma->vm_mm;

		/*
		* __mmdrop() is a heavy operation and we don't need RCU
		* protection here. Release RCU lock during these operations.
		* We reinstate the RCU read lock as the caller expects it to
		* be held when this function returns even on error.
		*/
		rcu_read_unlock();
		mmgrab(other_mm);
		vma_refcount_put(vma);
		mmdrop(other_mm);
		rcu_read_lock();
		return NULL;
		}

		/*
		* Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
		* False unlocked result is impossible because we modify and check
		* vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
		* modification invalidates all existing locks.
		*
		* We must use ACQUIRE semantics for the mm_lock_seq so that if we are
		* racing with vma_end_write_all(), we only start reading from the VMA
		* after it has been unlocked.
		* This pairs with RELEASE semantics in vma_end_write_all().
		*/
		if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
		vma_refcount_put(vma);
		return NULL;
		}

		return vma;
		}

		/*
		* Use only while holding mmap read lock which guarantees that locking will not
		* fail (nobody can concurrently write-lock the vma). vma_start_read() should

mm/mmap_lock.c

+85 −0

Original line number	Diff line number	Diff line
		@@ -127,6 +127,91 @@ void vma_mark_detached(struct vm_area_struct *vma)
		}
		}

		/*
		* Try to read-lock a vma. The function is allowed to occasionally yield false
		* locked result to avoid performance overhead, in which case we fall back to
		* using mmap_lock. The function should never yield false unlocked result.
		* False locked result is possible if mm_lock_seq overflows or if vma gets
		* reused and attached to a different mm before we lock it.
		* Returns the vma on success, NULL on failure to lock and EAGAIN if vma got
		* detached.
		*
		* WARNING! The vma passed to this function cannot be used if the function
		* fails to lock it because in certain cases RCU lock is dropped and then
		* reacquired. Once RCU lock is dropped the vma can be concurently freed.
		*/
		static inline struct vm_area_struct vma_start_read(struct mm_struct mm,
		struct vm_area_struct *vma)
		{
		int oldcnt;

		/*
		* Check before locking. A race might cause false locked result.
		* We can use READ_ONCE() for the mm_lock_seq here, and don't need
		* ACQUIRE semantics, because this is just a lockless check whose result
		* we don't rely on for anything - the mm_lock_seq read against which we
		* need ordering is below.
		*/
		if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
		return NULL;

		/*
		* If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
		* will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
		* Acquire fence is required here to avoid reordering against later
		* vm_lock_seq check and checks inside lock_vma_under_rcu().
		*/
		if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
		VMA_REF_LIMIT))) {
		/* return EAGAIN if vma got detached from under us */
		return oldcnt ? NULL : ERR_PTR(-EAGAIN);
		}

		rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);

		/*
		* If vma got attached to another mm from under us, that mm is not
		* stable and can be freed in the narrow window after vma->vm_refcnt
		* is dropped and before rcuwait_wake_up(mm) is called. Grab it before
		* releasing vma->vm_refcnt.
		*/
		if (unlikely(vma->vm_mm != mm)) {
		/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
		struct mm_struct *other_mm = vma->vm_mm;

		/*
		* __mmdrop() is a heavy operation and we don't need RCU
		* protection here. Release RCU lock during these operations.
		* We reinstate the RCU read lock as the caller expects it to
		* be held when this function returns even on error.
		*/
		rcu_read_unlock();
		mmgrab(other_mm);
		vma_refcount_put(vma);
		mmdrop(other_mm);
		rcu_read_lock();
		return NULL;
		}

		/*
		* Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
		* False unlocked result is impossible because we modify and check
		* vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
		* modification invalidates all existing locks.
		*
		* We must use ACQUIRE semantics for the mm_lock_seq so that if we are
		* racing with vma_end_write_all(), we only start reading from the VMA
		* after it has been unlocked.
		* This pairs with RELEASE semantics in vma_end_write_all().
		*/
		if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
		vma_refcount_put(vma);
		return NULL;
		}

		return vma;
		}

		/*
		* Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
		* stable and not isolated. If the VMA is not found or is being modified the