Commit eb449bd9 authored by Suren Baghdasaryan, committed by Peter Zijlstra
Browse files

mm: convert mm_lock_seq to a proper seqcount



Convert mm_lock_seq to be seqcount_t and change all mmap_write_lock
variants to increment it, in line with the usual seqcount usage pattern.
This lets us check whether the mmap_lock is write-locked by checking
the mm_lock_seq.sequence counter (odd=locked, even=unlocked). This will be
used when implementing mmap_lock speculation functions.
As a result, vm_lock_seq is also changed to be unsigned to match the type
of mm_lock_seq.sequence.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Link: https://lkml.kernel.org/r/20241122174416.1367052-2-surenb@google.com
parent 75285852
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -710,7 +710,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
	 * we don't rely on for anything - the mm_lock_seq read against which we
	 * need ordering is below.
	 */
	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence))
		return false;

	if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
@@ -727,7 +727,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
	 * after it has been unlocked.
	 * This pairs with RELEASE semantics in vma_end_write_all().
	 */
	if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) {
		up_read(&vma->vm_lock->lock);
		return false;
	}
@@ -742,7 +742,7 @@ static inline void vma_end_read(struct vm_area_struct *vma)
}

/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
	mmap_assert_write_locked(vma->vm_mm);

@@ -750,7 +750,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
	 * mm->mm_lock_seq can't be concurrently modified.
	 */
	*mm_lock_seq = vma->vm_mm->mm_lock_seq;
	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
	return (vma->vm_lock_seq == *mm_lock_seq);
}

@@ -761,7 +761,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
 */
static inline void vma_start_write(struct vm_area_struct *vma)
{
	int mm_lock_seq;
	unsigned int mm_lock_seq;

	if (__is_vma_write_locked(vma, &mm_lock_seq))
		return;
@@ -779,7 +779,7 @@ static inline void vma_start_write(struct vm_area_struct *vma)

static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
	int mm_lock_seq;
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}
+5 −2
Original line number Diff line number Diff line
@@ -697,7 +697,7 @@ struct vm_area_struct {
	 * counter reuse can only lead to occasional unnecessary use of the
	 * slowpath.
	 */
	int vm_lock_seq;
	unsigned int vm_lock_seq;
	/* Unstable RCU readers are allowed to read this. */
	struct vma_lock *vm_lock;
#endif
@@ -891,6 +891,9 @@ struct mm_struct {
		 * Roughly speaking, incrementing the sequence number is
		 * equivalent to releasing locks on VMAs; reading the sequence
		 * number can be part of taking a read lock on a VMA.
		 * Incremented every time mmap_lock is write-locked/unlocked.
		 * Initialized to 0, therefore odd values indicate mmap_lock
		 * is write-locked and even values that it's released.
		 *
		 * Can be modified under write mmap_lock using RELEASE
		 * semantics.
@@ -899,7 +902,7 @@ struct mm_struct {
		 * Can be read with ACQUIRE semantics if not holding write
		 * mmap_lock.
		 */
		int mm_lock_seq;
		seqcount_t mm_lock_seq;
#endif


+36 −19
Original line number Diff line number Diff line
@@ -71,39 +71,39 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
}

#ifdef CONFIG_PER_VMA_LOCK
/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
	mmap_assert_write_locked(mm);
	/*
	 * Nobody can concurrently modify mm->mm_lock_seq due to exclusive
	 * mmap_lock being held.
	 * We need RELEASE semantics here to ensure that preceding stores into
	 * the VMA take effect before we unlock it with this store.
	 * Pairs with ACQUIRE semantics in vma_start_read().
	 */
	smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
	seqcount_init(&mm->mm_lock_seq);
}

/*
 * Begin a write section on mm->mm_lock_seq.
 * Called by the mmap_write_lock*() variants right after mmap_lock has been
 * acquired for writing.
 * NOTE(review): presumably this is the increment that makes the sequence odd
 * while mmap_lock is write-locked - confirm against
 * do_raw_write_seqcount_begin().
 */
static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
{
	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
}

/*
 * End the write section on mm->mm_lock_seq.
 * Called from vma_end_write_all(), which asserts that mmap_lock is held for
 * writing before getting here.
 */
static inline void mm_lock_seqcount_end(struct mm_struct *mm)
{
	/*
	 * Nobody else is expected to modify mm->mm_lock_seq concurrently,
	 * because the caller holds mmap_lock exclusively.
	 */
	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
	do_raw_write_seqcount_end(&mm->mm_lock_seq);
}

#else
static inline void vma_end_write_all(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
#endif

/*
 * Initialize mm->mmap_lock together with the mm_lock_seq sequence counter.
 * The seqcount initialization is an empty stub when CONFIG_PER_VMA_LOCK is
 * not enabled.
 */
static inline void mmap_init_lock(struct mm_struct *mm)
{
	init_rwsem(&mm->mmap_lock);
	mm_lock_seqcount_init(mm);
}

/*
 * Acquire mm->mmap_lock for writing and open the mm_lock_seq write section.
 * The acquisition is bracketed by the mmap_lock tracing hooks.
 */
static inline void mmap_write_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write(&mm->mmap_lock);
	/* Only touch the sequence counter once the rwsem is held for writing. */
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

@@ -111,6 +111,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write_nested(&mm->mmap_lock, subclass);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

@@ -120,10 +121,26 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm)

	__mmap_lock_trace_start_locking(mm, true);
	ret = down_write_killable(&mm->mmap_lock);
	if (!ret)
		mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
	return ret;
}

/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 *
 * The locks are dropped by ending the mm->mm_lock_seq write section, which
 * changes the sequence value that the per-VMA vm_lock_seq is compared against.
 * NOTE(review): vma_start_read() documents that its mm_lock_seq read pairs
 * with RELEASE semantics here; presumably that ordering is now provided by
 * do_raw_write_seqcount_end() - confirm.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
{
	mmap_assert_write_locked(mm);
	mm_lock_seqcount_end(mm);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, true);
+1 −4
Original line number Diff line number Diff line
@@ -448,7 +448,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma)
		return false;

	init_rwsem(&vma->vm_lock->lock);
	vma->vm_lock_seq = -1;
	vma->vm_lock_seq = UINT_MAX;

	return true;
}
@@ -1267,9 +1267,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
	seqcount_init(&mm->write_protect_seq);
	mmap_init_lock(mm);
	INIT_LIST_HEAD(&mm->mmlist);
#ifdef CONFIG_PER_VMA_LOCK
	mm->mm_lock_seq = 0;
#endif
	mm_pgtables_bytes_init(mm);
	mm->map_count = 0;
	mm->locked_vm = 0;
+1 −1
Original line number Diff line number Diff line
@@ -40,7 +40,7 @@ struct mm_struct init_mm = {
	.arg_lock	=  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
#ifdef CONFIG_PER_VMA_LOCK
	.mm_lock_seq	= 0,
	.mm_lock_seq	= SEQCNT_ZERO(init_mm.mm_lock_seq),
#endif
	.user_ns	= &init_user_ns,
	.cpu_bitmap	= CPU_BITS_NONE,
Loading