sched: Compact RSEQ concurrency IDs with reduced threads and affinity (02d954c0) · Commits · git / linux-net

include/linux/mm_types.h

+4 −3

Original line number	Diff line number	Diff line
		@@ -875,10 +875,11 @@ struct mm_struct {
		*/
		unsigned int nr_cpus_allowed;
		/**
		* @max_nr_cid: Maximum number of concurrency IDs allocated.
		* @max_nr_cid: Maximum number of allowed concurrency
		* IDs allocated.
		*
		* Track the highest number of concurrency IDs allocated for the
		* mm.
		* Track the highest number of allowed concurrency IDs
		* allocated for the mm.
		*/
		atomic_t max_nr_cid;
		/**

kernel/sched/sched.h

+22 −3

Original line number	Diff line number	Diff line
		@@ -3698,10 +3698,28 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm)
		{
		struct cpumask *cidmask = mm_cidmask(mm);
		struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
		int cid = __this_cpu_read(pcpu_cid->recent_cid);
		int cid, max_nr_cid, allowed_max_nr_cid;

		/*
		* After shrinking the number of threads or reducing the number
		* of allowed cpus, reduce the value of max_nr_cid so expansion
		* of cid allocation will preserve cache locality if the number
		* of threads or allowed cpus increase again.
		*/
		max_nr_cid = atomic_read(&mm->max_nr_cid);
		while ((allowed_max_nr_cid = min_t(int, READ_ONCE(mm->nr_cpus_allowed),
		atomic_read(&mm->mm_users))),
		max_nr_cid > allowed_max_nr_cid) {
		/* atomic_try_cmpxchg loads previous mm->max_nr_cid into max_nr_cid. */
		if (atomic_try_cmpxchg(&mm->max_nr_cid, &max_nr_cid, allowed_max_nr_cid)) {
		max_nr_cid = allowed_max_nr_cid;
		break;
		}
		}
		/* Try to re-use recent cid. This improves cache locality. */
		if (!mm_cid_is_unset(cid) && !cpumask_test_and_set_cpu(cid, cidmask))
		cid = __this_cpu_read(pcpu_cid->recent_cid);
		if (!mm_cid_is_unset(cid) && cid < max_nr_cid &&
		!cpumask_test_and_set_cpu(cid, cidmask))
		return cid;
		/*
		* Expand cid allocation if the maximum number of concurrency
		@@ -3709,8 +3727,9 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm)
		* and number of threads. Expanding cid allocation as much as
		* possible improves cache locality.
		*/
		cid = atomic_read(&mm->max_nr_cid);
		cid = max_nr_cid;
		while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) {
		/* atomic_try_cmpxchg loads previous mm->max_nr_cid into cid. */
		if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1))
		continue;
		if (!cpumask_test_and_set_cpu(cid, cidmask))