Commit 58624e4b authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'cgroup-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "Cpuset fixes:

   - Fix isolated CPUs leaking into sched domains

   - Remove now unnecessary kernfs active break which can trigger a
     warning

   - Comment updates"

* tag 'cgroup-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/cpuset: remove kernfs active break
  cgroup/cpuset: Prevent leakage of isolated CPUs into sched domains
  cgroup/cpuset: Remove stale text
parents 257a8be4 3cb97a92
Loading
Loading
Loading
Loading
+11 −33
Original line number Diff line number Diff line
@@ -197,10 +197,8 @@ static struct cpuset top_cpuset = {

/*
 * There are two global locks guarding cpuset structures - cpuset_mutex and
 * callback_lock. We also require taking task_lock() when dereferencing a
 * task's cpuset pointer. See "The task_lock() exception", at the end of this
 * comment.  The cpuset code uses only cpuset_mutex. Other kernel subsystems
 * can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
 * callback_lock. The cpuset code uses only cpuset_mutex. Other kernel
 * subsystems can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
 * structures. Note that cpuset_mutex needs to be a mutex as it is used in
 * paths that rely on priority inheritance (e.g. scheduler - on RT) for
 * correctness.
@@ -229,9 +227,6 @@ static struct cpuset top_cpuset = {
 * The cpuset_common_seq_show() handlers only hold callback_lock across
 * small pieces of code, such as when reading out possibly multi-word
 * cpumasks and nodemasks.
 *
 * Accessing a task's cpuset should be done in accordance with the
 * guidelines for accessing subsystem state in kernel/cgroup.c
 */

static DEFINE_MUTEX(cpuset_mutex);
@@ -890,6 +885,14 @@ static int generate_sched_domains(cpumask_var_t **domains,
	 */
	if (cgrpv2) {
		for (i = 0; i < ndoms; i++) {
			/*
			 * The top cpuset may contain some boot time isolated
			 * CPUs that need to be excluded from the sched domain.
			 */
			if (csa[i] == &top_cpuset)
				cpumask_and(doms[i], csa[i]->effective_cpus,
					    housekeeping_cpumask(HK_TYPE_DOMAIN));
			else
				cpumask_copy(doms[i], csa[i]->effective_cpus);
			if (dattr)
				dattr[i] = SD_ATTR_INIT;
@@ -3121,29 +3124,6 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
	int retval = -ENODEV;

	buf = strstrip(buf);

	/*
	 * CPU or memory hotunplug may leave @cs w/o any execution
	 * resources, in which case the hotplug code asynchronously updates
	 * configuration and transfers all tasks to the nearest ancestor
	 * which can execute.
	 *
	 * As writes to "cpus" or "mems" may restore @cs's execution
	 * resources, wait for the previously scheduled operations before
	 * proceeding, so that we don't end up keep removing tasks added
	 * after execution capability is restored.
	 *
	 * cpuset_handle_hotplug may call back into cgroup core asynchronously
	 * via cgroup_transfer_tasks() and waiting for it from a cgroupfs
	 * operation like this one can lead to a deadlock through kernfs
	 * active_ref protection.  Let's break the protection.  Losing the
	 * protection is okay as we check whether @cs is online after
	 * grabbing cpuset_mutex anyway.  This only happens on the legacy
	 * hierarchies.
	 */
	css_get(&cs->css);
	kernfs_break_active_protection(of->kn);

	cpus_read_lock();
	mutex_lock(&cpuset_mutex);
	if (!is_cpuset_online(cs))
@@ -3176,8 +3156,6 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
out_unlock:
	mutex_unlock(&cpuset_mutex);
	cpus_read_unlock();
	kernfs_unbreak_active_protection(of->kn);
	css_put(&cs->css);
	flush_workqueue(cpuset_migrate_mm_wq);
	return retval ?: nbytes;
}
+19 −14
Original line number Diff line number Diff line
@@ -86,15 +86,15 @@ echo "" > test/cpuset.cpus

#
# If isolated CPUs have been reserved at boot time (as shown in
# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7
# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8
# that will be used by this script for testing purpose. If not, some of
# the tests may fail incorrectly. These isolated CPUs will also be removed
# before being compared with the expected results.
# the tests may fail incorrectly. These pre-isolated CPUs should stay in
# an isolated state throughout the testing process for now.
#
BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
if [[ -n "$BOOT_ISOLCPUS" ]]
then
	[[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] &&
	[[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] &&
		skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested"
	echo "Pre-isolated CPUs: $BOOT_ISOLCPUS"
fi
@@ -683,15 +683,19 @@ check_isolcpus()
		EXPECT_VAL2=$EXPECT_VAL
	fi

	#
	# Appending pre-isolated CPUs
	# Even though CPU #8 isn't used for testing, it can't be pre-isolated
	# to make appending those CPUs easier.
	#
	[[ -n "$BOOT_ISOLCPUS" ]] && {
		EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS}
		EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS}
	}

	#
	# Check cpuset.cpus.isolated cpumask
	#
	if [[ -z "$BOOT_ISOLCPUS" ]]
	then
		ISOLCPUS=$(cat $ISCPUS)
	else
		ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
	fi
	[[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
		# Take a 50ms pause and try again
		pause 0.05
@@ -731,8 +735,6 @@ check_isolcpus()
		fi
	done
	[[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
	[[ -n "BOOT_ISOLCPUS" ]] &&
		ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")

	[[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
}
@@ -836,9 +838,12 @@ run_state_test()
		# if available
		[[ -n "$ICPUS" ]] && {
			check_isolcpus $ICPUS
			[[ $? -ne 0 ]] && test_fail $I "isolated CPU" \
			[[ $? -ne 0 ]] && {
				[[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS}
				test_fail $I "isolated CPU" \
					"Expect $ICPUS, get $ISOLCPUS instead"
			}
		}
		reset_cgroup_states
		#
		# Check to see if effective cpu list changes