Commit 05950213 authored by Linus Torvalds
Browse files

Merge tag 'cgroup-for-6.17-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "This contains two cgroup changes. Both are pretty low risk.

  - Fix deadlock in cgroup destruction when repeatedly
    mounting/unmounting perf_event and net_prio controllers.

    The issue occurs because cgroup_destroy_wq has max_active=1, causing
    root destruction to wait for CSS offline operations that are queued
    behind it.

    The fix splits cgroup_destroy_wq into three separate workqueues to
    eliminate the blocking.

  - Set of->priv to NULL upon file release so that potential bugs
    manifest as NULL pointer dereferences rather than use-after-free
    errors"

* tag 'cgroup-for-6.17-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/psi: Set of->priv to NULL upon file release
  cgroup: split cgroup_destroy_wq into 3 workqueues
parents d4b77998 94a4acfe
Loading
Loading
Loading
Loading
+37 −7
Original line number Diff line number Diff line
@@ -126,8 +126,31 @@ DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
 * of concurrent destructions.  Use a separate workqueue so that cgroup
 * destruction work items don't end up filling up max_active of system_wq
 * which may lead to deadlock.
 */
static struct workqueue_struct *cgroup_destroy_wq;
 *
 * A cgroup destruction should enqueue work sequentially to:
 * cgroup_offline_wq: use for css offline work
 * cgroup_release_wq: use for css release work
 * cgroup_free_wq: use for free work
 *
 * Rationale for using separate workqueues:
 * The cgroup root free work may depend on completion of other css offline
 * operations. If all work items were enqueued to a single workqueue, this
 * could create a deadlock scenario where:
 * - Free work waits for other css offline work to complete.
 * - But other css offline work is queued after free work in the same queue.
 *
 * Example deadlock scenario with single workqueue (cgroup_destroy_wq):
 * 1. umount net_prio
 * 2. net_prio root destruction enqueues work to cgroup_destroy_wq (CPUx)
 * 3. perf_event CSS A offline enqueues work to same cgroup_destroy_wq (CPUx)
 * 4. net_prio cgroup_destroy_root->cgroup_lock_and_drain_offline.
 * 5. net_prio root destruction blocks waiting for perf_event CSS A offline,
 *    which can never complete as it's behind in the same queue and
 *    workqueue's max_active is 1.
 */
static struct workqueue_struct *cgroup_offline_wq;
static struct workqueue_struct *cgroup_release_wq;
static struct workqueue_struct *cgroup_free_wq;

/* generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
@@ -4159,6 +4182,7 @@ static void cgroup_file_release(struct kernfs_open_file *of)
		cft->release(of);
	put_cgroup_ns(ctx->ns);
	kfree(ctx);
	of->priv = NULL;
}

static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
@@ -5558,7 +5582,7 @@ static void css_release_work_fn(struct work_struct *work)
	cgroup_unlock();

	INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
	queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
	queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
}

static void css_release(struct percpu_ref *ref)
@@ -5567,7 +5591,7 @@ static void css_release(struct percpu_ref *ref)
		container_of(ref, struct cgroup_subsys_state, refcnt);

	INIT_WORK(&css->destroy_work, css_release_work_fn);
	queue_work(cgroup_destroy_wq, &css->destroy_work);
	queue_work(cgroup_release_wq, &css->destroy_work);
}

static void init_and_link_css(struct cgroup_subsys_state *css,
@@ -5701,7 +5725,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
	list_del_rcu(&css->sibling);
err_free_css:
	INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
	queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
	queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
	return ERR_PTR(err);
}

@@ -5939,7 +5963,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)

	if (atomic_dec_and_test(&css->online_cnt)) {
		INIT_WORK(&css->destroy_work, css_killed_work_fn);
		queue_work(cgroup_destroy_wq, &css->destroy_work);
		queue_work(cgroup_offline_wq, &css->destroy_work);
	}
}

@@ -6325,8 +6349,14 @@ static int __init cgroup_wq_init(void)
	 * We would prefer to do this in cgroup_init() above, but that
	 * is called before init_workqueues(): so leave this until after.
	 */
	cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
	BUG_ON(!cgroup_destroy_wq);
	cgroup_offline_wq = alloc_workqueue("cgroup_offline", 0, 1);
	BUG_ON(!cgroup_offline_wq);

	cgroup_release_wq = alloc_workqueue("cgroup_release", 0, 1);
	BUG_ON(!cgroup_release_wq);

	cgroup_free_wq = alloc_workqueue("cgroup_free", 0, 1);
	BUG_ON(!cgroup_free_wq);
	return 0;
}
core_initcall(cgroup_wq_init);