Commit 0913b580 authored by Linus Torvalds
Browse files

Merge tag 'cgroup-for-7.1-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:

 - cpuset fixes:
     - Partition invalidation could return CPUs still in use by sibling
       partitions, producing overlapping effective_cpus
     - cpuset_can_attach() over-reserved DL bandwidth on moves that
       stayed within the same root domain
     - Pending DL migration state leaked into later attaches when a
       later can_attach() check failed
     - Reorder PF_EXITING and __GFP_HARDWALL checks so dying tasks can
       allocate from any node and exit quickly

 - dmem: propagate -ENOMEM instead of spinning forever when the fallback
   pool allocation also fails

 - selftests/cgroup: percpu test error-path leak, bogus numeric
   comparison of cpuset strings, and a zero-length read() that silently
   passed OOM-kill tests

* tag 'cgroup-for-7.1-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/cpuset: Return only actually allocated CPUs during partition invalidation
  selftests/cgroup: Fix error path leaks in test_percpu_basic
  cgroup/cpuset: Reserve DL bandwidth only for root-domain moves
  cgroup/cpuset: Reset DL migration state on can_attach() failure
  selftests/cgroup: Fix string comparison in write_test
  selftests/cgroup: Fix cg_read_strcmp() empty string comparison
  cgroup/dmem: Return -ENOMEM on failed pool preallocation
  cgroup/cpuset: move PF_EXITING check before __GFP_HARDWALL in cpuset_current_node_allowed()
parents 50599e4c 345f4016
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -33,6 +33,15 @@ struct root_domain;
extern void dl_add_task_root_domain(struct task_struct *p);
extern void dl_clear_root_domain(struct root_domain *rd);
extern void dl_clear_root_domain_cpu(int cpu);
/*
 * Return whether moving DL task @p to @new_mask requires moving DL
 * bandwidth accounting between root domains. This helper is specific to
 * DL bandwidth move accounting semantics and is shared by
 * cpuset_can_attach() and set_cpus_allowed_dl() so both paths use the
 * same source root-domain test.
 *
 * @p:        task being moved; a task that is not a DL task never needs
 *            a bandwidth move.
 * @new_mask: CPU mask the task is moving to.
 *
 * Returns true only when @p is a DL task and the span of the root domain
 * of the runqueue @p is currently on does not intersect @new_mask;
 * returns false otherwise.
 */
extern bool dl_task_needs_bw_move(struct task_struct *p,
				  const struct cpumask *new_mask);

extern u64 dl_cookie;
extern bool dl_bw_visited(int cpu, u64 cookie);
+1 −0
Original line number Diff line number Diff line
@@ -167,6 +167,7 @@ struct cpuset {
	 */
	int nr_deadline_tasks;
	int nr_migrate_dl_tasks;
	/* DL bandwidth that needs destination reservation for this attach. */
	u64 sum_migrate_dl_bw;
	/*
	 * CPU used for temporary DL bandwidth allocation during attach;
+31 −25
Original line number Diff line number Diff line
@@ -1718,7 +1718,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
		 */
		if (is_partition_valid(parent))
			adding = cpumask_and(tmp->addmask,
					     xcpus, parent->effective_xcpus);
					     cs->effective_xcpus,
					     parent->effective_xcpus);
		if (old_prs > 0)
			new_prs = -old_prs;

@@ -2993,7 +2994,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
	struct cpuset *cs, *oldcs;
	struct task_struct *task;
	bool setsched_check;
	int ret;
	int cpu, ret;

	/* used later by cpuset_attach() */
	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
@@ -3038,31 +3039,31 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
		}

		if (dl_task(task)) {
			/*
			 * Count all migrating DL tasks for cpuset task accounting.
			 * Only tasks that need a root-domain bandwidth move
			 * contribute to sum_migrate_dl_bw.
			 */
			cs->nr_migrate_dl_tasks++;
			if (dl_task_needs_bw_move(task, cs->effective_cpus))
				cs->sum_migrate_dl_bw += task->dl.dl_bw;
		}
	}

	if (!cs->nr_migrate_dl_tasks)
	if (!cs->sum_migrate_dl_bw)
		goto out_success;

	if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) {
		int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);

	cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
	if (unlikely(cpu >= nr_cpu_ids)) {
			reset_migrate_dl_data(cs);
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
		if (ret) {
			reset_migrate_dl_data(cs);
	if (ret)
		goto out_unlock;
		}

	cs->dl_bw_cpu = cpu;
	}

out_success:
	/*
@@ -3070,7 +3071,10 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
	 * changes which zero cpus/mems_allowed.
	 */
	cs->attach_in_progress++;

out_unlock:
	if (ret)
		reset_migrate_dl_data(cs);
	mutex_unlock(&cpuset_mutex);
	return ret;
}
@@ -4176,11 +4180,11 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
 * current's mems_allowed, yes.  If it's not a __GFP_HARDWALL request and this
 * node is set in the nearest hardwalled cpuset ancestor to current's cpuset,
 * yes.  If current has access to memory reserves as an oom victim, yes.
 * Otherwise, no.
 * If the current task is PF_EXITING, yes. Otherwise, no.
 *
 * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
 * and do not allow allocations outside the current task's cpuset
 * unless the task has been OOM killed.
 * unless the task has been OOM killed or is exiting.
 * GFP_KERNEL allocations are not so marked, so can escape to the
 * nearest enclosing hardwalled ancestor cpuset.
 *
@@ -4194,7 +4198,9 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
 * The first call here from mm/page_alloc:get_page_from_freelist()
 * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
 * so no allocation on a node outside the cpuset is allowed (unless
 * in interrupt, of course).
 * in interrupt, of course).  The PF_EXITING check must therefore
 * come before the __GFP_HARDWALL check, otherwise a dying task
 * would be blocked on the fast path.
 *
 * The second pass through get_page_from_freelist() doesn't even call
 * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
@@ -4204,6 +4210,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
 *	in_interrupt - any node ok (current task context irrelevant)
 *	GFP_ATOMIC   - any node ok
 *	tsk_is_oom_victim   - any node ok
 *	PF_EXITING   - any node ok (let dying task exit quickly)
 *	GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
 *	GFP_USER     - only nodes in current task's mems allowed ok.
 */
@@ -4223,11 +4230,10 @@ bool cpuset_current_node_allowed(int node, gfp_t gfp_mask)
	 */
	if (unlikely(tsk_is_oom_victim(current)))
		return true;
	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
		return false;

	if (current->flags & PF_EXITING) /* Let dying task have memory */
		return true;
	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
		return false;

	/* Not hardwall and node outside mems_allowed: scan up cpusets */
	spin_lock_irqsave(&callback_lock, flags);
+1 −0
Original line number Diff line number Diff line
@@ -602,6 +602,7 @@ get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region)
				pool = NULL;
				continue;
			}
			pool = ERR_PTR(-ENOMEM);
		}
	}

+10 −3
Original line number Diff line number Diff line
@@ -3107,20 +3107,18 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
static void set_cpus_allowed_dl(struct task_struct *p,
				struct affinity_context *ctx)
{
	struct root_domain *src_rd;
	struct rq *rq;

	WARN_ON_ONCE(!dl_task(p));

	rq = task_rq(p);
	src_rd = rq->rd;
	/*
	 * Migrating a SCHED_DEADLINE task between exclusive
	 * cpusets (different root_domains) entails a bandwidth
	 * update. We already made space for us in the destination
	 * domain (see cpuset_can_attach()).
	 */
	if (!cpumask_intersects(src_rd->span, ctx->new_mask)) {
	if (dl_task_needs_bw_move(p, ctx->new_mask)) {
		struct dl_bw *src_dl_b;

		src_dl_b = dl_bw_of(cpu_of(rq));
@@ -3137,6 +3135,15 @@ static void set_cpus_allowed_dl(struct task_struct *p,
	set_cpus_allowed_common(p, ctx);
}

/*
 * Decide whether changing the affinity of @p to @new_mask takes a
 * SCHED_DEADLINE task out of its current root domain, in which case its
 * bandwidth accounting has to follow it.
 *
 * Returns false for any task that is not a DL task. For a DL task,
 * returns true when the span of the root domain attached to the task's
 * current runqueue shares no CPU with @new_mask, and false when they
 * overlap.
 */
bool dl_task_needs_bw_move(struct task_struct *p,
			   const struct cpumask *new_mask)
{
	/*
	 * The && short-circuits, so the runqueue/root-domain lookup is
	 * only performed once @p is known to be a DL task.
	 */
	return dl_task(p) &&
	       !cpumask_intersects(task_rq(p)->rd->span, new_mask);
}

/* Assumes rq->lock is held */
static void rq_online_dl(struct rq *rq)
{
Loading