Commit cc9877fb authored by Tejun Heo's avatar Tejun Heo
Browse files

sched_ext: Improve error reporting during loading



When the BPF scheduler fails, ops.exit() allows rich error reporting through
scx_exit_info. Use scx.exit() path consistently for all failures which can
be caused by the BPF scheduler:

- scx_ops_error() is called after ops.init() and ops.cgroup_init() failure
  to record error information.

- ops.init_task() failure now uses scx_ops_error() instead of pr_err().

- The err_disable path updated to automatically trigger scx_ops_error() to
  cover cases that the error message hasn't already been generated and
  always return 0 indicating init success so that the error is reported
  through ops.exit().

Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Cc: David Vernet <void@manifault.com>
Cc: Daniel Hodges <hodges.daniel.scott@gmail.com>
Cc: Changwoo Min <multics69@gmail.com>
Cc: Andrea Righi <andrea.righi@linux.dev>
Cc: Dan Schatzberg <schatzberg.dan@gmail.com>
parent fcbc4235
Loading
Loading
Loading
Loading
+19 −11
Original line number Diff line number Diff line
@@ -625,6 +625,10 @@ struct sched_ext_ops {
	/**
	 * exit - Clean up after the BPF scheduler
	 * @info: Exit info
	 *
	 * ops.exit() is also called on ops.init() failure, which is a bit
	 * unusual. This is to allow rich reporting through @info on how
	 * ops.init() failed.
	 */
	void (*exit)(struct scx_exit_info *info);

@@ -4117,6 +4121,7 @@ static int scx_cgroup_init(void)
				      css->cgroup, &args);
		if (ret) {
			css_put(css);
			scx_ops_error("ops.cgroup_init() failed (%d)", ret);
			return ret;
		}
		tg->scx_flags |= SCX_TG_INITED;
@@ -5041,6 +5046,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
		if (ret) {
			ret = ops_sanitize_err("init", ret);
			cpus_read_unlock();
			scx_ops_error("ops.init() failed (%d)", ret);
			goto err_disable;
		}
	}
@@ -5150,7 +5156,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
			spin_lock_irq(&scx_tasks_lock);
			scx_task_iter_exit(&sti);
			spin_unlock_irq(&scx_tasks_lock);
			pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
			scx_ops_error("ops.init_task() failed (%d) for %s[%d]",
				      ret, p->comm, p->pid);
			goto err_disable_unlock_all;
		}
@@ -5199,14 +5205,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)

	scx_ops_bypass(false);

	/*
	 * Returning an error code here would lose the recorded error
	 * information. Exit indicating success so that the error is notified
	 * through ops.exit() with all the details.
	 */
	if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
		WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE);
		ret = 0;
		goto err_disable;
	}

@@ -5241,10 +5241,18 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
	scx_ops_bypass(false);
err_disable:
	mutex_unlock(&scx_ops_enable_mutex);
	/* must be fully disabled before returning */
	scx_ops_disable(SCX_EXIT_ERROR);
	/*
	 * Returning an error code here would not pass all the error information
	 * to userspace. Record errno using scx_ops_error() for cases
	 * scx_ops_error() wasn't already invoked and exit indicating success so
	 * that the error is notified through ops.exit() with all the details.
	 *
	 * Flush scx_ops_disable_work to ensure that error is reported before
	 * init completion.
	 */
	scx_ops_error("scx_ops_enable() failed (%d)", ret);
	kthread_flush_work(&scx_ops_disable_work);
	return ret;
	return 0;
}