Commit 2d2b026c authored by Cheng-Yang Chou, committed by Tejun Heo
Browse files

sched_ext: Deny SCX kfuncs to non-SCX struct_ops programs



scx_kfunc_context_filter() currently allows non-SCX struct_ops programs
(e.g. tcp_congestion_ops) to call SCX unlocked kfuncs. This is wrong
for two reasons:

- It is semantically incorrect: a TCP congestion control program has no
  business calling SCX kfuncs such as scx_bpf_kick_cpu().

- With CONFIG_EXT_SUB_SCHED=y, kfuncs like scx_bpf_kick_cpu() call
  scx_prog_sched(aux), which invokes bpf_prog_get_assoc_struct_ops(aux)
  and casts the result to struct sched_ext_ops * before reading ops->priv.
  For a non-SCX struct_ops program the returned pointer is the kdata of
  that struct_ops type, which is far smaller than sched_ext_ops, making
  the read an out-of-bounds access (confirmed with KASAN).

Extend the filter to cover scx_kfunc_set_any and scx_kfunc_set_idle as
well, and deny all SCX kfuncs for any struct_ops program that is not the
SCX struct_ops. This addresses both issues: the semantic contract is
enforced at the verifier level, and the runtime out-of-bounds access
becomes unreachable.

Fixes: d1d3c1c6 ("sched_ext: Add verifier-time kfunc context filter")
Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent 87019cb6
Loading
Loading
Loading
Loading
+18 −14
Original line number Diff line number Diff line
@@ -9480,6 +9480,7 @@ BTF_KFUNCS_END(scx_kfunc_ids_any)
/*
 * kfunc ID set descriptor for scx_kfunc_ids_any, owned by this module.
 *
 * scx_kfunc_context_filter() is installed as the verifier-time filter, so
 * membership in this set alone does not grant access: the filter decides per
 * program whether a call to one of these kfuncs is allowed or rejected with
 * -EACCES.
 */
static const struct btf_kfunc_id_set scx_kfunc_set_any = {
	.owner			= THIS_MODULE,
	.set			= &scx_kfunc_ids_any,
	.filter			= scx_kfunc_context_filter,
};

/*
@@ -9527,13 +9528,12 @@ static const u32 scx_kf_allow_flags[] = {
};

/*
 * Verifier-time filter for context-sensitive SCX kfuncs. Registered via the
 * .filter field on each per-group btf_kfunc_id_set. The BPF core invokes this
 * for every kfunc call in the registered hook (BPF_PROG_TYPE_STRUCT_OPS or
 * Verifier-time filter for SCX kfuncs. Registered via the .filter field on
 * each per-group btf_kfunc_id_set. The BPF core invokes this for every kfunc
 * call in the registered hook (BPF_PROG_TYPE_STRUCT_OPS or
 * BPF_PROG_TYPE_SYSCALL), regardless of which set originally introduced the
 * kfunc - so the filter must short-circuit on kfuncs it doesn't govern (e.g.
 * scx_kfunc_ids_any) by falling through to "allow" when none of the
 * context-sensitive sets contain the kfunc.
 * kfunc - so the filter must short-circuit on kfuncs it doesn't govern by
 * falling through to "allow" when none of the SCX sets contain the kfunc.
 */
int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
@@ -9542,18 +9542,21 @@ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id)
	bool in_enqueue = btf_id_set8_contains(&scx_kfunc_ids_enqueue_dispatch, kfunc_id);
	bool in_dispatch = btf_id_set8_contains(&scx_kfunc_ids_dispatch, kfunc_id);
	bool in_cpu_release = btf_id_set8_contains(&scx_kfunc_ids_cpu_release, kfunc_id);
	bool in_idle = btf_id_set8_contains(&scx_kfunc_ids_idle, kfunc_id);
	bool in_any = btf_id_set8_contains(&scx_kfunc_ids_any, kfunc_id);
	u32 moff, flags;

	/* Not a context-sensitive kfunc (e.g. from scx_kfunc_ids_any) - allow. */
	if (!(in_unlocked || in_select_cpu || in_enqueue || in_dispatch || in_cpu_release))
	/* Not an SCX kfunc - allow. */
	if (!(in_unlocked || in_select_cpu || in_enqueue || in_dispatch ||
	      in_cpu_release || in_idle || in_any))
		return 0;

	/* SYSCALL progs (e.g. BPF test_run()) may call unlocked and select_cpu kfuncs. */
	if (prog->type == BPF_PROG_TYPE_SYSCALL)
		return (in_unlocked || in_select_cpu) ? 0 : -EACCES;
		return (in_unlocked || in_select_cpu || in_idle || in_any) ? 0 : -EACCES;

	if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
		return -EACCES;
		return (in_any || in_idle) ? 0 : -EACCES;

	/*
	 * add_subprog_and_kfunc() collects all kfunc calls, including dead code
@@ -9566,14 +9569,15 @@ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id)
		return 0;

	/*
	 * Non-SCX struct_ops: only unlocked kfuncs are safe. The other
	 * context-sensitive kfuncs assume the rq lock is held by the SCX
	 * dispatch path, which doesn't apply to other struct_ops users.
	 * Non-SCX struct_ops: SCX kfuncs are not permitted.
	 */
	if (prog->aux->st_ops != &bpf_sched_ext_ops)
		return in_unlocked ? 0 : -EACCES;
		return -EACCES;

	/* SCX struct_ops: check the per-op allow list. */
	if (in_any || in_idle)
		return 0;

	moff = prog->aux->attach_st_ops_member_off;
	flags = scx_kf_allow_flags[SCX_MOFF_IDX(moff)];

+1 −0
Original line number Diff line number Diff line
@@ -1467,6 +1467,7 @@ BTF_KFUNCS_END(scx_kfunc_ids_idle)
/*
 * kfunc ID set descriptor for scx_kfunc_ids_idle, owned by this module.
 *
 * scx_kfunc_context_filter() is installed as the verifier-time filter, so
 * calls to the idle kfuncs are checked against the calling program rather
 * than being allowed purely by set membership.
 */
static const struct btf_kfunc_id_set scx_kfunc_set_idle = {
	.owner			= THIS_MODULE,
	.set			= &scx_kfunc_ids_idle,
	.filter			= scx_kfunc_context_filter,
};

/*
+1 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@

struct sched_ext_ops;

extern struct btf_id_set8 scx_kfunc_ids_idle;
extern struct btf_id_set8 scx_kfunc_ids_select_cpu;

void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops);