Commit acf87264 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf-trampoline-support-jmp-mode'

Menglong Dong says:

====================
bpf trampoline support "jmp" mode

For now, the bpf trampoline is called by the "call" instruction. However,
it break the RSB and introduce extra overhead in x86_64 arch.

For example, we hook the function "foo" with fexit, the call and return
logic will be like this:
  call foo -> call trampoline -> call foo-body ->
  return foo-body -> return foo

As we can see above, there are 3 call, but 2 return, which break the RSB
balance. We can pseudo a "return" here, but it's not the best choice,
as it will still cause once RSB miss:
  call foo -> call trampoline -> call foo-body ->
  return foo-body -> return dummy -> return foo

The "return dummy" doesn't pair the "call trampoline", which can also
cause the RSB miss.

Therefore, we introduce the "jmp" mode for bpf trampoline, as advised by
Alexei in [1]. And the logic will become this:
  call foo -> jmp trampoline -> call foo-body ->
  return foo-body -> return foo

As we can see above, the RSB is totally balanced after this series.

In this series, we introduce the FTRACE_OPS_FL_JMP for ftrace to make it
use the "jmp" instruction instead of "call".

And we also do some adjustment to bpf_arch_text_poke() to allow us specify
the old and new poke_type.

For the BPF_TRAMP_F_SHARE_IPMODIFY case, we will fallback to the "call"
mode, as it need to get the function address from the stack, which is not
supported in "jmp" mode.

Before this series, we have the following performance with the bpf
benchmark:

  $ cd tools/testing/selftests/bpf
  $ ./benchs/run_bench_trigger.sh
  usermode-count :  890.171 ± 1.522M/s
  kernel-count   :  409.184 ± 0.330M/s
  syscall-count  :   26.792 ± 0.010M/s
  fentry         :  171.242 ± 0.322M/s
  fexit          :   80.544 ± 0.045M/s
  fmodret        :   78.301 ± 0.065M/s
  rawtp          :  192.906 ± 0.900M/s
  tp             :   81.883 ± 0.209M/s
  kprobe         :   52.029 ± 0.113M/s
  kprobe-multi   :   62.237 ± 0.060M/s
  kprobe-multi-all:    4.761 ± 0.014M/s
  kretprobe      :   23.779 ± 0.046M/s
  kretprobe-multi:   29.134 ± 0.012M/s
  kretprobe-multi-all:    3.822 ± 0.003M/

And after this series, we have the following performance:

  usermode-count :  890.443 ± 0.307M/s
  kernel-count   :  416.139 ± 0.055M/s
  syscall-count  :   31.037 ± 0.813M/s
  fentry         :  169.549 ± 0.519M/s
  fexit          :  136.540 ± 0.518M/s
  fmodret        :  159.248 ± 0.188M/s
  rawtp          :  194.475 ± 0.144M/s
  tp             :   84.505 ± 0.041M/s
  kprobe         :   59.951 ± 0.071M/s
  kprobe-multi   :   63.153 ± 0.177M/s
  kprobe-multi-all:    4.699 ± 0.012M/s
  kretprobe      :   23.740 ± 0.015M/s
  kretprobe-multi:   29.301 ± 0.022M/s
  kretprobe-multi-all:    3.869 ± 0.005M/s

As we can see above, the performance of fexit increase from 80.544M/s to
136.540M/s, and the "fmodret" increase from 78.301M/s to 159.248M/s.

Link: https://lore.kernel.org/bpf/20251117034906.32036-1-dongml2@chinatelecom.cn/
Changes since v2:
* reject if the addr is already "jmp" in register_ftrace_direct() and
  __modify_ftrace_direct() in the 1st patch.
* fix compile error in powerpc in the 5th patch.
* changes in the 6th patch:
  - fix the compile error by wrapping the write to tr->fops->flags with
    CONFIG_DYNAMIC_FTRACE_WITH_JMP
  - reset BPF_TRAMP_F_SKIP_FRAME when the second try of modify_fentry in
    bpf_trampoline_update()

Link: https://lore.kernel.org/bpf/20251114092450.172024-1-dongml2@chinatelecom.cn/
Changes since v1:
* change the bool parameter that we add to save_args() to "u32 flags"
* rename bpf_trampoline_need_jmp() to bpf_trampoline_use_jmp()
* add new function parameter to bpf_arch_text_poke instead of introduce
  bpf_arch_text_poke_type()
* rename bpf_text_poke to bpf_trampoline_update_fentry
* remove the BPF_TRAMP_F_JMPED and check the current mode with the origin
  flags instead.

Link: https://lore.kernel.org/bpf/CAADnVQLX54sVi1oaHrkSiLqjJaJdm3TQjoVrgU-LZimK6iDcSA@mail.gmail.com/[1]


====================

Acked-by: default avatarSteven Rostedt (Google) <rostedt@goodmis.org>
Link: https://patch.msgid.link/20251118123639.688444-1-dongml2@chinatelecom.cn


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents fad80400 402e44b3
Loading
Loading
Loading
Loading
+7 −7
Original line number Diff line number Diff line
@@ -2934,8 +2934,9 @@ static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
 * The dummy_tramp is used to prevent another CPU from jumping to unknown
 * locations during the patching process, making the patching process easier.
 */
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
		       void *old_addr, void *new_addr)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
		       enum bpf_text_poke_type new_t, void *old_addr,
		       void *new_addr)
{
	int ret;
	u32 old_insn;
@@ -2979,14 +2980,13 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
		    !poking_bpf_entry))
		return -EINVAL;

	if (poke_type == BPF_MOD_CALL)
		branch_type = AARCH64_INSN_BRANCH_LINK;
	else
		branch_type = AARCH64_INSN_BRANCH_NOLINK;

	branch_type = old_t == BPF_MOD_CALL ? AARCH64_INSN_BRANCH_LINK :
					      AARCH64_INSN_BRANCH_NOLINK;
	if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
		return -EFAULT;

	branch_type = new_t == BPF_MOD_CALL ? AARCH64_INSN_BRANCH_LINK :
					      AARCH64_INSN_BRANCH_NOLINK;
	if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
		return -EFAULT;

+6 −3
Original line number Diff line number Diff line
@@ -1284,11 +1284,12 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
	return ret ? ERR_PTR(-EINVAL) : dst;
}

int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
		       void *old_addr, void *new_addr)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
		       enum bpf_text_poke_type new_t, void *old_addr,
		       void *new_addr)
{
	int ret;
	bool is_call = (poke_type == BPF_MOD_CALL);
	bool is_call;
	u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
	u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};

@@ -1298,6 +1299,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
	if (!is_bpf_text_address((unsigned long)ip))
		return -ENOTSUPP;

	is_call = old_t == BPF_MOD_CALL;
	ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call);
	if (ret)
		return ret;
@@ -1305,6 +1307,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
	if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES))
		return -EFAULT;

	is_call = new_t == BPF_MOD_CALL;
	ret = emit_jump_or_nops(new_addr, ip, new_insns, is_call);
	if (ret)
		return ret;
+6 −4
Original line number Diff line number Diff line
@@ -1107,8 +1107,9 @@ static void do_isync(void *info __maybe_unused)
 * execute isync (or some CSI) so that they don't go back into the
 * trampoline again.
 */
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
		       void *old_addr, void *new_addr)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
		       enum bpf_text_poke_type new_t, void *old_addr,
		       void *new_addr)
{
	unsigned long bpf_func, bpf_func_end, size, offset;
	ppc_inst_t old_inst, new_inst;
@@ -1119,7 +1120,6 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
		return -EOPNOTSUPP;

	bpf_func = (unsigned long)ip;
	branch_flags = poke_type == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;

	/* We currently only support poking bpf programs */
	if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) {
@@ -1132,7 +1132,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
	 * an unconditional branch instruction at im->ip_after_call
	 */
	if (offset) {
		if (poke_type != BPF_MOD_JUMP) {
		if (old_t == BPF_MOD_CALL || new_t == BPF_MOD_CALL) {
			pr_err("%s (0x%lx): calls are not supported in bpf prog body\n", __func__,
			       bpf_func);
			return -EOPNOTSUPP;
@@ -1166,6 +1166,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
	}

	old_inst = ppc_inst(PPC_RAW_NOP());
	branch_flags = old_t == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;
	if (old_addr) {
		if (is_offset_in_branch_range(ip - old_addr))
			create_branch(&old_inst, ip, (unsigned long)old_addr, branch_flags);
@@ -1174,6 +1175,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
				      branch_flags);
	}
	new_inst = ppc_inst(PPC_RAW_NOP());
	branch_flags = new_t == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;
	if (new_addr) {
		if (is_offset_in_branch_range(ip - new_addr))
			create_branch(&new_inst, ip, (unsigned long)new_addr, branch_flags);
+7 −4
Original line number Diff line number Diff line
@@ -852,17 +852,19 @@ static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
	return emit_jump_and_link(is_call ? RV_REG_T0 : RV_REG_ZERO, rvoff, false, &ctx);
}

int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
		       void *old_addr, void *new_addr)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
		       enum bpf_text_poke_type new_t, void *old_addr,
		       void *new_addr)
{
	u32 old_insns[RV_FENTRY_NINSNS], new_insns[RV_FENTRY_NINSNS];
	bool is_call = poke_type == BPF_MOD_CALL;
	bool is_call;
	int ret;

	if (!is_kernel_text((unsigned long)ip) &&
	    !is_bpf_text_address((unsigned long)ip))
		return -ENOTSUPP;

	is_call = old_t == BPF_MOD_CALL;
	ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call);
	if (ret)
		return ret;
@@ -870,6 +872,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
	if (memcmp(ip, old_insns, RV_FENTRY_NBYTES))
		return -EFAULT;

	is_call = new_t == BPF_MOD_CALL;
	ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call);
	if (ret)
		return ret;
@@ -1131,7 +1134,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
	store_args(nr_arg_slots, args_off, ctx);

	/* skip to actual body of traced function */
	if (flags & BPF_TRAMP_F_SKIP_FRAME)
	if (flags & BPF_TRAMP_F_ORIG_STACK)
		orig_call += RV_FENTRY_NINSNS * 4;

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+4 −3
Original line number Diff line number Diff line
@@ -2413,8 +2413,9 @@ bool bpf_jit_supports_far_kfunc_call(void)
	return true;
}

int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
		       void *old_addr, void *new_addr)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
		       enum bpf_text_poke_type new_t, void *old_addr,
		       void *new_addr)
{
	struct bpf_plt expected_plt, current_plt, new_plt, *plt;
	struct {
@@ -2431,7 +2432,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
	if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
		return -EINVAL;

	if (t == BPF_MOD_JUMP &&
	if ((new_t == BPF_MOD_JUMP || old_t == BPF_MOD_JUMP) &&
	    insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
		/*
		 * The branch already points to the destination,
Loading