Commit ba225407 authored by Andrii Nakryiko's avatar Andrii Nakryiko
Browse files

Merge branch 'ftrace-bpf-use-single-direct-ops-for-bpf-trampolines'

Jiri Olsa says:

====================
ftrace,bpf: Use single direct ops for bpf trampolines

hi,
while poking the multi-tracing interface I ended up with just one ftrace_ops
object to attach all trampolines.

This change allows to use less direct API calls during the attachment changes
in the future code, so in effect speeding up the attachment.

In current code we get a speed up from using just a single ftrace_ops object.

- with current code:

  Performance counter stats for 'bpftrace -e fentry:vmlinux:ksys_* {} -c true':

     6,364,157,902      cycles:k
       828,728,902      cycles:u
     1,064,803,824      instructions:u                   #    1.28  insn per cycle
    23,797,500,067      instructions:k                   #    3.74  insn per cycle

       4.416004987 seconds time elapsed

       0.164121000 seconds user
       1.289550000 seconds sys

- with the fix:

   Performance counter stats for 'bpftrace -e fentry:vmlinux:ksys_* {} -c true':

     6,535,857,905      cycles:k
       810,809,429      cycles:u
     1,064,594,027      instructions:u                   #    1.31  insn per cycle
    23,962,552,894      instructions:k                   #    3.67  insn per cycle

       1.666961239 seconds time elapsed

       0.157412000 seconds user
       1.283396000 seconds sys

The speedup seems to be related to the fact that with single ftrace_ops object
we don't call ftrace_shutdown anymore (we use ftrace_update_ops instead) and
we skip the synchronize rcu calls (each ~100ms) at the end of that function.

rfc: https://lore.kernel.org/bpf/20250729102813.1531457-1-jolsa@kernel.org/
v1:  https://lore.kernel.org/bpf/20250923215147.1571952-1-jolsa@kernel.org/
v2:  https://lore.kernel.org/bpf/20251113123750.2507435-1-jolsa@kernel.org/
v3:  https://lore.kernel.org/bpf/20251120212402.466524-1-jolsa@kernel.org/
v4:  https://lore.kernel.org/bpf/20251203082402.78816-1-jolsa@kernel.org/
v5:  https://lore.kernel.org/bpf/20251215211402.353056-10-jolsa@kernel.org/

v6 changes:
- rename add_hash_entry_direct to add_ftrace_hash_entry_direct [Steven]
- factor hash_add/hash_sub [Steven]
- add kerneldoc header for update_ftrace_direct_* functions [Steven]
- few assorted smaller fixes [Steven]
- added missing direct_ops wrappers for !CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
  case [Steven]

v5 changes:
- do not export ftrace_hash object [Steven]
- fix update_ftrace_direct_add new_filter_hash leak [ci]

v4 changes:
- rebased on top of bpf-next/master (with jmp attach changes)
  added patch 1 to deal with that
- added extra checks for update_ftrace_direct_del/mod to address
  the ci bot review

v3 changes:
- rebased on top of bpf-next/master
- fixed update_ftrace_direct_del cleanup path
- added missing inline to update_ftrace_direct_* stubs

v2 changes:
- rebased on top fo bpf-next/master plus Song's livepatch fixes [1]
- renamed the API functions [2] [Steven]
- do not export the new api [Steven]
- kept the original direct interface:

  I'm not sure if we want to melt both *_ftrace_direct and the new interface
  into single one. It's bit different in semantic (hence the name change as
  Steven suggested [2]) and I don't think the changes are not that big so
  we could easily keep both APIs.

v1 changes:
- make the change x86 specific, after discussing with Mark options for
  arm64 [Mark]

thanks,
jirka

[1] https://lore.kernel.org/bpf/20251027175023.1521602-1-song@kernel.org/
[2] https://lore.kernel.org/bpf/20250924050415.4aefcb91@batman.local.home/
---
====================

Link: https://patch.msgid.link/20251230145010.103439-1-jolsa@kernel.org


Signed-off-by: default avatarAndrii Nakryiko <andrii@kernel.org>
parents ae23bc81 424f6a36
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -336,6 +336,7 @@ config X86
	select SCHED_SMT			if SMP
	select ARCH_SUPPORTS_SCHED_CLUSTER	if SMP
	select ARCH_SUPPORTS_SCHED_MC		if SMP
	select HAVE_SINGLE_FTRACE_DIRECT_OPS	if X86_64 && DYNAMIC_FTRACE_WITH_DIRECT_CALLS

config INSTRUCTION_DECODER
	def_bool y
+5 −2
Original line number Diff line number Diff line
@@ -1329,14 +1329,17 @@ struct bpf_tramp_image {
};

struct bpf_trampoline {
	/* hlist for trampoline_table */
	struct hlist_node hlist;
	/* hlist for trampoline_key_table */
	struct hlist_node hlist_key;
	/* hlist for trampoline_ip_table */
	struct hlist_node hlist_ip;
	struct ftrace_ops *fops;
	/* serializes access to fields of this trampoline */
	struct mutex mutex;
	refcount_t refcnt;
	u32 flags;
	u64 key;
	unsigned long ip;
	struct {
		struct btf_func_model model;
		void *addr;
+29 −2
Original line number Diff line number Diff line
@@ -82,6 +82,7 @@ static inline void early_trace_init(void) { }

struct module;
struct ftrace_hash;
struct ftrace_func_entry;

#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \
	defined(CONFIG_DYNAMIC_FTRACE)
@@ -359,7 +360,6 @@ enum {
	FTRACE_OPS_FL_DIRECT			= BIT(17),
	FTRACE_OPS_FL_SUBOP			= BIT(18),
	FTRACE_OPS_FL_GRAPH			= BIT(19),
	FTRACE_OPS_FL_JMP			= BIT(20),
};

#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -403,9 +403,17 @@ enum ftrace_ops_cmd {
 *        Negative on failure. The return value is dependent on the
 *        callback.
 */
typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd);
typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, unsigned long ip, enum ftrace_ops_cmd cmd);

#ifdef CONFIG_DYNAMIC_FTRACE

#define FTRACE_HASH_DEFAULT_BITS 10

struct ftrace_hash *alloc_ftrace_hash(int size_bits);
void free_ftrace_hash(struct ftrace_hash *hash);
struct ftrace_func_entry *add_ftrace_hash_entry_direct(struct ftrace_hash *hash,
						       unsigned long ip, unsigned long direct);

/* The hash used to know what functions callbacks trace */
struct ftrace_ops_hash {
	struct ftrace_hash __rcu	*notrace_hash;
@@ -535,6 +543,10 @@ int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr,
int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr);
int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned long addr);

int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash);
int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash);
int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, bool do_direct_lock);

void ftrace_stub_direct_tramp(void);

#else
@@ -561,6 +573,21 @@ static inline int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned l
	return -ENODEV;
}

static inline int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash)
{
	return -ENODEV;
}

static inline int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash)
{
	return -ENODEV;
}

static inline int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, bool do_direct_lock)
{
	return -ENODEV;
}

/*
 * This must be implemented by the architecture.
 * It is the way the ftrace direct_ops helper, when called
+212 −47
Original line number Diff line number Diff line
@@ -24,19 +24,49 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
static struct hlist_head trampoline_key_table[TRAMPOLINE_TABLE_SIZE];
static struct hlist_head trampoline_ip_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
/* serializes access to trampoline tables */
static DEFINE_MUTEX(trampoline_mutex);

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);

static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
{
	struct bpf_trampoline *tr = ops->private;
	struct hlist_head *head_ip;
	struct bpf_trampoline *tr;

	mutex_lock(&trampoline_mutex);
	head_ip = &trampoline_ip_table[hash_64(ip, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head_ip, hlist_ip) {
		if (tr->ip == ip)
			goto out;
	}
	tr = NULL;
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}
#else
static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
{
	return ops->private;
}
#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */

static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
				     enum ftrace_ops_cmd cmd)
{
	struct bpf_trampoline *tr;
	int ret = 0;

	tr = direct_ops_ip_lookup(ops, ip);
	if (!tr)
		return -EINVAL;

	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
		/* This is called inside register_ftrace_direct_multi(), so
		 * tr->mutex is already locked.
@@ -142,15 +172,171 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym)
			   PAGE_SIZE, true, ksym->name);
}

static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
/*
 * We have only single direct_ops which contains all the direct call
 * sites and is the only global ftrace_ops for all trampolines.
 *
 * We use 'update_ftrace_direct_*' api for attachment.
 */
struct ftrace_ops direct_ops = {
	.ops_func = bpf_tramp_ftrace_ops_func,
};

static int direct_ops_alloc(struct bpf_trampoline *tr)
{
	tr->fops = &direct_ops;
	return 0;
}

static void direct_ops_free(struct bpf_trampoline *tr) { }

static struct ftrace_hash *hash_from_ip(struct bpf_trampoline *tr, void *ptr)
{
	unsigned long ip, addr = (unsigned long) ptr;
	struct ftrace_hash *hash;

	ip = ftrace_location(tr->ip);
	if (!ip)
		return NULL;
	hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
	if (!hash)
		return NULL;
	if (bpf_trampoline_use_jmp(tr->flags))
		addr = ftrace_jmp_set(addr);
	if (!add_ftrace_hash_entry_direct(hash, ip, addr)) {
		free_ftrace_hash(hash);
		return NULL;
	}
	return hash;
}

static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
{
	struct ftrace_hash *hash = hash_from_ip(tr, addr);
	int err;

	if (!hash)
		return -ENOMEM;
	err = update_ftrace_direct_add(tr->fops, hash);
	free_ftrace_hash(hash);
	return err;
}

static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
{
	struct ftrace_hash *hash = hash_from_ip(tr, addr);
	int err;

	if (!hash)
		return -ENOMEM;
	err = update_ftrace_direct_del(tr->fops, hash);
	free_ftrace_hash(hash);
	return err;
}

static int direct_ops_mod(struct bpf_trampoline *tr, void *addr, bool lock_direct_mutex)
{
	struct ftrace_hash *hash = hash_from_ip(tr, addr);
	int err;

	if (!hash)
		return -ENOMEM;
	err = update_ftrace_direct_mod(tr->fops, hash, lock_direct_mutex);
	free_ftrace_hash(hash);
	return err;
}
#else
/*
 * We allocate ftrace_ops object for each trampoline and it contains
 * call site specific for that trampoline.
 *
 * We use *_ftrace_direct api for attachment.
 */
static int direct_ops_alloc(struct bpf_trampoline *tr)
{
	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
	if (!tr->fops)
		return -ENOMEM;
	tr->fops->private = tr;
	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
	return 0;
}

static void direct_ops_free(struct bpf_trampoline *tr)
{
	if (!tr->fops)
		return;
	ftrace_free_filter(tr->fops);
	kfree(tr->fops);
}

static int direct_ops_add(struct bpf_trampoline *tr, void *ptr)
{
	unsigned long addr = (unsigned long) ptr;
	struct ftrace_ops *ops = tr->fops;
	int ret;

	if (bpf_trampoline_use_jmp(tr->flags))
		addr = ftrace_jmp_set(addr);

	ret = ftrace_set_filter_ip(ops, tr->ip, 0, 1);
	if (ret)
		return ret;
	return register_ftrace_direct(ops, addr);
}

static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
{
	return unregister_ftrace_direct(tr->fops, (long)addr, false);
}

static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
{
	unsigned long addr = (unsigned long) ptr;
	struct ftrace_ops *ops = tr->fops;

	if (bpf_trampoline_use_jmp(tr->flags))
		addr = ftrace_jmp_set(addr);
	if (lock_direct_mutex)
		return modify_ftrace_direct(ops, addr);
	return modify_ftrace_direct_nolock(ops, addr);
}
#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
#else
static void direct_ops_free(struct bpf_trampoline *tr) { }

static int direct_ops_alloc(struct bpf_trampoline *tr)
{
	return 0;
}

static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
{
	return -ENODEV;
}

static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
{
	return -ENODEV;
}

static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
{
	return -ENODEV;
}
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */

static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
	head = &trampoline_key_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist_key) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
@@ -159,20 +345,19 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
	if (!tr->fops) {
	if (direct_ops_alloc(tr)) {
		kfree(tr);
		tr = NULL;
		goto out;
	}
	tr->fops->private = tr;
	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
#endif

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	tr->ip = ftrace_location(ip);
	INIT_HLIST_NODE(&tr->hlist_key);
	INIT_HLIST_NODE(&tr->hlist_ip);
	hlist_add_head(&tr->hlist_key, head);
	head = &trampoline_ip_table[hash_64(tr->ip, TRAMPOLINE_HASH_BITS)];
	hlist_add_head(&tr->hlist_ip, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
@@ -207,7 +392,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
		ret = direct_ops_del(tr, old_addr);
	else
		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);

@@ -221,10 +406,7 @@ static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
	int ret;

	if (tr->func.ftrace_managed) {
		if (lock_direct_mutex)
			ret = modify_ftrace_direct(tr->fops, (long)new_addr);
		else
			ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr);
		ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);
	} else {
		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
						   new_addr);
@@ -247,10 +429,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
	}

	if (tr->func.ftrace_managed) {
		ret = ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
		if (ret)
			return ret;
		ret = register_ftrace_direct(tr->fops, (long)new_addr);
		ret = direct_ops_add(tr, new_addr);
	} else {
		ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
	}
@@ -506,13 +685,6 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
	if (err)
		goto out_free;

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
	if (bpf_trampoline_use_jmp(tr->flags))
		tr->fops->flags |= FTRACE_OPS_FL_JMP;
	else
		tr->fops->flags &= ~FTRACE_OPS_FL_JMP;
#endif

	WARN_ON(tr->cur_image && total == 0);
	if (tr->cur_image)
		/* progs already running at this address */
@@ -540,15 +712,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
	tr->cur_image = im;
out:
	/* If any error happens, restore previous flags */
	if (err) {
	if (err)
		tr->flags = orig_flags;
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
		if (bpf_trampoline_use_jmp(tr->flags))
			tr->fops->flags |= FTRACE_OPS_FL_JMP;
		else
			tr->fops->flags &= ~FTRACE_OPS_FL_JMP;
#endif
	}
	kfree(tlinks);
	return err;

@@ -887,7 +1052,7 @@ void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_lookup(key);
	tr = bpf_trampoline_lookup(key, 0);
	if (WARN_ON_ONCE(!tr))
		return;

@@ -907,7 +1072,7 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
{
	struct bpf_trampoline *tr;

	tr = bpf_trampoline_lookup(key);
	tr = bpf_trampoline_lookup(key, tgt_info->tgt_addr);
	if (!tr)
		return NULL;

@@ -943,11 +1108,9 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
	 * fexit progs. The fentry-only trampoline will be freed via
	 * multiple rcu callbacks.
	 */
	hlist_del(&tr->hlist);
	if (tr->fops) {
		ftrace_free_filter(tr->fops);
		kfree(tr->fops);
	}
	hlist_del(&tr->hlist_key);
	hlist_del(&tr->hlist_ip);
	direct_ops_free(tr);
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
@@ -1216,7 +1379,9 @@ static int __init init_trampolines(void)
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
		INIT_HLIST_HEAD(&trampoline_key_table[i]);
	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_ip_table[i]);
	return 0;
}
late_initcall(init_trampolines);
+3 −0
Original line number Diff line number Diff line
@@ -50,6 +50,9 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS
config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	bool

config HAVE_SINGLE_FTRACE_DIRECT_OPS
	bool

config HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
	bool

Loading