Commit 1e4a6d97 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf-add-support-for-local-percpu-kptr'

Yonghong Song says:

====================
bpf: Add support for local percpu kptr

Patch set [1] implemented cgroup local storage BPF_MAP_TYPE_CGRP_STORAGE
similar to sk/task/inode local storage and old BPF_MAP_TYPE_CGROUP_STORAGE
map is marked as deprecated since old BPF_MAP_TYPE_CGROUP_STORAGE map can
only work with current cgroup.

Similarly, the existing BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE map
is a percpu version of BPF_MAP_TYPE_CGROUP_STORAGE and only works
with current cgroup. But there is no replacement which can work
with arbitrary cgroup.

This patch set solved this problem but adding support for local
percpu kptr. The map value can have a percpu kptr field which holds
a bpf prog allocated percpu data. The below is an example,

  struct percpu_val_t {
    ... fields ...
  }

  struct map_value_t {
    struct percpu_val_t __percpu_kptr *percpu_data_ptr;
  }

In the above, 'map_value_t' is the map value type for a
BPF_MAP_TYPE_CGRP_STORAGE map. User can access 'percpu_data_ptr'
and then read/write percpu data. This covers BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
and more. So BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE map type
is marked as deprecated.

In additional, local percpu kptr supports the same map type
as other kptrs including hash, lru_hash, array, sk/inode/task/cgrp
local storage. Currently, percpu data structure does not support
non-scalars or special fields (e.g., bpf_spin_lock, bpf_rb_root, etc.).
They can be supported in the future if there exist use cases.

Please for individual patches for details.

  [1] https://lore.kernel.org/all/20221026042835.672317-1-yhs@fb.com/

Changelog:
  v2 -> v3:
    - fix libbpf_str test failure.
  v1 -> v2:
    - does not support special fields in percpu data structure.
    - rename __percpu attr to __percpu_kptr attr.
    - rename BPF_KPTR_PERCPU_REF to BPF_KPTR_PERCPU.
    - better code to handle bpf_{this,per}_cpu_ptr() helpers.
    - add more negative tests.
    - fix a bpftool related test failure.
====================

Link: https://lore.kernel.org/r/20230827152729.1995219-1-yonghong.song@linux.dev


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 3903802b 9bc95a95
Loading
Loading
Loading
Loading
+14 −8
Original line number Diff line number Diff line
@@ -55,8 +55,8 @@ struct cgroup;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
extern struct bpf_mem_alloc bpf_global_ma;
extern bool bpf_global_ma_set;
extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;

typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
@@ -180,14 +180,15 @@ enum btf_field_type {
	BPF_TIMER      = (1 << 1),
	BPF_KPTR_UNREF = (1 << 2),
	BPF_KPTR_REF   = (1 << 3),
	BPF_KPTR       = BPF_KPTR_UNREF | BPF_KPTR_REF,
	BPF_LIST_HEAD  = (1 << 4),
	BPF_LIST_NODE  = (1 << 5),
	BPF_RB_ROOT    = (1 << 6),
	BPF_RB_NODE    = (1 << 7),
	BPF_KPTR_PERCPU = (1 << 4),
	BPF_KPTR       = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU,
	BPF_LIST_HEAD  = (1 << 5),
	BPF_LIST_NODE  = (1 << 6),
	BPF_RB_ROOT    = (1 << 7),
	BPF_RB_NODE    = (1 << 8),
	BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
				 BPF_RB_NODE | BPF_RB_ROOT,
	BPF_REFCOUNT   = (1 << 8),
	BPF_REFCOUNT   = (1 << 9),
};

typedef void (*btf_dtor_kfunc_t)(void *);
@@ -300,6 +301,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
	case BPF_KPTR_UNREF:
	case BPF_KPTR_REF:
		return "kptr";
	case BPF_KPTR_PERCPU:
		return "percpu_kptr";
	case BPF_LIST_HEAD:
		return "bpf_list_head";
	case BPF_LIST_NODE:
@@ -325,6 +328,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
		return sizeof(struct bpf_timer);
	case BPF_KPTR_UNREF:
	case BPF_KPTR_REF:
	case BPF_KPTR_PERCPU:
		return sizeof(u64);
	case BPF_LIST_HEAD:
		return sizeof(struct bpf_list_head);
@@ -351,6 +355,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
		return __alignof__(struct bpf_timer);
	case BPF_KPTR_UNREF:
	case BPF_KPTR_REF:
	case BPF_KPTR_PERCPU:
		return __alignof__(u64);
	case BPF_LIST_HEAD:
		return __alignof__(struct bpf_list_head);
@@ -389,6 +394,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
	case BPF_TIMER:
	case BPF_KPTR_UNREF:
	case BPF_KPTR_REF:
	case BPF_KPTR_PERCPU:
		break;
	default:
		WARN_ON_ONCE(1);
+1 −0
Original line number Diff line number Diff line
@@ -480,6 +480,7 @@ struct bpf_insn_aux_data {
	bool zext_dst; /* this insn zero extends dst reg */
	bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
	bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
	bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
	u8 alu_state; /* used in combination with alu_limit */

	/* below fields are initialized once */
+8 −1
Original line number Diff line number Diff line
@@ -932,7 +932,14 @@ enum bpf_map_type {
	 */
	BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
	/* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs
	 * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE +
	 * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
	 * functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
	 * deprecated.
	 */
	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
	BPF_MAP_TYPE_QUEUE,
	BPF_MAP_TYPE_STACK,
	BPF_MAP_TYPE_SK_STORAGE,
+5 −0
Original line number Diff line number Diff line
@@ -3293,6 +3293,8 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
		type = BPF_KPTR_UNREF;
	else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
		type = BPF_KPTR_REF;
	else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off)))
		type = BPF_KPTR_PERCPU;
	else
		return -EINVAL;

@@ -3457,6 +3459,7 @@ static int btf_find_struct_field(const struct btf *btf,
			break;
		case BPF_KPTR_UNREF:
		case BPF_KPTR_REF:
		case BPF_KPTR_PERCPU:
			ret = btf_find_kptr(btf, member_type, off, sz,
					    idx < info_cnt ? &info[idx] : &tmp);
			if (ret < 0)
@@ -3523,6 +3526,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
			break;
		case BPF_KPTR_UNREF:
		case BPF_KPTR_REF:
		case BPF_KPTR_PERCPU:
			ret = btf_find_kptr(btf, var_type, off, sz,
					    idx < info_cnt ? &info[idx] : &tmp);
			if (ret < 0)
@@ -3783,6 +3787,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
			break;
		case BPF_KPTR_UNREF:
		case BPF_KPTR_REF:
		case BPF_KPTR_PERCPU:
			ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
			if (ret < 0)
				goto end;
+5 −3
Original line number Diff line number Diff line
@@ -64,8 +64,8 @@
#define OFF	insn->off
#define IMM	insn->imm

struct bpf_mem_alloc bpf_global_ma;
bool bpf_global_ma_set;
struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
bool bpf_global_ma_set, bpf_global_percpu_ma_set;

/* No hurry in this branch
 *
@@ -2921,7 +2921,9 @@ static int __init bpf_global_ma_init(void)

	ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
	bpf_global_ma_set = !ret;
	return ret;
	ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
	bpf_global_percpu_ma_set = !ret;
	return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
}
late_initcall(bpf_global_ma_init);
#endif
Loading