Commit a3c70a3c authored by Alexei Starovoitov
Browse files

bpf: Shrink size of struct bpf_map/bpf_array.



Back in 2018 the commit be95a845 ("bpf: avoid false sharing of map refcount
with max_entries") added ____cacheline_aligned to "struct bpf_map" to make sure
that fields like refcnt don't share a cache line with max_entries that is used
to bounds check map access. That was done to make spectre style attacks harder.
The main mitigation is done via code similar to array_index_nospec(), of course.
This was an additional precaution.

It increased the size of "struct bpf_map" a little, but its effect on all
other maps (like array) is significant, since "struct bpf_map" is typically
the first member in other map types.

Undo this ____cacheline_aligned tag. Instead, move the freeze_mutex field so
that refcnt and max_entries still end up in different cache lines.

The main effect is seen in sizeof(struct bpf_array), which shrinks from 320
to 248 bytes.

BEFORE:

struct bpf_map {
	const struct bpf_map_ops  * ops;                 /*     0     8 */
	...
	char                       name[16];             /*    96    16 */

	/* XXX 16 bytes hole, try to pack */

	/* --- cacheline 2 boundary (128 bytes) --- */
	atomic64_t refcnt __attribute__((__aligned__(64))); /*   128     8 */
	...
	/* size: 256, cachelines: 4, members: 30 */
	/* sum members: 232, holes: 1, sum holes: 16 */
	/* padding: 8 */
	/* paddings: 1, sum paddings: 2 */
} __attribute__((__aligned__(64)));

struct bpf_array {
	struct bpf_map             map;                  /*     0   256 */
	...
	/* size: 320, cachelines: 5, members: 5 */
	/* padding: 48 */
	/* paddings: 1, sum paddings: 8 */
} __attribute__((__aligned__(64)));

AFTER:

struct bpf_map {
	/* size: 232, cachelines: 4, members: 30 */
	/* paddings: 1, sum paddings: 2 */
	/* last cacheline: 40 bytes */
};
struct bpf_array {
	/* size: 248, cachelines: 4, members: 5 */
	/* last cacheline: 56 bytes */
};

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/bpf/20240220235001.57411-1-alexei.starovoitov@gmail.com
parent 01dbd7d8
Loading
Loading
Loading
Loading
+3 −9
Original line number Diff line number Diff line
@@ -251,10 +251,7 @@ struct bpf_list_node_kern {
} __attribute__((aligned(8)));

struct bpf_map {
	/* The first two cachelines with read-mostly members of which some
	 * are also accessed in fast-path (e.g. ops, max_entries).
	 */
	const struct bpf_map_ops *ops ____cacheline_aligned;
	const struct bpf_map_ops *ops;
	struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
	void *security;
@@ -276,17 +273,14 @@ struct bpf_map {
	struct obj_cgroup *objcg;
#endif
	char name[BPF_OBJ_NAME_LEN];
	/* The 3rd and 4th cacheline with misc members to avoid false sharing
	 * particularly with refcounting.
	 */
	atomic64_t refcnt ____cacheline_aligned;
	struct mutex freeze_mutex;
	atomic64_t refcnt;
	atomic64_t usercnt;
	/* rcu is used before freeing and work is only used during freeing */
	union {
		struct work_struct work;
		struct rcu_head rcu;
	};
	struct mutex freeze_mutex;
	atomic64_t writecnt;
	/* 'Ownership' of program-containing map is claimed by the first program
	 * that is going to use this map or by the first program which FD is