Commit fa593d0f authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull bpf updates from Alexei Starovoitov:
 "For this merge window we're splitting BPF pull request into three for
  higher visibility: main changes, res_spin_lock, try_alloc_pages.

  These are the main BPF changes:

   - Add DFA-based live registers analysis to improve verification of
     programs with loops (Eduard Zingerman)

   - Introduce load_acquire and store_release BPF instructions and add
     x86, arm64 JIT support (Peilin Ye)

   - Fix loop detection logic in the verifier (Eduard Zingerman)

   - Drop unnecesary lock in bpf_map_inc_not_zero() (Eric Dumazet)

   - Add kfunc for populating cpumask bits (Emil Tsalapatis)

   - Convert various shell based tests to selftests/bpf/test_progs
     format (Bastien Curutchet)

   - Allow passing referenced kptrs into struct_ops callbacks (Amery
     Hung)

   - Add a flag to LSM bpf hook to facilitate bpf program signing
     (Blaise Boscaccy)

   - Track arena arguments in kfuncs (Ihor Solodrai)

   - Add copy_remote_vm_str() helper for reading strings from remote VM
     and bpf_copy_from_user_task_str() kfunc (Jordan Rome)

   - Add support for timed may_goto instruction (Kumar Kartikeya
     Dwivedi)

   - Allow bpf_get_netns_cookie() int cgroup_skb programs (Mahe Tardy)

   - Reduce bpf_cgrp_storage_busy false positives when accessing cgroup
     local storage (Martin KaFai Lau)

   - Introduce bpf_dynptr_copy() kfunc (Mykyta Yatsenko)

   - Allow retrieving BTF data with BTF token (Mykyta Yatsenko)

   - Add BPF kfuncs to set and get xattrs with 'security.bpf.' prefix
     (Song Liu)

   - Reject attaching programs to noreturn functions (Yafang Shao)

   - Introduce pre-order traversal of cgroup bpf programs (Yonghong
     Song)"

* tag 'bpf-next-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (186 commits)
  selftests/bpf: Add selftests for load-acquire/store-release when register number is invalid
  bpf: Fix out-of-bounds read in check_atomic_load/store()
  libbpf: Add namespace for errstr making it libbpf_errstr
  bpf: Add struct_ops context information to struct bpf_prog_aux
  selftests/bpf: Sanitize pointer prior fclose()
  selftests/bpf: Migrate test_xdp_vlan.sh into test_progs
  selftests/bpf: test_xdp_vlan: Rename BPF sections
  bpf: clarify a misleading verifier error message
  selftests/bpf: Add selftest for attaching fexit to __noreturn functions
  bpf: Reject attaching fexit/fmod_ret to __noreturn functions
  bpf: Only fails the busy counter check in bpf_cgrp_storage_get if it creates storage
  bpf: Make perf_event_read_output accessible in all program types.
  bpftool: Using the right format specifiers
  bpftool: Add -Wformat-signedness flag to detect format errors
  selftests/bpf: Test freplace from user namespace
  libbpf: Pass BPF token from find_prog_btf_id to BPF_BTF_GET_FD_BY_ID
  bpf: Return prog btf_id without capable check
  bpf: BPF token support for BPF_BTF_GET_FD_BY_ID
  bpf, x86: Fix objtool warning for timed may_goto
  bpf: Check map->record at the beginning of check_and_free_fields()
  ...
parents 7f2ff7b6 9aa8fe29
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -86,7 +86,7 @@ following steps:
The following are a few examples of selftest BPF iterator programs:

* `bpf_iter_tcp4.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c>`_
* `bpf_iter_task_vma.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c>`_
* `bpf_iter_task_vmas.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c>`_
* `bpf_iter_task_file.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c>`_

Let us look at ``bpf_iter_task_file.c``, which runs in kernel space:
+21 −4
Original line number Diff line number Diff line
@@ -102,7 +102,8 @@ Each type contains the following common data::
         * bits 24-28: kind (e.g. int, ptr, array...etc)
         * bits 29-30: unused
         * bit     31: kind_flag, currently used by
         *             struct, union, fwd, enum and enum64.
         *             struct, union, enum, fwd, enum64,
         *             decl_tag and type_tag
         */
        __u32 info;
        /* "size" is used by INT, ENUM, STRUCT, UNION and ENUM64.
@@ -478,7 +479,7 @@ No additional type data follow ``btf_type``.

``struct btf_type`` encoding requirement:
 * ``name_off``: offset to a non-empty string
 * ``info.kind_flag``: 0
 * ``info.kind_flag``: 0 or 1
 * ``info.kind``: BTF_KIND_DECL_TAG
 * ``info.vlen``: 0
 * ``type``: ``struct``, ``union``, ``func``, ``var`` or ``typedef``
@@ -489,7 +490,6 @@ No additional type data follow ``btf_type``.
        __u32   component_idx;
    };

The ``name_off`` encodes btf_decl_tag attribute string.
The ``type`` should be ``struct``, ``union``, ``func``, ``var`` or ``typedef``.
For ``var`` or ``typedef`` type, ``btf_decl_tag.component_idx`` must be ``-1``.
For the other three types, if the btf_decl_tag attribute is
@@ -499,12 +499,21 @@ the attribute is applied to a ``struct``/``union`` member or
a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a
valid index (starting from 0) pointing to a member or an argument.

If ``info.kind_flag`` is 0, then this is a normal decl tag, and the
``name_off`` encodes btf_decl_tag attribute string.

If ``info.kind_flag`` is 1, then the decl tag represents an arbitrary
__attribute__. In this case, ``name_off`` encodes a string
representing the attribute-list of the attribute specifier. For
example, for an ``__attribute__((aligned(4)))`` the string's contents
is ``aligned(4)``.

2.2.18 BTF_KIND_TYPE_TAG
~~~~~~~~~~~~~~~~~~~~~~~~

``struct btf_type`` encoding requirement:
 * ``name_off``: offset to a non-empty string
 * ``info.kind_flag``: 0
 * ``info.kind_flag``: 0 or 1
 * ``info.kind``: BTF_KIND_TYPE_TAG
 * ``info.vlen``: 0
 * ``type``: the type with ``btf_type_tag`` attribute
@@ -522,6 +531,14 @@ type_tag, then zero or more const/volatile/restrict/typedef
and finally the base type. The base type is one of
int, ptr, array, struct, union, enum, func_proto and float types.

Similarly to decl tags, if the ``info.kind_flag`` is 0, then this is a
normal type tag, and the ``name_off`` encodes btf_type_tag attribute
string.

If ``info.kind_flag`` is 1, then the type tag represents an arbitrary
__attribute__, and the ``name_off`` encodes a string representing the
attribute-list of the attribute specifier.

2.2.19 BTF_KIND_ENUM64
~~~~~~~~~~~~~~~~~~~~~~

+14 −6
Original line number Diff line number Diff line
@@ -324,34 +324,42 @@ register.

.. table:: Arithmetic instructions

  =====  =====  =======  ==========================================================
  =====  =====  =======  ===================================================================================
  name   code   offset   description
  =====  =====  =======  ==========================================================
  =====  =====  =======  ===================================================================================
  ADD    0x0    0        dst += src
  SUB    0x1    0        dst -= src
  MUL    0x2    0        dst \*= src
  DIV    0x3    0        dst = (src != 0) ? (dst / src) : 0
  SDIV   0x3    1        dst = (src != 0) ? (dst s/ src) : 0
  SDIV   0x3    1        dst = (src == 0) ? 0 : ((src == -1 && dst == LLONG_MIN) ? LLONG_MIN : (dst s/ src))
  OR     0x4    0        dst \|= src
  AND    0x5    0        dst &= src
  LSH    0x6    0        dst <<= (src & mask)
  RSH    0x7    0        dst >>= (src & mask)
  NEG    0x8    0        dst = -dst
  MOD    0x9    0        dst = (src != 0) ? (dst % src) : dst
  SMOD   0x9    1        dst = (src != 0) ? (dst s% src) : dst
  SMOD   0x9    1        dst = (src == 0) ? dst : ((src == -1 && dst == LLONG_MIN) ? 0: (dst s% src))
  XOR    0xa    0        dst ^= src
  MOV    0xb    0        dst = src
  MOVSX  0xb    8/16/32  dst = (s8,s16,s32)src
  ARSH   0xc    0        :term:`sign extending<Sign Extend>` dst >>= (src & mask)
  END    0xd    0        byte swap operations (see `Byte swap instructions`_ below)
  =====  =====  =======  ==========================================================
  =====  =====  =======  ===================================================================================

Underflow and overflow are allowed during arithmetic operations, meaning
the 64-bit or 32-bit value will wrap. If BPF program execution would
result in division by zero, the destination register is instead set to zero.
Otherwise, for ``ALU64``, if execution would result in ``LLONG_MIN``
dividing -1, the desination register is instead set to ``LLONG_MIN``. For
``ALU``, if execution would result in ``INT_MIN`` dividing -1, the
desination register is instead set to ``INT_MIN``.

If execution would result in modulo by zero, for ``ALU64`` the value of
the destination register is unchanged whereas for ``ALU`` the upper
32 bits of the destination register are zeroed.
32 bits of the destination register are zeroed. Otherwise, for ``ALU64``,
if execution would resuslt in ``LLONG_MIN`` modulo -1, the destination
register is instead set to 0. For ``ALU``, if execution would result in
``INT_MIN`` modulo -1, the destination register is instead set to 0.

``{ADD, X, ALU}``, where 'code' = ``ADD``, 'source' = ``X``, and 'class' = ``ALU``, means::

+10 −2
Original line number Diff line number Diff line
@@ -188,8 +188,10 @@ enum aarch64_insn_ldst_type {
	AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
	AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
	AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
	AARCH64_INSN_LDST_LOAD_ACQ,
	AARCH64_INSN_LDST_LOAD_EX,
	AARCH64_INSN_LDST_LOAD_ACQ_EX,
	AARCH64_INSN_LDST_STORE_REL,
	AARCH64_INSN_LDST_STORE_EX,
	AARCH64_INSN_LDST_STORE_REL_EX,
	AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET,
@@ -351,8 +353,10 @@ __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
__AARCH64_INSN_FUNCS(ldr_lit,	0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit,	0xFF000000, 0x98000000)
__AARCH64_INSN_FUNCS(exclusive,	0x3F800000, 0x08000000)
__AARCH64_INSN_FUNCS(load_ex,	0x3F400000, 0x08400000)
__AARCH64_INSN_FUNCS(store_ex,	0x3F400000, 0x08000000)
__AARCH64_INSN_FUNCS(load_acq,  0x3FDFFC00, 0x08DFFC00)
__AARCH64_INSN_FUNCS(store_rel, 0x3FDFFC00, 0x089FFC00)
__AARCH64_INSN_FUNCS(load_ex,	0x3FC00000, 0x08400000)
__AARCH64_INSN_FUNCS(store_ex,	0x3FC00000, 0x08000000)
__AARCH64_INSN_FUNCS(mops,	0x3B200C00, 0x19000400)
__AARCH64_INSN_FUNCS(stp,	0x7FC00000, 0x29000000)
__AARCH64_INSN_FUNCS(ldp,	0x7FC00000, 0x29400000)
@@ -602,6 +606,10 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
				     int offset,
				     enum aarch64_insn_variant variant,
				     enum aarch64_insn_ldst_type type);
u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
					enum aarch64_insn_register base,
					enum aarch64_insn_size_type size,
					enum aarch64_insn_ldst_type type);
u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
				   enum aarch64_insn_register base,
				   enum aarch64_insn_register state,
+29 −0
Original line number Diff line number Diff line
@@ -540,6 +540,35 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
					     offset >> shift);
}

u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
					enum aarch64_insn_register base,
					enum aarch64_insn_size_type size,
					enum aarch64_insn_ldst_type type)
{
	u32 insn;

	switch (type) {
	case AARCH64_INSN_LDST_LOAD_ACQ:
		insn = aarch64_insn_get_load_acq_value();
		break;
	case AARCH64_INSN_LDST_STORE_REL:
		insn = aarch64_insn_get_store_rel_value();
		break;
	default:
		pr_err("%s: unknown load-acquire/store-release encoding %d\n",
		       __func__, type);
		return AARCH64_BREAK_FAULT;
	}

	insn = aarch64_insn_encode_ldst_size(size, insn);

	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
					    reg);

	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
					    base);
}

u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
				   enum aarch64_insn_register base,
				   enum aarch64_insn_register state,
Loading