Merge tag 'bpf-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Pull bpf updates from Alexei Starovoitov:

 - Support pulling non-linear xdp data with bpf_xdp_pull_data() kfunc
   (Amery Hung)

   Applied as a stable branch in bpf-next and net-next trees.

 - Support reading skb metadata via bpf_dynptr (Jakub Sitnicki)

   Also a stable branch in bpf-next and net-next trees.

 - Enforce expected_attach_type for tailcall compatibility (Daniel
   Borkmann)

 - Replace path-sensitive with path-insensitive live stack analysis in
   the verifier (Eduard Zingerman)

   This is a significant change in the verification logic. More details,
   motivation, long term plans are in the cover letter/merge commit.

 - Support signed BPF programs (KP Singh)

   This is another major feature that took years to materialize.

   Algorithm details are in the cover letter/merge commit.

 - Add support for may_goto instruction to s390 JIT (Ilya Leoshkevich)

 - Add support for may_goto instruction to arm64 JIT (Puranjay Mohan)

 - Fix USDT SIB argument handling in libbpf (Jiawei Zhao)

 - Allow uprobe-bpf program to change context registers (Jiri Olsa)

 - Support signed loads from BPF arena (Kumar Kartikeya Dwivedi and
   Puranjay Mohan)

 - Allow access to union arguments in tracing programs (Leon Hwang)

 - Optimize rcu_read_lock() + migrate_disable() combination where it's
   used in BPF subsystem (Menglong Dong)

 - Introduce bpf_task_work_schedule*() kfuncs to schedule deferred
   execution of BPF callback in the context of a specific task using the
   kernel’s task_work infrastructure (Mykyta Yatsenko)

 - Enforce RCU protection for KF_RCU_PROTECTED kfuncs (Kumar Kartikeya
   Dwivedi)

 - Add stress test for rqspinlock in NMI (Kumar Kartikeya Dwivedi)

 - Improve the precision of tnum multiplier verifier operation
   (Nandakumar Edamana)

 - Use tnums to improve is_branch_taken() logic (Paul Chaignon)

 - Add support for atomic operations in arena in riscv JIT (Pu Lehui)

 - Report arena faults to BPF error stream (Puranjay Mohan)

 - Search for tracefs at /sys/kernel/tracing first in bpftool (Quentin
   Monnet)

 - Add bpf_strcasecmp() kfunc (Rong Tao)

 - Support lookup_and_delete_elem command in BPF_MAP_STACK_TRACE (Tao
   Chen)

* tag 'bpf-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (197 commits)
  libbpf: Replace AF_ALG with open coded SHA-256
  selftests/bpf: Add stress test for rqspinlock in NMI
  selftests/bpf: Add test case for different expected_attach_type
  bpf: Enforce expected_attach_type for tailcall compatibility
  bpftool: Remove duplicate string.h header
  bpf: Remove duplicate crypto/sha2.h header
  libbpf: Fix error when st-prefix_ops and ops from differ btf
  selftests/bpf: Test changing packet data from kfunc
  selftests/bpf: Add stacktrace map lookup_and_delete_elem test case
  selftests/bpf: Refactor stacktrace_map case with skeleton
  bpf: Add lookup_and_delete_elem for BPF_MAP_STACK_TRACE
  selftests/bpf: Fix flaky bpf_cookie selftest
  selftests/bpf: Test changing packet data from global functions with a kfunc
  bpf: Emit struct bpf_xdp_sock type in vmlinux BTF
  selftests/bpf: Task_work selftest cleanup fixes
  MAINTAINERS: Delete inactive maintainers from AF_XDP
  bpf: Mark kfuncs as __noclone
  selftests/bpf: Add kprobe multi write ctx attach test
  selftests/bpf: Add kprobe write ctx attach test
  selftests/bpf: Add uprobe context ip register change test
  ...
This commit is contained in:
Linus Torvalds
2025-09-30 17:58:11 -07:00
254 changed files with 11853 additions and 2817 deletions

View File

@@ -3,7 +3,7 @@
# BPF interpreter that, for example, classic socket filters depend on.
config BPF
bool
select CRYPTO_LIB_SHA1
select CRYPTO_LIB_SHA256
# Used by archs to tell that they support BPF JIT compiler plus which
# flavour. Only one of the two can be selected for a specific arch since

View File

@@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o

View File

@@ -633,3 +633,33 @@ static int __init kfunc_init(void)
return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
late_initcall(kfunc_init);
/*
 * Report an arena access fault to the owning program's BPF_STDERR stream.
 * @write:    true for a faulting WRITE access, false for a READ
 * @addr:     faulting kernel-side arena offset (low 32 bits of the address)
 * @fault_ip: instruction pointer of the faulting (JITed) BPF instruction,
 *            used to look up the program via its ksym range
 *
 * No-op if @fault_ip does not resolve to a BPF program.
 */
void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip)
{
struct bpf_stream_stage ss;
struct bpf_prog *prog;
u64 user_vm_start;
/*
 * The RCU read lock is held to safely traverse the latch tree, but we
 * don't need its protection when accessing the prog, since it will not
 * disappear while we are handling the fault.
 */
rcu_read_lock();
prog = bpf_prog_ksym_find(fault_ip);
rcu_read_unlock();
if (!prog)
return;
/* Use main prog for stream access */
prog = prog->aux->main_prog_aux->prog;
/* Reconstruct the user-visible address: high 32 bits come from the
 * arena's user VM start, low 32 bits from the faulting offset.
 */
user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
addr += clear_lo32(user_vm_start);
bpf_stream_stage(ss, prog, BPF_STDERR, ({
bpf_stream_printk(ss, "ERROR: Arena %s access at unmapped address 0x%lx\n",
write ? "WRITE" : "READ", addr);
bpf_stream_dump_stack(ss);
}));
}

View File

@@ -12,6 +12,7 @@
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
#include <crypto/sha2.h>
#include "map_in_map.h"
@@ -174,6 +175,17 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
return array->value + (u64)array->elem_size * (index & array->index_mask);
}
/*
 * Compute the SHA-256 of the whole array map's value area and cache it.
 * @map:           array map to hash
 * @hash_buf_size: size of @hash_buf provided by the caller
 *                 (NOTE(review): not validated here — presumably checked by
 *                 the map_get_hash caller; confirm)
 * @hash_buf:      output buffer receiving the raw digest
 *
 * The digest covers elem_size * max_entries bytes starting at array->value
 * and is also copied into map->sha for later reuse. Always returns 0.
 */
static int array_map_get_hash(struct bpf_map *map, u32 hash_buf_size,
void *hash_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
sha256(array->value, (u64)array->elem_size * array->map.max_entries,
hash_buf);
memcpy(array->map.sha, hash_buf, sizeof(array->map.sha));
return 0;
}
static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
u32 off)
{
@@ -431,7 +443,7 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
return (void *)round_down((unsigned long)array, PAGE_SIZE);
}
static void array_map_free_timers_wq(struct bpf_map *map)
static void array_map_free_internal_structs(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
@@ -439,12 +451,14 @@ static void array_map_free_timers_wq(struct bpf_map *map)
/* We don't reset or free fields other than timer and workqueue
* on uref dropping to zero.
*/
if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) {
if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
for (i = 0; i < array->map.max_entries; i++) {
if (btf_record_has_field(map->record, BPF_TIMER))
bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
if (btf_record_has_field(map->record, BPF_WORKQUEUE))
bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
if (btf_record_has_field(map->record, BPF_TASK_WORK))
bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i));
}
}
}
@@ -783,7 +797,7 @@ const struct bpf_map_ops array_map_ops = {
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_release_uref = array_map_free_timers_wq,
.map_release_uref = array_map_free_internal_structs,
.map_lookup_elem = array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
@@ -800,6 +814,7 @@ const struct bpf_map_ops array_map_ops = {
.map_mem_usage = array_map_mem_usage,
.map_btf_id = &array_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
.map_get_hash = &array_map_get_hash,
};
const struct bpf_map_ops percpu_array_map_ops = {

View File

@@ -45,8 +45,7 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
struct bpf_local_storage *local_storage;
migrate_disable();
rcu_read_lock();
rcu_read_lock_dont_migrate();
local_storage = rcu_dereference(cgroup->bpf_cgrp_storage);
if (!local_storage)
goto out;
@@ -55,8 +54,7 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
bpf_local_storage_destroy(local_storage);
bpf_cgrp_storage_unlock();
out:
rcu_read_unlock();
migrate_enable();
rcu_read_unlock_migrate();
}
static struct bpf_local_storage_data *

View File

@@ -62,8 +62,7 @@ void bpf_inode_storage_free(struct inode *inode)
if (!bsb)
return;
migrate_disable();
rcu_read_lock();
rcu_read_lock_dont_migrate();
local_storage = rcu_dereference(bsb->storage);
if (!local_storage)
@@ -71,8 +70,7 @@ void bpf_inode_storage_free(struct inode *inode)
bpf_local_storage_destroy(local_storage);
out:
rcu_read_unlock();
migrate_enable();
rcu_read_unlock_migrate();
}
static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)

View File

@@ -705,13 +705,11 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
migrate_enable();
rcu_read_unlock_trace();
} else {
rcu_read_lock();
migrate_disable();
rcu_read_lock_dont_migrate();
old_run_ctx = bpf_set_run_ctx(&run_ctx);
ret = bpf_prog_run(prog, ctx);
bpf_reset_run_ctx(old_run_ctx);
migrate_enable();
rcu_read_unlock();
rcu_read_unlock_migrate();
}
/* bpf program can only return 0 or 1:

View File

@@ -19,14 +19,6 @@
#define LOCAL_PENDING_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING)
#define IS_LOCAL_LIST_TYPE(t) ((t) >= BPF_LOCAL_LIST_T_OFFSET)
/*
 * Return the next possible CPU after @cpu, wrapping around to the first
 * possible CPU when the end of cpu_possible_mask is reached.
 * NOTE(review): per the surrounding diff this helper is being removed in
 * favor of the generic cpumask_next_wrap().
 */
static int get_next_cpu(int cpu)
{
cpu = cpumask_next(cpu, cpu_possible_mask);
if (cpu >= nr_cpu_ids)
cpu = cpumask_first(cpu_possible_mask);
return cpu;
}
/* Local list helpers */
static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l)
{
@@ -482,7 +474,7 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags);
steal = get_next_cpu(steal);
steal = cpumask_next_wrap(steal, cpu_possible_mask);
} while (!node && steal != first_steal);
loc_l->next_steal = steal;

View File

@@ -1174,6 +1174,18 @@ void bpf_struct_ops_put(const void *kdata)
bpf_map_put(&st_map->map);
}
/*
 * Return the BPF map ID backing a registered struct_ops object.
 * @kdata: pointer to the kernel data area of a struct_ops value
 *         (the same pointer handed out at registration time)
 *
 * Walks back from the data pointer to the enclosing bpf_struct_ops_value
 * and then to its bpf_struct_ops_map to read map.id.
 */
u32 bpf_struct_ops_id(const void *kdata)
{
struct bpf_struct_ops_value *kvalue;
struct bpf_struct_ops_map *st_map;
kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue);
return st_map->map.id;
}
EXPORT_SYMBOL_GPL(bpf_struct_ops_id);
static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;

View File

@@ -70,8 +70,7 @@ void bpf_task_storage_free(struct task_struct *task)
{
struct bpf_local_storage *local_storage;
migrate_disable();
rcu_read_lock();
rcu_read_lock_dont_migrate();
local_storage = rcu_dereference(task->bpf_storage);
if (!local_storage)
@@ -81,8 +80,7 @@ void bpf_task_storage_free(struct task_struct *task)
bpf_local_storage_destroy(local_storage);
bpf_task_storage_unlock();
out:
rcu_read_unlock();
migrate_enable();
rcu_read_unlock_migrate();
}
static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)

View File

@@ -3478,60 +3478,45 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
return BTF_FIELD_FOUND;
}
#define field_mask_test_name(field_type, field_type_str) \
if (field_mask & field_type && !strcmp(name, field_type_str)) { \
type = field_type; \
goto end; \
}
static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_type,
u32 field_mask, u32 *seen_mask,
int *align, int *sz)
u32 field_mask, u32 *seen_mask, int *align, int *sz)
{
int type = 0;
const struct {
enum btf_field_type type;
const char *const name;
const bool is_unique;
} field_types[] = {
{ BPF_SPIN_LOCK, "bpf_spin_lock", true },
{ BPF_RES_SPIN_LOCK, "bpf_res_spin_lock", true },
{ BPF_TIMER, "bpf_timer", true },
{ BPF_WORKQUEUE, "bpf_wq", true },
{ BPF_TASK_WORK, "bpf_task_work", true },
{ BPF_LIST_HEAD, "bpf_list_head", false },
{ BPF_LIST_NODE, "bpf_list_node", false },
{ BPF_RB_ROOT, "bpf_rb_root", false },
{ BPF_RB_NODE, "bpf_rb_node", false },
{ BPF_REFCOUNT, "bpf_refcount", false },
};
int type = 0, i;
const char *name = __btf_name_by_offset(btf, var_type->name_off);
const char *field_type_name;
enum btf_field_type field_type;
bool is_unique;
if (field_mask & BPF_SPIN_LOCK) {
if (!strcmp(name, "bpf_spin_lock")) {
if (*seen_mask & BPF_SPIN_LOCK)
for (i = 0; i < ARRAY_SIZE(field_types); ++i) {
field_type = field_types[i].type;
field_type_name = field_types[i].name;
is_unique = field_types[i].is_unique;
if (!(field_mask & field_type) || strcmp(name, field_type_name))
continue;
if (is_unique) {
if (*seen_mask & field_type)
return -E2BIG;
*seen_mask |= BPF_SPIN_LOCK;
type = BPF_SPIN_LOCK;
goto end;
*seen_mask |= field_type;
}
type = field_type;
goto end;
}
if (field_mask & BPF_RES_SPIN_LOCK) {
if (!strcmp(name, "bpf_res_spin_lock")) {
if (*seen_mask & BPF_RES_SPIN_LOCK)
return -E2BIG;
*seen_mask |= BPF_RES_SPIN_LOCK;
type = BPF_RES_SPIN_LOCK;
goto end;
}
}
if (field_mask & BPF_TIMER) {
if (!strcmp(name, "bpf_timer")) {
if (*seen_mask & BPF_TIMER)
return -E2BIG;
*seen_mask |= BPF_TIMER;
type = BPF_TIMER;
goto end;
}
}
if (field_mask & BPF_WORKQUEUE) {
if (!strcmp(name, "bpf_wq")) {
if (*seen_mask & BPF_WORKQUEUE)
return -E2BIG;
*seen_mask |= BPF_WORKQUEUE;
type = BPF_WORKQUEUE;
goto end;
}
}
field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
field_mask_test_name(BPF_RB_NODE, "bpf_rb_node");
field_mask_test_name(BPF_REFCOUNT, "bpf_refcount");
/* Only return BPF_KPTR when all other types with matchable names fail */
if (field_mask & (BPF_KPTR | BPF_UPTR) && !__btf_type_is_struct(var_type)) {
@@ -3545,8 +3530,6 @@ end:
return type;
}
#undef field_mask_test_name
/* Repeat a number of fields for a specified number of times.
*
* Copy the fields starting from the first field and repeat them for
@@ -3693,6 +3676,7 @@ static int btf_find_field_one(const struct btf *btf,
case BPF_LIST_NODE:
case BPF_RB_NODE:
case BPF_REFCOUNT:
case BPF_TASK_WORK:
ret = btf_find_struct(btf, var_type, off, sz, field_type,
info_cnt ? &info[0] : &tmp);
if (ret < 0)
@@ -3985,6 +3969,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
rec->timer_off = -EINVAL;
rec->wq_off = -EINVAL;
rec->refcount_off = -EINVAL;
rec->task_work_off = -EINVAL;
for (i = 0; i < cnt; i++) {
field_type_size = btf_field_type_size(info_arr[i].type);
if (info_arr[i].off + field_type_size > value_size) {
@@ -4024,6 +4009,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
/* Cache offset for faster lookup at runtime */
rec->wq_off = rec->fields[i].offset;
break;
case BPF_TASK_WORK:
WARN_ON_ONCE(rec->task_work_off >= 0);
rec->task_work_off = rec->fields[i].offset;
break;
case BPF_REFCOUNT:
WARN_ON_ONCE(rec->refcount_off >= 0);
/* Cache offset for faster lookup at runtime */
@@ -6762,7 +6751,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
/* skip modifiers */
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
if (btf_type_is_small_int(t) || btf_is_any_enum(t) || btf_type_is_struct(t))
/* accessing a scalar */
return true;
if (!btf_type_is_ptr(t)) {
@@ -7334,7 +7323,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id,
if (btf_type_is_ptr(t))
/* kernel size of pointer. Not BPF's size of pointer*/
return sizeof(void *);
if (btf_type_is_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
if (btf_type_is_int(t) || btf_is_any_enum(t) || btf_type_is_struct(t))
return t->size;
return -EINVAL;
}
@@ -7343,7 +7332,7 @@ static u8 __get_type_fmodel_flags(const struct btf_type *t)
{
u8 flags = 0;
if (__btf_type_is_struct(t))
if (btf_type_is_struct(t))
flags |= BTF_FMODEL_STRUCT_ARG;
if (btf_type_is_signed_int(t))
flags |= BTF_FMODEL_SIGNED_ARG;
@@ -7384,7 +7373,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
return -EINVAL;
}
ret = __get_type_size(btf, func->type, &t);
if (ret < 0 || __btf_type_is_struct(t)) {
if (ret < 0 || btf_type_is_struct(t)) {
bpf_log(log,
"The function %s return type %s is unsupported.\n",
tname, btf_type_str(t));

View File

@@ -27,14 +27,15 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
/*
* cgroup bpf destruction makes heavy use of work items and there can be a lot
* of concurrent destructions. Use a separate workqueue so that cgroup bpf
* destruction work items don't end up filling up max_active of system_wq
* destruction work items don't end up filling up max_active of system_percpu_wq
* which may lead to deadlock.
*/
static struct workqueue_struct *cgroup_bpf_destroy_wq;
static int __init cgroup_bpf_wq_init(void)
{
cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1);
cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy",
WQ_PERCPU, 1);
if (!cgroup_bpf_destroy_wq)
panic("Failed to alloc workqueue for cgroup bpf destroy.\n");
return 0;
@@ -71,8 +72,7 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
u32 func_ret;
run_ctx.retval = retval;
migrate_disable();
rcu_read_lock();
rcu_read_lock_dont_migrate();
array = rcu_dereference(cgrp->effective[atype]);
item = &array->items[0];
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
@@ -88,8 +88,7 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
item++;
}
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
migrate_enable();
rcu_read_unlock_migrate();
return run_ctx.retval;
}

View File

@@ -18,6 +18,7 @@
*/
#include <uapi/linux/btf.h>
#include <crypto/sha1.h>
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
@@ -38,6 +39,7 @@
#include <linux/bpf_mem_alloc.h>
#include <linux/memcontrol.h>
#include <linux/execmem.h>
#include <crypto/sha2.h>
#include <asm/barrier.h>
#include <linux/unaligned.h>
@@ -119,6 +121,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
fp->pages = size / PAGE_SIZE;
fp->aux = aux;
fp->aux->main_prog_aux = aux;
fp->aux->prog = fp;
fp->jit_requested = ebpf_jit_enabled();
fp->blinding_requested = bpf_jit_blinding_enabled(fp);
@@ -293,28 +296,18 @@ void __bpf_prog_free(struct bpf_prog *fp)
int bpf_prog_calc_tag(struct bpf_prog *fp)
{
const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
u32 raw_size = bpf_prog_tag_scratch_size(fp);
u32 digest[SHA1_DIGEST_WORDS];
u32 ws[SHA1_WORKSPACE_WORDS];
u32 i, bsize, psize, blocks;
size_t size = bpf_prog_insn_size(fp);
struct bpf_insn *dst;
bool was_ld_map;
u8 *raw, *todo;
__be32 *result;
__be64 *bits;
u32 i;
raw = vmalloc(raw_size);
if (!raw)
dst = vmalloc(size);
if (!dst)
return -ENOMEM;
sha1_init_raw(digest);
memset(ws, 0, sizeof(ws));
/* We need to take out the map fd for the digest calculation
* since they are unstable from user space side.
*/
dst = (void *)raw;
for (i = 0, was_ld_map = false; i < fp->len; i++) {
dst[i] = fp->insnsi[i];
if (!was_ld_map &&
@@ -334,33 +327,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
was_ld_map = false;
}
}
psize = bpf_prog_insn_size(fp);
memset(&raw[psize], 0, raw_size - psize);
raw[psize++] = 0x80;
bsize = round_up(psize, SHA1_BLOCK_SIZE);
blocks = bsize / SHA1_BLOCK_SIZE;
todo = raw;
if (bsize - psize >= sizeof(__be64)) {
bits = (__be64 *)(todo + bsize - sizeof(__be64));
} else {
bits = (__be64 *)(todo + bsize + bits_offset);
blocks++;
}
*bits = cpu_to_be64((psize - 1) << 3);
while (blocks--) {
sha1_transform(digest, todo, ws);
todo += SHA1_BLOCK_SIZE;
}
result = (__force __be32 *)digest;
for (i = 0; i < SHA1_DIGEST_WORDS; i++)
result[i] = cpu_to_be32(digest[i]);
memcpy(fp->tag, result, sizeof(fp->tag));
vfree(raw);
sha256((u8 *)dst, size, fp->digest);
vfree(dst);
return 0;
}
@@ -2393,6 +2361,7 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map,
map->owner->type = prog_type;
map->owner->jited = fp->jited;
map->owner->xdp_has_frags = aux->xdp_has_frags;
map->owner->expected_attach_type = fp->expected_attach_type;
map->owner->attach_func_proto = aux->attach_func_proto;
for_each_cgroup_storage_type(i) {
map->owner->storage_cookie[i] =
@@ -2404,6 +2373,10 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map,
ret = map->owner->type == prog_type &&
map->owner->jited == fp->jited &&
map->owner->xdp_has_frags == aux->xdp_has_frags;
if (ret &&
map->map_type == BPF_MAP_TYPE_PROG_ARRAY &&
map->owner->expected_attach_type != fp->expected_attach_type)
ret = false;
for_each_cgroup_storage_type(i) {
if (!ret)
break;
@@ -3329,9 +3302,8 @@ static bool find_from_stack_cb(void *cookie, u64 ip, u64 sp, u64 bp)
rcu_read_unlock();
if (!prog)
return true;
if (bpf_is_subprog(prog))
return true;
ctxp->prog = prog;
/* Make sure we return the main prog if we found a subprog */
ctxp->prog = prog->aux->main_prog_aux->prog;
return false;
}

View File

@@ -550,7 +550,7 @@ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
old_rcpu = unrcu_pointer(xchg(&cmap->cpu_map[key_cpu], RCU_INITIALIZER(rcpu)));
if (old_rcpu) {
INIT_RCU_WORK(&old_rcpu->free_work, __cpu_map_entry_free);
queue_rcu_work(system_wq, &old_rcpu->free_work);
queue_rcu_work(system_percpu_wq, &old_rcpu->free_work);
}
}

View File

@@ -865,7 +865,7 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
struct bpf_dtab_netdev *dev;
dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev),
GFP_NOWAIT | __GFP_NOWARN,
GFP_NOWAIT,
dtab->map.numa_node);
if (!dev)
return ERR_PTR(-ENOMEM);

View File

@@ -215,7 +215,20 @@ static bool htab_has_extra_elems(struct bpf_htab *htab)
return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
}
static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
/*
 * Free the per-element internal kernel structures (timer, workqueue,
 * task_work) embedded in a single hashtab element's value, for each field
 * type actually present in the map's BTF record.
 */
static void htab_free_internal_structs(struct bpf_htab *htab, struct htab_elem *elem)
{
if (btf_record_has_field(htab->map.record, BPF_TIMER))
bpf_obj_free_timer(htab->map.record,
htab_elem_value(elem, htab->map.key_size));
if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
bpf_obj_free_workqueue(htab->map.record,
htab_elem_value(elem, htab->map.key_size))
if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
bpf_obj_free_task_work(htab->map.record,
htab_elem_value(elem, htab->map.key_size));
}
static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
{
u32 num_entries = htab->map.max_entries;
int i;
@@ -227,12 +240,7 @@ static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
struct htab_elem *elem;
elem = get_htab_elem(htab, i);
if (btf_record_has_field(htab->map.record, BPF_TIMER))
bpf_obj_free_timer(htab->map.record,
htab_elem_value(elem, htab->map.key_size));
if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
bpf_obj_free_workqueue(htab->map.record,
htab_elem_value(elem, htab->map.key_size));
htab_free_internal_structs(htab, elem);
cond_resched();
}
}
@@ -1490,7 +1498,7 @@ static void delete_all_elements(struct bpf_htab *htab)
}
}
static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
static void htab_free_malloced_internal_structs(struct bpf_htab *htab)
{
int i;
@@ -1502,28 +1510,23 @@ static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
hlist_nulls_for_each_entry(l, n, head, hash_node) {
/* We only free timer on uref dropping to zero */
if (btf_record_has_field(htab->map.record, BPF_TIMER))
bpf_obj_free_timer(htab->map.record,
htab_elem_value(l, htab->map.key_size));
if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
bpf_obj_free_workqueue(htab->map.record,
htab_elem_value(l, htab->map.key_size));
htab_free_internal_structs(htab, l);
}
cond_resched_rcu();
}
rcu_read_unlock();
}
static void htab_map_free_timers_and_wq(struct bpf_map *map)
static void htab_map_free_internal_structs(struct bpf_map *map)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
/* We only free timer and workqueue on uref dropping to zero */
if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) {
if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
if (!htab_is_prealloc(htab))
htab_free_malloced_timers_and_wq(htab);
htab_free_malloced_internal_structs(htab);
else
htab_free_prealloced_timers_and_wq(htab);
htab_free_prealloced_internal_structs(htab);
}
}
@@ -2255,7 +2258,7 @@ const struct bpf_map_ops htab_map_ops = {
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
.map_release_uref = htab_map_free_timers_and_wq,
.map_release_uref = htab_map_free_internal_structs,
.map_lookup_elem = htab_map_lookup_elem,
.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
.map_update_elem = htab_map_update_elem,
@@ -2276,7 +2279,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
.map_release_uref = htab_map_free_timers_and_wq,
.map_release_uref = htab_map_free_internal_structs,
.map_lookup_elem = htab_lru_map_lookup_elem,
.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,

View File

@@ -25,6 +25,9 @@
#include <linux/kasan.h>
#include <linux/bpf_verifier.h>
#include <linux/uaccess.h>
#include <linux/verification.h>
#include <linux/task_work.h>
#include <linux/irq_work.h>
#include "../../lib/kstrtox.h"
@@ -774,11 +777,9 @@ int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs)
{
int nest_level;
preempt_disable();
nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
this_cpu_dec(bpf_bprintf_nest_level);
preempt_enable();
return -EBUSY;
}
*bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]);
@@ -791,7 +792,6 @@ void bpf_put_buffers(void)
if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
return;
this_cpu_dec(bpf_bprintf_nest_level);
preempt_enable();
}
void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
@@ -1084,6 +1084,17 @@ const struct bpf_func_proto bpf_snprintf_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
/*
 * Recover a pointer to the map key given a pointer to an element's value.
 * @map:     the map the value belongs to
 * @value:   pointer to the element value
 * @arr_idx: caller-provided scratch; for array maps it receives the computed
 *           index and its address is returned as the key
 *
 * For array maps the key is the element index, derived from the value's
 * offset within the array. For other map types (hash/LRU) the key is stored
 * immediately before the value, rounded up to 8-byte alignment.
 */
static void *map_key_from_value(struct bpf_map *map, void *value, u32 *arr_idx)
{
if (map->map_type == BPF_MAP_TYPE_ARRAY) {
struct bpf_array *array = container_of(map, struct bpf_array, map);
*arr_idx = ((char *)value - array->value) / array->elem_size;
return arr_idx;
}
return (void *)value - round_up(map->key_size, 8);
}
struct bpf_async_cb {
struct bpf_map *map;
struct bpf_prog *prog;
@@ -1166,15 +1177,8 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
* bpf_map_delete_elem() on the same timer.
*/
this_cpu_write(hrtimer_running, t);
if (map->map_type == BPF_MAP_TYPE_ARRAY) {
struct bpf_array *array = container_of(map, struct bpf_array, map);
/* compute the key */
idx = ((char *)value - array->value) / array->elem_size;
key = &idx;
} else { /* hash or lru */
key = value - round_up(map->key_size, 8);
}
key = map_key_from_value(map, value, &idx);
callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
/* The verifier checked that return value is zero. */
@@ -1200,15 +1204,7 @@ static void bpf_wq_work(struct work_struct *work)
if (!callback_fn)
return;
if (map->map_type == BPF_MAP_TYPE_ARRAY) {
struct bpf_array *array = container_of(map, struct bpf_array, map);
/* compute the key */
idx = ((char *)value - array->value) / array->elem_size;
key = &idx;
} else { /* hash or lru */
key = value - round_up(map->key_size, 8);
}
key = map_key_from_value(map, value, &idx);
rcu_read_lock_trace();
migrate_disable();
@@ -1600,7 +1596,7 @@ void bpf_timer_cancel_and_free(void *val)
* timer callback.
*/
if (this_cpu_read(hrtimer_running)) {
queue_work(system_unbound_wq, &t->cb.delete_work);
queue_work(system_dfl_wq, &t->cb.delete_work);
return;
}
@@ -1613,7 +1609,7 @@ void bpf_timer_cancel_and_free(void *val)
if (hrtimer_try_to_cancel(&t->timer) >= 0)
kfree_rcu(t, cb.rcu);
else
queue_work(system_unbound_wq, &t->cb.delete_work);
queue_work(system_dfl_wq, &t->cb.delete_work);
} else {
bpf_timer_delete_work(&t->cb.delete_work);
}
@@ -1783,6 +1779,9 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s
return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
case BPF_DYNPTR_TYPE_XDP:
return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
case BPF_DYNPTR_TYPE_SKB_META:
memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len);
return 0;
default:
WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
return -EFAULT;
@@ -1839,6 +1838,11 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
if (flags)
return -EINVAL;
return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
case BPF_DYNPTR_TYPE_SKB_META:
if (flags)
return -EINVAL;
memmove(bpf_skb_meta_pointer(dst->data, dst->offset + offset), src, len);
return 0;
default:
WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
return -EFAULT;
@@ -1885,6 +1889,7 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3
return (unsigned long)(ptr->data + ptr->offset + offset);
case BPF_DYNPTR_TYPE_SKB:
case BPF_DYNPTR_TYPE_XDP:
case BPF_DYNPTR_TYPE_SKB_META:
/* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
return 0;
default:
@@ -2540,7 +2545,7 @@ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
{
struct cgroup *cgrp;
cgrp = cgroup_get_from_id(cgid);
cgrp = __cgroup_get_from_id(cgid);
if (IS_ERR(cgrp))
return NULL;
return cgrp;
@@ -2713,6 +2718,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
return buffer__opt;
}
case BPF_DYNPTR_TYPE_SKB_META:
return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset);
default:
WARN_ONCE(true, "unknown dynptr type %d\n", type);
return NULL;
@@ -3344,6 +3351,36 @@ __bpf_kfunc void __bpf_trap(void)
* __get_kernel_nofault instead of plain dereference to make them safe.
*/
/*
 * Common strcmp/strcasecmp implementation over possibly-faulting kernel
 * strings.
 * @s1, @s2:     strings to compare (read with non-faulting accessors)
 * @ignore_case: fold both characters through tolower() before comparing
 *
 * Returns 0 if equal, -1/1 ordering, -ERANGE if either pointer is outside
 * the readable kernel address space, -EFAULT on a faulting read, and
 * -E2BIG if no difference or terminator is found within XATTR_SIZE_MAX
 * bytes.
 */
static int __bpf_strcasecmp(const char *s1, const char *s2, bool ignore_case)
{
char c1, c2;
int i;
/* Reject pointers non-faulting reads are not allowed to touch. */
if (!copy_from_kernel_nofault_allowed(s1, 1) ||
!copy_from_kernel_nofault_allowed(s2, 1)) {
return -ERANGE;
}
/* Disable page faults for the scoped non-faulting byte loads below. */
guard(pagefault)();
for (i = 0; i < XATTR_SIZE_MAX; i++) {
__get_kernel_nofault(&c1, s1, char, err_out);
__get_kernel_nofault(&c2, s2, char, err_out);
if (ignore_case) {
c1 = tolower(c1);
c2 = tolower(c2);
}
if (c1 != c2)
return c1 < c2 ? -1 : 1;
if (c1 == '\0')
return 0;
s1++;
s2++;
}
return -E2BIG;
err_out:
return -EFAULT;
}
/**
* bpf_strcmp - Compare two strings
* @s1__ign: One string
@@ -3359,28 +3396,25 @@ __bpf_kfunc void __bpf_trap(void)
*/
/* Case-sensitive string comparison kfunc; delegates to __bpf_strcasecmp(). */
__bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign)
{
/* NOTE(review): c1/c2/i below look like leftover removed lines from a
 * diff whose -/+ markers were stripped; the upstream function body is
 * just the delegation below — confirm against the original patch.
 */
char c1, c2;
int i;
return __bpf_strcasecmp(s1__ign, s2__ign, false);
}
if (!copy_from_kernel_nofault_allowed(s1__ign, 1) ||
!copy_from_kernel_nofault_allowed(s2__ign, 1)) {
return -ERANGE;
}
guard(pagefault)();
for (i = 0; i < XATTR_SIZE_MAX; i++) {
__get_kernel_nofault(&c1, s1__ign, char, err_out);
__get_kernel_nofault(&c2, s2__ign, char, err_out);
if (c1 != c2)
return c1 < c2 ? -1 : 1;
if (c1 == '\0')
return 0;
s1__ign++;
s2__ign++;
}
return -E2BIG;
err_out:
return -EFAULT;
/**
* bpf_strcasecmp - Compare two strings, ignoring the case of the characters
* @s1__ign: One string
* @s2__ign: Another string
*
* Return:
* * %0 - Strings are equal
* * %-1 - @s1__ign is smaller
* * %1 - @s2__ign is smaller
* * %-EFAULT - Cannot read one of the strings
* * %-E2BIG - One of strings is too large
* * %-ERANGE - One of strings is outside of kernel address space
*/
/* Case-insensitive string comparison kfunc; delegates to __bpf_strcasecmp(). */
__bpf_kfunc int bpf_strcasecmp(const char *s1__ign, const char *s2__ign)
{
return __bpf_strcasecmp(s1__ign, s2__ign, true);
}
/**
@@ -3712,9 +3746,490 @@ __bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign)
{
return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX);
}
#ifdef CONFIG_KEYS
/**
* bpf_lookup_user_key - lookup a key by its serial
* @serial: key handle serial number
* @flags: lookup-specific flags
*
* Search a key with a given *serial* and the provided *flags*.
* If found, increment the reference count of the key by one, and
* return it in the bpf_key structure.
*
* The bpf_key structure must be passed to bpf_key_put() when done
* with it, so that the key reference count is decremented and the
* bpf_key structure is freed.
*
* Permission checks are deferred to the time the key is used by
* one of the available key-specific kfuncs.
*
* Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested
* special keyring (e.g. session keyring), if it doesn't yet exist.
* Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting
* for the key construction, and to retrieve uninstantiated keys (keys
* without data attached to them).
*
* Return: a bpf_key pointer with a valid key pointer if the key is found, a
* NULL pointer otherwise.
*/
__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
{
	struct bpf_key *bkey;
	struct key *key;
	key_ref_t key_ref;

	if (flags & ~KEY_LOOKUP_ALL)
		return NULL;

	/*
	 * The permission check is deferred until the key is actually used,
	 * since the caller's intent is unknown at lookup time.
	 */
	key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
	if (IS_ERR(key_ref))
		return NULL;

	key = key_ref_to_ptr(key_ref);
	bkey = kmalloc(sizeof(*bkey), GFP_KERNEL);
	if (!bkey) {
		/* allocation failed: release the reference taken by the lookup */
		key_put(key);
		return NULL;
	}

	bkey->key = key;
	bkey->has_ref = true;
	return bkey;
}
/**
* bpf_lookup_system_key - lookup a key by a system-defined ID
* @id: key ID
*
* Obtain a bpf_key structure with a key pointer set to the passed key ID.
* The key pointer is marked as invalid, to prevent bpf_key_put() from
* attempting to decrement the key reference count on that pointer. The key
* pointer set in such way is currently understood only by
* verify_pkcs7_signature().
*
* Set *id* to one of the values defined in include/linux/verification.h:
* 0 for the primary keyring (immutable keyring of system keys);
* VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
* (where keys can be added only if they are vouched for by existing keys
* in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
* keyring (primarily used by the integrity subsystem to verify a kexec'ed
 * kernel image and, possibly, the initramfs signature).
*
* Return: a bpf_key pointer with an invalid key pointer set from the
* pre-determined ID on success, a NULL pointer otherwise
*/
__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
{
	struct bpf_key *bkey = NULL;

	/* Only hand out wrappers for recognized system keyring IDs */
	if (system_keyring_id_check(id) >= 0) {
		bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
		if (bkey) {
			/* fake pointer: stores the ID itself, not a real struct key */
			bkey->key = (struct key *)(unsigned long)id;
			bkey->has_ref = false;
		}
	}
	return bkey;
}
/**
* bpf_key_put - decrement key reference count if key is valid and free bpf_key
* @bkey: bpf_key structure
*
* Decrement the reference count of the key inside *bkey*, if the pointer
* is valid, and free *bkey*.
*/
__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
{
	/*
	 * Keys from bpf_lookup_system_key() carry a fake pointer
	 * (has_ref == false), so only drop the reference for real keys
	 * before freeing the wrapper itself.
	 */
	if (bkey->has_ref)
		key_put(bkey->key);
	kfree(bkey);
}
/**
* bpf_verify_pkcs7_signature - verify a PKCS#7 signature
* @data_p: data to verify
* @sig_p: signature of the data
* @trusted_keyring: keyring with keys trusted for signature verification
*
* Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
* with keys in a keyring referenced by *trusted_keyring*.
*
* Return: 0 on success, a negative value on error.
*/
__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
					   struct bpf_dynptr *sig_p,
					   struct bpf_key *trusted_keyring)
{
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
	struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
	struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
	const void *data, *sig;
	u32 data_len, sig_len;
	int ret;

	if (trusted_keyring->has_ref) {
		/*
		 * Do the permission check deferred in bpf_lookup_user_key().
		 * See bpf_lookup_user_key() for more details.
		 *
		 * A call to key_task_permission() here would be redundant, as
		 * it is already done by keyring_search() called by
		 * find_asymmetric_key().
		 */
		ret = key_validate(trusted_keyring->key);
		if (ret < 0)
			return ret;
	}

	/* Resolve both dynptrs to contiguous buffers before verification */
	data_len = __bpf_dynptr_size(data_ptr);
	data = __bpf_dynptr_data(data_ptr, data_len);
	sig_len = __bpf_dynptr_size(sig_ptr);
	sig = __bpf_dynptr_data(sig_ptr, sig_len);

	return verify_pkcs7_signature(data, data_len, sig, sig_len,
				      trusted_keyring->key,
				      VERIFYING_BPF_SIGNATURE, NULL,
				      NULL);
#else
	return -EOPNOTSUPP;
#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
}
#endif /* CONFIG_KEYS */
/* Signature of the BPF subprogram invoked through the task_work machinery */
typedef int (*bpf_task_work_callback_t)(struct bpf_map *map, void *key, void *value);

/*
 * State machine for a single bpf_task_work instance.  Transitions are
 * done with cmpxchg(); BPF_TW_FREED can be entered from any state via
 * bpf_task_work_cancel_and_free() when the map value is deleted.
 */
enum bpf_task_work_state {
	/* bpf_task_work is ready to be used */
	BPF_TW_STANDBY = 0,
	/* irq work scheduling in progress */
	BPF_TW_PENDING,
	/* task work scheduling in progress */
	BPF_TW_SCHEDULING,
	/* task work is scheduled successfully */
	BPF_TW_SCHEDULED,
	/* callback is running */
	BPF_TW_RUNNING,
	/* associated BPF map value is deleted */
	BPF_TW_FREED,
};

struct bpf_task_work_ctx {
	enum bpf_task_work_state state;
	refcount_t refcnt;
	struct callback_head work;
	struct irq_work irq_work;
	/* bpf_prog that schedules task work */
	struct bpf_prog *prog;
	/* task for which callback is scheduled */
	struct task_struct *task;
	/* the map and map value associated with this context */
	struct bpf_map *map;
	void *map_val;
	/* notify mode forwarded to task_work_add() */
	enum task_work_notify_mode mode;
	bpf_task_work_callback_t callback_fn;
	struct rcu_head rcu;
} __aligned(8);

/* Actual type for struct bpf_task_work */
struct bpf_task_work_kern {
	struct bpf_task_work_ctx *ctx;
};
/* Drop the prog and task references held by @ctx, if any */
static void bpf_task_work_ctx_reset(struct bpf_task_work_ctx *ctx)
{
	struct bpf_prog *prog = ctx->prog;
	struct task_struct *task = ctx->task;

	if (prog) {
		bpf_prog_put(prog);
		ctx->prog = NULL;
	}
	if (task) {
		bpf_task_release(task);
		ctx->task = NULL;
	}
}
/* Take a ctx reference; fails if the last reference is already gone */
static bool bpf_task_work_ctx_tryget(struct bpf_task_work_ctx *ctx)
{
	return refcount_inc_not_zero(&ctx->refcnt);
}
/* Drop a ctx reference; the last one releases held refs and frees the ctx */
static void bpf_task_work_ctx_put(struct bpf_task_work_ctx *ctx)
{
	if (refcount_dec_and_test(&ctx->refcnt)) {
		bpf_task_work_ctx_reset(ctx);
		/* bpf_mem_free() expects migration to be disabled */
		migrate_disable();
		bpf_mem_free(&bpf_global_ma, ctx);
		migrate_enable();
	}
}
/* Try to cancel a task_work that may be pending on @ctx->task */
static void bpf_task_work_cancel(struct bpf_task_work_ctx *ctx)
{
	/*
	 * Scheduled task_work callback holds ctx ref, so if we successfully
	 * cancelled, we put that ref on callback's behalf. If we couldn't
	 * cancel, callback will inevitably run or has already completed
	 * running, and it would have taken care of its ctx ref itself.
	 */
	if (task_work_cancel(ctx->task, &ctx->work))
		bpf_task_work_ctx_put(ctx);
}
/*
 * task_work callback: runs the BPF-provided subprogram in the target
 * task's context and returns the ctx to STANDBY for reuse.
 */
static void bpf_task_work_callback(struct callback_head *cb)
{
	struct bpf_task_work_ctx *ctx = container_of(cb, struct bpf_task_work_ctx, work);
	enum bpf_task_work_state state;
	u32 idx;
	void *key;

	/* Read lock is needed to protect ctx and map key/value access */
	guard(rcu_tasks_trace)();

	/*
	 * This callback may start running before bpf_task_work_irq() switched to
	 * SCHEDULED state, so handle both transition variants SCHEDULING|SCHEDULED -> RUNNING.
	 */
	state = cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_RUNNING);
	if (state == BPF_TW_SCHEDULED)
		state = cmpxchg(&ctx->state, BPF_TW_SCHEDULED, BPF_TW_RUNNING);
	if (state == BPF_TW_FREED) {
		/* map value was deleted meanwhile; just drop the callback's ref */
		bpf_task_work_ctx_put(ctx);
		return;
	}

	key = (void *)map_key_from_value(ctx->map, ctx->map_val, &idx);
	/* invoke the BPF subprogram with migration disabled */
	migrate_disable();
	ctx->callback_fn(ctx->map, key, ctx->map_val);
	migrate_enable();

	bpf_task_work_ctx_reset(ctx);
	/* may race with a concurrent switch into FREED; that is fine */
	(void)cmpxchg(&ctx->state, BPF_TW_RUNNING, BPF_TW_STANDBY);
	bpf_task_work_ctx_put(ctx);
}
/*
 * irq_work handler: moves ctx from PENDING to SCHEDULING and performs the
 * actual task_work_add() on the target task (deferred out of the kfunc
 * call site).
 */
static void bpf_task_work_irq(struct irq_work *irq_work)
{
	struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work);
	enum bpf_task_work_state state;
	int err;

	guard(rcu_tasks_trace)();

	if (cmpxchg(&ctx->state, BPF_TW_PENDING, BPF_TW_SCHEDULING) != BPF_TW_PENDING) {
		/* state changed under us (e.g. to FREED); drop the scheduling ref */
		bpf_task_work_ctx_put(ctx);
		return;
	}

	err = task_work_add(ctx->task, &ctx->work, ctx->mode);
	if (err) {
		bpf_task_work_ctx_reset(ctx);
		/*
		 * try to switch back to STANDBY for another task_work reuse, but we might have
		 * gone to FREED already, which is fine as we already cleaned up after ourselves
		 */
		(void)cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_STANDBY);
		bpf_task_work_ctx_put(ctx);
		return;
	}

	/*
	 * It's technically possible for just scheduled task_work callback to
	 * complete running by now, going SCHEDULING -> RUNNING and then
	 * dropping its ctx refcount. Instead of capturing extra ref just to
	 * protect the below ctx->state access, we rely on RCU protection to
	 * perform below SCHEDULING -> SCHEDULED attempt.
	 */
	state = cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_SCHEDULED);
	if (state == BPF_TW_FREED)
		bpf_task_work_cancel(ctx); /* clean up if we switched into FREED state */
}
/*
 * Return the ctx associated with @tw, allocating and installing a fresh
 * one on first use.  Installation is done with cmpxchg() so concurrent
 * BPF programs converge on a single ctx instance.
 */
static struct bpf_task_work_ctx *bpf_task_work_fetch_ctx(struct bpf_task_work *tw,
							 struct bpf_map *map)
{
	struct bpf_task_work_kern *twk = (void *)tw;
	struct bpf_task_work_ctx *ctx, *old_ctx;

	ctx = READ_ONCE(twk->ctx);
	if (ctx)
		return ctx;

	ctx = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_task_work_ctx));
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	memset(ctx, 0, sizeof(*ctx));
	refcount_set(&ctx->refcnt, 1); /* map's own ref */
	ctx->state = BPF_TW_STANDBY;

	old_ctx = cmpxchg(&twk->ctx, NULL, ctx);
	if (old_ctx) {
		/*
		 * tw->ctx is set by concurrent BPF program, release allocated
		 * memory and try to reuse already set context.
		 */
		bpf_mem_free(&bpf_global_ma, ctx);
		return old_ctx;
	}
	return ctx; /* Success */
}
/*
 * Fetch the ctx for @tw and transition it STANDBY -> PENDING, taking a
 * reference for the future task_work callback to hold.  Returns
 * ERR_PTR(-EBUSY) if the ctx is busy in another state or the map's user
 * refcount already dropped to zero.
 */
static struct bpf_task_work_ctx *bpf_task_work_acquire_ctx(struct bpf_task_work *tw,
							   struct bpf_map *map)
{
	struct bpf_task_work_ctx *ctx;

	ctx = bpf_task_work_fetch_ctx(tw, map);
	if (IS_ERR(ctx))
		return ctx;

	/* try to get ref for task_work callback to hold */
	if (!bpf_task_work_ctx_tryget(ctx))
		return ERR_PTR(-EBUSY);

	if (cmpxchg(&ctx->state, BPF_TW_STANDBY, BPF_TW_PENDING) != BPF_TW_STANDBY) {
		/* lost acquiring race or map_release_uref() stole it from us, put ref and bail */
		bpf_task_work_ctx_put(ctx);
		return ERR_PTR(-EBUSY);
	}

	/*
	 * If no process or bpffs is holding a reference to the map, no new callbacks should be
	 * scheduled. This does not address any race or correctness issue, but rather is a policy
	 * choice: dropping user references should stop everything.
	 */
	if (!atomic64_read(&map->usercnt)) {
		/* drop ref we just got for task_work callback itself */
		bpf_task_work_ctx_put(ctx);
		/* transfer map's ref into cancel_and_free() */
		bpf_task_work_cancel_and_free(tw);
		return ERR_PTR(-EBUSY);
	}

	return ctx;
}
/*
 * Common implementation for the bpf_task_work_schedule_*() kfuncs:
 * pins @task and the scheduling program, acquires the per-map-value ctx
 * and defers the actual task_work_add() to irq_work context.
 */
static int bpf_task_work_schedule(struct task_struct *task, struct bpf_task_work *tw,
				  struct bpf_map *map, bpf_task_work_callback_t callback_fn,
				  struct bpf_prog_aux *aux, enum task_work_notify_mode mode)
{
	struct bpf_prog *prog;
	struct bpf_task_work_ctx *ctx;
	int err;

	BTF_TYPE_EMIT(struct bpf_task_work);

	/* keep the scheduling prog alive while the callback can still run */
	prog = bpf_prog_inc_not_zero(aux->prog);
	if (IS_ERR(prog))
		return -EBADF;
	task = bpf_task_acquire(task);
	if (!task) {
		err = -EBADF;
		goto release_prog;
	}

	ctx = bpf_task_work_acquire_ctx(tw, map);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto release_all;
	}

	/* ownership of the @task and @prog references moves into @ctx here */
	ctx->task = task;
	ctx->callback_fn = callback_fn;
	ctx->prog = prog;
	ctx->mode = mode;
	ctx->map = map;
	ctx->map_val = (void *)tw - map->record->task_work_off;
	init_task_work(&ctx->work, bpf_task_work_callback);
	init_irq_work(&ctx->irq_work, bpf_task_work_irq);

	irq_work_queue(&ctx->irq_work);
	return 0;

release_all:
	bpf_task_release(task);
release_prog:
	bpf_prog_put(prog);
	return err;
}
/**
 * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode
 * @task: Task struct for which callback should be scheduled
 * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
 * @map__map: bpf_map that embeds struct bpf_task_work in the values
 * @callback: pointer to BPF subprogram to call
 * @aux__prog: user should pass NULL
 *
 * Uses the TWA_SIGNAL notification mode of task_work_add().
 *
 * Return: 0 if task work has been scheduled successfully, negative error code otherwise
 */
__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw,
					      void *map__map, bpf_task_work_callback_t callback,
					      void *aux__prog)
{
	return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_SIGNAL);
}
/**
 * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode
 * @task: Task struct for which callback should be scheduled
 * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
 * @map__map: bpf_map that embeds struct bpf_task_work in the values
 * @callback: pointer to BPF subprogram to call
 * @aux__prog: user should pass NULL
 *
 * Uses the TWA_RESUME notification mode of task_work_add().
 *
 * Return: 0 if task work has been scheduled successfully, negative error code otherwise
 */
__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw,
					      void *map__map, bpf_task_work_callback_t callback,
					      void *aux__prog)
{
	return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME);
}
__bpf_kfunc_end_defs();
/* irq_work handler used by bpf_task_work_cancel_and_free() for SCHEDULED ctxs */
static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work)
{
	struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work);

	bpf_task_work_cancel(ctx); /* this might put task_work callback's ref */
	bpf_task_work_ctx_put(ctx); /* and here we put map's own ref that was transferred to us */
}
/*
 * Called when the map value embedding @val goes away: detach the ctx,
 * force it into FREED state and drop the map's reference.  A task_work
 * that is already scheduled is cancelled from irq_work context.
 */
void bpf_task_work_cancel_and_free(void *val)
{
	struct bpf_task_work_kern *twk = val;
	struct bpf_task_work_ctx *ctx;
	enum bpf_task_work_state state;

	ctx = xchg(&twk->ctx, NULL);
	if (!ctx)
		return;

	state = xchg(&ctx->state, BPF_TW_FREED);
	if (state == BPF_TW_SCHEDULED) {
		/* run in irq_work to avoid locks in NMI */
		init_irq_work(&ctx->irq_work, bpf_task_work_cancel_scheduled);
		irq_work_queue(&ctx->irq_work);
		return;
	}
	bpf_task_work_ctx_put(ctx); /* put bpf map's ref */
}
BTF_KFUNCS_START(generic_btf_ids)
#ifdef CONFIG_CRASH_DUMP
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
@@ -3753,6 +4268,14 @@ BTF_ID_FLAGS(func, bpf_throw)
#ifdef CONFIG_BPF_EVENTS
BTF_ID_FLAGS(func, bpf_send_signal_task, KF_TRUSTED_ARGS)
#endif
#ifdef CONFIG_KEYS
BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
#endif
#endif
BTF_KFUNCS_END(generic_btf_ids)
static const struct btf_kfunc_id_set generic_kfunc_set = {
@@ -3834,6 +4357,7 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
#endif
BTF_ID_FLAGS(func, __bpf_trap)
BTF_ID_FLAGS(func, bpf_strcmp);
BTF_ID_FLAGS(func, bpf_strcasecmp);
BTF_ID_FLAGS(func, bpf_strchr);
BTF_ID_FLAGS(func, bpf_strchrnul);
BTF_ID_FLAGS(func, bpf_strnchr);
@@ -3848,6 +4372,8 @@ BTF_ID_FLAGS(func, bpf_strnstr);
BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU)
#endif
BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {

733
kernel/bpf/liveness.c Normal file
View File

@@ -0,0 +1,733 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
#include <linux/bpf_verifier.h>
#include <linux/hashtable.h>
#include <linux/jhash.h>
#include <linux/slab.h>
/*
* This file implements live stack slots analysis. After accumulating
* stack usage data, the analysis answers queries about whether a
 * particular stack slot may be read by an instruction or any of its
* successors. This data is consumed by the verifier states caching
* mechanism to decide which stack slots are important when looking for a
* visited state corresponding to the current state.
*
* The analysis is call chain sensitive, meaning that data is collected
* and queried for tuples (call chain, subprogram instruction index).
* Such sensitivity allows identifying if some subprogram call always
* leads to writes in the caller's stack.
*
* The basic idea is as follows:
* - As the verifier accumulates a set of visited states, the analysis instance
* accumulates a conservative estimate of stack slots that can be read
* or must be written for each visited tuple (call chain, instruction index).
* - If several states happen to visit the same instruction with the same
* call chain, stack usage information for the corresponding tuple is joined:
* - "may_read" set represents a union of all possibly read slots
* (any slot in "may_read" set might be read at or after the instruction);
* - "must_write" set represents an intersection of all possibly written slots
* (any slot in "must_write" set is guaranteed to be written by the instruction).
* - The analysis is split into two phases:
* - read and write marks accumulation;
* - read and write marks propagation.
* - The propagation phase is a textbook live variable data flow analysis:
*
* state[cc, i].live_after = U [state[cc, s].live_before for s in insn_successors(i)]
* state[cc, i].live_before =
* (state[cc, i].live_after / state[cc, i].must_write) U state[i].may_read
*
* Where:
* - `U` stands for set union
* - `/` stands for set difference;
* - `cc` stands for a call chain;
* - `i` and `s` are instruction indexes;
*
* The above equations are computed for each call chain and instruction
* index until state stops changing.
* - Additionally, in order to transfer "must_write" information from a
* subprogram to call instructions invoking this subprogram,
* the "must_write_acc" set is tracked for each (cc, i) tuple.
* A set of stack slots that are guaranteed to be written by this
* instruction or any of its successors (within the subprogram).
* The equation for "must_write_acc" propagation looks as follows:
*
* state[cc, i].must_write_acc =
* [state[cc, s].must_write_acc for s in insn_successors(i)]
* U state[cc, i].must_write
*
* (An intersection of all "must_write_acc" for instruction successors
* plus all "must_write" slots for the instruction itself).
* - After the propagation phase completes for a subprogram, information from
* (cc, 0) tuple (subprogram entry) is transferred to the caller's call chain:
* - "must_write_acc" set is intersected with the call site's "must_write" set;
* - "may_read" set is added to the call site's "may_read" set.
* - Any live stack queries must be taken after the propagation phase.
* - Accumulation and propagation phases can be entered multiple times,
* at any point in time:
* - "may_read" set only grows;
* - "must_write" set only shrinks;
* - for each visited verifier state with zero branches, all relevant
* read and write marks are already recorded by the analysis instance.
*
* Technically, the analysis is facilitated by the following data structures:
* - Call chain: for given verifier state, the call chain is a tuple of call
* instruction indexes leading to the current subprogram plus the subprogram
* entry point index.
* - Function instance: for a given call chain, for each instruction in
* the current subprogram, a mapping between instruction index and a
* set of "may_read", "must_write" and other marks accumulated for this
* instruction.
* - A hash table mapping call chains to function instances.
*/
struct callchain {
	u32 callsites[MAX_CALL_FRAMES]; /* instruction pointer for each frame */
	/* cached subprog_info[*].start for functions owning the frames:
	 * - sp_starts[curframe] used to get insn relative index within current function;
	 * - sp_starts[0..current-1] used for fast callchain_frame_up().
	 */
	u32 sp_starts[MAX_CALL_FRAMES];
	u32 curframe; /* depth of callsites and sp_starts arrays */
};

/* Per-instruction liveness masks; one bit per stack slot */
struct per_frame_masks {
	u64 may_read; /* stack slots that may be read by this instruction */
	u64 must_write; /* stack slots written by this instruction */
	u64 must_write_acc; /* stack slots written by this instruction and its successors */
	u64 live_before; /* stack slots that may be read by this insn and its successors */
};

/*
 * A function instance created for a specific callchain.
 * Encapsulates read and write marks for each instruction in the function.
 * Marks are tracked for each frame in the callchain.
 */
struct func_instance {
	struct hlist_node hl_node; /* membership in bpf_liveness->func_instances */
	struct callchain callchain;
	u32 insn_cnt; /* cached number of insns in the function */
	bool updated; /* accumulated marks changed since last propagation */
	bool must_write_dropped; /* a "must_write" bit was ever cleared */
	/* Per frame, per instruction masks, frames allocated lazily. */
	struct per_frame_masks *frames[MAX_CALL_FRAMES];
	/* For each instruction a flag telling if "must_write" had been initialized for it. */
	bool *must_write_set;
};

/* Resolved (callchain, insn) tuple cached between live stack queries */
struct live_stack_query {
	struct func_instance *instances[MAX_CALL_FRAMES]; /* valid in range [0..curframe] */
	u32 curframe;
	u32 insn_idx; /* instruction the query refers to */
};

struct bpf_liveness {
	DECLARE_HASHTABLE(func_instances, 8); /* maps callchain to func_instance */
	struct live_stack_query live_stack_query; /* cache to avoid repetitive ht lookups */
	/* Cached instance corresponding to env->cur_state, avoids per-instruction ht lookup */
	struct func_instance *cur_instance;
	/*
	 * Below fields are used to accumulate stack write marks for instruction at
	 * @write_insn_idx before submitting the marks to @cur_instance.
	 */
	u64 write_masks_acc[MAX_CALL_FRAMES];
	u32 write_insn_idx;
};
/* Compute callchain corresponding to state @st at depth @frameno */
static void compute_callchain(struct bpf_verifier_env *env, struct bpf_verifier_state *st,
			      struct callchain *callchain, u32 frameno)
{
	struct bpf_subprog_info *subprog_info = env->subprog_info;
	u32 i;

	memset(callchain, 0, sizeof(*callchain));
	for (i = 0; i <= frameno; i++) {
		callchain->sp_starts[i] = subprog_info[st->frame[i]->subprogno].start;
		if (i < st->curframe)
			callchain->callsites[i] = st->frame[i + 1]->callsite;
	}
	callchain->curframe = frameno;
	/* the top frame is identified by its subprogram entry, not by a callsite */
	callchain->callsites[callchain->curframe] = callchain->sp_starts[callchain->curframe];
}
/* Hash key for the func_instances table; exact match is done by same_callsites() */
static u32 hash_callchain(struct callchain *callchain)
{
	return jhash2(callchain->callsites, callchain->curframe, 0);
}
static bool same_callsites(struct callchain *a, struct callchain *b)
{
int i;
if (a->curframe != b->curframe)
return false;
for (i = a->curframe; i >= 0; i--)
if (a->callsites[i] != b->callsites[i])
return false;
return true;
}
/*
 * Find existing or allocate new function instance corresponding to @callchain.
 * Instances are accumulated in env->liveness->func_instances and persist
 * until the end of the verification process.
 */
static struct func_instance *__lookup_instance(struct bpf_verifier_env *env,
					       struct callchain *callchain)
{
	struct bpf_liveness *liveness = env->liveness;
	struct bpf_subprog_info *subprog;
	struct func_instance *result;
	u32 subprog_sz, size, key;

	key = hash_callchain(callchain);
	hash_for_each_possible(liveness->func_instances, result, hl_node, key)
		if (same_callsites(&result->callchain, callchain))
			return result;

	subprog = bpf_find_containing_subprog(env, callchain->sp_starts[callchain->curframe]);
	subprog_sz = (subprog + 1)->start - subprog->start;
	size = sizeof(struct func_instance);
	result = kvzalloc(size, GFP_KERNEL_ACCOUNT);
	if (!result)
		return ERR_PTR(-ENOMEM);
	result->must_write_set = kvcalloc(subprog_sz, sizeof(*result->must_write_set),
					  GFP_KERNEL_ACCOUNT);
	if (!result->must_write_set) {
		/*
		 * @result is not in the hash table yet, so
		 * bpf_stack_liveness_free() would never see it: free it here
		 * to avoid leaking it on this error path.
		 */
		kvfree(result);
		return ERR_PTR(-ENOMEM);
	}
	memcpy(&result->callchain, callchain, sizeof(*callchain));
	result->insn_cnt = subprog_sz;
	hash_add(liveness->func_instances, &result->hl_node, key);
	return result;
}
/* Like __lookup_instance(), but computes the callchain from @st at @frameno first */
static struct func_instance *lookup_instance(struct bpf_verifier_env *env,
					     struct bpf_verifier_state *st,
					     u32 frameno)
{
	struct callchain callchain;

	compute_callchain(env, st, &callchain, frameno);
	return __lookup_instance(env, &callchain);
}
int bpf_stack_liveness_init(struct bpf_verifier_env *env)
{
env->liveness = kvzalloc(sizeof(*env->liveness), GFP_KERNEL_ACCOUNT);
if (!env->liveness)
return -ENOMEM;
hash_init(env->liveness->func_instances);
return 0;
}
/* Release every accumulated function instance and the liveness state itself */
void bpf_stack_liveness_free(struct bpf_verifier_env *env)
{
	struct func_instance *instance;
	struct hlist_node *tmp;
	int bkt, i;

	if (!env->liveness)
		return;
	hash_for_each_safe(env->liveness->func_instances, bkt, tmp, instance, hl_node) {
		/* per-frame mask arrays are lazily allocated; kvfree(NULL) is a no-op */
		for (i = 0; i <= instance->callchain.curframe; i++)
			kvfree(instance->frames[i]);
		kvfree(instance->must_write_set);
		kvfree(instance);
	}
	kvfree(env->liveness);
}
/*
 * Convert absolute instruction index @insn_idx to an index relative
 * to start of the function corresponding to @instance.
 */
static int relative_idx(struct func_instance *instance, u32 insn_idx)
{
	/* sp_starts[curframe] is the entry insn of the owning subprogram */
	return insn_idx - instance->callchain.sp_starts[instance->callchain.curframe];
}
/* Masks for (@frame, @insn_idx), or NULL if the frame array was never allocated */
static struct per_frame_masks *get_frame_masks(struct func_instance *instance,
					       u32 frame, u32 insn_idx)
{
	if (!instance->frames[frame])
		return NULL;
	return &instance->frames[frame][relative_idx(instance, insn_idx)];
}
/* Like get_frame_masks(), but lazily allocates the per-frame array on first use */
static struct per_frame_masks *alloc_frame_masks(struct bpf_verifier_env *env,
						 struct func_instance *instance,
						 u32 frame, u32 insn_idx)
{
	struct per_frame_masks *arr;

	if (!instance->frames[frame]) {
		arr = kvcalloc(instance->insn_cnt, sizeof(*arr), GFP_KERNEL_ACCOUNT);
		instance->frames[frame] = arr;
		if (!arr)
			return ERR_PTR(-ENOMEM);
	}
	return get_frame_masks(instance, frame, insn_idx);
}
/* Forget the cached instance; the next use re-resolves it from env->cur_state */
void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env)
{
	env->liveness->cur_instance = NULL;
}
/* If @env->liveness->cur_instance is null, set it to instance corresponding to @env->cur_state. */
static int ensure_cur_instance(struct bpf_verifier_env *env)
{
	struct func_instance *instance;

	if (env->liveness->cur_instance)
		return 0;

	instance = lookup_instance(env, env->cur_state, env->cur_state->curframe);
	if (IS_ERR(instance))
		return PTR_ERR(instance);

	env->liveness->cur_instance = instance;
	return 0;
}
/* Accumulate may_read masks for @frame at @insn_idx */
static int mark_stack_read(struct bpf_verifier_env *env,
			   struct func_instance *instance, u32 frame, u32 insn_idx, u64 mask)
{
	struct per_frame_masks *masks;
	u64 new_may_read;

	masks = alloc_frame_masks(env, instance, frame, insn_idx);
	if (IS_ERR(masks))
		return PTR_ERR(masks);
	new_may_read = masks->may_read | mask;
	/*
	 * Flag the instance as updated only when the new read bits are not
	 * already covered by "live_before"; otherwise the fixed point is
	 * unaffected and re-propagation is unnecessary.
	 */
	if (new_may_read != masks->may_read &&
	    ((new_may_read | masks->live_before) != masks->live_before))
		instance->updated = true;
	masks->may_read |= mask;
	return 0;
}
/* Public entry: record read marks against the instance for env->cur_state */
int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frame, u32 insn_idx, u64 mask)
{
	int err = ensure_cur_instance(env);

	if (err)
		return err;
	return mark_stack_read(env, env->liveness->cur_instance, frame, insn_idx, mask);
}
/* Begin accumulating write marks for @insn_idx: clear all per-frame accumulators */
static void reset_stack_write_marks(struct bpf_verifier_env *env,
				    struct func_instance *instance, u32 insn_idx)
{
	struct bpf_liveness *liveness = env->liveness;
	u32 frame;

	liveness->write_insn_idx = insn_idx;
	for (frame = 0; frame <= instance->callchain.curframe; frame++)
		liveness->write_masks_acc[frame] = 0;
}
/* Public entry: start write-mark accumulation for @insn_idx in the current instance */
int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx)
{
	struct bpf_liveness *liveness = env->liveness;
	int err;

	err = ensure_cur_instance(env);
	if (err)
		return err;

	reset_stack_write_marks(env, liveness->cur_instance, insn_idx);
	return 0;
}
/* Accumulate a write mark for @frame; folded in later by bpf_commit_stack_write_marks() */
void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frame, u64 mask)
{
	env->liveness->write_masks_acc[frame] |= mask;
}
/*
 * Fold the write masks accumulated in liveness->write_masks_acc for
 * instruction liveness->write_insn_idx into @instance.  "must_write" is
 * an intersection across visits, so a repeat visit can only shrink it.
 *
 * Fix: @old_must_write was declared u32 while masks->must_write is u64
 * (one bit per stack slot, up to 64 slots); the assignment truncated the
 * upper 32 bits, corrupting both the intersection and change detection
 * for slots 32..63.
 */
static int commit_stack_write_marks(struct bpf_verifier_env *env,
				    struct func_instance *instance)
{
	struct bpf_liveness *liveness = env->liveness;
	u32 idx, frame, curframe;
	struct per_frame_masks *masks;
	u64 old_must_write, mask;

	if (!instance)
		return 0;

	curframe = instance->callchain.curframe;
	idx = relative_idx(instance, liveness->write_insn_idx);
	for (frame = 0; frame <= curframe; frame++) {
		mask = liveness->write_masks_acc[frame];
		/* avoid allocating frames for zero masks */
		if (mask == 0 && !instance->must_write_set[idx])
			continue;
		masks = alloc_frame_masks(env, instance, frame, liveness->write_insn_idx);
		if (IS_ERR(masks))
			return PTR_ERR(masks);
		old_must_write = masks->must_write;
		/*
		 * If instruction at this callchain is seen for a first time, set must_write equal
		 * to @mask. Otherwise take intersection with the previous value.
		 */
		if (instance->must_write_set[idx])
			mask &= old_must_write;
		if (old_must_write != mask) {
			masks->must_write = mask;
			instance->updated = true;
		}
		/* shrinking must_write can invalidate previously cached answers */
		if (old_must_write & ~mask)
			instance->must_write_dropped = true;
	}
	instance->must_write_set[idx] = true;
	liveness->write_insn_idx = 0;
	return 0;
}
/*
 * Merge stack writes marks in @env->liveness->write_masks_acc
 * with information already in @env->liveness->cur_instance.
 * Returns 0 on success or -ENOMEM if mask allocation fails.
 */
int bpf_commit_stack_write_marks(struct bpf_verifier_env *env)
{
	return commit_stack_write_marks(env, env->liveness->cur_instance);
}
/*
 * Render @callchain as "(cs0,cs1,...)" into env->tmp_str_buf and return it.
 * NOTE(review): the "buf += snprintf()" pattern assumes tmp_str_buf is
 * always large enough; snprintf() returns the would-be length, so on
 * truncation @buf could advance past @buf_end — relies on the buffer
 * being generously sized. Confirm against TMP_STR_BUF_LEN.
 */
static char *fmt_callchain(struct bpf_verifier_env *env, struct callchain *callchain)
{
	char *buf_end = env->tmp_str_buf + sizeof(env->tmp_str_buf);
	char *buf = env->tmp_str_buf;
	int i;

	buf += snprintf(buf, buf_end - buf, "(");
	for (i = 0; i <= callchain->curframe; i++)
		buf += snprintf(buf, buf_end - buf, "%s%d", i ? "," : "", callchain->callsites[i]);
	snprintf(buf, buf_end - buf, ")");
	return env->tmp_str_buf;
}
/* Log which stack-slot bits were set (+pfx) and cleared (-pfx) by a mask update */
static void log_mask_change(struct bpf_verifier_env *env, struct callchain *callchain,
			    char *pfx, u32 frame, u32 insn_idx, u64 old, u64 new)
{
	u64 diff = old ^ new;
	u64 became_set = new & diff;
	u64 became_clear = ~new & diff;

	if (!diff)
		return;

	bpf_log(&env->log, "%s frame %d insn %d ", fmt_callchain(env, callchain), frame, insn_idx);
	if (became_set) {
		bpf_fmt_stack_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), became_set);
		bpf_log(&env->log, "+%s %s ", pfx, env->tmp_str_buf);
	}
	if (became_clear) {
		bpf_fmt_stack_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), became_clear);
		bpf_log(&env->log, "-%s %s", pfx, env->tmp_str_buf);
	}
	bpf_log(&env->log, "\n");
}
/* Jump displacement of @insn; BPF_JMP32|BPF_JA carries a long offset in imm */
int bpf_jmp_offset(struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP32 | BPF_JA) ? insn->imm : insn->off;
}
__diag_push();
__diag_ignore_all("-Woverride-init", "Allow field initialization overrides for opcode_info_tbl");

/*
 * Compute the instructions that may execute after @idx, writing up to two
 * successor indexes into @succ.  Returns the successor count: 0 for
 * BPF_EXIT, 1 for plain/fallthrough or unconditional jump instructions,
 * 2 for conditional branches.
 */
inline int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2])
{
	static const struct opcode_info {
		bool can_jump;
		bool can_fallthrough;
	} opcode_info_tbl[256] = {
		/* default: non-jump instructions simply fall through */
		[0 ... 255] = {.can_jump = false, .can_fallthrough = true},
	#define _J(code, ...) \
		[BPF_JMP | code] = __VA_ARGS__, \
		[BPF_JMP32 | code] = __VA_ARGS__

		_J(BPF_EXIT, {.can_jump = false, .can_fallthrough = false}),
		_J(BPF_JA, {.can_jump = true, .can_fallthrough = false}),
		_J(BPF_JEQ, {.can_jump = true, .can_fallthrough = true}),
		_J(BPF_JNE, {.can_jump = true, .can_fallthrough = true}),
		_J(BPF_JLT, {.can_jump = true, .can_fallthrough = true}),
		_J(BPF_JLE, {.can_jump = true, .can_fallthrough = true}),
		_J(BPF_JGT, {.can_jump = true, .can_fallthrough = true}),
		_J(BPF_JGE, {.can_jump = true, .can_fallthrough = true}),
		_J(BPF_JSGT, {.can_jump = true, .can_fallthrough = true}),
		_J(BPF_JSGE, {.can_jump = true, .can_fallthrough = true}),
		_J(BPF_JSLT, {.can_jump = true, .can_fallthrough = true}),
		_J(BPF_JSLE, {.can_jump = true, .can_fallthrough = true}),
		_J(BPF_JCOND, {.can_jump = true, .can_fallthrough = true}),
		_J(BPF_JSET, {.can_jump = true, .can_fallthrough = true}),
	#undef _J
	};
	struct bpf_insn *insn = &prog->insnsi[idx];
	const struct opcode_info *opcode_info;
	int i = 0, insn_sz;

	opcode_info = &opcode_info_tbl[BPF_CLASS(insn->code) | BPF_OP(insn->code)];
	/* ldimm64 occupies two insn slots, so fallthrough skips both */
	insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
	if (opcode_info->can_fallthrough)
		succ[i++] = idx + insn_sz;

	if (opcode_info->can_jump)
		succ[i++] = idx + bpf_jmp_offset(insn) + 1;

	return i;
}

__diag_pop();
/* Return the function instance for the callchain one frame up from @instance */
static struct func_instance *get_outer_instance(struct bpf_verifier_env *env,
						struct func_instance *instance)
{
	struct callchain callchain = instance->callchain;

	/* Adjust @callchain to represent callchain one frame up */
	callchain.callsites[callchain.curframe] = 0;
	callchain.sp_starts[callchain.curframe] = 0;
	callchain.curframe--;
	/* top frame of the outer chain is keyed by its subprogram entry */
	callchain.callsites[callchain.curframe] = callchain.sp_starts[callchain.curframe];
	return __lookup_instance(env, &callchain);
}
/* Index of the first instruction of the current frame's subprogram */
static u32 callchain_subprog_start(struct callchain *callchain)
{
	u32 frame = callchain->curframe;

	return callchain->sp_starts[frame];
}
/*
 * Transfer @may_read and @must_write_acc marks from the first instruction of @instance,
 * to the call instruction in function instance calling @instance.
 */
static int propagate_to_outer_instance(struct bpf_verifier_env *env,
				       struct func_instance *instance)
{
	struct callchain *callchain = &instance->callchain;
	u32 this_subprog_start, callsite, frame;
	struct func_instance *outer_instance;
	struct per_frame_masks *insn;
	int err;

	this_subprog_start = callchain_subprog_start(callchain);
	outer_instance = get_outer_instance(env, instance);
	/*
	 * get_outer_instance() returns __lookup_instance()'s result, which
	 * may be an ERR_PTR (cf. IS_ERR() checks on lookup_instance()
	 * elsewhere in this file); do not dereference it unchecked.
	 */
	if (IS_ERR(outer_instance))
		return PTR_ERR(outer_instance);
	/* Call instruction in the caller that enters @instance */
	callsite = callchain->callsites[callchain->curframe - 1];

	reset_stack_write_marks(env, outer_instance, callsite);
	for (frame = 0; frame < callchain->curframe; frame++) {
		/* Masks accumulated at the subprogram's entry instruction */
		insn = get_frame_masks(instance, frame, this_subprog_start);
		if (!insn)
			continue;
		bpf_mark_stack_write(env, frame, insn->must_write_acc);
		err = mark_stack_read(env, outer_instance, frame, callsite, insn->live_before);
		if (err)
			return err;
	}
	commit_stack_write_marks(env, outer_instance);
	return 0;
}
/*
 * One transfer-function step of the data flow analysis for @insn_idx:
 * recompute the "live_before" and "must_write_acc" masks from the marks
 * of the instruction's successors. Returns true if either mask changed,
 * so the caller knows the fixed point has not been reached yet.
 */
static inline bool update_insn(struct bpf_verifier_env *env,
			       struct func_instance *instance, u32 frame, u32 insn_idx)
{
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	u64 new_before, new_after, must_write_acc;
	struct per_frame_masks *insn, *succ_insn;
	u32 succ_num, s, succ[2];
	bool changed;

	succ_num = bpf_insn_successors(env->prog, insn_idx, succ);
	/* BPF_EXIT has no successors, nothing to propagate */
	if (unlikely(succ_num == 0))
		return false;

	changed = false;
	insn = get_frame_masks(instance, frame, insn_idx);
	new_before = 0;
	new_after = 0;
	/*
	 * New "must_write_acc" is an intersection of all "must_write_acc"
	 * of successors plus all "must_write" slots of instruction itself.
	 */
	must_write_acc = U64_MAX;
	for (s = 0; s < succ_num; ++s) {
		succ_insn = get_frame_masks(instance, frame, succ[s]);
		new_after |= succ_insn->live_before;
		must_write_acc &= succ_insn->must_write_acc;
	}
	must_write_acc |= insn->must_write;
	/*
	 * New "live_before" is a union of all "live_before" of successors
	 * minus slots written by instruction plus slots read by instruction.
	 */
	new_before = (new_after & ~insn->must_write) | insn->may_read;
	changed |= new_before != insn->live_before;
	changed |= must_write_acc != insn->must_write_acc;
	/* Trace mask changes at "interesting" instructions when verbose */
	if (unlikely(env->log.level & BPF_LOG_LEVEL2) &&
	    (insn->may_read || insn->must_write ||
	     insn_idx == callchain_subprog_start(&instance->callchain) ||
	     aux[insn_idx].prune_point)) {
		log_mask_change(env, &instance->callchain, "live",
				frame, insn_idx, insn->live_before, new_before);
		log_mask_change(env, &instance->callchain, "written",
				frame, insn_idx, insn->must_write_acc, must_write_acc);
	}
	insn->live_before = new_before;
	insn->must_write_acc = must_write_acc;
	return changed;
}
/* Fixed-point computation of @live_before and @must_write_acc marks */
static int update_instance(struct bpf_verifier_env *env, struct func_instance *instance)
{
	u32 i, frame, po_start, po_end, cnt, this_subprog_start;
	struct callchain *callchain = &instance->callchain;
	int *insn_postorder = env->cfg.insn_postorder;
	struct bpf_subprog_info *subprog;
	struct per_frame_masks *insn;
	bool changed;
	int err;

	this_subprog_start = callchain_subprog_start(callchain);
	/*
	 * If must_write marks were updated must_write_acc needs to be reset
	 * (to account for the case when new must_write sets became smaller).
	 */
	if (instance->must_write_dropped) {
		for (frame = 0; frame <= callchain->curframe; frame++) {
			if (!instance->frames[frame])
				continue;
			for (i = 0; i < instance->insn_cnt; i++) {
				insn = get_frame_masks(instance, frame, this_subprog_start + i);
				insn->must_write_acc = 0;
			}
		}
	}

	/* Visit this subprogram's instructions in CFG post-order */
	subprog = bpf_find_containing_subprog(env, this_subprog_start);
	po_start = subprog->postorder_start;
	po_end = (subprog + 1)->postorder_start;
	cnt = 0;
	/* repeat until fixed point is reached */
	do {
		cnt++;
		changed = false;
		for (frame = 0; frame <= instance->callchain.curframe; frame++) {
			/* skip frames for which no stack access was recorded */
			if (!instance->frames[frame])
				continue;
			for (i = po_start; i < po_end; i++)
				changed |= update_insn(env, instance, frame, insn_postorder[i]);
		}
	} while (changed);

	if (env->log.level & BPF_LOG_LEVEL2)
		bpf_log(&env->log, "%s live stack update done in %d iterations\n",
			fmt_callchain(env, callchain), cnt);

	/* transfer marks accumulated for outer frames to outer func instance (caller) */
	if (callchain->curframe > 0) {
		err = propagate_to_outer_instance(env, instance);
		if (err)
			return err;
	}
	return 0;
}
/*
 * Prepare all callchains within @env->cur_state for querying.
 * This function should be called after each verifier.c:pop_stack()
 * and whenever verifier.c:do_check_insn() processes subprogram exit.
 * This would guarantee that visited verifier states with zero branches
 * have their bpf_mark_stack_{read,write}() effects propagated in
 * @env->liveness.
 */
int bpf_update_live_stack(struct bpf_verifier_env *env)
{
	struct func_instance *instance;
	int frame, err;

	bpf_reset_live_stack_callchain(env);
	/* Walk frames innermost to outermost */
	for (frame = env->cur_state->curframe; frame >= 0; frame--) {
		instance = lookup_instance(env, env->cur_state, frame);
		if (IS_ERR(instance))
			return PTR_ERR(instance);

		/* Nothing to recompute unless new marks were recorded */
		if (!instance->updated)
			continue;

		err = update_instance(env, instance);
		if (err)
			return err;
		instance->updated = false;
		instance->must_write_dropped = false;
	}
	return 0;
}
/*
 * True if stack slot @spi of frame @frameno is in the "live_before"
 * mask of instruction @insn_idx; false when no masks were recorded.
 */
static bool is_live_before(struct func_instance *instance, u32 insn_idx, u32 frameno, u32 spi)
{
	struct per_frame_masks *masks = get_frame_masks(instance, frameno, insn_idx);

	if (!masks)
		return false;
	return (masks->live_before & BIT(spi)) != 0;
}
/*
 * Cache the func_instance for every frame of @st in
 * @env->liveness->live_stack_query, so that subsequent
 * bpf_stack_slot_alive() calls can query liveness cheaply.
 * Returns 0 on success or a negative errno from instance lookup.
 */
int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct live_stack_query *q = &env->liveness->live_stack_query;
	struct func_instance *instance;
	u32 i;

	memset(q, 0, sizeof(*q));
	for (i = 0; i <= st->curframe; i++) {
		instance = lookup_instance(env, st, i);
		if (IS_ERR(instance))
			return PTR_ERR(instance);
		q->instances[i] = instance;
	}
	q->curframe = st->curframe;
	q->insn_idx = st->insn_idx;
	return 0;
}
/*
 * Slot is alive if it is read before q->st->insn_idx in current func instance,
 * or if for some outer func instance:
 * - alive before callsite if callsite calls callback, otherwise
 * - alive after callsite
 */
bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi)
{
	struct live_stack_query *q = &env->liveness->live_stack_query;
	struct func_instance *cur, *outer;
	u32 frame, callsite;

	cur = q->instances[q->curframe];
	if (is_live_before(cur, q->insn_idx, frameno, spi))
		return true;

	for (frame = frameno; frame < q->curframe; frame++) {
		callsite = cur->callchain.callsites[frame];
		outer = q->instances[frame];
		if (bpf_calls_callback(env, callsite)) {
			/* callback may run: liveness at the call itself */
			if (is_live_before(outer, callsite, frameno, spi))
				return true;
		} else if (is_live_before(outer, callsite + 1, frameno, spi)) {
			/* liveness after return from the call */
			return true;
		}
	}
	return false;
}

View File

@@ -165,7 +165,7 @@ static long cgroup_storage_update_elem(struct bpf_map *map, void *key,
}
new = bpf_map_kmalloc_node(map, struct_size(new, data, map->value_size),
__GFP_ZERO | GFP_NOWAIT | __GFP_NOWARN,
__GFP_ZERO | GFP_NOWAIT,
map->numa_node);
if (!new)
return -ENOMEM;

View File

@@ -498,6 +498,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type)
return "skb";
case BPF_DYNPTR_TYPE_XDP:
return "xdp";
case BPF_DYNPTR_TYPE_SKB_META:
return "skb_meta";
case BPF_DYNPTR_TYPE_INVALID:
return "<invalid>";
default:
@@ -540,19 +542,6 @@ static char slot_type_char[] = {
[STACK_IRQ_FLAG] = 'f'
};
static void print_liveness(struct bpf_verifier_env *env,
enum bpf_reg_liveness live)
{
if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
verbose(env, "_");
if (live & REG_LIVE_READ)
verbose(env, "r");
if (live & REG_LIVE_WRITTEN)
verbose(env, "w");
if (live & REG_LIVE_DONE)
verbose(env, "D");
}
#define UNUM_MAX_DECIMAL U16_MAX
#define SNUM_MAX_DECIMAL S16_MAX
#define SNUM_MIN_DECIMAL S16_MIN
@@ -770,7 +759,6 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
if (!print_all && !reg_scratched(env, i))
continue;
verbose(env, " R%d", i);
print_liveness(env, reg->live);
verbose(env, "=");
print_reg_state(env, state, reg);
}
@@ -803,9 +791,7 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
break;
types_buf[j] = '\0';
verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
print_liveness(env, reg->live);
verbose(env, "=%s", types_buf);
verbose(env, " fp%d=%s", (-i - 1) * BPF_REG_SIZE, types_buf);
print_reg_state(env, state, reg);
break;
case STACK_DYNPTR:
@@ -814,7 +800,6 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
reg = &state->stack[i].spilled_ptr;
verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
print_liveness(env, reg->live);
verbose(env, "=dynptr_%s(", dynptr_type_str(reg->dynptr.type));
if (reg->id)
verbose_a("id=%d", reg->id);
@@ -829,9 +814,8 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
if (!reg->ref_obj_id)
continue;
verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
print_liveness(env, reg->live);
verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)",
verbose(env, " fp%d=iter_%s(ref_id=%d,state=%s,depth=%u)",
(-i - 1) * BPF_REG_SIZE,
iter_type_str(reg->iter.btf, reg->iter.btf_id),
reg->ref_obj_id, iter_state_str(reg->iter.state),
reg->iter.depth);
@@ -839,9 +823,7 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
case STACK_MISC:
case STACK_ZERO:
default:
verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
print_liveness(env, reg->live);
verbose(env, "=%s", types_buf);
verbose(env, " fp%d=%s", (-i - 1) * BPF_REG_SIZE, types_buf);
break;
}
}

View File

@@ -736,7 +736,7 @@ static void destroy_mem_alloc(struct bpf_mem_alloc *ma, int rcu_in_progress)
/* Defer barriers into worker to let the rest of map memory to be freed */
memset(ma, 0, sizeof(*ma));
INIT_WORK(&copy->work, free_mem_alloc_deferred);
queue_work(system_unbound_wq, &copy->work);
queue_work(system_dfl_wq, &copy->work);
}
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)

View File

@@ -646,7 +646,15 @@ static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
}
/* Called from syscall */
int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
static int stack_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
{
return bpf_stackmap_extract(map, key, value, true);
}
/* Called from syscall */
int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value,
bool delete)
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct stack_map_bucket *bucket, *old_bucket;
@@ -663,7 +671,10 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
memcpy(value, bucket->data, trace_len);
memset(value + trace_len, 0, map->value_size - trace_len);
old_bucket = xchg(&smap->buckets[id], bucket);
if (delete)
old_bucket = bucket;
else
old_bucket = xchg(&smap->buckets[id], bucket);
if (old_bucket)
pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
return 0;
@@ -754,6 +765,7 @@ const struct bpf_map_ops stack_trace_map_ops = {
.map_free = stack_map_free,
.map_get_next_key = stack_map_get_next_key,
.map_lookup_elem = stack_map_lookup_elem,
.map_lookup_and_delete_elem = stack_map_lookup_and_delete_elem,
.map_update_elem = stack_map_update_elem,
.map_delete_elem = stack_map_delete_elem,
.map_check_btf = map_check_no_btf,

View File

@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
*/
#include <crypto/sha2.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_trace.h>
@@ -38,6 +39,7 @@
#include <linux/tracepoint.h>
#include <linux/overflow.h>
#include <linux/cookie.h>
#include <linux/verification.h>
#include <net/netfilter/nf_bpf_link.h>
#include <net/netkit.h>
@@ -318,7 +320,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
err = bpf_percpu_cgroup_storage_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
err = bpf_stackmap_copy(map, key, value);
err = bpf_stackmap_extract(map, key, value, false);
} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
err = bpf_fd_array_map_lookup_elem(map, key, value);
} else if (IS_FD_HASH(map)) {
@@ -672,6 +674,7 @@ void btf_record_free(struct btf_record *rec)
case BPF_TIMER:
case BPF_REFCOUNT:
case BPF_WORKQUEUE:
case BPF_TASK_WORK:
/* Nothing to release */
break;
default:
@@ -725,6 +728,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
case BPF_TIMER:
case BPF_REFCOUNT:
case BPF_WORKQUEUE:
case BPF_TASK_WORK:
/* Nothing to acquire */
break;
default:
@@ -783,6 +787,13 @@ void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj)
bpf_wq_cancel_and_free(obj + rec->wq_off);
}
void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
{
if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK)))
return;
bpf_task_work_cancel_and_free(obj + rec->task_work_off);
}
void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
{
const struct btf_field *fields;
@@ -807,6 +818,9 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
case BPF_WORKQUEUE:
bpf_wq_cancel_and_free(field_ptr);
break;
case BPF_TASK_WORK:
bpf_task_work_cancel_and_free(field_ptr);
break;
case BPF_KPTR_UNREF:
WRITE_ONCE(*(u64 *)field_ptr, 0);
break;
@@ -860,6 +874,7 @@ static void bpf_map_free(struct bpf_map *map)
* the free of values or special fields allocated from bpf memory
* allocator.
*/
kfree(map->excl_prog_sha);
migrate_disable();
map->ops->map_free(map);
migrate_enable();
@@ -905,7 +920,7 @@ static void bpf_map_free_in_work(struct bpf_map *map)
/* Avoid spawning kworkers, since they all might contend
* for the same mutex like slab_mutex.
*/
queue_work(system_unbound_wq, &map->work);
queue_work(system_dfl_wq, &map->work);
}
static void bpf_map_free_rcu_gp(struct rcu_head *rcu)
@@ -1237,7 +1252,8 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
map->record = btf_parse_fields(btf, value_type,
BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR |
BPF_TASK_WORK,
map->value_size);
if (!IS_ERR_OR_NULL(map->record)) {
int i;
@@ -1269,6 +1285,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
break;
case BPF_TIMER:
case BPF_WORKQUEUE:
case BPF_TASK_WORK:
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY) {
@@ -1338,9 +1355,9 @@ static bool bpf_net_capable(void)
return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
}
#define BPF_MAP_CREATE_LAST_FIELD map_token_fd
#define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size
/* called via syscall */
static int map_create(union bpf_attr *attr, bool kernel)
static int map_create(union bpf_attr *attr, bpfptr_t uattr)
{
const struct bpf_map_ops *ops;
struct bpf_token *token = NULL;
@@ -1534,7 +1551,29 @@ static int map_create(union bpf_attr *attr, bool kernel)
attr->btf_vmlinux_value_type_id;
}
err = security_bpf_map_create(map, attr, token, kernel);
if (attr->excl_prog_hash) {
bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel);
if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) {
err = -EINVAL;
goto free_map;
}
map->excl_prog_sha = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
if (!map->excl_prog_sha) {
err = -ENOMEM;
goto free_map;
}
if (copy_from_bpfptr(map->excl_prog_sha, uprog_hash, SHA256_DIGEST_SIZE)) {
err = -EFAULT;
goto free_map;
}
} else if (attr->excl_prog_hash_size) {
return -EINVAL;
}
err = security_bpf_map_create(map, attr, token, uattr.is_kernel);
if (err)
goto free_map_sec;
@@ -1627,7 +1666,8 @@ struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
}
EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
int __weak bpf_stackmap_extract(struct bpf_map *map, void *key, void *value,
bool delete)
{
return -ENOTSUPP;
}
@@ -2158,7 +2198,8 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
} else if (map->map_type == BPF_MAP_TYPE_HASH ||
map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
if (!bpf_map_is_offloaded(map)) {
bpf_disable_instrumentation();
rcu_read_lock();
@@ -2761,8 +2802,44 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
}
}
static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr,
bool is_kernel)
{
bpfptr_t usig = make_bpfptr(attr->signature, is_kernel);
struct bpf_dynptr_kern sig_ptr, insns_ptr;
struct bpf_key *key = NULL;
void *sig;
int err = 0;
if (system_keyring_id_check(attr->keyring_id) == 0)
key = bpf_lookup_system_key(attr->keyring_id);
else
key = bpf_lookup_user_key(attr->keyring_id, 0);
if (!key)
return -EINVAL;
sig = kvmemdup_bpfptr(usig, attr->signature_size);
if (IS_ERR(sig)) {
bpf_key_put(key);
return -ENOMEM;
}
bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0,
attr->signature_size);
bpf_dynptr_init(&insns_ptr, prog->insnsi, BPF_DYNPTR_TYPE_LOCAL, 0,
prog->len * sizeof(struct bpf_insn));
err = bpf_verify_pkcs7_signature((struct bpf_dynptr *)&insns_ptr,
(struct bpf_dynptr *)&sig_ptr, key);
bpf_key_put(key);
kvfree(sig);
return err;
}
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt
#define BPF_PROG_LOAD_LAST_FIELD keyring_id
static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
{
@@ -2926,6 +3003,12 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
/* eBPF programs must be GPL compatible to use GPL-ed functions */
prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0;
if (attr->signature) {
err = bpf_prog_verify_signature(prog, attr, uattr.is_kernel);
if (err)
goto free_prog;
}
prog->orig_prog = NULL;
prog->jited = 0;
@@ -5161,6 +5244,9 @@ static int bpf_map_get_info_by_fd(struct file *file,
info_len = min_t(u32, sizeof(info), info_len);
memset(&info, 0, sizeof(info));
if (copy_from_user(&info, uinfo, info_len))
return -EFAULT;
info.type = map->map_type;
info.id = map->id;
info.key_size = map->key_size;
@@ -5185,6 +5271,25 @@ static int bpf_map_get_info_by_fd(struct file *file,
return err;
}
if (info.hash) {
char __user *uhash = u64_to_user_ptr(info.hash);
if (!map->ops->map_get_hash)
return -EINVAL;
if (info.hash_size != SHA256_DIGEST_SIZE)
return -EINVAL;
err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha);
if (err != 0)
return err;
if (copy_to_user(uhash, map->sha, SHA256_DIGEST_SIZE) != 0)
return -EFAULT;
} else if (info.hash_size) {
return -EINVAL;
}
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
return -EFAULT;
@@ -6008,7 +6113,7 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
switch (cmd) {
case BPF_MAP_CREATE:
err = map_create(&attr, uattr.is_kernel);
err = map_create(&attr, uattr);
break;
case BPF_MAP_LOOKUP_ELEM:
err = map_lookup_elem(&attr);

View File

@@ -116,31 +116,55 @@ struct tnum tnum_xor(struct tnum a, struct tnum b)
return TNUM(v & ~mu, mu);
}
/* Generate partial products by multiplying each bit in the multiplier (tnum a)
* with the multiplicand (tnum b), and add the partial products after
* appropriately bit-shifting them. Instead of directly performing tnum addition
* on the generated partial products, equivalenty, decompose each partial
* product into two tnums, consisting of the value-sum (acc_v) and the
* mask-sum (acc_m) and then perform tnum addition on them. The following paper
* explains the algorithm in more detail: https://arxiv.org/abs/2105.05398.
/* Perform long multiplication, iterating through the bits in a using rshift:
* - if LSB(a) is a known 0, keep current accumulator
* - if LSB(a) is a known 1, add b to current accumulator
* - if LSB(a) is unknown, take a union of the above cases.
*
* For example:
*
* acc_0: acc_1:
*
* 11 * -> 11 * -> 11 * -> union(0011, 1001) == x0x1
* x1 01 11
* ------ ------ ------
* 11 11 11
* xx 00 11
* ------ ------ ------
* ???? 0011 1001
*/
struct tnum tnum_mul(struct tnum a, struct tnum b)
{
u64 acc_v = a.value * b.value;
struct tnum acc_m = TNUM(0, 0);
struct tnum acc = TNUM(0, 0);
while (a.value || a.mask) {
/* LSB of tnum a is a certain 1 */
if (a.value & 1)
acc_m = tnum_add(acc_m, TNUM(0, b.mask));
acc = tnum_add(acc, b);
/* LSB of tnum a is uncertain */
else if (a.mask & 1)
acc_m = tnum_add(acc_m, TNUM(0, b.value | b.mask));
else if (a.mask & 1) {
/* acc = tnum_union(acc_0, acc_1), where acc_0 and
* acc_1 are partial accumulators for cases
* LSB(a) = certain 0 and LSB(a) = certain 1.
* acc_0 = acc + 0 * b = acc.
* acc_1 = acc + 1 * b = tnum_add(acc, b).
*/
acc = tnum_union(acc, tnum_add(acc, b));
}
/* Note: no case for LSB is certain 0 */
a = tnum_rshift(a, 1);
b = tnum_lshift(b, 1);
}
return tnum_add(TNUM(acc_v, 0), acc_m);
return acc;
}
bool tnum_overlap(struct tnum a, struct tnum b)
{
u64 mu;
mu = ~a.mask & ~b.mask;
return (a.value & mu) == (b.value & mu);
}
/* Note that if a and b disagree - i.e. one has a 'known 1' where the other has
@@ -155,6 +179,19 @@ struct tnum tnum_intersect(struct tnum a, struct tnum b)
return TNUM(v & ~mu, mu);
}
/* Returns a tnum with the uncertainty from both a and b, and in addition, new
* uncertainty at any position that a and b disagree. This represents a
* superset of the union of the concrete sets of both a and b. Despite the
* overapproximation, it is optimal.
*/
struct tnum tnum_union(struct tnum a, struct tnum b)
{
u64 v = a.value & b.value;
u64 mu = (a.value ^ b.value) | a.mask | b.mask;
return TNUM(v & ~mu, mu);
}
struct tnum tnum_cast(struct tnum a, u8 size)
{
a.value &= (1ULL << (size * 8)) - 1;

View File

@@ -899,8 +899,7 @@ static __always_inline u64 notrace bpf_prog_start_time(void)
static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
__acquires(RCU)
{
rcu_read_lock();
migrate_disable();
rcu_read_lock_dont_migrate();
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
@@ -949,8 +948,7 @@ static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
update_prog_stats(prog, start);
this_cpu_dec(*(prog->active));
migrate_enable();
rcu_read_unlock();
rcu_read_unlock_migrate();
}
static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
@@ -960,8 +958,7 @@ static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
/* Runtime stats are exported via actual BPF_LSM_CGROUP
* programs, not the shims.
*/
rcu_read_lock();
migrate_disable();
rcu_read_lock_dont_migrate();
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
@@ -974,8 +971,7 @@ static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
{
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
migrate_enable();
rcu_read_unlock();
rcu_read_unlock_migrate();
}
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
@@ -1033,8 +1029,7 @@ static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx)
__acquires(RCU)
{
rcu_read_lock();
migrate_disable();
rcu_read_lock_dont_migrate();
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
@@ -1048,8 +1043,7 @@ static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
update_prog_stats(prog, start);
migrate_enable();
rcu_read_unlock();
rcu_read_unlock_migrate();
}
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)

File diff suppressed because it is too large Load Diff

View File

@@ -6481,15 +6481,15 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
}
/*
* cgroup_get_from_id : get the cgroup associated with cgroup id
* __cgroup_get_from_id : get the cgroup associated with cgroup id
* @id: cgroup id
* On success return the cgrp or ERR_PTR on failure
* Only cgroups within current task's cgroup NS are valid.
* There are no cgroup NS restrictions.
*/
struct cgroup *cgroup_get_from_id(u64 id)
struct cgroup *__cgroup_get_from_id(u64 id)
{
struct kernfs_node *kn;
struct cgroup *cgrp, *root_cgrp;
struct cgroup *cgrp;
kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
if (!kn)
@@ -6511,6 +6511,22 @@ struct cgroup *cgroup_get_from_id(u64 id)
if (!cgrp)
return ERR_PTR(-ENOENT);
return cgrp;
}
/*
* cgroup_get_from_id : get the cgroup associated with cgroup id
* @id: cgroup id
* On success return the cgrp or ERR_PTR on failure
* Only cgroups within current task's cgroup NS are valid.
*/
struct cgroup *cgroup_get_from_id(u64 id)
{
struct cgroup *cgrp, *root_cgrp;
cgrp = __cgroup_get_from_id(id);
if (IS_ERR(cgrp))
return cgrp;
root_cgrp = current_cgns_cgroup_dfl();
if (!cgroup_is_descendant(cgrp, root_cgrp)) {

View File

@@ -11245,6 +11245,10 @@ static int __perf_event_set_bpf_prog(struct perf_event *event,
if (prog->kprobe_override && !is_kprobe)
return -EINVAL;
/* Writing to context allowed only for uprobes. */
if (prog->aux->kprobe_write_ctx && !is_uprobe)
return -EINVAL;
if (is_tracepoint || is_syscall_tp) {
int off = trace_event_get_offsets(event->tp_event);

View File

@@ -2765,6 +2765,13 @@ static void handle_swbp(struct pt_regs *regs)
handler_chain(uprobe, regs);
/*
* If user decided to take execution elsewhere, it makes little sense
* to execute the original instruction, so let's skip it.
*/
if (instruction_pointer(regs) != bp_vaddr)
goto out;
/* Try to optimize after first hit. */
arch_uprobe_optimize(&uprobe->arch, bp_vaddr);

View File

@@ -22,7 +22,6 @@
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/key.h>
#include <linux/verification.h>
#include <linux/namei.h>
#include <net/bpf_sk_storage.h>
@@ -1241,188 +1240,6 @@ static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
#ifdef CONFIG_KEYS
__bpf_kfunc_start_defs();
/**
* bpf_lookup_user_key - lookup a key by its serial
* @serial: key handle serial number
* @flags: lookup-specific flags
*
* Search a key with a given *serial* and the provided *flags*.
* If found, increment the reference count of the key by one, and
* return it in the bpf_key structure.
*
* The bpf_key structure must be passed to bpf_key_put() when done
* with it, so that the key reference count is decremented and the
* bpf_key structure is freed.
*
* Permission checks are deferred to the time the key is used by
* one of the available key-specific kfuncs.
*
* Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested
* special keyring (e.g. session keyring), if it doesn't yet exist.
* Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting
* for the key construction, and to retrieve uninstantiated keys (keys
* without data attached to them).
*
* Return: a bpf_key pointer with a valid key pointer if the key is found, a
* NULL pointer otherwise.
*/
__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
{
key_ref_t key_ref;
struct bpf_key *bkey;
if (flags & ~KEY_LOOKUP_ALL)
return NULL;
/*
* Permission check is deferred until the key is used, as the
* intent of the caller is unknown here.
*/
key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
if (IS_ERR(key_ref))
return NULL;
bkey = kmalloc(sizeof(*bkey), GFP_KERNEL);
if (!bkey) {
key_put(key_ref_to_ptr(key_ref));
return NULL;
}
bkey->key = key_ref_to_ptr(key_ref);
bkey->has_ref = true;
return bkey;
}
/**
* bpf_lookup_system_key - lookup a key by a system-defined ID
* @id: key ID
*
* Obtain a bpf_key structure with a key pointer set to the passed key ID.
* The key pointer is marked as invalid, to prevent bpf_key_put() from
* attempting to decrement the key reference count on that pointer. The key
* pointer set in such way is currently understood only by
* verify_pkcs7_signature().
*
* Set *id* to one of the values defined in include/linux/verification.h:
* 0 for the primary keyring (immutable keyring of system keys);
* VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
* (where keys can be added only if they are vouched for by existing keys
* in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
* keyring (primarily used by the integrity subsystem to verify a kexec'ed
 * kernel image and, possibly, the initramfs signature).
*
* Return: a bpf_key pointer with an invalid key pointer set from the
* pre-determined ID on success, a NULL pointer otherwise
*/
__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
{
struct bpf_key *bkey;
if (system_keyring_id_check(id) < 0)
return NULL;
bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
if (!bkey)
return NULL;
bkey->key = (struct key *)(unsigned long)id;
bkey->has_ref = false;
return bkey;
}
/**
* bpf_key_put - decrement key reference count if key is valid and free bpf_key
* @bkey: bpf_key structure
*
* Decrement the reference count of the key inside *bkey*, if the pointer
* is valid, and free *bkey*.
*/
__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
{
if (bkey->has_ref)
key_put(bkey->key);
kfree(bkey);
}
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
/**
* bpf_verify_pkcs7_signature - verify a PKCS#7 signature
* @data_p: data to verify
* @sig_p: signature of the data
* @trusted_keyring: keyring with keys trusted for signature verification
*
* Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
* with keys in a keyring referenced by *trusted_keyring*.
*
* Return: 0 on success, a negative value on error.
*/
__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
struct bpf_dynptr *sig_p,
struct bpf_key *trusted_keyring)
{
struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
const void *data, *sig;
u32 data_len, sig_len;
int ret;
if (trusted_keyring->has_ref) {
/*
* Do the permission check deferred in bpf_lookup_user_key().
* See bpf_lookup_user_key() for more details.
*
* A call to key_task_permission() here would be redundant, as
* it is already done by keyring_search() called by
* find_asymmetric_key().
*/
ret = key_validate(trusted_keyring->key);
if (ret < 0)
return ret;
}
data_len = __bpf_dynptr_size(data_ptr);
data = __bpf_dynptr_data(data_ptr, data_len);
sig_len = __bpf_dynptr_size(sig_ptr);
sig = __bpf_dynptr_data(sig_ptr, sig_len);
return verify_pkcs7_signature(data, data_len, sig, sig_len,
trusted_keyring->key,
VERIFYING_UNSPECIFIED_SIGNATURE, NULL,
NULL);
}
#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(key_sig_kfunc_set)
BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
#endif
BTF_KFUNCS_END(key_sig_kfunc_set)
static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = {
.owner = THIS_MODULE,
.set = &key_sig_kfunc_set,
};
static int __init bpf_key_sig_kfuncs_init(void)
{
return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
&bpf_key_sig_kfunc_set);
}
late_initcall(bpf_key_sig_kfuncs_init);
#endif /* CONFIG_KEYS */
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1521,8 +1338,6 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
{
if (off < 0 || off >= sizeof(struct pt_regs))
return false;
if (type != BPF_READ)
return false;
if (off % size != 0)
return false;
/*
@@ -1532,6 +1347,9 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
if (off + size > sizeof(struct pt_regs))
return false;
if (type == BPF_WRITE)
prog->aux->kprobe_write_ctx = true;
return true;
}
@@ -2728,20 +2546,25 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
struct pt_regs *regs;
int err;
/*
* graph tracer framework ensures we won't migrate, so there is no need
* to use migrate_disable for bpf_prog_run again. The check here just for
* __this_cpu_inc_return.
*/
cant_sleep();
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
bpf_prog_inc_misses_counter(link->link.prog);
err = 1;
goto out;
}
migrate_disable();
rcu_read_lock();
regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
migrate_enable();
out:
__this_cpu_dec(bpf_prog_active);
@@ -2913,6 +2736,10 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (!is_kprobe_multi(prog))
return -EINVAL;
/* Writing to context is not allowed for kprobes. */
if (prog->aux->kprobe_write_ctx)
return -EINVAL;
flags = attr->link_create.kprobe_multi.flags;
if (flags & ~BPF_F_KPROBE_MULTI_RETURN)
return -EINVAL;