Commit 6e10b635 authored by Alexei Starovoitov
Browse files

Merge branch 'introduce-bpf_wq'

Benjamin Tissoires says:

====================
Introduce bpf_wq

This is a followup of sleepable bpf_timer[0].

When discussing sleepable bpf_timer, it was thought that we should give
a try to bpf_wq, as the 2 APIs are similar but distinct enough to
justify a new one.

So here it is.

I tried to keep as much common code as possible in kernel/bpf/helpers.c
but I couldn't avoid code duplication in kernel/bpf/verifier.c.

This series introduces a basic bpf_wq support:
- creation is supported
- assignment is supported
- running a simple bpf_wq is also supported.

We will probably need to extend the API further with:
- a full delayed_work API (can be piggy backed on top with a correct
  flag)
- bpf_wq_cancel() <- apparently not, this is shooting ourselves in the
  foot
- bpf_wq_cancel_sync() (for sleepable programs)
- documentation
---

For reference, the use cases I have in mind:

---

Basically, I need to be able to defer a HID-BPF program for the
following reasons (from the aforementioned patch):
1. defer an event:
   Sometimes we receive an out of proximity event, but the device can not
   be trusted enough, and we need to ensure that we won't receive another
   one in the following n milliseconds. So we need to wait those n
   milliseconds, and eventually re-inject that event in the stack.

2. inject new events in reaction to one given event:
   We might want to transform one given event into several. This is the
   case for macro keys where a single key press is supposed to send
   a sequence of key presses. But this could also be used to patch a
   faulty behavior, if a device forgets to send a release event.

3. communicate with the device in reaction to one event:
   We might want to communicate back to the device after a given event.
   For example a device might send us an event saying that it came back
   from sleeping state and needs to be re-initialized.

Currently we can achieve that by keeping a userspace program around,
raising a bpf event, and letting that userspace program inject the events
and commands.
However, we are keeping that program alive as a daemon solely for
scheduling commands. There is no logic in it, so it doesn't really justify
an actual userspace wakeup. So a kernel workqueue seems simpler to handle.

bpf_timers currently run in a soft IRQ context; this patch
series implements a sleepable context for them.

Cheers,
Benjamin

[0] https://lore.kernel.org/all/20240408-hid-bpf-sleepable-v6-0-0499ddd91b94@kernel.org/

Changes in v2:
- took previous review into account
- mainly dropped BPF_F_WQ_SLEEPABLE
- Link to v1: https://lore.kernel.org/r/20240416-bpf_wq-v1-0-c9e66092f842@kernel.org

====================

Link: https://lore.kernel.org/r/20240420-bpf_wq-v2-0-6c986a5a741f@kernel.org


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents a7de265c 8290dba5
Loading
Loading
Loading
Loading
+12 −1
Original line number Diff line number Diff line
@@ -185,7 +185,7 @@ struct bpf_map_ops {

enum {
	/* Support at most 10 fields in a BTF type */
	BTF_FIELDS_MAX	   = 10,
	BTF_FIELDS_MAX	   = 11,
};

enum btf_field_type {
@@ -202,6 +202,7 @@ enum btf_field_type {
	BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE,
	BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD,
	BPF_REFCOUNT   = (1 << 9),
	BPF_WORKQUEUE  = (1 << 10),
};

typedef void (*btf_dtor_kfunc_t)(void *);
@@ -238,6 +239,7 @@ struct btf_record {
	u32 field_mask;
	int spin_lock_off;
	int timer_off;
	int wq_off;
	int refcount_off;
	struct btf_field fields[];
};
@@ -312,6 +314,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
		return "bpf_spin_lock";
	case BPF_TIMER:
		return "bpf_timer";
	case BPF_WORKQUEUE:
		return "bpf_wq";
	case BPF_KPTR_UNREF:
	case BPF_KPTR_REF:
		return "kptr";
@@ -340,6 +344,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
		return sizeof(struct bpf_spin_lock);
	case BPF_TIMER:
		return sizeof(struct bpf_timer);
	case BPF_WORKQUEUE:
		return sizeof(struct bpf_wq);
	case BPF_KPTR_UNREF:
	case BPF_KPTR_REF:
	case BPF_KPTR_PERCPU:
@@ -367,6 +373,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
		return __alignof__(struct bpf_spin_lock);
	case BPF_TIMER:
		return __alignof__(struct bpf_timer);
	case BPF_WORKQUEUE:
		return __alignof__(struct bpf_wq);
	case BPF_KPTR_UNREF:
	case BPF_KPTR_REF:
	case BPF_KPTR_PERCPU:
@@ -406,6 +414,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
		/* RB_ROOT_CACHED 0-inits, no need to do anything after memset */
	case BPF_SPIN_LOCK:
	case BPF_TIMER:
	case BPF_WORKQUEUE:
	case BPF_KPTR_UNREF:
	case BPF_KPTR_REF:
	case BPF_KPTR_PERCPU:
@@ -525,6 +534,7 @@ static inline void zero_map_value(struct bpf_map *map, void *dst)
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
			   bool lock_src);
void bpf_timer_cancel_and_free(void *timer);
void bpf_wq_cancel_and_free(void *timer);
void bpf_list_head_free(const struct btf_field *field, void *list_head,
			struct bpf_spin_lock *spin_lock);
void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
@@ -2195,6 +2205,7 @@ void bpf_map_free_record(struct bpf_map *map);
struct btf_record *btf_record_dup(const struct btf_record *rec);
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);

+1 −0
Original line number Diff line number Diff line
@@ -426,6 +426,7 @@ struct bpf_verifier_state {
	 * while they are still in use.
	 */
	bool used_as_loop_entry;
	bool in_sleepable;

	/* first and last insn idx of this verifier state */
	u32 first_insn_idx;
+4 −0
Original line number Diff line number Diff line
@@ -7306,6 +7306,10 @@ struct bpf_timer {
	__u64 __opaque[2];
} __attribute__((aligned(8)));

/* Opaque placeholder for a "bpf_wq" (BPF workqueue) field: two 64-bit
 * words with 8-byte alignment, i.e. the same size and alignment as
 * struct bpf_timer. The member is named __opaque; presumably callers are
 * not expected to read or write it directly.
 */
struct bpf_wq {
	__u64 __opaque[2];
} __attribute__((aligned(8)));

struct bpf_dynptr {
	__u64 __opaque[2];
} __attribute__((aligned(8)));
+11 −7
Original line number Diff line number Diff line
@@ -428,17 +428,21 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
	return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

static void array_map_free_timers(struct bpf_map *map)
static void array_map_free_timers_wq(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* We don't reset or free fields other than timer on uref dropping to zero. */
	if (!btf_record_has_field(map->record, BPF_TIMER))
		return;

	/* We don't reset or free fields other than timer and workqueue
	 * on uref dropping to zero.
	 */
	if (btf_record_has_field(map->record, BPF_TIMER))
		for (i = 0; i < array->map.max_entries; i++)
			bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));

	if (btf_record_has_field(map->record, BPF_WORKQUEUE))
		for (i = 0; i < array->map.max_entries; i++)
			bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
@@ -782,7 +786,7 @@ const struct bpf_map_ops array_map_ops = {
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_release_uref = array_map_free_timers,
	.map_release_uref = array_map_free_timers_wq,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
+17 −0
Original line number Diff line number Diff line
@@ -3464,6 +3464,15 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
			goto end;
		}
	}
	if (field_mask & BPF_WORKQUEUE) {
		if (!strcmp(name, "bpf_wq")) {
			if (*seen_mask & BPF_WORKQUEUE)
				return -E2BIG;
			*seen_mask |= BPF_WORKQUEUE;
			type = BPF_WORKQUEUE;
			goto end;
		}
	}
	field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
	field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
	field_mask_test_name(BPF_RB_ROOT,   "bpf_rb_root");
@@ -3515,6 +3524,7 @@ static int btf_find_struct_field(const struct btf *btf,
		switch (field_type) {
		case BPF_SPIN_LOCK:
		case BPF_TIMER:
		case BPF_WORKQUEUE:
		case BPF_LIST_NODE:
		case BPF_RB_NODE:
		case BPF_REFCOUNT:
@@ -3582,6 +3592,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
		switch (field_type) {
		case BPF_SPIN_LOCK:
		case BPF_TIMER:
		case BPF_WORKQUEUE:
		case BPF_LIST_NODE:
		case BPF_RB_NODE:
		case BPF_REFCOUNT:
@@ -3816,6 +3827,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type

	rec->spin_lock_off = -EINVAL;
	rec->timer_off = -EINVAL;
	rec->wq_off = -EINVAL;
	rec->refcount_off = -EINVAL;
	for (i = 0; i < cnt; i++) {
		field_type_size = btf_field_type_size(info_arr[i].type);
@@ -3846,6 +3858,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
			/* Cache offset for faster lookup at runtime */
			rec->timer_off = rec->fields[i].offset;
			break;
		case BPF_WORKQUEUE:
			WARN_ON_ONCE(rec->wq_off >= 0);
			/* Cache offset for faster lookup at runtime */
			rec->wq_off = rec->fields[i].offset;
			break;
		case BPF_REFCOUNT:
			WARN_ON_ONCE(rec->refcount_off >= 0);
			/* Cache offset for faster lookup at runtime */
Loading