Commit d839a731 authored by Sebastian Andrzej Siewior, committed by Paolo Abeni
Browse files

net: Optimize xdp_do_flush() with bpf_net_context infos.



Every NIC driver utilizing XDP should invoke xdp_do_flush() after
processing all packets. With the introduction of the bpf_net_context
logic the flush lists (for dev, CPU-map and xsk) are lazily initialized
only if used. However, xdp_do_flush() tries to flush all three of them, so
all three lists are always initialized and the likely empty lists are
"iterated".
Without the usage of XDP but with CONFIG_DEBUG_NET the lists are also
initialized due to xdp_do_check_flushed().

Jakub suggested utilizing the hints in bpf_net_context to avoid invoking
the flush functions. This also avoids initializing the lists which
are otherwise unused.

Introduce bpf_net_ctx_get_all_used_flush_lists() to return each
individual list only if it is non-empty. Use this logic in xdp_do_flush()
and xdp_do_check_flushed(). Remove the no longer needed .*_check_flush().

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parent 2896624b
Loading
Loading
Loading
Loading
+4 −6
Original line number Diff line number Diff line
@@ -2494,7 +2494,7 @@ struct sk_buff;
struct bpf_dtab_netdev;
struct bpf_cpu_map_entry;

void __dev_flush(void);
void __dev_flush(struct list_head *flush_list);
int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
		    struct net_device *dev_rx);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
@@ -2507,7 +2507,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
			   struct bpf_prog *xdp_prog, struct bpf_map *map,
			   bool exclude_ingress);

void __cpu_map_flush(void);
void __cpu_map_flush(struct list_head *flush_list);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
		    struct net_device *dev_rx);
int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
@@ -2644,8 +2644,6 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);

bool dev_check_flush(void);
bool cpu_map_check_flush(void);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -2738,7 +2736,7 @@ static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd)
	return ERR_PTR(-EOPNOTSUPP);
}

static inline void __dev_flush(void)
static inline void __dev_flush(struct list_head *flush_list)
{
}

@@ -2784,7 +2782,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
	return 0;
}

static inline void __cpu_map_flush(void)
static inline void __cpu_map_flush(struct list_head *flush_list)
{
}

+27 −0
Original line number Diff line number Diff line
@@ -829,6 +829,33 @@ static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void)
	return &bpf_net_ctx->xskmap_map_flush_list;
}

/*
 * Hand back the flush lists of the current bpf_net_context that actually
 * have work queued.
 *
 * @lh_map: set to the context's cpu_map_flush_list, or NULL
 * @lh_dev: set to the context's dev_map_flush_list, or NULL
 * @lh_xsk: set to the context's xskmap_map_flush_list, or NULL
 *
 * All three outputs start out as NULL. A list is reported only when its
 * BPF_RI_F_*_INIT bit is set in ri.kern_flags and the list is non-empty.
 * Without CONFIG_BPF_SYSCALL nothing is reported; the xsk list additionally
 * requires CONFIG_XDP_SOCKETS.
 *
 * NOTE(review): the *_INIT bits presumably mark list heads that have been
 * initialized; each bit is tested before list_empty() so a list whose bit
 * is clear is never inspected.
 */
static inline void bpf_net_ctx_get_all_used_flush_lists(struct list_head **lh_map,
							struct list_head **lh_dev,
							struct list_head **lh_xsk)
{
	struct bpf_net_context *ctx = bpf_net_ctx_get();
	u32 flags = ctx->ri.kern_flags;

	*lh_xsk = NULL;
	*lh_dev = NULL;
	*lh_map = NULL;

	if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
		return;

	if (flags & BPF_RI_F_DEV_MAP_INIT) {
		struct list_head *dev_list = &ctx->dev_map_flush_list;

		if (!list_empty(dev_list))
			*lh_dev = dev_list;
	}

	if (flags & BPF_RI_F_CPU_MAP_INIT) {
		struct list_head *cpu_list = &ctx->cpu_map_flush_list;

		if (!list_empty(cpu_list))
			*lh_map = cpu_list;
	}

	if (IS_ENABLED(CONFIG_XDP_SOCKETS) && (flags & BPF_RI_F_XSK_MAP_INIT)) {
		struct list_head *xsk_list = &ctx->xskmap_map_flush_list;

		if (!list_empty(xsk_list))
			*lh_xsk = xsk_list;
	}
}

/* Compute the linear packet data range [data, data_end) which
 * will be accessed by various program types (cls_bpf, act_bpf,
 * lwt, ...). Subsystems allowing direct data access must (!)
+2 −12
Original line number Diff line number Diff line
@@ -121,7 +121,7 @@ struct xsk_tx_metadata_ops {

int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(void);
void __xsk_map_flush(struct list_head *flush_list);

/**
 *  xsk_tx_metadata_to_compl - Save enough relevant metadata information
@@ -206,7 +206,7 @@ static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
	return -EOPNOTSUPP;
}

static inline void __xsk_map_flush(void)
static inline void __xsk_map_flush(struct list_head *flush_list)
{
}

@@ -228,14 +228,4 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
}

#endif /* CONFIG_XDP_SOCKETS */

#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET)
bool xsk_map_check_flush(void);
#else
/*
 * Fallback used unless both CONFIG_XDP_SOCKETS and CONFIG_DEBUG_NET are
 * defined: always reports that nothing had to be flushed.
 */
static inline bool xsk_map_check_flush(void)
{
	return false;
}
#endif

#endif /* _LINUX_XDP_SOCK_H */
+1 −12
Original line number Diff line number Diff line
@@ -757,9 +757,8 @@ int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
	return ret;
}

void __cpu_map_flush(void)
void __cpu_map_flush(struct list_head *flush_list)
{
	struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
	struct xdp_bulk_queue *bq, *tmp;

	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -769,13 +768,3 @@ void __cpu_map_flush(void)
		wake_up_process(bq->obj->kthread);
	}
}

#ifdef CONFIG_DEBUG_NET
/*
 * Flush the CPU-map flush list if it still holds entries.
 *
 * Returns true when the list was non-empty and __cpu_map_flush() was
 * invoked, false when there was nothing to flush.
 */
bool cpu_map_check_flush(void)
{
	bool pending = !list_empty(bpf_net_ctx_get_cpu_map_flush_list());

	if (pending)
		__cpu_map_flush();

	return pending;
}
#endif
+1 −12
Original line number Diff line number Diff line
@@ -412,9 +412,8 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 * driver before returning from its napi->poll() routine. See the comment above
 * xdp_do_flush() in filter.c.
 */
void __dev_flush(void)
void __dev_flush(struct list_head *flush_list)
{
	struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
	struct xdp_dev_bulk_queue *bq, *tmp;

	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -425,16 +424,6 @@ void __dev_flush(void)
	}
}

#ifdef CONFIG_DEBUG_NET
/*
 * Flush the dev flush list if it still holds entries.
 *
 * Returns true when the list was non-empty and __dev_flush() was invoked,
 * false when there was nothing to flush.
 */
bool dev_check_flush(void)
{
	bool pending = !list_empty(bpf_net_ctx_get_dev_flush_list());

	if (pending)
		__dev_flush();

	return pending;
}
#endif

/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
 * by local_bh_disable() (from XDP calls inside NAPI). The
 * rcu_read_lock_bh_held() below makes lockdep accept both.
Loading