Commit b2d66643 authored by Jakub Kicinski
Browse files
Daniel Borkmann says:

====================
pull-request: bpf 2023-11-21

We've added 19 non-merge commits during the last 4 day(s) which contain
a total of 18 files changed, 1043 insertions(+), 416 deletions(-).

The main changes are:

1) Fix BPF verifier to validate callbacks as if they are called an unknown
   number of times in order to fix not detecting some unsafe programs,
   from Eduard Zingerman.

2) Fix bpf_redirect_peer() handling which missed proper stats accounting
   for veth and netkit and also generally fix missing stats for the latter,
   from Peilin Ye, Daniel Borkmann et al.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: check if max number of bpf_loop iterations is tracked
  bpf: keep track of max number of bpf_loop callback iterations
  selftests/bpf: test widening for iterating callbacks
  bpf: widening for callback iterators
  selftests/bpf: tests for iterating callbacks
  bpf: verify callbacks as if they are called unknown number of times
  bpf: extract setup_func_entry() utility function
  bpf: extract __check_reg_arg() utility function
  selftests/bpf: fix bpf_loop_bench for new callback verification scheme
  selftests/bpf: track string payload offset as scalar in strobemeta
  selftests/bpf: track tcp payload offset as scalar in xdp_synproxy
  selftests/bpf: Add netkit to tc_redirect selftest
  selftests/bpf: De-veth-ize the tc_redirect test case
  bpf, netkit: Add indirect call wrapper for fetching peer dev
  bpf: Fix dev's rx stats for bpf_redirect_peer traffic
  veth: Use tstats per-CPU traffic counters
  netkit: Add tstats per-CPU traffic counters
  net: Move {l,t,d}stats allocation to core and convert veth & vrf
  net, vrf: Move dstats structure to core
====================

Link: https://lore.kernel.org/r/20231121193113.11796-1-daniel@iogearbox.net


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 495ec91b acb12c85
Loading
Loading
Loading
Loading
+20 −2
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#include <linux/filter.h>
#include <linux/netfilter_netdev.h>
#include <linux/bpf_mprog.h>
#include <linux/indirect_call_wrapper.h>

#include <net/netkit.h>
#include <net/dst.h>
@@ -68,6 +69,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
	netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
	const struct bpf_mprog_entry *entry;
	struct net_device *peer;
	int len = skb->len;

	rcu_read_lock();
	peer = rcu_dereference(nk->peer);
@@ -85,15 +87,22 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
	case NETKIT_PASS:
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		__netif_rx(skb);
		if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
			dev_sw_netstats_tx_add(dev, 1, len);
			dev_sw_netstats_rx_add(peer, len);
		} else {
			goto drop_stats;
		}
		break;
	case NETKIT_REDIRECT:
		dev_sw_netstats_tx_add(dev, 1, len);
		skb_do_redirect(skb);
		break;
	case NETKIT_DROP:
	default:
drop:
		kfree_skb(skb);
drop_stats:
		dev_core_stats_tx_dropped_inc(dev);
		ret_dev = NET_XMIT_DROP;
		break;
@@ -169,11 +178,18 @@ static void netkit_set_headroom(struct net_device *dev, int headroom)
	rcu_read_unlock();
}

static struct net_device *netkit_peer_dev(struct net_device *dev)
INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev)
{
	return rcu_dereference(netkit_priv(dev)->peer);
}

/*
 * Fill @stats for a netkit device; installed as .ndo_get_stats64 in
 * netkit_netdev_ops.
 *
 * @dev:   device whose counters are being read
 * @stats: output structure to populate
 *
 * The counters kept in the per-CPU dev->tstats are gathered through
 * dev_fetch_sw_netstats(); tx_dropped is then taken from the device-level
 * tx_dropped counter via DEV_STATS_READ().
 */
static void netkit_get_stats(struct net_device *dev,
			     struct rtnl_link_stats64 *stats)
{
	dev_fetch_sw_netstats(stats, dev->tstats);
	/* Assigned (not accumulated) after the tstats fetch above. */
	stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
}

static void netkit_uninit(struct net_device *dev);

static const struct net_device_ops netkit_netdev_ops = {
@@ -184,6 +200,7 @@ static const struct net_device_ops netkit_netdev_ops = {
	.ndo_set_rx_headroom	= netkit_set_headroom,
	.ndo_get_iflink		= netkit_get_iflink,
	.ndo_get_peer_dev	= netkit_peer_dev,
	.ndo_get_stats64	= netkit_get_stats,
	.ndo_uninit		= netkit_uninit,
	.ndo_features_check	= passthru_features_check,
};
@@ -218,6 +235,7 @@ static void netkit_setup(struct net_device *dev)

	ether_setup(dev);
	dev->max_mtu = ETH_MAX_MTU;
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;

	dev->flags |= IFF_NOARP;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+12 −32
Original line number Diff line number Diff line
@@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
	skb_tx_timestamp(skb);
	if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
		if (!use_napi)
			dev_lstats_add(dev, length);
			dev_sw_netstats_tx_add(dev, 1, length);
		else
			__veth_xdp_flush(rq);
	} else {
@@ -387,14 +387,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
	return ret;
}

static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
{
	struct veth_priv *priv = netdev_priv(dev);

	dev_lstats_read(dev, packets, bytes);
	return atomic64_read(&priv->dropped);
}

static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
@@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev,
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer;
	struct veth_stats rx;
	u64 packets, bytes;

	tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
	tot->tx_bytes = bytes;
	tot->tx_packets = packets;
	tot->tx_dropped = atomic64_read(&priv->dropped);
	dev_fetch_sw_netstats(tot, dev->tstats);

	veth_stats_rx(&rx, dev);
	tot->tx_dropped += rx.xdp_tx_err;
	tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
	tot->rx_bytes = rx.xdp_bytes;
	tot->rx_packets = rx.xdp_packets;
	tot->rx_bytes += rx.xdp_bytes;
	tot->rx_packets += rx.xdp_packets;

	rcu_read_lock();
	peer = rcu_dereference(priv->peer);
	if (peer) {
		veth_stats_tx(peer, &packets, &bytes);
		tot->rx_bytes += bytes;
		tot->rx_packets += packets;
		struct rtnl_link_stats64 tot_peer = {};

		dev_fetch_sw_netstats(&tot_peer, peer->tstats);
		tot->rx_bytes += tot_peer.tx_bytes;
		tot->rx_packets += tot_peer.tx_packets;

		veth_stats_rx(&rx, peer);
		tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
@@ -1506,25 +1498,12 @@ static void veth_free_queues(struct net_device *dev)

static int veth_dev_init(struct net_device *dev)
{
	int err;

	dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
	if (!dev->lstats)
		return -ENOMEM;

	err = veth_alloc_queues(dev);
	if (err) {
		free_percpu(dev->lstats);
		return err;
	}

	return 0;
	return veth_alloc_queues(dev);
}

static void veth_dev_free(struct net_device *dev)
{
	veth_free_queues(dev);
	free_percpu(dev->lstats);
}

#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -1796,6 +1775,7 @@ static void veth_setup(struct net_device *dev)
			       NETIF_F_HW_VLAN_STAG_RX);
	dev->needs_free_netdev = true;
	dev->priv_destructor = veth_dev_free;
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
	dev->max_mtu = ETH_MAX_MTU;

	dev->hw_features = VETH_FEATURES;
+10 −28
Original line number Diff line number Diff line
@@ -121,22 +121,12 @@ struct net_vrf {
	int			ifindex;
};

struct pcpu_dstats {
	u64			tx_pkts;
	u64			tx_bytes;
	u64			tx_drps;
	u64			rx_pkts;
	u64			rx_bytes;
	u64			rx_drps;
	struct u64_stats_sync	syncp;
};

static void vrf_rx_stats(struct net_device *dev, int len)
{
	struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);

	u64_stats_update_begin(&dstats->syncp);
	dstats->rx_pkts++;
	dstats->rx_packets++;
	dstats->rx_bytes += len;
	u64_stats_update_end(&dstats->syncp);
}
@@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_device *dev,
		do {
			start = u64_stats_fetch_begin(&dstats->syncp);
			tbytes = dstats->tx_bytes;
			tpkts = dstats->tx_pkts;
			tdrops = dstats->tx_drps;
			tpkts = dstats->tx_packets;
			tdrops = dstats->tx_drops;
			rbytes = dstats->rx_bytes;
			rpkts = dstats->rx_pkts;
			rpkts = dstats->rx_packets;
		} while (u64_stats_fetch_retry(&dstats->syncp, start));
		stats->tx_bytes += tbytes;
		stats->tx_packets += tpkts;
@@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
	if (likely(__netif_rx(skb) == NET_RX_SUCCESS))
		vrf_rx_stats(dev, len);
	else
		this_cpu_inc(dev->dstats->rx_drps);
		this_cpu_inc(dev->dstats->rx_drops);

	return NETDEV_TX_OK;
}
@@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
		struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);

		u64_stats_update_begin(&dstats->syncp);
		dstats->tx_pkts++;
		dstats->tx_packets++;
		dstats->tx_bytes += len;
		u64_stats_update_end(&dstats->syncp);
	} else {
		this_cpu_inc(dev->dstats->tx_drps);
		this_cpu_inc(dev->dstats->tx_drops);
	}

	return ret;
@@ -1174,22 +1164,15 @@ static void vrf_dev_uninit(struct net_device *dev)

	vrf_rtable_release(dev, vrf);
	vrf_rt6_release(dev, vrf);

	free_percpu(dev->dstats);
	dev->dstats = NULL;
}

static int vrf_dev_init(struct net_device *dev)
{
	struct net_vrf *vrf = netdev_priv(dev);

	dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
	if (!dev->dstats)
		goto out_nomem;

	/* create the default dst which points back to us */
	if (vrf_rtable_create(dev) != 0)
		goto out_stats;
		goto out_nomem;

	if (vrf_rt6_create(dev) != 0)
		goto out_rth;
@@ -1203,9 +1186,6 @@ static int vrf_dev_init(struct net_device *dev)

out_rth:
	vrf_rtable_release(dev, vrf);
out_stats:
	free_percpu(dev->dstats);
	dev->dstats = NULL;
out_nomem:
	return -ENOMEM;
}
@@ -1704,6 +1684,8 @@ static void vrf_setup(struct net_device *dev)
	dev->min_mtu = IPV6_MIN_MTU;
	dev->max_mtu = IP6_MAX_MTU;
	dev->mtu = dev->max_mtu;

	dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
}

static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
+16 −0
Original line number Diff line number Diff line
@@ -301,6 +301,17 @@ struct bpf_func_state {
	struct tnum callback_ret_range;
	bool in_async_callback_fn;
	bool in_exception_callback_fn;
	/* For callback calling functions that limit number of possible
	 * callback executions (e.g. bpf_loop) keeps track of current
	 * simulated iteration number.
	 * Value in frame N refers to number of times callback with frame
	 * N+1 was simulated, e.g. for the following call:
	 *
	 *   bpf_loop(..., fn, ...); | suppose current frame is N
	 *                           | fn would be simulated in frame N+1
	 *                           | number of simulations is tracked in frame N
	 */
	u32 callback_depth;

	/* The following fields should be last. See copy_func_state() */
	int acquired_refs;
@@ -400,6 +411,7 @@ struct bpf_verifier_state {
	struct bpf_idx_pair *jmp_history;
	u32 jmp_history_cnt;
	u32 dfs_depth;
	u32 callback_unroll_depth;
};

#define bpf_get_spilled_reg(slot, frame, mask)				\
@@ -511,6 +523,10 @@ struct bpf_insn_aux_data {
	 * this instruction, regardless of any heuristics
	 */
	bool force_checkpoint;
	/* true if instruction is a call to a helper function that
	 * accepts callback function as a parameter.
	 */
	bool calls_callback;
};

#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+26 −4
Original line number Diff line number Diff line
@@ -1797,6 +1797,13 @@ enum netdev_ml_priv_type {
	ML_PRIV_CAN,
};

/*
 * Kind of per-CPU statistics a net_device uses, stored in
 * net_device::pcpu_stat_type. Each non-NONE value names the structure
 * noted beside it.
 */
enum netdev_stat_type {
	NETDEV_PCPU_STAT_NONE,   /* driver handles stats alloc/free itself */
	NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */
	NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */
	NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
};

/**
 *	struct net_device - The DEVICE structure.
 *
@@ -1991,10 +1998,14 @@ enum netdev_ml_priv_type {
 *
 * 	@ml_priv:	Mid-layer private
 *	@ml_priv_type:  Mid-layer private type
 * 	@lstats:	Loopback statistics
 * 	@tstats:	Tunnel statistics
 * 	@dstats:	Dummy statistics
 * 	@vstats:	Virtual ethernet statistics
 *
 *	@pcpu_stat_type:	Type of device statistics which the core should
 *				allocate/free: none, lstats, tstats, dstats. none
 *				means the driver is handling statistics allocation/
 *				freeing internally.
 *	@lstats:		Loopback statistics: packets, bytes
 *	@tstats:		Tunnel statistics: RX/TX packets, RX/TX bytes
 *	@dstats:		Dummy statistics: RX/TX/drop packets, RX/TX bytes
 *
 *	@garp_port:	GARP
 *	@mrp_port:	MRP
@@ -2354,6 +2365,7 @@ struct net_device {
	void				*ml_priv;
	enum netdev_ml_priv_type	ml_priv_type;

	enum netdev_stat_type		pcpu_stat_type:8;
	union {
		struct pcpu_lstats __percpu		*lstats;
		struct pcpu_sw_netstats __percpu	*tstats;
@@ -2755,6 +2767,16 @@ struct pcpu_sw_netstats {
	struct u64_stats_sync   syncp;
} __aligned(4 * sizeof(u64));

/*
 * Per-CPU "dstats" counters: packets, bytes and drops for each of the
 * RX and TX directions. The structure is aligned to 8 * sizeof(u64) bytes.
 */
struct pcpu_dstats {
	u64			rx_packets;
	u64			rx_bytes;
	u64			rx_drops;
	u64			tx_packets;
	u64			tx_bytes;
	u64			tx_drops;
	/* Passed to the u64_stats_update_*() / u64_stats_fetch_*() helpers. */
	struct u64_stats_sync	syncp;
} __aligned(8 * sizeof(u64));

struct pcpu_lstats {
	u64_stats_t packets;
	u64_stats_t bytes;
Loading