Commit 21d58b35 authored by Daniel Borkmann, committed by Jakub Kicinski
Browse files

net: Add lease info to queue-get response



Add nested lease info to the queue-get response. The nest carries the
ifindex and the queue id with type of the leased queue and, optionally,
the netns id if the device resides in a different netns.

Example with ynl client when using AF_XDP via queue leasing:

  # ip a
  [...]
  4: enp10s0f0np0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 xdp/id:24 qdisc mq state UP group default qlen 1000
    link/ether e8:eb:d3:a3:43:f6 brd ff:ff:ff:ff:ff:ff
    inet 10.0.0.2/24 scope global enp10s0f0np0
       valid_lft forever preferred_lft forever
    inet6 fe80::eaeb:d3ff:fea3:43f6/64 scope link proto kernel_ll
       valid_lft forever preferred_lft forever
  [...]

  # ethtool -i enp10s0f0np0
  driver: mlx5_core
  [...]

  # ynl --family netdev --output-json --do queue-get \
        --json '{"ifindex": 4, "id": 15, "type": "rx"}'
  {'id': 15,
   'ifindex': 4,
   'lease': {'ifindex': 8, 'netns-id': 0, 'queue': {'id': 1, 'type': 'rx'}},
   'napi-id': 8227,
   'type': 'rx',
   'xsk': {}}

  # ip netns list
  foo (id: 0)

  # ip netns exec foo ip a
  [...]
  8: nk@NONE: <BROADCAST,MULTICAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
      link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
      inet6 fe80::200:ff:fe00:0/64 scope link proto kernel_ll
         valid_lft forever preferred_lft forever
  [...]

  # ip netns exec foo ethtool -i nk
  driver: netkit
  [...]

  # ip netns exec foo ls /sys/class/net/nk/queues/
  rx-0  rx-1  tx-0

  # ip netns exec foo ynl --family netdev --output-json --do queue-get \
        --json '{"ifindex": 8, "id": 1, "type": "rx"}'
  {"id": 1, "type": "rx", "ifindex": 8, "xsk": {}}

Note that the caller of netdev_nl_queue_fill_one() holds the netdevice
lock. For the queue-get we do not lock both devices. When queues get
{un,}leased, both devices are locked, thus if __netif_get_rx_queue_lease()
returns a lease pointer, it points to a valid device. The netns-id is
fetched via peernet2id_alloc(), similar to what OVS does.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Co-developed-by: David Wei <dw@davidwei.uk>
Signed-off-by: David Wei <dw@davidwei.uk>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260402231031.447597-4-daniel@iogearbox.net


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent d04686d9
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -67,6 +67,20 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
	return index;
}

/* Direction in which a queue lease may be followed, starting from a device.
 * netif_lease_dir_ok() accepts NETIF_VIRT_TO_PHYS only for a device without
 * dev->dev.parent and NETIF_PHYS_TO_VIRT only for a device that has one.
 */
enum netif_lease_dir {
	NETIF_VIRT_TO_PHYS,
	NETIF_PHYS_TO_VIRT,
};

/* Resolve rx queue *rxq of *dev through its lease, if it has one.
 * With a lease and a permitted @dir, *dev and *rxq are rewritten to the
 * leased queue's device and index and the leased queue is returned. With a
 * lease but a @dir that is not permitted for *dev, NULL is returned and
 * nothing is rewritten. Without a lease the queue itself is returned.
 * The resolved device is not locked by this helper.
 */
struct netdev_rx_queue *
__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq,
			   enum netif_lease_dir dir);

/* Same lookup in the NETIF_VIRT_TO_PHYS direction. Asserts that the ops lock
 * of the original *dev is held and, when the lookup moved *dev to a different
 * device, takes netdev_lock() on that device. Pair every call with
 * netif_put_rx_queue_lease_locked(), passing the original and resolved device.
 */
struct netdev_rx_queue *
netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq);
/* Drops the lock on @dev if it differs from @orig_dev; otherwise a no-op. */
void netif_put_rx_queue_lease_locked(struct net_device *orig_dev,
				     struct net_device *dev);

int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst,
			   struct netdev_rx_queue *rxq_src);
+62 −4
Original line number Diff line number Diff line
@@ -386,12 +386,63 @@ static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi)
	return 0;
}

/* Emit the NETDEV_A_QUEUE_LEASE nest for an rx queue that has been leased.
 *
 * The lease of rx queue @q_idx on @netdev is looked up in the
 * physical-to-virtual direction. If the lookup yields nothing, or resolves
 * back to @netdev itself, no attribute is added and 0 is returned.
 * Otherwise the nest carries:
 *  - NETDEV_A_LEASE_QUEUE: queue id on the other device plus @q_type,
 *  - NETDEV_A_LEASE_IFINDEX: ifindex of the other device,
 *  - NETDEV_A_LEASE_NETNS_ID: only when both devices are in different
 *    network namespaces, the id obtained from peernet2id_alloc().
 *
 * Returns 0 on success and -EMSGSIZE when @rsp ran out of room. Attributes
 * written before a failure are not trimmed here; the caller is expected to
 * cancel the whole message.
 */
static int
netdev_nl_queue_fill_lease(struct sk_buff *rsp, struct net_device *netdev,
			   u32 q_idx, u32 q_type)
{
	struct net_device *orig_netdev = netdev;
	struct nlattr *nest_lease, *nest_queue;
	struct netdev_rx_queue *rxq;
	struct net *net, *peer_net;
	bool cross_netns;
	s32 netns_id = 0;

	/* On a hit, netdev and q_idx are rewritten to the leasing side. */
	rxq = __netif_get_rx_queue_lease(&netdev, &q_idx,
					 NETIF_PHYS_TO_VIRT);
	if (!rxq || orig_netdev == netdev)
		return 0;

	nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE);
	if (!nest_lease)
		goto nla_put_failure;

	nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE);
	if (!nest_queue)
		goto nla_put_failure;
	if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx))
		goto nla_put_failure;
	if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type))
		goto nla_put_failure;
	nla_nest_end(rsp, nest_queue);

	if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX,
			READ_ONCE(netdev->ifindex)))
		goto nla_put_failure;

	/* Only the netns lookup needs RCU; the attribute is put afterwards
	 * so that a single unwind label suffices.
	 */
	rcu_read_lock();
	peer_net = dev_net_rcu(netdev);
	net = dev_net_rcu(orig_netdev);
	cross_netns = !net_eq(net, peer_net);
	if (cross_netns)
		netns_id = peernet2id_alloc(net, peer_net, GFP_ATOMIC);
	rcu_read_unlock();

	if (cross_netns &&
	    nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, netns_id))
		goto nla_put_failure;

	nla_nest_end(rsp, nest_lease);
	return 0;

nla_put_failure:
	/* Every failure above means the skb is full, not out of memory. */
	return -EMSGSIZE;
}

static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
			 u32 q_idx, u32 q_type, const struct genl_info *info)
{
	struct pp_memory_provider_params *params;
	struct netdev_rx_queue *rxq;
	struct net_device *orig_netdev = netdev;
	struct netdev_rx_queue *rxq, *rxq_lease;
	struct netdev_queue *txq;
	void *hdr;

@@ -409,17 +460,22 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
		rxq = __netif_get_rx_queue(netdev, q_idx);
		if (nla_put_napi_id(rsp, rxq->napi))
			goto nla_put_failure;
		if (netdev_nl_queue_fill_lease(rsp, netdev, q_idx, q_type))
			goto nla_put_failure;

		rxq_lease = netif_get_rx_queue_lease_locked(&netdev, &q_idx);
		if (rxq_lease)
			rxq = rxq_lease;
		params = &rxq->mp_params;
		if (params->mp_ops &&
		    params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
			goto nla_put_failure;
			goto nla_put_failure_lease;
#ifdef CONFIG_XDP_SOCKETS
		if (rxq->pool)
			if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
				goto nla_put_failure;
				goto nla_put_failure_lease;
#endif

		netif_put_rx_queue_lease_locked(orig_netdev, netdev);
		break;
	case NETDEV_QUEUE_TYPE_TX:
		txq = netdev_get_tx_queue(netdev, q_idx);
@@ -437,6 +493,8 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,

	return 0;

nla_put_failure_lease:
	netif_put_rx_queue_lease_locked(orig_netdev, netdev);
nla_put_failure:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
+54 −0
Original line number Diff line number Diff line
@@ -41,6 +41,60 @@ bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx)
	return false;
}

/* Virtual devices eligible for leasing have no dev->dev.parent, while
 * physical devices always have one. Use this to enforce the correct
 * lease traversal direction.
 */
static bool netif_lease_dir_ok(const struct net_device *dev,
			       enum netif_lease_dir dir)
{
	if (dir == NETIF_VIRT_TO_PHYS && !dev->dev.parent)
		return true;
	if (dir == NETIF_PHYS_TO_VIRT && dev->dev.parent)
		return true;
	return false;
}

/* Resolve rx queue *rxq_idx of *dev through its lease.
 *
 * No lease: the queue itself is returned, *dev and *rxq_idx stay as they are.
 * Lease, but @dir is not valid for *dev: NULL, nothing is modified.
 * Lease and valid @dir: the leased queue is returned and *rxq_idx / *dev are
 * updated to describe it.
 */
struct netdev_rx_queue *
__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq_idx,
			   enum netif_lease_dir dir)
{
	struct netdev_rx_queue *own, *leased;

	own = __netif_get_rx_queue(*dev, *rxq_idx);
	leased = own->lease;
	if (!leased)
		return own;

	if (!netif_lease_dir_ok(*dev, dir))
		return NULL;

	*rxq_idx = get_netdev_rx_queue_index(leased);
	*dev = leased->dev;
	return leased;
}

/* Virtual-to-physical lease lookup that also locks the resolved device.
 *
 * The caller must hold the ops lock of *dev. If the lookup moves *dev to a
 * different device, that device is locked before returning; undo with
 * netif_put_rx_queue_lease_locked().
 */
struct netdev_rx_queue *
netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq_idx)
{
	struct net_device *caller_dev = *dev;
	struct netdev_rx_queue *rxq;

	/* Locks are always taken virtual device first, physical device
	 * second, see netdev_nl_queue_create_doit().
	 */
	netdev_ops_assert_locked(caller_dev);

	rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_VIRT_TO_PHYS);
	if (!rxq || *dev == caller_dev)
		return rxq;

	netdev_lock(*dev);
	return rxq;
}

/* Counterpart of netif_get_rx_queue_lease_locked(): unlock @dev unless it is
 * the very device the caller started from (@orig_dev).
 */
void netif_put_rx_queue_lease_locked(struct net_device *orig_dev,
				     struct net_device *dev)
{
	if (dev == orig_dev)
		return;

	netdev_unlock(dev);
}

/* See also page_pool_is_unreadable() */
bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx)
{