Commit 5602ad61 authored by David Wei, committed by Jakub Kicinski
Browse files

net: Proxy netif_mp_{open,close}_rxq for leased queues



When a process in a container wants to set up a memory provider, it will
use the virtual netdev and a leased rxq, and call netif_mp_{open,close}_rxq
to try and restart the queue. At this point, proxy the queue restart on
the real rxq in the physical netdev.

For memory providers (io_uring zero-copy rx and devmem), this causes the
real rxq in the physical netdev to be filled from a memory provider that
has DMA mapped memory from a process within a container.

Signed-off-by: David Wei <dw@davidwei.uk>
Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260402231031.447597-7-daniel@iogearbox.net


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 1e91c98b
Loading
Loading
Loading
Loading
+1 −3
Original line number Diff line number Diff line
@@ -12350,10 +12350,8 @@ static void dev_memory_provider_uninstall(struct net_device *dev)

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		struct netdev_rx_queue *rxq = &dev->_rx[i];
		struct pp_memory_provider_params *p = &rxq->mp_params;

		if (p->mp_ops && p->mp_ops->uninstall)
			p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq);
		__netif_mp_uninstall_rxq(rxq, &rxq->mp_params);
	}
}

+7 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ struct net;
struct netlink_ext_ack;
struct netdev_queue_config;
struct cpumask;
struct pp_memory_provider_params;

/* Random bits of netdevice that don't need to be exposed */
#define FLOW_LIMIT_HISTORY	(1 << 7)  /* must be ^2 and !overflow buckets */
@@ -101,6 +102,12 @@ int netdev_queue_config_validate(struct net_device *dev, int rxq_idx,
bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx);
bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx);

/* Invoke the ->uninstall() callback of the memory provider described by
 * @p for @rxq. Does nothing when @p has no ops or no ->uninstall().
 */
void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
			      const struct pp_memory_provider_params *p);

/* Called while a queue lease is torn down: if a memory provider is
 * installed on @phys_rxq, run its uninstall hook for @virt_rxq and close
 * the provider on the physical queue.
 */
void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
			       struct netdev_rx_queue *virt_rxq);

/* netdev management, shared between various uAPI entry points */
struct netdev_name_node {
	struct hlist_node hlist;
+87 −17
Original line number Diff line number Diff line
@@ -28,6 +28,8 @@ void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
	netdev_assert_locked(rxq_dst->dev);
	netdev_assert_locked(rxq_src->dev);

	netif_rxq_cleanup_unlease(rxq_src, rxq_dst);

	WRITE_ONCE(rxq_src->lease, NULL);
	WRITE_ONCE(rxq_dst->lease, NULL);

@@ -200,7 +202,7 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
}
EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");

int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
static int __netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
			       const struct pp_memory_provider_params *p,
			       struct netlink_ext_ack *extack)
{
@@ -209,15 +211,6 @@ int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
	struct netdev_rx_queue *rxq;
	int ret;

	if (!netdev_need_ops_lock(dev))
		return -EOPNOTSUPP;

	if (rxq_idx >= dev->real_num_rx_queues) {
		NL_SET_ERR_MSG(extack, "rx queue index out of range");
		return -ERANGE;
	}
	rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);

	if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
		NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
		return -EINVAL;
@@ -264,16 +257,48 @@ int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
	return ret;
}

void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
/* Install memory provider @p on rx queue @rxq_idx of @dev.
 *
 * For a queue that is not part of a lease the request is handled on @dev
 * directly. For a leased queue the lease is resolved first and the
 * request is proxied to the device/queue the lookup hands back.
 *
 * Returns 0 or a negative errno:
 *   -EOPNOTSUPP  @dev does not use the ops lock, or the resolved device
 *                has no parent device
 *   -ERANGE      @rxq_idx is not below dev->real_num_rx_queues
 *   -EBUSY       the queue is leased but the lease could not be resolved
 *   otherwise    whatever __netif_mp_open_rxq() returns
 */
int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
		      const struct pp_memory_provider_params *p,
		      struct netlink_ext_ack *extack)
{
	/* @dev may be rewritten by the lease lookup below; remember the
	 * device the caller passed in so the lease can be released later.
	 */
	struct net_device *req_dev = dev;
	int err;

	if (!netdev_need_ops_lock(dev))
		return -EOPNOTSUPP;

	if (rxq_idx >= dev->real_num_rx_queues) {
		NL_SET_ERR_MSG(extack, "rx queue index out of range");
		return -ERANGE;
	}
	rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);

	/* Plain queue: no redirection needed. */
	if (!netif_rxq_is_leased(dev, rxq_idx))
		return __netif_mp_open_rxq(dev, rxq_idx, p, extack);

	/* NOTE(review): presumably updates dev/rxq_idx to the other end of
	 * the lease and takes that device's lock - confirm against helper.
	 */
	if (!netif_get_rx_queue_lease_locked(&dev, &rxq_idx)) {
		NL_SET_ERR_MSG(extack, "rx queue leased to a virtual netdev");
		return -EBUSY;
	}

	if (dev->dev.parent) {
		err = __netif_mp_open_rxq(dev, rxq_idx, p, extack);
	} else {
		NL_SET_ERR_MSG(extack, "rx queue belongs to a virtual netdev");
		err = -EOPNOTSUPP;
	}

	/* Every successful lease lookup is paired with a put. */
	netif_put_rx_queue_lease_locked(req_dev, dev);
	return err;
}

static void __netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
				 const struct pp_memory_provider_params *old_p)
{
	struct netdev_queue_config qcfg[2];
	struct netdev_rx_queue *rxq;
	int err;

	if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
		return;

	rxq = __netif_get_rx_queue(dev, ifq_idx);

	/* Callers holding a netdev ref may get here after we already
@@ -294,3 +319,48 @@ void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
	err = netdev_rx_queue_reconfig(dev, ifq_idx, &qcfg[0], &qcfg[1]);
	WARN_ON(err && err != -ENETDOWN);
}

/* Remove memory provider @old_p from rx queue @ifq_idx of @dev.
 *
 * A queue that is not part of a lease is closed on @dev directly. For a
 * leased queue the lease is resolved and the close is proxied to the
 * device/queue the lookup hands back. An out-of-range index or a failed
 * lease lookup triggers a one-time warning and the call does nothing.
 */
void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
			const struct pp_memory_provider_params *old_p)
{
	/* @dev may be rewritten by the lease lookup; keep the caller's
	 * device around for the matching put.
	 */
	struct net_device *req_dev = dev;

	if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
		return;

	if (!netif_rxq_is_leased(dev, ifq_idx)) {
		__netif_mp_close_rxq(dev, ifq_idx, old_p);
		return;
	}

	if (WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &ifq_idx)))
		return;

	__netif_mp_close_rxq(dev, ifq_idx, old_p);
	netif_put_rx_queue_lease_locked(req_dev, dev);
}

/* Run the ->uninstall() hook of the memory provider described by @p,
 * handing it the provider's private data and @rxq. Silently returns when
 * @p carries no ops or the ops lack an ->uninstall() callback.
 */
void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
			      const struct pp_memory_provider_params *p)
{
	if (!p->mp_ops || !p->mp_ops->uninstall)
		return;

	p->mp_ops->uninstall(p->mp_priv, rxq);
}

/* Tear down memory provider state as part of releasing a queue lease.
 *
 * The memory provider belongs to the queue itself and, through the lease
 * redirection, is guaranteed to have been installed on the physical
 * queue. Since the physical queue can outlive the virtual one, it is not
 * enough to run the provider's uninstall hook: the physical queue must
 * also be reconfigured via __netif_mp_close_rxq() so that it no longer
 * references the provider. Nothing is done when the physical queue has
 * no provider installed.
 */
void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
			       struct netdev_rx_queue *virt_rxq)
{
	struct pp_memory_provider_params *mp = &phys_rxq->mp_params;
	unsigned int phys_idx = get_netdev_rx_queue_index(phys_rxq);

	if (mp->mp_ops) {
		/* Uninstall is reported against the virtual queue ... */
		__netif_mp_uninstall_rxq(virt_rxq, mp);
		/* ... while the close is issued on the physical queue. */
		__netif_mp_close_rxq(phys_rxq->dev, phys_idx, mp);
	}
}