Merge branch 'netkit-support-for-io_uring-zero-copy-and-af_xdp' (15089225) · Commits · git / linux-nf

Documentation/netlink/specs/netdev.yaml

+46 −0

Original line number	Diff line number	Diff line
		@@ -339,6 +339,15 @@ attribute-sets:
		doc: XSK information for this queue, if any.
		type: nest
		nested-attributes: xsk-info
		-
		name: lease
		doc: \|
		A queue from a virtual device can have a lease which refers to
		another queue from a physical device. This is useful for memory
		providers and AF_XDP operations which take an ifindex and queue id
		to allow applications to bind against virtual devices in containers.
		type: nest
		nested-attributes: lease
		-
		name: qstats
		doc: \|
		@@ -537,6 +546,26 @@ attribute-sets:
		name: id
		-
		name: type
		-
		name: lease
		attributes:
		-
		name: ifindex
		doc: The netdev ifindex to lease the queue from.
		type: u32
		checks:
		min: 1
		-
		name: queue
		doc: The netdev queue to lease from.
		type: nest
		nested-attributes: queue-id
		-
		name: netns-id
		doc: The network namespace id of the netdev.
		type: s32
		checks:
		min: 0
		-
		name: dmabuf
		attributes:
		@@ -686,6 +715,7 @@ operations:
		- dmabuf
		- io-uring
		- xsk
		- lease
		dump:
		request:
		attributes:
		@@ -797,6 +827,22 @@ operations:
		reply:
		attributes:
		- id
		-
		name: queue-create
		doc: \|
		Create a new queue for the given netdevice. Whether this operation
		is supported depends on the device and the driver.
		attribute-set: queue
		flags: [admin-perm]
		do:
		request:
		attributes:
		- ifindex
		- type
		- lease
		reply: &queue-create-op
		attributes:
		- id

		kernel-family:
		headers: ["net/netdev_netlink.h"]

Documentation/netlink/specs/rt-link.yaml

+11 −0

Original line number	Diff line number	Diff line
		@@ -825,6 +825,13 @@ definitions:
		entries:
		- name: none
		- name: default
		-
		name: netkit-pairing
		type: enum
		enum-name: netkit-pairing
		entries:
		- name: pair
		- name: single
		-
		name: ovpn-mode
		enum-name: ovpn-mode
		@@ -2299,6 +2306,10 @@ attribute-sets:
		-
		name: tailroom
		type: u16
		-
		name: pairing
		type: u32
		enum: netkit-pairing
		-
		name: linkinfo-ovpn-attrs
		name-prefix: ifla-ovpn-

Documentation/networking/netdevices.rst

+6 −0

Original line number	Diff line number	Diff line
		@@ -329,6 +329,12 @@ by setting ``request_ops_lock`` to true. Code comments and docs refer
		to drivers which have ops called under the instance lock as "ops locked".
		See also the documentation of the ``lock`` member of struct net_device.

		There is also a case of taking two per-netdev locks in sequence when netdev
		queues are leased, that is, the netdev-scope lock is taken for both the
		virtual and the physical device. To prevent deadlocks, the virtual device's
		lock must always be acquired before the physical device's (see
		``netdev_nl_queue_create_doit``).

		In the future, there will be an option for individual
		drivers to opt out of using ``rtnl_lock`` and instead perform their control
		operations directly under the netdev instance lock.

drivers/net/netkit.c

+349 −63

Original line number	Diff line number	Diff line
		@@ -9,11 +9,21 @@
		#include <linux/bpf_mprog.h>
		#include <linux/indirect_call_wrapper.h>

		#include <net/netdev_lock.h>
		#include <net/netdev_queues.h>
		#include <net/netdev_rx_queue.h>
		#include <net/xdp_sock_drv.h>
		#include <net/netkit.h>
		#include <net/dst.h>
		#include <net/tcx.h>

		#define DRV_NAME "netkit"
		#define NETKIT_DRV_NAME "netkit"

		#define NETKIT_NUM_RX_QUEUES_MAX 1024
		#define NETKIT_NUM_TX_QUEUES_MAX 1

		#define NETKIT_NUM_RX_QUEUES_REAL 1
		#define NETKIT_NUM_TX_QUEUES_REAL 1

		struct netkit {
		__cacheline_group_begin(netkit_fastpath);
		@@ -26,6 +36,7 @@ struct netkit {

		__cacheline_group_begin(netkit_slowpath);
		enum netkit_mode mode;
		enum netkit_pairing pair;
		bool primary;
		u32 headroom;
		__cacheline_group_end(netkit_slowpath);
		@@ -36,6 +47,8 @@ struct netkit_link {
		struct net_device *dev;
		};

		static struct rtnl_link_ops netkit_link_ops;

		static __always_inline int
		netkit_run(const struct bpf_mprog_entry entry, struct sk_buff skb,
		enum netkit_action ret)
		@@ -135,6 +148,10 @@ static int netkit_open(struct net_device *dev)
		struct netkit *nk = netkit_priv(dev);
		struct net_device *peer = rtnl_dereference(nk->peer);

		if (nk->pair == NETKIT_DEVICE_SINGLE) {
		netif_carrier_on(dev);
		return 0;
		}
		if (!peer)
		return -ENOTCONN;
		if (peer->flags & IFF_UP) {
		@@ -194,16 +211,17 @@ static void netkit_set_headroom(struct net_device *dev, int headroom)

		rcu_read_lock();
		peer = rcu_dereference(nk->peer);
		if (unlikely(!peer))
		goto out;

		if (!peer) {
		nk->headroom = headroom;
		dev->needed_headroom = headroom;
		} else {
		nk2 = netkit_priv(peer);
		nk->headroom = headroom;
		headroom = max(nk->headroom, nk2->headroom);

		peer->needed_headroom = headroom;
		dev->needed_headroom = headroom;
		out:
		}
		rcu_read_unlock();
		}

		@@ -219,9 +237,96 @@ static void netkit_get_stats(struct net_device *dev,
		stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
		}

		static bool netkit_xsk_supported_at_phys(const struct net_device *dev)
		{
		if (!dev->netdev_ops->ndo_bpf \|\|
		!dev->netdev_ops->ndo_xdp_xmit \|\|
		!dev->netdev_ops->ndo_xsk_wakeup)
		return false;
		return true;
		}

		static int netkit_xsk(struct net_device dev, struct netdev_bpf xdp)
		{
		struct netkit *nk = netkit_priv(dev);
		struct netdev_bpf xdp_lower;
		struct netdev_rx_queue *rxq;
		struct net_device *phys;
		bool create = false;
		int ret = -EBUSY;

		switch (xdp->command) {
		case XDP_SETUP_XSK_POOL:
		if (nk->pair == NETKIT_DEVICE_PAIR)
		return -EOPNOTSUPP;
		if (xdp->xsk.queue_id >= dev->real_num_rx_queues)
		return -EINVAL;

		rxq = __netif_get_rx_queue(dev, xdp->xsk.queue_id);
		if (!rxq->lease)
		return -EOPNOTSUPP;

		phys = rxq->lease->dev;
		if (!netkit_xsk_supported_at_phys(phys))
		return -EOPNOTSUPP;

		create = xdp->xsk.pool;
		memcpy(&xdp_lower, xdp, sizeof(xdp_lower));
		xdp_lower.xsk.queue_id = get_netdev_rx_queue_index(rxq->lease);
		break;
		case XDP_SETUP_PROG:
		return -EOPNOTSUPP;
		default:
		return -EINVAL;
		}

		netdev_lock(phys);
		if (create &&
		(phys->xdp_features & NETDEV_XDP_ACT_XSK) != NETDEV_XDP_ACT_XSK) {
		ret = -EOPNOTSUPP;
		goto out;
		}
		if (!create \|\| !dev_get_min_mp_channel_count(phys))
		ret = phys->netdev_ops->ndo_bpf(phys, &xdp_lower);
		out:
		netdev_unlock(phys);
		return ret;
		}

		static int netkit_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
		{
		struct netdev_rx_queue rxq, rxq_lease;
		struct net_device *phys;

		if (queue_id >= dev->real_num_rx_queues)
		return -EINVAL;

		rxq = __netif_get_rx_queue(dev, queue_id);
		rxq_lease = READ_ONCE(rxq->lease);
		if (unlikely(!rxq_lease))
		return -EOPNOTSUPP;

		/* netkit_xsk already validated full xsk support, hence it's
		* fine to call into ndo_xsk_wakeup right away given this
		* was a prerequisite to get here in the first place. The
		* phys xsk support cannot change without tearing down the
		* device (which clears the lease first).
		*/
		phys = rxq_lease->dev;
		return phys->netdev_ops->ndo_xsk_wakeup(phys,
		get_netdev_rx_queue_index(rxq_lease), flags);
		}

		static int netkit_init(struct net_device *dev)
		{
		netdev_lockdep_set_classes(dev);
		return 0;
		}

		static void netkit_uninit(struct net_device *dev);

		static const struct net_device_ops netkit_netdev_ops = {
		.ndo_init = netkit_init,
		.ndo_open = netkit_open,
		.ndo_stop = netkit_close,
		.ndo_start_xmit = netkit_xmit,
		@@ -232,19 +337,104 @@ static const struct net_device_ops netkit_netdev_ops = {
		.ndo_get_peer_dev = netkit_peer_dev,
		.ndo_get_stats64 = netkit_get_stats,
		.ndo_uninit = netkit_uninit,
		.ndo_bpf = netkit_xsk,
		.ndo_xsk_wakeup = netkit_xsk_wakeup,
		.ndo_features_check = passthru_features_check,
		};

		static void netkit_get_drvinfo(struct net_device *dev,
		struct ethtool_drvinfo *info)
		{
		strscpy(info->driver, DRV_NAME, sizeof(info->driver));
		strscpy(info->driver, NETKIT_DRV_NAME, sizeof(info->driver));
		}

		static const struct ethtool_ops netkit_ethtool_ops = {
		.get_drvinfo = netkit_get_drvinfo,
		};

		static int netkit_queue_create(struct net_device *dev,
		struct netlink_ext_ack *extack)
		{
		struct netkit *nk = netkit_priv(dev);
		u32 rxq_count_old, rxq_count_new;
		int err;

		rxq_count_old = dev->real_num_rx_queues;
		rxq_count_new = rxq_count_old + 1;

		/* In paired mode, only the non-primary (peer) device can
		* create leased queues since the primary is the management
		* side. In single device mode, leasing is always allowed.
		*/
		if (nk->pair == NETKIT_DEVICE_PAIR && nk->primary) {
		NL_SET_ERR_MSG(extack,
		"netkit can only lease against the peer device");
		return -EOPNOTSUPP;
		}

		err = netif_set_real_num_rx_queues(dev, rxq_count_new);
		if (err) {
		if (rxq_count_new > dev->num_rx_queues)
		NL_SET_ERR_MSG(extack,
		"netkit maximum queue limit reached");
		else
		NL_SET_ERR_MSG_FMT(extack,
		"netkit cannot create more queues err=%d", err);
		return err;
		}

		return rxq_count_old;
		}

		static const struct netdev_queue_mgmt_ops netkit_queue_mgmt_ops = {
		.ndo_queue_create = netkit_queue_create,
		};

		static struct net_device netkit_alloc(struct nlattr tb[],
		const char *ifname,
		unsigned char name_assign_type,
		unsigned int num_tx_queues,
		unsigned int num_rx_queues)
		{
		const struct rtnl_link_ops *ops = &netkit_link_ops;
		struct net_device *dev;

		if (num_tx_queues > NETKIT_NUM_TX_QUEUES_MAX \|\|
		num_rx_queues > NETKIT_NUM_RX_QUEUES_MAX)
		return ERR_PTR(-EOPNOTSUPP);

		dev = alloc_netdev_mqs(ops->priv_size, ifname,
		name_assign_type, ops->setup,
		num_tx_queues, num_rx_queues);
		if (dev) {
		dev->real_num_tx_queues = NETKIT_NUM_TX_QUEUES_REAL;
		dev->real_num_rx_queues = NETKIT_NUM_RX_QUEUES_REAL;
		}
		return dev;
		}

		static void netkit_queue_unlease(struct net_device *dev)
		{
		struct netdev_rx_queue rxq, rxq_lease;
		struct net_device *dev_lease;
		int i;

		if (dev->real_num_rx_queues == 1)
		return;

		netdev_lock(dev);
		for (i = 1; i < dev->real_num_rx_queues; i++) {
		rxq = __netif_get_rx_queue(dev, i);
		rxq_lease = rxq->lease;
		dev_lease = rxq_lease->dev;

		netdev_lock(dev_lease);
		netdev_rx_queue_unlease(rxq, rxq_lease);
		netdev_unlock(dev_lease);
		}
		netdev_unlock(dev);
		}

		static void netkit_setup(struct net_device *dev)
		{
		static const netdev_features_t netkit_features_hw_vlan =
		@@ -275,8 +465,9 @@ static void netkit_setup(struct net_device *dev)
		dev->priv_flags \|= IFF_DISABLE_NETPOLL;
		dev->lltx = true;

		dev->ethtool_ops = &netkit_ethtool_ops;
		dev->netdev_ops = &netkit_netdev_ops;
		dev->ethtool_ops = &netkit_ethtool_ops;
		dev->queue_mgmt_ops = &netkit_queue_mgmt_ops;

		dev->features \|= netkit_features;
		dev->hw_features = netkit_features;
		@@ -325,8 +516,6 @@ static int netkit_validate(struct nlattr tb[], struct nlattr data[],
		return 0;
		}

		static struct rtnl_link_ops netkit_link_ops;

		static int netkit_new_link(struct net_device *dev,
		struct rtnl_newlink_params *params,
		struct netlink_ext_ack *extack)
		@@ -335,15 +524,17 @@ static int netkit_new_link(struct net_device *dev,
		enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT;
		enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
		struct nlattr peer_tb[IFLA_MAX + 1], tbp, attr;
		enum netkit_pairing pair = NETKIT_DEVICE_PAIR;
		enum netkit_action policy_prim = NETKIT_PASS;
		enum netkit_action policy_peer = NETKIT_PASS;
		bool seen_peer = false, seen_scrub = false;
		struct nlattr **data = params->data;
		enum netkit_mode mode = NETKIT_L3;
		unsigned char ifname_assign_type;
		struct nlattr **tb = params->tb;
		u16 headroom = 0, tailroom = 0;
		struct ifinfomsg *ifmp = NULL;
		struct net_device *peer;
		struct net_device *peer = NULL;
		char ifname[IFNAMSIZ];
		struct netkit *nk;
		int err;
		@@ -380,6 +571,13 @@ static int netkit_new_link(struct net_device *dev,
		headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]);
		if (data[IFLA_NETKIT_TAILROOM])
		tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]);
		if (data[IFLA_NETKIT_PAIRING])
		pair = nla_get_u32(data[IFLA_NETKIT_PAIRING]);

		seen_scrub = data[IFLA_NETKIT_SCRUB];
		seen_peer = data[IFLA_NETKIT_PEER_INFO] \|\|
		data[IFLA_NETKIT_PEER_SCRUB] \|\|
		data[IFLA_NETKIT_PEER_POLICY];
		}

		if (ifmp && tbp[IFLA_IFNAME]) {
		@@ -392,22 +590,22 @@ static int netkit_new_link(struct net_device *dev,
		if (mode != NETKIT_L2 &&
		(tb[IFLA_ADDRESS] \|\| tbp[IFLA_ADDRESS]))
		return -EOPNOTSUPP;
		if (pair == NETKIT_DEVICE_SINGLE &&
		(tb != tbp \|\| seen_peer \|\| seen_scrub \|\|
		policy_prim != NETKIT_PASS))
		return -EOPNOTSUPP;

		if (pair == NETKIT_DEVICE_PAIR) {
		peer = rtnl_create_link(peer_net, ifname, ifname_assign_type,
		&netkit_link_ops, tbp, extack);
		if (IS_ERR(peer))
		return PTR_ERR(peer);

		netif_inherit_tso_max(peer, dev);
		if (headroom) {
		if (headroom)
		peer->needed_headroom = headroom;
		dev->needed_headroom = headroom;
		}
		if (tailroom) {
		if (tailroom)
		peer->needed_tailroom = tailroom;
		dev->needed_tailroom = tailroom;
		}

		if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
		eth_hw_addr_random(peer);
		if (ifmp && dev->ifindex)
		@@ -418,6 +616,7 @@ static int netkit_new_link(struct net_device *dev,
		nk->policy = policy_peer;
		nk->scrub = scrub_peer;
		nk->mode = mode;
		nk->pair = pair;
		nk->headroom = headroom;
		bpf_mprog_bundle_init(&nk->bundle);

		@@ -431,6 +630,7 @@ static int netkit_new_link(struct net_device *dev,
		err = rtnl_configure_link(peer, NULL, 0, NULL);
		if (err < 0)
		goto err_configure_peer;
		}

		if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);
		@@ -438,15 +638,23 @@ static int netkit_new_link(struct net_device *dev,
		nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
		else
		strscpy(dev->name, "nk%d", IFNAMSIZ);
		if (headroom)
		dev->needed_headroom = headroom;
		if (tailroom)
		dev->needed_tailroom = tailroom;

		nk = netkit_priv(dev);
		nk->primary = true;
		nk->policy = policy_prim;
		nk->scrub = scrub_prim;
		nk->mode = mode;
		nk->pair = pair;
		nk->headroom = headroom;
		bpf_mprog_bundle_init(&nk->bundle);

		if (pair == NETKIT_DEVICE_SINGLE)
		xdp_set_features_flag(dev, NETDEV_XDP_ACT_XSK);

		err = register_netdevice(dev);
		if (err < 0)
		goto err_configure_peer;
		@@ -455,9 +663,11 @@ static int netkit_new_link(struct net_device *dev,
		dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL);

		rcu_assign_pointer(netkit_priv(dev)->peer, peer);
		if (peer)
		rcu_assign_pointer(netkit_priv(peer)->peer, dev);
		return 0;
		err_configure_peer:
		if (peer)
		unregister_netdevice(peer);
		return err;
		err_register_peer:
		@@ -518,6 +728,8 @@ static struct net_device netkit_dev_fetch(struct net net, u32 ifindex, u32 whi
		nk = netkit_priv(dev);
		if (!nk->primary)
		return ERR_PTR(-EACCES);
		if (nk->pair == NETKIT_DEVICE_SINGLE)
		return ERR_PTR(-EOPNOTSUPP);
		if (which == BPF_NETKIT_PEER) {
		dev = rcu_dereference_rtnl(nk->peer);
		if (!dev)
		@@ -844,6 +1056,7 @@ static void netkit_release_all(struct net_device *dev)
		static void netkit_uninit(struct net_device *dev)
		{
		netkit_release_all(dev);
		netkit_queue_unlease(dev);
		}

		static void netkit_del_link(struct net_device dev, struct list_head head)
		@@ -856,6 +1069,14 @@ static void netkit_del_link(struct net_device dev, struct list_head head)
		if (peer) {
		nk = netkit_priv(peer);
		RCU_INIT_POINTER(nk->peer, NULL);
		/* Guard against the peer already being in an unregister
		* list (e.g. same-namespace teardown where the peer is
		* in the caller's dev_kill_list). list_move_tail() on an
		* already-queued device would otherwise corrupt that
		* list's iteration. This situation can occur via netkit
		* notifier, hence guard against this scenario.
		*/
		if (!unregister_netdevice_queued(peer))
		unregister_netdevice_queue(peer, head);
		}
		}
		@@ -879,6 +1100,7 @@ static int netkit_change_link(struct net_device dev, struct nlattr tb[],
		{ IFLA_NETKIT_PEER_INFO, "peer info" },
		{ IFLA_NETKIT_HEADROOM, "headroom" },
		{ IFLA_NETKIT_TAILROOM, "tailroom" },
		{ IFLA_NETKIT_PAIRING, "pairing" },
		};

		if (!nk->primary) {
		@@ -898,8 +1120,10 @@ static int netkit_change_link(struct net_device dev, struct nlattr tb[],
		}

		if (data[IFLA_NETKIT_POLICY]) {
		err = -EOPNOTSUPP;
		attr = data[IFLA_NETKIT_POLICY];
		policy = nla_get_u32(attr);
		if (nk->pair == NETKIT_DEVICE_PAIR)
		err = netkit_check_policy(policy, attr, extack);
		if (err)
		return err;
		@@ -921,6 +1145,50 @@ static int netkit_change_link(struct net_device dev, struct nlattr tb[],
		return 0;
		}

		static void netkit_check_lease_unregister(struct net_device *dev)
		{
		LIST_HEAD(list_kill);
		u32 q_idx;

		if (READ_ONCE(dev->reg_state) != NETREG_UNREGISTERING \|\|
		!dev->dev.parent)
		return;

		netdev_lock_ops(dev);
		for (q_idx = 0; q_idx < dev->real_num_rx_queues; q_idx++) {
		struct net_device *tmp = dev;
		struct netdev_rx_queue *rxq;
		u32 tmp_q_idx = q_idx;

		rxq = __netif_get_rx_queue_lease(&tmp, &tmp_q_idx,
		NETIF_PHYS_TO_VIRT);
		if (rxq && tmp != dev &&
		tmp->netdev_ops == &netkit_netdev_ops) {
		/* A single phys device can have multiple queues leased
		* to one netkit device. We can only queue that netkit
		* device once to the list_kill. Queues of that phys
		* device can be leased with different individual netkit
		* devices, hence we batch via list_kill.
		*/
		if (unregister_netdevice_queued(tmp))
		continue;
		netkit_del_link(tmp, &list_kill);
		}
		}
		netdev_unlock_ops(dev);
		unregister_netdevice_many(&list_kill);
		}

		static int netkit_notifier(struct notifier_block *this,
		unsigned long event, void *ptr)
		{
		struct net_device *dev = netdev_notifier_info_to_dev(ptr);

		if (event == NETDEV_UNREGISTER)
		netkit_check_lease_unregister(dev);
		return NOTIFY_DONE;
		}

		static size_t netkit_get_size(const struct net_device *dev)
		{
		return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */
		@@ -931,6 +1199,7 @@ static size_t netkit_get_size(const struct net_device *dev)
		nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */
		nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */
		nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */
		nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PAIRING */
		0;
		}

		@@ -951,6 +1220,8 @@ static int netkit_fill_info(struct sk_buff skb, const struct net_device dev)
		return -EMSGSIZE;
		if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom))
		return -EMSGSIZE;
		if (nla_put_u32(skb, IFLA_NETKIT_PAIRING, nk->pair))
		return -EMSGSIZE;

		if (peer) {
		nk = netkit_priv(peer);
		@@ -972,13 +1243,15 @@ static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
		[IFLA_NETKIT_TAILROOM] = { .type = NLA_U16 },
		[IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
		[IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
		[IFLA_NETKIT_PAIRING] = NLA_POLICY_MAX(NLA_U32, NETKIT_DEVICE_SINGLE),
		[IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT,
		.reject_message = "Primary attribute is read-only" },
		};

		static struct rtnl_link_ops netkit_link_ops = {
		.kind = DRV_NAME,
		.kind = NETKIT_DRV_NAME,
		.priv_size = sizeof(struct netkit),
		.alloc = netkit_alloc,
		.setup = netkit_setup,
		.newlink = netkit_new_link,
		.dellink = netkit_del_link,
		@@ -992,26 +1265,39 @@ static struct rtnl_link_ops netkit_link_ops = {
		.maxtype = IFLA_NETKIT_MAX,
		};

		static __init int netkit_init(void)
		static struct notifier_block netkit_netdev_notifier = {
		.notifier_call = netkit_notifier,
		};

		static __init int netkit_mod_init(void)
		{
		int ret;

		BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT \|\|
		(int)NETKIT_PASS != (int)TCX_PASS \|\|
		(int)NETKIT_DROP != (int)TCX_DROP \|\|
		(int)NETKIT_REDIRECT != (int)TCX_REDIRECT);

		return rtnl_link_register(&netkit_link_ops);
		ret = rtnl_link_register(&netkit_link_ops);
		if (ret)
		return ret;
		ret = register_netdevice_notifier(&netkit_netdev_notifier);
		if (ret)
		rtnl_link_unregister(&netkit_link_ops);
		return ret;
		}

		static __exit void netkit_exit(void)
		static __exit void netkit_mod_exit(void)
		{
		unregister_netdevice_notifier(&netkit_netdev_notifier);
		rtnl_link_unregister(&netkit_link_ops);
		}

		module_init(netkit_init);
		module_exit(netkit_exit);
		module_init(netkit_mod_init);
		module_exit(netkit_mod_exit);

		MODULE_DESCRIPTION("BPF-programmable network device");
		MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>");
		MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>");
		MODULE_LICENSE("GPL");
		MODULE_ALIAS_RTNL_LINK(DRV_NAME);
		MODULE_ALIAS_RTNL_LINK(NETKIT_DRV_NAME);

include/linux/netdevice.h

+10 −1

Original line number	Diff line number	Diff line
		@@ -2561,7 +2561,14 @@ struct net_device {
		* Also protects some fields in:
		* struct napi_struct, struct netdev_queue, struct netdev_rx_queue
		*
		* Ordering: take after rtnl_lock.
		* Ordering:
		*
		* - take after rtnl_lock
		*
		* - for the case of netdev queue leasing, the netdev-scope lock is
		* taken for both the virtual and the physical device; to prevent
		* deadlocks, the virtual device's lock must always be acquired
		* before the physical device's (see netdev_nl_queue_create_doit)
		*/
		struct mutex lock;

		@@ -3413,6 +3420,8 @@ static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
		int register_netdevice(struct net_device *dev);
		void unregister_netdevice_queue(struct net_device dev, struct list_head head);
		void unregister_netdevice_many(struct list_head *head);
		bool unregister_netdevice_queued(const struct net_device *dev);

		static inline void unregister_netdevice(struct net_device *dev)
		{
		unregister_netdevice_queue(dev, NULL);