Commit 0a13c1e0 authored by Stanislav Fomichev's avatar Stanislav Fomichev Committed by Jakub Kicinski
Browse files

net: revert to lockless TC_SETUP_BLOCK and TC_SETUP_FT



There is a couple of places from which we can arrive to ndo_setup_tc
with TC_SETUP_BLOCK/TC_SETUP_FT:
- netlink
- netlink notifier
- netdev notifier

Locking netdev too deep in this call chain seems to be problematic
(especially assuming some/all of the call_netdevice_notifiers
NETDEV_UNREGISTER) might soon be running with the instance lock).
Revert to lockless ndo_setup_tc for TC_SETUP_BLOCK/TC_SETUP_FT. NFT
framework already takes care of most of the locking. Document
the assumptions.

ndo_setup_tc TC_SETUP_BLOCK
  nft_block_offload_cmd
    nft_chain_offload_cmd
      nft_flow_block_chain
        nft_flow_offload_chain
	  nft_flow_rule_offload_abort
	    nft_flow_rule_offload_commit
	  nft_flow_rule_offload_commit
	    nf_tables_commit
	      nfnetlink_rcv_batch
	        nfnetlink_rcv_skb_batch
		  nfnetlink_rcv
	nft_offload_netdev_event
	  NETDEV_UNREGISTER notifier

ndo_setup_tc TC_SETUP_FT
  nf_flow_table_offload_cmd
    nf_flow_table_offload_setup
      nft_unregister_flowtable_hook
        nft_register_flowtable_net_hooks
	  nft_flowtable_update
	  nf_tables_newflowtable
	    nfnetlink_rcv_batch (.call NFNL_CB_BATCH)
	nft_flowtable_update
	  nf_tables_newflowtable
	nft_flowtable_event
	  nf_tables_flowtable_event
	    NETDEV_UNREGISTER notifier
      __nft_unregister_flowtable_net_hooks
        nft_unregister_flowtable_net_hooks
	  nf_tables_commit
	    nfnetlink_rcv_batch (.call NFNL_CB_BATCH)
	  __nf_tables_abort
	    nf_tables_abort
	      nfnetlink_rcv_batch
	__nft_release_hook
	  __nft_release_hooks
	    nf_tables_pre_exit_net -> module unload
	  nft_rcv_nl_event
	    netlink_register_notifier (oh boy)
      nft_register_flowtable_net_hooks
      	nft_flowtable_update
	  nf_tables_newflowtable
        nf_tables_newflowtable

Fixes: c4f0f30b ("net: hold netdev instance lock during nft ndo_setup_tc")
Signed-off-by: default avatarStanislav Fomichev <sdf@fomichev.me>
Reported-by: default avatar <syzbot+0afb4bcf91e5a1afdcad@syzkaller.appspotmail.com>
Reviewed-by: default avatarSimon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250308044726.1193222-1-sdf@fomichev.me


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 0ea09cbf
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -290,6 +290,12 @@ ndo_set_rx_mode:
	Synchronization: netif_addr_lock spinlock.
	Context: BHs disabled

ndo_setup_tc:
	``TC_SETUP_BLOCK`` and ``TC_SETUP_FT`` are running under NFT locks
	(i.e. no ``rtnl_lock`` and no device instance lock). The rest of
	``tc_setup_type`` types run under netdev instance lock if the driver
	implements queue management or shaper API.

Most ndo callbacks not specified in the list above are running
under ``rtnl_lock``. In addition, netdev instance lock is taken as well if
the driver implements queue management or shaper API.
+0 −2
Original line number Diff line number Diff line
@@ -3316,8 +3316,6 @@ int dev_open(struct net_device *dev, struct netlink_ext_ack *extack);
void netif_close(struct net_device *dev);
void dev_close(struct net_device *dev);
void dev_close_many(struct list_head *head, bool unlink);
int dev_setup_tc(struct net_device *dev, enum tc_setup_type type,
		 void *type_data);
void netif_disable_lro(struct net_device *dev);
void dev_disable_lro(struct net_device *dev);
int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
+0 −19
Original line number Diff line number Diff line
@@ -1761,25 +1761,6 @@ void netif_close(struct net_device *dev)
	}
}

int dev_setup_tc(struct net_device *dev, enum tc_setup_type type,
		 void *type_data)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	if (!ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	netdev_lock_ops(dev);
	ret = ops->ndo_setup_tc(dev, type, type_data);
	netdev_unlock_ops(dev);

	return ret;
}
EXPORT_SYMBOL(dev_setup_tc);

void netif_disable_lro(struct net_device *dev)
{
	struct net_device *lower_dev;
+1 −1
Original line number Diff line number Diff line
@@ -1175,7 +1175,7 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);
	down_write(&flowtable->flow_block_lock);
	err = dev_setup_tc(dev, TC_SETUP_FT, bo);
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
	up_write(&flowtable->flow_block_lock);
	if (err < 0)
		return err;
+1 −1
Original line number Diff line number Diff line
@@ -390,7 +390,7 @@ static int nft_block_offload_cmd(struct nft_base_chain *chain,

	nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);

	err = dev_setup_tc(dev, TC_SETUP_BLOCK, &bo);
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
	if (err < 0)
		return err;