Commit 11ea9b8a authored by Tonghao Zhang's avatar Tonghao Zhang Committed by Paolo Abeni
Browse files

net: bonding: use workqueue to make sure peer notify updated in lacp mode



The rtnl lock might be locked, preventing ad_cond_set_peer_notif() from
acquiring the lock and updating send_peer_notif. This patch addresses
the issue by using a workqueue. Since updating send_peer_notif does
not require high real-time performance, such delayed updates are entirely
acceptable.

In fact, checking this value and using it in multiple places, all operations
are protected at the same time by rtnl lock, such as
- read send_peer_notif
- send_peer_notif--
- bond_should_notify_peers

By the way, rtnl lock is still required, when accessing bond.params.* for
updating send_peer_notif. In lacp mode, resetting send_peer_notif in
workqueue is safe, simple and effective way.

Additionally, this patch introduces bond_peer_notify_may_events(), which
is used to check whether an event should be sent. This function will be
used in both patch 1 and 2.

Cc: Jay Vosburgh <jv@jvosburgh.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Cc: Nikolay Aleksandrov <razor@blackwall.org>
Cc: Hangbin Liu <liuhangbin@gmail.com>
Cc: Jason Xing <kerneljasonxing@gmail.com>
Suggested-by: default avatarHangbin Liu <liuhangbin@gmail.com>
Signed-off-by: default avatarTonghao Zhang <tonghao@bamaicloud.com>
Reviewed-by: default avatarHangbin Liu <liuhangbin@gmail.com>
Link: https://patch.msgid.link/f95accb5db0b10ce3ed2f834fc70f716c9abbb9c.1768709239.git.tonghao@bamaicloud.com


Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parent 0b42aeb4
Loading
Loading
Loading
Loading
+2 −5
Original line number Diff line number Diff line
@@ -1017,11 +1017,8 @@ static void ad_cond_set_peer_notif(struct port *port)
{
	struct bonding *bond = port->slave->bond;

	if (bond->params.broadcast_neighbor && rtnl_trylock()) {
		bond->send_peer_notif = bond->params.num_peer_notif *
			max(1, bond->params.peer_notif_delay);
		rtnl_unlock();
	}
	if (bond->params.broadcast_neighbor)
		bond_peer_notify_work_rearm(bond, 0);
}

/**
+52 −14
Original line number Diff line number Diff line
@@ -1195,6 +1195,48 @@ static bool bond_should_notify_peers(struct bonding *bond)
	return true;
}

/* Use this to update send_peer_notif when RTNL may be held in other places. */
void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay)
{
	queue_delayed_work(bond->wq, &bond->peer_notify_work, delay);
}

/* Peer notify update handler. Holds only RTNL */
static void bond_peer_notify_reset(struct bonding *bond)
{
	bond->send_peer_notif = bond->params.num_peer_notif *
		max(1, bond->params.peer_notif_delay);
}

static void bond_peer_notify_handler(struct work_struct *work)
{
	struct bonding *bond = container_of(work, struct bonding,
					    peer_notify_work.work);

	if (!rtnl_trylock()) {
		bond_peer_notify_work_rearm(bond, 1);
		return;
	}

	bond_peer_notify_reset(bond);

	rtnl_unlock();
}

/* Peer notify events post. Holds only RTNL */
static void bond_peer_notify_may_events(struct bonding *bond, bool force)
{
	bool notified = false;

	if (bond_should_notify_peers(bond)) {
		notified = true;
		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
	}

	if (notified || force)
		bond->send_peer_notif--;
}

/**
 * bond_change_active_slave - change the active slave into the specified one
 * @bond: our bonding struct
@@ -1270,8 +1312,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
						      BOND_SLAVE_NOTIFY_NOW);

		if (new_active) {
			bool should_notify_peers = false;

			bond_set_slave_active_flags(new_active,
						    BOND_SLAVE_NOTIFY_NOW);

@@ -1279,19 +1319,11 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
				bond_do_fail_over_mac(bond, new_active,
						      old_active);

			if (netif_running(bond->dev)) {
				bond->send_peer_notif =
					bond->params.num_peer_notif *
					max(1, bond->params.peer_notif_delay);
				should_notify_peers =
					bond_should_notify_peers(bond);
			}

			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
			if (should_notify_peers) {
				bond->send_peer_notif--;
				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
							 bond->dev);

			if (netif_running(bond->dev)) {
				bond_peer_notify_reset(bond);
				bond_peer_notify_may_events(bond, false);
			}
		}
	}
@@ -4213,6 +4245,10 @@ static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)

void bond_work_init_all(struct bonding *bond)
{
	/* ndo_stop, bond_close() will try to flush the work under
	 * the rtnl lock. The workqueue must not block on rtnl lock
	 * to avoid deadlock.
	 */
	INIT_DELAYED_WORK(&bond->mcast_work,
			  bond_resend_igmp_join_requests_delayed);
	INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
@@ -4220,6 +4256,7 @@ void bond_work_init_all(struct bonding *bond)
	INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor);
	INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
	INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
	INIT_DELAYED_WORK(&bond->peer_notify_work, bond_peer_notify_handler);
}

void bond_work_cancel_all(struct bonding *bond)
@@ -4230,6 +4267,7 @@ void bond_work_cancel_all(struct bonding *bond)
	cancel_delayed_work_sync(&bond->ad_work);
	cancel_delayed_work_sync(&bond->mcast_work);
	cancel_delayed_work_sync(&bond->slave_arr_work);
	cancel_delayed_work_sync(&bond->peer_notify_work);
}

static int bond_open(struct net_device *bond_dev)
+2 −0
Original line number Diff line number Diff line
@@ -254,6 +254,7 @@ struct bonding {
	struct   delayed_work ad_work;
	struct   delayed_work mcast_work;
	struct   delayed_work slave_arr_work;
	struct   delayed_work peer_notify_work;
#ifdef CONFIG_DEBUG_FS
	/* debugging support via debugfs */
	struct	 dentry *debug_dir;
@@ -709,6 +710,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
					      int level);
int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay);
void bond_work_init_all(struct bonding *bond);
void bond_work_cancel_all(struct bonding *bond);