Commit 9d405911 authored by Jakub Kicinski
Browse files
Tariq Toukan says:

====================
mlx5-next updates 2026-01-13

* 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux:
  net/mlx5: Add IFC bits for extended ETS rate limit bandwidth value
  net/mlx5: Add support for querying bond speed
  net/mlx5: Handle port and vport speed change events in MPESW
  net/mlx5: Propagate LAG effective max_tx_speed to vports
  net/mlx5: Add max_tx_speed and its CAP bit to IFC
====================

Link: https://patch.msgid.link/1768299471-1603093-1-git-send-email-tariqt@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents c65182ef 49e41f3e
Loading
Loading
Loading
Loading
+215 −0
Original line number Diff line number Diff line
@@ -233,14 +233,25 @@ static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
	struct net *net;
	int i;

	if (ldev->nb.notifier_call) {
		net = read_pnet(&ldev->net);
		unregister_netdevice_notifier_net(net, &ldev->nb);
	}

	mlx5_ldev_for_each(i, 0, ldev) {
		if (ldev->pf[i].dev &&
		    ldev->pf[i].port_change_nb.nb.notifier_call) {
			struct mlx5_nb *nb = &ldev->pf[i].port_change_nb;

			mlx5_eq_notifier_unregister(ldev->pf[i].dev, nb);
		}
	}

	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	cancel_work_sync(&ldev->speed_update_work);
	destroy_workqueue(ldev->wq);
	mutex_destroy(&ldev->lock);
	kfree(ldev);
@@ -274,6 +285,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
	kref_init(&ldev->ref);
	mutex_init(&ldev->lock);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
	INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	write_pnet(&ldev->net, mlx5_core_net(dev));
@@ -996,6 +1008,137 @@ static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

#ifdef CONFIG_MLX5_ESWITCH
/*
 * Add up the speed reported by @get_speed for each PF device of @ldev and
 * store the total in *@sum_speed.
 *
 * Returns 0 on success. If @get_speed fails for a device, the walk stops and
 * that error is returned; *@sum_speed then only holds the speeds collected
 * before the failing device.
 */
static int
mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
			   int (*get_speed)(struct mlx5_core_dev *, u32 *))
{
	int i;

	*sum_speed = 0;
	mlx5_ldev_for_each(i, 0, ldev) {
		struct mlx5_core_dev *mdev = ldev->pf[i].dev;
		u32 dev_speed;
		int err;

		if (!mdev)
			continue;

		err = get_speed(mdev, &dev_speed);
		if (!err) {
			*sum_speed += dev_speed;
			continue;
		}

		mlx5_core_dbg(mdev,
			      "Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n",
			      get_speed, dev_name(mdev->device),
			      err);
		return err;
	}

	return 0;
}

/*
 * Sum the values reported by mlx5_port_max_linkspeed() over all PF devices
 * of @ldev into *@max_speed. Returns 0 or the first query error.
 */
static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
{
	int (*query)(struct mlx5_core_dev *, u32 *) = mlx5_port_max_linkspeed;

	return mlx5_lag_sum_devices_speed(ldev, max_speed, query);
}

/*
 * Sum the values reported by mlx5_port_oper_linkspeed() over all PF devices
 * of @ldev into *@oper_speed. Returns 0 or the first query error.
 */
static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
					   u32 *oper_speed)
{
	int (*query)(struct mlx5_core_dev *, u32 *) = mlx5_port_oper_linkspeed;

	return mlx5_lag_sum_devices_speed(ldev, oper_speed, query);
}

/*
 * Set @speed as the max TX speed of every vport of @mdev's eswitch, except
 * the uplink vport.
 *
 * @speed is passed to the device as-is; callers in this file pass a link
 * speed already divided by MLX5_MAX_TX_SPEED_UNIT.
 *
 * Does nothing when @mdev has no eswitch or when the device does not report
 * the esw_vport_state_max_tx_speed capability. A failure on one vport is
 * only logged at debug level; the remaining vports are still updated.
 */
static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
						u32 speed)
{
	u16 op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
	struct mlx5_eswitch *esw = mdev->priv.eswitch;
	struct mlx5_vport *vport;
	unsigned long i;
	int ret;

	if (!esw)
		return;

	if (!MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed))
		return;

	mlx5_esw_for_each_vport(esw, i, vport) {
		if (!vport)
			continue;

		/* The uplink speed is the physical port's; leave it alone. */
		if (vport->vport == MLX5_VPORT_UPLINK)
			continue;

		ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod,
						     vport->vport, true, speed);
		/* @speed is u32, so print it with an unsigned conversion. */
		if (ret)
			mlx5_core_dbg(mdev,
				      "Failed to set vport %d speed %u, err=%d\n",
				      vport->vport, speed, ret);
	}
}

/*
 * Propagate the aggregated LAG speed to the vports of every PF in @ldev.
 *
 * Speed source:
 *  - MPESW mode: sum of the operational speeds of all PFs.
 *  - any other mode: the bond speed cached in the tracker; nothing is done
 *    when it is SPEED_UNKNOWN.
 * A speed of zero falls back to the sum of the PFs' max speeds. If any speed
 * query fails the function returns without touching the vports.
 *
 * The chosen speed is divided by MLX5_MAX_TX_SPEED_UNIT before being applied.
 */
void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
{
	u32 agg_speed;
	int i;

	if (ldev->mode != MLX5_LAG_MODE_MPESW) {
		agg_speed = ldev->tracker.bond_speed_mbps;
		if (agg_speed == SPEED_UNKNOWN)
			return;
	} else if (mlx5_lag_sum_devices_oper_speed(ldev, &agg_speed)) {
		return;
	}

	/* If speed is not set, use the sum of max speeds of all PFs */
	if (!agg_speed) {
		if (mlx5_lag_sum_devices_max_speed(ldev, &agg_speed))
			return;
	}

	agg_speed /= MLX5_MAX_TX_SPEED_UNIT;

	mlx5_ldev_for_each(i, 0, ldev) {
		struct mlx5_core_dev *mdev = ldev->pf[i].dev;

		if (mdev)
			mlx5_lag_modify_device_vports_speed(mdev, agg_speed);
	}
}

/*
 * Set the vports of each PF in @ldev back to that PF's own operational
 * speed, divided by MLX5_MAX_TX_SPEED_UNIT.
 *
 * A PF whose operational speed cannot be queried is logged at debug level
 * and skipped; the other PFs are still processed.
 */
void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
{
	int i;

	mlx5_ldev_for_each(i, 0, ldev) {
		struct mlx5_core_dev *mdev = ldev->pf[i].dev;
		u32 oper_speed;
		int err;

		if (!mdev)
			continue;

		err = mlx5_port_oper_linkspeed(mdev, &oper_speed);
		if (!err) {
			oper_speed /= MLX5_MAX_TX_SPEED_UNIT;
			mlx5_lag_modify_device_vports_speed(mdev, oper_speed);
			continue;
		}

		mlx5_core_dbg(mdev,
			      "Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n",
			      dev_name(mdev->device), err);
	}
}
#endif

static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
@@ -1083,9 +1226,12 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
						     ndev);
			dev_put(ndev);
		}
		mlx5_lag_set_vports_agg_speed(ldev);
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		mlx5_modify_lag(ldev, &tracker);
		mlx5_lag_set_vports_agg_speed(ldev);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		mlx5_lag_reset_vports_speed(ldev);
		mlx5_disable_lag(ldev);
	}
}
@@ -1286,6 +1432,65 @@ static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
	return 1;
}

static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker,
					  struct net_device *ndev)
{
	struct ethtool_link_ksettings lksettings;
	struct net_device *bond_dev;
	int err;

	if (netif_is_lag_master(ndev))
		bond_dev = ndev;
	else
		bond_dev = netdev_master_upper_dev_get(ndev);

	if (!bond_dev) {
		tracker->bond_speed_mbps = SPEED_UNKNOWN;
		return;
	}

	err = __ethtool_get_link_ksettings(bond_dev, &lksettings);
	if (err) {
		netdev_dbg(bond_dev,
			   "Failed to get speed for bond dev %s, err=%d\n",
			   bond_dev->name, err);
		tracker->bond_speed_mbps = SPEED_UNKNOWN;
		return;
	}

	if (lksettings.base.speed == SPEED_UNKNOWN)
		tracker->bond_speed_mbps = 0;
	else
		tracker->bond_speed_mbps = lksettings.base.speed;
}

/*
 * Report the bond speed cached in the LAG tracker of @mdev, in Mbps.
 *
 * The tracker is read under lag_lock. Returns 0 on success, -ENODEV when
 * @mdev has no LAG device (*@speed is left untouched), or -EINVAL when the
 * cached speed is SPEED_UNKNOWN (*@speed is then set to SPEED_UNKNOWN).
 */
int mlx5_lag_query_bond_speed(struct mlx5_core_dev *mdev, u32 *speed)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	int err = -ENODEV;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(mdev);
	if (ldev) {
		*speed = ldev->tracker.bond_speed_mbps;
		if (*speed != SPEED_UNKNOWN) {
			err = 0;
		} else {
			mlx5_core_dbg(mdev, "Bond speed is unknown\n");
			err = -EINVAL;
		}
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	return err;
}
EXPORT_SYMBOL_GPL(mlx5_lag_query_bond_speed);

/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
@@ -1317,6 +1522,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
		break;
	}

	if (changed)
		mlx5_lag_update_tracker_speed(&tracker, ndev);

	ldev->tracker = tracker;

	if (changed)
@@ -1362,6 +1570,10 @@ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;

	MLX5_NB_INIT(&ldev->pf[fn].port_change_nb,
		     mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
	mlx5_eq_notifier_register(dev, &ldev->pf[fn].port_change_nb);
}

static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
@@ -1373,6 +1585,9 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
	if (ldev->pf[fn].dev != dev)
		return;

	if (ldev->pf[fn].port_change_nb.nb.notifier_call)
		mlx5_eq_notifier_unregister(dev, &ldev->pf[fn].port_change_nb);

	ldev->pf[fn].dev = NULL;
	dev->priv.lag = NULL;
}
+11 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ struct lag_func {
	struct mlx5_core_dev *dev;
	struct net_device    *netdev;
	bool has_drop;
	struct mlx5_nb port_change_nb;
};

/* Used for collection of netdev event info. */
@@ -48,6 +49,7 @@ struct lag_tracker {
	unsigned int is_bonded:1;
	unsigned int has_inactive:1;
	enum netdev_lag_hash hash_type;
	u32 bond_speed_mbps;
};

/* LAG data of a ConnectX card.
@@ -66,6 +68,7 @@ struct mlx5_lag {
	struct lag_tracker        tracker;
	struct workqueue_struct   *wq;
	struct delayed_work       bond_work;
	struct work_struct        speed_update_work;
	struct notifier_block     nb;
	possible_net_t net;
	struct lag_mp             lag_mp;
@@ -116,6 +119,14 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev);
void mlx5_lag_add_devices(struct mlx5_lag *ldev);
struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev);

/*
 * LAG vport speed helpers.
 *
 * The real implementations are only available when CONFIG_MLX5_ESWITCH is
 * set; without it both calls are empty inline stubs, so callers do not need
 * their own #ifdef.
 */
#ifdef CONFIG_MLX5_ESWITCH
void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev);
void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev);
#else
static inline void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev) {}
static inline void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev) {}
#endif

static inline bool mlx5_lag_is_supported(struct mlx5_core_dev *dev)
{
	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+39 −0
Original line number Diff line number Diff line
@@ -110,6 +110,8 @@ static int enable_mpesw(struct mlx5_lag *ldev)
			goto err_rescan_drivers;
	}

	mlx5_lag_set_vports_agg_speed(ldev);

	return 0;

err_rescan_drivers:
@@ -223,3 +225,40 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev)
	return ldev && ldev->mode == MLX5_LAG_MODE_MPESW;
}
EXPORT_SYMBOL(mlx5_lag_is_mpesw);

/*
 * Work handler for ldev->speed_update_work.
 *
 * Under ldev->lock: if the LAG is in MPESW mode, re-apply the aggregated
 * speed to the vports. While a mode change is in progress the update is not
 * performed; the work re-queues itself on ldev->wq instead. Nothing is done
 * in any other LAG mode.
 */
void mlx5_mpesw_speed_update_work(struct work_struct *work)
{
	struct mlx5_lag *ldev;

	ldev = container_of(work, struct mlx5_lag, speed_update_work);

	mutex_lock(&ldev->lock);
	if (ldev->mode != MLX5_LAG_MODE_MPESW)
		goto unlock;

	if (!ldev->mode_changes_in_progress)
		mlx5_lag_set_vports_agg_speed(ldev);
	else
		queue_work(ldev->wq, &ldev->speed_update_work);

unlock:
	mutex_unlock(&ldev->lock);
}

/*
 * Notifier callback for the per-PF port_change_nb.
 *
 * @nb is embedded in a struct mlx5_nb, which is the port_change_nb member of
 * a struct lag_func; the owning device and its LAG are recovered from there.
 * @data is the event queue entry. On a port DOWN or ACTIVE sub-type the
 * speed update work is queued on the LAG workqueue.
 *
 * Returns NOTIFY_DONE when the device has no LAG, NOTIFY_OK otherwise.
 */
int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
				     unsigned long event, void *data)
{
	struct mlx5_eqe *eqe = data;
	struct mlx5_lag *ldev;
	struct lag_func *pf;

	pf = container_of(container_of(nb, struct mlx5_nb, nb),
			  struct lag_func, port_change_nb);
	ldev = pf->dev->priv.lag;
	if (!ldev)
		return NOTIFY_DONE;

	switch (eqe->sub_type) {
	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
		queue_work(ldev->wq, &ldev->speed_update_work);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}
+14 −0
Original line number Diff line number Diff line
@@ -32,4 +32,18 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev);
void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev);
int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev);

/*
 * MPESW speed tracking entry points.
 *
 * With CONFIG_MLX5_ESWITCH the work handler and the port-change notifier
 * callback are provided by the MPESW code. Without it they are inline stubs:
 * the work handler does nothing and the notifier callback returns
 * NOTIFY_DONE.
 */
#ifdef CONFIG_MLX5_ESWITCH
void mlx5_mpesw_speed_update_work(struct work_struct *work);
int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
				     unsigned long event, void *data);
#else
static inline void mlx5_mpesw_speed_update_work(struct work_struct *work) {}
static inline int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
						   unsigned long event,
						   void *data)
{
	return NOTIFY_DONE;
}
#endif /* CONFIG_MLX5_ESWITCH */

#endif /* __MLX5_LAG_MPESW_H__ */
+1 −0
Original line number Diff line number Diff line
@@ -381,6 +381,7 @@ const struct mlx5_link_info *mlx5_port_ptys2info(struct mlx5_core_dev *mdev,
u32 mlx5_port_info2linkmodes(struct mlx5_core_dev *mdev,
			     struct mlx5_link_info *info,
			     bool force_legacy);
/*
 * Query the operational / maximum link speed of @mdev's port into *@speed.
 * Callers treat a nonzero return value as failure.
 * NOTE(review): presumably *@speed is in Mbps, since the LAG code uses it
 * interchangeably with the bond speed in Mbps - confirm against the
 * implementation.
 */
int mlx5_port_oper_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);

#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) &&		\
Loading