Commit 1164057b authored by Jakub Kicinski
Browse files

Merge branch 'mlx5-misc-fixes'

Tariq Toukan says:

====================
mlx5 misc fixes

This patchset provides bug fixes to the mlx5 driver.

Patch 1 by Shay fixes the error flow in mlx5e_suspend().
Patch 2 by Shay aligns the peer devlink set logic with the register devlink flow.
Patch 3 by Maher solves a deadlock in lag enable/disable.
Patches 4 and 5 by Akiva address issues in command interface corner cases.

Series generated against:
commit 393ceeb9 ("Merge branch 'there-are-some-bugfix-for-the-hns3-ethernet-driver'")
====================

Link: https://lore.kernel.org/r/20240509112951.590184-1-tariqt@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents df7025b3 db9b31aa
Loading
Loading
Loading
Loading
+35 −9
Original line number Diff line number Diff line
@@ -969,19 +969,32 @@ static void cmd_work_handler(struct work_struct *work)
	bool poll_cmd = ent->polling;
	struct mlx5_cmd_layout *lay;
	struct mlx5_core_dev *dev;
	unsigned long cb_timeout;
	struct semaphore *sem;
	unsigned long timeout;
	unsigned long flags;
	int alloc_ret;
	int cmd_mode;

	complete(&ent->handling);

	dev = container_of(cmd, struct mlx5_core_dev, cmd);
	cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
	timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));

	complete(&ent->handling);
	sem = ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem;
	down(sem);
	if (!ent->page_queue) {
		if (down_timeout(&cmd->vars.sem, timeout)) {
			mlx5_core_warn(dev, "%s(0x%x) timed out while waiting for a slot.\n",
				       mlx5_command_str(ent->op), ent->op);
			if (ent->callback) {
				ent->callback(-EBUSY, ent->context);
				mlx5_free_cmd_msg(dev, ent->out);
				free_msg(dev, ent->in);
				cmd_ent_put(ent);
			} else {
				ent->ret = -EBUSY;
				complete(&ent->done);
			}
			complete(&ent->slotted);
			return;
		}
		alloc_ret = cmd_alloc_index(cmd, ent);
		if (alloc_ret < 0) {
			mlx5_core_err_rl(dev, "failed to allocate command entry\n");
@@ -994,10 +1007,11 @@ static void cmd_work_handler(struct work_struct *work)
				ent->ret = -EAGAIN;
				complete(&ent->done);
			}
			up(sem);
			up(&cmd->vars.sem);
			return;
		}
	} else {
		down(&cmd->vars.pages_sem);
		ent->idx = cmd->vars.max_reg_cmds;
		spin_lock_irqsave(&cmd->alloc_lock, flags);
		clear_bit(ent->idx, &cmd->vars.bitmask);
@@ -1005,6 +1019,8 @@ static void cmd_work_handler(struct work_struct *work)
		spin_unlock_irqrestore(&cmd->alloc_lock, flags);
	}

	complete(&ent->slotted);

	lay = get_inst(cmd, ent->idx);
	ent->lay = lay;
	memset(lay, 0, sizeof(*lay));
@@ -1023,7 +1039,7 @@ static void cmd_work_handler(struct work_struct *work)
	ent->ts1 = ktime_get_ns();
	cmd_mode = cmd->mode;

	if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout))
	if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, timeout))
		cmd_ent_get(ent);
	set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);

@@ -1143,6 +1159,9 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
		ent->ret = -ECANCELED;
		goto out_err;
	}

	wait_for_completion(&ent->slotted);

	if (cmd->mode == CMD_MODE_POLLING || ent->polling)
		wait_for_completion(&ent->done);
	else if (!wait_for_completion_timeout(&ent->done, timeout))
@@ -1157,6 +1176,9 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
	} else if (err == -ECANCELED) {
		mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n",
			       mlx5_command_str(ent->op), ent->op);
	} else if (err == -EBUSY) {
		mlx5_core_warn(dev, "%s(0x%x) timeout while waiting for command semaphore.\n",
			       mlx5_command_str(ent->op), ent->op);
	}
	mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
		      err, deliv_status_to_str(ent->status), ent->status);
@@ -1208,6 +1230,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
	ent->polling = force_polling;

	init_completion(&ent->handling);
	init_completion(&ent->slotted);
	if (!callback)
		init_completion(&ent->done);

@@ -1225,7 +1248,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
		return 0; /* mlx5_cmd_comp_handler() will put(ent) */

	err = wait_func(dev, ent);
	if (err == -ETIMEDOUT || err == -ECANCELED)
	if (err == -ETIMEDOUT || err == -ECANCELED || err == -EBUSY)
		goto out_free;

	ds = ent->ts2 - ent->ts1;
@@ -1611,6 +1634,9 @@ static int cmd_comp_notifier(struct notifier_block *nb,
	dev = container_of(cmd, struct mlx5_core_dev, cmd);
	eqe = data;

	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return NOTIFY_DONE;

	mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);

	return NOTIFY_OK;
+5 −5
Original line number Diff line number Diff line
@@ -6058,7 +6058,7 @@ static int mlx5e_resume(struct auxiliary_device *adev)
	return 0;
}

static int _mlx5e_suspend(struct auxiliary_device *adev)
static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg)
{
	struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev);
	struct mlx5e_priv *priv = mlx5e_dev->priv;
@@ -6067,7 +6067,7 @@ static int _mlx5e_suspend(struct auxiliary_device *adev)
	struct mlx5_core_dev *pos;
	int i;

	if (!netif_device_present(netdev)) {
	if (!pre_netdev_reg && !netif_device_present(netdev)) {
		if (test_bit(MLX5E_STATE_DESTROYING, &priv->state))
			mlx5_sd_for_each_dev(i, mdev, pos)
				mlx5e_destroy_mdev_resources(pos);
@@ -6090,7 +6090,7 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)

	actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
	if (actual_adev)
		err = _mlx5e_suspend(actual_adev);
		err = _mlx5e_suspend(actual_adev, false);

	mlx5_sd_cleanup(mdev);
	return err;
@@ -6157,7 +6157,7 @@ static int _mlx5e_probe(struct auxiliary_device *adev)
	return 0;

err_resume:
	_mlx5e_suspend(adev);
	_mlx5e_suspend(adev, true);
err_profile_cleanup:
	profile->cleanup(priv);
err_destroy_netdev:
@@ -6197,7 +6197,7 @@ static void _mlx5e_remove(struct auxiliary_device *adev)
	mlx5_core_uplink_netdev_set(mdev, NULL);
	mlx5e_dcbnl_delete_app(priv);
	unregister_netdev(priv->netdev);
	_mlx5e_suspend(adev);
	_mlx5e_suspend(adev, false);
	priv->profile->cleanup(priv);
	mlx5e_destroy_netdev(priv);
	mlx5e_devlink_port_unregister(mlx5e_dev);
+2 −2
Original line number Diff line number Diff line
@@ -833,7 +833,7 @@ int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
					     struct mlx5_eswitch *slave_esw, int max_slaves);
void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
					      struct mlx5_eswitch *slave_esw);
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
int mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw);

bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev);
void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
@@ -925,7 +925,7 @@ mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw) { return 0; }

static inline int
mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw)
{
	return 0;
}
+18 −10
Original line number Diff line number Diff line
@@ -2502,6 +2502,16 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw)
	esw_offloads_cleanup_reps(esw);
}

/*
 * Load a single representor type for @rep.
 *
 * Atomically switches rep_data[rep_type].state from REP_REGISTERED to
 * REP_LOADED. The rep_ops[rep_type]->load() callback is invoked only when
 * this call performed that transition.
 *
 * Return: the value returned by the load() callback, or 0 when the state
 * was not REP_REGISTERED and no callback was invoked.
 */
static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
				   struct mlx5_eswitch_rep *rep, u8 rep_type)
{
	int prev_state;

	prev_state = atomic_cmpxchg(&rep->rep_data[rep_type].state,
				    REP_REGISTERED, REP_LOADED);
	/* State was not REP_REGISTERED: nothing to load. */
	if (prev_state != REP_REGISTERED)
		return 0;

	return esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
}

static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
				      struct mlx5_eswitch_rep *rep, u8 rep_type)
{
@@ -2526,10 +2536,8 @@ static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num)
	int err;

	rep = mlx5_eswitch_get_rep(esw, vport_num);
	for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
		if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
				   REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
			err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
	for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
		err = __esw_offloads_load_rep(esw, rep, rep_type);
		if (err)
			goto err_reps;
	}
@@ -3277,7 +3285,7 @@ static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
		esw_vport_destroy_offloads_acl_tables(esw, vport);
}

int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
int mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw)
{
	struct mlx5_eswitch_rep *rep;
	unsigned long i;
@@ -3290,13 +3298,13 @@ int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
	if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
		return 0;

	ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
	ret = __esw_offloads_load_rep(esw, rep, REP_IB);
	if (ret)
		return ret;

	mlx5_esw_for_each_rep(esw, i, rep) {
		if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
			mlx5_esw_offloads_rep_load(esw, rep->vport);
			__esw_offloads_load_rep(esw, rep, REP_IB);
	}

	return 0;
+3 −3
Original line number Diff line number Diff line
@@ -814,7 +814,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
	if (shared_fdb)
		for (i = 0; i < ldev->ports; i++)
			if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
				mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
				mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
}

static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
@@ -922,7 +922,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
			mlx5_rescan_drivers_locked(dev0);

			for (i = 0; i < ldev->ports; i++) {
				err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
				err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
				if (err)
					break;
			}
@@ -933,7 +933,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				for (i = 0; i < ldev->ports; i++)
					mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
					mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
Loading