Commit 22675f07 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'net-mlx5-fixes-for-socket-direct'

Tariq Toukan says:

====================
net/mlx5: Fixes for Socket-Direct

This series fixes several race conditions and bugs in the mlx5
Socket-Direct (SD) single netdev flow.

Patch 1 serializes mlx5_sd_init()/mlx5_sd_cleanup() with
mlx5_devcom_comp_lock() and tracks the SD group state on the primary
device, preventing concurrent or duplicate bring-up/tear-down.

Patch 2 fixes the debugfs "multi-pf" directory being stored on the
calling device's sd struct instead of the primary's, which caused
memory leaks and recreation errors when cleanup ran from a different PF.

Patch 3 fixes a race where a secondary PF could access the primary's
auxiliary device after it had been unbound, by holding the primary's
device lock while operating on its auxiliary device.

Patch 4 fixes missing cleanup on ETH probe errors. The analogous gap on
the resume path requires introducing sd_suspend/resume APIs that only
destroy FW resources and is left for a follow-up series.
====================

Link: https://patch.msgid.link/20260504180206.268568-1-tariqt@nvidia.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents af0e9b26 d466ddda
Loading
Loading
Loading
Loading
+21 −5
Original line number Diff line number Diff line
@@ -6774,9 +6774,11 @@ static int mlx5e_resume(struct auxiliary_device *adev)
		return err;

	actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
	if (actual_adev)
		return _mlx5e_resume(actual_adev);
	return 0;
	if (actual_adev) {
		err = _mlx5e_resume(actual_adev);
		mlx5_sd_put_adev(actual_adev, adev);
	}
	return err;
}

static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg)
@@ -6815,6 +6817,8 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
		err = _mlx5e_suspend(actual_adev, false);

	mlx5_sd_cleanup(mdev);
	if (actual_adev)
		mlx5_sd_put_adev(actual_adev, adev);
	return err;
}

@@ -6912,9 +6916,19 @@ static int mlx5e_probe(struct auxiliary_device *adev,
		return err;

	actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
	if (actual_adev)
		return _mlx5e_probe(actual_adev);
	if (actual_adev) {
		err = _mlx5e_probe(actual_adev);
		if (err)
			goto sd_cleanup;
		mlx5_sd_put_adev(actual_adev, adev);
	}
	return 0;

sd_cleanup:
	mlx5_sd_cleanup(mdev);
	if (actual_adev)
		mlx5_sd_put_adev(actual_adev, adev);
	return err;
}

static void _mlx5e_remove(struct auxiliary_device *adev)
@@ -6966,6 +6980,8 @@ static void mlx5e_remove(struct auxiliary_device *adev)
		_mlx5e_remove(actual_adev);

	mlx5_sd_cleanup(mdev);
	if (actual_adev)
		mlx5_sd_put_adev(actual_adev, adev);
}

static const struct auxiliary_device_id mlx5e_id_table[] = {
+99 −15
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ struct mlx5_sd {
	u8 host_buses;
	struct mlx5_devcom_comp_dev *devcom;
	struct dentry *dfs;
	u8 state;
	bool primary;
	union {
		struct { /* primary */
@@ -31,6 +32,11 @@ struct mlx5_sd {
	};
};

enum mlx5_sd_state {
	MLX5_SD_STATE_DOWN = 0,
	MLX5_SD_STATE_UP,
};

static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev)
{
	struct mlx5_sd *sd = mlx5_get_sd(dev);
@@ -270,9 +276,6 @@ static void sd_unregister(struct mlx5_core_dev *dev)
{
	struct mlx5_sd *sd = mlx5_get_sd(dev);

	mlx5_devcom_comp_lock(sd->devcom);
	mlx5_devcom_comp_set_ready(sd->devcom, false);
	mlx5_devcom_comp_unlock(sd->devcom);
	mlx5_devcom_unregister_component(sd->devcom);
}

@@ -426,6 +429,7 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
	struct mlx5_core_dev *primary, *pos, *to;
	struct mlx5_sd *sd = mlx5_get_sd(dev);
	u8 alias_key[ACCESS_KEY_LEN];
	struct mlx5_sd *primary_sd;
	int err, i;

	err = sd_init(dev);
@@ -440,10 +444,17 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
	if (err)
		goto err_sd_cleanup;

	mlx5_devcom_comp_lock(sd->devcom);
	if (!mlx5_devcom_comp_is_ready(sd->devcom))
		return 0;
		goto out;

	primary = mlx5_sd_get_primary(dev);
	if (!primary)
		goto out;

	primary_sd = mlx5_get_sd(primary);
	if (primary_sd->state != MLX5_SD_STATE_DOWN)
		goto out;

	for (i = 0; i < ACCESS_KEY_LEN; i++)
		alias_key[i] = get_random_u8();
@@ -452,9 +463,13 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
	if (err)
		goto err_sd_unregister;

	sd->dfs = debugfs_create_dir("multi-pf", mlx5_debugfs_get_dev_root(primary));
	debugfs_create_x32("group_id", 0400, sd->dfs, &sd->group_id);
	debugfs_create_file("primary", 0400, sd->dfs, primary, &dev_fops);
	primary_sd->dfs =
		debugfs_create_dir("multi-pf",
				   mlx5_debugfs_get_dev_root(primary));
	debugfs_create_x32("group_id", 0400, primary_sd->dfs,
			   &primary_sd->group_id);
	debugfs_create_file("primary", 0400, primary_sd->dfs, primary,
			    &dev_fops);

	mlx5_sd_for_each_secondary(i, primary, pos) {
		char name[32];
@@ -464,7 +479,8 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
			goto err_unset_secondaries;

		snprintf(name, sizeof(name), "secondary_%d", i - 1);
		debugfs_create_file(name, 0400, sd->dfs, pos, &dev_fops);
		debugfs_create_file(name, 0400, primary_sd->dfs, pos,
				    &dev_fops);

	}

@@ -472,6 +488,9 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
		sd->group_id, mlx5_devcom_comp_get_size(sd->devcom));
	sd_print_group(primary);

	primary_sd->state = MLX5_SD_STATE_UP;
out:
	mlx5_devcom_comp_unlock(sd->devcom);
	return 0;

err_unset_secondaries:
@@ -479,8 +498,18 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
	mlx5_sd_for_each_secondary_to(i, primary, to, pos)
		sd_cmd_unset_secondary(pos);
	sd_cmd_unset_primary(primary);
	debugfs_remove_recursive(sd->dfs);
	debugfs_remove_recursive(primary_sd->dfs);
	primary_sd->dfs = NULL;
err_sd_unregister:
	mlx5_sd_for_each_secondary(i, primary, pos) {
		struct mlx5_sd *peer_sd = mlx5_get_sd(pos);

		primary_sd->secondaries[i - 1] = NULL;
		peer_sd->primary_dev = NULL;
	}
	primary_sd->primary = false;
	mlx5_devcom_comp_set_ready(sd->devcom, false);
	mlx5_devcom_comp_unlock(sd->devcom);
	sd_unregister(dev);
err_sd_cleanup:
	sd_cleanup(dev);
@@ -491,42 +520,97 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
{
	struct mlx5_sd *sd = mlx5_get_sd(dev);
	struct mlx5_core_dev *primary, *pos;
	struct mlx5_sd *primary_sd;
	int i;

	if (!sd)
		return;

	mlx5_devcom_comp_lock(sd->devcom);
	if (!mlx5_devcom_comp_is_ready(sd->devcom))
		goto out;
		goto out_unlock;

	primary = mlx5_sd_get_primary(dev);
	if (!primary)
		goto out_ready_false;

	primary_sd = mlx5_get_sd(primary);
	if (primary_sd->state != MLX5_SD_STATE_UP)
		goto out_clear_peers;

	mlx5_sd_for_each_secondary(i, primary, pos)
		sd_cmd_unset_secondary(pos);
	sd_cmd_unset_primary(primary);
	debugfs_remove_recursive(sd->dfs);
	debugfs_remove_recursive(primary_sd->dfs);
	primary_sd->dfs = NULL;

	sd_info(primary, "group id %#x, uncombined\n", sd->group_id);
out:
	primary_sd->state = MLX5_SD_STATE_DOWN;
out_clear_peers:
	mlx5_sd_for_each_secondary(i, primary, pos) {
		struct mlx5_sd *peer_sd = mlx5_get_sd(pos);

		primary_sd->secondaries[i - 1] = NULL;
		peer_sd->primary_dev = NULL;
	}
	primary_sd->primary = false;
out_ready_false:
	mlx5_devcom_comp_set_ready(sd->devcom, false);
out_unlock:
	mlx5_devcom_comp_unlock(sd->devcom);
	sd_unregister(dev);
	sd_cleanup(dev);
}

/* Lock order:
 *   primary:   actual_adev_lock -> SD devcom comp lock
 *   secondary: SD devcom comp lock -> (drop) -> actual_adev_lock
 * The two locks are never held together, so no ABBA.
 */
struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
					  struct auxiliary_device *adev,
					  int idx)
{
	struct mlx5_sd *sd = mlx5_get_sd(dev);
	struct mlx5_core_dev *primary;
	struct mlx5_adev *primary_adev;

	if (!sd)
		return adev;

	if (!mlx5_devcom_comp_is_ready(sd->devcom))
	mlx5_devcom_comp_lock(sd->devcom);
	if (!mlx5_devcom_comp_is_ready(sd->devcom)) {
		mlx5_devcom_comp_unlock(sd->devcom);
		return NULL;
	}

	primary = mlx5_sd_get_primary(dev);
	if (dev == primary)
	if (!primary || dev == primary) {
		mlx5_devcom_comp_unlock(sd->devcom);
		return adev;
	}

	return &primary->priv.adev[idx]->adev;
	primary_adev = primary->priv.adev[idx];
	get_device(&primary_adev->adev.dev);
	mlx5_devcom_comp_unlock(sd->devcom);

	device_lock(&primary_adev->adev.dev);
	/* Primary may have completed remove between dropping devcom and
	 * acquiring device_lock; recheck.
	 */
	if (!mlx5_devcom_comp_is_ready(sd->devcom)) {
		device_unlock(&primary_adev->adev.dev);
		put_device(&primary_adev->adev.dev);
		return NULL;
	}
	return &primary_adev->adev;
}

void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
		      struct auxiliary_device *adev)
{
	if (actual_adev != adev) {
		device_unlock(&actual_adev->dev);
		put_device(&actual_adev->dev);
	}
}
+2 −0
Original line number Diff line number Diff line
@@ -15,6 +15,8 @@ struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int c
struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
					  struct auxiliary_device *adev,
					  int idx);
void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
		      struct auxiliary_device *adev);

int mlx5_sd_init(struct mlx5_core_dev *dev);
void mlx5_sd_cleanup(struct mlx5_core_dev *dev);