Commit f83e0e0b authored by Jakub Kicinski
Browse files

Merge branch 'net-mlx5-misc-changes-2025-11-17'

Tariq Toukan says:

====================
net/mlx5: misc changes 2025-11-17

This series contains misc enhancements to the mlx5 driver.
====================

Link: https://patch.msgid.link/1763415729-1238421-1-git-send-email-tariqt@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 1064d521 70ca239b
Loading
Loading
Loading
Loading
+55 −0
Original line number Diff line number Diff line
@@ -181,6 +181,7 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd, struct mlx5_cmd_work_ent *ent)
/* Release command slot @idx: drop the entry pointer stored for the slot and
 * mark the slot as free in the slot bitmask.
 *
 * The caller must hold cmd->alloc_lock; this is checked with lockdep.
 */
static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
{
	lockdep_assert_held(&cmd->alloc_lock);
	/* Clear the stale entry pointer before the slot is marked free */
	cmd->ent_arr[idx] = NULL;
	/* A set bit in the bitmask denotes a free slot */
	set_bit(idx, &cmd->vars.bitmask);
}

@@ -1200,6 +1201,44 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
	return err;
}

/* Detect the "all stalled" state of the regular command slots.
 *
 * Returns true only when no slot is free and every occupied slot holds an
 * entry flagged MLX5_CMD_ENT_STATE_TIMEDOUT, i.e. each slot timed out on a
 * recent command that FW has not completed yet. Returns false as soon as one
 * slot is free or one occupied entry does not carry that flag.
 *
 * This is a protection mechanism: when FW is not responding on any slot,
 * it lets the caller avoid blocking the kernel for long periods of time.
 *
 * The bitmask and the entry array are inspected under cmd->alloc_lock.
 */
static bool mlx5_cmd_all_stalled(struct mlx5_core_dev *dev)
{
	struct mlx5_cmd *cmd = &dev->cmd;
	unsigned long irq_flags;
	bool stalled = false;
	int slot;

	spin_lock_irqsave(&cmd->alloc_lock, irq_flags);

	/* A set bit marks a free slot; only scan entries when none is free */
	if (bitmap_weight(&cmd->vars.bitmask, cmd->vars.max_reg_cmds) == 0) {
		stalled = true;

		for_each_clear_bit(slot, &cmd->vars.bitmask,
				   cmd->vars.max_reg_cmds) {
			if (test_bit(MLX5_CMD_ENT_STATE_TIMEDOUT,
				     &cmd->ent_arr[slot]->state))
				continue;

			/* this slot is busy but has not timed out */
			stalled = false;
			break;
		}
	}

	spin_unlock_irqrestore(&cmd->alloc_lock, irq_flags);

	return stalled;
}

/*  Notes:
 *    1. Callback functions may not sleep
 *    2. page queue commands do not support asynchronous completion
@@ -1230,6 +1269,15 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
	if (callback && page_queue)
		return -EINVAL;

	if (!page_queue && mlx5_cmd_all_stalled(dev)) {
		mlx5_core_err_rl(dev,
				 "All CMD slots are stalled, aborting command\n");
		/* there's no reason to wait and block the whole kernel if FW
		 * isn't currently responding to all slots, fail immediately
		 */
		return -EAGAIN;
	}

	ent = cmd_alloc_ent(cmd, in, out, uout, uout_size,
			    callback, context, page_queue);
	if (IS_ERR(ent))
@@ -1700,6 +1748,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
		if (test_bit(i, &vector)) {
			ent = cmd->ent_arr[i];

			if (forced && ent->ret == -ETIMEDOUT)
				set_bit(MLX5_CMD_ENT_STATE_TIMEDOUT,
					&ent->state);
			else if (!forced) /* real FW completion */
				clear_bit(MLX5_CMD_ENT_STATE_TIMEDOUT,
					  &ent->state);

			/* if we already completed the command, ignore it */
			if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
						&ent->state)) {
+1 −1
Original line number Diff line number Diff line
@@ -54,7 +54,7 @@ static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)

	if (!MLX5_GET(mtrc_cap, out, trace_to_memory)) {
		mlx5_core_dbg(dev, "FWTracer: Device does not support logging traces to memory\n");
		return -ENOTSUPP;
		return -EOPNOTSUPP;
	}

	tracer->trc_ver = MLX5_GET(mtrc_cap, out, trc_ver);
+15 −6
Original line number Diff line number Diff line
@@ -82,7 +82,7 @@ static struct mlx5e_skb_cb_hwtstamp *mlx5e_skb_cb_get_hwts(struct sk_buff *skb)
}

static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
				     struct mlx5e_ptp_cq_stats *cq_stats)
				     struct mlx5e_ptpsq *ptpsq)
{
	struct skb_shared_hwtstamps hwts = {};
	ktime_t diff;
@@ -92,8 +92,17 @@ static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,

	/* Maximal allowed diff is 1 / 128 second */
	if (diff > (NSEC_PER_SEC >> 7)) {
		cq_stats->abort++;
		cq_stats->abort_abs_diff_ns += diff;
		struct mlx5e_txqsq *sq = &ptpsq->txqsq;

		ptpsq->cq_stats->abort++;
		ptpsq->cq_stats->abort_abs_diff_ns += diff;
		if (diff > (NSEC_PER_SEC >> 1) &&
		    !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
			netdev_warn(sq->channel->netdev,
				    "PTP TX timestamp difference between CQE and port exceeds threshold: %lld ns, recovering SQ %u\n",
				    (s64)diff, sq->sqn);
			queue_work(sq->priv->wq, &ptpsq->report_unhealthy_work);
		}
		return;
	}

@@ -103,7 +112,7 @@ static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,

void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
				   ktime_t hwtstamp,
				   struct mlx5e_ptp_cq_stats *cq_stats)
				   struct mlx5e_ptpsq *ptpsq)
{
	switch (hwtstamp_type) {
	case (MLX5E_SKB_CB_CQE_HWTSTAMP):
@@ -121,7 +130,7 @@ void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
	    !mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp)
		return;

	mlx5e_skb_cb_hwtstamp_tx(skb, cq_stats);
	mlx5e_skb_cb_hwtstamp_tx(skb, ptpsq);
	memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
}

@@ -209,7 +218,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,

	hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
	mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP,
				      hwtstamp, ptpsq->cq_stats);
				      hwtstamp, ptpsq);
	ptpsq->cq_stats->cqe++;

	mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp);
+1 −1
Original line number Diff line number Diff line
@@ -147,7 +147,7 @@ enum {

void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
				   ktime_t hwtstamp,
				   struct mlx5e_ptp_cq_stats *cq_stats);
				   struct mlx5e_ptpsq *ptpsq);

void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb);
#endif /* __MLX5_EN_PTP_H__ */
+11 −8
Original line number Diff line number Diff line
@@ -2027,7 +2027,7 @@ static int mlx5e_get_module_info(struct net_device *netdev,
	int size_read = 0;
	u8 data[4] = {0};

	size_read = mlx5_query_module_eeprom(dev, 0, 2, data);
	size_read = mlx5_query_module_eeprom(dev, 0, 2, data, NULL);
	if (size_read < 2)
		return -EIO;

@@ -2069,6 +2069,7 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
	struct mlx5_core_dev *mdev = priv->mdev;
	int offset = ee->offset;
	int size_read;
	u8 status = 0;
	int i = 0;

	if (!ee->len)
@@ -2078,15 +2079,15 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,

	while (i < ee->len) {
		size_read = mlx5_query_module_eeprom(mdev, offset, ee->len - i,
						     data + i);

						     data + i, &status);
		if (!size_read)
			/* Done reading */
			return 0;

		if (size_read < 0) {
			netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n",
				   __func__, size_read);
			netdev_err(netdev,
				   "%s: mlx5_query_eeprom failed:0x%x, status %u\n",
				   __func__, size_read, status);
			return size_read;
		}

@@ -2106,6 +2107,7 @@ static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev,
	struct mlx5_core_dev *mdev = priv->mdev;
	u8 *data = page_data->data;
	int size_read;
	u8 status = 0;
	int i = 0;

	if (!page_data->length)
@@ -2119,7 +2121,8 @@ static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev,
	query.page = page_data->page;
	while (i < page_data->length) {
		query.size = page_data->length - i;
		size_read = mlx5_query_module_eeprom_by_page(mdev, &query, data + i);
		size_read = mlx5_query_module_eeprom_by_page(mdev, &query,
							     data + i, &status);

		/* Done reading, return how many bytes was read */
		if (!size_read)
@@ -2128,8 +2131,8 @@ static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev,
		if (size_read < 0) {
			NL_SET_ERR_MSG_FMT_MOD(
				extack,
				"Query module eeprom by page failed, read %u bytes, err %d",
				i, size_read);
				"Query module eeprom by page failed, read %u bytes, err %d, status %u",
				i, size_read, status);
			return size_read;
		}

Loading