Commit 69776921 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mlx5-genl-queue-stats'

Joe Damato says:

====================
mlx5: Add netdev-genl queue stats

Welcome to v5.

Switched from RFC to just a v5, because I think this is pretty close.
Minor changes from v4 summarized below in the changelog.

Note that my NIC does not seem to support PTP and I couldn't get the
mlnx-tools mlnx_qos script to work, so I was only able to test the
following cases:

- device up at boot
- adjusting queue counts
- device down (e.g. ip link set dev eth4 down)

Please see the commit message of patch 2/2 for more details on output
and test cases.

rfcv4 thread:
  https://lore.kernel.org/linux-kernel/20240604004629.299699-1-jdamato@fastly.com/T/



rfcv4 -> v5:
 - Patch 1/2: change variable name 'mlx5e_qid' to 'txq_ix'.
 - Patch 2/2:
    - remove logic in mlx5e_get_queue_stats_rx for PTP. PTP RX are
      always reported in base.
    - report PTP TX in mlx5e_get_base_stats only if:
      - PTP has ever been opened, and
      - either PTP is NULL (closed) or the MLX5E_PTP_STATE_TX bit in its
        state is not set

    Otherwise, PTP TX will be reported when the txq_ix is passed into
    mlx5e_get_queue_stats_tx

rfcv3 -> rfcv4:
 - Patch 1/2 now creates a mapping (priv->txq2sq_stats) which maps txq
   indices to sq_stats structures so stats can be accessed directly.
   This mapping is kept up to date alongside txq2sq.

 - Patch 2/2:
   - All mutex_lock/unlock on state_lock has been dropped.
   - mlx5e_get_queue_stats_rx now uses ASSERT_RTNL() and has a special
     case for PTP. If PTP was ever opened, is currently opened, and the
     channel index matches, stats for PTP RX are output.
   - mlx5e_get_queue_stats_tx rewritten to use priv->txq2sq_stats. No
     corner cases are needed here because any txq idx (passed in as i)
     will have an up to date mapping in priv->txq2sq_stats.
   - mlx5e_get_base_stats:
     - in the RX case:
       - iterates from [params.num_channels, stats_nch) collecting
         stats.
       - if ptp was ever opened but is currently closed, add the PTP
         stats.
     - in the TX case:
       - handle 2 cases:
         - the channel is available, so sum only the unavailable TCs
           [mlx5e_get_dcb_num_tc, max_opened_tc).
         - the channel is unavailable, so sum all TCs [0, max_opened_tc).
       - if ptp was ever opened but is currently closed, add the PTP
         sq stats.

v2 -> rfcv3:
 - Added patch 1/2 which creates some helpers for computing the txq_ix
   and ch_ix/tc_ix.

 - Patch 2/2 modified in several ways:
   - Fixed variable declarations in mlx5e_get_queue_stats_rx to be at
     the start of the function.
   - mlx5e_get_queue_stats_tx rewritten to access sq stats directly by
     using the helpers added in the previous patch.
   - mlx5e_get_base_stats modified in several ways:
     - Took the state_lock when accessing priv->channels.
     - For the base RX stats, code was simplified to call
       mlx5e_get_queue_stats_rx instead of repeating the same code.
     - For the base TX stats, I attempted to implement what I think
       Tariq suggested in the previous thread:
         - for available channels, only unavailable TC stats are summed
	 - for unavailable channels, all stats for TCs up to
	   max_opened_tc are summed.

v1 -> v2:
  - Essentially a full rewrite after comments from Jakub, Tariq, and
    Zhu.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 934c2999 7b66ae53
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -867,6 +867,8 @@ struct mlx5e_priv {
	/* priv data path fields - start */
	struct mlx5e_selq selq;
	struct mlx5e_txqsq **txq2sq;
	struct mlx5e_sq_stats **txq2sq_stats;

#ifdef CONFIG_MLX5_CORE_EN_DCB
	struct mlx5e_dcbx_dp       dcbx_dp;
#endif
+11 −2
Original line number Diff line number Diff line
@@ -170,6 +170,7 @@ int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id)
	mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid));

	priv->txq2sq[qid] = sq;
	priv->txq2sq_stats[qid] = sq->stats;

	/* Make the change to txq2sq visible before the queue is started.
	 * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
@@ -186,6 +187,7 @@ int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id)
void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
{
	struct mlx5e_txqsq *sq;
	u16 txq_ix;

	sq = mlx5e_get_qos_sq(priv, qid);
	if (!sq) /* Handle the case when the SQ failed to open. */
@@ -194,7 +196,10 @@ void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
	qos_dbg(sq->mdev, "Deactivate QoS SQ qid %u\n", qid);
	mlx5e_deactivate_txqsq(sq);

	priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
	txq_ix = mlx5e_qid_from_qos(&priv->channels, qid);

	priv->txq2sq[txq_ix] = NULL;
	priv->txq2sq_stats[txq_ix] = NULL;

	/* Make the change to txq2sq visible before the queue is started again.
	 * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
@@ -325,6 +330,7 @@ void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
{
	struct mlx5e_params *params = &c->priv->channels.params;
	struct mlx5e_txqsq __rcu **qos_sqs;
	u16 txq_ix;
	int i;

	qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs);
@@ -342,8 +348,11 @@ void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
		qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid);
		mlx5e_deactivate_txqsq(sq);

		txq_ix = mlx5e_qid_from_qos(&c->priv->channels, qid);

		/* The queue is disabled, no synchronization with datapath is needed. */
		c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL;
		c->priv->txq2sq[txq_ix] = NULL;
		c->priv->txq2sq_stats[txq_ix] = NULL;
	}
}

+142 −1
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#include <linux/debugfs.h>
#include <linux/if_bridge.h>
#include <linux/filter.h>
#include <net/netdev_queues.h>
#include <net/page_pool/types.h>
#include <net/pkt_sched.h>
#include <net/xdp_sock_drv.h>
@@ -3125,6 +3126,7 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
			struct mlx5e_txqsq *sq = &c->sq[tc];

			priv->txq2sq[sq->txq_ix] = sq;
			priv->txq2sq_stats[sq->txq_ix] = sq->stats;
		}
	}

@@ -3139,6 +3141,7 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
		struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;

		priv->txq2sq[sq->txq_ix] = sq;
		priv->txq2sq_stats[sq->txq_ix] = sq->stats;
	}

out:
@@ -5296,6 +5299,136 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
	return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
}

/* netdev-genl per-queue RX stats callback: report counters for RX queue @i.
 *
 * @dev:   net device being queried
 * @i:     RX queue index; used directly as the index into
 *         priv->channel_stats
 * @stats: output; packets, bytes and alloc_fail are filled in
 *
 * The counters of the channel's regular RQ and of its XSK RQ are summed.
 * @stats is left unmodified for uplink representors and for a queue whose
 * channel stats have not been allocated.
 *
 * Must be called with RTNL held.
 */
static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i,
				     struct netdev_queue_stats_rx *stats)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_channel_stats *channel_stats;
	struct mlx5e_rq_stats *xskrq_stats;
	struct mlx5e_rq_stats *rq_stats;

	ASSERT_RTNL();
	if (mlx5e_is_uplink_rep(priv))
		return;

	/* priv->channel_stats is a zero-allocated array of pointers, so an
	 * entry stays NULL until stats for that channel are allocated.
	 * Bail out instead of dereferencing NULL when such a queue is
	 * queried (presumably possible before the channels were ever
	 * opened - TODO confirm against the caller).
	 */
	channel_stats = priv->channel_stats[i];
	if (!channel_stats)
		return;

	xskrq_stats = &channel_stats->xskrq;
	rq_stats = &channel_stats->rq;

	stats->packets = rq_stats->packets + xskrq_stats->packets;
	stats->bytes = rq_stats->bytes + xskrq_stats->bytes;
	stats->alloc_fail = rq_stats->buff_alloc_err +
			    xskrq_stats->buff_alloc_err;
}

/* netdev-genl per-queue TX stats callback: report counters for TX queue @i.
 *
 * @dev:   net device being queried
 * @i:     txq index; used directly as the index into priv->txq2sq_stats
 * @stats: output; packets and bytes are filled in
 *
 * @stats is left unmodified when no SQ stats are mapped to @i.
 *
 * Must be called with RTNL held.
 */
static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i,
				     struct netdev_queue_stats_tx *stats)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_sq_stats *sq_stats;

	ASSERT_RTNL();
	/* no special case needed for ptp htb etc since txq2sq_stats is kept up
	 * to date for active sq_stats, otherwise get_base_stats takes care of
	 * inactive sqs.
	 */
	sq_stats = priv->txq2sq_stats[i];

	/* txq2sq_stats is zero-allocated and a slot is reset to NULL when its
	 * QoS SQ is deactivated, so a txq index may have no stats mapped.
	 * Do not dereference NULL in that case.
	 */
	if (!sq_stats)
		return;

	stats->packets = sq_stats->packets;
	stats->bytes = sq_stats->bytes;
}

static void mlx5e_get_base_stats(struct net_device *dev,
				 struct netdev_queue_stats_rx *rx,
				 struct netdev_queue_stats_tx *tx)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_ptp *ptp_channel;
	int i, tc;

	ASSERT_RTNL();
	if (!mlx5e_is_uplink_rep(priv)) {
		rx->packets = 0;
		rx->bytes = 0;
		rx->alloc_fail = 0;

		for (i = priv->channels.params.num_channels; i < priv->stats_nch; i++) {
			struct netdev_queue_stats_rx rx_i = {0};

			mlx5e_get_queue_stats_rx(dev, i, &rx_i);

			rx->packets += rx_i.packets;
			rx->bytes += rx_i.bytes;
			rx->alloc_fail += rx_i.alloc_fail;
		}

		/* always report PTP RX stats from base as there is no
		 * corresponding channel to report them under in
		 * mlx5e_get_queue_stats_rx.
		 */
		if (priv->rx_ptp_opened) {
			struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;

			rx->packets += rq_stats->packets;
			rx->bytes += rq_stats->bytes;
		}
	}

	tx->packets = 0;
	tx->bytes = 0;

	for (i = 0; i < priv->stats_nch; i++) {
		struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];

		/* handle two cases:
		 *
		 *  1. channels which are active. In this case,
		 *     report only deactivated TCs on these channels.
		 *
		 *  2. channels which were deactivated
		 *     (i > priv->channels.params.num_channels)
		 *     must have all of their TCs [0 .. priv->max_opened_tc)
		 *     examined because deactivated channels will not be in the
		 *     range of [0..real_num_tx_queues) and will not have their
		 *     stats reported by mlx5e_get_queue_stats_tx.
		 */
		if (i < priv->channels.params.num_channels)
			tc = mlx5e_get_dcb_num_tc(&priv->channels.params);
		else
			tc = 0;

		for (; tc < priv->max_opened_tc; tc++) {
			struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[tc];

			tx->packets += sq_stats->packets;
			tx->bytes += sq_stats->bytes;
		}
	}

	/* if PTP TX was opened at some point and has since either:
	 *    -  been shutdown and set to NULL, or
	 *    -  simply disabled (bit unset)
	 *
	 * report stats directly from the ptp_stats structures as these queues
	 * are now unavailable and there is no txq index to retrieve these
	 * stats via calls to mlx5e_get_queue_stats_tx.
	 */
	ptp_channel = priv->channels.ptp;
	if (priv->tx_ptp_opened && (!ptp_channel || !test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state))) {
		for (tc = 0; tc < priv->max_opened_tc; tc++) {
			struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[tc];

			tx->packets += sq_stats->packets;
			tx->bytes   += sq_stats->bytes;
		}
	}
}

/* Queue statistics callbacks; installed as netdev->stat_ops by
 * mlx5e_build_nic_netdev().
 */
static const struct netdev_stat_ops mlx5e_stat_ops = {
	.get_base_stats = mlx5e_get_base_stats,
	.get_queue_stats_rx = mlx5e_get_queue_stats_rx,
	.get_queue_stats_tx = mlx5e_get_queue_stats_tx,
};

static void mlx5e_build_nic_netdev(struct net_device *netdev)
{
	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -5313,6 +5446,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)

	netdev->watchdog_timeo    = 15 * HZ;

	netdev->stat_ops	  = &mlx5e_stat_ops;
	netdev->ethtool_ops	  = &mlx5e_ethtool_ops;

	netdev->vlan_features    |= NETIF_F_SG;
@@ -5848,9 +5982,13 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
	if (!priv->txq2sq)
		goto err_destroy_workqueue;

	priv->txq2sq_stats = kcalloc_node(num_txqs, sizeof(*priv->txq2sq_stats), GFP_KERNEL, node);
	if (!priv->txq2sq_stats)
		goto err_free_txq2sq;

	priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node);
	if (!priv->tx_rates)
		goto err_free_txq2sq;
		goto err_free_txq2sq_stats;

	priv->channel_stats =
		kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node);
@@ -5861,6 +5999,8 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,

err_free_tx_rates:
	kfree(priv->tx_rates);
err_free_txq2sq_stats:
	kfree(priv->txq2sq_stats);
err_free_txq2sq:
	kfree(priv->txq2sq);
err_destroy_workqueue:
@@ -5884,6 +6024,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
		kvfree(priv->channel_stats[i]);
	kfree(priv->channel_stats);
	kfree(priv->tx_rates);
	kfree(priv->txq2sq_stats);
	kfree(priv->txq2sq);
	destroy_workqueue(priv->wq);
	mlx5e_selq_cleanup(&priv->selq);