Commit ec706a86 authored by Stanislav Fomichev, committed by Alexei Starovoitov
Browse files

net/mlx5e: Implement AF_XDP TX timestamp and checksum offload



TX timestamp:
- requires passing clock, not sure I'm passing the correct one (from
  cq->mdev), but the timestamp value looks convincing

TX checksum:
- looks like device does packet parsing (and doesn't accept custom
  start/offset), so I'm ignoring user offsets

Cc: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20231127190319.1190813-5-sdf@google.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 9276009d
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -484,10 +484,12 @@ struct mlx5e_xdp_info_fifo {

struct mlx5e_xdpsq;
struct mlx5e_xmit_data;
struct xsk_tx_metadata;
typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
					struct mlx5e_xmit_data *,
					int);
					int,
					struct xsk_tx_metadata *);

struct mlx5e_xdpsq {
	/* data path */
+61 −11
Original line number Diff line number Diff line
@@ -103,7 +103,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
		xdptxd->dma_addr = dma_addr;

		if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
					      mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
					      mlx5e_xmit_xdp_frame, sq, xdptxd, 0, NULL)))
			return false;

		/* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
@@ -145,7 +145,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
	xdptxd->dma_addr = dma_addr;

	if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
				      mlx5e_xmit_xdp_frame, sq, xdptxd, 0, NULL)))
		return false;

	/* xmit_mode == MLX5E_XDP_XMIT_MODE_PAGE */
@@ -261,6 +261,37 @@ const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = {
	.xmo_rx_hash			= mlx5e_xdp_rx_hash,
};

/*
 * Context handed as the opaque "priv" argument to the AF_XDP TX metadata
 * completion callbacks (see mlx5e_xsk_fill_timestamp()).
 *
 * It is built on the stack in mlx5e_free_xdpsq_desc() from that function's
 * cq/cqe arguments; both pointers are NULL when descriptors are released
 * via mlx5e_free_xdpsq_descs(), which has no completion to report.
 */
struct mlx5e_xsk_tx_complete {
	/* Completion entry the hardware timestamp is read from. */
	struct mlx5_cqe64 *cqe;
	/* Completion queue; its mdev supplies the clock used for conversion. */
	struct mlx5e_cq *cq;
};

/*
 * AF_XDP TX metadata callback: convert the timestamp carried by a TX
 * completion into a time value.
 *
 * @_priv: pointer to a struct mlx5e_xsk_tx_complete describing the
 *         completion being processed.
 *
 * The raw value from the CQE is converted with the real-time helper when the
 * device reports real-time mode for either RQ or SQ, and with the
 * timecounter helper otherwise.
 *
 * Returns the converted timestamp, or 0 when there is no completion to read
 * from. mlx5e_free_xdpsq_descs() releases descriptors with NULL cq/cqe, and
 * those pointers reach this callback unchanged, so they must not be
 * dereferenced blindly.
 */
static u64 mlx5e_xsk_fill_timestamp(void *_priv)
{
	struct mlx5e_xsk_tx_complete *priv = _priv;
	u64 ts;

	/* Descriptor is being dropped without a completion: no timestamp. */
	if (!priv->cqe || !priv->cq)
		return 0;

	ts = get_cqe_ts(priv->cqe);

	if (mlx5_is_real_time_rq(priv->cq->mdev) || mlx5_is_real_time_sq(priv->cq->mdev))
		return mlx5_real_time_cyc2time(&priv->cq->mdev->clock, ts);

	return mlx5_timecounter_cyc2time(&priv->cq->mdev->clock, ts);
}

/*
 * AF_XDP TX metadata callback: request checksum offload for a frame.
 *
 * @csum_start:  checksum start offset supplied by the caller (unused).
 * @csum_offset: checksum field offset supplied by the caller (unused).
 * @priv:        the Ethernet segment of the WQE being built.
 *
 * Both the L3 and L4 checksum flags are set on the segment. The offsets are
 * not consulted because the HW/FW parses the packet on its own.
 */
static void mlx5e_xsk_request_checksum(u16 csum_start, u16 csum_offset, void *priv)
{
	struct mlx5_wqe_eth_seg *eth_seg = priv;

	eth_seg->cs_flags = eth_seg->cs_flags |
			    MLX5_ETH_WQE_L3_CSUM |
			    MLX5_ETH_WQE_L4_CSUM;
}

/*
 * AF_XDP TX metadata operations implemented by this driver: checksum
 * offload request at transmit time and timestamp retrieval at completion.
 */
const struct xsk_tx_metadata_ops mlx5e_xsk_tx_metadata_ops = {
	.tmo_request_checksum		= mlx5e_xsk_request_checksum,
	.tmo_fill_timestamp		= mlx5e_xsk_fill_timestamp,
};

/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
		      struct bpf_prog *prog, struct mlx5e_xdp_buff *mxbuf)
@@ -398,11 +429,11 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     int check_result);
		     int check_result, struct xsk_tx_metadata *meta);

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
			   int check_result)
			   int check_result, struct xsk_tx_metadata *meta)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
@@ -420,7 +451,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
			 */
			if (unlikely(sq->mpwqe.wqe))
				mlx5e_xdp_mpwqe_complete(sq);
			return mlx5e_xmit_xdp_frame(sq, xdptxd, 0);
			return mlx5e_xmit_xdp_frame(sq, xdptxd, 0, meta);
		}
		if (!xdptxd->len) {
			skb_frag_t *frag = &xdptxdf->sinfo->frags[0];
@@ -450,6 +481,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
		 * and it's safe to complete it at any time.
		 */
		mlx5e_xdp_mpwqe_session_start(sq);
		xsk_tx_metadata_request(meta, &mlx5e_xsk_tx_metadata_ops, &session->wqe->eth);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, p, stats);
@@ -480,7 +512,7 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     int check_result)
		     int check_result, struct xsk_tx_metadata *meta)
{
	struct mlx5e_xmit_data_frags *xdptxdf =
		container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);
@@ -599,6 +631,8 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		sq->pc++;
	}

	xsk_tx_metadata_request(meta, &mlx5e_xsk_tx_metadata_ops, eseg);

	sq->doorbell_cseg = cseg;

	stats->xmit++;
@@ -608,7 +642,9 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
				  struct mlx5e_xdp_wqe_info *wi,
				  u32 *xsk_frames,
				  struct xdp_frame_bulk *bq)
				  struct xdp_frame_bulk *bq,
				  struct mlx5e_cq *cq,
				  struct mlx5_cqe64 *cqe)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	u16 i;
@@ -668,10 +704,24 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,

			break;
		}
		case MLX5E_XDP_XMIT_MODE_XSK:
		case MLX5E_XDP_XMIT_MODE_XSK: {
			/* AF_XDP send */
			struct xsk_tx_metadata_compl *compl = NULL;
			struct mlx5e_xsk_tx_complete priv = {
				.cqe = cqe,
				.cq = cq,
			};

			if (xp_tx_metadata_enabled(sq->xsk_pool)) {
				xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
				compl = &xdpi.xsk_meta;

				xsk_tx_metadata_complete(compl, &mlx5e_xsk_tx_metadata_ops, &priv);
			}

			(*xsk_frames)++;
			break;
		}
		default:
			WARN_ON_ONCE(true);
		}
@@ -720,7 +770,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)

			sqcc += wi->num_wqebbs;

			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq, cq, cqe);
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
@@ -767,7 +817,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)

		sq->cc += wi->num_wqebbs;

		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq, NULL, NULL);
	}

	xdp_flush_frame_bulk(&bq);
@@ -840,7 +890,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		}

		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, xdptxd, 0);
				      mlx5e_xmit_xdp_frame, sq, xdptxd, 0, NULL);
		if (unlikely(!ret)) {
			int j;

+8 −3
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@
#define __MLX5_EN_XDP_H__

#include <linux/indirect_call_wrapper.h>
#include <net/xdp_sock.h>

#include "en.h"
#include "en/txrx.h"
@@ -82,7 +83,7 @@ enum mlx5e_xdp_xmit_mode {
 *    num, page_1, page_2, ... , page_num.
 *
 * MLX5E_XDP_XMIT_MODE_XSK:
 *    none.
 *    frame.xsk_meta.
 */
#define MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO 4

@@ -97,6 +98,7 @@ union mlx5e_xdp_info {
		u8 num;
		struct page *page;
	} page;
	struct xsk_tx_metadata_compl xsk_meta;
};

struct mlx5e_xsk_param;
@@ -112,13 +114,16 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags);

extern const struct xdp_metadata_ops mlx5e_xdp_metadata_ops;
extern const struct xsk_tx_metadata_ops mlx5e_xsk_tx_metadata_ops;

INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
							  struct mlx5e_xmit_data *xdptxd,
							  int check_result));
							  int check_result,
							  struct xsk_tx_metadata *meta));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
						    struct mlx5e_xmit_data *xdptxd,
						    int check_result));
						    int check_result,
						    struct xsk_tx_metadata *meta));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));

+16 −1
Original line number Diff line number Diff line
@@ -55,12 +55,16 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,

	nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, *xdpi);
	if (xp_tx_metadata_enabled(sq->xsk_pool))
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .xsk_meta = {} });
	sq->doorbell_cseg = &nopwqe->ctrl;
}

bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
{
	struct xsk_buff_pool *pool = sq->xsk_pool;
	struct xsk_tx_metadata *meta = NULL;
	union mlx5e_xdp_info xdpi;
	bool work_done = true;
	bool flush = false;
@@ -93,12 +97,13 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
		xdptxd.dma_addr = xsk_buff_raw_get_dma(pool, desc.addr);
		xdptxd.data = xsk_buff_raw_get_data(pool, desc.addr);
		xdptxd.len = desc.len;
		meta = xsk_buff_get_metadata(pool, desc.addr);

		xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);

		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, &xdptxd,
				      check_result);
				      check_result, meta);
		if (unlikely(!ret)) {
			if (sq->mpwqe.wqe)
				mlx5e_xdp_mpwqe_complete(sq);
@@ -106,6 +111,16 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
			mlx5e_xsk_tx_post_err(sq, &xdpi);
		} else {
			mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
			if (xp_tx_metadata_enabled(sq->xsk_pool)) {
				struct xsk_tx_metadata_compl compl;

				xsk_tx_metadata_to_compl(meta, &compl);
				XSK_TX_COMPL_FITS(void *);

				mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
						     (union mlx5e_xdp_info)
						     { .xsk_meta = compl });
			}
		}

		flush = true;
+1 −0
Original line number Diff line number Diff line
@@ -5164,6 +5164,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)

	netdev->netdev_ops = &mlx5e_netdev_ops;
	netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops;
	netdev->xsk_tx_metadata_ops = &mlx5e_xsk_tx_metadata_ops;

	mlx5e_dcbnl_build_netdev(netdev);