Commit 48eb03dd authored by Stanislav Fomichev's avatar Stanislav Fomichev Committed by Alexei Starovoitov
Browse files

xsk: Add TX timestamp and TX checksum offload support



This change actually defines the (initial) metadata layout
that should be used by AF_XDP userspace (xsk_tx_metadata).
The first field is flags which requests appropriate offloads,
followed by the offload-specific fields. The supported per-device
offloads are exported via netlink (new xsk-flags).

The offloads themselves are still implemented in a bit of a
framework-y fashion that's left from my initial kfunc attempt.
I'm introducing new xsk_tx_metadata_ops which drivers are
supposed to implement. The drivers are also supposed
to call xsk_tx_metadata_request/xsk_tx_metadata_complete in
the right places. Since xsk_tx_metadata_{request,_complete}
are static inline, we don't incur any extra overhead doing
indirect calls.

The benefit of this scheme is as follows:
- keeps all metadata layout parsing away from driver code
- makes it easy to grep and see which drivers implement what
- don't need any extra flags to maintain to keep track of what
  offloads are implemented; if the callback is implemented - the offload
  is supported (used by netlink reporting code)

Two offloads are defined right now:
1. XDP_TXMD_FLAGS_CHECKSUM: skb-style csum_start+csum_offset
2. XDP_TXMD_FLAGS_TIMESTAMP: writes TX timestamp back into metadata
   area upon completion (tx_timestamp field)

XDP_TXMD_FLAGS_TIMESTAMP is also implemented for XDP_COPY mode: it writes
SW timestamp from the skb destructor (note I'm reusing hwtstamps to pass
metadata pointer).

The struct is forward-compatible and can be extended in the future
by appending more fields.

Reviewed-by: default avatarSong Yoong Siang <yoong.siang.song@intel.com>
Signed-off-by: default avatarStanislav Fomichev <sdf@google.com>
Acked-by: default avatarJakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20231127190319.1190813-3-sdf@google.com


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 341ac980
Loading
Loading
Loading
Loading
+18 −1
Original line number Diff line number Diff line
@@ -45,7 +45,6 @@ definitions:
  -
    type: flags
    name: xdp-rx-metadata
    render-max: true
    entries:
      -
        name: timestamp
@@ -55,6 +54,18 @@ definitions:
        name: hash
        doc:
          Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
  -
    type: flags
    name: xsk-flags
    entries:
      -
        name: tx-timestamp
        doc:
          HW timestamping egress packets is supported by the driver.
      -
        name: tx-checksum
        doc:
          L3 checksum HW offload is supported by the driver.

attribute-sets:
  -
@@ -86,6 +97,11 @@ attribute-sets:
             See Documentation/networking/xdp-rx-metadata.rst for more details.
        type: u64
        enum: xdp-rx-metadata
      -
        name: xsk-features
        doc: Bitmask of enabled AF_XDP features.
        type: u64
        enum: xsk-flags

operations:
  list:
@@ -103,6 +119,7 @@ operations:
            - xdp-features
            - xdp-zc-max-segs
            - xdp-rx-metadata-features
            - xsk-features
      dump:
        reply: *dev-all
    -
+2 −0
Original line number Diff line number Diff line
@@ -1865,6 +1865,7 @@ enum netdev_stat_type {
 *	@netdev_ops:	Includes several pointers to callbacks,
 *			if one wants to override the ndo_*() functions
 *	@xdp_metadata_ops:	Includes pointers to XDP metadata callbacks.
 *	@xsk_tx_metadata_ops:	Includes pointers to AF_XDP TX metadata callbacks.
 *	@ethtool_ops:	Management operations
 *	@l3mdev_ops:	Layer 3 master device operations
 *	@ndisc_ops:	Includes callbacks for different IPv6 neighbour
@@ -2128,6 +2129,7 @@ struct net_device {
	unsigned long long	priv_flags;
	const struct net_device_ops *netdev_ops;
	const struct xdp_metadata_ops *xdp_metadata_ops;
	const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops;
	int			ifindex;
	unsigned short		gflags;
	unsigned short		hard_header_len;
+13 −1
Original line number Diff line number Diff line
@@ -566,6 +566,15 @@ struct ubuf_info_msgzc {
int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
void mm_unaccount_pinned_pages(struct mmpin *mmp);

/* Preserve some data across TX submission and completion.
 *
 * Note, this state is stored in the driver. Extending the layout
 * might need some special care.
 */
struct xsk_tx_metadata_compl {
	__u64 *tx_timestamp;
};

/* This data is invariant across clones and lives at
 * the end of the header data, ie. at skb->end.
 */
@@ -578,7 +587,10 @@ struct skb_shared_info {
	/* Warning: this field is not always filled in (UFO)! */
	unsigned short	gso_segs;
	struct sk_buff	*frag_list;
	union {
		struct skb_shared_hwtstamps hwtstamps;
		struct xsk_tx_metadata_compl xsk_meta;
	};
	unsigned int	gso_type;
	u32		tskey;

+110 −0
Original line number Diff line number Diff line
@@ -93,12 +93,105 @@ struct xdp_sock {
	struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
};

/*
 * AF_XDP TX metadata hooks for network devices.
 * The following hooks can be defined; unless noted otherwise, they are
 * optional and can be filled with a null pointer.
 *
 * void (*tmo_request_timestamp)(void *priv)
 *     Called when AF_XDP frame requested egress timestamp.
 *
 * u64 (*tmo_fill_timestamp)(void *priv)
 *     Called when AF_XDP frame, that had requested egress timestamp,
 *     received a completion. The hook needs to return the actual HW timestamp.
 *
 * void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv)
 *     Called when AF_XDP frame requested HW checksum offload. csum_start
 *     indicates position where checksumming should start.
 *     csum_offset indicates position where checksum should be stored.
 *
 */
struct xsk_tx_metadata_ops {
	void	(*tmo_request_timestamp)(void *priv);
	u64	(*tmo_fill_timestamp)(void *priv);
	void	(*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv);
};

#ifdef CONFIG_XDP_SOCKETS

int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(void);

/**
 *  xsk_tx_metadata_to_compl - Save enough relevant metadata information
 *  to perform tx completion in the future.
 *  @meta: pointer to AF_XDP metadata area
 *  @compl: pointer to output struct xsk_tx_metadata_to_compl
 *
 *  This function should be called by the networking device when
 *  it prepares AF_XDP egress packet. The value of @compl should be stored
 *  and passed to xsk_tx_metadata_complete upon TX completion.
 */
static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
					    struct xsk_tx_metadata_compl *compl)
{
	if (!meta)
		return;

	if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
		compl->tx_timestamp = &meta->completion.tx_timestamp;
	else
		compl->tx_timestamp = NULL;
}

/**
 *  xsk_tx_metadata_request - Evaluate AF_XDP TX metadata at submission
 *  and call appropriate xsk_tx_metadata_ops operation.
 *  @meta: pointer to AF_XDP metadata area
 *  @ops: pointer to struct xsk_tx_metadata_ops
 *  @priv: pointer to driver-private aread
 *
 *  This function should be called by the networking device when
 *  it prepares AF_XDP egress packet.
 */
static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
					   const struct xsk_tx_metadata_ops *ops,
					   void *priv)
{
	if (!meta)
		return;

	if (ops->tmo_request_timestamp)
		if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
			ops->tmo_request_timestamp(priv);

	if (ops->tmo_request_checksum)
		if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM)
			ops->tmo_request_checksum(meta->request.csum_start,
						  meta->request.csum_offset, priv);
}

/**
 *  xsk_tx_metadata_complete - Evaluate AF_XDP TX metadata at completion
 *  and call appropriate xsk_tx_metadata_ops operation.
 *  @compl: pointer to completion metadata produced from xsk_tx_metadata_to_compl
 *  @ops: pointer to struct xsk_tx_metadata_ops
 *  @priv: pointer to driver-private aread
 *
 *  This function should be called by the networking device upon
 *  AF_XDP egress completion.
 */
static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
					    const struct xsk_tx_metadata_ops *ops,
					    void *priv)
{
	if (!compl)
		return;

	*compl->tx_timestamp = ops->tmo_fill_timestamp(priv);
}

#else

static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
@@ -115,6 +208,23 @@ static inline void __xsk_map_flush(void)
{
}

static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
					    struct xsk_tx_metadata_compl *compl)
{
}

static inline void xsk_tx_metadata_request(struct xsk_tx_metadata *meta,
					   const struct xsk_tx_metadata_ops *ops,
					   void *priv)
{
}

static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
					    const struct xsk_tx_metadata_ops *ops,
					    void *priv)
{
}

#endif /* CONFIG_XDP_SOCKETS */

#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET)
+13 −0
Original line number Diff line number Diff line
@@ -165,6 +165,14 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
	return xp_raw_get_data(pool, addr);
}

static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
{
	if (!pool->tx_metadata_len)
		return NULL;

	return xp_raw_get_data(pool, addr) - pool->tx_metadata_len;
}

static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
{
	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
@@ -324,6 +332,11 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
	return NULL;
}

static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
{
	return NULL;
}

static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
{
}
Loading