Commit 24f90786 authored by Alexander Lobakin, committed by Tony Nguyen
Browse files

ice: reshuffle and group Rx and Tx queue fields by cachelines



Place the fields in ice_{rx,tx}_ring that are used in the same pieces of
hotpath code closer to each other, and use
__cacheline_group_{begin,end}_aligned() to isolate the read-mostly,
read-write, and cold groups into separate cachelines, similarly
to idpf.

Suggested-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
parent b38a418d
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -3388,7 +3388,6 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
				 */
				rx_rings[i].next_to_use = 0;
				rx_rings[i].next_to_clean = 0;
				rx_rings[i].next_to_alloc = 0;
				*vsi->rx_rings[i] = rx_rings[i];
			}
			kfree(rx_rings);
+0 −1
Original line number Diff line number Diff line
@@ -574,7 +574,6 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
		     PAGE_SIZE);
	memset(rx_ring->desc, 0, size);

	rx_ring->next_to_alloc = 0;
	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}
+70 −52
Original line number Diff line number Diff line
@@ -267,34 +267,49 @@ struct ice_tstamp_ring {
} ____cacheline_internodealigned_in_smp;

struct ice_rx_ring {
	/* CL1 - 1st cacheline starts here */
	__cacheline_group_begin_aligned(read_mostly);
	void *desc;			/* Descriptor ring memory */
	struct page_pool *pp;
	struct net_device *netdev;	/* netdev ring maps to */
	struct ice_vsi *vsi;		/* Backreference to associated VSI */
	struct ice_q_vector *q_vector;	/* Backreference to associated vector */
	u8 __iomem *tail;
	u16 q_index;			/* Queue number of ring */

	u16 count;			/* Number of descriptors */
	u16 reg_idx;			/* HW register index of the ring */
	u16 next_to_alloc;

	union {
		struct libeth_fqe *rx_fqes;
		struct xdp_buff **xdp_buf;
	};

	/* CL2 - 2nd cacheline starts here */
	struct libeth_fqe *hdr_fqes;
	u16 count;			/* Number of descriptors */
	u8 ptp_rx;

	u8 flags;
#define ICE_RX_FLAGS_CRC_STRIP_DIS	BIT(2)
#define ICE_RX_FLAGS_MULTIDEV		BIT(3)
#define ICE_RX_FLAGS_RING_GCS		BIT(4)

	u32 truesize;

	struct page_pool *hdr_pp;
	struct libeth_fqe *hdr_fqes;

	struct bpf_prog *xdp_prog;
	struct ice_tx_ring *xdp_ring;
	struct xsk_buff_pool *xsk_pool;

	/* stats structs */
	struct ice_ring_stats *ring_stats;
	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */

	u32 hdr_truesize;

	struct xdp_rxq_info xdp_rxq;
	__cacheline_group_end_aligned(read_mostly);

	__cacheline_group_begin_aligned(read_write);
	union {
		struct libeth_xdp_buff_stash xdp;
		struct libeth_xdp_buff *xsk;
	};

	/* CL3 - 3rd cacheline starts here */
	union {
		struct ice_pkt_ctx pkt_ctx;
		struct {
@@ -302,75 +317,78 @@ struct ice_rx_ring {
			__be16 vlan_proto;
		};
	};
	struct bpf_prog *xdp_prog;

	/* used in interrupt processing */
	u16 next_to_use;
	u16 next_to_clean;
	__cacheline_group_end_aligned(read_write);

	u32 hdr_truesize;
	u32 truesize;

	/* stats structs */
	struct ice_ring_stats *ring_stats;

	__cacheline_group_begin_aligned(cold);
	struct rcu_head rcu;		/* to avoid race on free */
	/* CL4 - 4th cacheline starts here */
	struct ice_vsi *vsi;		/* Backreference to associated VSI */
	struct ice_channel *ch;
	struct ice_tx_ring *xdp_ring;
	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
	struct xsk_buff_pool *xsk_pool;
	u16 rx_hdr_len;
	u16 rx_buf_len;

	dma_addr_t dma;			/* physical address of ring */
	u16 q_index;			/* Queue number of ring */
	u16 reg_idx;			/* HW register index of the ring */
	u8 dcb_tc;			/* Traffic class of ring */
	u8 ptp_rx;
#define ICE_RX_FLAGS_CRC_STRIP_DIS	BIT(2)
#define ICE_RX_FLAGS_MULTIDEV		BIT(3)
#define ICE_RX_FLAGS_RING_GCS		BIT(4)
	u8 flags;
	/* CL5 - 5th cacheline starts here */
	struct xdp_rxq_info xdp_rxq;

	u16 rx_hdr_len;
	u16 rx_buf_len;
	__cacheline_group_end_aligned(cold);
} ____cacheline_internodealigned_in_smp;

struct ice_tx_ring {
	/* CL1 - 1st cacheline starts here */
	struct ice_tx_ring *next;	/* pointer to next ring in q_vector */
	__cacheline_group_begin_aligned(read_mostly);
	void *desc;			/* Descriptor ring memory */
	struct device *dev;		/* Used for DMA mapping */
	u8 __iomem *tail;
	struct ice_tx_buf *tx_buf;

	struct ice_q_vector *q_vector;	/* Backreference to associated vector */
	struct net_device *netdev;	/* netdev ring maps to */
	struct ice_vsi *vsi;		/* Backreference to associated VSI */
	/* CL2 - 2nd cacheline starts here */
	dma_addr_t dma;			/* physical address of ring */
	struct xsk_buff_pool *xsk_pool;
	u16 next_to_use;
	u16 next_to_clean;
	u16 q_handle;			/* Queue handle per TC */
	u16 reg_idx;			/* HW register index of the ring */

	u16 count;			/* Number of descriptors */
	u16 q_index;			/* Queue number of ring */
	u16 xdp_tx_active;

	u8 flags;
#define ICE_TX_FLAGS_RING_XDP		BIT(0)
#define ICE_TX_FLAGS_RING_VLAN_L2TAG1	BIT(1)
#define ICE_TX_FLAGS_RING_VLAN_L2TAG2	BIT(2)
#define ICE_TX_FLAGS_TXTIME		BIT(3)

	struct xsk_buff_pool *xsk_pool;

	/* stats structs */
	struct ice_ring_stats *ring_stats;
	/* CL3 - 3rd cacheline starts here */
	struct ice_tx_ring *next;	/* pointer to next ring in q_vector */

	struct ice_tstamp_ring *tstamp_ring;
	struct ice_ptp_tx *tx_tstamps;
	__cacheline_group_end_aligned(read_mostly);

	__cacheline_group_begin_aligned(read_write);
	u16 next_to_use;
	u16 next_to_clean;

	u16 xdp_tx_active;
	spinlock_t tx_lock;
	__cacheline_group_end_aligned(read_write);

	__cacheline_group_begin_aligned(cold);
	struct rcu_head rcu;		/* to avoid race on free */
	DECLARE_BITMAP(xps_state, ICE_TX_NBITS);	/* XPS Config State */
	struct ice_channel *ch;
	struct ice_ptp_tx *tx_tstamps;
	spinlock_t tx_lock;
	u32 txq_teid;			/* Added Tx queue TEID */
	/* CL4 - 4th cacheline starts here */
	struct ice_tstamp_ring *tstamp_ring;
#define ICE_TX_FLAGS_RING_XDP		BIT(0)
#define ICE_TX_FLAGS_RING_VLAN_L2TAG1	BIT(1)
#define ICE_TX_FLAGS_RING_VLAN_L2TAG2	BIT(2)
#define ICE_TX_FLAGS_TXTIME		BIT(3)
	u8 flags;

	dma_addr_t dma;			/* physical address of ring */
	u16 q_handle;			/* Queue handle per TC */
	u16 reg_idx;			/* HW register index of the ring */
	u8 dcb_tc;			/* Traffic class of ring */

	u16 quanta_prof_id;
	u32 txq_teid;			/* Added Tx queue TEID */
	__cacheline_group_end_aligned(cold);
} ____cacheline_internodealigned_in_smp;

static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring)
+0 −3
Original line number Diff line number Diff line
@@ -20,9 +20,6 @@ void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val)

	rx_ring->next_to_use = val;

	/* update next to alloc since we have filled the ring */
	rx_ring->next_to_alloc = val;

	/* QRX_TAIL will be updated with any tail value, but hardware ignores
	 * the lower 3 bits. This makes it so we only bump tail on meaningful
	 * boundaries. Also, this allows us to bump tail on intervals of 8 up to