Commit 544070db authored by Jakub Kicinski
Browse files

Merge branch 'mlx5-esw-qos-refactor-and-shampo-cleanup'

Tariq Toukan says:

====================
mlx5 esw qos refactor and SHAMPO cleanup

This patchset for the mlx5 core and Eth drivers consists of 3 parts.

First patch by Patrisious improves the E-switch mode change operation.

The following 6 patches by Carolina introduce further refactoring for
the QoS handling, to set the foundation for future extensions.

In the following 5 patches by Dragos, we enhance the SHAMPO datapath
flow by simplifying some logic, and cleaning up the implementation.
====================

Link: https://patch.msgid.link/20241107194357.683732-1-tariqt@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 7d3f3b43 ab4219db
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -83,6 +83,7 @@ struct page_pool;
#define MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE (8)
#define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9)
#define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
#define MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE (PAGE_SHIFT - MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64)
#define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024)
#define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096)
@@ -624,16 +625,14 @@ struct mlx5e_dma_info {

struct mlx5e_shampo_hd {
	u32 mkey;
	struct mlx5e_dma_info *info;
	struct mlx5e_frag_page *pages;
	u16 curr_page_index;
	u32 hd_per_wq;
	u16 hd_per_wqe;
	u16 pages_per_wq;
	unsigned long *bitmap;
	u16 pi;
	u16 ci;
	__be32 key;
	u64 last_addr;
};

struct mlx5e_hw_gro_data {
+4 −11
Original line number Diff line number Diff line
@@ -350,19 +350,15 @@ static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node)

	shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL,
					    node);
	shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq,
						sizeof(*shampo->info)),
				     GFP_KERNEL, node);
	shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq,
						 sizeof(*shampo->pages)),
				     GFP_KERNEL, node);
	if (!shampo->bitmap || !shampo->info || !shampo->pages)
	if (!shampo->bitmap || !shampo->pages)
		goto err_nomem;

	return 0;

err_nomem:
	kvfree(shampo->info);
	kvfree(shampo->bitmap);
	kvfree(shampo->pages);

@@ -372,7 +368,6 @@ static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node)
static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
{
	kvfree(rq->mpwqe.shampo->bitmap);
	kvfree(rq->mpwqe.shampo->info);
	kvfree(rq->mpwqe.shampo->pages);
}

@@ -767,8 +762,6 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
				u32 *pool_size,
				int node)
{
	void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
	int wq_size;
	int err;

	if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
@@ -793,9 +786,9 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
		cpu_to_be32(rq->mpwqe.shampo->mkey);
	rq->mpwqe.shampo->hd_per_wqe =
		mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
	wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
	*pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
		     MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
	rq->mpwqe.shampo->pages_per_wq =
		rq->mpwqe.shampo->hd_per_wq / MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
	*pool_size += rq->mpwqe.shampo->pages_per_wq;
	return 0;

err_hw_gro_data:
+61 −62
Original line number Diff line number Diff line
@@ -643,83 +643,82 @@ static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
	umr_wqe->uctrl.mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
}

/*
 * Return the entry of shampo->pages that backs the given SHAMPO header index.
 *
 * The page slot is header_index shifted right by
 * MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE, i.e. header_index divided by the
 * number of header entries that fit in one page.
 */
static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq, int header_index)
{
	/*
	 * The shift amount is PAGE_SHIFT minus the header entry size log;
	 * fail the build if that difference would be negative.
	 */
	BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT);

	return &rq->mpwqe.shampo->pages[header_index >> MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE];
}

/*
 * Return the byte offset of a SHAMPO header entry inside its page.
 *
 * The low bits of header_index select the entry's position within the page
 * (the mask assumes MLX5E_SHAMPO_WQ_HEADER_PER_PAGE is a power of two); the
 * position is then scaled by the maximum header entry size,
 * 1 << MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE bytes.
 */
static u64 mlx5e_shampo_hd_offset(int header_index)
{
	return (header_index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
		MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
}

static void mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index);

static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
				     struct mlx5e_icosq *sq,
				     u16 ksm_entries, u16 index)
{
	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
	u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
	u16 pi, header_offset, err, wqe_bbs;
	u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey;
	u16 page_index = shampo->curr_page_index;
	struct mlx5e_frag_page *frag_page;
	u64 addr = shampo->last_addr;
	struct mlx5e_dma_info *dma_info;
	struct mlx5e_umr_wqe *umr_wqe;
	int headroom, i;
	int headroom, i = 0;

	headroom = rq->buff.headroom;
	new_entries = ksm_entries - (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1));
	entries = ALIGN(ksm_entries, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
	wqe_bbs = MLX5E_KSM_UMR_WQEBBS(entries);
	wqe_bbs = MLX5E_KSM_UMR_WQEBBS(ksm_entries);
	pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
	umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	build_ksm_umr(sq, umr_wqe, shampo->key, index, entries);

	frag_page = &shampo->pages[page_index];
	build_ksm_umr(sq, umr_wqe, shampo->key, index, ksm_entries);

	for (i = 0; i < entries; i++, index++) {
		dma_info = &shampo->info[index];
		if (i >= ksm_entries || (index < shampo->pi && shampo->pi - index <
					 MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT))
			goto update_ksm;
		header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
			MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
		if (!(header_offset & (PAGE_SIZE - 1))) {
			page_index = (page_index + 1) & (shampo->hd_per_wq - 1);
			frag_page = &shampo->pages[page_index];
	WARN_ON_ONCE(ksm_entries & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1));
	while (i < ksm_entries) {
		struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index);
		u64 addr;

		err = mlx5e_page_alloc_fragmented(rq, frag_page);
		if (unlikely(err))
			goto err_unmap;


		addr = page_pool_get_dma_addr(frag_page->page);

			dma_info->addr = addr;
			dma_info->frag_page = frag_page;
		} else {
			dma_info->addr = addr + header_offset;
			dma_info->frag_page = frag_page;
		}
		for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) {
			header_offset = mlx5e_shampo_hd_offset(index++);

update_ksm:
		umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
			umr_wqe->inline_ksms[i++] = (struct mlx5_ksm) {
				.key = cpu_to_be32(lkey),
			.va  = cpu_to_be64(dma_info->addr + headroom),
				.va  = cpu_to_be64(addr + header_offset + headroom),
			};
		}
	}

	sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
		.wqe_type	= MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
		.num_wqebbs	= wqe_bbs,
		.shampo.len	= new_entries,
		.shampo.len	= ksm_entries,
	};

	shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1);
	shampo->curr_page_index = page_index;
	shampo->last_addr = addr;
	shampo->pi = (shampo->pi + ksm_entries) & (shampo->hd_per_wq - 1);
	sq->pc += wqe_bbs;
	sq->doorbell_cseg = &umr_wqe->ctrl;

	return 0;

err_unmap:
	while (--i >= 0) {
		dma_info = &shampo->info[--index];
		if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
			dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
			mlx5e_page_release_fragmented(rq, dma_info->frag_page);
	while (--i) {
		--index;
		header_offset = mlx5e_shampo_hd_offset(index);
		if (!header_offset) {
			struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index);

			mlx5e_page_release_fragmented(rq, frag_page);
		}
	}

	rq->stats->buff_alloc_err++;
	return err;
}
@@ -731,7 +730,8 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
	struct mlx5e_icosq *sq = rq->icosq;
	int i, err, max_ksm_entries, len;

	max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev);
	max_ksm_entries = ALIGN_DOWN(MLX5E_MAX_KSM_PER_WQE(rq->mdev),
				     MLX5E_SHAMPO_WQ_HEADER_PER_PAGE);
	ksm_entries = bitmap_find_window(shampo->bitmap,
					 shampo->hd_per_wqe,
					 shampo->hd_per_wq, shampo->pi);
@@ -739,8 +739,8 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
	if (!ksm_entries)
		return 0;

	ksm_entries += (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1));
	index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
	/* pi is aligned to MLX5E_SHAMPO_WQ_HEADER_PER_PAGE */
	index = shampo->pi;
	entries_before = shampo->hd_per_wq - index;

	if (unlikely(entries_before < ksm_entries))
@@ -851,13 +851,11 @@ static void
mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
{
	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
	u64 addr = shampo->info[header_index].addr;

	if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
		struct mlx5e_dma_info *dma_info = &shampo->info[header_index];
		struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index);

		dma_info->addr = ALIGN_DOWN(addr, PAGE_SIZE);
		mlx5e_page_release_fragmented(rq, dma_info->frag_page);
		mlx5e_page_release_fragmented(rq, frag_page);
	}
	clear_bit(header_index, shampo->bitmap);
}
@@ -1211,10 +1209,10 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,

static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
{
	struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index];
	u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom;
	struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index);
	u16 head_offset = mlx5e_shampo_hd_offset(header_index) + rq->buff.headroom;

	return page_address(last_head->frag_page->page) + head_offset;
	return page_address(frag_page->page) + head_offset;
}

static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4)
@@ -2185,29 +2183,30 @@ static struct sk_buff *
mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
			  struct mlx5_cqe64 *cqe, u16 header_index)
{
	struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index];
	u16 head_offset = head->addr & (PAGE_SIZE - 1);
	struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index);
	dma_addr_t page_dma_addr = page_pool_get_dma_addr(frag_page->page);
	u16 head_offset = mlx5e_shampo_hd_offset(header_index);
	dma_addr_t dma_addr = page_dma_addr + head_offset;
	u16 head_size = cqe->shampo.header_size;
	u16 rx_headroom = rq->buff.headroom;
	struct sk_buff *skb = NULL;
	void *hdr, *data;
	u32 frag_size;

	hdr		= page_address(head->frag_page->page) + head_offset;
	hdr		= page_address(frag_page->page) + head_offset;
	data		= hdr + rx_headroom;
	frag_size	= MLX5_SKB_FRAG_SZ(rx_headroom + head_size);

	if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
		/* build SKB around header */
		dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir);
		dma_sync_single_range_for_cpu(rq->pdev, dma_addr, 0, frag_size, rq->buff.map_dir);
		net_prefetchw(hdr);
		net_prefetch(data);
		skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0);

		if (unlikely(!skb))
			return NULL;

		head->frag_page->frags++;
		frag_page->frags++;
	} else {
		/* allocate SKB and copy header for large header */
		rq->stats->gro_large_hds++;
@@ -2219,7 +2218,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
		}

		net_prefetchw(skb->data);
		mlx5e_copy_skb_header(rq, skb, head->frag_page->page, head->addr,
		mlx5e_copy_skb_header(rq, skb, frag_page->page, dma_addr,
				      head_offset + rx_headroom,
				      rx_headroom, head_size);
		/* skb linear part was allocated with headlen and aligned to long */
+1 −1
Original line number Diff line number Diff line
@@ -195,7 +195,7 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_vport *vport)
		return;
	dl_port = vport->dl_port;

	mlx5_esw_qos_vport_update_node(vport, NULL, NULL);
	mlx5_esw_qos_vport_update_parent(vport, NULL, NULL);
	devl_rate_leaf_destroy(&dl_port->dl_port);

	devl_port_unregister(&dl_port->dl_port);
+209 −349

File changed.

Preview size limit exceeded, changes collapsed.

Loading