Commit 1ba06ca9 authored by Ido Schimmel's avatar Ido Schimmel Committed by Jakub Kicinski
Browse files

mlxsw: Switch to napi_gro_receive()

Benefit from the recent conversion of the driver to NAPI and enable GRO
support through the use of napi_gro_receive(). Pass the NAPI pointer
from the bus driver (mlxsw_pci) to the switch driver (mlxsw_spectrum)
through the skb control block where various packet metadata is already
encoded.

The main motivation is to improve forwarding performance through the use
of GRO fraglist [1]. In my testing, when the forwarding data path is
simple (routing between two ports) there is not much difference in
forwarding performance between GRO disabled and GRO enabled with
fraglist.

The improvement becomes more noticeable as the data path becomes more
complex since it is traversed less times with GRO enabled. For example,
with 10 ingress and 10 egress flower filters with different priorities
on the two ports between which routing is performed, there is an
improvement of about 140% in forwarded bandwidth.

[1] https://lore.kernel.org/netdev/20200125102645.4782-1-steffen.klassert@secunet.com/



Signed-off-by: default avatarIdo Schimmel <idosch@nvidia.com>
Reviewed-by: default avatarPetr Machata <petrm@nvidia.com>
Reviewed-by: default avatarAmit Cohen <amcohen@nvidia.com>
Signed-off-by: default avatarPetr Machata <petrm@nvidia.com>
Reviewed-by: default avatarAlexander Lobakin <aleksander.lobakin@intel.com>
Link: https://patch.msgid.link/21258fe55f608ccf1ee2783a5a4534220af28903.1734354812.git.petrm@nvidia.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 3a413055
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -73,6 +73,7 @@ struct mlxsw_tx_info {
};

struct mlxsw_rx_md_info {
	struct napi_struct *napi;
	u32 cookie_index;
	u32 latency;
	u32 tx_congestion;
+3 −1
Original line number Diff line number Diff line
@@ -737,6 +737,7 @@ static void mlxsw_pci_cqe_rdq_md_init(struct sk_buff *skb, const char *cqe)
}

static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
				     struct napi_struct *napi,
				     struct mlxsw_pci_queue *q,
				     u16 consumer_counter_limit,
				     enum mlxsw_pci_cqe_v cqe_v, char *cqe)
@@ -807,6 +808,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
	}

	mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe);
	mlxsw_skb_cb(skb)->rx_md_info.napi = napi;

	mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);

@@ -869,7 +871,7 @@ static int mlxsw_pci_napi_poll_cq_rx(struct napi_struct *napi, int budget)
			continue;
		}

		mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq,
		mlxsw_pci_cqe_rdq_handle(mlxsw_pci, napi, rdq,
					 wqe_counter, q->u.cq.v, cqe);

		if (++work_done == budget)
+1 −1
Original line number Diff line number Diff line
@@ -2449,7 +2449,7 @@ void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
	u64_stats_update_end(&pcpu_stats->syncp);

	skb->protocol = eth_type_trans(skb, skb->dev);
	netif_receive_skb(skb);
	napi_gro_receive(mlxsw_skb_cb(skb)->rx_md_info.napi, skb);
}

static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u16 local_port,
+1 −1
Original line number Diff line number Diff line
@@ -173,7 +173,7 @@ static void mlxsw_sp_rx_no_mark_listener(struct sk_buff *skb, u16 local_port,
	if (err)
		return;

	netif_receive_skb(skb);
	napi_gro_receive(mlxsw_skb_cb(skb)->rx_md_info.napi, skb);
}

static void mlxsw_sp_rx_mark_listener(struct sk_buff *skb, u16 local_port,