Commit b1c92cdf authored by Jakub Kicinski
Browse files

Merge branch 'net-wangxun-complete-ethtool-coalesce-options'

Jiawen Wu says:

====================
net: wangxun: complete ethtool coalesce options

Add support for adaptive RX coalescing. Change the default RX coalesce
usecs and limit the range of parameters for the various device types,
according to their hardware design.
====================

Link: https://patch.msgid.link/20250821023408.53472-1-jiawenwu@trustnetic.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 2fa1369d 40477b8b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ config LIBWX
	tristate
	depends on PTP_1588_CLOCK_OPTIONAL
	select PAGE_POOL
	select DIMLIB
	help
	Common library for Wangxun(R) Ethernet drivers.

+35 −20
Original line number Diff line number Diff line
@@ -303,6 +303,11 @@ int wx_get_coalesce(struct net_device *netdev,
	else
		ec->rx_coalesce_usecs = wx->rx_itr_setting >> 2;

	if (wx->adaptive_itr) {
		ec->use_adaptive_rx_coalesce = 1;
		ec->use_adaptive_tx_coalesce = 1;
	}

	/* if in mixed tx/rx queues per vector mode, report only rx settings */
	if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count)
		return 0;
@@ -334,19 +339,28 @@ int wx_set_coalesce(struct net_device *netdev,
			return -EOPNOTSUPP;
	}

	if (ec->tx_max_coalesced_frames_irq)
	if (ec->tx_max_coalesced_frames_irq > U16_MAX  ||
	    !ec->tx_max_coalesced_frames_irq)
		return -EINVAL;

	wx->tx_work_limit = ec->tx_max_coalesced_frames_irq;

	switch (wx->mac.type) {
	case wx_mac_sp:
		max_eitr = WX_SP_MAX_EITR;
		rx_itr_param = WX_20K_ITR;
		tx_itr_param = WX_12K_ITR;
		break;
	case wx_mac_aml:
	case wx_mac_aml40:
		max_eitr = WX_AML_MAX_EITR;
		rx_itr_param = WX_20K_ITR;
		tx_itr_param = WX_12K_ITR;
		break;
	default:
		max_eitr = WX_EM_MAX_EITR;
		rx_itr_param = WX_7K_ITR;
		tx_itr_param = WX_7K_ITR;
		break;
	}

@@ -354,35 +368,36 @@ int wx_set_coalesce(struct net_device *netdev,
	    (ec->tx_coalesce_usecs > (max_eitr >> 2)))
		return -EINVAL;

	if (ec->use_adaptive_rx_coalesce) {
		wx->adaptive_itr = true;
		wx->rx_itr_setting = 1;
		wx->tx_itr_setting = 1;
		return 0;
	}

	if (ec->rx_coalesce_usecs > 1)
		wx->rx_itr_setting = ec->rx_coalesce_usecs << 2;
	else
		wx->rx_itr_setting = ec->rx_coalesce_usecs;

	if (wx->rx_itr_setting == 1)
		rx_itr_param = WX_20K_ITR;
	else
		rx_itr_param = wx->rx_itr_setting;

	if (ec->tx_coalesce_usecs > 1)
		wx->tx_itr_setting = ec->tx_coalesce_usecs << 2;
	else
		wx->tx_itr_setting = ec->tx_coalesce_usecs;

	if (wx->tx_itr_setting == 1) {
		switch (wx->mac.type) {
		case wx_mac_sp:
		case wx_mac_aml:
		case wx_mac_aml40:
			tx_itr_param = WX_12K_ITR;
			break;
		default:
			tx_itr_param = WX_20K_ITR;
			break;
	if (wx->adaptive_itr) {
		wx->adaptive_itr = false;
		wx->rx_itr_setting = rx_itr_param;
		wx->tx_itr_setting = tx_itr_param;
	} else if (wx->rx_itr_setting == 1 || wx->tx_itr_setting == 1) {
		wx->adaptive_itr = true;
	}
	} else {

	if (wx->rx_itr_setting != 1)
		rx_itr_param = wx->rx_itr_setting;

	if (wx->tx_itr_setting != 1)
		tx_itr_param = wx->tx_itr_setting;
	}

	/* mixed Rx/Tx */
	if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count)
+102 −1
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
#include "wx_lib.h"
#include "wx_ptp.h"
#include "wx_hw.h"
#include "wx_vf_lib.h"

/* Lookup table mapping the HW PTYPE to the bit field for decoding */
static struct wx_dec_ptype wx_ptype_lookup[256] = {
@@ -832,6 +833,36 @@ static bool wx_clean_tx_irq(struct wx_q_vector *q_vector,
	return !!budget;
}

static void wx_update_rx_dim_sample(struct wx_q_vector *q_vector)
{
	struct dim_sample sample = {};

	dim_update_sample(q_vector->total_events,
			  q_vector->rx.total_packets,
			  q_vector->rx.total_bytes,
			  &sample);

	net_dim(&q_vector->rx.dim, &sample);
}

static void wx_update_tx_dim_sample(struct wx_q_vector *q_vector)
{
	struct dim_sample sample = {};

	dim_update_sample(q_vector->total_events,
			  q_vector->tx.total_packets,
			  q_vector->tx.total_bytes,
			  &sample);

	net_dim(&q_vector->tx.dim, &sample);
}

/* Submit one RX and one TX DIM sample for this vector, RX first. */
static void wx_update_dim_sample(struct wx_q_vector *q_vector)
{
	wx_update_rx_dim_sample(q_vector);
	wx_update_tx_dim_sample(q_vector);
}

/**
 * wx_poll - NAPI polling RX/TX cleanup routine
 * @napi: napi struct with our devices info in it
@@ -878,6 +909,8 @@ static int wx_poll(struct napi_struct *napi, int budget)

	/* all work done, exit the polling mode */
	if (likely(napi_complete_done(napi, work_done))) {
		if (wx->adaptive_itr)
			wx_update_dim_sample(q_vector);
		if (netif_running(wx->netdev))
			wx_intr_enable(wx, WX_INTR_Q(q_vector->v_idx));
	}
@@ -1591,6 +1624,65 @@ netdev_tx_t wx_xmit_frame(struct sk_buff *skb,
}
EXPORT_SYMBOL(wx_xmit_frame);

/* Apply the per-direction ITR values to the vector.
 *
 * Does nothing unless adaptive ITR is enabled. Otherwise the smaller of
 * the RX and TX container values is shifted left by two; when that result
 * differs from the vector's stored ITR it is saved and written out
 * through the VF or PF EITR helper.
 */
static void wx_set_itr(struct wx_q_vector *q_vector)
{
	struct wx *wx = q_vector->wx;
	u32 itr;

	if (!wx->adaptive_itr)
		return;

	/* the shorter of the two delay calculations wins */
	itr = min(q_vector->rx.itr, q_vector->tx.itr) << 2;

	/* unchanged value: skip the EITR write */
	if (itr == q_vector->itr)
		return;

	/* keep the computed value so the next call can compare against it */
	q_vector->itr = itr;

	if (wx->pdev->is_virtfn)
		wx_write_eitr_vf(q_vector);
	else
		wx_write_eitr(q_vector);
}

static void wx_rx_dim_work(struct work_struct *work)
{
	struct dim *dim = container_of(work, struct dim, work);
	struct dim_cq_moder rx_moder;
	struct wx_ring_container *rx;
	struct wx_q_vector *q_vector;

	rx = container_of(dim, struct wx_ring_container, dim);

	rx_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
	rx->itr = rx_moder.usec;

	q_vector = container_of(rx, struct wx_q_vector, rx);
	wx_set_itr(q_vector);

	dim->state = DIM_START_MEASURE;
}

static void wx_tx_dim_work(struct work_struct *work)
{
	struct dim *dim = container_of(work, struct dim, work);
	struct dim_cq_moder tx_moder;
	struct wx_ring_container *tx;
	struct wx_q_vector *q_vector;

	tx = container_of(dim, struct wx_ring_container, dim);

	tx_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
	tx->itr = tx_moder.usec;

	q_vector = container_of(tx, struct wx_q_vector, tx);
	wx_set_itr(q_vector);

	dim->state = DIM_START_MEASURE;
}

void wx_napi_enable_all(struct wx *wx)
{
	struct wx_q_vector *q_vector;
@@ -1598,6 +1690,11 @@ void wx_napi_enable_all(struct wx *wx)

	for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) {
		q_vector = wx->q_vector[q_idx];

		INIT_WORK(&q_vector->rx.dim.work, wx_rx_dim_work);
		INIT_WORK(&q_vector->tx.dim.work, wx_tx_dim_work);
		q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
		q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
		napi_enable(&q_vector->napi);
	}
}
@@ -1611,6 +1708,8 @@ void wx_napi_disable_all(struct wx *wx)
	for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) {
		q_vector = wx->q_vector[q_idx];
		napi_disable(&q_vector->napi);
		disable_work_sync(&q_vector->rx.dim.work);
		disable_work_sync(&q_vector->tx.dim.work);
	}
}
EXPORT_SYMBOL(wx_napi_disable_all);
@@ -2197,8 +2296,10 @@ irqreturn_t wx_msix_clean_rings(int __always_unused irq, void *data)
	struct wx_q_vector *q_vector = data;

	/* EIAM disabled interrupts (on this vector) for us */
	if (q_vector->rx.ring || q_vector->tx.ring)
	if (q_vector->rx.ring || q_vector->tx.ring) {
		napi_schedule_irqoff(&q_vector->napi);
		q_vector->total_events++;
	}

	return IRQ_HANDLED;
}
+5 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/phylink.h>
#include <linux/dim.h>
#include <net/ip.h>

#define WX_NCSI_SUP                             0x8000
@@ -1033,6 +1034,7 @@ struct wx_ring_container {
	unsigned int total_packets;     /* total packets processed this int */
	u8 count;                       /* total number of rings in vector */
	u8 itr;                         /* current ITR setting for ring */
	struct dim dim;                 /* data for net_dim algorithm */
};
struct wx_ring {
	struct wx_ring *next;           /* pointer to next ring in q_vector */
@@ -1089,6 +1091,8 @@ struct wx_q_vector {
	struct napi_struct napi;
	struct rcu_head rcu;    /* to avoid race with update stats on free */

	u16 total_events;       /* number of interrupts processed */

	char name[IFNAMSIZ + 17];

	/* for dynamic allocation of rings associated with this q_vector */
@@ -1268,6 +1272,7 @@ struct wx {
	int num_rx_queues;
	u16 rx_itr_setting;
	u16 rx_work_limit;
	bool adaptive_itr;

	int num_q_vectors;      /* current number of q_vectors for device */
	int max_q_vectors;      /* upper limit of q_vectors for device */
+1 −1
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@
#include "wx_vf.h"
#include "wx_vf_lib.h"

static void wx_write_eitr_vf(struct wx_q_vector *q_vector)
void wx_write_eitr_vf(struct wx_q_vector *q_vector)
{
	struct wx *wx = q_vector->wx;
	int v_idx = q_vector->v_idx;
Loading