Commit 2ea9b94c authored by Marc Kleine-Budde
Browse files

Merge patch series "can: m_can: Optimizations for m_can/tcan part 2"

Markus Schneider-Pargmann <msp@baylibre.com> says:

The series implements many small and bigger throughput improvements and
adds rx/tx coalescing at the end.

Changes in v7:
- Rebased to v6.8-rc1
- Fixed NULL pointer dereference in m_can_clean() on am62 that happened
  when doing ip link up, ip link down, ip link up
- Fixed a race condition on am62 observed with high throughput tests.
  netdev_completed_queue() was called before netdev_sent_queue() as the
  interrupt was processed so fast. netdev_sent_queue() is now reported
  before the actual send is done.
- Fixed an initialization issue on am62 where active interrupts are
  getting lost between runs. Fixed by resetting cdev->active_interrupts
  in m_can_disable_all_interrupts()
- Removed m_can_start_fast_xmit() because of a reordering of operations
  due to above mentioned race condition

Changes in v6:
- Rebased to v6.6-rc2
- Added two small changes for the newly integrated polling feature
- Reuse the polling hrtimer for coalescing as the timer used for
  coalescing has a similar purpose as the one for polling. Also polling
  and coalescing will never be active at the same time.

Changes in v5:
- Add back parentheses in m_can_set_coalesce(). This will make
  checkpatch unhappy but gcc happy.
- Remove unused fifo_header variable in m_can_tx_handler().
- Rebased to v6.5-rc1

Changes in v4:
- Create and use struct m_can_fifo_element in m_can_tx_handler
- Fix memcpy_and_pad to copy the full buffer
- Fixed a few checkpatch warnings
- Change putidx to be unsigned
- Print hard_xmit error only once when TX FIFO is full

Changes in v3:
- Remove parentheses in error messages
- Use memcpy_and_pad for buffer copy in 'can: m_can: Write transmit
  header and data in one transaction'.
- Replace spin_lock with spin_lock_irqsave. I got a report of an
  interrupt that was calling start_xmit just after the netqueue was
  woken up before the locked region was exited. spin_lock_irqsave should
  fix this. I attached the full stack at the end of the mail if someone
  wants to know.
- Rebased to v6.3-rc1.
- Removed tcan4x5x patches from this series.

Changes in v2:
- Rebased on v6.2-rc5
- Fixed missing/broken accounting for non peripheral m_can devices.

previous versions:
v1 - https://lore.kernel.org/lkml/20221221152537.751564-1-msp@baylibre.com
v2 - https://lore.kernel.org/lkml/20230125195059.630377-1-msp@baylibre.com
v3 - https://lore.kernel.org/lkml/20230315110546.2518305-1-msp@baylibre.com
v4 - https://lore.kernel.org/lkml/20230621092350.3130866-1-msp@baylibre.com
v5 - https://lore.kernel.org/lkml/20230718075708.958094-1-msp@baylibre.com
v6 - https://lore.kernel.org/lkml/20230929141304.3934380-1-msp@baylibre.com

Link: https://lore.kernel.org/all/20240207093220.2681425-1-msp@baylibre.com


Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
parents dbd86e47 c306c387
Loading
Loading
Loading
Loading
+407 −144
Original line number Diff line number Diff line
@@ -255,6 +255,7 @@ enum m_can_reg {
#define TXESC_TBDS_64B		0x7

/* Tx Event FIFO Configuration (TXEFC) */
#define TXEFC_EFWM_MASK		GENMASK(29, 24)
#define TXEFC_EFS_MASK		GENMASK(21, 16)

/* Tx Event FIFO Status (TXEFS) */
@@ -320,6 +321,12 @@ struct id_and_dlc {
	u32 dlc;
};

/* One TX FIFO element as assembled by m_can_tx_handler(): two header
 * words (id, dlc) immediately followed by the payload buffer, so that
 * header and data can be passed to m_can_fifo_write() in one call.
 */
struct m_can_fifo_element {
	u32 id;		/* CAN ID word; TX_BUF_XTD / TX_BUF_RTR are OR-ed in */
	u32 dlc;	/* DLC word; the sender also packs marker and flags here */
	u8 data[CANFD_MAX_DLEN];	/* frame payload, zero-padded by the sender */
};

static inline u32 m_can_read(struct m_can_classdev *cdev, enum m_can_reg reg)
{
	return cdev->ops->read_reg(cdev, reg);
@@ -372,16 +379,6 @@ m_can_txe_fifo_read(struct m_can_classdev *cdev, u32 fgi, u32 offset, u32 *val)
	return cdev->ops->read_fifo(cdev, addr_offset, val, 1);
}

/* Report whether the TXFQS_TFQF bit is set in the given TXFQS value. */
static inline bool _m_can_tx_fifo_full(u32 txfqs)
{
	return (txfqs & TXFQS_TFQF) != 0;
}

static inline bool m_can_tx_fifo_full(struct m_can_classdev *cdev)
{
	return _m_can_tx_fifo_full(m_can_read(cdev, M_CAN_TXFQS));
}

static void m_can_config_endisable(struct m_can_classdev *cdev, bool enable)
{
	u32 cccr = m_can_read(cdev, M_CAN_CCCR);
@@ -416,15 +413,48 @@ static void m_can_config_endisable(struct m_can_classdev *cdev, bool enable)
	}
}

/* Program M_CAN_IE with @interrupts and remember the value in
 * cdev->active_interrupts. The register write is skipped when the cached
 * value already equals @interrupts.
 */
static void m_can_interrupt_enable(struct m_can_classdev *cdev, u32 interrupts)
{
	if (interrupts != cdev->active_interrupts) {
		cdev->ops->write_reg(cdev, M_CAN_IE, interrupts);
		cdev->active_interrupts = interrupts;
	}
}

/* Leave coalescing mode: cancel the hrtimer and make sure IR_RF0N and
 * IR_TEFN are part of the enabled interrupt set again. Does nothing when
 * the device has no IRQ line.
 */
static void m_can_coalescing_disable(struct m_can_classdev *cdev)
{
	u32 mask;

	if (cdev->net->irq == 0)
		return;

	mask = cdev->active_interrupts;
	mask |= IR_RF0N | IR_TEFN;

	hrtimer_cancel(&cdev->hrtimer);
	m_can_interrupt_enable(cdev, mask);
}

/* Enable interrupt delivery on line 0. When the device has no IRQ line,
 * the hrtimer is started first with the polling interval.
 */
static inline void m_can_enable_all_interrupts(struct m_can_classdev *cdev)
{
	if (cdev->net->irq == 0) {
		dev_dbg(cdev->dev, "Start hrtimer\n");
		hrtimer_start(&cdev->hrtimer,
			      ms_to_ktime(HRTIMER_POLL_INTERVAL_MS),
			      HRTIMER_MODE_REL_PINNED);
	}

	/* This driver routes everything to interrupt line 0 only. */
	m_can_write(cdev, M_CAN_ILE, ILE_EINT0);
}

/* Disable both interrupt lines and stop timer-driven activity.
 *
 * The statement order matters: m_can_coalescing_disable() may still write
 * M_CAN_IE and update the cached value, which is reset afterwards.
 */
static inline void m_can_disable_all_interrupts(struct m_can_classdev *cdev)
{
	/* IRQ mode only: cancels the coalescing timer, restores RF0N/TEFN */
	m_can_coalescing_disable(cdev);
	m_can_write(cdev, M_CAN_ILE, 0x0);
	/* Reset the cached IE value so that a later m_can_interrupt_enable()
	 * is not skipped as "unchanged".
	 */
	cdev->active_interrupts = 0x0;

	/* Without an IRQ line the hrtimer is used for polling; stop it too */
	if (!cdev->net->irq) {
		dev_dbg(cdev->dev, "Stop hrtimer\n");
		hrtimer_cancel(&cdev->hrtimer);
	}
}

/* Retrieve internal timestamp counter from TSCV.TSC, and shift it to 32-bit
@@ -444,20 +474,28 @@ static u32 m_can_get_timestamp(struct m_can_classdev *cdev)
static void m_can_clean(struct net_device *net)
{
	struct m_can_classdev *cdev = netdev_priv(net);
	unsigned long irqflags;

	if (cdev->tx_skb) {
		int putidx = 0;
	if (cdev->tx_ops) {
		for (int i = 0; i != cdev->tx_fifo_size; ++i) {
			if (!cdev->tx_ops[i].skb)
				continue;

			net->stats.tx_errors++;
		if (cdev->version > 30)
			putidx = FIELD_GET(TXFQS_TFQPI_MASK,
					   m_can_read(cdev, M_CAN_TXFQS));

		can_free_echo_skb(cdev->net, putidx, NULL);
		cdev->tx_skb = NULL;
			cdev->tx_ops[i].skb = NULL;
		}
	}

	for (int i = 0; i != cdev->can.echo_skb_max; ++i)
		can_free_echo_skb(cdev->net, i, NULL);

	netdev_reset_queue(cdev->net);

	spin_lock_irqsave(&cdev->tx_handling_spinlock, irqflags);
	cdev->tx_fifo_in_flight = 0;
	spin_unlock_irqrestore(&cdev->tx_handling_spinlock, irqflags);
}

/* For peripherals, pass skb to rx-offload, which will push skb from
 * napi. For non-peripherals, RX is done in napi already, so push
 * directly. timestamp is used to ensure good skb ordering in
@@ -1007,23 +1045,60 @@ static int m_can_poll(struct napi_struct *napi, int quota)
 * echo. timestamp is used for peripherals to ensure correct ordering
 * by rx-offload, and is ignored for non-peripherals.
 */
static void m_can_tx_update_stats(struct m_can_classdev *cdev,
				  unsigned int msg_mark,
				  u32 timestamp)
static unsigned int m_can_tx_update_stats(struct m_can_classdev *cdev,
					  unsigned int msg_mark, u32 timestamp)
{
	struct net_device *dev = cdev->net;
	struct net_device_stats *stats = &dev->stats;
	unsigned int frame_len;

	if (cdev->is_peripheral)
		stats->tx_bytes +=
			can_rx_offload_get_echo_skb_queue_timestamp(&cdev->offload,
								    msg_mark,
								    timestamp,
								    NULL);
								    &frame_len);
	else
		stats->tx_bytes += can_get_echo_skb(dev, msg_mark, NULL);
		stats->tx_bytes += can_get_echo_skb(dev, msg_mark, &frame_len);

	stats->tx_packets++;

	return frame_len;
}

/* Account for @transmitted completed frames totalling
 * @transmitted_frame_len bytes: report them to the netdev queue, wake the
 * queue if the in-flight counter had reached tx_fifo_size, and decrement
 * the in-flight counter under tx_handling_spinlock.
 */
static void m_can_finish_tx(struct m_can_classdev *cdev, int transmitted,
			    unsigned int transmitted_frame_len)
{
	unsigned long flags;

	netdev_completed_queue(cdev->net, transmitted, transmitted_frame_len);

	spin_lock_irqsave(&cdev->tx_handling_spinlock, flags);

	/* Only a previously saturated FIFO can have stopped the queue. */
	if (transmitted > 0 && cdev->tx_fifo_in_flight >= cdev->tx_fifo_size)
		netif_wake_queue(cdev->net);
	cdev->tx_fifo_in_flight -= transmitted;

	spin_unlock_irqrestore(&cdev->tx_handling_spinlock, flags);
}

/* Reserve one TX FIFO slot for a frame about to be sent.
 *
 * The queue is stopped as soon as the reservation fills the FIFO. Returns
 * NETDEV_TX_BUSY (without reserving) if the FIFO was already full,
 * NETDEV_TX_OK otherwise.
 */
static netdev_tx_t m_can_start_tx(struct m_can_classdev *cdev)
{
	netdev_tx_t ret = NETDEV_TX_OK;
	unsigned long flags;
	int wanted;

	spin_lock_irqsave(&cdev->tx_handling_spinlock, flags);

	wanted = cdev->tx_fifo_in_flight + 1;
	if (wanted >= cdev->tx_fifo_size)
		netif_stop_queue(cdev->net);

	if (wanted > cdev->tx_fifo_size) {
		/* No free slot: leave the counter untouched. */
		netdev_err_once(cdev->net, "hard_xmit called while TX FIFO full\n");
		ret = NETDEV_TX_BUSY;
	} else {
		cdev->tx_fifo_in_flight = wanted;
	}

	spin_unlock_irqrestore(&cdev->tx_handling_spinlock, flags);

	return ret;
}

static int m_can_echo_tx_event(struct net_device *dev)
@@ -1035,6 +1110,8 @@ static int m_can_echo_tx_event(struct net_device *dev)
	int i = 0;
	int err = 0;
	unsigned int msg_mark;
	int processed = 0;
	unsigned int processed_frame_len = 0;

	struct m_can_classdev *cdev = netdev_priv(dev);

@@ -1063,25 +1140,62 @@ static int m_can_echo_tx_event(struct net_device *dev)
		fgi = (++fgi >= cdev->mcfg[MRAM_TXE].num ? 0 : fgi);

		/* update stats */
		m_can_tx_update_stats(cdev, msg_mark, timestamp);
		processed_frame_len += m_can_tx_update_stats(cdev, msg_mark,
							     timestamp);

		++processed;
	}

	if (ack_fgi != -1)
		m_can_write(cdev, M_CAN_TXEFA, FIELD_PREP(TXEFA_EFAI_MASK,
							  ack_fgi));

	m_can_finish_tx(cdev, processed, processed_frame_len);

	return err;
}

/* Adjust the enabled interrupt set and the coalescing timer based on the
 * interrupt status @ir.
 *
 * For each direction with a non-zero coalescing timeout whose "new
 * element" or "watermark" bit is set in @ir, the per-element interrupt
 * (IR_RF0N / IR_TEFN) is masked and the hrtimer is (re)started. A
 * direction that is not being coalesced gets its per-element interrupt
 * back once the hrtimer is no longer active. Does nothing when the device
 * has no IRQ line.
 */
static void m_can_coalescing_update(struct m_can_classdev *cdev, u32 ir)
{
	u32 new_interrupts = cdev->active_interrupts;
	bool enable_rx_timer = false;
	bool enable_tx_timer = false;

	if (!cdev->net->irq)
		return;

	if (cdev->rx_coalesce_usecs_irq > 0 && (ir & (IR_RF0N | IR_RF0W))) {
		enable_rx_timer = true;
		new_interrupts &= ~IR_RF0N;
	}
	if (cdev->tx_coalesce_usecs_irq > 0 && (ir & (IR_TEFN | IR_TEFW))) {
		enable_tx_timer = true;
		new_interrupts &= ~IR_TEFN;
	}
	if (!enable_rx_timer && !hrtimer_active(&cdev->hrtimer))
		new_interrupts |= IR_RF0N;
	if (!enable_tx_timer && !hrtimer_active(&cdev->hrtimer))
		new_interrupts |= IR_TEFN;

	m_can_interrupt_enable(cdev, new_interrupts);
	/* Logical OR: both operands are booleans, not bit masks. */
	if (enable_rx_timer || enable_tx_timer)
		hrtimer_start(&cdev->hrtimer, cdev->irq_timer_wait,
			      HRTIMER_MODE_REL);
}

static irqreturn_t m_can_isr(int irq, void *dev_id)
{
	struct net_device *dev = (struct net_device *)dev_id;
	struct m_can_classdev *cdev = netdev_priv(dev);
	u32 ir;

	if (pm_runtime_suspended(cdev->dev))
	if (pm_runtime_suspended(cdev->dev)) {
		m_can_coalescing_disable(cdev);
		return IRQ_NONE;
	}

	ir = m_can_read(cdev, M_CAN_IR);
	m_can_coalescing_update(cdev, ir);
	if (!ir)
		return IRQ_NONE;

@@ -1096,12 +1210,16 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
	 * - state change IRQ
	 * - bus error IRQ and bus error reporting
	 */
	if ((ir & IR_RF0N) || (ir & IR_ERR_ALL_30X)) {
	if (ir & (IR_RF0N | IR_RF0W | IR_ERR_ALL_30X)) {
		cdev->irqstatus = ir;
		if (!cdev->is_peripheral) {
			m_can_disable_all_interrupts(cdev);
			napi_schedule(&cdev->napi);
		} else if (m_can_rx_peripheral(dev, ir) < 0) {
		} else {
			int pkts;

			pkts = m_can_rx_peripheral(dev, ir);
			if (pkts < 0)
				goto out_fail;
		}
	}
@@ -1110,21 +1228,18 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
		if (ir & IR_TC) {
			/* Transmission Complete Interrupt*/
			u32 timestamp = 0;
			unsigned int frame_len;

			if (cdev->is_peripheral)
				timestamp = m_can_get_timestamp(cdev);
			m_can_tx_update_stats(cdev, 0, timestamp);
			netif_wake_queue(dev);
			frame_len = m_can_tx_update_stats(cdev, 0, timestamp);
			m_can_finish_tx(cdev, 1, frame_len);
		}
	} else  {
		if (ir & IR_TEFN) {
		if (ir & (IR_TEFN | IR_TEFW)) {
			/* New TX FIFO Element arrived */
			if (m_can_echo_tx_event(dev) != 0)
				goto out_fail;

			if (netif_queue_stopped(dev) &&
			    !m_can_tx_fifo_full(cdev))
				netif_wake_queue(dev);
		}
	}

@@ -1138,6 +1253,15 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
	return IRQ_HANDLED;
}

/* hrtimer callback used for coalescing: wake the threaded interrupt
 * handler of the owning net device. The timer is not re-armed here.
 */
static enum hrtimer_restart m_can_coalescing_timer(struct hrtimer *timer)
{
	struct m_can_classdev *cdev;

	cdev = container_of(timer, struct m_can_classdev, hrtimer);
	irq_wake_thread(cdev->net->irq, cdev->net);

	return HRTIMER_NORESTART;
}

static const struct can_bittiming_const m_can_bittiming_const_30X = {
	.name = KBUILD_MODNAME,
	.tseg1_min = 2,		/* Time segment 1 = prop_seg + phase_seg1 */
@@ -1276,9 +1400,8 @@ static int m_can_chip_config(struct net_device *dev)
	}

	/* Disable unused interrupts */
	interrupts &= ~(IR_ARA | IR_ELO | IR_DRX | IR_TEFF | IR_TEFW | IR_TFE |
			IR_TCF | IR_HPM | IR_RF1F | IR_RF1W | IR_RF1N |
			IR_RF0F | IR_RF0W);
	interrupts &= ~(IR_ARA | IR_ELO | IR_DRX | IR_TEFF | IR_TFE | IR_TCF |
			IR_HPM | IR_RF1F | IR_RF1W | IR_RF1N | IR_RF0F);

	m_can_config_endisable(cdev, true);

@@ -1315,6 +1438,8 @@ static int m_can_chip_config(struct net_device *dev)
	} else {
		/* Full TX Event FIFO is used */
		m_can_write(cdev, M_CAN_TXEFC,
			    FIELD_PREP(TXEFC_EFWM_MASK,
				       cdev->tx_max_coalesced_frames_irq) |
			    FIELD_PREP(TXEFC_EFS_MASK,
				       cdev->mcfg[MRAM_TXE].num) |
			    cdev->mcfg[MRAM_TXE].off);
@@ -1322,6 +1447,7 @@ static int m_can_chip_config(struct net_device *dev)

	/* rx fifo configuration, blocking mode, fifo size 1 */
	m_can_write(cdev, M_CAN_RXF0C,
		    FIELD_PREP(RXFC_FWM_MASK, cdev->rx_max_coalesced_frames_irq) |
		    FIELD_PREP(RXFC_FS_MASK, cdev->mcfg[MRAM_RXF0].num) |
		    cdev->mcfg[MRAM_RXF0].off);

@@ -1380,7 +1506,7 @@ static int m_can_chip_config(struct net_device *dev)
		else
			interrupts &= ~(IR_ERR_LEC_31X);
	}
	m_can_write(cdev, M_CAN_IE, interrupts);
	m_can_interrupt_enable(cdev, interrupts);

	/* route all interrupts to INT0 */
	m_can_write(cdev, M_CAN_ILS, ILS_ALL_INT0);
@@ -1413,15 +1539,16 @@ static int m_can_start(struct net_device *dev)
	if (ret)
		return ret;

	netdev_queue_set_dql_min_limit(netdev_get_tx_queue(cdev->net, 0),
				       cdev->tx_max_coalesced_frames);

	cdev->can.state = CAN_STATE_ERROR_ACTIVE;

	m_can_enable_all_interrupts(cdev);

	if (!dev->irq) {
		dev_dbg(cdev->dev, "Start hrtimer\n");
		hrtimer_start(&cdev->hrtimer, ms_to_ktime(HRTIMER_POLL_INTERVAL_MS),
			      HRTIMER_MODE_REL_PINNED);
	}
	if (cdev->version > 30)
		cdev->tx_fifo_putidx = FIELD_GET(TXFQS_TFQPI_MASK,
						 m_can_read(cdev, M_CAN_TXFQS));

	return 0;
}
@@ -1577,11 +1704,6 @@ static void m_can_stop(struct net_device *dev)
{
	struct m_can_classdev *cdev = netdev_priv(dev);

	if (!dev->irq) {
		dev_dbg(cdev->dev, "Stop hrtimer\n");
		hrtimer_cancel(&cdev->hrtimer);
	}

	/* disable all interrupts */
	m_can_disable_all_interrupts(cdev);

@@ -1605,8 +1727,9 @@ static int m_can_close(struct net_device *dev)
	m_can_clk_stop(cdev);
	free_irq(dev->irq, dev);

	m_can_clean(dev);

	if (cdev->is_peripheral) {
		cdev->tx_skb = NULL;
		destroy_workqueue(cdev->tx_wq);
		cdev->tx_wq = NULL;
		can_rx_offload_disable(&cdev->offload);
@@ -1619,57 +1742,42 @@ static int m_can_close(struct net_device *dev)
	return 0;
}

static int m_can_next_echo_skb_occupied(struct net_device *dev, int putidx)
static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev,
				    struct sk_buff *skb)
{
	struct m_can_classdev *cdev = netdev_priv(dev);
	/*get wrap around for loopback skb index */
	unsigned int wrap = cdev->can.echo_skb_max;
	int next_idx;

	/* calculate next index */
	next_idx = (++putidx >= wrap ? 0 : putidx);

	/* check if occupied */
	return !!cdev->can.echo_skb[next_idx];
}

static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
{
	struct canfd_frame *cf = (struct canfd_frame *)cdev->tx_skb->data;
	struct canfd_frame *cf = (struct canfd_frame *)skb->data;
	u8 len_padded = DIV_ROUND_UP(cf->len, 4);
	struct m_can_fifo_element fifo_element;
	struct net_device *dev = cdev->net;
	struct sk_buff *skb = cdev->tx_skb;
	struct id_and_dlc fifo_header;
	u32 cccr, fdflags;
	u32 txfqs;
	int err;
	int putidx;

	cdev->tx_skb = NULL;
	u32 putidx;
	unsigned int frame_len = can_skb_get_frame_len(skb);

	/* Generate ID field for TX buffer Element */
	/* Common to all supported M_CAN versions */
	if (cf->can_id & CAN_EFF_FLAG) {
		fifo_header.id = cf->can_id & CAN_EFF_MASK;
		fifo_header.id |= TX_BUF_XTD;
		fifo_element.id = cf->can_id & CAN_EFF_MASK;
		fifo_element.id |= TX_BUF_XTD;
	} else {
		fifo_header.id = ((cf->can_id & CAN_SFF_MASK) << 18);
		fifo_element.id = ((cf->can_id & CAN_SFF_MASK) << 18);
	}

	if (cf->can_id & CAN_RTR_FLAG)
		fifo_header.id |= TX_BUF_RTR;
		fifo_element.id |= TX_BUF_RTR;

	if (cdev->version == 30) {
		netif_stop_queue(dev);

		fifo_header.dlc = can_fd_len2dlc(cf->len) << 16;
		fifo_element.dlc = can_fd_len2dlc(cf->len) << 16;

		/* Write the frame ID, DLC, and payload to the FIFO element. */
		err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, &fifo_header, 2);
		err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, &fifo_element, 2);
		if (err)
			goto out_fail;

		err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_DATA,
				       cf->data, DIV_ROUND_UP(cf->len, 4));
				       cf->data, len_padded);
		if (err)
			goto out_fail;

@@ -1690,33 +1798,15 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
		}
		m_can_write(cdev, M_CAN_TXBTIE, 0x1);

		can_put_echo_skb(skb, dev, 0, 0);
		can_put_echo_skb(skb, dev, 0, frame_len);

		m_can_write(cdev, M_CAN_TXBAR, 0x1);
		/* End of xmit function for version 3.0.x */
	} else {
		/* Transmit routine for version >= v3.1.x */

		txfqs = m_can_read(cdev, M_CAN_TXFQS);

		/* Check if FIFO full */
		if (_m_can_tx_fifo_full(txfqs)) {
			/* This shouldn't happen */
			netif_stop_queue(dev);
			netdev_warn(dev,
				    "TX queue active although FIFO is full.");

			if (cdev->is_peripheral) {
				kfree_skb(skb);
				dev->stats.tx_dropped++;
				return NETDEV_TX_OK;
			} else {
				return NETDEV_TX_BUSY;
			}
		}

		/* get put index for frame */
		putidx = FIELD_GET(TXFQS_TFQPI_MASK, txfqs);
		putidx = cdev->tx_fifo_putidx;

		/* Construct DLC Field, with CAN-FD configuration.
		 * Use the put index of the fifo as the message marker,
@@ -1731,30 +1821,32 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
				fdflags |= TX_BUF_BRS;
		}

		fifo_header.dlc = FIELD_PREP(TX_BUF_MM_MASK, putidx) |
		fifo_element.dlc = FIELD_PREP(TX_BUF_MM_MASK, putidx) |
			FIELD_PREP(TX_BUF_DLC_MASK, can_fd_len2dlc(cf->len)) |
			fdflags | TX_BUF_EFC;
		err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID, &fifo_header, 2);
		if (err)
			goto out_fail;

		err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DATA,
				       cf->data, DIV_ROUND_UP(cf->len, 4));
		memcpy_and_pad(fifo_element.data, CANFD_MAX_DLEN, &cf->data,
			       cf->len, 0);

		err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID,
				       &fifo_element, 2 + len_padded);
		if (err)
			goto out_fail;

		/* Push loopback echo.
		 * Will be looped back on TX interrupt based on message marker
		 */
		can_put_echo_skb(skb, dev, putidx, 0);
		can_put_echo_skb(skb, dev, putidx, frame_len);

		if (cdev->is_peripheral) {
			/* Delay enabling TX FIFO element */
			cdev->tx_peripheral_submit |= BIT(putidx);
		} else {
			/* Enable TX FIFO element to start transfer  */
		m_can_write(cdev, M_CAN_TXBAR, (1 << putidx));

		/* stop network queue if fifo full */
		if (m_can_tx_fifo_full(cdev) ||
		    m_can_next_echo_skb_occupied(dev, putidx))
			netif_stop_queue(dev);
			m_can_write(cdev, M_CAN_TXBAR, BIT(putidx));
		}
		cdev->tx_fifo_putidx = (++cdev->tx_fifo_putidx >= cdev->can.echo_skb_max ?
					0 : cdev->tx_fifo_putidx);
	}

	return NETDEV_TX_OK;
@@ -1765,46 +1857,91 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
	return NETDEV_TX_BUSY;
}

/* Write the accumulated tx_peripheral_submit bitmask to M_CAN_TXBAR and
 * clear it. No-op for version 30 and for non-peripheral devices.
 */
static void m_can_tx_submit(struct m_can_classdev *cdev)
{
	if (cdev->version == 30 || !cdev->is_peripheral)
		return;

	m_can_write(cdev, M_CAN_TXBAR, cdev->tx_peripheral_submit);
	cdev->tx_peripheral_submit = 0;
}

static void m_can_tx_work_queue(struct work_struct *ws)
{
	struct m_can_classdev *cdev = container_of(ws, struct m_can_classdev,
						   tx_work);
	struct m_can_tx_op *op = container_of(ws, struct m_can_tx_op, work);
	struct m_can_classdev *cdev = op->cdev;
	struct sk_buff *skb = op->skb;

	op->skb = NULL;
	m_can_tx_handler(cdev, skb);
	if (op->submit)
		m_can_tx_submit(cdev);
}

/* Store @skb and the @submit flag in the next tx_ops slot, queue that
 * slot's work item on tx_wq, and advance next_tx_op, wrapping to 0 at
 * tx_fifo_size.
 */
static void m_can_tx_queue_skb(struct m_can_classdev *cdev, struct sk_buff *skb,
			       bool submit)
{
	int slot = cdev->next_tx_op;

	cdev->tx_ops[slot].skb = skb;
	cdev->tx_ops[slot].submit = submit;
	queue_work(cdev->tx_wq, &cdev->tx_ops[slot].work);

	slot++;
	cdev->next_tx_op = slot >= cdev->tx_fifo_size ? 0 : slot;
}

	m_can_tx_handler(cdev);
/* Queue @skb for transmission through the TX work queue.
 *
 * The queued op is flagged for submit when tx_max_coalesced_frames frames
 * have piled up without a submit, or when netdev_xmit_more() reports that
 * no further frame follows. Always returns NETDEV_TX_OK.
 */
static netdev_tx_t m_can_start_peripheral_xmit(struct m_can_classdev *cdev,
					       struct sk_buff *skb)
{
	bool submit = false;

	cdev->nr_txs_without_submit++;
	if (cdev->nr_txs_without_submit >= cdev->tx_max_coalesced_frames ||
	    !netdev_xmit_more()) {
		submit = true;
		cdev->nr_txs_without_submit = 0;
	}

	m_can_tx_queue_skb(cdev, skb, submit);

	return NETDEV_TX_OK;
}

static netdev_tx_t m_can_start_xmit(struct sk_buff *skb,
				    struct net_device *dev)
{
	struct m_can_classdev *cdev = netdev_priv(dev);
	unsigned int frame_len;
	netdev_tx_t ret;

	if (can_dev_dropped_skb(dev, skb))
		return NETDEV_TX_OK;

	if (cdev->is_peripheral) {
		if (cdev->tx_skb) {
			netdev_err(dev, "hard_xmit called while tx busy\n");
			return NETDEV_TX_BUSY;
		}
	frame_len = can_skb_get_frame_len(skb);

	if (cdev->can.state == CAN_STATE_BUS_OFF) {
			m_can_clean(dev);
		} else {
			/* Need to stop the queue to avoid numerous requests
			 * from being sent.  Suggested improvement is to create
			 * a queueing mechanism that will queue the skbs and
			 * process them in order.
			 */
			cdev->tx_skb = skb;
			netif_stop_queue(cdev->net);
			queue_work(cdev->tx_wq, &cdev->tx_work);
		}
	} else {
		cdev->tx_skb = skb;
		return m_can_tx_handler(cdev);
		m_can_clean(cdev->net);
		return NETDEV_TX_OK;
	}

	return NETDEV_TX_OK;
	ret = m_can_start_tx(cdev);
	if (ret != NETDEV_TX_OK)
		return ret;

	netdev_sent_queue(dev, frame_len);

	if (cdev->is_peripheral)
		ret = m_can_start_peripheral_xmit(cdev, skb);
	else
		ret = m_can_tx_handler(cdev, skb);

	if (ret != NETDEV_TX_OK)
		netdev_completed_queue(dev, 1, frame_len);

	return ret;
}

static enum hrtimer_restart hrtimer_callback(struct hrtimer *timer)
@@ -1844,15 +1981,17 @@ static int m_can_open(struct net_device *dev)

	/* register interrupt handler */
	if (cdev->is_peripheral) {
		cdev->tx_skb = NULL;
		cdev->tx_wq = alloc_workqueue("mcan_wq",
					      WQ_FREEZABLE | WQ_MEM_RECLAIM, 0);
		cdev->tx_wq = alloc_ordered_workqueue("mcan_wq",
						      WQ_FREEZABLE | WQ_MEM_RECLAIM);
		if (!cdev->tx_wq) {
			err = -ENOMEM;
			goto out_wq_fail;
		}

		INIT_WORK(&cdev->tx_work, m_can_tx_work_queue);
		for (int i = 0; i != cdev->tx_fifo_size; ++i) {
			cdev->tx_ops[i].cdev = cdev;
			INIT_WORK(&cdev->tx_ops[i].work, m_can_tx_work_queue);
		}

		err = request_threaded_irq(dev->irq, NULL, m_can_isr,
					   IRQF_ONESHOT,
@@ -1900,7 +2039,108 @@ static const struct net_device_ops m_can_netdev_ops = {
	.ndo_change_mtu = can_change_mtu,
};

static int m_can_get_coalesce(struct net_device *dev,
			      struct ethtool_coalesce *ec,
			      struct kernel_ethtool_coalesce *kec,
			      struct netlink_ext_ack *ext_ack)
{
	struct m_can_classdev *cdev = netdev_priv(dev);

	ec->rx_max_coalesced_frames_irq = cdev->rx_max_coalesced_frames_irq;
	ec->rx_coalesce_usecs_irq = cdev->rx_coalesce_usecs_irq;
	ec->tx_max_coalesced_frames = cdev->tx_max_coalesced_frames;
	ec->tx_max_coalesced_frames_irq = cdev->tx_max_coalesced_frames_irq;
	ec->tx_coalesce_usecs_irq = cdev->tx_coalesce_usecs_irq;

	return 0;
}

static int m_can_set_coalesce(struct net_device *dev,
			      struct ethtool_coalesce *ec,
			      struct kernel_ethtool_coalesce *kec,
			      struct netlink_ext_ack *ext_ack)
{
	struct m_can_classdev *cdev = netdev_priv(dev);

	if (cdev->can.state != CAN_STATE_STOPPED) {
		netdev_err(dev, "Device is in use, please shut it down first\n");
		return -EBUSY;
	}

	if (ec->rx_max_coalesced_frames_irq > cdev->mcfg[MRAM_RXF0].num) {
		netdev_err(dev, "rx-frames-irq %u greater than the RX FIFO %u\n",
			   ec->rx_max_coalesced_frames_irq,
			   cdev->mcfg[MRAM_RXF0].num);
		return -EINVAL;
	}
	if ((ec->rx_max_coalesced_frames_irq == 0) != (ec->rx_coalesce_usecs_irq == 0)) {
		netdev_err(dev, "rx-frames-irq and rx-usecs-irq can only be set together\n");
		return -EINVAL;
	}
	if (ec->tx_max_coalesced_frames_irq > cdev->mcfg[MRAM_TXE].num) {
		netdev_err(dev, "tx-frames-irq %u greater than the TX event FIFO %u\n",
			   ec->tx_max_coalesced_frames_irq,
			   cdev->mcfg[MRAM_TXE].num);
		return -EINVAL;
	}
	if (ec->tx_max_coalesced_frames_irq > cdev->mcfg[MRAM_TXB].num) {
		netdev_err(dev, "tx-frames-irq %u greater than the TX FIFO %u\n",
			   ec->tx_max_coalesced_frames_irq,
			   cdev->mcfg[MRAM_TXB].num);
		return -EINVAL;
	}
	if ((ec->tx_max_coalesced_frames_irq == 0) != (ec->tx_coalesce_usecs_irq == 0)) {
		netdev_err(dev, "tx-frames-irq and tx-usecs-irq can only be set together\n");
		return -EINVAL;
	}
	if (ec->tx_max_coalesced_frames > cdev->mcfg[MRAM_TXE].num) {
		netdev_err(dev, "tx-frames %u greater than the TX event FIFO %u\n",
			   ec->tx_max_coalesced_frames,
			   cdev->mcfg[MRAM_TXE].num);
		return -EINVAL;
	}
	if (ec->tx_max_coalesced_frames > cdev->mcfg[MRAM_TXB].num) {
		netdev_err(dev, "tx-frames %u greater than the TX FIFO %u\n",
			   ec->tx_max_coalesced_frames,
			   cdev->mcfg[MRAM_TXB].num);
		return -EINVAL;
	}
	if (ec->rx_coalesce_usecs_irq != 0 && ec->tx_coalesce_usecs_irq != 0 &&
	    ec->rx_coalesce_usecs_irq != ec->tx_coalesce_usecs_irq) {
		netdev_err(dev, "rx-usecs-irq %u needs to be equal to tx-usecs-irq %u if both are enabled\n",
			   ec->rx_coalesce_usecs_irq,
			   ec->tx_coalesce_usecs_irq);
		return -EINVAL;
	}

	cdev->rx_max_coalesced_frames_irq = ec->rx_max_coalesced_frames_irq;
	cdev->rx_coalesce_usecs_irq = ec->rx_coalesce_usecs_irq;
	cdev->tx_max_coalesced_frames = ec->tx_max_coalesced_frames;
	cdev->tx_max_coalesced_frames_irq = ec->tx_max_coalesced_frames_irq;
	cdev->tx_coalesce_usecs_irq = ec->tx_coalesce_usecs_irq;

	if (cdev->rx_coalesce_usecs_irq)
		cdev->irq_timer_wait =
			ns_to_ktime(cdev->rx_coalesce_usecs_irq * NSEC_PER_USEC);
	else
		cdev->irq_timer_wait =
			ns_to_ktime(cdev->tx_coalesce_usecs_irq * NSEC_PER_USEC);

	return 0;
}

/* ethtool operations including the coalescing get/set callbacks. */
static const struct ethtool_ops m_can_ethtool_ops = {
	.supported_coalesce_params = ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ |
		ETHTOOL_COALESCE_RX_USECS_IRQ |
		ETHTOOL_COALESCE_TX_MAX_FRAMES |
		ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ |
		ETHTOOL_COALESCE_TX_USECS_IRQ,
	.get_coalesce = m_can_get_coalesce,
	.set_coalesce = m_can_set_coalesce,
	.get_ts_info = ethtool_op_get_ts_info,
};

/* ethtool operations without any coalescing callbacks; only timestamp
 * info is provided.
 */
static const struct ethtool_ops m_can_ethtool_ops_polling = {
	.get_ts_info = ethtool_op_get_ts_info,
};

@@ -1908,7 +2148,10 @@ static int register_m_can_dev(struct net_device *dev)
{
	dev->flags |= IFF_ECHO;	/* we support local echo */
	dev->netdev_ops = &m_can_netdev_ops;
	if (dev->irq)
		dev->ethtool_ops = &m_can_ethtool_ops;
	else
		dev->ethtool_ops = &m_can_ethtool_ops_polling;

	return register_candev(dev);
}
@@ -2056,6 +2299,19 @@ int m_can_class_register(struct m_can_classdev *cdev)
{
	int ret;

	cdev->tx_fifo_size = max(1, min(cdev->mcfg[MRAM_TXB].num,
					cdev->mcfg[MRAM_TXE].num));
	if (cdev->is_peripheral) {
		cdev->tx_ops =
			devm_kzalloc(cdev->dev,
				     cdev->tx_fifo_size * sizeof(*cdev->tx_ops),
				     GFP_KERNEL);
		if (!cdev->tx_ops) {
			dev_err(cdev->dev, "Failed to allocate tx_ops for workqueue\n");
			return -ENOMEM;
		}
	}

	if (cdev->pm_clock_support) {
		ret = m_can_clk_start(cdev);
		if (ret)
@@ -2069,8 +2325,15 @@ int m_can_class_register(struct m_can_classdev *cdev)
			goto clk_disable;
	}

	if (!cdev->net->irq)
	if (!cdev->net->irq) {
		dev_dbg(cdev->dev, "Polling enabled, initialize hrtimer");
		hrtimer_init(&cdev->hrtimer, CLOCK_MONOTONIC,
			     HRTIMER_MODE_REL_PINNED);
		cdev->hrtimer.function = &hrtimer_callback;
	} else {
		hrtimer_init(&cdev->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		cdev->hrtimer.function = m_can_coalescing_timer;
	}

	ret = m_can_dev_setup(cdev);
	if (ret)
+32 −2
Original line number Diff line number Diff line
@@ -70,6 +70,13 @@ struct m_can_ops {
	int (*init)(struct m_can_classdev *cdev);
};

/* One deferred transmit operation, executed from the TX work queue. */
struct m_can_tx_op {
	struct m_can_classdev *cdev;	/* owning device */
	struct work_struct work;	/* work item queued on the TX workqueue */
	struct sk_buff *skb;		/* frame to transmit */
	bool submit;			/* request a TX submit after this frame */
};

struct m_can_classdev {
	struct can_priv can;
	struct can_rx_offload offload;
@@ -80,10 +87,10 @@ struct m_can_classdev {
	struct clk *cclk;

	struct workqueue_struct *tx_wq;
	struct work_struct tx_work;
	struct sk_buff *tx_skb;
	struct phy *transceiver;

	ktime_t irq_timer_wait;

	struct m_can_ops *ops;

	int version;
@@ -92,6 +99,29 @@ struct m_can_classdev {
	int pm_clock_support;
	int is_peripheral;

	// Cached M_CAN_IE register content
	u32 active_interrupts;
	u32 rx_max_coalesced_frames_irq;
	u32 rx_coalesce_usecs_irq;
	u32 tx_max_coalesced_frames;
	u32 tx_max_coalesced_frames_irq;
	u32 tx_coalesce_usecs_irq;

	// Store this internally to avoid fetch delays on peripheral chips
	u32 tx_fifo_putidx;

	/* Protects shared state between start_xmit and m_can_isr */
	spinlock_t tx_handling_spinlock;
	int tx_fifo_in_flight;

	struct m_can_tx_op *tx_ops;
	int tx_fifo_size;
	int next_tx_op;

	int nr_txs_without_submit;
	/* bitfield of fifo elements that will be submitted together */
	u32 tx_peripheral_submit;

	struct mram_cfg mcfg[MRAM_CFG_NUM];

	struct hrtimer hrtimer;
+0 −4
Original line number Diff line number Diff line
@@ -109,10 +109,6 @@ static int m_can_plat_probe(struct platform_device *pdev)
			ret = irq;
			goto probe_fail;
		}
	} else {
		dev_dbg(mcan_class->dev, "Polling enabled, initialize hrtimer");
		hrtimer_init(&mcan_class->hrtimer, CLOCK_MONOTONIC,
			     HRTIMER_MODE_REL_PINNED);
	}

	/* message ram could be shared */