Commit b706fb4e authored by Manish Dharanenthiran, committed by Jeff Johnson
Browse files

wifi: ath12k: Use 1KB Cache Flush Command for QoS TID Descriptors



Currently, if the descriptor size exceeds 128 bytes, the descriptor is
split into multiple 128-byte segments, each requiring a separate flush
cache queue command. As a result, multiple commands are issued to flush
a single TID, which negatively impacts performance. To avoid this, set
the _FLUSH_QUEUE_1K_DESC flag on the REO flush cache command so that a
1KB descriptor is flushed in a single operation.

Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1
Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3

Signed-off-by: Manish Dharanenthiran <manish.dharanenthiran@oss.qualcomm.com>
Signed-off-by: Nithyanantham Paramasivam <nithyanantham.paramasivam@oss.qualcomm.com>
Reviewed-by: Baochen Qiang <baochen.qiang@oss.qualcomm.com>
Reviewed-by: Vasanthakumar Thiagarajan <vasanthakumar.thiagarajan@oss.qualcomm.com>
Link: https://patch.msgid.link/20250806111750.3214584-8-nithyanantham.paramasivam@oss.qualcomm.com


Signed-off-by: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
parent 5e32edc6
Loading
Loading
Loading
Loading
+29 −30
Original line number Diff line number Diff line
@@ -693,44 +693,33 @@ static int ath12k_dp_reo_cmd_send(struct ath12k_base *ab,
	return 0;
}

static void ath12k_dp_reo_cache_flush(struct ath12k_base *ab,
static int ath12k_dp_reo_cache_flush(struct ath12k_base *ab,
				     struct ath12k_dp_rx_tid_rxq *rx_tid)
{
	struct ath12k_hal_reo_cmd cmd = {};
	unsigned long tot_desc_sz, desc_sz;
	int ret;

	tot_desc_sz = rx_tid->qbuf.size;
	desc_sz = ath12k_hal_reo_qdesc_size(0, HAL_DESC_REO_NON_QOS_TID);

	while (tot_desc_sz > desc_sz) {
		tot_desc_sz -= desc_sz;
		cmd.addr_lo = lower_32_bits(rx_tid->qbuf.paddr_aligned + tot_desc_sz);
		cmd.addr_hi = upper_32_bits(rx_tid->qbuf.paddr_aligned);
		ret = ath12k_dp_reo_cmd_send(ab, rx_tid,
					     HAL_REO_CMD_FLUSH_CACHE, &cmd,
					     NULL);
		if (ret)
			ath12k_warn(ab,
				    "failed to send HAL_REO_CMD_FLUSH_CACHE, tid %d (%d)\n",
				    rx_tid->tid, ret);
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.addr_lo = lower_32_bits(rx_tid->qbuf.paddr_aligned);
	cmd.addr_hi = upper_32_bits(rx_tid->qbuf.paddr_aligned);
	cmd.flag = HAL_REO_CMD_FLG_NEED_STATUS;
	/* HAL_REO_CMD_FLG_FLUSH_FWD_ALL_MPDUS - all pending MPDUs
	 * in the bitmap will be forwarded/flushed to REO output rings
	 */
	cmd.flag = HAL_REO_CMD_FLG_NEED_STATUS |
		   HAL_REO_CMD_FLG_FLUSH_FWD_ALL_MPDUS;

	/* For all QoS TIDs (except NON_QOS), the driver allocates a maximum
	 * window size of 1024. In such cases, the driver can issue a single
	 * 1KB descriptor flush command instead of sending multiple 128-byte
	 * flush commands for each QoS TID, improving efficiency.
	 */

	if (rx_tid->tid != HAL_DESC_REO_NON_QOS_TID)
		cmd.flag |= HAL_REO_CMD_FLG_FLUSH_QUEUE_1K_DESC;

	ret = ath12k_dp_reo_cmd_send(ab, rx_tid,
				     HAL_REO_CMD_FLUSH_CACHE,
				     &cmd, ath12k_dp_reo_cmd_free);
	if (ret) {
		ath12k_err(ab, "failed to send HAL_REO_CMD_FLUSH_CACHE cmd, tid %d (%d)\n",
			   rx_tid->tid, ret);
		dma_unmap_single(ab->dev, rx_tid->qbuf.paddr_aligned, rx_tid->qbuf.size,
				 DMA_BIDIRECTIONAL);
		kfree(rx_tid->qbuf.vaddr);
		rx_tid->qbuf.vaddr = NULL;
	}
	return ret;
}

static void ath12k_peer_rx_tid_qref_reset(struct ath12k_base *ab, u16 peer_id, u16 tid)
@@ -828,9 +817,19 @@ static void ath12k_dp_rx_tid_del_func(struct ath12k_dp *dp, void *ctx,
		if (dp->reo_cmd_cache_flush_count > ATH12K_DP_RX_REO_DESC_FREE_THRES ||
		    time_after(jiffies, elem->ts +
			       msecs_to_jiffies(ATH12K_DP_RX_REO_DESC_FREE_TIMEOUT_MS))) {
			/* The reo_cmd_cache_flush_list is used in only two contexts:
			 * in this function, called from napi, and in
			 * ath12k_dp_free during core destroy.
			 * If the cache flush command is sent successfully, delete
			 * the element from the cache list. Otherwise stop processing
			 * the list; ath12k_dp_rx_reo_cmd_list_cleanup will be called
			 * during core destroy.
			 */

			if (ath12k_dp_reo_cache_flush(ab, &elem->data))
				break;

			list_del(&elem->list);
			dp->reo_cmd_cache_flush_count--;
			ath12k_dp_reo_cache_flush(ab, &elem->data);
			kfree(elem);
		}
	}
+1 −0
Original line number Diff line number Diff line
@@ -832,6 +832,7 @@ enum hal_rx_buf_return_buf_manager {
#define HAL_REO_CMD_FLG_FLUSH_ALL		BIT(6)
#define HAL_REO_CMD_FLG_UNBLK_RESOURCE		BIT(7)
#define HAL_REO_CMD_FLG_UNBLK_CACHE		BIT(8)
#define HAL_REO_CMD_FLG_FLUSH_QUEUE_1K_DESC	BIT(9)

/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO0_UPD_* fields */
#define HAL_REO_CMD_UPD0_RX_QUEUE_NUM		BIT(8)
+1 −0
Original line number Diff line number Diff line
@@ -1225,6 +1225,7 @@ struct hal_reo_flush_queue {
#define HAL_REO_FLUSH_CACHE_INFO0_FLUSH_WO_INVALIDATE	BIT(12)
#define HAL_REO_FLUSH_CACHE_INFO0_BLOCK_CACHE_USAGE	BIT(13)
#define HAL_REO_FLUSH_CACHE_INFO0_FLUSH_ALL		BIT(14)
#define HAL_REO_FLUSH_CACHE_INFO0_FLUSH_QUEUE_1K_DESC	BIT(15)

struct hal_reo_flush_cache {
	struct hal_reo_cmd_hdr cmd;
+3 −0
Original line number Diff line number Diff line
@@ -89,6 +89,9 @@ static int ath12k_hal_reo_cmd_flush_cache(struct ath12k_hal *hal,
	if (cmd->flag & HAL_REO_CMD_FLG_FLUSH_ALL)
		desc->info0 |= cpu_to_le32(HAL_REO_FLUSH_CACHE_INFO0_FLUSH_ALL);

	if (cmd->flag & HAL_REO_CMD_FLG_FLUSH_QUEUE_1K_DESC)
		desc->info0 |= cpu_to_le32(HAL_REO_FLUSH_CACHE_INFO0_FLUSH_QUEUE_1K_DESC);

	return le32_get_bits(desc->cmd.info0, HAL_REO_CMD_HDR_INFO0_CMD_NUMBER);
}