Commit 2dec50d4 authored by Paolo Abeni
Browse files


Tony Nguyen says:

====================
Add RDMA support for Intel IPU E2000 in idpf

Tatyana Nikolova says:

This idpf patch series is the second part of the staged submission for
introducing RDMA RoCEv2 support for the IPU E2000 line of products,
referred to as GEN3.

To support RDMA GEN3 devices, the idpf driver uses common definitions
of the IIDC interface and implements specific device functionality in
iidc_rdma_idpf.h.

The IPU model can host one or more logical network endpoints called
vPorts per PCI function that are flexibly associated with a physical
port or an internal communication port.

Other features pertaining to GEN3 devices include:
* MMIO learning
* RDMA capability negotiation
* RDMA vectors discovery between idpf and control plane

These patches are split from the submission "Add RDMA support for Intel
IPU E2000 (GEN3)" [1]. The patches have been tested on a range of hosts
and platforms with a variety of general RDMA applications which include
standalone verbs (rping, perftest, etc.), storage and HPC applications.

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>

[1] https://lore.kernel.org/all/20240724233917.704-1-tatyana.e.nikolova@intel.com/


This idpf patch series is the second part of the staged submission for
introducing RDMA RoCEv2 support for the IPU E2000 line of products,
referred to as GEN3.

To support RDMA GEN3 devices, the idpf driver uses common definitions
of the IIDC interface and implements specific device functionality in
iidc_rdma_idpf.h.

The IPU model can host one or more logical network endpoints called
vPorts per PCI function that are flexibly associated with a physical
port or an internal communication port.

Other features pertaining to GEN3 devices include:
* MMIO learning
* RDMA capability negotiation
* RDMA vectors discovery between idpf and control plane

These patches are split from the submission "Add RDMA support for Intel
IPU E2000 (GEN3)" [1]. The patches have been tested on a range of hosts
and platforms with a variety of general RDMA applications which include
standalone verbs (rping, perftest, etc.), storage and HPC applications.

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>

[1] https://lore.kernel.org/all/20240724233917.704-1-tatyana.e.nikolova@intel.com/

IWL reviews:
v3: https://lore.kernel.org/all/20250708210554.1662-1-tatyana.e.nikolova@intel.com/
v2: https://lore.kernel.org/all/20250612220002.1120-1-tatyana.e.nikolova@intel.com/
v1 (split from previous series):
    https://lore.kernel.org/all/20250523170435.668-1-tatyana.e.nikolova@intel.com/

v3: https://lore.kernel.org/all/20250207194931.1569-1-tatyana.e.nikolova@intel.com/
RFC v2: https://lore.kernel.org/all/20240824031924.421-1-tatyana.e.nikolova@intel.com/
RFC: https://lore.kernel.org/all/20240724233917.704-1-tatyana.e.nikolova@intel.com/

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/linux:
  idpf: implement get LAN MMIO memory regions
  idpf: implement IDC vport aux driver MTU change handler
  idpf: implement remaining IDC RDMA core callbacks and handlers
  idpf: implement RDMA vport auxiliary dev create, init, and destroy
  idpf: implement core RDMA auxiliary dev create, init, and destroy
  idpf: use reserved RDMA vectors from control plane
====================

Link: https://patch.msgid.link/20250714181002.2865694-1-anthony.l.nguyen@intel.com


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents cd031354 6aa53e86
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@ idpf-y := \
	idpf_controlq_setup.o	\
	idpf_dev.o		\
	idpf_ethtool.o		\
	idpf_idc.o		\
	idpf_lib.o		\
	idpf_main.o		\
	idpf_txrx.o		\
+111 −5
Original line number Diff line number Diff line
@@ -12,12 +12,16 @@ struct idpf_vport_max_q;
#include <net/pkt_sched.h>
#include <linux/aer.h>
#include <linux/etherdevice.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/bitfield.h>
#include <linux/sctp.h>
#include <linux/ethtool_netlink.h>
#include <net/gro.h>

#include <linux/net/intel/iidc_rdma.h>
#include <linux/net/intel/iidc_rdma_idpf.h>

#include "virtchnl2.h"
#include "idpf_txrx.h"
#include "idpf_controlq.h"
@@ -194,7 +198,8 @@ struct idpf_vport_max_q {
 * @ptp_reg_init: PTP register initialization
 */
struct idpf_reg_ops {
	void (*ctlq_reg_init)(struct idpf_ctlq_create_info *cq);
	void (*ctlq_reg_init)(struct idpf_adapter *adapter,
			      struct idpf_ctlq_create_info *cq);
	int (*intr_reg_init)(struct idpf_vport *vport);
	void (*mb_intr_reg_init)(struct idpf_adapter *adapter);
	void (*reset_reg_init)(struct idpf_adapter *adapter);
@@ -203,12 +208,25 @@ struct idpf_reg_ops {
	void (*ptp_reg_init)(const struct idpf_adapter *adapter);
};

/* Number of entries in idpf_dev_ops.static_reg_info: one for the mailbox
 * region and one for the rstat region.
 */
#define IDPF_MMIO_REG_NUM_STATIC	2
/* Sizes of the static mailbox and rstat regions. The PF values are the sizes
 * handed to resource_set_range() when the PF static_reg_info entries are set
 * up; the VF values are presumably the VF counterparts - confirm against the
 * VF device ops.
 */
#define IDPF_PF_MBX_REGION_SZ		4096
#define IDPF_PF_RSTAT_REGION_SZ		2048
#define IDPF_VF_MBX_REGION_SZ		10240
#define IDPF_VF_RSTAT_REGION_SZ		2048

/**
 * struct idpf_dev_ops - Device specific operations
 * @reg_ops: Register operations
 * @idc_init: IDC initialization callback; takes the adapter and returns an
 *	      int status
 * @static_reg_info: array of mailbox and rstat register info, holding
 *		     IDPF_MMIO_REG_NUM_STATIC entries
 */
struct idpf_dev_ops {
	struct idpf_reg_ops reg_ops;

	int (*idc_init)(struct idpf_adapter *adapter);

	/* static_reg_info[0] is mailbox region, static_reg_info[1] is rstat */
	struct resource static_reg_info[IDPF_MMIO_REG_NUM_STATIC];
};

/**
@@ -275,6 +293,7 @@ struct idpf_port_stats {
 *	      group will yield total number of RX queues.
 * @rxq_model: Splitq queue or single queue queuing model
 * @rx_ptype_lkup: Lookup table for ptypes on RX
 * @vdev_info: IDC vport device info pointer
 * @adapter: back pointer to associated adapter
 * @netdev: Associated net_device. Each vport should have one and only one
 *	    associated netdev.
@@ -320,6 +339,8 @@ struct idpf_vport {
	u32 rxq_model;
	struct libeth_rx_pt *rx_ptype_lkup;

	struct iidc_rdma_vport_dev_info *vdev_info;

	struct idpf_adapter *adapter;
	struct net_device *netdev;
	DECLARE_BITMAP(flags, IDPF_VPORT_FLAGS_NBITS);
@@ -507,10 +528,11 @@ struct idpf_vc_xn_manager;
 * @flags: See enum idpf_flags
 * @reset_reg: See struct idpf_reset_reg
 * @hw: Device access data
 * @num_req_msix: Requested number of MSIX vectors
 * @num_avail_msix: Available number of MSIX vectors
 * @num_msix_entries: Number of entries in MSIX table
 * @msix_entries: MSIX table
 * @num_rdma_msix_entries: Available number of MSIX vectors for RDMA
 * @rdma_msix_entries: RDMA MSIX table
 * @req_vec_chunks: Requested vector chunk data
 * @mb_vector: Mailbox vector data
 * @vector_stack: Stack to store the msix vector indexes
@@ -539,6 +561,7 @@ struct idpf_vc_xn_manager;
 * @caps: Negotiated capabilities with device
 * @vcxn_mngr: Virtchnl transaction manager
 * @dev_ops: See idpf_dev_ops
 * @cdev_info: IDC core device info pointer
 * @num_vfs: Number of allocated VFs through sysfs. PF does not directly talk
 *	     to VFs but is used to initialize them
 * @crc_enable: Enable CRC insertion offload
@@ -561,10 +584,11 @@ struct idpf_adapter {
	DECLARE_BITMAP(flags, IDPF_FLAGS_NBITS);
	struct idpf_reset_reg reset_reg;
	struct idpf_hw hw;
	u16 num_req_msix;
	u16 num_avail_msix;
	u16 num_msix_entries;
	struct msix_entry *msix_entries;
	u16 num_rdma_msix_entries;
	struct msix_entry *rdma_msix_entries;
	struct virtchnl2_alloc_vectors *req_vec_chunks;
	struct idpf_q_vector mb_vector;
	struct idpf_vector_lifo vector_stack;
@@ -597,6 +621,7 @@ struct idpf_adapter {
	struct idpf_vc_xn_manager *vcxn_mngr;

	struct idpf_dev_ops dev_ops;
	struct iidc_rdma_core_dev_info *cdev_info;
	int num_vfs;
	bool crc_enable;
	bool req_tx_splitq;
@@ -630,6 +655,17 @@ static inline int idpf_is_queue_model_split(u16 q_model)
bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all,
			    enum idpf_cap_field field, u64 flag);

/**
 * idpf_is_rdma_cap_ena - Check whether the RDMA capability is enabled
 * @adapter: private data struct
 *
 * Queries VIRTCHNL2_CAP_RDMA within the IDPF_OTHER_CAPS capability field.
 *
 * Return: true if RDMA capability is enabled, false otherwise
 */
static inline bool idpf_is_rdma_cap_ena(struct idpf_adapter *adapter)
{
	bool rdma_ena;

	rdma_ena = idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS,
				   VIRTCHNL2_CAP_RDMA);

	return rdma_ena;
}

#define IDPF_CAP_RSS (\
	VIRTCHNL2_CAP_RSS_IPV4_TCP	|\
	VIRTCHNL2_CAP_RSS_IPV4_TCP	|\
@@ -682,6 +718,17 @@ static inline u16 idpf_get_reserved_vecs(struct idpf_adapter *adapter)
	return le16_to_cpu(adapter->caps.num_allocated_vectors);
}

/**
 * idpf_get_reserved_rdma_vecs - Read the RDMA vector count from the caps
 * @adapter: private data struct
 *
 * Converts the little-endian num_rdma_allocated_vectors capability field to
 * CPU byte order.
 *
 * Return: number of vectors reserved for RDMA
 */
static inline u16 idpf_get_reserved_rdma_vecs(struct idpf_adapter *adapter)
{
	u16 rdma_vecs = le16_to_cpu(adapter->caps.num_rdma_allocated_vectors);

	return rdma_vecs;
}

/**
 * idpf_get_default_vports - Get default number of vports
 * @adapter: private data struct
@@ -720,6 +767,34 @@ static inline u8 idpf_get_min_tx_pkt_len(struct idpf_adapter *adapter)
	return pkt_len ? pkt_len : IDPF_TX_MIN_PKT_LEN;
}

/**
 * idpf_get_mbx_reg_addr - Resolve a mailbox register offset to an address
 * @adapter: private data struct
 * @reg_offset: register offset value, added as-is to the mailbox mapping base
 *
 * Return: BAR0 mailbox register address based on register offset.
 */
static inline void __iomem *idpf_get_mbx_reg_addr(struct idpf_adapter *adapter,
						  resource_size_t reg_offset)
{
	void __iomem *mbx_base = adapter->hw.mbx.vaddr;

	return mbx_base + reg_offset;
}

/**
 * idpf_get_rstat_reg_addr - Resolve an rstat register offset to an address
 * @adapter: private data struct
 * @reg_offset: register offset value
 *
 * The offset is first made relative to the start of the rstat static region
 * (static_reg_info[1]) and then applied to the rstat mapping base.
 *
 * Return: BAR0 rstat register address based on register offset.
 */
static inline void __iomem *idpf_get_rstat_reg_addr(struct idpf_adapter *adapter,
						    resource_size_t reg_offset)
{
	const struct resource *rstat_res = &adapter->dev_ops.static_reg_info[1];
	resource_size_t rel_offset = reg_offset - rstat_res->start;

	return adapter->hw.rstat.vaddr + rel_offset;
}

/**
 * idpf_get_reg_addr - Get BAR0 register address
 * @adapter: private data struct
@@ -730,7 +805,30 @@ static inline u8 idpf_get_min_tx_pkt_len(struct idpf_adapter *adapter)
static inline void __iomem *idpf_get_reg_addr(struct idpf_adapter *adapter,
					      resource_size_t reg_offset)
{
	return (void __iomem *)(adapter->hw.hw_addr + reg_offset);
	struct idpf_hw *hw = &adapter->hw;

	for (int i = 0; i < hw->num_lan_regs; i++) {
		struct idpf_mmio_reg *region = &hw->lan_regs[i];

		if (reg_offset >= region->addr_start &&
		    reg_offset < (region->addr_start + region->addr_len)) {
			/* Convert the offset so that it is relative to the
			 * start of the region.  Then add the base address of
			 * the region to get the final address.
			 */
			reg_offset -= region->addr_start;

			return region->vaddr + reg_offset;
		}
	}

	/* It's impossible to hit this case with offsets from the CP. But if we
	 * do for any other reason, the kernel will panic on that register
	 * access. Might as well do it here to make it clear what's happening.
	 */
	BUG();

	return NULL;
}

/**
@@ -744,7 +842,7 @@ static inline bool idpf_is_reset_detected(struct idpf_adapter *adapter)
	if (!adapter->hw.arq)
		return true;

	return !(readl(idpf_get_reg_addr(adapter, adapter->hw.arq->reg.len)) &
	return !(readl(idpf_get_mbx_reg_addr(adapter, adapter->hw.arq->reg.len)) &
		 adapter->hw.arq->reg.len_mask);
}

@@ -853,5 +951,13 @@ int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs);

u8 idpf_vport_get_hsplit(const struct idpf_vport *vport);
bool idpf_vport_set_hsplit(const struct idpf_vport *vport, u8 val);
/* IDC entry points for the RDMA core and vport auxiliary devices:
 * init/deinit plus reset and MTU event notification.
 * NOTE(review): presumably implemented in idpf_idc.c - confirm.
 */
int idpf_idc_init(struct idpf_adapter *adapter);
int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter,
			       enum iidc_function_type ftype);
void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info);
void idpf_idc_deinit_vport_aux_device(struct iidc_rdma_vport_dev_info *vdev_info);
void idpf_idc_issue_reset_event(struct iidc_rdma_core_dev_info *cdev_info);
void idpf_idc_vdev_mtu_event(struct iidc_rdma_vport_dev_info *vdev_info,
			     enum iidc_rdma_event_type event_type);

#endif /* !_IDPF_H_ */
+7 −7
Original line number Diff line number Diff line
@@ -36,19 +36,19 @@ static void idpf_ctlq_init_regs(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
{
	/* Update tail to post pre-allocated buffers for rx queues */
	if (is_rxq)
		wr32(hw, cq->reg.tail, (u32)(cq->ring_size - 1));
		idpf_mbx_wr32(hw, cq->reg.tail, (u32)(cq->ring_size - 1));

	/* For non-Mailbox control queues only TAIL need to be set */
	if (cq->q_id != -1)
		return;

	/* Clear Head for both send or receive */
	wr32(hw, cq->reg.head, 0);
	idpf_mbx_wr32(hw, cq->reg.head, 0);

	/* set starting point */
	wr32(hw, cq->reg.bal, lower_32_bits(cq->desc_ring.pa));
	wr32(hw, cq->reg.bah, upper_32_bits(cq->desc_ring.pa));
	wr32(hw, cq->reg.len, (cq->ring_size | cq->reg.len_ena_mask));
	idpf_mbx_wr32(hw, cq->reg.bal, lower_32_bits(cq->desc_ring.pa));
	idpf_mbx_wr32(hw, cq->reg.bah, upper_32_bits(cq->desc_ring.pa));
	idpf_mbx_wr32(hw, cq->reg.len, (cq->ring_size | cq->reg.len_ena_mask));
}

/**
@@ -328,7 +328,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
	 */
	dma_wmb();

	wr32(hw, cq->reg.tail, cq->next_to_use);
	idpf_mbx_wr32(hw, cq->reg.tail, cq->next_to_use);

err_unlock:
	spin_unlock(&cq->cq_lock);
@@ -520,7 +520,7 @@ int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq,

		dma_wmb();

		wr32(hw, cq->reg.tail, cq->next_to_post);
		idpf_mbx_wr32(hw, cq->reg.tail, cq->next_to_post);
	}

	spin_unlock(&cq->cq_lock);
+16 −2
Original line number Diff line number Diff line
@@ -94,12 +94,26 @@ struct idpf_mbxq_desc {
	u32 pf_vf_id;		/* used by CP when sending to PF */
};

/* Max number of MMIO regions not including the mailbox and rstat regions in
 * the fallback case when the whole bar is mapped.
 */
#define IDPF_MMIO_MAP_FALLBACK_MAX_REMAINING		3

/**
 * struct idpf_mmio_reg - One mapped MMIO register region
 * @vaddr: mapped base address of the region; register addresses are formed
 *	   by adding a region-relative offset to it
 * @addr_start: register offset at which the region starts
 * @addr_len: length of the region; the region covers offsets from
 *	      @addr_start up to, but not including, @addr_start + @addr_len
 */
struct idpf_mmio_reg {
	void __iomem *vaddr;
	resource_size_t addr_start;
	resource_size_t addr_len;
};

/* Define the driver hardware struct to replace other control structs as needed
 * Align to ctlq_hw_info
 */
struct idpf_hw {
	void __iomem *hw_addr;
	resource_size_t hw_addr_len;
	struct idpf_mmio_reg mbx;
	struct idpf_mmio_reg rstat;
	/* Array of remaining LAN BAR regions */
	int num_lan_regs;
	struct idpf_mmio_reg *lan_regs;

	struct idpf_adapter *back;

+35 −14
Original line number Diff line number Diff line
@@ -10,10 +10,13 @@

/**
 * idpf_ctlq_reg_init - initialize default mailbox registers
 * @adapter: adapter structure
 * @cq: pointer to the array of create control queues
 */
static void idpf_ctlq_reg_init(struct idpf_ctlq_create_info *cq)
static void idpf_ctlq_reg_init(struct idpf_adapter *adapter,
			       struct idpf_ctlq_create_info *cq)
{
	resource_size_t mbx_start = adapter->dev_ops.static_reg_info[0].start;
	int i;

	for (i = 0; i < IDPF_NUM_DFLT_MBX_Q; i++) {
@@ -22,22 +25,22 @@ static void idpf_ctlq_reg_init(struct idpf_ctlq_create_info *cq)
		switch (ccq->type) {
		case IDPF_CTLQ_TYPE_MAILBOX_TX:
			/* set head and tail registers in our local struct */
			ccq->reg.head = PF_FW_ATQH;
			ccq->reg.tail = PF_FW_ATQT;
			ccq->reg.len = PF_FW_ATQLEN;
			ccq->reg.bah = PF_FW_ATQBAH;
			ccq->reg.bal = PF_FW_ATQBAL;
			ccq->reg.head = PF_FW_ATQH - mbx_start;
			ccq->reg.tail = PF_FW_ATQT - mbx_start;
			ccq->reg.len = PF_FW_ATQLEN - mbx_start;
			ccq->reg.bah = PF_FW_ATQBAH - mbx_start;
			ccq->reg.bal = PF_FW_ATQBAL - mbx_start;
			ccq->reg.len_mask = PF_FW_ATQLEN_ATQLEN_M;
			ccq->reg.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M;
			ccq->reg.head_mask = PF_FW_ATQH_ATQH_M;
			break;
		case IDPF_CTLQ_TYPE_MAILBOX_RX:
			/* set head and tail registers in our local struct */
			ccq->reg.head = PF_FW_ARQH;
			ccq->reg.tail = PF_FW_ARQT;
			ccq->reg.len = PF_FW_ARQLEN;
			ccq->reg.bah = PF_FW_ARQBAH;
			ccq->reg.bal = PF_FW_ARQBAL;
			ccq->reg.head = PF_FW_ARQH - mbx_start;
			ccq->reg.tail = PF_FW_ARQT - mbx_start;
			ccq->reg.len = PF_FW_ARQLEN - mbx_start;
			ccq->reg.bah = PF_FW_ARQBAH - mbx_start;
			ccq->reg.bal = PF_FW_ARQBAL - mbx_start;
			ccq->reg.len_mask = PF_FW_ARQLEN_ARQLEN_M;
			ccq->reg.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M;
			ccq->reg.head_mask = PF_FW_ARQH_ARQH_M;
@@ -130,7 +133,7 @@ static int idpf_intr_reg_init(struct idpf_vport *vport)
 */
static void idpf_reset_reg_init(struct idpf_adapter *adapter)
{
	adapter->reset_reg.rstat = idpf_get_reg_addr(adapter, PFGEN_RSTAT);
	adapter->reset_reg.rstat = idpf_get_rstat_reg_addr(adapter, PFGEN_RSTAT);
	adapter->reset_reg.rstat_m = PFGEN_RSTAT_PFR_STATE_M;
}

@@ -144,9 +147,9 @@ static void idpf_trigger_reset(struct idpf_adapter *adapter,
{
	u32 reset_reg;

	reset_reg = readl(idpf_get_reg_addr(adapter, PFGEN_CTRL));
	reset_reg = readl(idpf_get_rstat_reg_addr(adapter, PFGEN_CTRL));
	writel(reset_reg | PFGEN_CTRL_PFSWR,
	       idpf_get_reg_addr(adapter, PFGEN_CTRL));
	       idpf_get_rstat_reg_addr(adapter, PFGEN_CTRL));
}

/**
@@ -161,6 +164,17 @@ static void idpf_ptp_reg_init(const struct idpf_adapter *adapter)
	adapter->ptp->cmd.exec_cmd_mask = PF_GLTSYN_CMD_SYNC_EXEC_CMD_M;
}

/**
 * idpf_idc_register - PF hook that registers for IDC callbacks
 * @adapter: Driver specific private structure
 *
 * Initializes the core auxiliary device with the PF function type.
 *
 * Return: 0 on success or error code on failure.
 */
static int idpf_idc_register(struct idpf_adapter *adapter)
{
	int err = idpf_idc_init_aux_core_dev(adapter, IIDC_FUNCTION_TYPE_PF);

	return err;
}

/**
 * idpf_reg_ops_init - Initialize register API function pointers
 * @adapter: Driver specific private structure
@@ -182,4 +196,11 @@ static void idpf_reg_ops_init(struct idpf_adapter *adapter)
void idpf_dev_ops_init(struct idpf_adapter *adapter)
{
	idpf_reg_ops_init(adapter);

	adapter->dev_ops.idc_init = idpf_idc_register;

	resource_set_range(&adapter->dev_ops.static_reg_info[0],
			   PF_FW_BASE, IDPF_PF_MBX_REGION_SZ);
	resource_set_range(&adapter->dev_ops.static_reg_info[1],
			   PFGEN_RTRIG, IDPF_PF_RSTAT_REGION_SZ);
}
Loading