Commit 7fc45cb6 authored by Shradha Gupta, committed by Leon Romanovsky
Browse files

net: mana: Allow variable size indirection table



Allow variable size indirection table allocation in MANA instead
of using the constant value MANA_INDIRECT_TABLE_SIZE.
The size is now derived from the MANA_QUERY_VPORT_CONFIG response and
the indirection table is allocated dynamically.

Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
Link: https://lore.kernel.org/r/1718015319-9609-1-git-send-email-shradhagupta@linux.microsoft.com


Reviewed-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
parent 83a7eefe
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,

	gc = mdev_to_gc(dev);

	req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_SIZE);
	req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE);
	req = kzalloc(req_buf_size, GFP_KERNEL);
	if (!req)
		return -ENOMEM;
@@ -41,18 +41,18 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
	if (log_ind_tbl_size)
		req->rss_enable = true;

	req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE;
	req->num_indir_entries = MANA_INDIRECT_TABLE_DEF_SIZE;
	req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2,
					 indir_tab);
	req->update_indir_tab = true;
	req->cqe_coalescing_enable = 1;

	/* The ind table passed to the hardware must have
	 * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
	 * MANA_INDIRECT_TABLE_DEF_SIZE entries. Adjust the verb
	 * ind_table to MANA_INDIRECT_TABLE_DEF_SIZE if required
	 */
	ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
	for (i = 0; i < MANA_INDIRECT_TABLE_DEF_SIZE; i++) {
		req->indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
		ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
			  req->indir_tab[i]);
@@ -137,7 +137,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
	}

	ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
	if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
	if (ind_tbl_size > MANA_INDIRECT_TABLE_DEF_SIZE) {
		ibdev_dbg(&mdev->ib_dev,
			  "Indirect table size %d exceeding limit\n",
			  ind_tbl_size);
+72 −13
Original line number Diff line number Diff line
@@ -481,7 +481,7 @@ static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb,
	struct sock *sk = skb->sk;
	int txq;

	txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK];
	txq = apc->indir_table[hash & (apc->indir_table_sz - 1)];

	if (txq != old_q && sk && sk_fullsock(sk) &&
	    rcu_access_pointer(sk->sk_dst_cache))
@@ -721,6 +721,13 @@ static void mana_cleanup_port_context(struct mana_port_context *apc)
	apc->rxqs = NULL;
}

/* Release the per-port RSS tables (apc->indir_table and apc->rxobj_table)
 * and reset the recorded table size to 0.
 *
 * Both pointers are cleared after being freed so the port context never
 * keeps a reference to freed memory and a repeated call is harmless
 * (kfree(NULL) is a no-op).
 */
static void mana_cleanup_indir_table(struct mana_port_context *apc)
{
	apc->indir_table_sz = 0;

	kfree(apc->indir_table);
	apc->indir_table = NULL;

	kfree(apc->rxobj_table);
	apc->rxobj_table = NULL;
}

static int mana_init_port_context(struct mana_port_context *apc)
{
	apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *),
@@ -962,7 +969,16 @@ static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index,

	*max_sq = resp.max_num_sq;
	*max_rq = resp.max_num_rq;
	if (resp.num_indirection_ent > 0 &&
	    resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE &&
	    is_power_of_2(resp.num_indirection_ent)) {
		*num_indir_entry = resp.num_indirection_ent;
	} else {
		netdev_warn(apc->ndev,
			    "Setting indirection table size to default %d for vPort %d\n",
			    MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx);
		*num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE;
	}

	apc->port_handle = resp.vport;
	ether_addr_copy(apc->mac_addr, resp.mac_addr);
@@ -1054,14 +1070,13 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
				   bool update_default_rxobj, bool update_key,
				   bool update_tab)
{
	u16 num_entries = MANA_INDIRECT_TABLE_SIZE;
	struct mana_cfg_rx_steer_req_v2 *req;
	struct mana_cfg_rx_steer_resp resp = {};
	struct net_device *ndev = apc->ndev;
	u32 req_buf_size;
	int err;

	req_buf_size = struct_size(req, indir_tab, num_entries);
	req_buf_size = struct_size(req, indir_tab, apc->indir_table_sz);
	req = kzalloc(req_buf_size, GFP_KERNEL);
	if (!req)
		return -ENOMEM;
@@ -1072,7 +1087,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
	req->hdr.req.msg_version = GDMA_MESSAGE_V2;

	req->vport = apc->port_handle;
	req->num_indir_entries = num_entries;
	req->num_indir_entries = apc->indir_table_sz;
	req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2,
					 indir_tab);
	req->rx_enable = rx;
@@ -1111,7 +1126,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
	}

	netdev_info(ndev, "Configured steering vPort %llu entries %u\n",
		    apc->port_handle, num_entries);
		    apc->port_handle, apc->indir_table_sz);
out:
	kfree(req);
	return err;
@@ -2344,11 +2359,33 @@ static int mana_create_vport(struct mana_port_context *apc,
	return mana_create_txq(apc, net);
}

/* Allocate the per-port RSS tables, each with apc->indir_table_sz entries.
 *
 * Two zero-initialised arrays are created: apc->indir_table and
 * apc->rxobj_table.
 *
 * Return: 0 on success, -EINVAL if apc->indir_table_sz is 0, or -ENOMEM if
 * either allocation fails. On failure apc->indir_table is never left
 * pointing at freed memory.
 */
static int mana_rss_table_alloc(struct mana_port_context *apc)
{
	if (!apc->indir_table_sz) {
		netdev_err(apc->ndev,
			   "Indirection table size not set for vPort %d\n",
			   apc->port_idx);
		return -EINVAL;
	}

	apc->indir_table = kcalloc(apc->indir_table_sz,
				   sizeof(*apc->indir_table), GFP_KERNEL);
	if (!apc->indir_table)
		return -ENOMEM;

	apc->rxobj_table = kcalloc(apc->indir_table_sz,
				   sizeof(*apc->rxobj_table), GFP_KERNEL);
	if (!apc->rxobj_table) {
		/* Undo the first allocation and clear the pointer so the
		 * port context does not keep a dangling reference.
		 */
		kfree(apc->indir_table);
		apc->indir_table = NULL;
		return -ENOMEM;
	}

	return 0;
}

/* Fill the port's indirection table with a default mapping: every entry i
 * written by the loop is set to ethtool_rxfh_indir_default(i,
 * apc->num_queues).
 *
 * NOTE(review): two loop headers appear back to back below; this looks like
 * the pre-change (MANA_INDIRECT_TABLE_SIZE) and post-change
 * (apc->indir_table_sz) bound of the same loop shown together - confirm
 * against the applied source.
 */
static void mana_rss_table_init(struct mana_port_context *apc)
{
	int i;

	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
	for (i = 0; i < apc->indir_table_sz; i++)
		apc->indir_table[i] =
			ethtool_rxfh_indir_default(i, apc->num_queues);
}
@@ -2361,7 +2398,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
	int i;

	if (update_tab) {
		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
		for (i = 0; i < apc->indir_table_sz; i++) {
			queue_idx = apc->indir_table[i];
			apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
		}
@@ -2466,7 +2503,6 @@ static int mana_init_port(struct net_device *ndev)
	struct mana_port_context *apc = netdev_priv(ndev);
	u32 max_txq, max_rxq, max_queues;
	int port_idx = apc->port_idx;
	u32 num_indirect_entries;
	int err;

	err = mana_init_port_context(apc);
@@ -2474,7 +2510,7 @@ static int mana_init_port(struct net_device *ndev)
		return err;

	err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq,
				   &num_indirect_entries);
				   &apc->indir_table_sz);
	if (err) {
		netdev_err(ndev, "Failed to query info for vPort %d\n",
			   port_idx);
@@ -2723,6 +2759,10 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
	if (err)
		goto free_net;

	err = mana_rss_table_alloc(apc);
	if (err)
		goto reset_apc;

	netdev_lockdep_set_classes(ndev);

	ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
@@ -2739,11 +2779,13 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
	err = register_netdev(ndev);
	if (err) {
		netdev_err(ndev, "Unable to register netdev.\n");
		goto reset_apc;
		goto free_indir;
	}

	return 0;

free_indir:
	mana_cleanup_indir_table(apc);
reset_apc:
	kfree(apc->rxqs);
	apc->rxqs = NULL;
@@ -2872,18 +2914,32 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
	if (!resuming) {
		for (i = 0; i < ac->num_ports; i++) {
			err = mana_probe_port(ac, i, &ac->ports[i]);
			if (err)
			/* Log the port for which the probe failed and stop
			 * probing subsequent ports.
			 * Note that ports which were probed successfully keep
			 * running, unless add_adev fails too.
			 */
			if (err) {
				dev_err(dev, "Probe Failed for port %d\n", i);
				break;
			}
		}
	} else {
		for (i = 0; i < ac->num_ports; i++) {
			rtnl_lock();
			err = mana_attach(ac->ports[i]);
			rtnl_unlock();
			if (err)
			/* Log the port for which the attach failed and stop
			 * attaching subsequent ports.
			 * Note that ports which were attached successfully keep
			 * running, unless add_adev fails too.
			 */
			if (err) {
				dev_err(dev, "Attach Failed for port %d\n", i);
				break;
			}
		}
	}

	err = add_adev(gd);
out:
@@ -2897,6 +2953,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
{
	struct gdma_context *gc = gd->gdma_context;
	struct mana_context *ac = gd->driver_data;
	struct mana_port_context *apc;
	struct device *dev = gc->dev;
	struct net_device *ndev;
	int err;
@@ -2908,6 +2965,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)

	for (i = 0; i < ac->num_ports; i++) {
		ndev = ac->ports[i];
		apc = netdev_priv(ndev);
		if (!ndev) {
			if (i == 0)
				dev_err(dev, "No net device to remove\n");
@@ -2931,6 +2989,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
		}

		unregister_netdevice(ndev);
		mana_cleanup_indir_table(apc);

		rtnl_unlock();

+19 −8
Original line number Diff line number Diff line
@@ -245,7 +245,9 @@ static u32 mana_get_rxfh_key_size(struct net_device *ndev)

/* Return the size of the RSS indirection table for @ndev.
 *
 * NOTE(review): the body below shows a constant return
 * (MANA_INDIRECT_TABLE_SIZE) immediately followed by a lookup of the
 * per-port value apc->indir_table_sz; this looks like the removed line and
 * its replacement shown together - confirm against the applied source.
 */
static u32 mana_rss_indir_size(struct net_device *ndev)
{
	return MANA_INDIRECT_TABLE_SIZE;
	struct mana_port_context *apc = netdev_priv(ndev);

	return apc->indir_table_sz;
}

static int mana_get_rxfh(struct net_device *ndev,
@@ -257,7 +259,7 @@ static int mana_get_rxfh(struct net_device *ndev,
	rxfh->hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */

	if (rxfh->indir) {
		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
		for (i = 0; i < apc->indir_table_sz; i++)
			rxfh->indir[i] = apc->indir_table[i];
	}

@@ -273,8 +275,8 @@ static int mana_set_rxfh(struct net_device *ndev,
{
	struct mana_port_context *apc = netdev_priv(ndev);
	bool update_hash = false, update_table = false;
	u32 save_table[MANA_INDIRECT_TABLE_SIZE];
	u8 save_key[MANA_HASH_KEY_SIZE];
	u32 *save_table;
	int i, err;

	if (!apc->port_is_up)
@@ -284,13 +286,19 @@ static int mana_set_rxfh(struct net_device *ndev,
	    rxfh->hfunc != ETH_RSS_HASH_TOP)
		return -EOPNOTSUPP;

	save_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL);
	if (!save_table)
		return -ENOMEM;

	if (rxfh->indir) {
		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
			if (rxfh->indir[i] >= apc->num_queues)
				return -EINVAL;
		for (i = 0; i < apc->indir_table_sz; i++)
			if (rxfh->indir[i] >= apc->num_queues) {
				err = -EINVAL;
				goto cleanup;
			}

		update_table = true;
		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
		for (i = 0; i < apc->indir_table_sz; i++) {
			save_table[i] = apc->indir_table[i];
			apc->indir_table[i] = rxfh->indir[i];
		}
@@ -306,7 +314,7 @@ static int mana_set_rxfh(struct net_device *ndev,

	if (err) { /* recover to original values */
		if (update_table) {
			for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
			for (i = 0; i < apc->indir_table_sz; i++)
				apc->indir_table[i] = save_table[i];
		}

@@ -316,6 +324,9 @@ static int mana_set_rxfh(struct net_device *ndev,
		mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table);
	}

cleanup:
	kfree(save_table);

	return err;
}

+3 −1
Original line number Diff line number Diff line
@@ -543,11 +543,13 @@ enum {
 */
#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5)

#define GDMA_DRV_CAP_FLAGS1 \
	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
	 GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG)
	 GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
	 GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT)

#define GDMA_DRV_CAP_FLAGS2 0

+5 −4
Original line number Diff line number Diff line
@@ -30,8 +30,8 @@ enum TRI_STATE {
};

/* Number of entries for hardware indirection table must be a power of 2 */
#define MANA_INDIRECT_TABLE_SIZE 64
#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1)
#define MANA_INDIRECT_TABLE_MAX_SIZE 512
#define MANA_INDIRECT_TABLE_DEF_SIZE 64

/* The Toeplitz hash key's length in bytes: should be multiple of 8 */
#define MANA_HASH_KEY_SIZE 40
@@ -410,10 +410,11 @@ struct mana_port_context {
	struct mana_tx_qp *tx_qp;

	/* Indirection Table for RX & TX. The values are queue indexes */
	u32 indir_table[MANA_INDIRECT_TABLE_SIZE];
	u32 *indir_table;
	u32 indir_table_sz;

	/* Indirection table containing RxObject Handles */
	mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE];
	mana_handle_t *rxobj_table;

	/*  Hash key used by the NIC */
	u8 hashkey[MANA_HASH_KEY_SIZE];