Commit d9023e46 authored by Junxian Huang, committed by Leon Romanovsky
Browse files

RDMA/hns: Implement bonding init/uninit process



Implement hns_roce_slave_init() and hns_roce_slave_uninit() for device
init/uninit in bonding cases. The former is used to initialize a slave
ibdev (when the slave is unlinked from a bond) or a bond ibdev, while
the latter does the opposite. Most of the process is the same as
regular device init/uninit, while some bonding-specific steps below are
also added.

In bond device init flow, choose one slave to re-initialize as the
main_hr_dev of the bond, and it will be the only device presented for
multiple slaves. During registration, set an active netdev for the
ibdev based on the link state of the slaves. When this main_hr_dev
slave is being unlinked while the bond is still valid, choose a new
slave from the rest and initialize it as the new bond device.

In the uninit flow, add a bond cleanup process that restores all the
other slaves and cleans up the bond resources. This is only for the case where
the port of main_hr_dev is directly removed without unlinking it
from bond.

Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20251112093510.3696363-6-huangjunxian6@hisilicon.com


Signed-off-by: Leon Romanovsky <leon@kernel.org>
parent 14f0455e
Loading
Loading
Loading
Loading
+178 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
 * Copyright (c) 2025 Hisilicon Limited.
 */

#include <net/bonding.h>
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
#include "hns_roce_bond.h"
@@ -74,6 +75,143 @@ struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
	return NULL;
}

static int hns_roce_set_bond_netdev(struct hns_roce_bond_group *bond_grp,
				    struct hns_roce_dev *hr_dev)
{
	struct net_device *active_dev;
	struct net_device *old_dev;
	int i, ret = 0;

	if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		rcu_read_lock();
		active_dev =
			bond_option_active_slave_get_rcu(netdev_priv(bond_grp->upper_dev));
		rcu_read_unlock();
	} else {
		for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
			active_dev = bond_grp->bond_func_info[i].net_dev;
			if (active_dev &&
			    ib_get_curr_port_state(active_dev) == IB_PORT_ACTIVE)
				break;
		}
	}

	if (!active_dev || i == ROCE_BOND_FUNC_MAX)
		active_dev = get_hr_netdev(hr_dev, 0);

	old_dev = ib_device_get_netdev(&hr_dev->ib_dev, 1);
	if (old_dev == active_dev)
		goto out;

	ret = ib_device_set_netdev(&hr_dev->ib_dev, active_dev, 1);
	if (ret) {
		dev_err(hr_dev->dev, "failed to set netdev for bond.\n");
		goto out;
	}

	if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		if (old_dev)
			roce_del_all_netdev_gids(&hr_dev->ib_dev, 1, old_dev);
		rdma_roce_rescan_port(&hr_dev->ib_dev, 1);
	}
out:
	dev_put(old_dev);
	return ret;
}

/*
 * Report whether @hr_dev currently belongs to an active bond.
 *
 * Looks up the bond group by the device's netdev and bus number.
 *
 * Return: true if a group is found and its state is neither
 * HNS_ROCE_BOND_NOT_BONDED nor HNS_ROCE_BOND_NOT_ATTACHED, false otherwise.
 */
bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev)
{
	struct net_device *ndev = get_hr_netdev(hr_dev, 0);
	u8 bus = get_hr_bus_num(hr_dev);
	struct hns_roce_bond_group *grp = hns_roce_get_bond_grp(ndev, bus);

	if (!grp)
		return false;

	return grp->bond_state != HNS_ROCE_BOND_NOT_BONDED &&
	       grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED;
}

/*
 * Uninitialize the RoCE instance of slave @func_idx if it has one.
 *
 * A slave without a handle, or whose handle has no private data, is
 * left untouched.
 */
static void hns_roce_slave_uninit(struct hns_roce_bond_group *bond_grp,
				  u8 func_idx)
{
	struct hnae3_handle *handle;

	handle = bond_grp->bond_func_info[func_idx].handle;
	/* The handle slot may be empty; hns_roce_slave_init() and
	 * hns_roce_bond_init_client() check it the same way.
	 */
	if (!handle)
		return;

	if (handle->priv)
		hns_roce_bond_uninit_client(bond_grp, func_idx);
}

/* Forward declaration: switch_main_dev() and hns_roce_slave_init() call
 * each other, so one of them has to be declared before its definition.
 */
static struct hns_roce_dev
	*hns_roce_slave_init(struct hns_roce_bond_group *bond_grp,
			     u8 func_idx, bool need_switch);

/*
 * Replace the bond's main device.
 *
 * Clears main_hr_dev, uninitializes the instance at @main_func_idx, then
 * walks the slaves in index order and makes the first one that can be
 * (re)initialized the new main_hr_dev.
 *
 * Return: 0 if a new main device was installed, -ENODEV otherwise.
 */
static int switch_main_dev(struct hns_roce_bond_group *bond_grp,
			   u8 main_func_idx)
{
	struct hns_roce_dev *candidate;
	u8 idx;

	bond_grp->main_hr_dev = NULL;
	hns_roce_bond_uninit_client(bond_grp, main_func_idx);

	for (idx = 0; idx < ROCE_BOND_FUNC_MAX; idx++) {
		/* Only functions marked in slave_map that still have a
		 * netdev are eligible.
		 */
		if (!(bond_grp->slave_map & (1U << idx)))
			continue;
		if (!bond_grp->bond_func_info[idx].net_dev)
			continue;

		/* In case this slave is still being registered as
		 * a non-bonded PF, uninit it first and then re-init
		 * it as the main device.
		 */
		hns_roce_slave_uninit(bond_grp, idx);
		candidate = hns_roce_slave_init(bond_grp, idx, false);
		if (!candidate)
			continue;

		bond_grp->main_hr_dev = candidate;
		break;
	}

	return bond_grp->main_hr_dev ? 0 : -ENODEV;
}

/*
 * Initialize the RoCE instance of slave @func_idx.
 *
 * @bond_grp: bond group the slave belongs to.
 * @func_idx: index of the slave in bond_func_info[].
 * @need_switch: when true and @func_idx is the function of the current
 *               main_hr_dev, a new main device is selected first via
 *               switch_main_dev(); the slave's net_dev entry is also
 *               cleared before it is initialized.
 *
 * Return: the slave's hns_roce_dev (the existing one if the handle
 * already has private data), or NULL if there is no handle, the main
 * device switch failed with -ENODEV, or initialization failed.
 */
static struct hns_roce_dev
	*hns_roce_slave_init(struct hns_roce_bond_group *bond_grp,
			     u8 func_idx, bool need_switch)
{
	struct hns_roce_dev *slave_dev;
	struct hnae3_handle *handle;

	if (need_switch) {
		u8 main_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);

		if (func_idx == main_idx &&
		    switch_main_dev(bond_grp, main_idx) == -ENODEV)
			return NULL;
	}

	handle = bond_grp->bond_func_info[func_idx].handle;
	if (!handle)
		return NULL;

	/* Already initialized: hand back the existing device. */
	if (handle->priv)
		return handle->priv;

	/* Prevent this device from being initialized as a bond device */
	if (need_switch)
		bond_grp->bond_func_info[func_idx].net_dev = NULL;

	slave_dev = hns_roce_bond_init_client(bond_grp, func_idx);
	if (!slave_dev)
		BOND_ERR_LOG("failed to init slave %u.\n", func_idx);

	return slave_dev;
}

static struct hns_roce_die_info *alloc_die_info(int bus_num)
{
	struct hns_roce_die_info *die_info;
@@ -204,6 +342,35 @@ static void hns_roce_attach_bond_grp(struct hns_roce_bond_group *bond_grp,
	bond_grp->bond_ready = false;
}

/*
 * Reset @bond_grp to the detached state.
 *
 * Under bond_mutex: marks the group HNS_ROCE_BOND_NOT_ATTACHED and not
 * ready, forgets the upper device and the main device, and wipes the
 * slave map and the per-function info table.
 */
static void hns_roce_detach_bond_grp(struct hns_roce_bond_group *bond_grp)
{
	mutex_lock(&bond_grp->bond_mutex);

	/* State flags */
	bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED;
	bond_grp->bond_ready = false;

	/* Device references */
	bond_grp->main_hr_dev = NULL;
	bond_grp->upper_dev = NULL;

	/* Slave bookkeeping */
	memset(bond_grp->bond_func_info, 0, sizeof(bond_grp->bond_func_info));
	bond_grp->slave_map = 0;

	mutex_unlock(&bond_grp->bond_mutex);
}

/*
 * Clear the RoCE bond and detach the bond group.
 *
 * Issues HNS_ROCE_CLEAR_BOND through hns_roce_cmd_bond() when the group
 * has a main device; without one the clear is treated as failed (-EIO).
 * The outcome is only logged: the group is detached in either case.
 */
void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp)
{
	int err = -EIO;

	if (bond_grp->main_hr_dev)
		err = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND);

	/* err == 0 implies main_hr_dev is set, so it is safe to use here. */
	if (!err)
		ibdev_info(&bond_grp->main_hr_dev->ib_dev,
			   "RoCE clear bond finished!\n");
	else
		BOND_ERR_LOG("failed to clear RoCE bond, ret = %d.\n", err);

	hns_roce_detach_bond_grp(bond_grp);
}

static bool lowerstate_event_filter(struct hns_roce_bond_group *bond_grp,
				    struct net_device *net_dev)
{
@@ -504,3 +671,14 @@ void hns_roce_dealloc_bond_grp(void)
		}
	}
}

/*
 * Bind the appropriate bond netdev to @hr_dev's ibdev.
 *
 * Looks up the bond group by the device's netdev and bus number and
 * hands it to hns_roce_set_bond_netdev().
 *
 * Return: 0 on success, -ENODEV if no bond group is found for the
 * device, otherwise the error from hns_roce_set_bond_netdev().
 */
int hns_roce_bond_init(struct hns_roce_dev *hr_dev)
{
	struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
	struct hns_roce_bond_group *bond_grp;
	u8 bus_num = get_hr_bus_num(hr_dev);

	bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
	/* The lookup can come back empty, and hns_roce_set_bond_netdev()
	 * dereferences the group unconditionally.
	 */
	if (!bond_grp)
		return -ENODEV;

	return hns_roce_set_bond_netdev(bond_grp, hr_dev);
}
+6 −0
Original line number Diff line number Diff line
@@ -14,6 +14,9 @@

#define BOND_ID(id) BIT(id)

/* Error log helper for bonding code: forwards to pr_err() with the
 * "HNS RoCE Bonding: " prefix prepended to the format string. @fmt must
 * be a string literal, because it is concatenated at compile time.
 */
#define BOND_ERR_LOG(fmt, ...)				\
	pr_err("HNS RoCE Bonding: " fmt, ##__VA_ARGS__)

enum {
	BOND_MODE_1,
	BOND_MODE_2_4,
@@ -80,5 +83,8 @@ struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
						  u8 bus_num);
int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev);
void hns_roce_dealloc_bond_grp(void);
void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp);
bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev);
int hns_roce_bond_init(struct hns_roce_dev *hr_dev);

#endif
+2 −1
Original line number Diff line number Diff line
@@ -179,6 +179,7 @@ enum hns_roce_instance_state {
	HNS_ROCE_STATE_INIT,
	HNS_ROCE_STATE_INITED,
	HNS_ROCE_STATE_UNINIT,
	HNS_ROCE_STATE_BOND_UNINIT,
};

enum {
@@ -1304,7 +1305,7 @@ void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn);
void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup);
int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq);
int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq);
int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
+37 −4
Original line number Diff line number Diff line
@@ -7141,7 +7141,7 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
}

static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
					   bool reset)
					   bool reset, bool bond_cleanup)
{
	struct hns_roce_dev *hr_dev = handle->priv;

@@ -7153,7 +7153,7 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
	hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
	hns_roce_handle_device_err(hr_dev);

	hns_roce_exit(hr_dev);
	hns_roce_exit(hr_dev, bond_cleanup);
	kfree(hr_dev->priv);
	ib_dealloc_device(&hr_dev->ib_dev);
}
@@ -7209,7 +7209,40 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,

	handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT;

	__hns_roce_hw_v2_uninit_instance(handle, reset);
	__hns_roce_hw_v2_uninit_instance(handle, reset, true);

	handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
}

/*
 * Initialize the RoCE instance behind slave @func_idx of @bond_grp.
 *
 * Return: the handle's private data after a successful
 * hns_roce_hw_v2_init_instance(), or NULL if the slave has no handle,
 * the handle has no client, or initialization fails.
 */
struct hns_roce_dev
	*hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp,
				   int func_idx)
{
	struct hnae3_handle *handle = bond_grp->bond_func_info[func_idx].handle;

	if (!handle)
		return NULL;

	if (!handle->client)
		return NULL;

	if (hns_roce_hw_v2_init_instance(handle) != 0)
		return NULL;

	return handle->priv;
}

/*
 * Tear down the RoCE instance behind slave @func_idx of @bond_grp.
 *
 * Does nothing unless the instance is in HNS_ROCE_STATE_INITED. During
 * the teardown the instance state is HNS_ROCE_STATE_BOND_UNINIT; it ends
 * in HNS_ROCE_STATE_NON_INIT. The instance is uninitialized with both
 * the reset and bond_cleanup arguments set to false.
 */
void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
				 int func_idx)
{
	struct hnae3_handle *handle;

	handle = bond_grp->bond_func_info[func_idx].handle;
	if (handle->rinfo.instance_state == HNS_ROCE_STATE_INITED) {
		handle->rinfo.instance_state = HNS_ROCE_STATE_BOND_UNINIT;
		__hns_roce_hw_v2_uninit_instance(handle, false, false);
		handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
	}
}
@@ -7278,7 +7311,7 @@ static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle)
	handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT;
	dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n");
	msleep(HNS_ROCE_V2_HW_RST_UNINT_DELAY);
	__hns_roce_hw_v2_uninit_instance(handle, false);
	__hns_roce_hw_v2_uninit_instance(handle, false, false);

	return 0;
}
+5 −0
Original line number Diff line number Diff line
@@ -1478,6 +1478,11 @@ struct hns_roce_bond_info {
	__le32 hash_policy;
};

struct hns_roce_dev
	*hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp,
				   int func_idx);
void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
				 int func_idx);
int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp,
		      enum hns_roce_bond_cmd_type bond_type);
Loading