Commit 197c2974 authored by Steffen Klassert's avatar Steffen Klassert
Browse files

Merge branch 'xfrm & bonding: Correct use of xso.real_dev'



Cosmin Ratiu says:

====================
This patch series was motivated by fixing a few bugs in the bonding
driver related to xfrm state migration on device failover.

struct xfrm_dev_offload has two net_device pointers: dev and real_dev.
The first one is the device the xfrm_state is offloaded on and the
second one is used by the bonding driver to manage the underlying device
xfrm_states are actually offloaded on. When bonding isn't used, the two
pointers are the same.

This causes confusion in drivers: Which device pointer should they use?
If they want to support bonding, they need to only use real_dev and
never look at dev.

Furthermore, real_dev is used without proper locking from multiple code
paths and changing it is dangerous. See commit [1] for example.

This patch series clears things out by removing all uses of real_dev
from outside the bonding driver.
Then, the bonding driver is refactored to fix a couple of long standing
races and the original bug which motivated this patch series.

[1] commit f8cde980 ("bonding: fix xfrm real_dev null pointer
dereference")

v2 -> v3:
Added a comment with locking expectations for real_dev.
Removed unnecessary bond variable from bond_ipsec_del_sa().
v1 -> v2:
Added missing kdoc for various functions.
Made bond_ipsec_del_sa() use xso.real_dev instead of curr_active_slave.
====================

Signed-off-by: default avatarSteffen Klassert <steffen.klassert@secunet.com>
parents 20eb35da d2fddbd3
Loading
Loading
Loading
Loading
+7 −3
Original line number Diff line number Diff line
@@ -65,9 +65,13 @@ Callbacks to implement
  /* from include/linux/netdevice.h */
  struct xfrmdev_ops {
        /* Crypto and Packet offload callbacks */
	int	(*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack);
	void	(*xdo_dev_state_delete) (struct xfrm_state *x);
	void	(*xdo_dev_state_free) (struct xfrm_state *x);
	int	(*xdo_dev_state_add)(struct net_device *dev,
                                     struct xfrm_state *x,
                                     struct netlink_ext_ack *extack);
	void	(*xdo_dev_state_delete)(struct net_device *dev,
                                        struct xfrm_state *x);
	void	(*xdo_dev_state_free)(struct net_device *dev,
                                      struct xfrm_state *x);
	bool	(*xdo_dev_offload_ok) (struct sk_buff *skb,
				       struct xfrm_state *x);
	void    (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
+58 −61
Original line number Diff line number Diff line
@@ -453,13 +453,14 @@ static struct net_device *bond_ipsec_dev(struct xfrm_state *xs)

/**
 * bond_ipsec_add_sa - program device with a security association
 * @bond_dev: pointer to the bond net device
 * @xs: pointer to transformer state struct
 * @extack: extack point to fill failure reason
 **/
static int bond_ipsec_add_sa(struct xfrm_state *xs,
static int bond_ipsec_add_sa(struct net_device *bond_dev,
			     struct xfrm_state *xs,
			     struct netlink_ext_ack *extack)
{
	struct net_device *bond_dev = xs->xso.dev;
	struct net_device *real_dev;
	netdevice_tracker tracker;
	struct bond_ipsec *ipsec;
@@ -495,9 +496,9 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs,
		goto out;
	}

	xs->xso.real_dev = real_dev;
	err = real_dev->xfrmdev_ops->xdo_dev_state_add(xs, extack);
	err = real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev, xs, extack);
	if (!err) {
		xs->xso.real_dev = real_dev;
		ipsec->xs = xs;
		INIT_LIST_HEAD(&ipsec->list);
		mutex_lock(&bond->ipsec_lock);
@@ -539,11 +540,25 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)
		if (ipsec->xs->xso.real_dev == real_dev)
			continue;

		ipsec->xs->xso.real_dev = real_dev;
		if (real_dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) {
		if (real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev,
							     ipsec->xs, NULL)) {
			slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__);
			ipsec->xs->xso.real_dev = NULL;
			continue;
		}

		spin_lock_bh(&ipsec->xs->lock);
		/* xs might have been killed by the user during the migration
		 * to the new dev, but bond_ipsec_del_sa() should have done
		 * nothing, as xso.real_dev is NULL.
		 * Delete it from the device we just added it to. The pending
		 * bond_ipsec_free_sa() call will do the rest of the cleanup.
		 */
		if (ipsec->xs->km.state == XFRM_STATE_DEAD &&
		    real_dev->xfrmdev_ops->xdo_dev_state_delete)
			real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
								    ipsec->xs);
		ipsec->xs->xso.real_dev = real_dev;
		spin_unlock_bh(&ipsec->xs->lock);
	}
out:
	mutex_unlock(&bond->ipsec_lock);
@@ -551,54 +566,27 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)

/**
 * bond_ipsec_del_sa - clear out this specific SA
 * @bond_dev: pointer to the bond net device
 * @xs: pointer to transformer state struct
 **/
static void bond_ipsec_del_sa(struct xfrm_state *xs)
static void bond_ipsec_del_sa(struct net_device *bond_dev,
			      struct xfrm_state *xs)
{
	struct net_device *bond_dev = xs->xso.dev;
	struct net_device *real_dev;
	netdevice_tracker tracker;
	struct bond_ipsec *ipsec;
	struct bonding *bond;
	struct slave *slave;

	if (!bond_dev)
	if (!bond_dev || !xs->xso.real_dev)
		return;

	rcu_read_lock();
	bond = netdev_priv(bond_dev);
	slave = rcu_dereference(bond->curr_active_slave);
	real_dev = slave ? slave->dev : NULL;
	netdev_hold(real_dev, &tracker, GFP_ATOMIC);
	rcu_read_unlock();

	if (!slave)
		goto out;

	if (!xs->xso.real_dev)
		goto out;

	WARN_ON(xs->xso.real_dev != real_dev);
	real_dev = xs->xso.real_dev;

	if (!real_dev->xfrmdev_ops ||
	    !real_dev->xfrmdev_ops->xdo_dev_state_delete ||
	    netif_is_bond_master(real_dev)) {
		slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__);
		goto out;
		return;
	}

	real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
out:
	netdev_put(real_dev, &tracker);
	mutex_lock(&bond->ipsec_lock);
	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
		if (ipsec->xs == xs) {
			list_del(&ipsec->list);
			kfree(ipsec);
			break;
		}
	}
	mutex_unlock(&bond->ipsec_lock);
	real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, xs);
}

static void bond_ipsec_del_sa_all(struct bonding *bond)
@@ -624,46 +612,55 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
			slave_warn(bond_dev, real_dev,
				   "%s: no slave xdo_dev_state_delete\n",
				   __func__);
		} else {
			real_dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs);
			if (real_dev->xfrmdev_ops->xdo_dev_state_free)
				real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs);
			continue;
		}

		spin_lock_bh(&ipsec->xs->lock);
		ipsec->xs->xso.real_dev = NULL;
		/* Don't double delete states killed by the user. */
		if (ipsec->xs->km.state != XFRM_STATE_DEAD)
			real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
								    ipsec->xs);
		spin_unlock_bh(&ipsec->xs->lock);

		if (real_dev->xfrmdev_ops->xdo_dev_state_free)
			real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev,
								  ipsec->xs);
	}
	mutex_unlock(&bond->ipsec_lock);
}

static void bond_ipsec_free_sa(struct xfrm_state *xs)
static void bond_ipsec_free_sa(struct net_device *bond_dev,
			       struct xfrm_state *xs)
{
	struct net_device *bond_dev = xs->xso.dev;
	struct net_device *real_dev;
	netdevice_tracker tracker;
	struct bond_ipsec *ipsec;
	struct bonding *bond;
	struct slave *slave;

	if (!bond_dev)
		return;

	rcu_read_lock();
	bond = netdev_priv(bond_dev);
	slave = rcu_dereference(bond->curr_active_slave);
	real_dev = slave ? slave->dev : NULL;
	netdev_hold(real_dev, &tracker, GFP_ATOMIC);
	rcu_read_unlock();

	if (!slave)
		goto out;

	mutex_lock(&bond->ipsec_lock);
	if (!xs->xso.real_dev)
		goto out;

	WARN_ON(xs->xso.real_dev != real_dev);
	real_dev = xs->xso.real_dev;

	if (real_dev && real_dev->xfrmdev_ops &&
	xs->xso.real_dev = NULL;
	if (real_dev->xfrmdev_ops &&
	    real_dev->xfrmdev_ops->xdo_dev_state_free)
		real_dev->xfrmdev_ops->xdo_dev_state_free(xs);
		real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev, xs);
out:
	netdev_put(real_dev, &tracker);
	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
		if (ipsec->xs == xs) {
			list_del(&ipsec->list);
			kfree(ipsec);
			break;
		}
	}
	mutex_unlock(&bond->ipsec_lock);
}

/**
+11 −9
Original line number Diff line number Diff line
@@ -6480,10 +6480,11 @@ static const struct tlsdev_ops cxgb4_ktls_ops = {

#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE)

static int cxgb4_xfrm_add_state(struct xfrm_state *x,
static int cxgb4_xfrm_add_state(struct net_device *dev,
				struct xfrm_state *x,
				struct netlink_ext_ack *extack)
{
	struct adapter *adap = netdev2adap(x->xso.dev);
	struct adapter *adap = netdev2adap(dev);
	int ret;

	if (!mutex_trylock(&uld_mutex)) {
@@ -6494,7 +6495,8 @@ static int cxgb4_xfrm_add_state(struct xfrm_state *x,
	if (ret)
		goto out_unlock;

	ret = adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_add(x, extack);
	ret = adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_add(dev, x,
									extack);

out_unlock:
	mutex_unlock(&uld_mutex);
@@ -6502,9 +6504,9 @@ static int cxgb4_xfrm_add_state(struct xfrm_state *x,
	return ret;
}

static void cxgb4_xfrm_del_state(struct xfrm_state *x)
static void cxgb4_xfrm_del_state(struct net_device *dev, struct xfrm_state *x)
{
	struct adapter *adap = netdev2adap(x->xso.dev);
	struct adapter *adap = netdev2adap(dev);

	if (!mutex_trylock(&uld_mutex)) {
		dev_dbg(adap->pdev_dev,
@@ -6514,15 +6516,15 @@ static void cxgb4_xfrm_del_state(struct xfrm_state *x)
	if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS))
		goto out_unlock;

	adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_delete(x);
	adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_delete(dev, x);

out_unlock:
	mutex_unlock(&uld_mutex);
}

static void cxgb4_xfrm_free_state(struct xfrm_state *x)
static void cxgb4_xfrm_free_state(struct net_device *dev, struct xfrm_state *x)
{
	struct adapter *adap = netdev2adap(x->xso.dev);
	struct adapter *adap = netdev2adap(dev);

	if (!mutex_trylock(&uld_mutex)) {
		dev_dbg(adap->pdev_dev,
@@ -6532,7 +6534,7 @@ static void cxgb4_xfrm_free_state(struct xfrm_state *x)
	if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS))
		goto out_unlock;

	adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_free(x);
	adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_free(dev, x);

out_unlock:
	mutex_unlock(&uld_mutex);
+12 −6
Original line number Diff line number Diff line
@@ -75,9 +75,12 @@ static int ch_ipsec_uld_state_change(void *handle, enum cxgb4_state new_state);
static int ch_ipsec_xmit(struct sk_buff *skb, struct net_device *dev);
static void *ch_ipsec_uld_add(const struct cxgb4_lld_info *infop);
static void ch_ipsec_advance_esn_state(struct xfrm_state *x);
static void ch_ipsec_xfrm_free_state(struct xfrm_state *x);
static void ch_ipsec_xfrm_del_state(struct xfrm_state *x);
static int ch_ipsec_xfrm_add_state(struct xfrm_state *x,
static void ch_ipsec_xfrm_free_state(struct net_device *dev,
				     struct xfrm_state *x);
static void ch_ipsec_xfrm_del_state(struct net_device *dev,
				    struct xfrm_state *x);
static int ch_ipsec_xfrm_add_state(struct net_device *dev,
				   struct xfrm_state *x,
				   struct netlink_ext_ack *extack);

static const struct xfrmdev_ops ch_ipsec_xfrmdev_ops = {
@@ -223,7 +226,8 @@ static int ch_ipsec_setkey(struct xfrm_state *x,
 * returns 0 on success, negative error if failed to send message to FPGA
 * positive error if FPGA returned a bad response
 */
static int ch_ipsec_xfrm_add_state(struct xfrm_state *x,
static int ch_ipsec_xfrm_add_state(struct net_device *dev,
				   struct xfrm_state *x,
				   struct netlink_ext_ack *extack)
{
	struct ipsec_sa_entry *sa_entry;
@@ -302,14 +306,16 @@ static int ch_ipsec_xfrm_add_state(struct xfrm_state *x,
	return res;
}

static void ch_ipsec_xfrm_del_state(struct xfrm_state *x)
static void ch_ipsec_xfrm_del_state(struct net_device *dev,
				    struct xfrm_state *x)
{
	/* do nothing */
	if (!x->xso.offload_handle)
		return;
}

static void ch_ipsec_xfrm_free_state(struct xfrm_state *x)
static void ch_ipsec_xfrm_free_state(struct net_device *dev,
				     struct xfrm_state *x)
{
	struct ipsec_sa_entry *sa_entry;

+23 −18
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@
#define IXGBE_IPSEC_KEY_BITS  160
static const char aes_gcm_name[] = "rfc4106(gcm(aes))";

static void ixgbe_ipsec_del_sa(struct xfrm_state *xs);
static void ixgbe_ipsec_del_sa(struct net_device *dev, struct xfrm_state *xs);

/**
 * ixgbe_ipsec_set_tx_sa - set the Tx SA registers
@@ -321,7 +321,7 @@ void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)

		if (r->used) {
			if (r->mode & IXGBE_RXTXMOD_VF)
				ixgbe_ipsec_del_sa(r->xs);
				ixgbe_ipsec_del_sa(adapter->netdev, r->xs);
			else
				ixgbe_ipsec_set_rx_sa(hw, i, r->xs->id.spi,
						      r->key, r->salt,
@@ -330,7 +330,7 @@ void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)

		if (t->used) {
			if (t->mode & IXGBE_RXTXMOD_VF)
				ixgbe_ipsec_del_sa(t->xs);
				ixgbe_ipsec_del_sa(adapter->netdev, t->xs);
			else
				ixgbe_ipsec_set_tx_sa(hw, i, t->key, t->salt);
		}
@@ -417,6 +417,7 @@ static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec,

/**
 * ixgbe_ipsec_parse_proto_keys - find the key and salt based on the protocol
 * @dev: pointer to net device
 * @xs: pointer to xfrm_state struct
 * @mykey: pointer to key array to populate
 * @mysalt: pointer to salt value to populate
@@ -424,10 +425,10 @@ static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec,
 * This copies the protocol keys and salt to our own data tables.  The
 * 82599 family only supports the one algorithm.
 **/
static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
static int ixgbe_ipsec_parse_proto_keys(struct net_device *dev,
					struct xfrm_state *xs,
					u32 *mykey, u32 *mysalt)
{
	struct net_device *dev = xs->xso.real_dev;
	unsigned char *key_data;
	char *alg_name = NULL;
	int key_len;
@@ -473,11 +474,12 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,

/**
 * ixgbe_ipsec_check_mgmt_ip - make sure there is no clash with mgmt IP filters
 * @dev: pointer to net device
 * @xs: pointer to transformer state struct
 **/
static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs)
static int ixgbe_ipsec_check_mgmt_ip(struct net_device *dev,
				     struct xfrm_state *xs)
{
	struct net_device *dev = xs->xso.real_dev;
	struct ixgbe_adapter *adapter = netdev_priv(dev);
	struct ixgbe_hw *hw = &adapter->hw;
	u32 mfval, manc, reg;
@@ -556,13 +558,14 @@ static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs)

/**
 * ixgbe_ipsec_add_sa - program device with a security association
 * @dev: pointer to device to program
 * @xs: pointer to transformer state struct
 * @extack: extack point to fill failure reason
 **/
static int ixgbe_ipsec_add_sa(struct xfrm_state *xs,
static int ixgbe_ipsec_add_sa(struct net_device *dev,
			      struct xfrm_state *xs,
			      struct netlink_ext_ack *extack)
{
	struct net_device *dev = xs->xso.real_dev;
	struct ixgbe_adapter *adapter = netdev_priv(dev);
	struct ixgbe_ipsec *ipsec = adapter->ipsec;
	struct ixgbe_hw *hw = &adapter->hw;
@@ -581,7 +584,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs,
		return -EINVAL;
	}

	if (ixgbe_ipsec_check_mgmt_ip(xs)) {
	if (ixgbe_ipsec_check_mgmt_ip(dev, xs)) {
		NL_SET_ERR_MSG_MOD(extack, "IPsec IP addr clash with mgmt filters");
		return -EINVAL;
	}
@@ -615,7 +618,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs,
			rsa.decrypt = xs->ealg || xs->aead;

		/* get the key and salt */
		ret = ixgbe_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt);
		ret = ixgbe_ipsec_parse_proto_keys(dev, xs, rsa.key, &rsa.salt);
		if (ret) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Rx SA table");
			return ret;
@@ -724,7 +727,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs,
		if (xs->id.proto & IPPROTO_ESP)
			tsa.encrypt = xs->ealg || xs->aead;

		ret = ixgbe_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt);
		ret = ixgbe_ipsec_parse_proto_keys(dev, xs, tsa.key, &tsa.salt);
		if (ret) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Tx SA table");
			memset(&tsa, 0, sizeof(tsa));
@@ -752,11 +755,11 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs,

/**
 * ixgbe_ipsec_del_sa - clear out this specific SA
 * @dev: pointer to device to program
 * @xs: pointer to transformer state struct
 **/
static void ixgbe_ipsec_del_sa(struct xfrm_state *xs)
static void ixgbe_ipsec_del_sa(struct net_device *dev, struct xfrm_state *xs)
{
	struct net_device *dev = xs->xso.real_dev;
	struct ixgbe_adapter *adapter = netdev_priv(dev);
	struct ixgbe_ipsec *ipsec = adapter->ipsec;
	struct ixgbe_hw *hw = &adapter->hw;
@@ -841,7 +844,8 @@ void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf)
			continue;
		if (ipsec->rx_tbl[i].mode & IXGBE_RXTXMOD_VF &&
		    ipsec->rx_tbl[i].vf == vf)
			ixgbe_ipsec_del_sa(ipsec->rx_tbl[i].xs);
			ixgbe_ipsec_del_sa(adapter->netdev,
					   ipsec->rx_tbl[i].xs);
	}

	/* search tx sa table */
@@ -850,7 +854,8 @@ void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf)
			continue;
		if (ipsec->tx_tbl[i].mode & IXGBE_RXTXMOD_VF &&
		    ipsec->tx_tbl[i].vf == vf)
			ixgbe_ipsec_del_sa(ipsec->tx_tbl[i].xs);
			ixgbe_ipsec_del_sa(adapter->netdev,
					   ipsec->tx_tbl[i].xs);
	}
}

@@ -930,7 +935,7 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
	memcpy(xs->aead->alg_name, aes_gcm_name, sizeof(aes_gcm_name));

	/* set up the HW offload */
	err = ixgbe_ipsec_add_sa(xs, NULL);
	err = ixgbe_ipsec_add_sa(adapter->netdev, xs, NULL);
	if (err)
		goto err_aead;

@@ -1034,7 +1039,7 @@ int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
		xs = ipsec->tx_tbl[sa_idx].xs;
	}

	ixgbe_ipsec_del_sa(xs);
	ixgbe_ipsec_del_sa(adapter->netdev, xs);

	/* remove the xs that was made-up in the add request */
	kfree_sensitive(xs);
Loading