Commit d7a39d39 authored by David S. Miller
Browse files

Merge branch 'bridge-mdb-bulk-delete'

Ido Schimmel says:

====================
Add MDB bulk deletion support

This patchset adds MDB bulk deletion support, allowing user space to
request the deletion of matching entries instead of dumping the entire
MDB and issuing a separate deletion request for each matching entry.
Support is added in both the bridge and VXLAN drivers in a similar
fashion to the existing FDB bulk deletion support.

The parameters according to which bulk deletion can be performed are
similar to the FDB ones, namely: Destination port, VLAN ID, state (e.g.,
"permanent"), routing protocol, source / destination VNI, destination IP
and UDP port. Flushing based on flags (e.g., "offload", "fast_leave",
"added_by_star_ex", "blocked") is not currently supported, but can be
added in the future, if a use case arises.

Patch #1 adds a new uAPI attribute to allow specifying the state mask
according to which bulk deletion will be performed, if any.

Patch #2 adds a new policy according to which bulk deletion requests
(with 'NLM_F_BULK' flag set) will be parsed.

Patches #3-#4 add a new NDO for MDB bulk deletion and invoke it from the
rtnetlink code when a bulk deletion request is made.

Patches #5-#6 implement the MDB bulk deletion NDO in the bridge and
VXLAN drivers, respectively.

Patch #7 allows user space to issue MDB bulk deletion requests by no
longer rejecting the 'NLM_F_BULK' flag when it is set in 'RTM_DELMDB'
requests.

Patches #8-#9 add selftests for both drivers, for both good and bad
flows.

iproute2 changes can be found here [1].

[1] https://github.com/idosch/iproute2/tree/submit/mdb_flush_v1


====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents b6895d0a c3e87a7f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -3235,6 +3235,7 @@ static const struct net_device_ops vxlan_netdev_ether_ops = {
	.ndo_fdb_get		= vxlan_fdb_get,
	.ndo_mdb_add		= vxlan_mdb_add,
	.ndo_mdb_del		= vxlan_mdb_del,
	.ndo_mdb_del_bulk	= vxlan_mdb_del_bulk,
	.ndo_mdb_dump		= vxlan_mdb_dump,
	.ndo_mdb_get		= vxlan_mdb_get,
	.ndo_fill_metadata_dst	= vxlan_fill_metadata_dst,
+150 −24
Original line number Diff line number Diff line
@@ -74,6 +74,14 @@ struct vxlan_mdb_config {
	u8 rt_protocol;
};

/* Filter describing which MDB remotes a flush should delete.
 *
 * Every field is optional: the flush code treats a zero value (or, for
 * remote_ip, a zero address family) as "do not filter on this field".
 * Multi-byte fields are kept in network byte order.
 */
struct vxlan_mdb_flush_desc {
	/* Compared against the remote's destination IP address. */
	union vxlan_addr remote_ip;
	/* Compared against the VNI in the MDB entry's key. */
	__be32 src_vni;
	/* Compared against the remote's VNI, or the entry's key VNI when the
	 * remote has none.
	 */
	__be32 remote_vni;
	/* Compared against the remote's destination port. */
	__be16 remote_port;
	/* Compared against the routing protocol recorded on the remote. */
	u8 rt_protocol;
};

static const struct rhashtable_params vxlan_mdb_rht_params = {
	.head_offset = offsetof(struct vxlan_mdb_entry, rhnode),
	.key_offset = offsetof(struct vxlan_mdb_entry, key),
@@ -1306,6 +1314,145 @@ int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
	return err;
}

/* Validation policy for the nested MDBA_SET_ENTRY_ATTRS attributes of a bulk
 * deletion request. Attributes without an entry here have no policy set.
 */
static const struct nla_policy
vxlan_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = {
	/* Routing protocol: u8, at least RTPROT_STATIC. */
	[MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
	/* Destination IP: binary blob whose length is bounded by the sizes of
	 * an IPv4 and an IPv6 address.
	 */
	[MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY,
					   sizeof(struct in_addr),
					   sizeof(struct in6_addr)),
	/* Destination port: u16. */
	[MDBE_ATTR_DST_PORT] = { .type = NLA_U16 },
	/* Destination and source VNI: u32 constrained by vni_range. */
	[MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
	[MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
	/* State mask: u8 in which only the MDB_PERMANENT bit may be set. */
	[MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT),
};

/* Translate a bulk deletion request into a flush descriptor.
 *
 * @vxlan:  device the request was issued on.
 * @desc:   descriptor to fill in. Only fields whose attribute is present in
 *          the request are written, so the caller must pass it zeroed.
 * @tb:     top-level attributes; tb[MDBA_SET_ENTRY] is dereferenced
 *          unconditionally, tb[MDBA_SET_ENTRY_ATTRS] is optional.
 * @extack: extended ack used to report the reason for a failure.
 *
 * Returns 0 on success, -EINVAL for an unsupported request, or the error
 * returned by attribute parsing / address conversion.
 */
static int vxlan_mdb_flush_desc_init(struct vxlan_dev *vxlan,
				     struct vxlan_mdb_flush_desc *desc,
				     struct nlattr *tb[],
				     struct netlink_ext_ack *extack)
{
	struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]);
	struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
	int err;

	/* A port ifindex, if given, can only be the VXLAN device itself. */
	if (entry->ifindex && entry->ifindex != vxlan->dev->ifindex) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid port net device");
		return -EINVAL;
	}

	if (entry->vid) {
		NL_SET_ERR_MSG_MOD(extack, "VID must not be specified");
		return -EINVAL;
	}

	/* No nested attributes: leave the descriptor as is (match all). */
	if (!tb[MDBA_SET_ENTRY_ATTRS])
		return 0;

	err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX,
			       tb[MDBA_SET_ENTRY_ATTRS],
			       vxlan_mdbe_attrs_del_bulk_pol, extack);
	if (err)
		return err;

	if (mdbe_attrs[MDBE_ATTR_STATE_MASK]) {
		u8 state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]);

		/* When the permanent bit is part of the mask, the requested
		 * state must have it set as well.
		 */
		if ((state_mask & MDB_PERMANENT) && !(entry->state & MDB_PERMANENT)) {
			NL_SET_ERR_MSG_MOD(extack, "Only permanent MDB entries are supported");
			return -EINVAL;
		}
	}

	if (mdbe_attrs[MDBE_ATTR_RTPROT])
		desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]);

	if (mdbe_attrs[MDBE_ATTR_DST]) {
		/* The policy only bounds the attribute length. If the
		 * conversion fails, remote_ip would stay unset and the flush
		 * would silently ignore the requested destination filter, so
		 * reject the request instead.
		 */
		err = vxlan_nla_get_addr(&desc->remote_ip,
					 mdbe_attrs[MDBE_ATTR_DST]);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid remote destination address");
			return err;
		}
	}

	if (mdbe_attrs[MDBE_ATTR_DST_PORT])
		desc->remote_port =
			cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT]));

	if (mdbe_attrs[MDBE_ATTR_VNI])
		desc->remote_vni =
			cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI]));

	if (mdbe_attrs[MDBE_ATTR_SRC_VNI])
		desc->src_vni =
			cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI]));

	return 0;
}

/* Delete the remotes of @mdb_entry that pass every filter set in @desc.
 * A zero field in @desc (or a zero address family for the IP) disables the
 * corresponding filter.
 */
static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan,
				    struct vxlan_mdb_entry *mdb_entry,
				    const struct vxlan_mdb_flush_desc *desc)
{
	struct vxlan_mdb_remote *cur, *next;

	list_for_each_entry_safe(cur, next, &mdb_entry->remotes, list) {
		struct vxlan_rdst *rdst = rtnl_dereference(cur->rd);
		__be32 encap_vni = rdst->remote_vni;

		/* A remote without a VNI of its own is encapsulated with the
		 * source VNI, so that is the value to compare against.
		 */
		if (!encap_vni)
			encap_vni = mdb_entry->key.vni;

		/* Skip the remote as soon as one active filter mismatches. */
		if ((desc->remote_ip.sa.sa_family &&
		     !vxlan_addr_equal(&desc->remote_ip, &rdst->remote_ip)) ||
		    (desc->remote_vni && desc->remote_vni != encap_vni) ||
		    (desc->remote_port &&
		     desc->remote_port != rdst->remote_port) ||
		    (desc->rt_protocol &&
		     desc->rt_protocol != cur->rt_protocol))
			continue;

		vxlan_mdb_remote_del(vxlan, mdb_entry, cur);
	}
}

/* Walk all MDB entries of @vxlan and flush the remotes selected by @desc. */
static void vxlan_mdb_flush(struct vxlan_dev *vxlan,
			    const struct vxlan_mdb_flush_desc *desc)
{
	struct vxlan_mdb_entry *cur;
	struct hlist_node *next;

	/* New entries are always inserted at the head of the list, so
	 * removing one entry can never cause another entry to be removed
	 * behind the iterator's back.
	 */
	hlist_for_each_entry_safe(cur, next, &vxlan->mdb_list, mdb_node) {
		bool vni_matches = !desc->src_vni ||
				   desc->src_vni == cur->key.vni;

		if (!vni_matches)
			continue;

		vxlan_mdb_remotes_flush(vxlan, cur, desc);
		/* The entry itself only goes away once it has no remotes. */
		vxlan_mdb_entry_put(vxlan, cur);
	}
}

int vxlan_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
		       struct netlink_ext_ack *extack)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_mdb_flush_desc desc = {};
	int err;

	ASSERT_RTNL();

	err = vxlan_mdb_flush_desc_init(vxlan, &desc, tb, extack);
	if (err)
		return err;

	vxlan_mdb_flush(vxlan, &desc);

	return 0;
}

static const struct nla_policy vxlan_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = {
	[MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
					      sizeof(struct in_addr),
@@ -1575,29 +1722,6 @@ static void vxlan_mdb_check_empty(void *ptr, void *arg)
	WARN_ON_ONCE(1);
}

static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan,
				    struct vxlan_mdb_entry *mdb_entry)
{
	struct vxlan_mdb_remote *remote, *tmp;

	list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list)
		vxlan_mdb_remote_del(vxlan, mdb_entry, remote);
}

/* Flush the remotes of every MDB entry of @vxlan and drop each entry. */
static void vxlan_mdb_entries_flush(struct vxlan_dev *vxlan)
{
	struct vxlan_mdb_entry *cur;
	struct hlist_node *next;

	/* New entries are always inserted at the head of the list, so
	 * removing one entry can never cause another entry to be removed
	 * behind the iterator's back.
	 */
	hlist_for_each_entry_safe(cur, next, &vxlan->mdb_list, mdb_node) {
		vxlan_mdb_remotes_flush(vxlan, cur);
		vxlan_mdb_entry_put(vxlan, cur);
	}
}

int vxlan_mdb_init(struct vxlan_dev *vxlan)
{
	int err;
@@ -1613,7 +1737,9 @@ int vxlan_mdb_init(struct vxlan_dev *vxlan)

void vxlan_mdb_fini(struct vxlan_dev *vxlan)
{
	vxlan_mdb_entries_flush(vxlan);
	struct vxlan_mdb_flush_desc desc = {};

	vxlan_mdb_flush(vxlan, &desc);
	WARN_ON_ONCE(vxlan->cfg.flags & VXLAN_F_MDB);
	rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty,
				    NULL);
+2 −0
Original line number Diff line number Diff line
@@ -235,6 +235,8 @@ int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
		  struct netlink_ext_ack *extack);
/* Handler installed as .ndo_mdb_del: deletes an MDB entry from dev. */
int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
		  struct netlink_ext_ack *extack);
/* Handler installed as .ndo_mdb_del_bulk: bulk deletes the MDB entries of
 * dev that match the request in tb. Returns 0 or a negative errno.
 */
int vxlan_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
		       struct netlink_ext_ack *extack);
/* Handler installed as .ndo_mdb_get. */
int vxlan_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid,
		  u32 seq, struct netlink_ext_ack *extack);
struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
+6 −0
Original line number Diff line number Diff line
@@ -1329,6 +1329,9 @@ struct netdev_net_notifier {
 * int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[],
 *		      struct netlink_ext_ack *extack);
 *	Deletes the MDB entry from dev.
 * int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[],
 *			   struct netlink_ext_ack *extack);
 *	Bulk deletes MDB entries from dev.
 * int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb,
 *		       struct netlink_callback *cb);
 *	Dumps MDB entries from dev. The first argument (marker) in the netlink
@@ -1611,6 +1614,9 @@ struct net_device_ops {
	int			(*ndo_mdb_del)(struct net_device *dev,
					       struct nlattr *tb[],
					       struct netlink_ext_ack *extack);
	int			(*ndo_mdb_del_bulk)(struct net_device *dev,
						    struct nlattr *tb[],
						    struct netlink_ext_ack *extack);
	int			(*ndo_mdb_dump)(struct net_device *dev,
						struct sk_buff *skb,
						struct netlink_callback *cb);
+1 −0
Original line number Diff line number Diff line
@@ -757,6 +757,7 @@ enum {
	MDBE_ATTR_VNI,
	MDBE_ATTR_IFINDEX,
	MDBE_ATTR_SRC_VNI,
	MDBE_ATTR_STATE_MASK,
	__MDBE_ATTR_MAX,
};
#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1)
Loading