Commit d92589f8 authored by Jakub Kicinski
Browse files
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

Patch #1 fixes insufficient sanitization of netlink attributes for the
         inner expression, which can trigger a NULL pointer dereference,
         from Davide Ornaghi.

Patch #2 addresses a report of a race condition between namespace
         cleanup and the garbage collection of the list:set type.
         This patch resolves this issue, along with other minor issues,
         from Jozsef Kadlecsik.

Patch #3 fixes ip6_route_me_harder() ignoring the flowlabel/dsfield when
         ip dscp has been mangled; this unbreaks "ip6 dscp set $v",
         from Florian Westphal.

All of these patches address issues that are present in several releases.

* tag 'nf-24-06-11' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: Use flowlabel flow key when re-routing mangled packets
  netfilter: ipset: Fix race between namespace cleanup and gc in the list:set type
  netfilter: nft_inner: validate mandatory meta and payload
====================

Link: https://lore.kernel.org/r/20240611220323.413713-1-pablo@netfilter.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents be27b896 6f8f132c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
		.flowi6_uid = sock_net_uid(net, sk),
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
	};
	int err;

+46 −35
Original line number Diff line number Diff line
@@ -1172,23 +1172,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
				    .len = IPSET_MAXNAMELEN - 1 },
};

/* In order to return quickly when destroying a single set, it is split
 * into two stages:
 * - Cancel garbage collector
 * - Destroy the set itself via call_rcu()
 */

static void
ip_set_destroy_set(struct ip_set *set)
ip_set_destroy_set_rcu(struct rcu_head *head)
{
	pr_debug("set: %s\n",  set->name);
	struct ip_set *set = container_of(head, struct ip_set, rcu);

	/* Must call it without holding any lock */
	set->variant->destroy(set);
	module_put(set->type->me);
	kfree(set);
}

static void
ip_set_destroy_set_rcu(struct rcu_head *head)
_destroy_all_sets(struct ip_set_net *inst)
{
	struct ip_set *set = container_of(head, struct ip_set, rcu);
	struct ip_set *set;
	ip_set_id_t i;
	bool need_wait = false;

	ip_set_destroy_set(set);
	/* First cancel gc's: list:set sets are flushed as well */
	for (i = 0; i < inst->ip_set_max; i++) {
		set = ip_set(inst, i);
		if (set) {
			set->variant->cancel_gc(set);
			if (set->type->features & IPSET_TYPE_NAME)
				need_wait = true;
		}
	}
	/* Must wait for flush to be really finished  */
	if (need_wait)
		rcu_barrier();
	for (i = 0; i < inst->ip_set_max; i++) {
		set = ip_set(inst, i);
		if (set) {
			ip_set(inst, i) = NULL;
			set->variant->destroy(set);
			module_put(set->type->me);
			kfree(set);
		}
	}
}

static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
@@ -1202,11 +1229,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
	if (unlikely(protocol_min_failed(attr)))
		return -IPSET_ERR_PROTOCOL;


	/* Commands are serialized and references are
	 * protected by the ip_set_ref_lock.
	 * External systems (i.e. xt_set) must call
	 * ip_set_put|get_nfnl_* functions, that way we
	 * ip_set_nfnl_get_* functions, that way we
	 * can safely check references here.
	 *
	 * list:set timer can only decrement the reference
@@ -1214,8 +1240,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
	 * without holding the lock.
	 */
	if (!attr[IPSET_ATTR_SETNAME]) {
		/* Must wait for flush to be really finished in list:set */
		rcu_barrier();
		read_lock_bh(&ip_set_ref_lock);
		for (i = 0; i < inst->ip_set_max; i++) {
			s = ip_set(inst, i);
@@ -1226,15 +1250,7 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
		}
		inst->is_destroyed = true;
		read_unlock_bh(&ip_set_ref_lock);
		for (i = 0; i < inst->ip_set_max; i++) {
			s = ip_set(inst, i);
			if (s) {
				ip_set(inst, i) = NULL;
				/* Must cancel garbage collectors */
				s->variant->cancel_gc(s);
				ip_set_destroy_set(s);
			}
		}
		_destroy_all_sets(inst);
		/* Modified by ip_set_destroy() only, which is serialized */
		inst->is_destroyed = false;
	} else {
@@ -1255,12 +1271,12 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
		features = s->type->features;
		ip_set(inst, i) = NULL;
		read_unlock_bh(&ip_set_ref_lock);
		/* Must cancel garbage collectors */
		s->variant->cancel_gc(s);
		if (features & IPSET_TYPE_NAME) {
			/* Must wait for flush to be really finished  */
			rcu_barrier();
		}
		/* Must cancel garbage collectors */
		s->variant->cancel_gc(s);
		call_rcu(&s->rcu, ip_set_destroy_set_rcu);
	}
	return 0;
@@ -2365,30 +2381,25 @@ ip_set_net_init(struct net *net)
}

static void __net_exit
ip_set_net_exit(struct net *net)
ip_set_net_pre_exit(struct net *net)
{
	struct ip_set_net *inst = ip_set_pernet(net);

	struct ip_set *set = NULL;
	ip_set_id_t i;

	inst->is_deleted = true; /* flag for ip_set_nfnl_put */

	nfnl_lock(NFNL_SUBSYS_IPSET);
	for (i = 0; i < inst->ip_set_max; i++) {
		set = ip_set(inst, i);
		if (set) {
			ip_set(inst, i) = NULL;
			set->variant->cancel_gc(set);
			ip_set_destroy_set(set);
		}
}
	nfnl_unlock(NFNL_SUBSYS_IPSET);

static void __net_exit
ip_set_net_exit(struct net *net)
{
	struct ip_set_net *inst = ip_set_pernet(net);

	_destroy_all_sets(inst);
	kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
}

static struct pernet_operations ip_set_net_ops = {
	.init	= ip_set_net_init,
	.pre_exit = ip_set_net_pre_exit,
	.exit   = ip_set_net_exit,
	.id	= &ip_set_net_id,
	.size	= sizeof(struct ip_set_net),
+14 −16
Original line number Diff line number Diff line
@@ -79,7 +79,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
	struct set_elem *e;
	int ret;

	list_for_each_entry(e, &map->members, list) {
	list_for_each_entry_rcu(e, &map->members, list) {
		if (SET_WITH_TIMEOUT(set) &&
		    ip_set_timeout_expired(ext_timeout(e, set)))
			continue;
@@ -99,7 +99,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
	struct set_elem *e;
	int ret;

	list_for_each_entry(e, &map->members, list) {
	list_for_each_entry_rcu(e, &map->members, list) {
		if (SET_WITH_TIMEOUT(set) &&
		    ip_set_timeout_expired(ext_timeout(e, set)))
			continue;
@@ -188,9 +188,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	struct list_set *map = set->data;
	struct set_adt_elem *d = value;
	struct set_elem *e, *next, *prev = NULL;
	int ret;
	int ret = 0;

	list_for_each_entry(e, &map->members, list) {
	rcu_read_lock();
	list_for_each_entry_rcu(e, &map->members, list) {
		if (SET_WITH_TIMEOUT(set) &&
		    ip_set_timeout_expired(ext_timeout(e, set)))
			continue;
@@ -201,6 +202,7 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,

		if (d->before == 0) {
			ret = 1;
			goto out;
		} else if (d->before > 0) {
			next = list_next_entry(e, list);
			ret = !list_is_last(&e->list, &map->members) &&
@@ -208,9 +210,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
		} else {
			ret = prev && prev->id == d->refid;
		}
		return ret;
		goto out;
	}
	return 0;
out:
	rcu_read_unlock();
	return ret;
}

static void
@@ -239,7 +243,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,

	/* Find where to add the new entry */
	n = prev = next = NULL;
	list_for_each_entry(e, &map->members, list) {
	list_for_each_entry_rcu(e, &map->members, list) {
		if (SET_WITH_TIMEOUT(set) &&
		    ip_set_timeout_expired(ext_timeout(e, set)))
			continue;
@@ -316,9 +320,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
	struct list_set *map = set->data;
	struct set_adt_elem *d = value;
	struct set_elem *e, *next, *prev = NULL;
	struct set_elem *e, *n, *next, *prev = NULL;

	list_for_each_entry(e, &map->members, list) {
	list_for_each_entry_safe(e, n, &map->members, list) {
		if (SET_WITH_TIMEOUT(set) &&
		    ip_set_timeout_expired(ext_timeout(e, set)))
			continue;
@@ -424,14 +428,8 @@ static void
list_set_destroy(struct ip_set *set)
{
	struct list_set *map = set->data;
	struct set_elem *e, *n;

	list_for_each_entry_safe(e, n, &map->members, list) {
		list_del(&e->list);
		ip_set_put_byindex(map->net, e->id);
		ip_set_ext_destroy(set, e);
		kfree(e);
	}
	WARN_ON_ONCE(!list_empty(&map->members));
	kfree(map);

	set->data = NULL;
+3 −0
Original line number Diff line number Diff line
@@ -839,6 +839,9 @@ static int nft_meta_inner_init(const struct nft_ctx *ctx,
	struct nft_meta *priv = nft_expr_priv(expr);
	unsigned int len;

	if (!tb[NFTA_META_KEY] || !tb[NFTA_META_DREG])
		return -EINVAL;

	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
	switch (priv->key) {
	case NFT_META_PROTOCOL:
+4 −0
Original line number Diff line number Diff line
@@ -650,6 +650,10 @@ static int nft_payload_inner_init(const struct nft_ctx *ctx,
	struct nft_payload *priv = nft_expr_priv(expr);
	u32 base;

	if (!tb[NFTA_PAYLOAD_BASE] || !tb[NFTA_PAYLOAD_OFFSET] ||
	    !tb[NFTA_PAYLOAD_LEN] || !tb[NFTA_PAYLOAD_DREG])
		return -EINVAL;

	base   = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
	switch (base) {
	case NFT_PAYLOAD_TUN_HEADER: