Commit b611b776 authored by Paolo Abeni's avatar Paolo Abeni
Browse files
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

Patch #1 restores NFPROTO_INET with nft_compat, from Ignat Korchagin.

Patch #2 fixes an issue with bridge netfilter and broadcast/multicast
packets.

There is a day 0 bug in br_netfilter when used with connection tracking.

Conntrack assumes that an nf_conn structure that is not yet added to
hash table ("unconfirmed"), is only visible by the current cpu that is
processing the sk_buff.

For bridge this isn't true, sk_buff can get cloned in between, and
clones can be processed in parallel on different cpu.

This patch disables NAT and conntrack helpers for multicast packets.

Patch #3 adds a selftest to cover for the br_netfilter bug.

netfilter pull request 24-02-29

* tag 'nf-24-02-29' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  selftests: netfilter: add bridge conntrack + multicast test case
  netfilter: bridge: confirm multicast packets before passing them up the stack
  netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
====================

Link: https://lore.kernel.org/r/20240229000135.8780-1-pablo@netfilter.org


Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 51dd4ee0 6523cf51
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -474,6 +474,7 @@ struct nf_ct_hook {
			      const struct sk_buff *);
	void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
	void (*set_closing)(struct nf_conntrack *nfct);
	int (*confirm)(struct sk_buff *skb);
};
extern const struct nf_ct_hook __rcu *nf_ct_hook;

+96 −0
Original line number Diff line number Diff line
@@ -43,6 +43,10 @@
#include <linux/sysctl.h>
#endif

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack_core.h>
#endif

static unsigned int brnf_net_id __read_mostly;

struct brnf_net {
@@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
	return NF_STOLEN;
}

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
 * the same nf_conn entry, which will happen for multicast (broadcast)
 * Frames on bridges.
 *
 * Example:
 *      macvlan0
 *      br0
 *  ethX  ethY
 *
 * ethX (or Y) receives multicast or broadcast packet containing
 * an IP packet, not yet in conntrack table.
 *
 * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
 *    -> skb->_nfct now references a unconfirmed entry
 * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
 *    interface.
 * 3. skb gets passed up the stack.
 * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
 *    and schedules a work queue to send them out on the lower devices.
 *
 *    The clone skb->_nfct is not a copy, it is the same entry as the
 *    original skb.  The macvlan rx handler then returns RX_HANDLER_PASS.
 * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
 *
 * The Macvlan broadcast worker and normal confirm path will race.
 *
 * This race will not happen if step 2 already confirmed a clone. In that
 * case later steps perform skb_clone() with skb->_nfct already confirmed (in
 * hash table).  This works fine.
 *
 * But such confirmation won't happen when eb/ip/nftables rules dropped the
 * packets before they reached the nf_confirm step in postrouting.
 *
 * Work around this problem by explicit confirmation of the entry at
 * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
 * entry.
 *
 */
static unsigned int br_nf_local_in(void *priv,
				   struct sk_buff *skb,
				   const struct nf_hook_state *state)
{
	struct nf_conntrack *nfct = skb_nfct(skb);
	const struct nf_ct_hook *ct_hook;
	struct nf_conn *ct;
	int ret;

	if (!nfct || skb->pkt_type == PACKET_HOST)
		return NF_ACCEPT;

	ct = container_of(nfct, struct nf_conn, ct_general);
	if (likely(nf_ct_is_confirmed(ct)))
		return NF_ACCEPT;

	WARN_ON_ONCE(skb_shared(skb));
	WARN_ON_ONCE(refcount_read(&nfct->use) != 1);

	/* We can't call nf_confirm here, it would create a dependency
	 * on nf_conntrack module.
	 */
	ct_hook = rcu_dereference(nf_ct_hook);
	if (!ct_hook) {
		skb->_nfct = 0ul;
		nf_conntrack_put(nfct);
		return NF_ACCEPT;
	}

	nf_bridge_pull_encap_header(skb);
	ret = ct_hook->confirm(skb);
	switch (ret & NF_VERDICT_MASK) {
	case NF_STOLEN:
		return NF_STOLEN;
	default:
		nf_bridge_push_encap_header(skb);
		break;
	}

	ct = container_of(nfct, struct nf_conn, ct_general);
	WARN_ON_ONCE(!nf_ct_is_confirmed(ct));

	return ret;
}
#endif

/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
		.hooknum = NF_BR_PRE_ROUTING,
		.priority = NF_BR_PRI_BRNF,
	},
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	{
		.hook = br_nf_local_in,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_LOCAL_IN,
		.priority = NF_BR_PRI_LAST,
	},
#endif
	{
		.hook = br_nf_forward,
		.pf = NFPROTO_BRIDGE,
+30 −0
Original line number Diff line number Diff line
@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
	return nf_conntrack_in(skb, &bridge_state);
}

static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
				    const struct nf_hook_state *state)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	if (skb->pkt_type == PACKET_HOST)
		return NF_ACCEPT;

	/* nf_conntrack_confirm() cannot handle concurrent clones,
	 * this happens for broad/multicast frames with e.g. macvlan on top
	 * of the bridge device.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
		return NF_ACCEPT;

	/* let inet prerouting call conntrack again */
	skb->_nfct = 0;
	nf_ct_put(ct);

	return NF_ACCEPT;
}

static void nf_ct_bridge_frag_save(struct sk_buff *skb,
				   struct nf_bridge_frag_data *data)
{
@@ -385,6 +409,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
		.hooknum	= NF_BR_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		.hook		= nf_ct_bridge_in,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
	{
		.hook		= nf_ct_bridge_post,
		.pf		= NFPROTO_BRIDGE,
+1 −0
Original line number Diff line number Diff line
@@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
	.get_tuple_skb  = nf_conntrack_get_tuple_skb,
	.attach		= nf_conntrack_attach,
	.set_closing	= nf_conntrack_set_closing,
	.confirm	= __nf_conntrack_confirm,
};

void nf_conntrack_init_end(void)
+20 −0
Original line number Diff line number Diff line
@@ -359,10 +359,20 @@ static int nft_target_validate(const struct nft_ctx *ctx,

	if (ctx->family != NFPROTO_IPV4 &&
	    ctx->family != NFPROTO_IPV6 &&
	    ctx->family != NFPROTO_INET &&
	    ctx->family != NFPROTO_BRIDGE &&
	    ctx->family != NFPROTO_ARP)
		return -EOPNOTSUPP;

	ret = nft_chain_validate_hooks(ctx->chain,
				       (1 << NF_INET_PRE_ROUTING) |
				       (1 << NF_INET_LOCAL_IN) |
				       (1 << NF_INET_FORWARD) |
				       (1 << NF_INET_LOCAL_OUT) |
				       (1 << NF_INET_POST_ROUTING));
	if (ret)
		return ret;

	if (nft_is_base_chain(ctx->chain)) {
		const struct nft_base_chain *basechain =
						nft_base_chain(ctx->chain);
@@ -610,10 +620,20 @@ static int nft_match_validate(const struct nft_ctx *ctx,

	if (ctx->family != NFPROTO_IPV4 &&
	    ctx->family != NFPROTO_IPV6 &&
	    ctx->family != NFPROTO_INET &&
	    ctx->family != NFPROTO_BRIDGE &&
	    ctx->family != NFPROTO_ARP)
		return -EOPNOTSUPP;

	ret = nft_chain_validate_hooks(ctx->chain,
				       (1 << NF_INET_PRE_ROUTING) |
				       (1 << NF_INET_LOCAL_IN) |
				       (1 << NF_INET_FORWARD) |
				       (1 << NF_INET_LOCAL_OUT) |
				       (1 << NF_INET_POST_ROUTING));
	if (ret)
		return ret;

	if (nft_is_base_chain(ctx->chain)) {
		const struct nft_base_chain *basechain =
						nft_base_chain(ctx->chain);
Loading