Commit 91721c2d authored by Daniel Xu's avatar Daniel Xu Committed by Alexei Starovoitov
Browse files

netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link



This commit adds support for enabling IP defrag using pre-existing
netfilter defrag support. Basically all the flag does is bump a refcnt
while the link the active. Checks are also added to ensure the prog
requesting defrag support is run _after_ netfilter defrag hooks.

We also take care to avoid any issues w.r.t. module unloading -- while
defrag is active on a link, the module is prevented from unloading.

Signed-off-by: default avatarDaniel Xu <dxu@dxuuu.xyz>
Reviewed-by: default avatarFlorian Westphal <fw@strlen.de>
Link: https://lore.kernel.org/r/5cff26f97e55161b7d56b09ddcf5f8888a5add1d.1689970773.git.dxu@dxuuu.xyz


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 9abddac5
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -1188,6 +1188,11 @@ enum bpf_perf_event_type {
 */
#define BPF_F_KPROBE_MULTI_RETURN	(1U << 0)

/* link_create.netfilter.flags used in LINK_CREATE command for
 * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
 */
#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)

/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
 * the following extensions:
 *
+108 −15
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netfilter.h>

#include <net/netfilter/nf_bpf_link.h>
@@ -23,8 +25,88 @@ struct bpf_nf_link {
	struct nf_hook_ops hook_ops;
	struct net *net;
	u32 dead;
	const struct nf_defrag_hook *defrag_hook;
};

static const struct nf_defrag_hook *
get_proto_defrag_hook(struct bpf_nf_link *link,
		      const struct nf_defrag_hook __rcu *global_hook,
		      const char *mod)
{
	const struct nf_defrag_hook *hook;
	int err;

	/* RCU protects us from races against module unloading */
	rcu_read_lock();
	hook = rcu_dereference(global_hook);
	if (!hook) {
		rcu_read_unlock();
		err = request_module(mod);
		if (err)
			return ERR_PTR(err < 0 ? err : -EINVAL);

		rcu_read_lock();
		hook = rcu_dereference(global_hook);
	}

	if (hook && try_module_get(hook->owner)) {
		/* Once we have a refcnt on the module, we no longer need RCU */
		hook = rcu_pointer_handoff(hook);
	} else {
		WARN_ONCE(!hook, "%s has bad registration", mod);
		hook = ERR_PTR(-ENOENT);
	}
	rcu_read_unlock();

	if (!IS_ERR(hook)) {
		err = hook->enable(link->net);
		if (err) {
			module_put(hook->owner);
			hook = ERR_PTR(err);
		}
	}

	return hook;
}

static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
{
	const struct nf_defrag_hook __maybe_unused *hook;

	switch (link->hook_ops.pf) {
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
	case NFPROTO_IPV4:
		hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
		if (IS_ERR(hook))
			return PTR_ERR(hook);

		link->defrag_hook = hook;
		return 0;
#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
	case NFPROTO_IPV6:
		hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
		if (IS_ERR(hook))
			return PTR_ERR(hook);

		link->defrag_hook = hook;
		return 0;
#endif
	default:
		return -EAFNOSUPPORT;
	}
}

static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
{
	const struct nf_defrag_hook *hook = link->defrag_hook;

	if (!hook)
		return;
	hook->disable(link->net);
	module_put(hook->owner);
}

static void bpf_nf_link_release(struct bpf_link *link)
{
	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
@@ -32,11 +114,11 @@ static void bpf_nf_link_release(struct bpf_link *link)
	if (nf_link->dead)
		return;

	/* prevent hook-not-found warning splat from netfilter core when
	 * .detach was already called
	 */
	if (!cmpxchg(&nf_link->dead, 0, 1))
	/* do not double release in case .detach was already called */
	if (!cmpxchg(&nf_link->dead, 0, 1)) {
		nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
		bpf_nf_disable_defrag(nf_link);
	}
}

static void bpf_nf_link_dealloc(struct bpf_link *link)
@@ -92,6 +174,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = {

static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
{
	int prio;

	switch (attr->link_create.netfilter.pf) {
	case NFPROTO_IPV4:
	case NFPROTO_IPV6:
@@ -102,19 +186,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
		return -EAFNOSUPPORT;
	}

	if (attr->link_create.netfilter.flags)
	if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG)
		return -EOPNOTSUPP;

	/* make sure conntrack confirm is always last.
	 *
	 * In the future, if userspace can e.g. request defrag, then
	 * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
	 * should fail.
	 */
	switch (attr->link_create.netfilter.priority) {
	case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
	case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
	}
	/* make sure conntrack confirm is always last */
	prio = attr->link_create.netfilter.priority;
	if (prio == NF_IP_PRI_FIRST)
		return -ERANGE;  /* sabotage_in and other warts */
	else if (prio == NF_IP_PRI_LAST)
		return -ERANGE;  /* e.g. conntrack confirm */
	else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
		 prio <= NF_IP_PRI_CONNTRACK_DEFRAG)
		return -ERANGE;  /* cannot use defrag if prog runs before nf_defrag */

	return 0;
}
@@ -149,6 +232,7 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)

	link->net = net;
	link->dead = false;
	link->defrag_hook = NULL;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
@@ -156,8 +240,17 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
		return err;
	}

	if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) {
		err = bpf_nf_enable_defrag(link);
		if (err) {
			bpf_link_cleanup(&link_primer);
			return err;
		}
	}

	err = nf_register_net_hook(net, &link->hook_ops);
	if (err) {
		bpf_nf_disable_defrag(link);
		bpf_link_cleanup(&link_primer);
		return err;
	}
+5 −0
Original line number Diff line number Diff line
@@ -1188,6 +1188,11 @@ enum bpf_perf_event_type {
 */
#define BPF_F_KPROBE_MULTI_RETURN	(1U << 0)

/* link_create.netfilter.flags used in LINK_CREATE command for
 * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
 */
#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)

/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
 * the following extensions:
 *