Commit e75e408d authored by Paolo Abeni
Browse files
Florian Westphal says:

====================
netfilter: updates for net-next

The following patchset contains Netfilter updates for *net-next*:

Patches 1 to 4 add IP6IP6 tunneling acceleration to the flowtable
infrastructure.  Patch 5 extends test coverage for this.
From Lorenzo Bianconi.

Patch 6 removes a duplicated helper from the xt_time extension; an
existing helper can be used instead.  From Jinjie Ruan.

Patch 7 adds an rhashtable to nfnetlink_queue to speed up out-of-order
verdict processing.  Before this, a list walk was required due to the
in-order design assumption.

netfilter pull request nf-next-26-01-29

* tag 'nf-next-26-01-29' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: nfnetlink_queue: optimize verdict lookup with hash table
  netfilter: xt_time: use is_leap_year() helper
  selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest
  netfilter: flowtable: Add IP6IP6 tx sw acceleration
  netfilter: flowtable: Add IP6IP6 rx sw acceleration
  netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct
  netfilter: Add ctx pointer in nf_flow_skb_encap_protocol/nf_flow_ip4_tunnel_proto signature
====================

Link: https://patch.msgid.link/20260129105427.12494-1-fw@strlen.de


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents aba0138e e19079ad
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -6,11 +6,13 @@
#include <linux/ipv6.h>
#include <linux/jhash.h>
#include <linux/netfilter.h>
#include <linux/rhashtable-types.h>
#include <linux/skbuff.h>

/* Each queued (to userspace) skbuff has one of these. */
struct nf_queue_entry {
	struct list_head	list;
	struct rhash_head	hash_node;
	struct sk_buff		*skb;
	unsigned int		id;
	unsigned int		hook_index;	/* index in hook_entries->hook[] */
@@ -20,6 +22,7 @@ struct nf_queue_entry {
#endif
	struct nf_hook_state	state;
	u16			size; /* sizeof(entry) + saved route keys */
	u16			queue_num;

	/* extra space to store route keys */
};
+27 −0
Original line number Diff line number Diff line
@@ -1828,6 +1828,32 @@ int ip6_tnl_encap_setup(struct ip6_tnl *t,
}
EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);

static int ip6_tnl_fill_forward_path(struct net_device_path_ctx *ctx,
				     struct net_device_path *path)
{
	struct ip6_tnl *t = netdev_priv(ctx->dev);
	struct flowi6 fl6 = {
		.daddr = t->parms.raddr,
	};
	struct dst_entry *dst;
	int err;

	dst = ip6_route_output(dev_net(ctx->dev), NULL, &fl6);
	if (!dst->error) {
		path->type = DEV_PATH_TUN;
		path->tun.src_v6 = t->parms.laddr;
		path->tun.dst_v6 = t->parms.raddr;
		path->tun.l3_proto = IPPROTO_IPV6;
		path->dev = ctx->dev;
		ctx->dev = dst->dev;
	}

	err = dst->error;
	dst_release(dst);

	return err;
}

static const struct net_device_ops ip6_tnl_netdev_ops = {
	.ndo_init	= ip6_tnl_dev_init,
	.ndo_uninit	= ip6_tnl_dev_uninit,
@@ -1836,6 +1862,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
	.ndo_change_mtu = ip6_tnl_change_mtu,
	.ndo_get_stats64 = dev_get_tstats64,
	.ndo_get_iflink = ip6_tnl_get_iflink,
	.ndo_fill_forward_path = ip6_tnl_fill_forward_path,
};

#define IPXIPX_FEATURES (NETIF_F_SG |		\
+207 −36
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip6_tunnel.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack_acct.h>
@@ -144,12 +145,26 @@ static bool ip_has_options(unsigned int thoff)
	return thoff != sizeof(struct iphdr);
}

static void nf_flow_tuple_encap(struct sk_buff *skb,
/*
 * Per-packet parsing state shared by the flowtable lookup and forward
 * helpers in this file.
 */
struct nf_flowtable_ctx {
	/* ingress device; its ifindex is stored as the tuple's iifidx */
	const struct net_device	*in;
	/*
	 * Bytes of encapsulation found in front of the inner IP header;
	 * grows by VLAN_HLEN, PPPOE_SES_HLEN and the tunnel header size
	 * as each layer is recognised.
	 */
	u32			offset;
	/* NOTE(review): presumably the transport header size; not set in the visible hunks */
	u32			hdrsize;
	struct {
		/* Tunnel IP header size */
		u32 hdr_size;
		/* IP tunnel protocol */
		u8 proto;
	} tun;
};

static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
				struct sk_buff *skb,
				struct flow_offload_tuple *tuple)
{
	__be16 inner_proto = skb->protocol;
	struct vlan_ethhdr *veth;
	struct pppoe_hdr *phdr;
	struct ipv6hdr *ip6h;
	struct iphdr *iph;
	u16 offset = 0;
	int i = 0;
@@ -176,22 +191,28 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
		break;
	}

	if (inner_proto == htons(ETH_P_IP)) {
	switch (inner_proto) {
	case htons(ETH_P_IP):
		iph = (struct iphdr *)(skb_network_header(skb) + offset);
		if (iph->protocol == IPPROTO_IPIP) {
		if (ctx->tun.proto == IPPROTO_IPIP) {
			tuple->tun.dst_v4.s_addr = iph->daddr;
			tuple->tun.src_v4.s_addr = iph->saddr;
			tuple->tun.l3_proto = IPPROTO_IPIP;
		}
		break;
	case htons(ETH_P_IPV6):
		ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
		if (ctx->tun.proto == IPPROTO_IPV6) {
			tuple->tun.dst_v6 = ip6h->daddr;
			tuple->tun.src_v6 = ip6h->saddr;
			tuple->tun.l3_proto = IPPROTO_IPV6;
		}
		break;
	default:
		break;
	}
}

struct nf_flowtable_ctx {
	const struct net_device	*in;
	u32			offset;
	u32			hdrsize;
};

static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
			    struct flow_offload_tuple *tuple)
{
@@ -259,7 +280,7 @@ static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
	tuple->l3proto		= AF_INET;
	tuple->l4proto		= ipproto;
	tuple->iifidx		= ctx->in->ifindex;
	nf_flow_tuple_encap(skb, tuple);
	nf_flow_tuple_encap(ctx, skb, tuple);

	return 0;
}
@@ -295,15 +316,16 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
	return NF_STOLEN;
}

static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
				     struct sk_buff *skb)
{
	struct iphdr *iph;
	u16 size;

	if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
	if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
		return false;

	iph = (struct iphdr *)(skb_network_header(skb) + *psize);
	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
	size = iph->ihl << 2;

	if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
@@ -312,25 +334,62 @@ static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
	if (iph->ttl <= 1)
		return false;

	if (iph->protocol == IPPROTO_IPIP)
		*psize += size;
	if (iph->protocol == IPPROTO_IPIP) {
		ctx->tun.proto = IPPROTO_IPIP;
		ctx->tun.hdr_size = size;
		ctx->offset += size;
	}

	return true;
}

static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
static bool nf_flow_ip6_tunnel_proto(struct nf_flowtable_ctx *ctx,
				     struct sk_buff *skb)
{
	struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *ip6h, _ip6h;
	__be16 frag_off;
	u8 nexthdr;
	int hdrlen;

	if (iph->protocol != IPPROTO_IPIP)
	ip6h = skb_header_pointer(skb, ctx->offset, sizeof(*ip6h), &_ip6h);
	if (!ip6h)
		return false;

	if (ip6h->hop_limit <= 1)
		return false;

	nexthdr = ip6h->nexthdr;
	hdrlen = ipv6_skip_exthdr(skb, sizeof(*ip6h) + ctx->offset, &nexthdr,
				  &frag_off);
	if (hdrlen < 0)
		return false;

	if (nexthdr == IPPROTO_IPV6) {
		ctx->tun.hdr_size = hdrlen;
		ctx->tun.proto = IPPROTO_IPV6;
	}
	ctx->offset += ctx->tun.hdr_size;

	return true;
#else
	return false;
#endif /* IS_ENABLED(CONFIG_IPV6) */
}

static void nf_flow_ip_tunnel_pop(struct nf_flowtable_ctx *ctx,
				  struct sk_buff *skb)
{
	if (ctx->tun.proto != IPPROTO_IPIP &&
	    ctx->tun.proto != IPPROTO_IPV6)
		return;

	skb_pull(skb, iph->ihl << 2);
	skb_pull(skb, ctx->tun.hdr_size);
	skb_reset_network_header(skb);
}

static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
				       u32 *offset)
static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
				       struct sk_buff *skb, __be16 proto)
{
	__be16 inner_proto = skb->protocol;
	struct vlan_ethhdr *veth;
@@ -343,7 +402,7 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,

		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		if (veth->h_vlan_encapsulated_proto == proto) {
			*offset += VLAN_HLEN;
			ctx->offset += VLAN_HLEN;
			inner_proto = proto;
			ret = true;
		}
@@ -351,19 +410,28 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
	case htons(ETH_P_PPP_SES):
		if (nf_flow_pppoe_proto(skb, &inner_proto) &&
		    inner_proto == proto) {
			*offset += PPPOE_SES_HLEN;
			ctx->offset += PPPOE_SES_HLEN;
			ret = true;
		}
		break;
	}

	if (inner_proto == htons(ETH_P_IP))
		ret = nf_flow_ip4_tunnel_proto(skb, offset);
	switch (inner_proto) {
	case htons(ETH_P_IP):
		ret = nf_flow_ip4_tunnel_proto(ctx, skb);
		break;
	case htons(ETH_P_IPV6):
		ret = nf_flow_ip6_tunnel_proto(ctx, skb);
		break;
	default:
		break;
	}

	return ret;
}

static void nf_flow_encap_pop(struct sk_buff *skb,
static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
			      struct sk_buff *skb,
			      struct flow_offload_tuple_rhash *tuplehash)
{
	struct vlan_hdr *vlan_hdr;
@@ -389,8 +457,9 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
		}
	}

	if (skb->protocol == htons(ETH_P_IP))
		nf_flow_ip4_tunnel_pop(skb);
	if (skb->protocol == htons(ETH_P_IP) ||
	    skb->protocol == htons(ETH_P_IPV6))
		nf_flow_ip_tunnel_pop(ctx, skb);
}

struct nf_flow_xmit {
@@ -416,7 +485,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
{
	struct flow_offload_tuple tuple = {};

	if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
	if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP)))
		return NULL;

	if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
@@ -460,7 +529,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,

	flow_offload_refresh(flow_table, flow, false);

	nf_flow_encap_pop(skb, tuplehash);
	nf_flow_encap_pop(ctx, skb, tuplehash);
	thoff -= ctx->offset;

	iph = ip_hdr(skb);
@@ -569,6 +638,97 @@ static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
	return 0;
}

/*
 * Tx options plus backing storage for an 8-byte destination options
 * header.  nf_flow_tunnel_ip6ip6_push() fills dst_opt with a tunnel
 * encapsulation limit option followed by PadN and points ops.dst1opt at it.
 */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};

static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
				      struct flow_offload_tuple *tuple,
				      struct in6_addr **ip6_daddr,
				      int encap_limit)
{
	struct ipv6hdr *ip6h = (struct ipv6hdr *)skb_network_header(skb);
	u8 hop_limit = ip6h->hop_limit, proto = IPPROTO_IPV6;
	struct rtable *rt = dst_rtable(tuple->dst_cache);
	__u8 dsfield = ipv6_get_dsfield(ip6h);
	struct flowi6 fl6 = {
		.daddr = tuple->tun.src_v6,
		.saddr = tuple->tun.dst_v6,
		.flowi6_proto = proto,
	};
	int err, mtu;
	u32 headroom;

	err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
	if (err)
		return err;

	skb_set_inner_ipproto(skb, proto);
	headroom = sizeof(*ip6h) + LL_RESERVED_SPACE(rt->dst.dev) +
		   rt->dst.header_len;
	if (encap_limit)
		headroom += 8;
	err = skb_cow_head(skb, headroom);
	if (err)
		return err;

	skb_scrub_packet(skb, true);
	mtu = dst_mtu(&rt->dst) - sizeof(*ip6h);
	if (encap_limit)
		mtu -= 8;
	mtu = max(mtu, IPV6_MIN_MTU);
	skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (encap_limit > 0) {
		struct ipv6_tel_txoption opt = {
			.dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT,
			.dst_opt[3] = 1,
			.dst_opt[4] = encap_limit,
			.dst_opt[5] = IPV6_TLV_PADN,
			.dst_opt[6] = 1,
		};
		struct ipv6_opt_hdr *hopt;

		opt.ops.dst1opt = (struct ipv6_opt_hdr *)opt.dst_opt;
		opt.ops.opt_nflen = 8;

		hopt = skb_push(skb, ipv6_optlen(opt.ops.dst1opt));
		memcpy(hopt, opt.ops.dst1opt, ipv6_optlen(opt.ops.dst1opt));
		hopt->nexthdr = IPPROTO_IPV6;
		proto = NEXTHDR_DEST;
	}

	skb_push(skb, sizeof(*ip6h));
	skb_reset_network_header(skb);

	ip6h = ipv6_hdr(skb);
	ip6_flow_hdr(ip6h, dsfield,
		     ip6_make_flowlabel(net, skb, fl6.flowlabel, true, &fl6));
	ip6h->hop_limit = hop_limit;
	ip6h->nexthdr = proto;
	ip6h->daddr = tuple->tun.src_v6;
	ip6h->saddr = tuple->tun.dst_v6;
	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(*ip6h));
	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);

	*ip6_daddr = &tuple->tun.src_v6;

	return 0;
}

/*
 * Add the IP6IP6 outer header when @tuple describes a tunnel
 * (tun_num != 0); otherwise leave @skb alone.
 *
 * Returns 0 when there is nothing to do, else the result of
 * nf_flow_tunnel_ip6ip6_push().
 */
static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
				  struct flow_offload_tuple *tuple,
				  struct in6_addr **ip6_daddr,
				  int encap_limit)
{
	if (!tuple->tun_num)
		return 0;

	return nf_flow_tunnel_ip6ip6_push(net, skb, tuple, ip6_daddr,
					  encap_limit);
}

static int nf_flow_encap_push(struct sk_buff *skb,
			      struct flow_offload_tuple *tuple)
{
@@ -838,7 +998,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
	tuple->l3proto		= AF_INET6;
	tuple->l4proto		= nexthdr;
	tuple->iifidx		= ctx->in->ifindex;
	nf_flow_tuple_encap(skb, tuple);
	nf_flow_tuple_encap(ctx, skb, tuple);

	return 0;
}
@@ -846,7 +1006,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
					struct nf_flowtable *flow_table,
					struct flow_offload_tuple_rhash *tuplehash,
					struct sk_buff *skb)
					struct sk_buff *skb, int encap_limit)
{
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
@@ -857,6 +1017,12 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
	if (flow->tuplehash[!dir].tuple.tun_num) {
		mtu -= sizeof(*ip6h);
		if (encap_limit > 0)
			mtu -= 8; /* encap limit option */
	}

	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return 0;

@@ -875,7 +1041,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,

	flow_offload_refresh(flow_table, flow, false);

	nf_flow_encap_pop(skb, tuplehash);
	nf_flow_encap_pop(ctx, skb, tuplehash);

	ip6h = ipv6_hdr(skb);
	nf_flow_nat_ipv6(flow, skb, dir, ip6h);
@@ -896,8 +1062,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
{
	struct flow_offload_tuple tuple = {};

	if (skb->protocol != htons(ETH_P_IPV6) &&
	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
	if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
		return NULL;

	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
@@ -910,6 +1075,7 @@ unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	int encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple *other_tuple;
@@ -928,7 +1094,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
	if (tuplehash == NULL)
		return NF_ACCEPT;

	ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
	ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb,
					   encap_limit);
	if (ret < 0)
		return NF_DROP;
	else if (ret == 0)
@@ -947,6 +1114,10 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
	other_tuple = &flow->tuplehash[!dir].tuple;
	ip6_daddr = &other_tuple->src_v6;

	if (nf_flow_tunnel_v6_push(state->net, skb, other_tuple,
				   &ip6_daddr, encap_limit) < 0)
		return NF_DROP;

	if (nf_flow_encap_push(skb, other_tuple) < 0)
		return NF_DROP;

+116 −30
Original line number Diff line number Diff line
@@ -30,6 +30,8 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/list.h>
#include <linux/cgroup-defs.h>
#include <linux/rhashtable.h>
#include <linux/jhash.h>
#include <net/gso.h>
#include <net/sock.h>
#include <net/tcp_states.h>
@@ -47,6 +49,8 @@
#endif

#define NFQNL_QMAX_DEFAULT 1024
#define NFQNL_HASH_MIN     1024
#define NFQNL_HASH_MAX     1048576

/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
 * includes the header length. Thus, the maximum packet length that we
@@ -56,6 +60,26 @@
 */
#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)

/*
 * Composite key for packet lookup: (net, queue_num, packet_id).
 *
 * The key is hashed as an array of u32 words (see
 * nfqnl_packet_obj_hashfn()), so it must be built with nfqnl_init_key(),
 * which zeroes the whole struct before the fields are stored.
 */
struct nfqnl_packet_key {
	possible_net_t net;
	u32 packet_id;
	u16 queue_num;
} __aligned(sizeof(u32));  /* jhash2 requires 32-bit alignment */

/*
 * Global rhashtable - one for entire system, all netns.
 * Holds queued nf_queue_entry objects; set up in nfnetlink_queue_init()
 * and torn down in nfnetlink_queue_fini().
 */
static struct rhashtable nfqnl_packet_map __read_mostly;

/*
 * Build the composite lookup key (@net, @queue_num, @packet_id) in @key.
 *
 * The struct is cleared first so that every byte, not just the named
 * fields, has a defined value before the key is hashed word-by-word.
 */
static inline void nfqnl_init_key(struct nfqnl_packet_key *key,
				  struct net *net, u32 packet_id, u16 queue_num)
{
	memset(key, 0, sizeof(struct nfqnl_packet_key));

	key->queue_num = queue_num;
	key->packet_id = packet_id;
	write_pnet(&key->net, net);
}

struct nfqnl_instance {
	struct hlist_node hlist;		/* global list of queues */
	struct rcu_head rcu;
@@ -100,6 +124,39 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
	return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
}

/*
 * rhashtable obj_hashfn: hash a queued entry.
 *
 * A temporary key is rebuilt from the entry's netns, id and queue number
 * and hashed with jhash2() over its u32 words.  @len is not used.
 */
static u32 nfqnl_packet_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct nf_queue_entry *e = data;
	struct nfqnl_packet_key k;

	nfqnl_init_key(&k, e->state.net, e->id, e->queue_num);

	return jhash2((u32 *)&k, sizeof(k) / sizeof(u32), seed);
}

/*
 * rhashtable obj_cmpfn: compare a lookup key against a queued entry.
 *
 * Returns 0 when netns, queue number and packet id all match, 1 otherwise.
 */
static int nfqnl_packet_obj_cmpfn(struct rhashtable_compare_arg *arg,
				  const void *obj)
{
	const struct nf_queue_entry *entry = obj;
	const struct nfqnl_packet_key *key = arg->key;

	if (!net_eq(entry->state.net, read_pnet(&key->net)))
		return 1;

	if (entry->queue_num != key->queue_num)
		return 1;

	if (entry->id != key->packet_id)
		return 1;

	return 0;
}

/*
 * Parameters for nfqnl_packet_map.  Objects are struct nf_queue_entry,
 * linked through hash_node.  The key is never stored in the object:
 * nfqnl_packet_obj_hashfn() rebuilds it from the entry and
 * nfqnl_packet_obj_cmpfn() compares a lookup key field by field.
 * The table may shrink automatically and is sized between
 * NFQNL_HASH_MIN and NFQNL_HASH_MAX.
 */
static const struct rhashtable_params nfqnl_rhashtable_params = {
	.head_offset = offsetof(struct nf_queue_entry, hash_node),
	.key_len = sizeof(struct nfqnl_packet_key),
	.obj_hashfn = nfqnl_packet_obj_hashfn,
	.obj_cmpfn = nfqnl_packet_obj_cmpfn,
	.automatic_shrinking = true,
	.min_size = NFQNL_HASH_MIN,
	.max_size = NFQNL_HASH_MAX,
};

static struct nfqnl_instance *
instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
{
@@ -188,33 +245,45 @@ instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
	spin_unlock(&q->instances_lock);
}

static inline void
/*
 * Make @entry visible on @queue: record the queue number in the entry,
 * insert it into the global packet hash, then append it to the queue's
 * list and bump the entry count.
 *
 * Returns 0 on success.  If the hash insert fails its error is returned
 * and neither the list nor the count is touched.
 */
static int
__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
	int ret;

	/* queue_num is part of the hash key, so it must be set before insert */
	entry->queue_num = queue->queue_num;

	ret = rhashtable_insert_fast(&nfqnl_packet_map, &entry->hash_node,
				     nfqnl_rhashtable_params);
	if (unlikely(ret != 0))
		return ret;

	queue->queue_total++;
	list_add_tail(&entry->list, &queue->queue_list);

	return 0;
}

/*
 * Undo __enqueue_entry(): remove @entry from the global packet hash and
 * from @queue's list, and decrement the queue's entry count.
 *
 * The return value of rhashtable_remove_fast() is not checked.
 * NOTE(review): the visible callers run with queue->lock held - confirm
 * this holds for every caller.
 */
static void
__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
	rhashtable_remove_fast(&nfqnl_packet_map, &entry->hash_node,
			       nfqnl_rhashtable_params);
	list_del(&entry->list);
	queue->queue_total--;
}

static struct nf_queue_entry *
find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id,
		   struct net *net)
{
	struct nf_queue_entry *entry = NULL, *i;
	struct nfqnl_packet_key key;
	struct nf_queue_entry *entry;

	spin_lock_bh(&queue->lock);
	nfqnl_init_key(&key, net, id, queue->queue_num);

	list_for_each_entry(i, &queue->queue_list, list) {
		if (i->id == id) {
			entry = i;
			break;
		}
	}
	spin_lock_bh(&queue->lock);
	entry = rhashtable_lookup_fast(&nfqnl_packet_map, &key,
				       nfqnl_rhashtable_params);

	if (entry)
		__dequeue_entry(queue, entry);
@@ -404,8 +473,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
	spin_lock_bh(&queue->lock);
	list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
		if (!cmpfn || cmpfn(entry, data)) {
			list_del(&entry->list);
			queue->queue_total--;
			__dequeue_entry(queue, entry);
			nfqnl_reinject(entry, NF_DROP);
		}
	}
@@ -885,23 +953,23 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
	if (nf_ct_drop_unconfirmed(entry))
		goto err_out_free_nskb;

	if (queue->queue_total >= queue->queue_maxlen) {
		if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
			failopen = 1;
			err = 0;
		} else {
			queue->queue_dropped++;
			net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n",
					     queue->queue_total);
		}
		goto err_out_free_nskb;
	}
	if (queue->queue_total >= queue->queue_maxlen)
		goto err_out_queue_drop;

	entry->id = ++queue->id_sequence;
	*packet_id_ptr = htonl(entry->id);

	/* Insert into hash BEFORE unicast. If failure don't send to userspace. */
	err = __enqueue_entry(queue, entry);
	if (unlikely(err))
		goto err_out_queue_drop;

	/* nfnetlink_unicast will either free the nskb or add it to a socket */
	err = nfnetlink_unicast(nskb, net, queue->peer_portid);
	if (err < 0) {
		/* Unicast failed - remove entry we just inserted */
		__dequeue_entry(queue, entry);

		if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
			failopen = 1;
			err = 0;
@@ -911,11 +979,22 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
		goto err_out_unlock;
	}

	__enqueue_entry(queue, entry);

	spin_unlock_bh(&queue->lock);
	return 0;

err_out_queue_drop:
	if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
		failopen = 1;
		err = 0;
	} else {
		queue->queue_dropped++;

		if (queue->queue_total >= queue->queue_maxlen)
			net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n",
					     queue->queue_total);
		else
			net_warn_ratelimited("nf_queue: hash insert failed: %d\n", err);
	}
err_out_free_nskb:
	kfree_skb(nskb);
err_out_unlock:
@@ -1427,7 +1506,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,

	verdict = ntohl(vhdr->verdict);

	entry = find_dequeue_entry(queue, ntohl(vhdr->id));
	entry = find_dequeue_entry(queue, ntohl(vhdr->id), info->net);
	if (entry == NULL)
		return -ENOENT;

@@ -1774,10 +1853,14 @@ static int __init nfnetlink_queue_init(void)
{
	int status;

	status = rhashtable_init(&nfqnl_packet_map, &nfqnl_rhashtable_params);
	if (status < 0)
		return status;

	status = register_pernet_subsys(&nfnl_queue_net_ops);
	if (status < 0) {
		pr_err("failed to register pernet ops\n");
		goto out;
		goto cleanup_rhashtable;
	}

	netlink_register_notifier(&nfqnl_rtnl_notifier);
@@ -1802,7 +1885,8 @@ static int __init nfnetlink_queue_init(void)
cleanup_netlink_notifier:
	netlink_unregister_notifier(&nfqnl_rtnl_notifier);
	unregister_pernet_subsys(&nfnl_queue_net_ops);
out:
cleanup_rhashtable:
	rhashtable_destroy(&nfqnl_packet_map);
	return status;
}

@@ -1814,6 +1898,8 @@ static void __exit nfnetlink_queue_fini(void)
	netlink_unregister_notifier(&nfqnl_rtnl_notifier);
	unregister_pernet_subsys(&nfnl_queue_net_ops);

	rhashtable_destroy(&nfqnl_packet_map);

	rcu_barrier(); /* Wait for completion of call_rcu()'s */
}

+2 −6
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@

#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/rtc.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/netfilter/x_tables.h>
@@ -64,11 +65,6 @@ static const u_int16_t days_since_epoch[] = {
	3287, 2922, 2557, 2191, 1826, 1461, 1096, 730, 365, 0,
};

/*
 * Gregorian leap year test: divisible by 4, except century years,
 * which must also be divisible by 400.
 */
static inline bool is_leap(unsigned int y)
{
	if (y % 4 != 0)
		return false;

	if (y % 100 != 0)
		return true;

	return y % 400 == 0;
}

/*
 * Each network packet has a (nano)seconds-since-the-epoch (SSTE) timestamp.
 * Since we match against days and daytime, the SSTE value needs to be
@@ -138,7 +134,7 @@ static void localtime_3(struct xtm *r, time64_t time)
	 * (A different approach to use would be to subtract a monthlength
	 * from w repeatedly while counting.)
	 */
	if (is_leap(year)) {
	if (is_leap_year(year)) {
		/* use days_since_leapyear[] in a leap year */
		for (i = ARRAY_SIZE(days_since_leapyear) - 1;
		    i > 0 && days_since_leapyear[i] > w; --i)
Loading