From 76522fcdbc3a02b568f5d957f7e66fc194abb893 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 26 Mar 2026 00:17:09 +0100 Subject: [PATCH 01/10] netfilter: flowtable: strictly check for maximum number of actions The maximum number of flowtable hardware offload actions in IPv6 is: * ethernet mangling (4 payload actions, 2 for each ethernet address) * SNAT (4 payload actions) * DNAT (4 payload actions) * Double VLAN (4 vlan actions, 2 for popping vlan, and 2 for pushing) for QinQ. * Redirect (1 action) Which makes 17, while the maximum is 16. But act_ct supports for tunnels actions too. Note that payload action operates at 32-bit word level, so mangling an IPv6 address takes 4 payload actions. Update flow_action_entry_next() calls to check for the maximum number of supported actions. While at it, rise the maximum number of actions per flow from 16 to 24 so this works fine with IPv6 setups. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Reported-by: Hyunwoo Kim Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 196 +++++++++++++++++--------- 1 file changed, 130 insertions(+), 66 deletions(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 9b677e116487..93d0aa7f8fcc 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -14,6 +14,8 @@ #include #include +#define NF_FLOW_RULE_ACTION_MAX 24 + static struct workqueue_struct *nf_flow_offload_add_wq; static struct workqueue_struct *nf_flow_offload_del_wq; static struct workqueue_struct *nf_flow_offload_stats_wq; @@ -216,7 +218,12 @@ static void flow_offload_mangle(struct flow_action_entry *entry, static inline struct flow_action_entry * flow_action_entry_next(struct nf_flow_rule *flow_rule) { - int i = flow_rule->rule->action.num_entries++; + int i; + + if (unlikely(flow_rule->rule->action.num_entries >= NF_FLOW_RULE_ACTION_MAX)) + return NULL; + + i = flow_rule->rule->action.num_entries++; return &flow_rule->rule->action.entries[i]; } @@ -234,6 +241,9 @@ static int flow_offload_eth_src(struct net *net, u32 mask, val; u16 val16; + if (!entry0 || !entry1) + return -E2BIG; + this_tuple = &flow->tuplehash[dir].tuple; switch (this_tuple->xmit_type) { @@ -284,6 +294,9 @@ static int flow_offload_eth_dst(struct net *net, u8 nud_state; u16 val16; + if (!entry0 || !entry1) + return -E2BIG; + this_tuple = &flow->tuplehash[dir].tuple; switch (this_tuple->xmit_type) { @@ -325,16 +338,19 @@ static int flow_offload_eth_dst(struct net *net, return 0; } -static void flow_offload_ipv4_snat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; @@ -345,23 +361,27 @@ static void flow_offload_ipv4_snat(struct net *net, offset = offsetof(struct iphdr, daddr); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, &addr, &mask); + return 0; } -static void flow_offload_ipv4_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; @@ -372,14 +392,15 @@ static void flow_offload_ipv4_dnat(struct net *net, offset = offsetof(struct iphdr, saddr); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, &addr, &mask); + return 0; } -static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, +static int flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, unsigned int offset, const __be32 *addr, const __be32 *mask) { @@ -388,15 +409,20 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; + flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, offset + i * sizeof(u32), &addr[i], mask); } + + return 0; } -static void flow_offload_ipv6_snat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv6_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); const __be32 *addr; @@ -412,16 +438,16 @@ static void flow_offload_ipv6_snat(struct net *net, offset = offsetof(struct ipv6hdr, daddr); break; default: - return; + return -EOPNOTSUPP; } - flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); + return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } -static void flow_offload_ipv6_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv6_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); const __be32 *addr; @@ -437,10 +463,10 @@ static void flow_offload_ipv6_dnat(struct net *net, offset = offsetof(struct ipv6hdr, saddr); break; default: - return; + return -EOPNOTSUPP; } - flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); + return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } static int flow_offload_l4proto(const struct flow_offload *flow) @@ -462,15 +488,18 @@ static int flow_offload_l4proto(const struct flow_offload *flow) return type; } -static void flow_offload_port_snat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_port_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask, port; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port); @@ -485,22 +514,26 @@ static void flow_offload_port_snat(struct net *net, mask = ~htonl(0xffff); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, &port, &mask); + return 0; } -static void flow_offload_port_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_port_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask, port; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port); @@ -515,20 +548,24 @@ static void flow_offload_port_dnat(struct net *net, mask = ~htonl(0xffff0000); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, &port, &mask); + return 0; } -static void flow_offload_ipv4_checksum(struct net *net, - const struct flow_offload *flow, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_checksum(struct net *net, + const struct flow_offload *flow, + struct nf_flow_rule *flow_rule) { u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto; struct flow_action_entry *entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; + entry->id = FLOW_ACTION_CSUM; entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR; @@ -540,12 +577,14 @@ static void flow_offload_ipv4_checksum(struct net *net, entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP; break; } + + return 0; } -static void flow_offload_redirect(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_redirect(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *this_tuple, *other_tuple; struct flow_action_entry *entry; @@ -563,21 +602,28 @@ static void flow_offload_redirect(struct net *net, ifindex = other_tuple->iifidx; break; default: - return; + return -EOPNOTSUPP; } dev = dev_get_by_index(net, ifindex); if (!dev) - return; + return -ENODEV; entry = flow_action_entry_next(flow_rule); + if (!entry) { + dev_put(dev); + return -E2BIG; + } + entry->id = FLOW_ACTION_REDIRECT; entry->dev = dev; + + return 0; } -static void flow_offload_encap_tunnel(const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_encap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *this_tuple; struct flow_action_entry *entry; @@ -585,7 +631,7 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow, this_tuple = &flow->tuplehash[dir].tuple; if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) - return; + return 0; dst = this_tuple->dst_cache; if (dst && dst->lwtstate) { @@ -594,15 +640,19 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow, tun_info = lwt_tun_info(dst->lwtstate); if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; entry->id = FLOW_ACTION_TUNNEL_ENCAP; entry->tunnel = tun_info; } } + + return 0; } -static void flow_offload_decap_tunnel(const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_decap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *other_tuple; struct flow_action_entry *entry; @@ -610,7 +660,7 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow, other_tuple = &flow->tuplehash[!dir].tuple; if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) - return; + return 0; dst = other_tuple->dst_cache; if (dst && dst->lwtstate) { @@ -619,9 +669,13 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow, tun_info = lwt_tun_info(dst->lwtstate); if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; entry->id = FLOW_ACTION_TUNNEL_DECAP; } } + + return 0; } static int @@ -633,8 +687,9 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, const struct flow_offload_tuple *tuple; int i; - flow_offload_decap_tunnel(flow, dir, flow_rule); - flow_offload_encap_tunnel(flow, dir, flow_rule); + if (flow_offload_decap_tunnel(flow, dir, flow_rule) < 0 || + flow_offload_encap_tunnel(flow, dir, flow_rule) < 0) + return -1; if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) @@ -650,6 +705,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, if (tuple->encap[i].proto == htons(ETH_P_8021Q)) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -1; entry->id = FLOW_ACTION_VLAN_POP; } } @@ -663,6 +720,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, continue; entry = flow_action_entry_next(flow_rule); + if (!entry) + return -1; switch (other_tuple->encap[i].proto) { case htons(ETH_P_PPP_SES): @@ -688,18 +747,22 @@ int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow, return -1; if (test_bit(NF_FLOW_SNAT, &flow->flags)) { - flow_offload_ipv4_snat(net, flow, dir, flow_rule); - flow_offload_port_snat(net, flow, dir, flow_rule); + if (flow_offload_ipv4_snat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_snat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { - flow_offload_ipv4_dnat(net, flow, dir, flow_rule); - flow_offload_port_dnat(net, flow, dir, flow_rule); + if (flow_offload_ipv4_dnat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_dnat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_SNAT, &flow->flags) || test_bit(NF_FLOW_DNAT, &flow->flags)) - flow_offload_ipv4_checksum(net, flow, flow_rule); + if (flow_offload_ipv4_checksum(net, flow, flow_rule) < 0) + return -1; - flow_offload_redirect(net, flow, dir, flow_rule); + if (flow_offload_redirect(net, flow, dir, flow_rule) < 0) + return -1; return 0; } @@ -713,22 +776,23 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, return -1; if (test_bit(NF_FLOW_SNAT, &flow->flags)) { - flow_offload_ipv6_snat(net, flow, dir, flow_rule); - flow_offload_port_snat(net, flow, dir, flow_rule); + if (flow_offload_ipv6_snat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_snat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { - flow_offload_ipv6_dnat(net, flow, dir, flow_rule); - flow_offload_port_dnat(net, flow, dir, flow_rule); + if (flow_offload_ipv6_dnat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_dnat(net, flow, dir, flow_rule) < 0) + return -1; } - flow_offload_redirect(net, flow, dir, flow_rule); + if (flow_offload_redirect(net, flow, dir, flow_rule) < 0) + return -1; return 0; } EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6); -#define NF_FLOW_RULE_ACTION_MAX 16 - static struct nf_flow_rule * nf_flow_offload_rule_alloc(struct net *net, const struct flow_offload_work *offload, From 6d52a4a0520a6696bdde51caa11f2d6821cd0c01 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 26 Mar 2026 16:17:24 +0100 Subject: [PATCH 02/10] netfilter: nfnetlink_log: account for netlink header size This is a followup to an old bug fix: NLMSG_DONE needs to account for the netlink header size, not just the attribute size. This can result in a WARN splat + drop of the netlink message, but other than this there are no ill effects. Fixes: 9dfa1dfe4d5e ("netfilter: nf_log: account for size of NLMSG_DONE attribute") Reported-by: Yiming Qian Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fcbe54940b2e..f80978c06fa0 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -726,7 +726,7 @@ nfulnl_log_packet(struct net *net, + nla_total_size(plen) /* prefix */ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) - + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ + + nlmsg_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { size += nla_total_size(skb->dev->hard_header_len) From a958a4f90ddd7de0800b33ca9d7b886b7d40f74e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 31 Mar 2026 23:13:36 +0200 Subject: [PATCH 03/10] netfilter: x_tables: ensure names are nul-terminated Reject names that lack a \0 character before feeding them to functions that expect c-strings. Fixes tag is the most recent commit that needs this change. Fixes: c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_cgroup.c | 6 ++++++ net/netfilter/xt_rateest.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index c437fbd59ec1..43d2ae2be628 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -65,6 +65,9 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) info->priv = NULL; if (info->has_path) { + if (strnlen(info->path, sizeof(info->path)) >= sizeof(info->path)) + return -ENAMETOOLONG; + cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { pr_info_ratelimited("invalid path, errno=%ld\n", @@ -102,6 +105,9 @@ static int cgroup_mt_check_v2(const struct xt_mtchk_param *par) info->priv = NULL; if (info->has_path) { + if (strnlen(info->path, sizeof(info->path)) >= sizeof(info->path)) + return -ENAMETOOLONG; + cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { pr_info_ratelimited("invalid path, errno=%ld\n", diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 72324bd976af..b1d736c15fcb 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -91,6 +91,11 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) goto err1; } + if (strnlen(info->name1, sizeof(info->name1)) >= sizeof(info->name1)) + return -ENAMETOOLONG; + if (strnlen(info->name2, sizeof(info->name2)) >= sizeof(info->name2)) + return -ENAMETOOLONG; + ret = -ENOENT; est1 = xt_rateest_lookup(par->net, info->name1); if (!est1) From b7e8590987aa94c9dc51518fad0e58cb887b1db5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 30 Mar 2026 14:16:34 +0200 Subject: [PATCH 04/10] netfilter: ipset: use nla_strcmp for IPSET_ATTR_NAME attr IPSET_ATTR_NAME and IPSET_ATTR_NAMEREF are of NLA_STRING type, they cannot be treated like a c-string. They either have to be switched to NLA_NUL_STRING, or the compare operations need to use the nla functions. Fixes: f830837f0eed ("netfilter: ipset: list:set set type support") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 2 +- net/netfilter/ipset/ip_set_core.c | 4 ++-- net/netfilter/ipset/ip_set_list_set.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e9f4f845d760..b98331572ad2 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -309,7 +309,7 @@ enum { /* register and unregister set references */ extern ip_set_id_t ip_set_get_byname(struct net *net, - const char *name, struct ip_set **set); + const struct nlattr *name, struct ip_set **set); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a2fe711cb5e3..d0c9fe59c67d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -821,7 +821,7 @@ EXPORT_SYMBOL_GPL(ip_set_del); * */ ip_set_id_t -ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) +ip_set_get_byname(struct net *net, const struct nlattr *name, struct ip_set **set) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; @@ -830,7 +830,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) rcu_read_lock(); for (i = 0; i < inst->ip_set_max; i++) { s = rcu_dereference(inst->ip_set_list)[i]; - if (s && STRNCMP(s->name, name)) { + if (s && nla_strcmp(name, s->name) == 0) { __ip_set_get(s); index = i; *set = s; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 98b91b34c314..1cef84f15e8c 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -367,7 +367,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); + e.id = ip_set_get_byname(map->net, tb[IPSET_ATTR_NAME], &s); if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ @@ -389,7 +389,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_NAMEREF]) { e.refid = ip_set_get_byname(map->net, - nla_data(tb[IPSET_ATTR_NAMEREF]), + tb[IPSET_ATTR_NAMEREF], &s); if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF; From a242a9ae58aa46ff7dae51ce64150a93957abe65 Mon Sep 17 00:00:00 2001 From: Qi Tang Date: Mon, 30 Mar 2026 00:50:36 +0800 Subject: [PATCH 05/10] netfilter: nf_conntrack_helper: pass helper to expect cleanup nf_conntrack_helper_unregister() calls nf_ct_expect_iterate_destroy() to remove expectations belonging to the helper being unregistered. However, it passes NULL instead of the helper pointer as the data argument, so expect_iter_me() never matches any expectation and all of them survive the cleanup. After unregister returns, nfnl_cthelper_del() frees the helper object immediately. Subsequent expectation dumps or packet-driven init_conntrack() calls then dereference the freed exp->helper, causing a use-after-free. Pass the actual helper pointer so expectations referencing it are properly destroyed before the helper object is freed. BUG: KASAN: slab-use-after-free in string+0x38f/0x430 Read of size 1 at addr ffff888003b14d20 by task poc/103 Call Trace: string+0x38f/0x430 vsnprintf+0x3cc/0x1170 seq_printf+0x17a/0x240 exp_seq_show+0x2e5/0x560 seq_read_iter+0x419/0x1280 proc_reg_read+0x1ac/0x270 vfs_read+0x179/0x930 ksys_read+0xef/0x1c0 Freed by task 103: The buggy address is located 32 bytes inside of freed 192-byte region [ffff888003b14d00, ffff888003b14dc0) Fixes: ac7b84839003 ("netfilter: expect: add and use nf_ct_expect_iterate helpers") Signed-off-by: Qi Tang Reviewed-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 1b330ba6613b..a715304a53d8 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -415,7 +415,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) */ synchronize_rcu(); - nf_ct_expect_iterate_destroy(expect_iter_me, NULL); + nf_ct_expect_iterate_destroy(expect_iter_me, me); nf_ct_iterate_destroy(unhelp, me); /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as From 35177c6877134a21315f37d57a5577846225623e Mon Sep 17 00:00:00 2001 From: Qi Tang Date: Tue, 31 Mar 2026 14:17:12 +0800 Subject: [PATCH 06/10] netfilter: ctnetlink: zero expect NAT fields when CTA_EXPECT_NAT absent ctnetlink_alloc_expect() allocates expectations from a non-zeroing slab cache via nf_ct_expect_alloc(). When CTA_EXPECT_NAT is not present in the netlink message, saved_addr and saved_proto are never initialized. Stale data from a previous slab occupant can then be dumped to userspace by ctnetlink_exp_dump_expect(), which checks these fields to decide whether to emit CTA_EXPECT_NAT. The safe sibling nf_ct_expect_init(), used by the packet path, explicitly zeroes these fields. Zero saved_addr, saved_proto and dir in the else branch, guarded by IS_ENABLED(CONFIG_NF_NAT) since these fields only exist when NAT is enabled. Confirmed by priming the expect slab with NAT-bearing expectations, freeing them, creating a new expectation without CTA_EXPECT_NAT, and observing that the ctnetlink dump emits a spurious CTA_EXPECT_NAT containing stale data from the prior allocation. Fixes: 076a0ca02644 ("netfilter: ctnetlink: add NAT support for expectations") Reported-by: kernel test robot Signed-off-by: Qi Tang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 3f408f3713bb..38bd7124d9f7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3588,6 +3588,12 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp, nf_ct_l3num(ct)); if (err < 0) goto err_out; +#if IS_ENABLED(CONFIG_NF_NAT) + } else { + memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); + memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); + exp->dir = 0; +#endif } return exp; err_out: From 917b61fa2042f11e2af4c428e43f08199586633a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 30 Mar 2026 11:26:22 +0200 Subject: [PATCH 07/10] netfilter: ctnetlink: ignore explicit helper on new expectations Use the existing master conntrack helper, anything else is not really supported and it just makes validation more complicated, so just ignore what helper userspace suggests for this expectation. This was uncovered when validating CTA_EXPECT_CLASS via different helper provided by userspace than the existing master conntrack helper: BUG: KASAN: slab-out-of-bounds in nf_ct_expect_related_report+0x2479/0x27c0 Read of size 4 at addr ffff8880043fe408 by task poc/102 Call Trace: nf_ct_expect_related_report+0x2479/0x27c0 ctnetlink_create_expect+0x22b/0x3b0 ctnetlink_new_expect+0x4bd/0x5c0 nfnetlink_rcv_msg+0x67a/0x950 netlink_rcv_skb+0x120/0x350 Allowing to read kernel memory bytes off the expectation boundary. CTA_EXPECT_HELP_NAME is still used to offer the helper name to userspace via netlink dump. Fixes: bd0779370588 ("netfilter: nfnetlink_queue: allow to attach expectations to conntracks") Reported-by: Qi Tang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 54 +++++----------------------- 1 file changed, 9 insertions(+), 45 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 38bd7124d9f7..a20cd82446c5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2636,7 +2636,6 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct, - struct nf_conntrack_helper *helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask); @@ -2865,7 +2864,6 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, { struct nlattr *cda[CTA_EXPECT_MAX+1]; struct nf_conntrack_tuple tuple, mask; - struct nf_conntrack_helper *helper = NULL; struct nf_conntrack_expect *exp; int err; @@ -2879,17 +2877,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, if (err < 0) return err; - if (cda[CTA_EXPECT_HELP_NAME]) { - const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); - - helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), - nf_ct_protonum(ct)); - if (helper == NULL) - return -EOPNOTSUPP; - } - exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct, - helper, &tuple, &mask); + &tuple, &mask); if (IS_ERR(exp)) return PTR_ERR(exp); @@ -3528,11 +3517,11 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, - struct nf_conntrack_helper *helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { struct net *net = read_pnet(&ct->ct_net); + struct nf_conntrack_helper *helper; struct nf_conntrack_expect *exp; struct nf_conn_help *help; u32 class = 0; @@ -3542,7 +3531,11 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, if (!help) return ERR_PTR(-EOPNOTSUPP); - if (cda[CTA_EXPECT_CLASS] && helper) { + helper = rcu_dereference(help->helper); + if (!helper) + return ERR_PTR(-EOPNOTSUPP); + + if (cda[CTA_EXPECT_CLASS]) { class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS])); if (class > helper->expect_class_max) return ERR_PTR(-EINVAL); @@ -3576,8 +3569,6 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, #ifdef CONFIG_NF_CONNTRACK_ZONES exp->zone = ct->zone; #endif - if (!helper) - helper = rcu_dereference(help->helper); rcu_assign_pointer(exp->helper, helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; @@ -3609,7 +3600,6 @@ ctnetlink_create_expect(struct net *net, { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; - struct nf_conntrack_helper *helper = NULL; struct nf_conntrack_expect *exp; struct nf_conn *ct; int err; @@ -3635,33 +3625,7 @@ ctnetlink_create_expect(struct net *net, ct = nf_ct_tuplehash_to_ctrack(h); rcu_read_lock(); - if (cda[CTA_EXPECT_HELP_NAME]) { - const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); - - helper = __nf_conntrack_helper_find(helpname, u3, - nf_ct_protonum(ct)); - if (helper == NULL) { - rcu_read_unlock(); -#ifdef CONFIG_MODULES - if (request_module("nfct-helper-%s", helpname) < 0) { - err = -EOPNOTSUPP; - goto err_ct; - } - rcu_read_lock(); - helper = __nf_conntrack_helper_find(helpname, u3, - nf_ct_protonum(ct)); - if (helper) { - err = -EAGAIN; - goto err_rcu; - } - rcu_read_unlock(); -#endif - err = -EOPNOTSUPP; - goto err_ct; - } - } - - exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask); + exp = ctnetlink_alloc_expect(cda, ct, &tuple, &mask); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto err_rcu; @@ -3671,8 +3635,8 @@ ctnetlink_create_expect(struct net *net, nf_ct_expect_put(exp); err_rcu: rcu_read_unlock(); -err_ct: nf_ct_put(ct); + return err; } From 9862ef9ab0a116c6dca98842aab7de13a252ae02 Mon Sep 17 00:00:00 2001 From: Yifan Wu Date: Mon, 30 Mar 2026 14:39:24 -0700 Subject: [PATCH 08/10] netfilter: ipset: drop logically empty buckets in mtype_del mtype_del() counts empty slots below n->pos in k, but it only drops the bucket when both n->pos and k are zero. This misses buckets whose live entries have all been removed while n->pos still points past deleted slots. Treat a bucket as empty when all positions below n->pos are unused and release it directly instead of shrinking it further. Fixes: 8af1c6fbd923 ("netfilter: ipset: Fix forceadd evaluation path") Cc: stable@vger.kernel.org Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Yifan Wu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Reviewed-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 181daa9c2019..b79e5dd2af03 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1098,7 +1098,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (!test_bit(i, n->used)) k++; } - if (n->pos == 0 && k == 0) { + if (k == n->pos) { t->hregion[r].ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu); From 3d5d488f11776738deab9da336038add95d342d1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 31 Mar 2026 16:41:25 +0200 Subject: [PATCH 09/10] netfilter: x_tables: restrict xt_check_match/xt_check_target extensions for NFPROTO_ARP Weiming Shi says: xt_match and xt_target structs registered with NFPROTO_UNSPEC can be loaded by any protocol family through nft_compat. When such a match/target sets .hooks to restrict which hooks it may run on, the bitmask uses NF_INET_* constants. This is only correct for families whose hook layout matches NF_INET_*: IPv4, IPv6, INET, and bridge all share the same five hooks (PRE_ROUTING ... POST_ROUTING). ARP only has three hooks (IN=0, OUT=1, FORWARD=2) with different semantics. Because NF_ARP_OUT == 1 == NF_INET_LOCAL_IN, the .hooks validation silently passes for the wrong reasons, allowing matches to run on ARP chains where the hook assumptions (e.g. state->in being set on input hooks) do not hold. This leads to NULL pointer dereferences; xt_devgroup is one concrete example: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000044: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000220-0x0000000000000227] RIP: 0010:devgroup_mt+0xff/0x350 Call Trace: nft_match_eval (net/netfilter/nft_compat.c:407) nft_do_chain (net/netfilter/nf_tables_core.c:285) nft_do_chain_arp (net/netfilter/nft_chain_filter.c:61) nf_hook_slow (net/netfilter/core.c:623) arp_xmit (net/ipv4/arp.c:666) Kernel panic - not syncing: Fatal exception in interrupt Fix it by restricting arptables to NFPROTO_ARP extensions only. Note that arptables-legacy only supports: - arpt_CLASSIFY - arpt_mangle - arpt_MARK that provide explicit NFPROTO_ARP match/target declarations. Fixes: 9291747f118d ("netfilter: xtables: add device group match") Reported-by: Xiang Mei Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e594b3b7ad82..b39017c80548 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -501,6 +501,17 @@ int xt_check_match(struct xt_mtchk_param *par, par->match->table, par->table); return -EINVAL; } + + /* NFPROTO_UNSPEC implies NF_INET_* hooks which do not overlap with + * NF_ARP_IN,OUT,FORWARD, allow explicit extensions with NFPROTO_ARP + * support. + */ + if (par->family == NFPROTO_ARP && + par->match->family != NFPROTO_ARP) { + pr_info_ratelimited("%s_tables: %s match: not valid for this family\n", + xt_prefix[par->family], par->match->name); + return -EINVAL; + } if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { char used[64], allow[64]; @@ -1016,6 +1027,18 @@ int xt_check_target(struct xt_tgchk_param *par, par->target->table, par->table); return -EINVAL; } + + /* NFPROTO_UNSPEC implies NF_INET_* hooks which do not overlap with + * NF_ARP_IN,OUT,FORWARD, allow explicit extensions with NFPROTO_ARP + * support. + */ + if (par->family == NFPROTO_ARP && + par->target->family != NFPROTO_ARP) { + pr_info_ratelimited("%s_tables: %s target: not valid for this family\n", + xt_prefix[par->family], par->target->name); + return -EINVAL; + } + if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { char used[64], allow[64]; From da107398cbd4bbdb6bffecb2ce86d5c9384f4cec Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 31 Mar 2026 23:08:02 +0200 Subject: [PATCH 10/10] netfilter: nf_tables: reject immediate NF_QUEUE verdict nft_queue is always used from userspace nftables to deliver the NF_QUEUE verdict. Immediately emitting an NF_QUEUE verdict is never used by the userspace nft tools, so reject immediate NF_QUEUE verdicts. The arp family does not provide queue support, but such an immediate verdict is still reachable. Globally reject NF_QUEUE immediate verdicts to address this issue. Fixes: f342de4e2f33 ("netfilter: nf_tables: reject QUEUE/DROP verdict parameters") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3922cff1bb3d..8c42247a176c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -11667,8 +11667,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, switch (data->verdict.code) { case NF_ACCEPT: case NF_DROP: - case NF_QUEUE: - break; case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: @@ -11703,6 +11701,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict.chain = chain; break; + case NF_QUEUE: + /* The nft_queue expression is used for this purpose, an + * immediate NF_QUEUE verdict should not ever be seen here. + */ + fallthrough; default: return -EINVAL; }