Commit edafd348 authored by Paolo Abeni
Browse files
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Zero out the remainder in nft_pipapo AVX2 implementation, otherwise
   next lookup could bogusly report a mismatch. This is followed by two
   patches to update nft_pipapo selftests to cover for the previous bug.
   From Florian Westphal.

2) Check for reverse tuple too in case of esoteric NAT collisions for
   UDP traffic and extend selftest coverage. Also from Florian.

netfilter pull request 25-06-05

* tag 'nf-25-06-05' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  selftests: netfilter: nft_nat.sh: add test for reverse clash with nat
  netfilter: nf_nat: also check reverse tuple to obtain clashing entry
  selftests: netfilter: nft_concat_range.sh: add datapath check for map fill bug
  selftests: netfilter: nft_concat_range.sh: prefer per element counters for testing
  netfilter: nf_set_pipapo_avx2: fix initial map fill
====================

Link: https://patch.msgid.link/20250605085735.52205-1-pablo@netfilter.org


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 8e59d9e2 3c3c3248
Loading
Loading
Loading
Loading
+9 −3
Original line number Diff line number Diff line
@@ -248,7 +248,7 @@ static noinline bool
nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
		      const struct nf_conn *ignored_ct)
{
	static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST_BIT;
	static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST;
	const struct nf_conntrack_tuple_hash *thash;
	const struct nf_conntrack_zone *zone;
	struct nf_conn *ct;
@@ -287,8 +287,14 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
	zone = nf_ct_zone(ignored_ct);

	thash = nf_conntrack_find_get(net, zone, tuple);
	if (unlikely(!thash)) /* clashing entry went away */
	if (unlikely(!thash)) {
		struct nf_conntrack_tuple reply;

		nf_ct_invert_tuple(&reply, tuple);
		thash = nf_conntrack_find_get(net, zone, &reply);
		if (!thash) /* clashing entry went away */
			return false;
	}

	ct = nf_ct_tuplehash_to_ctrack(thash);

+20 −1
Original line number Diff line number Diff line
@@ -1113,6 +1113,25 @@ bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
	return true;
}

/**
 * pipapo_resmap_init_avx2() - Prepare result map ahead of the first lookup step
 * @m:		Matching data, including mapping table
 * @res_map:	Result map
 *
 * AVX2 counterpart of pipapo_resmap_init(): the start map bits covered by the
 * first field are left untouched, only the words past them are cleared.
 */
static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, unsigned long *res_map)
{
	const struct nft_pipapo_field *first = m->f;
	int word = first->bsize;

	/* This implementation does not need an all-ones starting map; what it
	 * does need is every word from the end of the first field's map up to
	 * bsize_max to be zero.
	 */
	while (word < m->bsize_max) {
		res_map[word] = 0ul;
		word++;
	}
}

/**
 * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
 * @net:	Network namespace
@@ -1171,7 +1190,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
	res  = scratch->map + (map_index ? m->bsize_max : 0);
	fill = scratch->map + (map_index ? 0 : m->bsize_max);

	/* Starting map doesn't need to be set for this implementation */
	pipapo_resmap_init_avx2(m, res);

	nft_pipapo_avx2_prepare();

+88 −14
Original line number Diff line number Diff line
@@ -419,6 +419,7 @@ table inet filter {

	set test {
		type ${type_spec}
		counter
		flags interval,timeout
	}

@@ -1158,8 +1159,17 @@ del() {
	fi
}

# Return packet count from 'test' counter in 'inet filter' table
# Print the per-element packet counter of element $1 in set 'test' of table
# 'inet filter'. The element is queried with 'nft reset element', and the word
# following the first "packets" keyword in that output is echoed.
# Nothing is printed when no "packets" keyword is present.
count_packets() {
	found=0
	for token in $(nft reset element inet filter test "${1}" ); do
		# The previous word was "packets": this one is the count.
		if [ ${found} -eq 1 ]; then
			echo "${token}" && return
		fi
		test "${token}" = "packets" && found=1
	done
}

# Return packet count from 'test' counter in 'inet filter' table
count_packets_nomatch() {
	found=0
	for token in $(nft list counter inet filter test); do
		[ ${found} -eq 1 ] && echo "${token}" && return
@@ -1206,6 +1216,10 @@ perf() {

# Set MAC addresses, send single packet, check that it matches, reset counter
send_match() {
	local elem="$1"

	shift

	ip link set veth_a address "$(format_mac "${1}")"
	ip -n B link set veth_b address "$(format_mac "${2}")"

@@ -1216,7 +1230,7 @@ send_match() {
		eval src_"$f"=\$\(format_\$f "${2}"\)
	done
	eval send_\$proto
	if [ "$(count_packets)" != "1" ]; then
	if [ "$(count_packets "$elem")" != "1" ]; then
		err "${proto} packet to:"
		err "  $(for f in ${dst}; do
			 eval format_\$f "${1}"; printf ' '; done)"
@@ -1242,7 +1256,7 @@ send_nomatch() {
		eval src_"$f"=\$\(format_\$f "${2}"\)
	done
	eval send_\$proto
	if [ "$(count_packets)" != "0" ]; then
	if [ "$(count_packets_nomatch)" != "0" ]; then
		err "${proto} packet to:"
		err "  $(for f in ${dst}; do
			 eval format_\$f "${1}"; printf ' '; done)"
@@ -1255,6 +1269,42 @@ send_nomatch() {
	fi
}

# Randomly (when RANDOM%20 is 0) send one UDP packet that must NOT match and
# check that count_packets_nomatch still reports 0.
# $1: element specification
# $2: destination IPv4 address
# $3: destination port
# $4: description of the test stage, used in the error message
# Returns 0 when the check is skipped or passes, 1 on an unexpected match.
maybe_send_nomatch() {
	local elem="$1"
	local what="$4"

	# Sending is slow, so most invocations are skipped.
	if [ $((RANDOM%20)) -gt 0 ]; then
		return 0
	fi

	dst_addr4="$2"
	dst_port="$3"
	send_udp

	[ "$(count_packets_nomatch)" = "0" ] && return 0

	err "Packet to $dst_addr4:$dst_port did match $what"
	err "$(nft -a list ruleset)"
	return 1
}

# Randomly (when RANDOM%20 is 0) send one UDP packet that MUST match element
# $1 and check that its per-element counter reports exactly one packet.
# $1: element specification (wrapped in "{ }" for the nft queries)
# $2: destination IPv4 address
# $3: destination port
# $4: description of the test stage, used in the error message
# Returns 0 when the check is skipped, 1 when the packet did not match;
# otherwise the status of the final counter reset.
maybe_send_match() {
	local elem="$1"
	local what="$4"

	# Sending is slow, so most invocations are skipped.
	if [ $((RANDOM%20)) -gt 0 ]; then
		return 0
	fi

	dst_addr4="$2"
	dst_port="$3"
	send_udp

	if [ "$(count_packets "{ $elem }")" = "1" ]; then
		# Matched as expected: clear both counters for the next probe.
		nft reset counter inet filter test >/dev/null
		nft reset element inet filter test "{ $elem }" >/dev/null
		return
	fi

	err "Packet to $dst_addr4:$dst_port did not match $what"
	err "$(nft -a list ruleset)"
	return 1
}

# Correctness test template:
# - add ranged element, check that packets match it
# - check that packets outside range don't match it
@@ -1262,6 +1312,8 @@ send_nomatch() {
test_correctness_main() {
	range_size=1
	for i in $(seq "${start}" $((start + count))); do
		local elem=""

		end=$((start + range_size))

		# Avoid negative or zero-sized port ranges
@@ -1272,15 +1324,16 @@ test_correctness_main() {
		srcstart=$((start + src_delta))
		srcend=$((end + src_delta))

		add "$(format)" || return 1
		elem="$(format)"
		add "$elem" || return 1
		for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
			send_match "${j}" $((j + src_delta)) || return 1
			send_match "$elem" "${j}" $((j + src_delta)) || return 1
		done
		send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1

		# Delete elements now and then
		if [ $((i % 3)) -eq 0 ]; then
			del "$(format)" || return 1
			del "$elem" || return 1
			for j in $(seq "$start" \
				   $((range_size / 2 + 1)) ${end}); do
				send_nomatch "${j}" $((j + src_delta)) \
@@ -1572,14 +1625,17 @@ test_timeout() {

	range_size=1
	for i in $(seq "$start" $((start + count))); do
		local elem=""

		end=$((start + range_size))
		srcstart=$((start + src_delta))
		srcend=$((end + src_delta))

		add "$(format)" || return 1
		elem="$(format)"
		add "$elem" || return 1

		for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
			send_match "${j}" $((j + src_delta)) || return 1
			send_match "$elem" "${j}" $((j + src_delta)) || return 1
		done

		range_size=$((range_size + 1))
@@ -1737,7 +1793,7 @@ test_bug_reload() {
		srcend=$((end + src_delta))

		for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
			send_match "${j}" $((j + src_delta)) || return 1
			send_match "$(format)" "${j}" $((j + src_delta)) || return 1
		done

		range_size=$((range_size + 1))
@@ -1756,22 +1812,34 @@ test_bug_net_port_proto_match() {
	range_size=1
	for i in $(seq 1 10); do
		for j in $(seq 1 20) ; do
			elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
			local dport=$j

			elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")

			# too slow, do not test all addresses
			maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "before add" || return 1

			nft "add element inet filter test { $elem }" || return 1

			maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "after add" || return 1

			nft "get element inet filter test { $elem }" | grep -q "$elem"
			if [ $? -ne 0 ];then
				local got=$(nft "get element inet filter test { $elem }")
				err "post-add: should have returned $elem but got $got"
				return 1
			fi

			maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "out-of-range" || return 1
		done
	done

	# recheck after set was filled
	for i in $(seq 1 10); do
		for j in $(seq 1 20) ; do
			elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
			local dport=$j

			elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")

			nft "get element inet filter test { $elem }" | grep -q "$elem"
			if [ $? -ne 0 ];then
@@ -1779,6 +1847,9 @@ test_bug_net_port_proto_match() {
				err "post-fill: should have returned $elem but got $got"
				return 1
			fi

			maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "recheck" || return 1
			maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "recheck out-of-range" || return 1
		done
	done

@@ -1786,9 +1857,10 @@ test_bug_net_port_proto_match() {
	for i in $(seq 1 10); do
		for j in $(seq 1 20) ; do
			local rnd=$((RANDOM%10))
			local dport=$j
			local got=""

			elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
			elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
			if [ $rnd -gt 0 ];then
				continue
			fi
@@ -1799,6 +1871,8 @@ test_bug_net_port_proto_match() {
				err "post-delete: query for $elem returned $got instead of error."
				return 1
			fi

			maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "match after deletion" || return 1
		done
	done

@@ -1817,7 +1891,7 @@ test_bug_avx2_mismatch()
	dst_addr6="$a2"
	send_icmp6

	if [ "$(count_packets)" -gt "0" ]; then
	if [ "$(count_packets "{ icmpv6 . $a1 }")" -gt "0" ]; then
		err "False match for $a2"
		return 1
	fi
+76 −5
Original line number Diff line number Diff line
@@ -866,6 +866,24 @@ EOF
	ip netns exec "$ns0" nft delete table $family nat
}

# Compare the sent and the received file and report a mismatch.
# $1: input file
# $2: output file
# $3: short description of the check, appended to the error message
# Returns 0 when both files are identical. Otherwise lists both files on
# stdout, prints an error on stderr, sets the global 'ret' to 1 and returns 1.
file_cmp()
{
	local infile="$1"
	local outfile="$2"
	# Callers pass the description as third argument; without this
	# assignment the error message below would end in an empty string.
	local msg="$3"

	if ! cmp "$infile" "$outfile";then
		echo -n "Infile "
		ls -l "$infile"
		echo -n "Outfile "
		ls -l "$outfile"
		# Errors go to stderr ("1>&1" was a no-op redirect).
		echo "ERROR: in and output file mismatch when checking $msg" 1>&2
		ret=1
		return 1
	fi

	return 0
}

test_stateless_nat_ip()
{
	local lret=0
@@ -966,11 +984,7 @@ EOF

	wait

	if ! cmp "$INFILE" "$OUTFILE";then
		ls -l "$INFILE" "$OUTFILE"
		echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
		lret=1
	fi
	file_cmp "$INFILE" "$OUTFILE" "udp with stateless nat" || lret=1

	:> "$OUTFILE"

@@ -991,6 +1005,62 @@ EOF
	return $lret
}

# UDP DNAT clash test.
#
# Loads a prerouting DNAT rule in $ns0 that rewrites UDP 10.0.2.1:1234 to
# 10.0.1.1:1234, then runs two request/reply exchanges from $ns1, both with
# source port 4321:
#   step 1: to 10.0.2.1:1234 (rewritten by the DNAT rule)
#   step 2: to 10.0.1.1:1234 directly
# In each step a socat listener in $ns0, bound to 10.0.1.1:1234, answers with
# "PONG 10.0.1.1 step <n>". The reply seen by the sender and the data received
# by the listener are both checked.
# NOTE(review): step 2 presumably collides with the conntrack entry left by
# step 1 (same source port, same final destination) - confirm against the
# nf_nat change this test accompanies.
#
# Returns $ksft_skip when socat is missing or the ruleset cannot be loaded
# (also storing it in the global 'ret' if that is still 0), otherwise 0 on
# success and 1 on failure.
test_dnat_clash()
{
	local lret=0

	# socat provides both the listener and the sender; skip without it.
	if ! socat -h > /dev/null 2>&1;then
		echo "SKIP: Could not run dnat clash test without socat tool"
		[ $ret -eq 0 ] && ret=$ksft_skip
		return $ksft_skip
	fi

ip netns exec "$ns0" nft -f /dev/stdin <<EOF
flush ruleset
table ip dnat-test {
 chain prerouting {
  type nat hook prerouting priority dstnat; policy accept;
  ip daddr 10.0.2.1 udp dport 1234 counter dnat to 10.0.1.1:1234
 }
}
EOF
	# $? is the exit status of the nft invocation fed by the heredoc above.
	if [ $? -ne 0 ]; then
		echo "SKIP: Could not add dnat rules"
		[ $ret -eq 0 ] && ret=$ksft_skip
		return $ksft_skip
	fi

	# First iteration targets the address covered by the DNAT rule; the
	# variable is switched to the real listener address inside the loop.
	local udpdaddr="10.0.2.1"
	for i in 1 2;do
		echo "PING $udpdaddr" > "$INFILE"
		# Background listener: sends the PONG line and stores whatever it
		# receives in $OUTFILE. It is bounded by 'timeout 3'.
		echo "PONG 10.0.1.1 step $i" | ip netns exec "$ns0" timeout 3 socat STDIO UDP4-LISTEN:1234,bind=10.0.1.1 > "$OUTFILE" 2>/dev/null &
		# NOTE(review): lpid is assigned but not used below; the bare
		# 'wait' further down waits for all background jobs.
		local lpid=$!

		# Do not send before the UDP listener on port 1234 is up.
		busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1234 "-u"

		# Fixed source port so that both iterations use the same one.
		result=$(ip netns exec "$ns1" timeout 3 socat STDIO UDP4-SENDTO:"$udpdaddr:1234,sourceport=4321" < "$INFILE")
		udpdaddr="10.0.1.1"

		# NOTE(review): the messages in this function mention $ns2, but
		# the commands here only run in $ns0 and $ns1 - confirm wording.
		if [ "$result" != "PONG 10.0.1.1 step $i" ] ; then
			echo "ERROR: failed to test udp $ns1 to $ns2 with dnat rule step $i, result: \"$result\"" 1>&2
			lret=1
			ret=1
		fi

		# Let the listener finish so $OUTFILE is complete before comparing.
		wait

		file_cmp "$INFILE" "$OUTFILE" "udp dnat step $i" || lret=1

		# Truncate the output file for the next iteration.
		:> "$OUTFILE"
	done

	test $lret -eq 0 && echo "PASS: IP dnat clash $ns1:$ns2"

	# Remove the dnat-test table again.
	ip netns exec "$ns0" nft flush ruleset

	return $lret
}

# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in "$ns0" "$ns1" "$ns2" ;do
ip netns exec "$i" nft -f /dev/stdin <<EOF
@@ -1147,6 +1217,7 @@ $test_inet_nat && test_redirect6 inet

test_port_shadowing
test_stateless_nat_ip
test_dnat_clash

if [ $ret -ne 0 ];then
	echo -n "FAIL: "