Commit 11bd5784 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'netpoll-factor-out-functions-from-netpoll_send_udp-and-add-ipv6-selftest'

Breno Leitao says:

====================
netpoll: Factor out functions from netpoll_send_udp() and add ipv6 selftest

Refactors the netpoll UDP transmit path to improve code clarity,
maintainability, and protocol-layer encapsulation.

Function netpoll_send_udp() has more than 100 LoC, which is hard to
understand and review. After this patchset, it has only 32 LoC, which is
more manageable.

The series systematically moves the construction of protocol headers
(UDP, IPv4, IPv6, Ethernet) out of the core `netpoll_send_udp()`
function into dedicated static helpers:

  - `push_udp()` for UDP header setup
  - `push_ipv4()` and `push_ipv6()` for IP header setup
  - `push_eth()` for Ethernet header setup

This results in a clean, layered abstraction that mirrors the protocol
stack, reduces code duplication, and improves readability.

Also, to make sure this is not breaking anything, add IPv6 selftest to
netconsole tests, which will exercise this code. This test would also pick
problems similiar to the one fixed by f5990207  ("net: netpoll:
Initialize UDP checksum field before checksumming"), which was
embarrassin we didn't have a selftest catch it.

Anyway, there are **no functional changes** intended in this patchset.

v1: https://lore.kernel.org/20250627-netpoll_untagle_ip-v1-0-61a21692f84a@debian.org
====================

Link: https://patch.msgid.link/20250702-netpoll_untagle_ip-v2-0-13cf3db24e2b@debian.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 49402a62 3dc6c763
Loading
Loading
Loading
Loading
+116 −76
Original line number Diff line number Diff line
@@ -372,6 +372,31 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
	return ret;
}

static void netpoll_udp_checksum(struct netpoll *np, struct sk_buff *skb,
				 int len)
{
	struct udphdr *udph;
	int udp_len;

	udp_len = len + sizeof(struct udphdr);
	udph = udp_hdr(skb);

	/* check needs to be set, since it will be consumed in csum_partial */
	udph->check = 0;
	if (np->ipv6)
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
	else
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
	if (udph->check == 0)
		udph->check = CSUM_MANGLED_0;
}

netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
	unsigned long flags;
@@ -389,52 +414,11 @@ netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
}
EXPORT_SYMBOL(netpoll_send_skb);

int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
static void push_ipv6(struct netpoll *np, struct sk_buff *skb, int len)
{
	int total_len, ip_len, udp_len;
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
	static atomic_t ip_ident;
	struct ipv6hdr *ip6h;

	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		WARN_ON_ONCE(!irqs_disabled());

	udp_len = len + sizeof(*udph);
	if (np->ipv6)
		ip_len = udp_len + sizeof(*ip6h);
	else
		ip_len = udp_len + sizeof(*iph);

	total_len = ip_len + LL_RESERVED_SPACE(np->dev);

	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
	if (!skb)
		return -ENOMEM;

	skb_copy_to_linear_data(skb, msg, len);
	skb_put(skb, len);

	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);

	udph->check = 0;
	if (np->ipv6) {
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*ip6h));
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	ip6h = ipv6_hdr(skb);

@@ -450,42 +434,98 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
	ip6h->saddr = np->local_ip.in6;
	ip6h->daddr = np->remote_ip.in6;

		eth = skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
	} else {
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;
	skb->protocol = htons(ETH_P_IPV6);
}

		skb_push(skb, sizeof(*iph));
static void push_ipv4(struct netpoll *np, struct sk_buff *skb, int len)
{
	static atomic_t ip_ident;
	struct iphdr *iph;
	int ip_len;

	ip_len = len + sizeof(struct udphdr) + sizeof(struct iphdr);

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	/* iph->version = 4; iph->ihl = 5; */
	*(unsigned char *)iph = 0x45;
	iph->tos = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
	put_unaligned(htons(ip_len), &iph->tot_len);
	iph->id = htons(atomic_inc_return(&ip_ident));
	iph->frag_off = 0;
	iph->ttl = 64;
	iph->protocol = IPPROTO_UDP;
	iph->check = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
	put_unaligned(np->local_ip.ip, &iph->saddr);
	put_unaligned(np->remote_ip.ip, &iph->daddr);
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
	skb->protocol = htons(ETH_P_IP);
}

		eth = skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
static void push_udp(struct netpoll *np, struct sk_buff *skb, int len)
{
	struct udphdr *udph;
	int udp_len;

	udp_len = len + sizeof(struct udphdr);

	skb_push(skb, sizeof(struct udphdr));
	skb_reset_transport_header(skb);

	udph = udp_hdr(skb);
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);

	netpoll_udp_checksum(np, skb, len);
}

static void push_eth(struct netpoll *np, struct sk_buff *skb)
{
	struct ethhdr *eth;

	eth = skb_push(skb, ETH_HLEN);
	skb_reset_mac_header(skb);
	ether_addr_copy(eth->h_source, np->dev->dev_addr);
	ether_addr_copy(eth->h_dest, np->remote_mac);
	if (np->ipv6)
		eth->h_proto = htons(ETH_P_IPV6);
	else
		eth->h_proto = htons(ETH_P_IP);
}

int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
	int total_len, ip_len, udp_len;
	struct sk_buff *skb;

	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		WARN_ON_ONCE(!irqs_disabled());

	udp_len = len + sizeof(struct udphdr);
	if (np->ipv6)
		ip_len = udp_len + sizeof(struct ipv6hdr);
	else
		ip_len = udp_len + sizeof(struct iphdr);

	total_len = ip_len + LL_RESERVED_SPACE(np->dev);

	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
	if (!skb)
		return -ENOMEM;

	skb_copy_to_linear_data(skb, msg, len);
	skb_put(skb, len);

	push_udp(np, skb, len);
	if (np->ipv6)
		push_ipv6(np, skb, len);
	else
		push_ipv4(np, skb, len);
	push_eth(np, skb);
	skb->dev = np->dev;

	return (int)netpoll_send_skb(np, skb);
+69 −7
Original line number Diff line number Diff line
@@ -11,9 +11,11 @@ set -euo pipefail
LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")

SRCIF="" # to be populated later
SRCIP=192.0.2.1
SRCIP4="192.0.2.1"
SRCIP6="fc00::1"
DSTIF="" # to be populated later
DSTIP=192.0.2.2
DSTIP4="192.0.2.2"
DSTIP6="fc00::2"

PORT="6666"
MSG="netconsole selftest"
@@ -80,7 +82,23 @@ function configure_ip() {
	ip link set "${SRCIF}" up
}

function select_ipv4_or_ipv6()
{
	local VERSION=${1}

	if [[ "$VERSION" == "ipv6" ]]
	then
		DSTIP="${DSTIP6}"
		SRCIP="${SRCIP6}"
	else
		DSTIP="${DSTIP4}"
		SRCIP="${SRCIP4}"
	fi
}

function set_network() {
	local IP_VERSION=${1:-"ipv4"}

	# setup_ns function is coming from lib.sh
	setup_ns NAMESPACE

@@ -91,6 +109,7 @@ function set_network() {
	# Link both interfaces back to back
	link_ifaces

	select_ipv4_or_ipv6 "${IP_VERSION}"
	configure_ip
}

@@ -119,6 +138,11 @@ function create_dynamic_target() {
	fi

	echo 1 > "${NETCONS_PATH}"/enabled

	# This will make sure that the kernel was able to
	# load the netconsole driver configuration. The console message
	# gets more organized/sequential as well.
	sleep 1
}

# Generate the command line argument for netconsole following:
@@ -179,9 +203,18 @@ function set_user_data() {

function listen_port_and_save_to() {
	local OUTPUT=${1}
	local IPVERSION=${2:-"ipv4"}

	if [ "${IPVERSION}" == "ipv4" ]
	then
		SOCAT_MODE="UDP-LISTEN"
	else
		SOCAT_MODE="UDP6-LISTEN"
	fi

	# Just wait for 2 seconds
	timeout 2 ip netns exec "${NAMESPACE}" \
		socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
		socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}"
}

# Only validate that the message arrived properly
@@ -263,8 +296,15 @@ function check_for_dependencies() {
		exit "${ksft_skip}"
	fi

	if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
		echo "SKIP: IPs already in use. Skipping it" >&2
	REGEXP4="inet.*(${SRCIP4}|${DSTIP4})"
	REGEXP6="inet.*(${SRCIP6}|${DSTIP6})"
	if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then
		echo "SKIP: IPv4s already in use. Skipping it" >&2
		exit "${ksft_skip}"
	fi

	if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then
		echo "SKIP: IPv6s already in use. Skipping it" >&2
		exit "${ksft_skip}"
	fi
}
@@ -278,11 +318,13 @@ function check_for_taskset() {

# This is necessary if running multiple tests in a row
function pkill_socat() {
	PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
	PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
	PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}"
	# socat runs under timeout(1), kill it if it is still alive
	# do not fail if socat doesn't exist anymore
	set +e
	pkill -f "${PROCESS_NAME}"
	pkill -f "${PROCESS_NAME4}"
	pkill -f "${PROCESS_NAME6}"
	set -e
}

@@ -294,3 +336,23 @@ function check_netconsole_module() {
		exit "${ksft_skip}"
	fi
}

# A wrapper to translate protocol version to udp version
function wait_for_port() {
	local NAMESPACE=${1}
	local PORT=${2}
	IP_VERSION=${3}

	if [ "${IP_VERSION}" == "ipv6" ]
	then
		PROTOCOL="udp6"
	else
		PROTOCOL="udp"
	fi

	wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}"
	# even after the port is open, let's wait 1 second before writing
	# otherwise the packet could be missed, and the test will fail. Happens
	# more frequently on IPv6
	sleep 1
}
+30 −23
Original line number Diff line number Diff line
@@ -36,9 +36,11 @@ trap cleanup EXIT
# Run the test twice, with different format modes
for FORMAT in "basic" "extended"
do
	echo "Running with target mode: ${FORMAT}"
	for IP_VERSION in "ipv6" "ipv4"
	do
		echo "Running with target mode: ${FORMAT} (${IP_VERSION})"
		# Create one namespace and two interfaces
	set_network
		set_network "${IP_VERSION}"
		# Create a dynamic target for netconsole
		create_dynamic_target "${FORMAT}"
		# Only set userdata for extended format
@@ -47,10 +49,11 @@ do
			# Set userdata "key" with the "value" value
			set_user_data
		fi
	# Listed for netconsole port inside the namespace and destination interface
	listen_port_and_save_to "${OUTPUT_FILE}" &
		# Listed for netconsole port inside the namespace and
		# destination interface
		listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
		# Wait for socat to start and listen to the port.
	wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
		wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}"
		# Send the message
		echo "${MSG}: ${TARGET}" > /dev/kmsg
		# Wait until socat saves the file to disk
@@ -59,7 +62,11 @@ do
		# Make sure the message was received in the dst part
		# and exit
		validate_result "${OUTPUT_FILE}" "${FORMAT}"
		# kill socat in case it is still running
		pkill_socat
		cleanup
		echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2
	done
done

trap - EXIT