Commit 529676ca authored by Eric Dumazet, committed by Dave Hansen
Browse files

x86/lib: Inline csum_ipv6_magic()

Inline this small helper. It has been observed to consume up
to 0.75%, which is significant for such a small function.

This should reduce register pressure, as saddr and daddr are often
back to back in memory.

For instance, the code inlined in tcp6_gro_receive() will look like:

 55a:	48 03 73 28          	add    0x28(%rbx),%rsi
 55e:	8b 43 70             	mov    0x70(%rbx),%eax
 561:	29 f8                	sub    %edi,%eax
 563:	0f c8                	bswap  %eax
 565:	89 c0                	mov    %eax,%eax
 567:	48 05 00 06 00 00    	add    $0x600,%rax
 56d:	48 03 46 08          	add    0x8(%rsi),%rax
 571:	48 13 46 10          	adc    0x10(%rsi),%rax
 575:	48 13 46 18          	adc    0x18(%rsi),%rax
 579:	48 13 46 20          	adc    0x20(%rsi),%rax
 57d:	48 83 d0 00          	adc    $0x0,%rax
 581:	48 89 c6             	mov    %rax,%rsi
 584:	48 c1 ee 20          	shr    $0x20,%rsi
 588:	01 f0                	add    %esi,%eax
 58a:	83 d0 00             	adc    $0x0,%eax
 58d:	89 c6                	mov    %eax,%esi
 58f:	66 31 c0             	xor    %ax,%ax

Surprisingly, this inlining does not seem to bloat kernel text size.
In at least two cases[1], it either has no effect or results in a
slightly smaller kernel.

1. https://lore.kernel.org/all/CANn89iJzcb_XO9oCApKYfRxsMMmg7BHukRDqWTca3ZLQ8HT0iQ@mail.gmail.com/



[ dhansen: add justification and note about lack of kernel bloat ]

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://patch.msgid.link/20251113154545.594580-1-edumazet@google.com
parent 9ace4753
Loading
Loading
Loading
Loading
+33 −12
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
 */

#include <linux/compiler.h>
#include <linux/in6.h>
#include <asm/byteorder.h>

/**
@@ -145,6 +146,17 @@ extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
 */
extern __sum16 ip_compute_csum(const void *buff, int len);

/**
 * add32_with_carry - add two 32-bit values and add the carry-out back in.
 * @a: first addend; the result is accumulated in the same register
 * @b: second addend; may be supplied from a register or from memory
 *
 * Returns the 32-bit sum of @a and @b plus the carry flag produced by
 * that addition.
 */
static inline unsigned add32_with_carry(unsigned a, unsigned b)
{
	/* addl: %0 += %2 (sets the carry flag); adcl $0: %0 += carry flag. */
	asm("addl %2,%0\n\t"
	    "adcl $0,%0"
	    : "=r" (a)
	    /* "0" ties the input value of a to output operand %0. */
	    : "0" (a), "rm" (b));
	return a;
}

#define _HAVE_ARCH_IPV6_CSUM 1

/**
 * csum_ipv6_magic - Compute checksum of an IPv6 pseudo header.
 * @saddr: source address
@@ -158,20 +170,29 @@ extern __sum16 ip_compute_csum(const void *buff, int len);
 * Returns the checksum folded to 16 bits by csum_fold().
 */

struct in6_addr;
static inline __sum16 csum_ipv6_magic(
	const struct in6_addr *_saddr, const struct in6_addr *_daddr,
	__u32 len, __u8 proto, __wsum sum)
{
	const unsigned long *saddr = (const unsigned long *)_saddr;
	const unsigned long *daddr = (const unsigned long *)_daddr;
	__u64 sum64;

#define _HAVE_ARCH_IPV6_CSUM 1
extern __sum16
csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr,
		__u32 len, __u8 proto, __wsum sum);
	sum64 = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
		(__force __u64)sum;

static inline unsigned add32_with_carry(unsigned a, unsigned b)
{
	asm("addl %2,%0\n\t"
	    "adcl $0,%0"
	    : "=r" (a)
	    : "0" (a), "rm" (b));
	return a;
	asm("	addq %1,%[sum64]\n"
	    "	adcq %2,%[sum64]\n"
	    "	adcq %3,%[sum64]\n"
	    "	adcq %4,%[sum64]\n"
	    "	adcq $0,%[sum64]\n"

	    : [sum64] "+r" (sum64)
	    : "m" (saddr[0]), "m" (saddr[1]),
	      "m" (daddr[0]), "m" (daddr[1]));

	return csum_fold(
	       (__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
}

#define HAVE_ARCH_CSUM_ADD
+0 −22
Original line number Diff line number Diff line
@@ -68,25 +68,3 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len)
}
EXPORT_SYMBOL(csum_partial_copy_nocheck);
/*
 * csum_ipv6_magic - out-of-line checksum over two IPv6 addresses plus
 * length, protocol and a caller-supplied partial sum.
 * @saddr: source address; 16 bytes are read from it (offsets 0 and 8)
 * @daddr: destination address; 16 bytes are read from it (offsets 0 and 8)
 * @len: length value, converted with htonl() before being summed
 * @proto: protocol value, converted with htons() before being summed
 * @sum: partial checksum added into the total
 *
 * Returns the result of csum_fold() applied to the 32-bit folded total.
 */
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
			const struct in6_addr *daddr,
			__u32 len, __u8 proto, __wsum sum)
{
	__u64 rest, sum64;

	/* Seed value: everything that is not part of the two addresses. */
	rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
		(__force __u64)sum;

	/*
	 * Add the four 64-bit words of the two addresses to the seed, chaining
	 * the carry flag from one addition into the next. The trailing
	 * "adcq $0" adds the carry left by the last word back into the total.
	 *
	 * The "[sum]" input constraint places rest in the same register as the
	 * sum64 output, so the accumulator starts out holding rest.
	 *
	 * NOTE(review): the asm dereferences saddr and daddr, but they are
	 * passed only as "r" inputs; there is no "m" operand or "memory"
	 * clobber describing the loads to the compiler - confirm this is safe
	 * for all callers.
	 */
	asm("	addq (%[saddr]),%[sum]\n"
	    "	adcq 8(%[saddr]),%[sum]\n"
	    "	adcq (%[daddr]),%[sum]\n"
	    "	adcq 8(%[daddr]),%[sum]\n"
	    "	adcq $0,%[sum]\n"

	    : [sum] "=r" (sum64)
	    : "[sum]" (rest), [saddr] "r" (saddr), [daddr] "r" (daddr));

	/* Combine the low and high 32-bit halves, then hand off to csum_fold(). */
	return csum_fold(
	       (__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
}
EXPORT_SYMBOL(csum_ipv6_magic);