Commit e377240a authored by Steffen Klassert's avatar Steffen Klassert
Browse files

Merge branch 'xfrm: Support GRO decapsulation for ESP in UDP encapsulation'



Antony Antony says:

============
I have added how to enable this feature, and more description to the second
patch. Here is a copy of that.

xfrm: Support GRO for IPv4 & IPv6 ESP in UDP encapsulation

This patchset enables the GRO codepath for ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Enabling this would improve performance for the ESP in UDP datapath, i.e.
IPsec with NAT in between. Our initial tests show a 20% improvement.

By default GRO for ESP-in-UDP is disabled for UDP sockets.
To enable this feature for an ESP socket, the following two options
need to be set:
1. enable ESP-in-UDP: (this is already set by an IKE daemon).
   int type = UDP_ENCAP_ESPINUDP;
   setsockopt(fd, SOL_UDP, UDP_ENCAP, &type, sizeof(type));

2. To enable GRO for ESP in UDP socket:
   type = true;
   setsockopt(fd, SOL_UDP, UDP_GRO, &type, sizeof(type));

Enabling ESP-in-UDP has the side effect of preventing the Linux stack from
seeing ESP packets at the L3 (when ESP OFFLOAD is disabled), as packets are
immediately decapsulated from UDP and decrypted.
This change may affect nftables rules that match on ESP packets at L3.
Also tcpdump won't see the ESP packet.

Developers/admins are advised to review and adapt any nftables rules
accordingly before enabling this feature to prevent potential rule breakage.
Also tcpdump will not see ESP packets from an ESP in UDP flow when this
is enabled.

Initially, a quick test showed a performance improvement of about 20%
on the receiver when using iperf with a TCP flow over ESP in UDP.
============

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
parents 1d495f1c 221ddb72
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -41,7 +41,7 @@ struct napi_gro_cb {
	/* Number of segments aggregated. */
	u16	count;

	/* Used in ipv6_gro_receive() and foo-over-udp */
	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
	u16	proto;

/* Used in napi_gro_cb::free */
+3 −0
Original line number Diff line number Diff line
@@ -60,6 +60,9 @@ struct ipv6_stub {
#if IS_ENABLED(CONFIG_XFRM)
	void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
	int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
	struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
						   struct list_head *head,
						   struct sk_buff *skb);
	int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
			       int encap_type);
#endif
+4 −0
Original line number Diff line number Diff line
@@ -1710,6 +1710,10 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
					struct sk_buff *skb);
struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
					struct sk_buff *skb);
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
		     int optlen);
#else
+5 −1
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
	int offset = skb_gro_offset(skb);
	struct xfrm_offload *xo;
	struct xfrm_state *x;
	int encap_type = 0;
	__be32 seq;
	__be32 spi;

@@ -70,6 +71,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,

	xo->flags |= XFRM_GRO;

	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
		encap_type = UDP_ENCAP_ESPINUDP;

	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
@@ -77,7 +81,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,

	/* We don't need to handle errors from xfrm_input, it does all
	 * the error handling and frees the resources on error. */
	xfrm_input(skb, IPPROTO_ESP, spi, -2);
	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);

	return ERR_PTR(-EINPROGRESS);
out_reset:
+16 −0
Original line number Diff line number Diff line
@@ -2625,6 +2625,19 @@ void udp_destroy_sock(struct sock *sk)
	}
}

/*
 * Install the xfrm ESP-in-UDP GRO receive callback on a UDP socket.
 *
 * @encap_type: UDP encapsulation type configured for the socket
 * @family:     address family of the socket (AF_INET or AF_INET6)
 * @sk:         the UDP socket to update
 *
 * The callback is only installed when the socket has the GRO_ENABLED bit
 * set and @encap_type is UDP_ENCAP_ESPINUDP; otherwise the socket is left
 * untouched.  For AF_INET6 the handler is taken from ipv6_stub and is only
 * installed when CONFIG_IPV6 is enabled.  Any other family is ignored.
 *
 * Without CONFIG_XFRM this function is a no-op.
 */
static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
				       struct sock *sk)
{
#ifdef CONFIG_XFRM
	/* Both GRO and ESP-in-UDP encapsulation must be active. */
	if (!udp_test_bit(GRO_ENABLED, sk) || encap_type != UDP_ENCAP_ESPINUDP)
		return;

	switch (family) {
	case AF_INET:
		WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
		break;
	case AF_INET6:
		/* The IPv6 handler is reached through the ipv6_stub table. */
		if (IS_ENABLED(CONFIG_IPV6))
			WRITE_ONCE(udp_sk(sk)->gro_receive,
				   ipv6_stub->xfrm6_gro_udp_encap_rcv);
		break;
	default:
		break;
	}
#endif
}

/*
 *	Socket option code for UDP
 */
@@ -2674,6 +2687,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
		case 0:
#ifdef CONFIG_XFRM
		case UDP_ENCAP_ESPINUDP:
			set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
			fallthrough;
		case UDP_ENCAP_ESPINUDP_NON_IKE:
#if IS_ENABLED(CONFIG_IPV6)
			if (sk->sk_family == AF_INET6)
@@ -2716,6 +2731,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
			udp_tunnel_encap_enable(sk);
		udp_assign_bit(GRO_ENABLED, sk, valbool);
		udp_assign_bit(ACCEPT_L4, sk, valbool);
		set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk);
		break;

	/*
Loading