Commit 67f4cfb5 authored by Martin KaFai Lau
Browse files

Merge branch 'net-smc-introduce-smc_hs_ctrl'

D. Wythe says:

====================
net/smc: Introduce smc_hs_ctrl

This patch aims to introduce BPF injection capabilities for SMC and
includes a self-test to ensure code stability.

The SMC protocol isn't ideal for every situation, especially
short-lived ones, and most applications can't guarantee the absence of
such scenarios. Consequently, applications may need specific strategies
to decide whether to use SMC. For example, an application might limit SMC
usage to certain IP addresses or ports.

To maintain the principle of transparent replacement, we want applications
to remain unaffected even if they need specific SMC strategies. In other
words, they should not require recompilation of their code.

Additionally, we need to ensure the scalability of strategy implementation.
While using socket options or sysctl might be straightforward, it could
complicate future expansions.

Fortunately, BPF addresses these concerns effectively. Users can write
their own strategies in eBPF to determine whether to use SMC, and they can
easily modify those strategies in the future.

This is a rework of the series from [1]. Changes since [1] are limited to
the SMC parts:

1. Rename smc_ops to smc_hs_ctrl and change interface name.
2. Squash SMC patches, removing standalone non-BPF hook capability.
3. Fix typos

[1]: https://lore.kernel.org/bpf/20250123015942.94810-1-alibuda@linux.alibaba.com/#t

v2 -> v1:
  - Removed the fixes patch, which has already been merged into the current branch.
  - Fixed compilation warning of smc_call_hsbpf() when CONFIG_SMC_HS_CTRL_BPF
    is not enabled.
  - Changed the default value of CONFIG_SMC_HS_CTRL_BPF to Y.
  - Fixed a typo and renamed some variables

v3 -> v2:
  - Removed the libbpf patch, which has already been merged into the current branch.
  - Fixed sparse warning of smc_call_hsbpf() and xchg().

v4 -> v3:
   - Rebased on latest bpf-next, updated SMC loopback config from SMC_LO to DIBS_LO
     per upstream changes.

v5 -> v4:
    - Removed the redundant sk parameter from smc_call_hsbpf
    - Reject registration when bpf_link is set; link support will be added in the
      future.
    - Updated selftests with new test helpers.
====================

Link: https://patch.msgid.link/20251107035632.115950-1-alibuda@linux.alibaba.com


Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents abd0c0f6 beb3c672
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -17,6 +17,9 @@ struct netns_smc {
#ifdef CONFIG_SYSCTL
	struct ctl_table_header		*smc_hdr;
#endif
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
	struct smc_hs_ctrl __rcu	*hs_ctrl;
#endif /* CONFIG_SMC_HS_CTRL_BPF */
	unsigned int			sysctl_autocorking_size;
	unsigned int			sysctl_smcr_buf_type;
	int				sysctl_smcr_testlink_time;
+53 −0
Original line number Diff line number Diff line
@@ -17,6 +17,8 @@
#include <linux/wait.h>
#include <linux/dibs.h>

struct tcp_sock;
struct inet_request_sock;
struct sock;

#define SMC_MAX_PNETID_LEN	16	/* Max. length of PNET id */
@@ -50,4 +52,55 @@ struct smcd_dev {
	u8 going_away : 1;
};

#define SMC_HS_CTRL_NAME_MAX 16

/* Flag bits describing an smc_hs_ctrl.
 * NOTE(review): presumably stored in struct smc_hs_ctrl::flags - confirm
 * against the registration code, which is not visible here.
 */
enum {
	/* ops can be inherited from init_net */
	SMC_HS_CTRL_FLAG_INHERITABLE = 0x1,

	/* mask covering every flag bit defined above */
	SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE,
};

/* Set of hooks consulted before SMC options are added to TCP SYN and
 * SYN-ACK packets. Each hook returns 0 to disable SMC for that packet,
 * or any other value to enable it.
 */
struct smc_hs_ctrl {
	/* private */

	/* NOTE(review): presumably links this instance into a list of
	 * registered controllers - confirm against the registration code.
	 */
	struct list_head list;
	/* NOTE(review): presumably the module providing these hooks - confirm */
	struct module *owner;

	/* public */

	/* unique name, at most SMC_HS_CTRL_NAME_MAX bytes of storage */
	char name[SMC_HS_CTRL_NAME_MAX];
	/* NOTE(review): presumably a mask of SMC_HS_CTRL_FLAG_* bits - confirm */
	int flags;

	/* Invoked before computing the SMC option for SYN packets.
	 * The return value controls whether the SMC option is set:
	 * return 0 to disable SMC, or any other value to enable it.
	 */
	int (*syn_option)(struct tcp_sock *tp);

	/* Invoked before setting up SMC options for SYN-ACK packets.
	 * The return value controls whether to respond with the SMC option:
	 * return 0 to disable SMC, or any other value to enable it.
	 */
	int (*synack_option)(const struct tcp_sock *tp,
			     struct inet_request_sock *ireq);
};

#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
/* smc_call_hsbpf - call hook @func of the smc_hs_ctrl set for a socket's netns
 * @init_val: value the expression yields when no hook is called
 * @tp: socket pointer; cast to struct sock * to look up its network
 *      namespace, and passed as the first argument to the hook
 * @func: name of the struct smc_hs_ctrl member to call
 * @...: extra arguments forwarded to the hook after @tp
 *
 * The hs_ctrl pointer is read with rcu_dereference() and the hook runs
 * inside the rcu_read_lock()/rcu_read_unlock() section. When no hs_ctrl is
 * set, or its @func member is NULL, the expression evaluates to @init_val;
 * otherwise it evaluates to the hook's return value.
 *
 * @tp appears twice in the expansion, so callers should pass an expression
 * without side effects.
 */
#define smc_call_hsbpf(init_val, tp, func, ...) ({				\
	typeof(init_val) __ret = (init_val);					\
	struct smc_hs_ctrl *ctrl;						\
	rcu_read_lock();							\
	ctrl = rcu_dereference(sock_net((struct sock *)(tp))->smc.hs_ctrl);	\
	if (ctrl && ctrl->func)							\
		__ret = ctrl->func(tp, ##__VA_ARGS__);				\
	rcu_read_unlock();							\
	__ret;									\
})
#else
/* Variant without hook support: @tp is only referenced through a void cast,
 * the remaining arguments are discarded, and the expression yields @init_val.
 */
#define smc_call_hsbpf(init_val, tp, ...)  ({ (void)(tp); (init_val); })
#endif /* CONFIG_SMC_HS_CTRL_BPF */

#endif	/* _SMC_H */
+2 −0
Original line number Diff line number Diff line
@@ -1162,6 +1162,7 @@ bool bpf_struct_ops_get(const void *kdata)
	map = __bpf_map_inc_not_zero(&st_map->map, false);
	return !IS_ERR(map);
}
EXPORT_SYMBOL_GPL(bpf_struct_ops_get);

void bpf_struct_ops_put(const void *kdata)
{
@@ -1173,6 +1174,7 @@ void bpf_struct_ops_put(const void *kdata)

	bpf_map_put(&st_map->map);
}
EXPORT_SYMBOL_GPL(bpf_struct_ops_put);

u32 bpf_struct_ops_id(const void *kdata)
{
+1 −0
Original line number Diff line number Diff line
@@ -1234,6 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)

	return src - orig_src;
}
EXPORT_SYMBOL_GPL(bpf_obj_name_cpy);

int map_check_no_btf(const struct bpf_map *map,
		     const struct btf *btf,
+17 −14
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@
#include <net/tcp.h>
#include <net/tcp_ecn.h>
#include <net/mptcp.h>
#include <net/smc.h>
#include <net/proto_memory.h>
#include <net/psp.h>

@@ -802,36 +803,38 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
	mptcp_options_write(th, ptr, tp, opts);
}

static void smc_set_option(const struct tcp_sock *tp,
static void smc_set_option(struct tcp_sock *tp,
			   struct tcp_out_options *opts,
			   unsigned int *remaining)
{
#if IS_ENABLED(CONFIG_SMC)
	if (static_branch_unlikely(&tcp_have_smc)) {
		if (tp->syn_smc) {
			if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
	if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc) {
		tp->syn_smc = !!smc_call_hsbpf(1, tp, syn_option);
		/* re-check syn_smc */
		if (tp->syn_smc &&
		    *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
			opts->options |= OPTION_SMC;
			*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
		}
	}
	}
#endif
}

static void smc_set_option_cond(const struct tcp_sock *tp,
				const struct inet_request_sock *ireq,
				struct inet_request_sock *ireq,
				struct tcp_out_options *opts,
				unsigned int *remaining)
{
#if IS_ENABLED(CONFIG_SMC)
	if (static_branch_unlikely(&tcp_have_smc)) {
		if (tp->syn_smc && ireq->smc_ok) {
			if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
	if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc && ireq->smc_ok) {
		ireq->smc_ok = !!smc_call_hsbpf(1, tp, synack_option, ireq);
		/* re-check smc_ok */
		if (ireq->smc_ok &&
		    *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
			opts->options |= OPTION_SMC;
			*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
		}
	}
	}
#endif
}

Loading