Commit 944b6b21 authored by Zhang Tengfei's avatar Zhang Tengfei Committed by Florian Westphal
Browse files

ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable



KCSAN reported a data-race on the `ipvs->enable` flag, which is
written in the control path and read concurrently from many other
contexts.

Following a suggestion by Julian, this patch fixes the race by
converting all accesses to use `WRITE_ONCE()/READ_ONCE()`.
This lightweight approach ensures atomic access and acts as a
compiler barrier, preventing unsafe optimizations where the flag
is checked in loops (e.g., in ip_vs_est.c).

Additionally, the `enable` checks in the fast-path hooks
(`ip_vs_in_hook`, `ip_vs_out_hook`, `ip_vs_forward_icmp`) are
removed. These are unnecessary since commit 857ca897
("ipvs: register hooks only with services"). The `enable=0`
condition they check for can only occur in two rare and non-fatal
scenarios: 1) after hooks are registered but before the flag is set,
and 2) after hooks are unregistered on cleanup_net. In the worst
case, a single packet might be mishandled (e.g., dropped), which
does not lead to a system crash or data corruption. Adding a check
in the performance-critical fast-path to handle this harmless
condition is not a worthwhile trade-off.

Fixes: 857ca897 ("ipvs: register hooks only with services")
Reported-by: default avatar <syzbot+1651b5234028c294c339@syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339


Suggested-by: default avatarJulian Anastasov <ja@ssi.bg>
Link: https://lore.kernel.org/lvs-devel/2189fc62-e51e-78c9-d1de-d35b8e3657e3@ssi.bg/


Signed-off-by: default avatarZhang Tengfei <zhtfdev@gmail.com>
Acked-by: default avatarJulian Anastasov <ja@ssi.bg>
Signed-off-by: default avatarFlorian Westphal <fw@strlen.de>
parent cbd2257d
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
			 * conntrack cleanup for the net.
			 */
			smp_rmb();
			if (ipvs->enable)
			if (READ_ONCE(ipvs->enable))
				ip_vs_conn_drop_conntrack(cp);
		}

@@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
		cond_resched_rcu();

		/* netns clean up started, abort delayed work */
		if (!ipvs->enable)
		if (!READ_ONCE(ipvs->enable))
			break;
	}
	rcu_read_unlock();
+4 −7
Original line number Diff line number Diff line
@@ -1353,9 +1353,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
	if (unlikely(!skb_dst(skb)))
		return NF_ACCEPT;

	if (!ipvs->enable)
		return NF_ACCEPT;

	ip_vs_fill_iph_skb(af, skb, false, &iph);
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6) {
@@ -1940,7 +1937,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
		return NF_ACCEPT;
	}
	/* ipvs enabled in this netns ? */
	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
	if (unlikely(sysctl_backup_only(ipvs)))
		return NF_ACCEPT;

	ip_vs_fill_iph_skb(af, skb, false, &iph);
@@ -2108,7 +2105,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
	int r;

	/* ipvs enabled in this netns ? */
	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
	if (unlikely(sysctl_backup_only(ipvs)))
		return NF_ACCEPT;

	if (state->pf == NFPROTO_IPV4) {
@@ -2295,7 +2292,7 @@ static int __net_init __ip_vs_init(struct net *net)
		return -ENOMEM;

	/* Hold the beast until a service is registered */
	ipvs->enable = 0;
	WRITE_ONCE(ipvs->enable, 0);
	ipvs->net = net;
	/* Counters used for creating unique names */
	ipvs->gen = atomic_read(&ipvs_netns_cnt);
@@ -2367,7 +2364,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
		ipvs = net_ipvs(net);
		ip_vs_unregister_hooks(ipvs, AF_INET);
		ip_vs_unregister_hooks(ipvs, AF_INET6);
		ipvs->enable = 0;	/* Disable packet reception */
		WRITE_ONCE(ipvs->enable, 0);	/* Disable packet reception */
		smp_wmb();
		ip_vs_sync_net_cleanup(ipvs);
	}
+3 −3
Original line number Diff line number Diff line
@@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct *work)
		struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];

		/* netns clean up started, abort delayed work */
		if (!ipvs->enable)
		if (!READ_ONCE(ipvs->enable))
			goto unlock;
		if (!kd)
			continue;
@@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,

	*svc_p = svc;

	if (!ipvs->enable) {
	if (!READ_ONCE(ipvs->enable)) {
		/* Now there is a service - full throttle */
		ipvs->enable = 1;
		WRITE_ONCE(ipvs->enable, 1);

		/* Start estimation for first time */
		ip_vs_est_reload_start(ipvs);
+8 −8
Original line number Diff line number Diff line
@@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
{
	/* Ignore reloads before first service is added */
	if (!ipvs->enable)
	if (!READ_ONCE(ipvs->enable))
		return;
	ip_vs_est_stopped_recalc(ipvs);
	/* Bump the kthread configuration genid */
@@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
	int i;

	if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
	    ipvs->enable && ipvs->est_max_threads)
	    READ_ONCE(ipvs->enable) && ipvs->est_max_threads)
		return -EINVAL;

	mutex_lock(&ipvs->est_mutex);
@@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
	}

	/* Start kthread tasks only when services are present */
	if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
	if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
		ret = ip_vs_est_kthread_start(ipvs, kd);
		if (ret < 0)
			goto out;
@@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
	struct ip_vs_estimator *est = &stats->est;
	int ret;

	if (!ipvs->est_max_threads && ipvs->enable)
	if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable))
		ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);

	est->ktid = -1;
@@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
			/* Wait for cpufreq frequency transition */
			wait_event_idle_timeout(wq, kthread_should_stop(),
						HZ / 50);
			if (!ipvs->enable || kthread_should_stop())
			if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
				goto stop;
		}

@@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
		rcu_read_unlock();
		local_bh_enable();

		if (!ipvs->enable || kthread_should_stop())
		if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
			goto stop;
		cond_resched();

@@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
	mutex_lock(&ipvs->est_mutex);
	for (id = 1; id < ipvs->est_kt_count; id++) {
		/* netns clean up started, abort */
		if (!ipvs->enable)
		if (!READ_ONCE(ipvs->enable))
			goto unlock2;
		kd = ipvs->est_kt_arr[id];
		if (!kd)
@@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
	id = ipvs->est_kt_count;

next_kt:
	if (!ipvs->enable || kthread_should_stop())
	if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
		goto unlock;
	id--;
	if (id < 0)