Commit 40aa9fce authored by Jakub Kicinski
Browse files
Pablo Neira Ayuso says:

====================
IPVS fixes for net

The following batch contains IPVS fixes for net to address issues
from the latest net-next pull request.

Julian Anastasov made the following summary:

1-3) Fixes for the recently added resizable hash tables

4) dest from trash can be leaked if ip_vs_start_estimator() fails

5) fixed races and locking for the estimation kthreads

6) fix for wrong roundup_pow_of_two() usage in the resizable hash
   tables

7-8) v2 of the changes from Waiman Long to properly guard against
  the housekeeping_cpumask() updates:

  https://lore.kernel.org/netfilter-devel/20260331165015.2777765-1-longman@redhat.com/

  I added the missing Fixes tag. The original description:

  Since commit 041ee6f3 ("kthread: Rely on HK_TYPE_DOMAIN for preferred
  affinity management"), the HK_TYPE_KTHREAD housekeeping cpumask may no
  longer be correct in showing the actual CPU affinity of kthreads that
  have no predefined CPU affinity. As the ipvs networking code is still
  using HK_TYPE_KTHREAD, we need to make HK_TYPE_KTHREAD reflect the
  reality.

  This patch series makes HK_TYPE_KTHREAD an alias of HK_TYPE_DOMAIN
  and uses RCU to protect access to the HK_TYPE_KTHREAD housekeeping
  cpumask.

Julian plans to post an nf-next patch to limit the number of connections
via a "conn_max" sysctl. He and Simon Horman agreed that the lack of a
connection limit is a long-standing problem and that it does not block
this patchset.

* tag 'nf-26-05-05' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  sched/isolation: Make HK_TYPE_KTHREAD an alias of HK_TYPE_DOMAIN
  ipvs: Guard access of HK_TYPE_KTHREAD cpumask with RCU
  ipvs: fix shift-out-of-bounds in ip_vs_rht_desired_size
  ipvs: fix races around est_mutex and est_cpulist
  ipvs: do not leak dest after get from dest trash
  ipvs: fix the spin_lock usage for RT build
  ipvs: fix races around the conn_lfactor and svc_lfactor sysctl vars
  ipvs: fixes for the new ip_vs_status info
====================

Link: https://patch.msgid.link/20260505001648.360569-1-pablo@netfilter.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 561a22d9 8f78b749
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -20,6 +20,11 @@ enum hk_type {
	HK_TYPE_KERNEL_NOISE,
	HK_TYPE_MAX,

	/*
	 * HK_TYPE_KTHREAD is now an alias of HK_TYPE_DOMAIN
	 */
	HK_TYPE_KTHREAD = HK_TYPE_DOMAIN,

	/*
	 * The following housekeeping types are only set by the nohz_full
	 * boot commandline option. So they can share the same value.
@@ -29,7 +34,6 @@ enum hk_type {
	HK_TYPE_RCU     = HK_TYPE_KERNEL_NOISE,
	HK_TYPE_MISC    = HK_TYPE_KERNEL_NOISE,
	HK_TYPE_WQ      = HK_TYPE_KERNEL_NOISE,
	HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE
};

#ifdef CONFIG_CPU_ISOLATION
+26 −5
Original line number Diff line number Diff line
@@ -491,6 +491,7 @@ struct ip_vs_est_kt_data {
	DECLARE_BITMAP(avail, IPVS_EST_NTICKS);	/* tick has space for ests */
	unsigned long		est_timer;	/* estimation timer (jiffies) */
	struct ip_vs_stats	*calc_stats;	/* Used for calculation */
	int			needed;		/* task is needed */
	int			tick_len[IPVS_EST_NTICKS];	/* est count */
	int			id;		/* ktid per netns */
	int			chain_max;	/* max ests per tick chain */
@@ -1411,7 +1412,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
	return ipvs->sysctl_run_estimation;
}

static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs)
static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs)
{
	if (ipvs->est_cpulist_valid)
		return ipvs->sysctl_est_cpulist;
@@ -1529,7 +1530,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
	return 1;
}

static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs)
static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs)
{
	return housekeeping_cpumask(HK_TYPE_KTHREAD);
}
@@ -1564,6 +1565,18 @@ static inline int sysctl_svc_lfactor(struct netns_ipvs *ipvs)
	return READ_ONCE(ipvs->sysctl_svc_lfactor);
}

/* Return true if the cpumask selected by __sysctl_est_cpulist() has no
 * CPUs set. The mask is looked up and tested inside an RCU read-side
 * section that ends when the function returns.
 */
static inline bool sysctl_est_cpulist_empty(struct netns_ipvs *ipvs)
{
	const struct cpumask *mask;

	guard(rcu)();
	mask = __sysctl_est_cpulist(ipvs);
	return cpumask_empty(mask);
}

/* Return the number of CPUs set in the cpumask selected by
 * __sysctl_est_cpulist(). The mask is looked up and counted inside an
 * RCU read-side section that ends when the function returns.
 */
static inline unsigned int sysctl_est_cpulist_weight(struct netns_ipvs *ipvs)
{
	const struct cpumask *mask;

	guard(rcu)();
	mask = __sysctl_est_cpulist(ipvs);
	return cpumask_weight(mask);
}

/* IPVS core functions
 * (from ip_vs_core.c)
 */
@@ -1884,18 +1897,26 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats);
void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats);
void ip_vs_zero_estimator(struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats);
void ip_vs_est_reload_start(struct netns_ipvs *ipvs);
void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart);
int ip_vs_est_kthread_start(struct netns_ipvs *ipvs,
			    struct ip_vs_est_kt_data *kd);
void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd);

/* Stop the estimator of the per-netns total stats and record that fact
 * by setting its ktid to -2. Compiles to nothing without CONFIG_SYSCTL.
 */
static inline void ip_vs_stop_estimator_tot_stats(struct netns_ipvs *ipvs)
{
#ifdef CONFIG_SYSCTL
	struct ip_vs_stats *tot = &ipvs->tot_stats->s;

	ip_vs_stop_estimator(ipvs, tot);
	/* -2: marks tot_stats as already stopped */
	tot->est.ktid = -2;
#endif
}

static inline void ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs)
{
#ifdef CONFIG_SYSCTL
	/* Stop tasks while cpulist is empty or if disabled with flag */
	ipvs->est_stopped = !sysctl_run_estimation(ipvs) ||
			    (ipvs->est_cpulist_valid &&
			     cpumask_empty(sysctl_est_cpulist(ipvs)));
			     sysctl_est_cpulist_empty(ipvs));
#endif
}

@@ -1911,7 +1932,7 @@ static inline bool ip_vs_est_stopped(struct netns_ipvs *ipvs)
static inline int ip_vs_est_max_threads(struct netns_ipvs *ipvs)
{
	unsigned int limit = IPVS_EST_CPU_KTHREADS *
			     cpumask_weight(sysctl_est_cpulist(ipvs));
			     sysctl_est_cpulist_weight(ipvs);

	return max(1U, limit);
}
+42 −34
Original line number Diff line number Diff line
@@ -267,11 +267,10 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
		hash_key2 = hash_key;
		use2 = false;
	}

	conn_tab_lock(t, cp, hash_key, hash_key2, use2, true /* new_hash */,
		      &head, &head2);
	spin_lock(&cp->lock);

	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
	cp->flags |= IP_VS_CONN_F_HASHED;
	WRITE_ONCE(cp->hn0.hash_key, hash_key);
	WRITE_ONCE(cp->hn1.hash_key, hash_key2);
@@ -279,15 +278,9 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
	hlist_bl_add_head_rcu(&cp->hn0.node, head);
	if (use2)
		hlist_bl_add_head_rcu(&cp->hn1.node, head2);
		ret = 1;
	} else {
		pr_err("%s(): request for already hashed, called from %pS\n",
		       __func__, __builtin_return_address(0));
		ret = 0;
	}

	spin_unlock(&cp->lock);
	conn_tab_unlock(head, head2);
	ret = 1;

	/* Schedule resizing if load increases */
	if (atomic_read(&ipvs->conn_count) > t->u_thresh &&
@@ -321,7 +314,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)

	conn_tab_lock(t, cp, hash_key, hash_key2, use2, false /* new_hash */,
		      &head, &head2);
	spin_lock(&cp->lock);

	if (cp->flags & IP_VS_CONN_F_HASHED) {
		/* Decrease refcnt and unlink conn only if we are last user */
@@ -334,7 +326,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
		}
	}

	spin_unlock(&cp->lock);
	conn_tab_unlock(head, head2);

	rcu_read_unlock();
@@ -637,6 +628,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
	struct ip_vs_conn_hnode *hn;
	u32 hash_key, hash_key_new;
	struct ip_vs_conn_param p;
	bool by_me = false;
	int ntbl;
	int dir;

@@ -664,9 +656,17 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
		t = rcu_dereference(t->new_tbl);
		ntbl++;
		/* We are lost? */
		if (ntbl >= 2)
		if (ntbl >= 2) {
			spin_lock_bh(&cp->lock);
			if (cp->flags & IP_VS_CONN_F_NO_CPORT && by_me)
				cp->cport = 0;
			/* hn1 will be rehashed on next packet */
			spin_unlock_bh(&cp->lock);
			IP_VS_ERR_RL("%s(): Too many ht changes for dir %d\n",
				     __func__, dir);
			return;
		}
	}

	/* Rehashing during resize? Use the recent table for adds */
	t2 = rcu_dereference(t->new_tbl);
@@ -683,10 +683,13 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
	if (head > head2 && t == t2)
		swap(head, head2);

	/* Protect the cp->flags modification */
	spin_lock_bh(&cp->lock);

	/* Lock seqcount only for the old bucket, even if we are on new table
	 * because it affects the del operation, not the adding.
	 */
	spin_lock_bh(&t->lock[hash_key & t->lock_mask].l);
	spin_lock(&t->lock[hash_key & t->lock_mask].l);
	preempt_disable_nested();
	write_seqcount_begin(&t->seqc[hash_key & t->seqc_mask]);

@@ -704,14 +707,23 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
		hlist_bl_unlock(head);
		write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]);
		preempt_enable_nested();
		spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l);
		spin_unlock(&t->lock[hash_key & t->lock_mask].l);
		spin_unlock_bh(&cp->lock);
		hash_key = hash_key_new;
		goto retry;
	}

	spin_lock(&cp->lock);
	if ((cp->flags & IP_VS_CONN_F_NO_CPORT) &&
	    (cp->flags & IP_VS_CONN_F_HASHED)) {
	/* Fill cport once, even if multiple packets try to do it */
	if (cp->flags & IP_VS_CONN_F_NO_CPORT && (!cp->cport || by_me)) {
		/* If we race with resizing make sure cport is set for dir 1 */
		if (!cp->cport) {
			cp->cport = cport;
			by_me = true;
		}
		if (!dir) {
			atomic_dec(&ipvs->no_cport_conns[af_id]);
			cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
		}
		/* We do not recalc hash_key_r under lock, we assume the
		 * parameters in cp do not change, i.e. cport is
		 * the only possible change.
@@ -726,21 +738,17 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
			hlist_bl_del_rcu(&hn->node);
			hlist_bl_add_head_rcu(&hn->node, head_new);
		}
		if (!dir) {
			atomic_dec(&ipvs->no_cport_conns[af_id]);
			cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
			cp->cport = cport;
		}
	}
	spin_unlock(&cp->lock);

	if (head != head2)
		hlist_bl_unlock(head2);
	hlist_bl_unlock(head);
	write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]);
	preempt_enable_nested();
	spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l);
	if (dir--)
	spin_unlock(&t->lock[hash_key & t->lock_mask].l);

	spin_unlock_bh(&cp->lock);
	if (dir-- && by_me)
		goto next_dir;
}

@@ -1835,7 +1843,7 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs)

	if (!rcu_dereference_protected(ipvs->conn_tab, 1))
		return;
	cancel_delayed_work_sync(&ipvs->conn_resize_work);
	disable_delayed_work_sync(&ipvs->conn_resize_work);
	if (!atomic_read(&ipvs->conn_count))
		goto unreg;

+1 −1
Original line number Diff line number Diff line
@@ -237,7 +237,7 @@ int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n,
{
	if (!t)
		return 1 << min_bits;
	n = roundup_pow_of_two(n);
	n = n > 0 ? roundup_pow_of_two(n) : 1;
	if (lfactor < 0) {
		int factor = min(-lfactor, max_bits);

+119 −45
Original line number Diff line number Diff line
@@ -261,12 +261,28 @@ static void est_reload_work_handler(struct work_struct *work)
		if (!kd)
			continue;
		/* New config ? Stop kthread tasks */
		if (genid != genid_done)
		if (genid != genid_done) {
			if (!id) {
				/* Only we can stop kt 0 but not under mutex */
				mutex_unlock(&ipvs->est_mutex);
				ip_vs_est_kthread_stop(kd);
				mutex_lock(&ipvs->est_mutex);
				if (!READ_ONCE(ipvs->enable))
					goto unlock;
				/* kd for kt 0 is never destroyed */
			} else {
				ip_vs_est_kthread_stop(kd);
			}
		}
		if (!kd->task && !ip_vs_est_stopped(ipvs)) {
			bool start;

			/* Do not start kthreads above 0 in calc phase */
			if ((!id || !ipvs->est_calc_phase) &&
			    ip_vs_est_kthread_start(ipvs, kd) < 0)
			if (id)
				start = !ipvs->est_calc_phase;
			else
				start = kd->needed;
			if (start && ip_vs_est_kthread_start(ipvs, kd) < 0)
				repeat = true;
		}
	}
@@ -1102,6 +1118,24 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
	return dest;
}

/* Put destination in trash
 *
 * @ipvs:    netns whose dest_trash list, lock and timer are used
 * @dest:    destination to add to the trash list
 * @istart:  value stored into dest->idle_start
 * @cleanup: when true, the trash timer is not armed
 *
 * All work is done under ipvs->dest_trash_lock with BHs disabled.
 */
static void ip_vs_trash_put_dest(struct netns_ipvs *ipvs,
				 struct ip_vs_dest *dest, unsigned long istart,
				 bool cleanup)
{
	spin_lock_bh(&ipvs->dest_trash_lock);
	IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
		      refcount_read(&dest->refcnt));
	/* Trash was empty before this add: arm the timer to fire after
	 * half of IP_VS_DEST_TRASH_PERIOD, unless we are in cleanup.
	 * Must be checked before the list_add() below.
	 */
	if (list_empty(&ipvs->dest_trash) && !cleanup)
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	/* dest lives in trash with reference */
	list_add(&dest->t_list, &ipvs->dest_trash);
	dest->idle_start = istart;
	spin_unlock_bh(&ipvs->dest_trash_lock);
}

static void ip_vs_dest_rcu_free(struct rcu_head *head)
{
	struct ip_vs_dest *dest;
@@ -1461,8 +1495,11 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
			      ntohs(dest->vport));

		ret = ip_vs_start_estimator(svc->ipvs, &dest->stats);
		/* On error put back dest into the trash */
		if (ret < 0)
			return ret;
			ip_vs_trash_put_dest(svc->ipvs, dest, dest->idle_start,
					     false);
		else
			__ip_vs_update_dest(svc, dest, udest, 1);
	} else {
		/*
@@ -1533,17 +1570,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
	 */
	ip_vs_rs_unhash(dest);

	spin_lock_bh(&ipvs->dest_trash_lock);
	IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
		      refcount_read(&dest->refcnt));
	if (list_empty(&ipvs->dest_trash) && !cleanup)
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	/* dest lives in trash with reference */
	list_add(&dest->t_list, &ipvs->dest_trash);
	dest->idle_start = 0;
	spin_unlock_bh(&ipvs->dest_trash_lock);
	ip_vs_trash_put_dest(ipvs, dest, 0, cleanup);

	/* Queue up delayed work to expire all no destination connections.
	 * No-op when CONFIG_SYSCTL is disabled.
@@ -1812,11 +1839,16 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
	*svc_p = svc;

	if (!READ_ONCE(ipvs->enable)) {
		mutex_lock(&ipvs->est_mutex);

		/* Now there is a service - full throttle */
		WRITE_ONCE(ipvs->enable, 1);

		ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);

		/* Start estimation for first time */
		ip_vs_est_reload_start(ipvs);
		ip_vs_est_reload_start(ipvs, true);
		mutex_unlock(&ipvs->est_mutex);
	}

	return 0;
@@ -2032,6 +2064,9 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
		cancel_delayed_work_sync(&ipvs->svc_resize_work);
		if (t) {
			rcu_assign_pointer(ipvs->svc_table, NULL);
			/* Inform readers that table is removed */
			smp_mb__before_atomic();
			atomic_inc(&ipvs->svc_table_changes);
			while (1) {
				p = rcu_dereference_protected(t->new_tbl, 1);
				call_rcu(&t->rcu_head, ip_vs_rht_rcu_free);
@@ -2078,6 +2113,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
	t = rcu_dereference_protected(ipvs->svc_table, 1);
	if (t) {
		rcu_assign_pointer(ipvs->svc_table, NULL);
		/* Inform readers that table is removed */
		smp_mb__before_atomic();
		atomic_inc(&ipvs->svc_table_changes);
		while (1) {
			p = rcu_dereference_protected(t->new_tbl, 1);
			call_rcu(&t->rcu_head, ip_vs_rht_rcu_free);
@@ -2086,6 +2124,11 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
			t = p;
		}
	}
	/* Stop the tot_stats estimator early under service_mutex
	 * to avoid locking it again later.
	 */
	if (cleanup)
		ip_vs_stop_estimator_tot_stats(ipvs);
	return 0;
}

@@ -2331,7 +2374,7 @@ static int ipvs_proc_est_cpumask_set(const struct ctl_table *table,
	/* est_max_threads may depend on cpulist size */
	ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
	ipvs->est_calc_phase = 1;
	ip_vs_est_reload_start(ipvs);
	ip_vs_est_reload_start(ipvs, true);

unlock:
	mutex_unlock(&ipvs->est_mutex);
@@ -2351,11 +2394,14 @@ static int ipvs_proc_est_cpumask_get(const struct ctl_table *table,

	mutex_lock(&ipvs->est_mutex);

	/* HK_TYPE_KTHREAD cpumask needs RCU protection */
	scoped_guard(rcu) {
		if (ipvs->est_cpulist_valid)
			mask = *valp;
		else
			mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD);
		ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask));
	}

	mutex_unlock(&ipvs->est_mutex);

@@ -2411,7 +2457,7 @@ static int ipvs_proc_est_nice(const struct ctl_table *table, int write,
			mutex_lock(&ipvs->est_mutex);
			if (*valp != val) {
				*valp = val;
				ip_vs_est_reload_start(ipvs);
				ip_vs_est_reload_start(ipvs, true);
			}
			mutex_unlock(&ipvs->est_mutex);
		}
@@ -2438,7 +2484,7 @@ static int ipvs_proc_run_estimation(const struct ctl_table *table, int write,
		mutex_lock(&ipvs->est_mutex);
		if (*valp != val) {
			*valp = val;
			ip_vs_est_reload_start(ipvs);
			ip_vs_est_reload_start(ipvs, true);
		}
		mutex_unlock(&ipvs->est_mutex);
	}
@@ -2463,7 +2509,7 @@ static int ipvs_proc_conn_lfactor(const struct ctl_table *table, int write,
		if (val < -8 || val > 8) {
			ret = -EINVAL;
		} else {
			*valp = val;
			WRITE_ONCE(*valp, val);
			if (rcu_access_pointer(ipvs->conn_tab))
				mod_delayed_work(system_unbound_wq,
						 &ipvs->conn_resize_work, 0);
@@ -2490,10 +2536,16 @@ static int ipvs_proc_svc_lfactor(const struct ctl_table *table, int write,
		if (val < -8 || val > 8) {
			ret = -EINVAL;
		} else {
			*valp = val;
			if (rcu_access_pointer(ipvs->svc_table))
			mutex_lock(&ipvs->service_mutex);
			WRITE_ONCE(*valp, val);
			/* Make sure the services are present */
			if (rcu_access_pointer(ipvs->svc_table) &&
			    READ_ONCE(ipvs->enable) &&
			    !test_bit(IP_VS_WORK_SVC_NORESIZE,
				      &ipvs->work_flags))
				mod_delayed_work(system_unbound_wq,
						 &ipvs->svc_resize_work, 0);
			mutex_unlock(&ipvs->service_mutex);
		}
	}
	return ret;
@@ -3004,7 +3056,8 @@ static int ip_vs_status_show(struct seq_file *seq, void *v)
	int old_gen, new_gen;
	u32 counts[8];
	u32 bucket;
	int count;
	u32 count;
	int loops;
	u32 sum1;
	u32 sum;
	int i;
@@ -3020,6 +3073,7 @@ static int ip_vs_status_show(struct seq_file *seq, void *v)
	if (!atomic_read(&ipvs->conn_count))
		goto after_conns;
	old_gen = atomic_read(&ipvs->conn_tab_changes);
	loops = 0;

repeat_conn:
	smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */
@@ -3032,8 +3086,11 @@ static int ip_vs_status_show(struct seq_file *seq, void *v)
			resched_score++;
			ip_vs_rht_walk_bucket_rcu(t, bucket, head) {
				count = 0;
				hlist_bl_for_each_entry_rcu(hn, e, head, node)
				hlist_bl_for_each_entry_rcu(hn, e, head, node) {
					count++;
					if (count >= ARRAY_SIZE(counts) - 1)
						break;
				}
			}
			resched_score += count;
			if (resched_score >= 100) {
@@ -3042,37 +3099,41 @@ static int ip_vs_status_show(struct seq_file *seq, void *v)
				new_gen = atomic_read(&ipvs->conn_tab_changes);
				/* New table installed ? */
				if (old_gen != new_gen) {
					/* Too many changes? */
					if (++loops >= 5)
						goto after_conns;
					old_gen = new_gen;
					goto repeat_conn;
				}
			}
			counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++;
			counts[count]++;
		}
	}
	for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++)
		sum += counts[i];
	sum1 = sum - counts[0];
	seq_printf(seq, "Conn buckets empty:\t%u (%lu%%)\n",
		   counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U));
	seq_printf(seq, "Conn buckets empty:\t%u (%llu%%)\n",
		   counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U)));
	for (i = 1; i < ARRAY_SIZE(counts); i++) {
		if (!counts[i])
			continue;
		seq_printf(seq, "Conn buckets len-%d:\t%u (%lu%%)\n",
		seq_printf(seq, "Conn buckets len-%d:\t%u (%llu%%)\n",
			   i, counts[i],
			   (unsigned long)counts[i] * 100 / max(sum1, 1U));
			   div_u64((u64)counts[i] * 100U, max(sum1, 1U)));
	}

after_conns:
	t = rcu_dereference(ipvs->svc_table);

	count = ip_vs_get_num_services(ipvs);
	seq_printf(seq, "Services:\t%d\n", count);
	seq_printf(seq, "Services:\t%u\n", count);
	seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n",
		   t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0);

	if (!count)
		goto after_svc;
	old_gen = atomic_read(&ipvs->svc_table_changes);
	loops = 0;

repeat_svc:
	smp_rmb(); /* ipvs->svc_table and svc_table_changes */
@@ -3086,8 +3147,11 @@ static int ip_vs_status_show(struct seq_file *seq, void *v)
			ip_vs_rht_walk_bucket_rcu(t, bucket, head) {
				count = 0;
				hlist_bl_for_each_entry_rcu(svc, e, head,
							    s_list)
							    s_list) {
					count++;
					if (count >= ARRAY_SIZE(counts) - 1)
						break;
				}
			}
			resched_score += count;
			if (resched_score >= 100) {
@@ -3096,24 +3160,27 @@ static int ip_vs_status_show(struct seq_file *seq, void *v)
				new_gen = atomic_read(&ipvs->svc_table_changes);
				/* New table installed ? */
				if (old_gen != new_gen) {
					/* Too many changes? */
					if (++loops >= 5)
						goto after_svc;
					old_gen = new_gen;
					goto repeat_svc;
				}
			}
			counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++;
			counts[count]++;
		}
	}
	for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++)
		sum += counts[i];
	sum1 = sum - counts[0];
	seq_printf(seq, "Service buckets empty:\t%u (%lu%%)\n",
		   counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U));
	seq_printf(seq, "Service buckets empty:\t%u (%llu%%)\n",
		   counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U)));
	for (i = 1; i < ARRAY_SIZE(counts); i++) {
		if (!counts[i])
			continue;
		seq_printf(seq, "Service buckets len-%d:\t%u (%lu%%)\n",
		seq_printf(seq, "Service buckets len-%d:\t%u (%llu%%)\n",
			   i, counts[i],
			   (unsigned long)counts[i] * 100 / max(sum1, 1U));
			   div_u64((u64)counts[i] * 100U, max(sum1, 1U)));
	}

after_svc:
@@ -4967,7 +5034,14 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
	cancel_delayed_work_sync(&ipvs->defense_work);
	cancel_work_sync(&ipvs->defense_work.work);
	unregister_net_sysctl_table(ipvs->sysctl_hdr);
	if (ipvs->tot_stats->s.est.ktid != -2) {
		/* Not stopped yet? This happens only on netns init error and
		 * we even do not need to lock the service_mutex for this case.
		 */
		mutex_lock(&ipvs->service_mutex);
		ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s);
		mutex_unlock(&ipvs->service_mutex);
	}

	if (ipvs->est_cpulist_valid)
		free_cpumask_var(ipvs->sysctl_est_cpulist);
@@ -5039,7 +5113,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
				    ipvs->net->proc_net,
				    ip_vs_stats_percpu_show, NULL))
		goto err_percpu;
	if (!proc_create_net_single("ip_vs_status", 0, ipvs->net->proc_net,
	if (!proc_create_net_single("ip_vs_status", 0440, ipvs->net->proc_net,
				    ip_vs_status_show, NULL))
		goto err_status;
#endif
Loading