Commit 09b71fb4 authored by Julian Anastasov's avatar Julian Anastasov Committed by Jakub Kicinski
Browse files

ipvs: no_cport and dropentry counters can be per-net



Change the no_cport counters to be per-net and address family.
This should reduce the extra conn lookups done during present
NO_CPORT connections.

By changing from global to per-net dropentry counters, one net
will not affect the drop rate of another net.

Signed-off-by: default avatarJulian Anastasov <ja@ssi.bg>
Signed-off-by: default avatarFlorian Westphal <fw@strlen.de>
Link: https://patch.msgid.link/20260224205048.4718-7-fw@strlen.de


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent c59bd9e6
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -948,6 +948,7 @@ struct netns_ipvs {
#endif
	/* ip_vs_conn */
	atomic_t		conn_count;      /* connection counter */
	atomic_t		no_cport_conns[IP_VS_AF_MAX];

	/* ip_vs_ctl */
	struct ip_vs_stats_rcu	*tot_stats;      /* Statistics & est. */
@@ -973,6 +974,7 @@ struct netns_ipvs {
	int			drop_counter;
	int			old_secure_tcp;
	atomic_t		dropentry;
	s8			dropentry_counters[8];
	/* locks in ctl.c */
	spinlock_t		dropentry_lock;  /* drop entry handling */
	spinlock_t		droppacket_lock; /* drop packet handling */
+39 −25
Original line number Diff line number Diff line
@@ -59,9 +59,6 @@ static struct hlist_head *ip_vs_conn_tab __read_mostly;
/*  SLAB cache for IPVS connections */
static struct kmem_cache *ip_vs_conn_cachep __read_mostly;

/*  counter for no client port connections */
static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);

/* random value for IPVS connection hash */
static unsigned int ip_vs_conn_rnd __read_mostly;

@@ -294,11 +291,17 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
	struct ip_vs_conn *cp;

	cp = __ip_vs_conn_in_get(p);
	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) {
	if (!cp) {
		struct netns_ipvs *ipvs = p->ipvs;
		int af_id = ip_vs_af_index(p->af);

		if (atomic_read(&ipvs->no_cport_conns[af_id])) {
			struct ip_vs_conn_param cport_zero_p = *p;

			cport_zero_p.cport = 0;
			cp = __ip_vs_conn_in_get(&cport_zero_p);
		}
	}

	IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
		      ip_vs_proto_name(p->protocol),
@@ -490,9 +493,12 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
{
	if (ip_vs_conn_unhash(cp)) {
		struct netns_ipvs *ipvs = cp->ipvs;
		int af_id = ip_vs_af_index(cp->af);

		spin_lock_bh(&cp->lock);
		if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
			atomic_dec(&ip_vs_conn_no_cport_cnt);
			atomic_dec(&ipvs->no_cport_conns[af_id]);
			cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
			cp->cport = cport;
		}
@@ -891,8 +897,11 @@ static void ip_vs_conn_expire(struct timer_list *t)
		if (unlikely(cp->app != NULL))
			ip_vs_unbind_app(cp);
		ip_vs_unbind_dest(cp);
		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
			atomic_dec(&ip_vs_conn_no_cport_cnt);
		if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
			int af_id = ip_vs_af_index(cp->af);

			atomic_dec(&ipvs->no_cport_conns[af_id]);
		}
		if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
			ip_vs_conn_rcu_free(&cp->rcu_head);
		else
@@ -999,8 +1008,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
	cp->out_seq.delta = 0;

	atomic_inc(&ipvs->conn_count);
	if (flags & IP_VS_CONN_F_NO_CPORT)
		atomic_inc(&ip_vs_conn_no_cport_cnt);
	if (unlikely(flags & IP_VS_CONN_F_NO_CPORT)) {
		int af_id = ip_vs_af_index(cp->af);

		atomic_inc(&ipvs->no_cport_conns[af_id]);
	}

	/* Bind the connection with a destination server */
	cp->dest = NULL;
@@ -1257,6 +1269,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
};
#endif

#ifdef CONFIG_SYSCTL

/* Randomly drop connection entries before running out of memory
 * Can be used for DATA and CTL conns. For TPL conns there are exceptions:
@@ -1266,12 +1279,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
 */
static inline int todrop_entry(struct ip_vs_conn *cp)
{
	/*
	 * The drop rate array needs tuning for real environments.
	 * Called from timer bh only => no locking
	 */
	static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
	static signed char todrop_counter[9] = {0};
	struct netns_ipvs *ipvs = cp->ipvs;
	int i;

	/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
@@ -1280,15 +1288,17 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
	if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
		return 0;

	/* Don't drop the entry if its number of incoming packets is not
	   located in [0, 8] */
	/* Drop only conns with number of incoming packets in [1..8] range */
	i = atomic_read(&cp->in_pkts);
	if (i > 8 || i < 0) return 0;
	if (i > 8 || i < 1)
		return 0;

	if (!todrop_rate[i]) return 0;
	if (--todrop_counter[i] > 0) return 0;
	i--;
	if (--ipvs->dropentry_counters[i] > 0)
		return 0;

	todrop_counter[i] = todrop_rate[i];
	/* Prefer to drop conns with less number of incoming packets */
	ipvs->dropentry_counters[i] = i + 1;
	return 1;
}

@@ -1368,7 +1378,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
	}
	rcu_read_unlock();
}

#endif

/*
 *      Flush all the connection entries in the ip_vs_conn_tab
@@ -1450,7 +1460,11 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
 */
int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
{
	int idx;

	atomic_set(&ipvs->conn_count, 0);
	for (idx = 0; idx < IP_VS_AF_MAX; idx++)
		atomic_set(&ipvs->no_cport_conns[idx], 0);

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,