Commit a9522664 authored by David S. Miller
Browse files

Merge branch 'tcp-up-pin-tw-timer'



Florian Westphal says:

====================
net: tcp: un-pin tw timer

Changes since previous iteration:
 - Patch 1: update a comment; I copied Eric's v7 RvB tag.
 - Patch 2: move bh off/on into hashdance_schedule and get rid of
   comment mentioning pinned tw timer.
   I did not copy Eric's RvB tag over from v7 because of the change.
 - Patch 3 is unchanged, so I kept Eric's RvB tag.

This is v8 of the series where the tw_timer is un-pinned to get rid of
interference in isolated-CPU setups.

The first patch makes the necessary preparations: the existing code
relies on TIMER_PINNED to avoid races.

The second patch un-pins the TW timer. It could be folded into the first
one, but keeping it separate might help with bisection.

Third patch is a minor cleanup to move a helper from .h to the only
remaining compilation unit.

Tested with iperf3 and stress-ng socket mode.
====================

Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 8d466c8f f81d0dd2
Loading
Loading
Loading
Loading
+4 −7
Original line number Diff line number Diff line
@@ -93,17 +93,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
					   struct inet_timewait_death_row *dr,
					   const int state);

void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
			 struct inet_hashinfo *hashinfo);
void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
				  struct sock *sk,
				  struct inet_hashinfo *hashinfo,
				  int timeo);

void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo,
			  bool rearm);

/* Thin wrapper: forwards @tw and @timeo to __inet_twsk_schedule() with
 * the rearm argument set to false.
 */
static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo)
{
	__inet_twsk_schedule(tw, timeo, false);
}

static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo)
{
	__inet_twsk_schedule(tw, timeo, true);
+1 −8
Original line number Diff line number Diff line
@@ -54,17 +54,10 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
		if (state == DCCP_TIME_WAIT)
			timeo = DCCP_TIMEWAIT_LEN;

		/* tw_timer is pinned, so we need to make sure BH are disabled
		 * in following section, otherwise timer handler could run before
		 * we complete the initialization.
		 */
		local_bh_disable();
		inet_twsk_schedule(tw, timeo);
		/* Linkage updates.
		 * Note that access to tw after this point is illegal.
		 */
		inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
		local_bh_enable();
		inet_twsk_hashdance_schedule(tw, sk, &dccp_hashinfo, timeo);
	} else {
		/* Sorry, if we're out of memory, just CLOSE this
		 * socket up.  We've got bigger problems than
+54 −9
Original line number Diff line number Diff line
@@ -92,13 +92,22 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
	hlist_nulls_add_head_rcu(&tw->tw_node, list);
}

/* File-local wrapper: forwards @tw and @timeo to __inet_twsk_schedule()
 * with the rearm argument set to false.
 */
static void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo)
{
	__inet_twsk_schedule(tw, timeo, false);
}

/*
 * Enter the time wait state. This is called with locally disabled BH.
 * Enter the time wait state.
 * Essentially we whip up a timewait bucket, copy the relevant info into it
 * from the SK, and mess with hash chains and list linkage.
 *
 * The caller must not access @tw anymore after this function returns.
 */
void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
			   struct inet_hashinfo *hashinfo)
void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
				  struct sock *sk,
				  struct inet_hashinfo *hashinfo,
				  int timeo)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -114,6 +123,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
			hashinfo->bhash_size)];
	bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num);

	local_bh_disable();
	spin_lock(&bhead->lock);
	spin_lock(&bhead2->lock);

@@ -129,26 +139,34 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,

	spin_lock(lock);

	/* Step 2: Hash TW into tcp ehash chain */
	inet_twsk_add_node_rcu(tw, &ehead->chain);

	/* Step 3: Remove SK from hash chain */
	if (__sk_nulls_del_node_init_rcu(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);

	spin_unlock(lock);

	/* Ensure above writes are committed into memory before updating the
	 * refcount.
	 * Provides ordering vs later refcount_inc().
	 */
	smp_wmb();
	/* tw_refcnt is set to 3 because we have :
	 * - one reference for bhash chain.
	 * - one reference for ehash chain.
	 * - one reference for timer.
	 * We can use atomic_set() because prior spin_lock()/spin_unlock()
	 * committed into memory all tw fields.
	 * Also note that after this point, we lost our implicit reference
	 * so we are not allowed to use tw anymore.
	 */
	refcount_set(&tw->tw_refcnt, 3);

	inet_twsk_schedule(tw, timeo);

	spin_unlock(lock);
	local_bh_enable();
}
EXPORT_SYMBOL_GPL(inet_twsk_hashdance);
EXPORT_SYMBOL_GPL(inet_twsk_hashdance_schedule);

static void tw_timer_handler(struct timer_list *t)
{
@@ -192,7 +210,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
		tw->tw_prot	    = sk->sk_prot_creator;
		atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
		twsk_net_set(tw, sock_net(sk));
		timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED);
		timer_setup(&tw->tw_timer, tw_timer_handler, 0);
		/*
		 * Because we use RCU lookups, we should not set tw_refcnt
		 * to a non null value before everything is setup for this
@@ -217,7 +235,34 @@ EXPORT_SYMBOL_GPL(inet_twsk_alloc);
 */
void inet_twsk_deschedule_put(struct inet_timewait_sock *tw)
{
	if (del_timer_sync(&tw->tw_timer))
	struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
	spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);

	/* inet_twsk_purge() walks over all sockets, including tw ones,
	 * and removes them via inet_twsk_deschedule_put() after a
	 * refcount_inc_not_zero().
	 *
	 * inet_twsk_hashdance_schedule() must (re)init the refcount before
	 * arming the timer, i.e. inet_twsk_purge can obtain a reference to
	 * a twsk that did not yet schedule the timer.
	 *
	 * The ehash lock synchronizes these two:
	 * After acquiring the lock, the timer is always scheduled (else
	 * timer_shutdown returns false), because hashdance_schedule releases
	 * the ehash lock only after completing the timer initialization.
	 *
	 * Without grabbing the ehash lock, we get:
	 * 1) cpu x sets twsk refcount to 3
	 * 2) cpu y bumps refcount to 4
	 * 3) cpu y calls inet_twsk_deschedule_put() and shuts timer down
	 * 4) cpu x tries to start timer, but mod_timer is a noop post-shutdown
	 * -> timer refcount is never decremented.
	 */
	spin_lock(lock);
	/*  Makes sure hashdance_schedule() has completed */
	spin_unlock(lock);

	if (timer_shutdown_sync(&tw->tw_timer))
		inet_twsk_kill(tw);
	inet_twsk_put(tw);
}
+1 −1
Original line number Diff line number Diff line
@@ -157,7 +157,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
	if (ts_recent_stamp &&
	    (!twp || (reuse && time_after32(ktime_get_seconds(),
					    ts_recent_stamp)))) {
		/* inet_twsk_hashdance() sets sk_refcnt after putting twsk
		/* inet_twsk_hashdance_schedule() sets sk_refcnt after putting twsk
		 * and releasing the bucket lock.
		 */
		if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt)))
+1 −8
Original line number Diff line number Diff line
@@ -345,17 +345,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
		if (state == TCP_TIME_WAIT)
			timeo = TCP_TIMEWAIT_LEN;

		/* tw_timer is pinned, so we need to make sure BH are disabled
		 * in following section, otherwise timer handler could run before
		 * we complete the initialization.
		 */
		local_bh_disable();
		inet_twsk_schedule(tw, timeo);
		/* Linkage updates.
		 * Note that access to tw after this point is illegal.
		 */
		inet_twsk_hashdance(tw, sk, net->ipv4.tcp_death_row.hashinfo);
		local_bh_enable();
		inet_twsk_hashdance_schedule(tw, sk, net->ipv4.tcp_death_row.hashinfo, timeo);
	} else {
		/* Sorry, if we're out of memory, just CLOSE this
		 * socket up.  We've got bigger problems than