Commit cd64d10b authored by Paolo Abeni
Browse files

Merge branch 'net-rds-rds-tcp-bug-fix-collection-subset-1-work-queue-scalability'

Allison Henderson says:

====================
net/rds: RDS-TCP bug fix collection, subset 1: Work queue scalability

This is subset 1 of the RDS-TCP bug fix collection series I posted last
October.  The larger series aims to correct multiple RDS-TCP bugs that
can cause dropped or out-of-sequence messages.  The set was starting to
get a bit large, so I've broken it down into smaller sets to make
reviews more manageable.

In this subset, we focus on work queue scalability.  Message queues
are refactored to operate in parallel across multiple connections,
which improves response times and avoids timeouts.

The entire set can be viewed in the RFC here:
https://lore.kernel.org/netdev/20251022191715.157755-1-achender@kernel.org/

Questions, comments, flames appreciated!
====================

Link: https://patch.msgid.link/20260109224843.128076-1-achender@kernel.org


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents ff420c56 4716af38
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -242,7 +242,7 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
			 *    therefore trigger warnings.
			 * Defer the xmit to rds_send_worker() instead.
			 */
			queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
			queue_delayed_work(cp->cp_wq, &cp->cp_send_w, 0);
		}
		rcu_read_unlock();
	}
+23 −5
Original line number Diff line number Diff line
@@ -169,6 +169,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
	struct rds_connection *conn, *parent = NULL;
	struct hlist_head *head = rds_conn_bucket(laddr, faddr);
	struct rds_transport *loop_trans;
	struct rds_conn_path *free_cp = NULL;
	unsigned long flags;
	int ret, i;
	int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
@@ -269,6 +270,11 @@ static struct rds_connection *__rds_conn_create(struct net *net,
		__rds_conn_path_init(conn, &conn->c_path[i],
				     is_outgoing);
		conn->c_path[i].cp_index = i;
		conn->c_path[i].cp_wq =
			alloc_ordered_workqueue("krds_cp_wq#%lu/%d", 0,
						rds_conn_count, i);
		if (!conn->c_path[i].cp_wq)
			conn->c_path[i].cp_wq = rds_wq;
	}
	rcu_read_lock();
	if (rds_destroy_pending(conn))
@@ -277,7 +283,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
		ret = trans->conn_alloc(conn, GFP_ATOMIC);
	if (ret) {
		rcu_read_unlock();
		kfree(conn->c_path);
		free_cp = conn->c_path;
		kmem_cache_free(rds_conn_slab, conn);
		conn = ERR_PTR(ret);
		goto out;
@@ -300,7 +306,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
		/* Creating passive conn */
		if (parent->c_passive) {
			trans->conn_free(conn->c_path[0].cp_transport_data);
			kfree(conn->c_path);
			free_cp = conn->c_path;
			kmem_cache_free(rds_conn_slab, conn);
			conn = parent->c_passive;
		} else {
@@ -327,7 +333,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
				if (cp->cp_transport_data)
					trans->conn_free(cp->cp_transport_data);
			}
			kfree(conn->c_path);
			free_cp = conn->c_path;
			kmem_cache_free(rds_conn_slab, conn);
			conn = found;
		} else {
@@ -342,6 +348,13 @@ static struct rds_connection *__rds_conn_create(struct net *net,
	rcu_read_unlock();

out:
	if (free_cp) {
		for (i = 0; i < npaths; i++)
			if (free_cp[i].cp_wq != rds_wq)
				destroy_workqueue(free_cp[i].cp_wq);
		kfree(free_cp);
	}

	return conn;
}

@@ -469,6 +482,11 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
	WARN_ON(delayed_work_pending(&cp->cp_conn_w));
	WARN_ON(work_pending(&cp->cp_down_w));

	if (cp->cp_wq != rds_wq) {
		destroy_workqueue(cp->cp_wq);
		cp->cp_wq = NULL;
	}

	cp->cp_conn->c_trans->conn_free(cp->cp_transport_data);
}

@@ -884,7 +902,7 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
		rcu_read_unlock();
		return;
	}
	queue_work(rds_wq, &cp->cp_down_w);
	queue_work(cp->cp_wq, &cp->cp_down_w);
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rds_conn_path_drop);
@@ -909,7 +927,7 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
	}
	if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
	    !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
		queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
		queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
+1 −1
Original line number Diff line number Diff line
@@ -457,7 +457,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
	    (must_wake ||
	    (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
	    rds_ib_ring_empty(&ic->i_recv_ring))) {
		queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
		queue_delayed_work(conn->c_path->cp_wq, &conn->c_recv_w, 1);
	}
	if (can_wait)
		cond_resched();
+2 −2
Original line number Diff line number Diff line
@@ -297,7 +297,7 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)

	if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
	    test_bit(0, &conn->c_map_queued))
		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
		queue_delayed_work(conn->c_path->cp_wq, &conn->c_send_w, 0);

	/* We expect errors as the qp is drained during shutdown */
	if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
@@ -419,7 +419,7 @@ void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits)

	atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
	if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
		queue_delayed_work(conn->c_path->cp_wq, &conn->c_send_w, 0);

	WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);

+1 −0
Original line number Diff line number Diff line
@@ -118,6 +118,7 @@ struct rds_conn_path {

	void			*cp_transport_data;

	struct workqueue_struct	*cp_wq;
	atomic_t		cp_state;
	unsigned long		cp_send_gen;
	unsigned long		cp_flags;
Loading