Commit 4ba67ef3 authored by Breno Leitao's avatar Breno Leitao Committed by Jakub Kicinski
Browse files

net: dqs: make struct dql more cache efficient



With the previous change, dql->stall_thrs will be in the hot path
(at queue side), even if DQS is disabled.

The other fields accessed in this function (last_obj_cnt and num_queued)
are in the first cache line, so let's move this field (stall_thrs) to the
very first cache line as well, since there is a hole there.

This does not change the structure size, since it moves a short (2
bytes) into a 4-byte hole in the first cache line.

This is the new structure format now:

struct dql {
	unsigned int    num_queued;
	unsigned int    last_obj_cnt;
...
	short unsigned int    stall_thrs;
	/* XXX 2 bytes hole, try to pack */
...
	/* --- cacheline 1 boundary (64 bytes) --- */
...
 	/* Longest stall detected, reported to user */
	short unsigned int         stall_max;
	/* XXX 2 bytes hole, try to pack */
};

Also, read the stall_thrs (now in the very first cache line) earlier,
together with dql->num_queued (also in the first cache line).

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Breno Leitao <leitao@debian.org>
Link: https://lore.kernel.org/r/20240411192241.2498631-5-leitao@debian.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 721f076b
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -50,6 +50,9 @@ struct dql {
	unsigned int	adj_limit;		/* limit + num_completed */
	unsigned int	last_obj_cnt;		/* Count at last queuing */

	/* Stall threshold (in jiffies), defined by user */
	unsigned short	stall_thrs;

	unsigned long	history_head;		/* top 58 bits of jiffies */
	/* stall entries, a bit per entry */
	unsigned long	history[DQL_HIST_LEN];
@@ -71,8 +74,6 @@ struct dql {
	unsigned int	min_limit;		/* Minimum limit */
	unsigned int	slack_hold_time;	/* Time to measure slack */

	/* Stall threshold (in jiffies), defined by user */
	unsigned short	stall_thrs;
	/* Longest stall detected, reported to user */
	unsigned short	stall_max;
	unsigned long	last_reap;		/* Last reap (in jiffies) */
+9 −4
Original line number Diff line number Diff line
@@ -15,12 +15,10 @@
#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)

static void dql_check_stall(struct dql *dql)
static void dql_check_stall(struct dql *dql, unsigned short stall_thrs)
{
	unsigned short stall_thrs;
	unsigned long now;

	stall_thrs = READ_ONCE(dql->stall_thrs);
	if (!stall_thrs)
		return;

@@ -86,9 +84,16 @@ void dql_completed(struct dql *dql, unsigned int count)
{
	unsigned int inprogress, prev_inprogress, limit;
	unsigned int ovlimit, completed, num_queued;
	unsigned short stall_thrs;
	bool all_prev_completed;

	num_queued = READ_ONCE(dql->num_queued);
	/* Read stall_thrs in advance since it belongs to the same (first)
	 * cache line as ->num_queued. This way, dql_check_stall() does not
	 * need to touch the first cache line again later, reducing the window
	 * of possible false sharing.
	 */
	stall_thrs = READ_ONCE(dql->stall_thrs);

	/* Can't complete more than what's in queue */
	BUG_ON(count > num_queued - dql->num_completed);
@@ -178,7 +183,7 @@ void dql_completed(struct dql *dql, unsigned int count)
	dql->num_completed = completed;
	dql->prev_num_queued = num_queued;

	dql_check_stall(dql);
	dql_check_stall(dql, stall_thrs);
}
EXPORT_SYMBOL(dql_completed);