Commit 37a26087 authored by Yu Zhao, committed by Andrew Morton
Browse files

mm/mglru: rework type selection

With anon and file min_seq being able to move independently, rework type
selection so that it is based on the total refaults from all tiers of each
type.  Also allow a type to be selected until that type reaches
MIN_NR_GENS, regardless of whether that type has a larger min_seq or not,
to accommodate extreme swappiness.

Since some tiers of a selected type can have higher refaults than the
first tier of the other type, use a smaller gain factor (2:3 instead of
1:2), in order for those tiers in the selected type to be better protected.

As an intermediate step to the final optimization, this change by itself
should not have userspace-visible effects beyond performance.

Link: https://lkml.kernel.org/r/20241231043538.4075764-5-yuzhao@google.com


Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: David Stevens <stevensd@chromium.org>
Tested-by: Kalesh Singh <kaleshsingh@google.com>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Kairui Song <kasong@tencent.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 798c0330
Loading
Loading
Loading
Loading
+26 −56
Original line number Diff line number Diff line
@@ -3085,15 +3085,20 @@ struct ctrl_pos {
/*
 * Fill @pos with @gain and with refault/total counters of @type read from
 * lruvec->lrugen.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers, so the
 * pre-patch single-tier assignments and the post-patch zero-then-accumulate
 * loop both appear in the body below; presumably only the latter remains once
 * the patch is applied -- confirm against the tree.
 *
 * The loop bounds make @tier select what is accumulated (for @tier >= 0):
 *   - @tier <  MAX_NR_TIERS: tier % MAX_NR_TIERS == tier and
 *     min(tier, MAX_NR_TIERS - 1) == tier, so only that one tier is read;
 *   - @tier == MAX_NR_TIERS: the loop runs from 0 to MAX_NR_TIERS - 1, so
 *     the counters of all tiers are summed.
 */
static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain,
			  struct ctrl_pos *pos)
{
	int i;
	struct lru_gen_folio *lrugen = &lruvec->lrugen;
	/* history slot derived from this type's min_seq */
	int hist = lru_hist_from_seq(lrugen->min_seq[type]);

	pos->refaulted = lrugen->avg_refaulted[type][tier] +
			 atomic_long_read(&lrugen->refaulted[hist][type][tier]);
	pos->total = lrugen->avg_total[type][tier] +
		     lrugen->protected[hist][type][tier] +
		     atomic_long_read(&lrugen->evicted[hist][type][tier]);
	pos->gain = gain;
	/* start from zero so the loop below can accumulate */
	pos->refaulted = pos->total = 0;

	/* one tier, or every tier when tier == MAX_NR_TIERS (see above) */
	for (i = tier % MAX_NR_TIERS; i <= min(tier, MAX_NR_TIERS - 1); i++) {
		/* refaulted: stored average plus the current history slot's count */
		pos->refaulted += lrugen->avg_refaulted[type][i] +
				  atomic_long_read(&lrugen->refaulted[hist][type][i]);
		/* total: stored average plus protected and evicted of that slot */
		pos->total += lrugen->avg_total[type][i] +
			      lrugen->protected[hist][type][i] +
			      atomic_long_read(&lrugen->evicted[hist][type][i]);
	}
}

static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover)
@@ -4493,13 +4498,13 @@ static int get_tier_idx(struct lruvec *lruvec, int type)
	struct ctrl_pos sp, pv;

	/*
	 * To leave a margin for fluctuations, use a larger gain factor (1:2).
	 * To leave a margin for fluctuations, use a larger gain factor (2:3).
	 * This value is chosen because any other tier would have at least twice
	 * as many refaults as the first tier.
	 */
	read_ctrl_pos(lruvec, type, 0, 1, &sp);
	read_ctrl_pos(lruvec, type, 0, 2, &sp);
	for (tier = 1; tier < MAX_NR_TIERS; tier++) {
		read_ctrl_pos(lruvec, type, tier, 2, &pv);
		read_ctrl_pos(lruvec, type, tier, 3, &pv);
		if (!positive_ctrl_err(&sp, &pv))
			break;
	}
@@ -4507,68 +4512,34 @@ static int get_tier_idx(struct lruvec *lruvec, int type)
	return tier - 1;
}

/*
 * Pick which type (LRU_GEN_ANON or LRU_GEN_FILE) to scan for @lruvec given
 * @swappiness.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers, so both
 * the pre-patch form (three parameters, writes *tier_idx, compares first
 * tiers) and the post-patch form (two parameters, compares all-tier sums)
 * appear interleaved below; presumably only the latter remains once the patch
 * is applied -- confirm against the tree.
 */
static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_idx)
static int get_type_to_scan(struct lruvec *lruvec, int swappiness)
{
	int type, tier;
	struct ctrl_pos sp, pv;
	/* per-type gains: swappiness for anon, the remainder for file */
	int gain[ANON_AND_FILE] = { swappiness, MAX_SWAPPINESS - swappiness };

	/* swappiness at or just above the minimum: file without comparing */
	if (swappiness <= MIN_SWAPPINESS + 1)
		return LRU_GEN_FILE;

	/* swappiness at or above the maximum: anon without comparing */
	if (swappiness >= MAX_SWAPPINESS)
		return LRU_GEN_ANON;
	/*
	 * Compare the first tier of anon with that of file to determine which
	 * type to scan. Also need to compare other tiers of the selected type
	 * with the first tier of the other type to determine the last tier (of
	 * the selected type) to evict.
	 * Compare the sum of all tiers of anon with that of file to determine
	 * which type to scan.
	 */
	read_ctrl_pos(lruvec, LRU_GEN_ANON, 0, gain[LRU_GEN_ANON], &sp);
	read_ctrl_pos(lruvec, LRU_GEN_FILE, 0, gain[LRU_GEN_FILE], &pv);
	type = positive_ctrl_err(&sp, &pv);
	/*
	 * tier == MAX_NR_TIERS makes read_ctrl_pos() sum over all tiers; anon
	 * is read into sp with gain swappiness, file into pv with gain
	 * MAX_SWAPPINESS - swappiness.
	 */
	read_ctrl_pos(lruvec, LRU_GEN_ANON, MAX_NR_TIERS, swappiness, &sp);
	read_ctrl_pos(lruvec, LRU_GEN_FILE, MAX_NR_TIERS, MAX_SWAPPINESS - swappiness, &pv);

	read_ctrl_pos(lruvec, !type, 0, gain[!type], &sp);
	for (tier = 1; tier < MAX_NR_TIERS; tier++) {
		read_ctrl_pos(lruvec, type, tier, gain[type], &pv);
		if (!positive_ctrl_err(&sp, &pv))
			break;
	}

	*tier_idx = tier - 1;

	return type;
	/*
	 * The result of positive_ctrl_err(sp = anon, pv = file) is used directly
	 * as the type index; NOTE(review): presumably 0 selects anon and 1
	 * selects file -- confirm against the LRU_GEN_* definitions.
	 */
	return positive_ctrl_err(&sp, &pv);
}

static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
			  int *type_scanned, struct list_head *list)
{
	int i;
	int type;
	int tier = -1;
	DEFINE_MIN_SEQ(lruvec);

	/*
	 * Try to make the obvious choice first, and if anon and file are both
	 * available from the same generation,
	 * 1. Interpret swappiness 1 as file first and MAX_SWAPPINESS as anon
	 *    first.
	 * 2. If !__GFP_IO, file first since clean pagecache is more likely to
	 *    exist than clean swapcache.
	 */
	if (swappiness <= MIN_SWAPPINESS + 1)
		type = LRU_GEN_FILE;
	else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE])
		type = LRU_GEN_ANON;
	else if (swappiness == 1)
		type = LRU_GEN_FILE;
	if (swappiness >= MAX_SWAPPINESS)
		type = LRU_GEN_ANON;
	else if (!(sc->gfp_mask & __GFP_IO))
		type = LRU_GEN_FILE;
	else
		type = get_type_to_scan(lruvec, swappiness, &tier);
	int type = get_type_to_scan(lruvec, swappiness);

	for_each_evictable_type(i, swappiness) {
		int scanned;

		if (tier < 0)
			tier = get_tier_idx(lruvec, type);
		int tier = get_tier_idx(lruvec, type);

		*type_scanned = type;

@@ -4577,7 +4548,6 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw
			return scanned;

		type = !type;
		tier = -1;
	}

	return 0;