Commit 378bdb97 authored by Kuniyuki Iwashima, committed by Andrew Morton
Browse files

memcg: convert memcg->socket_pressure to u64

memcg->socket_pressure is initialised with jiffies when the memcg is
created.

Once vmpressure detects that the cgroup is under memory pressure, the
field is updated with jiffies + HZ to signal the fact to the socket layer
and suppress memory allocation for one second.

Otherwise, the field is not updated.

mem_cgroup_under_socket_pressure() uses time_before() to check if jiffies
is less than memcg->socket_pressure, and this has a bug on 32-bit kernel.

  if (time_before(jiffies, memcg->socket_pressure))
          return true;

As time_before() casts the final result to long, the acceptable delta
between two timestamps is 2 ^ (BITS_PER_LONG - 1).

On 32-bit kernel with CONFIG_HZ=1000, this is about 24 days.

  >>> (2 ** 31) / 1000 / 60 / 60 / 24
  24.855134814814818

Once 24 days have passed since the last update of socket_pressure,
mem_cgroup_under_socket_pressure() starts to lie until the next 24 days
pass.

We don't need to worry about this on 64-bit machines unless they serve for
300 million years.

  >>> (2 ** 63) / 1000 / 60 / 60 / 24 / 365
  292471208.6775361

Let's convert memcg->socket_pressure to u64.

Performance testing:

I don't have a real 32-bit machine so this is a result on QEMU, but
with/without the u64 jiffies patch, the time spent in
mem_cgroup_under_socket_pressure() was 1~5us and I didn't see any
measurable delta.

no patch applied:
iperf3   273 [000]   137.296248:
probe:mem_cgroup_under_socket_pressure: (c13660d0)
                c13660d1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3   273 [000]   137.296249:
probe:mem_cgroup_under_socket_pressure__return: (c13660d0 <- c1d8fd7f)
iperf3   273 [000]   137.296251:
probe:mem_cgroup_under_socket_pressure: (c13660d0)
                c13660d1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3   273 [000]   137.296253:
probe:mem_cgroup_under_socket_pressure__return: (c13660d0 <- c1d8fd7f)


u64 jiffies patch applied:
iperf3   308 [001]   330.669370:
probe:mem_cgroup_under_socket_pressure: (c12ddba0)
                c12ddba1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3   308 [001]   330.669371:
probe:mem_cgroup_under_socket_pressure__return: (c12ddba0 <- c1ce98bf)
iperf3   308 [001]   330.669382:
probe:mem_cgroup_under_socket_pressure: (c12ddba0)
                c12ddba1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3   308 [001]   330.669384:
probe:mem_cgroup_under_socket_pressure__return: (c12ddba0 <- c1ce98bf)

So the u64 approach is good enough.

Link: https://lkml.kernel.org/r/20250717194645.1096500-1-kuniyu@google.com


Fixes: 8e8ae645 ("mm: memcontrol: hook up vmpressure to socket pressure")
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reported-by: Neal Cardwell <ncardwell@google.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent a9e056de
Loading
Loading
Loading
Loading
+41 −3
Original line number Diff line number Diff line
@@ -251,8 +251,10 @@ struct mem_cgroup {
	 * that this indicator should NOT be used in legacy cgroup mode
	 * where socket memory is accounted/charged separately.
	 */
	unsigned long		socket_pressure;

	u64			socket_pressure;
#if BITS_PER_LONG < 64
	seqlock_t		socket_pressure_seqlock;
#endif
	int kmemcg_id;
	/*
	 * memcg->objcg is wiped out as a part of the objcg reparenting
@@ -1602,6 +1604,42 @@ extern struct static_key_false memcg_sockets_enabled_key;
#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
void mem_cgroup_sk_alloc(struct sock *sk);
void mem_cgroup_sk_free(struct sock *sk);

#if BITS_PER_LONG < 64
/*
 * Mark @memcg as being under socket pressure for the next HZ jiffies
 * (variant built when BITS_PER_LONG < 64).
 *
 * The new deadline is computed from the 64-bit jiffies counter before the
 * write section is entered, then stored into memcg->socket_pressure while
 * holding socket_pressure_seqlock as a writer with local interrupts saved
 * and disabled.
 *
 * NOTE(review): the seqlock is presumably needed because a u64 store is not
 * a single access on these configurations - confirm against the readers.
 */
static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg)
{
	unsigned long irq_flags;
	u64 deadline;

	/* Sample the clock outside the write-side critical section. */
	deadline = get_jiffies_64() + HZ;

	write_seqlock_irqsave(&memcg->socket_pressure_seqlock, irq_flags);
	memcg->socket_pressure = deadline;
	write_sequnlock_irqrestore(&memcg->socket_pressure_seqlock, irq_flags);
}

/*
 * Return the current value of memcg->socket_pressure (variant built when
 * BITS_PER_LONG < 64).
 *
 * The field is sampled inside a read_seqbegin()/read_seqretry() section on
 * socket_pressure_seqlock; the sample is repeated for as long as
 * read_seqretry() asks for a retry, and only the last sample is returned.
 */
static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg)
{
	unsigned int start;
	u64 snapshot;

	for (;;) {
		start = read_seqbegin(&memcg->socket_pressure_seqlock);
		snapshot = memcg->socket_pressure;

		/* Done once the sequence check no longer requests a retry. */
		if (!read_seqretry(&memcg->socket_pressure_seqlock, start))
			break;
	}

	return snapshot;
}
#else
static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg)
{
	WRITE_ONCE(memcg->socket_pressure, jiffies + HZ);
}

/*
 * Return the current value of memcg->socket_pressure (variant built when
 * BITS_PER_LONG is at least 64).
 *
 * The field is fetched with a single READ_ONCE(); no lock is taken.
 */
static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg)
{
	u64 deadline = READ_ONCE(memcg->socket_pressure);

	return deadline;
}
#endif

static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
#ifdef CONFIG_MEMCG_V1
@@ -1609,7 +1647,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
		return !!memcg->tcpmem_pressure;
#endif /* CONFIG_MEMCG_V1 */
	do {
		if (time_before(jiffies, READ_ONCE(memcg->socket_pressure)))
		if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
			return true;
	} while ((memcg = parent_mem_cgroup(memcg)));
	return false;
+4 −1
Original line number Diff line number Diff line
@@ -3754,7 +3754,10 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
	INIT_LIST_HEAD(&memcg->memory_peaks);
	INIT_LIST_HEAD(&memcg->swap_peaks);
	spin_lock_init(&memcg->peaks_lock);
	memcg->socket_pressure = jiffies;
	memcg->socket_pressure = get_jiffies_64();
#if BITS_PER_LONG < 64
	seqlock_init(&memcg->socket_pressure_seqlock);
#endif
	memcg1_memcg_init(memcg);
	memcg->kmemcg_id = -1;
	INIT_LIST_HEAD(&memcg->objcg_list);
+1 −1
Original line number Diff line number Diff line
@@ -316,7 +316,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
			 * asserted for a second in which subsequent
			 * pressure events can occur.
			 */
			WRITE_ONCE(memcg->socket_pressure, jiffies + HZ);
			mem_cgroup_set_socket_pressure(memcg);
		}
	}
}