Commit 3fc567e4 authored by Libo Chen, committed by Andrew Morton
Browse files

sched/numa: add tracepoint that tracks the skipping of numa balancing due to cpuset memory pinning

Unlike the sched_skip_vma_numa tracepoint, which tracks skipped VMAs, this
tracepoint tracks the task subject to cpuset.mems pinning and prints out its
allowed memory node mask.

Link: https://lkml.kernel.org/r/20250424024523.2298272-3-libo.chen@oracle.com


Signed-off-by: Libo Chen <libo.chen@oracle.com>
Cc: "Chen, Tim C" <tim.c.chen@intel.com>
Cc: Chen Yu <yu.c.chen@intel.com>
Cc: Chris Hyser <chris.hyser@oracle.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Madadi Vineeth Reddy <vineethr@linux.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Raghavendra K T <raghavendra.kt@amd.com>
Cc: Srikanth Aithal <sraithal@amd.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 1f6c6ac0
Loading
Loading
Loading
Loading
+33 −0
Original line number Diff line number Diff line
@@ -745,6 +745,39 @@ TRACE_EVENT(sched_skip_vma_numa,
		  __entry->vm_end,
		  __print_symbolic(__entry->reason, NUMAB_SKIP_REASON))
);

/*
 * Tracks a task for which NUMA balancing is skipped because of cpuset
 * memory pinning. Unlike sched_skip_vma_numa, which tracks skipped VMAs,
 * this event records the task itself (comm, pid, tgid, ngid) together
 * with a copy of the allowed memory node mask passed by the caller.
 */
TRACE_EVENT(sched_skip_cpuset_numa,

	TP_PROTO(struct task_struct *tsk, nodemask_t *mem_allowed_ptr),

	TP_ARGS(tsk, mem_allowed_ptr),

	TP_STRUCT__entry(
		__array( char,		comm,		TASK_COMM_LEN		)
		__field( pid_t,		pid					)
		__field( pid_t,		tgid					)
		__field( pid_t,		ngid					)
		__array( unsigned long, mem_allowed, BITS_TO_LONGS(MAX_NUMNODES))
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		 = task_pid_nr(tsk);
		__entry->tgid		 = task_tgid_nr(tsk);
		__entry->ngid		 = task_numa_group_id(tsk);
		/*
		 * The fixed-size memcpy() below reads sizeof(mem_allowed)
		 * bytes from the nodemask, which is only correct if
		 * nodemask_t is exactly as large as the mem_allowed array.
		 * Fail the build if that ever stops being true.
		 */
		BUILD_BUG_ON(sizeof(nodemask_t) != \
			     BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long));
		memcpy(__entry->mem_allowed, mem_allowed_ptr->bits,
		       sizeof(__entry->mem_allowed));
	),

	/*
	 * %*pbl consumes two arguments: the bitmap width in bits
	 * (MAX_NUMNODES) and the bitmap itself, and prints the set bits
	 * as a list of ranges.
	 */
	TP_printk("comm=%s pid=%d tgid=%d ngid=%d mem_nodes_allowed=%*pbl",
		  __entry->comm,
		  __entry->pid,
		  __entry->tgid,
		  __entry->ngid,
		  MAX_NUMNODES, __entry->mem_allowed)
);
#endif /* CONFIG_NUMA_BALANCING */

/*
+3 −1
Original line number Diff line number Diff line
@@ -3333,8 +3333,10 @@ static void task_numa_work(struct callback_head *work)
	 * Memory is pinned to only one NUMA node via cpuset.mems, naturally
	 * no page can be migrated.
	 */
	if (cpusets_enabled() && nodes_weight(cpuset_current_mems_allowed) == 1)
	if (cpusets_enabled() && nodes_weight(cpuset_current_mems_allowed) == 1) {
		trace_sched_skip_cpuset_numa(current, &cpuset_current_mems_allowed);
		return;
	}

	if (!mm->numa_next_scan) {
		mm->numa_next_scan = now +