Commit 503efe85 authored by Wang Yaxin, committed by Andrew Morton
Browse files

delayacct: add timestamp of delay max

Problem
=======
Commit 658eb5ab ("delayacct: add delay max to record delay peak")
introduced the delay max for getdelays, which records abnormal latency
peaks and helps us understand the magnitude of such delays.  However, the
peak latency value alone is insufficient for effective root cause
analysis.  Without the precise timestamp of when the peak occurred, we
still lack the critical context needed to correlate it with other system
events.

Solution
========
To address this, we need to additionally record a precise timestamp when
the maximum latency occurs.  By correlating this timestamp with system
logs and monitoring metrics, we can identify processes with abnormal
resource usage at the same moment, which can help us to pinpoint root
causes.

Use Case
========
bash-4.4# ./getdelays -d -t 227
print delayacct stats ON
TGID    227
CPU         count     real total  virtual total    delay total  delay average      delay max      delay min      delay max timestamp
               46      188000000      192348334        4098012          0.089ms     0.429260ms     0.051205ms    2026-01-15T15:06:58
IO          count    delay total  delay average      delay max      delay min      delay max timestamp
                0              0          0.000ms     0.000000ms     0.000000ms                    N/A
SWAP        count    delay total  delay average      delay max      delay min      delay max timestamp
                0              0          0.000ms     0.000000ms     0.000000ms                    N/A
RECLAIM     count    delay total  delay average      delay max      delay min      delay max timestamp
                0              0          0.000ms     0.000000ms     0.000000ms                    N/A
THRASHING   count    delay total  delay average      delay max      delay min      delay max timestamp
                0              0          0.000ms     0.000000ms     0.000000ms                    N/A
COMPACT     count    delay total  delay average      delay max      delay min      delay max timestamp
                0              0          0.000ms     0.000000ms     0.000000ms                    N/A
WPCOPY      count    delay total  delay average      delay max      delay min      delay max timestamp
              182       19413338          0.107ms     0.547353ms     0.022462ms    2026-01-15T15:05:24
IRQ         count    delay total  delay average      delay max      delay min      delay max timestamp
                0              0          0.000ms     0.000000ms     0.000000ms                    N/A

Link: https://lkml.kernel.org/r/20260119100241520gWubW8-5QfhSf9gjqcc_E@zte.com.cn


Signed-off-by: Wang Yaxin <wang.yaxin@zte.com.cn>
Cc: Fan Yu <fan.yu9@zte.com.cn>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: xu xin <xu.xin16@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent cc20650a
Loading
Loading
Loading
Loading
+16 −16
Original line number Diff line number Diff line
@@ -107,22 +107,22 @@ Get sum and peak of delays, since system boot, for all pids with tgid 242::
	TGID    242


	CPU         count     real total  virtual total    delay total  delay average      delay max      delay min
	               39      156000000      156576579        2111069          0.054ms     0.212296ms     0.031307ms
	IO          count    delay total  delay average      delay max      delay min
	                0              0          0.000ms     0.000000ms     0.000000ms
	SWAP        count    delay total  delay average      delay max      delay min
	                0              0          0.000ms     0.000000ms     0.000000ms
	RECLAIM     count    delay total  delay average      delay max      delay min
	                0              0          0.000ms     0.000000ms     0.000000ms
	THRASHING   count    delay total  delay average      delay max      delay min
	                0              0          0.000ms     0.000000ms     0.000000ms
	COMPACT     count    delay total  delay average      delay max      delay min
	                0              0          0.000ms     0.000000ms     0.000000ms
	WPCOPY      count    delay total  delay average      delay max      delay min
	              156       11215873          0.072ms     0.207403ms     0.033913ms
	IRQ         count    delay total  delay average      delay max      delay min
	                0              0          0.000ms     0.000000ms     0.000000ms
	CPU         count     real total  virtual total    delay total  delay average      delay max      delay min      delay max timestamp
	               46      188000000      192348334        4098012          0.089ms     0.429260ms     0.051205ms    2026-01-15T15:06:58
	IO          count    delay total  delay average      delay max      delay min      delay max timestamp
	                0              0          0.000ms     0.000000ms     0.000000ms                    N/A
	SWAP        count    delay total  delay average      delay max      delay min      delay max timestamp
	                0              0          0.000ms     0.000000ms     0.000000ms                    N/A
	RECLAIM     count    delay total  delay average      delay max      delay min      delay max timestamp
	                0              0          0.000ms     0.000000ms     0.000000ms                    N/A
	THRASHING   count    delay total  delay average      delay max      delay min      delay max timestamp
	                0              0          0.000ms     0.000000ms     0.000000ms                    N/A
	COMPACT     count    delay total  delay average      delay max      delay min      delay max timestamp
	                0              0          0.000ms     0.000000ms     0.000000ms                    N/A
	WPCOPY      count    delay total  delay average      delay max      delay min      delay max timestamp
	              182       19413338          0.107ms     0.547353ms     0.022462ms    2026-01-15T15:05:24
	IRQ         count    delay total  delay average      delay max      delay min      delay max timestamp
	                0              0          0.000ms     0.000000ms     0.000000ms                    N/A

Get IO accounting for pid 1, it works only with -p::

+8 −0
Original line number Diff line number Diff line
@@ -69,6 +69,14 @@ struct task_delay_info {
	u32 compact_count;	/* total count of memory compact */
	u32 wpcopy_count;	/* total count of write-protect copy */
	u32 irq_count;	/* total count of IRQ/SOFTIRQ */

	struct timespec64 blkio_delay_max_ts;
	struct timespec64 swapin_delay_max_ts;
	struct timespec64 freepages_delay_max_ts;
	struct timespec64 thrashing_delay_max_ts;
	struct timespec64 compact_delay_max_ts;
	struct timespec64 wpcopy_delay_max_ts;
	struct timespec64 irq_delay_max_ts;
};
#endif

+5 −0
Original line number Diff line number Diff line
@@ -49,6 +49,7 @@
#include <linux/tracepoint-defs.h>
#include <linux/unwind_deferred_types.h>
#include <asm/kmap_size.h>
#include <linux/time64.h>
#ifndef COMPILE_OFFSETS
#include <generated/rq-offsets.h>
#endif
@@ -86,6 +87,7 @@ struct signal_struct;
struct task_delay_info;
struct task_group;
struct task_struct;
struct timespec64;
struct user_event_mm;

#include <linux/sched/ext.h>
@@ -435,6 +437,9 @@ struct sched_info {
	/* When were we last queued to run? */
	unsigned long long		last_queued;

	/* Timestamp of max time spent waiting on a runqueue: */
	struct timespec64		max_run_delay_ts;

#endif /* CONFIG_SCHED_INFO */
};

+21 −1
Original line number Diff line number Diff line
@@ -18,6 +18,16 @@
#define _LINUX_TASKSTATS_H

#include <linux/types.h>
/*
 * struct timespec64 is the type of the *_delay_max_ts fields in
 * struct taskstats. Kernel builds take it from <linux/time64.h>; all other
 * builds get the local fallback definition below, unless _LINUX_TIME64_H
 * is already defined.
 */
#ifdef __KERNEL__
#include <linux/time64.h>
#else
#ifndef _LINUX_TIME64_H
/*
 * NOTE(review): the width of `long` depends on the userspace ABI, so the
 * size and padding of this struct can differ between 32-bit and 64-bit
 * builds. Confirm that this layout matches what the kernel writes into
 * struct taskstats; a fixed-layout type such as struct __kernel_timespec
 * may be safer here. The unprefixed name may also clash with userspace
 * code that defines its own struct timespec64 -- TODO confirm.
 */
struct timespec64 {
	__s64   tv_sec;         /* seconds */
	long    tv_nsec;        /* nanoseconds */
};
#endif
#endif

/* Format for per-task data returned to userland when
 *	- a task exits
@@ -34,7 +44,7 @@
 */


#define TASKSTATS_VERSION	16
#define TASKSTATS_VERSION	17
#define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
					 * in linux/sched.h */

@@ -230,6 +240,16 @@ struct taskstats {

	__u64	irq_delay_max;
	__u64	irq_delay_min;

	/*v17: delay max timestamp record*/
	struct timespec64 cpu_delay_max_ts;
	struct timespec64 blkio_delay_max_ts;
	struct timespec64 swapin_delay_max_ts;
	struct timespec64 freepages_delay_max_ts;
	struct timespec64 thrashing_delay_max_ts;
	struct timespec64 compact_delay_max_ts;
	struct timespec64 wpcopy_delay_max_ts;
	struct timespec64 irq_delay_max_ts;
};


+22 −9
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
do { \
	d->type##_delay_max = tsk->delays->type##_delay_max; \
	d->type##_delay_min = tsk->delays->type##_delay_min; \
	d->type##_delay_max_ts = tsk->delays->type##_delay_max_ts; \
	tmp = d->type##_delay_total + tsk->delays->type##_delay; \
	d->type##_delay_total = (tmp < d->type##_delay_total) ? 0 : tmp; \
	d->type##_count += tsk->delays->type##_count; \
@@ -104,7 +105,8 @@ void __delayacct_tsk_init(struct task_struct *tsk)
 * Finish delay accounting for a statistic using its timestamps (@start),
 * accumulator (@total) and @count
 */
static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max, u64 *min)
static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count,
							 u64 *max, u64 *min, struct timespec64 *ts)
{
	s64 ns = local_clock() - *start;
	unsigned long flags;
@@ -113,8 +115,10 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *cou
		raw_spin_lock_irqsave(lock, flags);
		*total += ns;
		(*count)++;
		if (ns > *max)
		if (ns > *max) {
			*max = ns;
			ktime_get_real_ts64(ts);
		}
		if (*min == 0 || ns < *min)
			*min = ns;
		raw_spin_unlock_irqrestore(lock, flags);
@@ -137,7 +141,8 @@ void __delayacct_blkio_end(struct task_struct *p)
		      &p->delays->blkio_delay,
		      &p->delays->blkio_count,
		      &p->delays->blkio_delay_max,
		      &p->delays->blkio_delay_min);
		      &p->delays->blkio_delay_min,
		      &p->delays->blkio_delay_max_ts);
}

int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -170,6 +175,7 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)

	d->cpu_delay_max = tsk->sched_info.max_run_delay;
	d->cpu_delay_min = tsk->sched_info.min_run_delay;
	d->cpu_delay_max_ts = tsk->sched_info.max_run_delay_ts;
	tmp = (s64)d->cpu_delay_total + t2;
	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
	tmp = (s64)d->cpu_run_virtual_total + t3;
@@ -217,7 +223,8 @@ void __delayacct_freepages_end(void)
		      &current->delays->freepages_delay,
		      &current->delays->freepages_count,
		      &current->delays->freepages_delay_max,
		      &current->delays->freepages_delay_min);
		      &current->delays->freepages_delay_min,
		      &current->delays->freepages_delay_max_ts);
}

void __delayacct_thrashing_start(bool *in_thrashing)
@@ -241,7 +248,8 @@ void __delayacct_thrashing_end(bool *in_thrashing)
		      &current->delays->thrashing_delay,
		      &current->delays->thrashing_count,
		      &current->delays->thrashing_delay_max,
		      &current->delays->thrashing_delay_min);
		      &current->delays->thrashing_delay_min,
		      &current->delays->thrashing_delay_max_ts);
}

void __delayacct_swapin_start(void)
@@ -256,7 +264,8 @@ void __delayacct_swapin_end(void)
		      &current->delays->swapin_delay,
		      &current->delays->swapin_count,
		      &current->delays->swapin_delay_max,
		      &current->delays->swapin_delay_min);
		      &current->delays->swapin_delay_min,
		      &current->delays->swapin_delay_max_ts);
}

void __delayacct_compact_start(void)
@@ -271,7 +280,8 @@ void __delayacct_compact_end(void)
		      &current->delays->compact_delay,
		      &current->delays->compact_count,
		      &current->delays->compact_delay_max,
		      &current->delays->compact_delay_min);
		      &current->delays->compact_delay_min,
		      &current->delays->compact_delay_max_ts);
}

void __delayacct_wpcopy_start(void)
@@ -286,7 +296,8 @@ void __delayacct_wpcopy_end(void)
		      &current->delays->wpcopy_delay,
		      &current->delays->wpcopy_count,
		      &current->delays->wpcopy_delay_max,
		      &current->delays->wpcopy_delay_min);
		      &current->delays->wpcopy_delay_min,
		      &current->delays->wpcopy_delay_max_ts);
}

void __delayacct_irq(struct task_struct *task, u32 delta)
@@ -296,8 +307,10 @@ void __delayacct_irq(struct task_struct *task, u32 delta)
	raw_spin_lock_irqsave(&task->delays->lock, flags);
	task->delays->irq_delay += delta;
	task->delays->irq_count++;
	if (delta > task->delays->irq_delay_max)
	if (delta > task->delays->irq_delay_max) {
		task->delays->irq_delay_max = delta;
		ktime_get_real_ts64(&task->delays->irq_delay_max_ts);
	}
	if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min))
		task->delays->irq_delay_min = delta;
	raw_spin_unlock_irqrestore(&task->delays->lock, flags);
Loading