Commit 6860d28c authored by Thomas Gleixner's avatar Thomas Gleixner
Browse files

timekeeping: Reorder struct timekeeper



struct timekeeper is ordered suboptimal vs. cachelines. The layout,
including the preceding seqcount (see struct tk_core in timekeeper.c) is:

 cacheline 0:   seqcount, tkr_mono
 cacheline 1:   tkr_raw, xtime_sec
 cacheline 2:   ktime_sec ... tai_offset, internal variables
 cacheline 3:	next_leap_ktime, raw_sec, internal variables
 cacheline 4:	internal variables

So any access to via ktime_get*() except for access to CLOCK_MONOTONIC_RAW
will use either cachelines 0 + 1 or cachelines 0 + 2. Access to
CLOCK_MONOTONIC_RAW uses cachelines 0 + 1 + 3.

Reorder the members so that the result is more efficient:

 cacheline 0:   seqcount, tkr_mono
 cacheline 1:   xtime_sec, ktime_sec ... tai_offset
 cacheline 2:	tkr_raw, raw_sec
 cacheline 3:	internal variables
 cacheline 4:	internal variables

That means ktime_get*() will access cacheline 0 + 1 and CLOCK_MONOTONIC_RAW
access will use cachelines 0 + 2.

Update kernel-doc and fix formatting issues while at it. Also fix a typo
in struct tk_read_base kernel-doc.

Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Signed-off-by: default avatarAnna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Acked-by: default avatarJohn Stultz <jstultz@google.com>
Link: https://lore.kernel.org/all/20241015100839.12702-1-anna-maria@linutronix.de
parent c2a32956
Loading
Loading
Loading
Loading
+65 −41
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@
 * occupies a single 64byte cache line.
 *
 * The struct is separate from struct timekeeper as it is also used
 * for a fast NMI safe accessors.
 * for the fast NMI safe accessors.
 *
 * @base_real is for the fast NMI safe accessor to allow reading clock
 * realtime from any context.
@@ -45,7 +45,6 @@ struct tk_read_base {
/**
 * struct timekeeper - Structure holding internal timekeeping values.
 * @tkr_mono:			The readout base structure for CLOCK_MONOTONIC
 * @tkr_raw:		The readout base structure for CLOCK_MONOTONIC_RAW
 * @xtime_sec:			Current CLOCK_REALTIME time in seconds
 * @ktime_sec:			Current CLOCK_MONOTONIC time in seconds
 * @wall_to_monotonic:		CLOCK_REALTIME to CLOCK_MONOTONIC offset
@@ -53,10 +52,10 @@ struct tk_read_base {
 * @offs_boot:			Offset clock monotonic -> clock boottime
 * @offs_tai:			Offset clock monotonic -> clock tai
 * @tai_offset:			The current UTC to TAI offset in seconds
 * @tkr_raw:			The readout base structure for CLOCK_MONOTONIC_RAW
 * @raw_sec:			CLOCK_MONOTONIC_RAW  time in seconds
 * @clock_was_set_seq:		The sequence number of clock was set events
 * @cs_was_changed_seq:		The sequence number of clocksource change events
 * @next_leap_ktime:	CLOCK_MONOTONIC time value of a pending leap-second
 * @raw_sec:		CLOCK_MONOTONIC_RAW  time in seconds
 * @monotonic_to_boot:		CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
 * @cycle_interval:		Number of clock cycles in one NTP interval
 * @xtime_interval:		Number of clock shifted nano seconds in one NTP
@@ -64,10 +63,19 @@ struct tk_read_base {
 * @xtime_remainder:		Shifted nano seconds left over when rounding
 *				@cycle_interval
 * @raw_interval:		Shifted raw nano seconds accumulated per NTP interval.
 * @next_leap_ktime:		CLOCK_MONOTONIC time value of a pending leap-second
 * @ntp_tick:			The ntp_tick_length() value currently being
 *				used. This cached copy ensures we consistently
 *				apply the tick length for an entire tick, as
 *				ntp_tick_length may change mid-tick, and we don't
 *				want to apply that new value to the tick in
 *				progress.
 * @ntp_error:			Difference between accumulated time and NTP time in ntp
 *				shifted nano seconds.
 * @ntp_error_shift:		Shift conversion between clock shifted nano seconds and
 *				ntp shifted nano seconds.
 * @ntp_err_mult:		Multiplication factor for scaled math conversion
 * @skip_second_overflow:	Flag used to avoid updating NTP twice with same second
 * @last_warning:		Warning ratelimiter (DEBUG_TIMEKEEPING)
 * @underflow_seen:		Underflow warning flag (DEBUG_TIMEKEEPING)
 * @overflow_seen:		Overflow warning flag (DEBUG_TIMEKEEPING)
@@ -88,10 +96,28 @@ struct tk_read_base {
 *
 * @monotonic_to_boottime is a timespec64 representation of @offs_boot to
 * accelerate the VDSO update for CLOCK_BOOTTIME.
 *
 * The cacheline ordering of the structure is optimized for in kernel usage of
 * the ktime_get() and ktime_get_ts64() family of time accessors. Struct
 * timekeeper is prepended in the core timekeeping code with a sequence count,
 * which results in the following cacheline layout:
 *
 * 0:	seqcount, tkr_mono
 * 1:	xtime_sec ... tai_offset
 * 2:	tkr_raw, raw_sec
 * 3,4: Internal variables
 *
 * Cacheline 0,1 contain the data which is used for accessing
 * CLOCK_MONOTONIC/REALTIME/BOOTTIME/TAI, while cacheline 2 contains the
 * data for accessing CLOCK_MONOTONIC_RAW.  Cacheline 3,4 are internal
 * variables which are only accessed during timekeeper updates once per
 * tick.
 */
struct timekeeper {
	/* Cacheline 0 (together with prepended seqcount of timekeeper core): */
	struct tk_read_base	tkr_mono;
	struct tk_read_base	tkr_raw;

	/* Cacheline 1: */
	u64			xtime_sec;
	unsigned long		ktime_sec;
	struct timespec64	wall_to_monotonic;
@@ -99,31 +125,29 @@ struct timekeeper {
	ktime_t			offs_boot;
	ktime_t			offs_tai;
	s32			tai_offset;

	/* Cacheline 2: */
	struct tk_read_base	tkr_raw;
	u64			raw_sec;

	/* Cachline 3 and 4 (timekeeping internal variables): */
	unsigned int		clock_was_set_seq;
	u8			cs_was_changed_seq;
	ktime_t			next_leap_ktime;
	u64			raw_sec;

	struct timespec64	monotonic_to_boot;

	/* The following members are for timekeeping internal use */
	u64			cycle_interval;
	u64			xtime_interval;
	s64			xtime_remainder;
	u64			raw_interval;
	/* The ntp_tick_length() value currently being used.
	 * This cached copy ensures we consistently apply the tick
	 * length for an entire tick, as ntp_tick_length may change
	 * mid-tick, and we don't want to apply that new value to
	 * the tick in progress.
	 */

	ktime_t			next_leap_ktime;
	u64			ntp_tick;
	/* Difference between accumulated time and NTP time in ntp
	 * shifted nano seconds. */
	s64			ntp_error;
	u32			ntp_error_shift;
	u32			ntp_err_mult;
	/* Flag used to avoid updating NTP twice with same second */
	u32			skip_second_overflow;

#ifdef CONFIG_DEBUG_TIMEKEEPING
	long			last_warning;
	/*