Commit 6455c6c1, authored by Nam Cao, committed by Paul Walmsley
Browse files

riscv: Clean up & optimize unaligned scalar access probe



check_unaligned_access_speed_all_cpus() is more complicated than it should
be:

  - It uses on_each_cpu() to probe unaligned memory access on all CPUs but
    excludes CPU0 with a check in the callback function. So an IPI to CPU0
    is wasted.

  - Probing on CPU0 is done with smp_call_on_cpu(), which is not as fast as
    on_each_cpu().

The reason for this design is that the probe is timed with jiffies.
Therefore on_each_cpu() excludes CPU0 because that CPU needs to tend to
jiffies.

Instead, replace jiffies usage with ktime_get_mono_fast_ns(). With jiffies
out of the way, on_each_cpu() can be used for all CPUs and
smp_call_on_cpu() can be dropped.

To make ktime_get_mono_fast_ns() usable, move this probe to late_initcall.
Anything after clocksource's fs_initcall works, but avoid depending on
clocksource staying at fs_initcall.

The choice of probe time is now 8000000 ns, which is the same as before (2
jiffies) for riscv defconfig. This is excessive for the CPUs I have, and
probably should be reduced; but that's a different discussion.

Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Nam Cao <namcao@linutronix.de>
Link: https://patch.msgid.link/9b9a20affe2e4f5c380926ceb885a47e20a59395.1770830596.git.namcao@linutronix.de


Signed-off-by: Paul Walmsley <pjw@kernel.org>
parent bef64bcb
Loading
Loading
Loading
Loading
+10 −18
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include "copy-unaligned.h"

#define MISALIGNED_ACCESS_JIFFIES_LG2 1
#define MISALIGNED_ACCESS_NS 8000000
#define MISALIGNED_BUFFER_SIZE 0x4000
#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
@@ -36,8 +37,8 @@ static int check_unaligned_access(void *param)
	u64 start_cycles, end_cycles;
	u64 word_cycles;
	u64 byte_cycles;
	u64 start_ns;
	int ratio;
	unsigned long start_jiffies, now;
	struct page *page = param;
	void *dst;
	void *src;
@@ -55,15 +56,13 @@ static int check_unaligned_access(void *param)
	/* Do a warmup. */
	__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
	preempt_disable();
	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	/*
	 * For a fixed amount of time, repeatedly try the function, and take
	 * the best time in cycles as the measurement.
	 */
	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
	start_ns = ktime_get_mono_fast_ns();
	while (ktime_get_mono_fast_ns() < start_ns + MISALIGNED_ACCESS_NS) {
		start_cycles = get_cycles64();
		/* Ensure the CSR read can't reorder WRT to the copy. */
		mb();
@@ -77,11 +76,9 @@ static int check_unaligned_access(void *param)

	byte_cycles = -1ULL;
	__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
	start_ns = ktime_get_mono_fast_ns();
	while (ktime_get_mono_fast_ns() < start_ns + MISALIGNED_ACCESS_NS) {
		start_cycles = get_cycles64();
		mb();
		__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
@@ -125,12 +122,11 @@ static int check_unaligned_access(void *param)
	return 0;
}

static void __init check_unaligned_access_nonboot_cpu(void *param)
static void __init _check_unaligned_access(void *param)
{
	unsigned int cpu = smp_processor_id();
	struct page **pages = param;

	if (smp_processor_id() != 0)
	check_unaligned_access(pages[cpu]);
}

@@ -158,11 +154,7 @@ static void __init check_unaligned_access_speed_all_cpus(void)
		}
	}

	/* Check everybody except 0, who stays behind to tend jiffies. */
	on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);

	/* Check core 0. */
	smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
	on_each_cpu(_check_unaligned_access, bufs, 1);

out:
	for_each_cpu(cpu, cpu_online_mask) {
@@ -494,4 +486,4 @@ static int __init check_unaligned_access_all_cpus(void)
	return 0;
}

arch_initcall(check_unaligned_access_all_cpus);
late_initcall(check_unaligned_access_all_cpus);