Commit 0c2678ef authored by Xin Li (Intel), committed by Ingo Molnar
Browse files

x86/pvops/msr: Refactor pv_cpu_ops.write_msr{,_safe}()



An MSR value is represented as a 64-bit unsigned integer, with existing
MSR instructions storing it in EDX:EAX as two 32-bit segments.

The new immediate form MSR instructions, however, utilize a 64-bit
general-purpose register to store the MSR value.  To unify the usage of
all MSR instructions, let the default MSR access APIs accept an MSR
value as a single 64-bit argument instead of two 32-bit segments.

The dual 32-bit APIs are still available as convenient wrappers over the
APIs that handle an MSR value as a single 64-bit argument.

The following illustrates the updated derivation of the MSR write APIs:

                 __wrmsrq(u32 msr, u64 val)
                   /                  \
                  /                    \
           native_wrmsrq(msr, val)    native_wrmsr(msr, low, high)
                 |
                 |
           native_write_msr(msr, val)
                /          \
               /            \
       wrmsrq(msr, val)    wrmsr(msr, low, high)

When CONFIG_PARAVIRT is enabled, wrmsrq() and wrmsr() are defined on top
of paravirt_write_msr():

            paravirt_write_msr(u32 msr, u64 val)
               /             \
              /               \
          wrmsrq(msr, val)    wrmsr(msr, low, high)

paravirt_write_msr() invokes cpu.write_msr(msr, val), the pv_ops
indirection layer for MSR writes:

    If on native:

            cpu.write_msr = native_write_msr

    If on Xen:

            cpu.write_msr = xen_write_msr

Therefore, refactor pv_cpu_ops.write_msr{,_safe}() to accept an MSR value
in a single u64 argument, replacing the current dual u32 arguments.

No functional change intended.

Signed-off-by: Xin Li (Intel) <xin@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Uros Bizjak <ubizjak@gmail.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Link: https://lore.kernel.org/r/20250427092027.1598740-14-xin@zytor.com
parent 2b7e2530
Loading
Loading
Loading
Loading
+15 −20
Original line number Diff line number Diff line
@@ -75,12 +75,12 @@ static __always_inline u64 __rdmsr(u32 msr)
	return EAX_EDX_VAL(val, low, high);
}

static __always_inline void __wrmsr(u32 msr, u32 low, u32 high)
static __always_inline void __wrmsrq(u32 msr, u64 val)
{
	asm volatile("1: wrmsr\n"
		     "2:\n"
		     _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
		     : : "c" (msr), "a"(low), "d" (high) : "memory");
		     : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)) : "memory");
}

#define native_rdmsr(msr, val1, val2)			\
@@ -96,11 +96,10 @@ static __always_inline u64 native_rdmsrq(u32 msr)
}

#define native_wrmsr(msr, low, high)			\
	__wrmsr(msr, low, high)
	__wrmsrq((msr), (u64)(high) << 32 | (low))

#define native_wrmsrq(msr, val)				\
	__wrmsr((msr), (u32)((u64)(val)),		\
		       (u32)((u64)(val) >> 32))
	__wrmsrq((msr), (val))

static inline u64 native_read_msr(u32 msr)
{
@@ -129,11 +128,8 @@ static inline u64 native_read_msr_safe(u32 msr, int *err)
}

/* Can be uninlined because referenced by paravirt */
static inline void notrace
native_write_msr(u32 msr, u32 low, u32 high)
static inline void notrace native_write_msr(u32 msr, u64 val)
{
	u64 val = (u64)high << 32 | low;

	native_wrmsrq(msr, val);

	if (tracepoint_enabled(write_msr))
@@ -141,8 +137,7 @@ native_write_msr(u32 msr, u32 low, u32 high)
}

/* Can be uninlined because referenced by paravirt */
static inline int notrace
native_write_msr_safe(u32 msr, u32 low, u32 high)
static inline int notrace native_write_msr_safe(u32 msr, u64 val)
{
	int err;

@@ -150,10 +145,10 @@ native_write_msr_safe(u32 msr, u32 low, u32 high)
		     "2:\n\t"
		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err])
		     : [err] "=a" (err)
		     : "c" (msr), "0" (low), "d" (high)
		     : "c" (msr), "0" ((u32)val), "d" ((u32)(val >> 32))
		     : "memory");
	if (tracepoint_enabled(write_msr))
		do_trace_write_msr(msr, ((u64)high << 32 | low), err);
		do_trace_write_msr(msr, val, err);
	return err;
}

@@ -189,7 +184,7 @@ do { \

static inline void wrmsr(u32 msr, u32 low, u32 high)
{
	native_write_msr(msr, low, high);
	native_write_msr(msr, (u64)high << 32 | low);
}

#define rdmsrq(msr, val)			\
@@ -197,13 +192,13 @@ static inline void wrmsr(u32 msr, u32 low, u32 high)

static inline void wrmsrq(u32 msr, u64 val)
{
	native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
	native_write_msr(msr, val);
}

/* wrmsr with exception handling */
static inline int wrmsr_safe(u32 msr, u32 low, u32 high)
static inline int wrmsrq_safe(u32 msr, u64 val)
{
	return native_write_msr_safe(msr, low, high);
	return native_write_msr_safe(msr, val);
}

/* rdmsr with exception handling */
@@ -247,11 +242,11 @@ static __always_inline void wrmsrns(u32 msr, u64 val)
}

/*
 * 64-bit version of wrmsr_safe():
 * Dual u32 version of wrmsrq_safe():
 */
static inline int wrmsrq_safe(u32 msr, u64 val)
static inline int wrmsr_safe(u32 msr, u32 low, u32 high)
{
	return wrmsr_safe(msr, (u32)val,  (u32)(val >> 32));
	return wrmsrq_safe(msr, (u64)high << 32 | low);
}

struct msr __percpu *msrs_alloc(void);
+14 −13
Original line number Diff line number Diff line
@@ -180,10 +180,9 @@ static inline u64 paravirt_read_msr(unsigned msr)
	return PVOP_CALL1(u64, cpu.read_msr, msr);
}

static inline void paravirt_write_msr(unsigned msr,
				      unsigned low, unsigned high)
static inline void paravirt_write_msr(u32 msr, u64 val)
{
	PVOP_VCALL3(cpu.write_msr, msr, low, high);
	PVOP_VCALL2(cpu.write_msr, msr, val);
}

static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
@@ -191,10 +190,9 @@ static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
	return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err);
}

static inline int paravirt_write_msr_safe(unsigned msr,
					  unsigned low, unsigned high)
static inline int paravirt_write_msr_safe(u32 msr, u64 val)
{
	return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high);
	return PVOP_CALL2(int, cpu.write_msr_safe, msr, val);
}

#define rdmsr(msr, val1, val2)			\
@@ -204,22 +202,25 @@ do { \
	val2 = _l >> 32;			\
} while (0)

#define wrmsr(msr, val1, val2)			\
do {						\
	paravirt_write_msr(msr, val1, val2);	\
} while (0)
static __always_inline void wrmsr(u32 msr, u32 low, u32 high)
{
	paravirt_write_msr(msr, (u64)high << 32 | low);
}

#define rdmsrq(msr, val)			\
do {						\
	val = paravirt_read_msr(msr);		\
} while (0)

static inline void wrmsrq(unsigned msr, u64 val)
static inline void wrmsrq(u32 msr, u64 val)
{
	wrmsr(msr, (u32)val, (u32)(val>>32));
	paravirt_write_msr(msr, val);
}

#define wrmsr_safe(msr, a, b)	paravirt_write_msr_safe(msr, a, b)
static inline int wrmsrq_safe(u32 msr, u64 val)
{
	return paravirt_write_msr_safe(msr, val);
}

/* rdmsr with exception handling */
#define rdmsr_safe(msr, a, b)				\
+2 −2
Original line number Diff line number Diff line
@@ -92,14 +92,14 @@ struct pv_cpu_ops {

	/* Unsafe MSR operations.  These will warn or panic on failure. */
	u64 (*read_msr)(unsigned int msr);
	void (*write_msr)(unsigned int msr, unsigned low, unsigned high);
	void (*write_msr)(u32 msr, u64 val);

	/*
	 * Safe MSR operations.
	 * read sets err to 0 or -EIO.  write returns 0 or -EIO.
	 */
	u64 (*read_msr_safe)(unsigned int msr, int *err);
	int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high);
	int (*write_msr_safe)(u32 msr, u64 val);

	u64 (*read_pmc)(int counter);

+1 −1
Original line number Diff line number Diff line
@@ -196,7 +196,7 @@ static void kvm_setup_secondary_clock(void)
void kvmclock_disable(void)
{
	if (msr_kvm_system_time)
		native_write_msr(msr_kvm_system_time, 0, 0);
		native_write_msr(msr_kvm_system_time, 0);
}

static void __init kvmclock_init_mem(void)
+3 −12
Original line number Diff line number Diff line
@@ -476,7 +476,6 @@ static void svm_inject_exception(struct kvm_vcpu *vcpu)

static void svm_init_erratum_383(void)
{
	u32 low, high;
	int err;
	u64 val;

@@ -490,10 +489,7 @@ static void svm_init_erratum_383(void)

	val |= (1ULL << 47);

	low  = lower_32_bits(val);
	high = upper_32_bits(val);

	native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
	native_write_msr_safe(MSR_AMD64_DC_CFG, val);

	erratum_383_found = true;
}
@@ -2168,17 +2164,12 @@ static bool is_erratum_383(void)

	/* Clear MCi_STATUS registers */
	for (i = 0; i < 6; ++i)
		native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
		native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0);

	value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
	if (!err) {
		u32 low, high;

		value &= ~(1ULL << 2);
		low    = lower_32_bits(value);
		high   = upper_32_bits(value);

		native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
		native_write_msr_safe(MSR_IA32_MCG_STATUS, value);
	}

	/* Flush tlb to evict multi-match entries */
Loading