Commit d466c19c authored by Anup Patel, committed by Anup Patel
Browse files

RISC-V: KVM: Add common nested acceleration support



Add common nested acceleration support which will be shared by
all parts of KVM RISC-V. This nested acceleration support detects
and enables SBI NACL extension usage based on static keys, which
ensures minimal impact on the non-nested scenario.

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Reviewed-by: Atish Patra <atishp@rivosinc.com>
Link: https://lore.kernel.org/r/20241020194734.58686-9-apatel@ventanamicro.com


Signed-off-by: Anup Patel <anup@brainfault.org>
parent 5daf89e7
Loading
Loading
Loading
Loading
+239 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024 Ventana Micro Systems Inc.
 */

#ifndef __KVM_NACL_H
#define __KVM_NACL_H

#include <linux/jump_label.h>
#include <linux/percpu.h>
#include <asm/byteorder.h>
#include <asm/csr.h>
#include <asm/sbi.h>

/*
 * Static keys describing what the SBI NACL extension offers. Each key
 * defaults to false and is paired with a same-named function-like macro
 * that tests it with static_branch_unlikely().
 */

/* The SBI NACL extension itself is usable. */
DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_available);
#define kvm_riscv_nacl_available() \
	static_branch_unlikely(&kvm_riscv_nacl_available)

/* NACL feature: SYNC_CSR. */
DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_csr_available);
#define kvm_riscv_nacl_sync_csr_available() \
	static_branch_unlikely(&kvm_riscv_nacl_sync_csr_available)

/* NACL feature: SYNC_HFENCE. */
DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_hfence_available);
#define kvm_riscv_nacl_sync_hfence_available() \
	static_branch_unlikely(&kvm_riscv_nacl_sync_hfence_available)

/* NACL feature: SYNC_SRET. */
DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_sret_available);
#define kvm_riscv_nacl_sync_sret_available() \
	static_branch_unlikely(&kvm_riscv_nacl_sync_sret_available)

/* NACL feature: AUTOSWAP_CSR. */
DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_autoswap_csr_available);
#define kvm_riscv_nacl_autoswap_csr_available() \
	static_branch_unlikely(&kvm_riscv_nacl_autoswap_csr_available)

/*
 * Per-CPU NACL state.
 *
 * @shmem:      kernel pointer to this CPU's NACL shared memory
 * @shmem_phys: address of the same memory as handed to the
 *              SBI_EXT_NACL_SET_SHMEM call
 */
struct kvm_riscv_nacl {
	void *shmem;
	phys_addr_t shmem_phys;
};
DECLARE_PER_CPU(struct kvm_riscv_nacl, kvm_riscv_nacl);

/*
 * Store an HFENCE request (@control word, @page_num, @page_count) into a
 * free HFENCE entry of the NACL shared memory at @shmem. Normally reached
 * through the nacl_hfence_*() wrapper macros.
 */
void __kvm_riscv_nacl_hfence(void *shmem,
			     unsigned long control,
			     unsigned long page_num,
			     unsigned long page_count);

/*
 * Hand the current CPU's shared memory to the SBI implementation.
 * Returns 0 on success or when NACL is unavailable, else a Linux errno.
 */
int kvm_riscv_nacl_enable(void);

/* Tell the SBI implementation to stop using shared memory on this CPU. */
void kvm_riscv_nacl_disable(void);

/* Free the shared memory of every possible CPU. */
void kvm_riscv_nacl_exit(void);

/*
 * Detect the SBI NACL extension and its features, enable the matching
 * static keys and allocate per-CPU shared memory. Returns 0 on success,
 * -ENODEV when NACL is not available, or -ENOMEM on allocation failure.
 */
int kvm_riscv_nacl_init(void);

/*
 * Little-endian <-> CPU conversion for "long"-sized values: 32-bit
 * helpers when CONFIG_32BIT is set, 64-bit helpers otherwise.
 */
#ifdef CONFIG_32BIT
#define lelong_to_cpu(__x)	le32_to_cpu(__x)
#define cpu_to_lelong(__x)	cpu_to_le32(__x)
#else
#define lelong_to_cpu(__x)	le64_to_cpu(__x)
#define cpu_to_lelong(__x)	cpu_to_le64(__x)
#endif

/* Shared memory pointer of the CPU this code is currently running on. */
#define nacl_shmem()							\
	this_cpu_ptr(&kvm_riscv_nacl)->shmem

/*
 * Read one little-endian long from the scratch area of the shared memory
 * and return it in CPU byte order.
 *
 * NOTE(review): __shmem is expected to be a void * (as nacl_shmem()
 * yields) so that SBI_NACL_SHMEM_SCRATCH_OFFSET and __offset are applied
 * as byte offsets; a typed pointer would scale them.
 */
#define nacl_scratch_read_long(__shmem, __offset)			\
({									\
	unsigned long *__p = (__shmem) +				\
			     SBI_NACL_SHMEM_SCRATCH_OFFSET +		\
			     (__offset);				\
	lelong_to_cpu(*__p);						\
})

/*
 * Store __val as a little-endian long into the scratch area of the shared
 * memory at byte offset __offset (assuming __shmem is a void *).
 */
#define nacl_scratch_write_long(__shmem, __offset, __val)		\
do {									\
	unsigned long *__p = (__shmem) +				\
			     SBI_NACL_SHMEM_SCRATCH_OFFSET +		\
			     (__offset);				\
	*__p = cpu_to_lelong(__val);					\
} while (0)

/*
 * Store __count consecutive longs from __array, each converted to
 * little-endian, into the scratch area starting at byte offset __offset
 * (assuming __shmem is a void *). __count is re-evaluated on every loop
 * iteration, so it should be free of side effects.
 */
#define nacl_scratch_write_longs(__shmem, __offset, __array, __count)	\
do {									\
	unsigned int __i;						\
	unsigned long *__p = (__shmem) +				\
			     SBI_NACL_SHMEM_SCRATCH_OFFSET +		\
			     (__offset);				\
	for (__i = 0; __i < (__count); __i++)				\
		__p[__i] = cpu_to_lelong((__array)[__i]);		\
} while (0)

/*
 * Issue the SBI_EXT_NACL_SYNC_HFENCE call with __e as its only argument
 * and evaluate to the resulting struct sbiret.
 * NOTE(review): __e presumably selects the HFENCE entry to synchronize
 * (callers pass -1UL) -- confirm against the SBI NACL specification.
 */
#define nacl_sync_hfence(__e)						\
	sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SYNC_HFENCE,		\
		  (__e), 0, 0, 0, 0, 0)

/*
 * Build the config word of an HFENCE entry. The result always has
 * SBI_NACL_SHMEM_HFENCE_CONFIG_PEND set and carries the masked and
 * shifted __type, (__order - SBI_NACL_SHMEM_HFENCE_ORDER_BASE) and __vmid
 * fields, plus the masked __asid (which is not shifted).
 */
#define nacl_hfence_mkconfig(__type, __order, __vmid, __asid)		\
({									\
	unsigned long __c = SBI_NACL_SHMEM_HFENCE_CONFIG_PEND;		\
	__c |= ((__type) & SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_MASK)	\
		<< SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_SHIFT;		\
	__c |= (((__order) - SBI_NACL_SHMEM_HFENCE_ORDER_BASE) &	\
		SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_MASK)		\
		<< SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_SHIFT;		\
	__c |= ((__vmid) & SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_MASK)	\
		<< SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_SHIFT;		\
	__c |= ((__asid) & SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_MASK);	\
	__c;								\
})

/* Page number of an address, for pages of (1 << order) bytes. */
#define nacl_hfence_mkpnum(__shift, __base)	((__base) >> (__shift))

/* Number of whole (1 << order)-byte pages contained in a byte size. */
#define nacl_hfence_mkpcount(__shift, __bytes)	((__bytes) >> (__shift))

/*
 * HFENCE request helpers. Each one builds a config word with
 * nacl_hfence_mkconfig() for the matching SBI_NACL_SHMEM_HFENCE_TYPE_*
 * and hands it to __kvm_riscv_nacl_hfence(). Ranged variants convert the
 * address and size to a page number and page count using __order; the
 * *_all variants pass order 0, page number 0 and page count 0.
 */

/* GVMA fence over a guest physical range; VMID and ASID fields are 0. */
#define nacl_hfence_gvma(__shmem, __gpa, __gpsz, __order)		\
__kvm_riscv_nacl_hfence(__shmem,					\
	nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA,		\
			   __order, 0, 0),				\
	nacl_hfence_mkpnum(__order, __gpa),				\
	nacl_hfence_mkpcount(__order, __gpsz))

/* GVMA_ALL fence; no range, VMID and ASID fields are 0. */
#define nacl_hfence_gvma_all(__shmem)					\
__kvm_riscv_nacl_hfence(__shmem,					\
	nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_ALL,	\
			   0, 0, 0), 0, 0)

/* GVMA_VMID fence over a guest physical range for one VMID. */
#define nacl_hfence_gvma_vmid(__shmem, __vmid, __gpa, __gpsz, __order)	\
__kvm_riscv_nacl_hfence(__shmem,					\
	nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID,	\
			   __order, __vmid, 0),				\
	nacl_hfence_mkpnum(__order, __gpa),				\
	nacl_hfence_mkpcount(__order, __gpsz))

/* GVMA_VMID_ALL fence for one VMID; no range. */
#define nacl_hfence_gvma_vmid_all(__shmem, __vmid)			\
__kvm_riscv_nacl_hfence(__shmem,					\
	nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID_ALL,	\
			   0, __vmid, 0), 0, 0)

/* VVMA fence over a guest virtual range for one VMID; ASID field is 0. */
#define nacl_hfence_vvma(__shmem, __vmid, __gva, __gvsz, __order)	\
__kvm_riscv_nacl_hfence(__shmem,					\
	nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA,		\
			   __order, __vmid, 0),				\
	nacl_hfence_mkpnum(__order, __gva),				\
	nacl_hfence_mkpcount(__order, __gvsz))

/* VVMA_ALL fence for one VMID; no range, ASID field is 0. */
#define nacl_hfence_vvma_all(__shmem, __vmid)				\
__kvm_riscv_nacl_hfence(__shmem,					\
	nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ALL,	\
			   0, __vmid, 0), 0, 0)

/* VVMA_ASID fence over a guest virtual range for one VMID and ASID. */
#define nacl_hfence_vvma_asid(__shmem, __vmid, __asid, __gva, __gvsz, __order)\
__kvm_riscv_nacl_hfence(__shmem,					\
	nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID,	\
			   __order, __vmid, __asid),			\
	nacl_hfence_mkpnum(__order, __gva),				\
	nacl_hfence_mkpcount(__order, __gvsz))

/* VVMA_ASID_ALL fence for one VMID and ASID; no range. */
#define nacl_hfence_vvma_asid_all(__shmem, __vmid, __asid)		\
__kvm_riscv_nacl_hfence(__shmem,					\
	nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID_ALL,	\
			   0, __vmid, __asid), 0, 0)

/*
 * Read the shared-memory copy of __csr: the little-endian long at slot
 * SBI_NACL_SHMEM_CSR_INDEX(__csr) of the array that starts at
 * SBI_NACL_SHMEM_CSR_OFFSET, returned in CPU byte order. The real CSR is
 * not accessed.
 */
#define nacl_csr_read(__shmem, __csr)					\
({									\
	unsigned long *__a = (__shmem) + SBI_NACL_SHMEM_CSR_OFFSET;	\
	lelong_to_cpu(__a[SBI_NACL_SHMEM_CSR_INDEX(__csr)]);		\
})

/*
 * Write __val (as little-endian) to the shared-memory slot of __csr and
 * set bit number SBI_NACL_SHMEM_CSR_INDEX(__csr) in the byte-wise bitmap
 * at SBI_NACL_SHMEM_DBITMAP_OFFSET (presumably the "dirty" bitmap that
 * tells the SBI implementation which slots changed -- confirm against the
 * SBI NACL specification). The real CSR is not written here.
 */
#define nacl_csr_write(__shmem, __csr, __val)				\
do {									\
	void *__s = (__shmem);						\
	unsigned int __i = SBI_NACL_SHMEM_CSR_INDEX(__csr);		\
	unsigned long *__a = (__s) + SBI_NACL_SHMEM_CSR_OFFSET;		\
	u8 *__b = (__s) + SBI_NACL_SHMEM_DBITMAP_OFFSET;		\
	__a[__i] = cpu_to_lelong(__val);				\
	__b[__i >> 3] |= 1U << (__i & 0x7);				\
} while (0)

/*
 * Like nacl_csr_write(), but evaluates to the value previously stored in
 * the shared-memory slot of __csr (converted to CPU byte order). The old
 * value is read before the new one is stored.
 */
#define nacl_csr_swap(__shmem, __csr, __val)				\
({									\
	void *__s = (__shmem);						\
	unsigned int __i = SBI_NACL_SHMEM_CSR_INDEX(__csr);		\
	unsigned long *__a = (__s) + SBI_NACL_SHMEM_CSR_OFFSET;		\
	u8 *__b = (__s) + SBI_NACL_SHMEM_DBITMAP_OFFSET;		\
	unsigned long __r = lelong_to_cpu(__a[__i]);			\
	__a[__i] = cpu_to_lelong(__val);				\
	__b[__i >> 3] |= 1U << (__i & 0x7);				\
	__r;								\
})

/*
 * Issue the SBI_EXT_NACL_SYNC_CSR call with __csr as its only argument
 * and evaluate to the resulting struct sbiret.
 */
#define nacl_sync_csr(__csr)						\
	sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SYNC_CSR,			\
		  (__csr), 0, 0, 0, 0, 0)

/*
 * Each ncsr_xyz() macro defined below has its own static-branch so every
 * use of ncsr_xyz() macro emits a patchable direct jump. This means multiple
 * back-to-back ncsr_xyz() macro usage will emit multiple patchable direct
 * jumps which is sub-optimal.
 *
 * Based on the above, it is recommended to avoid multiple back-to-back
 * ncsr_xyz() macro usage.
 */

/*
 * Read __csr from the current CPU's NACL shared memory when NACL is
 * available, otherwise from the real CSR. Note that this tests
 * kvm_riscv_nacl_available(), whereas ncsr_write()/ncsr_swap() test
 * kvm_riscv_nacl_sync_csr_available().
 */
#define ncsr_read(__csr)						\
({									\
	unsigned long __r;						\
	if (kvm_riscv_nacl_available())					\
		__r = nacl_csr_read(nacl_shmem(), __csr);		\
	else								\
		__r = csr_read(__csr);					\
	__r;								\
})

/*
 * Write __val to __csr: into the current CPU's NACL shared memory when
 * the SYNC_CSR feature is available, otherwise into the real CSR.
 */
#define ncsr_write(__csr, __val)					\
do {									\
	if (kvm_riscv_nacl_sync_csr_available())			\
		nacl_csr_write(nacl_shmem(), __csr, __val);		\
	else								\
		csr_write(__csr, __val);				\
} while (0)

/*
 * Swap __val into __csr and evaluate to the previous value, using the
 * current CPU's NACL shared memory when the SYNC_CSR feature is
 * available and the real CSR otherwise.
 */
#define ncsr_swap(__csr, __val)						\
({									\
	unsigned long __r;						\
	if (kvm_riscv_nacl_sync_csr_available())			\
		__r = nacl_csr_swap(nacl_shmem(), __csr, __val);	\
	else								\
		__r = csr_swap(__csr, __val);				\
	__r;								\
})

/*
 * Issue a SYNC_CSR call for __csr when the SYNC_CSR feature is
 * available; does nothing otherwise. The SBI return value is discarded.
 */
#define nsync_csr(__csr)						\
do {									\
	if (kvm_riscv_nacl_sync_csr_available())			\
		nacl_sync_csr(__csr);					\
} while (0)

#endif
+1 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ kvm-y += aia_device.o
kvm-y += aia_imsic.o
kvm-y += main.o
kvm-y += mmu.o
kvm-y += nacl.o
kvm-y += tlb.o
kvm-y += vcpu.o
kvm-y += vcpu_exit.o
+49 −2
Original line number Diff line number Diff line
@@ -10,8 +10,8 @@
#include <linux/err.h>
#include <linux/module.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
#include <asm/cpufeature.h>
#include <asm/kvm_nacl.h>
#include <asm/sbi.h>

long kvm_arch_dev_ioctl(struct file *filp,
@@ -22,6 +22,12 @@ long kvm_arch_dev_ioctl(struct file *filp,

int kvm_arch_enable_virtualization_cpu(void)
{
	int rc;

	rc = kvm_riscv_nacl_enable();
	if (rc)
		return rc;

	csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
	csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);

@@ -49,17 +55,21 @@ void kvm_arch_disable_virtualization_cpu(void)
	csr_write(CSR_HVIP, 0);
	csr_write(CSR_HEDELEG, 0);
	csr_write(CSR_HIDELEG, 0);

	kvm_riscv_nacl_disable();
}

static void kvm_riscv_teardown(void)
{
	kvm_riscv_aia_exit();
	kvm_riscv_nacl_exit();
	kvm_unregister_perf_callbacks();
}

static int __init riscv_kvm_init(void)
{
	int rc;
	char slist[64];
	const char *str;

	if (!riscv_isa_extension_available(NULL, h)) {
@@ -77,16 +87,53 @@ static int __init riscv_kvm_init(void)
		return -ENODEV;
	}

	rc = kvm_riscv_nacl_init();
	if (rc && rc != -ENODEV)
		return rc;

	kvm_riscv_gstage_mode_detect();

	kvm_riscv_gstage_vmid_detect();

	rc = kvm_riscv_aia_init();
	if (rc && rc != -ENODEV)
	if (rc && rc != -ENODEV) {
		kvm_riscv_nacl_exit();
		return rc;
	}

	kvm_info("hypervisor extension available\n");

	if (kvm_riscv_nacl_available()) {
		rc = 0;
		slist[0] = '\0';
		if (kvm_riscv_nacl_sync_csr_available()) {
			if (rc)
				strcat(slist, ", ");
			strcat(slist, "sync_csr");
			rc++;
		}
		if (kvm_riscv_nacl_sync_hfence_available()) {
			if (rc)
				strcat(slist, ", ");
			strcat(slist, "sync_hfence");
			rc++;
		}
		if (kvm_riscv_nacl_sync_sret_available()) {
			if (rc)
				strcat(slist, ", ");
			strcat(slist, "sync_sret");
			rc++;
		}
		if (kvm_riscv_nacl_autoswap_csr_available()) {
			if (rc)
				strcat(slist, ", ");
			strcat(slist, "autoswap_csr");
			rc++;
		}
		kvm_info("using SBI nested acceleration with %s\n",
			 (rc) ? slist : "no features");
	}

	switch (kvm_riscv_gstage_mode()) {
	case HGATP_MODE_SV32X4:
		str = "Sv32x4";

arch/riscv/kvm/nacl.c

0 → 100644
+152 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2024 Ventana Micro Systems Inc.
 */

#include <linux/kvm_host.h>
#include <linux/vmalloc.h>
#include <asm/kvm_nacl.h>

/*
 * NACL availability keys; all start out false and are switched on by
 * kvm_riscv_nacl_init() according to what the SBI implementation reports.
 */
DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_available);
DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_csr_available);
DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_hfence_available);
DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_sret_available);
DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_autoswap_csr_available);
/* Per-CPU shared memory bookkeeping (virtual and physical address). */
DEFINE_PER_CPU(struct kvm_riscv_nacl, kvm_riscv_nacl);

/*
 * Queue one HFENCE request in the NACL shared memory.
 *
 * @shmem:      base of the shared memory to place the request in
 * @control:    config word of the request (see nacl_hfence_mkconfig())
 * @page_num:   first page number of the range
 * @page_count: number of pages in the range
 *
 * The HFENCE entries are scanned for one whose config word does not have
 * SBI_NACL_SHMEM_HFENCE_CONFIG_PEND set. If all entries are pending, a
 * SYNC_HFENCE call is issued and the scan is repeated, at most five
 * times. If no entry becomes free after that, a warning is printed and
 * the request is dropped.
 */
void __kvm_riscv_nacl_hfence(void *shmem,
			     unsigned long control,
			     unsigned long page_num,
			     unsigned long page_count)
{
	int i, ent = -1, try_count = 5;
	unsigned long *entp;

again:
	for (i = 0; i < SBI_NACL_SHMEM_HFENCE_ENTRY_MAX; i++) {
		entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(i);
		if (lelong_to_cpu(*entp) & SBI_NACL_SHMEM_HFENCE_CONFIG_PEND)
			continue;

		ent = i;
		break;
	}

	if (ent < 0) {
		/*
		 * Consume one retry per sync attempt so the loop is bounded
		 * and the warning below is actually reachable.
		 */
		if (try_count--) {
			nacl_sync_hfence(-1UL);
			goto again;
		}

		pr_warn("KVM: No free entry in NACL shared memory\n");
		return;
	}

	/* Fill the free entry that was found (ent), not the loop counter. */
	entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(ent);
	*entp = cpu_to_lelong(control);
	entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PNUM(ent);
	*entp = cpu_to_lelong(page_num);
	entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PCOUNT(ent);
	*entp = cpu_to_lelong(page_count);
}

int kvm_riscv_nacl_enable(void)
{
	int rc;
	struct sbiret ret;
	struct kvm_riscv_nacl *nacl;

	if (!kvm_riscv_nacl_available())
		return 0;
	nacl = this_cpu_ptr(&kvm_riscv_nacl);

	ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM,
			nacl->shmem_phys, 0, 0, 0, 0, 0);
	rc = sbi_err_map_linux_errno(ret.error);
	if (rc)
		return rc;

	return 0;
}

/*
 * Ask the SBI implementation to stop using shared memory on the current
 * CPU by issuing SET_SHMEM with SBI_SHMEM_DISABLE for both address
 * arguments. Does nothing when NACL is not available; the SBI result is
 * not checked.
 */
void kvm_riscv_nacl_disable(void)
{
	if (kvm_riscv_nacl_available())
		sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM,
			  SBI_SHMEM_DISABLE, SBI_SHMEM_DISABLE, 0, 0, 0, 0);
}

/*
 * Release the NACL shared memory of every possible CPU and clear the
 * per-CPU bookkeeping. CPUs without shared memory are skipped, so this is
 * also safe after a partially completed kvm_riscv_nacl_init(). Does
 * nothing when NACL is not available.
 */
void kvm_riscv_nacl_exit(void)
{
	struct kvm_riscv_nacl *pcpu_nacl;
	int cpu;

	if (!kvm_riscv_nacl_available())
		return;

	/* Free per-CPU shared memory */
	for_each_possible_cpu(cpu) {
		pcpu_nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu);
		if (pcpu_nacl->shmem) {
			free_pages((unsigned long)pcpu_nacl->shmem,
				   get_order(SBI_NACL_SHMEM_SIZE));
			pcpu_nacl->shmem = NULL;
			pcpu_nacl->shmem_phys = 0;
		}
	}
}

static long nacl_probe_feature(long feature_id)
{
	struct sbiret ret;

	if (!kvm_riscv_nacl_available())
		return 0;

	ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_PROBE_FEATURE,
			feature_id, 0, 0, 0, 0, 0);
	return ret.value;
}

int kvm_riscv_nacl_init(void)
{
	int cpu;
	struct page *shmem_page;
	struct kvm_riscv_nacl *nacl;

	if (sbi_spec_version < sbi_mk_version(1, 0) ||
	    sbi_probe_extension(SBI_EXT_NACL) <= 0)
		return -ENODEV;

	/* Enable NACL support */
	static_branch_enable(&kvm_riscv_nacl_available);

	/* Probe NACL features */
	if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_CSR))
		static_branch_enable(&kvm_riscv_nacl_sync_csr_available);
	if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_HFENCE))
		static_branch_enable(&kvm_riscv_nacl_sync_hfence_available);
	if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_SRET))
		static_branch_enable(&kvm_riscv_nacl_sync_sret_available);
	if (nacl_probe_feature(SBI_NACL_FEAT_AUTOSWAP_CSR))
		static_branch_enable(&kvm_riscv_nacl_autoswap_csr_available);

	/* Allocate per-CPU shared memory */
	for_each_possible_cpu(cpu) {
		nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu);

		shmem_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
					 get_order(SBI_NACL_SHMEM_SIZE));
		if (!shmem_page) {
			kvm_riscv_nacl_exit();
			return -ENOMEM;
		}
		nacl->shmem = page_to_virt(shmem_page);
		nacl->shmem_phys = page_to_phys(shmem_page);
	}

	return 0;
}