Commit cd6b97bc authored by Marc Zyngier
Browse files

Merge branch kvm-arm64/pkvm-6.16 into kvm-arm64/pkvm-np-thp-6.16



* kvm-arm64/pkvm-6.16:
  : .
  : pKVM memory management cleanups, courtesy of Quentin Perret.
  : From the cover letter:
  :
  : "This series moves the hypervisor's ownership state to the hyp_vmemmap,
  : as discussed in [1]. The two main benefits are:
  :
  :  1. much cheaper hyp state lookups, since we can avoid the hyp stage-1
  :     page-table walk;
  :
  :  2. de-correlates the hyp state from the presence of a mapping in the
  :     linear map range of the hypervisor; which enables a bunch of
  :     clean-ups in the existing code and will simplify the introduction of
  :     other features in the future (hyp tracing, ...)"
  : .
  KVM: arm64: Unconditionally cross check hyp state
  KVM: arm64: Defer EL2 stage-1 mapping on share
  KVM: arm64: Move hyp state to hyp_vmemmap
  KVM: arm64: Introduce {get,set}_host_state() helpers
  KVM: arm64: Use 0b11 for encoding PKVM_NOPAGE
  KVM: arm64: Fix pKVM page-tracking comments
  KVM: arm64: Track SVE state in the hypervisor vcpu structure

Signed-off-by: Marc Zyngier <maz@kernel.org>
parents b4432656 43c47550
Loading
Loading
Loading
Loading
+7 −5
Original line number Diff line number Diff line
@@ -971,20 +971,22 @@ struct kvm_vcpu_arch {
#define vcpu_sve_zcr_elx(vcpu)						\
	(unlikely(is_hyp_ctxt(vcpu)) ? ZCR_EL2 : ZCR_EL1)

#define vcpu_sve_state_size(vcpu) ({					\
#define sve_state_size_from_vl(sve_max_vl) ({				\
	size_t __size_ret;						\
	unsigned int __vcpu_vq;						\
	unsigned int __vq;						\
									\
	if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) {		\
	if (WARN_ON(!sve_vl_valid(sve_max_vl))) {			\
		__size_ret = 0;						\
	} else {							\
		__vcpu_vq = vcpu_sve_max_vq(vcpu);			\
		__size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq);		\
		__vq = sve_vq_from_vl(sve_max_vl);			\
		__size_ret = SVE_SIG_REGS_SIZE(__vq);			\
	}								\
									\
	__size_ret;							\
})

/*
 * Size of the SVE register state for @vcpu, derived from the vCPU's
 * arch.sve_max_vl by sve_state_size_from_vl().
 */
#define vcpu_sve_state_size(vcpu) sve_state_size_from_vl((vcpu)->arch.sve_max_vl)

#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
				 KVM_GUESTDBG_USE_SW_BP | \
				 KVM_GUESTDBG_USE_HW | \
+46 −12
Original line number Diff line number Diff line
@@ -8,23 +8,30 @@
#include <linux/types.h>

/*
 * Bits 0-1 are reserved to track the memory ownership state of each page:
 *   00: The page is owned exclusively by the page-table owner.
 *   01: The page is owned by the page-table owner, but is shared
 *       with another entity.
 *   10: The page is shared with, but not owned by the page-table owner.
 *   11: Reserved for future use (lending).
 * Bits 0-1 are used to encode the memory ownership state of each page from the
 * point of view of a pKVM "component" (host, hyp, guest, ... see enum
 * pkvm_component_id):
 *   00: The page is owned and exclusively accessible by the component;
 *   01: The page is owned and accessible by the component, but is also
 *       accessible by another component;
 *   10: The page is accessible but not owned by the component;
 * The storage of this state depends on the component: either in the
 * hyp_vmemmap for the host and hyp states or in PTE software bits for guests.
 */
enum pkvm_page_state {
	PKVM_PAGE_OWNED			= 0ULL,
	PKVM_PAGE_SHARED_OWNED		= BIT(0),
	PKVM_PAGE_SHARED_BORROWED	= BIT(1),
	__PKVM_PAGE_RESERVED		= BIT(0) | BIT(1),

	/* Meta-states which aren't encoded directly in the PTE's SW bits */
	PKVM_NOPAGE			= BIT(2),
	/*
	 * 'Meta-states' are not stored directly in PTE SW bits for guest
	 * states, but inferred from the context (e.g. invalid PTE entries).
	 * For the host and hyp, meta-states are stored directly in the
	 * struct hyp_page.
	 */
	PKVM_NOPAGE			= BIT(0) | BIT(1),
};
#define PKVM_PAGE_META_STATES_MASK	(~__PKVM_PAGE_RESERVED)
#define PKVM_PAGE_STATE_MASK		(BIT(0) | BIT(1))

#define PKVM_PAGE_STATE_PROT_MASK	(KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
@@ -44,8 +51,15 @@ struct hyp_page {
	u16 refcount;
	u8 order;

	/* Host (non-meta) state. Guarded by the host stage-2 lock. */
	enum pkvm_page_state host_state : 8;
	/* Host state. Guarded by the host stage-2 lock. */
	unsigned __host_state : 4;

	/*
	 * Complement of the hyp state. Guarded by the hyp stage-1 lock. We use
	 * the complement so that the initial 0 in __hyp_state_comp (due to the
	 * entire vmemmap starting off zeroed) encodes PKVM_NOPAGE.
	 */
	unsigned __hyp_state_comp : 4;

	u32 host_share_guest_count;
};
@@ -82,6 +96,26 @@ static inline struct hyp_page *hyp_phys_to_page(phys_addr_t phys)
#define hyp_page_to_virt(page)	__hyp_va(hyp_page_to_phys(page))
#define hyp_page_to_pool(page)	(((struct hyp_page *)page)->pool)

/*
 * Return the host ownership state recorded in the struct hyp_page that
 * backs physical address @phys.
 */
static inline enum pkvm_page_state get_host_state(phys_addr_t phys)
{
	struct hyp_page *page = hyp_phys_to_page(phys);

	return (enum pkvm_page_state)page->__host_state;
}

/*
 * Record @state as the host ownership state in the struct hyp_page that
 * backs physical address @phys.
 */
static inline void set_host_state(phys_addr_t phys, enum pkvm_page_state state)
{
	struct hyp_page *page = hyp_phys_to_page(phys);

	page->__host_state = state;
}

/*
 * Return the hyp ownership state for the page backing @phys.
 *
 * The field holds the state XOR'ed with PKVM_PAGE_STATE_MASK, so the same
 * XOR is applied here to recover the actual state.
 */
static inline enum pkvm_page_state get_hyp_state(phys_addr_t phys)
{
	struct hyp_page *page = hyp_phys_to_page(phys);

	return page->__hyp_state_comp ^ PKVM_PAGE_STATE_MASK;
}

/*
 * Record @state as the hyp ownership state for the page backing @phys.
 *
 * The value is stored XOR'ed with PKVM_PAGE_STATE_MASK; get_hyp_state()
 * applies the same XOR when reading it back.
 */
static inline void set_hyp_state(phys_addr_t phys, enum pkvm_page_state state)
{
	struct hyp_page *page = hyp_phys_to_page(phys);

	page->__hyp_state_comp = state ^ PKVM_PAGE_STATE_MASK;
}

/*
 * Refcounting for 'struct hyp_page'.
 * hyp_pool::lock must be held if atomic access to the refcount is required.
+0 −4
Original line number Diff line number Diff line
@@ -123,10 +123,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)

	hyp_vcpu->vcpu.arch.ctxt	= host_vcpu->arch.ctxt;

	hyp_vcpu->vcpu.arch.sve_state	= kern_hyp_va(host_vcpu->arch.sve_state);
	/* Limit guest vector length to the maximum supported by the host.  */
	hyp_vcpu->vcpu.arch.sve_max_vl	= min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);

	hyp_vcpu->vcpu.arch.mdcr_el2	= host_vcpu->arch.mdcr_el2;
	hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE);
	hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) &
+54 −52
Original line number Diff line number Diff line
@@ -467,7 +467,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
		return -EAGAIN;

	if (pte) {
		WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE);
		WARN_ON(addr_is_memory(addr) && get_host_state(addr) != PKVM_NOPAGE);
		return -EPERM;
	}

@@ -496,7 +496,7 @@ static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_
	phys_addr_t end = addr + size;

	for (; addr < end; addr += PAGE_SIZE)
		hyp_phys_to_page(addr)->host_state = state;
		set_host_state(addr, state);
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
@@ -627,7 +627,7 @@ static int __host_check_page_state_range(u64 addr, u64 size,

	hyp_assert_lock_held(&host_mmu.lock);
	for (; addr < end; addr += PAGE_SIZE) {
		if (hyp_phys_to_page(addr)->host_state != state)
		if (get_host_state(addr) != state)
			return -EPERM;
	}

@@ -637,7 +637,7 @@ static int __host_check_page_state_range(u64 addr, u64 size,
static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) {
	if (get_host_state(addr) == PKVM_NOPAGE) {
		int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);

		if (ret)
@@ -649,24 +649,24 @@ static int __host_set_page_state_range(u64 addr, u64 size,
	return 0;
}

static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;
	phys_addr_t end = phys + size;

	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
	for (; phys < end; phys += PAGE_SIZE)
		set_hyp_state(phys, state);
}

static int __hyp_check_page_state_range(u64 addr, u64 size,
					enum pkvm_page_state state)
static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired	= state,
		.get_page_state	= hyp_get_page_state,
	};
	phys_addr_t end = phys + size;

	hyp_assert_lock_held(&pkvm_pgd_lock);
	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
	for (; phys < end; phys += PAGE_SIZE) {
		if (get_hyp_state(phys) != state)
			return -EPERM;
	}

	return 0;
}

static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
@@ -693,8 +693,6 @@ static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
int __pkvm_host_share_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	void *virt = __hyp_va(phys);
	enum kvm_pgtable_prot prot;
	u64 size = PAGE_SIZE;
	int ret;

@@ -704,14 +702,11 @@ int __pkvm_host_share_hyp(u64 pfn)
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;
	}

	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
	__hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));

unlock:
@@ -734,7 +729,7 @@ int __pkvm_host_unshare_hyp(u64 pfn)
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED);
	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;
	if (hyp_page_count((void *)virt)) {
@@ -742,7 +737,7 @@ int __pkvm_host_unshare_hyp(u64 pfn)
		goto unlock;
	}

	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));

unlock:
@@ -757,7 +752,6 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	void *virt = __hyp_va(phys);
	enum kvm_pgtable_prot prot;
	int ret;

	host_lock_component();
@@ -766,14 +760,12 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;
	}

	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
	__hyp_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));

unlock:
@@ -793,15 +785,14 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
	host_lock_component();
	hyp_lock_component();

	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED);
	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
	ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;
	}

	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));

@@ -816,24 +807,30 @@ int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 phys = __hyp_pa(start);
	u64 size = end - start;
	struct hyp_page *p;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(__hyp_pa(start), size,
					    PKVM_PAGE_SHARED_OWNED);
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(start, size,
					   PKVM_PAGE_SHARED_BORROWED);
	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_inc(hyp_virt_to_page(cur));
	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		hyp_page_ref_inc(p);
		if (p->refcount == 1)
			WARN_ON(pkvm_create_mappings_locked((void *)cur,
							    (void *)cur + PAGE_SIZE,
							    PAGE_HYP));
	}

unlock:
	hyp_unlock_component();
@@ -846,12 +843,17 @@ void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	struct hyp_page *p;

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_dec(hyp_virt_to_page(cur));
	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		if (p->refcount == 1)
			WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, cur, PAGE_SIZE) != PAGE_SIZE);
		hyp_page_ref_dec(p);
	}

	hyp_unlock_component();
	host_unlock_component();
@@ -911,7 +913,7 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
		goto unlock;

	page = hyp_phys_to_page(phys);
	switch (page->host_state) {
	switch (get_host_state(phys)) {
	case PKVM_PAGE_OWNED:
		WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
		break;
@@ -964,9 +966,9 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip
	if (WARN_ON(ret))
		return ret;

	page = hyp_phys_to_page(phys);
	if (page->host_state != PKVM_PAGE_SHARED_OWNED)
	if (get_host_state(phys) != PKVM_PAGE_SHARED_OWNED)
		return -EPERM;
	page = hyp_phys_to_page(phys);
	if (WARN_ON(!page->host_share_guest_count))
		return -EINVAL;

+44 −3
Original line number Diff line number Diff line
@@ -372,6 +372,18 @@ static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
		hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
}

static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	void *sve_state;

	if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE))
		return;

	sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state);
	hyp_unpin_shared_mem(sve_state,
			     sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu));
}

static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
			     unsigned int nr_vcpus)
{
@@ -384,6 +396,7 @@ static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
			continue;

		unpin_host_vcpu(hyp_vcpu->host_vcpu);
		unpin_host_sve_state(hyp_vcpu);
	}
}

@@ -398,12 +411,40 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
	pkvm_init_features_from_host(hyp_vm, host_kvm);
}

static void pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
{
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
	unsigned int sve_max_vl;
	size_t sve_state_size;
	void *sve_state;
	int ret = 0;

	if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE))
	if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
		vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED);
		return 0;
	}

	/* Limit guest vector length to the maximum supported by the host. */
	sve_max_vl = min(READ_ONCE(host_vcpu->arch.sve_max_vl), kvm_host_sve_max_vl);
	sve_state_size = sve_state_size_from_vl(sve_max_vl);
	sve_state = kern_hyp_va(READ_ONCE(host_vcpu->arch.sve_state));

	if (!sve_state || !sve_state_size) {
		ret = -EINVAL;
		goto err;
	}

	ret = hyp_pin_shared_mem(sve_state, sve_state + sve_state_size);
	if (ret)
		goto err;

	vcpu->arch.sve_state = sve_state;
	vcpu->arch.sve_max_vl = sve_max_vl;

	return 0;
err:
	clear_bit(KVM_ARM_VCPU_SVE, vcpu->kvm->arch.vcpu_features);
	return ret;
}

static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
@@ -432,7 +473,7 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
	if (ret)
		goto done;

	pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
	ret = pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
done:
	if (ret)
		unpin_host_vcpu(host_vcpu);
Loading