Commit 6c58f914, authored by Will Deacon, committed by Marc Zyngier
Browse files

KVM: arm64: Split teardown hypercall into two phases



In preparation for reclaiming protected guest VM pages from the host
during teardown, split the current 'pkvm_teardown_vm' hypercall into
separate 'start' and 'finalise' calls.

The 'pkvm_start_teardown_vm' hypercall puts the VM into a new 'is_dying'
state, which is a point of no return past which no vCPU of the pVM is
allowed to run any more.  Once in this new state,
'pkvm_finalize_teardown_vm' can be used to reclaim meta-data and
page-table pages from the VM. A subsequent patch will add support for
reclaiming the individual guest memory pages.

Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Co-developed-by: Quentin Perret <qperret@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Link: https://patch.msgid.link/20260330144841.26181-12-will@kernel.org


Signed-off-by: Marc Zyngier <maz@kernel.org>
parent 73c55be0
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -89,7 +89,8 @@ enum __kvm_host_smccc_func {
	__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
	__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
	__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
	__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
	__KVM_HOST_SMCCC_FUNC___pkvm_start_teardown_vm,
	__KVM_HOST_SMCCC_FUNC___pkvm_finalize_teardown_vm,
	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
	__KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
+7 −0
Original line number Diff line number Diff line
@@ -255,6 +255,13 @@ struct kvm_protected_vm {
	struct kvm_hyp_memcache stage2_teardown_mc;
	bool is_protected;
	bool is_created;

	/*
	 * True when the guest is being torn down. When in this state, the
	 * guest's vCPUs can't be loaded anymore, but its pages can be
	 * reclaimed by the host.
	 */
	bool is_dying;
};

struct kvm_mpidr_data {
+3 −1
Original line number Diff line number Diff line
@@ -73,7 +73,9 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
		   unsigned long pgd_hva);
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
		     unsigned long vcpu_hva);
int __pkvm_teardown_vm(pkvm_handle_t handle);

int __pkvm_start_teardown_vm(pkvm_handle_t handle);
int __pkvm_finalize_teardown_vm(pkvm_handle_t handle);

struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
					 unsigned int vcpu_idx);
+11 −3
Original line number Diff line number Diff line
@@ -553,11 +553,18 @@ static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
	cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
}

static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
static void handle___pkvm_start_teardown_vm(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);

	cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
	cpu_reg(host_ctxt, 1) = __pkvm_start_teardown_vm(handle);
}

static void handle___pkvm_finalize_teardown_vm(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);

	cpu_reg(host_ctxt, 1) = __pkvm_finalize_teardown_vm(handle);
}

typedef void (*hcall_t)(struct kvm_cpu_context *);
@@ -598,7 +605,8 @@ static const hcall_t host_hcall[] = {
	HANDLE_FUNC(__pkvm_unreserve_vm),
	HANDLE_FUNC(__pkvm_init_vm),
	HANDLE_FUNC(__pkvm_init_vcpu),
	HANDLE_FUNC(__pkvm_teardown_vm),
	HANDLE_FUNC(__pkvm_start_teardown_vm),
	HANDLE_FUNC(__pkvm_finalize_teardown_vm),
	HANDLE_FUNC(__pkvm_vcpu_load),
	HANDLE_FUNC(__pkvm_vcpu_put),
	HANDLE_FUNC(__pkvm_tlb_flush_vmid),
+38 −6
Original line number Diff line number Diff line
@@ -255,7 +255,10 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,

	hyp_spin_lock(&vm_table_lock);
	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm || hyp_vm->kvm.created_vcpus <= vcpu_idx)
	if (!hyp_vm || hyp_vm->kvm.arch.pkvm.is_dying)
		goto unlock;

	if (hyp_vm->kvm.created_vcpus <= vcpu_idx)
		goto unlock;

	hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
@@ -301,8 +304,14 @@ struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle)

	hyp_spin_lock(&vm_table_lock);
	hyp_vm = get_vm_by_handle(handle);
	if (hyp_vm)
	if (!hyp_vm)
		goto unlock;

	if (hyp_vm->kvm.arch.pkvm.is_dying)
		hyp_vm = NULL;
	else
		hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
unlock:
	hyp_spin_unlock(&vm_table_lock);

	return hyp_vm;
@@ -859,7 +868,32 @@ teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
	unmap_donated_memory_noclear(addr, size);
}

int __pkvm_teardown_vm(pkvm_handle_t handle)
/*
 * Begin tearing down the VM identified by @handle by putting it into the
 * 'is_dying' state. The lookup, the checks and the state change are all
 * performed with vm_table_lock held.
 *
 * Return:
 *   0        the VM has been marked as dying;
 *   -ENOENT  @handle does not resolve to a VM;
 *   -EBUSY   hyp_page_count() of the VM is non-zero (this also triggers a
 *            WARN_ON);
 *   -EINVAL  the VM is already marked as dying.
 */
int __pkvm_start_teardown_vm(pkvm_handle_t handle)
{
	struct pkvm_hyp_vm *vm;
	int err;

	hyp_spin_lock(&vm_table_lock);

	vm = get_vm_by_handle(handle);
	if (!vm) {
		err = -ENOENT;
		goto out_unlock;
	}

	/* Outstanding references to the VM are unexpected at this point. */
	if (WARN_ON(hyp_page_count(vm))) {
		err = -EBUSY;
		goto out_unlock;
	}

	/* Starting teardown twice for the same VM is rejected. */
	if (vm->kvm.arch.pkvm.is_dying) {
		err = -EINVAL;
		goto out_unlock;
	}

	vm->kvm.arch.pkvm.is_dying = true;
	err = 0;

out_unlock:
	hyp_spin_unlock(&vm_table_lock);
	return err;
}

int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
{
	struct kvm_hyp_memcache *mc, *stage2_mc;
	struct pkvm_hyp_vm *hyp_vm;
@@ -873,9 +907,7 @@ int __pkvm_teardown_vm(pkvm_handle_t handle)
	if (!hyp_vm) {
		err = -ENOENT;
		goto err_unlock;
	}

	if (WARN_ON(hyp_page_count(hyp_vm))) {
	} else if (!hyp_vm->kvm.arch.pkvm.is_dying) {
		err = -EBUSY;
		goto err_unlock;
	}
Loading