Commit 621191d7 authored by Nuno Das Neves, committed by Wei Liu
Browse files

Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs



Provide a set of IOCTLs for creating and managing child partitions when
running as root partition on Hyper-V. The new driver is enabled via
CONFIG_MSHV_ROOT.

A brief overview of the interface:

MSHV_CREATE_PARTITION is the entry point, returning a file descriptor
representing a child partition. IOCTLs on this fd can be used to map
memory, create VPs, etc.

Creating a VP returns another file descriptor representing that VP which
in turn has another set of corresponding IOCTLs for running the VP,
getting/setting state, etc.

MSHV_ROOT_HVCALL is a generic "passthrough" hypercall IOCTL which can be
used for a number of partition or VP hypercalls. This is for hypercalls
that do not affect any state in the kernel driver, such as getting and
setting VP registers and partition properties, translating addresses,
etc. It is "passthrough" because the binary input and output for the
hypercall is only interpreted by the VMM - the kernel driver does
nothing but insert the VP and partition id where necessary (which are
always in the same place), and execute the hypercall.

Co-developed-by: Anirudh Rayabharam <anrayabh@linux.microsoft.com>
Signed-off-by: Anirudh Rayabharam <anrayabh@linux.microsoft.com>
Co-developed-by: Jinank Jain <jinankjain@microsoft.com>
Signed-off-by: Jinank Jain <jinankjain@microsoft.com>
Co-developed-by: Mukesh Rathor <mrathor@linux.microsoft.com>
Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com>
Co-developed-by: Muminul Islam <muislam@microsoft.com>
Signed-off-by: Muminul Islam <muislam@microsoft.com>
Co-developed-by: Praveen K Paladugu <prapal@linux.microsoft.com>
Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
Co-developed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Co-developed-by: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
Reviewed-by: Roman Kisel <romank@linux.microsoft.com>
Link: https://lore.kernel.org/r/1741980536-3865-11-git-send-email-nunodasneves@linux.microsoft.com


Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <1741980536-3865-11-git-send-email-nunodasneves@linux.microsoft.com>
parent 0bd921a4
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -370,6 +370,8 @@ Code Seq# Include File Comments
0xB7  all    uapi/linux/remoteproc_cdev.h                            <mailto:linux-remoteproc@vger.kernel.org>
0xB7  all    uapi/linux/nsfs.h                                       <mailto:Andrei Vagin <avagin@openvz.org>>
0xB8  01-02  uapi/misc/mrvl_cn10k_dpi.h                              Marvell CN10K DPI driver
0xB8  all    uapi/linux/mshv.h                                       Microsoft Hyper-V /dev/mshv driver
                                                                     <mailto:linux-hyperv@vger.kernel.org>
0xC0  00-0F  linux/usb/iowarrior.h
0xCA  00-0F  uapi/misc/cxl.h
0xCA  10-2F  uapi/misc/ocxl.h
+1 −0
Original line number Diff line number Diff line
@@ -64,6 +64,7 @@ config MSHV_ROOT
	# e.g. When withdrawing memory, the hypervisor gives back 4k pages in
	# no particular order, making it impossible to reassemble larger pages
	depends on PAGE_SIZE_4KB
	select EVENTFD
	default n
	help
	  Select this option to enable support for booting and running as root
+4 −1
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
obj-$(CONFIG_HYPERV)		+= hv_vmbus.o
obj-$(CONFIG_HYPERV_UTILS)	+= hv_utils.o
obj-$(CONFIG_HYPERV_BALLOON)	+= hv_balloon.o
obj-$(CONFIG_MSHV_ROOT)		+= mshv_root.o

CFLAGS_hv_trace.o = -I$(src)
CFLAGS_hv_balloon.o = -I$(src)
@@ -11,7 +12,9 @@ hv_vmbus-y := vmbus_drv.o \
		 channel_mgmt.o ring_buffer.o hv_trace.o
hv_vmbus-$(CONFIG_HYPERV_TESTING)	+= hv_debugfs.o
hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
	       mshv_root_hv_call.o mshv_portid_table.o

# Code that must be built-in
obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o
obj-$(subst m,y,$(CONFIG_MSHV_ROOT)) += hv_proc.o
obj-$(subst m,y,$(CONFIG_MSHV_ROOT)) += hv_proc.o mshv_common.o

drivers/hv/mshv.h

0 → 100644
+30 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2023, Microsoft Corporation.
 */

#ifndef _MSHV_H_
#define _MSHV_H_

#include <linux/stddef.h>
#include <linux/string.h>
#include <hyperv/hvhdk.h>

/*
 * mshv_field_nonzero() - scan one member of a struct for non-zero bytes.
 *
 * STRUCT is an object (not a type) and MEMBER names one of its fields. The
 * macro expands to a memchr_inv() call over the sizeof_field() bytes of that
 * member with 0 as the byte to match, so its value is whatever memchr_inv()
 * returns for that range.
 */
#define mshv_field_nonzero(STRUCT, MEMBER) \
	memchr_inv(&((STRUCT).MEMBER), \
		   0, sizeof_field(typeof(STRUCT), MEMBER))

/*
 * Read @count VP registers: the name of each entry in @registers is passed to
 * HVCALL_GET_VP_REGISTERS and the value of that entry is filled in.
 * Returns the hypercall status converted by hv_result_to_errno().
 */
int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count,
			     union hv_input_vtl input_vtl,
			     struct hv_register_assoc *registers);

/*
 * Write @count VP registers: the entries of @registers are passed to
 * HVCALL_SET_VP_REGISTERS.
 * Returns the hypercall status converted by hv_result_to_errno().
 */
int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count,
			     union hv_input_vtl input_vtl,
			     struct hv_register_assoc *registers);

/*
 * Query @property_code of @partition_id with HVCALL_GET_PARTITION_PROPERTY.
 * On success the value is stored in *@property_value and 0 is returned; on
 * failure *@property_value is not written and the converted hypercall status
 * is returned.
 */
int hv_call_get_partition_property(u64 partition_id, u64 property_code,
				   u64 *property_value);

/*
 * Act on the thread flags in @th_flags before entering guest mode.
 * Returns 0, or -EINTR when a signal-related flag is set.
 */
int mshv_do_pre_guest_mode_work(ulong th_flags);

#endif /* _MSHV_H_ */
+161 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2024, Microsoft Corporation.
 *
 * This file contains functions that will be called from one or more modules.
 * If any of these modules are configured to build, this file is built and just
 * statically linked in.
 *
 * Authors: Microsoft Linux virtualization team
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/mshyperv.h>
#include <linux/resume_user_mode.h>

#include "mshv.h"

/*
 * Upper bound on the rep count of a single HVCALL_GET_VP_REGISTERS call:
 * the number of union hv_register_value entries that fit in one
 * HV_HYP_PAGE_SIZE page.
 */
#define HV_GET_REGISTER_BATCH_SIZE	\
	(HV_HYP_PAGE_SIZE / sizeof(union hv_register_value))
/*
 * Upper bound on the rep count of a single HVCALL_SET_VP_REGISTERS call:
 * the number of struct hv_register_assoc entries that fit in one
 * HV_HYP_PAGE_SIZE page after sizeof(struct hv_input_set_vp_registers)
 * has been subtracted.
 */
#define HV_SET_REGISTER_BATCH_SIZE	\
	((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_set_vp_registers)) \
		/ sizeof(struct hv_register_assoc))

/*
 * Read VP registers from the hypervisor with HVCALL_GET_VP_REGISTERS.
 *
 * @vp_index, @partition_id and @input_vtl are copied into the hypercall input
 * header. @registers holds @count entries; the name of each entry is sent to
 * the hypervisor and the returned value is written back into the same entry.
 *
 * The request is issued as rep hypercalls of at most
 * HV_GET_REGISTER_BATCH_SIZE registers each. Local interrupts stay disabled
 * for the whole sequence, during which this CPU's hypercall input and output
 * pages are in use.
 *
 * Returns: the status of the last hypercall converted by
 * hv_result_to_errno(). A failing hypercall ends the loop, and no values from
 * that batch are copied back into @registers.
 */
int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count,
			     union hv_input_vtl input_vtl,
			     struct hv_register_assoc *registers)
{
	struct hv_input_get_vp_registers *in;
	union hv_register_value *out;
	unsigned long todo = count;
	unsigned long irq_flags;
	u64 status = HV_STATUS_SUCCESS;
	u16 done = 0;
	int batch, idx;

	local_irq_save(irq_flags);

	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
	out = *this_cpu_ptr(hyperv_pcpu_output_arg);

	/* The header is the same for every batch, so fill it in only once. */
	in->partition_id = partition_id;
	in->vp_index = vp_index;
	in->input_vtl.as_uint8 = input_vtl.as_uint8;
	in->rsvd_z8 = 0;
	in->rsvd_z16 = 0;

	/* Advance by what the hypervisor reports as completed, not by batch. */
	for (; todo; todo -= done, registers += done) {
		batch = min(todo, HV_GET_REGISTER_BATCH_SIZE);

		for (idx = 0; idx < batch; idx++)
			in->names[idx] = registers[idx].name;

		status = hv_do_rep_hypercall(HVCALL_GET_VP_REGISTERS, batch,
					     0, in, out);
		if (!hv_result_success(status))
			break;

		done = hv_repcomp(status);
		for (idx = 0; idx < done; idx++)
			registers[idx].value = out[idx];
	}

	local_irq_restore(irq_flags);

	return hv_result_to_errno(status);
}
EXPORT_SYMBOL_GPL(hv_call_get_vp_registers);

/*
 * Write VP registers to the hypervisor with HVCALL_SET_VP_REGISTERS.
 *
 * @vp_index, @partition_id and @input_vtl are copied into the hypercall input
 * header. @registers holds @count name/value entries that are copied into the
 * input page after the header.
 *
 * The request is issued as rep hypercalls of at most
 * HV_SET_REGISTER_BATCH_SIZE registers each. Local interrupts stay disabled
 * for the whole sequence, during which this CPU's hypercall input page is in
 * use. No output page is passed to the hypercall.
 *
 * Returns: the status of the last hypercall converted by
 * hv_result_to_errno(). A failing hypercall ends the loop.
 */
int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count,
			     union hv_input_vtl input_vtl,
			     struct hv_register_assoc *registers)
{
	struct hv_input_set_vp_registers *in;
	unsigned long todo = count;
	unsigned long irq_flags;
	u64 status = HV_STATUS_SUCCESS;
	u16 done = 0;
	int batch;

	local_irq_save(irq_flags);

	in = *this_cpu_ptr(hyperv_pcpu_input_arg);

	/* The header is the same for every batch, so fill it in only once. */
	in->partition_id = partition_id;
	in->vp_index = vp_index;
	in->input_vtl.as_uint8 = input_vtl.as_uint8;
	in->rsvd_z8 = 0;
	in->rsvd_z16 = 0;

	/* Advance by what the hypervisor reports as completed, not by batch. */
	for (; todo; todo -= done, registers += done) {
		batch = min(todo, HV_SET_REGISTER_BATCH_SIZE);
		memcpy(in->elements, registers,
		       sizeof(struct hv_register_assoc) * batch);

		status = hv_do_rep_hypercall(HVCALL_SET_VP_REGISTERS, batch,
					     0, in, NULL);
		if (!hv_result_success(status))
			break;

		done = hv_repcomp(status);
	}

	local_irq_restore(irq_flags);

	return hv_result_to_errno(status);
}
EXPORT_SYMBOL_GPL(hv_call_set_vp_registers);

/*
 * Query one partition property with HVCALL_GET_PARTITION_PROPERTY.
 *
 * The input structure is zeroed and then filled with @partition_id and
 * @property_code. Local interrupts are disabled while this CPU's hypercall
 * input and output pages are in use.
 *
 * Returns: 0 with the result stored in *@property_value on success;
 * otherwise the hypercall status converted by hv_result_to_errno(), in which
 * case *@property_value is left untouched.
 */
int hv_call_get_partition_property(u64 partition_id,
				   u64 property_code,
				   u64 *property_value)
{
	struct hv_input_get_partition_property *in;
	struct hv_output_get_partition_property *out;
	unsigned long irq_flags;
	u64 status;
	bool ok;

	local_irq_save(irq_flags);

	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
	out = *this_cpu_ptr(hyperv_pcpu_output_arg);

	memset(in, 0, sizeof(*in));
	in->partition_id = partition_id;
	in->property_code = property_code;

	status = hv_do_hypercall(HVCALL_GET_PARTITION_PROPERTY, in, out);
	ok = hv_result_success(status);

	/* Read the output page before interrupts are enabled again. */
	if (ok)
		*property_value = out->property_value;

	local_irq_restore(irq_flags);

	return ok ? 0 : hv_result_to_errno(status);
}
EXPORT_SYMBOL_GPL(hv_call_get_partition_property);

/*
 * Process pending thread work before this cpu enters guest mode, most notably
 * by calling schedule(). The caller must have both preemption and interrupts
 * enabled.
 *
 * @th_flags: thread flags to act on.
 *
 * A pending signal (_TIF_SIGPENDING or _TIF_NOTIFY_SIGNAL) takes priority and
 * aborts with -EINTR before anything else is done. Otherwise
 * _TIF_NEED_RESCHED triggers schedule() and _TIF_NOTIFY_RESUME triggers
 * resume_user_mode_work(NULL), in that order.
 *
 * Returns: 0 on success, -errno on error.
 */
int mshv_do_pre_guest_mode_work(ulong th_flags)
{
	const ulong signal_mask = _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL;

	if (th_flags & signal_mask)
		return -EINTR;

	if (th_flags & _TIF_NEED_RESCHED)
		schedule();

	if (th_flags & _TIF_NOTIFY_RESUME)
		resume_user_mode_work(NULL);

	return 0;
}
EXPORT_SYMBOL_GPL(mshv_do_pre_guest_mode_work);
Loading