Commit b3adabae, authored by David Woodhouse, committed by Ingo Molnar
Browse files

x86/kexec: Drop page_list argument from relocate_kernel()



The kernel's virtual mapping of the relocate_kernel page currently needs
to be RWX because it is written to before the %cr3 switch.

Now that the relocate_kernel page has its own .data section and local
variables, it can also have *global* variables. So eliminate the separate
page_list argument, and write the same information directly to variables
in the relocate_kernel page instead. This way, the relocate_kernel code
itself doesn't need to copy it.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lore.kernel.org/r/20241205153343.3275139-11-dwmw2@infradead.org
parent 8dbec5c7
Loading
Loading
Loading
Loading
+5 −7
Original line number Diff line number Diff line
@@ -8,12 +8,6 @@
# define PA_PGD			2
# define PA_SWAP_PAGE		3
# define PAGES_NR		4
#else
# define PA_CONTROL_PAGE	0
# define VA_CONTROL_PAGE	1
# define PA_TABLE_PAGE		2
# define PA_SWAP_PAGE		3
# define PAGES_NR		4
#endif

# define KEXEC_CONTROL_PAGE_SIZE	4096
@@ -60,6 +54,10 @@ struct kimage;

/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_X86_64

extern unsigned long kexec_va_control_page;
extern unsigned long kexec_pa_table_page;
extern unsigned long kexec_pa_swap_page;
#endif

/*
@@ -122,7 +120,7 @@ relocate_kernel(unsigned long indirection_page,
#else
unsigned long
relocate_kernel(unsigned long indirection_page,
		unsigned long page_list,
		unsigned long pa_control_page,
		unsigned long start_address,
		unsigned int preserve_context,
		unsigned int host_mem_enc_active);
+7 −11
Original line number Diff line number Diff line
@@ -315,6 +315,11 @@ int machine_kexec_prepare(struct kimage *image)
	result = init_pgtable(image, __pa(control_page));
	if (result)
		return result;
	kexec_va_control_page = (unsigned long)control_page;
	kexec_pa_table_page = (unsigned long)__pa(image->arch.pgd);

	if (image->type == KEXEC_TYPE_DEFAULT)
		kexec_pa_swap_page = page_to_pfn(image->swap_page) << PAGE_SHIFT;

	__memcpy(control_page, __relocate_kernel_start, reloc_end - reloc_start);

@@ -339,12 +344,11 @@ void machine_kexec_cleanup(struct kimage *image)
void machine_kexec(struct kimage *image)
{
	unsigned long (*relocate_kernel_ptr)(unsigned long indirection_page,
					     unsigned long page_list,
					     unsigned long pa_control_page,
					     unsigned long start_address,
					     unsigned int preserve_context,
					     unsigned int host_mem_enc_active);
	unsigned long reloc_start = (unsigned long)__relocate_kernel_start;
	unsigned long page_list[PAGES_NR];
	unsigned int host_mem_enc_active;
	int save_ftrace_enabled;
	void *control_page;
@@ -382,14 +386,6 @@ void machine_kexec(struct kimage *image)

	control_page = page_address(image->control_code_page);

	page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
	page_list[PA_TABLE_PAGE] = (unsigned long)__pa(image->arch.pgd);

	if (image->type == KEXEC_TYPE_DEFAULT)
		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
						<< PAGE_SHIFT);

	/*
	 * Allow for the possibility that relocate_kernel might not be at
	 * the very start of the page.
@@ -417,7 +413,7 @@ void machine_kexec(struct kimage *image)

	/* now call it */
	image->start = relocate_kernel_ptr((unsigned long)image->head,
					   (unsigned long)page_list,
					   virt_to_phys(control_page),
					   image->start,
					   image->preserve_context,
					   host_mem_enc_active);
+12 −24
Original line number Diff line number Diff line
@@ -34,9 +34,9 @@ SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)
	/* other data */
SYM_DATA_LOCAL(va_control_page, .quad 0)
SYM_DATA_LOCAL(pa_table_page, .quad 0)
SYM_DATA_LOCAL(pa_swap_page, .quad 0)
SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)

	.section .text.relocate_kernel,"ax";
@@ -46,7 +46,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi page_list
	 * %rsi pa_control_page
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8  host_mem_enc_active
@@ -79,31 +79,19 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
	/* Save SME active flag */
	movq	%r8, %r12

	/*
	 * get physical and virtual address of control page now
	 * this is impossible after page table switch
	 */
	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11

	/* get physical address of page table now too */
	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9

	/* get physical address of swap page now */
	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10

	/* save some information for jumping back */
	movq	%r9, pa_table_page(%rip)
	movq	%r10, pa_swap_page(%rip)
	/* save indirection list for jumping back */
	movq	%rdi, pa_backup_pages_map(%rip)
	movq	%r11, va_control_page(%rip)

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11

	/* Switch to the identity mapped page tables */
	movq	kexec_pa_table_page(%rip), %r9
	movq	%r9, %cr3

	/* Physical address of control page */
	movq    %rsi, %r8

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%r8), %rsp

@@ -227,13 +215,13 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	/* get the re-entry point of the peer system */
	movq	0(%rsp), %rbp
	leaq	relocate_kernel(%rip), %r8
	movq	pa_swap_page(%rip), %r10
	movq	kexec_pa_swap_page(%rip), %r10
	movq	pa_backup_pages_map(%rip), %rdi
	movq	pa_table_page(%rip), %rax
	movq	kexec_pa_table_page(%rip), %rax
	movq	%rax, %cr3
	lea	PAGE_SIZE(%r8), %rsp
	call	swap_pages
	movq	va_control_page(%rip), %rax
	movq	kexec_va_control_page(%rip), %rax
	addq	$(virtual_mapped - relocate_kernel), %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE