Commit 8dbec5c7 authored by David Woodhouse, committed by Ingo Molnar
Browse files

x86/kexec: Add data section to relocate_kernel



Now that the relocate_kernel page is handled sanely by a linker script
we can have actual data, and just use %rip-relative addressing to access
it.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lore.kernel.org/r/20241205153343.3275139-10-dwmw2@infradead.org
parent cb33ff9e
Loading
Loading
Loading
Loading
+7 −1
Original line number Diff line number Diff line
@@ -343,6 +343,7 @@ void machine_kexec(struct kimage *image)
					     unsigned long start_address,
					     unsigned int preserve_context,
					     unsigned int host_mem_enc_active);
	unsigned long reloc_start = (unsigned long)__relocate_kernel_start;
	unsigned long page_list[PAGES_NR];
	unsigned int host_mem_enc_active;
	int save_ftrace_enabled;
@@ -389,7 +390,12 @@ void machine_kexec(struct kimage *image)
		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
						<< PAGE_SHIFT);

	relocate_kernel_ptr = control_page;
	/*
	 * Allow for the possibility that relocate_kernel might not be at
	 * the very start of the page.
	 */
	relocate_kernel_ptr = control_page + (unsigned long)relocate_kernel -
		reloc_start;

	/*
	 * The segment registers are funny things, they have both a
+30 −32
Original line number Diff line number Diff line
@@ -23,23 +23,21 @@
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
 * ~ control_page + PAGE_SIZE are used as data storage and stack for
 * jumping back
 * The .text.relocate_kernel and .data.relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */
#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))

	.section .data.relocate_kernel,"a";
/* Minimal CPU state */
#define RSP			DATA(0x0)
#define CR0			DATA(0x8)
#define CR3			DATA(0x10)
#define CR4			DATA(0x18)

SYM_DATA_LOCAL(saved_rsp, .quad 0)
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)
	/* other data */
#define CP_PA_TABLE_PAGE	DATA(0x20)
#define CP_PA_SWAP_PAGE		DATA(0x28)
#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
#define CP_VA_CONTROL_PAGE	DATA(0x38)
SYM_DATA_LOCAL(va_control_page, .quad 0)
SYM_DATA_LOCAL(pa_table_page, .quad 0)
SYM_DATA_LOCAL(pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)

	.section .text.relocate_kernel,"ax";
	.code64
@@ -63,14 +61,13 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
	pushq %r15
	pushf

	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
	movq	%rsp, RSP(%r11)
	movq	%rsp, saved_rsp(%rip)
	movq	%cr0, %rax
	movq	%rax, CR0(%r11)
	movq	%rax, saved_cr0(%rip)
	movq	%cr3, %rax
	movq	%rax, CR3(%r11)
	movq	%rax, saved_cr3(%rip)
	movq	%cr4, %rax
	movq	%rax, CR4(%r11)
	movq	%rax, saved_cr4(%rip)

	/* Save CR4. Required to enable the right paging mode later. */
	movq	%rax, %r13
@@ -83,10 +80,11 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
	movq	%r8, %r12

	/*
	 * get physical address of control page now
	 * get physical and virtual address of control page now
	 * this is impossible after page table switch
	 */
	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11

	/* get physical address of page table now too */
	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9
@@ -95,10 +93,10 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10

	/* save some information for jumping back */
	movq	%r9, CP_PA_TABLE_PAGE(%r11)
	movq	%r10, CP_PA_SWAP_PAGE(%r11)
	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
	movq	%r11, CP_VA_CONTROL_PAGE(%r11)
	movq	%r9, pa_table_page(%rip)
	movq	%r10, pa_swap_page(%rip)
	movq	%rdi, pa_backup_pages_map(%rip)
	movq	%r11, va_control_page(%rip)

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11
@@ -229,13 +227,13 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	/* get the re-entry point of the peer system */
	movq	0(%rsp), %rbp
	leaq	relocate_kernel(%rip), %r8
	movq	CP_PA_SWAP_PAGE(%r8), %r10
	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
	movq	CP_PA_TABLE_PAGE(%r8), %rax
	movq	pa_swap_page(%rip), %r10
	movq	pa_backup_pages_map(%rip), %rdi
	movq	pa_table_page(%rip), %rax
	movq	%rax, %cr3
	lea	PAGE_SIZE(%r8), %rsp
	call	swap_pages
	movq	CP_VA_CONTROL_PAGE(%r8), %rax
	movq	va_control_page(%rip), %rax
	addq	$(virtual_mapped - relocate_kernel), %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
@@ -246,11 +244,11 @@ SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq	RSP(%r8), %rsp
	movq	CR4(%r8), %rax
	movq	saved_rsp(%rip), %rsp
	movq	saved_cr4(%rip), %rax
	movq	%rax, %cr4
	movq	CR3(%r8), %rax
	movq	CR0(%r8), %r8
	movq	saved_cr3(%rip), %rax
	movq	saved_cr0(%rip), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0

+1 −0
Original line number Diff line number Diff line
@@ -101,6 +101,7 @@ const_pcpu_hot = pcpu_hot;
	. = ALIGN(0x100);					\
	__relocate_kernel_start = .;				\
	*(.text.relocate_kernel);				\
	*(.data.relocate_kernel);				\
	__relocate_kernel_end = .;

ASSERT(__relocate_kernel_end - __relocate_kernel_start <= KEXEC_CONTROL_CODE_MAX_SIZE,