| /* SPDX-License-Identifier: GPL-2.0-only */ |
| /* |
| * relocate_kernel.S - put the kernel image in place to boot |
| * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> |
| */ |
| |
| #include <linux/linkage.h> |
| #include <linux/stringify.h> |
| #include <asm/alternative.h> |
| #include <asm/page_types.h> |
| #include <asm/kexec.h> |
| #include <asm/processor-flags.h> |
| #include <asm/pgtable_types.h> |
| #include <asm/nospec-branch.h> |
| #include <asm/unwind_hints.h> |
| #include <asm/asm-offsets.h> |
| |
| /* |
| * Must be relocatable PIC code callable as a C function; in particular, |
| * there must be a plain RET and not a jump to the return thunk. |
| */ |
| |
| #define PTR(x) (x << 3) |
| #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
| |
| /* |
| * The .text..relocate_kernel and .data..relocate_kernel sections are copied |
| * into the control page, and the remainder of the page is used as the stack. |
| */ |
| |
| .section .data..relocate_kernel,"a"; |
| /* Minimal CPU state */ |
| SYM_DATA_LOCAL(saved_rsp, .quad 0) |
| SYM_DATA_LOCAL(saved_cr0, .quad 0) |
| SYM_DATA_LOCAL(saved_cr3, .quad 0) |
| SYM_DATA_LOCAL(saved_cr4, .quad 0) |
| /* other data */ |
| SYM_DATA(kexec_va_control_page, .quad 0) |
| SYM_DATA(kexec_pa_table_page, .quad 0) |
| SYM_DATA(kexec_pa_swap_page, .quad 0) |
| SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0) |
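| /* Debug console parameters, filled in by the kexec C code beforehand */ |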
| SYM_DATA(kexec_debug_8250_mmio32, .quad 0) |
| SYM_DATA(kexec_debug_8250_port, .word 0) |
| |
| .balign 16 |
| SYM_DATA_START_LOCAL(kexec_debug_gdt) |
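| /* |
| * The first 8 bytes double as the mandatory null descriptor; its first |
| * word holds the GDT limit so that identity_mapped() can build a GDTR |
| * directly from the table with 'pushw (%rax)'. |
| */ |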
| .word kexec_debug_gdt_end - kexec_debug_gdt - 1 |
| .long 0 |
| .word 0 |
| .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ |
| .quad 0x00af9a000000ffff /* __KERNEL_CS */ |
| .quad 0x00cf92000000ffff /* __KERNEL_DS */ |
| SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end) |
| |
| .balign 8 |
| SYM_DATA_START(kexec_debug_idt) |
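| /* Room for 16 gate descriptors of 16 bytes each, filled in from C */ |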
| .skip 0x100, 0x00 |
| SYM_DATA_END(kexec_debug_idt) |
| |
| .section .text..relocate_kernel,"ax"; |
| .code64 |
| SYM_CODE_START_NOALIGN(relocate_kernel) |
| UNWIND_HINT_END_OF_STACK |
| ANNOTATE_NOENDBR |
| /* |
| * %rdi indirection_page |
| * %rsi pa_control_page |
| * %rdx start address |
| * %rcx preserve_context |
| * %r8 host_mem_enc_active |
| */ |
| |
| /* Save the CPU context, used for jumping back */ |
| pushq %rbx |
| pushq %rbp |
| pushq %r12 |
| pushq %r13 |
| pushq %r14 |
| pushq %r15 |
| pushf |
| |
| /* |
| * Invalidate GDT/IDT and zero out flags: the two zero pushes form a |
| * zero-limit descriptor for LIDT/LGDT, and the remaining zero is then |
| * popped into RFLAGS below. |
| */ |
| pushq $0 |
| pushq $0 |
| |
| lidt (%rsp) |
| lgdt (%rsp) |
| addq $8, %rsp |
| popfq |
| |
| /* Switch to the identity mapped page tables */ |
| movq %cr3, %rax |
| movq kexec_pa_table_page(%rip), %r9 |
| movq %r9, %cr3 |
| |
| /* Leave CR4 in %r13 to enable the right paging mode later. */ |
| movq %cr4, %r13 |
| |
| /* Disable global pages immediately to ensure this mapping is RWX */ |
| movq %r13, %r12 |
| andq $~(X86_CR4_PGE), %r12 |
| movq %r12, %cr4 |
| |
| /* Save %rsp and CRs. */ |
| movq %r13, saved_cr4(%rip) |
| movq %rsp, saved_rsp(%rip) |
| movq %rax, saved_cr3(%rip) |
| movq %cr0, %rax |
| movq %rax, saved_cr0(%rip) |
| |
| /* Save the indirection page list for jumping back */ |
| movq %rdi, pa_backup_pages_map(%rip) |
| |
| /* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */ |
| movq %rcx, %r11 |
| |
| /* Set up a new stack at the end of the physical control page */ |
| lea PAGE_SIZE(%rsi), %rsp |
| |
| /* jump to identity mapped page */ |
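| /* |
| * The 0b terms cancel: this adds the offset of identity_mapped() from |
| * __relocate_kernel_start to the control page address in %rsi. |
| */ |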
| 0: addq $identity_mapped - 0b, %rsi |
| subq $__relocate_kernel_start - 0b, %rsi |
| ANNOTATE_RETPOLINE_SAFE |
| jmp *%rsi |
| SYM_CODE_END(relocate_kernel) |
| |
| SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) |
| UNWIND_HINT_END_OF_STACK |
| /* |
| * %rdi indirection page |
| * %rdx start address |
| * %r8 host_mem_enc_active |
| * %r9 page table page |
| * %r11 preserve_context |
| * %r13 original CR4 when relocate_kernel() was invoked |
| */ |
| |
| /* store the start address on the stack */ |
| pushq %rdx |
| |
| /* Create a GDTR (16 bits limit, 64 bits addr) on stack */ |
| leaq kexec_debug_gdt(%rip), %rax |
| pushq %rax |
| pushw (%rax) |
| |
| /* Load the GDT, put the stack back */ |
| lgdt (%rsp) |
| addq $10, %rsp |
| |
| /* Test that we can load segments */ |
| movq %ds, %rax |
| movq %rax, %ds |
| |
| /* Now build an IDTR on the stack to load the IDT the kernel created */ |
| leaq kexec_debug_idt(%rip), %rsi |
| pushq %rsi |
| pushw $0xff |
| lidt (%rsp) |
| addq $10, %rsp |
| |
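| /* Uncomment to send a #BP through the debug IDT as a smoke test: */ |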
| //int3 |
| |
| /* |
| * Clear X86_CR4_CET (if it was set): the CPU does not allow clearing |
| * CR0_WP below while CR4_CET is still set. |
| */ |
| movq %cr4, %rax |
| andq $~(X86_CR4_CET), %rax |
| movq %rax, %cr4 |
| |
| /* |
| * Set cr0 to a known state: |
| * - Paging enabled |
| * - Alignment check disabled |
| * - Write protect disabled |
| * - No task switch |
| * - Don't do FP software emulation. |
| * - Protected mode enabled |
| */ |
| movq %cr0, %rax |
| andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax |
| orl $(X86_CR0_PG | X86_CR0_PE), %eax |
| movq %rax, %cr0 |
| |
| /* |
| * Set cr4 to a known state: |
| * - physical address extension enabled |
| * - 5-level paging, if it was enabled before |
| * - Machine check exception on TDX guest, if it was enabled before. |
| * Clearing MCE might not be allowed in TDX guests, depending on setup. |
| * |
| * Use %r13, which holds the original CR4 value read in relocate_kernel(). |
| * PAE is always set in the original CR4. |
| */ |
| andl $(X86_CR4_PAE | X86_CR4_LA57), %r13d |
| ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST |
| movq %r13, %cr4 |
| |
| /* |
| * Reload CR3 to flush the TLB; with X86_CR4_PGE cleared above, this |
| * also flushes entries that were global. |
| */ |
| movq %r9, %cr3 |
| |
| /* |
| * If SME is active, there could be old encrypted cache line |
| * entries that will conflict with the now unencrypted memory |
| * used by kexec. Flush the caches before copying the kernel. |
| */ |
| testq %r8, %r8 |
| jz .Lsme_off |
| wbinvd |
| .Lsme_off: |
| |
| call swap_pages |
| |
| /* |
| * To be certain of avoiding problems with self-modifying code, |
| * a serializing instruction is needed here. Reloading %cr3 |
| * serializes, is handy, and is not processor dependent. |
| */ |
| movq %cr3, %rax |
| movq %rax, %cr3 |
| |
| testq %r11, %r11 /* preserve_context */ |
| jnz .Lrelocate |
| |
| /* |
| * set all of the registers to known values |
| * leave %rsp alone |
| */ |
| |
| xorl %eax, %eax |
| xorl %ebx, %ebx |
| xorl %ecx, %ecx |
| xorl %edx, %edx |
| xorl %esi, %esi |
| xorl %edi, %edi |
| xorl %ebp, %ebp |
| xorl %r8d, %r8d |
| xorl %r9d, %r9d |
| xorl %r10d, %r10d |
| xorl %r11d, %r11d |
| xorl %r12d, %r12d |
| xorl %r13d, %r13d |
| xorl %r14d, %r14d |
| xorl %r15d, %r15d |
| |
| ANNOTATE_UNRET_SAFE |
| ret |
| int3 |
| |
| .Lrelocate: |
| popq %rdx |
| |
| /* Use the swap page for the callee's stack */ |
| movq kexec_pa_swap_page(%rip), %r10 |
| leaq PAGE_SIZE(%r10), %rsp |
| |
| /* push the existing entry point onto the callee's stack */ |
| pushq %rdx |
| |
| ANNOTATE_RETPOLINE_SAFE |
| call *%rdx |
| |
| /* get the re-entry point of the peer system */ |
| popq %rbp |
| movq kexec_pa_swap_page(%rip), %r10 |
| movq pa_backup_pages_map(%rip), %rdi |
| movq kexec_pa_table_page(%rip), %rax |
| movq %rax, %cr3 |
| |
| /* Find start (and end) of this physical mapping of control page */ |
| leaq (%rip), %r8 |
| ANNOTATE_NOENDBR |
| andq $PAGE_MASK, %r8 |
| lea PAGE_SIZE(%r8), %rsp |
| movl $1, %r11d /* Ensure preserve_context flag is set */ |
| call swap_pages |
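| |
| /* Compute the virtual address of virtual_mapped() and 'return' to it */ |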
| movq kexec_va_control_page(%rip), %rax |
| 0: addq $virtual_mapped - 0b, %rax |
| subq $__relocate_kernel_start - 0b, %rax |
| pushq %rax |
| ANNOTATE_UNRET_SAFE |
| ret |
| int3 |
| SYM_CODE_END(identity_mapped) |
| |
| SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) |
| UNWIND_HINT_END_OF_STACK |
| ANNOTATE_NOENDBR // RET target, above |
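| /* Restore the stack pointer and control registers saved on entry */ |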
| movq saved_rsp(%rip), %rsp |
| movq saved_cr4(%rip), %rax |
| movq %rax, %cr4 |
| movq saved_cr3(%rip), %rax |
| movq saved_cr0(%rip), %r8 |
| movq %rax, %cr3 |
| movq %r8, %cr0 |
| |
| #ifdef CONFIG_KEXEC_JUMP |
| /* Saved in save_processor_state. */ |
| movq $saved_context, %rax |
| lgdt saved_context_gdt_desc(%rax) |
| #endif |
| |
| /* relocate_kernel() returns the re-entry point for next time */ |
| movq %rbp, %rax |
| |
| popf |
| popq %r15 |
| popq %r14 |
| popq %r13 |
| popq %r12 |
| popq %rbp |
| popq %rbx |
| ANNOTATE_UNRET_SAFE |
| ret |
| int3 |
| SYM_CODE_END(virtual_mapped) |
| |
| /* Do the copies */ |
| SYM_CODE_START_LOCAL_NOALIGN(swap_pages) |
| UNWIND_HINT_END_OF_STACK |
| /* |
| * %rdi indirection page |
| * %r11 preserve_context |
| */ |
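| /* |
| * Each indirection entry is a page address with flags in the low bits: |
| * 0x1 destination, 0x2 indirection, 0x4 done, 0x8 source |
| * (IND_DESTINATION/INDIRECTION/DONE/SOURCE in <linux/kexec.h>). |
| */ |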
| movq %rdi, %rcx /* Put the indirection_page in %rcx */ |
| xorl %edi, %edi |
| xorl %esi, %esi |
| jmp .Lstart /* Should start with an indirection record */ |
| |
| .Lloop: /* top, read another word for the indirection page */ |
| |
| movq (%rbx), %rcx |
| addq $8, %rbx |
| .Lstart: |
| testb $0x1, %cl /* is it a destination page? */ |
| jz .Lnotdest |
| movq %rcx, %rdi |
| andq $0xfffffffffffff000, %rdi |
| jmp .Lloop |
| .Lnotdest: |
| testb $0x2, %cl /* is it an indirection page? */ |
| jz .Lnotind |
| movq %rcx, %rbx |
| andq $0xfffffffffffff000, %rbx |
| jmp .Lloop |
| .Lnotind: |
| testb $0x4, %cl /* is it the done indicator? */ |
| jz .Lnotdone |
| jmp .Ldone |
| .Lnotdone: |
| testb $0x8, %cl /* is it the source indicator? */ |
| jz .Lloop /* Ignore it otherwise */ |
| movq %rcx, %rsi /* For every source page do a copy */ |
| andq $0xfffffffffffff000, %rsi |
| |
| movq %rdi, %rdx /* Save destination page to %rdx */ |
| movq %rsi, %rax /* Save source page to %rax */ |
| |
| testq %r11, %r11 /* Only actually swap for ::preserve_context */ |
| jz .Lnoswap |
| |
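| /* |
| * Three-way swap through the swap page: the destination ends up with |
| * the source data, while the destination's old contents are preserved |
| * in the source page so the current kernel can be resumed later. |
| */ |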
| /* copy source page to swap page */ |
| movq kexec_pa_swap_page(%rip), %rdi |
| movl $512, %ecx |
| rep movsq |
| |
| /* copy destination page to source page */ |
| movq %rax, %rdi |
| movq %rdx, %rsi |
| movl $512, %ecx |
| rep movsq |
| |
| /* copy swap page to destination page */ |
| movq %rdx, %rdi |
| movq kexec_pa_swap_page(%rip), %rsi |
| .Lnoswap: |
| movl $512, %ecx |
| rep movsq |
| |
| lea PAGE_SIZE(%rax), %rsi |
| jmp .Lloop |
| .Ldone: |
| ANNOTATE_UNRET_SAFE |
| ret |
| int3 |
| SYM_CODE_END(swap_pages) |
| |
| /* |
| * Generic 'print character' routine |
| * - %al: Character to be printed (may clobber %rax) |
| * - %rdx: MMIO address or port. |
| */ |
| #define XMTRDY 0x20 /* LSR bit 5: transmit holding register empty */ |
| |
| #define TXR 0 /* Transmit register (WRITE) */ |
| #define LSR 5 /* Line Status */ |
| |
| SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250) |
| UNWIND_HINT_FUNC |
| ANNOTATE_NOENDBR |
| addw $LSR, %dx |
| xchg %al, %ah /* Stash the character in %ah while %al polls LSR */ |
| .Lxmtrdy_loop: |
| inb %dx, %al |
| testb $XMTRDY, %al |
| jnz .Lready |
| pause |
| jmp .Lxmtrdy_loop |
| |
| .Lready: |
| subw $LSR, %dx |
| xchg %al, %ah /* Restore the character to %al for transmission */ |
| outb %al, %dx |
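| /* No-console output routine: shares the RET below with pr_char_8250 */ |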
| pr_char_null: |
| ANNOTATE_NOENDBR |
| |
| ANNOTATE_UNRET_SAFE |
| ret |
| SYM_CODE_END(pr_char_8250) |
| |
| SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32) |
| UNWIND_HINT_FUNC |
| ANNOTATE_NOENDBR |
| .Lxmtrdy_loop_mmio: |
| movb (LSR*4)(%rdx), %ah |
| testb $XMTRDY, %ah |
| jnz .Lready_mmio |
| pause |
| jmp .Lxmtrdy_loop_mmio |
| |
| .Lready_mmio: |
| movb %al, (%rdx) |
| ANNOTATE_UNRET_SAFE |
| ret |
| SYM_CODE_END(pr_char_8250_mmio32) |
| |
| /* |
| * Load pr_char function pointer into %rsi and load %rdx with whatever |
| * that function wants to see there (typically port/MMIO address). |
| */ |
| .macro pr_setup |
| leaq pr_char_8250(%rip), %rsi |
| movw kexec_debug_8250_port(%rip), %dx |
| testw %dx, %dx |
| jnz 1f |
| |
| leaq pr_char_8250_mmio32(%rip), %rsi |
| movq kexec_debug_8250_mmio32(%rip), %rdx |
| testq %rdx, %rdx |
| jnz 1f |
| |
| leaq pr_char_null(%rip), %rsi |
| 1: |
| .endm |
| |
| /* Print the nybble in %bl, clobber %rax */ |
| SYM_CODE_START_LOCAL_NOALIGN(pr_nybble) |
| UNWIND_HINT_FUNC |
| movb %bl, %al |
| nop |
| andb $0x0f, %al |
| addb $0x30, %al |
| cmpb $0x3a, %al |
| jb 1f |
| addb $('a' - '0' - 10), %al /* Remap 0x3a..0x3f to 'a'..'f' */ |
| ANNOTATE_RETPOLINE_SAFE |
| 1: jmp *%rsi |
| SYM_CODE_END(pr_nybble) |
| |
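| /* |
| * Print the qword in %rbx as 16 hex digits plus a newline; clobbers |
| * %rax and %rcx, while %rbx ends up rotated back to its start value. |
| */ |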
| SYM_CODE_START_LOCAL_NOALIGN(pr_qword) |
| UNWIND_HINT_FUNC |
| movq $16, %rcx |
| 1: rolq $4, %rbx |
| call pr_nybble |
| loop 1b |
| movb $'\n', %al |
| ANNOTATE_RETPOLINE_SAFE |
| jmp *%rsi |
| SYM_CODE_END(pr_qword) |
| |
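| /* Print a four-character tag \a\b\c\d followed by operand \r in hex */ |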
| .macro print_reg a, b, c, d, r |
| movb $\a, %al |
| ANNOTATE_RETPOLINE_SAFE |
| call *%rsi |
| movb $\b, %al |
| ANNOTATE_RETPOLINE_SAFE |
| call *%rsi |
| movb $\c, %al |
| ANNOTATE_RETPOLINE_SAFE |
| call *%rsi |
| movb $\d, %al |
| ANNOTATE_RETPOLINE_SAFE |
| call *%rsi |
| movq \r, %rbx |
| call pr_qword |
| .endm |
| |
| SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors) |
| /* Each of these is 6 bytes; the '. =' below enforces the spacing. */ |
| .macro vec_err exc |
| UNWIND_HINT_ENTRY |
| . = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE) |
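| /* The CPU pushed an error code; pad with NOPs to keep entries 6 bytes */ |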
| nop |
| nop |
| pushq $\exc |
| jmp exc_handler |
| .endm |
| |
| .macro vec_noerr exc |
| UNWIND_HINT_ENTRY |
| . = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE) |
| pushq $0 |
| pushq $\exc |
| jmp exc_handler |
| .endm |
| |
| ANNOTATE_NOENDBR |
| vec_noerr 0 // #DE |
| vec_noerr 1 // #DB |
| vec_noerr 2 // #NMI |
| vec_noerr 3 // #BP |
| vec_noerr 4 // #OF |
| vec_noerr 5 // #BR |
| vec_noerr 6 // #UD |
| vec_noerr 7 // #NM |
| vec_err 8 // #DF |
| vec_noerr 9 // reserved (legacy Coprocessor Segment Overrun) |
| vec_err 10 // #TS |
| vec_err 11 // #NP |
| vec_err 12 // #SS |
| vec_err 13 // #GP |
| vec_err 14 // #PF |
| vec_noerr 15 // reserved |
| SYM_CODE_END(kexec_debug_exc_vectors) |
| |
| SYM_CODE_START_LOCAL_NOALIGN(exc_handler) |
| /* No need for RET mitigations during kexec */ |
| VALIDATE_UNRET_END |
| |
| pushq %rax |
| pushq %rbx |
| pushq %rcx |
| pushq %rdx |
| pushq %rsi |
| |
| /* Stack frame */ |
| #define EXC_SS 0x58 /* Architectural... */ |
| #define EXC_RSP 0x50 |
| #define EXC_EFLAGS 0x48 |
| #define EXC_CS 0x40 |
| #define EXC_RIP 0x38 |
| #define EXC_ERRORCODE 0x30 /* Either architectural or zero pushed by handler */ |
| #define EXC_EXCEPTION 0x28 /* Pushed by handler entry point */ |
| #define EXC_RAX 0x20 /* Pushed just above in exc_handler */ |
| #define EXC_RBX 0x18 |
| #define EXC_RCX 0x10 |
| #define EXC_RDX 0x08 |
| #define EXC_RSI 0x00 |
| |
| /* Set up %rdx/%rsi for debug output */ |
| pr_setup |
| |
| /* rip and exception info */ |
| print_reg 'E', 'x', 'c', ':', EXC_EXCEPTION(%rsp) |
| print_reg 'E', 'r', 'r', ':', EXC_ERRORCODE(%rsp) |
| print_reg 'r', 'i', 'p', ':', EXC_RIP(%rsp) |
| print_reg 'r', 's', 'p', ':', EXC_RSP(%rsp) |
| |
| /* We spilled these to the stack */ |
| print_reg 'r', 'a', 'x', ':', EXC_RAX(%rsp) |
| print_reg 'r', 'b', 'x', ':', EXC_RBX(%rsp) |
| print_reg 'r', 'c', 'x', ':', EXC_RCX(%rsp) |
| print_reg 'r', 'd', 'x', ':', EXC_RDX(%rsp) |
| print_reg 'r', 's', 'i', ':', EXC_RSI(%rsp) |
| |
| /* Other registers untouched */ |
| print_reg 'r', 'd', 'i', ':', %rdi |
| print_reg 'r', '8', ' ', ':', %r8 |
| print_reg 'r', '9', ' ', ':', %r9 |
| print_reg 'r', '1', '0', ':', %r10 |
| print_reg 'r', '1', '1', ':', %r11 |
| print_reg 'r', '1', '2', ':', %r12 |
| print_reg 'r', '1', '3', ':', %r13 |
| print_reg 'r', '1', '4', ':', %r14 |
| print_reg 'r', '1', '5', ':', %r15 |
| print_reg 'c', 'r', '2', ':', %cr2 |
| |
| /* Only return from INT3 */ |
| cmpq $3, EXC_EXCEPTION(%rsp) |
| jne .Ldie |
| |
| popq %rsi |
| popq %rdx |
| popq %rcx |
| popq %rbx |
| popq %rax |
| |
| addq $16, %rsp /* Drop the exception number and error code */ |
| iretq |
| |
| .Ldie: |
| hlt |
| jmp .Ldie |
| |
| SYM_CODE_END(exc_handler) |