/*	$OpenBSD: locore.S,v 1.141 2023/10/24 13:20:09 claudio Exp $	*/
/*	$NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $	*/

/*
 * Copyright-o-rama!
 */

/*
 * Copyright (c) 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)locore.s	7.3 (Berkeley) 5/13/91
 */

#include "assym.h"
#include "efi.h"
#include "lapic.h"
#include "ksyms.h"
#include "xen.h"
#include "hyperv.h"

/*
 * The include targets below were lost to angle-bracket stripping in the
 * source as received; they are restored here from context (the symbols each
 * header provides) and should be checked against the upstream file.
 */
#include <sys/syscall.h>

#include <machine/param.h>
#include <machine/codepatch.h>
#include <machine/psl.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/trap.h>		/* T_PROTFLT */
#include <machine/frameasm.h>

#if NLAPIC > 0
#include <machine/i82489reg.h>
#endif

/*
 * override user-land alignment before including asm.h
 */
#define	ALIGN_DATA	.align	8,0xcc

#include <machine/asm.h>

#define SET_CURPROC(proc,cpu)			\
	movq	CPUVAR(SELF),cpu	;	\
	movq	proc,CPUVAR(CURPROC)	;	\
	movq	cpu,P_CPU(proc)

#define GET_CURPCB(reg)			movq	CPUVAR(CURPCB),reg
#define SET_CURPCB(reg)			movq	reg,CPUVAR(CURPCB)

/*
 * Initialization
 */
	.data

#if NLAPIC > 0
	.align	NBPG, 0xcc
	.globl	local_apic, lapic_id, lapic_tpr
local_apic:
	.space	LAPIC_ID
lapic_id:
	.long	0x00000000
	.space	LAPIC_TPRI-(LAPIC_ID+4)
lapic_tpr:
	.space	LAPIC_PPRI-LAPIC_TPRI
lapic_ppr:
	.space	LAPIC_ISR-LAPIC_PPRI
lapic_isr:
	.space	NBPG-LAPIC_ISR
#endif

/*****************************************************************************/

/*
 * Signal trampoline; copied to a page mapped into userspace.
 * gdb's backtrace logic matches against the instructions in this.
 */
	.section .rodata
	.globl	sigcode
sigcode:
	endbr64
	call	1f
	movq	%rsp,%rdi
	pushq	%rdi			/* fake return address */
	movq	$SYS_sigreturn,%rax
	syscall
	.globl	sigcoderet
sigcoderet:
	movq	$SYS_exit,%rax
	syscall
	_ALIGN_TRAPS
1:	JMP_RETPOLINE(rax)
	.globl	esigcode
esigcode:
	.globl	sigfill
sigfill:
	int3
esigfill:
	.globl	sigfillsiz
sigfillsiz:
	.long	esigfill - sigfill

	.text

/*
 * void lgdt(struct region_descriptor *rdp);
 * Change the global descriptor table.
 */
NENTRY(lgdt)
	RETGUARD_SETUP(lgdt, r11)
	/* Reload the descriptor table. */
	movq	%rdi,%rax
	lgdt	(%rax)
	/* Flush the prefetch q. */
	jmp	1f
	nop
1:	/* Reload "stale" selectors. */
	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%ss
	/* Reload code selector by doing intersegment return. */
	popq	%rax
	pushq	$GSEL(GCODE_SEL, SEL_KPL)
	pushq	%rax
	RETGUARD_CHECK(lgdt, r11)
	lretq
END(lgdt)
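/*
 * A minimal C sketch of how a caller might hand a descriptor-table pointer
 * to lgdt() above.  The rd_limit/rd_base field names follow the usual
 * struct region_descriptor layout and the gdtstore[] array is hypothetical;
 * this is illustrative only:
 *
 *	struct region_descriptor rd;
 *	extern char gdtstore[];			// hypothetical GDT storage
 *
 *	rd.rd_limit = (u_int16_t)(GDT_SIZE - 1);	// table size - 1
 *	rd.rd_base = (u_int64_t)&gdtstore;	// linear base of the table
 *	lgdt(&rd);			// reloads GDTR and segment registers
 */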

#if defined(DDB) || NEFI > 0
ENTRY(setjmp)
	RETGUARD_SETUP(setjmp, r11)
	/*
	 * Only save registers that must be preserved across function
	 * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
	 * and %rip.
	 */
	movq	%rdi,%rax
	movq	%rbx,(%rax)
	movq	%rsp,8(%rax)
	movq	%rbp,16(%rax)
	movq	%r12,24(%rax)
	movq	%r13,32(%rax)
	movq	%r14,40(%rax)
	movq	%r15,48(%rax)
	movq	(%rsp),%rdx
	movq	%rdx,56(%rax)
	xorl	%eax,%eax
	RETGUARD_CHECK(setjmp, r11)
	ret
	lfence
END(setjmp)

ENTRY(longjmp)
	movq	%rdi,%rax
	movq	8(%rax),%rsp
	movq	56(%rax),%rdx
	movq	%rdx,(%rsp)
	RETGUARD_SETUP(longjmp, r11)
	movq	(%rax),%rbx
	movq	16(%rax),%rbp
	movq	24(%rax),%r12
	movq	32(%rax),%r13
	movq	40(%rax),%r14
	movq	48(%rax),%r15
	xorl	%eax,%eax
	incl	%eax
	RETGUARD_CHECK(longjmp, r11)
	ret
	lfence
END(longjmp)
#endif /* DDB || NEFI > 0 */
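/*
 * Usage sketch for the setjmp()/longjmp() pair above (hypothetical caller,
 * in C): setjmp() returns 0 when the context is recorded, and longjmp()
 * re-enters that point with a return value of 1, which is how DDB-style
 * error recovery can unwind out of a faulting operation:
 *
 *	static label_t jbuf;			// assumed jump-buffer type
 *
 *	if (setjmp(&jbuf) == 0) {
 *		risky_operation();		// may longjmp(&jbuf) on error
 *	} else {
 *		printf("recovered\n");		// reached via longjmp()
 *	}
 */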

/*****************************************************************************/

/*
 * int cpu_switchto(struct proc *old, struct proc *new)
 * Switch from "old" proc to "new".
 */
ENTRY(cpu_switchto)
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movq	%rdi, %r13
	movq	%rsi, %r12

	/* Record new proc. */
	movb	$SONPROC,P_STAT(%r12)	# p->p_stat = SONPROC
	SET_CURPROC(%r12,%rcx)

	movl	CPUVAR(CPUID),%r9d

	/* for the FPU/"extended CPU state" handling below */
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx

	/* If old proc exited, don't bother. */
	xorl	%ecx,%ecx
	testq	%r13,%r13
	jz	switch_exited

	/*
	 * Save old context.
	 *
	 * Registers:
	 *   %rax - scratch
	 *   %r13 - old proc, then old pcb
	 *   %rcx - old pmap if not P_SYSTEM
	 *   %r12 - new proc
	 *   %r9d - cpuid
	 */

	/* remember the pmap if not P_SYSTEM */
	testl	$P_SYSTEM,P_FLAG(%r13)
	movq	P_ADDR(%r13),%r13
	jnz	0f
	movq	PCB_PMAP(%r13),%rcx
0:

	/* Save stack pointers. */
	movq	%rsp,PCB_RSP(%r13)
	movq	%rbp,PCB_RBP(%r13)

	/*
	 * If the old proc ran in userspace then save the
	 * floating-point/"extended state" registers
	 */
	testl	$CPUPF_USERXSTATE,CPUVAR(PFLAGS)
	jz	.Lxstate_reset

	movq	%r13, %rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	CODEPATCH_START
	fxsave64	(%rdi)
	CODEPATCH_END(CPTAG_XSAVE)

switch_exited:
	/* now clear the xstate */
	movq	proc0paddr(%rip),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTORS)
	andl	$~CPUPF_USERXSTATE,CPUVAR(PFLAGS)

.Lxstate_reset:
	/*
	 * If the segment registers haven't been reset since the old proc
	 * ran in userspace then reset them now
	 */
	testl	$CPUPF_USERSEGS,CPUVAR(PFLAGS)
	jz	restore_saved
	andl	$~CPUPF_USERSEGS,CPUVAR(PFLAGS)

	/* set %ds, %es, %fs, and %gs to expected value to prevent info leak */
	movw	$(GSEL(GUDATA_SEL, SEL_UPL)),%ax
	movw	%ax,%ds
	movw	%ax,%es
	movw	%ax,%fs
	cli			/* block interrupts when on user GS.base */
	swapgs			/* switch from kernel to user GS.base */
	movw	%ax,%gs		/* set %gs to UDATA and GS.base to 0 */
	swapgs			/* back to kernel GS.base */

restore_saved:
	/*
	 * Restore saved context.
	 *
	 * Registers:
	 *   %rax, %rdx - scratch
	 *   %rcx - old pmap if not P_SYSTEM
	 *   %r12 - new process
	 *   %r13 - new pcb
	 *   %rbx - new pmap if not P_SYSTEM
	 */
	movq	P_ADDR(%r12),%r13

	/* remember the pmap if not P_SYSTEM */
	xorl	%ebx,%ebx
	testl	$P_SYSTEM,P_FLAG(%r12)
	jnz	1f
	movq	PCB_PMAP(%r13),%rbx
1:

	/* No interrupts while loading new state. */
	cli

	/* Restore stack pointers. */
	movq	PCB_RSP(%r13),%rsp
	movq	PCB_RBP(%r13),%rbp

	/* Stack pivot done, setup RETGUARD */
	RETGUARD_SETUP_OFF(cpu_switchto, r11, 6*8)

	/* don't switch cr3 to the same thing it already was */
	movq	PCB_CR3(%r13),%rax
	movq	%cr3,%rdi
	xorq	%rax,%rdi
	btrq	$63,%rdi	/* ignore CR3_REUSE_PCID */
	testq	%rdi,%rdi
	jz	.Lsame_cr3

#ifdef DIAGNOSTIC
	/* verify ci_proc_pmap had been updated properly */
	cmpq	%rcx,CPUVAR(PROC_PMAP)
	jnz	.Lbogus_proc_pmap
#endif
	/* record which pmap this CPU should get IPIs for */
	movq	%rbx,CPUVAR(PROC_PMAP)

.Lset_cr3:
	movq	%rax,%cr3			/* %rax used below too */

.Lsame_cr3:
	/*
	 * If we switched from a userland thread with a shallow call stack
	 * (e.g. interrupt->ast->mi_ast->preempt->mi_switch->cpu_switchto)
	 * then the RSB may have attacker controlled entries when we switch
	 * to a deeper call stack in the new thread.  Refill the RSB with
	 * entries safe to speculate into/through.
	 */
	RET_STACK_REFILL_WITH_RCX

	/* Don't bother with the rest if switching to a system process. */
	testq	%rbx,%rbx
	jz	switch_restored

	/* record the bits needed for future U-->K transition */
	movq	PCB_KSTACK(%r13),%rdx
	subq	$FRAMESIZE,%rdx
	movq	%rdx,CPUVAR(KERN_RSP)

	CODEPATCH_START
	/*
	 * Meltdown: iff we're doing separate U+K and U-K page tables,
	 * then record them in cpu_info for easy access in syscall and
	 * interrupt trampolines.
	 */
	movq	PM_PDIRPA_INTEL(%rbx),%rdx
	orq	cr3_reuse_pcid,%rax
	orq	cr3_pcid_proc_intel,%rdx
	movq	%rax,CPUVAR(KERN_CR3)
	movq	%rdx,CPUVAR(USER_CR3)
	CODEPATCH_END(CPTAG_MELTDOWN_NOP)

switch_restored:
	SET_CURPCB(%r13)

	/* Interrupts are okay again. */
	sti
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	RETGUARD_CHECK(cpu_switchto, r11)
	ret
	lfence

#ifdef DIAGNOSTIC
.Lbogus_proc_pmap:
	leaq	bogus_proc_pmap,%rdi
	call	panic
	int3	/* NOTREACHED */
	.pushsection .rodata
bogus_proc_pmap:
	.asciz	"curcpu->ci_proc_pmap didn't point to previous pmap"
	.popsection
#endif /* DIAGNOSTIC */
END(cpu_switchto)
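/*
 * Caller-side sketch for cpu_switchto() (hypothetical, simplified C): the
 * scheduler hands over the outgoing and incoming procs; a NULL "old" means
 * the outgoing proc has exited, so saving the old context is skipped (the
 * "If old proc exited, don't bother" test above):
 *
 *	struct proc *old = curproc;
 *	struct proc *new = pick_next_runnable();	// hypothetical helper
 *
 *	if (new != old)
 *		cpu_switchto(exiting ? NULL : old, new);
 */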

NENTRY(retpoline_rax)
	CODEPATCH_START
	JMP_RETPOLINE(rax)
	CODEPATCH_END(CPTAG_RETPOLINE_RAX)
END(retpoline_rax)

NENTRY(__x86_indirect_thunk_r11)
	CODEPATCH_START
	JMP_RETPOLINE(r11)
	CODEPATCH_END(CPTAG_RETPOLINE_R11)
END(__x86_indirect_thunk_r11)

ENTRY(cpu_idle_cycle_hlt)
	RETGUARD_SETUP(cpu_idle_cycle_hlt, r11)
	sti
	hlt
	RETGUARD_CHECK(cpu_idle_cycle_hlt, r11)
	ret
	lfence
END(cpu_idle_cycle_hlt)

/*
 * savectx(struct pcb *pcb);
 * Update pcb, saving current processor state.
 */
ENTRY(savectx)
	RETGUARD_SETUP(savectx, r11)
	/* Save stack pointers. */
	movq	%rsp,PCB_RSP(%rdi)
	movq	%rbp,PCB_RBP(%rdi)
	RETGUARD_CHECK(savectx, r11)
	ret
	lfence
END(savectx)

IDTVEC(syscall32)
	sysret		/* go away please */
END(Xsyscall32)

/*
 * syscall insn entry.
 * Enter here with interrupts blocked; %rcx contains the caller's
 * %rip and the original rflags has been copied to %r11.  %cs and
 * %ss have been updated to the kernel segments, but %rsp is still
 * the user-space value.
 * First order of business is to swap to the kernel GS.base so that
 * we can access our struct cpu_info.  After possibly mucking with
 * pagetables, we switch to our kernel stack.  Once that's in place
 * we can save the rest of the syscall frame and unblock interrupts.
 */
KUTEXT_PAGE_START
	.align	NBPG, 0xcc
XUsyscall_meltdown:
	/*
	 * This is the real Xsyscall_meltdown page, which is mapped into
	 * the U-K page tables at the same location as Xsyscall_meltdown
	 * below.  For this, the Meltdown case, we use the scratch space
	 * in cpu_info so we can switch to the kernel page tables
	 * (thank you, Intel), at which point we'll continue at the
	 * "SYSCALL_ENTRY" after Xsyscall below.
	 * In case the CPU speculates past the mov to cr3, we put a
	 * retpoline-style pause-lfence-jmp-to-pause loop.
	 */
	endbr64
	swapgs
	movq	%rax,CPUVAR(SCRATCH)
	movq	CPUVAR(KERN_CR3),%rax
	movq	%rax,%cr3
0:	pause
	lfence
	jmp	0b
KUTEXT_PAGE_END

KTEXT_PAGE_START
	.align	NBPG, 0xcc
GENTRY(Xsyscall_meltdown)
	/* pad to match real Xsyscall_meltdown positioning above */
	movq	CPUVAR(KERN_CR3),%rax
	movq	%rax,%cr3
GENTRY(Xsyscall)
	endbr64
	swapgs
	movq	%rax,CPUVAR(SCRATCH)
	SYSCALL_ENTRY			/* create trapframe */
	sti

	movq	CPUVAR(CURPROC),%r14
	movq	%rsp,P_MD_REGS(%r14)	# save pointer to frame
	andl	$~MDP_IRET,P_MD_FLAGS(%r14)
	movq	%rsp,%rdi
	call	syscall

.Lsyscall_check_asts:
	/* Check for ASTs on exit to user mode. */
	cli
	CHECK_ASTPENDING(%r11)
	je	2f
	CLEAR_ASTPENDING(%r11)
	sti
	movq	%rsp,%rdi
	call	ast
	jmp	.Lsyscall_check_asts

2:
#ifdef DIAGNOSTIC
	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
	jne	.Lsyscall_spl_not_lowered
#endif /* DIAGNOSTIC */

	/* Could registers have been changed that require an iretq? */
	testl	$MDP_IRET, P_MD_FLAGS(%r14)
	jne	intr_user_exit_post_ast

	/* Restore FPU/"extended CPU state" if it's not already in the CPU */
	testl	$CPUPF_USERXSTATE,CPUVAR(PFLAGS)
	jz	.Lsyscall_restore_xstate

	/* Restore FS.base if it's not already in the CPU */
	testl	$CPUPF_USERSEGS,CPUVAR(PFLAGS)
	jz	.Lsyscall_restore_fsbase

.Lsyscall_restore_registers:
	call	pku_xonly
	RET_STACK_REFILL_WITH_RCX

	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_R10(%rsp),%r10
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15

	CODEPATCH_START
	movw	%ds,TF_R8(%rsp)
	verw	TF_R8(%rsp)
	CODEPATCH_END(CPTAG_MDS)

	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_RBP(%rsp),%rbp
	movq	TF_RBX(%rsp),%rbx

	/*
	 * We need to finish reading from the trapframe, then switch
	 * to the user page tables, swapgs, and return.  We need
	 * to get the final value for the register that was used
	 * for the mov to %cr3 from somewhere accessible on the
	 * user page tables, so save it in CPUVAR(SCRATCH) across
	 * the switch.
	 */
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RAX(%rsp),%rax
	movq	TF_RIP(%rsp),%rcx
	movq	TF_RFLAGS(%rsp),%r11
	movq	TF_RSP(%rsp),%rsp
	CODEPATCH_START
	movq	%rax,CPUVAR(SCRATCH)
	movq	CPUVAR(USER_CR3),%rax
	PCID_SET_REUSE_NOP
	movq	%rax,%cr3
Xsyscall_trampback:
0:	pause
	lfence
	jmp	0b
	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
	swapgs
	sysretq
END(Xsyscall)
END(Xsyscall_meltdown)
KTEXT_PAGE_END

KUTEXT_PAGE_START
	.space	(Xsyscall_trampback - Xsyscall_meltdown) - \
		(. - XUsyscall_meltdown), 0xcc
	movq	%rax,%cr3
	movq	CPUVAR(SCRATCH),%rax
	swapgs
	sysretq
KUTEXT_PAGE_END
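/*
 * Userland view of the entry convention handled above (sketch only, not
 * the libc stub): the syscall instruction itself stashes the return %rip
 * in %rcx and rflags in %r11, which is why those two registers get special
 * treatment on the way back out via sysretq:
 *
 *	long
 *	raw_syscall(long code)			// hypothetical helper
 *	{
 *		long ret;
 *
 *		__asm volatile("syscall"
 *		    : "=a" (ret)
 *		    : "a" (code)
 *		    : "rcx", "r11", "memory");
 *		return ret;			// result comes back in %rax
 *	}
 */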

	.text
	_ALIGN_TRAPS
	/* in this case, need FS.base but not xstate, rarely happens */
.Lsyscall_restore_fsbase:	/* CPU doesn't have curproc's FS.base */
	orl	$CPUPF_USERSEGS,CPUVAR(PFLAGS)
	movq	CPUVAR(CURPCB),%rdi
	jmp	.Lsyscall_restore_fsbase_real

	_ALIGN_TRAPS
.Lsyscall_restore_xstate:	/* CPU doesn't have curproc's xstate */
	orl	$(CPUPF_USERXSTATE|CPUPF_USERSEGS),CPUVAR(PFLAGS)
	movq	CPUVAR(CURPCB),%rdi
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	/* untouched state so can't fault */
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTORS)
#if PCB_SAVEFPU != 0
	subq	$PCB_SAVEFPU,%rdi
#endif
.Lsyscall_restore_fsbase_real:
	movq	PCB_FSBASE(%rdi),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	movl	$MSR_FSBASE,%ecx
	wrmsr
	jmp	.Lsyscall_restore_registers

#ifdef DIAGNOSTIC
.Lsyscall_spl_not_lowered:
	leaq	spl_lowered(%rip), %rdi
	movl	TF_ERR(%rsp),%esi	/* syscall # stashed above */
	movl	TF_RDI(%rsp),%edx
	movl	%ebx,%ecx
	movl	CPUVAR(ILEVEL),%r8d
	xorq	%rax,%rax
	call	printf
#ifdef DDB
	int	$3
#endif /* DDB */
	movl	$IPL_NONE,CPUVAR(ILEVEL)
	jmp	.Lsyscall_check_asts

	.section .rodata
spl_lowered:
	.asciz	"WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
	.text
#endif

NENTRY(proc_trampoline)
	call	proc_trampoline_mi
	movq	%r13,%rdi
	movq	%r12,%rax
	call	retpoline_rax
	movq	CPUVAR(CURPROC),%r14
	jmp	.Lsyscall_check_asts
END(proc_trampoline)
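/*
 * What proc_trampoline above relies on, expressed as C-like pseudo-code
 * (sketch; the fork-time setup that seeds %r12/%r13 lives elsewhere): the
 * first cpu_switchto() into a newly created proc "returns" here with the
 * start function in %r12 and its argument in %r13, so the effect is:
 *
 *	proc_trampoline_mi();
 *	(*func)(arg);		// %r12 = func, %r13 = arg, called through
 *				// the retpoline_rax thunk
 *	// ...then fall into the syscall AST-check / return-to-user path
 */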

/*
 * Returning to userspace via iretq.  We do things in this order:
 *  - check for ASTs
 *  - restore FPU/"extended CPU state" if it's not already in the CPU
 *  - DIAGNOSTIC: no more C calls after this, so check the SPL
 *  - restore FS.base if it's not already in the CPU
 *  - restore most registers
 *  - update the iret frame from the trapframe
 *  - finish reading from the trapframe
 *  - switch to the trampoline stack	\
 *  - jump to the .kutext segment	|-- Meltdown workaround
 *  - switch to the user page tables	/
 *  - swapgs
 *  - iretq
 */
KTEXT_PAGE_START
	_ALIGN_TRAPS
GENTRY(intr_user_exit)
#ifdef DIAGNOSTIC
	pushfq
	popq	%rdx
	testq	$PSL_I,%rdx
	jnz	.Lintr_user_exit_not_blocked
#endif /* DIAGNOSTIC */

	/* Check for ASTs */
	CHECK_ASTPENDING(%r11)
	je	intr_user_exit_post_ast
	CLEAR_ASTPENDING(%r11)
	sti
	movq	%rsp,%rdi
	call	ast
	cli
	jmp	intr_user_exit

intr_user_exit_post_ast:
	/* Restore FPU/"extended CPU state" if it's not already in the CPU */
	testl	$CPUPF_USERXSTATE,CPUVAR(PFLAGS)
	jz	.Lintr_restore_xstate

#ifdef DIAGNOSTIC
	/* no more C calls after this, so check the SPL */
	cmpl	$0,CPUVAR(ILEVEL)
	jne	.Luser_spl_not_lowered
#endif /* DIAGNOSTIC */

	/* Restore FS.base if it's not already in the CPU */
	testl	$CPUPF_USERSEGS,CPUVAR(PFLAGS)
	jz	.Lintr_restore_fsbase

.Lintr_restore_registers:
	call	pku_xonly
	RET_STACK_REFILL_WITH_RCX

	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_R10(%rsp),%r10
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15

	CODEPATCH_START
	movw	%ds,TF_R8(%rsp)
	verw	TF_R8(%rsp)
	CODEPATCH_END(CPTAG_MDS)

	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_RBP(%rsp),%rbp
	movq	TF_RBX(%rsp),%rbx

	/*
	 * To get the final value for the register that was used
	 * for the mov to %cr3, we need access to somewhere accessible
	 * on the user page tables, so we save it in CPUVAR(SCRATCH)
	 * across the switch.
	 */
	/* update iret frame */
	movq	CPUVAR(INTR_RSP),%rdx
	movq	$(GSEL(GUCODE_SEL,SEL_UPL)),IRETQ_CS(%rdx)
	movq	TF_RIP(%rsp),%rax
	movq	%rax,IRETQ_RIP(%rdx)
	movq	TF_RFLAGS(%rsp),%rax
	movq	%rax,IRETQ_RFLAGS(%rdx)
	movq	TF_RSP(%rsp),%rax
	movq	%rax,IRETQ_RSP(%rdx)
	movq	$(GSEL(GUDATA_SEL,SEL_UPL)),IRETQ_SS(%rdx)
	/* finish with the trap frame */
	movq	TF_RAX(%rsp),%rax
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R11(%rsp),%r11
	/* switch to the trampoline stack */
	xchgq	%rdx,%rsp
	movq	TF_RDX(%rdx),%rdx
	CODEPATCH_START
	movq	%rax,CPUVAR(SCRATCH)
	movq	CPUVAR(USER_CR3),%rax
	PCID_SET_REUSE_NOP
	movq	%rax,%cr3
Xiretq_trampback:
KTEXT_PAGE_END
/* the movq %cr3 switches to this "KUTEXT" page */
KUTEXT_PAGE_START
	.space	(Xiretq_trampback - Xsyscall_meltdown) - \
		(. - XUsyscall_meltdown), 0xcc
	movq	CPUVAR(SCRATCH),%rax
.Liretq_swapgs:
	swapgs
doreti_iret_meltdown:
	iretq
KUTEXT_PAGE_END
/*
 * Back to the "KTEXT" page to fill in the speculation trap and the
 * swapgs+iretq used for non-Meltdown kernels.  This switching back
 * and forth between segments is so that we can do the .space
 * calculation below to guarantee the iretq's above and below line
 * up, so the 'doreti_iret' label lines up with the iretq whether
 * the CPU is affected by Meltdown or not.
 */
KTEXT_PAGE_START
0:	pause
	lfence
	jmp	0b
	.space	(.Liretq_swapgs - XUsyscall_meltdown) - \
		(. - Xsyscall_meltdown), 0xcc
	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
	swapgs

	.globl	doreti_iret
doreti_iret:
	iretq
KTEXT_PAGE_END

	.text
	_ALIGN_TRAPS
.Lintr_restore_xstate:		/* CPU doesn't have curproc's xstate */
	orl	$CPUPF_USERXSTATE,CPUVAR(PFLAGS)
	movq	CPUVAR(CURPCB),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32, %rdx
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTORS)
	//testl	%eax,%eax
	//jnz	.Lintr_xrstor_faulted
.Lintr_restore_fsbase:		/* CPU doesn't have curproc's FS.base */
	orl	$CPUPF_USERSEGS,CPUVAR(PFLAGS)
	movq	CPUVAR(CURPCB),%rdx
	movq	PCB_FSBASE(%rdx),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	movl	$MSR_FSBASE,%ecx
	wrmsr
	jmp	.Lintr_restore_registers

.Lintr_xrstor_faulted:
	/*
	 * xrstor faulted; we need to reset the FPU state and call trap()
	 * to post a signal, which requires interrupts be enabled.
	 */
	sti
	movq	proc0paddr(%rip),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTORS)
	movq	$T_PROTFLT,TF_TRAPNO(%rsp)
	jmp	recall_trap

#ifdef DIAGNOSTIC
.Lintr_user_exit_not_blocked:
	movl	warn_once(%rip),%edi
	testl	%edi,%edi
	jnz	1f
	incl	%edi
	movl	%edi,warn_once(%rip)
	leaq	.Lnot_blocked(%rip),%rdi
	call	printf
#ifdef DDB
	int	$3
#endif /* DDB */
1:	cli
	jmp	intr_user_exit

.Luser_spl_not_lowered:
	sti
	leaq	intr_spl_lowered(%rip),%rdi
	movl	CPUVAR(ILEVEL),%esi
	xorl	%edx,%edx		/* always SPL zero for userspace */
	xorl	%eax,%eax
	call	printf
#ifdef DDB
	int	$3
#endif /* DDB */
	movl	$0,CPUVAR(ILEVEL)
	cli
	jmp	intr_user_exit

	.section .rodata
intr_spl_lowered:
	.asciz	"WARNING: SPL NOT LOWERED ON TRAP EXIT %x %x\n"
	.text
#endif /* DIAGNOSTIC */
END(Xintr_user_exit)
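/*
 * For reference, the hardware frame consumed by iretq that intr_user_exit
 * builds on the trampoline stack has this shape (C sketch; the real
 * offsets are the IRETQ_* constants generated into assym.h):
 *
 *	struct iretq_frame {
 *		u_int64_t rip;		// IRETQ_RIP
 *		u_int64_t cs;		// IRETQ_CS
 *		u_int64_t rflags;	// IRETQ_RFLAGS
 *		u_int64_t rsp;		// IRETQ_RSP
 *		u_int64_t ss;		// IRETQ_SS
 *	};
 */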

/*
 * Return to supervisor mode from trap or interrupt
 */
NENTRY(intr_fast_exit)
#ifdef DIAGNOSTIC
	pushfq
	popq	%rdx
	testq	$PSL_I,%rdx
	jnz	.Lintr_exit_not_blocked
#endif /* DIAGNOSTIC */
	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_R10(%rsp),%r10
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15
	movq	TF_RBP(%rsp),%rbp
	movq	TF_RBX(%rsp),%rbx
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R11(%rsp),%r11
	movq	TF_RAX(%rsp),%rax
	addq	$TF_RIP,%rsp
	iretq

#ifdef DIAGNOSTIC
.Lintr_exit_not_blocked:
	movl	warn_once(%rip),%edi
	testl	%edi,%edi
	jnz	1f
	incl	%edi
	movl	%edi,warn_once(%rip)
	leaq	.Lnot_blocked(%rip),%rdi
	call	printf
#ifdef DDB
	int	$3
#endif /* DDB */
1:	cli
	jmp	intr_fast_exit

	.data
	.global	warn_once
warn_once:
	.long	0
	.section .rodata
.Lnot_blocked:
	.asciz	"WARNING: INTERRUPTS NOT BLOCKED ON INTERRUPT RETURN: 0x%x 0x%x\n"
	.text
#endif
END(intr_fast_exit)

/*
 * FPU/"extended CPU state" handling
 *	void xrstor_kern(sfp, mask)
 *		using first of xrstors/xrstor/fxrstor, load given state
 *		which is assumed to be trusted: i.e., unaltered from
 *		xsaves/xsaveopt/xsave/fxsave by kernel
 *	int xrstor_user(sfp, mask)
 *		using first of xrstor/fxrstor, load given state which might
 *		not be trustable: #GP faults will be caught; returns 0/1 if
 *		okay/it trapped.
 *	void fpusave(sfp)
 *		save current state, but retain it in the FPU
 *	void fpusavereset(sfp)
 *		save current state and reset FPU to initial/kernel state
 *	int xsetbv_user(reg, mask)
 *		load specified %xcr# register, returns 0/1 if okay/it trapped
 */
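/*
 * Caller sketch for xrstor_user() (hypothetical C, simplified): because a
 * #GP raised by user-supplied xstate is caught and turned into a return
 * value, code restoring a signal/ucontext FPU area can reject bad state
 * cleanly instead of faulting in the kernel:
 *
 *	if (xrstor_user(sfp, xsave_mask)) {
 *		// bad user state: keep the clean kernel FPU state in place
 *		return (EINVAL);
 *	}
 */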
ENTRY(xrstor_kern)
	RETGUARD_SETUP(xrstor_kern, r11)
	movq	%rsi, %rdx
	movl	%esi, %eax
	shrq	$32, %rdx
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTORS)
	RETGUARD_CHECK(xrstor_kern, r11)
	ret
	lfence
END(xrstor_kern)

ENTRY(xrstor_user)
	RETGUARD_SETUP(xrstor_user, r11)
	movq	%rsi, %rdx
	movl	%esi, %eax
	shrq	$32, %rdx
	.globl	xrstor_fault
xrstor_fault:
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTOR)
	xorl	%eax, %eax
	RETGUARD_CHECK(xrstor_user, r11)
	ret
	lfence
NENTRY(xrstor_resume)
	movl	$1, %eax
	RETGUARD_CHECK(xrstor_user, r11)
	ret
	lfence
END(xrstor_user)

ENTRY(fpusave)
	RETGUARD_SETUP(fpusave, r11)
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	CODEPATCH_START
	fxsave64	(%rdi)
	CODEPATCH_END(CPTAG_XSAVE)
	RETGUARD_CHECK(fpusave, r11)
	ret
	lfence
END(fpusave)

ENTRY(fpusavereset)
	RETGUARD_SETUP(fpusavereset, r11)
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	CODEPATCH_START
	fxsave64	(%rdi)
	CODEPATCH_END(CPTAG_XSAVE)
	movq	proc0paddr(%rip),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTORS)
	RETGUARD_CHECK(fpusavereset, r11)
	ret
	lfence
END(fpusavereset)

ENTRY(xsetbv_user)
	RETGUARD_SETUP(xsetbv_user, r11)
	movl	%edi, %ecx
	movq	%rsi, %rdx
	movl	%esi, %eax
	shrq	$32, %rdx
	.globl	xsetbv_fault
xsetbv_fault:
	xsetbv
	xorl	%eax, %eax
	RETGUARD_CHECK(xsetbv_user, r11)
	ret
	lfence
NENTRY(xsetbv_resume)
	movl	$1, %eax
	RETGUARD_CHECK(xsetbv_user, r11)
	ret
	lfence
END(xsetbv_user)

CODEPATCH_CODE(_xrstor,		xrstor64 (%rdi))
CODEPATCH_CODE(_xrstors,	xrstors64 (%rdi))
CODEPATCH_CODE(_xsave,		xsave64 (%rdi))
CODEPATCH_CODE(_xsaves,		xsaves64 (%rdi))
CODEPATCH_CODE(_xsaveopt,	xsaveopt64 (%rdi))
CODEPATCH_CODE(_pcid_set_reuse,	orl $(CR3_REUSE_PCID >> 32),CPUVAR(USER_CR3 + 4))
CODEPATCH_CODE_LEN(_jmprax,	jmp *%rax; int3)
CODEPATCH_CODE_LEN(_jmpr11,	jmp *%r11; int3)
CODEPATCH_CODE_LEN(_jmpr13,	jmp *%r13; int3)

ENTRY(pagezero)
	RETGUARD_SETUP(pagezero, r11)
	movq	$-PAGE_SIZE,%rdx
	subq	%rdx,%rdi
	xorq	%rax,%rax
1:
	movnti	%rax,(%rdi,%rdx)
	movnti	%rax,8(%rdi,%rdx)
	movnti	%rax,16(%rdi,%rdx)
	movnti	%rax,24(%rdi,%rdx)
	addq	$32,%rdx
	jne	1b
	sfence
	RETGUARD_CHECK(pagezero, r11)
	ret
	lfence
END(pagezero)
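/*
 * pagezero() above is functionally the C below; the assembly version uses
 * non-temporal movnti stores (plus a closing sfence) so that zeroing a
 * page does not evict useful data from the cache (sketch only):
 *
 *	void
 *	pagezero_c(vaddr_t va)			// hypothetical C variant
 *	{
 *		memset((void *)va, 0, PAGE_SIZE);
 *	}
 */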

/* void pku_xonly(void) */
ENTRY(pku_xonly)
	movq	pg_xo,%rax	/* have PKU support? */
	cmpq	$0,%rax
	je	1f
	movl	$0,%ecx		/* force PKRU for xonly restriction */
	movl	$0,%edx
	movl	$PGK_VALUE,%eax	/* key0 normal, key1 is exec without read */
	wrpkru
1:	ret
	lfence
END(pku_xonly)

/* int rdmsr_safe(u_int msr, uint64_t *data) */
ENTRY(rdmsr_safe)
	RETGUARD_SETUP(rdmsr_safe, r10)

	movl	%edi,	%ecx	/* u_int msr */
	.globl	rdmsr_safe_fault
rdmsr_safe_fault:
	rdmsr
	salq	$32, %rdx
	movl	%eax, %eax
	orq	%rdx, %rax
	movq	%rax, (%rsi)	/* *data */
	xorq	%rax, %rax

	RETGUARD_CHECK(rdmsr_safe, r10)
	ret
	lfence

NENTRY(rdmsr_resume)
	movl	$0x1, %eax
	RETGUARD_CHECK(rdmsr_safe, r10)
	ret
	lfence
END(rdmsr_safe)

#if NHYPERV > 0
/* uint64_t hv_hypercall_trampoline(uint64_t control, paddr_t input, paddr_t output) */
NENTRY(hv_hypercall_trampoline)
	endbr64
	mov	%rdx, %r8
	mov	%rsi, %rdx
	mov	%rdi, %rcx
	jmp	hv_hypercall_page
END(hv_hypercall_trampoline)

	/* Hypercall page needs to be page aligned */
	.text
	.align	NBPG, 0xcc
	.globl	hv_hypercall_page
hv_hypercall_page:
	.skip	0x1000, 0xcc
#endif /* NHYPERV > 0 */

#if NXEN > 0
	/* Hypercall page needs to be page aligned */
	.text
	.align	NBPG, 0xcc
	.globl	xen_hypercall_page
xen_hypercall_page:
	.skip	0x1000, 0xcc
#endif /* NXEN > 0 */
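/*
 * Usage sketch for rdmsr_safe() above (hypothetical caller, in C): it
 * returns 0 and fills *data when the MSR can be read, or 1 if the rdmsr
 * faulted, so probe code can test for optional MSRs without panicking:
 *
 *	uint64_t val;
 *
 *	if (rdmsr_safe(msr_number, &val) == 0)	// msr_number: caller's MSR
 *		printf("MSR %x = %llx\n", msr_number, val);
 */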