entry.S
Uploaded by: jlfgdled
Upload date: 2013-04-10
Archive size: 33168k
File size: 13k
- /*
- * linux/arch/x86_64/entry.S
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
- * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
- *
- * $Id: entry.S,v 1.81 2002/09/12 12:55:25 ak Exp $
- */
- /*
- * entry.S contains the system-call and fault low-level handling routines.
- *
- * NOTE: This code handles signal-recognition, which happens every time
- * after an interrupt and after each system call.
- *
- * Normal syscalls and interrupts don't save a full stack frame, this is
- * only done for PT_TRACESYS, signals or fork/exec et.al.
- *
- * TODO:
- * - schedule it carefully for the final hardware.
- *
- */
- #define ASSEMBLY 1
- #include <linux/config.h>
- #include <linux/linkage.h>
- #include <asm/segment.h>
- #include <asm/current.h>
- #include <asm/smp.h>
- #include <asm/cache.h>
- #include <asm/errno.h>
- #include <asm/calling.h>
- #include <asm/offset.h>
- #include <asm/msr.h>
- #include <asm/unistd.h>
- .code64
- #define PDAREF(field) %gs:field
- /*
- * C code is not supposed to know about partial frames. Every time a C function
- * that looks at the pt_regs is called these two macros are executed around it.
- * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
- * manipulation.
- */
-
- /* %rsp:at FRAMEEND */
- /* Complete a partial SYSCALL frame into a full pt_regs: fill in the saved
-  * user RSP/SS/CS and copy eflags out of the R11 slot (SYSCALL put user
-  * eflags in %r11); RCX is poisoned because the return address already
-  * lives in RIP.  \tmp is a caller-supplied scratch register.
-  * NOTE: GAS macro parameters must be referenced as \tmp -- a bare "tmp"
-  * would assemble as an undefined symbol. */
- .macro FIXUP_TOP_OF_STACK tmp
- movq PDAREF(pda_oldrsp),\tmp
- movq \tmp,RSP(%rsp)
- movq $__USER_DS,SS(%rsp)
- movq $__USER_CS,CS(%rsp)
- movq $-1,RCX(%rsp) /* contains return address, already in RIP */
- movq R11(%rsp),\tmp /* get eflags */
- movq \tmp,EFLAGS(%rsp)
- .endm
- /* Inverse of FIXUP_TOP_OF_STACK: propagate any ptrace-made changes to
-  * the RSP/EFLAGS slots back into the SYSRET state (pda_oldrsp and the
-  * R11 slot).  \offset accounts for extra data pushed above the frame.
-  * Macro parameters need the \ prefix to be substituted by GAS. */
- .macro RESTORE_TOP_OF_STACK tmp,offset=0
- movq RSP-\offset(%rsp),\tmp
- movq \tmp,PDAREF(pda_oldrsp)
- movq EFLAGS-\offset(%rsp),\tmp
- movq \tmp,R11-\offset(%rsp)
- .endm
- /*
- * A newly forked process directly context switches into this.
- * %rax holds the previous task (return value of __switch_to) and is
- * handed to schedule_tail.  Traced children (PT_TRACESYS) notify the
- * tracer first; kernel threads and 32bit tasks leave through the IRET
- * path (int_ret_from_sys_call), everything else through SYSRET.
- */
- ENTRY(ret_from_fork)
- movq %rax,%rdi /* return value of __switch_to -> prev task */
- call schedule_tail
- GET_CURRENT(%rcx)
- testb $PT_TRACESYS,tsk_ptrace(%rcx)
- jnz 2f # traced: report to the tracer first
- 1:
- RESTORE_REST
- testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
- jz int_ret_from_sys_call
- testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
- jnz int_ret_from_sys_call # 32bit child: must return via IRET
- RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
- jmp ret_from_sys_call
- 2:
- movq %rsp,%rdi # pt_regs for syscall_trace
- call syscall_trace
- jmp 1b
- /*
- * System call entry. Up to 6 arguments in registers are supported.
- *
- * SYSCALL does not save anything on the stack and does not change the
- * stack pointer. Gets the per CPU area from the hidden GS MSR and finds the
- * current kernel stack.
- */
-
- /*
- * Register setup:
- * rax system call number
- * rdi arg0
- * rcx return address for syscall/sysret, C arg3
- * rsi arg1
- * rdx arg2
- * r10 arg3 (--> moved to rcx for C)
- * r8 arg4
- * r9 arg5
- * r11 eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
- *
- * Interrupts are off on entry.
- * Only called from user space.
- */
- ENTRY(system_call)
- swapgs # switch to the kernel GS base (per-CPU pda)
- movq %rsp,PDAREF(pda_oldrsp) # stash the user stack pointer
- movq PDAREF(pda_kernelstack),%rsp # switch to the kernel stack
- sti # interrupts are safe once on the kernel stack
- SAVE_ARGS 8,1
- movq %rax,ORIG_RAX-ARGOFFSET(%rsp) # save the syscall number (orig_rax slot)
- movq %rcx,RIP-ARGOFFSET(%rsp) # %rcx = user return address (SYSCALL semantics)
- GET_CURRENT(%rcx)
- testl $PT_TRACESYS,tsk_ptrace(%rcx)
- jne tracesys # traced task: take the slow path
- cmpq $__NR_syscall_max,%rax
- ja badsys # out-of-range syscall number
- movq %r10,%rcx # C ABI takes arg3 in %rcx, syscall ABI in %r10
- call *sys_call_table(,%rax,8) # XXX: rip relative
- movq %rax,RAX-ARGOFFSET(%rsp)
- .globl ret_from_sys_call
- ret_from_sys_call:
- sysret_with_reschedule:
- /* SYSRET return: re-check need_resched/sigpending with interrupts
-  * off so no new work can slip in after the checks. */
- GET_CURRENT(%rcx)
- cli
- cmpq $0,tsk_need_resched(%rcx)
- jne sysret_reschedule
- cmpl $0,tsk_sigpending(%rcx)
- jne sysret_signal
- sysret_restore_args:
- movq RIP-ARGOFFSET(%rsp),%rcx # SYSRET returns to %rcx
- RESTORE_ARGS 0,-ARG_SKIP,1
- movq PDAREF(pda_oldrsp),%rsp # back onto the user stack
- swapgs
- sysretq
-
- sysret_signal:
- /* Pending signal on the SYSRET path: deliver via do_signal through
-  * ptregscall_common (which builds the full pt_regs frame), then
-  * re-check need_resched before restoring. */
- sti
- xorl %esi,%esi # oldset
- leaq -ARGOFFSET(%rsp),%rdi # regs
- leaq do_signal(%rip),%rax
- call ptregscall_common
- sysret_signal_test:
- GET_CURRENT(%rcx)
- cli # check need_resched with interrupts off
- cmpq $0,tsk_need_resched(%rcx)
- je sysret_restore_args
- sti
- call schedule
- jmp sysret_signal_test
- 
- sysret_reschedule:
- sti # schedule() runs with interrupts enabled
- call schedule
- jmp sysret_with_reschedule # redo the work checks from the top
-
- /* Slow path for PT_TRACESYS tasks: save the full frame and let the
-  * tracer inspect/modify the registers both before and after the call. */
- tracesys:
- SAVE_REST
- movq $-ENOSYS,RAX(%rsp) # default result if the tracer skips the call
- FIXUP_TOP_OF_STACK %rdi
- movq %rsp,%rdi # pt_regs for syscall_trace
- call syscall_trace
- LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
- RESTORE_REST
- cmpq $__NR_syscall_max,%rax
- ja tracesys_done
- tracesys_call: /* backtrace marker */
- movq %r10,%rcx /* fixup for C */
- call *sys_call_table(,%rax,8)
- movq %rax,RAX-ARGOFFSET(%rsp)
- tracesys_done: /* backtrace marker */
- SAVE_REST
- movq %rsp,%rdi
- call syscall_trace # report syscall exit to the tracer
- RESTORE_TOP_OF_STACK %rbx
- RESTORE_REST
- jmp ret_from_sys_call
-
- /* Syscall number above __NR_syscall_max: fail with -ENOSYS. */
- badsys:
- movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
- jmp ret_from_sys_call
- /*
- * Syscall return path ending with IRET.
- * This can be either 64bit calls that require restoring of all registers
- * (impossible with sysret) or 32bit calls.
- */
- ENTRY(int_ret_from_sys_call)
- /* IRET return path: used when SYSRET cannot restore the needed state
-  * (full-register 64bit calls) and for 32bit syscalls. */
- intret_test_kernel:
- testl $3,CS-ARGOFFSET(%rsp) # CPL bits: 0 = returning to kernel
- je retint_restore_args # kernel return: no work checks, no swapgs
- intret_with_reschedule:
- GET_CURRENT(%rcx)
- cli # close the race between check and return
- cmpq $0,tsk_need_resched(%rcx)
- jne intret_reschedule
- cmpl $0,tsk_sigpending(%rcx)
- jne intret_signal
- jmp retint_restore_args_swapgs
- 
- intret_reschedule:
- sti
- call schedule
- jmp intret_with_reschedule # redo all work checks
- intret_signal:
- sti
- SAVE_REST
- xorq %rsi,%rsi # oldset -> arg2
- movq %rsp,%rdi # &ptregs -> arg1
- call do_signal
- RESTORE_REST
- intret_signal_test:
- GET_CURRENT(%rcx)
- cli
- cmpq $0,tsk_need_resched(%rcx)
- je retint_restore_args_swapgs
- sti
- call schedule
- jmp intret_signal_test
-
- /*
- * Certain special system calls that need to save a complete stack frame.
- */
-
- /* Emit a stub named \label that loads the C handler \func into %rax and
-  * tails into ptregscall_common, which builds the full pt_regs frame.
-  * GAS macro parameters must carry the \ prefix to be substituted;
-  * bare "label"/"func" would assemble as undefined symbols. */
- .macro PTREGSCALL label,func
- .globl \label
- \label:
- leaq \func(%rip),%rax
- jmp ptregscall_common
- .endm
- /* Syscalls that need the complete pt_regs frame as implicit argument. */
- PTREGSCALL stub_clone, sys_clone
- PTREGSCALL stub_fork, sys_fork
- PTREGSCALL stub_vfork, sys_vfork
- PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend
- PTREGSCALL stub_sigaltstack, sys_sigaltstack
- PTREGSCALL stub_iopl, sys_iopl
- /* Common tail for the PTREGSCALL stubs.  %rax = C handler to invoke.
-  * The return address is popped into %r11 and parked in %r15 across the
-  * call (the frame slot for %r15 is saved/restored by SAVE_REST /
-  * RESTORE_REST), then pushed back so `ret` returns to the caller. */
- ENTRY(ptregscall_common)
- popq %r11 # return address -> %r11
- SAVE_REST
- movq %r11, %r15 # park it in a register the callee preserves
- FIXUP_TOP_OF_STACK %r11
- call *%rax
- RESTORE_TOP_OF_STACK %r11
- movq %r15, %r11
- RESTORE_REST
- pushq %r11 # put the return address back
- ret
-
- /* execve needs a full frame like the PTREGSCALL stubs, but a task that
-  * became 32bit must leave through the IRET path instead of SYSRET. */
- ENTRY(stub_execve)
- popq %r11 # return address -> %r11
- SAVE_REST
- movq %r11, %r15 # park return address in callee-saved %r15
- FIXUP_TOP_OF_STACK %r11
- call sys_execve
- GET_CURRENT(%rcx)
- testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
- jnz exec_32bit
- RESTORE_TOP_OF_STACK %r11
- movq %r15, %r11
- RESTORE_REST
- push %r11 # restore return address for ret
- ret
- exec_32bit:
- movq %rax,RAX(%rsp) # result into the frame; return via IRET
- RESTORE_REST
- jmp int_ret_from_sys_call
-
- /*
- * sigreturn is special because it needs to restore all registers on return.
- * This cannot be done with SYSRET, so use the IRET return path instead.
- */
- ENTRY(stub_rt_sigreturn)
- addq $8, %rsp # drop the return address: we leave via IRET, not ret
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
- call sys_rt_sigreturn
- movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
- RESTORE_REST
- jmp int_ret_from_sys_call
- /*
- * Interrupt entry/exit.
- *
- * Interrupt entry points save only callee clobbered registers, except
- * for signals again.
- *
- * Entry runs with interrupts off.
- */
- /* 0(%rsp): interrupt number */
- ENTRY(common_interrupt)
- testl $3,16(%rsp) # from kernel?
- je 1f
- swapgs # from user space: load the kernel GS base
- 1: cld
- SAVE_ARGS
- leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
- addl $1,PDAREF(pda_irqcount) # XXX: should be merged with irq.c irqcount
- movq PDAREF(pda_irqstackptr),%rax
- cmoveq %rax,%rsp # outermost irq (ZF from the addl): switch to irq stack
- pushq %rdi # save old stack
- call do_IRQ
- /* falls through into ret_from_intr below */
- /* 0(%rsp): oldrsp-ARGOFFSET */
- ENTRY(ret_from_intr)
- cli
- popq %rdi # old stack pointer saved by common_interrupt
- subl $1,PDAREF(pda_irqcount)
- leaq ARGOFFSET(%rdi),%rsp # back onto the interrupted stack
- testl $3,CS(%rdi) # from kernel?
- je retint_restore_args
- /* Interrupt came from user space */
- retint_with_reschedule:
- GET_CURRENT(%rcx)
- cmpq $0,tsk_need_resched(%rcx)
- jne retint_reschedule
- cmpl $0,tsk_sigpending(%rcx)
- jne retint_signal
- retint_restore_args_swapgs:
- swapgs # back to the user GS base
- retint_restore_args:
- RESTORE_ARGS 0,8
- iret_label:
- iretq
- .section __ex_table,"a"
- .align 8
- .quad iret_label,bad_iret # recover if the iretq itself faults
- .previous
- .section .fixup,"ax"
- /* force a signal here? this matches i386 behaviour */
- bad_iret:
- movq $-9999,%rdi /* better code? */
- jmp do_exit
- .previous
- retint_signal:
- sti
- SAVE_REST
- movq $-1,ORIG_RAX(%rsp) # ORIG_RAX = -1: not a syscall frame
- xorq %rsi,%rsi # oldset
- movq %rsp,%rdi # &pt_regs
- call do_signal
- RESTORE_REST
- retint_signal_test:
- cli # re-check work flags with interrupts off
- GET_CURRENT(%rcx)
- cmpq $0,tsk_need_resched(%rcx)
- je retint_restore_args_swapgs
- sti
- call schedule
- jmp retint_signal_test
- 
- retint_reschedule:
- sti
- call schedule
- cli
- jmp retint_with_reschedule
-
- /*
- * Exception entry points.
- */
- /* Exception entry where the CPU pushes no error code: push 0 as a fake
-  * one, then hand the C handler \sym to error_entry in %rax.
-  * (GAS macro parameters require the \ prefix to be substituted.) */
- .macro zeroentry sym
- pushq $0 /* push error code/oldrax */
- pushq %rax /* push real oldrax to the rdi slot */
- leaq \sym(%rip),%rax
- jmp error_entry
- .endm
- /* Exception entry where the CPU pushed a real error code. */
- .macro errorentry sym
- pushq %rax
- leaq \sym(%rip),%rax
- jmp error_entry
- .endm
- /*
- * Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.
- */
- ALIGN
- error_entry:
- /* rdi slot contains rax, oldrax contains error code */
- pushq %rsi
- movq 8(%rsp),%rsi /* load rax */
- pushq %rdx
- pushq %rcx
- pushq %rsi /* store rax */
- pushq %r8
- pushq %r9
- pushq %r10
- pushq %r11
- cld
- SAVE_REST
- testl $3,CS(%rsp) # exception from user mode?
- je error_kernelspace
- swapgs
- movl $1,%r15d # %r15d = 1: swapgs needed on the way out
- error_action:
- sti
- movq %rdi,RDI(%rsp)
- movq %rsp,%rdi # handler arg1: pt_regs
- movq ORIG_RAX(%rsp),%rsi /* get error code */
- movq $-1,ORIG_RAX(%rsp) # mark frame as not a syscall
- call *%rax # handler installed by zeroentry/errorentry
- /* r15d: swapgs flag */
- error_exit:
- testl %r15d,%r15d
- jz error_restore # kernel fault: no work checks, no swapgs
- error_test:
- cli
- GET_CURRENT(%rcx)
- cmpq $0,tsk_need_resched(%rcx)
- jne error_reschedule
- cmpl $0,tsk_sigpending(%rcx)
- jne error_signal
- error_restore_swapgs:
- swapgs
- error_restore:
- RESTORE_REST
- jmp retint_restore_args
- 
- error_reschedule:
- sti
- call schedule
- jmp error_test
- error_signal:
- sti
- xorq %rsi,%rsi # oldset
- movq %rsp,%rdi # &pt_regs
- call do_signal
- error_signal_test:
- GET_CURRENT(%rcx)
- cli
- cmpq $0,tsk_need_resched(%rcx)
- je error_restore_swapgs
- sti
- call schedule
- jmp error_signal_test
- 
- error_kernelspace:
- xorl %r15d,%r15d
- /* A fault on the iretq fires with the user GS still loaded even though
-  * CS says kernel, so treat it like a user entry: swapgs + set flag. */
- cmpq $iret_label,RIP(%rsp)
- jne error_action
- movl $1,%r15d
- swapgs
- jmp error_action
- /*
- * Create a kernel thread.
- *
- * C extern interface:
- * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
- *
- * asm input arguments:
- * rdi: fn, rsi: arg, rdx: flags
- */
- ENTRY(kernel_thread)
- FAKE_STACK_FRAME $child_rip # fake frame: the child "returns" to child_rip
- SAVE_ALL
- # rdi: flags, rsi: usp, rdx: will be &pt_regs
- movq %rdx,%rdi
- orq $CLONE_VM, %rdi # kernel threads always share the address space
- movq $-1, %rsi # no user stack pointer
- movq %rsp, %rdx
- # clone now
- call do_fork
- # save retval on the stack so it's popped before `ret`
- movq %rax, RAX(%rsp)
- /*
- * It isn't worth checking for reschedule here,
- * so internally to the x86_64 port you can rely on kernel_thread()
- * not to reschedule the child before returning, this avoids the need
- * of hacks for example to fork off the per-CPU idle tasks.
- * [Hopefully no generic code relies on the reschedule -AK]
- */
- RESTORE_ALL
- UNFAKE_STACK_FRAME
- ret
-
- child_rip:
- /*
- * Here we are in the child and the registers are set as they were
- * at kernel_thread() invocation in the parent.
- * Call fn(arg), then exit with code 0 if fn returns.
- */
- movq %rdi, %rax # %rax = fn
- movq %rsi, %rdi # arg becomes fn's first argument
- call *%rax
- # exit
- xorq %rdi, %rdi # exit code 0
- call do_exit
- /*
- * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
- *
- * C extern interface:
- * extern long execve(char *name, char **argv, char **envp)
- *
- * asm input arguments:
- * rdi: name, rsi: argv, rdx: envp
- *
- * We want to fallback into:
- * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
- *
- * do_sys_execve asm fallback arguments:
- * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
- */
- ENTRY(execve)
- FAKE_STACK_FRAME $0
- SAVE_ALL
- call sys_execve
- movq %rax, RAX(%rsp) # result into the frame
- RESTORE_REST
- testq %rax,%rax
- je int_ret_from_sys_call # success: full state reload needs IRET
- RESTORE_ARGS
- UNFAKE_STACK_FRAME
- ret # failure: plain return, error code in %rax
- ENTRY(page_fault)
- #ifdef CONFIG_KDB
- /* KDB: stop last-branch recording (MSR 473, clear bit 0) before the
-  * fault is handled; scratch registers are preserved around rdmsr/wrmsr. */
- pushq %rcx
- pushq %rdx
- pushq %rax
- movl $473,%ecx
- rdmsr
- andl $0xfffffffe,%eax /* Disable last branch recording */
- wrmsr
- popq %rax
- popq %rdx
- popq %rcx
- #endif
- errorentry do_page_fault
- ENTRY(coprocessor_error)
- zeroentry do_coprocessor_error
- ENTRY(simd_coprocessor_error)
- zeroentry do_simd_coprocessor_error
- /* Device-not-available: hand-rolled entry (not zeroentry) because it
-  * dispatches on CR0.EM between FPU state restore and emulation. */
- ENTRY(device_not_available)
- pushq $-1 # fake orig_rax
- SAVE_ALL
- xorl %r15d,%r15d # swapgs flag for error_exit
- testl $3,CS(%rsp)
- jz 1f
- swapgs
- movl $1,%r15d
- 1:
- movq %cr0,%rax
- leaq math_state_restore(%rip),%rcx
- leaq math_emulate(%rip),%rbx
- testl $0x4,%eax # CR0.EM set?
- cmoveq %rcx,%rbx # EM clear -> restore FPU state, else emulate
- call *%rbx
- jmp error_exit
- ENTRY(debug)
- zeroentry do_debug
- /* NMI: cannot trust CS to decide about swapgs, because the NMI may have
-  * hit inside the swapgs window; read the GS base MSR instead. */
- ENTRY(nmi)
- pushq $-1 # fake orig_rax
- SAVE_ALL
- /* NMI could happen inside the critical section of a swapgs,
- so it is needed to use this expensive way to check.
- Rely on arch_prctl forbidding user space from setting a negative
- GS. Only the kernel value is negative. */
- movl $MSR_GS_BASE,%ecx
- rdmsr
- xorl %ebx,%ebx # %ebx: set to 1 below if we swapped
- testl %edx,%edx
- js 1f # GS base negative -> already kernel GS
- swapgs
- movl $1,%ebx
- 1: movq %rsp,%rdi
- call do_nmi
- cli
- testl %ebx,%ebx
- jz error_restore
- swapgs # undo our swapgs before returning
- jmp error_restore
-
- /* Remaining exception entry points: thin wrappers that route through
-  * the zeroentry/errorentry macros to their C handlers. */
- ENTRY(int3)
- zeroentry do_int3
- ENTRY(overflow)
- zeroentry do_overflow
- ENTRY(bounds)
- zeroentry do_bounds
- ENTRY(invalid_op)
- zeroentry do_invalid_op
- ENTRY(coprocessor_segment_overrun)
- zeroentry do_coprocessor_segment_overrun
- ENTRY(reserved)
- zeroentry do_reserved
- ENTRY(double_fault)
- errorentry do_double_fault
- ENTRY(invalid_TSS)
- errorentry do_invalid_TSS
- ENTRY(segment_not_present)
- errorentry do_segment_not_present
- ENTRY(stack_segment)
- errorentry do_stack_segment
- ENTRY(general_protection)
- errorentry do_general_protection
- ENTRY(alignment_check)
- errorentry do_alignment_check
- ENTRY(divide_error)
- zeroentry do_divide_error
- ENTRY(spurious_interrupt_bug)
- zeroentry do_spurious_interrupt_bug
- ENTRY(machine_check)
- zeroentry do_machine_check
- ENTRY(call_debug)
- zeroentry do_call_debug
-