entry_32.S 33.3 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 * This also contains the timer-interrupt handler, as well as all interrupts
 * and faults that can result in a task-switch.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after a timer-interrupt and after each system call.
 *
 * I changed all the .align's to 4 (16 byte alignment), as that's faster
 * on a 486.
 *
 * Stack layout in 'syscall_exit':
 * 	ptrace needs to have all regs on the stack.
 *	if the order here is changed, it needs to be
 *	updated in fork.c:copy_process, signal.c:do_signal,
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
 *
 * "current" is in register %ebx during any slow entries.
 */

#include <linux/linkage.h>
#include <asm/thread_info.h>
46
#include <asm/irqflags.h>
Linus Torvalds's avatar
Linus Torvalds committed
47 48 49
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
50
#include <asm/page_types.h>
Stas Sergeev's avatar
Stas Sergeev committed
51
#include <asm/percpu.h>
52
#include <asm/dwarf2.h>
53
#include <asm/processor-flags.h>
54
#include <asm/ftrace.h>
55
#include <asm/irq_vectors.h>
56
#include <asm/cpufeature.h>
Linus Torvalds's avatar
Linus Torvalds committed
57

Roland McGrath's avatar
Roland McGrath committed
58 59 60 61 62 63 64 65 66 67
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_LE	   0x40000000

#ifndef CONFIG_AUDITSYSCALL
#define sysenter_audit	syscall_trace_entry
#define sysexit_audit	syscall_exit_work
#endif

68 69 70 71 72
/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
73
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
74 75 76 77 78 79 80
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

Linus Torvalds's avatar
Linus Torvalds committed
81 82 83
#define nr_syscalls ((syscall_table_size)/4)

#ifdef CONFIG_PREEMPT
84
#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
Linus Torvalds's avatar
Linus Torvalds committed
85
#else
86
#define preempt_stop(clobbers)
87
#define resume_kernel		restore_all
Linus Torvalds's avatar
Linus Torvalds committed
88 89
#endif

90 91
/*
 * TRACE_IRQS_IRET - irq-flags tracing hook used just before an iret.
 *
 * With CONFIG_TRACE_IRQFLAGS, looks at the EFLAGS image saved in the
 * pt_regs frame at %esp: if IF is set there, TRACE_IRQS_ON is invoked;
 * if IF is clear the hook is skipped.  Without CONFIG_TRACE_IRQFLAGS
 * the macro expands to nothing.
 *
 * Local label 1 is used.
 */
.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)     # interrupts off?
	jz 1f
	TRACE_IRQS_ON
1:
#endif
.endm

99 100 101 102 103 104
#ifdef CONFIG_VM86
#define resume_userspace_sig	check_userspace
#else
#define resume_userspace_sig	resume_userspace
#endif

105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
/*
 * User gs save/restore
 *
 * %gs is used for userland TLS and kernel only uses it for stack
 * canary which is required to be at %gs:20 by gcc.  Read the comment
 * at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
 */
#ifdef CONFIG_X86_32_LAZY_GS

 /* unfortunately push/pop can't be no-op */
/*
 * PUSH_GS (lazy-gs variant): %gs itself is not saved; a zero word is
 * pushed so the frame still has a 4-byte slot at the %gs position.
 * The CFA offset is advanced by the same 4 bytes.
 */
.macro PUSH_GS
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
.endm
/*
 * POP_GS (lazy-gs variant): drop the 4-byte placeholder pushed by
 * PUSH_GS without loading %gs.
 *
 *   pop - additional number of bytes to discard from the stack in the
 *         same adjustment (default 0).
 *
 * The CFA offset is reduced by the full 4 + pop bytes here.
 */
.macro POP_GS pop=0
	addl $(4 + \pop), %esp
	CFI_ADJUST_CFA_OFFSET -(4 + \pop)
.endm
.macro POP_GS_EX
.endm

 /* all the rest are no-op */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

/*
 * PUSH_GS (non-lazy variant): save the current %gs on the stack and
 * advance the CFA offset by 4.  No CFI register rule is emitted for gs
 * (the annotation is commented out).
 */
.macro PUSH_GS
	pushl %gs
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET gs, 0*/
.endm

/*
 * POP_GS (non-lazy variant): reload %gs from the top of the stack.
 *
 *   pop - additional number of bytes to discard from the stack after
 *         %gs has been popped (default 0; no instruction is emitted
 *         when it is 0).
 *
 * The CFA offset is reduced by 4 for the pop and by a further `pop`
 * bytes for the optional adjustment, so callers need no extra CFI
 * adjustment for the discarded bytes.
 *
 * The popl is tagged with local label 98 so that POP_GS_EX can attach
 * an exception-table entry to it.
 */
.macro POP_GS pop=0
98:	popl %gs
	CFI_ADJUST_CFA_OFFSET -4
	/*CFI_RESTORE gs*/
  .if \pop <> 0
	add $\pop, %esp
	CFI_ADJUST_CFA_OFFSET -\pop
  .endif
.endm
/*
 * POP_GS_EX: exception fixup companion of POP_GS.
 *
 * Emits, out of line in .fixup, code (label 99) that stores 0 into the
 * stack slot at (%esp) and jumps back to label 98, and adds an
 * __ex_table entry pairing 98b with 99b.  It must follow the POP_GS it
 * covers so that "98b" resolves to that macro's popl.
 */
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl $0, (%esp)
	jmp 98b
.section __ex_table, "a"
	.align 4
	.long 98b, 99b
.popsection
.endm

/*
 * PTGS_TO_GS: load %gs from the PT_GS slot of the pt_regs frame at
 * %esp, leaving the stack pointer untouched.  The load is tagged with
 * local label 98 for PTGS_TO_GS_EX.
 */
.macro PTGS_TO_GS
98:	mov PT_GS(%esp), %gs
.endm
/*
 * PTGS_TO_GS_EX: exception fixup companion of PTGS_TO_GS.
 *
 * Emits, out of line in .fixup, code (label 99) that zeroes the PT_GS
 * slot of the frame and jumps back to label 98, and adds an __ex_table
 * entry pairing 98b with 99b.  It must follow the PTGS_TO_GS it covers
 * so that "98b" resolves to that macro's load.
 */
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl $0, PT_GS(%esp)
	jmp 98b
.section __ex_table, "a"
	.align 4
	.long 98b, 99b
.popsection
.endm

/*
 * GS_TO_REG: copy the current %gs selector into general register \reg.
 */
.macro GS_TO_REG reg
	movl %gs, \reg
	/*CFI_REGISTER gs, \reg*/
.endm
/*
 * REG_TO_PTGS: store \reg into the PT_GS slot of the pt_regs frame at
 * %esp.
 */
.macro REG_TO_PTGS reg
	movl \reg, PT_GS(%esp)
	/*CFI_REL_OFFSET gs, PT_GS*/
.endm
/*
 * SET_KERNEL_GS: load %gs with the __KERNEL_STACK_CANARY selector.
 *
 *   reg - scratch general register; it is overwritten with the
 *         selector value.
 */
.macro SET_KERNEL_GS reg
	movl $(__KERNEL_STACK_CANARY), \reg
	movl \reg, %gs
.endm

#endif	/* CONFIG_X86_32_LAZY_GS */

195 196
/*
 * SAVE_ALL: build the register part of the pt_regs frame.
 *
 * Clears DF, then pushes (in this order) gs (via PUSH_GS), fs, es, ds,
 * eax, ebp, edi, esi, edx, ecx, ebx, so that %ebx ends up at 0(%esp)
 * as described in the stack-layout comment at the top of the file.
 * Each push is matched by a CFA adjustment; the general registers also
 * get CFI save rules.
 *
 * Afterwards the segment registers are switched to kernel values:
 * %ds/%es = __USER_DS, %fs = __KERNEL_PERCPU, and %gs via
 * SET_KERNEL_GS.
 *
 * Clobbers: %edx (its original value has already been saved).
 */
.macro SAVE_ALL
	cld
	PUSH_GS
	pushl %fs
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET fs, 0;*/
	pushl %es
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET es, 0;*/
	pushl %ds
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ds, 0;*/
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eax, 0
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebp, 0
	pushl %edi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edi, 0
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %edx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edx, 0
	pushl %ecx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ecx, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	movl $(__USER_DS), %edx
	movl %edx, %ds
	movl %edx, %es
	movl $(__KERNEL_PERCPU), %edx
	movl %edx, %fs
	SET_KERNEL_GS %edx
.endm
Linus Torvalds's avatar
Linus Torvalds committed
235

236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
/*
 * RESTORE_INT_REGS: pop the seven general registers saved by SAVE_ALL
 * (ebx, ecx, edx, esi, edi, ebp, eax - the reverse of the push order).
 * Every pop reduces the CFA offset by 4 and marks the register as
 * restored for the unwinder.
 */
.macro RESTORE_INT_REGS
	popl %ebx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ebx
	popl %ecx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ecx
	popl %edx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE edx
	popl %esi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE esi
	popl %edi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE edi
	popl %ebp
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ebp
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE eax
.endm
Linus Torvalds's avatar
Linus Torvalds committed
259

260
/*
 * RESTORE_REGS: undo SAVE_ALL.
 *
 *   pop - extra bytes to discard from the stack after %gs has been
 *         restored; passed straight through to POP_GS (default 0).
 *
 * Pops the general registers (RESTORE_INT_REGS), then ds, es, fs and
 * finally gs (POP_GS).  POP_GS also accounts for `pop` in the CFA
 * offset.
 *
 * Each segment pop (labels 1, 2, 3) has an __ex_table entry whose
 * fixup (labels 4, 5, 6 in .fixup) stores 0 into the slot at (%esp)
 * and jumps back to the pop.  POP_GS_EX adds the equivalent fixup for
 * the %gs pop.
 *
 * Local labels 1-6 (plus 98/99 from the GS macros) are used.
 */
.macro RESTORE_REGS pop=0
	RESTORE_INT_REGS
1:	popl %ds
	CFI_ADJUST_CFA_OFFSET -4
	/*CFI_RESTORE ds;*/
2:	popl %es
	CFI_ADJUST_CFA_OFFSET -4
	/*CFI_RESTORE es;*/
3:	popl %fs
	CFI_ADJUST_CFA_OFFSET -4
	/*CFI_RESTORE fs;*/
	POP_GS \pop
.pushsection .fixup, "ax"
4:	movl $0, (%esp)
	jmp 1b
5:	movl $0, (%esp)
	jmp 2b
6:	movl $0, (%esp)
	jmp 3b
.section __ex_table, "a"
	.align 4
	.long 1b, 4b
	.long 2b, 5b
	.long 3b, 6b
.popsection
	POP_GS_EX
.endm
Linus Torvalds's avatar
Linus Torvalds committed
287

288 289 290 291 292
/*
 * RING0_INT_FRAME: open a CFI procedure (signal frame) for an entry
 * point whose stack holds three words at %esp: the CFA is esp + 3*4
 * and the return eip is recorded at CFA - 3*4, i.e. at 0(%esp).
 */
.macro RING0_INT_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 3*4
	/*CFI_OFFSET cs, -2*4;*/
	CFI_OFFSET eip, -3*4
.endm
295

296 297 298 299 300
/*
 * RING0_EC_FRAME: like RING0_INT_FRAME, but for an entry point with
 * one additional word on the stack: the CFA is esp + 4*4 while eip is
 * still recorded at CFA - 3*4, i.e. at 4(%esp).
 */
.macro RING0_EC_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 4*4
	/*CFI_OFFSET cs, -2*4;*/
	CFI_OFFSET eip, -3*4
.endm
303

304 305 306 307 308 309 310 311 312 313 314 315 316 317
/*
 * RING0_PTREGS_FRAME: open a CFI procedure (signal frame) for code
 * that runs with a complete pt_regs frame at %esp.  The CFA is placed
 * at the PT_OLDESP slot (esp + PT_OLDESP - PT_EBX) and eip plus the
 * seven general registers are described at their pt_regs offsets
 * relative to it.
 */
.macro RING0_PTREGS_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
	CFI_OFFSET eip, PT_EIP-PT_OLDESP
	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
	CFI_OFFSET eax, PT_EAX-PT_OLDESP
	CFI_OFFSET ebp, PT_EBP-PT_OLDESP
	CFI_OFFSET edi, PT_EDI-PT_OLDESP
	CFI_OFFSET esi, PT_ESI-PT_OLDESP
	CFI_OFFSET edx, PT_EDX-PT_OLDESP
	CFI_OFFSET ecx, PT_ECX-PT_OLDESP
	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
.endm
Linus Torvalds's avatar
Linus Torvalds committed
320 321

/*
 * ret_from_fork: calls schedule_tail with %eax preserved across the
 * call, loads thread_info into %ebp, resets EFLAGS to 0x0202 (IF set
 * plus the always-one bit 1) and continues at syscall_exit.
 */
ENTRY(ret_from_fork)
	CFI_STARTPROC
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	call schedule_tail
	GET_THREAD_INFO(%ebp)
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	pushl $0x0202			# Reset kernel eflags
	CFI_ADJUST_CFA_OFFSET 4
	popfl
	CFI_ADJUST_CFA_OFFSET -4
	jmp syscall_exit
	CFI_ENDPROC
END(ret_from_fork)
Linus Torvalds's avatar
Linus Torvalds committed
336

337 338 339 340
/*
 * Interrupt exit functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
Linus Torvalds's avatar
Linus Torvalds committed
341 342 343 344 345 346 347 348 349
/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
	RING0_PTREGS_FRAME
/*
 * Common exit path for exceptions (ret_from_exception) and interrupts
 * (ret_from_intr).  Expects a pt_regs frame at %esp.
 *
 * check_userspace combines the VM flag of the saved EFLAGS with the
 * RPL bits of the saved CS; a result below USER_RPL goes to
 * resume_kernel, anything else falls through to resume_userspace.
 *
 * resume_userspace disables interrupts, samples TI_flags and goes to
 * work_pending if any _TIF_WORK_MASK bit is set, otherwise to
 * restore_all.
 *
 * %ebp holds the thread_info pointer from ret_from_intr onwards.
 */
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
	GET_THREAD_INFO(%ebp)
check_userspace:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
	movb PT_CS(%esp), %al
	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace

ENTRY(resume_userspace)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
	jne work_pending
	jmp restore_all
END(ret_from_exception)
Linus Torvalds's avatar
Linus Torvalds committed
374 375 376

#ifdef CONFIG_PREEMPT
/*
 * resume_kernel (CONFIG_PREEMPT only): return path into kernel mode.
 *
 * With interrupts disabled, goes straight to restore_all when
 * preempt_count is non-zero, when _TIF_NEED_RESCHED is clear, or when
 * the interrupted context had IF clear in its saved EFLAGS.
 * Otherwise calls preempt_schedule_irq and re-checks need_resched.
 *
 * Expects thread_info in %ebp and a pt_regs frame at %esp.
 */
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
	jnz restore_all
need_resched:
	movl TI_flags(%ebp), %ecx	# need_resched set ?
	testb $_TIF_NEED_RESCHED, %cl
	jz restore_all
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz restore_all
	call preempt_schedule_irq
	jmp need_resched
END(resume_kernel)
Linus Torvalds's avatar
Linus Torvalds committed
389
#endif
390
	CFI_ENDPROC
391 392 393 394
/*
 * End of kprobes section
 */
	.popsection
Linus Torvalds's avatar
Linus Torvalds committed
395 396 397 398 399

/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */

	# sysenter call handler stub
400
/*
 * ia32_sysenter_target: SYSENTER entry point.
 *
 * On entry %ebp carries the user stack pointer (see CFI_REGISTER esp,
 * ebp) and %eax the syscall number.  The code
 *   - switches %esp to the value read via TSS_sysenter_sp0,
 *   - hand-builds the ss/esp/eflags/cs/eip part of the frame (eip is
 *     taken from thread_info->sysenter_return), pushes orig_eax and
 *     completes the frame with SAVE_ALL,
 *   - fetches the sixth argument from the user stack at (%ebp), with a
 *     range check and an __ex_table entry that both lead to
 *     syscall_fault,
 *   - dispatches through sys_call_table and stores the result in
 *     PT_EAX,
 *   - on the fast exit path (sysenter_exit) loads %edx/%ecx with the
 *     return eip/esp, reloads %fs and %gs and leaves through
 *     ENABLE_INTERRUPTS_SYSEXIT.
 *
 * Pending entry/exit work diverts to sysenter_audit / sysexit_audit,
 * which are aliases of syscall_trace_entry / syscall_exit_work when
 * CONFIG_AUDITSYSCALL is off.
 */
ENTRY(ia32_sysenter_target)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 0
	CFI_REGISTER esp, ebp
	movl TSS_sysenter_sp0(%esp),%esp
sysenter_past_esp:
	/*
	 * Interrupts are disabled here, but we can't trace it until
	 * enough kernel state is set up to call TRACE_IRQS_OFF - but
	 * we immediately enable interrupts at that point anyway.
	 */
	pushl $(__USER_DS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ss, 0*/
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esp, 0
	pushfl
	orl $X86_EFLAGS_IF, (%esp)
	CFI_ADJUST_CFA_OFFSET 4
	pushl $(__USER_CS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET cs, 0*/
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
	 */
	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eip, 0

	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	ENABLE_INTERRUPTS(CLBR_NONE)

/*
 * Load the potential sixth argument from user stack.
 * Careful about security.
 */
	cmpl $__PAGE_OFFSET-3,%ebp
	jae syscall_fault
1:	movl (%ebp),%ebp
	movl %ebp,PT_EBP(%esp)
.section __ex_table,"a"
	.align 4
	.long 1b,syscall_fault
.previous

	GET_THREAD_INFO(%ebp)

	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
	jnz sysenter_audit
sysenter_do_call:
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testl $_TIF_ALLWORK_MASK, %ecx
	jne sysexit_audit
sysenter_exit:
/* if something modifies registers it must also disable sysexit */
	movl PT_EIP(%esp), %edx
	movl PT_OLDESP(%esp), %ecx
	xorl %ebp,%ebp
	TRACE_IRQS_ON
1:	mov  PT_FS(%esp), %fs
	PTGS_TO_GS
	ENABLE_INTERRUPTS_SYSEXIT

#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Entry audit fast path: taken only when no entry-work flag other
	 * than _TIF_SYSCALL_AUDIT is set.  The return-address-sized slot
	 * is dropped around the call so that the saved ecx/edx/esi act as
	 * the stack arguments of audit_syscall_entry.
	 */
sysenter_audit:
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	addl $4,%esp
	CFI_ADJUST_CFA_OFFSET -4
	/* %esi already in 8(%esp)	   6th arg: 4th syscall arg */
	/* %edx already in 4(%esp)	   5th arg: 3rd syscall arg */
	/* %ecx already in 0(%esp)	   4th arg: 2nd syscall arg */
	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
	movl %eax,%edx			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
	call audit_syscall_entry
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	movl PT_EAX(%esp),%eax		/* reload syscall number */
	jmp sysenter_do_call

	/*
	 * Exit audit fast path: taken only when no exit-work flag other
	 * than _TIF_SYSCALL_AUDIT is set; the flags are re-checked after
	 * the call because interrupts were enabled around it.
	 */
sysexit_audit:
	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
	jne syscall_exit_work
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	movl %eax,%edx		/* second arg, syscall return value */
	cmpl $0,%eax		/* is it < 0? */
	setl %al		/* 1 if so, 0 if not */
	movzbl %al,%eax		/* zero-extend that */
	inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
	jne syscall_exit_work
	movl PT_EAX(%esp),%eax	/* reload syscall return value */
	jmp sysenter_exit
#endif

	CFI_ENDPROC
	/* fixup for the %fs reload at sysenter_exit: zero PT_FS and retry */
.pushsection .fixup,"ax"
2:	movl $0,PT_FS(%esp)
	jmp 1b
.section __ex_table,"a"
	.align 4
	.long 1b,2b
.popsection
	PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)
Linus Torvalds's avatar
Linus Torvalds committed
524

525 526 527 528
/*
 * syscall stub including irq exit should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
Linus Torvalds's avatar
Linus Torvalds committed
529 530
	# system call handler stub
/*
 * system_call: system call entry stub (uses RING0_INT_FRAME).
 *
 * Saves orig_eax and the full register frame, then
 *   - diverts to syscall_trace_entry if any _TIF_WORK_SYSCALL_ENTRY
 *     flag is set,
 *   - rejects numbers >= nr_syscalls via syscall_badsys,
 *   - calls the handler through sys_call_table and stores its return
 *     value in PT_EAX.
 *
 * syscall_exit then disables interrupts, samples TI_flags and either
 * goes to syscall_exit_work or falls through to restore_all.
 *
 * %ebp holds the thread_info pointer after GET_THREAD_INFO.
 */
ENTRY(system_call)
	RING0_INT_FRAME			# can't unwind into user space anyway
	pushl %eax			# save orig_eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
					# system call tracing in operation / emulation
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
syscall_call:
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)		# store the return value
syscall_exit:
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
	jne syscall_exit_work

/*
 * restore_all: final register restore and iret.
 *
 * Combines the saved EFLAGS.VM bit, the TI bit of the saved SS and the
 * RPL of the saved CS; if that says "user mode with an LDT stack
 * segment" the espfix path at ldt_ss is taken.  Otherwise
 * (restore_nocheck) the registers are restored, orig_eax/error_code is
 * skipped and INTERRUPT_RETURN is executed.
 *
 * Note: RESTORE_REGS 4 already accounts for the 4 skipped bytes in the
 * CFA offset (POP_GS adjusts by the extra `pop` bytes), so no further
 * CFI_ADJUST_CFA_OFFSET must follow it.
 *
 * A fault on the iret at irq_return is routed by __ex_table to
 * iret_exc, which pushes a zero error code and do_iret_error and
 * jumps to error_code.
 */
restore_all:
	TRACE_IRQS_IRET
restore_all_notrace:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	# are returning to the kernel.
	# See comments in process.c:copy_thread() for details.
	movb PT_OLDSS(%esp), %ah
	movb PT_CS(%esp), %al
	andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
	CFI_REMEMBER_STATE
	je ldt_ss			# returning to user-space with LDT SS
restore_nocheck:
	RESTORE_REGS 4			# skip orig_eax/error_code
irq_return:
	INTERRUPT_RETURN
.section .fixup,"ax"
ENTRY(iret_exc)
	pushl $0			# no error code
	pushl $do_iret_error
	jmp error_code
.previous
.section __ex_table,"a"
	.align 4
	.long irq_return,iret_exc
.previous

	CFI_RESTORE_STATE
/*
 * ldt_ss: return to user space with an LDT stack segment.
 *
 * larl fetches the access rights of the saved SS; if that fails, or
 * if bit 22 (0x00400000) of the result is set (32-bit stack segment),
 * the normal restore_nocheck path is used.  Under CONFIG_PARAVIRT the
 * espfix switch is also skipped whenever pv_info.paravirt_enabled is
 * non-zero.
 */
ldt_ss:
	larl PT_OLDSS(%esp), %eax
	jnz restore_nocheck
	testl $0x00400000, %eax		# returning to 32bit stack?
	jnz restore_nocheck		# all right, normal return

#ifdef CONFIG_PARAVIRT
	/*
	 * The kernel can't run on a non-flat stack if paravirt mode
	 * is active.  Rather than try to fixup the high bits of
	 * ESP, bypass this code entirely.  This may break DOSemu
	 * and/or Wine support in a paravirt VM, although the option
	 * is still available to implement the setting of the high
	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
	 */
	cmpl $0, pv_info+PARAVIRT_enabled
	jne restore_nocheck
#endif

/*
 * Setup and switch to ESPFIX stack
 *
 * We're returning to userspace with a 16 bit stack. The CPU will not
 * restore the high word of ESP for us on executing iret... This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy. We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that matches for the difference.
 */
	mov %esp, %edx			/* load kernel esp */
	mov PT_OLDESP(%esp), %eax	/* load userspace esp */
	mov %dx, %ax			/* eax: new kernel esp */
	sub %eax, %edx			/* offset (low word is 0) */
	PER_CPU(gdt_page, %ebx)
	shr $16, %edx
	mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
	mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
	pushl $__ESPFIX_SS
	CFI_ADJUST_CFA_OFFSET 4
	push %eax			/* new kernel esp */
	CFI_ADJUST_CFA_OFFSET 4
	/* Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the iret */
	DISABLE_INTERRUPTS(CLBR_EAX)
	lss (%esp), %esp		/* switch to espfix segment */
	CFI_ADJUST_CFA_OFFSET -8
	jmp restore_nocheck
	CFI_ENDPROC
ENDPROC(system_call)
Linus Torvalds's avatar
Linus Torvalds committed
635 636 637

	# perform work that needs to be done immediately before resumption
	ALIGN
	RING0_PTREGS_FRAME		# can't unwind into user space anyway
/*
 * work_pending: handle work flagged in TI_flags before resuming.
 *
 * In:  %ecx = sampled thread-info flags, %ebp = thread_info,
 *      pt_regs frame at %esp.
 *
 * While _TIF_NEED_RESCHED is set, schedule() is called and the flags
 * are re-sampled with interrupts disabled; if no _TIF_WORK_MASK bit
 * remains the code leaves via restore_all.  Any remaining work is
 * passed to do_notify_resume(%eax = pt_regs, %edx = 0), followed by a
 * jump to resume_userspace_sig.  With CONFIG_VM86, a frame that has
 * EFLAGS.VM set is first converted by save_v86_state, whose return
 * value becomes the new %esp.
 */
work_pending:
	testb $_TIF_NEED_RESCHED, %cl
	jz work_notifysig
work_resched:
	call schedule
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all
	testb $_TIF_NEED_RESCHED, %cl
	jnz work_resched

work_notifysig:				# deal with pending signals and
					# notify-resume requests
#ifdef CONFIG_VM86
	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
	movl %esp, %eax
	jne work_notifysig_v86		# returning to kernel-space or
					# vm86-space
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig

	ALIGN
work_notifysig_v86:
	pushl %ecx			# save ti_flags for do_notify_resume
	CFI_ADJUST_CFA_OFFSET 4
	call save_v86_state		# %eax contains pt_regs pointer
	popl %ecx
	CFI_ADJUST_CFA_OFFSET -4
	movl %eax, %esp
#else
	movl %esp, %eax
#endif
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig
END(work_pending)
Linus Torvalds's avatar
Linus Torvalds committed
682 683 684 685

	# perform syscall entry tracing
	ALIGN
/*
 * syscall_trace_entry: slow path taken when entry work is flagged.
 *
 * Pre-loads -ENOSYS as the return value in PT_EAX, calls
 * syscall_trace_enter(%eax = pt_regs) and treats its return value as
 * the syscall number: if it is below nr_syscalls the call proceeds at
 * syscall_call, otherwise the code goes to syscall_exit with the
 * value already in PT_EAX.
 */
syscall_trace_entry:
	movl $-ENOSYS,PT_EAX(%esp)
	movl %esp, %eax
	call syscall_trace_enter
	/* What it returned is what we'll actually use.  */
	cmpl $(nr_syscalls), %eax
	jnae syscall_call
	jmp syscall_exit
END(syscall_trace_entry)
Linus Torvalds's avatar
Linus Torvalds committed
694 695 696 697

	# perform syscall exit tracing
	ALIGN
/*
 * syscall_exit_work: exit-side slow path.
 *
 * In: %ecx = sampled thread-info flags, pt_regs frame at %esp.
 *
 * If no _TIF_WORK_SYSCALL_EXIT bit is set the remaining work is left
 * to work_pending.  Otherwise interrupts are enabled,
 * syscall_trace_leave(%eax = pt_regs) is called and the code continues
 * at resume_userspace.
 */
syscall_exit_work:
	testl $_TIF_WORK_SYSCALL_EXIT, %ecx
	jz work_pending
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
					# schedule() instead
	movl %esp, %eax
	call syscall_trace_leave
	jmp resume_userspace
END(syscall_exit_work)
	CFI_ENDPROC
Linus Torvalds's avatar
Linus Torvalds committed
708

709
	RING0_INT_FRAME			# can't unwind into user space anyway
/*
 * syscall_fault: reached from the sysenter path when the user stack
 * pointer fails the range check or the load from it faults.  Reloads
 * thread_info into %ebp (it held the user pointer), stores -EFAULT as
 * the return value and resumes user space.
 */
syscall_fault:
	GET_THREAD_INFO(%ebp)
	movl $-EFAULT,PT_EAX(%esp)
	jmp resume_userspace
END(syscall_fault)
Linus Torvalds's avatar
Linus Torvalds committed
715 716

/*
 * syscall_badsys: syscall number out of range (>= nr_syscalls).
 * Stores -ENOSYS as the return value and resumes user space.
 */
syscall_badsys:
	movl $-ENOSYS,PT_EAX(%esp)
	jmp resume_userspace
END(syscall_badsys)
	CFI_ENDPROC
721 722 723 724
/*
 * End of kprobes section
 */
	.popsection
Linus Torvalds's avatar
Linus Torvalds committed
725

726 727 728
/*
 * System calls that need a pt_regs pointer.
 */
Brian Gerst's avatar
Brian Gerst committed
729
/*
 * PTREGSCALL0(name): define ptregs_<name>, a stub for a sys_<name>
 * that takes only a pt_regs pointer.  4(%esp) skips the return address
 * of the call through sys_call_table, so %eax = pt_regs; the stub then
 * tail-jumps to sys_<name>.
 */
#define PTREGSCALL0(name) \
	ALIGN; \
ptregs_##name: \
	leal 4(%esp),%eax; \
	jmp sys_##name;

Brian Gerst's avatar
Brian Gerst committed
735 736 737 738
/*
 * PTREGSCALL1(name): define ptregs_<name>, a stub for a sys_<name>
 * that takes one syscall argument plus a pt_regs pointer.
 * %eax = saved %ebx (first syscall argument), %edx = pt_regs; the "+4"
 * skips the return address on the stack.  Tail-jumps to sys_<name>.
 */
#define PTREGSCALL1(name) \
	ALIGN; \
ptregs_##name: \
	leal 4(%esp),%edx; \
	movl (PT_EBX+4)(%esp),%eax; \
	jmp sys_##name;

/*
 * PTREGSCALL2(name): define ptregs_<name>, a stub for a sys_<name>
 * that takes two syscall arguments plus a pt_regs pointer.
 * %eax = saved %ebx, %edx = saved %ecx, %ecx = pt_regs; the "+4" skips
 * the return address on the stack.  Tail-jumps to sys_<name>.
 */
#define PTREGSCALL2(name) \
	ALIGN; \
ptregs_##name: \
	leal 4(%esp),%ecx; \
	movl (PT_ECX+4)(%esp),%edx; \
	movl (PT_EBX+4)(%esp),%eax; \
	jmp sys_##name;

/*
 * PTREGSCALL3(name): define ptregs_<name>, a stub for a sys_<name>
 * that takes three syscall arguments plus a pt_regs pointer.
 * The three arguments go in %eax/%edx/%ecx (saved %ebx/%ecx/%edx) and
 * the pt_regs pointer is passed on the stack, so this variant uses a
 * real call and pops the stack argument afterwards instead of
 * tail-jumping.
 */
#define PTREGSCALL3(name) \
	ALIGN; \
ptregs_##name: \
	leal 4(%esp),%eax; \
	pushl %eax; \
	movl PT_EDX(%eax),%ecx; \
	movl PT_ECX(%eax),%edx; \
	movl PT_EBX(%eax),%eax; \
	call sys_##name; \
	addl $4,%esp; \
	ret

Brian Gerst's avatar
Brian Gerst committed
762
/* Instantiate the ptregs_<name> stubs; the digit is the argument count. */
PTREGSCALL1(iopl)
PTREGSCALL0(fork)
PTREGSCALL0(vfork)
PTREGSCALL3(execve)
PTREGSCALL2(sigaltstack)
PTREGSCALL0(sigreturn)
PTREGSCALL0(rt_sigreturn)
PTREGSCALL2(vm86)
PTREGSCALL1(vm86old)
771

Brian Gerst's avatar
Brian Gerst committed
772 773 774 775 776 777 778 779 780 781 782 783 784
/* Clone is an oddball.  The 4th arg is in %edi */
/*
 * ptregs_clone: stub for sys_clone.
 *
 * %eax first points at pt_regs (4(%esp) skips the return address).
 * Two words are pushed for the callee - the pt_regs pointer, then the
 * saved %edi - and the saved %ebx/%ecx/%edx are loaded into
 * %eax/%edx/%ecx.  %eax is overwritten last because it is the base
 * register for the other loads.  The 8 bytes of stack arguments are
 * dropped after the call.
 */
	ALIGN;
ptregs_clone:
	leal 4(%esp),%eax
	pushl %eax
	pushl PT_EDI(%eax)
	movl PT_EDX(%eax),%ecx
	movl PT_ECX(%eax),%edx
	movl PT_EBX(%eax),%eax
	call sys_clone
	addl $8,%esp
	ret

785
.macro FIXUP_ESPFIX_STACK
/*
 * Switch back from the ESPFIX stack to the normal zero-based stack
 *
 * We can't call C functions using the ESPFIX stack. This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 *
 * Clobbers: %eax, %ebx.
 */
	/* fixup the stack */
	PER_CPU(gdt_page, %ebx)
	mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
	mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
	shl $16, %eax
	addl %esp, %eax			/* the adjusted stack pointer */
	pushl $__KERNEL_DS
	CFI_ADJUST_CFA_OFFSET 4
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	lss (%esp), %esp		/* switch to the normal stack segment */
	CFI_ADJUST_CFA_OFFSET -8
.endm
/*
 * UNWIND_ESPFIX_STACK: if the current %ss is __ESPFIX_SS, load
 * __KERNEL_DS into %ds/%es and switch back to the normal stack with
 * FIXUP_ESPFIX_STACK; otherwise do nothing.
 *
 * Clobbers %eax (and whatever FIXUP_ESPFIX_STACK clobbers when the
 * switch is taken).  Local label 27 is used.
 */
.macro UNWIND_ESPFIX_STACK
	movl %ss, %eax
	/* see if on espfix stack */
	cmpw $__ESPFIX_SS, %ax
	jne 27f
	movl $__KERNEL_DS, %eax
	movl %eax, %ds
	movl %eax, %es
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
27:
.endm
Linus Torvalds's avatar
Linus Torvalds committed
818 819

/*
820 821 822
 * Build the entry stubs and pointer table with some assembler magic.
 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 * single cache line on all modern x86 implementations.
Linus Torvalds's avatar
Linus Torvalds committed
823
 */
824
/*
 * Two things are generated in lock-step here:
 *   - irq_entries_start (.text): one "pushl $imm8" stub per vector
 *     from FIRST_EXTERNAL_VECTOR up to NR_VECTORS-1, in 32-byte aligned
 *     groups of up to 7.  Every stub except the 7th of a group jumps
 *     to the shared "2: jmp common_interrupt" that ends the group; the
 *     7th falls through into it.
 *   - interrupt (.init.rodata): a table of .long pointers, one per
 *     stub, emitted by toggling sections with .previous/.text.
 *
 * The pushed value is ~vector+0x80, which stays within signed-byte
 * range; common_interrupt subtracts the 0x80 again.
 */
.section .init.rodata,"a"
ENTRY(interrupt)
.text
	.p2align 5
	.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
	RING0_INT_FRAME
vector=FIRST_EXTERNAL_VECTOR
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
	.balign 32
  .rept	7
    .if vector < NR_VECTORS
      .if vector <> FIRST_EXTERNAL_VECTOR
	CFI_ADJUST_CFA_OFFSET -4
      .endif
1:	pushl $(~vector+0x80)	/* Note: always in signed byte range */
	CFI_ADJUST_CFA_OFFSET 4
      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
	jmp 2f
      .endif
      .previous
	.long 1b
      .text
vector=vector+1
    .endif
  .endr
2:	jmp common_interrupt
.endr
END(irq_entries_start)

.previous
END(interrupt)
.previous
Linus Torvalds's avatar
Linus Torvalds committed
857

858 859 860 861
/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
862
	.p2align CONFIG_X86_L1_CACHE_SHIFT
/*
 * common_interrupt: shared tail of the irq_entries_start stubs.
 *
 * The word at (%esp) is the value pushed by the stub; subtracting 0x80
 * puts it into the [-256,-1] range.  The rest of the frame is then
 * saved and do_IRQ(%eax = pt_regs) is called before leaving through
 * ret_from_intr.
 */
common_interrupt:
	addl $-0x80,(%esp)	/* Adjust vector into the [-256,-1] range */
	SAVE_ALL
	TRACE_IRQS_OFF
	movl %esp,%eax
	call do_IRQ
	jmp ret_from_intr
ENDPROC(common_interrupt)
	CFI_ENDPROC
Linus Torvalds's avatar
Linus Torvalds committed
872

873 874 875 876
/*
 *  Irq entries should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
Tejun Heo's avatar
Tejun Heo committed
877
/*
 * BUILD_INTERRUPT3(name, nr, fn): define the entry point `name` for
 * interrupt vector `nr`.  The stub pushes ~nr into the orig_eax slot,
 * saves all registers, calls fn(%eax = pt_regs) and leaves through
 * ret_from_intr.
 *
 * Every statement is terminated with ';' because the whole macro body
 * is pasted onto a single line by the preprocessor.
 */
#define BUILD_INTERRUPT3(name, nr, fn)	\
ENTRY(name)				\
	RING0_INT_FRAME;		\
	pushl $~(nr);			\
	CFI_ADJUST_CFA_OFFSET 4;	\
	SAVE_ALL;			\
	TRACE_IRQS_OFF;			\
	movl %esp,%eax;			\
	call fn;			\
	jmp ret_from_intr;		\
	CFI_ENDPROC;			\
ENDPROC(name)
Linus Torvalds's avatar
Linus Torvalds committed
889

Tejun Heo's avatar
Tejun Heo committed
890 891
#define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)

Linus Torvalds's avatar
Linus Torvalds committed
892
/* The include is where all of the SMP etc. interrupts come from */
893
#include <asm/entry_arch.h>
Linus Torvalds's avatar
Linus Torvalds committed
894 895

/*
 * coprocessor_error: exception entry stub.  Pushes 0 into the
 * error-code slot and the address of do_coprocessor_error, then jumps
 * to the common error_code path.
 */
ENTRY(coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(coprocessor_error)
Linus Torvalds's avatar
Linus Torvalds committed
904 905

/*
 * simd_coprocessor_error: exception entry stub.  Pushes 0 into the
 * error-code slot and a handler address, then jumps to error_code.
 *
 * With CONFIG_X86_INVD_BUG the handler pushed by default is
 * do_general_protection; an .altinstructions record keyed on
 * X86_FEATURE_XMM supplies the replacement instruction that pushes
 * do_simd_coprocessor_error instead.  Without the option
 * do_simd_coprocessor_error is pushed unconditionally.
 *
 * Local labels 661-664 delimit the original and replacement code.
 */
ENTRY(simd_coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
661:	pushl $do_general_protection
662:
.section .altinstructions,"a"
	.balign 4
	.long 661b
	.long 663f
	.byte X86_FEATURE_XMM
	.byte 662b-661b
	.byte 664f-663f
.previous
.section .altinstr_replacement,"ax"
663:	pushl $do_simd_coprocessor_error
664:
.previous
#else
	pushl $do_simd_coprocessor_error
#endif
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(simd_coprocessor_error)
Linus Torvalds's avatar
Linus Torvalds committed
932 933

/*
 * device_not_available: exception entry stub.  Pushes -1 (rather than
 * 0) into the error-code slot and the address of
 * do_device_not_available, then jumps to the common error_code path.
 */
ENTRY(device_not_available)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_device_not_available
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(device_not_available)
Linus Torvalds's avatar
Linus Torvalds committed
942

943 944
#ifdef CONFIG_PARAVIRT
/*
 * native_iret: a bare iret (CONFIG_PARAVIRT only).  The __ex_table
 * entry routes a fault on the iret to iret_exc.
 */
ENTRY(native_iret)
	iret
.section __ex_table,"a"
	.align 4
	.long native_iret, iret_exc
.previous
END(native_iret)
951

952
/*
 * native_irq_enable_sysexit: "sti; sysexit" (CONFIG_PARAVIRT only).
 */
ENTRY(native_irq_enable_sysexit)
	sti
	sysexit
END(native_irq_enable_sysexit)
956 957
#endif

Linus Torvalds's avatar
Linus Torvalds committed
958
/*
 * overflow: exception entry stub.  Pushes 0 into the error-code slot
 * and the address of do_overflow, then jumps to the common error_code
 * path.
 */
ENTRY(overflow)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_overflow
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(overflow)
Linus Torvalds's avatar
Linus Torvalds committed
967 968

/*
 * bounds: exception entry stub.  Pushes 0 into the error-code slot
 * and the address of do_bounds, then jumps to the common error_code
 * path.
 */
ENTRY(bounds)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_bounds
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(bounds)
Linus Torvalds's avatar
Linus Torvalds committed
977 978

/*
 * invalid_op: exception entry stub.  Pushes 0 into the error-code
 * slot and the address of do_invalid_op, then jumps to the common
 * error_code path.
 */
ENTRY(invalid_op)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_invalid_op
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(invalid_op)
Linus Torvalds's avatar
Linus Torvalds committed
987 988

/*
 * coprocessor_segment_overrun: exception entry stub.  Pushes 0 into
 * the error-code slot and the address of
 * do_coprocessor_segment_overrun, then jumps to the common error_code
 * path.
 */
ENTRY(coprocessor_segment_overrun)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_segment_overrun
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(coprocessor_segment_overrun)
Linus Torvalds's avatar
Linus Torvalds committed
997 998

ENTRY(invalid_TSS)
999
	RING0_EC_FRAME
Linus Torvalds's avatar
Linus Torvalds committed
1000
	pushl $do_invalid_TSS