Skip to content
  • Alexander van Heukelum's avatar
    i386: fix return to 16-bit stack from NMI handler · 2e04bc76
    Alexander van Heukelum authored
    
    
    Returning to a task with a 16-bit stack requires special care: the iret
    instruction does not restore the high word of esp in that case. The
    espfix code fixes this, but currently is not invoked on NMIs. This means
    that a running task gets the upper word of esp clobbered due to intervening
    NMIs. To reproduce, compile and run the following program with the nmi
    watchdog enabled (nmi_watchdog=2 on the command line). Using gdb you can
    see that the high bits of esp contain garbage, while the low bits are
    still correct.
    
    This patch puts the espfix code back into the NMI code path.
    
    The patch is slightly complicated due to the irqtrace infrastructure not
    being NMI-safe. The NMI return path cannot call TRACE_IRQS_IRET.
    Otherwise, the tail of the normal iret-code is correct for the nmi code
    path too. To be able to share this code-path, the TRACE_IRQS_IRET was
    moved up a bit. The espfix code exists after the TRACE_IRQS_IRET, but
    this code explicitly disables interrupts. This short interrupts-off
    section is now not traced anymore. The return-to-kernel path now always
    includes the preliminary test to decide if the espfix code should be
    called. This is never the case, but doing it this way keeps the patch as
    simple as possible and the few extra instructions should not affect
    timing in any significant way.
    
     #define _GNU_SOURCE
     #include <stdio.h>
     #include <sys/types.h>
     #include <sys/mman.h>
     #include <unistd.h>
     #include <sys/syscall.h>
     #include <asm/ldt.h>
    
    /*
     * Issue the modify_ldt system call through syscall(2).
     *
     * func, ptr and bytecount are forwarded unchanged. The value returned by
     * syscall() is handed back to the caller narrowed to int.
     */
    int modify_ldt(int func, void *ptr, unsigned long bytecount)
    {
            long rc = syscall(SYS_modify_ldt, func, ptr, bytecount);

            return (int)rc;
    }
    
    /* this is assumed to be usable */
     #define SEGBASEADDR 0x10000
     #define SEGLIMIT 0x20000
    
    /*
     * Descriptor installed into LDT slot 0 by main(): a 16-bit segment
     * (seg_32bit = 0) based at SEGBASEADDR with limit SEGLIMIT.
     */
    struct user_desc desc = {
            /* which slot, and where the segment lives */
            .entry_number    = 0,
            .base_addr       = SEGBASEADDR,
            .limit           = SEGLIMIT,
            .limit_in_pages  = 0,

            /* segment type and attributes */
            .seg_32bit       = 0,
            /* NOTE(review): 0 looks like the plain data-segment type, see modify_ldt(2) - confirm */
            .contents        = 0,
            .read_exec_only  = 0,
            .seg_not_present = 0,
            .useable         = 1
    };
    
    /*
     * Reproducer: repeatedly switch to a stack in the 16-bit LDT segment, set
     * the high word of esp to a known value, spin for a while, and execute ud2
     * if esp no longer matches the value saved before the wait.
     *
     * Returns 0 when all 1000 iterations complete, 1 when the setup (mapping
     * the segment memory or installing the LDT entry) fails.
     */
    int main(void)
    {
            /* unbuffered, so the progress counter shows up immediately */
            setvbuf(stdout, NULL, _IONBF, 0);
    
            /*
             * Map SEGLIMIT+1 bytes to back the segment. mmap() reports failure
             * with MAP_FAILED, not NULL. SEGBASEADDR is only a hint here (no
             * MAP_FIXED), and desc hard-codes that base address, so a mapping
             * placed anywhere else is treated as a failure too.
             */
            char *pointer = mmap((void *)SEGBASEADDR, SEGLIMIT+1,
                            PROT_EXEC|PROT_READ|PROT_WRITE,
                            MAP_SHARED|MAP_ANONYMOUS, -1, 0);
            if (pointer == MAP_FAILED || pointer != (char *)SEGBASEADDR) {
                    printf("could not map space\n");
                    return 1;
            }
    
            /* write ldt, new mode */
            int err = modify_ldt(0x11, &desc, sizeof(desc));
            if (err) {
                    printf("error modifying ldt: %i\n", err);
                    return 1;
            }
    
            for (int i=0; i<1000; i++) {
                    /*
                     * __asm__ rather than asm: the plain keyword is not
                     * available in strict ISO modes (-std=c99/-std=c11).
                     * This is basic asm (no operands), so registers are
                     * written with a single '%'. pusha/popa bracket the
                     * sequence to save and restore the general registers.
                     */
                    __asm__ volatile (
                            "pusha\n\t"
                            "mov %ss, %eax\n\t" /* preserve ss:esp */
                            "mov %esp, %ebp\n\t"
                            "push $7\n\t" /* index 0, ldt, user mode */
                            "push $65536-4096\n\t" /* esp */
                            "lss (%esp), %esp\n\t" /* switch to new stack */
                            "push %eax\n\t" /* save old ss:esp on new stack */
                            "push %ebp\n\t"
                            "add $17*65536, %esp\n\t" /* set high bits */
                            "mov %esp, %edx\n\t"
    
                            "mov $10000000, %ecx\n\t" /* wait... */
                            "1: loop 1b\n\t" /* ... a bit */
    
                            "cmp %esp, %edx\n\t"
                            "je 1f\n\t"
                            "ud2\n\t" /* esp changed inexplicably! */
                            "1:\n\t"
                            "sub $17*65536, %esp\n\t" /* restore high bits */
                            "lss (%esp), %esp\n\t" /* restore old ss:esp */
                            "popa\n\t");
    
                    /* progress: overwrite the same line with the iteration */
                    printf("\rx%ix", i);
            }
    
            return 0;
    }
    
    Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
    Acked-by: Stas Sergeev <stsp@aknet.ru>
    Signed-off-by: H. Peter Anvin <hpa@zytor.com>
    2e04bc76