
/* [<][>][^][v][top][bottom][index][help] */


This source file includes the following definitions.
  1. GDT3
  2. LDT3
  3. sethandler
  4. clearhandler
  5. add_ldt
  6. setup_ldt
  7. ssptr
  8. csptr
  9. ssptr
  10. csptr
  11. cs_bitness
  12. is_valid_ss
  13. validate_signal_ss
  14. sigusr1
  15. sigtrap
  16. sigusr2
  17. test_nonstrict_ss
  18. find_cs
  19. test_valid_sigreturn
  20. test_bad_iret
  21. main

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
   4  * Copyright (c) 2014-2015 Andrew Lutomirski
   5  *
   6  * This is a series of tests that exercises the sigreturn(2) syscall and
   7  * the IRET / SYSRET paths in the kernel.
   8  *
   9  * For now, this focuses on the effects of unusual CS and SS values,
  10  * and it has a bunch of tests to make sure that ESP/RSP is restored
  11  * properly.
  12  *
  13  * The basic idea behind these tests is to raise(SIGUSR1) to create a
  14  * sigcontext frame, plug in the values to be tested, and then return,
  15  * which implicitly invokes sigreturn(2) and programs the user context
  16  * as desired.
  17  *
  18  * For tests for which we expect sigreturn and the subsequent return to
  19  * user mode to succeed, we return to a short trampoline that generates
  20  * SIGTRAP so that the meat of the tests can be ordinary C code in a
  21  * SIGTRAP handler.
  22  *
  23  * The inner workings of each test are documented below.
  24  *
  25  * Do not run on outdated, unpatched kernels at risk of nasty crashes.
  26  */
  28 #define _GNU_SOURCE
  30 #include <sys/time.h>
  31 #include <time.h>
  32 #include <stdlib.h>
  33 #include <sys/syscall.h>
  34 #include <unistd.h>
  35 #include <stdio.h>
  36 #include <string.h>
  37 #include <inttypes.h>
  38 #include <sys/mman.h>
  39 #include <sys/signal.h>
  40 #include <sys/ucontext.h>
  41 #include <asm/ldt.h>
  42 #include <err.h>
  43 #include <setjmp.h>
  44 #include <stddef.h>
  45 #include <stdbool.h>
  46 #include <sys/ptrace.h>
  47 #include <sys/user.h>
  49 /* Pull in AR_xyz defines. */
  50 typedef unsigned int u32;
  51 typedef unsigned short u16;
  52 #include "../../../../arch/x86/include/asm/desc_defs.h"
  54 /*
  55  * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
  56  * headers.
  57  */
  58 #ifdef __x86_64__
  59 /*
  60  * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
  61  * kernels that save SS in the sigcontext.  All kernels that set
  62  * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
  63  * regardless of SS (i.e. they implement espfix).
  64  *
  65  * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
  66  * when delivering a signal that came from 64-bit code.
  67  *
  68  * Sigreturn restores SS as follows:
  69  *
  70  * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
  71  *     saved CS is not 64-bit)
  72  *         new SS = saved SS  (will fail IRET and signal if invalid)
  73  * else
  74  *         new SS = a flat 32-bit data segment
  75  */
  76 #define UC_SIGCONTEXT_SS       0x2
  77 #define UC_STRICT_RESTORE_SS   0x4
  78 #endif
  80 /*
  81  * In principle, this test can run on Linux emulation layers (e.g.
  82  * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
  83  * entries 0-5 for their own internal purposes, so start our LDT
  84  * allocations above that reservation.  (The tests don't pass on LX
  85  * branded zones, but at least this lets them run.)
  86  */
  87 #define LDT_OFFSET 6
  89 /* An aligned stack accessible through some of our segments. */
  90 static unsigned char stack16[65536] __attribute__((aligned(4096)));
  92 /*
  93  * An aligned int3 instruction used as a trampoline.  Some of the tests
  94  * want to fish out their ss values, so this trampoline copies ss to ecx
  95  * before the int3.
  96  */
  97 asm (".pushsection .text\n\t"
  98      ".type int3, @function\n\t"
  99      ".align 4096\n\t"
 100      "int3:\n\t"
 101      "mov %ss,%ecx\n\t"
 102      "int3\n\t"
 103      ".size int3, . - int3\n\t"
 104      ".align 4096, 0xcc\n\t"
 105      ".popsection");
 106 extern char int3[4096];
 108 /*
 109  * At startup, we prepare:
 110  *
 111  * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
 112  *   descriptor or out of bounds).
 113  * - code16_sel: A 16-bit LDT code segment pointing to int3.
 114  * - data16_sel: A 16-bit LDT data segment pointing to stack16.
 115  * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
 116  * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
 117  * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
 118  * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
 119  *   stack16.
 120  *
 121  * For no particularly good reason, xyz_sel is a selector value with the
 122  * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
 123  * descriptor table.  These variables will be zero if their respective
 124  * segments could not be allocated.
 125  */
 126 static unsigned short ldt_nonexistent_sel;
 127 static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
 129 static unsigned short gdt_data16_idx, gdt_npdata32_idx;
 131 static unsigned short GDT3(int idx)
 132 {
 133         return (idx << 3) | 3;
 134 }
 136 static unsigned short LDT3(int idx)
 137 {
 138         return (idx << 3) | 7;
 139 }
 141 /* Our sigaltstack scratch space. */
 142 static char altstack_data[SIGSTKSZ];
 144 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 145                        int flags)
 146 {
 147         struct sigaction sa;
 148         memset(&sa, 0, sizeof(sa));
 149         sa.sa_sigaction = handler;
 150         sa.sa_flags = SA_SIGINFO | flags;
 151         sigemptyset(&sa.sa_mask);
 152         if (sigaction(sig, &sa, 0))
 153                 err(1, "sigaction");
 154 }
 156 static void clearhandler(int sig)
 157 {
 158         struct sigaction sa;
 159         memset(&sa, 0, sizeof(sa));
 160         sa.sa_handler = SIG_DFL;
 161         sigemptyset(&sa.sa_mask);
 162         if (sigaction(sig, &sa, 0))
 163                 err(1, "sigaction");
 164 }
 166 static void add_ldt(const struct user_desc *desc, unsigned short *var,
 167                     const char *name)
 168 {
 169         if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
 170                 *var = LDT3(desc->entry_number);
 171         } else {
 172                 printf("[NOTE]\tFailed to create %s segment\n", name);
 173                 *var = 0;
 174         }
 175 }
 177 static void setup_ldt(void)
 178 {
 179         if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
 180                 errx(1, "stack16 is too high\n");
 181         if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
 182                 errx(1, "int3 is too high\n");
 184         ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
 186         const struct user_desc code16_desc = {
 187                 .entry_number    = LDT_OFFSET + 0,
 188                 .base_addr       = (unsigned long)int3,
 189                 .limit           = 4095,
 190                 .seg_32bit       = 0,
 191                 .contents        = 2, /* Code, not conforming */
 192                 .read_exec_only  = 0,
 193                 .limit_in_pages  = 0,
 194                 .seg_not_present = 0,
 195                 .useable         = 0
 196         };
 197         add_ldt(&code16_desc, &code16_sel, "code16");
 199         const struct user_desc data16_desc = {
 200                 .entry_number    = LDT_OFFSET + 1,
 201                 .base_addr       = (unsigned long)stack16,
 202                 .limit           = 0xffff,
 203                 .seg_32bit       = 0,
 204                 .contents        = 0, /* Data, grow-up */
 205                 .read_exec_only  = 0,
 206                 .limit_in_pages  = 0,
 207                 .seg_not_present = 0,
 208                 .useable         = 0
 209         };
 210         add_ldt(&data16_desc, &data16_sel, "data16");
 212         const struct user_desc npcode32_desc = {
 213                 .entry_number    = LDT_OFFSET + 3,
 214                 .base_addr       = (unsigned long)int3,
 215                 .limit           = 4095,
 216                 .seg_32bit       = 1,
 217                 .contents        = 2, /* Code, not conforming */
 218                 .read_exec_only  = 0,
 219                 .limit_in_pages  = 0,
 220                 .seg_not_present = 1,
 221                 .useable         = 0
 222         };
 223         add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
 225         const struct user_desc npdata32_desc = {
 226                 .entry_number    = LDT_OFFSET + 4,
 227                 .base_addr       = (unsigned long)stack16,
 228                 .limit           = 0xffff,
 229                 .seg_32bit       = 1,
 230                 .contents        = 0, /* Data, grow-up */
 231                 .read_exec_only  = 0,
 232                 .limit_in_pages  = 0,
 233                 .seg_not_present = 1,
 234                 .useable         = 0
 235         };
 236         add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
 238         struct user_desc gdt_data16_desc = {
 239                 .entry_number    = -1,
 240                 .base_addr       = (unsigned long)stack16,
 241                 .limit           = 0xffff,
 242                 .seg_32bit       = 0,
 243                 .contents        = 0, /* Data, grow-up */
 244                 .read_exec_only  = 0,
 245                 .limit_in_pages  = 0,
 246                 .seg_not_present = 0,
 247                 .useable         = 0
 248         };
 250         if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
 251                 /*
 252                  * This probably indicates vulnerability to CVE-2014-8133.
 253                  * Merely getting here isn't definitive, though, and we'll
 254                  * diagnose the problem for real later on.
 255                  */
 256                 printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
 257                        gdt_data16_desc.entry_number);
 258                 gdt_data16_idx = gdt_data16_desc.entry_number;
 259         } else {
 260                 printf("[OK]\tset_thread_area refused 16-bit data\n");
 261         }
 263         struct user_desc gdt_npdata32_desc = {
 264                 .entry_number    = -1,
 265                 .base_addr       = (unsigned long)stack16,
 266                 .limit           = 0xffff,
 267                 .seg_32bit       = 1,
 268                 .contents        = 0, /* Data, grow-up */
 269                 .read_exec_only  = 0,
 270                 .limit_in_pages  = 0,
 271                 .seg_not_present = 1,
 272                 .useable         = 0
 273         };
 275         if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
 276                 /*
 277                  * As a hardening measure, newer kernels don't allow this.
 278                  */
 279                 printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
 280                        gdt_npdata32_desc.entry_number);
 281                 gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
 282         } else {
 283                 printf("[OK]\tset_thread_area refused 16-bit data\n");
 284         }
 285 }
 287 /* State used by our signal handlers. */
 288 static gregset_t initial_regs, requested_regs, resulting_regs;
 290 /* Instructions for the SIGUSR1 handler. */
 291 static volatile unsigned short sig_cs, sig_ss;
 292 static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
 293 #ifdef __x86_64__
 294 static volatile sig_atomic_t sig_corrupt_final_ss;
 295 #endif
 297 /* Abstractions for some 32-bit vs 64-bit differences. */
 298 #ifdef __x86_64__
 299 # define REG_IP REG_RIP
 300 # define REG_SP REG_RSP
 301 # define REG_CX REG_RCX
 303 struct selectors {
 304         unsigned short cs, gs, fs, ss;
 305 };
 307 static unsigned short *ssptr(ucontext_t *ctx)
 308 {
 309         struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
 310         return &sels->ss;
 311 }
 313 static unsigned short *csptr(ucontext_t *ctx)
 314 {
 315         struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
 316         return &sels->cs;
 317 }
 318 #else
 319 # define REG_IP REG_EIP
 320 # define REG_SP REG_ESP
 321 # define REG_CX REG_ECX
 323 static greg_t *ssptr(ucontext_t *ctx)
 324 {
 325         return &ctx->uc_mcontext.gregs[REG_SS];
 326 }
 328 static greg_t *csptr(ucontext_t *ctx)
 329 {
 330         return &ctx->uc_mcontext.gregs[REG_CS];
 331 }
 332 #endif
 334 /*
 335  * Checks a given selector for its code bitness or returns -1 if it's not
 336  * a usable code segment selector.
 337  */
 338 int cs_bitness(unsigned short cs)
 339 {
 340         uint32_t valid = 0, ar;
 341         asm ("lar %[cs], %[ar]\n\t"
 342              "jnz 1f\n\t"
 343              "mov $1, %[valid]\n\t"
 344              "1:"
 345              : [ar] "=r" (ar), [valid] "+rm" (valid)
 346              : [cs] "r" (cs));
 348         if (!valid)
 349                 return -1;
 351         bool db = (ar & (1 << 22));
 352         bool l = (ar & (1 << 21));
 354         if (!(ar & (1<<11)))
 355             return -1;  /* Not code. */
 357         if (l && !db)
 358                 return 64;
 359         else if (!l && db)
 360                 return 32;
 361         else if (!l && !db)
 362                 return 16;
 363         else
 364                 return -1;      /* Unknown bitness. */
 365 }
 367 /*
 368  * Checks a given selector for its code bitness or returns -1 if it's not
 369  * a usable code segment selector.
 370  */
 371 bool is_valid_ss(unsigned short cs)
 372 {
 373         uint32_t valid = 0, ar;
 374         asm ("lar %[cs], %[ar]\n\t"
 375              "jnz 1f\n\t"
 376              "mov $1, %[valid]\n\t"
 377              "1:"
 378              : [ar] "=r" (ar), [valid] "+rm" (valid)
 379              : [cs] "r" (cs));
 381         if (!valid)
 382                 return false;
 384         if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
 385             (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
 386                 return false;
 388         return (ar & AR_P);
 389 }
 391 /* Number of errors in the current test case. */
 392 static volatile sig_atomic_t nerrs;
 394 static void validate_signal_ss(int sig, ucontext_t *ctx)
 395 {
 396 #ifdef __x86_64__
 397         bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
 399         if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
 400                 printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
 401                 nerrs++;
 403                 /*
 404                  * This happens on Linux 4.1.  The rest will fail, too, so
 405                  * return now to reduce the noise.
 406                  */
 407                 return;
 408         }
 410         /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
 411         if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
 412                 printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
 413                        sig);
 414                 nerrs++;
 415         }
 417         if (is_valid_ss(*ssptr(ctx))) {
 418                 /*
 419                  * DOSEMU was written before 64-bit sigcontext had SS, and
 420                  * it tries to figure out the signal source SS by looking at
 421                  * the physical register.  Make sure that keeps working.
 422                  */
 423                 unsigned short hw_ss;
 424                 asm ("mov %%ss, %0" : "=rm" (hw_ss));
 425                 if (hw_ss != *ssptr(ctx)) {
 426                         printf("[FAIL]\tHW SS didn't match saved SS\n");
 427                         nerrs++;
 428                 }
 429         }
 430 #endif
 431 }
 433 /*
 434  * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
 435  * int3 trampoline.  Sets SP to a large known value so that we can see
 436  * whether the value round-trips back to user mode correctly.
 437  */
 438 static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
 439 {
 440         ucontext_t *ctx = (ucontext_t*)ctx_void;
 442         validate_signal_ss(sig, ctx);
 444         memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
 446         *csptr(ctx) = sig_cs;
 447         *ssptr(ctx) = sig_ss;
 449         ctx->uc_mcontext.gregs[REG_IP] =
 450                 sig_cs == code16_sel ? 0 : (unsigned long)&int3;
 451         ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
 452         ctx->uc_mcontext.gregs[REG_CX] = 0;
 454 #ifdef __i386__
 455         /*
 456          * Make sure the kernel doesn't inadvertently use DS or ES-relative
 457          * accesses in a region where user DS or ES is loaded.
 458          *
 459          * Skip this for 64-bit builds because long mode doesn't care about
 460          * DS and ES and skipping it increases test coverage a little bit,
 461          * since 64-bit kernels can still run the 32-bit build.
 462          */
 463         ctx->uc_mcontext.gregs[REG_DS] = 0;
 464         ctx->uc_mcontext.gregs[REG_ES] = 0;
 465 #endif
 467         memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
 468         requested_regs[REG_CX] = *ssptr(ctx);   /* The asm code does this. */
 470         return;
 471 }
 473 /*
 474  * Called after a successful sigreturn (via int3) or from a failed
 475  * sigreturn (directly by kernel).  Restores our state so that the
 476  * original raise(SIGUSR1) returns.
 477  */
 478 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
 479 {
 480         ucontext_t *ctx = (ucontext_t*)ctx_void;
 482         validate_signal_ss(sig, ctx);
 484         sig_err = ctx->uc_mcontext.gregs[REG_ERR];
 485         sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
 487         unsigned short ss;
 488         asm ("mov %%ss,%0" : "=r" (ss));
 490         greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
 491         if (asm_ss != sig_ss && sig == SIGTRAP) {
 492                 /* Sanity check failure. */
 493                 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
 494                        ss, *ssptr(ctx), (unsigned long long)asm_ss);
 495                 nerrs++;
 496         }
 498         memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
 499         memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
 501 #ifdef __x86_64__
 502         if (sig_corrupt_final_ss) {
 503                 if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
 504                         printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
 505                         nerrs++;
 506                 } else {
 507                         /*
 508                          * DOSEMU transitions from 32-bit to 64-bit mode by
 509                          * adjusting sigcontext, and it requires that this work
 510                          * even if the saved SS is bogus.
 511                          */
 512                         printf("\tCorrupting SS on return to 64-bit mode\n");
 513                         *ssptr(ctx) = 0;
 514                 }
 515         }
 516 #endif
 518         sig_trapped = sig;
 519 }
 521 #ifdef __x86_64__
 522 /* Tests recovery if !UC_STRICT_RESTORE_SS */
 523 static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
 524 {
 525         ucontext_t *ctx = (ucontext_t*)ctx_void;
 527         if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
 528                 printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
 529                 nerrs++;
 530                 return;  /* We can't do the rest. */
 531         }
 533         ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
 534         *ssptr(ctx) = 0;
 536         /* Return.  The kernel should recover without sending another signal. */
 537 }
 539 static int test_nonstrict_ss(void)
 540 {
 541         clearhandler(SIGUSR1);
 542         clearhandler(SIGTRAP);
 543         clearhandler(SIGSEGV);
 544         clearhandler(SIGILL);
 545         sethandler(SIGUSR2, sigusr2, 0);
 547         nerrs = 0;
 549         printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
 550         raise(SIGUSR2);
 551         if (!nerrs)
 552                 printf("[OK]\tIt worked\n");
 554         return nerrs;
 555 }
 556 #endif
 558 /* Finds a usable code segment of the requested bitness. */
 559 int find_cs(int bitness)
 560 {
 561         unsigned short my_cs;
 563         asm ("mov %%cs,%0" :  "=r" (my_cs));
 565         if (cs_bitness(my_cs) == bitness)
 566                 return my_cs;
 567         if (cs_bitness(my_cs + (2 << 3)) == bitness)
 568                 return my_cs + (2 << 3);
 569         if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
 570             return my_cs - (2 << 3);
 571         if (cs_bitness(code16_sel) == bitness)
 572                 return code16_sel;
 574         printf("[WARN]\tCould not find %d-bit CS\n", bitness);
 575         return -1;
 576 }
 578 static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
 579 {
 580         int cs = find_cs(cs_bits);
 581         if (cs == -1) {
 582                 printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
 583                        cs_bits, use_16bit_ss ? 16 : 32);
 584                 return 0;
 585         }
 587         if (force_ss != -1) {
 588                 sig_ss = force_ss;
 589         } else {
 590                 if (use_16bit_ss) {
 591                         if (!data16_sel) {
 592                                 printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
 593                                        cs_bits);
 594                                 return 0;
 595                         }
 596                         sig_ss = data16_sel;
 597                 } else {
 598                         asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
 599                 }
 600         }
 602         sig_cs = cs;
 604         printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
 605                cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
 606                (sig_ss & 4) ? "" : ", GDT");
 608         raise(SIGUSR1);
 610         nerrs = 0;
 612         /*
 613          * Check that each register had an acceptable value when the
 614          * int3 trampoline was invoked.
 615          */
 616         for (int i = 0; i < NGREG; i++) {
 617                 greg_t req = requested_regs[i], res = resulting_regs[i];
 619                 if (i == REG_TRAPNO || i == REG_IP)
 620                         continue;       /* don't care */
 622                 if (i == REG_SP) {
 623                         /*
 624                          * If we were using a 16-bit stack segment, then
 625                          * the kernel is a bit stuck: IRET only restores
 626                          * the low 16 bits of ESP/RSP if SS is 16-bit.
 627                          * The kernel uses a hack to restore bits 31:16,
 628                          * but that hack doesn't help with bits 63:32.
 629                          * On Intel CPUs, bits 63:32 end up zeroed, and, on
 630                          * AMD CPUs, they leak the high bits of the kernel
 631                          * espfix64 stack pointer.  There's very little that
 632                          * the kernel can do about it.
 633                          *
 634                          * Similarly, if we are returning to a 32-bit context,
 635                          * the CPU will often lose the high 32 bits of RSP.
 636                          */
 638                         if (res == req)
 639                                 continue;
 641                         if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
 642                                 printf("[NOTE]\tSP: %llx -> %llx\n",
 643                                        (unsigned long long)req,
 644                                        (unsigned long long)res);
 645                                 continue;
 646                         }
 648                         printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
 649                                (unsigned long long)requested_regs[i],
 650                                (unsigned long long)resulting_regs[i]);
 651                         nerrs++;
 652                         continue;
 653                 }
 655                 bool ignore_reg = false;
 656 #if __i386__
 657                 if (i == REG_UESP)
 658                         ignore_reg = true;
 659 #else
 660                 if (i == REG_CSGSFS) {
 661                         struct selectors *req_sels =
 662                                 (void *)&requested_regs[REG_CSGSFS];
 663                         struct selectors *res_sels =
 664                                 (void *)&resulting_regs[REG_CSGSFS];
 665                         if (req_sels->cs != res_sels->cs) {
 666                                 printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
 667                                        req_sels->cs, res_sels->cs);
 668                                 nerrs++;
 669                         }
 671                         if (req_sels->ss != res_sels->ss) {
 672                                 printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
 673                                        req_sels->ss, res_sels->ss);
 674                                 nerrs++;
 675                         }
 677                         continue;
 678                 }
 679 #endif
 681                 /* Sanity check on the kernel */
 682                 if (i == REG_CX && req != res) {
 683                         printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
 684                                (unsigned long long)req,
 685                                (unsigned long long)res);
 686                         nerrs++;
 687                         continue;
 688                 }
 690                 if (req != res && !ignore_reg) {
 691                         printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
 692                                i, (unsigned long long)req,
 693                                (unsigned long long)res);
 694                         nerrs++;
 695                 }
 696         }
 698         if (nerrs == 0)
 699                 printf("[OK]\tall registers okay\n");
 701         return nerrs;
 702 }
 704 static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
 705 {
 706         int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
 707         if (cs == -1)
 708                 return 0;
 710         sig_cs = cs;
 711         sig_ss = ss;
 713         printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
 714                cs_bits, sig_cs, sig_ss);
 716         sig_trapped = 0;
 717         raise(SIGUSR1);
 718         if (sig_trapped) {
 719                 char errdesc[32] = "";
 720                 if (sig_err) {
 721                         const char *src = (sig_err & 1) ? " EXT" : "";
 722                         const char *table;
 723                         if ((sig_err & 0x6) == 0x0)
 724                                 table = "GDT";
 725                         else if ((sig_err & 0x6) == 0x4)
 726                                 table = "LDT";
 727                         else if ((sig_err & 0x6) == 0x2)
 728                                 table = "IDT";
 729                         else
 730                                 table = "???";
 732                         sprintf(errdesc, "%s%s index %d, ",
 733                                 table, src, sig_err >> 3);
 734                 }
 736                 char trapname[32];
 737                 if (sig_trapno == 13)
 738                         strcpy(trapname, "GP");
 739                 else if (sig_trapno == 11)
 740                         strcpy(trapname, "NP");
 741                 else if (sig_trapno == 12)
 742                         strcpy(trapname, "SS");
 743                 else if (sig_trapno == 32)
 744                         strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
 745                 else
 746                         sprintf(trapname, "%d", sig_trapno);
 748                 printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
 749                        trapname, (unsigned long)sig_err,
 750                        errdesc, strsignal(sig_trapped));
 751                 return 0;
 752         } else {
 753                 /*
 754                  * This also implicitly tests UC_STRICT_RESTORE_SS:
 755                  * We check that these signals set UC_STRICT_RESTORE_SS and,
 756                  * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
 757                  * then we won't get SIGSEGV.
 758                  */
 759                 printf("[FAIL]\tDid not get SIGSEGV\n");
 760                 return 1;
 761         }
 762 }
 764 int main()
 765 {
 766         int total_nerrs = 0;
 767         unsigned short my_cs, my_ss;
 769         asm volatile ("mov %%cs,%0" : "=r" (my_cs));
 770         asm volatile ("mov %%ss,%0" : "=r" (my_ss));
 771         setup_ldt();
 773         stack_t stack = {
 774                 .ss_sp = altstack_data,
 775                 .ss_size = SIGSTKSZ,
 776         };
 777         if (sigaltstack(&stack, NULL) != 0)
 778                 err(1, "sigaltstack");
 780         sethandler(SIGUSR1, sigusr1, 0);
 781         sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
 783         /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
 784         total_nerrs += test_valid_sigreturn(64, false, -1);
 785         total_nerrs += test_valid_sigreturn(32, false, -1);
 786         total_nerrs += test_valid_sigreturn(16, false, -1);
 788         /*
 789          * Test easy espfix cases: return to a 16-bit LDT SS in each possible
 790          * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
 791          *
 792          * This catches the original missing-espfix-on-64-bit-kernels issue
 793          * as well as CVE-2014-8134.
 794          */
 795         total_nerrs += test_valid_sigreturn(64, true, -1);
 796         total_nerrs += test_valid_sigreturn(32, true, -1);
 797         total_nerrs += test_valid_sigreturn(16, true, -1);
 799         if (gdt_data16_idx) {
 800                 /*
 801                  * For performance reasons, Linux skips espfix if SS points
 802                  * to the GDT.  If we were able to allocate a 16-bit SS in
 803                  * the GDT, see if it leaks parts of the kernel stack pointer.
 804                  *
 805                  * This tests for CVE-2014-8133.
 806                  */
 807                 total_nerrs += test_valid_sigreturn(64, true,
 808                                                     GDT3(gdt_data16_idx));
 809                 total_nerrs += test_valid_sigreturn(32, true,
 810                                                     GDT3(gdt_data16_idx));
 811                 total_nerrs += test_valid_sigreturn(16, true,
 812                                                     GDT3(gdt_data16_idx));
 813         }
 815 #ifdef __x86_64__
 816         /* Nasty ABI case: check SS corruption handling. */
 817         sig_corrupt_final_ss = 1;
 818         total_nerrs += test_valid_sigreturn(32, false, -1);
 819         total_nerrs += test_valid_sigreturn(32, true, -1);
 820         sig_corrupt_final_ss = 0;
 821 #endif
 823         /*
 824          * We're done testing valid sigreturn cases.  Now we test states
 825          * for which sigreturn itself will succeed but the subsequent
 826          * entry to user mode will fail.
 827          *
 828          * Depending on the failure mode and the kernel bitness, these
 829          * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
 830          */
 831         clearhandler(SIGTRAP);
 832         sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
 833         sethandler(SIGBUS, sigtrap, SA_ONSTACK);
 834         sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */
 836         /* Easy failures: invalid SS, resulting in #GP(0) */
 837         test_bad_iret(64, ldt_nonexistent_sel, -1);
 838         test_bad_iret(32, ldt_nonexistent_sel, -1);
 839         test_bad_iret(16, ldt_nonexistent_sel, -1);
 841         /* These fail because SS isn't a data segment, resulting in #GP(SS) */
 842         test_bad_iret(64, my_cs, -1);
 843         test_bad_iret(32, my_cs, -1);
 844         test_bad_iret(16, my_cs, -1);
 846         /* Try to return to a not-present code segment, triggering #NP(SS). */
 847         test_bad_iret(32, my_ss, npcode32_sel);
 849         /*
 850          * Try to return to a not-present but otherwise valid data segment.
 851          * This will cause IRET to fail with #SS on the espfix stack.  This
 852          * exercises CVE-2014-9322.
 853          *
 854          * Note that, if espfix is enabled, 64-bit Linux will lose track
 855          * of the actual cause of failure and report #GP(0) instead.
 856          * This would be very difficult for Linux to avoid, because
 857          * espfix64 causes IRET failures to be promoted to #DF, so the
 858          * original exception frame is never pushed onto the stack.
 859          */
 860         test_bad_iret(32, npdata32_sel, -1);
 862         /*
 863          * Try to return to a not-present but otherwise valid data
 864          * segment without invoking espfix.  Newer kernels don't allow
 865          * this to happen in the first place.  On older kernels, though,
 866          * this can trigger CVE-2014-9322.
 867          */
 868         if (gdt_npdata32_idx)
 869                 test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
 871 #ifdef __x86_64__
 872         total_nerrs += test_nonstrict_ss();
 873 #endif
 875         return total_nerrs ? 1 : 0;
 876 }

/* [<][>][^][v][top][bottom][index][help] */