root/tools/testing/selftests/x86/fsgsbase.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. sethandler
  2. clearhandler
  3. sigsegv
  4. sigill
  5. rdgsbase
  6. rdfsbase
  7. wrgsbase
  8. wrfsbase
  9. read_base
  10. check_gs_value
  11. mov_0_gs
  12. do_remote_base
  13. load_gs
  14. test_wrbase
  15. threadproc
  16. set_gs_and_switch_to
  17. test_unexpected_base
  18. test_ptrace_write_gsbase
  19. main

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * fsgsbase.c, an fsgsbase test
   4  * Copyright (c) 2014-2016 Andy Lutomirski
   5  */
   6 
   7 #define _GNU_SOURCE
   8 #include <stdio.h>
   9 #include <stdlib.h>
  10 #include <stdbool.h>
  11 #include <string.h>
  12 #include <sys/syscall.h>
  13 #include <unistd.h>
  14 #include <err.h>
  15 #include <sys/user.h>
  16 #include <asm/prctl.h>
  17 #include <sys/prctl.h>
  18 #include <signal.h>
  19 #include <limits.h>
  20 #include <sys/ucontext.h>
  21 #include <sched.h>
  22 #include <linux/futex.h>
  23 #include <pthread.h>
  24 #include <asm/ldt.h>
  25 #include <sys/mman.h>
  26 #include <stddef.h>
  27 #include <sys/ptrace.h>
  28 #include <sys/wait.h>
  29 #include <setjmp.h>
  30 
  31 #ifndef __x86_64__
  32 # error This test is 64-bit only
  33 #endif
  34 
  35 static volatile sig_atomic_t want_segv;
  36 static volatile unsigned long segv_addr;
  37 
  38 static unsigned short *shared_scratch;
  39 
  40 static int nerrs;
  41 
  42 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
  43                        int flags)
  44 {
  45         struct sigaction sa;
  46         memset(&sa, 0, sizeof(sa));
  47         sa.sa_sigaction = handler;
  48         sa.sa_flags = SA_SIGINFO | flags;
  49         sigemptyset(&sa.sa_mask);
  50         if (sigaction(sig, &sa, 0))
  51                 err(1, "sigaction");
  52 }
  53 
  54 static void clearhandler(int sig)
  55 {
  56         struct sigaction sa;
  57         memset(&sa, 0, sizeof(sa));
  58         sa.sa_handler = SIG_DFL;
  59         sigemptyset(&sa.sa_mask);
  60         if (sigaction(sig, &sa, 0))
  61                 err(1, "sigaction");
  62 }
  63 
  64 static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
  65 {
  66         ucontext_t *ctx = (ucontext_t*)ctx_void;
  67 
  68         if (!want_segv) {
  69                 clearhandler(SIGSEGV);
  70                 return;  /* Crash cleanly. */
  71         }
  72 
  73         want_segv = false;
  74         segv_addr = (unsigned long)si->si_addr;
  75 
  76         ctx->uc_mcontext.gregs[REG_RIP] += 4;   /* Skip the faulting mov */
  77 
  78 }
  79 
  80 static jmp_buf jmpbuf;
  81 
  82 static void sigill(int sig, siginfo_t *si, void *ctx_void)
  83 {
  84         siglongjmp(jmpbuf, 1);
  85 }
  86 
  87 static bool have_fsgsbase;
  88 
  89 static inline unsigned long rdgsbase(void)
  90 {
  91         unsigned long gsbase;
  92 
  93         asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory");
  94 
  95         return gsbase;
  96 }
  97 
  98 static inline unsigned long rdfsbase(void)
  99 {
 100         unsigned long fsbase;
 101 
 102         asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
 103 
 104         return fsbase;
 105 }
 106 
 107 static inline void wrgsbase(unsigned long gsbase)
 108 {
 109         asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
 110 }
 111 
 112 static inline void wrfsbase(unsigned long fsbase)
 113 {
 114         asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory");
 115 }
 116 
 117 enum which_base { FS, GS };
 118 
 119 static unsigned long read_base(enum which_base which)
 120 {
 121         unsigned long offset;
 122         /*
 123          * Unless we have FSGSBASE, there's no direct way to do this from
 124          * user mode.  We can get at it indirectly using signals, though.
 125          */
 126 
 127         want_segv = true;
 128 
 129         offset = 0;
 130         if (which == FS) {
 131                 /* Use a constant-length instruction here. */
 132                 asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax");
 133         } else {
 134                 asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax");
 135         }
 136         if (!want_segv)
 137                 return segv_addr + offset;
 138 
 139         /*
 140          * If that didn't segfault, try the other end of the address space.
 141          * Unless we get really unlucky and run into the vsyscall page, this
 142          * is guaranteed to segfault.
 143          */
 144 
 145         offset = (ULONG_MAX >> 1) + 1;
 146         if (which == FS) {
 147                 asm volatile ("mov %%fs:(%%rcx), %%rax"
 148                               : : "c" (offset) : "rax");
 149         } else {
 150                 asm volatile ("mov %%gs:(%%rcx), %%rax"
 151                               : : "c" (offset) : "rax");
 152         }
 153         if (!want_segv)
 154                 return segv_addr + offset;
 155 
 156         abort();
 157 }
 158 
 159 static void check_gs_value(unsigned long value)
 160 {
 161         unsigned long base;
 162         unsigned short sel;
 163 
 164         printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value);
 165         if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0)
 166                 err(1, "ARCH_SET_GS");
 167 
 168         asm volatile ("mov %%gs, %0" : "=rm" (sel));
 169         base = read_base(GS);
 170         if (base == value) {
 171                 printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n",
 172                        sel);
 173         } else {
 174                 nerrs++;
 175                 printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n",
 176                        base, sel);
 177         }
 178 
 179         if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0)
 180                 err(1, "ARCH_GET_GS");
 181         if (base == value) {
 182                 printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n",
 183                        sel);
 184         } else {
 185                 nerrs++;
 186                 printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n",
 187                        base, sel);
 188         }
 189 }
 190 
 191 static void mov_0_gs(unsigned long initial_base, bool schedule)
 192 {
 193         unsigned long base, arch_base;
 194 
 195         printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule " : "");
 196         if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0)
 197                 err(1, "ARCH_SET_GS");
 198 
 199         if (schedule)
 200                 usleep(10);
 201 
 202         asm volatile ("mov %0, %%gs" : : "rm" (0));
 203         base = read_base(GS);
 204         if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0)
 205                 err(1, "ARCH_GET_GS");
 206         if (base == arch_base) {
 207                 printf("[OK]\tGSBASE is 0x%lx\n", base);
 208         } else {
 209                 nerrs++;
 210                 printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base);
 211         }
 212 }
 213 
 214 static volatile unsigned long remote_base;
 215 static volatile bool remote_hard_zero;
 216 static volatile unsigned int ftx;
 217 
 218 /*
 219  * ARCH_SET_FS/GS(0) may or may not program a selector of zero.  HARD_ZERO
 220  * means to force the selector to zero to improve test coverage.
 221  */
 222 #define HARD_ZERO 0xa1fa5f343cb85fa4
 223 
 224 static void do_remote_base()
 225 {
 226         unsigned long to_set = remote_base;
 227         bool hard_zero = false;
 228         if (to_set == HARD_ZERO) {
 229                 to_set = 0;
 230                 hard_zero = true;
 231         }
 232 
 233         if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0)
 234                 err(1, "ARCH_SET_GS");
 235 
 236         if (hard_zero)
 237                 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
 238 
 239         unsigned short sel;
 240         asm volatile ("mov %%gs, %0" : "=rm" (sel));
 241         printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n",
 242                to_set, hard_zero ? " and clear gs" : "", sel);
 243 }
 244 
 245 static __thread int set_thread_area_entry_number = -1;
 246 
 247 static unsigned short load_gs(void)
 248 {
 249         /*
 250          * Sets GS != 0 and GSBASE != 0 but arranges for the kernel to think
 251          * that GSBASE == 0 (i.e. thread.gsbase == 0).
 252          */
 253 
 254         /* Step 1: tell the kernel that we have GSBASE == 0. */
 255         if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
 256                 err(1, "ARCH_SET_GS");
 257 
 258         /* Step 2: change GSBASE without telling the kernel. */
 259         struct user_desc desc = {
 260                 .entry_number    = 0,
 261                 .base_addr       = 0xBAADF00D,
 262                 .limit           = 0xfffff,
 263                 .seg_32bit       = 1,
 264                 .contents        = 0, /* Data, grow-up */
 265                 .read_exec_only  = 0,
 266                 .limit_in_pages  = 1,
 267                 .seg_not_present = 0,
 268                 .useable         = 0
 269         };
 270         if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
 271                 printf("\tusing LDT slot 0\n");
 272                 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
 273                 return 0x7;
 274         } else {
 275                 /* No modify_ldt for us (configured out, perhaps) */
 276 
 277                 struct user_desc *low_desc = mmap(
 278                         NULL, sizeof(desc),
 279                         PROT_READ | PROT_WRITE,
 280                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
 281                 memcpy(low_desc, &desc, sizeof(desc));
 282 
 283                 low_desc->entry_number = set_thread_area_entry_number;
 284 
 285                 /* 32-bit set_thread_area */
 286                 long ret;
 287                 asm volatile ("int $0x80"
 288                               : "=a" (ret) : "a" (243), "b" (low_desc)
 289                               : "r8", "r9", "r10", "r11");
 290                 memcpy(&desc, low_desc, sizeof(desc));
 291                 munmap(low_desc, sizeof(desc));
 292 
 293                 if (ret != 0) {
 294                         printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
 295                         return 0;
 296                 }
 297                 printf("\tusing GDT slot %d\n", desc.entry_number);
 298                 set_thread_area_entry_number = desc.entry_number;
 299 
 300                 unsigned short gs = (unsigned short)((desc.entry_number << 3) | 0x3);
 301                 asm volatile ("mov %0, %%gs" : : "rm" (gs));
 302                 return gs;
 303         }
 304 }
 305 
 306 void test_wrbase(unsigned short index, unsigned long base)
 307 {
 308         unsigned short newindex;
 309         unsigned long newbase;
 310 
 311         printf("[RUN]\tGS = 0x%hx, GSBASE = 0x%lx\n", index, base);
 312 
 313         asm volatile ("mov %0, %%gs" : : "rm" (index));
 314         wrgsbase(base);
 315 
 316         remote_base = 0;
 317         ftx = 1;
 318         syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
 319         while (ftx != 0)
 320                 syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
 321 
 322         asm volatile ("mov %%gs, %0" : "=rm" (newindex));
 323         newbase = rdgsbase();
 324 
 325         if (newindex == index && newbase == base) {
 326                 printf("[OK]\tIndex and base were preserved\n");
 327         } else {
 328                 printf("[FAIL]\tAfter switch, GS = 0x%hx and GSBASE = 0x%lx\n",
 329                        newindex, newbase);
 330                 nerrs++;
 331         }
 332 }
 333 
 334 static void *threadproc(void *ctx)
 335 {
 336         while (1) {
 337                 while (ftx == 0)
 338                         syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
 339                 if (ftx == 3)
 340                         return NULL;
 341 
 342                 if (ftx == 1) {
 343                         do_remote_base();
 344                 } else if (ftx == 2) {
 345                         /*
 346                          * On AMD chips, this causes GSBASE != 0, GS == 0, and
 347                          * thread.gsbase == 0.
 348                          */
 349 
 350                         load_gs();
 351                         asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
 352                 } else {
 353                         errx(1, "helper thread got bad command");
 354                 }
 355 
 356                 ftx = 0;
 357                 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
 358         }
 359 }
 360 
 361 static void set_gs_and_switch_to(unsigned long local,
 362                                  unsigned short force_sel,
 363                                  unsigned long remote)
 364 {
 365         unsigned long base;
 366         unsigned short sel_pre_sched, sel_post_sched;
 367 
 368         bool hard_zero = false;
 369         if (local == HARD_ZERO) {
 370                 hard_zero = true;
 371                 local = 0;
 372         }
 373 
 374         printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
 375                local, hard_zero ? " and clear gs" : "", remote);
 376         if (force_sel)
 377                 printf("\tBefore schedule, set selector to 0x%hx\n", force_sel);
 378         if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
 379                 err(1, "ARCH_SET_GS");
 380         if (hard_zero)
 381                 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
 382 
 383         if (read_base(GS) != local) {
 384                 nerrs++;
 385                 printf("[FAIL]\tGSBASE wasn't set as expected\n");
 386         }
 387 
 388         if (force_sel) {
 389                 asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
 390                 sel_pre_sched = force_sel;
 391                 local = read_base(GS);
 392 
 393                 /*
 394                  * Signal delivery seems to mess up weird selectors.  Put it
 395                  * back.
 396                  */
 397                 asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
 398         } else {
 399                 asm volatile ("mov %%gs, %0" : "=rm" (sel_pre_sched));
 400         }
 401 
 402         remote_base = remote;
 403         ftx = 1;
 404         syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
 405         while (ftx != 0)
 406                 syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
 407 
 408         asm volatile ("mov %%gs, %0" : "=rm" (sel_post_sched));
 409         base = read_base(GS);
 410         if (base == local && sel_pre_sched == sel_post_sched) {
 411                 printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
 412                        sel_pre_sched, local);
 413         } else {
 414                 nerrs++;
 415                 printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
 416                        sel_pre_sched, local, sel_post_sched, base);
 417         }
 418 }
 419 
 420 static void test_unexpected_base(void)
 421 {
 422         unsigned long base;
 423 
 424         printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n");
 425         if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
 426                 err(1, "ARCH_SET_GS");
 427         asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
 428 
 429         ftx = 2;
 430         syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
 431         while (ftx != 0)
 432                 syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
 433 
 434         base = read_base(GS);
 435         if (base == 0) {
 436                 printf("[OK]\tGSBASE remained 0\n");
 437         } else {
 438                 nerrs++;
 439                 printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
 440         }
 441 }
 442 
 443 #define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
 444 
 445 static void test_ptrace_write_gsbase(void)
 446 {
 447         int status;
 448         pid_t child = fork();
 449 
 450         if (child < 0)
 451                 err(1, "fork");
 452 
 453         if (child == 0) {
 454                 printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n");
 455 
 456                 *shared_scratch = load_gs();
 457 
 458                 if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
 459                         err(1, "PTRACE_TRACEME");
 460 
 461                 raise(SIGTRAP);
 462                 _exit(0);
 463         }
 464 
 465         wait(&status);
 466 
 467         if (WSTOPSIG(status) == SIGTRAP) {
 468                 unsigned long gs, base;
 469                 unsigned long gs_offset = USER_REGS_OFFSET(gs);
 470                 unsigned long base_offset = USER_REGS_OFFSET(gs_base);
 471 
 472                 gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
 473 
 474                 if (gs != *shared_scratch) {
 475                         nerrs++;
 476                         printf("[FAIL]\tGS is not prepared with nonzero\n");
 477                         goto END;
 478                 }
 479 
 480                 if (ptrace(PTRACE_POKEUSER, child, base_offset, 0xFF) != 0)
 481                         err(1, "PTRACE_POKEUSER");
 482 
 483                 gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
 484                 base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
 485 
 486                 /*
 487                  * In a non-FSGSBASE system, the nonzero selector will load
 488                  * GSBASE (again). But what is tested here is whether the
 489                  * selector value is changed or not by the GSBASE write in
 490                  * a ptracer.
 491                  */
 492                 if (gs == 0 && base == 0xFF) {
 493                         printf("[OK]\tGS was reset as expected\n");
 494                 } else {
 495                         nerrs++;
 496                         printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n", gs, base);
 497                 }
 498         }
 499 
 500 END:
 501         ptrace(PTRACE_CONT, child, NULL, NULL);
 502 }
 503 
 504 int main()
 505 {
 506         pthread_t thread;
 507 
 508         shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 509                               MAP_ANONYMOUS | MAP_SHARED, -1, 0);
 510 
 511         /* Probe FSGSBASE */
 512         sethandler(SIGILL, sigill, 0);
 513         if (sigsetjmp(jmpbuf, 1) == 0) {
 514                 rdfsbase();
 515                 have_fsgsbase = true;
 516                 printf("\tFSGSBASE instructions are enabled\n");
 517         } else {
 518                 printf("\tFSGSBASE instructions are disabled\n");
 519         }
 520         clearhandler(SIGILL);
 521 
 522         sethandler(SIGSEGV, sigsegv, 0);
 523 
 524         check_gs_value(0);
 525         check_gs_value(1);
 526         check_gs_value(0x200000000);
 527         check_gs_value(0);
 528         check_gs_value(0x200000000);
 529         check_gs_value(1);
 530 
 531         for (int sched = 0; sched < 2; sched++) {
 532                 mov_0_gs(0, !!sched);
 533                 mov_0_gs(1, !!sched);
 534                 mov_0_gs(0x200000000, !!sched);
 535         }
 536 
 537         /* Set up for multithreading. */
 538 
 539         cpu_set_t cpuset;
 540         CPU_ZERO(&cpuset);
 541         CPU_SET(0, &cpuset);
 542         if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
 543                 err(1, "sched_setaffinity to CPU 0");   /* should never fail */
 544 
 545         if (pthread_create(&thread, 0, threadproc, 0) != 0)
 546                 err(1, "pthread_create");
 547 
 548         static unsigned long bases_with_hard_zero[] = {
 549                 0, HARD_ZERO, 1, 0x200000000,
 550         };
 551 
 552         for (int local = 0; local < 4; local++) {
 553                 for (int remote = 0; remote < 4; remote++) {
 554                         for (unsigned short s = 0; s < 5; s++) {
 555                                 unsigned short sel = s;
 556                                 if (s == 4)
 557                                         asm ("mov %%ss, %0" : "=rm" (sel));
 558                                 set_gs_and_switch_to(
 559                                         bases_with_hard_zero[local],
 560                                         sel,
 561                                         bases_with_hard_zero[remote]);
 562                         }
 563                 }
 564         }
 565 
 566         test_unexpected_base();
 567 
 568         if (have_fsgsbase) {
 569                 unsigned short ss;
 570 
 571                 asm volatile ("mov %%ss, %0" : "=rm" (ss));
 572 
 573                 test_wrbase(0, 0);
 574                 test_wrbase(0, 1);
 575                 test_wrbase(0, 0x200000000);
 576                 test_wrbase(0, 0xffffffffffffffff);
 577                 test_wrbase(ss, 0);
 578                 test_wrbase(ss, 1);
 579                 test_wrbase(ss, 0x200000000);
 580                 test_wrbase(ss, 0xffffffffffffffff);
 581         }
 582 
 583         ftx = 3;  /* Kill the thread. */
 584         syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
 585 
 586         if (pthread_join(thread, NULL) != 0)
 587                 err(1, "pthread_join");
 588 
 589         test_ptrace_write_gsbase();
 590 
 591         return nerrs == 0 ? 0 : 1;
 592 }

/* [<][>][^][v][top][bottom][index][help] */