root/tools/testing/selftests/seccomp/seccomp_bpf.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. seccomp
  2. TEST
  3. TEST_SIGNAL
  4. TEST
  5. TEST
  6. TEST
  7. TEST
  8. TEST
  9. TEST
  10. TEST
  11. TEST
  12. TEST
  13. TEST
  14. TEST_SIGNAL
  15. TEST_SIGNAL
  16. TEST_SIGNAL
  17. TEST_SIGNAL
  18. TEST_SIGNAL
  19. TEST_SIGNAL
  20. kill_thread
  21. kill_thread_or_group
  22. TEST
  23. TEST
  24. TEST
  25. TEST
  26. TEST
  27. TEST
  28. TEST
  29. FIXTURE_DATA
  30. FIXTURE_SETUP
  31. FIXTURE_TEARDOWN
  32. TEST_F_SIGNAL
  33. TEST_F_SIGNAL
  34. TRAP_action
  35. TEST_F
  36. FIXTURE_DATA
  37. FIXTURE_SETUP
  38. FIXTURE_TEARDOWN
  39. TEST_F
  40. TEST_F_SIGNAL
  41. TEST_F_SIGNAL
  42. TEST_F_SIGNAL
  43. TEST_F_SIGNAL
  44. TEST_F
  45. TEST_F
  46. TEST_F
  47. TEST_F
  48. TEST_F
  49. TEST_F
  50. tracer_stop
  51. start_tracer
  52. cont_handler
  53. setup_trace_fixture
  54. teardown_trace_fixture
  55. tracer_poke
  56. FIXTURE_DATA
  57. FIXTURE_SETUP
  58. FIXTURE_TEARDOWN
  59. TEST_F
  60. TEST_F
  61. get_syscall
  62. change_syscall
  63. tracer_syscall
  64. tracer_ptrace
  65. FIXTURE_DATA
  66. FIXTURE_SETUP
  67. FIXTURE_TEARDOWN
  68. TEST_F
  69. TEST_F
  70. TEST_F
  71. TEST_F
  72. TEST_F
  73. TEST_F
  74. TEST_F
  75. TEST_F
  76. TEST_F_SIGNAL
  77. TEST_F
  78. TEST_F_SIGNAL
  79. TEST
  80. TEST
  81. TEST
  82. TEST
  83. FIXTURE_DATA
  84. FIXTURE_SETUP
  85. FIXTURE_TEARDOWN
  86. tsync_sibling
  87. tsync_start_sibling
  88. TEST_F
  89. TEST_F
  90. TEST_F
  91. TEST_F
  92. TEST_F
  93. TEST_F
  94. TEST
  95. TEST_SIGNAL
  96. TEST
  97. TEST
  98. user_trap_syscall
  99. TEST
  100. TEST
  101. signal_handler
  102. TEST
  103. TEST
  104. TEST
  105. TEST
  106. TEST
  107. TEST

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
   4  *
   5  * Test code for seccomp bpf.
   6  */
   7 
   8 #define _GNU_SOURCE
   9 #include <sys/types.h>
  10 
  11 /*
  12  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
  13  * we need to use the kernel's siginfo.h file and trick glibc
  14  * into accepting it.
  15  */
  16 #if !__GLIBC_PREREQ(2, 26)
  17 # include <asm/siginfo.h>
  18 # define __have_siginfo_t 1
  19 # define __have_sigval_t 1
  20 # define __have_sigevent_t 1
  21 #endif
  22 
  23 #include <errno.h>
  24 #include <linux/filter.h>
  25 #include <sys/prctl.h>
  26 #include <sys/ptrace.h>
  27 #include <sys/user.h>
  28 #include <linux/prctl.h>
  29 #include <linux/ptrace.h>
  30 #include <linux/seccomp.h>
  31 #include <pthread.h>
  32 #include <semaphore.h>
  33 #include <signal.h>
  34 #include <stddef.h>
  35 #include <stdbool.h>
  36 #include <string.h>
  37 #include <time.h>
  38 #include <limits.h>
  39 #include <linux/elf.h>
  40 #include <sys/uio.h>
  41 #include <sys/utsname.h>
  42 #include <sys/fcntl.h>
  43 #include <sys/mman.h>
  44 #include <sys/times.h>
  45 #include <sys/socket.h>
  46 #include <sys/ioctl.h>
  47 
  48 #include <unistd.h>
  49 #include <sys/syscall.h>
  50 #include <poll.h>
  51 
  52 #include "../kselftest_harness.h"
  53 
  54 #ifndef PR_SET_PTRACER
  55 # define PR_SET_PTRACER 0x59616d61
  56 #endif
  57 
  58 #ifndef PR_SET_NO_NEW_PRIVS
  59 #define PR_SET_NO_NEW_PRIVS 38
  60 #define PR_GET_NO_NEW_PRIVS 39
  61 #endif
  62 
  63 #ifndef PR_SECCOMP_EXT
  64 #define PR_SECCOMP_EXT 43
  65 #endif
  66 
  67 #ifndef SECCOMP_EXT_ACT
  68 #define SECCOMP_EXT_ACT 1
  69 #endif
  70 
  71 #ifndef SECCOMP_EXT_ACT_TSYNC
  72 #define SECCOMP_EXT_ACT_TSYNC 1
  73 #endif
  74 
  75 #ifndef SECCOMP_MODE_STRICT
  76 #define SECCOMP_MODE_STRICT 1
  77 #endif
  78 
  79 #ifndef SECCOMP_MODE_FILTER
  80 #define SECCOMP_MODE_FILTER 2
  81 #endif
  82 
  83 #ifndef SECCOMP_RET_ALLOW
  84 struct seccomp_data {
  85         int nr;
  86         __u32 arch;
  87         __u64 instruction_pointer;
  88         __u64 args[6];
  89 };
  90 #endif
  91 
  92 #ifndef SECCOMP_RET_KILL_PROCESS
  93 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
  94 #define SECCOMP_RET_KILL_THREAD  0x00000000U /* kill the thread */
  95 #endif
  96 #ifndef SECCOMP_RET_KILL
  97 #define SECCOMP_RET_KILL         SECCOMP_RET_KILL_THREAD
  98 #define SECCOMP_RET_TRAP         0x00030000U /* disallow and force a SIGSYS */
  99 #define SECCOMP_RET_ERRNO        0x00050000U /* returns an errno */
 100 #define SECCOMP_RET_TRACE        0x7ff00000U /* pass to a tracer or disallow */
 101 #define SECCOMP_RET_ALLOW        0x7fff0000U /* allow */
 102 #endif
 103 #ifndef SECCOMP_RET_LOG
 104 #define SECCOMP_RET_LOG          0x7ffc0000U /* allow after logging */
 105 #endif
 106 
 107 #ifndef __NR_seccomp
 108 # if defined(__i386__)
 109 #  define __NR_seccomp 354
 110 # elif defined(__x86_64__)
 111 #  define __NR_seccomp 317
 112 # elif defined(__arm__)
 113 #  define __NR_seccomp 383
 114 # elif defined(__aarch64__)
 115 #  define __NR_seccomp 277
 116 # elif defined(__hppa__)
 117 #  define __NR_seccomp 338
 118 # elif defined(__powerpc__)
 119 #  define __NR_seccomp 358
 120 # elif defined(__s390__)
 121 #  define __NR_seccomp 348
 122 # else
 123 #  warning "seccomp syscall number unknown for this architecture"
 124 #  define __NR_seccomp 0xffff
 125 # endif
 126 #endif
 127 
 128 #ifndef SECCOMP_SET_MODE_STRICT
 129 #define SECCOMP_SET_MODE_STRICT 0
 130 #endif
 131 
 132 #ifndef SECCOMP_SET_MODE_FILTER
 133 #define SECCOMP_SET_MODE_FILTER 1
 134 #endif
 135 
 136 #ifndef SECCOMP_GET_ACTION_AVAIL
 137 #define SECCOMP_GET_ACTION_AVAIL 2
 138 #endif
 139 
 140 #ifndef SECCOMP_GET_NOTIF_SIZES
 141 #define SECCOMP_GET_NOTIF_SIZES 3
 142 #endif
 143 
 144 #ifndef SECCOMP_FILTER_FLAG_TSYNC
 145 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
 146 #endif
 147 
 148 #ifndef SECCOMP_FILTER_FLAG_LOG
 149 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
 150 #endif
 151 
 152 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
 153 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
 154 #endif
 155 
 156 #ifndef PTRACE_SECCOMP_GET_METADATA
 157 #define PTRACE_SECCOMP_GET_METADATA     0x420d
 158 
 159 struct seccomp_metadata {
 160         __u64 filter_off;       /* Input: which filter */
 161         __u64 flags;             /* Output: filter's flags */
 162 };
 163 #endif
 164 
 165 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
 166 #define SECCOMP_FILTER_FLAG_NEW_LISTENER        (1UL << 3)
 167 
 168 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U
 169 
 170 #define SECCOMP_IOC_MAGIC               '!'
 171 #define SECCOMP_IO(nr)                  _IO(SECCOMP_IOC_MAGIC, nr)
 172 #define SECCOMP_IOR(nr, type)           _IOR(SECCOMP_IOC_MAGIC, nr, type)
 173 #define SECCOMP_IOW(nr, type)           _IOW(SECCOMP_IOC_MAGIC, nr, type)
 174 #define SECCOMP_IOWR(nr, type)          _IOWR(SECCOMP_IOC_MAGIC, nr, type)
 175 
 176 /* Flags for seccomp notification fd ioctl. */
 177 #define SECCOMP_IOCTL_NOTIF_RECV        SECCOMP_IOWR(0, struct seccomp_notif)
 178 #define SECCOMP_IOCTL_NOTIF_SEND        SECCOMP_IOWR(1, \
 179                                                 struct seccomp_notif_resp)
 180 #define SECCOMP_IOCTL_NOTIF_ID_VALID    SECCOMP_IOR(2, __u64)
 181 
 182 struct seccomp_notif {
 183         __u64 id;
 184         __u32 pid;
 185         __u32 flags;
 186         struct seccomp_data data;
 187 };
 188 
 189 struct seccomp_notif_resp {
 190         __u64 id;
 191         __s64 val;
 192         __s32 error;
 193         __u32 flags;
 194 };
 195 
 196 struct seccomp_notif_sizes {
 197         __u16 seccomp_notif;
 198         __u16 seccomp_notif_resp;
 199         __u16 seccomp_data;
 200 };
 201 #endif
 202 
 203 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
 204 #define PTRACE_EVENTMSG_SYSCALL_ENTRY   1
 205 #define PTRACE_EVENTMSG_SYSCALL_EXIT    2
 206 #endif
 207 
 208 #ifndef seccomp
 209 int seccomp(unsigned int op, unsigned int flags, void *args)
 210 {
 211         errno = 0;
 212         return syscall(__NR_seccomp, op, flags, args);
 213 }
 214 #endif
 215 
 216 #if __BYTE_ORDER == __LITTLE_ENDIAN
 217 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
 218 #elif __BYTE_ORDER == __BIG_ENDIAN
 219 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
 220 #else
 221 #error "wut? Unknown __BYTE_ORDER?!"
 222 #endif
 223 
 224 #define SIBLING_EXIT_UNKILLED   0xbadbeef
 225 #define SIBLING_EXIT_FAILURE    0xbadface
 226 #define SIBLING_EXIT_NEWPRIVS   0xbadfeed
 227 
 228 TEST(mode_strict_support)
 229 {
 230         long ret;
 231 
 232         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 233         ASSERT_EQ(0, ret) {
 234                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
 235         }
 236         syscall(__NR_exit, 0);
 237 }
 238 
 239 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
 240 {
 241         long ret;
 242 
 243         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 244         ASSERT_EQ(0, ret) {
 245                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
 246         }
 247         syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
 248                 NULL, NULL, NULL);
 249         EXPECT_FALSE(true) {
 250                 TH_LOG("Unreachable!");
 251         }
 252 }
 253 
 254 /* Note! This doesn't test no new privs behavior */
 255 TEST(no_new_privs_support)
 256 {
 257         long ret;
 258 
 259         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 260         EXPECT_EQ(0, ret) {
 261                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 262         }
 263 }
 264 
 265 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
 266 TEST(mode_filter_support)
 267 {
 268         long ret;
 269 
 270         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
 271         ASSERT_EQ(0, ret) {
 272                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 273         }
 274         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
 275         EXPECT_EQ(-1, ret);
 276         EXPECT_EQ(EFAULT, errno) {
 277                 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
 278         }
 279 }
 280 
 281 TEST(mode_filter_without_nnp)
 282 {
 283         struct sock_filter filter[] = {
 284                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 285         };
 286         struct sock_fprog prog = {
 287                 .len = (unsigned short)ARRAY_SIZE(filter),
 288                 .filter = filter,
 289         };
 290         long ret;
 291 
 292         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
 293         ASSERT_LE(0, ret) {
 294                 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
 295         }
 296         errno = 0;
 297         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 298         /* Succeeds with CAP_SYS_ADMIN, fails without */
 299         /* TODO(wad) check caps not euid */
 300         if (geteuid()) {
 301                 EXPECT_EQ(-1, ret);
 302                 EXPECT_EQ(EACCES, errno);
 303         } else {
 304                 EXPECT_EQ(0, ret);
 305         }
 306 }
 307 
 308 #define MAX_INSNS_PER_PATH 32768
 309 
 310 TEST(filter_size_limits)
 311 {
 312         int i;
 313         int count = BPF_MAXINSNS + 1;
 314         struct sock_filter allow[] = {
 315                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 316         };
 317         struct sock_filter *filter;
 318         struct sock_fprog prog = { };
 319         long ret;
 320 
 321         filter = calloc(count, sizeof(*filter));
 322         ASSERT_NE(NULL, filter);
 323 
 324         for (i = 0; i < count; i++)
 325                 filter[i] = allow[0];
 326 
 327         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 328         ASSERT_EQ(0, ret);
 329 
 330         prog.filter = filter;
 331         prog.len = count;
 332 
 333         /* Too many filter instructions in a single filter. */
 334         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 335         ASSERT_NE(0, ret) {
 336                 TH_LOG("Installing %d insn filter was allowed", prog.len);
 337         }
 338 
 339         /* One less is okay, though. */
 340         prog.len -= 1;
 341         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 342         ASSERT_EQ(0, ret) {
 343                 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
 344         }
 345 }
 346 
 347 TEST(filter_chain_limits)
 348 {
 349         int i;
 350         int count = BPF_MAXINSNS;
 351         struct sock_filter allow[] = {
 352                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 353         };
 354         struct sock_filter *filter;
 355         struct sock_fprog prog = { };
 356         long ret;
 357 
 358         filter = calloc(count, sizeof(*filter));
 359         ASSERT_NE(NULL, filter);
 360 
 361         for (i = 0; i < count; i++)
 362                 filter[i] = allow[0];
 363 
 364         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 365         ASSERT_EQ(0, ret);
 366 
 367         prog.filter = filter;
 368         prog.len = 1;
 369 
 370         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 371         ASSERT_EQ(0, ret);
 372 
 373         prog.len = count;
 374 
 375         /* Too many total filter instructions. */
 376         for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
 377                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 378                 if (ret != 0)
 379                         break;
 380         }
 381         ASSERT_NE(0, ret) {
 382                 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
 383                        i, count, i * (count + 4));
 384         }
 385 }
 386 
 387 TEST(mode_filter_cannot_move_to_strict)
 388 {
 389         struct sock_filter filter[] = {
 390                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 391         };
 392         struct sock_fprog prog = {
 393                 .len = (unsigned short)ARRAY_SIZE(filter),
 394                 .filter = filter,
 395         };
 396         long ret;
 397 
 398         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 399         ASSERT_EQ(0, ret);
 400 
 401         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 402         ASSERT_EQ(0, ret);
 403 
 404         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
 405         EXPECT_EQ(-1, ret);
 406         EXPECT_EQ(EINVAL, errno);
 407 }
 408 
 409 
 410 TEST(mode_filter_get_seccomp)
 411 {
 412         struct sock_filter filter[] = {
 413                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 414         };
 415         struct sock_fprog prog = {
 416                 .len = (unsigned short)ARRAY_SIZE(filter),
 417                 .filter = filter,
 418         };
 419         long ret;
 420 
 421         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 422         ASSERT_EQ(0, ret);
 423 
 424         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 425         EXPECT_EQ(0, ret);
 426 
 427         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 428         ASSERT_EQ(0, ret);
 429 
 430         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 431         EXPECT_EQ(2, ret);
 432 }
 433 
 434 
 435 TEST(ALLOW_all)
 436 {
 437         struct sock_filter filter[] = {
 438                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 439         };
 440         struct sock_fprog prog = {
 441                 .len = (unsigned short)ARRAY_SIZE(filter),
 442                 .filter = filter,
 443         };
 444         long ret;
 445 
 446         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 447         ASSERT_EQ(0, ret);
 448 
 449         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 450         ASSERT_EQ(0, ret);
 451 }
 452 
 453 TEST(empty_prog)
 454 {
 455         struct sock_filter filter[] = {
 456         };
 457         struct sock_fprog prog = {
 458                 .len = (unsigned short)ARRAY_SIZE(filter),
 459                 .filter = filter,
 460         };
 461         long ret;
 462 
 463         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 464         ASSERT_EQ(0, ret);
 465 
 466         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 467         EXPECT_EQ(-1, ret);
 468         EXPECT_EQ(EINVAL, errno);
 469 }
 470 
 471 TEST(log_all)
 472 {
 473         struct sock_filter filter[] = {
 474                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
 475         };
 476         struct sock_fprog prog = {
 477                 .len = (unsigned short)ARRAY_SIZE(filter),
 478                 .filter = filter,
 479         };
 480         long ret;
 481         pid_t parent = getppid();
 482 
 483         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 484         ASSERT_EQ(0, ret);
 485 
 486         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 487         ASSERT_EQ(0, ret);
 488 
 489         /* getppid() should succeed and be logged (no check for logging) */
 490         EXPECT_EQ(parent, syscall(__NR_getppid));
 491 }
 492 
 493 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
 494 {
 495         struct sock_filter filter[] = {
 496                 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
 497         };
 498         struct sock_fprog prog = {
 499                 .len = (unsigned short)ARRAY_SIZE(filter),
 500                 .filter = filter,
 501         };
 502         long ret;
 503 
 504         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 505         ASSERT_EQ(0, ret);
 506 
 507         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 508         ASSERT_EQ(0, ret);
 509         EXPECT_EQ(0, syscall(__NR_getpid)) {
 510                 TH_LOG("getpid() shouldn't ever return");
 511         }
 512 }
 513 
 514 /* return code >= 0x80000000 is unused. */
 515 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
 516 {
 517         struct sock_filter filter[] = {
 518                 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
 519         };
 520         struct sock_fprog prog = {
 521                 .len = (unsigned short)ARRAY_SIZE(filter),
 522                 .filter = filter,
 523         };
 524         long ret;
 525 
 526         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 527         ASSERT_EQ(0, ret);
 528 
 529         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 530         ASSERT_EQ(0, ret);
 531         EXPECT_EQ(0, syscall(__NR_getpid)) {
 532                 TH_LOG("getpid() shouldn't ever return");
 533         }
 534 }
 535 
 536 TEST_SIGNAL(KILL_all, SIGSYS)
 537 {
 538         struct sock_filter filter[] = {
 539                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 540         };
 541         struct sock_fprog prog = {
 542                 .len = (unsigned short)ARRAY_SIZE(filter),
 543                 .filter = filter,
 544         };
 545         long ret;
 546 
 547         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 548         ASSERT_EQ(0, ret);
 549 
 550         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 551         ASSERT_EQ(0, ret);
 552 }
 553 
 554 TEST_SIGNAL(KILL_one, SIGSYS)
 555 {
 556         struct sock_filter filter[] = {
 557                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 558                         offsetof(struct seccomp_data, nr)),
 559                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
 560                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 561                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 562         };
 563         struct sock_fprog prog = {
 564                 .len = (unsigned short)ARRAY_SIZE(filter),
 565                 .filter = filter,
 566         };
 567         long ret;
 568         pid_t parent = getppid();
 569 
 570         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 571         ASSERT_EQ(0, ret);
 572 
 573         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 574         ASSERT_EQ(0, ret);
 575 
 576         EXPECT_EQ(parent, syscall(__NR_getppid));
 577         /* getpid() should never return. */
 578         EXPECT_EQ(0, syscall(__NR_getpid));
 579 }
 580 
 581 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
 582 {
 583         void *fatal_address;
 584         struct sock_filter filter[] = {
 585                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 586                         offsetof(struct seccomp_data, nr)),
 587                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
 588                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 589                 /* Only both with lower 32-bit for now. */
 590                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
 591                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
 592                         (unsigned long)&fatal_address, 0, 1),
 593                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 594                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 595         };
 596         struct sock_fprog prog = {
 597                 .len = (unsigned short)ARRAY_SIZE(filter),
 598                 .filter = filter,
 599         };
 600         long ret;
 601         pid_t parent = getppid();
 602         struct tms timebuf;
 603         clock_t clock = times(&timebuf);
 604 
 605         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 606         ASSERT_EQ(0, ret);
 607 
 608         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 609         ASSERT_EQ(0, ret);
 610 
 611         EXPECT_EQ(parent, syscall(__NR_getppid));
 612         EXPECT_LE(clock, syscall(__NR_times, &timebuf));
 613         /* times() should never return. */
 614         EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
 615 }
 616 
 617 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
 618 {
 619 #ifndef __NR_mmap2
 620         int sysno = __NR_mmap;
 621 #else
 622         int sysno = __NR_mmap2;
 623 #endif
 624         struct sock_filter filter[] = {
 625                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 626                         offsetof(struct seccomp_data, nr)),
 627                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
 628                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 629                 /* Only both with lower 32-bit for now. */
 630                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
 631                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
 632                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 633                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 634         };
 635         struct sock_fprog prog = {
 636                 .len = (unsigned short)ARRAY_SIZE(filter),
 637                 .filter = filter,
 638         };
 639         long ret;
 640         pid_t parent = getppid();
 641         int fd;
 642         void *map1, *map2;
 643         int page_size = sysconf(_SC_PAGESIZE);
 644 
 645         ASSERT_LT(0, page_size);
 646 
 647         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 648         ASSERT_EQ(0, ret);
 649 
 650         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 651         ASSERT_EQ(0, ret);
 652 
 653         fd = open("/dev/zero", O_RDONLY);
 654         ASSERT_NE(-1, fd);
 655 
 656         EXPECT_EQ(parent, syscall(__NR_getppid));
 657         map1 = (void *)syscall(sysno,
 658                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
 659         EXPECT_NE(MAP_FAILED, map1);
 660         /* mmap2() should never return. */
 661         map2 = (void *)syscall(sysno,
 662                  NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
 663         EXPECT_EQ(MAP_FAILED, map2);
 664 
 665         /* The test failed, so clean up the resources. */
 666         munmap(map1, page_size);
 667         munmap(map2, page_size);
 668         close(fd);
 669 }
 670 
 671 /* This is a thread task to die via seccomp filter violation. */
 672 void *kill_thread(void *data)
 673 {
 674         bool die = (bool)data;
 675 
 676         if (die) {
 677                 prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 678                 return (void *)SIBLING_EXIT_FAILURE;
 679         }
 680 
 681         return (void *)SIBLING_EXIT_UNKILLED;
 682 }
 683 
 684 /* Prepare a thread that will kill itself or both of us. */
 685 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
 686 {
 687         pthread_t thread;
 688         void *status;
 689         /* Kill only when calling __NR_prctl. */
 690         struct sock_filter filter_thread[] = {
 691                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 692                         offsetof(struct seccomp_data, nr)),
 693                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
 694                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
 695                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 696         };
 697         struct sock_fprog prog_thread = {
 698                 .len = (unsigned short)ARRAY_SIZE(filter_thread),
 699                 .filter = filter_thread,
 700         };
 701         struct sock_filter filter_process[] = {
 702                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 703                         offsetof(struct seccomp_data, nr)),
 704                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
 705                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
 706                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 707         };
 708         struct sock_fprog prog_process = {
 709                 .len = (unsigned short)ARRAY_SIZE(filter_process),
 710                 .filter = filter_process,
 711         };
 712 
 713         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
 714                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 715         }
 716 
 717         ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
 718                              kill_process ? &prog_process : &prog_thread));
 719 
 720         /*
 721          * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
 722          * flag cannot be downgraded by a new filter.
 723          */
 724         ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
 725 
 726         /* Start a thread that will exit immediately. */
 727         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
 728         ASSERT_EQ(0, pthread_join(thread, &status));
 729         ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
 730 
 731         /* Start a thread that will die immediately. */
 732         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
 733         ASSERT_EQ(0, pthread_join(thread, &status));
 734         ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
 735 
 736         /*
 737          * If we get here, only the spawned thread died. Let the parent know
 738          * the whole process didn't die (i.e. this thread, the spawner,
 739          * stayed running).
 740          */
 741         exit(42);
 742 }
 743 
 744 TEST(KILL_thread)
 745 {
 746         int status;
 747         pid_t child_pid;
 748 
 749         child_pid = fork();
 750         ASSERT_LE(0, child_pid);
 751         if (child_pid == 0) {
 752                 kill_thread_or_group(_metadata, false);
 753                 _exit(38);
 754         }
 755 
 756         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
 757 
 758         /* If only the thread was killed, we'll see exit 42. */
 759         ASSERT_TRUE(WIFEXITED(status));
 760         ASSERT_EQ(42, WEXITSTATUS(status));
 761 }
 762 
 763 TEST(KILL_process)
 764 {
 765         int status;
 766         pid_t child_pid;
 767 
 768         child_pid = fork();
 769         ASSERT_LE(0, child_pid);
 770         if (child_pid == 0) {
 771                 kill_thread_or_group(_metadata, true);
 772                 _exit(38);
 773         }
 774 
 775         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
 776 
 777         /* If the entire process was killed, we'll see SIGSYS. */
 778         ASSERT_TRUE(WIFSIGNALED(status));
 779         ASSERT_EQ(SIGSYS, WTERMSIG(status));
 780 }
 781 
 782 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
 783 TEST(arg_out_of_range)
 784 {
 785         struct sock_filter filter[] = {
 786                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
 787                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 788         };
 789         struct sock_fprog prog = {
 790                 .len = (unsigned short)ARRAY_SIZE(filter),
 791                 .filter = filter,
 792         };
 793         long ret;
 794 
 795         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 796         ASSERT_EQ(0, ret);
 797 
 798         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 799         EXPECT_EQ(-1, ret);
 800         EXPECT_EQ(EINVAL, errno);
 801 }
 802 
 803 #define ERRNO_FILTER(name, errno)                                       \
 804         struct sock_filter _read_filter_##name[] = {                    \
 805                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,                          \
 806                         offsetof(struct seccomp_data, nr)),             \
 807                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),       \
 808                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),     \
 809                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),             \
 810         };                                                              \
 811         struct sock_fprog prog_##name = {                               \
 812                 .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \
 813                 .filter = _read_filter_##name,                          \
 814         }
 815 
 816 /* Make sure basic errno values are correctly passed through a filter. */
 817 TEST(ERRNO_valid)
 818 {
 819         ERRNO_FILTER(valid, E2BIG);
 820         long ret;
 821         pid_t parent = getppid();
 822 
 823         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 824         ASSERT_EQ(0, ret);
 825 
 826         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
 827         ASSERT_EQ(0, ret);
 828 
 829         EXPECT_EQ(parent, syscall(__NR_getppid));
 830         EXPECT_EQ(-1, read(0, NULL, 0));
 831         EXPECT_EQ(E2BIG, errno);
 832 }
 833 
 834 /* Make sure an errno of zero is correctly handled by the arch code. */
 835 TEST(ERRNO_zero)
 836 {
 837         ERRNO_FILTER(zero, 0);
 838         long ret;
 839         pid_t parent = getppid();
 840 
 841         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 842         ASSERT_EQ(0, ret);
 843 
 844         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
 845         ASSERT_EQ(0, ret);
 846 
 847         EXPECT_EQ(parent, syscall(__NR_getppid));
 848         /* "errno" of 0 is ok. */
 849         EXPECT_EQ(0, read(0, NULL, 0));
 850 }
 851 
 852 /*
 853  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
 854  * This tests that the errno value gets capped correctly, fixed by
 855  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
 856  */
 857 TEST(ERRNO_capped)
 858 {
 859         ERRNO_FILTER(capped, 4096);
 860         long ret;
 861         pid_t parent = getppid();
 862 
 863         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 864         ASSERT_EQ(0, ret);
 865 
 866         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
 867         ASSERT_EQ(0, ret);
 868 
 869         EXPECT_EQ(parent, syscall(__NR_getppid));
 870         EXPECT_EQ(-1, read(0, NULL, 0));
 871         EXPECT_EQ(4095, errno);
 872 }
 873 
 874 /*
 875  * Filters are processed in reverse order: last applied is executed first.
 876  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
 877  * SECCOMP_RET_DATA mask results will follow the most recently applied
 878  * matching filter return (and not the lowest or highest value).
 879  */
 880 TEST(ERRNO_order)
 881 {
 882         ERRNO_FILTER(first,  11);
 883         ERRNO_FILTER(second, 13);
 884         ERRNO_FILTER(third,  12);
 885         long ret;
 886         pid_t parent = getppid();
 887 
 888         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 889         ASSERT_EQ(0, ret);
 890 
 891         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
 892         ASSERT_EQ(0, ret);
 893 
 894         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
 895         ASSERT_EQ(0, ret);
 896 
 897         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
 898         ASSERT_EQ(0, ret);
 899 
 900         EXPECT_EQ(parent, syscall(__NR_getppid));
 901         EXPECT_EQ(-1, read(0, NULL, 0));
 902         EXPECT_EQ(12, errno);
 903 }
 904 
 905 FIXTURE_DATA(TRAP) {
 906         struct sock_fprog prog;
 907 };
 908 
 909 FIXTURE_SETUP(TRAP)
 910 {
 911         struct sock_filter filter[] = {
 912                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 913                         offsetof(struct seccomp_data, nr)),
 914                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
 915                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
 916                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 917         };
 918 
 919         memset(&self->prog, 0, sizeof(self->prog));
 920         self->prog.filter = malloc(sizeof(filter));
 921         ASSERT_NE(NULL, self->prog.filter);
 922         memcpy(self->prog.filter, filter, sizeof(filter));
 923         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
 924 }
 925 
 926 FIXTURE_TEARDOWN(TRAP)
 927 {
 928         if (self->prog.filter)
 929                 free(self->prog.filter);
 930 }
 931 
 932 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
 933 {
 934         long ret;
 935 
 936         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 937         ASSERT_EQ(0, ret);
 938 
 939         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 940         ASSERT_EQ(0, ret);
 941         syscall(__NR_getpid);
 942 }
 943 
 944 /* Ensure that SIGSYS overrides SIG_IGN */
 945 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
 946 {
 947         long ret;
 948 
 949         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 950         ASSERT_EQ(0, ret);
 951 
 952         signal(SIGSYS, SIG_IGN);
 953 
 954         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 955         ASSERT_EQ(0, ret);
 956         syscall(__NR_getpid);
 957 }
 958 
 959 static siginfo_t TRAP_info;
 960 static volatile int TRAP_nr;
 961 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
 962 {
 963         memcpy(&TRAP_info, info, sizeof(TRAP_info));
 964         TRAP_nr = nr;
 965 }
 966 
 967 TEST_F(TRAP, handler)
 968 {
 969         int ret, test;
 970         struct sigaction act;
 971         sigset_t mask;
 972 
 973         memset(&act, 0, sizeof(act));
 974         sigemptyset(&mask);
 975         sigaddset(&mask, SIGSYS);
 976 
 977         act.sa_sigaction = &TRAP_action;
 978         act.sa_flags = SA_SIGINFO;
 979         ret = sigaction(SIGSYS, &act, NULL);
 980         ASSERT_EQ(0, ret) {
 981                 TH_LOG("sigaction failed");
 982         }
 983         ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
 984         ASSERT_EQ(0, ret) {
 985                 TH_LOG("sigprocmask failed");
 986         }
 987 
 988         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 989         ASSERT_EQ(0, ret);
 990         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 991         ASSERT_EQ(0, ret);
 992         TRAP_nr = 0;
 993         memset(&TRAP_info, 0, sizeof(TRAP_info));
 994         /* Expect the registers to be rolled back. (nr = error) may vary
 995          * based on arch. */
 996         ret = syscall(__NR_getpid);
 997         /* Silence gcc warning about volatile. */
 998         test = TRAP_nr;
 999         EXPECT_EQ(SIGSYS, test);
1000         struct local_sigsys {
1001                 void *_call_addr;       /* calling user insn */
1002                 int _syscall;           /* triggering system call number */
1003                 unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
1004         } *sigsys = (struct local_sigsys *)
1005 #ifdef si_syscall
1006                 &(TRAP_info.si_call_addr);
1007 #else
1008                 &TRAP_info.si_pid;
1009 #endif
1010         EXPECT_EQ(__NR_getpid, sigsys->_syscall);
1011         /* Make sure arch is non-zero. */
1012         EXPECT_NE(0, sigsys->_arch);
1013         EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
1014 }
1015 
/*
 * One filter program per seccomp return action. Each program is built by
 * the fixture setup and named after the action it returns for getpid().
 */
FIXTURE_DATA(precedence) {
        struct sock_fprog allow;        /* SECCOMP_RET_ALLOW for everything */
        struct sock_fprog log;          /* SECCOMP_RET_LOG */
        struct sock_fprog trace;        /* SECCOMP_RET_TRACE */
        struct sock_fprog error;        /* SECCOMP_RET_ERRNO */
        struct sock_fprog trap;         /* SECCOMP_RET_TRAP */
        struct sock_fprog kill;         /* SECCOMP_RET_KILL */
};
1024 
1025 FIXTURE_SETUP(precedence)
1026 {
1027         struct sock_filter allow_insns[] = {
1028                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1029         };
1030         struct sock_filter log_insns[] = {
1031                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1032                         offsetof(struct seccomp_data, nr)),
1033                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1034                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1035                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1036         };
1037         struct sock_filter trace_insns[] = {
1038                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1039                         offsetof(struct seccomp_data, nr)),
1040                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1041                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1042                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1043         };
1044         struct sock_filter error_insns[] = {
1045                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1046                         offsetof(struct seccomp_data, nr)),
1047                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1048                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1049                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1050         };
1051         struct sock_filter trap_insns[] = {
1052                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1053                         offsetof(struct seccomp_data, nr)),
1054                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1055                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1056                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1057         };
1058         struct sock_filter kill_insns[] = {
1059                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1060                         offsetof(struct seccomp_data, nr)),
1061                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1062                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1063                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1064         };
1065 
1066         memset(self, 0, sizeof(*self));
1067 #define FILTER_ALLOC(_x) \
1068         self->_x.filter = malloc(sizeof(_x##_insns)); \
1069         ASSERT_NE(NULL, self->_x.filter); \
1070         memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1071         self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1072         FILTER_ALLOC(allow);
1073         FILTER_ALLOC(log);
1074         FILTER_ALLOC(trace);
1075         FILTER_ALLOC(error);
1076         FILTER_ALLOC(trap);
1077         FILTER_ALLOC(kill);
1078 }
1079 
1080 FIXTURE_TEARDOWN(precedence)
1081 {
1082 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1083         FILTER_FREE(allow);
1084         FILTER_FREE(log);
1085         FILTER_FREE(trace);
1086         FILTER_FREE(error);
1087         FILTER_FREE(trap);
1088         FILTER_FREE(kill);
1089 }
1090 
1091 TEST_F(precedence, allow_ok)
1092 {
1093         pid_t parent, res = 0;
1094         long ret;
1095 
1096         parent = getppid();
1097         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1098         ASSERT_EQ(0, ret);
1099 
1100         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1101         ASSERT_EQ(0, ret);
1102         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1103         ASSERT_EQ(0, ret);
1104         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1105         ASSERT_EQ(0, ret);
1106         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1107         ASSERT_EQ(0, ret);
1108         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1109         ASSERT_EQ(0, ret);
1110         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1111         ASSERT_EQ(0, ret);
1112         /* Should work just fine. */
1113         res = syscall(__NR_getppid);
1114         EXPECT_EQ(parent, res);
1115 }
1116 
1117 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1118 {
1119         pid_t parent, res = 0;
1120         long ret;
1121 
1122         parent = getppid();
1123         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1124         ASSERT_EQ(0, ret);
1125 
1126         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1127         ASSERT_EQ(0, ret);
1128         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1129         ASSERT_EQ(0, ret);
1130         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1131         ASSERT_EQ(0, ret);
1132         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1133         ASSERT_EQ(0, ret);
1134         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1135         ASSERT_EQ(0, ret);
1136         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1137         ASSERT_EQ(0, ret);
1138         /* Should work just fine. */
1139         res = syscall(__NR_getppid);
1140         EXPECT_EQ(parent, res);
1141         /* getpid() should never return. */
1142         res = syscall(__NR_getpid);
1143         EXPECT_EQ(0, res);
1144 }
1145 
1146 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1147 {
1148         pid_t parent;
1149         long ret;
1150 
1151         parent = getppid();
1152         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1153         ASSERT_EQ(0, ret);
1154 
1155         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1156         ASSERT_EQ(0, ret);
1157         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1158         ASSERT_EQ(0, ret);
1159         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1160         ASSERT_EQ(0, ret);
1161         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1162         ASSERT_EQ(0, ret);
1163         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1164         ASSERT_EQ(0, ret);
1165         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1166         ASSERT_EQ(0, ret);
1167         /* Should work just fine. */
1168         EXPECT_EQ(parent, syscall(__NR_getppid));
1169         /* getpid() should never return. */
1170         EXPECT_EQ(0, syscall(__NR_getpid));
1171 }
1172 
1173 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1174 {
1175         pid_t parent;
1176         long ret;
1177 
1178         parent = getppid();
1179         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1180         ASSERT_EQ(0, ret);
1181 
1182         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1183         ASSERT_EQ(0, ret);
1184         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1185         ASSERT_EQ(0, ret);
1186         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1187         ASSERT_EQ(0, ret);
1188         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1189         ASSERT_EQ(0, ret);
1190         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1191         ASSERT_EQ(0, ret);
1192         /* Should work just fine. */
1193         EXPECT_EQ(parent, syscall(__NR_getppid));
1194         /* getpid() should never return. */
1195         EXPECT_EQ(0, syscall(__NR_getpid));
1196 }
1197 
1198 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1199 {
1200         pid_t parent;
1201         long ret;
1202 
1203         parent = getppid();
1204         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1205         ASSERT_EQ(0, ret);
1206 
1207         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1208         ASSERT_EQ(0, ret);
1209         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1210         ASSERT_EQ(0, ret);
1211         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1212         ASSERT_EQ(0, ret);
1213         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1214         ASSERT_EQ(0, ret);
1215         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1216         ASSERT_EQ(0, ret);
1217         /* Should work just fine. */
1218         EXPECT_EQ(parent, syscall(__NR_getppid));
1219         /* getpid() should never return. */
1220         EXPECT_EQ(0, syscall(__NR_getpid));
1221 }
1222 
1223 TEST_F(precedence, errno_is_third)
1224 {
1225         pid_t parent;
1226         long ret;
1227 
1228         parent = getppid();
1229         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1230         ASSERT_EQ(0, ret);
1231 
1232         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1233         ASSERT_EQ(0, ret);
1234         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1235         ASSERT_EQ(0, ret);
1236         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1237         ASSERT_EQ(0, ret);
1238         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1239         ASSERT_EQ(0, ret);
1240         /* Should work just fine. */
1241         EXPECT_EQ(parent, syscall(__NR_getppid));
1242         EXPECT_EQ(0, syscall(__NR_getpid));
1243 }
1244 
1245 TEST_F(precedence, errno_is_third_in_any_order)
1246 {
1247         pid_t parent;
1248         long ret;
1249 
1250         parent = getppid();
1251         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1252         ASSERT_EQ(0, ret);
1253 
1254         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1255         ASSERT_EQ(0, ret);
1256         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1257         ASSERT_EQ(0, ret);
1258         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1259         ASSERT_EQ(0, ret);
1260         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1261         ASSERT_EQ(0, ret);
1262         /* Should work just fine. */
1263         EXPECT_EQ(parent, syscall(__NR_getppid));
1264         EXPECT_EQ(0, syscall(__NR_getpid));
1265 }
1266 
1267 TEST_F(precedence, trace_is_fourth)
1268 {
1269         pid_t parent;
1270         long ret;
1271 
1272         parent = getppid();
1273         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1274         ASSERT_EQ(0, ret);
1275 
1276         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1277         ASSERT_EQ(0, ret);
1278         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1279         ASSERT_EQ(0, ret);
1280         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1281         ASSERT_EQ(0, ret);
1282         /* Should work just fine. */
1283         EXPECT_EQ(parent, syscall(__NR_getppid));
1284         /* No ptracer */
1285         EXPECT_EQ(-1, syscall(__NR_getpid));
1286 }
1287 
1288 TEST_F(precedence, trace_is_fourth_in_any_order)
1289 {
1290         pid_t parent;
1291         long ret;
1292 
1293         parent = getppid();
1294         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1295         ASSERT_EQ(0, ret);
1296 
1297         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1298         ASSERT_EQ(0, ret);
1299         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1300         ASSERT_EQ(0, ret);
1301         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1302         ASSERT_EQ(0, ret);
1303         /* Should work just fine. */
1304         EXPECT_EQ(parent, syscall(__NR_getppid));
1305         /* No ptracer */
1306         EXPECT_EQ(-1, syscall(__NR_getpid));
1307 }
1308 
1309 TEST_F(precedence, log_is_fifth)
1310 {
1311         pid_t mypid, parent;
1312         long ret;
1313 
1314         mypid = getpid();
1315         parent = getppid();
1316         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1317         ASSERT_EQ(0, ret);
1318 
1319         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1320         ASSERT_EQ(0, ret);
1321         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1322         ASSERT_EQ(0, ret);
1323         /* Should work just fine. */
1324         EXPECT_EQ(parent, syscall(__NR_getppid));
1325         /* Should also work just fine */
1326         EXPECT_EQ(mypid, syscall(__NR_getpid));
1327 }
1328 
1329 TEST_F(precedence, log_is_fifth_in_any_order)
1330 {
1331         pid_t mypid, parent;
1332         long ret;
1333 
1334         mypid = getpid();
1335         parent = getppid();
1336         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1337         ASSERT_EQ(0, ret);
1338 
1339         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1340         ASSERT_EQ(0, ret);
1341         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1342         ASSERT_EQ(0, ret);
1343         /* Should work just fine. */
1344         EXPECT_EQ(parent, syscall(__NR_getppid));
1345         /* Should also work just fine */
1346         EXPECT_EQ(mypid, syscall(__NR_getpid));
1347 }
1348 
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP   0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits of a wait status above bit 15 equal
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that
 * expressions such as "status & mask" are shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/* Loop flag of the tracer process; start_tracer() runs while it is true. */
bool tracer_running;

/* Signal handler: ask the tracer loop to stop. The signal number is unused. */
void tracer_stop(int sig)
{
        (void)sig;
        tracer_running = false;
}
1368 
/*
 * Callback run by start_tracer() for every stop of the tracee that it
 * handles: receives the harness metadata, the tracee pid, the wait status
 * and the opaque argument pointer given to start_tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata, pid_t tracee,
                           int status, void *args);
1371 
1372 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1373             tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1374 {
1375         int ret = -1;
1376         struct sigaction action = {
1377                 .sa_handler = tracer_stop,
1378         };
1379 
1380         /* Allow external shutdown. */
1381         tracer_running = true;
1382         ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1383 
1384         errno = 0;
1385         while (ret == -1 && errno != EINVAL)
1386                 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1387         ASSERT_EQ(0, ret) {
1388                 kill(tracee, SIGKILL);
1389         }
1390         /* Wait for attach stop */
1391         wait(NULL);
1392 
1393         ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1394                                                       PTRACE_O_TRACESYSGOOD :
1395                                                       PTRACE_O_TRACESECCOMP);
1396         ASSERT_EQ(0, ret) {
1397                 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1398                 kill(tracee, SIGKILL);
1399         }
1400         ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1401                      tracee, NULL, 0);
1402         ASSERT_EQ(0, ret);
1403 
1404         /* Unblock the tracee */
1405         ASSERT_EQ(1, write(fd, "A", 1));
1406         ASSERT_EQ(0, close(fd));
1407 
1408         /* Run until we're shut down. Must assert to stop execution. */
1409         while (tracer_running) {
1410                 int status;
1411 
1412                 if (wait(&status) != tracee)
1413                         continue;
1414                 if (WIFSIGNALED(status) || WIFEXITED(status))
1415                         /* Child is dead. Time to go. */
1416                         return;
1417 
1418                 /* Check if this is a seccomp event. */
1419                 ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1420 
1421                 tracer_func(_metadata, tracee, status, args);
1422 
1423                 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1424                              tracee, NULL, 0);
1425                 ASSERT_EQ(0, ret);
1426         }
1427         /* Directly report the status of our test harness results. */
1428         syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1429 }
1430 
1431 /* Common tracer setup/teardown functions. */
/* Deliberately empty signal handler; setup_trace_fixture() binds it to SIGALRM. */
void cont_handler(int num)
{
        (void)num;
}
1434 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1435                           tracer_func_t func, void *args, bool ptrace_syscall)
1436 {
1437         char sync;
1438         int pipefd[2];
1439         pid_t tracer_pid;
1440         pid_t tracee = getpid();
1441 
1442         /* Setup a pipe for clean synchronization. */
1443         ASSERT_EQ(0, pipe(pipefd));
1444 
1445         /* Fork a child which we'll promote to tracer */
1446         tracer_pid = fork();
1447         ASSERT_LE(0, tracer_pid);
1448         signal(SIGALRM, cont_handler);
1449         if (tracer_pid == 0) {
1450                 close(pipefd[0]);
1451                 start_tracer(_metadata, pipefd[1], tracee, func, args,
1452                              ptrace_syscall);
1453                 syscall(__NR_exit, 0);
1454         }
1455         close(pipefd[1]);
1456         prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1457         read(pipefd[0], &sync, 1);
1458         close(pipefd[0]);
1459 
1460         return tracer_pid;
1461 }
1462 void teardown_trace_fixture(struct __test_metadata *_metadata,
1463                             pid_t tracer)
1464 {
1465         if (tracer) {
1466                 int status;
1467                 /*
1468                  * Extract the exit code from the other process and
1469                  * adopt it for ourselves in case its asserts failed.
1470                  */
1471                 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1472                 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1473                 if (WEXITSTATUS(status))
1474                         _metadata->passed = 0;
1475         }
1476 }
1477 
/* Arguments for the "poke" tracer callback, tracer_poke(). */
struct tracer_args_poke_t {
        /* Address in the tracee that tracer_poke() writes to. */
        unsigned long poke_addr;
};
1482 
1483 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1484                  void *args)
1485 {
1486         int ret;
1487         unsigned long msg;
1488         struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1489 
1490         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1491         EXPECT_EQ(0, ret);
1492         /* If this fails, don't try to recover. */
1493         ASSERT_EQ(0x1001, msg) {
1494                 kill(tracee, SIGKILL);
1495         }
1496         /*
1497          * Poke in the message.
1498          * Registers are not touched to try to keep this relatively arch
1499          * agnostic.
1500          */
1501         ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1502         EXPECT_EQ(0, ret);
1503 }
1504 
1505 FIXTURE_DATA(TRACE_poke) {
1506         struct sock_fprog prog;
1507         pid_t tracer;
1508         long poked;
1509         struct tracer_args_poke_t tracer_args;
1510 };
1511 
1512 FIXTURE_SETUP(TRACE_poke)
1513 {
1514         struct sock_filter filter[] = {
1515                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1516                         offsetof(struct seccomp_data, nr)),
1517                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1518                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1519                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1520         };
1521 
1522         self->poked = 0;
1523         memset(&self->prog, 0, sizeof(self->prog));
1524         self->prog.filter = malloc(sizeof(filter));
1525         ASSERT_NE(NULL, self->prog.filter);
1526         memcpy(self->prog.filter, filter, sizeof(filter));
1527         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1528 
1529         /* Set up tracer args. */
1530         self->tracer_args.poke_addr = (unsigned long)&self->poked;
1531 
1532         /* Launch tracer. */
1533         self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1534                                            &self->tracer_args, false);
1535 }
1536 
1537 FIXTURE_TEARDOWN(TRACE_poke)
1538 {
1539         teardown_trace_fixture(_metadata, self->tracer);
1540         if (self->prog.filter)
1541                 free(self->prog.filter);
1542 }
1543 
1544 TEST_F(TRACE_poke, read_has_side_effects)
1545 {
1546         ssize_t ret;
1547 
1548         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1549         ASSERT_EQ(0, ret);
1550 
1551         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1552         ASSERT_EQ(0, ret);
1553 
1554         EXPECT_EQ(0, self->poked);
1555         ret = read(-1, NULL, 0);
1556         EXPECT_EQ(-1, ret);
1557         EXPECT_EQ(0x1001, self->poked);
1558 }
1559 
1560 TEST_F(TRACE_poke, getpid_runs_normally)
1561 {
1562         long ret;
1563 
1564         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1565         ASSERT_EQ(0, ret);
1566 
1567         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1568         ASSERT_EQ(0, ret);
1569 
1570         EXPECT_EQ(0, self->poked);
1571         EXPECT_NE(0, syscall(__NR_getpid));
1572         EXPECT_EQ(0, self->poked);
1573 }
1574 
/*
 * Per-architecture register access:
 *   ARCH_REGS    type of the register set fetched from the tracee
 *   SYSCALL_NUM  member holding the syscall number
 *   SYSCALL_RET  member holding the syscall return value
 */
#if defined(__x86_64__)
# define ARCH_REGS              struct user_regs_struct
# define SYSCALL_NUM            orig_rax
# define SYSCALL_RET            rax
#elif defined(__i386__)
# define ARCH_REGS              struct user_regs_struct
# define SYSCALL_NUM            orig_eax
# define SYSCALL_RET            eax
#elif defined(__arm__)
# define ARCH_REGS              struct pt_regs
# define SYSCALL_NUM            ARM_r7
# define SYSCALL_RET            ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS              struct user_pt_regs
# define SYSCALL_NUM            regs[8]
# define SYSCALL_RET            regs[0]
#elif defined(__hppa__)
# define ARCH_REGS              struct user_regs_struct
# define SYSCALL_NUM            gr[20]
# define SYSCALL_RET            gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS              struct pt_regs
# define SYSCALL_NUM            gpr[0]
# define SYSCALL_RET            gpr[3]
#elif defined(__s390__)
/* Syscall number and return value use the same register here. */
# define ARCH_REGS              s390_regs
# define SYSCALL_NUM            gprs[2]
# define SYSCALL_RET            gprs[2]
#elif defined(__mips__)
/*
 * Number and return value share regs[2]. SYSCALL_SYSCALL_NUM is the
 * register consulted when regs[2] holds __NR_O32_Linux.
 */
# define ARCH_REGS              struct pt_regs
# define SYSCALL_NUM            regs[2]
# define SYSCALL_SYSCALL_NUM    regs[4]
# define SYSCALL_RET            regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1612 
/*
 * EXPECT_SYSCALL_RETURN(val, action) - check the result of a syscall whose
 * return value was rewritten by the tracer.
 *
 * When the syscall return can't be changed, stub out the tests for it:
 * the action is then only expected to yield -1. Otherwise a negative val
 * means "-1 with errno == -val", and any other val is the expected return
 * value itself. val is parenthesized wherever it is used in an expression
 * so that arguments such as "cond ? a : b" are classified as a whole.
 */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)     EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)             \
        do {                                            \
                errno = 0;                              \
                if ((val) < 0) {                        \
                        EXPECT_EQ(-1, action);          \
                        EXPECT_EQ(-(val), errno);       \
                } else {                                \
                        EXPECT_EQ(val, action);         \
                }                                       \
        } while (0)
#endif
1628 
/*
 * Prefer PTRACE_GETREGS and PTRACE_SETREGS on the architectures listed
 * below. This helps on architectures without HAVE_ARCH_TRACEHOOK
 * (e.g. User-mode Linux).
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
# define HAVE_GETREGS
#endif
1635 
1636 /* Architecture-specific syscall fetching routine. */
1637 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1638 {
1639         ARCH_REGS regs;
1640 #ifdef HAVE_GETREGS
1641         EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1642                 TH_LOG("PTRACE_GETREGS failed");
1643                 return -1;
1644         }
1645 #else
1646         struct iovec iov;
1647 
1648         iov.iov_base = &regs;
1649         iov.iov_len = sizeof(regs);
1650         EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1651                 TH_LOG("PTRACE_GETREGSET failed");
1652                 return -1;
1653         }
1654 #endif
1655 
1656 #if defined(__mips__)
1657         if (regs.SYSCALL_NUM == __NR_O32_Linux)
1658                 return regs.SYSCALL_SYSCALL_NUM;
1659 #endif
1660         return regs.SYSCALL_NUM;
1661 }
1662 
1663 /* Architecture-specific syscall changing routine. */
1664 void change_syscall(struct __test_metadata *_metadata,
1665                     pid_t tracee, int syscall, int result)
1666 {
1667         int ret;
1668         ARCH_REGS regs;
1669 #ifdef HAVE_GETREGS
1670         ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1671 #else
1672         struct iovec iov;
1673         iov.iov_base = &regs;
1674         iov.iov_len = sizeof(regs);
1675         ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1676 #endif
1677         EXPECT_EQ(0, ret) {}
1678 
1679 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1680     defined(__s390__) || defined(__hppa__)
1681         {
1682                 regs.SYSCALL_NUM = syscall;
1683         }
1684 #elif defined(__mips__)
1685         {
1686                 if (regs.SYSCALL_NUM == __NR_O32_Linux)
1687                         regs.SYSCALL_SYSCALL_NUM = syscall;
1688                 else
1689                         regs.SYSCALL_NUM = syscall;
1690         }
1691 
1692 #elif defined(__arm__)
1693 # ifndef PTRACE_SET_SYSCALL
1694 #  define PTRACE_SET_SYSCALL   23
1695 # endif
1696         {
1697                 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1698                 EXPECT_EQ(0, ret);
1699         }
1700 
1701 #elif defined(__aarch64__)
1702 # ifndef NT_ARM_SYSTEM_CALL
1703 #  define NT_ARM_SYSTEM_CALL 0x404
1704 # endif
1705         {
1706                 iov.iov_base = &syscall;
1707                 iov.iov_len = sizeof(syscall);
1708                 ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1709                              &iov);
1710                 EXPECT_EQ(0, ret);
1711         }
1712 
1713 #else
1714         ASSERT_EQ(1, 0) {
1715                 TH_LOG("How is the syscall changed on this architecture?");
1716         }
1717 #endif
1718 
1719         /* If syscall is skipped, change return value. */
1720         if (syscall == -1)
1721 #ifdef SYSCALL_NUM_RET_SHARE_REG
1722                 TH_LOG("Can't modify syscall return on this architecture");
1723 #else
1724                 regs.SYSCALL_RET = result;
1725 #endif
1726 
1727 #ifdef HAVE_GETREGS
1728         ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1729 #else
1730         iov.iov_base = &regs;
1731         iov.iov_len = sizeof(regs);
1732         ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1733 #endif
1734         EXPECT_EQ(0, ret);
1735 }
1736 
1737 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1738                     int status, void *args)
1739 {
1740         int ret;
1741         unsigned long msg;
1742 
1743         /* Make sure we got the right message. */
1744         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1745         EXPECT_EQ(0, ret);
1746 
1747         /* Validate and take action on expected syscalls. */
1748         switch (msg) {
1749         case 0x1002:
1750                 /* change getpid to getppid. */
1751                 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1752                 change_syscall(_metadata, tracee, __NR_getppid, 0);
1753                 break;
1754         case 0x1003:
1755                 /* skip gettid with valid return code. */
1756                 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1757                 change_syscall(_metadata, tracee, -1, 45000);
1758                 break;
1759         case 0x1004:
1760                 /* skip openat with error. */
1761                 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1762                 change_syscall(_metadata, tracee, -1, -ESRCH);
1763                 break;
1764         case 0x1005:
1765                 /* do nothing (allow getppid) */
1766                 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1767                 break;
1768         default:
1769                 EXPECT_EQ(0, msg) {
1770                         TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1771                         kill(tracee, SIGKILL);
1772                 }
1773         }
1774 
1775 }
1776 
1777 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1778                    int status, void *args)
1779 {
1780         int ret, nr;
1781         unsigned long msg;
1782         static bool entry;
1783 
1784         /*
1785          * The traditional way to tell PTRACE_SYSCALL entry/exit
1786          * is by counting.
1787          */
1788         entry = !entry;
1789 
1790         /* Make sure we got an appropriate message. */
1791         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1792         EXPECT_EQ(0, ret);
1793         EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
1794                         : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
1795 
1796         if (!entry)
1797                 return;
1798 
1799         nr = get_syscall(_metadata, tracee);
1800 
1801         if (nr == __NR_getpid)
1802                 change_syscall(_metadata, tracee, __NR_getppid, 0);
1803         if (nr == __NR_gettid)
1804                 change_syscall(_metadata, tracee, -1, 45000);
1805         if (nr == __NR_openat)
1806                 change_syscall(_metadata, tracee, -1, -ESRCH);
1807 }
1808 
/* Per-test state shared by the TRACE_syscall fixture and its tests. */
FIXTURE_DATA(TRACE_syscall) {
        /* Heap copy of the SECCOMP_RET_TRACE filter built in setup. */
        struct sock_fprog prog;
        /* Value returned by setup_trace_fixture() for the running tracer. */
        pid_t tracer;
        /* gettid() result recorded before any filter is installed. */
        pid_t mytid;
        /* getpid() result recorded before any filter is installed. */
        pid_t mypid;
        /* getppid() result recorded before any filter is installed. */
        pid_t parent;
};
1813 
1814 FIXTURE_SETUP(TRACE_syscall)
1815 {
1816         struct sock_filter filter[] = {
1817                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1818                         offsetof(struct seccomp_data, nr)),
1819                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1820                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1821                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1822                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1823                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
1824                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1825                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1826                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
1827                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1828         };
1829 
1830         memset(&self->prog, 0, sizeof(self->prog));
1831         self->prog.filter = malloc(sizeof(filter));
1832         ASSERT_NE(NULL, self->prog.filter);
1833         memcpy(self->prog.filter, filter, sizeof(filter));
1834         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1835 
1836         /* Prepare some testable syscall results. */
1837         self->mytid = syscall(__NR_gettid);
1838         ASSERT_GT(self->mytid, 0);
1839         ASSERT_NE(self->mytid, 1) {
1840                 TH_LOG("Running this test as init is not supported. :)");
1841         }
1842 
1843         self->mypid = getpid();
1844         ASSERT_GT(self->mypid, 0);
1845         ASSERT_EQ(self->mytid, self->mypid);
1846 
1847         self->parent = getppid();
1848         ASSERT_GT(self->parent, 0);
1849         ASSERT_NE(self->parent, self->mypid);
1850 
1851         /* Launch tracer. */
1852         self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1853                                            false);
1854 }
1855 
1856 FIXTURE_TEARDOWN(TRACE_syscall)
1857 {
1858         teardown_trace_fixture(_metadata, self->tracer);
1859         if (self->prog.filter)
1860                 free(self->prog.filter);
1861 }
1862 
1863 TEST_F(TRACE_syscall, ptrace_syscall_redirected)
1864 {
1865         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1866         teardown_trace_fixture(_metadata, self->tracer);
1867         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1868                                            true);
1869 
1870         /* Tracer will redirect getpid to getppid. */
1871         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1872 }
1873 
1874 TEST_F(TRACE_syscall, ptrace_syscall_errno)
1875 {
1876         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1877         teardown_trace_fixture(_metadata, self->tracer);
1878         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1879                                            true);
1880 
1881         /* Tracer should skip the open syscall, resulting in ESRCH. */
1882         EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1883 }
1884 
1885 TEST_F(TRACE_syscall, ptrace_syscall_faked)
1886 {
1887         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1888         teardown_trace_fixture(_metadata, self->tracer);
1889         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1890                                            true);
1891 
1892         /* Tracer should skip the gettid syscall, resulting fake pid. */
1893         EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1894 }
1895 
1896 TEST_F(TRACE_syscall, syscall_allowed)
1897 {
1898         long ret;
1899 
1900         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1901         ASSERT_EQ(0, ret);
1902 
1903         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1904         ASSERT_EQ(0, ret);
1905 
1906         /* getppid works as expected (no changes). */
1907         EXPECT_EQ(self->parent, syscall(__NR_getppid));
1908         EXPECT_NE(self->mypid, syscall(__NR_getppid));
1909 }
1910 
1911 TEST_F(TRACE_syscall, syscall_redirected)
1912 {
1913         long ret;
1914 
1915         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1916         ASSERT_EQ(0, ret);
1917 
1918         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1919         ASSERT_EQ(0, ret);
1920 
1921         /* getpid has been redirected to getppid as expected. */
1922         EXPECT_EQ(self->parent, syscall(__NR_getpid));
1923         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1924 }
1925 
1926 TEST_F(TRACE_syscall, syscall_errno)
1927 {
1928         long ret;
1929 
1930         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1931         ASSERT_EQ(0, ret);
1932 
1933         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1934         ASSERT_EQ(0, ret);
1935 
1936         /* openat has been skipped and an errno return. */
1937         EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1938 }
1939 
1940 TEST_F(TRACE_syscall, syscall_faked)
1941 {
1942         long ret;
1943 
1944         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1945         ASSERT_EQ(0, ret);
1946 
1947         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1948         ASSERT_EQ(0, ret);
1949 
1950         /* gettid has been skipped and an altered return value stored. */
1951         EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1952 }
1953 
1954 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1955 {
1956         struct sock_filter filter[] = {
1957                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1958                         offsetof(struct seccomp_data, nr)),
1959                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1960                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1961                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1962         };
1963         struct sock_fprog prog = {
1964                 .len = (unsigned short)ARRAY_SIZE(filter),
1965                 .filter = filter,
1966         };
1967         long ret;
1968 
1969         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1970         ASSERT_EQ(0, ret);
1971 
1972         /* Install fixture filter. */
1973         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1974         ASSERT_EQ(0, ret);
1975 
1976         /* Install "errno on getppid" filter. */
1977         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1978         ASSERT_EQ(0, ret);
1979 
1980         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1981         errno = 0;
1982         EXPECT_EQ(-1, syscall(__NR_getpid));
1983         EXPECT_EQ(EPERM, errno);
1984 }
1985 
1986 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
1987 {
1988         struct sock_filter filter[] = {
1989                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1990                         offsetof(struct seccomp_data, nr)),
1991                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1992                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1993                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1994         };
1995         struct sock_fprog prog = {
1996                 .len = (unsigned short)ARRAY_SIZE(filter),
1997                 .filter = filter,
1998         };
1999         long ret;
2000 
2001         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2002         ASSERT_EQ(0, ret);
2003 
2004         /* Install fixture filter. */
2005         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
2006         ASSERT_EQ(0, ret);
2007 
2008         /* Install "death on getppid" filter. */
2009         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2010         ASSERT_EQ(0, ret);
2011 
2012         /* Tracer will redirect getpid to getppid, and we should die. */
2013         EXPECT_NE(self->mypid, syscall(__NR_getpid));
2014 }
2015 
2016 TEST_F(TRACE_syscall, skip_after_ptrace)
2017 {
2018         struct sock_filter filter[] = {
2019                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2020                         offsetof(struct seccomp_data, nr)),
2021                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2022                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
2023                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2024         };
2025         struct sock_fprog prog = {
2026                 .len = (unsigned short)ARRAY_SIZE(filter),
2027                 .filter = filter,
2028         };
2029         long ret;
2030 
2031         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2032         teardown_trace_fixture(_metadata, self->tracer);
2033         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2034                                            true);
2035 
2036         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2037         ASSERT_EQ(0, ret);
2038 
2039         /* Install "errno on getppid" filter. */
2040         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2041         ASSERT_EQ(0, ret);
2042 
2043         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
2044         EXPECT_EQ(-1, syscall(__NR_getpid));
2045         EXPECT_EQ(EPERM, errno);
2046 }
2047 
2048 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
2049 {
2050         struct sock_filter filter[] = {
2051                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2052                         offsetof(struct seccomp_data, nr)),
2053                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2054                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2055                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2056         };
2057         struct sock_fprog prog = {
2058                 .len = (unsigned short)ARRAY_SIZE(filter),
2059                 .filter = filter,
2060         };
2061         long ret;
2062 
2063         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2064         teardown_trace_fixture(_metadata, self->tracer);
2065         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2066                                            true);
2067 
2068         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2069         ASSERT_EQ(0, ret);
2070 
2071         /* Install "death on getppid" filter. */
2072         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2073         ASSERT_EQ(0, ret);
2074 
2075         /* Tracer will redirect getpid to getppid, and we should die. */
2076         EXPECT_NE(self->mypid, syscall(__NR_getpid));
2077 }
2078 
2079 TEST(seccomp_syscall)
2080 {
2081         struct sock_filter filter[] = {
2082                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2083         };
2084         struct sock_fprog prog = {
2085                 .len = (unsigned short)ARRAY_SIZE(filter),
2086                 .filter = filter,
2087         };
2088         long ret;
2089 
2090         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2091         ASSERT_EQ(0, ret) {
2092                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2093         }
2094 
2095         /* Reject insane operation. */
2096         ret = seccomp(-1, 0, &prog);
2097         ASSERT_NE(ENOSYS, errno) {
2098                 TH_LOG("Kernel does not support seccomp syscall!");
2099         }
2100         EXPECT_EQ(EINVAL, errno) {
2101                 TH_LOG("Did not reject crazy op value!");
2102         }
2103 
2104         /* Reject strict with flags or pointer. */
2105         ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2106         EXPECT_EQ(EINVAL, errno) {
2107                 TH_LOG("Did not reject mode strict with flags!");
2108         }
2109         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2110         EXPECT_EQ(EINVAL, errno) {
2111                 TH_LOG("Did not reject mode strict with uargs!");
2112         }
2113 
2114         /* Reject insane args for filter. */
2115         ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2116         EXPECT_EQ(EINVAL, errno) {
2117                 TH_LOG("Did not reject crazy filter flags!");
2118         }
2119         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2120         EXPECT_EQ(EFAULT, errno) {
2121                 TH_LOG("Did not reject NULL filter!");
2122         }
2123 
2124         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2125         EXPECT_EQ(0, errno) {
2126                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2127                         strerror(errno));
2128         }
2129 }
2130 
2131 TEST(seccomp_syscall_mode_lock)
2132 {
2133         struct sock_filter filter[] = {
2134                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2135         };
2136         struct sock_fprog prog = {
2137                 .len = (unsigned short)ARRAY_SIZE(filter),
2138                 .filter = filter,
2139         };
2140         long ret;
2141 
2142         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2143         ASSERT_EQ(0, ret) {
2144                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2145         }
2146 
2147         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2148         ASSERT_NE(ENOSYS, errno) {
2149                 TH_LOG("Kernel does not support seccomp syscall!");
2150         }
2151         EXPECT_EQ(0, ret) {
2152                 TH_LOG("Could not install filter!");
2153         }
2154 
2155         /* Make sure neither entry point will switch to strict. */
2156         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2157         EXPECT_EQ(EINVAL, errno) {
2158                 TH_LOG("Switched to mode strict!");
2159         }
2160 
2161         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2162         EXPECT_EQ(EINVAL, errno) {
2163                 TH_LOG("Switched to mode strict!");
2164         }
2165 }
2166 
2167 /*
2168  * Test detection of known and unknown filter flags. Userspace needs to be able
2169  * to check if a filter flag is supported by the current kernel and a good way
2170  * of doing that is by attempting to enter filter mode, with the flag bit in
2171  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2172  * that the flag is valid and EINVAL indicates that the flag is invalid.
2173  */
2174 TEST(detect_seccomp_filter_flags)
2175 {
2176         unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2177                                  SECCOMP_FILTER_FLAG_LOG,
2178                                  SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2179                                  SECCOMP_FILTER_FLAG_NEW_LISTENER };
2180         unsigned int exclusive[] = {
2181                                 SECCOMP_FILTER_FLAG_TSYNC,
2182                                 SECCOMP_FILTER_FLAG_NEW_LISTENER };
2183         unsigned int flag, all_flags, exclusive_mask;
2184         int i;
2185         long ret;
2186 
2187         /* Test detection of individual known-good filter flags */
2188         for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2189                 int bits = 0;
2190 
2191                 flag = flags[i];
2192                 /* Make sure the flag is a single bit! */
2193                 while (flag) {
2194                         if (flag & 0x1)
2195                                 bits ++;
2196                         flag >>= 1;
2197                 }
2198                 ASSERT_EQ(1, bits);
2199                 flag = flags[i];
2200 
2201                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2202                 ASSERT_NE(ENOSYS, errno) {
2203                         TH_LOG("Kernel does not support seccomp syscall!");
2204                 }
2205                 EXPECT_EQ(-1, ret);
2206                 EXPECT_EQ(EFAULT, errno) {
2207                         TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2208                                flag);
2209                 }
2210 
2211                 all_flags |= flag;
2212         }
2213 
2214         /*
2215          * Test detection of all known-good filter flags combined. But
2216          * for the exclusive flags we need to mask them out and try them
2217          * individually for the "all flags" testing.
2218          */
2219         exclusive_mask = 0;
2220         for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2221                 exclusive_mask |= exclusive[i];
2222         for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2223                 flag = all_flags & ~exclusive_mask;
2224                 flag |= exclusive[i];
2225 
2226                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2227                 EXPECT_EQ(-1, ret);
2228                 EXPECT_EQ(EFAULT, errno) {
2229                         TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2230                                flag);
2231                 }
2232         }
2233 
2234         /* Test detection of an unknown filter flags, without exclusives. */
2235         flag = -1;
2236         flag &= ~exclusive_mask;
2237         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2238         EXPECT_EQ(-1, ret);
2239         EXPECT_EQ(EINVAL, errno) {
2240                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2241                        flag);
2242         }
2243 
2244         /*
2245          * Test detection of an unknown filter flag that may simply need to be
2246          * added to this test
2247          */
2248         flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2249         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2250         EXPECT_EQ(-1, ret);
2251         EXPECT_EQ(EINVAL, errno) {
2252                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2253                        flag);
2254         }
2255 }
2256 
2257 TEST(TSYNC_first)
2258 {
2259         struct sock_filter filter[] = {
2260                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2261         };
2262         struct sock_fprog prog = {
2263                 .len = (unsigned short)ARRAY_SIZE(filter),
2264                 .filter = filter,
2265         };
2266         long ret;
2267 
2268         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2269         ASSERT_EQ(0, ret) {
2270                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2271         }
2272 
2273         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2274                       &prog);
2275         ASSERT_NE(ENOSYS, errno) {
2276                 TH_LOG("Kernel does not support seccomp syscall!");
2277         }
2278         EXPECT_EQ(0, ret) {
2279                 TH_LOG("Could not install initial filter with TSYNC!");
2280         }
2281 }
2282 
/* Number of sibling threads each TSYNC test starts. */
#define TSYNC_SIBLINGS 2

/* Control block handed to one sibling thread (see tsync_sibling()). */
struct tsync_sibling {
        /* pthread handle; 0 while not started or after a successful join. */
        pthread_t tid;
        /* Kernel thread id, stored by the sibling itself via gettid. */
        pid_t system_tid;
        /* Posted once by the sibling after its optional prctl() step. */
        sem_t *started;
        /* Condition the sibling blocks on until the test broadcasts. */
        pthread_cond_t *cond;
        /* Mutex paired with @cond. */
        pthread_mutex_t *mutex;
        /* Non-zero: sibling installs @prog on itself before posting @started. */
        int diverge;
        /* How many wake-ups on @cond the sibling consumes before moving on. */
        int num_waits;
        /* Filter program installed when @diverge is set. */
        struct sock_fprog *prog;
        /* Harness metadata of the owning test. */
        struct __test_metadata *metadata;
};
2295 
/*
 * Join a thread and, only if the join succeeded, clear its tid so fixture
 * teardown will not try to touch it again. Joining an already joined thread
 * is not allowed by Bionic; threads that were never joined keep a non-zero
 * tid and are killed directly during teardown.
 *
 * @tid must be an lvalue of type pthread_t; it is evaluated more than once.
 * @status is passed to pthread_join() unchanged.
 */
#define PTHREAD_JOIN(tid, status)                                       \
        do {                                                            \
                int _rc = pthread_join((tid), (status));                \
                if (_rc) {                                              \
                        TH_LOG("pthread_join of tid %u failed: %d\n",   \
                                (unsigned int)(tid), _rc);              \
                } else {                                                \
                        (tid) = 0;                                      \
                }                                                       \
        } while (0)
2312 
2313 FIXTURE_DATA(TSYNC) {
2314         struct sock_fprog root_prog, apply_prog;
2315         struct tsync_sibling sibling[TSYNC_SIBLINGS];
2316         sem_t started;
2317         pthread_cond_t cond;
2318         pthread_mutex_t mutex;
2319         int sibling_count;
2320 };
2321 
2322 FIXTURE_SETUP(TSYNC)
2323 {
2324         struct sock_filter root_filter[] = {
2325                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2326         };
2327         struct sock_filter apply_filter[] = {
2328                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2329                         offsetof(struct seccomp_data, nr)),
2330                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2331                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2332                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2333         };
2334 
2335         memset(&self->root_prog, 0, sizeof(self->root_prog));
2336         memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2337         memset(&self->sibling, 0, sizeof(self->sibling));
2338         self->root_prog.filter = malloc(sizeof(root_filter));
2339         ASSERT_NE(NULL, self->root_prog.filter);
2340         memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2341         self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2342 
2343         self->apply_prog.filter = malloc(sizeof(apply_filter));
2344         ASSERT_NE(NULL, self->apply_prog.filter);
2345         memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2346         self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2347 
2348         self->sibling_count = 0;
2349         pthread_mutex_init(&self->mutex, NULL);
2350         pthread_cond_init(&self->cond, NULL);
2351         sem_init(&self->started, 0, 0);
2352         self->sibling[0].tid = 0;
2353         self->sibling[0].cond = &self->cond;
2354         self->sibling[0].started = &self->started;
2355         self->sibling[0].mutex = &self->mutex;
2356         self->sibling[0].diverge = 0;
2357         self->sibling[0].num_waits = 1;
2358         self->sibling[0].prog = &self->root_prog;
2359         self->sibling[0].metadata = _metadata;
2360         self->sibling[1].tid = 0;
2361         self->sibling[1].cond = &self->cond;
2362         self->sibling[1].started = &self->started;
2363         self->sibling[1].mutex = &self->mutex;
2364         self->sibling[1].diverge = 0;
2365         self->sibling[1].prog = &self->root_prog;
2366         self->sibling[1].num_waits = 1;
2367         self->sibling[1].metadata = _metadata;
2368 }
2369 
2370 FIXTURE_TEARDOWN(TSYNC)
2371 {
2372         int sib = 0;
2373 
2374         if (self->root_prog.filter)
2375                 free(self->root_prog.filter);
2376         if (self->apply_prog.filter)
2377                 free(self->apply_prog.filter);
2378 
2379         for ( ; sib < self->sibling_count; ++sib) {
2380                 struct tsync_sibling *s = &self->sibling[sib];
2381 
2382                 if (!s->tid)
2383                         continue;
2384                 /*
2385                  * If a thread is still running, it may be stuck, so hit
2386                  * it over the head really hard.
2387                  */
2388                 pthread_kill(s->tid, 9);
2389         }
2390         pthread_mutex_destroy(&self->mutex);
2391         pthread_cond_destroy(&self->cond);
2392         sem_destroy(&self->started);
2393 }
2394 
2395 void *tsync_sibling(void *data)
2396 {
2397         long ret = 0;
2398         struct tsync_sibling *me = data;
2399 
2400         me->system_tid = syscall(__NR_gettid);
2401 
2402         pthread_mutex_lock(me->mutex);
2403         if (me->diverge) {
2404                 /* Just re-apply the root prog to fork the tree */
2405                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2406                                 me->prog, 0, 0);
2407         }
2408         sem_post(me->started);
2409         /* Return outside of started so parent notices failures. */
2410         if (ret) {
2411                 pthread_mutex_unlock(me->mutex);
2412                 return (void *)SIBLING_EXIT_FAILURE;
2413         }
2414         do {
2415                 pthread_cond_wait(me->cond, me->mutex);
2416                 me->num_waits = me->num_waits - 1;
2417         } while (me->num_waits);
2418         pthread_mutex_unlock(me->mutex);
2419 
2420         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2421         if (!ret)
2422                 return (void *)SIBLING_EXIT_NEWPRIVS;
2423         read(0, NULL, 0);
2424         return (void *)SIBLING_EXIT_UNKILLED;
2425 }
2426 
2427 void tsync_start_sibling(struct tsync_sibling *sibling)
2428 {
2429         pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2430 }
2431 
2432 TEST_F(TSYNC, siblings_fail_prctl)
2433 {
2434         long ret;
2435         void *status;
2436         struct sock_filter filter[] = {
2437                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2438                         offsetof(struct seccomp_data, nr)),
2439                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2440                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2441                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2442         };
2443         struct sock_fprog prog = {
2444                 .len = (unsigned short)ARRAY_SIZE(filter),
2445                 .filter = filter,
2446         };
2447 
2448         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2449                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2450         }
2451 
2452         /* Check prctl failure detection by requesting sib 0 diverge. */
2453         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2454         ASSERT_NE(ENOSYS, errno) {
2455                 TH_LOG("Kernel does not support seccomp syscall!");
2456         }
2457         ASSERT_EQ(0, ret) {
2458                 TH_LOG("setting filter failed");
2459         }
2460 
2461         self->sibling[0].diverge = 1;
2462         tsync_start_sibling(&self->sibling[0]);
2463         tsync_start_sibling(&self->sibling[1]);
2464 
2465         while (self->sibling_count < TSYNC_SIBLINGS) {
2466                 sem_wait(&self->started);
2467                 self->sibling_count++;
2468         }
2469 
2470         /* Signal the threads to clean up*/
2471         pthread_mutex_lock(&self->mutex);
2472         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2473                 TH_LOG("cond broadcast non-zero");
2474         }
2475         pthread_mutex_unlock(&self->mutex);
2476 
2477         /* Ensure diverging sibling failed to call prctl. */
2478         PTHREAD_JOIN(self->sibling[0].tid, &status);
2479         EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2480         PTHREAD_JOIN(self->sibling[1].tid, &status);
2481         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2482 }
2483 
2484 TEST_F(TSYNC, two_siblings_with_ancestor)
2485 {
2486         long ret;
2487         void *status;
2488 
2489         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2490                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2491         }
2492 
2493         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2494         ASSERT_NE(ENOSYS, errno) {
2495                 TH_LOG("Kernel does not support seccomp syscall!");
2496         }
2497         ASSERT_EQ(0, ret) {
2498                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2499         }
2500         tsync_start_sibling(&self->sibling[0]);
2501         tsync_start_sibling(&self->sibling[1]);
2502 
2503         while (self->sibling_count < TSYNC_SIBLINGS) {
2504                 sem_wait(&self->started);
2505                 self->sibling_count++;
2506         }
2507 
2508         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2509                       &self->apply_prog);
2510         ASSERT_EQ(0, ret) {
2511                 TH_LOG("Could install filter on all threads!");
2512         }
2513         /* Tell the siblings to test the policy */
2514         pthread_mutex_lock(&self->mutex);
2515         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2516                 TH_LOG("cond broadcast non-zero");
2517         }
2518         pthread_mutex_unlock(&self->mutex);
2519         /* Ensure they are both killed and don't exit cleanly. */
2520         PTHREAD_JOIN(self->sibling[0].tid, &status);
2521         EXPECT_EQ(0x0, (long)status);
2522         PTHREAD_JOIN(self->sibling[1].tid, &status);
2523         EXPECT_EQ(0x0, (long)status);
2524 }
2525 
2526 TEST_F(TSYNC, two_sibling_want_nnp)
2527 {
2528         void *status;
2529 
2530         /* start siblings before any prctl() operations */
2531         tsync_start_sibling(&self->sibling[0]);
2532         tsync_start_sibling(&self->sibling[1]);
2533         while (self->sibling_count < TSYNC_SIBLINGS) {
2534                 sem_wait(&self->started);
2535                 self->sibling_count++;
2536         }
2537 
2538         /* Tell the siblings to test no policy */
2539         pthread_mutex_lock(&self->mutex);
2540         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2541                 TH_LOG("cond broadcast non-zero");
2542         }
2543         pthread_mutex_unlock(&self->mutex);
2544 
2545         /* Ensure they are both upset about lacking nnp. */
2546         PTHREAD_JOIN(self->sibling[0].tid, &status);
2547         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2548         PTHREAD_JOIN(self->sibling[1].tid, &status);
2549         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2550 }
2551 
2552 TEST_F(TSYNC, two_siblings_with_no_filter)
2553 {
2554         long ret;
2555         void *status;
2556 
2557         /* start siblings before any prctl() operations */
2558         tsync_start_sibling(&self->sibling[0]);
2559         tsync_start_sibling(&self->sibling[1]);
2560         while (self->sibling_count < TSYNC_SIBLINGS) {
2561                 sem_wait(&self->started);
2562                 self->sibling_count++;
2563         }
2564 
2565         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2566                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2567         }
2568 
2569         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2570                       &self->apply_prog);
2571         ASSERT_NE(ENOSYS, errno) {
2572                 TH_LOG("Kernel does not support seccomp syscall!");
2573         }
2574         ASSERT_EQ(0, ret) {
2575                 TH_LOG("Could install filter on all threads!");
2576         }
2577 
2578         /* Tell the siblings to test the policy */
2579         pthread_mutex_lock(&self->mutex);
2580         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2581                 TH_LOG("cond broadcast non-zero");
2582         }
2583         pthread_mutex_unlock(&self->mutex);
2584 
2585         /* Ensure they are both killed and don't exit cleanly. */
2586         PTHREAD_JOIN(self->sibling[0].tid, &status);
2587         EXPECT_EQ(0x0, (long)status);
2588         PTHREAD_JOIN(self->sibling[1].tid, &status);
2589         EXPECT_EQ(0x0, (long)status);
2590 }
2591 
2592 TEST_F(TSYNC, two_siblings_with_one_divergence)
2593 {
2594         long ret;
2595         void *status;
2596 
2597         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2598                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2599         }
2600 
2601         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2602         ASSERT_NE(ENOSYS, errno) {
2603                 TH_LOG("Kernel does not support seccomp syscall!");
2604         }
2605         ASSERT_EQ(0, ret) {
2606                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2607         }
2608         self->sibling[0].diverge = 1;
2609         tsync_start_sibling(&self->sibling[0]);
2610         tsync_start_sibling(&self->sibling[1]);
2611 
2612         while (self->sibling_count < TSYNC_SIBLINGS) {
2613                 sem_wait(&self->started);
2614                 self->sibling_count++;
2615         }
2616 
2617         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2618                       &self->apply_prog);
2619         ASSERT_EQ(self->sibling[0].system_tid, ret) {
2620                 TH_LOG("Did not fail on diverged sibling.");
2621         }
2622 
2623         /* Wake the threads */
2624         pthread_mutex_lock(&self->mutex);
2625         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2626                 TH_LOG("cond broadcast non-zero");
2627         }
2628         pthread_mutex_unlock(&self->mutex);
2629 
2630         /* Ensure they are both unkilled. */
2631         PTHREAD_JOIN(self->sibling[0].tid, &status);
2632         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2633         PTHREAD_JOIN(self->sibling[1].tid, &status);
2634         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2635 }
2636 
2637 TEST_F(TSYNC, two_siblings_not_under_filter)
2638 {
2639         long ret, sib;
2640         void *status;
2641         struct timespec delay = { .tv_nsec = 100000000 };
2642 
2643         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2644                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2645         }
2646 
2647         /*
2648          * Sibling 0 will have its own seccomp policy
2649          * and Sibling 1 will not be under seccomp at
2650          * all. Sibling 1 will enter seccomp and 0
2651          * will cause failure.
2652          */
2653         self->sibling[0].diverge = 1;
2654         tsync_start_sibling(&self->sibling[0]);
2655         tsync_start_sibling(&self->sibling[1]);
2656 
2657         while (self->sibling_count < TSYNC_SIBLINGS) {
2658                 sem_wait(&self->started);
2659                 self->sibling_count++;
2660         }
2661 
2662         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2663         ASSERT_NE(ENOSYS, errno) {
2664                 TH_LOG("Kernel does not support seccomp syscall!");
2665         }
2666         ASSERT_EQ(0, ret) {
2667                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2668         }
2669 
2670         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2671                       &self->apply_prog);
2672         ASSERT_EQ(ret, self->sibling[0].system_tid) {
2673                 TH_LOG("Did not fail on diverged sibling.");
2674         }
2675         sib = 1;
2676         if (ret == self->sibling[0].system_tid)
2677                 sib = 0;
2678 
2679         pthread_mutex_lock(&self->mutex);
2680 
2681         /* Increment the other siblings num_waits so we can clean up
2682          * the one we just saw.
2683          */
2684         self->sibling[!sib].num_waits += 1;
2685 
2686         /* Signal the thread to clean up*/
2687         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2688                 TH_LOG("cond broadcast non-zero");
2689         }
2690         pthread_mutex_unlock(&self->mutex);
2691         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2692         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2693         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2694         while (!kill(self->sibling[sib].system_tid, 0))
2695                 nanosleep(&delay, NULL);
2696         /* Switch to the remaining sibling */
2697         sib = !sib;
2698 
2699         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2700                       &self->apply_prog);
2701         ASSERT_EQ(0, ret) {
2702                 TH_LOG("Expected the remaining sibling to sync");
2703         };
2704 
2705         pthread_mutex_lock(&self->mutex);
2706 
2707         /* If remaining sibling didn't have a chance to wake up during
2708          * the first broadcast, manually reduce the num_waits now.
2709          */
2710         if (self->sibling[sib].num_waits > 1)
2711                 self->sibling[sib].num_waits = 1;
2712         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2713                 TH_LOG("cond broadcast non-zero");
2714         }
2715         pthread_mutex_unlock(&self->mutex);
2716         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2717         EXPECT_EQ(0, (long)status);
2718         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2719         while (!kill(self->sibling[sib].system_tid, 0))
2720                 nanosleep(&delay, NULL);
2721 
2722         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2723                       &self->apply_prog);
2724         ASSERT_EQ(0, ret);  /* just us chickens */
2725 }
2726 
2727 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
2728 TEST(syscall_restart)
2729 {
2730         long ret;
2731         unsigned long msg;
2732         pid_t child_pid;
2733         int pipefd[2];
2734         int status;
2735         siginfo_t info = { };
2736         struct sock_filter filter[] = {
2737                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2738                          offsetof(struct seccomp_data, nr)),
2739 
2740 #ifdef __NR_sigreturn
2741                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2742 #endif
2743                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2744                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2745                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2746                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
2747                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2748 
2749                 /* Allow __NR_write for easy logging. */
2750                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2751                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2752                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2753                 /* The nanosleep jump target. */
2754                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2755                 /* The restart_syscall jump target. */
2756                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2757         };
2758         struct sock_fprog prog = {
2759                 .len = (unsigned short)ARRAY_SIZE(filter),
2760                 .filter = filter,
2761         };
2762 #if defined(__arm__)
2763         struct utsname utsbuf;
2764 #endif
2765 
2766         ASSERT_EQ(0, pipe(pipefd));
2767 
2768         child_pid = fork();
2769         ASSERT_LE(0, child_pid);
2770         if (child_pid == 0) {
2771                 /* Child uses EXPECT not ASSERT to deliver status correctly. */
2772                 char buf = ' ';
2773                 struct timespec timeout = { };
2774 
2775                 /* Attach parent as tracer and stop. */
2776                 EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2777                 EXPECT_EQ(0, raise(SIGSTOP));
2778 
2779                 EXPECT_EQ(0, close(pipefd[1]));
2780 
2781                 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2782                         TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2783                 }
2784 
2785                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2786                 EXPECT_EQ(0, ret) {
2787                         TH_LOG("Failed to install filter!");
2788                 }
2789 
2790                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2791                         TH_LOG("Failed to read() sync from parent");
2792                 }
2793                 EXPECT_EQ('.', buf) {
2794                         TH_LOG("Failed to get sync data from read()");
2795                 }
2796 
2797                 /* Start nanosleep to be interrupted. */
2798                 timeout.tv_sec = 1;
2799                 errno = 0;
2800                 EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2801                         TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2802                 }
2803 
2804                 /* Read final sync from parent. */
2805                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2806                         TH_LOG("Failed final read() from parent");
2807                 }
2808                 EXPECT_EQ('!', buf) {
2809                         TH_LOG("Failed to get final data from read()");
2810                 }
2811 
2812                 /* Directly report the status of our test harness results. */
2813                 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2814                                                      : EXIT_FAILURE);
2815         }
2816         EXPECT_EQ(0, close(pipefd[0]));
2817 
2818         /* Attach to child, setup options, and release. */
2819         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2820         ASSERT_EQ(true, WIFSTOPPED(status));
2821         ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2822                             PTRACE_O_TRACESECCOMP));
2823         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2824         ASSERT_EQ(1, write(pipefd[1], ".", 1));
2825 
2826         /* Wait for nanosleep() to start. */
2827         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2828         ASSERT_EQ(true, WIFSTOPPED(status));
2829         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2830         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2831         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2832         ASSERT_EQ(0x100, msg);
2833         EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
2834 
2835         /* Might as well check siginfo for sanity while we're here. */
2836         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2837         ASSERT_EQ(SIGTRAP, info.si_signo);
2838         ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2839         EXPECT_EQ(0, info.si_errno);
2840         EXPECT_EQ(getuid(), info.si_uid);
2841         /* Verify signal delivery came from child (seccomp-triggered). */
2842         EXPECT_EQ(child_pid, info.si_pid);
2843 
2844         /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2845         ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2846         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2847         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2848         ASSERT_EQ(true, WIFSTOPPED(status));
2849         ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2850         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2851         /*
2852          * There is no siginfo on SIGSTOP any more, so we can't verify
2853          * signal delivery came from parent now (getpid() == info.si_pid).
2854          * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
2855          * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
2856          */
2857         EXPECT_EQ(SIGSTOP, info.si_signo);
2858 
2859         /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2860         ASSERT_EQ(0, kill(child_pid, SIGCONT));
2861         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2862         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2863         ASSERT_EQ(true, WIFSTOPPED(status));
2864         ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2865         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2866 
2867         /* Wait for restart_syscall() to start. */
2868         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2869         ASSERT_EQ(true, WIFSTOPPED(status));
2870         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2871         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2872         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2873 
2874         ASSERT_EQ(0x200, msg);
2875         ret = get_syscall(_metadata, child_pid);
2876 #if defined(__arm__)
2877         /*
2878          * FIXME:
2879          * - native ARM registers do NOT expose true syscall.
2880          * - compat ARM registers on ARM64 DO expose true syscall.
2881          */
2882         ASSERT_EQ(0, uname(&utsbuf));
2883         if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2884                 EXPECT_EQ(__NR_nanosleep, ret);
2885         } else
2886 #endif
2887         {
2888                 EXPECT_EQ(__NR_restart_syscall, ret);
2889         }
2890 
2891         /* Write again to end test. */
2892         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2893         ASSERT_EQ(1, write(pipefd[1], "!", 1));
2894         EXPECT_EQ(0, close(pipefd[1]));
2895 
2896         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2897         if (WIFSIGNALED(status) || WEXITSTATUS(status))
2898                 _metadata->passed = 0;
2899 }
2900 
2901 TEST_SIGNAL(filter_flag_log, SIGSYS)
2902 {
2903         struct sock_filter allow_filter[] = {
2904                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2905         };
2906         struct sock_filter kill_filter[] = {
2907                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2908                         offsetof(struct seccomp_data, nr)),
2909                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2910                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2911                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2912         };
2913         struct sock_fprog allow_prog = {
2914                 .len = (unsigned short)ARRAY_SIZE(allow_filter),
2915                 .filter = allow_filter,
2916         };
2917         struct sock_fprog kill_prog = {
2918                 .len = (unsigned short)ARRAY_SIZE(kill_filter),
2919                 .filter = kill_filter,
2920         };
2921         long ret;
2922         pid_t parent = getppid();
2923 
2924         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2925         ASSERT_EQ(0, ret);
2926 
2927         /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
2928         ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
2929                       &allow_prog);
2930         ASSERT_NE(ENOSYS, errno) {
2931                 TH_LOG("Kernel does not support seccomp syscall!");
2932         }
2933         EXPECT_NE(0, ret) {
2934                 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
2935         }
2936         EXPECT_EQ(EINVAL, errno) {
2937                 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
2938         }
2939 
2940         /* Verify that a simple, permissive filter can be added with no flags */
2941         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
2942         EXPECT_EQ(0, ret);
2943 
2944         /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
2945         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2946                       &allow_prog);
2947         ASSERT_NE(EINVAL, errno) {
2948                 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
2949         }
2950         EXPECT_EQ(0, ret);
2951 
2952         /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
2953         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2954                       &kill_prog);
2955         EXPECT_EQ(0, ret);
2956 
2957         EXPECT_EQ(parent, syscall(__NR_getppid));
2958         /* getpid() should never return. */
2959         EXPECT_EQ(0, syscall(__NR_getpid));
2960 }
2961 
2962 TEST(get_action_avail)
2963 {
2964         __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
2965                             SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
2966                             SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
2967         __u32 unknown_action = 0x10000000U;
2968         int i;
2969         long ret;
2970 
2971         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
2972         ASSERT_NE(ENOSYS, errno) {
2973                 TH_LOG("Kernel does not support seccomp syscall!");
2974         }
2975         ASSERT_NE(EINVAL, errno) {
2976                 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
2977         }
2978         EXPECT_EQ(ret, 0);
2979 
2980         for (i = 0; i < ARRAY_SIZE(actions); i++) {
2981                 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
2982                 EXPECT_EQ(ret, 0) {
2983                         TH_LOG("Expected action (0x%X) not available!",
2984                                actions[i]);
2985                 }
2986         }
2987 
2988         /* Check that an unknown action is handled properly (EOPNOTSUPP) */
2989         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
2990         EXPECT_EQ(ret, -1);
2991         EXPECT_EQ(errno, EOPNOTSUPP);
2992 }
2993 
2994 TEST(get_metadata)
2995 {
2996         pid_t pid;
2997         int pipefd[2];
2998         char buf;
2999         struct seccomp_metadata md;
3000         long ret;
3001 
3002         /* Only real root can get metadata. */
3003         if (geteuid()) {
3004                 XFAIL(return, "get_metadata requires real root");
3005                 return;
3006         }
3007 
3008         ASSERT_EQ(0, pipe(pipefd));
3009 
3010         pid = fork();
3011         ASSERT_GE(pid, 0);
3012         if (pid == 0) {
3013                 struct sock_filter filter[] = {
3014                         BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3015                 };
3016                 struct sock_fprog prog = {
3017                         .len = (unsigned short)ARRAY_SIZE(filter),
3018                         .filter = filter,
3019                 };
3020 
3021                 /* one with log, one without */
3022                 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
3023                                      SECCOMP_FILTER_FLAG_LOG, &prog));
3024                 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
3025 
3026                 EXPECT_EQ(0, close(pipefd[0]));
3027                 ASSERT_EQ(1, write(pipefd[1], "1", 1));
3028                 ASSERT_EQ(0, close(pipefd[1]));
3029 
3030                 while (1)
3031                         sleep(100);
3032         }
3033 
3034         ASSERT_EQ(0, close(pipefd[1]));
3035         ASSERT_EQ(1, read(pipefd[0], &buf, 1));
3036 
3037         ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
3038         ASSERT_EQ(pid, waitpid(pid, NULL, 0));
3039 
3040         /* Past here must not use ASSERT or child process is never killed. */
3041 
3042         md.filter_off = 0;
3043         errno = 0;
3044         ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3045         EXPECT_EQ(sizeof(md), ret) {
3046                 if (errno == EINVAL)
3047                         XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
3048         }
3049 
3050         EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
3051         EXPECT_EQ(md.filter_off, 0);
3052 
3053         md.filter_off = 1;
3054         ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3055         EXPECT_EQ(sizeof(md), ret);
3056         EXPECT_EQ(md.flags, 0);
3057         EXPECT_EQ(md.filter_off, 1);
3058 
3059 skip:
3060         ASSERT_EQ(0, kill(pid, SIGKILL));
3061 }
3062 
/*
 * Install a seccomp filter that returns SECCOMP_RET_USER_NOTIF for syscall
 * number @nr and SECCOMP_RET_ALLOW for every other syscall.
 *
 * @nr:    syscall number to hand to the user notification mechanism
 * @flags: flags passed through to seccomp(SECCOMP_SET_MODE_FILTER, ...)
 *
 * Returns whatever the seccomp() call returns.
 */
static int user_trap_syscall(int nr, unsigned int flags)
{
        struct sock_filter insns[] = {
                /* Load the syscall number. */
                BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                         offsetof(struct seccomp_data, nr)),
                /* On a match fall through to USER_NOTIF, else skip to ALLOW. */
                BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 1),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_USER_NOTIF),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog prog = {
                .filter = insns,
                .len = (unsigned short)ARRAY_SIZE(insns),
        };

        return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
}
3080 
3081 #define USER_NOTIF_MAGIC INT_MAX
3082 TEST(user_notification_basic)
3083 {
3084         pid_t pid;
3085         long ret;
3086         int status, listener;
3087         struct seccomp_notif req = {};
3088         struct seccomp_notif_resp resp = {};
3089         struct pollfd pollfd;
3090 
3091         struct sock_filter filter[] = {
3092                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3093         };
3094         struct sock_fprog prog = {
3095                 .len = (unsigned short)ARRAY_SIZE(filter),
3096                 .filter = filter,
3097         };
3098 
3099         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3100         ASSERT_EQ(0, ret) {
3101                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3102         }
3103 
3104         pid = fork();
3105         ASSERT_GE(pid, 0);
3106 
3107         /* Check that we get -ENOSYS with no listener attached */
3108         if (pid == 0) {
3109                 if (user_trap_syscall(__NR_getppid, 0) < 0)
3110                         exit(1);
3111                 ret = syscall(__NR_getppid);
3112                 exit(ret >= 0 || errno != ENOSYS);
3113         }
3114 
3115         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3116         EXPECT_EQ(true, WIFEXITED(status));
3117         EXPECT_EQ(0, WEXITSTATUS(status));
3118 
3119         /* Add some no-op filters for grins. */
3120         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3121         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3122         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3123         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3124 
3125         /* Check that the basic notification machinery works */
3126         listener = user_trap_syscall(__NR_getppid,
3127                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3128         ASSERT_GE(listener, 0);
3129 
3130         /* Installing a second listener in the chain should EBUSY */
3131         EXPECT_EQ(user_trap_syscall(__NR_getppid,
3132                                     SECCOMP_FILTER_FLAG_NEW_LISTENER),
3133                   -1);
3134         EXPECT_EQ(errno, EBUSY);
3135 
3136         pid = fork();
3137         ASSERT_GE(pid, 0);
3138 
3139         if (pid == 0) {
3140                 ret = syscall(__NR_getppid);
3141                 exit(ret != USER_NOTIF_MAGIC);
3142         }
3143 
3144         pollfd.fd = listener;
3145         pollfd.events = POLLIN | POLLOUT;
3146 
3147         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3148         EXPECT_EQ(pollfd.revents, POLLIN);
3149 
3150         /* Test that we can't pass garbage to the kernel. */
3151         memset(&req, 0, sizeof(req));
3152         req.pid = -1;
3153         errno = 0;
3154         ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
3155         EXPECT_EQ(-1, ret);
3156         EXPECT_EQ(EINVAL, errno);
3157 
3158         if (ret) {
3159                 req.pid = 0;
3160                 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3161         }
3162 
3163         pollfd.fd = listener;
3164         pollfd.events = POLLIN | POLLOUT;
3165 
3166         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3167         EXPECT_EQ(pollfd.revents, POLLOUT);
3168 
3169         EXPECT_EQ(req.data.nr,  __NR_getppid);
3170 
3171         resp.id = req.id;
3172         resp.error = 0;
3173         resp.val = USER_NOTIF_MAGIC;
3174 
3175         /* check that we make sure flags == 0 */
3176         resp.flags = 1;
3177         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3178         EXPECT_EQ(errno, EINVAL);
3179 
3180         resp.flags = 0;
3181         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3182 
3183         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3184         EXPECT_EQ(true, WIFEXITED(status));
3185         EXPECT_EQ(0, WEXITSTATUS(status));
3186 }
3187 
3188 TEST(user_notification_kill_in_middle)
3189 {
3190         pid_t pid;
3191         long ret;
3192         int listener;
3193         struct seccomp_notif req = {};
3194         struct seccomp_notif_resp resp = {};
3195 
3196         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3197         ASSERT_EQ(0, ret) {
3198                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3199         }
3200 
3201         listener = user_trap_syscall(__NR_getppid,
3202                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3203         ASSERT_GE(listener, 0);
3204 
3205         /*
3206          * Check that nothing bad happens when we kill the task in the middle
3207          * of a syscall.
3208          */
3209         pid = fork();
3210         ASSERT_GE(pid, 0);
3211 
3212         if (pid == 0) {
3213                 ret = syscall(__NR_getppid);
3214                 exit(ret != USER_NOTIF_MAGIC);
3215         }
3216 
3217         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3218         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
3219 
3220         EXPECT_EQ(kill(pid, SIGKILL), 0);
3221         EXPECT_EQ(waitpid(pid, NULL, 0), pid);
3222 
3223         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
3224 
3225         resp.id = req.id;
3226         ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
3227         EXPECT_EQ(ret, -1);
3228         EXPECT_EQ(errno, ENOENT);
3229 }
3230 
/* Descriptor that signal_handler() writes to; -1 until a test assigns one. */
static int handled = -1;

/*
 * Signal handler that reports delivery by writing the single byte 'c' to
 * the descriptor stored in `handled`.
 *
 * Only async-signal-safe calls are made: a failed write is reported with
 * write(2) on stderr rather than perror(). errno is saved on entry and
 * restored on exit so the interrupted code never sees a value set here.
 *
 * @signo: number of the delivered signal (unused)
 */
static void signal_handler(int signo)
{
        static const char failure[] = "write from signal failed\n";
        const int saved_errno = errno;

        (void)signo;

        if (write(handled, "c", 1) != 1) {
                if (write(STDERR_FILENO, failure, sizeof(failure) - 1) < 0) {
                        /* Nothing further can be reported from a handler. */
                }
        }

        errno = saved_errno;
}
3238 
3239 TEST(user_notification_signal)
3240 {
3241         pid_t pid;
3242         long ret;
3243         int status, listener, sk_pair[2];
3244         struct seccomp_notif req = {};
3245         struct seccomp_notif_resp resp = {};
3246         char c;
3247 
3248         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3249         ASSERT_EQ(0, ret) {
3250                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3251         }
3252 
3253         ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
3254 
3255         listener = user_trap_syscall(__NR_gettid,
3256                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3257         ASSERT_GE(listener, 0);
3258 
3259         pid = fork();
3260         ASSERT_GE(pid, 0);
3261 
3262         if (pid == 0) {
3263                 close(sk_pair[0]);
3264                 handled = sk_pair[1];
3265                 if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
3266                         perror("signal");
3267                         exit(1);
3268                 }
3269                 /*
3270                  * ERESTARTSYS behavior is a bit hard to test, because we need
3271                  * to rely on a signal that has not yet been handled. Let's at
3272                  * least check that the error code gets propagated through, and
3273                  * hope that it doesn't break when there is actually a signal :)
3274                  */
3275                 ret = syscall(__NR_gettid);
3276                 exit(!(ret == -1 && errno == 512));
3277         }
3278 
3279         close(sk_pair[1]);
3280 
3281         memset(&req, 0, sizeof(req));
3282         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3283 
3284         EXPECT_EQ(kill(pid, SIGUSR1), 0);
3285 
3286         /*
3287          * Make sure the signal really is delivered, which means we're not
3288          * stuck in the user notification code any more and the notification
3289          * should be dead.
3290          */
3291         EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
3292 
3293         resp.id = req.id;
3294         resp.error = -EPERM;
3295         resp.val = 0;
3296 
3297         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3298         EXPECT_EQ(errno, ENOENT);
3299 
3300         memset(&req, 0, sizeof(req));
3301         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3302 
3303         resp.id = req.id;
3304         resp.error = -512; /* -ERESTARTSYS */
3305         resp.val = 0;
3306 
3307         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3308 
3309         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3310         EXPECT_EQ(true, WIFEXITED(status));
3311         EXPECT_EQ(0, WEXITSTATUS(status));
3312 }
3313 
3314 TEST(user_notification_closed_listener)
3315 {
3316         pid_t pid;
3317         long ret;
3318         int status, listener;
3319 
3320         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3321         ASSERT_EQ(0, ret) {
3322                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3323         }
3324 
3325         listener = user_trap_syscall(__NR_getppid,
3326                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3327         ASSERT_GE(listener, 0);
3328 
3329         /*
3330          * Check that we get an ENOSYS when the listener is closed.
              *
              * The child closes its inherited copy of the listener, issues
              * getppid (the syscall handed to user_trap_syscall() above) and
              * reports through its exit status: 0 means the call returned -1
              * with errno == ENOSYS, 1 means anything else.
3331          */
3332         pid = fork();
3333         ASSERT_GE(pid, 0);
3334         if (pid == 0) {
3335                 close(listener);
3336                 ret = syscall(__NR_getppid);
                     /*
                      * Passing requires BOTH a -1 return and errno == ENOSYS,
                      * so negate that conjunction. A -1 return with any other
                      * errno must be reported as a failure.
                      */
3337                 exit(!(ret == -1 && errno == ENOSYS));
3338         }
3339 
             /* Close the parent's copy of the listener as well. */
3340         close(listener);
3341 
             /* The child must have exited normally with status 0. */
3342         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3343         EXPECT_EQ(true, WIFEXITED(status));
3344         EXPECT_EQ(0, WEXITSTATUS(status));
3345 }
3346 
3347 /*
3348  * Check that a pid in a child namespace still shows up as valid in ours.
3349  */
3350 TEST(user_notification_child_pid_ns)
3351 {
3352         pid_t pid;
3353         int status, listener;
3354         struct seccomp_notif req = {};
3355         struct seccomp_notif_resp resp = {};
3356 
             /*
              * Per unshare(2), CLONE_NEWPID takes effect for children created
              * afterwards, so the child forked below is the one that ends up
              * in the new pid namespace.
              */
3357         ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
3358 
3359         listener = user_trap_syscall(__NR_getppid,
3360                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3361         ASSERT_GE(listener, 0);
3362 
3363         pid = fork();
3364         ASSERT_GE(pid, 0);
3365 
             /* Child: exit 0 only if getppid returned USER_NOTIF_MAGIC. */
3366         if (pid == 0)
3367                 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3368 
             /*
              * Receive the notification; the pid it reports must be the pid
              * fork() returned to us for the child.
              */
3369         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3370         EXPECT_EQ(req.pid, pid);
3371 
             /* Reply with no error and the value the child is waiting for. */
3372         resp.id = req.id;
3373         resp.error = 0;
3374         resp.val = USER_NOTIF_MAGIC;
3375 
3376         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3377 
             /* The child must have exited normally with status 0. */
3378         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3379         EXPECT_EQ(true, WIFEXITED(status));
3380         EXPECT_EQ(0, WEXITSTATUS(status));
3381         close(listener);
3382 }
3383 
3384 /*
3385  * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3386  * invalid.
3387  */
3388 TEST(user_notification_sibling_pid_ns)
3389 {
3390         pid_t pid, pid2;
3391         int status, listener;
3392         struct seccomp_notif req = {};
3393         struct seccomp_notif_resp resp = {};
3394 
3395         ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3396                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3397         }
3398 
3399         listener = user_trap_syscall(__NR_getppid,
3400                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3401         ASSERT_GE(listener, 0);
3402 
3403         pid = fork();
3404         ASSERT_GE(pid, 0);
3405 
             /*
              * First child: unshare a pid namespace of its own and fork a
              * grandchild that makes the getppid call. The grandchild exits 0
              * only if it got USER_NOTIF_MAGIC back, and the child forwards
              * that exit status to the test process.
              */
3406         if (pid == 0) {
3407                 ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3408 
3409                 pid2 = fork();
3410                 ASSERT_GE(pid2, 0);
3411 
3412                 if (pid2 == 0)
3413                         exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3414 
3415                 EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3416                 EXPECT_EQ(true, WIFEXITED(status));
3417                 EXPECT_EQ(0, WEXITSTATUS(status));
3418                 exit(WEXITSTATUS(status));
3419         }
3420 
3421         /* Create the sibling ns, and sibling in it. */
             /*
              * Only the return value is checked here: errno is meaningful
              * solely after a call that reported failure, and may hold a stale
              * value after a successful one.
              */
3422         ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3424 
3425         pid2 = fork();
3426         ASSERT_GE(pid2, 0);
3427 
             /*
              * Sibling: service the notification through the inherited
              * listener fd, then exit 0.
              */
3428         if (pid2 == 0) {
3429                 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3430                 /*
3431                  * The pid should be 0, i.e. the task is in some namespace that
3432                  * we can't "see".
3433                  */
3434                 EXPECT_EQ(req.pid, 0);
3435 
3436                 resp.id = req.id;
3437                 resp.error = 0;
3438                 resp.val = USER_NOTIF_MAGIC;
3439 
3440                 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3441                 exit(0);
3442         }
3443 
             /* The sibling holds its own copy; the test process is done with it. */
3444         close(listener);
3445 
             /* Both direct children must exit normally with status 0. */
3446         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3447         EXPECT_EQ(true, WIFEXITED(status));
3448         EXPECT_EQ(0, WEXITSTATUS(status));
3449 
3450         EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3451         EXPECT_EQ(true, WIFEXITED(status));
3452         EXPECT_EQ(0, WEXITSTATUS(status));
3453 }
3454 
3455 TEST(user_notification_fault_recv)
3456 {
3457         pid_t pid;
3458         int status, listener;
3459         struct seccomp_notif req = {};
3460         struct seccomp_notif_resp resp = {};
3461 
             /*
              * Check that a SECCOMP_IOCTL_NOTIF_RECV which fails with EFAULT
              * does not lose the pending notification: a second RECV with a
              * valid buffer must still return it.
              */
3462         ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
3463 
3464         listener = user_trap_syscall(__NR_getppid,
3465                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3466         ASSERT_GE(listener, 0);
3467 
3468         pid = fork();
3469         ASSERT_GE(pid, 0);
3470 
             /* Child: exit 0 only if getppid returned USER_NOTIF_MAGIC. */
3471         if (pid == 0)
3472                 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3473 
3474         /* Do a bad recv() */
             /* A NULL buffer is expected to make the ioctl fail with EFAULT. */
3475         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3476         EXPECT_EQ(errno, EFAULT);
3477 
3478         /* We should still be able to receive this notification, though. */
3479         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3480         EXPECT_EQ(req.pid, pid);
3481 
             /* Reply with no error and the value the child is waiting for. */
3482         resp.id = req.id;
3483         resp.error = 0;
3484         resp.val = USER_NOTIF_MAGIC;
3485 
3486         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3487 
             /* The child must have exited normally with status 0. */
3488         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3489         EXPECT_EQ(true, WIFEXITED(status));
3490         EXPECT_EQ(0, WEXITSTATUS(status));
3491 }
3492 
3493 TEST(seccomp_get_notif_sizes)
3494 {
3495         struct seccomp_notif_sizes sizes;
3496 
             /*
              * The sizes reported for SECCOMP_GET_NOTIF_SIZES must match
              * sizeof() of the structure definitions this test was built
              * against.
              */
3497         ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
3498         EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
3499         EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
3500 }
3501 
3502 /*
3503  * TODO:
3504  * - add microbenchmarks
3505  * - expand NNP testing
3506  * - better arch-specific TRACE and TRAP handlers.
3507  * - endianness checking when appropriate
3508  * - 64-bit arg prodding
3509  * - arch value testing (x86 modes especially)
3510  * - verify that FILTER_FLAG_LOG filters generate log messages
3511  * - verify that RET_LOG generates log messages
3512  * - ...
3513  */
3514 
3515 TEST_HARNESS_MAIN /* harness macro; presumably emits main() that runs the tests above - confirm in the harness header */

/* [<][>][^][v][top][bottom][index][help] */