This source file includes the following definitions:
- pti_print_if_insecure
- pti_print_if_secure
- pti_check_boottime_disable
- __pti_set_user_pgtbl
- pti_user_pagetable_walk_p4d
- pti_user_pagetable_walk_pmd
- pti_user_pagetable_walk_pte
- pti_setup_vsyscall
- pti_setup_vsyscall
- pti_clone_pgtable
- pti_clone_p4d
- pti_clone_user_shared
- pti_clone_user_shared
- pti_setup_espfix64
- pti_clone_entry_text
- pti_kernel_image_global_ok
- pti_clone_kernel_text
- pti_set_kernel_image_nonglobal
- pti_init
- pti_finalize
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/bug.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/cpu.h>

#include <asm/cpufeature.h>
#include <asm/hypervisor.h>
#include <asm/vsyscall.h>
#include <asm/cmdline.h>
#include <asm/pti.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/sections.h>

#undef pr_fmt
#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt

/* __GFP_NOTRACK may not exist; define it away so the gfp masks below still build. */
#ifndef __GFP_NOTRACK
#define __GFP_NOTRACK 0
#endif

/*
 * Clone the kernel image at PMD granularity on 64-bit and at PTE
 * granularity on 32-bit.
 */
#ifdef CONFIG_X86_64
#define PTI_LEVEL_KERNEL_IMAGE	PTI_CLONE_PMD
#else
#define PTI_LEVEL_KERNEL_IMAGE	PTI_CLONE_PTE
#endif

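/*
 * Report why PTI is being enabled or disabled, but only when the message is
 * relevant: "insecure" notes are printed on CPUs affected by Meltdown,
 * "secure" notes on CPUs that are not affected.
 */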
static void __init pti_print_if_insecure(const char *reason)
{
        if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                pr_info("%s\n", reason);
}

static void __init pti_print_if_secure(const char *reason)
{
        if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                pr_info("%s\n", reason);
}

static enum pti_mode {
        PTI_AUTO = 0,
        PTI_FORCE_OFF,
        PTI_FORCE_ON
} pti_mode;

void __init pti_check_boottime_disable(void)
{
        char arg[5];
        int ret;

        /* Assume mode is auto unless overridden below. */
        pti_mode = PTI_AUTO;

        if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
                pti_mode = PTI_FORCE_OFF;
                pti_print_if_insecure("disabled on XEN PV.");
                return;
        }

        ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
        if (ret > 0) {
                if (ret == 3 && !strncmp(arg, "off", 3)) {
                        pti_mode = PTI_FORCE_OFF;
                        pti_print_if_insecure("disabled on command line.");
                        return;
                }
                if (ret == 2 && !strncmp(arg, "on", 2)) {
                        pti_mode = PTI_FORCE_ON;
                        pti_print_if_secure("force enabled on command line.");
                        goto enable;
                }
                if (ret == 4 && !strncmp(arg, "auto", 4)) {
                        pti_mode = PTI_AUTO;
                        goto autosel;
                }
        }

        if (cmdline_find_option_bool(boot_command_line, "nopti") ||
            cpu_mitigations_off()) {
                pti_mode = PTI_FORCE_OFF;
                pti_print_if_insecure("disabled on command line.");
                return;
        }

autosel:
        if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                return;
enable:
        setup_force_cpu_cap(X86_FEATURE_PTI);
}

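/*
 * Mirror a PGD entry for the userspace half of the address space into the
 * user page-table copy, and return the value to install in the kernel copy
 * (with NX added for ordinary user mappings).
 */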
pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{
        /*
         * Changes to the kernel portion of the page tables are not
         * automatically propagated to the user copy, so only entries that
         * map userspace need any extra work here.
         */
        if (!pgdp_maps_userspace(pgdp))
                return pgd;

        /* The user page tables get the full PGD, accessible from userspace. */
        kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;

        /*
         * If this is normal user memory, make it NX in the kernel copy so
         * that a return to usermode with the kernel CR3 still loaded faults
         * instead of executing user code.  Skip this when _PAGE_USER or
         * _PAGE_PRESENT is clear, or when NX is not supported.
         */
        if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
            (__supported_pte_mask & _PAGE_NX))
                pgd.pgd |= _PAGE_NX;

        return pgd;
}

/*
 * Walk the user copy of the page tables, allocating a p4d page if needed.
 *
 * Returns a pointer to a P4D on success, or NULL on failure.
 */
static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
{
        pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
        gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);

        if (address < PAGE_OFFSET) {
                WARN_ONCE(1, "attempt to walk user address\n");
                return NULL;
        }

        if (pgd_none(*pgd)) {
                unsigned long new_p4d_page = __get_free_page(gfp);
                if (WARN_ON_ONCE(!new_p4d_page))
                        return NULL;

                set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
        }
        BUILD_BUG_ON(pgd_large(*pgd) != 0);

        return p4d_offset(pgd, address);
}

/*
 * Walk the user copy of the page tables down to the PMD level, allocating
 * intermediate page-table pages as needed.
 *
 * Returns a pointer to a PMD on success, or NULL on failure.
 */
static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
{
        gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
        p4d_t *p4d;
        pud_t *pud;

        p4d = pti_user_pagetable_walk_p4d(address);
        if (!p4d)
                return NULL;

        BUILD_BUG_ON(p4d_large(*p4d) != 0);
        if (p4d_none(*p4d)) {
                unsigned long new_pud_page = __get_free_page(gfp);
                if (WARN_ON_ONCE(!new_pud_page))
                        return NULL;

                set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
        }

        pud = pud_offset(p4d, address);
        /* The user page tables do not use large mappings: */
        if (pud_large(*pud)) {
                WARN_ON(1);
                return NULL;
        }
        if (pud_none(*pud)) {
                unsigned long new_pmd_page = __get_free_page(gfp);
                if (WARN_ON_ONCE(!new_pmd_page))
                        return NULL;

                set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
        }

        return pmd_offset(pud, address);
}

/*
 * Walk the user copy of the page tables down to the PTE level, allocating
 * intermediate page-table pages as needed.  Does not support large pages.
 *
 * This is only used when mapping new kernel data into the user page
 * tables; it is never used for userspace data.
 *
 * Returns a pointer to a PTE on success, or NULL on failure.
 */
static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
{
        gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
        pmd_t *pmd;
        pte_t *pte;

        pmd = pti_user_pagetable_walk_pmd(address);
        if (!pmd)
                return NULL;

        /* We can't do anything sensible if we hit a large mapping. */
        if (pmd_large(*pmd)) {
                WARN_ON(1);
                return NULL;
        }

        if (pmd_none(*pmd)) {
                unsigned long new_pte_page = __get_free_page(gfp);
                if (!new_pte_page)
                        return NULL;

                set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
        }

        pte = pte_offset_kernel(pmd, address);
        if (pte_flags(*pte) & _PAGE_USER) {
                WARN_ONCE(1, "attempt to walk to user pte\n");
                return NULL;
        }
        return pte;
}

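/*
 * With vsyscall emulation enabled, the vsyscall page must also be visible
 * in the user page tables, so mirror its PTE and set the user bits on the
 * page-table levels leading to it.
 */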
#ifdef CONFIG_X86_VSYSCALL_EMULATION
static void __init pti_setup_vsyscall(void)
{
        pte_t *pte, *target_pte;
        unsigned int level;

        pte = lookup_address(VSYSCALL_ADDR, &level);
        if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
                return;

        target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
        if (WARN_ON(!target_pte))
                return;

        *target_pte = *pte;
        set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
}
#else
static void __init pti_setup_vsyscall(void) { }
#endif

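/* Granularity at which pti_clone_pgtable() copies kernel mappings. */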
enum pti_clone_level {
        PTI_CLONE_PMD,
        PTI_CLONE_PTE,
};

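/*
 * Clone the kernel mappings in [start, end) into the user page tables,
 * copying whole PMDs where possible and individual PTEs when PTE-level
 * cloning is requested.
 */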
static void
pti_clone_pgtable(unsigned long start, unsigned long end,
                  enum pti_clone_level level)
{
        unsigned long addr;

        /*
         * Clone the populated page-table entries which map the range,
         * skipping over any holes.
         */
        for (addr = start; addr < end;) {
                pte_t *pte, *target_pte;
                pmd_t *pmd, *target_pmd;
                pgd_t *pgd;
                p4d_t *p4d;
                pud_t *pud;

                /* Overflow check */
                if (addr < start)
                        break;

                pgd = pgd_offset_k(addr);
                if (WARN_ON(pgd_none(*pgd)))
                        return;
                p4d = p4d_offset(pgd, addr);
                if (WARN_ON(p4d_none(*p4d)))
                        return;

                pud = pud_offset(p4d, addr);
                if (pud_none(*pud)) {
                        WARN_ON_ONCE(addr & ~PUD_MASK);
                        addr = round_up(addr + 1, PUD_SIZE);
                        continue;
                }

                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        WARN_ON_ONCE(addr & ~PMD_MASK);
                        addr = round_up(addr + 1, PMD_SIZE);
                        continue;
                }

                if (pmd_large(*pmd) || level == PTI_CLONE_PMD) {
                        target_pmd = pti_user_pagetable_walk_pmd(addr);
                        if (WARN_ON(!target_pmd))
                                return;

                        /*
                         * Only clone present PMDs.  This ensures _PAGE_GLOBAL
                         * is only set on present PMDs.  This is only called on
                         * well-known addresses anyway, so a non-present PMD
                         * would be a surprise.
                         */
                        if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT)))
                                return;

                        /*
                         * Setting 'target_pmd' below creates a mapping in both
                         * the user and kernel page tables.  It is effectively
                         * global, so set it as global in both copies.  The
                         * X86_FEATURE_PGE check is not strictly required
                         * because the CPU ignores _PAGE_GLOBAL when PGE is not
                         * supported; it keeps this consistent with code that
                         * only sets the bit when it is supported.
                         */
                        if (boot_cpu_has(X86_FEATURE_PGE))
                                *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL);

                        /*
                         * Copy the PMD.  That is, the kernelmode and usermode
                         * tables will share the last-level page tables of this
                         * address range.
                         */
                        *target_pmd = *pmd;

                        addr += PMD_SIZE;

                } else if (level == PTI_CLONE_PTE) {

                        /* Walk the page-table down to the pte level */
                        pte = pte_offset_kernel(pmd, addr);
                        if (pte_none(*pte)) {
                                addr += PAGE_SIZE;
                                continue;
                        }

                        /* Only clone present PTEs */
                        if (WARN_ON(!(pte_flags(*pte) & _PAGE_PRESENT)))
                                return;

                        /* Allocate PTE in the user page-table */
                        target_pte = pti_user_pagetable_walk_pte(addr);
                        if (WARN_ON(!target_pte))
                                return;

                        /* Set GLOBAL bit in both PTEs */
                        if (boot_cpu_has(X86_FEATURE_PGE))
                                *pte = pte_set_flags(*pte, _PAGE_GLOBAL);

                        /* Clone the PTE */
                        *target_pte = *pte;

                        addr += PAGE_SIZE;

                } else {
                        BUG();
                }
        }
}

#ifdef CONFIG_X86_64
/*
 * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
 * next-level entry on 5-level systems).
 */
static void __init pti_clone_p4d(unsigned long addr)
{
        p4d_t *kernel_p4d, *user_p4d;
        pgd_t *kernel_pgd;

        user_p4d = pti_user_pagetable_walk_p4d(addr);
        if (!user_p4d)
                return;

        kernel_pgd = pgd_offset_k(addr);
        kernel_p4d = p4d_offset(kernel_pgd, addr);
        *user_p4d = *kernel_p4d;
}

/*
 * Clone the CPU_ENTRY_AREA and associated data into the userspace-visible
 * page table.
 */
static void __init pti_clone_user_shared(void)
{
        unsigned int cpu;

        pti_clone_p4d(CPU_ENTRY_AREA_BASE);

        for_each_possible_cpu(cpu) {
                /*
                 * The SYSCALL64 entry code needs to be able to find the
                 * thread stack and needs one word of scratch space in which
                 * to spill a register.  All of this lives in the TSS, in
                 * the sp1 and sp2 slots.
                 *
                 * This is done for all possible CPUs during boot so the
                 * mapping is propagated to every mm.
                 */
                unsigned long va = (unsigned long)&per_cpu(cpu_tss_rw, cpu);
                phys_addr_t pa = per_cpu_ptr_to_phys((void *)va);
                pte_t *target_pte;

                target_pte = pti_user_pagetable_walk_pte(va);
                if (WARN_ON(!target_pte))
                        return;

                *target_pte = pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL);
        }
}

#else

/*
 * On 32-bit PAE systems with 1GB of kernel address space there is only one
 * PGD/P4D for the whole kernel.  Cloning that would map the entire address
 * space into the user page tables and make PTI useless, so clone at the
 * PMD level instead.
 */
static void __init pti_clone_user_shared(void)
{
        unsigned long start, end;

        start = CPU_ENTRY_AREA_BASE;
        end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES);

        pti_clone_pgtable(start, end, PTI_CLONE_PMD);
}
#endif

/*
 * Clone the ESPFIX P4D into the userspace-visible page table.
 */
static void __init pti_setup_espfix64(void)
{
#ifdef CONFIG_X86_ESPFIX64
        pti_clone_p4d(ESPFIX_BASE_ADDR);
#endif
}

/*
 * Clone the populated PMDs of the entry and irqentry text so the entry/exit
 * code is mapped in the user page tables.
 */
static void pti_clone_entry_text(void)
{
        pti_clone_pgtable((unsigned long) __entry_text_start,
                          (unsigned long) __irqentry_text_end,
                          PTI_CLONE_PMD);
}

/*
 * Global pages and PCIDs are both ways to make kernel TLB entries live
 * longer, reduce TLB misses and improve kernel performance.  But, leaving
 * all kernel text Global makes it potentially accessible to Meltdown-style
 * attacks which make it trivial to find gadgets or defeat KASLR.
 *
 * Only use global pages when it is really worth it.
 */
static inline bool pti_kernel_image_global_ok(void)
{
        /*
         * Systems with PCIDs get little benefit from global kernel text
         * and are not worth the downsides.
         */
        if (cpu_feature_enabled(X86_FEATURE_PCID))
                return false;

        /*
         * Only do global kernel image for pti=auto.  Do the most secure
         * thing (not global) if pti=on is specified.
         */
        if (pti_mode != PTI_AUTO)
                return false;

        /*
         * K8 may not tolerate the cleared _PAGE_RW on the userspace global
         * kernel image pages.  Do the safe thing (disable the global kernel
         * image).  This is unlikely to ever be noticed because PTI is
         * disabled by default on AMD CPUs.
         */
        if (boot_cpu_has(X86_FEATURE_K8))
                return false;

        /*
         * RANDSTRUCT derives its hardening benefits from the attacker's
         * lack of knowledge about the layout of kernel data structures.
         * Keep the kernel image non-global when RANDSTRUCT is in use to
         * help keep the layout a secret.
         */
        if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT))
                return false;

        return true;
}

/*
 * This file is the only user of these helpers and they are not arch-generic
 * like the other set_memory.h functions, so just declare them here.
 */
extern int set_memory_nonglobal(unsigned long addr, int numpages);
extern int set_memory_global(unsigned long addr, int numpages);

/*
 * For some configurations, map all of kernel text into the user page
 * tables.  This reduces TLB misses, especially on non-PCID systems.
 */
static void pti_clone_kernel_text(void)
{
        /*
         * rodata is part of the kernel image and is normally readable on
         * the filesystem or on the web.  But, do not clone the areas past
         * rodata, they might contain secrets.
         */
        unsigned long start = PFN_ALIGN(_text);
        unsigned long end_clone = (unsigned long)__end_rodata_aligned;
        unsigned long end_global = PFN_ALIGN((unsigned long)__stop___ex_table);

        if (!pti_kernel_image_global_ok())
                return;

        pr_debug("mapping partial kernel image into user address space\n");

        /*
         * Note that this will undo _some_ of the work that
         * pti_set_kernel_image_nonglobal() did to clear the global bit.
         */
        pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE);

        /*
         * pti_clone_pgtable() will set the global bit in any PMDs that it
         * clones, but we also need to get any PTEs in the last level for
         * areas that are not huge-page-aligned.
         */

        /* Set the global bit for normal non-__init kernel text: */
        set_memory_global(start, (end_global - start) >> PAGE_SHIFT);
}

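/*
 * Remove _PAGE_GLOBAL from the entire kernel image; the cloning code above
 * may later set it again on the parts that are deliberately shared with the
 * user page tables.
 */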
static void pti_set_kernel_image_nonglobal(void)
{
        /*
         * The kernel image is mapped from _text up to a PMD-aligned _end.
         * Clear _PAGE_GLOBAL on all of it so that, by default, no part of
         * the image stays in the TLB across a switch to the user CR3.
         */
        unsigned long start = PFN_ALIGN(_text);
        unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);

        /*
         * pti_clone_entry_text() and pti_clone_kernel_text() re-add the
         * global bit only where it is wanted.
         */
        set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
}

/*
 * Initialize kernel page table isolation
 */
void __init pti_init(void)
{
        if (!boot_cpu_has(X86_FEATURE_PTI))
                return;

        pr_info("enabled\n");

#ifdef CONFIG_X86_32
        /*
         * We would check for X86_FEATURE_PCID here, but the init code
         * clears that feature flag on 32-bit because PCID is not supported
         * there.  Query CPUID directly to decide whether to print the
         * warning.
         */
        if (cpuid_ecx(0x1) & BIT(17)) {
                /* Use printk to work around pr_fmt() */
                printk(KERN_WARNING "\n");
                printk(KERN_WARNING "************************************************************\n");
                printk(KERN_WARNING "** WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!  **\n");
                printk(KERN_WARNING "**                                                        **\n");
                printk(KERN_WARNING "** You are using 32-bit PTI on a 64-bit PCID-capable CPU. **\n");
                printk(KERN_WARNING "** Your performance will increase dramatically if you     **\n");
                printk(KERN_WARNING "** switch to a 64-bit kernel!                             **\n");
                printk(KERN_WARNING "**                                                        **\n");
                printk(KERN_WARNING "** WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!  **\n");
                printk(KERN_WARNING "************************************************************\n");
        }
#endif

        pti_clone_user_shared();

        /* Undo all global bits from the init pagetables in head_64.S: */
        pti_set_kernel_image_nonglobal();
        /* Replace some of the global bits just for shared entry text: */
        pti_clone_entry_text();
        pti_setup_espfix64();
        pti_setup_vsyscall();
}

/*
 * Finalize the kernel mappings in the userspace page table.  Some of the
 * mappings for the kernel image might have changed since pti_init() cloned
 * them, for example because parts of the image were freed or remapped once
 * sections such as __ro_after_init were finalized, so clone them again.
 */
void pti_finalize(void)
{
        if (!boot_cpu_has(X86_FEATURE_PTI))
                return;

        /*
         * We need to clone everything (again) that maps parts of the
         * kernel image.
         */
        pti_clone_entry_text();
        pti_clone_kernel_text();

        debug_checkwx_user();
}