root/arch/x86/mm/init_32.c


DEFINITIONS

This source file includes the following definitions.
  1. one_md_table_init
  2. one_page_table_init
  3. populate_extra_pmd
  4. populate_extra_pte
  5. page_table_range_init_count
  6. page_table_kmap_check
  7. page_table_range_init
  8. is_kernel_text
  9. kernel_physical_mapping_init
  10. kmap_get_fixmap_pte
  11. kmap_init
  12. permanent_kmaps_init
  13. add_highpages_with_active_regions
  14. permanent_kmaps_init
  15. sync_initial_page_table
  16. native_pagetable_init
  17. early_ioremap_page_table_range_init
  18. pagetable_init
  19. parse_highmem
  20. lowmem_pfn_init
  21. highmem_pfn_init
  22. find_low_pfn_range
  23. initmem_init
  24. setup_bootmem_allocator
  25. paging_init
  26. test_wp_bit
  27. mem_init
  28. arch_add_memory
  29. arch_remove_memory
  30. set_kernel_text_rw
  31. set_kernel_text_ro
  32. mark_nxdata_nx
  33. mark_rodata_ro

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  *
   4  *  Copyright (C) 1995  Linus Torvalds
   5  *
   6  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   7  */
   8 
   9 #include <linux/signal.h>
  10 #include <linux/sched.h>
  11 #include <linux/kernel.h>
  12 #include <linux/errno.h>
  13 #include <linux/string.h>
  14 #include <linux/types.h>
  15 #include <linux/ptrace.h>
  16 #include <linux/mman.h>
  17 #include <linux/mm.h>
  18 #include <linux/hugetlb.h>
  19 #include <linux/swap.h>
  20 #include <linux/smp.h>
  21 #include <linux/init.h>
  22 #include <linux/highmem.h>
  23 #include <linux/pagemap.h>
  24 #include <linux/pci.h>
  25 #include <linux/pfn.h>
  26 #include <linux/poison.h>
  27 #include <linux/memblock.h>
  28 #include <linux/proc_fs.h>
  29 #include <linux/memory_hotplug.h>
  30 #include <linux/initrd.h>
  31 #include <linux/cpumask.h>
  32 #include <linux/gfp.h>
  33 
  34 #include <asm/asm.h>
  35 #include <asm/bios_ebda.h>
  36 #include <asm/processor.h>
  37 #include <linux/uaccess.h>
  38 #include <asm/pgtable.h>
  39 #include <asm/dma.h>
  40 #include <asm/fixmap.h>
  41 #include <asm/e820/api.h>
  42 #include <asm/apic.h>
  43 #include <asm/bugs.h>
  44 #include <asm/tlb.h>
  45 #include <asm/tlbflush.h>
  46 #include <asm/olpc_ofw.h>
  47 #include <asm/pgalloc.h>
  48 #include <asm/sections.h>
  49 #include <asm/paravirt.h>
  50 #include <asm/setup.h>
  51 #include <asm/set_memory.h>
  52 #include <asm/page_types.h>
  53 #include <asm/cpu_entry_area.h>
  54 #include <asm/init.h>
  55 
  56 #include "mm_internal.h"
  57 
  58 unsigned long highstart_pfn, highend_pfn;
  59 
  60 bool __read_mostly __vmalloc_start_set = false;
  61 
  62 /*
  63  * Creates a middle page table and puts a pointer to it in the
  64  * given global directory entry. This only returns the gd entry
  65  * in non-PAE compilation mode, since the middle layer is folded.
  66  */
  67 static pmd_t * __init one_md_table_init(pgd_t *pgd)
  68 {
  69         p4d_t *p4d;
  70         pud_t *pud;
  71         pmd_t *pmd_table;
  72 
  73 #ifdef CONFIG_X86_PAE
  74         if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
  75                 pmd_table = (pmd_t *)alloc_low_page();
  76                 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
  77                 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
  78                 p4d = p4d_offset(pgd, 0);
  79                 pud = pud_offset(p4d, 0);
  80                 BUG_ON(pmd_table != pmd_offset(pud, 0));
  81 
  82                 return pmd_table;
  83         }
  84 #endif
  85         p4d = p4d_offset(pgd, 0);
  86         pud = pud_offset(p4d, 0);
  87         pmd_table = pmd_offset(pud, 0);
  88 
  89         return pmd_table;
  90 }
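
      /*
       * For illustration only (a sketch, not part of the file's code): with
       * !CONFIG_X86_PAE the p4d/pud/pmd levels are folded, so the pointer
       * returned above is really the pgd entry itself:
       *
       *      pgd_t *pgd = swapper_pg_dir + pgd_index(vaddr);
       *      pmd_t *pmd = one_md_table_init(pgd);  // == (pmd_t *)pgd when folded
       */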
  91 
  92 /*
  93  * Create a page table and place a pointer to it in a middle page
  94  * directory entry:
  95  */
  96 static pte_t * __init one_page_table_init(pmd_t *pmd)
  97 {
  98         if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
  99                 pte_t *page_table = (pte_t *)alloc_low_page();
 100 
 101                 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
 102                 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
 103                 BUG_ON(page_table != pte_offset_kernel(pmd, 0));
 104         }
 105 
 106         return pte_offset_kernel(pmd, 0);
 107 }
 108 
 109 pmd_t * __init populate_extra_pmd(unsigned long vaddr)
 110 {
 111         int pgd_idx = pgd_index(vaddr);
 112         int pmd_idx = pmd_index(vaddr);
 113 
 114         return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx;
 115 }
 116 
 117 pte_t * __init populate_extra_pte(unsigned long vaddr)
 118 {
 119         int pte_idx = pte_index(vaddr);
 120         pmd_t *pmd;
 121 
 122         pmd = populate_extra_pmd(vaddr);
 123         return one_page_table_init(pmd) + pte_idx;
 124 }
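
      /*
       * Usage sketch (hypothetical early caller, for illustration only):
       * map one physical page before the core MM is fully up:
       *
       *      pte_t *pte = populate_extra_pte(vaddr);
       *
       *      set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, PAGE_KERNEL));
       *      __flush_tlb_all();
       */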
 125 
 126 static unsigned long __init
 127 page_table_range_init_count(unsigned long start, unsigned long end)
 128 {
 129         unsigned long count = 0;
 130 #ifdef CONFIG_HIGHMEM
 131         int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
 132         int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
 133         int pgd_idx, pmd_idx;
 134         unsigned long vaddr;
 135 
 136         if (pmd_idx_kmap_begin == pmd_idx_kmap_end)
 137                 return 0;
 138 
 139         vaddr = start;
 140         pgd_idx = pgd_index(vaddr);
 141         pmd_idx = pmd_index(vaddr);
 142 
 143         for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) {
 144                 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
 145                                                         pmd_idx++) {
 146                         if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin &&
 147                             (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end)
 148                                 count++;
 149                         vaddr += PMD_SIZE;
 150                 }
 151                 pmd_idx = 0;
 152         }
 153 #endif
 154         return count;
 155 }
 156 
 157 static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
 158                                            unsigned long vaddr, pte_t *lastpte,
 159                                            void **adr)
 160 {
 161 #ifdef CONFIG_HIGHMEM
 162         /*
 163          * Something (early fixmap) may already have put a pte
 164          * page here, which causes the page table allocation
 165          * to become nonlinear. Attempt to fix it, and if it
  166          * is still nonlinear then we have to BUG().
 167          */
 168         int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
 169         int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
 170 
 171         if (pmd_idx_kmap_begin != pmd_idx_kmap_end
 172             && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
 173             && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) {
 174                 pte_t *newpte;
 175                 int i;
 176 
 177                 BUG_ON(after_bootmem);
 178                 newpte = *adr;
 179                 for (i = 0; i < PTRS_PER_PTE; i++)
 180                         set_pte(newpte + i, pte[i]);
 181                 *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);
 182 
 183                 paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
 184                 set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
 185                 BUG_ON(newpte != pte_offset_kernel(pmd, 0));
 186                 __flush_tlb_all();
 187 
 188                 paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
 189                 pte = newpte;
 190         }
 191         BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
 192                && vaddr > fix_to_virt(FIX_KMAP_END)
 193                && lastpte && lastpte + PTRS_PER_PTE != pte);
 194 #endif
 195         return pte;
 196 }
 197 
 198 /*
 199  * This function initializes a certain range of kernel virtual memory
  200  * with new bootmem page tables wherever page tables are missing in
  201  * the given range.
  202  *
  203  * NOTE: The page tables are allocated contiguously in physical memory,
  204  * so we can cache the location of the first one and move around without
  205  * re-checking the pgd every time.
 206  */
 207 static void __init
 208 page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
 209 {
 210         int pgd_idx, pmd_idx;
 211         unsigned long vaddr;
 212         pgd_t *pgd;
 213         pmd_t *pmd;
 214         pte_t *pte = NULL;
 215         unsigned long count = page_table_range_init_count(start, end);
 216         void *adr = NULL;
 217 
 218         if (count)
 219                 adr = alloc_low_pages(count);
 220 
 221         vaddr = start;
 222         pgd_idx = pgd_index(vaddr);
 223         pmd_idx = pmd_index(vaddr);
 224         pgd = pgd_base + pgd_idx;
 225 
 226         for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
 227                 pmd = one_md_table_init(pgd);
 228                 pmd = pmd + pmd_index(vaddr);
 229                 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
 230                                                         pmd++, pmd_idx++) {
 231                         pte = page_table_kmap_check(one_page_table_init(pmd),
 232                                                     pmd, vaddr, pte, &adr);
 233 
 234                         vaddr += PMD_SIZE;
 235                 }
 236                 pmd_idx = 0;
 237         }
 238 }
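
      /*
       * Usage sketch: callers pass swapper_pg_dir and a virtual range, as
       * permanent_kmaps_init() does further down in this file:
       *
       *      page_table_range_init(PKMAP_BASE,
       *                            PKMAP_BASE + PAGE_SIZE * LAST_PKMAP,
       *                            swapper_pg_dir);
       */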
 239 
 240 static inline int is_kernel_text(unsigned long addr)
 241 {
 242         if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
 243                 return 1;
 244         return 0;
 245 }
 246 
 247 /*
 248  * This maps the physical memory to kernel virtual address space, a total
 249  * of max_low_pfn pages, by creating page tables starting from address
 250  * PAGE_OFFSET:
 251  */
 252 unsigned long __init
 253 kernel_physical_mapping_init(unsigned long start,
 254                              unsigned long end,
 255                              unsigned long page_size_mask)
 256 {
 257         int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
 258         unsigned long last_map_addr = end;
 259         unsigned long start_pfn, end_pfn;
 260         pgd_t *pgd_base = swapper_pg_dir;
 261         int pgd_idx, pmd_idx, pte_ofs;
 262         unsigned long pfn;
 263         pgd_t *pgd;
 264         pmd_t *pmd;
 265         pte_t *pte;
 266         unsigned pages_2m, pages_4k;
 267         int mapping_iter;
 268 
 269         start_pfn = start >> PAGE_SHIFT;
 270         end_pfn = end >> PAGE_SHIFT;
 271 
 272         /*
  273          * The first iteration sets up the identity mapping using large/small
  274          * pages based on use_pse, with the other attributes the same as set
  275          * by the early code in head_32.S.
  276          *
  277          * The second iteration sets up the appropriate attributes (NX, GLOBAL..)
  278          * as desired for the kernel identity mapping.
 279          *
 280          * This two pass mechanism conforms to the TLB app note which says:
 281          *
 282          *     "Software should not write to a paging-structure entry in a way
 283          *      that would change, for any linear address, both the page size
 284          *      and either the page frame or attributes."
 285          */
 286         mapping_iter = 1;
 287 
 288         if (!boot_cpu_has(X86_FEATURE_PSE))
 289                 use_pse = 0;
 290 
 291 repeat:
 292         pages_2m = pages_4k = 0;
 293         pfn = start_pfn;
 294         pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
 295         pgd = pgd_base + pgd_idx;
 296         for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
 297                 pmd = one_md_table_init(pgd);
 298 
 299                 if (pfn >= end_pfn)
 300                         continue;
 301 #ifdef CONFIG_X86_PAE
 302                 pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
 303                 pmd += pmd_idx;
 304 #else
 305                 pmd_idx = 0;
 306 #endif
 307                 for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
 308                      pmd++, pmd_idx++) {
 309                         unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
 310 
 311                         /*
 312                          * Map with big pages if possible, otherwise
 313                          * create normal page tables:
 314                          */
 315                         if (use_pse) {
 316                                 unsigned int addr2;
 317                                 pgprot_t prot = PAGE_KERNEL_LARGE;
 318                                 /*
 319                                  * first pass will use the same initial
 320                                  * identity mapping attribute + _PAGE_PSE.
 321                                  */
 322                                 pgprot_t init_prot =
 323                                         __pgprot(PTE_IDENT_ATTR |
 324                                                  _PAGE_PSE);
 325 
 326                                 pfn &= PMD_MASK >> PAGE_SHIFT;
 327                                 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
 328                                         PAGE_OFFSET + PAGE_SIZE-1;
 329 
 330                                 if (is_kernel_text(addr) ||
 331                                     is_kernel_text(addr2))
 332                                         prot = PAGE_KERNEL_LARGE_EXEC;
 333 
 334                                 pages_2m++;
 335                                 if (mapping_iter == 1)
 336                                         set_pmd(pmd, pfn_pmd(pfn, init_prot));
 337                                 else
 338                                         set_pmd(pmd, pfn_pmd(pfn, prot));
 339 
 340                                 pfn += PTRS_PER_PTE;
 341                                 continue;
 342                         }
 343                         pte = one_page_table_init(pmd);
 344 
 345                         pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
 346                         pte += pte_ofs;
 347                         for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
 348                              pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
 349                                 pgprot_t prot = PAGE_KERNEL;
 350                                 /*
 351                                  * first pass will use the same initial
 352                                  * identity mapping attribute.
 353                                  */
 354                                 pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
 355 
 356                                 if (is_kernel_text(addr))
 357                                         prot = PAGE_KERNEL_EXEC;
 358 
 359                                 pages_4k++;
 360                                 if (mapping_iter == 1) {
 361                                         set_pte(pte, pfn_pte(pfn, init_prot));
 362                                         last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
 363                                 } else
 364                                         set_pte(pte, pfn_pte(pfn, prot));
 365                         }
 366                 }
 367         }
 368         if (mapping_iter == 1) {
 369                 /*
 370                  * update direct mapping page count only in the first
 371                  * iteration.
 372                  */
 373                 update_page_count(PG_LEVEL_2M, pages_2m);
 374                 update_page_count(PG_LEVEL_4K, pages_4k);
 375 
 376                 /*
  377                  * Do a global TLB flush on the local CPU to flush the previous
  378                  * mappings from both the small- and large-page TLBs.
 379                  */
 380                 __flush_tlb_all();
 381 
 382                 /*
 383                  * Second iteration will set the actual desired PTE attributes.
 384                  */
 385                 mapping_iter = 2;
 386                 goto repeat;
 387         }
 388         return last_map_addr;
 389 }
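
      /*
       * Caller sketch (simplified; the real driver is init_memory_mapping()
       * in arch/x86/mm/init.c, so treat this only as an approximation):
       *
       *      unsigned long mask = 0;
       *
       *      if (boot_cpu_has(X86_FEATURE_PSE))
       *              mask |= 1 << PG_LEVEL_2M;
       *      kernel_physical_mapping_init(0, max_low_pfn << PAGE_SHIFT, mask);
       *      __flush_tlb_all();
       */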
 390 
 391 pte_t *kmap_pte;
 392 
 393 static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
 394 {
 395         pgd_t *pgd = pgd_offset_k(vaddr);
 396         p4d_t *p4d = p4d_offset(pgd, vaddr);
 397         pud_t *pud = pud_offset(p4d, vaddr);
 398         pmd_t *pmd = pmd_offset(pud, vaddr);
 399         return pte_offset_kernel(pmd, vaddr);
 400 }
 401 
 402 static void __init kmap_init(void)
 403 {
 404         unsigned long kmap_vstart;
 405 
 406         /*
 407          * Cache the first kmap pte:
 408          */
 409         kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
 410         kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
 411 }
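
      /*
       * Sketch of how the cached kmap_pte is consumed, roughly following
       * kmap_atomic_prot() in arch/x86/mm/highmem_32.c (simplified):
       *
       *      idx   = type + KM_TYPE_NR * smp_processor_id();
       *      vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
       *      set_pte(kmap_pte - idx, mk_pte(page, prot));
       *
       * Fixmap virtual addresses grow downwards as the index grows, hence
       * the "kmap_pte - idx" arithmetic.
       */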
 412 
 413 #ifdef CONFIG_HIGHMEM
 414 static void __init permanent_kmaps_init(pgd_t *pgd_base)
 415 {
 416         unsigned long vaddr;
 417         pgd_t *pgd;
 418         p4d_t *p4d;
 419         pud_t *pud;
 420         pmd_t *pmd;
 421         pte_t *pte;
 422 
 423         vaddr = PKMAP_BASE;
 424         page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
 425 
 426         pgd = swapper_pg_dir + pgd_index(vaddr);
 427         p4d = p4d_offset(pgd, vaddr);
 428         pud = pud_offset(p4d, vaddr);
 429         pmd = pmd_offset(pud, vaddr);
 430         pte = pte_offset_kernel(pmd, vaddr);
 431         pkmap_page_table = pte;
 432 }
 433 
 434 void __init add_highpages_with_active_regions(int nid,
 435                          unsigned long start_pfn, unsigned long end_pfn)
 436 {
 437         phys_addr_t start, end;
 438         u64 i;
 439 
 440         for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) {
 441                 unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
 442                                             start_pfn, end_pfn);
 443                 unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
 444                                               start_pfn, end_pfn);
 445                 for ( ; pfn < e_pfn; pfn++)
 446                         if (pfn_valid(pfn))
 447                                 free_highmem_page(pfn_to_page(pfn));
 448         }
 449 }
 450 #else
 451 static inline void permanent_kmaps_init(pgd_t *pgd_base)
 452 {
 453 }
 454 #endif /* CONFIG_HIGHMEM */
 455 
 456 void __init sync_initial_page_table(void)
 457 {
 458         clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
 459                         swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
 460                         KERNEL_PGD_PTRS);
 461 
 462         /*
 463          * sync back low identity map too.  It is used for example
 464          * in the 32-bit EFI stub.
 465          */
 466         clone_pgd_range(initial_page_table,
 467                         swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
 468                         min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
 469 }
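
      /*
       * For reference, the two range sizes come from asm/pgtable.h (the
       * header is authoritative; quoted here only to make the maths visible):
       *
       *      #define KERNEL_PGD_BOUNDARY     pgd_index(PAGE_OFFSET)
       *      #define KERNEL_PGD_PTRS         (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
       *
       * The first clone_pgd_range() copies the kernel half of the pgd; the
       * second copies (up to) the same number of entries into the low slots
       * of initial_page_table so its identity map stays in sync.
       */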
 470 
 471 void __init native_pagetable_init(void)
 472 {
 473         unsigned long pfn, va;
 474         pgd_t *pgd, *base = swapper_pg_dir;
 475         p4d_t *p4d;
 476         pud_t *pud;
 477         pmd_t *pmd;
 478         pte_t *pte;
 479 
 480         /*
  481          * Remove any mappings which extend past the end of physical
  482          * memory from the boot-time page table.
  483          * In the virtual address space we should have at least two pages
  484          * between VMALLOC_END and the pkmap or fixmap area, per the
  485          * definition of VMALLOC_END, and max_low_pfn is set from the
  486          * VMALLOC_END physical address. If the initial memory mapping
  487          * did its job, ptes are in use near max_low_pfn or a pmd is not present.
 488          */
 489         for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
 490                 va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
 491                 pgd = base + pgd_index(va);
 492                 if (!pgd_present(*pgd))
 493                         break;
 494 
 495                 p4d = p4d_offset(pgd, va);
 496                 pud = pud_offset(p4d, va);
 497                 pmd = pmd_offset(pud, va);
 498                 if (!pmd_present(*pmd))
 499                         break;
 500 
 501                 /* should not be large page here */
 502                 if (pmd_large(*pmd)) {
 503                         pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n",
 504                                 pfn, pmd, __pa(pmd));
 505                         BUG_ON(1);
 506                 }
 507 
 508                 pte = pte_offset_kernel(pmd, va);
 509                 if (!pte_present(*pte))
 510                         break;
 511 
 512                 printk(KERN_DEBUG "clearing pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx pte: %p pte phys: %lx\n",
 513                                 pfn, pmd, __pa(pmd), pte, __pa(pte));
 514                 pte_clear(NULL, va, pte);
 515         }
 516         paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
 517         paging_init();
 518 }
 519 
 520 /*
 521  * Build a proper pagetable for the kernel mappings.  Up until this
 522  * point, we've been running on some set of pagetables constructed by
 523  * the boot process.
 524  *
 525  * If we're booting on native hardware, this will be a pagetable
 526  * constructed in arch/x86/kernel/head_32.S.  The root of the
 527  * pagetable will be swapper_pg_dir.
 528  *
 529  * If we're booting paravirtualized under a hypervisor, then there are
 530  * more options: we may already be running PAE, and the pagetable may
 531  * or may not be based in swapper_pg_dir.  In any case,
 532  * paravirt_pagetable_init() will set up swapper_pg_dir
 533  * appropriately for the rest of the initialization to work.
 534  *
 535  * In general, pagetable_init() assumes that the pagetable may already
 536  * be partially populated, and so it avoids stomping on any existing
 537  * mappings.
 538  */
 539 void __init early_ioremap_page_table_range_init(void)
 540 {
 541         pgd_t *pgd_base = swapper_pg_dir;
 542         unsigned long vaddr, end;
 543 
 544         /*
 545          * Fixed mappings, only the page table structure has to be
 546          * created - mappings will be set by set_fixmap():
 547          */
 548         vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
 549         end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
 550         page_table_range_init(vaddr, end, pgd_base);
 551         early_ioremap_reset();
 552 }
 553 
 554 static void __init pagetable_init(void)
 555 {
 556         pgd_t *pgd_base = swapper_pg_dir;
 557 
 558         permanent_kmaps_init(pgd_base);
 559 }
 560 
 561 #define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL)
 562 /* Bits supported by the hardware: */
 563 pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK;
 564 /* Bits allowed in normal kernel mappings: */
 565 pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK;
 566 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 567 /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
 568 EXPORT_SYMBOL(__default_kernel_pte_mask);
 569 
 570 /* user-defined highmem size */
 571 static unsigned int highmem_pages = -1;
 572 
 573 /*
 574  * highmem=size forces highmem to be exactly 'size' bytes.
 575  * This works even on boxes that have no highmem otherwise.
 576  * This also works to reduce highmem size on bigger boxes.
 577  */
 578 static int __init parse_highmem(char *arg)
 579 {
 580         if (!arg)
 581                 return -EINVAL;
 582 
 583         highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
 584         return 0;
 585 }
 586 early_param("highmem", parse_highmem);
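
      /*
       * Example: booting with "highmem=512M" makes memparse() return
       * 0x20000000, so highmem_pages becomes 0x20000000 >> PAGE_SHIFT =
       * 131072 pages (assuming the usual 4 KiB PAGE_SIZE).
       */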
 587 
 588 #define MSG_HIGHMEM_TOO_BIG \
 589         "highmem size (%luMB) is bigger than pages available (%luMB)!\n"
 590 
 591 #define MSG_LOWMEM_TOO_SMALL \
 592         "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n"
 593 /*
  594  * All of RAM fits into lowmem - but if the user wants highmem
 595  * artificially via the highmem=x boot parameter then create
 596  * it:
 597  */
 598 static void __init lowmem_pfn_init(void)
 599 {
 600         /* max_low_pfn is 0, we already have early_res support */
 601         max_low_pfn = max_pfn;
 602 
 603         if (highmem_pages == -1)
 604                 highmem_pages = 0;
 605 #ifdef CONFIG_HIGHMEM
 606         if (highmem_pages >= max_pfn) {
 607                 printk(KERN_ERR MSG_HIGHMEM_TOO_BIG,
 608                         pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
 609                 highmem_pages = 0;
 610         }
 611         if (highmem_pages) {
 612                 if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) {
 613                         printk(KERN_ERR MSG_LOWMEM_TOO_SMALL,
 614                                 pages_to_mb(highmem_pages));
 615                         highmem_pages = 0;
 616                 }
 617                 max_low_pfn -= highmem_pages;
 618         }
 619 #else
 620         if (highmem_pages)
 621                 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
 622 #endif
 623 }
 624 
 625 #define MSG_HIGHMEM_TOO_SMALL \
 626         "only %luMB highmem pages available, ignoring highmem size of %luMB!\n"
 627 
 628 #define MSG_HIGHMEM_TRIMMED \
 629         "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n"
 630 /*
 631  * We have more RAM than fits into lowmem - we try to put it into
 632  * highmem, also taking the highmem=x boot parameter into account:
 633  */
 634 static void __init highmem_pfn_init(void)
 635 {
 636         max_low_pfn = MAXMEM_PFN;
 637 
 638         if (highmem_pages == -1)
 639                 highmem_pages = max_pfn - MAXMEM_PFN;
 640 
 641         if (highmem_pages + MAXMEM_PFN < max_pfn)
 642                 max_pfn = MAXMEM_PFN + highmem_pages;
 643 
 644         if (highmem_pages + MAXMEM_PFN > max_pfn) {
 645                 printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL,
 646                         pages_to_mb(max_pfn - MAXMEM_PFN),
 647                         pages_to_mb(highmem_pages));
 648                 highmem_pages = 0;
 649         }
 650 #ifndef CONFIG_HIGHMEM
 651         /* Maximum memory usable is what is directly addressable */
 652         printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);
 653         if (max_pfn > MAX_NONPAE_PFN)
 654                 printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
 655         else
 656                 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
 657         max_pfn = MAXMEM_PFN;
 658 #else /* !CONFIG_HIGHMEM */
 659 #ifndef CONFIG_HIGHMEM64G
 660         if (max_pfn > MAX_NONPAE_PFN) {
 661                 max_pfn = MAX_NONPAE_PFN;
 662                 printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);
 663         }
 664 #endif /* !CONFIG_HIGHMEM64G */
 665 #endif /* !CONFIG_HIGHMEM */
 666 }
 667 
 668 /*
 669  * Determine low and high memory ranges:
 670  */
 671 void __init find_low_pfn_range(void)
 672 {
 673         /* it could update max_pfn */
 674 
 675         if (max_pfn <= MAXMEM_PFN)
 676                 lowmem_pfn_init();
 677         else
 678                 highmem_pfn_init();
 679 }
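
      /*
       * Worked example (assuming the default 3G/1G split, where MAXMEM_PFN
       * corresponds to roughly 896 MB of lowmem): with 2 GB of RAM, max_pfn
       * is about 524288 pages, which exceeds MAXMEM_PFN, so highmem_pfn_init()
       * caps max_low_pfn at MAXMEM_PFN and leaves the remaining ~1.1 GB to be
       * mapped on demand as highmem.
       */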
 680 
 681 #ifndef CONFIG_NEED_MULTIPLE_NODES
 682 void __init initmem_init(void)
 683 {
 684 #ifdef CONFIG_HIGHMEM
 685         highstart_pfn = highend_pfn = max_pfn;
 686         if (max_pfn > max_low_pfn)
 687                 highstart_pfn = max_low_pfn;
 688         printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 689                 pages_to_mb(highend_pfn - highstart_pfn));
 690         high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 691 #else
 692         high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 693 #endif
 694 
 695         memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
 696         sparse_memory_present_with_active_regions(0);
 697 
 698 #ifdef CONFIG_FLATMEM
 699         max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn;
 700 #endif
 701         __vmalloc_start_set = true;
 702 
 703         printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
 704                         pages_to_mb(max_low_pfn));
 705 
 706         setup_bootmem_allocator();
 707 }
 708 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 709 
 710 void __init setup_bootmem_allocator(void)
 711 {
 712         printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
 713                  max_pfn_mapped<<PAGE_SHIFT);
 714         printk(KERN_INFO "  low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
 715 }
 716 
 717 /*
 718  * paging_init() sets up the page tables - note that the first 8MB are
 719  * already mapped by head.S.
 720  *
  721  * This routine also unmaps the page at virtual kernel address 0, so
 722  * that we can trap those pesky NULL-reference errors in the kernel.
 723  */
 724 void __init paging_init(void)
 725 {
 726         pagetable_init();
 727 
 728         __flush_tlb_all();
 729 
 730         kmap_init();
 731 
 732         /*
 733          * NOTE: at this point the bootmem allocator is fully available.
 734          */
 735         olpc_dt_build_devicetree();
 736         sparse_memory_present_with_active_regions(MAX_NUMNODES);
 737         sparse_init();
 738         zone_sizes_init();
 739 }
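
      /*
       * Initialization-order sketch as wired up in this file (the hook
       * normally points at native_pagetable_init(), though paravirt guests
       * may override it):
       *
       *      x86_init.paging.pagetable_init()
       *        -> native_pagetable_init()
       *             -> paging_init()
       *                  -> pagetable_init() -> permanent_kmaps_init()
       *                  -> kmap_init()
       *                  -> sparse_init(), zone_sizes_init()
       */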
 740 
 741 /*
 742  * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 743  * and also on some strange 486's. All 586+'s are OK. This used to involve
 744  * black magic jumps to work around some nasty CPU bugs, but fortunately the
 745  * switch to using exceptions got rid of all that.
 746  */
 747 static void __init test_wp_bit(void)
 748 {
 749         char z = 0;
 750 
 751         printk(KERN_INFO "Checking if this processor honours the WP bit even in supervisor mode...");
 752 
 753         __set_fixmap(FIX_WP_TEST, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO);
 754 
 755         if (probe_kernel_write((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) {
 756                 clear_fixmap(FIX_WP_TEST);
 757                 printk(KERN_CONT "Ok.\n");
 758                 return;
 759         }
 760 
 761         printk(KERN_CONT "No.\n");
 762         panic("Linux doesn't support CPUs with broken WP.");
 763 }
 764 
 765 void __init mem_init(void)
 766 {
 767         pci_iommu_alloc();
 768 
 769 #ifdef CONFIG_FLATMEM
 770         BUG_ON(!mem_map);
 771 #endif
 772         /*
  773          * With CONFIG_DEBUG_PAGEALLOC, the initialization of highmem pages
  774          * has to be done before memblock_free_all(). Memblock uses free low
  775          * memory for temporary data (see find_range_array()) and for this
  776          * purpose can use pages that were already passed to the buddy
  777          * allocator, hence marked as not accessible in the page tables when
  778          * compiled with CONFIG_DEBUG_PAGEALLOC. Otherwise the order of
  779          * initialization is not important here.
 780          */
 781         set_highmem_pages_init();
 782 
 783         /* this will put all low memory onto the freelists */
 784         memblock_free_all();
 785 
 786         after_bootmem = 1;
 787         x86_init.hyper.init_after_bootmem();
 788 
 789         mem_init_print_info(NULL);
 790         printk(KERN_INFO "virtual kernel memory layout:\n"
 791                 "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 792                 "  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 793 #ifdef CONFIG_HIGHMEM
 794                 "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 795 #endif
 796                 "    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
 797                 "    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
 798                 "      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 799                 "      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 800                 "      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
 801                 FIXADDR_START, FIXADDR_TOP,
 802                 (FIXADDR_TOP - FIXADDR_START) >> 10,
 803 
 804                 CPU_ENTRY_AREA_BASE,
 805                 CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
 806                 CPU_ENTRY_AREA_MAP_SIZE >> 10,
 807 
 808 #ifdef CONFIG_HIGHMEM
 809                 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
 810                 (LAST_PKMAP*PAGE_SIZE) >> 10,
 811 #endif
 812 
 813                 VMALLOC_START, VMALLOC_END,
 814                 (VMALLOC_END - VMALLOC_START) >> 20,
 815 
 816                 (unsigned long)__va(0), (unsigned long)high_memory,
 817                 ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
 818 
 819                 (unsigned long)&__init_begin, (unsigned long)&__init_end,
 820                 ((unsigned long)&__init_end -
 821                  (unsigned long)&__init_begin) >> 10,
 822 
 823                 (unsigned long)&_etext, (unsigned long)&_edata,
 824                 ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
 825 
 826                 (unsigned long)&_text, (unsigned long)&_etext,
 827                 ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
 828 
 829         /*
 830          * Check boundaries twice: Some fundamental inconsistencies can
 831          * be detected at build time already.
 832          */
 833 #define __FIXADDR_TOP (-PAGE_SIZE)
 834 #ifdef CONFIG_HIGHMEM
 835         BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE  > FIXADDR_START);
 836         BUILD_BUG_ON(VMALLOC_END                        > PKMAP_BASE);
 837 #endif
 838 #define high_memory (-128UL << 20)
 839         BUILD_BUG_ON(VMALLOC_START                      >= VMALLOC_END);
 840 #undef high_memory
 841 #undef __FIXADDR_TOP
 842 
 843 #ifdef CONFIG_HIGHMEM
 844         BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE        > FIXADDR_START);
 845         BUG_ON(VMALLOC_END                              > PKMAP_BASE);
 846 #endif
 847         BUG_ON(VMALLOC_START                            >= VMALLOC_END);
 848         BUG_ON((unsigned long)high_memory               > VMALLOC_START);
 849 
 850         test_wp_bit();
 851 }
 852 
 853 #ifdef CONFIG_MEMORY_HOTPLUG
 854 int arch_add_memory(int nid, u64 start, u64 size,
 855                         struct mhp_restrictions *restrictions)
 856 {
 857         unsigned long start_pfn = start >> PAGE_SHIFT;
 858         unsigned long nr_pages = size >> PAGE_SHIFT;
 859 
 860         return __add_pages(nid, start_pfn, nr_pages, restrictions);
 861 }
 862 
 863 void arch_remove_memory(int nid, u64 start, u64 size,
 864                         struct vmem_altmap *altmap)
 865 {
 866         unsigned long start_pfn = start >> PAGE_SHIFT;
 867         unsigned long nr_pages = size >> PAGE_SHIFT;
 868 
 869         __remove_pages(start_pfn, nr_pages, altmap);
 870 }
 871 #endif
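
      /*
       * Sketch of how these hooks are reached (simplified; the generic code
       * lives in mm/memory_hotplug.c):
       *
       *      add_memory(nid, start, size)
       *        -> arch_add_memory(nid, start, size, &restrictions)
       *             -> __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, ...)
       */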
 872 
 873 int kernel_set_to_readonly __read_mostly;
 874 
 875 void set_kernel_text_rw(void)
 876 {
 877         unsigned long start = PFN_ALIGN(_text);
 878         unsigned long size = PFN_ALIGN(_etext) - start;
 879 
 880         if (!kernel_set_to_readonly)
 881                 return;
 882 
 883         pr_debug("Set kernel text: %lx - %lx for read write\n",
 884                  start, start+size);
 885 
 886         set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
 887 }
 888 
 889 void set_kernel_text_ro(void)
 890 {
 891         unsigned long start = PFN_ALIGN(_text);
 892         unsigned long size = PFN_ALIGN(_etext) - start;
 893 
 894         if (!kernel_set_to_readonly)
 895                 return;
 896 
 897         pr_debug("Set kernel text: %lx - %lx for read only\n",
 898                  start, start+size);
 899 
 900         set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 901 }
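
      /*
       * Usage sketch (roughly how a text-patching path such as ftrace pairs
       * these helpers; simplified, not a verbatim copy of that code):
       *
       *      set_kernel_text_rw();
       *      // ... patch call sites in the kernel text ...
       *      set_kernel_text_ro();
       */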
 902 
 903 static void mark_nxdata_nx(void)
 904 {
 905         /*
  906          * When this is called, init has already been executed and released,
 907          * so everything past _etext should be NX.
 908          */
 909         unsigned long start = PFN_ALIGN(_etext);
 910         /*
  911          * The end comes from the is_kernel_text() upper limit, rounded up to HPAGE_SIZE since huge pages were used for the mapping:
 912          */
 913         unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
 914 
 915         if (__supported_pte_mask & _PAGE_NX)
 916                 printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
 917         set_memory_nx(start, size >> PAGE_SHIFT);
 918 }
 919 
 920 void mark_rodata_ro(void)
 921 {
 922         unsigned long start = PFN_ALIGN(_text);
 923         unsigned long size = (unsigned long)__end_rodata - start;
 924 
 925         set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 926         pr_info("Write protecting kernel text and read-only data: %luk\n",
 927                 size >> 10);
 928 
 929         kernel_set_to_readonly = 1;
 930 
 931 #ifdef CONFIG_CPA_DEBUG
 932         pr_info("Testing CPA: Reverting %lx-%lx\n", start, start + size);
 933         set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
 934 
 935         pr_info("Testing CPA: write protecting again\n");
 936         set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 937 #endif
 938         mark_nxdata_nx();
 939         if (__supported_pte_mask & _PAGE_NX)
 940                 debug_checkwx();
 941 }
