root/arch/powerpc/kernel/fadump.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. fadump_cma_init
  2. fadump_cma_init
  3. early_init_dt_scan_fw_dump
  4. is_fadump_memory_area
  5. should_fadump_crash
  6. is_fadump_active
  7. is_fadump_mem_area_contiguous
  8. is_fadump_boot_mem_contiguous
  9. is_fadump_reserved_mem_contiguous
  10. fadump_show_config
  11. fadump_calculate_reserve_size
  12. get_fadump_area_size
  13. add_boot_mem_region
  14. add_boot_mem_regions
  15. fadump_get_boot_mem_regions
  16. fadump_reserve_mem
  17. early_fadump_param
  18. early_fadump_reserve_mem
  19. crash_fadump
  20. fadump_regs_to_elf_notes
  21. fadump_update_elfcore_header
  22. fadump_alloc_buffer
  23. fadump_free_buffer
  24. fadump_setup_cpu_notes_buf
  25. fadump_free_cpu_notes_buf
  26. fadump_free_mem_ranges
  27. fadump_alloc_mem_ranges
  28. fadump_add_mem_range
  29. fadump_exclude_reserved_area
  30. fadump_init_elfcore_header
  31. fadump_setup_crash_memory_ranges
  32. fadump_relocate
  33. fadump_create_elfcore_headers
  34. init_fadump_header
  35. register_fadump
  36. fadump_cleanup
  37. fadump_free_reserved_memory
  38. fadump_release_reserved_area
  39. sort_and_merge_mem_ranges
  40. fadump_scan_reserved_mem_ranges
  41. fadump_release_memory
  42. fadump_invalidate_release_mem
  43. fadump_release_memory_store
  44. fadump_enabled_show
  45. fadump_register_show
  46. fadump_register_store
  47. fadump_region_show
  48. fadump_init_files
  49. setup_fadump
  50. early_init_dt_scan_fw_dump
  51. fadump_reserve_mem
  52. fadump_reserve_crash_area
  53. arch_reserved_kernel_pages

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
   4  * dump with assistance from firmware. This approach does not use kexec,
   5  * instead firmware assists in booting the kdump kernel while preserving
   6  * memory contents. Most of the code implementation has been adapted
   7  * from phyp assisted dump implementation written by Linas Vepstas and
   8  * Manish Ahuja
   9  *
  10  * Copyright 2011 IBM Corporation
  11  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
  12  */
  13 
  14 #undef DEBUG
  15 #define pr_fmt(fmt) "fadump: " fmt
  16 
  17 #include <linux/string.h>
  18 #include <linux/memblock.h>
  19 #include <linux/delay.h>
  20 #include <linux/seq_file.h>
  21 #include <linux/crash_dump.h>
  22 #include <linux/kobject.h>
  23 #include <linux/sysfs.h>
  24 #include <linux/slab.h>
  25 #include <linux/cma.h>
  26 #include <linux/hugetlb.h>
  27 
  28 #include <asm/debugfs.h>
  29 #include <asm/page.h>
  30 #include <asm/prom.h>
  31 #include <asm/fadump.h>
  32 #include <asm/fadump-internal.h>
  33 #include <asm/setup.h>
  34 
  35 static struct fw_dump fw_dump;
  36 
  37 static void __init fadump_reserve_crash_area(u64 base);
  38 
  39 #ifndef CONFIG_PRESERVE_FA_DUMP
  40 static DEFINE_MUTEX(fadump_mutex);
  41 struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
  42 struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 };
  43 
  44 #ifdef CONFIG_CMA
  45 static struct cma *fadump_cma;
  46 
  47 /*
  48  * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
  49  *
  50  * This function initializes CMA area from fadump reserved memory.
  51  * The total size of fadump reserved memory covers for boot memory size
  52  * + cpu data size + hpte size and metadata.
  53  * Initialize only the area equivalent to boot memory size for CMA use.
  54  * The remaining portion of fadump reserved memory will not be given
  55  * to CMA and pages for those will stay reserved. Boot memory size is
  56  * aligned per CMA requirement to satisfy cma_init_reserved_mem() call.
  57  * But for some reason even if it fails we still have the memory reservation
  58  * with us and we can still continue doing fadump.
  59  */
  60 int __init fadump_cma_init(void)
  61 {
  62         unsigned long long base, size;
  63         int rc;
  64 
  65         if (!fw_dump.fadump_enabled)
  66                 return 0;
  67 
  68         /*
  69          * Do not use CMA if user has provided fadump=nocma kernel parameter.
  70          * Return 1 to continue with fadump old behaviour.
  71          */
  72         if (fw_dump.nocma)
  73                 return 1;
  74 
  75         base = fw_dump.reserve_dump_area_start;
  76         size = fw_dump.boot_memory_size;
  77 
  78         if (!size)
  79                 return 0;
  80 
  81         rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
  82         if (rc) {
  83                 pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
  84                 /*
  85                  * Though the CMA init has failed we still have memory
  86                  * reservation with us. The reserved memory will be
  87                  * blocked from production system usage.  Hence return 1,
  88                  * so that we can continue with fadump.
  89                  */
  90                 return 1;
  91         }
  92 
  93         /*
  94          * So we now have successfully initialized cma area for fadump.
  95          */
  96         pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
  97                 "bytes of memory reserved for firmware-assisted dump\n",
  98                 cma_get_size(fadump_cma),
  99                 (unsigned long)cma_get_base(fadump_cma) >> 20,
 100                 fw_dump.reserve_dump_area_size);
 101         return 1;
 102 }
 103 #else
 104 static int __init fadump_cma_init(void) { return 1; }
 105 #endif /* CONFIG_CMA */
 106 
 107 /* Scan the Firmware Assisted dump configuration details. */
 108 int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
 109                                       int depth, void *data)
 110 {
 111         if (depth != 1)
 112                 return 0;
 113 
 114         if (strcmp(uname, "rtas") == 0) {
 115                 rtas_fadump_dt_scan(&fw_dump, node);
 116                 return 1;
 117         }
 118 
 119         if (strcmp(uname, "ibm,opal") == 0) {
 120                 opal_fadump_dt_scan(&fw_dump, node);
 121                 return 1;
 122         }
 123 
 124         return 0;
 125 }
 126 
 127 /*
 128  * If fadump is registered, check if the memory provided
 129  * falls within boot memory area and reserved memory area.
 130  */
 131 int is_fadump_memory_area(u64 addr, unsigned long size)
 132 {
 133         u64 d_start, d_end;
 134 
 135         if (!fw_dump.dump_registered)
 136                 return 0;
 137 
 138         if (!size)
 139                 return 0;
 140 
 141         d_start = fw_dump.reserve_dump_area_start;
 142         d_end = d_start + fw_dump.reserve_dump_area_size;
 143         if (((addr + size) > d_start) && (addr <= d_end))
 144                 return 1;
 145 
 146         return (addr <= fw_dump.boot_mem_top);
 147 }
 148 
 149 int should_fadump_crash(void)
 150 {
 151         if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
 152                 return 0;
 153         return 1;
 154 }
 155 
 156 int is_fadump_active(void)
 157 {
 158         return fw_dump.dump_active;
 159 }
 160 
 161 /*
 162  * Returns true, if there are no holes in memory area between d_start to d_end,
 163  * false otherwise.
 164  */
 165 static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
 166 {
 167         struct memblock_region *reg;
 168         bool ret = false;
 169         u64 start, end;
 170 
 171         for_each_memblock(memory, reg) {
 172                 start = max_t(u64, d_start, reg->base);
 173                 end = min_t(u64, d_end, (reg->base + reg->size));
 174                 if (d_start < end) {
 175                         /* Memory hole from d_start to start */
 176                         if (start > d_start)
 177                                 break;
 178 
 179                         if (end == d_end) {
 180                                 ret = true;
 181                                 break;
 182                         }
 183 
 184                         d_start = end + 1;
 185                 }
 186         }
 187 
 188         return ret;
 189 }
 190 
 191 /*
 192  * Returns true, if there are no holes in boot memory area,
 193  * false otherwise.
 194  */
 195 bool is_fadump_boot_mem_contiguous(void)
 196 {
 197         unsigned long d_start, d_end;
 198         bool ret = false;
 199         int i;
 200 
 201         for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
 202                 d_start = fw_dump.boot_mem_addr[i];
 203                 d_end   = d_start + fw_dump.boot_mem_sz[i];
 204 
 205                 ret = is_fadump_mem_area_contiguous(d_start, d_end);
 206                 if (!ret)
 207                         break;
 208         }
 209 
 210         return ret;
 211 }
 212 
 213 /*
 214  * Returns true, if there are no holes in reserved memory area,
 215  * false otherwise.
 216  */
 217 bool is_fadump_reserved_mem_contiguous(void)
 218 {
 219         u64 d_start, d_end;
 220 
 221         d_start = fw_dump.reserve_dump_area_start;
 222         d_end   = d_start + fw_dump.reserve_dump_area_size;
 223         return is_fadump_mem_area_contiguous(d_start, d_end);
 224 }
 225 
 226 /* Print firmware assisted dump configurations for debugging purpose. */
 227 static void fadump_show_config(void)
 228 {
 229         int i;
 230 
 231         pr_debug("Support for firmware-assisted dump (fadump): %s\n",
 232                         (fw_dump.fadump_supported ? "present" : "no support"));
 233 
 234         if (!fw_dump.fadump_supported)
 235                 return;
 236 
 237         pr_debug("Fadump enabled    : %s\n",
 238                                 (fw_dump.fadump_enabled ? "yes" : "no"));
 239         pr_debug("Dump Active       : %s\n",
 240                                 (fw_dump.dump_active ? "yes" : "no"));
 241         pr_debug("Dump section sizes:\n");
 242         pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
 243         pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
 244         pr_debug("    Boot memory size   : %lx\n", fw_dump.boot_memory_size);
 245         pr_debug("    Boot memory top    : %llx\n", fw_dump.boot_mem_top);
 246         pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
 247         for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
 248                 pr_debug("[%03d] base = %llx, size = %llx\n", i,
 249                          fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
 250         }
 251 }
 252 
 253 /**
 254  * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
 255  *
 256  * Function to find the largest memory size we need to reserve during early
 257  * boot process. This will be the size of the memory that is required for a
 258  * kernel to boot successfully.
 259  *
 260  * This function has been taken from phyp-assisted dump feature implementation.
 261  *
 262  * returns larger of 256MB or 5% rounded down to multiples of 256MB.
 263  *
 264  * TODO: Come up with better approach to find out more accurate memory size
 265  * that is required for a kernel to boot successfully.
 266  *
 267  */
 268 static inline u64 fadump_calculate_reserve_size(void)
 269 {
 270         u64 base, size, bootmem_min;
 271         int ret;
 272 
 273         if (fw_dump.reserve_bootvar)
 274                 pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
 275 
 276         /*
 277          * Check if the size is specified through crashkernel= cmdline
 278          * option. If yes, then use that but ignore base as fadump reserves
 279          * memory at a predefined offset.
 280          */
 281         ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 282                                 &size, &base);
 283         if (ret == 0 && size > 0) {
 284                 unsigned long max_size;
 285 
 286                 if (fw_dump.reserve_bootvar)
 287                         pr_info("Using 'crashkernel=' parameter for memory reservation.\n");
 288 
 289                 fw_dump.reserve_bootvar = (unsigned long)size;
 290 
 291                 /*
 292                  * Adjust if the boot memory size specified is above
 293                  * the upper limit.
 294                  */
 295                 max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
 296                 if (fw_dump.reserve_bootvar > max_size) {
 297                         fw_dump.reserve_bootvar = max_size;
 298                         pr_info("Adjusted boot memory size to %luMB\n",
 299                                 (fw_dump.reserve_bootvar >> 20));
 300                 }
 301 
 302                 return fw_dump.reserve_bootvar;
 303         } else if (fw_dump.reserve_bootvar) {
 304                 /*
 305                  * 'fadump_reserve_mem=' is being used to reserve memory
 306                  * for firmware-assisted dump.
 307                  */
 308                 return fw_dump.reserve_bootvar;
 309         }
 310 
 311         /* divide by 20 to get 5% of value */
 312         size = memblock_phys_mem_size() / 20;
 313 
 314         /* round it down in multiples of 256 */
 315         size = size & ~0x0FFFFFFFUL;
 316 
 317         /* Truncate to memory_limit. We don't want to over reserve the memory.*/
 318         if (memory_limit && size > memory_limit)
 319                 size = memory_limit;
 320 
 321         bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
 322         return (size > bootmem_min ? size : bootmem_min);
 323 }
 324 
 325 /*
 326  * Calculate the total memory size required to be reserved for
 327  * firmware-assisted dump registration.
 328  */
 329 static unsigned long get_fadump_area_size(void)
 330 {
 331         unsigned long size = 0;
 332 
 333         size += fw_dump.cpu_state_data_size;
 334         size += fw_dump.hpte_region_size;
 335         size += fw_dump.boot_memory_size;
 336         size += sizeof(struct fadump_crash_info_header);
 337         size += sizeof(struct elfhdr); /* ELF core header.*/
 338         size += sizeof(struct elf_phdr); /* place holder for cpu notes */
 339         /* Program headers for crash memory regions. */
 340         size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
 341 
 342         size = PAGE_ALIGN(size);
 343 
 344         /* This is to hold kernel metadata on platforms that support it */
 345         size += (fw_dump.ops->fadump_get_metadata_size ?
 346                  fw_dump.ops->fadump_get_metadata_size() : 0);
 347         return size;
 348 }
 349 
 350 static int __init add_boot_mem_region(unsigned long rstart,
 351                                       unsigned long rsize)
 352 {
 353         int i = fw_dump.boot_mem_regs_cnt++;
 354 
 355         if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
 356                 fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
 357                 return 0;
 358         }
 359 
 360         pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
 361                  i, rstart, (rstart + rsize));
 362         fw_dump.boot_mem_addr[i] = rstart;
 363         fw_dump.boot_mem_sz[i] = rsize;
 364         return 1;
 365 }
 366 
 367 /*
 368  * Firmware usually has a hard limit on the data it can copy per region.
 369  * Honour that by splitting a memory range into multiple regions.
 370  */
 371 static int __init add_boot_mem_regions(unsigned long mstart,
 372                                        unsigned long msize)
 373 {
 374         unsigned long rstart, rsize, max_size;
 375         int ret = 1;
 376 
 377         rstart = mstart;
 378         max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
 379         while (msize) {
 380                 if (msize > max_size)
 381                         rsize = max_size;
 382                 else
 383                         rsize = msize;
 384 
 385                 ret = add_boot_mem_region(rstart, rsize);
 386                 if (!ret)
 387                         break;
 388 
 389                 msize -= rsize;
 390                 rstart += rsize;
 391         }
 392 
 393         return ret;
 394 }
 395 
 396 static int __init fadump_get_boot_mem_regions(void)
 397 {
 398         unsigned long base, size, cur_size, hole_size, last_end;
 399         unsigned long mem_size = fw_dump.boot_memory_size;
 400         struct memblock_region *reg;
 401         int ret = 1;
 402 
 403         fw_dump.boot_mem_regs_cnt = 0;
 404 
 405         last_end = 0;
 406         hole_size = 0;
 407         cur_size = 0;
 408         for_each_memblock(memory, reg) {
 409                 base = reg->base;
 410                 size = reg->size;
 411                 hole_size += (base - last_end);
 412 
 413                 if ((cur_size + size) >= mem_size) {
 414                         size = (mem_size - cur_size);
 415                         ret = add_boot_mem_regions(base, size);
 416                         break;
 417                 }
 418 
 419                 mem_size -= size;
 420                 cur_size += size;
 421                 ret = add_boot_mem_regions(base, size);
 422                 if (!ret)
 423                         break;
 424 
 425                 last_end = base + size;
 426         }
 427         fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
 428 
 429         return ret;
 430 }
 431 
 432 int __init fadump_reserve_mem(void)
 433 {
 434         u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE;
 435         bool is_memblock_bottom_up = memblock_bottom_up();
 436         int ret = 1;
 437 
 438         if (!fw_dump.fadump_enabled)
 439                 return 0;
 440 
 441         if (!fw_dump.fadump_supported) {
 442                 pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
 443                 goto error_out;
 444         }
 445 
 446         /*
 447          * Initialize boot memory size
 448          * If dump is active then we have already calculated the size during
 449          * first kernel.
 450          */
 451         if (!fw_dump.dump_active) {
 452                 fw_dump.boot_memory_size =
 453                         PAGE_ALIGN(fadump_calculate_reserve_size());
 454 #ifdef CONFIG_CMA
 455                 if (!fw_dump.nocma) {
 456                         align = FADUMP_CMA_ALIGNMENT;
 457                         fw_dump.boot_memory_size =
 458                                 ALIGN(fw_dump.boot_memory_size, align);
 459                 }
 460 #endif
 461 
 462                 bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
 463                 if (fw_dump.boot_memory_size < bootmem_min) {
 464                         pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
 465                                fw_dump.boot_memory_size, bootmem_min);
 466                         goto error_out;
 467                 }
 468 
 469                 if (!fadump_get_boot_mem_regions()) {
 470                         pr_err("Too many holes in boot memory area to enable fadump\n");
 471                         goto error_out;
 472                 }
 473         }
 474 
 475         /*
 476          * Calculate the memory boundary.
 477          * If memory_limit is less than actual memory boundary then reserve
 478          * the memory for fadump beyond the memory_limit and adjust the
 479          * memory_limit accordingly, so that the running kernel can run with
 480          * specified memory_limit.
 481          */
 482         if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
 483                 size = get_fadump_area_size();
 484                 if ((memory_limit + size) < memblock_end_of_DRAM())
 485                         memory_limit += size;
 486                 else
 487                         memory_limit = memblock_end_of_DRAM();
 488                 printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
 489                                 " dump, now %#016llx\n", memory_limit);
 490         }
 491         if (memory_limit)
 492                 mem_boundary = memory_limit;
 493         else
 494                 mem_boundary = memblock_end_of_DRAM();
 495 
 496         base = fw_dump.boot_mem_top;
 497         size = get_fadump_area_size();
 498         fw_dump.reserve_dump_area_size = size;
 499         if (fw_dump.dump_active) {
 500                 pr_info("Firmware-assisted dump is active.\n");
 501 
 502 #ifdef CONFIG_HUGETLB_PAGE
 503                 /*
 504                  * FADump capture kernel doesn't care much about hugepages.
 505                  * In fact, handling hugepages in capture kernel is asking for
 506                  * trouble. So, disable HugeTLB support when fadump is active.
 507                  */
 508                 hugetlb_disabled = true;
 509 #endif
 510                 /*
 511                  * If last boot has crashed then reserve all the memory
 512                  * above boot memory size so that we don't touch it until
 513                  * dump is written to disk by userspace tool. This memory
 514                  * can be released for general use by invalidating fadump.
 515                  */
 516                 fadump_reserve_crash_area(base);
 517 
 518                 pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
 519                 pr_debug("Reserve dump area start address: 0x%lx\n",
 520                          fw_dump.reserve_dump_area_start);
 521         } else {
 522                 /*
 523                  * Reserve memory at an offset closer to bottom of the RAM to
 524                  * minimize the impact of memory hot-remove operation.
 525                  */
 526                 memblock_set_bottom_up(true);
 527                 base = memblock_find_in_range(base, mem_boundary, size, align);
 528 
 529                 /* Restore the previous allocation mode */
 530                 memblock_set_bottom_up(is_memblock_bottom_up);
 531 
 532                 if (!base) {
 533                         pr_err("Failed to find memory chunk for reservation!\n");
 534                         goto error_out;
 535                 }
 536                 fw_dump.reserve_dump_area_start = base;
 537 
 538                 /*
 539                  * Calculate the kernel metadata address and register it with
 540                  * f/w if the platform supports.
 541                  */
 542                 if (fw_dump.ops->fadump_setup_metadata &&
 543                     (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
 544                         goto error_out;
 545 
 546                 if (memblock_reserve(base, size)) {
 547                         pr_err("Failed to reserve memory!\n");
 548                         goto error_out;
 549                 }
 550 
 551                 pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
 552                         (size >> 20), base, (memblock_phys_mem_size() >> 20));
 553 
 554                 ret = fadump_cma_init();
 555         }
 556 
 557         return ret;
 558 error_out:
 559         fw_dump.fadump_enabled = 0;
 560         return 0;
 561 }
 562 
 563 /* Look for fadump= cmdline option. */
 564 static int __init early_fadump_param(char *p)
 565 {
 566         if (!p)
 567                 return 1;
 568 
 569         if (strncmp(p, "on", 2) == 0)
 570                 fw_dump.fadump_enabled = 1;
 571         else if (strncmp(p, "off", 3) == 0)
 572                 fw_dump.fadump_enabled = 0;
 573         else if (strncmp(p, "nocma", 5) == 0) {
 574                 fw_dump.fadump_enabled = 1;
 575                 fw_dump.nocma = 1;
 576         }
 577 
 578         return 0;
 579 }
 580 early_param("fadump", early_fadump_param);
 581 
 582 /*
 583  * Look for fadump_reserve_mem= cmdline option
 584  * TODO: Remove references to 'fadump_reserve_mem=' parameter,
 585  *       the sooner 'crashkernel=' parameter is accustomed to.
 586  */
 587 static int __init early_fadump_reserve_mem(char *p)
 588 {
 589         if (p)
 590                 fw_dump.reserve_bootvar = memparse(p, &p);
 591         return 0;
 592 }
 593 early_param("fadump_reserve_mem", early_fadump_reserve_mem);
 594 
 595 void crash_fadump(struct pt_regs *regs, const char *str)
 596 {
 597         struct fadump_crash_info_header *fdh = NULL;
 598         int old_cpu, this_cpu;
 599 
 600         if (!should_fadump_crash())
 601                 return;
 602 
 603         /*
 604          * old_cpu == -1 means this is the first CPU which has come here,
 605          * go ahead and trigger fadump.
 606          *
 607          * old_cpu != -1 means some other CPU has already on it's way
 608          * to trigger fadump, just keep looping here.
 609          */
 610         this_cpu = smp_processor_id();
 611         old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
 612 
 613         if (old_cpu != -1) {
 614                 /*
 615                  * We can't loop here indefinitely. Wait as long as fadump
 616                  * is in force. If we race with fadump un-registration this
 617                  * loop will break and then we go down to normal panic path
 618                  * and reboot. If fadump is in force the first crashing
 619                  * cpu will definitely trigger fadump.
 620                  */
 621                 while (fw_dump.dump_registered)
 622                         cpu_relax();
 623                 return;
 624         }
 625 
 626         fdh = __va(fw_dump.fadumphdr_addr);
 627         fdh->crashing_cpu = crashing_cpu;
 628         crash_save_vmcoreinfo();
 629 
 630         if (regs)
 631                 fdh->regs = *regs;
 632         else
 633                 ppc_save_regs(&fdh->regs);
 634 
 635         fdh->online_mask = *cpu_online_mask;
 636 
 637         fw_dump.ops->fadump_trigger(fdh, str);
 638 }
 639 
 640 u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
 641 {
 642         struct elf_prstatus prstatus;
 643 
 644         memset(&prstatus, 0, sizeof(prstatus));
 645         /*
 646          * FIXME: How do i get PID? Do I really need it?
 647          * prstatus.pr_pid = ????
 648          */
 649         elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
 650         buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
 651                               &prstatus, sizeof(prstatus));
 652         return buf;
 653 }
 654 
 655 void fadump_update_elfcore_header(char *bufp)
 656 {
 657         struct elfhdr *elf;
 658         struct elf_phdr *phdr;
 659 
 660         elf = (struct elfhdr *)bufp;
 661         bufp += sizeof(struct elfhdr);
 662 
 663         /* First note is a place holder for cpu notes info. */
 664         phdr = (struct elf_phdr *)bufp;
 665 
 666         if (phdr->p_type == PT_NOTE) {
 667                 phdr->p_paddr   = __pa(fw_dump.cpu_notes_buf_vaddr);
 668                 phdr->p_offset  = phdr->p_paddr;
 669                 phdr->p_filesz  = fw_dump.cpu_notes_buf_size;
 670                 phdr->p_memsz = fw_dump.cpu_notes_buf_size;
 671         }
 672         return;
 673 }
 674 
 675 static void *fadump_alloc_buffer(unsigned long size)
 676 {
 677         unsigned long count, i;
 678         struct page *page;
 679         void *vaddr;
 680 
 681         vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
 682         if (!vaddr)
 683                 return NULL;
 684 
 685         count = PAGE_ALIGN(size) / PAGE_SIZE;
 686         page = virt_to_page(vaddr);
 687         for (i = 0; i < count; i++)
 688                 mark_page_reserved(page + i);
 689         return vaddr;
 690 }
 691 
 692 static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
 693 {
 694         free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
 695 }
 696 
 697 s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
 698 {
 699         /* Allocate buffer to hold cpu crash notes. */
 700         fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
 701         fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
 702         fw_dump.cpu_notes_buf_vaddr =
 703                 (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
 704         if (!fw_dump.cpu_notes_buf_vaddr) {
 705                 pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
 706                        fw_dump.cpu_notes_buf_size);
 707                 return -ENOMEM;
 708         }
 709 
 710         pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
 711                  fw_dump.cpu_notes_buf_size,
 712                  fw_dump.cpu_notes_buf_vaddr);
 713         return 0;
 714 }
 715 
 716 void fadump_free_cpu_notes_buf(void)
 717 {
 718         if (!fw_dump.cpu_notes_buf_vaddr)
 719                 return;
 720 
 721         fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
 722                            fw_dump.cpu_notes_buf_size);
 723         fw_dump.cpu_notes_buf_vaddr = 0;
 724         fw_dump.cpu_notes_buf_size = 0;
 725 }
 726 
 727 static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
 728 {
 729         kfree(mrange_info->mem_ranges);
 730         mrange_info->mem_ranges = NULL;
 731         mrange_info->mem_ranges_sz = 0;
 732         mrange_info->max_mem_ranges = 0;
 733 }
 734 
 735 /*
 736  * Allocate or reallocate mem_ranges array in incremental units
 737  * of PAGE_SIZE.
 738  */
 739 static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
 740 {
 741         struct fadump_memory_range *new_array;
 742         u64 new_size;
 743 
 744         new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
 745         pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
 746                  new_size, mrange_info->name);
 747 
 748         new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
 749         if (new_array == NULL) {
 750                 pr_err("Insufficient memory for setting up %s memory ranges\n",
 751                        mrange_info->name);
 752                 fadump_free_mem_ranges(mrange_info);
 753                 return -ENOMEM;
 754         }
 755 
 756         mrange_info->mem_ranges = new_array;
 757         mrange_info->mem_ranges_sz = new_size;
 758         mrange_info->max_mem_ranges = (new_size /
 759                                        sizeof(struct fadump_memory_range));
 760         return 0;
 761 }
 762 
 763 static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
 764                                        u64 base, u64 end)
 765 {
 766         struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
 767         bool is_adjacent = false;
 768         u64 start, size;
 769 
 770         if (base == end)
 771                 return 0;
 772 
 773         /*
 774          * Fold adjacent memory ranges to bring down the memory ranges/
 775          * PT_LOAD segments count.
 776          */
 777         if (mrange_info->mem_range_cnt) {
 778                 start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
 779                 size  = mem_ranges[mrange_info->mem_range_cnt - 1].size;
 780 
 781                 if ((start + size) == base)
 782                         is_adjacent = true;
 783         }
 784         if (!is_adjacent) {
 785                 /* resize the array on reaching the limit */
 786                 if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
 787                         int ret;
 788 
 789                         ret = fadump_alloc_mem_ranges(mrange_info);
 790                         if (ret)
 791                                 return ret;
 792 
 793                         /* Update to the new resized array */
 794                         mem_ranges = mrange_info->mem_ranges;
 795                 }
 796 
 797                 start = base;
 798                 mem_ranges[mrange_info->mem_range_cnt].base = start;
 799                 mrange_info->mem_range_cnt++;
 800         }
 801 
 802         mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
 803         pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
 804                  mrange_info->name, (mrange_info->mem_range_cnt - 1),
 805                  start, end - 1, (end - start));
 806         return 0;
 807 }
 808 
 809 static int fadump_exclude_reserved_area(u64 start, u64 end)
 810 {
 811         u64 ra_start, ra_end;
 812         int ret = 0;
 813 
 814         ra_start = fw_dump.reserve_dump_area_start;
 815         ra_end = ra_start + fw_dump.reserve_dump_area_size;
 816 
 817         if ((ra_start < end) && (ra_end > start)) {
 818                 if ((start < ra_start) && (end > ra_end)) {
 819                         ret = fadump_add_mem_range(&crash_mrange_info,
 820                                                    start, ra_start);
 821                         if (ret)
 822                                 return ret;
 823 
 824                         ret = fadump_add_mem_range(&crash_mrange_info,
 825                                                    ra_end, end);
 826                 } else if (start < ra_start) {
 827                         ret = fadump_add_mem_range(&crash_mrange_info,
 828                                                    start, ra_start);
 829                 } else if (ra_end < end) {
 830                         ret = fadump_add_mem_range(&crash_mrange_info,
 831                                                    ra_end, end);
 832                 }
 833         } else
 834                 ret = fadump_add_mem_range(&crash_mrange_info, start, end);
 835 
 836         return ret;
 837 }
 838 
 839 static int fadump_init_elfcore_header(char *bufp)
 840 {
 841         struct elfhdr *elf;
 842 
 843         elf = (struct elfhdr *) bufp;
 844         bufp += sizeof(struct elfhdr);
 845         memcpy(elf->e_ident, ELFMAG, SELFMAG);
 846         elf->e_ident[EI_CLASS] = ELF_CLASS;
 847         elf->e_ident[EI_DATA] = ELF_DATA;
 848         elf->e_ident[EI_VERSION] = EV_CURRENT;
 849         elf->e_ident[EI_OSABI] = ELF_OSABI;
 850         memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
 851         elf->e_type = ET_CORE;
 852         elf->e_machine = ELF_ARCH;
 853         elf->e_version = EV_CURRENT;
 854         elf->e_entry = 0;
 855         elf->e_phoff = sizeof(struct elfhdr);
 856         elf->e_shoff = 0;
 857 #if defined(_CALL_ELF)
 858         elf->e_flags = _CALL_ELF;
 859 #else
 860         elf->e_flags = 0;
 861 #endif
 862         elf->e_ehsize = sizeof(struct elfhdr);
 863         elf->e_phentsize = sizeof(struct elf_phdr);
 864         elf->e_phnum = 0;
 865         elf->e_shentsize = 0;
 866         elf->e_shnum = 0;
 867         elf->e_shstrndx = 0;
 868 
 869         return 0;
 870 }
 871 
 872 /*
 873  * Traverse through memblock structure and setup crash memory ranges. These
 874  * ranges will be used create PT_LOAD program headers in elfcore header.
 875  */
 876 static int fadump_setup_crash_memory_ranges(void)
 877 {
 878         struct memblock_region *reg;
 879         u64 start, end;
 880         int i, ret;
 881 
 882         pr_debug("Setup crash memory ranges.\n");
 883         crash_mrange_info.mem_range_cnt = 0;
 884 
 885         /*
 886          * Boot memory region(s) registered with firmware are moved to
 887          * different location at the time of crash. Create separate program
 888          * header(s) for this memory chunk(s) with the correct offset.
 889          */
 890         for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
 891                 start = fw_dump.boot_mem_addr[i];
 892                 end = start + fw_dump.boot_mem_sz[i];
 893                 ret = fadump_add_mem_range(&crash_mrange_info, start, end);
 894                 if (ret)
 895                         return ret;
 896         }
 897 
 898         for_each_memblock(memory, reg) {
 899                 start = (u64)reg->base;
 900                 end = start + (u64)reg->size;
 901 
 902                 /*
 903                  * skip the memory chunk that is already added
 904                  * (0 through boot_memory_top).
 905                  */
 906                 if (start < fw_dump.boot_mem_top) {
 907                         if (end > fw_dump.boot_mem_top)
 908                                 start = fw_dump.boot_mem_top;
 909                         else
 910                                 continue;
 911                 }
 912 
 913                 /* add this range excluding the reserved dump area. */
 914                 ret = fadump_exclude_reserved_area(start, end);
 915                 if (ret)
 916                         return ret;
 917         }
 918 
 919         return 0;
 920 }
 921 
 922 /*
 923  * If the given physical address falls within the boot memory region then
 924  * return the relocated address that points to the dump region reserved
 925  * for saving initial boot memory contents.
 926  */
 927 static inline unsigned long fadump_relocate(unsigned long paddr)
 928 {
 929         unsigned long raddr, rstart, rend, rlast, hole_size;
 930         int i;
 931 
 932         hole_size = 0;
 933         rlast = 0;
 934         raddr = paddr;
 935         for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
 936                 rstart = fw_dump.boot_mem_addr[i];
 937                 rend = rstart + fw_dump.boot_mem_sz[i];
 938                 hole_size += (rstart - rlast);
 939 
 940                 if (paddr >= rstart && paddr < rend) {
 941                         raddr += fw_dump.boot_mem_dest_addr - hole_size;
 942                         break;
 943                 }
 944 
 945                 rlast = rend;
 946         }
 947 
 948         pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
 949         return raddr;
 950 }
 951 
 952 static int fadump_create_elfcore_headers(char *bufp)
 953 {
 954         unsigned long long raddr, offset;
 955         struct elf_phdr *phdr;
 956         struct elfhdr *elf;
 957         int i, j;
 958 
 959         fadump_init_elfcore_header(bufp);
 960         elf = (struct elfhdr *)bufp;
 961         bufp += sizeof(struct elfhdr);
 962 
 963         /*
 964          * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
 965          * will be populated during second kernel boot after crash. Hence
 966          * this PT_NOTE will always be the first elf note.
 967          *
 968          * NOTE: Any new ELF note addition should be placed after this note.
 969          */
 970         phdr = (struct elf_phdr *)bufp;
 971         bufp += sizeof(struct elf_phdr);
 972         phdr->p_type = PT_NOTE;
 973         phdr->p_flags = 0;
 974         phdr->p_vaddr = 0;
 975         phdr->p_align = 0;
 976 
 977         phdr->p_offset = 0;
 978         phdr->p_paddr = 0;
 979         phdr->p_filesz = 0;
 980         phdr->p_memsz = 0;
 981 
 982         (elf->e_phnum)++;
 983 
 984         /* setup ELF PT_NOTE for vmcoreinfo */
 985         phdr = (struct elf_phdr *)bufp;
 986         bufp += sizeof(struct elf_phdr);
 987         phdr->p_type    = PT_NOTE;
 988         phdr->p_flags   = 0;
 989         phdr->p_vaddr   = 0;
 990         phdr->p_align   = 0;
 991 
 992         phdr->p_paddr   = fadump_relocate(paddr_vmcoreinfo_note());
 993         phdr->p_offset  = phdr->p_paddr;
 994         phdr->p_memsz   = phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
 995 
 996         /* Increment number of program headers. */
 997         (elf->e_phnum)++;
 998 
 999         /* setup PT_LOAD sections. */
1000         j = 0;
1001         offset = 0;
1002         raddr = fw_dump.boot_mem_addr[0];
1003         for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
1004                 u64 mbase, msize;
1005 
1006                 mbase = crash_mrange_info.mem_ranges[i].base;
1007                 msize = crash_mrange_info.mem_ranges[i].size;
1008                 if (!msize)
1009                         continue;
1010 
1011                 phdr = (struct elf_phdr *)bufp;
1012                 bufp += sizeof(struct elf_phdr);
1013                 phdr->p_type    = PT_LOAD;
1014                 phdr->p_flags   = PF_R|PF_W|PF_X;
1015                 phdr->p_offset  = mbase;
1016 
1017                 if (mbase == raddr) {
1018                         /*
1019                          * The entire real memory region will be moved by
1020                          * firmware to the specified destination_address.
1021                          * Hence set the correct offset.
1022                          */
1023                         phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
1024                         if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
1025                                 offset += fw_dump.boot_mem_sz[j];
1026                                 raddr = fw_dump.boot_mem_addr[++j];
1027                         }
1028                 }
1029 
1030                 phdr->p_paddr = mbase;
1031                 phdr->p_vaddr = (unsigned long)__va(mbase);
1032                 phdr->p_filesz = msize;
1033                 phdr->p_memsz = msize;
1034                 phdr->p_align = 0;
1035 
1036                 /* Increment number of program headers. */
1037                 (elf->e_phnum)++;
1038         }
1039         return 0;
1040 }
1041 
1042 static unsigned long init_fadump_header(unsigned long addr)
1043 {
1044         struct fadump_crash_info_header *fdh;
1045 
1046         if (!addr)
1047                 return 0;
1048 
1049         fdh = __va(addr);
1050         addr += sizeof(struct fadump_crash_info_header);
1051 
1052         memset(fdh, 0, sizeof(struct fadump_crash_info_header));
1053         fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
1054         fdh->elfcorehdr_addr = addr;
1055         /* We will set the crashing cpu id in crash_fadump() during crash. */
1056         fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
1057 
1058         return addr;
1059 }
1060 
1061 static int register_fadump(void)
1062 {
1063         unsigned long addr;
1064         void *vaddr;
1065         int ret;
1066 
1067         /*
1068          * If no memory is reserved then we can not register for firmware-
1069          * assisted dump.
1070          */
1071         if (!fw_dump.reserve_dump_area_size)
1072                 return -ENODEV;
1073 
1074         ret = fadump_setup_crash_memory_ranges();
1075         if (ret)
1076                 return ret;
1077 
1078         addr = fw_dump.fadumphdr_addr;
1079 
1080         /* Initialize fadump crash info header. */
1081         addr = init_fadump_header(addr);
1082         vaddr = __va(addr);
1083 
1084         pr_debug("Creating ELF core headers at %#016lx\n", addr);
1085         fadump_create_elfcore_headers(vaddr);
1086 
1087         /* register the future kernel dump with firmware. */
1088         pr_debug("Registering for firmware-assisted kernel dump...\n");
1089         return fw_dump.ops->fadump_register(&fw_dump);
1090 }
1091 
1092 void fadump_cleanup(void)
1093 {
1094         if (!fw_dump.fadump_supported)
1095                 return;
1096 
1097         /* Invalidate the registration only if dump is active. */
1098         if (fw_dump.dump_active) {
1099                 pr_debug("Invalidating firmware-assisted dump registration\n");
1100                 fw_dump.ops->fadump_invalidate(&fw_dump);
1101         } else if (fw_dump.dump_registered) {
1102                 /* Un-register Firmware-assisted dump if it was registered. */
1103                 fw_dump.ops->fadump_unregister(&fw_dump);
1104                 fadump_free_mem_ranges(&crash_mrange_info);
1105         }
1106 
1107         if (fw_dump.ops->fadump_cleanup)
1108                 fw_dump.ops->fadump_cleanup(&fw_dump);
1109 }
1110 
1111 static void fadump_free_reserved_memory(unsigned long start_pfn,
1112                                         unsigned long end_pfn)
1113 {
1114         unsigned long pfn;
1115         unsigned long time_limit = jiffies + HZ;
1116 
1117         pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
1118                 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
1119 
1120         for (pfn = start_pfn; pfn < end_pfn; pfn++) {
1121                 free_reserved_page(pfn_to_page(pfn));
1122 
1123                 if (time_after(jiffies, time_limit)) {
1124                         cond_resched();
1125                         time_limit = jiffies + HZ;
1126                 }
1127         }
1128 }
1129 
1130 /*
1131  * Skip memory holes and free memory that was actually reserved.
1132  */
1133 static void fadump_release_reserved_area(u64 start, u64 end)
1134 {
1135         u64 tstart, tend, spfn, epfn;
1136         struct memblock_region *reg;
1137 
1138         spfn = PHYS_PFN(start);
1139         epfn = PHYS_PFN(end);
1140         for_each_memblock(memory, reg) {
1141                 tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
1142                 tend   = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
1143                 if (tstart < tend) {
1144                         fadump_free_reserved_memory(tstart, tend);
1145 
1146                         if (tend == epfn)
1147                                 break;
1148 
1149                         spfn = tend;
1150                 }
1151         }
1152 }
1153 
1154 /*
1155  * Sort the mem ranges in-place and merge adjacent ranges
1156  * to minimize the memory ranges count.
1157  */
1158 static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
1159 {
1160         struct fadump_memory_range *mem_ranges;
1161         struct fadump_memory_range tmp_range;
1162         u64 base, size;
1163         int i, j, idx;
1164 
1165         if (!reserved_mrange_info.mem_range_cnt)
1166                 return;
1167 
1168         /* Sort the memory ranges */
1169         mem_ranges = mrange_info->mem_ranges;
1170         for (i = 0; i < mrange_info->mem_range_cnt; i++) {
1171                 idx = i;
1172                 for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
1173                         if (mem_ranges[idx].base > mem_ranges[j].base)
1174                                 idx = j;
1175                 }
1176                 if (idx != i) {
1177                         tmp_range = mem_ranges[idx];
1178                         mem_ranges[idx] = mem_ranges[i];
1179                         mem_ranges[i] = tmp_range;
1180                 }
1181         }
1182 
1183         /* Merge adjacent reserved ranges */
1184         idx = 0;
1185         for (i = 1; i < mrange_info->mem_range_cnt; i++) {
1186                 base = mem_ranges[i-1].base;
1187                 size = mem_ranges[i-1].size;
1188                 if (mem_ranges[i].base == (base + size))
1189                         mem_ranges[idx].size += mem_ranges[i].size;
1190                 else {
1191                         idx++;
1192                         if (i == idx)
1193                                 continue;
1194 
1195                         mem_ranges[idx] = mem_ranges[i];
1196                 }
1197         }
1198         mrange_info->mem_range_cnt = idx + 1;
1199 }
1200 
1201 /*
1202  * Scan reserved-ranges to consider them while reserving/releasing
1203  * memory for FADump.
1204  */
1205 static inline int fadump_scan_reserved_mem_ranges(void)
1206 {
1207         struct device_node *root;
1208         const __be32 *prop;
1209         int len, ret = -1;
1210         unsigned long i;
1211 
1212         root = of_find_node_by_path("/");
1213         if (!root)
1214                 return ret;
1215 
1216         prop = of_get_property(root, "reserved-ranges", &len);
1217         if (!prop)
1218                 return ret;
1219 
1220         /*
1221          * Each reserved range is an (address,size) pair, 2 cells each,
1222          * totalling 4 cells per range.
1223          */
1224         for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
1225                 u64 base, size;
1226 
1227                 base = of_read_number(prop + (i * 4) + 0, 2);
1228                 size = of_read_number(prop + (i * 4) + 2, 2);
1229 
1230                 if (size) {
1231                         ret = fadump_add_mem_range(&reserved_mrange_info,
1232                                                    base, base + size);
1233                         if (ret < 0) {
1234                                 pr_warn("some reserved ranges are ignored!\n");
1235                                 break;
1236                         }
1237                 }
1238         }
1239 
1240         return ret;
1241 }
1242 
1243 /*
1244  * Release the memory that was reserved during early boot to preserve the
1245  * crash'ed kernel's memory contents except reserved dump area (permanent
1246  * reservation) and reserved ranges used by F/W. The released memory will
1247  * be available for general use.
1248  */
1249 static void fadump_release_memory(u64 begin, u64 end)
1250 {
1251         u64 ra_start, ra_end, tstart;
1252         int i, ret;
1253 
1254         fadump_scan_reserved_mem_ranges();
1255 
1256         ra_start = fw_dump.reserve_dump_area_start;
1257         ra_end = ra_start + fw_dump.reserve_dump_area_size;
1258 
1259         /*
1260          * Add reserved dump area to reserved ranges list
1261          * and exclude all these ranges while releasing memory.
1262          */
1263         ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
1264         if (ret != 0) {
1265                 /*
1266                  * Not enough memory to setup reserved ranges but the system is
1267                  * running shortage of memory. So, release all the memory except
1268                  * Reserved dump area (reused for next fadump registration).
1269                  */
1270                 if (begin < ra_end && end > ra_start) {
1271                         if (begin < ra_start)
1272                                 fadump_release_reserved_area(begin, ra_start);
1273                         if (end > ra_end)
1274                                 fadump_release_reserved_area(ra_end, end);
1275                 } else
1276                         fadump_release_reserved_area(begin, end);
1277 
1278                 return;
1279         }
1280 
1281         /* Get the reserved ranges list in order first. */
1282         sort_and_merge_mem_ranges(&reserved_mrange_info);
1283 
1284         /* Exclude reserved ranges and release remaining memory */
1285         tstart = begin;
1286         for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
1287                 ra_start = reserved_mrange_info.mem_ranges[i].base;
1288                 ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;
1289 
1290                 if (tstart >= ra_end)
1291                         continue;
1292 
1293                 if (tstart < ra_start)
1294                         fadump_release_reserved_area(tstart, ra_start);
1295                 tstart = ra_end;
1296         }
1297 
1298         if (tstart < end)
1299                 fadump_release_reserved_area(tstart, end);
1300 }
1301 
1302 static void fadump_invalidate_release_mem(void)
1303 {
1304         mutex_lock(&fadump_mutex);
1305         if (!fw_dump.dump_active) {
1306                 mutex_unlock(&fadump_mutex);
1307                 return;
1308         }
1309 
1310         fadump_cleanup();
1311         mutex_unlock(&fadump_mutex);
1312 
1313         fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
1314         fadump_free_cpu_notes_buf();
1315 
1316         /*
1317          * Setup kernel metadata and initialize the kernel dump
1318          * memory structure for FADump re-registration.
1319          */
1320         if (fw_dump.ops->fadump_setup_metadata &&
1321             (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
1322                 pr_warn("Failed to setup kernel metadata!\n");
1323         fw_dump.ops->fadump_init_mem_struct(&fw_dump);
1324 }
1325 
1326 static ssize_t fadump_release_memory_store(struct kobject *kobj,
1327                                         struct kobj_attribute *attr,
1328                                         const char *buf, size_t count)
1329 {
1330         int input = -1;
1331 
1332         if (!fw_dump.dump_active)
1333                 return -EPERM;
1334 
1335         if (kstrtoint(buf, 0, &input))
1336                 return -EINVAL;
1337 
1338         if (input == 1) {
1339                 /*
1340                  * Take away the '/proc/vmcore'. We are releasing the dump
1341                  * memory, hence it will not be valid anymore.
1342                  */
1343 #ifdef CONFIG_PROC_VMCORE
1344                 vmcore_cleanup();
1345 #endif
1346                 fadump_invalidate_release_mem();
1347 
1348         } else
1349                 return -EINVAL;
1350         return count;
1351 }
1352 
1353 static ssize_t fadump_enabled_show(struct kobject *kobj,
1354                                         struct kobj_attribute *attr,
1355                                         char *buf)
1356 {
1357         return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
1358 }
1359 
1360 static ssize_t fadump_register_show(struct kobject *kobj,
1361                                         struct kobj_attribute *attr,
1362                                         char *buf)
1363 {
1364         return sprintf(buf, "%d\n", fw_dump.dump_registered);
1365 }
1366 
1367 static ssize_t fadump_register_store(struct kobject *kobj,
1368                                         struct kobj_attribute *attr,
1369                                         const char *buf, size_t count)
1370 {
1371         int ret = 0;
1372         int input = -1;
1373 
1374         if (!fw_dump.fadump_enabled || fw_dump.dump_active)
1375                 return -EPERM;
1376 
1377         if (kstrtoint(buf, 0, &input))
1378                 return -EINVAL;
1379 
1380         mutex_lock(&fadump_mutex);
1381 
1382         switch (input) {
1383         case 0:
1384                 if (fw_dump.dump_registered == 0) {
1385                         goto unlock_out;
1386                 }
1387 
1388                 /* Un-register Firmware-assisted dump */
1389                 pr_debug("Un-register firmware-assisted dump\n");
1390                 fw_dump.ops->fadump_unregister(&fw_dump);
1391                 break;
1392         case 1:
1393                 if (fw_dump.dump_registered == 1) {
1394                         /* Un-register Firmware-assisted dump */
1395                         fw_dump.ops->fadump_unregister(&fw_dump);
1396                 }
1397                 /* Register Firmware-assisted dump */
1398                 ret = register_fadump();
1399                 break;
1400         default:
1401                 ret = -EINVAL;
1402                 break;
1403         }
1404 
1405 unlock_out:
1406         mutex_unlock(&fadump_mutex);
1407         return ret < 0 ? ret : count;
1408 }
1409 
1410 static int fadump_region_show(struct seq_file *m, void *private)
1411 {
1412         if (!fw_dump.fadump_enabled)
1413                 return 0;
1414 
1415         mutex_lock(&fadump_mutex);
1416         fw_dump.ops->fadump_region_show(&fw_dump, m);
1417         mutex_unlock(&fadump_mutex);
1418         return 0;
1419 }
1420 
1421 static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
1422                                                 0200, NULL,
1423                                                 fadump_release_memory_store);
1424 static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
1425                                                 0444, fadump_enabled_show,
1426                                                 NULL);
1427 static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
1428                                                 0644, fadump_register_show,
1429                                                 fadump_register_store);
1430 
1431 DEFINE_SHOW_ATTRIBUTE(fadump_region);
1432 
1433 static void fadump_init_files(void)
1434 {
1435         struct dentry *debugfs_file;
1436         int rc = 0;
1437 
1438         rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
1439         if (rc)
1440                 printk(KERN_ERR "fadump: unable to create sysfs file"
1441                         " fadump_enabled (%d)\n", rc);
1442 
1443         rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
1444         if (rc)
1445                 printk(KERN_ERR "fadump: unable to create sysfs file"
1446                         " fadump_registered (%d)\n", rc);
1447 
1448         debugfs_file = debugfs_create_file("fadump_region", 0444,
1449                                         powerpc_debugfs_root, NULL,
1450                                         &fadump_region_fops);
1451         if (!debugfs_file)
1452                 printk(KERN_ERR "fadump: unable to create debugfs file"
1453                                 " fadump_region\n");
1454 
1455         if (fw_dump.dump_active) {
1456                 rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
1457                 if (rc)
1458                         printk(KERN_ERR "fadump: unable to create sysfs file"
1459                                 " fadump_release_mem (%d)\n", rc);
1460         }
1461         return;
1462 }
1463 
1464 /*
1465  * Prepare for firmware-assisted dump.
1466  */
1467 int __init setup_fadump(void)
1468 {
1469         if (!fw_dump.fadump_enabled)
1470                 return 0;
1471 
1472         if (!fw_dump.fadump_supported) {
1473                 printk(KERN_ERR "Firmware-assisted dump is not supported on"
1474                         " this hardware\n");
1475                 return 0;
1476         }
1477 
1478         fadump_show_config();
1479         /*
1480          * If dump data is available then see if it is valid and prepare for
1481          * saving it to the disk.
1482          */
1483         if (fw_dump.dump_active) {
1484                 /*
1485                  * if dump process fails then invalidate the registration
1486                  * and release memory before proceeding for re-registration.
1487                  */
1488                 if (fw_dump.ops->fadump_process(&fw_dump) < 0)
1489                         fadump_invalidate_release_mem();
1490         }
1491         /* Initialize the kernel dump memory structure for FAD registration. */
1492         else if (fw_dump.reserve_dump_area_size)
1493                 fw_dump.ops->fadump_init_mem_struct(&fw_dump);
1494 
1495         fadump_init_files();
1496 
1497         return 1;
1498 }
1499 subsys_initcall(setup_fadump);
1500 #else /* !CONFIG_PRESERVE_FA_DUMP */
1501 
1502 /* Scan the Firmware Assisted dump configuration details. */
1503 int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
1504                                       int depth, void *data)
1505 {
1506         if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
1507                 return 0;
1508 
1509         opal_fadump_dt_scan(&fw_dump, node);
1510         return 1;
1511 }
1512 
1513 /*
1514  * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
1515  * preserve crash data. The subsequent memory preserving kernel boot
1516  * is likely to process this crash data.
1517  */
1518 int __init fadump_reserve_mem(void)
1519 {
1520         if (fw_dump.dump_active) {
1521                 /*
1522                  * If last boot has crashed then reserve all the memory
1523                  * above boot memory to preserve crash data.
1524                  */
1525                 pr_info("Preserving crash data for processing in next boot.\n");
1526                 fadump_reserve_crash_area(fw_dump.boot_mem_top);
1527         } else
1528                 pr_debug("FADump-aware kernel..\n");
1529 
1530         return 1;
1531 }
1532 #endif /* CONFIG_PRESERVE_FA_DUMP */
1533 
1534 /* Preserve everything above the base address */
1535 static void __init fadump_reserve_crash_area(u64 base)
1536 {
1537         struct memblock_region *reg;
1538         u64 mstart, msize;
1539 
1540         for_each_memblock(memory, reg) {
1541                 mstart = reg->base;
1542                 msize  = reg->size;
1543 
1544                 if ((mstart + msize) < base)
1545                         continue;
1546 
1547                 if (mstart < base) {
1548                         msize -= (base - mstart);
1549                         mstart = base;
1550                 }
1551 
1552                 pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data",
1553                         (msize >> 20), mstart);
1554                 memblock_reserve(mstart, msize);
1555         }
1556 }
1557 
1558 unsigned long __init arch_reserved_kernel_pages(void)
1559 {
1560         return memblock_reserved_size() / PAGE_SIZE;
1561 }

/* [<][>][^][v][top][bottom][index][help] */