root/arch/parisc/kernel/pdt.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. arch_report_meminfo
  2. get_info_pat_new
  3. get_info_pat_cell
  4. report_mem_err
  5. pdc_pdt_init
  6. pdt_mainloop
  7. pdt_initcall

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  *    Page Deallocation Table (PDT) support
   4  *
   5  *    The Page Deallocation Table (PDT) is maintained by firmware and holds a
   6  *    list of memory addresses in which memory errors were detected.
   7  *    The list contains both single-bit (correctable) and double-bit
   8  *    (uncorrectable) errors.
   9  *
  10  *    Copyright 2017 by Helge Deller <deller@gmx.de>
  11  *
  12  *    possible future enhancements:
  13  *    - add userspace interface via procfs or sysfs to clear PDT
  14  */
  15 
  16 #include <linux/memblock.h>
  17 #include <linux/seq_file.h>
  18 #include <linux/kthread.h>
  19 #include <linux/initrd.h>
  20 
  21 #include <asm/pdc.h>
  22 #include <asm/pdcpat.h>
  23 #include <asm/sections.h>
  24 #include <asm/pgtable.h>
  25 
  26 enum pdt_access_type {
  27         PDT_NONE,
  28         PDT_PDC,
  29         PDT_PAT_NEW,
  30         PDT_PAT_CELL
  31 };
  32 
  33 static enum pdt_access_type pdt_type;
  34 
  35 /* PDT poll interval: 1 minute if errors, 5 minutes if everything OK. */
  36 #define PDT_POLL_INTERVAL_DEFAULT       (5*60*HZ)
  37 #define PDT_POLL_INTERVAL_SHORT         (1*60*HZ)
  38 static unsigned long pdt_poll_interval = PDT_POLL_INTERVAL_DEFAULT;
  39 
  40 /* global PDT status information */
  41 static struct pdc_mem_retinfo pdt_status;
  42 
  43 #define MAX_PDT_TABLE_SIZE      PAGE_SIZE
  44 #define MAX_PDT_ENTRIES         (MAX_PDT_TABLE_SIZE / sizeof(unsigned long))
  45 static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss;
  46 
  47 /*
  48  * Constants for the pdt_entry format:
  49  * A pdt_entry holds the physical address in bits 0-57, bits 58-61 are
  50  * reserved, bit 62 is the perm bit and bit 63 is the error_type bit.
  51  * The perm bit indicates whether the error have been verified as a permanent
  52  * error (value of 1) or has not been verified, and may be transient (value
  53  * of 0). The error_type bit indicates whether the error is a single bit error
  54  * (value of 1) or a multiple bit error.
  55  * On non-PAT machines phys_addr is encoded in bits 0-59 and error_type in bit
  56  * 63. Those machines don't provide the perm bit.
  57  */
  58 
  59 #define PDT_ADDR_PHYS_MASK      (pdt_type != PDT_PDC ? ~0x3f : ~0x0f)
  60 #define PDT_ADDR_PERM_ERR       (pdt_type != PDT_PDC ? 2UL : 0UL)
  61 #define PDT_ADDR_SINGLE_ERR     1UL
  62 
  63 /* report PDT entries via /proc/meminfo */
  64 void arch_report_meminfo(struct seq_file *m)
  65 {
  66         if (pdt_type == PDT_NONE)
  67                 return;
  68 
  69         seq_printf(m, "PDT_max_entries: %7lu\n",
  70                         pdt_status.pdt_size);
  71         seq_printf(m, "PDT_cur_entries: %7lu\n",
  72                         pdt_status.pdt_entries);
  73 }
  74 
  75 static int get_info_pat_new(void)
  76 {
  77         struct pdc_pat_mem_retinfo pat_rinfo;
  78         int ret;
  79 
  80         /* newer PAT machines like C8000 report info for all cells */
  81         if (is_pdc_pat())
  82                 ret = pdc_pat_mem_pdt_info(&pat_rinfo);
  83         else
  84                 return PDC_BAD_PROC;
  85 
  86         pdt_status.pdt_size = pat_rinfo.max_pdt_entries;
  87         pdt_status.pdt_entries = pat_rinfo.current_pdt_entries;
  88         pdt_status.pdt_status = 0;
  89         pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc;
  90         pdt_status.good_mem = pat_rinfo.good_mem;
  91 
  92         return ret;
  93 }
  94 
  95 static int get_info_pat_cell(void)
  96 {
  97         struct pdc_pat_mem_cell_pdt_retinfo cell_rinfo;
  98         int ret;
  99 
 100         /* older PAT machines like rp5470 report cell info only */
 101         if (is_pdc_pat())
 102                 ret = pdc_pat_mem_pdt_cell_info(&cell_rinfo, parisc_cell_num);
 103         else
 104                 return PDC_BAD_PROC;
 105 
 106         pdt_status.pdt_size = cell_rinfo.max_pdt_entries;
 107         pdt_status.pdt_entries = cell_rinfo.current_pdt_entries;
 108         pdt_status.pdt_status = 0;
 109         pdt_status.first_dbe_loc = cell_rinfo.first_dbe_loc;
 110         pdt_status.good_mem = cell_rinfo.good_mem;
 111 
 112         return ret;
 113 }
 114 
 115 static void report_mem_err(unsigned long pde)
 116 {
 117         struct pdc_pat_mem_phys_mem_location loc;
 118         unsigned long addr;
 119         char dimm_txt[32];
 120 
 121         addr = pde & PDT_ADDR_PHYS_MASK;
 122 
 123         /* show DIMM slot description on PAT machines */
 124         if (is_pdc_pat()) {
 125                 pdc_pat_mem_get_dimm_phys_location(&loc, addr);
 126                 sprintf(dimm_txt, "DIMM slot %02x, ", loc.dimm_slot);
 127         } else
 128                 dimm_txt[0] = 0;
 129 
 130         pr_warn("PDT: BAD MEMORY at 0x%08lx, %s%s%s-bit error.\n",
 131                 addr, dimm_txt,
 132                 pde & PDT_ADDR_PERM_ERR ? "permanent ":"",
 133                 pde & PDT_ADDR_SINGLE_ERR ? "single":"multi");
 134 }
 135 
 136 
 137 /*
 138  * pdc_pdt_init()
 139  *
 140  * Initialize kernel PDT structures, read initial PDT table from firmware,
 141  * report all current PDT entries and mark bad memory with memblock_reserve()
 142  * to avoid that the kernel will use broken memory areas.
 143  *
 144  */
 145 void __init pdc_pdt_init(void)
 146 {
 147         int ret, i;
 148         unsigned long entries;
 149         struct pdc_mem_read_pdt pdt_read_ret;
 150 
 151         pdt_type = PDT_PAT_NEW;
 152         ret = get_info_pat_new();
 153 
 154         if (ret != PDC_OK) {
 155                 pdt_type = PDT_PAT_CELL;
 156                 ret = get_info_pat_cell();
 157         }
 158 
 159         if (ret != PDC_OK) {
 160                 pdt_type = PDT_PDC;
 161                 /* non-PAT machines provide the standard PDC call */
 162                 ret = pdc_mem_pdt_info(&pdt_status);
 163         }
 164 
 165         if (ret != PDC_OK) {
 166                 pdt_type = PDT_NONE;
 167                 pr_info("PDT: Firmware does not provide any page deallocation"
 168                         " information.\n");
 169                 return;
 170         }
 171 
 172         entries = pdt_status.pdt_entries;
 173         if (WARN_ON(entries > MAX_PDT_ENTRIES))
 174                 entries = pdt_status.pdt_entries = MAX_PDT_ENTRIES;
 175 
 176         pr_info("PDT: type %s, size %lu, entries %lu, status %lu, dbe_loc 0x%lx,"
 177                 " good_mem %lu MB\n",
 178                         pdt_type == PDT_PDC ? __stringify(PDT_PDC) :
 179                         pdt_type == PDT_PAT_CELL ? __stringify(PDT_PAT_CELL)
 180                                                  : __stringify(PDT_PAT_NEW),
 181                         pdt_status.pdt_size, pdt_status.pdt_entries,
 182                         pdt_status.pdt_status, pdt_status.first_dbe_loc,
 183                         pdt_status.good_mem / 1024 / 1024);
 184 
 185         if (entries == 0) {
 186                 pr_info("PDT: Firmware reports all memory OK.\n");
 187                 return;
 188         }
 189 
 190         if (pdt_status.first_dbe_loc &&
 191                 pdt_status.first_dbe_loc <= __pa((unsigned long)&_end))
 192                 pr_crit("CRITICAL: Bad memory inside kernel image memory area!\n");
 193 
 194         pr_warn("PDT: Firmware reports %lu entries of faulty memory:\n",
 195                 entries);
 196 
 197         if (pdt_type == PDT_PDC)
 198                 ret = pdc_mem_pdt_read_entries(&pdt_read_ret, pdt_entry);
 199         else {
 200 #ifdef CONFIG_64BIT
 201                 struct pdc_pat_mem_read_pd_retinfo pat_pret;
 202 
 203                 if (pdt_type == PDT_PAT_CELL)
 204                         ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
 205                                 MAX_PDT_ENTRIES);
 206                 else
 207                         ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry,
 208                                 MAX_PDT_TABLE_SIZE, 0);
 209 #else
 210                 ret = PDC_BAD_PROC;
 211 #endif
 212         }
 213 
 214         if (ret != PDC_OK) {
 215                 pdt_type = PDT_NONE;
 216                 pr_warn("PDT: Get PDT entries failed with %d\n", ret);
 217                 return;
 218         }
 219 
 220         for (i = 0; i < pdt_status.pdt_entries; i++) {
 221                 unsigned long addr;
 222 
 223                 report_mem_err(pdt_entry[i]);
 224 
 225                 addr = pdt_entry[i] & PDT_ADDR_PHYS_MASK;
 226                 if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) &&
 227                         addr >= initrd_start && addr < initrd_end)
 228                         pr_crit("CRITICAL: initrd possibly broken "
 229                                 "due to bad memory!\n");
 230 
 231                 /* mark memory page bad */
 232                 memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
 233         }
 234 }
 235 
 236 
 237 /*
 238  * This is the PDT kernel thread main loop.
 239  */
 240 
 241 static int pdt_mainloop(void *unused)
 242 {
 243         struct pdc_mem_read_pdt pdt_read_ret;
 244         struct pdc_pat_mem_read_pd_retinfo pat_pret __maybe_unused;
 245         unsigned long old_num_entries;
 246         unsigned long *bad_mem_ptr;
 247         int num, ret;
 248 
 249         for (;;) {
 250                 set_current_state(TASK_INTERRUPTIBLE);
 251 
 252                 old_num_entries = pdt_status.pdt_entries;
 253 
 254                 schedule_timeout(pdt_poll_interval);
 255                 if (kthread_should_stop())
 256                         break;
 257 
 258                 /* Do we have new PDT entries? */
 259                 switch (pdt_type) {
 260                 case PDT_PAT_NEW:
 261                         ret = get_info_pat_new();
 262                         break;
 263                 case PDT_PAT_CELL:
 264                         ret = get_info_pat_cell();
 265                         break;
 266                 default:
 267                         ret = pdc_mem_pdt_info(&pdt_status);
 268                         break;
 269                 }
 270 
 271                 if (ret != PDC_OK) {
 272                         pr_warn("PDT: unexpected failure %d\n", ret);
 273                         return -EINVAL;
 274                 }
 275 
 276                 /* if no new PDT entries, just wait again */
 277                 num = pdt_status.pdt_entries - old_num_entries;
 278                 if (num <= 0)
 279                         continue;
 280 
 281                 /* decrease poll interval in case we found memory errors */
 282                 if (pdt_status.pdt_entries &&
 283                         pdt_poll_interval == PDT_POLL_INTERVAL_DEFAULT)
 284                         pdt_poll_interval = PDT_POLL_INTERVAL_SHORT;
 285 
 286                 /* limit entries to get */
 287                 if (num > MAX_PDT_ENTRIES) {
 288                         num = MAX_PDT_ENTRIES;
 289                         pdt_status.pdt_entries = old_num_entries + num;
 290                 }
 291 
 292                 /* get new entries */
 293                 switch (pdt_type) {
 294 #ifdef CONFIG_64BIT
 295                 case PDT_PAT_CELL:
 296                         if (pdt_status.pdt_entries > MAX_PDT_ENTRIES) {
 297                                 pr_crit("PDT: too many entries.\n");
 298                                 return -ENOMEM;
 299                         }
 300                         ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
 301                                 MAX_PDT_ENTRIES);
 302                         bad_mem_ptr = &pdt_entry[old_num_entries];
 303                         break;
 304                 case PDT_PAT_NEW:
 305                         ret = pdc_pat_mem_read_pd_pdt(&pat_pret,
 306                                 pdt_entry,
 307                                 num * sizeof(unsigned long),
 308                                 old_num_entries * sizeof(unsigned long));
 309                         bad_mem_ptr = &pdt_entry[0];
 310                         break;
 311 #endif
 312                 default:
 313                         ret = pdc_mem_pdt_read_entries(&pdt_read_ret,
 314                                 pdt_entry);
 315                         bad_mem_ptr = &pdt_entry[old_num_entries];
 316                         break;
 317                 }
 318 
 319                 /* report and mark memory broken */
 320                 while (num--) {
 321                         unsigned long pde = *bad_mem_ptr++;
 322 
 323                         report_mem_err(pde);
 324 
 325 #ifdef CONFIG_MEMORY_FAILURE
 326                         if ((pde & PDT_ADDR_PERM_ERR) ||
 327                             ((pde & PDT_ADDR_SINGLE_ERR) == 0))
 328                                 memory_failure(pde >> PAGE_SHIFT, 0);
 329                         else
 330                                 soft_offline_page(
 331                                         pfn_to_page(pde >> PAGE_SHIFT), 0);
 332 #else
 333                         pr_crit("PDT: memory error at 0x%lx ignored.\n"
 334                                 "Rebuild kernel with CONFIG_MEMORY_FAILURE=y "
 335                                 "for real handling.\n",
 336                                 pde & PDT_ADDR_PHYS_MASK);
 337 #endif
 338 
 339                 }
 340         }
 341 
 342         return 0;
 343 }
 344 
 345 
 346 static int __init pdt_initcall(void)
 347 {
 348         struct task_struct *kpdtd_task;
 349 
 350         if (pdt_type == PDT_NONE)
 351                 return -ENODEV;
 352 
 353         kpdtd_task = kthread_create(pdt_mainloop, NULL, "kpdtd");
 354         if (IS_ERR(kpdtd_task))
 355                 return PTR_ERR(kpdtd_task);
 356 
 357         wake_up_process(kpdtd_task);
 358 
 359         return 0;
 360 }
 361 
 362 late_initcall(pdt_initcall);

/* [<][>][^][v][top][bottom][index][help] */