root/arch/powerpc/kernel/rtasd.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rtas_event_type
  2. printk_log_rtas
  3. log_rtas_len
  4. pSeries_log_error
  5. handle_prrn_event
  6. handle_rtas_event
  7. handle_rtas_event
  8. rtas_log_open
  9. rtas_log_release
  10. rtas_log_read
  11. rtas_log_poll
  12. enable_surveillance
  13. do_event_scan
  14. rtas_event_scan
  15. retrieve_nvram_error_log
  16. retrieve_nvram_error_log
  17. start_event_scan
  18. rtas_cancel_event_scan
  19. rtas_event_scan_init
  20. rtas_init
  21. surveillance_setup
  22. rtasmsgs_setup

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
   4  *
   5  * Communication to userspace based on kernel/printk.c
   6  */
   7 
   8 #include <linux/types.h>
   9 #include <linux/errno.h>
  10 #include <linux/sched.h>
  11 #include <linux/kernel.h>
  12 #include <linux/poll.h>
  13 #include <linux/proc_fs.h>
  14 #include <linux/init.h>
  15 #include <linux/vmalloc.h>
  16 #include <linux/spinlock.h>
  17 #include <linux/cpu.h>
  18 #include <linux/workqueue.h>
  19 #include <linux/slab.h>
  20 #include <linux/topology.h>
  21 
  22 #include <linux/uaccess.h>
  23 #include <asm/io.h>
  24 #include <asm/rtas.h>
  25 #include <asm/prom.h>
  26 #include <asm/nvram.h>
  27 #include <linux/atomic.h>
  28 #include <asm/machdep.h>
  29 #include <asm/topology.h>
  30 
  31 
  32 static DEFINE_SPINLOCK(rtasd_log_lock);
  33 
  34 static DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait);
  35 
  36 static char *rtas_log_buf;
  37 static unsigned long rtas_log_start;
  38 static unsigned long rtas_log_size;
  39 
  40 static int surveillance_timeout = -1;
  41 
  42 static unsigned int rtas_error_log_max;
  43 static unsigned int rtas_error_log_buffer_max;
  44 
  45 /* RTAS service tokens */
  46 static unsigned int event_scan;
  47 static unsigned int rtas_event_scan_rate;
  48 
  49 static bool full_rtas_msgs;
  50 
  51 /* Stop logging to nvram after first fatal error */
  52 static int logging_enabled; /* Until we initialize everything,
  53                              * make sure we don't try logging
  54                              * anything */
  55 static int error_log_cnt;
  56 
  57 /*
  58  * Since we use 32 bit RTAS, the physical address of this must be below
  59  * 4G or else bad things happen. Allocate this in the kernel data and
  60  * make it big enough.
  61  */
  62 static unsigned char logdata[RTAS_ERROR_LOG_MAX];
  63 
  64 static char *rtas_type[] = {
  65         "Unknown", "Retry", "TCE Error", "Internal Device Failure",
  66         "Timeout", "Data Parity", "Address Parity", "Cache Parity",
  67         "Address Invalid", "ECC Uncorrected", "ECC Corrupted",
  68 };
  69 
  70 static char *rtas_event_type(int type)
  71 {
  72         if ((type > 0) && (type < 11))
  73                 return rtas_type[type];
  74 
  75         switch (type) {
  76                 case RTAS_TYPE_EPOW:
  77                         return "EPOW";
  78                 case RTAS_TYPE_PLATFORM:
  79                         return "Platform Error";
  80                 case RTAS_TYPE_IO:
  81                         return "I/O Event";
  82                 case RTAS_TYPE_INFO:
  83                         return "Platform Information Event";
  84                 case RTAS_TYPE_DEALLOC:
  85                         return "Resource Deallocation Event";
  86                 case RTAS_TYPE_DUMP:
  87                         return "Dump Notification Event";
  88                 case RTAS_TYPE_PRRN:
  89                         return "Platform Resource Reassignment Event";
  90                 case RTAS_TYPE_HOTPLUG:
  91                         return "Hotplug Event";
  92         }
  93 
  94         return rtas_type[0];
  95 }
  96 
  97 /* To see this info, grep RTAS /var/log/messages and each entry
  98  * will be collected together with obvious begin/end.
  99  * There will be a unique identifier on the begin and end lines.
 100  * This will persist across reboots.
 101  *
 102  * format of error logs returned from RTAS:
 103  * bytes        (size)  : contents
 104  * --------------------------------------------------------
 105  * 0-7          (8)     : rtas_error_log
 106  * 8-47         (40)    : extended info
 107  * 48-51        (4)     : vendor id
 108  * 52-1023 (vendor specific) : location code and debug data
 109  */
 110 static void printk_log_rtas(char *buf, int len)
 111 {
 112 
 113         int i,j,n = 0;
 114         int perline = 16;
 115         char buffer[64];
 116         char * str = "RTAS event";
 117 
 118         if (full_rtas_msgs) {
 119                 printk(RTAS_DEBUG "%d -------- %s begin --------\n",
 120                        error_log_cnt, str);
 121 
 122                 /*
 123                  * Print perline bytes on each line, each line will start
 124                  * with RTAS and a changing number, so syslogd will
 125                  * print lines that are otherwise the same.  Separate every
 126                  * 4 bytes with a space.
 127                  */
 128                 for (i = 0; i < len; i++) {
 129                         j = i % perline;
 130                         if (j == 0) {
 131                                 memset(buffer, 0, sizeof(buffer));
 132                                 n = sprintf(buffer, "RTAS %d:", i/perline);
 133                         }
 134 
 135                         if ((i % 4) == 0)
 136                                 n += sprintf(buffer+n, " ");
 137 
 138                         n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]);
 139 
 140                         if (j == (perline-1))
 141                                 printk(KERN_DEBUG "%s\n", buffer);
 142                 }
 143                 if ((i % perline) != 0)
 144                         printk(KERN_DEBUG "%s\n", buffer);
 145 
 146                 printk(RTAS_DEBUG "%d -------- %s end ----------\n",
 147                        error_log_cnt, str);
 148         } else {
 149                 struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
 150 
 151                 printk(RTAS_DEBUG "event: %d, Type: %s (%d), Severity: %d\n",
 152                        error_log_cnt,
 153                        rtas_event_type(rtas_error_type(errlog)),
 154                        rtas_error_type(errlog),
 155                        rtas_error_severity(errlog));
 156         }
 157 }
 158 
 159 static int log_rtas_len(char * buf)
 160 {
 161         int len;
 162         struct rtas_error_log *err;
 163         uint32_t extended_log_length;
 164 
 165         /* rtas fixed header */
 166         len = 8;
 167         err = (struct rtas_error_log *)buf;
 168         extended_log_length = rtas_error_extended_log_length(err);
 169         if (rtas_error_extended(err) && extended_log_length) {
 170 
 171                 /* extended header */
 172                 len += extended_log_length;
 173         }
 174 
 175         if (rtas_error_log_max == 0)
 176                 rtas_error_log_max = rtas_get_error_log_max();
 177 
 178         if (len > rtas_error_log_max)
 179                 len = rtas_error_log_max;
 180 
 181         return len;
 182 }
 183 
 184 /*
 185  * First write to nvram, if fatal error, that is the only
 186  * place we log the info.  The error will be picked up
 187  * on the next reboot by rtasd.  If not fatal, run the
 188  * method for the type of error.  Currently, only RTAS
 189  * errors have methods implemented, but in the future
 190  * there might be a need to store data in nvram before a
 191  * call to panic().
 192  *
 193  * XXX We write to nvram periodically, to indicate error has
 194  * been written and sync'd, but there is a possibility
 195  * that if we don't shutdown correctly, a duplicate error
 196  * record will be created on next reboot.
 197  */
 198 void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 199 {
 200         unsigned long offset;
 201         unsigned long s;
 202         int len = 0;
 203 
 204         pr_debug("rtasd: logging event\n");
 205         if (buf == NULL)
 206                 return;
 207 
 208         spin_lock_irqsave(&rtasd_log_lock, s);
 209 
 210         /* get length and increase count */
 211         switch (err_type & ERR_TYPE_MASK) {
 212         case ERR_TYPE_RTAS_LOG:
 213                 len = log_rtas_len(buf);
 214                 if (!(err_type & ERR_FLAG_BOOT))
 215                         error_log_cnt++;
 216                 break;
 217         case ERR_TYPE_KERNEL_PANIC:
 218         default:
 219                 WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 220                 spin_unlock_irqrestore(&rtasd_log_lock, s);
 221                 return;
 222         }
 223 
 224 #ifdef CONFIG_PPC64
 225         /* Write error to NVRAM */
 226         if (logging_enabled && !(err_type & ERR_FLAG_BOOT))
 227                 nvram_write_error_log(buf, len, err_type, error_log_cnt);
 228 #endif /* CONFIG_PPC64 */
 229 
 230         /*
 231          * rtas errors can occur during boot, and we do want to capture
 232          * those somewhere, even if nvram isn't ready (why not?), and even
 233          * if rtasd isn't ready. Put them into the boot log, at least.
 234          */
 235         if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG)
 236                 printk_log_rtas(buf, len);
 237 
 238         /* Check to see if we need to or have stopped logging */
 239         if (fatal || !logging_enabled) {
 240                 logging_enabled = 0;
 241                 WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 242                 spin_unlock_irqrestore(&rtasd_log_lock, s);
 243                 return;
 244         }
 245 
 246         /* call type specific method for error */
 247         switch (err_type & ERR_TYPE_MASK) {
 248         case ERR_TYPE_RTAS_LOG:
 249                 offset = rtas_error_log_buffer_max *
 250                         ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK);
 251 
 252                 /* First copy over sequence number */
 253                 memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int));
 254 
 255                 /* Second copy over error log data */
 256                 offset += sizeof(int);
 257                 memcpy(&rtas_log_buf[offset], buf, len);
 258 
 259                 if (rtas_log_size < LOG_NUMBER)
 260                         rtas_log_size += 1;
 261                 else
 262                         rtas_log_start += 1;
 263 
 264                 WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 265                 spin_unlock_irqrestore(&rtasd_log_lock, s);
 266                 wake_up_interruptible(&rtas_log_wait);
 267                 break;
 268         case ERR_TYPE_KERNEL_PANIC:
 269         default:
 270                 WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 271                 spin_unlock_irqrestore(&rtasd_log_lock, s);
 272                 return;
 273         }
 274 }
 275 
 276 #ifdef CONFIG_PPC_PSERIES
 277 static void handle_prrn_event(s32 scope)
 278 {
 279         /*
 280          * For PRRN, we must pass the negative of the scope value in
 281          * the RTAS event.
 282          */
 283         pseries_devicetree_update(-scope);
 284         numa_update_cpu_topology(false);
 285 }
 286 
 287 static void handle_rtas_event(const struct rtas_error_log *log)
 288 {
 289         if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled())
 290                 return;
 291 
 292         /* For PRRN Events the extended log length is used to denote
 293          * the scope for calling rtas update-nodes.
 294          */
 295         handle_prrn_event(rtas_error_extended_log_length(log));
 296 }
 297 
 298 #else
 299 
 300 static void handle_rtas_event(const struct rtas_error_log *log)
 301 {
 302         return;
 303 }
 304 
 305 #endif
 306 
 307 static int rtas_log_open(struct inode * inode, struct file * file)
 308 {
 309         return 0;
 310 }
 311 
 312 static int rtas_log_release(struct inode * inode, struct file * file)
 313 {
 314         return 0;
 315 }
 316 
 317 /* This will check if all events are logged, if they are then, we
 318  * know that we can safely clear the events in NVRAM.
 319  * Next we'll sit and wait for something else to log.
 320  */
 321 static ssize_t rtas_log_read(struct file * file, char __user * buf,
 322                          size_t count, loff_t *ppos)
 323 {
 324         int error;
 325         char *tmp;
 326         unsigned long s;
 327         unsigned long offset;
 328 
 329         if (!buf || count < rtas_error_log_buffer_max)
 330                 return -EINVAL;
 331 
 332         count = rtas_error_log_buffer_max;
 333 
 334         if (!access_ok(buf, count))
 335                 return -EFAULT;
 336 
 337         tmp = kmalloc(count, GFP_KERNEL);
 338         if (!tmp)
 339                 return -ENOMEM;
 340 
 341         spin_lock_irqsave(&rtasd_log_lock, s);
 342 
 343         /* if it's 0, then we know we got the last one (the one in NVRAM) */
 344         while (rtas_log_size == 0) {
 345                 if (file->f_flags & O_NONBLOCK) {
 346                         spin_unlock_irqrestore(&rtasd_log_lock, s);
 347                         error = -EAGAIN;
 348                         goto out;
 349                 }
 350 
 351                 if (!logging_enabled) {
 352                         spin_unlock_irqrestore(&rtasd_log_lock, s);
 353                         error = -ENODATA;
 354                         goto out;
 355                 }
 356 #ifdef CONFIG_PPC64
 357                 nvram_clear_error_log();
 358 #endif /* CONFIG_PPC64 */
 359 
 360                 spin_unlock_irqrestore(&rtasd_log_lock, s);
 361                 error = wait_event_interruptible(rtas_log_wait, rtas_log_size);
 362                 if (error)
 363                         goto out;
 364                 spin_lock_irqsave(&rtasd_log_lock, s);
 365         }
 366 
 367         offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK);
 368         memcpy(tmp, &rtas_log_buf[offset], count);
 369 
 370         rtas_log_start += 1;
 371         rtas_log_size -= 1;
 372         spin_unlock_irqrestore(&rtasd_log_lock, s);
 373 
 374         error = copy_to_user(buf, tmp, count) ? -EFAULT : count;
 375 out:
 376         kfree(tmp);
 377         return error;
 378 }
 379 
 380 static __poll_t rtas_log_poll(struct file *file, poll_table * wait)
 381 {
 382         poll_wait(file, &rtas_log_wait, wait);
 383         if (rtas_log_size)
 384                 return EPOLLIN | EPOLLRDNORM;
 385         return 0;
 386 }
 387 
 388 static const struct file_operations proc_rtas_log_operations = {
 389         .read =         rtas_log_read,
 390         .poll =         rtas_log_poll,
 391         .open =         rtas_log_open,
 392         .release =      rtas_log_release,
 393         .llseek =       noop_llseek,
 394 };
 395 
 396 static int enable_surveillance(int timeout)
 397 {
 398         int error;
 399 
 400         error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout);
 401 
 402         if (error == 0)
 403                 return 0;
 404 
 405         if (error == -EINVAL) {
 406                 printk(KERN_DEBUG "rtasd: surveillance not supported\n");
 407                 return 0;
 408         }
 409 
 410         printk(KERN_ERR "rtasd: could not update surveillance\n");
 411         return -1;
 412 }
 413 
 414 static void do_event_scan(void)
 415 {
 416         int error;
 417         do {
 418                 memset(logdata, 0, rtas_error_log_max);
 419                 error = rtas_call(event_scan, 4, 1, NULL,
 420                                   RTAS_EVENT_SCAN_ALL_EVENTS, 0,
 421                                   __pa(logdata), rtas_error_log_max);
 422                 if (error == -1) {
 423                         printk(KERN_ERR "event-scan failed\n");
 424                         break;
 425                 }
 426 
 427                 if (error == 0) {
 428                         if (rtas_error_type((struct rtas_error_log *)logdata) !=
 429                             RTAS_TYPE_PRRN)
 430                                 pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG,
 431                                                   0);
 432                         handle_rtas_event((struct rtas_error_log *)logdata);
 433                 }
 434 
 435         } while(error == 0);
 436 }
 437 
 438 static void rtas_event_scan(struct work_struct *w);
 439 static DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan);
 440 
 441 /*
 442  * Delay should be at least one second since some machines have problems if
 443  * we call event-scan too quickly.
 444  */
 445 static unsigned long event_scan_delay = 1*HZ;
 446 static int first_pass = 1;
 447 
 448 static void rtas_event_scan(struct work_struct *w)
 449 {
 450         unsigned int cpu;
 451 
 452         do_event_scan();
 453 
 454         get_online_cpus();
 455 
 456         /* raw_ OK because just using CPU as starting point. */
 457         cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
 458         if (cpu >= nr_cpu_ids) {
 459                 cpu = cpumask_first(cpu_online_mask);
 460 
 461                 if (first_pass) {
 462                         first_pass = 0;
 463                         event_scan_delay = 30*HZ/rtas_event_scan_rate;
 464 
 465                         if (surveillance_timeout != -1) {
 466                                 pr_debug("rtasd: enabling surveillance\n");
 467                                 enable_surveillance(surveillance_timeout);
 468                                 pr_debug("rtasd: surveillance enabled\n");
 469                         }
 470                 }
 471         }
 472 
 473         schedule_delayed_work_on(cpu, &event_scan_work,
 474                 __round_jiffies_relative(event_scan_delay, cpu));
 475 
 476         put_online_cpus();
 477 }
 478 
 479 #ifdef CONFIG_PPC64
 480 static void retrieve_nvram_error_log(void)
 481 {
 482         unsigned int err_type ;
 483         int rc ;
 484 
 485         /* See if we have any error stored in NVRAM */
 486         memset(logdata, 0, rtas_error_log_max);
 487         rc = nvram_read_error_log(logdata, rtas_error_log_max,
 488                                   &err_type, &error_log_cnt);
 489         /* We can use rtas_log_buf now */
 490         logging_enabled = 1;
 491         if (!rc) {
 492                 if (err_type != ERR_FLAG_ALREADY_LOGGED) {
 493                         pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0);
 494                 }
 495         }
 496 }
 497 #else /* CONFIG_PPC64 */
 498 static void retrieve_nvram_error_log(void)
 499 {
 500 }
 501 #endif /* CONFIG_PPC64 */
 502 
 503 static void start_event_scan(void)
 504 {
 505         printk(KERN_DEBUG "RTAS daemon started\n");
 506         pr_debug("rtasd: will sleep for %d milliseconds\n",
 507                  (30000 / rtas_event_scan_rate));
 508 
 509         /* Retrieve errors from nvram if any */
 510         retrieve_nvram_error_log();
 511 
 512         schedule_delayed_work_on(cpumask_first(cpu_online_mask),
 513                                  &event_scan_work, event_scan_delay);
 514 }
 515 
 516 /* Cancel the rtas event scan work */
 517 void rtas_cancel_event_scan(void)
 518 {
 519         cancel_delayed_work_sync(&event_scan_work);
 520 }
 521 EXPORT_SYMBOL_GPL(rtas_cancel_event_scan);
 522 
 523 static int __init rtas_event_scan_init(void)
 524 {
 525         if (!machine_is(pseries) && !machine_is(chrp))
 526                 return 0;
 527 
 528         /* No RTAS */
 529         event_scan = rtas_token("event-scan");
 530         if (event_scan == RTAS_UNKNOWN_SERVICE) {
 531                 printk(KERN_INFO "rtasd: No event-scan on system\n");
 532                 return -ENODEV;
 533         }
 534 
 535         rtas_event_scan_rate = rtas_token("rtas-event-scan-rate");
 536         if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) {
 537                 printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n");
 538                 return -ENODEV;
 539         }
 540 
 541         if (!rtas_event_scan_rate) {
 542                 /* Broken firmware: take a rate of zero to mean don't scan */
 543                 printk(KERN_DEBUG "rtasd: scan rate is 0, not scanning\n");
 544                 return 0;
 545         }
 546 
 547         /* Make room for the sequence number */
 548         rtas_error_log_max = rtas_get_error_log_max();
 549         rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int);
 550 
 551         rtas_log_buf = vmalloc(array_size(LOG_NUMBER,
 552                                           rtas_error_log_buffer_max));
 553         if (!rtas_log_buf) {
 554                 printk(KERN_ERR "rtasd: no memory\n");
 555                 return -ENOMEM;
 556         }
 557 
 558         start_event_scan();
 559 
 560         return 0;
 561 }
 562 arch_initcall(rtas_event_scan_init);
 563 
 564 static int __init rtas_init(void)
 565 {
 566         struct proc_dir_entry *entry;
 567 
 568         if (!machine_is(pseries) && !machine_is(chrp))
 569                 return 0;
 570 
 571         if (!rtas_log_buf)
 572                 return -ENODEV;
 573 
 574         entry = proc_create("powerpc/rtas/error_log", 0400, NULL,
 575                             &proc_rtas_log_operations);
 576         if (!entry)
 577                 printk(KERN_ERR "Failed to create error_log proc entry\n");
 578 
 579         return 0;
 580 }
 581 __initcall(rtas_init);
 582 
 583 static int __init surveillance_setup(char *str)
 584 {
 585         int i;
 586 
 587         /* We only do surveillance on pseries */
 588         if (!machine_is(pseries))
 589                 return 0;
 590 
 591         if (get_option(&str,&i)) {
 592                 if (i >= 0 && i <= 255)
 593                         surveillance_timeout = i;
 594         }
 595 
 596         return 1;
 597 }
 598 __setup("surveillance=", surveillance_setup);
 599 
 600 static int __init rtasmsgs_setup(char *str)
 601 {
 602         return (kstrtobool(str, &full_rtas_msgs) == 0);
 603 }
 604 __setup("rtasmsgs=", rtasmsgs_setup);

/* [<][>][^][v][top][bottom][index][help] */