arch/powerpc/platforms/powernv/opal.c

DEFINITIONS

This source file includes the following definitions:
  1. opal_configure_cores
  2. early_init_dt_scan_opal
  3. early_init_dt_scan_recoverable_ranges
  4. opal_register_exception_handlers
  5. opal_message_notifier_register
  6. opal_message_notifier_unregister
  7. opal_message_do_notify
  8. opal_handle_message
  9. opal_message_notify
  10. opal_message_init
  11. opal_get_chars
  12. __opal_put_chars
  13. opal_put_chars
  14. opal_put_chars_atomic
  15. __opal_flush_console
  16. opal_flush_console
  17. opal_flush_chars
  18. opal_recover_mce
  19. pnv_platform_error_reboot
  20. opal_machine_check
  21. opal_hmi_exception_early
  22. opal_hmi_exception_early2
  23. opal_handle_hmi_exception
  24. find_recovery_address
  25. opal_mce_check_early_recovery
  26. opal_sysfs_init
  27. symbol_map_read
  28. opal_export_symmap
  29. export_attr_read
  30. opal_export_attrs
  31. opal_dump_region_init
  32. opal_pdev_init
  33. opal_imc_init_dev
  34. kopald
  35. opal_wake_poller
  36. opal_init_heartbeat
  37. opal_init
  38. opal_shutdown
  39. opal_vmalloc_to_sg_list
  40. opal_free_sg_list
  41. opal_error_code
  42. powernv_set_nmmu_ptcr

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * PowerNV OPAL high level interfaces
   4  *
   5  * Copyright 2011 IBM Corp.
   6  */
   7 
   8 #define pr_fmt(fmt)     "opal: " fmt
   9 
  10 #include <linux/printk.h>
  11 #include <linux/types.h>
  12 #include <linux/of.h>
  13 #include <linux/of_fdt.h>
  14 #include <linux/of_platform.h>
  15 #include <linux/of_address.h>
  16 #include <linux/interrupt.h>
  17 #include <linux/notifier.h>
  18 #include <linux/slab.h>
  19 #include <linux/sched.h>
  20 #include <linux/kobject.h>
  21 #include <linux/delay.h>
  22 #include <linux/memblock.h>
  23 #include <linux/kthread.h>
  24 #include <linux/freezer.h>
  25 #include <linux/kmsg_dump.h>
  26 #include <linux/console.h>
  27 #include <linux/sched/debug.h>
  28 
  29 #include <asm/machdep.h>
  30 #include <asm/opal.h>
  31 #include <asm/firmware.h>
  32 #include <asm/mce.h>
  33 #include <asm/imc-pmu.h>
  34 #include <asm/bug.h>
  35 
  36 #include "powernv.h"
  37 
  38 /* /sys/firmware/opal */
  39 struct kobject *opal_kobj;
  40 
  41 struct opal {
  42         u64 base;
  43         u64 entry;
  44         u64 size;
  45 } opal;
  46 
  47 struct mcheck_recoverable_range {
  48         u64 start_addr;
  49         u64 end_addr;
  50         u64 recover_addr;
  51 };
  52 
  53 static struct mcheck_recoverable_range *mc_recoverable_range;
  54 static int mc_recoverable_range_len;
  55 
  56 struct device_node *opal_node;
  57 static DEFINE_SPINLOCK(opal_write_lock);
  58 static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
  59 static uint32_t opal_heartbeat;
  60 static struct task_struct *kopald_tsk;
  61 static struct opal_msg *opal_msg;
  62 static u32 opal_msg_size __ro_after_init;
  63 
  64 void opal_configure_cores(void)
  65 {
  66         u64 reinit_flags = 0;
  67 
  68         /* Do the actual re-init. This will clobber all FPRs, VRs, etc...
  69          *
  70          * It will preserve non-volatile GPRs and HSPRG0/1. It will
  71          * also restore HIDs and other SPRs to their original values,
  72          * but it might clobber a bunch.
  73          */
  74 #ifdef __BIG_ENDIAN__
  75         reinit_flags |= OPAL_REINIT_CPUS_HILE_BE;
  76 #else
  77         reinit_flags |= OPAL_REINIT_CPUS_HILE_LE;
  78 #endif
  79 
  80         /*
  81          * POWER9 always supports running hash:
  82          *  ie. Host hash  supports  hash guests
  83          *      Host radix supports  hash/radix guests
  84          */
  85         if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
  86                 reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
  87                 if (early_radix_enabled())
  88                         reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
  89         }
  90 
  91         opal_reinit_cpus(reinit_flags);
  92 
  93         /* Restore some bits */
  94         if (cur_cpu_spec->cpu_restore)
  95                 cur_cpu_spec->cpu_restore();
  96 }
  97 
  98 int __init early_init_dt_scan_opal(unsigned long node,
  99                                    const char *uname, int depth, void *data)
 100 {
 101         const void *basep, *entryp, *sizep;
 102         int basesz, entrysz, runtimesz;
 103 
 104         if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
 105                 return 0;
 106 
 107         basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
 108         entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
 109         sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
 110 
 111         if (!basep || !entryp || !sizep)
 112                 return 1;
 113 
 114         opal.base = of_read_number(basep, basesz/4);
 115         opal.entry = of_read_number(entryp, entrysz/4);
 116         opal.size = of_read_number(sizep, runtimesz/4);
 117 
 118         pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
 119                  opal.base, basep, basesz);
 120         pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
 121                  opal.entry, entryp, entrysz);
 122         pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%d)\n",
 123                  opal.size, sizep, runtimesz);
 124 
 125         if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
 126                 powerpc_firmware_features |= FW_FEATURE_OPAL;
 127                 pr_debug("OPAL detected !\n");
 128         } else {
 129                 panic("OPAL != V3 detected, no longer supported.\n");
 130         }
 131 
 132         return 1;
 133 }
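
/*
 * For illustration, the node matched above looks roughly like the following
 * device-tree fragment (the addresses are made up; the real values are
 * provided by firmware):
 *
 *        ibm,opal {
 *                compatible = "ibm,opal-v3";
 *                opal-base-address = <0x0 0x30000000>;
 *                opal-entry-address = <0x0 0x30002580>;
 *                opal-runtime-size = <0x0 0x02000000>;
 *        };
 */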
 134 
 135 int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
 136                                    const char *uname, int depth, void *data)
 137 {
 138         int i, psize, size;
 139         const __be32 *prop;
 140 
 141         if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
 142                 return 0;
 143 
 144         prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
 145 
 146         if (!prop)
 147                 return 1;
 148 
 149         pr_debug("Found machine check recoverable ranges.\n");
 150 
 151         /*
 152          * Calculate number of available entries.
 153          *
 154          * Each recoverable address range entry is (start address, len,
 155          * recovery address), 2 cells each for start and recovery address,
 156          * 1 cell for len, totalling 5 cells per entry.
 157          */
 158         mc_recoverable_range_len = psize / (sizeof(*prop) * 5);
 159 
 160         /* Sanity check */
 161         if (!mc_recoverable_range_len)
 162                 return 1;
 163 
 164         /* Size required to hold all the entries. */
 165         size = mc_recoverable_range_len *
 166                         sizeof(struct mcheck_recoverable_range);
 167 
 168         /*
 169          * Allocate a buffer to hold the MC recoverable ranges.
 170          */
 171         mc_recoverable_range = memblock_alloc(size, __alignof__(u64));
 172         if (!mc_recoverable_range)
 173                 panic("%s: Failed to allocate %u bytes align=0x%lx\n",
 174                       __func__, size, __alignof__(u64));
 175 
 176         for (i = 0; i < mc_recoverable_range_len; i++) {
 177                 mc_recoverable_range[i].start_addr =
 178                                         of_read_number(prop + (i * 5) + 0, 2);
 179                 mc_recoverable_range[i].end_addr =
 180                                         mc_recoverable_range[i].start_addr +
 181                                         of_read_number(prop + (i * 5) + 2, 1);
 182                 mc_recoverable_range[i].recover_addr =
 183                                         of_read_number(prop + (i * 5) + 3, 2);
 184 
 185                 pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
 186                                 mc_recoverable_range[i].start_addr,
 187                                 mc_recoverable_range[i].end_addr,
 188                                 mc_recoverable_range[i].recover_addr);
 189         }
 190         return 1;
 191 }
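
/*
 * For illustration, a single recoverable-range entry encoded as described
 * above (values made up): <start-hi start-lo len recover-hi recover-lo>,
 * i.e. five cells per entry:
 *
 *        mcheck-recoverable-ranges = <0x0 0x30002000 0x200 0x0 0x30003000>;
 *
 * The loop above turns this into start_addr = 0x30002000,
 * end_addr = 0x30002200 and recover_addr = 0x30003000.
 */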
 192 
 193 static int __init opal_register_exception_handlers(void)
 194 {
 195 #ifdef __BIG_ENDIAN__
 196         u64 glue;
 197 
 198         if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
 199                 return -ENODEV;
 200 
 201         /* Hook up some exception handlers, except machine check. We use the
 202          * fwnmi area at 0x7000 to provide the glue space to OPAL.
 203          */
 204         glue = 0x7000;
 205 
 206         /*
 207          * Only ancient OPAL firmware requires this.
 208          * Specifically, firmware from FW810.00 (released June 2014)
 209          * through FW810.20 (Released October 2014).
 210          *
 211          * Check if we are running on newer (post Oct 2014) firmware that
 212          * exports the OPAL_HANDLE_HMI token. If yes, then we don't ask OPAL to
 213          * patch the HMI interrupt; we catch it directly in Linux.
 214          *
 215          * For older firmware (i.e. < FW810.20), we fall back to the old behavior
 216          * and let OPAL patch the HMI vector and handle it inside OPAL firmware.
 217          *
 218          * For newer firmware we catch/handle the HMI directly in Linux.
 219          */
 220         if (!opal_check_token(OPAL_HANDLE_HMI)) {
 221                 pr_info("Old firmware detected, OPAL handles HMIs.\n");
 222                 opal_register_exception_handler(
 223                                 OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
 224                                 0, glue);
 225                 glue += 128;
 226         }
 227 
 228         /*
 229          * Only applicable to ancient firmware, all modern
 230          * (post March 2015/skiboot 5.0) firmware will just return
 231          * OPAL_UNSUPPORTED.
 232          */
 233         opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
 234 #endif
 235 
 236         return 0;
 237 }
 238 machine_early_initcall(powernv, opal_register_exception_handlers);
 239 
 240 /*
 241  * OPAL message notifier, based on message type. Allows subscribers to be
 242  * notified of specific message types.
 243  */
 244 int opal_message_notifier_register(enum opal_msg_type msg_type,
 245                                         struct notifier_block *nb)
 246 {
 247         if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
 248                 pr_warn("%s: Invalid arguments, msg_type:%d\n",
 249                         __func__, msg_type);
 250                 return -EINVAL;
 251         }
 252 
 253         return atomic_notifier_chain_register(
 254                                 &opal_msg_notifier_head[msg_type], nb);
 255 }
 256 EXPORT_SYMBOL_GPL(opal_message_notifier_register);
 257 
 258 int opal_message_notifier_unregister(enum opal_msg_type msg_type,
 259                                      struct notifier_block *nb)
 260 {
 261         return atomic_notifier_chain_unregister(
 262                         &opal_msg_notifier_head[msg_type], nb);
 263 }
 264 EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
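
/*
 * A minimal sketch of a subscriber to the notifier interface above. The
 * callback, the notifier_block and the use of OPAL_MSG_SHUTDOWN are
 * illustrative assumptions, not part of any existing driver.
 */
static int example_opal_msg_notify(struct notifier_block *nb,
                                   unsigned long msg_type, void *msg)
{
        /* msg points at a struct opal_msg; payload fields are big-endian */
        pr_info("received OPAL message, type %lu\n", msg_type);
        return NOTIFY_OK;
}

static struct notifier_block example_opal_msg_nb __maybe_unused = {
        .notifier_call = example_opal_msg_notify,
};

/*
 * A caller would then subscribe with, for example:
 *        opal_message_notifier_register(OPAL_MSG_SHUTDOWN, &example_opal_msg_nb);
 */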
 265 
 266 static void opal_message_do_notify(uint32_t msg_type, void *msg)
 267 {
 268         /* notify subscribers */
 269         atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
 270                                         msg_type, msg);
 271 }
 272 
 273 static void opal_handle_message(void)
 274 {
 275         s64 ret;
 276         u32 type;
 277 
 278         ret = opal_get_msg(__pa(opal_msg), opal_msg_size);
 279         /* No opal message pending. */
 280         if (ret == OPAL_RESOURCE)
 281                 return;
 282 
 283         /* check for errors. */
 284         if (ret) {
 285                 pr_warn("%s: Failed to retrieve opal message, err=%lld\n",
 286                         __func__, ret);
 287                 return;
 288         }
 289 
 290         type = be32_to_cpu(opal_msg->msg_type);
 291 
 292         /* Sanity check */
 293         if (type >= OPAL_MSG_TYPE_MAX) {
 294                 pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
 295                 return;
 296         }
 297         opal_message_do_notify(type, (void *)opal_msg);
 298 }
 299 
 300 static irqreturn_t opal_message_notify(int irq, void *data)
 301 {
 302         opal_handle_message();
 303         return IRQ_HANDLED;
 304 }
 305 
 306 static int __init opal_message_init(struct device_node *opal_node)
 307 {
 308         int ret, i, irq;
 309 
 310         ret = of_property_read_u32(opal_node, "opal-msg-size", &opal_msg_size);
 311         if (ret) {
 312                 pr_notice("Failed to read opal-msg-size property\n");
 313                 opal_msg_size = sizeof(struct opal_msg);
 314         }
 315 
 316         opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
 317         if (!opal_msg) {
 318                 opal_msg_size = sizeof(struct opal_msg);
 319                 /* Try to allocate fixed message size */
 320                 opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
 321                 BUG_ON(opal_msg == NULL);
 322         }
 323 
 324         for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
 325                 ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
 326 
 327         irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
 328         if (!irq) {
 329                 pr_err("%s: Can't register OPAL event irq (%d)\n",
 330                        __func__, irq);
 331                 return irq;
 332         }
 333 
 334         ret = request_irq(irq, opal_message_notify,
 335                         IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
 336         if (ret) {
 337                 pr_err("%s: Can't request OPAL event irq (%d)\n",
 338                        __func__, ret);
 339                 return ret;
 340         }
 341 
 342         return 0;
 343 }
 344 
 345 int opal_get_chars(uint32_t vtermno, char *buf, int count)
 346 {
 347         s64 rc;
 348         __be64 evt, len;
 349 
 350         if (!opal.entry)
 351                 return -ENODEV;
 352         opal_poll_events(&evt);
 353         if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
 354                 return 0;
 355         len = cpu_to_be64(count);
 356         rc = opal_console_read(vtermno, &len, buf);
 357         if (rc == OPAL_SUCCESS)
 358                 return be64_to_cpu(len);
 359         return 0;
 360 }
 361 
 362 static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic)
 363 {
 364         unsigned long flags = 0 /* shut up gcc */;
 365         int written;
 366         __be64 olen;
 367         s64 rc;
 368 
 369         if (!opal.entry)
 370                 return -ENODEV;
 371 
 372         if (atomic)
 373                 spin_lock_irqsave(&opal_write_lock, flags);
 374         rc = opal_console_write_buffer_space(vtermno, &olen);
 375         if (rc || be64_to_cpu(olen) < total_len) {
 376                 /* Closed -> drop characters */
 377                 if (rc)
 378                         written = total_len;
 379                 else
 380                         written = -EAGAIN;
 381                 goto out;
 382         }
 383 
 384         /* Should not get a partial write here because space is available. */
 385         olen = cpu_to_be64(total_len);
 386         rc = opal_console_write(vtermno, &olen, data);
 387         if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 388                 if (rc == OPAL_BUSY_EVENT)
 389                         opal_poll_events(NULL);
 390                 written = -EAGAIN;
 391                 goto out;
 392         }
 393 
 394         /* Closed or other error: drop */
 395         if (rc != OPAL_SUCCESS) {
 396                 written = opal_error_code(rc);
 397                 goto out;
 398         }
 399 
 400         written = be64_to_cpu(olen);
 401         if (written < total_len) {
 402                 if (atomic) {
 403                         /* Should not happen */
 404                         pr_warn("atomic console write returned partial "
 405                                 "len=%d written=%d\n", total_len, written);
 406                 }
 407                 if (!written)
 408                         written = -EAGAIN;
 409         }
 410 
 411 out:
 412         if (atomic)
 413                 spin_unlock_irqrestore(&opal_write_lock, flags);
 414 
 415         return written;
 416 }
 417 
 418 int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 419 {
 420         return __opal_put_chars(vtermno, data, total_len, false);
 421 }
 422 
 423 /*
 424  * opal_put_chars_atomic will not perform partial writes. Data will be
 425  * atomically written to the terminal or not at all. This is not strictly
 426  * true at the moment because console space can race with OPAL's console
 427  * writes.
 428  */
 429 int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len)
 430 {
 431         return __opal_put_chars(vtermno, data, total_len, true);
 432 }
 433 
 434 static s64 __opal_flush_console(uint32_t vtermno)
 435 {
 436         s64 rc;
 437 
 438         if (!opal_check_token(OPAL_CONSOLE_FLUSH)) {
 439                 __be64 evt;
 440 
 441                 /*
 442                  * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
 443                  * the console can still be flushed by calling the polling
 444                  * function while it has OPAL_EVENT_CONSOLE_OUTPUT events.
 445                  */
 446                 WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n");
 447 
 448                 opal_poll_events(&evt);
 449                 if (!(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT))
 450                         return OPAL_SUCCESS;
 451                 return OPAL_BUSY;
 452 
 453         } else {
 454                 rc = opal_console_flush(vtermno);
 455                 if (rc == OPAL_BUSY_EVENT) {
 456                         opal_poll_events(NULL);
 457                         rc = OPAL_BUSY;
 458                 }
 459                 return rc;
 460         }
 461 
 462 }
 463 
 464 /*
 465  * opal_flush_console spins until the console is flushed
 466  */
 467 int opal_flush_console(uint32_t vtermno)
 468 {
 469         for (;;) {
 470                 s64 rc = __opal_flush_console(vtermno);
 471 
 472                 if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
 473                         mdelay(1);
 474                         continue;
 475                 }
 476 
 477                 return opal_error_code(rc);
 478         }
 479 }
 480 
 481 /*
 482  * opal_flush_chars is an hvc interface that sleeps until the console is
 483  * flushed if 'wait' is set; otherwise it returns -EBUSY if the console has
 484  * data, or -EAGAIN if it has data and some of it was flushed.
 485  */
 486 int opal_flush_chars(uint32_t vtermno, bool wait)
 487 {
 488         for (;;) {
 489                 s64 rc = __opal_flush_console(vtermno);
 490 
 491                 if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
 492                         if (wait) {
 493                                 msleep(OPAL_BUSY_DELAY_MS);
 494                                 continue;
 495                         }
 496                         if (rc == OPAL_PARTIAL)
 497                                 return -EAGAIN;
 498                 }
 499 
 500                 return opal_error_code(rc);
 501         }
 502 }
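
/*
 * A minimal sketch of how a console client might combine opal_put_chars()
 * and opal_flush_chars() above. The vtermno of 0 and the single retry are
 * illustrative assumptions, not an existing interface.
 */
static int __maybe_unused example_console_write(const char *buf, int len)
{
        int written;

        written = opal_put_chars(0, buf, len);
        if (written == -EAGAIN) {
                /* Firmware buffer was full: wait for it to drain, retry once */
                opal_flush_chars(0, true);
                written = opal_put_chars(0, buf, len);
        }

        return written;
}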
 503 
 504 static int opal_recover_mce(struct pt_regs *regs,
 505                                         struct machine_check_event *evt)
 506 {
 507         int recovered = 0;
 508 
 509         if (!(regs->msr & MSR_RI)) {
 510                 /* If MSR_RI isn't set, we cannot recover */
 511                 pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
 512                 recovered = 0;
 513         } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
 514                 /* Platform corrected itself */
 515                 recovered = 1;
 516         } else if (evt->severity == MCE_SEV_FATAL) {
 517                 /* Fatal machine check */
 518                 pr_err("Machine check interrupt is fatal\n");
 519                 recovered = 0;
 520         }
 521 
 522         if (!recovered && evt->sync_error) {
 523                 /*
 524                  * Try to kill processes if we get a synchronous machine check
 525                  * (e.g., one caused by execution of this instruction). This
 526                  * will devolve into a panic if we try to kill init or are in
 527                  * an interrupt etc.
 528                  *
 529                  * TODO: Queue up this address for hwpoisoning later.
 530                  * TODO: This is not quite right for d-side machine
 531                  *       checks: ->nip is not necessarily the important
 532                  *       address.
 533                  */
 534                 if ((user_mode(regs))) {
 535                         _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
 536                         recovered = 1;
 537                 } else if (die_will_crash()) {
 538                         /*
 539                          * die() would kill the kernel, so better to go via
 540                          * the platform reboot code that will log the
 541                          * machine check.
 542                          */
 543                         recovered = 0;
 544                 } else {
 545                         die("Machine check", regs, SIGBUS);
 546                         recovered = 1;
 547                 }
 548         }
 549 
 550         return recovered;
 551 }
 552 
 553 void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
 554 {
 555         panic_flush_kmsg_start();
 556 
 557         pr_emerg("Hardware platform error: %s\n", msg);
 558         if (regs)
 559                 show_regs(regs);
 560         smp_send_stop();
 561 
 562         panic_flush_kmsg_end();
 563 
 564         /*
 565          * Don't bother to shut things down because this will
 566          * xstop the system.
 567          */
 568         if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg)
 569                                                 == OPAL_UNSUPPORTED) {
 570                 pr_emerg("Reboot type %d not supported for %s\n",
 571                                 OPAL_REBOOT_PLATFORM_ERROR, msg);
 572         }
 573 
 574         /*
 575          * We reached here. There can be four possibilities:
 576          * 1. We are running on a firmware level that does not support
 577          *    opal_cec_reboot2()
 578          * 2. We are running on a firmware level that does not support
 579          *    the OPAL_REBOOT_PLATFORM_ERROR reboot type.
 580          * 3. We are running on an FSP-based system that does not need
 581          *    OPAL to trigger a checkstop explicitly for error analysis.
 582          *    The FSP PRD component would have already been notified
 583          *    about this error through other channels.
 584          * 4. We are running on a newer skiboot that by default does
 585          *    not cause a checkstop; it drops us back to the kernel to
 586          *    extract context and state at the time of the error.
 587          */
 588 
 589         panic(msg);
 590 }
 591 
 592 int opal_machine_check(struct pt_regs *regs)
 593 {
 594         struct machine_check_event evt;
 595 
 596         if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 597                 return 0;
 598 
 599         /* Print things out */
 600         if (evt.version != MCE_V1) {
 601                 pr_err("Machine Check Exception, Unknown event version %d !\n",
 602                        evt.version);
 603                 return 0;
 604         }
 605         machine_check_print_event_info(&evt, user_mode(regs), false);
 606 
 607         if (opal_recover_mce(regs, &evt))
 608                 return 1;
 609 
 610         pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception");
 611 }
 612 
 613 /* Early HMI handler called in real mode. */
 614 int opal_hmi_exception_early(struct pt_regs *regs)
 615 {
 616         s64 rc;
 617 
 618         /*
 619          * Call the OPAL HMI handler. Pass the paca address as the token.
 620          * A return value of OPAL_SUCCESS indicates that an HMI event has
 621          * been generated and is waiting to be pulled by Linux.
 622          */
 623         rc = opal_handle_hmi();
 624         if (rc == OPAL_SUCCESS) {
 625                 local_paca->hmi_event_available = 1;
 626                 return 1;
 627         }
 628         return 0;
 629 }
 630 
 631 int opal_hmi_exception_early2(struct pt_regs *regs)
 632 {
 633         s64 rc;
 634         __be64 out_flags;
 635 
 636         /*
 637          * Call the OPAL HMI handler.
 638          * Check the 64-bit flag mask to find out whether an event was
 639          * generated and whether the TB is still valid, etc.
 640          */
 641         rc = opal_handle_hmi2(&out_flags);
 642         if (rc != OPAL_SUCCESS)
 643                 return 0;
 644 
 645         if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT)
 646                 local_paca->hmi_event_available = 1;
 647         if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL)
 648                 tb_invalid = true;
 649         return 1;
 650 }
 651 
 652 /* HMI exception handler called in virtual mode during check_irq_replay. */
 653 int opal_handle_hmi_exception(struct pt_regs *regs)
 654 {
 655         /*
 656          * Check if an HMI event is available.
 657          * If yes, wake kopald to process it.
 658          */
 659         if (!local_paca->hmi_event_available)
 660                 return 0;
 661 
 662         local_paca->hmi_event_available = 0;
 663         opal_wake_poller();
 664 
 665         return 1;
 666 }
 667 
 668 static uint64_t find_recovery_address(uint64_t nip)
 669 {
 670         int i;
 671 
 672         for (i = 0; i < mc_recoverable_range_len; i++)
 673                 if ((nip >= mc_recoverable_range[i].start_addr) &&
 674                     (nip < mc_recoverable_range[i].end_addr))
 675                     return mc_recoverable_range[i].recover_addr;
 676         return 0;
 677 }
 678 
 679 bool opal_mce_check_early_recovery(struct pt_regs *regs)
 680 {
 681         uint64_t recover_addr = 0;
 682 
 683         if (!opal.base || !opal.size)
 684                 goto out;
 685 
 686         if ((regs->nip >= opal.base) &&
 687                         (regs->nip < (opal.base + opal.size)))
 688                 recover_addr = find_recovery_address(regs->nip);
 689 
 690         /*
 691          * Set up regs->nip to rfi to the fixup address.
 692          */
 693         if (recover_addr)
 694                 regs->nip = recover_addr;
 695 
 696 out:
 697         return !!recover_addr;
 698 }
 699 
 700 static int opal_sysfs_init(void)
 701 {
 702         opal_kobj = kobject_create_and_add("opal", firmware_kobj);
 703         if (!opal_kobj) {
 704                 pr_warn("kobject_create_and_add opal failed\n");
 705                 return -ENOMEM;
 706         }
 707 
 708         return 0;
 709 }
 710 
 711 static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
 712                                struct bin_attribute *bin_attr,
 713                                char *buf, loff_t off, size_t count)
 714 {
 715         return memory_read_from_buffer(buf, count, &off, bin_attr->private,
 716                                        bin_attr->size);
 717 }
 718 
 719 static struct bin_attribute symbol_map_attr = {
 720         .attr = {.name = "symbol_map", .mode = 0400},
 721         .read = symbol_map_read
 722 };
 723 
 724 static void opal_export_symmap(void)
 725 {
 726         const __be64 *syms;
 727         unsigned int size;
 728         struct device_node *fw;
 729         int rc;
 730 
 731         fw = of_find_node_by_path("/ibm,opal/firmware");
 732         if (!fw)
 733                 return;
 734         syms = of_get_property(fw, "symbol-map", &size);
 735         if (!syms || size != 2 * sizeof(__be64))
 736                 return;
 737 
 738         /* Setup attributes */
 739         symbol_map_attr.private = __va(be64_to_cpu(syms[0]));
 740         symbol_map_attr.size = be64_to_cpu(syms[1]);
 741 
 742         rc = sysfs_create_bin_file(opal_kobj, &symbol_map_attr);
 743         if (rc)
 744                 pr_warn("Error %d creating OPAL symbols file\n", rc);
 745 }
 746 
 747 static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
 748                                 struct bin_attribute *bin_attr, char *buf,
 749                                 loff_t off, size_t count)
 750 {
 751         return memory_read_from_buffer(buf, count, &off, bin_attr->private,
 752                                        bin_attr->size);
 753 }
 754 
 755 /*
 756  * opal_export_attrs: creates a sysfs node for each property listed in
 757  * the device-tree under /ibm,opal/firmware/exports/.
 758  * All new sysfs nodes are created under /sys/firmware/opal/exports/.
 759  * This allows for reserved memory regions (e.g. HDAT) to be read.
 760  * The new sysfs nodes are only readable by root.
 761  */
 762 static void opal_export_attrs(void)
 763 {
 764         struct bin_attribute *attr;
 765         struct device_node *np;
 766         struct property *prop;
 767         struct kobject *kobj;
 768         u64 vals[2];
 769         int rc;
 770 
 771         np = of_find_node_by_path("/ibm,opal/firmware/exports");
 772         if (!np)
 773                 return;
 774 
 775         /* Create new 'exports' directory - /sys/firmware/opal/exports */
 776         kobj = kobject_create_and_add("exports", opal_kobj);
 777         if (!kobj) {
 778                 pr_warn("kobject_create_and_add() of exports failed\n");
 779                 return;
 780         }
 781 
 782         for_each_property_of_node(np, prop) {
 783                 if (!strcmp(prop->name, "name") || !strcmp(prop->name, "phandle"))
 784                         continue;
 785 
 786                 if (of_property_read_u64_array(np, prop->name, &vals[0], 2))
 787                         continue;
 788 
 789                 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
 790 
 791                 if (attr == NULL) {
 792                         pr_warn("Failed kzalloc for bin_attribute!\n");
 793                         continue;
 794                 }
 795 
 796                 sysfs_bin_attr_init(attr);
 797                 attr->attr.name = kstrdup(prop->name, GFP_KERNEL);
 798                 attr->attr.mode = 0400;
 799                 attr->read = export_attr_read;
 800                 attr->private = __va(vals[0]);
 801                 attr->size = vals[1];
 802 
 803                 if (attr->attr.name == NULL) {
 804                         pr_warn("Failed kstrdup for bin_attribute attr.name\n");
 805                         kfree(attr);
 806                         continue;
 807                 }
 808 
 809                 rc = sysfs_create_bin_file(kobj, attr);
 810                 if (rc) {
 811                         pr_warn("Error %d creating OPAL sysfs exports/%s file\n",
 812                                  rc, prop->name);
 813                         kfree(attr->attr.name);
 814                         kfree(attr);
 815                 }
 816         }
 817 
 818         of_node_put(np);
 819 }
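
/*
 * For illustration, a firmware node such as the following (the property
 * name and values are made up):
 *
 *        exports {
 *                hdat-map = /bits/ 64 <0x1000000 0x10000>;
 *        };
 *
 * would show up as /sys/firmware/opal/exports/hdat-map, a 0x10000-byte
 * file readable only by root and backed by the memory at 0x1000000.
 */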
 820 
 821 static void __init opal_dump_region_init(void)
 822 {
 823         void *addr;
 824         uint64_t size;
 825         int rc;
 826 
 827         if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
 828                 return;
 829 
 830         /* Register kernel log buffer */
 831         addr = log_buf_addr_get();
 832         if (addr == NULL)
 833                 return;
 834 
 835         size = log_buf_len_get();
 836         if (size == 0)
 837                 return;
 838 
 839         rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
 840                                        __pa(addr), size);
 841         /* Don't warn if this is just an older OPAL that doesn't
 842          * know about that call
 843          */
 844         if (rc && rc != OPAL_UNSUPPORTED)
 845                 pr_warn("DUMP: Failed to register kernel log buffer. "
 846                         "rc = %d\n", rc);
 847 }
 848 
 849 static void opal_pdev_init(const char *compatible)
 850 {
 851         struct device_node *np;
 852 
 853         for_each_compatible_node(np, NULL, compatible)
 854                 of_platform_device_create(np, NULL, NULL);
 855 }
 856 
 857 static void __init opal_imc_init_dev(void)
 858 {
 859         struct device_node *np;
 860 
 861         np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
 862         if (np)
 863                 of_platform_device_create(np, NULL, NULL);
 864 }
 865 
 866 static int kopald(void *unused)
 867 {
 868         unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;
 869 
 870         set_freezable();
 871         do {
 872                 try_to_freeze();
 873 
 874                 opal_handle_events();
 875 
 876                 set_current_state(TASK_INTERRUPTIBLE);
 877                 if (opal_have_pending_events())
 878                         __set_current_state(TASK_RUNNING);
 879                 else
 880                         schedule_timeout(timeout);
 881 
 882         } while (!kthread_should_stop());
 883 
 884         return 0;
 885 }
 886 
 887 void opal_wake_poller(void)
 888 {
 889         if (kopald_tsk)
 890                 wake_up_process(kopald_tsk);
 891 }
 892 
 893 static void opal_init_heartbeat(void)
 894 {
 895         /* Old firmware: we assume the HVC heartbeat is sufficient */
 896         if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
 897                                  &opal_heartbeat) != 0)
 898                 opal_heartbeat = 0;
 899 
 900         if (opal_heartbeat)
 901                 kopald_tsk = kthread_run(kopald, NULL, "kopald");
 902 }
 903 
 904 static int __init opal_init(void)
 905 {
 906         struct device_node *np, *consoles, *leds;
 907         int rc;
 908 
 909         opal_node = of_find_node_by_path("/ibm,opal");
 910         if (!opal_node) {
 911                 pr_warn("Device node not found\n");
 912                 return -ENODEV;
 913         }
 914 
 915         /* Register OPAL consoles if there are any ports */
 916         consoles = of_find_node_by_path("/ibm,opal/consoles");
 917         if (consoles) {
 918                 for_each_child_of_node(consoles, np) {
 919                         if (!of_node_name_eq(np, "serial"))
 920                                 continue;
 921                         of_platform_device_create(np, NULL, NULL);
 922                 }
 923                 of_node_put(consoles);
 924         }
 925 
 926         /* Initialise OPAL messaging system */
 927         opal_message_init(opal_node);
 928 
 929         /* Initialise OPAL asynchronous completion interface */
 930         opal_async_comp_init();
 931 
 932         /* Initialise OPAL sensor interface */
 933         opal_sensor_init();
 934 
 935         /* Initialise OPAL hypervisor maintenance interrupt handling */
 936         opal_hmi_handler_init();
 937 
 938         /* Create i2c platform devices */
 939         opal_pdev_init("ibm,opal-i2c");
 940 
 941         /* Handle non-volatile memory devices */
 942         opal_pdev_init("pmem-region");
 943 
 944         /* Setup a heartbeat thread if requested by OPAL */
 945         opal_init_heartbeat();
 946 
 947         /* Detect In-Memory Collection counters and create devices */
 948         opal_imc_init_dev();
 949 
 950         /* Create leds platform devices */
 951         leds = of_find_node_by_path("/ibm,opal/leds");
 952         if (leds) {
 953                 of_platform_device_create(leds, "opal_leds", NULL);
 954                 of_node_put(leds);
 955         }
 956 
 957         /* Initialise OPAL message log interface */
 958         opal_msglog_init();
 959 
 960         /* Create "opal" kobject under /sys/firmware */
 961         rc = opal_sysfs_init();
 962         if (rc == 0) {
 963                 /* Export symbol map to userspace */
 964                 opal_export_symmap();
 965                 /* Setup dump region interface */
 966                 opal_dump_region_init();
 967                 /* Setup error log interface */
 968                 rc = opal_elog_init();
 969                 /* Setup code update interface */
 970                 opal_flash_update_init();
 971                 /* Setup platform dump extract interface */
 972                 opal_platform_dump_init();
 973                 /* Setup system parameters interface */
 974                 opal_sys_param_init();
 975                 /* Setup message log sysfs interface. */
 976                 opal_msglog_sysfs_init();
 977         }
 978 
 979         /* Export all properties */
 980         opal_export_attrs();
 981 
 982         /* Initialize platform devices: IPMI backend, PRD & flash interface */
 983         opal_pdev_init("ibm,opal-ipmi");
 984         opal_pdev_init("ibm,opal-flash");
 985         opal_pdev_init("ibm,opal-prd");
 986 
 987         /* Initialise platform device: oppanel interface */
 988         opal_pdev_init("ibm,opal-oppanel");
 989 
 990         /* Initialise OPAL kmsg dumper for flushing console on panic */
 991         opal_kmsg_init();
 992 
 993         /* Initialise OPAL powercap interface */
 994         opal_powercap_init();
 995 
 996         /* Initialise OPAL Power-Shifting-Ratio interface */
 997         opal_psr_init();
 998 
 999         /* Initialise OPAL sensor groups */
1000         opal_sensor_groups_init();
1001 
1002         /* Initialise OPAL Power control interface */
1003         opal_power_control_init();
1004 
1005         return 0;
1006 }
1007 machine_subsys_initcall(powernv, opal_init);
1008 
1009 void opal_shutdown(void)
1010 {
1011         long rc = OPAL_BUSY;
1012 
1013         opal_event_shutdown();
1014 
1015         /*
1016          * Then sync with OPAL, which ensures that anything that can
1017          * potentially write to our memory (such as an ongoing dump
1018          * retrieval) has completed.
1019          */
1020         while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
1021                 rc = opal_sync_host_reboot();
1022                 if (rc == OPAL_BUSY_EVENT)
1023                         opal_poll_events(NULL);
1024                 else
1025                         mdelay(10);
1026         }
1027 
1028         /* Unregister memory dump region */
1029         if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
1030                 opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
1031 }
1032 
1033 /* Export this so that test modules can use it */
1034 EXPORT_SYMBOL_GPL(opal_invalid_call);
1035 EXPORT_SYMBOL_GPL(opal_xscom_read);
1036 EXPORT_SYMBOL_GPL(opal_xscom_write);
1037 EXPORT_SYMBOL_GPL(opal_ipmi_send);
1038 EXPORT_SYMBOL_GPL(opal_ipmi_recv);
1039 EXPORT_SYMBOL_GPL(opal_flash_read);
1040 EXPORT_SYMBOL_GPL(opal_flash_write);
1041 EXPORT_SYMBOL_GPL(opal_flash_erase);
1042 EXPORT_SYMBOL_GPL(opal_prd_msg);
1043 EXPORT_SYMBOL_GPL(opal_check_token);
1044 
1045 /* Convert a region of vmalloc memory to an opal sg list */
1046 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
1047                                              unsigned long vmalloc_size)
1048 {
1049         struct opal_sg_list *sg, *first = NULL;
1050         unsigned long i = 0;
1051 
1052         sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
1053         if (!sg)
1054                 goto nomem;
1055 
1056         first = sg;
1057 
1058         while (vmalloc_size > 0) {
1059                 uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
1060                 uint64_t length = min(vmalloc_size, PAGE_SIZE);
1061 
1062                 sg->entry[i].data = cpu_to_be64(data);
1063                 sg->entry[i].length = cpu_to_be64(length);
1064                 i++;
1065 
1066                 if (i >= SG_ENTRIES_PER_NODE) {
1067                         struct opal_sg_list *next;
1068 
1069                         next = kzalloc(PAGE_SIZE, GFP_KERNEL);
1070                         if (!next)
1071                                 goto nomem;
1072 
1073                         sg->length = cpu_to_be64(
1074                                         i * sizeof(struct opal_sg_entry) + 16);
1075                         i = 0;
1076                         sg->next = cpu_to_be64(__pa(next));
1077                         sg = next;
1078                 }
1079 
1080                 vmalloc_addr += length;
1081                 vmalloc_size -= length;
1082         }
1083 
1084         sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);
1085 
1086         return first;
1087 
1088 nomem:
1089         pr_err("%s : Failed to allocate memory\n", __func__);
1090         opal_free_sg_list(first);
1091         return NULL;
1092 }
1093 
1094 void opal_free_sg_list(struct opal_sg_list *sg)
1095 {
1096         while (sg) {
1097                 uint64_t next = be64_to_cpu(sg->next);
1098 
1099                 kfree(sg);
1100 
1101                 if (next)
1102                         sg = __va(next);
1103                 else
1104                         sg = NULL;
1105         }
1106 }
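
/*
 * A minimal sketch of the intended calling pattern for the two helpers
 * above. The image buffer and handing the list to opal_update_flash() are
 * illustrative assumptions.
 */
static int __maybe_unused example_send_image(void *image, unsigned long size)
{
        struct opal_sg_list *list;
        int64_t rc;

        list = opal_vmalloc_to_sg_list(image, size);
        if (!list)
                return -ENOMEM;

        /* Hand the physical address of the first node to firmware */
        rc = opal_update_flash(__pa(list));

        opal_free_sg_list(list);
        return opal_error_code(rc);
}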
1107 
1108 int opal_error_code(int rc)
1109 {
1110         switch (rc) {
1111         case OPAL_SUCCESS:              return 0;
1112 
1113         case OPAL_PARAMETER:            return -EINVAL;
1114         case OPAL_ASYNC_COMPLETION:     return -EINPROGRESS;
1115         case OPAL_BUSY:
1116         case OPAL_BUSY_EVENT:           return -EBUSY;
1117         case OPAL_NO_MEM:               return -ENOMEM;
1118         case OPAL_PERMISSION:           return -EPERM;
1119 
1120         case OPAL_UNSUPPORTED:          return -EIO;
1121         case OPAL_HARDWARE:             return -EIO;
1122         case OPAL_INTERNAL_ERROR:       return -EIO;
1123         case OPAL_TIMEOUT:              return -ETIMEDOUT;
1124         default:
1125                 pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
1126                 return -EIO;
1127         }
1128 }
1129 
1130 void powernv_set_nmmu_ptcr(unsigned long ptcr)
1131 {
1132         int rc;
1133 
1134         if (firmware_has_feature(FW_FEATURE_OPAL)) {
1135                 rc = opal_nmmu_set_ptcr(-1UL, ptcr);
1136                 if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED)
1137                         pr_warn("%s: Unable to set nest mmu ptcr\n", __func__);
1138         }
1139 }
1140 
1141 EXPORT_SYMBOL_GPL(opal_poll_events);
1142 EXPORT_SYMBOL_GPL(opal_rtc_read);
1143 EXPORT_SYMBOL_GPL(opal_rtc_write);
1144 EXPORT_SYMBOL_GPL(opal_tpo_read);
1145 EXPORT_SYMBOL_GPL(opal_tpo_write);
1146 EXPORT_SYMBOL_GPL(opal_i2c_request);
1147 /* Export these symbols for PowerNV LED class driver */
1148 EXPORT_SYMBOL_GPL(opal_leds_get_ind);
1149 EXPORT_SYMBOL_GPL(opal_leds_set_ind);
1150 /* Export this symbol for PowerNV Operator Panel class driver */
1151 EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
1152 /* Export this for KVM */
1153 EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
1154 EXPORT_SYMBOL_GPL(opal_int_eoi);
1155 EXPORT_SYMBOL_GPL(opal_error_code);
1156 /* Export the below symbol for NX compression */
1157 EXPORT_SYMBOL(opal_nx_coproc_init);
