root/arch/powerpc/platforms/pseries/ras.c


DEFINITIONS

This source file includes the following definitions.
  1. rtas_mc_error_sub_type
  2. init_ras_hotplug_IRQ
  3. init_ras_IRQ
  4. handle_system_shutdown
  5. rtas_parse_epow_errlog
  6. ras_hotplug_interrupt
  7. ras_epow_interrupt
  8. ras_error_interrupt
  9. fwnmi_get_errlog
  10. fwnmi_get_errinfo
  11. fwnmi_release_errinfo
  12. pSeries_system_reset_exception
  13. mce_handle_error
  14. mce_process_errlog_event
  15. recover_mce
  16. pSeries_machine_check_exception
  17. pseries_machine_check_realmode

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2001 Dave Engebretsen IBM Corporation
 */

#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/of.h>
#include <linux/fs.h>
#include <linux/reboot.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
#include <asm/mce.h>

#include "pseries.h"

static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);

static int ras_check_exception_token;

static void mce_process_errlog_event(struct irq_work *work);
static struct irq_work mce_errlog_process_work = {
        .func = mce_process_errlog_event,
};

#define EPOW_SENSOR_TOKEN       9
#define EPOW_SENSOR_INDEX       0

/* EPOW events counter variable */
static int num_epow_events;

static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);

/* RTAS pseries MCE errorlog section. */
struct pseries_mc_errorlog {
        __be32  fru_id;
        __be32  proc_id;
        u8      error_type;
        /*
         * sub_err_type (1 byte). Bit fields depend on error_type
         *
         *   MSB0
         *   |
         *   V
         *   01234567
         *   XXXXXXXX
         *
         * For error_type == MC_ERROR_TYPE_UE
         *   XXXXXXXX
         *   X          1: Permanent or Transient UE.
         *    X         1: Effective address provided.
         *     X        1: Logical address provided.
         *      XX      2: Reserved.
         *        XXX   3: Type of UE error.
         *
         * For error_type != MC_ERROR_TYPE_UE
         *   XXXXXXXX
         *   X          1: Effective address provided.
         *    XXXXX     5: Reserved.
         *         XX   2: Type of SLB/ERAT/TLB error.
         */
        u8      sub_err_type;
        u8      reserved_1[6];
        __be64  effective_address;
        __be64  logical_address;
} __packed;

/* RTAS pseries MCE error types */
#define MC_ERROR_TYPE_UE                0x00
#define MC_ERROR_TYPE_SLB               0x01
#define MC_ERROR_TYPE_ERAT              0x02
#define MC_ERROR_TYPE_UNKNOWN           0x03
#define MC_ERROR_TYPE_TLB               0x04
#define MC_ERROR_TYPE_D_CACHE           0x05
#define MC_ERROR_TYPE_I_CACHE           0x07

/* RTAS pseries MCE error sub types */
#define MC_ERROR_UE_INDETERMINATE               0
#define MC_ERROR_UE_IFETCH                      1
#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH      2
#define MC_ERROR_UE_LOAD_STORE                  3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE  4

#define UE_EFFECTIVE_ADDR_PROVIDED              0x40
#define UE_LOGICAL_ADDR_PROVIDED                0x20

#define MC_ERROR_SLB_PARITY             0
#define MC_ERROR_SLB_MULTIHIT           1
#define MC_ERROR_SLB_INDETERMINATE      2

#define MC_ERROR_ERAT_PARITY            1
#define MC_ERROR_ERAT_MULTIHIT          2
#define MC_ERROR_ERAT_INDETERMINATE     3

#define MC_ERROR_TLB_PARITY             1
#define MC_ERROR_TLB_MULTIHIT           2
#define MC_ERROR_TLB_INDETERMINATE      3

static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
{
        switch (mlog->error_type) {
        case    MC_ERROR_TYPE_UE:
                return (mlog->sub_err_type & 0x07);
        case    MC_ERROR_TYPE_SLB:
        case    MC_ERROR_TYPE_ERAT:
        case    MC_ERROR_TYPE_TLB:
                return (mlog->sub_err_type & 0x03);
        default:
                return 0;
        }
}
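
/*
 * Worked example (hypothetical value): for a UE log with
 * sub_err_type = 0x63 (binary 0110 0011), the masks above decode as
 *
 *   0x63 & UE_EFFECTIVE_ADDR_PROVIDED (0x40) != 0  -> effective address valid
 *   0x63 & UE_LOGICAL_ADDR_PROVIDED   (0x20) != 0  -> logical address valid
 *   0x63 & 0x07 == 0x03                            -> MC_ERROR_UE_LOAD_STORE
 *
 * so rtas_mc_error_sub_type() returns MC_ERROR_UE_LOAD_STORE when
 * error_type == MC_ERROR_TYPE_UE.
 */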

/*
 * Enable the hotplug interrupts late because processing them may touch
 * other devices or systems (e.g. hugepages) that have not been
 * initialized at the subsys stage.
 */
int __init init_ras_hotplug_IRQ(void)
{
        struct device_node *np;

        /* Hotplug Events */
        np = of_find_node_by_path("/event-sources/hot-plug-events");
        if (np != NULL) {
                if (dlpar_workqueue_init() == 0)
                        request_event_sources_irqs(np, ras_hotplug_interrupt,
                                                   "RAS_HOTPLUG");
                of_node_put(np);
        }

        return 0;
}
machine_late_initcall(pseries, init_ras_hotplug_IRQ);

/*
 * Initialize handlers for the set of interrupts caused by hardware errors
 * and power system events.
 */
static int __init init_ras_IRQ(void)
{
        struct device_node *np;

        ras_check_exception_token = rtas_token("check-exception");

        /* Internal Errors */
        np = of_find_node_by_path("/event-sources/internal-errors");
        if (np != NULL) {
                request_event_sources_irqs(np, ras_error_interrupt,
                                           "RAS_ERROR");
                of_node_put(np);
        }

        /* EPOW Events */
        np = of_find_node_by_path("/event-sources/epow-events");
        if (np != NULL) {
                request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
                of_node_put(np);
        }

        return 0;
}
machine_subsys_initcall(pseries, init_ras_IRQ);

#define EPOW_SHUTDOWN_NORMAL                            1
#define EPOW_SHUTDOWN_ON_UPS                            2
#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS        3
#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH      4

static void handle_system_shutdown(char event_modifier)
{
        switch (event_modifier) {
        case EPOW_SHUTDOWN_NORMAL:
                pr_emerg("Power off requested\n");
                orderly_poweroff(true);
                break;

        case EPOW_SHUTDOWN_ON_UPS:
                pr_emerg("Loss of system power detected. System is running on"
                         " UPS/battery. Check RTAS error log for details\n");
                orderly_poweroff(true);
                break;

        case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
                pr_emerg("Loss of system critical functions detected. Check"
                         " RTAS error log for details\n");
                orderly_poweroff(true);
                break;

        case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
                pr_emerg("High ambient temperature detected. Check RTAS"
                         " error log for details\n");
                orderly_poweroff(true);
                break;

        default:
                pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
                        event_modifier);
        }
}

struct epow_errorlog {
        unsigned char sensor_value;
        unsigned char event_modifier;
        unsigned char extended_modifier;
        unsigned char reserved;
        unsigned char platform_reason;
};

#define EPOW_RESET                      0
#define EPOW_WARN_COOLING               1
#define EPOW_WARN_POWER                 2
#define EPOW_SYSTEM_SHUTDOWN            3
#define EPOW_SYSTEM_HALT                4
#define EPOW_MAIN_ENCLOSURE             5
#define EPOW_POWER_OFF                  7

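/*
 * Worked example (hypothetical values): an EPOW log with
 * sensor_value = 0x13 and event_modifier = 0x02 decodes below as
 * action_code = 0x13 & 0xF = 3 (EPOW_SYSTEM_SHUTDOWN) with
 * modifier = 2 (EPOW_SHUTDOWN_ON_UPS), so handle_system_shutdown()
 * initiates an orderly power off.
 */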
static void rtas_parse_epow_errlog(struct rtas_error_log *log)
{
        struct pseries_errorlog *pseries_log;
        struct epow_errorlog *epow_log;
        char action_code;
        char modifier;

        pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
        if (pseries_log == NULL)
                return;

        epow_log = (struct epow_errorlog *)pseries_log->data;
        action_code = epow_log->sensor_value & 0xF;     /* bottom 4 bits */
        modifier = epow_log->event_modifier & 0xF;      /* bottom 4 bits */

        switch (action_code) {
        case EPOW_RESET:
                if (num_epow_events) {
                        pr_info("Non-critical power/cooling issue cleared\n");
                        num_epow_events--;
                }
                break;

        case EPOW_WARN_COOLING:
                pr_info("Non-critical cooling issue detected. Check RTAS error"
                        " log for details\n");
                break;

        case EPOW_WARN_POWER:
                pr_info("Non-critical power issue detected. Check RTAS error"
                        " log for details\n");
                break;

        case EPOW_SYSTEM_SHUTDOWN:
                handle_system_shutdown(modifier);
                break;

        case EPOW_SYSTEM_HALT:
                pr_emerg("Critical power/cooling issue detected. Check RTAS"
                         " error log for details. Powering off.\n");
                orderly_poweroff(true);
                break;

        case EPOW_MAIN_ENCLOSURE:
        case EPOW_POWER_OFF:
                pr_emerg("System about to lose power. Check RTAS error log"
                         " for details. Powering off immediately.\n");
                emergency_sync();
                kernel_power_off();
                break;

        default:
                pr_err("Unknown power/cooling event (action code = %d)\n",
                        action_code);
        }

        /* Increment the EPOW events counter */
        if (action_code != EPOW_RESET)
                num_epow_events++;
}

static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
{
        struct pseries_errorlog *pseries_log;
        struct pseries_hp_errorlog *hp_elog;

        spin_lock(&ras_log_buf_lock);

        rtas_call(ras_check_exception_token, 6, 1, NULL,
                  RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
                  RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
                  rtas_get_error_log_max());

        pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
                                           PSERIES_ELOG_SECT_ID_HOTPLUG);
        hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;

        /*
         * Since PCI hotplug is not currently supported on pseries, put PCI
         * hotplug events on the ras_log_buf to be handled by rtas_errd.
         */
        if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
            hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
            hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
                queue_hotplug_event(hp_elog);
        else
                log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);

        spin_unlock(&ras_log_buf_lock);
        return IRQ_HANDLED;
}

/* Handle environmental and power warning (EPOW) interrupts. */
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
{
        int status;
        int state;
        int critical;

        status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX,
                                      &state);

        if (state > 3)
                critical = 1;           /* Time Critical */
        else
                critical = 0;

        spin_lock(&ras_log_buf_lock);

        status = rtas_call(ras_check_exception_token, 6, 1, NULL,
                           RTAS_VECTOR_EXTERNAL_INTERRUPT,
                           virq_to_hw(irq),
                           RTAS_EPOW_WARNING,
                           critical, __pa(&ras_log_buf),
                           rtas_get_error_log_max());

        log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);

        rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);

        spin_unlock(&ras_log_buf_lock);
        return IRQ_HANDLED;
}

/*
 * Handle hardware error interrupts.
 *
 * RTAS check-exception is called to collect data on the exception.  If
 * the error is deemed recoverable, we log a warning and return.
 * For nonrecoverable errors, an error is logged and we stop all processing
 * as quickly as possible in order to prevent propagation of the failure.
 */
static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
{
        struct rtas_error_log *rtas_elog;
        int status;
        int fatal;

        spin_lock(&ras_log_buf_lock);

        status = rtas_call(ras_check_exception_token, 6, 1, NULL,
                           RTAS_VECTOR_EXTERNAL_INTERRUPT,
                           virq_to_hw(irq),
                           RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
                           __pa(&ras_log_buf),
                           rtas_get_error_log_max());

        rtas_elog = (struct rtas_error_log *)ras_log_buf;

        if (status == 0 &&
            rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
                fatal = 1;
        else
                fatal = 0;

        /* format and print the extended information */
        log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);

        if (fatal) {
                pr_emerg("Fatal hardware error detected. Check RTAS error"
                         " log for details. Powering off immediately\n");
                emergency_sync();
                kernel_power_off();
        } else {
                pr_err("Recoverable hardware error detected\n");
        }

        spin_unlock(&ras_log_buf_lock);
        return IRQ_HANDLED;
}

/*
 * Some versions of FWNMI place the buffer inside the 4kB page starting at
 * 0x7000. Other versions place it inside the rtas buffer. We check both.
 */
#define VALID_FWNMI_BUFFER(A) \
        ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \
        (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16))))
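
/*
 * Illustrative check (hypothetical addresses): with the macro above, an
 * r3 of 0x7008 is accepted via the fixed page at 0x7000, while 0x7ff8
 * is rejected unless it happens to fall inside the rtas buffer range
 * (rtas.base .. rtas.base + rtas.size - 16).
 */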

static inline struct rtas_error_log *fwnmi_get_errlog(void)
{
        return (struct rtas_error_log *)local_paca->mce_data_buf;
}

/*
 * Get the error information for errors coming through the
 * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
 * the actual r3 if possible, and a ptr to the error log entry
 * will be returned if found.
 *
 * Use the per-CPU buffer mce_data_buf to store the RTAS error log.
 *
 * The mce_data_buf does not have any locks or protection around it;
 * if a second machine check comes in, or a system reset is done,
 * before we have logged the error, then we will get corruption in the
 * error log.  This is preferable to holding off on calling
 * ibm,nmi-interlock, which would result in us checkstopping if a
 * second machine check did come in.
 */
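/*
 * Layout of the save area behind the (masked) r3, as consumed below
 * (a sketch derived from the code in fwnmi_get_errinfo()):
 *
 *   savep[0]   original r3 at the time of the NMI (big endian)
 *   savep[1]   start of the struct rtas_error_log header, followed by
 *              the extended log when rtas_error_extended() reports one
 */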
static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
{
        unsigned long *savep;
        struct rtas_error_log *h;

        /* Mask top two bits */
        regs->gpr[3] &= ~(0x3UL << 62);

        if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
                printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
                return NULL;
        }

        savep = __va(regs->gpr[3]);
        regs->gpr[3] = be64_to_cpu(savep[0]);   /* restore original r3 */

        h = (struct rtas_error_log *)&savep[1];
        /* Use the per cpu buffer from paca to store rtas error log */
        memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
        if (!rtas_error_extended(h)) {
                memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
        } else {
                int len, error_log_length;

                error_log_length = 8 + rtas_error_extended_log_length(h);
                len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
                memcpy(local_paca->mce_data_buf, h, len);
        }

        return (struct rtas_error_log *)local_paca->mce_data_buf;
}

/*
 * Call this when done with the data returned by fwnmi_get_errinfo.
 * It will release the saved data area for other CPUs in the
 * partition to receive FWNMI errors.
 */
static void fwnmi_release_errinfo(void)
{
        int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
        if (ret != 0)
                printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
}

int pSeries_system_reset_exception(struct pt_regs *regs)
{
#ifdef __LITTLE_ENDIAN__
        /*
         * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
         * to detect the bad SRR1 pattern here. Flip the NIP back to correct
         * endian for reporting purposes. Unfortunately the MSR can't be fixed,
         * so clear it. It will be missing MSR_RI so we won't try to recover.
         */
        if ((be64_to_cpu(regs->msr) &
                        (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
                         MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
                regs->nip = be64_to_cpu((__be64)regs->nip);
                regs->msr = 0;
        }
#endif

        if (fwnmi_active) {
                struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
                if (errhdr) {
                        /* XXX Should look at FWNMI information */
                }
                fwnmi_release_errinfo();
        }

        if (smp_handle_nmi_ipi(regs))
                return 1;

        return 0; /* need to perform reset */
}

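/*
 * Distill the RTAS error log into a struct mce_error_info for
 * save_mce_event().  Summary of the severity mapping performed below:
 *
 *   RTAS_SEVERITY_NO_ERROR            -> MCE_SEV_NO_ERROR
 *   RTAS_SEVERITY_EVENT / WARNING     -> MCE_SEV_WARNING
 *   RTAS_SEVERITY_ERROR_SYNC / ERROR  -> MCE_SEV_SEVERE
 *   RTAS_SEVERITY_FATAL (and others)  -> MCE_SEV_FATAL
 *
 * Severities up to and including ERROR_SYNC are flagged as sync_error.
 */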
static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
{
        struct mce_error_info mce_err = { 0 };
        unsigned long eaddr = 0, paddr = 0;
        struct pseries_errorlog *pseries_log;
        struct pseries_mc_errorlog *mce_log;
        int disposition = rtas_error_disposition(errp);
        int initiator = rtas_error_initiator(errp);
        int severity = rtas_error_severity(errp);
        u8 error_type, err_sub_type;

        if (initiator == RTAS_INITIATOR_UNKNOWN)
                mce_err.initiator = MCE_INITIATOR_UNKNOWN;
        else if (initiator == RTAS_INITIATOR_CPU)
                mce_err.initiator = MCE_INITIATOR_CPU;
        else if (initiator == RTAS_INITIATOR_PCI)
                mce_err.initiator = MCE_INITIATOR_PCI;
        else if (initiator == RTAS_INITIATOR_ISA)
                mce_err.initiator = MCE_INITIATOR_ISA;
        else if (initiator == RTAS_INITIATOR_MEMORY)
                mce_err.initiator = MCE_INITIATOR_MEMORY;
        else if (initiator == RTAS_INITIATOR_POWERMGM)
                mce_err.initiator = MCE_INITIATOR_POWERMGM;
        else
                mce_err.initiator = MCE_INITIATOR_UNKNOWN;

        if (severity == RTAS_SEVERITY_NO_ERROR)
                mce_err.severity = MCE_SEV_NO_ERROR;
        else if (severity == RTAS_SEVERITY_EVENT)
                mce_err.severity = MCE_SEV_WARNING;
        else if (severity == RTAS_SEVERITY_WARNING)
                mce_err.severity = MCE_SEV_WARNING;
        else if (severity == RTAS_SEVERITY_ERROR_SYNC)
                mce_err.severity = MCE_SEV_SEVERE;
        else if (severity == RTAS_SEVERITY_ERROR)
                mce_err.severity = MCE_SEV_SEVERE;
        else if (severity == RTAS_SEVERITY_FATAL)
                mce_err.severity = MCE_SEV_FATAL;
        else
                mce_err.severity = MCE_SEV_FATAL;

        if (severity <= RTAS_SEVERITY_ERROR_SYNC)
                mce_err.sync_error = true;
        else
                mce_err.sync_error = false;

        mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
        mce_err.error_class = MCE_ECLASS_UNKNOWN;

        if (!rtas_error_extended(errp))
                goto out;

        pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
        if (pseries_log == NULL)
                goto out;

        mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
        error_type = mce_log->error_type;
        err_sub_type = rtas_mc_error_sub_type(mce_log);

        switch (mce_log->error_type) {
        case MC_ERROR_TYPE_UE:
                mce_err.error_type = MCE_ERROR_TYPE_UE;
                switch (err_sub_type) {
                case MC_ERROR_UE_IFETCH:
                        mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
                        break;
                case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
                        mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
                        break;
                case MC_ERROR_UE_LOAD_STORE:
                        mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
                        break;
                case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
                        mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
                        break;
                case MC_ERROR_UE_INDETERMINATE:
                default:
                        mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
                        break;
                }
                if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
                        eaddr = be64_to_cpu(mce_log->effective_address);

                if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
                        paddr = be64_to_cpu(mce_log->logical_address);
                } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
                        unsigned long pfn;

                        pfn = addr_to_pfn(regs, eaddr);
                        if (pfn != ULONG_MAX)
                                paddr = pfn << PAGE_SHIFT;
                }

                break;
        case MC_ERROR_TYPE_SLB:
                mce_err.error_type = MCE_ERROR_TYPE_SLB;
                switch (err_sub_type) {
                case MC_ERROR_SLB_PARITY:
                        mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
                        break;
                case MC_ERROR_SLB_MULTIHIT:
                        mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
                        break;
                case MC_ERROR_SLB_INDETERMINATE:
                default:
                        mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
                        break;
                }
                if (mce_log->sub_err_type & 0x80)
                        eaddr = be64_to_cpu(mce_log->effective_address);
                break;
        case MC_ERROR_TYPE_ERAT:
                mce_err.error_type = MCE_ERROR_TYPE_ERAT;
                switch (err_sub_type) {
                case MC_ERROR_ERAT_PARITY:
                        mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
                        break;
                case MC_ERROR_ERAT_MULTIHIT:
                        mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
                        break;
                case MC_ERROR_ERAT_INDETERMINATE:
                default:
                        mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
                        break;
                }
                if (mce_log->sub_err_type & 0x80)
                        eaddr = be64_to_cpu(mce_log->effective_address);
                break;
        case MC_ERROR_TYPE_TLB:
                mce_err.error_type = MCE_ERROR_TYPE_TLB;
                switch (err_sub_type) {
                case MC_ERROR_TLB_PARITY:
                        mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
                        break;
                case MC_ERROR_TLB_MULTIHIT:
                        mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
                        break;
                case MC_ERROR_TLB_INDETERMINATE:
                default:
                        mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
                        break;
                }
                if (mce_log->sub_err_type & 0x80)
                        eaddr = be64_to_cpu(mce_log->effective_address);
                break;
        case MC_ERROR_TYPE_D_CACHE:
                mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
                break;
        case MC_ERROR_TYPE_I_CACHE:
                mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
                break;
        case MC_ERROR_TYPE_UNKNOWN:
        default:
                mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
                break;
        }

#ifdef CONFIG_PPC_BOOK3S_64
        if (disposition == RTAS_DISP_NOT_RECOVERED) {
                switch (error_type) {
                case    MC_ERROR_TYPE_SLB:
                case    MC_ERROR_TYPE_ERAT:
                        /*
                         * Store the old SLB contents in the paca before
                         * flushing. Print this when we go to virtual mode.
                         * There is a chance that we may hit an MCE again if
                         * there is a parity error on the SLB entry we are
                         * trying to read for saving. Hence limit the SLB
                         * saving to a single level of recursion.
                         */
                        if (local_paca->in_mce == 1)
                                slb_save_contents(local_paca->mce_faulty_slbs);
                        flush_and_reload_slb();
                        disposition = RTAS_DISP_FULLY_RECOVERED;
                        break;
                default:
                        break;
                }
        } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
                /* Platform corrected itself but could be degraded */
                printk(KERN_ERR "MCE: limited recovery, system may "
                       "be degraded\n");
                disposition = RTAS_DISP_FULLY_RECOVERED;
        }
#endif

out:
        /*
         * Enable translation as we will be accessing per-cpu variables
         * in save_mce_event(), which may fall outside the RMO region.
         * Leave it enabled because we will subsequently be queuing work
         * to workqueues, where per-cpu variables are accessed again;
         * besides, fwnmi_release_errinfo() crashes when called in
         * realmode on pseries.
         * Note: All the realmode handling, like flushing SLB entries for
         *       SLB multihit, is done by now.
         */
        mtmsr(mfmsr() | MSR_IR | MSR_DR);
        save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
                        &mce_err, regs->nip, eaddr, paddr);

        return disposition;
}

/*
 * Process MCE rtas errlog event.
 */
static void mce_process_errlog_event(struct irq_work *work)
{
        struct rtas_error_log *err;

        err = fwnmi_get_errlog();
        log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
}

/*
 * See if we can recover from a machine check exception.
 * This is only called on power4 (or above) and only via
 * the Firmware Non-Maskable Interrupts (fwnmi) handler
 * which provides the error analysis for us.
 *
 * Return 1 if corrected (or delivered a signal).
 * Return 0 if there is nothing we can do.
 */
static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
{
        int recovered = 0;

        if (!(regs->msr & MSR_RI)) {
                /* If MSR_RI isn't set, we cannot recover */
                pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
                recovered = 0;
        } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
                /* Platform corrected itself */
                recovered = 1;
        } else if (evt->severity == MCE_SEV_FATAL) {
                /* Fatal machine check */
                pr_err("Machine check interrupt is fatal\n");
                recovered = 0;
        }

        if (!recovered && evt->sync_error) {
                /*
                 * Try to kill processes if we get a synchronous machine check
                 * (e.g., one caused by execution of this instruction). This
                 * will devolve into a panic if we try to kill init or are in
                 * an interrupt etc.
                 *
                 * TODO: Queue up this address for hwpoisoning later.
                 * TODO: This is not quite right for d-side machine
                 *       checks ->nip is not necessarily the important
                 *       address.
                 */
                if (user_mode(regs)) {
                        _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
                        recovered = 1;
                } else if (die_will_crash()) {
                        /*
                         * die() would kill the kernel, so better to go via
                         * the platform reboot code that will log the
                         * machine check.
                         */
                        recovered = 0;
                } else {
                        die("Machine check", regs, SIGBUS);
                        recovered = 1;
                }
        }

        return recovered;
}

/*
 * Handle a machine check.
 *
 * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
 * should be present.  If so the handler which called us tells us if the
 * error was recovered (never true if RI=0).
 *
 * On hardware prior to Power 4 these exceptions were asynchronous which
 * means we can't tell exactly where it occurred and so we can't recover.
 */
int pSeries_machine_check_exception(struct pt_regs *regs)
{
        struct machine_check_event evt;

        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return 0;

        /* Print things out */
        if (evt.version != MCE_V1) {
                pr_err("Machine Check Exception, Unknown event version %d !\n",
                       evt.version);
                return 0;
        }
        machine_check_print_event_info(&evt, user_mode(regs), false);

        if (recover_mce(regs, &evt))
                return 1;

        return 0;
}

long pseries_machine_check_realmode(struct pt_regs *regs)
{
        struct rtas_error_log *errp;
        int disposition;

        if (fwnmi_active) {
                errp = fwnmi_get_errinfo(regs);
                /*
                 * Calling fwnmi_release_errinfo() in real mode causes the
                 * kernel to panic, so call it only after mce_handle_error()
                 * has switched us into virtual mode.
                 */
                disposition = mce_handle_error(regs, errp);
                fwnmi_release_errinfo();

                /* Queue irq work to log this rtas event later. */
                irq_work_queue(&mce_errlog_process_work);

                if (disposition == RTAS_DISP_FULLY_RECOVERED)
                        return 1;
        }

        return 0;
}
