This source file includes the following definitions.
- print_core_checkstop_reason
- print_nx_checkstop_reason
- print_npu_checkstop_reason
- print_checkstop_reason
- print_hmi_event_info
- hmi_event_handler
- opal_handle_hmi_event
- opal_hmi_handler_init
1
2
3
4
5
6
7
8
9 #undef DEBUG
10
11 #include <linux/kernel.h>
12 #include <linux/init.h>
13 #include <linux/of.h>
14 #include <linux/mm.h>
15 #include <linux/slab.h>
16
17 #include <asm/opal.h>
18 #include <asm/cputable.h>
19 #include <asm/machdep.h>
20
21 #include "powernv.h"
22
/* Set to 1 by opal_hmi_handler_init() once the HMI notifier is registered. */
23 static int opal_hmi_handler_nb_init;
/* Queue entry holding a private copy of one HMI event awaiting processing. */
24 struct OpalHmiEvtNode {
25 struct list_head list; /* link on opal_hmi_evt_list */
26 struct OpalHMIEvent hmi_evt; /* event copied out of the OPAL message */
27 };
28
/* One row of a checkstop decode table used by the print_*_checkstop_reason() helpers. */
29 struct xstop_reason {
30 uint32_t xstop_reason; /* bit mask tested against the event's xstop_reason word */
31 const char *unit_failed; /* unit name printed in the "[Unit: ...]" field */
32 const char *description; /* text printed after the unit name */
33 };
34
/* HMI events queued by opal_handle_hmi_event() until hmi_event_handler() drains them. */
35 static LIST_HEAD(opal_hmi_evt_list);
/* Taken (irqsave) around every access to opal_hmi_evt_list. */
36 static DEFINE_SPINLOCK(opal_hmi_evt_lock);
37
38 static void print_core_checkstop_reason(const char *level,
39 struct OpalHMIEvent *hmi_evt)
40 {
41 int i;
42 static const struct xstop_reason xstop_reason[] = {
43 { CORE_CHECKSTOP_IFU_REGFILE, "IFU",
44 "RegFile core check stop" },
45 { CORE_CHECKSTOP_IFU_LOGIC, "IFU", "Logic core check stop" },
46 { CORE_CHECKSTOP_PC_DURING_RECOV, "PC",
47 "Core checkstop during recovery" },
48 { CORE_CHECKSTOP_ISU_REGFILE, "ISU",
49 "RegFile core check stop (mapper error)" },
50 { CORE_CHECKSTOP_ISU_LOGIC, "ISU", "Logic core check stop" },
51 { CORE_CHECKSTOP_FXU_LOGIC, "FXU", "Logic core check stop" },
52 { CORE_CHECKSTOP_VSU_LOGIC, "VSU", "Logic core check stop" },
53 { CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE, "PC",
54 "Recovery in maintenance mode" },
55 { CORE_CHECKSTOP_LSU_REGFILE, "LSU",
56 "RegFile core check stop" },
57 { CORE_CHECKSTOP_PC_FWD_PROGRESS, "PC",
58 "Forward Progress Error" },
59 { CORE_CHECKSTOP_LSU_LOGIC, "LSU", "Logic core check stop" },
60 { CORE_CHECKSTOP_PC_LOGIC, "PC", "Logic core check stop" },
61 { CORE_CHECKSTOP_PC_HYP_RESOURCE, "PC",
62 "Hypervisor Resource error - core check stop" },
63 { CORE_CHECKSTOP_PC_HANG_RECOV_FAILED, "PC",
64 "Hang Recovery Failed (core check stop)" },
65 { CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED, "PC",
66 "Ambiguous Hang Detected (unknown source)" },
67 { CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ, "PC",
68 "Debug Trigger Error inject" },
69 { CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ, "PC",
70 "Hypervisor check stop via SPRC/SPRD" },
71 };
72
73
74 if (!hmi_evt->u.xstop_error.xstop_reason) {
75 printk("%s Unknown Core check stop.\n", level);
76 return;
77 }
78
79 printk("%s CPU PIR: %08x\n", level,
80 be32_to_cpu(hmi_evt->u.xstop_error.u.pir));
81 for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
82 if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
83 xstop_reason[i].xstop_reason)
84 printk("%s [Unit: %-3s] %s\n", level,
85 xstop_reason[i].unit_failed,
86 xstop_reason[i].description);
87 }
88
89 static void print_nx_checkstop_reason(const char *level,
90 struct OpalHMIEvent *hmi_evt)
91 {
92 int i;
93 static const struct xstop_reason xstop_reason[] = {
94 { NX_CHECKSTOP_SHM_INVAL_STATE_ERR, "DMA & Engine",
95 "SHM invalid state error" },
96 { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1, "DMA & Engine",
97 "DMA invalid state error bit 15" },
98 { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2, "DMA & Engine",
99 "DMA invalid state error bit 16" },
100 { NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR, "DMA & Engine",
101 "Channel 0 invalid state error" },
102 { NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR, "DMA & Engine",
103 "Channel 1 invalid state error" },
104 { NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR, "DMA & Engine",
105 "Channel 2 invalid state error" },
106 { NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR, "DMA & Engine",
107 "Channel 3 invalid state error" },
108 { NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR, "DMA & Engine",
109 "Channel 4 invalid state error" },
110 { NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR, "DMA & Engine",
111 "Channel 5 invalid state error" },
112 { NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR, "DMA & Engine",
113 "Channel 6 invalid state error" },
114 { NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR, "DMA & Engine",
115 "Channel 7 invalid state error" },
116 { NX_CHECKSTOP_DMA_CRB_UE, "DMA & Engine",
117 "UE error on CRB(CSB address, CCB)" },
118 { NX_CHECKSTOP_DMA_CRB_SUE, "DMA & Engine",
119 "SUE error on CRB(CSB address, CCB)" },
120 { NX_CHECKSTOP_PBI_ISN_UE, "PowerBus Interface",
121 "CRB Kill ISN received while holding ISN with UE error" },
122 };
123
124
125 if (!hmi_evt->u.xstop_error.xstop_reason) {
126 printk("%s Unknown NX check stop.\n", level);
127 return;
128 }
129
130 printk("%s NX checkstop on CHIP ID: %x\n", level,
131 be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
132 for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
133 if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
134 xstop_reason[i].xstop_reason)
135 printk("%s [Unit: %-3s] %s\n", level,
136 xstop_reason[i].unit_failed,
137 xstop_reason[i].description);
138 }
139
140 static void print_npu_checkstop_reason(const char *level,
141 struct OpalHMIEvent *hmi_evt)
142 {
143 uint8_t reason, reason_count, i;
144
145
146
147
148
149 if (!hmi_evt->u.xstop_error.xstop_reason) {
150 printk("%s NPU checkstop on chip %x\n", level,
151 be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
152 return;
153 }
154
155
156
157
158
159
160
161
162
163
164
165 reason_count = sizeof(hmi_evt->u.xstop_error.xstop_reason) /
166 sizeof(reason);
167 for (i = 0; i < reason_count; i++) {
168 reason = (hmi_evt->u.xstop_error.xstop_reason >> (8 * i)) & 0xFF;
169 if (reason)
170 printk("%s NPU checkstop on chip %x: FIR%d bit %d is set\n",
171 level,
172 be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id),
173 reason >> 6, reason & 0x3F);
174 }
175 }
176
177 static void print_checkstop_reason(const char *level,
178 struct OpalHMIEvent *hmi_evt)
179 {
180 uint8_t type = hmi_evt->u.xstop_error.xstop_type;
181 switch (type) {
182 case CHECKSTOP_TYPE_CORE:
183 print_core_checkstop_reason(level, hmi_evt);
184 break;
185 case CHECKSTOP_TYPE_NX:
186 print_nx_checkstop_reason(level, hmi_evt);
187 break;
188 case CHECKSTOP_TYPE_NPU:
189 print_npu_checkstop_reason(level, hmi_evt);
190 break;
191 default:
192 printk("%s Unknown Malfunction Alert of type %d\n",
193 level, type);
194 break;
195 }
196 }
197
198 static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
199 {
200 const char *level, *sevstr, *error_info;
201 static const char *hmi_error_types[] = {
202 "Malfunction Alert",
203 "Processor Recovery done",
204 "Processor recovery occurred again",
205 "Processor recovery occurred for masked error",
206 "Timer facility experienced an error",
207 "TFMR SPR is corrupted",
208 "UPS (Uninterrupted Power System) Overflow indication",
209 "An XSCOM operation failure",
210 "An XSCOM operation completed",
211 "SCOM has set a reserved FIR bit to cause recovery",
212 "Debug trigger has set a reserved FIR bit to cause recovery",
213 "A hypervisor resource error occurred",
214 "CAPP recovery process is in progress",
215 };
216
217
218 if (hmi_evt->version < OpalHMIEvt_V1) {
219 pr_err("HMI Interrupt, Unknown event version %d !\n",
220 hmi_evt->version);
221 return;
222 }
223 switch (hmi_evt->severity) {
224 case OpalHMI_SEV_NO_ERROR:
225 level = KERN_INFO;
226 sevstr = "Harmless";
227 break;
228 case OpalHMI_SEV_WARNING:
229 level = KERN_WARNING;
230 sevstr = "";
231 break;
232 case OpalHMI_SEV_ERROR_SYNC:
233 level = KERN_ERR;
234 sevstr = "Severe";
235 break;
236 case OpalHMI_SEV_FATAL:
237 default:
238 level = KERN_ERR;
239 sevstr = "Fatal";
240 break;
241 }
242
243 printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
244 level, sevstr,
245 hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
246 "Recovered" : "Not recovered");
247 error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
248 hmi_error_types[hmi_evt->type]
249 : "Unknown";
250 printk("%s Error detail: %s\n", level, error_info);
251 printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer));
252 if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
253 (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
254 printk("%s TFMR: %016llx\n", level,
255 be64_to_cpu(hmi_evt->tfmr));
256
257 if (hmi_evt->version < OpalHMIEvt_V2)
258 return;
259
260
261 if (hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT)
262 print_checkstop_reason(level, hmi_evt);
263 }
264
265 static void hmi_event_handler(struct work_struct *work)
266 {
267 unsigned long flags;
268 struct OpalHMIEvent *hmi_evt;
269 struct OpalHmiEvtNode *msg_node;
270 uint8_t disposition;
271 struct opal_msg msg;
272 int unrecoverable = 0;
273
274 spin_lock_irqsave(&opal_hmi_evt_lock, flags);
275 while (!list_empty(&opal_hmi_evt_list)) {
276 msg_node = list_entry(opal_hmi_evt_list.next,
277 struct OpalHmiEvtNode, list);
278 list_del(&msg_node->list);
279 spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
280
281 hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt;
282 print_hmi_event_info(hmi_evt);
283 disposition = hmi_evt->disposition;
284 kfree(msg_node);
285
286
287
288
289
290
291
292 if (disposition != OpalHMI_DISPOSITION_RECOVERED)
293 unrecoverable = 1;
294
295 spin_lock_irqsave(&opal_hmi_evt_lock, flags);
296 }
297 spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
298
299 if (unrecoverable) {
300
301 while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
302 u32 type;
303
304 type = be32_to_cpu(msg.msg_type);
305
306
307 if (type != OPAL_MSG_HMI_EVT)
308 continue;
309
310
311 hmi_evt = (struct OpalHMIEvent *)&msg.params[0];
312 print_hmi_event_info(hmi_evt);
313 }
314
315 pnv_platform_error_reboot(NULL, "Unrecoverable HMI exception");
316 }
317 }
318
/* Work item that runs hmi_event_handler(); queued by opal_handle_hmi_event(). */
319 static DECLARE_WORK(hmi_event_work, hmi_event_handler);
320
321
322
323
324 static int opal_handle_hmi_event(struct notifier_block *nb,
325 unsigned long msg_type, void *msg)
326 {
327 unsigned long flags;
328 struct OpalHMIEvent *hmi_evt;
329 struct opal_msg *hmi_msg = msg;
330 struct OpalHmiEvtNode *msg_node;
331
332
333 if (msg_type != OPAL_MSG_HMI_EVT)
334 return 0;
335
336
337 hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0];
338
339
340 msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
341 if (!msg_node) {
342 pr_err("HMI: out of memory, Opal message event not handled\n");
343 return -ENOMEM;
344 }
345 memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(*hmi_evt));
346
347 spin_lock_irqsave(&opal_hmi_evt_lock, flags);
348 list_add(&msg_node->list, &opal_hmi_evt_list);
349 spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
350
351 schedule_work(&hmi_event_work);
352 return 0;
353 }
354
355 static struct notifier_block opal_hmi_handler_nb = {
356 .notifier_call = opal_handle_hmi_event,
357 .next = NULL,
358 .priority = 0,
359 };
360
361 int __init opal_hmi_handler_init(void)
362 {
363 int ret;
364
365 if (!opal_hmi_handler_nb_init) {
366 ret = opal_message_notifier_register(
367 OPAL_MSG_HMI_EVT, &opal_hmi_handler_nb);
368 if (ret) {
369 pr_err("%s: Can't register OPAL event notifier (%d)\n",
370 __func__, ret);
371 return ret;
372 }
373 opal_hmi_handler_nb_init = 1;
374 }
375 return 0;
376 }