/*
 * The file intends to implement the platform dependent EEH operations on
 * powernv platform. PowerNV ("non-virtualized") runs the kernel bare-metal
 * on top of OPAL firmware, without a hypervisor underneath.
 *
 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/atomic.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>

#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/firmware.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/opal.h>
#include <asm/ppc-pci.h>

#include "powernv.h"
#include "pci.h"

static bool pnv_eeh_nb_init = false;

/**
 * pnv_eeh_init - EEH platform dependent initialization
 *
 * EEH platform dependent initialization on powernv
 */
static int pnv_eeh_init(void)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;

	/* We require OPALv3 */
	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
		pr_warn("%s: OPALv3 is required!\n",
			__func__);
		return -EINVAL;
	}

	/* Set probe mode */
	eeh_add_flag(EEH_PROBE_MODE_DEV);

	/*
	 * P7IOC blocks PCI config access to frozen PEs, but PHB3
	 * doesn't do that. So we have to selectively enable I/O
	 * prior to collecting the error log.
	 */
	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;

		if (phb->model == PNV_PHB_MODEL_P7IOC)
			eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);

		/*
		 * PE#0 should be regarded as valid by the EEH core
		 * if it's not the reserved one. Currently, the
		 * reserved PEs are PE#0 on PHB3 and PE#127 on P7IOC
		 * respectively. So we should regard PE#0 as valid
		 * for P7IOC.
		 */
		if (phb->ioda.reserved_pe != 0)
			eeh_add_flag(EEH_VALID_PE_ZERO);

		break;
	}

	return 0;
}

static int pnv_eeh_event(struct notifier_block *nb,
			 unsigned long events, void *change)
{
	uint64_t changed_evts = (uint64_t)change;

	/*
	 * We simply send a special EEH event if EEH has been
	 * enabled. Otherwise, we clear the pending events in
	 * case we enable EEH soon.
	 */
	if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
	    !(events & OPAL_EVENT_PCI_ERROR))
		return 0;

	if (eeh_enabled())
		eeh_send_failure_event(NULL);
	else
		opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);

	return 0;
}

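/*
 * The notifier block below is registered with the OPAL event
 * framework in pnv_eeh_post_init(), so pnv_eeh_event() runs
 * whenever firmware flags OPAL_EVENT_PCI_ERROR.
 */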
static struct notifier_block pnv_eeh_nb = {
	.notifier_call	= pnv_eeh_event,
	.next		= NULL,
	.priority	= 0
};

#ifdef CONFIG_DEBUG_FS
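/*
 * Write handler for the "err_injct" debugfs file created in
 * pnv_eeh_post_init() below. The input format is
 * "pe_no:type:func:addr:mask" (all parsed as hex, see the sscanf()
 * below). A purely hypothetical example, assuming the usual per-PHB
 * debugfs directory:
 *
 *   echo "2:0:0:0x0:0x0" > /sys/kernel/debug/powerpc/PCI0000/err_injct
 *
 * would ask eeh_ops->err_inject() to inject an error into PE#2.
 */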
static ssize_t pnv_eeh_ei_write(struct file *filp,
				const char __user *user_buf,
				size_t count, loff_t *ppos)
{
	struct pci_controller *hose = filp->private_data;
	struct eeh_dev *edev;
	struct eeh_pe *pe;
	int pe_no, type, func;
	unsigned long addr, mask;
	char buf[50];
	int ret;

	if (!eeh_ops || !eeh_ops->err_inject)
		return -ENXIO;

	/* Copy over argument buffer */
	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
	if (!ret)
		return -EFAULT;

	/* Retrieve parameters */
	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
		     &pe_no, &type, &func, &addr, &mask);
	if (ret != 5)
		return -EINVAL;

	/* Retrieve PE */
	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
	if (!edev)
		return -ENOMEM;
	edev->phb = hose;
	edev->pe_config_addr = pe_no;
	pe = eeh_pe_get(edev);
	kfree(edev);
	if (!pe)
		return -ENODEV;

	/* Do error injection */
	ret = eeh_ops->err_inject(pe, type, func, addr, mask);
	return ret < 0 ? ret : count;
}

static const struct file_operations pnv_eeh_ei_fops = {
	.open	= simple_open,
	.llseek	= no_llseek,
	.write	= pnv_eeh_ei_write,
};

static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)
{
	struct pci_controller *hose = data;
	struct pnv_phb *phb = hose->private_data;

	out_be64(phb->regs + offset, val);
	return 0;
}

static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
{
	struct pci_controller *hose = data;
	struct pnv_phb *phb = hose->private_data;

	*val = in_be64(phb->regs + offset);
	return 0;
}

static int pnv_eeh_outb_dbgfs_set(void *data, u64 val)
{
	return pnv_eeh_dbgfs_set(data, 0xD10, val);
}

static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val)
{
	return pnv_eeh_dbgfs_get(data, 0xD10, val);
}

static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val)
{
	return pnv_eeh_dbgfs_set(data, 0xD90, val);
}

static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val)
{
	return pnv_eeh_dbgfs_get(data, 0xD90, val);
}

static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val)
{
	return pnv_eeh_dbgfs_set(data, 0xE10, val);
}

static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val)
{
	return pnv_eeh_dbgfs_get(data, 0xE10, val);
}

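/*
 * Wire the helpers above into simple debugfs attributes. Judging by
 * the names and offsets (0xD10, 0xD90, 0xE10), these poke the PHB's
 * outbound and inbound (A/B) error-injection registers; the precise
 * register semantics live in the PHB hardware documentation, not here.
 */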
DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get,
			pnv_eeh_outb_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get,
			pnv_eeh_inbA_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get,
			pnv_eeh_inbB_dbgfs_set, "0x%llx\n");
#endif /* CONFIG_DEBUG_FS */

/**
 * pnv_eeh_post_init - EEH platform dependent post initialization
 *
 * EEH platform dependent post initialization on powernv. When
 * this function is called, the EEH PEs and devices should have
 * been built. Once the I/O cache stuff has been built as well,
 * EEH is ready to supply service.
 */
static int pnv_eeh_post_init(void)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	int ret = 0;

	/* Register OPAL event notifier */
	if (!pnv_eeh_nb_init) {
		ret = opal_notifier_register(&pnv_eeh_nb);
		if (ret) {
			pr_warn("%s: Can't register OPAL event notifier (%d)\n",
				__func__, ret);
			return ret;
		}

		pnv_eeh_nb_init = true;
	}

	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;

		/*
		 * If EEH is enabled, we're going to rely on that.
		 * Otherwise, we fall back to the conventional
		 * mechanism of clearing frozen PEs during PCI
		 * config accesses.
		 */
		if (eeh_enabled())
			phb->flags |= PNV_PHB_FLAG_EEH;
		else
			phb->flags &= ~PNV_PHB_FLAG_EEH;

		/* Create debugfs entries */
#ifdef CONFIG_DEBUG_FS
		if (phb->has_dbgfs || !phb->dbgfs)
			continue;

		phb->has_dbgfs = 1;
		debugfs_create_file("err_injct", 0200,
				    phb->dbgfs, hose,
				    &pnv_eeh_ei_fops);

		debugfs_create_file("err_injct_outbound", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_outb_dbgfs_ops);
		debugfs_create_file("err_injct_inboundA", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_inbA_dbgfs_ops);
		debugfs_create_file("err_injct_inboundB", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_inbB_dbgfs_ops);
#endif /* CONFIG_DEBUG_FS */
	}

	return ret;
}

static int pnv_eeh_cap_start(struct pci_dn *pdn)
{
	u32 status;

	if (!pdn)
		return 0;

	pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
	if (!(status & PCI_STATUS_CAP_LIST))
		return 0;

	return PCI_CAPABILITY_LIST;
}

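/*
 * Standard PCI capability list walk using the platform config
 * accessors: follow the chain from PCI_CAPABILITY_LIST, stopping on
 * a pointer below 0x40 or an all-ones capability ID; the 48-entry
 * budget guards against looping lists.
 */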
static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
{
	int pos = pnv_eeh_cap_start(pdn);
	int cnt = 48;   /* Maximal number of capabilities */
	u32 id;

	if (!pos)
		return 0;

	while (cnt--) {
		pnv_pci_cfg_read(pdn, pos, 1, &pos);
		if (pos < 0x40)
			break;

		pos &= ~3;
		pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
		if (id == 0xff)
			break;

		/* Found */
		if (id == cap)
			return pos;

		/* Next one */
		pos += PCI_CAP_LIST_NEXT;
	}

	return 0;
}

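/*
 * The same walk for PCIe extended capabilities: headers start at
 * config offset 256 and each entry is at least 8 bytes long, hence
 * the (4096 - 256) / 8 iteration budget.
 */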
static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	u32 header;
	int pos = 256, ttl = (4096 - 256) / 8;

	if (!edev || !edev->pcie_cap)
		return 0;
	if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
		return 0;
	else if (!header)
		return 0;

	while (ttl-- > 0) {
		if (PCI_EXT_CAP_ID(header) == cap && pos)
			return pos;

		pos = PCI_EXT_CAP_NEXT(header);
		if (pos < 256)
			break;

		if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
			break;
	}

	return 0;
}

/**
 * pnv_eeh_probe - Do probe on PCI device
 * @pdn: PCI device node
 * @data: unused
 *
 * When the EEH module is installed during system boot, all PCI
 * devices are checked one by one to see whether they support EEH.
 * This function is introduced for that purpose. By default, EEH is
 * enabled on all PCI devices, so we only need to do the necessary
 * initialization on the corresponding eeh device and create the PE
 * accordingly.
 *
 * Note that it's unsafe to retrieve the EEH device through the
 * corresponding PCI device. During a PCI device hotplug, which was
 * possibly triggered by the EEH core, the binding between the EEH
 * device and the PCI device may not have been built yet.
 */
static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
{
	struct pci_controller *hose = pdn->phb;
	struct pnv_phb *phb = hose->private_data;
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	uint32_t pcie_flags;
	int ret;

	/*
	 * When probing the root bridge, which doesn't have any
	 * subordinate PCI devices, we don't have an OF node for
	 * it. So it's not reasonable to continue the probing.
	 */
	if (!edev || edev->pe)
		return NULL;

	/* Skip for PCI-ISA bridge */
	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
		return NULL;

	/* Initialize eeh device */
	edev->class_code = pdn->class_code;
	edev->mode	&= 0xFFFFFF00;
	edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
	edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
	edev->aer_cap  = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
		edev->mode |= EEH_DEV_BRIDGE;
		if (edev->pcie_cap) {
			pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
					 2, &pcie_flags);
			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
				edev->mode |= EEH_DEV_ROOT_PORT;
			else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
				edev->mode |= EEH_DEV_DS_PORT;
		}
	}

	edev->config_addr    = (pdn->busno << 8) | (pdn->devfn);
	edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr];

	/* Create PE */
	ret = eeh_add_to_parent_pe(edev);
	if (ret) {
		pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n",
			__func__, hose->global_number, pdn->busno,
			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret);
		return NULL;
	}

	/*
	 * If the PE contains any one of the following adapters, the
	 * PCI config space can't be accessed when dumping the EEH log.
	 * Otherwise, we will run into a fenced PHB caused by a shortage
	 * of outbound credits in the adapter. PCI config access should
	 * be blocked until the PE is reset. MMIO accesses are certainly
	 * dropped by hardware. In order to drop PCI config requests,
	 * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
	 * will be checked in the backend for PE state retrieval. If
	 * the PE becomes frozen for the first time and the flag has
	 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
	 * that PE to block its config space.
	 *
	 * Broadcom Austin 4-port NICs (14e4:1657)
	 * Broadcom Shiner 2-port 10G NICs (14e4:168e)
	 */
	if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
	     pdn->device_id == 0x1657) ||
	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
	     pdn->device_id == 0x168e))
		edev->pe->state |= EEH_PE_CFG_RESTRICTED;

	/*
	 * Cache the PE primary bus, which can't be fetched when
	 * full hotplug is in progress. In that case, all child
	 * PCI devices of the PE are expected to be removed prior
	 * to the PE reset.
	 */
	if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
		edev->pe->bus = pci_find_bus(hose->global_number,
					     pdn->busno);
		if (edev->pe->bus)
			edev->pe->state |= EEH_PE_PRI_BUS;
	}

	/*
	 * Enable EEH explicitly so that we will do the EEH check
	 * while accessing I/O stuff
	 */
	eeh_add_flag(EEH_ENABLED);

	/* Save memory bars */
	eeh_save_bars(edev);

	return NULL;
}

/**
 * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
 * @pe: EEH PE
 * @option: operation to be issued
 *
 * The function is used to control the EEH functionality globally.
 * Currently, the following options are supported according to PAPR:
 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
 */
static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
{
	struct pci_controller *hose = pe->phb;
	struct pnv_phb *phb = hose->private_data;
	bool freeze_pe = false;
	int opt, ret = 0;
	s64 rc;

	/* Sanity check on option */
	switch (option) {
	case EEH_OPT_DISABLE:
		return -EPERM;
	case EEH_OPT_ENABLE:
		return 0;
	case EEH_OPT_THAW_MMIO:
		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
		break;
	case EEH_OPT_THAW_DMA:
		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
		break;
	case EEH_OPT_FREEZE_PE:
		freeze_pe = true;
		opt = OPAL_EEH_ACTION_SET_FREEZE_ALL;
		break;
	default:
		pr_warn("%s: Invalid option %d\n", __func__, option);
		return -EINVAL;
	}

	/* Let the PHB handle compound PEs if it supports them */
	if (freeze_pe) {
		if (phb->freeze_pe) {
			phb->freeze_pe(phb, pe->addr);
		} else {
			rc = opal_pci_eeh_freeze_set(phb->opal_id,
						     pe->addr, opt);
			if (rc != OPAL_SUCCESS) {
				pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
					__func__, rc,
					phb->hose->global_number, pe->addr);
				ret = -EIO;
			}
		}
	} else {
		if (phb->unfreeze_pe) {
			ret = phb->unfreeze_pe(phb, pe->addr, opt);
		} else {
			rc = opal_pci_eeh_freeze_clear(phb->opal_id,
						       pe->addr, opt);
			if (rc != OPAL_SUCCESS) {
				pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
					__func__, rc, option,
					phb->hose->global_number, pe->addr);
				ret = -EIO;
			}
		}
	}

	return ret;
}

/**
 * pnv_eeh_get_pe_addr - Retrieve PE address
 * @pe: EEH PE
 *
 * Retrieve the PE address according to the given traditional
 * PCI BDF (Bus/Device/Function) address.
 */
static int pnv_eeh_get_pe_addr(struct eeh_pe *pe)
{
	return pe->addr;
}

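/*
 * Fetch the PHB diag-data into pe->data. The buffer is
 * PNV_PCI_DIAG_BUF_SIZE bytes; the EEH core is told to allocate
 * that much per-PE space via eeh_set_pe_aux_size() in
 * eeh_powernv_init() at the bottom of this file.
 */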
static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	s64 rc;

	rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
					 PNV_PCI_DIAG_BUF_SIZE);
	if (rc != OPAL_SUCCESS)
		pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
			__func__, rc, pe->phb->global_number);
}

static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	u8 fstate;
	__be16 pcierr;
	s64 rc;
	int result = 0;

	rc = opal_pci_eeh_freeze_status(phb->opal_id,
					pe->addr,
					&fstate,
					&pcierr,
					NULL);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld getting PHB#%x state\n",
			__func__, rc, phb->hose->global_number);
		return EEH_STATE_NOT_SUPPORT;
	}

	/*
	 * Check the PHB state. If the PHB is frozen for the
	 * first time, dump the PHB diag-data.
	 */
	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
	} else if (!(pe->state & EEH_PE_ISOLATED)) {
		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
		pnv_eeh_get_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
}

static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	u8 fstate;
	__be16 pcierr;
	s64 rc;
	int result;

	/*
	 * We don't clobber the hardware frozen state until the
	 * PE reset is completed. In order to keep the EEH core
	 * moving forward, we have to return an operational
	 * state during the PE reset.
	 */
	if (pe->state & EEH_PE_RESET) {
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
		return result;
	}

	/*
	 * Fetch the PE state from hardware. If the PHB
	 * supports compound PEs, let it handle that.
	 */
	if (phb->get_pe_state) {
		fstate = phb->get_pe_state(phb, pe->addr);
	} else {
		rc = opal_pci_eeh_freeze_status(phb->opal_id,
						pe->addr,
						&fstate,
						&pcierr,
						NULL);
		if (rc != OPAL_SUCCESS) {
			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
				__func__, rc, phb->hose->global_number,
				pe->addr);
			return EEH_STATE_NOT_SUPPORT;
		}
	}

	/* Figure out state */
	switch (fstate) {
	case OPAL_EEH_STOPPED_NOT_FROZEN:
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
		break;
	case OPAL_EEH_STOPPED_MMIO_FREEZE:
		result = (EEH_STATE_DMA_ACTIVE |
			  EEH_STATE_DMA_ENABLED);
		break;
	case OPAL_EEH_STOPPED_DMA_FREEZE:
		result = (EEH_STATE_MMIO_ACTIVE |
			  EEH_STATE_MMIO_ENABLED);
		break;
	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
		result = 0;
		break;
	case OPAL_EEH_STOPPED_RESET:
		result = EEH_STATE_RESET_ACTIVE;
		break;
	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
		result = EEH_STATE_UNAVAILABLE;
		break;
	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
		result = EEH_STATE_NOT_SUPPORT;
		break;
	default:
		result = EEH_STATE_NOT_SUPPORT;
		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
			__func__, phb->hose->global_number,
			pe->addr, fstate);
	}

	/*
	 * If the PHB supports compound PEs, freeze all slave
	 * PEs for consistency.
	 *
	 * If the PE is switching to the frozen state for the
	 * first time, dump the PHB diag-data.
	 */
	if (!(result & EEH_STATE_NOT_SUPPORT) &&
	    !(result & EEH_STATE_UNAVAILABLE) &&
	    !(result & EEH_STATE_MMIO_ACTIVE) &&
	    !(result & EEH_STATE_DMA_ACTIVE)  &&
	    !(pe->state & EEH_PE_ISOLATED)) {
		if (phb->freeze_pe)
			phb->freeze_pe(phb, pe->addr);

		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
		pnv_eeh_get_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
}

/**
 * pnv_eeh_get_state - Retrieve PE state
 * @pe: EEH PE
 * @delay: delay while PE state is temporarily unavailable
 *
 * Retrieve the state of the specified PE. On IODA-compatible
 * platforms it should be retrieved from the IODA table, so we
 * prefer passing it down to the hardware implementation to
 * handle.
 */
static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
{
	int ret;

	if (pe->type & EEH_PE_PHB)
		ret = pnv_eeh_get_phb_state(pe);
	else
		ret = pnv_eeh_get_pe_state(pe);

	if (!delay)
		return ret;

	/*
	 * If the PE state is temporarily unavailable,
	 * inform the EEH core to delay for the default
	 * period (1 second)
	 */
	*delay = 0;
	if (ret & EEH_STATE_UNAVAILABLE)
		*delay = 1000;

	return ret;
}

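/*
 * opal_pci_poll() returns a negative value on error, zero once the
 * outstanding request has completed, or a positive hint for how many
 * milliseconds to wait before polling again. Early in boot, before
 * the scheduler is fully up, we busy-wait with udelay() instead of
 * sleeping.
 */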
static s64 pnv_eeh_phb_poll(struct pnv_phb *phb)
{
	s64 rc = OPAL_HARDWARE;

	while (1) {
		rc = opal_pci_poll(phb->opal_id);
		if (rc <= 0)
			break;

		if (system_state < SYSTEM_RUNNING)
			udelay(1000 * rc);
		else
			msleep(rc);
	}

	return rc;
}

int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
{
	struct pnv_phb *phb = hose->private_data;
	s64 rc = OPAL_HARDWARE;

	pr_debug("%s: Reset PHB#%x, option=%d\n",
		 __func__, hose->global_number, option);

	/* Issue PHB complete reset request */
	if (option == EEH_RESET_FUNDAMENTAL ||
	    option == EEH_RESET_HOT)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PHB_COMPLETE,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_DEACTIVATE)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PHB_COMPLETE,
				    OPAL_DEASSERT_RESET);
	if (rc < 0)
		goto out;

	/*
	 * Poll the state of the PHB until the request is done
	 * successfully. The PHB reset is usually a PHB complete
	 * reset followed by a hot reset on the root bus, so we
	 * also need the PCI bus settlement delay.
	 */
	rc = pnv_eeh_phb_poll(phb);
	if (option == EEH_RESET_DEACTIVATE) {
		if (system_state < SYSTEM_RUNNING)
			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
		else
			msleep(EEH_PE_RST_SETTLE_TIME);
	}
out:
	if (rc != OPAL_SUCCESS)
		return -EIO;

	return 0;
}

static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
{
	struct pnv_phb *phb = hose->private_data;
	s64 rc = OPAL_HARDWARE;

	pr_debug("%s: Reset PHB#%x, option=%d\n",
		 __func__, hose->global_number, option);

	/*
	 * During the reset deassert time, we needn't care about
	 * the reset scope because the firmware does nothing for
	 * fundamental or hot reset during the deassert phase.
	 */
	if (option == EEH_RESET_FUNDAMENTAL)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_FUNDAMENTAL,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_HOT)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_HOT,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_DEACTIVATE)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_HOT,
				    OPAL_DEASSERT_RESET);
	if (rc < 0)
		goto out;

	/* Poll state of the PHB until the request is done */
	rc = pnv_eeh_phb_poll(phb);
	if (option == EEH_RESET_DEACTIVATE)
		msleep(EEH_PE_RST_SETTLE_TIME);
out:
	if (rc != OPAL_SUCCESS)
		return -EIO;

	return 0;
}

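/*
 * Reset the secondary bus of a bridge by toggling the bus-reset bit
 * in its bridge control register. While the reset is asserted the
 * link is expected to drop, so PCI_ERR_UNC_SURPDN ("surprise down")
 * is masked in the AER uncorrectable error mask beforehand and
 * unmasked again after the settle delay.
 */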
static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
{
	struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	int aer = edev ? edev->aer_cap : 0;
	u32 ctrl;

	pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
		 __func__, pci_domain_nr(dev->bus),
		 dev->bus->number, option);

	switch (option) {
	case EEH_RESET_FUNDAMENTAL:
	case EEH_RESET_HOT:
		/* Don't report linkDown event */
		if (aer) {
			eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					     4, &ctrl);
			ctrl |= PCI_ERR_UNC_SURPDN;
			eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					      4, ctrl);
		}

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
		eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);

		msleep(EEH_PE_RST_HOLD_TIME);
		break;
	case EEH_RESET_DEACTIVATE:
		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
		eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);

		msleep(EEH_PE_RST_SETTLE_TIME);

		/* Continue reporting linkDown event */
		if (aer) {
			eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					     4, &ctrl);
			ctrl &= ~PCI_ERR_UNC_SURPDN;
			eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					      4, ctrl);
		}

		break;
	}

	return 0;
}

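/*
 * Non-static: this is the platform-level secondary bus reset helper,
 * presumably hooked up elsewhere in the powernv PCI code (e.g. as
 * the pcibios_reset_secondary_bus machine hook).
 */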
void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
{
	struct pci_controller *hose;

	if (pci_is_root_bus(dev->bus)) {
		hose = pci_bus_to_host(dev->bus);
		pnv_eeh_root_reset(hose, EEH_RESET_HOT);
		pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
	} else {
		pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
		pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
	}
}

/**
 * pnv_eeh_reset - Reset the specified PE
 * @pe: EEH PE
 * @option: reset option
 *
 * Do a reset on the indicated PE. For a PCI-bus-sensitive PE,
 * we need to reset the parent p2p bridge. The PHB has to be
 * reinitialized if the p2p bridge is the root bridge. For a
 * PCI-device-sensitive PE, we will try to reset the device
 * through FLR. For now, we don't have OPAL APIs to do a HARD
 * reset yet, so all resets are SOFT (HOT) resets.
 */
static int pnv_eeh_reset(struct eeh_pe *pe, int option)
{
	struct pci_controller *hose = pe->phb;
	struct pci_bus *bus;
	int ret;

	/*
	 * For PHB reset, we always have a complete reset. For PEs whose
	 * primary bus is derived from the root complex (root bus) or a
	 * root port (usually bus#1), we apply a hot or fundamental reset
	 * on the root port. For other PEs, we always have a hot reset on
	 * the PE primary bus.
	 *
	 * Here, our design differs from pHyp, which always clears the
	 * frozen state during PE reset. The idea here, from benh, is to
	 * keep the frozen state until the PE reset is done completely
	 * (until BAR restore). With the frozen state, HW drops illegal
	 * IO or MMIO accesses, which could otherwise incur recursive
	 * frozen PEs during the PE reset. The side effect is that the
	 * EEH core has to clear the frozen state explicitly after BAR
	 * restore.
	 */
	if (pe->type & EEH_PE_PHB) {
		ret = pnv_eeh_phb_reset(hose, option);
	} else {
		struct pnv_phb *phb;
		s64 rc;

		/*
		 * The frozen PE might be caused by the PAPR error injection
		 * registers, which are expected to be cleared after hitting
		 * a frozen PE as stated in the hardware spec. Unfortunately,
		 * that's not true on P7IOC. So we have to clear it manually
		 * to avoid recursive EEH errors during recovery.
		 */
		phb = hose->private_data;
		if (phb->model == PNV_PHB_MODEL_P7IOC &&
		    (option == EEH_RESET_HOT ||
		    option == EEH_RESET_FUNDAMENTAL)) {
			rc = opal_pci_reset(phb->opal_id,
					    OPAL_RESET_PHB_ERROR,
					    OPAL_ASSERT_RESET);
			if (rc != OPAL_SUCCESS) {
				pr_warn("%s: Failure %lld clearing error injection registers\n",
					__func__, rc);
				return -EIO;
			}
		}

		bus = eeh_pe_bus_get(pe);
		if (pci_is_root_bus(bus) ||
			pci_is_root_bus(bus->parent))
			ret = pnv_eeh_root_reset(hose, option);
		else
			ret = pnv_eeh_bridge_reset(bus->self, option);
	}

	return ret;
}

/**
 * pnv_eeh_wait_state - Wait for PE state
 * @pe: EEH PE
 * @max_wait: maximal period in milliseconds
 *
 * Wait for the state of the associated PE. It might take some time
 * to retrieve the PE's state.
 */
static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
{
	int ret;
	int mwait;

	while (1) {
		ret = pnv_eeh_get_state(pe, &mwait);

		/*
		 * If the PE's state is temporarily unavailable,
		 * we have to wait for the specified time. Otherwise,
		 * the PE's state is returned immediately.
		 */
		if (ret != EEH_STATE_UNAVAILABLE)
			return ret;

		max_wait -= mwait;
		if (max_wait <= 0) {
			pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
				__func__, pe->addr, max_wait);
			return EEH_STATE_NOT_SUPPORT;
		}

		msleep(mwait);
	}

	return EEH_STATE_NOT_SUPPORT;
}

/**
 * pnv_eeh_get_log - Retrieve error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 * @drv_log: driver log to be combined with the retrieved error log
 * @len: length of driver log
 *
 * Retrieve the temporary or permanent error from the PE.
 */
static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,
			   char *drv_log, unsigned long len)
{
	if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);

	return 0;
}

/**
 * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
 * @pe: EEH PE
 *
 * The function will be called to reconfigure the bridges included
 * in the specified PE so that the malfunctioning PE can be recovered
 * again.
 */
static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
{
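	/*
	 * Nothing to do on powernv: bridge reconfiguration is handled
	 * by the kernel's own PCI resource code, unlike on pseries,
	 * where this step goes through RTAS.
	 */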
	return 0;
}

/**
 * pnv_eeh_err_inject - Inject specified error to the indicated PE
 * @pe: the indicated PE
 * @type: error type
 * @func: specific error type
 * @addr: address
 * @mask: address mask
 *
 * The routine is called to inject the specified error, which is
 * determined by @type and @func, into the indicated PE for
 * testing purposes.
 */
static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
			      unsigned long addr, unsigned long mask)
{
	struct pci_controller *hose = pe->phb;
	struct pnv_phb *phb = hose->private_data;
	s64 rc;

	/* Sanity check on error type */
	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
	    type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
		pr_warn("%s: Invalid error type %d\n",
			__func__, type);
		return -ERANGE;
	}

	if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
	    func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
		pr_warn("%s: Invalid error function %d\n",
			__func__, func);
		return -ERANGE;
	}

	/* Does the firmware support error injection? */
	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
		pr_warn("%s: Firmware doesn't support error injection\n",
			__func__);
		return -ENXIO;
	}

	/* Do error injection */
	rc = opal_pci_err_inject(phb->opal_id, pe->addr,
				 type, func, addr, mask);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld injecting error %d-%d to PHB#%x-PE#%x\n",
			__func__, rc, type, func,
			hose->global_number, pe->addr);
		return -EIO;
	}

	return 0;
}

static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

	if (!edev || !edev->pe)
		return false;

	if (edev->pe->state & EEH_PE_CFG_BLOCKED)
		return true;

	return false;
}

static int pnv_eeh_read_config(struct pci_dn *pdn,
			       int where, int size, u32 *val)
{
	if (!pdn)
		return PCIBIOS_DEVICE_NOT_FOUND;

	if (pnv_eeh_cfg_blocked(pdn)) {
		*val = 0xFFFFFFFF;
		return PCIBIOS_SET_FAILED;
	}

	return pnv_pci_cfg_read(pdn, where, size, val);
}

static int pnv_eeh_write_config(struct pci_dn *pdn,
				int where, int size, u32 val)
{
	if (!pdn)
		return PCIBIOS_DEVICE_NOT_FOUND;

	if (pnv_eeh_cfg_blocked(pdn))
		return PCIBIOS_SET_FAILED;

	return pnv_pci_cfg_write(pdn, where, size, val);
}

static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
{
	/* GEM */
	if (data->gemXfir || data->gemRfir ||
	    data->gemRirqfir || data->gemMask || data->gemRwof)
		pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->gemXfir),
			be64_to_cpu(data->gemRfir),
			be64_to_cpu(data->gemRirqfir),
			be64_to_cpu(data->gemMask),
			be64_to_cpu(data->gemRwof));

	/* LEM */
	if (data->lemFir || data->lemErrMask ||
	    data->lemAction0 || data->lemAction1 || data->lemWof)
		pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->lemFir),
			be64_to_cpu(data->lemErrMask),
			be64_to_cpu(data->lemAction0),
			be64_to_cpu(data->lemAction1),
			be64_to_cpu(data->lemWof));
}

static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
{
	struct pnv_phb *phb = hose->private_data;
	struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
	long rc;

	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
			__func__, phb->hub_id, rc);
		return;
	}

	switch (data->type) {
	case OPAL_P7IOC_DIAG_TYPE_RGC:
		pr_info("P7IOC diag-data for RGC\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
			pr_info("  RGC: %016llx %016llx\n",
				be64_to_cpu(data->rgc.rgcStatus),
				be64_to_cpu(data->rgc.rgcLdcp));
		break;
	case OPAL_P7IOC_DIAG_TYPE_BI:
		pr_info("P7IOC diag-data for BI %s\n\n",
			data->bi.biDownbound ? "Downbound" : "Upbound");
		pnv_eeh_dump_hub_diag_common(data);
		if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
		    data->bi.biLdcp2 || data->bi.biFenceStatus)
			pr_info("  BI:  %016llx %016llx %016llx %016llx\n",
				be64_to_cpu(data->bi.biLdcp0),
				be64_to_cpu(data->bi.biLdcp1),
				be64_to_cpu(data->bi.biLdcp2),
				be64_to_cpu(data->bi.biFenceStatus));
		break;
	case OPAL_P7IOC_DIAG_TYPE_CI:
		pr_info("P7IOC diag-data for CI Port %d\n\n",
			data->ci.ciPort);
		pnv_eeh_dump_hub_diag_common(data);
		if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
			pr_info("  CI:  %016llx %016llx\n",
				be64_to_cpu(data->ci.ciPortStatus),
				be64_to_cpu(data->ci.ciPortLdcp));
		break;
	case OPAL_P7IOC_DIAG_TYPE_MISC:
		pr_info("P7IOC diag-data for MISC\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		break;
	case OPAL_P7IOC_DIAG_TYPE_I2C:
		pr_info("P7IOC diag-data for I2C\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		break;
	default:
		pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
			__func__, phb->hub_id, data->type);
	}
}

static int pnv_eeh_get_pe(struct pci_controller *hose,
			  u16 pe_no, struct eeh_pe **pe)
{
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pnv_pe;
	struct eeh_pe *dev_pe;
	struct eeh_dev edev;

	/*
	 * If the PHB supports compound PEs, fetch the master PE
	 * because slave PEs are invisible to the EEH core.
	 */
	pnv_pe = &phb->ioda.pe_array[pe_no];
	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
		pnv_pe = pnv_pe->master;
		WARN_ON(!pnv_pe ||
			!(pnv_pe->flags & PNV_IODA_PE_MASTER));
		pe_no = pnv_pe->pe_number;
	}

	/* Find the PE according to PE# */
	memset(&edev, 0, sizeof(struct eeh_dev));
	edev.phb = hose;
	edev.pe_config_addr = pe_no;
	dev_pe = eeh_pe_get(&edev);
	if (!dev_pe)
		return -EEXIST;

	/* Freeze the (compound) PE */
	*pe = dev_pe;
	if (!(dev_pe->state & EEH_PE_ISOLATED))
		phb->freeze_pe(phb, pe_no);

	/*
	 * At this point, we're sure the (compound) PE should
	 * have been frozen. However, we still need to poke
	 * upwards until we hit the frozen PE at the top level.
	 */
	dev_pe = dev_pe->parent;
	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
		int ret;
		int active_flags = (EEH_STATE_MMIO_ACTIVE |
				    EEH_STATE_DMA_ACTIVE);

		ret = eeh_ops->get_state(dev_pe, NULL);
		if (ret <= 0 || (ret & active_flags) == active_flags) {
			dev_pe = dev_pe->parent;
			continue;
		}

		/* Frozen parent PE */
		*pe = dev_pe;
		if (!(dev_pe->state & EEH_PE_ISOLATED))
			phb->freeze_pe(phb, dev_pe->addr);

		/* Next one */
		dev_pe = dev_pe->parent;
	}

	return 0;
}

/**
 * pnv_eeh_next_error - Retrieve next EEH error to handle
 * @pe: Affected PE
 *
 * The function is expected to be called by the EEH core while it
 * gets a special EEH event (without a bound PE). It calls OPAL APIs
 * for the next error to handle. Informational errors are handled
 * internally by the platform. However, dead IOCs, dead PHBs, fenced
 * PHBs and frozen PEs should eventually be handled by the EEH core.
 */
static int pnv_eeh_next_error(struct eeh_pe **pe)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct eeh_pe *phb_pe, *parent_pe;
	__be64 frozen_pe_no;
	__be16 err_type, severity;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	long rc;
	int state, ret = EEH_NEXT_ERR_NONE;

	/*
	 * While running here, it's safe to purge the event queue.
	 * And we should keep the cached OPAL notifier events
	 * synchronized between the kernel and firmware.
	 */
	eeh_remove_event(NULL, false);
	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);

	list_for_each_entry(hose, &hose_list, list_node) {
		/*
		 * If the subordinate PCI buses of the PHB have been
		 * removed or are undergoing error recovery, we
		 * needn't take care of it any more.
		 */
		phb = hose->private_data;
		phb_pe = eeh_phb_pe_get(hose);
		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
			continue;

		rc = opal_pci_next_error(phb->opal_id,
					 &frozen_pe_no, &err_type, &severity);
		if (rc != OPAL_SUCCESS) {
			pr_devel("%s: Invalid return value on PHB#%x (0x%lx) from opal_pci_next_error\n",
				 __func__, hose->global_number, rc);
			continue;
		}

		/* If the PHB doesn't have an error, stop processing */
		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
			pr_devel("%s: No error found on PHB#%x\n",
				 __func__, hose->global_number);
			continue;
		}

		/*
		 * Process the error. When multiple errors are pending
		 * on the specific PHB, we expect the one with the
		 * highest priority to be reported.
		 */
		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
			__func__, be16_to_cpu(err_type),
			be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),
			hose->global_number);
		switch (be16_to_cpu(err_type)) {
		case OPAL_EEH_IOC_ERROR:
			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
				pr_err("EEH: dead IOC detected\n");
				ret = EEH_NEXT_ERR_DEAD_IOC;
			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
				pr_info("EEH: IOC informative error detected\n");
				pnv_eeh_get_and_dump_hub_diag(hose);
				ret = EEH_NEXT_ERR_NONE;
			}

			break;
		case OPAL_EEH_PHB_ERROR:
			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
				*pe = phb_pe;
				pr_err("EEH: dead PHB#%x detected, location: %s\n",
					hose->global_number,
					eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_DEAD_PHB;
			} else if (be16_to_cpu(severity) ==
				   OPAL_EEH_SEV_PHB_FENCED) {
				*pe = phb_pe;
				pr_err("EEH: Fenced PHB#%x detected, location: %s\n",
					hose->global_number,
					eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_FENCED_PHB;
			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
				pr_info("EEH: PHB#%x informative error detected, location: %s\n",
					hose->global_number,
					eeh_pe_loc_get(phb_pe));
				pnv_eeh_get_phb_diag(phb_pe);
				pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
				ret = EEH_NEXT_ERR_NONE;
			}

			break;
		case OPAL_EEH_PE_ERROR:
			/*
			 * If we can't find the corresponding PE, we
			 * just try to unfreeze.
			 */
			if (pnv_eeh_get_pe(hose,
				be64_to_cpu(frozen_pe_no), pe)) {
				/* Try best to clear it */
				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
					hose->global_number,
					be64_to_cpu(frozen_pe_no));
				pr_info("EEH: PHB location: %s\n",
					eeh_pe_loc_get(phb_pe));
				opal_pci_eeh_freeze_clear(phb->opal_id,
					be64_to_cpu(frozen_pe_no),
					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
				ret = EEH_NEXT_ERR_NONE;
			} else if ((*pe)->state & EEH_PE_ISOLATED ||
				   eeh_pe_passed(*pe)) {
				ret = EEH_NEXT_ERR_NONE;
			} else {
				pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
				       (*pe)->addr,
				       (*pe)->phb->global_number);
				pr_err("EEH: PE location: %s, PHB location: %s\n",
				       eeh_pe_loc_get(*pe),
				       eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_FROZEN_PE;
			}

			break;
		default:
			pr_warn("%s: Unexpected error type %d\n",
				__func__, be16_to_cpu(err_type));
		}

		/*
		 * The EEH core will try to recover from fenced PHBs or
		 * frozen PEs. For a frozen PE, the EEH core enables the
		 * I/O path before collecting logs, but that disturbs
		 * the error site. So we have to dump the log in advance
		 * here.
		 */
		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
		    !((*pe)->state & EEH_PE_ISOLATED)) {
			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
			pnv_eeh_get_phb_diag(*pe);

			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
				pnv_pci_dump_phb_diag_data((*pe)->phb,
							   (*pe)->data);
		}

		/*
		 * We probably have a frozen parent PE out there, and
		 * we need to handle the frozen parent PE first.
		 */
		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
			parent_pe = (*pe)->parent;
			while (parent_pe) {
				/* Hit the ceiling ? */
				if (parent_pe->type & EEH_PE_PHB)
					break;

				/* Frozen parent PE ? */
				state = eeh_ops->get_state(parent_pe, NULL);
				if (state > 0 &&
				    (state & active_flags) != active_flags)
					*pe = parent_pe;

				/* Next parent level */
				parent_pe = parent_pe->parent;
			}

			/* We possibly migrate to another PE */
			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
		}

		/*
		 * If we have no errors on the specific PHB or only
		 * informative errors there, we continue poking it.
		 * Otherwise, actions need to be taken by the upper
		 * layer.
		 */
		if (ret > EEH_NEXT_ERR_INF)
			break;
	}

	return ret;
}

static int pnv_eeh_restore_config(struct pci_dn *pdn)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	struct pnv_phb *phb;
	s64 ret;

	if (!edev)
		return -EEXIST;

	phb = edev->phb->private_data;
	ret = opal_pci_reinit(phb->opal_id,
			      OPAL_REINIT_PCI_DEV, edev->config_addr);
	if (ret) {
		pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
			__func__, edev->config_addr, ret);
		return -EIO;
	}

	return 0;
}

static struct eeh_ops pnv_eeh_ops = {
	.name			= "powernv",
	.init			= pnv_eeh_init,
	.post_init		= pnv_eeh_post_init,
	.probe			= pnv_eeh_probe,
	.set_option		= pnv_eeh_set_option,
	.get_pe_addr		= pnv_eeh_get_pe_addr,
	.get_state		= pnv_eeh_get_state,
	.reset			= pnv_eeh_reset,
	.wait_state		= pnv_eeh_wait_state,
	.get_log		= pnv_eeh_get_log,
	.configure_bridge	= pnv_eeh_configure_bridge,
	.err_inject		= pnv_eeh_err_inject,
	.read_config		= pnv_eeh_read_config,
	.write_config		= pnv_eeh_write_config,
	.next_error		= pnv_eeh_next_error,
	.restore_config		= pnv_eeh_restore_config
};

/**
 * eeh_powernv_init - Register platform dependent EEH operations
 *
 * EEH initialization on the powernv platform. This function should
 * be called before any other EEH related functions.
 */
static int __init eeh_powernv_init(void)
{
	int ret = -EINVAL;

	eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE);
	ret = eeh_ops_register(&pnv_eeh_ops);
	if (!ret)
		pr_info("EEH: PowerNV platform initialized\n");
	else
		pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);

	return ret;
}
machine_early_initcall(powernv, eeh_powernv_init);