/*
 * This file implements the platform-dependent EEH operations on the
 * powernv platform, which runs bare-metal on top of OPAL firmware
 * rather than under a hypervisor.
 *
 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/atomic.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>

#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/firmware.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/opal.h>
#include <asm/ppc-pci.h>

#include "powernv.h"
#include "pci.h"

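/*
 * Set once the OPAL event notifier below has been registered;
 * registration happens in pnv_eeh_post_init().
 */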
static bool pnv_eeh_nb_init = false;

/**
 * pnv_eeh_init - EEH platform dependent initialization
 *
 * EEH platform dependent initialization on powernv.
 */
static int pnv_eeh_init(void)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;

	/* We require OPALv3 */
	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
		pr_warn("%s: OPALv3 is required !\n",
			__func__);
		return -EINVAL;
	}

	/* Set probe mode */
	eeh_add_flag(EEH_PROBE_MODE_DEV);

	/*
	 * P7IOC blocks PCI config access to a frozen PE, but
	 * PHB3 doesn't. So we have to selectively enable I/O
	 * before collecting the error log.
	 */
	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;

		if (phb->model == PNV_PHB_MODEL_P7IOC)
			eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);

		/*
		 * PE#0 should be regarded as valid by the EEH core
		 * if it's not the reserved one. Currently, the
		 * reserved PE is PE#255 for PHB3 and PE#127 for
		 * P7IOC, so PE#0 should be treated as valid on
		 * both of them.
		 */
		if (phb->ioda.reserved_pe != 0)
			eeh_add_flag(EEH_VALID_PE_ZERO);

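		/*
		 * All PHBs in a given system share the same model,
		 * so checking the first one is sufficient.
		 */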
		break;
	}

	return 0;
}

static int pnv_eeh_event(struct notifier_block *nb,
			 unsigned long events, void *change)
{
	uint64_t changed_evts = (uint64_t)change;

	/*
	 * Send the special EEH event if EEH has been enabled;
	 * otherwise clear the pending events so that they don't
	 * fire once EEH is enabled later.
	 */
	if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
	    !(events & OPAL_EVENT_PCI_ERROR))
		return 0;

	if (eeh_enabled())
		eeh_send_failure_event(NULL);
	else
		opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);

	return 0;
}

static struct notifier_block pnv_eeh_nb = {
	.notifier_call	= pnv_eeh_event,
	.next		= NULL,
	.priority	= 0
};

#ifdef CONFIG_DEBUG_FS
static ssize_t pnv_eeh_ei_write(struct file *filp,
				const char __user *user_buf,
				size_t count, loff_t *ppos)
{
	struct pci_controller *hose = filp->private_data;
	struct eeh_dev *edev;
	struct eeh_pe *pe;
	int pe_no, type, func;
	unsigned long addr, mask;
	char buf[50];
	int ret;

	if (!eeh_ops || !eeh_ops->err_inject)
		return -ENXIO;

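	/*
	 * The expected input is "pe_no:type:func:addr:mask" with all
	 * fields in hex, matching the sscanf() format below. A
	 * hypothetical example, assuming the PHB's debugfs directory
	 * is /sys/kernel/debug/powerpc/PCI0000:
	 *
	 *   echo "0x1:0x0:0x0:0x0:0x0" > \
	 *       /sys/kernel/debug/powerpc/PCI0000/err_injct
	 */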
	/* Copy over argument buffer, leaving room for a NUL terminator */
	ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count);
	if (ret <= 0)
		return -EFAULT;
	buf[ret] = '\0';

	/* Retrieve parameters */
	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
		     &pe_no, &type, &func, &addr, &mask);
	if (ret != 5)
		return -EINVAL;

	/* Retrieve PE */
	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
	if (!edev)
		return -ENOMEM;
	edev->phb = hose;
	edev->pe_config_addr = pe_no;
	pe = eeh_pe_get(edev);
	kfree(edev);
	if (!pe)
		return -ENODEV;

	/* Do error injection */
	ret = eeh_ops->err_inject(pe, type, func, addr, mask);
	return ret < 0 ? ret : count;
}

static const struct file_operations pnv_eeh_ei_fops = {
	.open	= simple_open,
	.llseek	= no_llseek,
	.write	= pnv_eeh_ei_write,
};

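/*
 * Raw accessors for the PHB registers backing the error-injection
 * debugfs entries below. The offsets (0xD10, 0xD90, 0xE10) are
 * presumably the PHB's outbound and inbound error-injection control
 * registers; the roles are inferred from the names of the debugfs
 * entries they serve.
 */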
static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)
{
	struct pci_controller *hose = data;
	struct pnv_phb *phb = hose->private_data;

	out_be64(phb->regs + offset, val);
	return 0;
}

static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
{
	struct pci_controller *hose = data;
	struct pnv_phb *phb = hose->private_data;

	*val = in_be64(phb->regs + offset);
	return 0;
}

static int pnv_eeh_outb_dbgfs_set(void *data, u64 val)
{
	return pnv_eeh_dbgfs_set(data, 0xD10, val);
}

static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val)
{
	return pnv_eeh_dbgfs_get(data, 0xD10, val);
}

static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val)
{
	return pnv_eeh_dbgfs_set(data, 0xD90, val);
}

static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val)
{
	return pnv_eeh_dbgfs_get(data, 0xD90, val);
}

static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val)
{
	return pnv_eeh_dbgfs_set(data, 0xE10, val);
}

static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val)
{
	return pnv_eeh_dbgfs_get(data, 0xE10, val);
}

DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get,
			pnv_eeh_outb_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get,
			pnv_eeh_inbA_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get,
			pnv_eeh_inbB_dbgfs_set, "0x%llx\n");
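
/*
 * A hypothetical usage sketch for the attributes above, assuming the
 * PHB's debugfs directory is /sys/kernel/debug/powerpc/PCI0000:
 *
 *   echo 0x8000000000000000 > \
 *       /sys/kernel/debug/powerpc/PCI0000/err_injct_outbound
 *   cat /sys/kernel/debug/powerpc/PCI0000/err_injct_outbound
 *
 * Reads print the register contents in the "0x%llx" format declared
 * in the DEFINE_SIMPLE_ATTRIBUTE() wrappers.
 */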
#endif /* CONFIG_DEBUG_FS */

/**
 * pnv_eeh_post_init - EEH platform dependent post initialization
 *
 * EEH platform dependent post initialization on powernv. When
 * the function is called, the EEH PEs and devices should have
 * been built. Once the I/O address cache is in place as well,
 * EEH is ready to provide service.
 */
static int pnv_eeh_post_init(void)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	int ret = 0;

	/* Register OPAL event notifier */
	if (!pnv_eeh_nb_init) {
		ret = opal_notifier_register(&pnv_eeh_nb);
		if (ret) {
			pr_warn("%s: Can't register OPAL event notifier (%d)\n",
				__func__, ret);
			return ret;
		}

		pnv_eeh_nb_init = true;
	}

	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;

		/*
		 * If EEH is enabled, we're going to rely on it.
		 * Otherwise, we fall back to the conventional
		 * mechanism of clearing frozen PEs during PCI
		 * config accesses.
		 */
		if (eeh_enabled())
			phb->flags |= PNV_PHB_FLAG_EEH;
		else
			phb->flags &= ~PNV_PHB_FLAG_EEH;

		/* Create debugfs entries */
#ifdef CONFIG_DEBUG_FS
		if (phb->has_dbgfs || !phb->dbgfs)
			continue;

		phb->has_dbgfs = 1;
		debugfs_create_file("err_injct", 0200,
				    phb->dbgfs, hose,
				    &pnv_eeh_ei_fops);

		debugfs_create_file("err_injct_outbound", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_outb_dbgfs_ops);
		debugfs_create_file("err_injct_inboundA", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_inbA_dbgfs_ops);
		debugfs_create_file("err_injct_inboundB", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_inbB_dbgfs_ops);
#endif /* CONFIG_DEBUG_FS */
	}

	return ret;
}

static int pnv_eeh_cap_start(struct pci_dn *pdn)
{
	u32 status;

	if (!pdn)
		return 0;

	pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
	if (!(status & PCI_STATUS_CAP_LIST))
		return 0;

	return PCI_CAPABILITY_LIST;
}

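/*
 * Walk the standard PCI capability list by hand. At probe time the
 * struct pci_dev may not be bound yet, so the walk goes through
 * pnv_pci_cfg_read() on the pci_dn rather than the generic
 * pci_find_capability() helpers.
 */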
static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
{
	int pos = pnv_eeh_cap_start(pdn);
	int cnt = 48;   /* Maximal number of capabilities */
	u32 status, id;

	if (!pos)
		return 0;

	while (cnt--) {
		/*
		 * Read the next-capability pointer into a u32, as
		 * pnv_pci_cfg_read() expects, then validate it.
		 */
		pnv_pci_cfg_read(pdn, pos, 1, &status);
		if (status < 0x40)
			break;

		pos = status & ~3;
		pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
		if (id == 0xff)
			break;

		/* Found */
		if (id == cap)
			return pos;

		/* Next one */
		pos += PCI_CAP_LIST_NEXT;
	}

	return 0;
}

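/*
 * Walk the PCIe extended capability list, which starts at config
 * offset 0x100. Each capability header is 4 bytes and a capability
 * occupies at least 8 bytes, hence the (4096 - 256) / 8 bound on the
 * number of iterations.
 */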
static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	u32 header;
	int pos = 256, ttl = (4096 - 256) / 8;

	if (!edev || !edev->pcie_cap)
		return 0;
	if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
		return 0;
	else if (!header)
		return 0;

	while (ttl-- > 0) {
		if (PCI_EXT_CAP_ID(header) == cap && pos)
			return pos;

		pos = PCI_EXT_CAP_NEXT(header);
		if (pos < 256)
			break;

		if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
			break;
	}

	return 0;
}

/**
 * pnv_eeh_probe - Do probe on PCI device
 * @pdn: PCI device node
 * @data: unused
 *
 * When the EEH module is installed during system boot, all PCI devices
 * are checked one by one to see if they support EEH. This function
 * is introduced for that purpose. By default, EEH is enabled on all
 * PCI devices, so we only need to do the necessary initialization on
 * the corresponding eeh device and create a PE accordingly.
 *
 * Note that it's unsafe to retrieve the EEH device through the
 * corresponding PCI device. During a PCI device hotplug, which was
 * possibly triggered by the EEH core, the binding between the EEH
 * device and the PCI device isn't built yet.
 */
static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
{
	struct pci_controller *hose = pdn->phb;
	struct pnv_phb *phb = hose->private_data;
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	uint32_t pcie_flags;
	int ret;

	/*
	 * The root bridge has no subordinate PCI devices and no
	 * OF node, so there is no EEH device for it and nothing
	 * to probe. Devices already bound to a PE are skipped
	 * as well.
	 */
	if (!edev || edev->pe)
		return NULL;

	/* Skip for PCI-ISA bridge */
	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
		return NULL;

	/* Initialize eeh device */
	edev->class_code = pdn->class_code;
	edev->mode	&= 0xFFFFFF00;
	edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
	edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
	edev->aer_cap  = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
		edev->mode |= EEH_DEV_BRIDGE;
		if (edev->pcie_cap) {
			pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
					 2, &pcie_flags);
			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
				edev->mode |= EEH_DEV_ROOT_PORT;
			else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
				edev->mode |= EEH_DEV_DS_PORT;
		}
	}

	edev->config_addr    = (pdn->busno << 8) | (pdn->devfn);
	edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr];

	/* Create PE */
	ret = eeh_add_to_parent_pe(edev);
	if (ret) {
		pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n",
			__func__, hose->global_number, pdn->busno,
			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret);
		return NULL;
	}

	/*
	 * If the PE contains any one of the following adapters, the
	 * PCI config space can't be accessed when dumping the EEH log.
	 * Otherwise, we will run into a fenced PHB caused by a shortage
	 * of outbound credits in the adapter. The PCI config access
	 * should be blocked until PE reset; MMIO accesses are dropped
	 * by the hardware anyway. In order to drop PCI config requests,
	 * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
	 * is checked in the backend for PE state retrieval. If the
	 * PE becomes frozen for the first time and the flag has been
	 * set for the PE, we will set EEH_PE_CFG_BLOCKED for that PE
	 * to block its config space.
	 *
	 * Broadcom Austin 4-port NICs (14e4:1657)
	 * Broadcom Shiner 2-port 10G NICs (14e4:168e)
	 */
	if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
	     pdn->device_id == 0x1657) ||
	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
	     pdn->device_id == 0x168e))
		edev->pe->state |= EEH_PE_CFG_RESTRICTED;

	/*
	 * Cache the PE primary bus, which can't be fetched when
	 * full hotplug is in progress. In that case, all child
	 * PCI devices of the PE are expected to be removed prior
	 * to PE reset.
	 */
	if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
		edev->pe->bus = pci_find_bus(hose->global_number,
					     pdn->busno);
		if (edev->pe->bus)
			edev->pe->state |= EEH_PE_PRI_BUS;
	}

	/*
	 * Enable EEH explicitly so that we will do the EEH check
	 * while accessing I/O stuff
	 */
	eeh_add_flag(EEH_ENABLED);

	/* Save memory bars */
	eeh_save_bars(edev);

	return NULL;
}

/**
 * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
 * @pe: EEH PE
 * @option: operation to be issued
 *
 * The function is used to control the EEH functionality globally.
 * Currently, the following options are supported according to PAPR:
 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
 */
static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
{
	struct pci_controller *hose = pe->phb;
	struct pnv_phb *phb = hose->private_data;
	bool freeze_pe = false;
	int opt, ret = 0;
	s64 rc;

	/* Sanity check on option */
	switch (option) {
	case EEH_OPT_DISABLE:
		return -EPERM;
	case EEH_OPT_ENABLE:
		return 0;
	case EEH_OPT_THAW_MMIO:
		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
		break;
	case EEH_OPT_THAW_DMA:
		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
		break;
	case EEH_OPT_FREEZE_PE:
		freeze_pe = true;
		opt = OPAL_EEH_ACTION_SET_FREEZE_ALL;
		break;
	default:
		pr_warn("%s: Invalid option %d\n", __func__, option);
		return -EINVAL;
	}

	/* If the PHB supports compound PEs, let it handle the request */
	if (freeze_pe) {
		if (phb->freeze_pe) {
			phb->freeze_pe(phb, pe->addr);
		} else {
			rc = opal_pci_eeh_freeze_set(phb->opal_id,
						     pe->addr, opt);
			if (rc != OPAL_SUCCESS) {
				pr_warn("%s: Failure %lld freezing "
					"PHB#%x-PE#%x\n",
					__func__, rc,
					phb->hose->global_number, pe->addr);
				ret = -EIO;
			}
		}
	} else {
		if (phb->unfreeze_pe) {
			ret = phb->unfreeze_pe(phb, pe->addr, opt);
		} else {
			rc = opal_pci_eeh_freeze_clear(phb->opal_id,
						       pe->addr, opt);
			if (rc != OPAL_SUCCESS) {
				pr_warn("%s: Failure %lld enabling %d "
					"for PHB#%x-PE#%x\n",
					__func__, rc, option,
					phb->hose->global_number, pe->addr);
				ret = -EIO;
			}
		}
	}

	return ret;
}

/**
 * pnv_eeh_get_pe_addr - Retrieve PE address
 * @pe: EEH PE
 *
 * Retrieve the PE address according to the given traditional
 * PCI BDF (Bus/Device/Function) address. On powernv the PE
 * address is simply the PE number.
 */
static int pnv_eeh_get_pe_addr(struct eeh_pe *pe)
{
	return pe->addr;
}

static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	s64 rc;

	rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
					 PNV_PCI_DIAG_BUF_SIZE);
	if (rc != OPAL_SUCCESS)
		pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
			__func__, rc, pe->phb->global_number);
}

static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	u8 fstate;
	__be16 pcierr;
	s64 rc;
	int result = 0;

	rc = opal_pci_eeh_freeze_status(phb->opal_id,
					pe->addr,
					&fstate,
					&pcierr,
					NULL);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld getting PHB#%x state\n",
			__func__, rc, phb->hose->global_number);
		return EEH_STATE_NOT_SUPPORT;
	}

	/*
	 * Check the PHB state. If the PHB is frozen for the
	 * first time, dump the PHB diag-data.
	 */
	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
	} else if (!(pe->state & EEH_PE_ISOLATED)) {
		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
		pnv_eeh_get_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
}

static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	u8 fstate;
	__be16 pcierr;
	s64 rc;
	int result;

	/*
	 * We don't clobber the hardware frozen state until the
	 * PE reset is completed. To keep the EEH core moving
	 * forward, we have to return an operational state while
	 * the PE is in reset.
	 */
	if (pe->state & EEH_PE_RESET) {
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
		return result;
	}

	/*
	 * Fetch the PE state from hardware. If the PHB
	 * supports compound PEs, let it handle that.
	 */
	if (phb->get_pe_state) {
		fstate = phb->get_pe_state(phb, pe->addr);
	} else {
		rc = opal_pci_eeh_freeze_status(phb->opal_id,
						pe->addr,
						&fstate,
						&pcierr,
						NULL);
		if (rc != OPAL_SUCCESS) {
			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
				__func__, rc, phb->hose->global_number,
				pe->addr);
			return EEH_STATE_NOT_SUPPORT;
		}
	}

	/* Figure out state */
	switch (fstate) {
	case OPAL_EEH_STOPPED_NOT_FROZEN:
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
		break;
	case OPAL_EEH_STOPPED_MMIO_FREEZE:
		result = (EEH_STATE_DMA_ACTIVE |
			  EEH_STATE_DMA_ENABLED);
		break;
	case OPAL_EEH_STOPPED_DMA_FREEZE:
		result = (EEH_STATE_MMIO_ACTIVE |
			  EEH_STATE_MMIO_ENABLED);
		break;
	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
		result = 0;
		break;
	case OPAL_EEH_STOPPED_RESET:
		result = EEH_STATE_RESET_ACTIVE;
		break;
	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
		result = EEH_STATE_UNAVAILABLE;
		break;
	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
		result = EEH_STATE_NOT_SUPPORT;
		break;
	default:
		result = EEH_STATE_NOT_SUPPORT;
		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
			__func__, phb->hose->global_number,
			pe->addr, fstate);
	}

	/*
	 * If the PHB supports compound PEs, freeze all slave
	 * PEs for consistency.
	 *
	 * If the PE is switching to frozen state for the
	 * first time, dump the PHB diag-data.
	 */
	if (!(result & EEH_STATE_NOT_SUPPORT) &&
	    !(result & EEH_STATE_UNAVAILABLE) &&
	    !(result & EEH_STATE_MMIO_ACTIVE) &&
	    !(result & EEH_STATE_DMA_ACTIVE)  &&
	    !(pe->state & EEH_PE_ISOLATED)) {
		if (phb->freeze_pe)
			phb->freeze_pe(phb, pe->addr);

		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
		pnv_eeh_get_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
}

/**
 * pnv_eeh_get_state - Retrieve PE state
 * @pe: EEH PE
 * @delay: delay while PE state is temporarily unavailable
 *
 * Retrieve the state of the specified PE. On IODA-compatible
 * platforms the state should be retrieved from the IODA table,
 * so we prefer passing the request down to the hardware
 * implementation.
 */
static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
{
	int ret;

	if (pe->type & EEH_PE_PHB)
		ret = pnv_eeh_get_phb_state(pe);
	else
		ret = pnv_eeh_get_pe_state(pe);

	if (!delay)
		return ret;

	/*
	 * If the PE state is temporarily unavailable,
	 * tell the EEH core to delay for the default
	 * period (1 second).
	 */
	*delay = 0;
	if (ret & EEH_STATE_UNAVAILABLE)
		*delay = 1000;

	return ret;
}

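/*
 * Poll the PHB until an outstanding OPAL request completes. As used
 * here, a positive return value from opal_pci_poll() is the number of
 * milliseconds to wait before polling again, while zero or a negative
 * value means the request has finished or failed. Before the scheduler
 * is up we have to busy-wait with udelay() instead of sleeping.
 */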
static s64 pnv_eeh_phb_poll(struct pnv_phb *phb)
{
	s64 rc = OPAL_HARDWARE;

	while (1) {
		rc = opal_pci_poll(phb->opal_id);
		if (rc <= 0)
			break;

		if (system_state < SYSTEM_RUNNING)
			udelay(1000 * rc);
		else
			msleep(rc);
	}

	return rc;
}

int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
{
	struct pnv_phb *phb = hose->private_data;
	s64 rc = OPAL_HARDWARE;

	pr_debug("%s: Reset PHB#%x, option=%d\n",
		 __func__, hose->global_number, option);

	/* Issue PHB complete reset request */
	if (option == EEH_RESET_FUNDAMENTAL ||
	    option == EEH_RESET_HOT)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PHB_COMPLETE,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_DEACTIVATE)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PHB_COMPLETE,
				    OPAL_DEASSERT_RESET);
	if (rc < 0)
		goto out;

	/*
	 * Poll the state of the PHB until the request is done
	 * successfully. The PHB reset is usually a PHB complete
	 * reset followed by a hot reset on the root bus, so we
	 * also need the PCI bus settlement delay.
	 */
	rc = pnv_eeh_phb_poll(phb);
	if (option == EEH_RESET_DEACTIVATE) {
		if (system_state < SYSTEM_RUNNING)
			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
		else
			msleep(EEH_PE_RST_SETTLE_TIME);
	}
out:
	if (rc != OPAL_SUCCESS)
		return -EIO;

	return 0;
}

static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
{
	struct pnv_phb *phb = hose->private_data;
	s64 rc = OPAL_HARDWARE;

	pr_debug("%s: Reset PHB#%x, option=%d\n",
		 __func__, hose->global_number, option);

	/*
	 * During the reset deassert time, we needn't care about
	 * the reset scope because the firmware does nothing for
	 * fundamental or hot reset during the deassert phase.
	 */
	if (option == EEH_RESET_FUNDAMENTAL)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_FUNDAMENTAL,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_HOT)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_HOT,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_DEACTIVATE)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_HOT,
				    OPAL_DEASSERT_RESET);
	if (rc < 0)
		goto out;

	/* Poll the state of the PHB until the request is done */
	rc = pnv_eeh_phb_poll(phb);
	if (option == EEH_RESET_DEACTIVATE)
		msleep(EEH_PE_RST_SETTLE_TIME);
out:
	if (rc != OPAL_SUCCESS)
		return -EIO;

	return 0;
}

static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
{
	struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	int aer = edev ? edev->aer_cap : 0;
	u32 ctrl;

	pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
		 __func__, pci_domain_nr(dev->bus),
		 dev->bus->number, option);

	switch (option) {
	case EEH_RESET_FUNDAMENTAL:
	case EEH_RESET_HOT:
		/* Don't report linkDown event */
		if (aer) {
			eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					     4, &ctrl);
			ctrl |= PCI_ERR_UNC_SURPDN;
			eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					      4, ctrl);
		}

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
		eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);

		msleep(EEH_PE_RST_HOLD_TIME);
		break;
	case EEH_RESET_DEACTIVATE:
		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
		eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);

		msleep(EEH_PE_RST_SETTLE_TIME);

		/* Continue reporting linkDown event */
		if (aer) {
			eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					     4, &ctrl);
			ctrl &= ~PCI_ERR_UNC_SURPDN;
			eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					      4, ctrl);
		}

		break;
	}

	return 0;
}

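/*
 * Reset the secondary bus below the given device: for a device on
 * the root bus this means resetting the PHB's root port, otherwise
 * the upstream P2P bridge is reset. This is the powernv backend for
 * the generic secondary bus reset, presumably reached through
 * ppc_md.pcibios_reset_secondary_bus.
 */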
void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
{
	struct pci_controller *hose;

	if (pci_is_root_bus(dev->bus)) {
		hose = pci_bus_to_host(dev->bus);
		pnv_eeh_root_reset(hose, EEH_RESET_HOT);
		pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
	} else {
		pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
		pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
	}
}

/**
 * pnv_eeh_reset - Reset the specified PE
 * @pe: EEH PE
 * @option: reset option
 *
 * Do a reset on the indicated PE. For a PCI bus sensitive PE,
 * we need to reset the parent P2P bridge. The PHB has to be
 * reinitialized if the P2P bridge is the root bridge. For a
 * PCI device sensitive PE, we will try to reset the device
 * through FLR. For now, we don't have OPAL APIs to do a HARD
 * reset yet, so all resets are SOFT (HOT) resets.
 */
static int pnv_eeh_reset(struct eeh_pe *pe, int option)
{
	struct pci_controller *hose = pe->phb;
	struct pci_bus *bus;
	int ret;

	/*
	 * For PHB reset, we always have a complete reset. For those PEs whose
	 * primary bus derives from the root complex (root bus) or a root port
	 * (usually bus#1), we apply a hot or fundamental reset on the root
	 * port. For other PEs, we always do a hot reset on the PE primary bus.
	 *
	 * Here, our design differs from pHyp, which always clears the
	 * frozen state during PE reset. The idea here, from benh, is to
	 * keep the frozen state until the PE reset is done completely
	 * (until BAR restore). With the frozen state, the HW drops illegal
	 * I/O or MMIO accesses, which could otherwise trigger a recursive
	 * frozen PE during the reset. The side effect is that the EEH core
	 * has to clear the frozen state explicitly after BAR restore.
	 */
	if (pe->type & EEH_PE_PHB) {
		ret = pnv_eeh_phb_reset(hose, option);
	} else {
		struct pnv_phb *phb;
		s64 rc;

		/*
		 * The frozen PE might be caused by the PAPR error injection
		 * registers, which are expected to be cleared after hitting
		 * a frozen PE as stated in the hardware spec. Unfortunately,
		 * that's not true on P7IOC. So we have to clear them manually
		 * to avoid recursive EEH errors during recovery.
		 */
		phb = hose->private_data;
		if (phb->model == PNV_PHB_MODEL_P7IOC &&
		    (option == EEH_RESET_HOT ||
		    option == EEH_RESET_FUNDAMENTAL)) {
			rc = opal_pci_reset(phb->opal_id,
					    OPAL_RESET_PHB_ERROR,
					    OPAL_ASSERT_RESET);
			if (rc != OPAL_SUCCESS) {
				pr_warn("%s: Failure %lld clearing "
					"error injection registers\n",
					__func__, rc);
				return -EIO;
			}
		}

		bus = eeh_pe_bus_get(pe);
		if (pci_is_root_bus(bus) ||
			pci_is_root_bus(bus->parent))
			ret = pnv_eeh_root_reset(hose, option);
		else
			ret = pnv_eeh_bridge_reset(bus->self, option);
	}

	return ret;
}

/**
 * pnv_eeh_wait_state - Wait for PE state
 * @pe: EEH PE
 * @max_wait: maximal period in milliseconds
 *
 * Wait for the state of the associated PE. It might take some time
 * to retrieve the PE's state.
 */
static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
{
	int ret;
	int mwait;

	while (1) {
		ret = pnv_eeh_get_state(pe, &mwait);

		/*
		 * If the PE's state is temporarily unavailable,
		 * we have to wait for the specified time. Otherwise,
		 * the PE's state will be returned immediately.
		 */
		if (ret != EEH_STATE_UNAVAILABLE)
			return ret;

		max_wait -= mwait;
		if (max_wait <= 0) {
			pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
				__func__, pe->addr, max_wait);
			return EEH_STATE_NOT_SUPPORT;
		}

		msleep(mwait);
	}

	return EEH_STATE_NOT_SUPPORT;
}

/**
 * pnv_eeh_get_log - Retrieve error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 * @drv_log: driver log to be combined with the retrieved error log
 * @len: length of driver log
 *
 * Retrieve the temporary or permanent error log from the PE.
 */
static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,
			   char *drv_log, unsigned long len)
{
	if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);

	return 0;
}

/**
 * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
 * @pe: EEH PE
 *
 * The function will be called to reconfigure the bridges included
 * in the specified PE so that the malfunctioning PE can be recovered
 * again.
 */
static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
{
	return 0;
}

/**
 * pnv_eeh_err_inject - Inject specified error to the indicated PE
 * @pe: the indicated PE
 * @type: error type
 * @func: specific error type
 * @addr: address
 * @mask: address mask
 *
 * The routine is called to inject the specified error, which is
 * determined by @type and @func, to the indicated PE for
 * testing purposes.
 */
static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
			      unsigned long addr, unsigned long mask)
{
	struct pci_controller *hose = pe->phb;
	struct pnv_phb *phb = hose->private_data;
	s64 rc;

	/* Sanity check on error type */
	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
	    type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
		pr_warn("%s: Invalid error type %d\n",
			__func__, type);
		return -ERANGE;
	}

	if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
	    func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
		pr_warn("%s: Invalid error function %d\n",
			__func__, func);
		return -ERANGE;
	}

	/* Does the firmware support error injection? */
	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
		pr_warn("%s: Firmware doesn't support error injection\n",
			__func__);
		return -ENXIO;
	}

	/* Do error injection */
	rc = opal_pci_err_inject(phb->opal_id, pe->addr,
				 type, func, addr, mask);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld injecting error "
			"%d-%d to PHB#%x-PE#%x\n",
			__func__, rc, type, func,
			hose->global_number, pe->addr);
		return -EIO;
	}

	return 0;
}

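/*
 * Tell whether config access to the device's PE is currently blocked,
 * i.e. EEH_PE_CFG_BLOCKED has been set while the PE recovers (see the
 * EEH_PE_CFG_RESTRICTED handling in pnv_eeh_probe() above).
 */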
static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

	if (!edev || !edev->pe)
		return false;

	if (edev->pe->state & EEH_PE_CFG_BLOCKED)
		return true;

	return false;
}

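/*
 * Config accessors used by the EEH core. While a PE's config space is
 * blocked, reads return all-ones data, mirroring what a failed PCI
 * config read would return, and writes are dropped.
 */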
static int pnv_eeh_read_config(struct pci_dn *pdn,
			       int where, int size, u32 *val)
{
	if (!pdn)
		return PCIBIOS_DEVICE_NOT_FOUND;

	if (pnv_eeh_cfg_blocked(pdn)) {
		*val = 0xFFFFFFFF;
		return PCIBIOS_SET_FAILED;
	}

	return pnv_pci_cfg_read(pdn, where, size, val);
}

static int pnv_eeh_write_config(struct pci_dn *pdn,
				int where, int size, u32 val)
{
	if (!pdn)
		return PCIBIOS_DEVICE_NOT_FOUND;

	if (pnv_eeh_cfg_blocked(pdn))
		return PCIBIOS_SET_FAILED;

	return pnv_pci_cfg_write(pdn, where, size, val);
}

static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
{
	/* GEM */
	if (data->gemXfir || data->gemRfir ||
	    data->gemRirqfir || data->gemMask || data->gemRwof)
		pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->gemXfir),
			be64_to_cpu(data->gemRfir),
			be64_to_cpu(data->gemRirqfir),
			be64_to_cpu(data->gemMask),
			be64_to_cpu(data->gemRwof));

	/* LEM */
	if (data->lemFir || data->lemErrMask ||
	    data->lemAction0 || data->lemAction1 || data->lemWof)
		pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->lemFir),
			be64_to_cpu(data->lemErrMask),
			be64_to_cpu(data->lemAction0),
			be64_to_cpu(data->lemAction1),
			be64_to_cpu(data->lemWof));
}

static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
{
	struct pnv_phb *phb = hose->private_data;
	struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
	long rc;

	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
			__func__, phb->hub_id, rc);
		return;
	}

	switch (data->type) {
	case OPAL_P7IOC_DIAG_TYPE_RGC:
		pr_info("P7IOC diag-data for RGC\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
			pr_info("  RGC: %016llx %016llx\n",
				be64_to_cpu(data->rgc.rgcStatus),
				be64_to_cpu(data->rgc.rgcLdcp));
		break;
	case OPAL_P7IOC_DIAG_TYPE_BI:
		pr_info("P7IOC diag-data for BI %s\n\n",
			data->bi.biDownbound ? "Downbound" : "Upbound");
		pnv_eeh_dump_hub_diag_common(data);
		if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
		    data->bi.biLdcp2 || data->bi.biFenceStatus)
			pr_info("  BI:  %016llx %016llx %016llx %016llx\n",
				be64_to_cpu(data->bi.biLdcp0),
				be64_to_cpu(data->bi.biLdcp1),
				be64_to_cpu(data->bi.biLdcp2),
				be64_to_cpu(data->bi.biFenceStatus));
		break;
	case OPAL_P7IOC_DIAG_TYPE_CI:
		pr_info("P7IOC diag-data for CI Port %d\n\n",
			data->ci.ciPort);
		pnv_eeh_dump_hub_diag_common(data);
		if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
			pr_info("  CI:  %016llx %016llx\n",
				be64_to_cpu(data->ci.ciPortStatus),
				be64_to_cpu(data->ci.ciPortLdcp));
		break;
	case OPAL_P7IOC_DIAG_TYPE_MISC:
		pr_info("P7IOC diag-data for MISC\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		break;
	case OPAL_P7IOC_DIAG_TYPE_I2C:
		pr_info("P7IOC diag-data for I2C\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		break;
	default:
		pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
			__func__, phb->hub_id, data->type);
	}
}

static int pnv_eeh_get_pe(struct pci_controller *hose,
			  u16 pe_no, struct eeh_pe **pe)
{
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pnv_pe;
	struct eeh_pe *dev_pe;
	struct eeh_dev edev;

	/*
	 * If the PHB supports compound PEs, fetch the master
	 * PE because the slave PE is invisible to the EEH core.
	 */
	pnv_pe = &phb->ioda.pe_array[pe_no];
	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
		pnv_pe = pnv_pe->master;
		WARN_ON(!pnv_pe ||
			!(pnv_pe->flags & PNV_IODA_PE_MASTER));
		pe_no = pnv_pe->pe_number;
	}

	/* Find the PE according to PE# */
	memset(&edev, 0, sizeof(struct eeh_dev));
	edev.phb = hose;
	edev.pe_config_addr = pe_no;
	dev_pe = eeh_pe_get(&edev);
	if (!dev_pe)
		return -EEXIST;

	/* Freeze the (compound) PE */
	*pe = dev_pe;
	if (!(dev_pe->state & EEH_PE_ISOLATED))
		phb->freeze_pe(phb, pe_no);

	/*
	 * At this point, we're sure the (compound) PE should
	 * have been frozen. However, we still need to poke
	 * upward until we hit the topmost frozen PE.
	 */
	dev_pe = dev_pe->parent;
	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
		int ret;
		int active_flags = (EEH_STATE_MMIO_ACTIVE |
				    EEH_STATE_DMA_ACTIVE);

		ret = eeh_ops->get_state(dev_pe, NULL);
		if (ret <= 0 || (ret & active_flags) == active_flags) {
			dev_pe = dev_pe->parent;
			continue;
		}

		/* Frozen parent PE */
		*pe = dev_pe;
		if (!(dev_pe->state & EEH_PE_ISOLATED))
			phb->freeze_pe(phb, dev_pe->addr);

		/* Next one */
		dev_pe = dev_pe->parent;
	}

	return 0;
}

/**
 * pnv_eeh_next_error - Retrieve next EEH error to handle
 * @pe: Affected PE
 *
 * The function is expected to be called by the EEH core when it gets
 * a special EEH event (without a bound PE). It calls OPAL APIs to
 * retrieve the next error to handle. Informational errors are handled
 * internally by the platform; dead IOC, dead PHB, fenced PHB and
 * frozen PE errors are eventually handled by the EEH core.
 */
static int pnv_eeh_next_error(struct eeh_pe **pe)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct eeh_pe *phb_pe, *parent_pe;
	__be64 frozen_pe_no;
	__be16 err_type, severity;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	long rc;
	int state, ret = EEH_NEXT_ERR_NONE;

	/*
	 * While running here, it's safe to purge the event queue.
	 * And we should keep the cached OPAL notifier event
	 * synchronized between the kernel and firmware.
	 */
	eeh_remove_event(NULL, false);
	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);

	list_for_each_entry(hose, &hose_list, list_node) {
		/*
		 * If the subordinate PCI buses of the PHB have
		 * been removed or are undergoing error recovery,
		 * we needn't take care of it any more.
		 */
		phb = hose->private_data;
		phb_pe = eeh_phb_pe_get(hose);
		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
			continue;

		rc = opal_pci_next_error(phb->opal_id,
					 &frozen_pe_no, &err_type, &severity);
		if (rc != OPAL_SUCCESS) {
			pr_devel("%s: Invalid return value on "
				 "PHB#%x (0x%lx) from opal_pci_next_error\n",
				 __func__, hose->global_number, rc);
			continue;
		}

		/* If the PHB doesn't have an error, stop processing */
		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
			pr_devel("%s: No error found on PHB#%x\n",
				 __func__, hose->global_number);
			continue;
		}

		/*
		 * Process the error. When multiple errors are
		 * pending on the specific PHB, we expect the one
		 * with the highest priority to be reported.
		 */
		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
			__func__, be16_to_cpu(err_type),
			be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),
			hose->global_number);
		switch (be16_to_cpu(err_type)) {
		case OPAL_EEH_IOC_ERROR:
			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
				pr_err("EEH: dead IOC detected\n");
				ret = EEH_NEXT_ERR_DEAD_IOC;
			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
				pr_info("EEH: IOC informative error "
					"detected\n");
				pnv_eeh_get_and_dump_hub_diag(hose);
				ret = EEH_NEXT_ERR_NONE;
			}

			break;
		case OPAL_EEH_PHB_ERROR:
			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
				*pe = phb_pe;
				pr_err("EEH: dead PHB#%x detected, "
				       "location: %s\n",
					hose->global_number,
					eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_DEAD_PHB;
			} else if (be16_to_cpu(severity) ==
				   OPAL_EEH_SEV_PHB_FENCED) {
				*pe = phb_pe;
				pr_err("EEH: Fenced PHB#%x detected, "
				       "location: %s\n",
					hose->global_number,
					eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_FENCED_PHB;
			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
				pr_info("EEH: PHB#%x informative error "
					"detected, location: %s\n",
					hose->global_number,
					eeh_pe_loc_get(phb_pe));
				pnv_eeh_get_phb_diag(phb_pe);
				pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
				ret = EEH_NEXT_ERR_NONE;
			}

			break;
		case OPAL_EEH_PE_ERROR:
			/*
			 * If we can't find the corresponding PE, we
			 * just try to unfreeze.
			 */
			if (pnv_eeh_get_pe(hose,
				be64_to_cpu(frozen_pe_no), pe)) {
				/* Try best to clear it */
				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
					hose->global_number,
					be64_to_cpu(frozen_pe_no));
				pr_info("EEH: PHB location: %s\n",
					eeh_pe_loc_get(phb_pe));
				opal_pci_eeh_freeze_clear(phb->opal_id,
					be64_to_cpu(frozen_pe_no),
					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
				ret = EEH_NEXT_ERR_NONE;
			} else if ((*pe)->state & EEH_PE_ISOLATED ||
				   eeh_pe_passed(*pe)) {
				ret = EEH_NEXT_ERR_NONE;
			} else {
				pr_err("EEH: Frozen PE#%x "
				       "on PHB#%x detected\n",
				       (*pe)->addr,
					(*pe)->phb->global_number);
				pr_err("EEH: PE location: %s, "
				       "PHB location: %s\n",
				       eeh_pe_loc_get(*pe),
				       eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_FROZEN_PE;
			}

			break;
		default:
			pr_warn("%s: Unexpected error type %d\n",
				__func__, be16_to_cpu(err_type));
		}

		/*
		 * The EEH core will try to recover from a fenced PHB or
		 * frozen PE. For a frozen PE, the EEH core enables the
		 * I/O path before collecting logs, which clobbers the
		 * error state. So we have to dump the log in advance
		 * here.
		 */
		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
		    !((*pe)->state & EEH_PE_ISOLATED)) {
			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
			pnv_eeh_get_phb_diag(*pe);

			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
				pnv_pci_dump_phb_diag_data((*pe)->phb,
							   (*pe)->data);
		}

		/*
		 * We probably have a frozen parent PE out there,
		 * and we have to handle the frozen parent PE first.
		 */
		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
			parent_pe = (*pe)->parent;
			while (parent_pe) {
				/* Hit the ceiling ? */
				if (parent_pe->type & EEH_PE_PHB)
					break;

				/* Frozen parent PE ? */
				state = eeh_ops->get_state(parent_pe, NULL);
				if (state > 0 &&
				    (state & active_flags) != active_flags)
					*pe = parent_pe;

				/* Next parent level */
				parent_pe = parent_pe->parent;
			}

			/* We possibly migrate to another PE */
			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
		}

		/*
		 * If we have no errors on the specific PHB, or only
		 * an informative error there, we continue poking it.
		 * Otherwise, actions need to be taken by the upper
		 * layer.
		 */
		if (ret > EEH_NEXT_ERR_INF)
			break;
	}

	return ret;
}

static int pnv_eeh_restore_config(struct pci_dn *pdn)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	struct pnv_phb *phb;
	s64 ret;

	if (!edev)
		return -EEXIST;

	phb = edev->phb->private_data;
	ret = opal_pci_reinit(phb->opal_id,
			      OPAL_REINIT_PCI_DEV, edev->config_addr);
	if (ret) {
		pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
			__func__, edev->config_addr, ret);
		return -EIO;
	}

	return 0;
}

static struct eeh_ops pnv_eeh_ops = {
	.name			= "powernv",
	.init			= pnv_eeh_init,
	.post_init		= pnv_eeh_post_init,
	.probe			= pnv_eeh_probe,
	.set_option		= pnv_eeh_set_option,
	.get_pe_addr		= pnv_eeh_get_pe_addr,
	.get_state		= pnv_eeh_get_state,
	.reset			= pnv_eeh_reset,
	.wait_state		= pnv_eeh_wait_state,
	.get_log		= pnv_eeh_get_log,
	.configure_bridge	= pnv_eeh_configure_bridge,
	.err_inject		= pnv_eeh_err_inject,
	.read_config		= pnv_eeh_read_config,
	.write_config		= pnv_eeh_write_config,
	.next_error		= pnv_eeh_next_error,
	.restore_config		= pnv_eeh_restore_config
};

/**
 * eeh_powernv_init - Register platform dependent EEH operations
 *
 * EEH initialization on powernv platform. This function should be
 * called before any EEH related functions.
 */
static int __init eeh_powernv_init(void)
{
	int ret = -EINVAL;

	eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE);
	ret = eeh_ops_register(&pnv_eeh_ops);
	if (!ret)
		pr_info("EEH: PowerNV platform initialized\n");
	else
		pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);

	return ret;
}
machine_early_initcall(powernv, eeh_powernv_init);