root/arch/powerpc/kernel/eeh_event.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. eeh_event_handler
  2. eeh_event_init
  3. __eeh_send_failure_event
  4. eeh_send_failure_event
  5. eeh_remove_event

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  *
   4  * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
   5  */
   6 
   7 #include <linux/delay.h>
   8 #include <linux/list.h>
   9 #include <linux/sched.h>
  10 #include <linux/semaphore.h>
  11 #include <linux/pci.h>
  12 #include <linux/slab.h>
  13 #include <linux/kthread.h>
  14 #include <asm/eeh_event.h>
  15 #include <asm/ppc-pci.h>
  16 
  17 /** Overview:
  18  *  EEH error states may be detected within exception handlers;
  19  *  however, the recovery processing needs to occur asynchronously
  20  *  in a normal kernel context and not an interrupt context.
   21  *  These routines create an event and queue it onto a list,
   22  *  from which a dedicated kernel thread ("eehd") drives recovery.
  23  */
  24 
   25 static DEFINE_SPINLOCK(eeh_eventlist_lock); /* taken with irqsave around every eeh_eventlist access in this file */
   26 static DECLARE_COMPLETION(eeh_eventlist_event); /* completed once per queued event; eeh_event_handler() waits on it */
   27 static LIST_HEAD(eeh_eventlist); /* pending struct eeh_event entries, linked through their ->list member */
  28 
  29 /**
  30  * eeh_event_handler - Dispatch EEH events.
  31  * @dummy - unused
  32  *
  33  * The detection of a frozen slot can occur inside an interrupt,
  34  * where it can be hard to do anything about it.  The goal of this
  35  * routine is to pull these detection events out of the context
  36  * of the interrupt handler, and re-dispatch them for processing
  37  * at a later time in a normal context.
  38  */
  39 static int eeh_event_handler(void * dummy)
  40 {
  41         unsigned long flags;
  42         struct eeh_event *event;
  43 
  44         while (!kthread_should_stop()) {
  45                 if (wait_for_completion_interruptible(&eeh_eventlist_event))
  46                         break;
  47 
  48                 /* Fetch EEH event from the queue */
  49                 spin_lock_irqsave(&eeh_eventlist_lock, flags);
  50                 event = NULL;
  51                 if (!list_empty(&eeh_eventlist)) {
  52                         event = list_entry(eeh_eventlist.next,
  53                                            struct eeh_event, list);
  54                         list_del(&event->list);
  55                 }
  56                 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
  57                 if (!event)
  58                         continue;
  59 
  60                 /* We might have event without binding PE */
  61                 if (event->pe)
  62                         eeh_handle_normal_event(event->pe);
  63                 else
  64                         eeh_handle_special_event();
  65 
  66                 kfree(event);
  67         }
  68 
  69         return 0;
  70 }
  71 
  72 /**
  73  * eeh_event_init - Start kernel thread to handle EEH events
  74  *
  75  * This routine is called to start the kernel thread for processing
  76  * EEH event.
  77  */
  78 int eeh_event_init(void)
  79 {
  80         struct task_struct *t;
  81         int ret = 0;
  82 
  83         t = kthread_run(eeh_event_handler, NULL, "eehd");
  84         if (IS_ERR(t)) {
  85                 ret = PTR_ERR(t);
  86                 pr_err("%s: Failed to start EEH daemon (%d)\n",
  87                         __func__, ret);
  88                 return ret;
  89         }
  90 
  91         return 0;
  92 }
  93 
  94 /**
  95  * eeh_send_failure_event - Generate a PCI error event
  96  * @pe: EEH PE
  97  *
  98  * This routine can be called within an interrupt context;
  99  * the actual event will be delivered in a normal context
 100  * (from a workqueue).
 101  */
 102 int __eeh_send_failure_event(struct eeh_pe *pe)
 103 {
 104         unsigned long flags;
 105         struct eeh_event *event;
 106 
 107         event = kzalloc(sizeof(*event), GFP_ATOMIC);
 108         if (!event) {
 109                 pr_err("EEH: out of memory, event not handled\n");
 110                 return -ENOMEM;
 111         }
 112         event->pe = pe;
 113 
 114         /*
 115          * Mark the PE as recovering before inserting it in the queue.
 116          * This prevents the PE from being free()ed by a hotplug driver
 117          * while the PE is sitting in the event queue.
 118          */
 119         if (pe) {
 120 #ifdef CONFIG_STACKTRACE
 121                 /*
 122                  * Save the current stack trace so we can dump it from the
 123                  * event handler thread.
 124                  */
 125                 pe->trace_entries = stack_trace_save(pe->stack_trace,
 126                                          ARRAY_SIZE(pe->stack_trace), 0);
 127 #endif /* CONFIG_STACKTRACE */
 128 
 129                 eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
 130         }
 131 
 132         /* We may or may not be called in an interrupt context */
 133         spin_lock_irqsave(&eeh_eventlist_lock, flags);
 134         list_add(&event->list, &eeh_eventlist);
 135         spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
 136 
 137         /* For EEH deamon to knick in */
 138         complete(&eeh_eventlist_event);
 139 
 140         return 0;
 141 }
 142 
 143 int eeh_send_failure_event(struct eeh_pe *pe)
 144 {
 145         /*
 146          * If we've manually supressed recovery events via debugfs
 147          * then just drop it on the floor.
 148          */
 149         if (eeh_debugfs_no_recover) {
 150                 pr_err("EEH: Event dropped due to no_recover setting\n");
 151                 return 0;
 152         }
 153 
 154         return __eeh_send_failure_event(pe);
 155 }
 156 
 157 /**
 158  * eeh_remove_event - Remove EEH event from the queue
 159  * @pe: Event binding to the PE
 160  * @force: Event will be removed unconditionally
 161  *
 162  * On PowerNV platform, we might have subsequent coming events
 163  * is part of the former one. For that case, those subsequent
 164  * coming events are totally duplicated and unnecessary, thus
 165  * they should be removed.
 166  */
 167 void eeh_remove_event(struct eeh_pe *pe, bool force)
 168 {
 169         unsigned long flags;
 170         struct eeh_event *event, *tmp;
 171 
 172         /*
 173          * If we have NULL PE passed in, we have dead IOC
 174          * or we're sure we can report all existing errors
 175          * by the caller.
 176          *
 177          * With "force", the event with associated PE that
 178          * have been isolated, the event won't be removed
 179          * to avoid event lost.
 180          */
 181         spin_lock_irqsave(&eeh_eventlist_lock, flags);
 182         list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
 183                 if (!force && event->pe &&
 184                     (event->pe->state & EEH_PE_ISOLATED))
 185                         continue;
 186 
 187                 if (!pe) {
 188                         list_del(&event->list);
 189                         kfree(event);
 190                 } else if (pe->type & EEH_PE_PHB) {
 191                         if (event->pe && event->pe->phb == pe->phb) {
 192                                 list_del(&event->list);
 193                                 kfree(event);
 194                         }
 195                 } else if (event->pe == pe) {
 196                         list_del(&event->list);
 197                         kfree(event);
 198                 }
 199         }
 200         spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
 201 }

/* [<][>][^][v][top][bottom][index][help] */