1/* 2 This file is provided under a dual BSD/GPLv2 license. When using or 3 redistributing this file, you may do so under either license. 4 5 GPL LICENSE SUMMARY 6 Copyright(c) 2014 Intel Corporation. 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of version 2 of the GNU General Public License as 9 published by the Free Software Foundation. 10 11 This program is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 Contact Information: 17 qat-linux@intel.com 18 19 BSD LICENSE 20 Copyright(c) 2014 Intel Corporation. 21 Redistribution and use in source and binary forms, with or without 22 modification, are permitted provided that the following conditions 23 are met: 24 25 * Redistributions of source code must retain the above copyright 26 notice, this list of conditions and the following disclaimer. 27 * Redistributions in binary form must reproduce the above copyright 28 notice, this list of conditions and the following disclaimer in 29 the documentation and/or other materials provided with the 30 distribution. 31 * Neither the name of Intel Corporation nor the names of its 32 contributors may be used to endorse or promote products derived 33 from this software without specific prior written permission. 34 35 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 36 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 37 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 38 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 39 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 40 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 41 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 42 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 43 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 44 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 45 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 46*/ 47#include <linux/kernel.h> 48#include <linux/pci.h> 49#include <linux/aer.h> 50#include <linux/completion.h> 51#include <linux/workqueue.h> 52#include <linux/delay.h> 53#include "adf_accel_devices.h" 54#include "adf_common_drv.h" 55 56static struct workqueue_struct *device_reset_wq; 57 58static pci_ers_result_t adf_error_detected(struct pci_dev *pdev, 59 pci_channel_state_t state) 60{ 61 struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); 62 63 dev_info(&pdev->dev, "Acceleration driver hardware error detected.\n"); 64 if (!accel_dev) { 65 dev_err(&pdev->dev, "Can't find acceleration device\n"); 66 return PCI_ERS_RESULT_DISCONNECT; 67 } 68 69 if (state == pci_channel_io_perm_failure) { 70 dev_err(&pdev->dev, "Can't recover from device error\n"); 71 return PCI_ERS_RESULT_DISCONNECT; 72 } 73 74 return PCI_ERS_RESULT_NEED_RESET; 75} 76 77/* reset dev data */ 78struct adf_reset_dev_data { 79 int mode; 80 struct adf_accel_dev *accel_dev; 81 struct completion compl; 82 struct work_struct reset_work; 83}; 84 85static void adf_dev_restore(struct adf_accel_dev *accel_dev) 86{ 87 struct pci_dev *pdev = accel_to_pci_dev(accel_dev); 88 struct pci_dev *parent = pdev->bus->self; 89 uint16_t bridge_ctl = 0; 90 91 dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", 92 accel_dev->accel_id); 93 94 if (!pci_wait_for_pending_transaction(pdev)) 95 dev_info(&GET_DEV(accel_dev), 96 "Transaction still in progress. Proceeding\n"); 97 98 pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl); 99 bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET; 100 pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); 101 msleep(100); 102 bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET; 103 pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); 104 msleep(100); 105 pci_restore_state(pdev); 106 pci_save_state(pdev); 107} 108 109static void adf_device_reset_worker(struct work_struct *work) 110{ 111 struct adf_reset_dev_data *reset_data = 112 container_of(work, struct adf_reset_dev_data, reset_work); 113 struct adf_accel_dev *accel_dev = reset_data->accel_dev; 114 115 adf_dev_restarting_notify(accel_dev); 116 adf_dev_stop(accel_dev); 117 adf_dev_shutdown(accel_dev); 118 adf_dev_restore(accel_dev); 119 if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) { 120 /* The device hanged and we can't restart it so stop here */ 121 dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); 122 kfree(reset_data); 123 WARN(1, "QAT: device restart failed. Device is unusable\n"); 124 return; 125 } 126 adf_dev_restarted_notify(accel_dev); 127 clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status); 128 129 /* The dev is back alive. Notify the caller if in sync mode */ 130 if (reset_data->mode == ADF_DEV_RESET_SYNC) 131 complete(&reset_data->compl); 132 else 133 kfree(reset_data); 134} 135 136static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, 137 enum adf_dev_reset_mode mode) 138{ 139 struct adf_reset_dev_data *reset_data; 140 141 if (!adf_dev_started(accel_dev) || 142 test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) 143 return 0; 144 145 set_bit(ADF_STATUS_RESTARTING, &accel_dev->status); 146 reset_data = kzalloc(sizeof(*reset_data), GFP_ATOMIC); 147 if (!reset_data) 148 return -ENOMEM; 149 reset_data->accel_dev = accel_dev; 150 init_completion(&reset_data->compl); 151 reset_data->mode = mode; 152 INIT_WORK(&reset_data->reset_work, adf_device_reset_worker); 153 queue_work(device_reset_wq, &reset_data->reset_work); 154 155 /* If in sync mode wait for the result */ 156 if (mode == ADF_DEV_RESET_SYNC) { 157 int ret = 0; 158 /* Maximum device reset time is 10 seconds */ 159 unsigned long wait_jiffies = msecs_to_jiffies(10000); 160 unsigned long timeout = wait_for_completion_timeout( 161 &reset_data->compl, wait_jiffies); 162 if (!timeout) { 163 dev_err(&GET_DEV(accel_dev), 164 "Reset device timeout expired\n"); 165 ret = -EFAULT; 166 } 167 kfree(reset_data); 168 return ret; 169 } 170 return 0; 171} 172 173static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev) 174{ 175 struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); 176 177 if (!accel_dev) { 178 pr_err("QAT: Can't find acceleration device\n"); 179 return PCI_ERS_RESULT_DISCONNECT; 180 } 181 pci_cleanup_aer_uncorrect_error_status(pdev); 182 if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC)) 183 return PCI_ERS_RESULT_DISCONNECT; 184 185 return PCI_ERS_RESULT_RECOVERED; 186} 187 188static void adf_resume(struct pci_dev *pdev) 189{ 190 dev_info(&pdev->dev, "Acceleration driver reset completed\n"); 191 dev_info(&pdev->dev, "Device is up and runnig\n"); 192} 193 194static struct pci_error_handlers adf_err_handler = { 195 .error_detected = adf_error_detected, 196 .slot_reset = adf_slot_reset, 197 .resume = adf_resume, 198}; 199 200/** 201 * adf_enable_aer() - Enable Advance Error Reporting for acceleration device 202 * @accel_dev: Pointer to acceleration device. 203 * @adf: PCI device driver owning the given acceleration device. 204 * 205 * Function enables PCI Advance Error Reporting for the 206 * QAT acceleration device accel_dev. 207 * To be used by QAT device specific drivers. 208 * 209 * Return: 0 on success, error code othewise. 210 */ 211int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf) 212{ 213 struct pci_dev *pdev = accel_to_pci_dev(accel_dev); 214 215 adf->err_handler = &adf_err_handler; 216 pci_enable_pcie_error_reporting(pdev); 217 return 0; 218} 219EXPORT_SYMBOL_GPL(adf_enable_aer); 220 221/** 222 * adf_disable_aer() - Enable Advance Error Reporting for acceleration device 223 * @accel_dev: Pointer to acceleration device. 224 * 225 * Function disables PCI Advance Error Reporting for the 226 * QAT acceleration device accel_dev. 227 * To be used by QAT device specific drivers. 228 * 229 * Return: void 230 */ 231void adf_disable_aer(struct adf_accel_dev *accel_dev) 232{ 233 struct pci_dev *pdev = accel_to_pci_dev(accel_dev); 234 235 pci_disable_pcie_error_reporting(pdev); 236} 237EXPORT_SYMBOL_GPL(adf_disable_aer); 238 239int adf_init_aer(void) 240{ 241 device_reset_wq = create_workqueue("qat_device_reset_wq"); 242 return !device_reset_wq ? -EFAULT : 0; 243} 244 245void adf_exit_aer(void) 246{ 247 if (device_reset_wq) 248 destroy_workqueue(device_reset_wq); 249 device_reset_wq = NULL; 250} 251