/*
 * PCI Backend Operations - respond to PCI requests from Frontend
 *
 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/wait.h>
#include <linux/bitops.h>
#include <xen/events.h>
#include <linux/sched.h>
#include "pciback.h"

int verbose_request;
module_param(verbose_request, int, 0644);

static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);

/* Ensure a device has the fake IRQ handler "turned on/off" and is
 * ready to be exported. This MUST be run after xen_pcibk_reset_device,
 * which does the actual PCI device enable/disable.
 */
static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
{
	struct xen_pcibk_dev_data *dev_data;
	int rc;
	int enable = 0;

	dev_data = pci_get_drvdata(dev);
	if (!dev_data)
		return;

	/* We don't deal with bridges */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return;

	if (reset) {
		dev_data->enable_intx = 0;
		dev_data->ack_intr = 0;
	}
	enable = dev_data->enable_intx;

	/* Asked to disable, but the ISR isn't running */
	if (!enable && !dev_data->isr_on)
		return;

	/* Squirrel away the IRQ in dev_data. We need this because when
	 * the device transitions to MSI, dev->irq is overwritten with
	 * the MSI vector (see the illustrative note after this function).
	 */
	if (enable)
		dev_data->irq = dev->irq;

	/*
	 * SR-IOV devices all use MSI-X and have no legacy
	 * interrupts, so inhibit creating a fake IRQ handler for them.
	 */
	if (dev_data->irq == 0)
		goto out;

	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
		dev_data->irq_name,
		dev_data->irq,
		pci_is_enabled(dev) ? "on" : "off",
		dev->msi_enabled ? "MSI" : "",
		dev->msix_enabled ? "MSI/X" : "",
		dev_data->isr_on ? "enable" : "disable",
		enable ? "enable" : "disable");

	if (enable) {
		rc = request_irq(dev_data->irq,
				 xen_pcibk_guest_interrupt, IRQF_SHARED,
				 dev_data->irq_name, dev);
		if (rc) {
			dev_err(&dev->dev, "%s: failed to install fake IRQ handler for IRQ %d! (rc:%d)\n",
				dev_data->irq_name, dev_data->irq, rc);
			goto out;
		}
	} else {
		free_irq(dev_data->irq, dev);
		dev_data->irq = 0;
	}
	dev_data->isr_on = enable;
	dev_data->ack_intr = enable;
out:
	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
		dev_data->irq_name,
		dev_data->irq,
		pci_is_enabled(dev) ? "on" : "off",
		dev->msi_enabled ? "MSI" : "",
		dev->msix_enabled ? "MSI/X" : "",
		enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
			 (dev_data->isr_on ? "failed to disable" : "disabled"));
}
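
/*
 * Why dev_data->irq caches the legacy IRQ number (a sketch with
 * hypothetical driver code, not anything in this file): the PCI core
 * rewrites dev->irq when MSI is enabled and restores it on disable, so
 * the INTx number must be saved before the transition:
 *
 *	int legacy = dev->irq;		say, IRQ 16 (the INTx line)
 *	pci_enable_msi(dev);		dev->irq now holds the MSI IRQ
 *	...
 *	pci_disable_msi(dev);		dev->irq is the INTx line again
 */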

/* Ensure a device is "turned off" and ready to be exported.
 * (Also see xen_pcibk_config_reset to ensure virtual configuration space is
 * ready to be re-exported.)
 */
void xen_pcibk_reset_device(struct pci_dev *dev)
{
	u16 cmd;

	xen_pcibk_control_isr(dev, 1 /* reset device */);

	/* Disable devices (but not bridges) */
	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
#ifdef CONFIG_PCI_MSI
		/* The guest could have been abruptly killed without
		 * disabling MSI/MSI-X interrupts. */
		if (dev->msix_enabled)
			pci_disable_msix(dev);
		if (dev->msi_enabled)
			pci_disable_msi(dev);
#endif
		if (pci_is_enabled(dev))
			pci_disable_device(dev);

		pci_write_config_word(dev, PCI_COMMAND, 0);

		dev->is_busmaster = 0;
	} else {
		pci_read_config_word(dev, PCI_COMMAND, &cmd);
		if (cmd & (PCI_COMMAND_INVALIDATE)) {
			cmd &= ~(PCI_COMMAND_INVALIDATE);
			pci_write_config_word(dev, PCI_COMMAND, cmd);

			dev->is_busmaster = 0;
		}
	}
}

#ifdef CONFIG_PCI_MSI
static
int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
			 struct pci_dev *dev, struct xen_pci_op *op)
{
	struct xen_pcibk_dev_data *dev_data;
	int status;

	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));

	status = pci_enable_msi(dev);

	if (status) {
		pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
				    pci_name(dev), pdev->xdev->otherend_id,
				    status);
		op->value = 0;
		return XEN_PCI_ERR_op_failed;
	}

	/* The value the guest needs is actually the IDT vector, not
	 * the local domain's IRQ number (see the note after these MSI
	 * helpers). */

	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
		       op->value);

	dev_data = pci_get_drvdata(dev);
	if (dev_data)
		dev_data->ack_intr = 0;

	return 0;
}

static
int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
			  struct pci_dev *dev, struct xen_pci_op *op)
{
	struct xen_pcibk_dev_data *dev_data;

	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
		       pci_name(dev));
	pci_disable_msi(dev);

	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
		       op->value);
	dev_data = pci_get_drvdata(dev);
	if (dev_data)
		dev_data->ack_intr = 1;
	return 0;
}
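
/*
 * Note on the op->value returned by the two helpers above: the frontend
 * is never handed the backend's Linux IRQ number. xen_pirq_from_irq()
 * translates it to the Xen pirq, which is the identifier the guest can
 * actually use to set up interrupt delivery on its side.
 */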

static
int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
			  struct pci_dev *dev, struct xen_pci_op *op)
{
	struct xen_pcibk_dev_data *dev_data;
	int i, result;
	struct msix_entry *entries;
	u16 cmd;

	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
		       pci_name(dev));

	if (op->value > SH_INFO_MAX_VEC)
		return -EINVAL;

	if (dev->msix_enabled)
		return -EALREADY;

	/*
	 * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able
	 * to access the BARs where the MSI-X entries reside.
	 * VF devices are special in that the command register of the PF,
	 * not the VF, is the one that needs to be checked.
	 */
	pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd);
	if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY))
		return -ENXIO;

	entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
	if (entries == NULL)
		return -ENOMEM;

	for (i = 0; i < op->value; i++) {
		entries[i].entry = op->msix_entries[i].entry;
		entries[i].vector = op->msix_entries[i].vector;
	}

	result = pci_enable_msix_exact(dev, entries, op->value);
	if (result == 0) {
		for (i = 0; i < op->value; i++) {
			op->msix_entries[i].entry = entries[i].entry;
			if (entries[i].vector) {
				op->msix_entries[i].vector =
					xen_pirq_from_irq(entries[i].vector);
				if (unlikely(verbose_request))
					printk(KERN_DEBUG DRV_NAME ": %s: MSI-X[%d]: %d\n",
					       pci_name(dev), i,
					       op->msix_entries[i].vector);
			}
		}
	} else
		pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n",
				    pci_name(dev), pdev->xdev->otherend_id,
				    result);
	kfree(entries);

	op->value = result;
	dev_data = pci_get_drvdata(dev);
	if (dev_data)
		dev_data->ack_intr = 0;

	return result > 0 ? 0 : result;
}

static
int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
			   struct pci_dev *dev, struct xen_pci_op *op)
{
	struct xen_pcibk_dev_data *dev_data;

	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
		       pci_name(dev));
	pci_disable_msix(dev);

	/*
	 * SR-IOV devices (which don't have any legacy IRQ) have
	 * an undefined IRQ value of zero.
	 */
	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev),
		       op->value);
	dev_data = pci_get_drvdata(dev);
	if (dev_data)
		dev_data->ack_intr = 1;
	return 0;
}
#endif

/*
 * Now the same evtchn is used for both pcifront conf_read_write requests
 * and pcie aer frontend acks. We use a dedicated workqueue to schedule
 * the xen_pcibk conf_read_write service, to avoid conflicting with the
 * aer_core do_recovery job, which also uses the system default workqueue.
 */
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{
	/* Check that the frontend is requesting an operation and that we
	 * are not already processing a request. */
	if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
	    && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
		queue_work(xen_pcibk_wq, &pdev->op_work);
	}
	/* _XEN_PCIB_active should have been cleared by pcifront. Also make
	 * sure xen_pcibk is waiting for the ack by checking
	 * _PCIB_op_pending. */
	if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
	    && test_bit(_PCIB_op_pending, &pdev->flags)) {
		wake_up(&xen_pcibk_aer_wait_queue);
	}
}
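
/*
 * Illustrative interleaving that the final
 * xen_pcibk_test_and_schedule_op() call at the end of xen_pcibk_do_op()
 * below guards against (a sketch, not code):
 *
 *	backend (xen_pcibk_do_op)	frontend
 *	clear _XEN_PCIF_active
 *					set _XEN_PCIF_active (new request)
 *					notify; the handler above sees
 *					_PDEVF_op_active still set, so it
 *					queues no work
 *	clear _PDEVF_op_active
 *	re-check flags: the new request is picked up and scheduled
 */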

/* Performing the configuration space reads/writes must not be done in atomic
 * context because some of the pci_* functions can sleep (mostly due to ACPI
 * use of semaphores). This function is intended to be called from a work
 * queue in process context, taking a struct xen_pcibk_device as a parameter. */

void xen_pcibk_do_op(struct work_struct *data)
{
	struct xen_pcibk_device *pdev =
		container_of(data, struct xen_pcibk_device, op_work);
	struct pci_dev *dev;
	struct xen_pcibk_dev_data *dev_data = NULL;
	struct xen_pci_op *op = &pdev->op;
	int test_intx = 0;
#ifdef CONFIG_PCI_MSI
	unsigned int nr = 0;
#endif

	*op = pdev->sh_info->op;
	barrier();
	dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);

	if (dev == NULL)
		op->err = XEN_PCI_ERR_dev_not_found;
	else {
		dev_data = pci_get_drvdata(dev);
		if (dev_data)
			test_intx = dev_data->enable_intx;
		switch (op->cmd) {
		case XEN_PCI_OP_conf_read:
			op->err = xen_pcibk_config_read(dev,
					op->offset, op->size, &op->value);
			break;
		case XEN_PCI_OP_conf_write:
			op->err = xen_pcibk_config_write(dev,
					op->offset, op->size, op->value);
			break;
#ifdef CONFIG_PCI_MSI
		case XEN_PCI_OP_enable_msi:
			op->err = xen_pcibk_enable_msi(pdev, dev, op);
			break;
		case XEN_PCI_OP_disable_msi:
			op->err = xen_pcibk_disable_msi(pdev, dev, op);
			break;
		case XEN_PCI_OP_enable_msix:
			nr = op->value;
			op->err = xen_pcibk_enable_msix(pdev, dev, op);
			break;
		case XEN_PCI_OP_disable_msix:
			op->err = xen_pcibk_disable_msix(pdev, dev, op);
			break;
#endif
		default:
			op->err = XEN_PCI_ERR_not_implemented;
			break;
		}
	}
	if (!op->err && dev && dev_data) {
		/* Transition detected */
		if (dev_data->enable_intx != test_intx)
			xen_pcibk_control_isr(dev, 0 /* no reset */);
	}
	pdev->sh_info->op.err = op->err;
	pdev->sh_info->op.value = op->value;
#ifdef CONFIG_PCI_MSI
	if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) {
		unsigned int i;

		for (i = 0; i < nr; i++)
			pdev->sh_info->op.msix_entries[i].vector =
				op->msix_entries[i].vector;
	}
#endif
	/* Tell the driver domain that we're done. */
	wmb();
	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
	notify_remote_via_irq(pdev->evtchn_irq);

	/* Mark that we're done. */
	smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
	clear_bit(_PDEVF_op_active, &pdev->flags);
	smp_mb__after_atomic(); /* /before/ final check for work */

	/* Check to see if the driver domain tried to start another request
	 * in between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
	 */
	xen_pcibk_test_and_schedule_op(pdev);
}

irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
{
	struct xen_pcibk_device *pdev = dev_id;

	xen_pcibk_test_and_schedule_op(pdev);

	return IRQ_HANDLED;
}

static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
{
	struct pci_dev *dev = (struct pci_dev *)dev_id;
	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);

	if (dev_data->isr_on && dev_data->ack_intr) {
		dev_data->handled++;
		if ((dev_data->handled % 1000) == 0) {
			if (xen_test_irq_shared(irq)) {
				pr_info("%s IRQ line is not shared with other domains. Turning ISR off\n",
					dev_data->irq_name);
				dev_data->ack_intr = 0;
			}
		}
		return IRQ_HANDLED;
	}
	return IRQ_NONE;
}
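
/*
 * How xen_pcibk_handle_event() gets attached to the shared event channel:
 * a sketch of the connect path, which lives in this driver's xenbus code;
 * the exact surrounding names are assumed here rather than quoted:
 *
 *	err = bind_interdomain_evtchn_to_irqhandler(
 *			pdev->xdev->otherend_id, remote_evtchn,
 *			xen_pcibk_handle_event, 0, DRV_NAME, pdev);
 *	if (err < 0)
 *		return err;
 *	pdev->evtchn_irq = err;
 *
 * notify_remote_via_irq(pdev->evtchn_irq) in xen_pcibk_do_op() then rings
 * the same channel in the opposite direction to complete the handshake.
 */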