/*
 * PCI Backend Operations - respond to PCI requests from Frontend
 *
 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/wait.h>
#include <linux/bitops.h>
#include <xen/events.h>
#include <linux/sched.h>
#include "pciback.h"

int verbose_request;
module_param(verbose_request, int, 0644);

static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);

/* Ensure a device has the fake IRQ handler "turned on/off" and is
 * ready to be exported. This MUST be run after xen_pcibk_reset_device
 * which does the actual PCI device enable/disable.
 */
static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
{
	struct xen_pcibk_dev_data *dev_data;
	int rc;
	int enable = 0;

	dev_data = pci_get_drvdata(dev);
	if (!dev_data)
		return;

	/* We don't deal with bridges */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return;

	if (reset) {
		dev_data->enable_intx = 0;
		dev_data->ack_intr = 0;
	}
	enable = dev_data->enable_intx;

	/* Asked to disable, but ISR isn't running */
	if (!enable && !dev_data->isr_on)
		return;

	/* Squirrel away the IRQs in the dev_data. We need this
	 * b/c when the device transitions to MSI, dev->irq is
	 * overwritten with the MSI vector.
	 */
	if (enable)
		dev_data->irq = dev->irq;

	/*
	 * SR-IOV devices all use MSI-X and have no legacy
	 * interrupts, so inhibit creating a fake IRQ handler for them.
	 */
	if (dev_data->irq == 0)
		goto out;

	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
		dev_data->irq_name,
		dev_data->irq,
		pci_is_enabled(dev) ? "on" : "off",
		dev->msi_enabled ? "MSI" : "",
		dev->msix_enabled ? "MSI/X" : "",
		dev_data->isr_on ? "enable" : "disable",
		enable ? "enable" : "disable");

	if (enable) {
		/*
		 * MSI or MSI-X must not have an IRQ handler installed.
		 * Otherwise, if the guest terminates, we BUG_ON in
		 * free_msi_irqs.
		 */
		if (dev->msi_enabled || dev->msix_enabled)
			goto out;

		rc = request_irq(dev_data->irq,
				xen_pcibk_guest_interrupt, IRQF_SHARED,
				dev_data->irq_name, dev);
		if (rc) {
			dev_err(&dev->dev, "%s: failed to install fake IRQ "
				"handler for IRQ %d! (rc:%d)\n",
				dev_data->irq_name, dev_data->irq, rc);
			goto out;
		}
	} else {
		free_irq(dev_data->irq, dev);
		dev_data->irq = 0;
	}
	dev_data->isr_on = enable;
	dev_data->ack_intr = enable;
out:
	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
		dev_data->irq_name,
		dev_data->irq,
		pci_is_enabled(dev) ? "on" : "off",
		dev->msi_enabled ? "MSI" : "",
		dev->msix_enabled ? "MSI/X" : "",
		enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
			(dev_data->isr_on ? "failed to disable" : "disabled"));
}

/* Ensure a device is "turned off" and ready to be exported.
 * (Also see xen_pcibk_config_reset to ensure the virtual configuration space
 * is ready to be re-exported.)
 */
void xen_pcibk_reset_device(struct pci_dev *dev)
{
	u16 cmd;

	xen_pcibk_control_isr(dev, 1 /* reset device */);

	/* Disable devices (but not bridges) */
	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
#ifdef CONFIG_PCI_MSI
		/* The guest could have been abruptly killed without
		 * disabling MSI/MSI-X interrupts. */
		if (dev->msix_enabled)
			pci_disable_msix(dev);
		if (dev->msi_enabled)
			pci_disable_msi(dev);
#endif
		if (pci_is_enabled(dev))
			pci_disable_device(dev);

		pci_write_config_word(dev, PCI_COMMAND, 0);

		dev->is_busmaster = 0;
	} else {
		pci_read_config_word(dev, PCI_COMMAND, &cmd);
		if (cmd & (PCI_COMMAND_INVALIDATE)) {
			cmd &= ~(PCI_COMMAND_INVALIDATE);
			pci_write_config_word(dev, PCI_COMMAND, cmd);

			dev->is_busmaster = 0;
		}
	}
}
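
/*
 * MSI/MSI-X enable/disable handlers, dispatched from xen_pcibk_do_op()
 * below. Note that what the frontend gets back is the Xen pirq (via
 * xen_pirq_from_irq()), never the local domain's IRQ number: op->value
 * carries the pirq for MSI and legacy INTx, while MSI-X vectors are
 * returned through op->msix_entries[].
 */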
#ifdef CONFIG_PCI_MSI
static
int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
			 struct pci_dev *dev, struct xen_pci_op *op)
{
	struct xen_pcibk_dev_data *dev_data;
	int status;

	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));

	if (dev->msi_enabled)
		status = -EALREADY;
	else if (dev->msix_enabled)
		status = -ENXIO;
	else
		status = pci_enable_msi(dev);

	if (status) {
		pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
				    pci_name(dev), pdev->xdev->otherend_id,
				    status);
		op->value = 0;
		return XEN_PCI_ERR_op_failed;
	}

	/* The value the guest needs is actually the IDT vector, not
	 * the local domain's IRQ number. */

	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
			op->value);

	dev_data = pci_get_drvdata(dev);
	if (dev_data)
		dev_data->ack_intr = 0;

	return 0;
}

static
int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
			  struct pci_dev *dev, struct xen_pci_op *op)
{
	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
		       pci_name(dev));

	if (dev->msi_enabled) {
		struct xen_pcibk_dev_data *dev_data;

		pci_disable_msi(dev);

		dev_data = pci_get_drvdata(dev);
		if (dev_data)
			dev_data->ack_intr = 1;
	}
	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
			op->value);
	return 0;
}

static
int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
			  struct pci_dev *dev, struct xen_pci_op *op)
{
	struct xen_pcibk_dev_data *dev_data;
	int i, result;
	struct msix_entry *entries;
	u16 cmd;

	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
		       pci_name(dev));

	if (op->value > SH_INFO_MAX_VEC)
		return -EINVAL;

	if (dev->msix_enabled)
		return -EALREADY;

	/*
	 * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able
	 * to access the BARs where the MSI-X entries reside.
	 * VF devices are the exception: for them the PF's PCI_COMMAND
	 * must be checked instead.
	 */
	pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd);
	if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY))
		return -ENXIO;

	entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
	if (entries == NULL)
		return -ENOMEM;

	for (i = 0; i < op->value; i++) {
		entries[i].entry = op->msix_entries[i].entry;
		entries[i].vector = op->msix_entries[i].vector;
	}

	result = pci_enable_msix_exact(dev, entries, op->value);
	if (result == 0) {
		for (i = 0; i < op->value; i++) {
			op->msix_entries[i].entry = entries[i].entry;
			if (entries[i].vector) {
				op->msix_entries[i].vector =
					xen_pirq_from_irq(entries[i].vector);
				if (unlikely(verbose_request))
					printk(KERN_DEBUG DRV_NAME ": %s: "
						"MSI-X[%d]: %d\n",
						pci_name(dev), i,
						op->msix_entries[i].vector);
			}
		}
	} else
		pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n",
				    pci_name(dev), pdev->xdev->otherend_id,
				    result);
	kfree(entries);

	op->value = result;
	dev_data = pci_get_drvdata(dev);
	if (dev_data)
		dev_data->ack_intr = 0;

	return result > 0 ? 0 : result;
}

static
int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
			   struct pci_dev *dev, struct xen_pci_op *op)
{
	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
		       pci_name(dev));

	if (dev->msix_enabled) {
		struct xen_pcibk_dev_data *dev_data;

		pci_disable_msix(dev);

		dev_data = pci_get_drvdata(dev);
		if (dev_data)
			dev_data->ack_intr = 1;
	}
	/*
	 * SR-IOV devices (which don't have any legacy IRQ) have
	 * an undefined IRQ value of zero.
	 */
	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
	if (unlikely(verbose_request))
		printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n",
		       pci_name(dev), op->value);
	return 0;
}
#endif
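
/*
 * Request/response handshake with the frontend (see also the barrier
 * comments in xen_pcibk_do_op() below): the frontend sets _XEN_PCIF_active
 * in sh_info->flags and kicks the event channel; the backend serialises
 * processing with the local _PDEVF_op_active bit and clears
 * _XEN_PCIF_active again once op.err/op.value have been written back.
 */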
/*
 * Now the same evtchn is used for both pcifront conf_read_write requests
 * and PCIe AER frontend acks. We use a dedicated workqueue to schedule
 * the xen_pcibk conf_read_write service, to avoid conflicts with the
 * aer_core do_recovery job, which also uses the system default workqueue.
 */
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{
	/* Check that the frontend is requesting an operation and that we are
	 * not already processing a request */
	if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
	    && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
		queue_work(xen_pcibk_wq, &pdev->op_work);
	}
	/* _XEN_PCIB_active should have been cleared by pcifront. Also make
	 * sure xen_pcibk is waiting for an ack by checking _PCIB_op_pending. */
	if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
	    && test_bit(_PCIB_op_pending, &pdev->flags)) {
		wake_up(&xen_pcibk_aer_wait_queue);
	}
}

/* Performing the configuration space reads/writes must not be done in atomic
 * context because some of the pci_* functions can sleep (mostly due to ACPI
 * use of semaphores). This function is intended to be called from a work
 * queue in process context, taking a struct xen_pcibk_device as a parameter. */

void xen_pcibk_do_op(struct work_struct *data)
{
	struct xen_pcibk_device *pdev =
		container_of(data, struct xen_pcibk_device, op_work);
	struct pci_dev *dev;
	struct xen_pcibk_dev_data *dev_data = NULL;
	struct xen_pci_op *op = &pdev->op;
	int test_intx = 0;
#ifdef CONFIG_PCI_MSI
	unsigned int nr = 0;
#endif

	*op = pdev->sh_info->op;
	barrier();
	dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);

	if (dev == NULL)
		op->err = XEN_PCI_ERR_dev_not_found;
	else {
		dev_data = pci_get_drvdata(dev);
		if (dev_data)
			test_intx = dev_data->enable_intx;
		switch (op->cmd) {
		case XEN_PCI_OP_conf_read:
			op->err = xen_pcibk_config_read(dev,
				  op->offset, op->size, &op->value);
			break;
		case XEN_PCI_OP_conf_write:
			op->err = xen_pcibk_config_write(dev,
				  op->offset, op->size, op->value);
			break;
#ifdef CONFIG_PCI_MSI
		case XEN_PCI_OP_enable_msi:
			op->err = xen_pcibk_enable_msi(pdev, dev, op);
			break;
		case XEN_PCI_OP_disable_msi:
			op->err = xen_pcibk_disable_msi(pdev, dev, op);
			break;
		case XEN_PCI_OP_enable_msix:
			nr = op->value;
			op->err = xen_pcibk_enable_msix(pdev, dev, op);
			break;
		case XEN_PCI_OP_disable_msix:
			op->err = xen_pcibk_disable_msix(pdev, dev, op);
			break;
#endif
		default:
			op->err = XEN_PCI_ERR_not_implemented;
			break;
		}
	}
	if (!op->err && dev && dev_data) {
		/* Transition detected */
		if (dev_data->enable_intx != test_intx)
			xen_pcibk_control_isr(dev, 0 /* no reset */);
	}
	pdev->sh_info->op.err = op->err;
	pdev->sh_info->op.value = op->value;
#ifdef CONFIG_PCI_MSI
	if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) {
		unsigned int i;

		for (i = 0; i < nr; i++)
			pdev->sh_info->op.msix_entries[i].vector =
				op->msix_entries[i].vector;
	}
#endif
	/* Tell the driver domain that we're done. */
	wmb();
	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
	notify_remote_via_irq(pdev->evtchn_irq);

	/* Mark that we're done. */
	smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
	clear_bit(_PDEVF_op_active, &pdev->flags);
	smp_mb__after_atomic(); /* /before/ final check for work */

	/* Check to see if the driver domain tried to start another request
	 * in between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
	 */
	xen_pcibk_test_and_schedule_op(pdev);
}

/* Event channel interrupt from the frontend: kick off request processing. */
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
{
	struct xen_pcibk_device *pdev = dev_id;

	xen_pcibk_test_and_schedule_op(pdev);

	return IRQ_HANDLED;
}
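
/*
 * Fake IRQ handler, installed by xen_pcibk_control_isr() with IRQF_SHARED
 * for devices still using legacy INTx. While isr_on and ack_intr are set
 * it returns IRQ_HANDLED so the IRQ core does not treat the line as
 * spurious while the guest is driving the device. Every 1000 interrupts
 * it re-checks the line: xen_test_irq_shared() returns true when the line
 * is *not* shared with another domain, in which case acking is turned off
 * and handling is left entirely to the guest.
 */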
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
{
	struct pci_dev *dev = (struct pci_dev *)dev_id;
	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);

	if (dev_data->isr_on && dev_data->ack_intr) {
		dev_data->handled++;
		if ((dev_data->handled % 1000) == 0) {
			if (xen_test_irq_shared(irq)) {
				pr_info("%s IRQ line is not shared "
					"with other domains. Turning ISR off\n",
					dev_data->irq_name);
				dev_data->ack_intr = 0;
			}
		}
		return IRQ_HANDLED;
	}
	return IRQ_NONE;
}