/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"
#include "assigned-dev.h"

struct kvm_assigned_dev_kernel {
	struct kvm_irq_ack_notifier ack_notifier;
	struct list_head list;
	int assigned_dev_id;
	int host_segnr;
	int host_busnr;
	int host_devfn;
	unsigned int entries_nr;
	int host_irq;
	bool host_irq_disabled;
	bool pci_2_3;
	struct msix_entry *host_msix_entries;
	int guest_irq;
	struct msix_entry *guest_msix_entries;
	unsigned long irq_requested_type;
	int irq_source_id;
	int flags;
	struct pci_dev *dev;
	struct kvm *kvm;
	spinlock_t intx_lock;
	spinlock_t intx_mask_lock;
	char irq_name[32];
	struct pci_saved_state *pci_saved_state;
};

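/*
 * Look up an assigned device by its user-provided device ID on the
 * per-VM list of assigned devices. Returns NULL if no match is found.
 */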
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct list_head *ptr;
	struct kvm_assigned_dev_kernel *match;

	list_for_each(ptr, head) {
		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
		if (match->assigned_dev_id == assigned_dev_id)
			return match;
	}
	return NULL;
}

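/*
 * Map a host IRQ number back to its index in the device's MSI-X entry
 * array. Returns -1 (and logs a warning) if the IRQ does not belong to
 * any of the device's MSI-X vectors.
 */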
static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
				    *assigned_dev, int irq)
{
	int i, index;
	struct msix_entry *host_msix_entries;

	host_msix_entries = assigned_dev->host_msix_entries;

	index = -1;
	for (i = 0; i < assigned_dev->entries_nr; i++)
		if (irq == host_msix_entries[i].vector) {
			index = i;
			break;
		}
	if (index < 0)
		printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n");

	return index;
}

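/*
 * Hard-IRQ handler for INTx on PCI 2.3 capable devices: mask the
 * interrupt at the device and wake the threaded handler if the device
 * actually asserted the line, otherwise report IRQ_NONE so other
 * devices sharing the line can handle it.
 */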
static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret;

	spin_lock(&assigned_dev->intx_lock);
	if (pci_check_and_mask_intx(assigned_dev->dev)) {
		assigned_dev->host_irq_disabled = true;
		ret = IRQ_WAKE_THREAD;
	} else
		ret = IRQ_NONE;
	spin_unlock(&assigned_dev->intx_lock);

	return ret;
}

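/*
 * Inject an interrupt into the guest. For INTx, honor the user-space
 * mask (KVM_DEV_ASSIGN_MASK_INTX) under intx_mask_lock; MSI/MSI-X
 * vectors are injected unconditionally.
 */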
static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
				 int vector)
{
	if (unlikely(assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_GUEST_INTX)) {
		spin_lock(&assigned_dev->intx_mask_lock);
		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
			kvm_set_irq(assigned_dev->kvm,
				    assigned_dev->irq_source_id, vector, 1,
				    false);
		spin_unlock(&assigned_dev->intx_mask_lock);
	} else
		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
			    vector, 1, false);
}

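/*
 * Threaded INTx handler. Without PCI 2.3 device-level masking we have
 * to disable the host IRQ line here; it is re-enabled once the guest
 * acks the interrupt (see kvm_assigned_dev_ack_irq).
 */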
static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
		spin_lock_irq(&assigned_dev->intx_lock);
		disable_irq_nosync(irq);
		assigned_dev->host_irq_disabled = true;
		spin_unlock_irq(&assigned_dev->intx_lock);
	}

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}

#ifdef __KVM_HAVE_MSI
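/*
 * MSI hard-IRQ handler: try to inject the guest interrupt directly
 * from atomic context, handing off to the threaded handler if the
 * injection would block.
 */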
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
				       assigned_dev->irq_source_id,
				       assigned_dev->guest_irq, 1);
	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}
#endif

#ifdef __KVM_HAVE_MSIX
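/*
 * MSI-X hard-IRQ handler: resolve the host IRQ to its MSI-X entry,
 * then inject the corresponding guest vector atomically if possible,
 * falling back to the threaded handler otherwise.
 */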
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;
	int ret = 0;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		ret = kvm_set_irq_inatomic(assigned_dev->kvm,
					   assigned_dev->irq_source_id,
					   vector, 1);
	}

	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
	}

	return IRQ_HANDLED;
}
#endif

/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *dev =
		container_of(kian, struct kvm_assigned_dev_kernel,
			     ack_notifier);

	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);

	spin_lock(&dev->intx_mask_lock);

	if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
		bool reassert = false;

		spin_lock_irq(&dev->intx_lock);
		/*
		 * The guest IRQ may be shared so this ack can come from an
		 * IRQ for another guest device.
		 */
		if (dev->host_irq_disabled) {
			if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
				enable_irq(dev->host_irq);
			else if (!pci_check_and_unmask_intx(dev->dev))
				reassert = true;
			dev->host_irq_disabled = reassert;
		}
		spin_unlock_irq(&dev->intx_lock);

		if (reassert)
			kvm_set_irq(dev->kvm, dev->irq_source_id,
				    dev->guest_irq, 1, false);
	}

	spin_unlock(&dev->intx_mask_lock);
}

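/*
 * Tear down the guest side of an IRQ assignment: unregister the ack
 * notifier, lower the guest IRQ line, and release the IRQ source ID.
 */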
static void deassign_guest_irq(struct kvm *kvm,
			       struct kvm_assigned_dev_kernel *assigned_dev)
{
	if (assigned_dev->ack_notifier.gsi != -1)
		kvm_unregister_irq_ack_notifier(kvm,
						&assigned_dev->ack_notifier);

	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
		    assigned_dev->guest_irq, 0, false);

	if (assigned_dev->irq_source_id != -1)
		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
	assigned_dev->irq_source_id = -1;
	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* This function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
			      struct kvm_assigned_dev_kernel *assigned_dev)
{
	/*
	 * We disable the IRQ here to prevent further events.
	 *
	 * Note that this may result in a nested disable if the interrupt
	 * type is INTx, but that is fine since we are about to free it.
	 *
	 * If this function is called as part of VM destruction, make sure
	 * the kvm state is still valid at this point, as we may also have
	 * to wait for a currently running IRQ handler to finish.
	 */
	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
		int i;
		for (i = 0; i < assigned_dev->entries_nr; i++)
			disable_irq(assigned_dev->host_msix_entries[i].vector);

		for (i = 0; i < assigned_dev->entries_nr; i++)
			free_irq(assigned_dev->host_msix_entries[i].vector,
				 assigned_dev);

		assigned_dev->entries_nr = 0;
		kfree(assigned_dev->host_msix_entries);
		kfree(assigned_dev->guest_msix_entries);
		pci_disable_msix(assigned_dev->dev);
	} else {
		/* Deal with MSI and INTx */
		if ((assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_HOST_INTX) &&
		    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			spin_lock_irq(&assigned_dev->intx_lock);
			pci_intx(assigned_dev->dev, false);
			spin_unlock_irq(&assigned_dev->intx_lock);
			synchronize_irq(assigned_dev->host_irq);
		} else
			disable_irq(assigned_dev->host_irq);

		free_irq(assigned_dev->host_irq, assigned_dev);

		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
			pci_disable_msi(assigned_dev->dev);
	}

	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

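/*
 * Deassign the host and/or guest side of an IRQ assignment, as
 * selected by the bits in irq_requested_type.
 */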
static int kvm_deassign_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *assigned_dev,
			    unsigned long irq_requested_type)
{
	unsigned long guest_irq_type, host_irq_type;

	if (!irqchip_in_kernel(kvm))
		return -EINVAL;
	/* no irq assignment to deassign */
	if (!assigned_dev->irq_requested_type)
		return -ENXIO;

	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

	if (host_irq_type)
		deassign_host_irq(kvm, assigned_dev);
	if (guest_irq_type)
		deassign_guest_irq(kvm, assigned_dev);

	return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
				  struct kvm_assigned_dev_kernel *assigned_dev)
{
	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}

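/*
 * Release all resources held for an assigned device: free its IRQs,
 * restore the device's saved PCI state (or reset it), drop the
 * "assigned" flag, and release the PCI regions and device reference.
 */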
static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
				     *assigned_dev)
{
	kvm_free_assigned_irq(kvm, assigned_dev);

	pci_reset_function(assigned_dev->dev);
	if (pci_load_and_free_saved_state(assigned_dev->dev,
					  &assigned_dev->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&assigned_dev->dev->dev));
	else
		pci_restore_state(assigned_dev->dev);

	pci_clear_dev_assigned(assigned_dev->dev);

	pci_release_regions(assigned_dev->dev);
	pci_disable_device(assigned_dev->dev);
	pci_dev_put(assigned_dev->dev);

	list_del(&assigned_dev->list);
	kfree(assigned_dev);
}

void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *ptr, *ptr2;
	struct kvm_assigned_dev_kernel *assigned_dev;

	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
		assigned_dev = list_entry(ptr,
					  struct kvm_assigned_dev_kernel,
					  list);

		kvm_free_assigned_device(kvm, assigned_dev);
	}
}

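/*
 * Set up the host side of an INTx assignment: request a threaded IRQ
 * handler (shared if the device supports PCI 2.3 masking) and make
 * sure device-level INTx delivery is enabled.
 */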
static int assigned_device_enable_host_intx(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	irq_handler_t irq_handler;
	unsigned long flags;

	dev->host_irq = dev->dev->irq;

	/*
	 * We can only share the IRQ line with other host devices if we are
	 * able to disable the IRQ source at device-level - independently of
	 * the guest driver. Otherwise host devices may suffer from unbounded
	 * IRQ latencies when the guest keeps the line asserted.
	 */
	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		irq_handler = kvm_assigned_dev_intx;
		flags = IRQF_SHARED;
	} else {
		irq_handler = NULL;
		flags = IRQF_ONESHOT;
	}
	if (request_threaded_irq(dev->host_irq, irq_handler,
				 kvm_assigned_dev_thread_intx, flags,
				 dev->irq_name, dev))
		return -EIO;

	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		spin_lock_irq(&dev->intx_lock);
		pci_intx(dev->dev, true);
		spin_unlock_irq(&dev->intx_lock);
	}
	return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_host_msi(struct kvm *kvm,
					   struct kvm_assigned_dev_kernel *dev)
{
	int r;

	if (!dev->dev->msi_enabled) {
		r = pci_enable_msi(dev->dev);
		if (r)
			return r;
	}

	dev->host_irq = dev->dev->irq;
	if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
				 kvm_assigned_dev_thread_msi, 0,
				 dev->irq_name, dev)) {
		pci_disable_msi(dev->dev);
		return -EIO;
	}

	return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_host_msix(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	int i, r = -EINVAL;

	/* host_msix_entries and guest_msix_entries should have been
	 * initialized */
	if (dev->entries_nr == 0)
		return r;

	r = pci_enable_msix_exact(dev->dev,
				  dev->host_msix_entries, dev->entries_nr);
	if (r)
		return r;

	for (i = 0; i < dev->entries_nr; i++) {
		r = request_threaded_irq(dev->host_msix_entries[i].vector,
					 kvm_assigned_dev_msix,
					 kvm_assigned_dev_thread_msix,
					 0, dev->irq_name, dev);
		if (r)
			goto err;
	}

	return 0;
err:
	for (i -= 1; i >= 0; i--)
		free_irq(dev->host_msix_entries[i].vector, dev);
	pci_disable_msix(dev->dev);
	return r;
}

#endif

static int assigned_device_enable_guest_intx(struct kvm *kvm,
				struct kvm_assigned_dev_kernel *dev,
				struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = irq->guest_irq;
	return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}
#endif

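/*
 * Wire up the host side of an interrupt assignment according to the
 * requested type (INTx, MSI, or MSI-X). Only one host IRQ type may be
 * active at a time.
 */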
static int assign_host_irq(struct kvm *kvm,
			   struct kvm_assigned_dev_kernel *dev,
			   __u32 host_irq_type)
{
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
		return r;

	snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
		 pci_name(dev->dev));

	switch (host_irq_type) {
	case KVM_DEV_IRQ_HOST_INTX:
		r = assigned_device_enable_host_intx(kvm, dev);
		break;
#ifdef __KVM_HAVE_MSI
	case KVM_DEV_IRQ_HOST_MSI:
		r = assigned_device_enable_host_msi(kvm, dev);
		break;
#endif
#ifdef __KVM_HAVE_MSIX
	case KVM_DEV_IRQ_HOST_MSIX:
		r = assigned_device_enable_host_msix(kvm, dev);
		break;
#endif
	default:
		r = -EINVAL;
	}
	dev->host_irq_disabled = false;

	if (!r)
		dev->irq_requested_type |= host_irq_type;

	return r;
}

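/*
 * Wire up the guest side of an interrupt assignment: allocate an IRQ
 * source ID, record the guest IRQ/GSI, and register the ack notifier
 * for INTx. On failure the source ID is released again.
 */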
static int assign_guest_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *dev,
			    struct kvm_assigned_irq *irq,
			    unsigned long guest_irq_type)
{
	int id;
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
		return r;

	id = kvm_request_irq_source_id(kvm);
	if (id < 0)
		return id;

	dev->irq_source_id = id;

	switch (guest_irq_type) {
	case KVM_DEV_IRQ_GUEST_INTX:
		r = assigned_device_enable_guest_intx(kvm, dev, irq);
		break;
#ifdef __KVM_HAVE_MSI
	case KVM_DEV_IRQ_GUEST_MSI:
		r = assigned_device_enable_guest_msi(kvm, dev, irq);
		break;
#endif
#ifdef __KVM_HAVE_MSIX
	case KVM_DEV_IRQ_GUEST_MSIX:
		r = assigned_device_enable_guest_msix(kvm, dev, irq);
		break;
#endif
	default:
		r = -EINVAL;
	}

	if (!r) {
		dev->irq_requested_type |= guest_irq_type;
		if (dev->ack_notifier.gsi != -1)
			kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
	} else {
		kvm_free_irq_source_id(kvm, dev->irq_source_id);
		dev->irq_source_id = -1;
	}

	return r;
}

/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
				   struct kvm_assigned_irq *assigned_irq)
{
	int r = -EINVAL;
	struct kvm_assigned_dev_kernel *match;
	unsigned long host_irq_type, guest_irq_type;

	if (!irqchip_in_kernel(kvm))
		return r;

	mutex_lock(&kvm->lock);
	r = -ENODEV;
	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

	r = -EINVAL;
	/* can only assign one type at a time */
	if (hweight_long(host_irq_type) > 1)
		goto out;
	if (hweight_long(guest_irq_type) > 1)
		goto out;
	if (host_irq_type == 0 && guest_irq_type == 0)
		goto out;

	r = 0;
	if (host_irq_type)
		r = assign_host_irq(kvm, match, host_irq_type);
	if (r)
		goto out;

	if (guest_irq_type)
		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
					 struct kvm_assigned_irq
					 *assigned_irq)
{
	int r = -ENODEV;
	struct kvm_assigned_dev_kernel *match;
	unsigned long irq_type;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
					  KVM_DEV_IRQ_GUEST_MASK);
	r = kvm_deassign_irq(kvm, match, irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

/*
 * We want to test whether the caller has been granted permissions to
 * use this device.  To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs.  PCI config space is often
 * passed to the process calling this ioctl via file descriptor, so we
 * can't rely on access to that file.  We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
	int i;
	bool bar_found = false;

	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
		char *kpath, *syspath;
		struct path path;
		struct inode *inode;
		int r;

		if (!pci_resource_len(dev, i))
			continue;

		kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
		if (!kpath)
			return -ENOMEM;

		/* Per sysfs-rules, sysfs is always at /sys */
		syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
		kfree(kpath);
		if (!syspath)
			return -ENOMEM;

		r = kern_path(syspath, LOOKUP_FOLLOW, &path);
		kfree(syspath);
		if (r)
			return r;

		inode = d_backing_inode(path.dentry);

		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
		path_put(&path);
		if (r)
			return r;

		bar_found = true;
	}

	/* If no resources, probably something special */
	if (!bar_found)
		return -EPERM;

	return 0;
#else
	return -EINVAL; /* No way to control the device without sysfs */
#endif
}

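/*
 * KVM_ASSIGN_PCI_DEVICE: look up the host PCI device, verify the
 * caller has sysfs access to it, claim and reset it, save its config
 * state, and attach it to the VM's IOMMU domain.
 */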
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0, idx;
	struct kvm_assigned_dev_kernel *match;
	struct pci_dev *dev;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	idx = srcu_read_lock(&kvm->srcu);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (match) {
		/* device already assigned */
		r = -EEXIST;
		goto out;
	}

	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
	if (match == NULL) {
		printk(KERN_INFO "%s: Couldn't allocate memory\n",
		       __func__);
		r = -ENOMEM;
		goto out;
	}
	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
				   assigned_dev->busnr,
				   assigned_dev->devfn);
	if (!dev) {
		printk(KERN_INFO "%s: host device not found\n", __func__);
		r = -EINVAL;
		goto out_free;
	}

	/* Don't allow bridges to be assigned */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
		r = -EPERM;
		goto out_put;
	}

	r = probe_sysfs_permissions(dev);
	if (r)
		goto out_put;

	if (pci_enable_device(dev)) {
		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
		r = -EBUSY;
		goto out_put;
	}
	r = pci_request_regions(dev, "kvm_assigned_device");
	if (r) {
		printk(KERN_INFO "%s: Could not get access to device regions\n",
		       __func__);
		goto out_disable;
	}

	pci_reset_function(dev);
	pci_save_state(dev);
	match->pci_saved_state = pci_store_saved_state(dev);
	if (!match->pci_saved_state)
		printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
		       __func__, dev_name(&dev->dev));

	if (!pci_intx_mask_supported(dev))
		assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

	match->assigned_dev_id = assigned_dev->assigned_dev_id;
	match->host_segnr = assigned_dev->segnr;
	match->host_busnr = assigned_dev->busnr;
	match->host_devfn = assigned_dev->devfn;
	match->flags = assigned_dev->flags;
	match->dev = dev;
	spin_lock_init(&match->intx_lock);
	spin_lock_init(&match->intx_mask_lock);
	match->irq_source_id = -1;
	match->kvm = kvm;
	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

	list_add(&match->list, &kvm->arch.assigned_dev_head);

	if (!kvm->arch.iommu_domain) {
		r = kvm_iommu_map_guest(kvm);
		if (r)
			goto out_list_del;
	}
	r = kvm_assign_device(kvm, match->dev);
	if (r)
		goto out_list_del;

out:
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
out_list_del:
	if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&dev->dev));
	list_del(&match->list);
	pci_release_regions(dev);
out_disable:
	pci_disable_device(dev);
out_put:
	pci_dev_put(dev);
out_free:
	kfree(match);
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
		struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		printk(KERN_INFO "%s: device hasn't been assigned before, "
		  "so cannot be deassigned\n", __func__);
		r = -EINVAL;
		goto out;
	}

	kvm_deassign_device(kvm, match->dev);

	kvm_free_assigned_device(kvm, match);

out:
	mutex_unlock(&kvm->lock);
	return r;
}

#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
				    struct kvm_assigned_msix_nr *entry_nr)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      entry_nr->assigned_dev_id);
	if (!adev) {
		r = -EINVAL;
		goto msix_nr_out;
	}

	if (adev->entries_nr == 0) {
		adev->entries_nr = entry_nr->entry_nr;
		if (adev->entries_nr == 0 ||
		    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
			r = -EINVAL;
			goto msix_nr_out;
		}

		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
						entry_nr->entry_nr,
						GFP_KERNEL);
		if (!adev->host_msix_entries) {
			r = -ENOMEM;
			goto msix_nr_out;
		}
		adev->guest_msix_entries =
			kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
				GFP_KERNEL);
		if (!adev->guest_msix_entries) {
			kfree(adev->host_msix_entries);
			r = -ENOMEM;
			goto msix_nr_out;
		}
	} else /* Setting the MSI-X entry count twice is not allowed */
		r = -EINVAL;
msix_nr_out:
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
				       struct kvm_assigned_msix_entry *entry)
{
	int r = 0, i;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      entry->assigned_dev_id);

	if (!adev) {
		r = -EINVAL;
		goto msix_entry_out;
	}

	for (i = 0; i < adev->entries_nr; i++)
		if (adev->guest_msix_entries[i].vector == 0 ||
		    adev->guest_msix_entries[i].entry == entry->entry) {
			adev->guest_msix_entries[i].entry = entry->entry;
			adev->guest_msix_entries[i].vector = entry->gsi;
			adev->host_msix_entries[i].entry = entry->entry;
			break;
		}
	if (i == adev->entries_nr) {
		r = -ENOSPC;
		goto msix_entry_out;
	}

msix_entry_out:
	mutex_unlock(&kvm->lock);

	return r;
}
#endif

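/*
 * KVM_ASSIGN_SET_INTX_MASK: update the user-space INTx mask state for
 * an assigned device. Masking lowers the guest IRQ line; unmasking
 * re-enables the host IRQ if we had disabled it.
 */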
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
		struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		r = -ENODEV;
		goto out;
	}

	spin_lock(&match->intx_mask_lock);

	match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
	match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
			kvm_set_irq(match->kvm, match->irq_source_id,
				    match->guest_irq, 0, false);
			/*
			 * Masking at hardware-level is performed on demand,
			 * i.e. when an IRQ actually arrives at the host.
			 */
		} else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			/*
			 * Unmask the IRQ line if required. Unmasking at
			 * device level will be performed by user space.
			 */
			spin_lock_irq(&match->intx_lock);
			if (match->host_irq_disabled) {
				enable_irq(match->host_irq);
				match->host_irq_disabled = false;
			}
			spin_unlock_irq(&match->intx_lock);
		}
	}

	spin_unlock(&match->intx_mask_lock);

out:
	mutex_unlock(&kvm->lock);
	return r;
}

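/*
 * Dispatch the device-assignment ioctls. Each handler copies its
 * argument structure from user space and operates under kvm->lock.
 *
 * A minimal user-space sketch (illustrative only: example IDs, bus
 * address, and GSI; assumes a VM fd obtained via KVM_CREATE_VM;
 * error handling omitted):
 *
 *	struct kvm_assigned_pci_dev adev = {
 *		.assigned_dev_id = 1,
 *		.segnr = 0, .busnr = 0x01, .devfn = PCI_DEVFN(0x00, 0),
 *		.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU,
 *	};
 *	struct kvm_assigned_irq airq = {
 *		.assigned_dev_id = 1,
 *		.guest_irq = 10,
 *		.flags = KVM_DEV_IRQ_HOST_INTX | KVM_DEV_IRQ_GUEST_INTX,
 *	};
 *	ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &adev);
 *	ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &airq);
 */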
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
				  unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_ASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_IRQ: {
		r = -EOPNOTSUPP;
		break;
	}
	case KVM_ASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
#ifdef __KVM_HAVE_MSIX
	case KVM_ASSIGN_SET_MSIX_NR: {
		struct kvm_assigned_msix_nr entry_nr;
		r = -EFAULT;
		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
			goto out;
		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_MSIX_ENTRY: {
		struct kvm_assigned_msix_entry entry;
		r = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof entry))
			goto out;
		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
		if (r)
			goto out;
		break;
	}
#endif
	case KVM_ASSIGN_SET_INTX_MASK: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
		break;
	}
	default:
		r = -ENOTTY;
		break;
	}
out:
	return r;
}