1/*
2 * VFIO generic eventfd code for IRQFD support.
3 * Derived from drivers/vfio/pci/vfio_pci_intrs.c
4 *
5 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
6 *     Author: Alex Williamson <alex.williamson@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/vfio.h>
14#include <linux/eventfd.h>
15#include <linux/file.h>
16#include <linux/module.h>
17#include <linux/slab.h>
18
/* Strings handed to the MODULE_* macros at the end of this file. */
#define DRIVER_VERSION  "0.1"
#define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC     "IRQFD support for VFIO bus drivers"

/* Workqueue on which virqfd shutdown work is queued; created at module init. */
static struct workqueue_struct *vfio_irqfd_cleanup_wq;
/* Held around every read and update of a caller's virqfd slot (*pvirqfd). */
static DEFINE_SPINLOCK(virqfd_lock);
25
26static int __init vfio_virqfd_init(void)
27{
28	vfio_irqfd_cleanup_wq =
29		create_singlethread_workqueue("vfio-irqfd-cleanup");
30	if (!vfio_irqfd_cleanup_wq)
31		return -ENOMEM;
32
33	return 0;
34}
35
36static void __exit vfio_virqfd_exit(void)
37{
38	destroy_workqueue(vfio_irqfd_cleanup_wq);
39}
40
41static void virqfd_deactivate(struct virqfd *virqfd)
42{
43	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
44}
45
46static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
47{
48	struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
49	unsigned long flags = (unsigned long)key;
50
51	if (flags & POLLIN) {
52		/* An event has been signaled, call function */
53		if ((!virqfd->handler ||
54		     virqfd->handler(virqfd->opaque, virqfd->data)) &&
55		    virqfd->thread)
56			schedule_work(&virqfd->inject);
57	}
58
59	if (flags & POLLHUP) {
60		unsigned long flags;
61		spin_lock_irqsave(&virqfd_lock, flags);
62
63		/*
64		 * The eventfd is closing, if the virqfd has not yet been
65		 * queued for release, as determined by testing whether the
66		 * virqfd pointer to it is still valid, queue it now.  As
67		 * with kvm irqfds, we know we won't race against the virqfd
68		 * going away because we hold the lock to get here.
69		 */
70		if (*(virqfd->pvirqfd) == virqfd) {
71			*(virqfd->pvirqfd) = NULL;
72			virqfd_deactivate(virqfd);
73		}
74
75		spin_unlock_irqrestore(&virqfd_lock, flags);
76	}
77
78	return 0;
79}
80
81static void virqfd_ptable_queue_proc(struct file *file,
82				     wait_queue_head_t *wqh, poll_table *pt)
83{
84	struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
85	add_wait_queue(wqh, &virqfd->wait);
86}
87
88static void virqfd_shutdown(struct work_struct *work)
89{
90	struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
91	u64 cnt;
92
93	eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
94	flush_work(&virqfd->inject);
95	eventfd_ctx_put(virqfd->eventfd);
96
97	kfree(virqfd);
98}
99
100static void virqfd_inject(struct work_struct *work)
101{
102	struct virqfd *virqfd = container_of(work, struct virqfd, inject);
103	if (virqfd->thread)
104		virqfd->thread(virqfd->opaque, virqfd->data);
105}
106
107int vfio_virqfd_enable(void *opaque,
108		       int (*handler)(void *, void *),
109		       void (*thread)(void *, void *),
110		       void *data, struct virqfd **pvirqfd, int fd)
111{
112	struct fd irqfd;
113	struct eventfd_ctx *ctx;
114	struct virqfd *virqfd;
115	int ret = 0;
116	unsigned int events;
117
118	virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL);
119	if (!virqfd)
120		return -ENOMEM;
121
122	virqfd->pvirqfd = pvirqfd;
123	virqfd->opaque = opaque;
124	virqfd->handler = handler;
125	virqfd->thread = thread;
126	virqfd->data = data;
127
128	INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
129	INIT_WORK(&virqfd->inject, virqfd_inject);
130
131	irqfd = fdget(fd);
132	if (!irqfd.file) {
133		ret = -EBADF;
134		goto err_fd;
135	}
136
137	ctx = eventfd_ctx_fileget(irqfd.file);
138	if (IS_ERR(ctx)) {
139		ret = PTR_ERR(ctx);
140		goto err_ctx;
141	}
142
143	virqfd->eventfd = ctx;
144
145	/*
146	 * virqfds can be released by closing the eventfd or directly
147	 * through ioctl.  These are both done through a workqueue, so
148	 * we update the pointer to the virqfd under lock to avoid
149	 * pushing multiple jobs to release the same virqfd.
150	 */
151	spin_lock_irq(&virqfd_lock);
152
153	if (*pvirqfd) {
154		spin_unlock_irq(&virqfd_lock);
155		ret = -EBUSY;
156		goto err_busy;
157	}
158	*pvirqfd = virqfd;
159
160	spin_unlock_irq(&virqfd_lock);
161
162	/*
163	 * Install our own custom wake-up handling so we are notified via
164	 * a callback whenever someone signals the underlying eventfd.
165	 */
166	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
167	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
168
169	events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);
170
171	/*
172	 * Check if there was an event already pending on the eventfd
173	 * before we registered and trigger it as if we didn't miss it.
174	 */
175	if (events & POLLIN) {
176		if ((!handler || handler(opaque, data)) && thread)
177			schedule_work(&virqfd->inject);
178	}
179
180	/*
181	 * Do not drop the file until the irqfd is fully initialized,
182	 * otherwise we might race against the POLLHUP.
183	 */
184	fdput(irqfd);
185
186	return 0;
187err_busy:
188	eventfd_ctx_put(ctx);
189err_ctx:
190	fdput(irqfd);
191err_fd:
192	kfree(virqfd);
193
194	return ret;
195}
196EXPORT_SYMBOL_GPL(vfio_virqfd_enable);
197
198void vfio_virqfd_disable(struct virqfd **pvirqfd)
199{
200	unsigned long flags;
201
202	spin_lock_irqsave(&virqfd_lock, flags);
203
204	if (*pvirqfd) {
205		virqfd_deactivate(*pvirqfd);
206		*pvirqfd = NULL;
207	}
208
209	spin_unlock_irqrestore(&virqfd_lock, flags);
210
211	/*
212	 * Block until we know all outstanding shutdown jobs have completed.
213	 * Even if we don't queue the job, flush the wq to be sure it's
214	 * been released.
215	 */
216	flush_workqueue(vfio_irqfd_cleanup_wq);
217}
218EXPORT_SYMBOL_GPL(vfio_virqfd_disable);
219
/* Register the module's init and exit entry points. */
module_init(vfio_virqfd_init);
module_exit(vfio_virqfd_exit);

/* Module metadata, built from the DRIVER_* strings defined above. */
MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
227