drivers/infiniband/core/uverbs_main.c

   1 /*
   2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
   3  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
   4  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   5  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
   6  * Copyright (c) 2005 PathScale, Inc. All rights reserved.
   7  *
   8  * This software is available to you under a choice of one of two
   9  * licenses.  You may choose to be licensed under the terms of the GNU
  10  * General Public License (GPL) Version 2, available from the file
  11  * COPYING in the main directory of this source tree, or the
  12  * OpenIB.org BSD license below:
  13  *
  14  *     Redistribution and use in source and binary forms, with or
  15  *     without modification, are permitted provided that the following
  16  *     conditions are met:
  17  *
  18  *      - Redistributions of source code must retain the above
  19  *        copyright notice, this list of conditions and the following
  20  *        disclaimer.
  21  *
  22  *      - Redistributions in binary form must reproduce the above
  23  *        copyright notice, this list of conditions and the following
  24  *        disclaimer in the documentation and/or other materials
  25  *        provided with the distribution.
  26  *
  27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  34  * SOFTWARE.
  35  */
  36 
  37 #include <linux/module.h>
  38 #include <linux/init.h>
  39 #include <linux/device.h>
  40 #include <linux/err.h>
  41 #include <linux/fs.h>
  42 #include <linux/poll.h>
  43 #include <linux/sched.h>
  44 #include <linux/file.h>
  45 #include <linux/cdev.h>
  46 #include <linux/anon_inodes.h>
  47 #include <linux/slab.h>
  48 #include <linux/sched/mm.h>
  49 
  50 #include <linux/uaccess.h>
  51 
  52 #include <rdma/ib.h>
  53 #include <rdma/uverbs_std_types.h>
  54 #include <rdma/rdma_netlink.h>
  55 
  56 #include "uverbs.h"
  57 #include "core_priv.h"
  58 #include "rdma_core.h"
  59 
  60 MODULE_AUTHOR("Roland Dreier");
  61 MODULE_DESCRIPTION("InfiniBand userspace verbs access");
  62 MODULE_LICENSE("Dual BSD/GPL");
  63 
  64 enum {
  65         IB_UVERBS_MAJOR       = 231,
  66         IB_UVERBS_BASE_MINOR  = 192,
  67         IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
  68         IB_UVERBS_NUM_FIXED_MINOR = 32,
  69         IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
  70 };
  71 
  72 #define IB_UVERBS_BASE_DEV      MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
  73 
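      /*
       * The first IB_UVERBS_NUM_FIXED_MINOR minors come from the statically
       * registered IB_UVERBS_BASE_DEV region; devices beyond that are numbered
       * out of the dynamically allocated dynamic_uverbs_dev range (see
       * ib_uverbs_init() and ib_uverbs_add_one()).
       */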
  74 static dev_t dynamic_uverbs_dev;
  75 static struct class *uverbs_class;
  76 
  77 static DEFINE_IDA(uverbs_ida);
  78 static void ib_uverbs_add_one(struct ib_device *device);
  79 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
  80 
  81 /*
  82  * Must be called with the ufile->device->disassociate_srcu held, and the lock
  83  * must be held until use of the ucontext is finished.
  84  */
  85 struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
  86 {
  87         /*
   88          * We do not hold the hw_destroy_rwsem lock for this flow; instead
   89          * SRCU is used. It does not matter if someone races this with
   90          * get_context, we either get NULL or a valid ucontext.
  91          */
  92         struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);
  93 
  94         if (!srcu_dereference(ufile->device->ib_dev,
  95                               &ufile->device->disassociate_srcu))
  96                 return ERR_PTR(-EIO);
  97 
  98         if (!ucontext)
  99                 return ERR_PTR(-EINVAL);
 100 
 101         return ucontext;
 102 }
 103 EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);
 104 
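      /* Deallocate a memory window and drop its PD reference on success. */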
 105 int uverbs_dealloc_mw(struct ib_mw *mw)
 106 {
 107         struct ib_pd *pd = mw->pd;
 108         int ret;
 109 
 110         ret = mw->device->ops.dealloc_mw(mw);
 111         if (!ret)
 112                 atomic_dec(&pd->usecnt);
 113         return ret;
 114 }
 115 
 116 static void ib_uverbs_release_dev(struct device *device)
 117 {
 118         struct ib_uverbs_device *dev =
 119                         container_of(device, struct ib_uverbs_device, dev);
 120 
 121         uverbs_destroy_api(dev->uapi);
 122         cleanup_srcu_struct(&dev->disassociate_srcu);
 123         mutex_destroy(&dev->lists_mutex);
 124         mutex_destroy(&dev->xrcd_tree_mutex);
 125         kfree(dev);
 126 }
 127 
 128 static void ib_uverbs_release_async_event_file(struct kref *ref)
 129 {
 130         struct ib_uverbs_async_event_file *file =
 131                 container_of(ref, struct ib_uverbs_async_event_file, ref);
 132 
 133         kfree(file);
 134 }
 135 
 136 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
 137                           struct ib_uverbs_completion_event_file *ev_file,
 138                           struct ib_ucq_object *uobj)
 139 {
 140         struct ib_uverbs_event *evt, *tmp;
 141 
 142         if (ev_file) {
 143                 spin_lock_irq(&ev_file->ev_queue.lock);
 144                 list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
 145                         list_del(&evt->list);
 146                         kfree(evt);
 147                 }
 148                 spin_unlock_irq(&ev_file->ev_queue.lock);
 149 
 150                 uverbs_uobject_put(&ev_file->uobj);
 151         }
 152 
 153         spin_lock_irq(&file->async_file->ev_queue.lock);
 154         list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
 155                 list_del(&evt->list);
 156                 kfree(evt);
 157         }
 158         spin_unlock_irq(&file->async_file->ev_queue.lock);
 159 }
 160 
 161 void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
 162                               struct ib_uevent_object *uobj)
 163 {
 164         struct ib_uverbs_event *evt, *tmp;
 165 
 166         spin_lock_irq(&file->async_file->ev_queue.lock);
 167         list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
 168                 list_del(&evt->list);
 169                 kfree(evt);
 170         }
 171         spin_unlock_irq(&file->async_file->ev_queue.lock);
 172 }
 173 
 174 void ib_uverbs_detach_umcast(struct ib_qp *qp,
 175                              struct ib_uqp_object *uobj)
 176 {
 177         struct ib_uverbs_mcast_entry *mcast, *tmp;
 178 
 179         list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
 180                 ib_detach_mcast(qp, &mcast->gid, mcast->lid);
 181                 list_del(&mcast->list);
 182                 kfree(mcast);
 183         }
 184 }
 185 
 186 static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
 187 {
 188         complete(&dev->comp);
 189 }
 190 
 191 void ib_uverbs_release_file(struct kref *ref)
 192 {
 193         struct ib_uverbs_file *file =
 194                 container_of(ref, struct ib_uverbs_file, ref);
 195         struct ib_device *ib_dev;
 196         int srcu_key;
 197 
 198         release_ufile_idr_uobject(file);
 199 
 200         srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
 201         ib_dev = srcu_dereference(file->device->ib_dev,
 202                                   &file->device->disassociate_srcu);
 203         if (ib_dev && !ib_dev->ops.disassociate_ucontext)
 204                 module_put(ib_dev->ops.owner);
 205         srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
 206 
 207         if (atomic_dec_and_test(&file->device->refcount))
 208                 ib_uverbs_comp_dev(file->device);
 209 
 210         if (file->async_file)
 211                 kref_put(&file->async_file->ref,
 212                          ib_uverbs_release_async_event_file);
 213         put_device(&file->device->dev);
 214 
 215         if (file->disassociate_page)
 216                 __free_pages(file->disassociate_page, 0);
 217         mutex_destroy(&file->umap_lock);
 218         mutex_destroy(&file->ucontext_lock);
 219         kfree(file);
 220 }
 221 
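      /*
       * Common helper for both event file read() methods: sleep until an event
       * is queued (unless O_NONBLOCK), then copy a single event of @eventsz
       * bytes to userspace. Returns -EIO if the queue was closed because the
       * device was disassociated and no event is pending.
       */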
 222 static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
 223                                     struct file *filp, char __user *buf,
 224                                     size_t count, loff_t *pos,
 225                                     size_t eventsz)
 226 {
 227         struct ib_uverbs_event *event;
 228         int ret = 0;
 229 
 230         spin_lock_irq(&ev_queue->lock);
 231 
 232         while (list_empty(&ev_queue->event_list)) {
 233                 spin_unlock_irq(&ev_queue->lock);
 234 
 235                 if (filp->f_flags & O_NONBLOCK)
 236                         return -EAGAIN;
 237 
 238                 if (wait_event_interruptible(ev_queue->poll_wait,
 239                                              (!list_empty(&ev_queue->event_list) ||
 240                                               ev_queue->is_closed)))
 241                         return -ERESTARTSYS;
 242 
 243                 spin_lock_irq(&ev_queue->lock);
 244 
  245                 /* If the device was disassociated and no event exists, return an error */
 246                 if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
 247                         spin_unlock_irq(&ev_queue->lock);
 248                         return -EIO;
 249                 }
 250         }
 251 
 252         event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
 253 
 254         if (eventsz > count) {
 255                 ret   = -EINVAL;
 256                 event = NULL;
 257         } else {
 258                 list_del(ev_queue->event_list.next);
 259                 if (event->counter) {
 260                         ++(*event->counter);
 261                         list_del(&event->obj_list);
 262                 }
 263         }
 264 
 265         spin_unlock_irq(&ev_queue->lock);
 266 
 267         if (event) {
 268                 if (copy_to_user(buf, event, eventsz))
 269                         ret = -EFAULT;
 270                 else
 271                         ret = eventsz;
 272         }
 273 
 274         kfree(event);
 275 
 276         return ret;
 277 }
 278 
 279 static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
 280                                           size_t count, loff_t *pos)
 281 {
 282         struct ib_uverbs_async_event_file *file = filp->private_data;
 283 
 284         return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos,
 285                                     sizeof(struct ib_uverbs_async_event_desc));
 286 }
 287 
 288 static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
 289                                          size_t count, loff_t *pos)
 290 {
 291         struct ib_uverbs_completion_event_file *comp_ev_file =
 292                 filp->private_data;
 293 
 294         return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count,
 295                                     pos,
 296                                     sizeof(struct ib_uverbs_comp_event_desc));
 297 }
 298 
 299 static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
 300                                          struct file *filp,
 301                                          struct poll_table_struct *wait)
 302 {
 303         __poll_t pollflags = 0;
 304 
 305         poll_wait(filp, &ev_queue->poll_wait, wait);
 306 
 307         spin_lock_irq(&ev_queue->lock);
 308         if (!list_empty(&ev_queue->event_list))
 309                 pollflags = EPOLLIN | EPOLLRDNORM;
 310         else if (ev_queue->is_closed)
 311                 pollflags = EPOLLERR;
 312         spin_unlock_irq(&ev_queue->lock);
 313 
 314         return pollflags;
 315 }
 316 
 317 static __poll_t ib_uverbs_async_event_poll(struct file *filp,
 318                                                struct poll_table_struct *wait)
 319 {
 320         struct ib_uverbs_async_event_file *file = filp->private_data;
 321 
 322         return ib_uverbs_event_poll(&file->ev_queue, filp, wait);
 323 }
 324 
 325 static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
 326                                               struct poll_table_struct *wait)
 327 {
 328         struct ib_uverbs_completion_event_file *comp_ev_file =
 329                 filp->private_data;
 330 
 331         return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
 332 }
 333 
 334 static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
 335 {
 336         struct ib_uverbs_async_event_file *file = filp->private_data;
 337 
 338         return fasync_helper(fd, filp, on, &file->ev_queue.async_queue);
 339 }
 340 
 341 static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
 342 {
 343         struct ib_uverbs_completion_event_file *comp_ev_file =
 344                 filp->private_data;
 345 
 346         return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
 347 }
 348 
 349 static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
 350 {
 351         struct ib_uverbs_async_event_file *file = filp->private_data;
 352         struct ib_uverbs_file *uverbs_file = file->uverbs_file;
 353         struct ib_uverbs_event *entry, *tmp;
 354         int closed_already = 0;
 355 
 356         mutex_lock(&uverbs_file->device->lists_mutex);
 357         spin_lock_irq(&file->ev_queue.lock);
 358         closed_already = file->ev_queue.is_closed;
 359         file->ev_queue.is_closed = 1;
 360         list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
 361                 if (entry->counter)
 362                         list_del(&entry->obj_list);
 363                 kfree(entry);
 364         }
 365         spin_unlock_irq(&file->ev_queue.lock);
 366         if (!closed_already) {
 367                 list_del(&file->list);
 368                 ib_unregister_event_handler(&uverbs_file->event_handler);
 369         }
 370         mutex_unlock(&uverbs_file->device->lists_mutex);
 371 
 372         kref_put(&uverbs_file->ref, ib_uverbs_release_file);
 373         kref_put(&file->ref, ib_uverbs_release_async_event_file);
 374 
 375         return 0;
 376 }
 377 
 378 static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
 379 {
 380         struct ib_uobject *uobj = filp->private_data;
 381         struct ib_uverbs_completion_event_file *file = container_of(
 382                 uobj, struct ib_uverbs_completion_event_file, uobj);
 383         struct ib_uverbs_event *entry, *tmp;
 384 
 385         spin_lock_irq(&file->ev_queue.lock);
 386         list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
 387                 if (entry->counter)
 388                         list_del(&entry->obj_list);
 389                 kfree(entry);
 390         }
 391         file->ev_queue.is_closed = 1;
 392         spin_unlock_irq(&file->ev_queue.lock);
 393 
 394         uverbs_close_fd(filp);
 395 
 396         return 0;
 397 }
 398 
 399 const struct file_operations uverbs_event_fops = {
 400         .owner   = THIS_MODULE,
 401         .read    = ib_uverbs_comp_event_read,
 402         .poll    = ib_uverbs_comp_event_poll,
 403         .release = ib_uverbs_comp_event_close,
 404         .fasync  = ib_uverbs_comp_event_fasync,
 405         .llseek  = no_llseek,
 406 };
 407 
 408 static const struct file_operations uverbs_async_event_fops = {
 409         .owner   = THIS_MODULE,
 410         .read    = ib_uverbs_async_event_read,
 411         .poll    = ib_uverbs_async_event_poll,
 412         .release = ib_uverbs_async_event_close,
 413         .fasync  = ib_uverbs_async_event_fasync,
 414         .llseek  = no_llseek,
 415 };
 416 
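      /*
       * Completion callback for CQs created through uverbs: queue a completion
       * event on the CQ's completion channel and wake any poll()/read()
       * waiters. May be called from the driver's interrupt context, hence
       * GFP_ATOMIC and the irqsave locking.
       */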
 417 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
 418 {
 419         struct ib_uverbs_event_queue   *ev_queue = cq_context;
 420         struct ib_ucq_object           *uobj;
 421         struct ib_uverbs_event         *entry;
 422         unsigned long                   flags;
 423 
 424         if (!ev_queue)
 425                 return;
 426 
 427         spin_lock_irqsave(&ev_queue->lock, flags);
 428         if (ev_queue->is_closed) {
 429                 spin_unlock_irqrestore(&ev_queue->lock, flags);
 430                 return;
 431         }
 432 
 433         entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
 434         if (!entry) {
 435                 spin_unlock_irqrestore(&ev_queue->lock, flags);
 436                 return;
 437         }
 438 
 439         uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
 440 
 441         entry->desc.comp.cq_handle = cq->uobject->user_handle;
 442         entry->counter             = &uobj->comp_events_reported;
 443 
 444         list_add_tail(&entry->list, &ev_queue->event_list);
 445         list_add_tail(&entry->obj_list, &uobj->comp_list);
 446         spin_unlock_irqrestore(&ev_queue->lock, flags);
 447 
 448         wake_up_interruptible(&ev_queue->poll_wait);
 449         kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
 450 }
 451 
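      /*
       * Queue an asynchronous event (QP/CQ/SRQ/WQ errors, port events, ...) on
       * the file's async event queue and notify readers. @obj_list and
       * @counter, when non-NULL, let the owning uobject track the events
       * reported for it.
       */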
 452 static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
 453                                     __u64 element, __u64 event,
 454                                     struct list_head *obj_list,
 455                                     u32 *counter)
 456 {
 457         struct ib_uverbs_event *entry;
 458         unsigned long flags;
 459 
 460         spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
 461         if (file->async_file->ev_queue.is_closed) {
 462                 spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
 463                 return;
 464         }
 465 
 466         entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
 467         if (!entry) {
 468                 spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
 469                 return;
 470         }
 471 
 472         entry->desc.async.element    = element;
 473         entry->desc.async.event_type = event;
 474         entry->desc.async.reserved   = 0;
 475         entry->counter               = counter;
 476 
 477         list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
 478         if (obj_list)
 479                 list_add_tail(&entry->obj_list, obj_list);
 480         spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
 481 
 482         wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
 483         kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
 484 }
 485 
 486 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
 487 {
 488         struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
 489                                                   struct ib_ucq_object, uobject);
 490 
 491         ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle,
 492                                 event->event, &uobj->async_list,
 493                                 &uobj->async_events_reported);
 494 }
 495 
 496 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
 497 {
 498         struct ib_uevent_object *uobj;
 499 
 500         /* for XRC target qp's, check that qp is live */
 501         if (!event->element.qp->uobject)
 502                 return;
 503 
 504         uobj = container_of(event->element.qp->uobject,
 505                             struct ib_uevent_object, uobject);
 506 
 507         ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
 508                                 event->event, &uobj->event_list,
 509                                 &uobj->events_reported);
 510 }
 511 
 512 void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
 513 {
 514         struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
 515                                                   struct ib_uevent_object, uobject);
 516 
 517         ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
 518                                 event->event, &uobj->event_list,
 519                                 &uobj->events_reported);
 520 }
 521 
 522 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
 523 {
 524         struct ib_uevent_object *uobj;
 525 
 526         uobj = container_of(event->element.srq->uobject,
 527                             struct ib_uevent_object, uobject);
 528 
 529         ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
 530                                 event->event, &uobj->event_list,
 531                                 &uobj->events_reported);
 532 }
 533 
 534 void ib_uverbs_event_handler(struct ib_event_handler *handler,
 535                              struct ib_event *event)
 536 {
 537         struct ib_uverbs_file *file =
 538                 container_of(handler, struct ib_uverbs_file, event_handler);
 539 
 540         ib_uverbs_async_handler(file, event->element.port_num, event->event,
 541                                 NULL, NULL);
 542 }
 543 
 544 void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
 545 {
 546         kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
 547         file->async_file = NULL;
 548 }
 549 
 550 void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
 551 {
 552         spin_lock_init(&ev_queue->lock);
 553         INIT_LIST_HEAD(&ev_queue->event_list);
 554         init_waitqueue_head(&ev_queue->poll_wait);
 555         ev_queue->is_closed   = 0;
 556         ev_queue->async_queue = NULL;
 557 }
 558 
 559 struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
 560                                               struct ib_device  *ib_dev)
 561 {
 562         struct ib_uverbs_async_event_file *ev_file;
 563         struct file *filp;
 564 
 565         ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
 566         if (!ev_file)
 567                 return ERR_PTR(-ENOMEM);
 568 
 569         ib_uverbs_init_event_queue(&ev_file->ev_queue);
 570         ev_file->uverbs_file = uverbs_file;
 571         kref_get(&ev_file->uverbs_file->ref);
 572         kref_init(&ev_file->ref);
 573         filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
 574                                   ev_file, O_RDONLY);
 575         if (IS_ERR(filp))
 576                 goto err_put_refs;
 577 
 578         mutex_lock(&uverbs_file->device->lists_mutex);
 579         list_add_tail(&ev_file->list,
 580                       &uverbs_file->device->uverbs_events_file_list);
 581         mutex_unlock(&uverbs_file->device->lists_mutex);
 582 
 583         WARN_ON(uverbs_file->async_file);
 584         uverbs_file->async_file = ev_file;
 585         kref_get(&uverbs_file->async_file->ref);
 586         INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
 587                               ib_dev,
 588                               ib_uverbs_event_handler);
 589         ib_register_event_handler(&uverbs_file->event_handler);
  590         /* At this point the async event file is fully set up */
 591 
 592         return filp;
 593 
 594 err_put_refs:
 595         kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
 596         kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
 597         return filp;
 598 }
 599 
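      /*
       * Validate the command header(s) against the registered write method:
       * legacy commands express sizes in 4-byte words covering the whole
       * write(), extended commands in 8-byte words excluding the two headers,
       * and the request/response sizes must cover the method's declared
       * req_size/resp_size.
       */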
 600 static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
 601                           struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
 602                           const struct uverbs_api_write_method *method_elm)
 603 {
 604         if (method_elm->is_ex) {
 605                 count -= sizeof(*hdr) + sizeof(*ex_hdr);
 606 
 607                 if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
 608                         return -EINVAL;
 609 
 610                 if (hdr->in_words * 8 < method_elm->req_size)
 611                         return -ENOSPC;
 612 
 613                 if (ex_hdr->cmd_hdr_reserved)
 614                         return -EINVAL;
 615 
 616                 if (ex_hdr->response) {
 617                         if (!hdr->out_words && !ex_hdr->provider_out_words)
 618                                 return -EINVAL;
 619 
 620                         if (hdr->out_words * 8 < method_elm->resp_size)
 621                                 return -ENOSPC;
 622 
 623                         if (!access_ok(u64_to_user_ptr(ex_hdr->response),
 624                                        (hdr->out_words + ex_hdr->provider_out_words) * 8))
 625                                 return -EFAULT;
 626                 } else {
 627                         if (hdr->out_words || ex_hdr->provider_out_words)
 628                                 return -EINVAL;
 629                 }
 630 
 631                 return 0;
 632         }
 633 
 634         /* not extended command */
 635         if (hdr->in_words * 4 != count)
 636                 return -EINVAL;
 637 
 638         if (count < method_elm->req_size + sizeof(hdr)) {
 639                 /*
 640                  * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
 641                  * with a 16 byte write instead of 24. Old kernels didn't
 642                  * check the size so they allowed this. Now that the size is
 643                  * checked provide a compatibility work around to not break
 644                  * those userspaces.
 645                  */
 646                 if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
 647                     count == 16) {
 648                         hdr->in_words = 6;
 649                         return 0;
 650                 }
 651                 return -ENOSPC;
 652         }
 653         if (hdr->out_words * 4 < method_elm->resp_size)
 654                 return -ENOSPC;
 655 
 656         return 0;
 657 }
 658 
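      /*
       * Entry point for the legacy write()-based command stream: parse the
       * command header, look up the write method in the per-device uapi, set
       * up the core and driver udata buffers, and dispatch to the handler
       * under the disassociation SRCU read lock.
       */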
 659 static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 660                              size_t count, loff_t *pos)
 661 {
 662         struct ib_uverbs_file *file = filp->private_data;
 663         const struct uverbs_api_write_method *method_elm;
 664         struct uverbs_api *uapi = file->device->uapi;
 665         struct ib_uverbs_ex_cmd_hdr ex_hdr;
 666         struct ib_uverbs_cmd_hdr hdr;
 667         struct uverbs_attr_bundle bundle;
 668         int srcu_key;
 669         ssize_t ret;
 670 
 671         if (!ib_safe_file_access(filp)) {
 672                 pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
 673                             task_tgid_vnr(current), current->comm);
 674                 return -EACCES;
 675         }
 676 
 677         if (count < sizeof(hdr))
 678                 return -EINVAL;
 679 
 680         if (copy_from_user(&hdr, buf, sizeof(hdr)))
 681                 return -EFAULT;
 682 
 683         method_elm = uapi_get_method(uapi, hdr.command);
 684         if (IS_ERR(method_elm))
 685                 return PTR_ERR(method_elm);
 686 
 687         if (method_elm->is_ex) {
 688                 if (count < (sizeof(hdr) + sizeof(ex_hdr)))
 689                         return -EINVAL;
 690                 if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
 691                         return -EFAULT;
 692         }
 693 
 694         ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
 695         if (ret)
 696                 return ret;
 697 
 698         srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
 699 
 700         buf += sizeof(hdr);
 701 
 702         memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
 703         bundle.ufile = file;
 704         bundle.context = NULL; /* only valid if bundle has uobject */
 705         if (!method_elm->is_ex) {
 706                 size_t in_len = hdr.in_words * 4 - sizeof(hdr);
 707                 size_t out_len = hdr.out_words * 4;
 708                 u64 response = 0;
 709 
 710                 if (method_elm->has_udata) {
 711                         bundle.driver_udata.inlen =
 712                                 in_len - method_elm->req_size;
 713                         in_len = method_elm->req_size;
 714                         if (bundle.driver_udata.inlen)
 715                                 bundle.driver_udata.inbuf = buf + in_len;
 716                         else
 717                                 bundle.driver_udata.inbuf = NULL;
 718                 } else {
 719                         memset(&bundle.driver_udata, 0,
 720                                sizeof(bundle.driver_udata));
 721                 }
 722 
 723                 if (method_elm->has_resp) {
 724                         /*
 725                          * The macros check that if has_resp is set
 726                          * then the command request structure starts
 727                          * with a '__aligned u64 response' member.
 728                          */
 729                         ret = get_user(response, (const u64 __user *)buf);
 730                         if (ret)
 731                                 goto out_unlock;
 732 
 733                         if (method_elm->has_udata) {
 734                                 bundle.driver_udata.outlen =
 735                                         out_len - method_elm->resp_size;
 736                                 out_len = method_elm->resp_size;
 737                                 if (bundle.driver_udata.outlen)
 738                                         bundle.driver_udata.outbuf =
 739                                                 u64_to_user_ptr(response +
 740                                                                 out_len);
 741                                 else
 742                                         bundle.driver_udata.outbuf = NULL;
 743                         }
 744                 } else {
 745                         bundle.driver_udata.outlen = 0;
 746                         bundle.driver_udata.outbuf = NULL;
 747                 }
 748 
 749                 ib_uverbs_init_udata_buf_or_null(
 750                         &bundle.ucore, buf, u64_to_user_ptr(response),
 751                         in_len, out_len);
 752         } else {
 753                 buf += sizeof(ex_hdr);
 754 
 755                 ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
 756                                         u64_to_user_ptr(ex_hdr.response),
 757                                         hdr.in_words * 8, hdr.out_words * 8);
 758 
 759                 ib_uverbs_init_udata_buf_or_null(
 760                         &bundle.driver_udata, buf + bundle.ucore.inlen,
 761                         u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen,
 762                         ex_hdr.provider_in_words * 8,
 763                         ex_hdr.provider_out_words * 8);
 764 
 765         }
 766 
 767         ret = method_elm->handler(&bundle);
 768 out_unlock:
 769         srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
 770         return (ret) ? : count;
 771 }
 772 
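      /*
       * mmap() on a uverbs device file is forwarded to the driver's ->mmap()
       * op for the file's ucontext, under the disassociation SRCU read lock so
       * the underlying ib_device cannot vanish mid-call.
       */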
 773 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
 774 {
 775         struct ib_uverbs_file *file = filp->private_data;
 776         struct ib_ucontext *ucontext;
 777         int ret = 0;
 778         int srcu_key;
 779 
 780         srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
 781         ucontext = ib_uverbs_get_ucontext_file(file);
 782         if (IS_ERR(ucontext)) {
 783                 ret = PTR_ERR(ucontext);
 784                 goto out;
 785         }
 786 
 787         ret = ucontext->device->ops.mmap(ucontext, vma);
 788 out:
 789         srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
 790         return ret;
 791 }
 792 
 793 /*
 794  * Each time we map IO memory into user space this keeps track of the mapping.
 795  * When the device is hot-unplugged we 'zap' the mmaps in user space to point
 796  * to the zero page and allow the hot unplug to proceed.
 797  *
 798  * This is necessary for cases like PCI physical hot unplug as the actual BAR
 799  * memory may vanish after this and access to it from userspace could MCE.
 800  *
 801  * RDMA drivers supporting disassociation must have their user space designed
 802  * to cope in some way with their IO pages going to the zero page.
 803  */
 804 struct rdma_umap_priv {
 805         struct vm_area_struct *vma;
 806         struct list_head list;
 807 };
 808 
 809 static const struct vm_operations_struct rdma_umap_ops;
 810 
 811 static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
 812                                 struct vm_area_struct *vma)
 813 {
 814         struct ib_uverbs_file *ufile = vma->vm_file->private_data;
 815 
 816         priv->vma = vma;
 817         vma->vm_private_data = priv;
 818         vma->vm_ops = &rdma_umap_ops;
 819 
 820         mutex_lock(&ufile->umap_lock);
 821         list_add(&priv->list, &ufile->umaps);
 822         mutex_unlock(&ufile->umap_lock);
 823 }
 824 
 825 /*
  826  * The VMA has been dup'd; initialize vm_private_data with a new tracking
  827  * struct.
 828  */
 829 static void rdma_umap_open(struct vm_area_struct *vma)
 830 {
 831         struct ib_uverbs_file *ufile = vma->vm_file->private_data;
 832         struct rdma_umap_priv *opriv = vma->vm_private_data;
 833         struct rdma_umap_priv *priv;
 834 
 835         if (!opriv)
 836                 return;
 837 
 838         /* We are racing with disassociation */
 839         if (!down_read_trylock(&ufile->hw_destroy_rwsem))
 840                 goto out_zap;
 841         /*
 842          * Disassociation already completed, the VMA should already be zapped.
 843          */
 844         if (!ufile->ucontext)
 845                 goto out_unlock;
 846 
 847         priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 848         if (!priv)
 849                 goto out_unlock;
 850         rdma_umap_priv_init(priv, vma);
 851 
 852         up_read(&ufile->hw_destroy_rwsem);
 853         return;
 854 
 855 out_unlock:
 856         up_read(&ufile->hw_destroy_rwsem);
 857 out_zap:
 858         /*
 859          * We can't allow the VMA to be created with the actual IO pages, that
 860          * would break our API contract, and it can't be stopped at this
 861          * point, so zap it.
 862          */
 863         vma->vm_private_data = NULL;
 864         zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
 865 }
 866 
 867 static void rdma_umap_close(struct vm_area_struct *vma)
 868 {
 869         struct ib_uverbs_file *ufile = vma->vm_file->private_data;
 870         struct rdma_umap_priv *priv = vma->vm_private_data;
 871 
 872         if (!priv)
 873                 return;
 874 
 875         /*
 876          * The vma holds a reference on the struct file that created it, which
 877          * in turn means that the ib_uverbs_file is guaranteed to exist at
 878          * this point.
 879          */
 880         mutex_lock(&ufile->umap_lock);
 881         list_del(&priv->list);
 882         mutex_unlock(&ufile->umap_lock);
 883         kfree(priv);
 884 }
 885 
 886 /*
  887  * Once zap_vma_ptes() has been called, touches to the VMA will come here
  888  * and we return a dummy writable zero page for all the pfns.
 889  */
 890 static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
 891 {
 892         struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
 893         struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
 894         vm_fault_t ret = 0;
 895 
 896         if (!priv)
 897                 return VM_FAULT_SIGBUS;
 898 
 899         /* Read only pages can just use the system zero page. */
 900         if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
 901                 vmf->page = ZERO_PAGE(vmf->address);
 902                 get_page(vmf->page);
 903                 return 0;
 904         }
 905 
 906         mutex_lock(&ufile->umap_lock);
 907         if (!ufile->disassociate_page)
 908                 ufile->disassociate_page =
 909                         alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
 910 
 911         if (ufile->disassociate_page) {
 912                 /*
 913                  * This VMA is forced to always be shared so this doesn't have
 914                  * to worry about COW.
 915                  */
 916                 vmf->page = ufile->disassociate_page;
 917                 get_page(vmf->page);
 918         } else {
 919                 ret = VM_FAULT_SIGBUS;
 920         }
 921         mutex_unlock(&ufile->umap_lock);
 922 
 923         return ret;
 924 }
 925 
 926 static const struct vm_operations_struct rdma_umap_ops = {
 927         .open = rdma_umap_open,
 928         .close = rdma_umap_close,
 929         .fault = rdma_umap_fault,
 930 };
 931 
 932 /*
 933  * Map IO memory into a process. This is to be called by drivers as part of
 934  * their mmap() functions if they wish to send something like PCI-E BAR memory
 935  * to userspace.
 936  */
 937 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
 938                       unsigned long pfn, unsigned long size, pgprot_t prot)
 939 {
 940         struct ib_uverbs_file *ufile = ucontext->ufile;
 941         struct rdma_umap_priv *priv;
 942 
 943         if (!(vma->vm_flags & VM_SHARED))
 944                 return -EINVAL;
 945 
 946         if (vma->vm_end - vma->vm_start != size)
 947                 return -EINVAL;
 948 
 949         /* Driver is using this wrong, must be called by ib_uverbs_mmap */
 950         if (WARN_ON(!vma->vm_file ||
 951                     vma->vm_file->private_data != ufile))
 952                 return -EINVAL;
 953         lockdep_assert_held(&ufile->device->disassociate_srcu);
 954 
 955         priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 956         if (!priv)
 957                 return -ENOMEM;
 958 
 959         vma->vm_page_prot = prot;
 960         if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
 961                 kfree(priv);
 962                 return -EAGAIN;
 963         }
 964 
 965         rdma_umap_priv_init(priv, vma);
 966         return 0;
 967 }
 968 EXPORT_SYMBOL(rdma_user_mmap_io);
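      /*
       * Illustrative sketch (not part of this file) of how a driver's ->mmap()
       * op might use rdma_user_mmap_io() to expose a hypothetical doorbell
       * page; example_dev, to_edev() and db_bar_pfn are invented names:
       *
       *        static int example_drv_mmap(struct ib_ucontext *ucontext,
       *                                    struct vm_area_struct *vma)
       *        {
       *                struct example_dev *edev = to_edev(ucontext->device);
       *                unsigned long pfn = edev->db_bar_pfn + vma->vm_pgoff;
       *
       *                if (vma->vm_end - vma->vm_start != PAGE_SIZE)
       *                        return -EINVAL;
       *
       *                return rdma_user_mmap_io(ucontext, vma, pfn, PAGE_SIZE,
       *                                         pgprot_noncached(vma->vm_page_prot));
       *        }
       */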
 969 
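      /*
       * Revoke every userspace mapping created through rdma_user_mmap_io()
       * when the device is being disassociated: zap the PTEs of each tracked
       * VMA, one mm at a time, so later faults land in rdma_umap_fault() and
       * get a zero/disassociate page instead of the vanished IO memory.
       */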
 970 void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 971 {
 972         struct rdma_umap_priv *priv, *next_priv;
 973 
 974         lockdep_assert_held(&ufile->hw_destroy_rwsem);
 975 
 976         while (1) {
 977                 struct mm_struct *mm = NULL;
 978 
 979                 /* Get an arbitrary mm pointer that hasn't been cleaned yet */
 980                 mutex_lock(&ufile->umap_lock);
 981                 while (!list_empty(&ufile->umaps)) {
 982                         int ret;
 983 
 984                         priv = list_first_entry(&ufile->umaps,
 985                                                 struct rdma_umap_priv, list);
 986                         mm = priv->vma->vm_mm;
 987                         ret = mmget_not_zero(mm);
 988                         if (!ret) {
 989                                 list_del_init(&priv->list);
 990                                 mm = NULL;
 991                                 continue;
 992                         }
 993                         break;
 994                 }
 995                 mutex_unlock(&ufile->umap_lock);
 996                 if (!mm)
 997                         return;
 998 
 999                 /*
 1000                  * The umap_lock is nested under mmap_sem since it is used within
1001                  * the vma_ops callbacks, so we have to clean the list one mm
1002                  * at a time to get the lock ordering right. Typically there
1003                  * will only be one mm, so no big deal.
1004                  */
1005                 down_read(&mm->mmap_sem);
1006                 if (!mmget_still_valid(mm))
1007                         goto skip_mm;
1008                 mutex_lock(&ufile->umap_lock);
1009                 list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
1010                                           list) {
1011                         struct vm_area_struct *vma = priv->vma;
1012 
1013                         if (vma->vm_mm != mm)
1014                                 continue;
1015                         list_del_init(&priv->list);
1016 
1017                         zap_vma_ptes(vma, vma->vm_start,
1018                                      vma->vm_end - vma->vm_start);
1019                 }
1020                 mutex_unlock(&ufile->umap_lock);
1021         skip_mm:
1022                 up_read(&mm->mmap_sem);
1023                 mmput(mm);
1024         }
1025 }
1026 
1027 /*
1028  * ib_uverbs_open() does not need the BKL:
1029  *
1030  *  - the ib_uverbs_device structures are properly reference counted and
1031  *    everything else is purely local to the file being created, so
1032  *    races against other open calls are not a problem;
1033  *  - there is no ioctl method to race against;
 1034  *  - the open method will either immediately return -ENXIO, or all the
 1035  *    required initialization will be done.
1036  */
1037 static int ib_uverbs_open(struct inode *inode, struct file *filp)
1038 {
1039         struct ib_uverbs_device *dev;
1040         struct ib_uverbs_file *file;
1041         struct ib_device *ib_dev;
1042         int ret;
1043         int module_dependent;
1044         int srcu_key;
1045 
1046         dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
1047         if (!atomic_inc_not_zero(&dev->refcount))
1048                 return -ENXIO;
1049 
1050         get_device(&dev->dev);
1051         srcu_key = srcu_read_lock(&dev->disassociate_srcu);
1052         mutex_lock(&dev->lists_mutex);
1053         ib_dev = srcu_dereference(dev->ib_dev,
1054                                   &dev->disassociate_srcu);
1055         if (!ib_dev) {
1056                 ret = -EIO;
1057                 goto err;
1058         }
1059 
1060         if (!rdma_dev_access_netns(ib_dev, current->nsproxy->net_ns)) {
1061                 ret = -EPERM;
1062                 goto err;
1063         }
1064 
 1065         /* If the IB device supports disassociating the ucontext, there is no
 1066          * hard dependency between the uverbs device and its low-level device.
1067          */
1068         module_dependent = !(ib_dev->ops.disassociate_ucontext);
1069 
1070         if (module_dependent) {
1071                 if (!try_module_get(ib_dev->ops.owner)) {
1072                         ret = -ENODEV;
1073                         goto err;
1074                 }
1075         }
1076 
1077         file = kzalloc(sizeof(*file), GFP_KERNEL);
1078         if (!file) {
1079                 ret = -ENOMEM;
1080                 if (module_dependent)
1081                         goto err_module;
1082 
1083                 goto err;
1084         }
1085 
1086         file->device     = dev;
1087         kref_init(&file->ref);
1088         mutex_init(&file->ucontext_lock);
1089 
1090         spin_lock_init(&file->uobjects_lock);
1091         INIT_LIST_HEAD(&file->uobjects);
1092         init_rwsem(&file->hw_destroy_rwsem);
1093         mutex_init(&file->umap_lock);
1094         INIT_LIST_HEAD(&file->umaps);
1095 
1096         filp->private_data = file;
1097         list_add_tail(&file->list, &dev->uverbs_file_list);
1098         mutex_unlock(&dev->lists_mutex);
1099         srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
1100 
1101         setup_ufile_idr_uobject(file);
1102 
1103         return stream_open(inode, filp);
1104 
1105 err_module:
1106         module_put(ib_dev->ops.owner);
1107 
1108 err:
1109         mutex_unlock(&dev->lists_mutex);
1110         srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
1111         if (atomic_dec_and_test(&dev->refcount))
1112                 ib_uverbs_comp_dev(dev);
1113 
1114         put_device(&dev->dev);
1115         return ret;
1116 }
1117 
1118 static int ib_uverbs_close(struct inode *inode, struct file *filp)
1119 {
1120         struct ib_uverbs_file *file = filp->private_data;
1121 
1122         uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
1123 
1124         mutex_lock(&file->device->lists_mutex);
1125         list_del_init(&file->list);
1126         mutex_unlock(&file->device->lists_mutex);
1127 
1128         kref_put(&file->ref, ib_uverbs_release_file);
1129 
1130         return 0;
1131 }
1132 
1133 static const struct file_operations uverbs_fops = {
1134         .owner   = THIS_MODULE,
1135         .write   = ib_uverbs_write,
1136         .open    = ib_uverbs_open,
1137         .release = ib_uverbs_close,
1138         .llseek  = no_llseek,
1139         .unlocked_ioctl = ib_uverbs_ioctl,
1140         .compat_ioctl = ib_uverbs_ioctl,
1141 };
1142 
1143 static const struct file_operations uverbs_mmap_fops = {
1144         .owner   = THIS_MODULE,
1145         .write   = ib_uverbs_write,
1146         .mmap    = ib_uverbs_mmap,
1147         .open    = ib_uverbs_open,
1148         .release = ib_uverbs_close,
1149         .llseek  = no_llseek,
1150         .unlocked_ioctl = ib_uverbs_ioctl,
1151         .compat_ioctl = ib_uverbs_ioctl,
1152 };
1153 
1154 static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data,
1155                                  struct ib_client_nl_info *res)
1156 {
1157         struct ib_uverbs_device *uverbs_dev = client_data;
1158         int ret;
1159 
1160         if (res->port != -1)
1161                 return -EINVAL;
1162 
1163         res->abi = ibdev->ops.uverbs_abi_ver;
1164         res->cdev = &uverbs_dev->dev;
1165 
1166         /*
 1167          * To support DRIVER_ID binding in userspace some of the drivers need
 1168          * upgrading to expose their PCI-dependent revision information
1169          * through get_context instead of relying on modalias matching. When
1170          * the drivers are fixed they can drop this flag.
1171          */
1172         if (!ibdev->ops.uverbs_no_driver_id_binding) {
1173                 ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID,
1174                                   ibdev->ops.driver_id);
1175                 if (ret)
1176                         return ret;
1177         }
1178         return 0;
1179 }
1180 
1181 static struct ib_client uverbs_client = {
1182         .name   = "uverbs",
1183         .no_kverbs_req = true,
1184         .add    = ib_uverbs_add_one,
1185         .remove = ib_uverbs_remove_one,
1186         .get_nl_info = ib_uverbs_get_nl_info,
1187 };
1188 MODULE_ALIAS_RDMA_CLIENT("uverbs");
1189 
1190 static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
1191                           char *buf)
1192 {
1193         struct ib_uverbs_device *dev =
1194                         container_of(device, struct ib_uverbs_device, dev);
1195         int ret = -ENODEV;
1196         int srcu_key;
1197         struct ib_device *ib_dev;
1198 
1199         srcu_key = srcu_read_lock(&dev->disassociate_srcu);
1200         ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
1201         if (ib_dev)
1202                 ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
1203         srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
1204 
1205         return ret;
1206 }
1207 static DEVICE_ATTR_RO(ibdev);
1208 
1209 static ssize_t abi_version_show(struct device *device,
1210                                 struct device_attribute *attr, char *buf)
1211 {
1212         struct ib_uverbs_device *dev =
1213                         container_of(device, struct ib_uverbs_device, dev);
1214         int ret = -ENODEV;
1215         int srcu_key;
1216         struct ib_device *ib_dev;
1217 
1218         srcu_key = srcu_read_lock(&dev->disassociate_srcu);
1219         ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
1220         if (ib_dev)
1221                 ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
1222         srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
1223 
1224         return ret;
1225 }
1226 static DEVICE_ATTR_RO(abi_version);
1227 
1228 static struct attribute *ib_dev_attrs[] = {
1229         &dev_attr_abi_version.attr,
1230         &dev_attr_ibdev.attr,
1231         NULL,
1232 };
1233 
1234 static const struct attribute_group dev_attr_group = {
1235         .attrs = ib_dev_attrs,
1236 };
1237 
1238 static CLASS_ATTR_STRING(abi_version, S_IRUGO,
1239                          __stringify(IB_USER_VERBS_ABI_VERSION));
1240 
1241 static int ib_uverbs_create_uapi(struct ib_device *device,
1242                                  struct ib_uverbs_device *uverbs_dev)
1243 {
1244         struct uverbs_api *uapi;
1245 
1246         uapi = uverbs_alloc_api(device);
1247         if (IS_ERR(uapi))
1248                 return PTR_ERR(uapi);
1249 
1250         uverbs_dev->uapi = uapi;
1251         return 0;
1252 }
1253 
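      /*
       * Client ->add() callback: allocate the ib_uverbs_device, pick a minor
       * (fixed range first, then the dynamic region), build the uapi
       * description, and register the uverbs%d char device for @device.
       */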
1254 static void ib_uverbs_add_one(struct ib_device *device)
1255 {
1256         int devnum;
1257         dev_t base;
1258         struct ib_uverbs_device *uverbs_dev;
1259         int ret;
1260 
1261         if (!device->ops.alloc_ucontext)
1262                 return;
1263 
1264         uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
1265         if (!uverbs_dev)
1266                 return;
1267 
1268         ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
1269         if (ret) {
1270                 kfree(uverbs_dev);
1271                 return;
1272         }
1273 
1274         device_initialize(&uverbs_dev->dev);
1275         uverbs_dev->dev.class = uverbs_class;
1276         uverbs_dev->dev.parent = device->dev.parent;
1277         uverbs_dev->dev.release = ib_uverbs_release_dev;
1278         uverbs_dev->groups[0] = &dev_attr_group;
1279         uverbs_dev->dev.groups = uverbs_dev->groups;
1280         atomic_set(&uverbs_dev->refcount, 1);
1281         init_completion(&uverbs_dev->comp);
1282         uverbs_dev->xrcd_tree = RB_ROOT;
1283         mutex_init(&uverbs_dev->xrcd_tree_mutex);
1284         mutex_init(&uverbs_dev->lists_mutex);
1285         INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
1286         INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
1287         rcu_assign_pointer(uverbs_dev->ib_dev, device);
1288         uverbs_dev->num_comp_vectors = device->num_comp_vectors;
1289 
1290         devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
1291                                GFP_KERNEL);
1292         if (devnum < 0)
1293                 goto err;
1294         uverbs_dev->devnum = devnum;
1295         if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
1296                 base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
1297         else
1298                 base = IB_UVERBS_BASE_DEV + devnum;
1299 
1300         if (ib_uverbs_create_uapi(device, uverbs_dev))
1301                 goto err_uapi;
1302 
1303         uverbs_dev->dev.devt = base;
1304         dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
1305 
1306         cdev_init(&uverbs_dev->cdev,
1307                   device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops);
1308         uverbs_dev->cdev.owner = THIS_MODULE;
1309 
1310         ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
1311         if (ret)
1312                 goto err_uapi;
1313 
1314         ib_set_client_data(device, &uverbs_client, uverbs_dev);
1315         return;
1316 
1317 err_uapi:
1318         ida_free(&uverbs_ida, devnum);
1319 err:
1320         if (atomic_dec_and_test(&uverbs_dev->refcount))
1321                 ib_uverbs_comp_dev(uverbs_dev);
1322         wait_for_completion(&uverbs_dev->comp);
1323         put_device(&uverbs_dev->dev);
1324         return;
1325 }
1326 
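      /*
       * Disassociation path: deliver IB_EVENT_DEVICE_FATAL to every open file,
       * destroy their HW objects, and mark all event queues closed so blocked
       * readers and pollers wake up with an error.
       */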
1327 static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
1328                                         struct ib_device *ib_dev)
1329 {
1330         struct ib_uverbs_file *file;
1331         struct ib_uverbs_async_event_file *event_file;
1332         struct ib_event event;
1333 
1334         /* Pending running commands to terminate */
1335         uverbs_disassociate_api_pre(uverbs_dev);
1336         event.event = IB_EVENT_DEVICE_FATAL;
1337         event.element.port_num = 0;
1338         event.device = ib_dev;
1339 
1340         mutex_lock(&uverbs_dev->lists_mutex);
1341         while (!list_empty(&uverbs_dev->uverbs_file_list)) {
1342                 file = list_first_entry(&uverbs_dev->uverbs_file_list,
1343                                         struct ib_uverbs_file, list);
1344                 list_del_init(&file->list);
1345                 kref_get(&file->ref);
1346 
 1347                 /* We must release the mutex before going ahead and calling
 1348                  * uverbs_destroy_ufile_hw(), as it might end up indirectly
 1349                  * calling uverbs_close, for example due to freeing the
 1350                  * resources (e.g. mmput).
1351                  */
1352                 mutex_unlock(&uverbs_dev->lists_mutex);
1353 
1354                 ib_uverbs_event_handler(&file->event_handler, &event);
1355                 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
1356                 kref_put(&file->ref, ib_uverbs_release_file);
1357 
1358                 mutex_lock(&uverbs_dev->lists_mutex);
1359         }
1360 
1361         while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
1362                 event_file = list_first_entry(&uverbs_dev->
1363                                               uverbs_events_file_list,
1364                                               struct ib_uverbs_async_event_file,
1365                                               list);
1366                 spin_lock_irq(&event_file->ev_queue.lock);
1367                 event_file->ev_queue.is_closed = 1;
1368                 spin_unlock_irq(&event_file->ev_queue.lock);
1369 
1370                 list_del(&event_file->list);
1371                 ib_unregister_event_handler(
1372                         &event_file->uverbs_file->event_handler);
1373                 event_file->uverbs_file->event_handler.device =
1374                         NULL;
1375 
1376                 wake_up_interruptible(&event_file->ev_queue.poll_wait);
1377                 kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
1378         }
1379         mutex_unlock(&uverbs_dev->lists_mutex);
1380 
1381         uverbs_disassociate_api(uverbs_dev->uapi);
1382 }
1383 
1384 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
1385 {
1386         struct ib_uverbs_device *uverbs_dev = client_data;
1387         int wait_clients = 1;
1388 
1389         if (!uverbs_dev)
1390                 return;
1391 
1392         cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
1393         ida_free(&uverbs_ida, uverbs_dev->devnum);
1394 
1395         if (device->ops.disassociate_ucontext) {
1396                 /* We disassociate HW resources and immediately return.
 1397                  * Userspace will see an EIO errno for all future access.
1398                  * Upon returning, ib_device may be freed internally and is not
1399                  * valid any more.
1400                  * uverbs_device is still available until all clients close
1401                  * their files, then the uverbs device ref count will be zero
1402                  * and its resources will be freed.
1403                  * Note: At this point no more files can be opened since the
1404                  * cdev was deleted, however active clients can still issue
1405                  * commands and close their open files.
1406                  */
1407                 ib_uverbs_free_hw_resources(uverbs_dev, device);
1408                 wait_clients = 0;
1409         }
1410 
1411         if (atomic_dec_and_test(&uverbs_dev->refcount))
1412                 ib_uverbs_comp_dev(uverbs_dev);
1413         if (wait_clients)
1414                 wait_for_completion(&uverbs_dev->comp);
1415 
1416         put_device(&uverbs_dev->dev);
1417 }
1418 
1419 static char *uverbs_devnode(struct device *dev, umode_t *mode)
1420 {
1421         if (mode)
1422                 *mode = 0666;
1423         return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
1424 }
1425 
1426 static int __init ib_uverbs_init(void)
1427 {
1428         int ret;
1429 
1430         ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
1431                                      IB_UVERBS_NUM_FIXED_MINOR,
1432                                      "infiniband_verbs");
1433         if (ret) {
1434                 pr_err("user_verbs: couldn't register device number\n");
1435                 goto out;
1436         }
1437 
1438         ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
1439                                   IB_UVERBS_NUM_DYNAMIC_MINOR,
1440                                   "infiniband_verbs");
1441         if (ret) {
1442                 pr_err("couldn't register dynamic device number\n");
1443                 goto out_alloc;
1444         }
1445 
1446         uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
1447         if (IS_ERR(uverbs_class)) {
1448                 ret = PTR_ERR(uverbs_class);
1449                 pr_err("user_verbs: couldn't create class infiniband_verbs\n");
1450                 goto out_chrdev;
1451         }
1452 
1453         uverbs_class->devnode = uverbs_devnode;
1454 
1455         ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
1456         if (ret) {
1457                 pr_err("user_verbs: couldn't create abi_version attribute\n");
1458                 goto out_class;
1459         }
1460 
1461         ret = ib_register_client(&uverbs_client);
1462         if (ret) {
1463                 pr_err("user_verbs: couldn't register client\n");
1464                 goto out_class;
1465         }
1466 
1467         return 0;
1468 
1469 out_class:
1470         class_destroy(uverbs_class);
1471 
1472 out_chrdev:
1473         unregister_chrdev_region(dynamic_uverbs_dev,
1474                                  IB_UVERBS_NUM_DYNAMIC_MINOR);
1475 
1476 out_alloc:
1477         unregister_chrdev_region(IB_UVERBS_BASE_DEV,
1478                                  IB_UVERBS_NUM_FIXED_MINOR);
1479 
1480 out:
1481         return ret;
1482 }
1483 
1484 static void __exit ib_uverbs_cleanup(void)
1485 {
1486         ib_unregister_client(&uverbs_client);
1487         class_destroy(uverbs_class);
1488         unregister_chrdev_region(IB_UVERBS_BASE_DEV,
1489                                  IB_UVERBS_NUM_FIXED_MINOR);
1490         unregister_chrdev_region(dynamic_uverbs_dev,
1491                                  IB_UVERBS_NUM_DYNAMIC_MINOR);
1492         mmu_notifier_synchronize();
1493 }
1494 
1495 module_init(ib_uverbs_init);
1496 module_exit(ib_uverbs_cleanup);
