root/net/rds/ib_rdma.c

DEFINITIONS

This source file includes the following definitions; an illustrative usage sketch follows the list.
  1. rds_ib_get_device
  2. rds_ib_add_ipaddr
  3. rds_ib_remove_ipaddr
  4. rds_ib_update_ipaddr
  5. rds_ib_add_conn
  6. rds_ib_remove_conn
  7. rds_ib_destroy_nodev_conns
  8. rds_ib_get_mr_info
  9. rds6_ib_get_mr_info
  10. rds_ib_reuse_mr
  11. rds_ib_sync_mr
  12. __rds_ib_teardown_mr
  13. rds_ib_teardown_mr
  14. rds_ib_flush_goal
  15. llist_append_to_list
  16. list_to_llist_nodes
  17. rds_ib_flush_mr_pool
  18. rds_ib_try_reuse_ibmr
  19. rds_ib_mr_pool_flush_worker
  20. rds_ib_free_mr
  21. rds_ib_flush_mrs
  22. rds_ib_get_mr
  23. rds_ib_destroy_mr_pool
  24. rds_ib_create_mr_pool
  25. rds_ib_mr_init
  26. rds_ib_mr_exit
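
A quick way to read the list above is as one memory-registration lifecycle: rds_ib_get_mr() registers a scatterlist against the device bound to the socket's address, rds_ib_free_mr() parks the MR back on its pool, and the flush paths (rds_ib_flush_mr_pool() and the delayed worker) unmap parked MRs and recycle them through rds_ib_reuse_mr(). The sketch below is illustrative only and not part of ib_rdma.c; the helper name and its arguments are invented for the example, while everything it calls is defined in this file.

    /* Illustrative sketch only -- not part of ib_rdma.c.  Assumes the caller
     * already built the scatterlist and owns an rds_sock and (for fastreg)
     * an rds_connection; error handling is trimmed.
     */
    static void rds_ib_mr_lifecycle_sketch(struct scatterlist *sg,
                                           unsigned long nents,
                                           struct rds_sock *rs,
                                           struct rds_connection *conn)
    {
            void *mr;
            u32 key;

            /* Look up the bound device by IP and register the pages via
             * fast registration or FMR, depending on rds_ibdev->use_fastreg.
             */
            mr = rds_ib_get_mr(sg, nents, rs, &key, conn);
            if (IS_ERR(mr))
                    return;

            /* ... hand 'key' to the peer and run RDMA traffic ... */

            /* Park the MR on its pool's llists; a delayed flush is queued
             * once too many pages are pinned.
             */
            rds_ib_free_mr(mr, 0);

            /* Flush every device's pools (also done from the pool's
             * delayed work and when a pool is destroyed).
             */
            rds_ib_flush_mrs();
    }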

   1 /*
   2  * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
   3  *
   4  * This software is available to you under a choice of one of two
   5  * licenses.  You may choose to be licensed under the terms of the GNU
   6  * General Public License (GPL) Version 2, available from the file
   7  * COPYING in the main directory of this source tree, or the
   8  * OpenIB.org BSD license below:
   9  *
  10  *     Redistribution and use in source and binary forms, with or
  11  *     without modification, are permitted provided that the following
  12  *     conditions are met:
  13  *
  14  *      - Redistributions of source code must retain the above
  15  *        copyright notice, this list of conditions and the following
  16  *        disclaimer.
  17  *
  18  *      - Redistributions in binary form must reproduce the above
  19  *        copyright notice, this list of conditions and the following
  20  *        disclaimer in the documentation and/or other materials
  21  *        provided with the distribution.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30  * SOFTWARE.
  31  *
  32  */
  33 #include <linux/kernel.h>
  34 #include <linux/slab.h>
  35 #include <linux/rculist.h>
  36 #include <linux/llist.h>
  37 
  38 #include "rds_single_path.h"
  39 #include "ib_mr.h"
  40 
  41 struct workqueue_struct *rds_ib_mr_wq;
  42 
  43 static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
  44 {
  45         struct rds_ib_device *rds_ibdev;
  46         struct rds_ib_ipaddr *i_ipaddr;
  47 
  48         rcu_read_lock();
  49         list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
  50                 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
  51                         if (i_ipaddr->ipaddr == ipaddr) {
  52                                 refcount_inc(&rds_ibdev->refcount);
  53                                 rcu_read_unlock();
  54                                 return rds_ibdev;
  55                         }
  56                 }
  57         }
  58         rcu_read_unlock();
  59 
  60         return NULL;
  61 }
  62 
  63 static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
  64 {
  65         struct rds_ib_ipaddr *i_ipaddr;
  66 
  67         i_ipaddr = kmalloc(sizeof *i_ipaddr, GFP_KERNEL);
  68         if (!i_ipaddr)
  69                 return -ENOMEM;
  70 
  71         i_ipaddr->ipaddr = ipaddr;
  72 
  73         spin_lock_irq(&rds_ibdev->spinlock);
  74         list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list);
  75         spin_unlock_irq(&rds_ibdev->spinlock);
  76 
  77         return 0;
  78 }
  79 
  80 static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
  81 {
  82         struct rds_ib_ipaddr *i_ipaddr;
  83         struct rds_ib_ipaddr *to_free = NULL;
  84 
  85 
  86         spin_lock_irq(&rds_ibdev->spinlock);
  87         list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
  88                 if (i_ipaddr->ipaddr == ipaddr) {
  89                         list_del_rcu(&i_ipaddr->list);
  90                         to_free = i_ipaddr;
  91                         break;
  92                 }
  93         }
  94         spin_unlock_irq(&rds_ibdev->spinlock);
  95 
  96         if (to_free)
  97                 kfree_rcu(to_free, rcu);
  98 }
  99 
 100 int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,
 101                          struct in6_addr *ipaddr)
 102 {
 103         struct rds_ib_device *rds_ibdev_old;
 104 
 105         rds_ibdev_old = rds_ib_get_device(ipaddr->s6_addr32[3]);
 106         if (!rds_ibdev_old)
 107                 return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]);
 108 
 109         if (rds_ibdev_old != rds_ibdev) {
 110                 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr->s6_addr32[3]);
 111                 rds_ib_dev_put(rds_ibdev_old);
 112                 return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]);
 113         }
 114         rds_ib_dev_put(rds_ibdev_old);
 115 
 116         return 0;
 117 }
 118 
 119 void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
 120 {
 121         struct rds_ib_connection *ic = conn->c_transport_data;
 122 
 123         /* conn was previously on the nodev_conns_list */
 124         spin_lock_irq(&ib_nodev_conns_lock);
 125         BUG_ON(list_empty(&ib_nodev_conns));
 126         BUG_ON(list_empty(&ic->ib_node));
 127         list_del(&ic->ib_node);
 128 
 129         spin_lock(&rds_ibdev->spinlock);
 130         list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
 131         spin_unlock(&rds_ibdev->spinlock);
 132         spin_unlock_irq(&ib_nodev_conns_lock);
 133 
 134         ic->rds_ibdev = rds_ibdev;
 135         refcount_inc(&rds_ibdev->refcount);
 136 }
 137 
 138 void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
 139 {
 140         struct rds_ib_connection *ic = conn->c_transport_data;
 141 
 142         /* place conn on nodev_conns_list */
 143         spin_lock(&ib_nodev_conns_lock);
 144 
 145         spin_lock_irq(&rds_ibdev->spinlock);
 146         BUG_ON(list_empty(&ic->ib_node));
 147         list_del(&ic->ib_node);
 148         spin_unlock_irq(&rds_ibdev->spinlock);
 149 
 150         list_add_tail(&ic->ib_node, &ib_nodev_conns);
 151 
 152         spin_unlock(&ib_nodev_conns_lock);
 153 
 154         ic->rds_ibdev = NULL;
 155         rds_ib_dev_put(rds_ibdev);
 156 }
 157 
 158 void rds_ib_destroy_nodev_conns(void)
 159 {
 160         struct rds_ib_connection *ic, *_ic;
 161         LIST_HEAD(tmp_list);
 162 
 163         /* avoid calling conn_destroy with irqs off */
 164         spin_lock_irq(&ib_nodev_conns_lock);
 165         list_splice(&ib_nodev_conns, &tmp_list);
 166         spin_unlock_irq(&ib_nodev_conns_lock);
 167 
 168         list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node)
 169                 rds_conn_destroy(ic->conn);
 170 }
 171 
 172 void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo)
 173 {
 174         struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
 175 
 176         iinfo->rdma_mr_max = pool_1m->max_items;
 177         iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages;
 178 }
 179 
 180 #if IS_ENABLED(CONFIG_IPV6)
 181 void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
 182                          struct rds6_info_rdma_connection *iinfo6)
 183 {
 184         struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
 185 
 186         iinfo6->rdma_mr_max = pool_1m->max_items;
 187         iinfo6->rdma_mr_size = pool_1m->fmr_attr.max_pages;
 188 }
 189 #endif
 190 
 191 struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
 192 {
 193         struct rds_ib_mr *ibmr = NULL;
 194         struct llist_node *ret;
 195         unsigned long flags;
 196 
 197         spin_lock_irqsave(&pool->clean_lock, flags);
 198         ret = llist_del_first(&pool->clean_list);
 199         spin_unlock_irqrestore(&pool->clean_lock, flags);
 200         if (ret) {
 201                 ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
 202                 if (pool->pool_type == RDS_IB_MR_8K_POOL)
 203                         rds_ib_stats_inc(s_ib_rdma_mr_8k_reused);
 204                 else
 205                         rds_ib_stats_inc(s_ib_rdma_mr_1m_reused);
 206         }
 207 
 208         return ibmr;
 209 }
 210 
 211 void rds_ib_sync_mr(void *trans_private, int direction)
 212 {
 213         struct rds_ib_mr *ibmr = trans_private;
 214         struct rds_ib_device *rds_ibdev = ibmr->device;
 215 
 216         switch (direction) {
 217         case DMA_FROM_DEVICE:
 218                 ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg,
 219                         ibmr->sg_dma_len, DMA_BIDIRECTIONAL);
 220                 break;
 221         case DMA_TO_DEVICE:
 222                 ib_dma_sync_sg_for_device(rds_ibdev->dev, ibmr->sg,
 223                         ibmr->sg_dma_len, DMA_BIDIRECTIONAL);
 224                 break;
 225         }
 226 }
 227 
 228 void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
 229 {
 230         struct rds_ib_device *rds_ibdev = ibmr->device;
 231 
 232         if (ibmr->sg_dma_len) {
 233                 ib_dma_unmap_sg(rds_ibdev->dev,
 234                                 ibmr->sg, ibmr->sg_len,
 235                                 DMA_BIDIRECTIONAL);
 236                 ibmr->sg_dma_len = 0;
 237         }
 238 
 239         /* Release the s/g list */
 240         if (ibmr->sg_len) {
 241                 unsigned int i;
 242 
 243                 for (i = 0; i < ibmr->sg_len; ++i) {
 244                         struct page *page = sg_page(&ibmr->sg[i]);
 245 
 246                         /* FIXME we need a way to tell a r/w MR
 247                          * from a r/o MR */
 248                         WARN_ON(!page->mapping && irqs_disabled());
 249                         set_page_dirty(page);
 250                         put_page(page);
 251                 }
 252                 kfree(ibmr->sg);
 253 
 254                 ibmr->sg = NULL;
 255                 ibmr->sg_len = 0;
 256         }
 257 }
 258 
 259 void rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
 260 {
 261         unsigned int pinned = ibmr->sg_len;
 262 
 263         __rds_ib_teardown_mr(ibmr);
 264         if (pinned) {
 265                 struct rds_ib_mr_pool *pool = ibmr->pool;
 266 
 267                 atomic_sub(pinned, &pool->free_pinned);
 268         }
 269 }
 270 
 271 static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all)
 272 {
 273         unsigned int item_count;
 274 
 275         item_count = atomic_read(&pool->item_count);
 276         if (free_all)
 277                 return item_count;
 278 
 279         return 0;
 280 }
 281 
 282 /*
 283  * given an llist of mrs, put them all into the list_head for more processing
 284  */
 285 static unsigned int llist_append_to_list(struct llist_head *llist,
 286                                          struct list_head *list)
 287 {
 288         struct rds_ib_mr *ibmr;
 289         struct llist_node *node;
 290         struct llist_node *next;
 291         unsigned int count = 0;
 292 
 293         node = llist_del_all(llist);
 294         while (node) {
 295                 next = node->next;
 296                 ibmr = llist_entry(node, struct rds_ib_mr, llnode);
 297                 list_add_tail(&ibmr->unmap_list, list);
 298                 node = next;
 299                 count++;
 300         }
 301         return count;
 302 }
 303 
 304 /*
 305  * this takes a list head of mrs and turns it into linked llist nodes
 306  * of clusters.  Each cluster has linked llist nodes of
 307  * MR_CLUSTER_SIZE mrs that are ready for reuse.
 308  */
 309 static void list_to_llist_nodes(struct list_head *list,
 310                                 struct llist_node **nodes_head,
 311                                 struct llist_node **nodes_tail)
 312 {
 313         struct rds_ib_mr *ibmr;
 314         struct llist_node *cur = NULL;
 315         struct llist_node **next = nodes_head;
 316 
 317         list_for_each_entry(ibmr, list, unmap_list) {
 318                 cur = &ibmr->llnode;
 319                 *next = cur;
 320                 next = &cur->next;
 321         }
 322         *next = NULL;
 323         *nodes_tail = cur;
 324 }
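
/* Editorial sketch, not part of the original file: rds_ib_flush_mr_pool()
 * below uses the two helpers above as a pair.  Roughly:
 *
 *	LIST_HEAD(unmap_list);
 *	struct llist_node *head, *tail;
 *
 *	llist_append_to_list(&pool->drop_list, &unmap_list);   llist -> list
 *	llist_append_to_list(&pool->free_list, &unmap_list);
 *	... unmap/unregister the MRs now sitting on unmap_list ...
 *	list_to_llist_nodes(&unmap_list, &head, &tail);         list -> llist
 *	llist_add_batch(head, tail, &pool->clean_list);         ready for reuse
 */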
 325 
 326 /*
 327  * Flush our pool of MRs.
 328  * At a minimum, all currently unused MRs are unmapped.
 329  * If the number of MRs allocated exceeds the limit, we also try
 330  * to free as many MRs as needed to get back to this limit.
 331  */
 332 int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 333                          int free_all, struct rds_ib_mr **ibmr_ret)
 334 {
 335         struct rds_ib_mr *ibmr;
 336         struct llist_node *clean_nodes;
 337         struct llist_node *clean_tail;
 338         LIST_HEAD(unmap_list);
 339         unsigned long unpinned = 0;
 340         unsigned int nfreed = 0, dirty_to_clean = 0, free_goal;
 341 
 342         if (pool->pool_type == RDS_IB_MR_8K_POOL)
 343                 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush);
 344         else
 345                 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_flush);
 346 
 347         if (ibmr_ret) {
 348                 DEFINE_WAIT(wait);
 349                 while (!mutex_trylock(&pool->flush_lock)) {
 350                         ibmr = rds_ib_reuse_mr(pool);
 351                         if (ibmr) {
 352                                 *ibmr_ret = ibmr;
 353                                 finish_wait(&pool->flush_wait, &wait);
 354                                 goto out_nolock;
 355                         }
 356 
 357                         prepare_to_wait(&pool->flush_wait, &wait,
 358                                         TASK_UNINTERRUPTIBLE);
 359                         if (llist_empty(&pool->clean_list))
 360                                 schedule();
 361 
 362                         ibmr = rds_ib_reuse_mr(pool);
 363                         if (ibmr) {
 364                                 *ibmr_ret = ibmr;
 365                                 finish_wait(&pool->flush_wait, &wait);
 366                                 goto out_nolock;
 367                         }
 368                 }
 369                 finish_wait(&pool->flush_wait, &wait);
 370         } else
 371                 mutex_lock(&pool->flush_lock);
 372 
 373         if (ibmr_ret) {
 374                 ibmr = rds_ib_reuse_mr(pool);
 375                 if (ibmr) {
 376                         *ibmr_ret = ibmr;
 377                         goto out;
 378                 }
 379         }
 380 
 381         /* Get the list of all MRs to be dropped. Ordering matters -
 382          * we want to put drop_list ahead of free_list.
 383          */
 384         dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list);
 385         dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list);
 386         if (free_all) {
 387                 unsigned long flags;
 388 
 389                 spin_lock_irqsave(&pool->clean_lock, flags);
 390                 llist_append_to_list(&pool->clean_list, &unmap_list);
 391                 spin_unlock_irqrestore(&pool->clean_lock, flags);
 392         }
 393 
 394         free_goal = rds_ib_flush_goal(pool, free_all);
 395 
 396         if (list_empty(&unmap_list))
 397                 goto out;
 398 
 399         if (pool->use_fastreg)
 400                 rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal);
 401         else
 402                 rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal);
 403 
 404         if (!list_empty(&unmap_list)) {
 405                 unsigned long flags;
 406 
 407                 list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail);
 408                 if (ibmr_ret) {
 409                         *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
 410                         clean_nodes = clean_nodes->next;
 411                 }
 412                 /* more than one entry in llist nodes */
 413                 if (clean_nodes) {
 414                         spin_lock_irqsave(&pool->clean_lock, flags);
 415                         llist_add_batch(clean_nodes, clean_tail,
 416                                         &pool->clean_list);
 417                         spin_unlock_irqrestore(&pool->clean_lock, flags);
 418                 }
 419         }
 420 
 421         atomic_sub(unpinned, &pool->free_pinned);
 422         atomic_sub(dirty_to_clean, &pool->dirty_count);
 423         atomic_sub(nfreed, &pool->item_count);
 424 
 425 out:
 426         mutex_unlock(&pool->flush_lock);
 427         if (waitqueue_active(&pool->flush_wait))
 428                 wake_up(&pool->flush_wait);
 429 out_nolock:
 430         return 0;
 431 }
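
/* Editorial note, not part of the original file: the calling modes used in
 * this file are
 *
 *	rds_ib_flush_mr_pool(pool, 0, NULL);    periodic/administrative flush
 *	                                        (flush worker, rds_ib_free_mr,
 *	                                        rds_ib_flush_mrs)
 *	rds_ib_flush_mr_pool(pool, 0, &ibmr);   rds_ib_try_reuse_ibmr() flushes
 *	                                        and takes one clean MR back
 *	rds_ib_flush_mr_pool(pool, 1, NULL);    rds_ib_destroy_mr_pool() drains
 *	                                        clean_list as well (free_all)
 */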
 432 
 433 struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
 434 {
 435         struct rds_ib_mr *ibmr = NULL;
 436         int iter = 0;
 437 
 438         while (1) {
 439                 ibmr = rds_ib_reuse_mr(pool);
 440                 if (ibmr)
 441                         return ibmr;
 442 
 443                 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
 444                         break;
 445 
 446                 atomic_dec(&pool->item_count);
 447 
 448                 if (++iter > 2) {
 449                         if (pool->pool_type == RDS_IB_MR_8K_POOL)
 450                                 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
 451                         else
 452                                 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
 453                         break;
 454                 }
 455 
 456                 /* We do have some empty MRs. Flush them out. */
 457                 if (pool->pool_type == RDS_IB_MR_8K_POOL)
 458                         rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
 459                 else
 460                         rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
 461 
 462                 rds_ib_flush_mr_pool(pool, 0, &ibmr);
 463                 if (ibmr)
 464                         return ibmr;
 465         }
 466 
 467         return NULL;
 468 }
 469 
 470 static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
 471 {
 472         struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
 473 
 474         rds_ib_flush_mr_pool(pool, 0, NULL);
 475 }
 476 
 477 void rds_ib_free_mr(void *trans_private, int invalidate)
 478 {
 479         struct rds_ib_mr *ibmr = trans_private;
 480         struct rds_ib_mr_pool *pool = ibmr->pool;
 481         struct rds_ib_device *rds_ibdev = ibmr->device;
 482 
 483         rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
 484 
 485         /* Return it to the pool's free list */
 486         if (rds_ibdev->use_fastreg)
 487                 rds_ib_free_frmr_list(ibmr);
 488         else
 489                 rds_ib_free_fmr_list(ibmr);
 490 
 491         atomic_add(ibmr->sg_len, &pool->free_pinned);
 492         atomic_inc(&pool->dirty_count);
 493 
 494         /* If we've pinned too many pages, request a flush */
 495         if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
 496             atomic_read(&pool->dirty_count) >= pool->max_items / 5)
 497                 queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
 498 
 499         if (invalidate) {
 500                 if (likely(!in_interrupt())) {
 501                         rds_ib_flush_mr_pool(pool, 0, NULL);
 502                 } else {
 503                         /* We get here if the user created a MR marked
 504                          * as use_once and invalidate at the same time.
 505                          */
 506                         queue_delayed_work(rds_ib_mr_wq,
 507                                            &pool->flush_worker, 10);
 508                 }
 509         }
 510 
 511         rds_ib_dev_put(rds_ibdev);
 512 }
 513 
 514 void rds_ib_flush_mrs(void)
 515 {
 516         struct rds_ib_device *rds_ibdev;
 517 
 518         down_read(&rds_ib_devices_lock);
 519         list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
 520                 if (rds_ibdev->mr_8k_pool)
 521                         rds_ib_flush_mr_pool(rds_ibdev->mr_8k_pool, 0, NULL);
 522 
 523                 if (rds_ibdev->mr_1m_pool)
 524                         rds_ib_flush_mr_pool(rds_ibdev->mr_1m_pool, 0, NULL);
 525         }
 526         up_read(&rds_ib_devices_lock);
 527 }
 528 
 529 void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 530                     struct rds_sock *rs, u32 *key_ret,
 531                     struct rds_connection *conn)
 532 {
 533         struct rds_ib_device *rds_ibdev;
 534         struct rds_ib_mr *ibmr = NULL;
 535         struct rds_ib_connection *ic = NULL;
 536         int ret;
 537 
 538         rds_ibdev = rds_ib_get_device(rs->rs_bound_addr.s6_addr32[3]);
 539         if (!rds_ibdev) {
 540                 ret = -ENODEV;
 541                 goto out;
 542         }
 543 
 544         if (conn)
 545                 ic = conn->c_transport_data;
 546 
 547         if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) {
 548                 ret = -ENODEV;
 549                 goto out;
 550         }
 551 
 552         if (rds_ibdev->use_fastreg)
 553                 ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret);
 554         else
 555                 ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret);
 556         if (IS_ERR(ibmr)) {
 557                 ret = PTR_ERR(ibmr);
 558                 pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret);
 559         } else {
 560                 return ibmr;
 561         }
 562 
 563  out:
 564         if (rds_ibdev)
 565                 rds_ib_dev_put(rds_ibdev);
 566 
 567         return ERR_PTR(ret);
 568 }
 569 
 570 void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
 571 {
 572         cancel_delayed_work_sync(&pool->flush_worker);
 573         rds_ib_flush_mr_pool(pool, 1, NULL);
 574         WARN_ON(atomic_read(&pool->item_count));
 575         WARN_ON(atomic_read(&pool->free_pinned));
 576         kfree(pool);
 577 }
 578 
 579 struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
 580                                              int pool_type)
 581 {
 582         struct rds_ib_mr_pool *pool;
 583 
 584         pool = kzalloc(sizeof(*pool), GFP_KERNEL);
 585         if (!pool)
 586                 return ERR_PTR(-ENOMEM);
 587 
 588         pool->pool_type = pool_type;
 589         init_llist_head(&pool->free_list);
 590         init_llist_head(&pool->drop_list);
 591         init_llist_head(&pool->clean_list);
 592         spin_lock_init(&pool->clean_lock);
 593         mutex_init(&pool->flush_lock);
 594         init_waitqueue_head(&pool->flush_wait);
 595         INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
 596 
 597         if (pool_type == RDS_IB_MR_1M_POOL) {
 598                 /* +1 allows for unaligned MRs */
 599                 pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1;
 600                 pool->max_items = rds_ibdev->max_1m_mrs;
 601         } else {
 602                 /* pool_type == RDS_IB_MR_8K_POOL */
 603                 pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1;
 604                 pool->max_items = rds_ibdev->max_8k_mrs;
 605         }
 606 
 607         pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
 608         pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
 609         pool->fmr_attr.page_shift = PAGE_SHIFT;
 610         pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4;
 611         pool->use_fastreg = rds_ibdev->use_fastreg;
 612 
 613         return pool;
 614 }
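
/* Editorial note, not part of the original file: a worked sizing example
 * with illustrative numbers.  Assuming an 8K pool with max_items = 2048 and
 * a two-page RDS_MR_8K_MSG_SIZE:
 *
 *	fmr_attr.max_pages = 2 + 1 = 3          (the +1 absorbs unaligned MRs)
 *	max_free_pinned    = 2048 * 3 / 4 = 1536
 *
 * With those numbers, rds_ib_free_mr() above queues a pool flush once
 * free_pinned reaches 1536 pinned pages or dirty_count reaches
 * max_items / 5 = 409 dirty MRs.
 */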
 615 
 616 int rds_ib_mr_init(void)
 617 {
 618         rds_ib_mr_wq = alloc_workqueue("rds_mr_flushd", WQ_MEM_RECLAIM, 0);
 619         if (!rds_ib_mr_wq)
 620                 return -ENOMEM;
 621         return 0;
 622 }
 623 
 624 /* By the time this is called all the IB devices should have been torn down and
 625  * had their pools freed.  As each pool is freed its work struct is waited on,
 626  * so the pool flushing work queue should be idle by the time we get here.
 627  */
 628 void rds_ib_mr_exit(void)
 629 {
 630         destroy_workqueue(rds_ib_mr_wq);
 631 }
