This source file includes the following definitions:
- rds_ib_recv_init_ring
- list_splice_entire_tail
- rds_ib_cache_xfer_to_ready
- rds_ib_recv_alloc_cache
- rds_ib_recv_alloc_caches
- rds_ib_cache_splice_all_lists
- rds_ib_recv_free_caches
- rds_ib_frag_free
- rds_ib_inc_free
- rds_ib_recv_clear_one
- rds_ib_recv_clear_ring
- rds_ib_refill_one_inc
- rds_ib_refill_one_frag
- rds_ib_recv_refill_one
- acquire_refill
- release_refill
- rds_ib_recv_refill
- rds_ib_recv_cache_put
- rds_ib_recv_cache_get
- rds_ib_inc_copy_to_user
- rds_ib_recv_init_ack
- rds_ib_set_ack
- rds_ib_get_ack
- rds_ib_set_ack
- rds_ib_get_ack
- rds_ib_send_ack
- rds_ib_attempt_ack
- rds_ib_ack_send_complete
- rds_ib_piggyb_ack
- rds_ib_cong_recv
- rds_ib_process_recv
- rds_ib_recv_cqe_handler
- rds_ib_recv_path
- rds_ib_recv_init
- rds_ib_recv_exit
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <rdma/rdma_cm.h>

#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"

static struct kmem_cache *rds_ib_incoming_slab;
static struct kmem_cache *rds_ib_frag_slab;
static atomic_t rds_ib_allocation = ATOMIC_INIT(0);

void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
{
        struct rds_ib_recv_work *recv;
        u32 i;

        for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
                struct ib_sge *sge;

                recv->r_ibinc = NULL;
                recv->r_frag = NULL;

                recv->r_wr.next = NULL;
                recv->r_wr.wr_id = i;
                recv->r_wr.sg_list = recv->r_sge;
                recv->r_wr.num_sge = RDS_IB_RECV_SGE;

                sge = &recv->r_sge[0];
                sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
                sge->length = sizeof(struct rds_header);
                sge->lkey = ic->i_pd->local_dma_lkey;

                sge = &recv->r_sge[1];
                sge->addr = 0;
                sge->length = RDS_FRAG_SIZE;
                sge->lkey = ic->i_pd->local_dma_lkey;
        }
}

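/*
 * The entire 'from' list, including the 'from' element itself, is put on
 * the tail of the 'to' list.
 */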
static void list_splice_entire_tail(struct list_head *from,
                                    struct list_head *to)
{
        struct list_head *from_last = from->prev;

        list_splice_tail(from_last, to);
        list_add_tail(from_last, to);
}

static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
{
        struct list_head *tmp;

        tmp = xchg(&cache->xfer, NULL);
        if (tmp) {
                if (cache->ready)
                        list_splice_entire_tail(tmp, cache->ready);
                else
                        cache->ready = tmp;
        }
}

static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache, gfp_t gfp)
{
        struct rds_ib_cache_head *head;
        int cpu;

        cache->percpu = alloc_percpu_gfp(struct rds_ib_cache_head, gfp);
        if (!cache->percpu)
                return -ENOMEM;

        for_each_possible_cpu(cpu) {
                head = per_cpu_ptr(cache->percpu, cpu);
                head->first = NULL;
                head->count = 0;
        }
        cache->xfer = NULL;
        cache->ready = NULL;

        return 0;
}

int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic, gfp_t gfp)
{
        int ret;

        ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs, gfp);
        if (!ret) {
                ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags, gfp);
                if (ret)
                        free_percpu(ic->i_cache_incs.percpu);
        }

        return ret;
}

static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
                                          struct list_head *caller_list)
{
        struct rds_ib_cache_head *head;
        int cpu;

        for_each_possible_cpu(cpu) {
                head = per_cpu_ptr(cache->percpu, cpu);
                if (head->first) {
                        list_splice_entire_tail(head->first, caller_list);
                        head->first = NULL;
                }
        }

        if (cache->ready) {
                list_splice_entire_tail(cache->ready, caller_list);
                cache->ready = NULL;
        }
}

void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
{
        struct rds_ib_incoming *inc;
        struct rds_ib_incoming *inc_tmp;
        struct rds_page_frag *frag;
        struct rds_page_frag *frag_tmp;
        LIST_HEAD(list);

        rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
        rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
        free_percpu(ic->i_cache_incs.percpu);

        list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
                list_del(&inc->ii_cache_entry);
                WARN_ON(!list_empty(&inc->ii_frags));
                kmem_cache_free(rds_ib_incoming_slab, inc);
                atomic_dec(&rds_ib_allocation);
        }

        rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
        rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
        free_percpu(ic->i_cache_frags.percpu);

        list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
                list_del(&frag->f_cache_entry);
                WARN_ON(!list_empty(&frag->f_item));
                kmem_cache_free(rds_ib_frag_slab, frag);
        }
}

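/* fwd decl */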
static void rds_ib_recv_cache_put(struct list_head *new_item,
                                  struct rds_ib_refill_cache *cache);
static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);

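/* Recycle frag and attached recv buffer f_sg */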
static void rds_ib_frag_free(struct rds_ib_connection *ic,
                             struct rds_page_frag *frag)
{
        rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));

        rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
        atomic_add(RDS_FRAG_SIZE / SZ_1K, &ic->i_cache_allocs);
        rds_ib_stats_add(s_ib_recv_added_to_cache, RDS_FRAG_SIZE);
}

void rds_ib_inc_free(struct rds_incoming *inc)
{
        struct rds_ib_incoming *ibinc;
        struct rds_page_frag *frag;
        struct rds_page_frag *pos;
        struct rds_ib_connection *ic = inc->i_conn->c_transport_data;

        ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);

        /* Free attached frags */
        list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
                list_del_init(&frag->f_item);
                rds_ib_frag_free(ic, frag);
        }
        BUG_ON(!list_empty(&ibinc->ii_frags));

        rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
        rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
}

static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
                                  struct rds_ib_recv_work *recv)
{
        if (recv->r_ibinc) {
                rds_inc_put(&recv->r_ibinc->ii_inc);
                recv->r_ibinc = NULL;
        }
        if (recv->r_frag) {
                ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
                rds_ib_frag_free(ic, recv->r_frag);
                recv->r_frag = NULL;
        }
}

void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
{
        u32 i;

        for (i = 0; i < ic->i_recv_ring.w_nr; i++)
                rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
}

static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic,
                                                     gfp_t slab_mask)
{
        struct rds_ib_incoming *ibinc;
        struct list_head *cache_item;
        int avail_allocs;

        cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
        if (cache_item) {
                ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
        } else {
                avail_allocs = atomic_add_unless(&rds_ib_allocation,
                                                 1, rds_ib_sysctl_max_recv_allocation);
                if (!avail_allocs) {
                        rds_ib_stats_inc(s_ib_rx_alloc_limit);
                        return NULL;
                }
                ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
                if (!ibinc) {
                        atomic_dec(&rds_ib_allocation);
                        return NULL;
                }
                rds_ib_stats_inc(s_ib_rx_total_incs);
        }
        INIT_LIST_HEAD(&ibinc->ii_frags);
        rds_inc_init(&ibinc->ii_inc, ic->conn, &ic->conn->c_faddr);

        return ibinc;
}

static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic,
                                                    gfp_t slab_mask, gfp_t page_mask)
{
        struct rds_page_frag *frag;
        struct list_head *cache_item;
        int ret;

        cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
        if (cache_item) {
                frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
                atomic_sub(RDS_FRAG_SIZE / SZ_1K, &ic->i_cache_allocs);
                rds_ib_stats_add(s_ib_recv_removed_from_cache, RDS_FRAG_SIZE);
        } else {
                frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask);
                if (!frag)
                        return NULL;

                sg_init_table(&frag->f_sg, 1);
                ret = rds_page_remainder_alloc(&frag->f_sg,
                                               RDS_FRAG_SIZE, page_mask);
                if (ret) {
                        kmem_cache_free(rds_ib_frag_slab, frag);
                        return NULL;
                }
                rds_ib_stats_inc(s_ib_rx_total_frags);
        }

        INIT_LIST_HEAD(&frag->f_item);

        return frag;
}

static int rds_ib_recv_refill_one(struct rds_connection *conn,
                                  struct rds_ib_recv_work *recv, gfp_t gfp)
{
        struct rds_ib_connection *ic = conn->c_transport_data;
        struct ib_sge *sge;
        int ret = -ENOMEM;
        gfp_t slab_mask = GFP_NOWAIT;
        gfp_t page_mask = GFP_NOWAIT;

        if (gfp & __GFP_DIRECT_RECLAIM) {
                slab_mask = GFP_KERNEL;
                page_mask = GFP_HIGHUSER;
        }

        if (!ic->i_cache_incs.ready)
                rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
        if (!ic->i_cache_frags.ready)
                rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);

        /*
         * ibinc was taken from recv if recv contained the start of a message.
         * recvs that were continuations will still have this allocated.
         */
        if (!recv->r_ibinc) {
                recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask);
                if (!recv->r_ibinc)
                        goto out;
        }

        WARN_ON(recv->r_frag); /* leak! */
        recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask);
        if (!recv->r_frag)
                goto out;

        ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
                            1, DMA_FROM_DEVICE);
        WARN_ON(ret != 1);

        sge = &recv->r_sge[0];
        sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
        sge->length = sizeof(struct rds_header);

        sge = &recv->r_sge[1];
        sge->addr = sg_dma_address(&recv->r_frag->f_sg);
        sge->length = sg_dma_len(&recv->r_frag->f_sg);

        ret = 0;
out:
        return ret;
}

static int acquire_refill(struct rds_connection *conn)
{
        return test_and_set_bit(RDS_RECV_REFILL, &conn->c_flags) == 0;
}

static void release_refill(struct rds_connection *conn)
{
        clear_bit(RDS_RECV_REFILL, &conn->c_flags);

        /* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
         * hot path and finding waiters is very rare.  We don't want to walk
         * the system-wide hashed waitqueue buckets in the fast path only to
         * almost never find waiters.
         */
        if (waitqueue_active(&conn->c_waitq))
                wake_up_all(&conn->c_waitq);
}

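/*
 * This tries to allocate and post unused work requests after making sure that
 * they have all the allocations they need to queue received fragments into
 * sockets.
 */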
void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
{
        struct rds_ib_connection *ic = conn->c_transport_data;
        struct rds_ib_recv_work *recv;
        unsigned int posted = 0;
        int ret = 0;
        bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
        bool must_wake = false;
        u32 pos;

        /* the goal here is to just make sure that someone, somewhere
         * is posting buffers.  If we can't get the refill lock,
         * let them do their thing
         */
        if (!acquire_refill(conn))
                return;

        while ((prefill || rds_conn_up(conn)) &&
               rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
                if (pos >= ic->i_recv_ring.w_nr) {
                        printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
                                        pos);
                        break;
                }

                recv = &ic->i_recvs[pos];
                ret = rds_ib_recv_refill_one(conn, recv, gfp);
                if (ret) {
                        must_wake = true;
                        break;
                }

                rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
                         recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
                         (long)sg_dma_address(&recv->r_frag->f_sg));

                /* XXX when can this fail? */
                ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL);
                if (ret) {
                        rds_ib_conn_error(conn, "recv post on "
                               "%pI6c returned %d, disconnecting and "
                               "reconnecting\n", &conn->c_faddr,
                               ret);
                        break;
                }

                posted++;

                if ((posted > 128 && need_resched()) || posted > 8192) {
                        must_wake = true;
                        break;
                }
        }

        /* We're doing flow control - update the window. */
        if (ic->i_flowctl && posted)
                rds_ib_advertise_credits(conn, posted);

        if (ret)
                rds_ib_ring_unalloc(&ic->i_recv_ring, 1);

        release_refill(conn);

        /* if we're called from the softirq handler, we'll be GFP_NOWAIT.
         * in this case the ring being low is going to lead to more interrupts
         * and we can safely let the softirq code take care of it unless the
         * ring is completely empty.
         *
         * if we're called from krdsd, we'll be GFP_KERNEL.  In this case
         * we might have raced with the softirq code while we had the refill
         * lock held.  Use rds_ib_ring_low() instead of ring_empty to decide
         * whether to call us again.
         */
        if (rds_conn_up(conn) &&
            (must_wake ||
            (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
            rds_ib_ring_empty(&ic->i_recv_ring))) {
                queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
        }
        if (can_wait)
                cond_resched();
}

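/*
 * rds_ib_recv_cache_put is the lock-free producer side of the recycle
 * caches: freed incs and frags are chained on a per-cpu list and, once a
 * batch of RDS_IB_RECYCLE_BATCH_COUNT items has built up, the whole chain
 * is published to cache->xfer with cmpxchg so the refill path can splice
 * it onto cache->ready without taking any locks.
 */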
static void rds_ib_recv_cache_put(struct list_head *new_item,
                                  struct rds_ib_refill_cache *cache)
{
        unsigned long flags;
        struct list_head *old, *chpfirst;

        local_irq_save(flags);

        chpfirst = __this_cpu_read(cache->percpu->first);
        if (!chpfirst)
                INIT_LIST_HEAD(new_item);
        else /* put on front */
                list_add_tail(new_item, chpfirst);

        __this_cpu_write(cache->percpu->first, new_item);
        __this_cpu_inc(cache->percpu->count);

        if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT)
                goto end;

        /*
         * Return our per-cpu first list to the cache's xfer by atomically
         * grabbing the current xfer list, appending it to our per-cpu list,
         * and then atomically returning that entire list back to the
         * cache's xfer list as long as it's still empty.
         */
        do {
                old = xchg(&cache->xfer, NULL);
                if (old)
                        list_splice_entire_tail(old, chpfirst);
                old = cmpxchg(&cache->xfer, NULL, chpfirst);
        } while (old);

        __this_cpu_write(cache->percpu->first, NULL);
        __this_cpu_write(cache->percpu->count, 0);
end:
        local_irq_restore(flags);
}

static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
{
        struct list_head *head = cache->ready;

        if (head) {
                if (!list_empty(head)) {
                        cache->ready = head->next;
                        list_del_init(head);
                } else
                        cache->ready = NULL;
        }

        return head;
}

int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
{
        struct rds_ib_incoming *ibinc;
        struct rds_page_frag *frag;
        unsigned long to_copy;
        unsigned long frag_off = 0;
        int copied = 0;
        int ret;
        u32 len;

        ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
        frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
        len = be32_to_cpu(inc->i_hdr.h_len);

        while (iov_iter_count(to) && copied < len) {
                if (frag_off == RDS_FRAG_SIZE) {
                        frag = list_entry(frag->f_item.next,
                                          struct rds_page_frag, f_item);
                        frag_off = 0;
                }
                to_copy = min_t(unsigned long, iov_iter_count(to),
                                RDS_FRAG_SIZE - frag_off);
                to_copy = min_t(unsigned long, to_copy, len - copied);

                /* XXX needs + offset for multiple recvs per page */
                rds_stats_add(s_copy_to_user, to_copy);
                ret = copy_page_to_iter(sg_page(&frag->f_sg),
                                        frag->f_sg.offset + frag_off,
                                        to_copy,
                                        to);
                if (ret != to_copy)
                        return -EFAULT;

                frag_off += to_copy;
                copied += to_copy;
        }

        return copied;
}

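/* ic starts out kzalloc()ed */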
void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
{
        struct ib_send_wr *wr = &ic->i_ack_wr;
        struct ib_sge *sge = &ic->i_ack_sge;

        sge->addr = ic->i_ack_dma;
        sge->length = sizeof(struct rds_header);
        sge->lkey = ic->i_pd->local_dma_lkey;

        wr->sg_list = sge;
        wr->num_sge = 1;
        wr->opcode = IB_WR_SEND;
        wr->wr_id = RDS_IB_ACK_WR_ID;
        wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}

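/*
 * You'd think that with reliable IB connections you wouldn't need to ack
 * messages that have been received.  The problem is that IB hardware generates
 * an ack message before it has DMAed the message into memory.  This creates a
 * potential message loss if the HCA is disabled for any reason between when it
 * sends the ack and before the message is DMAed and processed.
 *
 * When the remote host receives our ack they'll free the sent message from
 * their send queue.  To decrease the latency of this we always send an ack
 * immediately after we've received messages.
 *
 * For simplicity, we only have one ack in flight at a time.  This puts
 * pressure on senders to have deep enough send queues to absorb the latency
 * of a single ack frame being in flight.
 *
 * This is implemented by having a long-lived send_wr and sge which point to a
 * statically allocated ack frame.  This ack wr does not fall under the ring
 * accounting that the tx and rx wrs do, so it can be posted at any time
 * without fear of hitting the QP limit.
 */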
#ifndef KERNEL_HAS_ATOMIC64
void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
{
        unsigned long flags;

        spin_lock_irqsave(&ic->i_ack_lock, flags);
        ic->i_ack_next = seq;
        if (ack_required)
                set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
        spin_unlock_irqrestore(&ic->i_ack_lock, flags);
}

static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
{
        unsigned long flags;
        u64 seq;

        clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);

        spin_lock_irqsave(&ic->i_ack_lock, flags);
        seq = ic->i_ack_next;
        spin_unlock_irqrestore(&ic->i_ack_lock, flags);

        return seq;
}
#else
void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
{
        atomic64_set(&ic->i_ack_next, seq);
        if (ack_required) {
                smp_mb__before_atomic();
                set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
        }
}

static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
{
        clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
        smp_mb__after_atomic();

        return atomic64_read(&ic->i_ack_next);
}
#endif

static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
{
        struct rds_header *hdr = ic->i_ack;
        u64 seq;
        int ret;

        seq = rds_ib_get_ack(ic);

        rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
        rds_message_populate_header(hdr, 0, 0, 0);
        hdr->h_ack = cpu_to_be64(seq);
        hdr->h_credit = adv_credits;
        rds_message_make_checksum(hdr);
        ic->i_ack_queued = jiffies;

        ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL);
        if (unlikely(ret)) {
                /* Failed to send. Release the WR, and
                 * force another ACK.
                 */
                clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
                set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);

                rds_ib_stats_inc(s_ib_ack_send_failure);

                rds_ib_conn_error(ic->conn, "sending ack failed\n");
        } else
                rds_ib_stats_inc(s_ib_ack_sent);
}

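/*
 * There are 3 ways of getting acknowledgements to the peer:
 *  1.  We call rds_ib_attempt_ack from the recv completion handler
 *      to send an ACK-only frame.
 *      However, there can be only one such frame in the send queue
 *      at any time, so we may have to postpone it.
 *  2.  When another (data) packet is transmitted while there's
 *      an ACK in the queue, we piggyback the ACK sequence number
 *      on the data packet.
 *  3.  If the ACK WR is done sending, we get called from the
 *      send queue completion handler, and check whether there's
 *      another ACK pending (postponed because the WR was on the
 *      queue). If so, we transmit it.
 *
 * We maintain 2 variables:
 *  -   i_ack_flags, which keeps track of whether the ACK WR
 *      is currently in the send queue or not (IB_ACK_IN_FLIGHT)
 *  -   i_ack_next, which is the last sequence number we received
 */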
void rds_ib_attempt_ack(struct rds_ib_connection *ic)
{
        unsigned int adv_credits;

        if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
                return;

        if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
                rds_ib_stats_inc(s_ib_ack_send_delayed);
                return;
        }

        /* Can we get a send credit? */
        if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) {
                rds_ib_stats_inc(s_ib_tx_throttle);
                clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
                return;
        }

        clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
        rds_ib_send_ack(ic, adv_credits);
}

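/*
 * We get here from the send completion handler, when the
 * adapter tells us the ACK frame was sent.
 */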
void rds_ib_ack_send_complete(struct rds_ib_connection *ic)
{
        clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
        rds_ib_attempt_ack(ic);
}

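/*
 * This is called by the regular xmit code when it wants to piggyback
 * an ACK on an outgoing frame.
 */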
u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
{
        if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
                rds_ib_stats_inc(s_ib_ack_send_piggybacked);
        return rds_ib_get_ack(ic);
}

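/*
 * A congestion map update arrives as a regular message whose payload is
 * the full RDS_CONG_MAP_BYTES bitmap.  Copy it into the peer's congestion
 * map and note which ports went from congested to uncongested so waiting
 * senders can be woken.
 */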
static void rds_ib_cong_recv(struct rds_connection *conn,
                             struct rds_ib_incoming *ibinc)
{
        struct rds_cong_map *map;
        unsigned int map_off;
        unsigned int map_page;
        struct rds_page_frag *frag;
        unsigned long frag_off;
        unsigned long to_copy;
        unsigned long copied;
        __le64 uncongested = 0;
        void *addr;

        /* catch completely corrupt packets */
        if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
                return;

        map = conn->c_fcong;
        map_page = 0;
        map_off = 0;

        frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
        frag_off = 0;

        copied = 0;

        while (copied < RDS_CONG_MAP_BYTES) {
                __le64 *src, *dst;
                unsigned int k;

                to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
                BUG_ON(to_copy & 7); /* Must be 64bit aligned. */

                addr = kmap_atomic(sg_page(&frag->f_sg));

                src = addr + frag->f_sg.offset + frag_off;
                dst = (void *)map->m_page_addrs[map_page] + map_off;
                for (k = 0; k < to_copy; k += 8) {
                        /* Record ports that became uncongested, ie
                         * bits that changed from 0 to 1. */
                        uncongested |= ~(*src) & *dst;
                        *dst++ = *src++;
                }
                kunmap_atomic(addr);

                copied += to_copy;

                map_off += to_copy;
                if (map_off == PAGE_SIZE) {
                        map_off = 0;
                        map_page++;
                }

                frag_off += to_copy;
                if (frag_off == RDS_FRAG_SIZE) {
                        frag = list_entry(frag->f_item.next,
                                          struct rds_page_frag, f_item);
                        frag_off = 0;
                }
        }

        /* the congestion map is in little-endian order */
        rds_cong_map_updated(map, le64_to_cpu(uncongested));
}

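/*
 * Rings are posted with all the allocations they'll need to queue the
 * incoming message to the receiving socket so this can't fail.
 * All fragments start with a header, so we can make sure we're not receiving
 * garbage, and we can tell a small 8 byte fragment from an ACK frame.
 */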
static void rds_ib_process_recv(struct rds_connection *conn,
                                struct rds_ib_recv_work *recv, u32 data_len,
                                struct rds_ib_ack_state *state)
{
        struct rds_ib_connection *ic = conn->c_transport_data;
        struct rds_ib_incoming *ibinc = ic->i_ibinc;
        struct rds_header *ihdr, *hdr;

        /* XXX shut down the connection if port 0,0 are seen? */

        rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv,
                 data_len);

        if (data_len < sizeof(struct rds_header)) {
                rds_ib_conn_error(conn, "incoming message "
                       "from %pI6c didn't include a "
                       "header, disconnecting and "
                       "reconnecting\n",
                       &conn->c_faddr);
                return;
        }
        data_len -= sizeof(struct rds_header);

        ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];

        /* Validate the checksum. */
        if (!rds_message_verify_checksum(ihdr)) {
                rds_ib_conn_error(conn, "incoming message "
                       "from %pI6c has corrupted header - "
                       "forcing a reconnect\n",
                       &conn->c_faddr);
                rds_stats_inc(s_recv_drop_bad_checksum);
                return;
        }

        /* Process the ACK sequence which comes with every packet */
        state->ack_recv = be64_to_cpu(ihdr->h_ack);
        state->ack_recv_valid = 1;

        /* Process the credits update if there was one */
        if (ihdr->h_credit)
                rds_ib_send_add_credits(conn, ihdr->h_credit);

        if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && data_len == 0) {
                /* This is an ACK-only packet. The fact that it gets
                 * special treatment here is that historically, ACKs
                 * were rather special beasts.
                 */
                rds_ib_stats_inc(s_ib_ack_received);

                /*
                 * Usually the frags make their way on to incs and are then freed as
                 * the inc is freed.  We don't go that route, so we have to drop the
                 * page ref ourselves.  We can't just leave the page on the recv
                 * because that confuses the dma mapping of pages and each recv's use
                 * of a partial page.
                 */
                rds_ib_frag_free(ic, recv->r_frag);
                recv->r_frag = NULL;
                return;
        }

        /*
         * If we don't already have an inc on the connection then this
         * fragment has a header and starts a message.. copy its header
         * into the inc and save the inc so we can hang upcoming fragments
         * off its list.
         */
        if (!ibinc) {
                ibinc = recv->r_ibinc;
                recv->r_ibinc = NULL;
                ic->i_ibinc = ibinc;

                hdr = &ibinc->ii_inc.i_hdr;
                ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
                                local_clock();
                memcpy(hdr, ihdr, sizeof(*hdr));
                ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
                ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
                                local_clock();

                rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
                         ic->i_recv_data_rem, hdr->h_flags);
        } else {
                hdr = &ibinc->ii_inc.i_hdr;
                /* We can't just use memcmp here; fragments of a
                 * single message may carry different ACKs */
                if (hdr->h_sequence != ihdr->h_sequence ||
                    hdr->h_len != ihdr->h_len ||
                    hdr->h_sport != ihdr->h_sport ||
                    hdr->h_dport != ihdr->h_dport) {
                        rds_ib_conn_error(conn,
                                "fragment header mismatch; forcing reconnect\n");
                        return;
                }
        }

        list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags);
        recv->r_frag = NULL;

        if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
                ic->i_recv_data_rem -= RDS_FRAG_SIZE;
        else {
                ic->i_recv_data_rem = 0;
                ic->i_ibinc = NULL;

                if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) {
                        rds_ib_cong_recv(conn, ibinc);
                } else {
                        rds_recv_incoming(conn, &conn->c_faddr, &conn->c_laddr,
                                          &ibinc->ii_inc, GFP_ATOMIC);
                        state->ack_next = be64_to_cpu(hdr->h_sequence);
                        state->ack_next_valid = 1;
                }

                /* Evaluate the ACK_REQUIRED flag *after* we received
                 * the complete frame, and after bumping the next_rx
                 * sequence. */
                if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
                        rds_stats_inc(s_recv_ack_required);
                        state->ack_required = 1;
                }

                rds_inc_put(&ibinc->ii_inc);
        }
}

void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
                             struct ib_wc *wc,
                             struct rds_ib_ack_state *state)
{
        struct rds_connection *conn = ic->conn;
        struct rds_ib_recv_work *recv;

        rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
                 (unsigned long long)wc->wr_id, wc->status,
                 ib_wc_status_msg(wc->status), wc->byte_len,
                 be32_to_cpu(wc->ex.imm_data));

        rds_ib_stats_inc(s_ib_rx_cq_event);
        recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
        ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1,
                        DMA_FROM_DEVICE);

        /* Also process recvs in connecting state because it is possible
         * to get a recv completion _before_ the rdmacm ESTABLISHED
         * event is processed.
         */
        if (wc->status == IB_WC_SUCCESS) {
                rds_ib_process_recv(conn, recv, wc->byte_len, state);
        } else {
                /* We expect errors as the qp is drained during shutdown */
                if (rds_conn_up(conn) || rds_conn_connecting(conn))
                        rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n",
                                          &conn->c_laddr, &conn->c_faddr,
                                          conn->c_tos, wc->status,
                                          ib_wc_status_msg(wc->status));
        }

        /* rds_ib_process_recv() doesn't always consume the frag, and
         * we might not have called it at all if the wc didn't indicate
         * success.  We already unmapped the frag's pages, though, and
         * the following rds_ib_ring_free() call tells the refill path
         * that it will not find an allocated frag here.  Make sure we
         * keep that promise by freeing a frag that's still on the ring.
         */
        if (recv->r_frag) {
                rds_ib_frag_free(ic, recv->r_frag);
                recv->r_frag = NULL;
        }
        rds_ib_ring_free(&ic->i_recv_ring, 1);

        /* If we ever end up with a really empty receive ring, we're
         * in deep trouble, as the sender will definitely see RNR
         * timeouts. */
        if (rds_ib_ring_empty(&ic->i_recv_ring))
                rds_ib_stats_inc(s_ib_rx_ring_empty);

        if (rds_ib_ring_low(&ic->i_recv_ring)) {
                rds_ib_recv_refill(conn, 0, GFP_NOWAIT);
                rds_ib_stats_inc(s_ib_rx_refill_from_cq);
        }
}

int rds_ib_recv_path(struct rds_conn_path *cp)
{
        struct rds_connection *conn = cp->cp_conn;
        struct rds_ib_connection *ic = conn->c_transport_data;

        rdsdebug("conn %p\n", conn);
        if (rds_conn_up(conn)) {
                rds_ib_attempt_ack(ic);
                rds_ib_recv_refill(conn, 0, GFP_KERNEL);
                rds_ib_stats_inc(s_ib_rx_refill_from_thread);
        }

        return 0;
}

int rds_ib_recv_init(void)
{
        struct sysinfo si;
        int ret = -ENOMEM;

        /* Default to 30% of all available RAM for recv memory */
        si_meminfo(&si);
        rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;

        rds_ib_incoming_slab =
                kmem_cache_create_usercopy("rds_ib_incoming",
                                           sizeof(struct rds_ib_incoming),
                                           0, SLAB_HWCACHE_ALIGN,
                                           offsetof(struct rds_ib_incoming,
                                                    ii_inc.i_usercopy),
                                           sizeof(struct rds_inc_usercopy),
                                           NULL);
        if (!rds_ib_incoming_slab)
                goto out;

        rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
                                        sizeof(struct rds_page_frag),
                                        0, SLAB_HWCACHE_ALIGN, NULL);
        if (!rds_ib_frag_slab) {
                kmem_cache_destroy(rds_ib_incoming_slab);
                rds_ib_incoming_slab = NULL;
        } else
                ret = 0;
out:
        return ret;
}

void rds_ib_recv_exit(void)
{
        WARN_ON(atomic_read(&rds_ib_allocation));

        kmem_cache_destroy(rds_ib_incoming_slab);
        kmem_cache_destroy(rds_ib_frag_slab);
}