/*
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/seq_file.h>

#include "hfi.h"
#include "qp.h"
#include "trace.h"
#include "sdma.h"

#define BITS_PER_PAGE		(PAGE_SIZE * BITS_PER_BYTE)
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE - 1)

static unsigned int hfi1_qp_table_size = 256;
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

static void flush_tx_list(struct hfi1_qp *qp);
static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait *wait,
	struct sdma_txreq *stx,
	unsigned seq);
static void iowait_wakeup(struct iowait *wait, int reason);

static inline unsigned mk_qpn(struct hfi1_qpn_table *qpt,
			      struct qpn_map *map, unsigned off)
{
	return (map - qpt->map) * BITS_PER_PAGE + off;
}

/*
 * Convert the AETH credit code into the number of credits.
 */
static const u16 credit_table[31] = {
	0,      /* 0 */
	1,      /* 1 */
	2,      /* 2 */
	3,      /* 3 */
	4,      /* 4 */
	6,      /* 5 */
	8,      /* 6 */
	12,     /* 7 */
	16,     /* 8 */
	24,     /* 9 */
	32,     /* A */
	48,     /* B */
	64,     /* C */
	96,     /* D */
	128,    /* E */
	192,    /* F */
	256,    /* 10 */
	384,    /* 11 */
	512,    /* 12 */
	768,    /* 13 */
	1024,   /* 14 */
	1536,   /* 15 */
	2048,   /* 16 */
	3072,   /* 17 */
	4096,   /* 18 */
	6144,   /* 19 */
	8192,   /* 1A */
	12288,  /* 1B */
	16384,  /* 1C */
	24576,  /* 1D */
	32768   /* 1E */
};

static void get_map_page(struct hfi1_qpn_table *qpt, struct qpn_map *map)
{
	unsigned long page = get_zeroed_page(GFP_KERNEL);

	/*
	 * Free the page if someone raced with us installing it.
	 */

	spin_lock(&qpt->lock);
	if (map->page)
		free_page(page);
	else
		map->page = (void *)page;
	spin_unlock(&qpt->lock);
}

/*
 * Allocate the next available QPN or
 * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
 */
static int alloc_qpn(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt,
		     enum ib_qp_type type, u8 port)
{
	u32 i, offset, max_scan, qpn;
	struct qpn_map *map;
	int ret;

	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
		unsigned n;

		ret = type == IB_QPT_GSI;
		n = 1 << (ret + 2 * (port - 1));
		spin_lock(&qpt->lock);
		if (qpt->flags & n)
			ret = -EINVAL;
		else
			qpt->flags |= n;
		spin_unlock(&qpt->lock);
		goto bail;
	}

	qpn = qpt->last + qpt->incr;
	if (qpn >= QPN_MAX)
		qpn = qpt->incr | ((qpt->last & 1) ^ 1);
	/* offset carries bit 0 */
	offset = qpn & BITS_PER_PAGE_MASK;
	map = &qpt->map[qpn / BITS_PER_PAGE];
	max_scan = qpt->nmaps - !offset;
	for (i = 0;;) {
		if (unlikely(!map->page)) {
			get_map_page(qpt, map);
			if (unlikely(!map->page))
				break;
		}
		do {
			if (!test_and_set_bit(offset, map->page)) {
				qpt->last = qpn;
				ret = qpn;
				goto bail;
			}
			offset += qpt->incr;
			/*
			 * This qpn might be bogus if offset >= BITS_PER_PAGE.
			 * That is OK.  It gets re-assigned below.
			 */
			qpn = mk_qpn(qpt, map, offset);
		} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
		/*
		 * In order to keep the number of pages allocated to a
		 * minimum, we scan all existing pages before increasing
		 * the size of the bitmap table.
		 */
		if (++i > max_scan) {
			if (qpt->nmaps == QPNMAP_ENTRIES)
				break;
			map = &qpt->map[qpt->nmaps++];
			/* start at incr with current bit 0 */
			offset = qpt->incr | (offset & 1);
		} else if (map < &qpt->map[qpt->nmaps]) {
			++map;
			/* start at incr with current bit 0 */
			offset = qpt->incr | (offset & 1);
		} else {
			map = &qpt->map[0];
			/* wrap to first map page, invert bit 0 */
			offset = qpt->incr | ((offset & 1) ^ 1);
		}
		/* there can be no bits at shift and below */
		WARN_ON(offset & (dd->qos_shift - 1));
		qpn = mk_qpn(qpt, map, offset);
	}

	ret = -ENOMEM;

bail:
	return ret;
}

static void free_qpn(struct hfi1_qpn_table *qpt, u32 qpn)
{
	struct qpn_map *map;

	map = qpt->map + qpn / BITS_PER_PAGE;
	if (map->page)
		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
}

/*
 * Put the QP into the hash table.
 * The hash table holds a reference to the QP.
 */
static void insert_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	unsigned long flags;

	atomic_inc(&qp->refcount);
	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);

	if (qp->ibqp.qp_num <= 1) {
		rcu_assign_pointer(ibp->qp[qp->ibqp.qp_num], qp);
	} else {
		u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);

		qp->next = dev->qp_dev->qp_table[n];
		rcu_assign_pointer(dev->qp_dev->qp_table[n], qp);
		trace_hfi1_qpinsert(qp, n);
	}

	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
}

/*
 * Remove the QP from the table so it can't be found asynchronously by
 * the receive interrupt routine.
 */
static void remove_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);
	unsigned long flags;
	int removed = 1;

	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);

	if (rcu_dereference_protected(ibp->qp[0],
			lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
		RCU_INIT_POINTER(ibp->qp[0], NULL);
	} else if (rcu_dereference_protected(ibp->qp[1],
			lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
		RCU_INIT_POINTER(ibp->qp[1], NULL);
	} else {
		struct hfi1_qp *q;
		struct hfi1_qp __rcu **qpp;

		removed = 0;
		qpp = &dev->qp_dev->qp_table[n];
		for (; (q = rcu_dereference_protected(*qpp,
				lockdep_is_held(&dev->qp_dev->qpt_lock)))
					!= NULL;
				qpp = &q->next)
			if (q == qp) {
				RCU_INIT_POINTER(*qpp,
					rcu_dereference_protected(qp->next,
					lockdep_is_held(&dev->qp_dev->qpt_lock)));
				removed = 1;
				trace_hfi1_qpremove(qp, n);
				break;
			}
	}

	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
	if (removed) {
		synchronize_rcu();
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
}

/**
 * free_all_qps - check for QPs still in use
 * @dd: the device data structure
 *
 * There should not be any QPs still in use.
 * Returns the number of QPs still in use.
 */
static unsigned free_all_qps(struct hfi1_devdata *dd)
{
	struct hfi1_ibdev *dev = &dd->verbs_dev;
	unsigned long flags;
	struct hfi1_qp *qp;
	unsigned n, qp_inuse = 0;

	for (n = 0; n < dd->num_pports; n++) {
		struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;

		if (!hfi1_mcast_tree_empty(ibp))
			qp_inuse++;
		rcu_read_lock();
		if (rcu_dereference(ibp->qp[0]))
			qp_inuse++;
		if (rcu_dereference(ibp->qp[1]))
			qp_inuse++;
		rcu_read_unlock();
	}

	if (!dev->qp_dev)
		goto bail;
	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
	for (n = 0; n < dev->qp_dev->qp_table_size; n++) {
		qp = rcu_dereference_protected(dev->qp_dev->qp_table[n],
			lockdep_is_held(&dev->qp_dev->qpt_lock));
		RCU_INIT_POINTER(dev->qp_dev->qp_table[n], NULL);

		for (; qp; qp = rcu_dereference_protected(qp->next,
				lockdep_is_held(&dev->qp_dev->qpt_lock)))
			qp_inuse++;
	}
	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
	synchronize_rcu();
bail:
	return qp_inuse;
}

/**
 * reset_qp - initialize the QP state to the reset state
 * @qp: the QP to reset
 * @type: the QP type
 */
static void reset_qp(struct hfi1_qp *qp, enum ib_qp_type type)
{
	qp->remote_qpn = 0;
	qp->qkey = 0;
	qp->qp_access_flags = 0;
	iowait_init(
		&qp->s_iowait,
		1,
		hfi1_do_send,
		iowait_sleep,
		iowait_wakeup);
	qp->s_flags &= HFI1_S_SIGNAL_REQ_WR;
	qp->s_hdrwords = 0;
	qp->s_wqe = NULL;
	qp->s_draining = 0;
	qp->s_next_psn = 0;
	qp->s_last_psn = 0;
	qp->s_sending_psn = 0;
	qp->s_sending_hpsn = 0;
	qp->s_psn = 0;
	qp->r_psn = 0;
	qp->r_msn = 0;
	if (type == IB_QPT_RC) {
		qp->s_state = IB_OPCODE_RC_SEND_LAST;
		qp->r_state = IB_OPCODE_RC_SEND_LAST;
	} else {
		qp->s_state = IB_OPCODE_UC_SEND_LAST;
		qp->r_state = IB_OPCODE_UC_SEND_LAST;
	}
	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
	qp->r_nak_state = 0;
	qp->r_aflags = 0;
	qp->r_flags = 0;
	qp->s_head = 0;
	qp->s_tail = 0;
	qp->s_cur = 0;
	qp->s_acked = 0;
	qp->s_last = 0;
	qp->s_ssn = 1;
	qp->s_lsn = 0;
	clear_ahg(qp);
	qp->s_mig_state = IB_MIG_MIGRATED;
	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
	qp->r_head_ack_queue = 0;
	qp->s_tail_ack_queue = 0;
	qp->s_num_rd_atomic = 0;
	if (qp->r_rq.wq) {
		qp->r_rq.wq->head = 0;
		qp->r_rq.wq->tail = 0;
	}
	qp->r_sge.num_sge = 0;
}

static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends)
{
	unsigned n;

	if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
		hfi1_put_ss(&qp->s_rdma_read_sge);

	hfi1_put_ss(&qp->r_sge);

	if (clr_sends) {
		while (qp->s_last != qp->s_head) {
			struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
			unsigned i;

			for (i = 0; i < wqe->wr.num_sge; i++) {
				struct hfi1_sge *sge = &wqe->sg_list[i];

				hfi1_put_mr(sge->mr);
			}
			if (qp->ibqp.qp_type == IB_QPT_UD ||
			    qp->ibqp.qp_type == IB_QPT_SMI ||
			    qp->ibqp.qp_type == IB_QPT_GSI)
				atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
			if (++qp->s_last >= qp->s_size)
				qp->s_last = 0;
		}
		if (qp->s_rdma_mr) {
			hfi1_put_mr(qp->s_rdma_mr);
			qp->s_rdma_mr = NULL;
		}
	}

	if (qp->ibqp.qp_type != IB_QPT_RC)
		return;

	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
		struct hfi1_ack_entry *e = &qp->s_ack_queue[n];

		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
		    e->rdma_sge.mr) {
			hfi1_put_mr(e->rdma_sge.mr);
			e->rdma_sge.mr = NULL;
		}
	}
}

/**
 * hfi1_error_qp - put a QP into the error state
 * @qp: the QP to put into the error state
 * @err: the receive completion error to signal if a RWQE is active
 *
 * Flushes both send and receive work queues.
 * Returns true if last WQE event should be generated.
 * The QP r_lock and s_lock should be held and interrupts disabled.
 * If we are already in error state, just return.
 */
int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err)
{
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;
	int ret = 0;

	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
		goto bail;

	qp->state = IB_QPS_ERR;

	if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
		del_timer(&qp->s_timer);
	}

	if (qp->s_flags & HFI1_S_ANY_WAIT_SEND)
		qp->s_flags &= ~HFI1_S_ANY_WAIT_SEND;

	write_seqlock(&dev->iowait_lock);
	if (!list_empty(&qp->s_iowait.list) && !(qp->s_flags & HFI1_S_BUSY)) {
		qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
		list_del_init(&qp->s_iowait.list);
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
	write_sequnlock(&dev->iowait_lock);

	if (!(qp->s_flags & HFI1_S_BUSY)) {
		qp->s_hdrwords = 0;
		if (qp->s_rdma_mr) {
			hfi1_put_mr(qp->s_rdma_mr);
			qp->s_rdma_mr = NULL;
		}
		flush_tx_list(qp);
	}

	/* Schedule the sending tasklet to drain the send work queue. */
	if (qp->s_last != qp->s_head)
		hfi1_schedule_send(qp);

	clear_mr_refs(qp, 0);

	memset(&wc, 0, sizeof(wc));
	wc.qp = &qp->ibqp;
	wc.opcode = IB_WC_RECV;

	if (test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags)) {
		wc.wr_id = qp->r_wr_id;
		wc.status = err;
		hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
	}
	wc.status = IB_WC_WR_FLUSH_ERR;

	if (qp->r_rq.wq) {
		struct hfi1_rwq *wq;
		u32 head;
		u32 tail;

		spin_lock(&qp->r_rq.lock);

		/* sanity check pointers before trusting them */
		wq = qp->r_rq.wq;
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		while (tail != head) {
			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
			if (++tail >= qp->r_rq.size)
				tail = 0;
			hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
		}
		wq->tail = tail;

		spin_unlock(&qp->r_rq.lock);
	} else if (qp->ibqp.event_handler)
		ret = 1;

bail:
	return ret;
}

static void flush_tx_list(struct hfi1_qp *qp)
{
	while (!list_empty(&qp->s_iowait.tx_head)) {
		struct sdma_txreq *tx;

		tx = list_first_entry(
			&qp->s_iowait.tx_head,
			struct sdma_txreq,
			list);
		list_del_init(&tx->list);
		hfi1_put_txreq(
			container_of(tx, struct verbs_txreq, txreq));
	}
}

static void flush_iowait(struct hfi1_qp *qp)
{
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	unsigned long flags;

	write_seqlock_irqsave(&dev->iowait_lock, flags);
	if (!list_empty(&qp->s_iowait.list)) {
		list_del_init(&qp->s_iowait.list);
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
	write_sequnlock_irqrestore(&dev->iowait_lock, flags);
}

static inline int opa_mtu_enum_to_int(int mtu)
{
	switch (mtu) {
	case OPA_MTU_8192:  return 8192;
	case OPA_MTU_10240: return 10240;
	default:            return -1;
	}
}

/**
 * This function is what we would push to the core layer if we wanted to be a
 * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
 * to blindly pass the MTU enum value from the PathRecord to us.
 *
 * The actual flag used to determine "8k MTU" will change and is currently
 * unknown.
 */
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
	int val = opa_mtu_enum_to_int((int)mtu);

	if (val > 0)
		return val;
	return ib_mtu_enum_to_int(mtu);
}

/**
 * hfi1_modify_qp - modify the attributes of a queue pair
 * @ibqp: the queue pair whose attributes we're modifying
 * @attr: the new attributes
 * @attr_mask: the mask of attributes to modify
 * @udata: user data for libibverbs.so
 *
 * Returns 0 on success, otherwise returns an errno.
 */
int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		   int attr_mask, struct ib_udata *udata)
{
	struct hfi1_ibdev *dev = to_idev(ibqp->device);
	struct hfi1_qp *qp = to_iqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	struct ib_event ev;
	int lastwqe = 0;
	int mig = 0;
	int ret;
	u32 pmtu = 0; /* for gcc warning only */
	struct hfi1_devdata *dd;

	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_lock);

	cur_state = attr_mask & IB_QP_CUR_STATE ?
		attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
		goto inval;

	if (attr_mask & IB_QP_AV) {
		if (attr->ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
			goto inval;
		if (hfi1_check_ah(qp->ibqp.device, &attr->ah_attr))
			goto inval;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		if (attr->alt_ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
			goto inval;
		if (hfi1_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
			goto inval;
		if (attr->alt_pkey_index >= hfi1_get_npkeys(dd_from_dev(dev)))
			goto inval;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		if (attr->pkey_index >= hfi1_get_npkeys(dd_from_dev(dev)))
			goto inval;

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		if (attr->min_rnr_timer > 31)
			goto inval;

	if (attr_mask & IB_QP_PORT)
		if (qp->ibqp.qp_type == IB_QPT_SMI ||
		    qp->ibqp.qp_type == IB_QPT_GSI ||
		    attr->port_num == 0 ||
		    attr->port_num > ibqp->device->phys_port_cnt)
			goto inval;

	if (attr_mask & IB_QP_DEST_QPN)
		if (attr->dest_qp_num > HFI1_QPN_MASK)
			goto inval;

	if (attr_mask & IB_QP_RETRY_CNT)
		if (attr->retry_cnt > 7)
			goto inval;

	if (attr_mask & IB_QP_RNR_RETRY)
		if (attr->rnr_retry > 7)
			goto inval;

	/*
	 * Don't allow invalid path_mtu values.  OK to set greater
	 * than the active mtu (or even the max_cap, if we have tuned
	 * that to a small mtu).  We'll set qp->path_mtu
	 * to the lesser of the requested attribute mtu and the active mtu,
	 * for packetizing messages.
	 * Note that the QP port has to be set in INIT and MTU in RTR.
	 */
	if (attr_mask & IB_QP_PATH_MTU) {
		int mtu, pidx = qp->port_num - 1;

		dd = dd_from_dev(dev);
		mtu = verbs_mtu_enum_to_int(ibqp->device, attr->path_mtu);
		if (mtu == -1)
			goto inval;

		if (mtu > dd->pport[pidx].ibmtu)
			pmtu = mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
		else
			pmtu = attr->path_mtu;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		if (attr->path_mig_state == IB_MIG_REARM) {
			if (qp->s_mig_state == IB_MIG_ARMED)
				goto inval;
			if (new_state != IB_QPS_RTS)
				goto inval;
		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
			if (qp->s_mig_state == IB_MIG_REARM)
				goto inval;
			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
				goto inval;
			if (qp->s_mig_state == IB_MIG_ARMED)
				mig = 1;
		} else
			goto inval;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		if (attr->max_dest_rd_atomic > HFI1_MAX_RDMA_ATOMIC)
			goto inval;

	switch (new_state) {
	case IB_QPS_RESET:
		if (qp->state != IB_QPS_RESET) {
			qp->state = IB_QPS_RESET;
			flush_iowait(qp);
			qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
			spin_unlock(&qp->s_lock);
			spin_unlock_irq(&qp->r_lock);
			/* Stop the sending work queue and retry timer */
			cancel_work_sync(&qp->s_iowait.iowork);
			del_timer_sync(&qp->s_timer);
			iowait_sdma_drain(&qp->s_iowait);
			flush_tx_list(qp);
			remove_qp(dev, qp);
			wait_event(qp->wait, !atomic_read(&qp->refcount));
			spin_lock_irq(&qp->r_lock);
			spin_lock(&qp->s_lock);
			clear_mr_refs(qp, 1);
			clear_ahg(qp);
			reset_qp(qp, ibqp->qp_type);
		}
		break;

	case IB_QPS_RTR:
		/* Allow event to re-trigger if QP set to RTR more than once */
		qp->r_flags &= ~HFI1_R_COMM_EST;
		qp->state = new_state;
		break;

	case IB_QPS_SQD:
		qp->s_draining = qp->s_last != qp->s_cur;
		qp->state = new_state;
		break;

	case IB_QPS_SQE:
		if (qp->ibqp.qp_type == IB_QPT_RC)
			goto inval;
		qp->state = new_state;
		break;

	case IB_QPS_ERR:
		lastwqe = hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
		break;

	default:
		qp->state = new_state;
		break;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		qp->s_pkey_index = attr->pkey_index;

	if (attr_mask & IB_QP_PORT)
		qp->port_num = attr->port_num;

	if (attr_mask & IB_QP_DEST_QPN)
		qp->remote_qpn = attr->dest_qp_num;

	if (attr_mask & IB_QP_SQ_PSN) {
		qp->s_next_psn = attr->sq_psn & PSN_MODIFY_MASK;
		qp->s_psn = qp->s_next_psn;
		qp->s_sending_psn = qp->s_next_psn;
		qp->s_last_psn = qp->s_next_psn - 1;
		qp->s_sending_hpsn = qp->s_last_psn;
	}

	if (attr_mask & IB_QP_RQ_PSN)
		qp->r_psn = attr->rq_psn & PSN_MODIFY_MASK;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->qp_access_flags = attr->qp_access_flags;

	if (attr_mask & IB_QP_AV) {
		qp->remote_ah_attr = attr->ah_attr;
		qp->s_srate = attr->ah_attr.static_rate;
		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		qp->alt_ah_attr = attr->alt_ah_attr;
		qp->s_alt_pkey_index = attr->alt_pkey_index;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		qp->s_mig_state = attr->path_mig_state;
		if (mig) {
			qp->remote_ah_attr = qp->alt_ah_attr;
			qp->port_num = qp->alt_ah_attr.port_num;
			qp->s_pkey_index = qp->s_alt_pkey_index;
			qp->s_flags |= HFI1_S_AHG_CLEAR;
		}
	}

	if (attr_mask & IB_QP_PATH_MTU) {
		struct hfi1_ibport *ibp;
		u8 sc, vl;
		u32 mtu;

		dd = dd_from_dev(dev);
		ibp = &dd->pport[qp->port_num - 1].ibport_data;

		sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
		vl = sc_to_vlt(dd, sc);

		mtu = verbs_mtu_enum_to_int(ibqp->device, pmtu);
		if (vl < PER_VL_SEND_CONTEXTS)
			mtu = min_t(u32, mtu, dd->vld[vl].mtu);
		pmtu = mtu_to_enum(mtu, OPA_MTU_8192);

		qp->path_mtu = pmtu;
		qp->pmtu = mtu;
	}

	if (attr_mask & IB_QP_RETRY_CNT) {
		qp->s_retry_cnt = attr->retry_cnt;
		qp->s_retry = attr->retry_cnt;
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		qp->s_rnr_retry_cnt = attr->rnr_retry;
		qp->s_rnr_retry = attr->rnr_retry;
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		qp->r_min_rnr_timer = attr->min_rnr_timer;

	if (attr_mask & IB_QP_TIMEOUT) {
		qp->timeout = attr->timeout;
		qp->timeout_jiffies =
			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
				1000UL);
	}

	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
		qp->s_max_rd_atomic = attr->max_rd_atomic;

	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		insert_qp(dev, qp);

	if (lastwqe) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	if (mig) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_PATH_MIG;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	ret = 0;
	goto bail;

inval:
	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);
	ret = -EINVAL;

bail:
	return ret;
}

int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		  int attr_mask, struct ib_qp_init_attr *init_attr)
{
	struct hfi1_qp *qp = to_iqp(ibqp);

	attr->qp_state = qp->state;
	attr->cur_qp_state = attr->qp_state;
	attr->path_mtu = qp->path_mtu;
	attr->path_mig_state = qp->s_mig_state;
	attr->qkey = qp->qkey;
	attr->rq_psn = mask_psn(qp->r_psn);
	attr->sq_psn = mask_psn(qp->s_next_psn);
	attr->dest_qp_num = qp->remote_qpn;
	attr->qp_access_flags = qp->qp_access_flags;
	attr->cap.max_send_wr = qp->s_size - 1;
	attr->cap.max_recv_wr = qp->ibqp.srq ?
		0 : qp->r_rq.size - 1;
	attr->cap.max_send_sge = qp->s_max_sge;
	attr->cap.max_recv_sge = qp->r_rq.max_sge;
	attr->cap.max_inline_data = 0;
	attr->ah_attr = qp->remote_ah_attr;
	attr->alt_ah_attr = qp->alt_ah_attr;
	attr->pkey_index = qp->s_pkey_index;
	attr->alt_pkey_index = qp->s_alt_pkey_index;
	attr->en_sqd_async_notify = 0;
	attr->sq_draining = qp->s_draining;
	attr->max_rd_atomic = qp->s_max_rd_atomic;
	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
	attr->min_rnr_timer = qp->r_min_rnr_timer;
	attr->port_num = qp->port_num;
	attr->timeout = qp->timeout;
	attr->retry_cnt = qp->s_retry_cnt;
	attr->rnr_retry = qp->s_rnr_retry_cnt;
	attr->alt_port_num = qp->alt_ah_attr.port_num;
	attr->alt_timeout = qp->alt_timeout;

	init_attr->event_handler = qp->ibqp.event_handler;
	init_attr->qp_context = qp->ibqp.qp_context;
	init_attr->send_cq = qp->ibqp.send_cq;
	init_attr->recv_cq = qp->ibqp.recv_cq;
	init_attr->srq = qp->ibqp.srq;
	init_attr->cap = attr->cap;
	if (qp->s_flags & HFI1_S_SIGNAL_REQ_WR)
		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
	else
		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
	init_attr->qp_type = qp->ibqp.qp_type;
	init_attr->port_num = qp->port_num;
	return 0;
}

/**
 * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
 * @qp: the queue pair to compute the AETH for
 *
 * Returns the AETH.
 */
__be32 hfi1_compute_aeth(struct hfi1_qp *qp)
{
	u32 aeth = qp->r_msn & HFI1_MSN_MASK;

	if (qp->ibqp.srq) {
		/*
		 * Shared receive queues don't generate credits.
		 * Set the credit field to the invalid value.
		 */
		aeth |= HFI1_AETH_CREDIT_INVAL << HFI1_AETH_CREDIT_SHIFT;
	} else {
		u32 min, max, x;
		u32 credits;
		struct hfi1_rwq *wq = qp->r_rq.wq;
		u32 head;
		u32 tail;

		/* sanity check pointers before trusting them */
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		/*
		 * Compute the number of credits available (RWQEs).
		 * There is a small chance that the pair of reads are
		 * not atomic, which is OK, since the fuzziness is
		 * resolved as further ACKs go out.
		 */
		credits = head - tail;
		if ((int)credits < 0)
			credits += qp->r_rq.size;
		/*
		 * Binary search the credit table to find the code to
		 * use.
		 */
		min = 0;
		max = 31;
		for (;;) {
			x = (min + max) / 2;
			if (credit_table[x] == credits)
				break;
			if (credit_table[x] > credits)
				max = x;
			else if (min == x)
				break;
			else
				min = x;
		}
		aeth |= x << HFI1_AETH_CREDIT_SHIFT;
	}
	return cpu_to_be32(aeth);
}

/**
 * hfi1_create_qp - create a queue pair for a device
 * @ibpd: the protection domain whose device we create the queue pair for
 * @init_attr: the attributes of the queue pair
 * @udata: user data for libibverbs.so
 *
 * Returns the queue pair on success, otherwise returns an errno.
 *
 * Called by the ib_create_qp() core verbs function.
 */
struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
			     struct ib_qp_init_attr *init_attr,
			     struct ib_udata *udata)
{
	struct hfi1_qp *qp;
	int err;
	struct hfi1_swqe *swq = NULL;
	struct hfi1_ibdev *dev;
	struct hfi1_devdata *dd;
	size_t sz;
	size_t sg_list_sz;
	struct ib_qp *ret;

	if (init_attr->cap.max_send_sge > hfi1_max_sges ||
	    init_attr->cap.max_send_wr > hfi1_max_qp_wrs ||
	    init_attr->create_flags) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	/* Check receive queue parameters if no SRQ is specified. */
	if (!init_attr->srq) {
		if (init_attr->cap.max_recv_sge > hfi1_max_sges ||
		    init_attr->cap.max_recv_wr > hfi1_max_qp_wrs) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
		if (init_attr->cap.max_send_sge +
		    init_attr->cap.max_send_wr +
		    init_attr->cap.max_recv_sge +
		    init_attr->cap.max_recv_wr == 0) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
	}

	switch (init_attr->qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (init_attr->port_num == 0 ||
		    init_attr->port_num > ibpd->device->phys_port_cnt) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
		/* fall through - SMI/GSI QPs share the common setup below */
	case IB_QPT_UC:
	case IB_QPT_RC:
	case IB_QPT_UD:
		sz = sizeof(struct hfi1_sge) *
			init_attr->cap.max_send_sge +
			sizeof(struct hfi1_swqe);
		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
		if (swq == NULL) {
			ret = ERR_PTR(-ENOMEM);
			goto bail;
		}
		sz = sizeof(*qp);
		sg_list_sz = 0;
		if (init_attr->srq) {
			struct hfi1_srq *srq = to_isrq(init_attr->srq);

			if (srq->rq.max_sge > 1)
				sg_list_sz = sizeof(*qp->r_sg_list) *
					(srq->rq.max_sge - 1);
		} else if (init_attr->cap.max_recv_sge > 1)
			sg_list_sz = sizeof(*qp->r_sg_list) *
				(init_attr->cap.max_recv_sge - 1);
		qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
		if (!qp) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_swq;
		}
		RCU_INIT_POINTER(qp->next, NULL);
		qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
		if (!qp->s_hdr) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_qp;
		}
		qp->timeout_jiffies =
			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
				1000UL);
		if (init_attr->srq)
			sz = 0;
		else {
			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
				sizeof(struct hfi1_rwqe);
			qp->r_rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) +
						   qp->r_rq.size * sz);
			if (!qp->r_rq.wq) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_qp;
			}
		}

		/*
		 * ib_create_qp() will initialize qp->ibqp
		 * except for qp->ibqp.qp_num.
		 */
		spin_lock_init(&qp->r_lock);
		spin_lock_init(&qp->s_lock);
		spin_lock_init(&qp->r_rq.lock);
		atomic_set(&qp->refcount, 0);
		init_waitqueue_head(&qp->wait);
		init_timer(&qp->s_timer);
		qp->s_timer.data = (unsigned long)qp;
		INIT_LIST_HEAD(&qp->rspwait);
		qp->state = IB_QPS_RESET;
		qp->s_wq = swq;
		qp->s_size = init_attr->cap.max_send_wr + 1;
		qp->s_max_sge = init_attr->cap.max_send_sge;
		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
			qp->s_flags = HFI1_S_SIGNAL_REQ_WR;
		dev = to_idev(ibpd->device);
		dd = dd_from_dev(dev);
		err = alloc_qpn(dd, &dev->qp_dev->qpn_table, init_attr->qp_type,
				init_attr->port_num);
		if (err < 0) {
			ret = ERR_PTR(err);
			vfree(qp->r_rq.wq);
			goto bail_qp;
		}
		qp->ibqp.qp_num = err;
		qp->port_num = init_attr->port_num;
		reset_qp(qp, init_attr->qp_type);

		break;

	default:
		/* Don't support raw QPs */
		ret = ERR_PTR(-ENOSYS);
		goto bail;
	}

	init_attr->cap.max_inline_data = 0;

	/*
	 * Return the address of the RWQ as the offset to mmap.
	 * See hfi1_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		if (!qp->r_rq.wq) {
			__u64 offset = 0;

			err = ib_copy_to_udata(udata, &offset,
					       sizeof(offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		} else {
			u32 s = sizeof(struct hfi1_rwq) + qp->r_rq.size * sz;

			qp->ip = hfi1_create_mmap_info(dev, s,
						       ibpd->uobject->context,
						       qp->r_rq.wq);
			if (!qp->ip) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_ip;
			}

			err = ib_copy_to_udata(udata, &(qp->ip->offset),
					       sizeof(qp->ip->offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		}
	}

	spin_lock(&dev->n_qps_lock);
	if (dev->n_qps_allocated == hfi1_max_qps) {
		spin_unlock(&dev->n_qps_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_qps_allocated++;
	spin_unlock(&dev->n_qps_lock);

	if (qp->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = &qp->ibqp;

	/*
	 * We have our QP and it's good; now keep track of what types of
	 * opcodes can be processed on this QP.  We do this by keeping
	 * track of what the 3 high order bits of the opcode are.
	 */
	switch (init_attr->qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
	case IB_QPT_UD:
		qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & OPCODE_QP_MASK;
		break;
	case IB_QPT_RC:
		qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & OPCODE_QP_MASK;
		break;
	case IB_QPT_UC:
		qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & OPCODE_QP_MASK;
		break;
	default:
		ret = ERR_PTR(-EINVAL);
		goto bail_ip;
	}

	goto bail;

bail_ip:
	if (qp->ip)
		kref_put(&qp->ip->ref, hfi1_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
bail_qp:
	kfree(qp->s_hdr);
	kfree(qp);
bail_swq:
	vfree(swq);
bail:
	return ret;
}

/**
 * hfi1_destroy_qp - destroy a queue pair
 * @ibqp: the queue pair to destroy
 *
 * Returns 0 on success.
 *
 * Note that this can be called while the QP is actively sending or
 * receiving!
 */
int hfi1_destroy_qp(struct ib_qp *ibqp)
{
	struct hfi1_qp *qp = to_iqp(ibqp);
	struct hfi1_ibdev *dev = to_idev(ibqp->device);

	/* Make sure HW and driver activity is stopped. */
	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_lock);
	if (qp->state != IB_QPS_RESET) {
		qp->state = IB_QPS_RESET;
		flush_iowait(qp);
		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
		spin_unlock(&qp->s_lock);
		spin_unlock_irq(&qp->r_lock);
		cancel_work_sync(&qp->s_iowait.iowork);
		del_timer_sync(&qp->s_timer);
		iowait_sdma_drain(&qp->s_iowait);
		flush_tx_list(qp);
		remove_qp(dev, qp);
		wait_event(qp->wait, !atomic_read(&qp->refcount));
		spin_lock_irq(&qp->r_lock);
		spin_lock(&qp->s_lock);
		clear_mr_refs(qp, 1);
		clear_ahg(qp);
	}
	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);

	/* all user's cleaned up, mark it available */
	free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
	spin_lock(&dev->n_qps_lock);
	dev->n_qps_allocated--;
	spin_unlock(&dev->n_qps_lock);

	if (qp->ip)
		kref_put(&qp->ip->ref, hfi1_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	vfree(qp->s_wq);
	kfree(qp->s_hdr);
	kfree(qp);
	return 0;
}

/**
 * init_qpn_table - initialize the QP number table for a device
 * @dd: the device data structure
 * @qpt: the QPN table
 */
static int init_qpn_table(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt)
{
	u32 offset, qpn, i;
	struct qpn_map *map;
	int ret = 0;

	spin_lock_init(&qpt->lock);

	qpt->last = 0;
	qpt->incr = 1 << dd->qos_shift;

	/* ensure we don't assign QPs from the KDETH 64K window */
	qpn = kdeth_qp << 16;
	qpt->nmaps = qpn / BITS_PER_PAGE;
	/* This should always be zero */
	offset = qpn & BITS_PER_PAGE_MASK;
	map = &qpt->map[qpt->nmaps];
	dd_dev_info(dd, "Reserving QPNs for KDETH window from 0x%x to 0x%x\n",
		    qpn, qpn + 65535);
	for (i = 0; i < 65536; i++) {
		if (!map->page) {
			get_map_page(qpt, map);
			if (!map->page) {
				ret = -ENOMEM;
				break;
			}
		}
		set_bit(offset, map->page);
		offset++;
		if (offset == BITS_PER_PAGE) {
			/* next page */
			qpt->nmaps++;
			map++;
			offset = 0;
		}
	}
	return ret;
}

/**
 * free_qpn_table - free the QP number table for a device
 * @qpt: the QPN table
 */
static void free_qpn_table(struct hfi1_qpn_table *qpt)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
		free_page((unsigned long)qpt->map[i].page);
}

/**
 * hfi1_get_credit - handle a credit update in an incoming AETH
 * @qp: the qp the credit update applies to
 * @aeth: the Acknowledge Extended Transport Header
 *
 * The QP s_lock should be held.
 */
void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth)
{
	u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;

	/*
	 * If the credit is invalid, we can send
	 * as many packets as we like.  Otherwise, we have to
	 * honor the credit field.
	 */
	if (credit == HFI1_AETH_CREDIT_INVAL) {
		if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
			qp->s_flags |= HFI1_S_UNLIMITED_CREDIT;
			if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
				qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
				hfi1_schedule_send(qp);
			}
		}
	} else if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
		/* Compute new LSN (i.e., MSN + credit) */
		credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK;
		if (cmp_msn(credit, qp->s_lsn) > 0) {
			qp->s_lsn = credit;
			if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
				qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
				hfi1_schedule_send(qp);
			}
		}
	}
}

void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag)
{
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (qp->s_flags & flag) {
		qp->s_flags &= ~flag;
		trace_hfi1_qpwakeup(qp, flag);
		hfi1_schedule_send(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	/* Notify hfi1_destroy_qp() if it is waiting. */
	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
}

static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait *wait,
	struct sdma_txreq *stx,
	unsigned seq)
{
	struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
	struct hfi1_qp *qp;
	unsigned long flags;
	int ret = 0;
	struct hfi1_ibdev *dev;

	qp = tx->qp;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {

		/*
		 * If we couldn't queue the DMA request, save the info
		 * and try again later rather than destroying the
		 * buffer and undoing the side effects of the copy.
		 */
		/* Make a common routine?
		 */
		dev = &sde->dd->verbs_dev;
		list_add_tail(&stx->list, &wait->tx_head);
		write_seqlock(&dev->iowait_lock);
		if (sdma_progress(sde, seq, stx))
			goto eagain;
		if (list_empty(&qp->s_iowait.list)) {
			struct hfi1_ibport *ibp =
				to_iport(qp->ibqp.device, qp->port_num);

			ibp->n_dmawait++;
			qp->s_flags |= HFI1_S_WAIT_DMA_DESC;
			list_add_tail(&qp->s_iowait.list, &sde->dmawait);
			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_DMA_DESC);
			atomic_inc(&qp->refcount);
		}
		write_sequnlock(&dev->iowait_lock);
		qp->s_flags &= ~HFI1_S_BUSY;
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ret = -EBUSY;
	} else {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		hfi1_put_txreq(tx);
	}
	return ret;
eagain:
	write_sequnlock(&dev->iowait_lock);
	spin_unlock_irqrestore(&qp->s_lock, flags);
	list_del_init(&stx->list);
	return -EAGAIN;
}

static void iowait_wakeup(struct iowait *wait, int reason)
{
	struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait);

	WARN_ON(reason != SDMA_AVAIL_REASON);
	hfi1_qp_wakeup(qp, HFI1_S_WAIT_DMA_DESC);
}

int hfi1_qp_init(struct hfi1_ibdev *dev)
{
	struct hfi1_devdata *dd = dd_from_dev(dev);
	int i;
	int ret = -ENOMEM;

	/* allocate parent object */
	dev->qp_dev = kzalloc(sizeof(*dev->qp_dev), GFP_KERNEL);
	if (!dev->qp_dev)
		goto nomem;
	/* allocate hash table */
	dev->qp_dev->qp_table_size = hfi1_qp_table_size;
	dev->qp_dev->qp_table_bits = ilog2(hfi1_qp_table_size);
	dev->qp_dev->qp_table =
		kmalloc(dev->qp_dev->qp_table_size *
			sizeof(*dev->qp_dev->qp_table),
			GFP_KERNEL);
	if (!dev->qp_dev->qp_table)
		goto nomem;
	for (i = 0; i < dev->qp_dev->qp_table_size; i++)
		RCU_INIT_POINTER(dev->qp_dev->qp_table[i], NULL);
	spin_lock_init(&dev->qp_dev->qpt_lock);
	/* initialize qpn map */
	ret = init_qpn_table(dd, &dev->qp_dev->qpn_table);
	if (ret)
		goto nomem;
	return ret;
nomem:
	if (dev->qp_dev) {
		kfree(dev->qp_dev->qp_table);
		free_qpn_table(&dev->qp_dev->qpn_table);
		kfree(dev->qp_dev);
	}
	return ret;
}

void hfi1_qp_exit(struct hfi1_ibdev *dev)
{
	struct hfi1_devdata *dd = dd_from_dev(dev);
	u32 qps_inuse;

	qps_inuse = free_all_qps(dd);
	if (qps_inuse)
		dd_dev_err(dd, "QP memory leak! %u still in use\n",
			   qps_inuse);
	if (dev->qp_dev) {
		kfree(dev->qp_dev->qp_table);
		free_qpn_table(&dev->qp_dev->qpn_table);
		kfree(dev->qp_dev);
	}
}

/**
 * qp_to_sdma_engine - map a qp to a send engine
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send engine for the qp or NULL for SMI type qp.
 */
struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct sdma_engine *sde;

	if (!(dd->flags & HFI1_HAS_SEND_DMA))
		return NULL;
	switch (qp->ibqp.qp_type) {
	case IB_QPT_UC:
	case IB_QPT_RC:
		break;
	case IB_QPT_SMI:
		return NULL;
	default:
		break;
	}
	sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5);
	return sde;
}

struct qp_iter {
	struct hfi1_ibdev *dev;
	struct hfi1_qp *qp;
	int specials;
	int n;
};

struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
{
	struct qp_iter *iter;

	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return NULL;

	iter->dev = dev;
	iter->specials = dev->ibdev.phys_port_cnt * 2;
	if (qp_iter_next(iter)) {
		kfree(iter);
		return NULL;
	}

	return iter;
}

int qp_iter_next(struct qp_iter *iter)
{
	struct hfi1_ibdev *dev = iter->dev;
	int n = iter->n;
	int ret = 1;
	struct hfi1_qp *pqp = iter->qp;
	struct hfi1_qp *qp;

	/*
	 * The approach is to consider the special qps
	 * as additional table entries before the
	 * real hash table.  Since the qp code sets
	 * the qp->next hash link to NULL, this works just fine.
	 *
	 * iter->specials is 2 * # ports
	 *
	 * n = 0..iter->specials is the special qp indices
	 *
	 * n = iter->specials..dev->qp_dev->qp_table_size+iter->specials are
	 * the potential hash bucket entries
	 *
	 */
	for (; n < dev->qp_dev->qp_table_size + iter->specials; n++) {
		if (pqp) {
			qp = rcu_dereference(pqp->next);
		} else {
			if (n < iter->specials) {
				struct hfi1_pportdata *ppd;
				struct hfi1_ibport *ibp;
				int pidx;

				pidx = n % dev->ibdev.phys_port_cnt;
				ppd = &dd_from_dev(dev)->pport[pidx];
				ibp = &ppd->ibport_data;

				if (!(n & 1))
					qp = rcu_dereference(ibp->qp[0]);
				else
					qp = rcu_dereference(ibp->qp[1]);
			} else {
				qp = rcu_dereference(
					dev->qp_dev->qp_table[
						(n - iter->specials)]);
			}
		}
		pqp = qp;
		if (qp) {
			iter->qp = qp;
			iter->n = n;
			return 0;
		}
	}
	return ret;
}

static const char * const qp_type_str[] = {
	"SMI", "GSI", "RC", "UC", "UD",
};

static int qp_idle(struct hfi1_qp *qp)
{
	return
		qp->s_last == qp->s_acked &&
		qp->s_acked == qp->s_cur &&
		qp->s_cur == qp->s_tail &&
		qp->s_tail == qp->s_head;
}

void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
{
	struct hfi1_swqe *wqe;
	struct hfi1_qp *qp = iter->qp;
	struct sdma_engine *sde;

	sde = qp_to_sdma_engine(qp, qp->s_sc);
	wqe = get_swqe_ptr(qp, qp->s_last);
	seq_printf(s,
		   "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %d %u %u %u SDE %p,%u\n",
		   iter->n,
		   qp_idle(qp) ? "I" : "B",
		   qp->ibqp.qp_num,
		   atomic_read(&qp->refcount),
		   qp_type_str[qp->ibqp.qp_type],
		   qp->state,
		   wqe ? wqe->wr.opcode : 0,
		   qp->s_hdrwords,
		   qp->s_flags,
		   atomic_read(&qp->s_iowait.sdma_busy),
		   !list_empty(&qp->s_iowait.list),
		   qp->timeout,
		   wqe ?
			wqe->ssn : 0,
		   qp->s_lsn,
		   qp->s_last_psn,
		   qp->s_psn, qp->s_next_psn,
		   qp->s_sending_psn, qp->s_sending_hpsn,
		   qp->s_last, qp->s_acked, qp->s_cur,
		   qp->s_tail, qp->s_head, qp->s_size,
		   qp->remote_qpn,
		   qp->remote_ah_attr.dlid,
		   qp->remote_ah_attr.sl,
		   qp->pmtu,
		   qp->s_retry_cnt,
		   qp->timeout,
		   qp->s_rnr_retry_cnt,
		   sde,
		   sde ? sde->this_idx : 0);
}

void qp_comm_est(struct hfi1_qp *qp)
{
	qp->r_flags |= HFI1_R_COMM_EST;
	if (qp->ibqp.event_handler) {
		struct ib_event ev;

		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_COMM_EST;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
}