/*
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/prefetch.h>

#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "sdma.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

/*
 * The size has to be longer than this string, so we can append
 * board/chip information to it in the initialization code.
 */
const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n";

DEFINE_SPINLOCK(hfi1_devs_lock);
LIST_HEAD(hfi1_dev_list);
DEFINE_MUTEX(hfi1_mutex);	/* general driver use */

unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
module_param_named(max_mtu, hfi1_max_mtu, uint, S_IRUGO);
MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is 8192");

unsigned int hfi1_cu = 1;
module_param_named(cu, hfi1_cu, uint, S_IRUGO);
MODULE_PARM_DESC(cu, "Credit return units");

unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
static int hfi1_caps_set(const char *, const struct kernel_param *);
static int hfi1_caps_get(char *, const struct kernel_param *);
static const struct kernel_param_ops cap_ops = {
	.set = hfi1_caps_set,
	.get = hfi1_caps_get
};
module_param_cb(cap_mask, &cap_ops, &hfi1_cap_mask, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Intel Omni-Path Architecture driver");
MODULE_VERSION(HFI1_DRIVER_VERSION);

/*
 * MAX_PKT_RECV is the max # of packets processed per receive interrupt.
 */
#define MAX_PKT_RECV 64
#define EGR_HEAD_UPDATE_THRESHOLD 16

struct hfi1_ib_stats hfi1_stats;

static int hfi1_caps_set(const char *val, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long *cap_mask_ptr = (unsigned long *)kp->arg,
		cap_mask = *cap_mask_ptr, value, diff,
		write_mask = ((HFI1_CAP_WRITABLE_MASK << HFI1_CAP_USER_SHIFT) |
			      HFI1_CAP_WRITABLE_MASK);

	ret = kstrtoul(val, 0, &value);
	if (ret) {
		pr_warn("Invalid module parameter value for 'cap_mask'\n");
		goto done;
	}
	/* Get the changed bits (except the locked bit) */
	diff = value ^ (cap_mask & ~HFI1_CAP_LOCKED_SMASK);

	/* Remove any bits that are not allowed to change after driver load */
	if (HFI1_CAP_LOCKED() && (diff & ~write_mask)) {
		pr_warn("Ignoring non-writable capability bits %#lx\n",
			diff & ~write_mask);
		diff &= write_mask;
	}

	/* Mask off any reserved bits */
	diff &= ~HFI1_CAP_RESERVED_MASK;
	/* Clear any previously set and changing bits */
	cap_mask &= ~diff;
	/* Update the bits with the new capability */
	cap_mask |= (value & diff);
	/* Check for any kernel/user restrictions */
	diff = (cap_mask & (HFI1_CAP_MUST_HAVE_KERN << HFI1_CAP_USER_SHIFT)) ^
		((cap_mask & HFI1_CAP_MUST_HAVE_KERN) << HFI1_CAP_USER_SHIFT);
	cap_mask &= ~diff;
	/* Set the bitmask to the final set */
	*cap_mask_ptr = cap_mask;
done:
	return ret;
}

static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
{
	unsigned long cap_mask = *(unsigned long *)kp->arg;

	cap_mask &= ~HFI1_CAP_LOCKED_SMASK;
	cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT);

	return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
}

const char *get_unit_name(int unit)
{
	static char iname[16];

	snprintf(iname, sizeof(iname), DRIVER_NAME"_%u", unit);
	return iname;
}
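/*
 * Usage note (illustrative, not taken from this file): because cap_mask is
 * registered above with module_param_cb() and S_IWUSR | S_IRUGO, it can be
 * set at load time or read/updated at runtime through sysfs, assuming the
 * module is loaded under its usual "hfi1" name, e.g.:
 *
 *	modprobe hfi1 cap_mask=<mask>
 *	cat /sys/module/hfi1/parameters/cap_mask
 *	echo <new mask> > /sys/module/hfi1/parameters/cap_mask
 *
 * A runtime write goes through hfi1_caps_set(), which silently drops any
 * bits outside the writable mask once the capability mask has been locked,
 * and a read goes through hfi1_caps_get(), which hides the locked bit.
 */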
/*
 * Return count of units with at least one port ACTIVE.
 */
int hfi1_count_active_units(void)
{
	struct hfi1_devdata *dd;
	struct hfi1_pportdata *ppd;
	unsigned long flags;
	int pidx, nunits_active = 0;

	spin_lock_irqsave(&hfi1_devs_lock, flags);
	list_for_each_entry(dd, &hfi1_dev_list, list) {
		if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase)
			continue;
		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
			ppd = dd->pport + pidx;
			if (ppd->lid && ppd->linkup) {
				nunits_active++;
				break;
			}
		}
	}
	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
	return nunits_active;
}

/*
 * Return count of all units, optionally return in arguments
 * the number of usable (present) units, and the number of
 * ports that are up.
 */
int hfi1_count_units(int *npresentp, int *nupp)
{
	int nunits = 0, npresent = 0, nup = 0;
	struct hfi1_devdata *dd;
	unsigned long flags;
	int pidx;
	struct hfi1_pportdata *ppd;

	spin_lock_irqsave(&hfi1_devs_lock, flags);

	list_for_each_entry(dd, &hfi1_dev_list, list) {
		nunits++;
		if ((dd->flags & HFI1_PRESENT) && dd->kregbase)
			npresent++;
		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
			ppd = dd->pport + pidx;
			if (ppd->lid && ppd->linkup)
				nup++;
		}
	}

	spin_unlock_irqrestore(&hfi1_devs_lock, flags);

	if (npresentp)
		*npresentp = npresent;
	if (nupp)
		*nupp = nup;

	return nunits;
}

/*
 * Get address of eager buffer from its index (allocated in chunks, not
 * contiguous).
 */
static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
			       u8 *update)
{
	u32 idx = rhf_egr_index(rhf), offset = rhf_egr_buf_offset(rhf);

	*update |= !(idx & (rcd->egrbufs.threshold - 1)) && !offset;
	return (void *)(((u64)(rcd->egrbufs.rcvtids[idx].addr)) +
			(offset * RCV_BUF_BLOCK_SIZE));
}

/*
 * Validate and encode a given RcvArray Buffer size.
 * The function will check whether the given size falls within
 * allowed size ranges for the respective type and, optionally,
 * return the proper encoding.
 */
inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
{
	if (unlikely(!IS_ALIGNED(size, PAGE_SIZE)))
		return 0;
	if (unlikely(size < MIN_EAGER_BUFFER))
		return 0;
	if (size >
	    (type == PT_EAGER ? MAX_EAGER_BUFFER : MAX_EXPECTED_BUFFER))
		return 0;
	if (encoded)
		*encoded = ilog2(size / PAGE_SIZE) + 1;
	return 1;
}
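/*
 * Worked example for the encoding above (assuming 4 KiB pages): a 64 KiB
 * eager buffer is page aligned, so hfi1_rcvbuf_validate() encodes it as
 * ilog2(64 KiB / 4 KiB) + 1 = ilog2(16) + 1 = 5 and returns 1.  A size
 * that is not page aligned, is below MIN_EAGER_BUFFER, or exceeds the
 * per-type maximum makes the function return 0 without touching *encoded.
 */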
static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
		       struct hfi1_packet *packet)
{
	struct hfi1_message_header *rhdr = packet->hdr;
	u32 rte = rhf_rcv_type_err(packet->rhf);
	int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
	struct hfi1_ibport *ibp = &ppd->ibport_data;

	if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
		return;

	if (packet->rhf & RHF_TID_ERR) {
		/* For TIDERR and RC QPs preemptively schedule a NAK */
		struct hfi1_ib_header *hdr = (struct hfi1_ib_header *)rhdr;
		struct hfi1_other_headers *ohdr = NULL;
		u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
		u16 lid = be16_to_cpu(hdr->lrh[1]);
		u32 qp_num;
		u32 rcv_flags = 0;

		/* Sanity check packet */
		if (tlen < 24)
			goto drop;

		/* Check for GRH */
		if (lnh == HFI1_LRH_BTH)
			ohdr = &hdr->u.oth;
		else if (lnh == HFI1_LRH_GRH) {
			u32 vtf;

			ohdr = &hdr->u.l.oth;
			if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
				goto drop;
			vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
				goto drop;
			rcv_flags |= HFI1_HAS_GRH;
		} else
			goto drop;

		/* Get the destination QP number. */
		qp_num = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
		if (lid < HFI1_MULTICAST_LID_BASE) {
			struct hfi1_qp *qp;
			unsigned long flags;

			rcu_read_lock();
			qp = hfi1_lookup_qpn(ibp, qp_num);
			if (!qp) {
				rcu_read_unlock();
				goto drop;
			}

			/*
			 * Handle only RC QPs - for other QP types drop error
			 * packet.
			 */
			spin_lock_irqsave(&qp->r_lock, flags);

			/* Check for valid receive state. */
			if (!(ib_hfi1_state_ops[qp->state] &
			      HFI1_PROCESS_RECV_OK)) {
				ibp->n_pkt_drops++;
			}

			switch (qp->ibqp.qp_type) {
			case IB_QPT_RC:
				hfi1_rc_hdrerr(
					rcd,
					hdr,
					rcv_flags,
					qp);
				break;
			default:
				/* For now don't handle any other QP types */
				break;
			}

			spin_unlock_irqrestore(&qp->r_lock, flags);
			rcu_read_unlock();
		} /* Unicast QP */
	} /* Valid packet with TIDErr */

	/* handle "RcvTypeErr" flags */
	switch (rte) {
	case RHF_RTE_ERROR_OP_CODE_ERR:
	{
		u32 opcode;
		void *ebuf = NULL;
		__be32 *bth = NULL;

		if (rhf_use_egr_bfr(packet->rhf))
			ebuf = packet->ebuf;

		if (ebuf == NULL)
			goto drop; /* this should never happen */

		if (lnh == HFI1_LRH_BTH)
			bth = (__be32 *)ebuf;
		else if (lnh == HFI1_LRH_GRH)
			bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh));
		else
			goto drop;

		opcode = be32_to_cpu(bth[0]) >> 24;
		opcode &= 0xff;

		if (opcode == IB_OPCODE_CNP) {
			/*
			 * Only in pre-B0 h/w is the CNP_OPCODE handled
			 * via this code path (errata 291394).
			 */
			struct hfi1_qp *qp = NULL;
			u32 lqpn, rqpn;
			u16 rlid;
			u8 svc_type, sl, sc5;

			sc5 = (be16_to_cpu(rhdr->lrh[0]) >> 12) & 0xf;
			if (rhf_dc_info(packet->rhf))
				sc5 |= 0x10;
			sl = ibp->sc_to_sl[sc5];

			lqpn = be32_to_cpu(bth[1]) & HFI1_QPN_MASK;
			rcu_read_lock();
			qp = hfi1_lookup_qpn(ibp, lqpn);
			if (qp == NULL) {
				rcu_read_unlock();
				goto drop;
			}

			switch (qp->ibqp.qp_type) {
			case IB_QPT_UD:
				rlid = 0;
				rqpn = 0;
				svc_type = IB_CC_SVCTYPE_UD;
				break;
			case IB_QPT_UC:
				rlid = be16_to_cpu(rhdr->lrh[3]);
				rqpn = qp->remote_qpn;
				svc_type = IB_CC_SVCTYPE_UC;
				break;
			default:
				goto drop;
			}

			process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
			rcu_read_unlock();
		}

		packet->rhf &= ~RHF_RCV_TYPE_ERR_SMASK;
		break;
	}
	default:
		break;
	}

drop:
	return;
}

static inline void init_packet(struct hfi1_ctxtdata *rcd,
			       struct hfi1_packet *packet)
{

	packet->rsize = rcd->rcvhdrqentsize; /* words */
	packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
	packet->rcd = rcd;
	packet->updegr = 0;
	packet->etail = -1;
	packet->rhf_addr = get_rhf_addr(rcd);
	packet->rhf = rhf_to_cpu(packet->rhf_addr);
	packet->rhqoff = rcd->head;
	packet->numpkt = 0;
	packet->rcv_flags = 0;
}

#ifndef CONFIG_PRESCAN_RXQ
static void prescan_rxq(struct hfi1_packet *packet) {}
#else /* CONFIG_PRESCAN_RXQ */
static int prescan_receive_queue;

static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
			struct hfi1_other_headers *ohdr,
			u64 rhf, struct ib_grh *grh)
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	u32 bth1;
	u8 sc5, svc_type;
	int is_fecn, is_becn;

	switch (qp->ibqp.qp_type) {
	case IB_QPT_UD:
		svc_type = IB_CC_SVCTYPE_UD;
		break;
	case IB_QPT_UC: /* LATER */
	case IB_QPT_RC: /* LATER */
	default:
		return;
	}

	is_fecn = (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
		HFI1_FECN_MASK;
	is_becn = (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
		HFI1_BECN_MASK;

	sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
	if (rhf_dc_info(rhf))
		sc5 |= 0x10;

	if (is_fecn) {
		u32 src_qpn = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK;
		u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
		u16 dlid = be16_to_cpu(hdr->lrh[1]);
		u16 slid = be16_to_cpu(hdr->lrh[3]);

		return_cnp(ibp, qp, src_qpn, pkey, dlid, slid, sc5, grh);
	}

	if (is_becn) {
		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
		u32 lqpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
		u8 sl = ibp->sc_to_sl[sc5];

		process_becn(ppd, sl, 0, lqpn, 0, svc_type);
	}

	/* turn off BECN, or FECN */
	bth1 = be32_to_cpu(ohdr->bth[1]);
	bth1 &= ~(HFI1_FECN_MASK << HFI1_FECN_SHIFT);
	bth1 &= ~(HFI1_BECN_MASK << HFI1_BECN_SHIFT);
	ohdr->bth[1] = cpu_to_be32(bth1);
}

struct ps_mdata {
	struct hfi1_ctxtdata *rcd;
	u32 rsize;
	u32 maxcnt;
	u32 ps_head;
	u32 ps_tail;
	u32 ps_seq;
};

static inline void init_ps_mdata(struct ps_mdata *mdata,
				 struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;

	mdata->rcd = rcd;
	mdata->rsize = packet->rsize;
	mdata->maxcnt = packet->maxcnt;

	if (rcd->ps_state.initialized == 0) {
		mdata->ps_head = packet->rhqoff;
		rcd->ps_state.initialized++;
	} else
		mdata->ps_head = rcd->ps_state.ps_head;

	if (HFI1_CAP_IS_KSET(DMA_RTAIL)) {
		mdata->ps_tail = packet->hdrqtail;
		mdata->ps_seq = 0; /* not used with DMA_RTAIL */
	} else {
		mdata->ps_tail = 0; /* used only with DMA_RTAIL */
		mdata->ps_seq = rcd->seq_cnt;
	}
}
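/*
 * Orientation note, summarizing the logic above and below: the receive
 * header queue is drained in one of two modes.  With DMA_RTAIL the
 * hardware DMAs a tail index to memory, so "done" simply means
 * ps_head == ps_tail (see ps_done()).  Without DMA_RTAIL there is no
 * tail; each RHF instead carries a sequence number that the driver
 * shadows in rcd->seq_cnt and cycles through the values 1..13, and a
 * mismatch between the expected and received sequence number marks the
 * end of the valid entries.
 */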
static inline int ps_done(struct ps_mdata *mdata, u64 rhf)
{
	if (HFI1_CAP_IS_KSET(DMA_RTAIL))
		return mdata->ps_head == mdata->ps_tail;
	return mdata->ps_seq != rhf_rcv_seq(rhf);
}

static inline void update_ps_mdata(struct ps_mdata *mdata)
{
	struct hfi1_ctxtdata *rcd = mdata->rcd;

	mdata->ps_head += mdata->rsize;
	if (mdata->ps_head > mdata->maxcnt)
		mdata->ps_head = 0;
	rcd->ps_state.ps_head = mdata->ps_head;
	if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) {
		if (++mdata->ps_seq > 13)
			mdata->ps_seq = 1;
	}
}

/*
 * prescan_rxq - search through the receive queue looking for packets
 * containing Explicit Congestion Notifications (FECNs, or BECNs).
 * When an ECN is found, process the Congestion Notification, and toggle
 * it off.
 */
static void prescan_rxq(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct ps_mdata mdata;

	if (!prescan_receive_queue)
		return;

	init_ps_mdata(&mdata, packet);

	while (1) {
		struct hfi1_devdata *dd = rcd->dd;
		struct hfi1_ibport *ibp = &rcd->ppd->ibport_data;
		__le32 *rhf_addr = (__le32 *) rcd->rcvhdrq + mdata.ps_head +
			dd->rhf_offset;
		struct hfi1_qp *qp;
		struct hfi1_ib_header *hdr;
		struct hfi1_other_headers *ohdr;
		struct ib_grh *grh = NULL;
		u64 rhf = rhf_to_cpu(rhf_addr);
		u32 etype = rhf_rcv_type(rhf), qpn;
		int is_ecn = 0;
		u8 lnh;

		if (ps_done(&mdata, rhf))
			break;

		if (etype != RHF_RCV_TYPE_IB)
			goto next;

		hdr = (struct hfi1_ib_header *)
			hfi1_get_msgheader(dd, rhf_addr);
		lnh = be16_to_cpu(hdr->lrh[0]) & 3;

		if (lnh == HFI1_LRH_BTH)
			ohdr = &hdr->u.oth;
		else if (lnh == HFI1_LRH_GRH) {
			ohdr = &hdr->u.l.oth;
			grh = &hdr->u.l.grh;
		} else
			goto next; /* just in case */

		is_ecn |= be32_to_cpu(ohdr->bth[1]) &
			(HFI1_FECN_MASK << HFI1_FECN_SHIFT);
		is_ecn |= be32_to_cpu(ohdr->bth[1]) &
			(HFI1_BECN_MASK << HFI1_BECN_SHIFT);

		if (!is_ecn)
			goto next;

		qpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
		rcu_read_lock();
		qp = hfi1_lookup_qpn(ibp, qpn);

		if (qp == NULL) {
			rcu_read_unlock();
			goto next;
		}

		process_ecn(qp, hdr, ohdr, rhf, grh);
		rcu_read_unlock();
next:
		update_ps_mdata(&mdata);
	}
}
#endif /* CONFIG_PRESCAN_RXQ */
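/*
 * Overview of the fast-path helper below (a summary of existing behavior,
 * nothing new): process_rcv_packet() decodes one RHF entry, optionally
 * locates and prefetches the matching eager buffer, then dispatches to a
 * type-specific handler through dd->rhf_rcv_function_map[etype] (see the
 * handlers at the bottom of this file, e.g. process_receive_ib()).  Every
 * MAX_PKT_RECV (64) packets it either yields with cond_resched() when
 * running in thread context, or returns RCV_PKT_LIMIT so the interrupt
 * handler can back off.
 */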
static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
	int ret = RCV_PKT_OK;

	packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
					 packet->rhf_addr);
	packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
	packet->etype = rhf_rcv_type(packet->rhf);
	/* total length */
	packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
	/* retrieve eager buffer details */
	packet->ebuf = NULL;
	if (rhf_use_egr_bfr(packet->rhf)) {
		packet->etail = rhf_egr_index(packet->rhf);
		packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
					  &packet->updegr);
		/*
		 * Prefetch the contents of the eager buffer.  It is
		 * OK to send a negative length to prefetch_range().
		 * The +2 is the size of the RHF.
		 */
		prefetch_range(packet->ebuf,
			       packet->tlen - ((packet->rcd->rcvhdrqentsize -
					       (rhf_hdrq_offset(packet->rhf)+2)) * 4));
	}

	/*
	 * Call a type specific handler for the packet.  We
	 * should be able to trust that etype won't be beyond
	 * the range of valid indexes.  If so something is really
	 * wrong and we can probably just let things come
	 * crashing down.  There is no need to eat another
	 * comparison in this performance critical code.
	 */
	packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet);
	packet->numpkt++;

	/* Set up for the next packet */
	packet->rhqoff += packet->rsize;
	if (packet->rhqoff >= packet->maxcnt)
		packet->rhqoff = 0;

	if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
		if (thread) {
			cond_resched();
		} else {
			ret = RCV_PKT_LIMIT;
			this_cpu_inc(*packet->rcd->dd->rcv_limit);
		}
	}

	packet->rhf_addr = (__le32 *) packet->rcd->rcvhdrq + packet->rhqoff +
			   packet->rcd->dd->rhf_offset;
	packet->rhf = rhf_to_cpu(packet->rhf_addr);

	return ret;
}

static inline void process_rcv_update(int last, struct hfi1_packet *packet)
{
	/*
	 * Update head regs etc., every 16 packets, if not last pkt,
	 * to help prevent rcvhdrq overflows, when many packets
	 * are processed and queue is nearly full.
	 * Don't request an interrupt for intermediate updates.
	 */
	if (!last && !(packet->numpkt & 0xf)) {
		update_usrhead(packet->rcd, packet->rhqoff, packet->updegr,
			       packet->etail, 0, 0);
		packet->updegr = 0;
	}
	packet->rcv_flags = 0;
}

static inline void finish_packet(struct hfi1_packet *packet)
{

	/*
	 * Nothing we need to free for the packet.
	 *
	 * The only thing we need to do is a final update and call for an
	 * interrupt
	 */
	update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
		       packet->etail, rcv_intr_dynamic, packet->numpkt);

}

static inline void process_rcv_qp_work(struct hfi1_packet *packet)
{

	struct hfi1_ctxtdata *rcd;
	struct hfi1_qp *qp, *nqp;

	rcd = packet->rcd;
	rcd->head = packet->rhqoff;

	/*
	 * Iterate over all QPs waiting to respond.
	 * The list won't change since the IRQ is only run on one CPU.
	 */
	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
		list_del_init(&qp->rspwait);
		if (qp->r_flags & HFI1_R_RSP_NAK) {
			qp->r_flags &= ~HFI1_R_RSP_NAK;
			hfi1_send_rc_ack(rcd, qp, 0);
		}
		if (qp->r_flags & HFI1_R_RSP_SEND) {
			unsigned long flags;

			qp->r_flags &= ~HFI1_R_RSP_SEND;
			spin_lock_irqsave(&qp->s_lock, flags);
			if (ib_hfi1_state_ops[qp->state] &
			    HFI1_PROCESS_OR_FLUSH_SEND)
				hfi1_schedule_send(qp);
			spin_unlock_irqrestore(&qp->s_lock, flags);
		}
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
}

/*
 * Handle receive interrupts when using the no dma rtail option.
 */
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
	u32 seq;
	int last = RCV_PKT_OK;
	struct hfi1_packet packet;

	init_packet(rcd, &packet);
	seq = rhf_rcv_seq(packet.rhf);
	if (seq != rcd->seq_cnt) {
		last = RCV_PKT_DONE;
		goto bail;
	}

	prescan_rxq(&packet);

	while (last == RCV_PKT_OK) {
		last = process_rcv_packet(&packet, thread);
		seq = rhf_rcv_seq(packet.rhf);
		if (++rcd->seq_cnt > 13)
			rcd->seq_cnt = 1;
		if (seq != rcd->seq_cnt)
			last = RCV_PKT_DONE;
		process_rcv_update(last, &packet);
	}
	process_rcv_qp_work(&packet);
bail:
	finish_packet(&packet);
	return last;
}

int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
	u32 hdrqtail;
	int last = RCV_PKT_OK;
	struct hfi1_packet packet;

	init_packet(rcd, &packet);
	hdrqtail = get_rcvhdrtail(rcd);
	if (packet.rhqoff == hdrqtail) {
		last = RCV_PKT_DONE;
		goto bail;
	}
	smp_rmb();  /* prevent speculative reads of dma'ed hdrq */

	prescan_rxq(&packet);

	while (last == RCV_PKT_OK) {
		last = process_rcv_packet(&packet, thread);
		hdrqtail = get_rcvhdrtail(rcd);
		if (packet.rhqoff == hdrqtail)
			last = RCV_PKT_DONE;
		process_rcv_update(last, &packet);
	}
	process_rcv_qp_work(&packet);
bail:
	finish_packet(&packet);
	return last;
}

static inline void set_all_nodma_rtail(struct hfi1_devdata *dd)
{
	int i;

	for (i = 0; i < dd->first_user_ctxt; i++)
		dd->rcd[i]->do_interrupt =
			&handle_receive_interrupt_nodma_rtail;
}

static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
{
	int i;

	for (i = 0; i < dd->first_user_ctxt; i++)
		dd->rcd[i]->do_interrupt =
			&handle_receive_interrupt_dma_rtail;
}
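/*
 * Note on handler selection (summarizing the code below):
 * handle_receive_interrupt() is the generic, slow-path handler.  Once it
 * has observed whether this device is running with or without DMA_RTAIL,
 * it calls set_all_dma_rtail() or set_all_nodma_rtail() so that all
 * kernel receive contexts use the cheaper specialized handler on
 * subsequent interrupts.
 */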
/*
 * handle_receive_interrupt - receive a packet
 * @rcd: the context
 *
 * Called from interrupt handler for errors or receive interrupt.
 * This is the slow path interrupt handler.
 */
int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
{
	struct hfi1_devdata *dd = rcd->dd;
	u32 hdrqtail;
	int last = RCV_PKT_OK, needset = 1;
	struct hfi1_packet packet;

	init_packet(rcd, &packet);

	if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) {
		u32 seq = rhf_rcv_seq(packet.rhf);

		if (seq != rcd->seq_cnt) {
			last = RCV_PKT_DONE;
			goto bail;
		}
		hdrqtail = 0;
	} else {
		hdrqtail = get_rcvhdrtail(rcd);
		if (packet.rhqoff == hdrqtail) {
			last = RCV_PKT_DONE;
			goto bail;
		}
		smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
	}

	prescan_rxq(&packet);

	while (last == RCV_PKT_OK) {

		if (unlikely(dd->do_drop && atomic_xchg(&dd->drop_packet,
			DROP_PACKET_OFF) == DROP_PACKET_ON)) {
			dd->do_drop = 0;

			/* On to the next packet */
			packet.rhqoff += packet.rsize;
			packet.rhf_addr = (__le32 *) rcd->rcvhdrq +
					  packet.rhqoff +
					  dd->rhf_offset;
			packet.rhf = rhf_to_cpu(packet.rhf_addr);

		} else {
			last = process_rcv_packet(&packet, thread);
		}

		if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) {
			u32 seq = rhf_rcv_seq(packet.rhf);

			if (++rcd->seq_cnt > 13)
				rcd->seq_cnt = 1;
			if (seq != rcd->seq_cnt)
				last = RCV_PKT_DONE;
			if (needset) {
				dd_dev_info(dd,
					"Switching to NO_DMA_RTAIL\n");
				set_all_nodma_rtail(dd);
				needset = 0;
			}
		} else {
			if (packet.rhqoff == hdrqtail)
				last = RCV_PKT_DONE;
			if (needset) {
				dd_dev_info(dd,
					    "Switching to DMA_RTAIL\n");
				set_all_dma_rtail(dd);
				needset = 0;
			}
		}

		process_rcv_update(last, &packet);
	}

	process_rcv_qp_work(&packet);

bail:
	/*
	 * Always write head at end, and setup rcv interrupt, even
	 * if no packets were processed.
	 */
	finish_packet(&packet);
	return last;
}

/*
 * Convert a given MTU size to the on-wire MAD packet enumeration.
 * Return default_if_bad if the size is invalid.
 */
int mtu_to_enum(u32 mtu, int default_if_bad)
{
	switch (mtu) {
	case 0: return OPA_MTU_0;
	case 256: return OPA_MTU_256;
	case 512: return OPA_MTU_512;
	case 1024: return OPA_MTU_1024;
	case 2048: return OPA_MTU_2048;
	case 4096: return OPA_MTU_4096;
	case 8192: return OPA_MTU_8192;
	case 10240: return OPA_MTU_10240;
	}
	return default_if_bad;
}

u16 enum_to_mtu(int mtu)
{
	switch (mtu) {
	case OPA_MTU_0: return 0;
	case OPA_MTU_256: return 256;
	case OPA_MTU_512: return 512;
	case OPA_MTU_1024: return 1024;
	case OPA_MTU_2048: return 2048;
	case OPA_MTU_4096: return 4096;
	case OPA_MTU_8192: return 8192;
	case OPA_MTU_10240: return 10240;
	default: return 0xffff;
	}
}
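/*
 * Example of the two helpers above: mtu_to_enum(4096, OPA_MTU_2048)
 * yields OPA_MTU_4096, while an unsupported size such as 3000 falls
 * through to the supplied default (OPA_MTU_2048 here).  enum_to_mtu()
 * is the inverse mapping and returns 0xffff for an unknown enum value.
 */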
/*
 * set_mtu - set the MTU
 * @ppd: the per port data
 *
 * We can handle "any" incoming size, the issue here is whether we
 * need to restrict our outgoing size.  We do not deal with what happens
 * to programs that are already running when the size changes.
 */
int set_mtu(struct hfi1_pportdata *ppd)
{
	struct hfi1_devdata *dd = ppd->dd;
	int i, drain, ret = 0, is_up = 0;

	ppd->ibmtu = 0;
	for (i = 0; i < ppd->vls_supported; i++)
		if (ppd->ibmtu < dd->vld[i].mtu)
			ppd->ibmtu = dd->vld[i].mtu;
	ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd);

	mutex_lock(&ppd->hls_lock);
	if (ppd->host_link_state == HLS_UP_INIT
			|| ppd->host_link_state == HLS_UP_ARMED
			|| ppd->host_link_state == HLS_UP_ACTIVE)
		is_up = 1;

	drain = !is_ax(dd) && is_up;

	if (drain)
		/*
		 * MTU is specified per-VL. To ensure that no packet gets
		 * stuck (due, e.g., to the MTU for the packet's VL being
		 * reduced), empty the per-VL FIFOs before adjusting MTU.
		 */
		ret = stop_drain_data_vls(dd);

	if (ret) {
		dd_dev_err(dd, "%s: cannot stop/drain VLs - refusing to change per-VL MTUs\n",
			   __func__);
		goto err;
	}

	hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_MTU, 0);

	if (drain)
		open_fill_data_vls(dd); /* reopen all VLs */

err:
	mutex_unlock(&ppd->hls_lock);

	return ret;
}

int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
{
	struct hfi1_devdata *dd = ppd->dd;

	ppd->lid = lid;
	ppd->lmc = lmc;
	hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0);

	dd_dev_info(dd, "IB%u:%u got a lid: 0x%x\n", dd->unit, ppd->port, lid);

	return 0;
}

/*
 * The following functions deal with the "obviously simple" task of
 * overriding the state of the LEDs, which normally indicate link physical
 * and logical status.  The complications arise in dealing with different
 * hardware mappings and the board-dependent routine being called from
 * interrupts.  And then there's the requirement to _flash_ them.
 */
#define LED_OVER_FREQ_SHIFT 8
#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
/* Below is "non-zero" to force override, but both actual LEDs are off */
#define LED_OVER_BOTH_OFF (8)

static void run_led_override(unsigned long opaque)
{
	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque;
	struct hfi1_devdata *dd = ppd->dd;
	int timeoff;
	int ph_idx;

	if (!(dd->flags & HFI1_INITTED))
		return;

	ph_idx = ppd->led_override_phase++ & 1;
	ppd->led_override = ppd->led_override_vals[ph_idx];
	timeoff = ppd->led_override_timeoff;

	/*
	 * don't re-fire the timer if user asked for it to be off; we let
	 * it fire one more time after they turn it off to simplify
	 */
	if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
		mod_timer(&ppd->led_override_timer, jiffies + timeoff);
}
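/*
 * Worked example for hfi1_set_led_override() below (illustrative values):
 * val = 0x0203 requests blinking, since the frequency byte (bits 15:8)
 * is 2; the two override phases come from the low nybbles, so phase 0 is
 * 0x3 and phase 1 is 0x0, and the per-phase timeout is
 * (HZ << 4) / 2 = 8 * HZ.  A val with a zero frequency byte, e.g.
 * LED_OVER_BOTH_OFF (8), applies the same value to both phases and is
 * re-evaluated at a 1 Hz polling rate.
 */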
void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val)
{
	struct hfi1_devdata *dd = ppd->dd;
	int timeoff, freq;

	if (!(dd->flags & HFI1_INITTED))
		return;

	/* First check if we are blinking. If not, use 1HZ polling */
	timeoff = HZ;
	freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;

	if (freq) {
		/* For blink, set each phase from one nybble of val */
		ppd->led_override_vals[0] = val & 0xF;
		ppd->led_override_vals[1] = (val >> 4) & 0xF;
		timeoff = (HZ << 4)/freq;
	} else {
		/* Non-blink set both phases the same. */
		ppd->led_override_vals[0] = val & 0xF;
		ppd->led_override_vals[1] = val & 0xF;
	}
	ppd->led_override_timeoff = timeoff;

	/*
	 * If the timer has not already been started, do so. Use a "quick"
	 * timeout so the function will be called soon, to look at our request.
	 */
	if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
		/* Need to start timer */
		setup_timer(&ppd->led_override_timer, run_led_override,
			    (unsigned long)ppd);

		ppd->led_override_timer.expires = jiffies + 1;
		add_timer(&ppd->led_override_timer);
	} else {
		if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
			mod_timer(&ppd->led_override_timer, jiffies + 1);
		atomic_dec(&ppd->led_override_timer_active);
	}
}

/**
 * hfi1_reset_device - reset the chip if possible
 * @unit: the device to reset
 *
 * Whether or not reset is successful, we attempt to re-initialize the chip
 * (that is, much like a driver unload/reload).  We clear the INITTED flag
 * so that the various entry points will fail until we reinitialize.  For
 * now, we only allow this if no user contexts are open that use chip
 * resources.
 */
int hfi1_reset_device(int unit)
{
	int ret, i;
	struct hfi1_devdata *dd = hfi1_lookup(unit);
	struct hfi1_pportdata *ppd;
	unsigned long flags;
	int pidx;

	if (!dd) {
		ret = -ENODEV;
		goto bail;
	}

	dd_dev_info(dd, "Reset on unit %u requested\n", unit);

	if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
		dd_dev_info(dd,
			"Invalid unit number %u or not initialized or not present\n",
			unit);
		ret = -ENXIO;
		goto bail;
	}

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	if (dd->rcd)
		for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
			if (!dd->rcd[i] || !dd->rcd[i]->cnt)
				continue;
			spin_unlock_irqrestore(&dd->uctxt_lock, flags);
			ret = -EBUSY;
			goto bail;
		}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		if (atomic_read(&ppd->led_override_timer_active)) {
			/* Need to stop LED timer, _then_ shut off LEDs */
			del_timer_sync(&ppd->led_override_timer);
			atomic_set(&ppd->led_override_timer_active, 0);
		}

		/* Shut off LEDs after we are sure timer is not running */
		ppd->led_override = LED_OVER_BOTH_OFF;
	}
	if (dd->flags & HFI1_HAS_SEND_DMA)
		sdma_exit(dd);

	hfi1_reset_cpu_counters(dd);

	ret = hfi1_init(dd, 1);

	if (ret)
		dd_dev_err(dd,
			"Reinitialize unit %u after reset failed with %d\n",
			unit, ret);
	else
		dd_dev_info(dd, "Reinitialized unit %u after resetting\n",
			    unit);

bail:
	return ret;
}
"icrc " : "", 1177 rte); 1178 1179 rcv_hdrerr(rcd, rcd->ppd, packet); 1180} 1181 1182/* 1183 * The following functions are called by the interrupt handler. They are type 1184 * specific handlers for each packet type. 1185 */ 1186int process_receive_ib(struct hfi1_packet *packet) 1187{ 1188 trace_hfi1_rcvhdr(packet->rcd->ppd->dd, 1189 packet->rcd->ctxt, 1190 rhf_err_flags(packet->rhf), 1191 RHF_RCV_TYPE_IB, 1192 packet->hlen, 1193 packet->tlen, 1194 packet->updegr, 1195 rhf_egr_index(packet->rhf)); 1196 1197 if (unlikely(rhf_err_flags(packet->rhf))) { 1198 handle_eflags(packet); 1199 return RHF_RCV_CONTINUE; 1200 } 1201 1202 hfi1_ib_rcv(packet); 1203 return RHF_RCV_CONTINUE; 1204} 1205 1206int process_receive_bypass(struct hfi1_packet *packet) 1207{ 1208 if (unlikely(rhf_err_flags(packet->rhf))) 1209 handle_eflags(packet); 1210 1211 dd_dev_err(packet->rcd->dd, 1212 "Bypass packets are not supported in normal operation. Dropping\n"); 1213 return RHF_RCV_CONTINUE; 1214} 1215 1216int process_receive_error(struct hfi1_packet *packet) 1217{ 1218 handle_eflags(packet); 1219 1220 if (unlikely(rhf_err_flags(packet->rhf))) 1221 dd_dev_err(packet->rcd->dd, 1222 "Unhandled error packet received. Dropping.\n"); 1223 1224 return RHF_RCV_CONTINUE; 1225} 1226 1227int kdeth_process_expected(struct hfi1_packet *packet) 1228{ 1229 if (unlikely(rhf_err_flags(packet->rhf))) 1230 handle_eflags(packet); 1231 1232 dd_dev_err(packet->rcd->dd, 1233 "Unhandled expected packet received. Dropping.\n"); 1234 return RHF_RCV_CONTINUE; 1235} 1236 1237int kdeth_process_eager(struct hfi1_packet *packet) 1238{ 1239 if (unlikely(rhf_err_flags(packet->rhf))) 1240 handle_eflags(packet); 1241 1242 dd_dev_err(packet->rcd->dd, 1243 "Unhandled eager packet received. Dropping.\n"); 1244 return RHF_RCV_CONTINUE; 1245} 1246 1247int process_receive_invalid(struct hfi1_packet *packet) 1248{ 1249 dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n", 1250 rhf_rcv_type(packet->rhf)); 1251 return RHF_RCV_CONTINUE; 1252} 1253