1/******************************************************************************* 2 3 Intel 10 Gigabit PCI Express Linux driver 4 Copyright(c) 1999 - 2013 Intel Corporation. 5 6 This program is free software; you can redistribute it and/or modify it 7 under the terms and conditions of the GNU General Public License, 8 version 2, as published by the Free Software Foundation. 9 10 This program is distributed in the hope it will be useful, but WITHOUT 11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 more details. 14 15 You should have received a copy of the GNU General Public License along with 16 this program; if not, write to the Free Software Foundation, Inc., 17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 18 19 The full GNU General Public License is included in this distribution in 20 the file called "COPYING". 21 22 Contact Information: 23 Linux NICS <linux.nics@intel.com> 24 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> 25 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 26 27*******************************************************************************/ 28 29#include "ixgbe.h" 30#include "ixgbe_sriov.h" 31 32#ifdef CONFIG_IXGBE_DCB 33/** 34 * ixgbe_cache_ring_dcb_sriov - Descriptor ring to register mapping for SR-IOV 35 * @adapter: board private structure to initialize 36 * 37 * Cache the descriptor ring offsets for SR-IOV to the assigned rings. It 38 * will also try to cache the proper offsets if RSS/FCoE are enabled along 39 * with VMDq. 
40 * 41 **/ 42static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter) 43{ 44#ifdef IXGBE_FCOE 45 struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE]; 46#endif /* IXGBE_FCOE */ 47 struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; 48 int i; 49 u16 reg_idx; 50 u8 tcs = netdev_get_num_tc(adapter->netdev); 51 52 /* verify we have DCB queueing enabled before proceeding */ 53 if (tcs <= 1) 54 return false; 55 56 /* verify we have VMDq enabled before proceeding */ 57 if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) 58 return false; 59 60 /* start at VMDq register offset for SR-IOV enabled setups */ 61 reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask); 62 for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) { 63 /* If we are greater than indices move to next pool */ 64 if ((reg_idx & ~vmdq->mask) >= tcs) 65 reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask); 66 adapter->rx_ring[i]->reg_idx = reg_idx; 67 } 68 69 reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask); 70 for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) { 71 /* If we are greater than indices move to next pool */ 72 if ((reg_idx & ~vmdq->mask) >= tcs) 73 reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask); 74 adapter->tx_ring[i]->reg_idx = reg_idx; 75 } 76 77#ifdef IXGBE_FCOE 78 /* nothing to do if FCoE is disabled */ 79 if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) 80 return true; 81 82 /* The work is already done if the FCoE ring is shared */ 83 if (fcoe->offset < tcs) 84 return true; 85 86 /* The FCoE rings exist separately, we need to move their reg_idx */ 87 if (fcoe->indices) { 88 u16 queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask); 89 u8 fcoe_tc = ixgbe_fcoe_get_tc(adapter); 90 91 reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool; 92 for (i = fcoe->offset; i < adapter->num_rx_queues; i++) { 93 reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc; 94 adapter->rx_ring[i]->reg_idx = reg_idx; 95 reg_idx++; 96 } 97 98 reg_idx = (vmdq->offset + 
vmdq->indices) * queues_per_pool; 99 for (i = fcoe->offset; i < adapter->num_tx_queues; i++) { 100 reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc; 101 adapter->tx_ring[i]->reg_idx = reg_idx; 102 reg_idx++; 103 } 104 } 105 106#endif /* IXGBE_FCOE */ 107 return true; 108} 109 110/* ixgbe_get_first_reg_idx - Return first register index associated with ring */ 111static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc, 112 unsigned int *tx, unsigned int *rx) 113{ 114 struct net_device *dev = adapter->netdev; 115 struct ixgbe_hw *hw = &adapter->hw; 116 u8 num_tcs = netdev_get_num_tc(dev); 117 118 *tx = 0; 119 *rx = 0; 120 121 switch (hw->mac.type) { 122 case ixgbe_mac_82598EB: 123 /* TxQs/TC: 4 RxQs/TC: 8 */ 124 *tx = tc << 2; /* 0, 4, 8, 12, 16, 20, 24, 28 */ 125 *rx = tc << 3; /* 0, 8, 16, 24, 32, 40, 48, 56 */ 126 break; 127 case ixgbe_mac_82599EB: 128 case ixgbe_mac_X540: 129 case ixgbe_mac_X550: 130 case ixgbe_mac_X550EM_x: 131 if (num_tcs > 4) { 132 /* 133 * TCs : TC0/1 TC2/3 TC4-7 134 * TxQs/TC: 32 16 8 135 * RxQs/TC: 16 16 16 136 */ 137 *rx = tc << 4; 138 if (tc < 3) 139 *tx = tc << 5; /* 0, 32, 64 */ 140 else if (tc < 5) 141 *tx = (tc + 2) << 4; /* 80, 96 */ 142 else 143 *tx = (tc + 8) << 3; /* 104, 112, 120 */ 144 } else { 145 /* 146 * TCs : TC0 TC1 TC2/3 147 * TxQs/TC: 64 32 16 148 * RxQs/TC: 32 32 32 149 */ 150 *rx = tc << 5; 151 if (tc < 2) 152 *tx = tc << 6; /* 0, 64 */ 153 else 154 *tx = (tc + 4) << 4; /* 96, 112 */ 155 } 156 default: 157 break; 158 } 159} 160 161/** 162 * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB 163 * @adapter: board private structure to initialize 164 * 165 * Cache the descriptor ring offsets for DCB to the assigned rings. 
166 * 167 **/ 168static bool ixgbe_cache_ring_dcb(struct ixgbe_adapter *adapter) 169{ 170 struct net_device *dev = adapter->netdev; 171 unsigned int tx_idx, rx_idx; 172 int tc, offset, rss_i, i; 173 u8 num_tcs = netdev_get_num_tc(dev); 174 175 /* verify we have DCB queueing enabled before proceeding */ 176 if (num_tcs <= 1) 177 return false; 178 179 rss_i = adapter->ring_feature[RING_F_RSS].indices; 180 181 for (tc = 0, offset = 0; tc < num_tcs; tc++, offset += rss_i) { 182 ixgbe_get_first_reg_idx(adapter, tc, &tx_idx, &rx_idx); 183 for (i = 0; i < rss_i; i++, tx_idx++, rx_idx++) { 184 adapter->tx_ring[offset + i]->reg_idx = tx_idx; 185 adapter->rx_ring[offset + i]->reg_idx = rx_idx; 186 adapter->tx_ring[offset + i]->dcb_tc = tc; 187 adapter->rx_ring[offset + i]->dcb_tc = tc; 188 } 189 } 190 191 return true; 192} 193 194#endif 195/** 196 * ixgbe_cache_ring_sriov - Descriptor ring to register mapping for sriov 197 * @adapter: board private structure to initialize 198 * 199 * SR-IOV doesn't use any descriptor rings but changes the default if 200 * no other mapping is used. 
 *
 */
static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
	struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
#endif /* IXGBE_FCOE */
	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
	struct ixgbe_ring_feature *rss = &adapter->ring_feature[RING_F_RSS];
	int i;
	u16 reg_idx;

	/* only proceed if VMDq is enabled */
	if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED))
		return false;

	/* start at VMDq register offset for SR-IOV enabled setups;
	 * __ALIGN_MASK(1, ~vmdq->mask) yields the queue count per pool
	 */
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
#ifdef IXGBE_FCOE
		/* Allow first FCoE queue to be mapped as RSS */
		if (fcoe->offset && (i > fcoe->offset))
			break;
#endif
		/* If we are greater than indices move to next pool */
		if ((reg_idx & ~vmdq->mask) >= rss->indices)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->rx_ring[i]->reg_idx = reg_idx;
	}

#ifdef IXGBE_FCOE
	/* FCoE uses a linear block of queues so just assigning 1:1 */
	for (; i < adapter->num_rx_queues; i++, reg_idx++)
		adapter->rx_ring[i]->reg_idx = reg_idx;

#endif
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) {
#ifdef IXGBE_FCOE
		/* Allow first FCoE queue to be mapped as RSS */
		if (fcoe->offset && (i > fcoe->offset))
			break;
#endif
		/* If we are greater than indices move to next pool.
		 * NOTE(review): the Rx loop above tests (reg_idx & ~vmdq->mask)
		 * while this Tx loop tests (reg_idx & rss->mask) — verify the
		 * asymmetry is intentional before "fixing" it.
		 */
		if ((reg_idx & rss->mask) >= rss->indices)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->tx_ring[i]->reg_idx = reg_idx;
	}

#ifdef IXGBE_FCOE
	/* FCoE uses a linear block of queues so just assigning 1:1 */
	for (; i < adapter->num_tx_queues; i++, reg_idx++)
		adapter->tx_ring[i]->reg_idx = reg_idx;

#endif

	return true;
}

/**
 * ixgbe_cache_ring_rss - Descriptor ring
to register mapping for RSS 262 * @adapter: board private structure to initialize 263 * 264 * Cache the descriptor ring offsets for RSS to the assigned rings. 265 * 266 **/ 267static bool ixgbe_cache_ring_rss(struct ixgbe_adapter *adapter) 268{ 269 int i; 270 271 for (i = 0; i < adapter->num_rx_queues; i++) 272 adapter->rx_ring[i]->reg_idx = i; 273 for (i = 0; i < adapter->num_tx_queues; i++) 274 adapter->tx_ring[i]->reg_idx = i; 275 276 return true; 277} 278 279/** 280 * ixgbe_cache_ring_register - Descriptor ring to register mapping 281 * @adapter: board private structure to initialize 282 * 283 * Once we know the feature-set enabled for the device, we'll cache 284 * the register offset the descriptor ring is assigned to. 285 * 286 * Note, the order the various feature calls is important. It must start with 287 * the "most" features enabled at the same time, then trickle down to the 288 * least amount of features turned on at once. 289 **/ 290static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter) 291{ 292 /* start with default case */ 293 adapter->rx_ring[0]->reg_idx = 0; 294 adapter->tx_ring[0]->reg_idx = 0; 295 296#ifdef CONFIG_IXGBE_DCB 297 if (ixgbe_cache_ring_dcb_sriov(adapter)) 298 return; 299 300 if (ixgbe_cache_ring_dcb(adapter)) 301 return; 302 303#endif 304 if (ixgbe_cache_ring_sriov(adapter)) 305 return; 306 307 ixgbe_cache_ring_rss(adapter); 308} 309 310#define IXGBE_RSS_16Q_MASK 0xF 311#define IXGBE_RSS_8Q_MASK 0x7 312#define IXGBE_RSS_4Q_MASK 0x3 313#define IXGBE_RSS_2Q_MASK 0x1 314#define IXGBE_RSS_DISABLED_MASK 0x0 315 316#ifdef CONFIG_IXGBE_DCB 317/** 318 * ixgbe_set_dcb_sriov_queues: Allocate queues for SR-IOV devices w/ DCB 319 * @adapter: board private structure to initialize 320 * 321 * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues 322 * and VM pools where appropriate. Also assign queues based on DCB 323 * priorities and map accordingly.. 
 *
 **/
static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter *adapter)
{
	int i;
	u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
	u16 vmdq_m = 0;
#ifdef IXGBE_FCOE
	u16 fcoe_i = 0;
#endif
	u8 tcs = netdev_get_num_tc(adapter->netdev);

	/* verify we have DCB queueing enabled before proceeding */
	if (tcs <= 1)
		return false;

	/* verify we have VMDq enabled before proceeding */
	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
		return false;

	/* Add starting offset to total pool count */
	vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;

	/* 16 pools w/ 8 TC per pool */
	if (tcs > 4) {
		vmdq_i = min_t(u16, vmdq_i, 16);
		vmdq_m = IXGBE_82599_VMDQ_8Q_MASK;
	/* 32 pools w/ 4 TC per pool */
	} else {
		vmdq_i = min_t(u16, vmdq_i, 32);
		vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
	}

#ifdef IXGBE_FCOE
	/* queues in the remaining pools are available for FCoE
	 * (128 is the total hardware queue count on these parts)
	 */
	fcoe_i = (128 / __ALIGN_MASK(1, ~vmdq_m)) - vmdq_i;

#endif
	/* remove the starting offset from the pool count */
	vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;

	/* save features for later use */
	adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
	adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;

	/*
	 * We do not support DCB, VMDq, and RSS all simultaneously
	 * so we will disable RSS since it is the lowest priority
	 */
	adapter->ring_feature[RING_F_RSS].indices = 1;
	adapter->ring_feature[RING_F_RSS].mask = IXGBE_RSS_DISABLED_MASK;

	/* disable ATR as it is not supported when VMDq is enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

	adapter->num_rx_pools = vmdq_i;
	adapter->num_rx_queues_per_pool = tcs;

	adapter->num_tx_queues = vmdq_i * tcs;
	adapter->num_rx_queues = vmdq_i * tcs;

#ifdef IXGBE_FCOE
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct ixgbe_ring_feature *fcoe;

		fcoe = &adapter->ring_feature[RING_F_FCOE];

		/* limit ourselves based on feature limits */
		fcoe_i = min_t(u16, fcoe_i, fcoe->limit);

		if (fcoe_i) {
			/* alloc queues for FCoE separately */
			fcoe->indices = fcoe_i;
			fcoe->offset = vmdq_i * tcs;

			/* add queues to adapter */
			adapter->num_tx_queues += fcoe_i;
			adapter->num_rx_queues += fcoe_i;
		} else if (tcs > 1) {
			/* use queue belonging to FcoE TC */
			fcoe->indices = 1;
			fcoe->offset = ixgbe_fcoe_get_tc(adapter);
		} else {
			/* no spare queues and no FCoE TC: give up on FCoE */
			adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;

			fcoe->indices = 0;
			fcoe->offset = 0;
		}
	}

#endif /* IXGBE_FCOE */
	/* configure TC to queue mapping: one queue per TC, TC i -> queue i */
	for (i = 0; i < tcs; i++)
		netdev_set_tc_queue(adapter->netdev, i, 1, i);

	return true;
}

/**
 * ixgbe_set_dcb_queues - Allocate queues for a DCB-enabled device
 * @adapter: board private structure to initialize
 *
 * Determine the RSS queue count per traffic class based on the MAC type
 * and the number of TCs, then size the Tx/Rx queue counts accordingly.
 * Returns false if DCB is not enabled so the caller can fall through.
 **/
static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
{
	struct net_device *dev = adapter->netdev;
	struct ixgbe_ring_feature *f;
	int rss_i, rss_m, i;
	int tcs;

	/* Map queue offset and counts onto allocated tx queues */
	tcs = netdev_get_num_tc(dev);

	/* verify we have DCB queueing enabled before proceeding */
	if (tcs <= 1)
		return false;

	/* determine the upper limit for our current DCB mode */
	rss_i = dev->num_tx_queues / tcs;
	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
		/* 8 TC w/ 4 queues per TC */
		rss_i = min_t(u16, rss_i, 4);
		rss_m = IXGBE_RSS_4Q_MASK;
	} else if (tcs > 4) {
		/* 8 TC w/ 8 queues per TC */
		rss_i = min_t(u16, rss_i, 8);
		rss_m = IXGBE_RSS_8Q_MASK;
	} else {
		/* 4 TC w/ 16 queues per TC */
		rss_i = min_t(u16, rss_i, 16);
		rss_m = IXGBE_RSS_16Q_MASK;
	}

	/* set RSS mask and indices */
	f = &adapter->ring_feature[RING_F_RSS];
	rss_i = min_t(int, rss_i, f->limit);
	f->indices = rss_i;
	f->mask = rss_m;

	/* disable ATR as it is not supported when multiple TCs are enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

#ifdef IXGBE_FCOE
	/* FCoE enabled queues require special configuration indexed
	 * by feature specific indices and offset. Here we map FCoE
	 * indices onto the DCB queue pairs allowing FCoE to own
	 * configuration later.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		u8 tc = ixgbe_fcoe_get_tc(adapter);

		f = &adapter->ring_feature[RING_F_FCOE];
		f->indices = min_t(u16, rss_i, f->limit);
		f->offset = rss_i * tc;
	}

#endif /* IXGBE_FCOE */
	/* each TC owns a contiguous run of rss_i queues */
	for (i = 0; i < tcs; i++)
		netdev_set_tc_queue(dev, i, rss_i, rss_i * i);

	adapter->num_tx_queues = rss_i * tcs;
	adapter->num_rx_queues = rss_i * tcs;

	return true;
}

#endif
/**
 * ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices
 * @adapter: board private structure to initialize
 *
 * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues
 * and VM pools where appropriate.  If RSS is available, then also try and
 * enable RSS and map accordingly.
 *
 **/
static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter)
{
	u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
	u16 vmdq_m = 0;
	u16 rss_i = adapter->ring_feature[RING_F_RSS].limit;
	u16 rss_m = IXGBE_RSS_DISABLED_MASK;
#ifdef IXGBE_FCOE
	u16 fcoe_i = 0;
#endif
	/* true when any macvlan offload pool beyond the default is in use */
	bool pools = (find_first_zero_bit(&adapter->fwd_bitmask, 32) > 1);

	/* only proceed if SR-IOV is enabled */
	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
		return false;

	/* Add starting offset to total pool count */
	vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;

	/* double check we are limited to maximum pools */
	vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i);

	/* 64 pool mode with 2 queues per pool */
	if ((vmdq_i > 32) || (rss_i < 4) || (vmdq_i > 16 && pools)) {
		vmdq_m = IXGBE_82599_VMDQ_2Q_MASK;
		rss_m = IXGBE_RSS_2Q_MASK;
		rss_i = min_t(u16, rss_i, 2);
	/* 32 pool mode with 4 queues per pool */
	} else {
		vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
		rss_m = IXGBE_RSS_4Q_MASK;
		rss_i = 4;
	}

#ifdef IXGBE_FCOE
	/* queues in the remaining pools are available for FCoE
	 * (128 is the total hardware queue count on these parts)
	 */
	fcoe_i = 128 - (vmdq_i * __ALIGN_MASK(1, ~vmdq_m));

#endif
	/* remove the starting offset from the pool count */
	vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;

	/* save features for later use */
	adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
	adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;

	/* limit RSS based on user input and save for later use */
	adapter->ring_feature[RING_F_RSS].indices = rss_i;
	adapter->ring_feature[RING_F_RSS].mask = rss_m;

	adapter->num_rx_pools = vmdq_i;
	adapter->num_rx_queues_per_pool = rss_i;

	adapter->num_rx_queues = vmdq_i * rss_i;
	adapter->num_tx_queues = vmdq_i * rss_i;

	/* disable ATR as it is not supported when VMDq is enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

#ifdef IXGBE_FCOE
	/*
	 * FCoE can use rings from adjacent buffers to allow RSS
	 * like behavior. To account for this we need to add the
	 * FCoE indices to the total ring count.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct ixgbe_ring_feature *fcoe;

		fcoe = &adapter->ring_feature[RING_F_FCOE];

		/* limit ourselves based on feature limits */
		fcoe_i = min_t(u16, fcoe_i, fcoe->limit);

		if (vmdq_i > 1 && fcoe_i) {
			/* alloc queues for FCoE separately */
			fcoe->indices = fcoe_i;
			fcoe->offset = vmdq_i * rss_i;
		} else {
			/* merge FCoE queues with RSS queues */
			fcoe_i = min_t(u16, fcoe_i + rss_i, num_online_cpus());

			/* limit indices to rss_i if MSI-X is disabled */
			if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
				fcoe_i = rss_i;

			/* attempt to reserve some queues for just FCoE */
			fcoe->indices = min_t(u16, fcoe_i, fcoe->limit);
			fcoe->offset = fcoe_i - fcoe->indices;

			/* only the queues beyond the shared RSS block are new */
			fcoe_i -= rss_i;
		}

		/* add queues to adapter */
		adapter->num_tx_queues += fcoe_i;
		adapter->num_rx_queues += fcoe_i;
	}

#endif
	return true;
}

/**
 * ixgbe_set_rss_queues - Allocate queues for RSS
 * @adapter: board private structure to initialize
 *
 * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try
 * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
 *
 **/
static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
{
	struct ixgbe_ring_feature *f;
	u16 rss_i;

	/* set mask for 16 queue limit of RSS */
	f = &adapter->ring_feature[RING_F_RSS];
	rss_i = f->limit;

	f->indices = rss_i;
	f->mask = IXGBE_RSS_16Q_MASK;

	/* disable ATR by default, it will be configured below */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

	/*
	 * Use Flow Director in addition to RSS to ensure the best
	 * distribution of flows across cores, even when an FDIR flow
	 * isn't matched.
	 */
	if (rss_i > 1 && adapter->atr_sample_rate) {
		f = &adapter->ring_feature[RING_F_FDIR];

		/* FDIR widens the queue count to its own limit */
		rss_i = f->indices = f->limit;

		if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
			adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
	}

#ifdef IXGBE_FCOE
	/*
	 * FCoE can exist on the same rings as standard network traffic
	 * however it is preferred to avoid that if possible.  In order
	 * to get the best performance we allocate as many FCoE queues
	 * as we can and we place them at the end of the ring array to
	 * avoid sharing queues with standard RSS on systems with 24 or
	 * more CPUs.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct net_device *dev = adapter->netdev;
		u16 fcoe_i;

		f = &adapter->ring_feature[RING_F_FCOE];

		/* merge FCoE queues with RSS queues */
		fcoe_i = min_t(u16, f->limit + rss_i, num_online_cpus());
		fcoe_i = min_t(u16, fcoe_i, dev->num_tx_queues);

		/* limit indices to rss_i if MSI-X is disabled */
		if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
			fcoe_i = rss_i;

		/* attempt to reserve some queues for just FCoE */
		f->indices = min_t(u16, fcoe_i, f->limit);
		f->offset = fcoe_i - f->indices;
		rss_i = max_t(u16, fcoe_i, rss_i);
	}

#endif /* IXGBE_FCOE */
	adapter->num_rx_queues = rss_i;
	adapter->num_tx_queues = rss_i;

	return true;
}

/**
 * ixgbe_set_num_queues - Allocate queues for device, feature dependent
 * @adapter: board private structure to initialize
 *
 * This is the top level queue allocation routine.  The order here is very
 * important, starting with the "most" number of features turned on at once,
 * and ending with the smallest set of features.  This way large combinations
 * can be allocated if they're turned on, and smaller combinations are the
 * fallthrough conditions.
 *
 **/
static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
{
	/* Start with base case */
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_rx_pools = adapter->num_rx_queues;
	adapter->num_rx_queues_per_pool = 1;

#ifdef CONFIG_IXGBE_DCB
	if (ixgbe_set_dcb_sriov_queues(adapter))
		return;

	if (ixgbe_set_dcb_queues(adapter))
		return;

#endif
	if (ixgbe_set_sriov_queues(adapter))
		return;

	/* RSS is the final fallback; it always succeeds */
	ixgbe_set_rss_queues(adapter);
}

/**
 * ixgbe_acquire_msix_vectors - acquire MSI-X vectors
 * @adapter: board private structure
 *
 * Attempts to acquire a suitable range of MSI-X vector interrupts.
 * Will return a negative error code if unable to acquire MSI-X vectors for any
 * reason.
 */
static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	int i, vectors, vector_threshold;

	/* We start by asking for one vector per queue pair */
	vectors = max(adapter->num_rx_queues, adapter->num_tx_queues);

	/* It is easy to be greedy for MSI-X vectors. However, it really
	 * doesn't do much good if we have a lot more vectors than CPUs. We'll
	 * be somewhat conservative and only ask for (roughly) the same number
	 * of vectors as there are CPUs.
	 */
	vectors = min_t(int, vectors, num_online_cpus());

	/* Some vectors are necessary for non-queue interrupts */
	vectors += NON_Q_VECTORS;

	/* Hardware can only support a maximum of hw.mac->max_msix_vectors.
	 * With features such as RSS and VMDq, we can easily surpass the
	 * number of Rx and Tx descriptor queues supported by our device.
	 * Thus, we cap the maximum in the rare cases where the CPU count also
	 * exceeds our vector limit
	 */
	vectors = min_t(int, vectors, hw->mac.max_msix_vectors);

	/* We want a minimum of two MSI-X vectors for (1) a TxQ[0] + RxQ[0]
	 * handler, and (2) an Other (Link Status Change, etc.) handler.
	 */
	vector_threshold = MIN_MSIX_COUNT;

	adapter->msix_entries = kcalloc(vectors,
					sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		return -ENOMEM;

	for (i = 0; i < vectors; i++)
		adapter->msix_entries[i].entry = i;

	/* request anywhere between the threshold and our computed maximum */
	vectors = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
					vector_threshold, vectors);

	if (vectors < 0) {
		/* A negative count of allocated vectors indicates an error in
		 * acquiring within the specified range of MSI-X vectors
		 */
		e_dev_warn("Failed to allocate MSI-X interrupts. Err: %d\n",
			   vectors);

		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;

		return vectors;
	}

	/* we successfully allocated some number of vectors within our
	 * requested range.
	 */
	adapter->flags |= IXGBE_FLAG_MSIX_ENABLED;

	/* Adjust for only the vectors we'll use, which is minimum
	 * of max_q_vectors, or the number of vectors we were allocated.
	 */
	vectors -= NON_Q_VECTORS;
	adapter->num_q_vectors = min_t(int, vectors, adapter->max_q_vectors);

	return 0;
}

/* ixgbe_add_ring - prepend a ring to the given ring container's list */
static void ixgbe_add_ring(struct ixgbe_ring *ring,
			   struct ixgbe_ring_container *head)
{
	ring->next = head->ring;
	head->ring = ring;
	head->count++;
}

/**
 * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector
 * @adapter: board private structure to initialize
 * @v_count: q_vectors allocated on adapter, used for ring interleaving
 * @v_idx: index of vector in adapter struct
 * @txr_count: total number of Tx rings to allocate
 * @txr_idx: index of first Tx ring to allocate
 * @rxr_count: total number of Rx rings to allocate
 * @rxr_idx: index of first Rx ring to allocate
 *
 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
 **/
static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
				int v_count, int v_idx,
				int txr_count, int txr_idx,
				int rxr_count, int rxr_idx)
{
	struct ixgbe_q_vector *q_vector;
	struct ixgbe_ring *ring;
	int node = NUMA_NO_NODE;
	int cpu = -1;
	int ring_count, size;
	u8 tcs = netdev_get_num_tc(adapter->netdev);

	/* rings are allocated inline with the q_vector itself */
	ring_count = txr_count + rxr_count;
	size = sizeof(struct ixgbe_q_vector) +
	       (sizeof(struct ixgbe_ring) * ring_count);

	/* customize cpu for Flow Director mapping */
	if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
		u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
		if (rss_i > 1 && adapter->atr_sample_rate) {
			if (cpu_online(v_idx)) {
				cpu = v_idx;
				node = cpu_to_node(cpu);
			}
		}
	}

	/* allocate q_vector and rings, falling back to any node on failure */
	q_vector = kzalloc_node(size, GFP_KERNEL, node);
	if (!q_vector)
		q_vector = kzalloc(size, GFP_KERNEL);
	if (!q_vector)
		return -ENOMEM;

	/* setup affinity mask and node */
	if (cpu != -1)
		cpumask_set_cpu(cpu, &q_vector->affinity_mask);
	q_vector->numa_node = node;

#ifdef CONFIG_IXGBE_DCA
	/* initialize CPU for DCA */
	q_vector->cpu = -1;

#endif
	/* initialize NAPI */
	netif_napi_add(adapter->netdev, &q_vector->napi,
		       ixgbe_poll, 64);
	napi_hash_add(&q_vector->napi);

#ifdef CONFIG_NET_RX_BUSY_POLL
	/* initialize busy poll */
	atomic_set(&q_vector->state, IXGBE_QV_STATE_DISABLE);

#endif
	/* tie q_vector and adapter together */
	adapter->q_vector[v_idx] = q_vector;
	q_vector->adapter = adapter;
	q_vector->v_idx = v_idx;

	/* initialize work limits */
	q_vector->tx.work_limit = adapter->tx_work_limit;

	/* initialize pointer to rings */
	ring = q_vector->ring;

	/* initialize ITR */
	if (txr_count && !rxr_count) {
		/* tx only vector */
		if (adapter->tx_itr_setting == 1)
			q_vector->itr = IXGBE_12K_ITR;
		else
			q_vector->itr = adapter->tx_itr_setting;
	} else {
		/* rx or rx/tx vector */
		if (adapter->rx_itr_setting == 1)
			q_vector->itr = IXGBE_20K_ITR;
		else
			q_vector->itr = adapter->rx_itr_setting;
	}

	while (txr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Tx values */
		ixgbe_add_ring(ring, &q_vector->tx);

		/* apply Tx specific ring traits */
		ring->count = adapter->tx_ring_count;
		if (adapter->num_rx_pools > 1)
			ring->queue_index =
				txr_idx % adapter->num_rx_queues_per_pool;
		else
			ring->queue_index = txr_idx;

		/* assign ring to adapter */
		adapter->tx_ring[txr_idx] = ring;

		/* update count and index; rings owned by successive vectors
		 * are interleaved, hence the stride of v_count
		 */
		txr_count--;
		txr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	while (rxr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Rx values */
		ixgbe_add_ring(ring, &q_vector->rx);

		/*
		 * 82599 errata, UDP frames with a 0 checksum
		 * can be marked as checksum errors.
		 */
		if (adapter->hw.mac.type == ixgbe_mac_82599EB)
			set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state);

#ifdef IXGBE_FCOE
		if (adapter->netdev->features & NETIF_F_FCOE_MTU) {
			struct ixgbe_ring_feature *f;
			f = &adapter->ring_feature[RING_F_FCOE];
			if ((rxr_idx >= f->offset) &&
			    (rxr_idx < f->offset + f->indices))
				set_bit(__IXGBE_RX_FCOE, &ring->state);
		}

#endif /* IXGBE_FCOE */
		/* apply Rx specific ring traits */
		ring->count = adapter->rx_ring_count;
		if (adapter->num_rx_pools > 1)
			ring->queue_index =
				rxr_idx % adapter->num_rx_queues_per_pool;
		else
			ring->queue_index = rxr_idx;

		/* assign ring to adapter */
		adapter->rx_ring[rxr_idx] = ring;

		/* update count and index (interleaved, stride v_count) */
		rxr_count--;
		rxr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	return 0;
}

/**
 * ixgbe_free_q_vector - Free memory allocated for specific interrupt vector
 * @adapter: board private structure to initialize
 * @v_idx: Index of vector to be freed
 *
 * This function frees the memory allocated to the q_vector.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
{
	struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx];
	struct ixgbe_ring *ring;

	/* drop the adapter's references to the rings embedded in q_vector */
	ixgbe_for_each_ring(ring, q_vector->tx)
		adapter->tx_ring[ring->queue_index] = NULL;

	ixgbe_for_each_ring(ring, q_vector->rx)
		adapter->rx_ring[ring->queue_index] = NULL;

	adapter->q_vector[v_idx] = NULL;
	napi_hash_del(&q_vector->napi);
	netif_napi_del(&q_vector->napi);

	/*
	 * ixgbe_get_stats64() might access the rings on this vector,
	 * we must wait a grace period before freeing it.
	 */
	kfree_rcu(q_vector, rcu);
}

/**
 * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
	int q_vectors = adapter->num_q_vectors;
	int rxr_remaining = adapter->num_rx_queues;
	int txr_remaining = adapter->num_tx_queues;
	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
	int err;

	/* only one q_vector if MSI-X is disabled. */
	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
		q_vectors = 1;

	/* when we have enough vectors to give each ring its own, hand out
	 * the Rx rings first, one per vector
	 */
	if (q_vectors >= (rxr_remaining + txr_remaining)) {
		for (; rxr_remaining; v_idx++) {
			err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
						   0, 0, 1, rxr_idx);

			if (err)
				goto err_out;

			/* update counts and index */
			rxr_remaining--;
			rxr_idx++;
		}
	}

	/* distribute the remaining rings evenly over the remaining vectors */
	for (; v_idx < q_vectors; v_idx++) {
		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
		err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
					   tqpv, txr_idx,
					   rqpv, rxr_idx);

		if (err)
			goto err_out;

		/* update counts and index */
		rxr_remaining -= rqpv;
		txr_remaining -= tqpv;
		rxr_idx++;
		txr_idx++;
	}

	return 0;

err_out:
	/* unwind any vectors already allocated before reporting failure */
	adapter->num_tx_queues = 0;
	adapter->num_rx_queues = 0;
	adapter->num_q_vectors = 0;

	while (v_idx--)
		ixgbe_free_q_vector(adapter, v_idx);

	return -ENOMEM;
}

/**
 * ixgbe_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.
 * In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void ixgbe_free_q_vectors(struct ixgbe_adapter *adapter)
{
	int v_idx = adapter->num_q_vectors;

	adapter->num_tx_queues = 0;
	adapter->num_rx_queues = 0;
	adapter->num_q_vectors = 0;

	/* free highest-index vectors first */
	while (v_idx--)
		ixgbe_free_q_vector(adapter, v_idx);
}

/* Release whichever interrupt mode (MSI-X or MSI) is currently enabled
 * and clear the corresponding adapter flag.
 */
static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
{
	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) {
		adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED;
		pci_disable_msi(adapter->pdev);
	}
}

/**
 * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported
 * @adapter: board private structure to initialize
 *
 * Attempt to configure the interrupts using the best available
 * capabilities of the hardware and the kernel.
 **/
static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
{
	int err;

	/* We will try to get MSI-X interrupts first */
	if (!ixgbe_acquire_msix_vectors(adapter))
		return;

	/* At this point, we do not have MSI-X capabilities. We need to
	 * reconfigure or disable various features which require MSI-X
	 * capability.
	 */

	/* Disable DCB unless we only have a single traffic class */
	if (netdev_get_num_tc(adapter->netdev) > 1) {
		e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n");
		netdev_reset_tc(adapter->netdev);

		/* 82598 has no per-TC flow control; restore the last
		 * link flow control mode that was requested
		 */
		if (adapter->hw.mac.type == ixgbe_mac_82598EB)
			adapter->hw.fc.requested_mode = adapter->last_lfc_mode;

		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
		adapter->temp_dcb_cfg.pfc_mode_enable = false;
		adapter->dcb_cfg.pfc_mode_enable = false;
	}

	adapter->dcb_cfg.num_tcs.pg_tcs = 1;
	adapter->dcb_cfg.num_tcs.pfc_tcs = 1;

	/* Disable SR-IOV support */
	e_dev_warn("Disabling SR-IOV support\n");
	ixgbe_disable_sriov(adapter);

	/* Disable RSS */
	e_dev_warn("Disabling RSS support\n");
	adapter->ring_feature[RING_F_RSS].limit = 1;

	/* recalculate number of queues now that many features have been
	 * changed or disabled.
	 */
	ixgbe_set_num_queues(adapter);
	adapter->num_q_vectors = 1;

	/* fall back to MSI, and to legacy INTx if even MSI fails */
	err = pci_enable_msi(adapter->pdev);
	if (err)
		e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n",
			   err);
	else
		adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
}

/**
 * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme
 * @adapter: board private structure to initialize
 *
 * We determine which interrupt scheme to use based on...
 * - Kernel support (MSI, MSI-X)
 *   - which can be user-defined (via MODULE_PARAM)
 * - Hardware queue count (num_*_queues)
 *   - defined by miscellaneous hardware support/features (RSS, etc.)
1154 **/ 1155int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter) 1156{ 1157 int err; 1158 1159 /* Number of supported queues */ 1160 ixgbe_set_num_queues(adapter); 1161 1162 /* Set interrupt mode */ 1163 ixgbe_set_interrupt_capability(adapter); 1164 1165 err = ixgbe_alloc_q_vectors(adapter); 1166 if (err) { 1167 e_dev_err("Unable to allocate memory for queue vectors\n"); 1168 goto err_alloc_q_vectors; 1169 } 1170 1171 ixgbe_cache_ring_register(adapter); 1172 1173 e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n", 1174 (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled", 1175 adapter->num_rx_queues, adapter->num_tx_queues); 1176 1177 set_bit(__IXGBE_DOWN, &adapter->state); 1178 1179 return 0; 1180 1181err_alloc_q_vectors: 1182 ixgbe_reset_interrupt_capability(adapter); 1183 return err; 1184} 1185 1186/** 1187 * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings 1188 * @adapter: board private structure to clear interrupt scheme on 1189 * 1190 * We go through and clear interrupt specific resources and reset the structure 1191 * to pre-load conditions 1192 **/ 1193void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter) 1194{ 1195 adapter->num_tx_queues = 0; 1196 adapter->num_rx_queues = 0; 1197 1198 ixgbe_free_q_vectors(adapter); 1199 ixgbe_reset_interrupt_capability(adapter); 1200} 1201 1202void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens, 1203 u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx) 1204{ 1205 struct ixgbe_adv_tx_context_desc *context_desc; 1206 u16 i = tx_ring->next_to_use; 1207 1208 context_desc = IXGBE_TX_CTXTDESC(tx_ring, i); 1209 1210 i++; 1211 tx_ring->next_to_use = (i < tx_ring->count) ? 
i : 0; 1212 1213 /* set bits to identify this as an advanced context descriptor */ 1214 type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 1215 1216 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); 1217 context_desc->seqnum_seed = cpu_to_le32(fcoe_sof_eof); 1218 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); 1219 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); 1220} 1221 1222