1/******************************************************************************* 2 * 3 * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver 4 * Copyright(c) 2013 - 2014 Intel Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along 16 * with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 * The full GNU General Public License is included in this distribution in 19 * the file called "COPYING". 20 * 21 * Contact Information: 22 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> 23 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 24 * 25 ******************************************************************************/ 26 27#include <linux/prefetch.h> 28#include <net/busy_poll.h> 29 30#include "i40evf.h" 31#include "i40e_prototype.h" 32 33static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, 34 u32 td_tag) 35{ 36 return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA | 37 ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) | 38 ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) | 39 ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) | 40 ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT)); 41} 42 43#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) 44 45/** 46 * i40e_unmap_and_free_tx_resource - Release a Tx buffer 47 * @ring: the ring that owns the buffer 48 * @tx_buffer: the buffer to free 49 **/ 50static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, 51 struct i40e_tx_buffer *tx_buffer) 52{ 53 if (tx_buffer->skb) { 54 if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) 55 kfree(tx_buffer->raw_buf); 56 else 57 dev_kfree_skb_any(tx_buffer->skb); 58 59 if (dma_unmap_len(tx_buffer, len)) 60 dma_unmap_single(ring->dev, 61 dma_unmap_addr(tx_buffer, dma), 62 dma_unmap_len(tx_buffer, len), 63 DMA_TO_DEVICE); 64 } else if (dma_unmap_len(tx_buffer, len)) { 65 dma_unmap_page(ring->dev, 66 dma_unmap_addr(tx_buffer, dma), 67 dma_unmap_len(tx_buffer, len), 68 DMA_TO_DEVICE); 69 } 70 tx_buffer->next_to_watch = NULL; 71 tx_buffer->skb = NULL; 72 dma_unmap_len_set(tx_buffer, len, 0); 73 /* tx_buffer must be completely set up in the transmit path */ 74} 75 76/** 77 * i40evf_clean_tx_ring - Free any empty Tx buffers 78 * @tx_ring: ring to be cleaned 79 **/ 80void i40evf_clean_tx_ring(struct i40e_ring *tx_ring) 81{ 82 unsigned long bi_size; 83 u16 i; 84 85 /* ring already cleared, nothing to do */ 86 if (!tx_ring->tx_bi) 87 return; 88 89 /* Free all the Tx ring sk_buffs */ 90 for (i = 0; i < tx_ring->count; i++) 91 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]); 92 93 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; 94 memset(tx_ring->tx_bi, 0, bi_size); 95 96 /* Zero out the descriptor ring */ 97 memset(tx_ring->desc, 0, tx_ring->size); 98 99 tx_ring->next_to_use = 0; 100 tx_ring->next_to_clean = 0; 101 102 if (!tx_ring->netdev) 103 return; 104 105 /* cleanup Tx queue statistics */ 106 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, 107 tx_ring->queue_index)); 108} 109 110/** 111 * i40evf_free_tx_resources - 
Free Tx resources per queue 112 * @tx_ring: Tx descriptor ring for a specific queue 113 * 114 * Free all transmit software resources 115 **/ 116void i40evf_free_tx_resources(struct i40e_ring *tx_ring) 117{ 118 i40evf_clean_tx_ring(tx_ring); 119 kfree(tx_ring->tx_bi); 120 tx_ring->tx_bi = NULL; 121 122 if (tx_ring->desc) { 123 dma_free_coherent(tx_ring->dev, tx_ring->size, 124 tx_ring->desc, tx_ring->dma); 125 tx_ring->desc = NULL; 126 } 127} 128 129/** 130 * i40e_get_head - Retrieve head from head writeback 131 * @tx_ring: tx ring to fetch head of 132 * 133 * Returns value of Tx ring head based on value stored 134 * in head write-back location 135 **/ 136static inline u32 i40e_get_head(struct i40e_ring *tx_ring) 137{ 138 void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; 139 140 return le32_to_cpu(*(volatile __le32 *)head); 141} 142 143/** 144 * i40e_get_tx_pending - how many tx descriptors not processed 145 * @tx_ring: the ring of descriptors 146 * 147 * Since there is no access to the ring head register 148 * in XL710, we need to use our local copies 149 **/ 150static u32 i40e_get_tx_pending(struct i40e_ring *ring) 151{ 152 u32 head, tail; 153 154 head = i40e_get_head(ring); 155 tail = readl(ring->tail); 156 157 if (head != tail) 158 return (head < tail) ? 159 tail - head : (tail + ring->count - head); 160 161 return 0; 162} 163 164/** 165 * i40e_check_tx_hang - Is there a hang in the Tx queue 166 * @tx_ring: the ring of descriptors 167 **/ 168static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) 169{ 170 u32 tx_done = tx_ring->stats.packets; 171 u32 tx_done_old = tx_ring->tx_stats.tx_done_old; 172 u32 tx_pending = i40e_get_tx_pending(tx_ring); 173 bool ret = false; 174 175 clear_check_for_tx_hang(tx_ring); 176 177 /* Check for a hung queue, but be thorough. This verifies 178 * that a transmit has been completed since the previous 179 * check AND there is at least one packet pending. The 180 * ARMED bit is set to indicate a potential hang. The 181 * bit is cleared if a pause frame is received to remove 182 * false hang detection due to PFC or 802.3x frames. By 183 * requiring this to fail twice we avoid races with 184 * PFC clearing the ARMED bit and conditions where we 185 * run the check_tx_hang logic with a transmit completion 186 * pending but without time to complete it yet. 187 */ 188 if ((tx_done_old == tx_done) && tx_pending) { 189 /* make sure it is true for two checks in a row */ 190 ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, 191 &tx_ring->state); 192 } else if (tx_done_old == tx_done && 193 (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) { 194 /* update completed stats and disarm the hang check */ 195 tx_ring->tx_stats.tx_done_old = tx_done; 196 clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); 197 } 198 199 return ret; 200} 201 202#define WB_STRIDE 0x3 203 204/** 205 * i40e_clean_tx_irq - Reclaim resources after transmit completes 206 * @tx_ring: tx ring to clean 207 * @budget: how many cleans we're allowed 208 * 209 * Returns true if there's any budget left (e.g. 
the clean is finished) 210 **/ 211static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) 212{ 213 u16 i = tx_ring->next_to_clean; 214 struct i40e_tx_buffer *tx_buf; 215 struct i40e_tx_desc *tx_head; 216 struct i40e_tx_desc *tx_desc; 217 unsigned int total_packets = 0; 218 unsigned int total_bytes = 0; 219 220 tx_buf = &tx_ring->tx_bi[i]; 221 tx_desc = I40E_TX_DESC(tx_ring, i); 222 i -= tx_ring->count; 223 224 tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring)); 225 226 do { 227 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; 228 229 /* if next_to_watch is not set then there is no work pending */ 230 if (!eop_desc) 231 break; 232 233 /* prevent any other reads prior to eop_desc */ 234 read_barrier_depends(); 235 236 /* we have caught up to head, no work left to do */ 237 if (tx_head == tx_desc) 238 break; 239 240 /* clear next_to_watch to prevent false hangs */ 241 tx_buf->next_to_watch = NULL; 242 243 /* update the statistics for this packet */ 244 total_bytes += tx_buf->bytecount; 245 total_packets += tx_buf->gso_segs; 246 247 /* free the skb */ 248 dev_kfree_skb_any(tx_buf->skb); 249 250 /* unmap skb header data */ 251 dma_unmap_single(tx_ring->dev, 252 dma_unmap_addr(tx_buf, dma), 253 dma_unmap_len(tx_buf, len), 254 DMA_TO_DEVICE); 255 256 /* clear tx_buffer data */ 257 tx_buf->skb = NULL; 258 dma_unmap_len_set(tx_buf, len, 0); 259 260 /* unmap remaining buffers */ 261 while (tx_desc != eop_desc) { 262 263 tx_buf++; 264 tx_desc++; 265 i++; 266 if (unlikely(!i)) { 267 i -= tx_ring->count; 268 tx_buf = tx_ring->tx_bi; 269 tx_desc = I40E_TX_DESC(tx_ring, 0); 270 } 271 272 /* unmap any remaining paged data */ 273 if (dma_unmap_len(tx_buf, len)) { 274 dma_unmap_page(tx_ring->dev, 275 dma_unmap_addr(tx_buf, dma), 276 dma_unmap_len(tx_buf, len), 277 DMA_TO_DEVICE); 278 dma_unmap_len_set(tx_buf, len, 0); 279 } 280 } 281 282 /* move us one more past the eop_desc for start of next pkt */ 283 tx_buf++; 284 tx_desc++; 285 i++; 286 if (unlikely(!i)) { 287 i -= tx_ring->count; 288 tx_buf = tx_ring->tx_bi; 289 tx_desc = I40E_TX_DESC(tx_ring, 0); 290 } 291 292 prefetch(tx_desc); 293 294 /* update budget accounting */ 295 budget--; 296 } while (likely(budget)); 297 298 i += tx_ring->count; 299 tx_ring->next_to_clean = i; 300 u64_stats_update_begin(&tx_ring->syncp); 301 tx_ring->stats.bytes += total_bytes; 302 tx_ring->stats.packets += total_packets; 303 u64_stats_update_end(&tx_ring->syncp); 304 tx_ring->q_vector->tx.total_bytes += total_bytes; 305 tx_ring->q_vector->tx.total_packets += total_packets; 306 307 if (budget && 308 !((i & WB_STRIDE) == WB_STRIDE) && 309 !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && 310 (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) 311 tx_ring->arm_wb = true; 312 else 313 tx_ring->arm_wb = false; 314 315 if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) { 316 /* schedule immediate reset if we believe we hung */ 317 dev_info(tx_ring->dev, "Detected Tx Unit Hang\n" 318 " VSI <%d>\n" 319 " Tx Queue <%d>\n" 320 " next_to_use <%x>\n" 321 " next_to_clean <%x>\n", 322 tx_ring->vsi->seid, 323 tx_ring->queue_index, 324 tx_ring->next_to_use, i); 325 dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n" 326 " time_stamp <%lx>\n" 327 " jiffies <%lx>\n", 328 tx_ring->tx_bi[i].time_stamp, jiffies); 329 330 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); 331 332 dev_info(tx_ring->dev, 333 "tx hang detected on queue %d, resetting adapter\n", 334 tx_ring->queue_index); 335 336 tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev); 337 338 
/* the adapter is about to reset, no point in enabling stuff */ 339 return true; 340 } 341 342 netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, 343 tx_ring->queue_index), 344 total_packets, total_bytes); 345 346#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 347 if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && 348 (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { 349 /* Make sure that anybody stopping the queue after this 350 * sees the new next_to_clean. 351 */ 352 smp_mb(); 353 if (__netif_subqueue_stopped(tx_ring->netdev, 354 tx_ring->queue_index) && 355 !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) { 356 netif_wake_subqueue(tx_ring->netdev, 357 tx_ring->queue_index); 358 ++tx_ring->tx_stats.restart_queue; 359 } 360 } 361 362 return budget > 0; 363} 364 365/** 366 * i40e_force_wb -Arm hardware to do a wb on noncache aligned descriptors 367 * @vsi: the VSI we care about 368 * @q_vector: the vector on which to force writeback 369 * 370 **/ 371static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) 372{ 373 u32 val = I40E_VFINT_DYN_CTLN_INTENA_MASK | 374 I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ 375 I40E_VFINT_DYN_CTLN_SWINT_TRIG_MASK | 376 I40E_VFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK; 377 /* allow 00 to be written to the index */ 378 379 wr32(&vsi->back->hw, 380 I40E_VFINT_DYN_CTLN1(q_vector->v_idx + vsi->base_vector - 1), 381 val); 382} 383 384/** 385 * i40e_set_new_dynamic_itr - Find new ITR level 386 * @rc: structure containing ring performance data 387 * 388 * Stores a new ITR value based on packets and byte counts during 389 * the last interrupt. The advantage of per interrupt computation 390 * is faster updates and more accurate ITR for the current traffic 391 * pattern. Constants in this function were computed based on 392 * theoretical maximum wire speed and thresholds were set based on 393 * testing data as well as attempting to minimize response time 394 * while increasing bulk throughput. 
395 **/ 396static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) 397{ 398 enum i40e_latency_range new_latency_range = rc->latency_range; 399 u32 new_itr = rc->itr; 400 int bytes_per_int; 401 402 if (rc->total_packets == 0 || !rc->itr) 403 return; 404 405 /* simple throttlerate management 406 * 0-10MB/s lowest (100000 ints/s) 407 * 10-20MB/s low (20000 ints/s) 408 * 20-1249MB/s bulk (8000 ints/s) 409 */ 410 bytes_per_int = rc->total_bytes / rc->itr; 411 switch (rc->itr) { 412 case I40E_LOWEST_LATENCY: 413 if (bytes_per_int > 10) 414 new_latency_range = I40E_LOW_LATENCY; 415 break; 416 case I40E_LOW_LATENCY: 417 if (bytes_per_int > 20) 418 new_latency_range = I40E_BULK_LATENCY; 419 else if (bytes_per_int <= 10) 420 new_latency_range = I40E_LOWEST_LATENCY; 421 break; 422 case I40E_BULK_LATENCY: 423 if (bytes_per_int <= 20) 424 rc->latency_range = I40E_LOW_LATENCY; 425 break; 426 } 427 428 switch (new_latency_range) { 429 case I40E_LOWEST_LATENCY: 430 new_itr = I40E_ITR_100K; 431 break; 432 case I40E_LOW_LATENCY: 433 new_itr = I40E_ITR_20K; 434 break; 435 case I40E_BULK_LATENCY: 436 new_itr = I40E_ITR_8K; 437 break; 438 default: 439 break; 440 } 441 442 if (new_itr != rc->itr) { 443 /* do an exponential smoothing */ 444 new_itr = (10 * new_itr * rc->itr) / 445 ((9 * new_itr) + rc->itr); 446 rc->itr = new_itr & I40E_MAX_ITR; 447 } 448 449 rc->total_bytes = 0; 450 rc->total_packets = 0; 451} 452 453/** 454 * i40e_update_dynamic_itr - Adjust ITR based on bytes per int 455 * @q_vector: the vector to adjust 456 **/ 457static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector) 458{ 459 u16 vector = q_vector->vsi->base_vector + q_vector->v_idx; 460 struct i40e_hw *hw = &q_vector->vsi->back->hw; 461 u32 reg_addr; 462 u16 old_itr; 463 464 reg_addr = I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1); 465 old_itr = q_vector->rx.itr; 466 i40e_set_new_dynamic_itr(&q_vector->rx); 467 if (old_itr != q_vector->rx.itr) 468 wr32(hw, reg_addr, q_vector->rx.itr); 469 470 reg_addr = I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1); 471 old_itr = q_vector->tx.itr; 472 i40e_set_new_dynamic_itr(&q_vector->tx); 473 if (old_itr != q_vector->tx.itr) 474 wr32(hw, reg_addr, q_vector->tx.itr); 475} 476 477/** 478 * i40evf_setup_tx_descriptors - Allocate the Tx descriptors 479 * @tx_ring: the tx ring to set up 480 * 481 * Return 0 on success, negative on error 482 **/ 483int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring) 484{ 485 struct device *dev = tx_ring->dev; 486 int bi_size; 487 488 if (!dev) 489 return -ENOMEM; 490 491 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; 492 tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL); 493 if (!tx_ring->tx_bi) 494 goto err; 495 496 /* round up to nearest 4K */ 497 tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc); 498 /* add u32 for head writeback, align after this takes care of 499 * guaranteeing this is at least one cache line in size 500 */ 501 tx_ring->size += sizeof(u32); 502 tx_ring->size = ALIGN(tx_ring->size, 4096); 503 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 504 &tx_ring->dma, GFP_KERNEL); 505 if (!tx_ring->desc) { 506 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", 507 tx_ring->size); 508 goto err; 509 } 510 511 tx_ring->next_to_use = 0; 512 tx_ring->next_to_clean = 0; 513 return 0; 514 515err: 516 kfree(tx_ring->tx_bi); 517 tx_ring->tx_bi = NULL; 518 return -ENOMEM; 519} 520 521/** 522 * i40evf_clean_rx_ring - Free Rx buffers 523 * @rx_ring: ring to be cleaned 524 **/ 525void 
i40evf_clean_rx_ring(struct i40e_ring *rx_ring) 526{ 527 struct device *dev = rx_ring->dev; 528 struct i40e_rx_buffer *rx_bi; 529 unsigned long bi_size; 530 u16 i; 531 532 /* ring already cleared, nothing to do */ 533 if (!rx_ring->rx_bi) 534 return; 535 536 if (ring_is_ps_enabled(rx_ring)) { 537 int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count; 538 539 rx_bi = &rx_ring->rx_bi[0]; 540 if (rx_bi->hdr_buf) { 541 dma_free_coherent(dev, 542 bufsz, 543 rx_bi->hdr_buf, 544 rx_bi->dma); 545 for (i = 0; i < rx_ring->count; i++) { 546 rx_bi = &rx_ring->rx_bi[i]; 547 rx_bi->dma = 0; 548 rx_bi->hdr_buf = NULL; 549 } 550 } 551 } 552 /* Free all the Rx ring sk_buffs */ 553 for (i = 0; i < rx_ring->count; i++) { 554 rx_bi = &rx_ring->rx_bi[i]; 555 if (rx_bi->dma) { 556 dma_unmap_single(dev, 557 rx_bi->dma, 558 rx_ring->rx_buf_len, 559 DMA_FROM_DEVICE); 560 rx_bi->dma = 0; 561 } 562 if (rx_bi->skb) { 563 dev_kfree_skb(rx_bi->skb); 564 rx_bi->skb = NULL; 565 } 566 if (rx_bi->page) { 567 if (rx_bi->page_dma) { 568 dma_unmap_page(dev, 569 rx_bi->page_dma, 570 PAGE_SIZE / 2, 571 DMA_FROM_DEVICE); 572 rx_bi->page_dma = 0; 573 } 574 __free_page(rx_bi->page); 575 rx_bi->page = NULL; 576 rx_bi->page_offset = 0; 577 } 578 } 579 580 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; 581 memset(rx_ring->rx_bi, 0, bi_size); 582 583 /* Zero out the descriptor ring */ 584 memset(rx_ring->desc, 0, rx_ring->size); 585 586 rx_ring->next_to_clean = 0; 587 rx_ring->next_to_use = 0; 588} 589 590/** 591 * i40evf_free_rx_resources - Free Rx resources 592 * @rx_ring: ring to clean the resources from 593 * 594 * Free all receive software resources 595 **/ 596void i40evf_free_rx_resources(struct i40e_ring *rx_ring) 597{ 598 i40evf_clean_rx_ring(rx_ring); 599 kfree(rx_ring->rx_bi); 600 rx_ring->rx_bi = NULL; 601 602 if (rx_ring->desc) { 603 dma_free_coherent(rx_ring->dev, rx_ring->size, 604 rx_ring->desc, rx_ring->dma); 605 rx_ring->desc = NULL; 606 } 607} 608 609/** 610 * i40evf_alloc_rx_headers - allocate rx header buffers 611 * @rx_ring: ring to alloc buffers 612 * 613 * Allocate rx header buffers for the entire ring. As these are static, 614 * this is only called when setting up a new ring. 615 **/ 616void i40evf_alloc_rx_headers(struct i40e_ring *rx_ring) 617{ 618 struct device *dev = rx_ring->dev; 619 struct i40e_rx_buffer *rx_bi; 620 dma_addr_t dma; 621 void *buffer; 622 int buf_size; 623 int i; 624 625 if (rx_ring->rx_bi[0].hdr_buf) 626 return; 627 /* Make sure the buffers don't cross cache line boundaries. */ 628 buf_size = ALIGN(rx_ring->rx_hdr_len, 256); 629 buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count, 630 &dma, GFP_KERNEL); 631 if (!buffer) 632 return; 633 for (i = 0; i < rx_ring->count; i++) { 634 rx_bi = &rx_ring->rx_bi[i]; 635 rx_bi->dma = dma + (i * buf_size); 636 rx_bi->hdr_buf = buffer + (i * buf_size); 637 } 638} 639 640/** 641 * i40evf_setup_rx_descriptors - Allocate Rx descriptors 642 * @rx_ring: Rx descriptor ring (for a specific queue) to setup 643 * 644 * Returns 0 on success, negative on failure 645 **/ 646int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring) 647{ 648 struct device *dev = rx_ring->dev; 649 int bi_size; 650 651 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; 652 rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); 653 if (!rx_ring->rx_bi) 654 goto err; 655 656 u64_stats_init(&rx_ring->syncp); 657 658 /* Round up to nearest 4K */ 659 rx_ring->size = ring_is_16byte_desc_enabled(rx_ring) 660 ? 
rx_ring->count * sizeof(union i40e_16byte_rx_desc) 661 : rx_ring->count * sizeof(union i40e_32byte_rx_desc); 662 rx_ring->size = ALIGN(rx_ring->size, 4096); 663 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 664 &rx_ring->dma, GFP_KERNEL); 665 666 if (!rx_ring->desc) { 667 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", 668 rx_ring->size); 669 goto err; 670 } 671 672 rx_ring->next_to_clean = 0; 673 rx_ring->next_to_use = 0; 674 675 return 0; 676err: 677 kfree(rx_ring->rx_bi); 678 rx_ring->rx_bi = NULL; 679 return -ENOMEM; 680} 681 682/** 683 * i40e_release_rx_desc - Store the new tail and head values 684 * @rx_ring: ring to bump 685 * @val: new head index 686 **/ 687static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) 688{ 689 rx_ring->next_to_use = val; 690 /* Force memory writes to complete before letting h/w 691 * know there are new descriptors to fetch. (Only 692 * applicable for weak-ordered memory model archs, 693 * such as IA-64). 694 */ 695 wmb(); 696 writel(val, rx_ring->tail); 697} 698 699/** 700 * i40evf_alloc_rx_buffers_ps - Replace used receive buffers; packet split 701 * @rx_ring: ring to place buffers on 702 * @cleaned_count: number of buffers to replace 703 **/ 704void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) 705{ 706 u16 i = rx_ring->next_to_use; 707 union i40e_rx_desc *rx_desc; 708 struct i40e_rx_buffer *bi; 709 710 /* do nothing if no valid netdev defined */ 711 if (!rx_ring->netdev || !cleaned_count) 712 return; 713 714 while (cleaned_count--) { 715 rx_desc = I40E_RX_DESC(rx_ring, i); 716 bi = &rx_ring->rx_bi[i]; 717 718 if (bi->skb) /* desc is in use */ 719 goto no_buffers; 720 if (!bi->page) { 721 bi->page = alloc_page(GFP_ATOMIC); 722 if (!bi->page) { 723 rx_ring->rx_stats.alloc_page_failed++; 724 goto no_buffers; 725 } 726 } 727 728 if (!bi->page_dma) { 729 /* use a half page if we're re-using */ 730 bi->page_offset ^= PAGE_SIZE / 2; 731 bi->page_dma = dma_map_page(rx_ring->dev, 732 bi->page, 733 bi->page_offset, 734 PAGE_SIZE / 2, 735 DMA_FROM_DEVICE); 736 if (dma_mapping_error(rx_ring->dev, 737 bi->page_dma)) { 738 rx_ring->rx_stats.alloc_page_failed++; 739 bi->page_dma = 0; 740 goto no_buffers; 741 } 742 } 743 744 dma_sync_single_range_for_device(rx_ring->dev, 745 bi->dma, 746 0, 747 rx_ring->rx_hdr_len, 748 DMA_FROM_DEVICE); 749 /* Refresh the desc even if buffer_addrs didn't change 750 * because each write-back erases this info. 
751 */ 752 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); 753 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); 754 i++; 755 if (i == rx_ring->count) 756 i = 0; 757 } 758 759no_buffers: 760 if (rx_ring->next_to_use != i) 761 i40e_release_rx_desc(rx_ring, i); 762} 763 764/** 765 * i40evf_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer 766 * @rx_ring: ring to place buffers on 767 * @cleaned_count: number of buffers to replace 768 **/ 769void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) 770{ 771 u16 i = rx_ring->next_to_use; 772 union i40e_rx_desc *rx_desc; 773 struct i40e_rx_buffer *bi; 774 struct sk_buff *skb; 775 776 /* do nothing if no valid netdev defined */ 777 if (!rx_ring->netdev || !cleaned_count) 778 return; 779 780 while (cleaned_count--) { 781 rx_desc = I40E_RX_DESC(rx_ring, i); 782 bi = &rx_ring->rx_bi[i]; 783 skb = bi->skb; 784 785 if (!skb) { 786 skb = netdev_alloc_skb_ip_align(rx_ring->netdev, 787 rx_ring->rx_buf_len); 788 if (!skb) { 789 rx_ring->rx_stats.alloc_buff_failed++; 790 goto no_buffers; 791 } 792 /* initialize queue mapping */ 793 skb_record_rx_queue(skb, rx_ring->queue_index); 794 bi->skb = skb; 795 } 796 797 if (!bi->dma) { 798 bi->dma = dma_map_single(rx_ring->dev, 799 skb->data, 800 rx_ring->rx_buf_len, 801 DMA_FROM_DEVICE); 802 if (dma_mapping_error(rx_ring->dev, bi->dma)) { 803 rx_ring->rx_stats.alloc_buff_failed++; 804 bi->dma = 0; 805 goto no_buffers; 806 } 807 } 808 809 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); 810 rx_desc->read.hdr_addr = 0; 811 i++; 812 if (i == rx_ring->count) 813 i = 0; 814 } 815 816no_buffers: 817 if (rx_ring->next_to_use != i) 818 i40e_release_rx_desc(rx_ring, i); 819} 820 821/** 822 * i40e_receive_skb - Send a completed packet up the stack 823 * @rx_ring: rx ring in play 824 * @skb: packet to send up 825 * @vlan_tag: vlan tag for packet 826 **/ 827static void i40e_receive_skb(struct i40e_ring *rx_ring, 828 struct sk_buff *skb, u16 vlan_tag) 829{ 830 struct i40e_q_vector *q_vector = rx_ring->q_vector; 831 struct i40e_vsi *vsi = rx_ring->vsi; 832 u64 flags = vsi->back->flags; 833 834 if (vlan_tag & VLAN_VID_MASK) 835 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); 836 837 if (flags & I40E_FLAG_IN_NETPOLL) 838 netif_rx(skb); 839 else 840 napi_gro_receive(&q_vector->napi, skb); 841} 842 843/** 844 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum 845 * @vsi: the VSI we care about 846 * @skb: skb currently being received and modified 847 * @rx_status: status value of last descriptor in packet 848 * @rx_error: error value of last descriptor in packet 849 * @rx_ptype: ptype value of last descriptor in packet 850 **/ 851static inline void i40e_rx_checksum(struct i40e_vsi *vsi, 852 struct sk_buff *skb, 853 u32 rx_status, 854 u32 rx_error, 855 u16 rx_ptype) 856{ 857 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); 858 bool ipv4 = false, ipv6 = false; 859 bool ipv4_tunnel, ipv6_tunnel; 860 __wsum rx_udp_csum; 861 struct iphdr *iph; 862 __sum16 csum; 863 864 ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && 865 (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); 866 ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && 867 (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); 868 869 skb->ip_summed = CHECKSUM_NONE; 870 871 /* Rx csum enabled and ip headers found? */ 872 if (!(vsi->netdev->features & NETIF_F_RXCSUM)) 873 return; 874 875 /* did the hardware decode the packet and checksum? 
*/ 876 if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT))) 877 return; 878 879 /* both known and outer_ip must be set for the below code to work */ 880 if (!(decoded.known && decoded.outer_ip)) 881 return; 882 883 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 884 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) 885 ipv4 = true; 886 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 887 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) 888 ipv6 = true; 889 890 if (ipv4 && 891 (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | 892 (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))) 893 goto checksum_fail; 894 895 /* likely incorrect csum if alternate IP extension headers found */ 896 if (ipv6 && 897 rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) 898 /* don't increment checksum err here, non-fatal err */ 899 return; 900 901 /* there was some L4 error, count error and punt packet to the stack */ 902 if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)) 903 goto checksum_fail; 904 905 /* handle packets that were not able to be checksummed due 906 * to arrival speed; in this case the stack can compute 907 * the csum. 908 */ 909 if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT)) 910 return; 911 912 /* If VXLAN traffic has an outer UDPv4 checksum we need to check 913 * it in the driver, hardware does not do it for us. 914 * Since L3L4P bit was set we assume a valid IHL value (>=5) 915 * so the total length of IPv4 header is IHL*4 bytes 916 * The UDP_0 bit *may* be set if the *inner* header is UDP 917 */ 918 if (ipv4_tunnel) { 919 skb->transport_header = skb->mac_header + 920 sizeof(struct ethhdr) + 921 (ip_hdr(skb)->ihl * 4); 922 923 /* Add 4 bytes for VLAN tagged packets */ 924 skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) || 925 skb->protocol == htons(ETH_P_8021AD)) 926 ? 
VLAN_HLEN : 0; 927 928 if ((ip_hdr(skb)->protocol == IPPROTO_UDP) && 929 (udp_hdr(skb)->check != 0)) { 930 rx_udp_csum = udp_csum(skb); 931 iph = ip_hdr(skb); 932 csum = csum_tcpudp_magic(iph->saddr, iph->daddr, 933 (skb->len - 934 skb_transport_offset(skb)), 935 IPPROTO_UDP, rx_udp_csum); 936 937 if (udp_hdr(skb)->check != csum) 938 goto checksum_fail; 939 940 } /* else its GRE and so no outer UDP header */ 941 } 942 943 skb->ip_summed = CHECKSUM_UNNECESSARY; 944 skb->csum_level = ipv4_tunnel || ipv6_tunnel; 945 946 return; 947 948checksum_fail: 949 vsi->back->hw_csum_rx_error++; 950} 951 952/** 953 * i40e_rx_hash - returns the hash value from the Rx descriptor 954 * @ring: descriptor ring 955 * @rx_desc: specific descriptor 956 **/ 957static inline u32 i40e_rx_hash(struct i40e_ring *ring, 958 union i40e_rx_desc *rx_desc) 959{ 960 const __le64 rss_mask = 961 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH << 962 I40E_RX_DESC_STATUS_FLTSTAT_SHIFT); 963 964 if ((ring->netdev->features & NETIF_F_RXHASH) && 965 (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) 966 return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); 967 else 968 return 0; 969} 970 971/** 972 * i40e_ptype_to_hash - get a hash type 973 * @ptype: the ptype value from the descriptor 974 * 975 * Returns a hash type to be used by skb_set_hash 976 **/ 977static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype) 978{ 979 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); 980 981 if (!decoded.known) 982 return PKT_HASH_TYPE_NONE; 983 984 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 985 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4) 986 return PKT_HASH_TYPE_L4; 987 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 988 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3) 989 return PKT_HASH_TYPE_L3; 990 else 991 return PKT_HASH_TYPE_L2; 992} 993 994/** 995 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split 996 * @rx_ring: rx ring to clean 997 * @budget: how many cleans we're allowed 998 * 999 * Returns true if there's any budget left (e.g. the clean is finished) 1000 **/ 1001static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) 1002{ 1003 unsigned int total_rx_bytes = 0, total_rx_packets = 0; 1004 u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; 1005 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); 1006 const int current_node = numa_node_id(); 1007 struct i40e_vsi *vsi = rx_ring->vsi; 1008 u16 i = rx_ring->next_to_clean; 1009 union i40e_rx_desc *rx_desc; 1010 u32 rx_error, rx_status; 1011 u8 rx_ptype; 1012 u64 qword; 1013 1014 do { 1015 struct i40e_rx_buffer *rx_bi; 1016 struct sk_buff *skb; 1017 u16 vlan_tag; 1018 /* return some buffers to hardware, one at a time is too slow */ 1019 if (cleaned_count >= I40E_RX_BUFFER_WRITE) { 1020 i40evf_alloc_rx_buffers_ps(rx_ring, cleaned_count); 1021 cleaned_count = 0; 1022 } 1023 1024 i = rx_ring->next_to_clean; 1025 rx_desc = I40E_RX_DESC(rx_ring, i); 1026 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); 1027 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> 1028 I40E_RXD_QW1_STATUS_SHIFT; 1029 1030 if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT))) 1031 break; 1032 1033 /* This memory barrier is needed to keep us from reading 1034 * any other fields out of the rx_desc until we know the 1035 * DD bit is set. 
1036 */ 1037 dma_rmb(); 1038 rx_bi = &rx_ring->rx_bi[i]; 1039 skb = rx_bi->skb; 1040 if (likely(!skb)) { 1041 skb = netdev_alloc_skb_ip_align(rx_ring->netdev, 1042 rx_ring->rx_hdr_len); 1043 if (!skb) { 1044 rx_ring->rx_stats.alloc_buff_failed++; 1045 break; 1046 } 1047 1048 /* initialize queue mapping */ 1049 skb_record_rx_queue(skb, rx_ring->queue_index); 1050 /* we are reusing so sync this buffer for CPU use */ 1051 dma_sync_single_range_for_cpu(rx_ring->dev, 1052 rx_bi->dma, 1053 0, 1054 rx_ring->rx_hdr_len, 1055 DMA_FROM_DEVICE); 1056 } 1057 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> 1058 I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 1059 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >> 1060 I40E_RXD_QW1_LENGTH_HBUF_SHIFT; 1061 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >> 1062 I40E_RXD_QW1_LENGTH_SPH_SHIFT; 1063 1064 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> 1065 I40E_RXD_QW1_ERROR_SHIFT; 1066 rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT); 1067 rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT); 1068 1069 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> 1070 I40E_RXD_QW1_PTYPE_SHIFT; 1071 prefetch(rx_bi->page); 1072 rx_bi->skb = NULL; 1073 cleaned_count++; 1074 if (rx_hbo || rx_sph) { 1075 int len; 1076 if (rx_hbo) 1077 len = I40E_RX_HDR_SIZE; 1078 else 1079 len = rx_header_len; 1080 memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len); 1081 } else if (skb->len == 0) { 1082 int len; 1083 1084 len = (rx_packet_len > skb_headlen(skb) ? 1085 skb_headlen(skb) : rx_packet_len); 1086 memcpy(__skb_put(skb, len), 1087 rx_bi->page + rx_bi->page_offset, 1088 len); 1089 rx_bi->page_offset += len; 1090 rx_packet_len -= len; 1091 } 1092 1093 /* Get the rest of the data if this was a header split */ 1094 if (rx_packet_len) { 1095 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, 1096 rx_bi->page, 1097 rx_bi->page_offset, 1098 rx_packet_len); 1099 1100 skb->len += rx_packet_len; 1101 skb->data_len += rx_packet_len; 1102 skb->truesize += rx_packet_len; 1103 1104 if ((page_count(rx_bi->page) == 1) && 1105 (page_to_nid(rx_bi->page) == current_node)) 1106 get_page(rx_bi->page); 1107 else 1108 rx_bi->page = NULL; 1109 1110 dma_unmap_page(rx_ring->dev, 1111 rx_bi->page_dma, 1112 PAGE_SIZE / 2, 1113 DMA_FROM_DEVICE); 1114 rx_bi->page_dma = 0; 1115 } 1116 I40E_RX_INCREMENT(rx_ring, i); 1117 1118 if (unlikely( 1119 !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) { 1120 struct i40e_rx_buffer *next_buffer; 1121 1122 next_buffer = &rx_ring->rx_bi[i]; 1123 next_buffer->skb = skb; 1124 rx_ring->rx_stats.non_eop_descs++; 1125 continue; 1126 } 1127 1128 /* ERR_MASK will only have valid bits if EOP set */ 1129 if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { 1130 dev_kfree_skb_any(skb); 1131 /* TODO: shouldn't we increment a counter indicating the 1132 * drop? 1133 */ 1134 continue; 1135 } 1136 1137 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc), 1138 i40e_ptype_to_hash(rx_ptype)); 1139 /* probably a little skewed due to removing CRC */ 1140 total_rx_bytes += skb->len; 1141 total_rx_packets++; 1142 1143 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1144 1145 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype); 1146 1147 vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) 1148 ? 
le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) 1149 : 0; 1150#ifdef I40E_FCOE 1151 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) { 1152 dev_kfree_skb_any(skb); 1153 continue; 1154 } 1155#endif 1156 skb_mark_napi_id(skb, &rx_ring->q_vector->napi); 1157 i40e_receive_skb(rx_ring, skb, vlan_tag); 1158 1159 rx_ring->netdev->last_rx = jiffies; 1160 rx_desc->wb.qword1.status_error_len = 0; 1161 1162 } while (likely(total_rx_packets < budget)); 1163 1164 u64_stats_update_begin(&rx_ring->syncp); 1165 rx_ring->stats.packets += total_rx_packets; 1166 rx_ring->stats.bytes += total_rx_bytes; 1167 u64_stats_update_end(&rx_ring->syncp); 1168 rx_ring->q_vector->rx.total_packets += total_rx_packets; 1169 rx_ring->q_vector->rx.total_bytes += total_rx_bytes; 1170 1171 return total_rx_packets; 1172} 1173 1174/** 1175 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer 1176 * @rx_ring: rx ring to clean 1177 * @budget: how many cleans we're allowed 1178 * 1179 * Returns number of packets cleaned 1180 **/ 1181static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) 1182{ 1183 unsigned int total_rx_bytes = 0, total_rx_packets = 0; 1184 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); 1185 struct i40e_vsi *vsi = rx_ring->vsi; 1186 union i40e_rx_desc *rx_desc; 1187 u32 rx_error, rx_status; 1188 u16 rx_packet_len; 1189 u8 rx_ptype; 1190 u64 qword; 1191 u16 i; 1192 1193 do { 1194 struct i40e_rx_buffer *rx_bi; 1195 struct sk_buff *skb; 1196 u16 vlan_tag; 1197 /* return some buffers to hardware, one at a time is too slow */ 1198 if (cleaned_count >= I40E_RX_BUFFER_WRITE) { 1199 i40evf_alloc_rx_buffers_1buf(rx_ring, cleaned_count); 1200 cleaned_count = 0; 1201 } 1202 1203 i = rx_ring->next_to_clean; 1204 rx_desc = I40E_RX_DESC(rx_ring, i); 1205 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); 1206 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> 1207 I40E_RXD_QW1_STATUS_SHIFT; 1208 1209 if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT))) 1210 break; 1211 1212 /* This memory barrier is needed to keep us from reading 1213 * any other fields out of the rx_desc until we know the 1214 * DD bit is set. 1215 */ 1216 dma_rmb(); 1217 1218 rx_bi = &rx_ring->rx_bi[i]; 1219 skb = rx_bi->skb; 1220 prefetch(skb->data); 1221 1222 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> 1223 I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 1224 1225 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> 1226 I40E_RXD_QW1_ERROR_SHIFT; 1227 rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT); 1228 1229 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> 1230 I40E_RXD_QW1_PTYPE_SHIFT; 1231 rx_bi->skb = NULL; 1232 cleaned_count++; 1233 1234 /* Get the header and possibly the whole packet 1235 * If this is an skb from previous receive dma will be 0 1236 */ 1237 skb_put(skb, rx_packet_len); 1238 dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len, 1239 DMA_FROM_DEVICE); 1240 rx_bi->dma = 0; 1241 1242 I40E_RX_INCREMENT(rx_ring, i); 1243 1244 if (unlikely( 1245 !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) { 1246 rx_ring->rx_stats.non_eop_descs++; 1247 continue; 1248 } 1249 1250 /* ERR_MASK will only have valid bits if EOP set */ 1251 if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { 1252 dev_kfree_skb_any(skb); 1253 /* TODO: shouldn't we increment a counter indicating the 1254 * drop? 
1255 */ 1256 continue; 1257 } 1258 1259 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc), 1260 i40e_ptype_to_hash(rx_ptype)); 1261 /* probably a little skewed due to removing CRC */ 1262 total_rx_bytes += skb->len; 1263 total_rx_packets++; 1264 1265 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1266 1267 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype); 1268 1269 vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) 1270 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) 1271 : 0; 1272 i40e_receive_skb(rx_ring, skb, vlan_tag); 1273 1274 rx_ring->netdev->last_rx = jiffies; 1275 rx_desc->wb.qword1.status_error_len = 0; 1276 } while (likely(total_rx_packets < budget)); 1277 1278 u64_stats_update_begin(&rx_ring->syncp); 1279 rx_ring->stats.packets += total_rx_packets; 1280 rx_ring->stats.bytes += total_rx_bytes; 1281 u64_stats_update_end(&rx_ring->syncp); 1282 rx_ring->q_vector->rx.total_packets += total_rx_packets; 1283 rx_ring->q_vector->rx.total_bytes += total_rx_bytes; 1284 1285 return total_rx_packets; 1286} 1287 1288/** 1289 * i40evf_napi_poll - NAPI polling Rx/Tx cleanup routine 1290 * @napi: napi struct with our devices info in it 1291 * @budget: amount of work driver is allowed to do this pass, in packets 1292 * 1293 * This function will clean all queues associated with a q_vector. 1294 * 1295 * Returns the amount of work done 1296 **/ 1297int i40evf_napi_poll(struct napi_struct *napi, int budget) 1298{ 1299 struct i40e_q_vector *q_vector = 1300 container_of(napi, struct i40e_q_vector, napi); 1301 struct i40e_vsi *vsi = q_vector->vsi; 1302 struct i40e_ring *ring; 1303 bool clean_complete = true; 1304 bool arm_wb = false; 1305 int budget_per_ring; 1306 int cleaned; 1307 1308 if (test_bit(__I40E_DOWN, &vsi->state)) { 1309 napi_complete(napi); 1310 return 0; 1311 } 1312 1313 /* Since the actual Tx work is minimal, we can give the Tx a larger 1314 * budget and be more aggressive about cleaning up the Tx descriptors. 1315 */ 1316 i40e_for_each_ring(ring, q_vector->tx) { 1317 clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); 1318 arm_wb |= ring->arm_wb; 1319 } 1320 1321 /* We attempt to distribute budget to each Rx queue fairly, but don't 1322 * allow the budget to go below 1 because that would exit polling early. 
1323 */ 1324 budget_per_ring = max(budget/q_vector->num_ringpairs, 1); 1325 1326 i40e_for_each_ring(ring, q_vector->rx) { 1327 if (ring_is_ps_enabled(ring)) 1328 cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring); 1329 else 1330 cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring); 1331 /* if we didn't clean as many as budgeted, we must be done */ 1332 clean_complete &= (budget_per_ring != cleaned); 1333 } 1334 1335 /* If work not completed, return budget and polling will return */ 1336 if (!clean_complete) { 1337 if (arm_wb) 1338 i40e_force_wb(vsi, q_vector); 1339 return budget; 1340 } 1341 1342 /* Work is done so exit the polling mode and re-enable the interrupt */ 1343 napi_complete(napi); 1344 if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) || 1345 ITR_IS_DYNAMIC(vsi->tx_itr_setting)) 1346 i40e_update_dynamic_itr(q_vector); 1347 1348 if (!test_bit(__I40E_DOWN, &vsi->state)) 1349 i40evf_irq_enable_queues(vsi->back, 1 << q_vector->v_idx); 1350 1351 return 0; 1352} 1353 1354/** 1355 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW 1356 * @skb: send buffer 1357 * @tx_ring: ring to send buffer on 1358 * @flags: the tx flags to be set 1359 * 1360 * Checks the skb and sets up the generic transmit flags related to VLAN 1361 * tagging for the HW, such as VLAN, DCB, etc. 1362 * 1363 * Returns an error code if the frame should be dropped; otherwise returns 0 1364 * to indicate the flags have been set properly. 1365 **/ 1366static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, 1367 struct i40e_ring *tx_ring, 1368 u32 *flags) 1369{ 1370 __be16 protocol = skb->protocol; 1371 u32 tx_flags = 0; 1372 1373 if (protocol == htons(ETH_P_8021Q) && 1374 !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) { 1375 /* When HW VLAN acceleration is turned off by the user the 1376 * stack sets the protocol to 8021q so that the driver 1377 * can take any steps required to support the SW only 1378 * VLAN handling. In our case the driver doesn't need 1379 * to take any further steps so just set the protocol 1380 * to the encapsulated ethertype. 
1381 */ 1382 skb->protocol = vlan_get_protocol(skb); 1383 goto out; 1384 } 1385 1386 /* if we have a HW VLAN tag being added, default to the HW one */ 1387 if (skb_vlan_tag_present(skb)) { 1388 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; 1389 tx_flags |= I40E_TX_FLAGS_HW_VLAN; 1390 /* else if it is a SW VLAN, check the next protocol and store the tag */ 1391 } else if (protocol == htons(ETH_P_8021Q)) { 1392 struct vlan_hdr *vhdr, _vhdr; 1393 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); 1394 if (!vhdr) 1395 return -EINVAL; 1396 1397 protocol = vhdr->h_vlan_encapsulated_proto; 1398 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT; 1399 tx_flags |= I40E_TX_FLAGS_SW_VLAN; 1400 } 1401 1402out: 1403 *flags = tx_flags; 1404 return 0; 1405} 1406 1407/** 1408 * i40e_tso - set up the tso context descriptor 1409 * @tx_ring: ptr to the ring to send 1410 * @skb: ptr to the skb we're sending 1411 * @tx_flags: the collected send information 1412 * @protocol: the send protocol 1413 * @hdr_len: ptr to the size of the packet header 1414 * @cd_tunneling: ptr to context descriptor bits 1415 * 1416 * Returns 0 if no TSO can happen, 1 if tso is going, or error 1417 **/ 1418static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, 1419 u32 tx_flags, __be16 protocol, u8 *hdr_len, 1420 u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling) 1421{ 1422 u32 cd_cmd, cd_tso_len, cd_mss; 1423 struct ipv6hdr *ipv6h; 1424 struct tcphdr *tcph; 1425 struct iphdr *iph; 1426 u32 l4len; 1427 int err; 1428 1429 if (!skb_is_gso(skb)) 1430 return 0; 1431 1432 err = skb_cow_head(skb, 0); 1433 if (err < 0) 1434 return err; 1435 1436 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); 1437 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); 1438 1439 if (iph->version == 4) { 1440 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); 1441 iph->tot_len = 0; 1442 iph->check = 0; 1443 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 1444 0, IPPROTO_TCP, 0); 1445 } else if (ipv6h->version == 6) { 1446 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); 1447 ipv6h->payload_len = 0; 1448 tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 1449 0, IPPROTO_TCP, 0); 1450 } 1451 1452 l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); 1453 *hdr_len = (skb->encapsulation 1454 ? 
(skb_inner_transport_header(skb) - skb->data) 1455 : skb_transport_offset(skb)) + l4len; 1456 1457 /* find the field values */ 1458 cd_cmd = I40E_TX_CTX_DESC_TSO; 1459 cd_tso_len = skb->len - *hdr_len; 1460 cd_mss = skb_shinfo(skb)->gso_size; 1461 *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | 1462 ((u64)cd_tso_len << 1463 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | 1464 ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); 1465 return 1; 1466} 1467 1468/** 1469 * i40e_tx_enable_csum - Enable Tx checksum offloads 1470 * @skb: send buffer 1471 * @tx_flags: Tx flags currently set 1472 * @td_cmd: Tx descriptor command bits to set 1473 * @td_offset: Tx descriptor header offsets to set 1474 * @cd_tunneling: ptr to context desc bits 1475 **/ 1476static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, 1477 u32 *td_cmd, u32 *td_offset, 1478 struct i40e_ring *tx_ring, 1479 u32 *cd_tunneling) 1480{ 1481 struct ipv6hdr *this_ipv6_hdr; 1482 unsigned int this_tcp_hdrlen; 1483 struct iphdr *this_ip_hdr; 1484 u32 network_hdr_len; 1485 u8 l4_hdr = 0; 1486 u32 l4_tunnel = 0; 1487 1488 if (skb->encapsulation) { 1489 switch (ip_hdr(skb)->protocol) { 1490 case IPPROTO_UDP: 1491 l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; 1492 break; 1493 default: 1494 return; 1495 } 1496 network_hdr_len = skb_inner_network_header_len(skb); 1497 this_ip_hdr = inner_ip_hdr(skb); 1498 this_ipv6_hdr = inner_ipv6_hdr(skb); 1499 this_tcp_hdrlen = inner_tcp_hdrlen(skb); 1500 1501 if (tx_flags & I40E_TX_FLAGS_IPV4) { 1502 1503 if (tx_flags & I40E_TX_FLAGS_TSO) { 1504 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; 1505 ip_hdr(skb)->check = 0; 1506 } else { 1507 *cd_tunneling |= 1508 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; 1509 } 1510 } else if (tx_flags & I40E_TX_FLAGS_IPV6) { 1511 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; 1512 if (tx_flags & I40E_TX_FLAGS_TSO) 1513 ip_hdr(skb)->check = 0; 1514 } 1515 1516 /* Now set the ctx descriptor fields */ 1517 *cd_tunneling |= (skb_network_header_len(skb) >> 2) << 1518 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | 1519 l4_tunnel | 1520 ((skb_inner_network_offset(skb) - 1521 skb_transport_offset(skb)) >> 1) << 1522 I40E_TXD_CTX_QW0_NATLEN_SHIFT; 1523 if (this_ip_hdr->version == 6) { 1524 tx_flags &= ~I40E_TX_FLAGS_IPV4; 1525 tx_flags |= I40E_TX_FLAGS_IPV6; 1526 } 1527 1528 1529 } else { 1530 network_hdr_len = skb_network_header_len(skb); 1531 this_ip_hdr = ip_hdr(skb); 1532 this_ipv6_hdr = ipv6_hdr(skb); 1533 this_tcp_hdrlen = tcp_hdrlen(skb); 1534 } 1535 1536 /* Enable IP checksum offloads */ 1537 if (tx_flags & I40E_TX_FLAGS_IPV4) { 1538 l4_hdr = this_ip_hdr->protocol; 1539 /* the stack computes the IP header already, the only time we 1540 * need the hardware to recompute it is in the case of TSO. 
1541 */ 1542 if (tx_flags & I40E_TX_FLAGS_TSO) { 1543 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; 1544 this_ip_hdr->check = 0; 1545 } else { 1546 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; 1547 } 1548 /* Now set the td_offset for IP header length */ 1549 *td_offset = (network_hdr_len >> 2) << 1550 I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 1551 } else if (tx_flags & I40E_TX_FLAGS_IPV6) { 1552 l4_hdr = this_ipv6_hdr->nexthdr; 1553 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; 1554 /* Now set the td_offset for IP header length */ 1555 *td_offset = (network_hdr_len >> 2) << 1556 I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 1557 } 1558 /* words in MACLEN + dwords in IPLEN + dwords in L4Len */ 1559 *td_offset |= (skb_network_offset(skb) >> 1) << 1560 I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 1561 1562 /* Enable L4 checksum offloads */ 1563 switch (l4_hdr) { 1564 case IPPROTO_TCP: 1565 /* enable checksum offloads */ 1566 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; 1567 *td_offset |= (this_tcp_hdrlen >> 2) << 1568 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 1569 break; 1570 case IPPROTO_SCTP: 1571 /* enable SCTP checksum offload */ 1572 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; 1573 *td_offset |= (sizeof(struct sctphdr) >> 2) << 1574 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 1575 break; 1576 case IPPROTO_UDP: 1577 /* enable UDP checksum offload */ 1578 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; 1579 *td_offset |= (sizeof(struct udphdr) >> 2) << 1580 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 1581 break; 1582 default: 1583 break; 1584 } 1585} 1586 1587/** 1588 * i40e_create_tx_ctx Build the Tx context descriptor 1589 * @tx_ring: ring to create the descriptor on 1590 * @cd_type_cmd_tso_mss: Quad Word 1 1591 * @cd_tunneling: Quad Word 0 - bits 0-31 1592 * @cd_l2tag2: Quad Word 0 - bits 32-63 1593 **/ 1594static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, 1595 const u64 cd_type_cmd_tso_mss, 1596 const u32 cd_tunneling, const u32 cd_l2tag2) 1597{ 1598 struct i40e_tx_context_desc *context_desc; 1599 int i = tx_ring->next_to_use; 1600 1601 if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) && 1602 !cd_tunneling && !cd_l2tag2) 1603 return; 1604 1605 /* grab the next descriptor */ 1606 context_desc = I40E_TX_CTXTDESC(tx_ring, i); 1607 1608 i++; 1609 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 1610 1611 /* cpu_to_le32 and assign to struct fields */ 1612 context_desc->tunneling_params = cpu_to_le32(cd_tunneling); 1613 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2); 1614 context_desc->rsvd = cpu_to_le16(0); 1615 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); 1616} 1617 1618 /** 1619 * i40e_chk_linearize - Check if there are more than 8 fragments per packet 1620 * @skb: send buffer 1621 * @tx_flags: collected send information 1622 * 1623 * Note: Our HW can't scatter-gather more than 8 fragments to build 1624 * a packet on the wire and so we need to figure out the cases where we 1625 * need to linearize the skb. 
1626 **/ 1627static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags) 1628{ 1629 struct skb_frag_struct *frag; 1630 bool linearize = false; 1631 unsigned int size = 0; 1632 u16 num_frags; 1633 u16 gso_segs; 1634 1635 num_frags = skb_shinfo(skb)->nr_frags; 1636 gso_segs = skb_shinfo(skb)->gso_segs; 1637 1638 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { 1639 u16 j = 0; 1640 1641 if (num_frags < (I40E_MAX_BUFFER_TXD)) 1642 goto linearize_chk_done; 1643 /* try the simple math, if we have too many frags per segment */ 1644 if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > 1645 I40E_MAX_BUFFER_TXD) { 1646 linearize = true; 1647 goto linearize_chk_done; 1648 } 1649 frag = &skb_shinfo(skb)->frags[0]; 1650 /* we might still have more fragments per segment */ 1651 do { 1652 size += skb_frag_size(frag); 1653 frag++; j++; 1654 if ((size >= skb_shinfo(skb)->gso_size) && 1655 (j < I40E_MAX_BUFFER_TXD)) { 1656 size = (size % skb_shinfo(skb)->gso_size); 1657 j = (size) ? 1 : 0; 1658 } 1659 if (j == I40E_MAX_BUFFER_TXD) { 1660 linearize = true; 1661 break; 1662 } 1663 num_frags--; 1664 } while (num_frags); 1665 } else { 1666 if (num_frags >= I40E_MAX_BUFFER_TXD) 1667 linearize = true; 1668 } 1669 1670linearize_chk_done: 1671 return linearize; 1672} 1673 1674/** 1675 * i40e_tx_map - Build the Tx descriptor 1676 * @tx_ring: ring to send buffer on 1677 * @skb: send buffer 1678 * @first: first buffer info buffer to use 1679 * @tx_flags: collected send information 1680 * @hdr_len: size of the packet header 1681 * @td_cmd: the command field in the descriptor 1682 * @td_offset: offset for checksum or crc 1683 **/ 1684static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, 1685 struct i40e_tx_buffer *first, u32 tx_flags, 1686 const u8 hdr_len, u32 td_cmd, u32 td_offset) 1687{ 1688 unsigned int data_len = skb->data_len; 1689 unsigned int size = skb_headlen(skb); 1690 struct skb_frag_struct *frag; 1691 struct i40e_tx_buffer *tx_bi; 1692 struct i40e_tx_desc *tx_desc; 1693 u16 i = tx_ring->next_to_use; 1694 u32 td_tag = 0; 1695 dma_addr_t dma; 1696 u16 gso_segs; 1697 1698 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { 1699 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; 1700 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >> 1701 I40E_TX_FLAGS_VLAN_SHIFT; 1702 } 1703 1704 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) 1705 gso_segs = skb_shinfo(skb)->gso_segs; 1706 else 1707 gso_segs = 1; 1708 1709 /* multiply data chunks by size of headers */ 1710 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len); 1711 first->gso_segs = gso_segs; 1712 first->skb = skb; 1713 first->tx_flags = tx_flags; 1714 1715 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 1716 1717 tx_desc = I40E_TX_DESC(tx_ring, i); 1718 tx_bi = first; 1719 1720 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 1721 if (dma_mapping_error(tx_ring->dev, dma)) 1722 goto dma_error; 1723 1724 /* record length, and DMA address */ 1725 dma_unmap_len_set(tx_bi, len, size); 1726 dma_unmap_addr_set(tx_bi, dma, dma); 1727 1728 tx_desc->buffer_addr = cpu_to_le64(dma); 1729 1730 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) { 1731 tx_desc->cmd_type_offset_bsz = 1732 build_ctob(td_cmd, td_offset, 1733 I40E_MAX_DATA_PER_TXD, td_tag); 1734 1735 tx_desc++; 1736 i++; 1737 if (i == tx_ring->count) { 1738 tx_desc = I40E_TX_DESC(tx_ring, 0); 1739 i = 0; 1740 } 1741 1742 dma += I40E_MAX_DATA_PER_TXD; 1743 size -= I40E_MAX_DATA_PER_TXD; 1744 1745 tx_desc->buffer_addr = cpu_to_le64(dma); 1746 } 1747 1748 if 
(likely(!data_len)) 1749 break; 1750 1751 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, 1752 size, td_tag); 1753 1754 tx_desc++; 1755 i++; 1756 if (i == tx_ring->count) { 1757 tx_desc = I40E_TX_DESC(tx_ring, 0); 1758 i = 0; 1759 } 1760 1761 size = skb_frag_size(frag); 1762 data_len -= size; 1763 1764 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, 1765 DMA_TO_DEVICE); 1766 1767 tx_bi = &tx_ring->tx_bi[i]; 1768 } 1769 1770 /* Place RS bit on last descriptor of any packet that spans across the 1771 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. 1772 */ 1773#define WB_STRIDE 0x3 1774 if (((i & WB_STRIDE) != WB_STRIDE) && 1775 (first <= &tx_ring->tx_bi[i]) && 1776 (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) { 1777 tx_desc->cmd_type_offset_bsz = 1778 build_ctob(td_cmd, td_offset, size, td_tag) | 1779 cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP << 1780 I40E_TXD_QW1_CMD_SHIFT); 1781 } else { 1782 tx_desc->cmd_type_offset_bsz = 1783 build_ctob(td_cmd, td_offset, size, td_tag) | 1784 cpu_to_le64((u64)I40E_TXD_CMD << 1785 I40E_TXD_QW1_CMD_SHIFT); 1786 } 1787 1788 netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, 1789 tx_ring->queue_index), 1790 first->bytecount); 1791 1792 /* set the timestamp */ 1793 first->time_stamp = jiffies; 1794 1795 /* Force memory writes to complete before letting h/w 1796 * know there are new descriptors to fetch. (Only 1797 * applicable for weak-ordered memory model archs, 1798 * such as IA-64). 1799 */ 1800 wmb(); 1801 1802 /* set next_to_watch value indicating a packet is present */ 1803 first->next_to_watch = tx_desc; 1804 1805 i++; 1806 if (i == tx_ring->count) 1807 i = 0; 1808 1809 tx_ring->next_to_use = i; 1810 1811 /* notify HW of packet */ 1812 writel(i, tx_ring->tail); 1813 1814 return; 1815 1816dma_error: 1817 dev_info(tx_ring->dev, "TX DMA map failed\n"); 1818 1819 /* clear dma mappings for failed tx_bi map */ 1820 for (;;) { 1821 tx_bi = &tx_ring->tx_bi[i]; 1822 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi); 1823 if (tx_bi == first) 1824 break; 1825 if (i == 0) 1826 i = tx_ring->count; 1827 i--; 1828 } 1829 1830 tx_ring->next_to_use = i; 1831} 1832 1833/** 1834 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions 1835 * @tx_ring: the ring to be checked 1836 * @size: the size buffer we want to assure is available 1837 * 1838 * Returns -EBUSY if a stop is needed, else 0 1839 **/ 1840static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) 1841{ 1842 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); 1843 /* Memory barrier before checking head and tail */ 1844 smp_mb(); 1845 1846 /* Check again in a case another CPU has just made room available. */ 1847 if (likely(I40E_DESC_UNUSED(tx_ring) < size)) 1848 return -EBUSY; 1849 1850 /* A reprieve! 
- use start_queue because it doesn't call schedule */ 1851 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); 1852 ++tx_ring->tx_stats.restart_queue; 1853 return 0; 1854} 1855 1856/** 1857 * i40e_maybe_stop_tx - 1st level check for tx stop conditions 1858 * @tx_ring: the ring to be checked 1859 * @size: the size buffer we want to assure is available 1860 * 1861 * Returns 0 if stop is not needed 1862 **/ 1863static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) 1864{ 1865 if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) 1866 return 0; 1867 return __i40e_maybe_stop_tx(tx_ring, size); 1868} 1869 1870/** 1871 * i40e_xmit_descriptor_count - calculate number of tx descriptors needed 1872 * @skb: send buffer 1873 * @tx_ring: ring to send buffer on 1874 * 1875 * Returns number of data descriptors needed for this skb. Returns 0 to indicate 1876 * there is not enough descriptors available in this ring since we need at least 1877 * one descriptor. 1878 **/ 1879static int i40e_xmit_descriptor_count(struct sk_buff *skb, 1880 struct i40e_ring *tx_ring) 1881{ 1882 unsigned int f; 1883 int count = 0; 1884 1885 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, 1886 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, 1887 * + 4 desc gap to avoid the cache line where head is, 1888 * + 1 desc for context descriptor, 1889 * otherwise try next time 1890 */ 1891 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1892 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); 1893 1894 count += TXD_USE_COUNT(skb_headlen(skb)); 1895 if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { 1896 tx_ring->tx_stats.tx_busy++; 1897 return 0; 1898 } 1899 return count; 1900} 1901 1902/** 1903 * i40e_xmit_frame_ring - Sends buffer on Tx ring 1904 * @skb: send buffer 1905 * @tx_ring: ring to send buffer on 1906 * 1907 * Returns NETDEV_TX_OK if sent, else an error code 1908 **/ 1909static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, 1910 struct i40e_ring *tx_ring) 1911{ 1912 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT; 1913 u32 cd_tunneling = 0, cd_l2tag2 = 0; 1914 struct i40e_tx_buffer *first; 1915 u32 td_offset = 0; 1916 u32 tx_flags = 0; 1917 __be16 protocol; 1918 u32 td_cmd = 0; 1919 u8 hdr_len = 0; 1920 int tso; 1921 if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) 1922 return NETDEV_TX_BUSY; 1923 1924 /* prepare the xmit flags */ 1925 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) 1926 goto out_drop; 1927 1928 /* obtain protocol of skb */ 1929 protocol = vlan_get_protocol(skb); 1930 1931 /* record the location of the first descriptor for this packet */ 1932 first = &tx_ring->tx_bi[tx_ring->next_to_use]; 1933 1934 /* setup IPv4/IPv6 offloads */ 1935 if (protocol == htons(ETH_P_IP)) 1936 tx_flags |= I40E_TX_FLAGS_IPV4; 1937 else if (protocol == htons(ETH_P_IPV6)) 1938 tx_flags |= I40E_TX_FLAGS_IPV6; 1939 1940 tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len, 1941 &cd_type_cmd_tso_mss, &cd_tunneling); 1942 1943 if (tso < 0) 1944 goto out_drop; 1945 else if (tso) 1946 tx_flags |= I40E_TX_FLAGS_TSO; 1947 1948 if (i40e_chk_linearize(skb, tx_flags)) 1949 if (skb_linearize(skb)) 1950 goto out_drop; 1951 1952 skb_tx_timestamp(skb); 1953 1954 /* always enable CRC insertion offload */ 1955 td_cmd |= I40E_TX_DESC_CMD_ICRC; 1956 1957 /* Always offload the checksum, since it's in the data descriptor */ 1958 if (skb->ip_summed == CHECKSUM_PARTIAL) { 1959 tx_flags |= I40E_TX_FLAGS_CSUM; 1960 1961 i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset, 1962 tx_ring, 
&cd_tunneling); 1963 } 1964 1965 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, 1966 cd_tunneling, cd_l2tag2); 1967 1968 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, 1969 td_cmd, td_offset); 1970 1971 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); 1972 1973 return NETDEV_TX_OK; 1974 1975out_drop: 1976 dev_kfree_skb_any(skb); 1977 return NETDEV_TX_OK; 1978} 1979 1980/** 1981 * i40evf_xmit_frame - Selects the correct VSI and Tx queue to send buffer 1982 * @skb: send buffer 1983 * @netdev: network interface device structure 1984 * 1985 * Returns NETDEV_TX_OK if sent, else an error code 1986 **/ 1987netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) 1988{ 1989 struct i40evf_adapter *adapter = netdev_priv(netdev); 1990 struct i40e_ring *tx_ring = adapter->tx_rings[skb->queue_mapping]; 1991 1992 /* hardware can't handle really short frames, hardware padding works 1993 * beyond this point 1994 */ 1995 if (unlikely(skb->len < I40E_MIN_TX_LEN)) { 1996 if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len)) 1997 return NETDEV_TX_OK; 1998 skb->len = I40E_MIN_TX_LEN; 1999 skb_set_tail_pointer(skb, I40E_MIN_TX_LEN); 2000 } 2001 2002 return i40e_xmit_frame_ring(skb, tx_ring); 2003} 2004
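/*
 * Illustrative sketch, not part of the driver: how build_ctob() near the top
 * of this file composes quad word 1 of a Tx data descriptor by OR-ing shifted
 * fields together.  The DEMO_* values below are placeholders standing in for
 * the I40E_TX_DESC_DTYPE_DATA and I40E_TXD_QW1_* definitions from the hardware
 * headers; they are assumptions for demonstration, not the authoritative
 * register layout.
 */
#include <stdint.h>

#define DEMO_DTYPE_DATA    0x0ULL  /* stand-in for I40E_TX_DESC_DTYPE_DATA */
#define DEMO_CMD_SHIFT     4       /* stand-in for I40E_TXD_QW1_CMD_SHIFT */
#define DEMO_OFFSET_SHIFT  16      /* stand-in for I40E_TXD_QW1_OFFSET_SHIFT */
#define DEMO_BUF_SZ_SHIFT  34      /* stand-in for I40E_TXD_QW1_TX_BUF_SZ_SHIFT */
#define DEMO_L2TAG1_SHIFT  48      /* stand-in for I40E_TXD_QW1_L2TAG1_SHIFT */

static uint64_t demo_build_ctob(uint32_t td_cmd, uint32_t td_offset,
				uint32_t size, uint32_t td_tag)
{
	/* same OR-of-shifted-fields pattern as build_ctob(), minus the
	 * cpu_to_le64() conversion the real descriptor write needs
	 */
	return DEMO_DTYPE_DATA |
	       ((uint64_t)td_cmd << DEMO_CMD_SHIFT) |
	       ((uint64_t)td_offset << DEMO_OFFSET_SHIFT) |
	       ((uint64_t)size << DEMO_BUF_SZ_SHIFT) |
	       ((uint64_t)td_tag << DEMO_L2TAG1_SHIFT);
}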
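/*
 * Illustrative sketch, not part of the driver: the exponential smoothing step
 * at the end of i40e_set_new_dynamic_itr().  Seen in reciprocal (interrupt
 * rate) terms, the blend weights the current rate 9:1 over the target rate,
 * so a single busy or idle interval only nudges the ITR.  The numbers in the
 * comment are hypothetical register-unit values chosen just for arithmetic.
 */
static unsigned int demo_smooth_itr(unsigned int target_itr,
				    unsigned int current_itr)
{
	/*
	 * 1/new = 0.9 * (1/current) + 0.1 * (1/target), rearranged to stay in
	 * integer math.  Example: current = 100, target = 40 gives
	 * (10 * 40 * 100) / ((9 * 40) + 100) = 40000 / 460 = 86, so one pass
	 * only moves 100 -> 86 on the way toward 40.
	 */
	return (10 * target_itr * current_itr) /
	       ((9 * target_itr) + current_itr);
}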
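/*
 * Illustrative sketch, not part of the driver: the unit conversions behind
 * the td_offset packing in i40e_tx_enable_csum().  Per the comment there, the
 * MAC header length is programmed in 2-byte words while the IP and L4 header
 * lengths are programmed in 4-byte dwords, hence the ">> 1" and ">> 2".  The
 * DEMO_* shifts are placeholders for the I40E_TX_DESC_LENGTH_* definitions,
 * assumed only for demonstration.
 */
#define DEMO_MACLEN_SHIFT  0   /* stand-in for ..._LENGTH_MACLEN_SHIFT */
#define DEMO_IPLEN_SHIFT   7   /* stand-in for ..._LENGTH_IPLEN_SHIFT */
#define DEMO_L4LEN_SHIFT   14  /* stand-in for ..._LENGTH_L4_FC_LEN_SHIFT */

static unsigned int demo_pack_td_offset(unsigned int mac_hdr_bytes,
					unsigned int ip_hdr_bytes,
					unsigned int l4_hdr_bytes)
{
	/* e.g. a plain TCPv4 frame: 14-byte Ethernet header -> 7 words,
	 * 20-byte IPv4 header -> 5 dwords, 20-byte TCP header -> 5 dwords
	 */
	return ((mac_hdr_bytes >> 1) << DEMO_MACLEN_SHIFT) |
	       ((ip_hdr_bytes >> 2) << DEMO_IPLEN_SHIFT) |
	       ((l4_hdr_bytes >> 2) << DEMO_L4LEN_SHIFT);
}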
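/*
 * Illustrative sketch, not part of the driver: the descriptor budgeting done
 * by i40e_xmit_descriptor_count() before a frame is queued.  The head and each
 * page fragment may need more than one data descriptor when they exceed the
 * per-descriptor data limit, and on top of that the ring keeps a gap of four
 * descriptors plus room for one context descriptor.  DEMO_MAX_DATA_PER_TXD is
 * a hypothetical stand-in for I40E_MAX_DATA_PER_TXD, and the round-up helper
 * plays the role TXD_USE_COUNT() plays in the loop above.
 */
#include <stddef.h>

#define DEMO_MAX_DATA_PER_TXD  (16 * 1024)  /* hypothetical per-descriptor cap */

static unsigned int demo_txd_use_count(size_t bytes)
{
	return (unsigned int)((bytes + DEMO_MAX_DATA_PER_TXD - 1) /
			      DEMO_MAX_DATA_PER_TXD);
}

static unsigned int demo_descs_needed(size_t headlen,
				      const size_t *frag_len,
				      unsigned int nr_frags)
{
	unsigned int count = demo_txd_use_count(headlen);
	unsigned int f;

	for (f = 0; f < nr_frags; f++)
		count += demo_txd_use_count(frag_len[f]);

	/* + 4 descriptor gap to avoid the cache line holding the head
	 * write-back, + 1 for a possible context descriptor
	 */
	return count + 4 + 1;
}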