1/******************************************************************************* 2 * 3 * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver 4 * Copyright(c) 2013 - 2014 Intel Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along 16 * with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 * The full GNU General Public License is included in this distribution in 19 * the file called "COPYING". 20 * 21 * Contact Information: 22 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> 23 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 24 * 25 ******************************************************************************/ 26 27#include <linux/prefetch.h> 28#include <net/busy_poll.h> 29 30#include "i40evf.h" 31#include "i40e_prototype.h" 32 33static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, 34 u32 td_tag) 35{ 36 return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA | 37 ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) | 38 ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) | 39 ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) | 40 ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT)); 41} 42 43#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) 44 45/** 46 * i40e_unmap_and_free_tx_resource - Release a Tx buffer 47 * @ring: the ring that owns the buffer 48 * @tx_buffer: the buffer to free 49 **/ 50static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, 51 struct i40e_tx_buffer *tx_buffer) 52{ 53 if (tx_buffer->skb) { 54 if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) 55 kfree(tx_buffer->raw_buf); 56 else 57 dev_kfree_skb_any(tx_buffer->skb); 58 59 if (dma_unmap_len(tx_buffer, len)) 60 dma_unmap_single(ring->dev, 61 dma_unmap_addr(tx_buffer, dma), 62 dma_unmap_len(tx_buffer, len), 63 DMA_TO_DEVICE); 64 } else if (dma_unmap_len(tx_buffer, len)) { 65 dma_unmap_page(ring->dev, 66 dma_unmap_addr(tx_buffer, dma), 67 dma_unmap_len(tx_buffer, len), 68 DMA_TO_DEVICE); 69 } 70 tx_buffer->next_to_watch = NULL; 71 tx_buffer->skb = NULL; 72 dma_unmap_len_set(tx_buffer, len, 0); 73 /* tx_buffer must be completely set up in the transmit path */ 74} 75 76/** 77 * i40evf_clean_tx_ring - Free any empty Tx buffers 78 * @tx_ring: ring to be cleaned 79 **/ 80void i40evf_clean_tx_ring(struct i40e_ring *tx_ring) 81{ 82 unsigned long bi_size; 83 u16 i; 84 85 /* ring already cleared, nothing to do */ 86 if (!tx_ring->tx_bi) 87 return; 88 89 /* Free all the Tx ring sk_buffs */ 90 for (i = 0; i < tx_ring->count; i++) 91 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]); 92 93 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; 94 memset(tx_ring->tx_bi, 0, bi_size); 95 96 /* Zero out the descriptor ring */ 97 memset(tx_ring->desc, 0, tx_ring->size); 98 99 tx_ring->next_to_use = 0; 100 tx_ring->next_to_clean = 0; 101 102 if (!tx_ring->netdev) 103 return; 104 105 /* cleanup Tx queue statistics */ 106 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, 107 tx_ring->queue_index)); 108} 109 110/** 111 * i40evf_free_tx_resources - 
Free Tx resources per queue 112 * @tx_ring: Tx descriptor ring for a specific queue 113 * 114 * Free all transmit software resources 115 **/ 116void i40evf_free_tx_resources(struct i40e_ring *tx_ring) 117{ 118 i40evf_clean_tx_ring(tx_ring); 119 kfree(tx_ring->tx_bi); 120 tx_ring->tx_bi = NULL; 121 122 if (tx_ring->desc) { 123 dma_free_coherent(tx_ring->dev, tx_ring->size, 124 tx_ring->desc, tx_ring->dma); 125 tx_ring->desc = NULL; 126 } 127} 128 129/** 130 * i40e_get_head - Retrieve head from head writeback 131 * @tx_ring: tx ring to fetch head of 132 * 133 * Returns value of Tx ring head based on value stored 134 * in head write-back location 135 **/ 136static inline u32 i40e_get_head(struct i40e_ring *tx_ring) 137{ 138 void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; 139 140 return le32_to_cpu(*(volatile __le32 *)head); 141} 142 143#define WB_STRIDE 0x3 144 145/** 146 * i40e_clean_tx_irq - Reclaim resources after transmit completes 147 * @tx_ring: tx ring to clean 148 * @budget: how many cleans we're allowed 149 * 150 * Returns true if there's any budget left (e.g. the clean is finished) 151 **/ 152static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) 153{ 154 u16 i = tx_ring->next_to_clean; 155 struct i40e_tx_buffer *tx_buf; 156 struct i40e_tx_desc *tx_head; 157 struct i40e_tx_desc *tx_desc; 158 unsigned int total_packets = 0; 159 unsigned int total_bytes = 0; 160 161 tx_buf = &tx_ring->tx_bi[i]; 162 tx_desc = I40E_TX_DESC(tx_ring, i); 163 i -= tx_ring->count; 164 165 tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring)); 166 167 do { 168 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; 169 170 /* if next_to_watch is not set then there is no work pending */ 171 if (!eop_desc) 172 break; 173 174 /* prevent any other reads prior to eop_desc */ 175 read_barrier_depends(); 176 177 /* we have caught up to head, no work left to do */ 178 if (tx_head == tx_desc) 179 break; 180 181 /* clear next_to_watch to prevent false hangs */ 182 tx_buf->next_to_watch = NULL; 183 184 /* update the statistics for this packet */ 185 total_bytes += tx_buf->bytecount; 186 total_packets += tx_buf->gso_segs; 187 188 /* free the skb */ 189 dev_kfree_skb_any(tx_buf->skb); 190 191 /* unmap skb header data */ 192 dma_unmap_single(tx_ring->dev, 193 dma_unmap_addr(tx_buf, dma), 194 dma_unmap_len(tx_buf, len), 195 DMA_TO_DEVICE); 196 197 /* clear tx_buffer data */ 198 tx_buf->skb = NULL; 199 dma_unmap_len_set(tx_buf, len, 0); 200 201 /* unmap remaining buffers */ 202 while (tx_desc != eop_desc) { 203 204 tx_buf++; 205 tx_desc++; 206 i++; 207 if (unlikely(!i)) { 208 i -= tx_ring->count; 209 tx_buf = tx_ring->tx_bi; 210 tx_desc = I40E_TX_DESC(tx_ring, 0); 211 } 212 213 /* unmap any remaining paged data */ 214 if (dma_unmap_len(tx_buf, len)) { 215 dma_unmap_page(tx_ring->dev, 216 dma_unmap_addr(tx_buf, dma), 217 dma_unmap_len(tx_buf, len), 218 DMA_TO_DEVICE); 219 dma_unmap_len_set(tx_buf, len, 0); 220 } 221 } 222 223 /* move us one more past the eop_desc for start of next pkt */ 224 tx_buf++; 225 tx_desc++; 226 i++; 227 if (unlikely(!i)) { 228 i -= tx_ring->count; 229 tx_buf = tx_ring->tx_bi; 230 tx_desc = I40E_TX_DESC(tx_ring, 0); 231 } 232 233 prefetch(tx_desc); 234 235 /* update budget accounting */ 236 budget--; 237 } while (likely(budget)); 238 239 i += tx_ring->count; 240 tx_ring->next_to_clean = i; 241 u64_stats_update_begin(&tx_ring->syncp); 242 tx_ring->stats.bytes += total_bytes; 243 tx_ring->stats.packets += total_packets; 244 u64_stats_update_end(&tx_ring->syncp); 245 
tx_ring->q_vector->tx.total_bytes += total_bytes; 246 tx_ring->q_vector->tx.total_packets += total_packets; 247 248 /* check to see if there are any non-cache aligned descriptors 249 * waiting to be written back, and kick the hardware to force 250 * them to be written back in case of napi polling 251 */ 252 if (budget && 253 !((i & WB_STRIDE) == WB_STRIDE) && 254 !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && 255 (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) 256 tx_ring->arm_wb = true; 257 258 netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, 259 tx_ring->queue_index), 260 total_packets, total_bytes); 261 262#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 263 if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && 264 (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { 265 /* Make sure that anybody stopping the queue after this 266 * sees the new next_to_clean. 267 */ 268 smp_mb(); 269 if (__netif_subqueue_stopped(tx_ring->netdev, 270 tx_ring->queue_index) && 271 !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) { 272 netif_wake_subqueue(tx_ring->netdev, 273 tx_ring->queue_index); 274 ++tx_ring->tx_stats.restart_queue; 275 } 276 } 277 278 return !!budget; 279} 280 281/** 282 * i40evf_force_wb -Arm hardware to do a wb on noncache aligned descriptors 283 * @vsi: the VSI we care about 284 * @q_vector: the vector on which to force writeback 285 * 286 **/ 287static void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) 288{ 289 u16 flags = q_vector->tx.ring[0].flags; 290 291 if (flags & I40E_TXR_FLAGS_WB_ON_ITR) { 292 u32 val; 293 294 if (q_vector->arm_wb_state) 295 return; 296 297 val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK; 298 299 wr32(&vsi->back->hw, 300 I40E_VFINT_DYN_CTLN1(q_vector->v_idx + 301 vsi->base_vector - 1), 302 val); 303 q_vector->arm_wb_state = true; 304 } else { 305 u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | 306 I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ 307 I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | 308 I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK; 309 /* allow 00 to be written to the index */ 310 311 wr32(&vsi->back->hw, 312 I40E_VFINT_DYN_CTLN1(q_vector->v_idx + 313 vsi->base_vector - 1), val); 314 } 315} 316 317/** 318 * i40e_set_new_dynamic_itr - Find new ITR level 319 * @rc: structure containing ring performance data 320 * 321 * Returns true if ITR changed, false if not 322 * 323 * Stores a new ITR value based on packets and byte counts during 324 * the last interrupt. The advantage of per interrupt computation 325 * is faster updates and more accurate ITR for the current traffic 326 * pattern. Constants in this function were computed based on 327 * theoretical maximum wire speed and thresholds were set based on 328 * testing data as well as attempting to minimize response time 329 * while increasing bulk throughput. 
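 *
 * Illustrative example (assuming ITR_COUNTDOWN_START is 100 and, as noted
 * below, ITR register values count in 2 usec units): with rc->itr == 25,
 * interrupts arrive roughly every 50 usec, so the smoothed sampling window
 * is 25 * 2 * 100 = 5000 usec. If 500 kB were received in that window,
 * bytes_per_int works out to 100 (about 100 MB/s), which is above the
 * 20 MB/s threshold and steps the latency range toward I40E_BULK_LATENCY
 * (18000 ints/s).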
330 **/ 331static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) 332{ 333 enum i40e_latency_range new_latency_range = rc->latency_range; 334 struct i40e_q_vector *qv = rc->ring->q_vector; 335 u32 new_itr = rc->itr; 336 int bytes_per_int; 337 int usecs; 338 339 if (rc->total_packets == 0 || !rc->itr) 340 return false; 341 342 /* simple throttlerate management 343 * 0-10MB/s lowest (50000 ints/s) 344 * 10-20MB/s low (20000 ints/s) 345 * 20-1249MB/s bulk (18000 ints/s) 346 * > 40000 Rx packets per second (8000 ints/s) 347 * 348 * The math works out because the divisor is in 10^(-6) which 349 * turns the bytes/us input value into MB/s values, but 350 * make sure to use usecs, as the register values written 351 * are in 2 usec increments in the ITR registers, and make sure 352 * to use the smoothed values that the countdown timer gives us. 353 */ 354 usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; 355 bytes_per_int = rc->total_bytes / usecs; 356 357 switch (new_latency_range) { 358 case I40E_LOWEST_LATENCY: 359 if (bytes_per_int > 10) 360 new_latency_range = I40E_LOW_LATENCY; 361 break; 362 case I40E_LOW_LATENCY: 363 if (bytes_per_int > 20) 364 new_latency_range = I40E_BULK_LATENCY; 365 else if (bytes_per_int <= 10) 366 new_latency_range = I40E_LOWEST_LATENCY; 367 break; 368 case I40E_BULK_LATENCY: 369 case I40E_ULTRA_LATENCY: 370 default: 371 if (bytes_per_int <= 20) 372 new_latency_range = I40E_LOW_LATENCY; 373 break; 374 } 375 376 /* this is to adjust RX more aggressively when streaming small 377 * packets. The value of 40000 was picked as it is just beyond 378 * what the hardware can receive per second if in low latency 379 * mode. 380 */ 381#define RX_ULTRA_PACKET_RATE 40000 382 383 if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) && 384 (&qv->rx == rc)) 385 new_latency_range = I40E_ULTRA_LATENCY; 386 387 rc->latency_range = new_latency_range; 388 389 switch (new_latency_range) { 390 case I40E_LOWEST_LATENCY: 391 new_itr = I40E_ITR_50K; 392 break; 393 case I40E_LOW_LATENCY: 394 new_itr = I40E_ITR_20K; 395 break; 396 case I40E_BULK_LATENCY: 397 new_itr = I40E_ITR_18K; 398 break; 399 case I40E_ULTRA_LATENCY: 400 new_itr = I40E_ITR_8K; 401 break; 402 default: 403 break; 404 } 405 406 rc->total_bytes = 0; 407 rc->total_packets = 0; 408 409 if (new_itr != rc->itr) { 410 rc->itr = new_itr; 411 return true; 412 } 413 414 return false; 415} 416 417/* 418 * i40evf_setup_tx_descriptors - Allocate the Tx descriptors 419 * @tx_ring: the tx ring to set up 420 * 421 * Return 0 on success, negative on error 422 **/ 423int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring) 424{ 425 struct device *dev = tx_ring->dev; 426 int bi_size; 427 428 if (!dev) 429 return -ENOMEM; 430 431 /* warn if we are about to overwrite the pointer */ 432 WARN_ON(tx_ring->tx_bi); 433 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; 434 tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL); 435 if (!tx_ring->tx_bi) 436 goto err; 437 438 /* round up to nearest 4K */ 439 tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc); 440 /* add u32 for head writeback, align after this takes care of 441 * guaranteeing this is at least one cache line in size 442 */ 443 tx_ring->size += sizeof(u32); 444 tx_ring->size = ALIGN(tx_ring->size, 4096); 445 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 446 &tx_ring->dma, GFP_KERNEL); 447 if (!tx_ring->desc) { 448 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", 449 tx_ring->size); 450 goto err; 451 } 452 453 
tx_ring->next_to_use = 0; 454 tx_ring->next_to_clean = 0; 455 return 0; 456 457err: 458 kfree(tx_ring->tx_bi); 459 tx_ring->tx_bi = NULL; 460 return -ENOMEM; 461} 462 463/** 464 * i40evf_clean_rx_ring - Free Rx buffers 465 * @rx_ring: ring to be cleaned 466 **/ 467void i40evf_clean_rx_ring(struct i40e_ring *rx_ring) 468{ 469 struct device *dev = rx_ring->dev; 470 struct i40e_rx_buffer *rx_bi; 471 unsigned long bi_size; 472 u16 i; 473 474 /* ring already cleared, nothing to do */ 475 if (!rx_ring->rx_bi) 476 return; 477 478 if (ring_is_ps_enabled(rx_ring)) { 479 int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count; 480 481 rx_bi = &rx_ring->rx_bi[0]; 482 if (rx_bi->hdr_buf) { 483 dma_free_coherent(dev, 484 bufsz, 485 rx_bi->hdr_buf, 486 rx_bi->dma); 487 for (i = 0; i < rx_ring->count; i++) { 488 rx_bi = &rx_ring->rx_bi[i]; 489 rx_bi->dma = 0; 490 rx_bi->hdr_buf = NULL; 491 } 492 } 493 } 494 /* Free all the Rx ring sk_buffs */ 495 for (i = 0; i < rx_ring->count; i++) { 496 rx_bi = &rx_ring->rx_bi[i]; 497 if (rx_bi->dma) { 498 dma_unmap_single(dev, 499 rx_bi->dma, 500 rx_ring->rx_buf_len, 501 DMA_FROM_DEVICE); 502 rx_bi->dma = 0; 503 } 504 if (rx_bi->skb) { 505 dev_kfree_skb(rx_bi->skb); 506 rx_bi->skb = NULL; 507 } 508 if (rx_bi->page) { 509 if (rx_bi->page_dma) { 510 dma_unmap_page(dev, 511 rx_bi->page_dma, 512 PAGE_SIZE / 2, 513 DMA_FROM_DEVICE); 514 rx_bi->page_dma = 0; 515 } 516 __free_page(rx_bi->page); 517 rx_bi->page = NULL; 518 rx_bi->page_offset = 0; 519 } 520 } 521 522 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; 523 memset(rx_ring->rx_bi, 0, bi_size); 524 525 /* Zero out the descriptor ring */ 526 memset(rx_ring->desc, 0, rx_ring->size); 527 528 rx_ring->next_to_clean = 0; 529 rx_ring->next_to_use = 0; 530} 531 532/** 533 * i40evf_free_rx_resources - Free Rx resources 534 * @rx_ring: ring to clean the resources from 535 * 536 * Free all receive software resources 537 **/ 538void i40evf_free_rx_resources(struct i40e_ring *rx_ring) 539{ 540 i40evf_clean_rx_ring(rx_ring); 541 kfree(rx_ring->rx_bi); 542 rx_ring->rx_bi = NULL; 543 544 if (rx_ring->desc) { 545 dma_free_coherent(rx_ring->dev, rx_ring->size, 546 rx_ring->desc, rx_ring->dma); 547 rx_ring->desc = NULL; 548 } 549} 550 551/** 552 * i40evf_alloc_rx_headers - allocate rx header buffers 553 * @rx_ring: ring to alloc buffers 554 * 555 * Allocate rx header buffers for the entire ring. As these are static, 556 * this is only called when setting up a new ring. 557 **/ 558void i40evf_alloc_rx_headers(struct i40e_ring *rx_ring) 559{ 560 struct device *dev = rx_ring->dev; 561 struct i40e_rx_buffer *rx_bi; 562 dma_addr_t dma; 563 void *buffer; 564 int buf_size; 565 int i; 566 567 if (rx_ring->rx_bi[0].hdr_buf) 568 return; 569 /* Make sure the buffers don't cross cache line boundaries. 
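 * Each header buffer is rounded up to a multiple of 256 bytes below, so,
 * for example, a 96-byte rx_hdr_len still consumes a 256-byte slice of the
 * coherent allocation for every descriptor.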
*/ 570 buf_size = ALIGN(rx_ring->rx_hdr_len, 256); 571 buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count, 572 &dma, GFP_KERNEL); 573 if (!buffer) 574 return; 575 for (i = 0; i < rx_ring->count; i++) { 576 rx_bi = &rx_ring->rx_bi[i]; 577 rx_bi->dma = dma + (i * buf_size); 578 rx_bi->hdr_buf = buffer + (i * buf_size); 579 } 580} 581 582/** 583 * i40evf_setup_rx_descriptors - Allocate Rx descriptors 584 * @rx_ring: Rx descriptor ring (for a specific queue) to setup 585 * 586 * Returns 0 on success, negative on failure 587 **/ 588int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring) 589{ 590 struct device *dev = rx_ring->dev; 591 int bi_size; 592 593 /* warn if we are about to overwrite the pointer */ 594 WARN_ON(rx_ring->rx_bi); 595 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; 596 rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); 597 if (!rx_ring->rx_bi) 598 goto err; 599 600 u64_stats_init(&rx_ring->syncp); 601 602 /* Round up to nearest 4K */ 603 rx_ring->size = ring_is_16byte_desc_enabled(rx_ring) 604 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc) 605 : rx_ring->count * sizeof(union i40e_32byte_rx_desc); 606 rx_ring->size = ALIGN(rx_ring->size, 4096); 607 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 608 &rx_ring->dma, GFP_KERNEL); 609 610 if (!rx_ring->desc) { 611 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", 612 rx_ring->size); 613 goto err; 614 } 615 616 rx_ring->next_to_clean = 0; 617 rx_ring->next_to_use = 0; 618 619 return 0; 620err: 621 kfree(rx_ring->rx_bi); 622 rx_ring->rx_bi = NULL; 623 return -ENOMEM; 624} 625 626/** 627 * i40e_release_rx_desc - Store the new tail and head values 628 * @rx_ring: ring to bump 629 * @val: new head index 630 **/ 631static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) 632{ 633 rx_ring->next_to_use = val; 634 /* Force memory writes to complete before letting h/w 635 * know there are new descriptors to fetch. (Only 636 * applicable for weak-ordered memory model archs, 637 * such as IA-64). 638 */ 639 wmb(); 640 writel(val, rx_ring->tail); 641} 642 643/** 644 * i40evf_alloc_rx_buffers_ps - Replace used receive buffers; packet split 645 * @rx_ring: ring to place buffers on 646 * @cleaned_count: number of buffers to replace 647 **/ 648void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) 649{ 650 u16 i = rx_ring->next_to_use; 651 union i40e_rx_desc *rx_desc; 652 struct i40e_rx_buffer *bi; 653 654 /* do nothing if no valid netdev defined */ 655 if (!rx_ring->netdev || !cleaned_count) 656 return; 657 658 while (cleaned_count--) { 659 rx_desc = I40E_RX_DESC(rx_ring, i); 660 bi = &rx_ring->rx_bi[i]; 661 662 if (bi->skb) /* desc is in use */ 663 goto no_buffers; 664 if (!bi->page) { 665 bi->page = alloc_page(GFP_ATOMIC); 666 if (!bi->page) { 667 rx_ring->rx_stats.alloc_page_failed++; 668 goto no_buffers; 669 } 670 } 671 672 if (!bi->page_dma) { 673 /* use a half page if we're re-using */ 674 bi->page_offset ^= PAGE_SIZE / 2; 675 bi->page_dma = dma_map_page(rx_ring->dev, 676 bi->page, 677 bi->page_offset, 678 PAGE_SIZE / 2, 679 DMA_FROM_DEVICE); 680 if (dma_mapping_error(rx_ring->dev, 681 bi->page_dma)) { 682 rx_ring->rx_stats.alloc_page_failed++; 683 bi->page_dma = 0; 684 goto no_buffers; 685 } 686 } 687 688 dma_sync_single_range_for_device(rx_ring->dev, 689 bi->dma, 690 0, 691 rx_ring->rx_hdr_len, 692 DMA_FROM_DEVICE); 693 /* Refresh the desc even if buffer_addrs didn't change 694 * because each write-back erases this info. 
695 */ 696 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); 697 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); 698 i++; 699 if (i == rx_ring->count) 700 i = 0; 701 } 702 703no_buffers: 704 if (rx_ring->next_to_use != i) 705 i40e_release_rx_desc(rx_ring, i); 706} 707 708/** 709 * i40evf_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer 710 * @rx_ring: ring to place buffers on 711 * @cleaned_count: number of buffers to replace 712 **/ 713void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) 714{ 715 u16 i = rx_ring->next_to_use; 716 union i40e_rx_desc *rx_desc; 717 struct i40e_rx_buffer *bi; 718 struct sk_buff *skb; 719 720 /* do nothing if no valid netdev defined */ 721 if (!rx_ring->netdev || !cleaned_count) 722 return; 723 724 while (cleaned_count--) { 725 rx_desc = I40E_RX_DESC(rx_ring, i); 726 bi = &rx_ring->rx_bi[i]; 727 skb = bi->skb; 728 729 if (!skb) { 730 skb = netdev_alloc_skb_ip_align(rx_ring->netdev, 731 rx_ring->rx_buf_len); 732 if (!skb) { 733 rx_ring->rx_stats.alloc_buff_failed++; 734 goto no_buffers; 735 } 736 /* initialize queue mapping */ 737 skb_record_rx_queue(skb, rx_ring->queue_index); 738 bi->skb = skb; 739 } 740 741 if (!bi->dma) { 742 bi->dma = dma_map_single(rx_ring->dev, 743 skb->data, 744 rx_ring->rx_buf_len, 745 DMA_FROM_DEVICE); 746 if (dma_mapping_error(rx_ring->dev, bi->dma)) { 747 rx_ring->rx_stats.alloc_buff_failed++; 748 bi->dma = 0; 749 goto no_buffers; 750 } 751 } 752 753 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); 754 rx_desc->read.hdr_addr = 0; 755 i++; 756 if (i == rx_ring->count) 757 i = 0; 758 } 759 760no_buffers: 761 if (rx_ring->next_to_use != i) 762 i40e_release_rx_desc(rx_ring, i); 763} 764 765/** 766 * i40e_receive_skb - Send a completed packet up the stack 767 * @rx_ring: rx ring in play 768 * @skb: packet to send up 769 * @vlan_tag: vlan tag for packet 770 **/ 771static void i40e_receive_skb(struct i40e_ring *rx_ring, 772 struct sk_buff *skb, u16 vlan_tag) 773{ 774 struct i40e_q_vector *q_vector = rx_ring->q_vector; 775 776 if (vlan_tag & VLAN_VID_MASK) 777 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); 778 779 napi_gro_receive(&q_vector->napi, skb); 780} 781 782/** 783 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum 784 * @vsi: the VSI we care about 785 * @skb: skb currently being received and modified 786 * @rx_status: status value of last descriptor in packet 787 * @rx_error: error value of last descriptor in packet 788 * @rx_ptype: ptype value of last descriptor in packet 789 **/ 790static inline void i40e_rx_checksum(struct i40e_vsi *vsi, 791 struct sk_buff *skb, 792 u32 rx_status, 793 u32 rx_error, 794 u16 rx_ptype) 795{ 796 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); 797 bool ipv4 = false, ipv6 = false; 798 bool ipv4_tunnel, ipv6_tunnel; 799 __wsum rx_udp_csum; 800 struct iphdr *iph; 801 __sum16 csum; 802 803 ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && 804 (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); 805 ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && 806 (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); 807 808 skb->ip_summed = CHECKSUM_NONE; 809 810 /* Rx csum enabled and ip headers found? */ 811 if (!(vsi->netdev->features & NETIF_F_RXCSUM)) 812 return; 813 814 /* did the hardware decode the packet and checksum? 
 */
        if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
                return;

        /* both known and outer_ip must be set for the below code to work */
        if (!(decoded.known && decoded.outer_ip))
                return;

        if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
            decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
                ipv4 = true;
        else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
                 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
                ipv6 = true;

        if (ipv4 &&
            (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
                         BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
                goto checksum_fail;

        /* likely incorrect csum if alternate IP extension headers found */
        if (ipv6 &&
            rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
                /* don't increment checksum err here, non-fatal err */
                return;

        /* there was some L4 error, count error and punt packet to the stack */
        if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
                goto checksum_fail;

        /* handle packets that were not able to be checksummed due to
         * arrival speed; in this case the stack can compute the csum.
         */
        if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
                return;

        /* If VXLAN traffic has an outer UDPv4 checksum we need to check
         * it in the driver, hardware does not do it for us.
         * Since the L3L4P bit was set we assume a valid IHL value (>= 5),
         * so the total length of the IPv4 header is IHL * 4 bytes.
         * The UDP_0 bit *may* be set if the *inner* header is UDP.
         */
        if (ipv4_tunnel) {
                skb->transport_header = skb->mac_header +
                                        sizeof(struct ethhdr) +
                                        (ip_hdr(skb)->ihl * 4);

                /* Add 4 bytes for VLAN tagged packets */
                skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
                                          skb->protocol == htons(ETH_P_8021AD))
                                          ?
VLAN_HLEN : 0; 866 867 if ((ip_hdr(skb)->protocol == IPPROTO_UDP) && 868 (udp_hdr(skb)->check != 0)) { 869 rx_udp_csum = udp_csum(skb); 870 iph = ip_hdr(skb); 871 csum = csum_tcpudp_magic(iph->saddr, iph->daddr, 872 (skb->len - 873 skb_transport_offset(skb)), 874 IPPROTO_UDP, rx_udp_csum); 875 876 if (udp_hdr(skb)->check != csum) 877 goto checksum_fail; 878 879 } /* else its GRE and so no outer UDP header */ 880 } 881 882 skb->ip_summed = CHECKSUM_UNNECESSARY; 883 skb->csum_level = ipv4_tunnel || ipv6_tunnel; 884 885 return; 886 887checksum_fail: 888 vsi->back->hw_csum_rx_error++; 889} 890 891/** 892 * i40e_rx_hash - returns the hash value from the Rx descriptor 893 * @ring: descriptor ring 894 * @rx_desc: specific descriptor 895 **/ 896static inline u32 i40e_rx_hash(struct i40e_ring *ring, 897 union i40e_rx_desc *rx_desc) 898{ 899 const __le64 rss_mask = 900 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH << 901 I40E_RX_DESC_STATUS_FLTSTAT_SHIFT); 902 903 if ((ring->netdev->features & NETIF_F_RXHASH) && 904 (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) 905 return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); 906 else 907 return 0; 908} 909 910/** 911 * i40e_ptype_to_hash - get a hash type 912 * @ptype: the ptype value from the descriptor 913 * 914 * Returns a hash type to be used by skb_set_hash 915 **/ 916static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype) 917{ 918 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); 919 920 if (!decoded.known) 921 return PKT_HASH_TYPE_NONE; 922 923 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 924 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4) 925 return PKT_HASH_TYPE_L4; 926 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 927 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3) 928 return PKT_HASH_TYPE_L3; 929 else 930 return PKT_HASH_TYPE_L2; 931} 932 933/** 934 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split 935 * @rx_ring: rx ring to clean 936 * @budget: how many cleans we're allowed 937 * 938 * Returns true if there's any budget left (e.g. the clean is finished) 939 **/ 940static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) 941{ 942 unsigned int total_rx_bytes = 0, total_rx_packets = 0; 943 u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; 944 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); 945 const int current_node = numa_mem_id(); 946 struct i40e_vsi *vsi = rx_ring->vsi; 947 u16 i = rx_ring->next_to_clean; 948 union i40e_rx_desc *rx_desc; 949 u32 rx_error, rx_status; 950 u8 rx_ptype; 951 u64 qword; 952 953 do { 954 struct i40e_rx_buffer *rx_bi; 955 struct sk_buff *skb; 956 u16 vlan_tag; 957 /* return some buffers to hardware, one at a time is too slow */ 958 if (cleaned_count >= I40E_RX_BUFFER_WRITE) { 959 i40evf_alloc_rx_buffers_ps(rx_ring, cleaned_count); 960 cleaned_count = 0; 961 } 962 963 i = rx_ring->next_to_clean; 964 rx_desc = I40E_RX_DESC(rx_ring, i); 965 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); 966 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> 967 I40E_RXD_QW1_STATUS_SHIFT; 968 969 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT))) 970 break; 971 972 /* This memory barrier is needed to keep us from reading 973 * any other fields out of the rx_desc until we know the 974 * DD bit is set. 
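 * The dma_rmb() that follows provides that ordering: the length, error
 * and ptype words of the descriptor are only read after the DD bit has
 * been observed.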
975 */ 976 dma_rmb(); 977 rx_bi = &rx_ring->rx_bi[i]; 978 skb = rx_bi->skb; 979 if (likely(!skb)) { 980 skb = netdev_alloc_skb_ip_align(rx_ring->netdev, 981 rx_ring->rx_hdr_len); 982 if (!skb) { 983 rx_ring->rx_stats.alloc_buff_failed++; 984 break; 985 } 986 987 /* initialize queue mapping */ 988 skb_record_rx_queue(skb, rx_ring->queue_index); 989 /* we are reusing so sync this buffer for CPU use */ 990 dma_sync_single_range_for_cpu(rx_ring->dev, 991 rx_bi->dma, 992 0, 993 rx_ring->rx_hdr_len, 994 DMA_FROM_DEVICE); 995 } 996 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> 997 I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 998 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >> 999 I40E_RXD_QW1_LENGTH_HBUF_SHIFT; 1000 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >> 1001 I40E_RXD_QW1_LENGTH_SPH_SHIFT; 1002 1003 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> 1004 I40E_RXD_QW1_ERROR_SHIFT; 1005 rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT); 1006 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT); 1007 1008 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> 1009 I40E_RXD_QW1_PTYPE_SHIFT; 1010 prefetch(rx_bi->page); 1011 rx_bi->skb = NULL; 1012 cleaned_count++; 1013 if (rx_hbo || rx_sph) { 1014 int len; 1015 1016 if (rx_hbo) 1017 len = I40E_RX_HDR_SIZE; 1018 else 1019 len = rx_header_len; 1020 memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len); 1021 } else if (skb->len == 0) { 1022 int len; 1023 1024 len = (rx_packet_len > skb_headlen(skb) ? 1025 skb_headlen(skb) : rx_packet_len); 1026 memcpy(__skb_put(skb, len), 1027 rx_bi->page + rx_bi->page_offset, 1028 len); 1029 rx_bi->page_offset += len; 1030 rx_packet_len -= len; 1031 } 1032 1033 /* Get the rest of the data if this was a header split */ 1034 if (rx_packet_len) { 1035 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, 1036 rx_bi->page, 1037 rx_bi->page_offset, 1038 rx_packet_len); 1039 1040 skb->len += rx_packet_len; 1041 skb->data_len += rx_packet_len; 1042 skb->truesize += rx_packet_len; 1043 1044 if ((page_count(rx_bi->page) == 1) && 1045 (page_to_nid(rx_bi->page) == current_node)) 1046 get_page(rx_bi->page); 1047 else 1048 rx_bi->page = NULL; 1049 1050 dma_unmap_page(rx_ring->dev, 1051 rx_bi->page_dma, 1052 PAGE_SIZE / 2, 1053 DMA_FROM_DEVICE); 1054 rx_bi->page_dma = 0; 1055 } 1056 I40E_RX_INCREMENT(rx_ring, i); 1057 1058 if (unlikely( 1059 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) { 1060 struct i40e_rx_buffer *next_buffer; 1061 1062 next_buffer = &rx_ring->rx_bi[i]; 1063 next_buffer->skb = skb; 1064 rx_ring->rx_stats.non_eop_descs++; 1065 continue; 1066 } 1067 1068 /* ERR_MASK will only have valid bits if EOP set */ 1069 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) { 1070 dev_kfree_skb_any(skb); 1071 continue; 1072 } 1073 1074 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc), 1075 i40e_ptype_to_hash(rx_ptype)); 1076 /* probably a little skewed due to removing CRC */ 1077 total_rx_bytes += skb->len; 1078 total_rx_packets++; 1079 1080 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1081 1082 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype); 1083 1084 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) 1085 ? 
le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) 1086 : 0; 1087#ifdef I40E_FCOE 1088 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) { 1089 dev_kfree_skb_any(skb); 1090 continue; 1091 } 1092#endif 1093 skb_mark_napi_id(skb, &rx_ring->q_vector->napi); 1094 i40e_receive_skb(rx_ring, skb, vlan_tag); 1095 1096 rx_desc->wb.qword1.status_error_len = 0; 1097 1098 } while (likely(total_rx_packets < budget)); 1099 1100 u64_stats_update_begin(&rx_ring->syncp); 1101 rx_ring->stats.packets += total_rx_packets; 1102 rx_ring->stats.bytes += total_rx_bytes; 1103 u64_stats_update_end(&rx_ring->syncp); 1104 rx_ring->q_vector->rx.total_packets += total_rx_packets; 1105 rx_ring->q_vector->rx.total_bytes += total_rx_bytes; 1106 1107 return total_rx_packets; 1108} 1109 1110/** 1111 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer 1112 * @rx_ring: rx ring to clean 1113 * @budget: how many cleans we're allowed 1114 * 1115 * Returns number of packets cleaned 1116 **/ 1117static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) 1118{ 1119 unsigned int total_rx_bytes = 0, total_rx_packets = 0; 1120 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); 1121 struct i40e_vsi *vsi = rx_ring->vsi; 1122 union i40e_rx_desc *rx_desc; 1123 u32 rx_error, rx_status; 1124 u16 rx_packet_len; 1125 u8 rx_ptype; 1126 u64 qword; 1127 u16 i; 1128 1129 do { 1130 struct i40e_rx_buffer *rx_bi; 1131 struct sk_buff *skb; 1132 u16 vlan_tag; 1133 /* return some buffers to hardware, one at a time is too slow */ 1134 if (cleaned_count >= I40E_RX_BUFFER_WRITE) { 1135 i40evf_alloc_rx_buffers_1buf(rx_ring, cleaned_count); 1136 cleaned_count = 0; 1137 } 1138 1139 i = rx_ring->next_to_clean; 1140 rx_desc = I40E_RX_DESC(rx_ring, i); 1141 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); 1142 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> 1143 I40E_RXD_QW1_STATUS_SHIFT; 1144 1145 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT))) 1146 break; 1147 1148 /* This memory barrier is needed to keep us from reading 1149 * any other fields out of the rx_desc until we know the 1150 * DD bit is set. 
1151 */ 1152 dma_rmb(); 1153 1154 rx_bi = &rx_ring->rx_bi[i]; 1155 skb = rx_bi->skb; 1156 prefetch(skb->data); 1157 1158 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> 1159 I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 1160 1161 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> 1162 I40E_RXD_QW1_ERROR_SHIFT; 1163 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT); 1164 1165 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> 1166 I40E_RXD_QW1_PTYPE_SHIFT; 1167 rx_bi->skb = NULL; 1168 cleaned_count++; 1169 1170 /* Get the header and possibly the whole packet 1171 * If this is an skb from previous receive dma will be 0 1172 */ 1173 skb_put(skb, rx_packet_len); 1174 dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len, 1175 DMA_FROM_DEVICE); 1176 rx_bi->dma = 0; 1177 1178 I40E_RX_INCREMENT(rx_ring, i); 1179 1180 if (unlikely( 1181 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) { 1182 rx_ring->rx_stats.non_eop_descs++; 1183 continue; 1184 } 1185 1186 /* ERR_MASK will only have valid bits if EOP set */ 1187 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) { 1188 dev_kfree_skb_any(skb); 1189 continue; 1190 } 1191 1192 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc), 1193 i40e_ptype_to_hash(rx_ptype)); 1194 /* probably a little skewed due to removing CRC */ 1195 total_rx_bytes += skb->len; 1196 total_rx_packets++; 1197 1198 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1199 1200 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype); 1201 1202 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) 1203 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) 1204 : 0; 1205 i40e_receive_skb(rx_ring, skb, vlan_tag); 1206 1207 rx_desc->wb.qword1.status_error_len = 0; 1208 } while (likely(total_rx_packets < budget)); 1209 1210 u64_stats_update_begin(&rx_ring->syncp); 1211 rx_ring->stats.packets += total_rx_packets; 1212 rx_ring->stats.bytes += total_rx_bytes; 1213 u64_stats_update_end(&rx_ring->syncp); 1214 rx_ring->q_vector->rx.total_packets += total_rx_packets; 1215 rx_ring->q_vector->rx.total_bytes += total_rx_bytes; 1216 1217 return total_rx_packets; 1218} 1219 1220static u32 i40e_buildreg_itr(const int type, const u16 itr) 1221{ 1222 u32 val; 1223 1224 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | 1225 I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | 1226 (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | 1227 (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); 1228 1229 return val; 1230} 1231 1232/* a small macro to shorten up some long lines */ 1233#define INTREG I40E_VFINT_DYN_CTLN1 1234 1235/** 1236 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt 1237 * @vsi: the VSI we care about 1238 * @q_vector: q_vector for which itr is being updated and interrupt enabled 1239 * 1240 **/ 1241static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, 1242 struct i40e_q_vector *q_vector) 1243{ 1244 struct i40e_hw *hw = &vsi->back->hw; 1245 bool rx = false, tx = false; 1246 u32 rxval, txval; 1247 int vector; 1248 1249 vector = (q_vector->v_idx + vsi->base_vector); 1250 1251 /* avoid dynamic calculation if in countdown mode OR if 1252 * all dynamic is disabled 1253 */ 1254 rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); 1255 1256 if (q_vector->itr_countdown > 0 || 1257 (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && 1258 !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { 1259 goto enable_int; 1260 } 1261 1262 if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { 1263 rx = i40e_set_new_dynamic_itr(&q_vector->rx); 1264 rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); 1265 } 1266 if 
(ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { 1267 tx = i40e_set_new_dynamic_itr(&q_vector->tx); 1268 txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); 1269 } 1270 if (rx || tx) { 1271 /* get the higher of the two ITR adjustments and 1272 * use the same value for both ITR registers 1273 * when in adaptive mode (Rx and/or Tx) 1274 */ 1275 u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); 1276 1277 q_vector->tx.itr = q_vector->rx.itr = itr; 1278 txval = i40e_buildreg_itr(I40E_TX_ITR, itr); 1279 tx = true; 1280 rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); 1281 rx = true; 1282 } 1283 1284 /* only need to enable the interrupt once, but need 1285 * to possibly update both ITR values 1286 */ 1287 if (rx) { 1288 /* set the INTENA_MSK_MASK so that this first write 1289 * won't actually enable the interrupt, instead just 1290 * updating the ITR (it's bit 31 PF and VF) 1291 */ 1292 rxval |= BIT(31); 1293 /* don't check _DOWN because interrupt isn't being enabled */ 1294 wr32(hw, INTREG(vector - 1), rxval); 1295 } 1296 1297enable_int: 1298 if (!test_bit(__I40E_DOWN, &vsi->state)) 1299 wr32(hw, INTREG(vector - 1), txval); 1300 1301 if (q_vector->itr_countdown) 1302 q_vector->itr_countdown--; 1303 else 1304 q_vector->itr_countdown = ITR_COUNTDOWN_START; 1305 1306} 1307 1308/** 1309 * i40evf_napi_poll - NAPI polling Rx/Tx cleanup routine 1310 * @napi: napi struct with our devices info in it 1311 * @budget: amount of work driver is allowed to do this pass, in packets 1312 * 1313 * This function will clean all queues associated with a q_vector. 1314 * 1315 * Returns the amount of work done 1316 **/ 1317int i40evf_napi_poll(struct napi_struct *napi, int budget) 1318{ 1319 struct i40e_q_vector *q_vector = 1320 container_of(napi, struct i40e_q_vector, napi); 1321 struct i40e_vsi *vsi = q_vector->vsi; 1322 struct i40e_ring *ring; 1323 bool clean_complete = true; 1324 bool arm_wb = false; 1325 int budget_per_ring; 1326 int work_done = 0; 1327 1328 if (test_bit(__I40E_DOWN, &vsi->state)) { 1329 napi_complete(napi); 1330 return 0; 1331 } 1332 1333 /* Since the actual Tx work is minimal, we can give the Tx a larger 1334 * budget and be more aggressive about cleaning up the Tx descriptors. 1335 */ 1336 i40e_for_each_ring(ring, q_vector->tx) { 1337 clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); 1338 arm_wb |= ring->arm_wb; 1339 ring->arm_wb = false; 1340 } 1341 1342 /* Handle case where we are called by netpoll with a budget of 0 */ 1343 if (budget <= 0) 1344 goto tx_only; 1345 1346 /* We attempt to distribute budget to each Rx queue fairly, but don't 1347 * allow the budget to go below 1 because that would exit polling early. 
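 * For example, a NAPI budget of 64 spread over 4 ring pairs lets each Rx
 * ring clean up to 16 packets in this poll.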
 */
        budget_per_ring = max(budget/q_vector->num_ringpairs, 1);

        i40e_for_each_ring(ring, q_vector->rx) {
                int cleaned;

                if (ring_is_ps_enabled(ring))
                        cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
                else
                        cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);

                work_done += cleaned;
                /* if we didn't clean as many as budgeted, we must be done */
                clean_complete &= (budget_per_ring != cleaned);
        }

        /* If work is not complete, return the full budget so polling continues */
        if (!clean_complete) {
tx_only:
                if (arm_wb)
                        i40evf_force_wb(vsi, q_vector);
                return budget;
        }

        if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
                q_vector->arm_wb_state = false;

        /* Work is done so exit the polling mode and re-enable the interrupt */
        napi_complete_done(napi, work_done);
        i40e_update_enable_itr(vsi, q_vector);
        return 0;
}

/**
 * i40evf_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
 * @skb: send buffer
 * @tx_ring: ring to send buffer on
 * @flags: the tx flags to be set
 *
 * Checks the skb and sets up the generic transmit flags related to VLAN
 * tagging for the HW, such as VLAN, DCB, etc.
 *
 * Returns an error code if the frame should be dropped; otherwise returns 0
 * to indicate the flags have been set properly.
 **/
static inline int i40evf_tx_prepare_vlan_flags(struct sk_buff *skb,
                                               struct i40e_ring *tx_ring,
                                               u32 *flags)
{
        __be16 protocol = skb->protocol;
        u32 tx_flags = 0;

        if (protocol == htons(ETH_P_8021Q) &&
            !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
                /* When HW VLAN acceleration is turned off by the user the
                 * stack sets the protocol to 8021q so that the driver
                 * can take any steps required to support the SW-only
                 * VLAN handling. In our case the driver doesn't need
                 * to take any further steps, so just set the protocol
                 * to the encapsulated ethertype.
1408 */ 1409 skb->protocol = vlan_get_protocol(skb); 1410 goto out; 1411 } 1412 1413 /* if we have a HW VLAN tag being added, default to the HW one */ 1414 if (skb_vlan_tag_present(skb)) { 1415 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; 1416 tx_flags |= I40E_TX_FLAGS_HW_VLAN; 1417 /* else if it is a SW VLAN, check the next protocol and store the tag */ 1418 } else if (protocol == htons(ETH_P_8021Q)) { 1419 struct vlan_hdr *vhdr, _vhdr; 1420 1421 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); 1422 if (!vhdr) 1423 return -EINVAL; 1424 1425 protocol = vhdr->h_vlan_encapsulated_proto; 1426 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT; 1427 tx_flags |= I40E_TX_FLAGS_SW_VLAN; 1428 } 1429 1430out: 1431 *flags = tx_flags; 1432 return 0; 1433} 1434 1435/** 1436 * i40e_tso - set up the tso context descriptor 1437 * @tx_ring: ptr to the ring to send 1438 * @skb: ptr to the skb we're sending 1439 * @hdr_len: ptr to the size of the packet header 1440 * @cd_tunneling: ptr to context descriptor bits 1441 * 1442 * Returns 0 if no TSO can happen, 1 if tso is going, or error 1443 **/ 1444static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, 1445 u8 *hdr_len, u64 *cd_type_cmd_tso_mss, 1446 u32 *cd_tunneling) 1447{ 1448 u32 cd_cmd, cd_tso_len, cd_mss; 1449 struct ipv6hdr *ipv6h; 1450 struct tcphdr *tcph; 1451 struct iphdr *iph; 1452 u32 l4len; 1453 int err; 1454 1455 if (!skb_is_gso(skb)) 1456 return 0; 1457 1458 err = skb_cow_head(skb, 0); 1459 if (err < 0) 1460 return err; 1461 1462 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); 1463 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); 1464 1465 if (iph->version == 4) { 1466 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); 1467 iph->tot_len = 0; 1468 iph->check = 0; 1469 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 1470 0, IPPROTO_TCP, 0); 1471 } else if (ipv6h->version == 6) { 1472 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); 1473 ipv6h->payload_len = 0; 1474 tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 1475 0, IPPROTO_TCP, 0); 1476 } 1477 1478 l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); 1479 *hdr_len = (skb->encapsulation 1480 ? 
(skb_inner_transport_header(skb) - skb->data) 1481 : skb_transport_offset(skb)) + l4len; 1482 1483 /* find the field values */ 1484 cd_cmd = I40E_TX_CTX_DESC_TSO; 1485 cd_tso_len = skb->len - *hdr_len; 1486 cd_mss = skb_shinfo(skb)->gso_size; 1487 *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | 1488 ((u64)cd_tso_len << 1489 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | 1490 ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); 1491 return 1; 1492} 1493 1494/** 1495 * i40e_tx_enable_csum - Enable Tx checksum offloads 1496 * @skb: send buffer 1497 * @tx_flags: pointer to Tx flags currently set 1498 * @td_cmd: Tx descriptor command bits to set 1499 * @td_offset: Tx descriptor header offsets to set 1500 * @cd_tunneling: ptr to context desc bits 1501 **/ 1502static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, 1503 u32 *td_cmd, u32 *td_offset, 1504 struct i40e_ring *tx_ring, 1505 u32 *cd_tunneling) 1506{ 1507 struct ipv6hdr *this_ipv6_hdr; 1508 unsigned int this_tcp_hdrlen; 1509 struct iphdr *this_ip_hdr; 1510 u32 network_hdr_len; 1511 u8 l4_hdr = 0; 1512 struct udphdr *oudph; 1513 struct iphdr *oiph; 1514 u32 l4_tunnel = 0; 1515 1516 if (skb->encapsulation) { 1517 switch (ip_hdr(skb)->protocol) { 1518 case IPPROTO_UDP: 1519 oudph = udp_hdr(skb); 1520 oiph = ip_hdr(skb); 1521 l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; 1522 *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; 1523 break; 1524 default: 1525 return; 1526 } 1527 network_hdr_len = skb_inner_network_header_len(skb); 1528 this_ip_hdr = inner_ip_hdr(skb); 1529 this_ipv6_hdr = inner_ipv6_hdr(skb); 1530 this_tcp_hdrlen = inner_tcp_hdrlen(skb); 1531 1532 if (*tx_flags & I40E_TX_FLAGS_IPV4) { 1533 if (*tx_flags & I40E_TX_FLAGS_TSO) { 1534 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; 1535 ip_hdr(skb)->check = 0; 1536 } else { 1537 *cd_tunneling |= 1538 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; 1539 } 1540 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { 1541 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; 1542 if (*tx_flags & I40E_TX_FLAGS_TSO) 1543 ip_hdr(skb)->check = 0; 1544 } 1545 1546 /* Now set the ctx descriptor fields */ 1547 *cd_tunneling |= (skb_network_header_len(skb) >> 2) << 1548 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | 1549 l4_tunnel | 1550 ((skb_inner_network_offset(skb) - 1551 skb_transport_offset(skb)) >> 1) << 1552 I40E_TXD_CTX_QW0_NATLEN_SHIFT; 1553 if (this_ip_hdr->version == 6) { 1554 *tx_flags &= ~I40E_TX_FLAGS_IPV4; 1555 *tx_flags |= I40E_TX_FLAGS_IPV6; 1556 } 1557 1558 1559 if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && 1560 (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && 1561 (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { 1562 oudph->check = ~csum_tcpudp_magic(oiph->saddr, 1563 oiph->daddr, 1564 (skb->len - skb_transport_offset(skb)), 1565 IPPROTO_UDP, 0); 1566 *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK; 1567 } 1568 } else { 1569 network_hdr_len = skb_network_header_len(skb); 1570 this_ip_hdr = ip_hdr(skb); 1571 this_ipv6_hdr = ipv6_hdr(skb); 1572 this_tcp_hdrlen = tcp_hdrlen(skb); 1573 } 1574 1575 /* Enable IP checksum offloads */ 1576 if (*tx_flags & I40E_TX_FLAGS_IPV4) { 1577 l4_hdr = this_ip_hdr->protocol; 1578 /* the stack computes the IP header already, the only time we 1579 * need the hardware to recompute it is in the case of TSO. 
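 * (With TSO, each segment carries its own copy of the IPv4 header with a
 * different total length and ID, so the hardware has to recompute the
 * header checksum per segment.)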
1580 */ 1581 if (*tx_flags & I40E_TX_FLAGS_TSO) { 1582 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; 1583 this_ip_hdr->check = 0; 1584 } else { 1585 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; 1586 } 1587 /* Now set the td_offset for IP header length */ 1588 *td_offset = (network_hdr_len >> 2) << 1589 I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 1590 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { 1591 l4_hdr = this_ipv6_hdr->nexthdr; 1592 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; 1593 /* Now set the td_offset for IP header length */ 1594 *td_offset = (network_hdr_len >> 2) << 1595 I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 1596 } 1597 /* words in MACLEN + dwords in IPLEN + dwords in L4Len */ 1598 *td_offset |= (skb_network_offset(skb) >> 1) << 1599 I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 1600 1601 /* Enable L4 checksum offloads */ 1602 switch (l4_hdr) { 1603 case IPPROTO_TCP: 1604 /* enable checksum offloads */ 1605 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; 1606 *td_offset |= (this_tcp_hdrlen >> 2) << 1607 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 1608 break; 1609 case IPPROTO_SCTP: 1610 /* enable SCTP checksum offload */ 1611 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; 1612 *td_offset |= (sizeof(struct sctphdr) >> 2) << 1613 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 1614 break; 1615 case IPPROTO_UDP: 1616 /* enable UDP checksum offload */ 1617 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; 1618 *td_offset |= (sizeof(struct udphdr) >> 2) << 1619 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 1620 break; 1621 default: 1622 break; 1623 } 1624} 1625 1626/** 1627 * i40e_create_tx_ctx Build the Tx context descriptor 1628 * @tx_ring: ring to create the descriptor on 1629 * @cd_type_cmd_tso_mss: Quad Word 1 1630 * @cd_tunneling: Quad Word 0 - bits 0-31 1631 * @cd_l2tag2: Quad Word 0 - bits 32-63 1632 **/ 1633static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, 1634 const u64 cd_type_cmd_tso_mss, 1635 const u32 cd_tunneling, const u32 cd_l2tag2) 1636{ 1637 struct i40e_tx_context_desc *context_desc; 1638 int i = tx_ring->next_to_use; 1639 1640 if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) && 1641 !cd_tunneling && !cd_l2tag2) 1642 return; 1643 1644 /* grab the next descriptor */ 1645 context_desc = I40E_TX_CTXTDESC(tx_ring, i); 1646 1647 i++; 1648 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 1649 1650 /* cpu_to_le32 and assign to struct fields */ 1651 context_desc->tunneling_params = cpu_to_le32(cd_tunneling); 1652 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2); 1653 context_desc->rsvd = cpu_to_le16(0); 1654 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); 1655} 1656 1657 /** 1658 * i40e_chk_linearize - Check if there are more than 8 fragments per packet 1659 * @skb: send buffer 1660 * @tx_flags: collected send information 1661 * 1662 * Note: Our HW can't scatter-gather more than 8 fragments to build 1663 * a packet on the wire and so we need to figure out the cases where we 1664 * need to linearize the skb. 
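 *
 * Illustration (made-up sizes): a TSO skb with gso_size 9000 whose payload
 * sits in ~1 KB page fragments needs roughly nine fragments per segment,
 * which exceeds the 8-buffer limit, so the walk below flags it and the
 * caller linearizes the skb before mapping it.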
1665 **/ 1666static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags) 1667{ 1668 struct skb_frag_struct *frag; 1669 bool linearize = false; 1670 unsigned int size = 0; 1671 u16 num_frags; 1672 u16 gso_segs; 1673 1674 num_frags = skb_shinfo(skb)->nr_frags; 1675 gso_segs = skb_shinfo(skb)->gso_segs; 1676 1677 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { 1678 u16 j = 0; 1679 1680 if (num_frags < (I40E_MAX_BUFFER_TXD)) 1681 goto linearize_chk_done; 1682 /* try the simple math, if we have too many frags per segment */ 1683 if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > 1684 I40E_MAX_BUFFER_TXD) { 1685 linearize = true; 1686 goto linearize_chk_done; 1687 } 1688 frag = &skb_shinfo(skb)->frags[0]; 1689 /* we might still have more fragments per segment */ 1690 do { 1691 size += skb_frag_size(frag); 1692 frag++; j++; 1693 if ((size >= skb_shinfo(skb)->gso_size) && 1694 (j < I40E_MAX_BUFFER_TXD)) { 1695 size = (size % skb_shinfo(skb)->gso_size); 1696 j = (size) ? 1 : 0; 1697 } 1698 if (j == I40E_MAX_BUFFER_TXD) { 1699 linearize = true; 1700 break; 1701 } 1702 num_frags--; 1703 } while (num_frags); 1704 } else { 1705 if (num_frags >= I40E_MAX_BUFFER_TXD) 1706 linearize = true; 1707 } 1708 1709linearize_chk_done: 1710 return linearize; 1711} 1712 1713/** 1714 * __i40evf_maybe_stop_tx - 2nd level check for tx stop conditions 1715 * @tx_ring: the ring to be checked 1716 * @size: the size buffer we want to assure is available 1717 * 1718 * Returns -EBUSY if a stop is needed, else 0 1719 **/ 1720static inline int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) 1721{ 1722 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); 1723 /* Memory barrier before checking head and tail */ 1724 smp_mb(); 1725 1726 /* Check again in a case another CPU has just made room available. */ 1727 if (likely(I40E_DESC_UNUSED(tx_ring) < size)) 1728 return -EBUSY; 1729 1730 /* A reprieve! 
- use start_queue because it doesn't call schedule */ 1731 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); 1732 ++tx_ring->tx_stats.restart_queue; 1733 return 0; 1734} 1735 1736/** 1737 * i40evf_maybe_stop_tx - 1st level check for tx stop conditions 1738 * @tx_ring: the ring to be checked 1739 * @size: the size buffer we want to assure is available 1740 * 1741 * Returns 0 if stop is not needed 1742 **/ 1743static inline int i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) 1744{ 1745 if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) 1746 return 0; 1747 return __i40evf_maybe_stop_tx(tx_ring, size); 1748} 1749 1750/** 1751 * i40evf_tx_map - Build the Tx descriptor 1752 * @tx_ring: ring to send buffer on 1753 * @skb: send buffer 1754 * @first: first buffer info buffer to use 1755 * @tx_flags: collected send information 1756 * @hdr_len: size of the packet header 1757 * @td_cmd: the command field in the descriptor 1758 * @td_offset: offset for checksum or crc 1759 **/ 1760static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, 1761 struct i40e_tx_buffer *first, u32 tx_flags, 1762 const u8 hdr_len, u32 td_cmd, u32 td_offset) 1763{ 1764 unsigned int data_len = skb->data_len; 1765 unsigned int size = skb_headlen(skb); 1766 struct skb_frag_struct *frag; 1767 struct i40e_tx_buffer *tx_bi; 1768 struct i40e_tx_desc *tx_desc; 1769 u16 i = tx_ring->next_to_use; 1770 u32 td_tag = 0; 1771 dma_addr_t dma; 1772 u16 gso_segs; 1773 1774 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { 1775 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; 1776 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >> 1777 I40E_TX_FLAGS_VLAN_SHIFT; 1778 } 1779 1780 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) 1781 gso_segs = skb_shinfo(skb)->gso_segs; 1782 else 1783 gso_segs = 1; 1784 1785 /* multiply data chunks by size of headers */ 1786 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len); 1787 first->gso_segs = gso_segs; 1788 first->skb = skb; 1789 first->tx_flags = tx_flags; 1790 1791 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 1792 1793 tx_desc = I40E_TX_DESC(tx_ring, i); 1794 tx_bi = first; 1795 1796 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 1797 if (dma_mapping_error(tx_ring->dev, dma)) 1798 goto dma_error; 1799 1800 /* record length, and DMA address */ 1801 dma_unmap_len_set(tx_bi, len, size); 1802 dma_unmap_addr_set(tx_bi, dma, dma); 1803 1804 tx_desc->buffer_addr = cpu_to_le64(dma); 1805 1806 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) { 1807 tx_desc->cmd_type_offset_bsz = 1808 build_ctob(td_cmd, td_offset, 1809 I40E_MAX_DATA_PER_TXD, td_tag); 1810 1811 tx_desc++; 1812 i++; 1813 if (i == tx_ring->count) { 1814 tx_desc = I40E_TX_DESC(tx_ring, 0); 1815 i = 0; 1816 } 1817 1818 dma += I40E_MAX_DATA_PER_TXD; 1819 size -= I40E_MAX_DATA_PER_TXD; 1820 1821 tx_desc->buffer_addr = cpu_to_le64(dma); 1822 } 1823 1824 if (likely(!data_len)) 1825 break; 1826 1827 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, 1828 size, td_tag); 1829 1830 tx_desc++; 1831 i++; 1832 if (i == tx_ring->count) { 1833 tx_desc = I40E_TX_DESC(tx_ring, 0); 1834 i = 0; 1835 } 1836 1837 size = skb_frag_size(frag); 1838 data_len -= size; 1839 1840 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, 1841 DMA_TO_DEVICE); 1842 1843 tx_bi = &tx_ring->tx_bi[i]; 1844 } 1845 1846 /* Place RS bit on last descriptor of any packet that spans across the 1847 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. 
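 * In other words, a packet that starts and ends inside the same
 * 4-descriptor stride, without ending on the stride's last slot, gets only
 * EOP here; everything else also gets RS, so completion write-backs are
 * batched to roughly one per stride.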
 */
#define WB_STRIDE 0x3
        if (((i & WB_STRIDE) != WB_STRIDE) &&
            (first <= &tx_ring->tx_bi[i]) &&
            (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
                tx_desc->cmd_type_offset_bsz =
                        build_ctob(td_cmd, td_offset, size, td_tag) |
                        cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
                                    I40E_TXD_QW1_CMD_SHIFT);
        } else {
                tx_desc->cmd_type_offset_bsz =
                        build_ctob(td_cmd, td_offset, size, td_tag) |
                        cpu_to_le64((u64)I40E_TXD_CMD <<
                                    I40E_TXD_QW1_CMD_SHIFT);
        }

        netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
                                                 tx_ring->queue_index),
                             first->bytecount);

        /* Force memory writes to complete before letting h/w
         * know there are new descriptors to fetch. (Only
         * applicable for weak-ordered memory model archs,
         * such as IA-64).
         */
        wmb();

        /* set next_to_watch value indicating a packet is present */
        first->next_to_watch = tx_desc;

        i++;
        if (i == tx_ring->count)
                i = 0;

        tx_ring->next_to_use = i;

        i40evf_maybe_stop_tx(tx_ring, DESC_NEEDED);
        /* notify HW of packet */
        if (!skb->xmit_more ||
            netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
                                                   tx_ring->queue_index)))
                writel(i, tx_ring->tail);
        else
                prefetchw(tx_desc + 1);

        return;

dma_error:
        dev_info(tx_ring->dev, "TX DMA map failed\n");

        /* clear dma mappings for failed tx_bi map */
        for (;;) {
                tx_bi = &tx_ring->tx_bi[i];
                i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
                if (tx_bi == first)
                        break;
                if (i == 0)
                        i = tx_ring->count;
                i--;
        }

        tx_ring->next_to_use = i;
}

/**
 * i40evf_xmit_descriptor_count - calculate number of tx descriptors needed
 * @skb: send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns the number of data descriptors needed for this skb. Returns 0 to
 * indicate there are not enough descriptors available in this ring, since we
 * need at least one descriptor.
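 *
 * Rough example (assuming I40E_MAX_DATA_PER_TXD comfortably exceeds the
 * frame size): a non-TSO frame with a 1514-byte linear area and no page
 * fragments needs one data descriptor, so the queue is only stopped when
 * fewer than 1 + 4 + 1 = 6 descriptors are free.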
1920 **/ 1921static inline int i40evf_xmit_descriptor_count(struct sk_buff *skb, 1922 struct i40e_ring *tx_ring) 1923{ 1924 unsigned int f; 1925 int count = 0; 1926 1927 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, 1928 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, 1929 * + 4 desc gap to avoid the cache line where head is, 1930 * + 1 desc for context descriptor, 1931 * otherwise try next time 1932 */ 1933 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1934 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); 1935 1936 count += TXD_USE_COUNT(skb_headlen(skb)); 1937 if (i40evf_maybe_stop_tx(tx_ring, count + 4 + 1)) { 1938 tx_ring->tx_stats.tx_busy++; 1939 return 0; 1940 } 1941 return count; 1942} 1943 1944/** 1945 * i40e_xmit_frame_ring - Sends buffer on Tx ring 1946 * @skb: send buffer 1947 * @tx_ring: ring to send buffer on 1948 * 1949 * Returns NETDEV_TX_OK if sent, else an error code 1950 **/ 1951static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, 1952 struct i40e_ring *tx_ring) 1953{ 1954 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT; 1955 u32 cd_tunneling = 0, cd_l2tag2 = 0; 1956 struct i40e_tx_buffer *first; 1957 u32 td_offset = 0; 1958 u32 tx_flags = 0; 1959 __be16 protocol; 1960 u32 td_cmd = 0; 1961 u8 hdr_len = 0; 1962 int tso; 1963 1964 if (0 == i40evf_xmit_descriptor_count(skb, tx_ring)) 1965 return NETDEV_TX_BUSY; 1966 1967 /* prepare the xmit flags */ 1968 if (i40evf_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) 1969 goto out_drop; 1970 1971 /* obtain protocol of skb */ 1972 protocol = vlan_get_protocol(skb); 1973 1974 /* record the location of the first descriptor for this packet */ 1975 first = &tx_ring->tx_bi[tx_ring->next_to_use]; 1976 1977 /* setup IPv4/IPv6 offloads */ 1978 if (protocol == htons(ETH_P_IP)) 1979 tx_flags |= I40E_TX_FLAGS_IPV4; 1980 else if (protocol == htons(ETH_P_IPV6)) 1981 tx_flags |= I40E_TX_FLAGS_IPV6; 1982 1983 tso = i40e_tso(tx_ring, skb, &hdr_len, 1984 &cd_type_cmd_tso_mss, &cd_tunneling); 1985 1986 if (tso < 0) 1987 goto out_drop; 1988 else if (tso) 1989 tx_flags |= I40E_TX_FLAGS_TSO; 1990 1991 if (i40e_chk_linearize(skb, tx_flags)) { 1992 if (skb_linearize(skb)) 1993 goto out_drop; 1994 tx_ring->tx_stats.tx_linearize++; 1995 } 1996 skb_tx_timestamp(skb); 1997 1998 /* always enable CRC insertion offload */ 1999 td_cmd |= I40E_TX_DESC_CMD_ICRC; 2000 2001 /* Always offload the checksum, since it's in the data descriptor */ 2002 if (skb->ip_summed == CHECKSUM_PARTIAL) { 2003 tx_flags |= I40E_TX_FLAGS_CSUM; 2004 2005 i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, 2006 tx_ring, &cd_tunneling); 2007 } 2008 2009 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, 2010 cd_tunneling, cd_l2tag2); 2011 2012 i40evf_tx_map(tx_ring, skb, first, tx_flags, hdr_len, 2013 td_cmd, td_offset); 2014 2015 return NETDEV_TX_OK; 2016 2017out_drop: 2018 dev_kfree_skb_any(skb); 2019 return NETDEV_TX_OK; 2020} 2021 2022/** 2023 * i40evf_xmit_frame - Selects the correct VSI and Tx queue to send buffer 2024 * @skb: send buffer 2025 * @netdev: network interface device structure 2026 * 2027 * Returns NETDEV_TX_OK if sent, else an error code 2028 **/ 2029netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) 2030{ 2031 struct i40evf_adapter *adapter = netdev_priv(netdev); 2032 struct i40e_ring *tx_ring = adapter->tx_rings[skb->queue_mapping]; 2033 2034 /* hardware can't handle really short frames, hardware padding works 2035 * beyond this point 2036 */ 2037 if (unlikely(skb->len < I40E_MIN_TX_LEN)) { 
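                /* skb_pad() zero-fills the tail up to the minimum length and
                 * frees the skb if the expansion fails, so returning
                 * NETDEV_TX_OK (frame consumed) below is the correct response.
                 */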
2038 if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len)) 2039 return NETDEV_TX_OK; 2040 skb->len = I40E_MIN_TX_LEN; 2041 skb_set_tail_pointer(skb, I40E_MIN_TX_LEN); 2042 } 2043 2044 return i40e_xmit_frame_ring(skb, tx_ring); 2045} 2046