root/tools/testing/selftests/net/msg_zerocopy.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. gettimeofday_ms
  2. get_ip_csum
  3. do_setcpu
  4. do_setsockopt
  5. do_poll
  6. do_accept
  7. add_zcopy_cookie
  8. do_sendmsg
  9. do_sendmsg_corked
  10. setup_iph
  11. setup_ip6h
  12. setup_sockaddr
  13. do_setup_tx
  14. do_process_zerocopy_cookies
  15. do_recvmsg_completion
  16. do_recv_completion
  17. do_recv_completions
  18. do_recv_remaining_completions
  19. do_tx
  20. do_setup_rx
  21. do_flush_tcp
  22. do_flush_datagram
  23. do_rx
  24. do_test
  25. usage
  26. parse_opts
  27. main

   1 /* Evaluate MSG_ZEROCOPY
   2  *
   3  * Send traffic between two processes over one of the supported
   4  * protocols and modes:
   5  *
   6  * PF_INET/PF_INET6
   7  * - SOCK_STREAM
   8  * - SOCK_DGRAM
   9  * - SOCK_DGRAM with UDP_CORK
  10  * - SOCK_RAW
  11  * - SOCK_RAW with IP_HDRINCL
  12  *
  13  * PF_PACKET
  14  * - SOCK_DGRAM
  15  * - SOCK_RAW
  16  *
  17  * PF_RDS
  18  * - SOCK_SEQPACKET
  19  *
  20  * Start this program on two connected hosts, one in send mode and
  21  * the other with option '-r' to put it in receiver mode.
  22  *
  23  * If zerocopy mode ('-z') is enabled, the sender will verify that
  24  * the kernel queues completions on the error queue for all zerocopy
  25  * transfers.
  26  */
  27 
  28 #define _GNU_SOURCE
  29 
  30 #include <arpa/inet.h>
  31 #include <error.h>
  32 #include <errno.h>
  33 #include <limits.h>
  34 #include <linux/errqueue.h>
  35 #include <linux/if_packet.h>
  36 #include <linux/ipv6.h>
  37 #include <linux/socket.h>
  38 #include <linux/sockios.h>
  39 #include <net/ethernet.h>
  40 #include <net/if.h>
  41 #include <netinet/ip.h>
  42 #include <netinet/ip6.h>
  43 #include <netinet/tcp.h>
  44 #include <netinet/udp.h>
  45 #include <poll.h>
  46 #include <sched.h>
  47 #include <stdbool.h>
  48 #include <stdio.h>
  49 #include <stdint.h>
  50 #include <stdlib.h>
  51 #include <string.h>
  52 #include <sys/ioctl.h>
  53 #include <sys/socket.h>
  54 #include <sys/stat.h>
  55 #include <sys/time.h>
  56 #include <sys/types.h>
  57 #include <sys/wait.h>
  58 #include <unistd.h>
  59 #include <linux/rds.h>
  60 
/* Fallback definitions for the zerocopy constants. Each one is defined
 * here only when the system headers included above did not provide it.
 * NOTE(review): the numeric values presumably mirror the kernel uapi
 * headers -- confirm before changing any of them.
 */

/* ee_origin value that do_recv_completion() requires on every notification */
#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY           5
#endif

/* SOL_SOCKET option that do_setup_tx() enables when -z is given */
#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY     60
#endif

/* ee_code bit; when set, do_recv_completion() records the send as not zerocopy */
#ifndef SO_EE_CODE_ZEROCOPY_COPIED
#define SO_EE_CODE_ZEROCOPY_COPIED      1
#endif

/* sendmsg() flag that do_sendmsg() adds for zerocopy transmissions */
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY    0x4000000
#endif
  76 
/* Run-time configuration, filled in by parse_opts() */
static int  cfg_cork;                           /* -c: sendmsg calls per corked datagram, 0 disables corking */
static bool cfg_cork_mixed;                     /* -m: alternate copy and zerocopy frags while corked */
static int  cfg_cpu             = -1;           /* default: pin to last cpu */
static int  cfg_family          = PF_UNSPEC;    /* -4 / -6 */
static int  cfg_ifindex         = 1;            /* -i: interface index used for PF_PACKET sends */
static int  cfg_payload_len;                    /* -s: payload bytes per send */
static int  cfg_port            = 8000;         /* -p */
static bool cfg_rx;                             /* -r: run as receiver */
static int  cfg_runtime_ms      = 4200;         /* -t: 200 ms plus the given number of seconds */
static int  cfg_verbose;                        /* -v: incremented once per occurrence */
static int  cfg_waittime_ms     = 500;          /* poll() timeout and completion drain window */
static bool cfg_zerocopy;                       /* -z */

/* Addresses built by setup_sockaddr(); cfg_alen is the sockaddr size
 * matching cfg_family.
 */
static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;

/* Payload pattern (written by do_test()) and traffic counters */
static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
static int  zerocopied = -1;                    /* -1 until the first completion, then 0 or 1 */
static uint32_t next_completion;                /* low end expected for the next completion range */
  98 
  99 static unsigned long gettimeofday_ms(void)
 100 {
 101         struct timeval tv;
 102 
 103         gettimeofday(&tv, NULL);
 104         return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
 105 }
 106 
 107 static uint16_t get_ip_csum(const uint16_t *start, int num_words)
 108 {
 109         unsigned long sum = 0;
 110         int i;
 111 
 112         for (i = 0; i < num_words; i++)
 113                 sum += start[i];
 114 
 115         while (sum >> 16)
 116                 sum = (sum & 0xFFFF) + (sum >> 16);
 117 
 118         return ~sum;
 119 }
 120 
 121 static int do_setcpu(int cpu)
 122 {
 123         cpu_set_t mask;
 124 
 125         CPU_ZERO(&mask);
 126         CPU_SET(cpu, &mask);
 127         if (sched_setaffinity(0, sizeof(mask), &mask))
 128                 error(1, 0, "setaffinity %d", cpu);
 129 
 130         if (cfg_verbose)
 131                 fprintf(stderr, "cpu: %u\n", cpu);
 132 
 133         return 0;
 134 }
 135 
 136 static void do_setsockopt(int fd, int level, int optname, int val)
 137 {
 138         if (setsockopt(fd, level, optname, &val, sizeof(val)))
 139                 error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
 140 }
 141 
 142 static int do_poll(int fd, int events)
 143 {
 144         struct pollfd pfd;
 145         int ret;
 146 
 147         pfd.events = events;
 148         pfd.revents = 0;
 149         pfd.fd = fd;
 150 
 151         ret = poll(&pfd, 1, cfg_waittime_ms);
 152         if (ret == -1)
 153                 error(1, errno, "poll");
 154 
 155         return ret && (pfd.revents & events);
 156 }
 157 
 158 static int do_accept(int fd)
 159 {
 160         int fda = fd;
 161 
 162         fd = accept(fda, NULL, NULL);
 163         if (fd == -1)
 164                 error(1, errno, "accept");
 165         if (close(fda))
 166                 error(1, errno, "close listen sock");
 167 
 168         return fd;
 169 }
 170 
 171 static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
 172 {
 173         struct cmsghdr *cm;
 174 
 175         if (!msg->msg_control)
 176                 error(1, errno, "NULL cookie");
 177         cm = (void *)msg->msg_control;
 178         cm->cmsg_len = CMSG_LEN(sizeof(cookie));
 179         cm->cmsg_level = SOL_RDS;
 180         cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
 181         memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
 182 }
 183 
 184 static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
 185 {
 186         int ret, len, i, flags;
 187         static uint32_t cookie;
 188         char ckbuf[CMSG_SPACE(sizeof(cookie))];
 189 
 190         len = 0;
 191         for (i = 0; i < msg->msg_iovlen; i++)
 192                 len += msg->msg_iov[i].iov_len;
 193 
 194         flags = MSG_DONTWAIT;
 195         if (do_zerocopy) {
 196                 flags |= MSG_ZEROCOPY;
 197                 if (domain == PF_RDS) {
 198                         memset(&msg->msg_control, 0, sizeof(msg->msg_control));
 199                         msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
 200                         msg->msg_control = (struct cmsghdr *)ckbuf;
 201                         add_zcopy_cookie(msg, ++cookie);
 202                 }
 203         }
 204 
 205         ret = sendmsg(fd, msg, flags);
 206         if (ret == -1 && errno == EAGAIN)
 207                 return false;
 208         if (ret == -1)
 209                 error(1, errno, "send");
 210         if (cfg_verbose && ret != len)
 211                 fprintf(stderr, "send: ret=%u != %u\n", ret, len);
 212 
 213         if (len) {
 214                 packets++;
 215                 bytes += ret;
 216                 if (do_zerocopy && ret)
 217                         expected_completions++;
 218         }
 219         if (do_zerocopy && domain == PF_RDS) {
 220                 msg->msg_control = NULL;
 221                 msg->msg_controllen = 0;
 222         }
 223 
 224         return true;
 225 }
 226 
 227 static void do_sendmsg_corked(int fd, struct msghdr *msg)
 228 {
 229         bool do_zerocopy = cfg_zerocopy;
 230         int i, payload_len, extra_len;
 231 
 232         /* split up the packet. for non-multiple, make first buffer longer */
 233         payload_len = cfg_payload_len / cfg_cork;
 234         extra_len = cfg_payload_len - (cfg_cork * payload_len);
 235 
 236         do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
 237 
 238         for (i = 0; i < cfg_cork; i++) {
 239 
 240                 /* in mixed-frags mode, alternate zerocopy and copy frags
 241                  * start with non-zerocopy, to ensure attach later works
 242                  */
 243                 if (cfg_cork_mixed)
 244                         do_zerocopy = (i & 1);
 245 
 246                 msg->msg_iov[0].iov_len = payload_len + extra_len;
 247                 extra_len = 0;
 248 
 249                 do_sendmsg(fd, msg, do_zerocopy,
 250                            (cfg_dst_addr.ss_family == AF_INET ?
 251                             PF_INET : PF_INET6));
 252         }
 253 
 254         do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
 255 }
 256 
 257 static int setup_iph(struct iphdr *iph, uint16_t payload_len)
 258 {
 259         struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
 260         struct sockaddr_in *saddr = (void *) &cfg_src_addr;
 261 
 262         memset(iph, 0, sizeof(*iph));
 263 
 264         iph->version    = 4;
 265         iph->tos        = 0;
 266         iph->ihl        = 5;
 267         iph->ttl        = 2;
 268         iph->saddr      = saddr->sin_addr.s_addr;
 269         iph->daddr      = daddr->sin_addr.s_addr;
 270         iph->protocol   = IPPROTO_EGP;
 271         iph->tot_len    = htons(sizeof(*iph) + payload_len);
 272         iph->check      = get_ip_csum((void *) iph, iph->ihl << 1);
 273 
 274         return sizeof(*iph);
 275 }
 276 
 277 static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
 278 {
 279         struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
 280         struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
 281 
 282         memset(ip6h, 0, sizeof(*ip6h));
 283 
 284         ip6h->version           = 6;
 285         ip6h->payload_len       = htons(payload_len);
 286         ip6h->nexthdr           = IPPROTO_EGP;
 287         ip6h->hop_limit         = 2;
 288         ip6h->saddr             = saddr->sin6_addr;
 289         ip6h->daddr             = daddr->sin6_addr;
 290 
 291         return sizeof(*ip6h);
 292 }
 293 
 294 
 295 static void setup_sockaddr(int domain, const char *str_addr,
 296                            struct sockaddr_storage *sockaddr)
 297 {
 298         struct sockaddr_in6 *addr6 = (void *) sockaddr;
 299         struct sockaddr_in *addr4 = (void *) sockaddr;
 300 
 301         switch (domain) {
 302         case PF_INET:
 303                 memset(addr4, 0, sizeof(*addr4));
 304                 addr4->sin_family = AF_INET;
 305                 addr4->sin_port = htons(cfg_port);
 306                 if (str_addr &&
 307                     inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
 308                         error(1, 0, "ipv4 parse error: %s", str_addr);
 309                 break;
 310         case PF_INET6:
 311                 memset(addr6, 0, sizeof(*addr6));
 312                 addr6->sin6_family = AF_INET6;
 313                 addr6->sin6_port = htons(cfg_port);
 314                 if (str_addr &&
 315                     inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
 316                         error(1, 0, "ipv6 parse error: %s", str_addr);
 317                 break;
 318         default:
 319                 error(1, 0, "illegal domain");
 320         }
 321 }
 322 
 323 static int do_setup_tx(int domain, int type, int protocol)
 324 {
 325         int fd;
 326 
 327         fd = socket(domain, type, protocol);
 328         if (fd == -1)
 329                 error(1, errno, "socket t");
 330 
 331         do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
 332         if (cfg_zerocopy)
 333                 do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
 334 
 335         if (domain != PF_PACKET && domain != PF_RDS)
 336                 if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
 337                         error(1, errno, "connect");
 338 
 339         if (domain == PF_RDS) {
 340                 if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
 341                         error(1, errno, "bind");
 342         }
 343 
 344         return fd;
 345 }
 346 
 347 static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
 348 {
 349         int i;
 350 
 351         if (ck->num > RDS_MAX_ZCOOKIES)
 352                 error(1, 0, "Returned %d cookies, max expected %d\n",
 353                       ck->num, RDS_MAX_ZCOOKIES);
 354         for (i = 0; i < ck->num; i++)
 355                 if (cfg_verbose >= 2)
 356                         fprintf(stderr, "%d\n", ck->cookies[i]);
 357         return ck->num;
 358 }
 359 
 360 static bool do_recvmsg_completion(int fd)
 361 {
 362         char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
 363         struct rds_zcopy_cookies *ck;
 364         struct cmsghdr *cmsg;
 365         struct msghdr msg;
 366         bool ret = false;
 367 
 368         memset(&msg, 0, sizeof(msg));
 369         msg.msg_control = cmsgbuf;
 370         msg.msg_controllen = sizeof(cmsgbuf);
 371 
 372         if (recvmsg(fd, &msg, MSG_DONTWAIT))
 373                 return ret;
 374 
 375         if (msg.msg_flags & MSG_CTRUNC)
 376                 error(1, errno, "recvmsg notification: truncated");
 377 
 378         for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 379                 if (cmsg->cmsg_level == SOL_RDS &&
 380                     cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
 381 
 382                         ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
 383                         completions += do_process_zerocopy_cookies(ck);
 384                         ret = true;
 385                         break;
 386                 }
 387                 error(0, 0, "ignoring cmsg at level %d type %d\n",
 388                             cmsg->cmsg_level, cmsg->cmsg_type);
 389         }
 390         return ret;
 391 }
 392 
 393 static bool do_recv_completion(int fd, int domain)
 394 {
 395         struct sock_extended_err *serr;
 396         struct msghdr msg = {};
 397         struct cmsghdr *cm;
 398         uint32_t hi, lo, range;
 399         int ret, zerocopy;
 400         char control[100];
 401 
 402         if (domain == PF_RDS)
 403                 return do_recvmsg_completion(fd);
 404 
 405         msg.msg_control = control;
 406         msg.msg_controllen = sizeof(control);
 407 
 408         ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
 409         if (ret == -1 && errno == EAGAIN)
 410                 return false;
 411         if (ret == -1)
 412                 error(1, errno, "recvmsg notification");
 413         if (msg.msg_flags & MSG_CTRUNC)
 414                 error(1, errno, "recvmsg notification: truncated");
 415 
 416         cm = CMSG_FIRSTHDR(&msg);
 417         if (!cm)
 418                 error(1, 0, "cmsg: no cmsg");
 419         if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
 420               (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
 421               (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
 422                 error(1, 0, "serr: wrong type: %d.%d",
 423                       cm->cmsg_level, cm->cmsg_type);
 424 
 425         serr = (void *) CMSG_DATA(cm);
 426 
 427         if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
 428                 error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
 429         if (serr->ee_errno != 0)
 430                 error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
 431 
 432         hi = serr->ee_data;
 433         lo = serr->ee_info;
 434         range = hi - lo + 1;
 435 
 436         /* Detect notification gaps. These should not happen often, if at all.
 437          * Gaps can occur due to drops, reordering and retransmissions.
 438          */
 439         if (lo != next_completion)
 440                 fprintf(stderr, "gap: %u..%u does not append to %u\n",
 441                         lo, hi, next_completion);
 442         next_completion = hi + 1;
 443 
 444         zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
 445         if (zerocopied == -1)
 446                 zerocopied = zerocopy;
 447         else if (zerocopied != zerocopy) {
 448                 fprintf(stderr, "serr: inconsistent\n");
 449                 zerocopied = zerocopy;
 450         }
 451 
 452         if (cfg_verbose >= 2)
 453                 fprintf(stderr, "completed: %u (h=%u l=%u)\n",
 454                         range, hi, lo);
 455 
 456         completions += range;
 457         return true;
 458 }
 459 
 460 /* Read all outstanding messages on the errqueue */
 461 static void do_recv_completions(int fd, int domain)
 462 {
 463         while (do_recv_completion(fd, domain)) {}
 464 }
 465 
 466 /* Wait for all remaining completions on the errqueue */
 467 static void do_recv_remaining_completions(int fd, int domain)
 468 {
 469         int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
 470 
 471         while (completions < expected_completions &&
 472                gettimeofday_ms() < tstop) {
 473                 if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
 474                         do_recv_completions(fd, domain);
 475         }
 476 
 477         if (completions < expected_completions)
 478                 fprintf(stderr, "missing notifications: %lu < %lu\n",
 479                         completions, expected_completions);
 480 }
 481 
 482 static void do_tx(int domain, int type, int protocol)
 483 {
 484         struct iovec iov[3] = { {0} };
 485         struct sockaddr_ll laddr;
 486         struct msghdr msg = {0};
 487         struct ethhdr eth;
 488         union {
 489                 struct ipv6hdr ip6h;
 490                 struct iphdr iph;
 491         } nh;
 492         uint64_t tstop;
 493         int fd;
 494 
 495         fd = do_setup_tx(domain, type, protocol);
 496 
 497         if (domain == PF_PACKET) {
 498                 uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
 499 
 500                 /* sock_raw passes ll header as data */
 501                 if (type == SOCK_RAW) {
 502                         memset(eth.h_dest, 0x06, ETH_ALEN);
 503                         memset(eth.h_source, 0x02, ETH_ALEN);
 504                         eth.h_proto = htons(proto);
 505                         iov[0].iov_base = &eth;
 506                         iov[0].iov_len = sizeof(eth);
 507                         msg.msg_iovlen++;
 508                 }
 509 
 510                 /* both sock_raw and sock_dgram expect name */
 511                 memset(&laddr, 0, sizeof(laddr));
 512                 laddr.sll_family        = AF_PACKET;
 513                 laddr.sll_ifindex       = cfg_ifindex;
 514                 laddr.sll_protocol      = htons(proto);
 515                 laddr.sll_halen         = ETH_ALEN;
 516 
 517                 memset(laddr.sll_addr, 0x06, ETH_ALEN);
 518 
 519                 msg.msg_name            = &laddr;
 520                 msg.msg_namelen         = sizeof(laddr);
 521         }
 522 
 523         /* packet and raw sockets with hdrincl must pass network header */
 524         if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
 525                 if (cfg_family == PF_INET)
 526                         iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
 527                 else
 528                         iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
 529 
 530                 iov[1].iov_base = (void *) &nh;
 531                 msg.msg_iovlen++;
 532         }
 533 
 534         if (domain == PF_RDS) {
 535                 msg.msg_name = &cfg_dst_addr;
 536                 msg.msg_namelen =  (cfg_dst_addr.ss_family == AF_INET ?
 537                                     sizeof(struct sockaddr_in) :
 538                                     sizeof(struct sockaddr_in6));
 539         }
 540 
 541         iov[2].iov_base = payload;
 542         iov[2].iov_len = cfg_payload_len;
 543         msg.msg_iovlen++;
 544         msg.msg_iov = &iov[3 - msg.msg_iovlen];
 545 
 546         tstop = gettimeofday_ms() + cfg_runtime_ms;
 547         do {
 548                 if (cfg_cork)
 549                         do_sendmsg_corked(fd, &msg);
 550                 else
 551                         do_sendmsg(fd, &msg, cfg_zerocopy, domain);
 552 
 553                 while (!do_poll(fd, POLLOUT)) {
 554                         if (cfg_zerocopy)
 555                                 do_recv_completions(fd, domain);
 556                 }
 557 
 558         } while (gettimeofday_ms() < tstop);
 559 
 560         if (cfg_zerocopy)
 561                 do_recv_remaining_completions(fd, domain);
 562 
 563         if (close(fd))
 564                 error(1, errno, "close");
 565 
 566         fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
 567                 packets, bytes >> 20, completions,
 568                 zerocopied == 1 ? 'y' : 'n');
 569 }
 570 
 571 static int do_setup_rx(int domain, int type, int protocol)
 572 {
 573         int fd;
 574 
 575         /* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
 576          * to recv the only copy of the packet, not a clone
 577          */
 578         if (domain == PF_PACKET)
 579                 error(1, 0, "Use PF_INET/SOCK_RAW to read");
 580 
 581         if (type == SOCK_RAW && protocol == IPPROTO_RAW)
 582                 error(1, 0, "IPPROTO_RAW: not supported on Rx");
 583 
 584         fd = socket(domain, type, protocol);
 585         if (fd == -1)
 586                 error(1, errno, "socket r");
 587 
 588         do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
 589         do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
 590         do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
 591 
 592         if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
 593                 error(1, errno, "bind");
 594 
 595         if (type == SOCK_STREAM) {
 596                 if (listen(fd, 1))
 597                         error(1, errno, "listen");
 598                 fd = do_accept(fd);
 599         }
 600 
 601         return fd;
 602 }
 603 
 604 /* Flush all outstanding bytes for the tcp receive queue */
 605 static void do_flush_tcp(int fd)
 606 {
 607         int ret;
 608 
 609         /* MSG_TRUNC flushes up to len bytes */
 610         ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
 611         if (ret == -1 && errno == EAGAIN)
 612                 return;
 613         if (ret == -1)
 614                 error(1, errno, "flush");
 615         if (!ret)
 616                 return;
 617 
 618         packets++;
 619         bytes += ret;
 620 }
 621 
 622 /* Flush all outstanding datagrams. Verify first few bytes of each. */
 623 static void do_flush_datagram(int fd, int type)
 624 {
 625         int ret, off = 0;
 626         char buf[64];
 627 
 628         /* MSG_TRUNC will return full datagram length */
 629         ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
 630         if (ret == -1 && errno == EAGAIN)
 631                 return;
 632 
 633         /* raw ipv4 return with header, raw ipv6 without */
 634         if (cfg_family == PF_INET && type == SOCK_RAW) {
 635                 off += sizeof(struct iphdr);
 636                 ret -= sizeof(struct iphdr);
 637         }
 638 
 639         if (ret == -1)
 640                 error(1, errno, "recv");
 641         if (ret != cfg_payload_len)
 642                 error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
 643         if (ret > sizeof(buf) - off)
 644                 ret = sizeof(buf) - off;
 645         if (memcmp(buf + off, payload, ret))
 646                 error(1, 0, "recv: data mismatch");
 647 
 648         packets++;
 649         bytes += cfg_payload_len;
 650 }
 651 
 652 static void do_rx(int domain, int type, int protocol)
 653 {
 654         const int cfg_receiver_wait_ms = 400;
 655         uint64_t tstop;
 656         int fd;
 657 
 658         fd = do_setup_rx(domain, type, protocol);
 659 
 660         tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms;
 661         do {
 662                 if (type == SOCK_STREAM)
 663                         do_flush_tcp(fd);
 664                 else
 665                         do_flush_datagram(fd, type);
 666 
 667                 do_poll(fd, POLLIN);
 668 
 669         } while (gettimeofday_ms() < tstop);
 670 
 671         if (close(fd))
 672                 error(1, errno, "close");
 673 
 674         fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
 675 }
 676 
 677 static void do_test(int domain, int type, int protocol)
 678 {
 679         int i;
 680 
 681         if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
 682                 error(1, 0, "can only cork udp sockets");
 683 
 684         do_setcpu(cfg_cpu);
 685 
 686         for (i = 0; i < IP_MAXPACKET; i++)
 687                 payload[i] = 'a' + (i % 26);
 688 
 689         if (cfg_rx)
 690                 do_rx(domain, type, protocol);
 691         else
 692                 do_tx(domain, type, protocol);
 693 }
 694 
 695 static void usage(const char *filepath)
 696 {
 697         error(1, 0, "Usage: %s [options] <test>", filepath);
 698 }
 699 
 700 static void parse_opts(int argc, char **argv)
 701 {
 702         const int max_payload_len = sizeof(payload) -
 703                                     sizeof(struct ipv6hdr) -
 704                                     sizeof(struct tcphdr) -
 705                                     40 /* max tcp options */;
 706         int c;
 707         char *daddr = NULL, *saddr = NULL;
 708         char *cfg_test;
 709 
 710         cfg_payload_len = max_payload_len;
 711 
 712         while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
 713                 switch (c) {
 714                 case '4':
 715                         if (cfg_family != PF_UNSPEC)
 716                                 error(1, 0, "Pass one of -4 or -6");
 717                         cfg_family = PF_INET;
 718                         cfg_alen = sizeof(struct sockaddr_in);
 719                         break;
 720                 case '6':
 721                         if (cfg_family != PF_UNSPEC)
 722                                 error(1, 0, "Pass one of -4 or -6");
 723                         cfg_family = PF_INET6;
 724                         cfg_alen = sizeof(struct sockaddr_in6);
 725                         break;
 726                 case 'c':
 727                         cfg_cork = strtol(optarg, NULL, 0);
 728                         break;
 729                 case 'C':
 730                         cfg_cpu = strtol(optarg, NULL, 0);
 731                         break;
 732                 case 'D':
 733                         daddr = optarg;
 734                         break;
 735                 case 'i':
 736                         cfg_ifindex = if_nametoindex(optarg);
 737                         if (cfg_ifindex == 0)
 738                                 error(1, errno, "invalid iface: %s", optarg);
 739                         break;
 740                 case 'm':
 741                         cfg_cork_mixed = true;
 742                         break;
 743                 case 'p':
 744                         cfg_port = strtoul(optarg, NULL, 0);
 745                         break;
 746                 case 'r':
 747                         cfg_rx = true;
 748                         break;
 749                 case 's':
 750                         cfg_payload_len = strtoul(optarg, NULL, 0);
 751                         break;
 752                 case 'S':
 753                         saddr = optarg;
 754                         break;
 755                 case 't':
 756                         cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
 757                         break;
 758                 case 'v':
 759                         cfg_verbose++;
 760                         break;
 761                 case 'z':
 762                         cfg_zerocopy = true;
 763                         break;
 764                 }
 765         }
 766 
 767         cfg_test = argv[argc - 1];
 768         if (strcmp(cfg_test, "rds") == 0) {
 769                 if (!daddr)
 770                         error(1, 0, "-D <server addr> required for PF_RDS\n");
 771                 if (!cfg_rx && !saddr)
 772                         error(1, 0, "-S <client addr> required for PF_RDS\n");
 773         }
 774         setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
 775         setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
 776 
 777         if (cfg_payload_len > max_payload_len)
 778                 error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
 779         if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
 780                 error(1, 0, "-m: cork_mixed requires corking and zerocopy");
 781 
 782         if (optind != argc - 1)
 783                 usage(argv[0]);
 784 }
 785 
 786 int main(int argc, char **argv)
 787 {
 788         const char *cfg_test;
 789 
 790         parse_opts(argc, argv);
 791 
 792         cfg_test = argv[argc - 1];
 793 
 794         if (!strcmp(cfg_test, "packet"))
 795                 do_test(PF_PACKET, SOCK_RAW, 0);
 796         else if (!strcmp(cfg_test, "packet_dgram"))
 797                 do_test(PF_PACKET, SOCK_DGRAM, 0);
 798         else if (!strcmp(cfg_test, "raw"))
 799                 do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
 800         else if (!strcmp(cfg_test, "raw_hdrincl"))
 801                 do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
 802         else if (!strcmp(cfg_test, "tcp"))
 803                 do_test(cfg_family, SOCK_STREAM, 0);
 804         else if (!strcmp(cfg_test, "udp"))
 805                 do_test(cfg_family, SOCK_DGRAM, 0);
 806         else if (!strcmp(cfg_test, "rds"))
 807                 do_test(PF_RDS, SOCK_SEQPACKET, 0);
 808         else
 809                 error(1, 0, "unknown cfg_test %s", cfg_test);
 810 
 811         return 0;
 812 }

/* [<][>][^][v][top][bottom][index][help] */