1/* 2 * Intel MIC Platform Software Stack (MPSS) 3 * 4 * Copyright(c) 2013 Intel Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License, version 2, as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 * 15 * The full GNU General Public License is included in this distribution in 16 * the file called "COPYING". 17 * 18 * Intel MIC User Space Tools. 19 */ 20 21#define _GNU_SOURCE 22 23#include <stdlib.h> 24#include <fcntl.h> 25#include <getopt.h> 26#include <assert.h> 27#include <unistd.h> 28#include <stdbool.h> 29#include <signal.h> 30#include <poll.h> 31#include <features.h> 32#include <sys/types.h> 33#include <sys/stat.h> 34#include <sys/mman.h> 35#include <sys/socket.h> 36#include <linux/virtio_ring.h> 37#include <linux/virtio_net.h> 38#include <linux/virtio_console.h> 39#include <linux/virtio_blk.h> 40#include <linux/version.h> 41#include "mpssd.h" 42#include <linux/mic_ioctl.h> 43#include <linux/mic_common.h> 44#include <tools/endian.h> 45 46static void init_mic(struct mic_info *mic); 47 48static FILE *logfp; 49static struct mic_info mic_list; 50 51#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 52 53#define min_t(type, x, y) ({ \ 54 type __min1 = (x); \ 55 type __min2 = (y); \ 56 __min1 < __min2 ? __min1 : __min2; }) 57 58/* align addr on a size boundary - adjust address up/down if needed */ 59#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1))) 60#define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size) 61 62/* align addr on a size boundary - adjust address up if needed */ 63#define _ALIGN(addr, size) _ALIGN_UP(addr, size) 64 65/* to align the pointer to the (next) page boundary */ 66#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) 67 68#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) 69 70#define GSO_ENABLED 1 71#define MAX_GSO_SIZE (64 * 1024) 72#define ETH_H_LEN 14 73#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64)) 74#define MIC_DEVICE_PAGE_END 0x1000 75 76#ifndef VIRTIO_NET_HDR_F_DATA_VALID 77#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ 78#endif 79 80static struct { 81 struct mic_device_desc dd; 82 struct mic_vqconfig vqconfig[2]; 83 __u32 host_features, guest_acknowledgements; 84 struct virtio_console_config cons_config; 85} virtcons_dev_page = { 86 .dd = { 87 .type = VIRTIO_ID_CONSOLE, 88 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig), 89 .feature_len = sizeof(virtcons_dev_page.host_features), 90 .config_len = sizeof(virtcons_dev_page.cons_config), 91 }, 92 .vqconfig[0] = { 93 .num = htole16(MIC_VRING_ENTRIES), 94 }, 95 .vqconfig[1] = { 96 .num = htole16(MIC_VRING_ENTRIES), 97 }, 98}; 99 100static struct { 101 struct mic_device_desc dd; 102 struct mic_vqconfig vqconfig[2]; 103 __u32 host_features, guest_acknowledgements; 104 struct virtio_net_config net_config; 105} virtnet_dev_page = { 106 .dd = { 107 .type = VIRTIO_ID_NET, 108 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig), 109 .feature_len = sizeof(virtnet_dev_page.host_features), 110 .config_len = sizeof(virtnet_dev_page.net_config), 111 }, 112 .vqconfig[0] = { 113 .num = htole16(MIC_VRING_ENTRIES), 114 }, 115 .vqconfig[1] = { 116 .num = htole16(MIC_VRING_ENTRIES), 117 }, 118#if GSO_ENABLED 119 .host_features = htole32( 120 1 << VIRTIO_NET_F_CSUM | 121 1 << VIRTIO_NET_F_GSO | 122 1 << VIRTIO_NET_F_GUEST_TSO4 | 123 1 << VIRTIO_NET_F_GUEST_TSO6 | 124 1 << VIRTIO_NET_F_GUEST_ECN | 125 1 << VIRTIO_NET_F_GUEST_UFO), 126#else 127 .host_features = 0, 128#endif 129}; 130 131static const char *mic_config_dir = "/etc/sysconfig/mic"; 132static const char *virtblk_backend = "VIRTBLK_BACKEND"; 133static struct { 134 struct mic_device_desc dd; 135 struct mic_vqconfig vqconfig[1]; 136 __u32 host_features, guest_acknowledgements; 137 struct virtio_blk_config blk_config; 138} virtblk_dev_page = { 139 .dd = { 140 .type = VIRTIO_ID_BLOCK, 141 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig), 142 .feature_len = sizeof(virtblk_dev_page.host_features), 143 .config_len = sizeof(virtblk_dev_page.blk_config), 144 }, 145 .vqconfig[0] = { 146 .num = htole16(MIC_VRING_ENTRIES), 147 }, 148 .host_features = 149 htole32(1<<VIRTIO_BLK_F_SEG_MAX), 150 .blk_config = { 151 .seg_max = htole32(MIC_VRING_ENTRIES - 2), 152 .capacity = htole64(0), 153 } 154}; 155 156static char *myname; 157 158static int 159tap_configure(struct mic_info *mic, char *dev) 160{ 161 pid_t pid; 162 char *ifargv[7]; 163 char ipaddr[IFNAMSIZ]; 164 int ret = 0; 165 166 pid = fork(); 167 if (pid == 0) { 168 ifargv[0] = "ip"; 169 ifargv[1] = "link"; 170 ifargv[2] = "set"; 171 ifargv[3] = dev; 172 ifargv[4] = "up"; 173 ifargv[5] = NULL; 174 mpsslog("Configuring %s\n", dev); 175 ret = execvp("ip", ifargv); 176 if (ret < 0) { 177 mpsslog("%s execvp failed errno %s\n", 178 mic->name, strerror(errno)); 179 return ret; 180 } 181 } 182 if (pid < 0) { 183 mpsslog("%s fork failed errno %s\n", 184 mic->name, strerror(errno)); 185 return ret; 186 } 187 188 ret = waitpid(pid, NULL, 0); 189 if (ret < 0) { 190 mpsslog("%s waitpid failed errno %s\n", 191 mic->name, strerror(errno)); 192 return ret; 193 } 194 195 snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id); 196 197 pid = fork(); 198 if (pid == 0) { 199 ifargv[0] = "ip"; 200 ifargv[1] = "addr"; 201 ifargv[2] = "add"; 202 ifargv[3] = ipaddr; 203 ifargv[4] = "dev"; 204 ifargv[5] = dev; 205 ifargv[6] = NULL; 206 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr); 207 ret = execvp("ip", ifargv); 208 if (ret < 0) { 209 mpsslog("%s execvp failed errno %s\n", 210 mic->name, strerror(errno)); 211 return ret; 212 } 213 } 214 if (pid < 0) { 215 mpsslog("%s fork failed errno %s\n", 216 mic->name, strerror(errno)); 217 return ret; 218 } 219 220 ret = waitpid(pid, NULL, 0); 221 if (ret < 0) { 222 mpsslog("%s waitpid failed errno %s\n", 223 mic->name, strerror(errno)); 224 return ret; 225 } 226 mpsslog("MIC name %s %s %d DONE!\n", 227 mic->name, __func__, __LINE__); 228 return 0; 229} 230 231static int tun_alloc(struct mic_info *mic, char *dev) 232{ 233 struct ifreq ifr; 234 int fd, err; 235#if GSO_ENABLED 236 unsigned offload; 237#endif 238 fd = open("/dev/net/tun", O_RDWR); 239 if (fd < 0) { 240 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno)); 241 goto done; 242 } 243 244 memset(&ifr, 0, sizeof(ifr)); 245 246 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 247 if (*dev) 248 strncpy(ifr.ifr_name, dev, IFNAMSIZ); 249 250 err = ioctl(fd, TUNSETIFF, (void *)&ifr); 251 if (err < 0) { 252 mpsslog("%s %s %d TUNSETIFF failed %s\n", 253 mic->name, __func__, __LINE__, strerror(errno)); 254 close(fd); 255 return err; 256 } 257#if GSO_ENABLED 258 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | 259 TUN_F_TSO_ECN | TUN_F_UFO; 260 261 err = ioctl(fd, TUNSETOFFLOAD, offload); 262 if (err < 0) { 263 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n", 264 mic->name, __func__, __LINE__, strerror(errno)); 265 close(fd); 266 return err; 267 } 268#endif 269 strcpy(dev, ifr.ifr_name); 270 mpsslog("Created TAP %s\n", dev); 271done: 272 return fd; 273} 274 275#define NET_FD_VIRTIO_NET 0 276#define NET_FD_TUN 1 277#define MAX_NET_FD 2 278 279static void set_dp(struct mic_info *mic, int type, void *dp) 280{ 281 switch (type) { 282 case VIRTIO_ID_CONSOLE: 283 mic->mic_console.console_dp = dp; 284 return; 285 case VIRTIO_ID_NET: 286 mic->mic_net.net_dp = dp; 287 return; 288 case VIRTIO_ID_BLOCK: 289 mic->mic_virtblk.block_dp = dp; 290 return; 291 } 292 mpsslog("%s %s %d not found\n", mic->name, __func__, type); 293 assert(0); 294} 295 296static void *get_dp(struct mic_info *mic, int type) 297{ 298 switch (type) { 299 case VIRTIO_ID_CONSOLE: 300 return mic->mic_console.console_dp; 301 case VIRTIO_ID_NET: 302 return mic->mic_net.net_dp; 303 case VIRTIO_ID_BLOCK: 304 return mic->mic_virtblk.block_dp; 305 } 306 mpsslog("%s %s %d not found\n", mic->name, __func__, type); 307 assert(0); 308 return NULL; 309} 310 311static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type) 312{ 313 struct mic_device_desc *d; 314 int i; 315 void *dp = get_dp(mic, type); 316 317 for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE; 318 i += mic_total_desc_size(d)) { 319 d = dp + i; 320 321 /* End of list */ 322 if (d->type == 0) 323 break; 324 325 if (d->type == -1) 326 continue; 327 328 mpsslog("%s %s d-> type %d d %p\n", 329 mic->name, __func__, d->type, d); 330 331 if (d->type == (__u8)type) 332 return d; 333 } 334 mpsslog("%s %s %d not found\n", mic->name, __func__, type); 335 assert(0); 336 return NULL; 337} 338 339/* See comments in vhost.c for explanation of next_desc() */ 340static unsigned next_desc(struct vring_desc *desc) 341{ 342 unsigned int next; 343 344 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) 345 return -1U; 346 next = le16toh(desc->next); 347 return next; 348} 349 350/* Sum up all the IOVEC length */ 351static ssize_t 352sum_iovec_len(struct mic_copy_desc *copy) 353{ 354 ssize_t sum = 0; 355 int i; 356 357 for (i = 0; i < copy->iovcnt; i++) 358 sum += copy->iov[i].iov_len; 359 return sum; 360} 361 362static inline void verify_out_len(struct mic_info *mic, 363 struct mic_copy_desc *copy) 364{ 365 if (copy->out_len != sum_iovec_len(copy)) { 366 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n", 367 mic->name, __func__, __LINE__, 368 copy->out_len, sum_iovec_len(copy)); 369 assert(copy->out_len == sum_iovec_len(copy)); 370 } 371} 372 373/* Display an iovec */ 374static void 375disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy, 376 const char *s, int line) 377{ 378 int i; 379 380 for (i = 0; i < copy->iovcnt; i++) 381 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n", 382 mic->name, s, line, i, 383 copy->iov[i].iov_base, copy->iov[i].iov_len); 384} 385 386static inline __u16 read_avail_idx(struct mic_vring *vr) 387{ 388 return ACCESS_ONCE(vr->info->avail_idx); 389} 390 391static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr, 392 struct mic_copy_desc *copy, ssize_t len) 393{ 394 copy->vr_idx = tx ? 0 : 1; 395 copy->update_used = true; 396 if (type == VIRTIO_ID_NET) 397 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr); 398 else 399 copy->iov[0].iov_len = len; 400} 401 402/* Central API which triggers the copies */ 403static int 404mic_virtio_copy(struct mic_info *mic, int fd, 405 struct mic_vring *vr, struct mic_copy_desc *copy) 406{ 407 int ret; 408 409 ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy); 410 if (ret) { 411 mpsslog("%s %s %d errno %s ret %d\n", 412 mic->name, __func__, __LINE__, 413 strerror(errno), ret); 414 } 415 return ret; 416} 417 418/* 419 * This initialization routine requires at least one 420 * vring i.e. vr0. vr1 is optional. 421 */ 422static void * 423init_vr(struct mic_info *mic, int fd, int type, 424 struct mic_vring *vr0, struct mic_vring *vr1, int num_vq) 425{ 426 int vr_size; 427 char *va; 428 429 vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES, 430 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info)); 431 va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq, 432 PROT_READ, MAP_SHARED, fd, 0); 433 if (MAP_FAILED == va) { 434 mpsslog("%s %s %d mmap failed errno %s\n", 435 mic->name, __func__, __LINE__, 436 strerror(errno)); 437 goto done; 438 } 439 set_dp(mic, type, va); 440 vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END]; 441 vr0->info = vr0->va + 442 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN); 443 vring_init(&vr0->vr, 444 MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN); 445 mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ", 446 __func__, mic->name, vr0->va, vr0->info, vr_size, 447 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); 448 mpsslog("magic 0x%x expected 0x%x\n", 449 le32toh(vr0->info->magic), MIC_MAGIC + type); 450 assert(le32toh(vr0->info->magic) == MIC_MAGIC + type); 451 if (vr1) { 452 vr1->va = (struct mic_vring *) 453 &va[MIC_DEVICE_PAGE_END + vr_size]; 454 vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES, 455 MIC_VIRTIO_RING_ALIGN); 456 vring_init(&vr1->vr, 457 MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN); 458 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ", 459 __func__, mic->name, vr1->va, vr1->info, vr_size, 460 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); 461 mpsslog("magic 0x%x expected 0x%x\n", 462 le32toh(vr1->info->magic), MIC_MAGIC + type + 1); 463 assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1); 464 } 465done: 466 return va; 467} 468 469static void 470wait_for_card_driver(struct mic_info *mic, int fd, int type) 471{ 472 struct pollfd pollfd; 473 int err; 474 struct mic_device_desc *desc = get_device_desc(mic, type); 475 476 pollfd.fd = fd; 477 mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n", 478 mic->name, __func__, type, desc->status); 479 while (1) { 480 pollfd.events = POLLIN; 481 pollfd.revents = 0; 482 err = poll(&pollfd, 1, -1); 483 if (err < 0) { 484 mpsslog("%s %s poll failed %s\n", 485 mic->name, __func__, strerror(errno)); 486 continue; 487 } 488 489 if (pollfd.revents) { 490 mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n", 491 mic->name, __func__, type, desc->status); 492 if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { 493 mpsslog("%s %s poll.revents %d\n", 494 mic->name, __func__, pollfd.revents); 495 mpsslog("%s %s desc-> type %d status 0x%x\n", 496 mic->name, __func__, type, 497 desc->status); 498 break; 499 } 500 } 501 } 502} 503 504/* Spin till we have some descriptors */ 505static void 506spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr) 507{ 508 __u16 avail_idx = read_avail_idx(vr); 509 510 while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) { 511#ifdef DEBUG 512 mpsslog("%s %s waiting for desc avail %d info_avail %d\n", 513 mic->name, __func__, 514 le16toh(vr->vr.avail->idx), vr->info->avail_idx); 515#endif 516 sched_yield(); 517 } 518} 519 520static void * 521virtio_net(void *arg) 522{ 523 static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)]; 524 static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64))); 525 struct iovec vnet_iov[2][2] = { 526 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) }, 527 { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } }, 528 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) }, 529 { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } }, 530 }; 531 struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1]; 532 struct mic_info *mic = (struct mic_info *)arg; 533 char if_name[IFNAMSIZ]; 534 struct pollfd net_poll[MAX_NET_FD]; 535 struct mic_vring tx_vr, rx_vr; 536 struct mic_copy_desc copy; 537 struct mic_device_desc *desc; 538 int err; 539 540 snprintf(if_name, IFNAMSIZ, "mic%d", mic->id); 541 mic->mic_net.tap_fd = tun_alloc(mic, if_name); 542 if (mic->mic_net.tap_fd < 0) 543 goto done; 544 545 if (tap_configure(mic, if_name)) 546 goto done; 547 mpsslog("MIC name %s id %d\n", mic->name, mic->id); 548 549 net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd; 550 net_poll[NET_FD_VIRTIO_NET].events = POLLIN; 551 net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd; 552 net_poll[NET_FD_TUN].events = POLLIN; 553 554 if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd, 555 VIRTIO_ID_NET, &tx_vr, &rx_vr, 556 virtnet_dev_page.dd.num_vq)) { 557 mpsslog("%s init_vr failed %s\n", 558 mic->name, strerror(errno)); 559 goto done; 560 } 561 562 copy.iovcnt = 2; 563 desc = get_device_desc(mic, VIRTIO_ID_NET); 564 565 while (1) { 566 ssize_t len; 567 568 net_poll[NET_FD_VIRTIO_NET].revents = 0; 569 net_poll[NET_FD_TUN].revents = 0; 570 571 /* Start polling for data from tap and virtio net */ 572 err = poll(net_poll, 2, -1); 573 if (err < 0) { 574 mpsslog("%s poll failed %s\n", 575 __func__, strerror(errno)); 576 continue; 577 } 578 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) 579 wait_for_card_driver(mic, mic->mic_net.virtio_net_fd, 580 VIRTIO_ID_NET); 581 /* 582 * Check if there is data to be read from TUN and write to 583 * virtio net fd if there is. 584 */ 585 if (net_poll[NET_FD_TUN].revents & POLLIN) { 586 copy.iov = iov0; 587 len = readv(net_poll[NET_FD_TUN].fd, 588 copy.iov, copy.iovcnt); 589 if (len > 0) { 590 struct virtio_net_hdr *hdr 591 = (struct virtio_net_hdr *)vnet_hdr[0]; 592 593 /* Disable checksums on the card since we are on 594 a reliable PCIe link */ 595 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; 596#ifdef DEBUG 597 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name, 598 __func__, __LINE__, hdr->flags); 599 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n", 600 copy.out_len, hdr->gso_type); 601#endif 602#ifdef DEBUG 603 disp_iovec(mic, copy, __func__, __LINE__); 604 mpsslog("%s %s %d read from tap 0x%lx\n", 605 mic->name, __func__, __LINE__, 606 len); 607#endif 608 spin_for_descriptors(mic, &tx_vr); 609 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, ©, 610 len); 611 612 err = mic_virtio_copy(mic, 613 mic->mic_net.virtio_net_fd, &tx_vr, 614 ©); 615 if (err < 0) { 616 mpsslog("%s %s %d mic_virtio_copy %s\n", 617 mic->name, __func__, __LINE__, 618 strerror(errno)); 619 } 620 if (!err) 621 verify_out_len(mic, ©); 622#ifdef DEBUG 623 disp_iovec(mic, copy, __func__, __LINE__); 624 mpsslog("%s %s %d wrote to net 0x%lx\n", 625 mic->name, __func__, __LINE__, 626 sum_iovec_len(©)); 627#endif 628 /* Reinitialize IOV for next run */ 629 iov0[1].iov_len = MAX_NET_PKT_SIZE; 630 } else if (len < 0) { 631 disp_iovec(mic, ©, __func__, __LINE__); 632 mpsslog("%s %s %d read failed %s ", mic->name, 633 __func__, __LINE__, strerror(errno)); 634 mpsslog("cnt %d sum %zd\n", 635 copy.iovcnt, sum_iovec_len(©)); 636 } 637 } 638 639 /* 640 * Check if there is data to be read from virtio net and 641 * write to TUN if there is. 642 */ 643 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) { 644 while (rx_vr.info->avail_idx != 645 le16toh(rx_vr.vr.avail->idx)) { 646 copy.iov = iov1; 647 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, ©, 648 MAX_NET_PKT_SIZE 649 + sizeof(struct virtio_net_hdr)); 650 651 err = mic_virtio_copy(mic, 652 mic->mic_net.virtio_net_fd, &rx_vr, 653 ©); 654 if (!err) { 655#ifdef DEBUG 656 struct virtio_net_hdr *hdr 657 = (struct virtio_net_hdr *) 658 vnet_hdr[1]; 659 660 mpsslog("%s %s %d hdr->flags 0x%x, ", 661 mic->name, __func__, __LINE__, 662 hdr->flags); 663 mpsslog("out_len %d gso_type 0x%x\n", 664 copy.out_len, 665 hdr->gso_type); 666#endif 667 /* Set the correct output iov_len */ 668 iov1[1].iov_len = copy.out_len - 669 sizeof(struct virtio_net_hdr); 670 verify_out_len(mic, ©); 671#ifdef DEBUG 672 disp_iovec(mic, copy, __func__, 673 __LINE__); 674 mpsslog("%s %s %d ", 675 mic->name, __func__, __LINE__); 676 mpsslog("read from net 0x%lx\n", 677 sum_iovec_len(copy)); 678#endif 679 len = writev(net_poll[NET_FD_TUN].fd, 680 copy.iov, copy.iovcnt); 681 if (len != sum_iovec_len(©)) { 682 mpsslog("Tun write failed %s ", 683 strerror(errno)); 684 mpsslog("len 0x%zx ", len); 685 mpsslog("read_len 0x%zx\n", 686 sum_iovec_len(©)); 687 } else { 688#ifdef DEBUG 689 disp_iovec(mic, ©, __func__, 690 __LINE__); 691 mpsslog("%s %s %d ", 692 mic->name, __func__, 693 __LINE__); 694 mpsslog("wrote to tap 0x%lx\n", 695 len); 696#endif 697 } 698 } else { 699 mpsslog("%s %s %d mic_virtio_copy %s\n", 700 mic->name, __func__, __LINE__, 701 strerror(errno)); 702 break; 703 } 704 } 705 } 706 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR) 707 mpsslog("%s: %s: POLLERR\n", __func__, mic->name); 708 } 709done: 710 pthread_exit(NULL); 711} 712 713/* virtio_console */ 714#define VIRTIO_CONSOLE_FD 0 715#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1) 716#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */ 717#define MAX_BUFFER_SIZE PAGE_SIZE 718 719static void * 720virtio_console(void *arg) 721{ 722 static __u8 vcons_buf[2][PAGE_SIZE]; 723 struct iovec vcons_iov[2] = { 724 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) }, 725 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) }, 726 }; 727 struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1]; 728 struct mic_info *mic = (struct mic_info *)arg; 729 int err; 730 struct pollfd console_poll[MAX_CONSOLE_FD]; 731 int pty_fd; 732 char *pts_name; 733 ssize_t len; 734 struct mic_vring tx_vr, rx_vr; 735 struct mic_copy_desc copy; 736 struct mic_device_desc *desc; 737 738 pty_fd = posix_openpt(O_RDWR); 739 if (pty_fd < 0) { 740 mpsslog("can't open a pseudoterminal master device: %s\n", 741 strerror(errno)); 742 goto _return; 743 } 744 pts_name = ptsname(pty_fd); 745 if (pts_name == NULL) { 746 mpsslog("can't get pts name\n"); 747 goto _close_pty; 748 } 749 printf("%s console message goes to %s\n", mic->name, pts_name); 750 mpsslog("%s console message goes to %s\n", mic->name, pts_name); 751 err = grantpt(pty_fd); 752 if (err < 0) { 753 mpsslog("can't grant access: %s %s\n", 754 pts_name, strerror(errno)); 755 goto _close_pty; 756 } 757 err = unlockpt(pty_fd); 758 if (err < 0) { 759 mpsslog("can't unlock a pseudoterminal: %s %s\n", 760 pts_name, strerror(errno)); 761 goto _close_pty; 762 } 763 console_poll[MONITOR_FD].fd = pty_fd; 764 console_poll[MONITOR_FD].events = POLLIN; 765 766 console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd; 767 console_poll[VIRTIO_CONSOLE_FD].events = POLLIN; 768 769 if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd, 770 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr, 771 virtcons_dev_page.dd.num_vq)) { 772 mpsslog("%s init_vr failed %s\n", 773 mic->name, strerror(errno)); 774 goto _close_pty; 775 } 776 777 copy.iovcnt = 1; 778 desc = get_device_desc(mic, VIRTIO_ID_CONSOLE); 779 780 for (;;) { 781 console_poll[MONITOR_FD].revents = 0; 782 console_poll[VIRTIO_CONSOLE_FD].revents = 0; 783 err = poll(console_poll, MAX_CONSOLE_FD, -1); 784 if (err < 0) { 785 mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__, 786 strerror(errno)); 787 continue; 788 } 789 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) 790 wait_for_card_driver(mic, 791 mic->mic_console.virtio_console_fd, 792 VIRTIO_ID_CONSOLE); 793 794 if (console_poll[MONITOR_FD].revents & POLLIN) { 795 copy.iov = iov0; 796 len = readv(pty_fd, copy.iov, copy.iovcnt); 797 if (len > 0) { 798#ifdef DEBUG 799 disp_iovec(mic, copy, __func__, __LINE__); 800 mpsslog("%s %s %d read from tap 0x%lx\n", 801 mic->name, __func__, __LINE__, 802 len); 803#endif 804 spin_for_descriptors(mic, &tx_vr); 805 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr, 806 ©, len); 807 808 err = mic_virtio_copy(mic, 809 mic->mic_console.virtio_console_fd, 810 &tx_vr, ©); 811 if (err < 0) { 812 mpsslog("%s %s %d mic_virtio_copy %s\n", 813 mic->name, __func__, __LINE__, 814 strerror(errno)); 815 } 816 if (!err) 817 verify_out_len(mic, ©); 818#ifdef DEBUG 819 disp_iovec(mic, copy, __func__, __LINE__); 820 mpsslog("%s %s %d wrote to net 0x%lx\n", 821 mic->name, __func__, __LINE__, 822 sum_iovec_len(copy)); 823#endif 824 /* Reinitialize IOV for next run */ 825 iov0->iov_len = PAGE_SIZE; 826 } else if (len < 0) { 827 disp_iovec(mic, ©, __func__, __LINE__); 828 mpsslog("%s %s %d read failed %s ", 829 mic->name, __func__, __LINE__, 830 strerror(errno)); 831 mpsslog("cnt %d sum %zd\n", 832 copy.iovcnt, sum_iovec_len(©)); 833 } 834 } 835 836 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) { 837 while (rx_vr.info->avail_idx != 838 le16toh(rx_vr.vr.avail->idx)) { 839 copy.iov = iov1; 840 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr, 841 ©, PAGE_SIZE); 842 843 err = mic_virtio_copy(mic, 844 mic->mic_console.virtio_console_fd, 845 &rx_vr, ©); 846 if (!err) { 847 /* Set the correct output iov_len */ 848 iov1->iov_len = copy.out_len; 849 verify_out_len(mic, ©); 850#ifdef DEBUG 851 disp_iovec(mic, copy, __func__, 852 __LINE__); 853 mpsslog("%s %s %d ", 854 mic->name, __func__, __LINE__); 855 mpsslog("read from net 0x%lx\n", 856 sum_iovec_len(copy)); 857#endif 858 len = writev(pty_fd, 859 copy.iov, copy.iovcnt); 860 if (len != sum_iovec_len(©)) { 861 mpsslog("Tun write failed %s ", 862 strerror(errno)); 863 mpsslog("len 0x%zx ", len); 864 mpsslog("read_len 0x%zx\n", 865 sum_iovec_len(©)); 866 } else { 867#ifdef DEBUG 868 disp_iovec(mic, copy, __func__, 869 __LINE__); 870 mpsslog("%s %s %d ", 871 mic->name, __func__, 872 __LINE__); 873 mpsslog("wrote to tap 0x%lx\n", 874 len); 875#endif 876 } 877 } else { 878 mpsslog("%s %s %d mic_virtio_copy %s\n", 879 mic->name, __func__, __LINE__, 880 strerror(errno)); 881 break; 882 } 883 } 884 } 885 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR) 886 mpsslog("%s: %s: POLLERR\n", __func__, mic->name); 887 } 888_close_pty: 889 close(pty_fd); 890_return: 891 pthread_exit(NULL); 892} 893 894static void 895add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd) 896{ 897 char path[PATH_MAX]; 898 int fd, err; 899 900 snprintf(path, PATH_MAX, "/dev/mic%d", mic->id); 901 fd = open(path, O_RDWR); 902 if (fd < 0) { 903 mpsslog("Could not open %s %s\n", path, strerror(errno)); 904 return; 905 } 906 907 err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd); 908 if (err < 0) { 909 mpsslog("Could not add %d %s\n", dd->type, strerror(errno)); 910 close(fd); 911 return; 912 } 913 switch (dd->type) { 914 case VIRTIO_ID_NET: 915 mic->mic_net.virtio_net_fd = fd; 916 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name); 917 break; 918 case VIRTIO_ID_CONSOLE: 919 mic->mic_console.virtio_console_fd = fd; 920 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name); 921 break; 922 case VIRTIO_ID_BLOCK: 923 mic->mic_virtblk.virtio_block_fd = fd; 924 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name); 925 break; 926 } 927} 928 929static bool 930set_backend_file(struct mic_info *mic) 931{ 932 FILE *config; 933 char buff[PATH_MAX], *line, *evv, *p; 934 935 snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id); 936 config = fopen(buff, "r"); 937 if (config == NULL) 938 return false; 939 do { /* look for "virtblk_backend=XXXX" */ 940 line = fgets(buff, PATH_MAX, config); 941 if (line == NULL) 942 break; 943 if (*line == '#') 944 continue; 945 p = strchr(line, '\n'); 946 if (p) 947 *p = '\0'; 948 } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0); 949 fclose(config); 950 if (line == NULL) 951 return false; 952 evv = strchr(line, '='); 953 if (evv == NULL) 954 return false; 955 mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1); 956 if (mic->mic_virtblk.backend_file == NULL) { 957 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id); 958 return false; 959 } 960 strcpy(mic->mic_virtblk.backend_file, evv + 1); 961 return true; 962} 963 964#define SECTOR_SIZE 512 965static bool 966set_backend_size(struct mic_info *mic) 967{ 968 mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0, 969 SEEK_END); 970 if (mic->mic_virtblk.backend_size < 0) { 971 mpsslog("%s: can't seek: %s\n", 972 mic->name, mic->mic_virtblk.backend_file); 973 return false; 974 } 975 virtblk_dev_page.blk_config.capacity = 976 mic->mic_virtblk.backend_size / SECTOR_SIZE; 977 if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0) 978 virtblk_dev_page.blk_config.capacity++; 979 980 virtblk_dev_page.blk_config.capacity = 981 htole64(virtblk_dev_page.blk_config.capacity); 982 983 return true; 984} 985 986static bool 987open_backend(struct mic_info *mic) 988{ 989 if (!set_backend_file(mic)) 990 goto _error_exit; 991 mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR); 992 if (mic->mic_virtblk.backend < 0) { 993 mpsslog("%s: can't open: %s\n", mic->name, 994 mic->mic_virtblk.backend_file); 995 goto _error_free; 996 } 997 if (!set_backend_size(mic)) 998 goto _error_close; 999 mic->mic_virtblk.backend_addr = mmap(NULL, 1000 mic->mic_virtblk.backend_size, 1001 PROT_READ|PROT_WRITE, MAP_SHARED, 1002 mic->mic_virtblk.backend, 0L); 1003 if (mic->mic_virtblk.backend_addr == MAP_FAILED) { 1004 mpsslog("%s: can't map: %s %s\n", 1005 mic->name, mic->mic_virtblk.backend_file, 1006 strerror(errno)); 1007 goto _error_close; 1008 } 1009 return true; 1010 1011 _error_close: 1012 close(mic->mic_virtblk.backend); 1013 _error_free: 1014 free(mic->mic_virtblk.backend_file); 1015 _error_exit: 1016 return false; 1017} 1018 1019static void 1020close_backend(struct mic_info *mic) 1021{ 1022 munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size); 1023 close(mic->mic_virtblk.backend); 1024 free(mic->mic_virtblk.backend_file); 1025} 1026 1027static bool 1028start_virtblk(struct mic_info *mic, struct mic_vring *vring) 1029{ 1030 if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) { 1031 mpsslog("%s: blk_config is not 8 byte aligned.\n", 1032 mic->name); 1033 return false; 1034 } 1035 add_virtio_device(mic, &virtblk_dev_page.dd); 1036 if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd, 1037 VIRTIO_ID_BLOCK, vring, NULL, 1038 virtblk_dev_page.dd.num_vq)) { 1039 mpsslog("%s init_vr failed %s\n", 1040 mic->name, strerror(errno)); 1041 return false; 1042 } 1043 return true; 1044} 1045 1046static void 1047stop_virtblk(struct mic_info *mic) 1048{ 1049 int vr_size, ret; 1050 1051 vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES, 1052 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info)); 1053 ret = munmap(mic->mic_virtblk.block_dp, 1054 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq); 1055 if (ret < 0) 1056 mpsslog("%s munmap errno %d\n", mic->name, errno); 1057 close(mic->mic_virtblk.virtio_block_fd); 1058} 1059 1060static __u8 1061header_error_check(struct vring_desc *desc) 1062{ 1063 if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) { 1064 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n", 1065 __func__, __LINE__); 1066 return -EIO; 1067 } 1068 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) { 1069 mpsslog("%s() %d: alone\n", 1070 __func__, __LINE__); 1071 return -EIO; 1072 } 1073 if (le16toh(desc->flags) & VRING_DESC_F_WRITE) { 1074 mpsslog("%s() %d: not read\n", 1075 __func__, __LINE__); 1076 return -EIO; 1077 } 1078 return 0; 1079} 1080 1081static int 1082read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx) 1083{ 1084 struct iovec iovec; 1085 struct mic_copy_desc copy; 1086 1087 iovec.iov_len = sizeof(*hdr); 1088 iovec.iov_base = hdr; 1089 copy.iov = &iovec; 1090 copy.iovcnt = 1; 1091 copy.vr_idx = 0; /* only one vring on virtio_block */ 1092 copy.update_used = false; /* do not update used index */ 1093 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); 1094} 1095 1096static int 1097transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt) 1098{ 1099 struct mic_copy_desc copy; 1100 1101 copy.iov = iovec; 1102 copy.iovcnt = iovcnt; 1103 copy.vr_idx = 0; /* only one vring on virtio_block */ 1104 copy.update_used = false; /* do not update used index */ 1105 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); 1106} 1107 1108static __u8 1109status_error_check(struct vring_desc *desc) 1110{ 1111 if (le32toh(desc->len) != sizeof(__u8)) { 1112 mpsslog("%s() %d: length is not sizeof(status)\n", 1113 __func__, __LINE__); 1114 return -EIO; 1115 } 1116 return 0; 1117} 1118 1119static int 1120write_status(int fd, __u8 *status) 1121{ 1122 struct iovec iovec; 1123 struct mic_copy_desc copy; 1124 1125 iovec.iov_base = status; 1126 iovec.iov_len = sizeof(*status); 1127 copy.iov = &iovec; 1128 copy.iovcnt = 1; 1129 copy.vr_idx = 0; /* only one vring on virtio_block */ 1130 copy.update_used = true; /* Update used index */ 1131 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); 1132} 1133 1134static void * 1135virtio_block(void *arg) 1136{ 1137 struct mic_info *mic = (struct mic_info *)arg; 1138 int ret; 1139 struct pollfd block_poll; 1140 struct mic_vring vring; 1141 __u16 avail_idx; 1142 __u32 desc_idx; 1143 struct vring_desc *desc; 1144 struct iovec *iovec, *piov; 1145 __u8 status; 1146 __u32 buffer_desc_idx; 1147 struct virtio_blk_outhdr hdr; 1148 void *fos; 1149 1150 for (;;) { /* forever */ 1151 if (!open_backend(mic)) { /* No virtblk */ 1152 for (mic->mic_virtblk.signaled = 0; 1153 !mic->mic_virtblk.signaled;) 1154 sleep(1); 1155 continue; 1156 } 1157 1158 /* backend file is specified. */ 1159 if (!start_virtblk(mic, &vring)) 1160 goto _close_backend; 1161 iovec = malloc(sizeof(*iovec) * 1162 le32toh(virtblk_dev_page.blk_config.seg_max)); 1163 if (!iovec) { 1164 mpsslog("%s: can't alloc iovec: %s\n", 1165 mic->name, strerror(ENOMEM)); 1166 goto _stop_virtblk; 1167 } 1168 1169 block_poll.fd = mic->mic_virtblk.virtio_block_fd; 1170 block_poll.events = POLLIN; 1171 for (mic->mic_virtblk.signaled = 0; 1172 !mic->mic_virtblk.signaled;) { 1173 block_poll.revents = 0; 1174 /* timeout in 1 sec to see signaled */ 1175 ret = poll(&block_poll, 1, 1000); 1176 if (ret < 0) { 1177 mpsslog("%s %d: poll failed: %s\n", 1178 __func__, __LINE__, 1179 strerror(errno)); 1180 continue; 1181 } 1182 1183 if (!(block_poll.revents & POLLIN)) { 1184#ifdef DEBUG 1185 mpsslog("%s %d: block_poll.revents=0x%x\n", 1186 __func__, __LINE__, block_poll.revents); 1187#endif 1188 continue; 1189 } 1190 1191 /* POLLIN */ 1192 while (vring.info->avail_idx != 1193 le16toh(vring.vr.avail->idx)) { 1194 /* read header element */ 1195 avail_idx = 1196 vring.info->avail_idx & 1197 (vring.vr.num - 1); 1198 desc_idx = le16toh( 1199 vring.vr.avail->ring[avail_idx]); 1200 desc = &vring.vr.desc[desc_idx]; 1201#ifdef DEBUG 1202 mpsslog("%s() %d: avail_idx=%d ", 1203 __func__, __LINE__, 1204 vring.info->avail_idx); 1205 mpsslog("vring.vr.num=%d desc=%p\n", 1206 vring.vr.num, desc); 1207#endif 1208 status = header_error_check(desc); 1209 ret = read_header( 1210 mic->mic_virtblk.virtio_block_fd, 1211 &hdr, desc_idx); 1212 if (ret < 0) { 1213 mpsslog("%s() %d %s: ret=%d %s\n", 1214 __func__, __LINE__, 1215 mic->name, ret, 1216 strerror(errno)); 1217 break; 1218 } 1219 /* buffer element */ 1220 piov = iovec; 1221 status = 0; 1222 fos = mic->mic_virtblk.backend_addr + 1223 (hdr.sector * SECTOR_SIZE); 1224 buffer_desc_idx = next_desc(desc); 1225 desc_idx = buffer_desc_idx; 1226 for (desc = &vring.vr.desc[buffer_desc_idx]; 1227 desc->flags & VRING_DESC_F_NEXT; 1228 desc_idx = next_desc(desc), 1229 desc = &vring.vr.desc[desc_idx]) { 1230 piov->iov_len = desc->len; 1231 piov->iov_base = fos; 1232 piov++; 1233 fos += desc->len; 1234 } 1235 /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */ 1236 if (hdr.type & ~(VIRTIO_BLK_T_OUT | 1237 VIRTIO_BLK_T_GET_ID)) { 1238 /* 1239 VIRTIO_BLK_T_IN - does not do 1240 anything. Probably for documenting. 1241 VIRTIO_BLK_T_SCSI_CMD - for 1242 virtio_scsi. 1243 VIRTIO_BLK_T_FLUSH - turned off in 1244 config space. 1245 VIRTIO_BLK_T_BARRIER - defined but not 1246 used in anywhere. 1247 */ 1248 mpsslog("%s() %d: type %x ", 1249 __func__, __LINE__, 1250 hdr.type); 1251 mpsslog("is not supported\n"); 1252 status = -ENOTSUP; 1253 1254 } else { 1255 ret = transfer_blocks( 1256 mic->mic_virtblk.virtio_block_fd, 1257 iovec, 1258 piov - iovec); 1259 if (ret < 0 && 1260 status != 0) 1261 status = ret; 1262 } 1263 /* write status and update used pointer */ 1264 if (status != 0) 1265 status = status_error_check(desc); 1266 ret = write_status( 1267 mic->mic_virtblk.virtio_block_fd, 1268 &status); 1269#ifdef DEBUG 1270 mpsslog("%s() %d: write status=%d on desc=%p\n", 1271 __func__, __LINE__, 1272 status, desc); 1273#endif 1274 } 1275 } 1276 free(iovec); 1277_stop_virtblk: 1278 stop_virtblk(mic); 1279_close_backend: 1280 close_backend(mic); 1281 } /* forever */ 1282 1283 pthread_exit(NULL); 1284} 1285 1286static void 1287reset(struct mic_info *mic) 1288{ 1289#define RESET_TIMEOUT 120 1290 int i = RESET_TIMEOUT; 1291 setsysfs(mic->name, "state", "reset"); 1292 while (i) { 1293 char *state; 1294 state = readsysfs(mic->name, "state"); 1295 if (!state) 1296 goto retry; 1297 mpsslog("%s: %s %d state %s\n", 1298 mic->name, __func__, __LINE__, state); 1299 1300 /* 1301 * If the shutdown was initiated by OSPM, the state stays 1302 * in "suspended" which is also a valid condition for reset. 1303 */ 1304 if ((!strcmp(state, "offline")) || 1305 (!strcmp(state, "suspended"))) { 1306 free(state); 1307 break; 1308 } 1309 free(state); 1310retry: 1311 sleep(1); 1312 i--; 1313 } 1314} 1315 1316static int 1317get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status) 1318{ 1319 if (!strcmp(shutdown_status, "nop")) 1320 return MIC_NOP; 1321 if (!strcmp(shutdown_status, "crashed")) 1322 return MIC_CRASHED; 1323 if (!strcmp(shutdown_status, "halted")) 1324 return MIC_HALTED; 1325 if (!strcmp(shutdown_status, "poweroff")) 1326 return MIC_POWER_OFF; 1327 if (!strcmp(shutdown_status, "restart")) 1328 return MIC_RESTART; 1329 mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status); 1330 /* Invalid state */ 1331 assert(0); 1332}; 1333 1334static int get_mic_state(struct mic_info *mic, char *state) 1335{ 1336 if (!strcmp(state, "offline")) 1337 return MIC_OFFLINE; 1338 if (!strcmp(state, "online")) 1339 return MIC_ONLINE; 1340 if (!strcmp(state, "shutting_down")) 1341 return MIC_SHUTTING_DOWN; 1342 if (!strcmp(state, "reset_failed")) 1343 return MIC_RESET_FAILED; 1344 if (!strcmp(state, "suspending")) 1345 return MIC_SUSPENDING; 1346 if (!strcmp(state, "suspended")) 1347 return MIC_SUSPENDED; 1348 mpsslog("%s: BUG invalid state %s\n", mic->name, state); 1349 /* Invalid state */ 1350 assert(0); 1351}; 1352 1353static void mic_handle_shutdown(struct mic_info *mic) 1354{ 1355#define SHUTDOWN_TIMEOUT 60 1356 int i = SHUTDOWN_TIMEOUT, ret, stat = 0; 1357 char *shutdown_status; 1358 while (i) { 1359 shutdown_status = readsysfs(mic->name, "shutdown_status"); 1360 if (!shutdown_status) 1361 continue; 1362 mpsslog("%s: %s %d shutdown_status %s\n", 1363 mic->name, __func__, __LINE__, shutdown_status); 1364 switch (get_mic_shutdown_status(mic, shutdown_status)) { 1365 case MIC_RESTART: 1366 mic->restart = 1; 1367 case MIC_HALTED: 1368 case MIC_POWER_OFF: 1369 case MIC_CRASHED: 1370 free(shutdown_status); 1371 goto reset; 1372 default: 1373 break; 1374 } 1375 free(shutdown_status); 1376 sleep(1); 1377 i--; 1378 } 1379reset: 1380 ret = kill(mic->pid, SIGTERM); 1381 mpsslog("%s: %s %d kill pid %d ret %d\n", 1382 mic->name, __func__, __LINE__, 1383 mic->pid, ret); 1384 if (!ret) { 1385 ret = waitpid(mic->pid, &stat, 1386 WIFSIGNALED(stat)); 1387 mpsslog("%s: %s %d waitpid ret %d pid %d\n", 1388 mic->name, __func__, __LINE__, 1389 ret, mic->pid); 1390 } 1391 if (ret == mic->pid) 1392 reset(mic); 1393} 1394 1395static void * 1396mic_config(void *arg) 1397{ 1398 struct mic_info *mic = (struct mic_info *)arg; 1399 char *state = NULL; 1400 char pathname[PATH_MAX]; 1401 int fd, ret; 1402 struct pollfd ufds[1]; 1403 char value[4096]; 1404 1405 snprintf(pathname, PATH_MAX - 1, "%s/%s/%s", 1406 MICSYSFSDIR, mic->name, "state"); 1407 1408 fd = open(pathname, O_RDONLY); 1409 if (fd < 0) { 1410 mpsslog("%s: opening file %s failed %s\n", 1411 mic->name, pathname, strerror(errno)); 1412 goto error; 1413 } 1414 1415 do { 1416 ret = lseek(fd, 0, SEEK_SET); 1417 if (ret < 0) { 1418 mpsslog("%s: Failed to seek to file start '%s': %s\n", 1419 mic->name, pathname, strerror(errno)); 1420 goto close_error1; 1421 } 1422 ret = read(fd, value, sizeof(value)); 1423 if (ret < 0) { 1424 mpsslog("%s: Failed to read sysfs entry '%s': %s\n", 1425 mic->name, pathname, strerror(errno)); 1426 goto close_error1; 1427 } 1428retry: 1429 state = readsysfs(mic->name, "state"); 1430 if (!state) 1431 goto retry; 1432 mpsslog("%s: %s %d state %s\n", 1433 mic->name, __func__, __LINE__, state); 1434 switch (get_mic_state(mic, state)) { 1435 case MIC_SHUTTING_DOWN: 1436 mic_handle_shutdown(mic); 1437 goto close_error; 1438 case MIC_SUSPENDING: 1439 mic->boot_on_resume = 1; 1440 setsysfs(mic->name, "state", "suspend"); 1441 mic_handle_shutdown(mic); 1442 goto close_error; 1443 case MIC_OFFLINE: 1444 if (mic->boot_on_resume) { 1445 setsysfs(mic->name, "state", "boot"); 1446 mic->boot_on_resume = 0; 1447 } 1448 break; 1449 default: 1450 break; 1451 } 1452 free(state); 1453 1454 ufds[0].fd = fd; 1455 ufds[0].events = POLLERR | POLLPRI; 1456 ret = poll(ufds, 1, -1); 1457 if (ret < 0) { 1458 mpsslog("%s: poll failed %s\n", 1459 mic->name, strerror(errno)); 1460 goto close_error1; 1461 } 1462 } while (1); 1463close_error: 1464 free(state); 1465close_error1: 1466 close(fd); 1467error: 1468 init_mic(mic); 1469 pthread_exit(NULL); 1470} 1471 1472static void 1473set_cmdline(struct mic_info *mic) 1474{ 1475 char buffer[PATH_MAX]; 1476 int len; 1477 1478 len = snprintf(buffer, PATH_MAX, 1479 "clocksource=tsc highres=off nohz=off "); 1480 len += snprintf(buffer + len, PATH_MAX - len, 1481 "cpufreq_on;corec6_off;pc3_off;pc6_off "); 1482 len += snprintf(buffer + len, PATH_MAX - len, 1483 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0", 1484 mic->id); 1485 1486 setsysfs(mic->name, "cmdline", buffer); 1487 mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer); 1488 snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id); 1489 mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer); 1490} 1491 1492static void 1493set_log_buf_info(struct mic_info *mic) 1494{ 1495 int fd; 1496 off_t len; 1497 char system_map[] = "/lib/firmware/mic/System.map"; 1498 char *map, *temp, log_buf[17] = {'\0'}; 1499 1500 fd = open(system_map, O_RDONLY); 1501 if (fd < 0) { 1502 mpsslog("%s: Opening System.map failed: %d\n", 1503 mic->name, errno); 1504 return; 1505 } 1506 len = lseek(fd, 0, SEEK_END); 1507 if (len < 0) { 1508 mpsslog("%s: Reading System.map size failed: %d\n", 1509 mic->name, errno); 1510 close(fd); 1511 return; 1512 } 1513 map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); 1514 if (map == MAP_FAILED) { 1515 mpsslog("%s: mmap of System.map failed: %d\n", 1516 mic->name, errno); 1517 close(fd); 1518 return; 1519 } 1520 temp = strstr(map, "__log_buf"); 1521 if (!temp) { 1522 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno); 1523 munmap(map, len); 1524 close(fd); 1525 return; 1526 } 1527 strncpy(log_buf, temp - 19, 16); 1528 setsysfs(mic->name, "log_buf_addr", log_buf); 1529 mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf); 1530 temp = strstr(map, "log_buf_len"); 1531 if (!temp) { 1532 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno); 1533 munmap(map, len); 1534 close(fd); 1535 return; 1536 } 1537 strncpy(log_buf, temp - 19, 16); 1538 setsysfs(mic->name, "log_buf_len", log_buf); 1539 mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf); 1540 munmap(map, len); 1541 close(fd); 1542} 1543 1544static void init_mic(struct mic_info *mic); 1545 1546static void 1547change_virtblk_backend(int x, siginfo_t *siginfo, void *p) 1548{ 1549 struct mic_info *mic; 1550 1551 for (mic = mic_list.next; mic != NULL; mic = mic->next) 1552 mic->mic_virtblk.signaled = 1/* true */; 1553} 1554 1555static void 1556init_mic(struct mic_info *mic) 1557{ 1558 struct sigaction ignore = { 1559 .sa_flags = 0, 1560 .sa_handler = SIG_IGN 1561 }; 1562 struct sigaction act = { 1563 .sa_flags = SA_SIGINFO, 1564 .sa_sigaction = change_virtblk_backend, 1565 }; 1566 char buffer[PATH_MAX]; 1567 int err; 1568 1569 /* 1570 * Currently, one virtio block device is supported for each MIC card 1571 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon. 1572 * The signal informs the virtio block backend about a change in the 1573 * configuration file which specifies the virtio backend file name on 1574 * the host. Virtio block backend then re-reads the configuration file 1575 * and switches to the new block device. This signalling mechanism may 1576 * not be required once multiple virtio block devices are supported by 1577 * the MIC daemon. 1578 */ 1579 sigaction(SIGUSR1, &ignore, NULL); 1580 1581 mic->pid = fork(); 1582 switch (mic->pid) { 1583 case 0: 1584 set_log_buf_info(mic); 1585 set_cmdline(mic); 1586 add_virtio_device(mic, &virtcons_dev_page.dd); 1587 add_virtio_device(mic, &virtnet_dev_page.dd); 1588 err = pthread_create(&mic->mic_console.console_thread, NULL, 1589 virtio_console, mic); 1590 if (err) 1591 mpsslog("%s virtcons pthread_create failed %s\n", 1592 mic->name, strerror(err)); 1593 err = pthread_create(&mic->mic_net.net_thread, NULL, 1594 virtio_net, mic); 1595 if (err) 1596 mpsslog("%s virtnet pthread_create failed %s\n", 1597 mic->name, strerror(err)); 1598 err = pthread_create(&mic->mic_virtblk.block_thread, NULL, 1599 virtio_block, mic); 1600 if (err) 1601 mpsslog("%s virtblk pthread_create failed %s\n", 1602 mic->name, strerror(err)); 1603 sigemptyset(&act.sa_mask); 1604 err = sigaction(SIGUSR1, &act, NULL); 1605 if (err) 1606 mpsslog("%s sigaction SIGUSR1 failed %s\n", 1607 mic->name, strerror(errno)); 1608 while (1) 1609 sleep(60); 1610 case -1: 1611 mpsslog("fork failed MIC name %s id %d errno %d\n", 1612 mic->name, mic->id, errno); 1613 break; 1614 default: 1615 if (mic->restart) { 1616 snprintf(buffer, PATH_MAX, "boot"); 1617 setsysfs(mic->name, "state", buffer); 1618 mpsslog("%s restarting mic %d\n", 1619 mic->name, mic->restart); 1620 mic->restart = 0; 1621 } 1622 pthread_create(&mic->config_thread, NULL, mic_config, mic); 1623 } 1624} 1625 1626static void 1627start_daemon(void) 1628{ 1629 struct mic_info *mic; 1630 1631 for (mic = mic_list.next; mic != NULL; mic = mic->next) 1632 init_mic(mic); 1633 1634 while (1) 1635 sleep(60); 1636} 1637 1638static int 1639init_mic_list(void) 1640{ 1641 struct mic_info *mic = &mic_list; 1642 struct dirent *file; 1643 DIR *dp; 1644 int cnt = 0; 1645 1646 dp = opendir(MICSYSFSDIR); 1647 if (!dp) 1648 return 0; 1649 1650 while ((file = readdir(dp)) != NULL) { 1651 if (!strncmp(file->d_name, "mic", 3)) { 1652 mic->next = calloc(1, sizeof(struct mic_info)); 1653 if (mic->next) { 1654 mic = mic->next; 1655 mic->id = atoi(&file->d_name[3]); 1656 mic->name = malloc(strlen(file->d_name) + 16); 1657 if (mic->name) 1658 strcpy(mic->name, file->d_name); 1659 mpsslog("MIC name %s id %d\n", mic->name, 1660 mic->id); 1661 cnt++; 1662 } 1663 } 1664 } 1665 1666 closedir(dp); 1667 return cnt; 1668} 1669 1670void 1671mpsslog(char *format, ...) 1672{ 1673 va_list args; 1674 char buffer[4096]; 1675 char ts[52], *ts1; 1676 time_t t; 1677 1678 if (logfp == NULL) 1679 return; 1680 1681 va_start(args, format); 1682 vsprintf(buffer, format, args); 1683 va_end(args); 1684 1685 time(&t); 1686 ts1 = ctime_r(&t, ts); 1687 ts1[strlen(ts1) - 1] = '\0'; 1688 fprintf(logfp, "%s: %s", ts1, buffer); 1689 1690 fflush(logfp); 1691} 1692 1693int 1694main(int argc, char *argv[]) 1695{ 1696 int cnt; 1697 pid_t pid; 1698 1699 myname = argv[0]; 1700 1701 logfp = fopen(LOGFILE_NAME, "a+"); 1702 if (!logfp) { 1703 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME); 1704 exit(1); 1705 } 1706 pid = fork(); 1707 switch (pid) { 1708 case 0: 1709 break; 1710 case -1: 1711 exit(2); 1712 default: 1713 exit(0); 1714 } 1715 1716 mpsslog("MIC Daemon start\n"); 1717 1718 cnt = init_mic_list(); 1719 if (cnt == 0) { 1720 mpsslog("MIC module not loaded\n"); 1721 exit(3); 1722 } 1723 mpsslog("MIC found %d devices\n", cnt); 1724 1725 start_daemon(); 1726 1727 exit(0); 1728} 1729