1 /*
2  * Intel MIC Platform Software Stack (MPSS)
3  *
4  * Copyright(c) 2013 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License, version 2, as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * General Public License for more details.
14  *
15  * The full GNU General Public License is included in this distribution in
16  * the file called "COPYING".
17  *
18  * Intel MIC User Space Tools.
19  */
20 
21 #define _GNU_SOURCE
22 
23 #include <stdlib.h>
24 #include <fcntl.h>
25 #include <getopt.h>
26 #include <assert.h>
27 #include <unistd.h>
28 #include <stdbool.h>
29 #include <signal.h>
30 #include <poll.h>
31 #include <features.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/mman.h>
35 #include <sys/socket.h>
36 #include <linux/virtio_ring.h>
37 #include <linux/virtio_net.h>
38 #include <linux/virtio_console.h>
39 #include <linux/virtio_blk.h>
40 #include <linux/version.h>
41 #include "mpssd.h"
42 #include <linux/mic_ioctl.h>
43 #include <linux/mic_common.h>
44 #include <tools/endian.h>
45 
/* Forward declaration so init_mic() can be referenced before its definition. */
static void init_mic(struct mic_info *mic);

/* NOTE(review): presumably the stream mpsslog() writes to - confirm. */
static FILE *logfp;
/* NOTE(review): looks like the anchor of the list of MIC devices - confirm. */
static struct mic_info mic_list;
50 
/* Number of elements in a true array (must not be used on a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Minimum of x and y after converting both to 'type'. Each argument is
 * evaluated exactly once (uses a GNU statement expression).
 */
#define min_t(type, x, y) ({				\
		type __min1 = (x);                      \
		type __min2 = (y);                      \
		__min1 < __min2 ? __min1 : __min2; })

/*
 * align addr on a size boundary - adjust address up/down if needed.
 * The mask arithmetic only works when size is a power of two. Both
 * arguments are fully parenthesized so that expression arguments such as
 * "a + b" or "1 << n" expand with the intended precedence.
 */
#define _ALIGN_DOWN(addr, size)  ((addr) & (~((size) - 1)))
#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, size)

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)

/* Force exactly one access to x by going through a volatile lvalue. */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

/* Non-zero: advertise checksum/GSO offloads on the virtio net device. */
#define GSO_ENABLED		1
#define MAX_GSO_SIZE		(64 * 1024)
/* Length of an Ethernet header in bytes. */
#define ETH_H_LEN		14
/* Largest packet buffer: GSO payload plus header, rounded up to 64 bytes. */
#define MAX_NET_PKT_SIZE	(_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
/* Offset within the device mapping at which the first vring starts. */
#define MIC_DEVICE_PAGE_END	0x1000

#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
#endif
79 
/*
 * Device page contents for the virtio console device: the device
 * descriptor, one mic_vqconfig per virtqueue (two here), the host feature
 * word and the guest acknowledgement word, then the console config space.
 * The count/length fields in .dd are derived from the sizes of those
 * members, so they follow automatically if the members change.
 */
static struct {
	struct mic_device_desc dd;
	struct mic_vqconfig vqconfig[2];
	__u32 host_features, guest_acknowledgements;
	struct virtio_console_config cons_config;
} virtcons_dev_page = {
	.dd = {
		.type = VIRTIO_ID_CONSOLE,
		.num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
		.feature_len = sizeof(virtcons_dev_page.host_features),
		.config_len = sizeof(virtcons_dev_page.cons_config),
	},
	/* Ring sizes are stored little-endian. */
	.vqconfig[0] = {
		.num = htole16(MIC_VRING_ENTRIES),
	},
	.vqconfig[1] = {
		.num = htole16(MIC_VRING_ENTRIES),
	},
};
99 
/*
 * Device page contents for the virtio net device: the device descriptor,
 * two virtqueue configs, the host feature word and the guest
 * acknowledgement word, then the net config space. The count/length
 * fields in .dd are derived from the sizes of those members.
 */
static struct {
	struct mic_device_desc dd;
	struct mic_vqconfig vqconfig[2];
	__u32 host_features, guest_acknowledgements;
	struct virtio_net_config net_config;
} virtnet_dev_page = {
	.dd = {
		.type = VIRTIO_ID_NET,
		.num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
		.feature_len = sizeof(virtnet_dev_page.host_features),
		.config_len = sizeof(virtnet_dev_page.net_config),
	},
	/* Ring sizes are stored little-endian. */
	.vqconfig[0] = {
		.num = htole16(MIC_VRING_ENTRIES),
	},
	.vqconfig[1] = {
		.num = htole16(MIC_VRING_ENTRIES),
	},
	/*
	 * With GSO_ENABLED the checksum and segmentation offload feature bits
	 * are advertised (stored little-endian); otherwise no feature is
	 * offered.
	 */
#if GSO_ENABLED
		.host_features = htole32(
		1 << VIRTIO_NET_F_CSUM |
		1 << VIRTIO_NET_F_GSO |
		1 << VIRTIO_NET_F_GUEST_TSO4 |
		1 << VIRTIO_NET_F_GUEST_TSO6 |
		1 << VIRTIO_NET_F_GUEST_ECN |
		1 << VIRTIO_NET_F_GUEST_UFO),
#else
		.host_features = 0,
#endif
};
130 
/* Directory holding the per-card "mpssd<NNN>.conf" files (see set_backend_file()). */
static const char *mic_config_dir = "/etc/sysconfig/mic";
/* Key that set_backend_file() searches for at the start of a config line. */
static const char *virtblk_backend = "VIRTBLK_BACKEND";
/*
 * Device page contents for the virtio block device: the device descriptor,
 * a single virtqueue config, the host feature word and the guest
 * acknowledgement word, then the block config space. start_virtblk()
 * passes .dd to add_virtio_device().
 */
static struct {
	struct mic_device_desc dd;
	struct mic_vqconfig vqconfig[1];
	__u32 host_features, guest_acknowledgements;
	struct virtio_blk_config blk_config;
} virtblk_dev_page = {
	.dd = {
		.type = VIRTIO_ID_BLOCK,
		.num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
		.feature_len = sizeof(virtblk_dev_page.host_features),
		.config_len = sizeof(virtblk_dev_page.blk_config),
	},
	/* Ring size is stored little-endian. */
	.vqconfig[0] = {
		.num = htole16(MIC_VRING_ENTRIES),
	},
	/* Only the SEG_MAX feature is offered. */
	.host_features =
		htole32(1<<VIRTIO_BLK_F_SEG_MAX),
	.blk_config = {
		.seg_max = htole32(MIC_VRING_ENTRIES - 2),
		/* Placeholder; set_backend_size() stores the real capacity. */
		.capacity = htole64(0),
	 }
};
155 
/* NOTE(review): presumably the program name; it is assigned outside the visible code - confirm. */
static char *myname;
157 
158 static int
tap_configure(struct mic_info * mic,char * dev)159 tap_configure(struct mic_info *mic, char *dev)
160 {
161 	pid_t pid;
162 	char *ifargv[7];
163 	char ipaddr[IFNAMSIZ];
164 	int ret = 0;
165 
166 	pid = fork();
167 	if (pid == 0) {
168 		ifargv[0] = "ip";
169 		ifargv[1] = "link";
170 		ifargv[2] = "set";
171 		ifargv[3] = dev;
172 		ifargv[4] = "up";
173 		ifargv[5] = NULL;
174 		mpsslog("Configuring %s\n", dev);
175 		ret = execvp("ip", ifargv);
176 		if (ret < 0) {
177 			mpsslog("%s execvp failed errno %s\n",
178 				mic->name, strerror(errno));
179 			return ret;
180 		}
181 	}
182 	if (pid < 0) {
183 		mpsslog("%s fork failed errno %s\n",
184 			mic->name, strerror(errno));
185 		return ret;
186 	}
187 
188 	ret = waitpid(pid, NULL, 0);
189 	if (ret < 0) {
190 		mpsslog("%s waitpid failed errno %s\n",
191 			mic->name, strerror(errno));
192 		return ret;
193 	}
194 
195 	snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
196 
197 	pid = fork();
198 	if (pid == 0) {
199 		ifargv[0] = "ip";
200 		ifargv[1] = "addr";
201 		ifargv[2] = "add";
202 		ifargv[3] = ipaddr;
203 		ifargv[4] = "dev";
204 		ifargv[5] = dev;
205 		ifargv[6] = NULL;
206 		mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
207 		ret = execvp("ip", ifargv);
208 		if (ret < 0) {
209 			mpsslog("%s execvp failed errno %s\n",
210 				mic->name, strerror(errno));
211 			return ret;
212 		}
213 	}
214 	if (pid < 0) {
215 		mpsslog("%s fork failed errno %s\n",
216 			mic->name, strerror(errno));
217 		return ret;
218 	}
219 
220 	ret = waitpid(pid, NULL, 0);
221 	if (ret < 0) {
222 		mpsslog("%s waitpid failed errno %s\n",
223 			mic->name, strerror(errno));
224 		return ret;
225 	}
226 	mpsslog("MIC name %s %s %d DONE!\n",
227 		mic->name, __func__, __LINE__);
228 	return 0;
229 }
230 
tun_alloc(struct mic_info * mic,char * dev)231 static int tun_alloc(struct mic_info *mic, char *dev)
232 {
233 	struct ifreq ifr;
234 	int fd, err;
235 #if GSO_ENABLED
236 	unsigned offload;
237 #endif
238 	fd = open("/dev/net/tun", O_RDWR);
239 	if (fd < 0) {
240 		mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
241 		goto done;
242 	}
243 
244 	memset(&ifr, 0, sizeof(ifr));
245 
246 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
247 	if (*dev)
248 		strncpy(ifr.ifr_name, dev, IFNAMSIZ);
249 
250 	err = ioctl(fd, TUNSETIFF, (void *)&ifr);
251 	if (err < 0) {
252 		mpsslog("%s %s %d TUNSETIFF failed %s\n",
253 			mic->name, __func__, __LINE__, strerror(errno));
254 		close(fd);
255 		return err;
256 	}
257 #if GSO_ENABLED
258 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
259 		TUN_F_TSO_ECN | TUN_F_UFO;
260 
261 	err = ioctl(fd, TUNSETOFFLOAD, offload);
262 	if (err < 0) {
263 		mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
264 			mic->name, __func__, __LINE__, strerror(errno));
265 		close(fd);
266 		return err;
267 	}
268 #endif
269 	strcpy(dev, ifr.ifr_name);
270 	mpsslog("Created TAP %s\n", dev);
271 done:
272 	return fd;
273 }
274 
/* Indices into the pollfd array used by virtio_net(). */
#define NET_FD_VIRTIO_NET 0
#define NET_FD_TUN 1
/* Number of entries in that pollfd array. */
#define MAX_NET_FD 2
278 
set_dp(struct mic_info * mic,int type,void * dp)279 static void set_dp(struct mic_info *mic, int type, void *dp)
280 {
281 	switch (type) {
282 	case VIRTIO_ID_CONSOLE:
283 		mic->mic_console.console_dp = dp;
284 		return;
285 	case VIRTIO_ID_NET:
286 		mic->mic_net.net_dp = dp;
287 		return;
288 	case VIRTIO_ID_BLOCK:
289 		mic->mic_virtblk.block_dp = dp;
290 		return;
291 	}
292 	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
293 	assert(0);
294 }
295 
get_dp(struct mic_info * mic,int type)296 static void *get_dp(struct mic_info *mic, int type)
297 {
298 	switch (type) {
299 	case VIRTIO_ID_CONSOLE:
300 		return mic->mic_console.console_dp;
301 	case VIRTIO_ID_NET:
302 		return mic->mic_net.net_dp;
303 	case VIRTIO_ID_BLOCK:
304 		return mic->mic_virtblk.block_dp;
305 	}
306 	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
307 	assert(0);
308 	return NULL;
309 }
310 
get_device_desc(struct mic_info * mic,int type)311 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
312 {
313 	struct mic_device_desc *d;
314 	int i;
315 	void *dp = get_dp(mic, type);
316 
317 	for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
318 		i += mic_total_desc_size(d)) {
319 		d = dp + i;
320 
321 		/* End of list */
322 		if (d->type == 0)
323 			break;
324 
325 		if (d->type == -1)
326 			continue;
327 
328 		mpsslog("%s %s d-> type %d d %p\n",
329 			mic->name, __func__, d->type, d);
330 
331 		if (d->type == (__u8)type)
332 			return d;
333 	}
334 	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
335 	assert(0);
336 	return NULL;
337 }
338 
339 /* See comments in vhost.c for explanation of next_desc() */
next_desc(struct vring_desc * desc)340 static unsigned next_desc(struct vring_desc *desc)
341 {
342 	unsigned int next;
343 
344 	if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
345 		return -1U;
346 	next = le16toh(desc->next);
347 	return next;
348 }
349 
350 /* Sum up all the IOVEC length */
351 static ssize_t
sum_iovec_len(struct mic_copy_desc * copy)352 sum_iovec_len(struct mic_copy_desc *copy)
353 {
354 	ssize_t sum = 0;
355 	int i;
356 
357 	for (i = 0; i < copy->iovcnt; i++)
358 		sum += copy->iov[i].iov_len;
359 	return sum;
360 }
361 
verify_out_len(struct mic_info * mic,struct mic_copy_desc * copy)362 static inline void verify_out_len(struct mic_info *mic,
363 	struct mic_copy_desc *copy)
364 {
365 	if (copy->out_len != sum_iovec_len(copy)) {
366 		mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
367 			mic->name, __func__, __LINE__,
368 			copy->out_len, sum_iovec_len(copy));
369 		assert(copy->out_len == sum_iovec_len(copy));
370 	}
371 }
372 
373 /* Display an iovec */
374 static void
disp_iovec(struct mic_info * mic,struct mic_copy_desc * copy,const char * s,int line)375 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
376 	   const char *s, int line)
377 {
378 	int i;
379 
380 	for (i = 0; i < copy->iovcnt; i++)
381 		mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
382 			mic->name, s, line, i,
383 			copy->iov[i].iov_base, copy->iov[i].iov_len);
384 }
385 
read_avail_idx(struct mic_vring * vr)386 static inline __u16 read_avail_idx(struct mic_vring *vr)
387 {
388 	return ACCESS_ONCE(vr->info->avail_idx);
389 }
390 
txrx_prepare(int type,bool tx,struct mic_vring * vr,struct mic_copy_desc * copy,ssize_t len)391 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
392 				struct mic_copy_desc *copy, ssize_t len)
393 {
394 	copy->vr_idx = tx ? 0 : 1;
395 	copy->update_used = true;
396 	if (type == VIRTIO_ID_NET)
397 		copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
398 	else
399 		copy->iov[0].iov_len = len;
400 }
401 
402 /* Central API which triggers the copies */
403 static int
mic_virtio_copy(struct mic_info * mic,int fd,struct mic_vring * vr,struct mic_copy_desc * copy)404 mic_virtio_copy(struct mic_info *mic, int fd,
405 		struct mic_vring *vr, struct mic_copy_desc *copy)
406 {
407 	int ret;
408 
409 	ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
410 	if (ret) {
411 		mpsslog("%s %s %d errno %s ret %d\n",
412 			mic->name, __func__, __LINE__,
413 			strerror(errno), ret);
414 	}
415 	return ret;
416 }
417 
/*
 * This initialization routine requires at least one
 * vring i.e. vr0. vr1 is optional.
 *
 * Maps the device page and num_vq vrings of the virtio device behind fd,
 * records the mapping as the device page for 'type' and initializes the
 * vring bookkeeping in vr0 (and vr1 when it is not NULL).
 *
 * Returns the base of the mapping, or MAP_FAILED if mmap() failed. A
 * magic value in a ring's info area that does not match the expected one
 * trips an assertion.
 *
 * NOTE(review): vr1 is placed at a fixed offset regardless of num_vq, so
 * callers passing a non-NULL vr1 presumably must pass num_vq >= 2 -
 * confirm.
 */
static void *
init_vr(struct mic_info *mic, int fd, int type,
	struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
{
	int vr_size;
	char *va;

	/* Per-ring footprint: the vring plus its info struct, page aligned. */
	vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
		MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
	/* Read-only shared mapping: device page area followed by the rings. */
	va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
		PROT_READ, MAP_SHARED, fd, 0);
	if (MAP_FAILED == va) {
		mpsslog("%s %s %d mmap failed errno %s\n",
			mic->name, __func__, __LINE__,
			strerror(errno));
		goto done;
	}
	set_dp(mic, type, va);
	/* The first ring starts right after the device page area. */
	vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
	/*
	 * The info struct follows the vring itself.
	 * NOTE(review): this offset is in bytes only if the va member is a
	 * byte-addressed (e.g. void *) pointer - confirm against mpssd.h.
	 */
	vr0->info = vr0->va +
		vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
	vring_init(&vr0->vr,
		   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
	mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
		__func__, mic->name, vr0->va, vr0->info, vr_size,
		vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
	mpsslog("magic 0x%x expected 0x%x\n",
		le32toh(vr0->info->magic), MIC_MAGIC + type);
	/* The first ring is expected to carry MIC_MAGIC + type. */
	assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
	if (vr1) {
		/* The second ring sits one vr_size further into the mapping. */
		vr1->va = (struct mic_vring *)
			&va[MIC_DEVICE_PAGE_END + vr_size];
		vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
			MIC_VIRTIO_RING_ALIGN);
		vring_init(&vr1->vr,
			   MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
		mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
			__func__, mic->name, vr1->va, vr1->info, vr_size,
			vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
		mpsslog("magic 0x%x expected 0x%x\n",
			le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
		/* The second ring is expected to carry MIC_MAGIC + type + 1. */
		assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
	}
done:
	return va;
}
468 
469 static void
wait_for_card_driver(struct mic_info * mic,int fd,int type)470 wait_for_card_driver(struct mic_info *mic, int fd, int type)
471 {
472 	struct pollfd pollfd;
473 	int err;
474 	struct mic_device_desc *desc = get_device_desc(mic, type);
475 
476 	pollfd.fd = fd;
477 	mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
478 		mic->name, __func__, type, desc->status);
479 	while (1) {
480 		pollfd.events = POLLIN;
481 		pollfd.revents = 0;
482 		err = poll(&pollfd, 1, -1);
483 		if (err < 0) {
484 			mpsslog("%s %s poll failed %s\n",
485 				mic->name, __func__, strerror(errno));
486 			continue;
487 		}
488 
489 		if (pollfd.revents) {
490 			mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
491 				mic->name, __func__, type, desc->status);
492 			if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
493 				mpsslog("%s %s poll.revents %d\n",
494 					mic->name, __func__, pollfd.revents);
495 				mpsslog("%s %s desc-> type %d status 0x%x\n",
496 					mic->name, __func__, type,
497 					desc->status);
498 				break;
499 			}
500 		}
501 	}
502 }
503 
504 /* Spin till we have some descriptors */
505 static void
spin_for_descriptors(struct mic_info * mic,struct mic_vring * vr)506 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
507 {
508 	__u16 avail_idx = read_avail_idx(vr);
509 
510 	while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
511 #ifdef DEBUG
512 		mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
513 			mic->name, __func__,
514 			le16toh(vr->vr.avail->idx), vr->info->avail_idx);
515 #endif
516 		sched_yield();
517 	}
518 }
519 
520 static void *
virtio_net(void * arg)521 virtio_net(void *arg)
522 {
523 	static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
524 	static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
525 	struct iovec vnet_iov[2][2] = {
526 		{ { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
527 		  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
528 		{ { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
529 		  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
530 	};
531 	struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
532 	struct mic_info *mic = (struct mic_info *)arg;
533 	char if_name[IFNAMSIZ];
534 	struct pollfd net_poll[MAX_NET_FD];
535 	struct mic_vring tx_vr, rx_vr;
536 	struct mic_copy_desc copy;
537 	struct mic_device_desc *desc;
538 	int err;
539 
540 	snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
541 	mic->mic_net.tap_fd = tun_alloc(mic, if_name);
542 	if (mic->mic_net.tap_fd < 0)
543 		goto done;
544 
545 	if (tap_configure(mic, if_name))
546 		goto done;
547 	mpsslog("MIC name %s id %d\n", mic->name, mic->id);
548 
549 	net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
550 	net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
551 	net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
552 	net_poll[NET_FD_TUN].events = POLLIN;
553 
554 	if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
555 				  VIRTIO_ID_NET, &tx_vr, &rx_vr,
556 		virtnet_dev_page.dd.num_vq)) {
557 		mpsslog("%s init_vr failed %s\n",
558 			mic->name, strerror(errno));
559 		goto done;
560 	}
561 
562 	copy.iovcnt = 2;
563 	desc = get_device_desc(mic, VIRTIO_ID_NET);
564 
565 	while (1) {
566 		ssize_t len;
567 
568 		net_poll[NET_FD_VIRTIO_NET].revents = 0;
569 		net_poll[NET_FD_TUN].revents = 0;
570 
571 		/* Start polling for data from tap and virtio net */
572 		err = poll(net_poll, 2, -1);
573 		if (err < 0) {
574 			mpsslog("%s poll failed %s\n",
575 				__func__, strerror(errno));
576 			continue;
577 		}
578 		if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
579 			wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
580 					     VIRTIO_ID_NET);
581 		/*
582 		 * Check if there is data to be read from TUN and write to
583 		 * virtio net fd if there is.
584 		 */
585 		if (net_poll[NET_FD_TUN].revents & POLLIN) {
586 			copy.iov = iov0;
587 			len = readv(net_poll[NET_FD_TUN].fd,
588 				copy.iov, copy.iovcnt);
589 			if (len > 0) {
590 				struct virtio_net_hdr *hdr
591 					= (struct virtio_net_hdr *)vnet_hdr[0];
592 
593 				/* Disable checksums on the card since we are on
594 				   a reliable PCIe link */
595 				hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
596 #ifdef DEBUG
597 				mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
598 					__func__, __LINE__, hdr->flags);
599 				mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
600 					copy.out_len, hdr->gso_type);
601 #endif
602 #ifdef DEBUG
603 				disp_iovec(mic, copy, __func__, __LINE__);
604 				mpsslog("%s %s %d read from tap 0x%lx\n",
605 					mic->name, __func__, __LINE__,
606 					len);
607 #endif
608 				spin_for_descriptors(mic, &tx_vr);
609 				txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
610 					     len);
611 
612 				err = mic_virtio_copy(mic,
613 					mic->mic_net.virtio_net_fd, &tx_vr,
614 					&copy);
615 				if (err < 0) {
616 					mpsslog("%s %s %d mic_virtio_copy %s\n",
617 						mic->name, __func__, __LINE__,
618 						strerror(errno));
619 				}
620 				if (!err)
621 					verify_out_len(mic, &copy);
622 #ifdef DEBUG
623 				disp_iovec(mic, copy, __func__, __LINE__);
624 				mpsslog("%s %s %d wrote to net 0x%lx\n",
625 					mic->name, __func__, __LINE__,
626 					sum_iovec_len(&copy));
627 #endif
628 				/* Reinitialize IOV for next run */
629 				iov0[1].iov_len = MAX_NET_PKT_SIZE;
630 			} else if (len < 0) {
631 				disp_iovec(mic, &copy, __func__, __LINE__);
632 				mpsslog("%s %s %d read failed %s ", mic->name,
633 					__func__, __LINE__, strerror(errno));
634 				mpsslog("cnt %d sum %zd\n",
635 					copy.iovcnt, sum_iovec_len(&copy));
636 			}
637 		}
638 
639 		/*
640 		 * Check if there is data to be read from virtio net and
641 		 * write to TUN if there is.
642 		 */
643 		if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
644 			while (rx_vr.info->avail_idx !=
645 				le16toh(rx_vr.vr.avail->idx)) {
646 				copy.iov = iov1;
647 				txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
648 					     MAX_NET_PKT_SIZE
649 					+ sizeof(struct virtio_net_hdr));
650 
651 				err = mic_virtio_copy(mic,
652 					mic->mic_net.virtio_net_fd, &rx_vr,
653 					&copy);
654 				if (!err) {
655 #ifdef DEBUG
656 					struct virtio_net_hdr *hdr
657 						= (struct virtio_net_hdr *)
658 							vnet_hdr[1];
659 
660 					mpsslog("%s %s %d hdr->flags 0x%x, ",
661 						mic->name, __func__, __LINE__,
662 						hdr->flags);
663 					mpsslog("out_len %d gso_type 0x%x\n",
664 						copy.out_len,
665 						hdr->gso_type);
666 #endif
667 					/* Set the correct output iov_len */
668 					iov1[1].iov_len = copy.out_len -
669 						sizeof(struct virtio_net_hdr);
670 					verify_out_len(mic, &copy);
671 #ifdef DEBUG
672 					disp_iovec(mic, copy, __func__,
673 						   __LINE__);
674 					mpsslog("%s %s %d ",
675 						mic->name, __func__, __LINE__);
676 					mpsslog("read from net 0x%lx\n",
677 						sum_iovec_len(copy));
678 #endif
679 					len = writev(net_poll[NET_FD_TUN].fd,
680 						copy.iov, copy.iovcnt);
681 					if (len != sum_iovec_len(&copy)) {
682 						mpsslog("Tun write failed %s ",
683 							strerror(errno));
684 						mpsslog("len 0x%zx ", len);
685 						mpsslog("read_len 0x%zx\n",
686 							sum_iovec_len(&copy));
687 					} else {
688 #ifdef DEBUG
689 						disp_iovec(mic, &copy, __func__,
690 							   __LINE__);
691 						mpsslog("%s %s %d ",
692 							mic->name, __func__,
693 							__LINE__);
694 						mpsslog("wrote to tap 0x%lx\n",
695 							len);
696 #endif
697 					}
698 				} else {
699 					mpsslog("%s %s %d mic_virtio_copy %s\n",
700 						mic->name, __func__, __LINE__,
701 						strerror(errno));
702 					break;
703 				}
704 			}
705 		}
706 		if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
707 			mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
708 	}
709 done:
710 	pthread_exit(NULL);
711 }
712 
/* virtio_console */
/* Indices into the pollfd array used by virtio_console(). */
#define VIRTIO_CONSOLE_FD 0
#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
/* NOTE(review): not referenced in the visible part of the file. */
#define MAX_BUFFER_SIZE PAGE_SIZE
718 
719 static void *
virtio_console(void * arg)720 virtio_console(void *arg)
721 {
722 	static __u8 vcons_buf[2][PAGE_SIZE];
723 	struct iovec vcons_iov[2] = {
724 		{ .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
725 		{ .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
726 	};
727 	struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
728 	struct mic_info *mic = (struct mic_info *)arg;
729 	int err;
730 	struct pollfd console_poll[MAX_CONSOLE_FD];
731 	int pty_fd;
732 	char *pts_name;
733 	ssize_t len;
734 	struct mic_vring tx_vr, rx_vr;
735 	struct mic_copy_desc copy;
736 	struct mic_device_desc *desc;
737 
738 	pty_fd = posix_openpt(O_RDWR);
739 	if (pty_fd < 0) {
740 		mpsslog("can't open a pseudoterminal master device: %s\n",
741 			strerror(errno));
742 		goto _return;
743 	}
744 	pts_name = ptsname(pty_fd);
745 	if (pts_name == NULL) {
746 		mpsslog("can't get pts name\n");
747 		goto _close_pty;
748 	}
749 	printf("%s console message goes to %s\n", mic->name, pts_name);
750 	mpsslog("%s console message goes to %s\n", mic->name, pts_name);
751 	err = grantpt(pty_fd);
752 	if (err < 0) {
753 		mpsslog("can't grant access: %s %s\n",
754 			pts_name, strerror(errno));
755 		goto _close_pty;
756 	}
757 	err = unlockpt(pty_fd);
758 	if (err < 0) {
759 		mpsslog("can't unlock a pseudoterminal: %s %s\n",
760 			pts_name, strerror(errno));
761 		goto _close_pty;
762 	}
763 	console_poll[MONITOR_FD].fd = pty_fd;
764 	console_poll[MONITOR_FD].events = POLLIN;
765 
766 	console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
767 	console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
768 
769 	if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
770 				  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
771 		virtcons_dev_page.dd.num_vq)) {
772 		mpsslog("%s init_vr failed %s\n",
773 			mic->name, strerror(errno));
774 		goto _close_pty;
775 	}
776 
777 	copy.iovcnt = 1;
778 	desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
779 
780 	for (;;) {
781 		console_poll[MONITOR_FD].revents = 0;
782 		console_poll[VIRTIO_CONSOLE_FD].revents = 0;
783 		err = poll(console_poll, MAX_CONSOLE_FD, -1);
784 		if (err < 0) {
785 			mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
786 				strerror(errno));
787 			continue;
788 		}
789 		if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
790 			wait_for_card_driver(mic,
791 					     mic->mic_console.virtio_console_fd,
792 				VIRTIO_ID_CONSOLE);
793 
794 		if (console_poll[MONITOR_FD].revents & POLLIN) {
795 			copy.iov = iov0;
796 			len = readv(pty_fd, copy.iov, copy.iovcnt);
797 			if (len > 0) {
798 #ifdef DEBUG
799 				disp_iovec(mic, copy, __func__, __LINE__);
800 				mpsslog("%s %s %d read from tap 0x%lx\n",
801 					mic->name, __func__, __LINE__,
802 					len);
803 #endif
804 				spin_for_descriptors(mic, &tx_vr);
805 				txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
806 					     &copy, len);
807 
808 				err = mic_virtio_copy(mic,
809 					mic->mic_console.virtio_console_fd,
810 					&tx_vr, &copy);
811 				if (err < 0) {
812 					mpsslog("%s %s %d mic_virtio_copy %s\n",
813 						mic->name, __func__, __LINE__,
814 						strerror(errno));
815 				}
816 				if (!err)
817 					verify_out_len(mic, &copy);
818 #ifdef DEBUG
819 				disp_iovec(mic, copy, __func__, __LINE__);
820 				mpsslog("%s %s %d wrote to net 0x%lx\n",
821 					mic->name, __func__, __LINE__,
822 					sum_iovec_len(copy));
823 #endif
824 				/* Reinitialize IOV for next run */
825 				iov0->iov_len = PAGE_SIZE;
826 			} else if (len < 0) {
827 				disp_iovec(mic, &copy, __func__, __LINE__);
828 				mpsslog("%s %s %d read failed %s ",
829 					mic->name, __func__, __LINE__,
830 					strerror(errno));
831 				mpsslog("cnt %d sum %zd\n",
832 					copy.iovcnt, sum_iovec_len(&copy));
833 			}
834 		}
835 
836 		if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
837 			while (rx_vr.info->avail_idx !=
838 				le16toh(rx_vr.vr.avail->idx)) {
839 				copy.iov = iov1;
840 				txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
841 					     &copy, PAGE_SIZE);
842 
843 				err = mic_virtio_copy(mic,
844 					mic->mic_console.virtio_console_fd,
845 					&rx_vr, &copy);
846 				if (!err) {
847 					/* Set the correct output iov_len */
848 					iov1->iov_len = copy.out_len;
849 					verify_out_len(mic, &copy);
850 #ifdef DEBUG
851 					disp_iovec(mic, copy, __func__,
852 						   __LINE__);
853 					mpsslog("%s %s %d ",
854 						mic->name, __func__, __LINE__);
855 					mpsslog("read from net 0x%lx\n",
856 						sum_iovec_len(copy));
857 #endif
858 					len = writev(pty_fd,
859 						copy.iov, copy.iovcnt);
860 					if (len != sum_iovec_len(&copy)) {
861 						mpsslog("Tun write failed %s ",
862 							strerror(errno));
863 						mpsslog("len 0x%zx ", len);
864 						mpsslog("read_len 0x%zx\n",
865 							sum_iovec_len(&copy));
866 					} else {
867 #ifdef DEBUG
868 						disp_iovec(mic, copy, __func__,
869 							   __LINE__);
870 						mpsslog("%s %s %d ",
871 							mic->name, __func__,
872 							__LINE__);
873 						mpsslog("wrote to tap 0x%lx\n",
874 							len);
875 #endif
876 					}
877 				} else {
878 					mpsslog("%s %s %d mic_virtio_copy %s\n",
879 						mic->name, __func__, __LINE__,
880 						strerror(errno));
881 					break;
882 				}
883 			}
884 		}
885 		if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
886 			mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
887 	}
888 _close_pty:
889 	close(pty_fd);
890 _return:
891 	pthread_exit(NULL);
892 }
893 
894 static void
add_virtio_device(struct mic_info * mic,struct mic_device_desc * dd)895 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
896 {
897 	char path[PATH_MAX];
898 	int fd, err;
899 
900 	snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
901 	fd = open(path, O_RDWR);
902 	if (fd < 0) {
903 		mpsslog("Could not open %s %s\n", path, strerror(errno));
904 		return;
905 	}
906 
907 	err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
908 	if (err < 0) {
909 		mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
910 		close(fd);
911 		return;
912 	}
913 	switch (dd->type) {
914 	case VIRTIO_ID_NET:
915 		mic->mic_net.virtio_net_fd = fd;
916 		mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
917 		break;
918 	case VIRTIO_ID_CONSOLE:
919 		mic->mic_console.virtio_console_fd = fd;
920 		mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
921 		break;
922 	case VIRTIO_ID_BLOCK:
923 		mic->mic_virtblk.virtio_block_fd = fd;
924 		mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
925 		break;
926 	}
927 }
928 
929 static bool
set_backend_file(struct mic_info * mic)930 set_backend_file(struct mic_info *mic)
931 {
932 	FILE *config;
933 	char buff[PATH_MAX], *line, *evv, *p;
934 
935 	snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
936 	config = fopen(buff, "r");
937 	if (config == NULL)
938 		return false;
939 	do {  /* look for "virtblk_backend=XXXX" */
940 		line = fgets(buff, PATH_MAX, config);
941 		if (line == NULL)
942 			break;
943 		if (*line == '#')
944 			continue;
945 		p = strchr(line, '\n');
946 		if (p)
947 			*p = '\0';
948 	} while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
949 	fclose(config);
950 	if (line == NULL)
951 		return false;
952 	evv = strchr(line, '=');
953 	if (evv == NULL)
954 		return false;
955 	mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
956 	if (mic->mic_virtblk.backend_file == NULL) {
957 		mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
958 		return false;
959 	}
960 	strcpy(mic->mic_virtblk.backend_file, evv + 1);
961 	return true;
962 }
963 
964 #define SECTOR_SIZE 512
965 static bool
set_backend_size(struct mic_info * mic)966 set_backend_size(struct mic_info *mic)
967 {
968 	mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
969 		SEEK_END);
970 	if (mic->mic_virtblk.backend_size < 0) {
971 		mpsslog("%s: can't seek: %s\n",
972 			mic->name, mic->mic_virtblk.backend_file);
973 		return false;
974 	}
975 	virtblk_dev_page.blk_config.capacity =
976 		mic->mic_virtblk.backend_size / SECTOR_SIZE;
977 	if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
978 		virtblk_dev_page.blk_config.capacity++;
979 
980 	virtblk_dev_page.blk_config.capacity =
981 		htole64(virtblk_dev_page.blk_config.capacity);
982 
983 	return true;
984 }
985 
986 static bool
open_backend(struct mic_info * mic)987 open_backend(struct mic_info *mic)
988 {
989 	if (!set_backend_file(mic))
990 		goto _error_exit;
991 	mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
992 	if (mic->mic_virtblk.backend < 0) {
993 		mpsslog("%s: can't open: %s\n", mic->name,
994 			mic->mic_virtblk.backend_file);
995 		goto _error_free;
996 	}
997 	if (!set_backend_size(mic))
998 		goto _error_close;
999 	mic->mic_virtblk.backend_addr = mmap(NULL,
1000 		mic->mic_virtblk.backend_size,
1001 		PROT_READ|PROT_WRITE, MAP_SHARED,
1002 		mic->mic_virtblk.backend, 0L);
1003 	if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1004 		mpsslog("%s: can't map: %s %s\n",
1005 			mic->name, mic->mic_virtblk.backend_file,
1006 			strerror(errno));
1007 		goto _error_close;
1008 	}
1009 	return true;
1010 
1011  _error_close:
1012 	close(mic->mic_virtblk.backend);
1013  _error_free:
1014 	free(mic->mic_virtblk.backend_file);
1015  _error_exit:
1016 	return false;
1017 }
1018 
/*
 * Release what open_backend() acquired: unmap the backend mapping, close
 * the backend descriptor and free the backend path string. Return values
 * of munmap() and close() are not checked.
 */
static void
close_backend(struct mic_info *mic)
{
	munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
	close(mic->mic_virtblk.backend);
	free(mic->mic_virtblk.backend_file);
}
1026 
1027 static bool
start_virtblk(struct mic_info * mic,struct mic_vring * vring)1028 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1029 {
1030 	if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1031 		mpsslog("%s: blk_config is not 8 byte aligned.\n",
1032 			mic->name);
1033 		return false;
1034 	}
1035 	add_virtio_device(mic, &virtblk_dev_page.dd);
1036 	if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1037 				  VIRTIO_ID_BLOCK, vring, NULL,
1038 				  virtblk_dev_page.dd.num_vq)) {
1039 		mpsslog("%s init_vr failed %s\n",
1040 			mic->name, strerror(errno));
1041 		return false;
1042 	}
1043 	return true;
1044 }
1045 
1046 static void
stop_virtblk(struct mic_info * mic)1047 stop_virtblk(struct mic_info *mic)
1048 {
1049 	int vr_size, ret;
1050 
1051 	vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
1052 		MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
1053 	ret = munmap(mic->mic_virtblk.block_dp,
1054 		MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1055 	if (ret < 0)
1056 		mpsslog("%s munmap errno %d\n", mic->name, errno);
1057 	close(mic->mic_virtblk.virtio_block_fd);
1058 }
1059 
1060 static __u8
header_error_check(struct vring_desc * desc)1061 header_error_check(struct vring_desc *desc)
1062 {
1063 	if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1064 		mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1065 			__func__, __LINE__);
1066 		return -EIO;
1067 	}
1068 	if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1069 		mpsslog("%s() %d: alone\n",
1070 			__func__, __LINE__);
1071 		return -EIO;
1072 	}
1073 	if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1074 		mpsslog("%s() %d: not read\n",
1075 			__func__, __LINE__);
1076 		return -EIO;
1077 	}
1078 	return 0;
1079 }
1080 
1081 static int
read_header(int fd,struct virtio_blk_outhdr * hdr,__u32 desc_idx)1082 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1083 {
1084 	struct iovec iovec;
1085 	struct mic_copy_desc copy;
1086 
1087 	iovec.iov_len = sizeof(*hdr);
1088 	iovec.iov_base = hdr;
1089 	copy.iov = &iovec;
1090 	copy.iovcnt = 1;
1091 	copy.vr_idx = 0;  /* only one vring on virtio_block */
1092 	copy.update_used = false;  /* do not update used index */
1093 	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1094 }
1095 
1096 static int
transfer_blocks(int fd,struct iovec * iovec,__u32 iovcnt)1097 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1098 {
1099 	struct mic_copy_desc copy;
1100 
1101 	copy.iov = iovec;
1102 	copy.iovcnt = iovcnt;
1103 	copy.vr_idx = 0;  /* only one vring on virtio_block */
1104 	copy.update_used = false;  /* do not update used index */
1105 	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1106 }
1107 
1108 static __u8
status_error_check(struct vring_desc * desc)1109 status_error_check(struct vring_desc *desc)
1110 {
1111 	if (le32toh(desc->len) != sizeof(__u8)) {
1112 		mpsslog("%s() %d: length is not sizeof(status)\n",
1113 			__func__, __LINE__);
1114 		return -EIO;
1115 	}
1116 	return 0;
1117 }
1118 
1119 static int
write_status(int fd,__u8 * status)1120 write_status(int fd, __u8 *status)
1121 {
1122 	struct iovec iovec;
1123 	struct mic_copy_desc copy;
1124 
1125 	iovec.iov_base = status;
1126 	iovec.iov_len = sizeof(*status);
1127 	copy.iov = &iovec;
1128 	copy.iovcnt = 1;
1129 	copy.vr_idx = 0;  /* only one vring on virtio_block */
1130 	copy.update_used = true; /* Update used index */
1131 	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1132 }
1133 
1134 static void *
virtio_block(void * arg)1135 virtio_block(void *arg)
1136 {
1137 	struct mic_info *mic = (struct mic_info *)arg;
1138 	int ret;
1139 	struct pollfd block_poll;
1140 	struct mic_vring vring;
1141 	__u16 avail_idx;
1142 	__u32 desc_idx;
1143 	struct vring_desc *desc;
1144 	struct iovec *iovec, *piov;
1145 	__u8 status;
1146 	__u32 buffer_desc_idx;
1147 	struct virtio_blk_outhdr hdr;
1148 	void *fos;
1149 
1150 	for (;;) {  /* forever */
1151 		if (!open_backend(mic)) { /* No virtblk */
1152 			for (mic->mic_virtblk.signaled = 0;
1153 				!mic->mic_virtblk.signaled;)
1154 				sleep(1);
1155 			continue;
1156 		}
1157 
1158 		/* backend file is specified. */
1159 		if (!start_virtblk(mic, &vring))
1160 			goto _close_backend;
1161 		iovec = malloc(sizeof(*iovec) *
1162 			le32toh(virtblk_dev_page.blk_config.seg_max));
1163 		if (!iovec) {
1164 			mpsslog("%s: can't alloc iovec: %s\n",
1165 				mic->name, strerror(ENOMEM));
1166 			goto _stop_virtblk;
1167 		}
1168 
1169 		block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1170 		block_poll.events = POLLIN;
1171 		for (mic->mic_virtblk.signaled = 0;
1172 		     !mic->mic_virtblk.signaled;) {
1173 			block_poll.revents = 0;
1174 					/* timeout in 1 sec to see signaled */
1175 			ret = poll(&block_poll, 1, 1000);
1176 			if (ret < 0) {
1177 				mpsslog("%s %d: poll failed: %s\n",
1178 					__func__, __LINE__,
1179 					strerror(errno));
1180 				continue;
1181 			}
1182 
1183 			if (!(block_poll.revents & POLLIN)) {
1184 #ifdef DEBUG
1185 				mpsslog("%s %d: block_poll.revents=0x%x\n",
1186 					__func__, __LINE__, block_poll.revents);
1187 #endif
1188 				continue;
1189 			}
1190 
1191 			/* POLLIN */
1192 			while (vring.info->avail_idx !=
1193 				le16toh(vring.vr.avail->idx)) {
1194 				/* read header element */
1195 				avail_idx =
1196 					vring.info->avail_idx &
1197 					(vring.vr.num - 1);
1198 				desc_idx = le16toh(
1199 					vring.vr.avail->ring[avail_idx]);
1200 				desc = &vring.vr.desc[desc_idx];
1201 #ifdef DEBUG
1202 				mpsslog("%s() %d: avail_idx=%d ",
1203 					__func__, __LINE__,
1204 					vring.info->avail_idx);
1205 				mpsslog("vring.vr.num=%d desc=%p\n",
1206 					vring.vr.num, desc);
1207 #endif
1208 				status = header_error_check(desc);
1209 				ret = read_header(
1210 					mic->mic_virtblk.virtio_block_fd,
1211 					&hdr, desc_idx);
1212 				if (ret < 0) {
1213 					mpsslog("%s() %d %s: ret=%d %s\n",
1214 						__func__, __LINE__,
1215 						mic->name, ret,
1216 						strerror(errno));
1217 					break;
1218 				}
1219 				/* buffer element */
1220 				piov = iovec;
1221 				status = 0;
1222 				fos = mic->mic_virtblk.backend_addr +
1223 					(hdr.sector * SECTOR_SIZE);
1224 				buffer_desc_idx = next_desc(desc);
1225 				desc_idx = buffer_desc_idx;
1226 				for (desc = &vring.vr.desc[buffer_desc_idx];
1227 				     desc->flags & VRING_DESC_F_NEXT;
1228 				     desc_idx = next_desc(desc),
1229 					     desc = &vring.vr.desc[desc_idx]) {
1230 					piov->iov_len = desc->len;
1231 					piov->iov_base = fos;
1232 					piov++;
1233 					fos += desc->len;
1234 				}
1235 				/* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1236 				if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1237 					VIRTIO_BLK_T_GET_ID)) {
1238 					/*
1239 					  VIRTIO_BLK_T_IN - does not do
1240 					  anything. Probably for documenting.
1241 					  VIRTIO_BLK_T_SCSI_CMD - for
1242 					  virtio_scsi.
1243 					  VIRTIO_BLK_T_FLUSH - turned off in
1244 					  config space.
1245 					  VIRTIO_BLK_T_BARRIER - defined but not
1246 					  used in anywhere.
1247 					*/
1248 					mpsslog("%s() %d: type %x ",
1249 						__func__, __LINE__,
1250 						hdr.type);
1251 					mpsslog("is not supported\n");
1252 					status = -ENOTSUP;
1253 
1254 				} else {
1255 					ret = transfer_blocks(
1256 					mic->mic_virtblk.virtio_block_fd,
1257 						iovec,
1258 						piov - iovec);
1259 					if (ret < 0 &&
1260 					    status != 0)
1261 						status = ret;
1262 				}
1263 				/* write status and update used pointer */
1264 				if (status != 0)
1265 					status = status_error_check(desc);
1266 				ret = write_status(
1267 					mic->mic_virtblk.virtio_block_fd,
1268 					&status);
1269 #ifdef DEBUG
1270 				mpsslog("%s() %d: write status=%d on desc=%p\n",
1271 					__func__, __LINE__,
1272 					status, desc);
1273 #endif
1274 			}
1275 		}
1276 		free(iovec);
1277 _stop_virtblk:
1278 		stop_virtblk(mic);
1279 _close_backend:
1280 		close_backend(mic);
1281 	}  /* forever */
1282 
1283 	pthread_exit(NULL);
1284 }
1285 
1286 static void
reset(struct mic_info * mic)1287 reset(struct mic_info *mic)
1288 {
1289 #define RESET_TIMEOUT 120
1290 	int i = RESET_TIMEOUT;
1291 	setsysfs(mic->name, "state", "reset");
1292 	while (i) {
1293 		char *state;
1294 		state = readsysfs(mic->name, "state");
1295 		if (!state)
1296 			goto retry;
1297 		mpsslog("%s: %s %d state %s\n",
1298 			mic->name, __func__, __LINE__, state);
1299 
1300 		/*
1301 		 * If the shutdown was initiated by OSPM, the state stays
1302 		 * in "suspended" which is also a valid condition for reset.
1303 		 */
1304 		if ((!strcmp(state, "offline")) ||
1305 		    (!strcmp(state, "suspended"))) {
1306 			free(state);
1307 			break;
1308 		}
1309 		free(state);
1310 retry:
1311 		sleep(1);
1312 		i--;
1313 	}
1314 }
1315 
1316 static int
get_mic_shutdown_status(struct mic_info * mic,char * shutdown_status)1317 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1318 {
1319 	if (!strcmp(shutdown_status, "nop"))
1320 		return MIC_NOP;
1321 	if (!strcmp(shutdown_status, "crashed"))
1322 		return MIC_CRASHED;
1323 	if (!strcmp(shutdown_status, "halted"))
1324 		return MIC_HALTED;
1325 	if (!strcmp(shutdown_status, "poweroff"))
1326 		return MIC_POWER_OFF;
1327 	if (!strcmp(shutdown_status, "restart"))
1328 		return MIC_RESTART;
1329 	mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1330 	/* Invalid state */
1331 	assert(0);
1332 };
1333 
get_mic_state(struct mic_info * mic,char * state)1334 static int get_mic_state(struct mic_info *mic, char *state)
1335 {
1336 	if (!strcmp(state, "offline"))
1337 		return MIC_OFFLINE;
1338 	if (!strcmp(state, "online"))
1339 		return MIC_ONLINE;
1340 	if (!strcmp(state, "shutting_down"))
1341 		return MIC_SHUTTING_DOWN;
1342 	if (!strcmp(state, "reset_failed"))
1343 		return MIC_RESET_FAILED;
1344 	if (!strcmp(state, "suspending"))
1345 		return MIC_SUSPENDING;
1346 	if (!strcmp(state, "suspended"))
1347 		return MIC_SUSPENDED;
1348 	mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1349 	/* Invalid state */
1350 	assert(0);
1351 };
1352 
mic_handle_shutdown(struct mic_info * mic)1353 static void mic_handle_shutdown(struct mic_info *mic)
1354 {
1355 #define SHUTDOWN_TIMEOUT 60
1356 	int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
1357 	char *shutdown_status;
1358 	while (i) {
1359 		shutdown_status = readsysfs(mic->name, "shutdown_status");
1360 		if (!shutdown_status)
1361 			continue;
1362 		mpsslog("%s: %s %d shutdown_status %s\n",
1363 			mic->name, __func__, __LINE__, shutdown_status);
1364 		switch (get_mic_shutdown_status(mic, shutdown_status)) {
1365 		case MIC_RESTART:
1366 			mic->restart = 1;
1367 		case MIC_HALTED:
1368 		case MIC_POWER_OFF:
1369 		case MIC_CRASHED:
1370 			free(shutdown_status);
1371 			goto reset;
1372 		default:
1373 			break;
1374 		}
1375 		free(shutdown_status);
1376 		sleep(1);
1377 		i--;
1378 	}
1379 reset:
1380 	ret = kill(mic->pid, SIGTERM);
1381 	mpsslog("%s: %s %d kill pid %d ret %d\n",
1382 		mic->name, __func__, __LINE__,
1383 		mic->pid, ret);
1384 	if (!ret) {
1385 		ret = waitpid(mic->pid, &stat,
1386 			WIFSIGNALED(stat));
1387 		mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1388 			mic->name, __func__, __LINE__,
1389 			ret, mic->pid);
1390 	}
1391 	if (ret == mic->pid)
1392 		reset(mic);
1393 }
1394 
1395 static void *
mic_config(void * arg)1396 mic_config(void *arg)
1397 {
1398 	struct mic_info *mic = (struct mic_info *)arg;
1399 	char *state = NULL;
1400 	char pathname[PATH_MAX];
1401 	int fd, ret;
1402 	struct pollfd ufds[1];
1403 	char value[4096];
1404 
1405 	snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1406 		 MICSYSFSDIR, mic->name, "state");
1407 
1408 	fd = open(pathname, O_RDONLY);
1409 	if (fd < 0) {
1410 		mpsslog("%s: opening file %s failed %s\n",
1411 			mic->name, pathname, strerror(errno));
1412 		goto error;
1413 	}
1414 
1415 	do {
1416 		ret = lseek(fd, 0, SEEK_SET);
1417 		if (ret < 0) {
1418 			mpsslog("%s: Failed to seek to file start '%s': %s\n",
1419 				mic->name, pathname, strerror(errno));
1420 			goto close_error1;
1421 		}
1422 		ret = read(fd, value, sizeof(value));
1423 		if (ret < 0) {
1424 			mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
1425 				mic->name, pathname, strerror(errno));
1426 			goto close_error1;
1427 		}
1428 retry:
1429 		state = readsysfs(mic->name, "state");
1430 		if (!state)
1431 			goto retry;
1432 		mpsslog("%s: %s %d state %s\n",
1433 			mic->name, __func__, __LINE__, state);
1434 		switch (get_mic_state(mic, state)) {
1435 		case MIC_SHUTTING_DOWN:
1436 			mic_handle_shutdown(mic);
1437 			goto close_error;
1438 		case MIC_SUSPENDING:
1439 			mic->boot_on_resume = 1;
1440 			setsysfs(mic->name, "state", "suspend");
1441 			mic_handle_shutdown(mic);
1442 			goto close_error;
1443 		case MIC_OFFLINE:
1444 			if (mic->boot_on_resume) {
1445 				setsysfs(mic->name, "state", "boot");
1446 				mic->boot_on_resume = 0;
1447 			}
1448 			break;
1449 		default:
1450 			break;
1451 		}
1452 		free(state);
1453 
1454 		ufds[0].fd = fd;
1455 		ufds[0].events = POLLERR | POLLPRI;
1456 		ret = poll(ufds, 1, -1);
1457 		if (ret < 0) {
1458 			mpsslog("%s: poll failed %s\n",
1459 				mic->name, strerror(errno));
1460 			goto close_error1;
1461 		}
1462 	} while (1);
1463 close_error:
1464 	free(state);
1465 close_error1:
1466 	close(fd);
1467 error:
1468 	init_mic(mic);
1469 	pthread_exit(NULL);
1470 }
1471 
1472 static void
set_cmdline(struct mic_info * mic)1473 set_cmdline(struct mic_info *mic)
1474 {
1475 	char buffer[PATH_MAX];
1476 	int len;
1477 
1478 	len = snprintf(buffer, PATH_MAX,
1479 		"clocksource=tsc highres=off nohz=off ");
1480 	len += snprintf(buffer + len, PATH_MAX - len,
1481 		"cpufreq_on;corec6_off;pc3_off;pc6_off ");
1482 	len += snprintf(buffer + len, PATH_MAX - len,
1483 		"ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1484 		mic->id);
1485 
1486 	setsysfs(mic->name, "cmdline", buffer);
1487 	mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1488 	snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
1489 	mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1490 }
1491 
1492 static void
set_log_buf_info(struct mic_info * mic)1493 set_log_buf_info(struct mic_info *mic)
1494 {
1495 	int fd;
1496 	off_t len;
1497 	char system_map[] = "/lib/firmware/mic/System.map";
1498 	char *map, *temp, log_buf[17] = {'\0'};
1499 
1500 	fd = open(system_map, O_RDONLY);
1501 	if (fd < 0) {
1502 		mpsslog("%s: Opening System.map failed: %d\n",
1503 			mic->name, errno);
1504 		return;
1505 	}
1506 	len = lseek(fd, 0, SEEK_END);
1507 	if (len < 0) {
1508 		mpsslog("%s: Reading System.map size failed: %d\n",
1509 			mic->name, errno);
1510 		close(fd);
1511 		return;
1512 	}
1513 	map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1514 	if (map == MAP_FAILED) {
1515 		mpsslog("%s: mmap of System.map failed: %d\n",
1516 			mic->name, errno);
1517 		close(fd);
1518 		return;
1519 	}
1520 	temp = strstr(map, "__log_buf");
1521 	if (!temp) {
1522 		mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1523 		munmap(map, len);
1524 		close(fd);
1525 		return;
1526 	}
1527 	strncpy(log_buf, temp - 19, 16);
1528 	setsysfs(mic->name, "log_buf_addr", log_buf);
1529 	mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1530 	temp = strstr(map, "log_buf_len");
1531 	if (!temp) {
1532 		mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1533 		munmap(map, len);
1534 		close(fd);
1535 		return;
1536 	}
1537 	strncpy(log_buf, temp - 19, 16);
1538 	setsysfs(mic->name, "log_buf_len", log_buf);
1539 	mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1540 	munmap(map, len);
1541 	close(fd);
1542 }
1543 
/*
 * Forward declaration of init_mic(), which is defined further down in this
 * file and called by mic_config() above.
 */
static void init_mic(struct mic_info *mic);
1545 
1546 static void
change_virtblk_backend(int x,siginfo_t * siginfo,void * p)1547 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1548 {
1549 	struct mic_info *mic;
1550 
1551 	for (mic = mic_list.next; mic != NULL; mic = mic->next)
1552 		mic->mic_virtblk.signaled = 1/* true */;
1553 }
1554 
1555 static void
init_mic(struct mic_info * mic)1556 init_mic(struct mic_info *mic)
1557 {
1558 	struct sigaction ignore = {
1559 		.sa_flags = 0,
1560 		.sa_handler = SIG_IGN
1561 	};
1562 	struct sigaction act = {
1563 		.sa_flags = SA_SIGINFO,
1564 		.sa_sigaction = change_virtblk_backend,
1565 	};
1566 	char buffer[PATH_MAX];
1567 	int err;
1568 
1569 	/*
1570 	 * Currently, one virtio block device is supported for each MIC card
1571 	 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1572 	 * The signal informs the virtio block backend about a change in the
1573 	 * configuration file which specifies the virtio backend file name on
1574 	 * the host. Virtio block backend then re-reads the configuration file
1575 	 * and switches to the new block device. This signalling mechanism may
1576 	 * not be required once multiple virtio block devices are supported by
1577 	 * the MIC daemon.
1578 	 */
1579 	sigaction(SIGUSR1, &ignore, NULL);
1580 
1581 	mic->pid = fork();
1582 	switch (mic->pid) {
1583 	case 0:
1584 		set_log_buf_info(mic);
1585 		set_cmdline(mic);
1586 		add_virtio_device(mic, &virtcons_dev_page.dd);
1587 		add_virtio_device(mic, &virtnet_dev_page.dd);
1588 		err = pthread_create(&mic->mic_console.console_thread, NULL,
1589 			virtio_console, mic);
1590 		if (err)
1591 			mpsslog("%s virtcons pthread_create failed %s\n",
1592 				mic->name, strerror(err));
1593 		err = pthread_create(&mic->mic_net.net_thread, NULL,
1594 			virtio_net, mic);
1595 		if (err)
1596 			mpsslog("%s virtnet pthread_create failed %s\n",
1597 				mic->name, strerror(err));
1598 		err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1599 			virtio_block, mic);
1600 		if (err)
1601 			mpsslog("%s virtblk pthread_create failed %s\n",
1602 				mic->name, strerror(err));
1603 		sigemptyset(&act.sa_mask);
1604 		err = sigaction(SIGUSR1, &act, NULL);
1605 		if (err)
1606 			mpsslog("%s sigaction SIGUSR1 failed %s\n",
1607 				mic->name, strerror(errno));
1608 		while (1)
1609 			sleep(60);
1610 	case -1:
1611 		mpsslog("fork failed MIC name %s id %d errno %d\n",
1612 			mic->name, mic->id, errno);
1613 		break;
1614 	default:
1615 		if (mic->restart) {
1616 			snprintf(buffer, PATH_MAX, "boot");
1617 			setsysfs(mic->name, "state", buffer);
1618 			mpsslog("%s restarting mic %d\n",
1619 				mic->name, mic->restart);
1620 			mic->restart = 0;
1621 		}
1622 		pthread_create(&mic->config_thread, NULL, mic_config, mic);
1623 	}
1624 }
1625 
1626 static void
start_daemon(void)1627 start_daemon(void)
1628 {
1629 	struct mic_info *mic;
1630 
1631 	for (mic = mic_list.next; mic != NULL; mic = mic->next)
1632 		init_mic(mic);
1633 
1634 	while (1)
1635 		sleep(60);
1636 }
1637 
1638 static int
init_mic_list(void)1639 init_mic_list(void)
1640 {
1641 	struct mic_info *mic = &mic_list;
1642 	struct dirent *file;
1643 	DIR *dp;
1644 	int cnt = 0;
1645 
1646 	dp = opendir(MICSYSFSDIR);
1647 	if (!dp)
1648 		return 0;
1649 
1650 	while ((file = readdir(dp)) != NULL) {
1651 		if (!strncmp(file->d_name, "mic", 3)) {
1652 			mic->next = calloc(1, sizeof(struct mic_info));
1653 			if (mic->next) {
1654 				mic = mic->next;
1655 				mic->id = atoi(&file->d_name[3]);
1656 				mic->name = malloc(strlen(file->d_name) + 16);
1657 				if (mic->name)
1658 					strcpy(mic->name, file->d_name);
1659 				mpsslog("MIC name %s id %d\n", mic->name,
1660 					mic->id);
1661 				cnt++;
1662 			}
1663 		}
1664 	}
1665 
1666 	closedir(dp);
1667 	return cnt;
1668 }
1669 
1670 void
mpsslog(char * format,...)1671 mpsslog(char *format, ...)
1672 {
1673 	va_list args;
1674 	char buffer[4096];
1675 	char ts[52], *ts1;
1676 	time_t t;
1677 
1678 	if (logfp == NULL)
1679 		return;
1680 
1681 	va_start(args, format);
1682 	vsprintf(buffer, format, args);
1683 	va_end(args);
1684 
1685 	time(&t);
1686 	ts1 = ctime_r(&t, ts);
1687 	ts1[strlen(ts1) - 1] = '\0';
1688 	fprintf(logfp, "%s: %s", ts1, buffer);
1689 
1690 	fflush(logfp);
1691 }
1692 
1693 int
main(int argc,char * argv[])1694 main(int argc, char *argv[])
1695 {
1696 	int cnt;
1697 	pid_t pid;
1698 
1699 	myname = argv[0];
1700 
1701 	logfp = fopen(LOGFILE_NAME, "a+");
1702 	if (!logfp) {
1703 		fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1704 		exit(1);
1705 	}
1706 	pid = fork();
1707 	switch (pid) {
1708 	case 0:
1709 		break;
1710 	case -1:
1711 		exit(2);
1712 	default:
1713 		exit(0);
1714 	}
1715 
1716 	mpsslog("MIC Daemon start\n");
1717 
1718 	cnt = init_mic_list();
1719 	if (cnt == 0) {
1720 		mpsslog("MIC module not loaded\n");
1721 		exit(3);
1722 	}
1723 	mpsslog("MIC found %d devices\n", cnt);
1724 
1725 	start_daemon();
1726 
1727 	exit(0);
1728 }
1729