1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2013 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC User Space Tools.
19 */
20
21#define _GNU_SOURCE
22
23#include <stdlib.h>
24#include <fcntl.h>
25#include <getopt.h>
26#include <assert.h>
27#include <unistd.h>
28#include <stdbool.h>
29#include <signal.h>
30#include <poll.h>
31#include <features.h>
32#include <sys/types.h>
33#include <sys/stat.h>
34#include <sys/mman.h>
35#include <sys/socket.h>
36#include <linux/virtio_ring.h>
37#include <linux/virtio_net.h>
38#include <linux/virtio_console.h>
39#include <linux/virtio_blk.h>
40#include <linux/version.h>
41#include "mpssd.h"
42#include <linux/mic_ioctl.h>
43#include <linux/mic_common.h>
44#include <tools/endian.h>
45
/* Forward declaration; the definition appears later in this file. */
static void init_mic(struct mic_info *mic);

/* Log stream handle - presumably the sink written by mpsslog(); TODO confirm. */
static FILE *logfp;
/* NOTE(review): looks like the anchor of the list of known MIC cards - confirm. */
static struct mic_info mic_list;
50
/* Number of elements in a real array (do not use on pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Minimum of x and y after converting both to 'type'.
 * Each argument is evaluated exactly once.
 */
#define min_t(type, x, y) ({				\
		type __min1 = (x);                      \
		type __min2 = (y);                      \
		__min1 < __min2 ? __min1 : __min2; })

/*
 * align addr on a size boundary - adjust address up/down if needed.
 * 'size' must be a power of two. Every argument is parenthesized so that
 * expressions such as (1 << 6) or (a + b) can be passed safely.
 */
#define _ALIGN_DOWN(addr, size)  ((addr) & (~((size) - 1)))
#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, (size))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)     _ALIGN_UP((addr), (size))

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)        _ALIGN((addr), PAGE_SIZE)

/* Force a single, non-cached access to x by going through a volatile lvalue. */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

#define GSO_ENABLED		1
#define MAX_GSO_SIZE		(64 * 1024)
#define ETH_H_LEN		14
/* Largest packet (GSO payload plus Ethernet header) rounded up to 64 bytes */
#define MAX_NET_PKT_SIZE	(_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
/* Offset of the first vring inside the device mapping (see init_vr()) */
#define MIC_DEVICE_PAGE_END	0x1000

#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
#endif
79
80static struct {
81	struct mic_device_desc dd;
82	struct mic_vqconfig vqconfig[2];
83	__u32 host_features, guest_acknowledgements;
84	struct virtio_console_config cons_config;
85} virtcons_dev_page = {
86	.dd = {
87		.type = VIRTIO_ID_CONSOLE,
88		.num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
89		.feature_len = sizeof(virtcons_dev_page.host_features),
90		.config_len = sizeof(virtcons_dev_page.cons_config),
91	},
92	.vqconfig[0] = {
93		.num = htole16(MIC_VRING_ENTRIES),
94	},
95	.vqconfig[1] = {
96		.num = htole16(MIC_VRING_ENTRIES),
97	},
98};
99
100static struct {
101	struct mic_device_desc dd;
102	struct mic_vqconfig vqconfig[2];
103	__u32 host_features, guest_acknowledgements;
104	struct virtio_net_config net_config;
105} virtnet_dev_page = {
106	.dd = {
107		.type = VIRTIO_ID_NET,
108		.num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
109		.feature_len = sizeof(virtnet_dev_page.host_features),
110		.config_len = sizeof(virtnet_dev_page.net_config),
111	},
112	.vqconfig[0] = {
113		.num = htole16(MIC_VRING_ENTRIES),
114	},
115	.vqconfig[1] = {
116		.num = htole16(MIC_VRING_ENTRIES),
117	},
118#if GSO_ENABLED
119		.host_features = htole32(
120		1 << VIRTIO_NET_F_CSUM |
121		1 << VIRTIO_NET_F_GSO |
122		1 << VIRTIO_NET_F_GUEST_TSO4 |
123		1 << VIRTIO_NET_F_GUEST_TSO6 |
124		1 << VIRTIO_NET_F_GUEST_ECN |
125		1 << VIRTIO_NET_F_GUEST_UFO),
126#else
127		.host_features = 0,
128#endif
129};
130
131static const char *mic_config_dir = "/etc/sysconfig/mic";
132static const char *virtblk_backend = "VIRTBLK_BACKEND";
133static struct {
134	struct mic_device_desc dd;
135	struct mic_vqconfig vqconfig[1];
136	__u32 host_features, guest_acknowledgements;
137	struct virtio_blk_config blk_config;
138} virtblk_dev_page = {
139	.dd = {
140		.type = VIRTIO_ID_BLOCK,
141		.num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
142		.feature_len = sizeof(virtblk_dev_page.host_features),
143		.config_len = sizeof(virtblk_dev_page.blk_config),
144	},
145	.vqconfig[0] = {
146		.num = htole16(MIC_VRING_ENTRIES),
147	},
148	.host_features =
149		htole32(1<<VIRTIO_BLK_F_SEG_MAX),
150	.blk_config = {
151		.seg_max = htole32(MIC_VRING_ENTRIES - 2),
152		.capacity = htole64(0),
153	 }
154};
155
/* NOTE(review): presumably the program name (argv[0]); it is set outside this part of the file - confirm. */
static char *myname;
157
158static int
159tap_configure(struct mic_info *mic, char *dev)
160{
161	pid_t pid;
162	char *ifargv[7];
163	char ipaddr[IFNAMSIZ];
164	int ret = 0;
165
166	pid = fork();
167	if (pid == 0) {
168		ifargv[0] = "ip";
169		ifargv[1] = "link";
170		ifargv[2] = "set";
171		ifargv[3] = dev;
172		ifargv[4] = "up";
173		ifargv[5] = NULL;
174		mpsslog("Configuring %s\n", dev);
175		ret = execvp("ip", ifargv);
176		if (ret < 0) {
177			mpsslog("%s execvp failed errno %s\n",
178				mic->name, strerror(errno));
179			return ret;
180		}
181	}
182	if (pid < 0) {
183		mpsslog("%s fork failed errno %s\n",
184			mic->name, strerror(errno));
185		return ret;
186	}
187
188	ret = waitpid(pid, NULL, 0);
189	if (ret < 0) {
190		mpsslog("%s waitpid failed errno %s\n",
191			mic->name, strerror(errno));
192		return ret;
193	}
194
195	snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
196
197	pid = fork();
198	if (pid == 0) {
199		ifargv[0] = "ip";
200		ifargv[1] = "addr";
201		ifargv[2] = "add";
202		ifargv[3] = ipaddr;
203		ifargv[4] = "dev";
204		ifargv[5] = dev;
205		ifargv[6] = NULL;
206		mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
207		ret = execvp("ip", ifargv);
208		if (ret < 0) {
209			mpsslog("%s execvp failed errno %s\n",
210				mic->name, strerror(errno));
211			return ret;
212		}
213	}
214	if (pid < 0) {
215		mpsslog("%s fork failed errno %s\n",
216			mic->name, strerror(errno));
217		return ret;
218	}
219
220	ret = waitpid(pid, NULL, 0);
221	if (ret < 0) {
222		mpsslog("%s waitpid failed errno %s\n",
223			mic->name, strerror(errno));
224		return ret;
225	}
226	mpsslog("MIC name %s %s %d DONE!\n",
227		mic->name, __func__, __LINE__);
228	return 0;
229}
230
231static int tun_alloc(struct mic_info *mic, char *dev)
232{
233	struct ifreq ifr;
234	int fd, err;
235#if GSO_ENABLED
236	unsigned offload;
237#endif
238	fd = open("/dev/net/tun", O_RDWR);
239	if (fd < 0) {
240		mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
241		goto done;
242	}
243
244	memset(&ifr, 0, sizeof(ifr));
245
246	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
247	if (*dev)
248		strncpy(ifr.ifr_name, dev, IFNAMSIZ);
249
250	err = ioctl(fd, TUNSETIFF, (void *)&ifr);
251	if (err < 0) {
252		mpsslog("%s %s %d TUNSETIFF failed %s\n",
253			mic->name, __func__, __LINE__, strerror(errno));
254		close(fd);
255		return err;
256	}
257#if GSO_ENABLED
258	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
259		TUN_F_TSO_ECN | TUN_F_UFO;
260
261	err = ioctl(fd, TUNSETOFFLOAD, offload);
262	if (err < 0) {
263		mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
264			mic->name, __func__, __LINE__, strerror(errno));
265		close(fd);
266		return err;
267	}
268#endif
269	strcpy(dev, ifr.ifr_name);
270	mpsslog("Created TAP %s\n", dev);
271done:
272	return fd;
273}
274
/* Slots of the pollfd array used by the virtio_net thread */
#define NET_FD_VIRTIO_NET 0	/* slot polling the virtio net device fd */
#define NET_FD_TUN 1		/* slot polling the TAP device fd */
#define MAX_NET_FD 2		/* number of slots in the array */
278
279static void set_dp(struct mic_info *mic, int type, void *dp)
280{
281	switch (type) {
282	case VIRTIO_ID_CONSOLE:
283		mic->mic_console.console_dp = dp;
284		return;
285	case VIRTIO_ID_NET:
286		mic->mic_net.net_dp = dp;
287		return;
288	case VIRTIO_ID_BLOCK:
289		mic->mic_virtblk.block_dp = dp;
290		return;
291	}
292	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
293	assert(0);
294}
295
296static void *get_dp(struct mic_info *mic, int type)
297{
298	switch (type) {
299	case VIRTIO_ID_CONSOLE:
300		return mic->mic_console.console_dp;
301	case VIRTIO_ID_NET:
302		return mic->mic_net.net_dp;
303	case VIRTIO_ID_BLOCK:
304		return mic->mic_virtblk.block_dp;
305	}
306	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
307	assert(0);
308	return NULL;
309}
310
311static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
312{
313	struct mic_device_desc *d;
314	int i;
315	void *dp = get_dp(mic, type);
316
317	for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
318		i += mic_total_desc_size(d)) {
319		d = dp + i;
320
321		/* End of list */
322		if (d->type == 0)
323			break;
324
325		if (d->type == -1)
326			continue;
327
328		mpsslog("%s %s d-> type %d d %p\n",
329			mic->name, __func__, d->type, d);
330
331		if (d->type == (__u8)type)
332			return d;
333	}
334	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
335	assert(0);
336	return NULL;
337}
338
339/* See comments in vhost.c for explanation of next_desc() */
340static unsigned next_desc(struct vring_desc *desc)
341{
342	unsigned int next;
343
344	if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
345		return -1U;
346	next = le16toh(desc->next);
347	return next;
348}
349
350/* Sum up all the IOVEC length */
351static ssize_t
352sum_iovec_len(struct mic_copy_desc *copy)
353{
354	ssize_t sum = 0;
355	int i;
356
357	for (i = 0; i < copy->iovcnt; i++)
358		sum += copy->iov[i].iov_len;
359	return sum;
360}
361
362static inline void verify_out_len(struct mic_info *mic,
363	struct mic_copy_desc *copy)
364{
365	if (copy->out_len != sum_iovec_len(copy)) {
366		mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
367			mic->name, __func__, __LINE__,
368			copy->out_len, sum_iovec_len(copy));
369		assert(copy->out_len == sum_iovec_len(copy));
370	}
371}
372
373/* Display an iovec */
374static void
375disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
376	   const char *s, int line)
377{
378	int i;
379
380	for (i = 0; i < copy->iovcnt; i++)
381		mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
382			mic->name, s, line, i,
383			copy->iov[i].iov_base, copy->iov[i].iov_len);
384}
385
386static inline __u16 read_avail_idx(struct mic_vring *vr)
387{
388	return ACCESS_ONCE(vr->info->avail_idx);
389}
390
391static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
392				struct mic_copy_desc *copy, ssize_t len)
393{
394	copy->vr_idx = tx ? 0 : 1;
395	copy->update_used = true;
396	if (type == VIRTIO_ID_NET)
397		copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
398	else
399		copy->iov[0].iov_len = len;
400}
401
402/* Central API which triggers the copies */
403static int
404mic_virtio_copy(struct mic_info *mic, int fd,
405		struct mic_vring *vr, struct mic_copy_desc *copy)
406{
407	int ret;
408
409	ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
410	if (ret) {
411		mpsslog("%s %s %d errno %s ret %d\n",
412			mic->name, __func__, __LINE__,
413			strerror(errno), ret);
414	}
415	return ret;
416}
417
418/*
419 * This initialization routine requires at least one
420 * vring i.e. vr0. vr1 is optional.
421 */
422static void *
423init_vr(struct mic_info *mic, int fd, int type,
424	struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
425{
426	int vr_size;
427	char *va;
428
429	vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
430		MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
431	va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
432		PROT_READ, MAP_SHARED, fd, 0);
433	if (MAP_FAILED == va) {
434		mpsslog("%s %s %d mmap failed errno %s\n",
435			mic->name, __func__, __LINE__,
436			strerror(errno));
437		goto done;
438	}
439	set_dp(mic, type, va);
440	vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
441	vr0->info = vr0->va +
442		vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
443	vring_init(&vr0->vr,
444		   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
445	mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
446		__func__, mic->name, vr0->va, vr0->info, vr_size,
447		vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
448	mpsslog("magic 0x%x expected 0x%x\n",
449		le32toh(vr0->info->magic), MIC_MAGIC + type);
450	assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
451	if (vr1) {
452		vr1->va = (struct mic_vring *)
453			&va[MIC_DEVICE_PAGE_END + vr_size];
454		vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
455			MIC_VIRTIO_RING_ALIGN);
456		vring_init(&vr1->vr,
457			   MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
458		mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
459			__func__, mic->name, vr1->va, vr1->info, vr_size,
460			vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
461		mpsslog("magic 0x%x expected 0x%x\n",
462			le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
463		assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
464	}
465done:
466	return va;
467}
468
469static void
470wait_for_card_driver(struct mic_info *mic, int fd, int type)
471{
472	struct pollfd pollfd;
473	int err;
474	struct mic_device_desc *desc = get_device_desc(mic, type);
475
476	pollfd.fd = fd;
477	mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
478		mic->name, __func__, type, desc->status);
479	while (1) {
480		pollfd.events = POLLIN;
481		pollfd.revents = 0;
482		err = poll(&pollfd, 1, -1);
483		if (err < 0) {
484			mpsslog("%s %s poll failed %s\n",
485				mic->name, __func__, strerror(errno));
486			continue;
487		}
488
489		if (pollfd.revents) {
490			mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
491				mic->name, __func__, type, desc->status);
492			if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
493				mpsslog("%s %s poll.revents %d\n",
494					mic->name, __func__, pollfd.revents);
495				mpsslog("%s %s desc-> type %d status 0x%x\n",
496					mic->name, __func__, type,
497					desc->status);
498				break;
499			}
500		}
501	}
502}
503
504/* Spin till we have some descriptors */
505static void
506spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
507{
508	__u16 avail_idx = read_avail_idx(vr);
509
510	while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
511#ifdef DEBUG
512		mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
513			mic->name, __func__,
514			le16toh(vr->vr.avail->idx), vr->info->avail_idx);
515#endif
516		sched_yield();
517	}
518}
519
520static void *
521virtio_net(void *arg)
522{
523	static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
524	static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
525	struct iovec vnet_iov[2][2] = {
526		{ { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
527		  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
528		{ { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
529		  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
530	};
531	struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
532	struct mic_info *mic = (struct mic_info *)arg;
533	char if_name[IFNAMSIZ];
534	struct pollfd net_poll[MAX_NET_FD];
535	struct mic_vring tx_vr, rx_vr;
536	struct mic_copy_desc copy;
537	struct mic_device_desc *desc;
538	int err;
539
540	snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
541	mic->mic_net.tap_fd = tun_alloc(mic, if_name);
542	if (mic->mic_net.tap_fd < 0)
543		goto done;
544
545	if (tap_configure(mic, if_name))
546		goto done;
547	mpsslog("MIC name %s id %d\n", mic->name, mic->id);
548
549	net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
550	net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
551	net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
552	net_poll[NET_FD_TUN].events = POLLIN;
553
554	if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
555				  VIRTIO_ID_NET, &tx_vr, &rx_vr,
556		virtnet_dev_page.dd.num_vq)) {
557		mpsslog("%s init_vr failed %s\n",
558			mic->name, strerror(errno));
559		goto done;
560	}
561
562	copy.iovcnt = 2;
563	desc = get_device_desc(mic, VIRTIO_ID_NET);
564
565	while (1) {
566		ssize_t len;
567
568		net_poll[NET_FD_VIRTIO_NET].revents = 0;
569		net_poll[NET_FD_TUN].revents = 0;
570
571		/* Start polling for data from tap and virtio net */
572		err = poll(net_poll, 2, -1);
573		if (err < 0) {
574			mpsslog("%s poll failed %s\n",
575				__func__, strerror(errno));
576			continue;
577		}
578		if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
579			wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
580					     VIRTIO_ID_NET);
581		/*
582		 * Check if there is data to be read from TUN and write to
583		 * virtio net fd if there is.
584		 */
585		if (net_poll[NET_FD_TUN].revents & POLLIN) {
586			copy.iov = iov0;
587			len = readv(net_poll[NET_FD_TUN].fd,
588				copy.iov, copy.iovcnt);
589			if (len > 0) {
590				struct virtio_net_hdr *hdr
591					= (struct virtio_net_hdr *)vnet_hdr[0];
592
593				/* Disable checksums on the card since we are on
594				   a reliable PCIe link */
595				hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
596#ifdef DEBUG
597				mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
598					__func__, __LINE__, hdr->flags);
599				mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
600					copy.out_len, hdr->gso_type);
601#endif
602#ifdef DEBUG
603				disp_iovec(mic, copy, __func__, __LINE__);
604				mpsslog("%s %s %d read from tap 0x%lx\n",
605					mic->name, __func__, __LINE__,
606					len);
607#endif
608				spin_for_descriptors(mic, &tx_vr);
609				txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
610					     len);
611
612				err = mic_virtio_copy(mic,
613					mic->mic_net.virtio_net_fd, &tx_vr,
614					&copy);
615				if (err < 0) {
616					mpsslog("%s %s %d mic_virtio_copy %s\n",
617						mic->name, __func__, __LINE__,
618						strerror(errno));
619				}
620				if (!err)
621					verify_out_len(mic, &copy);
622#ifdef DEBUG
623				disp_iovec(mic, copy, __func__, __LINE__);
624				mpsslog("%s %s %d wrote to net 0x%lx\n",
625					mic->name, __func__, __LINE__,
626					sum_iovec_len(&copy));
627#endif
628				/* Reinitialize IOV for next run */
629				iov0[1].iov_len = MAX_NET_PKT_SIZE;
630			} else if (len < 0) {
631				disp_iovec(mic, &copy, __func__, __LINE__);
632				mpsslog("%s %s %d read failed %s ", mic->name,
633					__func__, __LINE__, strerror(errno));
634				mpsslog("cnt %d sum %zd\n",
635					copy.iovcnt, sum_iovec_len(&copy));
636			}
637		}
638
639		/*
640		 * Check if there is data to be read from virtio net and
641		 * write to TUN if there is.
642		 */
643		if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
644			while (rx_vr.info->avail_idx !=
645				le16toh(rx_vr.vr.avail->idx)) {
646				copy.iov = iov1;
647				txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
648					     MAX_NET_PKT_SIZE
649					+ sizeof(struct virtio_net_hdr));
650
651				err = mic_virtio_copy(mic,
652					mic->mic_net.virtio_net_fd, &rx_vr,
653					&copy);
654				if (!err) {
655#ifdef DEBUG
656					struct virtio_net_hdr *hdr
657						= (struct virtio_net_hdr *)
658							vnet_hdr[1];
659
660					mpsslog("%s %s %d hdr->flags 0x%x, ",
661						mic->name, __func__, __LINE__,
662						hdr->flags);
663					mpsslog("out_len %d gso_type 0x%x\n",
664						copy.out_len,
665						hdr->gso_type);
666#endif
667					/* Set the correct output iov_len */
668					iov1[1].iov_len = copy.out_len -
669						sizeof(struct virtio_net_hdr);
670					verify_out_len(mic, &copy);
671#ifdef DEBUG
672					disp_iovec(mic, copy, __func__,
673						   __LINE__);
674					mpsslog("%s %s %d ",
675						mic->name, __func__, __LINE__);
676					mpsslog("read from net 0x%lx\n",
677						sum_iovec_len(copy));
678#endif
679					len = writev(net_poll[NET_FD_TUN].fd,
680						copy.iov, copy.iovcnt);
681					if (len != sum_iovec_len(&copy)) {
682						mpsslog("Tun write failed %s ",
683							strerror(errno));
684						mpsslog("len 0x%zx ", len);
685						mpsslog("read_len 0x%zx\n",
686							sum_iovec_len(&copy));
687					} else {
688#ifdef DEBUG
689						disp_iovec(mic, &copy, __func__,
690							   __LINE__);
691						mpsslog("%s %s %d ",
692							mic->name, __func__,
693							__LINE__);
694						mpsslog("wrote to tap 0x%lx\n",
695							len);
696#endif
697					}
698				} else {
699					mpsslog("%s %s %d mic_virtio_copy %s\n",
700						mic->name, __func__, __LINE__,
701						strerror(errno));
702					break;
703				}
704			}
705		}
706		if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
707			mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
708	}
709done:
710	pthread_exit(NULL);
711}
712
/* virtio_console: slots of the pollfd array used by the console thread */
#define VIRTIO_CONSOLE_FD 0	/* slot polling the virtio console device fd */
#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)	/* slot polling the pty master fd */
#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
/* NOTE(review): not referenced in the visible part of this file - confirm it is still needed */
#define MAX_BUFFER_SIZE PAGE_SIZE
718
719static void *
720virtio_console(void *arg)
721{
722	static __u8 vcons_buf[2][PAGE_SIZE];
723	struct iovec vcons_iov[2] = {
724		{ .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
725		{ .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
726	};
727	struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
728	struct mic_info *mic = (struct mic_info *)arg;
729	int err;
730	struct pollfd console_poll[MAX_CONSOLE_FD];
731	int pty_fd;
732	char *pts_name;
733	ssize_t len;
734	struct mic_vring tx_vr, rx_vr;
735	struct mic_copy_desc copy;
736	struct mic_device_desc *desc;
737
738	pty_fd = posix_openpt(O_RDWR);
739	if (pty_fd < 0) {
740		mpsslog("can't open a pseudoterminal master device: %s\n",
741			strerror(errno));
742		goto _return;
743	}
744	pts_name = ptsname(pty_fd);
745	if (pts_name == NULL) {
746		mpsslog("can't get pts name\n");
747		goto _close_pty;
748	}
749	printf("%s console message goes to %s\n", mic->name, pts_name);
750	mpsslog("%s console message goes to %s\n", mic->name, pts_name);
751	err = grantpt(pty_fd);
752	if (err < 0) {
753		mpsslog("can't grant access: %s %s\n",
754			pts_name, strerror(errno));
755		goto _close_pty;
756	}
757	err = unlockpt(pty_fd);
758	if (err < 0) {
759		mpsslog("can't unlock a pseudoterminal: %s %s\n",
760			pts_name, strerror(errno));
761		goto _close_pty;
762	}
763	console_poll[MONITOR_FD].fd = pty_fd;
764	console_poll[MONITOR_FD].events = POLLIN;
765
766	console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
767	console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
768
769	if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
770				  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
771		virtcons_dev_page.dd.num_vq)) {
772		mpsslog("%s init_vr failed %s\n",
773			mic->name, strerror(errno));
774		goto _close_pty;
775	}
776
777	copy.iovcnt = 1;
778	desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
779
780	for (;;) {
781		console_poll[MONITOR_FD].revents = 0;
782		console_poll[VIRTIO_CONSOLE_FD].revents = 0;
783		err = poll(console_poll, MAX_CONSOLE_FD, -1);
784		if (err < 0) {
785			mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
786				strerror(errno));
787			continue;
788		}
789		if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
790			wait_for_card_driver(mic,
791					     mic->mic_console.virtio_console_fd,
792				VIRTIO_ID_CONSOLE);
793
794		if (console_poll[MONITOR_FD].revents & POLLIN) {
795			copy.iov = iov0;
796			len = readv(pty_fd, copy.iov, copy.iovcnt);
797			if (len > 0) {
798#ifdef DEBUG
799				disp_iovec(mic, copy, __func__, __LINE__);
800				mpsslog("%s %s %d read from tap 0x%lx\n",
801					mic->name, __func__, __LINE__,
802					len);
803#endif
804				spin_for_descriptors(mic, &tx_vr);
805				txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
806					     &copy, len);
807
808				err = mic_virtio_copy(mic,
809					mic->mic_console.virtio_console_fd,
810					&tx_vr, &copy);
811				if (err < 0) {
812					mpsslog("%s %s %d mic_virtio_copy %s\n",
813						mic->name, __func__, __LINE__,
814						strerror(errno));
815				}
816				if (!err)
817					verify_out_len(mic, &copy);
818#ifdef DEBUG
819				disp_iovec(mic, copy, __func__, __LINE__);
820				mpsslog("%s %s %d wrote to net 0x%lx\n",
821					mic->name, __func__, __LINE__,
822					sum_iovec_len(copy));
823#endif
824				/* Reinitialize IOV for next run */
825				iov0->iov_len = PAGE_SIZE;
826			} else if (len < 0) {
827				disp_iovec(mic, &copy, __func__, __LINE__);
828				mpsslog("%s %s %d read failed %s ",
829					mic->name, __func__, __LINE__,
830					strerror(errno));
831				mpsslog("cnt %d sum %zd\n",
832					copy.iovcnt, sum_iovec_len(&copy));
833			}
834		}
835
836		if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
837			while (rx_vr.info->avail_idx !=
838				le16toh(rx_vr.vr.avail->idx)) {
839				copy.iov = iov1;
840				txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
841					     &copy, PAGE_SIZE);
842
843				err = mic_virtio_copy(mic,
844					mic->mic_console.virtio_console_fd,
845					&rx_vr, &copy);
846				if (!err) {
847					/* Set the correct output iov_len */
848					iov1->iov_len = copy.out_len;
849					verify_out_len(mic, &copy);
850#ifdef DEBUG
851					disp_iovec(mic, copy, __func__,
852						   __LINE__);
853					mpsslog("%s %s %d ",
854						mic->name, __func__, __LINE__);
855					mpsslog("read from net 0x%lx\n",
856						sum_iovec_len(copy));
857#endif
858					len = writev(pty_fd,
859						copy.iov, copy.iovcnt);
860					if (len != sum_iovec_len(&copy)) {
861						mpsslog("Tun write failed %s ",
862							strerror(errno));
863						mpsslog("len 0x%zx ", len);
864						mpsslog("read_len 0x%zx\n",
865							sum_iovec_len(&copy));
866					} else {
867#ifdef DEBUG
868						disp_iovec(mic, copy, __func__,
869							   __LINE__);
870						mpsslog("%s %s %d ",
871							mic->name, __func__,
872							__LINE__);
873						mpsslog("wrote to tap 0x%lx\n",
874							len);
875#endif
876					}
877				} else {
878					mpsslog("%s %s %d mic_virtio_copy %s\n",
879						mic->name, __func__, __LINE__,
880						strerror(errno));
881					break;
882				}
883			}
884		}
885		if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
886			mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
887	}
888_close_pty:
889	close(pty_fd);
890_return:
891	pthread_exit(NULL);
892}
893
894static void
895add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
896{
897	char path[PATH_MAX];
898	int fd, err;
899
900	snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
901	fd = open(path, O_RDWR);
902	if (fd < 0) {
903		mpsslog("Could not open %s %s\n", path, strerror(errno));
904		return;
905	}
906
907	err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
908	if (err < 0) {
909		mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
910		close(fd);
911		return;
912	}
913	switch (dd->type) {
914	case VIRTIO_ID_NET:
915		mic->mic_net.virtio_net_fd = fd;
916		mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
917		break;
918	case VIRTIO_ID_CONSOLE:
919		mic->mic_console.virtio_console_fd = fd;
920		mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
921		break;
922	case VIRTIO_ID_BLOCK:
923		mic->mic_virtblk.virtio_block_fd = fd;
924		mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
925		break;
926	}
927}
928
929static bool
930set_backend_file(struct mic_info *mic)
931{
932	FILE *config;
933	char buff[PATH_MAX], *line, *evv, *p;
934
935	snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
936	config = fopen(buff, "r");
937	if (config == NULL)
938		return false;
939	do {  /* look for "virtblk_backend=XXXX" */
940		line = fgets(buff, PATH_MAX, config);
941		if (line == NULL)
942			break;
943		if (*line == '#')
944			continue;
945		p = strchr(line, '\n');
946		if (p)
947			*p = '\0';
948	} while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
949	fclose(config);
950	if (line == NULL)
951		return false;
952	evv = strchr(line, '=');
953	if (evv == NULL)
954		return false;
955	mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
956	if (mic->mic_virtblk.backend_file == NULL) {
957		mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
958		return false;
959	}
960	strcpy(mic->mic_virtblk.backend_file, evv + 1);
961	return true;
962}
963
964#define SECTOR_SIZE 512
965static bool
966set_backend_size(struct mic_info *mic)
967{
968	mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
969		SEEK_END);
970	if (mic->mic_virtblk.backend_size < 0) {
971		mpsslog("%s: can't seek: %s\n",
972			mic->name, mic->mic_virtblk.backend_file);
973		return false;
974	}
975	virtblk_dev_page.blk_config.capacity =
976		mic->mic_virtblk.backend_size / SECTOR_SIZE;
977	if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
978		virtblk_dev_page.blk_config.capacity++;
979
980	virtblk_dev_page.blk_config.capacity =
981		htole64(virtblk_dev_page.blk_config.capacity);
982
983	return true;
984}
985
986static bool
987open_backend(struct mic_info *mic)
988{
989	if (!set_backend_file(mic))
990		goto _error_exit;
991	mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
992	if (mic->mic_virtblk.backend < 0) {
993		mpsslog("%s: can't open: %s\n", mic->name,
994			mic->mic_virtblk.backend_file);
995		goto _error_free;
996	}
997	if (!set_backend_size(mic))
998		goto _error_close;
999	mic->mic_virtblk.backend_addr = mmap(NULL,
1000		mic->mic_virtblk.backend_size,
1001		PROT_READ|PROT_WRITE, MAP_SHARED,
1002		mic->mic_virtblk.backend, 0L);
1003	if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1004		mpsslog("%s: can't map: %s %s\n",
1005			mic->name, mic->mic_virtblk.backend_file,
1006			strerror(errno));
1007		goto _error_close;
1008	}
1009	return true;
1010
1011 _error_close:
1012	close(mic->mic_virtblk.backend);
1013 _error_free:
1014	free(mic->mic_virtblk.backend_file);
1015 _error_exit:
1016	return false;
1017}
1018
1019static void
1020close_backend(struct mic_info *mic)
1021{
1022	munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1023	close(mic->mic_virtblk.backend);
1024	free(mic->mic_virtblk.backend_file);
1025}
1026
1027static bool
1028start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1029{
1030	if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1031		mpsslog("%s: blk_config is not 8 byte aligned.\n",
1032			mic->name);
1033		return false;
1034	}
1035	add_virtio_device(mic, &virtblk_dev_page.dd);
1036	if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1037				  VIRTIO_ID_BLOCK, vring, NULL,
1038				  virtblk_dev_page.dd.num_vq)) {
1039		mpsslog("%s init_vr failed %s\n",
1040			mic->name, strerror(errno));
1041		return false;
1042	}
1043	return true;
1044}
1045
1046static void
1047stop_virtblk(struct mic_info *mic)
1048{
1049	int vr_size, ret;
1050
1051	vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
1052		MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
1053	ret = munmap(mic->mic_virtblk.block_dp,
1054		MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1055	if (ret < 0)
1056		mpsslog("%s munmap errno %d\n", mic->name, errno);
1057	close(mic->mic_virtblk.virtio_block_fd);
1058}
1059
1060static __u8
1061header_error_check(struct vring_desc *desc)
1062{
1063	if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1064		mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1065			__func__, __LINE__);
1066		return -EIO;
1067	}
1068	if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1069		mpsslog("%s() %d: alone\n",
1070			__func__, __LINE__);
1071		return -EIO;
1072	}
1073	if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1074		mpsslog("%s() %d: not read\n",
1075			__func__, __LINE__);
1076		return -EIO;
1077	}
1078	return 0;
1079}
1080
1081static int
1082read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1083{
1084	struct iovec iovec;
1085	struct mic_copy_desc copy;
1086
1087	iovec.iov_len = sizeof(*hdr);
1088	iovec.iov_base = hdr;
1089	copy.iov = &iovec;
1090	copy.iovcnt = 1;
1091	copy.vr_idx = 0;  /* only one vring on virtio_block */
1092	copy.update_used = false;  /* do not update used index */
1093	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1094}
1095
1096static int
1097transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1098{
1099	struct mic_copy_desc copy;
1100
1101	copy.iov = iovec;
1102	copy.iovcnt = iovcnt;
1103	copy.vr_idx = 0;  /* only one vring on virtio_block */
1104	copy.update_used = false;  /* do not update used index */
1105	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1106}
1107
1108static __u8
1109status_error_check(struct vring_desc *desc)
1110{
1111	if (le32toh(desc->len) != sizeof(__u8)) {
1112		mpsslog("%s() %d: length is not sizeof(status)\n",
1113			__func__, __LINE__);
1114		return -EIO;
1115	}
1116	return 0;
1117}
1118
1119static int
1120write_status(int fd, __u8 *status)
1121{
1122	struct iovec iovec;
1123	struct mic_copy_desc copy;
1124
1125	iovec.iov_base = status;
1126	iovec.iov_len = sizeof(*status);
1127	copy.iov = &iovec;
1128	copy.iovcnt = 1;
1129	copy.vr_idx = 0;  /* only one vring on virtio_block */
1130	copy.update_used = true; /* Update used index */
1131	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1132}
1133
1134static void *
1135virtio_block(void *arg)
1136{
1137	struct mic_info *mic = (struct mic_info *)arg;
1138	int ret;
1139	struct pollfd block_poll;
1140	struct mic_vring vring;
1141	__u16 avail_idx;
1142	__u32 desc_idx;
1143	struct vring_desc *desc;
1144	struct iovec *iovec, *piov;
1145	__u8 status;
1146	__u32 buffer_desc_idx;
1147	struct virtio_blk_outhdr hdr;
1148	void *fos;
1149
1150	for (;;) {  /* forever */
1151		if (!open_backend(mic)) { /* No virtblk */
1152			for (mic->mic_virtblk.signaled = 0;
1153				!mic->mic_virtblk.signaled;)
1154				sleep(1);
1155			continue;
1156		}
1157
1158		/* backend file is specified. */
1159		if (!start_virtblk(mic, &vring))
1160			goto _close_backend;
1161		iovec = malloc(sizeof(*iovec) *
1162			le32toh(virtblk_dev_page.blk_config.seg_max));
1163		if (!iovec) {
1164			mpsslog("%s: can't alloc iovec: %s\n",
1165				mic->name, strerror(ENOMEM));
1166			goto _stop_virtblk;
1167		}
1168
1169		block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1170		block_poll.events = POLLIN;
1171		for (mic->mic_virtblk.signaled = 0;
1172		     !mic->mic_virtblk.signaled;) {
1173			block_poll.revents = 0;
1174					/* timeout in 1 sec to see signaled */
1175			ret = poll(&block_poll, 1, 1000);
1176			if (ret < 0) {
1177				mpsslog("%s %d: poll failed: %s\n",
1178					__func__, __LINE__,
1179					strerror(errno));
1180				continue;
1181			}
1182
1183			if (!(block_poll.revents & POLLIN)) {
1184#ifdef DEBUG
1185				mpsslog("%s %d: block_poll.revents=0x%x\n",
1186					__func__, __LINE__, block_poll.revents);
1187#endif
1188				continue;
1189			}
1190
1191			/* POLLIN */
1192			while (vring.info->avail_idx !=
1193				le16toh(vring.vr.avail->idx)) {
1194				/* read header element */
1195				avail_idx =
1196					vring.info->avail_idx &
1197					(vring.vr.num - 1);
1198				desc_idx = le16toh(
1199					vring.vr.avail->ring[avail_idx]);
1200				desc = &vring.vr.desc[desc_idx];
1201#ifdef DEBUG
1202				mpsslog("%s() %d: avail_idx=%d ",
1203					__func__, __LINE__,
1204					vring.info->avail_idx);
1205				mpsslog("vring.vr.num=%d desc=%p\n",
1206					vring.vr.num, desc);
1207#endif
1208				status = header_error_check(desc);
1209				ret = read_header(
1210					mic->mic_virtblk.virtio_block_fd,
1211					&hdr, desc_idx);
1212				if (ret < 0) {
1213					mpsslog("%s() %d %s: ret=%d %s\n",
1214						__func__, __LINE__,
1215						mic->name, ret,
1216						strerror(errno));
1217					break;
1218				}
1219				/* buffer element */
1220				piov = iovec;
1221				status = 0;
1222				fos = mic->mic_virtblk.backend_addr +
1223					(hdr.sector * SECTOR_SIZE);
1224				buffer_desc_idx = next_desc(desc);
1225				desc_idx = buffer_desc_idx;
1226				for (desc = &vring.vr.desc[buffer_desc_idx];
1227				     desc->flags & VRING_DESC_F_NEXT;
1228				     desc_idx = next_desc(desc),
1229					     desc = &vring.vr.desc[desc_idx]) {
1230					piov->iov_len = desc->len;
1231					piov->iov_base = fos;
1232					piov++;
1233					fos += desc->len;
1234				}
1235				/* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1236				if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1237					VIRTIO_BLK_T_GET_ID)) {
1238					/*
1239					  VIRTIO_BLK_T_IN - does not do
1240					  anything. Probably for documenting.
1241					  VIRTIO_BLK_T_SCSI_CMD - for
1242					  virtio_scsi.
1243					  VIRTIO_BLK_T_FLUSH - turned off in
1244					  config space.
1245					  VIRTIO_BLK_T_BARRIER - defined but not
1246					  used in anywhere.
1247					*/
1248					mpsslog("%s() %d: type %x ",
1249						__func__, __LINE__,
1250						hdr.type);
1251					mpsslog("is not supported\n");
1252					status = -ENOTSUP;
1253
1254				} else {
1255					ret = transfer_blocks(
1256					mic->mic_virtblk.virtio_block_fd,
1257						iovec,
1258						piov - iovec);
1259					if (ret < 0 &&
1260					    status != 0)
1261						status = ret;
1262				}
1263				/* write status and update used pointer */
1264				if (status != 0)
1265					status = status_error_check(desc);
1266				ret = write_status(
1267					mic->mic_virtblk.virtio_block_fd,
1268					&status);
1269#ifdef DEBUG
1270				mpsslog("%s() %d: write status=%d on desc=%p\n",
1271					__func__, __LINE__,
1272					status, desc);
1273#endif
1274			}
1275		}
1276		free(iovec);
1277_stop_virtblk:
1278		stop_virtblk(mic);
1279_close_backend:
1280		close_backend(mic);
1281	}  /* forever */
1282
1283	pthread_exit(NULL);
1284}
1285
1286static void
1287reset(struct mic_info *mic)
1288{
1289#define RESET_TIMEOUT 120
1290	int i = RESET_TIMEOUT;
1291	setsysfs(mic->name, "state", "reset");
1292	while (i) {
1293		char *state;
1294		state = readsysfs(mic->name, "state");
1295		if (!state)
1296			goto retry;
1297		mpsslog("%s: %s %d state %s\n",
1298			mic->name, __func__, __LINE__, state);
1299
1300		/*
1301		 * If the shutdown was initiated by OSPM, the state stays
1302		 * in "suspended" which is also a valid condition for reset.
1303		 */
1304		if ((!strcmp(state, "offline")) ||
1305		    (!strcmp(state, "suspended"))) {
1306			free(state);
1307			break;
1308		}
1309		free(state);
1310retry:
1311		sleep(1);
1312		i--;
1313	}
1314}
1315
1316static int
1317get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1318{
1319	if (!strcmp(shutdown_status, "nop"))
1320		return MIC_NOP;
1321	if (!strcmp(shutdown_status, "crashed"))
1322		return MIC_CRASHED;
1323	if (!strcmp(shutdown_status, "halted"))
1324		return MIC_HALTED;
1325	if (!strcmp(shutdown_status, "poweroff"))
1326		return MIC_POWER_OFF;
1327	if (!strcmp(shutdown_status, "restart"))
1328		return MIC_RESTART;
1329	mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1330	/* Invalid state */
1331	assert(0);
1332};
1333
1334static int get_mic_state(struct mic_info *mic, char *state)
1335{
1336	if (!strcmp(state, "offline"))
1337		return MIC_OFFLINE;
1338	if (!strcmp(state, "online"))
1339		return MIC_ONLINE;
1340	if (!strcmp(state, "shutting_down"))
1341		return MIC_SHUTTING_DOWN;
1342	if (!strcmp(state, "reset_failed"))
1343		return MIC_RESET_FAILED;
1344	if (!strcmp(state, "suspending"))
1345		return MIC_SUSPENDING;
1346	if (!strcmp(state, "suspended"))
1347		return MIC_SUSPENDED;
1348	mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1349	/* Invalid state */
1350	assert(0);
1351};
1352
1353static void mic_handle_shutdown(struct mic_info *mic)
1354{
1355#define SHUTDOWN_TIMEOUT 60
1356	int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
1357	char *shutdown_status;
1358	while (i) {
1359		shutdown_status = readsysfs(mic->name, "shutdown_status");
1360		if (!shutdown_status)
1361			continue;
1362		mpsslog("%s: %s %d shutdown_status %s\n",
1363			mic->name, __func__, __LINE__, shutdown_status);
1364		switch (get_mic_shutdown_status(mic, shutdown_status)) {
1365		case MIC_RESTART:
1366			mic->restart = 1;
1367		case MIC_HALTED:
1368		case MIC_POWER_OFF:
1369		case MIC_CRASHED:
1370			free(shutdown_status);
1371			goto reset;
1372		default:
1373			break;
1374		}
1375		free(shutdown_status);
1376		sleep(1);
1377		i--;
1378	}
1379reset:
1380	ret = kill(mic->pid, SIGTERM);
1381	mpsslog("%s: %s %d kill pid %d ret %d\n",
1382		mic->name, __func__, __LINE__,
1383		mic->pid, ret);
1384	if (!ret) {
1385		ret = waitpid(mic->pid, &stat,
1386			WIFSIGNALED(stat));
1387		mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1388			mic->name, __func__, __LINE__,
1389			ret, mic->pid);
1390	}
1391	if (ret == mic->pid)
1392		reset(mic);
1393}
1394
1395static void *
1396mic_config(void *arg)
1397{
1398	struct mic_info *mic = (struct mic_info *)arg;
1399	char *state = NULL;
1400	char pathname[PATH_MAX];
1401	int fd, ret;
1402	struct pollfd ufds[1];
1403	char value[4096];
1404
1405	snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1406		 MICSYSFSDIR, mic->name, "state");
1407
1408	fd = open(pathname, O_RDONLY);
1409	if (fd < 0) {
1410		mpsslog("%s: opening file %s failed %s\n",
1411			mic->name, pathname, strerror(errno));
1412		goto error;
1413	}
1414
1415	do {
1416		ret = lseek(fd, 0, SEEK_SET);
1417		if (ret < 0) {
1418			mpsslog("%s: Failed to seek to file start '%s': %s\n",
1419				mic->name, pathname, strerror(errno));
1420			goto close_error1;
1421		}
1422		ret = read(fd, value, sizeof(value));
1423		if (ret < 0) {
1424			mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
1425				mic->name, pathname, strerror(errno));
1426			goto close_error1;
1427		}
1428retry:
1429		state = readsysfs(mic->name, "state");
1430		if (!state)
1431			goto retry;
1432		mpsslog("%s: %s %d state %s\n",
1433			mic->name, __func__, __LINE__, state);
1434		switch (get_mic_state(mic, state)) {
1435		case MIC_SHUTTING_DOWN:
1436			mic_handle_shutdown(mic);
1437			goto close_error;
1438		case MIC_SUSPENDING:
1439			mic->boot_on_resume = 1;
1440			setsysfs(mic->name, "state", "suspend");
1441			mic_handle_shutdown(mic);
1442			goto close_error;
1443		case MIC_OFFLINE:
1444			if (mic->boot_on_resume) {
1445				setsysfs(mic->name, "state", "boot");
1446				mic->boot_on_resume = 0;
1447			}
1448			break;
1449		default:
1450			break;
1451		}
1452		free(state);
1453
1454		ufds[0].fd = fd;
1455		ufds[0].events = POLLERR | POLLPRI;
1456		ret = poll(ufds, 1, -1);
1457		if (ret < 0) {
1458			mpsslog("%s: poll failed %s\n",
1459				mic->name, strerror(errno));
1460			goto close_error1;
1461		}
1462	} while (1);
1463close_error:
1464	free(state);
1465close_error1:
1466	close(fd);
1467error:
1468	init_mic(mic);
1469	pthread_exit(NULL);
1470}
1471
1472static void
1473set_cmdline(struct mic_info *mic)
1474{
1475	char buffer[PATH_MAX];
1476	int len;
1477
1478	len = snprintf(buffer, PATH_MAX,
1479		"clocksource=tsc highres=off nohz=off ");
1480	len += snprintf(buffer + len, PATH_MAX - len,
1481		"cpufreq_on;corec6_off;pc3_off;pc6_off ");
1482	len += snprintf(buffer + len, PATH_MAX - len,
1483		"ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1484		mic->id);
1485
1486	setsysfs(mic->name, "cmdline", buffer);
1487	mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1488	snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
1489	mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1490}
1491
1492static void
1493set_log_buf_info(struct mic_info *mic)
1494{
1495	int fd;
1496	off_t len;
1497	char system_map[] = "/lib/firmware/mic/System.map";
1498	char *map, *temp, log_buf[17] = {'\0'};
1499
1500	fd = open(system_map, O_RDONLY);
1501	if (fd < 0) {
1502		mpsslog("%s: Opening System.map failed: %d\n",
1503			mic->name, errno);
1504		return;
1505	}
1506	len = lseek(fd, 0, SEEK_END);
1507	if (len < 0) {
1508		mpsslog("%s: Reading System.map size failed: %d\n",
1509			mic->name, errno);
1510		close(fd);
1511		return;
1512	}
1513	map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1514	if (map == MAP_FAILED) {
1515		mpsslog("%s: mmap of System.map failed: %d\n",
1516			mic->name, errno);
1517		close(fd);
1518		return;
1519	}
1520	temp = strstr(map, "__log_buf");
1521	if (!temp) {
1522		mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1523		munmap(map, len);
1524		close(fd);
1525		return;
1526	}
1527	strncpy(log_buf, temp - 19, 16);
1528	setsysfs(mic->name, "log_buf_addr", log_buf);
1529	mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1530	temp = strstr(map, "log_buf_len");
1531	if (!temp) {
1532		mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1533		munmap(map, len);
1534		close(fd);
1535		return;
1536	}
1537	strncpy(log_buf, temp - 19, 16);
1538	setsysfs(mic->name, "log_buf_len", log_buf);
1539	mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1540	munmap(map, len);
1541	close(fd);
1542}
1543
1544static void init_mic(struct mic_info *mic);
1545
1546static void
1547change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1548{
1549	struct mic_info *mic;
1550
1551	for (mic = mic_list.next; mic != NULL; mic = mic->next)
1552		mic->mic_virtblk.signaled = 1/* true */;
1553}
1554
1555static void
1556init_mic(struct mic_info *mic)
1557{
1558	struct sigaction ignore = {
1559		.sa_flags = 0,
1560		.sa_handler = SIG_IGN
1561	};
1562	struct sigaction act = {
1563		.sa_flags = SA_SIGINFO,
1564		.sa_sigaction = change_virtblk_backend,
1565	};
1566	char buffer[PATH_MAX];
1567	int err;
1568
1569	/*
1570	 * Currently, one virtio block device is supported for each MIC card
1571	 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1572	 * The signal informs the virtio block backend about a change in the
1573	 * configuration file which specifies the virtio backend file name on
1574	 * the host. Virtio block backend then re-reads the configuration file
1575	 * and switches to the new block device. This signalling mechanism may
1576	 * not be required once multiple virtio block devices are supported by
1577	 * the MIC daemon.
1578	 */
1579	sigaction(SIGUSR1, &ignore, NULL);
1580
1581	mic->pid = fork();
1582	switch (mic->pid) {
1583	case 0:
1584		set_log_buf_info(mic);
1585		set_cmdline(mic);
1586		add_virtio_device(mic, &virtcons_dev_page.dd);
1587		add_virtio_device(mic, &virtnet_dev_page.dd);
1588		err = pthread_create(&mic->mic_console.console_thread, NULL,
1589			virtio_console, mic);
1590		if (err)
1591			mpsslog("%s virtcons pthread_create failed %s\n",
1592				mic->name, strerror(err));
1593		err = pthread_create(&mic->mic_net.net_thread, NULL,
1594			virtio_net, mic);
1595		if (err)
1596			mpsslog("%s virtnet pthread_create failed %s\n",
1597				mic->name, strerror(err));
1598		err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1599			virtio_block, mic);
1600		if (err)
1601			mpsslog("%s virtblk pthread_create failed %s\n",
1602				mic->name, strerror(err));
1603		sigemptyset(&act.sa_mask);
1604		err = sigaction(SIGUSR1, &act, NULL);
1605		if (err)
1606			mpsslog("%s sigaction SIGUSR1 failed %s\n",
1607				mic->name, strerror(errno));
1608		while (1)
1609			sleep(60);
1610	case -1:
1611		mpsslog("fork failed MIC name %s id %d errno %d\n",
1612			mic->name, mic->id, errno);
1613		break;
1614	default:
1615		if (mic->restart) {
1616			snprintf(buffer, PATH_MAX, "boot");
1617			setsysfs(mic->name, "state", buffer);
1618			mpsslog("%s restarting mic %d\n",
1619				mic->name, mic->restart);
1620			mic->restart = 0;
1621		}
1622		pthread_create(&mic->config_thread, NULL, mic_config, mic);
1623	}
1624}
1625
1626static void
1627start_daemon(void)
1628{
1629	struct mic_info *mic;
1630
1631	for (mic = mic_list.next; mic != NULL; mic = mic->next)
1632		init_mic(mic);
1633
1634	while (1)
1635		sleep(60);
1636}
1637
1638static int
1639init_mic_list(void)
1640{
1641	struct mic_info *mic = &mic_list;
1642	struct dirent *file;
1643	DIR *dp;
1644	int cnt = 0;
1645
1646	dp = opendir(MICSYSFSDIR);
1647	if (!dp)
1648		return 0;
1649
1650	while ((file = readdir(dp)) != NULL) {
1651		if (!strncmp(file->d_name, "mic", 3)) {
1652			mic->next = calloc(1, sizeof(struct mic_info));
1653			if (mic->next) {
1654				mic = mic->next;
1655				mic->id = atoi(&file->d_name[3]);
1656				mic->name = malloc(strlen(file->d_name) + 16);
1657				if (mic->name)
1658					strcpy(mic->name, file->d_name);
1659				mpsslog("MIC name %s id %d\n", mic->name,
1660					mic->id);
1661				cnt++;
1662			}
1663		}
1664	}
1665
1666	closedir(dp);
1667	return cnt;
1668}
1669
1670void
1671mpsslog(char *format, ...)
1672{
1673	va_list args;
1674	char buffer[4096];
1675	char ts[52], *ts1;
1676	time_t t;
1677
1678	if (logfp == NULL)
1679		return;
1680
1681	va_start(args, format);
1682	vsprintf(buffer, format, args);
1683	va_end(args);
1684
1685	time(&t);
1686	ts1 = ctime_r(&t, ts);
1687	ts1[strlen(ts1) - 1] = '\0';
1688	fprintf(logfp, "%s: %s", ts1, buffer);
1689
1690	fflush(logfp);
1691}
1692
1693int
1694main(int argc, char *argv[])
1695{
1696	int cnt;
1697	pid_t pid;
1698
1699	myname = argv[0];
1700
1701	logfp = fopen(LOGFILE_NAME, "a+");
1702	if (!logfp) {
1703		fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1704		exit(1);
1705	}
1706	pid = fork();
1707	switch (pid) {
1708	case 0:
1709		break;
1710	case -1:
1711		exit(2);
1712	default:
1713		exit(0);
1714	}
1715
1716	mpsslog("MIC Daemon start\n");
1717
1718	cnt = init_mic_list();
1719	if (cnt == 0) {
1720		mpsslog("MIC module not loaded\n");
1721		exit(3);
1722	}
1723	mpsslog("MIC found %d devices\n", cnt);
1724
1725	start_daemon();
1726
1727	exit(0);
1728}
1729