1/* sunvdc.c: Sun LDOM Virtual Disk Client.
2 *
3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/module.h>
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/blkdev.h>
10#include <linux/hdreg.h>
11#include <linux/genhd.h>
12#include <linux/cdrom.h>
13#include <linux/slab.h>
14#include <linux/spinlock.h>
15#include <linux/completion.h>
16#include <linux/delay.h>
17#include <linux/init.h>
18#include <linux/list.h>
19#include <linux/scatterlist.h>
20
21#include <asm/vio.h>
22#include <asm/ldc.h>
23
24#define DRV_MODULE_NAME		"sunvdc"
25#define PFX DRV_MODULE_NAME	": "
26#define DRV_MODULE_VERSION	"1.2"
27#define DRV_MODULE_RELDATE	"November 24, 2014"
28
29static char version[] =
30	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
31MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
32MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
33MODULE_LICENSE("GPL");
34MODULE_VERSION(DRV_MODULE_VERSION);
35
/* Number of descriptors in the transmit descriptor ring. */
#define VDC_TX_RING_SIZE	512

/* Reasons a caller may be blocked on a completion.  vdc_finish()
 * compares its argument against the completion's waiting_for value;
 * WAITING_FOR_ANY matches every waiter.  The negative constant is
 * parenthesized so it stays a single operand wherever it expands.
 */
#define WAITING_FOR_LINK_UP	0x01
#define WAITING_FOR_TX_SPACE	0x02
#define WAITING_FOR_GEN_CMD	0x04
#define WAITING_FOR_ANY		(-1)

/* Workqueue that runs the LDC reset work items queued by vdc_event(). */
static struct workqueue_struct *sunvdc_wq;
44
/* Bookkeeping for one TX ring slot: the block request occupying it,
 * or NULL when the slot carries no block request.
 */
struct vdc_req_entry {
	struct request		*req;
};
48
49struct vdc_port {
50	struct vio_driver_state	vio;
51
52	struct gendisk		*disk;
53
54	struct vdc_completion	*cmp;
55
56	u64			req_id;
57	u64			seq;
58	struct vdc_req_entry	rq_arr[VDC_TX_RING_SIZE];
59
60	unsigned long		ring_cookies;
61
62	u64			max_xfer_size;
63	u32			vdisk_block_size;
64
65	u64			ldc_timeout;
66	struct timer_list	ldc_reset_timer;
67	struct work_struct	ldc_reset_work;
68
69	/* The server fills these in for us in the disk attribute
70	 * ACK packet.
71	 */
72	u64			operations;
73	u32			vdisk_size;
74	u8			vdisk_type;
75	u8			vdisk_mtype;
76
77	char			disk_name[32];
78};
79
/* Link-reset handling is defined near the end of the file but is
 * referenced earlier, so declare it up front.
 */
static void vdc_ldc_reset_timer(unsigned long _arg);
static void vdc_ldc_reset_work(struct work_struct *work);
static void vdc_ldc_reset(struct vdc_port *port);
83
84static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
85{
86	return container_of(vio, struct vdc_port, vio);
87}
88
89/* Ordered from largest major to lowest */
90static struct vio_version vdc_versions[] = {
91	{ .major = 1, .minor = 1 },
92	{ .major = 1, .minor = 0 },
93};
94
95static inline int vdc_version_supported(struct vdc_port *port,
96					u16 major, u16 minor)
97{
98	return port->vio.ver.major == major && port->vio.ver.minor >= minor;
99}
100
/* Block device name used for registration and as the disk name stem. */
#define VDCBLK_NAME	"vdisk"

/* Each disk owns (1 << PARTITION_SHIFT) minors; the first minor is
 * the port's dev_no shifted left by this amount.
 */
#define PARTITION_SHIFT	3

/* Major number returned by register_blkdev() in vdc_init(). */
static int vdc_major;
104
105static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
106{
107	return vio_dring_avail(dr, VDC_TX_RING_SIZE);
108}
109
110static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
111{
112	struct gendisk *disk = bdev->bd_disk;
113	sector_t nsect = get_capacity(disk);
114	sector_t cylinders = nsect;
115
116	geo->heads = 0xff;
117	geo->sectors = 0x3f;
118	sector_div(cylinders, geo->heads * geo->sectors);
119	geo->cylinders = cylinders;
120	if ((sector_t)(geo->cylinders + 1) * geo->heads * geo->sectors < nsect)
121		geo->cylinders = 0xffff;
122
123	return 0;
124}
125
126/* Add ioctl/CDROM_GET_CAPABILITY to support cdrom_id in udev
127 * when vdisk_mtype is VD_MEDIA_TYPE_CD or VD_MEDIA_TYPE_DVD.
128 * Needed to be able to install inside an ldom from an iso image.
129 */
130static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
131		     unsigned command, unsigned long argument)
132{
133	int i;
134	struct gendisk *disk;
135
136	switch (command) {
137	case CDROMMULTISESSION:
138		pr_debug(PFX "Multisession CDs not supported\n");
139		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
140			if (put_user(0, (char __user *)(argument + i)))
141				return -EFAULT;
142		return 0;
143
144	case CDROM_GET_CAPABILITY:
145		disk = bdev->bd_disk;
146
147		if (bdev->bd_disk && (disk->flags & GENHD_FL_CD))
148			return 0;
149		return -EINVAL;
150
151	default:
152		pr_debug(PFX "ioctl %08x not supported\n", command);
153		return -EINVAL;
154	}
155}
156
157static const struct block_device_operations vdc_fops = {
158	.owner		= THIS_MODULE,
159	.getgeo		= vdc_getgeo,
160	.ioctl		= vdc_ioctl,
161};
162
163static void vdc_blk_queue_start(struct vdc_port *port)
164{
165	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
166
167	/* restart blk queue when ring is half emptied. also called after
168	 * handshake completes, so check for initial handshake before we've
169	 * allocated a disk.
170	 */
171	if (port->disk && blk_queue_stopped(port->disk->queue) &&
172	    vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50) {
173		blk_start_queue(port->disk->queue);
174	}
175
176}
177
178static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
179{
180	if (vio->cmp &&
181	    (waiting_for == -1 ||
182	     vio->cmp->waiting_for == waiting_for)) {
183		vio->cmp->err = err;
184		complete(&vio->cmp->com);
185		vio->cmp = NULL;
186	}
187}
188
189static void vdc_handshake_complete(struct vio_driver_state *vio)
190{
191	struct vdc_port *port = to_vdc_port(vio);
192
193	del_timer(&port->ldc_reset_timer);
194	vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
195	vdc_blk_queue_start(port);
196}
197
198static int vdc_handle_unknown(struct vdc_port *port, void *arg)
199{
200	struct vio_msg_tag *pkt = arg;
201
202	printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n",
203	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
204	printk(KERN_ERR PFX "Resetting connection.\n");
205
206	ldc_disconnect(port->vio.lp);
207
208	return -ECONNRESET;
209}
210
211static int vdc_send_attr(struct vio_driver_state *vio)
212{
213	struct vdc_port *port = to_vdc_port(vio);
214	struct vio_disk_attr_info pkt;
215
216	memset(&pkt, 0, sizeof(pkt));
217
218	pkt.tag.type = VIO_TYPE_CTRL;
219	pkt.tag.stype = VIO_SUBTYPE_INFO;
220	pkt.tag.stype_env = VIO_ATTR_INFO;
221	pkt.tag.sid = vio_send_sid(vio);
222
223	pkt.xfer_mode = VIO_DRING_MODE;
224	pkt.vdisk_block_size = port->vdisk_block_size;
225	pkt.max_xfer_size = port->max_xfer_size;
226
227	viodbg(HS, "SEND ATTR xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
228	       pkt.xfer_mode, pkt.vdisk_block_size, pkt.max_xfer_size);
229
230	return vio_ldc_send(&port->vio, &pkt, sizeof(pkt));
231}
232
233static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
234{
235	struct vdc_port *port = to_vdc_port(vio);
236	struct vio_disk_attr_info *pkt = arg;
237
238	viodbg(HS, "GOT ATTR stype[0x%x] ops[%llx] disk_size[%llu] disk_type[%x] "
239	       "mtype[0x%x] xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
240	       pkt->tag.stype, pkt->operations,
241	       pkt->vdisk_size, pkt->vdisk_type, pkt->vdisk_mtype,
242	       pkt->xfer_mode, pkt->vdisk_block_size,
243	       pkt->max_xfer_size);
244
245	if (pkt->tag.stype == VIO_SUBTYPE_ACK) {
246		switch (pkt->vdisk_type) {
247		case VD_DISK_TYPE_DISK:
248		case VD_DISK_TYPE_SLICE:
249			break;
250
251		default:
252			printk(KERN_ERR PFX "%s: Bogus vdisk_type 0x%x\n",
253			       vio->name, pkt->vdisk_type);
254			return -ECONNRESET;
255		}
256
257		if (pkt->vdisk_block_size > port->vdisk_block_size) {
258			printk(KERN_ERR PFX "%s: BLOCK size increased "
259			       "%u --> %u\n",
260			       vio->name,
261			       port->vdisk_block_size, pkt->vdisk_block_size);
262			return -ECONNRESET;
263		}
264
265		port->operations = pkt->operations;
266		port->vdisk_type = pkt->vdisk_type;
267		if (vdc_version_supported(port, 1, 1)) {
268			port->vdisk_size = pkt->vdisk_size;
269			port->vdisk_mtype = pkt->vdisk_mtype;
270		}
271		if (pkt->max_xfer_size < port->max_xfer_size)
272			port->max_xfer_size = pkt->max_xfer_size;
273		port->vdisk_block_size = pkt->vdisk_block_size;
274		return 0;
275	} else {
276		printk(KERN_ERR PFX "%s: Attribute NACK\n", vio->name);
277
278		return -ECONNRESET;
279	}
280}
281
282static void vdc_end_special(struct vdc_port *port, struct vio_disk_desc *desc)
283{
284	int err = desc->status;
285
286	vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD);
287}
288
289static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
290			unsigned int index)
291{
292	struct vio_disk_desc *desc = vio_dring_entry(dr, index);
293	struct vdc_req_entry *rqe = &port->rq_arr[index];
294	struct request *req;
295
296	if (unlikely(desc->hdr.state != VIO_DESC_DONE))
297		return;
298
299	ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
300	desc->hdr.state = VIO_DESC_FREE;
301	dr->cons = vio_dring_next(dr, index);
302
303	req = rqe->req;
304	if (req == NULL) {
305		vdc_end_special(port, desc);
306		return;
307	}
308
309	rqe->req = NULL;
310
311	__blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
312
313	vdc_blk_queue_start(port);
314}
315
316static int vdc_ack(struct vdc_port *port, void *msgbuf)
317{
318	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
319	struct vio_dring_data *pkt = msgbuf;
320
321	if (unlikely(pkt->dring_ident != dr->ident ||
322		     pkt->start_idx != pkt->end_idx ||
323		     pkt->start_idx >= VDC_TX_RING_SIZE))
324		return 0;
325
326	vdc_end_one(port, dr, pkt->start_idx);
327
328	return 0;
329}
330
/* Handle a data NACK.  Currently a stub that accepts the message and
 * does nothing with it.
 */
static int vdc_nack(struct vdc_port *port, void *msgbuf)
{
	/* XXX Implement me XXX */
	return 0;
}
336
337static void vdc_event(void *arg, int event)
338{
339	struct vdc_port *port = arg;
340	struct vio_driver_state *vio = &port->vio;
341	unsigned long flags;
342	int err;
343
344	spin_lock_irqsave(&vio->lock, flags);
345
346	if (unlikely(event == LDC_EVENT_RESET)) {
347		vio_link_state_change(vio, event);
348		queue_work(sunvdc_wq, &port->ldc_reset_work);
349		goto out;
350	}
351
352	if (unlikely(event == LDC_EVENT_UP)) {
353		vio_link_state_change(vio, event);
354		goto out;
355	}
356
357	if (unlikely(event != LDC_EVENT_DATA_READY)) {
358		pr_warn(PFX "Unexpected LDC event %d\n", event);
359		goto out;
360	}
361
362	err = 0;
363	while (1) {
364		union {
365			struct vio_msg_tag tag;
366			u64 raw[8];
367		} msgbuf;
368
369		err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
370		if (unlikely(err < 0)) {
371			if (err == -ECONNRESET)
372				vio_conn_reset(vio);
373			break;
374		}
375		if (err == 0)
376			break;
377		viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
378		       msgbuf.tag.type,
379		       msgbuf.tag.stype,
380		       msgbuf.tag.stype_env,
381		       msgbuf.tag.sid);
382		err = vio_validate_sid(vio, &msgbuf.tag);
383		if (err < 0)
384			break;
385
386		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
387			if (msgbuf.tag.stype == VIO_SUBTYPE_ACK)
388				err = vdc_ack(port, &msgbuf);
389			else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK)
390				err = vdc_nack(port, &msgbuf);
391			else
392				err = vdc_handle_unknown(port, &msgbuf);
393		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
394			err = vio_control_pkt_engine(vio, &msgbuf);
395		} else {
396			err = vdc_handle_unknown(port, &msgbuf);
397		}
398		if (err < 0)
399			break;
400	}
401	if (err < 0)
402		vdc_finish(&port->vio, err, WAITING_FOR_ANY);
403out:
404	spin_unlock_irqrestore(&vio->lock, flags);
405}
406
407static int __vdc_tx_trigger(struct vdc_port *port)
408{
409	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
410	struct vio_dring_data hdr = {
411		.tag = {
412			.type		= VIO_TYPE_DATA,
413			.stype		= VIO_SUBTYPE_INFO,
414			.stype_env	= VIO_DRING_DATA,
415			.sid		= vio_send_sid(&port->vio),
416		},
417		.dring_ident		= dr->ident,
418		.start_idx		= dr->prod,
419		.end_idx		= dr->prod,
420	};
421	int err, delay;
422
423	hdr.seq = dr->snd_nxt;
424	delay = 1;
425	do {
426		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
427		if (err > 0) {
428			dr->snd_nxt++;
429			break;
430		}
431		udelay(delay);
432		if ((delay <<= 1) > 128)
433			delay = 128;
434	} while (err == -EAGAIN);
435
436	if (err == -ENOTCONN)
437		vdc_ldc_reset(port);
438	return err;
439}
440
441static int __send_request(struct request *req)
442{
443	struct vdc_port *port = req->rq_disk->private_data;
444	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
445	struct scatterlist sg[port->ring_cookies];
446	struct vdc_req_entry *rqe;
447	struct vio_disk_desc *desc;
448	unsigned int map_perm;
449	int nsg, err, i;
450	u64 len;
451	u8 op;
452
453	map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
454
455	if (rq_data_dir(req) == READ) {
456		map_perm |= LDC_MAP_W;
457		op = VD_OP_BREAD;
458	} else {
459		map_perm |= LDC_MAP_R;
460		op = VD_OP_BWRITE;
461	}
462
463	sg_init_table(sg, port->ring_cookies);
464	nsg = blk_rq_map_sg(req->q, req, sg);
465
466	len = 0;
467	for (i = 0; i < nsg; i++)
468		len += sg[i].length;
469
470	desc = vio_dring_cur(dr);
471
472	err = ldc_map_sg(port->vio.lp, sg, nsg,
473			 desc->cookies, port->ring_cookies,
474			 map_perm);
475	if (err < 0) {
476		printk(KERN_ERR PFX "ldc_map_sg() failure, err=%d.\n", err);
477		return err;
478	}
479
480	rqe = &port->rq_arr[dr->prod];
481	rqe->req = req;
482
483	desc->hdr.ack = VIO_ACK_ENABLE;
484	desc->req_id = port->req_id;
485	desc->operation = op;
486	if (port->vdisk_type == VD_DISK_TYPE_DISK) {
487		desc->slice = 0xff;
488	} else {
489		desc->slice = 0;
490	}
491	desc->status = ~0;
492	desc->offset = (blk_rq_pos(req) << 9) / port->vdisk_block_size;
493	desc->size = len;
494	desc->ncookies = err;
495
496	/* This has to be a non-SMP write barrier because we are writing
497	 * to memory which is shared with the peer LDOM.
498	 */
499	wmb();
500	desc->hdr.state = VIO_DESC_READY;
501
502	err = __vdc_tx_trigger(port);
503	if (err < 0) {
504		printk(KERN_ERR PFX "vdc_tx_trigger() failure, err=%d\n", err);
505	} else {
506		port->req_id++;
507		dr->prod = vio_dring_next(dr, dr->prod);
508	}
509
510	return err;
511}
512
513static void do_vdc_request(struct request_queue *rq)
514{
515	struct request *req;
516
517	while ((req = blk_peek_request(rq)) != NULL) {
518		struct vdc_port *port;
519		struct vio_dring_state *dr;
520
521		port = req->rq_disk->private_data;
522		dr = &port->vio.drings[VIO_DRIVER_TX_RING];
523		if (unlikely(vdc_tx_dring_avail(dr) < 1))
524			goto wait;
525
526		blk_start_request(req);
527
528		if (__send_request(req) < 0) {
529			blk_requeue_request(rq, req);
530wait:
531			/* Avoid pointless unplugs. */
532			blk_stop_queue(rq);
533			break;
534		}
535	}
536}
537
538static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
539{
540	struct vio_dring_state *dr;
541	struct vio_completion comp;
542	struct vio_disk_desc *desc;
543	unsigned int map_perm;
544	unsigned long flags;
545	int op_len, err;
546	void *req_buf;
547
548	if (!(((u64)1 << (u64)op) & port->operations))
549		return -EOPNOTSUPP;
550
551	switch (op) {
552	case VD_OP_BREAD:
553	case VD_OP_BWRITE:
554	default:
555		return -EINVAL;
556
557	case VD_OP_FLUSH:
558		op_len = 0;
559		map_perm = 0;
560		break;
561
562	case VD_OP_GET_WCE:
563		op_len = sizeof(u32);
564		map_perm = LDC_MAP_W;
565		break;
566
567	case VD_OP_SET_WCE:
568		op_len = sizeof(u32);
569		map_perm = LDC_MAP_R;
570		break;
571
572	case VD_OP_GET_VTOC:
573		op_len = sizeof(struct vio_disk_vtoc);
574		map_perm = LDC_MAP_W;
575		break;
576
577	case VD_OP_SET_VTOC:
578		op_len = sizeof(struct vio_disk_vtoc);
579		map_perm = LDC_MAP_R;
580		break;
581
582	case VD_OP_GET_DISKGEOM:
583		op_len = sizeof(struct vio_disk_geom);
584		map_perm = LDC_MAP_W;
585		break;
586
587	case VD_OP_SET_DISKGEOM:
588		op_len = sizeof(struct vio_disk_geom);
589		map_perm = LDC_MAP_R;
590		break;
591
592	case VD_OP_SCSICMD:
593		op_len = 16;
594		map_perm = LDC_MAP_RW;
595		break;
596
597	case VD_OP_GET_DEVID:
598		op_len = sizeof(struct vio_disk_devid);
599		map_perm = LDC_MAP_W;
600		break;
601
602	case VD_OP_GET_EFI:
603	case VD_OP_SET_EFI:
604		return -EOPNOTSUPP;
605		break;
606	};
607
608	map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
609
610	op_len = (op_len + 7) & ~7;
611	req_buf = kzalloc(op_len, GFP_KERNEL);
612	if (!req_buf)
613		return -ENOMEM;
614
615	if (len > op_len)
616		len = op_len;
617
618	if (map_perm & LDC_MAP_R)
619		memcpy(req_buf, buf, len);
620
621	spin_lock_irqsave(&port->vio.lock, flags);
622
623	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
624
625	/* XXX If we want to use this code generically we have to
626	 * XXX handle TX ring exhaustion etc.
627	 */
628	desc = vio_dring_cur(dr);
629
630	err = ldc_map_single(port->vio.lp, req_buf, op_len,
631			     desc->cookies, port->ring_cookies,
632			     map_perm);
633	if (err < 0) {
634		spin_unlock_irqrestore(&port->vio.lock, flags);
635		kfree(req_buf);
636		return err;
637	}
638
639	init_completion(&comp.com);
640	comp.waiting_for = WAITING_FOR_GEN_CMD;
641	port->vio.cmp = &comp;
642
643	desc->hdr.ack = VIO_ACK_ENABLE;
644	desc->req_id = port->req_id;
645	desc->operation = op;
646	desc->slice = 0;
647	desc->status = ~0;
648	desc->offset = 0;
649	desc->size = op_len;
650	desc->ncookies = err;
651
652	/* This has to be a non-SMP write barrier because we are writing
653	 * to memory which is shared with the peer LDOM.
654	 */
655	wmb();
656	desc->hdr.state = VIO_DESC_READY;
657
658	err = __vdc_tx_trigger(port);
659	if (err >= 0) {
660		port->req_id++;
661		dr->prod = vio_dring_next(dr, dr->prod);
662		spin_unlock_irqrestore(&port->vio.lock, flags);
663
664		wait_for_completion(&comp.com);
665		err = comp.err;
666	} else {
667		port->vio.cmp = NULL;
668		spin_unlock_irqrestore(&port->vio.lock, flags);
669	}
670
671	if (map_perm & LDC_MAP_W)
672		memcpy(buf, req_buf, len);
673
674	kfree(req_buf);
675
676	return err;
677}
678
679static int vdc_alloc_tx_ring(struct vdc_port *port)
680{
681	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
682	unsigned long len, entry_size;
683	int ncookies;
684	void *dring;
685
686	entry_size = sizeof(struct vio_disk_desc) +
687		(sizeof(struct ldc_trans_cookie) * port->ring_cookies);
688	len = (VDC_TX_RING_SIZE * entry_size);
689
690	ncookies = VIO_MAX_RING_COOKIES;
691	dring = ldc_alloc_exp_dring(port->vio.lp, len,
692				    dr->cookies, &ncookies,
693				    (LDC_MAP_SHADOW |
694				     LDC_MAP_DIRECT |
695				     LDC_MAP_RW));
696	if (IS_ERR(dring))
697		return PTR_ERR(dring);
698
699	dr->base = dring;
700	dr->entry_size = entry_size;
701	dr->num_entries = VDC_TX_RING_SIZE;
702	dr->prod = dr->cons = 0;
703	dr->pending = VDC_TX_RING_SIZE;
704	dr->ncookies = ncookies;
705
706	return 0;
707}
708
709static void vdc_free_tx_ring(struct vdc_port *port)
710{
711	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
712
713	if (dr->base) {
714		ldc_free_exp_dring(port->vio.lp, dr->base,
715				   (dr->entry_size * dr->num_entries),
716				   dr->cookies, dr->ncookies);
717		dr->base = NULL;
718		dr->entry_size = 0;
719		dr->num_entries = 0;
720		dr->pending = 0;
721		dr->ncookies = 0;
722	}
723}
724
725static int vdc_port_up(struct vdc_port *port)
726{
727	struct vio_completion comp;
728
729	init_completion(&comp.com);
730	comp.err = 0;
731	comp.waiting_for = WAITING_FOR_LINK_UP;
732	port->vio.cmp = &comp;
733
734	vio_port_up(&port->vio);
735	wait_for_completion(&comp.com);
736	return comp.err;
737}
738
739static void vdc_port_down(struct vdc_port *port)
740{
741	ldc_disconnect(port->vio.lp);
742	ldc_unbind(port->vio.lp);
743	vdc_free_tx_ring(port);
744	vio_ldc_free(&port->vio);
745}
746
747static int probe_disk(struct vdc_port *port)
748{
749	struct request_queue *q;
750	struct gendisk *g;
751	int err;
752
753	err = vdc_port_up(port);
754	if (err)
755		return err;
756
757	if (vdc_version_supported(port, 1, 1)) {
758		/* vdisk_size should be set during the handshake, if it wasn't
759		 * then the underlying disk is reserved by another system
760		 */
761		if (port->vdisk_size == -1)
762			return -ENODEV;
763	} else {
764		struct vio_disk_geom geom;
765
766		err = generic_request(port, VD_OP_GET_DISKGEOM,
767				      &geom, sizeof(geom));
768		if (err < 0) {
769			printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
770			       "error %d\n", err);
771			return err;
772		}
773		port->vdisk_size = ((u64)geom.num_cyl *
774				    (u64)geom.num_hd *
775				    (u64)geom.num_sec);
776	}
777
778	q = blk_init_queue(do_vdc_request, &port->vio.lock);
779	if (!q) {
780		printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
781		       port->vio.name);
782		return -ENOMEM;
783	}
784	g = alloc_disk(1 << PARTITION_SHIFT);
785	if (!g) {
786		printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
787		       port->vio.name);
788		blk_cleanup_queue(q);
789		return -ENOMEM;
790	}
791
792	port->disk = g;
793
794	/* Each segment in a request is up to an aligned page in size. */
795	blk_queue_segment_boundary(q, PAGE_SIZE - 1);
796	blk_queue_max_segment_size(q, PAGE_SIZE);
797
798	blk_queue_max_segments(q, port->ring_cookies);
799	blk_queue_max_hw_sectors(q, port->max_xfer_size);
800	g->major = vdc_major;
801	g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
802	strcpy(g->disk_name, port->disk_name);
803
804	g->fops = &vdc_fops;
805	g->queue = q;
806	g->private_data = port;
807	g->driverfs_dev = &port->vio.vdev->dev;
808
809	set_capacity(g, port->vdisk_size);
810
811	if (vdc_version_supported(port, 1, 1)) {
812		switch (port->vdisk_mtype) {
813		case VD_MEDIA_TYPE_CD:
814			pr_info(PFX "Virtual CDROM %s\n", port->disk_name);
815			g->flags |= GENHD_FL_CD;
816			g->flags |= GENHD_FL_REMOVABLE;
817			set_disk_ro(g, 1);
818			break;
819
820		case VD_MEDIA_TYPE_DVD:
821			pr_info(PFX "Virtual DVD %s\n", port->disk_name);
822			g->flags |= GENHD_FL_CD;
823			g->flags |= GENHD_FL_REMOVABLE;
824			set_disk_ro(g, 1);
825			break;
826
827		case VD_MEDIA_TYPE_FIXED:
828			pr_info(PFX "Virtual Hard disk %s\n", port->disk_name);
829			break;
830		}
831	}
832
833	pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n",
834	       g->disk_name,
835	       port->vdisk_size, (port->vdisk_size >> (20 - 9)),
836	       port->vio.ver.major, port->vio.ver.minor);
837
838	add_disk(g);
839
840	return 0;
841}
842
843static struct ldc_channel_config vdc_ldc_cfg = {
844	.event		= vdc_event,
845	.mtu		= 64,
846	.mode		= LDC_MODE_UNRELIABLE,
847};
848
849static struct vio_driver_ops vdc_vio_ops = {
850	.send_attr		= vdc_send_attr,
851	.handle_attr		= vdc_handle_attr,
852	.handshake_complete	= vdc_handshake_complete,
853};
854
855static void print_version(void)
856{
857	static int version_printed;
858
859	if (version_printed++ == 0)
860		printk(KERN_INFO "%s", version);
861}
862
863static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
864{
865	struct mdesc_handle *hp;
866	struct vdc_port *port;
867	int err;
868	const u64 *ldc_timeout;
869
870	print_version();
871
872	hp = mdesc_grab();
873
874	err = -ENODEV;
875	if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {
876		printk(KERN_ERR PFX "Port id [%llu] too large.\n",
877		       vdev->dev_no);
878		goto err_out_release_mdesc;
879	}
880
881	port = kzalloc(sizeof(*port), GFP_KERNEL);
882	err = -ENOMEM;
883	if (!port) {
884		printk(KERN_ERR PFX "Cannot allocate vdc_port.\n");
885		goto err_out_release_mdesc;
886	}
887
888	if (vdev->dev_no >= 26)
889		snprintf(port->disk_name, sizeof(port->disk_name),
890			 VDCBLK_NAME "%c%c",
891			 'a' + ((int)vdev->dev_no / 26) - 1,
892			 'a' + ((int)vdev->dev_no % 26));
893	else
894		snprintf(port->disk_name, sizeof(port->disk_name),
895			 VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26));
896	port->vdisk_size = -1;
897
898	/* Actual wall time may be double due to do_generic_file_read() doing
899	 * a readahead I/O first, and once that fails it will try to read a
900	 * single page.
901	 */
902	ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL);
903	port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0;
904	setup_timer(&port->ldc_reset_timer, vdc_ldc_reset_timer,
905		    (unsigned long)port);
906	INIT_WORK(&port->ldc_reset_work, vdc_ldc_reset_work);
907
908	err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
909			      vdc_versions, ARRAY_SIZE(vdc_versions),
910			      &vdc_vio_ops, port->disk_name);
911	if (err)
912		goto err_out_free_port;
913
914	port->vdisk_block_size = 512;
915	port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size);
916	port->ring_cookies = ((port->max_xfer_size *
917			       port->vdisk_block_size) / PAGE_SIZE) + 2;
918
919	err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
920	if (err)
921		goto err_out_free_port;
922
923	err = vdc_alloc_tx_ring(port);
924	if (err)
925		goto err_out_free_ldc;
926
927	err = probe_disk(port);
928	if (err)
929		goto err_out_free_tx_ring;
930
931	dev_set_drvdata(&vdev->dev, port);
932
933	mdesc_release(hp);
934
935	return 0;
936
937err_out_free_tx_ring:
938	vdc_free_tx_ring(port);
939
940err_out_free_ldc:
941	vio_ldc_free(&port->vio);
942
943err_out_free_port:
944	kfree(port);
945
946err_out_release_mdesc:
947	mdesc_release(hp);
948	return err;
949}
950
951static int vdc_port_remove(struct vio_dev *vdev)
952{
953	struct vdc_port *port = dev_get_drvdata(&vdev->dev);
954
955	if (port) {
956		unsigned long flags;
957
958		spin_lock_irqsave(&port->vio.lock, flags);
959		blk_stop_queue(port->disk->queue);
960		spin_unlock_irqrestore(&port->vio.lock, flags);
961
962		flush_work(&port->ldc_reset_work);
963		del_timer_sync(&port->ldc_reset_timer);
964		del_timer_sync(&port->vio.timer);
965
966		del_gendisk(port->disk);
967		blk_cleanup_queue(port->disk->queue);
968		put_disk(port->disk);
969		port->disk = NULL;
970
971		vdc_free_tx_ring(port);
972		vio_ldc_free(&port->vio);
973
974		dev_set_drvdata(&vdev->dev, NULL);
975
976		kfree(port);
977	}
978	return 0;
979}
980
981static void vdc_requeue_inflight(struct vdc_port *port)
982{
983	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
984	u32 idx;
985
986	for (idx = dr->cons; idx != dr->prod; idx = vio_dring_next(dr, idx)) {
987		struct vio_disk_desc *desc = vio_dring_entry(dr, idx);
988		struct vdc_req_entry *rqe = &port->rq_arr[idx];
989		struct request *req;
990
991		ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
992		desc->hdr.state = VIO_DESC_FREE;
993		dr->cons = vio_dring_next(dr, idx);
994
995		req = rqe->req;
996		if (req == NULL) {
997			vdc_end_special(port, desc);
998			continue;
999		}
1000
1001		rqe->req = NULL;
1002		blk_requeue_request(port->disk->queue, req);
1003	}
1004}
1005
1006static void vdc_queue_drain(struct vdc_port *port)
1007{
1008	struct request *req;
1009
1010	while ((req = blk_fetch_request(port->disk->queue)) != NULL)
1011		__blk_end_request_all(req, -EIO);
1012}
1013
1014static void vdc_ldc_reset_timer(unsigned long _arg)
1015{
1016	struct vdc_port *port = (struct vdc_port *) _arg;
1017	struct vio_driver_state *vio = &port->vio;
1018	unsigned long flags;
1019
1020	spin_lock_irqsave(&vio->lock, flags);
1021	if (!(port->vio.hs_state & VIO_HS_COMPLETE)) {
1022		pr_warn(PFX "%s ldc down %llu seconds, draining queue\n",
1023			port->disk_name, port->ldc_timeout);
1024		vdc_queue_drain(port);
1025		vdc_blk_queue_start(port);
1026	}
1027	spin_unlock_irqrestore(&vio->lock, flags);
1028}
1029
1030static void vdc_ldc_reset_work(struct work_struct *work)
1031{
1032	struct vdc_port *port;
1033	struct vio_driver_state *vio;
1034	unsigned long flags;
1035
1036	port = container_of(work, struct vdc_port, ldc_reset_work);
1037	vio = &port->vio;
1038
1039	spin_lock_irqsave(&vio->lock, flags);
1040	vdc_ldc_reset(port);
1041	spin_unlock_irqrestore(&vio->lock, flags);
1042}
1043
1044static void vdc_ldc_reset(struct vdc_port *port)
1045{
1046	int err;
1047
1048	assert_spin_locked(&port->vio.lock);
1049
1050	pr_warn(PFX "%s ldc link reset\n", port->disk_name);
1051	blk_stop_queue(port->disk->queue);
1052	vdc_requeue_inflight(port);
1053	vdc_port_down(port);
1054
1055	err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
1056	if (err) {
1057		pr_err(PFX "%s vio_ldc_alloc:%d\n", port->disk_name, err);
1058		return;
1059	}
1060
1061	err = vdc_alloc_tx_ring(port);
1062	if (err) {
1063		pr_err(PFX "%s vio_alloc_tx_ring:%d\n", port->disk_name, err);
1064		goto err_free_ldc;
1065	}
1066
1067	if (port->ldc_timeout)
1068		mod_timer(&port->ldc_reset_timer,
1069			  round_jiffies(jiffies + HZ * port->ldc_timeout));
1070	mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ));
1071	return;
1072
1073err_free_ldc:
1074	vio_ldc_free(&port->vio);
1075}
1076
1077static const struct vio_device_id vdc_port_match[] = {
1078	{
1079		.type = "vdc-port",
1080	},
1081	{},
1082};
1083MODULE_DEVICE_TABLE(vio, vdc_port_match);
1084
1085static struct vio_driver vdc_port_driver = {
1086	.id_table	= vdc_port_match,
1087	.probe		= vdc_port_probe,
1088	.remove		= vdc_port_remove,
1089	.name		= "vdc_port",
1090};
1091
1092static int __init vdc_init(void)
1093{
1094	int err;
1095
1096	sunvdc_wq = alloc_workqueue("sunvdc", 0, 0);
1097	if (!sunvdc_wq)
1098		return -ENOMEM;
1099
1100	err = register_blkdev(0, VDCBLK_NAME);
1101	if (err < 0)
1102		goto out_free_wq;
1103
1104	vdc_major = err;
1105
1106	err = vio_register_driver(&vdc_port_driver);
1107	if (err)
1108		goto out_unregister_blkdev;
1109
1110	return 0;
1111
1112out_unregister_blkdev:
1113	unregister_blkdev(vdc_major, VDCBLK_NAME);
1114	vdc_major = 0;
1115
1116out_free_wq:
1117	destroy_workqueue(sunvdc_wq);
1118	return err;
1119}
1120
1121static void __exit vdc_exit(void)
1122{
1123	vio_unregister_driver(&vdc_port_driver);
1124	unregister_blkdev(vdc_major, VDCBLK_NAME);
1125	destroy_workqueue(sunvdc_wq);
1126}
1127
/* Module entry and exit points. */
module_init(vdc_init);
module_exit(vdc_exit);
1130