/*  Xenbus code for blkif backend
    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
    Copyright (C) 2005 XenSource Ltd

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

*/

#define pr_fmt(fmt) "xen-blkback: " fmt

#include <stdarg.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include "common.h"

/* Array size large enough to hold the full "blkback.<domid>.<name>" name. */
#define BLKBACK_NAME_LEN (20)

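/*
 * Per-device backend state: the xenbus device, the block interface it
 * drives, a watch on the hotplug "physical-device" node, and the
 * physical device identity (major:minor, mode) read from the store.
 */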
struct backend_info {
	struct xenbus_device	*dev;
	struct xen_blkif	*blkif;
	struct xenbus_watch	backend_watch;
	unsigned		major;
	unsigned		minor;
	char			*mode;
};

static struct kmem_cache *xen_blkif_cachep;
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
			    unsigned int);
static void xen_blkif_free(struct xen_blkif *blkif);
static void xen_vbd_free(struct xen_vbd *vbd);

struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
{
	return be->dev;
}

/*
 * The last request could free the device from softirq context and
 * xen_blkif_free() can sleep.
 */
static void xen_blkif_deferred_free(struct work_struct *work)
{
	struct xen_blkif *blkif;

	blkif = container_of(work, struct xen_blkif, free_work);
	xen_blkif_free(blkif);
}

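/*
 * Build the per-device kthread name, "blkback.<domid>.<devname>".  The
 * device name comes from the hotplug script's "dev" node, with any
 * leading "/dev/" prefix stripped.
 */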
static int blkback_name(struct xen_blkif *blkif, char *buf)
{
	char *devpath, *devname;
	struct xenbus_device *dev = blkif->be->dev;

	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
	if (IS_ERR(devpath))
		return PTR_ERR(devpath);

	devname = strstr(devpath, "/dev/");
	if (devname != NULL)
		devname += strlen("/dev/");
	else
		devname  = devpath;

	snprintf(buf, BLKBACK_NAME_LEN, "blkback.%d.%s", blkif->domid, devname);
	kfree(devpath);

	return 0;
}

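/*
 * Bring the interface fully up once both halves are ready: the ring and
 * event channel must be mapped (irq != 0) and the backing device open.
 * Connect to the frontend, flush and invalidate the backing device's
 * page cache, and start the per-device xenblkd I/O thread.
 */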
static void xen_update_blkif_status(struct xen_blkif *blkif)
{
	int err;
	char name[BLKBACK_NAME_LEN];

	/* Not ready to connect? */
	if (!blkif->irq || !blkif->vbd.bdev)
		return;

	/* Already connected? */
	if (blkif->be->dev->state == XenbusStateConnected)
		return;

	/* Attempt to connect: exit if we fail to. */
	connect(blkif->be);
	if (blkif->be->dev->state != XenbusStateConnected)
		return;

	err = blkback_name(blkif, name);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
		return;
	}

	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "block flush");
		return;
	}
	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);

	blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name);
	if (IS_ERR(blkif->xenblkd)) {
		err = PTR_ERR(blkif->xenblkd);
		blkif->xenblkd = NULL;
		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
		return;
	}
}

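/*
 * Allocate a blkif for the given frontend domain, including the pool of
 * XEN_BLKIF_REQS pending requests with their preallocated indirect
 * segment and indirect page arrays.  On failure, everything allocated
 * so far is freed and ERR_PTR(-ENOMEM) is returned.
 */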
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
	struct xen_blkif *blkif;
	struct pending_req *req, *n;
	int i, j;

	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);

	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
	if (!blkif)
		return ERR_PTR(-ENOMEM);

	blkif->domid = domid;
	spin_lock_init(&blkif->blk_ring_lock);
	atomic_set(&blkif->refcnt, 1);
	init_waitqueue_head(&blkif->wq);
	init_completion(&blkif->drain_complete);
	atomic_set(&blkif->drain, 0);
	blkif->st_print = jiffies;
	blkif->persistent_gnts.rb_node = NULL;
	spin_lock_init(&blkif->free_pages_lock);
	INIT_LIST_HEAD(&blkif->free_pages);
	INIT_LIST_HEAD(&blkif->persistent_purge_list);
	blkif->free_pages_num = 0;
	atomic_set(&blkif->persistent_gnt_in_use, 0);
	atomic_set(&blkif->inflight, 0);
	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);

	INIT_LIST_HEAD(&blkif->pending_free);
	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);

	for (i = 0; i < XEN_BLKIF_REQS; i++) {
		req = kzalloc(sizeof(*req), GFP_KERNEL);
		if (!req)
			goto fail;
		list_add_tail(&req->free_list,
		              &blkif->pending_free);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			req->segments[j] = kzalloc(sizeof(*req->segments[0]),
			                           GFP_KERNEL);
			if (!req->segments[j])
				goto fail;
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
			                                 GFP_KERNEL);
			if (!req->indirect_pages[j])
				goto fail;
		}
	}
	spin_lock_init(&blkif->pending_free_lock);
	init_waitqueue_head(&blkif->pending_free_wq);
	init_waitqueue_head(&blkif->shutdown_wq);

	return blkif;

fail:
	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
		list_del(&req->free_list);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			if (!req->segments[j])
				break;
			kfree(req->segments[j]);
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			if (!req->indirect_pages[j])
				break;
			kfree(req->indirect_pages[j]);
		}
		kfree(req);
	}

	kmem_cache_free(xen_blkif_cachep, blkif);

	return ERR_PTR(-ENOMEM);
}

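/*
 * Map the frontend's shared ring page (grant reference gref) and bind
 * its event channel, initialising the back ring for whichever ABI
 * (native, x86_32 or x86_64) was negotiated in connect_ring().
 */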
static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
			 unsigned int evtchn)
{
	int err;

	/* Already connected through? */
	if (blkif->irq)
		return 0;

	err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
				     &blkif->blk_ring);
	if (err < 0)
		return err;

	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
	{
		struct blkif_sring *sring;
		sring = (struct blkif_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
		break;
	}
	case BLKIF_PROTOCOL_X86_32:
	{
		struct blkif_x86_32_sring *sring_x86_32;
		sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
		break;
	}
	case BLKIF_PROTOCOL_X86_64:
	{
		struct blkif_x86_64_sring *sring_x86_64;
		sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
		break;
	}
	default:
		BUG();
	}

	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
						    xen_blkif_be_int, 0,
						    "blkif-backend", blkif);
	if (err < 0) {
		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
		blkif->blk_rings.common.sring = NULL;
		return err;
	}
	blkif->irq = err;

	return 0;
}

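/*
 * Tear the connection down again: stop the I/O thread, and once no I/O
 * is in flight, unbind the irq, unmap the ring and drop the persistent
 * grant and ballooned page caches.  Returns -EBUSY while requests are
 * still in flight so the caller can retry later.
 */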
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
	if (blkif->xenblkd) {
		kthread_stop(blkif->xenblkd);
		wake_up(&blkif->shutdown_wq);
		blkif->xenblkd = NULL;
	}

	/* The above kthread_stop() guarantees that at this point we
	 * don't have any discard_io or other_io requests. So, checking
	 * for inflight IO is enough.
	 */
	if (atomic_read(&blkif->inflight) > 0)
		return -EBUSY;

	if (blkif->irq) {
		unbind_from_irqhandler(blkif->irq, blkif);
		blkif->irq = 0;
	}

	if (blkif->blk_rings.common.sring) {
		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
		blkif->blk_rings.common.sring = NULL;
	}

	/* Remove all persistent grants and the cache of ballooned pages. */
	xen_blkbk_free_caches(blkif);

	return 0;
}

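/*
 * Final teardown, run from the deferred-free work item once the last
 * reference has been dropped: disconnect, release the vbd, verify that
 * all grants and pages have been drained, and free the request pool.
 */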
static void xen_blkif_free(struct xen_blkif *blkif)
{
	struct pending_req *req, *n;
	int i = 0, j;

	xen_blkif_disconnect(blkif);
	xen_vbd_free(&blkif->vbd);

	/* Make sure everything is drained before shutting down */
	BUG_ON(blkif->persistent_gnt_c != 0);
	BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
	BUG_ON(blkif->free_pages_num != 0);
	BUG_ON(!list_empty(&blkif->persistent_purge_list));
	BUG_ON(!list_empty(&blkif->free_pages));
	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));

	/* Check that there is no request in use */
	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
		list_del(&req->free_list);

		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
			kfree(req->segments[j]);

		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
			kfree(req->indirect_pages[j]);

		kfree(req);
		i++;
	}

	WARN_ON(i != XEN_BLKIF_REQS);

	kmem_cache_free(xen_blkif_cachep, blkif);
}

int __init xen_blkif_interface_init(void)
{
	xen_blkif_cachep = kmem_cache_create("blkif_cache",
					     sizeof(struct xen_blkif),
					     0, 0, NULL);
	if (!xen_blkif_cachep)
		return -ENOMEM;

	return 0;
}

/*
 *  sysfs interface for VBD I/O requests
 */

#define VBD_SHOW(name, format, args...)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
									\
		return sprintf(buf, format, ##args);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)

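/*
 * For example, VBD_SHOW(oo_req, "%llu\n", be->blkif->st_oo_req) expands
 * to a show_oo_req() handler and the matching dev_attr_oo_req, which is
 * exported below via the "statistics" attribute group.
 */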
VBD_SHOW(oo_req,  "%llu\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req,  "%llu\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req,  "%llu\n", be->blkif->st_wr_req);
VBD_SHOW(f_req,   "%llu\n", be->blkif->st_f_req);
VBD_SHOW(ds_req,  "%llu\n", be->blkif->st_ds_req);
VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);

static struct attribute *xen_vbdstat_attrs[] = {
	&dev_attr_oo_req.attr,
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
	&dev_attr_f_req.attr,
	&dev_attr_ds_req.attr,
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
};

static struct attribute_group xen_vbdstat_group = {
	.name = "statistics",
	.attrs = xen_vbdstat_attrs,
};

VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);

static int xenvbd_sysfs_addif(struct xenbus_device *dev)
{
	int error;

	error = device_create_file(&dev->dev, &dev_attr_physical_device);
	if (error)
		goto fail1;

	error = device_create_file(&dev->dev, &dev_attr_mode);
	if (error)
		goto fail2;

	error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
	if (error)
		goto fail2;

	return 0;

/* Unwind only what was successfully created. */
fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
	return error;
}

static void xenvbd_sysfs_delif(struct xenbus_device *dev)
{
	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
	device_remove_file(&dev->dev, &dev_attr_mode);
	device_remove_file(&dev->dev, &dev_attr_physical_device);
}


static void xen_vbd_free(struct xen_vbd *vbd)
{
	if (vbd->bdev)
		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
	vbd->bdev = NULL;
}

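/*
 * Open the physical device (major:minor) that backs this vbd, read-only
 * unless "w" was requested, and record its size, CD-ROM/removable flags
 * and flush/secure-discard capabilities.
 */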
static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
			  unsigned major, unsigned minor, int readonly,
			  int cdrom)
{
	struct xen_vbd *vbd;
	struct block_device *bdev;
	struct request_queue *q;

	vbd = &blkif->vbd;
	vbd->handle   = handle;
	vbd->readonly = readonly;
	vbd->type     = 0;

	vbd->pdevice  = MKDEV(major, minor);

	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
				 FMODE_READ : FMODE_WRITE, NULL);

	if (IS_ERR(bdev)) {
		pr_warn("xen_vbd_create: device %08x could not be opened\n",
			vbd->pdevice);
		return -ENOENT;
	}

	vbd->bdev = bdev;
	if (vbd->bdev->bd_disk == NULL) {
		pr_warn("xen_vbd_create: device %08x doesn't exist\n",
			vbd->pdevice);
		xen_vbd_free(vbd);
		return -ENOENT;
	}
	vbd->size = vbd_sz(vbd);

	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
		vbd->type |= VDISK_CDROM;
	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
		vbd->type |= VDISK_REMOVABLE;

	q = bdev_get_queue(bdev);
	if (q && q->flush_flags)
		vbd->flush_support = true;

	if (q && blk_queue_secdiscard(q))
		vbd->discard_secure = true;

	pr_debug("Successful creation of handle=%04x (dom=%u)\n",
		 handle, blkif->domid);
	return 0;
}

static int xen_blkbk_remove(struct xenbus_device *dev)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);

	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	if (be->major || be->minor)
		xenvbd_sysfs_delif(dev);

	if (be->backend_watch.node) {
		unregister_xenbus_watch(&be->backend_watch);
		kfree(be->backend_watch.node);
		be->backend_watch.node = NULL;
	}

	dev_set_drvdata(&dev->dev, NULL);

	if (be->blkif) {
		xen_blkif_disconnect(be->blkif);
		xen_blkif_put(be->blkif);
	}

	kfree(be->mode);
	kfree(be);
	return 0;
}

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
			      struct backend_info *be, int state)
{
	struct xenbus_device *dev = be->dev;
	int err;

	err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);

	return err;
}

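/*
 * Advertise discard support to the frontend.  The toolstack can veto it
 * by writing "discard-enable" = 0 in the backend directory; otherwise,
 * if the queue supports discard, publish its granularity and alignment,
 * plus the optional "discard-secure" node when supported.
 */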
static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	struct xen_blkif *blkif = be->blkif;
	int err;
	int state = 0, discard_enable;
	struct block_device *bdev = be->blkif->vbd.bdev;
	struct request_queue *q = bdev_get_queue(bdev);

	err = xenbus_scanf(XBT_NIL, dev->nodename, "discard-enable", "%d",
			   &discard_enable);
	if (err == 1 && !discard_enable)
		return;

	if (blk_queue_discard(q)) {
		err = xenbus_printf(xbt, dev->nodename,
			"discard-granularity", "%u",
			q->limits.discard_granularity);
		if (err) {
			dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
			return;
		}
		err = xenbus_printf(xbt, dev->nodename,
			"discard-alignment", "%u",
			q->limits.discard_alignment);
		if (err) {
			dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
			return;
		}
		state = 1;
		/* Optional. */
		err = xenbus_printf(xbt, dev->nodename,
				    "discard-secure", "%d",
				    blkif->vbd.discard_secure);
		if (err) {
			dev_warn(&dev->dev, "writing discard-secure (%d)", err);
			return;
		}
	}
	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-discard (%d)", err);
}

int xen_blkbk_barrier(struct xenbus_transaction xbt,
		      struct backend_info *be, int state)
{
	struct xenbus_device *dev = be->dev;
	int err;

	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-barrier (%d)", err);

	return err;
}

/*
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures, and watch the store waiting for the hotplug scripts to tell us
 * the device's physical major and minor numbers.  Switch to InitWait.
 */
static int xen_blkbk_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
{
	int err;
	struct backend_info *be = kzalloc(sizeof(struct backend_info),
					  GFP_KERNEL);

	/* match the pr_debug in xen_blkbk_remove */
	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	if (!be) {
		xenbus_dev_fatal(dev, -ENOMEM,
				 "allocating backend structure");
		return -ENOMEM;
	}
	be->dev = dev;
	dev_set_drvdata(&dev->dev, be);

	be->blkif = xen_blkif_alloc(dev->otherend_id);
	if (IS_ERR(be->blkif)) {
		err = PTR_ERR(be->blkif);
		be->blkif = NULL;
		xenbus_dev_fatal(dev, err, "creating block interface");
		goto fail;
	}

	/* setup back pointer */
	be->blkif->be = be;

	err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
				   "%s/%s", dev->nodename, "physical-device");
	if (err)
		goto fail;

	err = xenbus_switch_state(dev, XenbusStateInitWait);
	if (err)
		goto fail;

	return 0;

fail:
	pr_warn("%s failed\n", __func__);
	xen_blkbk_remove(dev);
	return err;
}


/*
 * Callback received when the hotplug scripts have placed the physical-device
 * node.  Read it and the mode node, and create a vbd.  If the frontend is
 * ready, connect.
 */
static void backend_changed(struct xenbus_watch *watch,
			    const char **vec, unsigned int len)
{
	int err;
	unsigned major;
	unsigned minor;
	struct backend_info *be
		= container_of(watch, struct backend_info, backend_watch);
	struct xenbus_device *dev = be->dev;
	int cdrom = 0;
	unsigned long handle;
	char *device_type;

	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
			   &major, &minor);
	if (XENBUS_EXIST_ERR(err)) {
		/*
		 * Since this watch will fire once immediately after it is
		 * registered, we expect this.  Ignore it, and wait for the
		 * hotplug scripts.
		 */
		return;
	}
	if (err != 2) {
		xenbus_dev_fatal(dev, err, "reading physical-device");
		return;
	}

	if (be->major || be->minor) {
		if (be->major != major || be->minor != minor)
			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
				be->major, be->minor, major, minor);
		return;
	}

	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
	if (IS_ERR(be->mode)) {
		err = PTR_ERR(be->mode);
		be->mode = NULL;
		xenbus_dev_fatal(dev, err, "reading mode");
		return;
	}

	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
	if (!IS_ERR(device_type)) {
		cdrom = strcmp(device_type, "cdrom") == 0;
		kfree(device_type);
	}

	/* Front end dir is a number, which is used as the handle. */
	err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
	if (err)
		return;

	be->major = major;
	be->minor = minor;

	err = xen_vbd_create(be->blkif, handle, major, minor,
			     !strchr(be->mode, 'w'), cdrom);

	if (err)
		xenbus_dev_fatal(dev, err, "creating vbd structure");
	else {
		err = xenvbd_sysfs_addif(dev);
		if (err) {
			xen_vbd_free(&be->blkif->vbd);
			xenbus_dev_fatal(dev, err, "creating sysfs entries");
		}
	}

	if (err) {
		kfree(be->mode);
		be->mode = NULL;
		be->major = 0;
		be->minor = 0;
	} else {
		/* We're potentially connected now */
		xen_update_blkif_status(be->blkif);
	}
}


/*
 * Callback received when the frontend's state changes.
 */
static void frontend_changed(struct xenbus_device *dev,
			     enum xenbus_state frontend_state)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);
	int err;

	pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state));

	switch (frontend_state) {
	case XenbusStateInitialising:
		if (dev->state == XenbusStateClosed) {
			pr_info("%s: prepare for reconnect\n", dev->nodename);
			xenbus_switch_state(dev, XenbusStateInitWait);
		}
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		/*
		 * Ensure we connect even when two watches fire in
		 * close succession and we miss the intermediate value
		 * of frontend_state.
		 */
		if (dev->state == XenbusStateConnected)
			break;

		/*
		 * Enforce precondition before potential leak point.
		 * xen_blkif_disconnect() is idempotent.
		 */
		err = xen_blkif_disconnect(be->blkif);
		if (err) {
			xenbus_dev_fatal(dev, err, "pending I/O");
			break;
		}

		err = connect_ring(be);
		if (err)
			break;
		xen_update_blkif_status(be->blkif);
		break;

	case XenbusStateClosing:
		xenbus_switch_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		xen_blkif_disconnect(be->blkif);
		xenbus_switch_state(dev, XenbusStateClosed);
		if (xenbus_dev_is_online(dev))
			break;
		/* fall through if not online */
	case XenbusStateUnknown:
		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}


/* ** Connection ** */


/*
 * Write the physical details regarding the block device to the store, and
 * switch to Connected state.
 */
static void connect(struct backend_info *be)
{
	struct xenbus_transaction xbt;
	int err;
	struct xenbus_device *dev = be->dev;

	pr_debug("%s %s\n", __func__, dev->otherend);

	/* Supply the information about the device the frontend needs */
again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		return;
	}

	/* Failing to advertise these optional features is not fatal. */
	xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);

	xen_blkbk_discard(xbt, be);

	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);

	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
			    MAX_INDIRECT_SEGMENTS);
	if (err)
		dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
			 dev->nodename, err);

	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(&be->blkif->vbd));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sectors",
				 dev->nodename);
		goto abort;
	}

	/* FIXME: use a typename instead */
	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
			    be->blkif->vbd.type |
			    (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/info",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
			    (unsigned long)
			    bdev_logical_block_size(be->blkif->vbd.bdev));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
			    bdev_physical_block_size(be->blkif->vbd.bdev));
	if (err)
		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
				 dev->nodename);

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	if (err)
		xenbus_dev_fatal(dev, err, "ending transaction");

	err = xenbus_switch_state(dev, XenbusStateConnected);
	if (err)
		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
				 dev->nodename);

	return;
 abort:
	xenbus_transaction_end(xbt, 1);
}


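/*
 * Read the frontend's ring reference, event channel, ring protocol and
 * persistent-grant support from the store, then map the shared ring and
 * bind the event channel via xen_blkif_map().
 */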
static int connect_ring(struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	unsigned long ring_ref;
	unsigned int evtchn;
	unsigned int pers_grants;
	char protocol[64] = "";
	int err;

	pr_debug("%s %s\n", __func__, dev->otherend);

	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
			    &ring_ref, "event-channel", "%u", &evtchn, NULL);
	if (err) {
		xenbus_dev_fatal(dev, err,
				 "reading %s/ring-ref and event-channel",
				 dev->otherend);
		return err;
	}

	be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
			    "%63s", protocol, NULL);
	if (err)
		strcpy(protocol, "unspecified, assuming default");
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
	else {
		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
		return -ENOSYS;
	}
	err = xenbus_gather(XBT_NIL, dev->otherend,
			    "feature-persistent", "%u",
			    &pers_grants, NULL);
	if (err)
		pers_grants = 0;

	be->blkif->vbd.feature_gnt_persistent = pers_grants;
	be->blkif->vbd.overflow_max_grants = 0;

	pr_info("ring-ref %lu, event-channel %u, protocol %d (%s) %s\n",
		ring_ref, evtchn, be->blkif->blk_protocol, protocol,
		pers_grants ? "persistent grants" : "");

	/* Map the shared frame, irq etc. */
	err = xen_blkif_map(be->blkif, ring_ref, evtchn);
	if (err) {
		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
				 ring_ref, evtchn);
		return err;
	}

	return 0;
}

static const struct xenbus_device_id xen_blkbk_ids[] = {
	{ "vbd" },
	{ "" }
};

static struct xenbus_driver xen_blkbk_driver = {
	.ids  = xen_blkbk_ids,
	.probe = xen_blkbk_probe,
	.remove = xen_blkbk_remove,
	.otherend_changed = frontend_changed
};

int xen_blkif_xenbus_init(void)
{
	return xenbus_register_backend(&xen_blkbk_driver);
}