1/* ldc.c: Logical Domain Channel link-layer protocol driver.
2 *
3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/export.h>
8#include <linux/slab.h>
9#include <linux/spinlock.h>
10#include <linux/delay.h>
11#include <linux/errno.h>
12#include <linux/string.h>
13#include <linux/scatterlist.h>
14#include <linux/interrupt.h>
15#include <linux/list.h>
16#include <linux/init.h>
17#include <linux/bitmap.h>
18#include <linux/iommu-common.h>
19
20#include <asm/hypervisor.h>
21#include <asm/iommu.h>
22#include <asm/page.h>
23#include <asm/ldc.h>
24#include <asm/mdesc.h>
25
26#define DRV_MODULE_NAME		"ldc"
27#define PFX DRV_MODULE_NAME	": "
28#define DRV_MODULE_VERSION	"1.1"
29#define DRV_MODULE_RELDATE	"July 22, 2008"
30
31#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
32#define COOKIE_PGSZ_CODE_SHIFT	60ULL
33
34
35static char version[] =
36	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
37#define LDC_PACKET_SIZE		64
38
39/* Packet header layout for unreliable and reliable mode frames.
40 * When in RAW mode, packets are simply straight 64-byte payloads
41 * with no headers.
42 */
43struct ldc_packet {
44	u8			type;
45#define LDC_CTRL		0x01
46#define LDC_DATA		0x02
47#define LDC_ERR			0x10
48
49	u8			stype;
50#define LDC_INFO		0x01
51#define LDC_ACK			0x02
52#define LDC_NACK		0x04
53
54	u8			ctrl;
55#define LDC_VERS		0x01 /* Link Version		*/
56#define LDC_RTS			0x02 /* Request To Send		*/
57#define LDC_RTR			0x03 /* Ready To Receive	*/
58#define LDC_RDX			0x04 /* Ready for Data eXchange	*/
59#define LDC_CTRL_MSK		0x0f
60
61	u8			env;
62#define LDC_LEN			0x3f
63#define LDC_FRAG_MASK		0xc0
64#define LDC_START		0x40
65#define LDC_STOP		0x80
66
67	u32			seqid;
68
69	union {
70		u8		u_data[LDC_PACKET_SIZE - 8];
71		struct {
72			u32	pad;
73			u32	ackid;
74			u8	r_data[LDC_PACKET_SIZE - 8 - 8];
75		} r;
76	} u;
77};
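
/* A sketch of the 64-byte frame implied by the structure above:
 *
 *	byte  0		type	(LDC_CTRL / LDC_DATA / LDC_ERR)
 *	byte  1		stype	(LDC_INFO / LDC_ACK / LDC_NACK)
 *	byte  2		ctrl	(LDC_VERS/RTS/RTR/RDX in the low four bits)
 *	byte  3		env	(payload length in the low six bits, plus the
 *				 LDC_START/LDC_STOP fragment flags on top)
 *	bytes 4-7	seqid
 *	bytes 8-63	payload: 56 bytes of u_data in unreliable mode, or
 *			pad + ackid followed by 48 bytes of r_data in
 *			reliable/stream mode
 */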
78
79struct ldc_version {
80	u16 major;
81	u16 minor;
82};
83
/* Ordered from largest major version to lowest.  */
85static struct ldc_version ver_arr[] = {
86	{ .major = 1, .minor = 0 },
87};
88
89#define LDC_DEFAULT_MTU			(4 * LDC_PACKET_SIZE)
90#define LDC_DEFAULT_NUM_ENTRIES		(PAGE_SIZE / LDC_PACKET_SIZE)
91
92struct ldc_channel;
93
94struct ldc_mode_ops {
95	int (*write)(struct ldc_channel *, const void *, unsigned int);
96	int (*read)(struct ldc_channel *, void *, unsigned int);
97};
98
99static const struct ldc_mode_ops raw_ops;
100static const struct ldc_mode_ops nonraw_ops;
101static const struct ldc_mode_ops stream_ops;
102
103int ldom_domaining_enabled;
104
105struct ldc_iommu {
106	/* Protects ldc_unmap.  */
107	spinlock_t			lock;
108	struct ldc_mtable_entry		*page_table;
109	struct iommu_map_table		iommu_map_table;
110};
111
112struct ldc_channel {
113	/* Protects all operations that depend upon channel state.  */
114	spinlock_t			lock;
115
116	unsigned long			id;
117
118	u8				*mssbuf;
119	u32				mssbuf_len;
120	u32				mssbuf_off;
121
122	struct ldc_packet		*tx_base;
123	unsigned long			tx_head;
124	unsigned long			tx_tail;
125	unsigned long			tx_num_entries;
126	unsigned long			tx_ra;
127
128	unsigned long			tx_acked;
129
130	struct ldc_packet		*rx_base;
131	unsigned long			rx_head;
132	unsigned long			rx_tail;
133	unsigned long			rx_num_entries;
134	unsigned long			rx_ra;
135
136	u32				rcv_nxt;
137	u32				snd_nxt;
138
139	unsigned long			chan_state;
140
141	struct ldc_channel_config	cfg;
142	void				*event_arg;
143
144	const struct ldc_mode_ops	*mops;
145
146	struct ldc_iommu		iommu;
147
148	struct ldc_version		ver;
149
150	u8				hs_state;
151#define LDC_HS_CLOSED			0x00
152#define LDC_HS_OPEN			0x01
153#define LDC_HS_GOTVERS			0x02
154#define LDC_HS_SENTRTR			0x03
155#define LDC_HS_GOTRTR			0x04
156#define LDC_HS_COMPLETE			0x10
157
158	u8				flags;
159#define LDC_FLAG_ALLOCED_QUEUES		0x01
160#define LDC_FLAG_REGISTERED_QUEUES	0x02
161#define LDC_FLAG_REGISTERED_IRQS	0x04
162#define LDC_FLAG_RESET			0x10
163
164	u8				mss;
165	u8				state;
166
167#define LDC_IRQ_NAME_MAX		32
168	char				rx_irq_name[LDC_IRQ_NAME_MAX];
169	char				tx_irq_name[LDC_IRQ_NAME_MAX];
170
171	struct hlist_head		mh_list;
172
173	struct hlist_node		list;
174};
175
176#define ldcdbg(TYPE, f, a...) \
177do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
178		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
179} while (0)
180
181static const char *state_to_str(u8 state)
182{
183	switch (state) {
184	case LDC_STATE_INVALID:
185		return "INVALID";
186	case LDC_STATE_INIT:
187		return "INIT";
188	case LDC_STATE_BOUND:
189		return "BOUND";
190	case LDC_STATE_READY:
191		return "READY";
192	case LDC_STATE_CONNECTED:
193		return "CONNECTED";
194	default:
195		return "<UNKNOWN>";
196	}
197}
198
199static void ldc_set_state(struct ldc_channel *lp, u8 state)
200{
201	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
202	       state_to_str(lp->state),
203	       state_to_str(state));
204
205	lp->state = state;
206}
207
208static unsigned long __advance(unsigned long off, unsigned long num_entries)
209{
210	off += LDC_PACKET_SIZE;
211	if (off == (num_entries * LDC_PACKET_SIZE))
212		off = 0;
213
214	return off;
215}
216
217static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
218{
219	return __advance(off, lp->rx_num_entries);
220}
221
222static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
223{
224	return __advance(off, lp->tx_num_entries);
225}
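
/* Queue head/tail values are byte offsets, always a multiple of
 * LDC_PACKET_SIZE.  For example, with 128 entries the queue spans
 * 128 * 64 = 0x2000 bytes, so advancing from offset 0x1fc0 wraps
 * back around to 0.
 */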
226
227static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
228						  unsigned long *new_tail)
229{
230	struct ldc_packet *p;
231	unsigned long t;
232
233	t = tx_advance(lp, lp->tx_tail);
234	if (t == lp->tx_head)
235		return NULL;
236
237	*new_tail = t;
238
239	p = lp->tx_base;
240	return p + (lp->tx_tail / LDC_PACKET_SIZE);
241}
242
/* When we are in reliable or stream mode, we have to track the next
 * packet we haven't gotten an ACK for in the TX queue using tx_acked.
 * We have to be careful not to stomp over the queue past that point.
 * During the handshake, we don't have TX data packets pending in the
 * queue and that's why handshake_get_tx_packet() need not be mindful
 * of lp->tx_acked.
 */
250static unsigned long head_for_data(struct ldc_channel *lp)
251{
252	if (lp->cfg.mode == LDC_MODE_STREAM)
253		return lp->tx_acked;
254	return lp->tx_head;
255}
256
257static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
258{
259	unsigned long limit, tail, new_tail, diff;
260	unsigned int mss;
261
262	limit = head_for_data(lp);
263	tail = lp->tx_tail;
264	new_tail = tx_advance(lp, tail);
265	if (new_tail == limit)
266		return 0;
267
268	if (limit > new_tail)
269		diff = limit - new_tail;
270	else
271		diff = (limit +
272			((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
273	diff /= LDC_PACKET_SIZE;
274	mss = lp->mss;
275
276	if (diff * mss < size)
277		return 0;
278
279	return 1;
280}
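
/* Worked example for the check above (illustrative numbers): with a
 * 128-entry queue, if limit sits at offset 0x200 and tx_tail at 0x180,
 * then new_tail is 0x1c0 and diff = (0x200 - 0x1c0) / 64 = 1 free
 * slot.  With the stream-mode mss of 48, a write larger than 48 bytes
 * would then be bounced with -EAGAIN by write_nonraw().
 */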
281
282static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
283					     unsigned long *new_tail)
284{
285	struct ldc_packet *p;
286	unsigned long h, t;
287
288	h = head_for_data(lp);
289	t = tx_advance(lp, lp->tx_tail);
290	if (t == h)
291		return NULL;
292
293	*new_tail = t;
294
295	p = lp->tx_base;
296	return p + (lp->tx_tail / LDC_PACKET_SIZE);
297}
298
299static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
300{
301	unsigned long orig_tail = lp->tx_tail;
302	int limit = 1000;
303
304	lp->tx_tail = tail;
305	while (limit-- > 0) {
306		unsigned long err;
307
308		err = sun4v_ldc_tx_set_qtail(lp->id, tail);
309		if (!err)
310			return 0;
311
312		if (err != HV_EWOULDBLOCK) {
313			lp->tx_tail = orig_tail;
314			return -EINVAL;
315		}
316		udelay(1);
317	}
318
319	lp->tx_tail = orig_tail;
320	return -EBUSY;
321}
322
/* This just updates the head value in the hypervisor using
 * a polling loop with a timeout.  The caller takes care of
 * updating software state representing the head change, if any.
 */
327static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
328{
329	int limit = 1000;
330
331	while (limit-- > 0) {
332		unsigned long err;
333
334		err = sun4v_ldc_rx_set_qhead(lp->id, head);
335		if (!err)
336			return 0;
337
338		if (err != HV_EWOULDBLOCK)
339			return -EINVAL;
340
341		udelay(1);
342	}
343
344	return -EBUSY;
345}
346
347static int send_tx_packet(struct ldc_channel *lp,
348			  struct ldc_packet *p,
349			  unsigned long new_tail)
350{
351	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
352
353	return set_tx_tail(lp, new_tail);
354}
355
356static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
357						 u8 stype, u8 ctrl,
358						 void *data, int dlen,
359						 unsigned long *new_tail)
360{
361	struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
362
363	if (p) {
364		memset(p, 0, sizeof(*p));
365		p->type = LDC_CTRL;
366		p->stype = stype;
367		p->ctrl = ctrl;
368		if (data)
369			memcpy(p->u.u_data, data, dlen);
370	}
371	return p;
372}
373
374static int start_handshake(struct ldc_channel *lp)
375{
376	struct ldc_packet *p;
377	struct ldc_version *ver;
378	unsigned long new_tail;
379
380	ver = &ver_arr[0];
381
382	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
383	       ver->major, ver->minor);
384
385	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
386				   ver, sizeof(*ver), &new_tail);
387	if (p) {
388		int err = send_tx_packet(lp, p, new_tail);
389		if (!err)
390			lp->flags &= ~LDC_FLAG_RESET;
391		return err;
392	}
393	return -EBUSY;
394}
395
396static int send_version_nack(struct ldc_channel *lp,
397			     u16 major, u16 minor)
398{
399	struct ldc_packet *p;
400	struct ldc_version ver;
401	unsigned long new_tail;
402
403	ver.major = major;
404	ver.minor = minor;
405
406	p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
407				   &ver, sizeof(ver), &new_tail);
408	if (p) {
409		ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
410		       ver.major, ver.minor);
411
412		return send_tx_packet(lp, p, new_tail);
413	}
414	return -EBUSY;
415}
416
417static int send_version_ack(struct ldc_channel *lp,
418			    struct ldc_version *vp)
419{
420	struct ldc_packet *p;
421	unsigned long new_tail;
422
423	p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
424				   vp, sizeof(*vp), &new_tail);
425	if (p) {
426		ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
427		       vp->major, vp->minor);
428
429		return send_tx_packet(lp, p, new_tail);
430	}
431	return -EBUSY;
432}
433
434static int send_rts(struct ldc_channel *lp)
435{
436	struct ldc_packet *p;
437	unsigned long new_tail;
438
439	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
440				   &new_tail);
441	if (p) {
442		p->env = lp->cfg.mode;
443		p->seqid = 0;
444		lp->rcv_nxt = 0;
445
446		ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
447		       p->env, p->seqid);
448
449		return send_tx_packet(lp, p, new_tail);
450	}
451	return -EBUSY;
452}
453
454static int send_rtr(struct ldc_channel *lp)
455{
456	struct ldc_packet *p;
457	unsigned long new_tail;
458
459	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
460				   &new_tail);
461	if (p) {
462		p->env = lp->cfg.mode;
463		p->seqid = 0;
464
465		ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
466		       p->env, p->seqid);
467
468		return send_tx_packet(lp, p, new_tail);
469	}
470	return -EBUSY;
471}
472
473static int send_rdx(struct ldc_channel *lp)
474{
475	struct ldc_packet *p;
476	unsigned long new_tail;
477
478	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
479				   &new_tail);
480	if (p) {
481		p->env = 0;
482		p->seqid = ++lp->snd_nxt;
483		p->u.r.ackid = lp->rcv_nxt;
484
485		ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
486		       p->env, p->seqid, p->u.r.ackid);
487
488		return send_tx_packet(lp, p, new_tail);
489	}
490	return -EBUSY;
491}
492
493static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
494{
495	struct ldc_packet *p;
496	unsigned long new_tail;
497	int err;
498
499	p = data_get_tx_packet(lp, &new_tail);
500	if (!p)
501		return -EBUSY;
502	memset(p, 0, sizeof(*p));
503	p->type = data_pkt->type;
504	p->stype = LDC_NACK;
505	p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
506	p->seqid = lp->snd_nxt + 1;
507	p->u.r.ackid = lp->rcv_nxt;
508
509	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
510	       p->type, p->ctrl, p->seqid, p->u.r.ackid);
511
512	err = send_tx_packet(lp, p, new_tail);
513	if (!err)
514		lp->snd_nxt++;
515
516	return err;
517}
518
519static int ldc_abort(struct ldc_channel *lp)
520{
521	unsigned long hv_err;
522
523	ldcdbg(STATE, "ABORT\n");
524
525	/* We report but do not act upon the hypervisor errors because
526	 * there really isn't much we can do if they fail at this point.
527	 */
528	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
529	if (hv_err)
530		printk(KERN_ERR PFX "ldc_abort: "
531		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
532		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
533
534	hv_err = sun4v_ldc_tx_get_state(lp->id,
535					&lp->tx_head,
536					&lp->tx_tail,
537					&lp->chan_state);
538	if (hv_err)
539		printk(KERN_ERR PFX "ldc_abort: "
540		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
541		       lp->id, hv_err);
542
543	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
544	if (hv_err)
545		printk(KERN_ERR PFX "ldc_abort: "
546		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
547		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
548
549	/* Refetch the RX queue state as well, because we could be invoked
550	 * here in the queue processing context.
551	 */
552	hv_err = sun4v_ldc_rx_get_state(lp->id,
553					&lp->rx_head,
554					&lp->rx_tail,
555					&lp->chan_state);
556	if (hv_err)
557		printk(KERN_ERR PFX "ldc_abort: "
558		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
559		       lp->id, hv_err);
560
561	return -ECONNRESET;
562}
563
564static struct ldc_version *find_by_major(u16 major)
565{
566	struct ldc_version *ret = NULL;
567	int i;
568
569	for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
570		struct ldc_version *v = &ver_arr[i];
571		if (v->major <= major) {
572			ret = v;
573			break;
574		}
575	}
576	return ret;
577}
578
579static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
580{
581	struct ldc_version *vap;
582	int err;
583
584	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
585	       vp->major, vp->minor);
586
587	if (lp->hs_state == LDC_HS_GOTVERS) {
588		lp->hs_state = LDC_HS_OPEN;
589		memset(&lp->ver, 0, sizeof(lp->ver));
590	}
591
592	vap = find_by_major(vp->major);
593	if (!vap) {
594		err = send_version_nack(lp, 0, 0);
595	} else if (vap->major != vp->major) {
596		err = send_version_nack(lp, vap->major, vap->minor);
597	} else {
598		struct ldc_version ver = *vp;
599		if (ver.minor > vap->minor)
600			ver.minor = vap->minor;
601		err = send_version_ack(lp, &ver);
602		if (!err) {
603			lp->ver = ver;
604			lp->hs_state = LDC_HS_GOTVERS;
605		}
606	}
607	if (err)
608		return ldc_abort(lp);
609
610	return 0;
611}
612
613static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
614{
615	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
616	       vp->major, vp->minor);
617
618	if (lp->hs_state == LDC_HS_GOTVERS) {
619		if (lp->ver.major != vp->major ||
620		    lp->ver.minor != vp->minor)
621			return ldc_abort(lp);
622	} else {
623		lp->ver = *vp;
624		lp->hs_state = LDC_HS_GOTVERS;
625	}
626	if (send_rts(lp))
627		return ldc_abort(lp);
628	return 0;
629}
630
631static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
632{
633	struct ldc_version *vap;
634	struct ldc_packet *p;
635	unsigned long new_tail;
636
637	if (vp->major == 0 && vp->minor == 0)
638		return ldc_abort(lp);
639
640	vap = find_by_major(vp->major);
641	if (!vap)
642		return ldc_abort(lp);
643
644	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
645					   vap, sizeof(*vap),
646					   &new_tail);
647	if (!p)
648		return ldc_abort(lp);
649
650	return send_tx_packet(lp, p, new_tail);
651}
652
653static int process_version(struct ldc_channel *lp,
654			   struct ldc_packet *p)
655{
656	struct ldc_version *vp;
657
658	vp = (struct ldc_version *) p->u.u_data;
659
660	switch (p->stype) {
661	case LDC_INFO:
662		return process_ver_info(lp, vp);
663
664	case LDC_ACK:
665		return process_ver_ack(lp, vp);
666
667	case LDC_NACK:
668		return process_ver_nack(lp, vp);
669
670	default:
671		return ldc_abort(lp);
672	}
673}
674
675static int process_rts(struct ldc_channel *lp,
676		       struct ldc_packet *p)
677{
678	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
679	       p->stype, p->seqid, p->env);
680
681	if (p->stype     != LDC_INFO	   ||
682	    lp->hs_state != LDC_HS_GOTVERS ||
683	    p->env       != lp->cfg.mode)
684		return ldc_abort(lp);
685
686	lp->snd_nxt = p->seqid;
687	lp->rcv_nxt = p->seqid;
688	lp->hs_state = LDC_HS_SENTRTR;
689	if (send_rtr(lp))
690		return ldc_abort(lp);
691
692	return 0;
693}
694
695static int process_rtr(struct ldc_channel *lp,
696		       struct ldc_packet *p)
697{
698	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
699	       p->stype, p->seqid, p->env);
700
701	if (p->stype     != LDC_INFO ||
702	    p->env       != lp->cfg.mode)
703		return ldc_abort(lp);
704
705	lp->snd_nxt = p->seqid;
706	lp->hs_state = LDC_HS_COMPLETE;
707	ldc_set_state(lp, LDC_STATE_CONNECTED);
708	send_rdx(lp);
709
710	return LDC_EVENT_UP;
711}
712
713static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
714{
715	return lp->rcv_nxt + 1 == seqid;
716}
717
718static int process_rdx(struct ldc_channel *lp,
719		       struct ldc_packet *p)
720{
721	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
722	       p->stype, p->seqid, p->env, p->u.r.ackid);
723
724	if (p->stype != LDC_INFO ||
725	    !(rx_seq_ok(lp, p->seqid)))
726		return ldc_abort(lp);
727
728	lp->rcv_nxt = p->seqid;
729
730	lp->hs_state = LDC_HS_COMPLETE;
731	ldc_set_state(lp, LDC_STATE_CONNECTED);
732
733	return LDC_EVENT_UP;
734}
735
736static int process_control_frame(struct ldc_channel *lp,
737				 struct ldc_packet *p)
738{
739	switch (p->ctrl) {
740	case LDC_VERS:
741		return process_version(lp, p);
742
743	case LDC_RTS:
744		return process_rts(lp, p);
745
746	case LDC_RTR:
747		return process_rtr(lp, p);
748
749	case LDC_RDX:
750		return process_rdx(lp, p);
751
752	default:
753		return ldc_abort(lp);
754	}
755}
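
/* The control-frame handlers above implement the handshake: each side
 * sends VERS INFO (start_handshake()), and the peer replies with a
 * VERS ACK (or a NACK carrying the best version it can offer).  The
 * side that receives the ACK sends RTS, the peer answers with RTR,
 * and the RTR receiver replies with RDX.  Receiving RTR or RDX moves
 * hs_state to LDC_HS_COMPLETE, sets the channel state to CONNECTED,
 * and reports LDC_EVENT_UP to the client.
 */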
756
757static int process_error_frame(struct ldc_channel *lp,
758			       struct ldc_packet *p)
759{
760	return ldc_abort(lp);
761}
762
763static int process_data_ack(struct ldc_channel *lp,
764			    struct ldc_packet *ack)
765{
766	unsigned long head = lp->tx_acked;
767	u32 ackid = ack->u.r.ackid;
768
769	while (1) {
770		struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
771
772		head = tx_advance(lp, head);
773
774		if (p->seqid == ackid) {
775			lp->tx_acked = head;
776			return 0;
777		}
778		if (head == lp->tx_tail)
779			return ldc_abort(lp);
780	}
781
782	return 0;
783}
784
785static void send_events(struct ldc_channel *lp, unsigned int event_mask)
786{
787	if (event_mask & LDC_EVENT_RESET)
788		lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
789	if (event_mask & LDC_EVENT_UP)
790		lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
791	if (event_mask & LDC_EVENT_DATA_READY)
792		lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
793}
794
795static irqreturn_t ldc_rx(int irq, void *dev_id)
796{
797	struct ldc_channel *lp = dev_id;
798	unsigned long orig_state, flags;
799	unsigned int event_mask;
800
801	spin_lock_irqsave(&lp->lock, flags);
802
803	orig_state = lp->chan_state;
804
805	/* We should probably check for hypervisor errors here and
806	 * reset the LDC channel if we get one.
807	 */
808	sun4v_ldc_rx_get_state(lp->id,
809			       &lp->rx_head,
810			       &lp->rx_tail,
811			       &lp->chan_state);
812
813	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
814	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
815
816	event_mask = 0;
817
818	if (lp->cfg.mode == LDC_MODE_RAW &&
819	    lp->chan_state == LDC_CHANNEL_UP) {
820		lp->hs_state = LDC_HS_COMPLETE;
821		ldc_set_state(lp, LDC_STATE_CONNECTED);
822
823		event_mask |= LDC_EVENT_UP;
824
825		orig_state = lp->chan_state;
826	}
827
828	/* If we are in reset state, flush the RX queue and ignore
829	 * everything.
830	 */
831	if (lp->flags & LDC_FLAG_RESET) {
832		(void) __set_rx_head(lp, lp->rx_tail);
833		goto out;
834	}
835
836	/* Once we finish the handshake, we let the ldc_read()
837	 * paths do all of the control frame and state management.
838	 * Just trigger the callback.
839	 */
840	if (lp->hs_state == LDC_HS_COMPLETE) {
841handshake_complete:
842		if (lp->chan_state != orig_state) {
843			unsigned int event = LDC_EVENT_RESET;
844
845			if (lp->chan_state == LDC_CHANNEL_UP)
846				event = LDC_EVENT_UP;
847
848			event_mask |= event;
849		}
850		if (lp->rx_head != lp->rx_tail)
851			event_mask |= LDC_EVENT_DATA_READY;
852
853		goto out;
854	}
855
856	if (lp->chan_state != orig_state)
857		goto out;
858
859	while (lp->rx_head != lp->rx_tail) {
860		struct ldc_packet *p;
861		unsigned long new;
862		int err;
863
864		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
865
866		switch (p->type) {
867		case LDC_CTRL:
868			err = process_control_frame(lp, p);
869			if (err > 0)
870				event_mask |= err;
871			break;
872
873		case LDC_DATA:
874			event_mask |= LDC_EVENT_DATA_READY;
875			err = 0;
876			break;
877
878		case LDC_ERR:
879			err = process_error_frame(lp, p);
880			break;
881
882		default:
883			err = ldc_abort(lp);
884			break;
885		}
886
887		if (err < 0)
888			break;
889
890		new = lp->rx_head;
891		new += LDC_PACKET_SIZE;
892		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
893			new = 0;
894		lp->rx_head = new;
895
896		err = __set_rx_head(lp, new);
897		if (err < 0) {
898			(void) ldc_abort(lp);
899			break;
900		}
901		if (lp->hs_state == LDC_HS_COMPLETE)
902			goto handshake_complete;
903	}
904
905out:
906	spin_unlock_irqrestore(&lp->lock, flags);
907
908	send_events(lp, event_mask);
909
910	return IRQ_HANDLED;
911}
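
/* Note that ldc_rx() above (and ldc_tx() below) accumulate events in a
 * local mask while holding lp->lock and only deliver them through
 * send_events() after dropping the lock, so the client callback can
 * safely re-enter the channel, e.g. via ldc_read().
 */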
912
913static irqreturn_t ldc_tx(int irq, void *dev_id)
914{
915	struct ldc_channel *lp = dev_id;
916	unsigned long flags, orig_state;
917	unsigned int event_mask = 0;
918
919	spin_lock_irqsave(&lp->lock, flags);
920
921	orig_state = lp->chan_state;
922
923	/* We should probably check for hypervisor errors here and
924	 * reset the LDC channel if we get one.
925	 */
926	sun4v_ldc_tx_get_state(lp->id,
927			       &lp->tx_head,
928			       &lp->tx_tail,
929			       &lp->chan_state);
930
931	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
932	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
933
934	if (lp->cfg.mode == LDC_MODE_RAW &&
935	    lp->chan_state == LDC_CHANNEL_UP) {
936		lp->hs_state = LDC_HS_COMPLETE;
937		ldc_set_state(lp, LDC_STATE_CONNECTED);
938
939		event_mask |= LDC_EVENT_UP;
940	}
941
942	spin_unlock_irqrestore(&lp->lock, flags);
943
944	send_events(lp, event_mask);
945
946	return IRQ_HANDLED;
947}
948
/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
 * XXX that addition to and removal from ldc_channel_list are
 * XXX atomic, otherwise the __ldc_channel_exists() check is
 * XXX totally pointless as another thread can slip into ldc_alloc()
 * XXX and add a channel with the same ID.  There also needs to be
 * XXX a spinlock for ldc_channel_list.
 */
956static HLIST_HEAD(ldc_channel_list);
957
958static int __ldc_channel_exists(unsigned long id)
959{
960	struct ldc_channel *lp;
961
962	hlist_for_each_entry(lp, &ldc_channel_list, list) {
963		if (lp->id == id)
964			return 1;
965	}
966	return 0;
967}
968
969static int alloc_queue(const char *name, unsigned long num_entries,
970		       struct ldc_packet **base, unsigned long *ra)
971{
972	unsigned long size, order;
973	void *q;
974
975	size = num_entries * LDC_PACKET_SIZE;
976	order = get_order(size);
977
978	q = (void *) __get_free_pages(GFP_KERNEL, order);
979	if (!q) {
980		printk(KERN_ERR PFX "Alloc of %s queue failed with "
981		       "size=%lu order=%lu\n", name, size, order);
982		return -ENOMEM;
983	}
984
985	memset(q, 0, PAGE_SIZE << order);
986
987	*base = q;
988	*ra = __pa(q);
989
990	return 0;
991}
992
993static void free_queue(unsigned long num_entries, struct ldc_packet *q)
994{
995	unsigned long size, order;
996
997	if (!q)
998		return;
999
1000	size = num_entries * LDC_PACKET_SIZE;
1001	order = get_order(size);
1002
1003	free_pages((unsigned long)q, order);
1004}
1005
1006static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
1007{
1008	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1009	/* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
1010
1011	cookie &= ~COOKIE_PGSZ_CODE;
1012
1013	return (cookie >> (13ULL + (szcode * 3ULL)));
1014}
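
/* The page size code in the top four bits selects the shift used to
 * recover the map table index: code 0 means 8K pages (shift 13) and
 * each successive code multiplies the page size by 8 (shift 13 + 3 *
 * code), mirroring pagesize_code() and make_cookie() below.  As an
 * illustrative example, a code-0 cookie of 0x6010 decodes to index
 * 0x6010 >> 13 = 3 with a page offset of 0x10.
 */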
1015
1016static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
1017		      unsigned long entry, unsigned long npages)
1018{
1019	struct ldc_mtable_entry *base;
1020	unsigned long i, shift;
1021
1022	shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
1023	base = iommu->page_table + entry;
1024	for (i = 0; i < npages; i++) {
1025		if (base->cookie)
1026			sun4v_ldc_revoke(id, cookie + (i << shift),
1027					 base->cookie);
1028		base->mte = 0;
1029	}
1030}
1031
1032/* XXX Make this configurable... XXX */
1033#define LDC_IOTABLE_SIZE	(8 * 1024)
1034
1035static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
1036{
1037	unsigned long sz, num_tsb_entries, tsbsize, order;
1038	struct ldc_iommu *ldc_iommu = &lp->iommu;
1039	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1040	struct ldc_mtable_entry *table;
1041	unsigned long hv_err;
1042	int err;
1043
1044	num_tsb_entries = LDC_IOTABLE_SIZE;
1045	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1046	spin_lock_init(&ldc_iommu->lock);
1047
1048	sz = num_tsb_entries / 8;
1049	sz = (sz + 7UL) & ~7UL;
1050	iommu->map = kzalloc(sz, GFP_KERNEL);
1051	if (!iommu->map) {
1052		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1053		return -ENOMEM;
1054	}
1055	iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
1056			    NULL, false /* no large pool */,
1057			    1 /* npools */,
1058			    true /* skip span boundary check */);
1059
1060	order = get_order(tsbsize);
1061
1062	table = (struct ldc_mtable_entry *)
1063		__get_free_pages(GFP_KERNEL, order);
1064	err = -ENOMEM;
1065	if (!table) {
1066		printk(KERN_ERR PFX "Alloc of MTE table failed, "
1067		       "size=%lu order=%lu\n", tsbsize, order);
1068		goto out_free_map;
1069	}
1070
1071	memset(table, 0, PAGE_SIZE << order);
1072
1073	ldc_iommu->page_table = table;
1074
1075	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1076					 num_tsb_entries);
1077	err = -EINVAL;
1078	if (hv_err)
1079		goto out_free_table;
1080
1081	return 0;
1082
1083out_free_table:
1084	free_pages((unsigned long) table, order);
1085	ldc_iommu->page_table = NULL;
1086
1087out_free_map:
1088	kfree(iommu->map);
1089	iommu->map = NULL;
1090
1091	return err;
1092}
1093
1094static void ldc_iommu_release(struct ldc_channel *lp)
1095{
1096	struct ldc_iommu *ldc_iommu = &lp->iommu;
1097	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1098	unsigned long num_tsb_entries, tsbsize, order;
1099
1100	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1101
1102	num_tsb_entries = iommu->poolsize * iommu->nr_pools;
1103	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1104	order = get_order(tsbsize);
1105
1106	free_pages((unsigned long) ldc_iommu->page_table, order);
1107	ldc_iommu->page_table = NULL;
1108
1109	kfree(iommu->map);
1110	iommu->map = NULL;
1111}
1112
1113struct ldc_channel *ldc_alloc(unsigned long id,
1114			      const struct ldc_channel_config *cfgp,
1115			      void *event_arg,
1116			      const char *name)
1117{
1118	struct ldc_channel *lp;
1119	const struct ldc_mode_ops *mops;
1120	unsigned long dummy1, dummy2, hv_err;
1121	u8 mss, *mssbuf;
1122	int err;
1123
1124	err = -ENODEV;
1125	if (!ldom_domaining_enabled)
1126		goto out_err;
1127
1128	err = -EINVAL;
1129	if (!cfgp)
1130		goto out_err;
1131	if (!name)
1132		goto out_err;
1133
1134	switch (cfgp->mode) {
1135	case LDC_MODE_RAW:
1136		mops = &raw_ops;
1137		mss = LDC_PACKET_SIZE;
1138		break;
1139
1140	case LDC_MODE_UNRELIABLE:
1141		mops = &nonraw_ops;
1142		mss = LDC_PACKET_SIZE - 8;
1143		break;
1144
1145	case LDC_MODE_STREAM:
1146		mops = &stream_ops;
1147		mss = LDC_PACKET_SIZE - 8 - 8;
1148		break;
1149
1150	default:
1151		goto out_err;
1152	}
1153
1154	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1155		goto out_err;
1156
1157	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1158	err = -ENODEV;
1159	if (hv_err == HV_ECHANNEL)
1160		goto out_err;
1161
1162	err = -EEXIST;
1163	if (__ldc_channel_exists(id))
1164		goto out_err;
1165
1166	mssbuf = NULL;
1167
1168	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1169	err = -ENOMEM;
1170	if (!lp)
1171		goto out_err;
1172
1173	spin_lock_init(&lp->lock);
1174
1175	lp->id = id;
1176
1177	err = ldc_iommu_init(name, lp);
1178	if (err)
1179		goto out_free_ldc;
1180
1181	lp->mops = mops;
1182	lp->mss = mss;
1183
1184	lp->cfg = *cfgp;
1185	if (!lp->cfg.mtu)
1186		lp->cfg.mtu = LDC_DEFAULT_MTU;
1187
1188	if (lp->cfg.mode == LDC_MODE_STREAM) {
1189		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1190		if (!mssbuf) {
1191			err = -ENOMEM;
1192			goto out_free_iommu;
1193		}
1194		lp->mssbuf = mssbuf;
1195	}
1196
1197	lp->event_arg = event_arg;
1198
1199	/* XXX allow setting via ldc_channel_config to override defaults
1200	 * XXX or use some formula based upon mtu
1201	 */
1202	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1203	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1204
1205	err = alloc_queue("TX", lp->tx_num_entries,
1206			  &lp->tx_base, &lp->tx_ra);
1207	if (err)
1208		goto out_free_mssbuf;
1209
1210	err = alloc_queue("RX", lp->rx_num_entries,
1211			  &lp->rx_base, &lp->rx_ra);
1212	if (err)
1213		goto out_free_txq;
1214
1215	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1216
1217	lp->hs_state = LDC_HS_CLOSED;
1218	ldc_set_state(lp, LDC_STATE_INIT);
1219
1220	INIT_HLIST_NODE(&lp->list);
1221	hlist_add_head(&lp->list, &ldc_channel_list);
1222
1223	INIT_HLIST_HEAD(&lp->mh_list);
1224
1225	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1226	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1227
1228	err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1229			  lp->rx_irq_name, lp);
1230	if (err)
1231		goto out_free_txq;
1232
1233	err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1234			  lp->tx_irq_name, lp);
1235	if (err) {
1236		free_irq(lp->cfg.rx_irq, lp);
1237		goto out_free_txq;
1238	}
1239
1240	return lp;
1241
1242out_free_txq:
1243	free_queue(lp->tx_num_entries, lp->tx_base);
1244
1245out_free_mssbuf:
1246	kfree(mssbuf);
1247
1248out_free_iommu:
1249	ldc_iommu_release(lp);
1250
1251out_free_ldc:
1252	kfree(lp);
1253
1254out_err:
1255	return ERR_PTR(err);
1256}
1257EXPORT_SYMBOL(ldc_alloc);
1258
1259void ldc_unbind(struct ldc_channel *lp)
1260{
1261	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1262		free_irq(lp->cfg.rx_irq, lp);
1263		free_irq(lp->cfg.tx_irq, lp);
1264		lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1265	}
1266
1267	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1268		sun4v_ldc_tx_qconf(lp->id, 0, 0);
1269		sun4v_ldc_rx_qconf(lp->id, 0, 0);
1270		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1271	}
1272	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1273		free_queue(lp->tx_num_entries, lp->tx_base);
1274		free_queue(lp->rx_num_entries, lp->rx_base);
1275		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1276	}
1277
1278	ldc_set_state(lp, LDC_STATE_INIT);
1279}
1280EXPORT_SYMBOL(ldc_unbind);
1281
1282void ldc_free(struct ldc_channel *lp)
1283{
1284	ldc_unbind(lp);
1285	hlist_del(&lp->list);
1286	kfree(lp->mssbuf);
1287	ldc_iommu_release(lp);
1288
1289	kfree(lp);
1290}
1291EXPORT_SYMBOL(ldc_free);
1292
1293/* Bind the channel.  This registers the LDC queues with
1294 * the hypervisor and puts the channel into a pseudo-listening
1295 * state.  This does not initiate a handshake, ldc_connect() does
1296 * that.
1297 */
1298int ldc_bind(struct ldc_channel *lp)
1299{
1300	unsigned long hv_err, flags;
1301	int err = -EINVAL;
1302
1303	if (lp->state != LDC_STATE_INIT)
1304		return -EINVAL;
1305
1306	spin_lock_irqsave(&lp->lock, flags);
1307
1308	enable_irq(lp->cfg.rx_irq);
1309	enable_irq(lp->cfg.tx_irq);
1310
1311	lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1312
1313	err = -ENODEV;
1314	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1315	if (hv_err)
1316		goto out_free_irqs;
1317
1318	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1319	if (hv_err)
1320		goto out_free_irqs;
1321
1322	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1323	if (hv_err)
1324		goto out_unmap_tx;
1325
1326	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1327	if (hv_err)
1328		goto out_unmap_tx;
1329
1330	lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1331
1332	hv_err = sun4v_ldc_tx_get_state(lp->id,
1333					&lp->tx_head,
1334					&lp->tx_tail,
1335					&lp->chan_state);
1336	err = -EBUSY;
1337	if (hv_err)
1338		goto out_unmap_rx;
1339
1340	lp->tx_acked = lp->tx_head;
1341
1342	lp->hs_state = LDC_HS_OPEN;
1343	ldc_set_state(lp, LDC_STATE_BOUND);
1344
1345	spin_unlock_irqrestore(&lp->lock, flags);
1346
1347	return 0;
1348
1349out_unmap_rx:
1350	lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1351	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1352
1353out_unmap_tx:
1354	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1355
1356out_free_irqs:
1357	lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1358	free_irq(lp->cfg.tx_irq, lp);
1359	free_irq(lp->cfg.rx_irq, lp);
1360
1361	spin_unlock_irqrestore(&lp->lock, flags);
1362
1363	return err;
1364}
1365EXPORT_SYMBOL(ldc_bind);
1366
1367int ldc_connect(struct ldc_channel *lp)
1368{
1369	unsigned long flags;
1370	int err;
1371
1372	if (lp->cfg.mode == LDC_MODE_RAW)
1373		return -EINVAL;
1374
1375	spin_lock_irqsave(&lp->lock, flags);
1376
1377	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1378	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1379	    lp->hs_state != LDC_HS_OPEN)
1380		err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
1381	else
1382		err = start_handshake(lp);
1383
1384	spin_unlock_irqrestore(&lp->lock, flags);
1385
1386	return err;
1387}
1388EXPORT_SYMBOL(ldc_connect);
1389
1390int ldc_disconnect(struct ldc_channel *lp)
1391{
1392	unsigned long hv_err, flags;
1393	int err;
1394
1395	if (lp->cfg.mode == LDC_MODE_RAW)
1396		return -EINVAL;
1397
1398	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1399	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1400		return -EINVAL;
1401
1402	spin_lock_irqsave(&lp->lock, flags);
1403
1404	err = -ENODEV;
1405	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1406	if (hv_err)
1407		goto out_err;
1408
1409	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1410	if (hv_err)
1411		goto out_err;
1412
1413	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1414	if (hv_err)
1415		goto out_err;
1416
1417	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1418	if (hv_err)
1419		goto out_err;
1420
1421	ldc_set_state(lp, LDC_STATE_BOUND);
1422	lp->hs_state = LDC_HS_OPEN;
1423	lp->flags |= LDC_FLAG_RESET;
1424
1425	spin_unlock_irqrestore(&lp->lock, flags);
1426
1427	return 0;
1428
1429out_err:
1430	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1431	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1432	free_irq(lp->cfg.tx_irq, lp);
1433	free_irq(lp->cfg.rx_irq, lp);
1434	lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1435		       LDC_FLAG_REGISTERED_QUEUES);
1436	ldc_set_state(lp, LDC_STATE_INIT);
1437
1438	spin_unlock_irqrestore(&lp->lock, flags);
1439
1440	return err;
1441}
1442EXPORT_SYMBOL(ldc_disconnect);
1443
1444int ldc_state(struct ldc_channel *lp)
1445{
1446	return lp->state;
1447}
1448EXPORT_SYMBOL(ldc_state);
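
/* A minimal sketch of how a client is expected to drive these entry
 * points (the configuration values are illustrative and error
 * handling is omitted):
 *
 *	struct ldc_channel_config cfg = {
 *		.event	= my_event_handler,
 *		.mode	= LDC_MODE_UNRELIABLE,
 *		.mtu	= 0,
 *		.rx_irq	= rx_ino,
 *		.tx_irq	= tx_ino,
 *	};
 *	struct ldc_channel *lp = ldc_alloc(chan_id, &cfg, my_arg, "MYDEV");
 *
 *	ldc_bind(lp);
 *	ldc_connect(lp);
 *
 * The event callback is then invoked with LDC_EVENT_UP once the
 * handshake finishes, after which ldc_read()/ldc_write() may be used;
 * a zero mtu is replaced with LDC_DEFAULT_MTU, and ldc_free() (which
 * calls ldc_unbind()) tears the channel back down.
 */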
1449
1450static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1451{
1452	struct ldc_packet *p;
1453	unsigned long new_tail;
1454	int err;
1455
1456	if (size > LDC_PACKET_SIZE)
1457		return -EMSGSIZE;
1458
1459	p = data_get_tx_packet(lp, &new_tail);
1460	if (!p)
1461		return -EAGAIN;
1462
1463	memcpy(p, buf, size);
1464
1465	err = send_tx_packet(lp, p, new_tail);
1466	if (!err)
1467		err = size;
1468
1469	return err;
1470}
1471
1472static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1473{
1474	struct ldc_packet *p;
1475	unsigned long hv_err, new;
1476	int err;
1477
1478	if (size < LDC_PACKET_SIZE)
1479		return -EINVAL;
1480
1481	hv_err = sun4v_ldc_rx_get_state(lp->id,
1482					&lp->rx_head,
1483					&lp->rx_tail,
1484					&lp->chan_state);
1485	if (hv_err)
1486		return ldc_abort(lp);
1487
1488	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1489	    lp->chan_state == LDC_CHANNEL_RESETTING)
1490		return -ECONNRESET;
1491
1492	if (lp->rx_head == lp->rx_tail)
1493		return 0;
1494
1495	p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1496	memcpy(buf, p, LDC_PACKET_SIZE);
1497
1498	new = rx_advance(lp, lp->rx_head);
1499	lp->rx_head = new;
1500
1501	err = __set_rx_head(lp, new);
1502	if (err < 0)
1503		err = -ECONNRESET;
1504	else
1505		err = LDC_PACKET_SIZE;
1506
1507	return err;
1508}
1509
1510static const struct ldc_mode_ops raw_ops = {
1511	.write		=	write_raw,
1512	.read		=	read_raw,
1513};
1514
1515static int write_nonraw(struct ldc_channel *lp, const void *buf,
1516			unsigned int size)
1517{
1518	unsigned long hv_err, tail;
1519	unsigned int copied;
1520	u32 seq;
1521	int err;
1522
1523	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1524					&lp->chan_state);
1525	if (unlikely(hv_err))
1526		return -EBUSY;
1527
1528	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1529		return ldc_abort(lp);
1530
1531	if (!tx_has_space_for(lp, size))
1532		return -EAGAIN;
1533
1534	seq = lp->snd_nxt;
1535	copied = 0;
1536	tail = lp->tx_tail;
1537	while (copied < size) {
1538		struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1539		u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1540			    p->u.u_data :
1541			    p->u.r.r_data);
1542		int data_len;
1543
1544		p->type = LDC_DATA;
1545		p->stype = LDC_INFO;
1546		p->ctrl = 0;
1547
1548		data_len = size - copied;
1549		if (data_len > lp->mss)
1550			data_len = lp->mss;
1551
1552		BUG_ON(data_len > LDC_LEN);
1553
1554		p->env = (data_len |
1555			  (copied == 0 ? LDC_START : 0) |
1556			  (data_len == size - copied ? LDC_STOP : 0));
1557
1558		p->seqid = ++seq;
1559
1560		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1561		       p->type,
1562		       p->stype,
1563		       p->ctrl,
1564		       p->env,
1565		       p->seqid);
1566
1567		memcpy(data, buf, data_len);
1568		buf += data_len;
1569		copied += data_len;
1570
1571		tail = tx_advance(lp, tail);
1572	}
1573
1574	err = set_tx_tail(lp, tail);
1575	if (!err) {
1576		lp->snd_nxt = seq;
1577		err = size;
1578	}
1579
1580	return err;
1581}
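
/* Fragmentation example for write_nonraw() (illustrative sizes): with
 * the stream-mode mss of 48, a 100-byte write goes out as three
 * packets whose env fields are 48|LDC_START, 48, and 4|LDC_STOP, each
 * consuming one seqid.  A payload that fits in a single packet carries
 * both LDC_START and LDC_STOP.  read_nonraw() below relies on these
 * bits to delimit messages when reassembling.
 */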
1582
1583static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1584		      struct ldc_packet *first_frag)
1585{
1586	int err;
1587
1588	if (first_frag)
1589		lp->rcv_nxt = first_frag->seqid - 1;
1590
1591	err = send_data_nack(lp, p);
1592	if (err)
1593		return err;
1594
1595	err = __set_rx_head(lp, lp->rx_tail);
1596	if (err < 0)
1597		return ldc_abort(lp);
1598
1599	return 0;
1600}
1601
1602static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1603{
1604	if (p->stype & LDC_ACK) {
1605		int err = process_data_ack(lp, p);
1606		if (err)
1607			return err;
1608	}
1609	if (p->stype & LDC_NACK)
1610		return ldc_abort(lp);
1611
1612	return 0;
1613}
1614
1615static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1616{
1617	unsigned long dummy;
1618	int limit = 1000;
1619
1620	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1621	       cur_head, lp->rx_head, lp->rx_tail);
1622	while (limit-- > 0) {
1623		unsigned long hv_err;
1624
1625		hv_err = sun4v_ldc_rx_get_state(lp->id,
1626						&dummy,
1627						&lp->rx_tail,
1628						&lp->chan_state);
1629		if (hv_err)
1630			return ldc_abort(lp);
1631
1632		if (lp->chan_state == LDC_CHANNEL_DOWN ||
1633		    lp->chan_state == LDC_CHANNEL_RESETTING)
1634			return -ECONNRESET;
1635
1636		if (cur_head != lp->rx_tail) {
1637			ldcdbg(DATA, "DATA WAIT DONE "
1638			       "head[%lx] tail[%lx] chan_state[%lx]\n",
1639			       dummy, lp->rx_tail, lp->chan_state);
1640			return 0;
1641		}
1642
1643		udelay(1);
1644	}
1645	return -EAGAIN;
1646}
1647
1648static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1649{
1650	int err = __set_rx_head(lp, head);
1651
1652	if (err < 0)
1653		return ldc_abort(lp);
1654
1655	lp->rx_head = head;
1656	return 0;
1657}
1658
1659static void send_data_ack(struct ldc_channel *lp)
1660{
1661	unsigned long new_tail;
1662	struct ldc_packet *p;
1663
1664	p = data_get_tx_packet(lp, &new_tail);
1665	if (likely(p)) {
1666		int err;
1667
1668		memset(p, 0, sizeof(*p));
1669		p->type = LDC_DATA;
1670		p->stype = LDC_ACK;
1671		p->ctrl = 0;
1672		p->seqid = lp->snd_nxt + 1;
1673		p->u.r.ackid = lp->rcv_nxt;
1674
1675		err = send_tx_packet(lp, p, new_tail);
1676		if (!err)
1677			lp->snd_nxt++;
1678	}
1679}
1680
1681static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1682{
1683	struct ldc_packet *first_frag;
1684	unsigned long hv_err, new;
1685	int err, copied;
1686
1687	hv_err = sun4v_ldc_rx_get_state(lp->id,
1688					&lp->rx_head,
1689					&lp->rx_tail,
1690					&lp->chan_state);
1691	if (hv_err)
1692		return ldc_abort(lp);
1693
1694	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1695	    lp->chan_state == LDC_CHANNEL_RESETTING)
1696		return -ECONNRESET;
1697
1698	if (lp->rx_head == lp->rx_tail)
1699		return 0;
1700
1701	first_frag = NULL;
1702	copied = err = 0;
1703	new = lp->rx_head;
1704	while (1) {
1705		struct ldc_packet *p;
1706		int pkt_len;
1707
1708		BUG_ON(new == lp->rx_tail);
1709		p = lp->rx_base + (new / LDC_PACKET_SIZE);
1710
1711		ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1712		       "rcv_nxt[%08x]\n",
1713		       p->type,
1714		       p->stype,
1715		       p->ctrl,
1716		       p->env,
1717		       p->seqid,
1718		       p->u.r.ackid,
1719		       lp->rcv_nxt);
1720
1721		if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1722			err = rx_bad_seq(lp, p, first_frag);
1723			copied = 0;
1724			break;
1725		}
1726
1727		if (p->type & LDC_CTRL) {
1728			err = process_control_frame(lp, p);
1729			if (err < 0)
1730				break;
1731			err = 0;
1732		}
1733
1734		lp->rcv_nxt = p->seqid;
1735
1736		if (!(p->type & LDC_DATA)) {
1737			new = rx_advance(lp, new);
1738			goto no_data;
1739		}
1740		if (p->stype & (LDC_ACK | LDC_NACK)) {
1741			err = data_ack_nack(lp, p);
1742			if (err)
1743				break;
1744		}
1745		if (!(p->stype & LDC_INFO)) {
1746			new = rx_advance(lp, new);
1747			err = rx_set_head(lp, new);
1748			if (err)
1749				break;
1750			goto no_data;
1751		}
1752
1753		pkt_len = p->env & LDC_LEN;
1754
1755		/* Every initial packet starts with the START bit set.
1756		 *
1757		 * Singleton packets will have both START+STOP set.
1758		 *
1759		 * Fragments will have START set in the first frame, STOP
1760		 * set in the last frame, and neither bit set in middle
1761		 * frames of the packet.
1762		 *
1763		 * Therefore if we are at the beginning of a packet and
1764		 * we don't see START, or we are in the middle of a fragmented
1765		 * packet and do see START, we are unsynchronized and should
1766		 * flush the RX queue.
1767		 */
1768		if ((first_frag == NULL && !(p->env & LDC_START)) ||
1769		    (first_frag != NULL &&  (p->env & LDC_START))) {
1770			if (!first_frag)
1771				new = rx_advance(lp, new);
1772
1773			err = rx_set_head(lp, new);
1774			if (err)
1775				break;
1776
1777			if (!first_frag)
1778				goto no_data;
1779		}
1780		if (!first_frag)
1781			first_frag = p;
1782
1783		if (pkt_len > size - copied) {
1784			/* User didn't give us a big enough buffer,
1785			 * what to do?  This is a pretty serious error.
1786			 *
1787			 * Since we haven't updated the RX ring head to
1788			 * consume any of the packets, signal the error
1789			 * to the user and just leave the RX ring alone.
1790			 *
1791			 * This seems the best behavior because this allows
1792			 * a user of the LDC layer to start with a small
1793			 * RX buffer for ldc_read() calls and use -EMSGSIZE
			 * as a cue to enlarge its read buffer.
1795			 */
1796			err = -EMSGSIZE;
1797			break;
1798		}
1799
1800		/* Ok, we are gonna eat this one.  */
1801		new = rx_advance(lp, new);
1802
1803		memcpy(buf,
1804		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1805			p->u.u_data : p->u.r.r_data), pkt_len);
1806		buf += pkt_len;
1807		copied += pkt_len;
1808
1809		if (p->env & LDC_STOP)
1810			break;
1811
1812no_data:
1813		if (new == lp->rx_tail) {
1814			err = rx_data_wait(lp, new);
1815			if (err)
1816				break;
1817		}
1818	}
1819
1820	if (!err)
1821		err = rx_set_head(lp, new);
1822
1823	if (err && first_frag)
1824		lp->rcv_nxt = first_frag->seqid - 1;
1825
1826	if (!err) {
1827		err = copied;
1828		if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1829			send_data_ack(lp);
1830	}
1831
1832	return err;
1833}
1834
1835static const struct ldc_mode_ops nonraw_ops = {
1836	.write		=	write_nonraw,
1837	.read		=	read_nonraw,
1838};
1839
1840static int write_stream(struct ldc_channel *lp, const void *buf,
1841			unsigned int size)
1842{
1843	if (size > lp->cfg.mtu)
1844		size = lp->cfg.mtu;
1845	return write_nonraw(lp, buf, size);
1846}
1847
1848static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1849{
1850	if (!lp->mssbuf_len) {
1851		int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1852		if (err < 0)
1853			return err;
1854
1855		lp->mssbuf_len = err;
1856		lp->mssbuf_off = 0;
1857	}
1858
1859	if (size > lp->mssbuf_len)
1860		size = lp->mssbuf_len;
1861	memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1862
1863	lp->mssbuf_off += size;
1864	lp->mssbuf_len -= size;
1865
1866	return size;
1867}
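
/* In stream mode the unit of transfer is a record of at most cfg.mtu
 * bytes: write_stream() silently truncates anything larger, while
 * read_stream() pulls one whole record into lp->mssbuf via
 * read_nonraw() and then doles it out to the caller in arbitrarily
 * small pieces, tracked by mssbuf_off/mssbuf_len.
 */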
1868
1869static const struct ldc_mode_ops stream_ops = {
1870	.write		=	write_stream,
1871	.read		=	read_stream,
1872};
1873
1874int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1875{
1876	unsigned long flags;
1877	int err;
1878
1879	if (!buf)
1880		return -EINVAL;
1881
1882	if (!size)
1883		return 0;
1884
1885	spin_lock_irqsave(&lp->lock, flags);
1886
1887	if (lp->hs_state != LDC_HS_COMPLETE)
1888		err = -ENOTCONN;
1889	else
1890		err = lp->mops->write(lp, buf, size);
1891
1892	spin_unlock_irqrestore(&lp->lock, flags);
1893
1894	return err;
1895}
1896EXPORT_SYMBOL(ldc_write);
1897
1898int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1899{
1900	unsigned long flags;
1901	int err;
1902
1903	if (!buf)
1904		return -EINVAL;
1905
1906	if (!size)
1907		return 0;
1908
1909	spin_lock_irqsave(&lp->lock, flags);
1910
1911	if (lp->hs_state != LDC_HS_COMPLETE)
1912		err = -ENOTCONN;
1913	else
1914		err = lp->mops->read(lp, buf, size);
1915
1916	spin_unlock_irqrestore(&lp->lock, flags);
1917
1918	return err;
1919}
1920EXPORT_SYMBOL(ldc_read);
1921
1922static u64 pagesize_code(void)
1923{
1924	switch (PAGE_SIZE) {
1925	default:
1926	case (8ULL * 1024ULL):
1927		return 0;
1928	case (64ULL * 1024ULL):
1929		return 1;
1930	case (512ULL * 1024ULL):
1931		return 2;
1932	case (4ULL * 1024ULL * 1024ULL):
1933		return 3;
1934	case (32ULL * 1024ULL * 1024ULL):
1935		return 4;
1936	case (256ULL * 1024ULL * 1024ULL):
1937		return 5;
1938	}
1939}
1940
1941static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1942{
1943	return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1944		(index << PAGE_SHIFT) |
1945		page_offset);
1946}
1947
1948
1949static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1950					     unsigned long npages)
1951{
1952	long entry;
1953
1954	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
1955				      npages, NULL, (unsigned long)-1, 0);
1956	if (unlikely(entry < 0))
1957		return NULL;
1958
1959	return iommu->page_table + entry;
1960}
1961
1962static u64 perm_to_mte(unsigned int map_perm)
1963{
1964	u64 mte_base;
1965
1966	mte_base = pagesize_code();
1967
1968	if (map_perm & LDC_MAP_SHADOW) {
1969		if (map_perm & LDC_MAP_R)
1970			mte_base |= LDC_MTE_COPY_R;
1971		if (map_perm & LDC_MAP_W)
1972			mte_base |= LDC_MTE_COPY_W;
1973	}
1974	if (map_perm & LDC_MAP_DIRECT) {
1975		if (map_perm & LDC_MAP_R)
1976			mte_base |= LDC_MTE_READ;
1977		if (map_perm & LDC_MAP_W)
1978			mte_base |= LDC_MTE_WRITE;
1979		if (map_perm & LDC_MAP_X)
1980			mte_base |= LDC_MTE_EXEC;
1981	}
1982	if (map_perm & LDC_MAP_IO) {
1983		if (map_perm & LDC_MAP_R)
1984			mte_base |= LDC_MTE_IOMMU_R;
1985		if (map_perm & LDC_MAP_W)
1986			mte_base |= LDC_MTE_IOMMU_W;
1987	}
1988
1989	return mte_base;
1990}
1991
1992static int pages_in_region(unsigned long base, long len)
1993{
1994	int count = 0;
1995
1996	do {
1997		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
1998
1999		len -= (new - base);
2000		base = new;
2001		count++;
2002	} while (len > 0);
2003
2004	return count;
2005}
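
/* Example (with 8K pages): a 0x300-byte region that starts at offset
 * 0x1f00 within a page crosses one page boundary, so this returns 2;
 * the same length starting exactly on a page boundary returns 1.
 */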
2006
2007struct cookie_state {
2008	struct ldc_mtable_entry		*page_table;
2009	struct ldc_trans_cookie		*cookies;
2010	u64				mte_base;
2011	u64				prev_cookie;
2012	u32				pte_idx;
2013	u32				nc;
2014};
2015
2016static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2017			 unsigned long off, unsigned long len)
2018{
2019	do {
2020		unsigned long tlen, new = pa + PAGE_SIZE;
2021		u64 this_cookie;
2022
2023		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2024
2025		tlen = PAGE_SIZE;
2026		if (off)
2027			tlen = PAGE_SIZE - off;
2028		if (tlen > len)
2029			tlen = len;
2030
2031		this_cookie = make_cookie(sp->pte_idx,
2032					  pagesize_code(), off);
2033
2034		off = 0;
2035
2036		if (this_cookie == sp->prev_cookie) {
2037			sp->cookies[sp->nc - 1].cookie_size += tlen;
2038		} else {
2039			sp->cookies[sp->nc].cookie_addr = this_cookie;
2040			sp->cookies[sp->nc].cookie_size = tlen;
2041			sp->nc++;
2042		}
2043		sp->prev_cookie = this_cookie + tlen;
2044
2045		sp->pte_idx++;
2046
2047		len -= tlen;
2048		pa = new;
2049	} while (len > 0);
2050}
2051
2052static int sg_count_one(struct scatterlist *sg)
2053{
2054	unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2055	long len = sg->length;
2056
2057	if ((sg->offset | len) & (8UL - 1))
2058		return -EFAULT;
2059
2060	return pages_in_region(base + sg->offset, len);
2061}
2062
2063static int sg_count_pages(struct scatterlist *sg, int num_sg)
2064{
2065	int count;
2066	int i;
2067
2068	count = 0;
2069	for (i = 0; i < num_sg; i++) {
2070		int err = sg_count_one(sg + i);
2071		if (err < 0)
2072			return err;
2073		count += err;
2074	}
2075
2076	return count;
2077}
2078
2079int ldc_map_sg(struct ldc_channel *lp,
2080	       struct scatterlist *sg, int num_sg,
2081	       struct ldc_trans_cookie *cookies, int ncookies,
2082	       unsigned int map_perm)
2083{
2084	unsigned long i, npages;
2085	struct ldc_mtable_entry *base;
2086	struct cookie_state state;
2087	struct ldc_iommu *iommu;
2088	int err;
2089
2090	if (map_perm & ~LDC_MAP_ALL)
2091		return -EINVAL;
2092
2093	err = sg_count_pages(sg, num_sg);
2094	if (err < 0)
2095		return err;
2096
2097	npages = err;
2098	if (err > ncookies)
2099		return -EMSGSIZE;
2100
2101	iommu = &lp->iommu;
2102
2103	base = alloc_npages(iommu, npages);
2104
2105	if (!base)
2106		return -ENOMEM;
2107
2108	state.page_table = iommu->page_table;
2109	state.cookies = cookies;
2110	state.mte_base = perm_to_mte(map_perm);
2111	state.prev_cookie = ~(u64)0;
2112	state.pte_idx = (base - iommu->page_table);
2113	state.nc = 0;
2114
2115	for (i = 0; i < num_sg; i++)
2116		fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
2117			     sg[i].offset, sg[i].length);
2118
2119	return state.nc;
2120}
2121EXPORT_SYMBOL(ldc_map_sg);
2122
2123int ldc_map_single(struct ldc_channel *lp,
2124		   void *buf, unsigned int len,
2125		   struct ldc_trans_cookie *cookies, int ncookies,
2126		   unsigned int map_perm)
2127{
2128	unsigned long npages, pa;
2129	struct ldc_mtable_entry *base;
2130	struct cookie_state state;
2131	struct ldc_iommu *iommu;
2132
2133	if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2134		return -EINVAL;
2135
2136	pa = __pa(buf);
2137	if ((pa | len) & (8UL - 1))
2138		return -EFAULT;
2139
2140	npages = pages_in_region(pa, len);
2141
2142	iommu = &lp->iommu;
2143
2144	base = alloc_npages(iommu, npages);
2145
2146	if (!base)
2147		return -ENOMEM;
2148
2149	state.page_table = iommu->page_table;
2150	state.cookies = cookies;
2151	state.mte_base = perm_to_mte(map_perm);
2152	state.prev_cookie = ~(u64)0;
2153	state.pte_idx = (base - iommu->page_table);
2154	state.nc = 0;
2155	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2156	BUG_ON(state.nc > ncookies);
2157
2158	return state.nc;
2159}
2160EXPORT_SYMBOL(ldc_map_single);
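
/* A minimal sketch of caller-side usage (assumed, with error handling
 * omitted):
 *
 *	struct ldc_trans_cookie cookies[2];
 *	int nc = ldc_map_single(lp, buf, len, cookies, 2,
 *				LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W);
 *
 * buf and len must both be 8-byte aligned; on success nc is the number
 * of cookie entries that were filled in.  Those cookies describe the
 * buffer to the peer, data is moved against a cookie array with
 * ldc_copy(), and the mapping is released with ldc_unmap().
 */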
2161
2162
2163static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2164			u64 cookie, u64 size)
2165{
2166	unsigned long npages, entry;
2167
2168	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2169
2170	entry = ldc_cookie_to_index(cookie, iommu);
2171	ldc_demap(iommu, id, cookie, entry, npages);
2172	iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
2173}
2174
2175void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2176	       int ncookies)
2177{
2178	struct ldc_iommu *iommu = &lp->iommu;
2179	int i;
2180	unsigned long flags;
2181
2182	spin_lock_irqsave(&iommu->lock, flags);
2183	for (i = 0; i < ncookies; i++) {
2184		u64 addr = cookies[i].cookie_addr;
2185		u64 size = cookies[i].cookie_size;
2186
2187		free_npages(lp->id, iommu, addr, size);
2188	}
2189	spin_unlock_irqrestore(&iommu->lock, flags);
2190}
2191EXPORT_SYMBOL(ldc_unmap);
2192
2193int ldc_copy(struct ldc_channel *lp, int copy_dir,
2194	     void *buf, unsigned int len, unsigned long offset,
2195	     struct ldc_trans_cookie *cookies, int ncookies)
2196{
2197	unsigned int orig_len;
2198	unsigned long ra;
2199	int i;
2200
2201	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2202		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2203		       lp->id, copy_dir);
2204		return -EINVAL;
2205	}
2206
2207	ra = __pa(buf);
2208	if ((ra | len | offset) & (8UL - 1)) {
2209		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2210		       "ra[%lx] len[%x] offset[%lx]\n",
2211		       lp->id, ra, len, offset);
2212		return -EFAULT;
2213	}
2214
2215	if (lp->hs_state != LDC_HS_COMPLETE ||
2216	    (lp->flags & LDC_FLAG_RESET)) {
2217		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2218		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2219		return -ECONNRESET;
2220	}
2221
2222	orig_len = len;
2223	for (i = 0; i < ncookies; i++) {
2224		unsigned long cookie_raddr = cookies[i].cookie_addr;
2225		unsigned long this_len = cookies[i].cookie_size;
2226		unsigned long actual_len;
2227
2228		if (unlikely(offset)) {
2229			unsigned long this_off = offset;
2230
2231			if (this_off > this_len)
2232				this_off = this_len;
2233
2234			offset -= this_off;
2235			this_len -= this_off;
2236			if (!this_len)
2237				continue;
2238			cookie_raddr += this_off;
2239		}
2240
2241		if (this_len > len)
2242			this_len = len;
2243
2244		while (1) {
2245			unsigned long hv_err;
2246
2247			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2248						cookie_raddr, ra,
2249						this_len, &actual_len);
2250			if (unlikely(hv_err)) {
2251				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2252				       "HV error %lu\n",
2253				       lp->id, hv_err);
2254				if (lp->hs_state != LDC_HS_COMPLETE ||
2255				    (lp->flags & LDC_FLAG_RESET))
2256					return -ECONNRESET;
2257				else
2258					return -EFAULT;
2259			}
2260
2261			cookie_raddr += actual_len;
2262			ra += actual_len;
2263			len -= actual_len;
2264			if (actual_len == this_len)
2265				break;
2266
2267			this_len -= actual_len;
2268		}
2269
2270		if (!len)
2271			break;
2272	}
2273
2274	/* It is caller policy what to do about short copies.
2275	 * For example, a networking driver can declare the
2276	 * packet a runt and drop it.
2277	 */
2278
2279	return orig_len - len;
2280}
2281EXPORT_SYMBOL(ldc_copy);
2282
2283void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2284			  struct ldc_trans_cookie *cookies, int *ncookies,
2285			  unsigned int map_perm)
2286{
2287	void *buf;
2288	int err;
2289
2290	if (len & (8UL - 1))
2291		return ERR_PTR(-EINVAL);
2292
2293	buf = kzalloc(len, GFP_ATOMIC);
2294	if (!buf)
2295		return ERR_PTR(-ENOMEM);
2296
2297	err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2298	if (err < 0) {
2299		kfree(buf);
2300		return ERR_PTR(err);
2301	}
2302	*ncookies = err;
2303
2304	return buf;
2305}
2306EXPORT_SYMBOL(ldc_alloc_exp_dring);
2307
2308void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
2309			struct ldc_trans_cookie *cookies, int ncookies)
2310{
2311	ldc_unmap(lp, cookies, ncookies);
2312	kfree(buf);
2313}
2314EXPORT_SYMBOL(ldc_free_exp_dring);
2315
2316static int __init ldc_init(void)
2317{
2318	unsigned long major, minor;
2319	struct mdesc_handle *hp;
2320	const u64 *v;
2321	int err;
2322	u64 mp;
2323
2324	hp = mdesc_grab();
2325	if (!hp)
2326		return -ENODEV;
2327
2328	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2329	err = -ENODEV;
2330	if (mp == MDESC_NODE_NULL)
2331		goto out;
2332
2333	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2334	if (!v)
2335		goto out;
2336
2337	major = 1;
2338	minor = 0;
2339	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2340		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2341		goto out;
2342	}
2343
2344	printk(KERN_INFO "%s", version);
2345
2346	if (!*v) {
2347		printk(KERN_INFO PFX "Domaining disabled.\n");
2348		goto out;
2349	}
2350	ldom_domaining_enabled = 1;
2351	err = 0;
2352
2353out:
2354	mdesc_release(hp);
2355	return err;
2356}
2357
2358core_initcall(ldc_init);
2359