/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  MR/MW functions
 *
 *  Authors: Dietmar Decker <ddecker@de.ibm.com>
 *           Christoph Raisch <raisch@de.ibm.com>
 *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/slab.h>
#include <rdma/ib_umem.h>

#include "ehca_iverbs.h"
#include "ehca_mrmw.h"
#include "hcp_if.h"
#include "hipz_hw.h"

#define NUM_CHUNKS(length, chunk_size) \
	(((length) + (chunk_size - 1)) / (chunk_size))
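/*
 * e.g. NUM_CHUNKS(0x2001, 0x1000) == 3 -- lengths round up to whole
 * chunks; callers add the in-page start offset to the length first.
 */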

/* max number of rpages (per hcall register_rpages) */
#define MAX_RPAGES 512

/* DMEM toleration management */
#define EHCA_SECTSHIFT         SECTION_SIZE_BITS
#define EHCA_SECTSIZE          (1UL << EHCA_SECTSHIFT)
#define EHCA_HUGEPAGESHIFT     34
#define EHCA_HUGEPAGE_SIZE     (1UL << EHCA_HUGEPAGESHIFT)
#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)
#define EHCA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL
#define EHCA_DIR_INDEX_SHIFT 13                   /* 8k Entries in 64k block */
#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2)
#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT)
#define EHCA_TOP_MAP_SIZE (0x10000)               /* currently fixed map size */
#define EHCA_DIR_MAP_SIZE (0x10000)
#define EHCA_ENT_MAP_SIZE (0x10000)
#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1)

static unsigned long ehca_mr_len;

/*
 * Memory map data structures
 */
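/*
 * The bus map is a three-level radix tree over memory sections: a
 * section number (physical address >> EHCA_SECTSHIFT) splits into
 * top/dir/ent indices of EHCA_DIR_INDEX_SHIFT bits each, e.g.
 *
 *	top = secnum >> EHCA_TOP_INDEX_SHIFT;
 *	dir = (secnum >> EHCA_DIR_INDEX_SHIFT) & EHCA_INDEX_MASK;
 *	idx = secnum & EHCA_INDEX_MASK;
 *
 * ehca_calc_sectbase() below performs the inverse composition.
 */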
struct ehca_dir_bmap {
	u64 ent[EHCA_MAP_ENTRIES];
};
struct ehca_top_bmap {
	struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
};
struct ehca_bmap {
	struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
};

static struct ehca_bmap *ehca_bmap;

static struct kmem_cache *mr_cache;
static struct kmem_cache *mw_cache;

enum ehca_mr_pgsize {
	EHCA_MR_PGSIZE4K  = 0x1000L,
	EHCA_MR_PGSIZE64K = 0x10000L,
	EHCA_MR_PGSIZE1M  = 0x100000L,
	EHCA_MR_PGSIZE16M = 0x1000000L
};

#define EHCA_MR_PGSHIFT4K  12
#define EHCA_MR_PGSHIFT64K 16
#define EHCA_MR_PGSHIFT1M  20
#define EHCA_MR_PGSHIFT16M 24

static u64 ehca_map_vaddr(void *caddr);

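/*
 * The firmware encodes the hw page size as (log2(size) - 12) / 4:
 * 4K -> 0, 64K -> 1, 1M -> 2, 16M -> 3.
 */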
static u32 ehca_encode_hwpage_size(u32 pgsize)
{
	int log = ilog2(pgsize);
	WARN_ON(log < 12 || log > 24 || log & 3);
	return (log - 12) / 4;
}

static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
{
	return rounddown_pow_of_two(shca->hca_cap_mr_pgsize);
}

static struct ehca_mr *ehca_mr_new(void)
{
	struct ehca_mr *me;

	me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
	if (me)
		spin_lock_init(&me->mrlock);
	else
		ehca_gen_err("alloc failed");

	return me;
}

static void ehca_mr_delete(struct ehca_mr *me)
{
	kmem_cache_free(mr_cache, me);
}

static struct ehca_mw *ehca_mw_new(void)
{
	struct ehca_mw *me;

	me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
	if (me)
		spin_lock_init(&me->mwlock);
	else
		ehca_gen_err("alloc failed");

	return me;
}

static void ehca_mw_delete(struct ehca_mw *me)
{
	kmem_cache_free(mw_cache, me);
}

/*----------------------------------------------------------------------*/

struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
{
	struct ib_mr *ib_mr;
	int ret;
	struct ehca_mr *e_maxmr;
	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
	struct ehca_shca *shca =
		container_of(pd->device, struct ehca_shca, ib_device);

	if (shca->maxmr) {
		e_maxmr = ehca_mr_new();
		if (!e_maxmr) {
			ehca_err(&shca->ib_device, "out of memory");
			ib_mr = ERR_PTR(-ENOMEM);
			goto get_dma_mr_exit0;
		}

		ret = ehca_reg_maxmr(shca, e_maxmr,
				     (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
				     mr_access_flags, e_pd,
				     &e_maxmr->ib.ib_mr.lkey,
				     &e_maxmr->ib.ib_mr.rkey);
		if (ret) {
			ehca_mr_delete(e_maxmr);
			ib_mr = ERR_PTR(ret);
			goto get_dma_mr_exit0;
		}
		ib_mr = &e_maxmr->ib.ib_mr;
	} else {
		ehca_err(&shca->ib_device, "no internal max-MR exists!");
		ib_mr = ERR_PTR(-EINVAL);
		goto get_dma_mr_exit0;
	}

get_dma_mr_exit0:
	if (IS_ERR(ib_mr))
		ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
			 PTR_ERR(ib_mr), pd, mr_access_flags);
	return ib_mr;
} /* end ehca_get_dma_mr() */

/*----------------------------------------------------------------------*/

struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
			       struct ib_phys_buf *phys_buf_array,
			       int num_phys_buf,
			       int mr_access_flags,
			       u64 *iova_start)
{
	struct ib_mr *ib_mr;
	int ret;
	struct ehca_mr *e_mr;
	struct ehca_shca *shca =
		container_of(pd->device, struct ehca_shca, ib_device);
	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);

	u64 size;

	if ((num_phys_buf <= 0) || !phys_buf_array) {
		ehca_err(pd->device, "bad input values: num_phys_buf=%x "
			 "phys_buf_array=%p", num_phys_buf, phys_buf_array);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_phys_mr_exit0;
	}
	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
		/*
		 * Remote Write Access requires Local Write Access
		 * Remote Atomic Access requires Local Write Access
		 */
		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
			 mr_access_flags);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_phys_mr_exit0;
	}

	/* check physical buffer list and calculate size */
	ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
					    iova_start, &size);
	if (ret) {
		ib_mr = ERR_PTR(ret);
		goto reg_phys_mr_exit0;
	}
	if ((size == 0) ||
	    (((u64)iova_start + size) < (u64)iova_start)) {
		ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
			 size, iova_start);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_phys_mr_exit0;
	}

	e_mr = ehca_mr_new();
	if (!e_mr) {
		ehca_err(pd->device, "out of memory");
		ib_mr = ERR_PTR(-ENOMEM);
		goto reg_phys_mr_exit0;
	}

	/* register MR on HCA */
	if (ehca_mr_is_maxmr(size, iova_start)) {
		e_mr->flags |= EHCA_MR_FLAG_MAXMR;
		ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
				     e_pd, &e_mr->ib.ib_mr.lkey,
				     &e_mr->ib.ib_mr.rkey);
		if (ret) {
			ib_mr = ERR_PTR(ret);
			goto reg_phys_mr_exit1;
		}
	} else {
		struct ehca_mr_pginfo pginfo;
		u32 num_kpages;
		u32 num_hwpages;
		u64 hw_pgsize;

		num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
					PAGE_SIZE);
		/* for kernel space we try the largest possible pgsize */
		hw_pgsize = ehca_get_max_hwpage_size(shca);
		num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
					 hw_pgsize);
		memset(&pginfo, 0, sizeof(pginfo));
		pginfo.type = EHCA_MR_PGI_PHYS;
		pginfo.num_kpages = num_kpages;
		pginfo.hwpage_size = hw_pgsize;
		pginfo.num_hwpages = num_hwpages;
		pginfo.u.phy.num_phys_buf = num_phys_buf;
		pginfo.u.phy.phys_buf_array = phys_buf_array;
		pginfo.next_hwpage =
			((u64)iova_start & ~PAGE_MASK) / hw_pgsize;

		ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
				  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
				  &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
		if (ret) {
			ib_mr = ERR_PTR(ret);
			goto reg_phys_mr_exit1;
		}
	}

	/* successful registration of all pages */
	return &e_mr->ib.ib_mr;

reg_phys_mr_exit1:
	ehca_mr_delete(e_mr);
reg_phys_mr_exit0:
	if (IS_ERR(ib_mr))
		ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
			 "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
			 PTR_ERR(ib_mr), pd, phys_buf_array,
			 num_phys_buf, mr_access_flags, iova_start);
	return ib_mr;
} /* end ehca_reg_phys_mr() */

/*----------------------------------------------------------------------*/

struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
			       u64 virt, int mr_access_flags,
			       struct ib_udata *udata)
{
	struct ib_mr *ib_mr;
	struct ehca_mr *e_mr;
	struct ehca_shca *shca;
	struct ehca_pd *e_pd;
	struct ehca_mr_pginfo pginfo;
	int ret, page_shift;
	u32 num_kpages;
	u32 num_hwpages;
	u64 hwpage_size;

	/* check pd before it is dereferenced below */
	if (!pd) {
		ehca_gen_err("bad pd=%p", pd);
		return ERR_PTR(-EFAULT);
	}
	shca = container_of(pd->device, struct ehca_shca, ib_device);
	e_pd = container_of(pd, struct ehca_pd, ib_pd);

	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
		/*
		 * Remote Write Access requires Local Write Access
		 * Remote Atomic Access requires Local Write Access
		 */
		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
			 mr_access_flags);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_user_mr_exit0;
	}

	if (length == 0 || virt + length < virt) {
		ehca_err(pd->device, "bad input values: length=%llx "
			 "virt_base=%llx", length, virt);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_user_mr_exit0;
	}

	e_mr = ehca_mr_new();
	if (!e_mr) {
		ehca_err(pd->device, "out of memory");
		ib_mr = ERR_PTR(-ENOMEM);
		goto reg_user_mr_exit0;
	}

	e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
				 mr_access_flags, 0);
	if (IS_ERR(e_mr->umem)) {
		ib_mr = ERR_CAST(e_mr->umem);
		goto reg_user_mr_exit1;
	}

	if (e_mr->umem->page_size != PAGE_SIZE) {
		ehca_err(pd->device, "page size not supported, "
			 "e_mr->umem->page_size=%x", e_mr->umem->page_size);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_user_mr_exit2;
	}

	/* determine number of MR pages */
	num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
	/* select proper hw_pgsize */
	page_shift = PAGE_SHIFT;
	if (e_mr->umem->hugetlb) {
		/* determine page_shift, clamp between 4K and 16M */
		page_shift = (fls64(length - 1) + 3) & ~3;
		page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
				 EHCA_MR_PGSHIFT16M);
	}
	hwpage_size = 1UL << page_shift;
	/*
	 * now that we have the desired page size, shift until it's
	 * supported, too.  The supported sizes (4K, 64K, 1M, 16M) are
	 * each a factor of 16 apart, and 4K is always supported, so
	 * this terminates.
	 */
	while (!(hwpage_size & shca->hca_cap_mr_pgsize))
		hwpage_size >>= 4;

reg_user_mr_fallback:
	num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
	/* register MR on HCA */
	memset(&pginfo, 0, sizeof(pginfo));
	pginfo.type = EHCA_MR_PGI_USER;
	pginfo.hwpage_size = hwpage_size;
	pginfo.num_kpages = num_kpages;
	pginfo.num_hwpages = num_hwpages;
	pginfo.u.usr.region = e_mr->umem;
	pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
	pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
	ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
			  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
			  &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
	if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
		ehca_warn(pd->device, "failed to register mr "
			  "with hwpage_size=%llx", hwpage_size);
		ehca_info(pd->device, "try to register mr with "
			  "kpage_size=%lx", PAGE_SIZE);
		/*
		 * this means kpages are not contiguous for a hw page
		 * try kernel page size as fallback solution
		 */
		hwpage_size = PAGE_SIZE;
		goto reg_user_mr_fallback;
	}
	if (ret) {
		ib_mr = ERR_PTR(ret);
		goto reg_user_mr_exit2;
	}

	/* successful registration of all pages */
	return &e_mr->ib.ib_mr;

reg_user_mr_exit2:
	ib_umem_release(e_mr->umem);
reg_user_mr_exit1:
	ehca_mr_delete(e_mr);
reg_user_mr_exit0:
	if (IS_ERR(ib_mr))
		ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
			 PTR_ERR(ib_mr), pd, mr_access_flags, udata);
	return ib_mr;
} /* end ehca_reg_user_mr() */

/*----------------------------------------------------------------------*/

int ehca_rereg_phys_mr(struct ib_mr *mr,
		       int mr_rereg_mask,
		       struct ib_pd *pd,
		       struct ib_phys_buf *phys_buf_array,
		       int num_phys_buf,
		       int mr_access_flags,
		       u64 *iova_start)
{
	int ret;

	struct ehca_shca *shca =
		container_of(mr->device, struct ehca_shca, ib_device);
	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
	u64 new_size;
	u64 *new_start;
	u32 new_acl;
	struct ehca_pd *new_pd;
	u32 tmp_lkey, tmp_rkey;
	unsigned long sl_flags;
	u32 num_kpages = 0;
	u32 num_hwpages = 0;
	struct ehca_mr_pginfo pginfo;

	if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
		/* TODO not supported, because PHYP rereg hCall needs pages */
		ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
			 "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
		ret = -EINVAL;
		goto rereg_phys_mr_exit0;
	}

	if (mr_rereg_mask & IB_MR_REREG_PD) {
		if (!pd) {
			ehca_err(mr->device, "rereg with bad pd, pd=%p "
				 "mr_rereg_mask=%x", pd, mr_rereg_mask);
			ret = -EINVAL;
			goto rereg_phys_mr_exit0;
		}
	}

	if ((mr_rereg_mask &
	     ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
	    (mr_rereg_mask == 0)) {
		ret = -EINVAL;
		goto rereg_phys_mr_exit0;
	}

	/* check other parameters */
	if (e_mr == shca->maxmr) {
		/* should be impossible; reject to be sure */
		ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
			 "shca->maxmr=%p mr->lkey=%x",
			 mr, shca->maxmr, mr->lkey);
		ret = -EINVAL;
		goto rereg_phys_mr_exit0;
	}
	if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
		if (e_mr->flags & EHCA_MR_FLAG_FMR) {
			ehca_err(mr->device, "not supported for FMR, mr=%p "
				 "flags=%x", mr, e_mr->flags);
			ret = -EINVAL;
			goto rereg_phys_mr_exit0;
		}
		if (!phys_buf_array || num_phys_buf <= 0) {
			ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
				 " phys_buf_array=%p num_phys_buf=%x",
				 mr_rereg_mask, phys_buf_array, num_phys_buf);
			ret = -EINVAL;
			goto rereg_phys_mr_exit0;
		}
	}
	if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&	/* change ACL */
	    (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
	      !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
	     ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
	      !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
		/*
		 * Remote Write Access requires Local Write Access
		 * Remote Atomic Access requires Local Write Access
		 */
		ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
			 "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
		ret = -EINVAL;
		goto rereg_phys_mr_exit0;
	}

	/* set requested values depending on the rereg request */
	spin_lock_irqsave(&e_mr->mrlock, sl_flags);
	new_start = e_mr->start;
	new_size = e_mr->size;
	new_acl = e_mr->acl;
	new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);

	if (mr_rereg_mask & IB_MR_REREG_TRANS) {
		u64 hw_pgsize = ehca_get_max_hwpage_size(shca);

		new_start = iova_start;	/* change address */
		/* check physical buffer list and calculate size */
		ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
						    num_phys_buf, iova_start,
						    &new_size);
		if (ret)
			goto rereg_phys_mr_exit1;
		if ((new_size == 0) ||
		    (((u64)iova_start + new_size) < (u64)iova_start)) {
			ehca_err(mr->device, "bad input values: new_size=%llx "
				 "iova_start=%p", new_size, iova_start);
			ret = -EINVAL;
			goto rereg_phys_mr_exit1;
		}
		num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
					new_size, PAGE_SIZE);
		num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
					 new_size, hw_pgsize);
		memset(&pginfo, 0, sizeof(pginfo));
		pginfo.type = EHCA_MR_PGI_PHYS;
		pginfo.num_kpages = num_kpages;
		pginfo.hwpage_size = hw_pgsize;
		pginfo.num_hwpages = num_hwpages;
		pginfo.u.phy.num_phys_buf = num_phys_buf;
		pginfo.u.phy.phys_buf_array = phys_buf_array;
		pginfo.next_hwpage =
			((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
	}
	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
		new_acl = mr_access_flags;
	if (mr_rereg_mask & IB_MR_REREG_PD)
		new_pd = container_of(pd, struct ehca_pd, ib_pd);

	ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
			    new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
	if (ret)
		goto rereg_phys_mr_exit1;

	/* successful reregistration */
	if (mr_rereg_mask & IB_MR_REREG_PD)
		mr->pd = pd;
	mr->lkey = tmp_lkey;
	mr->rkey = tmp_rkey;

rereg_phys_mr_exit1:
	spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
rereg_phys_mr_exit0:
	if (ret)
		ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
			 "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
			 "iova_start=%p",
			 ret, mr, mr_rereg_mask, pd, phys_buf_array,
			 num_phys_buf, mr_access_flags, iova_start);
	return ret;
} /* end ehca_rereg_phys_mr() */

/*----------------------------------------------------------------------*/

int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
{
	int ret = 0;
	u64 h_ret;
	struct ehca_shca *shca =
		container_of(mr->device, struct ehca_shca, ib_device);
	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
	unsigned long sl_flags;
	struct ehca_mr_hipzout_parms hipzout;

	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
			 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
		ret = -EINVAL;
		goto query_mr_exit0;
	}

	memset(mr_attr, 0, sizeof(struct ib_mr_attr));
	spin_lock_irqsave(&e_mr->mrlock, sl_flags);

	h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
	if (h_ret != H_SUCCESS) {
		ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
			 "hca_hndl=%llx mr_hndl=%llx lkey=%x",
			 h_ret, mr, shca->ipz_hca_handle.handle,
			 e_mr->ipz_mr_handle.handle, mr->lkey);
		ret = ehca2ib_return_code(h_ret);
		goto query_mr_exit1;
	}
	mr_attr->pd = mr->pd;
	mr_attr->device_virt_addr = hipzout.vaddr;
	mr_attr->size = hipzout.len;
	mr_attr->lkey = hipzout.lkey;
	mr_attr->rkey = hipzout.rkey;
	ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);

query_mr_exit1:
	spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
query_mr_exit0:
	if (ret)
		ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
			 ret, mr, mr_attr);
	return ret;
} /* end ehca_query_mr() */

/*----------------------------------------------------------------------*/

int ehca_dereg_mr(struct ib_mr *mr)
{
	int ret = 0;
	u64 h_ret;
	struct ehca_shca *shca =
		container_of(mr->device, struct ehca_shca, ib_device);
	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);

	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
			 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
		ret = -EINVAL;
		goto dereg_mr_exit0;
	} else if (e_mr == shca->maxmr) {
		/* should be impossible; reject to be sure */
		ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
			 "shca->maxmr=%p mr->lkey=%x",
			 mr, shca->maxmr, mr->lkey);
		ret = -EINVAL;
		goto dereg_mr_exit0;
	}

	/* TODO: BUSY: MR still has bound window(s) */
	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
	if (h_ret != H_SUCCESS) {
		ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
			 "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
			 h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
			 e_mr->ipz_mr_handle.handle, mr->lkey);
		ret = ehca2ib_return_code(h_ret);
		goto dereg_mr_exit0;
	}

	if (e_mr->umem)
		ib_umem_release(e_mr->umem);

	/* successful deregistration */
	ehca_mr_delete(e_mr);

dereg_mr_exit0:
	if (ret)
		ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
	return ret;
} /* end ehca_dereg_mr() */

/*----------------------------------------------------------------------*/

struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
	struct ib_mw *ib_mw;
	u64 h_ret;
	struct ehca_mw *e_mw;
	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
	struct ehca_shca *shca =
		container_of(pd->device, struct ehca_shca, ib_device);
	struct ehca_mw_hipzout_parms hipzout;

	if (type != IB_MW_TYPE_1)
		return ERR_PTR(-EINVAL);

	e_mw = ehca_mw_new();
	if (!e_mw) {
		ib_mw = ERR_PTR(-ENOMEM);
		goto alloc_mw_exit0;
	}

	h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
					 e_pd->fw_pd, &hipzout);
	if (h_ret != H_SUCCESS) {
		ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
			 "shca=%p hca_hndl=%llx mw=%p",
			 h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
		ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
		goto alloc_mw_exit1;
	}
	/* successful MW allocation */
	e_mw->ipz_mw_handle = hipzout.handle;
	e_mw->ib_mw.rkey    = hipzout.rkey;
	return &e_mw->ib_mw;

alloc_mw_exit1:
	ehca_mw_delete(e_mw);
alloc_mw_exit0:
	if (IS_ERR(ib_mw))
		ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
	return ib_mw;
} /* end ehca_alloc_mw() */

/*----------------------------------------------------------------------*/

int ehca_bind_mw(struct ib_qp *qp,
		 struct ib_mw *mw,
		 struct ib_mw_bind *mw_bind)
{
	/* TODO: not supported up to now */
	ehca_gen_err("bind MW currently not supported by HCAD");

	return -EPERM;
} /* end ehca_bind_mw() */

/*----------------------------------------------------------------------*/

int ehca_dealloc_mw(struct ib_mw *mw)
{
	u64 h_ret;
	struct ehca_shca *shca =
		container_of(mw->device, struct ehca_shca, ib_device);
	struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);

	h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
	if (h_ret != H_SUCCESS) {
		ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
			 "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
			 h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
			 e_mw->ipz_mw_handle.handle);
		return ehca2ib_return_code(h_ret);
	}
	/* successful deallocation */
	ehca_mw_delete(e_mw);
	return 0;
} /* end ehca_dealloc_mw() */

/*----------------------------------------------------------------------*/

struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
			      int mr_access_flags,
			      struct ib_fmr_attr *fmr_attr)
{
	struct ib_fmr *ib_fmr;
	struct ehca_shca *shca =
		container_of(pd->device, struct ehca_shca, ib_device);
	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
	struct ehca_mr *e_fmr;
	int ret;
	u32 tmp_lkey, tmp_rkey;
	struct ehca_mr_pginfo pginfo;
	u64 hw_pgsize;

	/* check other parameters */
	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
		/*
		 * Remote Write Access requires Local Write Access
		 * Remote Atomic Access requires Local Write Access
		 */
		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
			 mr_access_flags);
		ib_fmr = ERR_PTR(-EINVAL);
		goto alloc_fmr_exit0;
	}
	if (mr_access_flags & IB_ACCESS_MW_BIND) {
		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
			 mr_access_flags);
		ib_fmr = ERR_PTR(-EINVAL);
		goto alloc_fmr_exit0;
	}
	if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
		ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
			 "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
			 fmr_attr->max_pages, fmr_attr->max_maps,
			 fmr_attr->page_shift);
		ib_fmr = ERR_PTR(-EINVAL);
		goto alloc_fmr_exit0;
	}

	hw_pgsize = 1 << fmr_attr->page_shift;
	if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
		ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
			 fmr_attr->page_shift);
		ib_fmr = ERR_PTR(-EINVAL);
		goto alloc_fmr_exit0;
	}

	e_fmr = ehca_mr_new();
	if (!e_fmr) {
		ib_fmr = ERR_PTR(-ENOMEM);
		goto alloc_fmr_exit0;
	}
	e_fmr->flags |= EHCA_MR_FLAG_FMR;

	/* register MR on HCA */
	memset(&pginfo, 0, sizeof(pginfo));
	pginfo.hwpage_size = hw_pgsize;
	/*
	 * pginfo.num_hwpages==0, i.e. register_rpages() will not be called,
	 * but deferred to map_phys_fmr()
	 */
	ret = ehca_reg_mr(shca, e_fmr, NULL,
			  fmr_attr->max_pages * (1 << fmr_attr->page_shift),
			  mr_access_flags, e_pd, &pginfo,
			  &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
	if (ret) {
		ib_fmr = ERR_PTR(ret);
		goto alloc_fmr_exit1;
	}

	/* successful */
	e_fmr->hwpage_size = hw_pgsize;
	e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
	e_fmr->fmr_max_pages = fmr_attr->max_pages;
	e_fmr->fmr_max_maps = fmr_attr->max_maps;
	e_fmr->fmr_map_cnt = 0;
	return &e_fmr->ib.ib_fmr;

alloc_fmr_exit1:
	ehca_mr_delete(e_fmr);
alloc_fmr_exit0:
	return ib_fmr;
} /* end ehca_alloc_fmr() */

/*----------------------------------------------------------------------*/

int ehca_map_phys_fmr(struct ib_fmr *fmr,
		      u64 *page_list,
		      int list_len,
		      u64 iova)
{
	int ret;
	struct ehca_shca *shca =
		container_of(fmr->device, struct ehca_shca, ib_device);
	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
	struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
	struct ehca_mr_pginfo pginfo;
	u32 tmp_lkey, tmp_rkey;

	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
			 e_fmr, e_fmr->flags);
		ret = -EINVAL;
		goto map_phys_fmr_exit0;
	}
	ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
	if (ret)
		goto map_phys_fmr_exit0;
	if (iova % e_fmr->fmr_page_size) {
		/* iova must be aligned to the FMR page size */
		ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
			 iova, e_fmr->fmr_page_size);
		ret = -EINVAL;
		goto map_phys_fmr_exit0;
	}
	if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
		/* HCAD does not limit the number of maps; log it anyway */
		ehca_info(fmr->device, "map limit exceeded, fmr=%p "
			  "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
			  fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
	}

	memset(&pginfo, 0, sizeof(pginfo));
	pginfo.type = EHCA_MR_PGI_FMR;
	pginfo.num_kpages = list_len;
	pginfo.hwpage_size = e_fmr->hwpage_size;
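	/*
	 * e.g. list_len = 2 FMR pages of 64K backed by 4K hw pages
	 * yields 2 * 0x10000 / 0x1000 = 32 hw pages to register
	 */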
	pginfo.num_hwpages =
		list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
	pginfo.u.fmr.page_list = page_list;
	pginfo.next_hwpage =
		(iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
	pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;

	ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
			    list_len * e_fmr->fmr_page_size,
			    e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
	if (ret)
		goto map_phys_fmr_exit0;

	/* successful reregistration */
	e_fmr->fmr_map_cnt++;
	e_fmr->ib.ib_fmr.lkey = tmp_lkey;
	e_fmr->ib.ib_fmr.rkey = tmp_rkey;
	return 0;

map_phys_fmr_exit0:
	if (ret)
		ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
			 "iova=%llx", ret, fmr, page_list, list_len, iova);
	return ret;
} /* end ehca_map_phys_fmr() */

/*----------------------------------------------------------------------*/

int ehca_unmap_fmr(struct list_head *fmr_list)
{
	int ret = 0;
	struct ib_fmr *ib_fmr;
	struct ehca_shca *shca = NULL;
	struct ehca_shca *prev_shca;
	struct ehca_mr *e_fmr;
	u32 num_fmr = 0;
	u32 unmap_fmr_cnt = 0;

	/* check all FMRs belong to the same SHCA, and check internal flag */
	list_for_each_entry(ib_fmr, fmr_list, list) {
		prev_shca = shca;
		shca = container_of(ib_fmr->device, struct ehca_shca,
				    ib_device);
		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
		if ((shca != prev_shca) && prev_shca) {
			ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
				 "prev_shca=%p e_fmr=%p",
				 shca, prev_shca, e_fmr);
			ret = -EINVAL;
			goto unmap_fmr_exit0;
		}
		if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
			ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
				 "e_fmr->flags=%x", e_fmr, e_fmr->flags);
			ret = -EINVAL;
			goto unmap_fmr_exit0;
		}
		num_fmr++;
	}

	/* loop over all FMRs to unmap */
	list_for_each_entry(ib_fmr, fmr_list, list) {
		unmap_fmr_cnt++;
		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
		shca = container_of(ib_fmr->device, struct ehca_shca,
				    ib_device);
		ret = ehca_unmap_one_fmr(shca, e_fmr);
		if (ret) {
			/* unmap failed; stop unmapping the remaining FMRs */
			ehca_err(&shca->ib_device, "unmap of one FMR failed, "
				 "stop rest, e_fmr=%p num_fmr=%x "
				 "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
				 unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
			goto unmap_fmr_exit0;
		}
	}

unmap_fmr_exit0:
	if (ret)
		ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
			     ret, fmr_list, num_fmr, unmap_fmr_cnt);
	return ret;
} /* end ehca_unmap_fmr() */

/*----------------------------------------------------------------------*/

int ehca_dealloc_fmr(struct ib_fmr *fmr)
{
	int ret;
	u64 h_ret;
	struct ehca_shca *shca =
		container_of(fmr->device, struct ehca_shca, ib_device);
	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);

	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
			 e_fmr, e_fmr->flags);
		ret = -EINVAL;
		goto free_fmr_exit0;
	}

	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
	if (h_ret != H_SUCCESS) {
		ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
			 "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
			 e_fmr->ipz_mr_handle.handle, fmr->lkey);
		ret = ehca2ib_return_code(h_ret);
		goto free_fmr_exit0;
	}
	/* successful deregistration */
	ehca_mr_delete(e_fmr);
	return 0;

free_fmr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
	return ret;
} /* end ehca_dealloc_fmr() */

/*----------------------------------------------------------------------*/

static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
				   struct ehca_mr *e_mr,
				   struct ehca_mr_pginfo *pginfo);

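/*
 * Register a memory region with the HCA: allocate the MR resource via
 * hipz_h_alloc_resource_mr(), then register its pages either from the
 * bus map (EHCA_REG_BUSMAP_MR) or from pginfo (EHCA_REG_MR).
 */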
int ehca_reg_mr(struct ehca_shca *shca,
		struct ehca_mr *e_mr,
		u64 *iova_start,
		u64 size,
		int acl,
		struct ehca_pd *e_pd,
		struct ehca_mr_pginfo *pginfo,
		u32 *lkey, /*OUT*/
		u32 *rkey, /*OUT*/
		enum ehca_reg_type reg_type)
{
	int ret;
	u64 h_ret;
	u32 hipz_acl;
	struct ehca_mr_hipzout_parms hipzout;

	ehca_mrmw_map_acl(acl, &hipz_acl);
	ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
	if (ehca_use_hp_mr == 1)
		hipz_acl |= 0x00000001;

	h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
					 (u64)iova_start, size, hipz_acl,
					 e_pd->fw_pd, &hipzout);
	if (h_ret != H_SUCCESS) {
		ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
			 "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
		ret = ehca2ib_return_code(h_ret);
		goto ehca_reg_mr_exit0;
	}

	e_mr->ipz_mr_handle = hipzout.handle;

	if (reg_type == EHCA_REG_BUSMAP_MR)
		ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
	else if (reg_type == EHCA_REG_MR)
		ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
	else
		ret = -EINVAL;

	if (ret)
		goto ehca_reg_mr_exit1;

	/* successful registration */
	e_mr->num_kpages = pginfo->num_kpages;
	e_mr->num_hwpages = pginfo->num_hwpages;
	e_mr->hwpage_size = pginfo->hwpage_size;
	e_mr->start = iova_start;
	e_mr->size = size;
	e_mr->acl = acl;
	*lkey = hipzout.lkey;
	*rkey = hipzout.rkey;
	return 0;

ehca_reg_mr_exit1:
	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
	if (h_ret != H_SUCCESS) {
		ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
			 "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
			 "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
			 h_ret, shca, e_mr, iova_start, size, acl, e_pd,
			 hipzout.lkey, pginfo, pginfo->num_kpages,
			 pginfo->num_hwpages, ret);
		ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
			 "not recoverable");
	}
ehca_reg_mr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
			 "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
			 "num_kpages=%llx num_hwpages=%llx",
			 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
			 pginfo->num_kpages, pginfo->num_hwpages);
	return ret;
} /* end ehca_reg_mr() */

/*----------------------------------------------------------------------*/

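/*
 * Register the MR's hw pages with the firmware in chunks of at most
 * MAX_RPAGES per hcall; every call but the last must return
 * H_PAGE_REGISTERED, and the last one H_SUCCESS.
 */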
int ehca_reg_mr_rpages(struct ehca_shca *shca,
		       struct ehca_mr *e_mr,
		       struct ehca_mr_pginfo *pginfo)
{
	int ret = 0;
	u64 h_ret;
	u32 rnum;
	u64 rpage;
	u32 i;
	u64 *kpage;

	if (!pginfo->num_hwpages) /* in case of fmr */
		return 0;

	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
	if (!kpage) {
		ehca_err(&shca->ib_device, "kpage alloc failed");
		ret = -ENOMEM;
		goto ehca_reg_mr_rpages_exit0;
	}

	/* at most MAX_RPAGES ehca MR pages per register call */
	for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {

		if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
			rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
			if (rnum == 0)
				rnum = MAX_RPAGES;      /* last shot is full */
		} else
			rnum = MAX_RPAGES;

		ret = ehca_set_pagebuf(pginfo, rnum, kpage);
		if (ret) {
			ehca_err(&shca->ib_device, "ehca_set_pagebuf "
				 "bad rc, ret=%i rnum=%x kpage=%p",
				 ret, rnum, kpage);
			goto ehca_reg_mr_rpages_exit1;
		}

		if (rnum > 1) {
			rpage = __pa(kpage);
			if (!rpage) {
				ehca_err(&shca->ib_device, "kpage=%p i=%x",
					 kpage, i);
				ret = -EFAULT;
				goto ehca_reg_mr_rpages_exit1;
			}
		} else
			rpage = *kpage;

		h_ret = hipz_h_register_rpage_mr(
			shca->ipz_hca_handle, e_mr,
			ehca_encode_hwpage_size(pginfo->hwpage_size),
			0, rpage, rnum);

		if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
			/*
			 * check for 'registration complete'==H_SUCCESS
			 * and for 'page registered'==H_PAGE_REGISTERED
			 */
			if (h_ret != H_SUCCESS) {
				ehca_err(&shca->ib_device, "last "
					 "hipz_reg_rpage_mr failed, h_ret=%lli "
					 "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
					 " lkey=%x", h_ret, e_mr, i,
					 shca->ipz_hca_handle.handle,
					 e_mr->ipz_mr_handle.handle,
					 e_mr->ib.ib_mr.lkey);
				ret = ehca2ib_return_code(h_ret);
				break;
			} else
				ret = 0;
		} else if (h_ret != H_PAGE_REGISTERED) {
			ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
				 "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
				 "mr_hndl=%llx", h_ret, e_mr, i,
				 e_mr->ib.ib_mr.lkey,
				 shca->ipz_hca_handle.handle,
				 e_mr->ipz_mr_handle.handle);
			ret = ehca2ib_return_code(h_ret);
			break;
		} else
			ret = 0;
	} /* end for(i) */

ehca_reg_mr_rpages_exit1:
	ehca_free_fw_ctrlblock(kpage);
ehca_reg_mr_rpages_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
			 "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
			 pginfo, pginfo->num_kpages, pginfo->num_hwpages);
	return ret;
} /* end ehca_reg_mr_rpages() */

/*----------------------------------------------------------------------*/

inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
				struct ehca_mr *e_mr,
				u64 *iova_start,
				u64 size,
				u32 acl,
				struct ehca_pd *e_pd,
				struct ehca_mr_pginfo *pginfo,
				u32 *lkey, /*OUT*/
				u32 *rkey) /*OUT*/
{
	int ret;
	u64 h_ret;
	u32 hipz_acl;
	u64 *kpage;
	u64 rpage;
	struct ehca_mr_pginfo pginfo_save;
	struct ehca_mr_hipzout_parms hipzout;

	ehca_mrmw_map_acl(acl, &hipz_acl);
	ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);

	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
	if (!kpage) {
		ehca_err(&shca->ib_device, "kpage alloc failed");
		ret = -ENOMEM;
		goto ehca_rereg_mr_rereg1_exit0;
	}

	pginfo_save = *pginfo;
	ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
	if (ret) {
		ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
			 "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
			 "kpage=%p", e_mr, pginfo, pginfo->type,
			 pginfo->num_kpages, pginfo->num_hwpages, kpage);
		goto ehca_rereg_mr_rereg1_exit1;
	}
	rpage = __pa(kpage);
	if (!rpage) {
		ehca_err(&shca->ib_device, "kpage=%p", kpage);
		ret = -EFAULT;
		goto ehca_rereg_mr_rereg1_exit1;
	}
	h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
				      (u64)iova_start, size, hipz_acl,
				      e_pd->fw_pd, rpage, &hipzout);
	if (h_ret != H_SUCCESS) {
		/*
		 * reregistration unsuccessful; try it again with the 3 hCalls,
		 * e.g. this is required in case of H_MR_CONDITION
		 * (MW bound or MR is shared)
		 */
		ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
			  "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
		*pginfo = pginfo_save;
		ret = -EAGAIN;
	} else if ((u64 *)hipzout.vaddr != iova_start) {
		ehca_err(&shca->ib_device, "PHYP changed iova_start in "
			 "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
			 "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
			 hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
			 e_mr->ib.ib_mr.lkey, hipzout.lkey);
		ret = -EFAULT;
	} else {
		/*
		 * successful reregistration
		 * note: start and start_out are identical for eServer HCAs
		 */
		e_mr->num_kpages = pginfo->num_kpages;
		e_mr->num_hwpages = pginfo->num_hwpages;
		e_mr->hwpage_size = pginfo->hwpage_size;
		e_mr->start = iova_start;
		e_mr->size = size;
		e_mr->acl = acl;
		*lkey = hipzout.lkey;
		*rkey = hipzout.rkey;
	}

ehca_rereg_mr_rereg1_exit1:
	ehca_free_fw_ctrlblock(kpage);
ehca_rereg_mr_rereg1_exit0:
	if (ret && (ret != -EAGAIN))
		ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
			 "pginfo=%p num_kpages=%llx num_hwpages=%llx",
			 ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
			 pginfo->num_hwpages);
	return ret;
} /* end ehca_rereg_mr_rereg1() */

/*----------------------------------------------------------------------*/

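/*
 * Reregister an MR, preferring the single hipz_h_reregister_pmr hcall
 * ("Rereg1"); fall back to deregister + register ("Rereg3") when the
 * page count exceeds MAX_RPAGES, for the internal max-MR, or when
 * Rereg1 fails with -EAGAIN.
 */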
int ehca_rereg_mr(struct ehca_shca *shca,
		  struct ehca_mr *e_mr,
		  u64 *iova_start,
		  u64 size,
		  int acl,
		  struct ehca_pd *e_pd,
		  struct ehca_mr_pginfo *pginfo,
		  u32 *lkey,
		  u32 *rkey)
{
	int ret = 0;
	u64 h_ret;
	int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
	int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */

	/* first determine reregistration hCall(s) */
	if ((pginfo->num_hwpages > MAX_RPAGES) ||
	    (e_mr->num_hwpages > MAX_RPAGES) ||
	    (pginfo->num_hwpages > e_mr->num_hwpages)) {
		ehca_dbg(&shca->ib_device, "Rereg3 case, "
			 "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
			 pginfo->num_hwpages, e_mr->num_hwpages);
		rereg_1_hcall = 0;
		rereg_3_hcall = 1;
	}

	if (e_mr->flags & EHCA_MR_FLAG_MAXMR) {	/* check for max-MR */
		rereg_1_hcall = 0;
		rereg_3_hcall = 1;
		e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
		ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
			 e_mr);
	}

	if (rereg_1_hcall) {
		ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
					   acl, e_pd, pginfo, lkey, rkey);
		if (ret) {
			if (ret == -EAGAIN)
				rereg_3_hcall = 1;
			else
				goto ehca_rereg_mr_exit0;
		}
	}

	if (rereg_3_hcall) {
		struct ehca_mr save_mr;

		/* first deregister old MR */
		h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
		if (h_ret != H_SUCCESS) {
			ehca_err(&shca->ib_device, "hipz_free_mr failed, "
				 "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
				 "mr->lkey=%x",
				 h_ret, e_mr, shca->ipz_hca_handle.handle,
				 e_mr->ipz_mr_handle.handle,
				 e_mr->ib.ib_mr.lkey);
			ret = ehca2ib_return_code(h_ret);
			goto ehca_rereg_mr_exit0;
		}
		/* clean ehca_mr_t, without changing struct ib_mr and lock */
		save_mr = *e_mr;
		ehca_mr_deletenew(e_mr);

		/* set some MR values */
		e_mr->flags = save_mr.flags;
		e_mr->hwpage_size = save_mr.hwpage_size;
		e_mr->fmr_page_size = save_mr.fmr_page_size;
		e_mr->fmr_max_pages = save_mr.fmr_max_pages;
		e_mr->fmr_max_maps = save_mr.fmr_max_maps;
		e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;

		ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
				  e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
		if (ret) {
			u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
			memcpy(&e_mr->flags, &(save_mr.flags),
			       sizeof(struct ehca_mr) - offset);
			goto ehca_rereg_mr_exit0;
		}
	}

ehca_rereg_mr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
			 "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
			 "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
			 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
			 acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
			 rereg_1_hcall, rereg_3_hcall);
	return ret;
} /* end ehca_rereg_mr() */

/*----------------------------------------------------------------------*/

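/*
 * Unmap one FMR: small FMRs (fmr_max_pages <= MAX_RPAGES) are reset
 * with a single rereg hcall of length 0; otherwise, or if that fails,
 * the FMR is freed and registered anew.
 */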
int ehca_unmap_one_fmr(struct ehca_shca *shca,
		       struct ehca_mr *e_fmr)
{
	int ret = 0;
	u64 h_ret;
	struct ehca_pd *e_pd =
		container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
	struct ehca_mr save_fmr;
	u32 tmp_lkey, tmp_rkey;
	struct ehca_mr_pginfo pginfo;
	struct ehca_mr_hipzout_parms hipzout;

	if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
		/*
		 * note: after using rereg hcall with len=0,
		 * rereg hcall must be used again for registering pages
		 */
		h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
					      0, 0, e_pd->fw_pd, 0, &hipzout);
		if (h_ret == H_SUCCESS) {
			/* successful reregistration */
			e_fmr->start = NULL;
			e_fmr->size = 0;
			tmp_lkey = hipzout.lkey;
			tmp_rkey = hipzout.rkey;
			return 0;
		}
		/*
		 * should not happen: the length was checked above, FMRs are
		 * not shared, and no MW can be bound to an FMR
		 */
		ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
			 "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
			 "mr_hndl=%llx lkey=%x lkey_out=%x",
			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
			 e_fmr->ipz_mr_handle.handle,
			 e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
		/* try free and rereg */
	}

	/* first free old FMR */
	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
	if (h_ret != H_SUCCESS) {
		ehca_err(&shca->ib_device, "hipz_free_mr failed, "
			 "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
			 "lkey=%x",
			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
			 e_fmr->ipz_mr_handle.handle,
			 e_fmr->ib.ib_fmr.lkey);
		ret = ehca2ib_return_code(h_ret);
		goto ehca_unmap_one_fmr_exit0;
	}
	/* clean ehca_mr_t, without changing lock */
	save_fmr = *e_fmr;
	ehca_mr_deletenew(e_fmr);

	/* set some MR values */
	e_fmr->flags = save_fmr.flags;
	e_fmr->hwpage_size = save_fmr.hwpage_size;
	e_fmr->fmr_page_size = save_fmr.fmr_page_size;
	e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
	e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
	e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
	e_fmr->acl = save_fmr.acl;

	memset(&pginfo, 0, sizeof(pginfo));
	pginfo.type = EHCA_MR_PGI_FMR;
	ret = ehca_reg_mr(shca, e_fmr, NULL,
			  (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
			  e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
			  &tmp_rkey, EHCA_REG_MR);
	if (ret) {
		u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
		memcpy(&e_fmr->flags, &(save_fmr.flags),
		       sizeof(struct ehca_mr) - offset);
	}

ehca_unmap_one_fmr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
			 "fmr_max_pages=%x",
			 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
	return ret;
} /* end ehca_unmap_one_fmr() */

/*----------------------------------------------------------------------*/

int ehca_reg_smr(struct ehca_shca *shca,
		 struct ehca_mr *e_origmr,
		 struct ehca_mr *e_newmr,
		 u64 *iova_start,
		 int acl,
		 struct ehca_pd *e_pd,
		 u32 *lkey, /*OUT*/
		 u32 *rkey) /*OUT*/
{
	int ret = 0;
	u64 h_ret;
	u32 hipz_acl;
	struct ehca_mr_hipzout_parms hipzout;

	ehca_mrmw_map_acl(acl, &hipz_acl);
	ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);

	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
				    &hipzout);
	if (h_ret != H_SUCCESS) {
		ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
			 "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
			 "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
			 h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
			 shca->ipz_hca_handle.handle,
			 e_origmr->ipz_mr_handle.handle,
			 e_origmr->ib.ib_mr.lkey);
		ret = ehca2ib_return_code(h_ret);
		goto ehca_reg_smr_exit0;
	}
	/* successful registration */
	e_newmr->num_kpages = e_origmr->num_kpages;
	e_newmr->num_hwpages = e_origmr->num_hwpages;
	e_newmr->hwpage_size = e_origmr->hwpage_size;
	e_newmr->start = iova_start;
	e_newmr->size = e_origmr->size;
	e_newmr->acl = acl;
	e_newmr->ipz_mr_handle = hipzout.handle;
	*lkey = hipzout.lkey;
	*rkey = hipzout.rkey;
	return 0;

ehca_reg_smr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
			 "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
			 ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
	return ret;
} /* end ehca_reg_smr() */

/*----------------------------------------------------------------------*/

static inline void *ehca_calc_sectbase(int top, int dir, int idx)
{
	unsigned long ret = idx;
	ret |= dir << EHCA_DIR_INDEX_SHIFT;
	ret |= top << EHCA_TOP_INDEX_SHIFT;
	return __va(ret << SECTION_SIZE_BITS);
}

#define ehca_bmap_valid(entry) \
	((u64)entry != (u64)EHCA_INVAL_ADDR)

static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
			       struct ehca_shca *shca, struct ehca_mr *mr,
			       struct ehca_mr_pginfo *pginfo)
{
	u64 h_ret = 0;
	unsigned long page = 0;
	u64 rpage = __pa(kpage);
	int page_count;

	void *sectbase = ehca_calc_sectbase(top, dir, idx);
	if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
		ehca_err(&shca->ib_device, "reg_mr_section will probably fail: "
			 "hwpage_size does not match the section start "
			 "address");
	}
	page_count = EHCA_SECTSIZE / pginfo->hwpage_size;

	while (page < page_count) {
		u64 rnum;
		for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
		     rnum++) {
			void *pg = sectbase + ((page++) * pginfo->hwpage_size);
			kpage[rnum] = __pa(pg);
		}

		h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
			ehca_encode_hwpage_size(pginfo->hwpage_size),
			0, rpage, rnum);

		if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
			ehca_err(&shca->ib_device, "register_rpage_mr failed");
			return h_ret;
		}
	}
	return h_ret;
}

static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
				struct ehca_shca *shca, struct ehca_mr *mr,
				struct ehca_mr_pginfo *pginfo)
{
	u64 hret = H_SUCCESS;
	int idx;

	for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
		if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
			continue;

		hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
					   pginfo);
		if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
			return hret;
	}
	return hret;
}

static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
				    struct ehca_mr *mr,
				    struct ehca_mr_pginfo *pginfo)
{
	u64 hret = H_SUCCESS;
	int dir;

	for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
		if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
			continue;

		hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
		if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
			return hret;
	}
	return hret;
}

/* register internal max-MR to internal SHCA */
int ehca_reg_internal_maxmr(
	struct ehca_shca *shca,
	struct ehca_pd *e_pd,
	struct ehca_mr **e_maxmr)  /*OUT*/
{
	int ret;
	struct ehca_mr *e_mr;
	u64 *iova_start;
	u64 size_maxmr;
	struct ehca_mr_pginfo pginfo;
	struct ib_phys_buf ib_pbuf;
	u32 num_kpages;
	u32 num_hwpages;
	u64 hw_pgsize;

	if (!ehca_bmap) {
		ret = -EFAULT;
		goto ehca_reg_internal_maxmr_exit0;
	}

	e_mr = ehca_mr_new();
	if (!e_mr) {
		ehca_err(&shca->ib_device, "out of memory");
		ret = -ENOMEM;
		goto ehca_reg_internal_maxmr_exit0;
	}
	e_mr->flags |= EHCA_MR_FLAG_MAXMR;

	/* register internal max-MR on HCA */
	size_maxmr = ehca_mr_len;
	iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
	ib_pbuf.addr = 0;
	ib_pbuf.size = size_maxmr;
	num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
				PAGE_SIZE);
	hw_pgsize = ehca_get_max_hwpage_size(shca);
	num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
				 hw_pgsize);

	memset(&pginfo, 0, sizeof(pginfo));
	pginfo.type = EHCA_MR_PGI_PHYS;
	pginfo.num_kpages = num_kpages;
	pginfo.num_hwpages = num_hwpages;
	pginfo.hwpage_size = hw_pgsize;
	pginfo.u.phy.num_phys_buf = 1;
	pginfo.u.phy.phys_buf_array = &ib_pbuf;

	ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
			  &pginfo, &e_mr->ib.ib_mr.lkey,
			  &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
	if (ret) {
		ehca_err(&shca->ib_device, "reg of internal max MR failed, "
			 "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
			 "num_hwpages=%x", e_mr, iova_start, size_maxmr,
			 num_kpages, num_hwpages);
		goto ehca_reg_internal_maxmr_exit1;
	}

	/* successful registration of all pages */
	e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
	e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
	e_mr->ib.ib_mr.uobject = NULL;
	atomic_inc(&(e_pd->ib_pd.usecnt));
	atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
	*e_maxmr = e_mr;
	return 0;

ehca_reg_internal_maxmr_exit1:
	ehca_mr_delete(e_mr);
ehca_reg_internal_maxmr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
			 ret, shca, e_pd, e_maxmr);
	return ret;
} /* end ehca_reg_internal_maxmr() */

/*----------------------------------------------------------------------*/

int ehca_reg_maxmr(struct ehca_shca *shca,
		   struct ehca_mr *e_newmr,
		   u64 *iova_start,
		   int acl,
		   struct ehca_pd *e_pd,
		   u32 *lkey,
		   u32 *rkey)
{
	u64 h_ret;
	struct ehca_mr *e_origmr = shca->maxmr;
	u32 hipz_acl;
	struct ehca_mr_hipzout_parms hipzout;

	ehca_mrmw_map_acl(acl, &hipz_acl);
	ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);

	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
				    &hipzout);
	if (h_ret != H_SUCCESS) {
		ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
			 "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
			 h_ret, e_origmr, shca->ipz_hca_handle.handle,
			 e_origmr->ipz_mr_handle.handle,
			 e_origmr->ib.ib_mr.lkey);
		return ehca2ib_return_code(h_ret);
	}
	/* successful registration */
	e_newmr->num_kpages = e_origmr->num_kpages;
	e_newmr->num_hwpages = e_origmr->num_hwpages;
	e_newmr->hwpage_size = e_origmr->hwpage_size;
	e_newmr->start = iova_start;
	e_newmr->size = e_origmr->size;
	e_newmr->acl = acl;
	e_newmr->ipz_mr_handle = hipzout.handle;
	*lkey = hipzout.lkey;
	*rkey = hipzout.rkey;
	return 0;
} /* end ehca_reg_maxmr() */

/*----------------------------------------------------------------------*/

int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
{
	int ret;
	struct ehca_mr *e_maxmr;
	struct ib_pd *ib_pd;

	if (!shca->maxmr) {
		ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
		ret = -EINVAL;
		goto ehca_dereg_internal_maxmr_exit0;
	}

	e_maxmr = shca->maxmr;
	ib_pd = e_maxmr->ib.ib_mr.pd;
	shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */

	ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
	if (ret) {
		ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
			 "ret=%i e_maxmr=%p shca=%p lkey=%x",
			 ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
		shca->maxmr = e_maxmr;
		goto ehca_dereg_internal_maxmr_exit0;
	}

	atomic_dec(&ib_pd->usecnt);

ehca_dereg_internal_maxmr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
			 ret, shca, shca->maxmr);
	return ret;
} /* end ehca_dereg_internal_maxmr() */

/*----------------------------------------------------------------------*/

/*
 * check the physical buffer array of the MR verb for validity
 * and calculate the MR size
 */
int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
				  int num_phys_buf,
				  u64 *iova_start,
				  u64 *size)
{
	struct ib_phys_buf *pbuf = phys_buf_array;
	u64 size_count = 0;
	u32 i;

	if (num_phys_buf == 0) {
		ehca_gen_err("bad phys buf array len, num_phys_buf=0");
		return -EINVAL;
	}
	/* check first buffer */
	if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
		ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
			     "pbuf->addr=%llx pbuf->size=%llx",
			     iova_start, pbuf->addr, pbuf->size);
		return -EINVAL;
	}
	if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
	    (num_phys_buf > 1)) {
		ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
			     "pbuf->size=%llx", pbuf->addr, pbuf->size);
		return -EINVAL;
	}

	for (i = 0; i < num_phys_buf; i++) {
		if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
			ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
				     "pbuf->size=%llx",
				     i, pbuf->addr, pbuf->size);
			return -EINVAL;
		}
		if (((i > 0) &&	/* not 1st */
		     (i < (num_phys_buf - 1)) &&	/* not last */
		     (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
			ehca_gen_err("bad size, i=%x pbuf->size=%llx",
				     i, pbuf->size);
			return -EINVAL;
		}
		size_count += pbuf->size;
		pbuf++;
	}

	*size = size_count;
	return 0;
} /* end ehca_mr_chk_buf_and_calc_size() */

/*----------------------------------------------------------------------*/

/* check the page list of the map FMR verb for validity */
int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
			     u64 *page_list,
			     int list_len)
{
	u32 i;
	u64 *page;

	if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
		ehca_gen_err("bad list_len, list_len=%x "
			     "e_fmr->fmr_max_pages=%x fmr=%p",
			     list_len, e_fmr->fmr_max_pages, e_fmr);
		return -EINVAL;
	}

	/* each page must be aligned */
	page = page_list;
	for (i = 0; i < list_len; i++) {
		if (*page % e_fmr->fmr_page_size) {
			ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
				     "fmr_page_size=%x", i, *page, page, e_fmr,
				     e_fmr->fmr_page_size);
			return -EINVAL;
		}
		page++;
	}

	return 0;
} /* end ehca_fmr_check_page_list() */

/*----------------------------------------------------------------------*/

/*
 * fill hw pages for a user MR when PAGE_SIZE >= pginfo->hwpage_size,
 * i.e. each kernel page contains one or more hw pages
 */
static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
				  u32 number,
				  u64 *kpage)
{
	int ret = 0;
	u64 pgaddr;
	u32 j = 0;
	int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
	struct scatterlist **sg = &pginfo->u.usr.next_sg;

	while (*sg != NULL) {
		pgaddr = page_to_pfn(sg_page(*sg))
			<< PAGE_SHIFT;
		*kpage = pgaddr + (pginfo->next_hwpage *
				   pginfo->hwpage_size);
		if (!(*kpage)) {
			ehca_gen_err("pgaddr=%llx "
				     "sg_dma_address=%llx "
				     "entry=%llx next_hwpage=%llx",
				     pgaddr, (u64)sg_dma_address(*sg),
				     pginfo->u.usr.next_nmap,
				     pginfo->next_hwpage);
			return -EFAULT;
		}
		(pginfo->hwpage_cnt)++;
		(pginfo->next_hwpage)++;
		kpage++;
		if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
			(pginfo->kpage_cnt)++;
			(pginfo->u.usr.next_nmap)++;
			pginfo->next_hwpage = 0;
			*sg = sg_next(*sg);
		}
		j++;
		if (j >= number)
			break;
	}

	return ret;
}

/*
 * check the given pages for contiguous layout;
 * the last page address is returned in prev_pgaddr for further checking
 */
static int ehca_check_kpages_per_ate(struct scatterlist **sg,
				     int num_pages,
				     u64 *prev_pgaddr)
{
	for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
		u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
		if (ehca_debug_level >= 3)
			ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
				     *(u64 *)__va(pgaddr));
		if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
			ehca_gen_err("uncontiguous page found pgaddr=%llx "
				     "prev_pgaddr=%llx entries_left_in_hwpage=%x",
				     pgaddr, *prev_pgaddr, num_pages);
			return -EINVAL;
		}
		*prev_pgaddr = pgaddr;
	}
	return 0;
}

/*
 * fill hw pages for a user MR when PAGE_SIZE < pginfo->hwpage_size,
 * i.e. several contiguous kernel pages must be aggregated into one hw page
 */
static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
				  u32 number,
				  u64 *kpage)
{
	int ret = 0;
	u64 pgaddr, prev_pgaddr;
	u32 j = 0;
	int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
	int nr_kpages = kpages_per_hwpage;
	struct scatterlist **sg = &pginfo->u.usr.next_sg;

	while (*sg != NULL) {

		if (nr_kpages == kpages_per_hwpage) {
			pgaddr = (page_to_pfn(sg_page(*sg))
				   << PAGE_SHIFT);
			*kpage = pgaddr;
			if (!(*kpage)) {
				ehca_gen_err("pgaddr=%llx entry=%llx",
					     pgaddr, pginfo->u.usr.next_nmap);
				ret = -EFAULT;
				return ret;
			}
			/*
			 * The first page in a hwpage must be aligned;
			 * the first MR page is exempt from this rule.
			 */
			if (pgaddr & (pginfo->hwpage_size - 1)) {
				if (pginfo->hwpage_cnt) {
					ehca_gen_err(
						"invalid alignment "
						"pgaddr=%llx entry=%llx "
						"mr_pgsize=%llx",
						pgaddr, pginfo->u.usr.next_nmap,
						pginfo->hwpage_size);
					ret = -EFAULT;
					return ret;
				}
				/* first MR page */
				pginfo->kpage_cnt =
					(pgaddr &
					 (pginfo->hwpage_size - 1)) >>
					PAGE_SHIFT;
				nr_kpages -= pginfo->kpage_cnt;
				*kpage = pgaddr &
					 ~(pginfo->hwpage_size - 1);
			}
			if (ehca_debug_level >= 3) {
				u64 val = *(u64 *)__va(pgaddr);
				ehca_gen_dbg("kpage=%llx page=%llx "
					     "value=%016llx",
					     *kpage, pgaddr, val);
			}
			prev_pgaddr = pgaddr;
			*sg = sg_next(*sg);
			pginfo->kpage_cnt++;
			pginfo->u.usr.next_nmap++;
			nr_kpages--;
			if (!nr_kpages)
				goto next_kpage;
			continue;
		}

		ret = ehca_check_kpages_per_ate(sg, nr_kpages,
						&prev_pgaddr);
		if (ret)
			return ret;
		pginfo->kpage_cnt += nr_kpages;
		pginfo->u.usr.next_nmap += nr_kpages;

next_kpage:
		nr_kpages = kpages_per_hwpage;
		(pginfo->hwpage_cnt)++;
		kpage++;
		j++;
		if (j >= number)
			break;
	}

	return ret;
}

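/* setup hw pages from the physical buffer array (EHCA_MR_PGI_PHYS) */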
static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
				 u32 number, u64 *kpage)
{
	int ret = 0;
	struct ib_phys_buf *pbuf;
	u64 num_hw, offs_hw;
	u32 i = 0;

	/* loop over desired phys_buf_array entries */
	while (i < number) {
		pbuf   = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
		num_hw  = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
				     pbuf->size, pginfo->hwpage_size);
		offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
			pginfo->hwpage_size;
		while (pginfo->next_hwpage < offs_hw + num_hw) {
			/* sanity check */
			if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
			    (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
				ehca_gen_err("kpage_cnt or hwpage_cnt "
					     "out of range, "
					     "kpage_cnt=%llx num_kpages=%llx "
					     "hwpage_cnt=%llx "
					     "num_hwpages=%llx i=%x",
					     pginfo->kpage_cnt,
					     pginfo->num_kpages,
					     pginfo->hwpage_cnt,
					     pginfo->num_hwpages, i);
				return -EFAULT;
			}
			*kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
				 (pginfo->next_hwpage * pginfo->hwpage_size);
			if (!(*kpage) && pbuf->addr) {
				ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
					     "next_hwpage=%llx", pbuf->addr,
					     pbuf->size, pginfo->next_hwpage);
				return -EFAULT;
			}
			(pginfo->hwpage_cnt)++;
			(pginfo->next_hwpage)++;
			if (PAGE_SIZE >= pginfo->hwpage_size) {
				if (pginfo->next_hwpage %
				    (PAGE_SIZE / pginfo->hwpage_size) == 0)
					(pginfo->kpage_cnt)++;
			} else
				pginfo->kpage_cnt += pginfo->hwpage_size /
					PAGE_SIZE;
			kpage++;
			i++;
			if (i >= number)
				break;
		}
		if (pginfo->next_hwpage >= offs_hw + num_hw) {
			(pginfo->u.phy.next_buf)++;
			pginfo->next_hwpage = 0;
		}
	}
	return ret;
}

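/* setup hw pages from the FMR page list (EHCA_MR_PGI_FMR) */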
static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
				u32 number, u64 *kpage)
{
	int ret = 0;
	u64 *fmrlist;
	u32 i;

	/* loop over desired page_list entries */
	fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
	for (i = 0; i < number; i++) {
		*kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) +
			   pginfo->next_hwpage * pginfo->hwpage_size;
		if (!(*kpage)) {
			ehca_gen_err("*fmrlist=%llx fmrlist=%p "
				     "next_listelem=%llx next_hwpage=%llx",
				     *fmrlist, fmrlist,
				     pginfo->u.fmr.next_listelem,
				     pginfo->next_hwpage);
			return -EFAULT;
		}
		(pginfo->hwpage_cnt)++;
		if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
			if (pginfo->next_hwpage %
			    (pginfo->u.fmr.fmr_pgsize /
			     pginfo->hwpage_size) == 0) {
				(pginfo->kpage_cnt)++;
				(pginfo->u.fmr.next_listelem)++;
				fmrlist++;
				pginfo->next_hwpage = 0;
			} else
				(pginfo->next_hwpage)++;
		} else {
			unsigned int cnt_per_hwpage = pginfo->hwpage_size /
				pginfo->u.fmr.fmr_pgsize;
			unsigned int j;
			u64 prev = *kpage;
			/* check whether the addresses are contiguous */
			for (j = 1; j < cnt_per_hwpage; j++) {
				u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1);
				if (prev + pginfo->u.fmr.fmr_pgsize != p) {
					ehca_gen_err("uncontiguous fmr pages "
						     "found prev=%llx p=%llx "
						     "idx=%x", prev, p, i + j);
					return -EINVAL;
				}
				prev = p;
			}
			pginfo->kpage_cnt += cnt_per_hwpage;
			pginfo->u.fmr.next_listelem += cnt_per_hwpage;
			fmrlist += cnt_per_hwpage;
		}
		kpage++;
	}
	return ret;
}

/* setup page buffer from page info */
int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
		     u32 number,
		     u64 *kpage)
{
	int ret;

	switch (pginfo->type) {
	case EHCA_MR_PGI_PHYS:
		ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
		break;
	case EHCA_MR_PGI_USER:
		ret = PAGE_SIZE >= pginfo->hwpage_size ?
			ehca_set_pagebuf_user1(pginfo, number, kpage) :
			ehca_set_pagebuf_user2(pginfo, number, kpage);
		break;
	case EHCA_MR_PGI_FMR:
		ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
		break;
	default:
		ehca_gen_err("bad pginfo->type=%x", pginfo->type);
		ret = -EFAULT;
		break;
	}
	return ret;
} /* end ehca_set_pagebuf() */

/*----------------------------------------------------------------------*/

/*
 * check whether an MR covers all of memory, i.e. is a max-MR;
 * returns 1 if it is a max-MR, 0 otherwise
 */
int ehca_mr_is_maxmr(u64 size,
		     u64 *iova_start)
{
	/* an MR is treated as a max-MR only if it meets both of the following: */
	if ((size == ehca_mr_len) &&
	    (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
		ehca_gen_dbg("this is a max-MR");
		return 1;
	} else
		return 0;
} /* end ehca_mr_is_maxmr() */

/*----------------------------------------------------------------------*/

/* map IB access control bits to hipz access control; used for MR and MW */
void ehca_mrmw_map_acl(int ib_acl,
		       u32 *hipz_acl)
{
	*hipz_acl = 0;
	if (ib_acl & IB_ACCESS_REMOTE_READ)
		*hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
	if (ib_acl & IB_ACCESS_REMOTE_WRITE)
		*hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
	if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
		*hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
	if (ib_acl & IB_ACCESS_LOCAL_WRITE)
		*hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
	if (ib_acl & IB_ACCESS_MW_BIND)
		*hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
} /* end ehca_mrmw_map_acl() */

/*----------------------------------------------------------------------*/

/* sets page size in hipz access control for MR/MW */
void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
{
	*hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
} /* end ehca_mrmw_set_pgsize_hipz_acl() */

/*----------------------------------------------------------------------*/

/*
 * reverse map hipz access control bits to IB access control;
 * used for both MR and MW
 */
void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
			       int *ib_acl) /*OUT*/
{
	*ib_acl = 0;
	if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
		*ib_acl |= IB_ACCESS_REMOTE_READ;
	if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
		*ib_acl |= IB_ACCESS_REMOTE_WRITE;
	if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
		*ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
	if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
		*ib_acl |= IB_ACCESS_LOCAL_WRITE;
	if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
		*ib_acl |= IB_ACCESS_MW_BIND;
} /* end ehca_mrmw_reverse_map_acl() */

/*----------------------------------------------------------------------*/

/*
 * MR destructor and constructor, used by the reregister MR verb;
 * resets all fields of struct ehca_mr to 0, except for struct ib_mr
 * and the spinlock
 */
void ehca_mr_deletenew(struct ehca_mr *mr)
{
	mr->flags = 0;
	mr->num_kpages = 0;
	mr->num_hwpages = 0;
	mr->acl = 0;
	mr->start = NULL;
	mr->fmr_page_size = 0;
	mr->fmr_max_pages = 0;
	mr->fmr_max_maps = 0;
	mr->fmr_map_cnt = 0;
	memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
	memset(&mr->galpas, 0, sizeof(mr->galpas));
} /* end ehca_mr_deletenew() */

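/* create the slab caches for MR and MW control blocks */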
int ehca_init_mrmw_cache(void)
{
	mr_cache = kmem_cache_create("ehca_cache_mr",
				     sizeof(struct ehca_mr), 0,
				     SLAB_HWCACHE_ALIGN,
				     NULL);
	if (!mr_cache)
		return -ENOMEM;
	mw_cache = kmem_cache_create("ehca_cache_mw",
				     sizeof(struct ehca_mw), 0,
				     SLAB_HWCACHE_ALIGN,
				     NULL);
	if (!mw_cache) {
		kmem_cache_destroy(mr_cache);
		mr_cache = NULL;
		return -ENOMEM;
	}
	return 0;
}

void ehca_cleanup_mrmw_cache(void)
{
	if (mr_cache)
		kmem_cache_destroy(mr_cache);
	if (mw_cache)
		kmem_cache_destroy(mw_cache);
}

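/* allocate and invalidate one directory-level block of the bus map */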
static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
				     int dir)
{
	if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
		ehca_top_bmap->dir[dir] =
			kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
		if (!ehca_top_bmap->dir[dir])
			return -ENOMEM;
		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
		memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
	}
	return 0;
}

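/* allocate top- and directory-level blocks of the bus map as needed */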
static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
{
	if (!ehca_bmap_valid(ehca_bmap->top[top])) {
		ehca_bmap->top[top] =
			kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
		if (!ehca_bmap->top[top])
			return -ENOMEM;
		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
		memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
	}
	return ehca_init_top_bmap(ehca_bmap->top[top], dir);
}

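/*
 * extract one 13-bit bus map index from a section number, e.g.
 * top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT),
 * dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT)
 */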
static inline int ehca_calc_index(unsigned long i, unsigned long s)
{
	return (i >> s) & EHCA_INDEX_MASK;
}

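/* free all three levels of the bus map */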
void ehca_destroy_busmap(void)
{
	int top, dir;

	if (!ehca_bmap)
		return;

	for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
		if (!ehca_bmap_valid(ehca_bmap->top[top]))
			continue;
		for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
			if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
				continue;

			kfree(ehca_bmap->top[top]->dir[dir]);
		}

		kfree(ehca_bmap->top[top]);
	}

	kfree(ehca_bmap);
	ehca_bmap = NULL;
}

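/*
 * enter the given pfn range into the bus map, one memory section
 * (EHCA_SECTSIZE bytes) at a time; each section is assigned the next
 * free bus address, tracked in ehca_mr_len
 */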
static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
{
	unsigned long i, start_section, end_section;
	int top, dir, idx;

	if (!nr_pages)
		return 0;

	if (!ehca_bmap) {
		ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
		if (!ehca_bmap)
			return -ENOMEM;
		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
		memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
	}

	start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE;
	end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
	for (i = start_section; i < end_section; i++) {
		int ret;
		top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
		dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
		idx = i & EHCA_INDEX_MASK;

		ret = ehca_init_bmap(ehca_bmap, top, dir);
		if (ret) {
			ehca_destroy_busmap();
			return ret;
		}
		ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
		ehca_mr_len += EHCA_SECTSIZE;
	}
	return 0;
}

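/* check whether pfn is the first page of a 16GB (EHCA_HUGEPAGESHIFT) hugepage */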
static int ehca_is_hugepage(unsigned long pfn)
{
	int page_order;

	if (pfn & EHCA_HUGEPAGE_PFN_MASK)
		return 0;

	page_order = compound_order(pfn_to_page(pfn));
	if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
		return 0;

	return 1;
}

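/*
 * walk_system_ram_range() callback: enter regular memory into the bus
 * map; chunks large enough to hold 16GB hugepages are scanned so that
 * any hugepages found can be skipped
 */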
static int ehca_create_busmap_callback(unsigned long initial_pfn,
				       unsigned long total_nr_pages, void *arg)
{
	int ret;
	unsigned long pfn, start_pfn, end_pfn, nr_pages;

	if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
		return ehca_update_busmap(initial_pfn, total_nr_pages);

	/* Given chunk is >= 16GB -> check for hugepages */
	start_pfn = initial_pfn;
	end_pfn = initial_pfn + total_nr_pages;
	pfn = start_pfn;

	while (pfn < end_pfn) {
		if (ehca_is_hugepage(pfn)) {
			/* Add mem found in front of the hugepage */
			nr_pages = pfn - start_pfn;
			ret = ehca_update_busmap(start_pfn, nr_pages);
			if (ret)
				return ret;
			/* Skip the hugepage */
			pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
			start_pfn = pfn;
		} else
			pfn += (EHCA_SECTSIZE / PAGE_SIZE);
	}

	/* Add mem found behind the hugepage(s) */
	nr_pages = pfn - start_pfn;
	return ehca_update_busmap(start_pfn, nr_pages);
}

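/* build the bus map by walking all of system RAM */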
int ehca_create_busmap(void)
{
	int ret;

	ehca_mr_len = 0;
	ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
				   ehca_create_busmap_callback);
	return ret;
}

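/* register all valid directory sections of the bus map with the HCA */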
static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
				   struct ehca_mr *e_mr,
				   struct ehca_mr_pginfo *pginfo)
{
	int top;
	u64 hret = H_SUCCESS; /* in case the bus map has no valid top entry */
	u64 *kpage;

	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
	if (!kpage) {
		ehca_err(&shca->ib_device, "kpage alloc failed");
		return -ENOMEM;
	}
	for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
		if (!ehca_bmap_valid(ehca_bmap->top[top]))
			continue;
		hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
		if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
			break;
	}

	ehca_free_fw_ctrlblock(kpage);

	if (hret == H_SUCCESS)
		return 0; /* Everything is fine */

	ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
		 "h_ret=%lli e_mr=%p top=%x lkey=%x "
		 "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
		 e_mr->ib.ib_mr.lkey,
		 shca->ipz_hca_handle.handle,
		 e_mr->ipz_mr_handle.handle);
	return ehca2ib_return_code(hret);
}

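/*
 * translate a kernel virtual address into the bus address space built by
 * ehca_create_busmap(); returns EHCA_INVAL_ADDR for unmapped addresses
 */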
static u64 ehca_map_vaddr(void *caddr)
{
	int top, dir, idx;
	unsigned long abs_addr, offset;
	u64 entry;

	if (!ehca_bmap)
		return EHCA_INVAL_ADDR;

	abs_addr = __pa(caddr);
	top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
	if (!ehca_bmap_valid(ehca_bmap->top[top]))
		return EHCA_INVAL_ADDR;

	dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
	if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
		return EHCA_INVAL_ADDR;

	idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);

	entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
	if (ehca_bmap_valid(entry)) {
		offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
		return entry | offset;
	} else
		return EHCA_INVAL_ADDR;
}

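/*
 * DMA mapping callbacks for ehca_dma_mapping_ops below: the "DMA
 * addresses" handed out are bus-map translations of kernel virtual
 * addresses, so the unmap operations have nothing to undo
 */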
static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
	return dma_addr == EHCA_INVAL_ADDR;
}

static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
			       size_t size, enum dma_data_direction direction)
{
	if (cpu_addr)
		return ehca_map_vaddr(cpu_addr);
	else
		return EHCA_INVAL_ADDR;
}

static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
				  enum dma_data_direction direction)
{
	/* This is only a stub; nothing to be done here */
}

static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
			     unsigned long offset, size_t size,
			     enum dma_data_direction direction)
{
	u64 addr;

	if (offset + size > PAGE_SIZE)
		return EHCA_INVAL_ADDR;

	addr = ehca_map_vaddr(page_address(page));
	if (!ehca_dma_mapping_error(dev, addr))
		addr += offset;

	return addr;
}

static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
				enum dma_data_direction direction)
{
	/* This is only a stub; nothing to be done here */
}

static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
			   int nents, enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		u64 addr;
		addr = ehca_map_vaddr(sg_virt(sg));
		if (ehca_dma_mapping_error(dev, addr))
			return 0;

		sg->dma_address = addr;
		sg->dma_length = sg->length;
	}
	return nents;
}

static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
			      int nents, enum dma_data_direction direction)
{
	/* This is only a stub; nothing to be done here */
}

static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
					 size_t size,
					 enum dma_data_direction dir)
{
	dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
}

static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
					    size_t size,
					    enum dma_data_direction dir)
{
	dma_sync_single_for_device(dev->dma_device, addr, size, dir);
}

static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
				     u64 *dma_handle, gfp_t flag)
{
	struct page *p;
	void *addr = NULL;
	u64 dma_addr;

	p = alloc_pages(flag, get_order(size));
	if (p) {
		addr = page_address(p);
		dma_addr = ehca_map_vaddr(addr);
		if (ehca_dma_mapping_error(dev, dma_addr)) {
			free_pages((unsigned long)addr, get_order(size));
			return NULL;
		}
		if (dma_handle)
			*dma_handle = dma_addr;
		return addr;
	}
	return NULL;
}

static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
				   void *cpu_addr, u64 dma_handle)
{
	if (cpu_addr && size)
		free_pages((unsigned long)cpu_addr, get_order(size));
}

struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
	.mapping_error          = ehca_dma_mapping_error,
	.map_single             = ehca_dma_map_single,
	.unmap_single           = ehca_dma_unmap_single,
	.map_page               = ehca_dma_map_page,
	.unmap_page             = ehca_dma_unmap_page,
	.map_sg                 = ehca_dma_map_sg,
	.unmap_sg               = ehca_dma_unmap_sg,
	.sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
	.sync_single_for_device = ehca_dma_sync_single_for_device,
	.alloc_coherent         = ehca_dma_alloc_coherent,
	.free_coherent          = ehca_dma_free_coherent,
};