1/*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for          *
3 * RoCE (RDMA over Converged Ethernet) adapters.                   *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
5 * EMULEX and SLI are trademarks of Emulex.                        *
6 * www.emulex.com                                                  *
7 *                                                                 *
8 * This program is free software; you can redistribute it and/or   *
9 * modify it under the terms of version 2 of the GNU General       *
10 * Public License as published by the Free Software Foundation.    *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
17 * more details, a copy of which can be found in the file COPYING  *
18 * included with this package.                                     *
19 *
20 * Contact Information:
21 * linux-drivers@emulex.com
22 *
23 * Emulex
24 * 3333 Susan Street
25 * Costa Mesa, CA 92626
26 *******************************************************************/
27
28#include <linux/dma-mapping.h>
29#include <rdma/ib_verbs.h>
30#include <rdma/ib_user_verbs.h>
31#include <rdma/iw_cm.h>
32#include <rdma/ib_umem.h>
33#include <rdma/ib_addr.h>
34
35#include "ocrdma.h"
36#include "ocrdma_hw.h"
37#include "ocrdma_verbs.h"
38#include "ocrdma_abi.h"
39
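/* The device exposes a single P_Key table entry (see max_pkeys and
 * pkey_tbl_len below); queries simply report the default full-membership
 * P_Key value 0xffff.
 */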
40int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
41{
	if (index > 0)
43		return -EINVAL;
44
45	*pkey = 0xffff;
46	return 0;
47}
48
49int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
50		     int index, union ib_gid *sgid)
51{
52	struct ocrdma_dev *dev;
53
54	dev = get_ocrdma_dev(ibdev);
55	memset(sgid, 0, sizeof(*sgid));
56	if (index >= OCRDMA_MAX_SGID)
57		return -EINVAL;
58
59	memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid));
60
61	return 0;
62}
63
64int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
65{
66	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
67
68	memset(attr, 0, sizeof *attr);
69	memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
70	       min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
71	ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
72	attr->max_mr_size = dev->attr.max_mr_size;
73	attr->page_size_cap = 0xffff000;
74	attr->vendor_id = dev->nic_info.pdev->vendor;
75	attr->vendor_part_id = dev->nic_info.pdev->device;
76	attr->hw_ver = dev->asic_id;
77	attr->max_qp = dev->attr.max_qp;
78	attr->max_ah = OCRDMA_MAX_AH;
79	attr->max_qp_wr = dev->attr.max_wqe;
80
81	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
82					IB_DEVICE_RC_RNR_NAK_GEN |
83					IB_DEVICE_SHUTDOWN_PORT |
84					IB_DEVICE_SYS_IMAGE_GUID |
85					IB_DEVICE_LOCAL_DMA_LKEY |
86					IB_DEVICE_MEM_MGT_EXTENSIONS;
87	attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge);
88	attr->max_sge_rd = 0;
89	attr->max_cq = dev->attr.max_cq;
90	attr->max_cqe = dev->attr.max_cqe;
91	attr->max_mr = dev->attr.max_mr;
92	attr->max_mw = dev->attr.max_mw;
93	attr->max_pd = dev->attr.max_pd;
94	attr->atomic_cap = 0;
95	attr->max_fmr = 0;
96	attr->max_map_per_fmr = 0;
97	attr->max_qp_rd_atom =
98	    min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
99	attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
100	attr->max_srq = dev->attr.max_srq;
101	attr->max_srq_sge = dev->attr.max_srq_sge;
102	attr->max_srq_wr = dev->attr.max_rqe;
103	attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
104	attr->max_fast_reg_page_list_len = dev->attr.max_pages_per_frmr;
105	attr->max_pkeys = 1;
106	return 0;
107}
108
109static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
110					    u8 *ib_speed, u8 *ib_width)
111{
112	int status;
113	u8 speed;
114
115	status = ocrdma_mbx_get_link_speed(dev, &speed);
116	if (status)
117		speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
118
119	switch (speed) {
120	case OCRDMA_PHYS_LINK_SPEED_1GBPS:
121		*ib_speed = IB_SPEED_SDR;
122		*ib_width = IB_WIDTH_1X;
123		break;
124
125	case OCRDMA_PHYS_LINK_SPEED_10GBPS:
126		*ib_speed = IB_SPEED_QDR;
127		*ib_width = IB_WIDTH_1X;
128		break;
129
130	case OCRDMA_PHYS_LINK_SPEED_20GBPS:
131		*ib_speed = IB_SPEED_DDR;
132		*ib_width = IB_WIDTH_4X;
133		break;
134
135	case OCRDMA_PHYS_LINK_SPEED_40GBPS:
136		*ib_speed = IB_SPEED_QDR;
137		*ib_width = IB_WIDTH_4X;
138		break;
139
140	default:
141		/* Unsupported */
142		*ib_speed = IB_SPEED_SDR;
143		*ib_width = IB_WIDTH_1X;
144	}
145}
146
147int ocrdma_query_port(struct ib_device *ibdev,
148		      u8 port, struct ib_port_attr *props)
149{
150	enum ib_port_state port_state;
151	struct ocrdma_dev *dev;
152	struct net_device *netdev;
153
154	dev = get_ocrdma_dev(ibdev);
155	if (port > 1) {
156		pr_err("%s(%d) invalid_port=0x%x\n", __func__,
157		       dev->id, port);
158		return -EINVAL;
159	}
160	netdev = dev->nic_info.netdev;
161	if (netif_running(netdev) && netif_oper_up(netdev)) {
162		port_state = IB_PORT_ACTIVE;
163		props->phys_state = 5;
164	} else {
165		port_state = IB_PORT_DOWN;
166		props->phys_state = 3;
167	}
168	props->max_mtu = IB_MTU_4096;
169	props->active_mtu = iboe_get_mtu(netdev->mtu);
170	props->lid = 0;
171	props->lmc = 0;
172	props->sm_lid = 0;
173	props->sm_sl = 0;
174	props->state = port_state;
175	props->port_cap_flags =
176	    IB_PORT_CM_SUP |
177	    IB_PORT_REINIT_SUP |
178	    IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_IP_BASED_GIDS;
179	props->gid_tbl_len = OCRDMA_MAX_SGID;
180	props->pkey_tbl_len = 1;
181	props->bad_pkey_cntr = 0;
182	props->qkey_viol_cntr = 0;
183	get_link_speed_and_width(dev, &props->active_speed,
184				 &props->active_width);
185	props->max_msg_sz = 0x80000000;
186	props->max_vl_num = 4;
187	return 0;
188}
189
190int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
191		       struct ib_port_modify *props)
192{
193	struct ocrdma_dev *dev;
194
195	dev = get_ocrdma_dev(ibdev);
196	if (port > 1) {
197		pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
198		return -EINVAL;
199	}
200	return 0;
201}
202
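/* Remember a (physical address, length) pair that user space is allowed to
 * mmap().  ocrdma_mmap() later validates incoming requests against this
 * per-ucontext list via ocrdma_search_mmap().
 */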
203static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
204			   unsigned long len)
205{
206	struct ocrdma_mm *mm;
207
208	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
209	if (mm == NULL)
210		return -ENOMEM;
211	mm->key.phy_addr = phy_addr;
212	mm->key.len = len;
213	INIT_LIST_HEAD(&mm->entry);
214
215	mutex_lock(&uctx->mm_list_lock);
216	list_add_tail(&mm->entry, &uctx->mm_head);
217	mutex_unlock(&uctx->mm_list_lock);
218	return 0;
219}
220
221static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
222			    unsigned long len)
223{
224	struct ocrdma_mm *mm, *tmp;
225
226	mutex_lock(&uctx->mm_list_lock);
227	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
228		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
229			continue;
230
231		list_del(&mm->entry);
232		kfree(mm);
233		break;
234	}
235	mutex_unlock(&uctx->mm_list_lock);
236}
237
238static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
239			      unsigned long len)
240{
241	bool found = false;
242	struct ocrdma_mm *mm;
243
244	mutex_lock(&uctx->mm_list_lock);
245	list_for_each_entry(mm, &uctx->mm_head, entry) {
246		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
247			continue;
248
249		found = true;
250		break;
251	}
252	mutex_unlock(&uctx->mm_list_lock);
253	return found;
254}
255
256
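/* PD-number pre-allocation support: when firmware hands the driver a range
 * of PD ids up front (pd_mgr->pd_prealloc_valid), individual PDs are carved
 * out of that range with a bitmap instead of a mailbox call per PD.  The
 * *_thrsh fields track the high-water mark of PDs in use.  Callers hold
 * dev->dev_lock.
 */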
257static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool)
258{
259	u16 pd_bitmap_idx = 0;
260	const unsigned long *pd_bitmap;
261
262	if (dpp_pool) {
263		pd_bitmap = dev->pd_mgr->pd_dpp_bitmap;
264		pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
265						    dev->pd_mgr->max_dpp_pd);
266		__set_bit(pd_bitmap_idx, dev->pd_mgr->pd_dpp_bitmap);
267		dev->pd_mgr->pd_dpp_count++;
268		if (dev->pd_mgr->pd_dpp_count > dev->pd_mgr->pd_dpp_thrsh)
269			dev->pd_mgr->pd_dpp_thrsh = dev->pd_mgr->pd_dpp_count;
270	} else {
271		pd_bitmap = dev->pd_mgr->pd_norm_bitmap;
272		pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
273						    dev->pd_mgr->max_normal_pd);
274		__set_bit(pd_bitmap_idx, dev->pd_mgr->pd_norm_bitmap);
275		dev->pd_mgr->pd_norm_count++;
276		if (dev->pd_mgr->pd_norm_count > dev->pd_mgr->pd_norm_thrsh)
277			dev->pd_mgr->pd_norm_thrsh = dev->pd_mgr->pd_norm_count;
278	}
279	return pd_bitmap_idx;
280}
281
282static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id,
283					bool dpp_pool)
284{
285	u16 pd_count;
286	u16 pd_bit_index;
287
288	pd_count = dpp_pool ? dev->pd_mgr->pd_dpp_count :
289			      dev->pd_mgr->pd_norm_count;
290	if (pd_count == 0)
291		return -EINVAL;
292
293	if (dpp_pool) {
294		pd_bit_index = pd_id - dev->pd_mgr->pd_dpp_start;
295		if (pd_bit_index >= dev->pd_mgr->max_dpp_pd) {
296			return -EINVAL;
297		} else {
298			__clear_bit(pd_bit_index, dev->pd_mgr->pd_dpp_bitmap);
299			dev->pd_mgr->pd_dpp_count--;
300		}
301	} else {
302		pd_bit_index = pd_id - dev->pd_mgr->pd_norm_start;
303		if (pd_bit_index >= dev->pd_mgr->max_normal_pd) {
304			return -EINVAL;
305		} else {
306			__clear_bit(pd_bit_index, dev->pd_mgr->pd_norm_bitmap);
307			dev->pd_mgr->pd_norm_count--;
308		}
309	}
310
311	return 0;
312}
313
static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
315				   bool dpp_pool)
316{
317	int status;
318
319	mutex_lock(&dev->dev_lock);
320	status = _ocrdma_pd_mgr_put_bitmap(dev, pd_id, dpp_pool);
321	mutex_unlock(&dev->dev_lock);
322	return status;
323}
324
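/* Pick a PD id from the pre-allocated pool.  A DPP-capable request tries
 * the DPP pool first and falls back to a normal PD (clearing
 * pd->dpp_enabled) when the DPP pool is exhausted.
 */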
325static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
326{
327	u16 pd_idx = 0;
328	int status = 0;
329
330	mutex_lock(&dev->dev_lock);
331	if (pd->dpp_enabled) {
332		/* try allocating DPP PD, if not available then normal PD */
333		if (dev->pd_mgr->pd_dpp_count < dev->pd_mgr->max_dpp_pd) {
334			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, true);
335			pd->id = dev->pd_mgr->pd_dpp_start + pd_idx;
336			pd->dpp_page = dev->pd_mgr->dpp_page_index + pd_idx;
337		} else if (dev->pd_mgr->pd_norm_count <
338			   dev->pd_mgr->max_normal_pd) {
339			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
340			pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
341			pd->dpp_enabled = false;
342		} else {
343			status = -EINVAL;
344		}
345	} else {
346		if (dev->pd_mgr->pd_norm_count < dev->pd_mgr->max_normal_pd) {
347			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
348			pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
349		} else {
350			status = -EINVAL;
351		}
352	}
353	mutex_unlock(&dev->dev_lock);
354	return status;
355}
356
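/* Common PD allocation path.  For user contexts on DPP-capable hardware the
 * PD is created DPP-enabled; if the firmware rejects that, the allocation
 * is retried once with DPP disabled.  When the pre-allocated PD pool is
 * valid, the id comes from the local bitmap instead of a mailbox command.
 */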
357static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
358					  struct ocrdma_ucontext *uctx,
359					  struct ib_udata *udata)
360{
361	struct ocrdma_pd *pd = NULL;
362	int status = 0;
363
364	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
365	if (!pd)
366		return ERR_PTR(-ENOMEM);
367
368	if (udata && uctx && dev->attr.max_dpp_pds) {
369		pd->dpp_enabled =
370			ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
371		pd->num_dpp_qp =
372			pd->dpp_enabled ? (dev->nic_info.db_page_size /
373					   dev->attr.wqe_size) : 0;
374	}
375
376	if (dev->pd_mgr->pd_prealloc_valid) {
377		status = ocrdma_get_pd_num(dev, pd);
378		return (status == 0) ? pd : ERR_PTR(status);
379	}
380
381retry:
382	status = ocrdma_mbx_alloc_pd(dev, pd);
383	if (status) {
384		if (pd->dpp_enabled) {
385			pd->dpp_enabled = false;
386			pd->num_dpp_qp = 0;
387			goto retry;
388		} else {
389			kfree(pd);
390			return ERR_PTR(status);
391		}
392	}
393
394	return pd;
395}
396
397static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
398				 struct ocrdma_pd *pd)
399{
	return uctx->cntxt_pd == pd;
401}
402
403static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
404			      struct ocrdma_pd *pd)
405{
406	int status = 0;
407
408	if (dev->pd_mgr->pd_prealloc_valid)
409		status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
410	else
411		status = ocrdma_mbx_dealloc_pd(dev, pd);
412
413	kfree(pd);
414	return status;
415}
416
417static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
418				    struct ocrdma_ucontext *uctx,
419				    struct ib_udata *udata)
420{
421	int status = 0;
422
423	uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata);
424	if (IS_ERR(uctx->cntxt_pd)) {
425		status = PTR_ERR(uctx->cntxt_pd);
426		uctx->cntxt_pd = NULL;
427		goto err;
428	}
429
430	uctx->cntxt_pd->uctx = uctx;
431	uctx->cntxt_pd->ibpd.device = &dev->ibdev;
432err:
433	return status;
434}
435
436static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
437{
438	struct ocrdma_pd *pd = uctx->cntxt_pd;
439	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
440
441	if (uctx->pd_in_use) {
442		pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
443		       __func__, dev->id, pd->id);
444	}
445	uctx->cntxt_pd = NULL;
446	(void)_ocrdma_dealloc_pd(dev, pd);
447	return 0;
448}
449
450static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
451{
452	struct ocrdma_pd *pd = NULL;
453
454	mutex_lock(&uctx->mm_list_lock);
455	if (!uctx->pd_in_use) {
456		uctx->pd_in_use = true;
457		pd = uctx->cntxt_pd;
458	}
459	mutex_unlock(&uctx->mm_list_lock);
460
461	return pd;
462}
463
464static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
465{
466	mutex_lock(&uctx->mm_list_lock);
467	uctx->pd_in_use = false;
468	mutex_unlock(&uctx->mm_list_lock);
469}
470
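/* Per-process ucontext setup: a DMA-coherent address-handle table is
 * allocated and exposed through the mmap list, and the queue sizing
 * parameters needed by the user-space provider are returned via the
 * ocrdma_abi.h response structure.
 *
 * Rough user-space path (a sketch via libibverbs; exact behaviour depends
 * on the installed provider library):
 *
 *	struct ibv_context *ctx = ibv_open_device(dev); // -> ocrdma_alloc_ucontext()
 *	struct ibv_pd *pd = ibv_alloc_pd(ctx);          // -> ocrdma_alloc_pd()
 */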
471struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
472					  struct ib_udata *udata)
473{
474	int status;
475	struct ocrdma_ucontext *ctx;
476	struct ocrdma_alloc_ucontext_resp resp;
477	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
478	struct pci_dev *pdev = dev->nic_info.pdev;
479	u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);
480
481	if (!udata)
482		return ERR_PTR(-EFAULT);
483	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
484	if (!ctx)
485		return ERR_PTR(-ENOMEM);
486	INIT_LIST_HEAD(&ctx->mm_head);
487	mutex_init(&ctx->mm_list_lock);
488
489	ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
490					    &ctx->ah_tbl.pa, GFP_KERNEL);
491	if (!ctx->ah_tbl.va) {
492		kfree(ctx);
493		return ERR_PTR(-ENOMEM);
494	}
495	memset(ctx->ah_tbl.va, 0, map_len);
496	ctx->ah_tbl.len = map_len;
497
498	memset(&resp, 0, sizeof(resp));
499	resp.ah_tbl_len = ctx->ah_tbl.len;
500	resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va);
501
502	status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
503	if (status)
504		goto map_err;
505
506	status = ocrdma_alloc_ucontext_pd(dev, ctx, udata);
507	if (status)
508		goto pd_err;
509
510	resp.dev_id = dev->id;
511	resp.max_inline_data = dev->attr.max_inline_data;
512	resp.wqe_size = dev->attr.wqe_size;
513	resp.rqe_size = dev->attr.rqe_size;
514	resp.dpp_wqe_size = dev->attr.wqe_size;
515
516	memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
517	status = ib_copy_to_udata(udata, &resp, sizeof(resp));
518	if (status)
519		goto cpy_err;
520	return &ctx->ibucontext;
521
522cpy_err:
523pd_err:
524	ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
525map_err:
526	dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
527			  ctx->ah_tbl.pa);
528	kfree(ctx);
529	return ERR_PTR(status);
530}
531
532int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
533{
534	int status = 0;
535	struct ocrdma_mm *mm, *tmp;
536	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
537	struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
538	struct pci_dev *pdev = dev->nic_info.pdev;
539
540	status = ocrdma_dealloc_ucontext_pd(uctx);
541
542	ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
543	dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
544			  uctx->ah_tbl.pa);
545
546	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
547		list_del(&mm->entry);
548		kfree(mm);
549	}
550	kfree(uctx);
551	return status;
552}
553
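/* Validate and map one of the regions previously registered with
 * ocrdma_add_mmap():
 *   - doorbell pages are mapped uncached and must not be readable,
 *   - DPP (direct packet push) windows are mapped write-combined and must
 *     not be readable,
 *   - anything else (queue memory) is mapped as ordinary memory.
 */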
554int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
555{
556	struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
557	struct ocrdma_dev *dev = get_ocrdma_dev(context->device);
558	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
559	u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
560	unsigned long len = (vma->vm_end - vma->vm_start);
561	int status = 0;
562	bool found;
563
564	if (vma->vm_start & (PAGE_SIZE - 1))
565		return -EINVAL;
566	found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len);
567	if (!found)
568		return -EINVAL;
569
570	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
571		dev->nic_info.db_total_size)) &&
572		(len <=	dev->nic_info.db_page_size)) {
573		if (vma->vm_flags & VM_READ)
574			return -EPERM;
575
576		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
577		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
578					    len, vma->vm_page_prot);
579	} else if (dev->nic_info.dpp_unmapped_len &&
580		(vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
581		(vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
582			dev->nic_info.dpp_unmapped_len)) &&
583		(len <= dev->nic_info.dpp_unmapped_len)) {
584		if (vma->vm_flags & VM_READ)
585			return -EPERM;
586
587		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
588		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
589					    len, vma->vm_page_prot);
590	} else {
591		status = remap_pfn_range(vma, vma->vm_start,
592					 vma->vm_pgoff, len, vma->vm_page_prot);
593	}
594	return status;
595}
596
597static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
598				struct ib_ucontext *ib_ctx,
599				struct ib_udata *udata)
600{
601	int status;
602	u64 db_page_addr;
603	u64 dpp_page_addr = 0;
604	u32 db_page_size;
605	struct ocrdma_alloc_pd_uresp rsp;
606	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
607
608	memset(&rsp, 0, sizeof(rsp));
609	rsp.id = pd->id;
610	rsp.dpp_enabled = pd->dpp_enabled;
611	db_page_addr = ocrdma_get_db_addr(dev, pd->id);
612	db_page_size = dev->nic_info.db_page_size;
613
614	status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
615	if (status)
616		return status;
617
618	if (pd->dpp_enabled) {
619		dpp_page_addr = dev->nic_info.dpp_unmapped_addr +
620				(pd->id * PAGE_SIZE);
621		status = ocrdma_add_mmap(uctx, dpp_page_addr,
622				 PAGE_SIZE);
623		if (status)
624			goto dpp_map_err;
625		rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
626		rsp.dpp_page_addr_lo = dpp_page_addr;
627	}
628
629	status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
630	if (status)
631		goto ucopy_err;
632
633	pd->uctx = uctx;
634	return 0;
635
636ucopy_err:
637	if (pd->dpp_enabled)
638		ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
639dpp_map_err:
640	ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
641	return status;
642}
643
644struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
645			      struct ib_ucontext *context,
646			      struct ib_udata *udata)
647{
648	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
649	struct ocrdma_pd *pd;
650	struct ocrdma_ucontext *uctx = NULL;
651	int status;
652	u8 is_uctx_pd = false;
653
654	if (udata && context) {
655		uctx = get_ocrdma_ucontext(context);
656		pd = ocrdma_get_ucontext_pd(uctx);
657		if (pd) {
658			is_uctx_pd = true;
659			goto pd_mapping;
660		}
661	}
662
663	pd = _ocrdma_alloc_pd(dev, uctx, udata);
664	if (IS_ERR(pd)) {
665		status = PTR_ERR(pd);
666		goto exit;
667	}
668
669pd_mapping:
670	if (udata && context) {
671		status = ocrdma_copy_pd_uresp(dev, pd, context, udata);
672		if (status)
673			goto err;
674	}
675	return &pd->ibpd;
676
677err:
678	if (is_uctx_pd) {
679		ocrdma_release_ucontext_pd(uctx);
680	} else {
681		status = _ocrdma_dealloc_pd(dev, pd);
682	}
683exit:
684	return ERR_PTR(status);
685}
686
687int ocrdma_dealloc_pd(struct ib_pd *ibpd)
688{
689	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
690	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
691	struct ocrdma_ucontext *uctx = NULL;
692	int status = 0;
693	u64 usr_db;
694
695	uctx = pd->uctx;
696	if (uctx) {
697		u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
698			(pd->id * PAGE_SIZE);
699		if (pd->dpp_enabled)
700			ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
701		usr_db = ocrdma_get_db_addr(dev, pd->id);
702		ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
703
704		if (is_ucontext_pd(uctx, pd)) {
705			ocrdma_release_ucontext_pd(uctx);
706			return status;
707		}
708	}
709	status = _ocrdma_dealloc_pd(dev, pd);
710	return status;
711}
712
713static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
714			    u32 pdid, int acc, u32 num_pbls, u32 addr_check)
715{
716	int status;
717
718	mr->hwmr.fr_mr = 0;
719	mr->hwmr.local_rd = 1;
720	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
721	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
722	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
723	mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
724	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
725	mr->hwmr.num_pbls = num_pbls;
726
727	status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check);
728	if (status)
729		return status;
730
731	mr->ibmr.lkey = mr->hwmr.lkey;
732	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
733		mr->ibmr.rkey = mr->hwmr.lkey;
734	return 0;
735}
736
737struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
738{
739	int status;
740	struct ocrdma_mr *mr;
741	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
742	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
743
744	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
745		pr_err("%s err, invalid access rights\n", __func__);
746		return ERR_PTR(-EINVAL);
747	}
748
749	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
750	if (!mr)
751		return ERR_PTR(-ENOMEM);
752
753	status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0,
754				   OCRDMA_ADDR_CHECK_DISABLE);
755	if (status) {
756		kfree(mr);
757		return ERR_PTR(status);
758	}
759
760	return &mr->ibmr;
761}
762
763static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
764				   struct ocrdma_hw_mr *mr)
765{
766	struct pci_dev *pdev = dev->nic_info.pdev;
767	int i = 0;
768
769	if (mr->pbl_table) {
770		for (i = 0; i < mr->num_pbls; i++) {
771			if (!mr->pbl_table[i].va)
772				continue;
773			dma_free_coherent(&pdev->dev, mr->pbl_size,
774					  mr->pbl_table[i].va,
775					  mr->pbl_table[i].pa);
776		}
777		kfree(mr->pbl_table);
778		mr->pbl_table = NULL;
779	}
780}
781
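/* Decide how the PBEs (64-bit page addresses) of this MR are split into
 * PBLs (physical buffer lists).  The PBL size starts at
 * OCRDMA_MIN_HPAGE_SIZE and doubles until the PBL count, computed as
 * DIV_ROUND_UP(num_pbes, pbl_size / 8), drops below the device limit
 * attr.max_num_mr_pbl (or MAX_OCRDMA_PBL_SIZE is exceeded).  For example,
 * assuming a 4K minimum PBL (512 PBEs per PBL), an MR of one million pages
 * needs 1954 PBLs on the first pass, and the PBL size keeps doubling until
 * the count fits the device limit.
 */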
782static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
783			      u32 num_pbes)
784{
785	u32 num_pbls = 0;
786	u32 idx = 0;
787	int status = 0;
788	u32 pbl_size;
789
790	do {
791		pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
792		if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
793			status = -EFAULT;
794			break;
795		}
796		num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
797		num_pbls = num_pbls / (pbl_size / sizeof(u64));
798		idx++;
799	} while (num_pbls >= dev->attr.max_num_mr_pbl);
800
801	mr->hwmr.num_pbes = num_pbes;
802	mr->hwmr.num_pbls = num_pbls;
803	mr->hwmr.pbl_size = pbl_size;
804	return status;
805}
806
807static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
808{
809	int status = 0;
810	int i;
811	u32 dma_len = mr->pbl_size;
812	struct pci_dev *pdev = dev->nic_info.pdev;
813	void *va;
814	dma_addr_t pa;
815
	mr->pbl_table = kcalloc(mr->num_pbls, sizeof(struct ocrdma_pbl),
				GFP_KERNEL);
818
819	if (!mr->pbl_table)
820		return -ENOMEM;
821
822	for (i = 0; i < mr->num_pbls; i++) {
823		va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
824		if (!va) {
825			ocrdma_free_mr_pbl_tbl(dev, mr);
826			status = -ENOMEM;
827			break;
828		}
829		memset(va, 0, dma_len);
830		mr->pbl_table[i].va = va;
831		mr->pbl_table[i].pa = pa;
832	}
833	return status;
834}
835
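/* Walk the pinned umem scatterlist and write one little-endian PBE per HW
 * page (the 64-bit DMA address split into lo/hi words), spilling over to
 * the next PBL whenever the current one is full.
 */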
836static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
837			    u32 num_pbes)
838{
839	struct ocrdma_pbe *pbe;
840	struct scatterlist *sg;
841	struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
842	struct ib_umem *umem = mr->umem;
843	int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0;
844
845	if (!mr->hwmr.num_pbes)
846		return;
847
848	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
849	pbe_cnt = 0;
850
851	shift = ilog2(umem->page_size);
852
853	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
854		pages = sg_dma_len(sg) >> shift;
855		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
856			/* store the page address in pbe */
857			pbe->pa_lo =
858			    cpu_to_le32(sg_dma_address
859					(sg) +
860					(umem->page_size * pg_cnt));
861			pbe->pa_hi =
862			    cpu_to_le32(upper_32_bits
863					((sg_dma_address
864					  (sg) +
865					  umem->page_size * pg_cnt)));
866			pbe_cnt += 1;
867			total_num_pbes += 1;
868			pbe++;
869
			/* all pbes built; the caller issues the mbx cmd. */
871			if (total_num_pbes == num_pbes)
872				return;
873
			/* if the current pbl has no room for more pbes,
			 * move to the next pbl.
			 */
877			if (pbe_cnt ==
878				(mr->hwmr.pbl_size / sizeof(u64))) {
879				pbl_tbl++;
880				pbe = (struct ocrdma_pbe *)pbl_tbl->va;
881				pbe_cnt = 0;
882			}
883
884		}
885	}
886}
887
888struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
889				 u64 usr_addr, int acc, struct ib_udata *udata)
890{
891	int status = -ENOMEM;
892	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
893	struct ocrdma_mr *mr;
894	struct ocrdma_pd *pd;
895	u32 num_pbes;
896
897	pd = get_ocrdma_pd(ibpd);
898
899	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
900		return ERR_PTR(-EINVAL);
901
902	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
903	if (!mr)
904		return ERR_PTR(status);
905	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
906	if (IS_ERR(mr->umem)) {
907		status = -EFAULT;
908		goto umem_err;
909	}
910	num_pbes = ib_umem_page_count(mr->umem);
911	status = ocrdma_get_pbl_info(dev, mr, num_pbes);
912	if (status)
913		goto umem_err;
914
915	mr->hwmr.pbe_size = mr->umem->page_size;
916	mr->hwmr.fbo = ib_umem_offset(mr->umem);
917	mr->hwmr.va = usr_addr;
918	mr->hwmr.len = len;
919	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
920	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
921	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
922	mr->hwmr.local_rd = 1;
923	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
924	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
925	if (status)
926		goto umem_err;
927	build_user_pbes(dev, mr, num_pbes);
928	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
929	if (status)
930		goto mbx_err;
931	mr->ibmr.lkey = mr->hwmr.lkey;
932	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
933		mr->ibmr.rkey = mr->hwmr.lkey;
934
935	return &mr->ibmr;
936
mbx_err:
	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
umem_err:
	if (!IS_ERR_OR_NULL(mr->umem))
		ib_umem_release(mr->umem); /* drop pages pinned by ib_umem_get() */
	kfree(mr);
	return ERR_PTR(status);
942}
943
944int ocrdma_dereg_mr(struct ib_mr *ib_mr)
945{
946	struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
947	struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device);
948
949	(void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
950
951	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
952
953	/* it could be user registered memory. */
954	if (mr->umem)
955		ib_umem_release(mr->umem);
956	kfree(mr);
957
958	/* Don't stop cleanup, in case FW is unresponsive */
959	if (dev->mqe_ctx.fw_error_state) {
960		pr_err("%s(%d) fw not responding.\n",
961		       __func__, dev->id);
962	}
963	return 0;
964}
965
966static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
967				struct ib_udata *udata,
968				struct ib_ucontext *ib_ctx)
969{
970	int status;
971	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
972	struct ocrdma_create_cq_uresp uresp;
973
974	memset(&uresp, 0, sizeof(uresp));
975	uresp.cq_id = cq->id;
976	uresp.page_size = PAGE_ALIGN(cq->len);
977	uresp.num_pages = 1;
978	uresp.max_hw_cqe = cq->max_hw_cqe;
979	uresp.page_addr[0] = virt_to_phys(cq->va);
980	uresp.db_page_addr =  ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
981	uresp.db_page_size = dev->nic_info.db_page_size;
982	uresp.phase_change = cq->phase_change ? 1 : 0;
983	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
984	if (status) {
985		pr_err("%s(%d) copy error cqid=0x%x.\n",
986		       __func__, dev->id, cq->id);
987		goto err;
988	}
989	status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
990	if (status)
991		goto err;
992	status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
993	if (status) {
994		ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
995		goto err;
996	}
997	cq->ucontext = uctx;
998err:
999	return status;
1000}
1001
1002struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
1003			       struct ib_ucontext *ib_ctx,
1004			       struct ib_udata *udata)
1005{
1006	struct ocrdma_cq *cq;
1007	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
1008	struct ocrdma_ucontext *uctx = NULL;
1009	u16 pd_id = 0;
1010	int status;
1011	struct ocrdma_create_cq_ureq ureq;
1012
1013	if (udata) {
1014		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
1015			return ERR_PTR(-EFAULT);
1016	} else
1017		ureq.dpp_cq = 0;
1018	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
1019	if (!cq)
1020		return ERR_PTR(-ENOMEM);
1021
1022	spin_lock_init(&cq->cq_lock);
1023	spin_lock_init(&cq->comp_handler_lock);
1024	INIT_LIST_HEAD(&cq->sq_head);
1025	INIT_LIST_HEAD(&cq->rq_head);
1026	cq->first_arm = true;
1027
1028	if (ib_ctx) {
1029		uctx = get_ocrdma_ucontext(ib_ctx);
1030		pd_id = uctx->cntxt_pd->id;
1031	}
1032
1033	status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
1034	if (status) {
1035		kfree(cq);
1036		return ERR_PTR(status);
1037	}
1038	if (ib_ctx) {
1039		status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx);
1040		if (status)
1041			goto ctx_err;
1042	}
1043	cq->phase = OCRDMA_CQE_VALID;
1044	dev->cq_tbl[cq->id] = cq;
1045	return &cq->ibcq;
1046
1047ctx_err:
1048	ocrdma_mbx_destroy_cq(dev, cq);
1049	kfree(cq);
1050	return ERR_PTR(status);
1051}
1052
1053int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
1054		     struct ib_udata *udata)
1055{
1056	int status = 0;
1057	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
1058
1059	if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
1060		status = -EINVAL;
1061		return status;
1062	}
1063	ibcq->cqe = new_cnt;
1064	return status;
1065}
1066
1067static void ocrdma_flush_cq(struct ocrdma_cq *cq)
1068{
1069	int cqe_cnt;
1070	int valid_count = 0;
1071	unsigned long flags;
1072
1073	struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
1074	struct ocrdma_cqe *cqe = NULL;
1075
1076	cqe = cq->va;
1077	cqe_cnt = cq->cqe_cnt;
1078
	/* The last irq might have scheduled a polling thread;
	 * sync up with it before hard flushing.
	 */
1082	spin_lock_irqsave(&cq->cq_lock, flags);
1083	while (cqe_cnt) {
1084		if (is_cqe_valid(cq, cqe))
1085			valid_count++;
1086		cqe++;
1087		cqe_cnt--;
1088	}
1089	ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count);
1090	spin_unlock_irqrestore(&cq->cq_lock, flags);
1091}
1092
1093int ocrdma_destroy_cq(struct ib_cq *ibcq)
1094{
1095	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
1096	struct ocrdma_eq *eq = NULL;
1097	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
1098	int pdid = 0;
1099	u32 irq, indx;
1100
1101	dev->cq_tbl[cq->id] = NULL;
1102	indx = ocrdma_get_eq_table_index(dev, cq->eqn);
1103	if (indx == -EINVAL)
1104		BUG();
1105
1106	eq = &dev->eq_tbl[indx];
1107	irq = ocrdma_get_irq(dev, eq);
1108	synchronize_irq(irq);
1109	ocrdma_flush_cq(cq);
1110
1111	(void)ocrdma_mbx_destroy_cq(dev, cq);
1112	if (cq->ucontext) {
1113		pdid = cq->ucontext->cntxt_pd->id;
1114		ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
1115				PAGE_ALIGN(cq->len));
1116		ocrdma_del_mmap(cq->ucontext,
1117				ocrdma_get_db_addr(dev, pdid),
1118				dev->nic_info.db_page_size);
1119	}
1120
1121	kfree(cq);
1122	return 0;
1123}
1124
1125static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
1126{
1127	int status = -EINVAL;
1128
1129	if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
1130		dev->qp_tbl[qp->id] = qp;
1131		status = 0;
1132	}
1133	return status;
1134}
1135
1136static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
1137{
1138	dev->qp_tbl[qp->id] = NULL;
1139}
1140
1141static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
1142				  struct ib_qp_init_attr *attrs)
1143{
1144	if ((attrs->qp_type != IB_QPT_GSI) &&
1145	    (attrs->qp_type != IB_QPT_RC) &&
1146	    (attrs->qp_type != IB_QPT_UC) &&
1147	    (attrs->qp_type != IB_QPT_UD)) {
1148		pr_err("%s(%d) unsupported qp type=0x%x requested\n",
1149		       __func__, dev->id, attrs->qp_type);
1150		return -EINVAL;
1151	}
1152	/* Skip the check for QP1 to support CM size of 128 */
1153	if ((attrs->qp_type != IB_QPT_GSI) &&
1154	    (attrs->cap.max_send_wr > dev->attr.max_wqe)) {
1155		pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
1156		       __func__, dev->id, attrs->cap.max_send_wr);
1157		pr_err("%s(%d) supported send_wr=0x%x\n",
1158		       __func__, dev->id, dev->attr.max_wqe);
1159		return -EINVAL;
1160	}
1161	if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
1162		pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
1163		       __func__, dev->id, attrs->cap.max_recv_wr);
1164		pr_err("%s(%d) supported recv_wr=0x%x\n",
1165		       __func__, dev->id, dev->attr.max_rqe);
1166		return -EINVAL;
1167	}
1168	if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
1169		pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
1170		       __func__, dev->id, attrs->cap.max_inline_data);
1171		pr_err("%s(%d) supported inline data size=0x%x\n",
1172		       __func__, dev->id, dev->attr.max_inline_data);
1173		return -EINVAL;
1174	}
1175	if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
1176		pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
1177		       __func__, dev->id, attrs->cap.max_send_sge);
1178		pr_err("%s(%d) supported send_sge=0x%x\n",
1179		       __func__, dev->id, dev->attr.max_send_sge);
1180		return -EINVAL;
1181	}
1182	if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
1183		pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
1184		       __func__, dev->id, attrs->cap.max_recv_sge);
1185		pr_err("%s(%d) supported recv_sge=0x%x\n",
1186		       __func__, dev->id, dev->attr.max_recv_sge);
1187		return -EINVAL;
1188	}
1189	/* unprivileged user space cannot create special QP */
1190	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1191		pr_err
1192		    ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
1193		     __func__, dev->id, attrs->qp_type);
1194		return -EINVAL;
1195	}
1196	/* allow creating only one GSI type of QP */
1197	if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
1198		pr_err("%s(%d) GSI special QPs already created.\n",
1199		       __func__, dev->id);
1200		return -EINVAL;
1201	}
1202	/* verify consumer QPs are not trying to use GSI QP's CQ */
1203	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
1204		if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
1205			(dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
1206			pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
1207				__func__, dev->id);
1208			return -EINVAL;
1209		}
1210	}
1211	return 0;
1212}
1213
1214static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
1215				struct ib_udata *udata, int dpp_offset,
1216				int dpp_credit_lmt, int srq)
1217{
1218	int status = 0;
1219	u64 usr_db;
1220	struct ocrdma_create_qp_uresp uresp;
1221	struct ocrdma_pd *pd = qp->pd;
1222	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
1223
1224	memset(&uresp, 0, sizeof(uresp));
1225	usr_db = dev->nic_info.unmapped_db +
1226			(pd->id * dev->nic_info.db_page_size);
1227	uresp.qp_id = qp->id;
1228	uresp.sq_dbid = qp->sq.dbid;
1229	uresp.num_sq_pages = 1;
1230	uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
1231	uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va);
1232	uresp.num_wqe_allocated = qp->sq.max_cnt;
1233	if (!srq) {
1234		uresp.rq_dbid = qp->rq.dbid;
1235		uresp.num_rq_pages = 1;
1236		uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
1237		uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va);
1238		uresp.num_rqe_allocated = qp->rq.max_cnt;
1239	}
1240	uresp.db_page_addr = usr_db;
1241	uresp.db_page_size = dev->nic_info.db_page_size;
1242	uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
1243	uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
1244	uresp.db_shift = OCRDMA_DB_RQ_SHIFT;
1245
1246	if (qp->dpp_enabled) {
1247		uresp.dpp_credit = dpp_credit_lmt;
1248		uresp.dpp_offset = dpp_offset;
1249	}
1250	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1251	if (status) {
1252		pr_err("%s(%d) user copy error.\n", __func__, dev->id);
1253		goto err;
1254	}
1255	status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
1256				 uresp.sq_page_size);
1257	if (status)
1258		goto err;
1259
1260	if (!srq) {
1261		status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
1262					 uresp.rq_page_size);
1263		if (status)
1264			goto rq_map_err;
1265	}
1266	return status;
1267rq_map_err:
1268	ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
1269err:
1270	return status;
1271}
1272
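/* Each PD owns one doorbell page; a QP's SQ/RQ doorbells sit at fixed,
 * ASIC-generation dependent offsets within that page:
 *
 *	db = nic_info.db + pd->id * db_page_size + OCRDMA_DB_*_OFFSET
 */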
1273static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
1274			     struct ocrdma_pd *pd)
1275{
1276	if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
1277		qp->sq_db = dev->nic_info.db +
1278			(pd->id * dev->nic_info.db_page_size) +
1279			OCRDMA_DB_GEN2_SQ_OFFSET;
1280		qp->rq_db = dev->nic_info.db +
1281			(pd->id * dev->nic_info.db_page_size) +
1282			OCRDMA_DB_GEN2_RQ_OFFSET;
1283	} else {
1284		qp->sq_db = dev->nic_info.db +
1285			(pd->id * dev->nic_info.db_page_size) +
1286			OCRDMA_DB_SQ_OFFSET;
1287		qp->rq_db = dev->nic_info.db +
1288			(pd->id * dev->nic_info.db_page_size) +
1289			OCRDMA_DB_RQ_OFFSET;
1290	}
1291}
1292
1293static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
1294{
1295	qp->wqe_wr_id_tbl =
1296	    kzalloc(sizeof(*(qp->wqe_wr_id_tbl)) * qp->sq.max_cnt,
1297		    GFP_KERNEL);
1298	if (qp->wqe_wr_id_tbl == NULL)
1299		return -ENOMEM;
1300	qp->rqe_wr_id_tbl =
1301	    kzalloc(sizeof(u64) * qp->rq.max_cnt, GFP_KERNEL);
1302	if (qp->rqe_wr_id_tbl == NULL)
1303		return -ENOMEM;
1304
1305	return 0;
1306}
1307
1308static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
1309				      struct ocrdma_pd *pd,
1310				      struct ib_qp_init_attr *attrs)
1311{
1312	qp->pd = pd;
1313	spin_lock_init(&qp->q_lock);
1314	INIT_LIST_HEAD(&qp->sq_entry);
1315	INIT_LIST_HEAD(&qp->rq_entry);
1316
1317	qp->qp_type = attrs->qp_type;
1318	qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
1319	qp->max_inline_data = attrs->cap.max_inline_data;
1320	qp->sq.max_sges = attrs->cap.max_send_sge;
1321	qp->rq.max_sges = attrs->cap.max_recv_sge;
1322	qp->state = OCRDMA_QPS_RST;
1323	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1324}
1325
1326static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
1327				   struct ib_qp_init_attr *attrs)
1328{
1329	if (attrs->qp_type == IB_QPT_GSI) {
1330		dev->gsi_qp_created = 1;
1331		dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
1332		dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
1333	}
1334}
1335
1336struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
1337			       struct ib_qp_init_attr *attrs,
1338			       struct ib_udata *udata)
1339{
1340	int status;
1341	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
1342	struct ocrdma_qp *qp;
1343	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
1344	struct ocrdma_create_qp_ureq ureq;
1345	u16 dpp_credit_lmt, dpp_offset;
1346
1347	status = ocrdma_check_qp_params(ibpd, dev, attrs);
1348	if (status)
1349		goto gen_err;
1350
1351	memset(&ureq, 0, sizeof(ureq));
1352	if (udata) {
1353		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
1354			return ERR_PTR(-EFAULT);
1355	}
1356	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1357	if (!qp) {
1358		status = -ENOMEM;
1359		goto gen_err;
1360	}
1361	ocrdma_set_qp_init_params(qp, pd, attrs);
1362	if (udata == NULL)
1363		qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
1364					OCRDMA_QP_FAST_REG);
1365
1366	mutex_lock(&dev->dev_lock);
1367	status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
1368					ureq.dpp_cq_id,
1369					&dpp_offset, &dpp_credit_lmt);
1370	if (status)
1371		goto mbx_err;
1372
	/* user-space QPs' wr_id tables are managed by the user library */
1374	if (udata == NULL) {
1375		status = ocrdma_alloc_wr_id_tbl(qp);
1376		if (status)
1377			goto map_err;
1378	}
1379
1380	status = ocrdma_add_qpn_map(dev, qp);
1381	if (status)
1382		goto map_err;
1383	ocrdma_set_qp_db(dev, qp, pd);
1384	if (udata) {
1385		status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
1386					      dpp_credit_lmt,
1387					      (attrs->srq != NULL));
1388		if (status)
1389			goto cpy_err;
1390	}
1391	ocrdma_store_gsi_qp_cq(dev, attrs);
1392	qp->ibqp.qp_num = qp->id;
1393	mutex_unlock(&dev->dev_lock);
1394	return &qp->ibqp;
1395
1396cpy_err:
1397	ocrdma_del_qpn_map(dev, qp);
1398map_err:
1399	ocrdma_mbx_destroy_qp(dev, qp);
1400mbx_err:
1401	mutex_unlock(&dev->dev_lock);
1402	kfree(qp->wqe_wr_id_tbl);
1403	kfree(qp->rqe_wr_id_tbl);
1404	kfree(qp);
1405	pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
1406gen_err:
1407	return ERR_PTR(status);
1408}
1409
1410int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1411		      int attr_mask)
1412{
1413	int status = 0;
1414	struct ocrdma_qp *qp;
1415	struct ocrdma_dev *dev;
1416	enum ib_qp_state old_qps;
1417
1418	qp = get_ocrdma_qp(ibqp);
1419	dev = get_ocrdma_dev(ibqp->device);
1420	if (attr_mask & IB_QP_STATE)
1421		status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps);
	/* If the new and previous states are the same, the hw doesn't
	 * need to know about it.
	 */
1425	if (status < 0)
1426		return status;
1427	status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
1428
1429	return status;
1430}
1431
1432int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1433		     int attr_mask, struct ib_udata *udata)
1434{
1435	unsigned long flags;
1436	int status = -EINVAL;
1437	struct ocrdma_qp *qp;
1438	struct ocrdma_dev *dev;
1439	enum ib_qp_state old_qps, new_qps;
1440
1441	qp = get_ocrdma_qp(ibqp);
1442	dev = get_ocrdma_dev(ibqp->device);
1443
	/* synchronize with multiple contexts changing/retrieving qp state */
1445	mutex_lock(&dev->dev_lock);
	/* synchronize with wqe/rqe posting and cqe processing contexts */
1447	spin_lock_irqsave(&qp->q_lock, flags);
1448	old_qps = get_ibqp_state(qp->state);
1449	if (attr_mask & IB_QP_STATE)
1450		new_qps = attr->qp_state;
1451	else
1452		new_qps = old_qps;
1453	spin_unlock_irqrestore(&qp->q_lock, flags);
1454
1455	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
1456				IB_LINK_LAYER_ETHERNET)) {
1457		pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
1458		       "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
1459		       __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
1460		       old_qps, new_qps);
1461		goto param_err;
1462	}
1463
1464	status = _ocrdma_modify_qp(ibqp, attr, attr_mask);
1465	if (status > 0)
1466		status = 0;
1467param_err:
1468	mutex_unlock(&dev->dev_lock);
1469	return status;
1470}
1471
1472static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
1473{
1474	switch (mtu) {
1475	case 256:
1476		return IB_MTU_256;
1477	case 512:
1478		return IB_MTU_512;
1479	case 1024:
1480		return IB_MTU_1024;
1481	case 2048:
1482		return IB_MTU_2048;
1483	case 4096:
1484		return IB_MTU_4096;
1485	default:
1486		return IB_MTU_1024;
1487	}
1488}
1489
1490static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
1491{
1492	int ib_qp_acc_flags = 0;
1493
1494	if (qp_cap_flags & OCRDMA_QP_INB_WR)
1495		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
1496	if (qp_cap_flags & OCRDMA_QP_INB_RD)
1497		ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
1498	return ib_qp_acc_flags;
1499}
1500
1501int ocrdma_query_qp(struct ib_qp *ibqp,
1502		    struct ib_qp_attr *qp_attr,
1503		    int attr_mask, struct ib_qp_init_attr *qp_init_attr)
1504{
1505	int status;
1506	u32 qp_state;
1507	struct ocrdma_qp_params params;
1508	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1509	struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device);
1510
1511	memset(&params, 0, sizeof(params));
1512	mutex_lock(&dev->dev_lock);
1513	status = ocrdma_mbx_query_qp(dev, qp, &params);
1514	mutex_unlock(&dev->dev_lock);
1515	if (status)
1516		goto mbx_err;
1517	if (qp->qp_type == IB_QPT_UD)
1518		qp_attr->qkey = params.qkey;
1519	qp_attr->path_mtu =
1520		ocrdma_mtu_int_to_enum(params.path_mtu_pkey_indx &
1521				OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
1522				OCRDMA_QP_PARAMS_PATH_MTU_SHIFT;
1523	qp_attr->path_mig_state = IB_MIG_MIGRATED;
1524	qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
1525	qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
1526	qp_attr->dest_qp_num =
1527	    params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;
1528
1529	qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
1530	qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
1531	qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
1532	qp_attr->cap.max_send_sge = qp->sq.max_sges;
1533	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
1534	qp_attr->cap.max_inline_data = qp->max_inline_data;
1535	qp_init_attr->cap = qp_attr->cap;
1536	memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0],
1537	       sizeof(params.dgid));
1538	qp_attr->ah_attr.grh.flow_label = params.rnt_rc_sl_fl &
1539	    OCRDMA_QP_PARAMS_FLOW_LABEL_MASK;
1540	qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
1541	qp_attr->ah_attr.grh.hop_limit = (params.hop_lmt_rq_psn &
1542					  OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
1543						OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
1544	qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
1545					      OCRDMA_QP_PARAMS_TCLASS_MASK) >>
1546						OCRDMA_QP_PARAMS_TCLASS_SHIFT;
1547
1548	qp_attr->ah_attr.ah_flags = IB_AH_GRH;
1549	qp_attr->ah_attr.port_num = 1;
1550	qp_attr->ah_attr.sl = (params.rnt_rc_sl_fl &
1551			       OCRDMA_QP_PARAMS_SL_MASK) >>
1552				OCRDMA_QP_PARAMS_SL_SHIFT;
1553	qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
1554			    OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
1555				OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
1556	qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
1557			      OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
1558				OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
1559	qp_attr->retry_cnt =
1560	    (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
1561		OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
1562	qp_attr->min_rnr_timer = 0;
1563	qp_attr->pkey_index = 0;
1564	qp_attr->port_num = 1;
1565	qp_attr->ah_attr.src_path_bits = 0;
1566	qp_attr->ah_attr.static_rate = 0;
1567	qp_attr->alt_pkey_index = 0;
1568	qp_attr->alt_port_num = 0;
1569	qp_attr->alt_timeout = 0;
1570	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
1571	qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
1572		    OCRDMA_QP_PARAMS_STATE_SHIFT;
1573	qp_attr->qp_state = get_ibqp_state(qp_state);
1574	qp_attr->cur_qp_state = qp_attr->qp_state;
1575	qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
1576	qp_attr->max_dest_rd_atomic =
1577	    params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
1578	qp_attr->max_rd_atomic =
1579	    params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
1580	qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
1581				OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
1582	/* Sync driver QP state with FW */
1583	ocrdma_qp_state_change(qp, qp_attr->qp_state, NULL);
1584mbx_err:
1585	return status;
1586}
1587
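/* Kernel-managed SRQs track free RQE slots with a bitmap (idx_bit_fields,
 * initialised to all ones in ocrdma_create_srq); the posting and
 * completion/discard paths toggle the bit of the slot they consume or
 * return.
 */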
1588static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, unsigned int idx)
1589{
1590	unsigned int i = idx / 32;
1591	u32 mask = (1U << (idx % 32));
1592
1593	srq->idx_bit_fields[i] ^= mask;
1594}
1595
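/* Ring accounting for a HW work queue: head is the producer index, tail the
 * consumer index and max_wqe_idx the wrap mask, so the expression below
 * yields the number of free entries.
 */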
1596static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
1597{
1598	return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
1599}
1600
1601static int is_hw_sq_empty(struct ocrdma_qp *qp)
1602{
1603	return (qp->sq.tail == qp->sq.head);
1604}
1605
1606static int is_hw_rq_empty(struct ocrdma_qp *qp)
1607{
1608	return (qp->rq.tail == qp->rq.head);
1609}
1610
1611static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
1612{
1613	return q->va + (q->head * q->entry_size);
1614}
1615
1616static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
1617				      u32 idx)
1618{
1619	return q->va + (idx * q->entry_size);
1620}
1621
1622static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
1623{
1624	q->head = (q->head + 1) & q->max_wqe_idx;
1625}
1626
1627static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
1628{
1629	q->tail = (q->tail + 1) & q->max_wqe_idx;
1630}
1631
1632/* discard the cqe for a given QP */
1633static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
1634{
1635	unsigned long cq_flags;
1636	unsigned long flags;
1637	int discard_cnt = 0;
1638	u32 cur_getp, stop_getp;
1639	struct ocrdma_cqe *cqe;
1640	u32 qpn = 0, wqe_idx = 0;
1641
1642	spin_lock_irqsave(&cq->cq_lock, cq_flags);
1643
	/* Traverse the CQEs in the hw CQ, find the ones matching the
	 * given qp and mark them discarded by clearing the qpn.
	 * The doorbell is rung later, in poll_cq(), since we do not
	 * complete cqes out of order.
	 */
1650
1651	cur_getp = cq->getp;
	/* find up to where we reap the cq. */
1653	stop_getp = cur_getp;
1654	do {
1655		if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
1656			break;
1657
1658		cqe = cq->va + cur_getp;
		/* exit when (a) the whole hw cq has been reaped, or
		 *    (b) the qp's sq/rq becomes empty,
		 * whichever happens first.
		 */
1663		qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
1664		/* if previously discarded cqe found, skip that too. */
1665		/* check for matching qp */
1666		if (qpn == 0 || qpn != qp->id)
1667			goto skip_cqe;
1668
1669		if (is_cqe_for_sq(cqe)) {
1670			ocrdma_hwq_inc_tail(&qp->sq);
1671		} else {
1672			if (qp->srq) {
1673				wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
1674					OCRDMA_CQE_BUFTAG_SHIFT) &
1675					qp->srq->rq.max_wqe_idx;
1676				if (wqe_idx < 1)
1677					BUG();
1678				spin_lock_irqsave(&qp->srq->q_lock, flags);
1679				ocrdma_hwq_inc_tail(&qp->srq->rq);
1680				ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
1681				spin_unlock_irqrestore(&qp->srq->q_lock, flags);
1682
1683			} else {
1684				ocrdma_hwq_inc_tail(&qp->rq);
1685			}
1686		}
1687		/* mark cqe discarded so that it is not picked up later
1688		 * in the poll_cq().
1689		 */
1690		discard_cnt += 1;
1691		cqe->cmn.qpn = 0;
1692skip_cqe:
1693		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
1694	} while (cur_getp != stop_getp);
1695	spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
1696}
1697
1698void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
1699{
1700	int found = false;
1701	unsigned long flags;
1702	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
1703	/* sync with any active CQ poll */
1704
1705	spin_lock_irqsave(&dev->flush_q_lock, flags);
1706	found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
1707	if (found)
1708		list_del(&qp->sq_entry);
1709	if (!qp->srq) {
1710		found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
1711		if (found)
1712			list_del(&qp->rq_entry);
1713	}
1714	spin_unlock_irqrestore(&dev->flush_q_lock, flags);
1715}
1716
1717int ocrdma_destroy_qp(struct ib_qp *ibqp)
1718{
1719	struct ocrdma_pd *pd;
1720	struct ocrdma_qp *qp;
1721	struct ocrdma_dev *dev;
1722	struct ib_qp_attr attrs;
1723	int attr_mask;
1724	unsigned long flags;
1725
1726	qp = get_ocrdma_qp(ibqp);
1727	dev = get_ocrdma_dev(ibqp->device);
1728
1729	pd = qp->pd;
1730
1731	/* change the QP state to ERROR */
1732	if (qp->state != OCRDMA_QPS_RST) {
1733		attrs.qp_state = IB_QPS_ERR;
1734		attr_mask = IB_QP_STATE;
1735		_ocrdma_modify_qp(ibqp, &attrs, attr_mask);
1736	}
	/* Ensure that CQEs for a newly created QP (whose id may be the same
	 * as that of the QP just being destroyed) don't get discarded until
	 * the old CQEs are discarded.
	 */
1741	mutex_lock(&dev->dev_lock);
1742	(void) ocrdma_mbx_destroy_qp(dev, qp);
1743
1744	/*
1745	 * acquire CQ lock while destroy is in progress, in order to
	 * protect against processing in-flight CQEs for this QP.
1747	 */
1748	spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
1749	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
1750		spin_lock(&qp->rq_cq->cq_lock);
1751
1752	ocrdma_del_qpn_map(dev, qp);
1753
1754	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
1755		spin_unlock(&qp->rq_cq->cq_lock);
1756	spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
1757
1758	if (!pd->uctx) {
1759		ocrdma_discard_cqes(qp, qp->sq_cq);
1760		ocrdma_discard_cqes(qp, qp->rq_cq);
1761	}
1762	mutex_unlock(&dev->dev_lock);
1763
1764	if (pd->uctx) {
1765		ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa,
1766				PAGE_ALIGN(qp->sq.len));
1767		if (!qp->srq)
1768			ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa,
1769					PAGE_ALIGN(qp->rq.len));
1770	}
1771
1772	ocrdma_del_flush_qp(qp);
1773
1774	kfree(qp->wqe_wr_id_tbl);
1775	kfree(qp->rqe_wr_id_tbl);
1776	kfree(qp);
1777	return 0;
1778}
1779
1780static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
1781				struct ib_udata *udata)
1782{
1783	int status;
1784	struct ocrdma_create_srq_uresp uresp;
1785
1786	memset(&uresp, 0, sizeof(uresp));
1787	uresp.rq_dbid = srq->rq.dbid;
1788	uresp.num_rq_pages = 1;
1789	uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va);
1790	uresp.rq_page_size = srq->rq.len;
1791	uresp.db_page_addr = dev->nic_info.unmapped_db +
1792	    (srq->pd->id * dev->nic_info.db_page_size);
1793	uresp.db_page_size = dev->nic_info.db_page_size;
1794	uresp.num_rqe_allocated = srq->rq.max_cnt;
1795	if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
1796		uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
1797		uresp.db_shift = 24;
1798	} else {
1799		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
1800		uresp.db_shift = 16;
1801	}
1802
1803	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1804	if (status)
1805		return status;
1806	status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
1807				 uresp.rq_page_size);
1808	if (status)
1809		return status;
1810	return status;
1811}
1812
1813struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
1814				 struct ib_srq_init_attr *init_attr,
1815				 struct ib_udata *udata)
1816{
1817	int status = -ENOMEM;
1818	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
1819	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
1820	struct ocrdma_srq *srq;
1821
1822	if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
1823		return ERR_PTR(-EINVAL);
1824	if (init_attr->attr.max_wr > dev->attr.max_rqe)
1825		return ERR_PTR(-EINVAL);
1826
1827	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
1828	if (!srq)
1829		return ERR_PTR(status);
1830
1831	spin_lock_init(&srq->q_lock);
1832	srq->pd = pd;
1833	srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
1834	status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd);
1835	if (status)
1836		goto err;
1837
1838	if (udata == NULL) {
1839		srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt,
1840			    GFP_KERNEL);
1841		if (srq->rqe_wr_id_tbl == NULL)
1842			goto arm_err;
1843
1844		srq->bit_fields_len = (srq->rq.max_cnt / 32) +
1845		    (srq->rq.max_cnt % 32 ? 1 : 0);
1846		srq->idx_bit_fields =
1847		    kmalloc(srq->bit_fields_len * sizeof(u32), GFP_KERNEL);
1848		if (srq->idx_bit_fields == NULL)
1849			goto arm_err;
1850		memset(srq->idx_bit_fields, 0xff,
1851		       srq->bit_fields_len * sizeof(u32));
1852	}
1853
1854	if (init_attr->attr.srq_limit) {
1855		status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
1856		if (status)
1857			goto arm_err;
1858	}
1859
1860	if (udata) {
1861		status = ocrdma_copy_srq_uresp(dev, srq, udata);
1862		if (status)
1863			goto arm_err;
1864	}
1865
1866	return &srq->ibsrq;
1867
1868arm_err:
1869	ocrdma_mbx_destroy_srq(dev, srq);
1870err:
1871	kfree(srq->rqe_wr_id_tbl);
1872	kfree(srq->idx_bit_fields);
1873	kfree(srq);
1874	return ERR_PTR(status);
1875}
1876
1877int ocrdma_modify_srq(struct ib_srq *ibsrq,
1878		      struct ib_srq_attr *srq_attr,
1879		      enum ib_srq_attr_mask srq_attr_mask,
1880		      struct ib_udata *udata)
1881{
1882	int status = 0;
1883	struct ocrdma_srq *srq;
1884
1885	srq = get_ocrdma_srq(ibsrq);
1886	if (srq_attr_mask & IB_SRQ_MAX_WR)
1887		status = -EINVAL;
1888	else
1889		status = ocrdma_mbx_modify_srq(srq, srq_attr);
1890	return status;
1891}
1892
1893int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
1894{
1895	int status;
1896	struct ocrdma_srq *srq;
1897
1898	srq = get_ocrdma_srq(ibsrq);
1899	status = ocrdma_mbx_query_srq(srq, srq_attr);
1900	return status;
1901}
1902
1903int ocrdma_destroy_srq(struct ib_srq *ibsrq)
1904{
1905	int status;
1906	struct ocrdma_srq *srq;
1907	struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
1908
1909	srq = get_ocrdma_srq(ibsrq);
1910
1911	status = ocrdma_mbx_destroy_srq(dev, srq);
1912
1913	if (srq->pd->uctx)
1914		ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
1915				PAGE_ALIGN(srq->rq.len));
1916
1917	kfree(srq->idx_bit_fields);
1918	kfree(srq->rqe_wr_id_tbl);
1919	kfree(srq);
1920	return status;
1921}
1922
1923/* unprivileged verbs and their support functions. */
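
/* A send WQE is laid out as the common header (ocrdma_hdr_wqe), then for
 * UD/GSI QPs an extended UD header (ocrdma_ewqe_ud_hdr), followed by either
 * a list of SGEs or the inline payload itself. The helpers below encode the
 * total size, in units of OCRDMA_WQE_STRIDE, into the header's cw field.
 */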
1924static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
1925				struct ocrdma_hdr_wqe *hdr,
1926				struct ib_send_wr *wr)
1927{
1928	struct ocrdma_ewqe_ud_hdr *ud_hdr =
1929		(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
1930	struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah);
1931
1932	ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn;
1933	if (qp->qp_type == IB_QPT_GSI)
1934		ud_hdr->qkey = qp->qkey;
1935	else
1936		ud_hdr->qkey = wr->wr.ud.remote_qkey;
1937	ud_hdr->rsvd_ahid = ah->id;
1938	if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
1939		hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
1940}
1941
1942static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
1943			      struct ocrdma_sge *sge, int num_sge,
1944			      struct ib_sge *sg_list)
1945{
1946	int i;
1947
1948	for (i = 0; i < num_sge; i++) {
1949		sge[i].lrkey = sg_list[i].lkey;
1950		sge[i].addr_lo = sg_list[i].addr;
1951		sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
1952		sge[i].len = sg_list[i].length;
1953		hdr->total_len += sg_list[i].length;
1954	}
1955	if (num_sge == 0)
1956		memset(sge, 0, sizeof(*sge));
1957}
1958
1959static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
1960{
1961	uint32_t total_len = 0, i;
1962
1963	for (i = 0; i < num_sge; i++)
1964		total_len += sg_list[i].length;
1965	return total_len;
1966}
1967
1968
1969static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
1970				    struct ocrdma_hdr_wqe *hdr,
1971				    struct ocrdma_sge *sge,
1972				    struct ib_send_wr *wr, u32 wqe_size)
1973{
1974	int i;
1975	char *dpp_addr;
1976
1977	if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) {
1978		hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge);
1979		if (unlikely(hdr->total_len > qp->max_inline_data)) {
			pr_err("%s() inline data len=0x%x exceeds supported max=0x%x\n",
			       __func__, hdr->total_len, qp->max_inline_data);
1983			return -EINVAL;
1984		}
1985		dpp_addr = (char *)sge;
1986		for (i = 0; i < wr->num_sge; i++) {
1987			memcpy(dpp_addr,
1988			       (void *)(unsigned long)wr->sg_list[i].addr,
1989			       wr->sg_list[i].length);
1990			dpp_addr += wr->sg_list[i].length;
1991		}
1992
1993		wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
		if (hdr->total_len == 0)
1995			wqe_size += sizeof(struct ocrdma_sge);
1996		hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
1997	} else {
1998		ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
1999		if (wr->num_sge)
2000			wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
2001		else
2002			wqe_size += sizeof(struct ocrdma_sge);
2003		hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2004	}
2005	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2006	return 0;
2007}
2008
2009static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2010			     struct ib_send_wr *wr)
2011{
2012	int status;
2013	struct ocrdma_sge *sge;
2014	u32 wqe_size = sizeof(*hdr);
2015
2016	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2017		ocrdma_build_ud_hdr(qp, hdr, wr);
2018		sge = (struct ocrdma_sge *)(hdr + 2);
2019		wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
2020	} else {
2021		sge = (struct ocrdma_sge *)(hdr + 1);
2022	}
2023
2024	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
2025	return status;
2026}
2027
2028static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2029			      struct ib_send_wr *wr)
2030{
2031	int status;
2032	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
2033	struct ocrdma_sge *sge = ext_rw + 1;
2034	u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);
2035
2036	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
2037	if (status)
2038		return status;
2039	ext_rw->addr_lo = wr->wr.rdma.remote_addr;
2040	ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
2041	ext_rw->lrkey = wr->wr.rdma.rkey;
2042	ext_rw->len = hdr->total_len;
2043	return 0;
2044}
2045
2046static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2047			      struct ib_send_wr *wr)
2048{
2049	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
2050	struct ocrdma_sge *sge = ext_rw + 1;
2051	u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
2052	    sizeof(struct ocrdma_hdr_wqe);
2053
2054	ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
2055	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2056	hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
2057	hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2058
2059	ext_rw->addr_lo = wr->wr.rdma.remote_addr;
2060	ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
2061	ext_rw->lrkey = wr->wr.rdma.rkey;
2062	ext_rw->len = hdr->total_len;
2063}
2064
2065static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl,
2066			    struct ocrdma_hw_mr *hwmr)
2067{
2068	int i;
2069	u64 buf_addr = 0;
2070	int num_pbes;
2071	struct ocrdma_pbe *pbe;
2072
2073	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
2074	num_pbes = 0;
2075
	/* walk the fast-reg page list and program one hw pbe per page. */
	for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
2082		buf_addr = wr->wr.fast_reg.page_list->page_list[i];
2083		pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
2084		pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
2085		num_pbes += 1;
2086		pbe++;
2087
		/* if the current pbl is full, move to the next pbl. */
		if (num_pbes == (hwmr->pbl_size / sizeof(u64))) {
			pbl_tbl++;
			pbe = (struct ocrdma_pbe *)pbl_tbl->va;
			num_pbes = 0;
		}
2095	}
2097}
2098
2099static int get_encoded_page_size(int pg_sz)
2100{
	/* Max supported page size is 256M (4096 << 16). */
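	/* e.g. 4096 -> 0, 8192 -> 1, 65536 -> 4, 256M (4096 << 16) -> 16;
	 * a size that is not 4096 << i falls out of the loop and returns 17.
	 */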
2102	int i = 0;
2103	for (; i < 17; i++)
2104		if (pg_sz == (4096 << i))
2105			break;
2106	return i;
2107}
2108
2109
2110static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2111			   struct ib_send_wr *wr)
2112{
2113	u64 fbo;
2114	struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
2115	struct ocrdma_mr *mr;
2116	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2117	u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
2118
2119	wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
2120
2121	if (wr->wr.fast_reg.page_list_len > dev->attr.max_pages_per_frmr)
2122		return -EINVAL;
2123
2124	hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
2125	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2126
	BUG_ON(wr->wr.fast_reg.page_list_len == 0);
2129	if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE)
2130		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
2131	if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE)
2132		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
2133	if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ)
2134		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
2135	hdr->lkey = wr->wr.fast_reg.rkey;
2136	hdr->total_len = wr->wr.fast_reg.length;
2137
2138	fbo = wr->wr.fast_reg.iova_start -
2139	    (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK);
2140
2141	fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start);
2142	fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff);
2143	fast_reg->fbo_hi = upper_32_bits(fbo);
2144	fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
2145	fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
2146	fast_reg->size_sge =
2147		get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
2148	mr = (struct ocrdma_mr *) (unsigned long)
2149		dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)];
2150	build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
2151	return 0;
2152}
2153
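/* Ring the SQ doorbell. The low bits carry the SQ's doorbell id; the value
 * shifted by OCRDMA_DB_SQ_SHIFT is presumably the count of newly posted
 * WQEs, which is always 1 here since the doorbell is rung once per WQE.
 */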
2154static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
2155{
2156	u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT);
2157
2158	iowrite32(val, qp->sq_db);
2159}
2160
2161int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2162		     struct ib_send_wr **bad_wr)
2163{
2164	int status = 0;
2165	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
2166	struct ocrdma_hdr_wqe *hdr;
2167	unsigned long flags;
2168
2169	spin_lock_irqsave(&qp->q_lock, flags);
2170	if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
2171		spin_unlock_irqrestore(&qp->q_lock, flags);
2172		*bad_wr = wr;
2173		return -EINVAL;
2174	}
2175
2176	while (wr) {
2177		if (qp->qp_type == IB_QPT_UD &&
2178		    (wr->opcode != IB_WR_SEND &&
2179		     wr->opcode != IB_WR_SEND_WITH_IMM)) {
2180			*bad_wr = wr;
2181			status = -EINVAL;
2182			break;
2183		}
2184		if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
2185		    wr->num_sge > qp->sq.max_sges) {
2186			*bad_wr = wr;
2187			status = -ENOMEM;
2188			break;
2189		}
2190		hdr = ocrdma_hwq_head(&qp->sq);
2191		hdr->cw = 0;
2192		if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
2193			hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
2194		if (wr->send_flags & IB_SEND_FENCE)
2195			hdr->cw |=
2196			    (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT);
2197		if (wr->send_flags & IB_SEND_SOLICITED)
2198			hdr->cw |=
2199			    (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT);
2200		hdr->total_len = 0;
2201		switch (wr->opcode) {
		case IB_WR_SEND_WITH_IMM:
			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
			hdr->immdt = ntohl(wr->ex.imm_data);
			/* fall through */
		case IB_WR_SEND:
			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
			status = ocrdma_build_send(qp, hdr, wr);
2208			break;
2209		case IB_WR_SEND_WITH_INV:
2210			hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
2211			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
2212			hdr->lkey = wr->ex.invalidate_rkey;
2213			status = ocrdma_build_send(qp, hdr, wr);
2214			break;
2215		case IB_WR_RDMA_WRITE_WITH_IMM:
2216			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
2217			hdr->immdt = ntohl(wr->ex.imm_data);
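			/* fall through */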
2218		case IB_WR_RDMA_WRITE:
2219			hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
2220			status = ocrdma_build_write(qp, hdr, wr);
2221			break;
2222		case IB_WR_RDMA_READ:
2223			ocrdma_build_read(qp, hdr, wr);
2224			break;
2225		case IB_WR_LOCAL_INV:
2226			hdr->cw |=
2227			    (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
2228			hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) +
2229					sizeof(struct ocrdma_sge)) /
2230				OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
2231			hdr->lkey = wr->ex.invalidate_rkey;
2232			break;
2233		case IB_WR_FAST_REG_MR:
2234			status = ocrdma_build_fr(qp, hdr, wr);
2235			break;
2236		default:
2237			status = -EINVAL;
2238			break;
2239		}
2240		if (status) {
2241			*bad_wr = wr;
2242			break;
2243		}
2244		if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
2245			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
2246		else
2247			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
2248		qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id;
2249		ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) &
2250				   OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE);
2251		/* make sure wqe is written before adapter can access it */
2252		wmb();
2253		/* inform hw to start processing it */
2254		ocrdma_ring_sq_db(qp);
2255
2256		/* update pointer, counter for next wr */
2257		ocrdma_hwq_inc_head(&qp->sq);
2258		wr = wr->next;
2259	}
2260	spin_unlock_irqrestore(&qp->q_lock, flags);
2261	return status;
2262}
2263
2264static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
2265{
2266	u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT);
2267
2268	iowrite32(val, qp->rq_db);
2269}
2270
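/* Build a receive WQE. The tag is 0 for ordinary QP RQEs, whose completions
 * come back in order, and the 1-based shadow-table index for SRQ RQEs,
 * whose completions can arrive out of order.
 */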
2271static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
2272			     u16 tag)
2273{
2274	u32 wqe_size = 0;
2275	struct ocrdma_sge *sge;
2276	if (wr->num_sge)
2277		wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe);
2278	else
2279		wqe_size = sizeof(*sge) + sizeof(*rqe);
2280
2281	rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) <<
2282				OCRDMA_WQE_SIZE_SHIFT);
2283	rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
2284	rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2285	rqe->total_len = 0;
2286	rqe->rsvd_tag = tag;
2287	sge = (struct ocrdma_sge *)(rqe + 1);
2288	ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list);
2289	ocrdma_cpu_to_le32(rqe, wqe_size);
2290}
2291
2292int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2293		     struct ib_recv_wr **bad_wr)
2294{
2295	int status = 0;
2296	unsigned long flags;
2297	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
2298	struct ocrdma_hdr_wqe *rqe;
2299
2300	spin_lock_irqsave(&qp->q_lock, flags);
2301	if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) {
2302		spin_unlock_irqrestore(&qp->q_lock, flags);
2303		*bad_wr = wr;
2304		return -EINVAL;
2305	}
2306	while (wr) {
2307		if (ocrdma_hwq_free_cnt(&qp->rq) == 0 ||
2308		    wr->num_sge > qp->rq.max_sges) {
2309			*bad_wr = wr;
2310			status = -ENOMEM;
2311			break;
2312		}
2313		rqe = ocrdma_hwq_head(&qp->rq);
2314		ocrdma_build_rqe(rqe, wr, 0);
2315
2316		qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id;
2317		/* make sure rqe is written before adapter can access it */
2318		wmb();
2319
2320		/* inform hw to start processing it */
2321		ocrdma_ring_rq_db(qp);
2322
2323		/* update pointer, counter for next wr */
2324		ocrdma_hwq_inc_head(&qp->rq);
2325		wr = wr->next;
2326	}
2327	spin_unlock_irqrestore(&qp->q_lock, flags);
2328	return status;
2329}
2330
/* CQEs for an SRQ's RQEs can arrive out of order. The index gives the
 * entry in the shadow table where the wr_id is stored; the tag/index is
 * returned in the CQE so the completion can be matched back to its RQE.
 */
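/* For example, with idx_bit_fields[0] == 0xfffffffc (slots 0 and 1 already
 * in use), ffs() returns 3, so indx = 2; that bit is toggled to mark the
 * slot busy and 3 is returned, keeping the tags 1-based.
 */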
2336static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
2337{
2338	int row = 0;
2339	int indx = 0;
2340
2341	for (row = 0; row < srq->bit_fields_len; row++) {
2342		if (srq->idx_bit_fields[row]) {
2343			indx = ffs(srq->idx_bit_fields[row]);
2344			indx = (row * 32) + (indx - 1);
			BUG_ON(indx >= srq->rq.max_cnt);
2347			ocrdma_srq_toggle_bit(srq, indx);
2348			break;
2349		}
2350	}
2351
	BUG_ON(row == srq->bit_fields_len);
2354	return indx + 1; /* Use from index 1 */
2355}
2356
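/* Ring the SRQ doorbell. The layout appears to mirror the RQ doorbell:
 * the queue's doorbell id in the low bits and a count of 1 shifted into
 * bit 16, written at the GEN2 SRQ doorbell offset.
 */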
2357static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
2358{
2359	u32 val = srq->rq.dbid | (1 << 16);
2360
2361	iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
2362}
2363
2364int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
2365			 struct ib_recv_wr **bad_wr)
2366{
2367	int status = 0;
2368	unsigned long flags;
2369	struct ocrdma_srq *srq;
2370	struct ocrdma_hdr_wqe *rqe;
2371	u16 tag;
2372
2373	srq = get_ocrdma_srq(ibsrq);
2374
2375	spin_lock_irqsave(&srq->q_lock, flags);
2376	while (wr) {
2377		if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
2378		    wr->num_sge > srq->rq.max_sges) {
2379			status = -ENOMEM;
2380			*bad_wr = wr;
2381			break;
2382		}
2383		tag = ocrdma_srq_get_idx(srq);
2384		rqe = ocrdma_hwq_head(&srq->rq);
2385		ocrdma_build_rqe(rqe, wr, tag);
2386
2387		srq->rqe_wr_id_tbl[tag] = wr->wr_id;
2388		/* make sure rqe is written before adapter can perform DMA */
2389		wmb();
2390		/* inform hw to start processing it */
2391		ocrdma_ring_srq_db(srq);
2392		/* update pointer, counter for next wr */
2393		ocrdma_hwq_inc_head(&srq->rq);
2394		wr = wr->next;
2395	}
2396	spin_unlock_irqrestore(&srq->q_lock, flags);
2397	return status;
2398}
2399
2400static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
2401{
2402	enum ib_wc_status ibwc_status;
2403
2404	switch (status) {
2405	case OCRDMA_CQE_GENERAL_ERR:
2406		ibwc_status = IB_WC_GENERAL_ERR;
2407		break;
2408	case OCRDMA_CQE_LOC_LEN_ERR:
2409		ibwc_status = IB_WC_LOC_LEN_ERR;
2410		break;
2411	case OCRDMA_CQE_LOC_QP_OP_ERR:
2412		ibwc_status = IB_WC_LOC_QP_OP_ERR;
2413		break;
2414	case OCRDMA_CQE_LOC_EEC_OP_ERR:
2415		ibwc_status = IB_WC_LOC_EEC_OP_ERR;
2416		break;
2417	case OCRDMA_CQE_LOC_PROT_ERR:
2418		ibwc_status = IB_WC_LOC_PROT_ERR;
2419		break;
2420	case OCRDMA_CQE_WR_FLUSH_ERR:
2421		ibwc_status = IB_WC_WR_FLUSH_ERR;
2422		break;
2423	case OCRDMA_CQE_MW_BIND_ERR:
2424		ibwc_status = IB_WC_MW_BIND_ERR;
2425		break;
2426	case OCRDMA_CQE_BAD_RESP_ERR:
2427		ibwc_status = IB_WC_BAD_RESP_ERR;
2428		break;
2429	case OCRDMA_CQE_LOC_ACCESS_ERR:
2430		ibwc_status = IB_WC_LOC_ACCESS_ERR;
2431		break;
2432	case OCRDMA_CQE_REM_INV_REQ_ERR:
2433		ibwc_status = IB_WC_REM_INV_REQ_ERR;
2434		break;
2435	case OCRDMA_CQE_REM_ACCESS_ERR:
2436		ibwc_status = IB_WC_REM_ACCESS_ERR;
2437		break;
2438	case OCRDMA_CQE_REM_OP_ERR:
2439		ibwc_status = IB_WC_REM_OP_ERR;
2440		break;
2441	case OCRDMA_CQE_RETRY_EXC_ERR:
2442		ibwc_status = IB_WC_RETRY_EXC_ERR;
2443		break;
2444	case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
2445		ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
2446		break;
2447	case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
2448		ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
2449		break;
2450	case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
2451		ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
2452		break;
2453	case OCRDMA_CQE_REM_ABORT_ERR:
2454		ibwc_status = IB_WC_REM_ABORT_ERR;
2455		break;
2456	case OCRDMA_CQE_INV_EECN_ERR:
2457		ibwc_status = IB_WC_INV_EECN_ERR;
2458		break;
2459	case OCRDMA_CQE_INV_EEC_STATE_ERR:
2460		ibwc_status = IB_WC_INV_EEC_STATE_ERR;
2461		break;
2462	case OCRDMA_CQE_FATAL_ERR:
2463		ibwc_status = IB_WC_FATAL_ERR;
2464		break;
2465	case OCRDMA_CQE_RESP_TIMEOUT_ERR:
2466		ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
2467		break;
2468	default:
2469		ibwc_status = IB_WC_GENERAL_ERR;
2470		break;
2471	}
2472	return ibwc_status;
2473}
2474
2475static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
2476		      u32 wqe_idx)
2477{
2478	struct ocrdma_hdr_wqe *hdr;
2479	struct ocrdma_sge *rw;
2480	int opcode;
2481
2482	hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);
2483
2484	ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
2485	/* Undo the hdr->cw swap */
2486	opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
2487	switch (opcode) {
2488	case OCRDMA_WRITE:
2489		ibwc->opcode = IB_WC_RDMA_WRITE;
2490		break;
2491	case OCRDMA_READ:
2492		rw = (struct ocrdma_sge *)(hdr + 1);
2493		ibwc->opcode = IB_WC_RDMA_READ;
2494		ibwc->byte_len = rw->len;
2495		break;
2496	case OCRDMA_SEND:
2497		ibwc->opcode = IB_WC_SEND;
2498		break;
2499	case OCRDMA_FR_MR:
2500		ibwc->opcode = IB_WC_FAST_REG_MR;
2501		break;
2502	case OCRDMA_LKEY_INV:
2503		ibwc->opcode = IB_WC_LOCAL_INV;
2504		break;
2505	default:
2506		ibwc->status = IB_WC_GENERAL_ERR;
		pr_err("%s() invalid opcode received = 0x%x\n",
		       __func__, opcode);
2509		break;
2510	}
2511}
2512
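/* Rewrite the status field of a hardware CQE in place to WR_FLUSH_ERR so
 * the same CQE can be replayed ("expanded") once per pending WQE/RQE after
 * the QP has moved to the error state. UD/GSI receive CQEs keep their
 * status in a different set of bits, hence the separate mask and shift.
 */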
2513static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
2514						struct ocrdma_cqe *cqe)
2515{
2516	if (is_cqe_for_sq(cqe)) {
2517		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2518				cqe->flags_status_srcqpn) &
2519					~OCRDMA_CQE_STATUS_MASK);
2520		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2521				cqe->flags_status_srcqpn) |
2522				(OCRDMA_CQE_WR_FLUSH_ERR <<
2523					OCRDMA_CQE_STATUS_SHIFT));
2524	} else {
2525		if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2526			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2527					cqe->flags_status_srcqpn) &
2528						~OCRDMA_CQE_UD_STATUS_MASK);
2529			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2530					cqe->flags_status_srcqpn) |
2531					(OCRDMA_CQE_WR_FLUSH_ERR <<
2532						OCRDMA_CQE_UD_STATUS_SHIFT));
2533		} else {
2534			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2535					cqe->flags_status_srcqpn) &
2536						~OCRDMA_CQE_STATUS_MASK);
2537			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2538					cqe->flags_status_srcqpn) |
2539					(OCRDMA_CQE_WR_FLUSH_ERR <<
2540						OCRDMA_CQE_STATUS_SHIFT));
2541		}
2542	}
2543}
2544
2545static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2546				  struct ocrdma_qp *qp, int status)
2547{
2548	bool expand = false;
2549
2550	ibwc->byte_len = 0;
2551	ibwc->qp = &qp->ibqp;
2552	ibwc->status = ocrdma_to_ibwc_err(status);
2553
2554	ocrdma_flush_qp(qp);
2555	ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL);
2556
	/* if WQEs/RQEs are still pending for which CQEs must be returned,
	 * trigger expanding (flushing) them.
	 */
2560	if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
2561		expand = true;
2562		ocrdma_set_cqe_status_flushed(qp, cqe);
2563	}
2564	return expand;
2565}
2566
2567static int ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2568				  struct ocrdma_qp *qp, int status)
2569{
2570	ibwc->opcode = IB_WC_RECV;
2571	ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2572	ocrdma_hwq_inc_tail(&qp->rq);
2573
2574	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2575}
2576
2577static int ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2578				  struct ocrdma_qp *qp, int status)
2579{
2580	ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2581	ocrdma_hwq_inc_tail(&qp->sq);
2582
2583	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2584}
2585
2586
2587static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
2588				 struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
2589				 bool *polled, bool *stop)
2590{
2591	bool expand;
2592	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2593	int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2594		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2595	if (status < OCRDMA_MAX_CQE_ERR)
2596		atomic_inc(&dev->cqe_err_stats[status]);
2597
	/* When the hw SQ is empty but the RQ is not, keep the cqe
	 * so that the cq event is raised again.
	 */
2601	if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
		/* When the RQ and SQ share the same CQ, it is safe to
		 * return flush cqes for the RQEs.
		 */
2605		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2606			*polled = true;
2607			status = OCRDMA_CQE_WR_FLUSH_ERR;
2608			expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2609		} else {
2610			/* stop processing further cqe as this cqe is used for
2611			 * triggering cq event on buddy cq of RQ.
2612			 * When QP is destroyed, this cqe will be removed
2613			 * from the cq's hardware q.
2614			 */
2615			*polled = false;
2616			*stop = true;
2617			expand = false;
2618		}
2619	} else if (is_hw_sq_empty(qp)) {
2620		/* Do nothing */
2621		expand = false;
2622		*polled = false;
2623		*stop = false;
2624	} else {
2625		*polled = true;
2626		expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2627	}
2628	return expand;
2629}
2630
2631static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
2632				     struct ocrdma_cqe *cqe,
2633				     struct ib_wc *ibwc, bool *polled)
2634{
2635	bool expand = false;
2636	int tail = qp->sq.tail;
2637	u32 wqe_idx;
2638
2639	if (!qp->wqe_wr_id_tbl[tail].signaled) {
2640		*polled = false;    /* WC cannot be consumed yet */
2641	} else {
2642		ibwc->status = IB_WC_SUCCESS;
2643		ibwc->wc_flags = 0;
2644		ibwc->qp = &qp->ibqp;
2645		ocrdma_update_wc(qp, ibwc, tail);
2646		*polled = true;
2647	}
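	/* The adapter can coalesce completions for several WQEs into one CQE;
	 * wq.wqeidx names the last WQE the CQE covers, so the same CQE keeps
	 * being expanded until the SQ tail catches up with that index.
	 */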
2648	wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) &
2649			OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx;
2650	if (tail != wqe_idx)
2651		expand = true; /* Coalesced CQE can't be consumed yet */
2652
2653	ocrdma_hwq_inc_tail(&qp->sq);
2654	return expand;
2655}
2656
2657static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2658			     struct ib_wc *ibwc, bool *polled, bool *stop)
2659{
2660	int status;
2661	bool expand;
2662
2663	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2664		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2665
2666	if (status == OCRDMA_CQE_SUCCESS)
2667		expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
2668	else
2669		expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
2670	return expand;
2671}
2672
2673static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
2674{
2675	int status;
2676
2677	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2678		OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
2679	ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
2680						OCRDMA_CQE_SRCQP_MASK;
2681	ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) &
2682						OCRDMA_CQE_PKEY_MASK;
2683	ibwc->wc_flags = IB_WC_GRH;
2684	ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2685					OCRDMA_CQE_UD_XFER_LEN_SHIFT);
2686	return status;
2687}
2688
2689static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
2690				       struct ocrdma_cqe *cqe,
2691				       struct ocrdma_qp *qp)
2692{
2693	unsigned long flags;
2694	struct ocrdma_srq *srq;
2695	u32 wqe_idx;
2696
2697	srq = get_ocrdma_srq(qp->ibqp.srq);
2698	wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
2699		OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
	BUG_ON(wqe_idx < 1);
2702
2703	ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
2704	spin_lock_irqsave(&srq->q_lock, flags);
2705	ocrdma_srq_toggle_bit(srq, wqe_idx - 1);
2706	spin_unlock_irqrestore(&srq->q_lock, flags);
2707	ocrdma_hwq_inc_tail(&srq->rq);
2708}
2709
2710static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2711				struct ib_wc *ibwc, bool *polled, bool *stop,
2712				int status)
2713{
2714	bool expand;
2715	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2716
2717	if (status < OCRDMA_MAX_CQE_ERR)
2718		atomic_inc(&dev->cqe_err_stats[status]);
2719
	/* When the hw RQ is empty but the SQ is not, keep the cqe
	 * so that the cq event is raised again.
	 */
2723	if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
2724		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2725			*polled = true;
2726			status = OCRDMA_CQE_WR_FLUSH_ERR;
2727			expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2728		} else {
2729			*polled = false;
2730			*stop = true;
2731			expand = false;
2732		}
2733	} else if (is_hw_rq_empty(qp)) {
2734		/* Do nothing */
2735		expand = false;
2736		*polled = false;
2737		*stop = false;
2738	} else {
2739		*polled = true;
2740		expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2741	}
2742	return expand;
2743}
2744
2745static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
2746				     struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
2747{
2748	ibwc->opcode = IB_WC_RECV;
2749	ibwc->qp = &qp->ibqp;
2750	ibwc->status = IB_WC_SUCCESS;
2751
2752	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
2753		ocrdma_update_ud_rcqe(ibwc, cqe);
2754	else
2755		ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
2756
2757	if (is_cqe_imm(cqe)) {
2758		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2759		ibwc->wc_flags |= IB_WC_WITH_IMM;
2760	} else if (is_cqe_wr_imm(cqe)) {
2761		ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
2762		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2763		ibwc->wc_flags |= IB_WC_WITH_IMM;
2764	} else if (is_cqe_invalidated(cqe)) {
2765		ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
2766		ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
2767	}
2768	if (qp->ibqp.srq) {
2769		ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
2770	} else {
2771		ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2772		ocrdma_hwq_inc_tail(&qp->rq);
2773	}
2774}
2775
2776static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2777			     struct ib_wc *ibwc, bool *polled, bool *stop)
2778{
2779	int status;
2780	bool expand = false;
2781
2782	ibwc->wc_flags = 0;
2783	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2784		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2785					OCRDMA_CQE_UD_STATUS_MASK) >>
2786					OCRDMA_CQE_UD_STATUS_SHIFT;
2787	} else {
2788		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2789			     OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2790	}
2791
2792	if (status == OCRDMA_CQE_SUCCESS) {
2793		*polled = true;
2794		ocrdma_poll_success_rcqe(qp, cqe, ibwc);
2795	} else {
2796		expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
2797					      status);
2798	}
2799	return expand;
2800}
2801
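/* CQE ownership is tracked either by a phase bit that flips every time the
 * consumer index wraps around to 0 (when cq->phase_change is set), or by
 * explicitly clearing the valid bit of each consumed CQE.
 */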
2802static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
2803				   u16 cur_getp)
2804{
2805	if (cq->phase_change) {
2806		if (cur_getp == 0)
2807			cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
2808	} else {
2809		/* clear valid bit */
2810		cqe->flags_status_srcqpn = 0;
2811	}
2812}
2813
2814static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
2815			    struct ib_wc *ibwc)
2816{
2817	u16 qpn = 0;
2818	int i = 0;
2819	bool expand = false;
2820	int polled_hw_cqes = 0;
2821	struct ocrdma_qp *qp = NULL;
2822	struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
2823	struct ocrdma_cqe *cqe;
	u16 cur_getp;
	bool polled = false;
	bool stop = false;
2825
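	/* Walk the hardware CQ from the current get pointer. A CQE may be
	 * skipped (qpn == 0, already discarded), expanded (reused for the
	 * next flushed WQE/RQE without advancing the get pointer), or it may
	 * stop the loop (kept around to raise an event on the buddy CQ).
	 * "polled" records whether an ib_wc entry was produced for the caller.
	 */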
2826	cur_getp = cq->getp;
2827	while (num_entries) {
2828		cqe = cq->va + cur_getp;
2829		/* check whether valid cqe or not */
2830		if (!is_cqe_valid(cq, cqe))
2831			break;
2832		qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
2833		/* ignore discarded cqe */
2834		if (qpn == 0)
2835			goto skip_cqe;
2836		qp = dev->qp_tbl[qpn];
2837		BUG_ON(qp == NULL);
2838
2839		if (is_cqe_for_sq(cqe)) {
2840			expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
2841						  &stop);
2842		} else {
2843			expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
2844						  &stop);
2845		}
2846		if (expand)
2847			goto expand_cqe;
2848		if (stop)
2849			goto stop_cqe;
2850		/* clear qpn to avoid duplicate processing by discard_cqe() */
2851		cqe->cmn.qpn = 0;
2852skip_cqe:
2853		polled_hw_cqes += 1;
2854		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
2855		ocrdma_change_cq_phase(cq, cqe, cur_getp);
2856expand_cqe:
2857		if (polled) {
2858			num_entries -= 1;
2859			i += 1;
2860			ibwc = ibwc + 1;
2861			polled = false;
2862		}
2863	}
2864stop_cqe:
2865	cq->getp = cur_getp;
2866	if (cq->deferred_arm) {
2867		ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol,
2868				  polled_hw_cqes);
2869		cq->deferred_arm = false;
2870		cq->deferred_sol = false;
2871	} else {
2872		/* We need to pop the CQE. No need to arm */
2873		ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol,
2874				  polled_hw_cqes);
2875		cq->deferred_sol = false;
2876	}
2877
2878	return i;
2879}
2880
/* insert error cqes for a QP whose SQ or RQ uses the CQ being polled. */
2882static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
2883			      struct ocrdma_qp *qp, struct ib_wc *ibwc)
2884{
2885	int err_cqes = 0;
2886
2887	while (num_entries) {
2888		if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
2889			break;
2890		if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
2891			ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2892			ocrdma_hwq_inc_tail(&qp->sq);
2893		} else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
2894			ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2895			ocrdma_hwq_inc_tail(&qp->rq);
2896		} else {
2897			return err_cqes;
2898		}
2899		ibwc->byte_len = 0;
2900		ibwc->status = IB_WC_WR_FLUSH_ERR;
2901		ibwc = ibwc + 1;
2902		err_cqes += 1;
2903		num_entries -= 1;
2904	}
2905	return err_cqes;
2906}
2907
2908int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
2909{
2910	int cqes_to_poll = num_entries;
2911	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
2912	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
2913	int num_os_cqe = 0, err_cqes = 0;
2914	struct ocrdma_qp *qp;
2915	unsigned long flags;
2916
2917	/* poll cqes from adapter CQ */
2918	spin_lock_irqsave(&cq->cq_lock, flags);
2919	num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
2920	spin_unlock_irqrestore(&cq->cq_lock, flags);
2921	cqes_to_poll -= num_os_cqe;
2922
2923	if (cqes_to_poll) {
2924		wc = wc + num_os_cqe;
		/* The adapter returns a single error cqe when a qp moves
		 * to the error state. So insert error cqes, with wc_status
		 * set to FLUSH_ERR, for the WQEs and RQEs still pending on
		 * the QP's SQ and RQ that use this CQ.
		 */
2930		spin_lock_irqsave(&dev->flush_q_lock, flags);
2931		list_for_each_entry(qp, &cq->sq_head, sq_entry) {
2932			if (cqes_to_poll == 0)
2933				break;
2934			err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
2935			cqes_to_poll -= err_cqes;
2936			num_os_cqe += err_cqes;
2937			wc = wc + err_cqes;
2938		}
2939		spin_unlock_irqrestore(&dev->flush_q_lock, flags);
2940	}
2941	return num_os_cqe;
2942}
2943
2944int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
2945{
2946	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
2947	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
2948	u16 cq_id;
2949	unsigned long flags;
2950	bool arm_needed = false, sol_needed = false;
2951
2952	cq_id = cq->id;
2953
2954	spin_lock_irqsave(&cq->cq_lock, flags);
2955	if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
2956		arm_needed = true;
2957	if (cq_flags & IB_CQ_SOLICITED)
2958		sol_needed = true;
2959
2960	if (cq->first_arm) {
2961		ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
2962		cq->first_arm = false;
2963	}
2964
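	/* After the first arm, the actual doorbell write is deferred to
	 * ocrdma_poll_hwcq(), which re-arms the CQ while popping the CQEs it
	 * has just consumed.
	 */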
2965	cq->deferred_arm = true;
2966	cq->deferred_sol = sol_needed;
2967	spin_unlock_irqrestore(&cq->cq_lock, flags);
2968
2969	return 0;
2970}
2971
2972struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
2973{
2974	int status;
2975	struct ocrdma_mr *mr;
2976	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
2977	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
2978
2979	if (max_page_list_len > dev->attr.max_pages_per_frmr)
2980		return ERR_PTR(-EINVAL);
2981
2982	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2983	if (!mr)
2984		return ERR_PTR(-ENOMEM);
2985
2986	status = ocrdma_get_pbl_info(dev, mr, max_page_list_len);
2987	if (status)
2988		goto pbl_err;
2989	mr->hwmr.fr_mr = 1;
2990	mr->hwmr.remote_rd = 0;
2991	mr->hwmr.remote_wr = 0;
2992	mr->hwmr.local_rd = 0;
2993	mr->hwmr.local_wr = 0;
2994	mr->hwmr.mw_bind = 0;
2995	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
2996	if (status)
2997		goto pbl_err;
2998	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0);
2999	if (status)
3000		goto mbx_err;
3001	mr->ibmr.rkey = mr->hwmr.lkey;
3002	mr->ibmr.lkey = mr->hwmr.lkey;
3003	dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] =
3004		(unsigned long) mr;
3005	return &mr->ibmr;
3006mbx_err:
3007	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
3008pbl_err:
3009	kfree(mr);
	return ERR_PTR(status);
3011}
3012
3013struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
3014							  *ibdev,
3015							  int page_list_len)
3016{
3017	struct ib_fast_reg_page_list *frmr_list;
3018	int size;
3019
3020	size = sizeof(*frmr_list) + (page_list_len * sizeof(u64));
3021	frmr_list = kzalloc(size, GFP_KERNEL);
3022	if (!frmr_list)
3023		return ERR_PTR(-ENOMEM);
3024	frmr_list->page_list = (u64 *)(frmr_list + 1);
3025	return frmr_list;
3026}
3027
3028void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list)
3029{
3030	kfree(page_list);
3031}
3032
3033#define MAX_KERNEL_PBE_SIZE 65536
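/* Choose the PBE size as the smallest buffer's size, rounded up to a power
 * of two and capped at MAX_KERNEL_PBE_SIZE, and return how many PBEs of
 * that size cover the whole region; 0 is returned if any buffer after the
 * first is not page-aligned in address or size. For example, buffers of
 * 16K and 8K settle on a pbe_size of 8K and need (16K + 8K) / 8K = 3 PBEs.
 */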
3034static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
3035				    int buf_cnt, u32 *pbe_size)
3036{
3037	u64 total_size = 0;
3038	u64 buf_size = 0;
3039	int i;
3040	*pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
3041	*pbe_size = roundup_pow_of_two(*pbe_size);
3042
3043	/* find the smallest PBE size that we can have */
3044	for (i = 0; i < buf_cnt; i++) {
3045		/* first addr may not be page aligned, so ignore checking */
3046		if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
3047				 (buf_list[i].size & ~PAGE_MASK))) {
3048			return 0;
3049		}
3050
		/* if the pbe_size chosen so far is greater than this
		 * buffer's rounded size, reduce the pbe_size.
		 */
3054		buf_size = roundup(buf_list[i].size, PAGE_SIZE);
		/* pbe_size has to be a power-of-two multiple of 4K: 4K, 8K, 16K, ... */
3056		buf_size = roundup_pow_of_two(buf_size);
3057		if (*pbe_size > buf_size)
3058			*pbe_size = buf_size;
3059
3060		total_size += buf_size;
3061	}
	*pbe_size = min_t(u32, *pbe_size, MAX_KERNEL_PBE_SIZE);
3064
3065	/* num_pbes = total_size / (*pbe_size);  this is implemented below. */
3066
3067	return total_size >> ilog2(*pbe_size);
3068}
3069
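/* Program the PBL pages with pbe_size-sized chunks of the physical buffers:
 * each buffer contributes (rounded size / pbe_size) PBEs, and the walk
 * advances to the next PBL page whenever the current one fills up.
 */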
3070static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
3071			      u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
3072			      struct ocrdma_hw_mr *hwmr)
3073{
3074	int i;
3075	int idx;
3076	int pbes_per_buf = 0;
3077	u64 buf_addr = 0;
3078	int num_pbes;
3079	struct ocrdma_pbe *pbe;
3080	int total_num_pbes = 0;
3081
3082	if (!hwmr->num_pbes)
3083		return;
3084
3085	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
3086	num_pbes = 0;
3087
3088	/* go through the OS phy regions & fill hw pbe entries into pbls. */
3089	for (i = 0; i < ib_buf_cnt; i++) {
3090		buf_addr = buf_list[i].addr;
3091		pbes_per_buf =
3092		    roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
3093		    pbe_size;
3094		hwmr->len += buf_list[i].size;
3095		/* number of pbes can be more for one OS buf, when
3096		 * buffers are of different sizes.
3097		 * split the ib_buf to one or more pbes.
3098		 */
3099		for (idx = 0; idx < pbes_per_buf; idx++) {
			/* we always program page-aligned addresses; any
			 * unaligned start of the first buffer is covered
			 * by the fbo.
			 */
			if (i == 0) {
				/* for a non-zero fbo, program the start
				 * of the page.
				 */
3107				pbe->pa_lo =
3108				    cpu_to_le32((u32) (buf_addr & PAGE_MASK));
3109				pbe->pa_hi =
3110				    cpu_to_le32((u32) upper_32_bits(buf_addr));
3111			} else {
3112				pbe->pa_lo =
3113				    cpu_to_le32((u32) (buf_addr & 0xffffffff));
3114				pbe->pa_hi =
3115				    cpu_to_le32((u32) upper_32_bits(buf_addr));
3116			}
3117			buf_addr += pbe_size;
3118			num_pbes += 1;
3119			total_num_pbes += 1;
3120			pbe++;
3121
3122			if (total_num_pbes == hwmr->num_pbes)
3123				goto mr_tbl_done;
3124			/* if the pbl is full storing the pbes,
3125			 * move to next pbl.
3126			 */
3127			if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
3128				pbl_tbl++;
3129				pbe = (struct ocrdma_pbe *)pbl_tbl->va;
3130				num_pbes = 0;
3131			}
3132		}
3133	}
3134mr_tbl_done:
3135	return;
3136}
3137
3138struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
3139				   struct ib_phys_buf *buf_list,
3140				   int buf_cnt, int acc, u64 *iova_start)
3141{
3142	int status = -ENOMEM;
3143	struct ocrdma_mr *mr;
3144	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
3145	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
3146	u32 num_pbes;
3147	u32 pbe_size = 0;
3148
3149	if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
3150		return ERR_PTR(-EINVAL);
3151
3152	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3153	if (!mr)
3154		return ERR_PTR(status);
3155
3156	num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
3157	if (num_pbes == 0) {
3158		status = -EINVAL;
3159		goto pbl_err;
3160	}
3161	status = ocrdma_get_pbl_info(dev, mr, num_pbes);
3162	if (status)
3163		goto pbl_err;
3164
3165	mr->hwmr.pbe_size = pbe_size;
3166	mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
3167	mr->hwmr.va = *iova_start;
3168	mr->hwmr.local_rd = 1;
3169	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3170	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3171	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3172	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3173	mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
3174
3175	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
3176	if (status)
3177		goto pbl_err;
3178	build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
3179			  &mr->hwmr);
3180	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
3181	if (status)
3182		goto mbx_err;
3183
3184	mr->ibmr.lkey = mr->hwmr.lkey;
3185	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
3186		mr->ibmr.rkey = mr->hwmr.lkey;
3187	return &mr->ibmr;
3188
3189mbx_err:
3190	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
3191pbl_err:
3192	kfree(mr);
3193	return ERR_PTR(status);
3194}
3195