/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/netdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>

#include <linux/mlx4/driver.h>
#include <linux/mlx4/qp.h>

#include "mlx4_ib.h"
#include "user.h"

49static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
50			     struct mlx4_ib_cq *recv_cq);
51static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq,
52			       struct mlx4_ib_cq *recv_cq);
53
54enum {
55	MLX4_IB_ACK_REQ_FREQ	= 8,
56};
57
58enum {
59	MLX4_IB_DEFAULT_SCHED_QUEUE	= 0x83,
60	MLX4_IB_DEFAULT_QP0_SCHED_QUEUE	= 0x3f,
61	MLX4_IB_LINK_TYPE_IB		= 0,
62	MLX4_IB_LINK_TYPE_ETH		= 1
63};
64
65enum {
66	/*
67	 * Largest possible UD header: send with GRH and immediate
68	 * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
69	 * tag.  (LRH would only use 8 bytes, so Ethernet is the
70	 * biggest case)
71	 */
72	MLX4_IB_UD_HEADER_SIZE		= 82,
73	MLX4_IB_LSO_HEADER_SPARE	= 128,
74};
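
/*
 * A rough breakdown of the 82-byte worst case above (assuming the
 * usual IB/RoCE header sizes): 18 bytes of Ethernet plus VLAN tag,
 * 40 bytes GRH, 12 bytes BTH, 8 bytes DETH and 4 bytes of immediate
 * data.
 */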
75
76enum {
77	MLX4_IB_IBOE_ETHERTYPE		= 0x8915
78};
79
80struct mlx4_ib_sqp {
81	struct mlx4_ib_qp	qp;
82	int			pkey_index;
83	u32			qkey;
84	u32			send_psn;
85	struct ib_ud_header	ud_header;
86	u8			header_buf[MLX4_IB_UD_HEADER_SIZE];
87};
88
89enum {
90	MLX4_IB_MIN_SQ_STRIDE	= 6,
91	MLX4_IB_CACHE_LINE_SIZE	= 64,
92};
93
94enum {
95	MLX4_RAW_QP_MTU		= 7,
96	MLX4_RAW_QP_MSGMAX	= 31,
97};
98
99#ifndef ETH_ALEN
100#define ETH_ALEN        6
101#endif
102
103static const __be32 mlx4_ib_opcode[] = {
104	[IB_WR_SEND]				= cpu_to_be32(MLX4_OPCODE_SEND),
105	[IB_WR_LSO]				= cpu_to_be32(MLX4_OPCODE_LSO),
106	[IB_WR_SEND_WITH_IMM]			= cpu_to_be32(MLX4_OPCODE_SEND_IMM),
107	[IB_WR_RDMA_WRITE]			= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
108	[IB_WR_RDMA_WRITE_WITH_IMM]		= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
109	[IB_WR_RDMA_READ]			= cpu_to_be32(MLX4_OPCODE_RDMA_READ),
110	[IB_WR_ATOMIC_CMP_AND_SWP]		= cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
111	[IB_WR_ATOMIC_FETCH_AND_ADD]		= cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
112	[IB_WR_SEND_WITH_INV]			= cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
113	[IB_WR_LOCAL_INV]			= cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
114	[IB_WR_FAST_REG_MR]			= cpu_to_be32(MLX4_OPCODE_FMR),
115	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
116	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
117	[IB_WR_BIND_MW]				= cpu_to_be32(MLX4_OPCODE_BIND_MW),
118};
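
/*
 * Note: this table is indexed by the IB work request opcode and is
 * presumably consumed by the send path when it fills the
 * owner_opcode field of the WQE control segment; opcodes not listed
 * here stay zero.
 */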
119
120static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
121{
122	return container_of(mqp, struct mlx4_ib_sqp, qp);
123}
124
125static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
126{
127	if (!mlx4_is_master(dev->dev))
128		return 0;
129
130	return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
131	       qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
132		8 * MLX4_MFUNC_MAX;
133}
134
135static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
136{
137	int proxy_sqp = 0;
138	int real_sqp = 0;
139	int i;
140	/* PPF or Native -- real SQP */
141	real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
142		    qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
143		    qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
144	if (real_sqp)
145		return 1;
146	/* VF or PF -- proxy SQP */
147	if (mlx4_is_mfunc(dev->dev)) {
148		for (i = 0; i < dev->dev->caps.num_ports; i++) {
149			if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
150			    qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
151				proxy_sqp = 1;
152				break;
153			}
154		}
155	}
156	return proxy_sqp;
157}
158
159/* used for INIT/CLOSE port logic */
160static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
161{
162	int proxy_qp0 = 0;
163	int real_qp0 = 0;
164	int i;
165	/* PPF or Native -- real QP0 */
166	real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
167		    qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
168		    qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
169	if (real_qp0)
170		return 1;
171	/* VF or PF -- proxy QP0 */
172	if (mlx4_is_mfunc(dev->dev)) {
173		for (i = 0; i < dev->dev->caps.num_ports; i++) {
174			if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
175				proxy_qp0 = 1;
176				break;
177			}
178		}
179	}
180	return proxy_qp0;
181}
182
183static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
184{
185	return mlx4_buf_offset(&qp->buf, offset);
186}
187
188static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
189{
190	return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
191}
192
193static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
194{
195	return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
196}
197
/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with
 *     0x7FFFFFFF | (invalid_ownership_value << 31).
 *
 * When the max work request size is less than or equal to the WQE
 * basic block size, as an optimization, we can stamp all WQEs with
 * 0xffffffff, and skip the very first chunk of each WQE.
 */
207static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
208{
209	__be32 *wqe;
210	int i;
211	int s;
212	int ind;
213	void *buf;
214	__be32 stamp;
215	struct mlx4_wqe_ctrl_seg *ctrl;
216
217	if (qp->sq_max_wqes_per_wr > 1) {
218		s = roundup(size, 1U << qp->sq.wqe_shift);
219		for (i = 0; i < s; i += 64) {
220			ind = (i >> qp->sq.wqe_shift) + n;
221			stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
222						       cpu_to_be32(0xffffffff);
223			buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
224			wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
225			*wqe = stamp;
226		}
227	} else {
228		ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
229		s = (ctrl->fence_size & 0x3f) << 4;
230		for (i = 64; i < s; i += 64) {
231			wqe = buf + i;
232			*wqe = cpu_to_be32(0xffffffff);
233		}
234	}
235}
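
/*
 * As a hypothetical example of the shrinking-WQE path above: with
 * wqe_shift == 7 (128-byte basic blocks) and size == 256, the loop
 * touches four 64-byte chunks and writes 0x7fffffff or 0xffffffff
 * into the first dword of each, matching the ownership generation of
 * the slot each chunk belongs to, so a prefetched stale WQE is never
 * seen as valid.
 */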
236
237static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
238{
239	struct mlx4_wqe_ctrl_seg *ctrl;
240	struct mlx4_wqe_inline_seg *inl;
241	void *wqe;
242	int s;
243
244	ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
245	s = sizeof(struct mlx4_wqe_ctrl_seg);
246
247	if (qp->ibqp.qp_type == IB_QPT_UD) {
248		struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
249		struct mlx4_av *av = (struct mlx4_av *)dgram->av;
250		memset(dgram, 0, sizeof *dgram);
251		av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
252		s += sizeof(struct mlx4_wqe_datagram_seg);
253	}
254
255	/* Pad the remainder of the WQE with an inline data segment. */
256	if (size > s) {
257		inl = wqe + s;
258		inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
259	}
260	ctrl->srcrb_flags = 0;
261	ctrl->fence_size = size / 16;
262	/*
263	 * Make sure descriptor is fully written before setting ownership bit
264	 * (because HW can start executing as soon as we do).
265	 */
266	wmb();
267
268	ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
269		(n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
270
271	stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
272}
273
/* Post a NOP WQE to prevent wrap-around in the middle of a WR */
275static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
276{
277	unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
278	if (unlikely(s < qp->sq_max_wqes_per_wr)) {
279		post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
280		ind += s;
281	}
282	return ind;
283}
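
/*
 * For example (hypothetical numbers): with wqe_cnt == 256,
 * sq_max_wqes_per_wr == 4 and ind == 254, only two basic blocks
 * remain before the end of the queue, so a two-block NOP is posted
 * and the returned index advances to 256, i.e. slot 0 of the next
 * generation after masking with wqe_cnt - 1.
 */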
284
285static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
286{
287	struct ib_event event;
288	struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
289
290	if (type == MLX4_EVENT_TYPE_PATH_MIG)
291		to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
292
293	if (ibqp->event_handler) {
294		event.device     = ibqp->device;
295		event.element.qp = ibqp;
296		switch (type) {
297		case MLX4_EVENT_TYPE_PATH_MIG:
298			event.event = IB_EVENT_PATH_MIG;
299			break;
300		case MLX4_EVENT_TYPE_COMM_EST:
301			event.event = IB_EVENT_COMM_EST;
302			break;
303		case MLX4_EVENT_TYPE_SQ_DRAINED:
304			event.event = IB_EVENT_SQ_DRAINED;
305			break;
306		case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
307			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
308			break;
309		case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
310			event.event = IB_EVENT_QP_FATAL;
311			break;
312		case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
313			event.event = IB_EVENT_PATH_MIG_ERR;
314			break;
315		case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
316			event.event = IB_EVENT_QP_REQ_ERR;
317			break;
318		case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
319			event.event = IB_EVENT_QP_ACCESS_ERR;
320			break;
321		default:
322			pr_warn("Unexpected event type %d "
323			       "on QP %06x\n", type, qp->qpn);
324			return;
325		}
326
327		ibqp->event_handler(&event, ibqp->qp_context);
328	}
329}
330
331static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
332{
333	/*
334	 * UD WQEs must have a datagram segment.
335	 * RC and UC WQEs might have a remote address segment.
336	 * MLX WQEs need two extra inline data segments (for the UD
337	 * header and space for the ICRC).
338	 */
339	switch (type) {
340	case MLX4_IB_QPT_UD:
341		return sizeof (struct mlx4_wqe_ctrl_seg) +
342			sizeof (struct mlx4_wqe_datagram_seg) +
343			((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
344	case MLX4_IB_QPT_PROXY_SMI_OWNER:
345	case MLX4_IB_QPT_PROXY_SMI:
346	case MLX4_IB_QPT_PROXY_GSI:
347		return sizeof (struct mlx4_wqe_ctrl_seg) +
348			sizeof (struct mlx4_wqe_datagram_seg) + 64;
349	case MLX4_IB_QPT_TUN_SMI_OWNER:
350	case MLX4_IB_QPT_TUN_GSI:
351		return sizeof (struct mlx4_wqe_ctrl_seg) +
352			sizeof (struct mlx4_wqe_datagram_seg);
353
354	case MLX4_IB_QPT_UC:
355		return sizeof (struct mlx4_wqe_ctrl_seg) +
356			sizeof (struct mlx4_wqe_raddr_seg);
357	case MLX4_IB_QPT_RC:
358		return sizeof (struct mlx4_wqe_ctrl_seg) +
359			sizeof (struct mlx4_wqe_atomic_seg) +
360			sizeof (struct mlx4_wqe_raddr_seg);
361	case MLX4_IB_QPT_SMI:
362	case MLX4_IB_QPT_GSI:
363		return sizeof (struct mlx4_wqe_ctrl_seg) +
364			ALIGN(MLX4_IB_UD_HEADER_SIZE +
365			      DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
366					   MLX4_INLINE_ALIGN) *
367			      sizeof (struct mlx4_wqe_inline_seg),
368			      sizeof (struct mlx4_wqe_data_seg)) +
369			ALIGN(4 +
370			      sizeof (struct mlx4_wqe_inline_seg),
371			      sizeof (struct mlx4_wqe_data_seg));
372	default:
373		return sizeof (struct mlx4_wqe_ctrl_seg);
374	}
375}
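
/*
 * For instance, a plain UD QP gets a control segment plus a datagram
 * segment of overhead (64 bytes with the usual segment sizes), with
 * an extra MLX4_IB_LSO_HEADER_SPARE bytes reserved when LSO is
 * enabled.
 */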
376
377static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
378		       int is_user, int has_rq, struct mlx4_ib_qp *qp)
379{
380	/* Sanity check RQ size before proceeding */
381	if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
382	    cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
383		return -EINVAL;
384
385	if (!has_rq) {
386		if (cap->max_recv_wr)
387			return -EINVAL;
388
389		qp->rq.wqe_cnt = qp->rq.max_gs = 0;
390	} else {
391		/* HW requires >= 1 RQ entry with >= 1 gather entry */
392		if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
393			return -EINVAL;
394
395		qp->rq.wqe_cnt	 = roundup_pow_of_two(max(1U, cap->max_recv_wr));
396		qp->rq.max_gs	 = roundup_pow_of_two(max(1U, cap->max_recv_sge));
397		qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
398	}
399
400	/* leave userspace return values as they were, so as not to break ABI */
401	if (is_user) {
402		cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
403		cap->max_recv_sge = qp->rq.max_gs;
404	} else {
405		cap->max_recv_wr  = qp->rq.max_post =
406			min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
407		cap->max_recv_sge = min(qp->rq.max_gs,
408					min(dev->dev->caps.max_sq_sg,
409					    dev->dev->caps.max_rq_sg));
410	}
411
412	return 0;
413}
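
/*
 * As a worked example (made-up capabilities): a request for
 * max_recv_wr = 100 and max_recv_sge = 3 is rounded up to
 * wqe_cnt = 128 and max_gs = 4, giving wqe_shift =
 * ilog2(4 * sizeof(struct mlx4_wqe_data_seg)) = 6, i.e. 64-byte
 * receive WQEs.
 */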
414
415static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
416			      enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
417{
418	int s;
419
420	/* Sanity check SQ size before proceeding */
421	if (cap->max_send_wr  > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
422	    cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
423	    cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
424	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
425		return -EINVAL;
426
427	/*
428	 * For MLX transport we need 2 extra S/G entries:
429	 * one for the header and one for the checksum at the end
430	 */
431	if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
432	     type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
433	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
434		return -EINVAL;
435
436	s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
437		cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
438		send_wqe_overhead(type, qp->flags);
439
440	if (s > dev->dev->caps.max_sq_desc_sz)
441		return -EINVAL;
442
	/*
	 * Hermon supports shrinking WQEs, such that a single work
	 * request can include multiple units of 1 << wqe_shift.  This
	 * way, work requests can differ in size, and do not have to
	 * be a power of 2 in size, saving memory and speeding up send
	 * WR posting.  Unfortunately, if we do this then the
	 * wqe_index field in CQEs can't be used to look up the WR ID
	 * anymore, so we do this only if selective signaling is off.
	 *
	 * Further, on 32-bit platforms, we can't use vmap() to make
	 * the QP buffer virtually contiguous.  Thus we have to use
	 * constant-sized WRs to make sure a WR is always fully within
	 * a single page-sized chunk.
	 *
	 * Finally, we use NOP work requests to pad the end of the
	 * work queue, to avoid wrap-around in the middle of a WR.  We
	 * set the NEC bit to avoid getting completions with error for
	 * these NOP WRs, but since NEC is only supported starting
	 * with firmware 2.2.232, we use constant-sized WRs for older
	 * firmware.
	 *
	 * And, since MLX QPs only support SEND, we use constant-sized
	 * WRs in this case.
	 *
	 * We look for the smallest value of wqe_shift such that the
	 * resulting number of wqes does not exceed device
	 * capabilities.
	 *
	 * We set the WQE size to at least 64 bytes, so that stamping
	 * invalidates each WQE.
	 */
474	if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
475	    qp->sq_signal_bits && BITS_PER_LONG == 64 &&
476	    type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
477	    !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
478		      MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
479		qp->sq.wqe_shift = ilog2(64);
480	else
481		qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
482
483	for (;;) {
484		qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);
485
486		/*
487		 * We need to leave 2 KB + 1 WR of headroom in the SQ to
488		 * allow HW to prefetch.
489		 */
490		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
491		qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr *
492						    qp->sq_max_wqes_per_wr +
493						    qp->sq_spare_wqes);
494
495		if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
496			break;
497
498		if (qp->sq_max_wqes_per_wr <= 1)
499			return -EINVAL;
500
501		++qp->sq.wqe_shift;
502	}
503
504	qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
505			     (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) -
506			 send_wqe_overhead(type, qp->flags)) /
507		sizeof (struct mlx4_wqe_data_seg);
508
509	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
510		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
511	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
512		qp->rq.offset = 0;
513		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
514	} else {
515		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
516		qp->sq.offset = 0;
517	}
518
519	cap->max_send_wr  = qp->sq.max_post =
520		(qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
521	cap->max_send_sge = min(qp->sq.max_gs,
522				min(dev->dev->caps.max_sq_sg,
523				    dev->dev->caps.max_rq_sg));
524	/* We don't support inline sends for kernel QPs (yet) */
525	cap->max_inline_data = 0;
526
527	return 0;
528}
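
/*
 * Rough sizing example (illustrative numbers only): for s = 200
 * bytes and an initial wqe_shift of 6, sq_max_wqes_per_wr becomes
 * DIV_ROUND_UP(200, 64) = 4, sq_spare_wqes becomes 2048/64 + 4 = 36,
 * and wqe_cnt is the next power of two that fits
 * max_send_wr * 4 + 36; if that exceeds max_wqes, the shift is
 * bumped and the loop tries again with wider basic blocks.
 */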
529
530static int set_user_sq_size(struct mlx4_ib_dev *dev,
531			    struct mlx4_ib_qp *qp,
532			    struct mlx4_ib_create_qp *ucmd)
533{
534	/* Sanity check SQ size before proceeding */
535	if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes	 ||
536	    ucmd->log_sq_stride >
537		ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
538	    ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
539		return -EINVAL;
540
541	qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;
542	qp->sq.wqe_shift = ucmd->log_sq_stride;
543
544	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
545		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
546
547	return 0;
548}
549
550static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
551{
552	int i;
553
554	qp->sqp_proxy_rcv =
555		kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
556			GFP_KERNEL);
557	if (!qp->sqp_proxy_rcv)
558		return -ENOMEM;
559	for (i = 0; i < qp->rq.wqe_cnt; i++) {
560		qp->sqp_proxy_rcv[i].addr =
561			kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
562				GFP_KERNEL);
563		if (!qp->sqp_proxy_rcv[i].addr)
564			goto err;
565		qp->sqp_proxy_rcv[i].map =
566			ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
567					  sizeof (struct mlx4_ib_proxy_sqp_hdr),
568					  DMA_FROM_DEVICE);
569		if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) {
570			kfree(qp->sqp_proxy_rcv[i].addr);
571			goto err;
572		}
573	}
574	return 0;
575
576err:
577	while (i > 0) {
578		--i;
579		ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
580				    sizeof (struct mlx4_ib_proxy_sqp_hdr),
581				    DMA_FROM_DEVICE);
582		kfree(qp->sqp_proxy_rcv[i].addr);
583	}
584	kfree(qp->sqp_proxy_rcv);
585	qp->sqp_proxy_rcv = NULL;
586	return -ENOMEM;
587}
588
589static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
590{
591	int i;
592
593	for (i = 0; i < qp->rq.wqe_cnt; i++) {
594		ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
595				    sizeof (struct mlx4_ib_proxy_sqp_hdr),
596				    DMA_FROM_DEVICE);
597		kfree(qp->sqp_proxy_rcv[i].addr);
598	}
599	kfree(qp->sqp_proxy_rcv);
600}
601
602static int qp_has_rq(struct ib_qp_init_attr *attr)
603{
604	if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
605		return 0;
606
607	return !attr->srq;
608}
609
610static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
611{
612	int i;
613	for (i = 0; i < dev->caps.num_ports; i++) {
614		if (qpn == dev->caps.qp0_proxy[i])
615			return !!dev->caps.qp0_qkey[i];
616	}
617	return 0;
618}
619
620static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
621			    struct ib_qp_init_attr *init_attr,
622			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
623			    gfp_t gfp)
624{
625	int qpn;
626	int err;
627	struct mlx4_ib_sqp *sqp;
628	struct mlx4_ib_qp *qp;
629	enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
630	struct mlx4_ib_cq *mcq;
631	unsigned long flags;
632
633	/* When tunneling special qps, we use a plain UD qp */
634	if (sqpn) {
635		if (mlx4_is_mfunc(dev->dev) &&
636		    (!mlx4_is_master(dev->dev) ||
637		     !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
638			if (init_attr->qp_type == IB_QPT_GSI)
639				qp_type = MLX4_IB_QPT_PROXY_GSI;
640			else {
641				if (mlx4_is_master(dev->dev) ||
642				    qp0_enabled_vf(dev->dev, sqpn))
643					qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
644				else
645					qp_type = MLX4_IB_QPT_PROXY_SMI;
646			}
647		}
648		qpn = sqpn;
649		/* add extra sg entry for tunneling */
650		init_attr->cap.max_recv_sge++;
651	} else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
652		struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
653			container_of(init_attr,
654				     struct mlx4_ib_qp_tunnel_init_attr, init_attr);
655		if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
656		     tnl_init->proxy_qp_type != IB_QPT_GSI)   ||
657		    !mlx4_is_master(dev->dev))
658			return -EINVAL;
659		if (tnl_init->proxy_qp_type == IB_QPT_GSI)
660			qp_type = MLX4_IB_QPT_TUN_GSI;
661		else if (tnl_init->slave == mlx4_master_func_num(dev->dev) ||
662			 mlx4_vf_smi_enabled(dev->dev, tnl_init->slave,
663					     tnl_init->port))
664			qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
665		else
666			qp_type = MLX4_IB_QPT_TUN_SMI;
667		/* we are definitely in the PPF here, since we are creating
668		 * tunnel QPs. base_tunnel_sqpn is therefore valid. */
669		qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
670			+ tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
671		sqpn = qpn;
672	}
673
674	if (!*caller_qp) {
675		if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
676		    (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
677				MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
678			sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
679			if (!sqp)
680				return -ENOMEM;
681			qp = &sqp->qp;
682			qp->pri.vid = 0xFFFF;
683			qp->alt.vid = 0xFFFF;
684		} else {
685			qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
686			if (!qp)
687				return -ENOMEM;
688			qp->pri.vid = 0xFFFF;
689			qp->alt.vid = 0xFFFF;
690		}
691	} else
692		qp = *caller_qp;
693
694	qp->mlx4_ib_qp_type = qp_type;
695
696	mutex_init(&qp->mutex);
697	spin_lock_init(&qp->sq.lock);
698	spin_lock_init(&qp->rq.lock);
699	INIT_LIST_HEAD(&qp->gid_list);
700	INIT_LIST_HEAD(&qp->steering_rules);
701
702	qp->state	 = IB_QPS_RESET;
703	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
704		qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
705
706	err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
707	if (err)
708		goto err;
709
710	if (pd->uobject) {
711		struct mlx4_ib_create_qp ucmd;
712
713		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
714			err = -EFAULT;
715			goto err;
716		}
717
718		qp->sq_no_prefetch = ucmd.sq_no_prefetch;
719
720		err = set_user_sq_size(dev, qp, &ucmd);
721		if (err)
722			goto err;
723
724		qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
725				       qp->buf_size, 0, 0);
726		if (IS_ERR(qp->umem)) {
727			err = PTR_ERR(qp->umem);
728			goto err;
729		}
730
731		err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
732				    ilog2(qp->umem->page_size), &qp->mtt);
733		if (err)
734			goto err_buf;
735
736		err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
737		if (err)
738			goto err_mtt;
739
740		if (qp_has_rq(init_attr)) {
741			err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
742						  ucmd.db_addr, &qp->db);
743			if (err)
744				goto err_mtt;
745		}
746	} else {
747		qp->sq_no_prefetch = 0;
748
749		if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
750			qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
751
752		if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
753			qp->flags |= MLX4_IB_QP_LSO;
754
755		if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
756			if (dev->steering_support ==
757			    MLX4_STEERING_MODE_DEVICE_MANAGED)
758				qp->flags |= MLX4_IB_QP_NETIF;
759			else
760				goto err;
761		}
762
763		err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
764		if (err)
765			goto err;
766
767		if (qp_has_rq(init_attr)) {
768			err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
769			if (err)
770				goto err;
771
772			*qp->db.db = 0;
773		}
774
775		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
776			err = -ENOMEM;
777			goto err_db;
778		}
779
780		err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
781				    &qp->mtt);
782		if (err)
783			goto err_buf;
784
785		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
786		if (err)
787			goto err_mtt;
788
789		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
790		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
791		if (!qp->sq.wrid || !qp->rq.wrid) {
792			err = -ENOMEM;
793			goto err_wrid;
794		}
795	}
796
797	if (sqpn) {
798		if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
799		    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
800			if (alloc_proxy_bufs(pd->device, qp)) {
801				err = -ENOMEM;
802				goto err_wrid;
803			}
804		}
805	} else {
806		/* Raw packet QPNs may not have bits 6,7 set in their qp_num;
807		 * otherwise, the WQE BlueFlame setup flow wrongly causes
808		 * VLAN insertion. */
809		if (init_attr->qp_type == IB_QPT_RAW_PACKET)
810			err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn,
811						    (init_attr->cap.max_send_wr ?
812						     MLX4_RESERVE_ETH_BF_QP : 0) |
813						    (init_attr->cap.max_recv_wr ?
814						     MLX4_RESERVE_A0_QP : 0));
815		else
816			if (qp->flags & MLX4_IB_QP_NETIF)
817				err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
818			else
819				err = mlx4_qp_reserve_range(dev->dev, 1, 1,
820							    &qpn, 0);
821		if (err)
822			goto err_proxy;
823	}
824
825	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
826	if (err)
827		goto err_qpn;
828
829	if (init_attr->qp_type == IB_QPT_XRC_TGT)
830		qp->mqp.qpn |= (1 << 23);
831
832	/*
833	 * Hardware wants QPN written in big-endian order (after
834	 * shifting) for send doorbell.  Precompute this value to save
835	 * a little bit when posting sends.
836	 */
837	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
838
839	qp->mqp.event = mlx4_ib_qp_event;
840	if (!*caller_qp)
841		*caller_qp = qp;
842
843	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
844	mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
845			 to_mcq(init_attr->recv_cq));
	/* Keep the QP on the device-wide QP list, so that the reset
	 * flow can still reach it later.
	 */
849	list_add_tail(&qp->qps_list, &dev->qp_list);
	/* Keep the QP on the send/receive CQ lists, so that the reset
	 * flow can still reach it later.
	 */
853	mcq = to_mcq(init_attr->send_cq);
854	list_add_tail(&qp->cq_send_list, &mcq->send_qp_list);
855	mcq = to_mcq(init_attr->recv_cq);
856	list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list);
857	mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq),
858			   to_mcq(init_attr->recv_cq));
859	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
860	return 0;
861
862err_qpn:
863	if (!sqpn) {
864		if (qp->flags & MLX4_IB_QP_NETIF)
865			mlx4_ib_steer_qp_free(dev, qpn, 1);
866		else
867			mlx4_qp_release_range(dev->dev, qpn, 1);
868	}
869err_proxy:
870	if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
871		free_proxy_bufs(pd->device, qp);
872err_wrid:
873	if (pd->uobject) {
874		if (qp_has_rq(init_attr))
875			mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
876	} else {
877		kfree(qp->sq.wrid);
878		kfree(qp->rq.wrid);
879	}
880
881err_mtt:
882	mlx4_mtt_cleanup(dev->dev, &qp->mtt);
883
884err_buf:
885	if (pd->uobject)
886		ib_umem_release(qp->umem);
887	else
888		mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
889
890err_db:
891	if (!pd->uobject && qp_has_rq(init_attr))
892		mlx4_db_free(dev->dev, &qp->db);
893
894err:
895	if (!*caller_qp)
896		kfree(qp);
897	return err;
898}
899
900static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
901{
902	switch (state) {
903	case IB_QPS_RESET:	return MLX4_QP_STATE_RST;
904	case IB_QPS_INIT:	return MLX4_QP_STATE_INIT;
905	case IB_QPS_RTR:	return MLX4_QP_STATE_RTR;
906	case IB_QPS_RTS:	return MLX4_QP_STATE_RTS;
907	case IB_QPS_SQD:	return MLX4_QP_STATE_SQD;
908	case IB_QPS_SQE:	return MLX4_QP_STATE_SQER;
909	case IB_QPS_ERR:	return MLX4_QP_STATE_ERR;
910	default:		return -1;
911	}
912}
913
914static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
915	__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
916{
917	if (send_cq == recv_cq) {
918		spin_lock(&send_cq->lock);
919		__acquire(&recv_cq->lock);
920	} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
921		spin_lock(&send_cq->lock);
922		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
923	} else {
924		spin_lock(&recv_cq->lock);
925		spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
926	}
927}
928
929static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
930	__releases(&send_cq->lock) __releases(&recv_cq->lock)
931{
932	if (send_cq == recv_cq) {
933		__release(&recv_cq->lock);
934		spin_unlock(&send_cq->lock);
935	} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
936		spin_unlock(&recv_cq->lock);
937		spin_unlock(&send_cq->lock);
938	} else {
939		spin_unlock(&send_cq->lock);
940		spin_unlock(&recv_cq->lock);
941	}
942}
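
/*
 * Taking the two CQ locks in a fixed order (lower CQN first) and
 * releasing them in reverse keeps the lock/unlock pairs consistent
 * with mlx4_ib_lock_cqs() and avoids an AB-BA deadlock when two QPs
 * use the same pair of CQs in opposite roles.
 */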
943
944static void del_gid_entries(struct mlx4_ib_qp *qp)
945{
946	struct mlx4_ib_gid_entry *ge, *tmp;
947
948	list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
949		list_del(&ge->list);
950		kfree(ge);
951	}
952}
953
954static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
955{
956	if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
957		return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
958	else
959		return to_mpd(qp->ibqp.pd);
960}
961
962static void get_cqs(struct mlx4_ib_qp *qp,
963		    struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
964{
965	switch (qp->ibqp.qp_type) {
966	case IB_QPT_XRC_TGT:
967		*send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
968		*recv_cq = *send_cq;
969		break;
970	case IB_QPT_XRC_INI:
971		*send_cq = to_mcq(qp->ibqp.send_cq);
972		*recv_cq = *send_cq;
973		break;
974	default:
975		*send_cq = to_mcq(qp->ibqp.send_cq);
976		*recv_cq = to_mcq(qp->ibqp.recv_cq);
977		break;
978	}
979}
980
981static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
982			      int is_user)
983{
984	struct mlx4_ib_cq *send_cq, *recv_cq;
985	unsigned long flags;
986
987	if (qp->state != IB_QPS_RESET) {
988		if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
989				   MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
990			pr_warn("modify QP %06x to RESET failed.\n",
991			       qp->mqp.qpn);
992		if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) {
993			mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
994			qp->pri.smac = 0;
995			qp->pri.smac_port = 0;
996		}
997		if (qp->alt.smac) {
998			mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
999			qp->alt.smac = 0;
1000		}
1001		if (qp->pri.vid < 0x1000) {
1002			mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
1003			qp->pri.vid = 0xFFFF;
1004			qp->pri.candidate_vid = 0xFFFF;
1005			qp->pri.update_vid = 0;
1006		}
1007		if (qp->alt.vid < 0x1000) {
1008			mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
1009			qp->alt.vid = 0xFFFF;
1010			qp->alt.candidate_vid = 0xFFFF;
1011			qp->alt.update_vid = 0;
1012		}
1013	}
1014
1015	get_cqs(qp, &send_cq, &recv_cq);
1016
1017	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
1018	mlx4_ib_lock_cqs(send_cq, recv_cq);
1019
1020	/* del from lists under both locks above to protect reset flow paths */
1021	list_del(&qp->qps_list);
1022	list_del(&qp->cq_send_list);
1023	list_del(&qp->cq_recv_list);
1024	if (!is_user) {
1025		__mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
1026				 qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
1027		if (send_cq != recv_cq)
1028			__mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1029	}
1030
1031	mlx4_qp_remove(dev->dev, &qp->mqp);
1032
1033	mlx4_ib_unlock_cqs(send_cq, recv_cq);
1034	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
1035
1036	mlx4_qp_free(dev->dev, &qp->mqp);
1037
1038	if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) {
1039		if (qp->flags & MLX4_IB_QP_NETIF)
1040			mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1);
1041		else
1042			mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
1043	}
1044
1045	mlx4_mtt_cleanup(dev->dev, &qp->mtt);
1046
1047	if (is_user) {
1048		if (qp->rq.wqe_cnt)
1049			mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
1050					      &qp->db);
1051		ib_umem_release(qp->umem);
1052	} else {
1053		kfree(qp->sq.wrid);
1054		kfree(qp->rq.wrid);
1055		if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
1056		    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
1057			free_proxy_bufs(&dev->ib_dev, qp);
1058		mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
1059		if (qp->rq.wqe_cnt)
1060			mlx4_db_free(dev->dev, &qp->db);
1061	}
1062
1063	del_gid_entries(qp);
1064}
1065
1066static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
1067{
1068	/* Native or PPF */
1069	if (!mlx4_is_mfunc(dev->dev) ||
1070	    (mlx4_is_master(dev->dev) &&
1071	     attr->create_flags & MLX4_IB_SRIOV_SQP)) {
1072		return  dev->dev->phys_caps.base_sqpn +
1073			(attr->qp_type == IB_QPT_SMI ? 0 : 2) +
1074			attr->port_num - 1;
1075	}
1076	/* PF or VF -- creating proxies */
1077	if (attr->qp_type == IB_QPT_SMI)
1078		return dev->dev->caps.qp0_proxy[attr->port_num - 1];
1079	else
1080		return dev->dev->caps.qp1_proxy[attr->port_num - 1];
1081}
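
/*
 * For example, on a native (non-SR-IOV) device the GSI QP of port 1
 * maps to base_sqpn + 2, while under SR-IOV a function gets the
 * per-port proxy QP1 number from qp1_proxy[] instead.
 */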
1082
1083struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
1084				struct ib_qp_init_attr *init_attr,
1085				struct ib_udata *udata)
1086{
1087	struct mlx4_ib_qp *qp = NULL;
1088	int err;
1089	u16 xrcdn = 0;
1090	gfp_t gfp;
1091
1092	gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
1093		GFP_NOIO : GFP_KERNEL;
	/*
	 * We only support LSO, multicast loopback blocking, and the
	 * mlx4-specific SR-IOV, NETIF and GFP_NOIO creation flags
	 * listed below, and only for kernel UD QPs.
	 */
1098	if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
1099					MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
1100					MLX4_IB_SRIOV_TUNNEL_QP |
1101					MLX4_IB_SRIOV_SQP |
1102					MLX4_IB_QP_NETIF |
1103					MLX4_IB_QP_CREATE_USE_GFP_NOIO))
1104		return ERR_PTR(-EINVAL);
1105
1106	if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
1107		if (init_attr->qp_type != IB_QPT_UD)
1108			return ERR_PTR(-EINVAL);
1109	}
1110
1111	if (init_attr->create_flags &&
1112	    (udata ||
1113	     ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
1114	      init_attr->qp_type != IB_QPT_UD) ||
1115	     ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
1116	      init_attr->qp_type > IB_QPT_GSI)))
1117		return ERR_PTR(-EINVAL);
1118
1119	switch (init_attr->qp_type) {
1120	case IB_QPT_XRC_TGT:
1121		pd = to_mxrcd(init_attr->xrcd)->pd;
1122		xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
1123		init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
1124		/* fall through */
1125	case IB_QPT_XRC_INI:
1126		if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
1127			return ERR_PTR(-ENOSYS);
1128		init_attr->recv_cq = init_attr->send_cq;
1129		/* fall through */
1130	case IB_QPT_RC:
1131	case IB_QPT_UC:
1132	case IB_QPT_RAW_PACKET:
1133		qp = kzalloc(sizeof *qp, gfp);
1134		if (!qp)
1135			return ERR_PTR(-ENOMEM);
1136		qp->pri.vid = 0xFFFF;
1137		qp->alt.vid = 0xFFFF;
1138		/* fall through */
1139	case IB_QPT_UD:
1140	{
1141		err = create_qp_common(to_mdev(pd->device), pd, init_attr,
1142				       udata, 0, &qp, gfp);
1143		if (err)
1144			return ERR_PTR(err);
1145
1146		qp->ibqp.qp_num = qp->mqp.qpn;
1147		qp->xrcdn = xrcdn;
1148
1149		break;
1150	}
1151	case IB_QPT_SMI:
1152	case IB_QPT_GSI:
1153	{
1154		/* Userspace is not allowed to create special QPs: */
1155		if (udata)
1156			return ERR_PTR(-EINVAL);
1157
1158		err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
1159				       get_sqp_num(to_mdev(pd->device), init_attr),
1160				       &qp, gfp);
1161		if (err)
1162			return ERR_PTR(err);
1163
1164		qp->port	= init_attr->port_num;
1165		qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
1166
1167		break;
1168	}
1169	default:
1170		/* Don't support raw QPs */
1171		return ERR_PTR(-EINVAL);
1172	}
1173
1174	return &qp->ibqp;
1175}
1176
1177int mlx4_ib_destroy_qp(struct ib_qp *qp)
1178{
1179	struct mlx4_ib_dev *dev = to_mdev(qp->device);
1180	struct mlx4_ib_qp *mqp = to_mqp(qp);
1181	struct mlx4_ib_pd *pd;
1182
1183	if (is_qp0(dev, mqp))
1184		mlx4_CLOSE_PORT(dev->dev, mqp->port);
1185
1186	if (dev->qp1_proxy[mqp->port - 1] == mqp) {
1187		mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]);
1188		dev->qp1_proxy[mqp->port - 1] = NULL;
1189		mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
1190	}
1191
1192	pd = get_pd(mqp);
1193	destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
1194
1195	if (is_sqp(dev, mqp))
1196		kfree(to_msqp(mqp));
1197	else
1198		kfree(mqp);
1199
1200	return 0;
1201}
1202
1203static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
1204{
1205	switch (type) {
1206	case MLX4_IB_QPT_RC:		return MLX4_QP_ST_RC;
1207	case MLX4_IB_QPT_UC:		return MLX4_QP_ST_UC;
1208	case MLX4_IB_QPT_UD:		return MLX4_QP_ST_UD;
1209	case MLX4_IB_QPT_XRC_INI:
1210	case MLX4_IB_QPT_XRC_TGT:	return MLX4_QP_ST_XRC;
1211	case MLX4_IB_QPT_SMI:
1212	case MLX4_IB_QPT_GSI:
1213	case MLX4_IB_QPT_RAW_PACKET:	return MLX4_QP_ST_MLX;
1214
1215	case MLX4_IB_QPT_PROXY_SMI_OWNER:
1216	case MLX4_IB_QPT_TUN_SMI_OWNER:	return (mlx4_is_mfunc(dev->dev) ?
1217						MLX4_QP_ST_MLX : -1);
1218	case MLX4_IB_QPT_PROXY_SMI:
1219	case MLX4_IB_QPT_TUN_SMI:
1220	case MLX4_IB_QPT_PROXY_GSI:
1221	case MLX4_IB_QPT_TUN_GSI:	return (mlx4_is_mfunc(dev->dev) ?
1222						MLX4_QP_ST_UD : -1);
1223	default:			return -1;
1224	}
1225}
1226
1227static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
1228				   int attr_mask)
1229{
1230	u8 dest_rd_atomic;
1231	u32 access_flags;
1232	u32 hw_access_flags = 0;
1233
1234	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1235		dest_rd_atomic = attr->max_dest_rd_atomic;
1236	else
1237		dest_rd_atomic = qp->resp_depth;
1238
1239	if (attr_mask & IB_QP_ACCESS_FLAGS)
1240		access_flags = attr->qp_access_flags;
1241	else
1242		access_flags = qp->atomic_rd_en;
1243
1244	if (!dest_rd_atomic)
1245		access_flags &= IB_ACCESS_REMOTE_WRITE;
1246
1247	if (access_flags & IB_ACCESS_REMOTE_READ)
1248		hw_access_flags |= MLX4_QP_BIT_RRE;
1249	if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
1250		hw_access_flags |= MLX4_QP_BIT_RAE;
1251	if (access_flags & IB_ACCESS_REMOTE_WRITE)
1252		hw_access_flags |= MLX4_QP_BIT_RWE;
1253
1254	return cpu_to_be32(hw_access_flags);
1255}
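
/*
 * For example, requesting IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE
 * with a non-zero responder depth yields MLX4_QP_BIT_RRE | MLX4_QP_BIT_RWE;
 * with a zero responder depth only the RWE bit can survive.
 */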
1256
1257static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
1258			    int attr_mask)
1259{
1260	if (attr_mask & IB_QP_PKEY_INDEX)
1261		sqp->pkey_index = attr->pkey_index;
1262	if (attr_mask & IB_QP_QKEY)
1263		sqp->qkey = attr->qkey;
1264	if (attr_mask & IB_QP_SQ_PSN)
1265		sqp->send_psn = attr->sq_psn;
1266}
1267
1268static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
1269{
1270	path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
1271}
1272
1273static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
1274			  u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
1275			  struct mlx4_roce_smac_vlan_info *smac_info, u8 port)
1276{
1277	int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
1278		IB_LINK_LAYER_ETHERNET;
1279	int vidx;
1280	int smac_index;
1281	int err;
1282
1283
1284	path->grh_mylmc     = ah->src_path_bits & 0x7f;
1285	path->rlid	    = cpu_to_be16(ah->dlid);
1286	if (ah->static_rate) {
1287		path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
1288		while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
1289		       !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
1290			--path->static_rate;
1291	} else
1292		path->static_rate = 0;
1293
1294	if (ah->ah_flags & IB_AH_GRH) {
1295		if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
1296			pr_err("sgid_index (%u) too large. max is %d\n",
1297			       ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
1298			return -1;
1299		}
1300
1301		path->grh_mylmc |= 1 << 7;
1302		path->mgid_index = ah->grh.sgid_index;
1303		path->hop_limit  = ah->grh.hop_limit;
1304		path->tclass_flowlabel =
1305			cpu_to_be32((ah->grh.traffic_class << 20) |
1306				    (ah->grh.flow_label));
1307		memcpy(path->rgid, ah->grh.dgid.raw, 16);
1308	}
1309
1310	if (is_eth) {
1311		if (!(ah->ah_flags & IB_AH_GRH))
1312			return -1;
1313
1314		path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
1315			((port - 1) << 6) | ((ah->sl & 7) << 3);
1316
1317		path->feup |= MLX4_FEUP_FORCE_ETH_UP;
1318		if (vlan_tag < 0x1000) {
1319			if (smac_info->vid < 0x1000) {
1320				/* both valid vlan ids */
1321				if (smac_info->vid != vlan_tag) {
1322					/* different VIDs.  unreg old and reg new */
1323					err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
1324					if (err)
1325						return err;
1326					smac_info->candidate_vid = vlan_tag;
1327					smac_info->candidate_vlan_index = vidx;
1328					smac_info->candidate_vlan_port = port;
1329					smac_info->update_vid = 1;
1330					path->vlan_index = vidx;
1331				} else {
1332					path->vlan_index = smac_info->vlan_index;
1333				}
1334			} else {
1335				/* no current vlan tag in qp */
1336				err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
1337				if (err)
1338					return err;
1339				smac_info->candidate_vid = vlan_tag;
1340				smac_info->candidate_vlan_index = vidx;
1341				smac_info->candidate_vlan_port = port;
1342				smac_info->update_vid = 1;
1343				path->vlan_index = vidx;
1344			}
1345			path->feup |= MLX4_FVL_FORCE_ETH_VLAN;
1346			path->fl = 1 << 6;
1347		} else {
1348			/* have current vlan tag. unregister it at modify-qp success */
1349			if (smac_info->vid < 0x1000) {
1350				smac_info->candidate_vid = 0xFFFF;
1351				smac_info->update_vid = 1;
1352			}
1353		}
1354
		/* get smac_index for RoCE use.
		 * If no smac was yet assigned, register one.
		 * If one was already assigned, but the new mac differs,
		 * unregister the old one and register the new one.
		 */
1360		if ((!smac_info->smac && !smac_info->smac_port) ||
1361		    smac_info->smac != smac) {
1362			/* register candidate now, unreg if needed, after success */
1363			smac_index = mlx4_register_mac(dev->dev, port, smac);
1364			if (smac_index >= 0) {
1365				smac_info->candidate_smac_index = smac_index;
1366				smac_info->candidate_smac = smac;
1367				smac_info->candidate_smac_port = port;
1368			} else {
1369				return -EINVAL;
1370			}
1371		} else {
1372			smac_index = smac_info->smac_index;
1373		}
1374
1375		memcpy(path->dmac, ah->dmac, 6);
1376		path->ackto = MLX4_IB_LINK_TYPE_ETH;
1377		/* put MAC table smac index for IBoE */
1378		path->grh_mylmc = (u8) (smac_index) | 0x80;
1379	} else {
1380		path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
1381			((port - 1) << 6) | ((ah->sl & 0xf) << 2);
1382	}
1383
1384	return 0;
1385}
1386
1387static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
1388			 enum ib_qp_attr_mask qp_attr_mask,
1389			 struct mlx4_ib_qp *mqp,
1390			 struct mlx4_qp_path *path, u8 port)
1391{
1392	return _mlx4_set_path(dev, &qp->ah_attr,
1393			      mlx4_mac_to_u64((u8 *)qp->smac),
1394			      (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff,
1395			      path, &mqp->pri, port);
1396}
1397
1398static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
1399			     const struct ib_qp_attr *qp,
1400			     enum ib_qp_attr_mask qp_attr_mask,
1401			     struct mlx4_ib_qp *mqp,
1402			     struct mlx4_qp_path *path, u8 port)
1403{
1404	return _mlx4_set_path(dev, &qp->alt_ah_attr,
1405			      mlx4_mac_to_u64((u8 *)qp->alt_smac),
1406			      (qp_attr_mask & IB_QP_ALT_VID) ?
1407			      qp->alt_vlan_id : 0xffff,
1408			      path, &mqp->alt, port);
1409}
1410
1411static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
1412{
1413	struct mlx4_ib_gid_entry *ge, *tmp;
1414
1415	list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
1416		if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) {
1417			ge->added = 1;
1418			ge->port = qp->port;
1419		}
1420	}
1421}
1422
1423static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac,
1424				    struct mlx4_qp_context *context)
1425{
1426	u64 u64_mac;
1427	int smac_index;
1428
1429	u64_mac = atomic64_read(&dev->iboe.mac[qp->port - 1]);
1430
1431	context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
1432	if (!qp->pri.smac && !qp->pri.smac_port) {
1433		smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
1434		if (smac_index >= 0) {
1435			qp->pri.candidate_smac_index = smac_index;
1436			qp->pri.candidate_smac = u64_mac;
1437			qp->pri.candidate_smac_port = qp->port;
1438			context->pri_path.grh_mylmc = 0x80 | (u8) smac_index;
1439		} else {
1440			return -ENOENT;
1441		}
1442	}
1443	return 0;
1444}
1445
1446static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1447			       const struct ib_qp_attr *attr, int attr_mask,
1448			       enum ib_qp_state cur_state, enum ib_qp_state new_state)
1449{
1450	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
1451	struct mlx4_ib_qp *qp = to_mqp(ibqp);
1452	struct mlx4_ib_pd *pd;
1453	struct mlx4_ib_cq *send_cq, *recv_cq;
1454	struct mlx4_qp_context *context;
1455	enum mlx4_qp_optpar optpar = 0;
1456	int sqd_event;
1457	int steer_qp = 0;
1458	int err = -EINVAL;
1459
1460	/* APM is not supported under RoCE */
1461	if (attr_mask & IB_QP_ALT_PATH &&
1462	    rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
1463	    IB_LINK_LAYER_ETHERNET)
1464		return -ENOTSUPP;
1465
1466	context = kzalloc(sizeof *context, GFP_KERNEL);
1467	if (!context)
1468		return -ENOMEM;
1469
1470	context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
1471				     (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
1472
1473	if (!(attr_mask & IB_QP_PATH_MIG_STATE))
1474		context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
1475	else {
1476		optpar |= MLX4_QP_OPTPAR_PM_STATE;
1477		switch (attr->path_mig_state) {
1478		case IB_MIG_MIGRATED:
1479			context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
1480			break;
1481		case IB_MIG_REARM:
1482			context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
1483			break;
1484		case IB_MIG_ARMED:
1485			context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
1486			break;
1487		}
1488	}
1489
1490	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
1491		context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
1492	else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1493		context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
1494	else if (ibqp->qp_type == IB_QPT_UD) {
1495		if (qp->flags & MLX4_IB_QP_LSO)
1496			context->mtu_msgmax = (IB_MTU_4096 << 5) |
1497					      ilog2(dev->dev->caps.max_gso_sz);
1498		else
1499			context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
1500	} else if (attr_mask & IB_QP_PATH_MTU) {
1501		if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
1502			pr_err("path MTU (%u) is invalid\n",
1503			       attr->path_mtu);
1504			goto out;
1505		}
1506		context->mtu_msgmax = (attr->path_mtu << 5) |
1507			ilog2(dev->dev->caps.max_msg_sz);
1508	}
1509
1510	if (qp->rq.wqe_cnt)
1511		context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
1512	context->rq_size_stride |= qp->rq.wqe_shift - 4;
1513
1514	if (qp->sq.wqe_cnt)
1515		context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
1516	context->sq_size_stride |= qp->sq.wqe_shift - 4;
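
	/*
	 * The *_size_stride fields pack log2(number of WQEs) in the
	 * upper bits and (wqe_shift - 4), i.e. the log of the stride
	 * in 16-byte units, in the low bits; e.g. 256 WQEs of 64
	 * bytes encode as (8 << 3) | 2.
	 */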
1517
1518	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
1519		context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
1520		context->xrcd = cpu_to_be32((u32) qp->xrcdn);
1521		if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1522			context->param3 |= cpu_to_be32(1 << 30);
1523	}
1524
1525	if (qp->ibqp.uobject)
1526		context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
1527	else
1528		context->usr_page = cpu_to_be32(dev->priv_uar.index);
1529
1530	if (attr_mask & IB_QP_DEST_QPN)
1531		context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
1532
1533	if (attr_mask & IB_QP_PORT) {
1534		if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
1535		    !(attr_mask & IB_QP_AV)) {
1536			mlx4_set_sched(&context->pri_path, attr->port_num);
1537			optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
1538		}
1539	}
1540
1541	if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
1542		if (dev->counters[qp->port - 1] != -1) {
1543			context->pri_path.counter_index =
1544						dev->counters[qp->port - 1];
1545			optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
1546		} else
1547			context->pri_path.counter_index = 0xff;
1548
1549		if (qp->flags & MLX4_IB_QP_NETIF) {
1550			mlx4_ib_steer_qp_reg(dev, qp, 1);
1551			steer_qp = 1;
1552		}
1553	}
1554
1555	if (attr_mask & IB_QP_PKEY_INDEX) {
1556		if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1557			context->pri_path.disable_pkey_check = 0x40;
1558		context->pri_path.pkey_index = attr->pkey_index;
1559		optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
1560	}
1561
1562	if (attr_mask & IB_QP_AV) {
1563		if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
1564				  attr_mask & IB_QP_PORT ?
1565				  attr->port_num : qp->port))
1566			goto out;
1567
1568		optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
1569			   MLX4_QP_OPTPAR_SCHED_QUEUE);
1570	}
1571
1572	if (attr_mask & IB_QP_TIMEOUT) {
1573		context->pri_path.ackto |= attr->timeout << 3;
1574		optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
1575	}
1576
1577	if (attr_mask & IB_QP_ALT_PATH) {
1578		if (attr->alt_port_num == 0 ||
1579		    attr->alt_port_num > dev->dev->caps.num_ports)
1580			goto out;
1581
1582		if (attr->alt_pkey_index >=
1583		    dev->dev->caps.pkey_table_len[attr->alt_port_num])
1584			goto out;
1585
1586		if (mlx4_set_alt_path(dev, attr, attr_mask, qp,
1587				      &context->alt_path,
1588				      attr->alt_port_num))
1589			goto out;
1590
1591		context->alt_path.pkey_index = attr->alt_pkey_index;
1592		context->alt_path.ackto = attr->alt_timeout << 3;
1593		optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
1594	}
1595
1596	pd = get_pd(qp);
1597	get_cqs(qp, &send_cq, &recv_cq);
1598	context->pd       = cpu_to_be32(pd->pdn);
1599	context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
1600	context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
1601	context->params1  = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
1602
1603	/* Set "fast registration enabled" for all kernel QPs */
1604	if (!qp->ibqp.uobject)
1605		context->params1 |= cpu_to_be32(1 << 11);
1606
1607	if (attr_mask & IB_QP_RNR_RETRY) {
1608		context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
1609		optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
1610	}
1611
1612	if (attr_mask & IB_QP_RETRY_CNT) {
1613		context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
1614		optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
1615	}
1616
1617	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1618		if (attr->max_rd_atomic)
1619			context->params1 |=
1620				cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
1621		optpar |= MLX4_QP_OPTPAR_SRA_MAX;
1622	}
1623
1624	if (attr_mask & IB_QP_SQ_PSN)
1625		context->next_send_psn = cpu_to_be32(attr->sq_psn);
1626
1627	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1628		if (attr->max_dest_rd_atomic)
1629			context->params2 |=
1630				cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
1631		optpar |= MLX4_QP_OPTPAR_RRA_MAX;
1632	}
1633
1634	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
1635		context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
1636		optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
1637	}
1638
1639	if (ibqp->srq)
1640		context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
1641
1642	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
1643		context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
1644		optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
1645	}
1646	if (attr_mask & IB_QP_RQ_PSN)
1647		context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
1648
1649	/* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */
1650	if (attr_mask & IB_QP_QKEY) {
1651		if (qp->mlx4_ib_qp_type &
1652		    (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
1653			context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
1654		else {
1655			if (mlx4_is_mfunc(dev->dev) &&
1656			    !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
1657			    (attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
1658			    MLX4_RESERVED_QKEY_BASE) {
1659				pr_err("Cannot use reserved QKEY"
1660				       " 0x%x (range 0xffff0000..0xffffffff"
1661				       " is reserved)\n", attr->qkey);
1662				err = -EINVAL;
1663				goto out;
1664			}
1665			context->qkey = cpu_to_be32(attr->qkey);
1666		}
1667		optpar |= MLX4_QP_OPTPAR_Q_KEY;
1668	}
1669
1670	if (ibqp->srq)
1671		context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
1672
1673	if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1674		context->db_rec_addr = cpu_to_be64(qp->db.dma);
1675
1676	if (cur_state == IB_QPS_INIT &&
1677	    new_state == IB_QPS_RTR  &&
1678	    (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
1679	     ibqp->qp_type == IB_QPT_UD ||
1680	     ibqp->qp_type == IB_QPT_RAW_PACKET)) {
1681		context->pri_path.sched_queue = (qp->port - 1) << 6;
1682		if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
1683		    qp->mlx4_ib_qp_type &
1684		    (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
1685			context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
1686			if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI)
1687				context->pri_path.fl = 0x80;
1688		} else {
1689			if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1690				context->pri_path.fl = 0x80;
1691			context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
1692		}
1693		if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
1694		    IB_LINK_LAYER_ETHERNET) {
1695			if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI ||
1696			    qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI)
1697				context->pri_path.feup = 1 << 7; /* don't fsm */
1698			/* handle smac_index */
1699			if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
1700			    qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
1701			    qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
1702				err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context);
1703				if (err) {
1704					err = -EINVAL;
1705					goto out;
1706				}
1707				if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
1708					dev->qp1_proxy[qp->port - 1] = qp;
1709			}
1710		}
1711	}
1712
1713	if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
1714		context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
1715					MLX4_IB_LINK_TYPE_ETH;
1716		if (dev->dev->caps.tunnel_offload_mode ==  MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
1717			/* set QP to receive both tunneled & non-tunneled packets */
1718			if (!(context->flags & cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET)))
1719				context->srqn = cpu_to_be32(7 << 28);
1720		}
1721	}
1722
1723	if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
1724		int is_eth = rdma_port_get_link_layer(
1725				&dev->ib_dev, qp->port) ==
1726				IB_LINK_LAYER_ETHERNET;
1727		if (is_eth) {
1728			context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH;
1729			optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH;
1730		}
1731	}
1732
1733
1734	if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD	&&
1735	    attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
1736		sqd_event = 1;
1737	else
1738		sqd_event = 0;
1739
1740	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1741		context->rlkey |= (1 << 4);
1742
1743	/*
1744	 * Before passing a kernel QP to the HW, make sure that the
1745	 * ownership bits of the send queue are set and the SQ
1746	 * headroom is stamped so that the hardware doesn't start
1747	 * processing stale work requests.
1748	 */
1749	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
1750		struct mlx4_wqe_ctrl_seg *ctrl;
1751		int i;
1752
1753		for (i = 0; i < qp->sq.wqe_cnt; ++i) {
1754			ctrl = get_send_wqe(qp, i);
1755			ctrl->owner_opcode = cpu_to_be32(1 << 31);
1756			if (qp->sq_max_wqes_per_wr == 1)
1757				ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);
1758
1759			stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
1760		}
1761	}
1762
1763	err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
1764			     to_mlx4_state(new_state), context, optpar,
1765			     sqd_event, &qp->mqp);
1766	if (err)
1767		goto out;
1768
1769	qp->state = new_state;
1770
1771	if (attr_mask & IB_QP_ACCESS_FLAGS)
1772		qp->atomic_rd_en = attr->qp_access_flags;
1773	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1774		qp->resp_depth = attr->max_dest_rd_atomic;
1775	if (attr_mask & IB_QP_PORT) {
1776		qp->port = attr->port_num;
1777		update_mcg_macs(dev, qp);
1778	}
1779	if (attr_mask & IB_QP_ALT_PATH)
1780		qp->alt_port = attr->alt_port_num;
1781
1782	if (is_sqp(dev, qp))
1783		store_sqp_attrs(to_msqp(qp), attr, attr_mask);
1784
1785	/*
1786	 * If we moved QP0 to RTR, bring the IB link up; if we moved
1787	 * QP0 to RESET or ERROR, bring the link back down.
1788	 */
1789	if (is_qp0(dev, qp)) {
1790		if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
1791			if (mlx4_INIT_PORT(dev->dev, qp->port))
1792				pr_warn("INIT_PORT failed for port %d\n",
1793				       qp->port);
1794
1795		if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
1796		    (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
1797			mlx4_CLOSE_PORT(dev->dev, qp->port);
1798	}
1799
1800	/*
1801	 * If we moved a kernel QP to RESET, clean up all old CQ
1802	 * entries and reinitialize the QP.
1803	 */
1804	if (new_state == IB_QPS_RESET) {
1805		if (!ibqp->uobject) {
1806			mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
1807					 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
1808			if (send_cq != recv_cq)
1809				mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1810
1811			qp->rq.head = 0;
1812			qp->rq.tail = 0;
1813			qp->sq.head = 0;
1814			qp->sq.tail = 0;
1815			qp->sq_next_wqe = 0;
1816			if (qp->rq.wqe_cnt)
1817				*qp->db.db  = 0;
1818
1819			if (qp->flags & MLX4_IB_QP_NETIF)
1820				mlx4_ib_steer_qp_reg(dev, qp, 0);
1821		}
		if (qp->pri.smac || qp->pri.smac_port) {
1823			mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
1824			qp->pri.smac = 0;
1825			qp->pri.smac_port = 0;
1826		}
1827		if (qp->alt.smac) {
1828			mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
1829			qp->alt.smac = 0;
1830		}
1831		if (qp->pri.vid < 0x1000) {
1832			mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
1833			qp->pri.vid = 0xFFFF;
1834			qp->pri.candidate_vid = 0xFFFF;
1835			qp->pri.update_vid = 0;
1836		}
1837
1838		if (qp->alt.vid < 0x1000) {
1839			mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
1840			qp->alt.vid = 0xFFFF;
1841			qp->alt.candidate_vid = 0xFFFF;
1842			qp->alt.update_vid = 0;
1843		}
1844	}
1845out:
1846	if (err && steer_qp)
1847		mlx4_ib_steer_qp_reg(dev, qp, 0);
1848	kfree(context);
	if (qp->pri.candidate_smac || qp->pri.candidate_smac_port) {
1851		if (err) {
1852			mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac);
1853		} else {
			if (qp->pri.smac || qp->pri.smac_port)
1855				mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
1856			qp->pri.smac = qp->pri.candidate_smac;
1857			qp->pri.smac_index = qp->pri.candidate_smac_index;
1858			qp->pri.smac_port = qp->pri.candidate_smac_port;
1859		}
1860		qp->pri.candidate_smac = 0;
1861		qp->pri.candidate_smac_index = 0;
1862		qp->pri.candidate_smac_port = 0;
1863	}
1864	if (qp->alt.candidate_smac) {
1865		if (err) {
1866			mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac);
1867		} else {
1868			if (qp->alt.smac)
1869				mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
1870			qp->alt.smac = qp->alt.candidate_smac;
1871			qp->alt.smac_index = qp->alt.candidate_smac_index;
1872			qp->alt.smac_port = qp->alt.candidate_smac_port;
1873		}
1874		qp->alt.candidate_smac = 0;
1875		qp->alt.candidate_smac_index = 0;
1876		qp->alt.candidate_smac_port = 0;
1877	}
1878
1879	if (qp->pri.update_vid) {
1880		if (err) {
1881			if (qp->pri.candidate_vid < 0x1000)
1882				mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port,
1883						     qp->pri.candidate_vid);
1884		} else {
1885			if (qp->pri.vid < 0x1000)
1886				mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port,
1887						     qp->pri.vid);
1888			qp->pri.vid = qp->pri.candidate_vid;
1889			qp->pri.vlan_port = qp->pri.candidate_vlan_port;
1890			qp->pri.vlan_index =  qp->pri.candidate_vlan_index;
1891		}
1892		qp->pri.candidate_vid = 0xFFFF;
1893		qp->pri.update_vid = 0;
1894	}
1895
1896	if (qp->alt.update_vid) {
1897		if (err) {
1898			if (qp->alt.candidate_vid < 0x1000)
1899				mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port,
1900						     qp->alt.candidate_vid);
1901		} else {
1902			if (qp->alt.vid < 0x1000)
1903				mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port,
1904						     qp->alt.vid);
1905			qp->alt.vid = qp->alt.candidate_vid;
1906			qp->alt.vlan_port = qp->alt.candidate_vlan_port;
1907			qp->alt.vlan_index =  qp->alt.candidate_vlan_index;
1908		}
1909		qp->alt.candidate_vid = 0xFFFF;
1910		qp->alt.update_vid = 0;
1911	}
1912
1913	return err;
1914}
1915
1916int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1917		      int attr_mask, struct ib_udata *udata)
1918{
1919	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
1920	struct mlx4_ib_qp *qp = to_mqp(ibqp);
1921	enum ib_qp_state cur_state, new_state;
1922	int err = -EINVAL;
	int ll;

	mutex_lock(&qp->mutex);
1925
1926	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
1927	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1928
1929	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1930		ll = IB_LINK_LAYER_UNSPECIFIED;
1931	} else {
1932		int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1933		ll = rdma_port_get_link_layer(&dev->ib_dev, port);
1934	}
1935
1936	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
1937				attr_mask, ll)) {
		pr_debug("qpn 0x%x: invalid attribute mask specified for transition %d to %d. qp_type %d, attr_mask 0x%x\n",
			 ibqp->qp_num, cur_state, new_state,
			 ibqp->qp_type, attr_mask);
1943		goto out;
1944	}
1945
1946	if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) {
1947		if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
1948			if ((ibqp->qp_type == IB_QPT_RC) ||
1949			    (ibqp->qp_type == IB_QPT_UD) ||
1950			    (ibqp->qp_type == IB_QPT_UC) ||
1951			    (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
1952			    (ibqp->qp_type == IB_QPT_XRC_INI)) {
1953				attr->port_num = mlx4_ib_bond_next_port(dev);
1954			}
1955		} else {
1956			/* no sense in changing port_num
1957			 * when ports are bonded */
1958			attr_mask &= ~IB_QP_PORT;
1959		}
1960	}
1961
1962	if ((attr_mask & IB_QP_PORT) &&
1963	    (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
		pr_debug("qpn 0x%x: invalid port number (%d) specified for transition %d to %d. qp_type %d\n",
			 ibqp->qp_num, attr->port_num, cur_state,
			 new_state, ibqp->qp_type);
1968		goto out;
1969	}
1970
1971	if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) &&
1972	    (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) !=
1973	     IB_LINK_LAYER_ETHERNET))
1974		goto out;
1975
1976	if (attr_mask & IB_QP_PKEY_INDEX) {
1977		int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1978		if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) {
			pr_debug("qpn 0x%x: invalid pkey index (%d) specified for transition %d to %d. qp_type %d\n",
				 ibqp->qp_num, attr->pkey_index, cur_state,
				 new_state, ibqp->qp_type);
1983			goto out;
1984		}
1985	}
1986
1987	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1988	    attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
		pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. Transition %d to %d. qp_type %d\n",
			 ibqp->qp_num, attr->max_rd_atomic, cur_state,
			 new_state, ibqp->qp_type);
1993		goto out;
1994	}
1995
1996	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1997	    attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
		pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. Transition %d to %d. qp_type %d\n",
			 ibqp->qp_num, attr->max_dest_rd_atomic, cur_state,
			 new_state, ibqp->qp_type);
2002		goto out;
2003	}
2004
2005	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
2006		err = 0;
2007		goto out;
2008	}
2009
2010	err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
2011
2012	if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
2013		attr->port_num = 1;
2014
2015out:
2016	mutex_unlock(&qp->mutex);
2017	return err;
2018}
2019
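/* Look up the QKEY a VF should use for a QP0 proxy/tunnel QP: match the QP
 * number against the per-port proxy and tunnel QP0 numbers and return the
 * cached qp0_qkey for that port.
 */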
2020static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
2021{
2022	int i;
2023	for (i = 0; i < dev->caps.num_ports; i++) {
2024		if (qpn == dev->caps.qp0_proxy[i] ||
2025		    qpn == dev->caps.qp0_tunnel[i]) {
2026			*qkey = dev->caps.qp0_qkey[i];
2027			return 0;
2028		}
2029	}
2030	return -EINVAL;
2031}
2032
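/* Build the MLX segment and the inline UD header for QP0 traffic sent
 * through the SR-IOV proxy/tunnel QP0 path.  Only IB_WR_SEND is accepted;
 * the packed header is written into the WQE as one or two inline segments
 * (an inline segment must not cross a 64-byte boundary) and its total
 * length is returned in *mlx_seg_len.
 */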
2033static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
2034				  struct ib_send_wr *wr,
2035				  void *wqe, unsigned *mlx_seg_len)
2036{
2037	struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
2038	struct ib_device *ib_dev = &mdev->ib_dev;
2039	struct mlx4_wqe_mlx_seg *mlx = wqe;
2040	struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
2041	struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
2042	u16 pkey;
2043	u32 qkey;
2044	int send_size;
2045	int header_size;
2046	int spc;
2047	int i;
2048
2049	if (wr->opcode != IB_WR_SEND)
2050		return -EINVAL;
2051
2052	send_size = 0;
2053
2054	for (i = 0; i < wr->num_sge; ++i)
2055		send_size += wr->sg_list[i].length;
2056
2057	/* for proxy-qp0 sends, need to add in size of tunnel header */
2058	/* for tunnel-qp0 sends, tunnel header is already in s/g list */
2059	if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
2060		send_size += sizeof (struct mlx4_ib_tunnel_header);
2061
2062	ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
2063
2064	if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
2065		sqp->ud_header.lrh.service_level =
2066			be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
2067		sqp->ud_header.lrh.destination_lid =
2068			cpu_to_be16(ah->av.ib.g_slid & 0x7f);
2069		sqp->ud_header.lrh.source_lid =
2070			cpu_to_be16(ah->av.ib.g_slid & 0x7f);
2071	}
2072
2073	mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
2074
2075	/* force loopback */
2076	mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
2077	mlx->rlid = sqp->ud_header.lrh.destination_lid;
2078
2079	sqp->ud_header.lrh.virtual_lane    = 0;
2080	sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
2081	ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
2082	sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
2083	if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
2084		sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2085	else
2086		sqp->ud_header.bth.destination_qpn =
2087			cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
2088
2089	sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
2090	if (mlx4_is_master(mdev->dev)) {
2091		if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
2092			return -EINVAL;
2093	} else {
2094		if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
2095			return -EINVAL;
2096	}
2097	sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
2098	sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
2099
2100	sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;
2101	sqp->ud_header.immediate_present = 0;
2102
2103	header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
2104
2105	/*
2106	 * Inline data segments may not cross a 64 byte boundary.  If
2107	 * our UD header is bigger than the space available up to the
2108	 * next 64 byte boundary in the WQE, use two inline data
2109	 * segments to hold the UD header.
2110	 */
2111	spc = MLX4_INLINE_ALIGN -
2112	      ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
2113	if (header_size <= spc) {
2114		inl->byte_count = cpu_to_be32(1 << 31 | header_size);
2115		memcpy(inl + 1, sqp->header_buf, header_size);
2116		i = 1;
2117	} else {
2118		inl->byte_count = cpu_to_be32(1 << 31 | spc);
2119		memcpy(inl + 1, sqp->header_buf, spc);
2120
2121		inl = (void *) (inl + 1) + spc;
2122		memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
2123		/*
2124		 * Need a barrier here to make sure all the data is
2125		 * visible before the byte_count field is set.
2126		 * Otherwise the HCA prefetcher could grab the 64-byte
2127		 * chunk with this inline segment and get a valid (!=
2128		 * 0xffffffff) byte count but stale data, and end up
2129		 * generating a packet with bad headers.
2130		 *
2131		 * The first inline segment's byte_count field doesn't
2132		 * need a barrier, because it comes after a
2133		 * control/MLX segment and therefore is at an offset
2134		 * of 16 mod 64.
2135		 */
2136		wmb();
2137		inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
2138		i = 2;
2139	}
2140
	*mlx_seg_len =
		ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
2143	return 0;
2144}
2145
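/* Expand a source MAC held in the low 48 bits of a u64 into a byte array
 * in network (big-endian) order.
 */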
2146static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
2147{
2148	int i;
2149
2150	for (i = ETH_ALEN; i; i--) {
2151		dst_mac[i - 1] = src_mac & 0xff;
2152		src_mac >>= 8;
2153	}
2154}
2155
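/* Build the full UD header for an MLX (QP0/QP1) send: LRH, optional GRH,
 * BTH and DETH on IB ports, or Ethernet, optional VLAN, GRH, BTH and DETH
 * on RoCE ports.  The packed header is written into the WQE as inline
 * segments and the MLX segment flags (VL15, SLR, force loopback) are set
 * to match.
 */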
2156static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
2157			    void *wqe, unsigned *mlx_seg_len)
2158{
2159	struct ib_device *ib_dev = sqp->qp.ibqp.device;
2160	struct mlx4_wqe_mlx_seg *mlx = wqe;
2161	struct mlx4_wqe_ctrl_seg *ctrl = wqe;
2162	struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
2163	struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
2164	union ib_gid sgid;
2165	u16 pkey;
2166	int send_size;
2167	int header_size;
2168	int spc;
2169	int i;
2170	int err = 0;
2171	u16 vlan = 0xffff;
2172	bool is_eth;
2173	bool is_vlan = false;
2174	bool is_grh;
2175
2176	send_size = 0;
2177	for (i = 0; i < wr->num_sge; ++i)
2178		send_size += wr->sg_list[i].length;
2179
2180	is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
2181	is_grh = mlx4_ib_ah_grh_present(ah);
2182	if (is_eth) {
2183		if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
2184			/* When multi-function is enabled, the ib_core gid
2185			 * indexes don't necessarily match the hw ones, so
2186			 * we must use our own cache */
2187			err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
2188							   be32_to_cpu(ah->av.ib.port_pd) >> 24,
2189							   ah->av.ib.gid_index, &sgid.raw[0]);
2190			if (err)
2191				return err;
2192		} else  {
2193			err = ib_get_cached_gid(ib_dev,
2194						be32_to_cpu(ah->av.ib.port_pd) >> 24,
2195						ah->av.ib.gid_index, &sgid);
2196			if (err)
2197				return err;
2198		}
2199
2200		if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
2201			vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
			is_vlan = true;
2203		}
2204	}
2205	ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
2206
2207	if (!is_eth) {
2208		sqp->ud_header.lrh.service_level =
2209			be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
2210		sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid;
2211		sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
2212	}
2213
2214	if (is_grh) {
2215		sqp->ud_header.grh.traffic_class =
2216			(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
2217		sqp->ud_header.grh.flow_label    =
2218			ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
2219		sqp->ud_header.grh.hop_limit     = ah->av.ib.hop_limit;
		if (is_eth) {
			memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
		} else {
			if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
				/* When multi-function is enabled, the ib_core
				 * gid indexes don't necessarily match the hw
				 * ones, so we must use our own cache.
				 */
				sqp->ud_header.grh.source_gid.global.subnet_prefix =
					to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
							subnet_prefix;
				sqp->ud_header.grh.source_gid.global.interface_id =
					to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
							guid_cache[ah->av.ib.gid_index];
			} else {
				ib_get_cached_gid(ib_dev,
						  be32_to_cpu(ah->av.ib.port_pd) >> 24,
						  ah->av.ib.gid_index,
						  &sqp->ud_header.grh.source_gid);
			}
		}
2239		memcpy(sqp->ud_header.grh.destination_gid.raw,
2240		       ah->av.ib.dgid, 16);
2241	}
2242
2243	mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
2244
2245	if (!is_eth) {
2246		mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
2247					  (sqp->ud_header.lrh.destination_lid ==
2248					   IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
2249					  (sqp->ud_header.lrh.service_level << 8));
2250		if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
2251			mlx->flags |= cpu_to_be32(0x1); /* force loopback */
2252		mlx->rlid = sqp->ud_header.lrh.destination_lid;
2253	}
2254
2255	switch (wr->opcode) {
2256	case IB_WR_SEND:
2257		sqp->ud_header.bth.opcode	 = IB_OPCODE_UD_SEND_ONLY;
2258		sqp->ud_header.immediate_present = 0;
2259		break;
2260	case IB_WR_SEND_WITH_IMM:
2261		sqp->ud_header.bth.opcode	 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
2262		sqp->ud_header.immediate_present = 1;
2263		sqp->ud_header.immediate_data    = wr->ex.imm_data;
2264		break;
2265	default:
2266		return -EINVAL;
2267	}
2268
2269	if (is_eth) {
2270		struct in6_addr in6;
2271
2272		u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
2273
2274		mlx->sched_prio = cpu_to_be16(pcp);
2275
2276		memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
2277		/* FIXME: cache smac value? */
2278		memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
2279		memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
2280		memcpy(&in6, sgid.raw, sizeof(in6));
2281
2282		if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
2283			u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
2284			u8 smac[ETH_ALEN];
2285
2286			mlx4_u64_to_smac(smac, mac);
2287			memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
2288		} else {
2289			/* use the src mac of the tunnel */
2290			memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
2291		}
2292
2293		if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
2294			mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
2295		if (!is_vlan) {
2296			sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
2297		} else {
2298			sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
2299			sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
2300		}
2301	} else {
2302		sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
2303		if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
2304			sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
2305	}
2306	sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
2307	if (!sqp->qp.ibqp.qp_num)
2308		ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
2309	else
2310		ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
2311	sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
2312	sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2313	sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
2314	sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
2315					       sqp->qkey : wr->wr.ud.remote_qkey);
2316	sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
2317
2318	header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
2319
2320	if (0) {
2321		pr_err("built UD header of size %d:\n", header_size);
2322		for (i = 0; i < header_size / 4; ++i) {
2323			if (i % 8 == 0)
2324				pr_err("  [%02x] ", i * 4);
2325			pr_cont(" %08x",
2326				be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
2327			if ((i + 1) % 8 == 0)
2328				pr_cont("\n");
2329		}
2330		pr_err("\n");
2331	}
2332
2333	/*
2334	 * Inline data segments may not cross a 64 byte boundary.  If
2335	 * our UD header is bigger than the space available up to the
2336	 * next 64 byte boundary in the WQE, use two inline data
2337	 * segments to hold the UD header.
2338	 */
2339	spc = MLX4_INLINE_ALIGN -
2340		((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
2341	if (header_size <= spc) {
2342		inl->byte_count = cpu_to_be32(1 << 31 | header_size);
2343		memcpy(inl + 1, sqp->header_buf, header_size);
2344		i = 1;
2345	} else {
2346		inl->byte_count = cpu_to_be32(1 << 31 | spc);
2347		memcpy(inl + 1, sqp->header_buf, spc);
2348
2349		inl = (void *) (inl + 1) + spc;
2350		memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
2351		/*
2352		 * Need a barrier here to make sure all the data is
2353		 * visible before the byte_count field is set.
2354		 * Otherwise the HCA prefetcher could grab the 64-byte
2355		 * chunk with this inline segment and get a valid (!=
2356		 * 0xffffffff) byte count but stale data, and end up
2357		 * generating a packet with bad headers.
2358		 *
2359		 * The first inline segment's byte_count field doesn't
2360		 * need a barrier, because it comes after a
2361		 * control/MLX segment and therefore is at an offset
2362		 * of 16 mod 64.
2363		 */
2364		wmb();
2365		inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
2366		i = 2;
2367	}
2368
2369	*mlx_seg_len =
2370		ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
2371	return 0;
2372}
2373
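/* Return nonzero if posting @nreq more work requests would overflow the
 * work queue.  The fast path checks without locking; on apparent overflow
 * the head/tail are re-read under the CQ lock so that completions being
 * polled concurrently are taken into account.
 */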
2374static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
2375{
2376	unsigned cur;
2377	struct mlx4_ib_cq *cq;
2378
2379	cur = wq->head - wq->tail;
2380	if (likely(cur + nreq < wq->max_post))
2381		return 0;
2382
2383	cq = to_mcq(ib_cq);
2384	spin_lock(&cq->lock);
2385	cur = wq->head - wq->tail;
2386	spin_unlock(&cq->lock);
2387
2388	return cur + nreq >= wq->max_post;
2389}
2390
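/* Translate IB access flags into the mlx4 FMR/bind permission bits.
 * Local read access is always granted.
 */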
2391static __be32 convert_access(int acc)
2392{
2393	return (acc & IB_ACCESS_REMOTE_ATOMIC ?
2394		cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC)       : 0) |
2395	       (acc & IB_ACCESS_REMOTE_WRITE  ?
2396		cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) |
2397	       (acc & IB_ACCESS_REMOTE_READ   ?
2398		cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ)  : 0) |
2399	       (acc & IB_ACCESS_LOCAL_WRITE   ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE)  : 0) |
2400		cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
2401}
2402
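/* Build a fast-register MR segment: mark every page in the page list as
 * present in the mapped MTT list and fill in the key, buffer list address,
 * start address, length and page size from the fast_reg work request.
 */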
2403static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
2404{
2405	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
2406	int i;
2407
2408	for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
2409		mfrpl->mapped_page_list[i] =
2410			cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] |
2411				    MLX4_MTT_FLAG_PRESENT);
2412
2413	fseg->flags		= convert_access(wr->wr.fast_reg.access_flags);
2414	fseg->mem_key		= cpu_to_be32(wr->wr.fast_reg.rkey);
2415	fseg->buf_list		= cpu_to_be64(mfrpl->map);
2416	fseg->start_addr	= cpu_to_be64(wr->wr.fast_reg.iova_start);
2417	fseg->reg_len		= cpu_to_be64(wr->wr.fast_reg.length);
2418	fseg->offset		= 0; /* XXX -- is this just for ZBVA? */
2419	fseg->page_size		= cpu_to_be32(wr->wr.fast_reg.page_shift);
2420	fseg->reserved[0]	= 0;
2421	fseg->reserved[1]	= 0;
2422}
2423
2424static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr)
2425{
2426	bseg->flags1 =
2427		convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) &
2428		cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ  |
2429			    MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
2430			    MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
2431	bseg->flags2 = 0;
2432	if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2)
2433		bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
2434	if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED)
2435		bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
2436	bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey);
2437	bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey);
2438	bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr);
2439	bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length);
2440}
2441
2442static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
2443{
2444	memset(iseg, 0, sizeof(*iseg));
2445	iseg->mem_key = cpu_to_be32(rkey);
2446}
2447
2448static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
2449					  u64 remote_addr, u32 rkey)
2450{
2451	rseg->raddr    = cpu_to_be64(remote_addr);
2452	rseg->rkey     = cpu_to_be32(rkey);
2453	rseg->reserved = 0;
2454}
2455
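/* Fill an atomic segment: compare & swap carries both operands, masked
 * fetch & add carries the add value and its mask, and plain fetch & add
 * leaves the compare field zero.
 */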
2456static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
2457{
2458	if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
2459		aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
2460		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
2461	} else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
2462		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
2463		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add_mask);
2464	} else {
2465		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
2466		aseg->compare  = 0;
2467	}
2469}
2470
2471static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
2472				  struct ib_send_wr *wr)
2473{
2474	aseg->swap_add		= cpu_to_be64(wr->wr.atomic.swap);
2475	aseg->swap_add_mask	= cpu_to_be64(wr->wr.atomic.swap_mask);
2476	aseg->compare		= cpu_to_be64(wr->wr.atomic.compare_add);
2477	aseg->compare_mask	= cpu_to_be64(wr->wr.atomic.compare_add_mask);
2478}
2479
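/* Fill a UD datagram segment from the work request: copy the address
 * vector from the AH and set the remote QPN, QKEY and (for RoCE) the
 * VLAN and destination MAC.
 */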
2480static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
2481			     struct ib_send_wr *wr)
2482{
2483	memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
2484	dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2485	dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2486	dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
2487	memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
2488}
2489
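/* Build the datagram segment a proxy special QP uses to tunnel traffic to
 * the corresponding per-port QP0/QP1 tunnel QP: force loopback, drop the
 * GRH, and let the HW take the real QKEY from the QP context (set by the
 * master), signalled via IB_QP_SET_QKEY.
 */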
2490static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
2491				    struct mlx4_wqe_datagram_seg *dseg,
2492				    struct ib_send_wr *wr,
2493				    enum mlx4_ib_qp_type qpt)
2494{
2495	union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
2496	struct mlx4_av sqp_av = {0};
2497	int port = *((u8 *) &av->ib.port_pd) & 0x3;
2498
2499	/* force loopback */
2500	sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
2501	sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */
2502	sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
2503			cpu_to_be32(0xf0000000);
2504
2505	memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
2506	if (qpt == MLX4_IB_QPT_PROXY_GSI)
2507		dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
2508	else
2509		dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
2510	/* Use QKEY from the QP context, which is set by master */
2511	dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
2512}
2513
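/* Write the mlx4_ib_tunnel_header (address vector, remote QPN, P_Key
 * index, QKEY and MAC/VLAN) into the WQE as one or two inline segments,
 * keeping each segment within a 64-byte boundary.
 */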
2514static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
2515{
2516	struct mlx4_wqe_inline_seg *inl = wqe;
2517	struct mlx4_ib_tunnel_header hdr;
2518	struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
2519	int spc;
2520	int i;
2521
2522	memcpy(&hdr.av, &ah->av, sizeof hdr.av);
2523	hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2524	hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
2525	hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2526	memcpy(hdr.mac, ah->av.eth.mac, 6);
2527	hdr.vlan = ah->av.eth.vlan;
2528
2529	spc = MLX4_INLINE_ALIGN -
2530		((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
2531	if (sizeof (hdr) <= spc) {
2532		memcpy(inl + 1, &hdr, sizeof (hdr));
2533		wmb();
2534		inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
2535		i = 1;
2536	} else {
2537		memcpy(inl + 1, &hdr, spc);
2538		wmb();
2539		inl->byte_count = cpu_to_be32(1 << 31 | spc);
2540
2541		inl = (void *) (inl + 1) + spc;
2542		memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
2543		wmb();
2544		inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
2545		i = 2;
2546	}
2547
2548	*mlx_seg_len =
2549		ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
2550}
2551
2552static void set_mlx_icrc_seg(void *dseg)
2553{
2554	u32 *t = dseg;
2555	struct mlx4_wqe_inline_seg *iseg = dseg;
2556
2557	t[1] = 0;
2558
2559	/*
2560	 * Need a barrier here before writing the byte_count field to
2561	 * make sure that all the data is visible before the
2562	 * byte_count field is set.  Otherwise, if the segment begins
2563	 * a new cacheline, the HCA prefetcher could grab the 64-byte
	 * chunk and get a valid (!= 0xffffffff) byte count but
2565	 * stale data, and end up sending the wrong data.
2566	 */
2567	wmb();
2568
2569	iseg->byte_count = cpu_to_be32((1 << 31) | 4);
2570}
2571
2572static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
2573{
2574	dseg->lkey       = cpu_to_be32(sg->lkey);
2575	dseg->addr       = cpu_to_be64(sg->addr);
2576
2577	/*
2578	 * Need a barrier here before writing the byte_count field to
2579	 * make sure that all the data is visible before the
2580	 * byte_count field is set.  Otherwise, if the segment begins
2581	 * a new cacheline, the HCA prefetcher could grab the 64-byte
	 * chunk and get a valid (!= 0xffffffff) byte count but
2583	 * stale data, and end up sending the wrong data.
2584	 */
2585	wmb();
2586
2587	dseg->byte_count = cpu_to_be32(sg->length);
2588}
2589
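/* Receive-side variant of set_data_seg(): no write barrier is needed
 * before byte_count here, since receive WQEs only become visible to the
 * HW through the doorbell record update (after the wmb()) in
 * mlx4_ib_post_recv().
 */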
2590static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
2591{
2592	dseg->byte_count = cpu_to_be32(sg->length);
2593	dseg->lkey       = cpu_to_be32(sg->lkey);
2594	dseg->addr       = cpu_to_be64(sg->addr);
2595}
2596
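/* Copy the LSO header from the work request into the WQE and report the
 * MSS and header length to the HW.  Headers spilling past a cache line
 * set the "bigger header" bit (*blh); the WR is rejected when the QP was
 * not created with LSO headroom and the header plus gather entries would
 * not fit in the WQE.
 */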
2597static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
2598			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
2599			 __be32 *lso_hdr_sz, __be32 *blh)
2600{
2601	unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
2602
2603	if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
2604		*blh = cpu_to_be32(1 << 6);
2605
2606	if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
2607		     wr->num_sge > qp->sq.max_gs - (halign >> 4)))
2608		return -EINVAL;
2609
2610	memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
2611
2612	*lso_hdr_sz  = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
2613	*lso_seg_len = halign;
2614	return 0;
2615}
2616
2617static __be32 send_ieth(struct ib_send_wr *wr)
2618{
2619	switch (wr->opcode) {
2620	case IB_WR_SEND_WITH_IMM:
2621	case IB_WR_RDMA_WRITE_WITH_IMM:
2622		return wr->ex.imm_data;
2623
2624	case IB_WR_SEND_WITH_INV:
2625		return cpu_to_be32(wr->ex.invalidate_rkey);
2626
2627	default:
2628		return 0;
2629	}
2630}
2631
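/* Write a zero-length inline segment (owner bit set, byte count 0), used
 * as 16 bytes of padding, e.g. to start the tunnel header on a cache-line
 * boundary.
 */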
2632static void add_zero_len_inline(void *wqe)
2633{
	struct mlx4_wqe_inline_seg *inl = wqe;

	memset(wqe, 0, 16);
2636	inl->byte_count = cpu_to_be32(1 << 31);
2637}
2638
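/* Post a chain of send work requests.  For each WR the control segment is
 * built first, then any transport-specific segments (RDMA/atomic, UD
 * datagram, MLX or tunnel headers, LSO), and the data segments are written
 * last in reverse order so the cache-line stamp is overwritten last.  The
 * ownership bit is set only once the descriptor is complete, and the send
 * doorbell is rung once for the whole chain.
 */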
2639int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2640		      struct ib_send_wr **bad_wr)
2641{
2642	struct mlx4_ib_qp *qp = to_mqp(ibqp);
2643	void *wqe;
2644	struct mlx4_wqe_ctrl_seg *ctrl;
2645	struct mlx4_wqe_data_seg *dseg;
2646	unsigned long flags;
2647	int nreq;
2648	int err = 0;
2649	unsigned ind;
2650	int uninitialized_var(stamp);
2651	int uninitialized_var(size);
2652	unsigned uninitialized_var(seglen);
2653	__be32 dummy;
2654	__be32 *lso_wqe;
2655	__be32 uninitialized_var(lso_hdr_sz);
2656	__be32 blh;
2657	int i;
2658	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
2659
2660	spin_lock_irqsave(&qp->sq.lock, flags);
2661	if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
2662		err = -EIO;
2663		*bad_wr = wr;
2664		nreq = 0;
2665		goto out;
2666	}
2667
2668	ind = qp->sq_next_wqe;
2669
2670	for (nreq = 0; wr; ++nreq, wr = wr->next) {
2671		lso_wqe = &dummy;
2672		blh = 0;
2673
2674		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
2675			err = -ENOMEM;
2676			*bad_wr = wr;
2677			goto out;
2678		}
2679
2680		if (unlikely(wr->num_sge > qp->sq.max_gs)) {
2681			err = -EINVAL;
2682			*bad_wr = wr;
2683			goto out;
2684		}
2685
2686		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
2687		qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
2688
2689		ctrl->srcrb_flags =
2690			(wr->send_flags & IB_SEND_SIGNALED ?
2691			 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
2692			(wr->send_flags & IB_SEND_SOLICITED ?
2693			 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
2694			((wr->send_flags & IB_SEND_IP_CSUM) ?
2695			 cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
2696				     MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
2697			qp->sq_signal_bits;
2698
2699		ctrl->imm = send_ieth(wr);
2700
2701		wqe += sizeof *ctrl;
2702		size = sizeof *ctrl / 16;
2703
2704		switch (qp->mlx4_ib_qp_type) {
2705		case MLX4_IB_QPT_RC:
2706		case MLX4_IB_QPT_UC:
2707			switch (wr->opcode) {
2708			case IB_WR_ATOMIC_CMP_AND_SWP:
2709			case IB_WR_ATOMIC_FETCH_AND_ADD:
2710			case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
2711				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
2712					      wr->wr.atomic.rkey);
2713				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
2714
2715				set_atomic_seg(wqe, wr);
2716				wqe  += sizeof (struct mlx4_wqe_atomic_seg);
2717
2718				size += (sizeof (struct mlx4_wqe_raddr_seg) +
2719					 sizeof (struct mlx4_wqe_atomic_seg)) / 16;
2720
2721				break;
2722
2723			case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
2724				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
2725					      wr->wr.atomic.rkey);
2726				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
2727
2728				set_masked_atomic_seg(wqe, wr);
2729				wqe  += sizeof (struct mlx4_wqe_masked_atomic_seg);
2730
2731				size += (sizeof (struct mlx4_wqe_raddr_seg) +
2732					 sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
2733
2734				break;
2735
2736			case IB_WR_RDMA_READ:
2737			case IB_WR_RDMA_WRITE:
2738			case IB_WR_RDMA_WRITE_WITH_IMM:
2739				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
2740					      wr->wr.rdma.rkey);
2741				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
2742				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
2743				break;
2744
2745			case IB_WR_LOCAL_INV:
2746				ctrl->srcrb_flags |=
2747					cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2748				set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
2749				wqe  += sizeof (struct mlx4_wqe_local_inval_seg);
2750				size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
2751				break;
2752
2753			case IB_WR_FAST_REG_MR:
2754				ctrl->srcrb_flags |=
2755					cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2756				set_fmr_seg(wqe, wr);
2757				wqe  += sizeof (struct mlx4_wqe_fmr_seg);
2758				size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
2759				break;
2760
2761			case IB_WR_BIND_MW:
2762				ctrl->srcrb_flags |=
2763					cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2764				set_bind_seg(wqe, wr);
2765				wqe  += sizeof(struct mlx4_wqe_bind_seg);
2766				size += sizeof(struct mlx4_wqe_bind_seg) / 16;
2767				break;
2768			default:
2769				/* No extra segments required for sends */
2770				break;
2771			}
2772			break;
2773
2774		case MLX4_IB_QPT_TUN_SMI_OWNER:
2775			err =  build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2776			if (unlikely(err)) {
2777				*bad_wr = wr;
2778				goto out;
2779			}
2780			wqe  += seglen;
2781			size += seglen / 16;
2782			break;
2783		case MLX4_IB_QPT_TUN_SMI:
2784		case MLX4_IB_QPT_TUN_GSI:
2785			/* this is a UD qp used in MAD responses to slaves. */
2786			set_datagram_seg(wqe, wr);
2787			/* set the forced-loopback bit in the data seg av */
2788			*(__be32 *) wqe |= cpu_to_be32(0x80000000);
2789			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
2790			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2791			break;
2792		case MLX4_IB_QPT_UD:
2793			set_datagram_seg(wqe, wr);
2794			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
2795			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2796
2797			if (wr->opcode == IB_WR_LSO) {
2798				err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
2799				if (unlikely(err)) {
2800					*bad_wr = wr;
2801					goto out;
2802				}
2803				lso_wqe = (__be32 *) wqe;
2804				wqe  += seglen;
2805				size += seglen / 16;
2806			}
2807			break;
2808
2809		case MLX4_IB_QPT_PROXY_SMI_OWNER:
2810			err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2811			if (unlikely(err)) {
2812				*bad_wr = wr;
2813				goto out;
2814			}
2815			wqe  += seglen;
2816			size += seglen / 16;
2817			/* to start tunnel header on a cache-line boundary */
2818			add_zero_len_inline(wqe);
2819			wqe += 16;
2820			size++;
2821			build_tunnel_header(wr, wqe, &seglen);
2822			wqe  += seglen;
2823			size += seglen / 16;
2824			break;
2825		case MLX4_IB_QPT_PROXY_SMI:
2826		case MLX4_IB_QPT_PROXY_GSI:
2827			/* If we are tunneling special qps, this is a UD qp.
2828			 * In this case we first add a UD segment targeting
2829			 * the tunnel qp, and then add a header with address
2830			 * information */
2831			set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
2832						qp->mlx4_ib_qp_type);
2833			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
2834			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2835			build_tunnel_header(wr, wqe, &seglen);
2836			wqe  += seglen;
2837			size += seglen / 16;
2838			break;
2839
2840		case MLX4_IB_QPT_SMI:
2841		case MLX4_IB_QPT_GSI:
2842			err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
2843			if (unlikely(err)) {
2844				*bad_wr = wr;
2845				goto out;
2846			}
2847			wqe  += seglen;
2848			size += seglen / 16;
2849			break;
2850
2851		default:
2852			break;
2853		}
2854
2855		/*
2856		 * Write data segments in reverse order, so as to
2857		 * overwrite cacheline stamp last within each
2858		 * cacheline.  This avoids issues with WQE
2859		 * prefetching.
2860		 */
2861
2862		dseg = wqe;
2863		dseg += wr->num_sge - 1;
2864		size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
2865
2866		/* Add one more inline data segment for ICRC for MLX sends */
2867		if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
2868			     qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
2869			     qp->mlx4_ib_qp_type &
2870			     (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
2871			set_mlx_icrc_seg(dseg + 1);
2872			size += sizeof (struct mlx4_wqe_data_seg) / 16;
2873		}
2874
2875		for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
2876			set_data_seg(dseg, wr->sg_list + i);
2877
2878		/*
2879		 * Possibly overwrite stamping in cacheline with LSO
2880		 * segment only after making sure all data segments
2881		 * are written.
2882		 */
2883		wmb();
2884		*lso_wqe = lso_hdr_sz;
2885
2886		ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
2887				    MLX4_WQE_CTRL_FENCE : 0) | size;
2888
2889		/*
2890		 * Make sure descriptor is fully written before
2891		 * setting ownership bit (because HW can start
2892		 * executing as soon as we do).
2893		 */
2894		wmb();
2895
2896		if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
2897			*bad_wr = wr;
2898			err = -EINVAL;
2899			goto out;
2900		}
2901
2902		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
2903			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
2904
2905		stamp = ind + qp->sq_spare_wqes;
2906		ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
2907
2908		/*
2909		 * We can improve latency by not stamping the last
2910		 * send queue WQE until after ringing the doorbell, so
2911		 * only stamp here if there are still more WQEs to post.
2912		 *
2913		 * Same optimization applies to padding with NOP wqe
2914		 * in case of WQE shrinking (used to prevent wrap-around
2915		 * in the middle of WR).
2916		 */
2917		if (wr->next) {
2918			stamp_send_wqe(qp, stamp, size * 16);
2919			ind = pad_wraparound(qp, ind);
2920		}
2921	}
2922
2923out:
2924	if (likely(nreq)) {
2925		qp->sq.head += nreq;
2926
2927		/*
2928		 * Make sure that descriptors are written before
2929		 * doorbell record.
2930		 */
2931		wmb();
2932
2933		writel(qp->doorbell_qpn,
2934		       to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
2935
2936		/*
2937		 * Make sure doorbells don't leak out of SQ spinlock
2938		 * and reach the HCA out of order.
2939		 */
2940		mmiowb();
2941
2942		stamp_send_wqe(qp, stamp, size * 16);
2943
2944		ind = pad_wraparound(qp, ind);
2945		qp->sq_next_wqe = ind;
2946	}
2947
2948	spin_unlock_irqrestore(&qp->sq.lock, flags);
2949
2950	return err;
2951}
2952
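/* Post a chain of receive work requests.  Proxy special QPs reserve the
 * first scatter entry for the tunnel header buffer; the remaining entries
 * come from the caller's SG list, with an invalid-lkey sentinel closing a
 * short list.  The doorbell record is updated once, after a wmb().
 */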
2953int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2954		      struct ib_recv_wr **bad_wr)
2955{
2956	struct mlx4_ib_qp *qp = to_mqp(ibqp);
2957	struct mlx4_wqe_data_seg *scat;
2958	unsigned long flags;
2959	int err = 0;
2960	int nreq;
2961	int ind;
2962	int max_gs;
2963	int i;
2964	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
2965
2966	max_gs = qp->rq.max_gs;
2967	spin_lock_irqsave(&qp->rq.lock, flags);
2968
2969	if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
2970		err = -EIO;
2971		*bad_wr = wr;
2972		nreq = 0;
2973		goto out;
2974	}
2975
2976	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
2977
2978	for (nreq = 0; wr; ++nreq, wr = wr->next) {
2979		if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2980			err = -ENOMEM;
2981			*bad_wr = wr;
2982			goto out;
2983		}
2984
2985		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
2986			err = -EINVAL;
2987			*bad_wr = wr;
2988			goto out;
2989		}
2990
2991		scat = get_recv_wqe(qp, ind);
2992
2993		if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
2994		    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
2995			ib_dma_sync_single_for_device(ibqp->device,
2996						      qp->sqp_proxy_rcv[ind].map,
2997						      sizeof (struct mlx4_ib_proxy_sqp_hdr),
2998						      DMA_FROM_DEVICE);
2999			scat->byte_count =
3000				cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
3001			/* use dma lkey from upper layer entry */
3002			scat->lkey = cpu_to_be32(wr->sg_list->lkey);
3003			scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
3004			scat++;
3005			max_gs--;
3006		}
3007
3008		for (i = 0; i < wr->num_sge; ++i)
3009			__set_data_seg(scat + i, wr->sg_list + i);
3010
3011		if (i < max_gs) {
3012			scat[i].byte_count = 0;
3013			scat[i].lkey       = cpu_to_be32(MLX4_INVALID_LKEY);
3014			scat[i].addr       = 0;
3015		}
3016
3017		qp->rq.wrid[ind] = wr->wr_id;
3018
3019		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
3020	}
3021
3022out:
3023	if (likely(nreq)) {
3024		qp->rq.head += nreq;
3025
3026		/*
3027		 * Make sure that descriptors are written before
3028		 * doorbell record.
3029		 */
3030		wmb();
3031
3032		*qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
3033	}
3034
3035	spin_unlock_irqrestore(&qp->rq.lock, flags);
3036
3037	return err;
3038}
3039
3040static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
3041{
3042	switch (mlx4_state) {
3043	case MLX4_QP_STATE_RST:      return IB_QPS_RESET;
3044	case MLX4_QP_STATE_INIT:     return IB_QPS_INIT;
3045	case MLX4_QP_STATE_RTR:      return IB_QPS_RTR;
3046	case MLX4_QP_STATE_RTS:      return IB_QPS_RTS;
3047	case MLX4_QP_STATE_SQ_DRAINING:
3048	case MLX4_QP_STATE_SQD:      return IB_QPS_SQD;
3049	case MLX4_QP_STATE_SQER:     return IB_QPS_SQE;
3050	case MLX4_QP_STATE_ERR:      return IB_QPS_ERR;
3051	default:		     return -1;
3052	}
3053}
3054
3055static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)
3056{
3057	switch (mlx4_mig_state) {
3058	case MLX4_QP_PM_ARMED:		return IB_MIG_ARMED;
3059	case MLX4_QP_PM_REARM:		return IB_MIG_REARM;
3060	case MLX4_QP_PM_MIGRATED:	return IB_MIG_MIGRATED;
3061	default: return -1;
3062	}
3063}
3064
3065static int to_ib_qp_access_flags(int mlx4_flags)
3066{
3067	int ib_flags = 0;
3068
3069	if (mlx4_flags & MLX4_QP_BIT_RRE)
3070		ib_flags |= IB_ACCESS_REMOTE_READ;
3071	if (mlx4_flags & MLX4_QP_BIT_RWE)
3072		ib_flags |= IB_ACCESS_REMOTE_WRITE;
3073	if (mlx4_flags & MLX4_QP_BIT_RAE)
3074		ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
3075
3076	return ib_flags;
3077}
3078
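/* Translate a hardware address path (mlx4_qp_path) back into an
 * ib_ah_attr for mlx4_ib_query_qp().  The SL is encoded differently in
 * sched_queue for IB and Ethernet (RoCE) ports.
 */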
3079static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
3080				struct mlx4_qp_path *path)
3081{
3082	struct mlx4_dev *dev = ibdev->dev;
3083	int is_eth;
3084
3085	memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
3086	ib_ah_attr->port_num	  = path->sched_queue & 0x40 ? 2 : 1;
3087
3088	if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
3089		return;
3090
3091	is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) ==
3092		IB_LINK_LAYER_ETHERNET;
3093	if (is_eth)
3094		ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) |
3095		((path->sched_queue & 4) << 1);
3096	else
3097		ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
3098
3099	ib_ah_attr->dlid	  = be16_to_cpu(path->rlid);
3100	ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
3101	ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;
3102	ib_ah_attr->ah_flags      = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
3103	if (ib_ah_attr->ah_flags) {
3104		ib_ah_attr->grh.sgid_index = path->mgid_index;
3105		ib_ah_attr->grh.hop_limit  = path->hop_limit;
3106		ib_ah_attr->grh.traffic_class =
3107			(be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
3108		ib_ah_attr->grh.flow_label =
3109			be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
3110		memcpy(ib_ah_attr->grh.dgid.raw,
3111			path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
3112	}
3113}
3114
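/* Query the current attributes of a QP.  QPs in RESET are reported
 * without touching the firmware; otherwise the mlx4 QP context is fetched
 * and decoded back into the ib_qp_attr and ib_qp_init_attr fields.
 */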
3115int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
3116		     struct ib_qp_init_attr *qp_init_attr)
3117{
3118	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
3119	struct mlx4_ib_qp *qp = to_mqp(ibqp);
3120	struct mlx4_qp_context context;
3121	int mlx4_state;
3122	int err = 0;
3123
3124	mutex_lock(&qp->mutex);
3125
3126	if (qp->state == IB_QPS_RESET) {
3127		qp_attr->qp_state = IB_QPS_RESET;
3128		goto done;
3129	}
3130
3131	err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
3132	if (err) {
3133		err = -EINVAL;
3134		goto out;
3135	}
3136
3137	mlx4_state = be32_to_cpu(context.flags) >> 28;
3138
3139	qp->state		     = to_ib_qp_state(mlx4_state);
3140	qp_attr->qp_state	     = qp->state;
3141	qp_attr->path_mtu	     = context.mtu_msgmax >> 5;
3142	qp_attr->path_mig_state	     =
3143		to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
3144	qp_attr->qkey		     = be32_to_cpu(context.qkey);
3145	qp_attr->rq_psn		     = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
3146	qp_attr->sq_psn		     = be32_to_cpu(context.next_send_psn) & 0xffffff;
3147	qp_attr->dest_qp_num	     = be32_to_cpu(context.remote_qpn) & 0xffffff;
3148	qp_attr->qp_access_flags     =
3149		to_ib_qp_access_flags(be32_to_cpu(context.params2));
3150
3151	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
3152		to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
3153		to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
3154		qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
3155		qp_attr->alt_port_num	= qp_attr->alt_ah_attr.port_num;
3156	}
3157
3158	qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
3159	if (qp_attr->qp_state == IB_QPS_INIT)
3160		qp_attr->port_num = qp->port;
3161	else
3162		qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
3163
3164	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
3165	qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
3166
3167	qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
3168
3169	qp_attr->max_dest_rd_atomic =
3170		1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
3171	qp_attr->min_rnr_timer	    =
3172		(be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
3173	qp_attr->timeout	    = context.pri_path.ackto >> 3;
3174	qp_attr->retry_cnt	    = (be32_to_cpu(context.params1) >> 16) & 0x7;
3175	qp_attr->rnr_retry	    = (be32_to_cpu(context.params1) >> 13) & 0x7;
3176	qp_attr->alt_timeout	    = context.alt_path.ackto >> 3;
3177
3178done:
3179	qp_attr->cur_qp_state	     = qp_attr->qp_state;
3180	qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
3181	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
3182
3183	if (!ibqp->uobject) {
3184		qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;
3185		qp_attr->cap.max_send_sge = qp->sq.max_gs;
3186	} else {
3187		qp_attr->cap.max_send_wr  = 0;
3188		qp_attr->cap.max_send_sge = 0;
3189	}
3190
3191	/*
3192	 * We don't support inline sends for kernel QPs (yet), and we
3193	 * don't know what userspace's value should be.
3194	 */
3195	qp_attr->cap.max_inline_data = 0;
3196
3197	qp_init_attr->cap	     = qp_attr->cap;
3198
3199	qp_init_attr->create_flags = 0;
3200	if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)
3201		qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
3202
3203	if (qp->flags & MLX4_IB_QP_LSO)
3204		qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
3205
3206	if (qp->flags & MLX4_IB_QP_NETIF)
3207		qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP;
3208
3209	qp_init_attr->sq_sig_type =
3210		qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
3211		IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
3212
3213out:
3214	mutex_unlock(&qp->mutex);
3215	return err;
3216}
3217
3218