/*
 * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
 * Copyright (c) 2009 HNR Consulting. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <rdma/ib_cache.h>

#include "mad_priv.h"
#include "mad_rmpp.h"
#include "smi.h"
#include "agent.h"

MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("kernel IB MAD API");
MODULE_AUTHOR("Hal Rosenstock");
MODULE_AUTHOR("Sean Hefty");

static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;

module_param_named(send_queue_size, mad_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
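
/*
 * The queue depths above can be overridden at module load time; the values
 * below are illustrative only (both parameters are read-only, 0444, once
 * the module is loaded):
 *
 *	modprobe ib_mad send_queue_size=256 recv_queue_size=1024
 */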

static struct kmem_cache *ib_mad_cache;

static struct list_head ib_mad_port_list;
static u32 ib_mad_client_id = 0;

/* Port list lock */
static DEFINE_SPINLOCK(ib_mad_port_list_lock);

/* Forward declarations */
static int method_in_use(struct ib_mad_mgmt_method_table **method,
			 struct ib_mad_reg_req *mad_reg_req);
static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
static struct ib_mad_agent_private *find_mad_agent(
					struct ib_mad_port_private *port_priv,
					struct ib_mad *mad);
static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
				    struct ib_mad_private *mad);
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
static void timeout_sends(struct work_struct *work);
static void local_completions(struct work_struct *work);
static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
			      struct ib_mad_agent_private *agent_priv,
			      u8 mgmt_class);
static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
			   struct ib_mad_agent_private *agent_priv);

/*
 * Returns an ib_mad_port_private structure or NULL for a device/port.
 * Assumes ib_mad_port_list_lock is being held.
 */
static inline struct ib_mad_port_private *
__ib_get_mad_port(struct ib_device *device, int port_num)
{
	struct ib_mad_port_private *entry;

	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
		if (entry->device == device && entry->port_num == port_num)
			return entry;
	}
	return NULL;
}

/*
 * Wrapper function to return an ib_mad_port_private structure or NULL
 * for a device/port
 */
static inline struct ib_mad_port_private *
ib_get_mad_port(struct ib_device *device, int port_num)
{
	struct ib_mad_port_private *entry;
	unsigned long flags;

	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
	entry = __ib_get_mad_port(device, port_num);
	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);

	return entry;
}

static inline u8 convert_mgmt_class(u8 mgmt_class)
{
	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
	return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
		0 : mgmt_class;
}

static int get_spl_qp_index(enum ib_qp_type qp_type)
{
	switch (qp_type) {
	case IB_QPT_SMI:
		return 0;
	case IB_QPT_GSI:
		return 1;
	default:
		return -1;
	}
}

static int vendor_class_index(u8 mgmt_class)
{
	return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
}

static int is_vendor_class(u8 mgmt_class)
{
	if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
	    (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
		return 0;
	return 1;
}

static int is_vendor_oui(char *oui)
{
	if (oui[0] || oui[1] || oui[2])
		return 1;
	return 0;
}

static int is_vendor_method_in_use(
		struct ib_mad_mgmt_vendor_class *vendor_class,
		struct ib_mad_reg_req *mad_reg_req)
{
	struct ib_mad_mgmt_method_table *method;
	int i;

	for (i = 0; i < MAX_MGMT_OUI; i++) {
		if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
			method = vendor_class->method_table[i];
			if (method) {
				if (method_in_use(&method, mad_reg_req))
					return 1;
				else
					break;
			}
		}
	}
	return 0;
}

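/*
 * ib_response_mad - Indicates whether a MAD is a response to an earlier
 *  request.  A MAD is treated as a response if the R bit is set in the
 *  method field, if the method is TrapRepress, or, for the baseboard
 *  management class, if the response bit is set in the attribute modifier.
 */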
int ib_response_mad(struct ib_mad *mad)
{
	return ((mad->mad_hdr.method & IB_MGMT_METHOD_RESP) ||
		(mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) ||
		((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_BM) &&
		 (mad->mad_hdr.attr_mod & IB_BM_ATTR_MOD_RESP)));
}
EXPORT_SYMBOL(ib_response_mad);

/*
 * ib_register_mad_agent - Register to send/receive MADs
 */
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
					   u8 port_num,
					   enum ib_qp_type qp_type,
					   struct ib_mad_reg_req *mad_reg_req,
					   u8 rmpp_version,
					   ib_mad_send_handler send_handler,
					   ib_mad_recv_handler recv_handler,
					   void *context,
					   u32 registration_flags)
{
	struct ib_mad_port_private *port_priv;
	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_reg_req *reg_req = NULL;
	struct ib_mad_mgmt_class_table *class;
	struct ib_mad_mgmt_vendor_class_table *vendor;
	struct ib_mad_mgmt_vendor_class *vendor_class;
	struct ib_mad_mgmt_method_table *method;
	int ret2, qpn;
	unsigned long flags;
	u8 mgmt_class, vclass;

	/* Validate parameters */
	qpn = get_spl_qp_index(qp_type);
	if (qpn == -1) {
		dev_notice(&device->dev,
			   "ib_register_mad_agent: invalid QP Type %d\n",
			   qp_type);
		goto error1;
	}

	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
		dev_notice(&device->dev,
			   "ib_register_mad_agent: invalid RMPP Version %u\n",
			   rmpp_version);
		goto error1;
	}

	/* Validate MAD registration request if supplied */
	if (mad_reg_req) {
		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
			dev_notice(&device->dev,
				   "ib_register_mad_agent: invalid Class Version %u\n",
				   mad_reg_req->mgmt_class_version);
			goto error1;
		}
		if (!recv_handler) {
			dev_notice(&device->dev,
				   "ib_register_mad_agent: no recv_handler\n");
			goto error1;
		}
		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
			/*
			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
			 * one in this range currently allowed
			 */
			if (mad_reg_req->mgmt_class !=
			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
				dev_notice(&device->dev,
					   "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
					   mad_reg_req->mgmt_class);
				goto error1;
			}
		} else if (mad_reg_req->mgmt_class == 0) {
			/*
			 * Class 0 is reserved in IBA and is used for
			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
			 */
			dev_notice(&device->dev,
				   "ib_register_mad_agent: Invalid Mgmt Class 0\n");
			goto error1;
		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
			/*
			 * If class is in "new" vendor range,
			 * ensure supplied OUI is not zero
			 */
			if (!is_vendor_oui(mad_reg_req->oui)) {
				dev_notice(&device->dev,
					   "ib_register_mad_agent: No OUI specified for class 0x%x\n",
					   mad_reg_req->mgmt_class);
				goto error1;
			}
		}
		/* Make sure class supplied is consistent with RMPP */
		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
			if (rmpp_version) {
				dev_notice(&device->dev,
					   "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
					   mad_reg_req->mgmt_class);
				goto error1;
			}
		}

		/* Make sure class supplied is consistent with QP type */
		if (qp_type == IB_QPT_SMI) {
			if ((mad_reg_req->mgmt_class !=
					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
			    (mad_reg_req->mgmt_class !=
					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
				dev_notice(&device->dev,
					   "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
					   mad_reg_req->mgmt_class);
				goto error1;
			}
		} else {
			if ((mad_reg_req->mgmt_class ==
					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
			    (mad_reg_req->mgmt_class ==
					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
				dev_notice(&device->dev,
					   "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
					   mad_reg_req->mgmt_class);
				goto error1;
			}
		}
	} else {
		/* No registration request supplied */
		if (!send_handler)
			goto error1;
		if (registration_flags & IB_MAD_USER_RMPP)
			goto error1;
	}

	/* Validate device and port */
	port_priv = ib_get_mad_port(device, port_num);
	if (!port_priv) {
		dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
		ret = ERR_PTR(-ENODEV);
		goto error1;
	}

	/* Verify the QP requested is supported.  For example, Ethernet devices
	 * will not have QP0 */
	if (!port_priv->qp_info[qpn].qp) {
		dev_notice(&device->dev,
			   "ib_register_mad_agent: QP %d not supported\n", qpn);
		ret = ERR_PTR(-EPROTONOSUPPORT);
		goto error1;
	}

	/* Allocate structures */
	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
	if (!mad_agent_priv) {
		ret = ERR_PTR(-ENOMEM);
		goto error1;
	}

	mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd,
						 IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(mad_agent_priv->agent.mr)) {
		ret = ERR_PTR(-ENOMEM);
		goto error2;
	}

	if (mad_reg_req) {
		reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
		if (!reg_req) {
			ret = ERR_PTR(-ENOMEM);
			goto error3;
		}
	}

	/* Now, fill in the various structures */
	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
	mad_agent_priv->reg_req = reg_req;
	mad_agent_priv->agent.rmpp_version = rmpp_version;
	mad_agent_priv->agent.device = device;
	mad_agent_priv->agent.recv_handler = recv_handler;
	mad_agent_priv->agent.send_handler = send_handler;
	mad_agent_priv->agent.context = context;
	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
	mad_agent_priv->agent.port_num = port_num;
	mad_agent_priv->agent.flags = registration_flags;
	spin_lock_init(&mad_agent_priv->lock);
	INIT_LIST_HEAD(&mad_agent_priv->send_list);
	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
	INIT_LIST_HEAD(&mad_agent_priv->done_list);
	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
	INIT_LIST_HEAD(&mad_agent_priv->local_list);
	INIT_WORK(&mad_agent_priv->local_work, local_completions);
	atomic_set(&mad_agent_priv->refcount, 1);
	init_completion(&mad_agent_priv->comp);

	spin_lock_irqsave(&port_priv->reg_lock, flags);
	mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;

	/*
	 * Make sure MAD registration (if supplied)
	 * is non overlapping with any existing ones
	 */
	if (mad_reg_req) {
		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
		if (!is_vendor_class(mgmt_class)) {
			class = port_priv->version[mad_reg_req->
						   mgmt_class_version].class;
			if (class) {
				method = class->method_table[mgmt_class];
				if (method) {
					if (method_in_use(&method,
							   mad_reg_req))
						goto error4;
				}
			}
			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
						  mgmt_class);
		} else {
			/* "New" vendor class range */
			vendor = port_priv->version[mad_reg_req->
						    mgmt_class_version].vendor;
			if (vendor) {
				vclass = vendor_class_index(mgmt_class);
				vendor_class = vendor->vendor_class[vclass];
				if (vendor_class) {
					if (is_vendor_method_in_use(
							vendor_class,
							mad_reg_req))
						goto error4;
				}
			}
			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
		}
		if (ret2) {
			ret = ERR_PTR(ret2);
			goto error4;
		}
	}

	/* Add mad agent into port's agent list */
	list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
	spin_unlock_irqrestore(&port_priv->reg_lock, flags);

	return &mad_agent_priv->agent;

error4:
	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
	kfree(reg_req);
error3:
	ib_dereg_mr(mad_agent_priv->agent.mr);
error2:
	kfree(mad_agent_priv);
error1:
	return ret;
}
EXPORT_SYMBOL(ib_register_mad_agent);
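
/*
 * Illustrative (hypothetical) registration of a GSI agent for the SA class;
 * the handler names, context and values below are examples only:
 *
 *	struct ib_mad_reg_req reg_req = {
 *		.mgmt_class = IB_MGMT_CLASS_SUBN_ADM,
 *		.mgmt_class_version = 2,
 *	};
 *	struct ib_mad_agent *agent;
 *
 *	set_bit(IB_MGMT_METHOD_GET_RESP, reg_req.method_mask);
 *
 *	agent = ib_register_mad_agent(device, port_num, IB_QPT_GSI, &reg_req,
 *				      IB_MGMT_RMPP_VERSION, my_send_handler,
 *				      my_recv_handler, my_context, 0);
 *	if (IS_ERR(agent))
 *		return PTR_ERR(agent);
 */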

static inline int is_snooping_sends(int mad_snoop_flags)
{
	return (mad_snoop_flags &
		(/*IB_MAD_SNOOP_POSTED_SENDS |
		 IB_MAD_SNOOP_RMPP_SENDS |*/
		 IB_MAD_SNOOP_SEND_COMPLETIONS /*|
		 IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
}

static inline int is_snooping_recvs(int mad_snoop_flags)
{
	return (mad_snoop_flags &
		(IB_MAD_SNOOP_RECVS /*|
		 IB_MAD_SNOOP_RMPP_RECVS*/));
}

static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
				struct ib_mad_snoop_private *mad_snoop_priv)
{
	struct ib_mad_snoop_private **new_snoop_table;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&qp_info->snoop_lock, flags);
	/* Check for empty slot in array. */
	for (i = 0; i < qp_info->snoop_table_size; i++)
		if (!qp_info->snoop_table[i])
			break;

	if (i == qp_info->snoop_table_size) {
		/* Grow table. */
		new_snoop_table = krealloc(qp_info->snoop_table,
					   sizeof mad_snoop_priv *
					   (qp_info->snoop_table_size + 1),
					   GFP_ATOMIC);
		if (!new_snoop_table) {
			i = -ENOMEM;
			goto out;
		}

		qp_info->snoop_table = new_snoop_table;
		qp_info->snoop_table_size++;
	}
	qp_info->snoop_table[i] = mad_snoop_priv;
	atomic_inc(&qp_info->snoop_count);
out:
	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
	return i;
}

struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
					   u8 port_num,
					   enum ib_qp_type qp_type,
					   int mad_snoop_flags,
					   ib_mad_snoop_handler snoop_handler,
					   ib_mad_recv_handler recv_handler,
					   void *context)
{
	struct ib_mad_port_private *port_priv;
	struct ib_mad_agent *ret;
	struct ib_mad_snoop_private *mad_snoop_priv;
	int qpn;

	/* Validate parameters */
	if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
	    (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
		ret = ERR_PTR(-EINVAL);
		goto error1;
	}
	qpn = get_spl_qp_index(qp_type);
	if (qpn == -1) {
		ret = ERR_PTR(-EINVAL);
		goto error1;
	}
	port_priv = ib_get_mad_port(device, port_num);
	if (!port_priv) {
		ret = ERR_PTR(-ENODEV);
		goto error1;
	}
	/* Allocate structures */
	mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
	if (!mad_snoop_priv) {
		ret = ERR_PTR(-ENOMEM);
		goto error1;
	}

	/* Now, fill in the various structures */
	mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
	mad_snoop_priv->agent.device = device;
	mad_snoop_priv->agent.recv_handler = recv_handler;
	mad_snoop_priv->agent.snoop_handler = snoop_handler;
	mad_snoop_priv->agent.context = context;
	mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
	mad_snoop_priv->agent.port_num = port_num;
	mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
	init_completion(&mad_snoop_priv->comp);
	mad_snoop_priv->snoop_index = register_snoop_agent(
						&port_priv->qp_info[qpn],
						mad_snoop_priv);
	if (mad_snoop_priv->snoop_index < 0) {
		ret = ERR_PTR(mad_snoop_priv->snoop_index);
		goto error2;
	}

	atomic_set(&mad_snoop_priv->refcount, 1);
	return &mad_snoop_priv->agent;

error2:
	kfree(mad_snoop_priv);
error1:
	return ret;
}
EXPORT_SYMBOL(ib_register_mad_snoop);

static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
	if (atomic_dec_and_test(&mad_agent_priv->refcount))
		complete(&mad_agent_priv->comp);
}

static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
{
	if (atomic_dec_and_test(&mad_snoop_priv->refcount))
		complete(&mad_snoop_priv->comp);
}

static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
	struct ib_mad_port_private *port_priv;
	unsigned long flags;

	/* Note that we could still be handling received MADs */

	/*
	 * Canceling all sends results in dropping received response
	 * MADs, preventing us from queuing additional work
	 */
	cancel_mads(mad_agent_priv);
	port_priv = mad_agent_priv->qp_info->port_priv;
	cancel_delayed_work(&mad_agent_priv->timed_work);

	spin_lock_irqsave(&port_priv->reg_lock, flags);
	remove_mad_reg_req(mad_agent_priv);
	list_del(&mad_agent_priv->agent_list);
	spin_unlock_irqrestore(&port_priv->reg_lock, flags);

	flush_workqueue(port_priv->wq);
	ib_cancel_rmpp_recvs(mad_agent_priv);

	deref_mad_agent(mad_agent_priv);
	wait_for_completion(&mad_agent_priv->comp);

	kfree(mad_agent_priv->reg_req);
	ib_dereg_mr(mad_agent_priv->agent.mr);
	kfree(mad_agent_priv);
}

static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
{
	struct ib_mad_qp_info *qp_info;
	unsigned long flags;

	qp_info = mad_snoop_priv->qp_info;
	spin_lock_irqsave(&qp_info->snoop_lock, flags);
	qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
	atomic_dec(&qp_info->snoop_count);
	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);

	deref_snoop_agent(mad_snoop_priv);
	wait_for_completion(&mad_snoop_priv->comp);

	kfree(mad_snoop_priv);
}

/*
 * ib_unregister_mad_agent - Unregisters a client from using MAD services
 */
int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_snoop_private *mad_snoop_priv;

	/* If the TID is zero, the agent can only snoop. */
	if (mad_agent->hi_tid) {
		mad_agent_priv = container_of(mad_agent,
					      struct ib_mad_agent_private,
					      agent);
		unregister_mad_agent(mad_agent_priv);
	} else {
		mad_snoop_priv = container_of(mad_agent,
					      struct ib_mad_snoop_private,
					      agent);
		unregister_mad_snoop(mad_snoop_priv);
	}
	return 0;
}
EXPORT_SYMBOL(ib_unregister_mad_agent);

static void dequeue_mad(struct ib_mad_list_head *mad_list)
{
	struct ib_mad_queue *mad_queue;
	unsigned long flags;

	BUG_ON(!mad_list->mad_queue);
	mad_queue = mad_list->mad_queue;
	spin_lock_irqsave(&mad_queue->lock, flags);
	list_del(&mad_list->list);
	mad_queue->count--;
	spin_unlock_irqrestore(&mad_queue->lock, flags);
}

static void snoop_send(struct ib_mad_qp_info *qp_info,
		       struct ib_mad_send_buf *send_buf,
		       struct ib_mad_send_wc *mad_send_wc,
		       int mad_snoop_flags)
{
	struct ib_mad_snoop_private *mad_snoop_priv;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&qp_info->snoop_lock, flags);
	for (i = 0; i < qp_info->snoop_table_size; i++) {
		mad_snoop_priv = qp_info->snoop_table[i];
		if (!mad_snoop_priv ||
		    !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
			continue;

		atomic_inc(&mad_snoop_priv->refcount);
		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
		mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
						    send_buf, mad_send_wc);
		deref_snoop_agent(mad_snoop_priv);
		spin_lock_irqsave(&qp_info->snoop_lock, flags);
	}
	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
}

static void snoop_recv(struct ib_mad_qp_info *qp_info,
		       struct ib_mad_recv_wc *mad_recv_wc,
		       int mad_snoop_flags)
{
	struct ib_mad_snoop_private *mad_snoop_priv;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&qp_info->snoop_lock, flags);
	for (i = 0; i < qp_info->snoop_table_size; i++) {
		mad_snoop_priv = qp_info->snoop_table[i];
		if (!mad_snoop_priv ||
		    !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
			continue;

		atomic_inc(&mad_snoop_priv->refcount);
		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
		mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
						   mad_recv_wc);
		deref_snoop_agent(mad_snoop_priv);
		spin_lock_irqsave(&qp_info->snoop_lock, flags);
	}
	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
}

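/*
 * Build a synthetic receive work completion for an SMP that is processed
 * locally (i.e. never posted to the QP), so that process_mad() and the
 * local receive path see the same ib_wc layout as for a real receive.
 */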
static void build_smp_wc(struct ib_qp *qp,
			 u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
			 struct ib_wc *wc)
{
	memset(wc, 0, sizeof *wc);
	wc->wr_id = wr_id;
	wc->status = IB_WC_SUCCESS;
	wc->opcode = IB_WC_RECV;
	wc->pkey_index = pkey_index;
	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
	wc->src_qp = IB_QP0;
	wc->qp = qp;
	wc->slid = slid;
	wc->sl = 0;
	wc->dlid_path_bits = 0;
	wc->port_num = port_num;
}

/*
 * Return 0 if SMP is to be sent
 * Return 1 if SMP was consumed locally (whether or not solicited)
 * Return < 0 if error
 */
static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
				  struct ib_mad_send_wr_private *mad_send_wr)
{
	int ret = 0;
	struct ib_smp *smp = mad_send_wr->send_buf.mad;
	unsigned long flags;
	struct ib_mad_local_private *local;
	struct ib_mad_private *mad_priv;
	struct ib_mad_port_private *port_priv;
	struct ib_mad_agent_private *recv_mad_agent = NULL;
	struct ib_device *device = mad_agent_priv->agent.device;
	u8 port_num;
	struct ib_wc mad_wc;
	struct ib_send_wr *send_wr = &mad_send_wr->send_wr;

	if (device->node_type == RDMA_NODE_IB_SWITCH &&
	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
		port_num = send_wr->wr.ud.port_num;
	else
		port_num = mad_agent_priv->agent.port_num;

	/*
	 * Directed route handling starts if the initial LID routed part of
	 * a request or the ending LID routed part of a response is empty.
	 * If we are at the start of the LID routed part, don't update the
	 * hop_ptr or hop_cnt.  See section 14.2.2, Vol 1 IB spec.
	 */
	if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
	     IB_LID_PERMISSIVE &&
	     smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
	     IB_SMI_DISCARD) {
		ret = -EINVAL;
		dev_err(&device->dev, "Invalid directed route\n");
		goto out;
	}

	/* Check to post send on QP or process locally */
	if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
	    smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
		goto out;

	local = kmalloc(sizeof *local, GFP_ATOMIC);
	if (!local) {
		ret = -ENOMEM;
		dev_err(&device->dev, "No memory for ib_mad_local_private\n");
		goto out;
	}
	local->mad_priv = NULL;
	local->recv_mad_agent = NULL;
	mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
	if (!mad_priv) {
		ret = -ENOMEM;
		dev_err(&device->dev, "No memory for local response MAD\n");
		kfree(local);
		goto out;
	}

	build_smp_wc(mad_agent_priv->agent.qp,
		     send_wr->wr_id, be16_to_cpu(smp->dr_slid),
		     send_wr->wr.ud.pkey_index,
		     send_wr->wr.ud.port_num, &mad_wc);

	/* No GRH for DR SMP */
	ret = device->process_mad(device, 0, port_num, &mad_wc, NULL,
				  (struct ib_mad *)smp,
				  (struct ib_mad *)&mad_priv->mad);
	switch (ret) {
	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
		if (ib_response_mad(&mad_priv->mad.mad) &&
		    mad_agent_priv->agent.recv_handler) {
			local->mad_priv = mad_priv;
			local->recv_mad_agent = mad_agent_priv;
			/*
			 * Reference MAD agent until receive
			 * side of local completion handled
			 */
			atomic_inc(&mad_agent_priv->refcount);
		} else
			kmem_cache_free(ib_mad_cache, mad_priv);
		break;
	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
		kmem_cache_free(ib_mad_cache, mad_priv);
		break;
	case IB_MAD_RESULT_SUCCESS:
		/* Treat like an incoming receive MAD */
		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
					    mad_agent_priv->agent.port_num);
		if (port_priv) {
			memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad));
			recv_mad_agent = find_mad_agent(port_priv,
							&mad_priv->mad.mad);
		}
		if (!port_priv || !recv_mad_agent) {
			/*
			 * No receiving agent so drop packet and
			 * generate send completion.
			 */
			kmem_cache_free(ib_mad_cache, mad_priv);
			break;
		}
		local->mad_priv = mad_priv;
		local->recv_mad_agent = recv_mad_agent;
		break;
	default:
		kmem_cache_free(ib_mad_cache, mad_priv);
		kfree(local);
		ret = -EINVAL;
		goto out;
	}

	local->mad_send_wr = mad_send_wr;
	/* Reference MAD agent until send side of local completion handled */
	atomic_inc(&mad_agent_priv->refcount);
	/* Queue local completion to local list */
	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
	queue_work(mad_agent_priv->qp_info->port_priv->wq,
		   &mad_agent_priv->local_work);
	ret = 1;
out:
	return ret;
}

static int get_pad_size(int hdr_len, int data_len)
{
	int seg_size, pad;

	seg_size = sizeof(struct ib_mad) - hdr_len;
	if (data_len && seg_size) {
		pad = seg_size - data_len % seg_size;
		return pad == seg_size ? 0 : pad;
	} else
		return seg_size;
}
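
/*
 * Worked example (assuming a 256-byte MAD and a 56-byte SA header): an SA
 * MAD has seg_size = 256 - 56 = 200 bytes of payload per segment.  With
 * data_len == 300, the last segment carries 100 data bytes, so
 * get_pad_size() returns 100; if data_len were an exact multiple of
 * seg_size, the padding would be 0.
 */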

static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
{
	struct ib_rmpp_segment *s, *t;

	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
		list_del(&s->list);
		kfree(s);
	}
}

static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
				gfp_t gfp_mask)
{
	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
	struct ib_rmpp_segment *seg = NULL;
	int left, seg_size, pad;

	send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len;
	seg_size = send_buf->seg_size;
	pad = send_wr->pad;

	/* Allocate data segments. */
	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
		seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
		if (!seg) {
			dev_err(&send_buf->mad_agent->device->dev,
				"alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
				sizeof (*seg) + seg_size, gfp_mask);
			free_send_rmpp_list(send_wr);
			return -ENOMEM;
		}
		seg->num = ++send_buf->seg_count;
		list_add_tail(&seg->list, &send_wr->rmpp_list);
	}

	/* Zero any padding */
	if (pad)
		memset(seg->data + seg_size - pad, 0, pad);

	rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
					  agent.rmpp_version;
	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);

	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
					struct ib_rmpp_segment, list);
	send_wr->last_ack_seg = send_wr->cur_seg;
	return 0;
}

int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent)
{
	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
}
EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);

struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
					   u32 remote_qpn, u16 pkey_index,
					   int rmpp_active,
					   int hdr_len, int data_len,
					   gfp_t gfp_mask)
{
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_send_wr_private *mad_send_wr;
	int pad, message_size, ret, size;
	void *buf;

	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
				      agent);
	pad = get_pad_size(hdr_len, data_len);
	message_size = hdr_len + data_len + pad;

	if (ib_mad_kernel_rmpp_agent(mad_agent)) {
		if (!rmpp_active && message_size > sizeof(struct ib_mad))
			return ERR_PTR(-EINVAL);
	} else
		if (rmpp_active || message_size > sizeof(struct ib_mad))
			return ERR_PTR(-EINVAL);

	size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	mad_send_wr = buf + size;
	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
	mad_send_wr->send_buf.mad = buf;
	mad_send_wr->send_buf.hdr_len = hdr_len;
	mad_send_wr->send_buf.data_len = data_len;
	mad_send_wr->pad = pad;

	mad_send_wr->mad_agent_priv = mad_agent_priv;
	mad_send_wr->sg_list[0].length = hdr_len;
	mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
	mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
	mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;

	mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
	mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
	mad_send_wr->send_wr.num_sge = 2;
	mad_send_wr->send_wr.opcode = IB_WR_SEND;
	mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
	mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
	mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
	mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;

	if (rmpp_active) {
		ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask);
		if (ret) {
			kfree(buf);
			return ERR_PTR(ret);
		}
	}

	mad_send_wr->send_buf.mad_agent = mad_agent;
	atomic_inc(&mad_agent_priv->refcount);
	return &mad_send_wr->send_buf;
}
EXPORT_SYMBOL(ib_create_send_mad);
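
/*
 * Illustrative (hypothetical) construction and send of a single-MAD
 * request; the variable names and values below are examples only:
 *
 *	msg = ib_create_send_mad(agent, remote_qpn, pkey_index, 0,
 *				 IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 *				 GFP_KERNEL);
 *	if (IS_ERR(msg))
 *		return PTR_ERR(msg);
 *	msg->ah = ah;
 *	msg->timeout_ms = 100;
 *	msg->retries = 3;
 *	ret = ib_post_send_mad(msg, NULL);
 *	if (ret)
 *		ib_free_send_mad(msg);
 */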

int ib_get_mad_data_offset(u8 mgmt_class)
{
	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
		return IB_MGMT_SA_HDR;
	else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
		 (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
		 (mgmt_class == IB_MGMT_CLASS_BIS))
		return IB_MGMT_DEVICE_HDR;
	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
		return IB_MGMT_VENDOR_HDR;
	else
		return IB_MGMT_MAD_HDR;
}
EXPORT_SYMBOL(ib_get_mad_data_offset);

int ib_is_mad_class_rmpp(u8 mgmt_class)
{
	if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
	    (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
	    (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
	    (mgmt_class == IB_MGMT_CLASS_BIS) ||
	    ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
	     (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
		return 1;
	return 0;
}
EXPORT_SYMBOL(ib_is_mad_class_rmpp);

void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
{
	struct ib_mad_send_wr_private *mad_send_wr;
	struct list_head *list;

	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
				   send_buf);
	list = &mad_send_wr->cur_seg->list;

	if (mad_send_wr->cur_seg->num < seg_num) {
		list_for_each_entry(mad_send_wr->cur_seg, list, list)
			if (mad_send_wr->cur_seg->num == seg_num)
				break;
	} else if (mad_send_wr->cur_seg->num > seg_num) {
		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
			if (mad_send_wr->cur_seg->num == seg_num)
				break;
	}
	return mad_send_wr->cur_seg->data;
}
EXPORT_SYMBOL(ib_get_rmpp_segment);

static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
{
	if (mad_send_wr->send_buf.seg_count)
		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
					   mad_send_wr->seg_num);
	else
		return mad_send_wr->send_buf.mad +
		       mad_send_wr->send_buf.hdr_len;
}

void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
{
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_send_wr_private *mad_send_wr;

	mad_agent_priv = container_of(send_buf->mad_agent,
				      struct ib_mad_agent_private, agent);
	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
				   send_buf);

	free_send_rmpp_list(mad_send_wr);
	kfree(send_buf->mad);
	deref_mad_agent(mad_agent_priv);
}
EXPORT_SYMBOL(ib_free_send_mad);

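/*
 * Map the MAD header and payload for DMA and post the work request.  If the
 * send queue is already at its maximum active depth, the request is parked
 * on the QP's overflow list instead and is posted later, once an earlier
 * send completes.
 */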
int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
	struct ib_mad_qp_info *qp_info;
	struct list_head *list;
	struct ib_send_wr *bad_send_wr;
	struct ib_mad_agent *mad_agent;
	struct ib_sge *sge;
	unsigned long flags;
	int ret;

	/* Set WR ID to find mad_send_wr upon completion */
	qp_info = mad_send_wr->mad_agent_priv->qp_info;
	mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;

	mad_agent = mad_send_wr->send_buf.mad_agent;
	sge = mad_send_wr->sg_list;
	sge[0].addr = ib_dma_map_single(mad_agent->device,
					mad_send_wr->send_buf.mad,
					sge[0].length,
					DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
		return -ENOMEM;

	mad_send_wr->header_mapping = sge[0].addr;

	sge[1].addr = ib_dma_map_single(mad_agent->device,
					ib_get_payload(mad_send_wr),
					sge[1].length,
					DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
		ib_dma_unmap_single(mad_agent->device,
				    mad_send_wr->header_mapping,
				    sge[0].length, DMA_TO_DEVICE);
		return -ENOMEM;
	}
	mad_send_wr->payload_mapping = sge[1].addr;

	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
				   &bad_send_wr);
		list = &qp_info->send_queue.list;
	} else {
		ret = 0;
		list = &qp_info->overflow_list;
	}

	if (!ret) {
		qp_info->send_queue.count++;
		list_add_tail(&mad_send_wr->mad_list.list, list);
	}
	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
	if (ret) {
		ib_dma_unmap_single(mad_agent->device,
				    mad_send_wr->header_mapping,
				    sge[0].length, DMA_TO_DEVICE);
		ib_dma_unmap_single(mad_agent->device,
				    mad_send_wr->payload_mapping,
				    sge[1].length, DMA_TO_DEVICE);
	}
	return ret;
}

/*
 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
 *  with the registered client
 */
int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
		     struct ib_mad_send_buf **bad_send_buf)
{
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_send_buf *next_send_buf;
	struct ib_mad_send_wr_private *mad_send_wr;
	unsigned long flags;
	int ret = -EINVAL;

	/* Walk list of send WRs and post each on send list */
	for (; send_buf; send_buf = next_send_buf) {

		mad_send_wr = container_of(send_buf,
					   struct ib_mad_send_wr_private,
					   send_buf);
		mad_agent_priv = mad_send_wr->mad_agent_priv;

		if (!send_buf->mad_agent->send_handler ||
		    (send_buf->timeout_ms &&
		     !send_buf->mad_agent->recv_handler)) {
			ret = -EINVAL;
			goto error;
		}

		if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
			if (mad_agent_priv->agent.rmpp_version) {
				ret = -EINVAL;
				goto error;
			}
		}

		/*
		 * Save pointer to next work request to post in case the
		 * current one completes, and the user modifies the work
		 * request associated with the completion
		 */
		next_send_buf = send_buf->next;
		mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;

		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
			ret = handle_outgoing_dr_smp(mad_agent_priv,
						     mad_send_wr);
			if (ret < 0)		/* error */
				goto error;
			else if (ret == 1)	/* locally consumed */
				continue;
		}

		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
		/* Timeout will be updated after send completes */
		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
		mad_send_wr->max_retries = send_buf->retries;
		mad_send_wr->retries_left = send_buf->retries;
		send_buf->retries = 0;
		/* Reference for work request to QP + response */
		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
		mad_send_wr->status = IB_WC_SUCCESS;

		/* Reference MAD agent until send completes */
		atomic_inc(&mad_agent_priv->refcount);
		spin_lock_irqsave(&mad_agent_priv->lock, flags);
		list_add_tail(&mad_send_wr->agent_list,
			      &mad_agent_priv->send_list);
		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);

		if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
			ret = ib_send_rmpp_mad(mad_send_wr);
			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
				ret = ib_send_mad(mad_send_wr);
		} else
			ret = ib_send_mad(mad_send_wr);
		if (ret < 0) {
			/* Fail send request */
			spin_lock_irqsave(&mad_agent_priv->lock, flags);
			list_del(&mad_send_wr->agent_list);
			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
			atomic_dec(&mad_agent_priv->refcount);
			goto error;
		}
	}
	return 0;
error:
	if (bad_send_buf)
		*bad_send_buf = send_buf;
	return ret;
}
EXPORT_SYMBOL(ib_post_send_mad);
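
/*
 * Note that ib_post_send_mad() accepts a chain of send buffers linked
 * through send_buf->next; on failure, *bad_send_buf (if provided) points
 * at the first buffer that was not posted.
 */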

/*
 * ib_free_recv_mad - Returns data buffers used to receive
 *  a MAD to the access layer
 */
void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
{
	struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
	struct ib_mad_private_header *mad_priv_hdr;
	struct ib_mad_private *priv;
	struct list_head free_list;

	INIT_LIST_HEAD(&free_list);
	list_splice_init(&mad_recv_wc->rmpp_list, &free_list);

	list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
					&free_list, list) {
		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
					   recv_buf);
		mad_priv_hdr = container_of(mad_recv_wc,
					    struct ib_mad_private_header,
					    recv_wc);
		priv = container_of(mad_priv_hdr, struct ib_mad_private,
				    header);
		kmem_cache_free(ib_mad_cache, priv);
	}
}
EXPORT_SYMBOL(ib_free_recv_mad);

struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
					u8 rmpp_version,
					ib_mad_send_handler send_handler,
					ib_mad_recv_handler recv_handler,
					void *context)
{
	return ERR_PTR(-EINVAL);	/* XXX: for now */
}
EXPORT_SYMBOL(ib_redirect_mad_qp);

int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
		      struct ib_wc *wc)
{
	dev_err(&mad_agent->device->dev,
		"ib_process_mad_wc() not implemented yet\n");
	return 0;
}
EXPORT_SYMBOL(ib_process_mad_wc);

static int method_in_use(struct ib_mad_mgmt_method_table **method,
			 struct ib_mad_reg_req *mad_reg_req)
{
	int i;

	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
		if ((*method)->agent[i]) {
			pr_err("Method %d already in use\n", i);
			return -EINVAL;
		}
	}
	return 0;
}

static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
{
	/* Allocate management method table */
	*method = kzalloc(sizeof **method, GFP_ATOMIC);
	if (!*method) {
		pr_err("No memory for ib_mad_mgmt_method_table\n");
		return -ENOMEM;
	}

	return 0;
}

/*
 * Check to see if there are any methods still in use
 */
static int check_method_table(struct ib_mad_mgmt_method_table *method)
{
	int i;

	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
		if (method->agent[i])
			return 1;
	return 0;
}

/*
 * Check to see if there are any method tables for this class still in use
 */
static int check_class_table(struct ib_mad_mgmt_class_table *class)
{
	int i;

	for (i = 0; i < MAX_MGMT_CLASS; i++)
		if (class->method_table[i])
			return 1;
	return 0;
}

static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
{
	int i;

	for (i = 0; i < MAX_MGMT_OUI; i++)
		if (vendor_class->method_table[i])
			return 1;
	return 0;
}

static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
			   char *oui)
{
	int i;

	for (i = 0; i < MAX_MGMT_OUI; i++)
		/* Is there matching OUI for this vendor class ? */
		if (!memcmp(vendor_class->oui[i], oui, 3))
			return i;

	return -1;
}

static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
{
	int i;

	for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
		if (vendor->vendor_class[i])
			return 1;

	return 0;
}

static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
				     struct ib_mad_agent_private *agent)
{
	int i;

	/* Remove any methods for this mad agent */
	for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
		if (method->agent[i] == agent) {
			method->agent[i] = NULL;
		}
	}
}

static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
			      struct ib_mad_agent_private *agent_priv,
			      u8 mgmt_class)
{
	struct ib_mad_port_private *port_priv;
	struct ib_mad_mgmt_class_table **class;
	struct ib_mad_mgmt_method_table **method;
	int i, ret;

	port_priv = agent_priv->qp_info->port_priv;
	class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
	if (!*class) {
		/* Allocate management class table for "new" class version */
		*class = kzalloc(sizeof **class, GFP_ATOMIC);
		if (!*class) {
			dev_err(&agent_priv->agent.device->dev,
				"No memory for ib_mad_mgmt_class_table\n");
			ret = -ENOMEM;
			goto error1;
		}

		/* Allocate method table for this management class */
		method = &(*class)->method_table[mgmt_class];
		if ((ret = allocate_method_table(method)))
			goto error2;
	} else {
		method = &(*class)->method_table[mgmt_class];
		if (!*method) {
			/* Allocate method table for this management class */
			if ((ret = allocate_method_table(method)))
				goto error1;
		}
	}

	/* Now, make sure methods are not already in use */
	if (method_in_use(method, mad_reg_req))
		goto error3;

	/* Finally, add in methods being registered */
	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
		(*method)->agent[i] = agent_priv;

	return 0;

error3:
	/* Remove any methods for this mad agent */
	remove_methods_mad_agent(*method, agent_priv);
	/* Now, check to see if there are any methods in use */
	if (!check_method_table(*method)) {
		/* If not, release management method table */
		kfree(*method);
		*method = NULL;
	}
	ret = -EINVAL;
	goto error1;
error2:
	kfree(*class);
	*class = NULL;
error1:
	return ret;
}

static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
			   struct ib_mad_agent_private *agent_priv)
{
	struct ib_mad_port_private *port_priv;
	struct ib_mad_mgmt_vendor_class_table **vendor_table;
	struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
	struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
	struct ib_mad_mgmt_method_table **method;
	int i, ret = -ENOMEM;
	u8 vclass;

	/* "New" vendor (with OUI) class */
	vclass = vendor_class_index(mad_reg_req->mgmt_class);
	port_priv = agent_priv->qp_info->port_priv;
	vendor_table = &port_priv->version[
				mad_reg_req->mgmt_class_version].vendor;
	if (!*vendor_table) {
		/* Allocate mgmt vendor class table for "new" class version */
		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
		if (!vendor) {
			dev_err(&agent_priv->agent.device->dev,
				"No memory for ib_mad_mgmt_vendor_class_table\n");
			goto error1;
		}

		*vendor_table = vendor;
	}
	if (!(*vendor_table)->vendor_class[vclass]) {
		/* Allocate table for this management vendor class */
		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
		if (!vendor_class) {
			dev_err(&agent_priv->agent.device->dev,
				"No memory for ib_mad_mgmt_vendor_class\n");
			goto error2;
		}

		(*vendor_table)->vendor_class[vclass] = vendor_class;
	}
	for (i = 0; i < MAX_MGMT_OUI; i++) {
		/* Is there matching OUI for this vendor class ? */
		if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
			    mad_reg_req->oui, 3)) {
			method = &(*vendor_table)->vendor_class[
						vclass]->method_table[i];
			BUG_ON(!*method);
			goto check_in_use;
		}
	}
	for (i = 0; i < MAX_MGMT_OUI; i++) {
		/* OUI slot available ? */
		if (!is_vendor_oui((*vendor_table)->vendor_class[
				vclass]->oui[i])) {
			method = &(*vendor_table)->vendor_class[
				vclass]->method_table[i];
			BUG_ON(*method);
			/* Allocate method table for this OUI */
			if ((ret = allocate_method_table(method)))
				goto error3;
			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
			       mad_reg_req->oui, 3);
			goto check_in_use;
		}
	}
	dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
	goto error3;

check_in_use:
	/* Now, make sure methods are not already in use */
	if (method_in_use(method, mad_reg_req))
		goto error4;

	/* Finally, add in methods being registered */
	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
		(*method)->agent[i] = agent_priv;

	return 0;

error4:
	/* Remove any methods for this mad agent */
	remove_methods_mad_agent(*method, agent_priv);
	/* Now, check to see if there are any methods in use */
	if (!check_method_table(*method)) {
		/* If not, release management method table */
		kfree(*method);
		*method = NULL;
	}
	ret = -EINVAL;
error3:
	if (vendor_class) {
		(*vendor_table)->vendor_class[vclass] = NULL;
		kfree(vendor_class);
	}
error2:
	if (vendor) {
		*vendor_table = NULL;
		kfree(vendor);
	}
error1:
	return ret;
}

static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
{
	struct ib_mad_port_private *port_priv;
	struct ib_mad_mgmt_class_table *class;
	struct ib_mad_mgmt_method_table *method;
	struct ib_mad_mgmt_vendor_class_table *vendor;
	struct ib_mad_mgmt_vendor_class *vendor_class;
	int index;
	u8 mgmt_class;

	/*
	 * Was MAD registration request supplied
	 * with original registration ?
	 */
	if (!agent_priv->reg_req) {
		goto out;
	}

	port_priv = agent_priv->qp_info->port_priv;
	mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
	class = port_priv->version[
			agent_priv->reg_req->mgmt_class_version].class;
	if (!class)
		goto vendor_check;

	method = class->method_table[mgmt_class];
	if (method) {
		/* Remove any methods for this mad agent */
		remove_methods_mad_agent(method, agent_priv);
		/* Now, check to see if there are any methods still in use */
		if (!check_method_table(method)) {
			/* If not, release management method table */
			kfree(method);
			class->method_table[mgmt_class] = NULL;
			/* Any management classes left ? */
			if (!check_class_table(class)) {
				/* If not, release management class table */
				kfree(class);
				port_priv->version[
					agent_priv->reg_req->
					mgmt_class_version].class = NULL;
			}
		}
	}

vendor_check:
	if (!is_vendor_class(mgmt_class))
		goto out;

	/* normalize mgmt_class to vendor range 2 */
	mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
	vendor = port_priv->version[
			agent_priv->reg_req->mgmt_class_version].vendor;

	if (!vendor)
		goto out;

	vendor_class = vendor->vendor_class[mgmt_class];
	if (vendor_class) {
		index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
		if (index < 0)
			goto out;
		method = vendor_class->method_table[index];
		if (method) {
			/* Remove any methods for this mad agent */
			remove_methods_mad_agent(method, agent_priv);
			/*
			 * Now, check to see if there are
			 * any methods still in use
			 */
			if (!check_method_table(method)) {
				/* If not, release management method table */
				kfree(method);
				vendor_class->method_table[index] = NULL;
				memset(vendor_class->oui[index], 0, 3);
				/* Any OUIs left ? */
				if (!check_vendor_class(vendor_class)) {
					/* If not, release vendor class table */
					kfree(vendor_class);
					vendor->vendor_class[mgmt_class] = NULL;
					/* Any other vendor classes left ? */
					if (!check_vendor_table(vendor)) {
						kfree(vendor);
						port_priv->version[
							agent_priv->reg_req->
							mgmt_class_version].
							vendor = NULL;
					}
				}
			}
		}
	}

out:
	return;
}

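/*
 * Locate the agent that should receive an incoming MAD.  Responses are
 * routed by the high 32 bits of the transaction ID, which the send side
 * stamped with the agent's hi_tid; requests are routed by class version,
 * management class and method, plus the OUI for vendor range 2 classes.
 */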
1623static struct ib_mad_agent_private *
1624find_mad_agent(struct ib_mad_port_private *port_priv,
1625	       struct ib_mad *mad)
1626{
1627	struct ib_mad_agent_private *mad_agent = NULL;
1628	unsigned long flags;
1629
1630	spin_lock_irqsave(&port_priv->reg_lock, flags);
1631	if (ib_response_mad(mad)) {
1632		u32 hi_tid;
1633		struct ib_mad_agent_private *entry;
1634
1635		/*
1636		 * Routing is based on high 32 bits of transaction ID
1637		 * of MAD.
1638		 */
1639		hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
1640		list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
1641			if (entry->agent.hi_tid == hi_tid) {
1642				mad_agent = entry;
1643				break;
1644			}
1645		}
1646	} else {
1647		struct ib_mad_mgmt_class_table *class;
1648		struct ib_mad_mgmt_method_table *method;
1649		struct ib_mad_mgmt_vendor_class_table *vendor;
1650		struct ib_mad_mgmt_vendor_class *vendor_class;
1651		struct ib_vendor_mad *vendor_mad;
1652		int index;
1653
1654		/*
1655		 * Routing is based on version, class, and method
1656		 * For "newer" vendor MADs, also based on OUI
1657		 */
1658		if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION)
1659			goto out;
1660		if (!is_vendor_class(mad->mad_hdr.mgmt_class)) {
1661			class = port_priv->version[
1662					mad->mad_hdr.class_version].class;
1663			if (!class)
1664				goto out;
1665			if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >=
1666			    IB_MGMT_MAX_METHODS)
1667				goto out;
1668			method = class->method_table[convert_mgmt_class(
1669							mad->mad_hdr.mgmt_class)];
1670			if (method)
1671				mad_agent = method->agent[mad->mad_hdr.method &
1672							  ~IB_MGMT_METHOD_RESP];
1673		} else {
1674			vendor = port_priv->version[
1675					mad->mad_hdr.class_version].vendor;
1676			if (!vendor)
1677				goto out;
1678			vendor_class = vendor->vendor_class[vendor_class_index(
1679						mad->mad_hdr.mgmt_class)];
1680			if (!vendor_class)
1681				goto out;
1682			/* Find matching OUI */
1683			vendor_mad = (struct ib_vendor_mad *)mad;
1684			index = find_vendor_oui(vendor_class, vendor_mad->oui);
1685			if (index == -1)
1686				goto out;
1687			method = vendor_class->method_table[index];
1688			if (method) {
1689				mad_agent = method->agent[mad->mad_hdr.method &
1690							  ~IB_MGMT_METHOD_RESP];
1691			}
1692		}
1693	}
1694
1695	if (mad_agent) {
1696		if (mad_agent->agent.recv_handler)
1697			atomic_inc(&mad_agent->refcount);
1698		else {
1699			dev_notice(&port_priv->device->dev,
1700				   "No receive handler for client %p on port %d\n",
1701				   &mad_agent->agent, port_priv->port_num);
1702			mad_agent = NULL;
1703		}
1704	}
1705out:
1706	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
1707
1708	return mad_agent;
1709}
1710
1711static int validate_mad(struct ib_mad *mad, u32 qp_num)
1712{
1713	int valid = 0;
1714
1715	/* Make sure MAD base version is understood */
1716	if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
1717		pr_err("MAD received with unsupported base version %d\n",
1718			mad->mad_hdr.base_version);
1719		goto out;
1720	}
1721
1722	/* Filter SMI packets sent to other than QP0 */
1723	if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
1724	    (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
1725		if (qp_num == 0)
1726			valid = 1;
1727	} else {
1728		/* Filter GSI packets sent to QP0 */
1729		if (qp_num != 0)
1730			valid = 1;
1731	}
1732
1733out:
1734	return valid;
1735}
1736
1737static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
1738		       struct ib_mad_hdr *mad_hdr)
1739{
1740	struct ib_rmpp_mad *rmpp_mad;
1741
1742	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
1743	return !mad_agent_priv->agent.rmpp_version ||
1744		!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
1745		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
1746				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
1747		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
1748}
1749
1750static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr,
1751				     struct ib_mad_recv_wc *rwc)
1752{
1753	return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class ==
1754		rwc->recv_buf.mad->mad_hdr.mgmt_class;
1755}
1756
1757static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv,
1758				   struct ib_mad_send_wr_private *wr,
1759				   struct ib_mad_recv_wc *rwc )
1760{
1761	struct ib_ah_attr attr;
1762	u8 send_resp, rcv_resp;
1763	union ib_gid sgid;
1764	struct ib_device *device = mad_agent_priv->agent.device;
1765	u8 port_num = mad_agent_priv->agent.port_num;
1766	u8 lmc;
1767
1768	send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad);
1769	rcv_resp = ib_response_mad(rwc->recv_buf.mad);
1770
1771	if (send_resp == rcv_resp)
1772		/* both requests, or both responses. GIDs different */
1773		return 0;
1774
1775	if (ib_query_ah(wr->send_buf.ah, &attr))
1776		/* Assume not equal, to avoid false positives. */
1777		return 0;
1778
1779	if (!!(attr.ah_flags & IB_AH_GRH) !=
1780	    !!(rwc->wc->wc_flags & IB_WC_GRH))
1781		/* one has GID, other does not.  Assume different */
1782		return 0;
1783
1784	if (!send_resp && rcv_resp) {
1785		/* we sent a request and this is its response */
1786		if (!(attr.ah_flags & IB_AH_GRH)) {
1787			if (ib_get_cached_lmc(device, port_num, &lmc))
1788				return 0;
1789			return (!lmc || !((attr.src_path_bits ^
1790					   rwc->wc->dlid_path_bits) &
1791					  ((1 << lmc) - 1)));
1792		} else {
1793			if (ib_get_cached_gid(device, port_num,
1794					      attr.grh.sgid_index, &sgid))
1795				return 0;
1796			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
1797				       16);
1798		}
1799	}
1800
1801	if (!(attr.ah_flags & IB_AH_GRH))
1802		return attr.dlid == rwc->wc->slid;
1803	else
1804		return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
1805			       16);
1806}
1807
1808static inline int is_direct(u8 class)
1809{
1810	return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
1811}
1812
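/*
 * Find the send work request that a received response corresponds to.  The
 * wait list is checked first; the send list is checked as well because a
 * response can arrive before the send completion has been processed.
 * Returns NULL if no matching, non-canceled request is found.
 */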
1813struct ib_mad_send_wr_private*
1814ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
1815		 struct ib_mad_recv_wc *wc)
1816{
1817	struct ib_mad_send_wr_private *wr;
1818	struct ib_mad *mad;
1819
1820	mad = (struct ib_mad *)wc->recv_buf.mad;
1821
1822	list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
1823		if ((wr->tid == mad->mad_hdr.tid) &&
1824		    rcv_has_same_class(wr, wc) &&
1825		    /*
1826		     * Don't check GID for direct routed MADs.
1827		     * These might have permissive LIDs.
1828		     */
1829		    (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
1830		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1831			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1832	}
1833
1834	/*
1835	 * It's possible to receive the response before we've
1836	 * been notified that the send has completed
1837	 */
1838	list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
1839		if (is_data_mad(mad_agent_priv, wr->send_buf.mad) &&
1840		    wr->tid == mad->mad_hdr.tid &&
1841		    wr->timeout &&
1842		    rcv_has_same_class(wr, wc) &&
1843		    /*
1844		     * Don't check GID for direct routed MADs.
1845		     * These might have permissive LIDs.
1846		     */
1847		    (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
1848		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1849			/* Verify request has not been canceled */
1850			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1851	}
1852	return NULL;
1853}
1854
1855void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
1856{
1857	mad_send_wr->timeout = 0;
1858	if (mad_send_wr->refcount == 1)
1859		list_move_tail(&mad_send_wr->agent_list,
1860			      &mad_send_wr->mad_agent_priv->done_list);
1861}
1862
1863static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1864				 struct ib_mad_recv_wc *mad_recv_wc)
1865{
1866	struct ib_mad_send_wr_private *mad_send_wr;
1867	struct ib_mad_send_wc mad_send_wc;
1868	unsigned long flags;
1869
1870	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
1871	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
1872	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1873		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
1874						      mad_recv_wc);
1875		if (!mad_recv_wc) {
1876			deref_mad_agent(mad_agent_priv);
1877			return;
1878		}
1879	}
1880
1881	/* Complete corresponding request */
1882	if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
1883		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1884		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
1885		if (!mad_send_wr) {
1886			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1887			if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
1888			   && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
1889			   && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
1890					& IB_MGMT_RMPP_FLAG_ACTIVE)) {
1891				/* user rmpp is in effect
1892				 * and this is an active RMPP MAD
1893				 */
1894				mad_recv_wc->wc->wr_id = 0;
1895				mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1896								   mad_recv_wc);
1897				atomic_dec(&mad_agent_priv->refcount);
1898			} else {
1899				/* not user rmpp, revert to normal behavior and
1900				 * drop the mad */
1901				ib_free_recv_mad(mad_recv_wc);
1902				deref_mad_agent(mad_agent_priv);
1903				return;
1904			}
1905		} else {
1906			ib_mark_mad_done(mad_send_wr);
1907			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1908
1909			/* Defined behavior is to complete response before request */
1910			mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
1911			mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1912							   mad_recv_wc);
1913			atomic_dec(&mad_agent_priv->refcount);
1914
1915			mad_send_wc.status = IB_WC_SUCCESS;
1916			mad_send_wc.vendor_err = 0;
1917			mad_send_wc.send_buf = &mad_send_wr->send_buf;
1918			ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1919		}
1920	} else {
1921		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1922						   mad_recv_wc);
1923		deref_mad_agent(mad_agent_priv);
1924	}
1925}
1926
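/*
 * For a GET or SET request that no agent claimed, turn the received MAD into
 * a GetResp carrying "unsupported method/attribute" status so the requester
 * is not left to time out.  Returns false for all other methods.
 */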
1927static bool generate_unmatched_resp(struct ib_mad_private *recv,
1928				    struct ib_mad_private *response)
1929{
1930	if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET ||
1931	    recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) {
1932		memcpy(response, recv, sizeof *response);
1933		response->header.recv_wc.wc = &response->header.wc;
1934		response->header.recv_wc.recv_buf.mad = &response->mad.mad;
1935		response->header.recv_wc.recv_buf.grh = &response->grh;
1936		response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
1937		response->mad.mad.mad_hdr.status =
1938			cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
1939		if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
1940			response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION;
1941
1942		return true;
1943	} else {
1944		return false;
1945	}
1946}

1947static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1948				     struct ib_wc *wc)
1949{
1950	struct ib_mad_qp_info *qp_info;
1951	struct ib_mad_private_header *mad_priv_hdr;
1952	struct ib_mad_private *recv, *response = NULL;
1953	struct ib_mad_list_head *mad_list;
1954	struct ib_mad_agent_private *mad_agent;
1955	int port_num;
1956	int ret = IB_MAD_RESULT_SUCCESS;
1957
1958	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
1959	qp_info = mad_list->mad_queue->qp_info;
1960	dequeue_mad(mad_list);
1961
1962	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
1963				    mad_list);
1964	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
1965	ib_dma_unmap_single(port_priv->device,
1966			    recv->header.mapping,
1967			    sizeof(struct ib_mad_private) -
1968			      sizeof(struct ib_mad_private_header),
1969			    DMA_FROM_DEVICE);
1970
1971	/* Setup MAD receive work completion from "normal" work completion */
1972	recv->header.wc = *wc;
1973	recv->header.recv_wc.wc = &recv->header.wc;
1974	recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
1975	recv->header.recv_wc.recv_buf.mad = &recv->mad.mad;
1976	recv->header.recv_wc.recv_buf.grh = &recv->grh;
1977
1978	if (atomic_read(&qp_info->snoop_count))
1979		snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
1980
1981	/* Validate MAD */
1982	if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num))
1983		goto out;
1984
1985	response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
1986	if (!response) {
1987		dev_err(&port_priv->device->dev,
1988			"ib_mad_recv_done_handler no memory for response buffer\n");
1989		goto out;
1990	}
1991
1992	if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH)
1993		port_num = wc->port_num;
1994	else
1995		port_num = port_priv->port_num;
1996
1997	if (recv->mad.mad.mad_hdr.mgmt_class ==
1998	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1999		enum smi_forward_action retsmi;
2000
2001		if (smi_handle_dr_smp_recv(&recv->mad.smp,
2002					   port_priv->device->node_type,
2003					   port_num,
2004					   port_priv->device->phys_port_cnt) ==
2005					   IB_SMI_DISCARD)
2006			goto out;
2007
2008		retsmi = smi_check_forward_dr_smp(&recv->mad.smp);
2009		if (retsmi == IB_SMI_LOCAL)
2010			goto local;
2011
2012		if (retsmi == IB_SMI_SEND) { /* don't forward */
2013			if (smi_handle_dr_smp_send(&recv->mad.smp,
2014						   port_priv->device->node_type,
2015						   port_num) == IB_SMI_DISCARD)
2016				goto out;
2017
2018			if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
2019				goto out;
2020		} else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) {
2021			/* forward case for switches */
2022			memcpy(response, recv, sizeof(*response));
2023			response->header.recv_wc.wc = &response->header.wc;
2024			response->header.recv_wc.recv_buf.mad = &response->mad.mad;
2025			response->header.recv_wc.recv_buf.grh = &response->grh;
2026
2027			agent_send_response(&response->mad.mad,
2028					    &response->grh, wc,
2029					    port_priv->device,
2030					    smi_get_fwd_port(&recv->mad.smp),
2031					    qp_info->qp->qp_num);
2032
2033			goto out;
2034		}
2035	}
2036
2037local:
2038	/* Give driver "right of first refusal" on incoming MAD */
2039	if (port_priv->device->process_mad) {
2040		ret = port_priv->device->process_mad(port_priv->device, 0,
2041						     port_priv->port_num,
2042						     wc, &recv->grh,
2043						     &recv->mad.mad,
2044						     &response->mad.mad);
2045		if (ret & IB_MAD_RESULT_SUCCESS) {
2046			if (ret & IB_MAD_RESULT_CONSUMED)
2047				goto out;
2048			if (ret & IB_MAD_RESULT_REPLY) {
2049				agent_send_response(&response->mad.mad,
2050						    &recv->grh, wc,
2051						    port_priv->device,
2052						    port_num,
2053						    qp_info->qp->qp_num);
2054				goto out;
2055			}
2056		}
2057	}
2058
2059	mad_agent = find_mad_agent(port_priv, &recv->mad.mad);
2060	if (mad_agent) {
2061		ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
2062		/*
2063		 * recv is freed inside ib_mad_complete_recv(), either on its
2064		 * error paths or by the client's recv_handler
2065		 */
2066		recv = NULL;
2067	} else if ((ret & IB_MAD_RESULT_SUCCESS) &&
2068		   generate_unmatched_resp(recv, response)) {
2069		agent_send_response(&response->mad.mad, &recv->grh, wc,
2070				    port_priv->device, port_num, qp_info->qp->qp_num);
2071	}
2072
2073out:
2074	/* Post another receive request for this QP */
2075	if (response) {
2076		ib_mad_post_receive_mads(qp_info, response);
2077		if (recv)
2078			kmem_cache_free(ib_mad_cache, recv);
2079	} else
2080		ib_mad_post_receive_mads(qp_info, recv);
2081}
2082
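/*
 * Re-arm (or cancel) the agent's delayed timeout work so that it fires for
 * the request at the head of the wait list, which is kept sorted by expiry.
 */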
2083static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
2084{
2085	struct ib_mad_send_wr_private *mad_send_wr;
2086	unsigned long delay;
2087
2088	if (list_empty(&mad_agent_priv->wait_list)) {
2089		cancel_delayed_work(&mad_agent_priv->timed_work);
2090	} else {
2091		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2092					 struct ib_mad_send_wr_private,
2093					 agent_list);
2094
2095		if (time_after(mad_agent_priv->timeout,
2096			       mad_send_wr->timeout)) {
2097			mad_agent_priv->timeout = mad_send_wr->timeout;
2098			delay = mad_send_wr->timeout - jiffies;
2099			if ((long)delay <= 0)
2100				delay = 1;
2101			mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2102					 &mad_agent_priv->timed_work, delay);
2103		}
2104	}
2105}
2106
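/*
 * Move a request whose send has completed onto the wait list, keeping the
 * list sorted by absolute expiry time, and reschedule the timeout work if
 * this request now expires first.
 */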
2107static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
2108{
2109	struct ib_mad_agent_private *mad_agent_priv;
2110	struct ib_mad_send_wr_private *temp_mad_send_wr;
2111	struct list_head *list_item;
2112	unsigned long delay;
2113
2114	mad_agent_priv = mad_send_wr->mad_agent_priv;
2115	list_del(&mad_send_wr->agent_list);
2116
2117	delay = mad_send_wr->timeout;
2118	mad_send_wr->timeout += jiffies;
2119
2120	if (delay) {
2121		list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
2122			temp_mad_send_wr = list_entry(list_item,
2123						struct ib_mad_send_wr_private,
2124						agent_list);
2125			if (time_after(mad_send_wr->timeout,
2126				       temp_mad_send_wr->timeout))
2127				break;
2128		}
2129	} else
2131		list_item = &mad_agent_priv->wait_list;
2132	list_add(&mad_send_wr->agent_list, list_item);
2133
2134	/* Reschedule a work item if we have a shorter timeout */
2135	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
2136		mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2137				 &mad_agent_priv->timed_work, delay);
2138}
2139
2140void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
2141			  int timeout_ms)
2142{
2143	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2144	wait_for_response(mad_send_wr);
2145}
2146
2147/*
2148 * Process a send work completion
2149 */
2150void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
2151			     struct ib_mad_send_wc *mad_send_wc)
2152{
2153	struct ib_mad_agent_private	*mad_agent_priv;
2154	unsigned long			flags;
2155	int				ret;
2156
2157	mad_agent_priv = mad_send_wr->mad_agent_priv;
2158	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2159	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
2160		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
2161		if (ret == IB_RMPP_RESULT_CONSUMED)
2162			goto done;
2163	} else
2164		ret = IB_RMPP_RESULT_UNHANDLED;
2165
2166	if (mad_send_wc->status != IB_WC_SUCCESS &&
2167	    mad_send_wr->status == IB_WC_SUCCESS) {
2168		mad_send_wr->status = mad_send_wc->status;
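		/*
		 * A failed send will never get a response; drop the extra
		 * reference that was being held for the response timeout.
		 */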
2169		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2170	}
2171
2172	if (--mad_send_wr->refcount > 0) {
2173		if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
2174		    mad_send_wr->status == IB_WC_SUCCESS) {
2175			wait_for_response(mad_send_wr);
2176		}
2177		goto done;
2178	}
2179
2180	/* Remove send from MAD agent and notify client of completion */
2181	list_del(&mad_send_wr->agent_list);
2182	adjust_timeout(mad_agent_priv);
2183	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2184
2185	if (mad_send_wr->status != IB_WC_SUCCESS)
2186		mad_send_wc->status = mad_send_wr->status;
2187	if (ret == IB_RMPP_RESULT_INTERNAL)
2188		ib_rmpp_send_handler(mad_send_wc);
2189	else
2190		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2191						   mad_send_wc);
2192
2193	/* Release reference on agent taken when sending */
2194	deref_mad_agent(mad_agent_priv);
2195	return;
2196done:
2197	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2198}
2199
2200static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
2201				     struct ib_wc *wc)
2202{
2203	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
2204	struct ib_mad_list_head		*mad_list;
2205	struct ib_mad_qp_info		*qp_info;
2206	struct ib_mad_queue		*send_queue;
2207	struct ib_send_wr		*bad_send_wr;
2208	struct ib_mad_send_wc		mad_send_wc;
2209	unsigned long flags;
2210	int ret;
2211
2212	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
2213	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2214				   mad_list);
2215	send_queue = mad_list->mad_queue;
2216	qp_info = send_queue->qp_info;
2217
2218retry:
2219	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2220			    mad_send_wr->header_mapping,
2221			    mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
2222	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2223			    mad_send_wr->payload_mapping,
2224			    mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
2225	queued_send_wr = NULL;
2226	spin_lock_irqsave(&send_queue->lock, flags);
2227	list_del(&mad_list->list);
2228
2229	/* Move queued send to the send queue */
2230	if (send_queue->count-- > send_queue->max_active) {
2231		mad_list = container_of(qp_info->overflow_list.next,
2232					struct ib_mad_list_head, list);
2233		queued_send_wr = container_of(mad_list,
2234					struct ib_mad_send_wr_private,
2235					mad_list);
2236		list_move_tail(&mad_list->list, &send_queue->list);
2237	}
2238	spin_unlock_irqrestore(&send_queue->lock, flags);
2239
2240	mad_send_wc.send_buf = &mad_send_wr->send_buf;
2241	mad_send_wc.status = wc->status;
2242	mad_send_wc.vendor_err = wc->vendor_err;
2243	if (atomic_read(&qp_info->snoop_count))
2244		snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
2245			   IB_MAD_SNOOP_SEND_COMPLETIONS);
2246	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
2247
2248	if (queued_send_wr) {
2249		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
2250				   &bad_send_wr);
2251		if (ret) {
2252			dev_err(&port_priv->device->dev,
2253				"ib_post_send failed: %d\n", ret);
2254			mad_send_wr = queued_send_wr;
2255			wc->status = IB_WC_LOC_QP_OP_ERR;
2256			goto retry;
2257		}
2258	}
2259}
2260
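/*
 * Flag every send still posted on the QP so that, once the QP is back in
 * RTS, its flushed completion is reposted instead of being reported to the
 * client as a failure.
 */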
2261static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
2262{
2263	struct ib_mad_send_wr_private *mad_send_wr;
2264	struct ib_mad_list_head *mad_list;
2265	unsigned long flags;
2266
2267	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
2268	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
2269		mad_send_wr = container_of(mad_list,
2270					   struct ib_mad_send_wr_private,
2271					   mad_list);
2272		mad_send_wr->retry = 1;
2273	}
2274	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
2275}
2276
2277static void mad_error_handler(struct ib_mad_port_private *port_priv,
2278			      struct ib_wc *wc)
2279{
2280	struct ib_mad_list_head *mad_list;
2281	struct ib_mad_qp_info *qp_info;
2282	struct ib_mad_send_wr_private *mad_send_wr;
2283	int ret;
2284
2285	/* Determine if failure was a send or receive */
2286	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
2287	qp_info = mad_list->mad_queue->qp_info;
2288	if (mad_list->mad_queue == &qp_info->recv_queue)
2289		/*
2290		 * Receive errors indicate that the QP has entered the error
2291		 * state - error handling/shutdown code will clean up
2292		 */
2293		return;
2294
2295	/*
2296	 * Send errors will transition the QP to SQE - move
2297	 * QP to RTS and repost flushed work requests
2298	 */
2299	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2300				   mad_list);
2301	if (wc->status == IB_WC_WR_FLUSH_ERR) {
2302		if (mad_send_wr->retry) {
2303			/* Repost send */
2304			struct ib_send_wr *bad_send_wr;
2305
2306			mad_send_wr->retry = 0;
2307			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
2308					&bad_send_wr);
2309			if (ret)
2310				ib_mad_send_done_handler(port_priv, wc);
2311		} else
2312			ib_mad_send_done_handler(port_priv, wc);
2313	} else {
2314		struct ib_qp_attr *attr;
2315
2316		/* Transition QP to RTS and fail offending send */
2317		attr = kmalloc(sizeof *attr, GFP_KERNEL);
2318		if (attr) {
2319			attr->qp_state = IB_QPS_RTS;
2320			attr->cur_qp_state = IB_QPS_SQE;
2321			ret = ib_modify_qp(qp_info->qp, attr,
2322					   IB_QP_STATE | IB_QP_CUR_STATE);
2323			kfree(attr);
2324			if (ret)
2325				dev_err(&port_priv->device->dev,
2326				"mad_error_handler - ib_modify_qp to RTS: %d\n",
2327					ret);
2328			else
2329				mark_sends_for_retry(qp_info);
2330		}
2331		ib_mad_send_done_handler(port_priv, wc);
2332	}
2333}
2334
2335/*
2336 * IB MAD completion callback
2337 */
2338static void ib_mad_completion_handler(struct work_struct *work)
2339{
2340	struct ib_mad_port_private *port_priv;
2341	struct ib_wc wc;
2342
2343	port_priv = container_of(work, struct ib_mad_port_private, work);
2344	ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2345
2346	while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
2347		if (wc.status == IB_WC_SUCCESS) {
2348			switch (wc.opcode) {
2349			case IB_WC_SEND:
2350				ib_mad_send_done_handler(port_priv, &wc);
2351				break;
2352			case IB_WC_RECV:
2353				ib_mad_recv_done_handler(port_priv, &wc);
2354				break;
2355			default:
2356				BUG_ON(1);
2357				break;
2358			}
2359		} else
2360			mad_error_handler(port_priv, &wc);
2361	}
2362}
2363
2364static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2365{
2366	unsigned long flags;
2367	struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
2368	struct ib_mad_send_wc mad_send_wc;
2369	struct list_head cancel_list;
2370
2371	INIT_LIST_HEAD(&cancel_list);
2372
2373	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2374	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2375				 &mad_agent_priv->send_list, agent_list) {
2376		if (mad_send_wr->status == IB_WC_SUCCESS) {
2377			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2378			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2379		}
2380	}
2381
2382	/* Empty wait list to prevent receives from finding a request */
2383	list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
2384	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2385
2386	/* Report all cancelled requests */
2387	mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
2388	mad_send_wc.vendor_err = 0;
2389
2390	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2391				 &cancel_list, agent_list) {
2392		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2393		list_del(&mad_send_wr->agent_list);
2394		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2395						   &mad_send_wc);
2396		atomic_dec(&mad_agent_priv->refcount);
2397	}
2398}
2399
2400static struct ib_mad_send_wr_private*
2401find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
2402	     struct ib_mad_send_buf *send_buf)
2403{
2404	struct ib_mad_send_wr_private *mad_send_wr;
2405
2406	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
2407			    agent_list) {
2408		if (&mad_send_wr->send_buf == send_buf)
2409			return mad_send_wr;
2410	}
2411
2412	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
2413			    agent_list) {
2414		if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
2415		    &mad_send_wr->send_buf == send_buf)
2416			return mad_send_wr;
2417	}
2418	return NULL;
2419}
2420
2421int ib_modify_mad(struct ib_mad_agent *mad_agent,
2422		  struct ib_mad_send_buf *send_buf, u32 timeout_ms)
2423{
2424	struct ib_mad_agent_private *mad_agent_priv;
2425	struct ib_mad_send_wr_private *mad_send_wr;
2426	unsigned long flags;
2427	int active;
2428
2429	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
2430				      agent);
2431	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2432	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
2433	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
2434		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2435		return -EINVAL;
2436	}
2437
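	/*
	 * "active" means the request has not yet reached the wait list:
	 * either its send completion is still outstanding (refcount > 1) or
	 * no timeout is currently armed.  In that case just store the new
	 * relative timeout; otherwise re-queue it via ib_reset_mad_timeout().
	 */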
2438	active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
2439	if (!timeout_ms) {
2440		mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2441		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2442	}
2443
2444	mad_send_wr->send_buf.timeout_ms = timeout_ms;
2445	if (active)
2446		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2447	else
2448		ib_reset_mad_timeout(mad_send_wr, timeout_ms);
2449
2450	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2451	return 0;
2452}
2453EXPORT_SYMBOL(ib_modify_mad);
2454
2455void ib_cancel_mad(struct ib_mad_agent *mad_agent,
2456		   struct ib_mad_send_buf *send_buf)
2457{
2458	ib_modify_mad(mad_agent, send_buf, 0);
2459}
2460EXPORT_SYMBOL(ib_cancel_mad);
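/*
 * Illustrative sketch only (assumed client-side names): a caller that posted
 * a request with ib_post_send_mad() can later adjust or cancel it.  "agent"
 * and "send_buf" stand for the objects returned by ib_register_mad_agent()
 * and ib_create_send_mad(); they are not defined in this file.
 *
 *	// allow the outstanding request 100 more milliseconds
 *	if (ib_modify_mad(agent, send_buf, 100))
 *		pr_debug("request already completed or was never sent\n");
 *
 *	// or abandon it; the send handler then sees IB_WC_WR_FLUSH_ERR
 *	ib_cancel_mad(agent, send_buf);
 */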
2461
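/*
 * Work handler for MADs that were satisfied entirely on the local port
 * (e.g. directed-route SMPs addressed to this node): deliver the locally
 * generated "receive" to the destination agent, then complete the send.
 */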
2462static void local_completions(struct work_struct *work)
2463{
2464	struct ib_mad_agent_private *mad_agent_priv;
2465	struct ib_mad_local_private *local;
2466	struct ib_mad_agent_private *recv_mad_agent;
2467	unsigned long flags;
2468	int free_mad;
2469	struct ib_wc wc;
2470	struct ib_mad_send_wc mad_send_wc;
2471
2472	mad_agent_priv =
2473		container_of(work, struct ib_mad_agent_private, local_work);
2474
2475	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2476	while (!list_empty(&mad_agent_priv->local_list)) {
2477		local = list_entry(mad_agent_priv->local_list.next,
2478				   struct ib_mad_local_private,
2479				   completion_list);
2480		list_del(&local->completion_list);
2481		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2482		free_mad = 0;
2483		if (local->mad_priv) {
2484			recv_mad_agent = local->recv_mad_agent;
2485			if (!recv_mad_agent) {
2486				dev_err(&mad_agent_priv->agent.device->dev,
2487					"No receive MAD agent for local completion\n");
2488				free_mad = 1;
2489				goto local_send_completion;
2490			}
2491
2492			/*
2493			 * Defined behavior is to complete response
2494			 * before request
2495			 */
2496			build_smp_wc(recv_mad_agent->agent.qp,
2497				     (unsigned long) local->mad_send_wr,
2498				     be16_to_cpu(IB_LID_PERMISSIVE),
2499				     0, recv_mad_agent->agent.port_num, &wc);
2500
2501			local->mad_priv->header.recv_wc.wc = &wc;
2502			local->mad_priv->header.recv_wc.mad_len =
2503						sizeof(struct ib_mad);
2504			INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
2505			list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
2506				 &local->mad_priv->header.recv_wc.rmpp_list);
2507			local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
2508			local->mad_priv->header.recv_wc.recv_buf.mad =
2509						&local->mad_priv->mad.mad;
2510			if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
2511				snoop_recv(recv_mad_agent->qp_info,
2512					  &local->mad_priv->header.recv_wc,
2513					   IB_MAD_SNOOP_RECVS);
2514			recv_mad_agent->agent.recv_handler(
2515						&recv_mad_agent->agent,
2516						&local->mad_priv->header.recv_wc);
2517			spin_lock_irqsave(&recv_mad_agent->lock, flags);
2518			atomic_dec(&recv_mad_agent->refcount);
2519			spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
2520		}
2521
2522local_send_completion:
2523		/* Complete send */
2524		mad_send_wc.status = IB_WC_SUCCESS;
2525		mad_send_wc.vendor_err = 0;
2526		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
2527		if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
2528			snoop_send(mad_agent_priv->qp_info,
2529				   &local->mad_send_wr->send_buf,
2530				   &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
2531		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2532						   &mad_send_wc);
2533
2534		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2535		atomic_dec(&mad_agent_priv->refcount);
2536		if (free_mad)
2537			kmem_cache_free(ib_mad_cache, local->mad_priv);
2538		kfree(local);
2539	}
2540	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2541}
2542
2543static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2544{
2545	int ret;
2546
2547	if (!mad_send_wr->retries_left)
2548		return -ETIMEDOUT;
2549
2550	mad_send_wr->retries_left--;
2551	mad_send_wr->send_buf.retries++;
2552
2553	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2554
2555	if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
2556		ret = ib_retry_rmpp(mad_send_wr);
2557		switch (ret) {
2558		case IB_RMPP_RESULT_UNHANDLED:
2559			ret = ib_send_mad(mad_send_wr);
2560			break;
2561		case IB_RMPP_RESULT_CONSUMED:
2562			ret = 0;
2563			break;
2564		default:
2565			ret = -ECOMM;
2566			break;
2567		}
2568	} else
2569		ret = ib_send_mad(mad_send_wr);
2570
2571	if (!ret) {
2572		mad_send_wr->refcount++;
2573		list_add_tail(&mad_send_wr->agent_list,
2574			      &mad_send_wr->mad_agent_priv->send_list);
2575	}
2576	return ret;
2577}
2578
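/*
 * Delayed-work handler: expire requests whose response timeout has passed,
 * retrying them while retries remain and otherwise completing them with
 * IB_WC_RESP_TIMEOUT_ERR (or with their previously recorded error status).
 */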
2579static void timeout_sends(struct work_struct *work)
2580{
2581	struct ib_mad_agent_private *mad_agent_priv;
2582	struct ib_mad_send_wr_private *mad_send_wr;
2583	struct ib_mad_send_wc mad_send_wc;
2584	unsigned long flags, delay;
2585
2586	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
2587				      timed_work.work);
2588	mad_send_wc.vendor_err = 0;
2589
2590	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2591	while (!list_empty(&mad_agent_priv->wait_list)) {
2592		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2593					 struct ib_mad_send_wr_private,
2594					 agent_list);
2595
2596		if (time_after(mad_send_wr->timeout, jiffies)) {
2597			delay = mad_send_wr->timeout - jiffies;
2598			if ((long)delay <= 0)
2599				delay = 1;
2600			queue_delayed_work(mad_agent_priv->qp_info->
2601					   port_priv->wq,
2602					   &mad_agent_priv->timed_work, delay);
2603			break;
2604		}
2605
2606		list_del(&mad_send_wr->agent_list);
2607		if (mad_send_wr->status == IB_WC_SUCCESS &&
2608		    !retry_send(mad_send_wr))
2609			continue;
2610
2611		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2612
2613		if (mad_send_wr->status == IB_WC_SUCCESS)
2614			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2615		else
2616			mad_send_wc.status = mad_send_wr->status;
2617		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2618		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2619						   &mad_send_wc);
2620
2621		atomic_dec(&mad_agent_priv->refcount);
2622		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2623	}
2624	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2625}
2626
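/*
 * CQ event callback: may run in interrupt context, so just queue the CQ
 * polling work on the port's workqueue (unless the port is being removed).
 */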
2627static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
2628{
2629	struct ib_mad_port_private *port_priv = cq->cq_context;
2630	unsigned long flags;
2631
2632	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2633	if (!list_empty(&port_priv->port_list))
2634		queue_work(port_priv->wq, &port_priv->work);
2635	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2636}
2637
2638/*
2639 * Allocate receive MADs and post receive WRs for them
2640 */
2641static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2642				    struct ib_mad_private *mad)
2643{
2644	unsigned long flags;
2645	int post, ret;
2646	struct ib_mad_private *mad_priv;
2647	struct ib_sge sg_list;
2648	struct ib_recv_wr recv_wr, *bad_recv_wr;
2649	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
2650
2651	/* Initialize common scatter list fields */
2652	sg_list.length = sizeof *mad_priv - sizeof mad_priv->header;
2653	sg_list.lkey = qp_info->port_priv->mr->lkey;
2654
2655	/* Initialize common receive WR fields */
2656	recv_wr.next = NULL;
2657	recv_wr.sg_list = &sg_list;
2658	recv_wr.num_sge = 1;
2659
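	/*
	 * Keep posting until the receive queue is back at max_active entries.
	 * A buffer passed in via "mad" is reused for the first WR; further
	 * buffers come from the ib_mad slab cache.
	 */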
2660	do {
2661		/* Allocate and map receive buffer */
2662		if (mad) {
2663			mad_priv = mad;
2664			mad = NULL;
2665		} else {
2666			mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
2667			if (!mad_priv) {
2668				dev_err(&qp_info->port_priv->device->dev,
2669					"No memory for receive buffer\n");
2670				ret = -ENOMEM;
2671				break;
2672			}
2673		}
2674		sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
2675						 &mad_priv->grh,
2676						 sizeof *mad_priv -
2677						   sizeof mad_priv->header,
2678						 DMA_FROM_DEVICE);
2679		if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
2680						  sg_list.addr))) {
2681			ret = -ENOMEM;
2682			break;
2683		}
2684		mad_priv->header.mapping = sg_list.addr;
2685		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
2686		mad_priv->header.mad_list.mad_queue = recv_queue;
2687
2688		/* Post receive WR */
2689		spin_lock_irqsave(&recv_queue->lock, flags);
2690		post = (++recv_queue->count < recv_queue->max_active);
2691		list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
2692		spin_unlock_irqrestore(&recv_queue->lock, flags);
2693		ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
2694		if (ret) {
2695			spin_lock_irqsave(&recv_queue->lock, flags);
2696			list_del(&mad_priv->header.mad_list.list);
2697			recv_queue->count--;
2698			spin_unlock_irqrestore(&recv_queue->lock, flags);
2699			ib_dma_unmap_single(qp_info->port_priv->device,
2700					    mad_priv->header.mapping,
2701					    sizeof *mad_priv -
2702					      sizeof mad_priv->header,
2703					    DMA_FROM_DEVICE);
2704			kmem_cache_free(ib_mad_cache, mad_priv);
2705			dev_err(&qp_info->port_priv->device->dev,
2706				"ib_post_recv failed: %d\n", ret);
2707			break;
2708		}
2709	} while (post);
2710
2711	return ret;
2712}
2713
2714/*
2715 * Return all the posted receive MADs
2716 */
2717static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
2718{
2719	struct ib_mad_private_header *mad_priv_hdr;
2720	struct ib_mad_private *recv;
2721	struct ib_mad_list_head *mad_list;
2722
2723	if (!qp_info->qp)
2724		return;
2725
2726	while (!list_empty(&qp_info->recv_queue.list)) {
2727
2728		mad_list = list_entry(qp_info->recv_queue.list.next,
2729				      struct ib_mad_list_head, list);
2730		mad_priv_hdr = container_of(mad_list,
2731					    struct ib_mad_private_header,
2732					    mad_list);
2733		recv = container_of(mad_priv_hdr, struct ib_mad_private,
2734				    header);
2735
2736		/* Remove from posted receive MAD list */
2737		list_del(&mad_list->list);
2738
2739		ib_dma_unmap_single(qp_info->port_priv->device,
2740				    recv->header.mapping,
2741				    sizeof(struct ib_mad_private) -
2742				      sizeof(struct ib_mad_private_header),
2743				    DMA_FROM_DEVICE);
2744		kmem_cache_free(ib_mad_cache, recv);
2745	}
2746
2747	qp_info->recv_queue.count = 0;
2748}
2749
2750/*
2751 * Start the port
2752 */
2753static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2754{
2755	int ret, i;
2756	struct ib_qp_attr *attr;
2757	struct ib_qp *qp;
2758	u16 pkey_index;
2759
2760	attr = kmalloc(sizeof *attr, GFP_KERNEL);
2761	if (!attr) {
2762		dev_err(&port_priv->device->dev,
2763			"Couldn't kmalloc ib_qp_attr\n");
2764		return -ENOMEM;
2765	}
2766
2767	ret = ib_find_pkey(port_priv->device, port_priv->port_num,
2768			   IB_DEFAULT_PKEY_FULL, &pkey_index);
2769	if (ret)
2770		pkey_index = 0;
2771
2772	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2773		qp = port_priv->qp_info[i].qp;
2774		if (!qp)
2775			continue;
2776
2777		/*
2778		 * PKey index for QP1 is irrelevant but
2779		 * one is needed for the Reset to Init transition
2780		 */
2781		attr->qp_state = IB_QPS_INIT;
2782		attr->pkey_index = pkey_index;
2783		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
2784		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
2785					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
2786		if (ret) {
2787			dev_err(&port_priv->device->dev,
2788				"Couldn't change QP%d state to INIT: %d\n",
2789				i, ret);
2790			goto out;
2791		}
2792
2793		attr->qp_state = IB_QPS_RTR;
2794		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
2795		if (ret) {
2796			dev_err(&port_priv->device->dev,
2797				"Couldn't change QP%d state to RTR: %d\n",
2798				i, ret);
2799			goto out;
2800		}
2801
2802		attr->qp_state = IB_QPS_RTS;
2803		attr->sq_psn = IB_MAD_SEND_Q_PSN;
2804		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
2805		if (ret) {
2806			dev_err(&port_priv->device->dev,
2807				"Couldn't change QP%d state to RTS: %d\n",
2808				i, ret);
2809			goto out;
2810		}
2811	}
2812
2813	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2814	if (ret) {
2815		dev_err(&port_priv->device->dev,
2816			"Failed to request completion notification: %d\n",
2817			ret);
2818		goto out;
2819	}
2820
2821	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2822		if (!port_priv->qp_info[i].qp)
2823			continue;
2824
2825		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
2826		if (ret) {
2827			dev_err(&port_priv->device->dev,
2828				"Couldn't post receive WRs\n");
2829			goto out;
2830		}
2831	}
2832out:
2833	kfree(attr);
2834	return ret;
2835}
2836
2837static void qp_event_handler(struct ib_event *event, void *qp_context)
2838{
2839	struct ib_mad_qp_info	*qp_info = qp_context;
2840
2841	/* It's worse than that! He's dead, Jim! */
2842	dev_err(&qp_info->port_priv->device->dev,
2843		"Fatal error (%d) on MAD QP (%d)\n",
2844		event->event, qp_info->qp->qp_num);
2845}
2846
2847static void init_mad_queue(struct ib_mad_qp_info *qp_info,
2848			   struct ib_mad_queue *mad_queue)
2849{
2850	mad_queue->qp_info = qp_info;
2851	mad_queue->count = 0;
2852	spin_lock_init(&mad_queue->lock);
2853	INIT_LIST_HEAD(&mad_queue->list);
2854}
2855
2856static void init_mad_qp(struct ib_mad_port_private *port_priv,
2857			struct ib_mad_qp_info *qp_info)
2858{
2859	qp_info->port_priv = port_priv;
2860	init_mad_queue(qp_info, &qp_info->send_queue);
2861	init_mad_queue(qp_info, &qp_info->recv_queue);
2862	INIT_LIST_HEAD(&qp_info->overflow_list);
2863	spin_lock_init(&qp_info->snoop_lock);
2864	qp_info->snoop_table = NULL;
2865	qp_info->snoop_table_size = 0;
2866	atomic_set(&qp_info->snoop_count, 0);
2867}
2868
2869static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2870			 enum ib_qp_type qp_type)
2871{
2872	struct ib_qp_init_attr	qp_init_attr;
2873	int ret;
2874
2875	memset(&qp_init_attr, 0, sizeof qp_init_attr);
2876	qp_init_attr.send_cq = qp_info->port_priv->cq;
2877	qp_init_attr.recv_cq = qp_info->port_priv->cq;
2878	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
2879	qp_init_attr.cap.max_send_wr = mad_sendq_size;
2880	qp_init_attr.cap.max_recv_wr = mad_recvq_size;
2881	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
2882	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
2883	qp_init_attr.qp_type = qp_type;
2884	qp_init_attr.port_num = qp_info->port_priv->port_num;
2885	qp_init_attr.qp_context = qp_info;
2886	qp_init_attr.event_handler = qp_event_handler;
2887	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
2888	if (IS_ERR(qp_info->qp)) {
2889		dev_err(&qp_info->port_priv->device->dev,
2890			"Couldn't create ib_mad QP%d\n",
2891			get_spl_qp_index(qp_type));
2892		ret = PTR_ERR(qp_info->qp);
2893		goto error;
2894	}
2895	/* Use minimum queue sizes unless the CQ is resized */
2896	qp_info->send_queue.max_active = mad_sendq_size;
2897	qp_info->recv_queue.max_active = mad_recvq_size;
2898	return 0;
2899
2900error:
2901	return ret;
2902}
2903
2904static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
2905{
2906	if (!qp_info->qp)
2907		return;
2908
2909	ib_destroy_qp(qp_info->qp);
2910	kfree(qp_info->snoop_table);
2911}
2912
2913/*
2914 * Open the port
2915 * Create the QP, PD, MR, and CQ if needed
2916 */
2917static int ib_mad_port_open(struct ib_device *device,
2918			    int port_num)
2919{
2920	int ret, cq_size;
2921	struct ib_mad_port_private *port_priv;
2922	unsigned long flags;
2923	char name[sizeof "ib_mad123"];
2924	int has_smi;
2925
2926	/* Create new device info */
2927	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
2928	if (!port_priv) {
2929		dev_err(&device->dev, "No memory for ib_mad_port_private\n");
2930		return -ENOMEM;
2931	}
2932
2933	port_priv->device = device;
2934	port_priv->port_num = port_num;
2935	spin_lock_init(&port_priv->reg_lock);
2936	INIT_LIST_HEAD(&port_priv->agent_list);
2937	init_mad_qp(port_priv, &port_priv->qp_info[0]);
2938	init_mad_qp(port_priv, &port_priv->qp_info[1]);
2939
2940	cq_size = mad_sendq_size + mad_recvq_size;
2941	has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND;
2942	if (has_smi)
2943		cq_size *= 2;
2944
2945	port_priv->cq = ib_create_cq(port_priv->device,
2946				     ib_mad_thread_completion_handler,
2947				     NULL, port_priv, cq_size, 0);
2948	if (IS_ERR(port_priv->cq)) {
2949		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
2950		ret = PTR_ERR(port_priv->cq);
2951		goto error3;
2952	}
2953
2954	port_priv->pd = ib_alloc_pd(device);
2955	if (IS_ERR(port_priv->pd)) {
2956		dev_err(&device->dev, "Couldn't create ib_mad PD\n");
2957		ret = PTR_ERR(port_priv->pd);
2958		goto error4;
2959	}
2960
2961	port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
2962	if (IS_ERR(port_priv->mr)) {
2963		dev_err(&device->dev, "Couldn't get ib_mad DMA MR\n");
2964		ret = PTR_ERR(port_priv->mr);
2965		goto error5;
2966	}
2967
2968	if (has_smi) {
2969		ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
2970		if (ret)
2971			goto error6;
2972	}
2973	ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
2974	if (ret)
2975		goto error7;
2976
2977	snprintf(name, sizeof name, "ib_mad%d", port_num);
2978	port_priv->wq = create_singlethread_workqueue(name);
2979	if (!port_priv->wq) {
2980		ret = -ENOMEM;
2981		goto error8;
2982	}
2983	INIT_WORK(&port_priv->work, ib_mad_completion_handler);
2984
2985	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2986	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
2987	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2988
2989	ret = ib_mad_port_start(port_priv);
2990	if (ret) {
2991		dev_err(&device->dev, "Couldn't start port\n");
2992		goto error9;
2993	}
2994
2995	return 0;
2996
2997error9:
2998	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2999	list_del_init(&port_priv->port_list);
3000	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3001
3002	destroy_workqueue(port_priv->wq);
3003error8:
3004	destroy_mad_qp(&port_priv->qp_info[1]);
3005error7:
3006	destroy_mad_qp(&port_priv->qp_info[0]);
3007error6:
3008	ib_dereg_mr(port_priv->mr);
3009error5:
3010	ib_dealloc_pd(port_priv->pd);
3011error4:
3012	ib_destroy_cq(port_priv->cq);
3013	cleanup_recv_queue(&port_priv->qp_info[1]);
3014	cleanup_recv_queue(&port_priv->qp_info[0]);
3015error3:
3016	kfree(port_priv);
3017
3018	return ret;
3019}
3020
3021/*
3022 * Close the port
3023 * If there are no classes using the port, free the port
3024 * resources (CQ, MR, PD, QP) and remove the port's info structure
3025 */
3026static int ib_mad_port_close(struct ib_device *device, int port_num)
3027{
3028	struct ib_mad_port_private *port_priv;
3029	unsigned long flags;
3030
3031	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3032	port_priv = __ib_get_mad_port(device, port_num);
3033	if (port_priv == NULL) {
3034		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3035		dev_err(&device->dev, "Port %d not found\n", port_num);
3036		return -ENODEV;
3037	}
3038	list_del_init(&port_priv->port_list);
3039	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3040
3041	destroy_workqueue(port_priv->wq);
3042	destroy_mad_qp(&port_priv->qp_info[1]);
3043	destroy_mad_qp(&port_priv->qp_info[0]);
3044	ib_dereg_mr(port_priv->mr);
3045	ib_dealloc_pd(port_priv->pd);
3046	ib_destroy_cq(port_priv->cq);
3047	cleanup_recv_queue(&port_priv->qp_info[1]);
3048	cleanup_recv_queue(&port_priv->qp_info[0]);
3049	/* XXX: Handle deallocation of MAD registration tables */
3050
3051	kfree(port_priv);
3052
3053	return 0;
3054}
3055
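/*
 * Per-device client add hook: bring up the MAD QPs and the agent service on
 * every physical port (switches are managed through port 0 only).
 */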
3056static void ib_mad_init_device(struct ib_device *device)
3057{
3058	int start, end, i;
3059
3060	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
3061		return;
3062
3063	if (device->node_type == RDMA_NODE_IB_SWITCH) {
3064		start = 0;
3065		end   = 0;
3066	} else {
3067		start = 1;
3068		end   = device->phys_port_cnt;
3069	}
3070
3071	for (i = start; i <= end; i++) {
3072		if (ib_mad_port_open(device, i)) {
3073			dev_err(&device->dev, "Couldn't open port %d\n", i);
3074			goto error;
3075		}
3076		if (ib_agent_port_open(device, i)) {
3077			dev_err(&device->dev,
3078				"Couldn't open port %d for agents\n", i);
3079			goto error_agent;
3080		}
3081	}
3082	return;
3083
3084error_agent:
3085	if (ib_mad_port_close(device, i))
3086		dev_err(&device->dev, "Couldn't close port %d\n", i);
3087
3088error:
3089	i--;
3090
3091	while (i >= start) {
3092		if (ib_agent_port_close(device, i))
3093			dev_err(&device->dev,
3094				"Couldn't close port %d for agents\n", i);
3095		if (ib_mad_port_close(device, i))
3096			dev_err(&device->dev, "Couldn't close port %d\n", i);
3097		i--;
3098	}
3099}
3100
3101static void ib_mad_remove_device(struct ib_device *device)
3102{
3103	int i, num_ports, cur_port;
3104
3105	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
3106		return;
3107
3108	if (device->node_type == RDMA_NODE_IB_SWITCH) {
3109		num_ports = 1;
3110		cur_port = 0;
3111	} else {
3112		num_ports = device->phys_port_cnt;
3113		cur_port = 1;
3114	}
3115	for (i = 0; i < num_ports; i++, cur_port++) {
3116		if (ib_agent_port_close(device, cur_port))
3117			dev_err(&device->dev,
3118				"Couldn't close port %d for agents\n",
3119				cur_port);
3120		if (ib_mad_port_close(device, cur_port))
3121			dev_err(&device->dev, "Couldn't close port %d\n",
3122				cur_port);
3123	}
3124}
3125
3126static struct ib_client mad_client = {
3127	.name   = "mad",
3128	.add = ib_mad_init_device,
3129	.remove = ib_mad_remove_device
3130};
3131
3132static int __init ib_mad_init_module(void)
3133{
3134	int ret;
3135
3136	mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
3137	mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
3138
3139	mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
3140	mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
3141
3142	ib_mad_cache = kmem_cache_create("ib_mad",
3143					 sizeof(struct ib_mad_private),
3144					 0,
3145					 SLAB_HWCACHE_ALIGN,
3146					 NULL);
3147	if (!ib_mad_cache) {
3148		pr_err("Couldn't create ib_mad cache\n");
3149		ret = -ENOMEM;
3150		goto error1;
3151	}
3152
3153	INIT_LIST_HEAD(&ib_mad_port_list);
3154
3155	if (ib_register_client(&mad_client)) {
3156		pr_err("Couldn't register ib_mad client\n");
3157		ret = -EINVAL;
3158		goto error2;
3159	}
3160
3161	return 0;
3162
3163error2:
3164	kmem_cache_destroy(ib_mad_cache);
3165error1:
3166	return ret;
3167}
3168
3169static void __exit ib_mad_cleanup_module(void)
3170{
3171	ib_unregister_client(&mad_client);
3172	kmem_cache_destroy(ib_mad_cache);
3173}
3174
3175module_init(ib_mad_init_module);
3176module_exit(ib_mad_cleanup_module);
3177