1/*
2 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35#include <linux/module.h>
36#include <linux/init.h>
37#include <linux/slab.h>
38#include <linux/err.h>
39#include <linux/string.h>
40#include <linux/parser.h>
41#include <linux/random.h>
42#include <linux/jiffies.h>
43#include <rdma/ib_cache.h>
44
45#include <linux/atomic.h>
46
47#include <scsi/scsi.h>
48#include <scsi/scsi_device.h>
49#include <scsi/scsi_dbg.h>
50#include <scsi/scsi_tcq.h>
51#include <scsi/srp.h>
52#include <scsi/scsi_transport_srp.h>
53
54#include "ib_srp.h"
55
56#define DRV_NAME	"ib_srp"
57#define PFX		DRV_NAME ": "
58#define DRV_VERSION	"1.0"
59#define DRV_RELDATE	"July 1, 2013"
60
61MODULE_AUTHOR("Roland Dreier");
62MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
63		   "v" DRV_VERSION " (" DRV_RELDATE ")");
64MODULE_LICENSE("Dual BSD/GPL");
65
66static unsigned int srp_sg_tablesize;
67static unsigned int cmd_sg_entries;
68static unsigned int indirect_sg_entries;
69static bool allow_ext_sg;
70static bool prefer_fr;
71static bool register_always;
72static int topspin_workarounds = 1;
73
74module_param(srp_sg_tablesize, uint, 0444);
75MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
76
77module_param(cmd_sg_entries, uint, 0444);
78MODULE_PARM_DESC(cmd_sg_entries,
79		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
80
81module_param(indirect_sg_entries, uint, 0444);
82MODULE_PARM_DESC(indirect_sg_entries,
83		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
84
85module_param(allow_ext_sg, bool, 0444);
86MODULE_PARM_DESC(allow_ext_sg,
87		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
88
89module_param(topspin_workarounds, int, 0444);
90MODULE_PARM_DESC(topspin_workarounds,
91		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
92
93module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
		 "Whether to use fast registration if both FMR and fast registration are supported");
96
97module_param(register_always, bool, 0444);
98MODULE_PARM_DESC(register_always,
99		 "Use memory registration even for contiguous memory regions");
100
static const struct kernel_param_ops srp_tmo_ops;
102
103static int srp_reconnect_delay = 10;
104module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
105		S_IRUGO | S_IWUSR);
106MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
107
108static int srp_fast_io_fail_tmo = 15;
109module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
110		S_IRUGO | S_IWUSR);
111MODULE_PARM_DESC(fast_io_fail_tmo,
112		 "Number of seconds between the observation of a transport"
113		 " layer error and failing all I/O. \"off\" means that this"
114		 " functionality is disabled.");
115
116static int srp_dev_loss_tmo = 600;
117module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
118		S_IRUGO | S_IWUSR);
119MODULE_PARM_DESC(dev_loss_tmo,
120		 "Maximum number of seconds that the SRP transport should"
121		 " insulate transport layer errors. After this time has been"
122		 " exceeded the SCSI host is removed. Should be"
123		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
124		 " if fast_io_fail_tmo has not been set. \"off\" means that"
125		 " this functionality is disabled.");
126
127static unsigned ch_count;
128module_param(ch_count, uint, 0444);
129MODULE_PARM_DESC(ch_count,
130		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
131
132static void srp_add_one(struct ib_device *device);
133static void srp_remove_one(struct ib_device *device);
134static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
135static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
136static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
137
138static struct scsi_transport_template *ib_srp_transport_template;
139static struct workqueue_struct *srp_remove_wq;
140
141static struct ib_client srp_client = {
142	.name   = "srp",
143	.add    = srp_add_one,
144	.remove = srp_remove_one
145};
146
147static struct ib_sa_client srp_sa_client;
148
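/*
 * Report a timeout module parameter: a negative stored value is shown as
 * "off", which means that the corresponding timeout mechanism is disabled.
 */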
149static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
150{
151	int tmo = *(int *)kp->arg;
152
153	if (tmo >= 0)
154		return sprintf(buffer, "%d", tmo);
155	else
156		return sprintf(buffer, "off");
157}
158
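/*
 * Parse a timeout module parameter, mapping "off" onto -1, and validate the
 * new value against the other SRP timeouts via srp_tmo_valid() before
 * storing it; e.g. writing "off" to the parameter file (typically
 * /sys/module/ib_srp/parameters/fast_io_fail_tmo) disables that mechanism.
 */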
159static int srp_tmo_set(const char *val, const struct kernel_param *kp)
160{
161	int tmo, res;
162
163	if (strncmp(val, "off", 3) != 0) {
164		res = kstrtoint(val, 0, &tmo);
165		if (res)
166			goto out;
167	} else {
168		tmo = -1;
169	}
170	if (kp->arg == &srp_reconnect_delay)
171		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
172				    srp_dev_loss_tmo);
173	else if (kp->arg == &srp_fast_io_fail_tmo)
174		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
175	else
176		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
177				    tmo);
178	if (res)
179		goto out;
180	*(int *)kp->arg = tmo;
181
182out:
183	return res;
184}
185
static const struct kernel_param_ops srp_tmo_ops = {
187	.get = srp_tmo_get,
188	.set = srp_tmo_set,
189};
190
191static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
192{
193	return (struct srp_target_port *) host->hostdata;
194}
195
196static const char *srp_target_info(struct Scsi_Host *host)
197{
198	return host_to_target(host)->target_name;
199}
200
201static int srp_target_is_topspin(struct srp_target_port *target)
202{
203	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
204	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
205
206	return topspin_workarounds &&
207		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
208		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
209}
210
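/*
 * Allocate an information unit (IU): a buffer of @size bytes mapped for DMA
 * in the given direction. Returns NULL on any failure.
 */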
211static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
212				   gfp_t gfp_mask,
213				   enum dma_data_direction direction)
214{
215	struct srp_iu *iu;
216
217	iu = kmalloc(sizeof *iu, gfp_mask);
218	if (!iu)
219		goto out;
220
221	iu->buf = kzalloc(size, gfp_mask);
222	if (!iu->buf)
223		goto out_free_iu;
224
225	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
226				    direction);
227	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
228		goto out_free_buf;
229
230	iu->size      = size;
231	iu->direction = direction;
232
233	return iu;
234
235out_free_buf:
236	kfree(iu->buf);
237out_free_iu:
238	kfree(iu);
239out:
240	return NULL;
241}
242
243static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
244{
245	if (!iu)
246		return;
247
248	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
249			    iu->direction);
250	kfree(iu->buf);
251	kfree(iu);
252}
253
254static void srp_qp_event(struct ib_event *event, void *context)
255{
256	pr_debug("QP event %d\n", event->event);
257}
258
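/*
 * Move a newly created queue pair to the INIT state and configure the P_Key
 * index, RDMA access flags and port number needed for the RC connection.
 */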
259static int srp_init_qp(struct srp_target_port *target,
260		       struct ib_qp *qp)
261{
262	struct ib_qp_attr *attr;
263	int ret;
264
265	attr = kmalloc(sizeof *attr, GFP_KERNEL);
266	if (!attr)
267		return -ENOMEM;
268
269	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
270				  target->srp_host->port,
271				  be16_to_cpu(target->pkey),
272				  &attr->pkey_index);
273	if (ret)
274		goto out;
275
276	attr->qp_state        = IB_QPS_INIT;
277	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
278				    IB_ACCESS_REMOTE_WRITE);
279	attr->port_num        = target->srp_host->port;
280
281	ret = ib_modify_qp(qp, attr,
282			   IB_QP_STATE		|
283			   IB_QP_PKEY_INDEX	|
284			   IB_QP_ACCESS_FLAGS	|
285			   IB_QP_PORT);
286
287out:
288	kfree(attr);
289	return ret;
290}
291
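/*
 * Allocate a new CM ID for @ch, replacing any existing one, and reinitialize
 * the path record fields that are used for the subsequent path query.
 */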
292static int srp_new_cm_id(struct srp_rdma_ch *ch)
293{
294	struct srp_target_port *target = ch->target;
295	struct ib_cm_id *new_cm_id;
296
297	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
298				    srp_cm_handler, ch);
299	if (IS_ERR(new_cm_id))
300		return PTR_ERR(new_cm_id);
301
302	if (ch->cm_id)
303		ib_destroy_cm_id(ch->cm_id);
304	ch->cm_id = new_cm_id;
305	ch->path.sgid = target->sgid;
306	ch->path.dgid = target->orig_dgid;
307	ch->path.pkey = target->pkey;
308	ch->path.service_id = target->service_id;
309
310	return 0;
311}
312
313static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
314{
315	struct srp_device *dev = target->srp_host->srp_dev;
316	struct ib_fmr_pool_param fmr_param;
317
318	memset(&fmr_param, 0, sizeof(fmr_param));
319	fmr_param.pool_size	    = target->scsi_host->can_queue;
320	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
321	fmr_param.cache		    = 1;
322	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
323	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
324	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
325				       IB_ACCESS_REMOTE_WRITE |
326				       IB_ACCESS_REMOTE_READ);
327
328	return ib_create_fmr_pool(dev->pd, &fmr_param);
329}
330
331/**
332 * srp_destroy_fr_pool() - free the resources owned by a pool
333 * @pool: Fast registration pool to be destroyed.
334 */
335static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
336{
337	int i;
338	struct srp_fr_desc *d;
339
340	if (!pool)
341		return;
342
343	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
344		if (d->frpl)
345			ib_free_fast_reg_page_list(d->frpl);
346		if (d->mr)
347			ib_dereg_mr(d->mr);
348	}
349	kfree(pool);
350}
351
352/**
353 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
354 * @device:            IB device to allocate fast registration descriptors for.
355 * @pd:                Protection domain associated with the FR descriptors.
356 * @pool_size:         Number of descriptors to allocate.
357 * @max_page_list_len: Maximum fast registration work request page list length.
358 */
359static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
360					      struct ib_pd *pd, int pool_size,
361					      int max_page_list_len)
362{
363	struct srp_fr_pool *pool;
364	struct srp_fr_desc *d;
365	struct ib_mr *mr;
366	struct ib_fast_reg_page_list *frpl;
367	int i, ret = -EINVAL;
368
369	if (pool_size <= 0)
370		goto err;
371	ret = -ENOMEM;
372	pool = kzalloc(sizeof(struct srp_fr_pool) +
373		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
374	if (!pool)
375		goto err;
376	pool->size = pool_size;
377	pool->max_page_list_len = max_page_list_len;
378	spin_lock_init(&pool->lock);
379	INIT_LIST_HEAD(&pool->free_list);
380
381	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
382		mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
383		if (IS_ERR(mr)) {
384			ret = PTR_ERR(mr);
385			goto destroy_pool;
386		}
387		d->mr = mr;
388		frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
389		if (IS_ERR(frpl)) {
390			ret = PTR_ERR(frpl);
391			goto destroy_pool;
392		}
393		d->frpl = frpl;
394		list_add_tail(&d->entry, &pool->free_list);
395	}
396
397out:
398	return pool;
399
400destroy_pool:
401	srp_destroy_fr_pool(pool);
402
403err:
404	pool = ERR_PTR(ret);
405	goto out;
406}
407
408/**
409 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
410 * @pool: Pool to obtain descriptor from.
411 */
412static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
413{
414	struct srp_fr_desc *d = NULL;
415	unsigned long flags;
416
417	spin_lock_irqsave(&pool->lock, flags);
418	if (!list_empty(&pool->free_list)) {
419		d = list_first_entry(&pool->free_list, typeof(*d), entry);
420		list_del(&d->entry);
421	}
422	spin_unlock_irqrestore(&pool->lock, flags);
423
424	return d;
425}
426
427/**
428 * srp_fr_pool_put() - put an FR descriptor back in the free list
429 * @pool: Pool the descriptor was allocated from.
430 * @desc: Pointer to an array of fast registration descriptor pointers.
431 * @n:    Number of descriptors to put back.
432 *
433 * Note: The caller must already have queued an invalidation request for
434 * desc->mr->rkey before calling this function.
435 */
436static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
437			    int n)
438{
439	unsigned long flags;
440	int i;
441
442	spin_lock_irqsave(&pool->lock, flags);
443	for (i = 0; i < n; i++)
444		list_add(&desc[i]->entry, &pool->free_list);
445	spin_unlock_irqrestore(&pool->lock, flags);
446}
447
448static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
449{
450	struct srp_device *dev = target->srp_host->srp_dev;
451
452	return srp_create_fr_pool(dev->dev, dev->pd,
453				  target->scsi_host->can_queue,
454				  dev->max_pages_per_mr);
455}
456
457/**
458 * srp_destroy_qp() - destroy an RDMA queue pair
459 * @ch: SRP RDMA channel.
460 *
 * Change a queue pair into the error state and wait until all receive
 * completions have been processed before destroying it. This prevents the
 * receive completion handler from accessing the queue pair while it is
 * being destroyed.
465 */
466static void srp_destroy_qp(struct srp_rdma_ch *ch)
467{
468	static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
469	static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
470	struct ib_recv_wr *bad_wr;
471	int ret;
472
473	/* Destroying a QP and reusing ch->done is only safe if not connected */
474	WARN_ON_ONCE(ch->connected);
475
476	ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
	WARN_ONCE(ret, "ib_modify_qp() returned %d\n", ret);
478	if (ret)
479		goto out;
480
481	init_completion(&ch->done);
482	ret = ib_post_recv(ch->qp, &wr, &bad_wr);
483	WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
484	if (ret == 0)
485		wait_for_completion(&ch->done);
486
487out:
488	ib_destroy_qp(ch->qp);
489}
490
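/*
 * (Re)create the completion queues, queue pair and memory registration pool
 * of an RDMA channel. The previous CQs, QP and registration pool of @ch, if
 * any, are only destroyed after their replacements have been set up.
 */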
491static int srp_create_ch_ib(struct srp_rdma_ch *ch)
492{
493	struct srp_target_port *target = ch->target;
494	struct srp_device *dev = target->srp_host->srp_dev;
495	struct ib_qp_init_attr *init_attr;
496	struct ib_cq *recv_cq, *send_cq;
497	struct ib_qp *qp;
498	struct ib_fmr_pool *fmr_pool = NULL;
499	struct srp_fr_pool *fr_pool = NULL;
500	const int m = 1 + dev->use_fast_reg;
501	int ret;
502
503	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
504	if (!init_attr)
505		return -ENOMEM;
506
507	/* + 1 for SRP_LAST_WR_ID */
508	recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
509			       target->queue_size + 1, ch->comp_vector);
510	if (IS_ERR(recv_cq)) {
511		ret = PTR_ERR(recv_cq);
512		goto err;
513	}
514
515	send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
516			       m * target->queue_size, ch->comp_vector);
517	if (IS_ERR(send_cq)) {
518		ret = PTR_ERR(send_cq);
519		goto err_recv_cq;
520	}
521
522	ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
523
524	init_attr->event_handler       = srp_qp_event;
525	init_attr->cap.max_send_wr     = m * target->queue_size;
526	init_attr->cap.max_recv_wr     = target->queue_size + 1;
527	init_attr->cap.max_recv_sge    = 1;
528	init_attr->cap.max_send_sge    = 1;
529	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
530	init_attr->qp_type             = IB_QPT_RC;
531	init_attr->send_cq             = send_cq;
532	init_attr->recv_cq             = recv_cq;
533
534	qp = ib_create_qp(dev->pd, init_attr);
535	if (IS_ERR(qp)) {
536		ret = PTR_ERR(qp);
537		goto err_send_cq;
538	}
539
540	ret = srp_init_qp(target, qp);
541	if (ret)
542		goto err_qp;
543
544	if (dev->use_fast_reg && dev->has_fr) {
545		fr_pool = srp_alloc_fr_pool(target);
546		if (IS_ERR(fr_pool)) {
547			ret = PTR_ERR(fr_pool);
548			shost_printk(KERN_WARNING, target->scsi_host, PFX
549				     "FR pool allocation failed (%d)\n", ret);
550			goto err_qp;
551		}
552		if (ch->fr_pool)
553			srp_destroy_fr_pool(ch->fr_pool);
554		ch->fr_pool = fr_pool;
555	} else if (!dev->use_fast_reg && dev->has_fmr) {
556		fmr_pool = srp_alloc_fmr_pool(target);
557		if (IS_ERR(fmr_pool)) {
558			ret = PTR_ERR(fmr_pool);
559			shost_printk(KERN_WARNING, target->scsi_host, PFX
560				     "FMR pool allocation failed (%d)\n", ret);
561			goto err_qp;
562		}
563		if (ch->fmr_pool)
564			ib_destroy_fmr_pool(ch->fmr_pool);
565		ch->fmr_pool = fmr_pool;
566	}
567
568	if (ch->qp)
569		srp_destroy_qp(ch);
570	if (ch->recv_cq)
571		ib_destroy_cq(ch->recv_cq);
572	if (ch->send_cq)
573		ib_destroy_cq(ch->send_cq);
574
575	ch->qp = qp;
576	ch->recv_cq = recv_cq;
577	ch->send_cq = send_cq;
578
579	kfree(init_attr);
580	return 0;
581
582err_qp:
583	ib_destroy_qp(qp);
584
585err_send_cq:
586	ib_destroy_cq(send_cq);
587
588err_recv_cq:
589	ib_destroy_cq(recv_cq);
590
591err:
592	kfree(init_attr);
593	return ret;
594}
595
596/*
597 * Note: this function may be called without srp_alloc_iu_bufs() having been
598 * invoked. Hence the ch->[rt]x_ring checks.
599 */
600static void srp_free_ch_ib(struct srp_target_port *target,
601			   struct srp_rdma_ch *ch)
602{
603	struct srp_device *dev = target->srp_host->srp_dev;
604	int i;
605
606	if (!ch->target)
607		return;
608
609	if (ch->cm_id) {
610		ib_destroy_cm_id(ch->cm_id);
611		ch->cm_id = NULL;
612	}
613
614	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
615	if (!ch->qp)
616		return;
617
618	if (dev->use_fast_reg) {
619		if (ch->fr_pool)
620			srp_destroy_fr_pool(ch->fr_pool);
621	} else {
622		if (ch->fmr_pool)
623			ib_destroy_fmr_pool(ch->fmr_pool);
624	}
625	srp_destroy_qp(ch);
626	ib_destroy_cq(ch->send_cq);
627	ib_destroy_cq(ch->recv_cq);
628
629	/*
	 * Prevent the SCSI error handler from using this channel after it
	 * has been freed: the error handler may keep trying to perform
	 * recovery actions after scsi_remove_host() has returned.
634	 */
635	ch->target = NULL;
636
637	ch->qp = NULL;
638	ch->send_cq = ch->recv_cq = NULL;
639
640	if (ch->rx_ring) {
641		for (i = 0; i < target->queue_size; ++i)
642			srp_free_iu(target->srp_host, ch->rx_ring[i]);
643		kfree(ch->rx_ring);
644		ch->rx_ring = NULL;
645	}
646	if (ch->tx_ring) {
647		for (i = 0; i < target->queue_size; ++i)
648			srp_free_iu(target->srp_host, ch->tx_ring[i]);
649		kfree(ch->tx_ring);
650		ch->tx_ring = NULL;
651	}
652}
653
654static void srp_path_rec_completion(int status,
655				    struct ib_sa_path_rec *pathrec,
656				    void *ch_ptr)
657{
658	struct srp_rdma_ch *ch = ch_ptr;
659	struct srp_target_port *target = ch->target;
660
661	ch->status = status;
662	if (status)
663		shost_printk(KERN_ERR, target->scsi_host,
664			     PFX "Got failed path rec status %d\n", status);
665	else
666		ch->path = *pathrec;
667	complete(&ch->done);
668}
669
670static int srp_lookup_path(struct srp_rdma_ch *ch)
671{
672	struct srp_target_port *target = ch->target;
673	int ret;
674
675	ch->path.numb_path = 1;
676
677	init_completion(&ch->done);
678
679	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
680					       target->srp_host->srp_dev->dev,
681					       target->srp_host->port,
682					       &ch->path,
683					       IB_SA_PATH_REC_SERVICE_ID |
684					       IB_SA_PATH_REC_DGID	 |
685					       IB_SA_PATH_REC_SGID	 |
686					       IB_SA_PATH_REC_NUMB_PATH	 |
687					       IB_SA_PATH_REC_PKEY,
688					       SRP_PATH_REC_TIMEOUT_MS,
689					       GFP_KERNEL,
690					       srp_path_rec_completion,
691					       ch, &ch->path_query);
692	if (ch->path_query_id < 0)
693		return ch->path_query_id;
694
695	ret = wait_for_completion_interruptible(&ch->done);
696	if (ret < 0)
697		return ret;
698
699	if (ch->status < 0)
700		shost_printk(KERN_WARNING, target->scsi_host,
701			     PFX "Path record query failed\n");
702
703	return ch->status;
704}
705
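/*
 * Build and send an IB CM REQ whose private data contains the SRP_LOGIN_REQ
 * information unit for @ch.
 */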
706static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
707{
708	struct srp_target_port *target = ch->target;
709	struct {
710		struct ib_cm_req_param param;
711		struct srp_login_req   priv;
712	} *req = NULL;
713	int status;
714
715	req = kzalloc(sizeof *req, GFP_KERNEL);
716	if (!req)
717		return -ENOMEM;
718
719	req->param.primary_path		      = &ch->path;
720	req->param.alternate_path 	      = NULL;
721	req->param.service_id 		      = target->service_id;
722	req->param.qp_num		      = ch->qp->qp_num;
723	req->param.qp_type		      = ch->qp->qp_type;
724	req->param.private_data 	      = &req->priv;
725	req->param.private_data_len 	      = sizeof req->priv;
726	req->param.flow_control 	      = 1;
727
728	get_random_bytes(&req->param.starting_psn, 4);
729	req->param.starting_psn 	     &= 0xffffff;
730
731	/*
732	 * Pick some arbitrary defaults here; we could make these
733	 * module parameters if anyone cared about setting them.
734	 */
735	req->param.responder_resources	      = 4;
736	req->param.remote_cm_response_timeout = 20;
737	req->param.local_cm_response_timeout  = 20;
738	req->param.retry_count                = target->tl_retry_count;
739	req->param.rnr_retry_count 	      = 7;
740	req->param.max_cm_retries 	      = 15;
741
742	req->priv.opcode     	= SRP_LOGIN_REQ;
743	req->priv.tag        	= 0;
744	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
745	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
746					      SRP_BUF_FORMAT_INDIRECT);
747	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
748				   SRP_MULTICHAN_SINGLE);
749	/*
750	 * In the published SRP specification (draft rev. 16a), the
751	 * port identifier format is 8 bytes of ID extension followed
752	 * by 8 bytes of GUID.  Older drafts put the two halves in the
753	 * opposite order, so that the GUID comes first.
754	 *
755	 * Targets conforming to these obsolete drafts can be
756	 * recognized by the I/O Class they report.
757	 */
758	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
759		memcpy(req->priv.initiator_port_id,
760		       &target->sgid.global.interface_id, 8);
761		memcpy(req->priv.initiator_port_id + 8,
762		       &target->initiator_ext, 8);
763		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
764		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
765	} else {
766		memcpy(req->priv.initiator_port_id,
767		       &target->initiator_ext, 8);
768		memcpy(req->priv.initiator_port_id + 8,
769		       &target->sgid.global.interface_id, 8);
770		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
771		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
772	}
773
774	/*
775	 * Topspin/Cisco SRP targets will reject our login unless we
776	 * zero out the first 8 bytes of our initiator port ID and set
777	 * the second 8 bytes to the local node GUID.
778	 */
779	if (srp_target_is_topspin(target)) {
780		shost_printk(KERN_DEBUG, target->scsi_host,
781			     PFX "Topspin/Cisco initiator port ID workaround "
782			     "activated for target GUID %016llx\n",
783			     (unsigned long long) be64_to_cpu(target->ioc_guid));
784		memset(req->priv.initiator_port_id, 0, 8);
785		memcpy(req->priv.initiator_port_id + 8,
786		       &target->srp_host->srp_dev->dev->node_guid, 8);
787	}
788
789	status = ib_send_cm_req(ch->cm_id, &req->param);
790
791	kfree(req);
792
793	return status;
794}
795
796static bool srp_queue_remove_work(struct srp_target_port *target)
797{
798	bool changed = false;
799
800	spin_lock_irq(&target->lock);
801	if (target->state != SRP_TARGET_REMOVED) {
802		target->state = SRP_TARGET_REMOVED;
803		changed = true;
804	}
805	spin_unlock_irq(&target->lock);
806
807	if (changed)
808		queue_work(srp_remove_wq, &target->remove_work);
809
810	return changed;
811}
812
813static void srp_disconnect_target(struct srp_target_port *target)
814{
815	struct srp_rdma_ch *ch;
816	int i;
817
818	/* XXX should send SRP_I_LOGOUT request */
819
820	for (i = 0; i < target->ch_count; i++) {
821		ch = &target->ch[i];
822		ch->connected = false;
823		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
824			shost_printk(KERN_DEBUG, target->scsi_host,
825				     PFX "Sending CM DREQ failed\n");
826		}
827	}
828}
829
830static void srp_free_req_data(struct srp_target_port *target,
831			      struct srp_rdma_ch *ch)
832{
833	struct srp_device *dev = target->srp_host->srp_dev;
834	struct ib_device *ibdev = dev->dev;
835	struct srp_request *req;
836	int i;
837
838	if (!ch->target || !ch->req_ring)
839		return;
840
841	for (i = 0; i < target->req_ring_size; ++i) {
842		req = &ch->req_ring[i];
843		if (dev->use_fast_reg)
844			kfree(req->fr_list);
845		else
846			kfree(req->fmr_list);
847		kfree(req->map_page);
848		if (req->indirect_dma_addr) {
849			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
850					    target->indirect_size,
851					    DMA_TO_DEVICE);
852		}
853		kfree(req->indirect_desc);
854	}
855
856	kfree(ch->req_ring);
857	ch->req_ring = NULL;
858}
859
860static int srp_alloc_req_data(struct srp_rdma_ch *ch)
861{
862	struct srp_target_port *target = ch->target;
863	struct srp_device *srp_dev = target->srp_host->srp_dev;
864	struct ib_device *ibdev = srp_dev->dev;
865	struct srp_request *req;
866	void *mr_list;
867	dma_addr_t dma_addr;
868	int i, ret = -ENOMEM;
869
870	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
871			       GFP_KERNEL);
872	if (!ch->req_ring)
873		goto out;
874
875	for (i = 0; i < target->req_ring_size; ++i) {
876		req = &ch->req_ring[i];
877		mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
878				  GFP_KERNEL);
879		if (!mr_list)
880			goto out;
881		if (srp_dev->use_fast_reg)
882			req->fr_list = mr_list;
883		else
884			req->fmr_list = mr_list;
885		req->map_page = kmalloc(srp_dev->max_pages_per_mr *
886					sizeof(void *), GFP_KERNEL);
887		if (!req->map_page)
888			goto out;
889		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
890		if (!req->indirect_desc)
891			goto out;
892
893		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
894					     target->indirect_size,
895					     DMA_TO_DEVICE);
896		if (ib_dma_mapping_error(ibdev, dma_addr))
897			goto out;
898
899		req->indirect_dma_addr = dma_addr;
900	}
901	ret = 0;
902
903out:
904	return ret;
905}
906
907/**
908 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
909 * @shost: SCSI host whose attributes to remove from sysfs.
910 *
 * Note: Any attributes defined in the host template that have not been
 * created in sysfs before this function is invoked are simply ignored.
913 */
914static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
915{
916	struct device_attribute **attr;
917
918	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
919		device_remove_file(&shost->shost_dev, *attr);
920}
921
922static void srp_remove_target(struct srp_target_port *target)
923{
924	struct srp_rdma_ch *ch;
925	int i;
926
927	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
928
929	srp_del_scsi_host_attr(target->scsi_host);
930	srp_rport_get(target->rport);
931	srp_remove_host(target->scsi_host);
932	scsi_remove_host(target->scsi_host);
933	srp_stop_rport_timers(target->rport);
934	srp_disconnect_target(target);
935	for (i = 0; i < target->ch_count; i++) {
936		ch = &target->ch[i];
937		srp_free_ch_ib(target, ch);
938	}
939	cancel_work_sync(&target->tl_err_work);
940	srp_rport_put(target->rport);
941	for (i = 0; i < target->ch_count; i++) {
942		ch = &target->ch[i];
943		srp_free_req_data(target, ch);
944	}
945	kfree(target->ch);
946	target->ch = NULL;
947
948	spin_lock(&target->srp_host->target_lock);
949	list_del(&target->list);
950	spin_unlock(&target->srp_host->target_lock);
951
952	scsi_host_put(target->scsi_host);
953}
954
955static void srp_remove_work(struct work_struct *work)
956{
957	struct srp_target_port *target =
958		container_of(work, struct srp_target_port, remove_work);
959
960	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
961
962	srp_remove_target(target);
963}
964
965static void srp_rport_delete(struct srp_rport *rport)
966{
967	struct srp_target_port *target = rport->lld_data;
968
969	srp_queue_remove_work(target);
970}
971
972/**
973 * srp_connected_ch() - number of connected channels
974 * @target: SRP target port.
975 */
976static int srp_connected_ch(struct srp_target_port *target)
977{
978	int i, c = 0;
979
980	for (i = 0; i < target->ch_count; i++)
981		c += target->ch[i].connected;
982
983	return c;
984}
985
986static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
987{
988	struct srp_target_port *target = ch->target;
989	int ret;
990
991	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
992
993	ret = srp_lookup_path(ch);
994	if (ret)
995		return ret;
996
997	while (1) {
998		init_completion(&ch->done);
999		ret = srp_send_req(ch, multich);
1000		if (ret)
1001			return ret;
1002		ret = wait_for_completion_interruptible(&ch->done);
1003		if (ret < 0)
1004			return ret;
1005
1006		/*
1007		 * The CM event handling code will set status to
1008		 * SRP_PORT_REDIRECT if we get a port redirect REJ
1009		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1010		 * redirect REJ back.
1011		 */
1012		switch (ch->status) {
1013		case 0:
1014			ch->connected = true;
1015			return 0;
1016
1017		case SRP_PORT_REDIRECT:
1018			ret = srp_lookup_path(ch);
1019			if (ret)
1020				return ret;
1021			break;
1022
1023		case SRP_DLID_REDIRECT:
1024			break;
1025
1026		case SRP_STALE_CONN:
1027			shost_printk(KERN_ERR, target->scsi_host, PFX
1028				     "giving up on stale connection\n");
1029			ch->status = -ECONNRESET;
1030			return ch->status;
1031
1032		default:
1033			return ch->status;
1034		}
1035	}
1036}
1037
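/* Post a local invalidate work request for @rkey on the QP of @ch. */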
1038static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
1039{
1040	struct ib_send_wr *bad_wr;
1041	struct ib_send_wr wr = {
1042		.opcode		    = IB_WR_LOCAL_INV,
1043		.wr_id		    = LOCAL_INV_WR_ID_MASK,
1044		.next		    = NULL,
1045		.num_sge	    = 0,
1046		.send_flags	    = 0,
1047		.ex.invalidate_rkey = rkey,
1048	};
1049
1050	return ib_post_send(ch->qp, &wr, &bad_wr);
1051}
1052
1053static void srp_unmap_data(struct scsi_cmnd *scmnd,
1054			   struct srp_rdma_ch *ch,
1055			   struct srp_request *req)
1056{
1057	struct srp_target_port *target = ch->target;
1058	struct srp_device *dev = target->srp_host->srp_dev;
1059	struct ib_device *ibdev = dev->dev;
1060	int i, res;
1061
1062	if (!scsi_sglist(scmnd) ||
1063	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1064	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
1065		return;
1066
1067	if (dev->use_fast_reg) {
1068		struct srp_fr_desc **pfr;
1069
1070		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1071			res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
1072			if (res < 0) {
1073				shost_printk(KERN_ERR, target->scsi_host, PFX
1074				  "Queueing INV WR for rkey %#x failed (%d)\n",
1075				  (*pfr)->mr->rkey, res);
1076				queue_work(system_long_wq,
1077					   &target->tl_err_work);
1078			}
1079		}
1080		if (req->nmdesc)
1081			srp_fr_pool_put(ch->fr_pool, req->fr_list,
1082					req->nmdesc);
1083	} else {
1084		struct ib_pool_fmr **pfmr;
1085
1086		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1087			ib_fmr_pool_unmap(*pfmr);
1088	}
1089
1090	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1091			scmnd->sc_data_direction);
1092}
1093
1094/**
 * srp_claim_req() - Take ownership of the scmnd associated with a request.
1096 * @ch: SRP RDMA channel.
1097 * @req: SRP request.
1098 * @sdev: If not NULL, only take ownership for this SCSI device.
1099 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1100 *         ownership of @req->scmnd if it equals @scmnd.
1101 *
1102 * Return value:
1103 * Either NULL or a pointer to the SCSI command the caller became owner of.
1104 */
1105static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1106				       struct srp_request *req,
1107				       struct scsi_device *sdev,
1108				       struct scsi_cmnd *scmnd)
1109{
1110	unsigned long flags;
1111
1112	spin_lock_irqsave(&ch->lock, flags);
1113	if (req->scmnd &&
1114	    (!sdev || req->scmnd->device == sdev) &&
1115	    (!scmnd || req->scmnd == scmnd)) {
1116		scmnd = req->scmnd;
1117		req->scmnd = NULL;
1118	} else {
1119		scmnd = NULL;
1120	}
1121	spin_unlock_irqrestore(&ch->lock, flags);
1122
1123	return scmnd;
1124}
1125
1126/**
1127 * srp_free_req() - Unmap data and add request to the free request list.
1128 * @ch:     SRP RDMA channel.
1129 * @req:    Request to be freed.
1130 * @scmnd:  SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @ch->req_lim.
1132 */
1133static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1134			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1135{
1136	unsigned long flags;
1137
1138	srp_unmap_data(scmnd, ch, req);
1139
1140	spin_lock_irqsave(&ch->lock, flags);
1141	ch->req_lim += req_lim_delta;
1142	spin_unlock_irqrestore(&ch->lock, flags);
1143}
1144
1145static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1146			   struct scsi_device *sdev, int result)
1147{
1148	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1149
1150	if (scmnd) {
1151		srp_free_req(ch, req, scmnd, 0);
1152		scmnd->result = result;
1153		scmnd->scsi_done(scmnd);
1154	}
1155}
1156
1157static void srp_terminate_io(struct srp_rport *rport)
1158{
1159	struct srp_target_port *target = rport->lld_data;
1160	struct srp_rdma_ch *ch;
1161	struct Scsi_Host *shost = target->scsi_host;
1162	struct scsi_device *sdev;
1163	int i, j;
1164
1165	/*
1166	 * Invoking srp_terminate_io() while srp_queuecommand() is running
1167	 * is not safe. Hence the warning statement below.
1168	 */
1169	shost_for_each_device(sdev, shost)
1170		WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1171
1172	for (i = 0; i < target->ch_count; i++) {
1173		ch = &target->ch[i];
1174
1175		for (j = 0; j < target->req_ring_size; ++j) {
1176			struct srp_request *req = &ch->req_ring[j];
1177
1178			srp_finish_req(ch, req, NULL,
1179				       DID_TRANSPORT_FAILFAST << 16);
1180		}
1181	}
1182}
1183
1184/*
1185 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1186 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1187 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to ensure this is not to call this function
 * directly but to call srp_reconnect_rport() instead, since that function
 * serializes calls of this function via rport->mutex and also blocks
1191 * srp_queuecommand() calls before invoking this function.
1192 */
1193static int srp_rport_reconnect(struct srp_rport *rport)
1194{
1195	struct srp_target_port *target = rport->lld_data;
1196	struct srp_rdma_ch *ch;
1197	int i, j, ret = 0;
1198	bool multich = false;
1199
1200	srp_disconnect_target(target);
1201
1202	if (target->state == SRP_TARGET_SCANNING)
1203		return -ENODEV;
1204
1205	/*
1206	 * Now get a new local CM ID so that we avoid confusing the target in
1207	 * case things are really fouled up. Doing so also ensures that all CM
1208	 * callbacks will have finished before a new QP is allocated.
1209	 */
1210	for (i = 0; i < target->ch_count; i++) {
1211		ch = &target->ch[i];
1212		if (!ch->target)
1213			break;
1214		ret += srp_new_cm_id(ch);
1215	}
1216	for (i = 0; i < target->ch_count; i++) {
1217		ch = &target->ch[i];
1218		if (!ch->target)
1219			break;
1220		for (j = 0; j < target->req_ring_size; ++j) {
1221			struct srp_request *req = &ch->req_ring[j];
1222
1223			srp_finish_req(ch, req, NULL, DID_RESET << 16);
1224		}
1225	}
1226	for (i = 0; i < target->ch_count; i++) {
1227		ch = &target->ch[i];
1228		if (!ch->target)
1229			break;
1230		/*
1231		 * Whether or not creating a new CM ID succeeded, create a new
1232		 * QP. This guarantees that all completion callback function
1233		 * invocations have finished before request resetting starts.
1234		 */
1235		ret += srp_create_ch_ib(ch);
1236
1237		INIT_LIST_HEAD(&ch->free_tx);
1238		for (j = 0; j < target->queue_size; ++j)
1239			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1240	}
1241
1242	target->qp_in_error = false;
1243
1244	for (i = 0; i < target->ch_count; i++) {
1245		ch = &target->ch[i];
1246		if (ret || !ch->target)
1247			break;
1248		ret = srp_connect_ch(ch, multich);
1249		multich = true;
1250	}
1251
1252	if (ret == 0)
1253		shost_printk(KERN_INFO, target->scsi_host,
1254			     PFX "reconnect succeeded\n");
1255
1256	return ret;
1257}
1258
1259static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1260			 unsigned int dma_len, u32 rkey)
1261{
1262	struct srp_direct_buf *desc = state->desc;
1263
1264	desc->va = cpu_to_be64(dma_addr);
1265	desc->key = cpu_to_be32(rkey);
1266	desc->len = cpu_to_be32(dma_len);
1267
1268	state->total_len += dma_len;
1269	state->desc++;
1270	state->ndesc++;
1271}
1272
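/* Map the pages collected in @state through the FMR pool of @ch. */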
1273static int srp_map_finish_fmr(struct srp_map_state *state,
1274			      struct srp_rdma_ch *ch)
1275{
1276	struct ib_pool_fmr *fmr;
1277	u64 io_addr = 0;
1278
1279	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1280				   state->npages, io_addr);
1281	if (IS_ERR(fmr))
1282		return PTR_ERR(fmr);
1283
1284	*state->next_fmr++ = fmr;
1285	state->nmdesc++;
1286
1287	srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey);
1288
1289	return 0;
1290}
1291
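/*
 * Register the pages collected in @state by posting an IB_WR_FAST_REG_MR
 * work request on the QP of @ch.
 */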
1292static int srp_map_finish_fr(struct srp_map_state *state,
1293			     struct srp_rdma_ch *ch)
1294{
1295	struct srp_target_port *target = ch->target;
1296	struct srp_device *dev = target->srp_host->srp_dev;
1297	struct ib_send_wr *bad_wr;
1298	struct ib_send_wr wr;
1299	struct srp_fr_desc *desc;
1300	u32 rkey;
1301
1302	desc = srp_fr_pool_get(ch->fr_pool);
1303	if (!desc)
1304		return -ENOMEM;
1305
1306	rkey = ib_inc_rkey(desc->mr->rkey);
1307	ib_update_fast_reg_key(desc->mr, rkey);
1308
1309	memcpy(desc->frpl->page_list, state->pages,
1310	       sizeof(state->pages[0]) * state->npages);
1311
1312	memset(&wr, 0, sizeof(wr));
1313	wr.opcode = IB_WR_FAST_REG_MR;
1314	wr.wr_id = FAST_REG_WR_ID_MASK;
1315	wr.wr.fast_reg.iova_start = state->base_dma_addr;
1316	wr.wr.fast_reg.page_list = desc->frpl;
1317	wr.wr.fast_reg.page_list_len = state->npages;
1318	wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
1319	wr.wr.fast_reg.length = state->dma_len;
1320	wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
1321				       IB_ACCESS_REMOTE_READ |
1322				       IB_ACCESS_REMOTE_WRITE);
1323	wr.wr.fast_reg.rkey = desc->mr->lkey;
1324
1325	*state->next_fr++ = desc;
1326	state->nmdesc++;
1327
1328	srp_map_desc(state, state->base_dma_addr, state->dma_len,
1329		     desc->mr->rkey);
1330
1331	return ib_post_send(ch->qp, &wr, &bad_wr);
1332}
1333
1334static int srp_finish_mapping(struct srp_map_state *state,
1335			      struct srp_rdma_ch *ch)
1336{
1337	struct srp_target_port *target = ch->target;
1338	int ret = 0;
1339
1340	if (state->npages == 0)
1341		return 0;
1342
1343	if (state->npages == 1 && !register_always)
1344		srp_map_desc(state, state->base_dma_addr, state->dma_len,
1345			     target->rkey);
1346	else
1347		ret = target->srp_host->srp_dev->use_fast_reg ?
1348			srp_map_finish_fr(state, ch) :
1349			srp_map_finish_fmr(state, ch);
1350
1351	if (ret == 0) {
1352		state->npages = 0;
1353		state->dma_len = 0;
1354	}
1355
1356	return ret;
1357}
1358
1359static void srp_map_update_start(struct srp_map_state *state,
1360				 struct scatterlist *sg, int sg_index,
1361				 dma_addr_t dma_addr)
1362{
1363	state->unmapped_sg = sg;
1364	state->unmapped_index = sg_index;
1365	state->unmapped_addr = dma_addr;
1366}
1367
1368static int srp_map_sg_entry(struct srp_map_state *state,
1369			    struct srp_rdma_ch *ch,
1370			    struct scatterlist *sg, int sg_index,
1371			    bool use_mr)
1372{
1373	struct srp_target_port *target = ch->target;
1374	struct srp_device *dev = target->srp_host->srp_dev;
1375	struct ib_device *ibdev = dev->dev;
1376	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1377	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1378	unsigned int len;
1379	int ret;
1380
1381	if (!dma_len)
1382		return 0;
1383
1384	if (!use_mr) {
1385		/*
1386		 * Once we're in direct map mode for a request, we don't
1387		 * go back to FMR or FR mode, so no need to update anything
1388		 * other than the descriptor.
1389		 */
1390		srp_map_desc(state, dma_addr, dma_len, target->rkey);
1391		return 0;
1392	}
1393
1394	/*
1395	 * Since not all RDMA HW drivers support non-zero page offsets for
1396	 * FMR, if we start at an offset into a page, don't merge into the
1397	 * current FMR mapping. Finish it out, and use the kernel's MR for
1398	 * this sg entry.
1399	 */
1400	if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
1401	    dma_len > dev->mr_max_size) {
1402		ret = srp_finish_mapping(state, ch);
1403		if (ret)
1404			return ret;
1405
1406		srp_map_desc(state, dma_addr, dma_len, target->rkey);
1407		srp_map_update_start(state, NULL, 0, 0);
1408		return 0;
1409	}
1410
1411	/*
1412	 * If this is the first sg that will be mapped via FMR or via FR, save
1413	 * our position. We need to know the first unmapped entry, its index,
1414	 * and the first unmapped address within that entry to be able to
1415	 * restart mapping after an error.
1416	 */
1417	if (!state->unmapped_sg)
1418		srp_map_update_start(state, sg, sg_index, dma_addr);
1419
1420	while (dma_len) {
1421		unsigned offset = dma_addr & ~dev->mr_page_mask;
1422		if (state->npages == dev->max_pages_per_mr || offset != 0) {
1423			ret = srp_finish_mapping(state, ch);
1424			if (ret)
1425				return ret;
1426
1427			srp_map_update_start(state, sg, sg_index, dma_addr);
1428		}
1429
1430		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1431
1432		if (!state->npages)
1433			state->base_dma_addr = dma_addr;
1434		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1435		state->dma_len += len;
1436		dma_addr += len;
1437		dma_len -= len;
1438	}
1439
1440	/*
1441	 * If the last entry of the MR wasn't a full page, then we need to
1442	 * close it out and start a new one -- we can only merge at page
	 * boundaries.
1444	 */
1445	ret = 0;
1446	if (len != dev->mr_page_size) {
1447		ret = srp_finish_mapping(state, ch);
1448		if (!ret)
1449			srp_map_update_start(state, NULL, 0, 0);
1450	}
1451	return ret;
1452}
1453
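/*
 * Map an entire scatterlist. If memory registration fails, fall back to
 * unregistered direct descriptors from the first unmapped entry onwards.
 */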
1454static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch,
1455		      struct srp_request *req, struct scatterlist *scat,
1456		      int count)
1457{
1458	struct srp_target_port *target = ch->target;
1459	struct srp_device *dev = target->srp_host->srp_dev;
1460	struct ib_device *ibdev = dev->dev;
1461	struct scatterlist *sg;
1462	int i;
1463	bool use_mr;
1464
1465	state->desc	= req->indirect_desc;
1466	state->pages	= req->map_page;
1467	if (dev->use_fast_reg) {
1468		state->next_fr = req->fr_list;
1469		use_mr = !!ch->fr_pool;
1470	} else {
1471		state->next_fmr = req->fmr_list;
1472		use_mr = !!ch->fmr_pool;
1473	}
1474
1475	for_each_sg(scat, sg, count, i) {
1476		if (srp_map_sg_entry(state, ch, sg, i, use_mr)) {
1477			/*
1478			 * Memory registration failed, so backtrack to the
1479			 * first unmapped entry and continue on without using
1480			 * memory registration.
1481			 */
1482			dma_addr_t dma_addr;
1483			unsigned int dma_len;
1484
1485backtrack:
1486			sg = state->unmapped_sg;
1487			i = state->unmapped_index;
1488
1489			dma_addr = ib_sg_dma_address(ibdev, sg);
1490			dma_len = ib_sg_dma_len(ibdev, sg);
1491			dma_len -= (state->unmapped_addr - dma_addr);
1492			dma_addr = state->unmapped_addr;
1493			use_mr = false;
1494			srp_map_desc(state, dma_addr, dma_len, target->rkey);
1495		}
1496	}
1497
1498	if (use_mr && srp_finish_mapping(state, ch))
1499		goto backtrack;
1500
1501	req->nmdesc = state->nmdesc;
1502
1503	return 0;
1504}
1505
1506static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1507			struct srp_request *req)
1508{
1509	struct srp_target_port *target = ch->target;
1510	struct scatterlist *scat;
1511	struct srp_cmd *cmd = req->cmd->buf;
1512	int len, nents, count;
1513	struct srp_device *dev;
1514	struct ib_device *ibdev;
1515	struct srp_map_state state;
1516	struct srp_indirect_buf *indirect_hdr;
1517	u32 table_len;
1518	u8 fmt;
1519
1520	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1521		return sizeof (struct srp_cmd);
1522
1523	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1524	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
1525		shost_printk(KERN_WARNING, target->scsi_host,
1526			     PFX "Unhandled data direction %d\n",
1527			     scmnd->sc_data_direction);
1528		return -EINVAL;
1529	}
1530
1531	nents = scsi_sg_count(scmnd);
1532	scat  = scsi_sglist(scmnd);
1533
1534	dev = target->srp_host->srp_dev;
1535	ibdev = dev->dev;
1536
1537	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1538	if (unlikely(count == 0))
1539		return -EIO;
1540
1541	fmt = SRP_DATA_DESC_DIRECT;
1542	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);
1543
1544	if (count == 1 && !register_always) {
1545		/*
1546		 * The midlayer only generated a single gather/scatter
1547		 * entry, or DMA mapping coalesced everything to a
1548		 * single entry.  So a direct descriptor along with
1549		 * the DMA MR suffices.
1550		 */
1551		struct srp_direct_buf *buf = (void *) cmd->add_data;
1552
1553		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1554		buf->key = cpu_to_be32(target->rkey);
1555		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1556
1557		req->nmdesc = 0;
1558		goto map_complete;
1559	}
1560
1561	/*
1562	 * We have more than one scatter/gather entry, so build our indirect
1563	 * descriptor table, trying to merge as many entries as we can.
1564	 */
1565	indirect_hdr = (void *) cmd->add_data;
1566
1567	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1568				   target->indirect_size, DMA_TO_DEVICE);
1569
1570	memset(&state, 0, sizeof(state));
1571	srp_map_sg(&state, ch, req, scat, count);
1572
1573	/* We've mapped the request, now pull as much of the indirect
1574	 * descriptor table as we can into the command buffer. If this
1575	 * target is not using an external indirect table, we are
1576	 * guaranteed to fit into the command, as the SCSI layer won't
1577	 * give us more S/G entries than we allow.
1578	 */
1579	if (state.ndesc == 1) {
1580		/*
1581		 * Memory registration collapsed the sg-list into one entry,
1582		 * so use a direct descriptor.
1583		 */
1584		struct srp_direct_buf *buf = (void *) cmd->add_data;
1585
1586		*buf = req->indirect_desc[0];
1587		goto map_complete;
1588	}
1589
1590	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1591						!target->allow_ext_sg)) {
1592		shost_printk(KERN_ERR, target->scsi_host,
1593			     "Could not fit S/G list into SRP_CMD\n");
1594		return -EIO;
1595	}
1596
1597	count = min(state.ndesc, target->cmd_sg_cnt);
1598	table_len = state.ndesc * sizeof (struct srp_direct_buf);
1599
1600	fmt = SRP_DATA_DESC_INDIRECT;
1601	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1602	len += count * sizeof (struct srp_direct_buf);
1603
1604	memcpy(indirect_hdr->desc_list, req->indirect_desc,
1605	       count * sizeof (struct srp_direct_buf));
1606
1607	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1608	indirect_hdr->table_desc.key = cpu_to_be32(target->rkey);
1609	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1610	indirect_hdr->len = cpu_to_be32(state.total_len);
1611
1612	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1613		cmd->data_out_desc_cnt = count;
1614	else
1615		cmd->data_in_desc_cnt = count;
1616
1617	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1618				      DMA_TO_DEVICE);
1619
1620map_complete:
1621	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1622		cmd->buf_fmt = fmt << 4;
1623	else
1624		cmd->buf_fmt = fmt;
1625
1626	return len;
1627}
1628
1629/*
1630 * Return an IU and possible credit to the free pool
1631 */
1632static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1633			  enum srp_iu_type iu_type)
1634{
1635	unsigned long flags;
1636
1637	spin_lock_irqsave(&ch->lock, flags);
1638	list_add(&iu->list, &ch->free_tx);
1639	if (iu_type != SRP_IU_RSP)
1640		++ch->req_lim;
1641	spin_unlock_irqrestore(&ch->lock, flags);
1642}
1643
1644/*
1645 * Must be called with ch->lock held to protect req_lim and free_tx.
1646 * If IU is not sent, it must be returned using srp_put_tx_iu().
1647 *
1648 * Note:
1649 * An upper limit for the number of allocated information units for each
1650 * request type is:
1651 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1652 *   more than Scsi_Host.can_queue requests.
1653 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1654 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1655 *   one unanswered SRP request to an initiator.
1656 */
1657static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1658				      enum srp_iu_type iu_type)
1659{
1660	struct srp_target_port *target = ch->target;
1661	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1662	struct srp_iu *iu;
1663
1664	srp_send_completion(ch->send_cq, ch);
1665
1666	if (list_empty(&ch->free_tx))
1667		return NULL;
1668
1669	/* Initiator responses to target requests do not consume credits */
1670	if (iu_type != SRP_IU_RSP) {
1671		if (ch->req_lim <= rsv) {
1672			++target->zero_req_lim;
1673			return NULL;
1674		}
1675
1676		--ch->req_lim;
1677	}
1678
1679	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1680	list_del(&iu->list);
1681	return iu;
1682}
1683
1684static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1685{
1686	struct srp_target_port *target = ch->target;
1687	struct ib_sge list;
1688	struct ib_send_wr wr, *bad_wr;
1689
1690	list.addr   = iu->dma;
1691	list.length = len;
1692	list.lkey   = target->lkey;
1693
1694	wr.next       = NULL;
1695	wr.wr_id      = (uintptr_t) iu;
1696	wr.sg_list    = &list;
1697	wr.num_sge    = 1;
1698	wr.opcode     = IB_WR_SEND;
1699	wr.send_flags = IB_SEND_SIGNALED;
1700
1701	return ib_post_send(ch->qp, &wr, &bad_wr);
1702}
1703
1704static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1705{
1706	struct srp_target_port *target = ch->target;
1707	struct ib_recv_wr wr, *bad_wr;
1708	struct ib_sge list;
1709
1710	list.addr   = iu->dma;
1711	list.length = iu->size;
1712	list.lkey   = target->lkey;
1713
1714	wr.next     = NULL;
1715	wr.wr_id    = (uintptr_t) iu;
1716	wr.sg_list  = &list;
1717	wr.num_sge  = 1;
1718
1719	return ib_post_recv(ch->qp, &wr, &bad_wr);
1720}
1721
1722static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1723{
1724	struct srp_target_port *target = ch->target;
1725	struct srp_request *req;
1726	struct scsi_cmnd *scmnd;
1727	unsigned long flags;
1728
1729	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1730		spin_lock_irqsave(&ch->lock, flags);
1731		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1732		spin_unlock_irqrestore(&ch->lock, flags);
1733
1734		ch->tsk_mgmt_status = -1;
1735		if (be32_to_cpu(rsp->resp_data_len) >= 4)
1736			ch->tsk_mgmt_status = rsp->data[3];
1737		complete(&ch->tsk_mgmt_done);
1738	} else {
1739		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1740		if (scmnd) {
1741			req = (void *)scmnd->host_scribble;
1742			scmnd = srp_claim_req(ch, req, NULL, scmnd);
1743		}
1744		if (!scmnd) {
1745			shost_printk(KERN_ERR, target->scsi_host,
1746				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1747				     rsp->tag, ch - target->ch, ch->qp->qp_num);
1748
1749			spin_lock_irqsave(&ch->lock, flags);
1750			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1751			spin_unlock_irqrestore(&ch->lock, flags);
1752
1753			return;
1754		}
1755		scmnd->result = rsp->status;
1756
1757		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1758			memcpy(scmnd->sense_buffer, rsp->data +
1759			       be32_to_cpu(rsp->resp_data_len),
1760			       min_t(int, be32_to_cpu(rsp->sense_data_len),
1761				     SCSI_SENSE_BUFFERSIZE));
1762		}
1763
1764		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1765			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1766		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1767			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1768		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1769			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1770		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1771			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1772
1773		srp_free_req(ch, req, scmnd,
1774			     be32_to_cpu(rsp->req_lim_delta));
1775
1776		scmnd->host_scribble = NULL;
1777		scmnd->scsi_done(scmnd);
1778	}
1779}
1780
1781static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1782			       void *rsp, int len)
1783{
1784	struct srp_target_port *target = ch->target;
1785	struct ib_device *dev = target->srp_host->srp_dev->dev;
1786	unsigned long flags;
1787	struct srp_iu *iu;
1788	int err;
1789
1790	spin_lock_irqsave(&ch->lock, flags);
1791	ch->req_lim += req_delta;
1792	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1793	spin_unlock_irqrestore(&ch->lock, flags);
1794
1795	if (!iu) {
1796		shost_printk(KERN_ERR, target->scsi_host, PFX
1797			     "no IU available to send response\n");
1798		return 1;
1799	}
1800
1801	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1802	memcpy(iu->buf, rsp, len);
1803	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1804
1805	err = srp_post_send(ch, iu, len);
1806	if (err) {
1807		shost_printk(KERN_ERR, target->scsi_host, PFX
1808			     "unable to post response: %d\n", err);
1809		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1810	}
1811
1812	return err;
1813}
1814
1815static void srp_process_cred_req(struct srp_rdma_ch *ch,
1816				 struct srp_cred_req *req)
1817{
1818	struct srp_cred_rsp rsp = {
1819		.opcode = SRP_CRED_RSP,
1820		.tag = req->tag,
1821	};
1822	s32 delta = be32_to_cpu(req->req_lim_delta);
1823
1824	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1825		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1826			     "problems processing SRP_CRED_REQ\n");
1827}
1828
1829static void srp_process_aer_req(struct srp_rdma_ch *ch,
1830				struct srp_aer_req *req)
1831{
1832	struct srp_target_port *target = ch->target;
1833	struct srp_aer_rsp rsp = {
1834		.opcode = SRP_AER_RSP,
1835		.tag = req->tag,
1836	};
1837	s32 delta = be32_to_cpu(req->req_lim_delta);
1838
1839	shost_printk(KERN_ERR, target->scsi_host, PFX
1840		     "ignoring AER for LUN %llu\n", be64_to_cpu(req->lun));
1841
1842	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1843		shost_printk(KERN_ERR, target->scsi_host, PFX
1844			     "problems processing SRP_AER_REQ\n");
1845}
1846
1847static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
1848{
1849	struct srp_target_port *target = ch->target;
1850	struct ib_device *dev = target->srp_host->srp_dev->dev;
1851	struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1852	int res;
1853	u8 opcode;
1854
1855	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1856				   DMA_FROM_DEVICE);
1857
1858	opcode = *(u8 *) iu->buf;
1859
1860	if (0) {
1861		shost_printk(KERN_ERR, target->scsi_host,
1862			     PFX "recv completion, opcode 0x%02x\n", opcode);
1863		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1864			       iu->buf, wc->byte_len, true);
1865	}
1866
1867	switch (opcode) {
1868	case SRP_RSP:
1869		srp_process_rsp(ch, iu->buf);
1870		break;
1871
1872	case SRP_CRED_REQ:
1873		srp_process_cred_req(ch, iu->buf);
1874		break;
1875
1876	case SRP_AER_REQ:
1877		srp_process_aer_req(ch, iu->buf);
1878		break;
1879
1880	case SRP_T_LOGOUT:
1881		/* XXX Handle target logout */
1882		shost_printk(KERN_WARNING, target->scsi_host,
1883			     PFX "Got target logout request\n");
1884		break;
1885
1886	default:
1887		shost_printk(KERN_WARNING, target->scsi_host,
1888			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1889		break;
1890	}
1891
1892	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1893				      DMA_FROM_DEVICE);
1894
1895	res = srp_post_recv(ch, iu);
1896	if (res != 0)
1897		shost_printk(KERN_ERR, target->scsi_host,
1898			     PFX "Recv failed with error code %d\n", res);
1899}
1900
1901/**
1902 * srp_tl_err_work() - handle a transport layer error
1903 * @work: Work structure embedded in an SRP target port.
1904 *
1905 * Note: This function may get invoked before the rport has been created,
1906 * hence the target->rport test.
1907 */
1908static void srp_tl_err_work(struct work_struct *work)
1909{
1910	struct srp_target_port *target;
1911
1912	target = container_of(work, struct srp_target_port, tl_err_work);
1913	if (target->rport)
1914		srp_start_tl_fail_timers(target->rport);
1915}
1916
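/**
 * srp_handle_qp_err() - process an error completion
 * @wr_id:     ID of the work request that failed.
 * @wc_status: Completion status.
 * @send_err:  True for a failed send, false for a failed receive.
 * @ch:        RDMA channel on which the error occurred.
 *
 * A completion for the special SRP_LAST_WR_ID work request only wakes up the
 * context waiting on @ch->done. For any other work request the failure is
 * reported and the transport layer error handler is scheduled if the channel
 * is still connected, and the queue pair is marked as being in the error
 * state.
 */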
1917static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
1918			      bool send_err, struct srp_rdma_ch *ch)
1919{
1920	struct srp_target_port *target = ch->target;
1921
1922	if (wr_id == SRP_LAST_WR_ID) {
1923		complete(&ch->done);
1924		return;
1925	}
1926
1927	if (ch->connected && !target->qp_in_error) {
1928		if (wr_id & LOCAL_INV_WR_ID_MASK) {
1929			shost_printk(KERN_ERR, target->scsi_host, PFX
1930				     "LOCAL_INV failed with status %d\n",
1931				     wc_status);
1932		} else if (wr_id & FAST_REG_WR_ID_MASK) {
1933			shost_printk(KERN_ERR, target->scsi_host, PFX
1934				     "FAST_REG_MR failed status %d\n",
1935				     wc_status);
1936		} else {
1937			shost_printk(KERN_ERR, target->scsi_host,
1938				     PFX "failed %s status %d for iu %p\n",
1939				     send_err ? "send" : "receive",
1940				     wc_status, (void *)(uintptr_t)wr_id);
1941		}
1942		queue_work(system_long_wq, &target->tl_err_work);
1943	}
1944	target->qp_in_error = true;
1945}
1946
1947static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
1948{
1949	struct srp_rdma_ch *ch = ch_ptr;
1950	struct ib_wc wc;
1951
1952	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1953	while (ib_poll_cq(cq, 1, &wc) > 0) {
1954		if (likely(wc.status == IB_WC_SUCCESS)) {
1955			srp_handle_recv(ch, &wc);
1956		} else {
1957			srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
1958		}
1959	}
1960}
1961
1962static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
1963{
1964	struct srp_rdma_ch *ch = ch_ptr;
1965	struct ib_wc wc;
1966	struct srp_iu *iu;
1967
1968	while (ib_poll_cq(cq, 1, &wc) > 0) {
1969		if (likely(wc.status == IB_WC_SUCCESS)) {
1970			iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1971			list_add(&iu->list, &ch->free_tx);
1972		} else {
1973			srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
1974		}
1975	}
1976}
1977
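/**
 * srp_queuecommand() - queue a SCSI command for transmission to the target
 * @shost: SCSI host to which @scmnd has been queued.
 * @scmnd: SCSI command to send.
 *
 * Selects an RDMA channel based on the block layer tag, builds an SRP_CMD
 * information unit, maps the data buffers and posts the send work request.
 * Returns 0 upon success and SCSI_MLQUEUE_HOST_BUSY if the command has to be
 * requeued by the SCSI mid-layer.
 */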
1978static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1979{
1980	struct srp_target_port *target = host_to_target(shost);
1981	struct srp_rport *rport = target->rport;
1982	struct srp_rdma_ch *ch;
1983	struct srp_request *req;
1984	struct srp_iu *iu;
1985	struct srp_cmd *cmd;
1986	struct ib_device *dev;
1987	unsigned long flags;
1988	u32 tag;
1989	u16 idx;
1990	int len, ret;
1991	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
1992
1993	/*
1994	 * The SCSI EH thread is the only context from which srp_queuecommand()
1995	 * can get invoked for blocked devices (SDEV_BLOCK /
1996	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
1997	 * locking the rport mutex if invoked from inside the SCSI EH.
1998	 */
1999	if (in_scsi_eh)
2000		mutex_lock(&rport->mutex);
2001
2002	scmnd->result = srp_chkready(target->rport);
2003	if (unlikely(scmnd->result))
2004		goto err;
2005
2006	WARN_ON_ONCE(scmnd->request->tag < 0);
2007	tag = blk_mq_unique_tag(scmnd->request);
2008	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2009	idx = blk_mq_unique_tag_to_tag(tag);
2010	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2011		  dev_name(&shost->shost_gendev), tag, idx,
2012		  target->req_ring_size);
2013
2014	spin_lock_irqsave(&ch->lock, flags);
2015	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2016	spin_unlock_irqrestore(&ch->lock, flags);
2017
2018	if (!iu)
2019		goto err;
2020
2021	req = &ch->req_ring[idx];
2022	dev = target->srp_host->srp_dev->dev;
2023	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2024				   DMA_TO_DEVICE);
2025
2026	scmnd->host_scribble = (void *) req;
2027
2028	cmd = iu->buf;
2029	memset(cmd, 0, sizeof *cmd);
2030
2031	cmd->opcode = SRP_CMD;
2032	cmd->lun    = cpu_to_be64((u64) scmnd->device->lun << 48);
2033	cmd->tag    = tag;
2034	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2035
2036	req->scmnd    = scmnd;
2037	req->cmd      = iu;
2038
2039	len = srp_map_data(scmnd, ch, req);
2040	if (len < 0) {
2041		shost_printk(KERN_ERR, target->scsi_host,
2042			     PFX "Failed to map data (%d)\n", len);
2043		/*
2044		 * If we ran out of memory descriptors (-ENOMEM) because an
2045		 * application is queuing many requests with more than
2046		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2047		 * to reduce queue depth temporarily.
2048		 */
2049		scmnd->result = len == -ENOMEM ?
2050			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2051		goto err_iu;
2052	}
2053
2054	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2055				      DMA_TO_DEVICE);
2056
2057	if (srp_post_send(ch, iu, len)) {
2058		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2059		goto err_unmap;
2060	}
2061
2062	ret = 0;
2063
2064unlock_rport:
2065	if (in_scsi_eh)
2066		mutex_unlock(&rport->mutex);
2067
2068	return ret;
2069
2070err_unmap:
2071	srp_unmap_data(scmnd, ch, req);
2072
2073err_iu:
2074	srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2075
2076	/*
	 * Prevent the loops that iterate over the request ring from
	 * encountering a dangling SCSI command pointer.
2079	 */
2080	req->scmnd = NULL;
2081
2082err:
2083	if (scmnd->result) {
2084		scmnd->scsi_done(scmnd);
2085		ret = 0;
2086	} else {
2087		ret = SCSI_MLQUEUE_HOST_BUSY;
2088	}
2089
2090	goto unlock_rport;
2091}
2092
2093/*
2094 * Note: the resources allocated in this function are freed in
2095 * srp_free_ch_ib().
2096 */
2097static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2098{
2099	struct srp_target_port *target = ch->target;
2100	int i;
2101
2102	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2103			      GFP_KERNEL);
2104	if (!ch->rx_ring)
2105		goto err_no_ring;
2106	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2107			      GFP_KERNEL);
2108	if (!ch->tx_ring)
2109		goto err_no_ring;
2110
2111	for (i = 0; i < target->queue_size; ++i) {
2112		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2113					      ch->max_ti_iu_len,
2114					      GFP_KERNEL, DMA_FROM_DEVICE);
2115		if (!ch->rx_ring[i])
2116			goto err;
2117	}
2118
2119	for (i = 0; i < target->queue_size; ++i) {
2120		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2121					      target->max_iu_len,
2122					      GFP_KERNEL, DMA_TO_DEVICE);
2123		if (!ch->tx_ring[i])
2124			goto err;
2125
2126		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2127	}
2128
2129	return 0;
2130
2131err:
2132	for (i = 0; i < target->queue_size; ++i) {
2133		srp_free_iu(target->srp_host, ch->rx_ring[i]);
2134		srp_free_iu(target->srp_host, ch->tx_ring[i]);
2135	}
2136
2137
2138err_no_ring:
2139	kfree(ch->tx_ring);
2140	ch->tx_ring = NULL;
2141	kfree(ch->rx_ring);
2142	ch->rx_ring = NULL;
2143
2144	return -ENOMEM;
2145}
2146
2147static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2148{
2149	uint64_t T_tr_ns, max_compl_time_ms;
2150	uint32_t rq_tmo_jiffies;
2151
2152	/*
2153	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2154	 * table 91), both the QP timeout and the retry count have to be set
2155	 * for RC QP's during the RTR to RTS transition.
2156	 */
2157	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2158		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2159
2160	/*
2161	 * Set target->rq_tmo_jiffies to one second more than the largest time
2162	 * it can take before an error completion is generated. See also
2163	 * C9-140..142 in the IBTA spec for more information about how to
2164	 * convert the QP Local ACK Timeout value to nanoseconds.
2165	 */
2166	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2167	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2168	do_div(max_compl_time_ms, NSEC_PER_MSEC);
2169	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2170
2171	return rq_tmo_jiffies;
2172}
2173
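/**
 * srp_cm_rep_handler() - process a connection reply (SRP login response)
 * @cm_id: IB CM connection identifier.
 * @lrsp:  SRP login response received from the target.
 * @ch:    RDMA channel for which the reply has been received.
 *
 * Processes the login response, transitions the queue pair through the RTR
 * and RTS states, posts the receive buffers and sends an RTU. The result is
 * stored in @ch->status.
 */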
2174static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2175			       struct srp_login_rsp *lrsp,
2176			       struct srp_rdma_ch *ch)
2177{
2178	struct srp_target_port *target = ch->target;
2179	struct ib_qp_attr *qp_attr = NULL;
2180	int attr_mask = 0;
2181	int ret;
2182	int i;
2183
2184	if (lrsp->opcode == SRP_LOGIN_RSP) {
2185		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2186		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2187
2188		/*
2189		 * Reserve credits for task management so we don't
2190		 * bounce requests back to the SCSI mid-layer.
2191		 */
2192		target->scsi_host->can_queue
2193			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2194			      target->scsi_host->can_queue);
2195		target->scsi_host->cmd_per_lun
2196			= min_t(int, target->scsi_host->can_queue,
2197				target->scsi_host->cmd_per_lun);
2198	} else {
2199		shost_printk(KERN_WARNING, target->scsi_host,
2200			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2201		ret = -ECONNRESET;
2202		goto error;
2203	}
2204
2205	if (!ch->rx_ring) {
2206		ret = srp_alloc_iu_bufs(ch);
2207		if (ret)
2208			goto error;
2209	}
2210
2211	ret = -ENOMEM;
2212	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2213	if (!qp_attr)
2214		goto error;
2215
2216	qp_attr->qp_state = IB_QPS_RTR;
2217	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2218	if (ret)
2219		goto error_free;
2220
2221	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2222	if (ret)
2223		goto error_free;
2224
2225	for (i = 0; i < target->queue_size; i++) {
2226		struct srp_iu *iu = ch->rx_ring[i];
2227
2228		ret = srp_post_recv(ch, iu);
2229		if (ret)
2230			goto error_free;
2231	}
2232
2233	qp_attr->qp_state = IB_QPS_RTS;
2234	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2235	if (ret)
2236		goto error_free;
2237
2238	target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2239
2240	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2241	if (ret)
2242		goto error_free;
2243
2244	ret = ib_send_cm_rtu(cm_id, NULL, 0);
2245
2246error_free:
2247	kfree(qp_attr);
2248
2249error:
2250	ch->status = ret;
2251}
2252
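/*
 * Translate an IB CM REJ into a channel status code, following the redirect
 * information if the reject reason provides any.
 */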
2253static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2254			       struct ib_cm_event *event,
2255			       struct srp_rdma_ch *ch)
2256{
2257	struct srp_target_port *target = ch->target;
2258	struct Scsi_Host *shost = target->scsi_host;
2259	struct ib_class_port_info *cpi;
2260	int opcode;
2261
2262	switch (event->param.rej_rcvd.reason) {
2263	case IB_CM_REJ_PORT_CM_REDIRECT:
2264		cpi = event->param.rej_rcvd.ari;
2265		ch->path.dlid = cpi->redirect_lid;
2266		ch->path.pkey = cpi->redirect_pkey;
2267		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2268		memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2269
2270		ch->status = ch->path.dlid ?
2271			SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2272		break;
2273
2274	case IB_CM_REJ_PORT_REDIRECT:
2275		if (srp_target_is_topspin(target)) {
2276			/*
2277			 * Topspin/Cisco SRP gateways incorrectly send
2278			 * reject reason code 25 when they mean 24
2279			 * (port redirect).
2280			 */
2281			memcpy(ch->path.dgid.raw,
2282			       event->param.rej_rcvd.ari, 16);
2283
2284			shost_printk(KERN_DEBUG, shost,
2285				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2286				     be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2287				     be64_to_cpu(ch->path.dgid.global.interface_id));
2288
2289			ch->status = SRP_PORT_REDIRECT;
2290		} else {
2291			shost_printk(KERN_WARNING, shost,
2292				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2293			ch->status = -ECONNRESET;
2294		}
2295		break;
2296
2297	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2298		shost_printk(KERN_WARNING, shost,
2299			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2300		ch->status = -ECONNRESET;
2301		break;
2302
2303	case IB_CM_REJ_CONSUMER_DEFINED:
2304		opcode = *(u8 *) event->private_data;
2305		if (opcode == SRP_LOGIN_REJ) {
2306			struct srp_login_rej *rej = event->private_data;
2307			u32 reason = be32_to_cpu(rej->reason);
2308
2309			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2310				shost_printk(KERN_WARNING, shost,
2311					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2312			else
2313				shost_printk(KERN_WARNING, shost, PFX
2314					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2315					     target->sgid.raw,
2316					     target->orig_dgid.raw, reason);
		} else {
			shost_printk(KERN_WARNING, shost,
				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
				     opcode);
		}
2321		ch->status = -ECONNRESET;
2322		break;
2323
2324	case IB_CM_REJ_STALE_CONN:
2325		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2326		ch->status = SRP_STALE_CONN;
2327		break;
2328
2329	default:
2330		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2331			     event->param.rej_rcvd.reason);
2332		ch->status = -ECONNRESET;
2333	}
2334}
2335
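/*
 * IB CM event handler for SRP RDMA channels. Translates CM events into
 * channel status updates and completes ch->done for the events another
 * context is waiting for.
 */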
2336static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2337{
2338	struct srp_rdma_ch *ch = cm_id->context;
2339	struct srp_target_port *target = ch->target;
2340	int comp = 0;
2341
2342	switch (event->event) {
2343	case IB_CM_REQ_ERROR:
2344		shost_printk(KERN_DEBUG, target->scsi_host,
2345			     PFX "Sending CM REQ failed\n");
2346		comp = 1;
2347		ch->status = -ECONNRESET;
2348		break;
2349
2350	case IB_CM_REP_RECEIVED:
2351		comp = 1;
2352		srp_cm_rep_handler(cm_id, event->private_data, ch);
2353		break;
2354
2355	case IB_CM_REJ_RECEIVED:
2356		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2357		comp = 1;
2358
2359		srp_cm_rej_handler(cm_id, event, ch);
2360		break;
2361
2362	case IB_CM_DREQ_RECEIVED:
2363		shost_printk(KERN_WARNING, target->scsi_host,
2364			     PFX "DREQ received - connection closed\n");
2365		ch->connected = false;
2366		if (ib_send_cm_drep(cm_id, NULL, 0))
2367			shost_printk(KERN_ERR, target->scsi_host,
2368				     PFX "Sending CM DREP failed\n");
2369		queue_work(system_long_wq, &target->tl_err_work);
2370		break;
2371
2372	case IB_CM_TIMEWAIT_EXIT:
2373		shost_printk(KERN_ERR, target->scsi_host,
2374			     PFX "connection closed\n");
2375		comp = 1;
2376
2377		ch->status = 0;
2378		break;
2379
2380	case IB_CM_MRA_RECEIVED:
2381	case IB_CM_DREQ_ERROR:
2382	case IB_CM_DREP_RECEIVED:
2383		break;
2384
2385	default:
2386		shost_printk(KERN_WARNING, target->scsi_host,
2387			     PFX "Unhandled CM event %d\n", event->event);
2388		break;
2389	}
2390
2391	if (comp)
2392		complete(&ch->done);
2393
2394	return 0;
2395}
2396
2397/**
 * srp_change_queue_depth() - set the queue depth of a SCSI device
 * @sdev:   SCSI device.
 * @qdepth: Requested queue depth.
 *
 * Returns the new queue depth.
2403 */
2404static int
2405srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2406{
2407	if (!sdev->tagged_supported)
2408		qdepth = 1;
2409	return scsi_change_queue_depth(sdev, qdepth);
2410}
2411
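/**
 * srp_send_tsk_mgmt() - send an SRP task management request
 * @ch:      RDMA channel over which to send the request.
 * @req_tag: Tag of the command the task management function refers to.
 * @lun:     LUN the task management function applies to.
 * @func:    Task management function, e.g. SRP_TSK_ABORT_TASK.
 *
 * Returns 0 if the request has been sent and a response was received within
 * SRP_ABORT_TIMEOUT_MS, and -1 otherwise.
 */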
2412static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag,
2413			     unsigned int lun, u8 func)
2414{
2415	struct srp_target_port *target = ch->target;
2416	struct srp_rport *rport = target->rport;
2417	struct ib_device *dev = target->srp_host->srp_dev->dev;
2418	struct srp_iu *iu;
2419	struct srp_tsk_mgmt *tsk_mgmt;
2420
2421	if (!ch->connected || target->qp_in_error)
2422		return -1;
2423
2424	init_completion(&ch->tsk_mgmt_done);
2425
2426	/*
	 * Lock the rport mutex to prevent srp_create_ch_ib() from being
	 * invoked while a task management function is being sent.
2429	 */
2430	mutex_lock(&rport->mutex);
2431	spin_lock_irq(&ch->lock);
2432	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2433	spin_unlock_irq(&ch->lock);
2434
2435	if (!iu) {
2436		mutex_unlock(&rport->mutex);
2437
2438		return -1;
2439	}
2440
2441	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2442				   DMA_TO_DEVICE);
2443	tsk_mgmt = iu->buf;
2444	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2445
	tsk_mgmt->opcode	= SRP_TSK_MGMT;
2447	tsk_mgmt->lun		= cpu_to_be64((u64) lun << 48);
2448	tsk_mgmt->tag		= req_tag | SRP_TAG_TSK_MGMT;
2449	tsk_mgmt->tsk_mgmt_func = func;
2450	tsk_mgmt->task_tag	= req_tag;
2451
2452	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2453				      DMA_TO_DEVICE);
2454	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2455		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2456		mutex_unlock(&rport->mutex);
2457
2458		return -1;
2459	}
2460	mutex_unlock(&rport->mutex);
2461
2462	if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2463					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2464		return -1;
2465
2466	return 0;
2467}
2468
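/*
 * SCSI error handler callback: abort a single command by sending an
 * SRP_TSK_ABORT_TASK task management function for the tag of that command.
 */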
2469static int srp_abort(struct scsi_cmnd *scmnd)
2470{
2471	struct srp_target_port *target = host_to_target(scmnd->device->host);
2472	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2473	u32 tag;
2474	u16 ch_idx;
2475	struct srp_rdma_ch *ch;
2476	int ret;
2477
2478	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2479
2480	if (!req)
2481		return SUCCESS;
2482	tag = blk_mq_unique_tag(scmnd->request);
2483	ch_idx = blk_mq_unique_tag_to_hwq(tag);
2484	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2485		return SUCCESS;
2486	ch = &target->ch[ch_idx];
2487	if (!srp_claim_req(ch, req, NULL, scmnd))
2488		return SUCCESS;
2489	shost_printk(KERN_ERR, target->scsi_host,
2490		     "Sending SRP abort for tag %#x\n", tag);
2491	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2492			      SRP_TSK_ABORT_TASK) == 0)
2493		ret = SUCCESS;
2494	else if (target->rport->state == SRP_RPORT_LOST)
2495		ret = FAST_IO_FAIL;
2496	else
2497		ret = FAILED;
2498	srp_free_req(ch, req, scmnd, 0);
2499	scmnd->result = DID_ABORT << 16;
2500	scmnd->scsi_done(scmnd);
2501
2502	return ret;
2503}
2504
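/*
 * SCSI error handler callback: send an SRP LUN RESET task management function
 * and finish the outstanding requests for the affected device on all channels.
 */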
2505static int srp_reset_device(struct scsi_cmnd *scmnd)
2506{
2507	struct srp_target_port *target = host_to_target(scmnd->device->host);
2508	struct srp_rdma_ch *ch;
	int i, j;
2510
2511	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2512
2513	ch = &target->ch[0];
2514	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2515			      SRP_TSK_LUN_RESET))
2516		return FAILED;
2517	if (ch->tsk_mgmt_status)
2518		return FAILED;
2519
2520	for (i = 0; i < target->ch_count; i++) {
2521		ch = &target->ch[i];
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];
2524
2525			srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2526		}
2527	}
2528
2529	return SUCCESS;
2530}
2531
2532static int srp_reset_host(struct scsi_cmnd *scmnd)
2533{
2534	struct srp_target_port *target = host_to_target(scmnd->device->host);
2535
2536	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2537
2538	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2539}
2540
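/*
 * For disk devices, raise the block layer request timeout to at least
 * target->rq_tmo_jiffies so that requests do not time out before a transport
 * layer error completion can be generated.
 */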
2541static int srp_slave_configure(struct scsi_device *sdev)
2542{
2543	struct Scsi_Host *shost = sdev->host;
2544	struct srp_target_port *target = host_to_target(shost);
2545	struct request_queue *q = sdev->request_queue;
2546	unsigned long timeout;
2547
2548	if (sdev->type == TYPE_DISK) {
2549		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2550		blk_queue_rq_timeout(q, timeout);
2551	}
2552
2553	return 0;
2554}
2555
2556static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2557			   char *buf)
2558{
2559	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2560
2561	return sprintf(buf, "0x%016llx\n",
2562		       (unsigned long long) be64_to_cpu(target->id_ext));
2563}
2564
2565static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2566			     char *buf)
2567{
2568	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2569
2570	return sprintf(buf, "0x%016llx\n",
2571		       (unsigned long long) be64_to_cpu(target->ioc_guid));
2572}
2573
2574static ssize_t show_service_id(struct device *dev,
2575			       struct device_attribute *attr, char *buf)
2576{
2577	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2578
2579	return sprintf(buf, "0x%016llx\n",
2580		       (unsigned long long) be64_to_cpu(target->service_id));
2581}
2582
2583static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2584			 char *buf)
2585{
2586	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2587
2588	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2589}
2590
2591static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2592			 char *buf)
2593{
2594	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2595
2596	return sprintf(buf, "%pI6\n", target->sgid.raw);
2597}
2598
2599static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2600			 char *buf)
2601{
2602	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2603	struct srp_rdma_ch *ch = &target->ch[0];
2604
2605	return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2606}
2607
2608static ssize_t show_orig_dgid(struct device *dev,
2609			      struct device_attribute *attr, char *buf)
2610{
2611	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2612
2613	return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2614}
2615
2616static ssize_t show_req_lim(struct device *dev,
2617			    struct device_attribute *attr, char *buf)
2618{
2619	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2620	struct srp_rdma_ch *ch;
2621	int i, req_lim = INT_MAX;
2622
2623	for (i = 0; i < target->ch_count; i++) {
2624		ch = &target->ch[i];
2625		req_lim = min(req_lim, ch->req_lim);
2626	}
2627	return sprintf(buf, "%d\n", req_lim);
2628}
2629
2630static ssize_t show_zero_req_lim(struct device *dev,
2631				 struct device_attribute *attr, char *buf)
2632{
2633	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2634
2635	return sprintf(buf, "%d\n", target->zero_req_lim);
2636}
2637
2638static ssize_t show_local_ib_port(struct device *dev,
2639				  struct device_attribute *attr, char *buf)
2640{
2641	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2642
2643	return sprintf(buf, "%d\n", target->srp_host->port);
2644}
2645
2646static ssize_t show_local_ib_device(struct device *dev,
2647				    struct device_attribute *attr, char *buf)
2648{
2649	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2650
2651	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2652}
2653
2654static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2655			     char *buf)
2656{
2657	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2658
2659	return sprintf(buf, "%d\n", target->ch_count);
2660}
2661
2662static ssize_t show_comp_vector(struct device *dev,
2663				struct device_attribute *attr, char *buf)
2664{
2665	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2666
2667	return sprintf(buf, "%d\n", target->comp_vector);
2668}
2669
2670static ssize_t show_tl_retry_count(struct device *dev,
2671				   struct device_attribute *attr, char *buf)
2672{
2673	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2674
2675	return sprintf(buf, "%d\n", target->tl_retry_count);
2676}
2677
2678static ssize_t show_cmd_sg_entries(struct device *dev,
2679				   struct device_attribute *attr, char *buf)
2680{
2681	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2682
2683	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2684}
2685
2686static ssize_t show_allow_ext_sg(struct device *dev,
2687				 struct device_attribute *attr, char *buf)
2688{
2689	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2690
2691	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2692}
2693
2694static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
2695static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
2696static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
2697static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
2698static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
2699static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
2700static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
2701static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
2702static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
2703static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
2704static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2705static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
2706static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
2707static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
2708static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
2709static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2710
2711static struct device_attribute *srp_host_attrs[] = {
2712	&dev_attr_id_ext,
2713	&dev_attr_ioc_guid,
2714	&dev_attr_service_id,
2715	&dev_attr_pkey,
2716	&dev_attr_sgid,
2717	&dev_attr_dgid,
2718	&dev_attr_orig_dgid,
2719	&dev_attr_req_lim,
2720	&dev_attr_zero_req_lim,
2721	&dev_attr_local_ib_port,
2722	&dev_attr_local_ib_device,
2723	&dev_attr_ch_count,
2724	&dev_attr_comp_vector,
2725	&dev_attr_tl_retry_count,
2726	&dev_attr_cmd_sg_entries,
2727	&dev_attr_allow_ext_sg,
2728	NULL
2729};
2730
2731static struct scsi_host_template srp_template = {
2732	.module				= THIS_MODULE,
2733	.name				= "InfiniBand SRP initiator",
2734	.proc_name			= DRV_NAME,
2735	.slave_configure		= srp_slave_configure,
2736	.info				= srp_target_info,
2737	.queuecommand			= srp_queuecommand,
2738	.change_queue_depth             = srp_change_queue_depth,
2739	.eh_abort_handler		= srp_abort,
2740	.eh_device_reset_handler	= srp_reset_device,
2741	.eh_host_reset_handler		= srp_reset_host,
2742	.skip_settle_delay		= true,
2743	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
2744	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
2745	.this_id			= -1,
2746	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
2747	.use_clustering			= ENABLE_CLUSTERING,
2748	.shost_attrs			= srp_host_attrs,
2749	.use_blk_tags			= 1,
2750	.track_queue_depth		= 1,
2751};
2752
2753static int srp_sdev_count(struct Scsi_Host *host)
2754{
2755	struct scsi_device *sdev;
2756	int c = 0;
2757
2758	shost_for_each_device(sdev, host)
2759		c++;
2760
2761	return c;
2762}
2763
2764/*
2765 * Return values:
2766 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2767 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2768 *    removal has been scheduled.
2769 * 0 and target->state != SRP_TARGET_REMOVED upon success.
2770 */
2771static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2772{
2773	struct srp_rport_identifiers ids;
2774	struct srp_rport *rport;
2775
2776	target->state = SRP_TARGET_SCANNING;
2777	sprintf(target->target_name, "SRP.T10:%016llX",
2778		 (unsigned long long) be64_to_cpu(target->id_ext));
2779
2780	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2781		return -ENODEV;
2782
2783	memcpy(ids.port_id, &target->id_ext, 8);
2784	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2785	ids.roles = SRP_RPORT_ROLE_TARGET;
2786	rport = srp_rport_add(target->scsi_host, &ids);
2787	if (IS_ERR(rport)) {
2788		scsi_remove_host(target->scsi_host);
2789		return PTR_ERR(rport);
2790	}
2791
2792	rport->lld_data = target;
2793	target->rport = rport;
2794
2795	spin_lock(&host->target_lock);
2796	list_add_tail(&target->list, &host->target_list);
2797	spin_unlock(&host->target_lock);
2798
2799	scsi_scan_target(&target->scsi_host->shost_gendev,
2800			 0, target->scsi_id, SCAN_WILD_CARD, 0);
2801
2802	if (srp_connected_ch(target) < target->ch_count ||
2803	    target->qp_in_error) {
2804		shost_printk(KERN_INFO, target->scsi_host,
2805			     PFX "SCSI scan failed - removing SCSI host\n");
2806		srp_queue_remove_work(target);
2807		goto out;
2808	}
2809
	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2811		 dev_name(&target->scsi_host->shost_gendev),
2812		 srp_sdev_count(target->scsi_host));
2813
2814	spin_lock_irq(&target->lock);
2815	if (target->state == SRP_TARGET_SCANNING)
2816		target->state = SRP_TARGET_LIVE;
2817	spin_unlock_irq(&target->lock);
2818
2819out:
2820	return 0;
2821}
2822
2823static void srp_release_dev(struct device *dev)
2824{
2825	struct srp_host *host =
2826		container_of(dev, struct srp_host, dev);
2827
2828	complete(&host->released);
2829}
2830
2831static struct class srp_class = {
2832	.name    = "infiniband_srp",
2833	.dev_release = srp_release_dev
2834};
2835
2836/**
2837 * srp_conn_unique() - check whether the connection to a target is unique
2838 * @host:   SRP host.
2839 * @target: SRP target port.
2840 */
2841static bool srp_conn_unique(struct srp_host *host,
2842			    struct srp_target_port *target)
2843{
2844	struct srp_target_port *t;
2845	bool ret = false;
2846
2847	if (target->state == SRP_TARGET_REMOVED)
2848		goto out;
2849
2850	ret = true;
2851
2852	spin_lock(&host->target_lock);
2853	list_for_each_entry(t, &host->target_list, list) {
2854		if (t != target &&
2855		    target->id_ext == t->id_ext &&
2856		    target->ioc_guid == t->ioc_guid &&
2857		    target->initiator_ext == t->initiator_ext) {
2858			ret = false;
2859			break;
2860		}
2861	}
2862	spin_unlock(&host->target_lock);
2863
2864out:
2865	return ret;
2866}
2867
2868/*
2869 * Target ports are added by writing
2870 *
2871 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2872 *     pkey=<P_Key>,service_id=<service ID>
2873 *
2874 * to the add_target sysfs attribute.
2875 */
2876enum {
2877	SRP_OPT_ERR		= 0,
2878	SRP_OPT_ID_EXT		= 1 << 0,
2879	SRP_OPT_IOC_GUID	= 1 << 1,
2880	SRP_OPT_DGID		= 1 << 2,
2881	SRP_OPT_PKEY		= 1 << 3,
2882	SRP_OPT_SERVICE_ID	= 1 << 4,
2883	SRP_OPT_MAX_SECT	= 1 << 5,
2884	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
2885	SRP_OPT_IO_CLASS	= 1 << 7,
2886	SRP_OPT_INITIATOR_EXT	= 1 << 8,
2887	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
2888	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
2889	SRP_OPT_SG_TABLESIZE	= 1 << 11,
2890	SRP_OPT_COMP_VECTOR	= 1 << 12,
2891	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
2892	SRP_OPT_QUEUE_SIZE	= 1 << 14,
2893	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
2894				   SRP_OPT_IOC_GUID	|
2895				   SRP_OPT_DGID		|
2896				   SRP_OPT_PKEY		|
2897				   SRP_OPT_SERVICE_ID),
2898};
2899
2900static const match_table_t srp_opt_tokens = {
2901	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
2902	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
2903	{ SRP_OPT_DGID,			"dgid=%s" 		},
2904	{ SRP_OPT_PKEY,			"pkey=%x" 		},
2905	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
2906	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
2907	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
2908	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
2909	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
2910	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
2911	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
2912	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
2913	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
2914	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
2915	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
2916	{ SRP_OPT_ERR,			NULL 			}
2917};
2918
2919static int srp_parse_options(const char *buf, struct srp_target_port *target)
2920{
2921	char *options, *sep_opt;
2922	char *p;
2923	char dgid[3];
2924	substring_t args[MAX_OPT_ARGS];
2925	int opt_mask = 0;
2926	int token;
2927	int ret = -EINVAL;
2928	int i;
2929
2930	options = kstrdup(buf, GFP_KERNEL);
2931	if (!options)
2932		return -ENOMEM;
2933
2934	sep_opt = options;
2935	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
2936		if (!*p)
2937			continue;
2938
2939		token = match_token(p, srp_opt_tokens, args);
2940		opt_mask |= token;
2941
2942		switch (token) {
2943		case SRP_OPT_ID_EXT:
2944			p = match_strdup(args);
2945			if (!p) {
2946				ret = -ENOMEM;
2947				goto out;
2948			}
2949			target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
2950			kfree(p);
2951			break;
2952
2953		case SRP_OPT_IOC_GUID:
2954			p = match_strdup(args);
2955			if (!p) {
2956				ret = -ENOMEM;
2957				goto out;
2958			}
2959			target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
2960			kfree(p);
2961			break;
2962
2963		case SRP_OPT_DGID:
2964			p = match_strdup(args);
2965			if (!p) {
2966				ret = -ENOMEM;
2967				goto out;
2968			}
2969			if (strlen(p) != 32) {
2970				pr_warn("bad dest GID parameter '%s'\n", p);
2971				kfree(p);
2972				goto out;
2973			}
2974
2975			for (i = 0; i < 16; ++i) {
2976				strlcpy(dgid, p + i * 2, sizeof(dgid));
2977				if (sscanf(dgid, "%hhx",
2978					   &target->orig_dgid.raw[i]) < 1) {
2979					ret = -EINVAL;
2980					kfree(p);
2981					goto out;
2982				}
2983			}
2984			kfree(p);
2985			break;
2986
2987		case SRP_OPT_PKEY:
2988			if (match_hex(args, &token)) {
2989				pr_warn("bad P_Key parameter '%s'\n", p);
2990				goto out;
2991			}
2992			target->pkey = cpu_to_be16(token);
2993			break;
2994
2995		case SRP_OPT_SERVICE_ID:
2996			p = match_strdup(args);
2997			if (!p) {
2998				ret = -ENOMEM;
2999				goto out;
3000			}
3001			target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3002			kfree(p);
3003			break;
3004
3005		case SRP_OPT_MAX_SECT:
3006			if (match_int(args, &token)) {
3007				pr_warn("bad max sect parameter '%s'\n", p);
3008				goto out;
3009			}
3010			target->scsi_host->max_sectors = token;
3011			break;
3012
3013		case SRP_OPT_QUEUE_SIZE:
3014			if (match_int(args, &token) || token < 1) {
3015				pr_warn("bad queue_size parameter '%s'\n", p);
3016				goto out;
3017			}
3018			target->scsi_host->can_queue = token;
3019			target->queue_size = token + SRP_RSP_SQ_SIZE +
3020					     SRP_TSK_MGMT_SQ_SIZE;
3021			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3022				target->scsi_host->cmd_per_lun = token;
3023			break;
3024
3025		case SRP_OPT_MAX_CMD_PER_LUN:
3026			if (match_int(args, &token) || token < 1) {
3027				pr_warn("bad max cmd_per_lun parameter '%s'\n",
3028					p);
3029				goto out;
3030			}
3031			target->scsi_host->cmd_per_lun = token;
3032			break;
3033
3034		case SRP_OPT_IO_CLASS:
3035			if (match_hex(args, &token)) {
3036				pr_warn("bad IO class parameter '%s'\n", p);
3037				goto out;
3038			}
3039			if (token != SRP_REV10_IB_IO_CLASS &&
3040			    token != SRP_REV16A_IB_IO_CLASS) {
3041				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3042					token, SRP_REV10_IB_IO_CLASS,
3043					SRP_REV16A_IB_IO_CLASS);
3044				goto out;
3045			}
3046			target->io_class = token;
3047			break;
3048
3049		case SRP_OPT_INITIATOR_EXT:
3050			p = match_strdup(args);
3051			if (!p) {
3052				ret = -ENOMEM;
3053				goto out;
3054			}
3055			target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3056			kfree(p);
3057			break;
3058
3059		case SRP_OPT_CMD_SG_ENTRIES:
3060			if (match_int(args, &token) || token < 1 || token > 255) {
3061				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3062					p);
3063				goto out;
3064			}
3065			target->cmd_sg_cnt = token;
3066			break;
3067
3068		case SRP_OPT_ALLOW_EXT_SG:
3069			if (match_int(args, &token)) {
3070				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3071				goto out;
3072			}
3073			target->allow_ext_sg = !!token;
3074			break;
3075
3076		case SRP_OPT_SG_TABLESIZE:
3077			if (match_int(args, &token) || token < 1 ||
3078					token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
3079				pr_warn("bad max sg_tablesize parameter '%s'\n",
3080					p);
3081				goto out;
3082			}
3083			target->sg_tablesize = token;
3084			break;
3085
3086		case SRP_OPT_COMP_VECTOR:
3087			if (match_int(args, &token) || token < 0) {
3088				pr_warn("bad comp_vector parameter '%s'\n", p);
3089				goto out;
3090			}
3091			target->comp_vector = token;
3092			break;
3093
3094		case SRP_OPT_TL_RETRY_COUNT:
3095			if (match_int(args, &token) || token < 2 || token > 7) {
3096				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3097					p);
3098				goto out;
3099			}
3100			target->tl_retry_count = token;
3101			break;
3102
3103		default:
3104			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3105				p);
3106			goto out;
3107		}
3108	}
3109
3110	if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3111		ret = 0;
3112	else
3113		for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3114			if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3115			    !(srp_opt_tokens[i].token & opt_mask))
3116				pr_warn("target creation request is missing parameter '%s'\n",
3117					srp_opt_tokens[i].pattern);
3118
	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue &&
	    (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3121		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3122			target->scsi_host->cmd_per_lun,
3123			target->scsi_host->can_queue);
3124
3125out:
3126	kfree(options);
3127	return ret;
3128}
3129
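/**
 * srp_create_target() - process a write to the add_target sysfs attribute
 * @dev:   SRP host device.
 * @attr:  The add_target device attribute.
 * @buf:   Login parameter string; see srp_parse_options() for the format.
 * @count: Size of @buf in bytes.
 *
 * Allocates a SCSI host, parses the login parameters, creates the RDMA
 * channels and connects them to the target port. Returns @count upon success
 * or a negative error code upon failure.
 */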
3130static ssize_t srp_create_target(struct device *dev,
3131				 struct device_attribute *attr,
3132				 const char *buf, size_t count)
3133{
3134	struct srp_host *host =
3135		container_of(dev, struct srp_host, dev);
3136	struct Scsi_Host *target_host;
3137	struct srp_target_port *target;
3138	struct srp_rdma_ch *ch;
3139	struct srp_device *srp_dev = host->srp_dev;
3140	struct ib_device *ibdev = srp_dev->dev;
3141	int ret, node_idx, node, cpu, i;
3142	bool multich = false;
3143
3144	target_host = scsi_host_alloc(&srp_template,
3145				      sizeof (struct srp_target_port));
3146	if (!target_host)
3147		return -ENOMEM;
3148
3149	target_host->transportt  = ib_srp_transport_template;
3150	target_host->max_channel = 0;
3151	target_host->max_id      = 1;
3152	target_host->max_lun     = SRP_MAX_LUN;
3153	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3154
3155	target = host_to_target(target_host);
3156
3157	target->io_class	= SRP_REV16A_IB_IO_CLASS;
3158	target->scsi_host	= target_host;
3159	target->srp_host	= host;
3160	target->lkey		= host->srp_dev->mr->lkey;
3161	target->rkey		= host->srp_dev->mr->rkey;
3162	target->cmd_sg_cnt	= cmd_sg_entries;
3163	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
3164	target->allow_ext_sg	= allow_ext_sg;
3165	target->tl_retry_count	= 7;
3166	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;
3167
3168	/*
	 * Prevent the SCSI host from being removed by srp_remove_target()
	 * before this function returns.
3171	 */
3172	scsi_host_get(target->scsi_host);
3173
3174	mutex_lock(&host->add_target_mutex);
3175
3176	ret = srp_parse_options(buf, target);
3177	if (ret)
3178		goto out;
3179
3180	ret = scsi_init_shared_tag_map(target_host, target_host->can_queue);
3181	if (ret)
3182		goto out;
3183
3184	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3185
3186	if (!srp_conn_unique(target->srp_host, target)) {
3187		shost_printk(KERN_INFO, target->scsi_host,
3188			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3189			     be64_to_cpu(target->id_ext),
3190			     be64_to_cpu(target->ioc_guid),
3191			     be64_to_cpu(target->initiator_ext));
3192		ret = -EEXIST;
3193		goto out;
3194	}
3195
3196	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3197	    target->cmd_sg_cnt < target->sg_tablesize) {
3198		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3199		target->sg_tablesize = target->cmd_sg_cnt;
3200	}
3201
3202	target_host->sg_tablesize = target->sg_tablesize;
3203	target->indirect_size = target->sg_tablesize *
3204				sizeof (struct srp_direct_buf);
3205	target->max_iu_len = sizeof (struct srp_cmd) +
3206			     sizeof (struct srp_indirect_buf) +
3207			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3208
3209	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3210	INIT_WORK(&target->remove_work, srp_remove_work);
3211	spin_lock_init(&target->lock);
3212	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid);
3213	if (ret)
3214		goto out;
3215
3216	ret = -ENOMEM;
3217	target->ch_count = max_t(unsigned, num_online_nodes(),
3218				 min(ch_count ? :
3219				     min(4 * num_online_nodes(),
3220					 ibdev->num_comp_vectors),
3221				     num_online_cpus()));
3222	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3223			     GFP_KERNEL);
3224	if (!target->ch)
3225		goto out;
3226
3227	node_idx = 0;
3228	for_each_online_node(node) {
3229		const int ch_start = (node_idx * target->ch_count /
3230				      num_online_nodes());
3231		const int ch_end = ((node_idx + 1) * target->ch_count /
3232				    num_online_nodes());
3233		const int cv_start = (node_idx * ibdev->num_comp_vectors /
3234				      num_online_nodes() + target->comp_vector)
3235				     % ibdev->num_comp_vectors;
3236		const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3237				    num_online_nodes() + target->comp_vector)
3238				   % ibdev->num_comp_vectors;
3239		int cpu_idx = 0;
3240
3241		for_each_online_cpu(cpu) {
3242			if (cpu_to_node(cpu) != node)
3243				continue;
3244			if (ch_start + cpu_idx >= ch_end)
3245				continue;
3246			ch = &target->ch[ch_start + cpu_idx];
3247			ch->target = target;
3248			ch->comp_vector = cv_start == cv_end ? cv_start :
3249				cv_start + cpu_idx % (cv_end - cv_start);
3250			spin_lock_init(&ch->lock);
3251			INIT_LIST_HEAD(&ch->free_tx);
3252			ret = srp_new_cm_id(ch);
3253			if (ret)
3254				goto err_disconnect;
3255
3256			ret = srp_create_ch_ib(ch);
3257			if (ret)
3258				goto err_disconnect;
3259
3260			ret = srp_alloc_req_data(ch);
3261			if (ret)
3262				goto err_disconnect;
3263
3264			ret = srp_connect_ch(ch, multich);
3265			if (ret) {
3266				shost_printk(KERN_ERR, target->scsi_host,
3267					     PFX "Connection %d/%d failed\n",
3268					     ch_start + cpu_idx,
3269					     target->ch_count);
3270				if (node_idx == 0 && cpu_idx == 0) {
3271					goto err_disconnect;
3272				} else {
3273					srp_free_ch_ib(target, ch);
3274					srp_free_req_data(target, ch);
3275					target->ch_count = ch - target->ch;
3276					goto connected;
3277				}
3278			}
3279
3280			multich = true;
3281			cpu_idx++;
3282		}
3283		node_idx++;
3284	}
3285
3286connected:
3287	target->scsi_host->nr_hw_queues = target->ch_count;
3288
3289	ret = srp_add_target(host, target);
3290	if (ret)
3291		goto err_disconnect;
3292
3293	if (target->state != SRP_TARGET_REMOVED) {
3294		shost_printk(KERN_DEBUG, target->scsi_host, PFX
3295			     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3296			     be64_to_cpu(target->id_ext),
3297			     be64_to_cpu(target->ioc_guid),
3298			     be16_to_cpu(target->pkey),
3299			     be64_to_cpu(target->service_id),
3300			     target->sgid.raw, target->orig_dgid.raw);
3301	}
3302
3303	ret = count;
3304
3305out:
3306	mutex_unlock(&host->add_target_mutex);
3307
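	/*
	 * Drop the reference taken by scsi_host_get() above. Upon failure
	 * also drop the reference obtained by scsi_host_alloc() so that the
	 * SCSI host gets freed.
	 */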
3308	scsi_host_put(target->scsi_host);
3309	if (ret < 0)
3310		scsi_host_put(target->scsi_host);
3311
3312	return ret;
3313
3314err_disconnect:
3315	srp_disconnect_target(target);
3316
3317	for (i = 0; i < target->ch_count; i++) {
3318		ch = &target->ch[i];
3319		srp_free_ch_ib(target, ch);
3320		srp_free_req_data(target, ch);
3321	}
3322
3323	kfree(target->ch);
3324	goto out;
3325}
3326
3327static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3328
3329static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3330			  char *buf)
3331{
3332	struct srp_host *host = container_of(dev, struct srp_host, dev);
3333
3334	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3335}
3336
3337static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3338
3339static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3340			 char *buf)
3341{
3342	struct srp_host *host = container_of(dev, struct srp_host, dev);
3343
3344	return sprintf(buf, "%d\n", host->port);
3345}
3346
3347static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3348
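/*
 * Register an SRP host for HCA port @port in sysfs under the infiniband_srp
 * class and create its add_target, ibdev and port attributes.
 */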
3349static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3350{
3351	struct srp_host *host;
3352
3353	host = kzalloc(sizeof *host, GFP_KERNEL);
3354	if (!host)
3355		return NULL;
3356
3357	INIT_LIST_HEAD(&host->target_list);
3358	spin_lock_init(&host->target_lock);
3359	init_completion(&host->released);
3360	mutex_init(&host->add_target_mutex);
3361	host->srp_dev = device;
3362	host->port = port;
3363
3364	host->dev.class = &srp_class;
3365	host->dev.parent = device->dev->dma_device;
3366	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3367
3368	if (device_register(&host->dev))
3369		goto free_host;
3370	if (device_create_file(&host->dev, &dev_attr_add_target))
3371		goto err_class;
3372	if (device_create_file(&host->dev, &dev_attr_ibdev))
3373		goto err_class;
3374	if (device_create_file(&host->dev, &dev_attr_port))
3375		goto err_class;
3376
3377	return host;
3378
3379err_class:
3380	device_unregister(&host->dev);
3381
3382free_host:
3383	kfree(host);
3384
3385	return NULL;
3386}
3387
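/*
 * Called by the IB core for every RDMA device. Queries the device
 * capabilities, chooses between FMR and fast registration, allocates a
 * protection domain and a DMA memory region and registers one SRP host per
 * port.
 */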
3388static void srp_add_one(struct ib_device *device)
3389{
3390	struct srp_device *srp_dev;
3391	struct ib_device_attr *dev_attr;
3392	struct srp_host *host;
3393	int mr_page_shift, s, e, p;
3394	u64 max_pages_per_mr;
3395
3396	dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
3397	if (!dev_attr)
3398		return;
3399
3400	if (ib_query_device(device, dev_attr)) {
3401		pr_warn("Query device failed for %s\n", device->name);
3402		goto free_attr;
3403	}
3404
3405	srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
3406	if (!srp_dev)
3407		goto free_attr;
3408
3409	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3410			    device->map_phys_fmr && device->unmap_fmr);
3411	srp_dev->has_fr = (dev_attr->device_cap_flags &
3412			   IB_DEVICE_MEM_MGT_EXTENSIONS);
3413	if (!srp_dev->has_fmr && !srp_dev->has_fr)
3414		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3415
3416	srp_dev->use_fast_reg = (srp_dev->has_fr &&
3417				 (!srp_dev->has_fmr || prefer_fr));
3418
3419	/*
3420	 * Use the smallest page size supported by the HCA, down to a
3421	 * minimum of 4096 bytes. We're unlikely to build large sglists
3422	 * out of smaller entries.
3423	 */
3424	mr_page_shift		= max(12, ffs(dev_attr->page_size_cap) - 1);
3425	srp_dev->mr_page_size	= 1 << mr_page_shift;
3426	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
3427	max_pages_per_mr	= dev_attr->max_mr_size;
3428	do_div(max_pages_per_mr, srp_dev->mr_page_size);
3429	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3430					  max_pages_per_mr);
3431	if (srp_dev->use_fast_reg) {
3432		srp_dev->max_pages_per_mr =
3433			min_t(u32, srp_dev->max_pages_per_mr,
3434			      dev_attr->max_fast_reg_page_list_len);
3435	}
3436	srp_dev->mr_max_size	= srp_dev->mr_page_size *
3437				   srp_dev->max_pages_per_mr;
3438	pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3439		 device->name, mr_page_shift, dev_attr->max_mr_size,
3440		 dev_attr->max_fast_reg_page_list_len,
3441		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3442
3443	INIT_LIST_HEAD(&srp_dev->dev_list);
3444
3445	srp_dev->dev = device;
3446	srp_dev->pd  = ib_alloc_pd(device);
3447	if (IS_ERR(srp_dev->pd))
3448		goto free_dev;
3449
3450	srp_dev->mr = ib_get_dma_mr(srp_dev->pd,
3451				    IB_ACCESS_LOCAL_WRITE |
3452				    IB_ACCESS_REMOTE_READ |
3453				    IB_ACCESS_REMOTE_WRITE);
3454	if (IS_ERR(srp_dev->mr))
3455		goto err_pd;
3456
3457	if (device->node_type == RDMA_NODE_IB_SWITCH) {
3458		s = 0;
3459		e = 0;
3460	} else {
3461		s = 1;
3462		e = device->phys_port_cnt;
3463	}
3464
3465	for (p = s; p <= e; ++p) {
3466		host = srp_add_port(srp_dev, p);
3467		if (host)
3468			list_add_tail(&host->list, &srp_dev->dev_list);
3469	}
3470
3471	ib_set_client_data(device, &srp_client, srp_dev);
3472
3473	goto free_attr;
3474
3475err_pd:
3476	ib_dealloc_pd(srp_dev->pd);
3477
3478free_dev:
3479	kfree(srp_dev);
3480
3481free_attr:
3482	kfree(dev_attr);
3483}
3484
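/*
 * Called by the IB core when an RDMA device is being removed. Unregisters
 * all SRP hosts for the device, schedules removal of their target ports,
 * waits for that work to finish and releases the memory region and the
 * protection domain.
 */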
3485static void srp_remove_one(struct ib_device *device)
3486{
3487	struct srp_device *srp_dev;
3488	struct srp_host *host, *tmp_host;
3489	struct srp_target_port *target;
3490
3491	srp_dev = ib_get_client_data(device, &srp_client);
3492	if (!srp_dev)
3493		return;
3494
3495	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3496		device_unregister(&host->dev);
3497		/*
3498		 * Wait for the sysfs entry to go away, so that no new
3499		 * target ports can be created.
3500		 */
3501		wait_for_completion(&host->released);
3502
3503		/*
3504		 * Remove all target ports.
3505		 */
3506		spin_lock(&host->target_lock);
3507		list_for_each_entry(target, &host->target_list, list)
3508			srp_queue_remove_work(target);
3509		spin_unlock(&host->target_lock);
3510
3511		/*
3512		 * Wait for tl_err and target port removal tasks.
3513		 */
3514		flush_workqueue(system_long_wq);
3515		flush_workqueue(srp_remove_wq);
3516
3517		kfree(host);
3518	}
3519
3520	ib_dereg_mr(srp_dev->mr);
3521	ib_dealloc_pd(srp_dev->pd);
3522
3523	kfree(srp_dev);
3524}
3525
3526static struct srp_function_template ib_srp_transport_functions = {
3527	.has_rport_state	 = true,
3528	.reset_timer_if_blocked	 = true,
3529	.reconnect_delay	 = &srp_reconnect_delay,
3530	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
3531	.dev_loss_tmo		 = &srp_dev_loss_tmo,
3532	.reconnect		 = srp_rport_reconnect,
3533	.rport_delete		 = srp_rport_delete,
3534	.terminate_rport_io	 = srp_terminate_io,
3535};
3536
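/*
 * Module initialization: validate and adjust the scatter/gather module
 * parameters, create the srp_remove workqueue and register the SRP transport
 * template, the infiniband_srp class, the SA client and the IB client.
 */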
3537static int __init srp_init_module(void)
3538{
3539	int ret;
3540
3541	BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
3542
3543	if (srp_sg_tablesize) {
3544		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3545		if (!cmd_sg_entries)
3546			cmd_sg_entries = srp_sg_tablesize;
3547	}
3548
3549	if (!cmd_sg_entries)
3550		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3551
3552	if (cmd_sg_entries > 255) {
3553		pr_warn("Clamping cmd_sg_entries to 255\n");
3554		cmd_sg_entries = 255;
3555	}
3556
3557	if (!indirect_sg_entries)
3558		indirect_sg_entries = cmd_sg_entries;
3559	else if (indirect_sg_entries < cmd_sg_entries) {
3560		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3561			cmd_sg_entries);
3562		indirect_sg_entries = cmd_sg_entries;
3563	}
3564
3565	srp_remove_wq = create_workqueue("srp_remove");
3566	if (!srp_remove_wq) {
3567		ret = -ENOMEM;
3568		goto out;
3569	}
3570
3571	ret = -ENOMEM;
3572	ib_srp_transport_template =
3573		srp_attach_transport(&ib_srp_transport_functions);
3574	if (!ib_srp_transport_template)
3575		goto destroy_wq;
3576
3577	ret = class_register(&srp_class);
3578	if (ret) {
3579		pr_err("couldn't register class infiniband_srp\n");
3580		goto release_tr;
3581	}
3582
3583	ib_sa_register_client(&srp_sa_client);
3584
3585	ret = ib_register_client(&srp_client);
3586	if (ret) {
3587		pr_err("couldn't register IB client\n");
3588		goto unreg_sa;
3589	}
3590
3591out:
3592	return ret;
3593
3594unreg_sa:
3595	ib_sa_unregister_client(&srp_sa_client);
3596	class_unregister(&srp_class);
3597
3598release_tr:
3599	srp_release_transport(ib_srp_transport_template);
3600
3601destroy_wq:
3602	destroy_workqueue(srp_remove_wq);
3603	goto out;
3604}
3605
3606static void __exit srp_cleanup_module(void)
3607{
3608	ib_unregister_client(&srp_client);
3609	ib_sa_unregister_client(&srp_sa_client);
3610	class_unregister(&srp_class);
3611	srp_release_transport(ib_srp_transport_template);
3612	destroy_workqueue(srp_remove_wq);
3613}
3614
3615module_init(srp_init_module);
3616module_exit(srp_cleanup_module);
3617