1/*
2 * libcxgbi.c: Chelsio common library for T3/T4 iSCSI driver.
3 *
4 * Copyright (c) 2010 Chelsio Communications, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation.
9 *
10 * Written by: Karen Xie (kxie@chelsio.com)
11 * Written by: Rakesh Ranjan (rranjan@chelsio.com)
12 */
13
14#define pr_fmt(fmt)	KBUILD_MODNAME ":%s: " fmt, __func__
15
16#include <linux/skbuff.h>
17#include <linux/crypto.h>
18#include <linux/scatterlist.h>
19#include <linux/pci.h>
20#include <scsi/scsi.h>
21#include <scsi/scsi_cmnd.h>
22#include <scsi/scsi_host.h>
23#include <linux/if_vlan.h>
24#include <linux/inet.h>
25#include <net/dst.h>
26#include <net/route.h>
27#include <net/ipv6.h>
28#include <net/ip6_route.h>
29#include <net/addrconf.h>
30
31#include <linux/inetdevice.h>	/* ip_dev_find */
32#include <linux/module.h>
33#include <net/tcp.h>
34
35static unsigned int dbg_level;
36
37#include "libcxgbi.h"
38
39#define DRV_MODULE_NAME		"libcxgbi"
40#define DRV_MODULE_DESC		"Chelsio iSCSI driver library"
41#define DRV_MODULE_VERSION	"0.9.0"
42#define DRV_MODULE_RELDATE	"Jun. 2010"
43
44MODULE_AUTHOR("Chelsio Communications, Inc.");
45MODULE_DESCRIPTION(DRV_MODULE_DESC);
46MODULE_VERSION(DRV_MODULE_VERSION);
47MODULE_LICENSE("GPL");
48
49module_param(dbg_level, uint, 0644);
50MODULE_PARM_DESC(dbg_level, "libiscsi debug level (default=0)");
51
52
53/*
54 * cxgbi device management
55 * maintains a list of the cxgbi devices
56 */
/* Registered devices, linked through list_head; protected by cdev_mutex. */
static LIST_HEAD(cdev_list);
static DEFINE_MUTEX(cdev_mutex);

/*
 * The same devices linked a second time through rcu_node.  Writers take
 * cdev_rcu_lock; the _rcu lookup walks this list under rcu_read_lock().
 */
static LIST_HEAD(cdev_rcu_list);
static DEFINE_SPINLOCK(cdev_rcu_lock);
62
63int cxgbi_device_portmap_create(struct cxgbi_device *cdev, unsigned int base,
64				unsigned int max_conn)
65{
66	struct cxgbi_ports_map *pmap = &cdev->pmap;
67
68	pmap->port_csk = cxgbi_alloc_big_mem(max_conn *
69					     sizeof(struct cxgbi_sock *),
70					     GFP_KERNEL);
71	if (!pmap->port_csk) {
72		pr_warn("cdev 0x%p, portmap OOM %u.\n", cdev, max_conn);
73		return -ENOMEM;
74	}
75
76	pmap->max_connect = max_conn;
77	pmap->sport_base = base;
78	spin_lock_init(&pmap->lock);
79	return 0;
80}
81EXPORT_SYMBOL_GPL(cxgbi_device_portmap_create);
82
83void cxgbi_device_portmap_cleanup(struct cxgbi_device *cdev)
84{
85	struct cxgbi_ports_map *pmap = &cdev->pmap;
86	struct cxgbi_sock *csk;
87	int i;
88
89	for (i = 0; i < pmap->max_connect; i++) {
90		if (pmap->port_csk[i]) {
91			csk = pmap->port_csk[i];
92			pmap->port_csk[i] = NULL;
93			log_debug(1 << CXGBI_DBG_SOCK,
94				"csk 0x%p, cdev 0x%p, offload down.\n",
95				csk, cdev);
96			spin_lock_bh(&csk->lock);
97			cxgbi_sock_set_flag(csk, CTPF_OFFLOAD_DOWN);
98			cxgbi_sock_closed(csk);
99			spin_unlock_bh(&csk->lock);
100			cxgbi_sock_put(csk);
101		}
102	}
103}
104EXPORT_SYMBOL_GPL(cxgbi_device_portmap_cleanup);
105
106static inline void cxgbi_device_destroy(struct cxgbi_device *cdev)
107{
108	log_debug(1 << CXGBI_DBG_DEV,
109		"cdev 0x%p, p# %u.\n", cdev, cdev->nports);
110	cxgbi_hbas_remove(cdev);
111	cxgbi_device_portmap_cleanup(cdev);
112	if (cdev->dev_ddp_cleanup)
113		cdev->dev_ddp_cleanup(cdev);
114	else
115		cxgbi_ddp_cleanup(cdev);
116	if (cdev->ddp)
117		cxgbi_ddp_cleanup(cdev);
118	if (cdev->pmap.max_connect)
119		cxgbi_free_big_mem(cdev->pmap.port_csk);
120	kfree(cdev);
121}
122
123struct cxgbi_device *cxgbi_device_register(unsigned int extra,
124					   unsigned int nports)
125{
126	struct cxgbi_device *cdev;
127
128	cdev = kzalloc(sizeof(*cdev) + extra + nports *
129			(sizeof(struct cxgbi_hba *) +
130			 sizeof(struct net_device *)),
131			GFP_KERNEL);
132	if (!cdev) {
133		pr_warn("nport %d, OOM.\n", nports);
134		return NULL;
135	}
136	cdev->ports = (struct net_device **)(cdev + 1);
137	cdev->hbas = (struct cxgbi_hba **)(((char*)cdev->ports) + nports *
138						sizeof(struct net_device *));
139	if (extra)
140		cdev->dd_data = ((char *)cdev->hbas) +
141				nports * sizeof(struct cxgbi_hba *);
142	spin_lock_init(&cdev->pmap.lock);
143
144	mutex_lock(&cdev_mutex);
145	list_add_tail(&cdev->list_head, &cdev_list);
146	mutex_unlock(&cdev_mutex);
147
148	spin_lock(&cdev_rcu_lock);
149	list_add_tail_rcu(&cdev->rcu_node, &cdev_rcu_list);
150	spin_unlock(&cdev_rcu_lock);
151
152	log_debug(1 << CXGBI_DBG_DEV,
153		"cdev 0x%p, p# %u.\n", cdev, nports);
154	return cdev;
155}
156EXPORT_SYMBOL_GPL(cxgbi_device_register);
157
/**
 * cxgbi_device_unregister - unlink a device from both lists and destroy it
 * @cdev: device to remove
 *
 * The device is freed by cxgbi_device_destroy(), so @cdev must not be
 * used after this function returns.
 */
void cxgbi_device_unregister(struct cxgbi_device *cdev)
{
	log_debug(1 << CXGBI_DBG_DEV,
		"cdev 0x%p, p# %u,%s.\n",
		cdev, cdev->nports, cdev->nports ? cdev->ports[0]->name : "");

	mutex_lock(&cdev_mutex);
	list_del(&cdev->list_head);
	mutex_unlock(&cdev_mutex);

	spin_lock(&cdev_rcu_lock);
	list_del_rcu(&cdev->rcu_node);
	spin_unlock(&cdev_rcu_lock);
	/* let readers still walking cdev_rcu_list finish before freeing */
	synchronize_rcu();

	cxgbi_device_destroy(cdev);
}
175EXPORT_SYMBOL_GPL(cxgbi_device_unregister);
176
/**
 * cxgbi_device_unregister_all - unregister every device matching @flag
 * @flag: flag bits; a device matches when all of them are set in
 *	  cdev->flags
 */
void cxgbi_device_unregister_all(unsigned int flag)
{
	struct cxgbi_device *cdev, *tmp;

	mutex_lock(&cdev_mutex);
	list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) {
		if ((cdev->flags & flag) == flag) {
			/*
			 * cxgbi_device_unregister() takes cdev_mutex itself,
			 * so the lock is dropped around the call.
			 * NOTE(review): the list may change while unlocked and
			 * tmp was sampled beforehand -- confirm callers cannot
			 * race with other register/unregister calls.
			 */
			mutex_unlock(&cdev_mutex);
			cxgbi_device_unregister(cdev);
			mutex_lock(&cdev_mutex);
		}
	}
	mutex_unlock(&cdev_mutex);
}
191EXPORT_SYMBOL_GPL(cxgbi_device_unregister_all);
192
193struct cxgbi_device *cxgbi_device_find_by_lldev(void *lldev)
194{
195	struct cxgbi_device *cdev, *tmp;
196
197	mutex_lock(&cdev_mutex);
198	list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) {
199		if (cdev->lldev == lldev) {
200			mutex_unlock(&cdev_mutex);
201			return cdev;
202		}
203	}
204	mutex_unlock(&cdev_mutex);
205
206	log_debug(1 << CXGBI_DBG_DEV,
207		"lldev 0x%p, NO match found.\n", lldev);
208	return NULL;
209}
210EXPORT_SYMBOL_GPL(cxgbi_device_find_by_lldev);
211
212struct cxgbi_device *cxgbi_device_find_by_netdev(struct net_device *ndev,
213						 int *port)
214{
215	struct net_device *vdev = NULL;
216	struct cxgbi_device *cdev, *tmp;
217	int i;
218
219	if (ndev->priv_flags & IFF_802_1Q_VLAN) {
220		vdev = ndev;
221		ndev = vlan_dev_real_dev(ndev);
222		log_debug(1 << CXGBI_DBG_DEV,
223			"vlan dev %s -> %s.\n", vdev->name, ndev->name);
224	}
225
226	mutex_lock(&cdev_mutex);
227	list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) {
228		for (i = 0; i < cdev->nports; i++) {
229			if (ndev == cdev->ports[i]) {
230				cdev->hbas[i]->vdev = vdev;
231				mutex_unlock(&cdev_mutex);
232				if (port)
233					*port = i;
234				return cdev;
235			}
236		}
237	}
238	mutex_unlock(&cdev_mutex);
239	log_debug(1 << CXGBI_DBG_DEV,
240		"ndev 0x%p, %s, NO match found.\n", ndev, ndev->name);
241	return NULL;
242}
243EXPORT_SYMBOL_GPL(cxgbi_device_find_by_netdev);
244
245struct cxgbi_device *cxgbi_device_find_by_netdev_rcu(struct net_device *ndev,
246						     int *port)
247{
248	struct net_device *vdev = NULL;
249	struct cxgbi_device *cdev;
250	int i;
251
252	if (ndev->priv_flags & IFF_802_1Q_VLAN) {
253		vdev = ndev;
254		ndev = vlan_dev_real_dev(ndev);
255		pr_info("vlan dev %s -> %s.\n", vdev->name, ndev->name);
256	}
257
258	rcu_read_lock();
259	list_for_each_entry_rcu(cdev, &cdev_rcu_list, rcu_node) {
260		for (i = 0; i < cdev->nports; i++) {
261			if (ndev == cdev->ports[i]) {
262				cdev->hbas[i]->vdev = vdev;
263				rcu_read_unlock();
264				if (port)
265					*port = i;
266				return cdev;
267			}
268		}
269	}
270	rcu_read_unlock();
271
272	log_debug(1 << CXGBI_DBG_DEV,
273		  "ndev 0x%p, %s, NO match found.\n", ndev, ndev->name);
274	return NULL;
275}
276EXPORT_SYMBOL_GPL(cxgbi_device_find_by_netdev_rcu);
277
278#if IS_ENABLED(CONFIG_IPV6)
279static struct cxgbi_device *cxgbi_device_find_by_mac(struct net_device *ndev,
280						     int *port)
281{
282	struct net_device *vdev = NULL;
283	struct cxgbi_device *cdev, *tmp;
284	int i;
285
286	if (ndev->priv_flags & IFF_802_1Q_VLAN) {
287		vdev = ndev;
288		ndev = vlan_dev_real_dev(ndev);
289		pr_info("vlan dev %s -> %s.\n", vdev->name, ndev->name);
290	}
291
292	mutex_lock(&cdev_mutex);
293	list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) {
294		for (i = 0; i < cdev->nports; i++) {
295			if (!memcmp(ndev->dev_addr, cdev->ports[i]->dev_addr,
296				    MAX_ADDR_LEN)) {
297				cdev->hbas[i]->vdev = vdev;
298				mutex_unlock(&cdev_mutex);
299				if (port)
300					*port = i;
301				return cdev;
302			}
303		}
304	}
305	mutex_unlock(&cdev_mutex);
306	log_debug(1 << CXGBI_DBG_DEV,
307		  "ndev 0x%p, %s, NO match mac found.\n",
308		  ndev, ndev->name);
309	return NULL;
310}
311#endif
312
313void cxgbi_hbas_remove(struct cxgbi_device *cdev)
314{
315	int i;
316	struct cxgbi_hba *chba;
317
318	log_debug(1 << CXGBI_DBG_DEV,
319		"cdev 0x%p, p#%u.\n", cdev, cdev->nports);
320
321	for (i = 0; i < cdev->nports; i++) {
322		chba = cdev->hbas[i];
323		if (chba) {
324			cdev->hbas[i] = NULL;
325			iscsi_host_remove(chba->shost);
326			pci_dev_put(cdev->pdev);
327			iscsi_host_free(chba->shost);
328		}
329	}
330}
331EXPORT_SYMBOL_GPL(cxgbi_hbas_remove);
332
/**
 * cxgbi_hbas_add - create and register one iSCSI host per port
 * @cdev: device to attach the hosts to
 * @max_lun: value stored in each host's max_lun
 * @max_id: value stored in each host's max_id
 * @sht: SCSI host template passed to iscsi_host_alloc()
 * @stt: transport template stored in each host
 *
 * On any failure the hosts that were already added are removed again
 * through cxgbi_hbas_remove().
 *
 * Return: 0 on success, -ENOMEM if a host cannot be allocated, or the
 * error returned by iscsi_host_add().
 */
int cxgbi_hbas_add(struct cxgbi_device *cdev, u64 max_lun,
		unsigned int max_id, struct scsi_host_template *sht,
		struct scsi_transport_template *stt)
{
	struct cxgbi_hba *chba;
	struct Scsi_Host *shost;
	int i, err;

	log_debug(1 << CXGBI_DBG_DEV, "cdev 0x%p, p#%u.\n", cdev, cdev->nports);

	for (i = 0; i < cdev->nports; i++) {
		/* the cxgbi_hba lives in the host's private area */
		shost = iscsi_host_alloc(sht, sizeof(*chba), 1);
		if (!shost) {
			pr_info("0x%p, p%d, %s, host alloc failed.\n",
				cdev, i, cdev->ports[i]->name);
			err = -ENOMEM;
			goto err_out;
		}

		shost->transportt = stt;
		shost->max_lun = max_lun;
		shost->max_id = max_id;
		shost->max_channel = 0;
		shost->max_cmd_len = 16;

		chba = iscsi_host_priv(shost);
		chba->cdev = cdev;
		chba->ndev = cdev->ports[i];
		chba->shost = shost;

		log_debug(1 << CXGBI_DBG_DEV,
			"cdev 0x%p, p#%d %s: chba 0x%p.\n",
			cdev, i, cdev->ports[i]->name, chba);

		/* one PCI reference per host; dropped in cxgbi_hbas_remove() */
		pci_dev_get(cdev->pdev);
		err = iscsi_host_add(shost, &cdev->pdev->dev);
		if (err) {
			pr_info("cdev 0x%p, p#%d %s, host add failed.\n",
				cdev, i, cdev->ports[i]->name);
			/* this host never reached hbas[], so undo it here */
			pci_dev_put(cdev->pdev);
			scsi_host_put(shost);
			goto  err_out;
		}

		cdev->hbas[i] = chba;
	}

	return 0;

err_out:
	cxgbi_hbas_remove(cdev);
	return err;
}
386EXPORT_SYMBOL_GPL(cxgbi_hbas_add);
387
388/*
389 * iSCSI offload
390 *
391 * - source port management
392 *   To find a free source port in the port allocation map we use a very simple
393 *   rotor scheme to look for the next free port.
394 *
395 *   If a source port has been specified make sure that it doesn't collide with
396 *   our normal source port allocation map.  If it's outside the range of our
397 *   allocation/deallocation scheme just let them use it.
398 *
399 *   If the source port is outside our allocation range, the caller is
400 *   responsible for keeping track of their port usage.
401 */
402
403static struct cxgbi_sock *find_sock_on_port(struct cxgbi_device *cdev,
404					    unsigned char port_id)
405{
406	struct cxgbi_ports_map *pmap = &cdev->pmap;
407	unsigned int i;
408	unsigned int used;
409
410	if (!pmap->max_connect || !pmap->used)
411		return NULL;
412
413	spin_lock_bh(&pmap->lock);
414	used = pmap->used;
415	for (i = 0; used && i < pmap->max_connect; i++) {
416		struct cxgbi_sock *csk = pmap->port_csk[i];
417
418		if (csk) {
419			if (csk->port_id == port_id) {
420				spin_unlock_bh(&pmap->lock);
421				return csk;
422			}
423			used--;
424		}
425	}
426	spin_unlock_bh(&pmap->lock);
427
428	return NULL;
429}
430
/*
 * Reserve a source port for @csk from its device's port map.
 *
 * The search starts at the slot after pmap->next and wraps around.  On
 * success the port (sport_base + slot index) is stored in network byte
 * order in the connection's source address, the slot points at @csk, and
 * a reference on @csk is taken for the map.
 *
 * Returns 0 on success, -EADDRNOTAVAIL if the device has no port map or
 * every slot is in use, -EADDRINUSE if @csk already has a source port.
 */
static int sock_get_port(struct cxgbi_sock *csk)
{
	struct cxgbi_device *cdev = csk->cdev;
	struct cxgbi_ports_map *pmap = &cdev->pmap;
	unsigned int start;
	int idx;
	__be16 *port;

	if (!pmap->max_connect) {
		pr_err("cdev 0x%p, p#%u %s, NO port map.\n",
			   cdev, csk->port_id, cdev->ports[csk->port_id]->name);
		return -EADDRNOTAVAIL;
	}

	if (csk->csk_family == AF_INET)
		port = &csk->saddr.sin_port;
	else /* ipv6 */
		port = &csk->saddr6.sin6_port;

	if (*port) {
		pr_err("source port NON-ZERO %u.\n",
			ntohs(*port));
		return -EADDRINUSE;
	}

	spin_lock_bh(&pmap->lock);
	if (pmap->used >= pmap->max_connect) {
		spin_unlock_bh(&pmap->lock);
		pr_info("cdev 0x%p, p#%u %s, ALL ports used.\n",
			cdev, csk->port_id, cdev->ports[csk->port_id]->name);
		return -EADDRNOTAVAIL;
	}

	/* rotor: probe each slot once, beginning just past the last hit */
	start = idx = pmap->next;
	do {
		if (++idx >= pmap->max_connect)
			idx = 0;
		if (!pmap->port_csk[idx]) {
			pmap->used++;
			*port = htons(pmap->sport_base + idx);
			pmap->next = idx;
			pmap->port_csk[idx] = csk;
			spin_unlock_bh(&pmap->lock);
			/* reference held by the map slot; see sock_put_port() */
			cxgbi_sock_get(csk);
			log_debug(1 << CXGBI_DBG_SOCK,
				"cdev 0x%p, p#%u %s, p %u, %u.\n",
				cdev, csk->port_id,
				cdev->ports[csk->port_id]->name,
				pmap->sport_base + idx, pmap->next);
			return 0;
		}
	} while (idx != start);
	spin_unlock_bh(&pmap->lock);

	/* should not happen */
	pr_warn("cdev 0x%p, p#%u %s, next %u?\n",
		cdev, csk->port_id, cdev->ports[csk->port_id]->name,
		pmap->next);
	return -EADDRNOTAVAIL;
}
491
492static void sock_put_port(struct cxgbi_sock *csk)
493{
494	struct cxgbi_device *cdev = csk->cdev;
495	struct cxgbi_ports_map *pmap = &cdev->pmap;
496	__be16 *port;
497
498	if (csk->csk_family == AF_INET)
499		port = &csk->saddr.sin_port;
500	else /* ipv6 */
501		port = &csk->saddr6.sin6_port;
502
503	if (*port) {
504		int idx = ntohs(*port) - pmap->sport_base;
505
506		*port = 0;
507		if (idx < 0 || idx >= pmap->max_connect) {
508			pr_err("cdev 0x%p, p#%u %s, port %u OOR.\n",
509				cdev, csk->port_id,
510				cdev->ports[csk->port_id]->name,
511				ntohs(*port));
512			return;
513		}
514
515		spin_lock_bh(&pmap->lock);
516		pmap->port_csk[idx] = NULL;
517		pmap->used--;
518		spin_unlock_bh(&pmap->lock);
519
520		log_debug(1 << CXGBI_DBG_SOCK,
521			"cdev 0x%p, p#%u %s, release %u.\n",
522			cdev, csk->port_id, cdev->ports[csk->port_id]->name,
523			pmap->sport_base + idx);
524
525		cxgbi_sock_put(csk);
526	}
527}
528
529/*
530 * iscsi tcp connection
531 */
532void cxgbi_sock_free_cpl_skbs(struct cxgbi_sock *csk)
533{
534	if (csk->cpl_close) {
535		kfree_skb(csk->cpl_close);
536		csk->cpl_close = NULL;
537	}
538	if (csk->cpl_abort_req) {
539		kfree_skb(csk->cpl_abort_req);
540		csk->cpl_abort_req = NULL;
541	}
542	if (csk->cpl_abort_rpl) {
543		kfree_skb(csk->cpl_abort_rpl);
544		csk->cpl_abort_rpl = NULL;
545	}
546}
547EXPORT_SYMBOL_GPL(cxgbi_sock_free_cpl_skbs);
548
549static struct cxgbi_sock *cxgbi_sock_create(struct cxgbi_device *cdev)
550{
551	struct cxgbi_sock *csk = kzalloc(sizeof(*csk), GFP_NOIO);
552
553	if (!csk) {
554		pr_info("alloc csk %zu failed.\n", sizeof(*csk));
555		return NULL;
556	}
557
558	if (cdev->csk_alloc_cpls(csk) < 0) {
559		pr_info("csk 0x%p, alloc cpls failed.\n", csk);
560		kfree(csk);
561		return NULL;
562	}
563
564	spin_lock_init(&csk->lock);
565	kref_init(&csk->refcnt);
566	skb_queue_head_init(&csk->receive_queue);
567	skb_queue_head_init(&csk->write_queue);
568	setup_timer(&csk->retry_timer, NULL, (unsigned long)csk);
569	rwlock_init(&csk->callback_lock);
570	csk->cdev = cdev;
571	csk->flags = 0;
572	cxgbi_sock_set_state(csk, CTP_CLOSED);
573
574	log_debug(1 << CXGBI_DBG_SOCK, "cdev 0x%p, new csk 0x%p.\n", cdev, csk);
575
576	return csk;
577}
578
579static struct rtable *find_route_ipv4(struct flowi4 *fl4,
580				      __be32 saddr, __be32 daddr,
581				      __be16 sport, __be16 dport, u8 tos)
582{
583	struct rtable *rt;
584
585	rt = ip_route_output_ports(&init_net, fl4, NULL, daddr, saddr,
586				   dport, sport, IPPROTO_TCP, tos, 0);
587	if (IS_ERR(rt))
588		return NULL;
589
590	return rt;
591}
592
593static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr)
594{
595	struct sockaddr_in *daddr = (struct sockaddr_in *)dst_addr;
596	struct dst_entry *dst;
597	struct net_device *ndev;
598	struct cxgbi_device *cdev;
599	struct rtable *rt = NULL;
600	struct neighbour *n;
601	struct flowi4 fl4;
602	struct cxgbi_sock *csk = NULL;
603	unsigned int mtu = 0;
604	int port = 0xFFFF;
605	int err = 0;
606
607	rt = find_route_ipv4(&fl4, 0, daddr->sin_addr.s_addr, 0, daddr->sin_port, 0);
608	if (!rt) {
609		pr_info("no route to ipv4 0x%x, port %u.\n",
610			be32_to_cpu(daddr->sin_addr.s_addr),
611			be16_to_cpu(daddr->sin_port));
612		err = -ENETUNREACH;
613		goto err_out;
614	}
615	dst = &rt->dst;
616	n = dst_neigh_lookup(dst, &daddr->sin_addr.s_addr);
617	if (!n) {
618		err = -ENODEV;
619		goto rel_rt;
620	}
621	ndev = n->dev;
622
623	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
624		pr_info("multi-cast route %pI4, port %u, dev %s.\n",
625			&daddr->sin_addr.s_addr, ntohs(daddr->sin_port),
626			ndev->name);
627		err = -ENETUNREACH;
628		goto rel_neigh;
629	}
630
631	if (ndev->flags & IFF_LOOPBACK) {
632		ndev = ip_dev_find(&init_net, daddr->sin_addr.s_addr);
633		mtu = ndev->mtu;
634		pr_info("rt dev %s, loopback -> %s, mtu %u.\n",
635			n->dev->name, ndev->name, mtu);
636	}
637
638	cdev = cxgbi_device_find_by_netdev(ndev, &port);
639	if (!cdev) {
640		pr_info("dst %pI4, %s, NOT cxgbi device.\n",
641			&daddr->sin_addr.s_addr, ndev->name);
642		err = -ENETUNREACH;
643		goto rel_neigh;
644	}
645	log_debug(1 << CXGBI_DBG_SOCK,
646		"route to %pI4 :%u, ndev p#%d,%s, cdev 0x%p.\n",
647		&daddr->sin_addr.s_addr, ntohs(daddr->sin_port),
648			   port, ndev->name, cdev);
649
650	csk = cxgbi_sock_create(cdev);
651	if (!csk) {
652		err = -ENOMEM;
653		goto rel_neigh;
654	}
655	csk->cdev = cdev;
656	csk->port_id = port;
657	csk->mtu = mtu;
658	csk->dst = dst;
659
660	csk->csk_family = AF_INET;
661	csk->daddr.sin_addr.s_addr = daddr->sin_addr.s_addr;
662	csk->daddr.sin_port = daddr->sin_port;
663	csk->daddr.sin_family = daddr->sin_family;
664	csk->saddr.sin_family = daddr->sin_family;
665	csk->saddr.sin_addr.s_addr = fl4.saddr;
666	neigh_release(n);
667
668	return csk;
669
670rel_neigh:
671	neigh_release(n);
672
673rel_rt:
674	ip_rt_put(rt);
675	if (csk)
676		cxgbi_sock_closed(csk);
677err_out:
678	return ERR_PTR(err);
679}
680
681#if IS_ENABLED(CONFIG_IPV6)
682static struct rt6_info *find_route_ipv6(const struct in6_addr *saddr,
683					const struct in6_addr *daddr)
684{
685	struct flowi6 fl;
686
687	if (saddr)
688		memcpy(&fl.saddr, saddr, sizeof(struct in6_addr));
689	if (daddr)
690		memcpy(&fl.daddr, daddr, sizeof(struct in6_addr));
691	return (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
692}
693
/*
 * Resolve an IPv6 destination to a cxgbi device and create a connection
 * object for it.
 *
 * The route and neighbour for @dst_addr are looked up and multicast
 * routes are rejected.  The neighbour's net device is matched against the
 * registered cxgbi devices first by pointer and then by MAC address.  The
 * source address is the route's preferred source, or one chosen by
 * ipv6_dev_get_saddr() when the route has none.
 *
 * On success the returned connection keeps the route in csk->dst; the
 * source port is left unset.
 *
 * Returns the new connection or an ERR_PTR(): -ENETUNREACH, -ENOMEM, or
 * the error from ipv6_dev_get_saddr().
 */
static struct cxgbi_sock *cxgbi_check_route6(struct sockaddr *dst_addr)
{
	struct sockaddr_in6 *daddr6 = (struct sockaddr_in6 *)dst_addr;
	struct dst_entry *dst;
	struct net_device *ndev;
	struct cxgbi_device *cdev;
	struct rt6_info *rt = NULL;
	struct neighbour *n;
	struct in6_addr pref_saddr;
	struct cxgbi_sock *csk = NULL;
	unsigned int mtu = 0;	/* never changed below, so csk->mtu ends up 0 */
	int port = 0xFFFF;
	int err = 0;

	rt = find_route_ipv6(NULL, &daddr6->sin6_addr);

	if (!rt) {
		pr_info("no route to ipv6 %pI6 port %u\n",
			daddr6->sin6_addr.s6_addr,
			be16_to_cpu(daddr6->sin6_port));
		err = -ENETUNREACH;
		goto err_out;
	}

	dst = &rt->dst;

	n = dst_neigh_lookup(dst, &daddr6->sin6_addr);

	if (!n) {
		pr_info("%pI6, port %u, dst no neighbour.\n",
			daddr6->sin6_addr.s6_addr,
			be16_to_cpu(daddr6->sin6_port));
		err = -ENETUNREACH;
		goto rel_rt;
	}
	ndev = n->dev;

	if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
		pr_info("multi-cast route %pI6 port %u, dev %s.\n",
			daddr6->sin6_addr.s6_addr,
			ntohs(daddr6->sin6_port), ndev->name);
		err = -ENETUNREACH;
		goto rel_rt;
	}

	/* match by net_device pointer first, then fall back to the MAC */
	cdev = cxgbi_device_find_by_netdev(ndev, &port);
	if (!cdev)
		cdev = cxgbi_device_find_by_mac(ndev, &port);
	if (!cdev) {
		pr_info("dst %pI6 %s, NOT cxgbi device.\n",
			daddr6->sin6_addr.s6_addr, ndev->name);
		err = -ENETUNREACH;
		goto rel_rt;
	}
	log_debug(1 << CXGBI_DBG_SOCK,
		  "route to %pI6 :%u, ndev p#%d,%s, cdev 0x%p.\n",
		  daddr6->sin6_addr.s6_addr, ntohs(daddr6->sin6_port), port,
		  ndev->name, cdev);

	csk = cxgbi_sock_create(cdev);
	if (!csk) {
		err = -ENOMEM;
		goto rel_rt;
	}
	csk->cdev = cdev;
	csk->port_id = port;
	csk->mtu = mtu;
	csk->dst = dst;

	if (ipv6_addr_any(&rt->rt6i_prefsrc.addr)) {
		struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);

		err = ipv6_dev_get_saddr(&init_net, idev ? idev->dev : NULL,
					 &daddr6->sin6_addr, 0, &pref_saddr);
		if (err) {
			pr_info("failed to get source address to reach %pI6\n",
				&daddr6->sin6_addr);
			/*
			 * NOTE(review): csk exists here and is in CTP_CLOSED,
			 * so the cxgbi_sock_closed() call at rel_rt returns at
			 * its state check; the csk does not appear to be
			 * freed on this path -- confirm.
			 */
			goto rel_rt;
		}
	} else {
		pref_saddr = rt->rt6i_prefsrc.addr;
	}

	csk->csk_family = AF_INET6;
	csk->daddr6.sin6_addr = daddr6->sin6_addr;
	csk->daddr6.sin6_port = daddr6->sin6_port;
	csk->daddr6.sin6_family = daddr6->sin6_family;
	csk->saddr6.sin6_family = daddr6->sin6_family;
	csk->saddr6.sin6_addr = pref_saddr;

	neigh_release(n);
	return csk;

rel_rt:
	/* n is NULL when the neighbour lookup itself failed */
	if (n)
		neigh_release(n);

	ip6_rt_put(rt);
	if (csk)
		cxgbi_sock_closed(csk);
err_out:
	return ERR_PTR(err);
}
797#endif /* IS_ENABLED(CONFIG_IPV6) */
798
/**
 * cxgbi_sock_established - mark a connection as established
 * @csk: connection
 * @snd_isn: initial send sequence number; write_seq, snd_nxt and snd_una
 *	     all start from it
 * @opt: not used by this function
 */
void cxgbi_sock_established(struct cxgbi_sock *csk, unsigned int snd_isn,
			unsigned int opt)
{
	csk->write_seq = csk->snd_nxt = csk->snd_una = snd_isn;
	dst_confirm(csk->dst);
	/* full barrier between the stores above and the state change */
	smp_mb();
	cxgbi_sock_set_state(csk, CTP_ESTABLISHED);
}
807EXPORT_SYMBOL_GPL(cxgbi_sock_established);
808
809static void cxgbi_inform_iscsi_conn_closing(struct cxgbi_sock *csk)
810{
811	log_debug(1 << CXGBI_DBG_SOCK,
812		"csk 0x%p, state %u, flags 0x%lx, conn 0x%p.\n",
813		csk, csk->state, csk->flags, csk->user_data);
814
815	if (csk->state != CTP_ESTABLISHED) {
816		read_lock_bh(&csk->callback_lock);
817		if (csk->user_data)
818			iscsi_conn_failure(csk->user_data,
819					ISCSI_ERR_TCP_CONN_CLOSE);
820		read_unlock_bh(&csk->callback_lock);
821	}
822}
823
/**
 * cxgbi_sock_closed - final teardown of a connection
 * @csk: connection
 *
 * Always sets CTPF_ACTIVE_CLOSE_NEEDED.  If the connection is in
 * CTP_ACTIVE_OPEN or CTP_CLOSED nothing else happens.  Otherwise the
 * source port and route are released, the device's
 * csk_release_offload_resources() hook runs, the state becomes
 * CTP_CLOSED, the iSCSI layer is informed and one reference on @csk is
 * dropped.
 *
 * Most callers in this file invoke this with csk->lock held.
 */
void cxgbi_sock_closed(struct cxgbi_sock *csk)
{
	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
		csk, (csk)->state, (csk)->flags, (csk)->tid);
	cxgbi_sock_set_flag(csk, CTPF_ACTIVE_CLOSE_NEEDED);
	if (csk->state == CTP_ACTIVE_OPEN || csk->state == CTP_CLOSED)
		return;
	if (csk->saddr.sin_port)
		sock_put_port(csk);
	if (csk->dst)
		dst_release(csk->dst);
	csk->cdev->csk_release_offload_resources(csk);
	cxgbi_sock_set_state(csk, CTP_CLOSED);
	cxgbi_inform_iscsi_conn_closing(csk);
	cxgbi_sock_put(csk);
}
840EXPORT_SYMBOL_GPL(cxgbi_sock_closed);
841
842static void need_active_close(struct cxgbi_sock *csk)
843{
844	int data_lost;
845	int close_req = 0;
846
847	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
848		csk, (csk)->state, (csk)->flags, (csk)->tid);
849	spin_lock_bh(&csk->lock);
850	dst_confirm(csk->dst);
851	data_lost = skb_queue_len(&csk->receive_queue);
852	__skb_queue_purge(&csk->receive_queue);
853
854	if (csk->state == CTP_ACTIVE_OPEN)
855		cxgbi_sock_set_flag(csk, CTPF_ACTIVE_CLOSE_NEEDED);
856	else if (csk->state == CTP_ESTABLISHED) {
857		close_req = 1;
858		cxgbi_sock_set_state(csk, CTP_ACTIVE_CLOSE);
859	} else if (csk->state == CTP_PASSIVE_CLOSE) {
860		close_req = 1;
861		cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_2);
862	}
863
864	if (close_req) {
865		if (data_lost)
866			csk->cdev->csk_send_abort_req(csk);
867		else
868			csk->cdev->csk_send_close_req(csk);
869	}
870
871	spin_unlock_bh(&csk->lock);
872}
873
/**
 * cxgbi_sock_fail_act_open - fail a connection whose active open did not
 *			      complete
 * @csk: connection
 * @errno: error code stored in csk->err
 *
 * The state is moved to CTP_CONNECTING before cxgbi_sock_closed() is
 * called, so that the latter does not return early at its state check.
 */
void cxgbi_sock_fail_act_open(struct cxgbi_sock *csk, int errno)
{
	/*
	 * NOTE(review): the message formats the addresses as IPv4 and prints
	 * the ports without ntohs() -- confirm whether that is intended.
	 */
	pr_info("csk 0x%p,%u,%lx, %pI4:%u-%pI4:%u, err %d.\n",
			csk, csk->state, csk->flags,
			&csk->saddr.sin_addr.s_addr, csk->saddr.sin_port,
			&csk->daddr.sin_addr.s_addr, csk->daddr.sin_port,
			errno);

	cxgbi_sock_set_state(csk, CTP_CONNECTING);
	csk->err = errno;
	cxgbi_sock_closed(csk);
}
886EXPORT_SYMBOL_GPL(cxgbi_sock_fail_act_open);
887
/**
 * cxgbi_sock_act_open_req_arp_failure - handle ARP failure of an open
 *					 request
 * @handle: not used by this function
 * @skb: the request skb; skb->sk carries the connection pointer.  The skb
 *	 is freed here.
 *
 * If the connection is still in CTP_ACTIVE_OPEN it is failed with
 * -EHOSTUNREACH.
 */
void cxgbi_sock_act_open_req_arp_failure(void *handle, struct sk_buff *skb)
{
	struct cxgbi_sock *csk = (struct cxgbi_sock *)skb->sk;

	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
		csk, (csk)->state, (csk)->flags, (csk)->tid);
	/* hold a reference while the connection may be torn down below */
	cxgbi_sock_get(csk);
	spin_lock_bh(&csk->lock);
	if (csk->state == CTP_ACTIVE_OPEN)
		cxgbi_sock_fail_act_open(csk, -EHOSTUNREACH);
	spin_unlock_bh(&csk->lock);
	cxgbi_sock_put(csk);
	__kfree_skb(skb);
}
902EXPORT_SYMBOL_GPL(cxgbi_sock_act_open_req_arp_failure);
903
904void cxgbi_sock_rcv_abort_rpl(struct cxgbi_sock *csk)
905{
906	cxgbi_sock_get(csk);
907	spin_lock_bh(&csk->lock);
908
909	cxgbi_sock_set_flag(csk, CTPF_ABORT_RPL_RCVD);
910	if (cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING)) {
911		cxgbi_sock_clear_flag(csk, CTPF_ABORT_RPL_PENDING);
912		if (cxgbi_sock_flag(csk, CTPF_ABORT_REQ_RCVD))
913			pr_err("csk 0x%p,%u,0x%lx,%u,ABT_RPL_RSS.\n",
914			       csk, csk->state, csk->flags, csk->tid);
915		cxgbi_sock_closed(csk);
916	}
917
918	spin_unlock_bh(&csk->lock);
919	cxgbi_sock_put(csk);
920}
921EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_abort_rpl);
922
/**
 * cxgbi_sock_rcv_peer_close - handle a close initiated by the peer
 * @csk: connection
 *
 * Ignored while an abort reply is pending.  Otherwise the state advances:
 * CTP_ESTABLISHED -> CTP_PASSIVE_CLOSE, CTP_ACTIVE_CLOSE ->
 * CTP_CLOSE_WAIT_2, CTP_CLOSE_WAIT_1 -> closed; CTP_ABORTING is left
 * alone and any other state is logged as an error.  The iSCSI layer is
 * informed afterwards.
 */
void cxgbi_sock_rcv_peer_close(struct cxgbi_sock *csk)
{
	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
		csk, (csk)->state, (csk)->flags, (csk)->tid);
	cxgbi_sock_get(csk);
	spin_lock_bh(&csk->lock);

	if (cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING))
		goto done;

	switch (csk->state) {
	case CTP_ESTABLISHED:
		cxgbi_sock_set_state(csk, CTP_PASSIVE_CLOSE);
		break;
	case CTP_ACTIVE_CLOSE:
		cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_2);
		break;
	case CTP_CLOSE_WAIT_1:
		cxgbi_sock_closed(csk);
		break;
	case CTP_ABORTING:
		break;
	default:
		pr_err("csk 0x%p,%u,0x%lx,%u, bad state.\n",
			csk, csk->state, csk->flags, csk->tid);
	}
	/* runs for every state above, including after cxgbi_sock_closed() */
	cxgbi_inform_iscsi_conn_closing(csk);
done:
	spin_unlock_bh(&csk->lock);
	cxgbi_sock_put(csk);
}
954EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_peer_close);
955
/**
 * cxgbi_sock_rcv_close_conn_rpl - handle the reply to our close request
 * @csk: connection
 * @snd_nxt: sequence number from the reply; snd_una becomes @snd_nxt - 1
 *
 * snd_una is updated even when an abort reply is pending; the state
 * change is skipped in that case.  Otherwise: CTP_ACTIVE_CLOSE ->
 * CTP_CLOSE_WAIT_1, CTP_CLOSE_WAIT_1 and CTP_CLOSE_WAIT_2 -> closed;
 * CTP_ABORTING is left alone and any other state is logged as an error.
 */
void cxgbi_sock_rcv_close_conn_rpl(struct cxgbi_sock *csk, u32 snd_nxt)
{
	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
		csk, (csk)->state, (csk)->flags, (csk)->tid);
	cxgbi_sock_get(csk);
	spin_lock_bh(&csk->lock);

	csk->snd_una = snd_nxt - 1;
	if (cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING))
		goto done;

	switch (csk->state) {
	case CTP_ACTIVE_CLOSE:
		cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_1);
		break;
	case CTP_CLOSE_WAIT_1:
	case CTP_CLOSE_WAIT_2:
		cxgbi_sock_closed(csk);
		break;
	case CTP_ABORTING:
		break;
	default:
		pr_err("csk 0x%p,%u,0x%lx,%u, bad state.\n",
			csk, csk->state, csk->flags, csk->tid);
	}
done:
	spin_unlock_bh(&csk->lock);
	cxgbi_sock_put(csk);
}
985EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_close_conn_rpl);
986
/**
 * cxgbi_sock_rcv_wr_ack - process returned work-request credits
 * @csk: connection
 * @credits: number of credits handed back
 * @snd_una: acknowledged sequence number reported with the credits
 * @seq_chk: non-zero to validate @snd_una and update csk->snd_una
 *
 * The credits are added to csk->wr_cred and then used to retire pending
 * work-request skbs from the head of the queue.  Afterwards transmission
 * is resumed: queued frames are pushed, and the iSCSI tx path is reopened
 * if the queue is empty or the push hook returns non-zero.
 */
void cxgbi_sock_rcv_wr_ack(struct cxgbi_sock *csk, unsigned int credits,
			   unsigned int snd_una, int seq_chk)
{
	log_debug(1 << CXGBI_DBG_TOE | 1 << CXGBI_DBG_SOCK,
			"csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, snd_una %u,%d.\n",
			csk, csk->state, csk->flags, csk->tid, credits,
			csk->wr_cred, csk->wr_una_cred, snd_una, seq_chk);

	spin_lock_bh(&csk->lock);

	csk->wr_cred += credits;
	/* clamp wr_una_cred to the credits that are still outstanding */
	if (csk->wr_una_cred > csk->wr_max_cred - csk->wr_cred)
		csk->wr_una_cred = csk->wr_max_cred - csk->wr_cred;

	while (credits) {
		struct sk_buff *p = cxgbi_sock_peek_wr(csk);

		if (unlikely(!p)) {
			pr_err("csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, empty.\n",
				csk, csk->state, csk->flags, csk->tid, credits,
				csk->wr_cred, csk->wr_una_cred);
			break;
		}

		/* p->csum is used here as the credit count of the pending WR */
		if (unlikely(credits < p->csum)) {
			pr_warn("csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, < %u.\n",
				csk, csk->state, csk->flags, csk->tid,
				credits, csk->wr_cred, csk->wr_una_cred,
				p->csum);
			/* partial ack: keep the WR queued with the remainder */
			p->csum -= credits;
			break;
		} else {
			cxgbi_sock_dequeue_wr(csk);
			credits -= p->csum;
			kfree_skb(p);
		}
	}

	cxgbi_sock_check_wr_invariants(csk);

	if (seq_chk) {
		/* a snd_una that moves backwards is reported and ignored */
		if (unlikely(before(snd_una, csk->snd_una))) {
			pr_warn("csk 0x%p,%u,0x%lx,%u, snd_una %u/%u.",
				csk, csk->state, csk->flags, csk->tid, snd_una,
				csk->snd_una);
			goto done;
		}

		if (csk->snd_una != snd_una) {
			csk->snd_una = snd_una;
			dst_confirm(csk->dst);
		}
	}

	if (skb_queue_len(&csk->write_queue)) {
		if (csk->cdev->csk_push_tx_frames(csk, 0))
			cxgbi_conn_tx_open(csk);
	} else
		cxgbi_conn_tx_open(csk);
done:
	spin_unlock_bh(&csk->lock);
}
1049EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_wr_ack);
1050
1051static unsigned int cxgbi_sock_find_best_mtu(struct cxgbi_sock *csk,
1052					     unsigned short mtu)
1053{
1054	int i = 0;
1055
1056	while (i < csk->cdev->nmtus - 1 && csk->cdev->mtus[i + 1] <= mtu)
1057		++i;
1058
1059	return i;
1060}
1061
/**
 * cxgbi_sock_select_mss - choose the advertised MSS and its MTU index
 * @csk: connection; csk->advmss is updated
 * @pmtu: path MTU
 *
 * csk->advmss starts from the route's advmss metric, is capped at
 * @pmtu - 40 and raised to at least mtus[0] - 40.
 *
 * Return: index into the device's mtus[] table for advmss + 40.
 */
unsigned int cxgbi_sock_select_mss(struct cxgbi_sock *csk, unsigned int pmtu)
{
	unsigned int idx;
	struct dst_entry *dst = csk->dst;

	csk->advmss = dst_metric_advmss(dst);

	/* 40: presumably the IPv4 + TCP header bytes -- TODO confirm */
	if (csk->advmss > pmtu - 40)
		csk->advmss = pmtu - 40;
	if (csk->advmss < csk->cdev->mtus[0] - 40)
		csk->advmss = csk->cdev->mtus[0] - 40;
	idx = cxgbi_sock_find_best_mtu(csk, csk->advmss + 40);

	return idx;
}
1077EXPORT_SYMBOL_GPL(cxgbi_sock_select_mss);
1078
/**
 * cxgbi_sock_skb_entail - append an skb to the connection's write queue
 * @csk: connection
 * @skb: skb to queue; its control block records the current write_seq
 *
 * Uses the unlocked queue primitive; the caller in this file holds
 * csk->lock.
 */
void cxgbi_sock_skb_entail(struct cxgbi_sock *csk, struct sk_buff *skb)
{
	cxgbi_skcb_tcp_seq(skb) = csk->write_seq;
	__skb_queue_tail(&csk->write_queue, skb);
}
1084EXPORT_SYMBOL_GPL(cxgbi_sock_skb_entail);
1085
1086void cxgbi_sock_purge_wr_queue(struct cxgbi_sock *csk)
1087{
1088	struct sk_buff *skb;
1089
1090	while ((skb = cxgbi_sock_dequeue_wr(csk)) != NULL)
1091		kfree_skb(skb);
1092}
1093EXPORT_SYMBOL_GPL(cxgbi_sock_purge_wr_queue);
1094
1095void cxgbi_sock_check_wr_invariants(const struct cxgbi_sock *csk)
1096{
1097	int pending = cxgbi_sock_count_pending_wrs(csk);
1098
1099	if (unlikely(csk->wr_cred + pending != csk->wr_max_cred))
1100		pr_err("csk 0x%p, tid %u, credit %u + %u != %u.\n",
1101			csk, csk->tid, csk->wr_cred, pending, csk->wr_max_cred);
1102}
1103EXPORT_SYMBOL_GPL(cxgbi_sock_check_wr_invariants);
1104
/*
 * Queue a chain of PDU skbs (linked through skb->next) on the write queue
 * of @csk and push them to the hardware.
 *
 * Returns the sum of skb->len of the queued skbs, or a negative error:
 * -EAGAIN when the connection is not established, csk->err (or -EPIPE)
 * when the connection has an error, -ENOBUFS when the send window is
 * full, -EINVAL when an skb lacks headroom or has too many fragments.
 * A failure in the middle of the chain returns the error even though the
 * earlier skbs stay queued.
 */
static int cxgbi_sock_send_pdus(struct cxgbi_sock *csk, struct sk_buff *skb)
{
	struct cxgbi_device *cdev = csk->cdev;
	struct sk_buff *next;
	int err, copied = 0;

	spin_lock_bh(&csk->lock);

	if (csk->state != CTP_ESTABLISHED) {
		log_debug(1 << CXGBI_DBG_PDU_TX,
			"csk 0x%p,%u,0x%lx,%u, EAGAIN.\n",
			csk, csk->state, csk->flags, csk->tid);
		err = -EAGAIN;
		goto out_err;
	}

	if (csk->err) {
		log_debug(1 << CXGBI_DBG_PDU_TX,
			"csk 0x%p,%u,0x%lx,%u, EPIPE %d.\n",
			csk, csk->state, csk->flags, csk->tid, csk->err);
		err = -EPIPE;
		goto out_err;
	}

	/* bytes queued but not yet acknowledged fill the send window */
	if (csk->write_seq - csk->snd_una >= cdev->snd_win) {
		log_debug(1 << CXGBI_DBG_PDU_TX,
			"csk 0x%p,%u,0x%lx,%u, FULL %u-%u >= %u.\n",
			csk, csk->state, csk->flags, csk->tid, csk->write_seq,
			csk->snd_una, cdev->snd_win);
		err = -ENOBUFS;
		goto out_err;
	}

	while (skb) {
		/* page fragments, plus one if the skb has linear data */
		int frags = skb_shinfo(skb)->nr_frags +
				(skb->len != skb->data_len);

		if (unlikely(skb_headroom(skb) < cdev->skb_tx_rsvd)) {
			pr_err("csk 0x%p, skb head %u < %u.\n",
				csk, skb_headroom(skb), cdev->skb_tx_rsvd);
			err = -EINVAL;
			goto out_err;
		}

		if (frags >= SKB_WR_LIST_SIZE) {
			pr_err("csk 0x%p, frags %d, %u,%u >%u.\n",
				csk, skb_shinfo(skb)->nr_frags, skb->len,
				skb->data_len, (uint)(SKB_WR_LIST_SIZE));
			err = -EINVAL;
			goto out_err;
		}

		/* detach the skb from the chain before queueing it */
		next = skb->next;
		skb->next = NULL;
		cxgbi_skcb_set_flag(skb, SKCBF_TX_NEED_HDR);
		cxgbi_sock_skb_entail(csk, skb);
		copied += skb->len;
		csk->write_seq += skb->len +
				cxgbi_ulp_extra_len(cxgbi_skcb_ulp_mode(skb));
		skb = next;
	}
done:
	/* reached on the error path too, so queued skbs are still pushed */
	if (likely(skb_queue_len(&csk->write_queue)))
		cdev->csk_push_tx_frames(csk, 1);
	spin_unlock_bh(&csk->lock);
	return copied;

out_err:
	/* from here on "copied" carries the error code to return */
	if (copied == 0 && err == -EPIPE)
		copied = csk->err ? csk->err : -EPIPE;
	else
		copied = err;
	goto done;
}
1179
1180/*
1181 * Direct Data Placement -
1182 * Directly place the iSCSI Data-In or Data-Out PDU's payload into pre-posted
1183 * final destination host-memory buffers based on the Initiator Task Tag (ITT)
1184 * in Data-In or Target Task Tag (TTT) in Data-Out PDUs.
1185 * The host memory address is programmed into h/w in the format of pagepod
1186 * entries.
1187 * The location of the pagepod entry is encoded into ddp tag which is used as
1188 * the base for ITT/TTT.
1189 */
1190
1191static unsigned char ddp_page_order[DDP_PGIDX_MAX] = {0, 1, 2, 4};
1192static unsigned char ddp_page_shift[DDP_PGIDX_MAX] = {12, 13, 14, 16};
1193static unsigned char page_idx = DDP_PGIDX_MAX;
1194
1195static unsigned char sw_tag_idx_bits;
1196static unsigned char sw_tag_age_bits;
1197
1198/*
1199 * Direct-Data Placement page size adjustment
1200 */
1201static int ddp_adjust_page_table(void)
1202{
1203	int i;
1204	unsigned int base_order, order;
1205
1206	if (PAGE_SIZE < (1UL << ddp_page_shift[0])) {
1207		pr_info("PAGE_SIZE 0x%lx too small, min 0x%lx\n",
1208			PAGE_SIZE, 1UL << ddp_page_shift[0]);
1209		return -EINVAL;
1210	}
1211
1212	base_order = get_order(1UL << ddp_page_shift[0]);
1213	order = get_order(1UL << PAGE_SHIFT);
1214
1215	for (i = 0; i < DDP_PGIDX_MAX; i++) {
1216		/* first is the kernel page size, then just doubling */
1217		ddp_page_order[i] = order - base_order + i;
1218		ddp_page_shift[i] = PAGE_SHIFT + i;
1219	}
1220	return 0;
1221}
1222
1223static int ddp_find_page_index(unsigned long pgsz)
1224{
1225	int i;
1226
1227	for (i = 0; i < DDP_PGIDX_MAX; i++) {
1228		if (pgsz == (1UL << ddp_page_shift[i]))
1229			return i;
1230	}
1231	pr_info("ddp page size %lu not supported.\n", pgsz);
1232	return DDP_PGIDX_MAX;
1233}
1234
1235static void ddp_setup_host_page_size(void)
1236{
1237	if (page_idx == DDP_PGIDX_MAX) {
1238		page_idx = ddp_find_page_index(PAGE_SIZE);
1239
1240		if (page_idx == DDP_PGIDX_MAX) {
1241			pr_info("system PAGE %lu, update hw.\n", PAGE_SIZE);
1242			if (ddp_adjust_page_table() < 0) {
1243				pr_info("PAGE %lu, disable ddp.\n", PAGE_SIZE);
1244				return;
1245			}
1246			page_idx = ddp_find_page_index(PAGE_SIZE);
1247		}
1248		pr_info("system PAGE %lu, ddp idx %u.\n", PAGE_SIZE, page_idx);
1249	}
1250}
1251
1252void cxgbi_ddp_page_size_factor(int *pgsz_factor)
1253{
1254	int i;
1255
1256	for (i = 0; i < DDP_PGIDX_MAX; i++)
1257		pgsz_factor[i] = ddp_page_order[i];
1258}
1259EXPORT_SYMBOL_GPL(cxgbi_ddp_page_size_factor);
1260
1261/*
1262 * DDP setup & teardown
1263 */
1264
1265void cxgbi_ddp_ppod_set(struct cxgbi_pagepod *ppod,
1266			struct cxgbi_pagepod_hdr *hdr,
1267			struct cxgbi_gather_list *gl, unsigned int gidx)
1268{
1269	int i;
1270
1271	memcpy(ppod, hdr, sizeof(*hdr));
1272	for (i = 0; i < (PPOD_PAGES_MAX + 1); i++, gidx++) {
1273		ppod->addr[i] = gidx < gl->nelem ?
1274				cpu_to_be64(gl->phys_addr[gidx]) : 0ULL;
1275	}
1276}
1277EXPORT_SYMBOL_GPL(cxgbi_ddp_ppod_set);
1278
1279void cxgbi_ddp_ppod_clear(struct cxgbi_pagepod *ppod)
1280{
1281	memset(ppod, 0, sizeof(*ppod));
1282}
1283EXPORT_SYMBOL_GPL(cxgbi_ddp_ppod_clear);
1284
1285static inline int ddp_find_unused_entries(struct cxgbi_ddp_info *ddp,
1286					unsigned int start, unsigned int max,
1287					unsigned int count,
1288					struct cxgbi_gather_list *gl)
1289{
1290	unsigned int i, j, k;
1291
1292	/*  not enough entries */
1293	if ((max - start) < count) {
1294		log_debug(1 << CXGBI_DBG_DDP,
1295			"NOT enough entries %u+%u < %u.\n", start, count, max);
1296		return -EBUSY;
1297	}
1298
1299	max -= count;
1300	spin_lock(&ddp->map_lock);
1301	for (i = start; i < max;) {
1302		for (j = 0, k = i; j < count; j++, k++) {
1303			if (ddp->gl_map[k])
1304				break;
1305		}
1306		if (j == count) {
1307			for (j = 0, k = i; j < count; j++, k++)
1308				ddp->gl_map[k] = gl;
1309			spin_unlock(&ddp->map_lock);
1310			return i;
1311		}
1312		i += j + 1;
1313	}
1314	spin_unlock(&ddp->map_lock);
1315	log_debug(1 << CXGBI_DBG_DDP,
1316		"NO suitable entries %u available.\n", count);
1317	return -EBUSY;
1318}
1319
1320static inline void ddp_unmark_entries(struct cxgbi_ddp_info *ddp,
1321						int start, int count)
1322{
1323	spin_lock(&ddp->map_lock);
1324	memset(&ddp->gl_map[start], 0,
1325		count * sizeof(struct cxgbi_gather_list *));
1326	spin_unlock(&ddp->map_lock);
1327}
1328
1329static inline void ddp_gl_unmap(struct pci_dev *pdev,
1330					struct cxgbi_gather_list *gl)
1331{
1332	int i;
1333
1334	for (i = 0; i < gl->nelem; i++)
1335		dma_unmap_page(&pdev->dev, gl->phys_addr[i], PAGE_SIZE,
1336				PCI_DMA_FROMDEVICE);
1337}
1338
1339static inline int ddp_gl_map(struct pci_dev *pdev,
1340				    struct cxgbi_gather_list *gl)
1341{
1342	int i;
1343
1344	for (i = 0; i < gl->nelem; i++) {
1345		gl->phys_addr[i] = dma_map_page(&pdev->dev, gl->pages[i], 0,
1346						PAGE_SIZE,
1347						PCI_DMA_FROMDEVICE);
1348		if (unlikely(dma_mapping_error(&pdev->dev, gl->phys_addr[i]))) {
1349			log_debug(1 << CXGBI_DBG_DDP,
1350				"page %d 0x%p, 0x%p dma mapping err.\n",
1351				i, gl->pages[i], pdev);
1352			goto unmap;
1353		}
1354	}
1355	return i;
1356unmap:
1357	if (i) {
1358		unsigned int nelem = gl->nelem;
1359
1360		gl->nelem = i;
1361		ddp_gl_unmap(pdev, gl);
1362		gl->nelem = nelem;
1363	}
1364	return -EINVAL;
1365}
1366
/*
 * Tear down a gather list built by ddp_make_gl(): drop the DMA mappings
 * of its pages first, then free the list itself.
 */
static void ddp_release_gl(struct cxgbi_gather_list *gl,
				  struct pci_dev *pdev)
{
	ddp_gl_unmap(pdev, gl);

	kfree(gl);
}
1373
1374static struct cxgbi_gather_list *ddp_make_gl(unsigned int xferlen,
1375						    struct scatterlist *sgl,
1376						    unsigned int sgcnt,
1377						    struct pci_dev *pdev,
1378						    gfp_t gfp)
1379{
1380	struct cxgbi_gather_list *gl;
1381	struct scatterlist *sg = sgl;
1382	struct page *sgpage = sg_page(sg);
1383	unsigned int sglen = sg->length;
1384	unsigned int sgoffset = sg->offset;
1385	unsigned int npages = (xferlen + sgoffset + PAGE_SIZE - 1) >>
1386				PAGE_SHIFT;
1387	int i = 1, j = 0;
1388
1389	if (xferlen < DDP_THRESHOLD) {
1390		log_debug(1 << CXGBI_DBG_DDP,
1391			"xfer %u < threshold %u, no ddp.\n",
1392			xferlen, DDP_THRESHOLD);
1393		return NULL;
1394	}
1395
1396	gl = kzalloc(sizeof(struct cxgbi_gather_list) +
1397		     npages * (sizeof(dma_addr_t) +
1398		     sizeof(struct page *)), gfp);
1399	if (!gl) {
1400		log_debug(1 << CXGBI_DBG_DDP,
1401			"xfer %u, %u pages, OOM.\n", xferlen, npages);
1402		return NULL;
1403	}
1404
1405	 log_debug(1 << CXGBI_DBG_DDP,
1406		"xfer %u, sgl %u, gl max %u.\n", xferlen, sgcnt, npages);
1407
1408	gl->pages = (struct page **)&gl->phys_addr[npages];
1409	gl->nelem = npages;
1410	gl->length = xferlen;
1411	gl->offset = sgoffset;
1412	gl->pages[0] = sgpage;
1413
1414	for (i = 1, sg = sg_next(sgl), j = 0; i < sgcnt;
1415		i++, sg = sg_next(sg)) {
1416		struct page *page = sg_page(sg);
1417
1418		if (sgpage == page && sg->offset == sgoffset + sglen)
1419			sglen += sg->length;
1420		else {
1421			/*  make sure the sgl is fit for ddp:
1422			 *  each has the same page size, and
1423			 *  all of the middle pages are used completely
1424			 */
1425			if ((j && sgoffset) || ((i != sgcnt - 1) &&
1426			    ((sglen + sgoffset) & ~PAGE_MASK))) {
1427				log_debug(1 << CXGBI_DBG_DDP,
1428					"page %d/%u, %u + %u.\n",
1429					i, sgcnt, sgoffset, sglen);
1430				goto error_out;
1431			}
1432
1433			j++;
1434			if (j == gl->nelem || sg->offset) {
1435				log_debug(1 << CXGBI_DBG_DDP,
1436					"page %d/%u, offset %u.\n",
1437					j, gl->nelem, sg->offset);
1438				goto error_out;
1439			}
1440			gl->pages[j] = page;
1441			sglen = sg->length;
1442			sgoffset = sg->offset;
1443			sgpage = page;
1444		}
1445	}
1446	gl->nelem = ++j;
1447
1448	if (ddp_gl_map(pdev, gl) < 0)
1449		goto error_out;
1450
1451	return gl;
1452
1453error_out:
1454	kfree(gl);
1455	return NULL;
1456}
1457
1458static void ddp_tag_release(struct cxgbi_hba *chba, u32 tag)
1459{
1460	struct cxgbi_device *cdev = chba->cdev;
1461	struct cxgbi_ddp_info *ddp = cdev->ddp;
1462	u32 idx;
1463
1464	idx = (tag >> PPOD_IDX_SHIFT) & ddp->idx_mask;
1465	if (idx < ddp->nppods) {
1466		struct cxgbi_gather_list *gl = ddp->gl_map[idx];
1467		unsigned int npods;
1468
1469		if (!gl || !gl->nelem) {
1470			pr_warn("tag 0x%x, idx %u, gl 0x%p, %u.\n",
1471				tag, idx, gl, gl ? gl->nelem : 0);
1472			return;
1473		}
1474		npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
1475		log_debug(1 << CXGBI_DBG_DDP,
1476			"tag 0x%x, release idx %u, npods %u.\n",
1477			tag, idx, npods);
1478		cdev->csk_ddp_clear(chba, tag, idx, npods);
1479		ddp_unmark_entries(ddp, idx, npods);
1480		ddp_release_gl(gl, ddp->pdev);
1481	} else
1482		pr_warn("tag 0x%x, idx %u > max %u.\n", tag, idx, ddp->nppods);
1483}
1484
1485static int ddp_tag_reserve(struct cxgbi_sock *csk, unsigned int tid,
1486			   u32 sw_tag, u32 *tagp, struct cxgbi_gather_list *gl,
1487			   gfp_t gfp)
1488{
1489	struct cxgbi_device *cdev = csk->cdev;
1490	struct cxgbi_ddp_info *ddp = cdev->ddp;
1491	struct cxgbi_tag_format *tformat = &cdev->tag_format;
1492	struct cxgbi_pagepod_hdr hdr;
1493	unsigned int npods;
1494	int idx = -1;
1495	int err = -ENOMEM;
1496	u32 tag;
1497
1498	npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
1499	if (ddp->idx_last == ddp->nppods)
1500		idx = ddp_find_unused_entries(ddp, 0, ddp->nppods,
1501							npods, gl);
1502	else {
1503		idx = ddp_find_unused_entries(ddp, ddp->idx_last + 1,
1504							ddp->nppods, npods,
1505							gl);
1506		if (idx < 0 && ddp->idx_last >= npods) {
1507			idx = ddp_find_unused_entries(ddp, 0,
1508				min(ddp->idx_last + npods, ddp->nppods),
1509							npods, gl);
1510		}
1511	}
1512	if (idx < 0) {
1513		log_debug(1 << CXGBI_DBG_DDP,
1514			"xferlen %u, gl %u, npods %u NO DDP.\n",
1515			gl->length, gl->nelem, npods);
1516		return idx;
1517	}
1518
1519	tag = cxgbi_ddp_tag_base(tformat, sw_tag);
1520	tag |= idx << PPOD_IDX_SHIFT;
1521
1522	hdr.rsvd = 0;
1523	hdr.vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid));
1524	hdr.pgsz_tag_clr = htonl(tag & ddp->rsvd_tag_mask);
1525	hdr.max_offset = htonl(gl->length);
1526	hdr.page_offset = htonl(gl->offset);
1527
1528	err = cdev->csk_ddp_set(csk, &hdr, idx, npods, gl);
1529	if (err < 0)
1530		goto unmark_entries;
1531
1532	ddp->idx_last = idx;
1533	log_debug(1 << CXGBI_DBG_DDP,
1534		"xfer %u, gl %u,%u, tid 0x%x, tag 0x%x->0x%x(%u,%u).\n",
1535		gl->length, gl->nelem, gl->offset, tid, sw_tag, tag, idx,
1536		npods);
1537	*tagp = tag;
1538	return 0;
1539
1540unmark_entries:
1541	ddp_unmark_entries(ddp, idx, npods);
1542	return err;
1543}
1544
1545int cxgbi_ddp_reserve(struct cxgbi_sock *csk, unsigned int *tagp,
1546			unsigned int sw_tag, unsigned int xferlen,
1547			struct scatterlist *sgl, unsigned int sgcnt, gfp_t gfp)
1548{
1549	struct cxgbi_device *cdev = csk->cdev;
1550	struct cxgbi_tag_format *tformat = &cdev->tag_format;
1551	struct cxgbi_gather_list *gl;
1552	int err;
1553
1554	if (page_idx >= DDP_PGIDX_MAX || !cdev->ddp ||
1555	    xferlen < DDP_THRESHOLD) {
1556		log_debug(1 << CXGBI_DBG_DDP,
1557			"pgidx %u, xfer %u, NO ddp.\n", page_idx, xferlen);
1558		return -EINVAL;
1559	}
1560
1561	if (!cxgbi_sw_tag_usable(tformat, sw_tag)) {
1562		log_debug(1 << CXGBI_DBG_DDP,
1563			"sw_tag 0x%x NOT usable.\n", sw_tag);
1564		return -EINVAL;
1565	}
1566
1567	gl = ddp_make_gl(xferlen, sgl, sgcnt, cdev->pdev, gfp);
1568	if (!gl)
1569		return -ENOMEM;
1570
1571	err = ddp_tag_reserve(csk, csk->tid, sw_tag, tagp, gl, gfp);
1572	if (err < 0)
1573		ddp_release_gl(gl, cdev->pdev);
1574
1575	return err;
1576}
1577
1578static void ddp_destroy(struct kref *kref)
1579{
1580	struct cxgbi_ddp_info *ddp = container_of(kref,
1581						struct cxgbi_ddp_info,
1582						refcnt);
1583	struct cxgbi_device *cdev = ddp->cdev;
1584	int i = 0;
1585
1586	pr_info("kref 0, destroy ddp 0x%p, cdev 0x%p.\n", ddp, cdev);
1587
1588	while (i < ddp->nppods) {
1589		struct cxgbi_gather_list *gl = ddp->gl_map[i];
1590
1591		if (gl) {
1592			int npods = (gl->nelem + PPOD_PAGES_MAX - 1)
1593					>> PPOD_PAGES_SHIFT;
1594			pr_info("cdev 0x%p, ddp %d + %d.\n", cdev, i, npods);
1595			kfree(gl);
1596			i += npods;
1597		} else
1598			i++;
1599	}
1600	cxgbi_free_big_mem(ddp);
1601}
1602
1603int cxgbi_ddp_cleanup(struct cxgbi_device *cdev)
1604{
1605	struct cxgbi_ddp_info *ddp = cdev->ddp;
1606
1607	log_debug(1 << CXGBI_DBG_DDP,
1608		"cdev 0x%p, release ddp 0x%p.\n", cdev, ddp);
1609	cdev->ddp = NULL;
1610	if (ddp)
1611		return kref_put(&ddp->refcnt, ddp_destroy);
1612	return 0;
1613}
1614EXPORT_SYMBOL_GPL(cxgbi_ddp_cleanup);
1615
1616int cxgbi_ddp_init(struct cxgbi_device *cdev,
1617		   unsigned int llimit, unsigned int ulimit,
1618		   unsigned int max_txsz, unsigned int max_rxsz)
1619{
1620	struct cxgbi_ddp_info *ddp;
1621	unsigned int ppmax, bits;
1622
1623	ppmax = (ulimit - llimit + 1) >> PPOD_SIZE_SHIFT;
1624	bits = __ilog2_u32(ppmax) + 1;
1625	if (bits > PPOD_IDX_MAX_SIZE)
1626		bits = PPOD_IDX_MAX_SIZE;
1627	ppmax = (1 << (bits - 1)) - 1;
1628
1629	ddp = cxgbi_alloc_big_mem(sizeof(struct cxgbi_ddp_info) +
1630				ppmax * (sizeof(struct cxgbi_gather_list *) +
1631					 sizeof(struct sk_buff *)),
1632				GFP_KERNEL);
1633	if (!ddp) {
1634		pr_warn("cdev 0x%p, ddp ppmax %u OOM.\n", cdev, ppmax);
1635		return -ENOMEM;
1636	}
1637	ddp->gl_map = (struct cxgbi_gather_list **)(ddp + 1);
1638	cdev->ddp = ddp;
1639
1640	spin_lock_init(&ddp->map_lock);
1641	kref_init(&ddp->refcnt);
1642
1643	ddp->cdev = cdev;
1644	ddp->pdev = cdev->pdev;
1645	ddp->llimit = llimit;
1646	ddp->ulimit = ulimit;
1647	ddp->max_txsz = min_t(unsigned int, max_txsz, ULP2_MAX_PKT_SIZE);
1648	ddp->max_rxsz = min_t(unsigned int, max_rxsz, ULP2_MAX_PKT_SIZE);
1649	ddp->nppods = ppmax;
1650	ddp->idx_last = ppmax;
1651	ddp->idx_bits = bits;
1652	ddp->idx_mask = (1 << bits) - 1;
1653	ddp->rsvd_tag_mask = (1 << (bits + PPOD_IDX_SHIFT)) - 1;
1654
1655	cdev->tag_format.sw_bits = sw_tag_idx_bits + sw_tag_age_bits;
1656	cdev->tag_format.rsvd_bits = ddp->idx_bits;
1657	cdev->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
1658	cdev->tag_format.rsvd_mask = (1 << cdev->tag_format.rsvd_bits) - 1;
1659
1660	pr_info("%s tag format, sw %u, rsvd %u,%u, mask 0x%x.\n",
1661		cdev->ports[0]->name, cdev->tag_format.sw_bits,
1662		cdev->tag_format.rsvd_bits, cdev->tag_format.rsvd_shift,
1663		cdev->tag_format.rsvd_mask);
1664
1665	cdev->tx_max_size = min_t(unsigned int, ULP2_MAX_PDU_PAYLOAD,
1666				ddp->max_txsz - ISCSI_PDU_NONPAYLOAD_LEN);
1667	cdev->rx_max_size = min_t(unsigned int, ULP2_MAX_PDU_PAYLOAD,
1668				ddp->max_rxsz - ISCSI_PDU_NONPAYLOAD_LEN);
1669
1670	log_debug(1 << CXGBI_DBG_DDP,
1671		"%s max payload size: %u/%u, %u/%u.\n",
1672		cdev->ports[0]->name, cdev->tx_max_size, ddp->max_txsz,
1673		cdev->rx_max_size, ddp->max_rxsz);
1674	return 0;
1675}
1676EXPORT_SYMBOL_GPL(cxgbi_ddp_init);
1677
1678/*
1679 * APIs interacting with open-iscsi libraries
1680 */
1681
1682static unsigned char padding[4];
1683
1684static void task_release_itt(struct iscsi_task *task, itt_t hdr_itt)
1685{
1686	struct scsi_cmnd *sc = task->sc;
1687	struct iscsi_tcp_conn *tcp_conn = task->conn->dd_data;
1688	struct cxgbi_conn *cconn = tcp_conn->dd_data;
1689	struct cxgbi_hba *chba = cconn->chba;
1690	struct cxgbi_tag_format *tformat = &chba->cdev->tag_format;
1691	u32 tag = ntohl((__force u32)hdr_itt);
1692
1693	log_debug(1 << CXGBI_DBG_DDP,
1694		   "cdev 0x%p, release tag 0x%x.\n", chba->cdev, tag);
1695	if (sc &&
1696	    (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_FROM_DEVICE) &&
1697	    cxgbi_is_ddp_tag(tformat, tag))
1698		ddp_tag_release(chba, tag);
1699}
1700
1701static int task_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt)
1702{
1703	struct scsi_cmnd *sc = task->sc;
1704	struct iscsi_conn *conn = task->conn;
1705	struct iscsi_session *sess = conn->session;
1706	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1707	struct cxgbi_conn *cconn = tcp_conn->dd_data;
1708	struct cxgbi_hba *chba = cconn->chba;
1709	struct cxgbi_tag_format *tformat = &chba->cdev->tag_format;
1710	u32 sw_tag = (sess->age << cconn->task_idx_bits) | task->itt;
1711	u32 tag = 0;
1712	int err = -EINVAL;
1713
1714	if (sc &&
1715	    (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_FROM_DEVICE)) {
1716		err = cxgbi_ddp_reserve(cconn->cep->csk, &tag, sw_tag,
1717					scsi_in(sc)->length,
1718					scsi_in(sc)->table.sgl,
1719					scsi_in(sc)->table.nents,
1720					GFP_ATOMIC);
1721		if (err < 0)
1722			log_debug(1 << CXGBI_DBG_DDP,
1723				"csk 0x%p, R task 0x%p, %u,%u, no ddp.\n",
1724				cconn->cep->csk, task, scsi_in(sc)->length,
1725				scsi_in(sc)->table.nents);
1726	}
1727
1728	if (err < 0)
1729		tag = cxgbi_set_non_ddp_tag(tformat, sw_tag);
1730	/*  the itt need to sent in big-endian order */
1731	*hdr_itt = (__force itt_t)htonl(tag);
1732
1733	log_debug(1 << CXGBI_DBG_DDP,
1734		"cdev 0x%p, task 0x%p, 0x%x(0x%x,0x%x)->0x%x/0x%x.\n",
1735		chba->cdev, task, sw_tag, task->itt, sess->age, tag, *hdr_itt);
1736	return 0;
1737}
1738
1739void cxgbi_parse_pdu_itt(struct iscsi_conn *conn, itt_t itt, int *idx, int *age)
1740{
1741	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1742	struct cxgbi_conn *cconn = tcp_conn->dd_data;
1743	struct cxgbi_device *cdev = cconn->chba->cdev;
1744	u32 tag = ntohl((__force u32) itt);
1745	u32 sw_bits;
1746
1747	sw_bits = cxgbi_tag_nonrsvd_bits(&cdev->tag_format, tag);
1748	if (idx)
1749		*idx = sw_bits & ((1 << cconn->task_idx_bits) - 1);
1750	if (age)
1751		*age = (sw_bits >> cconn->task_idx_bits) & ISCSI_AGE_MASK;
1752
1753	log_debug(1 << CXGBI_DBG_DDP,
1754		"cdev 0x%p, tag 0x%x/0x%x, -> 0x%x(0x%x,0x%x).\n",
1755		cdev, tag, itt, sw_bits, idx ? *idx : 0xFFFFF,
1756		age ? *age : 0xFF);
1757}
1758EXPORT_SYMBOL_GPL(cxgbi_parse_pdu_itt);
1759
1760void cxgbi_conn_tx_open(struct cxgbi_sock *csk)
1761{
1762	struct iscsi_conn *conn = csk->user_data;
1763
1764	if (conn) {
1765		log_debug(1 << CXGBI_DBG_SOCK,
1766			"csk 0x%p, cid %d.\n", csk, conn->id);
1767		iscsi_conn_queue_work(conn);
1768	}
1769}
1770EXPORT_SYMBOL_GPL(cxgbi_conn_tx_open);
1771
1772/*
1773 * pdu receive, interact with libiscsi_tcp
1774 */
1775static inline int read_pdu_skb(struct iscsi_conn *conn,
1776			       struct sk_buff *skb,
1777			       unsigned int offset,
1778			       int offloaded)
1779{
1780	int status = 0;
1781	int bytes_read;
1782
1783	bytes_read = iscsi_tcp_recv_skb(conn, skb, offset, offloaded, &status);
1784	switch (status) {
1785	case ISCSI_TCP_CONN_ERR:
1786		pr_info("skb 0x%p, off %u, %d, TCP_ERR.\n",
1787			  skb, offset, offloaded);
1788		return -EIO;
1789	case ISCSI_TCP_SUSPENDED:
1790		log_debug(1 << CXGBI_DBG_PDU_RX,
1791			"skb 0x%p, off %u, %d, TCP_SUSPEND, rc %d.\n",
1792			skb, offset, offloaded, bytes_read);
1793		/* no transfer - just have caller flush queue */
1794		return bytes_read;
1795	case ISCSI_TCP_SKB_DONE:
1796		pr_info("skb 0x%p, off %u, %d, TCP_SKB_DONE.\n",
1797			skb, offset, offloaded);
1798		/*
1799		 * pdus should always fit in the skb and we should get
1800		 * segment done notifcation.
1801		 */
1802		iscsi_conn_printk(KERN_ERR, conn, "Invalid pdu or skb.");
1803		return -EFAULT;
1804	case ISCSI_TCP_SEGMENT_DONE:
1805		log_debug(1 << CXGBI_DBG_PDU_RX,
1806			"skb 0x%p, off %u, %d, TCP_SEG_DONE, rc %d.\n",
1807			skb, offset, offloaded, bytes_read);
1808		return bytes_read;
1809	default:
1810		pr_info("skb 0x%p, off %u, %d, invalid status %d.\n",
1811			skb, offset, offloaded, status);
1812		return -EINVAL;
1813	}
1814}
1815
1816static int skb_read_pdu_bhs(struct iscsi_conn *conn, struct sk_buff *skb)
1817{
1818	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1819
1820	log_debug(1 << CXGBI_DBG_PDU_RX,
1821		"conn 0x%p, skb 0x%p, len %u, flag 0x%lx.\n",
1822		conn, skb, skb->len, cxgbi_skcb_flags(skb));
1823
1824	if (!iscsi_tcp_recv_segment_is_hdr(tcp_conn)) {
1825		pr_info("conn 0x%p, skb 0x%p, not hdr.\n", conn, skb);
1826		iscsi_conn_failure(conn, ISCSI_ERR_PROTO);
1827		return -EIO;
1828	}
1829
1830	if (conn->hdrdgst_en &&
1831	    cxgbi_skcb_test_flag(skb, SKCBF_RX_HCRC_ERR)) {
1832		pr_info("conn 0x%p, skb 0x%p, hcrc.\n", conn, skb);
1833		iscsi_conn_failure(conn, ISCSI_ERR_HDR_DGST);
1834		return -EIO;
1835	}
1836
1837	return read_pdu_skb(conn, skb, 0, 0);
1838}
1839
1840static int skb_read_pdu_data(struct iscsi_conn *conn, struct sk_buff *lskb,
1841			     struct sk_buff *skb, unsigned int offset)
1842{
1843	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1844	bool offloaded = 0;
1845	int opcode = tcp_conn->in.hdr->opcode & ISCSI_OPCODE_MASK;
1846
1847	log_debug(1 << CXGBI_DBG_PDU_RX,
1848		"conn 0x%p, skb 0x%p, len %u, flag 0x%lx.\n",
1849		conn, skb, skb->len, cxgbi_skcb_flags(skb));
1850
1851	if (conn->datadgst_en &&
1852	    cxgbi_skcb_test_flag(lskb, SKCBF_RX_DCRC_ERR)) {
1853		pr_info("conn 0x%p, skb 0x%p, dcrc 0x%lx.\n",
1854			conn, lskb, cxgbi_skcb_flags(lskb));
1855		iscsi_conn_failure(conn, ISCSI_ERR_DATA_DGST);
1856		return -EIO;
1857	}
1858
1859	if (iscsi_tcp_recv_segment_is_hdr(tcp_conn))
1860		return 0;
1861
1862	/* coalesced, add header digest length */
1863	if (lskb == skb && conn->hdrdgst_en)
1864		offset += ISCSI_DIGEST_SIZE;
1865
1866	if (cxgbi_skcb_test_flag(lskb, SKCBF_RX_DATA_DDPD))
1867		offloaded = 1;
1868
1869	if (opcode == ISCSI_OP_SCSI_DATA_IN)
1870		log_debug(1 << CXGBI_DBG_PDU_RX,
1871			"skb 0x%p, op 0x%x, itt 0x%x, %u %s ddp'ed.\n",
1872			skb, opcode, ntohl(tcp_conn->in.hdr->itt),
1873			tcp_conn->in.datalen, offloaded ? "is" : "not");
1874
1875	return read_pdu_skb(conn, skb, offset, offloaded);
1876}
1877
1878static void csk_return_rx_credits(struct cxgbi_sock *csk, int copied)
1879{
1880	struct cxgbi_device *cdev = csk->cdev;
1881	int must_send;
1882	u32 credits;
1883
1884	log_debug(1 << CXGBI_DBG_PDU_RX,
1885		"csk 0x%p,%u,0x%lx,%u, seq %u, wup %u, thre %u, %u.\n",
1886		csk, csk->state, csk->flags, csk->tid, csk->copied_seq,
1887		csk->rcv_wup, cdev->rx_credit_thres,
1888		cdev->rcv_win);
1889
1890	if (csk->state != CTP_ESTABLISHED)
1891		return;
1892
1893	credits = csk->copied_seq - csk->rcv_wup;
1894	if (unlikely(!credits))
1895		return;
1896	if (unlikely(cdev->rx_credit_thres == 0))
1897		return;
1898
1899	must_send = credits + 16384 >= cdev->rcv_win;
1900	if (must_send || credits >= cdev->rx_credit_thres)
1901		csk->rcv_wup += cdev->csk_send_rx_credits(csk, credits);
1902}
1903
1904void cxgbi_conn_pdu_ready(struct cxgbi_sock *csk)
1905{
1906	struct cxgbi_device *cdev = csk->cdev;
1907	struct iscsi_conn *conn = csk->user_data;
1908	struct sk_buff *skb;
1909	unsigned int read = 0;
1910	int err = 0;
1911
1912	log_debug(1 << CXGBI_DBG_PDU_RX,
1913		"csk 0x%p, conn 0x%p.\n", csk, conn);
1914
1915	if (unlikely(!conn || conn->suspend_rx)) {
1916		log_debug(1 << CXGBI_DBG_PDU_RX,
1917			"csk 0x%p, conn 0x%p, id %d, suspend_rx %lu!\n",
1918			csk, conn, conn ? conn->id : 0xFF,
1919			conn ? conn->suspend_rx : 0xFF);
1920		return;
1921	}
1922
1923	while (!err) {
1924		skb = skb_peek(&csk->receive_queue);
1925		if (!skb ||
1926		    !(cxgbi_skcb_test_flag(skb, SKCBF_RX_STATUS))) {
1927			if (skb)
1928				log_debug(1 << CXGBI_DBG_PDU_RX,
1929					"skb 0x%p, NOT ready 0x%lx.\n",
1930					skb, cxgbi_skcb_flags(skb));
1931			break;
1932		}
1933		__skb_unlink(skb, &csk->receive_queue);
1934
1935		read += cxgbi_skcb_rx_pdulen(skb);
1936		log_debug(1 << CXGBI_DBG_PDU_RX,
1937			"csk 0x%p, skb 0x%p,%u,f 0x%lx, pdu len %u.\n",
1938			csk, skb, skb->len, cxgbi_skcb_flags(skb),
1939			cxgbi_skcb_rx_pdulen(skb));
1940
1941		if (cxgbi_skcb_test_flag(skb, SKCBF_RX_COALESCED)) {
1942			err = skb_read_pdu_bhs(conn, skb);
1943			if (err < 0) {
1944				pr_err("coalesced bhs, csk 0x%p, skb 0x%p,%u, "
1945					"f 0x%lx, plen %u.\n",
1946					csk, skb, skb->len,
1947					cxgbi_skcb_flags(skb),
1948					cxgbi_skcb_rx_pdulen(skb));
1949				goto skb_done;
1950			}
1951			err = skb_read_pdu_data(conn, skb, skb,
1952						err + cdev->skb_rx_extra);
1953			if (err < 0)
1954				pr_err("coalesced data, csk 0x%p, skb 0x%p,%u, "
1955					"f 0x%lx, plen %u.\n",
1956					csk, skb, skb->len,
1957					cxgbi_skcb_flags(skb),
1958					cxgbi_skcb_rx_pdulen(skb));
1959		} else {
1960			err = skb_read_pdu_bhs(conn, skb);
1961			if (err < 0) {
1962				pr_err("bhs, csk 0x%p, skb 0x%p,%u, "
1963					"f 0x%lx, plen %u.\n",
1964					csk, skb, skb->len,
1965					cxgbi_skcb_flags(skb),
1966					cxgbi_skcb_rx_pdulen(skb));
1967				goto skb_done;
1968			}
1969
1970			if (cxgbi_skcb_test_flag(skb, SKCBF_RX_DATA)) {
1971				struct sk_buff *dskb;
1972
1973				dskb = skb_peek(&csk->receive_queue);
1974				if (!dskb) {
1975					pr_err("csk 0x%p, skb 0x%p,%u, f 0x%lx,"
1976						" plen %u, NO data.\n",
1977						csk, skb, skb->len,
1978						cxgbi_skcb_flags(skb),
1979						cxgbi_skcb_rx_pdulen(skb));
1980					err = -EIO;
1981					goto skb_done;
1982				}
1983				__skb_unlink(dskb, &csk->receive_queue);
1984
1985				err = skb_read_pdu_data(conn, skb, dskb, 0);
1986				if (err < 0)
1987					pr_err("data, csk 0x%p, skb 0x%p,%u, "
1988						"f 0x%lx, plen %u, dskb 0x%p,"
1989						"%u.\n",
1990						csk, skb, skb->len,
1991						cxgbi_skcb_flags(skb),
1992						cxgbi_skcb_rx_pdulen(skb),
1993						dskb, dskb->len);
1994				__kfree_skb(dskb);
1995			} else
1996				err = skb_read_pdu_data(conn, skb, skb, 0);
1997		}
1998skb_done:
1999		__kfree_skb(skb);
2000
2001		if (err < 0)
2002			break;
2003	}
2004
2005	log_debug(1 << CXGBI_DBG_PDU_RX, "csk 0x%p, read %u.\n", csk, read);
2006	if (read) {
2007		csk->copied_seq += read;
2008		csk_return_rx_credits(csk, read);
2009		conn->rxdata_octets += read;
2010	}
2011
2012	if (err < 0) {
2013		pr_info("csk 0x%p, 0x%p, rx failed %d, read %u.\n",
2014			csk, conn, err, read);
2015		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
2016	}
2017}
2018EXPORT_SYMBOL_GPL(cxgbi_conn_pdu_ready);
2019
2020static int sgl_seek_offset(struct scatterlist *sgl, unsigned int sgcnt,
2021				unsigned int offset, unsigned int *off,
2022				struct scatterlist **sgp)
2023{
2024	int i;
2025	struct scatterlist *sg;
2026
2027	for_each_sg(sgl, sg, sgcnt, i) {
2028		if (offset < sg->length) {
2029			*off = offset;
2030			*sgp = sg;
2031			return 0;
2032		}
2033		offset -= sg->length;
2034	}
2035	return -EFAULT;
2036}
2037
2038static int sgl_read_to_frags(struct scatterlist *sg, unsigned int sgoffset,
2039				unsigned int dlen, struct page_frag *frags,
2040				int frag_max)
2041{
2042	unsigned int datalen = dlen;
2043	unsigned int sglen = sg->length - sgoffset;
2044	struct page *page = sg_page(sg);
2045	int i;
2046
2047	i = 0;
2048	do {
2049		unsigned int copy;
2050
2051		if (!sglen) {
2052			sg = sg_next(sg);
2053			if (!sg) {
2054				pr_warn("sg %d NULL, len %u/%u.\n",
2055					i, datalen, dlen);
2056				return -EINVAL;
2057			}
2058			sgoffset = 0;
2059			sglen = sg->length;
2060			page = sg_page(sg);
2061
2062		}
2063		copy = min(datalen, sglen);
2064		if (i && page == frags[i - 1].page &&
2065		    sgoffset + sg->offset ==
2066			frags[i - 1].offset + frags[i - 1].size) {
2067			frags[i - 1].size += copy;
2068		} else {
2069			if (i >= frag_max) {
2070				pr_warn("too many pages %u, dlen %u.\n",
2071					frag_max, dlen);
2072				return -EINVAL;
2073			}
2074
2075			frags[i].page = page;
2076			frags[i].offset = sg->offset + sgoffset;
2077			frags[i].size = copy;
2078			i++;
2079		}
2080		datalen -= copy;
2081		sgoffset += copy;
2082		sglen -= copy;
2083	} while (datalen);
2084
2085	return i;
2086}
2087
2088int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode)
2089{
2090	struct iscsi_tcp_conn *tcp_conn = task->conn->dd_data;
2091	struct cxgbi_conn *cconn = tcp_conn->dd_data;
2092	struct cxgbi_device *cdev = cconn->chba->cdev;
2093	struct iscsi_conn *conn = task->conn;
2094	struct iscsi_tcp_task *tcp_task = task->dd_data;
2095	struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
2096	struct scsi_cmnd *sc = task->sc;
2097	int headroom = SKB_TX_ISCSI_PDU_HEADER_MAX;
2098
2099	tcp_task->dd_data = tdata;
2100	task->hdr = NULL;
2101
2102	if (SKB_MAX_HEAD(cdev->skb_tx_rsvd) > (512 * MAX_SKB_FRAGS) &&
2103	    (opcode == ISCSI_OP_SCSI_DATA_OUT ||
2104	     (opcode == ISCSI_OP_SCSI_CMD &&
2105	      (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_TO_DEVICE))))
2106		/* data could goes into skb head */
2107		headroom += min_t(unsigned int,
2108				SKB_MAX_HEAD(cdev->skb_tx_rsvd),
2109				conn->max_xmit_dlength);
2110
2111	tdata->skb = alloc_skb(cdev->skb_tx_rsvd + headroom, GFP_ATOMIC);
2112	if (!tdata->skb) {
2113		struct cxgbi_sock *csk = cconn->cep->csk;
2114		struct net_device *ndev = cdev->ports[csk->port_id];
2115		ndev->stats.tx_dropped++;
2116		return -ENOMEM;
2117	}
2118
2119	skb_reserve(tdata->skb, cdev->skb_tx_rsvd);
2120	task->hdr = (struct iscsi_hdr *)tdata->skb->data;
2121	task->hdr_max = SKB_TX_ISCSI_PDU_HEADER_MAX; /* BHS + AHS */
2122
2123	/* data_out uses scsi_cmd's itt */
2124	if (opcode != ISCSI_OP_SCSI_DATA_OUT)
2125		task_reserve_itt(task, &task->hdr->itt);
2126
2127	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
2128		"task 0x%p, op 0x%x, skb 0x%p,%u+%u/%u, itt 0x%x.\n",
2129		task, opcode, tdata->skb, cdev->skb_tx_rsvd, headroom,
2130		conn->max_xmit_dlength, ntohl(task->hdr->itt));
2131
2132	return 0;
2133}
2134EXPORT_SYMBOL_GPL(cxgbi_conn_alloc_pdu);
2135
2136static inline void tx_skb_setmode(struct sk_buff *skb, int hcrc, int dcrc)
2137{
2138	if (hcrc || dcrc) {
2139		u8 submode = 0;
2140
2141		if (hcrc)
2142			submode |= 1;
2143		if (dcrc)
2144			submode |= 2;
2145		cxgbi_skcb_ulp_mode(skb) = (ULP2_MODE_ISCSI << 4) | submode;
2146	} else
2147		cxgbi_skcb_ulp_mode(skb) = 0;
2148}
2149
2150int cxgbi_conn_init_pdu(struct iscsi_task *task, unsigned int offset,
2151			      unsigned int count)
2152{
2153	struct iscsi_conn *conn = task->conn;
2154	struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
2155	struct sk_buff *skb = tdata->skb;
2156	unsigned int datalen = count;
2157	int i, padlen = iscsi_padding(count);
2158	struct page *pg;
2159
2160	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
2161		"task 0x%p,0x%p, skb 0x%p, 0x%x,0x%x,0x%x, %u+%u.\n",
2162		task, task->sc, skb, (*skb->data) & ISCSI_OPCODE_MASK,
2163		ntohl(task->cmdsn), ntohl(task->hdr->itt), offset, count);
2164
2165	skb_put(skb, task->hdr_len);
2166	tx_skb_setmode(skb, conn->hdrdgst_en, datalen ? conn->datadgst_en : 0);
2167	if (!count)
2168		return 0;
2169
2170	if (task->sc) {
2171		struct scsi_data_buffer *sdb = scsi_out(task->sc);
2172		struct scatterlist *sg = NULL;
2173		int err;
2174
2175		tdata->offset = offset;
2176		tdata->count = count;
2177		err = sgl_seek_offset(
2178					sdb->table.sgl, sdb->table.nents,
2179					tdata->offset, &tdata->sgoffset, &sg);
2180		if (err < 0) {
2181			pr_warn("tpdu, sgl %u, bad offset %u/%u.\n",
2182				sdb->table.nents, tdata->offset, sdb->length);
2183			return err;
2184		}
2185		err = sgl_read_to_frags(sg, tdata->sgoffset, tdata->count,
2186					tdata->frags, MAX_PDU_FRAGS);
2187		if (err < 0) {
2188			pr_warn("tpdu, sgl %u, bad offset %u + %u.\n",
2189				sdb->table.nents, tdata->offset, tdata->count);
2190			return err;
2191		}
2192		tdata->nr_frags = err;
2193
2194		if (tdata->nr_frags > MAX_SKB_FRAGS ||
2195		    (padlen && tdata->nr_frags == MAX_SKB_FRAGS)) {
2196			char *dst = skb->data + task->hdr_len;
2197			struct page_frag *frag = tdata->frags;
2198
2199			/* data fits in the skb's headroom */
2200			for (i = 0; i < tdata->nr_frags; i++, frag++) {
2201				char *src = kmap_atomic(frag->page);
2202
2203				memcpy(dst, src+frag->offset, frag->size);
2204				dst += frag->size;
2205				kunmap_atomic(src);
2206			}
2207			if (padlen) {
2208				memset(dst, 0, padlen);
2209				padlen = 0;
2210			}
2211			skb_put(skb, count + padlen);
2212		} else {
2213			/* data fit into frag_list */
2214			for (i = 0; i < tdata->nr_frags; i++) {
2215				__skb_fill_page_desc(skb, i,
2216						tdata->frags[i].page,
2217						tdata->frags[i].offset,
2218						tdata->frags[i].size);
2219				skb_frag_ref(skb, i);
2220			}
2221			skb_shinfo(skb)->nr_frags = tdata->nr_frags;
2222			skb->len += count;
2223			skb->data_len += count;
2224			skb->truesize += count;
2225		}
2226
2227	} else {
2228		pg = virt_to_page(task->data);
2229
2230		get_page(pg);
2231		skb_fill_page_desc(skb, 0, pg, offset_in_page(task->data),
2232					count);
2233		skb->len += count;
2234		skb->data_len += count;
2235		skb->truesize += count;
2236	}
2237
2238	if (padlen) {
2239		i = skb_shinfo(skb)->nr_frags;
2240		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
2241				virt_to_page(padding), offset_in_page(padding),
2242				padlen);
2243
2244		skb->data_len += padlen;
2245		skb->truesize += padlen;
2246		skb->len += padlen;
2247	}
2248
2249	return 0;
2250}
2251EXPORT_SYMBOL_GPL(cxgbi_conn_init_pdu);
2252
2253int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
2254{
2255	struct iscsi_tcp_conn *tcp_conn = task->conn->dd_data;
2256	struct cxgbi_conn *cconn = tcp_conn->dd_data;
2257	struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
2258	struct sk_buff *skb = tdata->skb;
2259	unsigned int datalen;
2260	int err;
2261
2262	if (!skb) {
2263		log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
2264			"task 0x%p, skb NULL.\n", task);
2265		return 0;
2266	}
2267
2268	datalen = skb->data_len;
2269	tdata->skb = NULL;
2270	err = cxgbi_sock_send_pdus(cconn->cep->csk, skb);
2271	if (err > 0) {
2272		int pdulen = err;
2273
2274		log_debug(1 << CXGBI_DBG_PDU_TX,
2275			"task 0x%p,0x%p, skb 0x%p, len %u/%u, rv %d.\n",
2276			task, task->sc, skb, skb->len, skb->data_len, err);
2277
2278		if (task->conn->hdrdgst_en)
2279			pdulen += ISCSI_DIGEST_SIZE;
2280
2281		if (datalen && task->conn->datadgst_en)
2282			pdulen += ISCSI_DIGEST_SIZE;
2283
2284		task->conn->txdata_octets += pdulen;
2285		return 0;
2286	}
2287
2288	if (err == -EAGAIN || err == -ENOBUFS) {
2289		log_debug(1 << CXGBI_DBG_PDU_TX,
2290			"task 0x%p, skb 0x%p, len %u/%u, %d EAGAIN.\n",
2291			task, skb, skb->len, skb->data_len, err);
2292		/* reset skb to send when we are called again */
2293		tdata->skb = skb;
2294		return err;
2295	}
2296
2297	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
2298		"itt 0x%x, skb 0x%p, len %u/%u, xmit err %d.\n",
2299		task->itt, skb, skb->len, skb->data_len, err);
2300
2301	kfree_skb(skb);
2302
2303	iscsi_conn_printk(KERN_ERR, task->conn, "xmit err %d.\n", err);
2304	iscsi_conn_failure(task->conn, ISCSI_ERR_XMIT_FAILED);
2305	return err;
2306}
2307EXPORT_SYMBOL_GPL(cxgbi_conn_xmit_pdu);
2308
2309void cxgbi_cleanup_task(struct iscsi_task *task)
2310{
2311	struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
2312
2313	log_debug(1 << CXGBI_DBG_ISCSI,
2314		"task 0x%p, skb 0x%p, itt 0x%x.\n",
2315		task, tdata->skb, task->hdr_itt);
2316
2317	/*  never reached the xmit task callout */
2318	if (tdata->skb)
2319		__kfree_skb(tdata->skb);
2320	memset(tdata, 0, sizeof(*tdata));
2321
2322	task_release_itt(task, task->hdr_itt);
2323	iscsi_tcp_cleanup_task(task);
2324}
2325EXPORT_SYMBOL_GPL(cxgbi_cleanup_task);
2326
2327void cxgbi_get_conn_stats(struct iscsi_cls_conn *cls_conn,
2328				struct iscsi_stats *stats)
2329{
2330	struct iscsi_conn *conn = cls_conn->dd_data;
2331
2332	stats->txdata_octets = conn->txdata_octets;
2333	stats->rxdata_octets = conn->rxdata_octets;
2334	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
2335	stats->dataout_pdus = conn->dataout_pdus_cnt;
2336	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
2337	stats->datain_pdus = conn->datain_pdus_cnt;
2338	stats->r2t_pdus = conn->r2t_pdus_cnt;
2339	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
2340	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
2341	stats->digest_err = 0;
2342	stats->timeout_err = 0;
2343	stats->custom_length = 1;
2344	strcpy(stats->custom[0].desc, "eh_abort_cnt");
2345	stats->custom[0].value = conn->eh_abort_cnt;
2346}
2347EXPORT_SYMBOL_GPL(cxgbi_get_conn_stats);
2348
2349static int cxgbi_conn_max_xmit_dlength(struct iscsi_conn *conn)
2350{
2351	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
2352	struct cxgbi_conn *cconn = tcp_conn->dd_data;
2353	struct cxgbi_device *cdev = cconn->chba->cdev;
2354	unsigned int headroom = SKB_MAX_HEAD(cdev->skb_tx_rsvd);
2355	unsigned int max_def = 512 * MAX_SKB_FRAGS;
2356	unsigned int max = max(max_def, headroom);
2357
2358	max = min(cconn->chba->cdev->tx_max_size, max);
2359	if (conn->max_xmit_dlength)
2360		conn->max_xmit_dlength = min(conn->max_xmit_dlength, max);
2361	else
2362		conn->max_xmit_dlength = max;
2363	cxgbi_align_pdu_size(conn->max_xmit_dlength);
2364
2365	return 0;
2366}
2367
2368static int cxgbi_conn_max_recv_dlength(struct iscsi_conn *conn)
2369{
2370	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
2371	struct cxgbi_conn *cconn = tcp_conn->dd_data;
2372	unsigned int max = cconn->chba->cdev->rx_max_size;
2373
2374	cxgbi_align_pdu_size(max);
2375
2376	if (conn->max_recv_dlength) {
2377		if (conn->max_recv_dlength > max) {
2378			pr_err("MaxRecvDataSegmentLength %u > %u.\n",
2379				conn->max_recv_dlength, max);
2380			return -EINVAL;
2381		}
2382		conn->max_recv_dlength = min(conn->max_recv_dlength, max);
2383		cxgbi_align_pdu_size(conn->max_recv_dlength);
2384	} else
2385		conn->max_recv_dlength = max;
2386
2387	return 0;
2388}
2389
2390int cxgbi_set_conn_param(struct iscsi_cls_conn *cls_conn,
2391			enum iscsi_param param, char *buf, int buflen)
2392{
2393	struct iscsi_conn *conn = cls_conn->dd_data;
2394	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
2395	struct cxgbi_conn *cconn = tcp_conn->dd_data;
2396	struct cxgbi_sock *csk = cconn->cep->csk;
2397	int err;
2398
2399	log_debug(1 << CXGBI_DBG_ISCSI,
2400		"cls_conn 0x%p, param %d, buf(%d) %s.\n",
2401		cls_conn, param, buflen, buf);
2402
2403	switch (param) {
2404	case ISCSI_PARAM_HDRDGST_EN:
2405		err = iscsi_set_param(cls_conn, param, buf, buflen);
2406		if (!err && conn->hdrdgst_en)
2407			err = csk->cdev->csk_ddp_setup_digest(csk, csk->tid,
2408							conn->hdrdgst_en,
2409							conn->datadgst_en, 0);
2410		break;
2411	case ISCSI_PARAM_DATADGST_EN:
2412		err = iscsi_set_param(cls_conn, param, buf, buflen);
2413		if (!err && conn->datadgst_en)
2414			err = csk->cdev->csk_ddp_setup_digest(csk, csk->tid,
2415							conn->hdrdgst_en,
2416							conn->datadgst_en, 0);
2417		break;
2418	case ISCSI_PARAM_MAX_R2T:
2419		return iscsi_tcp_set_max_r2t(conn, buf);
2420	case ISCSI_PARAM_MAX_RECV_DLENGTH:
2421		err = iscsi_set_param(cls_conn, param, buf, buflen);
2422		if (!err)
2423			err = cxgbi_conn_max_recv_dlength(conn);
2424		break;
2425	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
2426		err = iscsi_set_param(cls_conn, param, buf, buflen);
2427		if (!err)
2428			err = cxgbi_conn_max_xmit_dlength(conn);
2429		break;
2430	default:
2431		return iscsi_set_param(cls_conn, param, buf, buflen);
2432	}
2433	return err;
2434}
2435EXPORT_SYMBOL_GPL(cxgbi_set_conn_param);
2436
2437static inline int csk_print_port(struct cxgbi_sock *csk, char *buf)
2438{
2439	int len;
2440
2441	cxgbi_sock_get(csk);
2442	len = sprintf(buf, "%hu\n", ntohs(csk->daddr.sin_port));
2443	cxgbi_sock_put(csk);
2444
2445	return len;
2446}
2447
2448static inline int csk_print_ip(struct cxgbi_sock *csk, char *buf)
2449{
2450	int len;
2451
2452	cxgbi_sock_get(csk);
2453	if (csk->csk_family == AF_INET)
2454		len = sprintf(buf, "%pI4",
2455			      &csk->daddr.sin_addr.s_addr);
2456	else
2457		len = sprintf(buf, "%pI6",
2458			      &csk->daddr6.sin6_addr);
2459
2460	cxgbi_sock_put(csk);
2461
2462	return len;
2463}
2464
2465int cxgbi_get_ep_param(struct iscsi_endpoint *ep, enum iscsi_param param,
2466		       char *buf)
2467{
2468	struct cxgbi_endpoint *cep = ep->dd_data;
2469	struct cxgbi_sock *csk;
2470	int len;
2471
2472	log_debug(1 << CXGBI_DBG_ISCSI,
2473		"cls_conn 0x%p, param %d.\n", ep, param);
2474
2475	switch (param) {
2476	case ISCSI_PARAM_CONN_PORT:
2477	case ISCSI_PARAM_CONN_ADDRESS:
2478		if (!cep)
2479			return -ENOTCONN;
2480
2481		csk = cep->csk;
2482		if (!csk)
2483			return -ENOTCONN;
2484
2485		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
2486						 &csk->daddr, param, buf);
2487	default:
2488		return -ENOSYS;
2489	}
2490	return len;
2491}
2492EXPORT_SYMBOL_GPL(cxgbi_get_ep_param);
2493
2494struct iscsi_cls_conn *
2495cxgbi_create_conn(struct iscsi_cls_session *cls_session, u32 cid)
2496{
2497	struct iscsi_cls_conn *cls_conn;
2498	struct iscsi_conn *conn;
2499	struct iscsi_tcp_conn *tcp_conn;
2500	struct cxgbi_conn *cconn;
2501
2502	cls_conn = iscsi_tcp_conn_setup(cls_session, sizeof(*cconn), cid);
2503	if (!cls_conn)
2504		return NULL;
2505
2506	conn = cls_conn->dd_data;
2507	tcp_conn = conn->dd_data;
2508	cconn = tcp_conn->dd_data;
2509	cconn->iconn = conn;
2510
2511	log_debug(1 << CXGBI_DBG_ISCSI,
2512		"cid %u(0x%x), cls 0x%p,0x%p, conn 0x%p,0x%p,0x%p.\n",
2513		cid, cid, cls_session, cls_conn, conn, tcp_conn, cconn);
2514
2515	return cls_conn;
2516}
2517EXPORT_SYMBOL_GPL(cxgbi_create_conn);
2518
2519int cxgbi_bind_conn(struct iscsi_cls_session *cls_session,
2520				struct iscsi_cls_conn *cls_conn,
2521				u64 transport_eph, int is_leading)
2522{
2523	struct iscsi_conn *conn = cls_conn->dd_data;
2524	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
2525	struct cxgbi_conn *cconn = tcp_conn->dd_data;
2526	struct iscsi_endpoint *ep;
2527	struct cxgbi_endpoint *cep;
2528	struct cxgbi_sock *csk;
2529	int err;
2530
2531	ep = iscsi_lookup_endpoint(transport_eph);
2532	if (!ep)
2533		return -EINVAL;
2534
2535	/*  setup ddp pagesize */
2536	cep = ep->dd_data;
2537	csk = cep->csk;
2538	err = csk->cdev->csk_ddp_setup_pgidx(csk, csk->tid, page_idx, 0);
2539	if (err < 0)
2540		return err;
2541
2542	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
2543	if (err)
2544		return -EINVAL;
2545
2546	/*  calculate the tag idx bits needed for this conn based on cmds_max */
2547	cconn->task_idx_bits = (__ilog2_u32(conn->session->cmds_max - 1)) + 1;
2548
2549	write_lock_bh(&csk->callback_lock);
2550	csk->user_data = conn;
2551	cconn->chba = cep->chba;
2552	cconn->cep = cep;
2553	cep->cconn = cconn;
2554	write_unlock_bh(&csk->callback_lock);
2555
2556	cxgbi_conn_max_xmit_dlength(conn);
2557	cxgbi_conn_max_recv_dlength(conn);
2558
2559	log_debug(1 << CXGBI_DBG_ISCSI,
2560		"cls 0x%p,0x%p, ep 0x%p, cconn 0x%p, csk 0x%p.\n",
2561		cls_session, cls_conn, ep, cconn, csk);
2562	/*  init recv engine */
2563	iscsi_tcp_hdr_recv_prep(tcp_conn);
2564
2565	return 0;
2566}
2567EXPORT_SYMBOL_GPL(cxgbi_bind_conn);
2568
2569struct iscsi_cls_session *cxgbi_create_session(struct iscsi_endpoint *ep,
2570						u16 cmds_max, u16 qdepth,
2571						u32 initial_cmdsn)
2572{
2573	struct cxgbi_endpoint *cep;
2574	struct cxgbi_hba *chba;
2575	struct Scsi_Host *shost;
2576	struct iscsi_cls_session *cls_session;
2577	struct iscsi_session *session;
2578
2579	if (!ep) {
2580		pr_err("missing endpoint.\n");
2581		return NULL;
2582	}
2583
2584	cep = ep->dd_data;
2585	chba = cep->chba;
2586	shost = chba->shost;
2587
2588	BUG_ON(chba != iscsi_host_priv(shost));
2589
2590	cls_session = iscsi_session_setup(chba->cdev->itp, shost,
2591					cmds_max, 0,
2592					sizeof(struct iscsi_tcp_task) +
2593					sizeof(struct cxgbi_task_data),
2594					initial_cmdsn, ISCSI_MAX_TARGET);
2595	if (!cls_session)
2596		return NULL;
2597
2598	session = cls_session->dd_data;
2599	if (iscsi_tcp_r2tpool_alloc(session))
2600		goto remove_session;
2601
2602	log_debug(1 << CXGBI_DBG_ISCSI,
2603		"ep 0x%p, cls sess 0x%p.\n", ep, cls_session);
2604	return cls_session;
2605
2606remove_session:
2607	iscsi_session_teardown(cls_session);
2608	return NULL;
2609}
2610EXPORT_SYMBOL_GPL(cxgbi_create_session);
2611
2612void cxgbi_destroy_session(struct iscsi_cls_session *cls_session)
2613{
2614	log_debug(1 << CXGBI_DBG_ISCSI,
2615		"cls sess 0x%p.\n", cls_session);
2616
2617	iscsi_tcp_r2tpool_free(cls_session->dd_data);
2618	iscsi_session_teardown(cls_session);
2619}
2620EXPORT_SYMBOL_GPL(cxgbi_destroy_session);
2621
2622int cxgbi_set_host_param(struct Scsi_Host *shost, enum iscsi_host_param param,
2623			char *buf, int buflen)
2624{
2625	struct cxgbi_hba *chba = iscsi_host_priv(shost);
2626
2627	if (!chba->ndev) {
2628		shost_printk(KERN_ERR, shost, "Could not get host param. "
2629				"netdev for host not set.\n");
2630		return -ENODEV;
2631	}
2632
2633	log_debug(1 << CXGBI_DBG_ISCSI,
2634		"shost 0x%p, hba 0x%p,%s, param %d, buf(%d) %s.\n",
2635		shost, chba, chba->ndev->name, param, buflen, buf);
2636
2637	switch (param) {
2638	case ISCSI_HOST_PARAM_IPADDRESS:
2639	{
2640		__be32 addr = in_aton(buf);
2641		log_debug(1 << CXGBI_DBG_ISCSI,
2642			"hba %s, req. ipv4 %pI4.\n", chba->ndev->name, &addr);
2643		cxgbi_set_iscsi_ipv4(chba, addr);
2644		return 0;
2645	}
2646	case ISCSI_HOST_PARAM_HWADDRESS:
2647	case ISCSI_HOST_PARAM_NETDEV_NAME:
2648		return 0;
2649	default:
2650		return iscsi_host_set_param(shost, param, buf, buflen);
2651	}
2652}
2653EXPORT_SYMBOL_GPL(cxgbi_set_host_param);
2654
2655int cxgbi_get_host_param(struct Scsi_Host *shost, enum iscsi_host_param param,
2656			char *buf)
2657{
2658	struct cxgbi_hba *chba = iscsi_host_priv(shost);
2659	int len = 0;
2660
2661	if (!chba->ndev) {
2662		shost_printk(KERN_ERR, shost, "Could not get host param. "
2663				"netdev for host not set.\n");
2664		return -ENODEV;
2665	}
2666
2667	log_debug(1 << CXGBI_DBG_ISCSI,
2668		"shost 0x%p, hba 0x%p,%s, param %d.\n",
2669		shost, chba, chba->ndev->name, param);
2670
2671	switch (param) {
2672	case ISCSI_HOST_PARAM_HWADDRESS:
2673		len = sysfs_format_mac(buf, chba->ndev->dev_addr, 6);
2674		break;
2675	case ISCSI_HOST_PARAM_NETDEV_NAME:
2676		len = sprintf(buf, "%s\n", chba->ndev->name);
2677		break;
2678	case ISCSI_HOST_PARAM_IPADDRESS:
2679	{
2680		struct cxgbi_sock *csk = find_sock_on_port(chba->cdev,
2681							   chba->port_id);
2682		if (csk) {
2683			len = sprintf(buf, "%pIS",
2684				      (struct sockaddr *)&csk->saddr);
2685		}
2686		log_debug(1 << CXGBI_DBG_ISCSI,
2687			  "hba %s, addr %s.\n", chba->ndev->name, buf);
2688		break;
2689	}
2690	default:
2691		return iscsi_host_get_param(shost, param, buf);
2692	}
2693
2694	return len;
2695}
2696EXPORT_SYMBOL_GPL(cxgbi_get_host_param);
2697
2698struct iscsi_endpoint *cxgbi_ep_connect(struct Scsi_Host *shost,
2699					struct sockaddr *dst_addr,
2700					int non_blocking)
2701{
2702	struct iscsi_endpoint *ep;
2703	struct cxgbi_endpoint *cep;
2704	struct cxgbi_hba *hba = NULL;
2705	struct cxgbi_sock *csk;
2706	int err = -EINVAL;
2707
2708	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_SOCK,
2709		"shost 0x%p, non_blocking %d, dst_addr 0x%p.\n",
2710		shost, non_blocking, dst_addr);
2711
2712	if (shost) {
2713		hba = iscsi_host_priv(shost);
2714		if (!hba) {
2715			pr_info("shost 0x%p, priv NULL.\n", shost);
2716			goto err_out;
2717		}
2718	}
2719
2720	if (dst_addr->sa_family == AF_INET) {
2721		csk = cxgbi_check_route(dst_addr);
2722#if IS_ENABLED(CONFIG_IPV6)
2723	} else if (dst_addr->sa_family == AF_INET6) {
2724		csk = cxgbi_check_route6(dst_addr);
2725#endif
2726	} else {
2727		pr_info("address family 0x%x NOT supported.\n",
2728			dst_addr->sa_family);
2729		err = -EAFNOSUPPORT;
2730		return (struct iscsi_endpoint *)ERR_PTR(err);
2731	}
2732
2733	if (IS_ERR(csk))
2734		return (struct iscsi_endpoint *)csk;
2735	cxgbi_sock_get(csk);
2736
2737	if (!hba)
2738		hba = csk->cdev->hbas[csk->port_id];
2739	else if (hba != csk->cdev->hbas[csk->port_id]) {
2740		pr_info("Could not connect through requested host %u"
2741			"hba 0x%p != 0x%p (%u).\n",
2742			shost->host_no, hba,
2743			csk->cdev->hbas[csk->port_id], csk->port_id);
2744		err = -ENOSPC;
2745		goto release_conn;
2746	}
2747
2748	err = sock_get_port(csk);
2749	if (err)
2750		goto release_conn;
2751
2752	cxgbi_sock_set_state(csk, CTP_CONNECTING);
2753	err = csk->cdev->csk_init_act_open(csk);
2754	if (err)
2755		goto release_conn;
2756
2757	if (cxgbi_sock_is_closing(csk)) {
2758		err = -ENOSPC;
2759		pr_info("csk 0x%p is closing.\n", csk);
2760		goto release_conn;
2761	}
2762
2763	ep = iscsi_create_endpoint(sizeof(*cep));
2764	if (!ep) {
2765		err = -ENOMEM;
2766		pr_info("iscsi alloc ep, OOM.\n");
2767		goto release_conn;
2768	}
2769
2770	cep = ep->dd_data;
2771	cep->csk = csk;
2772	cep->chba = hba;
2773
2774	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_SOCK,
2775		"ep 0x%p, cep 0x%p, csk 0x%p, hba 0x%p,%s.\n",
2776		ep, cep, csk, hba, hba->ndev->name);
2777	return ep;
2778
2779release_conn:
2780	cxgbi_sock_put(csk);
2781	cxgbi_sock_closed(csk);
2782err_out:
2783	return ERR_PTR(err);
2784}
2785EXPORT_SYMBOL_GPL(cxgbi_ep_connect);
2786
2787int cxgbi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
2788{
2789	struct cxgbi_endpoint *cep = ep->dd_data;
2790	struct cxgbi_sock *csk = cep->csk;
2791
2792	if (!cxgbi_sock_is_established(csk))
2793		return 0;
2794	return 1;
2795}
2796EXPORT_SYMBOL_GPL(cxgbi_ep_poll);
2797
2798void cxgbi_ep_disconnect(struct iscsi_endpoint *ep)
2799{
2800	struct cxgbi_endpoint *cep = ep->dd_data;
2801	struct cxgbi_conn *cconn = cep->cconn;
2802	struct cxgbi_sock *csk = cep->csk;
2803
2804	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_SOCK,
2805		"ep 0x%p, cep 0x%p, cconn 0x%p, csk 0x%p,%u,0x%lx.\n",
2806		ep, cep, cconn, csk, csk->state, csk->flags);
2807
2808	if (cconn && cconn->iconn) {
2809		iscsi_suspend_tx(cconn->iconn);
2810		write_lock_bh(&csk->callback_lock);
2811		cep->csk->user_data = NULL;
2812		cconn->cep = NULL;
2813		write_unlock_bh(&csk->callback_lock);
2814	}
2815	iscsi_destroy_endpoint(ep);
2816
2817	if (likely(csk->state >= CTP_ESTABLISHED))
2818		need_active_close(csk);
2819	else
2820		cxgbi_sock_closed(csk);
2821
2822	cxgbi_sock_put(csk);
2823}
2824EXPORT_SYMBOL_GPL(cxgbi_ep_disconnect);
2825
2826int cxgbi_iscsi_init(struct iscsi_transport *itp,
2827			struct scsi_transport_template **stt)
2828{
2829	*stt = iscsi_register_transport(itp);
2830	if (*stt == NULL) {
2831		pr_err("unable to register %s transport 0x%p.\n",
2832			itp->name, itp);
2833		return -ENODEV;
2834	}
2835	log_debug(1 << CXGBI_DBG_ISCSI,
2836		"%s, registered iscsi transport 0x%p.\n",
2837		itp->name, stt);
2838	return 0;
2839}
2840EXPORT_SYMBOL_GPL(cxgbi_iscsi_init);
2841
2842void cxgbi_iscsi_cleanup(struct iscsi_transport *itp,
2843			struct scsi_transport_template **stt)
2844{
2845	if (*stt) {
2846		log_debug(1 << CXGBI_DBG_ISCSI,
2847			"de-register transport 0x%p, %s, stt 0x%p.\n",
2848			itp, itp->name, *stt);
2849		*stt = NULL;
2850		iscsi_unregister_transport(itp);
2851	}
2852}
2853EXPORT_SYMBOL_GPL(cxgbi_iscsi_cleanup);
2854
2855umode_t cxgbi_attr_is_visible(int param_type, int param)
2856{
2857	switch (param_type) {
2858	case ISCSI_HOST_PARAM:
2859		switch (param) {
2860		case ISCSI_HOST_PARAM_NETDEV_NAME:
2861		case ISCSI_HOST_PARAM_HWADDRESS:
2862		case ISCSI_HOST_PARAM_IPADDRESS:
2863		case ISCSI_HOST_PARAM_INITIATOR_NAME:
2864			return S_IRUGO;
2865		default:
2866			return 0;
2867		}
2868	case ISCSI_PARAM:
2869		switch (param) {
2870		case ISCSI_PARAM_MAX_RECV_DLENGTH:
2871		case ISCSI_PARAM_MAX_XMIT_DLENGTH:
2872		case ISCSI_PARAM_HDRDGST_EN:
2873		case ISCSI_PARAM_DATADGST_EN:
2874		case ISCSI_PARAM_CONN_ADDRESS:
2875		case ISCSI_PARAM_CONN_PORT:
2876		case ISCSI_PARAM_EXP_STATSN:
2877		case ISCSI_PARAM_PERSISTENT_ADDRESS:
2878		case ISCSI_PARAM_PERSISTENT_PORT:
2879		case ISCSI_PARAM_PING_TMO:
2880		case ISCSI_PARAM_RECV_TMO:
2881		case ISCSI_PARAM_INITIAL_R2T_EN:
2882		case ISCSI_PARAM_MAX_R2T:
2883		case ISCSI_PARAM_IMM_DATA_EN:
2884		case ISCSI_PARAM_FIRST_BURST:
2885		case ISCSI_PARAM_MAX_BURST:
2886		case ISCSI_PARAM_PDU_INORDER_EN:
2887		case ISCSI_PARAM_DATASEQ_INORDER_EN:
2888		case ISCSI_PARAM_ERL:
2889		case ISCSI_PARAM_TARGET_NAME:
2890		case ISCSI_PARAM_TPGT:
2891		case ISCSI_PARAM_USERNAME:
2892		case ISCSI_PARAM_PASSWORD:
2893		case ISCSI_PARAM_USERNAME_IN:
2894		case ISCSI_PARAM_PASSWORD_IN:
2895		case ISCSI_PARAM_FAST_ABORT:
2896		case ISCSI_PARAM_ABORT_TMO:
2897		case ISCSI_PARAM_LU_RESET_TMO:
2898		case ISCSI_PARAM_TGT_RESET_TMO:
2899		case ISCSI_PARAM_IFACE_NAME:
2900		case ISCSI_PARAM_INITIATOR_NAME:
2901			return S_IRUGO;
2902		default:
2903			return 0;
2904		}
2905	}
2906
2907	return 0;
2908}
2909EXPORT_SYMBOL_GPL(cxgbi_attr_is_visible);
2910
2911static int __init libcxgbi_init_module(void)
2912{
2913	sw_tag_idx_bits = (__ilog2_u32(ISCSI_ITT_MASK)) + 1;
2914	sw_tag_age_bits = (__ilog2_u32(ISCSI_AGE_MASK)) + 1;
2915
2916	pr_info("tag itt 0x%x, %u bits, age 0x%x, %u bits.\n",
2917		ISCSI_ITT_MASK, sw_tag_idx_bits,
2918		ISCSI_AGE_MASK, sw_tag_age_bits);
2919
2920	ddp_setup_host_page_size();
2921	return 0;
2922}
2923
2924static void __exit libcxgbi_exit_module(void)
2925{
2926	cxgbi_device_unregister_all(0xFF);
2927	return;
2928}
2929
2930module_init(libcxgbi_init_module);
2931module_exit(libcxgbi_exit_module);
2932