1/*
2 * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *	  copyright notice, this list of conditions and the following
16 *	  disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *	  copyright notice, this list of conditions and the following
20 *	  disclaimer in the documentation and/or other materials
21 *	  provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <linux/module.h>
33#include <linux/list.h>
34#include <linux/workqueue.h>
35#include <linux/skbuff.h>
36#include <linux/timer.h>
37#include <linux/notifier.h>
38#include <linux/inetdevice.h>
39#include <linux/ip.h>
40#include <linux/tcp.h>
41#include <linux/if_vlan.h>
42
43#include <net/neighbour.h>
44#include <net/netevent.h>
45#include <net/route.h>
46#include <net/tcp.h>
47#include <net/ip6_route.h>
48#include <net/addrconf.h>
49
50#include <rdma/ib_addr.h>
51
52#include "iw_cxgb4.h"
53
54static char *states[] = {
55	"idle",
56	"listen",
57	"connecting",
58	"mpa_wait_req",
59	"mpa_req_sent",
60	"mpa_req_rcvd",
61	"mpa_rep_sent",
62	"fpdu_mode",
63	"aborting",
64	"closing",
65	"moribund",
66	"dead",
67	NULL,
68};
69
70static int nocong;
71module_param(nocong, int, 0644);
72MODULE_PARM_DESC(nocong, "Turn of congestion control (default=0)");
73
74static int enable_ecn;
75module_param(enable_ecn, int, 0644);
76MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
77
78static int dack_mode = 1;
79module_param(dack_mode, int, 0644);
80MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
81
82uint c4iw_max_read_depth = 32;
83module_param(c4iw_max_read_depth, int, 0644);
84MODULE_PARM_DESC(c4iw_max_read_depth,
85		 "Per-connection max ORD/IRD (default=32)");
86
87static int enable_tcp_timestamps;
88module_param(enable_tcp_timestamps, int, 0644);
89MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)");
90
91static int enable_tcp_sack;
92module_param(enable_tcp_sack, int, 0644);
93MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)");
94
95static int enable_tcp_window_scaling = 1;
96module_param(enable_tcp_window_scaling, int, 0644);
97MODULE_PARM_DESC(enable_tcp_window_scaling,
98		 "Enable tcp window scaling (default=1)");
99
100int c4iw_debug;
101module_param(c4iw_debug, int, 0644);
102MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
103
104static int peer2peer = 1;
105module_param(peer2peer, int, 0644);
106MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
107
108static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
109module_param(p2p_type, int, 0644);
110MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
111			   "1=RDMA_READ 0=RDMA_WRITE (default 1)");
112
113static int ep_timeout_secs = 60;
114module_param(ep_timeout_secs, int, 0644);
115MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
116				   "in seconds (default=60)");
117
118static int mpa_rev = 1;
119module_param(mpa_rev, int, 0644);
120MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
121		"1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft"
122		" compliant (default=1)");
123
124static int markers_enabled;
125module_param(markers_enabled, int, 0644);
126MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default=0/disabled)");
127
128static int crc_enabled = 1;
129module_param(crc_enabled, int, 0644);
130MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default=1/enabled)");
131
132static int rcv_win = 256 * 1024;
133module_param(rcv_win, int, 0644);
134MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
135
136static int snd_win = 128 * 1024;
137module_param(snd_win, int, 0644);
138MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
139
140static struct workqueue_struct *workq;
141
142static struct sk_buff_head rxq;
143
144static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
145static void ep_timeout(unsigned long arg);
146static void connect_reply_upcall(struct c4iw_ep *ep, int status);
147
148static LIST_HEAD(timeout_list);
149static spinlock_t timeout_lock;
150
151static void deref_qp(struct c4iw_ep *ep)
152{
153	c4iw_qp_rem_ref(&ep->com.qp->ibqp);
154	clear_bit(QP_REFERENCED, &ep->com.flags);
155}
156
157static void ref_qp(struct c4iw_ep *ep)
158{
159	set_bit(QP_REFERENCED, &ep->com.flags);
160	c4iw_qp_add_ref(&ep->com.qp->ibqp);
161}
162
163static void start_ep_timer(struct c4iw_ep *ep)
164{
165	PDBG("%s ep %p\n", __func__, ep);
166	if (timer_pending(&ep->timer)) {
167		pr_err("%s timer already started! ep %p\n",
168		       __func__, ep);
169		return;
170	}
171	clear_bit(TIMEOUT, &ep->com.flags);
172	c4iw_get_ep(&ep->com);
173	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
174	ep->timer.data = (unsigned long)ep;
175	ep->timer.function = ep_timeout;
176	add_timer(&ep->timer);
177}
178
179static int stop_ep_timer(struct c4iw_ep *ep)
180{
181	PDBG("%s ep %p stopping\n", __func__, ep);
182	del_timer_sync(&ep->timer);
183	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
184		c4iw_put_ep(&ep->com);
185		return 0;
186	}
187	return 1;
188}
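
/*
 * Illustrative sketch (not part of the driver): the intended pairing of
 * start_ep_timer()/stop_ep_timer().  start_ep_timer() takes an extra
 * reference on the endpoint so it cannot be freed while the timer is
 * pending; stop_ep_timer() drops that reference only if the TIMEOUT bit
 * has not already been claimed elsewhere (typically by the expired timer
 * handler).  The caller below is hypothetical.
 */
#if 0
static void example_ep_timer_usage(struct c4iw_ep *ep)
{
	start_ep_timer(ep);	/* holds an ep reference while pending */
	/* ... MPA exchange in progress ... */
	if (stop_ep_timer(ep)) {
		/* timer already fired: process_timeout() will handle the abort */
		return;
	}
	/* timer stopped before expiry: continue normal processing */
}
#endif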
189
190static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
191		  struct l2t_entry *l2e)
192{
193	int	error = 0;
194
195	if (c4iw_fatal_error(rdev)) {
196		kfree_skb(skb);
197		PDBG("%s - device in error state - dropping\n", __func__);
198		return -EIO;
199	}
200	error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
201	if (error < 0)
202		kfree_skb(skb);
203	return error < 0 ? error : 0;
204}
205
206int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
207{
208	int	error = 0;
209
210	if (c4iw_fatal_error(rdev)) {
211		kfree_skb(skb);
212		PDBG("%s - device in error state - dropping\n", __func__);
213		return -EIO;
214	}
215	error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
216	if (error < 0)
217		kfree_skb(skb);
218	return error < 0 ? error : 0;
219}
220
221static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
222{
223	struct cpl_tid_release *req;
224
225	skb = get_skb(skb, sizeof *req, GFP_KERNEL);
226	if (!skb)
227		return;
228	req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
229	INIT_TP_WR(req, hwtid);
230	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
231	set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
232	c4iw_ofld_send(rdev, skb);
233	return;
234}
235
236static void set_emss(struct c4iw_ep *ep, u16 opt)
237{
238	ep->emss = ep->com.dev->rdev.lldi.mtus[TCPOPT_MSS_G(opt)] -
239		   ((AF_INET == ep->com.remote_addr.ss_family) ?
240		    sizeof(struct iphdr) : sizeof(struct ipv6hdr)) -
241		   sizeof(struct tcphdr);
242	ep->mss = ep->emss;
243	if (TCPOPT_TSTAMP_G(opt))
244		ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4);
245	if (ep->emss < 128)
246		ep->emss = 128;
247	if (ep->emss & 7)
248		PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
249		     TCPOPT_MSS_G(opt), ep->mss, ep->emss);
250	PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt),
251	     ep->mss, ep->emss);
252}
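
/*
 * Worked example for set_emss() (illustrative numbers only): if the
 * negotiated MTU index maps to 1500 bytes on an IPv4 connection, the
 * effective MSS starts at 1500 - 20 (iphdr) - 20 (tcphdr) = 1460.  With
 * TCP timestamps negotiated, another round_up(TCPOLEN_TIMESTAMP, 4) = 12
 * bytes are subtracted, giving emss = 1448, and the result is never
 * allowed to drop below 128.
 */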
253
254static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
255{
256	enum c4iw_ep_state state;
257
258	mutex_lock(&epc->mutex);
259	state = epc->state;
260	mutex_unlock(&epc->mutex);
261	return state;
262}
263
264static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
265{
266	epc->state = new;
267}
268
269static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
270{
271	mutex_lock(&epc->mutex);
272	PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
273	__state_set(epc, new);
274	mutex_unlock(&epc->mutex);
275	return;
276}
277
278static void *alloc_ep(int size, gfp_t gfp)
279{
280	struct c4iw_ep_common *epc;
281
282	epc = kzalloc(size, gfp);
283	if (epc) {
284		kref_init(&epc->kref);
285		mutex_init(&epc->mutex);
286		c4iw_init_wr_wait(&epc->wr_wait);
287	}
288	PDBG("%s alloc ep %p\n", __func__, epc);
289	return epc;
290}
291
292void _c4iw_free_ep(struct kref *kref)
293{
294	struct c4iw_ep *ep;
295
296	ep = container_of(kref, struct c4iw_ep, com.kref);
297	PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
298	if (test_bit(QP_REFERENCED, &ep->com.flags))
299		deref_qp(ep);
300	if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
301		remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
302		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
303		dst_release(ep->dst);
304		cxgb4_l2t_release(ep->l2t);
305	}
306	if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) {
307		print_addr(&ep->com, __func__, "remove_mapinfo/mapping");
308		iwpm_remove_mapinfo(&ep->com.local_addr,
309				    &ep->com.mapped_local_addr);
310		iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
311	}
312	kfree(ep);
313}
314
315static void release_ep_resources(struct c4iw_ep *ep)
316{
317	set_bit(RELEASE_RESOURCES, &ep->com.flags);
318	c4iw_put_ep(&ep->com);
319}
320
321static int status2errno(int status)
322{
323	switch (status) {
324	case CPL_ERR_NONE:
325		return 0;
326	case CPL_ERR_CONN_RESET:
327		return -ECONNRESET;
328	case CPL_ERR_ARP_MISS:
329		return -EHOSTUNREACH;
330	case CPL_ERR_CONN_TIMEDOUT:
331		return -ETIMEDOUT;
332	case CPL_ERR_TCAM_FULL:
333		return -ENOMEM;
334	case CPL_ERR_CONN_EXIST:
335		return -EADDRINUSE;
336	default:
337		return -EIO;
338	}
339}
340
341/*
342 * Try to reuse skbs already allocated...
343 */
344static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
345{
346	if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
347		skb_trim(skb, 0);
348		skb_get(skb);
349		skb_reset_transport_header(skb);
350	} else {
351		skb = alloc_skb(len, gfp);
352	}
353	t4_set_arp_err_handler(skb, NULL, NULL);
354	return skb;
355}
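
/*
 * Minimal usage sketch (illustrative only, hypothetical caller): a
 * previously allocated skb may be passed in for reuse when it is linear
 * and not cloned; otherwise a fresh skb of at least @len bytes is
 * allocated.  The return value can be NULL on allocation failure and
 * must be checked.
 */
#if 0
static int example_get_skb_usage(void)
{
	struct sk_buff *skb;

	skb = get_skb(NULL, roundup(sizeof(struct cpl_tid_release), 16),
		      GFP_KERNEL);
	if (!skb)
		return -ENOMEM;
	/* ... build and send the work request, which consumes the skb ... */
	return 0;
}
#endif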
356
357static struct net_device *get_real_dev(struct net_device *egress_dev)
358{
359	return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
360}
361
362static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
363{
364	int i;
365
366	egress_dev = get_real_dev(egress_dev);
367	for (i = 0; i < dev->rdev.lldi.nports; i++)
368		if (dev->rdev.lldi.ports[i] == egress_dev)
369			return 1;
370	return 0;
371}
372
373static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
374				     __u8 *peer_ip, __be16 local_port,
375				     __be16 peer_port, u8 tos,
376				     __u32 sin6_scope_id)
377{
378	struct dst_entry *dst = NULL;
379
380	if (IS_ENABLED(CONFIG_IPV6)) {
381		struct flowi6 fl6;
382
383		memset(&fl6, 0, sizeof(fl6));
384		memcpy(&fl6.daddr, peer_ip, 16);
385		memcpy(&fl6.saddr, local_ip, 16);
386		if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
387			fl6.flowi6_oif = sin6_scope_id;
388		dst = ip6_route_output(&init_net, NULL, &fl6);
389		if (!dst)
390			goto out;
391		if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
392		    !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
393			dst_release(dst);
394			dst = NULL;
395		}
396	}
397
398out:
399	return dst;
400}
401
402static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
403				 __be32 peer_ip, __be16 local_port,
404				 __be16 peer_port, u8 tos)
405{
406	struct rtable *rt;
407	struct flowi4 fl4;
408	struct neighbour *n;
409
410	rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
411				   peer_port, local_port, IPPROTO_TCP,
412				   tos, 0);
413	if (IS_ERR(rt))
414		return NULL;
415	n = dst_neigh_lookup(&rt->dst, &peer_ip);
416	if (!n)
417		return NULL;
418	if (!our_interface(dev, n->dev) &&
419	    !(n->dev->flags & IFF_LOOPBACK)) {
420		neigh_release(n);
421		dst_release(&rt->dst);
422		return NULL;
423	}
424	neigh_release(n);
425	return &rt->dst;
426}
427
428static void arp_failure_discard(void *handle, struct sk_buff *skb)
429{
430	PDBG("%s c4iw_dev %p\n", __func__, handle);
431	kfree_skb(skb);
432}
433
434/*
435 * Handle an ARP failure for an active open.
436 */
437static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
438{
439	struct c4iw_ep *ep = handle;
440
441	printk(KERN_ERR MOD "ARP failure duing connect\n");
442	kfree_skb(skb);
443	connect_reply_upcall(ep, -EHOSTUNREACH);
444	state_set(&ep->com, DEAD);
445	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
446	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
447	dst_release(ep->dst);
448	cxgb4_l2t_release(ep->l2t);
449	c4iw_put_ep(&ep->com);
450}
451
452/*
453 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
454 * and send it along.
455 */
456static void abort_arp_failure(void *handle, struct sk_buff *skb)
457{
458	struct c4iw_rdev *rdev = handle;
459	struct cpl_abort_req *req = cplhdr(skb);
460
461	PDBG("%s rdev %p\n", __func__, rdev);
462	req->cmd = CPL_ABORT_NO_RST;
463	c4iw_ofld_send(rdev, skb);
464}
465
466static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
467{
468	unsigned int flowclen = 80;
469	struct fw_flowc_wr *flowc;
470	int i;
471
472	skb = get_skb(skb, flowclen, GFP_KERNEL);
473	flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
474
475	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
476					   FW_FLOWC_WR_NPARAMS_V(8));
477	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen,
478					  16)) | FW_WR_FLOWID_V(ep->hwtid));
479
480	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
481	flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
482					    (ep->com.dev->rdev.lldi.pf));
483	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
484	flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
485	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
486	flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
487	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
488	flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
489	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
490	flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
491	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
492	flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
493	flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
494	flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
495	flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
496	flowc->mnemval[7].val = cpu_to_be32(ep->emss);
497	/* Pad WR to 16 byte boundary */
498	flowc->mnemval[8].mnemonic = 0;
499	flowc->mnemval[8].val = 0;
500	for (i = 0; i < 9; i++) {
501		flowc->mnemval[i].r4[0] = 0;
502		flowc->mnemval[i].r4[1] = 0;
503		flowc->mnemval[i].r4[2] = 0;
504	}
505
506	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
507	c4iw_ofld_send(&ep->com.dev->rdev, skb);
508}
509
510static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
511{
512	struct cpl_close_con_req *req;
513	struct sk_buff *skb;
514	int wrlen = roundup(sizeof *req, 16);
515
516	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
517	skb = get_skb(NULL, wrlen, gfp);
518	if (!skb) {
519		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
520		return -ENOMEM;
521	}
522	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
523	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
524	req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
525	memset(req, 0, wrlen);
526	INIT_TP_WR(req, ep->hwtid);
527	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
528						    ep->hwtid));
529	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
530}
531
532static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
533{
534	struct cpl_abort_req *req;
535	int wrlen = roundup(sizeof *req, 16);
536
537	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
538	skb = get_skb(skb, wrlen, gfp);
539	if (!skb) {
540		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
541		       __func__);
542		return -ENOMEM;
543	}
544	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
545	t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure);
546	req = (struct cpl_abort_req *) skb_put(skb, wrlen);
547	memset(req, 0, wrlen);
548	INIT_TP_WR(req, ep->hwtid);
549	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
550	req->cmd = CPL_ABORT_SEND_RST;
551	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
552}
553
554/*
555 * c4iw_form_pm_msg - Form a port mapper message with mapping info
556 */
557static void c4iw_form_pm_msg(struct c4iw_ep *ep,
558				struct iwpm_sa_data *pm_msg)
559{
560	memcpy(&pm_msg->loc_addr, &ep->com.local_addr,
561		sizeof(ep->com.local_addr));
562	memcpy(&pm_msg->rem_addr, &ep->com.remote_addr,
563		sizeof(ep->com.remote_addr));
564}
565
566/*
567 * c4iw_form_reg_msg - Form a port mapper message with dev info
568 */
569static void c4iw_form_reg_msg(struct c4iw_dev *dev,
570				struct iwpm_dev_data *pm_msg)
571{
572	memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE);
573	memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name,
574				IWPM_IFNAME_SIZE);
575}
576
577static void c4iw_record_pm_msg(struct c4iw_ep *ep,
578			struct iwpm_sa_data *pm_msg)
579{
580	memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr,
581		sizeof(ep->com.mapped_local_addr));
582	memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr,
583		sizeof(ep->com.mapped_remote_addr));
584}
585
586static int get_remote_addr(struct c4iw_ep *parent_ep, struct c4iw_ep *child_ep)
587{
588	int ret;
589
590	print_addr(&parent_ep->com, __func__, "get_remote_addr parent_ep ");
591	print_addr(&child_ep->com, __func__, "get_remote_addr child_ep ");
592
593	ret = iwpm_get_remote_info(&parent_ep->com.mapped_local_addr,
594				   &child_ep->com.mapped_remote_addr,
595				   &child_ep->com.remote_addr, RDMA_NL_C4IW);
596	if (ret)
597		PDBG("Unable to find remote peer addr info - err %d\n", ret);
598
599	return ret;
600}
601
602static void best_mtu(const unsigned short *mtus, unsigned short mtu,
603		     unsigned int *idx, int use_ts, int ipv6)
604{
605	unsigned short hdr_size = (ipv6 ?
606				   sizeof(struct ipv6hdr) :
607				   sizeof(struct iphdr)) +
608				  sizeof(struct tcphdr) +
609				  (use_ts ?
610				   round_up(TCPOLEN_TIMESTAMP, 4) : 0);
611	unsigned short data_size = mtu - hdr_size;
612
613	cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
614}
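
/*
 * Worked example for best_mtu() (illustrative): for IPv4 with TCP
 * timestamps in use, hdr_size = 20 (iphdr) + 20 (tcphdr) + 12 (timestamp
 * option rounded up to 4) = 52.  For an interface MTU of 1500 that
 * leaves 1448 bytes of payload, and cxgb4_best_aligned_mtu() is asked
 * for the MTU-table entry whose resulting payload best aligns to 8
 * bytes.
 */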
615
616static int send_connect(struct c4iw_ep *ep)
617{
618	struct cpl_act_open_req *req;
619	struct cpl_t5_act_open_req *t5_req;
620	struct cpl_act_open_req6 *req6;
621	struct cpl_t5_act_open_req6 *t5_req6;
622	struct sk_buff *skb;
623	u64 opt0;
624	u32 opt2;
625	unsigned int mtu_idx;
626	int wscale;
627	int wrlen;
628	int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
629				sizeof(struct cpl_act_open_req) :
630				sizeof(struct cpl_t5_act_open_req);
631	int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
632				sizeof(struct cpl_act_open_req6) :
633				sizeof(struct cpl_t5_act_open_req6);
634	struct sockaddr_in *la = (struct sockaddr_in *)
635				 &ep->com.mapped_local_addr;
636	struct sockaddr_in *ra = (struct sockaddr_in *)
637				 &ep->com.mapped_remote_addr;
638	struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
639				   &ep->com.mapped_local_addr;
640	struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
641				   &ep->com.mapped_remote_addr;
642	int win;
643
644	wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
645			roundup(sizev4, 16) :
646			roundup(sizev6, 16);
647
648	PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);
649
650	skb = get_skb(NULL, wrlen, GFP_KERNEL);
651	if (!skb) {
652		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
653		       __func__);
654		return -ENOMEM;
655	}
656	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
657
658	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
659		 enable_tcp_timestamps,
660		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
661	wscale = compute_wscale(rcv_win);
662
663	/*
664	 * Specify the largest window that will fit in opt0. The
665	 * remainder will be specified in the rx_data_ack.
666	 */
667	win = ep->rcv_win >> 10;
668	if (win > RCV_BUFSIZ_M)
669		win = RCV_BUFSIZ_M;
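	/*
	 * Illustrative example (hypothetical values): with the default
	 * rcv_win of 256KB the field value is 256KB >> 10 = 256 1KB
	 * units; anything beyond RCV_BUFSIZ_M would be clamped here and
	 * returned later as extra credits by update_rx_credits().
	 */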
670
671	opt0 = (nocong ? NO_CONG_F : 0) |
672	       KEEP_ALIVE_F |
673	       DELACK_F |
674	       WND_SCALE_V(wscale) |
675	       MSS_IDX_V(mtu_idx) |
676	       L2T_IDX_V(ep->l2t->idx) |
677	       TX_CHAN_V(ep->tx_chan) |
678	       SMAC_SEL_V(ep->smac_idx) |
679	       DSCP_V(ep->tos) |
680	       ULP_MODE_V(ULP_MODE_TCPDDP) |
681	       RCV_BUFSIZ_V(win);
682	opt2 = RX_CHANNEL_V(0) |
683	       CCTRL_ECN_V(enable_ecn) |
684	       RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
685	if (enable_tcp_timestamps)
686		opt2 |= TSTAMPS_EN_F;
687	if (enable_tcp_sack)
688		opt2 |= SACK_EN_F;
689	if (wscale && enable_tcp_window_scaling)
690		opt2 |= WND_SCALE_EN_F;
691	if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
692		opt2 |= T5_OPT_2_VALID_F;
693		opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
694		opt2 |= T5_ISS_F;
695	}
696	t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
697
698	if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
699		if (ep->com.remote_addr.ss_family == AF_INET) {
700			req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
701			INIT_TP_WR(req, 0);
702			OPCODE_TID(req) = cpu_to_be32(
703					MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
704					((ep->rss_qid << 14) | ep->atid)));
705			req->local_port = la->sin_port;
706			req->peer_port = ra->sin_port;
707			req->local_ip = la->sin_addr.s_addr;
708			req->peer_ip = ra->sin_addr.s_addr;
709			req->opt0 = cpu_to_be64(opt0);
710			req->params = cpu_to_be32(cxgb4_select_ntuple(
711						ep->com.dev->rdev.lldi.ports[0],
712						ep->l2t));
713			req->opt2 = cpu_to_be32(opt2);
714		} else {
715			req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
716
717			INIT_TP_WR(req6, 0);
718			OPCODE_TID(req6) = cpu_to_be32(
719					   MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
720					   ((ep->rss_qid<<14)|ep->atid)));
721			req6->local_port = la6->sin6_port;
722			req6->peer_port = ra6->sin6_port;
723			req6->local_ip_hi = *((__be64 *)
724						(la6->sin6_addr.s6_addr));
725			req6->local_ip_lo = *((__be64 *)
726						(la6->sin6_addr.s6_addr + 8));
727			req6->peer_ip_hi = *((__be64 *)
728						(ra6->sin6_addr.s6_addr));
729			req6->peer_ip_lo = *((__be64 *)
730						(ra6->sin6_addr.s6_addr + 8));
731			req6->opt0 = cpu_to_be64(opt0);
732			req6->params = cpu_to_be32(cxgb4_select_ntuple(
733						ep->com.dev->rdev.lldi.ports[0],
734						ep->l2t));
735			req6->opt2 = cpu_to_be32(opt2);
736		}
737	} else {
738		u32 isn = (prandom_u32() & ~7UL) - 1;
739
740		if (peer2peer)
741			isn += 4;
742
743		if (ep->com.remote_addr.ss_family == AF_INET) {
744			t5_req = (struct cpl_t5_act_open_req *)
745				 skb_put(skb, wrlen);
746			INIT_TP_WR(t5_req, 0);
747			OPCODE_TID(t5_req) = cpu_to_be32(
748					MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
749					((ep->rss_qid << 14) | ep->atid)));
750			t5_req->local_port = la->sin_port;
751			t5_req->peer_port = ra->sin_port;
752			t5_req->local_ip = la->sin_addr.s_addr;
753			t5_req->peer_ip = ra->sin_addr.s_addr;
754			t5_req->opt0 = cpu_to_be64(opt0);
755			t5_req->params = cpu_to_be64(FILTER_TUPLE_V(
756						     cxgb4_select_ntuple(
757					     ep->com.dev->rdev.lldi.ports[0],
758					     ep->l2t)));
759			t5_req->rsvd = cpu_to_be32(isn);
760			PDBG("%s snd_isn %u\n", __func__,
761			     be32_to_cpu(t5_req->rsvd));
762			t5_req->opt2 = cpu_to_be32(opt2);
763		} else {
764			t5_req6 = (struct cpl_t5_act_open_req6 *)
765				  skb_put(skb, wrlen);
766			INIT_TP_WR(t5_req6, 0);
767			OPCODE_TID(t5_req6) = cpu_to_be32(
768					      MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
769					      ((ep->rss_qid<<14)|ep->atid)));
770			t5_req6->local_port = la6->sin6_port;
771			t5_req6->peer_port = ra6->sin6_port;
772			t5_req6->local_ip_hi = *((__be64 *)
773						(la6->sin6_addr.s6_addr));
774			t5_req6->local_ip_lo = *((__be64 *)
775						(la6->sin6_addr.s6_addr + 8));
776			t5_req6->peer_ip_hi = *((__be64 *)
777						(ra6->sin6_addr.s6_addr));
778			t5_req6->peer_ip_lo = *((__be64 *)
779						(ra6->sin6_addr.s6_addr + 8));
780			t5_req6->opt0 = cpu_to_be64(opt0);
781			t5_req6->params = cpu_to_be64(FILTER_TUPLE_V(
782							cxgb4_select_ntuple(
783						ep->com.dev->rdev.lldi.ports[0],
784						ep->l2t)));
785			t5_req6->rsvd = cpu_to_be32(isn);
786			PDBG("%s snd_isn %u\n", __func__,
787			     be32_to_cpu(t5_req6->rsvd));
788			t5_req6->opt2 = cpu_to_be32(opt2);
789		}
790	}
791
792	set_bit(ACT_OPEN_REQ, &ep->com.history);
793	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
794}
795
796static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
797		u8 mpa_rev_to_use)
798{
799	int mpalen, wrlen;
800	struct fw_ofld_tx_data_wr *req;
801	struct mpa_message *mpa;
802	struct mpa_v2_conn_params mpa_v2_params;
803
804	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
805
806	BUG_ON(skb_cloned(skb));
807
808	mpalen = sizeof(*mpa) + ep->plen;
809	if (mpa_rev_to_use == 2)
810		mpalen += sizeof(struct mpa_v2_conn_params);
811	wrlen = roundup(mpalen + sizeof *req, 16);
812	skb = get_skb(skb, wrlen, GFP_KERNEL);
813	if (!skb) {
814		connect_reply_upcall(ep, -ENOMEM);
815		return;
816	}
817	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
818
819	req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
820	memset(req, 0, wrlen);
821	req->op_to_immdlen = cpu_to_be32(
822		FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
823		FW_WR_COMPL_F |
824		FW_WR_IMMDLEN_V(mpalen));
825	req->flowid_len16 = cpu_to_be32(
826		FW_WR_FLOWID_V(ep->hwtid) |
827		FW_WR_LEN16_V(wrlen >> 4));
828	req->plen = cpu_to_be32(mpalen);
829	req->tunnel_to_proxy = cpu_to_be32(
830		FW_OFLD_TX_DATA_WR_FLUSH_F |
831		FW_OFLD_TX_DATA_WR_SHOVE_F);
832
833	mpa = (struct mpa_message *)(req + 1);
834	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
835	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
836		     (markers_enabled ? MPA_MARKERS : 0) |
837		     (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
838	mpa->private_data_size = htons(ep->plen);
839	mpa->revision = mpa_rev_to_use;
840	if (mpa_rev_to_use == 1) {
841		ep->tried_with_mpa_v1 = 1;
842		ep->retry_with_mpa_v1 = 0;
843	}
844
845	if (mpa_rev_to_use == 2) {
846		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
847					       sizeof (struct mpa_v2_conn_params));
848		PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
849		     ep->ord);
850		mpa_v2_params.ird = htons((u16)ep->ird);
851		mpa_v2_params.ord = htons((u16)ep->ord);
852
853		if (peer2peer) {
854			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
855			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
856				mpa_v2_params.ord |=
857					htons(MPA_V2_RDMA_WRITE_RTR);
858			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
859				mpa_v2_params.ord |=
860					htons(MPA_V2_RDMA_READ_RTR);
861		}
862		memcpy(mpa->private_data, &mpa_v2_params,
863		       sizeof(struct mpa_v2_conn_params));
864
865		if (ep->plen)
866			memcpy(mpa->private_data +
867			       sizeof(struct mpa_v2_conn_params),
868			       ep->mpa_pkt + sizeof(*mpa), ep->plen);
869	} else
870		if (ep->plen)
871			memcpy(mpa->private_data,
872					ep->mpa_pkt + sizeof(*mpa), ep->plen);
873
874	/*
875	 * Reference the mpa skb.  This ensures the data area
876	 * will remain in memory until the hw acks the tx.
877	 * Function fw4_ack() will deref it.
878	 */
879	skb_get(skb);
880	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
881	BUG_ON(ep->mpa_skb);
882	ep->mpa_skb = skb;
883	c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
884	start_ep_timer(ep);
885	__state_set(&ep->com, MPA_REQ_SENT);
886	ep->mpa_attr.initiator = 1;
887	ep->snd_seq += mpalen;
888	return;
889}
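
/*
 * Layout sketch (illustrative only): the MPA start request built above is
 * laid out on the wire as
 *
 *	struct fw_ofld_tx_data_wr	(offload TX work request header)
 *	struct mpa_message		(key, flags, revision, private_data_size)
 *	struct mpa_v2_conn_params	(only when mpa_rev_to_use == 2)
 *	private data			(ep->plen bytes, optional)
 *
 * so for MPA v2 the advertised private_data_size is ep->plen plus
 * sizeof(struct mpa_v2_conn_params), and mpalen/wrlen account for the
 * same extra bytes.
 */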
890
891static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
892{
893	int mpalen, wrlen;
894	struct fw_ofld_tx_data_wr *req;
895	struct mpa_message *mpa;
896	struct sk_buff *skb;
897	struct mpa_v2_conn_params mpa_v2_params;
898
899	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
900
901	mpalen = sizeof(*mpa) + plen;
902	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
903		mpalen += sizeof(struct mpa_v2_conn_params);
904	wrlen = roundup(mpalen + sizeof *req, 16);
905
906	skb = get_skb(NULL, wrlen, GFP_KERNEL);
907	if (!skb) {
908		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
909		return -ENOMEM;
910	}
911	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
912
913	req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
914	memset(req, 0, wrlen);
915	req->op_to_immdlen = cpu_to_be32(
916		FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
917		FW_WR_COMPL_F |
918		FW_WR_IMMDLEN_V(mpalen));
919	req->flowid_len16 = cpu_to_be32(
920		FW_WR_FLOWID_V(ep->hwtid) |
921		FW_WR_LEN16_V(wrlen >> 4));
922	req->plen = cpu_to_be32(mpalen);
923	req->tunnel_to_proxy = cpu_to_be32(
924		FW_OFLD_TX_DATA_WR_FLUSH_F |
925		FW_OFLD_TX_DATA_WR_SHOVE_F);
926
927	mpa = (struct mpa_message *)(req + 1);
928	memset(mpa, 0, sizeof(*mpa));
929	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
930	mpa->flags = MPA_REJECT;
931	mpa->revision = ep->mpa_attr.version;
932	mpa->private_data_size = htons(plen);
933
934	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
935		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
936		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
937					       sizeof (struct mpa_v2_conn_params));
938		mpa_v2_params.ird = htons(((u16)ep->ird) |
939					  (peer2peer ? MPA_V2_PEER2PEER_MODEL :
940					   0));
941		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
942					  (p2p_type ==
943					   FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
944					   MPA_V2_RDMA_WRITE_RTR : p2p_type ==
945					   FW_RI_INIT_P2PTYPE_READ_REQ ?
946					   MPA_V2_RDMA_READ_RTR : 0) : 0));
947		memcpy(mpa->private_data, &mpa_v2_params,
948		       sizeof(struct mpa_v2_conn_params));
949
950		if (ep->plen)
951			memcpy(mpa->private_data +
952			       sizeof(struct mpa_v2_conn_params), pdata, plen);
953	} else
954		if (plen)
955			memcpy(mpa->private_data, pdata, plen);
956
957	/*
958	 * Reference the mpa skb again.  This ensures the data area
959	 * will remain in memory until the hw acks the tx.
960	 * Function fw4_ack() will deref it.
961	 */
962	skb_get(skb);
963	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
964	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
965	BUG_ON(ep->mpa_skb);
966	ep->mpa_skb = skb;
967	ep->snd_seq += mpalen;
968	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
969}
970
971static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
972{
973	int mpalen, wrlen;
974	struct fw_ofld_tx_data_wr *req;
975	struct mpa_message *mpa;
976	struct sk_buff *skb;
977	struct mpa_v2_conn_params mpa_v2_params;
978
979	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
980
981	mpalen = sizeof(*mpa) + plen;
982	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
983		mpalen += sizeof(struct mpa_v2_conn_params);
984	wrlen = roundup(mpalen + sizeof *req, 16);
985
986	skb = get_skb(NULL, wrlen, GFP_KERNEL);
987	if (!skb) {
988		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
989		return -ENOMEM;
990	}
991	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
992
993	req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen);
994	memset(req, 0, wrlen);
995	req->op_to_immdlen = cpu_to_be32(
996		FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
997		FW_WR_COMPL_F |
998		FW_WR_IMMDLEN_V(mpalen));
999	req->flowid_len16 = cpu_to_be32(
1000		FW_WR_FLOWID_V(ep->hwtid) |
1001		FW_WR_LEN16_V(wrlen >> 4));
1002	req->plen = cpu_to_be32(mpalen);
1003	req->tunnel_to_proxy = cpu_to_be32(
1004		FW_OFLD_TX_DATA_WR_FLUSH_F |
1005		FW_OFLD_TX_DATA_WR_SHOVE_F);
1006
1007	mpa = (struct mpa_message *)(req + 1);
1008	memset(mpa, 0, sizeof(*mpa));
1009	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1010	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
1011		     (markers_enabled ? MPA_MARKERS : 0);
1012	mpa->revision = ep->mpa_attr.version;
1013	mpa->private_data_size = htons(plen);
1014
1015	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1016		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1017		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
1018					       sizeof (struct mpa_v2_conn_params));
1019		mpa_v2_params.ird = htons((u16)ep->ird);
1020		mpa_v2_params.ord = htons((u16)ep->ord);
1021		if (peer2peer && (ep->mpa_attr.p2p_type !=
1022					FW_RI_INIT_P2PTYPE_DISABLED)) {
1023			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1024
1025			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1026				mpa_v2_params.ord |=
1027					htons(MPA_V2_RDMA_WRITE_RTR);
1028			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1029				mpa_v2_params.ord |=
1030					htons(MPA_V2_RDMA_READ_RTR);
1031		}
1032
1033		memcpy(mpa->private_data, &mpa_v2_params,
1034		       sizeof(struct mpa_v2_conn_params));
1035
1036		if (ep->plen)
1037			memcpy(mpa->private_data +
1038			       sizeof(struct mpa_v2_conn_params), pdata, plen);
1039	} else
1040		if (plen)
1041			memcpy(mpa->private_data, pdata, plen);
1042
1043	/*
1044	 * Reference the mpa skb.  This ensures the data area
1045	 * will remain in memory until the hw acks the tx.
1046	 * Function fw4_ack() will deref it.
1047	 */
1048	skb_get(skb);
1049	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
1050	ep->mpa_skb = skb;
1051	__state_set(&ep->com, MPA_REP_SENT);
1052	ep->snd_seq += mpalen;
1053	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1054}
1055
1056static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1057{
1058	struct c4iw_ep *ep;
1059	struct cpl_act_establish *req = cplhdr(skb);
1060	unsigned int tid = GET_TID(req);
1061	unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
1062	struct tid_info *t = dev->rdev.lldi.tids;
1063
1064	ep = lookup_atid(t, atid);
1065
1066	PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
1067	     be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
1068
1069	mutex_lock(&ep->com.mutex);
1070	dst_confirm(ep->dst);
1071
1072	/* setup the hwtid for this connection */
1073	ep->hwtid = tid;
1074	cxgb4_insert_tid(t, ep, tid);
1075	insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid);
1076
1077	ep->snd_seq = be32_to_cpu(req->snd_isn);
1078	ep->rcv_seq = be32_to_cpu(req->rcv_isn);
1079
1080	set_emss(ep, ntohs(req->tcp_opt));
1081
1082	/* dealloc the atid */
1083	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
1084	cxgb4_free_atid(t, atid);
1085	set_bit(ACT_ESTAB, &ep->com.history);
1086
1087	/* start MPA negotiation */
1088	send_flowc(ep, NULL);
1089	if (ep->retry_with_mpa_v1)
1090		send_mpa_req(ep, skb, 1);
1091	else
1092		send_mpa_req(ep, skb, mpa_rev);
1093	mutex_unlock(&ep->com.mutex);
1094	return 0;
1095}
1096
1097static void close_complete_upcall(struct c4iw_ep *ep, int status)
1098{
1099	struct iw_cm_event event;
1100
1101	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1102	memset(&event, 0, sizeof(event));
1103	event.event = IW_CM_EVENT_CLOSE;
1104	event.status = status;
1105	if (ep->com.cm_id) {
1106		PDBG("close complete delivered ep %p cm_id %p tid %u\n",
1107		     ep, ep->com.cm_id, ep->hwtid);
1108		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1109		ep->com.cm_id->rem_ref(ep->com.cm_id);
1110		ep->com.cm_id = NULL;
1111		set_bit(CLOSE_UPCALL, &ep->com.history);
1112	}
1113}
1114
1115static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
1116{
1117	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1118	__state_set(&ep->com, ABORTING);
1119	set_bit(ABORT_CONN, &ep->com.history);
1120	return send_abort(ep, skb, gfp);
1121}
1122
1123static void peer_close_upcall(struct c4iw_ep *ep)
1124{
1125	struct iw_cm_event event;
1126
1127	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1128	memset(&event, 0, sizeof(event));
1129	event.event = IW_CM_EVENT_DISCONNECT;
1130	if (ep->com.cm_id) {
1131		PDBG("peer close delivered ep %p cm_id %p tid %u\n",
1132		     ep, ep->com.cm_id, ep->hwtid);
1133		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1134		set_bit(DISCONN_UPCALL, &ep->com.history);
1135	}
1136}
1137
1138static void peer_abort_upcall(struct c4iw_ep *ep)
1139{
1140	struct iw_cm_event event;
1141
1142	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1143	memset(&event, 0, sizeof(event));
1144	event.event = IW_CM_EVENT_CLOSE;
1145	event.status = -ECONNRESET;
1146	if (ep->com.cm_id) {
1147		PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
1148		     ep->com.cm_id, ep->hwtid);
1149		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1150		ep->com.cm_id->rem_ref(ep->com.cm_id);
1151		ep->com.cm_id = NULL;
1152		set_bit(ABORT_UPCALL, &ep->com.history);
1153	}
1154}
1155
1156static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1157{
1158	struct iw_cm_event event;
1159
1160	PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status);
1161	memset(&event, 0, sizeof(event));
1162	event.event = IW_CM_EVENT_CONNECT_REPLY;
1163	event.status = status;
1164	memcpy(&event.local_addr, &ep->com.local_addr,
1165	       sizeof(ep->com.local_addr));
1166	memcpy(&event.remote_addr, &ep->com.remote_addr,
1167	       sizeof(ep->com.remote_addr));
1168
1169	if ((status == 0) || (status == -ECONNREFUSED)) {
1170		if (!ep->tried_with_mpa_v1) {
1171			/* this means MPA_v2 is used */
1172			event.private_data_len = ep->plen -
1173				sizeof(struct mpa_v2_conn_params);
1174			event.private_data = ep->mpa_pkt +
1175				sizeof(struct mpa_message) +
1176				sizeof(struct mpa_v2_conn_params);
1177		} else {
1178			/* this means MPA_v1 is used */
1179			event.private_data_len = ep->plen;
1180			event.private_data = ep->mpa_pkt +
1181				sizeof(struct mpa_message);
1182		}
1183	}
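	/*
	 * Example (illustrative): with MPA v2, ep->plen still includes the
	 * mpa_v2_conn_params header, so a peer that sent 8 bytes of private
	 * data yields ep->plen = 8 + sizeof(struct mpa_v2_conn_params) and
	 * the upcall reports private_data_len = 8, pointing just past the
	 * v2 parameters.
	 */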
1184
1185	PDBG("%s ep %p tid %u status %d\n", __func__, ep,
1186	     ep->hwtid, status);
1187	set_bit(CONN_RPL_UPCALL, &ep->com.history);
1188	ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1189
1190	if (status < 0) {
1191		ep->com.cm_id->rem_ref(ep->com.cm_id);
1192		ep->com.cm_id = NULL;
1193	}
1194}
1195
1196static int connect_request_upcall(struct c4iw_ep *ep)
1197{
1198	struct iw_cm_event event;
1199	int ret;
1200
1201	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1202	memset(&event, 0, sizeof(event));
1203	event.event = IW_CM_EVENT_CONNECT_REQUEST;
1204	memcpy(&event.local_addr, &ep->com.local_addr,
1205	       sizeof(ep->com.local_addr));
1206	memcpy(&event.remote_addr, &ep->com.remote_addr,
1207	       sizeof(ep->com.remote_addr));
1208	event.provider_data = ep;
1209	if (!ep->tried_with_mpa_v1) {
1210		/* this means MPA_v2 is used */
1211		event.ord = ep->ord;
1212		event.ird = ep->ird;
1213		event.private_data_len = ep->plen -
1214			sizeof(struct mpa_v2_conn_params);
1215		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1216			sizeof(struct mpa_v2_conn_params);
1217	} else {
1218		/* this means MPA_v1 is used. Send max supported */
1219		event.ord = cur_max_read_depth(ep->com.dev);
1220		event.ird = cur_max_read_depth(ep->com.dev);
1221		event.private_data_len = ep->plen;
1222		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1223	}
1224	c4iw_get_ep(&ep->com);
1225	ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1226						      &event);
1227	if (ret)
1228		c4iw_put_ep(&ep->com);
1229	set_bit(CONNREQ_UPCALL, &ep->com.history);
1230	c4iw_put_ep(&ep->parent_ep->com);
1231	return ret;
1232}
1233
1234static void established_upcall(struct c4iw_ep *ep)
1235{
1236	struct iw_cm_event event;
1237
1238	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1239	memset(&event, 0, sizeof(event));
1240	event.event = IW_CM_EVENT_ESTABLISHED;
1241	event.ird = ep->ird;
1242	event.ord = ep->ord;
1243	if (ep->com.cm_id) {
1244		PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1245		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1246		set_bit(ESTAB_UPCALL, &ep->com.history);
1247	}
1248}
1249
1250static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
1251{
1252	struct cpl_rx_data_ack *req;
1253	struct sk_buff *skb;
1254	int wrlen = roundup(sizeof *req, 16);
1255
1256	PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
1257	skb = get_skb(NULL, wrlen, GFP_KERNEL);
1258	if (!skb) {
1259		printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
1260		return 0;
1261	}
1262
1263	/*
1264	 * If we couldn't specify the entire rcv window at connection setup
1265	 * due to the limit in the number of bits in the RCV_BUFSIZ field,
1266	 * then add the overage into the credits returned.
1267	 */
1268	if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
1269		credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
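	/*
	 * Example (illustrative, hypothetical numbers): if ep->rcv_win were
	 * 1MB but RCV_BUFSIZ_M limited the opt0 field to 256KB, the
	 * RX_DATA_ACK built below would return an extra 768KB of credits on
	 * top of the bytes actually consumed.
	 */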
1270
1271	req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
1272	memset(req, 0, wrlen);
1273	INIT_TP_WR(req, ep->hwtid);
1274	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
1275						    ep->hwtid));
1276	req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F |
1277				       RX_DACK_CHANGE_F |
1278				       RX_DACK_MODE_V(dack_mode));
1279	set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
1280	c4iw_ofld_send(&ep->com.dev->rdev, skb);
1281	return credits;
1282}
1283
1284#define RELAXED_IRD_NEGOTIATION 1
1285
1286static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1287{
1288	struct mpa_message *mpa;
1289	struct mpa_v2_conn_params *mpa_v2_params;
1290	u16 plen;
1291	u16 resp_ird, resp_ord;
1292	u8 rtr_mismatch = 0, insuff_ird = 0;
1293	struct c4iw_qp_attributes attrs;
1294	enum c4iw_qp_attr_mask mask;
1295	int err;
1296	int disconnect = 0;
1297
1298	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1299
1300	/*
1301	 * Stop mpa timer.  If it expired, then
1302	 * we ignore the MPA reply.  process_timeout()
1303	 * will abort the connection.
1304	 */
1305	if (stop_ep_timer(ep))
1306		return 0;
1307
1308	/*
1309	 * If we get more than the supported amount of private data
1310	 * then we must fail this connection.
1311	 */
1312	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1313		err = -EINVAL;
1314		goto err;
1315	}
1316
1317	/*
1318	 * copy the new data into our accumulation buffer.
1319	 */
1320	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1321				  skb->len);
1322	ep->mpa_pkt_len += skb->len;
1323
1324	/*
1325	 * if we don't even have the mpa message, then bail.
1326	 */
1327	if (ep->mpa_pkt_len < sizeof(*mpa))
1328		return 0;
1329	mpa = (struct mpa_message *) ep->mpa_pkt;
1330
1331	/* Validate MPA header. */
1332	if (mpa->revision > mpa_rev) {
1333		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
1334		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
1335		err = -EPROTO;
1336		goto err;
1337	}
1338	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1339		err = -EPROTO;
1340		goto err;
1341	}
1342
1343	plen = ntohs(mpa->private_data_size);
1344
1345	/*
1346	 * Fail if there's too much private data.
1347	 */
1348	if (plen > MPA_MAX_PRIVATE_DATA) {
1349		err = -EPROTO;
1350		goto err;
1351	}
1352
1353	/*
1354	 * Fail if plen does not account for the packet size.
1355	 */
1356	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1357		err = -EPROTO;
1358		goto err;
1359	}
1360
1361	ep->plen = (u8) plen;
1362
1363	/*
1364	 * If we don't have all the pdata yet, then bail.
1365	 * We'll continue processing when more data arrives.
1366	 */
1367	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1368		return 0;
1369
1370	if (mpa->flags & MPA_REJECT) {
1371		err = -ECONNREFUSED;
1372		goto err;
1373	}
1374
1375	/*
1376	 * If we get here we have accumulated the entire mpa
1377	 * start reply message including private data. And
1378	 * the MPA header is valid.
1379	 */
1380	__state_set(&ep->com, FPDU_MODE);
1381	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1382	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1383	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1384	ep->mpa_attr.version = mpa->revision;
1385	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1386
1387	if (mpa->revision == 2) {
1388		ep->mpa_attr.enhanced_rdma_conn =
1389			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1390		if (ep->mpa_attr.enhanced_rdma_conn) {
1391			mpa_v2_params = (struct mpa_v2_conn_params *)
1392				(ep->mpa_pkt + sizeof(*mpa));
1393			resp_ird = ntohs(mpa_v2_params->ird) &
1394				MPA_V2_IRD_ORD_MASK;
1395			resp_ord = ntohs(mpa_v2_params->ord) &
1396				MPA_V2_IRD_ORD_MASK;
1397			PDBG("%s responder ird %u ord %u ep ird %u ord %u\n",
1398			     __func__, resp_ird, resp_ord, ep->ird, ep->ord);
1399
1400			/*
1401			 * This is a double-check. Ideally, the checks below
1402			 * are not required since the ird/ord negotiation has
1403			 * already been handled in c4iw_accept_cr().
1404			 */
1405			if (ep->ird < resp_ord) {
1406				if (RELAXED_IRD_NEGOTIATION && resp_ord <=
1407				    ep->com.dev->rdev.lldi.max_ordird_qp)
1408					ep->ird = resp_ord;
1409				else
1410					insuff_ird = 1;
1411			} else if (ep->ird > resp_ord) {
1412				ep->ird = resp_ord;
1413			}
1414			if (ep->ord > resp_ird) {
1415				if (RELAXED_IRD_NEGOTIATION)
1416					ep->ord = resp_ird;
1417				else
1418					insuff_ird = 1;
1419			}
1420			if (insuff_ird) {
1421				err = -ENOMEM;
1422				ep->ird = resp_ord;
1423				ep->ord = resp_ird;
1424			}
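			/*
			 * Example (illustrative): if the local ep was set up
			 * with ird = 8/ord = 8 and the responder replies with
			 * ord = 16/ird = 4, then with RELAXED_IRD_NEGOTIATION
			 * the local ord is trimmed to 4, and the local ird is
			 * raised to 16 only if the adapter's max_ordird_qp
			 * allows it; otherwise the connection is failed with
			 * -ENOMEM for insufficient IRD.
			 */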
1425
1426			if (ntohs(mpa_v2_params->ird) &
1427					MPA_V2_PEER2PEER_MODEL) {
1428				if (ntohs(mpa_v2_params->ord) &
1429						MPA_V2_RDMA_WRITE_RTR)
1430					ep->mpa_attr.p2p_type =
1431						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1432				else if (ntohs(mpa_v2_params->ord) &
1433						MPA_V2_RDMA_READ_RTR)
1434					ep->mpa_attr.p2p_type =
1435						FW_RI_INIT_P2PTYPE_READ_REQ;
1436			}
1437		}
1438	} else if (mpa->revision == 1)
1439		if (peer2peer)
1440			ep->mpa_attr.p2p_type = p2p_type;
1441
1442	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
1443	     "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
1444	     "%d\n", __func__, ep->mpa_attr.crc_enabled,
1445	     ep->mpa_attr.recv_marker_enabled,
1446	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1447	     ep->mpa_attr.p2p_type, p2p_type);
1448
1449	/*
1450	 * If responder's RTR does not match with that of initiator, assign
1451	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
1452	 * generated when moving QP to RTS state.
1453	 * A TERM message will be sent after QP has moved to RTS state
1454	 */
1455	if ((ep->mpa_attr.version == 2) && peer2peer &&
1456			(ep->mpa_attr.p2p_type != p2p_type)) {
1457		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1458		rtr_mismatch = 1;
1459	}
1460
1461	attrs.mpa_attr = ep->mpa_attr;
1462	attrs.max_ird = ep->ird;
1463	attrs.max_ord = ep->ord;
1464	attrs.llp_stream_handle = ep;
1465	attrs.next_state = C4IW_QP_STATE_RTS;
1466
1467	mask = C4IW_QP_ATTR_NEXT_STATE |
1468	    C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1469	    C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1470
1471	/* bind QP and TID with INIT_WR */
1472	err = c4iw_modify_qp(ep->com.qp->rhp,
1473			     ep->com.qp, mask, &attrs, 1);
1474	if (err)
1475		goto err;
1476
1477	/*
1478	 * If responder's RTR requirement did not match with what initiator
1479	 * supports, generate TERM message
1480	 */
1481	if (rtr_mismatch) {
1482		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
1483		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1484		attrs.ecode = MPA_NOMATCH_RTR;
1485		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1486		attrs.send_term = 1;
1487		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1488				C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1489		err = -ENOMEM;
1490		disconnect = 1;
1491		goto out;
1492	}
1493
1494	/*
1495	 * Generate TERM if the initiator IRD is not sufficient for the
1496	 * responder-provided ORD. Currently we behave the same way even
1497	 * when the responder-provided IRD is insufficient for the
1498	 * initiator ORD.
1499	 */
1500	if (insuff_ird) {
1501		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
1502				__func__);
1503		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1504		attrs.ecode = MPA_INSUFF_IRD;
1505		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1506		attrs.send_term = 1;
1507		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1508				C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1509		err = -ENOMEM;
1510		disconnect = 1;
1511		goto out;
1512	}
1513	goto out;
1514err:
1515	__state_set(&ep->com, ABORTING);
1516	send_abort(ep, skb, GFP_KERNEL);
1517out:
1518	connect_reply_upcall(ep, err);
1519	return disconnect;
1520}
1521
1522static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1523{
1524	struct mpa_message *mpa;
1525	struct mpa_v2_conn_params *mpa_v2_params;
1526	u16 plen;
1527
1528	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1529
1530	/*
1531	 * If we get more than the supported amount of private data
1532	 * then we must fail this connection.
1533	 */
1534	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1535		(void)stop_ep_timer(ep);
1536		abort_connection(ep, skb, GFP_KERNEL);
1537		return;
1538	}
1539
1540	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
1541
1542	/*
1543	 * Copy the new data into our accumulation buffer.
1544	 */
1545	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1546				  skb->len);
1547	ep->mpa_pkt_len += skb->len;
1548
1549	/*
1550	 * If we don't even have the mpa message, then bail.
1551	 * We'll continue processing when more data arrives.
1552	 */
1553	if (ep->mpa_pkt_len < sizeof(*mpa))
1554		return;
1555
1556	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
1557	mpa = (struct mpa_message *) ep->mpa_pkt;
1558
1559	/*
1560	 * Validate MPA Header.
1561	 */
1562	if (mpa->revision > mpa_rev) {
1563		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
1564		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
1565		(void)stop_ep_timer(ep);
1566		abort_connection(ep, skb, GFP_KERNEL);
1567		return;
1568	}
1569
1570	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
1571		(void)stop_ep_timer(ep);
1572		abort_connection(ep, skb, GFP_KERNEL);
1573		return;
1574	}
1575
1576	plen = ntohs(mpa->private_data_size);
1577
1578	/*
1579	 * Fail if there's too much private data.
1580	 */
1581	if (plen > MPA_MAX_PRIVATE_DATA) {
1582		(void)stop_ep_timer(ep);
1583		abort_connection(ep, skb, GFP_KERNEL);
1584		return;
1585	}
1586
1587	/*
1588	 * Fail if plen does not account for the packet size.
1589	 */
1590	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1591		(void)stop_ep_timer(ep);
1592		abort_connection(ep, skb, GFP_KERNEL);
1593		return;
1594	}
1595	ep->plen = (u8) plen;
1596
1597	/*
1598	 * If we don't have all the pdata yet, then bail.
1599	 */
1600	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1601		return;
1602
1603	/*
1604	 * If we get here we have accumulated the entire mpa
1605	 * start reply message including private data.
1606	 */
1607	ep->mpa_attr.initiator = 0;
1608	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1609	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1610	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1611	ep->mpa_attr.version = mpa->revision;
1612	if (mpa->revision == 1)
1613		ep->tried_with_mpa_v1 = 1;
1614	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1615
1616	if (mpa->revision == 2) {
1617		ep->mpa_attr.enhanced_rdma_conn =
1618			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1619		if (ep->mpa_attr.enhanced_rdma_conn) {
1620			mpa_v2_params = (struct mpa_v2_conn_params *)
1621				(ep->mpa_pkt + sizeof(*mpa));
1622			ep->ird = ntohs(mpa_v2_params->ird) &
1623				MPA_V2_IRD_ORD_MASK;
1624			ep->ord = ntohs(mpa_v2_params->ord) &
1625				MPA_V2_IRD_ORD_MASK;
1626			PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
1627			     ep->ord);
1628			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
1629				if (peer2peer) {
1630					if (ntohs(mpa_v2_params->ord) &
1631							MPA_V2_RDMA_WRITE_RTR)
1632						ep->mpa_attr.p2p_type =
1633						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1634					else if (ntohs(mpa_v2_params->ord) &
1635							MPA_V2_RDMA_READ_RTR)
1636						ep->mpa_attr.p2p_type =
1637						FW_RI_INIT_P2PTYPE_READ_REQ;
1638				}
1639		}
1640	} else if (mpa->revision == 1)
1641		if (peer2peer)
1642			ep->mpa_attr.p2p_type = p2p_type;
1643
1644	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
1645	     "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
1646	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1647	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1648	     ep->mpa_attr.p2p_type);
1649
1650	/*
1651	 * If the endpoint timer already expired, then we ignore
1652	 * the start request.  process_timeout() will abort
1653	 * the connection.
1654	 */
1655	if (!stop_ep_timer(ep)) {
1656		__state_set(&ep->com, MPA_REQ_RCVD);
1657
1658		/* drive upcall */
1659		mutex_lock_nested(&ep->parent_ep->com.mutex,
1660				  SINGLE_DEPTH_NESTING);
1661		if (ep->parent_ep->com.state != DEAD) {
1662			if (connect_request_upcall(ep))
1663				abort_connection(ep, skb, GFP_KERNEL);
1664		} else {
1665			abort_connection(ep, skb, GFP_KERNEL);
1666		}
1667		mutex_unlock(&ep->parent_ep->com.mutex);
1668	}
1669	return;
1670}
1671
1672static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1673{
1674	struct c4iw_ep *ep;
1675	struct cpl_rx_data *hdr = cplhdr(skb);
1676	unsigned int dlen = ntohs(hdr->len);
1677	unsigned int tid = GET_TID(hdr);
1678	struct tid_info *t = dev->rdev.lldi.tids;
1679	__u8 status = hdr->status;
1680	int disconnect = 0;
1681
1682	ep = lookup_tid(t, tid);
1683	if (!ep)
1684		return 0;
1685	PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
1686	skb_pull(skb, sizeof(*hdr));
1687	skb_trim(skb, dlen);
1688	mutex_lock(&ep->com.mutex);
1689
1690	/* update RX credits */
1691	update_rx_credits(ep, dlen);
1692
1693	switch (ep->com.state) {
1694	case MPA_REQ_SENT:
1695		ep->rcv_seq += dlen;
1696		disconnect = process_mpa_reply(ep, skb);
1697		break;
1698	case MPA_REQ_WAIT:
1699		ep->rcv_seq += dlen;
1700		process_mpa_request(ep, skb);
1701		break;
1702	case FPDU_MODE: {
1703		struct c4iw_qp_attributes attrs;
1704		BUG_ON(!ep->com.qp);
1705		if (status)
1706			pr_err("%s Unexpected streaming data." \
1707			       " qpid %u ep %p state %d tid %u status %d\n",
1708			       __func__, ep->com.qp->wq.sq.qid, ep,
1709			       ep->com.state, ep->hwtid, status);
1710		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1711		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1712			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1713		disconnect = 1;
1714		break;
1715	}
1716	default:
1717		break;
1718	}
1719	mutex_unlock(&ep->com.mutex);
1720	if (disconnect)
1721		c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
1722	return 0;
1723}
1724
1725static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1726{
1727	struct c4iw_ep *ep;
1728	struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
1729	int release = 0;
1730	unsigned int tid = GET_TID(rpl);
1731	struct tid_info *t = dev->rdev.lldi.tids;
1732
1733	ep = lookup_tid(t, tid);
1734	if (!ep) {
1735		printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
1736		return 0;
1737	}
1738	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1739	mutex_lock(&ep->com.mutex);
1740	switch (ep->com.state) {
1741	case ABORTING:
1742		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1743		__state_set(&ep->com, DEAD);
1744		release = 1;
1745		break;
1746	default:
1747		printk(KERN_ERR "%s ep %p state %d\n",
1748		     __func__, ep, ep->com.state);
1749		break;
1750	}
1751	mutex_unlock(&ep->com.mutex);
1752
1753	if (release)
1754		release_ep_resources(ep);
1755	return 0;
1756}
1757
1758static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1759{
1760	struct sk_buff *skb;
1761	struct fw_ofld_connection_wr *req;
1762	unsigned int mtu_idx;
1763	int wscale;
1764	struct sockaddr_in *sin;
1765	int win;
1766
1767	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1768	req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
1769	memset(req, 0, sizeof(*req));
1770	req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
1771	req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
1772	req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
1773				     ep->com.dev->rdev.lldi.ports[0],
1774				     ep->l2t));
1775	sin = (struct sockaddr_in *)&ep->com.mapped_local_addr;
1776	req->le.lport = sin->sin_port;
1777	req->le.u.ipv4.lip = sin->sin_addr.s_addr;
1778	sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
1779	req->le.pport = sin->sin_port;
1780	req->le.u.ipv4.pip = sin->sin_addr.s_addr;
1781	req->tcb.t_state_to_astid =
1782			htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) |
1783			FW_OFLD_CONNECTION_WR_ASTID_V(atid));
1784	req->tcb.cplrxdataack_cplpassacceptrpl =
1785			htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
1786	req->tcb.tx_max = (__force __be32) jiffies;
1787	req->tcb.rcv_adv = htons(1);
1788	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
1789		 enable_tcp_timestamps,
1790		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
1791	wscale = compute_wscale(rcv_win);
1792
1793	/*
1794	 * Specify the largest window that will fit in opt0. The
1795	 * remainder will be specified in the rx_data_ack.
1796	 */
1797	win = ep->rcv_win >> 10;
1798	if (win > RCV_BUFSIZ_M)
1799		win = RCV_BUFSIZ_M;
1800
1801	req->tcb.opt0 = (__force __be64) (TCAM_BYPASS_F |
1802		(nocong ? NO_CONG_F : 0) |
1803		KEEP_ALIVE_F |
1804		DELACK_F |
1805		WND_SCALE_V(wscale) |
1806		MSS_IDX_V(mtu_idx) |
1807		L2T_IDX_V(ep->l2t->idx) |
1808		TX_CHAN_V(ep->tx_chan) |
1809		SMAC_SEL_V(ep->smac_idx) |
1810		DSCP_V(ep->tos) |
1811		ULP_MODE_V(ULP_MODE_TCPDDP) |
1812		RCV_BUFSIZ_V(win));
1813	req->tcb.opt2 = (__force __be32) (PACE_V(1) |
1814		TX_QUEUE_V(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
1815		RX_CHANNEL_V(0) |
1816		CCTRL_ECN_V(enable_ecn) |
1817		RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid));
1818	if (enable_tcp_timestamps)
1819		req->tcb.opt2 |= (__force __be32)TSTAMPS_EN_F;
1820	if (enable_tcp_sack)
1821		req->tcb.opt2 |= (__force __be32)SACK_EN_F;
1822	if (wscale && enable_tcp_window_scaling)
1823		req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F;
1824	req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0);
1825	req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
1826	set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
1827	set_bit(ACT_OFLD_CONN, &ep->com.history);
1828	c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1829}
1830
1831/*
1832 * Return whether a failed active open has allocated a TID
1833 */
1834static inline int act_open_has_tid(int status)
1835{
1836	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1837	       status != CPL_ERR_ARP_MISS;
1838}
1839
1840/*
1841 * Return whether a CPL status conveys negative advice.
 */
1842static int is_neg_adv(unsigned int status)
1843{
1844	return status == CPL_ERR_RTX_NEG_ADVICE ||
1845	       status == CPL_ERR_PERSIST_NEG_ADVICE ||
1846	       status == CPL_ERR_KEEPALV_NEG_ADVICE;
1847}
1848
1849static char *neg_adv_str(unsigned int status)
1850{
1851	switch (status) {
1852	case CPL_ERR_RTX_NEG_ADVICE:
1853		return "Retransmit timeout";
1854	case CPL_ERR_PERSIST_NEG_ADVICE:
1855		return "Persist timeout";
1856	case CPL_ERR_KEEPALV_NEG_ADVICE:
1857		return "Keepalive timeout";
1858	default:
1859		return "Unknown";
1860	}
1861}
1862
1863static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
1864{
1865	ep->snd_win = snd_win;
1866	ep->rcv_win = rcv_win;
1867	PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win);
1868}
1869
1870#define ACT_OPEN_RETRY_COUNT 2
1871
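/*
 * Resolve the neighbour for the destination, allocate an L2T entry and
 * derive the per-connection transmit channel, queue indices, MTU and
 * TCP windows from the egress port.  Loopback destinations are handled
 * separately by locating the local port device that owns the peer
 * address.
 */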
1872static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
1873		     struct dst_entry *dst, struct c4iw_dev *cdev,
1874		     bool clear_mpa_v1)
1875{
1876	struct neighbour *n;
1877	int err, step;
1878	struct net_device *pdev;
1879
1880	n = dst_neigh_lookup(dst, peer_ip);
1881	if (!n)
1882		return -ENODEV;
1883
1884	rcu_read_lock();
1885	err = -ENOMEM;
1886	if (n->dev->flags & IFF_LOOPBACK) {
1887		if (iptype == 4)
1888			pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
1889		else if (IS_ENABLED(CONFIG_IPV6))
1890			for_each_netdev(&init_net, pdev) {
1891				if (ipv6_chk_addr(&init_net,
1892						  (struct in6_addr *)peer_ip,
1893						  pdev, 1))
1894					break;
1895			}
1896		else
1897			pdev = NULL;
1898
1899		if (!pdev) {
1900			err = -ENODEV;
1901			goto out;
1902		}
1903		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1904					n, pdev, 0);
1905		if (!ep->l2t)
1906			goto out;
1907		ep->mtu = pdev->mtu;
1908		ep->tx_chan = cxgb4_port_chan(pdev);
1909		ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
1910		step = cdev->rdev.lldi.ntxq /
1911			cdev->rdev.lldi.nchan;
1912		ep->txq_idx = cxgb4_port_idx(pdev) * step;
1913		step = cdev->rdev.lldi.nrxq /
1914			cdev->rdev.lldi.nchan;
1915		ep->ctrlq_idx = cxgb4_port_idx(pdev);
1916		ep->rss_qid = cdev->rdev.lldi.rxq_ids[
1917			cxgb4_port_idx(pdev) * step];
1918		set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
1919		dev_put(pdev);
1920	} else {
1921		pdev = get_real_dev(n->dev);
1922		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1923					n, pdev, 0);
1924		if (!ep->l2t)
1925			goto out;
1926		ep->mtu = dst_mtu(dst);
1927		ep->tx_chan = cxgb4_port_chan(pdev);
1928		ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
1929		step = cdev->rdev.lldi.ntxq /
1930			cdev->rdev.lldi.nchan;
1931		ep->txq_idx = cxgb4_port_idx(pdev) * step;
1932		ep->ctrlq_idx = cxgb4_port_idx(pdev);
1933		step = cdev->rdev.lldi.nrxq /
1934			cdev->rdev.lldi.nchan;
1935		ep->rss_qid = cdev->rdev.lldi.rxq_ids[
1936			cxgb4_port_idx(pdev) * step];
1937		set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
1938
1939		if (clear_mpa_v1) {
1940			ep->retry_with_mpa_v1 = 0;
1941			ep->tried_with_mpa_v1 = 0;
1942		}
1943	}
1944	err = 0;
1945out:
1946	rcu_read_unlock();
1947
1948	neigh_release(n);
1949
1950	return err;
1951}
1952
1953static int c4iw_reconnect(struct c4iw_ep *ep)
1954{
1955	int err = 0;
1956	struct sockaddr_in *laddr = (struct sockaddr_in *)
1957				    &ep->com.cm_id->local_addr;
1958	struct sockaddr_in *raddr = (struct sockaddr_in *)
1959				    &ep->com.cm_id->remote_addr;
1960	struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
1961				      &ep->com.cm_id->local_addr;
1962	struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
1963				      &ep->com.cm_id->remote_addr;
1964	int iptype;
1965	__u8 *ra;
1966
1967	PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
1968	init_timer(&ep->timer);
1969
1970	/*
1971	 * Allocate an active TID to initiate a TCP connection.
1972	 */
1973	ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
1974	if (ep->atid == -1) {
1975		pr_err("%s - cannot alloc atid.\n", __func__);
1976		err = -ENOMEM;
1977		goto fail2;
1978	}
1979	insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid);
1980
1981	/* find a route */
1982	if (ep->com.cm_id->local_addr.ss_family == AF_INET) {
1983		ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
1984				     raddr->sin_addr.s_addr, laddr->sin_port,
1985				     raddr->sin_port, 0);
1986		iptype = 4;
1987		ra = (__u8 *)&raddr->sin_addr;
1988	} else {
1989		ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
1990				      raddr6->sin6_addr.s6_addr,
1991				      laddr6->sin6_port, raddr6->sin6_port, 0,
1992				      raddr6->sin6_scope_id);
1993		iptype = 6;
1994		ra = (__u8 *)&raddr6->sin6_addr;
1995	}
1996	if (!ep->dst) {
1997		pr_err("%s - cannot find route.\n", __func__);
1998		err = -EHOSTUNREACH;
1999		goto fail3;
2000	}
2001	err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false);
2002	if (err) {
2003		pr_err("%s - cannot alloc l2e.\n", __func__);
2004		goto fail4;
2005	}
2006
2007	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
2008	     __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
2009	     ep->l2t->idx);
2010
2011	state_set(&ep->com, CONNECTING);
2012	ep->tos = 0;
2013
2014	/* send connect request to rnic */
2015	err = send_connect(ep);
2016	if (!err)
2017		goto out;
2018
2019	cxgb4_l2t_release(ep->l2t);
2020fail4:
2021	dst_release(ep->dst);
2022fail3:
2023	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
2024	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
2025fail2:
2026	/*
2027	 * Remember to send a notification to the upper layer.
2028	 * We are here because a reconnect attempt failed; the upper layer
2029	 * does not know this was a reconnect and is still waiting for the
2030	 * response to its first connect request.
2031	 */
2032	connect_reply_upcall(ep, -ECONNRESET);
2033	c4iw_put_ep(&ep->com);
2034out:
2035	return err;
2036}
2037
2038static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2039{
2040	struct c4iw_ep *ep;
2041	struct cpl_act_open_rpl *rpl = cplhdr(skb);
2042	unsigned int atid = TID_TID_G(AOPEN_ATID_G(
2043				      ntohl(rpl->atid_status)));
2044	struct tid_info *t = dev->rdev.lldi.tids;
2045	int status = AOPEN_STATUS_G(ntohl(rpl->atid_status));
2046	struct sockaddr_in *la;
2047	struct sockaddr_in *ra;
2048	struct sockaddr_in6 *la6;
2049	struct sockaddr_in6 *ra6;
2050
2051	ep = lookup_atid(t, atid);
2052	la = (struct sockaddr_in *)&ep->com.mapped_local_addr;
2053	ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
2054	la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
2055	ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr;
2056
2057	PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
2058	     status, status2errno(status));
2059
2060	if (is_neg_adv(status)) {
2061		PDBG("%s Connection problems for atid %u status %u (%s)\n",
2062		     __func__, atid, status, neg_adv_str(status));
2063		ep->stats.connect_neg_adv++;
2064		mutex_lock(&dev->rdev.stats.lock);
2065		dev->rdev.stats.neg_adv++;
2066		mutex_unlock(&dev->rdev.stats.lock);
2067		return 0;
2068	}
2069
2070	set_bit(ACT_OPEN_RPL, &ep->com.history);
2071
2072	/*
2073	 * Log interesting failures.
2074	 */
2075	switch (status) {
2076	case CPL_ERR_CONN_RESET:
2077	case CPL_ERR_CONN_TIMEDOUT:
2078		break;
2079	case CPL_ERR_TCAM_FULL:
2080		mutex_lock(&dev->rdev.stats.lock);
2081		dev->rdev.stats.tcam_full++;
2082		mutex_unlock(&dev->rdev.stats.lock);
2083		if (ep->com.local_addr.ss_family == AF_INET &&
2084		    dev->rdev.lldi.enable_fw_ofld_conn) {
2085			send_fw_act_open_req(ep,
2086					     TID_TID_G(AOPEN_ATID_G(
2087					     ntohl(rpl->atid_status))));
2088			return 0;
2089		}
2090		break;
2091	case CPL_ERR_CONN_EXIST:
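		/*
		 * A connection with this 4-tuple already exists.  Release
		 * this attempt's atid, route and L2T entry and retry the
		 * connect from scratch, up to ACT_OPEN_RETRY_COUNT times.
		 */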
2092		if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
2093			set_bit(ACT_RETRY_INUSE, &ep->com.history);
2094			remove_handle(ep->com.dev, &ep->com.dev->atid_idr,
2095					atid);
2096			cxgb4_free_atid(t, atid);
2097			dst_release(ep->dst);
2098			cxgb4_l2t_release(ep->l2t);
2099			c4iw_reconnect(ep);
2100			return 0;
2101		}
2102		break;
2103	default:
2104		if (ep->com.local_addr.ss_family == AF_INET) {
2105			pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
2106				atid, status, status2errno(status),
2107				&la->sin_addr.s_addr, ntohs(la->sin_port),
2108				&ra->sin_addr.s_addr, ntohs(ra->sin_port));
2109		} else {
2110			pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n",
2111				atid, status, status2errno(status),
2112				la6->sin6_addr.s6_addr, ntohs(la6->sin6_port),
2113				ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port));
2114		}
2115		break;
2116	}
2117
2118	connect_reply_upcall(ep, status2errno(status));
2119	state_set(&ep->com, DEAD);
2120
2121	if (status && act_open_has_tid(status))
2122		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl));
2123
2124	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
2125	cxgb4_free_atid(t, atid);
2126	dst_release(ep->dst);
2127	cxgb4_l2t_release(ep->l2t);
2128	c4iw_put_ep(&ep->com);
2129
2130	return 0;
2131}
2132
2133static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2134{
2135	struct cpl_pass_open_rpl *rpl = cplhdr(skb);
2136	struct tid_info *t = dev->rdev.lldi.tids;
2137	unsigned int stid = GET_TID(rpl);
2138	struct c4iw_listen_ep *ep = lookup_stid(t, stid);
2139
2140	if (!ep) {
2141		PDBG("%s stid %d lookup failure!\n", __func__, stid);
2142		goto out;
2143	}
2144	PDBG("%s ep %p status %d error %d\n", __func__, ep,
2145	     rpl->status, status2errno(rpl->status));
2146	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
2147
2148out:
2149	return 0;
2150}
2151
2152static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2153{
2154	struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
2155	struct tid_info *t = dev->rdev.lldi.tids;
2156	unsigned int stid = GET_TID(rpl);
2157	struct c4iw_listen_ep *ep = lookup_stid(t, stid);
2158
2159	PDBG("%s ep %p\n", __func__, ep);
2160	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
2161	return 0;
2162}
2163
2164static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2165		      struct cpl_pass_accept_req *req)
2166{
2167	struct cpl_pass_accept_rpl *rpl;
2168	unsigned int mtu_idx;
2169	u64 opt0;
2170	u32 opt2;
2171	int wscale;
2172	struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
2173	int win;
2174
2175	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2176	BUG_ON(skb_cloned(skb));
2177
2178	skb_get(skb);
2179	rpl = cplhdr(skb);
2180	if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
2181		skb_trim(skb, roundup(sizeof(*rpl5), 16));
2182		rpl5 = (void *)rpl;
2183		INIT_TP_WR(rpl5, ep->hwtid);
2184	} else {
2185		skb_trim(skb, sizeof(*rpl));
2186		INIT_TP_WR(rpl, ep->hwtid);
2187	}
2188	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
2189						    ep->hwtid));
2190
2191	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
2192		 enable_tcp_timestamps && req->tcpopt.tstamp,
2193		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
2194	wscale = compute_wscale(rcv_win);
2195
2196	/*
2197	 * Specify the largest window that will fit in opt0. The
2198	 * remainder will be specified in the rx_data_ack.
2199	 */
2200	win = ep->rcv_win >> 10;
2201	if (win > RCV_BUFSIZ_M)
2202		win = RCV_BUFSIZ_M;
2203	opt0 = (nocong ? NO_CONG_F : 0) |
2204	       KEEP_ALIVE_F |
2205	       DELACK_F |
2206	       WND_SCALE_V(wscale) |
2207	       MSS_IDX_V(mtu_idx) |
2208	       L2T_IDX_V(ep->l2t->idx) |
2209	       TX_CHAN_V(ep->tx_chan) |
2210	       SMAC_SEL_V(ep->smac_idx) |
2211	       DSCP_V(ep->tos >> 2) |
2212	       ULP_MODE_V(ULP_MODE_TCPDDP) |
2213	       RCV_BUFSIZ_V(win);
2214	opt2 = RX_CHANNEL_V(0) |
2215	       RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
2216
2217	if (enable_tcp_timestamps && req->tcpopt.tstamp)
2218		opt2 |= TSTAMPS_EN_F;
2219	if (enable_tcp_sack && req->tcpopt.sack)
2220		opt2 |= SACK_EN_F;
2221	if (wscale && enable_tcp_window_scaling)
2222		opt2 |= WND_SCALE_EN_F;
2223	if (enable_ecn) {
2224		const struct tcphdr *tcph;
2225		u32 hlen = ntohl(req->hdr_len);
2226
2227		tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
2228			IP_HDR_LEN_G(hlen);
2229		if (tcph->ece && tcph->cwr)
2230			opt2 |= CCTRL_ECN_V(1);
2231	}
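	/*
	 * On T5, supply a driver-chosen initial send sequence number and
	 * select the Tahoe congestion control algorithm for this
	 * connection.
	 */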
2232	if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
2233		u32 isn = (prandom_u32() & ~7UL) - 1;
2234		opt2 |= T5_OPT_2_VALID_F;
2235		opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
2236		opt2 |= T5_ISS_F;
2237		rpl5 = (void *)rpl;
2238		memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
2239		if (peer2peer)
2240			isn += 4;
2241		rpl5->iss = cpu_to_be32(isn);
2242		PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
2243	}
2244
2245	rpl->opt0 = cpu_to_be64(opt0);
2246	rpl->opt2 = cpu_to_be32(opt2);
2247	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
2248	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
2249	c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2250
2251	return;
2252}
2253
2254static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
2255{
2256	PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
2257	BUG_ON(skb_cloned(skb));
2258	skb_trim(skb, sizeof(struct cpl_tid_release));
2259	release_tid(&dev->rdev, hwtid, skb);
2260	return;
2261}
2262
2263static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype,
2264		       __u8 *local_ip, __u8 *peer_ip,
2265		       __be16 *local_port, __be16 *peer_port)
2266{
2267	int eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
2268	int ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
2269	struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
2270	struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
2271	struct tcphdr *tcp = (struct tcphdr *)
2272			     ((u8 *)(req + 1) + eth_len + ip_len);
2273
2274	if (ip->version == 4) {
2275		PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
2276		     ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
2277		     ntohs(tcp->dest));
2278		*iptype = 4;
2279		memcpy(peer_ip, &ip->saddr, 4);
2280		memcpy(local_ip, &ip->daddr, 4);
2281	} else {
2282		PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
2283		     ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
2284		     ntohs(tcp->dest));
2285		*iptype = 6;
2286		memcpy(peer_ip, ip6->saddr.s6_addr, 16);
2287		memcpy(local_ip, ip6->daddr.s6_addr, 16);
2288	}
2289	*peer_port = tcp->source;
2290	*local_port = tcp->dest;
2291
2292	return;
2293}
2294
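/*
 * Handle CPL_PASS_ACCEPT_REQ: an incoming SYN matched one of our
 * listening endpoints.  Look up the listener, find a route back to the
 * peer, allocate and initialize a child endpoint and send the
 * pass-accept reply.
 */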
2295static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2296{
2297	struct c4iw_ep *child_ep = NULL, *parent_ep;
2298	struct cpl_pass_accept_req *req = cplhdr(skb);
2299	unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
2300	struct tid_info *t = dev->rdev.lldi.tids;
2301	unsigned int hwtid = GET_TID(req);
2302	struct dst_entry *dst;
2303	__u8 local_ip[16], peer_ip[16];
2304	__be16 local_port, peer_port;
2305	int err;
2306	u16 peer_mss = ntohs(req->tcpopt.mss);
2307	int iptype;
2308	unsigned short hdrs;
2309
2310	parent_ep = lookup_stid(t, stid);
2311	if (!parent_ep) {
2312		PDBG("%s connect request on invalid stid %d\n", __func__, stid);
2313		goto reject;
2314	}
2315
2316	if (state_read(&parent_ep->com) != LISTEN) {
2317		printk(KERN_ERR "%s - listening ep not in LISTEN\n",
2318		       __func__);
2319		goto reject;
2320	}
2321
2322	get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port);
2323
2324	/* Find output route */
2325	if (iptype == 4)  {
2326		PDBG("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
2327		     , __func__, parent_ep, hwtid,
2328		     local_ip, peer_ip, ntohs(local_port),
2329		     ntohs(peer_port), peer_mss);
2330		dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
2331				 local_port, peer_port,
2332				 PASS_OPEN_TOS_G(ntohl(req->tos_stid)));
2333	} else {
2334		PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
2335		     , __func__, parent_ep, hwtid,
2336		     local_ip, peer_ip, ntohs(local_port),
2337		     ntohs(peer_port), peer_mss);
2338		dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
2339				  PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
2340				  ((struct sockaddr_in6 *)
2341				  &parent_ep->com.local_addr)->sin6_scope_id);
2342	}
2343	if (!dst) {
2344		printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
2345		       __func__);
2346		goto reject;
2347	}
2348
2349	child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
2350	if (!child_ep) {
2351		printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
2352		       __func__);
2353		dst_release(dst);
2354		goto reject;
2355	}
2356
2357	err = import_ep(child_ep, iptype, peer_ip, dst, dev, false);
2358	if (err) {
2359		printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
2360		       __func__);
2361		dst_release(dst);
2362		kfree(child_ep);
2363		goto reject;
2364	}
2365
2366	hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
2367	       ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
2368	if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
2369		child_ep->mtu = peer_mss + hdrs;
2370
2371	state_set(&child_ep->com, CONNECTING);
2372	child_ep->com.dev = dev;
2373	child_ep->com.cm_id = NULL;
2374
2375	/*
2376	 * The mapped_local and mapped_remote addresses get set up with
2377	 * the actual 4-tuple.  The local address is based on the actual
2378	 * local address of the connection, but uses the port number of
2379	 * the parent listening endpoint.  The remote address is set up
2380	 * from a query to the IWPM, since we don't know what it
2381	 * originally was before mapping.  If no mapping was done, then
2382	 * mapped_remote == remote and mapped_local == local.
2383	 */
2384	if (iptype == 4) {
2385		struct sockaddr_in *sin = (struct sockaddr_in *)
2386			&child_ep->com.mapped_local_addr;
2387
2388		sin->sin_family = PF_INET;
2389		sin->sin_port = local_port;
2390		sin->sin_addr.s_addr = *(__be32 *)local_ip;
2391
2392		sin = (struct sockaddr_in *)&child_ep->com.local_addr;
2393		sin->sin_family = PF_INET;
2394		sin->sin_port = ((struct sockaddr_in *)
2395				 &parent_ep->com.local_addr)->sin_port;
2396		sin->sin_addr.s_addr = *(__be32 *)local_ip;
2397
2398		sin = (struct sockaddr_in *)&child_ep->com.mapped_remote_addr;
2399		sin->sin_family = PF_INET;
2400		sin->sin_port = peer_port;
2401		sin->sin_addr.s_addr = *(__be32 *)peer_ip;
2402	} else {
2403		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
2404			&child_ep->com.mapped_local_addr;
2405
2406		sin6->sin6_family = PF_INET6;
2407		sin6->sin6_port = local_port;
2408		memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2409
2410		sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2411		sin6->sin6_family = PF_INET6;
2412		sin6->sin6_port = ((struct sockaddr_in6 *)
2413				   &parent_ep->com.local_addr)->sin6_port;
2414		memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2415
2416		sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_remote_addr;
2417		sin6->sin6_family = PF_INET6;
2418		sin6->sin6_port = peer_port;
2419		memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
2420	}
2421	memcpy(&child_ep->com.remote_addr, &child_ep->com.mapped_remote_addr,
2422	       sizeof(child_ep->com.remote_addr));
2423	get_remote_addr(parent_ep, child_ep);
2424
2425	c4iw_get_ep(&parent_ep->com);
2426	child_ep->parent_ep = parent_ep;
2427	child_ep->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
2428	child_ep->dst = dst;
2429	child_ep->hwtid = hwtid;
2430
2431	PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
2432	     child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
2433
2434	init_timer(&child_ep->timer);
2435	cxgb4_insert_tid(t, child_ep, hwtid);
2436	insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid);
2437	accept_cr(child_ep, skb, req);
2438	set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
2439	goto out;
2440reject:
2441	reject_cr(dev, hwtid, skb);
2442out:
2443	return 0;
2444}
2445
2446static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2447{
2448	struct c4iw_ep *ep;
2449	struct cpl_pass_establish *req = cplhdr(skb);
2450	struct tid_info *t = dev->rdev.lldi.tids;
2451	unsigned int tid = GET_TID(req);
2452
2453	ep = lookup_tid(t, tid);
2454	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2455	ep->snd_seq = be32_to_cpu(req->snd_isn);
2456	ep->rcv_seq = be32_to_cpu(req->rcv_isn);
2457
2458	PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid,
2459	     ntohs(req->tcp_opt));
2460
2461	set_emss(ep, ntohs(req->tcp_opt));
2462
2463	dst_confirm(ep->dst);
2464	state_set(&ep->com, MPA_REQ_WAIT);
2465	start_ep_timer(ep);
2466	send_flowc(ep, skb);
2467	set_bit(PASS_ESTAB, &ep->com.history);
2468
2469	return 0;
2470}
2471
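/*
 * Handle CPL_PEER_CLOSE: the peer has closed its half of the
 * connection.  Advance the endpoint state machine and notify the ULP
 * as appropriate for the current state.
 */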
2472static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2473{
2474	struct cpl_peer_close *hdr = cplhdr(skb);
2475	struct c4iw_ep *ep;
2476	struct c4iw_qp_attributes attrs;
2477	int disconnect = 1;
2478	int release = 0;
2479	struct tid_info *t = dev->rdev.lldi.tids;
2480	unsigned int tid = GET_TID(hdr);
2481	int ret;
2482
2483	ep = lookup_tid(t, tid);
2484	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2485	dst_confirm(ep->dst);
2486
2487	set_bit(PEER_CLOSE, &ep->com.history);
2488	mutex_lock(&ep->com.mutex);
2489	switch (ep->com.state) {
2490	case MPA_REQ_WAIT:
2491		__state_set(&ep->com, CLOSING);
2492		break;
2493	case MPA_REQ_SENT:
2494		__state_set(&ep->com, CLOSING);
2495		connect_reply_upcall(ep, -ECONNRESET);
2496		break;
2497	case MPA_REQ_RCVD:
2498
2499		/*
2500		 * We're gonna mark this puppy DEAD, but keep
2501		 * the reference on it until the ULP accepts or
2502		 * rejects the CR. Also wake up anyone waiting
2503		 * in rdma connection migration (see c4iw_accept_cr()).
2504		 */
2505		__state_set(&ep->com, CLOSING);
2506		PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
2507		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
2508		break;
2509	case MPA_REP_SENT:
2510		__state_set(&ep->com, CLOSING);
2511		PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
2512		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
2513		break;
2514	case FPDU_MODE:
2515		start_ep_timer(ep);
2516		__state_set(&ep->com, CLOSING);
2517		attrs.next_state = C4IW_QP_STATE_CLOSING;
2518		ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2519				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2520		if (ret != -ECONNRESET) {
2521			peer_close_upcall(ep);
2522			disconnect = 1;
2523		}
2524		break;
2525	case ABORTING:
2526		disconnect = 0;
2527		break;
2528	case CLOSING:
2529		__state_set(&ep->com, MORIBUND);
2530		disconnect = 0;
2531		break;
2532	case MORIBUND:
2533		(void)stop_ep_timer(ep);
2534		if (ep->com.cm_id && ep->com.qp) {
2535			attrs.next_state = C4IW_QP_STATE_IDLE;
2536			c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2537				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2538		}
2539		close_complete_upcall(ep, 0);
2540		__state_set(&ep->com, DEAD);
2541		release = 1;
2542		disconnect = 0;
2543		break;
2544	case DEAD:
2545		disconnect = 0;
2546		break;
2547	default:
2548		BUG_ON(1);
2549	}
2550	mutex_unlock(&ep->com.mutex);
2551	if (disconnect)
2552		c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2553	if (release)
2554		release_ep_resources(ep);
2555	return 0;
2556}
2557
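/*
 * Handle CPL_ABORT_REQ_RSS: the connection has been aborted.  Negative
 * advice is counted and otherwise ignored; for a real abort, move the
 * QP to ERROR where needed, notify the ULP and answer with an abort
 * reply that requests no RST.
 */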
2558static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2559{
2560	struct cpl_abort_req_rss *req = cplhdr(skb);
2561	struct c4iw_ep *ep;
2562	struct cpl_abort_rpl *rpl;
2563	struct sk_buff *rpl_skb;
2564	struct c4iw_qp_attributes attrs;
2565	int ret;
2566	int release = 0;
2567	struct tid_info *t = dev->rdev.lldi.tids;
2568	unsigned int tid = GET_TID(req);
2569
2570	ep = lookup_tid(t, tid);
2571	if (is_neg_adv(req->status)) {
2572		PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
2573		     __func__, ep->hwtid, req->status,
2574		     neg_adv_str(req->status));
2575		ep->stats.abort_neg_adv++;
2576		mutex_lock(&dev->rdev.stats.lock);
2577		dev->rdev.stats.neg_adv++;
2578		mutex_unlock(&dev->rdev.stats.lock);
2579		return 0;
2580	}
2581	PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
2582	     ep->com.state);
2583	set_bit(PEER_ABORT, &ep->com.history);
2584
2585	/*
2586	 * Wake up any threads in rdma_init() or rdma_fini().
2587	 * However, this is not needed if the com state is just
2588	 * MPA_REQ_SENT.
2589	 */
2590	if (ep->com.state != MPA_REQ_SENT)
2591		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
2592
2593	mutex_lock(&ep->com.mutex);
2594	switch (ep->com.state) {
2595	case CONNECTING:
2596		break;
2597	case MPA_REQ_WAIT:
2598		(void)stop_ep_timer(ep);
2599		break;
2600	case MPA_REQ_SENT:
2601		(void)stop_ep_timer(ep);
2602		if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
2603			connect_reply_upcall(ep, -ECONNRESET);
2604		else {
2605			/*
2606			 * We just don't send a notification upwards because
2607			 * we want to retry with MPA v1 without the upper
2608			 * layers even knowing it.
2609			 *
2610			 * Do some housekeeping so as to re-initiate the
2611			 * connection.
2612			 */
2613			PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
2614			     mpa_rev);
2615			ep->retry_with_mpa_v1 = 1;
2616		}
2617		break;
2618	case MPA_REP_SENT:
2619		break;
2620	case MPA_REQ_RCVD:
2621		break;
2622	case MORIBUND:
2623	case CLOSING:
2624		stop_ep_timer(ep);
2625		/*FALLTHROUGH*/
2626	case FPDU_MODE:
2627		if (ep->com.cm_id && ep->com.qp) {
2628			attrs.next_state = C4IW_QP_STATE_ERROR;
2629			ret = c4iw_modify_qp(ep->com.qp->rhp,
2630				     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
2631				     &attrs, 1);
2632			if (ret)
2633				printk(KERN_ERR MOD
2634				       "%s - qp <- error failed!\n",
2635				       __func__);
2636		}
2637		peer_abort_upcall(ep);
2638		break;
2639	case ABORTING:
2640		break;
2641	case DEAD:
2642		PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
2643		mutex_unlock(&ep->com.mutex);
2644		return 0;
2645	default:
2646		BUG_ON(1);
2647		break;
2648	}
2649	dst_confirm(ep->dst);
2650	if (ep->com.state != ABORTING) {
2651		__state_set(&ep->com, DEAD);
2652		/* we don't release if we want to retry with mpa_v1 */
2653		if (!ep->retry_with_mpa_v1)
2654			release = 1;
2655	}
2656	mutex_unlock(&ep->com.mutex);
2657
2658	rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
2659	if (!rpl_skb) {
2660		printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
2661		       __func__);
2662		release = 1;
2663		goto out;
2664	}
2665	set_wr_txq(rpl_skb, CPL_PRIORITY_DATA, ep->txq_idx);
2666	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
2667	INIT_TP_WR(rpl, ep->hwtid);
2668	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
2669	rpl->cmd = CPL_ABORT_NO_RST;
2670	c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
2671out:
2672	if (release)
2673		release_ep_resources(ep);
2674	else if (ep->retry_with_mpa_v1) {
2675		remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
2676		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
2677		dst_release(ep->dst);
2678		cxgb4_l2t_release(ep->l2t);
2679		c4iw_reconnect(ep);
2680	}
2681
2682	return 0;
2683}
2684
2685static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2686{
2687	struct c4iw_ep *ep;
2688	struct c4iw_qp_attributes attrs;
2689	struct cpl_close_con_rpl *rpl = cplhdr(skb);
2690	int release = 0;
2691	struct tid_info *t = dev->rdev.lldi.tids;
2692	unsigned int tid = GET_TID(rpl);
2693
2694	ep = lookup_tid(t, tid);
2695
2696	BUG_ON(!ep);
2697	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2698
2699	/* The cm_id may be null if we failed to connect */
2700	mutex_lock(&ep->com.mutex);
2701	switch (ep->com.state) {
2702	case CLOSING:
2703		__state_set(&ep->com, MORIBUND);
2704		break;
2705	case MORIBUND:
2706		(void)stop_ep_timer(ep);
2707		if ((ep->com.cm_id) && (ep->com.qp)) {
2708			attrs.next_state = C4IW_QP_STATE_IDLE;
2709			c4iw_modify_qp(ep->com.qp->rhp,
2710					     ep->com.qp,
2711					     C4IW_QP_ATTR_NEXT_STATE,
2712					     &attrs, 1);
2713		}
2714		close_complete_upcall(ep, 0);
2715		__state_set(&ep->com, DEAD);
2716		release = 1;
2717		break;
2718	case ABORTING:
2719	case DEAD:
2720		break;
2721	default:
2722		BUG_ON(1);
2723		break;
2724	}
2725	mutex_unlock(&ep->com.mutex);
2726	if (release)
2727		release_ep_resources(ep);
2728	return 0;
2729}
2730
2731static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
2732{
2733	struct cpl_rdma_terminate *rpl = cplhdr(skb);
2734	struct tid_info *t = dev->rdev.lldi.tids;
2735	unsigned int tid = GET_TID(rpl);
2736	struct c4iw_ep *ep;
2737	struct c4iw_qp_attributes attrs;
2738
2739	ep = lookup_tid(t, tid);
2741
2742	if (ep && ep->com.qp) {
2743		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
2744		       ep->com.qp->wq.sq.qid);
2745		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2746		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2747			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2748	} else
2749		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
2750
2751	return 0;
2752}
2753
2754/*
2755 * Upcall from the adapter indicating data has been transmitted.
2756 * For us it's just the single MPA request or reply.  We can now free
2757 * the skb holding the MPA message.
2758 */
2759static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
2760{
2761	struct c4iw_ep *ep;
2762	struct cpl_fw4_ack *hdr = cplhdr(skb);
2763	u8 credits = hdr->credits;
2764	unsigned int tid = GET_TID(hdr);
2765	struct tid_info *t = dev->rdev.lldi.tids;
2766
2768	ep = lookup_tid(t, tid);
2769	PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
2770	if (credits == 0) {
2771		PDBG("%s 0 credit ack ep %p tid %u state %u\n",
2772		     __func__, ep, ep->hwtid, state_read(&ep->com));
2773		return 0;
2774	}
2775
2776	dst_confirm(ep->dst);
2777	if (ep->mpa_skb) {
2778		PDBG("%s last streaming msg ack ep %p tid %u state %u "
2779		     "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
2780		     state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
2781		kfree_skb(ep->mpa_skb);
2782		ep->mpa_skb = NULL;
2783	}
2784	return 0;
2785}
2786
2787int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2788{
2789	int err = 0;
2790	int disconnect = 0;
2791	struct c4iw_ep *ep = to_ep(cm_id);
2792	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2793
2794	mutex_lock(&ep->com.mutex);
2795	if (ep->com.state == DEAD) {
2796		mutex_unlock(&ep->com.mutex);
2797		c4iw_put_ep(&ep->com);
2798		return -ECONNRESET;
2799	}
2800	set_bit(ULP_REJECT, &ep->com.history);
2801	BUG_ON(ep->com.state != MPA_REQ_RCVD);
2802	if (mpa_rev == 0)
2803		abort_connection(ep, NULL, GFP_KERNEL);
2804	else {
2805		err = send_mpa_reject(ep, pdata, pdata_len);
2806		disconnect = 1;
2807	}
2808	mutex_unlock(&ep->com.mutex);
2809	if (disconnect)
2810		err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2811	c4iw_put_ep(&ep->com);
2812	return 0;
2813}
2814
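/*
 * The ULP accepts a pending MPA connection request: validate and
 * negotiate the IRD/ORD values, bind the QP to the endpoint, move the
 * QP to RTS and send the MPA reply.
 */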
2815int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2816{
2817	int err;
2818	struct c4iw_qp_attributes attrs;
2819	enum c4iw_qp_attr_mask mask;
2820	struct c4iw_ep *ep = to_ep(cm_id);
2821	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
2822	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
2823
2824	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2825
2826	mutex_lock(&ep->com.mutex);
2827	if (ep->com.state == DEAD) {
2828		err = -ECONNRESET;
2829		goto err;
2830	}
2831
2832	BUG_ON(ep->com.state != MPA_REQ_RCVD);
2833	BUG_ON(!qp);
2834
2835	set_bit(ULP_ACCEPT, &ep->com.history);
2836	if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
2837	    (conn_param->ird > cur_max_read_depth(ep->com.dev))) {
2838		abort_connection(ep, NULL, GFP_KERNEL);
2839		err = -EINVAL;
2840		goto err;
2841	}
2842
2843	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
2844		if (conn_param->ord > ep->ird) {
2845			if (RELAXED_IRD_NEGOTIATION) {
2846				ep->ord = ep->ird;
2847			} else {
2848				ep->ird = conn_param->ird;
2849				ep->ord = conn_param->ord;
2850				send_mpa_reject(ep, conn_param->private_data,
2851						conn_param->private_data_len);
2852				abort_connection(ep, NULL, GFP_KERNEL);
2853				err = -ENOMEM;
2854				goto err;
2855			}
2856		}
2857		if (conn_param->ird < ep->ord) {
2858			if (RELAXED_IRD_NEGOTIATION &&
2859			    ep->ord <= h->rdev.lldi.max_ordird_qp) {
2860				conn_param->ird = ep->ord;
2861			} else {
2862				abort_connection(ep, NULL, GFP_KERNEL);
2863				err = -ENOMEM;
2864				goto err;
2865			}
2866		}
2867	}
2868	ep->ird = conn_param->ird;
2869	ep->ord = conn_param->ord;
2870
2871	if (ep->mpa_attr.version == 1) {
2872		if (peer2peer && ep->ird == 0)
2873			ep->ird = 1;
2874	} else {
2875		if (peer2peer &&
2876		    (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
2877		    (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ord == 0)
2878			ep->ird = 1;
2879	}
2880
2881	PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
2882
2883	cm_id->add_ref(cm_id);
2884	ep->com.cm_id = cm_id;
2885	ep->com.qp = qp;
2886	ref_qp(ep);
2887
2888	/* bind QP to EP and move to RTS */
2889	attrs.mpa_attr = ep->mpa_attr;
2890	attrs.max_ird = ep->ird;
2891	attrs.max_ord = ep->ord;
2892	attrs.llp_stream_handle = ep;
2893	attrs.next_state = C4IW_QP_STATE_RTS;
2894
2895	/* bind QP and TID with INIT_WR */
2896	mask = C4IW_QP_ATTR_NEXT_STATE |
2897			     C4IW_QP_ATTR_LLP_STREAM_HANDLE |
2898			     C4IW_QP_ATTR_MPA_ATTR |
2899			     C4IW_QP_ATTR_MAX_IRD |
2900			     C4IW_QP_ATTR_MAX_ORD;
2901
2902	err = c4iw_modify_qp(ep->com.qp->rhp,
2903			     ep->com.qp, mask, &attrs, 1);
2904	if (err)
2905		goto err1;
2906	err = send_mpa_reply(ep, conn_param->private_data,
2907			     conn_param->private_data_len);
2908	if (err)
2909		goto err1;
2910
2911	__state_set(&ep->com, FPDU_MODE);
2912	established_upcall(ep);
2913	mutex_unlock(&ep->com.mutex);
2914	c4iw_put_ep(&ep->com);
2915	return 0;
2916err1:
2917	ep->com.cm_id = NULL;
2918	abort_connection(ep, NULL, GFP_KERNEL);
2919	cm_id->rem_ref(cm_id);
2920err:
2921	mutex_unlock(&ep->com.mutex);
2922	c4iw_put_ep(&ep->com);
2923	return err;
2924}
2925
2926static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
2927{
2928	struct in_device *ind;
2929	int found = 0;
2930	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
2931	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
2932
2933	ind = in_dev_get(dev->rdev.lldi.ports[0]);
2934	if (!ind)
2935		return -EADDRNOTAVAIL;
2936	for_primary_ifa(ind) {
2937		laddr->sin_addr.s_addr = ifa->ifa_address;
2938		raddr->sin_addr.s_addr = ifa->ifa_address;
2939		found = 1;
2940		break;
2941	}
2942	endfor_ifa(ind);
2943	in_dev_put(ind);
2944	return found ? 0 : -EADDRNOTAVAIL;
2945}
2946
2947static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
2948		      unsigned char banned_flags)
2949{
2950	struct inet6_dev *idev;
2951	int err = -EADDRNOTAVAIL;
2952
2953	rcu_read_lock();
2954	idev = __in6_dev_get(dev);
2955	if (idev != NULL) {
2956		struct inet6_ifaddr *ifp;
2957
2958		read_lock_bh(&idev->lock);
2959		list_for_each_entry(ifp, &idev->addr_list, if_list) {
2960			if (ifp->scope == IFA_LINK &&
2961			    !(ifp->flags & banned_flags)) {
2962				memcpy(addr, &ifp->addr, 16);
2963				err = 0;
2964				break;
2965			}
2966		}
2967		read_unlock_bh(&idev->lock);
2968	}
2969	rcu_read_unlock();
2970	return err;
2971}
2972
2973static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
2974{
2975	struct in6_addr uninitialized_var(addr);
2976	struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr;
2977	struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr;
2978
2979	if (get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
2980		memcpy(la6->sin6_addr.s6_addr, &addr, 16);
2981		memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
2982		return 0;
2983	}
2984	return -EADDRNOTAVAIL;
2985}
2986
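/*
 * Active connection setup: allocate an endpoint and an atid, resolve
 * the (possibly port-mapped) addresses and a route to the peer, then
 * send the TCP connect request to the adapter.
 */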
2987int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2988{
2989	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2990	struct c4iw_ep *ep;
2991	int err = 0;
2992	struct sockaddr_in *laddr;
2993	struct sockaddr_in *raddr;
2994	struct sockaddr_in6 *laddr6;
2995	struct sockaddr_in6 *raddr6;
2996	struct iwpm_dev_data pm_reg_msg;
2997	struct iwpm_sa_data pm_msg;
2998	__u8 *ra;
2999	int iptype;
3000	int iwpm_err = 0;
3001
3002	if ((conn_param->ord > cur_max_read_depth(dev)) ||
3003	    (conn_param->ird > cur_max_read_depth(dev))) {
3004		err = -EINVAL;
3005		goto out;
3006	}
3007	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3008	if (!ep) {
3009		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
3010		err = -ENOMEM;
3011		goto out;
3012	}
3013	init_timer(&ep->timer);
3014	ep->plen = conn_param->private_data_len;
3015	if (ep->plen)
3016		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
3017		       conn_param->private_data, ep->plen);
3018	ep->ird = conn_param->ird;
3019	ep->ord = conn_param->ord;
3020
3021	if (peer2peer && ep->ord == 0)
3022		ep->ord = 1;
3023
3024	cm_id->add_ref(cm_id);
3025	ep->com.dev = dev;
3026	ep->com.cm_id = cm_id;
3027	ep->com.qp = get_qhp(dev, conn_param->qpn);
3028	if (!ep->com.qp) {
3029		PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
3030		err = -EINVAL;
3031		goto fail1;
3032	}
3033	ref_qp(ep);
3034	PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
3035	     ep->com.qp, cm_id);
3036
3037	/*
3038	 * Allocate an active TID to initiate a TCP connection.
3039	 */
3040	ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
3041	if (ep->atid == -1) {
3042		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
3043		err = -ENOMEM;
3044		goto fail1;
3045	}
3046	insert_handle(dev, &dev->atid_idr, ep, ep->atid);
3047
3048	memcpy(&ep->com.local_addr, &cm_id->local_addr,
3049	       sizeof(ep->com.local_addr));
3050	memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
3051	       sizeof(ep->com.remote_addr));
3052
3053	/*
	 * Default the mapped addresses to the addresses supplied by the
	 * cm_id; the port mapper query below overrides them if a mapping
	 * is available.
	 */
3054	memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
3055	       sizeof(ep->com.mapped_local_addr));
3056	memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr,
3057	       sizeof(ep->com.mapped_remote_addr));
3058
3059	c4iw_form_reg_msg(dev, &pm_reg_msg);
3060	iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
3061	if (iwpm_err) {
3062		PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
3063			__func__, iwpm_err);
3064	}
3065	if (iwpm_valid_pid() && !iwpm_err) {
3066		c4iw_form_pm_msg(ep, &pm_msg);
3067		iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW);
3068		if (iwpm_err)
3069			PDBG("%s: Port Mapper query fail (err = %d).\n",
3070				__func__, iwpm_err);
3071		else
3072			c4iw_record_pm_msg(ep, &pm_msg);
3073	}
3074	if (iwpm_create_mapinfo(&ep->com.local_addr,
3075				&ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
3076		iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
3077		err = -ENOMEM;
3078		goto fail1;
3079	}
3080	print_addr(&ep->com, __func__, "add_query/create_mapinfo");
3081	set_bit(RELEASE_MAPINFO, &ep->com.flags);
3082
3083	laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr;
3084	raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
3085	laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
3086	raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr;
3087
3088	if (cm_id->remote_addr.ss_family == AF_INET) {
3089		iptype = 4;
3090		ra = (__u8 *)&raddr->sin_addr;
3091
3092		/*
3093		 * Handle loopback requests to INADDR_ANY.
3094		 */
3095		if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) {
3096			err = pick_local_ipaddrs(dev, cm_id);
3097			if (err)
3098				goto fail1;
3099		}
3100
3101		/* find a route */
3102		PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
3103		     __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
3104		     ra, ntohs(raddr->sin_port));
3105		ep->dst = find_route(dev, laddr->sin_addr.s_addr,
3106				     raddr->sin_addr.s_addr, laddr->sin_port,
3107				     raddr->sin_port, 0);
3108	} else {
3109		iptype = 6;
3110		ra = (__u8 *)&raddr6->sin6_addr;
3111
3112		/*
3113		 * Handle loopback requests to INADDR_ANY.
3114		 */
3115		if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
3116			err = pick_local_ip6addrs(dev, cm_id);
3117			if (err)
3118				goto fail1;
3119		}
3120
3121		/* find a route */
3122		PDBG("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
3123		     __func__, laddr6->sin6_addr.s6_addr,
3124		     ntohs(laddr6->sin6_port),
3125		     raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
3126		ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
3127				      raddr6->sin6_addr.s6_addr,
3128				      laddr6->sin6_port, raddr6->sin6_port, 0,
3129				      raddr6->sin6_scope_id);
3130	}
3131	if (!ep->dst) {
3132		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
3133		err = -EHOSTUNREACH;
3134		goto fail2;
3135	}
3136
3137	err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true);
3138	if (err) {
3139		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
3140		goto fail3;
3141	}
3142
3143	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
3144		__func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
3145		ep->l2t->idx);
3146
3147	state_set(&ep->com, CONNECTING);
3148	ep->tos = 0;
3149
3150	/* send connect request to rnic */
3151	err = send_connect(ep);
3152	if (!err)
3153		goto out;
3154
3155	cxgb4_l2t_release(ep->l2t);
3156fail3:
3157	dst_release(ep->dst);
3158fail2:
3159	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
3160	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
3161fail1:
3162	cm_id->rem_ref(cm_id);
3163	c4iw_put_ep(&ep->com);
3164out:
3165	return err;
3166}
3167
3168static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3169{
3170	int err;
3171	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
3172				    &ep->com.mapped_local_addr;
3173
3174	c4iw_init_wr_wait(&ep->com.wr_wait);
3175	err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
3176				   ep->stid, &sin6->sin6_addr,
3177				   sin6->sin6_port,
3178				   ep->com.dev->rdev.lldi.rxq_ids[0]);
3179	if (!err)
3180		err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3181					  &ep->com.wr_wait,
3182					  0, 0, __func__);
3183	else if (err > 0)
3184		err = net_xmit_errno(err);
3185	if (err)
3186		pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
3187		       err, ep->stid,
3188		       sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
3189	return err;
3190}
3191
3192static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3193{
3194	int err;
3195	struct sockaddr_in *sin = (struct sockaddr_in *)
3196				  &ep->com.mapped_local_addr;
3197
3198	if (dev->rdev.lldi.enable_fw_ofld_conn) {
3199		do {
3200			err = cxgb4_create_server_filter(
3201				ep->com.dev->rdev.lldi.ports[0], ep->stid,
3202				sin->sin_addr.s_addr, sin->sin_port, 0,
3203				ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
3204			if (err == -EBUSY) {
3205				set_current_state(TASK_UNINTERRUPTIBLE);
3206				schedule_timeout(usecs_to_jiffies(100));
3207			}
3208		} while (err == -EBUSY);
3209	} else {
3210		c4iw_init_wr_wait(&ep->com.wr_wait);
3211		err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
3212				ep->stid, sin->sin_addr.s_addr, sin->sin_port,
3213				0, ep->com.dev->rdev.lldi.rxq_ids[0]);
3214		if (!err)
3215			err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3216						  &ep->com.wr_wait,
3217						  0, 0, __func__);
3218		else if (err > 0)
3219			err = net_xmit_errno(err);
3220	}
3221	if (err)
3222		pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"
3223		       , err, ep->stid,
3224		       &sin->sin_addr, ntohs(sin->sin_port));
3225	return err;
3226}
3227
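/*
 * Set up a listening endpoint: allocate a server TID (or a filter TID
 * when firmware offload connections are enabled for IPv4), register
 * the local address with the port mapper and create the hardware
 * server entry.
 */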
3228int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3229{
3230	int err = 0;
3231	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3232	struct c4iw_listen_ep *ep;
3233	struct iwpm_dev_data pm_reg_msg;
3234	struct iwpm_sa_data pm_msg;
3235	int iwpm_err = 0;
3236
3237	might_sleep();
3238
3239	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3240	if (!ep) {
3241		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
3242		err = -ENOMEM;
3243		goto fail1;
3244	}
3245	PDBG("%s ep %p\n", __func__, ep);
3246	cm_id->add_ref(cm_id);
3247	ep->com.cm_id = cm_id;
3248	ep->com.dev = dev;
3249	ep->backlog = backlog;
3250	memcpy(&ep->com.local_addr, &cm_id->local_addr,
3251	       sizeof(ep->com.local_addr));
3252
3253	/*
3254	 * Allocate a server TID.
3255	 */
3256	if (dev->rdev.lldi.enable_fw_ofld_conn &&
3257	    ep->com.local_addr.ss_family == AF_INET)
3258		ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
3259					     cm_id->local_addr.ss_family, ep);
3260	else
3261		ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
3262					    cm_id->local_addr.ss_family, ep);
3263
3264	if (ep->stid == -1) {
3265		printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
3266		err = -ENOMEM;
3267		goto fail2;
3268	}
3269	insert_handle(dev, &dev->stid_idr, ep, ep->stid);
3270
3271	/*
	 * Default the mapped local address to the address supplied by the
	 * cm_id; the port mapper below overrides it if a mapping is
	 * available.
	 */
3272	memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
3273	       sizeof(ep->com.mapped_local_addr));
3274
3275	c4iw_form_reg_msg(dev, &pm_reg_msg);
3276	iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
3277	if (iwpm_err) {
3278		PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
3279			__func__, iwpm_err);
3280	}
3281	if (iwpm_valid_pid() && !iwpm_err) {
3282		memcpy(&pm_msg.loc_addr, &ep->com.local_addr,
3283				sizeof(ep->com.local_addr));
3284		iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW);
3285		if (iwpm_err)
3286			PDBG("%s: Port Mapper query fail (err = %d).\n",
3287				__func__, iwpm_err);
3288		else
3289			memcpy(&ep->com.mapped_local_addr,
3290				&pm_msg.mapped_loc_addr,
3291				sizeof(ep->com.mapped_local_addr));
3292	}
3293	if (iwpm_create_mapinfo(&ep->com.local_addr,
3294				&ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
3295		err = -ENOMEM;
3296		goto fail3;
3297	}
3298	print_addr(&ep->com, __func__, "add_mapping/create_mapinfo");
3299
3300	set_bit(RELEASE_MAPINFO, &ep->com.flags);
3301	state_set(&ep->com, LISTEN);
3302	if (ep->com.local_addr.ss_family == AF_INET)
3303		err = create_server4(dev, ep);
3304	else
3305		err = create_server6(dev, ep);
3306	if (!err) {
3307		cm_id->provider_data = ep;
3308		goto out;
3309	}
3310
3311fail3:
3312	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3313			ep->com.local_addr.ss_family);
3314fail2:
3315	cm_id->rem_ref(cm_id);
3316	c4iw_put_ep(&ep->com);
3317fail1:
3318out:
3319	return err;
3320}
3321
3322int c4iw_destroy_listen(struct iw_cm_id *cm_id)
3323{
3324	int err;
3325	struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
3326
3327	PDBG("%s ep %p\n", __func__, ep);
3328
3329	might_sleep();
3330	state_set(&ep->com, DEAD);
3331	if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn &&
3332	    ep->com.local_addr.ss_family == AF_INET) {
3333		err = cxgb4_remove_server_filter(
3334			ep->com.dev->rdev.lldi.ports[0], ep->stid,
3335			ep->com.dev->rdev.lldi.rxq_ids[0], 0);
3336	} else {
3337		c4iw_init_wr_wait(&ep->com.wr_wait);
3338		err = cxgb4_remove_server(
3339				ep->com.dev->rdev.lldi.ports[0], ep->stid,
3340				ep->com.dev->rdev.lldi.rxq_ids[0], 0);
3341		if (err)
3342			goto done;
3343		err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait,
3344					  0, 0, __func__);
3345	}
3346	remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
3347	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3348			ep->com.local_addr.ss_family);
3349done:
3350	cm_id->rem_ref(cm_id);
3351	c4iw_put_ep(&ep->com);
3352	return err;
3353}
3354
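/*
 * Initiate a local close of the connection.  An abrupt disconnect sends
 * an abort; a graceful one sends a half-close and starts the endpoint
 * timer.  On a fatal error the endpoint resources are released here.
 */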
3355int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3356{
3357	int ret = 0;
3358	int close = 0;
3359	int fatal = 0;
3360	struct c4iw_rdev *rdev;
3361
3362	mutex_lock(&ep->com.mutex);
3363
3364	PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
3365	     states[ep->com.state], abrupt);
3366
3367	rdev = &ep->com.dev->rdev;
3368	if (c4iw_fatal_error(rdev)) {
3369		fatal = 1;
3370		close_complete_upcall(ep, -EIO);
3371		ep->com.state = DEAD;
3372	}
3373	switch (ep->com.state) {
3374	case MPA_REQ_WAIT:
3375	case MPA_REQ_SENT:
3376	case MPA_REQ_RCVD:
3377	case MPA_REP_SENT:
3378	case FPDU_MODE:
3379		close = 1;
3380		if (abrupt)
3381			ep->com.state = ABORTING;
3382		else {
3383			ep->com.state = CLOSING;
3384			start_ep_timer(ep);
3385		}
3386		set_bit(CLOSE_SENT, &ep->com.flags);
3387		break;
3388	case CLOSING:
3389		if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
3390			close = 1;
3391			if (abrupt) {
3392				(void)stop_ep_timer(ep);
3393				ep->com.state = ABORTING;
3394			} else
3395				ep->com.state = MORIBUND;
3396		}
3397		break;
3398	case MORIBUND:
3399	case ABORTING:
3400	case DEAD:
3401		PDBG("%s ignoring disconnect ep %p state %u\n",
3402		     __func__, ep, ep->com.state);
3403		break;
3404	default:
3405		BUG();
3406		break;
3407	}
3408
3409	if (close) {
3410		if (abrupt) {
3411			set_bit(EP_DISC_ABORT, &ep->com.history);
3412			close_complete_upcall(ep, -ECONNRESET);
3413			ret = send_abort(ep, NULL, gfp);
3414		} else {
3415			set_bit(EP_DISC_CLOSE, &ep->com.history);
3416			ret = send_halfclose(ep, gfp);
3417		}
3418		if (ret)
3419			fatal = 1;
3420	}
3421	mutex_unlock(&ep->com.mutex);
3422	if (fatal)
3423		release_ep_resources(ep);
3424	return ret;
3425}
3426
3427static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3428			struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3429{
3430	struct c4iw_ep *ep;
3431	int atid = be32_to_cpu(req->tid);
3432
3433	ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids,
3434					   (__force u32) req->tid);
3435	if (!ep)
3436		return;
3437
3438	switch (req->retval) {
3439	case FW_ENOMEM:
3440		set_bit(ACT_RETRY_NOMEM, &ep->com.history);
3441		if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3442			send_fw_act_open_req(ep, atid);
3443			return;
3444		}
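		/* fall through */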
3445	case FW_EADDRINUSE:
3446		set_bit(ACT_RETRY_INUSE, &ep->com.history);
3447		if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3448			send_fw_act_open_req(ep, atid);
3449			return;
3450		}
3451		break;
3452	default:
3453		pr_info("%s unexpected ofld conn wr retval %d\n",
3454		       __func__, req->retval);
3455		break;
3456	}
3457	pr_err("active ofld_connect_wr failure %d atid %d\n",
3458	       req->retval, atid);
3459	mutex_lock(&dev->rdev.stats.lock);
3460	dev->rdev.stats.act_ofld_conn_fails++;
3461	mutex_unlock(&dev->rdev.stats.lock);
3462	connect_reply_upcall(ep, status2errno(req->retval));
3463	state_set(&ep->com, DEAD);
3464	remove_handle(dev, &dev->atid_idr, atid);
3465	cxgb4_free_atid(dev->rdev.lldi.tids, atid);
3466	dst_release(ep->dst);
3467	cxgb4_l2t_release(ep->l2t);
3468	c4iw_put_ep(&ep->com);
3469}
3470
3471static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3472			struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3473{
3474	struct sk_buff *rpl_skb;
3475	struct cpl_pass_accept_req *cpl;
3476	int ret;
3477
3478	rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
3479	BUG_ON(!rpl_skb);
3480	if (req->retval) {
3481		PDBG("%s passive open failure %d\n", __func__, req->retval);
3482		mutex_lock(&dev->rdev.stats.lock);
3483		dev->rdev.stats.pas_ofld_conn_fails++;
3484		mutex_unlock(&dev->rdev.stats.lock);
3485		kfree_skb(rpl_skb);
3486	} else {
3487		cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb);
3488		OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ,
3489					(__force u32) htonl(
3490					(__force u32) req->tid)));
3491		ret = pass_accept_req(dev, rpl_skb);
3492		if (!ret)
3493			kfree_skb(rpl_skb);
3494	}
3495	return;
3496}
3497
3498static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3499{
3500	struct cpl_fw6_msg *rpl = cplhdr(skb);
3501	struct cpl_fw6_msg_ofld_connection_wr_rpl *req;
3502
3503	switch (rpl->type) {
3504	case FW6_TYPE_CQE:
3505		c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
3506		break;
3507	case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
3508		req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data;
3509		switch (req->t_state) {
3510		case TCP_SYN_SENT:
3511			active_ofld_conn_reply(dev, skb, req);
3512			break;
3513		case TCP_SYN_RECV:
3514			passive_ofld_conn_reply(dev, skb, req);
3515			break;
3516		default:
3517			pr_err("%s unexpected ofld conn wr state %d\n",
3518			       __func__, req->t_state);
3519			break;
3520		}
3521		break;
3522	}
3523	return 0;
3524}
3525
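/*
 * Rebuild a CPL_PASS_ACCEPT_REQ in place from a CPL_RX_PKT carrying a
 * SYN, parsing the TCP options out of the packet, so that a SYN
 * redirected by a server filter can be fed through the normal
 * passive-open path.
 */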
3526static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid, u8 tos)
3527{
3528	u32 l2info;
3529	u16 vlantag, len, hdr_len, eth_hdr_len;
3530	u8 intf;
3531	struct cpl_rx_pkt *cpl = cplhdr(skb);
3532	struct cpl_pass_accept_req *req;
3533	struct tcp_options_received tmp_opt;
3534	struct c4iw_dev *dev;
3535
3536	dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
3537	/* Store values from cpl_rx_pkt in temporary location. */
3538	vlantag = (__force u16) cpl->vlan;
3539	len = (__force u16) cpl->len;
3540	l2info  = (__force u32) cpl->l2info;
3541	hdr_len = (__force u16) cpl->hdr_len;
3542	intf = cpl->iff;
3543
3544	__skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
3545
3546	/*
3547	 * We need to parse the TCP options from the SYN packet
3548	 * to generate a cpl_pass_accept_req.
3549	 */
3550	memset(&tmp_opt, 0, sizeof(tmp_opt));
3551	tcp_clear_options(&tmp_opt);
3552	tcp_parse_options(skb, &tmp_opt, 0, NULL);
3553
3554	req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
3555	memset(req, 0, sizeof(*req));
3556	req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
3557			 SYN_MAC_IDX_V(RX_MACIDX_G(
3558			 (__force int) htonl(l2info))) |
3559			 SYN_XACT_MATCH_F);
3560	eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ?
3561			    RX_ETHHDR_LEN_G((__force int)htonl(l2info)) :
3562			    RX_T5_ETHHDR_LEN_G((__force int)htonl(l2info));
3563	req->hdr_len = cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(
3564					(__force int) htonl(l2info))) |
3565				   TCP_HDR_LEN_V(RX_TCPHDR_LEN_G(
3566					(__force int) htons(hdr_len))) |
3567				   IP_HDR_LEN_V(RX_IPHDR_LEN_G(
3568					(__force int) htons(hdr_len))) |
3569				   ETH_HDR_LEN_V(RX_ETHHDR_LEN_G(eth_hdr_len)));
3570	req->vlan = (__force __be16) vlantag;
3571	req->len = (__force __be16) len;
3572	req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
3573				    PASS_OPEN_TOS_V(tos));
3574	req->tcpopt.mss = htons(tmp_opt.mss_clamp);
3575	if (tmp_opt.wscale_ok)
3576		req->tcpopt.wsf = tmp_opt.snd_wscale;
3577	req->tcpopt.tstamp = tmp_opt.saw_tstamp;
3578	if (tmp_opt.sack_ok)
3579		req->tcpopt.sack = 1;
3580	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0));
3581	return;
3582}
3583
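/*
 * Ask firmware to set up the offload connection for a filter-redirected
 * SYN by sending a FW_OFLD_CONNECTION_WR.  The skb carrying the
 * synthesized cpl_pass_accept_req is stashed in the cookie so that
 * passive_ofld_conn_reply() can finish the passive open once firmware
 * returns a TID.
 */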
static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
				  __be32 laddr, __be16 lport,
				  __be32 raddr, __be16 rport,
				  u32 rcv_isn, u32 filter, u16 window,
				  u32 rss_qid, u8 port_id)
{
	struct sk_buff *req_skb;
	struct fw_ofld_connection_wr *req;
	struct cpl_pass_accept_req *cpl = cplhdr(skb);
	int ret;

	req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
	if (!req_skb) {
		pr_err("%s - failed to alloc skb - dropping\n", __func__);
		kfree_skb(skb);
		return;
	}
	req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
	memset(req, 0, sizeof(*req));
	req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
	req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
	req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
	req->le.filter = (__force __be32) filter;
	req->le.lport = lport;
	req->le.pport = rport;
	req->le.u.ipv4.lip = laddr;
	req->le.u.ipv4.pip = raddr;
	req->tcb.rcv_nxt = htonl(rcv_isn + 1);
	req->tcb.rcv_adv = htons(window);
	req->tcb.t_state_to_astid =
		 htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) |
			FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) |
			FW_OFLD_CONNECTION_WR_ASTID_V(
			PASS_OPEN_TID_G(ntohl(cpl->tos_stid))));

	/*
	 * We store the qid in opt2 which will be used by the firmware
	 * to send us the wr response.
	 */
	req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid));

	/*
	 * We initialize the MSS index in the TCB to 0xF so that, when the
	 * driver sends the cpl_pass_accept_rpl, the TCB picks up the
	 * correct value.  If this were 0, TP would ignore any value > 0
	 * for the MSS index.
	 */
	req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
	req->cookie = (uintptr_t)skb;

	set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
	ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
	if (ret < 0) {
		pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
		       ret);
		kfree_skb(skb);
		kfree_skb(req_skb);
	}
}

/*
 * Handler for CPL_RX_PKT messages.  These arrive when a filter, rather
 * than a listening server entry, is used to redirect a SYN packet:
 * packets that hit the filter are steered to the offload queue, and the
 * driver then tries to establish the connection using a firmware work
 * request.
 */
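/*
 * Roughly, the flow for such a filter-redirected SYN is:
 *
 *   rx_pkt()
 *     -> build_cpl_pass_accept_req()   synthesize the accept request
 *     -> send_fw_pass_open_req()       FW_OFLD_CONNECTION_WR to firmware
 *   fw6_msg() -> deferred_fw6_msg()    firmware reply carrying the TID
 *     -> passive_ofld_conn_reply()
 *        -> pass_accept_req()          normal passive open processing
 */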
static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
{
	int stid;
	unsigned int filter;
	struct ethhdr *eh = NULL;
	struct vlan_ethhdr *vlan_eh = NULL;
	struct iphdr *iph;
	struct tcphdr *tcph;
	struct rss_header *rss = (void *)skb->data;
	struct cpl_rx_pkt *cpl = (void *)skb->data;
	struct cpl_pass_accept_req *req = (void *)(rss + 1);
	struct l2t_entry *e;
	struct dst_entry *dst;
	struct c4iw_ep *lep;
	u16 window;
	struct port_info *pi;
	struct net_device *pdev;
	u16 rss_qid, eth_hdr_len;
	int step;
	u32 tx_chan;
	struct neighbour *neigh;

	/* Drop all non-SYN packets */
	if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F)))
		goto reject;

	/*
	 * Drop all packets which did not hit the filter.
	 * Unlikely to happen.
	 */
	if (!(rss->filter_hit && rss->filter_tid))
		goto reject;

	/*
	 * Calculate the server TID from the filter hit index in cpl_rx_pkt.
	 */
	stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);

	lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid);
	if (!lep) {
		PDBG("%s connect request on invalid stid %d\n", __func__, stid);
		goto reject;
	}

	eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ?
			    RX_ETHHDR_LEN_G(htonl(cpl->l2info)) :
			    RX_T5_ETHHDR_LEN_G(htonl(cpl->l2info));
	if (eth_hdr_len == ETH_HLEN) {
		eh = (struct ethhdr *)(req + 1);
		iph = (struct iphdr *)(eh + 1);
	} else {
		vlan_eh = (struct vlan_ethhdr *)(req + 1);
		iph = (struct iphdr *)(vlan_eh + 1);
		skb->vlan_tci = ntohs(cpl->vlan);
	}

	if (iph->version != 0x4)
		goto reject;

	tcph = (struct tcphdr *)(iph + 1);
	skb_set_network_header(skb, (void *)iph - (void *)rss);
	skb_set_transport_header(skb, (void *)tcph - (void *)rss);
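	/*
	 * Take an extra reference on the skb: the work-queue dispatcher
	 * drops its reference once this handler returns, but the skb must
	 * stay alive because it rides in the FW_OFLD_CONNECTION_WR cookie
	 * and is later replayed as the synthesized cpl_pass_accept_req.
	 */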
	skb_get(skb);

	PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
	     ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
	     ntohs(tcph->source), iph->tos);

	dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
			 iph->tos);
	if (!dst) {
		pr_err("%s - failed to find dst entry!\n", __func__);
		goto reject;
	}
	neigh = dst_neigh_lookup_skb(dst, skb);
	if (!neigh) {
		pr_err("%s - failed to allocate neigh!\n", __func__);
		goto free_dst;
	}

	if (neigh->dev->flags & IFF_LOOPBACK) {
		pdev = ip_dev_find(&init_net, iph->daddr);
		if (!pdev) {
			pr_err("%s - failed to find device!\n", __func__);
			neigh_release(neigh);
			goto free_dst;
		}
		e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
				    pdev, 0);
		pi = (struct port_info *)netdev_priv(pdev);
		tx_chan = cxgb4_port_chan(pdev);
		dev_put(pdev);
	} else {
		pdev = get_real_dev(neigh->dev);
		e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
					pdev, 0);
		pi = (struct port_info *)netdev_priv(pdev);
		tx_chan = cxgb4_port_chan(pdev);
	}
	neigh_release(neigh);
	if (!e) {
		pr_err("%s - failed to allocate l2t entry!\n", __func__);
		goto free_dst;
	}

	step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
	rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
	window = (__force u16) htons((__force u16)tcph->window);

	/* Calculate the filter portion for the LE region. */
	filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple(
						    dev->rdev.lldi.ports[0],
						    e));

	/*
	 * Synthesize the cpl_pass_accept_req. We have everything except the
	 * TID. Once firmware sends a reply with the TID, we update the TID
	 * field in the cpl and pass it through the regular
	 * cpl_pass_accept_req path.
	 */
	build_cpl_pass_accept_req(skb, stid, iph->tos);
	send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr,
			      tcph->source, ntohl(tcph->seq), filter, window,
			      rss_qid, pi->port_id);
	cxgb4_l2t_release(e);
free_dst:
	dst_release(dst);
reject:
	return 0;
}

/*
 * These are the real handlers that are called from a
 * work queue.
 */
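/*
 * They run from the iw_cxgb4 workqueue (process context) via
 * process_work(), so they are free to block, e.g. on ep->com.mutex.
 */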
static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = {
	[CPL_ACT_ESTABLISH] = act_establish,
	[CPL_ACT_OPEN_RPL] = act_open_rpl,
	[CPL_RX_DATA] = rx_data,
	[CPL_ABORT_RPL_RSS] = abort_rpl,
	[CPL_ABORT_RPL] = abort_rpl,
	[CPL_PASS_OPEN_RPL] = pass_open_rpl,
	[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
	[CPL_PASS_ACCEPT_REQ] = pass_accept_req,
	[CPL_PASS_ESTABLISH] = pass_establish,
	[CPL_PEER_CLOSE] = peer_close,
	[CPL_ABORT_REQ_RSS] = peer_abort,
	[CPL_CLOSE_CON_RPL] = close_con_rpl,
	[CPL_RDMA_TERMINATE] = terminate,
	[CPL_FW4_ACK] = fw4_ack,
	[CPL_FW6_MSG] = deferred_fw6_msg,
	[CPL_RX_PKT] = rx_pkt
};

static void process_timeout(struct c4iw_ep *ep)
{
	struct c4iw_qp_attributes attrs;
	int abort = 1;

	mutex_lock(&ep->com.mutex);
	PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
	     ep->com.state);
	set_bit(TIMEDOUT, &ep->com.history);
	switch (ep->com.state) {
	case MPA_REQ_SENT:
		__state_set(&ep->com, ABORTING);
		connect_reply_upcall(ep, -ETIMEDOUT);
		break;
	case MPA_REQ_WAIT:
		__state_set(&ep->com, ABORTING);
		break;
	case CLOSING:
	case MORIBUND:
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = C4IW_QP_STATE_ERROR;
			c4iw_modify_qp(ep->com.qp->rhp,
				     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
				     &attrs, 1);
		}
		__state_set(&ep->com, ABORTING);
		close_complete_upcall(ep, -ETIMEDOUT);
		break;
	case ABORTING:
	case DEAD:
		/*
		 * These states are expected if the ep timed out at the same
		 * time as another thread was calling stop_ep_timer().
		 * So we silently do nothing for these states.
		 */
		abort = 0;
		break;
	default:
		WARN(1, "%s unexpected state ep %p tid %u state %u\n",
			__func__, ep, ep->hwtid, ep->com.state);
		abort = 0;
	}
	if (abort)
		abort_connection(ep, NULL, GFP_KERNEL);
	mutex_unlock(&ep->com.mutex);
	c4iw_put_ep(&ep->com);
}

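/*
 * Walk the timeout_list and handle each timed-out endpoint.  The
 * timeout_lock is dropped around process_timeout() because that path
 * takes ep->com.mutex and may issue blocking QP/abort operations.
 */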
static void process_timedout_eps(void)
{
	struct c4iw_ep *ep;

	spin_lock_irq(&timeout_lock);
	while (!list_empty(&timeout_list)) {
		struct list_head *tmp;

		tmp = timeout_list.next;
		list_del(tmp);
		tmp->next = NULL;
		tmp->prev = NULL;
		spin_unlock_irq(&timeout_lock);
		ep = list_entry(tmp, struct c4iw_ep, entry);
		process_timeout(ep);
		spin_lock_irq(&timeout_lock);
	}
	spin_unlock_irq(&timeout_lock);
}

static void process_work(struct work_struct *work)
{
	struct sk_buff *skb = NULL;
	struct c4iw_dev *dev;
	struct cpl_act_establish *rpl;
	unsigned int opcode;
	int ret;

	process_timedout_eps();
	while ((skb = skb_dequeue(&rxq))) {
		rpl = cplhdr(skb);
		dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
		opcode = rpl->ot.opcode;

		BUG_ON(!work_handlers[opcode]);
		ret = work_handlers[opcode](dev, skb);
		if (!ret)
			kfree_skb(skb);
		process_timedout_eps();
	}
}

static DECLARE_WORK(skb_work, process_work);

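/*
 * Per-endpoint timer callback.  This runs in timer (softirq) context, so
 * it only flags the ep as TIMEOUT, adds it to timeout_list (unless it is
 * already queued) and kicks the work queue; the actual handling happens
 * in process_timedout_eps() from process context.
 */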
static void ep_timeout(unsigned long arg)
{
	struct c4iw_ep *ep = (struct c4iw_ep *)arg;
	int kickit = 0;

	spin_lock(&timeout_lock);
	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
		/*
		 * Only insert if it is not already on the list.
		 */
		if (!ep->entry.next) {
			list_add_tail(&ep->entry, &timeout_list);
			kickit = 1;
		}
	}
	spin_unlock(&timeout_lock);
	if (kickit)
		queue_work(workq, &skb_work);
}

/*
 * All the CM events are handled on a work queue to have a safe context.
 */
static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
{
	/*
	 * Save dev in the skb->cb area.
	 */
	*((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;

	/*
	 * Queue the skb and schedule the worker thread.
	 */
	skb_queue_tail(&rxq, skb);
	queue_work(workq, &skb_work);
	return 0;
}

static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_set_tcb_rpl *rpl = cplhdr(skb);

	if (rpl->status != CPL_ERR_NONE) {
		printk(KERN_ERR MOD
		       "Unexpected SET_TCB_RPL status %u for tid %u\n",
		       rpl->status, GET_TID(rpl));
	}
	kfree_skb(skb);
	return 0;
}

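/*
 * Handle CPL_FW6_MSG directly in the LLD receive path: firmware WR
 * completions just wake up the waiter, while CQE notifications and
 * offload-connection WR replies are deferred to the work queue via
 * sched() and picked up by deferred_fw6_msg().
 */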
static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_fw6_msg *rpl = cplhdr(skb);
	struct c4iw_wr_wait *wr_waitp;
	int ret;

	PDBG("%s type %u\n", __func__, rpl->type);

	switch (rpl->type) {
	case FW6_TYPE_WR_RPL:
		ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
		wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
		PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
		if (wr_waitp)
			c4iw_wake_up(wr_waitp, ret ? -ret : 0);
		kfree_skb(skb);
		break;
	case FW6_TYPE_CQE:
	case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
		sched(dev, skb);
		break;
	default:
		printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__,
		       rpl->type);
		kfree_skb(skb);
		break;
	}
	return 0;
}

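/*
 * Handle CPL_ABORT_REQ_RSS before deferring it to the work queue so that
 * a thread blocked in rdma_init()/rdma_fini() is woken up right away;
 * negative advice aborts are only counted and dropped.
 */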
static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_abort_req_rss *req = cplhdr(skb);
	struct c4iw_ep *ep;
	struct tid_info *t = dev->rdev.lldi.tids;
	unsigned int tid = GET_TID(req);

	ep = lookup_tid(t, tid);
	if (!ep) {
		printk(KERN_WARNING MOD
		       "Abort on non-existent endpoint, tid %d\n", tid);
		kfree_skb(skb);
		return 0;
	}
	if (is_neg_adv(req->status)) {
		PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
		     __func__, ep->hwtid, req->status,
		     neg_adv_str(req->status));
		ep->stats.abort_neg_adv++;
		dev->rdev.stats.neg_adv++;
		kfree_skb(skb);
		return 0;
	}
	PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
	     ep->com.state);

	/*
	 * Wake up any threads blocked in rdma_init() or rdma_fini().
	 * However, if we are on MPAv2 and want to retry with MPAv1,
	 * don't wake them up yet.
	 */
	if (mpa_rev == 2 && !ep->tried_with_mpa_v1) {
		if (ep->com.state != MPA_REQ_SENT)
			c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
	} else {
		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
	}
	sched(dev, skb);
	return 0;
}

/*
 * Most upcalls from the T4 Core go to sched() to
 * schedule the processing on a work queue.
 */
c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
	[CPL_ACT_ESTABLISH] = sched,
	[CPL_ACT_OPEN_RPL] = sched,
	[CPL_RX_DATA] = sched,
	[CPL_ABORT_RPL_RSS] = sched,
	[CPL_ABORT_RPL] = sched,
	[CPL_PASS_OPEN_RPL] = sched,
	[CPL_CLOSE_LISTSRV_RPL] = sched,
	[CPL_PASS_ACCEPT_REQ] = sched,
	[CPL_PASS_ESTABLISH] = sched,
	[CPL_PEER_CLOSE] = sched,
	[CPL_CLOSE_CON_RPL] = sched,
	[CPL_ABORT_REQ_RSS] = peer_abort_intr,
	[CPL_RDMA_TERMINATE] = sched,
	[CPL_FW4_ACK] = sched,
	[CPL_SET_TCB_RPL] = set_tcb_rpl,
	[CPL_FW6_MSG] = fw6_msg,
	[CPL_RX_PKT] = sched
};

int __init c4iw_cm_init(void)
{
	spin_lock_init(&timeout_lock);
	skb_queue_head_init(&rxq);

	workq = create_singlethread_workqueue("iw_cxgb4");
	if (!workq)
		return -ENOMEM;

	return 0;
}

void c4iw_cm_term(void)
{
	WARN_ON(!list_empty(&timeout_list));
	flush_workqueue(workq);
	destroy_workqueue(workq);
}