1 /*
2  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
3  *
4  * Copyright (c) 2012, Intel Corporation.
5  *
6  *   Author: Zach Brown <zab@zabbo.net>
7  *   Author: Peter J. Braam <braam@clusterfs.com>
8  *   Author: Phil Schwan <phil@clusterfs.com>
9  *   Author: Eric Barton <eric@bartonsoftware.com>
10  *
11  *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
12  *
13  *   Portals is free software; you can redistribute it and/or
14  *   modify it under the terms of version 2 of the GNU General Public
15  *   License as published by the Free Software Foundation.
16  *
17  *   Portals is distributed in the hope that it will be useful,
18  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
19  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  *   GNU General Public License for more details.
21  *
22  *   You should have received a copy of the GNU General Public License
23  *   along with Portals; if not, write to the Free Software
24  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  */
26 
27 #include "socklnd.h"
28 
29 /*
30  * Protocol entries :
31  *   pro_send_hello       : send hello message
32  *   pro_recv_hello       : receive hello message
33  *   pro_pack	     : pack message header
34  *   pro_unpack	   : unpack message header
35  *   pro_queue_tx_zcack() : Called holding BH lock: kss_lock
36  *			  return 1 if ACK is piggybacked, otherwise return 0
37  *   pro_queue_tx_msg()   : Called holding BH lock: kss_lock
38  *			  return the ACK that piggybacked by my message, or NULL
39  *   pro_handle_zcreq()   : handler of incoming ZC-REQ
40  *   pro_handle_zcack()   : handler of incoming ZC-ACK
41  *   pro_match_tx()       : Called holding glock
42  */
43 
44 static ksock_tx_t *
ksocknal_queue_tx_msg_v1(ksock_conn_t * conn,ksock_tx_t * tx_msg)45 ksocknal_queue_tx_msg_v1(ksock_conn_t *conn, ksock_tx_t *tx_msg)
46 {
47 	/* V1.x, just enqueue it */
48 	list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
49 	return NULL;
50 }
51 
52 void
ksocknal_next_tx_carrier(ksock_conn_t * conn)53 ksocknal_next_tx_carrier(ksock_conn_t *conn)
54 {
55 	ksock_tx_t     *tx = conn->ksnc_tx_carrier;
56 
57 	/* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
58 	LASSERT(!list_empty(&conn->ksnc_tx_queue));
59 	LASSERT(tx != NULL);
60 
61 	/* Next TX that can carry ZC-ACK or LNet message */
62 	if (tx->tx_list.next == &conn->ksnc_tx_queue) {
63 		/* no more packets queued */
64 		conn->ksnc_tx_carrier = NULL;
65 	} else {
66 		conn->ksnc_tx_carrier = list_entry(tx->tx_list.next,
67 						       ksock_tx_t, tx_list);
68 		LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type);
69 	}
70 }
71 
72 static int
ksocknal_queue_tx_zcack_v2(ksock_conn_t * conn,ksock_tx_t * tx_ack,__u64 cookie)73 ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn,
74 			   ksock_tx_t *tx_ack, __u64 cookie)
75 {
76 	ksock_tx_t *tx = conn->ksnc_tx_carrier;
77 
78 	LASSERT(tx_ack == NULL ||
79 		 tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
80 
81 	/*
82 	 * Enqueue or piggyback tx_ack / cookie
83 	 * . no tx can piggyback cookie of tx_ack (or cookie), just
84 	 *   enqueue the tx_ack (if tx_ack != NUL) and return NULL.
85 	 * . There is tx can piggyback cookie of tx_ack (or cookie),
86 	 *   piggyback the cookie and return the tx.
87 	 */
88 	if (tx == NULL) {
89 		if (tx_ack != NULL) {
90 			list_add_tail(&tx_ack->tx_list,
91 					  &conn->ksnc_tx_queue);
92 			conn->ksnc_tx_carrier = tx_ack;
93 		}
94 		return 0;
95 	}
96 
97 	if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
98 		/* tx is noop zc-ack, can't piggyback zc-ack cookie */
99 		if (tx_ack != NULL)
100 			list_add_tail(&tx_ack->tx_list,
101 					  &conn->ksnc_tx_queue);
102 		return 0;
103 	}
104 
105 	LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
106 	LASSERT(tx->tx_msg.ksm_zc_cookies[1] == 0);
107 
108 	if (tx_ack != NULL)
109 		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
110 
111 	/* piggyback the zc-ack cookie */
112 	tx->tx_msg.ksm_zc_cookies[1] = cookie;
113 	/* move on to the next TX which can carry cookie */
114 	ksocknal_next_tx_carrier(conn);
115 
116 	return 1;
117 }
118 
119 static ksock_tx_t *
ksocknal_queue_tx_msg_v2(ksock_conn_t * conn,ksock_tx_t * tx_msg)120 ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg)
121 {
122 	ksock_tx_t  *tx  = conn->ksnc_tx_carrier;
123 
124 	/*
125 	 * Enqueue tx_msg:
126 	 * . If there is no NOOP on the connection, just enqueue
127 	 *   tx_msg and return NULL
128 	 * . If there is NOOP on the connection, piggyback the cookie
129 	 *   and replace the NOOP tx, and return the NOOP tx.
130 	 */
131 	if (tx == NULL) { /* nothing on queue */
132 		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
133 		conn->ksnc_tx_carrier = tx_msg;
134 		return NULL;
135 	}
136 
137 	if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
138 		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
139 		return NULL;
140 	}
141 
142 	LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
143 
144 	/* There is a noop zc-ack can be piggybacked */
145 	tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
146 	ksocknal_next_tx_carrier(conn);
147 
148 	/* use new_tx to replace the noop zc-ack packet */
149 	list_add(&tx_msg->tx_list, &tx->tx_list);
150 	list_del(&tx->tx_list);
151 
152 	return tx;
153 }
154 
/*
 * Queue or piggyback a ZC-ACK cookie for protocol V3.
 * Called holding BH lock: kss_lock (see file-top protocol notes).
 * Returns 1 if the cookie was piggybacked/absorbed, 0 if tx_ack (if any)
 * was queued as a standalone NOOP instead.
 *
 * Cookie encoding in a NOOP carrier (as manipulated below):
 *   cookies[0] == 0            : single cookie, held in cookies[1]
 *   cookies[0] >  cookies[1]   : two separate cookies
 *   cookies[0] <  cookies[1]   : inclusive range [cookies[0], cookies[1]]
 */
static int
ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn,
			   ksock_tx_t *tx_ack, __u64 cookie)
{
	ksock_tx_t *tx;

	/* only dedicated ACK connections use the V3 scheme */
	if (conn->ksnc_type != SOCKLND_CONN_ACK)
		return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);

	/* non-blocking ZC-ACK (to router) */
	LASSERT(tx_ack == NULL ||
		 tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);

	tx = conn->ksnc_tx_carrier;
	if (tx == NULL) {
		/* no carrier: queue tx_ack (if any) and make it the carrier */
		if (tx_ack != NULL) {
			list_add_tail(&tx_ack->tx_list,
					  &conn->ksnc_tx_queue);
			conn->ksnc_tx_carrier = tx_ack;
		}
		return 0;
	}

	/* conn->ksnc_tx_carrier != NULL */

	if (tx_ack != NULL)
		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];

	if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
		return 1;

	if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
		/* replace the keepalive PING with a real ACK */
		LASSERT(tx->tx_msg.ksm_zc_cookies[0] == 0);
		tx->tx_msg.ksm_zc_cookies[1] = cookie;
		return 1;
	}

	if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
	    cookie == tx->tx_msg.ksm_zc_cookies[1]) {
		CWARN("%s: duplicated ZC cookie: %llu\n",
		      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
		return 1; /* XXX return error in the future */
	}

	if (tx->tx_msg.ksm_zc_cookies[0] == 0) {
		/* NOOP tx has only one ZC-ACK cookie, can carry at least one more */
		/* keep cookies[0] > cookies[1] so the pair is read below as
		 * "two separate cookies", not as a range */
		if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
			tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
			tx->tx_msg.ksm_zc_cookies[1] = cookie;
		} else {
			tx->tx_msg.ksm_zc_cookies[0] = cookie;
		}

		if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
			/* not likely to carry more ACKs, skip it to simplify logic */
			ksocknal_next_tx_carrier(conn);
		}

		return 1;
	}

	/* takes two or more cookies already */

	if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
		__u64   tmp = 0;

		/* two separated cookies: (a+2, a) or (a+1, a) */
		LASSERT(tx->tx_msg.ksm_zc_cookies[0] -
			 tx->tx_msg.ksm_zc_cookies[1] <= 2);

		/* tmp becomes the middle of a 3-long run when the new cookie
		 * either fills the gap or extends the pair at one end */
		if (tx->tx_msg.ksm_zc_cookies[0] -
		    tx->tx_msg.ksm_zc_cookies[1] == 2) {
			if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
				tmp = cookie;
		} else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
			tmp = tx->tx_msg.ksm_zc_cookies[1];
		} else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
			tmp = tx->tx_msg.ksm_zc_cookies[0];
		}

		if (tmp != 0) {
			/* collapse to range form: [tmp-1, tmp+1] inclusive */
			tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
			tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
			return 1;
		}

	} else {
		/* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */
		if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
		    cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
			CWARN("%s: duplicated ZC cookie: %llu\n",
			      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
			return 1; /* XXX: return error in the future */
		}

		/* grow the range if the new cookie adjoins either end */
		if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
			tx->tx_msg.ksm_zc_cookies[1] = cookie;
			return 1;
		}

		if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
			tx->tx_msg.ksm_zc_cookies[0] = cookie;
			return 1;
		}
	}

	/* failed to piggyback ZC-ACK */
	if (tx_ack != NULL) {
		list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
		/* the next tx can piggyback at least 1 ACK */
		ksocknal_next_tx_carrier(conn);
	}

	return 0;
}
272 
273 static int
ksocknal_match_tx(ksock_conn_t * conn,ksock_tx_t * tx,int nonblk)274 ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
275 {
276 	int nob;
277 
278 #if SOCKNAL_VERSION_DEBUG
279 	if (!*ksocknal_tunables.ksnd_typed_conns)
280 		return SOCKNAL_MATCH_YES;
281 #endif
282 
283 	if (tx == NULL || tx->tx_lnetmsg == NULL) {
284 		/* noop packet */
285 		nob = offsetof(ksock_msg_t, ksm_u);
286 	} else {
287 		nob = tx->tx_lnetmsg->msg_len +
288 		      ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
289 		       sizeof(lnet_hdr_t) : sizeof(ksock_msg_t));
290 	}
291 
292 	/* default checking for typed connection */
293 	switch (conn->ksnc_type) {
294 	default:
295 		CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
296 		LBUG();
297 	case SOCKLND_CONN_ANY:
298 		return SOCKNAL_MATCH_YES;
299 
300 	case SOCKLND_CONN_BULK_IN:
301 		return SOCKNAL_MATCH_MAY;
302 
303 	case SOCKLND_CONN_BULK_OUT:
304 		if (nob < *ksocknal_tunables.ksnd_min_bulk)
305 			return SOCKNAL_MATCH_MAY;
306 		else
307 			return SOCKNAL_MATCH_YES;
308 
309 	case SOCKLND_CONN_CONTROL:
310 		if (nob >= *ksocknal_tunables.ksnd_min_bulk)
311 			return SOCKNAL_MATCH_MAY;
312 		else
313 			return SOCKNAL_MATCH_YES;
314 	}
315 }
316 
317 static int
ksocknal_match_tx_v3(ksock_conn_t * conn,ksock_tx_t * tx,int nonblk)318 ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
319 {
320 	int nob;
321 
322 	if (tx == NULL || tx->tx_lnetmsg == NULL)
323 		nob = offsetof(ksock_msg_t, ksm_u);
324 	else
325 		nob = tx->tx_lnetmsg->msg_len + sizeof(ksock_msg_t);
326 
327 	switch (conn->ksnc_type) {
328 	default:
329 		CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
330 		LBUG();
331 	case SOCKLND_CONN_ANY:
332 		return SOCKNAL_MATCH_NO;
333 
334 	case SOCKLND_CONN_ACK:
335 		if (nonblk)
336 			return SOCKNAL_MATCH_YES;
337 		else if (tx == NULL || tx->tx_lnetmsg == NULL)
338 			return SOCKNAL_MATCH_MAY;
339 		else
340 			return SOCKNAL_MATCH_NO;
341 
342 	case SOCKLND_CONN_BULK_OUT:
343 		if (nonblk)
344 			return SOCKNAL_MATCH_NO;
345 		else if (nob < *ksocknal_tunables.ksnd_min_bulk)
346 			return SOCKNAL_MATCH_MAY;
347 		else
348 			return SOCKNAL_MATCH_YES;
349 
350 	case SOCKLND_CONN_CONTROL:
351 		if (nonblk)
352 			return SOCKNAL_MATCH_NO;
353 		else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
354 			return SOCKNAL_MATCH_MAY;
355 		else
356 			return SOCKNAL_MATCH_YES;
357 	}
358 }
359 
360 /* (Sink) handle incoming ZC request from sender */
361 static int
ksocknal_handle_zcreq(ksock_conn_t * c,__u64 cookie,int remote)362 ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote)
363 {
364 	ksock_peer_t   *peer = c->ksnc_peer;
365 	ksock_conn_t   *conn;
366 	ksock_tx_t     *tx;
367 	int	     rc;
368 
369 	read_lock(&ksocknal_data.ksnd_global_lock);
370 
371 	conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
372 	if (conn != NULL) {
373 		ksock_sched_t *sched = conn->ksnc_scheduler;
374 
375 		LASSERT(conn->ksnc_proto->pro_queue_tx_zcack != NULL);
376 
377 		spin_lock_bh(&sched->kss_lock);
378 
379 		rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);
380 
381 		spin_unlock_bh(&sched->kss_lock);
382 
383 		if (rc) { /* piggybacked */
384 			read_unlock(&ksocknal_data.ksnd_global_lock);
385 			return 0;
386 		}
387 	}
388 
389 	read_unlock(&ksocknal_data.ksnd_global_lock);
390 
391 	/* ACK connection is not ready, or can't piggyback the ACK */
392 	tx = ksocknal_alloc_tx_noop(cookie, !!remote);
393 	if (tx == NULL)
394 		return -ENOMEM;
395 
396 	rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id);
397 	if (rc == 0)
398 		return 0;
399 
400 	ksocknal_free_tx(tx);
401 	return rc;
402 }
403 
/* (Sender) handle ZC_ACK from sink.
 * (cookie1, cookie2) uses the encoding produced by
 * ksocknal_queue_tx_zcack_v3(): cookie1 == 0 means a single cookie in
 * cookie2; cookie1 > cookie2 means two separate cookies; otherwise an
 * inclusive range [cookie1, cookie2].
 * Returns 0 on success, -EPROTO if not every expected cookie matched a
 * pending ZC request. */
static int
ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2)
{
	ksock_peer_t      *peer = conn->ksnc_peer;
	ksock_tx_t	*tx;
	ksock_tx_t	*tmp;
	LIST_HEAD(zlist);
	int		count;

	/* single-cookie form: treat it as the 1-element range [c2, c2] */
	if (cookie1 == 0)
		cookie1 = cookie2;

	/* number of cookies being acknowledged */
	count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);

	if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
	    conn->ksnc_proto == &ksocknal_protocol_v3x) {
		/* keepalive PING for V3.x, just ignore it */
		return count == 1 ? 0 : -EPROTO;
	}

	spin_lock(&peer->ksnp_lock);

	/* move every acknowledged pending ZC request onto zlist; the
	 * endpoint tests also cover the two-separate-cookies case */
	list_for_each_entry_safe(tx, tmp,
				     &peer->ksnp_zc_req_list, tx_zc_list) {
		__u64 c = tx->tx_msg.ksm_zc_cookies[0];

		if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) {
			tx->tx_msg.ksm_zc_cookies[0] = 0;
			list_del(&tx->tx_zc_list);
			list_add(&tx->tx_zc_list, &zlist);

			if (--count == 0)
				break;
		}
	}

	spin_unlock(&peer->ksnp_lock);

	/* drop the ZC reference of each acknowledged tx outside the lock */
	while (!list_empty(&zlist)) {
		tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
		list_del(&tx->tx_zc_list);
		ksocknal_tx_decref(tx);
	}

	return count == 0 ? 0 : -EPROTO;
}
451 
/* Send a HELLO in V1.x wire format: the V2 hello fields are repacked
 * into a little-endian lnet_hdr_t followed by the interface-IP payload.
 * Returns 0 on success or a negative errno from the socket write. */
static int
ksocknal_send_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello)
{
	struct socket	*sock = conn->ksnc_sock;
	lnet_hdr_t	  *hdr;
	lnet_magicversion_t *hmv;
	int		  rc;
	int		  i;

	/* the magic/version struct must exactly overlay the header up to
	 * src_nid (it is written over dest_nid below) */
	CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid));

	LIBCFS_ALLOC(hdr, sizeof(*hdr));
	if (hdr == NULL) {
		CERROR("Can't allocate lnet_hdr_t\n");
		return -ENOMEM;
	}

	hmv = (lnet_magicversion_t *)&hdr->dest_nid;

	/* Re-organize V2.x message header to V1.x (lnet_hdr_t)
	 * header and send out */
	hmv->magic	 = cpu_to_le32 (LNET_PROTO_TCP_MAGIC);
	hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR);
	hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR);

	if (the_lnet.ln_testprotocompat != 0) {
		/* single-shot proto check: deliberately corrupt the hello
		 * once to exercise the peer's version/magic handling */
		LNET_LOCK();
		if ((the_lnet.ln_testprotocompat & 1) != 0) {
			hmv->version_major++;   /* just different! */
			the_lnet.ln_testprotocompat &= ~1;
		}
		if ((the_lnet.ln_testprotocompat & 2) != 0) {
			hmv->magic = LNET_PROTO_MAGIC;
			the_lnet.ln_testprotocompat &= ~2;
		}
		LNET_UNLOCK();
	}

	/* V1.x wire format is little-endian */
	hdr->src_nid	= cpu_to_le64 (hello->kshm_src_nid);
	hdr->src_pid	= cpu_to_le32 (hello->kshm_src_pid);
	hdr->type	   = cpu_to_le32 (LNET_MSG_HELLO);
	hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32));
	hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype);
	hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation);

	rc = libcfs_sock_write(sock, hdr, sizeof(*hdr),
			       lnet_acceptor_timeout());

	if (rc != 0) {
		CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
			rc, &conn->ksnc_ipaddr, conn->ksnc_port);
		goto out;
	}

	if (hello->kshm_nips == 0)
		goto out;

	/* convert the IP list in place before sending it as the payload */
	for (i = 0; i < (int) hello->kshm_nips; i++) {
		hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]);
	}

	rc = libcfs_sock_write(sock, hello->kshm_ips,
			       hello->kshm_nips * sizeof(__u32),
			       lnet_acceptor_timeout());
	if (rc != 0) {
		CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
			rc, hello->kshm_nips,
			&conn->ksnc_ipaddr, conn->ksnc_port);
	}
out:
	LIBCFS_FREE(hdr, sizeof(*hdr));

	return rc;
}
527 
528 static int
ksocknal_send_hello_v2(ksock_conn_t * conn,ksock_hello_msg_t * hello)529 ksocknal_send_hello_v2(ksock_conn_t *conn, ksock_hello_msg_t *hello)
530 {
531 	struct socket *sock = conn->ksnc_sock;
532 	int	     rc;
533 
534 	hello->kshm_magic   = LNET_PROTO_MAGIC;
535 	hello->kshm_version = conn->ksnc_proto->pro_version;
536 
537 	if (the_lnet.ln_testprotocompat != 0) {
538 		/* single-shot proto check */
539 		LNET_LOCK();
540 		if ((the_lnet.ln_testprotocompat & 1) != 0) {
541 			hello->kshm_version++;   /* just different! */
542 			the_lnet.ln_testprotocompat &= ~1;
543 		}
544 		LNET_UNLOCK();
545 	}
546 
547 	rc = libcfs_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips),
548 			       lnet_acceptor_timeout());
549 
550 	if (rc != 0) {
551 		CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
552 			rc, &conn->ksnc_ipaddr, conn->ksnc_port);
553 		return rc;
554 	}
555 
556 	if (hello->kshm_nips == 0)
557 		return 0;
558 
559 	rc = libcfs_sock_write(sock, hello->kshm_ips,
560 			       hello->kshm_nips * sizeof(__u32),
561 			       lnet_acceptor_timeout());
562 	if (rc != 0) {
563 		CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
564 			rc, hello->kshm_nips,
565 			&conn->ksnc_ipaddr, conn->ksnc_port);
566 	}
567 
568 	return rc;
569 }
570 
/* Receive the remainder of a V1.x HELLO (little-endian lnet_hdr_t plus
 * interface-IP payload) and unpack it into *hello.
 * The header up to src_nid has already been consumed by the caller —
 * TODO confirm against the caller in socklnd.c.
 * Returns 0 on success, -EPROTO on a malformed hello, or a negative
 * errno from the socket read. */
static int
ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello,
		       int timeout)
{
	struct socket	*sock = conn->ksnc_sock;
	lnet_hdr_t	  *hdr;
	int		  rc;
	int		  i;

	LIBCFS_ALLOC(hdr, sizeof(*hdr));
	if (hdr == NULL) {
		CERROR("Can't allocate lnet_hdr_t\n");
		return -ENOMEM;
	}

	/* read from src_nid onward: the earlier fields held the
	 * magic/version already examined */
	rc = libcfs_sock_read(sock, &hdr->src_nid,
			      sizeof(*hdr) - offsetof(lnet_hdr_t, src_nid),
			      timeout);
	if (rc != 0) {
		CERROR("Error %d reading rest of HELLO hdr from %pI4h\n",
			rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		goto out;
	}

	/* ...and check we got what we expected */
	if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) {
		CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n",
		       le32_to_cpu(hdr->type),
		       &conn->ksnc_ipaddr);
		rc = -EPROTO;
		goto out;
	}

	/* V1.x wire format is little-endian */
	hello->kshm_src_nid	 = le64_to_cpu(hdr->src_nid);
	hello->kshm_src_pid	 = le32_to_cpu(hdr->src_pid);
	hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation);
	hello->kshm_ctype	   = le32_to_cpu(hdr->msg.hello.type);
	hello->kshm_nips	    = le32_to_cpu(hdr->payload_length) /
					 sizeof(__u32);

	/* bound the IP count before reading the payload into kshm_ips */
	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
		CERROR("Bad nips %d from ip %pI4h\n",
		       hello->kshm_nips, &conn->ksnc_ipaddr);
		rc = -EPROTO;
		goto out;
	}

	if (hello->kshm_nips == 0)
		goto out;

	rc = libcfs_sock_read(sock, hello->kshm_ips,
			      hello->kshm_nips * sizeof(__u32), timeout);
	if (rc != 0) {
		CERROR("Error %d reading IPs from ip %pI4h\n",
			rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		goto out;
	}

	/* convert the IP list in place and reject zero addresses */
	for (i = 0; i < (int) hello->kshm_nips; i++) {
		hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);

		if (hello->kshm_ips[i] == 0) {
			CERROR("Zero IP[%d] from ip %pI4h\n",
			       i, &conn->ksnc_ipaddr);
			rc = -EPROTO;
			break;
		}
	}
out:
	LIBCFS_FREE(hdr, sizeof(*hdr));

	return rc;
}
646 
/* Receive the remainder of a V2.x+ HELLO into *hello, byte-swapping if
 * the sender's endianness differs from ours. kshm_magic has already
 * been read and examined by the caller — TODO confirm against caller.
 * Returns 0 on success, -EPROTO on a malformed hello, or a negative
 * errno from the socket read. */
static int
ksocknal_recv_hello_v2(ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout)
{
	struct socket   *sock = conn->ksnc_sock;
	int		rc;
	int		i;

	/* a byte-swapped magic means every multi-byte field needs flipping */
	if (hello->kshm_magic == LNET_PROTO_MAGIC)
		conn->ksnc_flip = 0;
	else
		conn->ksnc_flip = 1;

	/* read the rest of the fixed-size hello (src_nid up to the IPs) */
	rc = libcfs_sock_read(sock, &hello->kshm_src_nid,
			      offsetof(ksock_hello_msg_t, kshm_ips) -
				       offsetof(ksock_hello_msg_t, kshm_src_nid),
			      timeout);
	if (rc != 0) {
		CERROR("Error %d reading HELLO from %pI4h\n",
			rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		return rc;
	}

	if (conn->ksnc_flip) {
		__swab32s(&hello->kshm_src_pid);
		__swab64s(&hello->kshm_src_nid);
		__swab32s(&hello->kshm_dst_pid);
		__swab64s(&hello->kshm_dst_nid);
		__swab64s(&hello->kshm_src_incarnation);
		__swab64s(&hello->kshm_dst_incarnation);
		__swab32s(&hello->kshm_ctype);
		__swab32s(&hello->kshm_nips);
	}

	/* bound the IP count before reading the payload into kshm_ips */
	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
		CERROR("Bad nips %d from ip %pI4h\n",
		       hello->kshm_nips, &conn->ksnc_ipaddr);
		return -EPROTO;
	}

	if (hello->kshm_nips == 0)
		return 0;

	rc = libcfs_sock_read(sock, hello->kshm_ips,
			      hello->kshm_nips * sizeof(__u32), timeout);
	if (rc != 0) {
		CERROR("Error %d reading IPs from ip %pI4h\n",
			rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		return rc;
	}

	/* swap the IP list if needed and reject zero addresses */
	for (i = 0; i < (int) hello->kshm_nips; i++) {
		if (conn->ksnc_flip)
			__swab32s(&hello->kshm_ips[i]);

		if (hello->kshm_ips[i] == 0) {
			CERROR("Zero IP[%d] from ip %pI4h\n",
			       i, &conn->ksnc_ipaddr);
			return -EPROTO;
		}
	}

	return 0;
}
712 
713 static void
ksocknal_pack_msg_v1(ksock_tx_t * tx)714 ksocknal_pack_msg_v1(ksock_tx_t *tx)
715 {
716 	/* V1.x has no KSOCK_MSG_NOOP */
717 	LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
718 	LASSERT(tx->tx_lnetmsg != NULL);
719 
720 	tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr;
721 	tx->tx_iov[0].iov_len  = sizeof(lnet_hdr_t);
722 
723 	tx->tx_resid = tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t);
724 }
725 
726 static void
ksocknal_pack_msg_v2(ksock_tx_t * tx)727 ksocknal_pack_msg_v2(ksock_tx_t *tx)
728 {
729 	tx->tx_iov[0].iov_base = &tx->tx_msg;
730 
731 	if (tx->tx_lnetmsg != NULL) {
732 		LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
733 
734 		tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
735 		tx->tx_iov[0].iov_len = sizeof(ksock_msg_t);
736 		tx->tx_resid = tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len;
737 	} else {
738 		LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
739 
740 		tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
741 		tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t,  ksm_u.lnetmsg.ksnm_hdr);
742 	}
743 	/* Don't checksum before start sending, because packet can be piggybacked with ACK */
744 }
745 
746 static void
ksocknal_unpack_msg_v1(ksock_msg_t * msg)747 ksocknal_unpack_msg_v1(ksock_msg_t *msg)
748 {
749 	msg->ksm_csum	   = 0;
750 	msg->ksm_type	   = KSOCK_MSG_LNET;
751 	msg->ksm_zc_cookies[0]  = msg->ksm_zc_cookies[1]  = 0;
752 }
753 
754 static void
ksocknal_unpack_msg_v2(ksock_msg_t * msg)755 ksocknal_unpack_msg_v2(ksock_msg_t *msg)
756 {
757 	return;  /* Do nothing */
758 }
759 
760 ksock_proto_t  ksocknal_protocol_v1x = {
761 	.pro_version	    = KSOCK_PROTO_V1,
762 	.pro_send_hello	 = ksocknal_send_hello_v1,
763 	.pro_recv_hello	 = ksocknal_recv_hello_v1,
764 	.pro_pack	       = ksocknal_pack_msg_v1,
765 	.pro_unpack	     = ksocknal_unpack_msg_v1,
766 	.pro_queue_tx_msg       = ksocknal_queue_tx_msg_v1,
767 	.pro_handle_zcreq       = NULL,
768 	.pro_handle_zcack       = NULL,
769 	.pro_queue_tx_zcack     = NULL,
770 	.pro_match_tx	   = ksocknal_match_tx
771 };
772 
773 ksock_proto_t  ksocknal_protocol_v2x = {
774 	.pro_version	    = KSOCK_PROTO_V2,
775 	.pro_send_hello	 = ksocknal_send_hello_v2,
776 	.pro_recv_hello	 = ksocknal_recv_hello_v2,
777 	.pro_pack	       = ksocknal_pack_msg_v2,
778 	.pro_unpack	     = ksocknal_unpack_msg_v2,
779 	.pro_queue_tx_msg       = ksocknal_queue_tx_msg_v2,
780 	.pro_queue_tx_zcack     = ksocknal_queue_tx_zcack_v2,
781 	.pro_handle_zcreq       = ksocknal_handle_zcreq,
782 	.pro_handle_zcack       = ksocknal_handle_zcack,
783 	.pro_match_tx	   = ksocknal_match_tx
784 };
785 
786 ksock_proto_t  ksocknal_protocol_v3x = {
787 	.pro_version	    = KSOCK_PROTO_V3,
788 	.pro_send_hello	 = ksocknal_send_hello_v2,
789 	.pro_recv_hello	 = ksocknal_recv_hello_v2,
790 	.pro_pack	       = ksocknal_pack_msg_v2,
791 	.pro_unpack	     = ksocknal_unpack_msg_v2,
792 	.pro_queue_tx_msg       = ksocknal_queue_tx_msg_v2,
793 	.pro_queue_tx_zcack     = ksocknal_queue_tx_zcack_v3,
794 	.pro_handle_zcreq       = ksocknal_handle_zcreq,
795 	.pro_handle_zcack       = ksocknal_handle_zcack,
796 	.pro_match_tx	   = ksocknal_match_tx_v3
797 };
798