1 /*
2 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
3 *
4 * Copyright (c) 2012, Intel Corporation.
5 *
6 * Author: Zach Brown <zab@zabbo.net>
7 * Author: Peter J. Braam <braam@clusterfs.com>
8 * Author: Phil Schwan <phil@clusterfs.com>
9 * Author: Eric Barton <eric@bartonsoftware.com>
10 *
11 * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
12 *
13 * Portals is free software; you can redistribute it and/or
14 * modify it under the terms of version 2 of the GNU General Public
15 * License as published by the Free Software Foundation.
16 *
17 * Portals is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with Portals; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 */
26
27 #include "socklnd.h"
28
/*
 * Protocol entries :
 * pro_send_hello : send hello message
 * pro_recv_hello : receive hello message
 * pro_pack : pack message header
 * pro_unpack : unpack message header
 * pro_queue_tx_zcack() : called holding BH lock: kss_lock
 * returns 1 if the ACK was piggybacked, otherwise returns 0
 * pro_queue_tx_msg() : called holding BH lock: kss_lock
 * returns the NOOP ACK that was piggybacked by the message, or NULL
 * pro_handle_zcreq() : handler of incoming ZC-REQ
 * pro_handle_zcack() : handler of incoming ZC-ACK
 * pro_match_tx() : called holding glock
 */
43
44 static ksock_tx_t *
ksocknal_queue_tx_msg_v1(ksock_conn_t * conn,ksock_tx_t * tx_msg)45 ksocknal_queue_tx_msg_v1(ksock_conn_t *conn, ksock_tx_t *tx_msg)
46 {
47 /* V1.x, just enqueue it */
48 list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
49 return NULL;
50 }
51
52 void
ksocknal_next_tx_carrier(ksock_conn_t * conn)53 ksocknal_next_tx_carrier(ksock_conn_t *conn)
54 {
55 ksock_tx_t *tx = conn->ksnc_tx_carrier;
56
57 /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
58 LASSERT(!list_empty(&conn->ksnc_tx_queue));
59 LASSERT(tx != NULL);
60
61 /* Next TX that can carry ZC-ACK or LNet message */
62 if (tx->tx_list.next == &conn->ksnc_tx_queue) {
63 /* no more packets queued */
64 conn->ksnc_tx_carrier = NULL;
65 } else {
66 conn->ksnc_tx_carrier = list_entry(tx->tx_list.next,
67 ksock_tx_t, tx_list);
68 LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type);
69 }
70 }
71
72 static int
ksocknal_queue_tx_zcack_v2(ksock_conn_t * conn,ksock_tx_t * tx_ack,__u64 cookie)73 ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn,
74 ksock_tx_t *tx_ack, __u64 cookie)
75 {
76 ksock_tx_t *tx = conn->ksnc_tx_carrier;
77
78 LASSERT(tx_ack == NULL ||
79 tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
80
81 /*
82 * Enqueue or piggyback tx_ack / cookie
83 * . no tx can piggyback cookie of tx_ack (or cookie), just
84 * enqueue the tx_ack (if tx_ack != NUL) and return NULL.
85 * . There is tx can piggyback cookie of tx_ack (or cookie),
86 * piggyback the cookie and return the tx.
87 */
88 if (tx == NULL) {
89 if (tx_ack != NULL) {
90 list_add_tail(&tx_ack->tx_list,
91 &conn->ksnc_tx_queue);
92 conn->ksnc_tx_carrier = tx_ack;
93 }
94 return 0;
95 }
96
97 if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
98 /* tx is noop zc-ack, can't piggyback zc-ack cookie */
99 if (tx_ack != NULL)
100 list_add_tail(&tx_ack->tx_list,
101 &conn->ksnc_tx_queue);
102 return 0;
103 }
104
105 LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
106 LASSERT(tx->tx_msg.ksm_zc_cookies[1] == 0);
107
108 if (tx_ack != NULL)
109 cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
110
111 /* piggyback the zc-ack cookie */
112 tx->tx_msg.ksm_zc_cookies[1] = cookie;
113 /* move on to the next TX which can carry cookie */
114 ksocknal_next_tx_carrier(conn);
115
116 return 1;
117 }
118
119 static ksock_tx_t *
ksocknal_queue_tx_msg_v2(ksock_conn_t * conn,ksock_tx_t * tx_msg)120 ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg)
121 {
122 ksock_tx_t *tx = conn->ksnc_tx_carrier;
123
124 /*
125 * Enqueue tx_msg:
126 * . If there is no NOOP on the connection, just enqueue
127 * tx_msg and return NULL
128 * . If there is NOOP on the connection, piggyback the cookie
129 * and replace the NOOP tx, and return the NOOP tx.
130 */
131 if (tx == NULL) { /* nothing on queue */
132 list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
133 conn->ksnc_tx_carrier = tx_msg;
134 return NULL;
135 }
136
137 if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
138 list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
139 return NULL;
140 }
141
142 LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
143
144 /* There is a noop zc-ack can be piggybacked */
145 tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
146 ksocknal_next_tx_carrier(conn);
147
148 /* use new_tx to replace the noop zc-ack packet */
149 list_add(&tx_msg->tx_list, &tx->tx_list);
150 list_del(&tx->tx_list);
151
152 return tx;
153 }
154
static int
ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn,
			   ksock_tx_t *tx_ack, __u64 cookie)
{
	ksock_tx_t *tx;

	/*
	 * V3.x ZC-ACKs on the dedicated ACK connection may be batched into
	 * one NOOP.  Cookie encoding in ksm_zc_cookies:
	 *   [0] == 0              : single cookie in [1]
	 *   [0] >  [1]            : two separate cookies, at most 2 apart
	 *   [0] <  [1]            : inclusive range [0]..[1]
	 * Returns 1 if the cookie was piggybacked/absorbed, 0 if tx_ack
	 * (when non-NULL) had to be enqueued as its own packet.
	 */
	if (conn->ksnc_type != SOCKLND_CONN_ACK)
		return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);

	/* non-blocking ZC-ACK (to router) */
	LASSERT(tx_ack == NULL ||
		tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);

	tx = conn->ksnc_tx_carrier;
	if (tx == NULL) {
		/* empty queue: tx_ack (if any) becomes the new carrier */
		if (tx_ack != NULL) {
			list_add_tail(&tx_ack->tx_list,
				      &conn->ksnc_tx_queue);
			conn->ksnc_tx_carrier = tx_ack;
		}
		return 0;
	}

	/* conn->ksnc_tx_carrier != NULL */

	if (tx_ack != NULL)
		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];

	if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
		return 1;

	if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
		/* replace the keepalive PING with a real ACK */
		LASSERT(tx->tx_msg.ksm_zc_cookies[0] == 0);
		tx->tx_msg.ksm_zc_cookies[1] = cookie;
		return 1;
	}

	if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
	    cookie == tx->tx_msg.ksm_zc_cookies[1]) {
		CWARN("%s: duplicated ZC cookie: %llu\n",
		      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
		return 1; /* XXX return error in the future */
	}

	if (tx->tx_msg.ksm_zc_cookies[0] == 0) {
		/* NOOP tx has only one ZC-ACK cookie, can carry at least one more */
		if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
			/* keep [0] > [1] for the "two separate cookies" form */
			tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
			tx->tx_msg.ksm_zc_cookies[1] = cookie;
		} else {
			tx->tx_msg.ksm_zc_cookies[0] = cookie;
		}

		if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
			/* not likely to carry more ACKs, skip it to simplify logic */
			ksocknal_next_tx_carrier(conn);
		}

		return 1;
	}

	/* takes two or more cookies already */

	if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
		__u64 tmp = 0;

		/* two separated cookies: (a+2, a) or (a+1, a) */
		LASSERT(tx->tx_msg.ksm_zc_cookies[0] -
			tx->tx_msg.ksm_zc_cookies[1] <= 2);

		/* tmp, when set, is the middle value of a consecutive run
		 * of three cookies that can collapse to range form */
		if (tx->tx_msg.ksm_zc_cookies[0] -
		    tx->tx_msg.ksm_zc_cookies[1] == 2) {
			if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
				tmp = cookie;
		} else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
			tmp = tx->tx_msg.ksm_zc_cookies[1];
		} else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
			tmp = tx->tx_msg.ksm_zc_cookies[0];
		}

		if (tmp != 0) {
			/* range of cookies */
			tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
			tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
			return 1;
		}

	} else {
		/* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */
		if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
		    cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
			CWARN("%s: duplicated ZC cookie: %llu\n",
			      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
			return 1; /* XXX: return error in the future */
		}

		if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
			/* extend the range upward */
			tx->tx_msg.ksm_zc_cookies[1] = cookie;
			return 1;
		}

		if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
			/* extend the range downward */
			tx->tx_msg.ksm_zc_cookies[0] = cookie;
			return 1;
		}
	}

	/* failed to piggyback ZC-ACK */
	if (tx_ack != NULL) {
		list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
		/* the next tx can piggyback at least 1 ACK */
		ksocknal_next_tx_carrier(conn);
	}

	return 0;
}
272
273 static int
ksocknal_match_tx(ksock_conn_t * conn,ksock_tx_t * tx,int nonblk)274 ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
275 {
276 int nob;
277
278 #if SOCKNAL_VERSION_DEBUG
279 if (!*ksocknal_tunables.ksnd_typed_conns)
280 return SOCKNAL_MATCH_YES;
281 #endif
282
283 if (tx == NULL || tx->tx_lnetmsg == NULL) {
284 /* noop packet */
285 nob = offsetof(ksock_msg_t, ksm_u);
286 } else {
287 nob = tx->tx_lnetmsg->msg_len +
288 ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
289 sizeof(lnet_hdr_t) : sizeof(ksock_msg_t));
290 }
291
292 /* default checking for typed connection */
293 switch (conn->ksnc_type) {
294 default:
295 CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
296 LBUG();
297 case SOCKLND_CONN_ANY:
298 return SOCKNAL_MATCH_YES;
299
300 case SOCKLND_CONN_BULK_IN:
301 return SOCKNAL_MATCH_MAY;
302
303 case SOCKLND_CONN_BULK_OUT:
304 if (nob < *ksocknal_tunables.ksnd_min_bulk)
305 return SOCKNAL_MATCH_MAY;
306 else
307 return SOCKNAL_MATCH_YES;
308
309 case SOCKLND_CONN_CONTROL:
310 if (nob >= *ksocknal_tunables.ksnd_min_bulk)
311 return SOCKNAL_MATCH_MAY;
312 else
313 return SOCKNAL_MATCH_YES;
314 }
315 }
316
317 static int
ksocknal_match_tx_v3(ksock_conn_t * conn,ksock_tx_t * tx,int nonblk)318 ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
319 {
320 int nob;
321
322 if (tx == NULL || tx->tx_lnetmsg == NULL)
323 nob = offsetof(ksock_msg_t, ksm_u);
324 else
325 nob = tx->tx_lnetmsg->msg_len + sizeof(ksock_msg_t);
326
327 switch (conn->ksnc_type) {
328 default:
329 CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
330 LBUG();
331 case SOCKLND_CONN_ANY:
332 return SOCKNAL_MATCH_NO;
333
334 case SOCKLND_CONN_ACK:
335 if (nonblk)
336 return SOCKNAL_MATCH_YES;
337 else if (tx == NULL || tx->tx_lnetmsg == NULL)
338 return SOCKNAL_MATCH_MAY;
339 else
340 return SOCKNAL_MATCH_NO;
341
342 case SOCKLND_CONN_BULK_OUT:
343 if (nonblk)
344 return SOCKNAL_MATCH_NO;
345 else if (nob < *ksocknal_tunables.ksnd_min_bulk)
346 return SOCKNAL_MATCH_MAY;
347 else
348 return SOCKNAL_MATCH_YES;
349
350 case SOCKLND_CONN_CONTROL:
351 if (nonblk)
352 return SOCKNAL_MATCH_NO;
353 else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
354 return SOCKNAL_MATCH_MAY;
355 else
356 return SOCKNAL_MATCH_YES;
357 }
358 }
359
/* (Sink) handle incoming ZC request from sender: acknowledge the cookie,
 * preferably by piggybacking it on traffic already queued to the peer,
 * otherwise by launching a standalone NOOP packet.
 * Returns 0 on success or a negative errno. */
static int
ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote)
{
	ksock_peer_t *peer = c->ksnc_peer;
	ksock_conn_t *conn;
	ksock_tx_t *tx;
	int rc;

	read_lock(&ksocknal_data.ksnd_global_lock);

	conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
	if (conn != NULL) {
		ksock_sched_t *sched = conn->ksnc_scheduler;

		LASSERT(conn->ksnc_proto->pro_queue_tx_zcack != NULL);

		/* pro_queue_tx_zcack must be called under the scheduler's
		 * BH lock (see the protocol-entries comment above) */
		spin_lock_bh(&sched->kss_lock);

		rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);

		spin_unlock_bh(&sched->kss_lock);

		if (rc) { /* piggybacked */
			read_unlock(&ksocknal_data.ksnd_global_lock);
			return 0;
		}
	}

	read_unlock(&ksocknal_data.ksnd_global_lock);

	/* ACK connection is not ready, or can't piggyback the ACK */
	tx = ksocknal_alloc_tx_noop(cookie, !!remote);
	if (tx == NULL)
		return -ENOMEM;

	rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id);
	if (rc == 0)
		return 0;

	/* launch failed: release the NOOP we allocated */
	ksocknal_free_tx(tx);
	return rc;
}
403
/* (Sender) handle ZC_ACK from sink: retire every pending zero-copy request
 * covered by the acknowledged cookie(s).
 * Cookie encoding (mirrors ksocknal_queue_tx_zcack_v3):
 *   cookie1 == 0       : single ACK for cookie2
 *   cookie1 >  cookie2 : exactly two separate cookies
 *   otherwise          : inclusive range [cookie1, cookie2]
 * Returns 0 on success, -EPROTO if not all acked cookies were found. */
static int
ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2)
{
	ksock_peer_t *peer = conn->ksnc_peer;
	ksock_tx_t *tx;
	ksock_tx_t *tmp;
	LIST_HEAD(zlist);
	int count;

	if (cookie1 == 0)
		cookie1 = cookie2;

	/* count = number of cookies this ACK covers */
	count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);

	if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
	    conn->ksnc_proto == &ksocknal_protocol_v3x) {
		/* keepalive PING for V3.x, just ignore it */
		return count == 1 ? 0 : -EPROTO;
	}

	spin_lock(&peer->ksnp_lock);

	/* move matching ZC requests to a private list so the decrefs can
	 * happen after dropping the peer lock */
	list_for_each_entry_safe(tx, tmp,
				 &peer->ksnp_zc_req_list, tx_zc_list) {
		__u64 c = tx->tx_msg.ksm_zc_cookies[0];

		if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) {
			tx->tx_msg.ksm_zc_cookies[0] = 0;
			list_del(&tx->tx_zc_list);
			list_add(&tx->tx_zc_list, &zlist);

			if (--count == 0)
				break;
		}
	}

	spin_unlock(&peer->ksnp_lock);

	while (!list_empty(&zlist)) {
		tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
		list_del(&tx->tx_zc_list);
		/* drop the reference held for the pending ZC request */
		ksocknal_tx_decref(tx);
	}

	/* count != 0 here means some acked cookies had no pending request */
	return count == 0 ? 0 : -EPROTO;
}
451
/* Send a HELLO in V1.x wire format: a little-endian lnet_hdr_t followed by
 * the sender's interface IPs.  Returns 0 on success or a negative errno. */
static int
ksocknal_send_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello)
{
	struct socket *sock = conn->ksnc_sock;
	lnet_hdr_t *hdr;
	lnet_magicversion_t *hmv;
	int rc;
	int i;

	/* the magic/version block must exactly overlay the head of lnet_hdr_t */
	CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid));

	LIBCFS_ALLOC(hdr, sizeof(*hdr));
	if (hdr == NULL) {
		CERROR("Can't allocate lnet_hdr_t\n");
		return -ENOMEM;
	}

	hmv = (lnet_magicversion_t *)&hdr->dest_nid;

	/* Re-organize V2.x message header to V1.x (lnet_hdr_t)
	 * header and send out */
	hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC);
	hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR);
	hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR);

	if (the_lnet.ln_testprotocompat != 0) {
		/* single-shot proto check: deliberately corrupt one field
		 * to exercise the peer's rejection paths */
		LNET_LOCK();
		if ((the_lnet.ln_testprotocompat & 1) != 0) {
			hmv->version_major++; /* just different! */
			the_lnet.ln_testprotocompat &= ~1;
		}
		if ((the_lnet.ln_testprotocompat & 2) != 0) {
			hmv->magic = LNET_PROTO_MAGIC;
			the_lnet.ln_testprotocompat &= ~2;
		}
		LNET_UNLOCK();
	}

	/* all fields travel little-endian in V1.x */
	hdr->src_nid = cpu_to_le64 (hello->kshm_src_nid);
	hdr->src_pid = cpu_to_le32 (hello->kshm_src_pid);
	hdr->type = cpu_to_le32 (LNET_MSG_HELLO);
	hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32));
	hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype);
	hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation);

	rc = libcfs_sock_write(sock, hdr, sizeof(*hdr),
			       lnet_acceptor_timeout());

	if (rc != 0) {
		CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
			rc, &conn->ksnc_ipaddr, conn->ksnc_port);
		goto out;
	}

	if (hello->kshm_nips == 0)
		goto out;

	/* payload: the interface IPs, converted in place to little-endian */
	for (i = 0; i < (int) hello->kshm_nips; i++) {
		hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]);
	}

	rc = libcfs_sock_write(sock, hello->kshm_ips,
			       hello->kshm_nips * sizeof(__u32),
			       lnet_acceptor_timeout());
	if (rc != 0) {
		CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
			rc, hello->kshm_nips,
			&conn->ksnc_ipaddr, conn->ksnc_port);
	}
out:
	LIBCFS_FREE(hdr, sizeof(*hdr));

	return rc;
}
527
528 static int
ksocknal_send_hello_v2(ksock_conn_t * conn,ksock_hello_msg_t * hello)529 ksocknal_send_hello_v2(ksock_conn_t *conn, ksock_hello_msg_t *hello)
530 {
531 struct socket *sock = conn->ksnc_sock;
532 int rc;
533
534 hello->kshm_magic = LNET_PROTO_MAGIC;
535 hello->kshm_version = conn->ksnc_proto->pro_version;
536
537 if (the_lnet.ln_testprotocompat != 0) {
538 /* single-shot proto check */
539 LNET_LOCK();
540 if ((the_lnet.ln_testprotocompat & 1) != 0) {
541 hello->kshm_version++; /* just different! */
542 the_lnet.ln_testprotocompat &= ~1;
543 }
544 LNET_UNLOCK();
545 }
546
547 rc = libcfs_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips),
548 lnet_acceptor_timeout());
549
550 if (rc != 0) {
551 CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
552 rc, &conn->ksnc_ipaddr, conn->ksnc_port);
553 return rc;
554 }
555
556 if (hello->kshm_nips == 0)
557 return 0;
558
559 rc = libcfs_sock_write(sock, hello->kshm_ips,
560 hello->kshm_nips * sizeof(__u32),
561 lnet_acceptor_timeout());
562 if (rc != 0) {
563 CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
564 rc, hello->kshm_nips,
565 &conn->ksnc_ipaddr, conn->ksnc_port);
566 }
567
568 return rc;
569 }
570
/* Receive the remainder of a V1.x HELLO (the caller already consumed the
 * magic/version words) and translate it into *hello.
 * Returns 0 on success or a negative errno. */
static int
ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello,
		       int timeout)
{
	struct socket *sock = conn->ksnc_sock;
	lnet_hdr_t *hdr;
	int rc;
	int i;

	LIBCFS_ALLOC(hdr, sizeof(*hdr));
	if (hdr == NULL) {
		CERROR("Can't allocate lnet_hdr_t\n");
		return -ENOMEM;
	}

	/* read from src_nid onward: the fields before it were consumed by
	 * the caller while identifying the protocol */
	rc = libcfs_sock_read(sock, &hdr->src_nid,
			      sizeof(*hdr) - offsetof(lnet_hdr_t, src_nid),
			      timeout);
	if (rc != 0) {
		CERROR("Error %d reading rest of HELLO hdr from %pI4h\n",
		       rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		goto out;
	}

	/* ...and check we got what we expected */
	if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) {
		CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n",
		       le32_to_cpu(hdr->type),
		       &conn->ksnc_ipaddr);
		rc = -EPROTO;
		goto out;
	}

	/* translate the little-endian wire header into *hello */
	hello->kshm_src_nid = le64_to_cpu(hdr->src_nid);
	hello->kshm_src_pid = le32_to_cpu(hdr->src_pid);
	hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation);
	hello->kshm_ctype = le32_to_cpu(hdr->msg.hello.type);
	hello->kshm_nips = le32_to_cpu(hdr->payload_length) /
			   sizeof(__u32);

	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
		CERROR("Bad nips %d from ip %pI4h\n",
		       hello->kshm_nips, &conn->ksnc_ipaddr);
		rc = -EPROTO;
		goto out;
	}

	if (hello->kshm_nips == 0)
		goto out;

	rc = libcfs_sock_read(sock, hello->kshm_ips,
			      hello->kshm_nips * sizeof(__u32), timeout);
	if (rc != 0) {
		CERROR("Error %d reading IPs from ip %pI4h\n",
		       rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		goto out;
	}

	for (i = 0; i < (int) hello->kshm_nips; i++) {
		hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);

		/* a zero interface IP is a protocol violation */
		if (hello->kshm_ips[i] == 0) {
			CERROR("Zero IP[%d] from ip %pI4h\n",
			       i, &conn->ksnc_ipaddr);
			rc = -EPROTO;
			break;
		}
	}
out:
	LIBCFS_FREE(hdr, sizeof(*hdr));

	return rc;
}
646
/* Receive the remainder of a V2.x+ HELLO (the caller already consumed
 * kshm_magic and kshm_version), byte-swapping if the peer's byte order
 * differs.  Returns 0 on success or a negative errno. */
static int
ksocknal_recv_hello_v2(ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout)
{
	struct socket *sock = conn->ksnc_sock;
	int rc;
	int i;

	/* the caller read kshm_magic; if it wasn't in our byte order, the
	 * rest of the message needs swabbing too */
	if (hello->kshm_magic == LNET_PROTO_MAGIC)
		conn->ksnc_flip = 0;
	else
		conn->ksnc_flip = 1;

	/* read the rest of the fixed-size part, starting at kshm_src_nid */
	rc = libcfs_sock_read(sock, &hello->kshm_src_nid,
			      offsetof(ksock_hello_msg_t, kshm_ips) -
			      offsetof(ksock_hello_msg_t, kshm_src_nid),
			      timeout);
	if (rc != 0) {
		CERROR("Error %d reading HELLO from %pI4h\n",
		       rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		return rc;
	}

	if (conn->ksnc_flip) {
		__swab32s(&hello->kshm_src_pid);
		__swab64s(&hello->kshm_src_nid);
		__swab32s(&hello->kshm_dst_pid);
		__swab64s(&hello->kshm_dst_nid);
		__swab64s(&hello->kshm_src_incarnation);
		__swab64s(&hello->kshm_dst_incarnation);
		__swab32s(&hello->kshm_ctype);
		__swab32s(&hello->kshm_nips);
	}

	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
		CERROR("Bad nips %d from ip %pI4h\n",
		       hello->kshm_nips, &conn->ksnc_ipaddr);
		return -EPROTO;
	}

	if (hello->kshm_nips == 0)
		return 0;

	/* variable-length payload: the peer's interface IPs */
	rc = libcfs_sock_read(sock, hello->kshm_ips,
			      hello->kshm_nips * sizeof(__u32), timeout);
	if (rc != 0) {
		CERROR("Error %d reading IPs from ip %pI4h\n",
		       rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		return rc;
	}

	for (i = 0; i < (int) hello->kshm_nips; i++) {
		if (conn->ksnc_flip)
			__swab32s(&hello->kshm_ips[i]);

		/* a zero interface IP is a protocol violation */
		if (hello->kshm_ips[i] == 0) {
			CERROR("Zero IP[%d] from ip %pI4h\n",
			       i, &conn->ksnc_ipaddr);
			return -EPROTO;
		}
	}

	return 0;
}
712
713 static void
ksocknal_pack_msg_v1(ksock_tx_t * tx)714 ksocknal_pack_msg_v1(ksock_tx_t *tx)
715 {
716 /* V1.x has no KSOCK_MSG_NOOP */
717 LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
718 LASSERT(tx->tx_lnetmsg != NULL);
719
720 tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr;
721 tx->tx_iov[0].iov_len = sizeof(lnet_hdr_t);
722
723 tx->tx_resid = tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t);
724 }
725
726 static void
ksocknal_pack_msg_v2(ksock_tx_t * tx)727 ksocknal_pack_msg_v2(ksock_tx_t *tx)
728 {
729 tx->tx_iov[0].iov_base = &tx->tx_msg;
730
731 if (tx->tx_lnetmsg != NULL) {
732 LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
733
734 tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
735 tx->tx_iov[0].iov_len = sizeof(ksock_msg_t);
736 tx->tx_resid = tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len;
737 } else {
738 LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
739
740 tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
741 tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
742 }
743 /* Don't checksum before start sending, because packet can be piggybacked with ACK */
744 }
745
746 static void
ksocknal_unpack_msg_v1(ksock_msg_t * msg)747 ksocknal_unpack_msg_v1(ksock_msg_t *msg)
748 {
749 msg->ksm_csum = 0;
750 msg->ksm_type = KSOCK_MSG_LNET;
751 msg->ksm_zc_cookies[0] = msg->ksm_zc_cookies[1] = 0;
752 }
753
754 static void
ksocknal_unpack_msg_v2(ksock_msg_t * msg)755 ksocknal_unpack_msg_v2(ksock_msg_t *msg)
756 {
757 return; /* Do nothing */
758 }
759
/* V1.x: bare lnet_hdr_t on the wire; no KSOCK_MSG_NOOP (see
 * ksocknal_pack_msg_v1), hence no ZC handlers or ZC-ACK queueing */
ksock_proto_t ksocknal_protocol_v1x = {
	.pro_version = KSOCK_PROTO_V1,
	.pro_send_hello = ksocknal_send_hello_v1,
	.pro_recv_hello = ksocknal_recv_hello_v1,
	.pro_pack = ksocknal_pack_msg_v1,
	.pro_unpack = ksocknal_unpack_msg_v1,
	.pro_queue_tx_msg = ksocknal_queue_tx_msg_v1,
	.pro_handle_zcreq = NULL,
	.pro_handle_zcack = NULL,
	.pro_queue_tx_zcack = NULL,
	.pro_match_tx = ksocknal_match_tx
};
772
/* V2.x: ksock_msg_t headers with zero-copy request/ack support */
ksock_proto_t ksocknal_protocol_v2x = {
	.pro_version = KSOCK_PROTO_V2,
	.pro_send_hello = ksocknal_send_hello_v2,
	.pro_recv_hello = ksocknal_recv_hello_v2,
	.pro_pack = ksocknal_pack_msg_v2,
	.pro_unpack = ksocknal_unpack_msg_v2,
	.pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2,
	.pro_handle_zcreq = ksocknal_handle_zcreq,
	.pro_handle_zcack = ksocknal_handle_zcack,
	.pro_match_tx = ksocknal_match_tx
};
785
/* V3.x: V2.x wire format plus a dedicated ACK connection type with
 * batched ZC-ACKs (ksocknal_queue_tx_zcack_v3 / ksocknal_match_tx_v3) */
ksock_proto_t ksocknal_protocol_v3x = {
	.pro_version = KSOCK_PROTO_V3,
	.pro_send_hello = ksocknal_send_hello_v2,
	.pro_recv_hello = ksocknal_recv_hello_v2,
	.pro_pack = ksocknal_pack_msg_v2,
	.pro_unpack = ksocknal_unpack_msg_v2,
	.pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3,
	.pro_handle_zcreq = ksocknal_handle_zcreq,
	.pro_handle_zcack = ksocknal_handle_zcack,
	.pro_match_tx = ksocknal_match_tx_v3
};
798