1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36 
37 #define DEBUG_SUBSYSTEM S_LNET
38 #include "../../include/linux/lnet/lib-lnet.h"
39 #include <linux/log2.h>
40 #include <linux/ktime.h>
41 
42 #define D_LNI D_CONSOLE
43 
/*
 * The single global LNet instance.  Every function in this file reads or
 * updates its state through this object; it is exported so that other
 * modules can reference it as well.
 */
44 lnet_t      the_lnet;			   /* THE state of the network */
45 EXPORT_SYMBOL(the_lnet);
46 
47 
/*
 * Module parameters.  All of them are registered with mode 0444.
 *
 * 'ip2nets' and 'networks' are mutually exclusive: lnet_get_networks()
 * refuses to continue when both are non-empty.
 */
48 static char *ip2nets = "";
49 module_param(ip2nets, charp, 0444);
50 MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
51 
/* Explicit list of local networks; empty means "not specified". */
52 static char *networks = "";
53 module_param(networks, charp, 0444);
54 MODULE_PARM_DESC(networks, "local networks");
55 
/* Route specification string, handed out by lnet_get_routes(). */
56 static char *routes = "";
57 module_param(routes, charp, 0444);
58 MODULE_PARM_DESC(routes, "routes to non-local networks");
59 
/* Remote network hash table size; defaults to LNET_REMOTE_NETS_HASH_DEFAULT. */
60 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
61 module_param(rnet_htable_size, int, 0444);
62 MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
63 
64 static char *
lnet_get_routes(void)65 lnet_get_routes(void)
66 {
67 	return routes;
68 }
69 
70 static char *
lnet_get_networks(void)71 lnet_get_networks(void)
72 {
73 	char   *nets;
74 	int     rc;
75 
76 	if (*networks != 0 && *ip2nets != 0) {
77 		LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or 'ip2nets' but not both at once\n");
78 		return NULL;
79 	}
80 
81 	if (*ip2nets != 0) {
82 		rc = lnet_parse_ip2nets(&nets, ip2nets);
83 		return (rc == 0) ? nets : NULL;
84 	}
85 
86 	if (*networks != 0)
87 		return networks;
88 
89 	return "tcp";
90 }
91 
92 static void
lnet_init_locks(void)93 lnet_init_locks(void)
94 {
95 	spin_lock_init(&the_lnet.ln_eq_wait_lock);
96 	init_waitqueue_head(&the_lnet.ln_eq_waitq);
97 	mutex_init(&the_lnet.ln_lnd_mutex);
98 	mutex_init(&the_lnet.ln_api_mutex);
99 }
100 
/*
 * Counterpart of lnet_init_locks().  Intentionally empty: this function
 * performs no teardown.
 */
static void lnet_fini_locks(void)
{
	/* nothing to do */
}
105 
106 
107 static int
lnet_create_remote_nets_table(void)108 lnet_create_remote_nets_table(void)
109 {
110 	int		i;
111 	struct list_head	*hash;
112 
113 	LASSERT(the_lnet.ln_remote_nets_hash == NULL);
114 	LASSERT(the_lnet.ln_remote_nets_hbits > 0);
115 	LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
116 	if (hash == NULL) {
117 		CERROR("Failed to create remote nets hash table\n");
118 		return -ENOMEM;
119 	}
120 
121 	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
122 		INIT_LIST_HEAD(&hash[i]);
123 	the_lnet.ln_remote_nets_hash = hash;
124 	return 0;
125 }
126 
127 static void
lnet_destroy_remote_nets_table(void)128 lnet_destroy_remote_nets_table(void)
129 {
130 	int i;
131 
132 	if (the_lnet.ln_remote_nets_hash == NULL)
133 		return;
134 
135 	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
136 		LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
137 
138 	LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
139 		    LNET_REMOTE_NETS_HASH_SIZE *
140 		    sizeof(the_lnet.ln_remote_nets_hash[0]));
141 	the_lnet.ln_remote_nets_hash = NULL;
142 }
143 
144 static void
lnet_destroy_locks(void)145 lnet_destroy_locks(void)
146 {
147 	if (the_lnet.ln_res_lock != NULL) {
148 		cfs_percpt_lock_free(the_lnet.ln_res_lock);
149 		the_lnet.ln_res_lock = NULL;
150 	}
151 
152 	if (the_lnet.ln_net_lock != NULL) {
153 		cfs_percpt_lock_free(the_lnet.ln_net_lock);
154 		the_lnet.ln_net_lock = NULL;
155 	}
156 
157 	lnet_fini_locks();
158 }
159 
160 static int
lnet_create_locks(void)161 lnet_create_locks(void)
162 {
163 	lnet_init_locks();
164 
165 	the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
166 	if (the_lnet.ln_res_lock == NULL)
167 		goto failed;
168 
169 	the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
170 	if (the_lnet.ln_net_lock == NULL)
171 		goto failed;
172 
173 	return 0;
174 
175  failed:
176 	lnet_destroy_locks();
177 	return -ENOMEM;
178 }
179 
lnet_assert_wire_constants(void)180 static void lnet_assert_wire_constants(void)
181 {
182 	/* Wire protocol assertions generated by 'wirecheck'
183 	 * running on Linux robert.bartonsoftware.com 2.6.8-1.521
184 	 * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
185 	 * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
186 
187 	/* Constants... */
188 	CLASSERT(LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
189 	CLASSERT(LNET_PROTO_TCP_VERSION_MAJOR == 1);
190 	CLASSERT(LNET_PROTO_TCP_VERSION_MINOR == 0);
191 	CLASSERT(LNET_MSG_ACK == 0);
192 	CLASSERT(LNET_MSG_PUT == 1);
193 	CLASSERT(LNET_MSG_GET == 2);
194 	CLASSERT(LNET_MSG_REPLY == 3);
195 	CLASSERT(LNET_MSG_HELLO == 4);
196 
197 	/* Checks for struct ptl_handle_wire_t */
198 	CLASSERT((int)sizeof(lnet_handle_wire_t) == 16);
199 	CLASSERT((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
200 	CLASSERT((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
201 	CLASSERT((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
202 	CLASSERT((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);
203 
204 	/* Checks for struct lnet_magicversion_t */
205 	CLASSERT((int)sizeof(lnet_magicversion_t) == 8);
206 	CLASSERT((int)offsetof(lnet_magicversion_t, magic) == 0);
207 	CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
208 	CLASSERT((int)offsetof(lnet_magicversion_t, version_major) == 4);
209 	CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
210 	CLASSERT((int)offsetof(lnet_magicversion_t, version_minor) == 6);
211 	CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
212 
213 	/* Checks for struct lnet_hdr_t */
214 	CLASSERT((int)sizeof(lnet_hdr_t) == 72);
215 	CLASSERT((int)offsetof(lnet_hdr_t, dest_nid) == 0);
216 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
217 	CLASSERT((int)offsetof(lnet_hdr_t, src_nid) == 8);
218 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
219 	CLASSERT((int)offsetof(lnet_hdr_t, dest_pid) == 16);
220 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
221 	CLASSERT((int)offsetof(lnet_hdr_t, src_pid) == 20);
222 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
223 	CLASSERT((int)offsetof(lnet_hdr_t, type) == 24);
224 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
225 	CLASSERT((int)offsetof(lnet_hdr_t, payload_length) == 28);
226 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
227 	CLASSERT((int)offsetof(lnet_hdr_t, msg) == 32);
228 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
229 
230 	/* Ack */
231 	CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
232 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
233 	CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
234 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
235 	CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
236 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
237 
238 	/* Put */
239 	CLASSERT((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
240 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
241 	CLASSERT((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
242 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
243 	CLASSERT((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
244 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
245 	CLASSERT((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
246 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
247 	CLASSERT((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
248 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
249 
250 	/* Get */
251 	CLASSERT((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
252 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
253 	CLASSERT((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
254 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
255 	CLASSERT((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
256 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
257 	CLASSERT((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
258 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
259 	CLASSERT((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
260 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
261 
262 	/* Reply */
263 	CLASSERT((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
264 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
265 
266 	/* Hello */
267 	CLASSERT((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
268 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
269 	CLASSERT((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
270 	CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
271 }
272 
273 static lnd_t *
lnet_find_lnd_by_type(int type)274 lnet_find_lnd_by_type(int type)
275 {
276 	lnd_t	      *lnd;
277 	struct list_head	 *tmp;
278 
279 	/* holding lnd mutex */
280 	list_for_each(tmp, &the_lnet.ln_lnds) {
281 		lnd = list_entry(tmp, lnd_t, lnd_list);
282 
283 		if ((int)lnd->lnd_type == type)
284 			return lnd;
285 	}
286 
287 	return NULL;
288 }
289 
290 void
lnet_register_lnd(lnd_t * lnd)291 lnet_register_lnd(lnd_t *lnd)
292 {
293 	LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
294 
295 	LASSERT(the_lnet.ln_init);
296 	LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
297 	LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
298 
299 	list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
300 	lnd->lnd_refcount = 0;
301 
302 	CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
303 
304 	LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
305 }
306 EXPORT_SYMBOL(lnet_register_lnd);
307 
308 void
lnet_unregister_lnd(lnd_t * lnd)309 lnet_unregister_lnd(lnd_t *lnd)
310 {
311 	LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
312 
313 	LASSERT(the_lnet.ln_init);
314 	LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
315 	LASSERT(lnd->lnd_refcount == 0);
316 
317 	list_del(&lnd->lnd_list);
318 	CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
319 
320 	LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
321 }
322 EXPORT_SYMBOL(lnet_unregister_lnd);
323 
324 void
lnet_counters_get(lnet_counters_t * counters)325 lnet_counters_get(lnet_counters_t *counters)
326 {
327 	lnet_counters_t *ctr;
328 	int		i;
329 
330 	memset(counters, 0, sizeof(*counters));
331 
332 	lnet_net_lock(LNET_LOCK_EX);
333 
334 	cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
335 		counters->msgs_max     += ctr->msgs_max;
336 		counters->msgs_alloc   += ctr->msgs_alloc;
337 		counters->errors       += ctr->errors;
338 		counters->send_count   += ctr->send_count;
339 		counters->recv_count   += ctr->recv_count;
340 		counters->route_count  += ctr->route_count;
341 		counters->drop_count   += ctr->drop_count;
342 		counters->send_length  += ctr->send_length;
343 		counters->recv_length  += ctr->recv_length;
344 		counters->route_length += ctr->route_length;
345 		counters->drop_length  += ctr->drop_length;
346 
347 	}
348 	lnet_net_unlock(LNET_LOCK_EX);
349 }
350 EXPORT_SYMBOL(lnet_counters_get);
351 
352 void
lnet_counters_reset(void)353 lnet_counters_reset(void)
354 {
355 	lnet_counters_t *counters;
356 	int		i;
357 
358 	lnet_net_lock(LNET_LOCK_EX);
359 
360 	cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
361 		memset(counters, 0, sizeof(lnet_counters_t));
362 
363 	lnet_net_unlock(LNET_LOCK_EX);
364 }
365 EXPORT_SYMBOL(lnet_counters_reset);
366 
367 #ifdef LNET_USE_LIB_FREELIST
368 
369 int
lnet_freelist_init(lnet_freelist_t * fl,int n,int size)370 lnet_freelist_init(lnet_freelist_t *fl, int n, int size)
371 {
372 	char *space;
373 
374 	LASSERT(n > 0);
375 
376 	size += offsetof(lnet_freeobj_t, fo_contents);
377 
378 	LIBCFS_ALLOC(space, n * size);
379 	if (space == NULL)
380 		return -ENOMEM;
381 
382 	INIT_LIST_HEAD(&fl->fl_list);
383 	fl->fl_objs = space;
384 	fl->fl_nobjs = n;
385 	fl->fl_objsize = size;
386 
387 	do {
388 		memset(space, 0, size);
389 		list_add((struct list_head *)space, &fl->fl_list);
390 		space += size;
391 	} while (--n != 0);
392 
393 	return 0;
394 }
395 
396 void
lnet_freelist_fini(lnet_freelist_t * fl)397 lnet_freelist_fini(lnet_freelist_t *fl)
398 {
399 	struct list_head       *el;
400 	int	       count;
401 
402 	if (fl->fl_nobjs == 0)
403 		return;
404 
405 	count = 0;
406 	for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
407 		count++;
408 
409 	LASSERT(count == fl->fl_nobjs);
410 
411 	LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
412 	memset(fl, 0, sizeof(*fl));
413 }
414 
415 #endif /* LNET_USE_LIB_FREELIST */
416 
417 static __u64
lnet_create_interface_cookie(void)418 lnet_create_interface_cookie(void)
419 {
420 	/* NB the interface cookie in wire handles guards against delayed
421 	 * replies and ACKs appearing valid after reboot.
422 	 */
423 	return ktime_get_ns();
424 }
425 
426 static char *
lnet_res_type2str(int type)427 lnet_res_type2str(int type)
428 {
429 	switch (type) {
430 	default:
431 		LBUG();
432 	case LNET_COOKIE_TYPE_MD:
433 		return "MD";
434 	case LNET_COOKIE_TYPE_ME:
435 		return "ME";
436 	case LNET_COOKIE_TYPE_EQ:
437 		return "EQ";
438 	}
439 }
440 
441 static void
lnet_res_container_cleanup(struct lnet_res_container * rec)442 lnet_res_container_cleanup(struct lnet_res_container *rec)
443 {
444 	int	count = 0;
445 
446 	if (rec->rec_type == 0) /* not set yet, it's uninitialized */
447 		return;
448 
449 	while (!list_empty(&rec->rec_active)) {
450 		struct list_head *e = rec->rec_active.next;
451 
452 		list_del_init(e);
453 		if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
454 			lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
455 
456 		} else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
457 			lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
458 
459 		} else { /* NB: Active MEs should be attached on portals */
460 			LBUG();
461 		}
462 		count++;
463 	}
464 
465 	if (count > 0) {
466 		/* Found alive MD/ME/EQ, user really should unlink/free
467 		 * all of them before finalize LNet, but if someone didn't,
468 		 * we have to recycle garbage for him */
469 		CERROR("%d active elements on exit of %s container\n",
470 		       count, lnet_res_type2str(rec->rec_type));
471 	}
472 
473 #ifdef LNET_USE_LIB_FREELIST
474 	lnet_freelist_fini(&rec->rec_freelist);
475 #endif
476 	if (rec->rec_lh_hash != NULL) {
477 		LIBCFS_FREE(rec->rec_lh_hash,
478 			    LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
479 		rec->rec_lh_hash = NULL;
480 	}
481 
482 	rec->rec_type = 0; /* mark it as finalized */
483 }
484 
485 static int
lnet_res_container_setup(struct lnet_res_container * rec,int cpt,int type,int objnum,int objsz)486 lnet_res_container_setup(struct lnet_res_container *rec,
487 			 int cpt, int type, int objnum, int objsz)
488 {
489 	int	rc = 0;
490 	int	i;
491 
492 	LASSERT(rec->rec_type == 0);
493 
494 	rec->rec_type = type;
495 	INIT_LIST_HEAD(&rec->rec_active);
496 
497 #ifdef LNET_USE_LIB_FREELIST
498 	memset(&rec->rec_freelist, 0, sizeof(rec->rec_freelist));
499 	rc = lnet_freelist_init(&rec->rec_freelist, objnum, objsz);
500 	if (rc != 0)
501 		goto out;
502 #endif
503 	rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
504 
505 	/* Arbitrary choice of hash table size */
506 	LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
507 			 LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
508 	if (rec->rec_lh_hash == NULL) {
509 		rc = -ENOMEM;
510 		goto out;
511 	}
512 
513 	for (i = 0; i < LNET_LH_HASH_SIZE; i++)
514 		INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
515 
516 	return 0;
517 
518 out:
519 	CERROR("Failed to setup %s resource container\n",
520 	       lnet_res_type2str(type));
521 	lnet_res_container_cleanup(rec);
522 	return rc;
523 }
524 
525 static void
lnet_res_containers_destroy(struct lnet_res_container ** recs)526 lnet_res_containers_destroy(struct lnet_res_container **recs)
527 {
528 	struct lnet_res_container	*rec;
529 	int				i;
530 
531 	cfs_percpt_for_each(rec, i, recs)
532 		lnet_res_container_cleanup(rec);
533 
534 	cfs_percpt_free(recs);
535 }
536 
537 static struct lnet_res_container **
lnet_res_containers_create(int type,int objnum,int objsz)538 lnet_res_containers_create(int type, int objnum, int objsz)
539 {
540 	struct lnet_res_container	**recs;
541 	struct lnet_res_container	*rec;
542 	int				rc;
543 	int				i;
544 
545 	recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
546 	if (recs == NULL) {
547 		CERROR("Failed to allocate %s resource containers\n",
548 		       lnet_res_type2str(type));
549 		return NULL;
550 	}
551 
552 	cfs_percpt_for_each(rec, i, recs) {
553 		rc = lnet_res_container_setup(rec, i, type, objnum, objsz);
554 		if (rc != 0) {
555 			lnet_res_containers_destroy(recs);
556 			return NULL;
557 		}
558 	}
559 
560 	return recs;
561 }
562 
563 lnet_libhandle_t *
lnet_res_lh_lookup(struct lnet_res_container * rec,__u64 cookie)564 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
565 {
566 	/* ALWAYS called with lnet_res_lock held */
567 	struct list_head		*head;
568 	lnet_libhandle_t	*lh;
569 	unsigned int		hash;
570 
571 	if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
572 		return NULL;
573 
574 	hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
575 	head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
576 
577 	list_for_each_entry(lh, head, lh_hash_chain) {
578 		if (lh->lh_cookie == cookie)
579 			return lh;
580 	}
581 
582 	return NULL;
583 }
584 
585 void
lnet_res_lh_initialize(struct lnet_res_container * rec,lnet_libhandle_t * lh)586 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
587 {
588 	/* ALWAYS called with lnet_res_lock held */
589 	unsigned int	ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
590 	unsigned int	hash;
591 
592 	lh->lh_cookie = rec->rec_lh_cookie;
593 	rec->rec_lh_cookie += 1 << ibits;
594 
595 	hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
596 
597 	list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
598 }
599 
600 
601 int lnet_unprepare(void);
602 
603 static int
lnet_prepare(lnet_pid_t requested_pid)604 lnet_prepare(lnet_pid_t requested_pid)
605 {
606 	/* Prepare to bring up the network */
607 	struct lnet_res_container **recs;
608 	int			  rc = 0;
609 
610 	LASSERT(the_lnet.ln_refcount == 0);
611 
612 	the_lnet.ln_routing = 0;
613 
614 	LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
615 	the_lnet.ln_pid = requested_pid;
616 
617 	INIT_LIST_HEAD(&the_lnet.ln_test_peers);
618 	INIT_LIST_HEAD(&the_lnet.ln_nis);
619 	INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
620 	INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
621 	INIT_LIST_HEAD(&the_lnet.ln_routers);
622 
623 	rc = lnet_create_remote_nets_table();
624 	if (rc != 0)
625 		goto failed;
626 
627 	the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
628 
629 	the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
630 						sizeof(lnet_counters_t));
631 	if (the_lnet.ln_counters == NULL) {
632 		CERROR("Failed to allocate counters for LNet\n");
633 		rc = -ENOMEM;
634 		goto failed;
635 	}
636 
637 	rc = lnet_peer_tables_create();
638 	if (rc != 0)
639 		goto failed;
640 
641 	rc = lnet_msg_containers_create();
642 	if (rc != 0)
643 		goto failed;
644 
645 	rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
646 				      LNET_COOKIE_TYPE_EQ, LNET_FL_MAX_EQS,
647 				      sizeof(lnet_eq_t));
648 	if (rc != 0)
649 		goto failed;
650 
651 	recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME, LNET_FL_MAX_MES,
652 					  sizeof(lnet_me_t));
653 	if (recs == NULL) {
654 		rc = -ENOMEM;
655 		goto failed;
656 	}
657 
658 	the_lnet.ln_me_containers = recs;
659 
660 	recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD, LNET_FL_MAX_MDS,
661 					  sizeof(lnet_libmd_t));
662 	if (recs == NULL) {
663 		rc = -ENOMEM;
664 		goto failed;
665 	}
666 
667 	the_lnet.ln_md_containers = recs;
668 
669 	rc = lnet_portals_create();
670 	if (rc != 0) {
671 		CERROR("Failed to create portals for LNet: %d\n", rc);
672 		goto failed;
673 	}
674 
675 	return 0;
676 
677  failed:
678 	lnet_unprepare();
679 	return rc;
680 }
681 
682 int
lnet_unprepare(void)683 lnet_unprepare(void)
684 {
685 	/* NB no LNET_LOCK since this is the last reference.  All LND instances
686 	 * have shut down already, so it is safe to unlink and free all
687 	 * descriptors, even those that appear committed to a network op (eg MD
688 	 * with non-zero pending count) */
689 
690 	lnet_fail_nid(LNET_NID_ANY, 0);
691 
692 	LASSERT(the_lnet.ln_refcount == 0);
693 	LASSERT(list_empty(&the_lnet.ln_test_peers));
694 	LASSERT(list_empty(&the_lnet.ln_nis));
695 	LASSERT(list_empty(&the_lnet.ln_nis_cpt));
696 	LASSERT(list_empty(&the_lnet.ln_nis_zombie));
697 
698 	lnet_portals_destroy();
699 
700 	if (the_lnet.ln_md_containers != NULL) {
701 		lnet_res_containers_destroy(the_lnet.ln_md_containers);
702 		the_lnet.ln_md_containers = NULL;
703 	}
704 
705 	if (the_lnet.ln_me_containers != NULL) {
706 		lnet_res_containers_destroy(the_lnet.ln_me_containers);
707 		the_lnet.ln_me_containers = NULL;
708 	}
709 
710 	lnet_res_container_cleanup(&the_lnet.ln_eq_container);
711 
712 	lnet_msg_containers_destroy();
713 	lnet_peer_tables_destroy();
714 	lnet_rtrpools_free();
715 
716 	if (the_lnet.ln_counters != NULL) {
717 		cfs_percpt_free(the_lnet.ln_counters);
718 		the_lnet.ln_counters = NULL;
719 	}
720 	lnet_destroy_remote_nets_table();
721 
722 	return 0;
723 }
724 
725 lnet_ni_t  *
lnet_net2ni_locked(__u32 net,int cpt)726 lnet_net2ni_locked(__u32 net, int cpt)
727 {
728 	struct list_head	*tmp;
729 	lnet_ni_t	*ni;
730 
731 	LASSERT(cpt != LNET_LOCK_EX);
732 
733 	list_for_each(tmp, &the_lnet.ln_nis) {
734 		ni = list_entry(tmp, lnet_ni_t, ni_list);
735 
736 		if (LNET_NIDNET(ni->ni_nid) == net) {
737 			lnet_ni_addref_locked(ni, cpt);
738 			return ni;
739 		}
740 	}
741 
742 	return NULL;
743 }
744 
745 lnet_ni_t *
lnet_net2ni(__u32 net)746 lnet_net2ni(__u32 net)
747 {
748 	lnet_ni_t *ni;
749 
750 	lnet_net_lock(0);
751 	ni = lnet_net2ni_locked(net, 0);
752 	lnet_net_unlock(0);
753 
754 	return ni;
755 }
756 EXPORT_SYMBOL(lnet_net2ni);
757 
758 static unsigned int
lnet_nid_cpt_hash(lnet_nid_t nid,unsigned int number)759 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
760 {
761 	__u64		key = nid;
762 	unsigned int	val;
763 
764 	LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
765 
766 	if (number == 1)
767 		return 0;
768 
769 	val = hash_long(key, LNET_CPT_BITS);
770 	/* NB: LNET_CP_NUMBER doesn't have to be PO2 */
771 	if (val < number)
772 		return val;
773 
774 	return (unsigned int)(key + val + (val >> 1)) % number;
775 }
776 
777 int
lnet_cpt_of_nid_locked(lnet_nid_t nid)778 lnet_cpt_of_nid_locked(lnet_nid_t nid)
779 {
780 	struct lnet_ni *ni;
781 
782 	/* must called with hold of lnet_net_lock */
783 	if (LNET_CPT_NUMBER == 1)
784 		return 0; /* the only one */
785 
786 	/* take lnet_net_lock(any) would be OK */
787 	if (!list_empty(&the_lnet.ln_nis_cpt)) {
788 		list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
789 			if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
790 				continue;
791 
792 			LASSERT(ni->ni_cpts != NULL);
793 			return ni->ni_cpts[lnet_nid_cpt_hash
794 					   (nid, ni->ni_ncpts)];
795 		}
796 	}
797 
798 	return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
799 }
800 
801 int
lnet_cpt_of_nid(lnet_nid_t nid)802 lnet_cpt_of_nid(lnet_nid_t nid)
803 {
804 	int	cpt;
805 	int	cpt2;
806 
807 	if (LNET_CPT_NUMBER == 1)
808 		return 0; /* the only one */
809 
810 	if (list_empty(&the_lnet.ln_nis_cpt))
811 		return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
812 
813 	cpt = lnet_net_lock_current();
814 	cpt2 = lnet_cpt_of_nid_locked(nid);
815 	lnet_net_unlock(cpt);
816 
817 	return cpt2;
818 }
819 EXPORT_SYMBOL(lnet_cpt_of_nid);
820 
821 int
lnet_islocalnet(__u32 net)822 lnet_islocalnet(__u32 net)
823 {
824 	struct lnet_ni	*ni;
825 	int		cpt;
826 
827 	cpt = lnet_net_lock_current();
828 
829 	ni = lnet_net2ni_locked(net, cpt);
830 	if (ni != NULL)
831 		lnet_ni_decref_locked(ni, cpt);
832 
833 	lnet_net_unlock(cpt);
834 
835 	return ni != NULL;
836 }
837 
838 lnet_ni_t  *
lnet_nid2ni_locked(lnet_nid_t nid,int cpt)839 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
840 {
841 	struct lnet_ni	*ni;
842 	struct list_head	*tmp;
843 
844 	LASSERT(cpt != LNET_LOCK_EX);
845 
846 	list_for_each(tmp, &the_lnet.ln_nis) {
847 		ni = list_entry(tmp, lnet_ni_t, ni_list);
848 
849 		if (ni->ni_nid == nid) {
850 			lnet_ni_addref_locked(ni, cpt);
851 			return ni;
852 		}
853 	}
854 
855 	return NULL;
856 }
857 
858 int
lnet_islocalnid(lnet_nid_t nid)859 lnet_islocalnid(lnet_nid_t nid)
860 {
861 	struct lnet_ni	*ni;
862 	int		cpt;
863 
864 	cpt = lnet_net_lock_current();
865 	ni = lnet_nid2ni_locked(nid, cpt);
866 	if (ni != NULL)
867 		lnet_ni_decref_locked(ni, cpt);
868 	lnet_net_unlock(cpt);
869 
870 	return ni != NULL;
871 }
872 
873 int
lnet_count_acceptor_nis(void)874 lnet_count_acceptor_nis(void)
875 {
876 	/* Return the # of NIs that need the acceptor. */
877 	int		count = 0;
878 	struct list_head	*tmp;
879 	struct lnet_ni	*ni;
880 	int		cpt;
881 
882 	cpt = lnet_net_lock_current();
883 	list_for_each(tmp, &the_lnet.ln_nis) {
884 		ni = list_entry(tmp, lnet_ni_t, ni_list);
885 
886 		if (ni->ni_lnd->lnd_accept != NULL)
887 			count++;
888 	}
889 
890 	lnet_net_unlock(cpt);
891 
892 	return count;
893 }
894 
895 static int
lnet_ni_tq_credits(lnet_ni_t * ni)896 lnet_ni_tq_credits(lnet_ni_t *ni)
897 {
898 	int	credits;
899 
900 	LASSERT(ni->ni_ncpts >= 1);
901 
902 	if (ni->ni_ncpts == 1)
903 		return ni->ni_maxtxcredits;
904 
905 	credits = ni->ni_maxtxcredits / ni->ni_ncpts;
906 	credits = max(credits, 8 * ni->ni_peertxcredits);
907 	credits = min(credits, ni->ni_maxtxcredits);
908 
909 	return credits;
910 }
911 
912 static void
lnet_shutdown_lndnis(void)913 lnet_shutdown_lndnis(void)
914 {
915 	int		i;
916 	int		islo;
917 	lnet_ni_t	 *ni;
918 
919 	/* NB called holding the global mutex */
920 
921 	/* All quiet on the API front */
922 	LASSERT(!the_lnet.ln_shutdown);
923 	LASSERT(the_lnet.ln_refcount == 0);
924 	LASSERT(list_empty(&the_lnet.ln_nis_zombie));
925 
926 	lnet_net_lock(LNET_LOCK_EX);
927 	the_lnet.ln_shutdown = 1;	/* flag shutdown */
928 
929 	/* Unlink NIs from the global table */
930 	while (!list_empty(&the_lnet.ln_nis)) {
931 		ni = list_entry(the_lnet.ln_nis.next,
932 				    lnet_ni_t, ni_list);
933 		/* move it to zombie list and nobody can find it anymore */
934 		list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
935 		lnet_ni_decref_locked(ni, 0);	/* drop ln_nis' ref */
936 
937 		if (!list_empty(&ni->ni_cptlist)) {
938 			list_del_init(&ni->ni_cptlist);
939 			lnet_ni_decref_locked(ni, 0);
940 		}
941 	}
942 
943 	/* Drop the cached eqwait NI. */
944 	if (the_lnet.ln_eq_waitni != NULL) {
945 		lnet_ni_decref_locked(the_lnet.ln_eq_waitni, 0);
946 		the_lnet.ln_eq_waitni = NULL;
947 	}
948 
949 	/* Drop the cached loopback NI. */
950 	if (the_lnet.ln_loni != NULL) {
951 		lnet_ni_decref_locked(the_lnet.ln_loni, 0);
952 		the_lnet.ln_loni = NULL;
953 	}
954 
955 	lnet_net_unlock(LNET_LOCK_EX);
956 
957 	/* Clear lazy portals and drop delayed messages which hold refs
958 	 * on their lnet_msg_t::msg_rxpeer */
959 	for (i = 0; i < the_lnet.ln_nportals; i++)
960 		LNetClearLazyPortal(i);
961 
962 	/* Clear the peer table and wait for all peers to go (they hold refs on
963 	 * their NIs) */
964 	lnet_peer_tables_cleanup();
965 
966 	lnet_net_lock(LNET_LOCK_EX);
967 	/* Now wait for the NI's I just nuked to show up on ln_zombie_nis
968 	 * and shut them down in guaranteed thread context */
969 	i = 2;
970 	while (!list_empty(&the_lnet.ln_nis_zombie)) {
971 		int	*ref;
972 		int	j;
973 
974 		ni = list_entry(the_lnet.ln_nis_zombie.next,
975 				    lnet_ni_t, ni_list);
976 		list_del_init(&ni->ni_list);
977 		cfs_percpt_for_each(ref, j, ni->ni_refs) {
978 			if (*ref == 0)
979 				continue;
980 			/* still busy, add it back to zombie list */
981 			list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
982 			break;
983 		}
984 
985 		if (!list_empty(&ni->ni_list)) {
986 			lnet_net_unlock(LNET_LOCK_EX);
987 			++i;
988 			if ((i & (-i)) == i) {
989 				CDEBUG(D_WARNING, "Waiting for zombie LNI %s\n",
990 				       libcfs_nid2str(ni->ni_nid));
991 			}
992 			set_current_state(TASK_UNINTERRUPTIBLE);
993 			schedule_timeout(cfs_time_seconds(1));
994 			lnet_net_lock(LNET_LOCK_EX);
995 			continue;
996 		}
997 
998 		ni->ni_lnd->lnd_refcount--;
999 		lnet_net_unlock(LNET_LOCK_EX);
1000 
1001 		islo = ni->ni_lnd->lnd_type == LOLND;
1002 
1003 		LASSERT(!in_interrupt());
1004 		(ni->ni_lnd->lnd_shutdown)(ni);
1005 
1006 		/* can't deref lnd anymore now; it might have unregistered
1007 		 * itself...  */
1008 
1009 		if (!islo)
1010 			CDEBUG(D_LNI, "Removed LNI %s\n",
1011 			       libcfs_nid2str(ni->ni_nid));
1012 
1013 		lnet_ni_free(ni);
1014 		i = 2;
1015 
1016 		lnet_net_lock(LNET_LOCK_EX);
1017 	}
1018 
1019 	the_lnet.ln_shutdown = 0;
1020 	lnet_net_unlock(LNET_LOCK_EX);
1021 
1022 	if (the_lnet.ln_network_tokens != NULL) {
1023 		LIBCFS_FREE(the_lnet.ln_network_tokens,
1024 			    the_lnet.ln_network_tokens_nob);
1025 		the_lnet.ln_network_tokens = NULL;
1026 	}
1027 }
1028 
1029 static int
lnet_startup_lndnis(void)1030 lnet_startup_lndnis(void)
1031 {
1032 	lnd_t			*lnd;
1033 	struct lnet_ni		*ni;
1034 	struct lnet_tx_queue	*tq;
1035 	struct list_head		nilist;
1036 	int			i;
1037 	int		rc = 0;
1038 	int		lnd_type;
1039 	int		nicount = 0;
1040 	char	      *nets = lnet_get_networks();
1041 
1042 	INIT_LIST_HEAD(&nilist);
1043 
1044 	if (nets == NULL)
1045 		goto failed;
1046 
1047 	rc = lnet_parse_networks(&nilist, nets);
1048 	if (rc != 0)
1049 		goto failed;
1050 
1051 	while (!list_empty(&nilist)) {
1052 		ni = list_entry(nilist.next, lnet_ni_t, ni_list);
1053 		lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
1054 
1055 		LASSERT(libcfs_isknown_lnd(lnd_type));
1056 
1057 		if (lnd_type == CIBLND    ||
1058 		    lnd_type == OPENIBLND ||
1059 		    lnd_type == IIBLND    ||
1060 		    lnd_type == VIBLND) {
1061 			CERROR("LND %s obsoleted\n",
1062 			       libcfs_lnd2str(lnd_type));
1063 			goto failed;
1064 		}
1065 
1066 		LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1067 		lnd = lnet_find_lnd_by_type(lnd_type);
1068 
1069 		if (lnd == NULL) {
1070 			LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1071 			rc = request_module("%s",
1072 						libcfs_lnd2modname(lnd_type));
1073 			LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1074 
1075 			lnd = lnet_find_lnd_by_type(lnd_type);
1076 			if (lnd == NULL) {
1077 				LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1078 				CERROR("Can't load LND %s, module %s, rc=%d\n",
1079 				       libcfs_lnd2str(lnd_type),
1080 				       libcfs_lnd2modname(lnd_type), rc);
1081 				goto failed;
1082 			}
1083 		}
1084 
1085 		lnet_net_lock(LNET_LOCK_EX);
1086 		lnd->lnd_refcount++;
1087 		lnet_net_unlock(LNET_LOCK_EX);
1088 
1089 		ni->ni_lnd = lnd;
1090 
1091 		rc = (lnd->lnd_startup)(ni);
1092 
1093 		LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1094 
1095 		if (rc != 0) {
1096 			LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
1097 					   rc, libcfs_lnd2str(lnd->lnd_type));
1098 			lnet_net_lock(LNET_LOCK_EX);
1099 			lnd->lnd_refcount--;
1100 			lnet_net_unlock(LNET_LOCK_EX);
1101 			goto failed;
1102 		}
1103 
1104 		LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
1105 
1106 		list_del(&ni->ni_list);
1107 
1108 		lnet_net_lock(LNET_LOCK_EX);
1109 		/* refcount for ln_nis */
1110 		lnet_ni_addref_locked(ni, 0);
1111 		list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
1112 		if (ni->ni_cpts != NULL) {
1113 			list_add_tail(&ni->ni_cptlist,
1114 					  &the_lnet.ln_nis_cpt);
1115 			lnet_ni_addref_locked(ni, 0);
1116 		}
1117 
1118 		lnet_net_unlock(LNET_LOCK_EX);
1119 
1120 		if (lnd->lnd_type == LOLND) {
1121 			lnet_ni_addref(ni);
1122 			LASSERT(the_lnet.ln_loni == NULL);
1123 			the_lnet.ln_loni = ni;
1124 			continue;
1125 		}
1126 
1127 		if (ni->ni_peertxcredits == 0 ||
1128 		    ni->ni_maxtxcredits == 0) {
1129 			LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1130 					   libcfs_lnd2str(lnd->lnd_type),
1131 					   ni->ni_peertxcredits == 0 ?
1132 					   "" : "per-peer ");
1133 			goto failed;
1134 		}
1135 
1136 		cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1137 			tq->tq_credits_min =
1138 			tq->tq_credits_max =
1139 			tq->tq_credits = lnet_ni_tq_credits(ni);
1140 		}
1141 
1142 		CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1143 		       libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
1144 		       lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1145 		       ni->ni_peerrtrcredits, ni->ni_peertimeout);
1146 
1147 		nicount++;
1148 	}
1149 
1150 	if (the_lnet.ln_eq_waitni != NULL && nicount > 1) {
1151 		lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type;
1152 		LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network\n",
1153 				   libcfs_lnd2str(lnd_type));
1154 		goto failed;
1155 	}
1156 
1157 	return 0;
1158 
1159  failed:
1160 	lnet_shutdown_lndnis();
1161 
1162 	while (!list_empty(&nilist)) {
1163 		ni = list_entry(nilist.next, lnet_ni_t, ni_list);
1164 		list_del(&ni->ni_list);
1165 		lnet_ni_free(ni);
1166 	}
1167 
1168 	return -ENETDOWN;
1169 }
1170 
1171 /**
1172  * Initialize LNet library.
1173  *
1174  * Only userspace program needs to call this function - it's automatically
1175  * called in the kernel at module loading time. Caller has to call LNetFini()
1176  * after a call to LNetInit(), if and only if the latter returned 0. It must
1177  * be called exactly once.
1178  *
1179  * \return 0 on success, and -ve on failures.
1180  */
1181 int
LNetInit(void)1182 LNetInit(void)
1183 {
1184 	int	rc;
1185 
1186 	lnet_assert_wire_constants();
1187 	LASSERT(!the_lnet.ln_init);
1188 
1189 	memset(&the_lnet, 0, sizeof(the_lnet));
1190 
1191 	/* refer to global cfs_cpt_table for now */
1192 	the_lnet.ln_cpt_table	= cfs_cpt_table;
1193 	the_lnet.ln_cpt_number	= cfs_cpt_number(cfs_cpt_table);
1194 
1195 	LASSERT(the_lnet.ln_cpt_number > 0);
1196 	if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1197 		/* we are under risk of consuming all lh_cookie */
1198 		CERROR("Can't have %d CPTs for LNet (max allowed is %d), please change setting of CPT-table and retry\n",
1199 		       the_lnet.ln_cpt_number, LNET_CPT_MAX);
1200 		return -1;
1201 	}
1202 
1203 	while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1204 		the_lnet.ln_cpt_bits++;
1205 
1206 	rc = lnet_create_locks();
1207 	if (rc != 0) {
1208 		CERROR("Can't create LNet global locks: %d\n", rc);
1209 		return -1;
1210 	}
1211 
1212 	the_lnet.ln_refcount = 0;
1213 	the_lnet.ln_init = 1;
1214 	LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1215 	INIT_LIST_HEAD(&the_lnet.ln_lnds);
1216 	INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1217 	INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1218 
1219 	/* The hash table size is the number of bits it takes to express the set
1220 	 * ln_num_routes, minus 1 (better to under estimate than over so we
1221 	 * don't waste memory). */
1222 	if (rnet_htable_size <= 0)
1223 		rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1224 	else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1225 		rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1226 	the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1227 					   order_base_2(rnet_htable_size) - 1);
1228 
1229 	/* All LNDs apart from the LOLND are in separate modules.  They
1230 	 * register themselves when their module loads, and unregister
1231 	 * themselves when their module is unloaded. */
1232 	lnet_register_lnd(&the_lolnd);
1233 	return 0;
1234 }
1235 EXPORT_SYMBOL(LNetInit);
1236 
1237 /**
1238  * Finalize LNet library.
1239  *
1240  * Only userspace program needs to call this function. It can be called
1241  * at most once.
1242  *
1243  * \pre LNetInit() called with success.
1244  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1245  */
1246 void
LNetFini(void)1247 LNetFini(void)
1248 {
1249 	LASSERT(the_lnet.ln_init);
1250 	LASSERT(the_lnet.ln_refcount == 0);
1251 
1252 	while (!list_empty(&the_lnet.ln_lnds))
1253 		lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1254 						   lnd_t, lnd_list));
1255 	lnet_destroy_locks();
1256 
1257 	the_lnet.ln_init = 0;
1258 }
1259 EXPORT_SYMBOL(LNetFini);
1260 
1261 /**
1262  * Set LNet PID and start LNet interfaces, routing, and forwarding.
1263  *
1264  * Userspace program should call this after a successful call to LNetInit().
1265  * Users must call this function at least once before any other functions.
1266  * For each successful call there must be a corresponding call to
1267  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
1268  * ignored.
1269  *
1270  * The PID used by LNet may be different from the one requested.
1271  * See LNetGetId().
1272  *
1273  * \param requested_pid PID requested by the caller.
1274  *
1275  * \return >= 0 on success, and < 0 error code on failures.
1276  */
1277 int
LNetNIInit(lnet_pid_t requested_pid)1278 LNetNIInit(lnet_pid_t requested_pid)
1279 {
1280 	int	 im_a_router = 0;
1281 	int	 rc;
1282 
1283 	LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1284 
1285 	LASSERT(the_lnet.ln_init);
1286 	CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1287 
1288 	if (the_lnet.ln_refcount > 0) {
1289 		rc = the_lnet.ln_refcount++;
1290 		goto out;
1291 	}
1292 
1293 	lnet_get_tunables();
1294 
1295 	if (requested_pid == LNET_PID_ANY) {
1296 		/* Don't instantiate LNET just for me */
1297 		rc = -ENETDOWN;
1298 		goto failed0;
1299 	}
1300 
1301 	rc = lnet_prepare(requested_pid);
1302 	if (rc != 0)
1303 		goto failed0;
1304 
1305 	rc = lnet_startup_lndnis();
1306 	if (rc != 0)
1307 		goto failed1;
1308 
1309 	rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1310 	if (rc != 0)
1311 		goto failed2;
1312 
1313 	rc = lnet_check_routes();
1314 	if (rc != 0)
1315 		goto failed2;
1316 
1317 	rc = lnet_rtrpools_alloc(im_a_router);
1318 	if (rc != 0)
1319 		goto failed2;
1320 
1321 	rc = lnet_acceptor_start();
1322 	if (rc != 0)
1323 		goto failed2;
1324 
1325 	the_lnet.ln_refcount = 1;
1326 	/* Now I may use my own API functions... */
1327 
1328 	/* NB router checker needs the_lnet.ln_ping_info in
1329 	 * lnet_router_checker -> lnet_update_ni_status_locked */
1330 	rc = lnet_ping_target_init();
1331 	if (rc != 0)
1332 		goto failed3;
1333 
1334 	rc = lnet_router_checker_start();
1335 	if (rc != 0)
1336 		goto failed4;
1337 
1338 	lnet_proc_init();
1339 	goto out;
1340 
1341  failed4:
1342 	lnet_ping_target_fini();
1343  failed3:
1344 	the_lnet.ln_refcount = 0;
1345 	lnet_acceptor_stop();
1346  failed2:
1347 	lnet_destroy_routes();
1348 	lnet_shutdown_lndnis();
1349  failed1:
1350 	lnet_unprepare();
1351  failed0:
1352 	LASSERT(rc < 0);
1353  out:
1354 	LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1355 	return rc;
1356 }
1357 EXPORT_SYMBOL(LNetNIInit);
1358 
1359 /**
1360  * Stop LNet interfaces, routing, and forwarding.
1361  *
1362  * Users must call this function once for each successful call to LNetNIInit().
1363  * Once the LNetNIFini() operation has been started, the results of pending
1364  * API operations are undefined.
1365  *
1366  * \return always 0 for current implementation.
1367  */
1368 int
LNetNIFini(void)1369 LNetNIFini(void)
1370 {
1371 	LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1372 
1373 	LASSERT(the_lnet.ln_init);
1374 	LASSERT(the_lnet.ln_refcount > 0);
1375 
1376 	if (the_lnet.ln_refcount != 1) {
1377 		the_lnet.ln_refcount--;
1378 	} else {
1379 		LASSERT(!the_lnet.ln_niinit_self);
1380 
1381 		lnet_proc_fini();
1382 		lnet_router_checker_stop();
1383 		lnet_ping_target_fini();
1384 
1385 		/* Teardown fns that use my own API functions BEFORE here */
1386 		the_lnet.ln_refcount = 0;
1387 
1388 		lnet_acceptor_stop();
1389 		lnet_destroy_routes();
1390 		lnet_shutdown_lndnis();
1391 		lnet_unprepare();
1392 	}
1393 
1394 	LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1395 	return 0;
1396 }
1397 EXPORT_SYMBOL(LNetNIFini);
1398 
1399 /**
1400  * This is an ugly hack to export IOC_LIBCFS_DEBUG_PEER and
1401  * IOC_LIBCFS_PORTALS_COMPATIBILITY commands to users, by tweaking the LNet
1402  * internal ioctl handler.
1403  *
1404  * IOC_LIBCFS_PORTALS_COMPATIBILITY is now deprecated, don't use it.
1405  *
1406  * \param cmd IOC_LIBCFS_DEBUG_PEER to print debugging data about a peer.
1407  * The data will be printed to system console. Don't use it excessively.
1408  * \param arg A pointer to lnet_process_id_t, process ID of the peer.
1409  *
1410  * \return Always return 0 when called by users directly (i.e., not via ioctl).
1411  */
1412 int
LNetCtl(unsigned int cmd,void * arg)1413 LNetCtl(unsigned int cmd, void *arg)
1414 {
1415 	struct libcfs_ioctl_data *data = arg;
1416 	lnet_process_id_t	 id = {0};
1417 	lnet_ni_t		*ni;
1418 	int		       rc;
1419 
1420 	LASSERT(the_lnet.ln_init);
1421 	LASSERT(the_lnet.ln_refcount > 0);
1422 
1423 	switch (cmd) {
1424 	case IOC_LIBCFS_GET_NI:
1425 		rc = LNetGetId(data->ioc_count, &id);
1426 		data->ioc_nid = id.nid;
1427 		return rc;
1428 
1429 	case IOC_LIBCFS_FAIL_NID:
1430 		return lnet_fail_nid(data->ioc_nid, data->ioc_count);
1431 
1432 	case IOC_LIBCFS_ADD_ROUTE:
1433 		rc = lnet_add_route(data->ioc_net, data->ioc_count,
1434 				    data->ioc_nid, data->ioc_priority);
1435 		return (rc != 0) ? rc : lnet_check_routes();
1436 
1437 	case IOC_LIBCFS_DEL_ROUTE:
1438 		return lnet_del_route(data->ioc_net, data->ioc_nid);
1439 
1440 	case IOC_LIBCFS_GET_ROUTE:
1441 		return lnet_get_route(data->ioc_count,
1442 				      &data->ioc_net, &data->ioc_count,
1443 				      &data->ioc_nid, &data->ioc_flags,
1444 				      &data->ioc_priority);
1445 	case IOC_LIBCFS_NOTIFY_ROUTER:
1446 		return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
1447 				   cfs_time_current() -
1448 				   cfs_time_seconds(get_seconds() -
1449 						    (time_t)data->ioc_u64[0]));
1450 
1451 	case IOC_LIBCFS_PORTALS_COMPATIBILITY:
1452 		/* This can be removed once lustre stops calling it */
1453 		return 0;
1454 
1455 	case IOC_LIBCFS_LNET_DIST:
1456 		rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
1457 		if (rc < 0 && rc != -EHOSTUNREACH)
1458 			return rc;
1459 
1460 		data->ioc_u32[0] = rc;
1461 		return 0;
1462 
1463 	case IOC_LIBCFS_TESTPROTOCOMPAT:
1464 		lnet_net_lock(LNET_LOCK_EX);
1465 		the_lnet.ln_testprotocompat = data->ioc_flags;
1466 		lnet_net_unlock(LNET_LOCK_EX);
1467 		return 0;
1468 
1469 	case IOC_LIBCFS_PING:
1470 		id.nid = data->ioc_nid;
1471 		id.pid = data->ioc_u32[0];
1472 		rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
1473 			       (lnet_process_id_t *)data->ioc_pbuf1,
1474 			       data->ioc_plen1/sizeof(lnet_process_id_t));
1475 		if (rc < 0)
1476 			return rc;
1477 		data->ioc_count = rc;
1478 		return 0;
1479 
1480 	case IOC_LIBCFS_DEBUG_PEER: {
1481 		/* CAVEAT EMPTOR: this one designed for calling directly; not
1482 		 * via an ioctl */
1483 		id = *((lnet_process_id_t *) arg);
1484 
1485 		lnet_debug_peer(id.nid);
1486 
1487 		ni = lnet_net2ni(LNET_NIDNET(id.nid));
1488 		if (ni == NULL) {
1489 			CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(id));
1490 		} else {
1491 			if (ni->ni_lnd->lnd_ctl == NULL) {
1492 				CDEBUG(D_WARNING, "No ctl for %s\n",
1493 				       libcfs_id2str(id));
1494 			} else {
1495 				(void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
1496 			}
1497 
1498 			lnet_ni_decref(ni);
1499 		}
1500 		return 0;
1501 	}
1502 
1503 	default:
1504 		ni = lnet_net2ni(data->ioc_net);
1505 		if (ni == NULL)
1506 			return -EINVAL;
1507 
1508 		if (ni->ni_lnd->lnd_ctl == NULL)
1509 			rc = -EINVAL;
1510 		else
1511 			rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
1512 
1513 		lnet_ni_decref(ni);
1514 		return rc;
1515 	}
1516 	/* not reached */
1517 }
1518 EXPORT_SYMBOL(LNetCtl);
1519 
1520 /**
1521  * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
1522  * all interfaces share a same PID, as requested by LNetNIInit().
1523  *
1524  * \param index Index of the interface to look up.
1525  * \param id On successful return, this location will hold the
1526  * lnet_process_id_t ID of the interface.
1527  *
1528  * \retval 0 If an interface exists at \a index.
1529  * \retval -ENOENT If no interface has been found.
1530  */
1531 int
LNetGetId(unsigned int index,lnet_process_id_t * id)1532 LNetGetId(unsigned int index, lnet_process_id_t *id)
1533 {
1534 	struct lnet_ni	*ni;
1535 	struct list_head	*tmp;
1536 	int		cpt;
1537 	int		rc = -ENOENT;
1538 
1539 	LASSERT(the_lnet.ln_init);
1540 
1541 	/* LNetNI initilization failed? */
1542 	if (the_lnet.ln_refcount == 0)
1543 		return rc;
1544 
1545 	cpt = lnet_net_lock_current();
1546 
1547 	list_for_each(tmp, &the_lnet.ln_nis) {
1548 		if (index-- != 0)
1549 			continue;
1550 
1551 		ni = list_entry(tmp, lnet_ni_t, ni_list);
1552 
1553 		id->nid = ni->ni_nid;
1554 		id->pid = the_lnet.ln_pid;
1555 		rc = 0;
1556 		break;
1557 	}
1558 
1559 	lnet_net_unlock(cpt);
1560 	return rc;
1561 }
1562 EXPORT_SYMBOL(LNetGetId);
1563 
1564 /**
1565  * Print a string representation of handle \a h into buffer \a str of
1566  * \a len bytes.
1567  */
1568 void
LNetSnprintHandle(char * str,int len,lnet_handle_any_t h)1569 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
1570 {
1571 	snprintf(str, len, "%#llx", h.cookie);
1572 }
1573 EXPORT_SYMBOL(LNetSnprintHandle);
1574 
1575 static int
lnet_create_ping_info(void)1576 lnet_create_ping_info(void)
1577 {
1578 	int	       i;
1579 	int	       n;
1580 	int	       rc;
1581 	unsigned int      infosz;
1582 	lnet_ni_t	*ni;
1583 	lnet_process_id_t id;
1584 	lnet_ping_info_t *pinfo;
1585 
1586 	for (n = 0; ; n++) {
1587 		rc = LNetGetId(n, &id);
1588 		if (rc == -ENOENT)
1589 			break;
1590 
1591 		LASSERT(rc == 0);
1592 	}
1593 
1594 	infosz = offsetof(lnet_ping_info_t, pi_ni[n]);
1595 	LIBCFS_ALLOC(pinfo, infosz);
1596 	if (pinfo == NULL) {
1597 		CERROR("Can't allocate ping info[%d]\n", n);
1598 		return -ENOMEM;
1599 	}
1600 
1601 	pinfo->pi_nnis    = n;
1602 	pinfo->pi_pid     = the_lnet.ln_pid;
1603 	pinfo->pi_magic   = LNET_PROTO_PING_MAGIC;
1604 	pinfo->pi_features = LNET_PING_FEAT_NI_STATUS;
1605 
1606 	for (i = 0; i < n; i++) {
1607 		lnet_ni_status_t *ns = &pinfo->pi_ni[i];
1608 
1609 		rc = LNetGetId(i, &id);
1610 		LASSERT(rc == 0);
1611 
1612 		ns->ns_nid    = id.nid;
1613 		ns->ns_status = LNET_NI_STATUS_UP;
1614 
1615 		lnet_net_lock(0);
1616 
1617 		ni = lnet_nid2ni_locked(id.nid, 0);
1618 		LASSERT(ni != NULL);
1619 
1620 		lnet_ni_lock(ni);
1621 		LASSERT(ni->ni_status == NULL);
1622 		ni->ni_status = ns;
1623 		lnet_ni_unlock(ni);
1624 
1625 		lnet_ni_decref_locked(ni, 0);
1626 		lnet_net_unlock(0);
1627 	}
1628 
1629 	the_lnet.ln_ping_info = pinfo;
1630 	return 0;
1631 }
1632 
1633 static void
lnet_destroy_ping_info(void)1634 lnet_destroy_ping_info(void)
1635 {
1636 	struct lnet_ni	*ni;
1637 
1638 	lnet_net_lock(0);
1639 
1640 	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1641 		lnet_ni_lock(ni);
1642 		ni->ni_status = NULL;
1643 		lnet_ni_unlock(ni);
1644 	}
1645 
1646 	lnet_net_unlock(0);
1647 
1648 	LIBCFS_FREE(the_lnet.ln_ping_info,
1649 		    offsetof(lnet_ping_info_t,
1650 			     pi_ni[the_lnet.ln_ping_info->pi_nnis]));
1651 	the_lnet.ln_ping_info = NULL;
1652 }
1653 
1654 int
lnet_ping_target_init(void)1655 lnet_ping_target_init(void)
1656 {
1657 	lnet_md_t	 md = { NULL };
1658 	lnet_handle_me_t  meh;
1659 	lnet_process_id_t id;
1660 	int	       rc;
1661 	int	       rc2;
1662 	int	       infosz;
1663 
1664 	rc = lnet_create_ping_info();
1665 	if (rc != 0)
1666 		return rc;
1667 
1668 	/* We can have a tiny EQ since we only need to see the unlink event on
1669 	 * teardown, which by definition is the last one! */
1670 	rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq);
1671 	if (rc != 0) {
1672 		CERROR("Can't allocate ping EQ: %d\n", rc);
1673 		goto failed_0;
1674 	}
1675 
1676 	memset(&id, 0, sizeof(lnet_process_id_t));
1677 	id.nid = LNET_NID_ANY;
1678 	id.pid = LNET_PID_ANY;
1679 
1680 	rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1681 			  LNET_PROTO_PING_MATCHBITS, 0,
1682 			  LNET_UNLINK, LNET_INS_AFTER,
1683 			  &meh);
1684 	if (rc != 0) {
1685 		CERROR("Can't create ping ME: %d\n", rc);
1686 		goto failed_1;
1687 	}
1688 
1689 	/* initialize md content */
1690 	infosz = offsetof(lnet_ping_info_t,
1691 			  pi_ni[the_lnet.ln_ping_info->pi_nnis]);
1692 	md.start     = the_lnet.ln_ping_info;
1693 	md.length    = infosz;
1694 	md.threshold = LNET_MD_THRESH_INF;
1695 	md.max_size  = 0;
1696 	md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1697 		       LNET_MD_MANAGE_REMOTE;
1698 	md.user_ptr  = NULL;
1699 	md.eq_handle = the_lnet.ln_ping_target_eq;
1700 
1701 	rc = LNetMDAttach(meh, md,
1702 			  LNET_RETAIN,
1703 			  &the_lnet.ln_ping_target_md);
1704 	if (rc != 0) {
1705 		CERROR("Can't attach ping MD: %d\n", rc);
1706 		goto failed_2;
1707 	}
1708 
1709 	return 0;
1710 
1711  failed_2:
1712 	rc2 = LNetMEUnlink(meh);
1713 	LASSERT(rc2 == 0);
1714  failed_1:
1715 	rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
1716 	LASSERT(rc2 == 0);
1717  failed_0:
1718 	lnet_destroy_ping_info();
1719 	return rc;
1720 }
1721 
1722 void
lnet_ping_target_fini(void)1723 lnet_ping_target_fini(void)
1724 {
1725 	lnet_event_t    event;
1726 	int	     rc;
1727 	int	     which;
1728 	int	     timeout_ms = 1000;
1729 	sigset_t    blocked = cfs_block_allsigs();
1730 
1731 	LNetMDUnlink(the_lnet.ln_ping_target_md);
1732 	/* NB md could be busy; this just starts the unlink */
1733 
1734 	for (;;) {
1735 		rc = LNetEQPoll(&the_lnet.ln_ping_target_eq, 1,
1736 				timeout_ms, &event, &which);
1737 
1738 		/* I expect overflow... */
1739 		LASSERT(rc >= 0 || rc == -EOVERFLOW);
1740 
1741 		if (rc == 0) {
1742 			/* timed out: provide a diagnostic */
1743 			CWARN("Still waiting for ping MD to unlink\n");
1744 			timeout_ms *= 2;
1745 			continue;
1746 		}
1747 
1748 		/* Got a valid event */
1749 		if (event.unlinked)
1750 			break;
1751 	}
1752 
1753 	rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1754 	LASSERT(rc == 0);
1755 	lnet_destroy_ping_info();
1756 	cfs_restore_sigs(blocked);
1757 }
1758 
1759 int
lnet_ping(lnet_process_id_t id,int timeout_ms,lnet_process_id_t * ids,int n_ids)1760 lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_ids)
1761 {
1762 	lnet_handle_eq_t     eqh;
1763 	lnet_handle_md_t     mdh;
1764 	lnet_event_t	 event;
1765 	lnet_md_t	    md = { NULL };
1766 	int		  which;
1767 	int		  unlinked = 0;
1768 	int		  replied = 0;
1769 	const int	    a_long_time = 60000; /* mS */
1770 	int		  infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]);
1771 	lnet_ping_info_t    *info;
1772 	lnet_process_id_t    tmpid;
1773 	int		  i;
1774 	int		  nob;
1775 	int		  rc;
1776 	int		  rc2;
1777 	sigset_t	 blocked;
1778 
1779 	if (n_ids <= 0 ||
1780 	    id.nid == LNET_NID_ANY ||
1781 	    timeout_ms > 500000 ||	      /* arbitrary limit! */
1782 	    n_ids > 20)			 /* arbitrary limit! */
1783 		return -EINVAL;
1784 
1785 	if (id.pid == LNET_PID_ANY)
1786 		id.pid = LUSTRE_SRV_LNET_PID;
1787 
1788 	LIBCFS_ALLOC(info, infosz);
1789 	if (info == NULL)
1790 		return -ENOMEM;
1791 
1792 	/* NB 2 events max (including any unlink event) */
1793 	rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
1794 	if (rc != 0) {
1795 		CERROR("Can't allocate EQ: %d\n", rc);
1796 		goto out_0;
1797 	}
1798 
1799 	/* initialize md content */
1800 	md.start     = info;
1801 	md.length    = infosz;
1802 	md.threshold = 2; /*GET/REPLY*/
1803 	md.max_size  = 0;
1804 	md.options   = LNET_MD_TRUNCATE;
1805 	md.user_ptr  = NULL;
1806 	md.eq_handle = eqh;
1807 
1808 	rc = LNetMDBind(md, LNET_UNLINK, &mdh);
1809 	if (rc != 0) {
1810 		CERROR("Can't bind MD: %d\n", rc);
1811 		goto out_1;
1812 	}
1813 
1814 	rc = LNetGet(LNET_NID_ANY, mdh, id,
1815 		     LNET_RESERVED_PORTAL,
1816 		     LNET_PROTO_PING_MATCHBITS, 0);
1817 
1818 	if (rc != 0) {
1819 		/* Don't CERROR; this could be deliberate! */
1820 
1821 		rc2 = LNetMDUnlink(mdh);
1822 		LASSERT(rc2 == 0);
1823 
1824 		/* NB must wait for the UNLINK event below... */
1825 		unlinked = 1;
1826 		timeout_ms = a_long_time;
1827 	}
1828 
1829 	do {
1830 		/* MUST block for unlink to complete */
1831 		if (unlinked)
1832 			blocked = cfs_block_allsigs();
1833 
1834 		rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
1835 
1836 		if (unlinked)
1837 			cfs_restore_sigs(blocked);
1838 
1839 		CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
1840 		       (rc2 <= 0) ? -1 : event.type,
1841 		       (rc2 <= 0) ? -1 : event.status,
1842 		       (rc2 > 0 && event.unlinked) ? " unlinked" : "");
1843 
1844 		LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
1845 
1846 		if (rc2 <= 0 || event.status != 0) {
1847 			/* timeout or error */
1848 			if (!replied && rc == 0)
1849 				rc = (rc2 < 0) ? rc2 :
1850 				     (rc2 == 0) ? -ETIMEDOUT :
1851 				     event.status;
1852 
1853 			if (!unlinked) {
1854 				/* Ensure completion in finite time... */
1855 				LNetMDUnlink(mdh);
1856 				/* No assertion (racing with network) */
1857 				unlinked = 1;
1858 				timeout_ms = a_long_time;
1859 			} else if (rc2 == 0) {
1860 				/* timed out waiting for unlink */
1861 				CWARN("ping %s: late network completion\n",
1862 				      libcfs_id2str(id));
1863 			}
1864 		} else if (event.type == LNET_EVENT_REPLY) {
1865 			replied = 1;
1866 			rc = event.mlength;
1867 		}
1868 
1869 	} while (rc2 <= 0 || !event.unlinked);
1870 
1871 	if (!replied) {
1872 		if (rc >= 0)
1873 			CWARN("%s: Unexpected rc >= 0 but no reply!\n",
1874 			      libcfs_id2str(id));
1875 		rc = -EIO;
1876 		goto out_1;
1877 	}
1878 
1879 	nob = rc;
1880 	LASSERT(nob >= 0 && nob <= infosz);
1881 
1882 	rc = -EPROTO;			   /* if I can't parse... */
1883 
1884 	if (nob < 8) {
1885 		/* can't check magic/version */
1886 		CERROR("%s: ping info too short %d\n",
1887 		       libcfs_id2str(id), nob);
1888 		goto out_1;
1889 	}
1890 
1891 	if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
1892 		lnet_swap_pinginfo(info);
1893 	} else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
1894 		CERROR("%s: Unexpected magic %08x\n",
1895 		       libcfs_id2str(id), info->pi_magic);
1896 		goto out_1;
1897 	}
1898 
1899 	if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
1900 		CERROR("%s: ping w/o NI status: 0x%x\n",
1901 		       libcfs_id2str(id), info->pi_features);
1902 		goto out_1;
1903 	}
1904 
1905 	if (nob < offsetof(lnet_ping_info_t, pi_ni[0])) {
1906 		CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
1907 		       nob, (int)offsetof(lnet_ping_info_t, pi_ni[0]));
1908 		goto out_1;
1909 	}
1910 
1911 	if (info->pi_nnis < n_ids)
1912 		n_ids = info->pi_nnis;
1913 
1914 	if (nob < offsetof(lnet_ping_info_t, pi_ni[n_ids])) {
1915 		CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
1916 		       nob, (int)offsetof(lnet_ping_info_t, pi_ni[n_ids]));
1917 		goto out_1;
1918 	}
1919 
1920 	rc = -EFAULT;			   /* If I SEGV... */
1921 
1922 	memset(&tmpid, 0, sizeof(tmpid));
1923 	for (i = 0; i < n_ids; i++) {
1924 		tmpid.pid = info->pi_pid;
1925 		tmpid.nid = info->pi_ni[i].ns_nid;
1926 		if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
1927 			goto out_1;
1928 	}
1929 	rc = info->pi_nnis;
1930 
1931  out_1:
1932 	rc2 = LNetEQFree(eqh);
1933 	if (rc2 != 0)
1934 		CERROR("rc2 %d\n", rc2);
1935 	LASSERT(rc2 == 0);
1936 
1937  out_0:
1938 	LIBCFS_FREE(info, infosz);
1939 	return rc;
1940 }
1941