1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2015 Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Seagate, Inc.
31  */
32 #define DEBUG_SUBSYSTEM S_LNET
33 
34 #include <linux/if.h>
35 #include <linux/in.h>
36 #include <linux/net.h>
37 #include <linux/file.h>
38 #include <linux/pagemap.h>
39 /* For sys_open & sys_close */
40 #include <linux/syscalls.h>
41 #include <net/sock.h>
42 
43 #include "../../include/linux/libcfs/libcfs.h"
44 #include "../../include/linux/lnet/lib-lnet.h"
45 
46 static int
kernel_sock_unlocked_ioctl(struct file * filp,int cmd,unsigned long arg)47 kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
48 {
49 	mm_segment_t oldfs = get_fs();
50 	int err;
51 
52 	set_fs(KERNEL_DS);
53 	err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
54 	set_fs(oldfs);
55 
56 	return err;
57 }
58 
59 static int
lnet_sock_ioctl(int cmd,unsigned long arg)60 lnet_sock_ioctl(int cmd, unsigned long arg)
61 {
62 	struct file *sock_filp;
63 	struct socket *sock;
64 	int rc;
65 
66 	rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
67 	if (rc != 0) {
68 		CERROR("Can't create socket: %d\n", rc);
69 		return rc;
70 	}
71 
72 	sock_filp = sock_alloc_file(sock, 0, NULL);
73 	if (IS_ERR(sock_filp)) {
74 		sock_release(sock);
75 		rc = PTR_ERR(sock_filp);
76 		goto out;
77 	}
78 
79 	rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
80 
81 	fput(sock_filp);
82 out:
83 	return rc;
84 }
85 
86 int
lnet_ipif_query(char * name,int * up,__u32 * ip,__u32 * mask)87 lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
88 {
89 	struct ifreq ifr;
90 	int nob;
91 	int rc;
92 	__u32 val;
93 
94 	nob = strnlen(name, IFNAMSIZ);
95 	if (nob == IFNAMSIZ) {
96 		CERROR("Interface name %s too long\n", name);
97 		return -EINVAL;
98 	}
99 
100 	CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ);
101 
102 	strcpy(ifr.ifr_name, name);
103 	rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
104 	if (rc != 0) {
105 		CERROR("Can't get flags for interface %s\n", name);
106 		return rc;
107 	}
108 
109 	if ((ifr.ifr_flags & IFF_UP) == 0) {
110 		CDEBUG(D_NET, "Interface %s down\n", name);
111 		*up = 0;
112 		*ip = *mask = 0;
113 		return 0;
114 	}
115 	*up = 1;
116 
117 	strcpy(ifr.ifr_name, name);
118 	ifr.ifr_addr.sa_family = AF_INET;
119 	rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
120 	if (rc != 0) {
121 		CERROR("Can't get IP address for interface %s\n", name);
122 		return rc;
123 	}
124 
125 	val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
126 	*ip = ntohl(val);
127 
128 	strcpy(ifr.ifr_name, name);
129 	ifr.ifr_addr.sa_family = AF_INET;
130 	rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
131 	if (rc != 0) {
132 		CERROR("Can't get netmask for interface %s\n", name);
133 		return rc;
134 	}
135 
136 	val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
137 	*mask = ntohl(val);
138 
139 	return 0;
140 }
141 EXPORT_SYMBOL(lnet_ipif_query);
142 
143 int
lnet_ipif_enumerate(char *** namesp)144 lnet_ipif_enumerate(char ***namesp)
145 {
146 	/* Allocate and fill in 'names', returning # interfaces/error */
147 	char **names;
148 	int toobig;
149 	int nalloc;
150 	int nfound;
151 	struct ifreq *ifr;
152 	struct ifconf ifc;
153 	int rc;
154 	int nob;
155 	int i;
156 
157 	nalloc = 16;	/* first guess at max interfaces */
158 	toobig = 0;
159 	for (;;) {
160 		if (nalloc * sizeof(*ifr) > PAGE_CACHE_SIZE) {
161 			toobig = 1;
162 			nalloc = PAGE_CACHE_SIZE/sizeof(*ifr);
163 			CWARN("Too many interfaces: only enumerating first %d\n",
164 			      nalloc);
165 		}
166 
167 		LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
168 		if (ifr == NULL) {
169 			CERROR("ENOMEM enumerating up to %d interfaces\n",
170 			       nalloc);
171 			rc = -ENOMEM;
172 			goto out0;
173 		}
174 
175 		ifc.ifc_buf = (char *)ifr;
176 		ifc.ifc_len = nalloc * sizeof(*ifr);
177 
178 		rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
179 		if (rc < 0) {
180 			CERROR("Error %d enumerating interfaces\n", rc);
181 			goto out1;
182 		}
183 
184 		LASSERT(rc == 0);
185 
186 		nfound = ifc.ifc_len/sizeof(*ifr);
187 		LASSERT(nfound <= nalloc);
188 
189 		if (nfound < nalloc || toobig)
190 			break;
191 
192 		LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
193 		nalloc *= 2;
194 	}
195 
196 	if (nfound == 0)
197 		goto out1;
198 
199 	LIBCFS_ALLOC(names, nfound * sizeof(*names));
200 	if (names == NULL) {
201 		rc = -ENOMEM;
202 		goto out1;
203 	}
204 
205 	for (i = 0; i < nfound; i++) {
206 		nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
207 		if (nob == IFNAMSIZ) {
208 			/* no space for terminating NULL */
209 			CERROR("interface name %.*s too long (%d max)\n",
210 			       nob, ifr[i].ifr_name, IFNAMSIZ);
211 			rc = -ENAMETOOLONG;
212 			goto out2;
213 		}
214 
215 		LIBCFS_ALLOC(names[i], IFNAMSIZ);
216 		if (names[i] == NULL) {
217 			rc = -ENOMEM;
218 			goto out2;
219 		}
220 
221 		memcpy(names[i], ifr[i].ifr_name, nob);
222 		names[i][nob] = 0;
223 	}
224 
225 	*namesp = names;
226 	rc = nfound;
227 
228 out2:
229 	if (rc < 0)
230 		lnet_ipif_free_enumeration(names, nfound);
231 out1:
232 	LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
233 out0:
234 	return rc;
235 }
236 EXPORT_SYMBOL(lnet_ipif_enumerate);
237 
238 void
lnet_ipif_free_enumeration(char ** names,int n)239 lnet_ipif_free_enumeration(char **names, int n)
240 {
241 	int i;
242 
243 	LASSERT(n > 0);
244 
245 	for (i = 0; i < n && names[i] != NULL; i++)
246 		LIBCFS_FREE(names[i], IFNAMSIZ);
247 
248 	LIBCFS_FREE(names, n * sizeof(*names));
249 }
250 EXPORT_SYMBOL(lnet_ipif_free_enumeration);
251 
252 int
lnet_sock_write(struct socket * sock,void * buffer,int nob,int timeout)253 lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
254 {
255 	int rc;
256 	long ticks = timeout * HZ;
257 	unsigned long then;
258 	struct timeval tv;
259 
260 	LASSERT(nob > 0);
261 	/* Caller may pass a zero timeout if she thinks the socket buffer is
262 	 * empty enough to take the whole message immediately */
263 
264 	for (;;) {
265 		struct kvec  iov = {
266 			.iov_base = buffer,
267 			.iov_len  = nob
268 		};
269 		struct msghdr msg = {
270 			.msg_flags      = (timeout == 0) ? MSG_DONTWAIT : 0
271 		};
272 
273 		if (timeout != 0) {
274 			/* Set send timeout to remaining time */
275 			tv = (struct timeval) {
276 				.tv_sec = ticks / HZ,
277 				.tv_usec = ((ticks % HZ) * 1000000) / HZ
278 			};
279 			rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
280 					       (char *)&tv, sizeof(tv));
281 			if (rc != 0) {
282 				CERROR("Can't set socket send timeout %ld.%06d: %d\n",
283 				       (long)tv.tv_sec, (int)tv.tv_usec, rc);
284 				return rc;
285 			}
286 		}
287 
288 		then = jiffies;
289 		rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
290 		ticks -= jiffies - then;
291 
292 		if (rc == nob)
293 			return 0;
294 
295 		if (rc < 0)
296 			return rc;
297 
298 		if (rc == 0) {
299 			CERROR("Unexpected zero rc\n");
300 			return -ECONNABORTED;
301 		}
302 
303 		if (ticks <= 0)
304 			return -EAGAIN;
305 
306 		buffer = ((char *)buffer) + rc;
307 		nob -= rc;
308 	}
309 	return 0;
310 }
311 EXPORT_SYMBOL(lnet_sock_write);
312 
313 int
lnet_sock_read(struct socket * sock,void * buffer,int nob,int timeout)314 lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
315 {
316 	int rc;
317 	long ticks = timeout * HZ;
318 	unsigned long then;
319 	struct timeval tv;
320 
321 	LASSERT(nob > 0);
322 	LASSERT(ticks > 0);
323 
324 	for (;;) {
325 		struct kvec  iov = {
326 			.iov_base = buffer,
327 			.iov_len  = nob
328 		};
329 		struct msghdr msg = {
330 			.msg_flags = 0
331 		};
332 
333 		/* Set receive timeout to remaining time */
334 		tv = (struct timeval) {
335 			.tv_sec = ticks / HZ,
336 			.tv_usec = ((ticks % HZ) * 1000000) / HZ
337 		};
338 		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
339 				       (char *)&tv, sizeof(tv));
340 		if (rc != 0) {
341 			CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
342 			       (long)tv.tv_sec, (int)tv.tv_usec, rc);
343 			return rc;
344 		}
345 
346 		then = jiffies;
347 		rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
348 		ticks -= jiffies - then;
349 
350 		if (rc < 0)
351 			return rc;
352 
353 		if (rc == 0)
354 			return -ECONNRESET;
355 
356 		buffer = ((char *)buffer) + rc;
357 		nob -= rc;
358 
359 		if (nob == 0)
360 			return 0;
361 
362 		if (ticks <= 0)
363 			return -ETIMEDOUT;
364 	}
365 }
366 EXPORT_SYMBOL(lnet_sock_read);
367 
368 static int
lnet_sock_create(struct socket ** sockp,int * fatal,__u32 local_ip,int local_port)369 lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
370 		 int local_port)
371 {
372 	struct sockaddr_in locaddr;
373 	struct socket *sock;
374 	int rc;
375 	int option;
376 
377 	/* All errors are fatal except bind failure if the port is in use */
378 	*fatal = 1;
379 
380 	rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
381 	*sockp = sock;
382 	if (rc != 0) {
383 		CERROR("Can't create socket: %d\n", rc);
384 		return rc;
385 	}
386 
387 	option = 1;
388 	rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
389 			       (char *)&option, sizeof(option));
390 	if (rc != 0) {
391 		CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
392 		goto failed;
393 	}
394 
395 	if (local_ip != 0 || local_port != 0) {
396 		memset(&locaddr, 0, sizeof(locaddr));
397 		locaddr.sin_family = AF_INET;
398 		locaddr.sin_port = htons(local_port);
399 		locaddr.sin_addr.s_addr = (local_ip == 0) ?
400 					  INADDR_ANY : htonl(local_ip);
401 
402 		rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
403 				 sizeof(locaddr));
404 		if (rc == -EADDRINUSE) {
405 			CDEBUG(D_NET, "Port %d already in use\n", local_port);
406 			*fatal = 0;
407 			goto failed;
408 		}
409 		if (rc != 0) {
410 			CERROR("Error trying to bind to port %d: %d\n",
411 			       local_port, rc);
412 			goto failed;
413 		}
414 	}
415 	return 0;
416 
417 failed:
418 	sock_release(sock);
419 	return rc;
420 }
421 
422 int
lnet_sock_setbuf(struct socket * sock,int txbufsize,int rxbufsize)423 lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
424 {
425 	int option;
426 	int rc;
427 
428 	if (txbufsize != 0) {
429 		option = txbufsize;
430 		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
431 				       (char *)&option, sizeof(option));
432 		if (rc != 0) {
433 			CERROR("Can't set send buffer %d: %d\n",
434 			       option, rc);
435 			return rc;
436 		}
437 	}
438 
439 	if (rxbufsize != 0) {
440 		option = rxbufsize;
441 		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
442 				      (char *)&option, sizeof(option));
443 		if (rc != 0) {
444 			CERROR("Can't set receive buffer %d: %d\n",
445 			       option, rc);
446 			return rc;
447 		}
448 	}
449 	return 0;
450 }
451 EXPORT_SYMBOL(lnet_sock_setbuf);
452 
453 int
lnet_sock_getaddr(struct socket * sock,bool remote,__u32 * ip,int * port)454 lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
455 {
456 	struct sockaddr_in sin;
457 	int len = sizeof(sin);
458 	int rc;
459 
460 	if (remote)
461 		rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
462 	else
463 		rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
464 	if (rc != 0) {
465 		CERROR("Error %d getting sock %s IP/port\n",
466 		       rc, remote ? "peer" : "local");
467 		return rc;
468 	}
469 
470 	if (ip != NULL)
471 		*ip = ntohl(sin.sin_addr.s_addr);
472 
473 	if (port != NULL)
474 		*port = ntohs(sin.sin_port);
475 
476 	return 0;
477 }
478 EXPORT_SYMBOL(lnet_sock_getaddr);
479 
480 int
lnet_sock_getbuf(struct socket * sock,int * txbufsize,int * rxbufsize)481 lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
482 {
483 	if (txbufsize != NULL)
484 		*txbufsize = sock->sk->sk_sndbuf;
485 
486 	if (rxbufsize != NULL)
487 		*rxbufsize = sock->sk->sk_rcvbuf;
488 
489 	return 0;
490 }
491 EXPORT_SYMBOL(lnet_sock_getbuf);
492 
493 int
lnet_sock_listen(struct socket ** sockp,__u32 local_ip,int local_port,int backlog)494 lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
495 		 int backlog)
496 {
497 	int fatal;
498 	int rc;
499 
500 	rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
501 	if (rc != 0) {
502 		if (!fatal)
503 			CERROR("Can't create socket: port %d already in use\n",
504 			       local_port);
505 		return rc;
506 	}
507 
508 	rc = kernel_listen(*sockp, backlog);
509 	if (rc == 0)
510 		return 0;
511 
512 	CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
513 	sock_release(*sockp);
514 	return rc;
515 }
516 EXPORT_SYMBOL(lnet_sock_listen);
517 
518 int
lnet_sock_accept(struct socket ** newsockp,struct socket * sock)519 lnet_sock_accept(struct socket **newsockp, struct socket *sock)
520 {
521 	wait_queue_t wait;
522 	struct socket *newsock;
523 	int rc;
524 
525 	init_waitqueue_entry(&wait, current);
526 
527 	/* XXX this should add a ref to sock->ops->owner, if
528 	 * TCP could be a module */
529 	rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
530 	if (rc) {
531 		CERROR("Can't allocate socket\n");
532 		return rc;
533 	}
534 
535 	newsock->ops = sock->ops;
536 
537 	rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
538 	if (rc == -EAGAIN) {
539 		/* Nothing ready, so wait for activity */
540 		set_current_state(TASK_INTERRUPTIBLE);
541 		add_wait_queue(sk_sleep(sock->sk), &wait);
542 		schedule();
543 		remove_wait_queue(sk_sleep(sock->sk), &wait);
544 		set_current_state(TASK_RUNNING);
545 		rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
546 	}
547 
548 	if (rc != 0)
549 		goto failed;
550 
551 	*newsockp = newsock;
552 	return 0;
553 
554 failed:
555 	sock_release(newsock);
556 	return rc;
557 }
558 EXPORT_SYMBOL(lnet_sock_accept);
559 
560 int
lnet_sock_connect(struct socket ** sockp,int * fatal,__u32 local_ip,int local_port,__u32 peer_ip,int peer_port)561 lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip,
562 		  int local_port, __u32 peer_ip, int peer_port)
563 {
564 	struct sockaddr_in srvaddr;
565 	int rc;
566 
567 	rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
568 	if (rc != 0)
569 		return rc;
570 
571 	memset(&srvaddr, 0, sizeof(srvaddr));
572 	srvaddr.sin_family = AF_INET;
573 	srvaddr.sin_port = htons(peer_port);
574 	srvaddr.sin_addr.s_addr = htonl(peer_ip);
575 
576 	rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
577 			    sizeof(srvaddr), 0);
578 	if (rc == 0)
579 		return 0;
580 
581 	/* EADDRNOTAVAIL probably means we're already connected to the same
582 	 * peer/port on the same local port on a differently typed
583 	 * connection.  Let our caller retry with a different local
584 	 * port... */
585 	*fatal = !(rc == -EADDRNOTAVAIL);
586 
587 	CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
588 		     "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
589 		     &local_ip, local_port, &peer_ip, peer_port);
590 
591 	sock_release(*sockp);
592 	return rc;
593 }
594 EXPORT_SYMBOL(lnet_sock_connect);
595