1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
19 *
20 * GPL HEADER END
21 */
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
25 *
26 * Copyright (c) 2012, 2015 Intel Corporation.
27 */
28 /*
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Seagate, Inc.
31 */
32 #define DEBUG_SUBSYSTEM S_LNET
33
34 #include <linux/if.h>
35 #include <linux/in.h>
36 #include <linux/net.h>
37 #include <linux/file.h>
38 #include <linux/pagemap.h>
39 /* For sys_open & sys_close */
40 #include <linux/syscalls.h>
41 #include <net/sock.h>
42
43 #include "../../include/linux/libcfs/libcfs.h"
44 #include "../../include/linux/lnet/lib-lnet.h"
45
46 static int
kernel_sock_unlocked_ioctl(struct file * filp,int cmd,unsigned long arg)47 kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
48 {
49 mm_segment_t oldfs = get_fs();
50 int err;
51
52 set_fs(KERNEL_DS);
53 err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
54 set_fs(oldfs);
55
56 return err;
57 }
58
59 static int
lnet_sock_ioctl(int cmd,unsigned long arg)60 lnet_sock_ioctl(int cmd, unsigned long arg)
61 {
62 struct file *sock_filp;
63 struct socket *sock;
64 int rc;
65
66 rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
67 if (rc != 0) {
68 CERROR("Can't create socket: %d\n", rc);
69 return rc;
70 }
71
72 sock_filp = sock_alloc_file(sock, 0, NULL);
73 if (IS_ERR(sock_filp)) {
74 sock_release(sock);
75 rc = PTR_ERR(sock_filp);
76 goto out;
77 }
78
79 rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
80
81 fput(sock_filp);
82 out:
83 return rc;
84 }
85
86 int
lnet_ipif_query(char * name,int * up,__u32 * ip,__u32 * mask)87 lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
88 {
89 struct ifreq ifr;
90 int nob;
91 int rc;
92 __u32 val;
93
94 nob = strnlen(name, IFNAMSIZ);
95 if (nob == IFNAMSIZ) {
96 CERROR("Interface name %s too long\n", name);
97 return -EINVAL;
98 }
99
100 CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ);
101
102 strcpy(ifr.ifr_name, name);
103 rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
104 if (rc != 0) {
105 CERROR("Can't get flags for interface %s\n", name);
106 return rc;
107 }
108
109 if ((ifr.ifr_flags & IFF_UP) == 0) {
110 CDEBUG(D_NET, "Interface %s down\n", name);
111 *up = 0;
112 *ip = *mask = 0;
113 return 0;
114 }
115 *up = 1;
116
117 strcpy(ifr.ifr_name, name);
118 ifr.ifr_addr.sa_family = AF_INET;
119 rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
120 if (rc != 0) {
121 CERROR("Can't get IP address for interface %s\n", name);
122 return rc;
123 }
124
125 val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
126 *ip = ntohl(val);
127
128 strcpy(ifr.ifr_name, name);
129 ifr.ifr_addr.sa_family = AF_INET;
130 rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
131 if (rc != 0) {
132 CERROR("Can't get netmask for interface %s\n", name);
133 return rc;
134 }
135
136 val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
137 *mask = ntohl(val);
138
139 return 0;
140 }
141 EXPORT_SYMBOL(lnet_ipif_query);
142
143 int
lnet_ipif_enumerate(char *** namesp)144 lnet_ipif_enumerate(char ***namesp)
145 {
146 /* Allocate and fill in 'names', returning # interfaces/error */
147 char **names;
148 int toobig;
149 int nalloc;
150 int nfound;
151 struct ifreq *ifr;
152 struct ifconf ifc;
153 int rc;
154 int nob;
155 int i;
156
157 nalloc = 16; /* first guess at max interfaces */
158 toobig = 0;
159 for (;;) {
160 if (nalloc * sizeof(*ifr) > PAGE_CACHE_SIZE) {
161 toobig = 1;
162 nalloc = PAGE_CACHE_SIZE/sizeof(*ifr);
163 CWARN("Too many interfaces: only enumerating first %d\n",
164 nalloc);
165 }
166
167 LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
168 if (ifr == NULL) {
169 CERROR("ENOMEM enumerating up to %d interfaces\n",
170 nalloc);
171 rc = -ENOMEM;
172 goto out0;
173 }
174
175 ifc.ifc_buf = (char *)ifr;
176 ifc.ifc_len = nalloc * sizeof(*ifr);
177
178 rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
179 if (rc < 0) {
180 CERROR("Error %d enumerating interfaces\n", rc);
181 goto out1;
182 }
183
184 LASSERT(rc == 0);
185
186 nfound = ifc.ifc_len/sizeof(*ifr);
187 LASSERT(nfound <= nalloc);
188
189 if (nfound < nalloc || toobig)
190 break;
191
192 LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
193 nalloc *= 2;
194 }
195
196 if (nfound == 0)
197 goto out1;
198
199 LIBCFS_ALLOC(names, nfound * sizeof(*names));
200 if (names == NULL) {
201 rc = -ENOMEM;
202 goto out1;
203 }
204
205 for (i = 0; i < nfound; i++) {
206 nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
207 if (nob == IFNAMSIZ) {
208 /* no space for terminating NULL */
209 CERROR("interface name %.*s too long (%d max)\n",
210 nob, ifr[i].ifr_name, IFNAMSIZ);
211 rc = -ENAMETOOLONG;
212 goto out2;
213 }
214
215 LIBCFS_ALLOC(names[i], IFNAMSIZ);
216 if (names[i] == NULL) {
217 rc = -ENOMEM;
218 goto out2;
219 }
220
221 memcpy(names[i], ifr[i].ifr_name, nob);
222 names[i][nob] = 0;
223 }
224
225 *namesp = names;
226 rc = nfound;
227
228 out2:
229 if (rc < 0)
230 lnet_ipif_free_enumeration(names, nfound);
231 out1:
232 LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
233 out0:
234 return rc;
235 }
236 EXPORT_SYMBOL(lnet_ipif_enumerate);
237
238 void
lnet_ipif_free_enumeration(char ** names,int n)239 lnet_ipif_free_enumeration(char **names, int n)
240 {
241 int i;
242
243 LASSERT(n > 0);
244
245 for (i = 0; i < n && names[i] != NULL; i++)
246 LIBCFS_FREE(names[i], IFNAMSIZ);
247
248 LIBCFS_FREE(names, n * sizeof(*names));
249 }
250 EXPORT_SYMBOL(lnet_ipif_free_enumeration);
251
252 int
lnet_sock_write(struct socket * sock,void * buffer,int nob,int timeout)253 lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
254 {
255 int rc;
256 long ticks = timeout * HZ;
257 unsigned long then;
258 struct timeval tv;
259
260 LASSERT(nob > 0);
261 /* Caller may pass a zero timeout if she thinks the socket buffer is
262 * empty enough to take the whole message immediately */
263
264 for (;;) {
265 struct kvec iov = {
266 .iov_base = buffer,
267 .iov_len = nob
268 };
269 struct msghdr msg = {
270 .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0
271 };
272
273 if (timeout != 0) {
274 /* Set send timeout to remaining time */
275 tv = (struct timeval) {
276 .tv_sec = ticks / HZ,
277 .tv_usec = ((ticks % HZ) * 1000000) / HZ
278 };
279 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
280 (char *)&tv, sizeof(tv));
281 if (rc != 0) {
282 CERROR("Can't set socket send timeout %ld.%06d: %d\n",
283 (long)tv.tv_sec, (int)tv.tv_usec, rc);
284 return rc;
285 }
286 }
287
288 then = jiffies;
289 rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
290 ticks -= jiffies - then;
291
292 if (rc == nob)
293 return 0;
294
295 if (rc < 0)
296 return rc;
297
298 if (rc == 0) {
299 CERROR("Unexpected zero rc\n");
300 return -ECONNABORTED;
301 }
302
303 if (ticks <= 0)
304 return -EAGAIN;
305
306 buffer = ((char *)buffer) + rc;
307 nob -= rc;
308 }
309 return 0;
310 }
311 EXPORT_SYMBOL(lnet_sock_write);
312
313 int
lnet_sock_read(struct socket * sock,void * buffer,int nob,int timeout)314 lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
315 {
316 int rc;
317 long ticks = timeout * HZ;
318 unsigned long then;
319 struct timeval tv;
320
321 LASSERT(nob > 0);
322 LASSERT(ticks > 0);
323
324 for (;;) {
325 struct kvec iov = {
326 .iov_base = buffer,
327 .iov_len = nob
328 };
329 struct msghdr msg = {
330 .msg_flags = 0
331 };
332
333 /* Set receive timeout to remaining time */
334 tv = (struct timeval) {
335 .tv_sec = ticks / HZ,
336 .tv_usec = ((ticks % HZ) * 1000000) / HZ
337 };
338 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
339 (char *)&tv, sizeof(tv));
340 if (rc != 0) {
341 CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
342 (long)tv.tv_sec, (int)tv.tv_usec, rc);
343 return rc;
344 }
345
346 then = jiffies;
347 rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
348 ticks -= jiffies - then;
349
350 if (rc < 0)
351 return rc;
352
353 if (rc == 0)
354 return -ECONNRESET;
355
356 buffer = ((char *)buffer) + rc;
357 nob -= rc;
358
359 if (nob == 0)
360 return 0;
361
362 if (ticks <= 0)
363 return -ETIMEDOUT;
364 }
365 }
366 EXPORT_SYMBOL(lnet_sock_read);
367
368 static int
lnet_sock_create(struct socket ** sockp,int * fatal,__u32 local_ip,int local_port)369 lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
370 int local_port)
371 {
372 struct sockaddr_in locaddr;
373 struct socket *sock;
374 int rc;
375 int option;
376
377 /* All errors are fatal except bind failure if the port is in use */
378 *fatal = 1;
379
380 rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
381 *sockp = sock;
382 if (rc != 0) {
383 CERROR("Can't create socket: %d\n", rc);
384 return rc;
385 }
386
387 option = 1;
388 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
389 (char *)&option, sizeof(option));
390 if (rc != 0) {
391 CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
392 goto failed;
393 }
394
395 if (local_ip != 0 || local_port != 0) {
396 memset(&locaddr, 0, sizeof(locaddr));
397 locaddr.sin_family = AF_INET;
398 locaddr.sin_port = htons(local_port);
399 locaddr.sin_addr.s_addr = (local_ip == 0) ?
400 INADDR_ANY : htonl(local_ip);
401
402 rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
403 sizeof(locaddr));
404 if (rc == -EADDRINUSE) {
405 CDEBUG(D_NET, "Port %d already in use\n", local_port);
406 *fatal = 0;
407 goto failed;
408 }
409 if (rc != 0) {
410 CERROR("Error trying to bind to port %d: %d\n",
411 local_port, rc);
412 goto failed;
413 }
414 }
415 return 0;
416
417 failed:
418 sock_release(sock);
419 return rc;
420 }
421
422 int
lnet_sock_setbuf(struct socket * sock,int txbufsize,int rxbufsize)423 lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
424 {
425 int option;
426 int rc;
427
428 if (txbufsize != 0) {
429 option = txbufsize;
430 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
431 (char *)&option, sizeof(option));
432 if (rc != 0) {
433 CERROR("Can't set send buffer %d: %d\n",
434 option, rc);
435 return rc;
436 }
437 }
438
439 if (rxbufsize != 0) {
440 option = rxbufsize;
441 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
442 (char *)&option, sizeof(option));
443 if (rc != 0) {
444 CERROR("Can't set receive buffer %d: %d\n",
445 option, rc);
446 return rc;
447 }
448 }
449 return 0;
450 }
451 EXPORT_SYMBOL(lnet_sock_setbuf);
452
453 int
lnet_sock_getaddr(struct socket * sock,bool remote,__u32 * ip,int * port)454 lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
455 {
456 struct sockaddr_in sin;
457 int len = sizeof(sin);
458 int rc;
459
460 if (remote)
461 rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
462 else
463 rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
464 if (rc != 0) {
465 CERROR("Error %d getting sock %s IP/port\n",
466 rc, remote ? "peer" : "local");
467 return rc;
468 }
469
470 if (ip != NULL)
471 *ip = ntohl(sin.sin_addr.s_addr);
472
473 if (port != NULL)
474 *port = ntohs(sin.sin_port);
475
476 return 0;
477 }
478 EXPORT_SYMBOL(lnet_sock_getaddr);
479
480 int
lnet_sock_getbuf(struct socket * sock,int * txbufsize,int * rxbufsize)481 lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
482 {
483 if (txbufsize != NULL)
484 *txbufsize = sock->sk->sk_sndbuf;
485
486 if (rxbufsize != NULL)
487 *rxbufsize = sock->sk->sk_rcvbuf;
488
489 return 0;
490 }
491 EXPORT_SYMBOL(lnet_sock_getbuf);
492
493 int
lnet_sock_listen(struct socket ** sockp,__u32 local_ip,int local_port,int backlog)494 lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
495 int backlog)
496 {
497 int fatal;
498 int rc;
499
500 rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
501 if (rc != 0) {
502 if (!fatal)
503 CERROR("Can't create socket: port %d already in use\n",
504 local_port);
505 return rc;
506 }
507
508 rc = kernel_listen(*sockp, backlog);
509 if (rc == 0)
510 return 0;
511
512 CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
513 sock_release(*sockp);
514 return rc;
515 }
516 EXPORT_SYMBOL(lnet_sock_listen);
517
518 int
lnet_sock_accept(struct socket ** newsockp,struct socket * sock)519 lnet_sock_accept(struct socket **newsockp, struct socket *sock)
520 {
521 wait_queue_t wait;
522 struct socket *newsock;
523 int rc;
524
525 init_waitqueue_entry(&wait, current);
526
527 /* XXX this should add a ref to sock->ops->owner, if
528 * TCP could be a module */
529 rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
530 if (rc) {
531 CERROR("Can't allocate socket\n");
532 return rc;
533 }
534
535 newsock->ops = sock->ops;
536
537 rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
538 if (rc == -EAGAIN) {
539 /* Nothing ready, so wait for activity */
540 set_current_state(TASK_INTERRUPTIBLE);
541 add_wait_queue(sk_sleep(sock->sk), &wait);
542 schedule();
543 remove_wait_queue(sk_sleep(sock->sk), &wait);
544 set_current_state(TASK_RUNNING);
545 rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
546 }
547
548 if (rc != 0)
549 goto failed;
550
551 *newsockp = newsock;
552 return 0;
553
554 failed:
555 sock_release(newsock);
556 return rc;
557 }
558 EXPORT_SYMBOL(lnet_sock_accept);
559
560 int
lnet_sock_connect(struct socket ** sockp,int * fatal,__u32 local_ip,int local_port,__u32 peer_ip,int peer_port)561 lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip,
562 int local_port, __u32 peer_ip, int peer_port)
563 {
564 struct sockaddr_in srvaddr;
565 int rc;
566
567 rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
568 if (rc != 0)
569 return rc;
570
571 memset(&srvaddr, 0, sizeof(srvaddr));
572 srvaddr.sin_family = AF_INET;
573 srvaddr.sin_port = htons(peer_port);
574 srvaddr.sin_addr.s_addr = htonl(peer_ip);
575
576 rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
577 sizeof(srvaddr), 0);
578 if (rc == 0)
579 return 0;
580
581 /* EADDRNOTAVAIL probably means we're already connected to the same
582 * peer/port on the same local port on a differently typed
583 * connection. Let our caller retry with a different local
584 * port... */
585 *fatal = !(rc == -EADDRNOTAVAIL);
586
587 CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
588 "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
589 &local_ip, local_port, &peer_ip, peer_port);
590
591 sock_release(*sockp);
592 return rc;
593 }
594 EXPORT_SYMBOL(lnet_sock_connect);
595