This source file includes the following definitions.
- sk_ns_capable
- sk_capable
- sk_net_capable
- sk_set_memalloc
- sk_clear_memalloc
- __sk_backlog_rcv
- sock_get_timeout
- sock_set_timeout
- sock_warn_obsolete_bsdism
- sock_needs_netstamp
- sock_disable_timestamp
- __sock_queue_rcv_skb
- sock_queue_rcv_skb
- __sk_receive_skb
- __sk_dst_check
- sk_dst_check
- sock_setbindtodevice_locked
- sock_setbindtodevice
- sock_getbindtodevice
- sock_valbool_flag
- sk_mc_loop
- sock_setsockopt
- cred_to_ucred
- groups_to_user
- sock_getsockopt
- sock_lock_init
- sock_copy
- sk_prot_alloc
- sk_prot_free
- sk_alloc
- __sk_destruct
- sk_destruct
- __sk_free
- sk_free
- sk_init_common
- sk_clone_lock
- sk_free_unlock_clone
- sk_setup_caps
- sock_wfree
- __sock_wfree
- skb_set_owner_w
- can_skb_orphan_partial
- skb_orphan_partial
- sock_rfree
- sock_efree
- sock_i_uid
- sock_i_ino
- sock_wmalloc
- sock_ofree
- sock_omalloc
- sock_kmalloc
- __sock_kfree_s
- sock_kfree_s
- sock_kzfree_s
- sock_wait_for_wmem
- sock_alloc_send_pskb
- sock_alloc_send_skb
- __sock_cmsg_send
- sock_cmsg_send
- sk_enter_memory_pressure
- sk_leave_memory_pressure
- skb_page_frag_refill
- sk_page_frag_refill
- __lock_sock
- __release_sock
- __sk_flush_backlog
- sk_wait_data
- __sk_mem_raise_allocated
- __sk_mem_schedule
- __sk_mem_reduce_allocated
- __sk_mem_reclaim
- sk_set_peek_off
- sock_no_bind
- sock_no_connect
- sock_no_socketpair
- sock_no_accept
- sock_no_getname
- sock_no_ioctl
- sock_no_listen
- sock_no_shutdown
- sock_no_setsockopt
- sock_no_getsockopt
- sock_no_sendmsg
- sock_no_sendmsg_locked
- sock_no_recvmsg
- sock_no_mmap
- sock_no_sendpage
- sock_no_sendpage_locked
- sock_def_wakeup
- sock_def_error_report
- sock_def_readable
- sock_def_write_space
- sock_def_destruct
- sk_send_sigurg
- sk_reset_timer
- sk_stop_timer
- sock_init_data
- lock_sock_nested
- release_sock
- lock_sock_fast
- sock_gettstamp
- sock_enable_timestamp
- sock_recv_errqueue
- sock_common_getsockopt
- compat_sock_common_getsockopt
- sock_common_recvmsg
- sock_common_setsockopt
- compat_sock_common_setsockopt
- sk_common_release
- sk_get_meminfo
- sock_prot_inuse_add
- sock_prot_inuse_get
- sock_inuse_add
- sock_inuse_get
- sock_inuse_init_net
- sock_inuse_exit_net
- net_inuse_init
- assign_proto_idx
- release_proto_idx
- assign_proto_idx
- release_proto_idx
- sock_inuse_add
- req_prot_cleanup
- req_prot_init
- proto_register
- proto_unregister
- sock_load_diag_module
- proto_seq_start
- proto_seq_next
- proto_seq_stop
- proto_method_implemented
- sock_prot_memory_allocated
- sock_prot_memory_pressure
- proto_seq_printf
- proto_seq_show
- proto_init_net
- proto_exit_net
- proto_init
- sk_busy_loop_end
86 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
87
88 #include <asm/unaligned.h>
89 #include <linux/capability.h>
90 #include <linux/errno.h>
91 #include <linux/errqueue.h>
92 #include <linux/types.h>
93 #include <linux/socket.h>
94 #include <linux/in.h>
95 #include <linux/kernel.h>
96 #include <linux/module.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/sched.h>
100 #include <linux/sched/mm.h>
101 #include <linux/timer.h>
102 #include <linux/string.h>
103 #include <linux/sockios.h>
104 #include <linux/net.h>
105 #include <linux/mm.h>
106 #include <linux/slab.h>
107 #include <linux/interrupt.h>
108 #include <linux/poll.h>
109 #include <linux/tcp.h>
110 #include <linux/init.h>
111 #include <linux/highmem.h>
112 #include <linux/user_namespace.h>
113 #include <linux/static_key.h>
114 #include <linux/memcontrol.h>
115 #include <linux/prefetch.h>
116
117 #include <linux/uaccess.h>
118
119 #include <linux/netdevice.h>
120 #include <net/protocol.h>
121 #include <linux/skbuff.h>
122 #include <net/net_namespace.h>
123 #include <net/request_sock.h>
124 #include <net/sock.h>
125 #include <linux/net_tstamp.h>
126 #include <net/xfrm.h>
127 #include <linux/ipsec.h>
128 #include <net/cls_cgroup.h>
129 #include <net/netprio_cgroup.h>
130 #include <linux/sock_diag.h>
131
132 #include <linux/filter.h>
133 #include <net/sock_reuseport.h>
134 #include <net/bpf_sk_storage.h>
135
136 #include <trace/events/sock.h>
137
138 #include <net/tcp.h>
139 #include <net/busy_poll.h>
140
141 static DEFINE_MUTEX(proto_list_mutex);
142 static LIST_HEAD(proto_list);
143
144 static void sock_inuse_add(struct net *net, int val);
145
146
147
148
149
150
151
152
153
154
155
156 bool sk_ns_capable(const struct sock *sk,
157 struct user_namespace *user_ns, int cap)
158 {
159 return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
160 ns_capable(user_ns, cap);
161 }
162 EXPORT_SYMBOL(sk_ns_capable);
163
164
165
166
167
168
169
170
171
172
173 bool sk_capable(const struct sock *sk, int cap)
174 {
175 return sk_ns_capable(sk, &init_user_ns, cap);
176 }
177 EXPORT_SYMBOL(sk_capable);
178
179
180
181
182
183
184
185
186
187
188 bool sk_net_capable(const struct sock *sk, int cap)
189 {
190 return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
191 }
192 EXPORT_SYMBOL(sk_net_capable);
193
194
195
196
197
198
199 static struct lock_class_key af_family_keys[AF_MAX];
200 static struct lock_class_key af_family_kern_keys[AF_MAX];
201 static struct lock_class_key af_family_slock_keys[AF_MAX];
202 static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
203
204
205
206
207
208
209
210 #define _sock_locks(x) \
211 x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \
212 x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \
213 x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \
214 x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \
215 x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \
216 x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \
217 x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \
218 x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \
219 x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \
220 x "27" , x "28" , x "AF_CAN" , \
221 x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \
222 x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \
223 x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \
224 x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \
225 x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \
226 x "AF_MAX"
227
228 static const char *const af_family_key_strings[AF_MAX+1] = {
229 _sock_locks("sk_lock-")
230 };
231 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
232 _sock_locks("slock-")
233 };
234 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
235 _sock_locks("clock-")
236 };
237
238 static const char *const af_family_kern_key_strings[AF_MAX+1] = {
239 _sock_locks("k-sk_lock-")
240 };
241 static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
242 _sock_locks("k-slock-")
243 };
244 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
245 _sock_locks("k-clock-")
246 };
247 static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
248 _sock_locks("rlock-")
249 };
250 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
251 _sock_locks("wlock-")
252 };
253 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
254 _sock_locks("elock-")
255 };
256
257
258
259
260
261 static struct lock_class_key af_callback_keys[AF_MAX];
262 static struct lock_class_key af_rlock_keys[AF_MAX];
263 static struct lock_class_key af_wlock_keys[AF_MAX];
264 static struct lock_class_key af_elock_keys[AF_MAX];
265 static struct lock_class_key af_kern_callback_keys[AF_MAX];
266
267
268 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
269 EXPORT_SYMBOL(sysctl_wmem_max);
270 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
271 EXPORT_SYMBOL(sysctl_rmem_max);
272 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
273 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
274
275
276 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
277 EXPORT_SYMBOL(sysctl_optmem_max);
278
279 int sysctl_tstamp_allow_data __read_mostly = 1;
280
281 DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
282 EXPORT_SYMBOL_GPL(memalloc_socks_key);
283
284
285
286
287
288
289
290
291
292 void sk_set_memalloc(struct sock *sk)
293 {
294 sock_set_flag(sk, SOCK_MEMALLOC);
295 sk->sk_allocation |= __GFP_MEMALLOC;
296 static_branch_inc(&memalloc_socks_key);
297 }
298 EXPORT_SYMBOL_GPL(sk_set_memalloc);
299
300 void sk_clear_memalloc(struct sock *sk)
301 {
302 sock_reset_flag(sk, SOCK_MEMALLOC);
303 sk->sk_allocation &= ~__GFP_MEMALLOC;
304 static_branch_dec(&memalloc_socks_key);
305
306
307
308
309
310
311
312
313 sk_mem_reclaim(sk);
314 }
315 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
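Illustrative usage (not part of sock.c): a hypothetical driver that backs swap over a network transport could bracket its socket with the two helpers above; the driver, the 'xport' argument and both function names are assumptions made for this sketch.

#include <net/sock.h>

/* Enable PFMEMALLOC reserves while this socket services swap traffic. */
static void example_enable_memalloc(struct socket *xport)
{
	sk_set_memalloc(xport->sk);
}

/* Stop using reserves; sk_clear_memalloc() also reclaims forward-charged memory. */
static void example_disable_memalloc(struct socket *xport)
{
	sk_clear_memalloc(xport->sk);
}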
316
317 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
318 {
319 int ret;
320 unsigned int noreclaim_flag;
321
322
323 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
324
325 noreclaim_flag = memalloc_noreclaim_save();
326 ret = sk->sk_backlog_rcv(sk, skb);
327 memalloc_noreclaim_restore(noreclaim_flag);
328
329 return ret;
330 }
331 EXPORT_SYMBOL(__sk_backlog_rcv);
332
333 static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
334 {
335 struct __kernel_sock_timeval tv;
336 int size;
337
338 if (timeo == MAX_SCHEDULE_TIMEOUT) {
339 tv.tv_sec = 0;
340 tv.tv_usec = 0;
341 } else {
342 tv.tv_sec = timeo / HZ;
343 tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
344 }
345
346 if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
347 struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
348 *(struct old_timeval32 *)optval = tv32;
349 return sizeof(tv32);
350 }
351
352 if (old_timeval) {
353 struct __kernel_old_timeval old_tv;
354 old_tv.tv_sec = tv.tv_sec;
355 old_tv.tv_usec = tv.tv_usec;
356 *(struct __kernel_old_timeval *)optval = old_tv;
357 size = sizeof(old_tv);
358 } else {
359 *(struct __kernel_sock_timeval *)optval = tv;
360 size = sizeof(tv);
361 }
362
363 return size;
364 }
365
366 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval)
367 {
368 struct __kernel_sock_timeval tv;
369
370 if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
371 struct old_timeval32 tv32;
372
373 if (optlen < sizeof(tv32))
374 return -EINVAL;
375
376 if (copy_from_user(&tv32, optval, sizeof(tv32)))
377 return -EFAULT;
378 tv.tv_sec = tv32.tv_sec;
379 tv.tv_usec = tv32.tv_usec;
380 } else if (old_timeval) {
381 struct __kernel_old_timeval old_tv;
382
383 if (optlen < sizeof(old_tv))
384 return -EINVAL;
385 if (copy_from_user(&old_tv, optval, sizeof(old_tv)))
386 return -EFAULT;
387 tv.tv_sec = old_tv.tv_sec;
388 tv.tv_usec = old_tv.tv_usec;
389 } else {
390 if (optlen < sizeof(tv))
391 return -EINVAL;
392 if (copy_from_user(&tv, optval, sizeof(tv)))
393 return -EFAULT;
394 }
395 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
396 return -EDOM;
397
398 if (tv.tv_sec < 0) {
399 static int warned __read_mostly;
400
401 *timeo_p = 0;
402 if (warned < 10 && net_ratelimit()) {
403 warned++;
404 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
405 __func__, current->comm, task_pid_nr(current));
406 }
407 return 0;
408 }
409 *timeo_p = MAX_SCHEDULE_TIMEOUT;
410 if (tv.tv_sec == 0 && tv.tv_usec == 0)
411 return 0;
412 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
413 *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
414 return 0;
415 }
416
417 static void sock_warn_obsolete_bsdism(const char *name)
418 {
419 static int warned;
420 static char warncomm[TASK_COMM_LEN];
421 if (strcmp(warncomm, current->comm) && warned < 5) {
422 strcpy(warncomm, current->comm);
423 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
424 warncomm, name);
425 warned++;
426 }
427 }
428
429 static bool sock_needs_netstamp(const struct sock *sk)
430 {
431 switch (sk->sk_family) {
432 case AF_UNSPEC:
433 case AF_UNIX:
434 return false;
435 default:
436 return true;
437 }
438 }
439
440 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
441 {
442 if (sk->sk_flags & flags) {
443 sk->sk_flags &= ~flags;
444 if (sock_needs_netstamp(sk) &&
445 !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
446 net_disable_timestamp();
447 }
448 }
449
450
451 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
452 {
453 unsigned long flags;
454 struct sk_buff_head *list = &sk->sk_receive_queue;
455
456 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
457 atomic_inc(&sk->sk_drops);
458 trace_sock_rcvqueue_full(sk, skb);
459 return -ENOMEM;
460 }
461
462 if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
463 atomic_inc(&sk->sk_drops);
464 return -ENOBUFS;
465 }
466
467 skb->dev = NULL;
468 skb_set_owner_r(skb, sk);
469
470
471
472
473 skb_dst_force(skb);
474
475 spin_lock_irqsave(&list->lock, flags);
476 sock_skb_set_dropcount(sk, skb);
477 __skb_queue_tail(list, skb);
478 spin_unlock_irqrestore(&list->lock, flags);
479
480 if (!sock_flag(sk, SOCK_DEAD))
481 sk->sk_data_ready(sk);
482 return 0;
483 }
484 EXPORT_SYMBOL(__sock_queue_rcv_skb);
485
486 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
487 {
488 int err;
489
490 err = sk_filter(sk, skb);
491 if (err)
492 return err;
493
494 return __sock_queue_rcv_skb(sk, skb);
495 }
496 EXPORT_SYMBOL(sock_queue_rcv_skb);
497
498 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
499 const int nested, unsigned int trim_cap, bool refcounted)
500 {
501 int rc = NET_RX_SUCCESS;
502
503 if (sk_filter_trim_cap(sk, skb, trim_cap))
504 goto discard_and_relse;
505
506 skb->dev = NULL;
507
508 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
509 atomic_inc(&sk->sk_drops);
510 goto discard_and_relse;
511 }
512 if (nested)
513 bh_lock_sock_nested(sk);
514 else
515 bh_lock_sock(sk);
516 if (!sock_owned_by_user(sk)) {
517
518
519
520 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
521
522 rc = sk_backlog_rcv(sk, skb);
523
524 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
525 } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
526 bh_unlock_sock(sk);
527 atomic_inc(&sk->sk_drops);
528 goto discard_and_relse;
529 }
530
531 bh_unlock_sock(sk);
532 out:
533 if (refcounted)
534 sock_put(sk);
535 return rc;
536 discard_and_relse:
537 kfree_skb(skb);
538 goto out;
539 }
540 EXPORT_SYMBOL(__sk_receive_skb);
541
542 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
543 {
544 struct dst_entry *dst = __sk_dst_get(sk);
545
546 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
547 sk_tx_queue_clear(sk);
548 sk->sk_dst_pending_confirm = 0;
549 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
550 dst_release(dst);
551 return NULL;
552 }
553
554 return dst;
555 }
556 EXPORT_SYMBOL(__sk_dst_check);
557
558 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
559 {
560 struct dst_entry *dst = sk_dst_get(sk);
561
562 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
563 sk_dst_reset(sk);
564 dst_release(dst);
565 return NULL;
566 }
567
568 return dst;
569 }
570 EXPORT_SYMBOL(sk_dst_check);
571
572 static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
573 {
574 int ret = -ENOPROTOOPT;
575 #ifdef CONFIG_NETDEVICES
576 struct net *net = sock_net(sk);
577
578
579 ret = -EPERM;
580 if (!ns_capable(net->user_ns, CAP_NET_RAW))
581 goto out;
582
583 ret = -EINVAL;
584 if (ifindex < 0)
585 goto out;
586
587 sk->sk_bound_dev_if = ifindex;
588 if (sk->sk_prot->rehash)
589 sk->sk_prot->rehash(sk);
590 sk_dst_reset(sk);
591
592 ret = 0;
593
594 out:
595 #endif
596
597 return ret;
598 }
599
600 static int sock_setbindtodevice(struct sock *sk, char __user *optval,
601 int optlen)
602 {
603 int ret = -ENOPROTOOPT;
604 #ifdef CONFIG_NETDEVICES
605 struct net *net = sock_net(sk);
606 char devname[IFNAMSIZ];
607 int index;
608
609 ret = -EINVAL;
610 if (optlen < 0)
611 goto out;
612
613
614
615
616
617
618 if (optlen > IFNAMSIZ - 1)
619 optlen = IFNAMSIZ - 1;
620 memset(devname, 0, sizeof(devname));
621
622 ret = -EFAULT;
623 if (copy_from_user(devname, optval, optlen))
624 goto out;
625
626 index = 0;
627 if (devname[0] != '\0') {
628 struct net_device *dev;
629
630 rcu_read_lock();
631 dev = dev_get_by_name_rcu(net, devname);
632 if (dev)
633 index = dev->ifindex;
634 rcu_read_unlock();
635 ret = -ENODEV;
636 if (!dev)
637 goto out;
638 }
639
640 lock_sock(sk);
641 ret = sock_setbindtodevice_locked(sk, index);
642 release_sock(sk);
643
644 out:
645 #endif
646
647 return ret;
648 }
649
650 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
651 int __user *optlen, int len)
652 {
653 int ret = -ENOPROTOOPT;
654 #ifdef CONFIG_NETDEVICES
655 struct net *net = sock_net(sk);
656 char devname[IFNAMSIZ];
657
658 if (sk->sk_bound_dev_if == 0) {
659 len = 0;
660 goto zero;
661 }
662
663 ret = -EINVAL;
664 if (len < IFNAMSIZ)
665 goto out;
666
667 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
668 if (ret)
669 goto out;
670
671 len = strlen(devname) + 1;
672
673 ret = -EFAULT;
674 if (copy_to_user(optval, devname, len))
675 goto out;
676
677 zero:
678 ret = -EFAULT;
679 if (put_user(len, optlen))
680 goto out;
681
682 ret = 0;
683
684 out:
685 #endif
686
687 return ret;
688 }
689
690 static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
691 {
692 if (valbool)
693 sock_set_flag(sk, bit);
694 else
695 sock_reset_flag(sk, bit);
696 }
697
698 bool sk_mc_loop(struct sock *sk)
699 {
700 if (dev_recursion_level())
701 return false;
702 if (!sk)
703 return true;
704 switch (sk->sk_family) {
705 case AF_INET:
706 return inet_sk(sk)->mc_loop;
707 #if IS_ENABLED(CONFIG_IPV6)
708 case AF_INET6:
709 return inet6_sk(sk)->mc_loop;
710 #endif
711 }
712 WARN_ON(1);
713 return true;
714 }
715 EXPORT_SYMBOL(sk_mc_loop);
716
717
718
719
720
721
722 int sock_setsockopt(struct socket *sock, int level, int optname,
723 char __user *optval, unsigned int optlen)
724 {
725 struct sock_txtime sk_txtime;
726 struct sock *sk = sock->sk;
727 int val;
728 int valbool;
729 struct linger ling;
730 int ret = 0;
731
732
733
734
735
736 if (optname == SO_BINDTODEVICE)
737 return sock_setbindtodevice(sk, optval, optlen);
738
739 if (optlen < sizeof(int))
740 return -EINVAL;
741
742 if (get_user(val, (int __user *)optval))
743 return -EFAULT;
744
745 valbool = val ? 1 : 0;
746
747 lock_sock(sk);
748
749 switch (optname) {
750 case SO_DEBUG:
751 if (val && !capable(CAP_NET_ADMIN))
752 ret = -EACCES;
753 else
754 sock_valbool_flag(sk, SOCK_DBG, valbool);
755 break;
756 case SO_REUSEADDR:
757 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
758 break;
759 case SO_REUSEPORT:
760 sk->sk_reuseport = valbool;
761 break;
762 case SO_TYPE:
763 case SO_PROTOCOL:
764 case SO_DOMAIN:
765 case SO_ERROR:
766 ret = -ENOPROTOOPT;
767 break;
768 case SO_DONTROUTE:
769 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
770 sk_dst_reset(sk);
771 break;
772 case SO_BROADCAST:
773 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
774 break;
775 case SO_SNDBUF:
776
777
778
779
780
781 val = min_t(u32, val, sysctl_wmem_max);
782 set_sndbuf:
783
784
785
786 val = min_t(int, val, INT_MAX / 2);
787 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
788 WRITE_ONCE(sk->sk_sndbuf,
789 max_t(int, val * 2, SOCK_MIN_SNDBUF));
790
791 sk->sk_write_space(sk);
792 break;
793
794 case SO_SNDBUFFORCE:
795 if (!capable(CAP_NET_ADMIN)) {
796 ret = -EPERM;
797 break;
798 }
799
800
801
802
803 if (val < 0)
804 val = 0;
805 goto set_sndbuf;
806
807 case SO_RCVBUF:
808
809
810
811
812
813 val = min_t(u32, val, sysctl_rmem_max);
814 set_rcvbuf:
815
816
817
818 val = min_t(int, val, INT_MAX / 2);
819 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835 WRITE_ONCE(sk->sk_rcvbuf,
836 max_t(int, val * 2, SOCK_MIN_RCVBUF));
837 break;
838
839 case SO_RCVBUFFORCE:
840 if (!capable(CAP_NET_ADMIN)) {
841 ret = -EPERM;
842 break;
843 }
844
845
846
847
848 if (val < 0)
849 val = 0;
850 goto set_rcvbuf;
851
852 case SO_KEEPALIVE:
853 if (sk->sk_prot->keepalive)
854 sk->sk_prot->keepalive(sk, valbool);
855 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
856 break;
857
858 case SO_OOBINLINE:
859 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
860 break;
861
862 case SO_NO_CHECK:
863 sk->sk_no_check_tx = valbool;
864 break;
865
866 case SO_PRIORITY:
867 if ((val >= 0 && val <= 6) ||
868 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
869 sk->sk_priority = val;
870 else
871 ret = -EPERM;
872 break;
873
874 case SO_LINGER:
875 if (optlen < sizeof(ling)) {
876 ret = -EINVAL;
877 break;
878 }
879 if (copy_from_user(&ling, optval, sizeof(ling))) {
880 ret = -EFAULT;
881 break;
882 }
883 if (!ling.l_onoff)
884 sock_reset_flag(sk, SOCK_LINGER);
885 else {
886 #if (BITS_PER_LONG == 32)
887 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
888 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
889 else
890 #endif
891 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
892 sock_set_flag(sk, SOCK_LINGER);
893 }
894 break;
895
896 case SO_BSDCOMPAT:
897 sock_warn_obsolete_bsdism("setsockopt");
898 break;
899
900 case SO_PASSCRED:
901 if (valbool)
902 set_bit(SOCK_PASSCRED, &sock->flags);
903 else
904 clear_bit(SOCK_PASSCRED, &sock->flags);
905 break;
906
907 case SO_TIMESTAMP_OLD:
908 case SO_TIMESTAMP_NEW:
909 case SO_TIMESTAMPNS_OLD:
910 case SO_TIMESTAMPNS_NEW:
911 if (valbool) {
912 if (optname == SO_TIMESTAMP_NEW || optname == SO_TIMESTAMPNS_NEW)
913 sock_set_flag(sk, SOCK_TSTAMP_NEW);
914 else
915 sock_reset_flag(sk, SOCK_TSTAMP_NEW);
916
917 if (optname == SO_TIMESTAMP_OLD || optname == SO_TIMESTAMP_NEW)
918 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
919 else
920 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
921 sock_set_flag(sk, SOCK_RCVTSTAMP);
922 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
923 } else {
924 sock_reset_flag(sk, SOCK_RCVTSTAMP);
925 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
926 sock_reset_flag(sk, SOCK_TSTAMP_NEW);
927 }
928 break;
929
930 case SO_TIMESTAMPING_NEW:
931 sock_set_flag(sk, SOCK_TSTAMP_NEW);
932 /* fall through */
933 case SO_TIMESTAMPING_OLD:
934 if (val & ~SOF_TIMESTAMPING_MASK) {
935 ret = -EINVAL;
936 break;
937 }
938
939 if (val & SOF_TIMESTAMPING_OPT_ID &&
940 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
941 if (sk->sk_protocol == IPPROTO_TCP &&
942 sk->sk_type == SOCK_STREAM) {
943 if ((1 << sk->sk_state) &
944 (TCPF_CLOSE | TCPF_LISTEN)) {
945 ret = -EINVAL;
946 break;
947 }
948 sk->sk_tskey = tcp_sk(sk)->snd_una;
949 } else {
950 sk->sk_tskey = 0;
951 }
952 }
953
954 if (val & SOF_TIMESTAMPING_OPT_STATS &&
955 !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
956 ret = -EINVAL;
957 break;
958 }
959
960 sk->sk_tsflags = val;
961 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
962 sock_enable_timestamp(sk,
963 SOCK_TIMESTAMPING_RX_SOFTWARE);
964 else {
965 if (optname == SO_TIMESTAMPING_NEW)
966 sock_reset_flag(sk, SOCK_TSTAMP_NEW);
967
968 sock_disable_timestamp(sk,
969 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
970 }
971 break;
972
973 case SO_RCVLOWAT:
974 if (val < 0)
975 val = INT_MAX;
976 if (sock->ops->set_rcvlowat)
977 ret = sock->ops->set_rcvlowat(sk, val);
978 else
979 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
980 break;
981
982 case SO_RCVTIMEO_OLD:
983 case SO_RCVTIMEO_NEW:
984 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD);
985 break;
986
987 case SO_SNDTIMEO_OLD:
988 case SO_SNDTIMEO_NEW:
989 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD);
990 break;
991
992 case SO_ATTACH_FILTER:
993 ret = -EINVAL;
994 if (optlen == sizeof(struct sock_fprog)) {
995 struct sock_fprog fprog;
996
997 ret = -EFAULT;
998 if (copy_from_user(&fprog, optval, sizeof(fprog)))
999 break;
1000
1001 ret = sk_attach_filter(&fprog, sk);
1002 }
1003 break;
1004
1005 case SO_ATTACH_BPF:
1006 ret = -EINVAL;
1007 if (optlen == sizeof(u32)) {
1008 u32 ufd;
1009
1010 ret = -EFAULT;
1011 if (copy_from_user(&ufd, optval, sizeof(ufd)))
1012 break;
1013
1014 ret = sk_attach_bpf(ufd, sk);
1015 }
1016 break;
1017
1018 case SO_ATTACH_REUSEPORT_CBPF:
1019 ret = -EINVAL;
1020 if (optlen == sizeof(struct sock_fprog)) {
1021 struct sock_fprog fprog;
1022
1023 ret = -EFAULT;
1024 if (copy_from_user(&fprog, optval, sizeof(fprog)))
1025 break;
1026
1027 ret = sk_reuseport_attach_filter(&fprog, sk);
1028 }
1029 break;
1030
1031 case SO_ATTACH_REUSEPORT_EBPF:
1032 ret = -EINVAL;
1033 if (optlen == sizeof(u32)) {
1034 u32 ufd;
1035
1036 ret = -EFAULT;
1037 if (copy_from_user(&ufd, optval, sizeof(ufd)))
1038 break;
1039
1040 ret = sk_reuseport_attach_bpf(ufd, sk);
1041 }
1042 break;
1043
1044 case SO_DETACH_REUSEPORT_BPF:
1045 ret = reuseport_detach_prog(sk);
1046 break;
1047
1048 case SO_DETACH_FILTER:
1049 ret = sk_detach_filter(sk);
1050 break;
1051
1052 case SO_LOCK_FILTER:
1053 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
1054 ret = -EPERM;
1055 else
1056 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
1057 break;
1058
1059 case SO_PASSSEC:
1060 if (valbool)
1061 set_bit(SOCK_PASSSEC, &sock->flags);
1062 else
1063 clear_bit(SOCK_PASSSEC, &sock->flags);
1064 break;
1065 case SO_MARK:
1066 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1067 ret = -EPERM;
1068 } else if (val != sk->sk_mark) {
1069 sk->sk_mark = val;
1070 sk_dst_reset(sk);
1071 }
1072 break;
1073
1074 case SO_RXQ_OVFL:
1075 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
1076 break;
1077
1078 case SO_WIFI_STATUS:
1079 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
1080 break;
1081
1082 case SO_PEEK_OFF:
1083 if (sock->ops->set_peek_off)
1084 ret = sock->ops->set_peek_off(sk, val);
1085 else
1086 ret = -EOPNOTSUPP;
1087 break;
1088
1089 case SO_NOFCS:
1090 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
1091 break;
1092
1093 case SO_SELECT_ERR_QUEUE:
1094 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
1095 break;
1096
1097 #ifdef CONFIG_NET_RX_BUSY_POLL
1098 case SO_BUSY_POLL:
1099
1100 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
1101 ret = -EPERM;
1102 else {
1103 if (val < 0)
1104 ret = -EINVAL;
1105 else
1106 sk->sk_ll_usec = val;
1107 }
1108 break;
1109 #endif
1110
1111 case SO_MAX_PACING_RATE:
1112 {
1113 unsigned long ulval = (val == ~0U) ? ~0UL : val;
1114
1115 if (sizeof(ulval) != sizeof(val) &&
1116 optlen >= sizeof(ulval) &&
1117 get_user(ulval, (unsigned long __user *)optval)) {
1118 ret = -EFAULT;
1119 break;
1120 }
1121 if (ulval != ~0UL)
1122 cmpxchg(&sk->sk_pacing_status,
1123 SK_PACING_NONE,
1124 SK_PACING_NEEDED);
1125 sk->sk_max_pacing_rate = ulval;
1126 sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
1127 break;
1128 }
1129 case SO_INCOMING_CPU:
1130 WRITE_ONCE(sk->sk_incoming_cpu, val);
1131 break;
1132
1133 case SO_CNX_ADVICE:
1134 if (val == 1)
1135 dst_negative_advice(sk);
1136 break;
1137
1138 case SO_ZEROCOPY:
1139 if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
1140 if (!((sk->sk_type == SOCK_STREAM &&
1141 sk->sk_protocol == IPPROTO_TCP) ||
1142 (sk->sk_type == SOCK_DGRAM &&
1143 sk->sk_protocol == IPPROTO_UDP)))
1144 ret = -ENOTSUPP;
1145 } else if (sk->sk_family != PF_RDS) {
1146 ret = -ENOTSUPP;
1147 }
1148 if (!ret) {
1149 if (val < 0 || val > 1)
1150 ret = -EINVAL;
1151 else
1152 sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
1153 }
1154 break;
1155
1156 case SO_TXTIME:
1157 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1158 ret = -EPERM;
1159 } else if (optlen != sizeof(struct sock_txtime)) {
1160 ret = -EINVAL;
1161 } else if (copy_from_user(&sk_txtime, optval,
1162 sizeof(struct sock_txtime))) {
1163 ret = -EFAULT;
1164 } else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
1165 ret = -EINVAL;
1166 } else {
1167 sock_valbool_flag(sk, SOCK_TXTIME, true);
1168 sk->sk_clockid = sk_txtime.clockid;
1169 sk->sk_txtime_deadline_mode =
1170 !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
1171 sk->sk_txtime_report_errors =
1172 !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
1173 }
1174 break;
1175
1176 case SO_BINDTOIFINDEX:
1177 ret = sock_setbindtodevice_locked(sk, val);
1178 break;
1179
1180 default:
1181 ret = -ENOPROTOOPT;
1182 break;
1183 }
1184 release_sock(sk);
1185 return ret;
1186 }
1187 EXPORT_SYMBOL(sock_setsockopt);
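Illustrative userspace view (not part of sock.c) of the SO_SNDBUF handling above: the requested value is clamped to sysctl_wmem_max and stored doubled, which is what a later getsockopt() reports. The expected output assumes default sysctl settings.

#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int req = 65536, eff = 0;
	socklen_t len = sizeof(eff);

	/* Kernel stores max(min(req, sysctl_wmem_max) * 2, SOCK_MIN_SNDBUF). */
	setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &req, sizeof(req));
	getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &eff, &len);
	printf("requested %d, effective sk_sndbuf %d\n", req, eff); /* typically 131072 */
	return 0;
}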
1188
1189
1190 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1191 struct ucred *ucred)
1192 {
1193 ucred->pid = pid_vnr(pid);
1194 ucred->uid = ucred->gid = -1;
1195 if (cred) {
1196 struct user_namespace *current_ns = current_user_ns();
1197
1198 ucred->uid = from_kuid_munged(current_ns, cred->euid);
1199 ucred->gid = from_kgid_munged(current_ns, cred->egid);
1200 }
1201 }
1202
1203 static int groups_to_user(gid_t __user *dst, const struct group_info *src)
1204 {
1205 struct user_namespace *user_ns = current_user_ns();
1206 int i;
1207
1208 for (i = 0; i < src->ngroups; i++)
1209 if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
1210 return -EFAULT;
1211
1212 return 0;
1213 }
1214
1215 int sock_getsockopt(struct socket *sock, int level, int optname,
1216 char __user *optval, int __user *optlen)
1217 {
1218 struct sock *sk = sock->sk;
1219
1220 union {
1221 int val;
1222 u64 val64;
1223 unsigned long ulval;
1224 struct linger ling;
1225 struct old_timeval32 tm32;
1226 struct __kernel_old_timeval tm;
1227 struct __kernel_sock_timeval stm;
1228 struct sock_txtime txtime;
1229 } v;
1230
1231 int lv = sizeof(int);
1232 int len;
1233
1234 if (get_user(len, optlen))
1235 return -EFAULT;
1236 if (len < 0)
1237 return -EINVAL;
1238
1239 memset(&v, 0, sizeof(v));
1240
1241 switch (optname) {
1242 case SO_DEBUG:
1243 v.val = sock_flag(sk, SOCK_DBG);
1244 break;
1245
1246 case SO_DONTROUTE:
1247 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1248 break;
1249
1250 case SO_BROADCAST:
1251 v.val = sock_flag(sk, SOCK_BROADCAST);
1252 break;
1253
1254 case SO_SNDBUF:
1255 v.val = sk->sk_sndbuf;
1256 break;
1257
1258 case SO_RCVBUF:
1259 v.val = sk->sk_rcvbuf;
1260 break;
1261
1262 case SO_REUSEADDR:
1263 v.val = sk->sk_reuse;
1264 break;
1265
1266 case SO_REUSEPORT:
1267 v.val = sk->sk_reuseport;
1268 break;
1269
1270 case SO_KEEPALIVE:
1271 v.val = sock_flag(sk, SOCK_KEEPOPEN);
1272 break;
1273
1274 case SO_TYPE:
1275 v.val = sk->sk_type;
1276 break;
1277
1278 case SO_PROTOCOL:
1279 v.val = sk->sk_protocol;
1280 break;
1281
1282 case SO_DOMAIN:
1283 v.val = sk->sk_family;
1284 break;
1285
1286 case SO_ERROR:
1287 v.val = -sock_error(sk);
1288 if (v.val == 0)
1289 v.val = xchg(&sk->sk_err_soft, 0);
1290 break;
1291
1292 case SO_OOBINLINE:
1293 v.val = sock_flag(sk, SOCK_URGINLINE);
1294 break;
1295
1296 case SO_NO_CHECK:
1297 v.val = sk->sk_no_check_tx;
1298 break;
1299
1300 case SO_PRIORITY:
1301 v.val = sk->sk_priority;
1302 break;
1303
1304 case SO_LINGER:
1305 lv = sizeof(v.ling);
1306 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1307 v.ling.l_linger = sk->sk_lingertime / HZ;
1308 break;
1309
1310 case SO_BSDCOMPAT:
1311 sock_warn_obsolete_bsdism("getsockopt");
1312 break;
1313
1314 case SO_TIMESTAMP_OLD:
1315 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1316 !sock_flag(sk, SOCK_TSTAMP_NEW) &&
1317 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1318 break;
1319
1320 case SO_TIMESTAMPNS_OLD:
1321 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
1322 break;
1323
1324 case SO_TIMESTAMP_NEW:
1325 v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
1326 break;
1327
1328 case SO_TIMESTAMPNS_NEW:
1329 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
1330 break;
1331
1332 case SO_TIMESTAMPING_OLD:
1333 v.val = sk->sk_tsflags;
1334 break;
1335
1336 case SO_RCVTIMEO_OLD:
1337 case SO_RCVTIMEO_NEW:
1338 lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
1339 break;
1340
1341 case SO_SNDTIMEO_OLD:
1342 case SO_SNDTIMEO_NEW:
1343 lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
1344 break;
1345
1346 case SO_RCVLOWAT:
1347 v.val = sk->sk_rcvlowat;
1348 break;
1349
1350 case SO_SNDLOWAT:
1351 v.val = 1;
1352 break;
1353
1354 case SO_PASSCRED:
1355 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1356 break;
1357
1358 case SO_PEERCRED:
1359 {
1360 struct ucred peercred;
1361 if (len > sizeof(peercred))
1362 len = sizeof(peercred);
1363 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1364 if (copy_to_user(optval, &peercred, len))
1365 return -EFAULT;
1366 goto lenout;
1367 }
1368
1369 case SO_PEERGROUPS:
1370 {
1371 int ret, n;
1372
1373 if (!sk->sk_peer_cred)
1374 return -ENODATA;
1375
1376 n = sk->sk_peer_cred->group_info->ngroups;
1377 if (len < n * sizeof(gid_t)) {
1378 len = n * sizeof(gid_t);
1379 return put_user(len, optlen) ? -EFAULT : -ERANGE;
1380 }
1381 len = n * sizeof(gid_t);
1382
1383 ret = groups_to_user((gid_t __user *)optval,
1384 sk->sk_peer_cred->group_info);
1385 if (ret)
1386 return ret;
1387 goto lenout;
1388 }
1389
1390 case SO_PEERNAME:
1391 {
1392 char address[128];
1393
1394 lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
1395 if (lv < 0)
1396 return -ENOTCONN;
1397 if (lv < len)
1398 return -EINVAL;
1399 if (copy_to_user(optval, address, len))
1400 return -EFAULT;
1401 goto lenout;
1402 }
1403
1404
1405
1406
1407 case SO_ACCEPTCONN:
1408 v.val = sk->sk_state == TCP_LISTEN;
1409 break;
1410
1411 case SO_PASSSEC:
1412 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1413 break;
1414
1415 case SO_PEERSEC:
1416 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1417
1418 case SO_MARK:
1419 v.val = sk->sk_mark;
1420 break;
1421
1422 case SO_RXQ_OVFL:
1423 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1424 break;
1425
1426 case SO_WIFI_STATUS:
1427 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1428 break;
1429
1430 case SO_PEEK_OFF:
1431 if (!sock->ops->set_peek_off)
1432 return -EOPNOTSUPP;
1433
1434 v.val = sk->sk_peek_off;
1435 break;
1436 case SO_NOFCS:
1437 v.val = sock_flag(sk, SOCK_NOFCS);
1438 break;
1439
1440 case SO_BINDTODEVICE:
1441 return sock_getbindtodevice(sk, optval, optlen, len);
1442
1443 case SO_GET_FILTER:
1444 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1445 if (len < 0)
1446 return len;
1447
1448 goto lenout;
1449
1450 case SO_LOCK_FILTER:
1451 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1452 break;
1453
1454 case SO_BPF_EXTENSIONS:
1455 v.val = bpf_tell_extensions();
1456 break;
1457
1458 case SO_SELECT_ERR_QUEUE:
1459 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1460 break;
1461
1462 #ifdef CONFIG_NET_RX_BUSY_POLL
1463 case SO_BUSY_POLL:
1464 v.val = sk->sk_ll_usec;
1465 break;
1466 #endif
1467
1468 case SO_MAX_PACING_RATE:
1469 if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
1470 lv = sizeof(v.ulval);
1471 v.ulval = sk->sk_max_pacing_rate;
1472 } else {
1473
1474 v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
1475 }
1476 break;
1477
1478 case SO_INCOMING_CPU:
1479 v.val = READ_ONCE(sk->sk_incoming_cpu);
1480 break;
1481
1482 case SO_MEMINFO:
1483 {
1484 u32 meminfo[SK_MEMINFO_VARS];
1485
1486 sk_get_meminfo(sk, meminfo);
1487
1488 len = min_t(unsigned int, len, sizeof(meminfo));
1489 if (copy_to_user(optval, &meminfo, len))
1490 return -EFAULT;
1491
1492 goto lenout;
1493 }
1494
1495 #ifdef CONFIG_NET_RX_BUSY_POLL
1496 case SO_INCOMING_NAPI_ID:
1497 v.val = READ_ONCE(sk->sk_napi_id);
1498
1499
1500 if (v.val < MIN_NAPI_ID)
1501 v.val = 0;
1502
1503 break;
1504 #endif
1505
1506 case SO_COOKIE:
1507 lv = sizeof(u64);
1508 if (len < lv)
1509 return -EINVAL;
1510 v.val64 = sock_gen_cookie(sk);
1511 break;
1512
1513 case SO_ZEROCOPY:
1514 v.val = sock_flag(sk, SOCK_ZEROCOPY);
1515 break;
1516
1517 case SO_TXTIME:
1518 lv = sizeof(v.txtime);
1519 v.txtime.clockid = sk->sk_clockid;
1520 v.txtime.flags |= sk->sk_txtime_deadline_mode ?
1521 SOF_TXTIME_DEADLINE_MODE : 0;
1522 v.txtime.flags |= sk->sk_txtime_report_errors ?
1523 SOF_TXTIME_REPORT_ERRORS : 0;
1524 break;
1525
1526 case SO_BINDTOIFINDEX:
1527 v.val = sk->sk_bound_dev_if;
1528 break;
1529
1530 default:
1531
1532
1533
1534 return -ENOPROTOOPT;
1535 }
1536
1537 if (len > lv)
1538 len = lv;
1539 if (copy_to_user(optval, &v, len))
1540 return -EFAULT;
1541 lenout:
1542 if (put_user(len, optlen))
1543 return -EFAULT;
1544 return 0;
1545 }
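Illustrative userspace counterpart (not part of sock.c) of the SO_PEERCRED branch above, which relies on cred_to_ucred(): querying peer credentials on a connected AF_UNIX socket. The 'fd' is assumed to be supplied by the caller.

#define _GNU_SOURCE		/* struct ucred */
#include <stdio.h>
#include <sys/socket.h>

static void print_peer_creds(int fd)
{
	struct ucred peer;
	socklen_t len = sizeof(peer);

	if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &peer, &len) == 0)
		printf("peer pid=%d uid=%u gid=%u\n", peer.pid, peer.uid, peer.gid);
}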
1546
1547
1548
1549
1550
1551
1552 static inline void sock_lock_init(struct sock *sk)
1553 {
1554 if (sk->sk_kern_sock)
1555 sock_lock_init_class_and_name(
1556 sk,
1557 af_family_kern_slock_key_strings[sk->sk_family],
1558 af_family_kern_slock_keys + sk->sk_family,
1559 af_family_kern_key_strings[sk->sk_family],
1560 af_family_kern_keys + sk->sk_family);
1561 else
1562 sock_lock_init_class_and_name(
1563 sk,
1564 af_family_slock_key_strings[sk->sk_family],
1565 af_family_slock_keys + sk->sk_family,
1566 af_family_key_strings[sk->sk_family],
1567 af_family_keys + sk->sk_family);
1568 }
1569
1570
1571
1572
1573
1574
1575 static void sock_copy(struct sock *nsk, const struct sock *osk)
1576 {
1577 #ifdef CONFIG_SECURITY_NETWORK
1578 void *sptr = nsk->sk_security;
1579 #endif
1580 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1581
1582 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1583 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1584
1585 #ifdef CONFIG_SECURITY_NETWORK
1586 nsk->sk_security = sptr;
1587 security_sk_clone(osk, nsk);
1588 #endif
1589 }
1590
1591 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1592 int family)
1593 {
1594 struct sock *sk;
1595 struct kmem_cache *slab;
1596
1597 slab = prot->slab;
1598 if (slab != NULL) {
1599 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1600 if (!sk)
1601 return sk;
1602 if (want_init_on_alloc(priority))
1603 sk_prot_clear_nulls(sk, prot->obj_size);
1604 } else
1605 sk = kmalloc(prot->obj_size, priority);
1606
1607 if (sk != NULL) {
1608 if (security_sk_alloc(sk, family, priority))
1609 goto out_free;
1610
1611 if (!try_module_get(prot->owner))
1612 goto out_free_sec;
1613 sk_tx_queue_clear(sk);
1614 }
1615
1616 return sk;
1617
1618 out_free_sec:
1619 security_sk_free(sk);
1620 out_free:
1621 if (slab != NULL)
1622 kmem_cache_free(slab, sk);
1623 else
1624 kfree(sk);
1625 return NULL;
1626 }
1627
1628 static void sk_prot_free(struct proto *prot, struct sock *sk)
1629 {
1630 struct kmem_cache *slab;
1631 struct module *owner;
1632
1633 owner = prot->owner;
1634 slab = prot->slab;
1635
1636 cgroup_sk_free(&sk->sk_cgrp_data);
1637 mem_cgroup_sk_free(sk);
1638 security_sk_free(sk);
1639 if (slab != NULL)
1640 kmem_cache_free(slab, sk);
1641 else
1642 kfree(sk);
1643 module_put(owner);
1644 }
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1655 struct proto *prot, int kern)
1656 {
1657 struct sock *sk;
1658
1659 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1660 if (sk) {
1661 sk->sk_family = family;
1662
1663
1664
1665
1666 sk->sk_prot = sk->sk_prot_creator = prot;
1667 sk->sk_kern_sock = kern;
1668 sock_lock_init(sk);
1669 sk->sk_net_refcnt = kern ? 0 : 1;
1670 if (likely(sk->sk_net_refcnt)) {
1671 get_net(net);
1672 sock_inuse_add(net, 1);
1673 }
1674
1675 sock_net_set(sk, net);
1676 refcount_set(&sk->sk_wmem_alloc, 1);
1677
1678 mem_cgroup_sk_alloc(sk);
1679 cgroup_sk_alloc(&sk->sk_cgrp_data);
1680 sock_update_classid(&sk->sk_cgrp_data);
1681 sock_update_netprioidx(&sk->sk_cgrp_data);
1682 }
1683
1684 return sk;
1685 }
1686 EXPORT_SYMBOL(sk_alloc);
1687
1688
1689
1690
1691 static void __sk_destruct(struct rcu_head *head)
1692 {
1693 struct sock *sk = container_of(head, struct sock, sk_rcu);
1694 struct sk_filter *filter;
1695
1696 if (sk->sk_destruct)
1697 sk->sk_destruct(sk);
1698
1699 filter = rcu_dereference_check(sk->sk_filter,
1700 refcount_read(&sk->sk_wmem_alloc) == 0);
1701 if (filter) {
1702 sk_filter_uncharge(sk, filter);
1703 RCU_INIT_POINTER(sk->sk_filter, NULL);
1704 }
1705
1706 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1707
1708 #ifdef CONFIG_BPF_SYSCALL
1709 bpf_sk_storage_free(sk);
1710 #endif
1711
1712 if (atomic_read(&sk->sk_omem_alloc))
1713 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1714 __func__, atomic_read(&sk->sk_omem_alloc));
1715
1716 if (sk->sk_frag.page) {
1717 put_page(sk->sk_frag.page);
1718 sk->sk_frag.page = NULL;
1719 }
1720
1721 if (sk->sk_peer_cred)
1722 put_cred(sk->sk_peer_cred);
1723 put_pid(sk->sk_peer_pid);
1724 if (likely(sk->sk_net_refcnt))
1725 put_net(sock_net(sk));
1726 sk_prot_free(sk->sk_prot_creator, sk);
1727 }
1728
1729 void sk_destruct(struct sock *sk)
1730 {
1731 bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
1732
1733 if (rcu_access_pointer(sk->sk_reuseport_cb)) {
1734 reuseport_detach_sock(sk);
1735 use_call_rcu = true;
1736 }
1737
1738 if (use_call_rcu)
1739 call_rcu(&sk->sk_rcu, __sk_destruct);
1740 else
1741 __sk_destruct(&sk->sk_rcu);
1742 }
1743
1744 static void __sk_free(struct sock *sk)
1745 {
1746 if (likely(sk->sk_net_refcnt))
1747 sock_inuse_add(sock_net(sk), -1);
1748
1749 if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
1750 sock_diag_broadcast_destroy(sk);
1751 else
1752 sk_destruct(sk);
1753 }
1754
1755 void sk_free(struct sock *sk)
1756 {
1757
1758
1759
1760
1761
1762 if (refcount_dec_and_test(&sk->sk_wmem_alloc))
1763 __sk_free(sk);
1764 }
1765 EXPORT_SYMBOL(sk_free);
1766
1767 static void sk_init_common(struct sock *sk)
1768 {
1769 skb_queue_head_init(&sk->sk_receive_queue);
1770 skb_queue_head_init(&sk->sk_write_queue);
1771 skb_queue_head_init(&sk->sk_error_queue);
1772
1773 rwlock_init(&sk->sk_callback_lock);
1774 lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
1775 af_rlock_keys + sk->sk_family,
1776 af_family_rlock_key_strings[sk->sk_family]);
1777 lockdep_set_class_and_name(&sk->sk_write_queue.lock,
1778 af_wlock_keys + sk->sk_family,
1779 af_family_wlock_key_strings[sk->sk_family]);
1780 lockdep_set_class_and_name(&sk->sk_error_queue.lock,
1781 af_elock_keys + sk->sk_family,
1782 af_family_elock_key_strings[sk->sk_family]);
1783 lockdep_set_class_and_name(&sk->sk_callback_lock,
1784 af_callback_keys + sk->sk_family,
1785 af_family_clock_key_strings[sk->sk_family]);
1786 }
1787
1788
1789
1790
1791
1792
1793
1794
1795 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1796 {
1797 struct sock *newsk;
1798 bool is_charged = true;
1799
1800 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1801 if (newsk != NULL) {
1802 struct sk_filter *filter;
1803
1804 sock_copy(newsk, sk);
1805
1806 newsk->sk_prot_creator = sk->sk_prot;
1807
1808
1809 if (likely(newsk->sk_net_refcnt))
1810 get_net(sock_net(newsk));
1811 sk_node_init(&newsk->sk_node);
1812 sock_lock_init(newsk);
1813 bh_lock_sock(newsk);
1814 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1815 newsk->sk_backlog.len = 0;
1816
1817 atomic_set(&newsk->sk_rmem_alloc, 0);
1818
1819
1820
1821 refcount_set(&newsk->sk_wmem_alloc, 1);
1822 atomic_set(&newsk->sk_omem_alloc, 0);
1823 sk_init_common(newsk);
1824
1825 newsk->sk_dst_cache = NULL;
1826 newsk->sk_dst_pending_confirm = 0;
1827 newsk->sk_wmem_queued = 0;
1828 newsk->sk_forward_alloc = 0;
1829 atomic_set(&newsk->sk_drops, 0);
1830 newsk->sk_send_head = NULL;
1831 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1832 atomic_set(&newsk->sk_zckey, 0);
1833
1834 sock_reset_flag(newsk, SOCK_DONE);
1835
1836
1837 newsk->sk_memcg = NULL;
1838
1839 cgroup_sk_alloc(&newsk->sk_cgrp_data);
1840
1841 rcu_read_lock();
1842 filter = rcu_dereference(sk->sk_filter);
1843 if (filter != NULL)
1844
1845
1846
1847
1848 is_charged = sk_filter_charge(newsk, filter);
1849 RCU_INIT_POINTER(newsk->sk_filter, filter);
1850 rcu_read_unlock();
1851
1852 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
1853
1854
1855
1856
1857 if (!is_charged)
1858 RCU_INIT_POINTER(newsk->sk_filter, NULL);
1859 sk_free_unlock_clone(newsk);
1860 newsk = NULL;
1861 goto out;
1862 }
1863 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
1864
1865 if (bpf_sk_storage_clone(sk, newsk)) {
1866 sk_free_unlock_clone(newsk);
1867 newsk = NULL;
1868 goto out;
1869 }
1870
1871 newsk->sk_err = 0;
1872 newsk->sk_err_soft = 0;
1873 newsk->sk_priority = 0;
1874 newsk->sk_incoming_cpu = raw_smp_processor_id();
1875 if (likely(newsk->sk_net_refcnt))
1876 sock_inuse_add(sock_net(newsk), 1);
1877
1878
1879
1880
1881
1882 smp_wmb();
1883 refcount_set(&newsk->sk_refcnt, 2);
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896 sk_refcnt_debug_inc(newsk);
1897 sk_set_socket(newsk, NULL);
1898 RCU_INIT_POINTER(newsk->sk_wq, NULL);
1899
1900 if (newsk->sk_prot->sockets_allocated)
1901 sk_sockets_allocated_inc(newsk);
1902
1903 if (sock_needs_netstamp(sk) &&
1904 newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1905 net_enable_timestamp();
1906 }
1907 out:
1908 return newsk;
1909 }
1910 EXPORT_SYMBOL_GPL(sk_clone_lock);
1911
1912 void sk_free_unlock_clone(struct sock *sk)
1913 {
1914
1915
1916 sk->sk_destruct = NULL;
1917 bh_unlock_sock(sk);
1918 sk_free(sk);
1919 }
1920 EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
1921
1922 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1923 {
1924 u32 max_segs = 1;
1925
1926 sk_dst_set(sk, dst);
1927 sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
1928 if (sk->sk_route_caps & NETIF_F_GSO)
1929 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1930 sk->sk_route_caps &= ~sk->sk_route_nocaps;
1931 if (sk_can_gso(sk)) {
1932 if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
1933 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1934 } else {
1935 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1936 sk->sk_gso_max_size = dst->dev->gso_max_size;
1937 max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
1938 }
1939 }
1940 sk->sk_gso_max_segs = max_segs;
1941 }
1942 EXPORT_SYMBOL_GPL(sk_setup_caps);
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952 void sock_wfree(struct sk_buff *skb)
1953 {
1954 struct sock *sk = skb->sk;
1955 unsigned int len = skb->truesize;
1956
1957 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1958
1959
1960
1961
1962 WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
1963 sk->sk_write_space(sk);
1964 len = 1;
1965 }
1966
1967
1968
1969
1970 if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
1971 __sk_free(sk);
1972 }
1973 EXPORT_SYMBOL(sock_wfree);
1974
1975
1976
1977
1978 void __sock_wfree(struct sk_buff *skb)
1979 {
1980 struct sock *sk = skb->sk;
1981
1982 if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
1983 __sk_free(sk);
1984 }
1985
1986 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1987 {
1988 skb_orphan(skb);
1989 skb->sk = sk;
1990 #ifdef CONFIG_INET
1991 if (unlikely(!sk_fullsock(sk))) {
1992 skb->destructor = sock_edemux;
1993 sock_hold(sk);
1994 return;
1995 }
1996 #endif
1997 skb->destructor = sock_wfree;
1998 skb_set_hash_from_sk(skb, sk);
1999
2000
2001
2002
2003
2004 refcount_add(skb->truesize, &sk->sk_wmem_alloc);
2005 }
2006 EXPORT_SYMBOL(skb_set_owner_w);
2007
2008 static bool can_skb_orphan_partial(const struct sk_buff *skb)
2009 {
2010 #ifdef CONFIG_TLS_DEVICE
2011
2012
2013
2014 if (skb->decrypted)
2015 return false;
2016 #endif
2017 return (skb->destructor == sock_wfree ||
2018 (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
2019 }
2020
2021
2022
2023
2024
2025
2026
2027 void skb_orphan_partial(struct sk_buff *skb)
2028 {
2029 if (skb_is_tcp_pure_ack(skb))
2030 return;
2031
2032 if (can_skb_orphan_partial(skb)) {
2033 struct sock *sk = skb->sk;
2034
2035 if (refcount_inc_not_zero(&sk->sk_refcnt)) {
2036 WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
2037 skb->destructor = sock_efree;
2038 }
2039 } else {
2040 skb_orphan(skb);
2041 }
2042 }
2043 EXPORT_SYMBOL(skb_orphan_partial);
2044
2045
2046
2047
2048 void sock_rfree(struct sk_buff *skb)
2049 {
2050 struct sock *sk = skb->sk;
2051 unsigned int len = skb->truesize;
2052
2053 atomic_sub(len, &sk->sk_rmem_alloc);
2054 sk_mem_uncharge(sk, len);
2055 }
2056 EXPORT_SYMBOL(sock_rfree);
2057
2058
2059
2060
2061
2062 void sock_efree(struct sk_buff *skb)
2063 {
2064 sock_put(skb->sk);
2065 }
2066 EXPORT_SYMBOL(sock_efree);
2067
2068 kuid_t sock_i_uid(struct sock *sk)
2069 {
2070 kuid_t uid;
2071
2072 read_lock_bh(&sk->sk_callback_lock);
2073 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2074 read_unlock_bh(&sk->sk_callback_lock);
2075 return uid;
2076 }
2077 EXPORT_SYMBOL(sock_i_uid);
2078
2079 unsigned long sock_i_ino(struct sock *sk)
2080 {
2081 unsigned long ino;
2082
2083 read_lock_bh(&sk->sk_callback_lock);
2084 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
2085 read_unlock_bh(&sk->sk_callback_lock);
2086 return ino;
2087 }
2088 EXPORT_SYMBOL(sock_i_ino);
2089
2090
2091
2092
2093 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
2094 gfp_t priority)
2095 {
2096 if (force ||
2097 refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
2098 struct sk_buff *skb = alloc_skb(size, priority);
2099
2100 if (skb) {
2101 skb_set_owner_w(skb, sk);
2102 return skb;
2103 }
2104 }
2105 return NULL;
2106 }
2107 EXPORT_SYMBOL(sock_wmalloc);
2108
2109 static void sock_ofree(struct sk_buff *skb)
2110 {
2111 struct sock *sk = skb->sk;
2112
2113 atomic_sub(skb->truesize, &sk->sk_omem_alloc);
2114 }
2115
2116 struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
2117 gfp_t priority)
2118 {
2119 struct sk_buff *skb;
2120
2121
2122 if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
2123 sysctl_optmem_max)
2124 return NULL;
2125
2126 skb = alloc_skb(size, priority);
2127 if (!skb)
2128 return NULL;
2129
2130 atomic_add(skb->truesize, &sk->sk_omem_alloc);
2131 skb->sk = sk;
2132 skb->destructor = sock_ofree;
2133 return skb;
2134 }
2135
2136
2137
2138
2139 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
2140 {
2141 if ((unsigned int)size <= sysctl_optmem_max &&
2142 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
2143 void *mem;
2144
2145
2146
2147 atomic_add(size, &sk->sk_omem_alloc);
2148 mem = kmalloc(size, priority);
2149 if (mem)
2150 return mem;
2151 atomic_sub(size, &sk->sk_omem_alloc);
2152 }
2153 return NULL;
2154 }
2155 EXPORT_SYMBOL(sock_kmalloc);
2156
2157
2158
2159
2160
2161 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
2162 const bool nullify)
2163 {
2164 if (WARN_ON_ONCE(!mem))
2165 return;
2166 if (nullify)
2167 kzfree(mem);
2168 else
2169 kfree(mem);
2170 atomic_sub(size, &sk->sk_omem_alloc);
2171 }
2172
2173 void sock_kfree_s(struct sock *sk, void *mem, int size)
2174 {
2175 __sock_kfree_s(sk, mem, size, false);
2176 }
2177 EXPORT_SYMBOL(sock_kfree_s);
2178
2179 void sock_kzfree_s(struct sock *sk, void *mem, int size)
2180 {
2181 __sock_kfree_s(sk, mem, size, true);
2182 }
2183 EXPORT_SYMBOL(sock_kzfree_s);
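Illustrative sketch (not part of sock.c): a protocol's setsockopt handler might stage a user-supplied option blob through sock_kmalloc()/sock_kfree_s() so that it is charged against sk_omem_alloc and bounded by sysctl_optmem_max; the function name and arguments below are assumptions.

#include <net/sock.h>
#include <linux/uaccess.h>

static int example_stash_opt(struct sock *sk, char __user *optval, int optlen)
{
	void *buf = sock_kmalloc(sk, optlen, GFP_KERNEL);	/* charged to sk_omem_alloc */

	if (!buf)
		return -ENOBUFS;
	if (copy_from_user(buf, optval, optlen)) {
		sock_kfree_s(sk, buf, optlen);			/* uncharge and free */
		return -EFAULT;
	}
	/* ... install or consume buf, then release it the same way ... */
	sock_kfree_s(sk, buf, optlen);
	return 0;
}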
2184
2185
2186
2187
2188 static long sock_wait_for_wmem(struct sock *sk, long timeo)
2189 {
2190 DEFINE_WAIT(wait);
2191
2192 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2193 for (;;) {
2194 if (!timeo)
2195 break;
2196 if (signal_pending(current))
2197 break;
2198 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2199 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2200 if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
2201 break;
2202 if (sk->sk_shutdown & SEND_SHUTDOWN)
2203 break;
2204 if (sk->sk_err)
2205 break;
2206 timeo = schedule_timeout(timeo);
2207 }
2208 finish_wait(sk_sleep(sk), &wait);
2209 return timeo;
2210 }
2211
2212
2213
2214
2215
2216
2217 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
2218 unsigned long data_len, int noblock,
2219 int *errcode, int max_page_order)
2220 {
2221 struct sk_buff *skb;
2222 long timeo;
2223 int err;
2224
2225 timeo = sock_sndtimeo(sk, noblock);
2226 for (;;) {
2227 err = sock_error(sk);
2228 if (err != 0)
2229 goto failure;
2230
2231 err = -EPIPE;
2232 if (sk->sk_shutdown & SEND_SHUTDOWN)
2233 goto failure;
2234
2235 if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
2236 break;
2237
2238 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2239 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2240 err = -EAGAIN;
2241 if (!timeo)
2242 goto failure;
2243 if (signal_pending(current))
2244 goto interrupted;
2245 timeo = sock_wait_for_wmem(sk, timeo);
2246 }
2247 skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
2248 errcode, sk->sk_allocation);
2249 if (skb)
2250 skb_set_owner_w(skb, sk);
2251 return skb;
2252
2253 interrupted:
2254 err = sock_intr_errno(timeo);
2255 failure:
2256 *errcode = err;
2257 return NULL;
2258 }
2259 EXPORT_SYMBOL(sock_alloc_send_pskb);
2260
2261 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
2262 int noblock, int *errcode)
2263 {
2264 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
2265 }
2266 EXPORT_SYMBOL(sock_alloc_send_skb);
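Illustrative sketch (not part of sock.c): a simple datagram sendmsg implementation typically obtains its skb via sock_alloc_send_skb() so that send-buffer accounting, blocking and timeout handling come for free; the function name and the placeholder transmit step are assumptions.

#include <net/sock.h>
#include <linux/skbuff.h>

static int example_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	int err;
	struct sk_buff *skb = sock_alloc_send_skb(sk, len,
						  msg->msg_flags & MSG_DONTWAIT, &err);

	if (!skb)
		return err;			/* -EAGAIN, -EPIPE, -ERESTARTSYS, ... */
	err = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (err) {
		kfree_skb(skb);
		return err;
	}
	kfree_skb(skb);		/* placeholder for handing skb to the transmit path */
	return len;
}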
2267
2268 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
2269 struct sockcm_cookie *sockc)
2270 {
2271 u32 tsflags;
2272
2273 switch (cmsg->cmsg_type) {
2274 case SO_MARK:
2275 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2276 return -EPERM;
2277 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2278 return -EINVAL;
2279 sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2280 break;
2281 case SO_TIMESTAMPING_OLD:
2282 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2283 return -EINVAL;
2284
2285 tsflags = *(u32 *)CMSG_DATA(cmsg);
2286 if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
2287 return -EINVAL;
2288
2289 sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
2290 sockc->tsflags |= tsflags;
2291 break;
2292 case SCM_TXTIME:
2293 if (!sock_flag(sk, SOCK_TXTIME))
2294 return -EINVAL;
2295 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
2296 return -EINVAL;
2297 sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
2298 break;
2299
2300 case SCM_RIGHTS:
2301 case SCM_CREDENTIALS:
2302 break;
2303 default:
2304 return -EINVAL;
2305 }
2306 return 0;
2307 }
2308 EXPORT_SYMBOL(__sock_cmsg_send);
2309
2310 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2311 struct sockcm_cookie *sockc)
2312 {
2313 struct cmsghdr *cmsg;
2314 int ret;
2315
2316 for_each_cmsghdr(cmsg, msg) {
2317 if (!CMSG_OK(msg, cmsg))
2318 return -EINVAL;
2319 if (cmsg->cmsg_level != SOL_SOCKET)
2320 continue;
2321 ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
2322 if (ret)
2323 return ret;
2324 }
2325 return 0;
2326 }
2327 EXPORT_SYMBOL(sock_cmsg_send);
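Illustrative userspace counterpart (not part of sock.c) of the SO_MARK branch in __sock_cmsg_send() above: attaching a per-call mark as SOL_SOCKET ancillary data, which requires CAP_NET_ADMIN in the socket's network namespace; 'fd' and 'iov' are assumed to be prepared by the caller.

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static ssize_t send_with_mark(int fd, struct iovec *iov, unsigned int mark)
{
	char cbuf[CMSG_SPACE(sizeof(mark))] = { 0 };
	struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
			      .msg_control = cbuf, .msg_controllen = sizeof(cbuf) };
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type  = SO_MARK;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(mark));
	memcpy(CMSG_DATA(cmsg), &mark, sizeof(mark));
	return sendmsg(fd, &msg, 0);
}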
2328
2329 static void sk_enter_memory_pressure(struct sock *sk)
2330 {
2331 if (!sk->sk_prot->enter_memory_pressure)
2332 return;
2333
2334 sk->sk_prot->enter_memory_pressure(sk);
2335 }
2336
2337 static void sk_leave_memory_pressure(struct sock *sk)
2338 {
2339 if (sk->sk_prot->leave_memory_pressure) {
2340 sk->sk_prot->leave_memory_pressure(sk);
2341 } else {
2342 unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
2343
2344 if (memory_pressure && READ_ONCE(*memory_pressure))
2345 WRITE_ONCE(*memory_pressure, 0);
2346 }
2347 }
2348
2349
2350 #define SKB_FRAG_PAGE_ORDER get_order(32768)
2351 DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
2364 {
2365 if (pfrag->page) {
2366 if (page_ref_count(pfrag->page) == 1) {
2367 pfrag->offset = 0;
2368 return true;
2369 }
2370 if (pfrag->offset + sz <= pfrag->size)
2371 return true;
2372 put_page(pfrag->page);
2373 }
2374
2375 pfrag->offset = 0;
2376 if (SKB_FRAG_PAGE_ORDER &&
2377 !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
2378
2379 pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2380 __GFP_COMP | __GFP_NOWARN |
2381 __GFP_NORETRY,
2382 SKB_FRAG_PAGE_ORDER);
2383 if (likely(pfrag->page)) {
2384 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
2385 return true;
2386 }
2387 }
2388 pfrag->page = alloc_page(gfp);
2389 if (likely(pfrag->page)) {
2390 pfrag->size = PAGE_SIZE;
2391 return true;
2392 }
2393 return false;
2394 }
2395 EXPORT_SYMBOL(skb_page_frag_refill);
2396
2397 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2398 {
2399 if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2400 return true;
2401
2402 sk_enter_memory_pressure(sk);
2403 sk_stream_moderate_sndbuf(sk);
2404 return false;
2405 }
2406 EXPORT_SYMBOL(sk_page_frag_refill);
2407
2408 static void __lock_sock(struct sock *sk)
2409 __releases(&sk->sk_lock.slock)
2410 __acquires(&sk->sk_lock.slock)
2411 {
2412 DEFINE_WAIT(wait);
2413
2414 for (;;) {
2415 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2416 TASK_UNINTERRUPTIBLE);
2417 spin_unlock_bh(&sk->sk_lock.slock);
2418 schedule();
2419 spin_lock_bh(&sk->sk_lock.slock);
2420 if (!sock_owned_by_user(sk))
2421 break;
2422 }
2423 finish_wait(&sk->sk_lock.wq, &wait);
2424 }
2425
2426 void __release_sock(struct sock *sk)
2427 __releases(&sk->sk_lock.slock)
2428 __acquires(&sk->sk_lock.slock)
2429 {
2430 struct sk_buff *skb, *next;
2431
2432 while ((skb = sk->sk_backlog.head) != NULL) {
2433 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2434
2435 spin_unlock_bh(&sk->sk_lock.slock);
2436
2437 do {
2438 next = skb->next;
2439 prefetch(next);
2440 WARN_ON_ONCE(skb_dst_is_noref(skb));
2441 skb_mark_not_on_list(skb);
2442 sk_backlog_rcv(sk, skb);
2443
2444 cond_resched();
2445
2446 skb = next;
2447 } while (skb != NULL);
2448
2449 spin_lock_bh(&sk->sk_lock.slock);
2450 }
2451
2452 /*
2453 * Doing the zeroing here guarantees we can not loop forever
2454 * while a wild producer attempts to flood us.
2455 */
2456 sk->sk_backlog.len = 0;
2457 }
2458
2459 void __sk_flush_backlog(struct sock *sk)
2460 {
2461 spin_lock_bh(&sk->sk_lock.slock);
2462 __release_sock(sk);
2463 spin_unlock_bh(&sk->sk_lock.slock);
2464 }
2465
2466
2467 /**
2468 * sk_wait_data - wait for data to arrive at sk_receive_queue
2469 * @sk: sock to wait on
2470 * @timeo: for how long
2471 * @skb: last skb seen on sk_receive_queue
2472 *
2473 * Socket state, including sk->sk_err, only changes under the socket lock,
2474 * so checks can be omitted after joining the wait queue; the receive queue
2475 * is re-checked before schedule() purely as an optimization.
2476 */
2477 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
2478 {
2479 DEFINE_WAIT_FUNC(wait, woken_wake_function);
2480 int rc;
2481
2482 add_wait_queue(sk_sleep(sk), &wait);
2483 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2484 rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
2485 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2486 remove_wait_queue(sk_sleep(sk), &wait);
2487 return rc;
2488 }
2489 EXPORT_SYMBOL(sk_wait_data);
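
/* Usage sketch (illustrative): a blocking recvmsg() loop built on
 * sk_wait_data(); the socket lock must be held on entry and is re-acquired
 * before sk_wait_data() returns:
 *
 *	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *	struct sk_buff *skb;
 *
 *	lock_sock(sk);
 *	while (!(skb = skb_peek(&sk->sk_receive_queue))) {
 *		if (!timeo || signal_pending(current))
 *			break;
 *		sk_wait_data(sk, &timeo, NULL);
 *	}
 *	release_sock(sk);
 */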
2490
2491 /**
2492 * __sk_mem_raise_allocated - increase memory_allocated
2493 * @sk: socket
2494 * @size: memory size to allocate
2495 * @amt: pages to allocate
2496 * @kind: allocation type
2497 *
2498 * Like __sk_mem_schedule(), but does not update sk_forward_alloc.
2499 */
2500 int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
2501 {
2502 struct proto *prot = sk->sk_prot;
2503 long allocated = sk_memory_allocated_add(sk, amt);
2504 bool charged = true;
2505
2506 if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
2507 !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
2508 goto suppress_allocation;
2509
2510 /* Under limit. */
2511 if (allocated <= sk_prot_mem_limits(sk, 0)) {
2512 sk_leave_memory_pressure(sk);
2513 return 1;
2514 }
2515
2516 /* Under pressure. */
2517 if (allocated > sk_prot_mem_limits(sk, 1))
2518 sk_enter_memory_pressure(sk);
2519
2520 /* Over hard limit. */
2521 if (allocated > sk_prot_mem_limits(sk, 2))
2522 goto suppress_allocation;
2523
2524 /* Guarantee a minimum buffer size under pressure. */
2525 if (kind == SK_MEM_RECV) {
2526 if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
2527 return 1;
2528
2529 } else {
2530 int wmem0 = sk_get_wmem0(sk, prot);
2531
2532 if (sk->sk_type == SOCK_STREAM) {
2533 if (sk->sk_wmem_queued < wmem0)
2534 return 1;
2535 } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
2536 return 1;
2537 }
2538 }
2539
2540 if (sk_has_memory_pressure(sk)) {
2541 u64 alloc;
2542
2543 if (!sk_under_memory_pressure(sk))
2544 return 1;
2545 alloc = sk_sockets_allocated_read_positive(sk);
2546 if (sk_prot_mem_limits(sk, 2) > alloc *
2547 sk_mem_pages(sk->sk_wmem_queued +
2548 atomic_read(&sk->sk_rmem_alloc) +
2549 sk->sk_forward_alloc))
2550 return 1;
2551 }
2552
2553 suppress_allocation:
2554
2555 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2556 sk_stream_moderate_sndbuf(sk);
2557
2558 /* Fail only if the socket is _under_ its sndbuf.
2559 * In this case we cannot block, so we have to fail.
2560 */
2561 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2562 return 1;
2563 }
2564
2565 if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
2566 trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
2567
2568 sk_memory_allocated_sub(sk, amt);
2569
2570 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2571 mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
2572
2573 return 0;
2574 }
2575 EXPORT_SYMBOL(__sk_mem_raise_allocated);
2576
2577 /**
2578 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2579 * @sk: socket
2580 * @size: memory size to allocate
2581 * @kind: allocation type
2582 *
2583 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2584 * rmem allocation. This function assumes that protocols depend on
2585 * this behavior whether or not memory pressure is being applied.
2586 */
2587 int __sk_mem_schedule(struct sock *sk, int size, int kind)
2588 {
2589 int ret, amt = sk_mem_pages(size);
2590
2591 sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
2592 ret = __sk_mem_raise_allocated(sk, size, amt, kind);
2593 if (!ret)
2594 sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
2595 return ret;
2596 }
2597 EXPORT_SYMBOL(__sk_mem_schedule);
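
/* Usage sketch (illustrative): protocols normally use the sk_rmem_schedule()
 * and sk_wmem_schedule() wrappers from include/net/sock.h, which only call
 * __sk_mem_schedule() when sk_forward_alloc is too small, e.g. when charging
 * an skb that is about to be queued:
 *
 *	if (!sk_rmem_schedule(sk, skb, skb->truesize))
 *		return -ENOBUFS;
 *	skb_set_owner_r(skb, sk);
 *	skb_queue_tail(&sk->sk_receive_queue, skb);
 */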
2598
2599 /**
2600 * __sk_mem_reduce_allocated - reclaim memory_allocated
2601 * @sk: socket
2602 * @amount: number of quanta to uncharge
2603 *
2604 * Like __sk_mem_reclaim(), but does not update sk_forward_alloc.
2605 */
2606 void __sk_mem_reduce_allocated(struct sock *sk, int amount)
2607 {
2608 sk_memory_allocated_sub(sk, amount);
2609
2610 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2611 mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
2612
2613 if (sk_under_memory_pressure(sk) &&
2614 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2615 sk_leave_memory_pressure(sk);
2616 }
2617 EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2618
2619 /**
2620 * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
2621 * @sk: socket
2622 * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2623 */
2624 void __sk_mem_reclaim(struct sock *sk, int amount)
2625 {
2626 amount >>= SK_MEM_QUANTUM_SHIFT;
2627 sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2628 __sk_mem_reduce_allocated(sk, amount);
2629 }
2630 EXPORT_SYMBOL(__sk_mem_reclaim);
2631
2632 int sk_set_peek_off(struct sock *sk, int val)
2633 {
2634 sk->sk_peek_off = val;
2635 return 0;
2636 }
2637 EXPORT_SYMBOL_GPL(sk_set_peek_off);
2638
2639
2640 /*
2641 * Default set of routines for initialising struct proto_ops when
2642 * the protocol does not support a particular function. In certain
2643 * cases where it makes no sense for a protocol to have a "do nothing"
2644 * function, some default processing is provided.
2645 */
2646 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2647 {
2648 return -EOPNOTSUPP;
2649 }
2650 EXPORT_SYMBOL(sock_no_bind);
2651
2652 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2653 int len, int flags)
2654 {
2655 return -EOPNOTSUPP;
2656 }
2657 EXPORT_SYMBOL(sock_no_connect);
2658
2659 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2660 {
2661 return -EOPNOTSUPP;
2662 }
2663 EXPORT_SYMBOL(sock_no_socketpair);
2664
2665 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
2666 bool kern)
2667 {
2668 return -EOPNOTSUPP;
2669 }
2670 EXPORT_SYMBOL(sock_no_accept);
2671
2672 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2673 int peer)
2674 {
2675 return -EOPNOTSUPP;
2676 }
2677 EXPORT_SYMBOL(sock_no_getname);
2678
2679 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2680 {
2681 return -EOPNOTSUPP;
2682 }
2683 EXPORT_SYMBOL(sock_no_ioctl);
2684
2685 int sock_no_listen(struct socket *sock, int backlog)
2686 {
2687 return -EOPNOTSUPP;
2688 }
2689 EXPORT_SYMBOL(sock_no_listen);
2690
2691 int sock_no_shutdown(struct socket *sock, int how)
2692 {
2693 return -EOPNOTSUPP;
2694 }
2695 EXPORT_SYMBOL(sock_no_shutdown);
2696
2697 int sock_no_setsockopt(struct socket *sock, int level, int optname,
2698 char __user *optval, unsigned int optlen)
2699 {
2700 return -EOPNOTSUPP;
2701 }
2702 EXPORT_SYMBOL(sock_no_setsockopt);
2703
2704 int sock_no_getsockopt(struct socket *sock, int level, int optname,
2705 char __user *optval, int __user *optlen)
2706 {
2707 return -EOPNOTSUPP;
2708 }
2709 EXPORT_SYMBOL(sock_no_getsockopt);
2710
2711 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
2712 {
2713 return -EOPNOTSUPP;
2714 }
2715 EXPORT_SYMBOL(sock_no_sendmsg);
2716
2717 int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
2718 {
2719 return -EOPNOTSUPP;
2720 }
2721 EXPORT_SYMBOL(sock_no_sendmsg_locked);
2722
2723 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
2724 int flags)
2725 {
2726 return -EOPNOTSUPP;
2727 }
2728 EXPORT_SYMBOL(sock_no_recvmsg);
2729
2730 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2731 {
2732 /* Match the error returned for files without an mmap method */
2733 return -ENODEV;
2734 }
2735 EXPORT_SYMBOL(sock_no_mmap);
2736
2737 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2738 {
2739 ssize_t res;
2740 struct msghdr msg = {.msg_flags = flags};
2741 struct kvec iov;
2742 char *kaddr = kmap(page);
2743 iov.iov_base = kaddr + offset;
2744 iov.iov_len = size;
2745 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2746 kunmap(page);
2747 return res;
2748 }
2749 EXPORT_SYMBOL(sock_no_sendpage);
2750
2751 ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
2752 int offset, size_t size, int flags)
2753 {
2754 ssize_t res;
2755 struct msghdr msg = {.msg_flags = flags};
2756 struct kvec iov;
2757 char *kaddr = kmap(page);
2758
2759 iov.iov_base = kaddr + offset;
2760 iov.iov_len = size;
2761 res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
2762 kunmap(page);
2763 return res;
2764 }
2765 EXPORT_SYMBOL(sock_no_sendpage_locked);
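
/* Usage sketch (illustrative): the sock_no_*() stubs above are meant to be
 * plugged into a struct proto_ops for operations an address family does not
 * support; the table and PF_EXAMPLE below are hypothetical:
 *
 *	static const struct proto_ops example_dgram_ops = {
 *		.family   = PF_EXAMPLE,
 *		.owner    = THIS_MODULE,
 *		.listen   = sock_no_listen,
 *		.accept   = sock_no_accept,
 *		.mmap     = sock_no_mmap,
 *		.sendpage = sock_no_sendpage,
 *	};
 */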
2766
2767
2768 /*
2769 * Default Socket Callbacks
2770 */
2771 static void sock_def_wakeup(struct sock *sk)
2772 {
2773 struct socket_wq *wq;
2774
2775 rcu_read_lock();
2776 wq = rcu_dereference(sk->sk_wq);
2777 if (skwq_has_sleeper(wq))
2778 wake_up_interruptible_all(&wq->wait);
2779 rcu_read_unlock();
2780 }
2781
2782 static void sock_def_error_report(struct sock *sk)
2783 {
2784 struct socket_wq *wq;
2785
2786 rcu_read_lock();
2787 wq = rcu_dereference(sk->sk_wq);
2788 if (skwq_has_sleeper(wq))
2789 wake_up_interruptible_poll(&wq->wait, EPOLLERR);
2790 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2791 rcu_read_unlock();
2792 }
2793
2794 static void sock_def_readable(struct sock *sk)
2795 {
2796 struct socket_wq *wq;
2797
2798 rcu_read_lock();
2799 wq = rcu_dereference(sk->sk_wq);
2800 if (skwq_has_sleeper(wq))
2801 wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
2802 EPOLLRDNORM | EPOLLRDBAND);
2803 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2804 rcu_read_unlock();
2805 }
2806
2807 static void sock_def_write_space(struct sock *sk)
2808 {
2809 struct socket_wq *wq;
2810
2811 rcu_read_lock();
2812
2813 /* Do not wake up a writer until it can make "significant"
2814 * progress, i.e. until at least half of sk_sndbuf is free.
2815 */
2816 if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
2817 wq = rcu_dereference(sk->sk_wq);
2818 if (skwq_has_sleeper(wq))
2819 wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
2820 EPOLLWRNORM | EPOLLWRBAND);
2821
2822 /* Should agree with poll, otherwise some programs break */
2823 if (sock_writeable(sk))
2824 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2825 }
2826
2827 rcu_read_unlock();
2828 }
2829
2830 static void sock_def_destruct(struct sock *sk)
2831 {
2832 }
2833
2834 void sk_send_sigurg(struct sock *sk)
2835 {
2836 if (sk->sk_socket && sk->sk_socket->file)
2837 if (send_sigurg(&sk->sk_socket->file->f_owner))
2838 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2839 }
2840 EXPORT_SYMBOL(sk_send_sigurg);
2841
2842 void sk_reset_timer(struct sock *sk, struct timer_list *timer,
2843 unsigned long expires)
2844 {
2845 if (!mod_timer(timer, expires))
2846 sock_hold(sk);
2847 }
2848 EXPORT_SYMBOL(sk_reset_timer);
2849
2850 void sk_stop_timer(struct sock *sk, struct timer_list *timer)
2851 {
2852 if (del_timer(timer))
2853 __sock_put(sk);
2854 }
2855 EXPORT_SYMBOL(sk_stop_timer);
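
/* Usage sketch (illustrative): sk_reset_timer() takes a reference on the
 * socket when it arms the timer, so a handler that does not re-arm must
 * drop that reference, and teardown paths use sk_stop_timer() so the
 * reference is released exactly once. Names below are placeholders:
 *
 *	sk_reset_timer(sk, &sk->sk_timer, jiffies + example_timeout);
 *
 *	static void example_timer(struct timer_list *t)
 *	{
 *		struct sock *sk = from_timer(sk, t, sk_timer);
 *
 *		example_do_work(sk);
 *		sock_put(sk);
 *	}
 */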
2856
2857 void sock_init_data(struct socket *sock, struct sock *sk)
2858 {
2859 sk_init_common(sk);
2860 sk->sk_send_head = NULL;
2861
2862 timer_setup(&sk->sk_timer, NULL, 0);
2863
2864 sk->sk_allocation = GFP_KERNEL;
2865 sk->sk_rcvbuf = sysctl_rmem_default;
2866 sk->sk_sndbuf = sysctl_wmem_default;
2867 sk->sk_state = TCP_CLOSE;
2868 sk_set_socket(sk, sock);
2869
2870 sock_set_flag(sk, SOCK_ZAPPED);
2871
2872 if (sock) {
2873 sk->sk_type = sock->type;
2874 RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
2875 sock->sk = sk;
2876 sk->sk_uid = SOCK_INODE(sock)->i_uid;
2877 } else {
2878 RCU_INIT_POINTER(sk->sk_wq, NULL);
2879 sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0);
2880 }
2881
2882 rwlock_init(&sk->sk_callback_lock);
2883 if (sk->sk_kern_sock)
2884 lockdep_set_class_and_name(
2885 &sk->sk_callback_lock,
2886 af_kern_callback_keys + sk->sk_family,
2887 af_family_kern_clock_key_strings[sk->sk_family]);
2888 else
2889 lockdep_set_class_and_name(
2890 &sk->sk_callback_lock,
2891 af_callback_keys + sk->sk_family,
2892 af_family_clock_key_strings[sk->sk_family]);
2893
2894 sk->sk_state_change = sock_def_wakeup;
2895 sk->sk_data_ready = sock_def_readable;
2896 sk->sk_write_space = sock_def_write_space;
2897 sk->sk_error_report = sock_def_error_report;
2898 sk->sk_destruct = sock_def_destruct;
2899
2900 sk->sk_frag.page = NULL;
2901 sk->sk_frag.offset = 0;
2902 sk->sk_peek_off = -1;
2903
2904 sk->sk_peer_pid = NULL;
2905 sk->sk_peer_cred = NULL;
2906 sk->sk_write_pending = 0;
2907 sk->sk_rcvlowat = 1;
2908 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
2909 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
2910
2911 sk->sk_stamp = SK_DEFAULT_STAMP;
2912 #if BITS_PER_LONG==32
2913 seqlock_init(&sk->sk_stamp_seq);
2914 #endif
2915 atomic_set(&sk->sk_zckey, 0);
2916
2917 #ifdef CONFIG_NET_RX_BUSY_POLL
2918 sk->sk_napi_id = 0;
2919 sk->sk_ll_usec = sysctl_net_busy_read;
2920 #endif
2921
2922 sk->sk_max_pacing_rate = ~0UL;
2923 sk->sk_pacing_rate = ~0UL;
2924 WRITE_ONCE(sk->sk_pacing_shift, 10);
2925 sk->sk_incoming_cpu = -1;
2926
2927 sk_rx_queue_clear(sk);
2928 /*
2929 * Before updating sk_refcnt, we must commit prior changes to memory
2930 * so that lockless readers never see a partially initialised socket.
2931 */
2932 smp_wmb();
2933 refcount_set(&sk->sk_refcnt, 1);
2934 atomic_set(&sk->sk_drops, 0);
2935 }
2936 EXPORT_SYMBOL(sock_init_data);
2937
2938 void lock_sock_nested(struct sock *sk, int subclass)
2939 {
2940 might_sleep();
2941 spin_lock_bh(&sk->sk_lock.slock);
2942 if (sk->sk_lock.owned)
2943 __lock_sock(sk);
2944 sk->sk_lock.owned = 1;
2945 spin_unlock(&sk->sk_lock.slock);
2946
2947 /* The sk_lock has mutex_lock() semantics here: */
2948
2949 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2950 local_bh_enable();
2951 }
2952 EXPORT_SYMBOL(lock_sock_nested);
2953
2954 void release_sock(struct sock *sk)
2955 {
2956 spin_lock_bh(&sk->sk_lock.slock);
2957 if (sk->sk_backlog.tail)
2958 __release_sock(sk);
2959
2960 /* Warning : release_cb() might need to release sk ownership,
2961 * ie call sock_release_ownership(sk) before us.
2962 */
2963 if (sk->sk_prot->release_cb)
2964 sk->sk_prot->release_cb(sk);
2965
2966 sock_release_ownership(sk);
2967 if (waitqueue_active(&sk->sk_lock.wq))
2968 wake_up(&sk->sk_lock.wq);
2969 spin_unlock_bh(&sk->sk_lock.slock);
2970 }
2971 EXPORT_SYMBOL(release_sock);
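
/* Usage sketch (illustrative): process context takes ownership of a socket
 * with lock_sock() and drops it with release_sock(), which also processes
 * any backlog that softirq handlers queued while the socket was owned:
 *
 *	lock_sock(sk);
 *	... modify protocol state, possibly sleeping ...
 *	release_sock(sk);
 */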
2972
2973
2974 /**
2975 * lock_sock_fast - fast version of lock_sock
2976 * @sk: socket
2977 *
2978 * This version should be used for very small sections, where the process
2979 * will not block.
2980 *
2981 * Returns false if the fast path is taken:
2982 *   sk_lock.slock locked, owned = 0, BH disabled
2983 * Returns true if the slow path is taken:
2984 *   sk_lock.slock unlocked, owned = 1, BH enabled
2985 */
2986 bool lock_sock_fast(struct sock *sk)
2987 {
2988 might_sleep();
2989 spin_lock_bh(&sk->sk_lock.slock);
2990
2991 if (!sk->sk_lock.owned)
2992 /*
2993 * Fast path: nobody owns the socket, return with slock held and BH disabled.
2994 */
2995 return false;
2996
2997 __lock_sock(sk);
2998 sk->sk_lock.owned = 1;
2999 spin_unlock(&sk->sk_lock.slock);
3000
3001 /* The sk_lock has mutex_lock() semantics here: */
3002
3003 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
3004 local_bh_enable();
3005 return true;
3006 }
3007 EXPORT_SYMBOL(lock_sock_fast);
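
/* Usage sketch (illustrative): the return value of lock_sock_fast() must be
 * handed back to unlock_sock_fast() so the matching unlock path is taken:
 *
 *	bool slow = lock_sock_fast(sk);
 *
 *	... short, non-sleeping critical section ...
 *	unlock_sock_fast(sk, slow);
 */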
3008
3009 int sock_gettstamp(struct socket *sock, void __user *userstamp,
3010 bool timeval, bool time32)
3011 {
3012 struct sock *sk = sock->sk;
3013 struct timespec64 ts;
3014
3015 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
3016 ts = ktime_to_timespec64(sock_read_timestamp(sk));
3017 if (ts.tv_sec == -1)
3018 return -ENOENT;
3019 if (ts.tv_sec == 0) {
3020 ktime_t kt = ktime_get_real();
3021 sock_write_timestamp(sk, kt);
3022 ts = ktime_to_timespec64(kt);
3023 }
3024
3025 if (timeval)
3026 ts.tv_nsec /= 1000;
3027
3028 #ifdef CONFIG_COMPAT_32BIT_TIME
3029 if (time32)
3030 return put_old_timespec32(&ts, userstamp);
3031 #endif
3032 #ifdef CONFIG_SPARC64
3033 /* beware of padding in the sparc64 timeval layout */
3034 if (timeval && !in_compat_syscall()) {
3035 struct __kernel_old_timeval __user tv = {
3036 .tv_sec = ts.tv_sec,
3037 .tv_usec = ts.tv_nsec,
3038 };
3039 if (copy_to_user(userstamp, &tv, sizeof(tv)))
3040 return -EFAULT;
3041 return 0;
3042 }
3043 #endif
3044 return put_timespec64(&ts, userstamp);
3045 }
3046 EXPORT_SYMBOL(sock_gettstamp);
3047
3048 void sock_enable_timestamp(struct sock *sk, int flag)
3049 {
3050 if (!sock_flag(sk, flag)) {
3051 unsigned long previous_flags = sk->sk_flags;
3052
3053 sock_set_flag(sk, flag);
3054 /*
3055 * We just set one of the two flags which require net timestamping,
3056 * but timestamping might already have been enabled because of the
3057 * other one.
3058 */
3059 if (sock_needs_netstamp(sk) &&
3060 !(previous_flags & SK_FLAGS_TIMESTAMP))
3061 net_enable_timestamp();
3062 }
3063 }
3064
3065 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
3066 int level, int type)
3067 {
3068 struct sock_exterr_skb *serr;
3069 struct sk_buff *skb;
3070 int copied, err;
3071
3072 err = -EAGAIN;
3073 skb = sock_dequeue_err_skb(sk);
3074 if (skb == NULL)
3075 goto out;
3076
3077 copied = skb->len;
3078 if (copied > len) {
3079 msg->msg_flags |= MSG_TRUNC;
3080 copied = len;
3081 }
3082 err = skb_copy_datagram_msg(skb, 0, msg, copied);
3083 if (err)
3084 goto out_free_skb;
3085
3086 sock_recv_timestamp(msg, sk, skb);
3087
3088 serr = SKB_EXT_ERR(skb);
3089 put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
3090
3091 msg->msg_flags |= MSG_ERRQUEUE;
3092 err = copied;
3093
3094 out_free_skb:
3095 kfree_skb(skb);
3096 out:
3097 return err;
3098 }
3099 EXPORT_SYMBOL(sock_recv_errqueue);
3100
3101 /*
3102 * Get a socket option on a socket.
3103 *
3104 * The generic helpers below simply forward to the protocol's own
3105 * getsockopt/recvmsg/setsockopt implementations.
3106 */
3107
3108 int sock_common_getsockopt(struct socket *sock, int level, int optname,
3109 char __user *optval, int __user *optlen)
3110 {
3111 struct sock *sk = sock->sk;
3112
3113 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
3114 }
3115 EXPORT_SYMBOL(sock_common_getsockopt);
3116
3117 #ifdef CONFIG_COMPAT
3118 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
3119 char __user *optval, int __user *optlen)
3120 {
3121 struct sock *sk = sock->sk;
3122
3123 if (sk->sk_prot->compat_getsockopt != NULL)
3124 return sk->sk_prot->compat_getsockopt(sk, level, optname,
3125 optval, optlen);
3126 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
3127 }
3128 EXPORT_SYMBOL(compat_sock_common_getsockopt);
3129 #endif
3130
3131 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
3132 int flags)
3133 {
3134 struct sock *sk = sock->sk;
3135 int addr_len = 0;
3136 int err;
3137
3138 err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
3139 flags & ~MSG_DONTWAIT, &addr_len);
3140 if (err >= 0)
3141 msg->msg_namelen = addr_len;
3142 return err;
3143 }
3144 EXPORT_SYMBOL(sock_common_recvmsg);
3145
3146
3147 /* Set a socket option on a socket: forwards to the protocol's handler. */
3148
3149 int sock_common_setsockopt(struct socket *sock, int level, int optname,
3150 char __user *optval, unsigned int optlen)
3151 {
3152 struct sock *sk = sock->sk;
3153
3154 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
3155 }
3156 EXPORT_SYMBOL(sock_common_setsockopt);
3157
3158 #ifdef CONFIG_COMPAT
3159 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
3160 char __user *optval, unsigned int optlen)
3161 {
3162 struct sock *sk = sock->sk;
3163
3164 if (sk->sk_prot->compat_setsockopt != NULL)
3165 return sk->sk_prot->compat_setsockopt(sk, level, optname,
3166 optval, optlen);
3167 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
3168 }
3169 EXPORT_SYMBOL(compat_sock_common_setsockopt);
3170 #endif
3171
3172 void sk_common_release(struct sock *sk)
3173 {
3174 if (sk->sk_prot->destroy)
3175 sk->sk_prot->destroy(sk);
3176
3177 /*
3178 * Observation: when sk_common_release is called, processes no longer
3179 * have access to the socket, but the network still does.
3180 * Step one, detach it from networking:
3181 *
3182 * A. Remove it from the hash tables.
3183 */
3184
3185 sk->sk_prot->unhash(sk);
3186
3187 /*
3188 * At this point the socket cannot receive new packets, but it is
3189 * possible that some packets are still in flight because another CPU
3190 * did the hash-table lookup before we unhashed the socket. They will
3191 * reach the receive queue and be purged by the socket destructor.
3192 *
3193 * We also still have packets pending on the receive queue, and probably
3194 * our own packets waiting in device queues. The socket destructor will
3195 * drain the receive queue, but transmitted packets will delay socket
3196 * destruction until the last reference is released.
3197 */
3198
3199 sock_orphan(sk);
3200
3201 xfrm_sk_free_policy(sk);
3202
3203 sk_refcnt_debug_release(sk);
3204
3205 sock_put(sk);
3206 }
3207 EXPORT_SYMBOL(sk_common_release);
3208
3209 void sk_get_meminfo(const struct sock *sk, u32 *mem)
3210 {
3211 memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
3212
3213 mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
3214 mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
3215 mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
3216 mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
3217 mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
3218 mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
3219 mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
3220 mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
3221 mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
3222 }
3223
3224 #ifdef CONFIG_PROC_FS
3225 #define PROTO_INUSE_NR 64
3226 struct prot_inuse {
3227 int val[PROTO_INUSE_NR];
3228 };
3229
3230 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
3231
3232 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
3233 {
3234 __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
3235 }
3236 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
3237
3238 int sock_prot_inuse_get(struct net *net, struct proto *prot)
3239 {
3240 int cpu, idx = prot->inuse_idx;
3241 int res = 0;
3242
3243 for_each_possible_cpu(cpu)
3244 res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
3245
3246 return res >= 0 ? res : 0;
3247 }
3248 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
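
/* Usage sketch (illustrative): protocols bump the per-protocol counter from
 * their ->hash() and ->unhash() callbacks so /proc/net/protocols and the
 * sock_diag interface can report how many sockets of each type exist:
 *
 *	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 *
 * with a matching -1 on unhash.
 */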
3249
3250 static void sock_inuse_add(struct net *net, int val)
3251 {
3252 this_cpu_add(*net->core.sock_inuse, val);
3253 }
3254
3255 int sock_inuse_get(struct net *net)
3256 {
3257 int cpu, res = 0;
3258
3259 for_each_possible_cpu(cpu)
3260 res += *per_cpu_ptr(net->core.sock_inuse, cpu);
3261
3262 return res;
3263 }
3264
3265 EXPORT_SYMBOL_GPL(sock_inuse_get);
3266
3267 static int __net_init sock_inuse_init_net(struct net *net)
3268 {
3269 net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3270 if (net->core.prot_inuse == NULL)
3271 return -ENOMEM;
3272
3273 net->core.sock_inuse = alloc_percpu(int);
3274 if (net->core.sock_inuse == NULL)
3275 goto out;
3276
3277 return 0;
3278
3279 out:
3280 free_percpu(net->core.prot_inuse);
3281 return -ENOMEM;
3282 }
3283
3284 static void __net_exit sock_inuse_exit_net(struct net *net)
3285 {
3286 free_percpu(net->core.prot_inuse);
3287 free_percpu(net->core.sock_inuse);
3288 }
3289
3290 static struct pernet_operations net_inuse_ops = {
3291 .init = sock_inuse_init_net,
3292 .exit = sock_inuse_exit_net,
3293 };
3294
3295 static __init int net_inuse_init(void)
3296 {
3297 if (register_pernet_subsys(&net_inuse_ops))
3298 panic("Cannot initialize net inuse counters");
3299
3300 return 0;
3301 }
3302
3303 core_initcall(net_inuse_init);
3304
3305 static int assign_proto_idx(struct proto *prot)
3306 {
3307 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
3308
3309 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
3310 pr_err("PROTO_INUSE_NR exhausted\n");
3311 return -ENOSPC;
3312 }
3313
3314 set_bit(prot->inuse_idx, proto_inuse_idx);
3315 return 0;
3316 }
3317
3318 static void release_proto_idx(struct proto *prot)
3319 {
3320 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
3321 clear_bit(prot->inuse_idx, proto_inuse_idx);
3322 }
3323 #else
3324 static inline int assign_proto_idx(struct proto *prot)
3325 {
3326 return 0;
3327 }
3328
3329 static inline void release_proto_idx(struct proto *prot)
3330 {
3331 }
3332
3333 static void sock_inuse_add(struct net *net, int val)
3334 {
3335 }
3336 #endif
3337
3338 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
3339 {
3340 if (!rsk_prot)
3341 return;
3342 kfree(rsk_prot->slab_name);
3343 rsk_prot->slab_name = NULL;
3344 kmem_cache_destroy(rsk_prot->slab);
3345 rsk_prot->slab = NULL;
3346 }
3347
3348 static int req_prot_init(const struct proto *prot)
3349 {
3350 struct request_sock_ops *rsk_prot = prot->rsk_prot;
3351
3352 if (!rsk_prot)
3353 return 0;
3354
3355 rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
3356 prot->name);
3357 if (!rsk_prot->slab_name)
3358 return -ENOMEM;
3359
3360 rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3361 rsk_prot->obj_size, 0,
3362 SLAB_ACCOUNT | prot->slab_flags,
3363 NULL);
3364
3365 if (!rsk_prot->slab) {
3366 pr_crit("%s: Can't create request sock SLAB cache!\n",
3367 prot->name);
3368 return -ENOMEM;
3369 }
3370 return 0;
3371 }
3372
3373 int proto_register(struct proto *prot, int alloc_slab)
3374 {
3375 int ret = -ENOBUFS;
3376
3377 if (alloc_slab) {
3378 prot->slab = kmem_cache_create_usercopy(prot->name,
3379 prot->obj_size, 0,
3380 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
3381 prot->slab_flags,
3382 prot->useroffset, prot->usersize,
3383 NULL);
3384
3385 if (prot->slab == NULL) {
3386 pr_crit("%s: Can't create sock SLAB cache!\n",
3387 prot->name);
3388 goto out;
3389 }
3390
3391 if (req_prot_init(prot))
3392 goto out_free_request_sock_slab;
3393
3394 if (prot->twsk_prot != NULL) {
3395 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
3396
3397 if (prot->twsk_prot->twsk_slab_name == NULL)
3398 goto out_free_request_sock_slab;
3399
3400 prot->twsk_prot->twsk_slab =
3401 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
3402 prot->twsk_prot->twsk_obj_size,
3403 0,
3404 SLAB_ACCOUNT |
3405 prot->slab_flags,
3406 NULL);
3407 if (prot->twsk_prot->twsk_slab == NULL)
3408 goto out_free_timewait_sock_slab_name;
3409 }
3410 }
3411
3412 mutex_lock(&proto_list_mutex);
3413 ret = assign_proto_idx(prot);
3414 if (ret) {
3415 mutex_unlock(&proto_list_mutex);
3416 goto out_free_timewait_sock_slab_name;
3417 }
3418 list_add(&prot->node, &proto_list);
3419 mutex_unlock(&proto_list_mutex);
3420 return ret;
3421
3422 out_free_timewait_sock_slab_name:
3423 if (alloc_slab && prot->twsk_prot)
3424 kfree(prot->twsk_prot->twsk_slab_name);
3425 out_free_request_sock_slab:
3426 if (alloc_slab) {
3427 req_prot_cleanup(prot->rsk_prot);
3428
3429 kmem_cache_destroy(prot->slab);
3430 prot->slab = NULL;
3431 }
3432 out:
3433 return ret;
3434 }
3435 EXPORT_SYMBOL(proto_register);
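
/* Usage sketch (illustrative): a protocol module registers its struct proto
 * once at init time and unregisters it on exit; every name below is
 * hypothetical:
 *
 *	static struct proto example_proto = {
 *		.name     = "EXAMPLE",
 *		.owner    = THIS_MODULE,
 *		.obj_size = sizeof(struct example_sock),
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return proto_register(&example_proto, 1);
 *	}
 */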
3436
3437 void proto_unregister(struct proto *prot)
3438 {
3439 mutex_lock(&proto_list_mutex);
3440 release_proto_idx(prot);
3441 list_del(&prot->node);
3442 mutex_unlock(&proto_list_mutex);
3443
3444 kmem_cache_destroy(prot->slab);
3445 prot->slab = NULL;
3446
3447 req_prot_cleanup(prot->rsk_prot);
3448
3449 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
3450 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
3451 kfree(prot->twsk_prot->twsk_slab_name);
3452 prot->twsk_prot->twsk_slab = NULL;
3453 }
3454 }
3455 EXPORT_SYMBOL(proto_unregister);
3456
3457 int sock_load_diag_module(int family, int protocol)
3458 {
3459 if (!protocol) {
3460 if (!sock_is_registered(family))
3461 return -ENOENT;
3462
3463 return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
3464 NETLINK_SOCK_DIAG, family);
3465 }
3466
3467 #ifdef CONFIG_INET
3468 if (family == AF_INET &&
3469 protocol != IPPROTO_RAW &&
3470 !rcu_access_pointer(inet_protos[protocol]))
3471 return -ENOENT;
3472 #endif
3473
3474 return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
3475 NETLINK_SOCK_DIAG, family, protocol);
3476 }
3477 EXPORT_SYMBOL(sock_load_diag_module);
3478
3479 #ifdef CONFIG_PROC_FS
3480 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
3481 __acquires(proto_list_mutex)
3482 {
3483 mutex_lock(&proto_list_mutex);
3484 return seq_list_start_head(&proto_list, *pos);
3485 }
3486
3487 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3488 {
3489 return seq_list_next(v, &proto_list, pos);
3490 }
3491
3492 static void proto_seq_stop(struct seq_file *seq, void *v)
3493 __releases(proto_list_mutex)
3494 {
3495 mutex_unlock(&proto_list_mutex);
3496 }
3497
3498 static char proto_method_implemented(const void *method)
3499 {
3500 return method == NULL ? 'n' : 'y';
3501 }
3502 static long sock_prot_memory_allocated(struct proto *proto)
3503 {
3504 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
3505 }
3506
3507 static const char *sock_prot_memory_pressure(struct proto *proto)
3508 {
3509 return proto->memory_pressure != NULL ?
3510 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
3511 }
3512
3513 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
3514 {
3515
3516 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
3517 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
3518 proto->name,
3519 proto->obj_size,
3520 sock_prot_inuse_get(seq_file_net(seq), proto),
3521 sock_prot_memory_allocated(proto),
3522 sock_prot_memory_pressure(proto),
3523 proto->max_header,
3524 proto->slab == NULL ? "no" : "yes",
3525 module_name(proto->owner),
3526 proto_method_implemented(proto->close),
3527 proto_method_implemented(proto->connect),
3528 proto_method_implemented(proto->disconnect),
3529 proto_method_implemented(proto->accept),
3530 proto_method_implemented(proto->ioctl),
3531 proto_method_implemented(proto->init),
3532 proto_method_implemented(proto->destroy),
3533 proto_method_implemented(proto->shutdown),
3534 proto_method_implemented(proto->setsockopt),
3535 proto_method_implemented(proto->getsockopt),
3536 proto_method_implemented(proto->sendmsg),
3537 proto_method_implemented(proto->recvmsg),
3538 proto_method_implemented(proto->sendpage),
3539 proto_method_implemented(proto->bind),
3540 proto_method_implemented(proto->backlog_rcv),
3541 proto_method_implemented(proto->hash),
3542 proto_method_implemented(proto->unhash),
3543 proto_method_implemented(proto->get_port),
3544 proto_method_implemented(proto->enter_memory_pressure));
3545 }
3546
3547 static int proto_seq_show(struct seq_file *seq, void *v)
3548 {
3549 if (v == &proto_list)
3550 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
3551 "protocol",
3552 "size",
3553 "sockets",
3554 "memory",
3555 "press",
3556 "maxhdr",
3557 "slab",
3558 "module",
3559 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
3560 else
3561 proto_seq_printf(seq, list_entry(v, struct proto, node));
3562 return 0;
3563 }
3564
3565 static const struct seq_operations proto_seq_ops = {
3566 .start = proto_seq_start,
3567 .next = proto_seq_next,
3568 .stop = proto_seq_stop,
3569 .show = proto_seq_show,
3570 };
3571
3572 static __net_init int proto_init_net(struct net *net)
3573 {
3574 if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
3575 sizeof(struct seq_net_private)))
3576 return -ENOMEM;
3577
3578 return 0;
3579 }
3580
3581 static __net_exit void proto_exit_net(struct net *net)
3582 {
3583 remove_proc_entry("protocols", net->proc_net);
3584 }
3585
3586
3587 static __net_initdata struct pernet_operations proto_net_ops = {
3588 .init = proto_init_net,
3589 .exit = proto_exit_net,
3590 };
3591
3592 static int __init proto_init(void)
3593 {
3594 return register_pernet_subsys(&proto_net_ops);
3595 }
3596
3597 subsys_initcall(proto_init);
3598
3599 #endif
3600
3601 #ifdef CONFIG_NET_RX_BUSY_POLL
3602 bool sk_busy_loop_end(void *p, unsigned long start_time)
3603 {
3604 struct sock *sk = p;
3605
3606 return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
3607 sk_busy_loop_timeout(sk, start_time);
3608 }
3609 EXPORT_SYMBOL(sk_busy_loop_end);
3610 #endif