This source file includes the following definitions:
- connection_based
- receiver_wake_function
- __skb_wait_for_more_packets
- skb_set_peeked
- __skb_try_recv_from_queue
- __skb_try_recv_datagram
- __skb_recv_datagram
- skb_recv_datagram
- skb_free_datagram
- __skb_free_datagram_locked
- __sk_queue_drop_skb
- skb_kill_datagram
- __skb_datagram_iter
- skb_copy_and_hash_datagram_iter
- simple_copy_to_iter
- skb_copy_datagram_iter
- skb_copy_datagram_from_iter
- __zerocopy_sg_from_iter
- zerocopy_sg_from_iter
- skb_copy_and_csum_datagram
- skb_copy_and_csum_datagram_msg
- datagram_poll

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/indirect_call_wrapper.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

#include "datagram.h"

static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
				  void *key)
{
	/* Avoid a wakeup if the event is not interesting for us
	 * (neither EPOLLIN nor EPOLLERR).
	 */
	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * Wait until the tail of the receive queue differs from @skb, i.e. until
 * more packets have arrived, or until an error, shutdown or signal occurs.
 */
int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
				const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Connection-based sockets may have become disconnected;
	 * if so, report the problem.
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* Handle signals. */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);

static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* A shared skb must be cloned before we mark it peeked. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	/* Swap the clone into the queue in place of the shared skb. */
	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}

struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
					  struct sk_buff_head *queue,
					  unsigned int flags,
					  void (*destructor)(struct sock *sk,
							     struct sk_buff *skb),
					  int *off, int *err,
					  struct sk_buff **last)
{
	bool peek_at_off = false;
	struct sk_buff *skb;
	int _off = 0;

	/* Peeking at an offset (SO_PEEK_OFF) skips data already peeked. */
	if (unlikely(flags & MSG_PEEK && *off >= 0)) {
		peek_at_off = true;
		_off = *off;
	}

	*last = queue->prev;
	skb_queue_walk(queue, skb) {
		if (flags & MSG_PEEK) {
			/* Skip whole skbs that lie before the peek offset. */
			if (peek_at_off && _off >= skb->len &&
			    (_off || skb->peeked)) {
				_off -= skb->len;
				continue;
			}
			if (!skb->len) {
				skb = skb_set_peeked(skb);
				if (IS_ERR(skb)) {
					*err = PTR_ERR(skb);
					return NULL;
				}
			}
			refcount_inc(&skb->users);
		} else {
			__skb_unlink(skb, queue);
			if (destructor)
				destructor(sk, skb);
		}
		*off = _off;
		return skb;
	}
	return NULL;
}

/**
 * __skb_try_recv_datagram - Receive a datagram skbuff without sleeping
 * @sk: socket
 * @flags: MSG_* flags
 * @destructor: invoked under the receive queue lock on successful dequeue
 * @off: an offset in bytes to peek skb from. Returns an offset
 *	within an skb where data actually starts
 * @err: error code returned
 * @last: set to the tail of the receive queue, so the caller's wait loop
 *	can tell when new packets have arrived
 *
 * Try to dequeue (or, with MSG_PEEK, peek) one skb from the socket's
 * receive queue, busy-polling when the queue is empty and the socket
 * allows it. Returns NULL with *err set (-EAGAIN if the queue is simply
 * empty) when no packet could be obtained.
 */
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
					void (*destructor)(struct sock *sk,
							   struct sk_buff *skb),
					int *off, int *err,
					struct sk_buff **last)
{
	struct sk_buff_head *queue = &sk->sk_receive_queue;
	struct sk_buff *skb;
	unsigned long cpu_flags;
	/*
	 * The caller is allowed not to check sk->sk_err before calling.
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	do {
		/* Packets can be appended to the receive queue from
		 * interrupt context, so dequeue or peek under the queue
		 * lock with interrupts disabled.
		 */
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
						off, &error, last);
		spin_unlock_irqrestore(&queue->lock, cpu_flags);
		if (error)
			goto no_packet;
		if (skb)
			return skb;

		if (!sk_can_busy_loop(sk))
			break;

		sk_busy_loop(sk, flags & MSG_DONTWAIT);
	} while (READ_ONCE(sk->sk_receive_queue.prev) != *last);

	error = -EAGAIN;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);

struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    void (*destructor)(struct sock *sk,
						       struct sk_buff *skb),
				    int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		skb = __skb_try_recv_datagram(sk, flags, destructor, off, err,
					      &last);
		if (skb)
			return skb;

		if (*err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, err, &timeo, last));

	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   NULL, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
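
/*
 * Example: a minimal sketch of how a datagram protocol's ->recvmsg() handler
 * typically uses the receive helpers above. The protocol and the function
 * name (my_proto_recvmsg) are hypothetical and not part of this file; only
 * the helper calls themselves are real.
 */
static int my_proto_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			    int noblock, int flags, int *addr_len)
{
	struct sk_buff *skb;
	size_t copied;
	int err;

	/* Dequeue one datagram, sleeping up to the socket timeout unless
	 * noblock/MSG_DONTWAIT was requested.
	 */
	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		return err;

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	/* Copy the payload into the user iterator; skb_copy_datagram_msg()
	 * is the msghdr wrapper around skb_copy_datagram_iter() below.
	 */
	err = skb_copy_datagram_msg(skb, 0, msg, copied);

	skb_free_datagram(sk, skb);
	return err ? err : copied;
}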

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
	bool slow;

	if (!skb_unref(skb)) {
		sk_peek_offset_bwd(sk, len);
		return;
	}

	slow = lock_sock_fast(sk);
	sk_peek_offset_bwd(sk, len);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned; it can be freed outside the locked section. */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);

int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
			struct sk_buff *skb, unsigned int flags,
			void (*destructor)(struct sock *sk,
					   struct sk_buff *skb))
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk_queue->lock);
		if (skb->next) {
			/* The skb is still linked in the queue: it was only
			 * peeked, so unlink it and drop the peek reference.
			 */
			__skb_unlink(skb, sk_queue);
			refcount_dec(&skb->users);
			if (destructor)
				destructor(sk, skb);
			err = 0;
		}
		spin_unlock_bh(&sk_queue->lock);
	}

	atomic_inc(&sk->sk_drops);
	return err;
}
EXPORT_SYMBOL(__sk_queue_drop_skb);

/**
 * skb_kill_datagram - Free a datagram skbuff forcibly
 * @sk: socket
 * @skb: datagram skbuff
 * @flags: MSG_* flags
 *
 * This function frees a datagram skbuff that was received by
 * skb_recv_datagram. The flags argument must match the one used for
 * skb_recv_datagram.
 *
 * If the skb was only peeked (MSG_PEEK), it is also unlinked from the
 * receive queue, and sk->sk_drops is bumped in every case.
 */
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
				      NULL);

	kfree_skb(skb);
	sk_mem_reclaim_partial(sk);
	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);

INDIRECT_CALLABLE_DECLARE(static size_t simple_copy_to_iter(const void *addr,
							     size_t bytes,
							     void *data __always_unused,
							     struct iov_iter *i));

static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
			       struct iov_iter *to, int len, bool fault_short,
			       size_t (*cb)(const void *, size_t, void *,
					    struct iov_iter *), void *data)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset, n;
	struct sk_buff *frag_iter;

	/* Copy header (the linear part). */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
				    skb->data + offset, copy, data, to);
		offset += n;
		if (n != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
	}

	/* Copy the paged fragments. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
					    vaddr + skb_frag_off(frag) + offset - start,
					    copy, data, to);
			kunmap(page);
			offset += n;
			if (n != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (__skb_datagram_iter(frag_iter, offset - start,
						to, copy, fault_short, cb, data))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* Getting here with len left over means the skb was shorter than the
	 * requested length; treat it like a copy fault.
	 */
fault:
	iov_iter_revert(to, offset - start_off);
	return -EFAULT;

short_copy:
	if (fault_short || iov_iter_count(to))
		goto fault;

	return 0;
}

/**
 * skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
 *	and update a hash.
 * @skb: buffer to copy
 * @offset: offset in the buffer to start copying from
 * @to: iovec iterator to copy to
 * @len: amount of data to copy from buffer to iovec
 * @hash: hash request to update
 */
int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
				    struct iov_iter *to, int len,
				    struct ahash_request *hash)
{
	return __skb_datagram_iter(skb, offset, to, len, true,
				   hash_and_copy_to_iter, hash);
}
EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);

static size_t simple_copy_to_iter(const void *addr, size_t bytes,
				  void *data __always_unused, struct iov_iter *i)
{
	return copy_to_iter(addr, bytes, i);
}

/**
 * skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 * @skb: buffer to copy
 * @offset: offset in the buffer to start copying from
 * @to: iovec iterator to copy to
 * @len: amount of data to copy from buffer to iovec
 */
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	trace_skb_copy_datagram_iovec(skb, len);
	return __skb_datagram_iter(skb, offset, to, len, false,
				   simple_copy_to_iter, NULL);
}
EXPORT_SYMBOL(skb_copy_datagram_iter);
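
/*
 * Note: most receive paths reach skb_copy_datagram_iter() through the
 * skb_copy_datagram_msg() inline wrapper in <linux/skbuff.h>, which simply
 * passes &msg->msg_iter, e.g.:
 *
 *	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 */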

/**
 * skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 * @skb: buffer to copy to
 * @offset: offset in the buffer to start copying to
 * @from: the copy source
 * @len: amount of data to copy to buffer from iovec
 *
 * Returns 0 or -EFAULT.
 */
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header (the linear part). */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy the paged fragments. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
						     skb_frag_off(frag) + offset - start,
						     copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
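
/*
 * Example: a minimal send-side sketch, filling a freshly allocated linear skb
 * from a sendmsg() iterator with skb_copy_datagram_from_iter(). The function
 * name (my_proto_fill_skb) and the surrounding protocol are hypothetical; the
 * helpers called are real.
 */
static int my_proto_fill_skb(struct sock *sk, struct msghdr *msg, size_t len,
			     struct sk_buff **skbp)
{
	struct sk_buff *skb;
	int err;

	/* Allocate a linear skb, blocking if the send buffer is full and
	 * MSG_DONTWAIT was not requested.
	 */
	skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;

	skb_put(skb, len);
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	*skbp = skb;
	return 0;
}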

int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			    struct iov_iter *from, size_t length)
{
	int frag = skb_shinfo(skb)->nr_frags;

	while (length && iov_iter_count(from)) {
		struct page *pages[MAX_SKB_FRAGS];
		size_t start;
		ssize_t copied;
		unsigned long truesize;
		int n = 0;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		/* Pin up to MAX_SKB_FRAGS - frag user pages. */
		copied = iov_iter_get_pages(from, pages, length,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		iov_iter_advance(from, copied);
		length -= copied;

		/* Account the pinned memory against the socket. */
		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		if (sk && sk->sk_type == SOCK_STREAM) {
			sk_wmem_queued_add(sk, truesize);
			sk_mem_charge(sk, truesize);
		} else {
			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
		}

		/* Attach each pinned page as an skb fragment. */
		while (copied) {
			int size = min_t(int, copied, PAGE_SIZE - start);

			skb_fill_page_desc(skb, frag++, pages[n], start, size);
			start = 0;
			copied -= size;
			n++;
		}
	}
	return 0;
}
EXPORT_SYMBOL(__zerocopy_sg_from_iter);

/**
 * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 * @skb: buffer to copy
 * @from: the source to copy from
 *
 * The function will first copy up to headlen, and then pin the userspace
 * pages and build frags through them.
 *
 * Returns 0, -EFAULT or -EMSGSIZE.
 */
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));

	/* Copy up to skb headlen. */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);

/*
 * Copy a datagram to an iovec iterator while folding the copied bytes into
 * the running checksum in *csump. Used by skb_copy_and_csum_datagram_msg().
 */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	return __skb_datagram_iter(skb, offset, to, len, true,
				   csum_and_copy_to_iter, csump);
}

/**
 * skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 * @skb: skbuff
 * @hlen: number of leading header bytes to skip in the copy (they are
 *	still included in the checksum)
 * @msg: destination
 *
 * If the iterator is shorter than the datagram, the checksum is verified
 * in full first and the data is then copied without checksumming.
 *
 * Returns: 0 on success,
 *	    -EINVAL on checksum failure,
 *	    -EFAULT on fault during copy.
 */
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			return -EINVAL;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;

		if (csum_fold(csum)) {
			iov_iter_revert(&msg->msg_iter, chunk);
			return -EINVAL;
		}

		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(NULL, skb);
	}
	return 0;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
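
/*
 * Example: a minimal sketch of a UDP-style receive path that defers checksum
 * verification to copy time. The wrapper name (my_proto_copy_checked) and the
 * choice of hlen are illustrative; sizeof(struct udphdr) is what UDP passes.
 */
static int my_proto_copy_checked(struct sock *sk, struct sk_buff *skb,
				 struct msghdr *msg, int hlen,
				 unsigned int flags)
{
	int copied = skb->len - hlen;
	int err;

	if (skb_csum_unnecessary(skb))
		err = skb_copy_datagram_msg(skb, hlen, msg, copied);
	else
		err = skb_copy_and_csum_datagram_msg(skb, hlen, msg);

	if (err) {
		/* Bad checksum or fault: drop the datagram and count it. */
		skb_kill_datagram(sk, skb, flags);
		return err;
	}

	skb_free_datagram(sk, skb);
	return copied;
}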

/**
 * datagram_poll - generic datagram poll
 * @file: file struct
 * @sock: socket
 * @wait: poll table
 *
 * Datagram poll: again totally generic. This also handles sequenced
 * packet sockets provided the socket receive queue is only ever holding
 * data ready to receive.
 *
 * Note: when you *don't* use this routine for this protocol, and you use
 * a different write policy from sock_writeable(), then please supply your
 * own write_space callback.
 */
__poll_t datagram_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based sockets need to check for termination and startup. */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);
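
/*
 * Example: datagram_poll() is normally wired into a protocol's proto_ops,
 * much like the AF_INET datagram ops do. The structure below is a
 * hypothetical, partial initializer; a real proto_ops fills in every handler.
 */
static const struct proto_ops my_dgram_ops = {
	.family		= PF_INET,
	.owner		= THIS_MODULE,
	.poll		= datagram_poll,
	.recvmsg	= sock_common_recvmsg,
	/* ... remaining handlers (.bind, .sendmsg, .mmap, ...) ... */
};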