This source file includes the following definitions; a brief userspace usage sketch follows the list.
- packet_direct_xmit
- packet_cached_dev_get
- packet_cached_dev_assign
- packet_cached_dev_reset
- packet_use_direct_xmit
- packet_pick_tx_queue
- __register_prot_hook
- register_prot_hook
- __unregister_prot_hook
- unregister_prot_hook
- pgv_to_page
- __packet_set_status
- __packet_get_status
- tpacket_get_timestamp
- __packet_set_timestamp
- packet_lookup_frame
- packet_current_frame
- prb_del_retire_blk_timer
- prb_shutdown_retire_blk_timer
- prb_setup_retire_blk_timer
- prb_calc_retire_blk_tmo
- prb_init_ft_ops
- init_prb_bdqc
- _prb_refresh_rx_retire_blk_timer
- prb_retire_rx_blk_timer_expired
- prb_flush_block
- prb_close_block
- prb_thaw_queue
- prb_open_block
- prb_freeze_queue
- prb_dispatch_next_block
- prb_retire_current_block
- prb_curr_blk_in_use
- prb_queue_frozen
- prb_clear_blk_fill_status
- prb_fill_rxhash
- prb_clear_rxhash
- prb_fill_vlan_info
- prb_run_all_ft_ops
- prb_fill_curr_block
- __packet_lookup_frame_in_block
- packet_current_rx_frame
- prb_lookup_block
- prb_previous_blk_num
- __prb_previous_block
- packet_previous_rx_frame
- packet_increment_rx_head
- packet_previous_frame
- packet_increment_head
- packet_inc_pending
- packet_dec_pending
- packet_read_pending
- packet_alloc_pending
- packet_free_pending
- __tpacket_has_room
- __tpacket_v3_has_room
- __packet_rcv_has_room
- packet_rcv_has_room
- packet_rcv_try_clear_pressure
- packet_sock_destruct
- fanout_flow_is_huge
- fanout_demux_hash
- fanout_demux_lb
- fanout_demux_cpu
- fanout_demux_rnd
- fanout_demux_rollover
- fanout_demux_qm
- fanout_demux_bpf
- fanout_has_flag
- packet_rcv_fanout
- __fanout_link
- __fanout_unlink
- match_fanout_group
- fanout_init_data
- __fanout_set_data_bpf
- fanout_set_data_cbpf
- fanout_set_data_ebpf
- fanout_set_data
- fanout_release_data
- __fanout_id_is_free
- fanout_find_new_id
- fanout_add
- fanout_release
- packet_extra_vlan_len_allowed
- packet_rcv_spkt
- packet_parse_headers
- packet_sendmsg_spkt
- run_filter
- packet_rcv_vnet
- packet_rcv
- tpacket_rcv
- tpacket_destruct_skb
- __packet_snd_vnet_parse
- packet_snd_vnet_parse
- tpacket_fill_skb
- tpacket_parse_header
- tpacket_snd
- packet_alloc_skb
- packet_snd
- packet_sendmsg
- packet_release
- packet_do_bind
- packet_bind_spkt
- packet_bind
- packet_create
- packet_recvmsg
- packet_getname_spkt
- packet_getname
- packet_dev_mc
- packet_dev_mclist_delete
- packet_mc_add
- packet_mc_drop
- packet_flush_mclist
- packet_setsockopt
- packet_getsockopt
- compat_packet_setsockopt
- packet_notifier
- packet_ioctl
- packet_poll
- packet_mm_open
- packet_mm_close
- free_pg_vec
- alloc_one_pg_vec_page
- alloc_pg_vec
- packet_set_ring
- packet_mmap
- packet_seq_start
- packet_seq_next
- packet_seq_stop
- packet_seq_show
- packet_net_init
- packet_net_exit
- packet_exit
- packet_init
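
The functions above implement the AF_PACKET address family end to end, from socket creation to the mmap'ed RX/TX rings. As a point of reference, a minimal userspace sketch (illustrative only; open_capture_socket and the interface name are mine, not part of this file):

#include <arpa/inet.h>
#include <net/if.h>
#include <unistd.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <sys/socket.h>

/* Open a raw AF_PACKET socket bound to one interface.  socket() lands in
 * packet_create(), bind() in packet_bind()/packet_do_bind(), and every
 * frame seen on the device is then delivered through packet_rcv() (or
 * tpacket_rcv() once a ring is mapped). */
static int open_capture_socket(const char *ifname)
{
	struct sockaddr_ll sll = {
		.sll_family   = AF_PACKET,
		.sll_protocol = htons(ETH_P_ALL),
		.sll_ifindex  = if_nametoindex(ifname),
	};
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0)
		return -1;
	if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
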
49 #include <linux/types.h>
50 #include <linux/mm.h>
51 #include <linux/capability.h>
52 #include <linux/fcntl.h>
53 #include <linux/socket.h>
54 #include <linux/in.h>
55 #include <linux/inet.h>
56 #include <linux/netdevice.h>
57 #include <linux/if_packet.h>
58 #include <linux/wireless.h>
59 #include <linux/kernel.h>
60 #include <linux/kmod.h>
61 #include <linux/slab.h>
62 #include <linux/vmalloc.h>
63 #include <net/net_namespace.h>
64 #include <net/ip.h>
65 #include <net/protocol.h>
66 #include <linux/skbuff.h>
67 #include <net/sock.h>
68 #include <linux/errno.h>
69 #include <linux/timer.h>
70 #include <linux/uaccess.h>
71 #include <asm/ioctls.h>
72 #include <asm/page.h>
73 #include <asm/cacheflush.h>
74 #include <asm/io.h>
75 #include <linux/proc_fs.h>
76 #include <linux/seq_file.h>
77 #include <linux/poll.h>
78 #include <linux/module.h>
79 #include <linux/init.h>
80 #include <linux/mutex.h>
81 #include <linux/if_vlan.h>
82 #include <linux/virtio_net.h>
83 #include <linux/errqueue.h>
84 #include <linux/net_tstamp.h>
85 #include <linux/percpu.h>
86 #ifdef CONFIG_INET
87 #include <net/inet_common.h>
88 #endif
89 #include <linux/bpf.h>
90 #include <net/compat.h>
91
92 #include "internal.h"
93
150 struct packet_mreq_max {
151 int mr_ifindex;
152 unsigned short mr_type;
153 unsigned short mr_alen;
154 unsigned char mr_address[MAX_ADDR_LEN];
155 };
156
157 union tpacket_uhdr {
158 struct tpacket_hdr *h1;
159 struct tpacket2_hdr *h2;
160 struct tpacket3_hdr *h3;
161 void *raw;
162 };
163
164 static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
165 int closing, int tx_ring);
166
167 #define V3_ALIGNMENT (8)
168
169 #define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
170
171 #define BLK_PLUS_PRIV(sz_of_priv) \
172 (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
173
174 #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
175 #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
176 #define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt)
177 #define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
178 #define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
179 #define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
180 #define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
181
182 struct packet_sock;
183 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
184 struct packet_type *pt, struct net_device *orig_dev);
185
186 static void *packet_previous_frame(struct packet_sock *po,
187 struct packet_ring_buffer *rb,
188 int status);
189 static void packet_increment_head(struct packet_ring_buffer *buff);
190 static int prb_curr_blk_in_use(struct tpacket_block_desc *);
191 static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
192 struct packet_sock *);
193 static void prb_retire_current_block(struct tpacket_kbdq_core *,
194 struct packet_sock *, unsigned int status);
195 static int prb_queue_frozen(struct tpacket_kbdq_core *);
196 static void prb_open_block(struct tpacket_kbdq_core *,
197 struct tpacket_block_desc *);
198 static void prb_retire_rx_blk_timer_expired(struct timer_list *);
199 static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
200 static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
201 static void prb_clear_rxhash(struct tpacket_kbdq_core *,
202 struct tpacket3_hdr *);
203 static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
204 struct tpacket3_hdr *);
205 static void packet_flush_mclist(struct sock *sk);
206 static u16 packet_pick_tx_queue(struct sk_buff *skb);
207
208 struct packet_skb_cb {
209 union {
210 struct sockaddr_pkt pkt;
211 union {
/* origlen shares storage with the leading members of ll: packet_rcv()
 * records the original skb length here, and packet_recvmsg() reads it
 * back before overwriting the area with the sockaddr_ll it copies to
 * user space.
 */
216 unsigned int origlen;
217 struct sockaddr_ll ll;
218 };
219 } sa;
220 };
221
222 #define vio_le() virtio_legacy_is_little_endian()
223
224 #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
225
226 #define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
227 #define GET_PBLOCK_DESC(x, bid) \
228 ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
229 #define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \
230 ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
231 #define GET_NEXT_PRB_BLK_NUM(x) \
232 (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
233 ((x)->kactive_blk_num+1) : 0)
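
A brief worked reading of the macros above (numbers mine, purely illustrative):

/* With tp_sizeof_priv == 13 the per-block private area is padded to
 * ALIGN(13, V3_ALIGNMENT) == 16 bytes, so
 *
 *	BLK_PLUS_PRIV(13) == BLK_HDR_LEN + 16
 *
 * is where the first frame of each block starts (BLOCK_O2FP), while the
 * private area itself begins at BLOCK_O2PRIV == BLK_HDR_LEN.  For a ring
 * of knum_blocks == 4, GET_NEXT_PRB_BLK_NUM() cycles 0, 1, 2, 3, 0, ...
 */
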
234
235 static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
236 static void __fanout_link(struct sock *sk, struct packet_sock *po);
237
238 static int packet_direct_xmit(struct sk_buff *skb)
239 {
240 return dev_direct_xmit(skb, packet_pick_tx_queue(skb));
241 }
242
243 static struct net_device *packet_cached_dev_get(struct packet_sock *po)
244 {
245 struct net_device *dev;
246
247 rcu_read_lock();
248 dev = rcu_dereference(po->cached_dev);
249 if (likely(dev))
250 dev_hold(dev);
251 rcu_read_unlock();
252
253 return dev;
254 }
255
256 static void packet_cached_dev_assign(struct packet_sock *po,
257 struct net_device *dev)
258 {
259 rcu_assign_pointer(po->cached_dev, dev);
260 }
261
262 static void packet_cached_dev_reset(struct packet_sock *po)
263 {
264 RCU_INIT_POINTER(po->cached_dev, NULL);
265 }
266
267 static bool packet_use_direct_xmit(const struct packet_sock *po)
268 {
269 return po->xmit == packet_direct_xmit;
270 }
271
272 static u16 packet_pick_tx_queue(struct sk_buff *skb)
273 {
274 struct net_device *dev = skb->dev;
275 const struct net_device_ops *ops = dev->netdev_ops;
276 int cpu = raw_smp_processor_id();
277 u16 queue_index;
278
279 #ifdef CONFIG_XPS
280 skb->sender_cpu = cpu + 1;
281 #endif
282 skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
283 if (ops->ndo_select_queue) {
284 queue_index = ops->ndo_select_queue(dev, skb, NULL);
285 queue_index = netdev_cap_txqueue(dev, queue_index);
286 } else {
287 queue_index = netdev_pick_tx(dev, skb, NULL);
288 }
289
290 return queue_index;
291 }
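
packet_direct_xmit() is only used when the socket opts out of the qdisc layer. A hedged sketch of how that mode is selected from userspace (fd is assumed to be an already-open AF_PACKET socket):

int one = 1;

/* packet_setsockopt(PACKET_QDISC_BYPASS) switches po->xmit from
 * dev_queue_xmit() to packet_direct_xmit(), so frames sent on this
 * socket bypass traffic control and go straight to the driver queue
 * chosen by packet_pick_tx_queue(). */
setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one));
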
292
/* Attach po->prot_hook (or link the socket into its fanout group) and
 * mark the socket as running.  Callers must hold po->bind_lock or
 * otherwise exclude concurrent access (see register_prot_hook()).
 */
297 static void __register_prot_hook(struct sock *sk)
298 {
299 struct packet_sock *po = pkt_sk(sk);
300
301 if (!po->running) {
302 if (po->fanout)
303 __fanout_link(sk, po);
304 else
305 dev_add_pack(&po->prot_hook);
306
307 sock_hold(sk);
308 po->running = 1;
309 }
310 }
311
312 static void register_prot_hook(struct sock *sk)
313 {
314 lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
315 __register_prot_hook(sk);
316 }
317
/* Detach the protocol hook.  Must be called with po->bind_lock held;
 * if @sync is true the lock is dropped around synchronize_net() so that
 * receive paths still running on other CPUs can finish with the hook.
 */
324 static void __unregister_prot_hook(struct sock *sk, bool sync)
325 {
326 struct packet_sock *po = pkt_sk(sk);
327
328 lockdep_assert_held_once(&po->bind_lock);
329
330 po->running = 0;
331
332 if (po->fanout)
333 __fanout_unlink(sk, po);
334 else
335 __dev_remove_pack(&po->prot_hook);
336
337 __sock_put(sk);
338
339 if (sync) {
340 spin_unlock(&po->bind_lock);
341 synchronize_net();
342 spin_lock(&po->bind_lock);
343 }
344 }
345
346 static void unregister_prot_hook(struct sock *sk, bool sync)
347 {
348 struct packet_sock *po = pkt_sk(sk);
349
350 if (po->running)
351 __unregister_prot_hook(sk, sync);
352 }
353
354 static inline struct page * __pure pgv_to_page(void *addr)
355 {
356 if (is_vmalloc_addr(addr))
357 return vmalloc_to_page(addr);
358 return virt_to_page(addr);
359 }
360
361 static void __packet_set_status(struct packet_sock *po, void *frame, int status)
362 {
363 union tpacket_uhdr h;
364
365 h.raw = frame;
366 switch (po->tp_version) {
367 case TPACKET_V1:
368 h.h1->tp_status = status;
369 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
370 break;
371 case TPACKET_V2:
372 h.h2->tp_status = status;
373 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
374 break;
375 case TPACKET_V3:
376 h.h3->tp_status = status;
377 flush_dcache_page(pgv_to_page(&h.h3->tp_status));
378 break;
379 default:
380 WARN(1, "TPACKET version not supported.\n");
381 BUG();
382 }
383
384 smp_wmb();
385 }
386
387 static int __packet_get_status(const struct packet_sock *po, void *frame)
388 {
389 union tpacket_uhdr h;
390
391 smp_rmb();
392
393 h.raw = frame;
394 switch (po->tp_version) {
395 case TPACKET_V1:
396 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
397 return h.h1->tp_status;
398 case TPACKET_V2:
399 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
400 return h.h2->tp_status;
401 case TPACKET_V3:
402 flush_dcache_page(pgv_to_page(&h.h3->tp_status));
403 return h.h3->tp_status;
404 default:
405 WARN(1, "TPACKET version not supported.\n");
406 BUG();
407 return 0;
408 }
409 }
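
The tp_status word written by __packet_set_status() and read by __packet_get_status() is the kernel/user handshake for each ring slot. A minimal consumer loop for a mapped TPACKET_V2 RX ring might look like the following sketch, where fd, ring, frame_size and frame_nr are assumptions carried over from the tpacket_req used to create the ring:

struct tpacket2_hdr *hdr;
unsigned int idx = 0;

for (;;) {
	hdr = (struct tpacket2_hdr *)(ring + (size_t)idx * frame_size);

	if (!(hdr->tp_status & TP_STATUS_USER)) {
		poll(&(struct pollfd){ .fd = fd, .events = POLLIN }, 1, -1);
		continue;
	}
	/* packet data starts at (char *)hdr + hdr->tp_mac, length tp_snaplen */

	hdr->tp_status = TP_STATUS_KERNEL;	/* hand the slot back */
	idx = (idx + 1) % frame_nr;
}
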
410
411 static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
412 unsigned int flags)
413 {
414 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
415
416 if (shhwtstamps &&
417 (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
418 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
419 return TP_STATUS_TS_RAW_HARDWARE;
420
421 if (ktime_to_timespec_cond(skb->tstamp, ts))
422 return TP_STATUS_TS_SOFTWARE;
423
424 return 0;
425 }
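
Which branch tpacket_get_timestamp() takes is controlled per socket. A sketch of requesting hardware timestamps (the NIC must also have hardware timestamping enabled, e.g. via SIOCSHWTSTAMP, which is not shown here):

int req = SOF_TIMESTAMPING_RAW_HARDWARE;

/* Stored in po->tp_tstamp; ring slots whose skb carries a hardware
 * timestamp are then flagged TP_STATUS_TS_RAW_HARDWARE, others fall
 * back to the software timestamp (TP_STATUS_TS_SOFTWARE). */
setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, &req, sizeof(req));
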
426
427 static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
428 struct sk_buff *skb)
429 {
430 union tpacket_uhdr h;
431 struct timespec ts;
432 __u32 ts_status;
433
434 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
435 return 0;
436
437 h.raw = frame;
438 switch (po->tp_version) {
439 case TPACKET_V1:
440 h.h1->tp_sec = ts.tv_sec;
441 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
442 break;
443 case TPACKET_V2:
444 h.h2->tp_sec = ts.tv_sec;
445 h.h2->tp_nsec = ts.tv_nsec;
446 break;
447 case TPACKET_V3:
448 h.h3->tp_sec = ts.tv_sec;
449 h.h3->tp_nsec = ts.tv_nsec;
450 break;
451 default:
452 WARN(1, "TPACKET version not supported.\n");
453 BUG();
454 }
455
456
457 flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
458 smp_wmb();
459
460 return ts_status;
461 }
462
463 static void *packet_lookup_frame(const struct packet_sock *po,
464 const struct packet_ring_buffer *rb,
465 unsigned int position,
466 int status)
467 {
468 unsigned int pg_vec_pos, frame_offset;
469 union tpacket_uhdr h;
470
471 pg_vec_pos = position / rb->frames_per_block;
472 frame_offset = position % rb->frames_per_block;
473
474 h.raw = rb->pg_vec[pg_vec_pos].buffer +
475 (frame_offset * rb->frame_size);
476
477 if (status != __packet_get_status(po, h.raw))
478 return NULL;
479
480 return h.raw;
481 }
482
483 static void *packet_current_frame(struct packet_sock *po,
484 struct packet_ring_buffer *rb,
485 int status)
486 {
487 return packet_lookup_frame(po, rb, rb->head, status);
488 }
489
490 static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
491 {
492 del_timer_sync(&pkc->retire_blk_timer);
493 }
494
495 static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
496 struct sk_buff_head *rb_queue)
497 {
498 struct tpacket_kbdq_core *pkc;
499
500 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
501
502 spin_lock_bh(&rb_queue->lock);
503 pkc->delete_blk_timer = 1;
504 spin_unlock_bh(&rb_queue->lock);
505
506 prb_del_retire_blk_timer(pkc);
507 }
508
509 static void prb_setup_retire_blk_timer(struct packet_sock *po)
510 {
511 struct tpacket_kbdq_core *pkc;
512
513 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
514 timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
515 0);
516 pkc->retire_blk_timer.expires = jiffies;
517 }
518
519 static int prb_calc_retire_blk_tmo(struct packet_sock *po,
520 int blk_size_in_bytes)
521 {
522 struct net_device *dev;
523 unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
524 struct ethtool_link_ksettings ecmd;
525 int err;
526
527 rtnl_lock();
528 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
529 if (unlikely(!dev)) {
530 rtnl_unlock();
531 return DEFAULT_PRB_RETIRE_TOV;
532 }
533 err = __ethtool_get_link_ksettings(dev, &ecmd);
534 rtnl_unlock();
535 if (!err) {
540 if (ecmd.base.speed < SPEED_1000 ||
541 ecmd.base.speed == SPEED_UNKNOWN) {
542 return DEFAULT_PRB_RETIRE_TOV;
543 } else {
544 msec = 1;
545 div = ecmd.base.speed / 1000;
546 }
547 } else
548 return DEFAULT_PRB_RETIRE_TOV;
549
550 mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
551
552 if (div)
553 mbits /= div;
554
555 tmo = mbits * msec;
556
557 if (div)
558 return tmo+1;
559 return tmo;
560 }
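
A worked pass through the arithmetic above (numbers illustrative):

/* Example: tp_block_size = 4 MiB on a link reporting 10000 Mbit/s.
 *   msec  = 1, div = 10000 / 1000 = 10
 *   mbits = (4 MiB * 8) / 2^20 = 32, then 32 / 10 = 3 (integer divide)
 *   tmo   = 3 * 1 = 3, and since div != 0 the result is tmo + 1 = 4 ms.
 * Links slower than SPEED_1000, or with unknown speed, simply use
 * DEFAULT_PRB_RETIRE_TOV.
 */
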
561
562 static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
563 union tpacket_req_u *req_u)
564 {
565 p1->feature_req_word = req_u->req3.tp_feature_req_word;
566 }
567
568 static void init_prb_bdqc(struct packet_sock *po,
569 struct packet_ring_buffer *rb,
570 struct pgv *pg_vec,
571 union tpacket_req_u *req_u)
572 {
573 struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
574 struct tpacket_block_desc *pbd;
575
576 memset(p1, 0x0, sizeof(*p1));
577
578 p1->knxt_seq_num = 1;
579 p1->pkbdq = pg_vec;
580 pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
581 p1->pkblk_start = pg_vec[0].buffer;
582 p1->kblk_size = req_u->req3.tp_block_size;
583 p1->knum_blocks = req_u->req3.tp_block_nr;
584 p1->hdrlen = po->tp_hdrlen;
585 p1->version = po->tp_version;
586 p1->last_kactive_blk_num = 0;
587 po->stats.stats3.tp_freeze_q_cnt = 0;
588 if (req_u->req3.tp_retire_blk_tov)
589 p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
590 else
591 p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
592 req_u->req3.tp_block_size);
593 p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
594 p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
595
596 p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
597 prb_init_ft_ops(p1, req_u);
598 prb_setup_retire_blk_timer(po);
599 prb_open_block(p1, pbd);
600 }
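
init_prb_bdqc() consumes the tpacket_req3 that user space passed to PACKET_RX_RING. A hedged sketch of the corresponding setup calls (fd is an open AF_PACKET socket; block and frame sizes are arbitrary example values):

struct tpacket_req3 req = {
	.tp_block_size       = 1 << 22,			/* 4 MiB per block */
	.tp_block_nr         = 64,
	.tp_frame_size       = 1 << 11,
	.tp_frame_nr         = ((1 << 22) / (1 << 11)) * 64,
	.tp_retire_blk_tov   = 60,			/* ms; 0 => prb_calc_retire_blk_tmo() */
	.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH,	/* see prb_fill_rxhash() */
};
int version = TPACKET_V3;

setsockopt(fd, SOL_PACKET, PACKET_VERSION, &version, sizeof(version));
setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));	/* -> packet_set_ring() */
void *ring = mmap(NULL, (size_t)req.tp_block_size * req.tp_block_nr,
		  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);	/* -> packet_mmap() */
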
601
602
603
604
605 static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
606 {
607 mod_timer(&pkc->retire_blk_timer,
608 jiffies + pkc->tov_in_jiffies);
609 pkc->last_kactive_blk_num = pkc->kactive_blk_num;
610 }
611
/* Timer callback for the block-retire timer.  Runs under the receive
 * queue lock: if the active block has not changed since the last run it
 * is retired (or, when the queue is frozen and user space has released
 * the block, re-opened); otherwise the timer is simply re-armed.
 */
635 static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
636 {
637 struct packet_sock *po =
638 from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
639 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
640 unsigned int frozen;
641 struct tpacket_block_desc *pbd;
642
643 spin_lock(&po->sk.sk_receive_queue.lock);
644
645 frozen = prb_queue_frozen(pkc);
646 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
647
648 if (unlikely(pkc->delete_blk_timer))
649 goto out;
660 if (BLOCK_NUM_PKTS(pbd)) {
661 while (atomic_read(&pkc->blk_fill_in_prog)) {
662
663 cpu_relax();
664 }
665 }
666
667 if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
668 if (!frozen) {
669 if (!BLOCK_NUM_PKTS(pbd)) {
670
671 goto refresh_timer;
672 }
673 prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
674 if (!prb_dispatch_next_block(pkc, po))
675 goto refresh_timer;
676 else
677 goto out;
678 } else {
679
680
681
682 if (prb_curr_blk_in_use(pbd)) {
687 goto refresh_timer;
688 } else {
696 prb_open_block(pkc, pbd);
697 goto out;
698 }
699 }
700 }
701
702 refresh_timer:
703 _prb_refresh_rx_retire_blk_timer(pkc);
704
705 out:
706 spin_unlock(&po->sk.sk_receive_queue.lock);
707 }
708
709 static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
710 struct tpacket_block_desc *pbd1, __u32 status)
711 {
712
713
714 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
715 u8 *start, *end;
716
717 start = (u8 *)pbd1;
718
719
720 start += PAGE_SIZE;
721
722 end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
723 for (; start < end; start += PAGE_SIZE)
724 flush_dcache_page(pgv_to_page(start));
725
726 smp_wmb();
727 #endif
728
729
730
731 BLOCK_STATUS(pbd1) = status;
732
733
734
735 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
736 start = (u8 *)pbd1;
737 flush_dcache_page(pgv_to_page(start));
738
739 smp_wmb();
740 #endif
741 }
742
/* Close the currently open block: stamp ts_last_pkt (or the current time
 * if the block carries no packets), flush the block to user space with
 * TP_STATUS_USER set, wake the socket and advance kactive_blk_num.
 */
752 static void prb_close_block(struct tpacket_kbdq_core *pkc1,
753 struct tpacket_block_desc *pbd1,
754 struct packet_sock *po, unsigned int stat)
755 {
756 __u32 status = TP_STATUS_USER | stat;
757
758 struct tpacket3_hdr *last_pkt;
759 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
760 struct sock *sk = &po->sk;
761
762 if (atomic_read(&po->tp_drops))
763 status |= TP_STATUS_LOSING;
764
765 last_pkt = (struct tpacket3_hdr *)pkc1->prev;
766 last_pkt->tp_next_offset = 0;
767
768
769 if (BLOCK_NUM_PKTS(pbd1)) {
770 h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
771 h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
772 } else {
778 struct timespec ts;
779 getnstimeofday(&ts);
780 h1->ts_last_pkt.ts_sec = ts.tv_sec;
781 h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
782 }
783
784 smp_wmb();
785
786
787 prb_flush_block(pkc1, pbd1, status);
788
789 sk->sk_data_ready(sk);
790
791 pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
792 }
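
prb_close_block() is what hands a finished block to user space (block_status becomes TP_STATUS_USER). The matching consumer side, sketched under the assumption that blocks[], i and req carry over from the ring setup shown earlier:

struct tpacket_block_desc *bd = blocks[i];

if (bd->hdr.bh1.block_status & TP_STATUS_USER) {
	struct tpacket3_hdr *ppd = (struct tpacket3_hdr *)
		((char *)bd + bd->hdr.bh1.offset_to_first_pkt);
	unsigned int n;

	for (n = 0; n < bd->hdr.bh1.num_pkts; n++) {
		/* frame data is at (char *)ppd + ppd->tp_mac, length tp_snaplen */
		ppd = (struct tpacket3_hdr *)((char *)ppd + ppd->tp_next_offset);
	}
	bd->hdr.bh1.block_status = TP_STATUS_KERNEL;	/* release the block */
	i = (i + 1) % req.tp_block_nr;
}
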
793
794 static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
795 {
796 pkc->reset_pending_on_curr_blk = 0;
797 }
798
/* (Re)open a block for kernel fill: assign the next sequence number,
 * reset the packet count and block length, stamp ts_first_pkt and point
 * nxt_offset just past the block header and per-block private area.
 */
806 static void prb_open_block(struct tpacket_kbdq_core *pkc1,
807 struct tpacket_block_desc *pbd1)
808 {
809 struct timespec ts;
810 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
811
812 smp_rmb();
818 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
819 BLOCK_NUM_PKTS(pbd1) = 0;
820 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
821
822 getnstimeofday(&ts);
823
824 h1->ts_first_pkt.ts_sec = ts.tv_sec;
825 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
826
827 pkc1->pkblk_start = (char *)pbd1;
828 pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
829
830 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
831 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
832
833 pbd1->version = pkc1->version;
834 pkc1->prev = pkc1->nxt_offset;
835 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
836
837 prb_thaw_queue(pkc1);
838 _prb_refresh_rx_retire_blk_timer(pkc1);
839
840 smp_wmb();
841 }
842
/* Queue freeze/thaw: when the block that should be dispatched next is
 * still owned by user space, the queue is frozen and incoming packets
 * are dropped (tp_freeze_q_cnt is bumped).  Once user space returns the
 * block, prb_open_block() re-opens it and the queue thaws.
 */
866 static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
867 struct packet_sock *po)
868 {
869 pkc->reset_pending_on_curr_blk = 1;
870 po->stats.stats3.tp_freeze_q_cnt++;
871 }
872
873 #define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
874
/* Dispatch the next block to the kernel for filling, or freeze the
 * queue if user space still owns it.
 */
881 static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
882 struct packet_sock *po)
883 {
884 struct tpacket_block_desc *pbd;
885
886 smp_rmb();
887
888
889 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
890
891
892 if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
893 prb_freeze_queue(pkc, po);
894 return NULL;
895 }
896
902 prb_open_block(pkc, pbd);
903 return (void *)pkc->nxt_offset;
904 }
905
906 static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
907 struct packet_sock *po, unsigned int status)
908 {
909 struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
910
911
912 if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
922 if (!(status & TP_STATUS_BLK_TMO)) {
923 while (atomic_read(&pkc->blk_fill_in_prog)) {
924
925 cpu_relax();
926 }
927 }
928 prb_close_block(pkc, pbd, po, status);
929 return;
930 }
931 }
932
933 static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd)
934 {
935 return TP_STATUS_USER & BLOCK_STATUS(pbd);
936 }
937
938 static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
939 {
940 return pkc->reset_pending_on_curr_blk;
941 }
942
943 static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
944 {
945 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
946 atomic_dec(&pkc->blk_fill_in_prog);
947 }
948
949 static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
950 struct tpacket3_hdr *ppd)
951 {
952 ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb);
953 }
954
955 static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
956 struct tpacket3_hdr *ppd)
957 {
958 ppd->hv1.tp_rxhash = 0;
959 }
960
961 static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
962 struct tpacket3_hdr *ppd)
963 {
964 if (skb_vlan_tag_present(pkc->skb)) {
965 ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
966 ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
967 ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
968 } else {
969 ppd->hv1.tp_vlan_tci = 0;
970 ppd->hv1.tp_vlan_tpid = 0;
971 ppd->tp_status = TP_STATUS_AVAILABLE;
972 }
973 }
974
975 static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
976 struct tpacket3_hdr *ppd)
977 {
978 ppd->hv1.tp_padding = 0;
979 prb_fill_vlan_info(pkc, ppd);
980
981 if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
982 prb_fill_rxhash(pkc, ppd);
983 else
984 prb_clear_rxhash(pkc, ppd);
985 }
986
987 static void prb_fill_curr_block(char *curr,
988 struct tpacket_kbdq_core *pkc,
989 struct tpacket_block_desc *pbd,
990 unsigned int len)
991 {
992 struct tpacket3_hdr *ppd;
993
994 ppd = (struct tpacket3_hdr *)curr;
995 ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
996 pkc->prev = curr;
997 pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
998 BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
999 BLOCK_NUM_PKTS(pbd) += 1;
1000 atomic_inc(&pkc->blk_fill_in_prog);
1001 prb_run_all_ft_ops(pkc, ppd);
1002 }
1003
1004
1005 static void *__packet_lookup_frame_in_block(struct packet_sock *po,
1006 struct sk_buff *skb,
1007 unsigned int len
1008 )
1009 {
1010 struct tpacket_kbdq_core *pkc;
1011 struct tpacket_block_desc *pbd;
1012 char *curr, *end;
1013
1014 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
1015 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1016
1017
1018 if (prb_queue_frozen(pkc)) {
1023 if (prb_curr_blk_in_use(pbd)) {
1024
1025 return NULL;
1026 } else {
1033 prb_open_block(pkc, pbd);
1034 }
1035 }
1036
1037 smp_mb();
1038 curr = pkc->nxt_offset;
1039 pkc->skb = skb;
1040 end = (char *)pbd + pkc->kblk_size;
1041
1042
1043 if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
1044 prb_fill_curr_block(curr, pkc, pbd, len);
1045 return (void *)curr;
1046 }
1047
1048
1049 prb_retire_current_block(pkc, po, 0);
1050
1051
1052 curr = (char *)prb_dispatch_next_block(pkc, po);
1053 if (curr) {
1054 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1055 prb_fill_curr_block(curr, pkc, pbd, len);
1056 return (void *)curr;
1057 }
1058
/* No free block is available: the queue stays frozen and the caller
 * drops this packet.
 */
1063 return NULL;
1064 }
1065
1066 static void *packet_current_rx_frame(struct packet_sock *po,
1067 struct sk_buff *skb,
1068 int status, unsigned int len)
1069 {
1070 char *curr = NULL;
1071 switch (po->tp_version) {
1072 case TPACKET_V1:
1073 case TPACKET_V2:
1074 curr = packet_lookup_frame(po, &po->rx_ring,
1075 po->rx_ring.head, status);
1076 return curr;
1077 case TPACKET_V3:
1078 return __packet_lookup_frame_in_block(po, skb, len);
1079 default:
1080 WARN(1, "TPACKET version not supported\n");
1081 BUG();
1082 return NULL;
1083 }
1084 }
1085
1086 static void *prb_lookup_block(const struct packet_sock *po,
1087 const struct packet_ring_buffer *rb,
1088 unsigned int idx,
1089 int status)
1090 {
1091 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
1092 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
1093
1094 if (status != BLOCK_STATUS(pbd))
1095 return NULL;
1096 return pbd;
1097 }
1098
1099 static int prb_previous_blk_num(struct packet_ring_buffer *rb)
1100 {
1101 unsigned int prev;
1102 if (rb->prb_bdqc.kactive_blk_num)
1103 prev = rb->prb_bdqc.kactive_blk_num-1;
1104 else
1105 prev = rb->prb_bdqc.knum_blocks-1;
1106 return prev;
1107 }
1108
1109
1110 static void *__prb_previous_block(struct packet_sock *po,
1111 struct packet_ring_buffer *rb,
1112 int status)
1113 {
1114 unsigned int previous = prb_previous_blk_num(rb);
1115 return prb_lookup_block(po, rb, previous, status);
1116 }
1117
1118 static void *packet_previous_rx_frame(struct packet_sock *po,
1119 struct packet_ring_buffer *rb,
1120 int status)
1121 {
1122 if (po->tp_version <= TPACKET_V2)
1123 return packet_previous_frame(po, rb, status);
1124
1125 return __prb_previous_block(po, rb, status);
1126 }
1127
1128 static void packet_increment_rx_head(struct packet_sock *po,
1129 struct packet_ring_buffer *rb)
1130 {
1131 switch (po->tp_version) {
1132 case TPACKET_V1:
1133 case TPACKET_V2:
1134 return packet_increment_head(rb);
1135 case TPACKET_V3:
1136 default:
1137 WARN(1, "TPACKET version not supported.\n");
1138 BUG();
1139 return;
1140 }
1141 }
1142
1143 static void *packet_previous_frame(struct packet_sock *po,
1144 struct packet_ring_buffer *rb,
1145 int status)
1146 {
1147 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
1148 return packet_lookup_frame(po, rb, previous, status);
1149 }
1150
1151 static void packet_increment_head(struct packet_ring_buffer *buff)
1152 {
1153 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
1154 }
1155
1156 static void packet_inc_pending(struct packet_ring_buffer *rb)
1157 {
1158 this_cpu_inc(*rb->pending_refcnt);
1159 }
1160
1161 static void packet_dec_pending(struct packet_ring_buffer *rb)
1162 {
1163 this_cpu_dec(*rb->pending_refcnt);
1164 }
1165
1166 static unsigned int packet_read_pending(const struct packet_ring_buffer *rb)
1167 {
1168 unsigned int refcnt = 0;
1169 int cpu;
1170
1171
1172 if (rb->pending_refcnt == NULL)
1173 return 0;
1174
1175 for_each_possible_cpu(cpu)
1176 refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu);
1177
1178 return refcnt;
1179 }
1180
1181 static int packet_alloc_pending(struct packet_sock *po)
1182 {
1183 po->rx_ring.pending_refcnt = NULL;
1184
1185 po->tx_ring.pending_refcnt = alloc_percpu(unsigned int);
1186 if (unlikely(po->tx_ring.pending_refcnt == NULL))
1187 return -ENOBUFS;
1188
1189 return 0;
1190 }
1191
1192 static void packet_free_pending(struct packet_sock *po)
1193 {
1194 free_percpu(po->tx_ring.pending_refcnt);
1195 }
1196
1197 #define ROOM_POW_OFF 2
1198 #define ROOM_NONE 0x0
1199 #define ROOM_LOW 0x1
1200 #define ROOM_NORMAL 0x2
1201
1202 static bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
1203 {
1204 int idx, len;
1205
1206 len = READ_ONCE(po->rx_ring.frame_max) + 1;
1207 idx = READ_ONCE(po->rx_ring.head);
1208 if (pow_off)
1209 idx += len >> pow_off;
1210 if (idx >= len)
1211 idx -= len;
1212 return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1213 }
1214
1215 static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
1216 {
1217 int idx, len;
1218
1219 len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
1220 idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
1221 if (pow_off)
1222 idx += len >> pow_off;
1223 if (idx >= len)
1224 idx -= len;
1225 return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1226 }
1227
1228 static int __packet_rcv_has_room(const struct packet_sock *po,
1229 const struct sk_buff *skb)
1230 {
1231 const struct sock *sk = &po->sk;
1232 int ret = ROOM_NONE;
1233
1234 if (po->prot_hook.func != tpacket_rcv) {
1235 int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
1236 int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
1237 - (skb ? skb->truesize : 0);
1238
1239 if (avail > (rcvbuf >> ROOM_POW_OFF))
1240 return ROOM_NORMAL;
1241 else if (avail > 0)
1242 return ROOM_LOW;
1243 else
1244 return ROOM_NONE;
1245 }
1246
1247 if (po->tp_version == TPACKET_V3) {
1248 if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
1249 ret = ROOM_NORMAL;
1250 else if (__tpacket_v3_has_room(po, 0))
1251 ret = ROOM_LOW;
1252 } else {
1253 if (__tpacket_has_room(po, ROOM_POW_OFF))
1254 ret = ROOM_NORMAL;
1255 else if (__tpacket_has_room(po, 0))
1256 ret = ROOM_LOW;
1257 }
1258
1259 return ret;
1260 }
1261
1262 static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1263 {
1264 int pressure, ret;
1265
1266 ret = __packet_rcv_has_room(po, skb);
1267 pressure = ret != ROOM_NORMAL;
1268
1269 if (READ_ONCE(po->pressure) != pressure)
1270 WRITE_ONCE(po->pressure, pressure);
1271
1272 return ret;
1273 }
1274
1275 static void packet_rcv_try_clear_pressure(struct packet_sock *po)
1276 {
1277 if (READ_ONCE(po->pressure) &&
1278 __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
1279 WRITE_ONCE(po->pressure, 0);
1280 }
1281
1282 static void packet_sock_destruct(struct sock *sk)
1283 {
1284 skb_queue_purge(&sk->sk_error_queue);
1285
1286 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
1287 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
1288
1289 if (!sock_flag(sk, SOCK_DEAD)) {
1290 pr_err("Attempt to release alive packet socket: %p\n", sk);
1291 return;
1292 }
1293
1294 sk_refcnt_debug_dec(sk);
1295 }
1296
1297 static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
1298 {
1299 u32 *history = po->rollover->history;
1300 u32 victim, rxhash;
1301 int i, count = 0;
1302
1303 rxhash = skb_get_hash(skb);
1304 for (i = 0; i < ROLLOVER_HLEN; i++)
1305 if (READ_ONCE(history[i]) == rxhash)
1306 count++;
1307
1308 victim = prandom_u32() % ROLLOVER_HLEN;
1309
1310
1311 if (READ_ONCE(history[victim]) != rxhash)
1312 WRITE_ONCE(history[victim], rxhash);
1313
1314 return count > (ROLLOVER_HLEN >> 1);
1315 }
1316
1317 static unsigned int fanout_demux_hash(struct packet_fanout *f,
1318 struct sk_buff *skb,
1319 unsigned int num)
1320 {
1321 return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
1322 }
1323
1324 static unsigned int fanout_demux_lb(struct packet_fanout *f,
1325 struct sk_buff *skb,
1326 unsigned int num)
1327 {
1328 unsigned int val = atomic_inc_return(&f->rr_cur);
1329
1330 return val % num;
1331 }
1332
1333 static unsigned int fanout_demux_cpu(struct packet_fanout *f,
1334 struct sk_buff *skb,
1335 unsigned int num)
1336 {
1337 return smp_processor_id() % num;
1338 }
1339
1340 static unsigned int fanout_demux_rnd(struct packet_fanout *f,
1341 struct sk_buff *skb,
1342 unsigned int num)
1343 {
1344 return prandom_u32_max(num);
1345 }
1346
1347 static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1348 struct sk_buff *skb,
1349 unsigned int idx, bool try_self,
1350 unsigned int num)
1351 {
1352 struct packet_sock *po, *po_next, *po_skip = NULL;
1353 unsigned int i, j, room = ROOM_NONE;
1354
1355 po = pkt_sk(f->arr[idx]);
1356
1357 if (try_self) {
1358 room = packet_rcv_has_room(po, skb);
1359 if (room == ROOM_NORMAL ||
1360 (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
1361 return idx;
1362 po_skip = po;
1363 }
1364
1365 i = j = min_t(int, po->rollover->sock, num - 1);
1366 do {
1367 po_next = pkt_sk(f->arr[i]);
1368 if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
1369 packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
1370 if (i != j)
1371 po->rollover->sock = i;
1372 atomic_long_inc(&po->rollover->num);
1373 if (room == ROOM_LOW)
1374 atomic_long_inc(&po->rollover->num_huge);
1375 return i;
1376 }
1377
1378 if (++i == num)
1379 i = 0;
1380 } while (i != j);
1381
1382 atomic_long_inc(&po->rollover->num_failed);
1383 return idx;
1384 }
1385
1386 static unsigned int fanout_demux_qm(struct packet_fanout *f,
1387 struct sk_buff *skb,
1388 unsigned int num)
1389 {
1390 return skb_get_queue_mapping(skb) % num;
1391 }
1392
1393 static unsigned int fanout_demux_bpf(struct packet_fanout *f,
1394 struct sk_buff *skb,
1395 unsigned int num)
1396 {
1397 struct bpf_prog *prog;
1398 unsigned int ret = 0;
1399
1400 rcu_read_lock();
1401 prog = rcu_dereference(f->bpf_prog);
1402 if (prog)
1403 ret = bpf_prog_run_clear_cb(prog, skb) % num;
1404 rcu_read_unlock();
1405
1406 return ret;
1407 }
1408
1409 static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
1410 {
1411 return f->flags & (flag >> 8);
1412 }
1413
1414 static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1415 struct packet_type *pt, struct net_device *orig_dev)
1416 {
1417 struct packet_fanout *f = pt->af_packet_priv;
1418 unsigned int num = READ_ONCE(f->num_members);
1419 struct net *net = read_pnet(&f->net);
1420 struct packet_sock *po;
1421 unsigned int idx;
1422
1423 if (!net_eq(dev_net(dev), net) || !num) {
1424 kfree_skb(skb);
1425 return 0;
1426 }
1427
1428 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
1429 skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
1430 if (!skb)
1431 return 0;
1432 }
1433 switch (f->type) {
1434 case PACKET_FANOUT_HASH:
1435 default:
1436 idx = fanout_demux_hash(f, skb, num);
1437 break;
1438 case PACKET_FANOUT_LB:
1439 idx = fanout_demux_lb(f, skb, num);
1440 break;
1441 case PACKET_FANOUT_CPU:
1442 idx = fanout_demux_cpu(f, skb, num);
1443 break;
1444 case PACKET_FANOUT_RND:
1445 idx = fanout_demux_rnd(f, skb, num);
1446 break;
1447 case PACKET_FANOUT_QM:
1448 idx = fanout_demux_qm(f, skb, num);
1449 break;
1450 case PACKET_FANOUT_ROLLOVER:
1451 idx = fanout_demux_rollover(f, skb, 0, false, num);
1452 break;
1453 case PACKET_FANOUT_CBPF:
1454 case PACKET_FANOUT_EBPF:
1455 idx = fanout_demux_bpf(f, skb, num);
1456 break;
1457 }
1458
1459 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
1460 idx = fanout_demux_rollover(f, skb, idx, true, num);
1461
1462 po = pkt_sk(f->arr[idx]);
1463 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1464 }
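
packet_rcv_fanout() only runs for sockets that joined a fanout group. A hedged example of the userspace side, joining group id 42 with hash demux and rollover on overload (the id and flag choice are arbitrary here; fd is an open, bound AF_PACKET socket):

unsigned int fanout_arg = 42 | ((PACKET_FANOUT_HASH |
				 PACKET_FANOUT_FLAG_ROLLOVER) << 16);

/* Low 16 bits carry the group id, high 16 bits the type and flags;
 * packet_setsockopt() hands them to fanout_add(). */
setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &fanout_arg, sizeof(fanout_arg));
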
1465
1466 DEFINE_MUTEX(fanout_mutex);
1467 EXPORT_SYMBOL_GPL(fanout_mutex);
1468 static LIST_HEAD(fanout_list);
1469 static u16 fanout_next_id;
1470
1471 static void __fanout_link(struct sock *sk, struct packet_sock *po)
1472 {
1473 struct packet_fanout *f = po->fanout;
1474
1475 spin_lock(&f->lock);
1476 f->arr[f->num_members] = sk;
1477 smp_wmb();
1478 f->num_members++;
1479 if (f->num_members == 1)
1480 dev_add_pack(&f->prot_hook);
1481 spin_unlock(&f->lock);
1482 }
1483
1484 static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
1485 {
1486 struct packet_fanout *f = po->fanout;
1487 int i;
1488
1489 spin_lock(&f->lock);
1490 for (i = 0; i < f->num_members; i++) {
1491 if (f->arr[i] == sk)
1492 break;
1493 }
1494 BUG_ON(i >= f->num_members);
1495 f->arr[i] = f->arr[f->num_members - 1];
1496 f->num_members--;
1497 if (f->num_members == 0)
1498 __dev_remove_pack(&f->prot_hook);
1499 spin_unlock(&f->lock);
1500 }
1501
1502 static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
1503 {
1504 if (sk->sk_family != PF_PACKET)
1505 return false;
1506
1507 return ptype->af_packet_priv == pkt_sk(sk)->fanout;
1508 }
1509
1510 static void fanout_init_data(struct packet_fanout *f)
1511 {
1512 switch (f->type) {
1513 case PACKET_FANOUT_LB:
1514 atomic_set(&f->rr_cur, 0);
1515 break;
1516 case PACKET_FANOUT_CBPF:
1517 case PACKET_FANOUT_EBPF:
1518 RCU_INIT_POINTER(f->bpf_prog, NULL);
1519 break;
1520 }
1521 }
1522
1523 static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
1524 {
1525 struct bpf_prog *old;
1526
1527 spin_lock(&f->lock);
1528 old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
1529 rcu_assign_pointer(f->bpf_prog, new);
1530 spin_unlock(&f->lock);
1531
1532 if (old) {
1533 synchronize_net();
1534 bpf_prog_destroy(old);
1535 }
1536 }
1537
1538 static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
1539 unsigned int len)
1540 {
1541 struct bpf_prog *new;
1542 struct sock_fprog fprog;
1543 int ret;
1544
1545 if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
1546 return -EPERM;
1547 if (len != sizeof(fprog))
1548 return -EINVAL;
1549 if (copy_from_user(&fprog, data, len))
1550 return -EFAULT;
1551
1552 ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
1553 if (ret)
1554 return ret;
1555
1556 __fanout_set_data_bpf(po->fanout, new);
1557 return 0;
1558 }
1559
1560 static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
1561 unsigned int len)
1562 {
1563 struct bpf_prog *new;
1564 u32 fd;
1565
1566 if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
1567 return -EPERM;
1568 if (len != sizeof(fd))
1569 return -EINVAL;
1570 if (copy_from_user(&fd, data, len))
1571 return -EFAULT;
1572
1573 new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
1574 if (IS_ERR(new))
1575 return PTR_ERR(new);
1576
1577 __fanout_set_data_bpf(po->fanout, new);
1578 return 0;
1579 }
1580
1581 static int fanout_set_data(struct packet_sock *po, char __user *data,
1582 unsigned int len)
1583 {
1584 switch (po->fanout->type) {
1585 case PACKET_FANOUT_CBPF:
1586 return fanout_set_data_cbpf(po, data, len);
1587 case PACKET_FANOUT_EBPF:
1588 return fanout_set_data_ebpf(po, data, len);
1589 default:
1590 return -EINVAL;
1591 }
1592 }
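
fanout_set_data() is reached through the PACKET_FANOUT_DATA socket option. A sketch for the classic-BPF case, assuming the socket already joined a PACKET_FANOUT_CBPF group; the trivial program steers each packet by its first byte:

struct sock_filter insns[] = {
	{ BPF_LD  | BPF_B | BPF_ABS, 0, 0, 0 },	/* A = packet[0]          */
	{ BPF_RET | BPF_A,           0, 0, 0 },	/* return A (mod members) */
};
struct sock_fprog fprog = { .len = 2, .filter = insns };

/* fanout_set_data_cbpf() builds the program; fanout_demux_bpf() then
 * uses its return value, modulo the member count, to pick the socket. */
setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &fprog, sizeof(fprog));
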
1593
1594 static void fanout_release_data(struct packet_fanout *f)
1595 {
1596 switch (f->type) {
1597 case PACKET_FANOUT_CBPF:
1598 case PACKET_FANOUT_EBPF:
1599 __fanout_set_data_bpf(f, NULL);
1600 }
1601 }
1602
1603 static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
1604 {
1605 struct packet_fanout *f;
1606
1607 list_for_each_entry(f, &fanout_list, list) {
1608 if (f->id == candidate_id &&
1609 read_pnet(&f->net) == sock_net(sk)) {
1610 return false;
1611 }
1612 }
1613 return true;
1614 }
1615
1616 static bool fanout_find_new_id(struct sock *sk, u16 *new_id)
1617 {
1618 u16 id = fanout_next_id;
1619
1620 do {
1621 if (__fanout_id_is_free(sk, id)) {
1622 *new_id = id;
1623 fanout_next_id = id + 1;
1624 return true;
1625 }
1626
1627 id++;
1628 } while (id != fanout_next_id);
1629
1630 return false;
1631 }
1632
1633 static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1634 {
1635 struct packet_rollover *rollover = NULL;
1636 struct packet_sock *po = pkt_sk(sk);
1637 struct packet_fanout *f, *match;
1638 u8 type = type_flags & 0xff;
1639 u8 flags = type_flags >> 8;
1640 int err;
1641
1642 switch (type) {
1643 case PACKET_FANOUT_ROLLOVER:
1644 if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
1645 return -EINVAL;
1646 case PACKET_FANOUT_HASH:
1647 case PACKET_FANOUT_LB:
1648 case PACKET_FANOUT_CPU:
1649 case PACKET_FANOUT_RND:
1650 case PACKET_FANOUT_QM:
1651 case PACKET_FANOUT_CBPF:
1652 case PACKET_FANOUT_EBPF:
1653 break;
1654 default:
1655 return -EINVAL;
1656 }
1657
1658 mutex_lock(&fanout_mutex);
1659
1660 err = -EALREADY;
1661 if (po->fanout)
1662 goto out;
1663
1664 if (type == PACKET_FANOUT_ROLLOVER ||
1665 (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
1666 err = -ENOMEM;
1667 rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
1668 if (!rollover)
1669 goto out;
1670 atomic_long_set(&rollover->num, 0);
1671 atomic_long_set(&rollover->num_huge, 0);
1672 atomic_long_set(&rollover->num_failed, 0);
1673 }
1674
1675 if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
1676 if (id != 0) {
1677 err = -EINVAL;
1678 goto out;
1679 }
1680 if (!fanout_find_new_id(sk, &id)) {
1681 err = -ENOMEM;
1682 goto out;
1683 }
1684
1685 flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
1686 }
1687
1688 match = NULL;
1689 list_for_each_entry(f, &fanout_list, list) {
1690 if (f->id == id &&
1691 read_pnet(&f->net) == sock_net(sk)) {
1692 match = f;
1693 break;
1694 }
1695 }
1696 err = -EINVAL;
1697 if (match && match->flags != flags)
1698 goto out;
1699 if (!match) {
1700 err = -ENOMEM;
1701 match = kzalloc(sizeof(*match), GFP_KERNEL);
1702 if (!match)
1703 goto out;
1704 write_pnet(&match->net, sock_net(sk));
1705 match->id = id;
1706 match->type = type;
1707 match->flags = flags;
1708 INIT_LIST_HEAD(&match->list);
1709 spin_lock_init(&match->lock);
1710 refcount_set(&match->sk_ref, 0);
1711 fanout_init_data(match);
1712 match->prot_hook.type = po->prot_hook.type;
1713 match->prot_hook.dev = po->prot_hook.dev;
1714 match->prot_hook.func = packet_rcv_fanout;
1715 match->prot_hook.af_packet_priv = match;
1716 match->prot_hook.id_match = match_fanout_group;
1717 list_add(&match->list, &fanout_list);
1718 }
1719 err = -EINVAL;
1720
1721 spin_lock(&po->bind_lock);
1722 if (po->running &&
1723 match->type == type &&
1724 match->prot_hook.type == po->prot_hook.type &&
1725 match->prot_hook.dev == po->prot_hook.dev) {
1726 err = -ENOSPC;
1727 if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
1728 __dev_remove_pack(&po->prot_hook);
1729 po->fanout = match;
1730 po->rollover = rollover;
1731 rollover = NULL;
1732 refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
1733 __fanout_link(sk, po);
1734 err = 0;
1735 }
1736 }
1737 spin_unlock(&po->bind_lock);
1738
1739 if (err && !refcount_read(&match->sk_ref)) {
1740 list_del(&match->list);
1741 kfree(match);
1742 }
1743
1744 out:
1745 kfree(rollover);
1746 mutex_unlock(&fanout_mutex);
1747 return err;
1748 }
1749
/* Detach the socket from its fanout group.  Returns the group when this
 * was the last member (it has already been unlinked from fanout_list) so
 * the caller can release its data and free it; otherwise returns NULL.
 */
1755 static struct packet_fanout *fanout_release(struct sock *sk)
1756 {
1757 struct packet_sock *po = pkt_sk(sk);
1758 struct packet_fanout *f;
1759
1760 mutex_lock(&fanout_mutex);
1761 f = po->fanout;
1762 if (f) {
1763 po->fanout = NULL;
1764
1765 if (refcount_dec_and_test(&f->sk_ref))
1766 list_del(&f->list);
1767 else
1768 f = NULL;
1769 }
1770 mutex_unlock(&fanout_mutex);
1771
1772 return f;
1773 }
1774
1775 static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
1776 struct sk_buff *skb)
1777 {
1782 if (unlikely(dev->type != ARPHRD_ETHER))
1783 return false;
1784
1785 skb_reset_mac_header(skb);
1786 return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
1787 }
1788
1789 static const struct proto_ops packet_ops;
1790
1791 static const struct proto_ops packet_ops_spkt;
1792
1793 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
1794 struct packet_type *pt, struct net_device *orig_dev)
1795 {
1796 struct sock *sk;
1797 struct sockaddr_pkt *spkt;
1798
1804 sk = pt->af_packet_priv;
1805
1817 if (skb->pkt_type == PACKET_LOOPBACK)
1818 goto out;
1819
1820 if (!net_eq(dev_net(dev), sock_net(sk)))
1821 goto out;
1822
1823 skb = skb_share_check(skb, GFP_ATOMIC);
1824 if (skb == NULL)
1825 goto oom;
1826
1827
1828 skb_dst_drop(skb);
1829
1830
1831 nf_reset_ct(skb);
1832
1833 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
1834
1835 skb_push(skb, skb->data - skb_mac_header(skb));
1836
1841 spkt->spkt_family = dev->type;
1842 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
1843 spkt->spkt_protocol = skb->protocol;
1844
1850 if (sock_queue_rcv_skb(sk, skb) == 0)
1851 return 0;
1852
1853 out:
1854 kfree_skb(skb);
1855 oom:
1856 return 0;
1857 }
1858
1859 static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
1860 {
1861 if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
1862 sock->type == SOCK_RAW) {
1863 skb_reset_mac_header(skb);
1864 skb->protocol = dev_parse_header_protocol(skb);
1865 }
1866
1867 skb_probe_transport_header(skb);
1868 }
1869
/* Transmit path for old-style SOCK_PACKET sockets: resolve the device
 * named in the sockaddr_pkt and send the raw frame exactly as given.
 */
1875 static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
1876 size_t len)
1877 {
1878 struct sock *sk = sock->sk;
1879 DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
1880 struct sk_buff *skb = NULL;
1881 struct net_device *dev;
1882 struct sockcm_cookie sockc;
1883 __be16 proto = 0;
1884 int err;
1885 int extra_len = 0;
1886
1891 if (saddr) {
1892 if (msg->msg_namelen < sizeof(struct sockaddr))
1893 return -EINVAL;
1894 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
1895 proto = saddr->spkt_protocol;
1896 } else
1897 return -ENOTCONN;
1898
1903 saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
1904 retry:
1905 rcu_read_lock();
1906 dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
1907 err = -ENODEV;
1908 if (dev == NULL)
1909 goto out_unlock;
1910
1911 err = -ENETDOWN;
1912 if (!(dev->flags & IFF_UP))
1913 goto out_unlock;
1914
1920 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
1921 if (!netif_supports_nofcs(dev)) {
1922 err = -EPROTONOSUPPORT;
1923 goto out_unlock;
1924 }
1925 extra_len = 4;
1926 }
1927
1928 err = -EMSGSIZE;
1929 if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
1930 goto out_unlock;
1931
1932 if (!skb) {
1933 size_t reserved = LL_RESERVED_SPACE(dev);
1934 int tlen = dev->needed_tailroom;
1935 unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
1936
1937 rcu_read_unlock();
1938 skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
1939 if (skb == NULL)
1940 return -ENOBUFS;
1941
1942
1943
1944
1945 skb_reserve(skb, reserved);
1946 skb_reset_network_header(skb);
1947
1948
1949 if (hhlen) {
1950 skb->data -= hhlen;
1951 skb->tail -= hhlen;
1952 if (len < hhlen)
1953 skb_reset_network_header(skb);
1954 }
1955 err = memcpy_from_msg(skb_put(skb, len), msg, len);
1956 if (err)
1957 goto out_free;
1958 goto retry;
1959 }
1960
1961 if (!dev_validate_header(dev, skb->data, len)) {
1962 err = -EINVAL;
1963 goto out_unlock;
1964 }
1965 if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
1966 !packet_extra_vlan_len_allowed(dev, skb)) {
1967 err = -EMSGSIZE;
1968 goto out_unlock;
1969 }
1970
1971 sockcm_init(&sockc, sk);
1972 if (msg->msg_controllen) {
1973 err = sock_cmsg_send(sk, msg, &sockc);
1974 if (unlikely(err))
1975 goto out_unlock;
1976 }
1977
1978 skb->protocol = proto;
1979 skb->dev = dev;
1980 skb->priority = sk->sk_priority;
1981 skb->mark = sk->sk_mark;
1982 skb->tstamp = sockc.transmit_time;
1983
1984 skb_setup_tx_timestamp(skb, sockc.tsflags);
1985
1986 if (unlikely(extra_len == 4))
1987 skb->no_fcs = 1;
1988
1989 packet_parse_headers(skb, sock);
1990
1991 dev_queue_xmit(skb);
1992 rcu_read_unlock();
1993 return len;
1994
1995 out_unlock:
1996 rcu_read_unlock();
1997 out_free:
1998 kfree_skb(skb);
1999 return err;
2000 }
2001
2002 static unsigned int run_filter(struct sk_buff *skb,
2003 const struct sock *sk,
2004 unsigned int res)
2005 {
2006 struct sk_filter *filter;
2007
2008 rcu_read_lock();
2009 filter = rcu_dereference(sk->sk_filter);
2010 if (filter != NULL)
2011 res = bpf_prog_run_clear_cb(filter->prog, skb);
2012 rcu_read_unlock();
2013
2014 return res;
2015 }
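
run_filter() executes whatever classic BPF program is attached to the socket; a return of 0 drops the packet, any other value caps the captured length. A hedged example that accepts only IPv4 frames in full (fd is an open AF_PACKET socket):

struct sock_filter insns[] = {
	{ BPF_LD  | BPF_H   | BPF_ABS, 0, 0, 12 },		/* A = EtherType */
	{ BPF_JMP | BPF_JEQ | BPF_K,   0, 1, ETH_P_IP },	/* IPv4?         */
	{ BPF_RET | BPF_K,             0, 0, 0xffffffff },	/* accept all    */
	{ BPF_RET | BPF_K,             0, 0, 0 },		/* drop          */
};
struct sock_fprog fprog = { .len = 4, .filter = insns };

setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
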
2016
2017 static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
2018 size_t *len)
2019 {
2020 struct virtio_net_hdr vnet_hdr;
2021
2022 if (*len < sizeof(vnet_hdr))
2023 return -EINVAL;
2024 *len -= sizeof(vnet_hdr);
2025
2026 if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
2027 return -EINVAL;
2028
2029 return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
2030 }
2031
/* packet_rcv() is the receive hook for PACKET sockets without a mapped
 * RX ring: it runs the socket filter, clones shared skbs, records the
 * link-level address in the skb control block and queues the packet on
 * sk_receive_queue.
 */
2044 static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
2045 struct packet_type *pt, struct net_device *orig_dev)
2046 {
2047 struct sock *sk;
2048 struct sockaddr_ll *sll;
2049 struct packet_sock *po;
2050 u8 *skb_head = skb->data;
2051 int skb_len = skb->len;
2052 unsigned int snaplen, res;
2053 bool is_drop_n_account = false;
2054
2055 if (skb->pkt_type == PACKET_LOOPBACK)
2056 goto drop;
2057
2058 sk = pt->af_packet_priv;
2059 po = pkt_sk(sk);
2060
2061 if (!net_eq(dev_net(dev), sock_net(sk)))
2062 goto drop;
2063
2064 skb->dev = dev;
2065
2066 if (dev->header_ops) {
2074 if (sk->sk_type != SOCK_DGRAM)
2075 skb_push(skb, skb->data - skb_mac_header(skb));
2076 else if (skb->pkt_type == PACKET_OUTGOING) {
2077
2078 skb_pull(skb, skb_network_offset(skb));
2079 }
2080 }
2081
2082 snaplen = skb->len;
2083
2084 res = run_filter(skb, sk, snaplen);
2085 if (!res)
2086 goto drop_n_restore;
2087 if (snaplen > res)
2088 snaplen = res;
2089
2090 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2091 goto drop_n_acct;
2092
2093 if (skb_shared(skb)) {
2094 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
2095 if (nskb == NULL)
2096 goto drop_n_acct;
2097
2098 if (skb_head != skb->data) {
2099 skb->data = skb_head;
2100 skb->len = skb_len;
2101 }
2102 consume_skb(skb);
2103 skb = nskb;
2104 }
2105
2106 sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);
2107
2108 sll = &PACKET_SKB_CB(skb)->sa.ll;
2109 sll->sll_hatype = dev->type;
2110 sll->sll_pkttype = skb->pkt_type;
2111 if (unlikely(po->origdev))
2112 sll->sll_ifindex = orig_dev->ifindex;
2113 else
2114 sll->sll_ifindex = dev->ifindex;
2115
2116 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
2121 PACKET_SKB_CB(skb)->sa.origlen = skb->len;
2122
2123 if (pskb_trim(skb, snaplen))
2124 goto drop_n_acct;
2125
2126 skb_set_owner_r(skb, sk);
2127 skb->dev = NULL;
2128 skb_dst_drop(skb);
2129
2130
2131 nf_reset_ct(skb);
2132
2133 spin_lock(&sk->sk_receive_queue.lock);
2134 po->stats.stats1.tp_packets++;
2135 sock_skb_set_dropcount(sk, skb);
2136 __skb_queue_tail(&sk->sk_receive_queue, skb);
2137 spin_unlock(&sk->sk_receive_queue.lock);
2138 sk->sk_data_ready(sk);
2139 return 0;
2140
2141 drop_n_acct:
2142 is_drop_n_account = true;
2143 atomic_inc(&po->tp_drops);
2144 atomic_inc(&sk->sk_drops);
2145
2146 drop_n_restore:
2147 if (skb_head != skb->data && skb_shared(skb)) {
2148 skb->data = skb_head;
2149 skb->len = skb_len;
2150 }
2151 drop:
2152 if (!is_drop_n_account)
2153 consume_skb(skb);
2154 else
2155 kfree_skb(skb);
2156 return 0;
2157 }
2158
2159 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2160 struct packet_type *pt, struct net_device *orig_dev)
2161 {
2162 struct sock *sk;
2163 struct packet_sock *po;
2164 struct sockaddr_ll *sll;
2165 union tpacket_uhdr h;
2166 u8 *skb_head = skb->data;
2167 int skb_len = skb->len;
2168 unsigned int snaplen, res;
2169 unsigned long status = TP_STATUS_USER;
2170 unsigned short macoff, netoff, hdrlen;
2171 struct sk_buff *copy_skb = NULL;
2172 struct timespec ts;
2173 __u32 ts_status;
2174 bool is_drop_n_account = false;
2175 unsigned int slot_id = 0;
2176 bool do_vnet = false;
2177
2182 BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32);
2183 BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48);
2184
2185 if (skb->pkt_type == PACKET_LOOPBACK)
2186 goto drop;
2187
2188 sk = pt->af_packet_priv;
2189 po = pkt_sk(sk);
2190
2191 if (!net_eq(dev_net(dev), sock_net(sk)))
2192 goto drop;
2193
2194 if (dev->header_ops) {
2195 if (sk->sk_type != SOCK_DGRAM)
2196 skb_push(skb, skb->data - skb_mac_header(skb));
2197 else if (skb->pkt_type == PACKET_OUTGOING) {
2198
2199 skb_pull(skb, skb_network_offset(skb));
2200 }
2201 }
2202
2203 snaplen = skb->len;
2204
2205 res = run_filter(skb, sk, snaplen);
2206 if (!res)
2207 goto drop_n_restore;
2208
2209
2210 if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
2211 atomic_inc(&po->tp_drops);
2212 goto drop_n_restore;
2213 }
2214
2215 if (skb->ip_summed == CHECKSUM_PARTIAL)
2216 status |= TP_STATUS_CSUMNOTREADY;
2217 else if (skb->pkt_type != PACKET_OUTGOING &&
2218 (skb->ip_summed == CHECKSUM_COMPLETE ||
2219 skb_csum_unnecessary(skb)))
2220 status |= TP_STATUS_CSUM_VALID;
2221
2222 if (snaplen > res)
2223 snaplen = res;
2224
2225 if (sk->sk_type == SOCK_DGRAM) {
2226 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
2227 po->tp_reserve;
2228 } else {
2229 unsigned int maclen = skb_network_offset(skb);
2230 netoff = TPACKET_ALIGN(po->tp_hdrlen +
2231 (maclen < 16 ? 16 : maclen)) +
2232 po->tp_reserve;
2233 if (po->has_vnet_hdr) {
2234 netoff += sizeof(struct virtio_net_hdr);
2235 do_vnet = true;
2236 }
2237 macoff = netoff - maclen;
2238 }
2239 if (po->tp_version <= TPACKET_V2) {
2240 if (macoff + snaplen > po->rx_ring.frame_size) {
2241 if (po->copy_thresh &&
2242 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
2243 if (skb_shared(skb)) {
2244 copy_skb = skb_clone(skb, GFP_ATOMIC);
2245 } else {
2246 copy_skb = skb_get(skb);
2247 skb_head = skb->data;
2248 }
2249 if (copy_skb)
2250 skb_set_owner_r(copy_skb, sk);
2251 }
2252 snaplen = po->rx_ring.frame_size - macoff;
2253 if ((int)snaplen < 0) {
2254 snaplen = 0;
2255 do_vnet = false;
2256 }
2257 }
2258 } else if (unlikely(macoff + snaplen >
2259 GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
2260 u32 nval;
2261
2262 nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
2263 pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
2264 snaplen, nval, macoff);
2265 snaplen = nval;
2266 if (unlikely((int)snaplen < 0)) {
2267 snaplen = 0;
2268 macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
2269 do_vnet = false;
2270 }
2271 }
2272 spin_lock(&sk->sk_receive_queue.lock);
2273 h.raw = packet_current_rx_frame(po, skb,
2274 TP_STATUS_KERNEL, (macoff+snaplen));
2275 if (!h.raw)
2276 goto drop_n_account;
2277
2278 if (po->tp_version <= TPACKET_V2) {
2279 slot_id = po->rx_ring.head;
2280 if (test_bit(slot_id, po->rx_ring.rx_owner_map))
2281 goto drop_n_account;
2282 __set_bit(slot_id, po->rx_ring.rx_owner_map);
2283 }
2284
2285 if (do_vnet &&
2286 virtio_net_hdr_from_skb(skb, h.raw + macoff -
2287 sizeof(struct virtio_net_hdr),
2288 vio_le(), true, 0))
2289 goto drop_n_account;
2290
2291 if (po->tp_version <= TPACKET_V2) {
2292 packet_increment_rx_head(po, &po->rx_ring);
2299 if (atomic_read(&po->tp_drops))
2300 status |= TP_STATUS_LOSING;
2301 }
2302
2303 po->stats.stats1.tp_packets++;
2304 if (copy_skb) {
2305 status |= TP_STATUS_COPY;
2306 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
2307 }
2308 spin_unlock(&sk->sk_receive_queue.lock);
2309
2310 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
2311
2312 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
2313 getnstimeofday(&ts);
2314
2315 status |= ts_status;
2316
2317 switch (po->tp_version) {
2318 case TPACKET_V1:
2319 h.h1->tp_len = skb->len;
2320 h.h1->tp_snaplen = snaplen;
2321 h.h1->tp_mac = macoff;
2322 h.h1->tp_net = netoff;
2323 h.h1->tp_sec = ts.tv_sec;
2324 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
2325 hdrlen = sizeof(*h.h1);
2326 break;
2327 case TPACKET_V2:
2328 h.h2->tp_len = skb->len;
2329 h.h2->tp_snaplen = snaplen;
2330 h.h2->tp_mac = macoff;
2331 h.h2->tp_net = netoff;
2332 h.h2->tp_sec = ts.tv_sec;
2333 h.h2->tp_nsec = ts.tv_nsec;
2334 if (skb_vlan_tag_present(skb)) {
2335 h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
2336 h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
2337 status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
2338 } else {
2339 h.h2->tp_vlan_tci = 0;
2340 h.h2->tp_vlan_tpid = 0;
2341 }
2342 memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding));
2343 hdrlen = sizeof(*h.h2);
2344 break;
2345 case TPACKET_V3:
2346 /* tp_next_offset and the VLAN fields of the v3 header were
2347 * already filled in by prb_fill_curr_block(), so only OR in
2348 * the status here and do not clear those fields. */
2349 h.h3->tp_status |= status;
2350 h.h3->tp_len = skb->len;
2351 h.h3->tp_snaplen = snaplen;
2352 h.h3->tp_mac = macoff;
2353 h.h3->tp_net = netoff;
2354 h.h3->tp_sec = ts.tv_sec;
2355 h.h3->tp_nsec = ts.tv_nsec;
2356 memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding));
2357 hdrlen = sizeof(*h.h3);
2358 break;
2359 default:
2360 BUG();
2361 }
2362
2363 sll = h.raw + TPACKET_ALIGN(hdrlen);
2364 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
2365 sll->sll_family = AF_PACKET;
2366 sll->sll_hatype = dev->type;
2367 sll->sll_protocol = skb->protocol;
2368 sll->sll_pkttype = skb->pkt_type;
2369 if (unlikely(po->origdev))
2370 sll->sll_ifindex = orig_dev->ifindex;
2371 else
2372 sll->sll_ifindex = dev->ifindex;
2373
2374 smp_mb();
2375
2376 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
2377 if (po->tp_version <= TPACKET_V2) {
2378 u8 *start, *end;
2379
2380 end = (u8 *) PAGE_ALIGN((unsigned long) h.raw +
2381 macoff + snaplen);
2382
2383 for (start = h.raw; start < end; start += PAGE_SIZE)
2384 flush_dcache_page(pgv_to_page(start));
2385 }
2386 smp_wmb();
2387 #endif
2388
2389 if (po->tp_version <= TPACKET_V2) {
2390 spin_lock(&sk->sk_receive_queue.lock);
2391 __packet_set_status(po, h.raw, status);
2392 __clear_bit(slot_id, po->rx_ring.rx_owner_map);
2393 spin_unlock(&sk->sk_receive_queue.lock);
2394 sk->sk_data_ready(sk);
2395 } else {
2396 prb_clear_blk_fill_status(&po->rx_ring);
2397 }
2398
2399 drop_n_restore:
2400 if (skb_head != skb->data && skb_shared(skb)) {
2401 skb->data = skb_head;
2402 skb->len = skb_len;
2403 }
2404 drop:
2405 if (!is_drop_n_account)
2406 consume_skb(skb);
2407 else
2408 kfree_skb(skb);
2409 return 0;
2410
2411 drop_n_account:
2412 spin_unlock(&sk->sk_receive_queue.lock);
2413 atomic_inc(&po->tp_drops);
2414 is_drop_n_account = true;
2415
2416 sk->sk_data_ready(sk);
2417 kfree_skb(copy_skb);
2418 goto drop_n_restore;
2419 }
2420
2421 static void tpacket_destruct_skb(struct sk_buff *skb)
2422 {
2423 struct packet_sock *po = pkt_sk(skb->sk);
2424
2425 if (likely(po->tx_ring.pg_vec)) {
2426 void *ph;
2427 __u32 ts;
2428
2429 ph = skb_zcopy_get_nouarg(skb);
2430 packet_dec_pending(&po->tx_ring);
2431
2432 ts = __packet_set_timestamp(po, ph, skb);
2433 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
2434
2435 if (!packet_read_pending(&po->tx_ring))
2436 complete(&po->skb_completion);
2437 }
2438
2439 sock_wfree(skb);
2440 }
2441
2442 static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
2443 {
2444 if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2445 (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2446 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
2447 __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
2448 vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
2449 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2450 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
2451
2452 if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
2453 return -EINVAL;
2454
2455 return 0;
2456 }
2457
2458 static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
2459 struct virtio_net_hdr *vnet_hdr)
2460 {
2461 if (*len < sizeof(*vnet_hdr))
2462 return -EINVAL;
2463 *len -= sizeof(*vnet_hdr);
2464
2465 if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
2466 return -EFAULT;
2467
2468 return __packet_snd_vnet_parse(vnet_hdr, *len);
2469 }
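/* Usage sketch (userspace, illustrative only - not part of this file):
 * with PACKET_VNET_HDR enabled, the sender prepends a struct virtio_net_hdr
 * that packet_snd_vnet_parse() strips off before the link-layer frame.
 * "fd", "frame" and "frame_len" are assumed to exist in the caller.
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &on, sizeof(on));
 *
 *	struct virtio_net_hdr vh = { 0 };	// no checksum/GSO offload requested
 *	struct iovec iov[2] = {
 *		{ &vh, sizeof(vh) },		// consumed by packet_snd_vnet_parse()
 *		{ frame, frame_len },		// actual packet, link-layer header first
 *	};
 *	struct msghdr mh = { .msg_iov = iov, .msg_iovlen = 2 };
 *	sendmsg(fd, &mh, 0);
 */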
2470
2471 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2472 void *frame, struct net_device *dev, void *data, int tp_len,
2473 __be16 proto, unsigned char *addr, int hlen, int copylen,
2474 const struct sockcm_cookie *sockc)
2475 {
2476 union tpacket_uhdr ph;
2477 int to_write, offset, len, nr_frags, len_max;
2478 struct socket *sock = po->sk.sk_socket;
2479 struct page *page;
2480 int err;
2481
2482 ph.raw = frame;
2483
2484 skb->protocol = proto;
2485 skb->dev = dev;
2486 skb->priority = po->sk.sk_priority;
2487 skb->mark = po->sk.sk_mark;
2488 skb->tstamp = sockc->transmit_time;
2489 skb_setup_tx_timestamp(skb, sockc->tsflags);
2490 skb_zcopy_set_nouarg(skb, ph.raw);
2491
2492 skb_reserve(skb, hlen);
2493 skb_reset_network_header(skb);
2494
2495 to_write = tp_len;
2496
2497 if (sock->type == SOCK_DGRAM) {
2498 err = dev_hard_header(skb, dev, ntohs(proto), addr,
2499 NULL, tp_len);
2500 if (unlikely(err < 0))
2501 return -EINVAL;
2502 } else if (copylen) {
2503 int hdrlen = min_t(int, copylen, tp_len);
2504
2505 skb_push(skb, dev->hard_header_len);
2506 skb_put(skb, copylen - dev->hard_header_len);
2507 err = skb_store_bits(skb, 0, data, hdrlen);
2508 if (unlikely(err))
2509 return err;
2510 if (!dev_validate_header(dev, skb->data, hdrlen))
2511 return -EINVAL;
2512
2513 data += hdrlen;
2514 to_write -= hdrlen;
2515 }
2516
2517 offset = offset_in_page(data);
2518 len_max = PAGE_SIZE - offset;
2519 len = ((to_write > len_max) ? len_max : to_write);
2520
2521 skb->data_len = to_write;
2522 skb->len += to_write;
2523 skb->truesize += to_write;
2524 refcount_add(to_write, &po->sk.sk_wmem_alloc);
2525
2526 while (likely(to_write)) {
2527 nr_frags = skb_shinfo(skb)->nr_frags;
2528
2529 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
2530 pr_err("Packet exceed the number of skb frags(%lu)\n",
2531 MAX_SKB_FRAGS);
2532 return -EFAULT;
2533 }
2534
2535 page = pgv_to_page(data);
2536 data += len;
2537 flush_dcache_page(page);
2538 get_page(page);
2539 skb_fill_page_desc(skb, nr_frags, page, offset, len);
2540 to_write -= len;
2541 offset = 0;
2542 len_max = PAGE_SIZE;
2543 len = ((to_write > len_max) ? len_max : to_write);
2544 }
2545
2546 packet_parse_headers(skb, sock);
2547
2548 return tp_len;
2549 }
2550
2551 static int tpacket_parse_header(struct packet_sock *po, void *frame,
2552 int size_max, void **data)
2553 {
2554 union tpacket_uhdr ph;
2555 int tp_len, off;
2556
2557 ph.raw = frame;
2558
2559 switch (po->tp_version) {
2560 case TPACKET_V3:
2561 if (ph.h3->tp_next_offset != 0) {
2562 pr_warn_once("variable sized slot not supported");
2563 return -EINVAL;
2564 }
2565 tp_len = ph.h3->tp_len;
2566 break;
2567 case TPACKET_V2:
2568 tp_len = ph.h2->tp_len;
2569 break;
2570 default:
2571 tp_len = ph.h1->tp_len;
2572 break;
2573 }
2574 if (unlikely(tp_len > size_max)) {
2575 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
2576 return -EMSGSIZE;
2577 }
2578
2579 if (unlikely(po->tp_tx_has_off)) {
2580 int off_min, off_max;
2581
2582 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2583 off_max = po->tx_ring.frame_size - tp_len;
2584 if (po->sk.sk_type == SOCK_DGRAM) {
2585 switch (po->tp_version) {
2586 case TPACKET_V3:
2587 off = ph.h3->tp_net;
2588 break;
2589 case TPACKET_V2:
2590 off = ph.h2->tp_net;
2591 break;
2592 default:
2593 off = ph.h1->tp_net;
2594 break;
2595 }
2596 } else {
2597 switch (po->tp_version) {
2598 case TPACKET_V3:
2599 off = ph.h3->tp_mac;
2600 break;
2601 case TPACKET_V2:
2602 off = ph.h2->tp_mac;
2603 break;
2604 default:
2605 off = ph.h1->tp_mac;
2606 break;
2607 }
2608 }
2609 if (unlikely((off < off_min) || (off_max < off)))
2610 return -EINVAL;
2611 } else {
2612 off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2613 }
2614
2615 *data = frame + off;
2616 return tp_len;
2617 }
2618
2619 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2620 {
2621 struct sk_buff *skb = NULL;
2622 struct net_device *dev;
2623 struct virtio_net_hdr *vnet_hdr = NULL;
2624 struct sockcm_cookie sockc;
2625 __be16 proto;
2626 int err, reserve = 0;
2627 void *ph;
2628 DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2629 bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
2630 unsigned char *addr = NULL;
2631 int tp_len, size_max;
2632 void *data;
2633 int len_sum = 0;
2634 int status = TP_STATUS_AVAILABLE;
2635 int hlen, tlen, copylen = 0;
2636 long timeo = 0;
2637
2638 mutex_lock(&po->pg_vec_lock);
2639
2640 /* packet_sendmsg() check on tx_ring.pg_vec was lockless,
2641 * we need to confirm it under protection of pg_vec_lock.
2642 */
2643 if (unlikely(!po->tx_ring.pg_vec)) {
2644 err = -EBUSY;
2645 goto out;
2646 }
2647 if (likely(saddr == NULL)) {
2648 dev = packet_cached_dev_get(po);
2649 proto = po->num;
2650 } else {
2651 err = -EINVAL;
2652 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2653 goto out;
2654 if (msg->msg_namelen < (saddr->sll_halen
2655 + offsetof(struct sockaddr_ll,
2656 sll_addr)))
2657 goto out;
2658 proto = saddr->sll_protocol;
2659 dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
2660 if (po->sk.sk_socket->type == SOCK_DGRAM) {
2661 if (dev && msg->msg_namelen < dev->addr_len +
2662 offsetof(struct sockaddr_ll, sll_addr))
2663 goto out_put;
2664 addr = saddr->sll_addr;
2665 }
2666 }
2667
2668 err = -ENXIO;
2669 if (unlikely(dev == NULL))
2670 goto out;
2671 err = -ENETDOWN;
2672 if (unlikely(!(dev->flags & IFF_UP)))
2673 goto out_put;
2674
2675 sockcm_init(&sockc, &po->sk);
2676 if (msg->msg_controllen) {
2677 err = sock_cmsg_send(&po->sk, msg, &sockc);
2678 if (unlikely(err))
2679 goto out_put;
2680 }
2681
2682 if (po->sk.sk_socket->type == SOCK_RAW)
2683 reserve = dev->hard_header_len;
2684 size_max = po->tx_ring.frame_size
2685 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2686
2687 if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
2688 size_max = dev->mtu + reserve + VLAN_HLEN;
2689
2690 reinit_completion(&po->skb_completion);
2691
2692 do {
2693 ph = packet_current_frame(po, &po->tx_ring,
2694 TP_STATUS_SEND_REQUEST);
2695 if (unlikely(ph == NULL)) {
2696 if (need_wait && skb) {
2697 timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
2698 timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
2699 if (timeo <= 0) {
2700 err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
2701 goto out_put;
2702 }
2703 }
2704 /* check for additional frames */
2705 continue;
2706 }
2707
2708 skb = NULL;
2709 tp_len = tpacket_parse_header(po, ph, size_max, &data);
2710 if (tp_len < 0)
2711 goto tpacket_error;
2712
2713 status = TP_STATUS_SEND_REQUEST;
2714 hlen = LL_RESERVED_SPACE(dev);
2715 tlen = dev->needed_tailroom;
2716 if (po->has_vnet_hdr) {
2717 vnet_hdr = data;
2718 data += sizeof(*vnet_hdr);
2719 tp_len -= sizeof(*vnet_hdr);
2720 if (tp_len < 0 ||
2721 __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
2722 tp_len = -EINVAL;
2723 goto tpacket_error;
2724 }
2725 copylen = __virtio16_to_cpu(vio_le(),
2726 vnet_hdr->hdr_len);
2727 }
2728 copylen = max_t(int, copylen, dev->hard_header_len);
2729 skb = sock_alloc_send_skb(&po->sk,
2730 hlen + tlen + sizeof(struct sockaddr_ll) +
2731 (copylen - dev->hard_header_len),
2732 !need_wait, &err);
2733
2734 if (unlikely(skb == NULL)) {
2735 /* we assume the socket was initially writeable ... */
2736 if (likely(len_sum > 0))
2737 err = len_sum;
2738 goto out_status;
2739 }
2740 tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
2741 addr, hlen, copylen, &sockc);
2742 if (likely(tp_len >= 0) &&
2743 tp_len > dev->mtu + reserve &&
2744 !po->has_vnet_hdr &&
2745 !packet_extra_vlan_len_allowed(dev, skb))
2746 tp_len = -EMSGSIZE;
2747
2748 if (unlikely(tp_len < 0)) {
2749 tpacket_error:
2750 if (po->tp_loss) {
2751 __packet_set_status(po, ph,
2752 TP_STATUS_AVAILABLE);
2753 packet_increment_head(&po->tx_ring);
2754 kfree_skb(skb);
2755 continue;
2756 } else {
2757 status = TP_STATUS_WRONG_FORMAT;
2758 err = tp_len;
2759 goto out_status;
2760 }
2761 }
2762
2763 if (po->has_vnet_hdr) {
2764 if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
2765 tp_len = -EINVAL;
2766 goto tpacket_error;
2767 }
2768 virtio_net_hdr_set_proto(skb, vnet_hdr);
2769 }
2770
2771 skb->destructor = tpacket_destruct_skb;
2772 __packet_set_status(po, ph, TP_STATUS_SENDING);
2773 packet_inc_pending(&po->tx_ring);
2774
2775 status = TP_STATUS_SEND_REQUEST;
2776 err = po->xmit(skb);
2777 if (unlikely(err > 0)) {
2778 err = net_xmit_errno(err);
2779 if (err && __packet_get_status(po, ph) ==
2780 TP_STATUS_AVAILABLE) {
2781 /* skb was destructed already */
2782 skb = NULL;
2783 goto out_status;
2784 }
2785
2786 /* skb was dropped but not destructed yet;
2787 * let's treat it like congestion or err < 0
2788 */
2789 err = 0;
2790 }
2791 packet_increment_head(&po->tx_ring);
2792 len_sum += tp_len;
2793 } while (likely((ph != NULL) ||
2794 /* Note: packet_read_pending() might be slow if we
2795 * have to call it as it's a per-cpu variable, but in
2796 * the fast path we already short-circuit the loop with
2797 * the first condition above and don't have to go down
2798 * that path anyway.
2799 */
2800 (need_wait && packet_read_pending(&po->tx_ring))));
2801
2802 err = len_sum;
2803 goto out_put;
2804
2805 out_status:
2806 __packet_set_status(po, ph, status);
2807 kfree_skb(skb);
2808 out_put:
2809 dev_put(dev);
2810 out:
2811 mutex_unlock(&po->pg_vec_lock);
2812 return err;
2813 }
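/* Usage sketch (userspace, illustrative only): driving the TX ring that
 * tpacket_snd() services. Assumes a TPACKET_V2 socket "fd" already bound
 * to an interface; "pkt"/"pkt_len" are the frame to send.
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 1 << 12, .tp_block_nr = 64,
 *		.tp_frame_size = 1 << 12, .tp_frame_nr  = 64,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_TX_RING, &req, sizeof(req));
 *	char *ring = mmap(NULL, (size_t)req.tp_block_size * req.tp_block_nr,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *	struct tpacket2_hdr *hdr = (void *)ring;	// first frame slot
 *	void *data = (char *)hdr + TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);
 *	memcpy(data, pkt, pkt_len);			// link-layer header first (SOCK_RAW)
 *	hdr->tp_len = pkt_len;
 *	hdr->tp_status = TP_STATUS_SEND_REQUEST;	// hand the slot to the kernel
 *	sendto(fd, NULL, 0, 0, NULL, 0);		// flush all SEND_REQUEST slots
 */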
2814
2815 static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2816 size_t reserve, size_t len,
2817 size_t linear, int noblock,
2818 int *err)
2819 {
2820 struct sk_buff *skb;
2821
2822 /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
2823 if (prepad + len < PAGE_SIZE || !linear)
2824 linear = len;
2825
2826 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2827 err, 0);
2828 if (!skb)
2829 return NULL;
2830
2831 skb_reserve(skb, reserve);
2832 skb_put(skb, linear);
2833 skb->data_len = len - linear;
2834 skb->len += len - linear;
2835
2836 return skb;
2837 }
2838
2839 static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2840 {
2841 struct sock *sk = sock->sk;
2842 DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2843 struct sk_buff *skb;
2844 struct net_device *dev;
2845 __be16 proto;
2846 unsigned char *addr = NULL;
2847 int err, reserve = 0;
2848 struct sockcm_cookie sockc;
2849 struct virtio_net_hdr vnet_hdr = { 0 };
2850 int offset = 0;
2851 struct packet_sock *po = pkt_sk(sk);
2852 bool has_vnet_hdr = false;
2853 int hlen, tlen, linear;
2854 int extra_len = 0;
2855
2856
2857 /*
2858 * Get and verify the address.
2859 */
2860 if (likely(saddr == NULL)) {
2861 dev = packet_cached_dev_get(po);
2862 proto = po->num;
2863 } else {
2864 err = -EINVAL;
2865 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2866 goto out;
2867 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
2868 goto out;
2869 proto = saddr->sll_protocol;
2870 dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
2871 if (sock->type == SOCK_DGRAM) {
2872 if (dev && msg->msg_namelen < dev->addr_len +
2873 offsetof(struct sockaddr_ll, sll_addr))
2874 goto out_unlock;
2875 addr = saddr->sll_addr;
2876 }
2877 }
2878
2879 err = -ENXIO;
2880 if (unlikely(dev == NULL))
2881 goto out_unlock;
2882 err = -ENETDOWN;
2883 if (unlikely(!(dev->flags & IFF_UP)))
2884 goto out_unlock;
2885
2886 sockcm_init(&sockc, sk);
2887 sockc.mark = sk->sk_mark;
2888 if (msg->msg_controllen) {
2889 err = sock_cmsg_send(sk, msg, &sockc);
2890 if (unlikely(err))
2891 goto out_unlock;
2892 }
2893
2894 if (sock->type == SOCK_RAW)
2895 reserve = dev->hard_header_len;
2896 if (po->has_vnet_hdr) {
2897 err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
2898 if (err)
2899 goto out_unlock;
2900 has_vnet_hdr = true;
2901 }
2902
2903 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
2904 if (!netif_supports_nofcs(dev)) {
2905 err = -EPROTONOSUPPORT;
2906 goto out_unlock;
2907 }
2908 extra_len = 4;
2909 }
2910
2911 err = -EMSGSIZE;
2912 if (!vnet_hdr.gso_type &&
2913 (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
2914 goto out_unlock;
2915
2916 err = -ENOBUFS;
2917 hlen = LL_RESERVED_SPACE(dev);
2918 tlen = dev->needed_tailroom;
2919 linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len);
2920 linear = max(linear, min_t(int, len, dev->hard_header_len));
2921 skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear,
2922 msg->msg_flags & MSG_DONTWAIT, &err);
2923 if (skb == NULL)
2924 goto out_unlock;
2925
2926 skb_reset_network_header(skb);
2927
2928 err = -EINVAL;
2929 if (sock->type == SOCK_DGRAM) {
2930 offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
2931 if (unlikely(offset < 0))
2932 goto out_free;
2933 } else if (reserve) {
2934 skb_reserve(skb, -reserve);
2935 if (len < reserve + sizeof(struct ipv6hdr) &&
2936 dev->min_header_len != dev->hard_header_len)
2937 skb_reset_network_header(skb);
2938 }
2939
2940 /* Returns -EFAULT on error */
2941 err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len);
2942 if (err)
2943 goto out_free;
2944
2945 if (sock->type == SOCK_RAW &&
2946 !dev_validate_header(dev, skb->data, len)) {
2947 err = -EINVAL;
2948 goto out_free;
2949 }
2950
2951 skb_setup_tx_timestamp(skb, sockc.tsflags);
2952
2953 if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
2954 !packet_extra_vlan_len_allowed(dev, skb)) {
2955 err = -EMSGSIZE;
2956 goto out_free;
2957 }
2958
2959 skb->protocol = proto;
2960 skb->dev = dev;
2961 skb->priority = sk->sk_priority;
2962 skb->mark = sockc.mark;
2963 skb->tstamp = sockc.transmit_time;
2964
2965 if (has_vnet_hdr) {
2966 err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
2967 if (err)
2968 goto out_free;
2969 len += sizeof(vnet_hdr);
2970 virtio_net_hdr_set_proto(skb, &vnet_hdr);
2971 }
2972
2973 packet_parse_headers(skb, sock);
2974
2975 if (unlikely(extra_len == 4))
2976 skb->no_fcs = 1;
2977
2978 err = po->xmit(skb);
2979 if (err > 0 && (err = net_xmit_errno(err)) != 0)
2980 goto out_unlock;
2981
2982 dev_put(dev);
2983
2984 return len;
2985
2986 out_free:
2987 kfree_skb(skb);
2988 out_unlock:
2989 if (dev)
2990 dev_put(dev);
2991 out:
2992 return err;
2993 }
2994
2995 static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
2996 {
2997 struct sock *sk = sock->sk;
2998 struct packet_sock *po = pkt_sk(sk);
2999
3000 if (po->tx_ring.pg_vec)
3001 return tpacket_snd(po, msg);
3002 else
3003 return packet_snd(sock, msg, len);
3004 }
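/* Usage sketch (userspace, illustrative only): the non-ring transmit path
 * above (packet_snd). For SOCK_DGRAM the kernel builds the link-layer
 * header from the destination in sockaddr_ll; for SOCK_RAW the buffer must
 * already start with it. "ifindex", "dst_mac" and "payload" are assumed.
 *
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_IP),
 *		.sll_ifindex  = ifindex,		// e.g. from if_nametoindex()
 *		.sll_halen    = ETH_ALEN,
 *	};
 *	memcpy(sll.sll_addr, dst_mac, ETH_ALEN);
 *	sendto(fd, payload, payload_len, 0,
 *	       (struct sockaddr *)&sll, sizeof(sll));
 */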
3005
3006
3007 /*
3008 * Close a PACKET socket. This is fairly simple. We immediately go
3009 * to 'closed' state and remove our protocol entry in the device list.
3010 */
3011 static int packet_release(struct socket *sock)
3012 {
3013 struct sock *sk = sock->sk;
3014 struct packet_sock *po;
3015 struct packet_fanout *f;
3016 struct net *net;
3017 union tpacket_req_u req_u;
3018
3019 if (!sk)
3020 return 0;
3021
3022 net = sock_net(sk);
3023 po = pkt_sk(sk);
3024
3025 mutex_lock(&net->packet.sklist_lock);
3026 sk_del_node_init_rcu(sk);
3027 mutex_unlock(&net->packet.sklist_lock);
3028
3029 preempt_disable();
3030 sock_prot_inuse_add(net, sk->sk_prot, -1);
3031 preempt_enable();
3032
3033 spin_lock(&po->bind_lock);
3034 unregister_prot_hook(sk, false);
3035 packet_cached_dev_reset(po);
3036
3037 if (po->prot_hook.dev) {
3038 dev_put(po->prot_hook.dev);
3039 po->prot_hook.dev = NULL;
3040 }
3041 spin_unlock(&po->bind_lock);
3042
3043 packet_flush_mclist(sk);
3044
3045 lock_sock(sk);
3046 if (po->rx_ring.pg_vec) {
3047 memset(&req_u, 0, sizeof(req_u));
3048 packet_set_ring(sk, &req_u, 1, 0);
3049 }
3050
3051 if (po->tx_ring.pg_vec) {
3052 memset(&req_u, 0, sizeof(req_u));
3053 packet_set_ring(sk, &req_u, 1, 1);
3054 }
3055 release_sock(sk);
3056
3057 f = fanout_release(sk);
3058
3059 synchronize_net();
3060
3061 kfree(po->rollover);
3062 if (f) {
3063 fanout_release_data(f);
3064 kfree(f);
3065 }
3066
3067
3068 /* Now the socket is dead. No more input will appear. */
3069 sock_orphan(sk);
3070 sock->sk = NULL;
3071
3072
3073 /* Purge queues */
3074 skb_queue_purge(&sk->sk_receive_queue);
3075 packet_free_pending(po);
3076 sk_refcnt_debug_release(sk);
3077
3078 sock_put(sk);
3079 return 0;
3080 }
3081
3082
3083 /*
3084 * Attach a packet hook.
3085 */
3086 static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
3087 __be16 proto)
3088 {
3089 struct packet_sock *po = pkt_sk(sk);
3090 struct net_device *dev_curr;
3091 __be16 proto_curr;
3092 bool need_rehook;
3093 struct net_device *dev = NULL;
3094 int ret = 0;
3095 bool unlisted = false;
3096
3097 lock_sock(sk);
3098 spin_lock(&po->bind_lock);
3099 rcu_read_lock();
3100
3101 if (po->fanout) {
3102 ret = -EINVAL;
3103 goto out_unlock;
3104 }
3105
3106 if (name) {
3107 dev = dev_get_by_name_rcu(sock_net(sk), name);
3108 if (!dev) {
3109 ret = -ENODEV;
3110 goto out_unlock;
3111 }
3112 } else if (ifindex) {
3113 dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
3114 if (!dev) {
3115 ret = -ENODEV;
3116 goto out_unlock;
3117 }
3118 }
3119
3120 if (dev)
3121 dev_hold(dev);
3122
3123 proto_curr = po->prot_hook.type;
3124 dev_curr = po->prot_hook.dev;
3125
3126 need_rehook = proto_curr != proto || dev_curr != dev;
3127
3128 if (need_rehook) {
3129 if (po->running) {
3130 rcu_read_unlock();
3131 /* prevents packet_notifier() from calling
3132 * register_prot_hook()
3133 */
3134 po->num = 0;
3135 __unregister_prot_hook(sk, true);
3136 rcu_read_lock();
3137 dev_curr = po->prot_hook.dev;
3138 if (dev)
3139 unlisted = !dev_get_by_index_rcu(sock_net(sk),
3140 dev->ifindex);
3141 }
3142
3143 BUG_ON(po->running);
3144 po->num = proto;
3145 po->prot_hook.type = proto;
3146
3147 if (unlikely(unlisted)) {
3148 dev_put(dev);
3149 po->prot_hook.dev = NULL;
3150 po->ifindex = -1;
3151 packet_cached_dev_reset(po);
3152 } else {
3153 po->prot_hook.dev = dev;
3154 po->ifindex = dev ? dev->ifindex : 0;
3155 packet_cached_dev_assign(po, dev);
3156 }
3157 }
3158 if (dev_curr)
3159 dev_put(dev_curr);
3160
3161 if (proto == 0 || !need_rehook)
3162 goto out_unlock;
3163
3164 if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
3165 register_prot_hook(sk);
3166 } else {
3167 sk->sk_err = ENETDOWN;
3168 if (!sock_flag(sk, SOCK_DEAD))
3169 sk->sk_error_report(sk);
3170 }
3171
3172 out_unlock:
3173 rcu_read_unlock();
3174 spin_unlock(&po->bind_lock);
3175 release_sock(sk);
3176 return ret;
3177 }
3178
3179
3180 /*
3181 * Bind a packet socket to a device
3182 */
3183 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
3184 int addr_len)
3185 {
3186 struct sock *sk = sock->sk;
3187 char name[sizeof(uaddr->sa_data) + 1];
3188
3189
3190 /*
3191 * Check legality
3192 */
3193 if (addr_len != sizeof(struct sockaddr))
3194 return -EINVAL;
3195
3196 /* uaddr->sa_data comes from user space; it is not guaranteed
3197 * to be zero-terminated. */
3198 memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
3199 name[sizeof(uaddr->sa_data)] = 0;
3200
3201 return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
3202 }
3203
3204 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
3205 {
3206 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
3207 struct sock *sk = sock->sk;
3208
3209
3210 /*
3211 * Check legality
3212 */
3213 if (addr_len < sizeof(struct sockaddr_ll))
3214 return -EINVAL;
3215 if (sll->sll_family != AF_PACKET)
3216 return -EINVAL;
3217
3218 return packet_do_bind(sk, NULL, sll->sll_ifindex,
3219 sll->sll_protocol ? : pkt_sk(sk)->num);
3220 }
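/* Usage sketch (userspace, illustrative only): creating and binding a
 * packet socket so packet_do_bind() attaches the protocol hook to one
 * device. CAP_NET_RAW is required (see packet_create() below).
 *
 *	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),	// 0 = any device
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */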
3221
3222 static struct proto packet_proto = {
3223 .name = "PACKET",
3224 .owner = THIS_MODULE,
3225 .obj_size = sizeof(struct packet_sock),
3226 };
3227
3228
3229 /*
3230 * Create a packet of type SOCK_PACKET.
3231 */
3232 static int packet_create(struct net *net, struct socket *sock, int protocol,
3233 int kern)
3234 {
3235 struct sock *sk;
3236 struct packet_sock *po;
3237 __be16 proto = (__force __be16)protocol;
3238 int err;
3239
3240 if (!ns_capable(net->user_ns, CAP_NET_RAW))
3241 return -EPERM;
3242 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
3243 sock->type != SOCK_PACKET)
3244 return -ESOCKTNOSUPPORT;
3245
3246 sock->state = SS_UNCONNECTED;
3247
3248 err = -ENOBUFS;
3249 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
3250 if (sk == NULL)
3251 goto out;
3252
3253 sock->ops = &packet_ops;
3254 if (sock->type == SOCK_PACKET)
3255 sock->ops = &packet_ops_spkt;
3256
3257 sock_init_data(sock, sk);
3258
3259 po = pkt_sk(sk);
3260 init_completion(&po->skb_completion);
3261 sk->sk_family = PF_PACKET;
3262 po->num = proto;
3263 po->xmit = dev_queue_xmit;
3264
3265 err = packet_alloc_pending(po);
3266 if (err)
3267 goto out2;
3268
3269 packet_cached_dev_reset(po);
3270
3271 sk->sk_destruct = packet_sock_destruct;
3272 sk_refcnt_debug_inc(sk);
3273
3274
3275 /*
3276 * Attach a protocol block
3277 */
3278 spin_lock_init(&po->bind_lock);
3279 mutex_init(&po->pg_vec_lock);
3280 po->rollover = NULL;
3281 po->prot_hook.func = packet_rcv;
3282
3283 if (sock->type == SOCK_PACKET)
3284 po->prot_hook.func = packet_rcv_spkt;
3285
3286 po->prot_hook.af_packet_priv = sk;
3287
3288 if (proto) {
3289 po->prot_hook.type = proto;
3290 __register_prot_hook(sk);
3291 }
3292
3293 mutex_lock(&net->packet.sklist_lock);
3294 sk_add_node_tail_rcu(sk, &net->packet.sklist);
3295 mutex_unlock(&net->packet.sklist_lock);
3296
3297 preempt_disable();
3298 sock_prot_inuse_add(net, &packet_proto, 1);
3299 preempt_enable();
3300
3301 return 0;
3302 out2:
3303 sk_free(sk);
3304 out:
3305 return err;
3306 }
3307
3308
3309 /*
3310 * Pull a packet from our receive queue and hand it to the user.
3311 * If necessary we block.
3312 */
3313 static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
3314 int flags)
3315 {
3316 struct sock *sk = sock->sk;
3317 struct sk_buff *skb;
3318 int copied, err;
3319 int vnet_hdr_len = 0;
3320 unsigned int origlen = 0;
3321
3322 err = -EINVAL;
3323 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
3324 goto out;
3325
3326 #if 0
3327 /* What error should we return now? EUNATTACH? */
3328 if (pkt_sk(sk)->ifindex < 0)
3329 return -ENODEV;
3330 #endif
3331
3332 if (flags & MSG_ERRQUEUE) {
3333 err = sock_recv_errqueue(sk, msg, len,
3334 SOL_PACKET, PACKET_TX_TIMESTAMP);
3335 goto out;
3336 }
3337
3338
3339 /*
3340 * Call the generic datagram receiver. This handles all sorts
3341 * of horrible races and re-entrancy so we can forget about it
3342 * in the protocol layers.
3343 *
3344 * Now it will return ENETDOWN, if the device has just gone down,
3345 * but then it will block.
3346 */
3347 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
3348
3349
3350 /*
3351 * An error occurred so return it. Because skb_recv_datagram()
3352 * handles the blocking we don't see and worry about blocking
3353 * retries.
3354 */
3355 if (skb == NULL)
3356 goto out;
3357
3358 packet_rcv_try_clear_pressure(pkt_sk(sk));
3359
3360 if (pkt_sk(sk)->has_vnet_hdr) {
3361 err = packet_rcv_vnet(msg, skb, &len);
3362 if (err)
3363 goto out_free;
3364 vnet_hdr_len = sizeof(struct virtio_net_hdr);
3365 }
3366
3367 /*
3368 * You lose any data beyond the buffer you gave. If it worries
3369 * a user program they can ask the device for its MTU anyway.
3370 */
3371 copied = skb->len;
3372 if (copied > len) {
3373 copied = len;
3374 msg->msg_flags |= MSG_TRUNC;
3375 }
3376
3377 err = skb_copy_datagram_msg(skb, 0, msg, copied);
3378 if (err)
3379 goto out_free;
3380
3381 if (sock->type != SOCK_PACKET) {
3382 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
3383
3384 /* Original length was stored in sockaddr_ll fields */
3385 origlen = PACKET_SKB_CB(skb)->sa.origlen;
3386 sll->sll_family = AF_PACKET;
3387 sll->sll_protocol = skb->protocol;
3388 }
3389
3390 sock_recv_ts_and_drops(msg, sk, skb);
3391
3392 if (msg->msg_name) {
3393 int copy_len;
3394
3395 /* If the address length field is there to be filled
3396 * in, we fill it in now.
3397 */
3398 if (sock->type == SOCK_PACKET) {
3399 __sockaddr_check_size(sizeof(struct sockaddr_pkt));
3400 msg->msg_namelen = sizeof(struct sockaddr_pkt);
3401 copy_len = msg->msg_namelen;
3402 } else {
3403 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
3404
3405 msg->msg_namelen = sll->sll_halen +
3406 offsetof(struct sockaddr_ll, sll_addr);
3407 copy_len = msg->msg_namelen;
3408 if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
3409 memset(msg->msg_name +
3410 offsetof(struct sockaddr_ll, sll_addr),
3411 0, sizeof(sll->sll_addr));
3412 msg->msg_namelen = sizeof(struct sockaddr_ll);
3413 }
3414 }
3415 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
3416 }
3417
3418 if (pkt_sk(sk)->auxdata) {
3419 struct tpacket_auxdata aux;
3420
3421 aux.tp_status = TP_STATUS_USER;
3422 if (skb->ip_summed == CHECKSUM_PARTIAL)
3423 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
3424 else if (skb->pkt_type != PACKET_OUTGOING &&
3425 (skb->ip_summed == CHECKSUM_COMPLETE ||
3426 skb_csum_unnecessary(skb)))
3427 aux.tp_status |= TP_STATUS_CSUM_VALID;
3428
3429 aux.tp_len = origlen;
3430 aux.tp_snaplen = skb->len;
3431 aux.tp_mac = 0;
3432 aux.tp_net = skb_network_offset(skb);
3433 if (skb_vlan_tag_present(skb)) {
3434 aux.tp_vlan_tci = skb_vlan_tag_get(skb);
3435 aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
3436 aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
3437 } else {
3438 aux.tp_vlan_tci = 0;
3439 aux.tp_vlan_tpid = 0;
3440 }
3441 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
3442 }
3443
3444 /*
3445 * Free or return the buffer as appropriate. Again this
3446 * hides all the races and re-entrancy issues from us.
3447 */
3448 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
3449
3450 out_free:
3451 skb_free_datagram(sk, skb);
3452 out:
3453 return err;
3454 }
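/* Usage sketch (userspace, illustrative only): receiving with
 * PACKET_AUXDATA so the tpacket_auxdata control message built above is
 * delivered alongside each packet.
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &on, sizeof(on));
 *
 *	char buf[2048], cbuf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *	struct iovec iov = { buf, sizeof(buf) };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = cbuf, .msg_controllen = sizeof(cbuf) };
 *	ssize_t n = recvmsg(fd, &mh, 0);
 *	for (struct cmsghdr *c = CMSG_FIRSTHDR(&mh); c; c = CMSG_NXTHDR(&mh, c))
 *		if (c->cmsg_level == SOL_PACKET && c->cmsg_type == PACKET_AUXDATA) {
 *			struct tpacket_auxdata *aux = (void *)CMSG_DATA(c);
 *			// aux->tp_len is the original length even if the copy was truncated
 *		}
 */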
3455
3456 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
3457 int peer)
3458 {
3459 struct net_device *dev;
3460 struct sock *sk = sock->sk;
3461
3462 if (peer)
3463 return -EOPNOTSUPP;
3464
3465 uaddr->sa_family = AF_PACKET;
3466 memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
3467 rcu_read_lock();
3468 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
3469 if (dev)
3470 strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
3471 rcu_read_unlock();
3472
3473 return sizeof(*uaddr);
3474 }
3475
3476 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
3477 int peer)
3478 {
3479 struct net_device *dev;
3480 struct sock *sk = sock->sk;
3481 struct packet_sock *po = pkt_sk(sk);
3482 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
3483
3484 if (peer)
3485 return -EOPNOTSUPP;
3486
3487 sll->sll_family = AF_PACKET;
3488 sll->sll_ifindex = po->ifindex;
3489 sll->sll_protocol = po->num;
3490 sll->sll_pkttype = 0;
3491 rcu_read_lock();
3492 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
3493 if (dev) {
3494 sll->sll_hatype = dev->type;
3495 sll->sll_halen = dev->addr_len;
3496 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
3497 } else {
3498 sll->sll_hatype = 0;
3499 sll->sll_halen = 0;
3500 }
3501 rcu_read_unlock();
3502
3503 return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
3504 }
3505
3506 static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
3507 int what)
3508 {
3509 switch (i->type) {
3510 case PACKET_MR_MULTICAST:
3511 if (i->alen != dev->addr_len)
3512 return -EINVAL;
3513 if (what > 0)
3514 return dev_mc_add(dev, i->addr);
3515 else
3516 return dev_mc_del(dev, i->addr);
3517 break;
3518 case PACKET_MR_PROMISC:
3519 return dev_set_promiscuity(dev, what);
3520 case PACKET_MR_ALLMULTI:
3521 return dev_set_allmulti(dev, what);
3522 case PACKET_MR_UNICAST:
3523 if (i->alen != dev->addr_len)
3524 return -EINVAL;
3525 if (what > 0)
3526 return dev_uc_add(dev, i->addr);
3527 else
3528 return dev_uc_del(dev, i->addr);
3529 break;
3530 default:
3531 break;
3532 }
3533 return 0;
3534 }
3535
3536 static void packet_dev_mclist_delete(struct net_device *dev,
3537 struct packet_mclist **mlp)
3538 {
3539 struct packet_mclist *ml;
3540
3541 while ((ml = *mlp) != NULL) {
3542 if (ml->ifindex == dev->ifindex) {
3543 packet_dev_mc(dev, ml, -1);
3544 *mlp = ml->next;
3545 kfree(ml);
3546 } else
3547 mlp = &ml->next;
3548 }
3549 }
3550
3551 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
3552 {
3553 struct packet_sock *po = pkt_sk(sk);
3554 struct packet_mclist *ml, *i;
3555 struct net_device *dev;
3556 int err;
3557
3558 rtnl_lock();
3559
3560 err = -ENODEV;
3561 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
3562 if (!dev)
3563 goto done;
3564
3565 err = -EINVAL;
3566 if (mreq->mr_alen > dev->addr_len)
3567 goto done;
3568
3569 err = -ENOBUFS;
3570 i = kmalloc(sizeof(*i), GFP_KERNEL);
3571 if (i == NULL)
3572 goto done;
3573
3574 err = 0;
3575 for (ml = po->mclist; ml; ml = ml->next) {
3576 if (ml->ifindex == mreq->mr_ifindex &&
3577 ml->type == mreq->mr_type &&
3578 ml->alen == mreq->mr_alen &&
3579 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3580 ml->count++;
3581 /* entry already exists; drop the newly allocated duplicate */
3582 kfree(i);
3583 goto done;
3584 }
3585 }
3586
3587 i->type = mreq->mr_type;
3588 i->ifindex = mreq->mr_ifindex;
3589 i->alen = mreq->mr_alen;
3590 memcpy(i->addr, mreq->mr_address, i->alen);
3591 memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
3592 i->count = 1;
3593 i->next = po->mclist;
3594 po->mclist = i;
3595 err = packet_dev_mc(dev, i, 1);
3596 if (err) {
3597 po->mclist = i->next;
3598 kfree(i);
3599 }
3600
3601 done:
3602 rtnl_unlock();
3603 return err;
3604 }
3605
3606 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
3607 {
3608 struct packet_mclist *ml, **mlp;
3609
3610 rtnl_lock();
3611
3612 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
3613 if (ml->ifindex == mreq->mr_ifindex &&
3614 ml->type == mreq->mr_type &&
3615 ml->alen == mreq->mr_alen &&
3616 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3617 if (--ml->count == 0) {
3618 struct net_device *dev;
3619 *mlp = ml->next;
3620 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3621 if (dev)
3622 packet_dev_mc(dev, ml, -1);
3623 kfree(ml);
3624 }
3625 break;
3626 }
3627 }
3628 rtnl_unlock();
3629 return 0;
3630 }
3631
3632 static void packet_flush_mclist(struct sock *sk)
3633 {
3634 struct packet_sock *po = pkt_sk(sk);
3635 struct packet_mclist *ml;
3636
3637 if (!po->mclist)
3638 return;
3639
3640 rtnl_lock();
3641 while ((ml = po->mclist) != NULL) {
3642 struct net_device *dev;
3643
3644 po->mclist = ml->next;
3645 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3646 if (dev != NULL)
3647 packet_dev_mc(dev, ml, -1);
3648 kfree(ml);
3649 }
3650 rtnl_unlock();
3651 }
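/* Usage sketch (userspace, illustrative only): the membership options
 * handled by packet_mc_add()/packet_mc_drop(), here used to put one
 * interface into promiscuous mode for the lifetime of the socket.
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = if_nametoindex("eth0"),
 *		.mr_type    = PACKET_MR_PROMISC,	// or PACKET_MR_MULTICAST + mr_address
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
 *	// ... capture ...
 *	setsockopt(fd, SOL_PACKET, PACKET_DROP_MEMBERSHIP, &mreq, sizeof(mreq));
 */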
3652
3653 static int
3654 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
3655 {
3656 struct sock *sk = sock->sk;
3657 struct packet_sock *po = pkt_sk(sk);
3658 int ret;
3659
3660 if (level != SOL_PACKET)
3661 return -ENOPROTOOPT;
3662
3663 switch (optname) {
3664 case PACKET_ADD_MEMBERSHIP:
3665 case PACKET_DROP_MEMBERSHIP:
3666 {
3667 struct packet_mreq_max mreq;
3668 int len = optlen;
3669 memset(&mreq, 0, sizeof(mreq));
3670 if (len < sizeof(struct packet_mreq))
3671 return -EINVAL;
3672 if (len > sizeof(mreq))
3673 len = sizeof(mreq);
3674 if (copy_from_user(&mreq, optval, len))
3675 return -EFAULT;
3676 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
3677 return -EINVAL;
3678 if (optname == PACKET_ADD_MEMBERSHIP)
3679 ret = packet_mc_add(sk, &mreq);
3680 else
3681 ret = packet_mc_drop(sk, &mreq);
3682 return ret;
3683 }
3684
3685 case PACKET_RX_RING:
3686 case PACKET_TX_RING:
3687 {
3688 union tpacket_req_u req_u;
3689 int len;
3690
3691 lock_sock(sk);
3692 switch (po->tp_version) {
3693 case TPACKET_V1:
3694 case TPACKET_V2:
3695 len = sizeof(req_u.req);
3696 break;
3697 case TPACKET_V3:
3698 default:
3699 len = sizeof(req_u.req3);
3700 break;
3701 }
3702 if (optlen < len) {
3703 ret = -EINVAL;
3704 } else {
3705 if (copy_from_user(&req_u.req, optval, len))
3706 ret = -EFAULT;
3707 else
3708 ret = packet_set_ring(sk, &req_u, 0,
3709 optname == PACKET_TX_RING);
3710 }
3711 release_sock(sk);
3712 return ret;
3713 }
3714 case PACKET_COPY_THRESH:
3715 {
3716 int val;
3717
3718 if (optlen != sizeof(val))
3719 return -EINVAL;
3720 if (copy_from_user(&val, optval, sizeof(val)))
3721 return -EFAULT;
3722
3723 pkt_sk(sk)->copy_thresh = val;
3724 return 0;
3725 }
3726 case PACKET_VERSION:
3727 {
3728 int val;
3729
3730 if (optlen != sizeof(val))
3731 return -EINVAL;
3732 if (copy_from_user(&val, optval, sizeof(val)))
3733 return -EFAULT;
3734 switch (val) {
3735 case TPACKET_V1:
3736 case TPACKET_V2:
3737 case TPACKET_V3:
3738 break;
3739 default:
3740 return -EINVAL;
3741 }
3742 lock_sock(sk);
3743 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3744 ret = -EBUSY;
3745 } else {
3746 po->tp_version = val;
3747 ret = 0;
3748 }
3749 release_sock(sk);
3750 return ret;
3751 }
3752 case PACKET_RESERVE:
3753 {
3754 unsigned int val;
3755
3756 if (optlen != sizeof(val))
3757 return -EINVAL;
3758 if (copy_from_user(&val, optval, sizeof(val)))
3759 return -EFAULT;
3760 if (val > INT_MAX)
3761 return -EINVAL;
3762 lock_sock(sk);
3763 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3764 ret = -EBUSY;
3765 } else {
3766 po->tp_reserve = val;
3767 ret = 0;
3768 }
3769 release_sock(sk);
3770 return ret;
3771 }
3772 case PACKET_LOSS:
3773 {
3774 unsigned int val;
3775
3776 if (optlen != sizeof(val))
3777 return -EINVAL;
3778 if (copy_from_user(&val, optval, sizeof(val)))
3779 return -EFAULT;
3780
3781 lock_sock(sk);
3782 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3783 ret = -EBUSY;
3784 } else {
3785 po->tp_loss = !!val;
3786 ret = 0;
3787 }
3788 release_sock(sk);
3789 return ret;
3790 }
3791 case PACKET_AUXDATA:
3792 {
3793 int val;
3794
3795 if (optlen < sizeof(val))
3796 return -EINVAL;
3797 if (copy_from_user(&val, optval, sizeof(val)))
3798 return -EFAULT;
3799
3800 lock_sock(sk);
3801 po->auxdata = !!val;
3802 release_sock(sk);
3803 return 0;
3804 }
3805 case PACKET_ORIGDEV:
3806 {
3807 int val;
3808
3809 if (optlen < sizeof(val))
3810 return -EINVAL;
3811 if (copy_from_user(&val, optval, sizeof(val)))
3812 return -EFAULT;
3813
3814 lock_sock(sk);
3815 po->origdev = !!val;
3816 release_sock(sk);
3817 return 0;
3818 }
3819 case PACKET_VNET_HDR:
3820 {
3821 int val;
3822
3823 if (sock->type != SOCK_RAW)
3824 return -EINVAL;
3825 if (optlen < sizeof(val))
3826 return -EINVAL;
3827 if (copy_from_user(&val, optval, sizeof(val)))
3828 return -EFAULT;
3829
3830 lock_sock(sk);
3831 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3832 ret = -EBUSY;
3833 } else {
3834 po->has_vnet_hdr = !!val;
3835 ret = 0;
3836 }
3837 release_sock(sk);
3838 return ret;
3839 }
3840 case PACKET_TIMESTAMP:
3841 {
3842 int val;
3843
3844 if (optlen != sizeof(val))
3845 return -EINVAL;
3846 if (copy_from_user(&val, optval, sizeof(val)))
3847 return -EFAULT;
3848
3849 po->tp_tstamp = val;
3850 return 0;
3851 }
3852 case PACKET_FANOUT:
3853 {
3854 int val;
3855
3856 if (optlen != sizeof(val))
3857 return -EINVAL;
3858 if (copy_from_user(&val, optval, sizeof(val)))
3859 return -EFAULT;
3860
3861 return fanout_add(sk, val & 0xffff, val >> 16);
3862 }
3863 case PACKET_FANOUT_DATA:
3864 {
3865 if (!po->fanout)
3866 return -EINVAL;
3867
3868 return fanout_set_data(po, optval, optlen);
3869 }
3870 case PACKET_IGNORE_OUTGOING:
3871 {
3872 int val;
3873
3874 if (optlen != sizeof(val))
3875 return -EINVAL;
3876 if (copy_from_user(&val, optval, sizeof(val)))
3877 return -EFAULT;
3878 if (val < 0 || val > 1)
3879 return -EINVAL;
3880
3881 po->prot_hook.ignore_outgoing = !!val;
3882 return 0;
3883 }
3884 case PACKET_TX_HAS_OFF:
3885 {
3886 unsigned int val;
3887
3888 if (optlen != sizeof(val))
3889 return -EINVAL;
3890 if (copy_from_user(&val, optval, sizeof(val)))
3891 return -EFAULT;
3892
3893 lock_sock(sk);
3894 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3895 ret = -EBUSY;
3896 } else {
3897 po->tp_tx_has_off = !!val;
3898 ret = 0;
3899 }
3900 release_sock(sk);
3901 return 0;
3902 }
3903 case PACKET_QDISC_BYPASS:
3904 {
3905 int val;
3906
3907 if (optlen != sizeof(val))
3908 return -EINVAL;
3909 if (copy_from_user(&val, optval, sizeof(val)))
3910 return -EFAULT;
3911
3912 po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
3913 return 0;
3914 }
3915 default:
3916 return -ENOPROTOOPT;
3917 }
3918 }
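/* Usage sketch (userspace, illustrative only): two of the options above.
 * PACKET_FANOUT joins a load-balancing group (low 16 bits = group id,
 * upper bits = mode and flags); PACKET_QDISC_BYPASS switches transmission
 * to packet_direct_xmit().
 *
 *	int fanout = 1234 | (PACKET_FANOUT_HASH << 16);	// 1234 is an arbitrary group id
 *	setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &fanout, sizeof(fanout));
 *
 *	int bypass = 1;
 *	setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &bypass, sizeof(bypass));
 */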
3919
3920 static int packet_getsockopt(struct socket *sock, int level, int optname,
3921 char __user *optval, int __user *optlen)
3922 {
3923 int len;
3924 int val, lv = sizeof(val);
3925 struct sock *sk = sock->sk;
3926 struct packet_sock *po = pkt_sk(sk);
3927 void *data = &val;
3928 union tpacket_stats_u st;
3929 struct tpacket_rollover_stats rstats;
3930 int drops;
3931
3932 if (level != SOL_PACKET)
3933 return -ENOPROTOOPT;
3934
3935 if (get_user(len, optlen))
3936 return -EFAULT;
3937
3938 if (len < 0)
3939 return -EINVAL;
3940
3941 switch (optname) {
3942 case PACKET_STATISTICS:
3943 spin_lock_bh(&sk->sk_receive_queue.lock);
3944 memcpy(&st, &po->stats, sizeof(st));
3945 memset(&po->stats, 0, sizeof(po->stats));
3946 spin_unlock_bh(&sk->sk_receive_queue.lock);
3947 drops = atomic_xchg(&po->tp_drops, 0);
3948
3949 if (po->tp_version == TPACKET_V3) {
3950 lv = sizeof(struct tpacket_stats_v3);
3951 st.stats3.tp_drops = drops;
3952 st.stats3.tp_packets += drops;
3953 data = &st.stats3;
3954 } else {
3955 lv = sizeof(struct tpacket_stats);
3956 st.stats1.tp_drops = drops;
3957 st.stats1.tp_packets += drops;
3958 data = &st.stats1;
3959 }
3960
3961 break;
3962 case PACKET_AUXDATA:
3963 val = po->auxdata;
3964 break;
3965 case PACKET_ORIGDEV:
3966 val = po->origdev;
3967 break;
3968 case PACKET_VNET_HDR:
3969 val = po->has_vnet_hdr;
3970 break;
3971 case PACKET_VERSION:
3972 val = po->tp_version;
3973 break;
3974 case PACKET_HDRLEN:
3975 if (len > sizeof(int))
3976 len = sizeof(int);
3977 if (len < sizeof(int))
3978 return -EINVAL;
3979 if (copy_from_user(&val, optval, len))
3980 return -EFAULT;
3981 switch (val) {
3982 case TPACKET_V1:
3983 val = sizeof(struct tpacket_hdr);
3984 break;
3985 case TPACKET_V2:
3986 val = sizeof(struct tpacket2_hdr);
3987 break;
3988 case TPACKET_V3:
3989 val = sizeof(struct tpacket3_hdr);
3990 break;
3991 default:
3992 return -EINVAL;
3993 }
3994 break;
3995 case PACKET_RESERVE:
3996 val = po->tp_reserve;
3997 break;
3998 case PACKET_LOSS:
3999 val = po->tp_loss;
4000 break;
4001 case PACKET_TIMESTAMP:
4002 val = po->tp_tstamp;
4003 break;
4004 case PACKET_FANOUT:
4005 val = (po->fanout ?
4006 ((u32)po->fanout->id |
4007 ((u32)po->fanout->type << 16) |
4008 ((u32)po->fanout->flags << 24)) :
4009 0);
4010 break;
4011 case PACKET_IGNORE_OUTGOING:
4012 val = po->prot_hook.ignore_outgoing;
4013 break;
4014 case PACKET_ROLLOVER_STATS:
4015 if (!po->rollover)
4016 return -EINVAL;
4017 rstats.tp_all = atomic_long_read(&po->rollover->num);
4018 rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
4019 rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
4020 data = &rstats;
4021 lv = sizeof(rstats);
4022 break;
4023 case PACKET_TX_HAS_OFF:
4024 val = po->tp_tx_has_off;
4025 break;
4026 case PACKET_QDISC_BYPASS:
4027 val = packet_use_direct_xmit(po);
4028 break;
4029 default:
4030 return -ENOPROTOOPT;
4031 }
4032
4033 if (len > lv)
4034 len = lv;
4035 if (put_user(len, optlen))
4036 return -EFAULT;
4037 if (copy_to_user(optval, data, len))
4038 return -EFAULT;
4039 return 0;
4040 }
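/* Usage sketch (userspace, illustrative only): reading PACKET_STATISTICS.
 * Both counters are reset by the read itself, and tp_packets already
 * includes tp_drops.
 *
 *	struct tpacket_stats st;
 *	socklen_t len = sizeof(st);
 *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len);
 *	printf("received %u, dropped %u\n", st.tp_packets, st.tp_drops);
 */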
4041
4042
4043 #ifdef CONFIG_COMPAT
4044 static int compat_packet_setsockopt(struct socket *sock, int level, int optname,
4045 char __user *optval, unsigned int optlen)
4046 {
4047 struct packet_sock *po = pkt_sk(sock->sk);
4048
4049 if (level != SOL_PACKET)
4050 return -ENOPROTOOPT;
4051
4052 if (optname == PACKET_FANOUT_DATA &&
4053 po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) {
4054 optval = (char __user *)get_compat_bpf_fprog(optval);
4055 if (!optval)
4056 return -EFAULT;
4057 optlen = sizeof(struct sock_fprog);
4058 }
4059
4060 return packet_setsockopt(sock, level, optname, optval, optlen);
4061 }
4062 #endif
4063
4064 static int packet_notifier(struct notifier_block *this,
4065 unsigned long msg, void *ptr)
4066 {
4067 struct sock *sk;
4068 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4069 struct net *net = dev_net(dev);
4070
4071 rcu_read_lock();
4072 sk_for_each_rcu(sk, &net->packet.sklist) {
4073 struct packet_sock *po = pkt_sk(sk);
4074
4075 switch (msg) {
4076 case NETDEV_UNREGISTER:
4077 if (po->mclist)
4078 packet_dev_mclist_delete(dev, &po->mclist);
4079 /* fallthrough */
4080
4081 case NETDEV_DOWN:
4082 if (dev->ifindex == po->ifindex) {
4083 spin_lock(&po->bind_lock);
4084 if (po->running) {
4085 __unregister_prot_hook(sk, false);
4086 sk->sk_err = ENETDOWN;
4087 if (!sock_flag(sk, SOCK_DEAD))
4088 sk->sk_error_report(sk);
4089 }
4090 if (msg == NETDEV_UNREGISTER) {
4091 packet_cached_dev_reset(po);
4092 po->ifindex = -1;
4093 if (po->prot_hook.dev)
4094 dev_put(po->prot_hook.dev);
4095 po->prot_hook.dev = NULL;
4096 }
4097 spin_unlock(&po->bind_lock);
4098 }
4099 break;
4100 case NETDEV_UP:
4101 if (dev->ifindex == po->ifindex) {
4102 spin_lock(&po->bind_lock);
4103 if (po->num)
4104 register_prot_hook(sk);
4105 spin_unlock(&po->bind_lock);
4106 }
4107 break;
4108 }
4109 }
4110 rcu_read_unlock();
4111 return NOTIFY_DONE;
4112 }
4113
4114
4115 static int packet_ioctl(struct socket *sock, unsigned int cmd,
4116 unsigned long arg)
4117 {
4118 struct sock *sk = sock->sk;
4119
4120 switch (cmd) {
4121 case SIOCOUTQ:
4122 {
4123 int amount = sk_wmem_alloc_get(sk);
4124
4125 return put_user(amount, (int __user *)arg);
4126 }
4127 case SIOCINQ:
4128 {
4129 struct sk_buff *skb;
4130 int amount = 0;
4131
4132 spin_lock_bh(&sk->sk_receive_queue.lock);
4133 skb = skb_peek(&sk->sk_receive_queue);
4134 if (skb)
4135 amount = skb->len;
4136 spin_unlock_bh(&sk->sk_receive_queue.lock);
4137 return put_user(amount, (int __user *)arg);
4138 }
4139 #ifdef CONFIG_INET
4140 case SIOCADDRT:
4141 case SIOCDELRT:
4142 case SIOCDARP:
4143 case SIOCGARP:
4144 case SIOCSARP:
4145 case SIOCGIFADDR:
4146 case SIOCSIFADDR:
4147 case SIOCGIFBRDADDR:
4148 case SIOCSIFBRDADDR:
4149 case SIOCGIFNETMASK:
4150 case SIOCSIFNETMASK:
4151 case SIOCGIFDSTADDR:
4152 case SIOCSIFDSTADDR:
4153 case SIOCSIFFLAGS:
4154 return inet_dgram_ops.ioctl(sock, cmd, arg);
4155 #endif
4156
4157 default:
4158 return -ENOIOCTLCMD;
4159 }
4160 return 0;
4161 }
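/* Usage sketch (userspace, illustrative only): the two queue ioctls
 * implemented above.
 *
 *	int inq, outq;
 *	ioctl(fd, SIOCINQ, &inq);	// length of the first queued packet, in bytes
 *	ioctl(fd, SIOCOUTQ, &outq);	// transmit bytes not yet released by the driver
 */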
4162
4163 static __poll_t packet_poll(struct file *file, struct socket *sock,
4164 poll_table *wait)
4165 {
4166 struct sock *sk = sock->sk;
4167 struct packet_sock *po = pkt_sk(sk);
4168 __poll_t mask = datagram_poll(file, sock, wait);
4169
4170 spin_lock_bh(&sk->sk_receive_queue.lock);
4171 if (po->rx_ring.pg_vec) {
4172 if (!packet_previous_rx_frame(po, &po->rx_ring,
4173 TP_STATUS_KERNEL))
4174 mask |= EPOLLIN | EPOLLRDNORM;
4175 }
4176 packet_rcv_try_clear_pressure(po);
4177 spin_unlock_bh(&sk->sk_receive_queue.lock);
4178 spin_lock_bh(&sk->sk_write_queue.lock);
4179 if (po->tx_ring.pg_vec) {
4180 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
4181 mask |= EPOLLOUT | EPOLLWRNORM;
4182 }
4183 spin_unlock_bh(&sk->sk_write_queue.lock);
4184 return mask;
4185 }
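/* Usage sketch (userspace, illustrative only): waiting for ring data with
 * poll(), since packet_poll() reports EPOLLIN when the next RX frame is no
 * longer owned by the kernel.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLRDNORM };
 *	poll(&pfd, 1, -1);
 *	// then walk frames whose tp_status has TP_STATUS_USER set and write
 *	// TP_STATUS_KERNEL back once each frame has been consumed
 */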
4186
4187
4188 /* Dirty? Well, I still did not learn better way to account
4189 * for user mmaps.
4190 */
4191
4192 static void packet_mm_open(struct vm_area_struct *vma)
4193 {
4194 struct file *file = vma->vm_file;
4195 struct socket *sock = file->private_data;
4196 struct sock *sk = sock->sk;
4197
4198 if (sk)
4199 atomic_inc(&pkt_sk(sk)->mapped);
4200 }
4201
4202 static void packet_mm_close(struct vm_area_struct *vma)
4203 {
4204 struct file *file = vma->vm_file;
4205 struct socket *sock = file->private_data;
4206 struct sock *sk = sock->sk;
4207
4208 if (sk)
4209 atomic_dec(&pkt_sk(sk)->mapped);
4210 }
4211
4212 static const struct vm_operations_struct packet_mmap_ops = {
4213 .open = packet_mm_open,
4214 .close = packet_mm_close,
4215 };
4216
4217 static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
4218 unsigned int len)
4219 {
4220 int i;
4221
4222 for (i = 0; i < len; i++) {
4223 if (likely(pg_vec[i].buffer)) {
4224 if (is_vmalloc_addr(pg_vec[i].buffer))
4225 vfree(pg_vec[i].buffer);
4226 else
4227 free_pages((unsigned long)pg_vec[i].buffer,
4228 order);
4229 pg_vec[i].buffer = NULL;
4230 }
4231 }
4232 kfree(pg_vec);
4233 }
4234
4235 static char *alloc_one_pg_vec_page(unsigned long order)
4236 {
4237 char *buffer;
4238 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
4239 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
4240
4241 buffer = (char *) __get_free_pages(gfp_flags, order);
4242 if (buffer)
4243 return buffer;
4244
4245 /* __get_free_pages failed, fall back to vmalloc */
4246 buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
4247 if (buffer)
4248 return buffer;
4249
4250 /* vmalloc failed, let's dig into swap here */
4251 gfp_flags &= ~__GFP_NORETRY;
4252 buffer = (char *) __get_free_pages(gfp_flags, order);
4253 if (buffer)
4254 return buffer;
4255
4256 /* complete and utter failure */
4257 return NULL;
4258 }
4259
4260 static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
4261 {
4262 unsigned int block_nr = req->tp_block_nr;
4263 struct pgv *pg_vec;
4264 int i;
4265
4266 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
4267 if (unlikely(!pg_vec))
4268 goto out;
4269
4270 for (i = 0; i < block_nr; i++) {
4271 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
4272 if (unlikely(!pg_vec[i].buffer))
4273 goto out_free_pgvec;
4274 }
4275
4276 out:
4277 return pg_vec;
4278
4279 out_free_pgvec:
4280 free_pg_vec(pg_vec, order, block_nr);
4281 pg_vec = NULL;
4282 goto out;
4283 }
4284
4285 static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4286 int closing, int tx_ring)
4287 {
4288 struct pgv *pg_vec = NULL;
4289 struct packet_sock *po = pkt_sk(sk);
4290 unsigned long *rx_owner_map = NULL;
4291 int was_running, order = 0;
4292 struct packet_ring_buffer *rb;
4293 struct sk_buff_head *rb_queue;
4294 __be16 num;
4295 int err = -EINVAL;
4296
4297 struct tpacket_req *req = &req_u->req;
4298
4299 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
4300 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
4301
4302 err = -EBUSY;
4303 if (!closing) {
4304 if (atomic_read(&po->mapped))
4305 goto out;
4306 if (packet_read_pending(rb))
4307 goto out;
4308 }
4309
4310 if (req->tp_block_nr) {
4311 unsigned int min_frame_size;
4312
4313 /* Sanity tests and some calculations */
4314 err = -EBUSY;
4315 if (unlikely(rb->pg_vec))
4316 goto out;
4317
4318 switch (po->tp_version) {
4319 case TPACKET_V1:
4320 po->tp_hdrlen = TPACKET_HDRLEN;
4321 break;
4322 case TPACKET_V2:
4323 po->tp_hdrlen = TPACKET2_HDRLEN;
4324 break;
4325 case TPACKET_V3:
4326 po->tp_hdrlen = TPACKET3_HDRLEN;
4327 break;
4328 }
4329
4330 err = -EINVAL;
4331 if (unlikely((int)req->tp_block_size <= 0))
4332 goto out;
4333 if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
4334 goto out;
4335 min_frame_size = po->tp_hdrlen + po->tp_reserve;
4336 if (po->tp_version >= TPACKET_V3 &&
4337 req->tp_block_size <
4338 BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size)
4339 goto out;
4340 if (unlikely(req->tp_frame_size < min_frame_size))
4341 goto out;
4342 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
4343 goto out;
4344
4345 rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
4346 if (unlikely(rb->frames_per_block == 0))
4347 goto out;
4348 if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr))
4349 goto out;
4350 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
4351 req->tp_frame_nr))
4352 goto out;
4353
4354 err = -ENOMEM;
4355 order = get_order(req->tp_block_size);
4356 pg_vec = alloc_pg_vec(req, order);
4357 if (unlikely(!pg_vec))
4358 goto out;
4359 switch (po->tp_version) {
4360 case TPACKET_V3:
4361 /* Block transmit is not supported yet */
4362 if (!tx_ring) {
4363 init_prb_bdqc(po, rb, pg_vec, req_u);
4364 } else {
4365 struct tpacket_req3 *req3 = &req_u->req3;
4366
4367 if (req3->tp_retire_blk_tov ||
4368 req3->tp_sizeof_priv ||
4369 req3->tp_feature_req_word) {
4370 err = -EINVAL;
4371 goto out_free_pg_vec;
4372 }
4373 }
4374 break;
4375 default:
4376 if (!tx_ring) {
4377 rx_owner_map = bitmap_alloc(req->tp_frame_nr,
4378 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
4379 if (!rx_owner_map)
4380 goto out_free_pg_vec;
4381 }
4382 break;
4383 }
4384 }
4385
4386 else {
4387 err = -EINVAL;
4388 if (unlikely(req->tp_frame_nr))
4389 goto out;
4390 }
4391
4392
4393 /* Detach socket from network */
4394 spin_lock(&po->bind_lock);
4395 was_running = po->running;
4396 num = po->num;
4397 if (was_running) {
4398 po->num = 0;
4399 __unregister_prot_hook(sk, false);
4400 }
4401 spin_unlock(&po->bind_lock);
4402
4403 synchronize_net();
4404
4405 err = -EBUSY;
4406 mutex_lock(&po->pg_vec_lock);
4407 if (closing || atomic_read(&po->mapped) == 0) {
4408 err = 0;
4409 spin_lock_bh(&rb_queue->lock);
4410 swap(rb->pg_vec, pg_vec);
4411 if (po->tp_version <= TPACKET_V2)
4412 swap(rb->rx_owner_map, rx_owner_map);
4413 rb->frame_max = (req->tp_frame_nr - 1);
4414 rb->head = 0;
4415 rb->frame_size = req->tp_frame_size;
4416 spin_unlock_bh(&rb_queue->lock);
4417
4418 swap(rb->pg_vec_order, order);
4419 swap(rb->pg_vec_len, req->tp_block_nr);
4420
4421 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
4422 po->prot_hook.func = (po->rx_ring.pg_vec) ?
4423 tpacket_rcv : packet_rcv;
4424 skb_queue_purge(rb_queue);
4425 if (atomic_read(&po->mapped))
4426 pr_err("packet_mmap: vma is busy: %d\n",
4427 atomic_read(&po->mapped));
4428 }
4429 mutex_unlock(&po->pg_vec_lock);
4430
4431 spin_lock(&po->bind_lock);
4432 if (was_running) {
4433 po->num = num;
4434 register_prot_hook(sk);
4435 }
4436 spin_unlock(&po->bind_lock);
4437 if (pg_vec && (po->tp_version > TPACKET_V2)) {
4438 /* Because we don't support block-based V3 on tx-ring */
4439 if (!tx_ring)
4440 prb_shutdown_retire_blk_timer(po, rb_queue);
4441 }
4442
4443 out_free_pg_vec:
4444 bitmap_free(rx_owner_map);
4445 if (pg_vec)
4446 free_pg_vec(pg_vec, order, req->tp_block_nr);
4447 out:
4448 return err;
4449 }
4450
4451 static int packet_mmap(struct file *file, struct socket *sock,
4452 struct vm_area_struct *vma)
4453 {
4454 struct sock *sk = sock->sk;
4455 struct packet_sock *po = pkt_sk(sk);
4456 unsigned long size, expected_size;
4457 struct packet_ring_buffer *rb;
4458 unsigned long start;
4459 int err = -EINVAL;
4460 int i;
4461
4462 if (vma->vm_pgoff)
4463 return -EINVAL;
4464
4465 mutex_lock(&po->pg_vec_lock);
4466
4467 expected_size = 0;
4468 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4469 if (rb->pg_vec) {
4470 expected_size += rb->pg_vec_len
4471 * rb->pg_vec_pages
4472 * PAGE_SIZE;
4473 }
4474 }
4475
4476 if (expected_size == 0)
4477 goto out;
4478
4479 size = vma->vm_end - vma->vm_start;
4480 if (size != expected_size)
4481 goto out;
4482
4483 start = vma->vm_start;
4484 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4485 if (rb->pg_vec == NULL)
4486 continue;
4487
4488 for (i = 0; i < rb->pg_vec_len; i++) {
4489 struct page *page;
4490 void *kaddr = rb->pg_vec[i].buffer;
4491 int pg_num;
4492
4493 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
4494 page = pgv_to_page(kaddr);
4495 err = vm_insert_page(vma, start, page);
4496 if (unlikely(err))
4497 goto out;
4498 start += PAGE_SIZE;
4499 kaddr += PAGE_SIZE;
4500 }
4501 }
4502 }
4503
4504 atomic_inc(&po->mapped);
4505 vma->vm_ops = &packet_mmap_ops;
4506 err = 0;
4507
4508 out:
4509 mutex_unlock(&po->pg_vec_lock);
4510 return err;
4511 }
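/* Usage sketch (userspace, illustrative only): setting up and mapping a
 * TPACKET_V3 RX ring, the path served by packet_set_ring() and
 * packet_mmap(). The sizes below are arbitrary but must satisfy the sanity
 * checks in packet_set_ring() (page-aligned block size, frame alignment,
 * frame_nr == frames_per_block * block_nr).
 *
 *	int ver = TPACKET_V3;
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *
 *	struct tpacket_req3 req = {
 *		.tp_block_size     = 1 << 22,
 *		.tp_block_nr       = 64,
 *		.tp_frame_size     = 1 << 11,
 *		.tp_frame_nr       = ((1 << 22) / (1 << 11)) * 64,
 *		.tp_retire_blk_tov = 60,	// ms before a partially filled block retires
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *
 *	size_t map_len = (size_t)req.tp_block_size * req.tp_block_nr;
 *	void *ring = mmap(NULL, map_len, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 */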
4512
4513 static const struct proto_ops packet_ops_spkt = {
4514 .family = PF_PACKET,
4515 .owner = THIS_MODULE,
4516 .release = packet_release,
4517 .bind = packet_bind_spkt,
4518 .connect = sock_no_connect,
4519 .socketpair = sock_no_socketpair,
4520 .accept = sock_no_accept,
4521 .getname = packet_getname_spkt,
4522 .poll = datagram_poll,
4523 .ioctl = packet_ioctl,
4524 .gettstamp = sock_gettstamp,
4525 .listen = sock_no_listen,
4526 .shutdown = sock_no_shutdown,
4527 .setsockopt = sock_no_setsockopt,
4528 .getsockopt = sock_no_getsockopt,
4529 .sendmsg = packet_sendmsg_spkt,
4530 .recvmsg = packet_recvmsg,
4531 .mmap = sock_no_mmap,
4532 .sendpage = sock_no_sendpage,
4533 };
4534
4535 static const struct proto_ops packet_ops = {
4536 .family = PF_PACKET,
4537 .owner = THIS_MODULE,
4538 .release = packet_release,
4539 .bind = packet_bind,
4540 .connect = sock_no_connect,
4541 .socketpair = sock_no_socketpair,
4542 .accept = sock_no_accept,
4543 .getname = packet_getname,
4544 .poll = packet_poll,
4545 .ioctl = packet_ioctl,
4546 .gettstamp = sock_gettstamp,
4547 .listen = sock_no_listen,
4548 .shutdown = sock_no_shutdown,
4549 .setsockopt = packet_setsockopt,
4550 .getsockopt = packet_getsockopt,
4551 #ifdef CONFIG_COMPAT
4552 .compat_setsockopt = compat_packet_setsockopt,
4553 #endif
4554 .sendmsg = packet_sendmsg,
4555 .recvmsg = packet_recvmsg,
4556 .mmap = packet_mmap,
4557 .sendpage = sock_no_sendpage,
4558 };
4559
4560 static const struct net_proto_family packet_family_ops = {
4561 .family = PF_PACKET,
4562 .create = packet_create,
4563 .owner = THIS_MODULE,
4564 };
4565
4566 static struct notifier_block packet_netdev_notifier = {
4567 .notifier_call = packet_notifier,
4568 };
4569
4570 #ifdef CONFIG_PROC_FS
4571
4572 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
4573 __acquires(RCU)
4574 {
4575 struct net *net = seq_file_net(seq);
4576
4577 rcu_read_lock();
4578 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
4579 }
4580
4581 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4582 {
4583 struct net *net = seq_file_net(seq);
4584 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
4585 }
4586
4587 static void packet_seq_stop(struct seq_file *seq, void *v)
4588 __releases(RCU)
4589 {
4590 rcu_read_unlock();
4591 }
4592
4593 static int packet_seq_show(struct seq_file *seq, void *v)
4594 {
4595 if (v == SEQ_START_TOKEN)
4596 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
4597 else {
4598 struct sock *s = sk_entry(v);
4599 const struct packet_sock *po = pkt_sk(s);
4600
4601 seq_printf(seq,
4602 "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
4603 s,
4604 refcount_read(&s->sk_refcnt),
4605 s->sk_type,
4606 ntohs(po->num),
4607 po->ifindex,
4608 po->running,
4609 atomic_read(&s->sk_rmem_alloc),
4610 from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
4611 sock_i_ino(s));
4612 }
4613
4614 return 0;
4615 }
4616
4617 static const struct seq_operations packet_seq_ops = {
4618 .start = packet_seq_start,
4619 .next = packet_seq_next,
4620 .stop = packet_seq_stop,
4621 .show = packet_seq_show,
4622 };
4623 #endif
4624
4625 static int __net_init packet_net_init(struct net *net)
4626 {
4627 mutex_init(&net->packet.sklist_lock);
4628 INIT_HLIST_HEAD(&net->packet.sklist);
4629
4630 if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
4631 sizeof(struct seq_net_private)))
4632 return -ENOMEM;
4633
4634 return 0;
4635 }
4636
4637 static void __net_exit packet_net_exit(struct net *net)
4638 {
4639 remove_proc_entry("packet", net->proc_net);
4640 WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
4641 }
4642
4643 static struct pernet_operations packet_net_ops = {
4644 .init = packet_net_init,
4645 .exit = packet_net_exit,
4646 };
4647
4648
4649 static void __exit packet_exit(void)
4650 {
4651 unregister_netdevice_notifier(&packet_netdev_notifier);
4652 unregister_pernet_subsys(&packet_net_ops);
4653 sock_unregister(PF_PACKET);
4654 proto_unregister(&packet_proto);
4655 }
4656
4657 static int __init packet_init(void)
4658 {
4659 int rc;
4660
4661 rc = proto_register(&packet_proto, 0);
4662 if (rc)
4663 goto out;
4664 rc = sock_register(&packet_family_ops);
4665 if (rc)
4666 goto out_proto;
4667 rc = register_pernet_subsys(&packet_net_ops);
4668 if (rc)
4669 goto out_sock;
4670 rc = register_netdevice_notifier(&packet_netdev_notifier);
4671 if (rc)
4672 goto out_pernet;
4673
4674 return 0;
4675
4676 out_pernet:
4677 unregister_pernet_subsys(&packet_net_ops);
4678 out_sock:
4679 sock_unregister(PF_PACKET);
4680 out_proto:
4681 proto_unregister(&packet_proto);
4682 out:
4683 return rc;
4684 }
4685
4686 module_init(packet_init);
4687 module_exit(packet_exit);
4688 MODULE_LICENSE("GPL");
4689 MODULE_ALIAS_NETPROTO(PF_PACKET);