This source file includes the following definitions:
- dev_base_seq_inc
- dev_name_hash
- dev_index_hash
- rps_lock
- rps_unlock
- list_netdevice
- unlist_netdevice
- ptype_head
- dev_add_pack
- __dev_remove_pack
- dev_remove_pack
- dev_add_offload
- __dev_remove_offload
- dev_remove_offload
- netdev_boot_setup_add
- netdev_boot_setup_check
- netdev_boot_base
- netdev_boot_setup
- dev_get_iflink
- dev_fill_metadata_dst
- __dev_get_by_name
- dev_get_by_name_rcu
- dev_get_by_name
- __dev_get_by_index
- dev_get_by_index_rcu
- dev_get_by_index
- dev_get_by_napi_id
- netdev_get_name
- dev_getbyhwaddr_rcu
- __dev_getfirstbyhwtype
- dev_getfirstbyhwtype
- __dev_get_by_flags
- dev_valid_name
- __dev_alloc_name
- dev_alloc_name_ns
- dev_alloc_name
- dev_get_valid_name
- dev_change_name
- dev_set_alias
- dev_get_alias
- netdev_features_change
- netdev_state_change
- netdev_notify_peers
- __dev_open
- dev_open
- __dev_close_many
- __dev_close
- dev_close_many
- dev_close
- dev_disable_lro
- dev_disable_gro_hw
- netdev_cmd_to_name
- call_netdevice_notifier
- register_netdevice_notifier
- unregister_netdevice_notifier
- call_netdevice_notifiers_info
- call_netdevice_notifiers_extack
- call_netdevice_notifiers
- call_netdevice_notifiers_mtu
- net_inc_ingress_queue
- net_dec_ingress_queue
- net_inc_egress_queue
- net_dec_egress_queue
- netstamp_clear
- net_enable_timestamp
- net_disable_timestamp
- net_timestamp_set
- is_skb_forwardable
- __dev_forward_skb
- dev_forward_skb
- deliver_skb
- deliver_ptype_list_skb
- skb_loop_sk
- dev_nit_active
- dev_queue_xmit_nit
- netif_setup_tc
- netdev_txq_to_tc
- remove_xps_queue
- remove_xps_queue_cpu
- reset_xps_maps
- clean_xps_maps
- netif_reset_xps_queues
- netif_reset_xps_queues_gt
- expand_xps_map
- __netif_set_xps_queue
- netif_set_xps_queue
- netdev_unbind_all_sb_channels
- netdev_reset_tc
- netdev_set_tc_queue
- netdev_set_num_tc
- netdev_unbind_sb_channel
- netdev_bind_sb_channel_queue
- netdev_set_sb_channel
- netif_set_real_num_tx_queues
- netif_set_real_num_rx_queues
- netif_get_num_default_rss_queues
- __netif_reschedule
- __netif_schedule
- get_kfree_skb_cb
- netif_schedule_queue
- netif_tx_wake_queue
- __dev_kfree_skb_irq
- __dev_kfree_skb_any
- netif_device_detach
- netif_device_attach
- skb_tx_hash
- skb_warn_bad_offload
- skb_checksum_help
- skb_crc32c_csum_help
- skb_network_protocol
- skb_mac_gso_segment
- skb_needs_check
- __skb_gso_segment
- netdev_rx_csum_fault
- illegal_highdma
- net_mpls_features
- net_mpls_features
- harmonize_features
- passthru_features_check
- dflt_features_check
- gso_features_check
- netif_skb_features
- xmit_one
- dev_hard_start_xmit
- validate_xmit_vlan
- skb_csum_hwoffload_help
- validate_xmit_skb
- validate_xmit_skb_list
- qdisc_pkt_len_init
- __dev_xmit_skb
- skb_update_prio
- dev_loopback_xmit
- sch_handle_egress
- __get_xps_queue_idx
- get_xps_queue
- dev_pick_tx_zero
- dev_pick_tx_cpu_id
- netdev_pick_tx
- netdev_core_pick_tx
- __dev_queue_xmit
- dev_queue_xmit
- dev_queue_xmit_accel
- dev_direct_xmit
- ____napi_schedule
- set_rps_cpu
- get_rps_cpu
- rps_may_expire_flow
- rps_trigger_softirq
- rps_ipi_queued
- skb_flow_limit
- enqueue_to_backlog
- netif_get_rxqueue
- netif_receive_generic_xdp
- generic_xdp_tx
- do_xdp_generic
- netif_rx_internal
- netif_rx
- netif_rx_ni
- net_tx_action
- sch_handle_ingress
- netdev_is_rx_handler_busy
- netdev_rx_handler_register
- netdev_rx_handler_unregister
- skb_pfmemalloc_protocol
- nf_ingress
- __netif_receive_skb_core
- __netif_receive_skb_one_core
- netif_receive_skb_core
- __netif_receive_skb_list_ptype
- __netif_receive_skb_list_core
- __netif_receive_skb
- __netif_receive_skb_list
- generic_xdp_install
- netif_receive_skb_internal
- netif_receive_skb_list_internal
- netif_receive_skb
- netif_receive_skb_list
- flush_backlog
- flush_all_backlogs
- gro_normal_list
- gro_normal_one
- napi_gro_complete
- __napi_gro_flush_chain
- napi_gro_flush
- gro_list_prepare
- skb_gro_reset_offset
- gro_pull_from_frag0
- gro_flush_oldest
- dev_gro_receive
- gro_find_receive_by_type
- gro_find_complete_by_type
- napi_skb_free_stolen_head
- napi_skb_finish
- napi_gro_receive
- napi_reuse_skb
- napi_get_frags
- napi_frags_finish
- napi_frags_skb
- napi_gro_frags
- __skb_gro_checksum_complete
- net_rps_send_ipi
- net_rps_action_and_irq_enable
- sd_has_rps_ipi_waiting
- process_backlog
- __napi_schedule
- napi_schedule_prep
- __napi_schedule_irqoff
- napi_complete_done
- napi_by_id
- busy_poll_stop
- napi_busy_loop
- napi_hash_add
- napi_hash_del
- napi_watchdog
- init_gro_hash
- netif_napi_add
- napi_disable
- flush_gro_hash
- netif_napi_del
- napi_poll
- net_rx_action
- __netdev_find_adj
- ____netdev_has_upper_dev
- netdev_has_upper_dev
- netdev_has_upper_dev_all_rcu
- netdev_has_any_upper_dev
- netdev_master_upper_dev_get
- __netdev_master_upper_dev_get
- netdev_has_any_lower_dev
- netdev_adjacent_get_private
- netdev_upper_get_next_dev_rcu
- __netdev_next_upper_dev
- netdev_next_upper_dev_rcu
- __netdev_walk_all_upper_dev
- netdev_walk_all_upper_dev_rcu
- __netdev_has_upper_dev
- netdev_lower_get_next_private
- netdev_lower_get_next_private_rcu
- netdev_lower_get_next
- netdev_next_lower_dev
- __netdev_next_lower_dev
- netdev_walk_all_lower_dev
- __netdev_walk_all_lower_dev
- netdev_next_lower_dev_rcu
- __netdev_upper_depth
- __netdev_lower_depth
- __netdev_update_upper_level
- __netdev_update_lower_level
- netdev_walk_all_lower_dev_rcu
- netdev_lower_get_first_private_rcu
- netdev_master_upper_dev_get_rcu
- netdev_adjacent_sysfs_add
- netdev_adjacent_sysfs_del
- netdev_adjacent_is_neigh_list
- __netdev_adjacent_dev_insert
- __netdev_adjacent_dev_remove
- __netdev_adjacent_dev_link_lists
- __netdev_adjacent_dev_unlink_lists
- __netdev_adjacent_dev_link_neighbour
- __netdev_adjacent_dev_unlink_neighbour
- __netdev_upper_dev_link
- netdev_upper_dev_link
- netdev_master_upper_dev_link
- netdev_upper_dev_unlink
- __netdev_adjacent_dev_set
- netdev_adjacent_dev_disable
- netdev_adjacent_dev_enable
- netdev_adjacent_change_prepare
- netdev_adjacent_change_commit
- netdev_adjacent_change_abort
- netdev_bonding_info_change
- netdev_adjacent_add_links
- netdev_adjacent_del_links
- netdev_adjacent_rename_links
- netdev_lower_dev_get_private
- netdev_lower_state_changed
- dev_change_rx_flags
- __dev_set_promiscuity
- dev_set_promiscuity
- __dev_set_allmulti
- dev_set_allmulti
- __dev_set_rx_mode
- dev_set_rx_mode
- dev_get_flags
- __dev_change_flags
- __dev_notify_flags
- dev_change_flags
- __dev_set_mtu
- dev_validate_mtu
- dev_set_mtu_ext
- dev_set_mtu
- dev_change_tx_queue_len
- dev_set_group
- dev_pre_changeaddr_notify
- dev_set_mac_address
- dev_change_carrier
- dev_get_phys_port_id
- dev_get_phys_port_name
- dev_get_port_parent_id
- netdev_port_same_parent_id
- dev_change_proto_down
- dev_change_proto_down_generic
- __dev_xdp_query
- dev_xdp_install
- dev_xdp_uninstall
- dev_change_xdp_fd
- dev_new_index
- net_set_todo
- rollback_registered_many
- rollback_registered
- netdev_sync_upper_features
- netdev_sync_lower_features
- netdev_fix_features
- __netdev_update_features
- netdev_update_features
- netdev_change_features
- netif_stacked_transfer_operstate
- netif_alloc_rx_queues
- netif_free_rx_queues
- netdev_init_one_queue
- netif_free_tx_queues
- netif_alloc_netdev_queues
- netif_tx_stop_all_queues
- netdev_register_lockdep_key
- netdev_unregister_lockdep_key
- netdev_update_lockdep_key
- register_netdevice
- init_dummy_netdev
- register_netdev
- netdev_refcnt_read
- netdev_wait_allrefs
- netdev_run_todo
- netdev_stats_to_stats64
- dev_get_stats
- dev_ingress_queue_create
- netdev_set_default_ethtool_ops
- netdev_freemem
- alloc_netdev_mqs
- free_netdev
- synchronize_net
- unregister_netdevice_queue
- unregister_netdevice_many
- unregister_netdev
- dev_change_net_namespace
- dev_cpu_dead
- netdev_increment_features
- netdev_create_hash
- netdev_init
- netdev_drivername
- __netdev_printk
- netdev_printk
- netdev_exit
- default_device_exit
- rtnl_lock_unregistering
- default_device_exit_batch
- net_dev_init
71 #include <linux/uaccess.h>
72 #include <linux/bitops.h>
73 #include <linux/capability.h>
74 #include <linux/cpu.h>
75 #include <linux/types.h>
76 #include <linux/kernel.h>
77 #include <linux/hash.h>
78 #include <linux/slab.h>
79 #include <linux/sched.h>
80 #include <linux/sched/mm.h>
81 #include <linux/mutex.h>
82 #include <linux/string.h>
83 #include <linux/mm.h>
84 #include <linux/socket.h>
85 #include <linux/sockios.h>
86 #include <linux/errno.h>
87 #include <linux/interrupt.h>
88 #include <linux/if_ether.h>
89 #include <linux/netdevice.h>
90 #include <linux/etherdevice.h>
91 #include <linux/ethtool.h>
92 #include <linux/skbuff.h>
93 #include <linux/bpf.h>
94 #include <linux/bpf_trace.h>
95 #include <net/net_namespace.h>
96 #include <net/sock.h>
97 #include <net/busy_poll.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/stat.h>
100 #include <net/dst.h>
101 #include <net/dst_metadata.h>
102 #include <net/pkt_sched.h>
103 #include <net/pkt_cls.h>
104 #include <net/checksum.h>
105 #include <net/xfrm.h>
106 #include <linux/highmem.h>
107 #include <linux/init.h>
108 #include <linux/module.h>
109 #include <linux/netpoll.h>
110 #include <linux/rcupdate.h>
111 #include <linux/delay.h>
112 #include <net/iw_handler.h>
113 #include <asm/current.h>
114 #include <linux/audit.h>
115 #include <linux/dmaengine.h>
116 #include <linux/err.h>
117 #include <linux/ctype.h>
118 #include <linux/if_arp.h>
119 #include <linux/if_vlan.h>
120 #include <linux/ip.h>
121 #include <net/ip.h>
122 #include <net/mpls.h>
123 #include <linux/ipv6.h>
124 #include <linux/in.h>
125 #include <linux/jhash.h>
126 #include <linux/random.h>
127 #include <trace/events/napi.h>
128 #include <trace/events/net.h>
129 #include <trace/events/skb.h>
130 #include <linux/inetdevice.h>
131 #include <linux/cpu_rmap.h>
132 #include <linux/static_key.h>
133 #include <linux/hashtable.h>
134 #include <linux/vmalloc.h>
135 #include <linux/if_macvlan.h>
136 #include <linux/errqueue.h>
137 #include <linux/hrtimer.h>
138 #include <linux/netfilter_ingress.h>
139 #include <linux/crash_dump.h>
140 #include <linux/sctp.h>
141 #include <net/udp_tunnel.h>
142 #include <linux/net_namespace.h>
143 #include <linux/indirect_call_wrapper.h>
144 #include <net/devlink.h>
145
146 #include "net-sysfs.h"
147
148 #define MAX_GRO_SKBS 8
149
150
151 #define GRO_MAX_HEAD (MAX_HEADER + 128)
152
153 static DEFINE_SPINLOCK(ptype_lock);
154 static DEFINE_SPINLOCK(offload_lock);
155 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
156 struct list_head ptype_all __read_mostly;
157 static struct list_head offload_base __read_mostly;
158
159 static int netif_rx_internal(struct sk_buff *skb);
160 static int call_netdevice_notifiers_info(unsigned long val,
161 struct netdev_notifier_info *info);
162 static int call_netdevice_notifiers_extack(unsigned long val,
163 struct net_device *dev,
164 struct netlink_ext_ack *extack);
165 static struct napi_struct *napi_by_id(unsigned int napi_id);
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186 DEFINE_RWLOCK(dev_base_lock);
187 EXPORT_SYMBOL(dev_base_lock);
188
189 static DEFINE_MUTEX(ifalias_mutex);
190
191
192 static DEFINE_SPINLOCK(napi_hash_lock);
193
194 static unsigned int napi_gen_id = NR_CPUS;
195 static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
196
197 static seqcount_t devnet_rename_seq;
198
199 static inline void dev_base_seq_inc(struct net *net)
200 {
201 while (++net->dev_base_seq == 0)
202 ;
203 }
204
205 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
206 {
207 unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
208
209 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
210 }
211
212 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
213 {
214 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
215 }
216
217 static inline void rps_lock(struct softnet_data *sd)
218 {
219 #ifdef CONFIG_RPS
220 spin_lock(&sd->input_pkt_queue.lock);
221 #endif
222 }
223
224 static inline void rps_unlock(struct softnet_data *sd)
225 {
226 #ifdef CONFIG_RPS
227 spin_unlock(&sd->input_pkt_queue.lock);
228 #endif
229 }
230
231
232 static void list_netdevice(struct net_device *dev)
233 {
234 struct net *net = dev_net(dev);
235
236 ASSERT_RTNL();
237
238 write_lock_bh(&dev_base_lock);
239 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
240 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
241 hlist_add_head_rcu(&dev->index_hlist,
242 dev_index_hash(net, dev->ifindex));
243 write_unlock_bh(&dev_base_lock);
244
245 dev_base_seq_inc(net);
246 }
247
248
249
250
251 static void unlist_netdevice(struct net_device *dev)
252 {
253 ASSERT_RTNL();
254
255
256 write_lock_bh(&dev_base_lock);
257 list_del_rcu(&dev->dev_list);
258 hlist_del_rcu(&dev->name_hlist);
259 hlist_del_rcu(&dev->index_hlist);
260 write_unlock_bh(&dev_base_lock);
261
262 dev_base_seq_inc(dev_net(dev));
263 }
264
265
266
267
268
269 static RAW_NOTIFIER_HEAD(netdev_chain);
270
271
272
273
274
275
276 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
277 EXPORT_PER_CPU_SYMBOL(softnet_data);
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302 static inline struct list_head *ptype_head(const struct packet_type *pt)
303 {
304 if (pt->type == htons(ETH_P_ALL))
305 return pt->dev ? &pt->dev->ptype_all : &ptype_all;
306 else
307 return pt->dev ? &pt->dev->ptype_specific :
308 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
309 }
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324 void dev_add_pack(struct packet_type *pt)
325 {
326 struct list_head *head = ptype_head(pt);
327
328 spin_lock(&ptype_lock);
329 list_add_rcu(&pt->list, head);
330 spin_unlock(&ptype_lock);
331 }
332 EXPORT_SYMBOL(dev_add_pack);
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347 void __dev_remove_pack(struct packet_type *pt)
348 {
349 struct list_head *head = ptype_head(pt);
350 struct packet_type *pt1;
351
352 spin_lock(&ptype_lock);
353
354 list_for_each_entry(pt1, head, list) {
355 if (pt == pt1) {
356 list_del_rcu(&pt->list);
357 goto out;
358 }
359 }
360
361 pr_warn("dev_remove_pack: %p not found\n", pt);
362 out:
363 spin_unlock(&ptype_lock);
364 }
365 EXPORT_SYMBOL(__dev_remove_pack);
366
367
368
369
370
371
372
373
374
375
376
377
378
379 void dev_remove_pack(struct packet_type *pt)
380 {
381 __dev_remove_pack(pt);
382
383 synchronize_net();
384 }
385 EXPORT_SYMBOL(dev_remove_pack);
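
A minimal, hypothetical usage sketch (not part of dev.c): a module registering a packet_type handler with dev_add_pack() and tearing it down with dev_remove_pack(). The example_* names and the EtherType value are illustrative assumptions.

    /* Hypothetical example module code, not part of dev.c. */
    static int example_rcv(struct sk_buff *skb, struct net_device *dev,
                           struct packet_type *pt, struct net_device *orig_dev)
    {
            /* The handler owns the skb reference it is given. */
            kfree_skb(skb);
            return NET_RX_SUCCESS;
    }

    static struct packet_type example_pt __read_mostly = {
            .type = cpu_to_be16(0x88B5),    /* IEEE 802 local experimental EtherType */
            .func = example_rcv,
    };

    /* dev_add_pack(&example_pt) in module init;
     * dev_remove_pack(&example_pt) in module exit.
     */
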
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400 void dev_add_offload(struct packet_offload *po)
401 {
402 struct packet_offload *elem;
403
404 spin_lock(&offload_lock);
405 list_for_each_entry(elem, &offload_base, list) {
406 if (po->priority < elem->priority)
407 break;
408 }
409 list_add_rcu(&po->list, elem->list.prev);
410 spin_unlock(&offload_lock);
411 }
412 EXPORT_SYMBOL(dev_add_offload);
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427 static void __dev_remove_offload(struct packet_offload *po)
428 {
429 struct list_head *head = &offload_base;
430 struct packet_offload *po1;
431
432 spin_lock(&offload_lock);
433
434 list_for_each_entry(po1, head, list) {
435 if (po == po1) {
436 list_del_rcu(&po->list);
437 goto out;
438 }
439 }
440
441 pr_warn("dev_remove_offload: %p not found\n", po);
442 out:
443 spin_unlock(&offload_lock);
444 }
445
446
447
448
449
450
451
452
453
454
455
456
457
458 void dev_remove_offload(struct packet_offload *po)
459 {
460 __dev_remove_offload(po);
461
462 synchronize_net();
463 }
464 EXPORT_SYMBOL(dev_remove_offload);
465
466
467
468
469
470
471
472
473 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
474
475
476
477
478
479
480
481
482
483
484 static int netdev_boot_setup_add(char *name, struct ifmap *map)
485 {
486 struct netdev_boot_setup *s;
487 int i;
488
489 s = dev_boot_setup;
490 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
491 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
492 memset(s[i].name, 0, sizeof(s[i].name));
493 strlcpy(s[i].name, name, IFNAMSIZ);
494 memcpy(&s[i].map, map, sizeof(s[i].map));
495 break;
496 }
497 }
498
499 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
500 }
501
502
503
504
505
506
507
508
509
510
511 int netdev_boot_setup_check(struct net_device *dev)
512 {
513 struct netdev_boot_setup *s = dev_boot_setup;
514 int i;
515
516 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
517 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
518 !strcmp(dev->name, s[i].name)) {
519 dev->irq = s[i].map.irq;
520 dev->base_addr = s[i].map.base_addr;
521 dev->mem_start = s[i].map.mem_start;
522 dev->mem_end = s[i].map.mem_end;
523 return 1;
524 }
525 }
526 return 0;
527 }
528 EXPORT_SYMBOL(netdev_boot_setup_check);
529
530
531
532
533
534
535
536
537
538
539
540
541 unsigned long netdev_boot_base(const char *prefix, int unit)
542 {
543 const struct netdev_boot_setup *s = dev_boot_setup;
544 char name[IFNAMSIZ];
545 int i;
546
547 sprintf(name, "%s%d", prefix, unit);
548
549
550
551
552
553 if (__dev_get_by_name(&init_net, name))
554 return 1;
555
556 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
557 if (!strcmp(name, s[i].name))
558 return s[i].map.base_addr;
559 return 0;
560 }
561
562
563
564
565 int __init netdev_boot_setup(char *str)
566 {
567 int ints[5];
568 struct ifmap map;
569
570 str = get_options(str, ARRAY_SIZE(ints), ints);
571 if (!str || !*str)
572 return 0;
573
574
575 memset(&map, 0, sizeof(map));
576 if (ints[0] > 0)
577 map.irq = ints[1];
578 if (ints[0] > 1)
579 map.base_addr = ints[2];
580 if (ints[0] > 2)
581 map.mem_start = ints[3];
582 if (ints[0] > 3)
583 map.mem_end = ints[4];
584
585
586 return netdev_boot_setup_add(str, &map);
587 }
588
589 __setup("netdev=", netdev_boot_setup);
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605 int dev_get_iflink(const struct net_device *dev)
606 {
607 if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
608 return dev->netdev_ops->ndo_get_iflink(dev);
609
610 return dev->ifindex;
611 }
612 EXPORT_SYMBOL(dev_get_iflink);
613
614
615
616
617
618
619
620
621
622
623 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
624 {
625 struct ip_tunnel_info *info;
626
627 if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
628 return -EINVAL;
629
630 info = skb_tunnel_info_unclone(skb);
631 if (!info)
632 return -ENOMEM;
633 if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
634 return -EINVAL;
635
636 return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
637 }
638 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
639
640
641
642
643
644
645
646
647
648
649
650
651
652 struct net_device *__dev_get_by_name(struct net *net, const char *name)
653 {
654 struct net_device *dev;
655 struct hlist_head *head = dev_name_hash(net, name);
656
657 hlist_for_each_entry(dev, head, name_hlist)
658 if (!strncmp(dev->name, name, IFNAMSIZ))
659 return dev;
660
661 return NULL;
662 }
663 EXPORT_SYMBOL(__dev_get_by_name);
664
665
666
667
668
669
670
671
672
673
674
675
676
677 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
678 {
679 struct net_device *dev;
680 struct hlist_head *head = dev_name_hash(net, name);
681
682 hlist_for_each_entry_rcu(dev, head, name_hlist)
683 if (!strncmp(dev->name, name, IFNAMSIZ))
684 return dev;
685
686 return NULL;
687 }
688 EXPORT_SYMBOL(dev_get_by_name_rcu);
689
690
691
692
693
694
695
696
697
698
699
700
701
702 struct net_device *dev_get_by_name(struct net *net, const char *name)
703 {
704 struct net_device *dev;
705
706 rcu_read_lock();
707 dev = dev_get_by_name_rcu(net, name);
708 if (dev)
709 dev_hold(dev);
710 rcu_read_unlock();
711 return dev;
712 }
713 EXPORT_SYMBOL(dev_get_by_name);
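
A short, hypothetical caller sketch: dev_get_by_name() returns the device with a reference held, so the caller must balance it with dev_put(). The interface name is an assumption.

    struct net_device *dev;

    dev = dev_get_by_name(&init_net, "eth0");   /* takes a reference on success */
    if (dev) {
            pr_info("eth0 has ifindex %d\n", dev->ifindex);
            dev_put(dev);                       /* release the reference */
    }
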
714
715
716
717
718
719
720
721
722
723
724
725
726
727 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
728 {
729 struct net_device *dev;
730 struct hlist_head *head = dev_index_hash(net, ifindex);
731
732 hlist_for_each_entry(dev, head, index_hlist)
733 if (dev->ifindex == ifindex)
734 return dev;
735
736 return NULL;
737 }
738 EXPORT_SYMBOL(__dev_get_by_index);
739
740
741
742
743
744
745
746
747
748
749
750
751 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
752 {
753 struct net_device *dev;
754 struct hlist_head *head = dev_index_hash(net, ifindex);
755
756 hlist_for_each_entry_rcu(dev, head, index_hlist)
757 if (dev->ifindex == ifindex)
758 return dev;
759
760 return NULL;
761 }
762 EXPORT_SYMBOL(dev_get_by_index_rcu);
763
764
765
766
767
768
769
770
771
772
773
774
775
776 struct net_device *dev_get_by_index(struct net *net, int ifindex)
777 {
778 struct net_device *dev;
779
780 rcu_read_lock();
781 dev = dev_get_by_index_rcu(net, ifindex);
782 if (dev)
783 dev_hold(dev);
784 rcu_read_unlock();
785 return dev;
786 }
787 EXPORT_SYMBOL(dev_get_by_index);
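
By contrast, the _rcu lookup takes no reference; a hypothetical sketch of using it safely under rcu_read_lock() (the ifindex variable is assumed to come from the caller):

    struct net_device *dev;

    rcu_read_lock();
    dev = dev_get_by_index_rcu(&init_net, ifindex);
    if (dev)
            pr_debug("ifindex %d is %s\n", ifindex, dev->name);
    rcu_read_unlock();  /* dev must not be used past this point without dev_hold() */
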
788
789
790
791
792
793
794
795
796
797
798
799 struct net_device *dev_get_by_napi_id(unsigned int napi_id)
800 {
801 struct napi_struct *napi;
802
803 WARN_ON_ONCE(!rcu_read_lock_held());
804
805 if (napi_id < MIN_NAPI_ID)
806 return NULL;
807
808 napi = napi_by_id(napi_id);
809
810 return napi ? napi->dev : NULL;
811 }
812 EXPORT_SYMBOL(dev_get_by_napi_id);
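
A hypothetical sketch: mapping a NAPI id (for example one reported to userspace via SO_INCOMING_NAPI_ID) back to its device. The caller must be in an RCU read-side section and, as with the other _rcu lookups, no reference is taken.

    struct net_device *dev;

    rcu_read_lock();
    dev = dev_get_by_napi_id(napi_id);  /* napi_id assumed to come from the caller */
    if (dev)
            pr_debug("napi id %u belongs to %s\n", napi_id, dev->name);
    rcu_read_unlock();
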
813
814
815
816
817
818
819
820
821
822
823
824 int netdev_get_name(struct net *net, char *name, int ifindex)
825 {
826 struct net_device *dev;
827 unsigned int seq;
828
829 retry:
830 seq = raw_seqcount_begin(&devnet_rename_seq);
831 rcu_read_lock();
832 dev = dev_get_by_index_rcu(net, ifindex);
833 if (!dev) {
834 rcu_read_unlock();
835 return -ENODEV;
836 }
837
838 strcpy(name, dev->name);
839 rcu_read_unlock();
840 if (read_seqcount_retry(&devnet_rename_seq, seq)) {
841 cond_resched();
842 goto retry;
843 }
844
845 return 0;
846 }
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
863 const char *ha)
864 {
865 struct net_device *dev;
866
867 for_each_netdev_rcu(net, dev)
868 if (dev->type == type &&
869 !memcmp(dev->dev_addr, ha, dev->addr_len))
870 return dev;
871
872 return NULL;
873 }
874 EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
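
A hypothetical lookup by hardware address: the caller must hold RCU (or RTNL) and no reference is taken on the returned device. The MAC value below is an illustrative assumption.

    static const char example_mac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
    struct net_device *dev;

    rcu_read_lock();
    dev = dev_getbyhwaddr_rcu(&init_net, ARPHRD_ETHER, example_mac);
    if (dev)
            netdev_info(dev, "matched example hardware address\n");
    rcu_read_unlock();
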
875
876 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
877 {
878 struct net_device *dev;
879
880 ASSERT_RTNL();
881 for_each_netdev(net, dev)
882 if (dev->type == type)
883 return dev;
884
885 return NULL;
886 }
887 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
888
889 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
890 {
891 struct net_device *dev, *ret = NULL;
892
893 rcu_read_lock();
894 for_each_netdev_rcu(net, dev)
895 if (dev->type == type) {
896 dev_hold(dev);
897 ret = dev;
898 break;
899 }
900 rcu_read_unlock();
901 return ret;
902 }
903 EXPORT_SYMBOL(dev_getfirstbyhwtype);
904
905
906
907
908
909
910
911
912
913
914
915
916 struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
917 unsigned short mask)
918 {
919 struct net_device *dev, *ret;
920
921 ASSERT_RTNL();
922
923 ret = NULL;
924 for_each_netdev(net, dev) {
925 if (((dev->flags ^ if_flags) & mask) == 0) {
926 ret = dev;
927 break;
928 }
929 }
930 return ret;
931 }
932 EXPORT_SYMBOL(__dev_get_by_flags);
933
934
935
936
937
938
939
940
941
942 bool dev_valid_name(const char *name)
943 {
944 if (*name == '\0')
945 return false;
946 if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
947 return false;
948 if (!strcmp(name, ".") || !strcmp(name, ".."))
949 return false;
950
951 while (*name) {
952 if (*name == '/' || *name == ':' || isspace(*name))
953 return false;
954 name++;
955 }
956 return true;
957 }
958 EXPORT_SYMBOL(dev_valid_name);
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
976 {
977 int i = 0;
978 const char *p;
979 const int max_netdevices = 8*PAGE_SIZE;
980 unsigned long *inuse;
981 struct net_device *d;
982
983 if (!dev_valid_name(name))
984 return -EINVAL;
985
986 p = strchr(name, '%');
987 if (p) {
988
989
990
991
992
993 if (p[1] != 'd' || strchr(p + 2, '%'))
994 return -EINVAL;
995
996
997 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
998 if (!inuse)
999 return -ENOMEM;
1000
1001 for_each_netdev(net, d) {
1002 if (!sscanf(d->name, name, &i))
1003 continue;
1004 if (i < 0 || i >= max_netdevices)
1005 continue;
1006
1007
1008 snprintf(buf, IFNAMSIZ, name, i);
1009 if (!strncmp(buf, d->name, IFNAMSIZ))
1010 set_bit(i, inuse);
1011 }
1012
1013 i = find_first_zero_bit(inuse, max_netdevices);
1014 free_page((unsigned long) inuse);
1015 }
1016
1017 snprintf(buf, IFNAMSIZ, name, i);
1018 if (!__dev_get_by_name(net, buf))
1019 return i;
1020
1021
1022
1023
1024
1025 return -ENFILE;
1026 }
1027
1028 static int dev_alloc_name_ns(struct net *net,
1029 struct net_device *dev,
1030 const char *name)
1031 {
1032 char buf[IFNAMSIZ];
1033 int ret;
1034
1035 BUG_ON(!net);
1036 ret = __dev_alloc_name(net, name, buf);
1037 if (ret >= 0)
1038 strlcpy(dev->name, buf, IFNAMSIZ);
1039 return ret;
1040 }
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056 int dev_alloc_name(struct net_device *dev, const char *name)
1057 {
1058 return dev_alloc_name_ns(dev_net(dev), dev, name);
1059 }
1060 EXPORT_SYMBOL(dev_alloc_name);
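
A hypothetical driver-side sketch: passing a "%d" pattern asks __dev_alloc_name() to pick the lowest unused unit number; the return value is that unit (or a negative errno). The "example" prefix is an assumption.

    int err;

    err = dev_alloc_name(dev, "example%d");
    if (err < 0)
            return err;     /* -EINVAL, -ENFILE or -ENOMEM */
    /* dev->name now holds e.g. "example0"; err is the unit number chosen */
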
1061
1062 int dev_get_valid_name(struct net *net, struct net_device *dev,
1063 const char *name)
1064 {
1065 BUG_ON(!net);
1066
1067 if (!dev_valid_name(name))
1068 return -EINVAL;
1069
1070 if (strchr(name, '%'))
1071 return dev_alloc_name_ns(net, dev, name);
1072 else if (__dev_get_by_name(net, name))
1073 return -EEXIST;
1074 else if (dev->name != name)
1075 strlcpy(dev->name, name, IFNAMSIZ);
1076
1077 return 0;
1078 }
1079 EXPORT_SYMBOL(dev_get_valid_name);
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089 int dev_change_name(struct net_device *dev, const char *newname)
1090 {
1091 unsigned char old_assign_type;
1092 char oldname[IFNAMSIZ];
1093 int err = 0;
1094 int ret;
1095 struct net *net;
1096
1097 ASSERT_RTNL();
1098 BUG_ON(!dev_net(dev));
1099
1100 net = dev_net(dev);
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114 if (dev->flags & IFF_UP &&
1115 likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
1116 return -EBUSY;
1117
1118 write_seqcount_begin(&devnet_rename_seq);
1119
1120 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1121 write_seqcount_end(&devnet_rename_seq);
1122 return 0;
1123 }
1124
1125 memcpy(oldname, dev->name, IFNAMSIZ);
1126
1127 err = dev_get_valid_name(net, dev, newname);
1128 if (err < 0) {
1129 write_seqcount_end(&devnet_rename_seq);
1130 return err;
1131 }
1132
1133 if (oldname[0] && !strchr(oldname, '%'))
1134 netdev_info(dev, "renamed from %s\n", oldname);
1135
1136 old_assign_type = dev->name_assign_type;
1137 dev->name_assign_type = NET_NAME_RENAMED;
1138
1139 rollback:
1140 ret = device_rename(&dev->dev, dev->name);
1141 if (ret) {
1142 memcpy(dev->name, oldname, IFNAMSIZ);
1143 dev->name_assign_type = old_assign_type;
1144 write_seqcount_end(&devnet_rename_seq);
1145 return ret;
1146 }
1147
1148 write_seqcount_end(&devnet_rename_seq);
1149
1150 netdev_adjacent_rename_links(dev, oldname);
1151
1152 write_lock_bh(&dev_base_lock);
1153 hlist_del_rcu(&dev->name_hlist);
1154 write_unlock_bh(&dev_base_lock);
1155
1156 synchronize_rcu();
1157
1158 write_lock_bh(&dev_base_lock);
1159 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1160 write_unlock_bh(&dev_base_lock);
1161
1162 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1163 ret = notifier_to_errno(ret);
1164
1165 if (ret) {
1166
1167 if (err >= 0) {
1168 err = ret;
1169 write_seqcount_begin(&devnet_rename_seq);
1170 memcpy(dev->name, oldname, IFNAMSIZ);
1171 memcpy(oldname, newname, IFNAMSIZ);
1172 dev->name_assign_type = old_assign_type;
1173 old_assign_type = NET_NAME_RENAMED;
1174 goto rollback;
1175 } else {
1176 pr_err("%s: name change rollback failed: %d\n",
1177 dev->name, ret);
1178 }
1179 }
1180
1181 return err;
1182 }
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1193 {
1194 struct dev_ifalias *new_alias = NULL;
1195
1196 if (len >= IFALIASZ)
1197 return -EINVAL;
1198
1199 if (len) {
1200 new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
1201 if (!new_alias)
1202 return -ENOMEM;
1203
1204 memcpy(new_alias->ifalias, alias, len);
1205 new_alias->ifalias[len] = 0;
1206 }
1207
1208 mutex_lock(&ifalias_mutex);
1209 rcu_swap_protected(dev->ifalias, new_alias,
1210 mutex_is_locked(&ifalias_mutex));
1211 mutex_unlock(&ifalias_mutex);
1212
1213 if (new_alias)
1214 kfree_rcu(new_alias, rcuhead);
1215
1216 return len;
1217 }
1218 EXPORT_SYMBOL(dev_set_alias);
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229 int dev_get_alias(const struct net_device *dev, char *name, size_t len)
1230 {
1231 const struct dev_ifalias *alias;
1232 int ret = 0;
1233
1234 rcu_read_lock();
1235 alias = rcu_dereference(dev->ifalias);
1236 if (alias)
1237 ret = snprintf(name, len, "%s", alias->ifalias);
1238 rcu_read_unlock();
1239
1240 return ret;
1241 }
1242
1243
1244
1245
1246
1247
1248
1249 void netdev_features_change(struct net_device *dev)
1250 {
1251 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1252 }
1253 EXPORT_SYMBOL(netdev_features_change);
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263 void netdev_state_change(struct net_device *dev)
1264 {
1265 if (dev->flags & IFF_UP) {
1266 struct netdev_notifier_change_info change_info = {
1267 .info.dev = dev,
1268 };
1269
1270 call_netdevice_notifiers_info(NETDEV_CHANGE,
1271 &change_info.info);
1272 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1273 }
1274 }
1275 EXPORT_SYMBOL(netdev_state_change);
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287 void netdev_notify_peers(struct net_device *dev)
1288 {
1289 rtnl_lock();
1290 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1291 call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
1292 rtnl_unlock();
1293 }
1294 EXPORT_SYMBOL(netdev_notify_peers);
1295
1296 static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
1297 {
1298 const struct net_device_ops *ops = dev->netdev_ops;
1299 int ret;
1300
1301 ASSERT_RTNL();
1302
1303 if (!netif_device_present(dev))
1304 return -ENODEV;
1305
1306
1307
1308
1309
1310 netpoll_poll_disable(dev);
1311
1312 ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
1313 ret = notifier_to_errno(ret);
1314 if (ret)
1315 return ret;
1316
1317 set_bit(__LINK_STATE_START, &dev->state);
1318
1319 if (ops->ndo_validate_addr)
1320 ret = ops->ndo_validate_addr(dev);
1321
1322 if (!ret && ops->ndo_open)
1323 ret = ops->ndo_open(dev);
1324
1325 netpoll_poll_enable(dev);
1326
1327 if (ret)
1328 clear_bit(__LINK_STATE_START, &dev->state);
1329 else {
1330 dev->flags |= IFF_UP;
1331 dev_set_rx_mode(dev);
1332 dev_activate(dev);
1333 add_device_randomness(dev->dev_addr, dev->addr_len);
1334 }
1335
1336 return ret;
1337 }
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352 int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
1353 {
1354 int ret;
1355
1356 if (dev->flags & IFF_UP)
1357 return 0;
1358
1359 ret = __dev_open(dev, extack);
1360 if (ret < 0)
1361 return ret;
1362
1363 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1364 call_netdevice_notifiers(NETDEV_UP, dev);
1365
1366 return ret;
1367 }
1368 EXPORT_SYMBOL(dev_open);
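
__dev_open() asserts RTNL, so in-kernel callers of dev_open() must hold the lock; a hypothetical sketch (dev and the NULL extack are assumptions of the caller):

    int err;

    rtnl_lock();
    err = dev_open(dev, NULL);  /* extack may be NULL when there is no netlink context */
    rtnl_unlock();
    if (err)
            pr_warn("failed to bring %s up: %d\n", dev->name, err);
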
1369
1370 static void __dev_close_many(struct list_head *head)
1371 {
1372 struct net_device *dev;
1373
1374 ASSERT_RTNL();
1375 might_sleep();
1376
1377 list_for_each_entry(dev, head, close_list) {
1378
1379 netpoll_poll_disable(dev);
1380
1381 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1382
1383 clear_bit(__LINK_STATE_START, &dev->state);
1384
1385
1386
1387
1388
1389
1390
1391 smp_mb__after_atomic();
1392 }
1393
1394 dev_deactivate_many(head);
1395
1396 list_for_each_entry(dev, head, close_list) {
1397 const struct net_device_ops *ops = dev->netdev_ops;
1398
1399
1400
1401
1402
1403
1404
1405
1406 if (ops->ndo_stop)
1407 ops->ndo_stop(dev);
1408
1409 dev->flags &= ~IFF_UP;
1410 netpoll_poll_enable(dev);
1411 }
1412 }
1413
1414 static void __dev_close(struct net_device *dev)
1415 {
1416 LIST_HEAD(single);
1417
1418 list_add(&dev->close_list, &single);
1419 __dev_close_many(&single);
1420 list_del(&single);
1421 }
1422
1423 void dev_close_many(struct list_head *head, bool unlink)
1424 {
1425 struct net_device *dev, *tmp;
1426
1427
1428 list_for_each_entry_safe(dev, tmp, head, close_list)
1429 if (!(dev->flags & IFF_UP))
1430 list_del_init(&dev->close_list);
1431
1432 __dev_close_many(head);
1433
1434 list_for_each_entry_safe(dev, tmp, head, close_list) {
1435 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1436 call_netdevice_notifiers(NETDEV_DOWN, dev);
1437 if (unlink)
1438 list_del_init(&dev->close_list);
1439 }
1440 }
1441 EXPORT_SYMBOL(dev_close_many);
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452 void dev_close(struct net_device *dev)
1453 {
1454 if (dev->flags & IFF_UP) {
1455 LIST_HEAD(single);
1456
1457 list_add(&dev->close_list, &single);
1458 dev_close_many(&single, true);
1459 list_del(&single);
1460 }
1461 }
1462 EXPORT_SYMBOL(dev_close);
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473 void dev_disable_lro(struct net_device *dev)
1474 {
1475 struct net_device *lower_dev;
1476 struct list_head *iter;
1477
1478 dev->wanted_features &= ~NETIF_F_LRO;
1479 netdev_update_features(dev);
1480
1481 if (unlikely(dev->features & NETIF_F_LRO))
1482 netdev_WARN(dev, "failed to disable LRO!\n");
1483
1484 netdev_for_each_lower_dev(dev, lower_dev, iter)
1485 dev_disable_lro(lower_dev);
1486 }
1487 EXPORT_SYMBOL(dev_disable_lro);
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497 static void dev_disable_gro_hw(struct net_device *dev)
1498 {
1499 dev->wanted_features &= ~NETIF_F_GRO_HW;
1500 netdev_update_features(dev);
1501
1502 if (unlikely(dev->features & NETIF_F_GRO_HW))
1503 netdev_WARN(dev, "failed to disable GRO_HW!\n");
1504 }
1505
1506 const char *netdev_cmd_to_name(enum netdev_cmd cmd)
1507 {
1508 #define N(val) \
1509 case NETDEV_##val: \
1510 return "NETDEV_" __stringify(val);
1511 switch (cmd) {
1512 N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
1513 N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
1514 N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
1515 N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
1516 N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
1517 N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
1518 N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
1519 N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
1520 N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
1521 N(PRE_CHANGEADDR)
1522 }
1523 #undef N
1524 return "UNKNOWN_NETDEV_EVENT";
1525 }
1526 EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
1527
1528 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1529 struct net_device *dev)
1530 {
1531 struct netdev_notifier_info info = {
1532 .dev = dev,
1533 };
1534
1535 return nb->notifier_call(nb, val, &info);
1536 }
1537
1538 static int dev_boot_phase = 1;
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554 int register_netdevice_notifier(struct notifier_block *nb)
1555 {
1556 struct net_device *dev;
1557 struct net_device *last;
1558 struct net *net;
1559 int err;
1560
1561
1562 down_write(&pernet_ops_rwsem);
1563 rtnl_lock();
1564 err = raw_notifier_chain_register(&netdev_chain, nb);
1565 if (err)
1566 goto unlock;
1567 if (dev_boot_phase)
1568 goto unlock;
1569 for_each_net(net) {
1570 for_each_netdev(net, dev) {
1571 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1572 err = notifier_to_errno(err);
1573 if (err)
1574 goto rollback;
1575
1576 if (!(dev->flags & IFF_UP))
1577 continue;
1578
1579 call_netdevice_notifier(nb, NETDEV_UP, dev);
1580 }
1581 }
1582
1583 unlock:
1584 rtnl_unlock();
1585 up_write(&pernet_ops_rwsem);
1586 return err;
1587
1588 rollback:
1589 last = dev;
1590 for_each_net(net) {
1591 for_each_netdev(net, dev) {
1592 if (dev == last)
1593 goto outroll;
1594
1595 if (dev->flags & IFF_UP) {
1596 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1597 dev);
1598 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1599 }
1600 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1601 }
1602 }
1603
1604 outroll:
1605 raw_notifier_chain_unregister(&netdev_chain, nb);
1606 goto unlock;
1607 }
1608 EXPORT_SYMBOL(register_netdevice_notifier);
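
A hypothetical notifier sketch: the callback receives a netdev_notifier_info pointer, from which netdev_notifier_info_to_dev() recovers the device; netdev_cmd_to_name() (defined earlier in this file) is handy for logging. The example_* names are assumptions.

    static int example_netdev_event(struct notifier_block *nb,
                                    unsigned long event, void *ptr)
    {
            struct net_device *dev = netdev_notifier_info_to_dev(ptr);

            pr_debug("%s: %s\n", netdev_cmd_to_name(event), dev->name);

            if (event == NETDEV_UP)
                    ; /* react to the interface coming up */

            return NOTIFY_DONE;
    }

    static struct notifier_block example_netdev_nb = {
            .notifier_call = example_netdev_event,
    };

    /* register_netdevice_notifier(&example_netdev_nb) in module init;
     * unregister_netdevice_notifier(&example_netdev_nb) in module exit.
     */
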
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624 int unregister_netdevice_notifier(struct notifier_block *nb)
1625 {
1626 struct net_device *dev;
1627 struct net *net;
1628 int err;
1629
1630
1631 down_write(&pernet_ops_rwsem);
1632 rtnl_lock();
1633 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1634 if (err)
1635 goto unlock;
1636
1637 for_each_net(net) {
1638 for_each_netdev(net, dev) {
1639 if (dev->flags & IFF_UP) {
1640 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1641 dev);
1642 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1643 }
1644 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1645 }
1646 }
1647 unlock:
1648 rtnl_unlock();
1649 up_write(&pernet_ops_rwsem);
1650 return err;
1651 }
1652 EXPORT_SYMBOL(unregister_netdevice_notifier);
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663 static int call_netdevice_notifiers_info(unsigned long val,
1664 struct netdev_notifier_info *info)
1665 {
1666 ASSERT_RTNL();
1667 return raw_notifier_call_chain(&netdev_chain, val, info);
1668 }
1669
1670 static int call_netdevice_notifiers_extack(unsigned long val,
1671 struct net_device *dev,
1672 struct netlink_ext_ack *extack)
1673 {
1674 struct netdev_notifier_info info = {
1675 .dev = dev,
1676 .extack = extack,
1677 };
1678
1679 return call_netdevice_notifiers_info(val, &info);
1680 }
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1692 {
1693 return call_netdevice_notifiers_extack(val, dev, NULL);
1694 }
1695 EXPORT_SYMBOL(call_netdevice_notifiers);
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706 static int call_netdevice_notifiers_mtu(unsigned long val,
1707 struct net_device *dev, u32 arg)
1708 {
1709 struct netdev_notifier_info_ext info = {
1710 .info.dev = dev,
1711 .ext.mtu = arg,
1712 };
1713
1714 BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
1715
1716 return call_netdevice_notifiers_info(val, &info.info);
1717 }
1718
1719 #ifdef CONFIG_NET_INGRESS
1720 static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);
1721
1722 void net_inc_ingress_queue(void)
1723 {
1724 static_branch_inc(&ingress_needed_key);
1725 }
1726 EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
1727
1728 void net_dec_ingress_queue(void)
1729 {
1730 static_branch_dec(&ingress_needed_key);
1731 }
1732 EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
1733 #endif
1734
1735 #ifdef CONFIG_NET_EGRESS
1736 static DEFINE_STATIC_KEY_FALSE(egress_needed_key);
1737
1738 void net_inc_egress_queue(void)
1739 {
1740 static_branch_inc(&egress_needed_key);
1741 }
1742 EXPORT_SYMBOL_GPL(net_inc_egress_queue);
1743
1744 void net_dec_egress_queue(void)
1745 {
1746 static_branch_dec(&egress_needed_key);
1747 }
1748 EXPORT_SYMBOL_GPL(net_dec_egress_queue);
1749 #endif
1750
1751 static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
1752 #ifdef CONFIG_JUMP_LABEL
1753 static atomic_t netstamp_needed_deferred;
1754 static atomic_t netstamp_wanted;
1755 static void netstamp_clear(struct work_struct *work)
1756 {
1757 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1758 int wanted;
1759
1760 wanted = atomic_add_return(deferred, &netstamp_wanted);
1761 if (wanted > 0)
1762 static_branch_enable(&netstamp_needed_key);
1763 else
1764 static_branch_disable(&netstamp_needed_key);
1765 }
1766 static DECLARE_WORK(netstamp_work, netstamp_clear);
1767 #endif
1768
1769 void net_enable_timestamp(void)
1770 {
1771 #ifdef CONFIG_JUMP_LABEL
1772 int wanted;
1773
1774 while (1) {
1775 wanted = atomic_read(&netstamp_wanted);
1776 if (wanted <= 0)
1777 break;
1778 if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
1779 return;
1780 }
1781 atomic_inc(&netstamp_needed_deferred);
1782 schedule_work(&netstamp_work);
1783 #else
1784 static_branch_inc(&netstamp_needed_key);
1785 #endif
1786 }
1787 EXPORT_SYMBOL(net_enable_timestamp);
1788
1789 void net_disable_timestamp(void)
1790 {
1791 #ifdef CONFIG_JUMP_LABEL
1792 int wanted;
1793
1794 while (1) {
1795 wanted = atomic_read(&netstamp_wanted);
1796 if (wanted <= 1)
1797 break;
1798 if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
1799 return;
1800 }
1801 atomic_dec(&netstamp_needed_deferred);
1802 schedule_work(&netstamp_work);
1803 #else
1804 static_branch_dec(&netstamp_needed_key);
1805 #endif
1806 }
1807 EXPORT_SYMBOL(net_disable_timestamp);
1808
1809 static inline void net_timestamp_set(struct sk_buff *skb)
1810 {
1811 skb->tstamp = 0;
1812 if (static_branch_unlikely(&netstamp_needed_key))
1813 __net_timestamp(skb);
1814 }
1815
1816 #define net_timestamp_check(COND, SKB) \
1817 if (static_branch_unlikely(&netstamp_needed_key)) { \
1818 if ((COND) && !(SKB)->tstamp) \
1819 __net_timestamp(SKB); \
1820 } \
1821
1822 bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
1823 {
1824 unsigned int len;
1825
1826 if (!(dev->flags & IFF_UP))
1827 return false;
1828
1829 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1830 if (skb->len <= len)
1831 return true;
1832
1833
1834
1835
1836 if (skb_is_gso(skb))
1837 return true;
1838
1839 return false;
1840 }
1841 EXPORT_SYMBOL_GPL(is_skb_forwardable);
1842
1843 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1844 {
1845 int ret = ____dev_forward_skb(dev, skb);
1846
1847 if (likely(!ret)) {
1848 skb->protocol = eth_type_trans(skb, dev);
1849 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
1850 }
1851
1852 return ret;
1853 }
1854 EXPORT_SYMBOL_GPL(__dev_forward_skb);
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1875 {
1876 return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
1877 }
1878 EXPORT_SYMBOL_GPL(dev_forward_skb);
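
A hypothetical forwarding sketch, in the style of virtual-device pairs: dev_forward_skb() consumes the skb in all cases, dropping it when is_skb_forwardable() says it does not fit the target device. skb and peer_dev are assumed caller context.

    struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

    if (nskb)
            dev_forward_skb(peer_dev, nskb);    /* consumes nskb on success and on drop */
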
1879
1880 static inline int deliver_skb(struct sk_buff *skb,
1881 struct packet_type *pt_prev,
1882 struct net_device *orig_dev)
1883 {
1884 if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
1885 return -ENOMEM;
1886 refcount_inc(&skb->users);
1887 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1888 }
1889
1890 static inline void deliver_ptype_list_skb(struct sk_buff *skb,
1891 struct packet_type **pt,
1892 struct net_device *orig_dev,
1893 __be16 type,
1894 struct list_head *ptype_list)
1895 {
1896 struct packet_type *ptype, *pt_prev = *pt;
1897
1898 list_for_each_entry_rcu(ptype, ptype_list, list) {
1899 if (ptype->type != type)
1900 continue;
1901 if (pt_prev)
1902 deliver_skb(skb, pt_prev, orig_dev);
1903 pt_prev = ptype;
1904 }
1905 *pt = pt_prev;
1906 }
1907
1908 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1909 {
1910 if (!ptype->af_packet_priv || !skb->sk)
1911 return false;
1912
1913 if (ptype->id_match)
1914 return ptype->id_match(ptype, skb->sk);
1915 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1916 return true;
1917
1918 return false;
1919 }
1920
1921
1922
1923
1924
1925
1926 bool dev_nit_active(struct net_device *dev)
1927 {
1928 return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all);
1929 }
1930 EXPORT_SYMBOL_GPL(dev_nit_active);
1931
1932
1933
1934
1935
1936
1937 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1938 {
1939 struct packet_type *ptype;
1940 struct sk_buff *skb2 = NULL;
1941 struct packet_type *pt_prev = NULL;
1942 struct list_head *ptype_list = &ptype_all;
1943
1944 rcu_read_lock();
1945 again:
1946 list_for_each_entry_rcu(ptype, ptype_list, list) {
1947 if (ptype->ignore_outgoing)
1948 continue;
1949
1950
1951
1952
1953 if (skb_loop_sk(ptype, skb))
1954 continue;
1955
1956 if (pt_prev) {
1957 deliver_skb(skb2, pt_prev, skb->dev);
1958 pt_prev = ptype;
1959 continue;
1960 }
1961
1962
1963 skb2 = skb_clone(skb, GFP_ATOMIC);
1964 if (!skb2)
1965 goto out_unlock;
1966
1967 net_timestamp_set(skb2);
1968
1969
1970
1971
1972
1973 skb_reset_mac_header(skb2);
1974
1975 if (skb_network_header(skb2) < skb2->data ||
1976 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1977 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1978 ntohs(skb2->protocol),
1979 dev->name);
1980 skb_reset_network_header(skb2);
1981 }
1982
1983 skb2->transport_header = skb2->network_header;
1984 skb2->pkt_type = PACKET_OUTGOING;
1985 pt_prev = ptype;
1986 }
1987
1988 if (ptype_list == &ptype_all) {
1989 ptype_list = &dev->ptype_all;
1990 goto again;
1991 }
1992 out_unlock:
1993 if (pt_prev) {
1994 if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC))
1995 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1996 else
1997 kfree_skb(skb2);
1998 }
1999 rcu_read_unlock();
2000 }
2001 EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016 static void netif_setup_tc(struct net_device *dev, unsigned int txq)
2017 {
2018 int i;
2019 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
2020
2021
2022 if (tc->offset + tc->count > txq) {
2023 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
2024 dev->num_tc = 0;
2025 return;
2026 }
2027
2028
2029 for (i = 1; i < TC_BITMASK + 1; i++) {
2030 int q = netdev_get_prio_tc_map(dev, i);
2031
2032 tc = &dev->tc_to_txq[q];
2033 if (tc->offset + tc->count > txq) {
2034 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
2035 i, q);
2036 netdev_set_prio_tc_map(dev, i, 0);
2037 }
2038 }
2039 }
2040
2041 int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
2042 {
2043 if (dev->num_tc) {
2044 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
2045 int i;
2046
2047
2048 for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
2049 if ((txq - tc->offset) < tc->count)
2050 return i;
2051 }
2052
2053
2054 return -1;
2055 }
2056
2057 return 0;
2058 }
2059 EXPORT_SYMBOL(netdev_txq_to_tc);
2060
2061 #ifdef CONFIG_XPS
2062 struct static_key xps_needed __read_mostly;
2063 EXPORT_SYMBOL(xps_needed);
2064 struct static_key xps_rxqs_needed __read_mostly;
2065 EXPORT_SYMBOL(xps_rxqs_needed);
2066 static DEFINE_MUTEX(xps_map_mutex);
2067 #define xmap_dereference(P) \
2068 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
2069
2070 static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
2071 int tci, u16 index)
2072 {
2073 struct xps_map *map = NULL;
2074 int pos;
2075
2076 if (dev_maps)
2077 map = xmap_dereference(dev_maps->attr_map[tci]);
2078 if (!map)
2079 return false;
2080
2081 for (pos = map->len; pos--;) {
2082 if (map->queues[pos] != index)
2083 continue;
2084
2085 if (map->len > 1) {
2086 map->queues[pos] = map->queues[--map->len];
2087 break;
2088 }
2089
2090 RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
2091 kfree_rcu(map, rcu);
2092 return false;
2093 }
2094
2095 return true;
2096 }
2097
2098 static bool remove_xps_queue_cpu(struct net_device *dev,
2099 struct xps_dev_maps *dev_maps,
2100 int cpu, u16 offset, u16 count)
2101 {
2102 int num_tc = dev->num_tc ? : 1;
2103 bool active = false;
2104 int tci;
2105
2106 for (tci = cpu * num_tc; num_tc--; tci++) {
2107 int i, j;
2108
2109 for (i = count, j = offset; i--; j++) {
2110 if (!remove_xps_queue(dev_maps, tci, j))
2111 break;
2112 }
2113
2114 active |= i < 0;
2115 }
2116
2117 return active;
2118 }
2119
2120 static void reset_xps_maps(struct net_device *dev,
2121 struct xps_dev_maps *dev_maps,
2122 bool is_rxqs_map)
2123 {
2124 if (is_rxqs_map) {
2125 static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
2126 RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
2127 } else {
2128 RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
2129 }
2130 static_key_slow_dec_cpuslocked(&xps_needed);
2131 kfree_rcu(dev_maps, rcu);
2132 }
2133
2134 static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
2135 struct xps_dev_maps *dev_maps, unsigned int nr_ids,
2136 u16 offset, u16 count, bool is_rxqs_map)
2137 {
2138 bool active = false;
2139 int i, j;
2140
2141 for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
2142 j < nr_ids;)
2143 active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
2144 count);
2145 if (!active)
2146 reset_xps_maps(dev, dev_maps, is_rxqs_map);
2147
2148 if (!is_rxqs_map) {
2149 for (i = offset + (count - 1); count--; i--) {
2150 netdev_queue_numa_node_write(
2151 netdev_get_tx_queue(dev, i),
2152 NUMA_NO_NODE);
2153 }
2154 }
2155 }
2156
2157 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
2158 u16 count)
2159 {
2160 const unsigned long *possible_mask = NULL;
2161 struct xps_dev_maps *dev_maps;
2162 unsigned int nr_ids;
2163
2164 if (!static_key_false(&xps_needed))
2165 return;
2166
2167 cpus_read_lock();
2168 mutex_lock(&xps_map_mutex);
2169
2170 if (static_key_false(&xps_rxqs_needed)) {
2171 dev_maps = xmap_dereference(dev->xps_rxqs_map);
2172 if (dev_maps) {
2173 nr_ids = dev->num_rx_queues;
2174 clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
2175 offset, count, true);
2176 }
2177 }
2178
2179 dev_maps = xmap_dereference(dev->xps_cpus_map);
2180 if (!dev_maps)
2181 goto out_no_maps;
2182
2183 if (num_possible_cpus() > 1)
2184 possible_mask = cpumask_bits(cpu_possible_mask);
2185 nr_ids = nr_cpu_ids;
2186 clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
2187 false);
2188
2189 out_no_maps:
2190 mutex_unlock(&xps_map_mutex);
2191 cpus_read_unlock();
2192 }
2193
2194 static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
2195 {
2196 netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
2197 }
2198
2199 static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
2200 u16 index, bool is_rxqs_map)
2201 {
2202 struct xps_map *new_map;
2203 int alloc_len = XPS_MIN_MAP_ALLOC;
2204 int i, pos;
2205
2206 for (pos = 0; map && pos < map->len; pos++) {
2207 if (map->queues[pos] != index)
2208 continue;
2209 return map;
2210 }
2211
2212
2213 if (map) {
2214 if (pos < map->alloc_len)
2215 return map;
2216
2217 alloc_len = map->alloc_len * 2;
2218 }
2219
2220
2221
2222
2223 if (is_rxqs_map)
2224 new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
2225 else
2226 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
2227 cpu_to_node(attr_index));
2228 if (!new_map)
2229 return NULL;
2230
2231 for (i = 0; i < pos; i++)
2232 new_map->queues[i] = map->queues[i];
2233 new_map->alloc_len = alloc_len;
2234 new_map->len = pos;
2235
2236 return new_map;
2237 }
2238
2239
2240 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
2241 u16 index, bool is_rxqs_map)
2242 {
2243 const unsigned long *online_mask = NULL, *possible_mask = NULL;
2244 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
2245 int i, j, tci, numa_node_id = -2;
2246 int maps_sz, num_tc = 1, tc = 0;
2247 struct xps_map *map, *new_map;
2248 bool active = false;
2249 unsigned int nr_ids;
2250
2251 if (dev->num_tc) {
2252
2253 num_tc = dev->num_tc;
2254 if (num_tc < 0)
2255 return -EINVAL;
2256
2257
2258 dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
2259
2260 tc = netdev_txq_to_tc(dev, index);
2261 if (tc < 0)
2262 return -EINVAL;
2263 }
2264
2265 mutex_lock(&xps_map_mutex);
2266 if (is_rxqs_map) {
2267 maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
2268 dev_maps = xmap_dereference(dev->xps_rxqs_map);
2269 nr_ids = dev->num_rx_queues;
2270 } else {
2271 maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
2272 if (num_possible_cpus() > 1) {
2273 online_mask = cpumask_bits(cpu_online_mask);
2274 possible_mask = cpumask_bits(cpu_possible_mask);
2275 }
2276 dev_maps = xmap_dereference(dev->xps_cpus_map);
2277 nr_ids = nr_cpu_ids;
2278 }
2279
2280 if (maps_sz < L1_CACHE_BYTES)
2281 maps_sz = L1_CACHE_BYTES;
2282
2283
2284 for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
2285 j < nr_ids;) {
2286 if (!new_dev_maps)
2287 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
2288 if (!new_dev_maps) {
2289 mutex_unlock(&xps_map_mutex);
2290 return -ENOMEM;
2291 }
2292
2293 tci = j * num_tc + tc;
2294 map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
2295 NULL;
2296
2297 map = expand_xps_map(map, j, index, is_rxqs_map);
2298 if (!map)
2299 goto error;
2300
2301 RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2302 }
2303
2304 if (!new_dev_maps)
2305 goto out_no_new_maps;
2306
2307 if (!dev_maps) {
2308
2309 static_key_slow_inc_cpuslocked(&xps_needed);
2310 if (is_rxqs_map)
2311 static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
2312 }
2313
2314 for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
2315 j < nr_ids;) {
2316
2317 for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
2318
2319 map = xmap_dereference(dev_maps->attr_map[tci]);
2320 RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2321 }
2322
2323
2324
2325
2326 tci = j * num_tc + tc;
2327
2328 if (netif_attr_test_mask(j, mask, nr_ids) &&
2329 netif_attr_test_online(j, online_mask, nr_ids)) {
2330
2331 int pos = 0;
2332
2333 map = xmap_dereference(new_dev_maps->attr_map[tci]);
2334 while ((pos < map->len) && (map->queues[pos] != index))
2335 pos++;
2336
2337 if (pos == map->len)
2338 map->queues[map->len++] = index;
2339 #ifdef CONFIG_NUMA
2340 if (!is_rxqs_map) {
2341 if (numa_node_id == -2)
2342 numa_node_id = cpu_to_node(j);
2343 else if (numa_node_id != cpu_to_node(j))
2344 numa_node_id = -1;
2345 }
2346 #endif
2347 } else if (dev_maps) {
2348
2349 map = xmap_dereference(dev_maps->attr_map[tci]);
2350 RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2351 }
2352
2353
2354 for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
2355
2356 map = xmap_dereference(dev_maps->attr_map[tci]);
2357 RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2358 }
2359 }
2360
2361 if (is_rxqs_map)
2362 rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
2363 else
2364 rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
2365
2366
2367 if (!dev_maps)
2368 goto out_no_old_maps;
2369
2370 for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
2371 j < nr_ids;) {
2372 for (i = num_tc, tci = j * num_tc; i--; tci++) {
2373 new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
2374 map = xmap_dereference(dev_maps->attr_map[tci]);
2375 if (map && map != new_map)
2376 kfree_rcu(map, rcu);
2377 }
2378 }
2379
2380 kfree_rcu(dev_maps, rcu);
2381
2382 out_no_old_maps:
2383 dev_maps = new_dev_maps;
2384 active = true;
2385
2386 out_no_new_maps:
2387 if (!is_rxqs_map) {
2388
2389 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2390 (numa_node_id >= 0) ?
2391 numa_node_id : NUMA_NO_NODE);
2392 }
2393
2394 if (!dev_maps)
2395 goto out_no_maps;
2396
2397
2398 for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
2399 j < nr_ids;) {
2400 for (i = tc, tci = j * num_tc; i--; tci++)
2401 active |= remove_xps_queue(dev_maps, tci, index);
2402 if (!netif_attr_test_mask(j, mask, nr_ids) ||
2403 !netif_attr_test_online(j, online_mask, nr_ids))
2404 active |= remove_xps_queue(dev_maps, tci, index);
2405 for (i = num_tc - tc, tci++; --i; tci++)
2406 active |= remove_xps_queue(dev_maps, tci, index);
2407 }
2408
2409
2410 if (!active)
2411 reset_xps_maps(dev, dev_maps, is_rxqs_map);
2412
2413 out_no_maps:
2414 mutex_unlock(&xps_map_mutex);
2415
2416 return 0;
2417 error:
2418
2419 for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
2420 j < nr_ids;) {
2421 for (i = num_tc, tci = j * num_tc; i--; tci++) {
2422 new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
2423 map = dev_maps ?
2424 xmap_dereference(dev_maps->attr_map[tci]) :
2425 NULL;
2426 if (new_map && new_map != map)
2427 kfree(new_map);
2428 }
2429 }
2430
2431 mutex_unlock(&xps_map_mutex);
2432
2433 kfree(new_dev_maps);
2434 return -ENOMEM;
2435 }
2436 EXPORT_SYMBOL_GPL(__netif_set_xps_queue);
2437
2438 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
2439 u16 index)
2440 {
2441 int ret;
2442
2443 cpus_read_lock();
2444 ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
2445 cpus_read_unlock();
2446
2447 return ret;
2448 }
2449 EXPORT_SYMBOL(netif_set_xps_queue);
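
A hypothetical driver sketch: steering transmissions from CPU 0 onto TX queue 0 via the CPU-based XPS map (the queue and CPU numbers are illustrative).

    cpumask_var_t mask;

    if (zalloc_cpumask_var(&mask, GFP_KERNEL)) {
            cpumask_set_cpu(0, mask);
            netif_set_xps_queue(dev, mask, 0);  /* map CPU 0 to TX queue 0 */
            free_cpumask_var(mask);
    }
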
2450
2451 #endif
2452 static void netdev_unbind_all_sb_channels(struct net_device *dev)
2453 {
2454 struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
2455
2456
2457 while (txq-- != &dev->_tx[0]) {
2458 if (txq->sb_dev)
2459 netdev_unbind_sb_channel(dev, txq->sb_dev);
2460 }
2461 }
2462
2463 void netdev_reset_tc(struct net_device *dev)
2464 {
2465 #ifdef CONFIG_XPS
2466 netif_reset_xps_queues_gt(dev, 0);
2467 #endif
2468 netdev_unbind_all_sb_channels(dev);
2469
2470
2471 dev->num_tc = 0;
2472 memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
2473 memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
2474 }
2475 EXPORT_SYMBOL(netdev_reset_tc);
2476
2477 int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
2478 {
2479 if (tc >= dev->num_tc)
2480 return -EINVAL;
2481
2482 #ifdef CONFIG_XPS
2483 netif_reset_xps_queues(dev, offset, count);
2484 #endif
2485 dev->tc_to_txq[tc].count = count;
2486 dev->tc_to_txq[tc].offset = offset;
2487 return 0;
2488 }
2489 EXPORT_SYMBOL(netdev_set_tc_queue);
2490
2491 int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
2492 {
2493 if (num_tc > TC_MAX_QUEUE)
2494 return -EINVAL;
2495
2496 #ifdef CONFIG_XPS
2497 netif_reset_xps_queues_gt(dev, 0);
2498 #endif
2499 netdev_unbind_all_sb_channels(dev);
2500
2501 dev->num_tc = num_tc;
2502 return 0;
2503 }
2504 EXPORT_SYMBOL(netdev_set_num_tc);
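/*
 * Editor's illustrative sketch (not part of dev.c): typical driver use of
 * netdev_set_num_tc()/netdev_set_tc_queue()/netdev_reset_tc() above, e.g.
 * from an mqprio-style setup path.  The even 1/num_tc split and the helper
 * name "my_dev_apply_mqprio" are assumptions for illustration only.
 */
static int my_dev_apply_mqprio(struct net_device *dev, u8 num_tc)
{
	u16 count, offset = 0;
	int err, tc;

	if (!num_tc) {			/* tear the TC mapping down */
		netdev_reset_tc(dev);
		return 0;
	}

	err = netdev_set_num_tc(dev, num_tc);
	if (err)
		return err;

	/* Split the active TX queues evenly across the traffic classes. */
	count = dev->real_num_tx_queues / num_tc;
	for (tc = 0; tc < num_tc; tc++, offset += count) {
		err = netdev_set_tc_queue(dev, tc, count, offset);
		if (err) {
			netdev_reset_tc(dev);
			return err;
		}
	}
	return 0;
}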
2505
2506 void netdev_unbind_sb_channel(struct net_device *dev,
2507 struct net_device *sb_dev)
2508 {
2509 struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
2510
2511 #ifdef CONFIG_XPS
2512 netif_reset_xps_queues_gt(sb_dev, 0);
2513 #endif
2514 memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
2515 memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
2516
2517 while (txq-- != &dev->_tx[0]) {
2518 if (txq->sb_dev == sb_dev)
2519 txq->sb_dev = NULL;
2520 }
2521 }
2522 EXPORT_SYMBOL(netdev_unbind_sb_channel);
2523
2524 int netdev_bind_sb_channel_queue(struct net_device *dev,
2525 struct net_device *sb_dev,
2526 u8 tc, u16 count, u16 offset)
2527 {
2528
2529 if (sb_dev->num_tc >= 0 || tc >= dev->num_tc)
2530 return -EINVAL;
2531
2532
2533 if ((offset + count) > dev->real_num_tx_queues)
2534 return -EINVAL;
2535
2536
2537 sb_dev->tc_to_txq[tc].count = count;
2538 sb_dev->tc_to_txq[tc].offset = offset;
2539
2540
2541
2542
2543 while (count--)
2544 netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev;
2545
2546 return 0;
2547 }
2548 EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
2549
2550 int netdev_set_sb_channel(struct net_device *dev, u16 channel)
2551 {
2552
2553 if (netif_is_multiqueue(dev))
2554 return -ENODEV;
2555
2556
2557
2558
2559
2560
2561 if (channel > S16_MAX)
2562 return -EINVAL;
2563
2564 dev->num_tc = -channel;
2565
2566 return 0;
2567 }
2568 EXPORT_SYMBOL(netdev_set_sb_channel);
2569
2570
2571
2572
2573
2574 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2575 {
2576 bool disabling;
2577 int rc;
2578
2579 disabling = txq < dev->real_num_tx_queues;
2580
2581 if (txq < 1 || txq > dev->num_tx_queues)
2582 return -EINVAL;
2583
2584 if (dev->reg_state == NETREG_REGISTERED ||
2585 dev->reg_state == NETREG_UNREGISTERING) {
2586 ASSERT_RTNL();
2587
2588 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
2589 txq);
2590 if (rc)
2591 return rc;
2592
2593 if (dev->num_tc)
2594 netif_setup_tc(dev, txq);
2595
2596 dev->real_num_tx_queues = txq;
2597
2598 if (disabling) {
2599 synchronize_net();
2600 qdisc_reset_all_tx_gt(dev, txq);
2601 #ifdef CONFIG_XPS
2602 netif_reset_xps_queues_gt(dev, txq);
2603 #endif
2604 }
2605 } else {
2606 dev->real_num_tx_queues = txq;
2607 }
2608
2609 return 0;
2610 }
2611 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
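/*
 * Editor's illustrative sketch (not part of dev.c): shrinking or growing the
 * active queue set, as an ethtool-style "set channels" handler might do.
 * RTNL is already held on that path, which matches the ASSERT_RTNL() in
 * netif_set_real_num_tx_queues() above; netif_set_real_num_rx_queues() is the
 * CONFIG_SYSFS variant defined below (a header inline covers the other case).
 * "my_dev_set_channels" is a made-up helper.
 */
static int my_dev_set_channels(struct net_device *dev, unsigned int count)
{
	int err;

	err = netif_set_real_num_tx_queues(dev, count);
	if (err)
		return err;

	return netif_set_real_num_rx_queues(dev, count);
}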
2612
2613 #ifdef CONFIG_SYSFS
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
2625 {
2626 int rc;
2627
2628 if (rxq < 1 || rxq > dev->num_rx_queues)
2629 return -EINVAL;
2630
2631 if (dev->reg_state == NETREG_REGISTERED) {
2632 ASSERT_RTNL();
2633
2634 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
2635 rxq);
2636 if (rc)
2637 return rc;
2638 }
2639
2640 dev->real_num_rx_queues = rxq;
2641 return 0;
2642 }
2643 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
2644 #endif
2645
2646
2647
2648
2649
2650
2651
2652 int netif_get_num_default_rss_queues(void)
2653 {
2654 return is_kdump_kernel() ?
2655 1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
2656 }
2657 EXPORT_SYMBOL(netif_get_num_default_rss_queues);
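/*
 * Editor's illustrative sketch (not part of dev.c): a driver sizing its
 * queue/IRQ vector request from netif_get_num_default_rss_queues() above,
 * clamped by a hardware limit.  "MY_HW_MAX_QUEUES" is an assumed constant.
 */
#define MY_HW_MAX_QUEUES 16

static unsigned int my_dev_pick_queue_count(void)
{
	return min_t(unsigned int, MY_HW_MAX_QUEUES,
		     netif_get_num_default_rss_queues());
}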
2658
2659 static void __netif_reschedule(struct Qdisc *q)
2660 {
2661 struct softnet_data *sd;
2662 unsigned long flags;
2663
2664 local_irq_save(flags);
2665 sd = this_cpu_ptr(&softnet_data);
2666 q->next_sched = NULL;
2667 *sd->output_queue_tailp = q;
2668 sd->output_queue_tailp = &q->next_sched;
2669 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2670 local_irq_restore(flags);
2671 }
2672
2673 void __netif_schedule(struct Qdisc *q)
2674 {
2675 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
2676 __netif_reschedule(q);
2677 }
2678 EXPORT_SYMBOL(__netif_schedule);
2679
2680 struct dev_kfree_skb_cb {
2681 enum skb_free_reason reason;
2682 };
2683
2684 static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
2685 {
2686 return (struct dev_kfree_skb_cb *)skb->cb;
2687 }
2688
2689 void netif_schedule_queue(struct netdev_queue *txq)
2690 {
2691 rcu_read_lock();
2692 if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
2693 struct Qdisc *q = rcu_dereference(txq->qdisc);
2694
2695 __netif_schedule(q);
2696 }
2697 rcu_read_unlock();
2698 }
2699 EXPORT_SYMBOL(netif_schedule_queue);
2700
2701 void netif_tx_wake_queue(struct netdev_queue *dev_queue)
2702 {
2703 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
2704 struct Qdisc *q;
2705
2706 rcu_read_lock();
2707 q = rcu_dereference(dev_queue->qdisc);
2708 __netif_schedule(q);
2709 rcu_read_unlock();
2710 }
2711 }
2712 EXPORT_SYMBOL(netif_tx_wake_queue);
2713
2714 void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
2715 {
2716 unsigned long flags;
2717
2718 if (unlikely(!skb))
2719 return;
2720
2721 if (likely(refcount_read(&skb->users) == 1)) {
2722 smp_rmb();
2723 refcount_set(&skb->users, 0);
2724 } else if (likely(!refcount_dec_and_test(&skb->users))) {
2725 return;
2726 }
2727 get_kfree_skb_cb(skb)->reason = reason;
2728 local_irq_save(flags);
2729 skb->next = __this_cpu_read(softnet_data.completion_queue);
2730 __this_cpu_write(softnet_data.completion_queue, skb);
2731 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2732 local_irq_restore(flags);
2733 }
2734 EXPORT_SYMBOL(__dev_kfree_skb_irq);
2735
2736 void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
2737 {
2738 if (in_irq() || irqs_disabled())
2739 __dev_kfree_skb_irq(skb, reason);
2740 else
2741 dev_kfree_skb(skb);
2742 }
2743 EXPORT_SYMBOL(__dev_kfree_skb_any);
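/*
 * Editor's illustrative sketch (not part of dev.c): freeing skbs from a TX
 * completion handler that may run in hard-IRQ context, the case
 * __dev_kfree_skb_irq()/__dev_kfree_skb_any() above exist for.  The wrappers
 * dev_consume_skb_any()/dev_kfree_skb_any() pass SKB_REASON_CONSUMED and
 * SKB_REASON_DROPPED respectively; "my_dev_tx_complete" is an assumed helper.
 */
static void my_dev_tx_complete(struct sk_buff *skb, bool sent_ok)
{
	if (sent_ok)
		dev_consume_skb_any(skb);	/* not reported to drop monitors */
	else
		dev_kfree_skb_any(skb);		/* accounted as a drop */
}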
2744
2745
2746
2747
2748
2749
2750
2751
2752 void netif_device_detach(struct net_device *dev)
2753 {
2754 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
2755 netif_running(dev)) {
2756 netif_tx_stop_all_queues(dev);
2757 }
2758 }
2759 EXPORT_SYMBOL(netif_device_detach);
2760
2761
2762
2763
2764
2765
2766
2767 void netif_device_attach(struct net_device *dev)
2768 {
2769 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
2770 netif_running(dev)) {
2771 netif_tx_wake_all_queues(dev);
2772 __netdev_watchdog_up(dev);
2773 }
2774 }
2775 EXPORT_SYMBOL(netif_device_attach);
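/*
 * Editor's illustrative sketch (not part of dev.c): the usual PM pairing of
 * netif_device_detach()/netif_device_attach() above in a driver's
 * suspend/resume callbacks.  "my_dev_suspend"/"my_dev_resume" and storing the
 * net_device as drvdata are assumptions; the hardware steps are elided.
 */
#include <linux/device.h>

static int my_dev_suspend(struct device *d)
{
	struct net_device *dev = dev_get_drvdata(d);

	netif_device_detach(dev);	/* stops all TX queues if running */
	/* ... put the hardware into a low-power state here ... */
	return 0;
}

static int my_dev_resume(struct device *d)
{
	struct net_device *dev = dev_get_drvdata(d);

	/* ... re-initialise the hardware here ... */
	netif_device_attach(dev);	/* wakes queues, re-arms the watchdog */
	return 0;
}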
2776
2777
2778
2779
2780
2781 static u16 skb_tx_hash(const struct net_device *dev,
2782 const struct net_device *sb_dev,
2783 struct sk_buff *skb)
2784 {
2785 u32 hash;
2786 u16 qoffset = 0;
2787 u16 qcount = dev->real_num_tx_queues;
2788
2789 if (dev->num_tc) {
2790 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2791
2792 qoffset = sb_dev->tc_to_txq[tc].offset;
2793 qcount = sb_dev->tc_to_txq[tc].count;
2794 }
2795
2796 if (skb_rx_queue_recorded(skb)) {
2797 hash = skb_get_rx_queue(skb);
2798 if (hash >= qoffset)
2799 hash -= qoffset;
2800 while (unlikely(hash >= qcount))
2801 hash -= qcount;
2802 return hash + qoffset;
2803 }
2804
2805 return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
2806 }
2807
2808 static void skb_warn_bad_offload(const struct sk_buff *skb)
2809 {
2810 static const netdev_features_t null_features;
2811 struct net_device *dev = skb->dev;
2812 const char *name = "";
2813
2814 if (!net_ratelimit())
2815 return;
2816
2817 if (dev) {
2818 if (dev->dev.parent)
2819 name = dev_driver_string(dev->dev.parent);
2820 else
2821 name = netdev_name(dev);
2822 }
2823 skb_dump(KERN_WARNING, skb, false);
2824 WARN(1, "%s: caps=(%pNF, %pNF)\n",
2825 name, dev ? &dev->features : &null_features,
2826 skb->sk ? &skb->sk->sk_route_caps : &null_features);
2827 }
2828
2829
2830
2831
2832
2833 int skb_checksum_help(struct sk_buff *skb)
2834 {
2835 __wsum csum;
2836 int ret = 0, offset;
2837
2838 if (skb->ip_summed == CHECKSUM_COMPLETE)
2839 goto out_set_summed;
2840
2841 if (unlikely(skb_shinfo(skb)->gso_size)) {
2842 skb_warn_bad_offload(skb);
2843 return -EINVAL;
2844 }
2845
2846
2847
2848
2849 if (skb_has_shared_frag(skb)) {
2850 ret = __skb_linearize(skb);
2851 if (ret)
2852 goto out;
2853 }
2854
2855 offset = skb_checksum_start_offset(skb);
2856 BUG_ON(offset >= skb_headlen(skb));
2857 csum = skb_checksum(skb, offset, skb->len - offset, 0);
2858
2859 offset += skb->csum_offset;
2860 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
2861
2862 if (skb_cloned(skb) &&
2863 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
2864 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2865 if (ret)
2866 goto out;
2867 }
2868
2869 *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
2870 out_set_summed:
2871 skb->ip_summed = CHECKSUM_NONE;
2872 out:
2873 return ret;
2874 }
2875 EXPORT_SYMBOL(skb_checksum_help);
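/*
 * Editor's illustrative sketch (not part of dev.c): a driver xmit path that
 * cannot offload the checksum for a given packet and falls back to
 * skb_checksum_help() above to resolve CHECKSUM_PARTIAL in software.
 * "my_hw_can_csum" is an assumed capability check, not a real kernel API.
 */
static bool my_hw_can_csum(const struct sk_buff *skb);	/* assumed helper */

static netdev_tx_t my_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (skb->ip_summed == CHECKSUM_PARTIAL && !my_hw_can_csum(skb)) {
		if (skb_checksum_help(skb))
			goto drop;
	}
	/* ... hand the (now fully checksummed) skb to the hardware ... */
	return NETDEV_TX_OK;

drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}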
2876
2877 int skb_crc32c_csum_help(struct sk_buff *skb)
2878 {
2879 __le32 crc32c_csum;
2880 int ret = 0, offset, start;
2881
2882 if (skb->ip_summed != CHECKSUM_PARTIAL)
2883 goto out;
2884
2885 if (unlikely(skb_is_gso(skb)))
2886 goto out;
2887
2888
2889
2890
2891 if (unlikely(skb_has_shared_frag(skb))) {
2892 ret = __skb_linearize(skb);
2893 if (ret)
2894 goto out;
2895 }
2896 start = skb_checksum_start_offset(skb);
2897 offset = start + offsetof(struct sctphdr, checksum);
2898 if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
2899 ret = -EINVAL;
2900 goto out;
2901 }
2902 if (skb_cloned(skb) &&
2903 !skb_clone_writable(skb, offset + sizeof(__le32))) {
2904 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2905 if (ret)
2906 goto out;
2907 }
2908 crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
2909 skb->len - start, ~(__u32)0,
2910 crc32c_csum_stub));
2911 *(__le32 *)(skb->data + offset) = crc32c_csum;
2912 skb->ip_summed = CHECKSUM_NONE;
2913 skb->csum_not_inet = 0;
2914 out:
2915 return ret;
2916 }
2917
2918 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
2919 {
2920 __be16 type = skb->protocol;
2921
2922
2923 if (type == htons(ETH_P_TEB)) {
2924 struct ethhdr *eth;
2925
2926 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
2927 return 0;
2928
2929 eth = (struct ethhdr *)skb->data;
2930 type = eth->h_proto;
2931 }
2932
2933 return __vlan_get_protocol(skb, type, depth);
2934 }
2935
2936
2937
2938
2939
2940
2941 struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2942 netdev_features_t features)
2943 {
2944 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2945 struct packet_offload *ptype;
2946 int vlan_depth = skb->mac_len;
2947 __be16 type = skb_network_protocol(skb, &vlan_depth);
2948
2949 if (unlikely(!type))
2950 return ERR_PTR(-EINVAL);
2951
2952 __skb_pull(skb, vlan_depth);
2953
2954 rcu_read_lock();
2955 list_for_each_entry_rcu(ptype, &offload_base, list) {
2956 if (ptype->type == type && ptype->callbacks.gso_segment) {
2957 segs = ptype->callbacks.gso_segment(skb, features);
2958 break;
2959 }
2960 }
2961 rcu_read_unlock();
2962
2963 __skb_push(skb, skb->data - skb_mac_header(skb));
2964
2965 return segs;
2966 }
2967 EXPORT_SYMBOL(skb_mac_gso_segment);
2968
2969
2970
2971
2972 static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
2973 {
2974 if (tx_path)
2975 return skb->ip_summed != CHECKSUM_PARTIAL &&
2976 skb->ip_summed != CHECKSUM_UNNECESSARY;
2977
2978 return skb->ip_summed == CHECKSUM_NONE;
2979 }
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2995 netdev_features_t features, bool tx_path)
2996 {
2997 struct sk_buff *segs;
2998
2999 if (unlikely(skb_needs_check(skb, tx_path))) {
3000 int err;
3001
3002
3003 err = skb_cow_head(skb, 0);
3004 if (err < 0)
3005 return ERR_PTR(err);
3006 }
3007
3008
3009
3010
3011
3012 if (features & NETIF_F_GSO_PARTIAL) {
3013 netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
3014 struct net_device *dev = skb->dev;
3015
3016 partial_features |= dev->features & dev->gso_partial_features;
3017 if (!skb_gso_ok(skb, features | partial_features))
3018 features &= ~NETIF_F_GSO_PARTIAL;
3019 }
3020
3021 BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
3022 sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
3023
3024 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
3025 SKB_GSO_CB(skb)->encap_level = 0;
3026
3027 skb_reset_mac_header(skb);
3028 skb_reset_mac_len(skb);
3029
3030 segs = skb_mac_gso_segment(skb, features);
3031
3032 if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
3033 skb_warn_bad_offload(skb);
3034
3035 return segs;
3036 }
3037 EXPORT_SYMBOL(__skb_gso_segment);
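/*
 * Editor's illustrative sketch (not part of dev.c): a software GSO fallback.
 * skb_gso_segment() is the header inline around __skb_gso_segment() above
 * with tx_path == true; masking out NETIF_F_GSO_MASK forces segmentation into
 * MTU-sized skbs that are then sent one by one.  "my_dev_xmit_one" is an
 * assumed per-segment transmit helper.
 */
static int my_dev_xmit_one(struct sk_buff *skb, struct net_device *dev);

static int my_dev_xmit_gso(struct sk_buff *skb, struct net_device *dev)
{
	struct sk_buff *segs, *next;

	segs = skb_gso_segment(skb, dev->features & ~NETIF_F_GSO_MASK);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (!segs)			/* nothing to split, send as is */
		return my_dev_xmit_one(skb, dev);

	consume_skb(skb);		/* original is covered by the segments */
	for (; segs; segs = next) {
		next = segs->next;
		skb_mark_not_on_list(segs);
		my_dev_xmit_one(segs, dev);
	}
	return 0;
}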
3038
3039
3040 #ifdef CONFIG_BUG
3041 void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
3042 {
3043 if (net_ratelimit()) {
3044 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
3045 skb_dump(KERN_ERR, skb, true);
3046 dump_stack();
3047 }
3048 }
3049 EXPORT_SYMBOL(netdev_rx_csum_fault);
3050 #endif
3051
3052
3053 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
3054 {
3055 #ifdef CONFIG_HIGHMEM
3056 int i;
3057
3058 if (!(dev->features & NETIF_F_HIGHDMA)) {
3059 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
3060 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3061
3062 if (PageHighMem(skb_frag_page(frag)))
3063 return 1;
3064 }
3065 }
3066 #endif
3067 return 0;
3068 }
3069
3070
3071
3072
3073 #if IS_ENABLED(CONFIG_NET_MPLS_GSO)
3074 static netdev_features_t net_mpls_features(struct sk_buff *skb,
3075 netdev_features_t features,
3076 __be16 type)
3077 {
3078 if (eth_p_mpls(type))
3079 features &= skb->dev->mpls_features;
3080
3081 return features;
3082 }
3083 #else
3084 static netdev_features_t net_mpls_features(struct sk_buff *skb,
3085 netdev_features_t features,
3086 __be16 type)
3087 {
3088 return features;
3089 }
3090 #endif
3091
3092 static netdev_features_t harmonize_features(struct sk_buff *skb,
3093 netdev_features_t features)
3094 {
3095 int tmp;
3096 __be16 type;
3097
3098 type = skb_network_protocol(skb, &tmp);
3099 features = net_mpls_features(skb, features, type);
3100
3101 if (skb->ip_summed != CHECKSUM_NONE &&
3102 !can_checksum_protocol(features, type)) {
3103 features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
3104 }
3105 if (illegal_highdma(skb->dev, skb))
3106 features &= ~NETIF_F_SG;
3107
3108 return features;
3109 }
3110
3111 netdev_features_t passthru_features_check(struct sk_buff *skb,
3112 struct net_device *dev,
3113 netdev_features_t features)
3114 {
3115 return features;
3116 }
3117 EXPORT_SYMBOL(passthru_features_check);
3118
3119 static netdev_features_t dflt_features_check(struct sk_buff *skb,
3120 struct net_device *dev,
3121 netdev_features_t features)
3122 {
3123 return vlan_features_check(skb, features);
3124 }
3125
3126 static netdev_features_t gso_features_check(const struct sk_buff *skb,
3127 struct net_device *dev,
3128 netdev_features_t features)
3129 {
3130 u16 gso_segs = skb_shinfo(skb)->gso_segs;
3131
3132 if (gso_segs > dev->gso_max_segs)
3133 return features & ~NETIF_F_GSO_MASK;
3134
3135
3136
3137
3138
3139
3140
3141 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
3142 features &= ~dev->gso_partial_features;
3143
3144
3145
3146
3147 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
3148 struct iphdr *iph = skb->encapsulation ?
3149 inner_ip_hdr(skb) : ip_hdr(skb);
3150
3151 if (!(iph->frag_off & htons(IP_DF)))
3152 features &= ~NETIF_F_TSO_MANGLEID;
3153 }
3154
3155 return features;
3156 }
3157
3158 netdev_features_t netif_skb_features(struct sk_buff *skb)
3159 {
3160 struct net_device *dev = skb->dev;
3161 netdev_features_t features = dev->features;
3162
3163 if (skb_is_gso(skb))
3164 features = gso_features_check(skb, dev, features);
3165
3166
3167
3168
3169
3170 if (skb->encapsulation)
3171 features &= dev->hw_enc_features;
3172
3173 if (skb_vlan_tagged(skb))
3174 features = netdev_intersect_features(features,
3175 dev->vlan_features |
3176 NETIF_F_HW_VLAN_CTAG_TX |
3177 NETIF_F_HW_VLAN_STAG_TX);
3178
3179 if (dev->netdev_ops->ndo_features_check)
3180 features &= dev->netdev_ops->ndo_features_check(skb, dev,
3181 features);
3182 else
3183 features &= dflt_features_check(skb, dev, features);
3184
3185 return harmonize_features(skb, features);
3186 }
3187 EXPORT_SYMBOL(netif_skb_features);
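/*
 * Editor's illustrative sketch (not part of dev.c): a driver-side
 * ndo_features_check() hook of the kind netif_skb_features() above calls.
 * The 96-byte parser limit is an arbitrary example value; the fallback to
 * software happens later via harmonize_features()/skb_checksum_help().
 */
#include <linux/if_vlan.h>

static netdev_features_t my_dev_features_check(struct sk_buff *skb,
					       struct net_device *dev,
					       netdev_features_t features)
{
	/* Assume the hardware parser handles at most 96 header bytes:
	 * drop checksum/GSO offload for anything deeper.
	 */
	if (skb_transport_offset(skb) > 96)
		features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);

	return vlan_features_check(skb, features);
}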
3188
3189 static int xmit_one(struct sk_buff *skb, struct net_device *dev,
3190 struct netdev_queue *txq, bool more)
3191 {
3192 unsigned int len;
3193 int rc;
3194
3195 if (dev_nit_active(dev))
3196 dev_queue_xmit_nit(skb, dev);
3197
3198 len = skb->len;
3199 trace_net_dev_start_xmit(skb, dev);
3200 rc = netdev_start_xmit(skb, dev, txq, more);
3201 trace_net_dev_xmit(skb, rc, dev, len);
3202
3203 return rc;
3204 }
3205
3206 struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
3207 struct netdev_queue *txq, int *ret)
3208 {
3209 struct sk_buff *skb = first;
3210 int rc = NETDEV_TX_OK;
3211
3212 while (skb) {
3213 struct sk_buff *next = skb->next;
3214
3215 skb_mark_not_on_list(skb);
3216 rc = xmit_one(skb, dev, txq, next != NULL);
3217 if (unlikely(!dev_xmit_complete(rc))) {
3218 skb->next = next;
3219 goto out;
3220 }
3221
3222 skb = next;
3223 if (netif_tx_queue_stopped(txq) && skb) {
3224 rc = NETDEV_TX_BUSY;
3225 break;
3226 }
3227 }
3228
3229 out:
3230 *ret = rc;
3231 return skb;
3232 }
3233
3234 static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
3235 netdev_features_t features)
3236 {
3237 if (skb_vlan_tag_present(skb) &&
3238 !vlan_hw_offload_capable(features, skb->vlan_proto))
3239 skb = __vlan_hwaccel_push_inside(skb);
3240 return skb;
3241 }
3242
3243 int skb_csum_hwoffload_help(struct sk_buff *skb,
3244 const netdev_features_t features)
3245 {
3246 if (unlikely(skb->csum_not_inet))
3247 return !!(features & NETIF_F_SCTP_CRC) ? 0 :
3248 skb_crc32c_csum_help(skb);
3249
3250 return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
3251 }
3252 EXPORT_SYMBOL(skb_csum_hwoffload_help);
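/*
 * Editor's illustrative sketch (not part of dev.c): resolving a pending
 * checksum against a specific feature set, mirroring what validate_xmit_skb()
 * below does.  A device advertising NETIF_F_HW_CSUM but not NETIF_F_SCTP_CRC
 * gets CRC32c folded in software here while Internet checksums stay offloaded.
 * "my_resolve_csum" is an assumed wrapper.
 */
static int my_resolve_csum(struct sk_buff *skb, struct net_device *dev)
{
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;		/* nothing pending */

	return skb_csum_hwoffload_help(skb, dev->features);
}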
3253
3254 static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
3255 {
3256 netdev_features_t features;
3257
3258 features = netif_skb_features(skb);
3259 skb = validate_xmit_vlan(skb, features);
3260 if (unlikely(!skb))
3261 goto out_null;
3262
3263 skb = sk_validate_xmit_skb(skb, dev);
3264 if (unlikely(!skb))
3265 goto out_null;
3266
3267 if (netif_needs_gso(skb, features)) {
3268 struct sk_buff *segs;
3269
3270 segs = skb_gso_segment(skb, features);
3271 if (IS_ERR(segs)) {
3272 goto out_kfree_skb;
3273 } else if (segs) {
3274 consume_skb(skb);
3275 skb = segs;
3276 }
3277 } else {
3278 if (skb_needs_linearize(skb, features) &&
3279 __skb_linearize(skb))
3280 goto out_kfree_skb;
3281
3282
3283
3284
3285
3286 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3287 if (skb->encapsulation)
3288 skb_set_inner_transport_header(skb,
3289 skb_checksum_start_offset(skb));
3290 else
3291 skb_set_transport_header(skb,
3292 skb_checksum_start_offset(skb));
3293 if (skb_csum_hwoffload_help(skb, features))
3294 goto out_kfree_skb;
3295 }
3296 }
3297
3298 skb = validate_xmit_xfrm(skb, features, again);
3299
3300 return skb;
3301
3302 out_kfree_skb:
3303 kfree_skb(skb);
3304 out_null:
3305 atomic_long_inc(&dev->tx_dropped);
3306 return NULL;
3307 }
3308
3309 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again)
3310 {
3311 struct sk_buff *next, *head = NULL, *tail;
3312
3313 for (; skb != NULL; skb = next) {
3314 next = skb->next;
3315 skb_mark_not_on_list(skb);
3316
3317
3318 skb->prev = skb;
3319
3320 skb = validate_xmit_skb(skb, dev, again);
3321 if (!skb)
3322 continue;
3323
3324 if (!head)
3325 head = skb;
3326 else
3327 tail->next = skb;
3328
3329
3330
3331 tail = skb->prev;
3332 }
3333 return head;
3334 }
3335 EXPORT_SYMBOL_GPL(validate_xmit_skb_list);
3336
3337 static void qdisc_pkt_len_init(struct sk_buff *skb)
3338 {
3339 const struct skb_shared_info *shinfo = skb_shinfo(skb);
3340
3341 qdisc_skb_cb(skb)->pkt_len = skb->len;
3342
3343
3344
3345
3346 if (shinfo->gso_size && skb_transport_header_was_set(skb)) {
3347 unsigned int hdr_len;
3348 u16 gso_segs = shinfo->gso_segs;
3349
3350
3351 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
3352
3353
3354 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
3355 const struct tcphdr *th;
3356 struct tcphdr _tcphdr;
3357
3358 th = skb_header_pointer(skb, skb_transport_offset(skb),
3359 sizeof(_tcphdr), &_tcphdr);
3360 if (likely(th))
3361 hdr_len += __tcp_hdrlen(th);
3362 } else {
3363 struct udphdr _udphdr;
3364
3365 if (skb_header_pointer(skb, skb_transport_offset(skb),
3366 sizeof(_udphdr), &_udphdr))
3367 hdr_len += sizeof(struct udphdr);
3368 }
3369
3370 if (shinfo->gso_type & SKB_GSO_DODGY)
3371 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
3372 shinfo->gso_size);
3373
3374 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
3375 }
3376 }
3377
3378 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
3379 struct net_device *dev,
3380 struct netdev_queue *txq)
3381 {
3382 spinlock_t *root_lock = qdisc_lock(q);
3383 struct sk_buff *to_free = NULL;
3384 bool contended;
3385 int rc;
3386
3387 qdisc_calculate_pkt_len(skb, q);
3388
3389 if (q->flags & TCQ_F_NOLOCK) {
3390 rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
3391 qdisc_run(q);
3392
3393 if (unlikely(to_free))
3394 kfree_skb_list(to_free);
3395 return rc;
3396 }
3397
3398
3399
3400
3401
3402
3403
3404 contended = qdisc_is_running(q);
3405 if (unlikely(contended))
3406 spin_lock(&q->busylock);
3407
3408 spin_lock(root_lock);
3409 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
3410 __qdisc_drop(skb, &to_free);
3411 rc = NET_XMIT_DROP;
3412 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
3413 qdisc_run_begin(q)) {
3414
3415
3416
3417
3418
3419
3420 qdisc_bstats_update(q, skb);
3421
3422 if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
3423 if (unlikely(contended)) {
3424 spin_unlock(&q->busylock);
3425 contended = false;
3426 }
3427 __qdisc_run(q);
3428 }
3429
3430 qdisc_run_end(q);
3431 rc = NET_XMIT_SUCCESS;
3432 } else {
3433 rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
3434 if (qdisc_run_begin(q)) {
3435 if (unlikely(contended)) {
3436 spin_unlock(&q->busylock);
3437 contended = false;
3438 }
3439 __qdisc_run(q);
3440 qdisc_run_end(q);
3441 }
3442 }
3443 spin_unlock(root_lock);
3444 if (unlikely(to_free))
3445 kfree_skb_list(to_free);
3446 if (unlikely(contended))
3447 spin_unlock(&q->busylock);
3448 return rc;
3449 }
3450
3451 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
3452 static void skb_update_prio(struct sk_buff *skb)
3453 {
3454 const struct netprio_map *map;
3455 const struct sock *sk;
3456 unsigned int prioidx;
3457
3458 if (skb->priority)
3459 return;
3460 map = rcu_dereference_bh(skb->dev->priomap);
3461 if (!map)
3462 return;
3463 sk = skb_to_full_sk(skb);
3464 if (!sk)
3465 return;
3466
3467 prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data);
3468
3469 if (prioidx < map->priomap_len)
3470 skb->priority = map->priomap[prioidx];
3471 }
3472 #else
3473 #define skb_update_prio(skb)
3474 #endif
3475
3476
3477
3478
3479
3480
3481
3482 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
3483 {
3484 skb_reset_mac_header(skb);
3485 __skb_pull(skb, skb_network_offset(skb));
3486 skb->pkt_type = PACKET_LOOPBACK;
3487 skb->ip_summed = CHECKSUM_UNNECESSARY;
3488 WARN_ON(!skb_dst(skb));
3489 skb_dst_force(skb);
3490 netif_rx_ni(skb);
3491 return 0;
3492 }
3493 EXPORT_SYMBOL(dev_loopback_xmit);
3494
3495 #ifdef CONFIG_NET_EGRESS
3496 static struct sk_buff *
3497 sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
3498 {
3499 struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
3500 struct tcf_result cl_res;
3501
3502 if (!miniq)
3503 return skb;
3504
3505
3506 mini_qdisc_bstats_cpu_update(miniq, skb);
3507
3508 switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
3509 case TC_ACT_OK:
3510 case TC_ACT_RECLASSIFY:
3511 skb->tc_index = TC_H_MIN(cl_res.classid);
3512 break;
3513 case TC_ACT_SHOT:
3514 mini_qdisc_qstats_cpu_drop(miniq);
3515 *ret = NET_XMIT_DROP;
3516 kfree_skb(skb);
3517 return NULL;
3518 case TC_ACT_STOLEN:
3519 case TC_ACT_QUEUED:
3520 case TC_ACT_TRAP:
3521 *ret = NET_XMIT_SUCCESS;
3522 consume_skb(skb);
3523 return NULL;
3524 case TC_ACT_REDIRECT:
3525
3526 skb_do_redirect(skb);
3527 *ret = NET_XMIT_SUCCESS;
3528 return NULL;
3529 default:
3530 break;
3531 }
3532
3533 return skb;
3534 }
3535 #endif
3536
3537 #ifdef CONFIG_XPS
3538 static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
3539 struct xps_dev_maps *dev_maps, unsigned int tci)
3540 {
3541 struct xps_map *map;
3542 int queue_index = -1;
3543
3544 if (dev->num_tc) {
3545 tci *= dev->num_tc;
3546 tci += netdev_get_prio_tc_map(dev, skb->priority);
3547 }
3548
3549 map = rcu_dereference(dev_maps->attr_map[tci]);
3550 if (map) {
3551 if (map->len == 1)
3552 queue_index = map->queues[0];
3553 else
3554 queue_index = map->queues[reciprocal_scale(
3555 skb_get_hash(skb), map->len)];
3556 if (unlikely(queue_index >= dev->real_num_tx_queues))
3557 queue_index = -1;
3558 }
3559 return queue_index;
3560 }
3561 #endif
3562
3563 static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
3564 struct sk_buff *skb)
3565 {
3566 #ifdef CONFIG_XPS
3567 struct xps_dev_maps *dev_maps;
3568 struct sock *sk = skb->sk;
3569 int queue_index = -1;
3570
3571 if (!static_key_false(&xps_needed))
3572 return -1;
3573
3574 rcu_read_lock();
3575 if (!static_key_false(&xps_rxqs_needed))
3576 goto get_cpus_map;
3577
3578 dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
3579 if (dev_maps) {
3580 int tci = sk_rx_queue_get(sk);
3581
3582 if (tci >= 0 && tci < dev->num_rx_queues)
3583 queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
3584 tci);
3585 }
3586
3587 get_cpus_map:
3588 if (queue_index < 0) {
3589 dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
3590 if (dev_maps) {
3591 unsigned int tci = skb->sender_cpu - 1;
3592
3593 queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
3594 tci);
3595 }
3596 }
3597 rcu_read_unlock();
3598
3599 return queue_index;
3600 #else
3601 return -1;
3602 #endif
3603 }
3604
3605 u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
3606 struct net_device *sb_dev)
3607 {
3608 return 0;
3609 }
3610 EXPORT_SYMBOL(dev_pick_tx_zero);
3611
3612 u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
3613 struct net_device *sb_dev)
3614 {
3615 return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
3616 }
3617 EXPORT_SYMBOL(dev_pick_tx_cpu_id);
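/*
 * Editor's illustrative sketch (not part of dev.c): wiring one of the
 * exported pickers above into a driver.  In this kernel version
 * ndo_select_queue() takes the same (dev, skb, sb_dev) arguments, so the
 * helper can be plugged in directly; "my_netdev_ops" is an assumed ops table.
 */
static const struct net_device_ops my_netdev_ops = {
	/* one TX queue per CPU: simply follow the submitting CPU */
	.ndo_select_queue	= dev_pick_tx_cpu_id,
	/* .ndo_open, .ndo_start_xmit, ... omitted in this sketch */
};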
3618
3619 u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
3620 struct net_device *sb_dev)
3621 {
3622 struct sock *sk = skb->sk;
3623 int queue_index = sk_tx_queue_get(sk);
3624
3625 sb_dev = sb_dev ? : dev;
3626
3627 if (queue_index < 0 || skb->ooo_okay ||
3628 queue_index >= dev->real_num_tx_queues) {
3629 int new_index = get_xps_queue(dev, sb_dev, skb);
3630
3631 if (new_index < 0)
3632 new_index = skb_tx_hash(dev, sb_dev, skb);
3633
3634 if (queue_index != new_index && sk &&
3635 sk_fullsock(sk) &&
3636 rcu_access_pointer(sk->sk_dst_cache))
3637 sk_tx_queue_set(sk, new_index);
3638
3639 queue_index = new_index;
3640 }
3641
3642 return queue_index;
3643 }
3644 EXPORT_SYMBOL(netdev_pick_tx);
3645
3646 struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
3647 struct sk_buff *skb,
3648 struct net_device *sb_dev)
3649 {
3650 int queue_index = 0;
3651
3652 #ifdef CONFIG_XPS
3653 u32 sender_cpu = skb->sender_cpu - 1;
3654
3655 if (sender_cpu >= (u32)NR_CPUS)
3656 skb->sender_cpu = raw_smp_processor_id() + 1;
3657 #endif
3658
3659 if (dev->real_num_tx_queues != 1) {
3660 const struct net_device_ops *ops = dev->netdev_ops;
3661
3662 if (ops->ndo_select_queue)
3663 queue_index = ops->ndo_select_queue(dev, skb, sb_dev);
3664 else
3665 queue_index = netdev_pick_tx(dev, skb, sb_dev);
3666
3667 queue_index = netdev_cap_txqueue(dev, queue_index);
3668 }
3669
3670 skb_set_queue_mapping(skb, queue_index);
3671 return netdev_get_tx_queue(dev, queue_index);
3672 }
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700 static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
3701 {
3702 struct net_device *dev = skb->dev;
3703 struct netdev_queue *txq;
3704 struct Qdisc *q;
3705 int rc = -ENOMEM;
3706 bool again = false;
3707
3708 skb_reset_mac_header(skb);
3709
3710 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
3711 __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
3712
3713
3714
3715
3716 rcu_read_lock_bh();
3717
3718 skb_update_prio(skb);
3719
3720 qdisc_pkt_len_init(skb);
3721 #ifdef CONFIG_NET_CLS_ACT
3722 skb->tc_at_ingress = 0;
3723 # ifdef CONFIG_NET_EGRESS
3724 if (static_branch_unlikely(&egress_needed_key)) {
3725 skb = sch_handle_egress(skb, &rc, dev);
3726 if (!skb)
3727 goto out;
3728 }
3729 # endif
3730 #endif
3731
3732
3733
3734 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
3735 skb_dst_drop(skb);
3736 else
3737 skb_dst_force(skb);
3738
3739 txq = netdev_core_pick_tx(dev, skb, sb_dev);
3740 q = rcu_dereference_bh(txq->qdisc);
3741
3742 trace_net_dev_queue(skb);
3743 if (q->enqueue) {
3744 rc = __dev_xmit_skb(skb, q, dev, txq);
3745 goto out;
3746 }
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760 if (dev->flags & IFF_UP) {
3761 int cpu = smp_processor_id();
3762
3763 if (txq->xmit_lock_owner != cpu) {
3764 if (dev_xmit_recursion())
3765 goto recursion_alert;
3766
3767 skb = validate_xmit_skb(skb, dev, &again);
3768 if (!skb)
3769 goto out;
3770
3771 HARD_TX_LOCK(dev, txq, cpu);
3772
3773 if (!netif_xmit_stopped(txq)) {
3774 dev_xmit_recursion_inc();
3775 skb = dev_hard_start_xmit(skb, dev, txq, &rc);
3776 dev_xmit_recursion_dec();
3777 if (dev_xmit_complete(rc)) {
3778 HARD_TX_UNLOCK(dev, txq);
3779 goto out;
3780 }
3781 }
3782 HARD_TX_UNLOCK(dev, txq);
3783 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
3784 dev->name);
3785 } else {
3786
3787
3788
3789 recursion_alert:
3790 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
3791 dev->name);
3792 }
3793 }
3794
3795 rc = -ENETDOWN;
3796 rcu_read_unlock_bh();
3797
3798 atomic_long_inc(&dev->tx_dropped);
3799 kfree_skb_list(skb);
3800 return rc;
3801 out:
3802 rcu_read_unlock_bh();
3803 return rc;
3804 }
3805
3806 int dev_queue_xmit(struct sk_buff *skb)
3807 {
3808 return __dev_queue_xmit(skb, NULL);
3809 }
3810 EXPORT_SYMBOL(dev_queue_xmit);
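/*
 * Editor's illustrative sketch (not part of dev.c): the simplest producer of
 * dev_queue_xmit() above - build an skb carrying a prebuilt Ethernet frame
 * for a known device and hand it to the qdisc/driver layer.  The payload
 * handling and "my_send_raw" are assumptions; VLAN-tagged frames would need
 * extra protocol handling.
 */
#include <linux/etherdevice.h>
#include <linux/skbuff.h>

static int my_send_raw(struct net_device *dev, const void *frame, size_t len)
{
	struct sk_buff *skb;

	skb = netdev_alloc_skb(dev, len + LL_RESERVED_SPACE(dev));
	if (!skb)
		return -ENOMEM;

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_put_data(skb, frame, len);		/* caller supplies a full L2 frame */

	skb_reset_mac_header(skb);
	skb->protocol = eth_hdr(skb)->h_proto;
	skb_set_network_header(skb, ETH_HLEN);

	/* Consumes the skb on both success and error. */
	return dev_queue_xmit(skb);
}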
3811
3812 int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
3813 {
3814 return __dev_queue_xmit(skb, sb_dev);
3815 }
3816 EXPORT_SYMBOL(dev_queue_xmit_accel);
3817
3818 int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
3819 {
3820 struct net_device *dev = skb->dev;
3821 struct sk_buff *orig_skb = skb;
3822 struct netdev_queue *txq;
3823 int ret = NETDEV_TX_BUSY;
3824 bool again = false;
3825
3826 if (unlikely(!netif_running(dev) ||
3827 !netif_carrier_ok(dev)))
3828 goto drop;
3829
3830 skb = validate_xmit_skb_list(skb, dev, &again);
3831 if (skb != orig_skb)
3832 goto drop;
3833
3834 skb_set_queue_mapping(skb, queue_id);
3835 txq = skb_get_tx_queue(dev, skb);
3836
3837 local_bh_disable();
3838
3839 HARD_TX_LOCK(dev, txq, smp_processor_id());
3840 if (!netif_xmit_frozen_or_drv_stopped(txq))
3841 ret = netdev_start_xmit(skb, dev, txq, false);
3842 HARD_TX_UNLOCK(dev, txq);
3843
3844 local_bh_enable();
3845
3846 if (!dev_xmit_complete(ret))
3847 kfree_skb(skb);
3848
3849 return ret;
3850 drop:
3851 atomic_long_inc(&dev->tx_dropped);
3852 kfree_skb_list(skb);
3853 return NET_XMIT_DROP;
3854 }
3855 EXPORT_SYMBOL(dev_direct_xmit);
3856
3857
3858
3859
3860
3861 int netdev_max_backlog __read_mostly = 1000;
3862 EXPORT_SYMBOL(netdev_max_backlog);
3863
3864 int netdev_tstamp_prequeue __read_mostly = 1;
3865 int netdev_budget __read_mostly = 300;
3866
3867 unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
3868 int weight_p __read_mostly = 64;
3869 int dev_weight_rx_bias __read_mostly = 1;
3870 int dev_weight_tx_bias __read_mostly = 1;
3871 int dev_rx_weight __read_mostly = 64;
3872 int dev_tx_weight __read_mostly = 64;
3873
3874 int gro_normal_batch __read_mostly = 8;
3875
3876
3877 static inline void ____napi_schedule(struct softnet_data *sd,
3878 struct napi_struct *napi)
3879 {
3880 list_add_tail(&napi->poll_list, &sd->poll_list);
3881 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3882 }
3883
3884 #ifdef CONFIG_RPS
3885
3886
3887 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
3888 EXPORT_SYMBOL(rps_sock_flow_table);
3889 u32 rps_cpu_mask __read_mostly;
3890 EXPORT_SYMBOL(rps_cpu_mask);
3891
3892 struct static_key_false rps_needed __read_mostly;
3893 EXPORT_SYMBOL(rps_needed);
3894 struct static_key_false rfs_needed __read_mostly;
3895 EXPORT_SYMBOL(rfs_needed);
3896
3897 static struct rps_dev_flow *
3898 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3899 struct rps_dev_flow *rflow, u16 next_cpu)
3900 {
3901 if (next_cpu < nr_cpu_ids) {
3902 #ifdef CONFIG_RFS_ACCEL
3903 struct netdev_rx_queue *rxqueue;
3904 struct rps_dev_flow_table *flow_table;
3905 struct rps_dev_flow *old_rflow;
3906 u32 flow_id;
3907 u16 rxq_index;
3908 int rc;
3909
3910
3911 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
3912 !(dev->features & NETIF_F_NTUPLE))
3913 goto out;
3914 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
3915 if (rxq_index == skb_get_rx_queue(skb))
3916 goto out;
3917
3918 rxqueue = dev->_rx + rxq_index;
3919 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3920 if (!flow_table)
3921 goto out;
3922 flow_id = skb_get_hash(skb) & flow_table->mask;
3923 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
3924 rxq_index, flow_id);
3925 if (rc < 0)
3926 goto out;
3927 old_rflow = rflow;
3928 rflow = &flow_table->flows[flow_id];
3929 rflow->filter = rc;
3930 if (old_rflow->filter == rflow->filter)
3931 old_rflow->filter = RPS_NO_FILTER;
3932 out:
3933 #endif
3934 rflow->last_qtail =
3935 per_cpu(softnet_data, next_cpu).input_queue_head;
3936 }
3937
3938 rflow->cpu = next_cpu;
3939 return rflow;
3940 }
3941
3942
3943
3944
3945
3946
3947 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3948 struct rps_dev_flow **rflowp)
3949 {
3950 const struct rps_sock_flow_table *sock_flow_table;
3951 struct netdev_rx_queue *rxqueue = dev->_rx;
3952 struct rps_dev_flow_table *flow_table;
3953 struct rps_map *map;
3954 int cpu = -1;
3955 u32 tcpu;
3956 u32 hash;
3957
3958 if (skb_rx_queue_recorded(skb)) {
3959 u16 index = skb_get_rx_queue(skb);
3960
3961 if (unlikely(index >= dev->real_num_rx_queues)) {
3962 WARN_ONCE(dev->real_num_rx_queues > 1,
3963 "%s received packet on queue %u, but number "
3964 "of RX queues is %u\n",
3965 dev->name, index, dev->real_num_rx_queues);
3966 goto done;
3967 }
3968 rxqueue += index;
3969 }
3970
3971
3972
3973 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3974 map = rcu_dereference(rxqueue->rps_map);
3975 if (!flow_table && !map)
3976 goto done;
3977
3978 skb_reset_network_header(skb);
3979 hash = skb_get_hash(skb);
3980 if (!hash)
3981 goto done;
3982
3983 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3984 if (flow_table && sock_flow_table) {
3985 struct rps_dev_flow *rflow;
3986 u32 next_cpu;
3987 u32 ident;
3988
3989
3990 ident = sock_flow_table->ents[hash & sock_flow_table->mask];
3991 if ((ident ^ hash) & ~rps_cpu_mask)
3992 goto try_rps;
3993
3994 next_cpu = ident & rps_cpu_mask;
3995
3996
3997
3998
3999 rflow = &flow_table->flows[hash & flow_table->mask];
4000 tcpu = rflow->cpu;
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013 if (unlikely(tcpu != next_cpu) &&
4014 (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
4015 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
4016 rflow->last_qtail)) >= 0)) {
4017 tcpu = next_cpu;
4018 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
4019 }
4020
4021 if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
4022 *rflowp = rflow;
4023 cpu = tcpu;
4024 goto done;
4025 }
4026 }
4027
4028 try_rps:
4029
4030 if (map) {
4031 tcpu = map->cpus[reciprocal_scale(hash, map->len)];
4032 if (cpu_online(tcpu)) {
4033 cpu = tcpu;
4034 goto done;
4035 }
4036 }
4037
4038 done:
4039 return cpu;
4040 }
4041
4042 #ifdef CONFIG_RFS_ACCEL
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
4056 u32 flow_id, u16 filter_id)
4057 {
4058 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
4059 struct rps_dev_flow_table *flow_table;
4060 struct rps_dev_flow *rflow;
4061 bool expire = true;
4062 unsigned int cpu;
4063
4064 rcu_read_lock();
4065 flow_table = rcu_dereference(rxqueue->rps_flow_table);
4066 if (flow_table && flow_id <= flow_table->mask) {
4067 rflow = &flow_table->flows[flow_id];
4068 cpu = READ_ONCE(rflow->cpu);
4069 if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
4070 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
4071 rflow->last_qtail) <
4072 (int)(10 * flow_table->mask)))
4073 expire = false;
4074 }
4075 rcu_read_unlock();
4076 return expire;
4077 }
4078 EXPORT_SYMBOL(rps_may_expire_flow);
4079
4080 #endif
4081
4082
4083 static void rps_trigger_softirq(void *data)
4084 {
4085 struct softnet_data *sd = data;
4086
4087 ____napi_schedule(sd, &sd->backlog);
4088 sd->received_rps++;
4089 }
4090
4091 #endif
4092
4093
4094
4095
4096
4097
4098 static int rps_ipi_queued(struct softnet_data *sd)
4099 {
4100 #ifdef CONFIG_RPS
4101 struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
4102
4103 if (sd != mysd) {
4104 sd->rps_ipi_next = mysd->rps_ipi_list;
4105 mysd->rps_ipi_list = sd;
4106
4107 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4108 return 1;
4109 }
4110 #endif
4111 return 0;
4112 }
4113
4114 #ifdef CONFIG_NET_FLOW_LIMIT
4115 int netdev_flow_limit_table_len __read_mostly = (1 << 12);
4116 #endif
4117
4118 static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
4119 {
4120 #ifdef CONFIG_NET_FLOW_LIMIT
4121 struct sd_flow_limit *fl;
4122 struct softnet_data *sd;
4123 unsigned int old_flow, new_flow;
4124
4125 if (qlen < (netdev_max_backlog >> 1))
4126 return false;
4127
4128 sd = this_cpu_ptr(&softnet_data);
4129
4130 rcu_read_lock();
4131 fl = rcu_dereference(sd->flow_limit);
4132 if (fl) {
4133 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
4134 old_flow = fl->history[fl->history_head];
4135 fl->history[fl->history_head] = new_flow;
4136
4137 fl->history_head++;
4138 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
4139
4140 if (likely(fl->buckets[old_flow]))
4141 fl->buckets[old_flow]--;
4142
4143 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
4144 fl->count++;
4145 rcu_read_unlock();
4146 return true;
4147 }
4148 }
4149 rcu_read_unlock();
4150 #endif
4151 return false;
4152 }
4153
4154
4155
4156
4157
4158 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
4159 unsigned int *qtail)
4160 {
4161 struct softnet_data *sd;
4162 unsigned long flags;
4163 unsigned int qlen;
4164
4165 sd = &per_cpu(softnet_data, cpu);
4166
4167 local_irq_save(flags);
4168
4169 rps_lock(sd);
4170 if (!netif_running(skb->dev))
4171 goto drop;
4172 qlen = skb_queue_len(&sd->input_pkt_queue);
4173 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
4174 if (qlen) {
4175 enqueue:
4176 __skb_queue_tail(&sd->input_pkt_queue, skb);
4177 input_queue_tail_incr_save(sd, qtail);
4178 rps_unlock(sd);
4179 local_irq_restore(flags);
4180 return NET_RX_SUCCESS;
4181 }
4182
4183
4184
4185
4186 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
4187 if (!rps_ipi_queued(sd))
4188 ____napi_schedule(sd, &sd->backlog);
4189 }
4190 goto enqueue;
4191 }
4192
4193 drop:
4194 sd->dropped++;
4195 rps_unlock(sd);
4196
4197 local_irq_restore(flags);
4198
4199 atomic_long_inc(&skb->dev->rx_dropped);
4200 kfree_skb(skb);
4201 return NET_RX_DROP;
4202 }
4203
4204 static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
4205 {
4206 struct net_device *dev = skb->dev;
4207 struct netdev_rx_queue *rxqueue;
4208
4209 rxqueue = dev->_rx;
4210
4211 if (skb_rx_queue_recorded(skb)) {
4212 u16 index = skb_get_rx_queue(skb);
4213
4214 if (unlikely(index >= dev->real_num_rx_queues)) {
4215 WARN_ONCE(dev->real_num_rx_queues > 1,
4216 "%s received packet on queue %u, but number "
4217 "of RX queues is %u\n",
4218 dev->name, index, dev->real_num_rx_queues);
4219
4220 return rxqueue;
4221 }
4222 rxqueue += index;
4223 }
4224 return rxqueue;
4225 }
4226
4227 static u32 netif_receive_generic_xdp(struct sk_buff *skb,
4228 struct xdp_buff *xdp,
4229 struct bpf_prog *xdp_prog)
4230 {
4231 struct netdev_rx_queue *rxqueue;
4232 void *orig_data, *orig_data_end;
4233 u32 metalen, act = XDP_DROP;
4234 __be16 orig_eth_type;
4235 struct ethhdr *eth;
4236 bool orig_bcast;
4237 int hlen, off;
4238 u32 mac_len;
4239
4240
4241
4242
4243 if (skb_is_redirected(skb))
4244 return XDP_PASS;
4245
4246
4247
4248
4249
4250 if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
4251 skb_headroom(skb) < XDP_PACKET_HEADROOM) {
4252 int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
4253 int troom = skb->tail + skb->data_len - skb->end;
4254
4255
4256
4257
4258 if (pskb_expand_head(skb,
4259 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
4260 troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
4261 goto do_drop;
4262 if (skb_linearize(skb))
4263 goto do_drop;
4264 }
4265
4266
4267
4268
4269 mac_len = skb->data - skb_mac_header(skb);
4270 hlen = skb_headlen(skb) + mac_len;
4271 xdp->data = skb->data - mac_len;
4272 xdp->data_meta = xdp->data;
4273 xdp->data_end = xdp->data + hlen;
4274 xdp->data_hard_start = skb->data - skb_headroom(skb);
4275 orig_data_end = xdp->data_end;
4276 orig_data = xdp->data;
4277 eth = (struct ethhdr *)xdp->data;
4278 orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
4279 orig_eth_type = eth->h_proto;
4280
4281 rxqueue = netif_get_rxqueue(skb);
4282 xdp->rxq = &rxqueue->xdp_rxq;
4283
4284 act = bpf_prog_run_xdp(xdp_prog, xdp);
4285
4286
4287 off = xdp->data - orig_data;
4288 if (off) {
4289 if (off > 0)
4290 __skb_pull(skb, off);
4291 else if (off < 0)
4292 __skb_push(skb, -off);
4293
4294 skb->mac_header += off;
4295 skb_reset_network_header(skb);
4296 }
4297
4298
4299
4300
4301 off = orig_data_end - xdp->data_end;
4302 if (off != 0) {
4303 skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
4304 skb->len -= off;
4305
4306 }
4307
4308
4309 eth = (struct ethhdr *)xdp->data;
4310 if ((orig_eth_type != eth->h_proto) ||
4311 (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
4312 __skb_push(skb, ETH_HLEN);
4313 skb->protocol = eth_type_trans(skb, skb->dev);
4314 }
4315
4316 switch (act) {
4317 case XDP_REDIRECT:
4318 case XDP_TX:
4319 __skb_push(skb, mac_len);
4320 break;
4321 case XDP_PASS:
4322 metalen = xdp->data - xdp->data_meta;
4323 if (metalen)
4324 skb_metadata_set(skb, metalen);
4325 break;
4326 default:
4327 bpf_warn_invalid_xdp_action(act);
4328 /* fall through */
4329 case XDP_ABORTED:
4330 trace_xdp_exception(skb->dev, xdp_prog, act);
4331 /* fall through */
4332 case XDP_DROP:
4333 do_drop:
4334 kfree_skb(skb);
4335 break;
4336 }
4337
4338 return act;
4339 }
4340
4341
4342
4343
4344 void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
4345 {
4346 struct net_device *dev = skb->dev;
4347 struct netdev_queue *txq;
4348 bool free_skb = true;
4349 int cpu, rc;
4350
4351 txq = netdev_core_pick_tx(dev, skb, NULL);
4352 cpu = smp_processor_id();
4353 HARD_TX_LOCK(dev, txq, cpu);
4354 if (!netif_xmit_stopped(txq)) {
4355 rc = netdev_start_xmit(skb, dev, txq, 0);
4356 if (dev_xmit_complete(rc))
4357 free_skb = false;
4358 }
4359 HARD_TX_UNLOCK(dev, txq);
4360 if (free_skb) {
4361 trace_xdp_exception(dev, xdp_prog, XDP_TX);
4362 kfree_skb(skb);
4363 }
4364 }
4365 EXPORT_SYMBOL_GPL(generic_xdp_tx);
4366
4367 static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
4368
4369 int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
4370 {
4371 if (xdp_prog) {
4372 struct xdp_buff xdp;
4373 u32 act;
4374 int err;
4375
4376 act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
4377 if (act != XDP_PASS) {
4378 switch (act) {
4379 case XDP_REDIRECT:
4380 err = xdp_do_generic_redirect(skb->dev, skb,
4381 &xdp, xdp_prog);
4382 if (err)
4383 goto out_redir;
4384 break;
4385 case XDP_TX:
4386 generic_xdp_tx(skb, xdp_prog);
4387 break;
4388 }
4389 return XDP_DROP;
4390 }
4391 }
4392 return XDP_PASS;
4393 out_redir:
4394 kfree_skb(skb);
4395 return XDP_DROP;
4396 }
4397 EXPORT_SYMBOL_GPL(do_xdp_generic);
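/*
 * Editor's illustrative sketch (not part of dev.c): a driver without native
 * XDP running the generic hook explicitly on its RX path, mirroring the
 * generic-XDP branch in __netif_receive_skb_core() below (which also wraps
 * the call in preempt_disable() for XDP_REDIRECT's per-CPU state).
 * "my_dev_rx_xdp" is an assumed helper.
 */
static bool my_dev_rx_xdp(struct sk_buff *skb)
{
	struct bpf_prog *prog;
	u32 verdict;

	rcu_read_lock();
	preempt_disable();
	prog = rcu_dereference(skb->dev->xdp_prog);
	verdict = do_xdp_generic(prog, skb);	/* XDP_PASS if no program */
	preempt_enable();
	rcu_read_unlock();

	/* XDP_PASS: keep processing the skb; anything else (TX, REDIRECT,
	 * DROP) has already been handled and the skb must not be touched.
	 */
	return verdict == XDP_PASS;
}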
4398
4399 static int netif_rx_internal(struct sk_buff *skb)
4400 {
4401 int ret;
4402
4403 net_timestamp_check(netdev_tstamp_prequeue, skb);
4404
4405 trace_netif_rx(skb);
4406
4407 #ifdef CONFIG_RPS
4408 if (static_branch_unlikely(&rps_needed)) {
4409 struct rps_dev_flow voidflow, *rflow = &voidflow;
4410 int cpu;
4411
4412 preempt_disable();
4413 rcu_read_lock();
4414
4415 cpu = get_rps_cpu(skb->dev, skb, &rflow);
4416 if (cpu < 0)
4417 cpu = smp_processor_id();
4418
4419 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
4420
4421 rcu_read_unlock();
4422 preempt_enable();
4423 } else
4424 #endif
4425 {
4426 unsigned int qtail;
4427
4428 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
4429 put_cpu();
4430 }
4431 return ret;
4432 }
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449 int netif_rx(struct sk_buff *skb)
4450 {
4451 int ret;
4452
4453 trace_netif_rx_entry(skb);
4454
4455 ret = netif_rx_internal(skb);
4456 trace_netif_rx_exit(ret);
4457
4458 return ret;
4459 }
4460 EXPORT_SYMBOL(netif_rx);
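/*
 * Editor's illustrative sketch (not part of dev.c): the classic non-NAPI RX
 * path feeding netif_rx() above from an interrupt handler.  The frame copy
 * and "my_dev_rx_one" are assumptions; eth_type_trans() fills skb->protocol
 * and skb->dev before the skb is queued to the per-CPU backlog.
 */
static void my_dev_rx_one(struct net_device *dev, const void *frame,
			  unsigned int len)
{
	struct sk_buff *skb;

	skb = netdev_alloc_skb_ip_align(dev, len);
	if (!skb) {
		dev->stats.rx_dropped++;
		return;
	}

	skb_put_data(skb, frame, len);
	skb->protocol = eth_type_trans(skb, dev);

	netif_rx(skb);			/* safe from hard-IRQ context */

	dev->stats.rx_packets++;
	dev->stats.rx_bytes += len;
}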
4461
4462 int netif_rx_ni(struct sk_buff *skb)
4463 {
4464 int err;
4465
4466 trace_netif_rx_ni_entry(skb);
4467
4468 preempt_disable();
4469 err = netif_rx_internal(skb);
4470 if (local_softirq_pending())
4471 do_softirq();
4472 preempt_enable();
4473 trace_netif_rx_ni_exit(err);
4474
4475 return err;
4476 }
4477 EXPORT_SYMBOL(netif_rx_ni);
4478
4479 static __latent_entropy void net_tx_action(struct softirq_action *h)
4480 {
4481 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
4482
4483 if (sd->completion_queue) {
4484 struct sk_buff *clist;
4485
4486 local_irq_disable();
4487 clist = sd->completion_queue;
4488 sd->completion_queue = NULL;
4489 local_irq_enable();
4490
4491 while (clist) {
4492 struct sk_buff *skb = clist;
4493
4494 clist = clist->next;
4495
4496 WARN_ON(refcount_read(&skb->users));
4497 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
4498 trace_consume_skb(skb);
4499 else
4500 trace_kfree_skb(skb, net_tx_action);
4501
4502 if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
4503 __kfree_skb(skb);
4504 else
4505 __kfree_skb_defer(skb);
4506 }
4507
4508 __kfree_skb_flush();
4509 }
4510
4511 if (sd->output_queue) {
4512 struct Qdisc *head;
4513
4514 local_irq_disable();
4515 head = sd->output_queue;
4516 sd->output_queue = NULL;
4517 sd->output_queue_tailp = &sd->output_queue;
4518 local_irq_enable();
4519
4520 while (head) {
4521 struct Qdisc *q = head;
4522 spinlock_t *root_lock = NULL;
4523
4524 head = head->next_sched;
4525
4526 if (!(q->flags & TCQ_F_NOLOCK)) {
4527 root_lock = qdisc_lock(q);
4528 spin_lock(root_lock);
4529 }
4530
4531
4532
4533 smp_mb__before_atomic();
4534 clear_bit(__QDISC_STATE_SCHED, &q->state);
4535 qdisc_run(q);
4536 if (root_lock)
4537 spin_unlock(root_lock);
4538 }
4539 }
4540
4541 xfrm_dev_backlog(sd);
4542 }
4543
4544 #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
4545
4546 int (*br_fdb_test_addr_hook)(struct net_device *dev,
4547 unsigned char *addr) __read_mostly;
4548 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
4549 #endif
4550
4551 static inline struct sk_buff *
4552 sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
4553 struct net_device *orig_dev)
4554 {
4555 #ifdef CONFIG_NET_CLS_ACT
4556 struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
4557 struct tcf_result cl_res;
4558
4559
4560
4561
4562
4563
4564 if (!miniq)
4565 return skb;
4566
4567 if (*pt_prev) {
4568 *ret = deliver_skb(skb, *pt_prev, orig_dev);
4569 *pt_prev = NULL;
4570 }
4571
4572 qdisc_skb_cb(skb)->pkt_len = skb->len;
4573 skb->tc_at_ingress = 1;
4574 mini_qdisc_bstats_cpu_update(miniq, skb);
4575
4576 switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
4577 case TC_ACT_OK:
4578 case TC_ACT_RECLASSIFY:
4579 skb->tc_index = TC_H_MIN(cl_res.classid);
4580 break;
4581 case TC_ACT_SHOT:
4582 mini_qdisc_qstats_cpu_drop(miniq);
4583 kfree_skb(skb);
4584 return NULL;
4585 case TC_ACT_STOLEN:
4586 case TC_ACT_QUEUED:
4587 case TC_ACT_TRAP:
4588 consume_skb(skb);
4589 return NULL;
4590 case TC_ACT_REDIRECT:
4591
4592
4593
4594
4595 __skb_push(skb, skb->mac_len);
4596 skb_do_redirect(skb);
4597 return NULL;
4598 case TC_ACT_CONSUMED:
4599 return NULL;
4600 default:
4601 break;
4602 }
4603 #endif
4604 return skb;
4605 }
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616 bool netdev_is_rx_handler_busy(struct net_device *dev)
4617 {
4618 ASSERT_RTNL();
4619 return dev && rtnl_dereference(dev->rx_handler);
4620 }
4621 EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637 int netdev_rx_handler_register(struct net_device *dev,
4638 rx_handler_func_t *rx_handler,
4639 void *rx_handler_data)
4640 {
4641 if (netdev_is_rx_handler_busy(dev))
4642 return -EBUSY;
4643
4644 if (dev->priv_flags & IFF_NO_RX_HANDLER)
4645 return -EINVAL;
4646
4647
4648 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
4649 rcu_assign_pointer(dev->rx_handler, rx_handler);
4650
4651 return 0;
4652 }
4653 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
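/*
 * Editor's illustrative sketch (not part of dev.c): registering an rx_handler
 * the way bridge/bonding/macvlan do.  The handler can steal, re-inject or
 * pass the skb by returning one of the RX_HANDLER_* codes that
 * __netif_receive_skb_core() below acts on.  "struct my_port" and
 * "my_port_attach" are assumptions for the example.
 */
struct my_port {
	struct net_device *upper_dev;
};

static rx_handler_result_t my_rx_handler(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct my_port *port = rcu_dereference(skb->dev->rx_handler_data);

	/* Steer the frame to the aggregating upper device and let the core
	 * re-run the RX path for it.
	 */
	skb->dev = port->upper_dev;
	return RX_HANDLER_ANOTHER;
}

static int my_port_attach(struct net_device *lower, struct my_port *port)
{
	ASSERT_RTNL();
	return netdev_rx_handler_register(lower, my_rx_handler, port);
}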
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663 void netdev_rx_handler_unregister(struct net_device *dev)
4664 {
4665
4666 ASSERT_RTNL();
4667 RCU_INIT_POINTER(dev->rx_handler, NULL);
4668
4669
4670
4671
4672 synchronize_net();
4673 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
4674 }
4675 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
4676
4677
4678
4679
4680
4681 static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
4682 {
4683 switch (skb->protocol) {
4684 case htons(ETH_P_ARP):
4685 case htons(ETH_P_IP):
4686 case htons(ETH_P_IPV6):
4687 case htons(ETH_P_8021Q):
4688 case htons(ETH_P_8021AD):
4689 return true;
4690 default:
4691 return false;
4692 }
4693 }
4694
4695 static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
4696 int *ret, struct net_device *orig_dev)
4697 {
4698 #ifdef CONFIG_NETFILTER_INGRESS
4699 if (nf_hook_ingress_active(skb)) {
4700 int ingress_retval;
4701
4702 if (*pt_prev) {
4703 *ret = deliver_skb(skb, *pt_prev, orig_dev);
4704 *pt_prev = NULL;
4705 }
4706
4707 rcu_read_lock();
4708 ingress_retval = nf_hook_ingress(skb);
4709 rcu_read_unlock();
4710 return ingress_retval;
4711 }
4712 #endif
4713 return 0;
4714 }
4715
4716 static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
4717 struct packet_type **ppt_prev)
4718 {
4719 struct packet_type *ptype, *pt_prev;
4720 rx_handler_func_t *rx_handler;
4721 struct sk_buff *skb = *pskb;
4722 struct net_device *orig_dev;
4723 bool deliver_exact = false;
4724 int ret = NET_RX_DROP;
4725 __be16 type;
4726
4727 net_timestamp_check(!netdev_tstamp_prequeue, skb);
4728
4729 trace_netif_receive_skb(skb);
4730
4731 orig_dev = skb->dev;
4732
4733 skb_reset_network_header(skb);
4734 if (!skb_transport_header_was_set(skb))
4735 skb_reset_transport_header(skb);
4736 skb_reset_mac_len(skb);
4737
4738 pt_prev = NULL;
4739
4740 another_round:
4741 skb->skb_iif = skb->dev->ifindex;
4742
4743 __this_cpu_inc(softnet_data.processed);
4744
4745 if (static_branch_unlikely(&generic_xdp_needed_key)) {
4746 int ret2;
4747
4748 preempt_disable();
4749 ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
4750 preempt_enable();
4751
4752 if (ret2 != XDP_PASS) {
4753 ret = NET_RX_DROP;
4754 goto out;
4755 }
4756 skb_reset_mac_len(skb);
4757 }
4758
4759 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
4760 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
4761 skb = skb_vlan_untag(skb);
4762 if (unlikely(!skb))
4763 goto out;
4764 }
4765
4766 if (skb_skip_tc_classify(skb))
4767 goto skip_classify;
4768
4769 if (pfmemalloc)
4770 goto skip_taps;
4771
4772 list_for_each_entry_rcu(ptype, &ptype_all, list) {
4773 if (pt_prev)
4774 ret = deliver_skb(skb, pt_prev, orig_dev);
4775 pt_prev = ptype;
4776 }
4777
4778 list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
4779 if (pt_prev)
4780 ret = deliver_skb(skb, pt_prev, orig_dev);
4781 pt_prev = ptype;
4782 }
4783
4784 skip_taps:
4785 #ifdef CONFIG_NET_INGRESS
4786 if (static_branch_unlikely(&ingress_needed_key)) {
4787 skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
4788 if (!skb)
4789 goto out;
4790
4791 if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
4792 goto out;
4793 }
4794 #endif
4795 skb_reset_redirect(skb);
4796 skip_classify:
4797 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
4798 goto drop;
4799
4800 if (skb_vlan_tag_present(skb)) {
4801 if (pt_prev) {
4802 ret = deliver_skb(skb, pt_prev, orig_dev);
4803 pt_prev = NULL;
4804 }
4805 if (vlan_do_receive(&skb))
4806 goto another_round;
4807 else if (unlikely(!skb))
4808 goto out;
4809 }
4810
4811 rx_handler = rcu_dereference(skb->dev->rx_handler);
4812 if (rx_handler) {
4813 if (pt_prev) {
4814 ret = deliver_skb(skb, pt_prev, orig_dev);
4815 pt_prev = NULL;
4816 }
4817 switch (rx_handler(&skb)) {
4818 case RX_HANDLER_CONSUMED:
4819 ret = NET_RX_SUCCESS;
4820 goto out;
4821 case RX_HANDLER_ANOTHER:
4822 goto another_round;
4823 case RX_HANDLER_EXACT:
4824 deliver_exact = true;
4825 case RX_HANDLER_PASS:
4826 break;
4827 default:
4828 BUG();
4829 }
4830 }
4831
4832 if (unlikely(skb_vlan_tag_present(skb))) {
4833 check_vlan_id:
4834 if (skb_vlan_tag_get_id(skb)) {
4835
4836
4837
4838 skb->pkt_type = PACKET_OTHERHOST;
4839 } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
4840 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
4841
4842
4843
4844
4845 __vlan_hwaccel_clear_tag(skb);
4846 skb = skb_vlan_untag(skb);
4847 if (unlikely(!skb))
4848 goto out;
4849 if (vlan_do_receive(&skb))
4850
4851
4852
4853 goto another_round;
4854 else if (unlikely(!skb))
4855 goto out;
4856 else
4857
4858
4859
4860
4861 goto check_vlan_id;
4862 }
4863
4864
4865
4866
4867 __vlan_hwaccel_clear_tag(skb);
4868 }
4869
4870 type = skb->protocol;
4871
4872
4873 if (likely(!deliver_exact)) {
4874 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4875 &ptype_base[ntohs(type) &
4876 PTYPE_HASH_MASK]);
4877 }
4878
4879 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4880 &orig_dev->ptype_specific);
4881
4882 if (unlikely(skb->dev != orig_dev)) {
4883 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4884 &skb->dev->ptype_specific);
4885 }
4886
4887 if (pt_prev) {
4888 if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
4889 goto drop;
4890 *ppt_prev = pt_prev;
4891 } else {
4892 drop:
4893 if (!deliver_exact)
4894 atomic_long_inc(&skb->dev->rx_dropped);
4895 else
4896 atomic_long_inc(&skb->dev->rx_nohandler);
4897 kfree_skb(skb);
4898
4899
4900
4901 ret = NET_RX_DROP;
4902 }
4903
4904 out:
4905
4906
4907
4908
4909
4910
4911 *pskb = skb;
4912 return ret;
4913 }
4914
4915 static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
4916 {
4917 struct net_device *orig_dev = skb->dev;
4918 struct packet_type *pt_prev = NULL;
4919 int ret;
4920
4921 ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
4922 if (pt_prev)
4923 ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
4924 skb->dev, pt_prev, orig_dev);
4925 return ret;
4926 }
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943 int netif_receive_skb_core(struct sk_buff *skb)
4944 {
4945 int ret;
4946
4947 rcu_read_lock();
4948 ret = __netif_receive_skb_one_core(skb, false);
4949 rcu_read_unlock();
4950
4951 return ret;
4952 }
4953 EXPORT_SYMBOL(netif_receive_skb_core);
4954
4955 static inline void __netif_receive_skb_list_ptype(struct list_head *head,
4956 struct packet_type *pt_prev,
4957 struct net_device *orig_dev)
4958 {
4959 struct sk_buff *skb, *next;
4960
4961 if (!pt_prev)
4962 return;
4963 if (list_empty(head))
4964 return;
4965 if (pt_prev->list_func != NULL)
4966 INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv,
4967 ip_list_rcv, head, pt_prev, orig_dev);
4968 else
4969 list_for_each_entry_safe(skb, next, head, list) {
4970 skb_list_del_init(skb);
4971 pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
4972 }
4973 }
4974
4975 static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
4976 {
4977 /* Fast-path assumptions:
4978  * - There is no RX handler.
4979  * - Only one packet_type matches.
4980  * If either of these fails, we will end up doing some per-packet
4981  * processing in-line, then handling the 'last ptype' for the whole
4982  * sublist.  This can't cause out-of-order delivery to any single ptype,
4983  * because the 'last ptype' must be constant across the sublist, and all
4984  * other ptypes are handled per-packet.
4985  */
4986 /* Current (common) ptype of sublist */
4987 struct packet_type *pt_curr = NULL;
4988 /* Current (common) orig_dev of sublist */
4989 struct net_device *od_curr = NULL;
4990 struct list_head sublist;
4991 struct sk_buff *skb, *next;
4992
4993 INIT_LIST_HEAD(&sublist);
4994 list_for_each_entry_safe(skb, next, head, list) {
4995 struct net_device *orig_dev = skb->dev;
4996 struct packet_type *pt_prev = NULL;
4997
4998 skb_list_del_init(skb);
4999 __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
5000 if (!pt_prev)
5001 continue;
5002 if (pt_curr != pt_prev || od_curr != orig_dev) {
5003
5004 __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
5005
5006 INIT_LIST_HEAD(&sublist);
5007 pt_curr = pt_prev;
5008 od_curr = orig_dev;
5009 }
5010 list_add_tail(&skb->list, &sublist);
5011 }
5012
5013
5014 __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
5015 }
5016
5017 static int __netif_receive_skb(struct sk_buff *skb)
5018 {
5019 int ret;
5020
5021 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
5022 unsigned int noreclaim_flag;
5023
5024 /*
5025  * PFMEMALLOC skbs are special, they should
5026  * - be delivered to SOCK_MEMALLOC sockets only
5027  * - stay away from userspace
5028  * - have bounded memory usage
5029  *
5030  * Use PF_MEMALLOC as this saves us from propagating the allocation
5031  * context down to all allocation sites.
5032  */
5033 noreclaim_flag = memalloc_noreclaim_save();
5034 ret = __netif_receive_skb_one_core(skb, true);
5035 memalloc_noreclaim_restore(noreclaim_flag);
5036 } else
5037 ret = __netif_receive_skb_one_core(skb, false);
5038
5039 return ret;
5040 }
5041
5042 static void __netif_receive_skb_list(struct list_head *head)
5043 {
5044 unsigned long noreclaim_flag = 0;
5045 struct sk_buff *skb, *next;
5046 bool pfmemalloc = false;
5047
5048 list_for_each_entry_safe(skb, next, head, list) {
5049 if ((sk_memalloc_socks() && skb_pfmemalloc(skb)) != pfmemalloc) {
5050 struct list_head sublist;
5051
5052
5053 list_cut_before(&sublist, head, &skb->list);
5054 if (!list_empty(&sublist))
5055 __netif_receive_skb_list_core(&sublist, pfmemalloc);
5056 pfmemalloc = !pfmemalloc;
5057
5058 if (pfmemalloc)
5059 noreclaim_flag = memalloc_noreclaim_save();
5060 else
5061 memalloc_noreclaim_restore(noreclaim_flag);
5062 }
5063 }
5064
5065 if (!list_empty(head))
5066 __netif_receive_skb_list_core(head, pfmemalloc);
5067
5068 if (pfmemalloc)
5069 memalloc_noreclaim_restore(noreclaim_flag);
5070 }
5071
5072 static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
5073 {
5074 struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
5075 struct bpf_prog *new = xdp->prog;
5076 int ret = 0;
5077
5078 switch (xdp->command) {
5079 case XDP_SETUP_PROG:
5080 rcu_assign_pointer(dev->xdp_prog, new);
5081 if (old)
5082 bpf_prog_put(old);
5083
5084 if (old && !new) {
5085 static_branch_dec(&generic_xdp_needed_key);
5086 } else if (new && !old) {
5087 static_branch_inc(&generic_xdp_needed_key);
5088 dev_disable_lro(dev);
5089 dev_disable_gro_hw(dev);
5090 }
5091 break;
5092
5093 case XDP_QUERY_PROG:
5094 xdp->prog_id = old ? old->aux->id : 0;
5095 break;
5096
5097 default:
5098 ret = -EINVAL;
5099 break;
5100 }
5101
5102 return ret;
5103 }
5104
5105 static int netif_receive_skb_internal(struct sk_buff *skb)
5106 {
5107 int ret;
5108
5109 net_timestamp_check(netdev_tstamp_prequeue, skb);
5110
5111 if (skb_defer_rx_timestamp(skb))
5112 return NET_RX_SUCCESS;
5113
5114 rcu_read_lock();
5115 #ifdef CONFIG_RPS
5116 if (static_branch_unlikely(&rps_needed)) {
5117 struct rps_dev_flow voidflow, *rflow = &voidflow;
5118 int cpu = get_rps_cpu(skb->dev, skb, &rflow);
5119
5120 if (cpu >= 0) {
5121 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
5122 rcu_read_unlock();
5123 return ret;
5124 }
5125 }
5126 #endif
5127 ret = __netif_receive_skb(skb);
5128 rcu_read_unlock();
5129 return ret;
5130 }
5131
5132 static void netif_receive_skb_list_internal(struct list_head *head)
5133 {
5134 struct sk_buff *skb, *next;
5135 struct list_head sublist;
5136
5137 INIT_LIST_HEAD(&sublist);
5138 list_for_each_entry_safe(skb, next, head, list) {
5139 net_timestamp_check(netdev_tstamp_prequeue, skb);
5140 skb_list_del_init(skb);
5141 if (!skb_defer_rx_timestamp(skb))
5142 list_add_tail(&skb->list, &sublist);
5143 }
5144 list_splice_init(&sublist, head);
5145
5146 rcu_read_lock();
5147 #ifdef CONFIG_RPS
5148 if (static_branch_unlikely(&rps_needed)) {
5149 list_for_each_entry_safe(skb, next, head, list) {
5150 struct rps_dev_flow voidflow, *rflow = &voidflow;
5151 int cpu = get_rps_cpu(skb->dev, skb, &rflow);
5152
5153 if (cpu >= 0) {
5154
5155 skb_list_del_init(skb);
5156 enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
5157 }
5158 }
5159 }
5160 #endif
5161 __netif_receive_skb_list(head);
5162 rcu_read_unlock();
5163 }
5164
5165 /**
5166  * netif_receive_skb - process receive buffer from network
5167  * @skb: buffer to process
5168  *
5169  * netif_receive_skb() is the main receive data processing function.
5170  * It always succeeds. The buffer may be dropped during processing
5171  * for congestion control or by the protocol layers.
5172  *
5173  * This function may only be called from softirq context and interrupts
5174  * should be enabled.
5175  *
5176  * Return values (usually ignored):
5177  * NET_RX_SUCCESS: no congestion
5178  * NET_RX_DROP: packet was dropped
5179  */
5180 int netif_receive_skb(struct sk_buff *skb)
5181 {
5182 int ret;
5183
5184 trace_netif_receive_skb_entry(skb);
5185
5186 ret = netif_receive_skb_internal(skb);
5187 trace_netif_receive_skb_exit(ret);
5188
5189 return ret;
5190 }
5191 EXPORT_SYMBOL(netif_receive_skb);
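/* Illustrative sketch (not part of this file): a driver that does not use
 * GRO can hand each completed receive buffer straight to netif_receive_skb()
 * from its NAPI poll routine, which runs in softirq context with interrupts
 * enabled.  struct example_priv and example_rx_next_skb() are hypothetical.
 */
static int example_poll_no_gro(struct napi_struct *napi, int budget)
{
	struct example_priv *priv = container_of(napi, struct example_priv, napi);
	int work = 0;

	while (work < budget) {
		struct sk_buff *skb = example_rx_next_skb(priv); /* hypothetical */

		if (!skb)
			break;
		skb->protocol = eth_type_trans(skb, priv->netdev);
		netif_receive_skb(skb);
		work++;
	}
	return work;
}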
5192
5193 /**
5194  * netif_receive_skb_list - process many receive buffers from network
5195  * @head: list of skbs to process.
5196  *
5197  * Since the return value of netif_receive_skb() is normally ignored, and
5198  * wouldn't be meaningful for a list, this function returns void.
5199  *
5200  * This function may only be called from softirq context and interrupts
5201  * should be enabled.
5202  */
5203 void netif_receive_skb_list(struct list_head *head)
5204 {
5205 struct sk_buff *skb;
5206
5207 if (list_empty(head))
5208 return;
5209 if (trace_netif_receive_skb_list_entry_enabled()) {
5210 list_for_each_entry(skb, head, list)
5211 trace_netif_receive_skb_list_entry(skb);
5212 }
5213 netif_receive_skb_list_internal(head);
5214 trace_netif_receive_skb_list_exit(0);
5215 }
5216 EXPORT_SYMBOL(netif_receive_skb_list);
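/* Illustrative sketch (not part of this file): a driver can batch completed
 * buffers on a local list and submit them in one call, letting the stack
 * amortise per-packet costs across the batch.  example_rx_next_skb() is a
 * hypothetical helper.
 */
static void example_rx_batch(struct net_device *netdev, int budget)
{
	struct sk_buff *skb;
	LIST_HEAD(rx_list);

	while (budget-- && (skb = example_rx_next_skb(netdev))) { /* hypothetical */
		skb->protocol = eth_type_trans(skb, netdev);
		list_add_tail(&skb->list, &rx_list);
	}
	netif_receive_skb_list(&rx_list);	/* returns immediately if the list is empty */
}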
5217
5218 DEFINE_PER_CPU(struct work_struct, flush_works);
5219
5220
5221 static void flush_backlog(struct work_struct *work)
5222 {
5223 struct sk_buff *skb, *tmp;
5224 struct softnet_data *sd;
5225
5226 local_bh_disable();
5227 sd = this_cpu_ptr(&softnet_data);
5228
5229 local_irq_disable();
5230 rps_lock(sd);
5231 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
5232 if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5233 __skb_unlink(skb, &sd->input_pkt_queue);
5234 kfree_skb(skb);
5235 input_queue_head_incr(sd);
5236 }
5237 }
5238 rps_unlock(sd);
5239 local_irq_enable();
5240
5241 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
5242 if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5243 __skb_unlink(skb, &sd->process_queue);
5244 kfree_skb(skb);
5245 input_queue_head_incr(sd);
5246 }
5247 }
5248 local_bh_enable();
5249 }
5250
5251 static void flush_all_backlogs(void)
5252 {
5253 unsigned int cpu;
5254
5255 get_online_cpus();
5256
5257 for_each_online_cpu(cpu)
5258 queue_work_on(cpu, system_highpri_wq,
5259 per_cpu_ptr(&flush_works, cpu));
5260
5261 for_each_online_cpu(cpu)
5262 flush_work(per_cpu_ptr(&flush_works, cpu));
5263
5264 put_online_cpus();
5265 }
5266
5267 /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
5268 static void gro_normal_list(struct napi_struct *napi)
5269 {
5270 if (!napi->rx_count)
5271 return;
5272 netif_receive_skb_list_internal(&napi->rx_list);
5273 INIT_LIST_HEAD(&napi->rx_list);
5274 napi->rx_count = 0;
5275 }
5276
5277 /* Queue one GRO_NORMAL SKB up for list processing.  If the batch size is
5278  * exceeded, pass the whole batch up to the stack.
5279  */
5280 static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
5281 {
5282 list_add_tail(&skb->list, &napi->rx_list);
5283 if (++napi->rx_count >= gro_normal_batch)
5284 gro_normal_list(napi);
5285 }
5286
5287 INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
5288 INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
5289 static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
5290 {
5291 struct packet_offload *ptype;
5292 __be16 type = skb->protocol;
5293 struct list_head *head = &offload_base;
5294 int err = -ENOENT;
5295
5296 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
5297
5298 if (NAPI_GRO_CB(skb)->count == 1) {
5299 skb_shinfo(skb)->gso_size = 0;
5300 goto out;
5301 }
5302
5303 rcu_read_lock();
5304 list_for_each_entry_rcu(ptype, head, list) {
5305 if (ptype->type != type || !ptype->callbacks.gro_complete)
5306 continue;
5307
5308 err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
5309 ipv6_gro_complete, inet_gro_complete,
5310 skb, 0);
5311 break;
5312 }
5313 rcu_read_unlock();
5314
5315 if (err) {
5316 WARN_ON(&ptype->list == head);
5317 kfree_skb(skb);
5318 return NET_RX_SUCCESS;
5319 }
5320
5321 out:
5322 gro_normal_one(napi, skb);
5323 return NET_RX_SUCCESS;
5324 }
5325
5326 static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
5327 bool flush_old)
5328 {
5329 struct list_head *head = &napi->gro_hash[index].list;
5330 struct sk_buff *skb, *p;
5331
5332 list_for_each_entry_safe_reverse(skb, p, head, list) {
5333 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
5334 return;
5335 skb_list_del_init(skb);
5336 napi_gro_complete(napi, skb);
5337 napi->gro_hash[index].count--;
5338 }
5339
5340 if (!napi->gro_hash[index].count)
5341 __clear_bit(index, &napi->gro_bitmask);
5342 }
5343
5344 /* napi->gro_hash[].list contains packets ordered by age.
5345  * youngest packets at the head of the list.
5346  * oldest packets at the tail of the list.
5347  */
5348 void napi_gro_flush(struct napi_struct *napi, bool flush_old)
5349 {
5350 unsigned long bitmask = napi->gro_bitmask;
5351 unsigned int i, base = ~0U;
5352
5353 while ((i = ffs(bitmask)) != 0) {
5354 bitmask >>= i;
5355 base += i;
5356 __napi_gro_flush_chain(napi, base, flush_old);
5357 }
5358 }
5359 EXPORT_SYMBOL(napi_gro_flush);
5360
5361 static struct list_head *gro_list_prepare(struct napi_struct *napi,
5362 struct sk_buff *skb)
5363 {
5364 unsigned int maclen = skb->dev->hard_header_len;
5365 u32 hash = skb_get_hash_raw(skb);
5366 struct list_head *head;
5367 struct sk_buff *p;
5368
5369 head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
5370 list_for_each_entry(p, head, list) {
5371 unsigned long diffs;
5372
5373 NAPI_GRO_CB(p)->flush = 0;
5374
5375 if (hash != skb_get_hash_raw(p)) {
5376 NAPI_GRO_CB(p)->same_flow = 0;
5377 continue;
5378 }
5379
5380 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
5381 diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
5382 if (skb_vlan_tag_present(p))
5383 diffs |= p->vlan_tci ^ skb->vlan_tci;
5384 diffs |= skb_metadata_dst_cmp(p, skb);
5385 diffs |= skb_metadata_differs(p, skb);
5386 if (maclen == ETH_HLEN)
5387 diffs |= compare_ether_header(skb_mac_header(p),
5388 skb_mac_header(skb));
5389 else if (!diffs)
5390 diffs = memcmp(skb_mac_header(p),
5391 skb_mac_header(skb),
5392 maclen);
5393 NAPI_GRO_CB(p)->same_flow = !diffs;
5394 }
5395
5396 return head;
5397 }
5398
5399 static void skb_gro_reset_offset(struct sk_buff *skb)
5400 {
5401 const struct skb_shared_info *pinfo = skb_shinfo(skb);
5402 const skb_frag_t *frag0 = &pinfo->frags[0];
5403
5404 NAPI_GRO_CB(skb)->data_offset = 0;
5405 NAPI_GRO_CB(skb)->frag0 = NULL;
5406 NAPI_GRO_CB(skb)->frag0_len = 0;
5407
5408 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
5409 pinfo->nr_frags &&
5410 !PageHighMem(skb_frag_page(frag0))) {
5411 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
5412 NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
5413 skb_frag_size(frag0),
5414 skb->end - skb->tail);
5415 }
5416 }
5417
5418 static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
5419 {
5420 struct skb_shared_info *pinfo = skb_shinfo(skb);
5421
5422 BUG_ON(skb->end - skb->tail < grow);
5423
5424 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
5425
5426 skb->data_len -= grow;
5427 skb->tail += grow;
5428
5429 skb_frag_off_add(&pinfo->frags[0], grow);
5430 skb_frag_size_sub(&pinfo->frags[0], grow);
5431
5432 if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
5433 skb_frag_unref(skb, 0);
5434 memmove(pinfo->frags, pinfo->frags + 1,
5435 --pinfo->nr_frags * sizeof(pinfo->frags[0]));
5436 }
5437 }
5438
5439 static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
5440 {
5441 struct sk_buff *oldest;
5442
5443 oldest = list_last_entry(head, struct sk_buff, list);
5444
5445 /* We are called with head length >= MAX_GRO_SKBS, so this is
5446  * impossible.
5447  */
5448 if (WARN_ON_ONCE(!oldest))
5449 return;
5450
5451 /* Do not adjust napi->gro_hash[].count, the caller is adding a
5452  * new SKB to the chain.
5453  */
5454 skb_list_del_init(oldest);
5455 napi_gro_complete(napi, oldest);
5456 }
5457
5458 INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
5459 struct sk_buff *));
5460 INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
5461 struct sk_buff *));
5462 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
5463 {
5464 u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
5465 struct list_head *head = &offload_base;
5466 struct packet_offload *ptype;
5467 __be16 type = skb->protocol;
5468 struct list_head *gro_head;
5469 struct sk_buff *pp = NULL;
5470 enum gro_result ret;
5471 int same_flow;
5472 int grow;
5473
5474 if (netif_elide_gro(skb->dev))
5475 goto normal;
5476
5477 gro_head = gro_list_prepare(napi, skb);
5478
5479 rcu_read_lock();
5480 list_for_each_entry_rcu(ptype, head, list) {
5481 if (ptype->type != type || !ptype->callbacks.gro_receive)
5482 continue;
5483
5484 skb_set_network_header(skb, skb_gro_offset(skb));
5485 skb_reset_mac_len(skb);
5486 NAPI_GRO_CB(skb)->same_flow = 0;
5487 NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
5488 NAPI_GRO_CB(skb)->free = 0;
5489 NAPI_GRO_CB(skb)->encap_mark = 0;
5490 NAPI_GRO_CB(skb)->recursion_counter = 0;
5491 NAPI_GRO_CB(skb)->is_fou = 0;
5492 NAPI_GRO_CB(skb)->is_atomic = 1;
5493 NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
5494
5495
5496 switch (skb->ip_summed) {
5497 case CHECKSUM_COMPLETE:
5498 NAPI_GRO_CB(skb)->csum = skb->csum;
5499 NAPI_GRO_CB(skb)->csum_valid = 1;
5500 NAPI_GRO_CB(skb)->csum_cnt = 0;
5501 break;
5502 case CHECKSUM_UNNECESSARY:
5503 NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
5504 NAPI_GRO_CB(skb)->csum_valid = 0;
5505 break;
5506 default:
5507 NAPI_GRO_CB(skb)->csum_cnt = 0;
5508 NAPI_GRO_CB(skb)->csum_valid = 0;
5509 }
5510
5511 pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
5512 ipv6_gro_receive, inet_gro_receive,
5513 gro_head, skb);
5514 break;
5515 }
5516 rcu_read_unlock();
5517
5518 if (&ptype->list == head)
5519 goto normal;
5520
5521 if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) {
5522 ret = GRO_CONSUMED;
5523 goto ok;
5524 }
5525
5526 same_flow = NAPI_GRO_CB(skb)->same_flow;
5527 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
5528
5529 if (pp) {
5530 skb_list_del_init(pp);
5531 napi_gro_complete(napi, pp);
5532 napi->gro_hash[hash].count--;
5533 }
5534
5535 if (same_flow)
5536 goto ok;
5537
5538 if (NAPI_GRO_CB(skb)->flush)
5539 goto normal;
5540
5541 if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
5542 gro_flush_oldest(napi, gro_head);
5543 } else {
5544 napi->gro_hash[hash].count++;
5545 }
5546 NAPI_GRO_CB(skb)->count = 1;
5547 NAPI_GRO_CB(skb)->age = jiffies;
5548 NAPI_GRO_CB(skb)->last = skb;
5549 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
5550 list_add(&skb->list, gro_head);
5551 ret = GRO_HELD;
5552
5553 pull:
5554 grow = skb_gro_offset(skb) - skb_headlen(skb);
5555 if (grow > 0)
5556 gro_pull_from_frag0(skb, grow);
5557 ok:
5558 if (napi->gro_hash[hash].count) {
5559 if (!test_bit(hash, &napi->gro_bitmask))
5560 __set_bit(hash, &napi->gro_bitmask);
5561 } else if (test_bit(hash, &napi->gro_bitmask)) {
5562 __clear_bit(hash, &napi->gro_bitmask);
5563 }
5564
5565 return ret;
5566
5567 normal:
5568 ret = GRO_NORMAL;
5569 goto pull;
5570 }
5571
5572 struct packet_offload *gro_find_receive_by_type(__be16 type)
5573 {
5574 struct list_head *offload_head = &offload_base;
5575 struct packet_offload *ptype;
5576
5577 list_for_each_entry_rcu(ptype, offload_head, list) {
5578 if (ptype->type != type || !ptype->callbacks.gro_receive)
5579 continue;
5580 return ptype;
5581 }
5582 return NULL;
5583 }
5584 EXPORT_SYMBOL(gro_find_receive_by_type);
5585
5586 struct packet_offload *gro_find_complete_by_type(__be16 type)
5587 {
5588 struct list_head *offload_head = &offload_base;
5589 struct packet_offload *ptype;
5590
5591 list_for_each_entry_rcu(ptype, offload_head, list) {
5592 if (ptype->type != type || !ptype->callbacks.gro_complete)
5593 continue;
5594 return ptype;
5595 }
5596 return NULL;
5597 }
5598 EXPORT_SYMBOL(gro_find_complete_by_type);
5599
5600 static void napi_skb_free_stolen_head(struct sk_buff *skb)
5601 {
5602 skb_dst_drop(skb);
5603 skb_ext_put(skb);
5604 kmem_cache_free(skbuff_head_cache, skb);
5605 }
5606
5607 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
5608 {
5609 switch (ret) {
5610 case GRO_NORMAL:
5611 if (netif_receive_skb_internal(skb))
5612 ret = GRO_DROP;
5613 break;
5614
5615 case GRO_DROP:
5616 kfree_skb(skb);
5617 break;
5618
5619 case GRO_MERGED_FREE:
5620 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
5621 napi_skb_free_stolen_head(skb);
5622 else
5623 __kfree_skb(skb);
5624 break;
5625
5626 case GRO_HELD:
5627 case GRO_MERGED:
5628 case GRO_CONSUMED:
5629 break;
5630 }
5631
5632 return ret;
5633 }
5634
5635 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
5636 {
5637 gro_result_t ret;
5638
5639 skb_mark_napi_id(skb, napi);
5640 trace_napi_gro_receive_entry(skb);
5641
5642 skb_gro_reset_offset(skb);
5643
5644 ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
5645 trace_napi_gro_receive_exit(ret);
5646
5647 return ret;
5648 }
5649 EXPORT_SYMBOL(napi_gro_receive);
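/* Illustrative sketch (not part of this file): the common driver pattern is
 * to feed every received buffer through napi_gro_receive() so packets of the
 * same flow can be coalesced before entering the stack.  struct example_priv
 * and example_rx_next_skb() are hypothetical.
 */
static int example_poll_gro(struct napi_struct *napi, int budget)
{
	struct example_priv *priv = container_of(napi, struct example_priv, napi);
	int work;

	for (work = 0; work < budget; work++) {
		struct sk_buff *skb = example_rx_next_skb(priv); /* hypothetical */

		if (!skb)
			break;
		skb->protocol = eth_type_trans(skb, priv->netdev);
		napi_gro_receive(napi, skb);
	}
	return work;
}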
5650
5651 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
5652 {
5653 if (unlikely(skb->pfmemalloc)) {
5654 consume_skb(skb);
5655 return;
5656 }
5657 __skb_pull(skb, skb_headlen(skb));
5658
5659 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
5660 __vlan_hwaccel_clear_tag(skb);
5661 skb->dev = napi->dev;
5662 skb->skb_iif = 0;
5663
5664
5665 skb->pkt_type = PACKET_HOST;
5666
5667 skb->encapsulation = 0;
5668 skb_shinfo(skb)->gso_type = 0;
5669 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
5670 skb_ext_reset(skb);
5671
5672 napi->skb = skb;
5673 }
5674
5675 struct sk_buff *napi_get_frags(struct napi_struct *napi)
5676 {
5677 struct sk_buff *skb = napi->skb;
5678
5679 if (!skb) {
5680 skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
5681 if (skb) {
5682 napi->skb = skb;
5683 skb_mark_napi_id(skb, napi);
5684 }
5685 }
5686 return skb;
5687 }
5688 EXPORT_SYMBOL(napi_get_frags);
5689
5690 static gro_result_t napi_frags_finish(struct napi_struct *napi,
5691 struct sk_buff *skb,
5692 gro_result_t ret)
5693 {
5694 switch (ret) {
5695 case GRO_NORMAL:
5696 case GRO_HELD:
5697 __skb_push(skb, ETH_HLEN);
5698 skb->protocol = eth_type_trans(skb, skb->dev);
5699 if (ret == GRO_NORMAL)
5700 gro_normal_one(napi, skb);
5701 break;
5702
5703 case GRO_DROP:
5704 napi_reuse_skb(napi, skb);
5705 break;
5706
5707 case GRO_MERGED_FREE:
5708 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
5709 napi_skb_free_stolen_head(skb);
5710 else
5711 napi_reuse_skb(napi, skb);
5712 break;
5713
5714 case GRO_MERGED:
5715 case GRO_CONSUMED:
5716 break;
5717 }
5718
5719 return ret;
5720 }
5721
5722 /* The upper GRO stack assumes the network header starts at the GRO offset.
5723  * Drivers could call both napi_gro_frags() and napi_gro_receive(),
5724  * so we copy the Ethernet header into skb->data to have a common layout.
5725  */
5726 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
5727 {
5728 struct sk_buff *skb = napi->skb;
5729 const struct ethhdr *eth;
5730 unsigned int hlen = sizeof(*eth);
5731
5732 napi->skb = NULL;
5733
5734 skb_reset_mac_header(skb);
5735 skb_gro_reset_offset(skb);
5736
5737 if (unlikely(skb_gro_header_hard(skb, hlen))) {
5738 eth = skb_gro_header_slow(skb, hlen, 0);
5739 if (unlikely(!eth)) {
5740 net_warn_ratelimited("%s: dropping impossible skb from %s\n",
5741 __func__, napi->dev->name);
5742 napi_reuse_skb(napi, skb);
5743 return NULL;
5744 }
5745 } else {
5746 eth = (const struct ethhdr *)skb->data;
5747 gro_pull_from_frag0(skb, hlen);
5748 NAPI_GRO_CB(skb)->frag0 += hlen;
5749 NAPI_GRO_CB(skb)->frag0_len -= hlen;
5750 }
5751 __skb_pull(skb, hlen);
5752
5753 /*
5754  * This works because the only protocols we care about don't require
5755  * special handling.
5756  * We'll fix it up properly in napi_frags_finish().
5757  */
5758 skb->protocol = eth->h_proto;
5759
5760 return skb;
5761 }
5762
5763 gro_result_t napi_gro_frags(struct napi_struct *napi)
5764 {
5765 gro_result_t ret;
5766 struct sk_buff *skb = napi_frags_skb(napi);
5767
5768 if (!skb)
5769 return GRO_DROP;
5770
5771 trace_napi_gro_frags_entry(skb);
5772
5773 ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
5774 trace_napi_gro_frags_exit(ret);
5775
5776 return ret;
5777 }
5778 EXPORT_SYMBOL(napi_gro_frags);
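/* Illustrative sketch (not part of this file): page-based drivers can avoid
 * building a linear header by taking the preallocated skb from
 * napi_get_frags(), attaching the received page as a fragment and handing it
 * back via napi_gro_frags(), which pulls the Ethernet header itself.  The
 * page/offset/len parameters are hypothetical driver state.
 */
static void example_rx_frag(struct napi_struct *napi, struct page *page,
			    unsigned int offset, unsigned int len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb)) {
		put_page(page);		/* drop on allocation failure */
		return;
	}
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, len,
			PAGE_SIZE);	/* truesize: whole page consumed */
	napi_gro_frags(napi);		/* consumes napi->skb */
}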
5779
5780
5781
5782
5783 __sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
5784 {
5785 __wsum wsum;
5786 __sum16 sum;
5787
5788 wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
5789
5790
5791 sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
5792
5793 if (likely(!sum)) {
5794 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
5795 !skb->csum_complete_sw)
5796 netdev_rx_csum_fault(skb->dev, skb);
5797 }
5798
5799 NAPI_GRO_CB(skb)->csum = wsum;
5800 NAPI_GRO_CB(skb)->csum_valid = 1;
5801
5802 return sum;
5803 }
5804 EXPORT_SYMBOL(__skb_gro_checksum_complete);
5805
5806 static void net_rps_send_ipi(struct softnet_data *remsd)
5807 {
5808 #ifdef CONFIG_RPS
5809 while (remsd) {
5810 struct softnet_data *next = remsd->rps_ipi_next;
5811
5812 if (cpu_online(remsd->cpu))
5813 smp_call_function_single_async(remsd->cpu, &remsd->csd);
5814 remsd = next;
5815 }
5816 #endif
5817 }
5818
5819
5820 /* net_rps_action_and_irq_enable() sends any pending IPIs for RPS.
5821  * Note: called with local irq disabled, but exits with local irq enabled.
5822  */
5823 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
5824 {
5825 #ifdef CONFIG_RPS
5826 struct softnet_data *remsd = sd->rps_ipi_list;
5827
5828 if (remsd) {
5829 sd->rps_ipi_list = NULL;
5830
5831 local_irq_enable();
5832
5833
5834 net_rps_send_ipi(remsd);
5835 } else
5836 #endif
5837 local_irq_enable();
5838 }
5839
5840 static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
5841 {
5842 #ifdef CONFIG_RPS
5843 return sd->rps_ipi_list != NULL;
5844 #else
5845 return false;
5846 #endif
5847 }
5848
5849 static int process_backlog(struct napi_struct *napi, int quota)
5850 {
5851 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
5852 bool again = true;
5853 int work = 0;
5854
5855 /* Check if we have pending IPIs; it is better to send them now
5856  * rather than waiting for net_rx_action() to end.
5857  */
5858 if (sd_has_rps_ipi_waiting(sd)) {
5859 local_irq_disable();
5860 net_rps_action_and_irq_enable(sd);
5861 }
5862
5863 napi->weight = dev_rx_weight;
5864 while (again) {
5865 struct sk_buff *skb;
5866
5867 while ((skb = __skb_dequeue(&sd->process_queue))) {
5868 rcu_read_lock();
5869 __netif_receive_skb(skb);
5870 rcu_read_unlock();
5871 input_queue_head_incr(sd);
5872 if (++work >= quota)
5873 return work;
5874
5875 }
5876
5877 local_irq_disable();
5878 rps_lock(sd);
5879 if (skb_queue_empty(&sd->input_pkt_queue)) {
5880 /*
5881  * Inline a custom version of __napi_complete().
5882  * Only the current cpu owns and manipulates this napi,
5883  * and NAPI_STATE_SCHED is the only possible flag set
5884  * on backlog.
5885  * We can use a plain write instead of clear_bit(),
5886  * and we dont need an smp_mb() memory barrier.
5887  */
5888 napi->state = 0;
5889 again = false;
5890 } else {
5891 skb_queue_splice_tail_init(&sd->input_pkt_queue,
5892 &sd->process_queue);
5893 }
5894 rps_unlock(sd);
5895 local_irq_enable();
5896 }
5897
5898 return work;
5899 }
5900
5901 /**
5902  * __napi_schedule - schedule for receive
5903  * @n: entry to schedule
5904  *
5905  * The entry's receive function will be scheduled to run.
5906  * Consider using __napi_schedule_irqoff() if hard irqs are masked.
5907  */
5908 void __napi_schedule(struct napi_struct *n)
5909 {
5910 unsigned long flags;
5911
5912 local_irq_save(flags);
5913 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
5914 local_irq_restore(flags);
5915 }
5916 EXPORT_SYMBOL(__napi_schedule);
5917
5918 /**
5919  * napi_schedule_prep - check if napi can be scheduled
5920  * @n: napi context
5921  *
5922  * Test if the NAPI routine is already running, and if not mark
5923  * it as running.  This is used as a condition variable to
5924  * ensure only one NAPI poll instance runs.  We also make
5925  * sure there is no pending NAPI disable.
5926  */
5927 bool napi_schedule_prep(struct napi_struct *n)
5928 {
5929 unsigned long val, new;
5930
5931 do {
5932 val = READ_ONCE(n->state);
5933 if (unlikely(val & NAPIF_STATE_DISABLE))
5934 return false;
5935 new = val | NAPIF_STATE_SCHED;
5936
5937 /* Sets the MISSED bit if the SCHED bit was already set.
5938  * Written as arithmetic rather than a branch so the compiler
5939  * emits branch-free code for:
5940  *	if (val & NAPIF_STATE_SCHED)
5941  *		new |= NAPIF_STATE_MISSED;
5942  */
5943 new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
5944 NAPIF_STATE_MISSED;
5945 } while (cmpxchg(&n->state, val, new) != val);
5946
5947 return !(val & NAPIF_STATE_SCHED);
5948 }
5949 EXPORT_SYMBOL(napi_schedule_prep);
5950
5951 /**
5952  * __napi_schedule_irqoff - schedule for receive
5953  * @n: entry to schedule
5954  *
5955  * Variant of __napi_schedule() assuming hard irqs are masked.
5956  */
5957 void __napi_schedule_irqoff(struct napi_struct *n)
5958 {
5959 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
5960 }
5961 EXPORT_SYMBOL(__napi_schedule_irqoff);
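/* Illustrative sketch (not part of this file): a typical hard-IRQ handler
 * masks further device interrupts and then schedules NAPI.  Because a
 * non-threaded handler already runs with hard IRQs disabled, the _irqoff
 * variant can be used here.  struct example_priv and example_mask_rx_irq()
 * are hypothetical.
 */
static irqreturn_t example_isr(int irq, void *data)
{
	struct example_priv *priv = data;

	if (napi_schedule_prep(&priv->napi)) {
		example_mask_rx_irq(priv);		/* hypothetical */
		__napi_schedule_irqoff(&priv->napi);
	}
	return IRQ_HANDLED;
}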
5962
5963 bool napi_complete_done(struct napi_struct *n, int work_done)
5964 {
5965 unsigned long flags, val, new;
5966
5967 /*
5968  * 1) Don't let napi dequeue from the cpu poll list
5969  *    just in case it is running on a different cpu.
5970  * 2) If we are busy polling, do nothing here, we have
5971  *    the guarantee we will be called later.
5972  */
5973 if (unlikely(n->state & (NAPIF_STATE_NPSVC |
5974 NAPIF_STATE_IN_BUSY_POLL)))
5975 return false;
5976
5977 if (n->gro_bitmask) {
5978 unsigned long timeout = 0;
5979
5980 if (work_done)
5981 timeout = n->dev->gro_flush_timeout;
5982
5983 /* When the NAPI instance uses a timeout and keeps postponing
5984  * it, we need to bound somehow the time packets are kept in
5985  * the GRO layer.
5986  */
5987 napi_gro_flush(n, !!timeout);
5988 if (timeout)
5989 hrtimer_start(&n->timer, ns_to_ktime(timeout),
5990 HRTIMER_MODE_REL_PINNED);
5991 }
5992
5993 gro_normal_list(n);
5994
5995 if (unlikely(!list_empty(&n->poll_list))) {
5996
5997 local_irq_save(flags);
5998 list_del_init(&n->poll_list);
5999 local_irq_restore(flags);
6000 }
6001
6002 do {
6003 val = READ_ONCE(n->state);
6004
6005 WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
6006
6007 new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
6008
6009 /* If STATE_MISSED was set, leave STATE_SCHED set, because we
6010  * will call napi->poll() one more time.  Written as arithmetic
6011  * so the compiler generates branch-free code.
6012  */
6013 new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
6014 NAPIF_STATE_SCHED;
6015 } while (cmpxchg(&n->state, val, new) != val);
6016
6017 if (unlikely(val & NAPIF_STATE_MISSED)) {
6018 __napi_schedule(n);
6019 return false;
6020 }
6021
6022 return true;
6023 }
6024 EXPORT_SYMBOL(napi_complete_done);
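/* Illustrative sketch (not part of this file): a poll routine that consumed
 * less than its budget reports the work done; only when napi_complete_done()
 * returns true (i.e. the instance was not rescheduled via NAPI_STATE_MISSED)
 * should device interrupts be re-enabled.  Helper names are hypothetical.
 */
static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_priv *priv = container_of(napi, struct example_priv, napi);
	int work = example_clean_rx_ring(priv, budget);	/* hypothetical */

	if (work < budget && napi_complete_done(napi, work))
		example_unmask_rx_irq(priv);		/* hypothetical */
	return work;
}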
6025
6026
6027 static struct napi_struct *napi_by_id(unsigned int napi_id)
6028 {
6029 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
6030 struct napi_struct *napi;
6031
6032 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
6033 if (napi->napi_id == napi_id)
6034 return napi;
6035
6036 return NULL;
6037 }
6038
6039 #if defined(CONFIG_NET_RX_BUSY_POLL)
6040
6041 #define BUSY_POLL_BUDGET 8
6042
6043 static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
6044 {
6045 int rc;
6046
6047 /* Busy polling means there is a high chance the device driver hard
6048  * irq could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED
6049  * was set in napi_schedule_prep().
6050  *
6051  * Since we are about to call napi->poll() once more, we can safely
6052  * clear NAPI_STATE_MISSED.
6053  *
6054  * We also clear the IN_BUSY_POLL flag before the final poll.
6055  */
6056 clear_bit(NAPI_STATE_MISSED, &napi->state);
6057 clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
6058
6059 local_bh_disable();
6060
6061
6062
6063
6064 rc = napi->poll(napi, BUSY_POLL_BUDGET);
6065
6066 /* We can't gro_normal_list() here, because napi->poll() might have
6067  * rearmed the napi (napi_complete_done()), in which case it could
6068  * already be running on another CPU. */
6069 trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
6070 netpoll_poll_unlock(have_poll_lock);
6071 if (rc == BUSY_POLL_BUDGET) {
6072 /* As the whole budget was spent, we still own the napi so we can
6073  * safely handle the rx_list.
6074  */
6075 gro_normal_list(napi);
6076 __napi_schedule(napi);
6077 }
6078 local_bh_enable();
6079 }
6080
6081 void napi_busy_loop(unsigned int napi_id,
6082 bool (*loop_end)(void *, unsigned long),
6083 void *loop_end_arg)
6084 {
6085 unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
6086 int (*napi_poll)(struct napi_struct *napi, int budget);
6087 void *have_poll_lock = NULL;
6088 struct napi_struct *napi;
6089
6090 restart:
6091 napi_poll = NULL;
6092
6093 rcu_read_lock();
6094
6095 napi = napi_by_id(napi_id);
6096 if (!napi)
6097 goto out;
6098
6099 preempt_disable();
6100 for (;;) {
6101 int work = 0;
6102
6103 local_bh_disable();
6104 if (!napi_poll) {
6105 unsigned long val = READ_ONCE(napi->state);
6106
6107 /* If multiple threads are competing for this napi,
6108  * we avoid dirtying napi->state as much as we can.
6109  */
6110 if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
6111 NAPIF_STATE_IN_BUSY_POLL))
6112 goto count;
6113 if (cmpxchg(&napi->state, val,
6114 val | NAPIF_STATE_IN_BUSY_POLL |
6115 NAPIF_STATE_SCHED) != val)
6116 goto count;
6117 have_poll_lock = netpoll_poll_lock(napi);
6118 napi_poll = napi->poll;
6119 }
6120 work = napi_poll(napi, BUSY_POLL_BUDGET);
6121 trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
6122 gro_normal_list(napi);
6123 count:
6124 if (work > 0)
6125 __NET_ADD_STATS(dev_net(napi->dev),
6126 LINUX_MIB_BUSYPOLLRXPACKETS, work);
6127 local_bh_enable();
6128
6129 if (!loop_end || loop_end(loop_end_arg, start_time))
6130 break;
6131
6132 if (unlikely(need_resched())) {
6133 if (napi_poll)
6134 busy_poll_stop(napi, have_poll_lock);
6135 preempt_enable();
6136 rcu_read_unlock();
6137 cond_resched();
6138 if (loop_end(loop_end_arg, start_time))
6139 return;
6140 goto restart;
6141 }
6142 cpu_relax();
6143 }
6144 if (napi_poll)
6145 busy_poll_stop(napi, have_poll_lock);
6146 preempt_enable();
6147 out:
6148 rcu_read_unlock();
6149 }
6150 EXPORT_SYMBOL(napi_busy_loop);
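/* Illustrative sketch (not part of this file): napi_busy_loop() spins on the
 * given NAPI instance until the loop_end callback returns true.  Here the
 * callback simply polls a caller-owned flag; the in-tree sk_busy_loop()
 * machinery is the real user.  example_done and these helpers are
 * hypothetical.
 */
static bool example_loop_end(void *arg, unsigned long start_time)
{
	return READ_ONCE(*(bool *)arg);		/* stop once the flag is set */
}

static void example_busy_poll(unsigned int napi_id, bool *done)
{
	napi_busy_loop(napi_id, example_loop_end, done);
}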
6151
6152 #endif
6153
6154 static void napi_hash_add(struct napi_struct *napi)
6155 {
6156 if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
6157 test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
6158 return;
6159
6160 spin_lock(&napi_hash_lock);
6161
6162
6163 do {
6164 if (unlikely(++napi_gen_id < MIN_NAPI_ID))
6165 napi_gen_id = MIN_NAPI_ID;
6166 } while (napi_by_id(napi_gen_id));
6167 napi->napi_id = napi_gen_id;
6168
6169 hlist_add_head_rcu(&napi->napi_hash_node,
6170 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
6171
6172 spin_unlock(&napi_hash_lock);
6173 }
6174
6175 /* Warning : the caller is responsible for making sure an RCU grace
6176  * period is respected before freeing the memory containing @napi.
6177  */
6178 bool napi_hash_del(struct napi_struct *napi)
6179 {
6180 bool rcu_sync_needed = false;
6181
6182 spin_lock(&napi_hash_lock);
6183
6184 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
6185 rcu_sync_needed = true;
6186 hlist_del_rcu(&napi->napi_hash_node);
6187 }
6188 spin_unlock(&napi_hash_lock);
6189 return rcu_sync_needed;
6190 }
6191 EXPORT_SYMBOL_GPL(napi_hash_del);
6192
6193 static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
6194 {
6195 struct napi_struct *napi;
6196
6197 napi = container_of(timer, struct napi_struct, timer);
6198
6199
6200
6201
6202 if (napi->gro_bitmask && !napi_disable_pending(napi) &&
6203 !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
6204 __napi_schedule_irqoff(napi);
6205
6206 return HRTIMER_NORESTART;
6207 }
6208
6209 static void init_gro_hash(struct napi_struct *napi)
6210 {
6211 int i;
6212
6213 for (i = 0; i < GRO_HASH_BUCKETS; i++) {
6214 INIT_LIST_HEAD(&napi->gro_hash[i].list);
6215 napi->gro_hash[i].count = 0;
6216 }
6217 napi->gro_bitmask = 0;
6218 }
6219
6220 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
6221 int (*poll)(struct napi_struct *, int), int weight)
6222 {
6223 INIT_LIST_HEAD(&napi->poll_list);
6224 hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
6225 napi->timer.function = napi_watchdog;
6226 init_gro_hash(napi);
6227 napi->skb = NULL;
6228 INIT_LIST_HEAD(&napi->rx_list);
6229 napi->rx_count = 0;
6230 napi->poll = poll;
6231 if (weight > NAPI_POLL_WEIGHT)
6232 netdev_err_once(dev, "%s() called with weight %d\n", __func__,
6233 weight);
6234 napi->weight = weight;
6235 list_add(&napi->dev_list, &dev->napi_list);
6236 napi->dev = dev;
6237 #ifdef CONFIG_NETPOLL
6238 napi->poll_owner = -1;
6239 #endif
6240 set_bit(NAPI_STATE_SCHED, &napi->state);
6241 napi_hash_add(napi);
6242 }
6243 EXPORT_SYMBOL(netif_napi_add);
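/* Illustrative sketch (not part of this file): drivers register their poll
 * routine at probe time and tear it down at remove time; napi_enable() and
 * napi_disable() bracket the period in which the device may schedule the
 * instance.  struct example_priv (holding a napi_struct) and example_poll()
 * are hypothetical.
 */
static void example_setup_napi(struct net_device *netdev,
			       struct example_priv *priv)
{
	netif_napi_add(netdev, &priv->napi, example_poll, NAPI_POLL_WEIGHT);
	napi_enable(&priv->napi);
}

static void example_teardown_napi(struct example_priv *priv)
{
	napi_disable(&priv->napi);
	netif_napi_del(&priv->napi);
}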
6244
6245 void napi_disable(struct napi_struct *n)
6246 {
6247 might_sleep();
6248 set_bit(NAPI_STATE_DISABLE, &n->state);
6249
6250 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
6251 msleep(1);
6252 while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
6253 msleep(1);
6254
6255 hrtimer_cancel(&n->timer);
6256
6257 clear_bit(NAPI_STATE_DISABLE, &n->state);
6258 }
6259 EXPORT_SYMBOL(napi_disable);
6260
6261 static void flush_gro_hash(struct napi_struct *napi)
6262 {
6263 int i;
6264
6265 for (i = 0; i < GRO_HASH_BUCKETS; i++) {
6266 struct sk_buff *skb, *n;
6267
6268 list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list)
6269 kfree_skb(skb);
6270 napi->gro_hash[i].count = 0;
6271 }
6272 }
6273
6274 /* Must be called in process context */
6275 void netif_napi_del(struct napi_struct *napi)
6276 {
6277 might_sleep();
6278 if (napi_hash_del(napi))
6279 synchronize_net();
6280 list_del_init(&napi->dev_list);
6281 napi_free_frags(napi);
6282
6283 flush_gro_hash(napi);
6284 napi->gro_bitmask = 0;
6285 }
6286 EXPORT_SYMBOL(netif_napi_del);
6287
6288 static int napi_poll(struct napi_struct *n, struct list_head *repoll)
6289 {
6290 void *have;
6291 int work, weight;
6292
6293 list_del_init(&n->poll_list);
6294
6295 have = netpoll_poll_lock(n);
6296
6297 weight = n->weight;
6298
6299 /* This NAPI_STATE_SCHED test is for avoiding a race
6300  * with netpoll's poll_napi().  Only the entity which
6301  * obtains the lock and sees NAPI_STATE_SCHED set will
6302  * actually make the ->poll() call.  Therefore we avoid
6303  * accepting a request here.
6304  */
6305 work = 0;
6306 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
6307 work = n->poll(n, weight);
6308 trace_napi_poll(n, work, weight);
6309 }
6310
6311 WARN_ON_ONCE(work > weight);
6312
6313 if (likely(work < weight))
6314 goto out_unlock;
6315
6316 /* Drivers must not modify the NAPI state if they
6317  * consume the entire weight.  In such cases this code
6318  * still "owns" the NAPI instance and therefore can
6319  * move the instance around on the list at will.
6320  */
6321 if (unlikely(napi_disable_pending(n))) {
6322 napi_complete(n);
6323 goto out_unlock;
6324 }
6325
6326 if (n->gro_bitmask) {
6327 /* flush too old packets
6328  * If HZ < 1000, flush all packets.
6329  */
6330 napi_gro_flush(n, HZ >= 1000);
6331 }
6332
6333 gro_normal_list(n);
6334
6335 /* Some drivers may have called napi_schedule
6336  * prior to exhausting their budget.
6337  */
6338 if (unlikely(!list_empty(&n->poll_list))) {
6339 pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
6340 n->dev ? n->dev->name : "backlog");
6341 goto out_unlock;
6342 }
6343
6344 list_add_tail(&n->poll_list, repoll);
6345
6346 out_unlock:
6347 netpoll_poll_unlock(have);
6348
6349 return work;
6350 }
6351
6352 static __latent_entropy void net_rx_action(struct softirq_action *h)
6353 {
6354 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
6355 unsigned long time_limit = jiffies +
6356 usecs_to_jiffies(netdev_budget_usecs);
6357 int budget = netdev_budget;
6358 LIST_HEAD(list);
6359 LIST_HEAD(repoll);
6360
6361 local_irq_disable();
6362 list_splice_init(&sd->poll_list, &list);
6363 local_irq_enable();
6364
6365 for (;;) {
6366 struct napi_struct *n;
6367
6368 if (list_empty(&list)) {
6369 if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
6370 goto out;
6371 break;
6372 }
6373
6374 n = list_first_entry(&list, struct napi_struct, poll_list);
6375 budget -= napi_poll(n, &repoll);
6376
6377 /* If the softirq window is exhausted then punt.
6378  * Allow this to run for 2 jiffies, which gives an
6379  * average latency of 1.5/HZ.
6380  */
6381 if (unlikely(budget <= 0 ||
6382 time_after_eq(jiffies, time_limit))) {
6383 sd->time_squeeze++;
6384 break;
6385 }
6386 }
6387
6388 local_irq_disable();
6389
6390 list_splice_tail_init(&sd->poll_list, &list);
6391 list_splice_tail(&repoll, &list);
6392 list_splice(&list, &sd->poll_list);
6393 if (!list_empty(&sd->poll_list))
6394 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
6395
6396 net_rps_action_and_irq_enable(sd);
6397 out:
6398 __kfree_skb_flush();
6399 }
6400
6401 struct netdev_adjacent {
6402 struct net_device *dev;
6403
6404 /* upper master flag, there can only be one master device per list */
6405 bool master;
6406
6407 /* lookup ignore flag */
6408 bool ignore;
6409
6410 /* counter for the number of times this device was added to us */
6411 u16 ref_nr;
6412
6413 /* private field for the users */
6414 void *private;
6415
6416 struct list_head list;
6417 struct rcu_head rcu;
6418 };
6419
6420 static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
6421 struct list_head *adj_list)
6422 {
6423 struct netdev_adjacent *adj;
6424
6425 list_for_each_entry(adj, adj_list, list) {
6426 if (adj->dev == adj_dev)
6427 return adj;
6428 }
6429 return NULL;
6430 }
6431
6432 static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data)
6433 {
6434 struct net_device *dev = data;
6435
6436 return upper_dev == dev;
6437 }
6438
6439 /**
6440  * netdev_has_upper_dev - Check if device is linked to an upper device
6441  * @dev: device
6442  * @upper_dev: upper device to check
6443  *
6444  * Find out if a device is linked to the specified upper device and return
6445  * true in case it is. Note that this checks only the immediate upper device,
6446  * not the complete stack of devices. The caller must hold the RTNL lock.
6447  */
6448 bool netdev_has_upper_dev(struct net_device *dev,
6449 struct net_device *upper_dev)
6450 {
6451 ASSERT_RTNL();
6452
6453 return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
6454 upper_dev);
6455 }
6456 EXPORT_SYMBOL(netdev_has_upper_dev);
6457
6458 /**
6459  * netdev_has_upper_dev_all_rcu - Check if device is linked to an upper device
6460  * @dev: device
6461  * @upper_dev: upper device to check
6462  *
6463  * Find out if a device is linked to the specified upper device and return
6464  * true in case it is. Note that this checks the entire upper device chain.
6465  * The caller must hold the RCU read lock.
6466  */
6467
6468 bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
6469 struct net_device *upper_dev)
6470 {
6471 return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
6472 upper_dev);
6473 }
6474 EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
6475
6476 /**
6477  * netdev_has_any_upper_dev - Check if device is linked to some device
6478  * @dev: device
6479  *
6480  * Find out if a device is linked to an upper device and return true in case
6481  * it is. The caller must hold the RTNL lock.
6482  */
6483 bool netdev_has_any_upper_dev(struct net_device *dev)
6484 {
6485 ASSERT_RTNL();
6486
6487 return !list_empty(&dev->adj_list.upper);
6488 }
6489 EXPORT_SYMBOL(netdev_has_any_upper_dev);
6490
6491 /**
6492  * netdev_master_upper_dev_get - Get master upper device
6493  * @dev: device
6494  *
6495  * Find a master upper device and return a pointer to it, or NULL in case
6496  * it's not there. The caller must hold the RTNL lock.
6497  */
6498 struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
6499 {
6500 struct netdev_adjacent *upper;
6501
6502 ASSERT_RTNL();
6503
6504 if (list_empty(&dev->adj_list.upper))
6505 return NULL;
6506
6507 upper = list_first_entry(&dev->adj_list.upper,
6508 struct netdev_adjacent, list);
6509 if (likely(upper->master))
6510 return upper->dev;
6511 return NULL;
6512 }
6513 EXPORT_SYMBOL(netdev_master_upper_dev_get);
6514
6515 static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev)
6516 {
6517 struct netdev_adjacent *upper;
6518
6519 ASSERT_RTNL();
6520
6521 if (list_empty(&dev->adj_list.upper))
6522 return NULL;
6523
6524 upper = list_first_entry(&dev->adj_list.upper,
6525 struct netdev_adjacent, list);
6526 if (likely(upper->master) && !upper->ignore)
6527 return upper->dev;
6528 return NULL;
6529 }
6530
6531 /**
6532  * netdev_has_any_lower_dev - Check if device is linked to some device
6533  * @dev: device
6534  *
6535  * Find out if a device is linked to a lower device and return true in case
6536  * it is. The caller must hold the RTNL lock.
6537  */
6538 static bool netdev_has_any_lower_dev(struct net_device *dev)
6539 {
6540 ASSERT_RTNL();
6541
6542 return !list_empty(&dev->adj_list.lower);
6543 }
6544
6545 void *netdev_adjacent_get_private(struct list_head *adj_list)
6546 {
6547 struct netdev_adjacent *adj;
6548
6549 adj = list_entry(adj_list, struct netdev_adjacent, list);
6550
6551 return adj->private;
6552 }
6553 EXPORT_SYMBOL(netdev_adjacent_get_private);
6554
6555 /**
6556  * netdev_upper_get_next_dev_rcu - Get the next dev from upper list
6557  * @dev: device
6558  * @iter: list_head ** of the current position
6559  *
6560  * Gets the next device from the dev's upper list, starting from the iter
6561  * position. The caller must hold the RCU read lock.
6562  */
6563 struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
6564 struct list_head **iter)
6565 {
6566 struct netdev_adjacent *upper;
6567
6568 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
6569
6570 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
6571
6572 if (&upper->list == &dev->adj_list.upper)
6573 return NULL;
6574
6575 *iter = &upper->list;
6576
6577 return upper->dev;
6578 }
6579 EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
6580
6581 static struct net_device *__netdev_next_upper_dev(struct net_device *dev,
6582 struct list_head **iter,
6583 bool *ignore)
6584 {
6585 struct netdev_adjacent *upper;
6586
6587 upper = list_entry((*iter)->next, struct netdev_adjacent, list);
6588
6589 if (&upper->list == &dev->adj_list.upper)
6590 return NULL;
6591
6592 *iter = &upper->list;
6593 *ignore = upper->ignore;
6594
6595 return upper->dev;
6596 }
6597
6598 static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
6599 struct list_head **iter)
6600 {
6601 struct netdev_adjacent *upper;
6602
6603 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
6604
6605 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
6606
6607 if (&upper->list == &dev->adj_list.upper)
6608 return NULL;
6609
6610 *iter = &upper->list;
6611
6612 return upper->dev;
6613 }
6614
6615 static int __netdev_walk_all_upper_dev(struct net_device *dev,
6616 int (*fn)(struct net_device *dev,
6617 void *data),
6618 void *data)
6619 {
6620 struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
6621 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
6622 int ret, cur = 0;
6623 bool ignore;
6624
6625 now = dev;
6626 iter = &dev->adj_list.upper;
6627
6628 while (1) {
6629 if (now != dev) {
6630 ret = fn(now, data);
6631 if (ret)
6632 return ret;
6633 }
6634
6635 next = NULL;
6636 while (1) {
6637 udev = __netdev_next_upper_dev(now, &iter, &ignore);
6638 if (!udev)
6639 break;
6640 if (ignore)
6641 continue;
6642
6643 next = udev;
6644 niter = &udev->adj_list.upper;
6645 dev_stack[cur] = now;
6646 iter_stack[cur++] = iter;
6647 break;
6648 }
6649
6650 if (!next) {
6651 if (!cur)
6652 return 0;
6653 next = dev_stack[--cur];
6654 niter = iter_stack[cur];
6655 }
6656
6657 now = next;
6658 iter = niter;
6659 }
6660
6661 return 0;
6662 }
6663
6664 int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
6665 int (*fn)(struct net_device *dev,
6666 void *data),
6667 void *data)
6668 {
6669 struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
6670 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
6671 int ret, cur = 0;
6672
6673 now = dev;
6674 iter = &dev->adj_list.upper;
6675
6676 while (1) {
6677 if (now != dev) {
6678 ret = fn(now, data);
6679 if (ret)
6680 return ret;
6681 }
6682
6683 next = NULL;
6684 while (1) {
6685 udev = netdev_next_upper_dev_rcu(now, &iter);
6686 if (!udev)
6687 break;
6688
6689 next = udev;
6690 niter = &udev->adj_list.upper;
6691 dev_stack[cur] = now;
6692 iter_stack[cur++] = iter;
6693 break;
6694 }
6695
6696 if (!next) {
6697 if (!cur)
6698 return 0;
6699 next = dev_stack[--cur];
6700 niter = iter_stack[cur];
6701 }
6702
6703 now = next;
6704 iter = niter;
6705 }
6706
6707 return 0;
6708 }
6709 EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
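/* Illustrative sketch (not part of this file): the walker invokes the given
 * callback for every device stacked above @dev, stopping early on a non-zero
 * return value.  This hypothetical helper counts upper devices under RCU.
 */
static int example_count_one(struct net_device *upper, void *data)
{
	(*(int *)data)++;
	return 0;			/* keep walking */
}

static int example_count_uppers(struct net_device *dev)
{
	int count = 0;

	rcu_read_lock();
	netdev_walk_all_upper_dev_rcu(dev, example_count_one, &count);
	rcu_read_unlock();
	return count;
}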
6710
6711 static bool __netdev_has_upper_dev(struct net_device *dev,
6712 struct net_device *upper_dev)
6713 {
6714 ASSERT_RTNL();
6715
6716 return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
6717 upper_dev);
6718 }
6719
6720 /**
6721  * netdev_lower_get_next_private - Get the next ->private from the
6722  *				   lower neighbour list
6723  * @dev: device
6724  * @iter: list_head ** of the current position
6725  *
6726  * Gets the next netdev_adjacent->private from the dev's lower neighbour
6727  * list, starting from the iter position. The caller must either hold the
6728  * RTNL lock or its own locking that guarantees that the neighbour lower
6729  * list will remain unchanged.
6730  */
6731 void *netdev_lower_get_next_private(struct net_device *dev,
6732 struct list_head **iter)
6733 {
6734 struct netdev_adjacent *lower;
6735
6736 lower = list_entry(*iter, struct netdev_adjacent, list);
6737
6738 if (&lower->list == &dev->adj_list.lower)
6739 return NULL;
6740
6741 *iter = lower->list.next;
6742
6743 return lower->private;
6744 }
6745 EXPORT_SYMBOL(netdev_lower_get_next_private);
6746
6747 /**
6748  * netdev_lower_get_next_private_rcu - Get the next ->private from the
6749  *				       lower neighbour list, RCU variant
6750  * @dev: device
6751  * @iter: list_head ** of the current position
6752  *
6753  * Gets the next netdev_adjacent->private from the dev's lower neighbour
6754  * list, starting from the iter position. The caller must hold the RCU
6755  * read lock.
6756  */
6757 void *netdev_lower_get_next_private_rcu(struct net_device *dev,
6758 struct list_head **iter)
6759 {
6760 struct netdev_adjacent *lower;
6761
6762 WARN_ON_ONCE(!rcu_read_lock_held());
6763
6764 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
6765
6766 if (&lower->list == &dev->adj_list.lower)
6767 return NULL;
6768
6769 *iter = &lower->list;
6770
6771 return lower->private;
6772 }
6773 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
6774
6775 /**
6776  * netdev_lower_get_next - Get the next device from the lower neighbour
6777  *                         list
6778  * @dev: device
6779  * @iter: list_head ** of the current position
6780  *
6781  * Gets the next netdev_adjacent from the dev's lower neighbour
6782  * list, starting from the iter position. The caller must hold the RTNL
6783  * lock or its own locking that guarantees that the neighbour lower
6784  * list will remain unchanged.
6785  */
6786 void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
6787 {
6788 struct netdev_adjacent *lower;
6789
6790 lower = list_entry(*iter, struct netdev_adjacent, list);
6791
6792 if (&lower->list == &dev->adj_list.lower)
6793 return NULL;
6794
6795 *iter = lower->list.next;
6796
6797 return lower->dev;
6798 }
6799 EXPORT_SYMBOL(netdev_lower_get_next);
6800
6801 static struct net_device *netdev_next_lower_dev(struct net_device *dev,
6802 struct list_head **iter)
6803 {
6804 struct netdev_adjacent *lower;
6805
6806 lower = list_entry((*iter)->next, struct netdev_adjacent, list);
6807
6808 if (&lower->list == &dev->adj_list.lower)
6809 return NULL;
6810
6811 *iter = &lower->list;
6812
6813 return lower->dev;
6814 }
6815
6816 static struct net_device *__netdev_next_lower_dev(struct net_device *dev,
6817 struct list_head **iter,
6818 bool *ignore)
6819 {
6820 struct netdev_adjacent *lower;
6821
6822 lower = list_entry((*iter)->next, struct netdev_adjacent, list);
6823
6824 if (&lower->list == &dev->adj_list.lower)
6825 return NULL;
6826
6827 *iter = &lower->list;
6828 *ignore = lower->ignore;
6829
6830 return lower->dev;
6831 }
6832
6833 int netdev_walk_all_lower_dev(struct net_device *dev,
6834 int (*fn)(struct net_device *dev,
6835 void *data),
6836 void *data)
6837 {
6838 struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
6839 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
6840 int ret, cur = 0;
6841
6842 now = dev;
6843 iter = &dev->adj_list.lower;
6844
6845 while (1) {
6846 if (now != dev) {
6847 ret = fn(now, data);
6848 if (ret)
6849 return ret;
6850 }
6851
6852 next = NULL;
6853 while (1) {
6854 ldev = netdev_next_lower_dev(now, &iter);
6855 if (!ldev)
6856 break;
6857
6858 next = ldev;
6859 niter = &ldev->adj_list.lower;
6860 dev_stack[cur] = now;
6861 iter_stack[cur++] = iter;
6862 break;
6863 }
6864
6865 if (!next) {
6866 if (!cur)
6867 return 0;
6868 next = dev_stack[--cur];
6869 niter = iter_stack[cur];
6870 }
6871
6872 now = next;
6873 iter = niter;
6874 }
6875
6876 return 0;
6877 }
6878 EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
6879
6880 static int __netdev_walk_all_lower_dev(struct net_device *dev,
6881 int (*fn)(struct net_device *dev,
6882 void *data),
6883 void *data)
6884 {
6885 struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
6886 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
6887 int ret, cur = 0;
6888 bool ignore;
6889
6890 now = dev;
6891 iter = &dev->adj_list.lower;
6892
6893 while (1) {
6894 if (now != dev) {
6895 ret = fn(now, data);
6896 if (ret)
6897 return ret;
6898 }
6899
6900 next = NULL;
6901 while (1) {
6902 ldev = __netdev_next_lower_dev(now, &iter, &ignore);
6903 if (!ldev)
6904 break;
6905 if (ignore)
6906 continue;
6907
6908 next = ldev;
6909 niter = &ldev->adj_list.lower;
6910 dev_stack[cur] = now;
6911 iter_stack[cur++] = iter;
6912 break;
6913 }
6914
6915 if (!next) {
6916 if (!cur)
6917 return 0;
6918 next = dev_stack[--cur];
6919 niter = iter_stack[cur];
6920 }
6921
6922 now = next;
6923 iter = niter;
6924 }
6925
6926 return 0;
6927 }
6928
6929 struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
6930 struct list_head **iter)
6931 {
6932 struct netdev_adjacent *lower;
6933
6934 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
6935 if (&lower->list == &dev->adj_list.lower)
6936 return NULL;
6937
6938 *iter = &lower->list;
6939
6940 return lower->dev;
6941 }
6942 EXPORT_SYMBOL(netdev_next_lower_dev_rcu);
6943
6944 static u8 __netdev_upper_depth(struct net_device *dev)
6945 {
6946 struct net_device *udev;
6947 struct list_head *iter;
6948 u8 max_depth = 0;
6949 bool ignore;
6950
6951 for (iter = &dev->adj_list.upper,
6952 udev = __netdev_next_upper_dev(dev, &iter, &ignore);
6953 udev;
6954 udev = __netdev_next_upper_dev(dev, &iter, &ignore)) {
6955 if (ignore)
6956 continue;
6957 if (max_depth < udev->upper_level)
6958 max_depth = udev->upper_level;
6959 }
6960
6961 return max_depth;
6962 }
6963
6964 static u8 __netdev_lower_depth(struct net_device *dev)
6965 {
6966 struct net_device *ldev;
6967 struct list_head *iter;
6968 u8 max_depth = 0;
6969 bool ignore;
6970
6971 for (iter = &dev->adj_list.lower,
6972 ldev = __netdev_next_lower_dev(dev, &iter, &ignore);
6973 ldev;
6974 ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) {
6975 if (ignore)
6976 continue;
6977 if (max_depth < ldev->lower_level)
6978 max_depth = ldev->lower_level;
6979 }
6980
6981 return max_depth;
6982 }
6983
6984 static int __netdev_update_upper_level(struct net_device *dev, void *data)
6985 {
6986 dev->upper_level = __netdev_upper_depth(dev) + 1;
6987 return 0;
6988 }
6989
6990 static int __netdev_update_lower_level(struct net_device *dev, void *data)
6991 {
6992 dev->lower_level = __netdev_lower_depth(dev) + 1;
6993 return 0;
6994 }
6995
6996 int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
6997 int (*fn)(struct net_device *dev,
6998 void *data),
6999 void *data)
7000 {
7001 struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
7002 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
7003 int ret, cur = 0;
7004
7005 now = dev;
7006 iter = &dev->adj_list.lower;
7007
7008 while (1) {
7009 if (now != dev) {
7010 ret = fn(now, data);
7011 if (ret)
7012 return ret;
7013 }
7014
7015 next = NULL;
7016 while (1) {
7017 ldev = netdev_next_lower_dev_rcu(now, &iter);
7018 if (!ldev)
7019 break;
7020
7021 next = ldev;
7022 niter = &ldev->adj_list.lower;
7023 dev_stack[cur] = now;
7024 iter_stack[cur++] = iter;
7025 break;
7026 }
7027
7028 if (!next) {
7029 if (!cur)
7030 return 0;
7031 next = dev_stack[--cur];
7032 niter = iter_stack[cur];
7033 }
7034
7035 now = next;
7036 iter = niter;
7037 }
7038
7039 return 0;
7040 }
7041 EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu);
7042
7043 /**
7044  * netdev_lower_get_first_private_rcu - Get the first ->private from the
7045  *				       lower neighbour list, RCU variant
7046  * @dev: device
7047  *
7048  * Gets the first netdev_adjacent->private from the dev's lower neighbour
7049  * list. The caller must hold the RCU read lock.
7050  */
7051
7052 void *netdev_lower_get_first_private_rcu(struct net_device *dev)
7053 {
7054 struct netdev_adjacent *lower;
7055
7056 lower = list_first_or_null_rcu(&dev->adj_list.lower,
7057 struct netdev_adjacent, list);
7058 if (lower)
7059 return lower->private;
7060 return NULL;
7061 }
7062 EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
7063
7064 /**
7065  * netdev_master_upper_dev_get_rcu - Get master upper device
7066  * @dev: device
7067  *
7068  * Find a master upper device and return a pointer to it, or NULL in case
7069  * it's not there. The caller must hold the RCU read lock.
7070  */
7071 struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
7072 {
7073 struct netdev_adjacent *upper;
7074
7075 upper = list_first_or_null_rcu(&dev->adj_list.upper,
7076 struct netdev_adjacent, list);
7077 if (upper && likely(upper->master))
7078 return upper->dev;
7079 return NULL;
7080 }
7081 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
7082
7083 static int netdev_adjacent_sysfs_add(struct net_device *dev,
7084 struct net_device *adj_dev,
7085 struct list_head *dev_list)
7086 {
7087 char linkname[IFNAMSIZ+7];
7088
7089 sprintf(linkname, dev_list == &dev->adj_list.upper ?
7090 "upper_%s" : "lower_%s", adj_dev->name);
7091 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
7092 linkname);
7093 }
7094 static void netdev_adjacent_sysfs_del(struct net_device *dev,
7095 char *name,
7096 struct list_head *dev_list)
7097 {
7098 char linkname[IFNAMSIZ+7];
7099
7100 sprintf(linkname, dev_list == &dev->adj_list.upper ?
7101 "upper_%s" : "lower_%s", name);
7102 sysfs_remove_link(&(dev->dev.kobj), linkname);
7103 }
7104
7105 static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
7106 struct net_device *adj_dev,
7107 struct list_head *dev_list)
7108 {
7109 return (dev_list == &dev->adj_list.upper ||
7110 dev_list == &dev->adj_list.lower) &&
7111 net_eq(dev_net(dev), dev_net(adj_dev));
7112 }
7113
7114 static int __netdev_adjacent_dev_insert(struct net_device *dev,
7115 struct net_device *adj_dev,
7116 struct list_head *dev_list,
7117 void *private, bool master)
7118 {
7119 struct netdev_adjacent *adj;
7120 int ret;
7121
7122 adj = __netdev_find_adj(adj_dev, dev_list);
7123
7124 if (adj) {
7125 adj->ref_nr += 1;
7126 pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n",
7127 dev->name, adj_dev->name, adj->ref_nr);
7128
7129 return 0;
7130 }
7131
7132 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
7133 if (!adj)
7134 return -ENOMEM;
7135
7136 adj->dev = adj_dev;
7137 adj->master = master;
7138 adj->ref_nr = 1;
7139 adj->private = private;
7140 adj->ignore = false;
7141 dev_hold(adj_dev);
7142
7143 pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
7144 dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
7145
7146 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
7147 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
7148 if (ret)
7149 goto free_adj;
7150 }
7151
7152 /* Ensure that the master link is always the first item in the list. */
7153 if (master) {
7154 ret = sysfs_create_link(&(dev->dev.kobj),
7155 &(adj_dev->dev.kobj), "master");
7156 if (ret)
7157 goto remove_symlinks;
7158
7159 list_add_rcu(&adj->list, dev_list);
7160 } else {
7161 list_add_tail_rcu(&adj->list, dev_list);
7162 }
7163
7164 return 0;
7165
7166 remove_symlinks:
7167 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
7168 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
7169 free_adj:
7170 kfree(adj);
7171 dev_put(adj_dev);
7172
7173 return ret;
7174 }
7175
7176 static void __netdev_adjacent_dev_remove(struct net_device *dev,
7177 struct net_device *adj_dev,
7178 u16 ref_nr,
7179 struct list_head *dev_list)
7180 {
7181 struct netdev_adjacent *adj;
7182
7183 pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n",
7184 dev->name, adj_dev->name, ref_nr);
7185
7186 adj = __netdev_find_adj(adj_dev, dev_list);
7187
7188 if (!adj) {
7189 pr_err("Adjacency does not exist for device %s from %s\n",
7190 dev->name, adj_dev->name);
7191 WARN_ON(1);
7192 return;
7193 }
7194
7195 if (adj->ref_nr > ref_nr) {
7196 pr_debug("adjacency: %s to %s ref_nr - %d = %d\n",
7197 dev->name, adj_dev->name, ref_nr,
7198 adj->ref_nr - ref_nr);
7199 adj->ref_nr -= ref_nr;
7200 return;
7201 }
7202
7203 if (adj->master)
7204 sysfs_remove_link(&(dev->dev.kobj), "master");
7205
7206 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
7207 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
7208
7209 list_del_rcu(&adj->list);
7210 pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
7211 adj_dev->name, dev->name, adj_dev->name);
7212 dev_put(adj_dev);
7213 kfree_rcu(adj, rcu);
7214 }
7215
7216 static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
7217 struct net_device *upper_dev,
7218 struct list_head *up_list,
7219 struct list_head *down_list,
7220 void *private, bool master)
7221 {
7222 int ret;
7223
7224 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list,
7225 private, master);
7226 if (ret)
7227 return ret;
7228
7229 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list,
7230 private, false);
7231 if (ret) {
7232 __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list);
7233 return ret;
7234 }
7235
7236 return 0;
7237 }
7238
7239 static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
7240 struct net_device *upper_dev,
7241 u16 ref_nr,
7242 struct list_head *up_list,
7243 struct list_head *down_list)
7244 {
7245 __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
7246 __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
7247 }
7248
7249 static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
7250 struct net_device *upper_dev,
7251 void *private, bool master)
7252 {
7253 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
7254 &dev->adj_list.upper,
7255 &upper_dev->adj_list.lower,
7256 private, master);
7257 }
7258
7259 static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
7260 struct net_device *upper_dev)
7261 {
7262 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
7263 &dev->adj_list.upper,
7264 &upper_dev->adj_list.lower);
7265 }
7266
7267 static int __netdev_upper_dev_link(struct net_device *dev,
7268 struct net_device *upper_dev, bool master,
7269 void *upper_priv, void *upper_info,
7270 struct netlink_ext_ack *extack)
7271 {
7272 struct netdev_notifier_changeupper_info changeupper_info = {
7273 .info = {
7274 .dev = dev,
7275 .extack = extack,
7276 },
7277 .upper_dev = upper_dev,
7278 .master = master,
7279 .linking = true,
7280 .upper_info = upper_info,
7281 };
7282 struct net_device *master_dev;
7283 int ret = 0;
7284
7285 ASSERT_RTNL();
7286
7287 if (dev == upper_dev)
7288 return -EBUSY;
7289
7290 /* To prevent loops, check that dev is not an upper device of upper_dev. */
7291 if (__netdev_has_upper_dev(upper_dev, dev))
7292 return -EBUSY;
7293
7294 if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
7295 return -EMLINK;
7296
7297 if (!master) {
7298 if (__netdev_has_upper_dev(dev, upper_dev))
7299 return -EEXIST;
7300 } else {
7301 master_dev = __netdev_master_upper_dev_get(dev);
7302 if (master_dev)
7303 return master_dev == upper_dev ? -EEXIST : -EBUSY;
7304 }
7305
7306 ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
7307 &changeupper_info.info);
7308 ret = notifier_to_errno(ret);
7309 if (ret)
7310 return ret;
7311
7312 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
7313 master);
7314 if (ret)
7315 return ret;
7316
7317 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
7318 &changeupper_info.info);
7319 ret = notifier_to_errno(ret);
7320 if (ret)
7321 goto rollback;
7322
7323 __netdev_update_upper_level(dev, NULL);
7324 __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
7325
7326 __netdev_update_lower_level(upper_dev, NULL);
7327 __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
7328 NULL);
7329
7330 return 0;
7331
7332 rollback:
7333 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
7334
7335 return ret;
7336 }
7337
7338 /**
7339  * netdev_upper_dev_link - Add a link to the upper device
7340  * @dev: device
7341  * @upper_dev: new upper device
7342  * @extack: netlink extended ack
7343  *
7344  * Adds a link to a device which is upper to this one. The caller must hold
7345  * the RTNL lock. On a failure a negative errno code is returned.
7346  * On success the reference counts are adjusted and the function
7347  * returns zero.
7348  */
7349 int netdev_upper_dev_link(struct net_device *dev,
7350 struct net_device *upper_dev,
7351 struct netlink_ext_ack *extack)
7352 {
7353 return __netdev_upper_dev_link(dev, upper_dev, false,
7354 NULL, NULL, extack);
7355 }
7356 EXPORT_SYMBOL(netdev_upper_dev_link);
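/* Illustrative sketch (not part of this file): stacking drivers (VLAN,
 * bonding, team, bridge, ...) link a lower device to their upper device
 * under RTNL; the extack lets notifier chains report a reason on failure.
 * Names other than the exported helpers are hypothetical.
 */
static int example_attach_lower(struct net_device *upper,
				struct net_device *lower,
				struct netlink_ext_ack *extack)
{
	int err;

	ASSERT_RTNL();
	err = netdev_upper_dev_link(lower, upper, extack);
	if (err)
		return err;
	/* ... driver-private setup; on failure undo with
	 * netdev_upper_dev_unlink(lower, upper);
	 */
	return 0;
}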
7357
7358 /**
7359  * netdev_master_upper_dev_link - Add a master link to the upper device
7360  * @dev: device
7361  * @upper_dev: new upper device
7362  * @upper_priv: upper device private
7363  * @upper_info: upper info to be passed down via notifier
7364  * @extack: netlink extended ack
7365  *
7366  * Adds a link to a device which is upper to this one. In this case, only
7367  * one master upper device can be linked, although other non-master devices
7368  * might be linked as well. The caller must hold the RTNL lock.
7369  * On a failure a negative errno code is returned. On success the reference
7370  * counts are adjusted and the function returns zero.
7371  */
7372 int netdev_master_upper_dev_link(struct net_device *dev,
7373 struct net_device *upper_dev,
7374 void *upper_priv, void *upper_info,
7375 struct netlink_ext_ack *extack)
7376 {
7377 return __netdev_upper_dev_link(dev, upper_dev, true,
7378 upper_priv, upper_info, extack);
7379 }
7380 EXPORT_SYMBOL(netdev_master_upper_dev_link);
7381
7382 /**
7383  * netdev_upper_dev_unlink - Removes a link to upper device
7384  * @dev: device
7385  * @upper_dev: upper device to unlink
7386  *
7387  * Removes a link to a device which is upper to this one. The caller must
7388  * hold the RTNL lock.
7389  */
7390 void netdev_upper_dev_unlink(struct net_device *dev,
7391 struct net_device *upper_dev)
7392 {
7393 struct netdev_notifier_changeupper_info changeupper_info = {
7394 .info = {
7395 .dev = dev,
7396 },
7397 .upper_dev = upper_dev,
7398 .linking = false,
7399 };
7400
7401 ASSERT_RTNL();
7402
7403 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
7404
7405 call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
7406 &changeupper_info.info);
7407
7408 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
7409
7410 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
7411 &changeupper_info.info);
7412
7413 __netdev_update_upper_level(dev, NULL);
7414 __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
7415
7416 __netdev_update_lower_level(upper_dev, NULL);
7417 __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
7418 NULL);
7419 }
7420 EXPORT_SYMBOL(netdev_upper_dev_unlink);
7421
7422 static void __netdev_adjacent_dev_set(struct net_device *upper_dev,
7423 struct net_device *lower_dev,
7424 bool val)
7425 {
7426 struct netdev_adjacent *adj;
7427
7428 adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower);
7429 if (adj)
7430 adj->ignore = val;
7431
7432 adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper);
7433 if (adj)
7434 adj->ignore = val;
7435 }
7436
7437 static void netdev_adjacent_dev_disable(struct net_device *upper_dev,
7438 struct net_device *lower_dev)
7439 {
7440 __netdev_adjacent_dev_set(upper_dev, lower_dev, true);
7441 }
7442
7443 static void netdev_adjacent_dev_enable(struct net_device *upper_dev,
7444 struct net_device *lower_dev)
7445 {
7446 __netdev_adjacent_dev_set(upper_dev, lower_dev, false);
7447 }
7448
7449 int netdev_adjacent_change_prepare(struct net_device *old_dev,
7450 struct net_device *new_dev,
7451 struct net_device *dev,
7452 struct netlink_ext_ack *extack)
7453 {
7454 int err;
7455
7456 if (!new_dev)
7457 return 0;
7458
7459 if (old_dev && new_dev != old_dev)
7460 netdev_adjacent_dev_disable(dev, old_dev);
7461
7462 err = netdev_upper_dev_link(new_dev, dev, extack);
7463 if (err) {
7464 if (old_dev && new_dev != old_dev)
7465 netdev_adjacent_dev_enable(dev, old_dev);
7466 return err;
7467 }
7468
7469 return 0;
7470 }
7471 EXPORT_SYMBOL(netdev_adjacent_change_prepare);
7472
7473 void netdev_adjacent_change_commit(struct net_device *old_dev,
7474 struct net_device *new_dev,
7475 struct net_device *dev)
7476 {
7477 if (!new_dev || !old_dev)
7478 return;
7479
7480 if (new_dev == old_dev)
7481 return;
7482
7483 netdev_adjacent_dev_enable(dev, old_dev);
7484 netdev_upper_dev_unlink(old_dev, dev);
7485 }
7486 EXPORT_SYMBOL(netdev_adjacent_change_commit);
7487
7488 void netdev_adjacent_change_abort(struct net_device *old_dev,
7489 struct net_device *new_dev,
7490 struct net_device *dev)
7491 {
7492 if (!new_dev)
7493 return;
7494
7495 if (old_dev && new_dev != old_dev)
7496 netdev_adjacent_dev_enable(dev, old_dev);
7497
7498 netdev_upper_dev_unlink(new_dev, dev);
7499 }
7500 EXPORT_SYMBOL(netdev_adjacent_change_abort);
7501
7502 /**
7503 * netdev_bonding_info_change - Dispatch event about slave change
7504 * @dev: device
7505 * @bonding_info: info to dispatch
7506 *
7507 * Send NETDEV_BONDING_INFO to netdev notifiers with info.
7508 * The caller must hold the RTNL lock.
7509 */
7510 void netdev_bonding_info_change(struct net_device *dev,
7511 struct netdev_bonding_info *bonding_info)
7512 {
7513 struct netdev_notifier_bonding_info info = {
7514 .info.dev = dev,
7515 };
7516
7517 memcpy(&info.bonding_info, bonding_info,
7518 sizeof(struct netdev_bonding_info));
7519 call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
7520 &info.info);
7521 }
7522 EXPORT_SYMBOL(netdev_bonding_info_change);
7523
7524 static void netdev_adjacent_add_links(struct net_device *dev)
7525 {
7526 struct netdev_adjacent *iter;
7527
7528 struct net *net = dev_net(dev);
7529
7530 list_for_each_entry(iter, &dev->adj_list.upper, list) {
7531 if (!net_eq(net, dev_net(iter->dev)))
7532 continue;
7533 netdev_adjacent_sysfs_add(iter->dev, dev,
7534 &iter->dev->adj_list.lower);
7535 netdev_adjacent_sysfs_add(dev, iter->dev,
7536 &dev->adj_list.upper);
7537 }
7538
7539 list_for_each_entry(iter, &dev->adj_list.lower, list) {
7540 if (!net_eq(net, dev_net(iter->dev)))
7541 continue;
7542 netdev_adjacent_sysfs_add(iter->dev, dev,
7543 &iter->dev->adj_list.upper);
7544 netdev_adjacent_sysfs_add(dev, iter->dev,
7545 &dev->adj_list.lower);
7546 }
7547 }
7548
7549 static void netdev_adjacent_del_links(struct net_device *dev)
7550 {
7551 struct netdev_adjacent *iter;
7552
7553 struct net *net = dev_net(dev);
7554
7555 list_for_each_entry(iter, &dev->adj_list.upper, list) {
7556 if (!net_eq(net, dev_net(iter->dev)))
7557 continue;
7558 netdev_adjacent_sysfs_del(iter->dev, dev->name,
7559 &iter->dev->adj_list.lower);
7560 netdev_adjacent_sysfs_del(dev, iter->dev->name,
7561 &dev->adj_list.upper);
7562 }
7563
7564 list_for_each_entry(iter, &dev->adj_list.lower, list) {
7565 if (!net_eq(net, dev_net(iter->dev)))
7566 continue;
7567 netdev_adjacent_sysfs_del(iter->dev, dev->name,
7568 &iter->dev->adj_list.upper);
7569 netdev_adjacent_sysfs_del(dev, iter->dev->name,
7570 &dev->adj_list.lower);
7571 }
7572 }
7573
7574 void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
7575 {
7576 struct netdev_adjacent *iter;
7577
7578 struct net *net = dev_net(dev);
7579
7580 list_for_each_entry(iter, &dev->adj_list.upper, list) {
7581 if (!net_eq(net, dev_net(iter->dev)))
7582 continue;
7583 netdev_adjacent_sysfs_del(iter->dev, oldname,
7584 &iter->dev->adj_list.lower);
7585 netdev_adjacent_sysfs_add(iter->dev, dev,
7586 &iter->dev->adj_list.lower);
7587 }
7588
7589 list_for_each_entry(iter, &dev->adj_list.lower, list) {
7590 if (!net_eq(net, dev_net(iter->dev)))
7591 continue;
7592 netdev_adjacent_sysfs_del(iter->dev, oldname,
7593 &iter->dev->adj_list.upper);
7594 netdev_adjacent_sysfs_add(iter->dev, dev,
7595 &iter->dev->adj_list.upper);
7596 }
7597 }
7598
7599 void *netdev_lower_dev_get_private(struct net_device *dev,
7600 struct net_device *lower_dev)
7601 {
7602 struct netdev_adjacent *lower;
7603
7604 if (!lower_dev)
7605 return NULL;
7606 lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
7607 if (!lower)
7608 return NULL;
7609
7610 return lower->private;
7611 }
7612 EXPORT_SYMBOL(netdev_lower_dev_get_private);
7613
7614
7615 /**
7616 * netdev_lower_state_changed - Dispatch event about lower device state change
7617 * @lower_dev: device
7618 * @lower_state_info: state to dispatch
7619 *
7620 * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.
7621 * The caller must hold the RTNL lock.
7622 */
7623 void netdev_lower_state_changed(struct net_device *lower_dev,
7624 void *lower_state_info)
7625 {
7626 struct netdev_notifier_changelowerstate_info changelowerstate_info = {
7627 .info.dev = lower_dev,
7628 };
7629
7630 ASSERT_RTNL();
7631 changelowerstate_info.lower_state_info = lower_state_info;
7632 call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
7633 &changelowerstate_info.info);
7634 }
7635 EXPORT_SYMBOL(netdev_lower_state_changed);
7636
7637 static void dev_change_rx_flags(struct net_device *dev, int flags)
7638 {
7639 const struct net_device_ops *ops = dev->netdev_ops;
7640
7641 if (ops->ndo_change_rx_flags)
7642 ops->ndo_change_rx_flags(dev, flags);
7643 }
7644
7645 static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
7646 {
7647 unsigned int old_flags = dev->flags;
7648 kuid_t uid;
7649 kgid_t gid;
7650
7651 ASSERT_RTNL();
7652
7653 dev->flags |= IFF_PROMISC;
7654 dev->promiscuity += inc;
7655 if (dev->promiscuity == 0) {
7656 /*
7657 * Avoid overflow.
7658 * If inc causes overflow, untouch promisc and return error.
7659 */
7660 if (inc < 0)
7661 dev->flags &= ~IFF_PROMISC;
7662 else {
7663 dev->promiscuity -= inc;
7664 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
7665 dev->name);
7666 return -EOVERFLOW;
7667 }
7668 }
7669 if (dev->flags != old_flags) {
7670 pr_info("device %s %s promiscuous mode\n",
7671 dev->name,
7672 dev->flags & IFF_PROMISC ? "entered" : "left");
7673 if (audit_enabled) {
7674 current_uid_gid(&uid, &gid);
7675 audit_log(audit_context(), GFP_ATOMIC,
7676 AUDIT_ANOM_PROMISCUOUS,
7677 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
7678 dev->name, (dev->flags & IFF_PROMISC),
7679 (old_flags & IFF_PROMISC),
7680 from_kuid(&init_user_ns, audit_get_loginuid(current)),
7681 from_kuid(&init_user_ns, uid),
7682 from_kgid(&init_user_ns, gid),
7683 audit_get_sessionid(current));
7684 }
7685
7686 dev_change_rx_flags(dev, IFF_PROMISC);
7687 }
7688 if (notify)
7689 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
7690 return 0;
7691 }
7692
7693 /**
7694 * dev_set_promiscuity - update promiscuity count on a device
7695 * @dev: device
7696 * @inc: modifier
7697 *
7698 * Add or remove promiscuity from a device. While the count in the device
7699 * remains above zero the interface remains promiscuous. Once it hits zero
7700 * the device reverts back to normal filtering operation. A negative inc
7701 * value is used to drop promiscuity on the device.
7702 * Return 0 if successful or a negative errno code on error.
7703 */
7704 int dev_set_promiscuity(struct net_device *dev, int inc)
7705 {
7706 unsigned int old_flags = dev->flags;
7707 int err;
7708
7709 err = __dev_set_promiscuity(dev, inc, true);
7710 if (err < 0)
7711 return err;
7712 if (dev->flags != old_flags)
7713 dev_set_rx_mode(dev);
7714 return err;
7715 }
7716 EXPORT_SYMBOL(dev_set_promiscuity);
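/*
 * Illustrative sketch (editor's addition, not part of dev.c):
 * dev_set_promiscuity() maintains a counter, not a boolean, so paired
 * +1/-1 calls from one user (say a packet-capture path) never clobber
 * another user's request. Helper names are hypothetical; RTNL is required.
 */
static int example_capture_start(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_promiscuity(dev, 1);	/* take one promisc reference */
	rtnl_unlock();
	return err;
}

static void example_capture_stop(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, -1);		/* drop our reference */
	rtnl_unlock();
}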
7717
7718 static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
7719 {
7720 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
7721
7722 ASSERT_RTNL();
7723
7724 dev->flags |= IFF_ALLMULTI;
7725 dev->allmulti += inc;
7726 if (dev->allmulti == 0) {
7727 /*
7728 * Avoid overflow.
7729 * If inc causes overflow, untouch allmulti and return error.
7730 */
7731 if (inc < 0)
7732 dev->flags &= ~IFF_ALLMULTI;
7733 else {
7734 dev->allmulti -= inc;
7735 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
7736 dev->name);
7737 return -EOVERFLOW;
7738 }
7739 }
7740 if (dev->flags ^ old_flags) {
7741 dev_change_rx_flags(dev, IFF_ALLMULTI);
7742 dev_set_rx_mode(dev);
7743 if (notify)
7744 __dev_notify_flags(dev, old_flags,
7745 dev->gflags ^ old_gflags);
7746 }
7747 return 0;
7748 }
7749
7750
7751 /**
7752 * dev_set_allmulti - update allmulti count on a device
7753 * @dev: device
7754 * @inc: modifier
7755 *
7756 * Add or remove reception of all multicast frames to a device. While the
7757 * count in the device remains above zero the interface remains listening
7758 * to all multicast addresses. Once it hits zero the device reverts back to
7759 * normal filtering operation. A negative @inc value is used to drop the
7760 * counter when releasing a resource needing all multicasts.
7761 * Return 0 if successful or a negative errno code on error.
7762 */
7763 int dev_set_allmulti(struct net_device *dev, int inc)
7764 {
7765 return __dev_set_allmulti(dev, inc, true);
7766 }
7767 EXPORT_SYMBOL(dev_set_allmulti);
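/*
 * Illustrative sketch (editor's addition, not part of dev.c): an in-kernel
 * user that must see every multicast frame takes an allmulti reference while
 * active and drops it on shutdown, mirroring the promiscuity counter above.
 * The helper is hypothetical.
 */
static int example_want_all_multicast(struct net_device *dev, bool enable)
{
	int err;

	rtnl_lock();
	err = dev_set_allmulti(dev, enable ? 1 : -1);
	rtnl_unlock();
	return err;
}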
7768
7769
7770
7771
7772
7773
7774
7775 void __dev_set_rx_mode(struct net_device *dev)
7776 {
7777 const struct net_device_ops *ops = dev->netdev_ops;
7778
7779
7780 if (!(dev->flags&IFF_UP))
7781 return;
7782
7783 if (!netif_device_present(dev))
7784 return;
7785
7786 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
7787
7788
7789
7790 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
7791 __dev_set_promiscuity(dev, 1, false);
7792 dev->uc_promisc = true;
7793 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
7794 __dev_set_promiscuity(dev, -1, false);
7795 dev->uc_promisc = false;
7796 }
7797 }
7798
7799 if (ops->ndo_set_rx_mode)
7800 ops->ndo_set_rx_mode(dev);
7801 }
7802
7803 void dev_set_rx_mode(struct net_device *dev)
7804 {
7805 netif_addr_lock_bh(dev);
7806 __dev_set_rx_mode(dev);
7807 netif_addr_unlock_bh(dev);
7808 }
7809
7810 /**
7811 * dev_get_flags - get flags reported to userspace
7812 * @dev: device
7813 *
7814 * Get the combination of flag bits exported through APIs to userspace.
7815 */
7816 unsigned int dev_get_flags(const struct net_device *dev)
7817 {
7818 unsigned int flags;
7819
7820 flags = (dev->flags & ~(IFF_PROMISC |
7821 IFF_ALLMULTI |
7822 IFF_RUNNING |
7823 IFF_LOWER_UP |
7824 IFF_DORMANT)) |
7825 (dev->gflags & (IFF_PROMISC |
7826 IFF_ALLMULTI));
7827
7828 if (netif_running(dev)) {
7829 if (netif_oper_up(dev))
7830 flags |= IFF_RUNNING;
7831 if (netif_carrier_ok(dev))
7832 flags |= IFF_LOWER_UP;
7833 if (netif_dormant(dev))
7834 flags |= IFF_DORMANT;
7835 }
7836
7837 return flags;
7838 }
7839 EXPORT_SYMBOL(dev_get_flags);
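/*
 * Illustrative sketch (editor's addition, not part of dev.c): dev_get_flags()
 * folds the operational bits (IFF_RUNNING, IFF_LOWER_UP, IFF_DORMANT) and the
 * user-requested gflags into one userspace-style mask, so a caller can test
 * "admin up and carrier" in one place. Hypothetical helper.
 */
static bool example_link_is_usable(const struct net_device *dev)
{
	unsigned int flags = dev_get_flags(dev);

	return (flags & IFF_UP) && (flags & IFF_RUNNING);
}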
7840
7841 int __dev_change_flags(struct net_device *dev, unsigned int flags,
7842 struct netlink_ext_ack *extack)
7843 {
7844 unsigned int old_flags = dev->flags;
7845 int ret;
7846
7847 ASSERT_RTNL();
7848
7849
7850
7851
7852
7853 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
7854 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
7855 IFF_AUTOMEDIA)) |
7856 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
7857 IFF_ALLMULTI));
7858
7859
7860
7861
7862
7863 if ((old_flags ^ flags) & IFF_MULTICAST)
7864 dev_change_rx_flags(dev, IFF_MULTICAST);
7865
7866 dev_set_rx_mode(dev);
7867
7868
7869
7870
7871
7872
7873
7874 ret = 0;
7875 if ((old_flags ^ flags) & IFF_UP) {
7876 if (old_flags & IFF_UP)
7877 __dev_close(dev);
7878 else
7879 ret = __dev_open(dev, extack);
7880 }
7881
7882 if ((flags ^ dev->gflags) & IFF_PROMISC) {
7883 int inc = (flags & IFF_PROMISC) ? 1 : -1;
7884 unsigned int old_flags = dev->flags;
7885
7886 dev->gflags ^= IFF_PROMISC;
7887
7888 if (__dev_set_promiscuity(dev, inc, false) >= 0)
7889 if (dev->flags != old_flags)
7890 dev_set_rx_mode(dev);
7891 }
7892
7893
7894
7895
7896
7897 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
7898 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
7899
7900 dev->gflags ^= IFF_ALLMULTI;
7901 __dev_set_allmulti(dev, inc, false);
7902 }
7903
7904 return ret;
7905 }
7906
7907 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
7908 unsigned int gchanges)
7909 {
7910 unsigned int changes = dev->flags ^ old_flags;
7911
7912 if (gchanges)
7913 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
7914
7915 if (changes & IFF_UP) {
7916 if (dev->flags & IFF_UP)
7917 call_netdevice_notifiers(NETDEV_UP, dev);
7918 else
7919 call_netdevice_notifiers(NETDEV_DOWN, dev);
7920 }
7921
7922 if (dev->flags & IFF_UP &&
7923 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
7924 struct netdev_notifier_change_info change_info = {
7925 .info = {
7926 .dev = dev,
7927 },
7928 .flags_changed = changes,
7929 };
7930
7931 call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
7932 }
7933 }
7934
7935 /**
7936 * dev_change_flags - change device settings
7937 * @dev: device
7938 * @flags: device state flags
7939 * @extack: netlink extended ack
7940 *
7941 * Change settings on device based state flags. The flags are
7942 * in the userspace exported format.
7943 */
7944 int dev_change_flags(struct net_device *dev, unsigned int flags,
7945 struct netlink_ext_ack *extack)
7946 {
7947 int ret;
7948 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
7949
7950 ret = __dev_change_flags(dev, flags, extack);
7951 if (ret < 0)
7952 return ret;
7953
7954 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
7955 __dev_notify_flags(dev, old_flags, changes);
7956 return ret;
7957 }
7958 EXPORT_SYMBOL(dev_change_flags);
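/*
 * Illustrative sketch (editor's addition, not part of dev.c): bringing an
 * interface administratively up or down from kernel code mirrors what
 * "ip link set" does over rtnetlink - read the current flags, flip IFF_UP
 * and let dev_change_flags() open/close the device and fire notifications.
 * Hypothetical helper; a NULL extack is acceptable here.
 */
static int example_set_admin_state(struct net_device *dev, bool up)
{
	unsigned int flags;
	int err;

	rtnl_lock();
	flags = dev_get_flags(dev);
	err = dev_change_flags(dev, up ? flags | IFF_UP : flags & ~IFF_UP, NULL);
	rtnl_unlock();
	return err;
}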
7959
7960 int __dev_set_mtu(struct net_device *dev, int new_mtu)
7961 {
7962 const struct net_device_ops *ops = dev->netdev_ops;
7963
7964 if (ops->ndo_change_mtu)
7965 return ops->ndo_change_mtu(dev, new_mtu);
7966
7967
7968 WRITE_ONCE(dev->mtu, new_mtu);
7969 return 0;
7970 }
7971 EXPORT_SYMBOL(__dev_set_mtu);
7972
7973 int dev_validate_mtu(struct net_device *dev, int new_mtu,
7974 struct netlink_ext_ack *extack)
7975 {
7976
7977 if (new_mtu < 0 || new_mtu < dev->min_mtu) {
7978 NL_SET_ERR_MSG(extack, "mtu less than device minimum");
7979 return -EINVAL;
7980 }
7981
7982 if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
7983 NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
7984 return -EINVAL;
7985 }
7986 return 0;
7987 }
7988
7989 /**
7990 * dev_set_mtu_ext - Change maximum transfer unit
7991 * @dev: device
7992 * @new_mtu: new transfer unit
7993 * @extack: netlink extended ack
7994 *
7995 * Change the maximum transfer size of the network device.
7996 */
7997 int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
7998 struct netlink_ext_ack *extack)
7999 {
8000 int err, orig_mtu;
8001
8002 if (new_mtu == dev->mtu)
8003 return 0;
8004
8005 err = dev_validate_mtu(dev, new_mtu, extack);
8006 if (err)
8007 return err;
8008
8009 if (!netif_device_present(dev))
8010 return -ENODEV;
8011
8012 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
8013 err = notifier_to_errno(err);
8014 if (err)
8015 return err;
8016
8017 orig_mtu = dev->mtu;
8018 err = __dev_set_mtu(dev, new_mtu);
8019
8020 if (!err) {
8021 err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
8022 orig_mtu);
8023 err = notifier_to_errno(err);
8024 if (err) {
8025
8026
8027
8028 __dev_set_mtu(dev, orig_mtu);
8029 call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
8030 new_mtu);
8031 }
8032 }
8033 return err;
8034 }
8035
8036 int dev_set_mtu(struct net_device *dev, int new_mtu)
8037 {
8038 struct netlink_ext_ack extack;
8039 int err;
8040
8041 memset(&extack, 0, sizeof(extack));
8042 err = dev_set_mtu_ext(dev, new_mtu, &extack);
8043 if (err && extack._msg)
8044 net_err_ratelimited("%s: %s\n", dev->name, extack._msg);
8045 return err;
8046 }
8047 EXPORT_SYMBOL(dev_set_mtu);
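/*
 * Illustrative sketch (editor's addition, not part of dev.c): a tunnel-like
 * driver shrinking the MTU of the device it rides on would call dev_set_mtu()
 * under RTNL; min_mtu/max_mtu validation and the NETDEV_PRECHANGEMTU /
 * NETDEV_CHANGEMTU notifier round trip happen inside. The helper and the
 * overhead constant are hypothetical.
 */
#define EXAMPLE_ENCAP_OVERHEAD	50	/* assumed encapsulation overhead */

static int example_shrink_mtu(struct net_device *lower)
{
	int err;

	rtnl_lock();
	err = dev_set_mtu(lower, lower->mtu - EXAMPLE_ENCAP_OVERHEAD);
	rtnl_unlock();
	return err;
}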
8048
8049 /**
8050 * dev_change_tx_queue_len - Change TX queue length of a netdevice
8051 * @dev: device
8052 * @new_len: new tx queue length
8053 */
8054 int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
8055 {
8056 unsigned int orig_len = dev->tx_queue_len;
8057 int res;
8058
8059 if (new_len != (unsigned int)new_len)
8060 return -ERANGE;
8061
8062 if (new_len != orig_len) {
8063 dev->tx_queue_len = new_len;
8064 res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
8065 res = notifier_to_errno(res);
8066 if (res)
8067 goto err_rollback;
8068 res = dev_qdisc_change_tx_queue_len(dev);
8069 if (res)
8070 goto err_rollback;
8071 }
8072
8073 return 0;
8074
8075 err_rollback:
8076 netdev_err(dev, "refused to change device tx_queue_len\n");
8077 dev->tx_queue_len = orig_len;
8078 return res;
8079 }
8080
8081
8082
8083
8084
8085
8086 void dev_set_group(struct net_device *dev, int new_group)
8087 {
8088 dev->group = new_group;
8089 }
8090 EXPORT_SYMBOL(dev_set_group);
8091
8092 /**
8093 * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
8094 * @dev: device
8095 * @addr: new address
8096 * @extack: netlink extended ack
8097 */
8098 int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
8099 struct netlink_ext_ack *extack)
8100 {
8101 struct netdev_notifier_pre_changeaddr_info info = {
8102 .info.dev = dev,
8103 .info.extack = extack,
8104 .dev_addr = addr,
8105 };
8106 int rc;
8107
8108 rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
8109 return notifier_to_errno(rc);
8110 }
8111 EXPORT_SYMBOL(dev_pre_changeaddr_notify);
8112
8113 /**
8114 * dev_set_mac_address - Change Media Access Control Address
8115 * @dev: device
8116 * @sa: new address
8117 * @extack: netlink extended ack
8118 *
8119 * Change the hardware (MAC) address of the device.
8120 */
8121 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
8122 struct netlink_ext_ack *extack)
8123 {
8124 const struct net_device_ops *ops = dev->netdev_ops;
8125 int err;
8126
8127 if (!ops->ndo_set_mac_address)
8128 return -EOPNOTSUPP;
8129 if (sa->sa_family != dev->type)
8130 return -EINVAL;
8131 if (!netif_device_present(dev))
8132 return -ENODEV;
8133 err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
8134 if (err)
8135 return err;
8136 err = ops->ndo_set_mac_address(dev, sa);
8137 if (err)
8138 return err;
8139 dev->addr_assign_type = NET_ADDR_SET;
8140 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
8141 add_device_randomness(dev->dev_addr, dev->addr_len);
8142 return 0;
8143 }
8144 EXPORT_SYMBOL(dev_set_mac_address);
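/*
 * Illustrative sketch (editor's addition, not part of dev.c): changing a MAC
 * address from kernel code uses the same path as "ip link set ... address" -
 * build a struct sockaddr of the device's type and call dev_set_mac_address()
 * under RTNL. Hypothetical helper; @new_mac is assumed to be ETH_ALEN bytes.
 */
static int example_set_mac(struct net_device *dev, const u8 *new_mac)
{
	struct sockaddr sa;
	int err;

	sa.sa_family = dev->type;		/* e.g. ARPHRD_ETHER */
	memcpy(sa.sa_data, new_mac, ETH_ALEN);

	rtnl_lock();
	err = dev_set_mac_address(dev, &sa, NULL);
	rtnl_unlock();
	return err;
}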
8145
8146 /**
8147 * dev_change_carrier - Change device carrier
8148 * @dev: device
8149 * @new_carrier: new value
8150 *
8151 * Change device carrier state via the driver's ndo_change_carrier.
8152 */
8153 int dev_change_carrier(struct net_device *dev, bool new_carrier)
8154 {
8155 const struct net_device_ops *ops = dev->netdev_ops;
8156
8157 if (!ops->ndo_change_carrier)
8158 return -EOPNOTSUPP;
8159 if (!netif_device_present(dev))
8160 return -ENODEV;
8161 return ops->ndo_change_carrier(dev, new_carrier);
8162 }
8163 EXPORT_SYMBOL(dev_change_carrier);
8164
8165
8166
8167
8168
8169
8170
8171
8172 int dev_get_phys_port_id(struct net_device *dev,
8173 struct netdev_phys_item_id *ppid)
8174 {
8175 const struct net_device_ops *ops = dev->netdev_ops;
8176
8177 if (!ops->ndo_get_phys_port_id)
8178 return -EOPNOTSUPP;
8179 return ops->ndo_get_phys_port_id(dev, ppid);
8180 }
8181 EXPORT_SYMBOL(dev_get_phys_port_id);
8182
8183
8184
8185
8186
8187
8188
8189
8190
8191 int dev_get_phys_port_name(struct net_device *dev,
8192 char *name, size_t len)
8193 {
8194 const struct net_device_ops *ops = dev->netdev_ops;
8195 int err;
8196
8197 if (ops->ndo_get_phys_port_name) {
8198 err = ops->ndo_get_phys_port_name(dev, name, len);
8199 if (err != -EOPNOTSUPP)
8200 return err;
8201 }
8202 return devlink_compat_phys_port_name_get(dev, name, len);
8203 }
8204 EXPORT_SYMBOL(dev_get_phys_port_name);
8205
8206 /**
8207 * dev_get_port_parent_id - Get the device's port parent identifier
8208 * @dev: network device
8209 * @ppid: pointer to a storage for the port's parent identifier
8210 * @recurse: allow/disallow recursion to lower devices
8211 *
8212 * Get the device's port parent identifier.
8213 */
8214 int dev_get_port_parent_id(struct net_device *dev,
8215 struct netdev_phys_item_id *ppid,
8216 bool recurse)
8217 {
8218 const struct net_device_ops *ops = dev->netdev_ops;
8219 struct netdev_phys_item_id first = { };
8220 struct net_device *lower_dev;
8221 struct list_head *iter;
8222 int err;
8223
8224 if (ops->ndo_get_port_parent_id) {
8225 err = ops->ndo_get_port_parent_id(dev, ppid);
8226 if (err != -EOPNOTSUPP)
8227 return err;
8228 }
8229
8230 err = devlink_compat_switch_id_get(dev, ppid);
8231 if (!err || err != -EOPNOTSUPP)
8232 return err;
8233
8234 if (!recurse)
8235 return -EOPNOTSUPP;
8236
8237 netdev_for_each_lower_dev(dev, lower_dev, iter) {
8238 err = dev_get_port_parent_id(lower_dev, ppid, recurse);
8239 if (err)
8240 break;
8241 if (!first.id_len)
8242 first = *ppid;
8243 else if (memcmp(&first, ppid, sizeof(*ppid)))
8244 return -ENODATA;
8245 }
8246
8247 return err;
8248 }
8249 EXPORT_SYMBOL(dev_get_port_parent_id);
8250
8251 /**
8252 * netdev_port_same_parent_id - Indicate if two network devices have
8253 * the same port parent identifier
8254 * @a: first network device
8255 * @b: second network device
8256 */
8257 bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
8258 {
8259 struct netdev_phys_item_id a_id = { };
8260 struct netdev_phys_item_id b_id = { };
8261
8262 if (dev_get_port_parent_id(a, &a_id, true) ||
8263 dev_get_port_parent_id(b, &b_id, true))
8264 return false;
8265
8266 return netdev_phys_item_id_same(&a_id, &b_id);
8267 }
8268 EXPORT_SYMBOL(netdev_port_same_parent_id);
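/*
 * Illustrative sketch (editor's addition, not part of dev.c): switchdev-style
 * code uses netdev_port_same_parent_id() to decide whether two ports sit on
 * the same switch ASIC and can therefore be bridged or bonded in hardware.
 * Hypothetical helper.
 */
static bool example_can_offload_pair(struct net_device *a, struct net_device *b)
{
	/* Both ports must report the same port parent (switch) ID. */
	return netdev_port_same_parent_id(a, b);
}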
8269
8270 /**
8271 * dev_change_proto_down - update protocol port state information
8272 * @dev: device
8273 * @proto_down: new value
8274 *
8275 * This info can be used by switch drivers to set the phys state of the
8276 * port.
8277 */
8278 int dev_change_proto_down(struct net_device *dev, bool proto_down)
8279 {
8280 const struct net_device_ops *ops = dev->netdev_ops;
8281
8282 if (!ops->ndo_change_proto_down)
8283 return -EOPNOTSUPP;
8284 if (!netif_device_present(dev))
8285 return -ENODEV;
8286 return ops->ndo_change_proto_down(dev, proto_down);
8287 }
8288 EXPORT_SYMBOL(dev_change_proto_down);
8289
8290
8291
8292
8293
8294
8295
8296
8297
8298 int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
8299 {
8300 if (proto_down)
8301 netif_carrier_off(dev);
8302 else
8303 netif_carrier_on(dev);
8304 dev->proto_down = proto_down;
8305 return 0;
8306 }
8307 EXPORT_SYMBOL(dev_change_proto_down_generic);
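/*
 * Illustrative sketch (editor's addition, not part of dev.c): a driver with
 * no special "protocol down" handling can plug the generic helper straight
 * into its ops so IFLA_PROTO_DOWN requests simply toggle carrier. The ops
 * table and the open/stop handlers are hypothetical.
 */
static int example_open(struct net_device *dev)
{
	netif_start_queue(dev);
	return 0;
}

static int example_stop(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_open		= example_open,
	.ndo_stop		= example_stop,
	.ndo_change_proto_down	= dev_change_proto_down_generic,
};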
8308
8309 u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
8310 enum bpf_netdev_command cmd)
8311 {
8312 struct netdev_bpf xdp;
8313
8314 if (!bpf_op)
8315 return 0;
8316
8317 memset(&xdp, 0, sizeof(xdp));
8318 xdp.command = cmd;
8319
8320
8321 WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
8322
8323 return xdp.prog_id;
8324 }
8325
8326 static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
8327 struct netlink_ext_ack *extack, u32 flags,
8328 struct bpf_prog *prog)
8329 {
8330 struct netdev_bpf xdp;
8331
8332 memset(&xdp, 0, sizeof(xdp));
8333 if (flags & XDP_FLAGS_HW_MODE)
8334 xdp.command = XDP_SETUP_PROG_HW;
8335 else
8336 xdp.command = XDP_SETUP_PROG;
8337 xdp.extack = extack;
8338 xdp.flags = flags;
8339 xdp.prog = prog;
8340
8341 return bpf_op(dev, &xdp);
8342 }
8343
8344 static void dev_xdp_uninstall(struct net_device *dev)
8345 {
8346 struct netdev_bpf xdp;
8347 bpf_op_t ndo_bpf;
8348
8349
8350 WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
8351
8352
8353 ndo_bpf = dev->netdev_ops->ndo_bpf;
8354 if (!ndo_bpf)
8355 return;
8356
8357 memset(&xdp, 0, sizeof(xdp));
8358 xdp.command = XDP_QUERY_PROG;
8359 WARN_ON(ndo_bpf(dev, &xdp));
8360 if (xdp.prog_id)
8361 WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
8362 NULL));
8363
8364
8365 memset(&xdp, 0, sizeof(xdp));
8366 xdp.command = XDP_QUERY_PROG_HW;
8367 if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
8368 WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
8369 NULL));
8370 }
8371
8372 /**
8373 * dev_change_xdp_fd - set or clear a bpf program for a device rx path
8374 * @dev: device
8375 * @extack: netlink extended ack
8376 * @fd: new program fd or negative value to clear
8377 * @flags: xdp-related flags
8378 *
8379 * Set or clear a bpf program for a device.
8380 */
8381 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
8382 int fd, u32 flags)
8383 {
8384 const struct net_device_ops *ops = dev->netdev_ops;
8385 enum bpf_netdev_command query;
8386 struct bpf_prog *prog = NULL;
8387 bpf_op_t bpf_op, bpf_chk;
8388 bool offload;
8389 int err;
8390
8391 ASSERT_RTNL();
8392
8393 offload = flags & XDP_FLAGS_HW_MODE;
8394 query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
8395
8396 bpf_op = bpf_chk = ops->ndo_bpf;
8397 if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) {
8398 NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode");
8399 return -EOPNOTSUPP;
8400 }
8401 if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
8402 bpf_op = generic_xdp_install;
8403 if (bpf_op == bpf_chk)
8404 bpf_chk = generic_xdp_install;
8405
8406 if (fd >= 0) {
8407 u32 prog_id;
8408
8409 if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
8410 NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
8411 return -EEXIST;
8412 }
8413
8414 prog_id = __dev_xdp_query(dev, bpf_op, query);
8415 if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
8416 NL_SET_ERR_MSG(extack, "XDP program already attached");
8417 return -EBUSY;
8418 }
8419
8420 prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
8421 bpf_op == ops->ndo_bpf);
8422 if (IS_ERR(prog))
8423 return PTR_ERR(prog);
8424
8425 if (!offload && bpf_prog_is_dev_bound(prog->aux)) {
8426 NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported");
8427 bpf_prog_put(prog);
8428 return -EINVAL;
8429 }
8430
8431
8432 if (prog->aux->id && prog->aux->id == prog_id) {
8433 bpf_prog_put(prog);
8434 return 0;
8435 }
8436 } else {
8437 if (!__dev_xdp_query(dev, bpf_op, query))
8438 return 0;
8439 }
8440
8441 err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
8442 if (err < 0 && prog)
8443 bpf_prog_put(prog);
8444
8445 return err;
8446 }
8447
8448
8449
8450
8451
8452
8453
8454
8455
8456 static int dev_new_index(struct net *net)
8457 {
8458 int ifindex = net->ifindex;
8459
8460 for (;;) {
8461 if (++ifindex <= 0)
8462 ifindex = 1;
8463 if (!__dev_get_by_index(net, ifindex))
8464 return net->ifindex = ifindex;
8465 }
8466 }
8467
8468
8469 static LIST_HEAD(net_todo_list);
8470 DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
8471
8472 static void net_set_todo(struct net_device *dev)
8473 {
8474 list_add_tail(&dev->todo_list, &net_todo_list);
8475 dev_net(dev)->dev_unreg_count++;
8476 }
8477
8478 static void rollback_registered_many(struct list_head *head)
8479 {
8480 struct net_device *dev, *tmp;
8481 LIST_HEAD(close_head);
8482
8483 BUG_ON(dev_boot_phase);
8484 ASSERT_RTNL();
8485
8486 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
8487
8488
8489
8490
8491 if (dev->reg_state == NETREG_UNINITIALIZED) {
8492 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
8493 dev->name, dev);
8494
8495 WARN_ON(1);
8496 list_del(&dev->unreg_list);
8497 continue;
8498 }
8499 dev->dismantle = true;
8500 BUG_ON(dev->reg_state != NETREG_REGISTERED);
8501 }
8502
8503
8504 list_for_each_entry(dev, head, unreg_list)
8505 list_add_tail(&dev->close_list, &close_head);
8506 dev_close_many(&close_head, true);
8507
8508 list_for_each_entry(dev, head, unreg_list) {
8509
8510 unlist_netdevice(dev);
8511
8512 dev->reg_state = NETREG_UNREGISTERING;
8513 }
8514 flush_all_backlogs();
8515
8516 synchronize_net();
8517
8518 list_for_each_entry(dev, head, unreg_list) {
8519 struct sk_buff *skb = NULL;
8520
8521
8522 dev_shutdown(dev);
8523
8524 dev_xdp_uninstall(dev);
8525
8526
8527
8528
8529 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
8530
8531 if (!dev->rtnl_link_ops ||
8532 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
8533 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
8534 GFP_KERNEL, NULL, 0);
8535
8536
8537
8538
8539 dev_uc_flush(dev);
8540 dev_mc_flush(dev);
8541
8542 if (dev->netdev_ops->ndo_uninit)
8543 dev->netdev_ops->ndo_uninit(dev);
8544
8545 if (skb)
8546 rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
8547
8548
8549 WARN_ON(netdev_has_any_upper_dev(dev));
8550 WARN_ON(netdev_has_any_lower_dev(dev));
8551
8552
8553 netdev_unregister_kobject(dev);
8554 #ifdef CONFIG_XPS
8555
8556 netif_reset_xps_queues_gt(dev, 0);
8557 #endif
8558 }
8559
8560 synchronize_net();
8561
8562 list_for_each_entry(dev, head, unreg_list)
8563 dev_put(dev);
8564 }
8565
8566 static void rollback_registered(struct net_device *dev)
8567 {
8568 LIST_HEAD(single);
8569
8570 list_add(&dev->unreg_list, &single);
8571 rollback_registered_many(&single);
8572 list_del(&single);
8573 }
8574
8575 static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
8576 struct net_device *upper, netdev_features_t features)
8577 {
8578 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
8579 netdev_features_t feature;
8580 int feature_bit;
8581
8582 for_each_netdev_feature(upper_disables, feature_bit) {
8583 feature = __NETIF_F_BIT(feature_bit);
8584 if (!(upper->wanted_features & feature)
8585 && (features & feature)) {
8586 netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
8587 &feature, upper->name);
8588 features &= ~feature;
8589 }
8590 }
8591
8592 return features;
8593 }
8594
8595 static void netdev_sync_lower_features(struct net_device *upper,
8596 struct net_device *lower, netdev_features_t features)
8597 {
8598 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
8599 netdev_features_t feature;
8600 int feature_bit;
8601
8602 for_each_netdev_feature(upper_disables, feature_bit) {
8603 feature = __NETIF_F_BIT(feature_bit);
8604 if (!(features & feature) && (lower->features & feature)) {
8605 netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
8606 &feature, lower->name);
8607 lower->wanted_features &= ~feature;
8608 __netdev_update_features(lower);
8609
8610 if (unlikely(lower->features & feature))
8611 netdev_WARN(upper, "failed to disable %pNF on %s!\n",
8612 &feature, lower->name);
8613 else
8614 netdev_features_change(lower);
8615 }
8616 }
8617 }
8618
8619 static netdev_features_t netdev_fix_features(struct net_device *dev,
8620 netdev_features_t features)
8621 {
8622
8623 if ((features & NETIF_F_HW_CSUM) &&
8624 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
8625 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
8626 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
8627 }
8628
8629
8630 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
8631 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
8632 features &= ~NETIF_F_ALL_TSO;
8633 }
8634
8635 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
8636 !(features & NETIF_F_IP_CSUM)) {
8637 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
8638 features &= ~NETIF_F_TSO;
8639 features &= ~NETIF_F_TSO_ECN;
8640 }
8641
8642 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
8643 !(features & NETIF_F_IPV6_CSUM)) {
8644 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
8645 features &= ~NETIF_F_TSO6;
8646 }
8647
8648
8649 if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
8650 features &= ~NETIF_F_TSO_MANGLEID;
8651
8652
8653 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
8654 features &= ~NETIF_F_TSO_ECN;
8655
8656
8657 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
8658 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
8659 features &= ~NETIF_F_GSO;
8660 }
8661
8662
8663 if ((features & dev->gso_partial_features) &&
8664 !(features & NETIF_F_GSO_PARTIAL)) {
8665 netdev_dbg(dev,
8666 "Dropping partially supported GSO features since no GSO partial.\n");
8667 features &= ~dev->gso_partial_features;
8668 }
8669
8670 if (!(features & NETIF_F_RXCSUM)) {
8671
8672
8673
8674
8675
8676 if (features & NETIF_F_GRO_HW) {
8677 netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n");
8678 features &= ~NETIF_F_GRO_HW;
8679 }
8680 }
8681
8682
8683 if (features & NETIF_F_RXFCS) {
8684 if (features & NETIF_F_LRO) {
8685 netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
8686 features &= ~NETIF_F_LRO;
8687 }
8688
8689 if (features & NETIF_F_GRO_HW) {
8690 netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
8691 features &= ~NETIF_F_GRO_HW;
8692 }
8693 }
8694
8695 return features;
8696 }
8697
8698 int __netdev_update_features(struct net_device *dev)
8699 {
8700 struct net_device *upper, *lower;
8701 netdev_features_t features;
8702 struct list_head *iter;
8703 int err = -1;
8704
8705 ASSERT_RTNL();
8706
8707 features = netdev_get_wanted_features(dev);
8708
8709 if (dev->netdev_ops->ndo_fix_features)
8710 features = dev->netdev_ops->ndo_fix_features(dev, features);
8711
8712
8713 features = netdev_fix_features(dev, features);
8714
8715
8716 netdev_for_each_upper_dev_rcu(dev, upper, iter)
8717 features = netdev_sync_upper_features(dev, upper, features);
8718
8719 if (dev->features == features)
8720 goto sync_lower;
8721
8722 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
8723 &dev->features, &features);
8724
8725 if (dev->netdev_ops->ndo_set_features)
8726 err = dev->netdev_ops->ndo_set_features(dev, features);
8727 else
8728 err = 0;
8729
8730 if (unlikely(err < 0)) {
8731 netdev_err(dev,
8732 "set_features() failed (%d); wanted %pNF, left %pNF\n",
8733 err, &features, &dev->features);
8734
8735
8736
8737 return -1;
8738 }
8739
8740 sync_lower:
8741
8742
8743
8744 netdev_for_each_lower_dev(dev, lower, iter)
8745 netdev_sync_lower_features(dev, lower, features);
8746
8747 if (!err) {
8748 netdev_features_t diff = features ^ dev->features;
8749
8750 if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) {
8751
8752
8753
8754
8755
8756
8757
8758 if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
8759 dev->features = features;
8760 udp_tunnel_get_rx_info(dev);
8761 } else {
8762 udp_tunnel_drop_rx_info(dev);
8763 }
8764 }
8765
8766 if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
8767 if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
8768 dev->features = features;
8769 err |= vlan_get_rx_ctag_filter_info(dev);
8770 } else {
8771 vlan_drop_rx_ctag_filter_info(dev);
8772 }
8773 }
8774
8775 if (diff & NETIF_F_HW_VLAN_STAG_FILTER) {
8776 if (features & NETIF_F_HW_VLAN_STAG_FILTER) {
8777 dev->features = features;
8778 err |= vlan_get_rx_stag_filter_info(dev);
8779 } else {
8780 vlan_drop_rx_stag_filter_info(dev);
8781 }
8782 }
8783
8784 dev->features = features;
8785 }
8786
8787 return err < 0 ? 0 : 1;
8788 }
8789
8790 /**
8791 * netdev_update_features - recalculate device features
8792 * @dev: the device to check
8793 *
8794 * Recalculate dev->features set and send notifications if it
8795 * has changed. Should be called after driver or hardware dependent
8796 * conditions might have changed that influence the features.
8797 */
8798 void netdev_update_features(struct net_device *dev)
8799 {
8800 if (__netdev_update_features(dev))
8801 netdev_features_change(dev);
8802 }
8803 EXPORT_SYMBOL(netdev_update_features);
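/*
 * Illustrative sketch (editor's addition, not part of dev.c): the usual
 * driver pattern around netdev_update_features(). Some private state (here a
 * hypothetical "encap enabled" flag) changes what the hardware can offload;
 * the driver's ndo_fix_features callback masks the affected bits and the
 * driver then asks the core to re-run the fix-up/sync/notify sequence above.
 * All example_* names are hypothetical, and example_fix_features() is assumed
 * to be wired up as .ndo_fix_features.
 */
struct example_priv {
	bool encap_enabled;
};

static netdev_features_t example_fix_features(struct net_device *dev,
					      netdev_features_t features)
{
	struct example_priv *priv = netdev_priv(dev);

	if (priv->encap_enabled)
		features &= ~NETIF_F_LRO;	/* assumed hardware restriction */
	return features;
}

/* Called with RTNL held whenever the encap state flips. */
static void example_encap_changed(struct net_device *dev, bool enabled)
{
	struct example_priv *priv = netdev_priv(dev);

	priv->encap_enabled = enabled;
	netdev_update_features(dev);
}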
8804
8805
8806
8807
8808
8809
8810
8811
8812
8813
8814
8815 void netdev_change_features(struct net_device *dev)
8816 {
8817 __netdev_update_features(dev);
8818 netdev_features_change(dev);
8819 }
8820 EXPORT_SYMBOL(netdev_change_features);
8821
8822
8823
8824
8825
8826
8827
8828
8829
8830
8831 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
8832 struct net_device *dev)
8833 {
8834 if (rootdev->operstate == IF_OPER_DORMANT)
8835 netif_dormant_on(dev);
8836 else
8837 netif_dormant_off(dev);
8838
8839 if (netif_carrier_ok(rootdev))
8840 netif_carrier_on(dev);
8841 else
8842 netif_carrier_off(dev);
8843 }
8844 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
8845
8846 static int netif_alloc_rx_queues(struct net_device *dev)
8847 {
8848 unsigned int i, count = dev->num_rx_queues;
8849 struct netdev_rx_queue *rx;
8850 size_t sz = count * sizeof(*rx);
8851 int err = 0;
8852
8853 BUG_ON(count < 1);
8854
8855 rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
8856 if (!rx)
8857 return -ENOMEM;
8858
8859 dev->_rx = rx;
8860
8861 for (i = 0; i < count; i++) {
8862 rx[i].dev = dev;
8863
8864
8865 err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
8866 if (err < 0)
8867 goto err_rxq_info;
8868 }
8869 return 0;
8870
8871 err_rxq_info:
8872
8873 while (i--)
8874 xdp_rxq_info_unreg(&rx[i].xdp_rxq);
8875 kvfree(dev->_rx);
8876 dev->_rx = NULL;
8877 return err;
8878 }
8879
8880 static void netif_free_rx_queues(struct net_device *dev)
8881 {
8882 unsigned int i, count = dev->num_rx_queues;
8883
8884
8885 if (!dev->_rx)
8886 return;
8887
8888 for (i = 0; i < count; i++)
8889 xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq);
8890
8891 kvfree(dev->_rx);
8892 }
8893
8894 static void netdev_init_one_queue(struct net_device *dev,
8895 struct netdev_queue *queue, void *_unused)
8896 {
8897
8898 spin_lock_init(&queue->_xmit_lock);
8899 lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key);
8900 queue->xmit_lock_owner = -1;
8901 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
8902 queue->dev = dev;
8903 #ifdef CONFIG_BQL
8904 dql_init(&queue->dql, HZ);
8905 #endif
8906 }
8907
8908 static void netif_free_tx_queues(struct net_device *dev)
8909 {
8910 kvfree(dev->_tx);
8911 }
8912
8913 static int netif_alloc_netdev_queues(struct net_device *dev)
8914 {
8915 unsigned int count = dev->num_tx_queues;
8916 struct netdev_queue *tx;
8917 size_t sz = count * sizeof(*tx);
8918
8919 if (count < 1 || count > 0xffff)
8920 return -EINVAL;
8921
8922 tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
8923 if (!tx)
8924 return -ENOMEM;
8925
8926 dev->_tx = tx;
8927
8928 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
8929 spin_lock_init(&dev->tx_global_lock);
8930
8931 return 0;
8932 }
8933
8934 void netif_tx_stop_all_queues(struct net_device *dev)
8935 {
8936 unsigned int i;
8937
8938 for (i = 0; i < dev->num_tx_queues; i++) {
8939 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
8940
8941 netif_tx_stop_queue(txq);
8942 }
8943 }
8944 EXPORT_SYMBOL(netif_tx_stop_all_queues);
8945
8946 static void netdev_register_lockdep_key(struct net_device *dev)
8947 {
8948 lockdep_register_key(&dev->qdisc_tx_busylock_key);
8949 lockdep_register_key(&dev->qdisc_running_key);
8950 lockdep_register_key(&dev->qdisc_xmit_lock_key);
8951 lockdep_register_key(&dev->addr_list_lock_key);
8952 }
8953
8954 static void netdev_unregister_lockdep_key(struct net_device *dev)
8955 {
8956 lockdep_unregister_key(&dev->qdisc_tx_busylock_key);
8957 lockdep_unregister_key(&dev->qdisc_running_key);
8958 lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
8959 lockdep_unregister_key(&dev->addr_list_lock_key);
8960 }
8961
8962 void netdev_update_lockdep_key(struct net_device *dev)
8963 {
8964 lockdep_unregister_key(&dev->addr_list_lock_key);
8965 lockdep_register_key(&dev->addr_list_lock_key);
8966
8967 lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
8968 }
8969 EXPORT_SYMBOL(netdev_update_lockdep_key);
8970
8971
8972 /**
8973 * register_netdevice - register a network device
8974 * @dev: device to register
8975 *
8976 * Take a completed network device structure and add it to the kernel
8977 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
8978 * chain. 0 is returned on success. A negative errno code is returned
8979 * on a failure to set up the device, or if the name is a duplicate.
8980 *
8981 * Callers must hold the rtnl semaphore. You may want
8982 * register_netdev() instead of this.
8983 *
8984 * BUGS:
8985 * The locking appears insufficient to guarantee two parallel registers
8986 * will not get the same name.
8987 */
8988 int register_netdevice(struct net_device *dev)
8989 {
8990 int ret;
8991 struct net *net = dev_net(dev);
8992
8993 BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE <
8994 NETDEV_FEATURE_COUNT);
8995 BUG_ON(dev_boot_phase);
8996 ASSERT_RTNL();
8997
8998 might_sleep();
8999
9000
9001 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
9002 BUG_ON(!net);
9003
9004 spin_lock_init(&dev->addr_list_lock);
9005 lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
9006
9007 ret = dev_get_valid_name(net, dev, dev->name);
9008 if (ret < 0)
9009 goto out;
9010
9011
9012 if (dev->netdev_ops->ndo_init) {
9013 ret = dev->netdev_ops->ndo_init(dev);
9014 if (ret) {
9015 if (ret > 0)
9016 ret = -EIO;
9017 goto out;
9018 }
9019 }
9020
9021 if (((dev->hw_features | dev->features) &
9022 NETIF_F_HW_VLAN_CTAG_FILTER) &&
9023 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
9024 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
9025 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
9026 ret = -EINVAL;
9027 goto err_uninit;
9028 }
9029
9030 ret = -EBUSY;
9031 if (!dev->ifindex)
9032 dev->ifindex = dev_new_index(net);
9033 else if (__dev_get_by_index(net, dev->ifindex))
9034 goto err_uninit;
9035
9036
9037
9038
9039 dev->hw_features |= NETIF_F_SOFT_FEATURES;
9040 dev->features |= NETIF_F_SOFT_FEATURES;
9041
9042 if (dev->netdev_ops->ndo_udp_tunnel_add) {
9043 dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
9044 dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
9045 }
9046
9047 dev->wanted_features = dev->features & dev->hw_features;
9048
9049 if (!(dev->flags & IFF_LOOPBACK))
9050 dev->hw_features |= NETIF_F_NOCACHE_COPY;
9051
9052
9053
9054
9055
9056
9057 if (dev->hw_features & NETIF_F_TSO)
9058 dev->hw_features |= NETIF_F_TSO_MANGLEID;
9059 if (dev->vlan_features & NETIF_F_TSO)
9060 dev->vlan_features |= NETIF_F_TSO_MANGLEID;
9061 if (dev->mpls_features & NETIF_F_TSO)
9062 dev->mpls_features |= NETIF_F_TSO_MANGLEID;
9063 if (dev->hw_enc_features & NETIF_F_TSO)
9064 dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
9065
9066
9067
9068 dev->vlan_features |= NETIF_F_HIGHDMA;
9069
9070
9071
9072 dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;
9073
9074
9075
9076 dev->mpls_features |= NETIF_F_SG;
9077
9078 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
9079 ret = notifier_to_errno(ret);
9080 if (ret)
9081 goto err_uninit;
9082
9083 ret = netdev_register_kobject(dev);
9084 if (ret) {
9085 dev->reg_state = NETREG_UNREGISTERED;
9086 goto err_uninit;
9087 }
9088 dev->reg_state = NETREG_REGISTERED;
9089
9090 __netdev_update_features(dev);
9091
9092
9093
9094
9095
9096
9097 set_bit(__LINK_STATE_PRESENT, &dev->state);
9098
9099 linkwatch_init_dev(dev);
9100
9101 dev_init_scheduler(dev);
9102 dev_hold(dev);
9103 list_netdevice(dev);
9104 add_device_randomness(dev->dev_addr, dev->addr_len);
9105
9106
9107
9108
9109
9110 if (dev->addr_assign_type == NET_ADDR_PERM)
9111 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
9112
9113
9114 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
9115 ret = notifier_to_errno(ret);
9116 if (ret) {
9117 rollback_registered(dev);
9118 rcu_barrier();
9119
9120 dev->reg_state = NETREG_UNREGISTERED;
9121 }
9122
9123
9124
9125
9126 if (!dev->rtnl_link_ops ||
9127 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
9128 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
9129
9130 out:
9131 return ret;
9132
9133 err_uninit:
9134 if (dev->netdev_ops->ndo_uninit)
9135 dev->netdev_ops->ndo_uninit(dev);
9136 if (dev->priv_destructor)
9137 dev->priv_destructor(dev);
9138 goto out;
9139 }
9140 EXPORT_SYMBOL(register_netdevice);
9141
9142
9143
9144
9145
9146
9147
9148
9149
9150
9151
9152 int init_dummy_netdev(struct net_device *dev)
9153 {
9154
9155
9156
9157
9158
9159 memset(dev, 0, sizeof(struct net_device));
9160
9161
9162
9163
9164 dev->reg_state = NETREG_DUMMY;
9165
9166
9167 INIT_LIST_HEAD(&dev->napi_list);
9168
9169
9170 set_bit(__LINK_STATE_PRESENT, &dev->state);
9171 set_bit(__LINK_STATE_START, &dev->state);
9172
9173
9174 dev_net_set(dev, &init_net);
9175
9176
9177
9178
9179
9180
9181 return 0;
9182 }
9183 EXPORT_SYMBOL_GPL(init_dummy_netdev);
9184
9185
9186 /**
9187 * register_netdev - register a network device
9188 * @dev: device to register
9189 *
9190 * Take a completed network device structure and add it to the kernel
9191 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
9192 * chain. 0 is returned on success. A negative errno code is returned
9193 * on a failure to set up the device, or if the name is a duplicate.
9194 *
9195 * This is a wrapper around register_netdevice that takes the rtnl semaphore
9196 * and expands the device name if you passed a format string to
9197 * alloc_netdev.
9198 */
9199 int register_netdev(struct net_device *dev)
9200 {
9201 int err;
9202
9203 if (rtnl_lock_killable())
9204 return -EINTR;
9205 err = register_netdevice(dev);
9206 rtnl_unlock();
9207 return err;
9208 }
9209 EXPORT_SYMBOL(register_netdev);
9210
9211 int netdev_refcnt_read(const struct net_device *dev)
9212 {
9213 int i, refcnt = 0;
9214
9215 for_each_possible_cpu(i)
9216 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
9217 return refcnt;
9218 }
9219 EXPORT_SYMBOL(netdev_refcnt_read);
9220
9221
9222
9223
9224
9225
9226
9227
9228
9229
9230
9231
9232
9233 static void netdev_wait_allrefs(struct net_device *dev)
9234 {
9235 unsigned long rebroadcast_time, warning_time;
9236 int refcnt;
9237
9238 linkwatch_forget_dev(dev);
9239
9240 rebroadcast_time = warning_time = jiffies;
9241 refcnt = netdev_refcnt_read(dev);
9242
9243 while (refcnt != 0) {
9244 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
9245 rtnl_lock();
9246
9247
9248 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
9249
9250 __rtnl_unlock();
9251 rcu_barrier();
9252 rtnl_lock();
9253
9254 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
9255 &dev->state)) {
9256
9257
9258
9259
9260
9261
9262 linkwatch_run_queue();
9263 }
9264
9265 __rtnl_unlock();
9266
9267 rebroadcast_time = jiffies;
9268 }
9269
9270 msleep(250);
9271
9272 refcnt = netdev_refcnt_read(dev);
9273
9274 if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
9275 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
9276 dev->name, refcnt);
9277 warning_time = jiffies;
9278 }
9279 }
9280 }
9281
9282
9283
9284
9285
9286
9287
9288
9289
9290
9291
9292
9293
9294
9295
9296
9297
9298
9299
9300
9301
9302
9303
9304
9305
9306 void netdev_run_todo(void)
9307 {
9308 struct list_head list;
9309
9310
9311 list_replace_init(&net_todo_list, &list);
9312
9313 __rtnl_unlock();
9314
9315
9316
9317 if (!list_empty(&list))
9318 rcu_barrier();
9319
9320 while (!list_empty(&list)) {
9321 struct net_device *dev
9322 = list_first_entry(&list, struct net_device, todo_list);
9323 list_del(&dev->todo_list);
9324
9325 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
9326 pr_err("network todo '%s' but state %d\n",
9327 dev->name, dev->reg_state);
9328 dump_stack();
9329 continue;
9330 }
9331
9332 dev->reg_state = NETREG_UNREGISTERED;
9333
9334 netdev_wait_allrefs(dev);
9335
9336
9337 BUG_ON(netdev_refcnt_read(dev));
9338 BUG_ON(!list_empty(&dev->ptype_all));
9339 BUG_ON(!list_empty(&dev->ptype_specific));
9340 WARN_ON(rcu_access_pointer(dev->ip_ptr));
9341 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
9342 #if IS_ENABLED(CONFIG_DECNET)
9343 WARN_ON(dev->dn_ptr);
9344 #endif
9345 if (dev->priv_destructor)
9346 dev->priv_destructor(dev);
9347 if (dev->needs_free_netdev)
9348 free_netdev(dev);
9349
9350
9351 rtnl_lock();
9352 dev_net(dev)->dev_unreg_count--;
9353 __rtnl_unlock();
9354 wake_up(&netdev_unregistering_wq);
9355
9356
9357 kobject_put(&dev->dev.kobj);
9358 }
9359 }
9360
9361
9362
9363
9364
9365
9366 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
9367 const struct net_device_stats *netdev_stats)
9368 {
9369 #if BITS_PER_LONG == 64
9370 BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
9371 memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
9372
9373 memset((char *)stats64 + sizeof(*netdev_stats), 0,
9374 sizeof(*stats64) - sizeof(*netdev_stats));
9375 #else
9376 size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
9377 const unsigned long *src = (const unsigned long *)netdev_stats;
9378 u64 *dst = (u64 *)stats64;
9379
9380 BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
9381 for (i = 0; i < n; i++)
9382 dst[i] = src[i];
9383
9384 memset((char *)stats64 + n * sizeof(u64), 0,
9385 sizeof(*stats64) - n * sizeof(u64));
9386 #endif
9387 }
9388 EXPORT_SYMBOL(netdev_stats_to_stats64);
9389
9390 /**
9391 * dev_get_stats - get network device statistics
9392 * @dev: device to get statistics from
9393 * @storage: place to store stats
9394 *
9395 * Get network statistics from device. Return @storage.
9396 * The device driver may provide its own method by setting
9397 * dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
9398 * otherwise the internal statistics structure is used.
9399 */
9400 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
9401 struct rtnl_link_stats64 *storage)
9402 {
9403 const struct net_device_ops *ops = dev->netdev_ops;
9404
9405 if (ops->ndo_get_stats64) {
9406 memset(storage, 0, sizeof(*storage));
9407 ops->ndo_get_stats64(dev, storage);
9408 } else if (ops->ndo_get_stats) {
9409 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
9410 } else {
9411 netdev_stats_to_stats64(storage, &dev->stats);
9412 }
9413 storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
9414 storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
9415 storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
9416 return storage;
9417 }
9418 EXPORT_SYMBOL(dev_get_stats);
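/*
 * Illustrative sketch (editor's addition, not part of dev.c): dev_get_stats()
 * hides whether the driver implements ndo_get_stats64, the legacy
 * ndo_get_stats, or neither, so monitoring code only ever sees
 * struct rtnl_link_stats64. Hypothetical helper.
 */
static void example_log_drops(struct net_device *dev)
{
	struct rtnl_link_stats64 stats;

	dev_get_stats(dev, &stats);
	if (stats.rx_dropped || stats.tx_dropped)
		netdev_info(dev, "rx_dropped=%llu tx_dropped=%llu\n",
			    stats.rx_dropped, stats.tx_dropped);
}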
9419
9420 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
9421 {
9422 struct netdev_queue *queue = dev_ingress_queue(dev);
9423
9424 #ifdef CONFIG_NET_CLS_ACT
9425 if (queue)
9426 return queue;
9427 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
9428 if (!queue)
9429 return NULL;
9430 netdev_init_one_queue(dev, queue, NULL);
9431 RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
9432 queue->qdisc_sleeping = &noop_qdisc;
9433 rcu_assign_pointer(dev->ingress_queue, queue);
9434 #endif
9435 return queue;
9436 }
9437
9438 static const struct ethtool_ops default_ethtool_ops;
9439
9440 void netdev_set_default_ethtool_ops(struct net_device *dev,
9441 const struct ethtool_ops *ops)
9442 {
9443 if (dev->ethtool_ops == &default_ethtool_ops)
9444 dev->ethtool_ops = ops;
9445 }
9446 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
9447
9448 void netdev_freemem(struct net_device *dev)
9449 {
9450 char *addr = (char *)dev - dev->padded;
9451
9452 kvfree(addr);
9453 }
9454
9455 /**
9456 * alloc_netdev_mqs - allocate network device
9457 * @sizeof_priv: size of private data to allocate space for
9458 * @name: device name format string
9459 * @name_assign_type: origin of device name
9460 * @setup: callback to initialize device
9461 * @txqs: the number of TX subqueues to allocate
9462 * @rxqs: the number of RX subqueues to allocate
9463 *
9464 * Allocates a struct net_device with private data area for driver use
9465 * and performs basic initialization. Also allocates subqueue structs
9466 * for each queue on the device.
9467 */
9468 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
9469 unsigned char name_assign_type,
9470 void (*setup)(struct net_device *),
9471 unsigned int txqs, unsigned int rxqs)
9472 {
9473 struct net_device *dev;
9474 unsigned int alloc_size;
9475 struct net_device *p;
9476
9477 BUG_ON(strlen(name) >= sizeof(dev->name));
9478
9479 if (txqs < 1) {
9480 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
9481 return NULL;
9482 }
9483
9484 if (rxqs < 1) {
9485 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
9486 return NULL;
9487 }
9488
9489 alloc_size = sizeof(struct net_device);
9490 if (sizeof_priv) {
9491
9492 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
9493 alloc_size += sizeof_priv;
9494 }
9495
9496 alloc_size += NETDEV_ALIGN - 1;
9497
9498 p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
9499 if (!p)
9500 return NULL;
9501
9502 dev = PTR_ALIGN(p, NETDEV_ALIGN);
9503 dev->padded = (char *)dev - (char *)p;
9504
9505 dev->pcpu_refcnt = alloc_percpu(int);
9506 if (!dev->pcpu_refcnt)
9507 goto free_dev;
9508
9509 if (dev_addr_init(dev))
9510 goto free_pcpu;
9511
9512 dev_mc_init(dev);
9513 dev_uc_init(dev);
9514
9515 dev_net_set(dev, &init_net);
9516
9517 netdev_register_lockdep_key(dev);
9518
9519 dev->gso_max_size = GSO_MAX_SIZE;
9520 dev->gso_max_segs = GSO_MAX_SEGS;
9521 dev->upper_level = 1;
9522 dev->lower_level = 1;
9523
9524 INIT_LIST_HEAD(&dev->napi_list);
9525 INIT_LIST_HEAD(&dev->unreg_list);
9526 INIT_LIST_HEAD(&dev->close_list);
9527 INIT_LIST_HEAD(&dev->link_watch_list);
9528 INIT_LIST_HEAD(&dev->adj_list.upper);
9529 INIT_LIST_HEAD(&dev->adj_list.lower);
9530 INIT_LIST_HEAD(&dev->ptype_all);
9531 INIT_LIST_HEAD(&dev->ptype_specific);
9532 #ifdef CONFIG_NET_SCHED
9533 hash_init(dev->qdisc_hash);
9534 #endif
9535 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
9536 setup(dev);
9537
9538 if (!dev->tx_queue_len) {
9539 dev->priv_flags |= IFF_NO_QUEUE;
9540 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
9541 }
9542
9543 dev->num_tx_queues = txqs;
9544 dev->real_num_tx_queues = txqs;
9545 if (netif_alloc_netdev_queues(dev))
9546 goto free_all;
9547
9548 dev->num_rx_queues = rxqs;
9549 dev->real_num_rx_queues = rxqs;
9550 if (netif_alloc_rx_queues(dev))
9551 goto free_all;
9552
9553 strcpy(dev->name, name);
9554 dev->name_assign_type = name_assign_type;
9555 dev->group = INIT_NETDEV_GROUP;
9556 if (!dev->ethtool_ops)
9557 dev->ethtool_ops = &default_ethtool_ops;
9558
9559 nf_hook_ingress_init(dev);
9560
9561 return dev;
9562
9563 free_all:
9564 free_netdev(dev);
9565 return NULL;
9566
9567 free_pcpu:
9568 free_percpu(dev->pcpu_refcnt);
9569 free_dev:
9570 netdev_freemem(dev);
9571 return NULL;
9572 }
9573 EXPORT_SYMBOL(alloc_netdev_mqs);
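/*
 * Illustrative sketch (editor's addition, not part of dev.c): the usual
 * lifecycle around the allocator above - allocate with a setup callback and
 * queue counts, register, and tear down in reverse order. The example uses
 * no private area (sizeof_priv == 0); names and queue counts are hypothetical.
 */
static struct net_device *example_create(void)
{
	struct net_device *dev;

	dev = alloc_netdev_mqs(0, "exmpl%d", NET_NAME_ENUM, ether_setup, 4, 4);
	if (!dev)
		return NULL;

	if (register_netdev(dev)) {
		free_netdev(dev);	/* register failed, release the allocation */
		return NULL;
	}
	return dev;
}

static void example_destroy(struct net_device *dev)
{
	unregister_netdev(dev);		/* takes RTNL and runs the todo list */
	free_netdev(dev);		/* drops the final device reference */
}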
9574
9575 /**
9576 * free_netdev - free network device
9577 * @dev: device
9578 *
9579 * This function does the last stage of destroying an allocated device
9580 * interface. The reference to the device object is released. If this
9581 * is the last reference then it will be freed. Must be called in process
9582 * context.
9583 */
9584 void free_netdev(struct net_device *dev)
9585 {
9586 struct napi_struct *p, *n;
9587
9588 might_sleep();
9589 netif_free_tx_queues(dev);
9590 netif_free_rx_queues(dev);
9591
9592 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
9593
9594
9595 dev_addr_flush(dev);
9596
9597 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
9598 netif_napi_del(p);
9599
9600 free_percpu(dev->pcpu_refcnt);
9601 dev->pcpu_refcnt = NULL;
9602
9603 netdev_unregister_lockdep_key(dev);
9604
9605
9606 if (dev->reg_state == NETREG_UNINITIALIZED) {
9607 netdev_freemem(dev);
9608 return;
9609 }
9610
9611 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
9612 dev->reg_state = NETREG_RELEASED;
9613
9614
9615 put_device(&dev->dev);
9616 }
9617 EXPORT_SYMBOL(free_netdev);
9618
9619
9620
9621
9622
9623
9624
9625 void synchronize_net(void)
9626 {
9627 might_sleep();
9628 if (rtnl_is_locked())
9629 synchronize_rcu_expedited();
9630 else
9631 synchronize_rcu();
9632 }
9633 EXPORT_SYMBOL(synchronize_net);
9634
9635
9636
9637
9638
9639
9640
9641
9642
9643
9644
9645
9646
9647
9648 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
9649 {
9650 ASSERT_RTNL();
9651
9652 if (head) {
9653 list_move_tail(&dev->unreg_list, head);
9654 } else {
9655 rollback_registered(dev);
9656
9657 net_set_todo(dev);
9658 }
9659 }
9660 EXPORT_SYMBOL(unregister_netdevice_queue);
9661
9662
9663
9664
9665
9666
9667
9668
9669 void unregister_netdevice_many(struct list_head *head)
9670 {
9671 struct net_device *dev;
9672
9673 if (!list_empty(head)) {
9674 rollback_registered_many(head);
9675 list_for_each_entry(dev, head, unreg_list)
9676 net_set_todo(dev);
9677 list_del(head);
9678 }
9679 }
9680 EXPORT_SYMBOL(unregister_netdevice_many);
9681
9682 /**
9683 * unregister_netdev - remove device from the kernel
9684 * @dev: device
9685 *
9686 * This function shuts down a device interface and removes it
9687 * from the kernel tables.
9688 *
9689 * This is just a wrapper for unregister_netdevice that takes
9690 * the rtnl semaphore. In general you want to use this and not
9691 * unregister_netdevice.
9692 */
9693 void unregister_netdev(struct net_device *dev)
9694 {
9695 rtnl_lock();
9696 unregister_netdevice(dev);
9697 rtnl_unlock();
9698 }
9699 EXPORT_SYMBOL(unregister_netdev);
9700
9701
9702
9703
9704
9705
9706
9707
9708
9709
9710
9711
9712
9713
9714
9715 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
9716 {
9717 int err, new_nsid, new_ifindex;
9718
9719 ASSERT_RTNL();
9720
9721
9722 err = -EINVAL;
9723 if (dev->features & NETIF_F_NETNS_LOCAL)
9724 goto out;
9725
9726
9727 if (dev->reg_state != NETREG_REGISTERED)
9728 goto out;
9729
9730
9731 err = 0;
9732 if (net_eq(dev_net(dev), net))
9733 goto out;
9734
9735
9736
9737
9738 err = -EEXIST;
9739 if (__dev_get_by_name(net, dev->name)) {
9740
9741 if (!pat)
9742 goto out;
9743 err = dev_get_valid_name(net, dev, pat);
9744 if (err < 0)
9745 goto out;
9746 }
9747
9748
9749
9750
9751
9752
9753 dev_close(dev);
9754
9755
9756 unlist_netdevice(dev);
9757
9758 synchronize_net();
9759
9760
9761 dev_shutdown(dev);
9762
9763
9764
9765
9766
9767
9768
9769
9770 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
9771 rcu_barrier();
9772
9773 new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
9774
9775 if (__dev_get_by_index(net, dev->ifindex))
9776 new_ifindex = dev_new_index(net);
9777 else
9778 new_ifindex = dev->ifindex;
9779
9780 rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
9781 new_ifindex);
9782
9783
9784
9785
9786 dev_uc_flush(dev);
9787 dev_mc_flush(dev);
9788
9789
9790 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
9791 netdev_adjacent_del_links(dev);
9792
9793
9794 dev_net_set(dev, net);
9795 dev->ifindex = new_ifindex;
9796
9797
9798 kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
9799 netdev_adjacent_add_links(dev);
9800
9801
9802 err = device_rename(&dev->dev, dev->name);
9803 WARN_ON(err);
9804
9805
9806 list_netdevice(dev);
9807
9808
9809 call_netdevice_notifiers(NETDEV_REGISTER, dev);
9810
9811
9812
9813
9814
9815 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
9816
9817 synchronize_net();
9818 err = 0;
9819 out:
9820 return err;
9821 }
9822 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
9823
9824 static int dev_cpu_dead(unsigned int oldcpu)
9825 {
9826 struct sk_buff **list_skb;
9827 struct sk_buff *skb;
9828 unsigned int cpu;
9829 struct softnet_data *sd, *oldsd, *remsd = NULL;
9830
9831 local_irq_disable();
9832 cpu = smp_processor_id();
9833 sd = &per_cpu(softnet_data, cpu);
9834 oldsd = &per_cpu(softnet_data, oldcpu);
9835
9836
9837 list_skb = &sd->completion_queue;
9838 while (*list_skb)
9839 list_skb = &(*list_skb)->next;
9840
9841 *list_skb = oldsd->completion_queue;
9842 oldsd->completion_queue = NULL;
9843
9844
9845 if (oldsd->output_queue) {
9846 *sd->output_queue_tailp = oldsd->output_queue;
9847 sd->output_queue_tailp = oldsd->output_queue_tailp;
9848 oldsd->output_queue = NULL;
9849 oldsd->output_queue_tailp = &oldsd->output_queue;
9850 }
9851
9852
9853
9854
9855 while (!list_empty(&oldsd->poll_list)) {
9856 struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
9857 struct napi_struct,
9858 poll_list);
9859
9860 list_del_init(&napi->poll_list);
9861 if (napi->poll == process_backlog)
9862 napi->state = 0;
9863 else
9864 ____napi_schedule(sd, napi);
9865 }
9866
9867 raise_softirq_irqoff(NET_TX_SOFTIRQ);
9868 local_irq_enable();
9869
9870 #ifdef CONFIG_RPS
9871 remsd = oldsd->rps_ipi_list;
9872 oldsd->rps_ipi_list = NULL;
9873 #endif
9874
9875 net_rps_send_ipi(remsd);
9876
9877
9878 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
9879 netif_rx_ni(skb);
9880 input_queue_head_incr(oldsd);
9881 }
9882 while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
9883 netif_rx_ni(skb);
9884 input_queue_head_incr(oldsd);
9885 }
9886
9887 return 0;
9888 }
9889
9890 /**
9891 * netdev_increment_features - increment feature set by one
9892 * @all: current feature set
9893 * @one: new feature set
9894 * @mask: mask feature set
9895 *
9896 * Computes a new feature set after adding a device with feature set
9897 * @one to the master device with current feature set @all. Will not
9898 * enable anything that is off in @mask. Returns the new feature set.
9899 */
9900 netdev_features_t netdev_increment_features(netdev_features_t all,
9901 netdev_features_t one, netdev_features_t mask)
9902 {
9903 if (mask & NETIF_F_HW_CSUM)
9904 mask |= NETIF_F_CSUM_MASK;
9905 mask |= NETIF_F_VLAN_CHALLENGED;
9906
9907 all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask;
9908 all &= one | ~NETIF_F_ALL_FOR_ALL;
9909
9910
9911 if (all & NETIF_F_HW_CSUM)
9912 all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);
9913
9914 return all;
9915 }
9916 EXPORT_SYMBOL(netdev_increment_features);
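/*
 * Illustrative sketch (editor's addition, not part of dev.c): an aggregating
 * device (bond/bridge style) recomputes its offload set by folding every
 * lower device's features into a running set with
 * netdev_increment_features(). The helper, the choice of hw_features as the
 * mask and the NETIF_F_ONE_FOR_ALL seed follow one common pattern and are
 * assumptions; must run under RTNL because of the lower-dev walk.
 */
static netdev_features_t example_master_features(struct net_device *master)
{
	netdev_features_t mask = master->hw_features;
	netdev_features_t features = mask & ~NETIF_F_ONE_FOR_ALL;
	struct net_device *lower;
	struct list_head *iter;

	netdev_for_each_lower_dev(master, lower, iter)
		features = netdev_increment_features(features,
						     lower->features, mask);
	return features;
}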
9917
9918 static struct hlist_head * __net_init netdev_create_hash(void)
9919 {
9920 int i;
9921 struct hlist_head *hash;
9922
9923 hash = kmalloc_array(NETDEV_HASHENTRIES, sizeof(*hash), GFP_KERNEL);
9924 if (hash != NULL)
9925 for (i = 0; i < NETDEV_HASHENTRIES; i++)
9926 INIT_HLIST_HEAD(&hash[i]);
9927
9928 return hash;
9929 }
9930
9931
9932 static int __net_init netdev_init(struct net *net)
9933 {
9934 BUILD_BUG_ON(GRO_HASH_BUCKETS >
9935 8 * FIELD_SIZEOF(struct napi_struct, gro_bitmask));
9936
9937 if (net != &init_net)
9938 INIT_LIST_HEAD(&net->dev_base_head);
9939
9940 net->dev_name_head = netdev_create_hash();
9941 if (net->dev_name_head == NULL)
9942 goto err_name;
9943
9944 net->dev_index_head = netdev_create_hash();
9945 if (net->dev_index_head == NULL)
9946 goto err_idx;
9947
9948 return 0;
9949
9950 err_idx:
9951 kfree(net->dev_name_head);
9952 err_name:
9953 return -ENOMEM;
9954 }
9955
9956
9957
9958
9959
9960
9961
9962 const char *netdev_drivername(const struct net_device *dev)
9963 {
9964 const struct device_driver *driver;
9965 const struct device *parent;
9966 const char *empty = "";
9967
9968 parent = dev->dev.parent;
9969 if (!parent)
9970 return empty;
9971
9972 driver = parent->driver;
9973 if (driver && driver->name)
9974 return driver->name;
9975 return empty;
9976 }

static void __netdev_printk(const char *level, const struct net_device *dev,
                            struct va_format *vaf)
{
        if (dev && dev->dev.parent) {
                dev_printk_emit(level[1] - '0',
                                dev->dev.parent,
                                "%s %s %s%s: %pV",
                                dev_driver_string(dev->dev.parent),
                                dev_name(dev->dev.parent),
                                netdev_name(dev), netdev_reg_state(dev),
                                vaf);
        } else if (dev) {
                printk("%s%s%s: %pV",
                       level, netdev_name(dev), netdev_reg_state(dev), vaf);
        } else {
                printk("%s(NULL net_device): %pV", level, vaf);
        }
}

void netdev_printk(const char *level, const struct net_device *dev,
                   const char *format, ...)
{
        struct va_format vaf;
        va_list args;

        va_start(args, format);

        vaf.fmt = format;
        vaf.va = &args;

        __netdev_printk(level, dev, &vaf);

        va_end(args);
}
EXPORT_SYMBOL(netdev_printk);

#define define_netdev_printk_level(func, level)                        \
void func(const struct net_device *dev, const char *fmt, ...)          \
{                                                                      \
        struct va_format vaf;                                          \
        va_list args;                                                  \
                                                                       \
        va_start(args, fmt);                                           \
                                                                       \
        vaf.fmt = fmt;                                                 \
        vaf.va = &args;                                                \
                                                                       \
        __netdev_printk(level, dev, &vaf);                             \
                                                                       \
        va_end(args);                                                  \
}                                                                      \
EXPORT_SYMBOL(func);

define_netdev_printk_level(netdev_emerg, KERN_EMERG);
define_netdev_printk_level(netdev_alert, KERN_ALERT);
define_netdev_printk_level(netdev_crit, KERN_CRIT);
define_netdev_printk_level(netdev_err, KERN_ERR);
define_netdev_printk_level(netdev_warn, KERN_WARNING);
define_netdev_printk_level(netdev_notice, KERN_NOTICE);
define_netdev_printk_level(netdev_info, KERN_INFO);
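/* Example (not part of this file): how a driver typically uses the helpers
 * generated above.  example_report_reset() and its arguments are hypothetical;
 * netdev_err()/netdev_info() are the functions this block defines, and
 * netdev_dbg() is the companion macro from netdevice.h.
 */
#include <linux/netdevice.h>

static void example_report_reset(struct net_device *dev, int err,
                                 unsigned int resets)
{
        if (err) {
                netdev_err(dev, "reset failed with error %d\n", err);
                return;
        }

        netdev_info(dev, "link reset complete (%u resets so far)\n", resets);
        netdev_dbg(dev, "resuming transmit queues\n");
}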

static void __net_exit netdev_exit(struct net *net)
{
        kfree(net->dev_name_head);
        kfree(net->dev_index_head);
        if (net != &init_net)
                WARN_ON_ONCE(!list_empty(&net->dev_base_head));
}

static struct pernet_operations __net_initdata netdev_net_ops = {
        .init = netdev_init,
        .exit = netdev_exit,
};
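/* Example (not part of this file): the same pernet_operations pattern as
 * netdev_net_ops, sketched for a hypothetical subsystem that keeps
 * per-namespace state.  register_pernet_subsys()/unregister_pernet_subsys()
 * and net_generic() are the real API; example_net_id, struct example_net and
 * the callbacks are assumptions for illustration only.
 */
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/module.h>

struct example_net {
        unsigned long packets_seen;
};

static unsigned int example_net_id;

static int __net_init example_net_init(struct net *net)
{
        struct example_net *ex = net_generic(net, example_net_id);

        ex->packets_seen = 0;
        return 0;
}

static void __net_exit example_net_exit(struct net *net)
{
        /* Per-net data allocated via .size is freed by the pernet core. */
}

static struct pernet_operations example_net_ops = {
        .init = example_net_init,
        .exit = example_net_exit,
        .id   = &example_net_id,
        .size = sizeof(struct example_net),
};

static int __init example_module_init(void)
{
        return register_pernet_subsys(&example_net_ops);
}

static void __exit example_module_exit(void)
{
        unregister_pernet_subsys(&example_net_ops);
}

module_init(example_module_init);
module_exit(example_module_exit);
MODULE_LICENSE("GPL");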

static void __net_exit default_device_exit(struct net *net)
{
        struct net_device *dev, *aux;
        /*
         * Push all migratable network devices back to the
         * initial network namespace
         */
        rtnl_lock();
        for_each_netdev_safe(net, dev, aux) {
                int err;
                char fb_name[IFNAMSIZ];

                /* Ignore unmovable devices (i.e. loopback) */
                if (dev->features & NETIF_F_NETNS_LOCAL)
                        continue;

                /* Leave virtual devices for the generic cleanup */
                if (dev->rtnl_link_ops)
                        continue;

                /* Push remaining network devices to init_net */
                snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
                if (__dev_get_by_name(&init_net, fb_name))
                        snprintf(fb_name, IFNAMSIZ, "dev%%d");
                err = dev_change_net_namespace(dev, &init_net, fb_name);
                if (err) {
                        pr_emerg("%s: failed to move %s to init_net: %d\n",
                                 __func__, dev->name, err);
                        BUG();
                }
        }
        rtnl_unlock();
}
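/* Example (not part of this file): a device that must never leave the
 * namespace it was created in opts out of the migration above by setting
 * NETIF_F_NETNS_LOCAL in its setup callback, as the loopback driver does.
 * default_device_exit() then skips it, and default_device_exit_batch() below
 * unregisters it together with the dying namespace instead of moving it to
 * init_net.  example_setup() is a hypothetical name.
 */
#include <linux/netdevice.h>
#include <linux/etherdevice.h>

static void example_setup(struct net_device *dev)
{
        ether_setup(dev);

        /* Keep this device local to its network namespace. */
        dev->features |= NETIF_F_NETNS_LOCAL;
}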

static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
{
        /* Return with the rtnl_lock held when there are no network
         * devices unregistering in any network namespace in net_list.
         */
        struct net *net;
        bool unregistering;
        DEFINE_WAIT_FUNC(wait, woken_wake_function);

        add_wait_queue(&netdev_unregistering_wq, &wait);
        for (;;) {
                unregistering = false;
                rtnl_lock();
                list_for_each_entry(net, net_list, exit_list) {
                        if (net->dev_unreg_count > 0) {
                                unregistering = true;
                                break;
                        }
                }
                if (!unregistering)
                        break;
                __rtnl_unlock();

                wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
        }
        remove_wait_queue(&netdev_unregistering_wq, &wait);
}

static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
        /* At exit all network devices must be removed from a network
         * namespace.  Do this in the reverse order of registration.
         * Do this across as many network namespaces as possible to
         * improve batching efficiency.
         */
        struct net_device *dev;
        struct net *net;
        LIST_HEAD(dev_kill_list);

        /* To prevent network device cleanup code from dereferencing
         * loopback devices or network devices that have been freed,
         * wait here for all pending unregistrations to complete
         * before unregistering the loopback device and allowing the
         * network namespace to be freed.
         *
         * The netdev todo list containing all network device
         * unregistrations that happen in default_device_exit_batch
         * will run in the rtnl_unlock() at the end of
         * default_device_exit_batch.
         */
        rtnl_lock_unregistering(net_list);
        list_for_each_entry(net, net_list, exit_list) {
                for_each_netdev_reverse(net, dev) {
                        if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
                                dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
                        else
                                unregister_netdevice_queue(dev, &dev_kill_list);
                }
        }
        unregister_netdevice_many(&dev_kill_list);
        rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
        .exit = default_device_exit,
        .exit_batch = default_device_exit_batch,
};

/*
 *	Initialize the DEV module.  At boot time this walks the device list
 *	and unhooks any devices that fail to initialise (normally hardware
 *	not present) and leaves us with a valid list of present and active
 *	devices.
 */

/*
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
        int i, rc = -ENOMEM;

        BUG_ON(!dev_boot_phase);

        if (dev_proc_init())
                goto out;

        if (netdev_kobject_init())
                goto out;

        INIT_LIST_HEAD(&ptype_all);
        for (i = 0; i < PTYPE_HASH_SIZE; i++)
                INIT_LIST_HEAD(&ptype_base[i]);

        INIT_LIST_HEAD(&offload_base);

        if (register_pernet_subsys(&netdev_net_ops))
                goto out;

        /*
         *	Initialise the packet receive queues.
         */

        for_each_possible_cpu(i) {
                struct work_struct *flush = per_cpu_ptr(&flush_works, i);
                struct softnet_data *sd = &per_cpu(softnet_data, i);

                INIT_WORK(flush, flush_backlog);

                skb_queue_head_init(&sd->input_pkt_queue);
                skb_queue_head_init(&sd->process_queue);
#ifdef CONFIG_XFRM_OFFLOAD
                skb_queue_head_init(&sd->xfrm_backlog);
#endif
                INIT_LIST_HEAD(&sd->poll_list);
                sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
                sd->csd.func = rps_trigger_softirq;
                sd->csd.info = sd;
                sd->cpu = i;
#endif

                init_gro_hash(&sd->backlog);
                sd->backlog.poll = process_backlog;
                sd->backlog.weight = weight_p;
        }

        dev_boot_phase = 0;

        /* The loopback device is special.  If any other network device
         * is present in a network namespace, the loopback device must be
         * present too.  Since we now dynamically allocate and free the
         * loopback device, ensure this invariant is maintained by keeping
         * the loopback device the first device on the list of network
         * devices: the first device that appears and the last one that
         * disappears.
         */
        if (register_pernet_device(&loopback_net_ops))
                goto out;

        if (register_pernet_device(&default_device_ops))
                goto out;

        open_softirq(NET_TX_SOFTIRQ, net_tx_action);
        open_softirq(NET_RX_SOFTIRQ, net_rx_action);

        rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead",
                                       NULL, dev_cpu_dead);
        WARN_ON(rc < 0);
        rc = 0;
out:
        return rc;
}

subsys_initcall(net_dev_init);
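/* Example (not part of this file): built-in code that must run after
 * net_dev_init() can rely on initcall ordering instead of explicit
 * synchronisation -- device_initcall() (and module_init() in built-in
 * objects) runs at a later initcall level than subsys_initcall().  The
 * function name below is hypothetical.
 */
#include <linux/init.h>
#include <linux/printk.h>

static int __init example_after_net_dev_init(void)
{
        /* By this point the per-CPU softnet_data backlogs and the
         * NET_TX/NET_RX softirqs set up in net_dev_init() are available.
         */
        pr_info("example: core networking is initialised\n");
        return 0;
}
device_initcall(example_after_net_dev_init);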