1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <linux/bitops.h>
31#include <linux/capability.h>
32#include <linux/module.h>
33#include <linux/types.h>
34#include <linux/kernel.h>
35#include <linux/string.h>
36#include <linux/mm.h>
37#include <linux/socket.h>
38#include <linux/sockios.h>
39#include <linux/in.h>
40#include <linux/errno.h>
41#include <linux/interrupt.h>
42#include <linux/if_addr.h>
43#include <linux/if_ether.h>
44#include <linux/inet.h>
45#include <linux/netdevice.h>
46#include <linux/etherdevice.h>
47#include <linux/skbuff.h>
48#include <linux/init.h>
49#include <linux/notifier.h>
50#include <linux/inetdevice.h>
51#include <linux/igmp.h>
52#include <linux/slab.h>
53#include <linux/hash.h>
54#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h>
56#endif
57#include <linux/kmod.h>
58#include <linux/netconf.h>
59
60#include <net/arp.h>
61#include <net/ip.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66#include <net/addrconf.h>
67
68#include "fib_lookup.h"
69
70static struct ipv4_devconf ipv4_devconf = {
71	.data = {
72		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78	},
79};
80
81static struct ipv4_devconf ipv4_devconf_dflt = {
82	.data = {
83		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90	},
91};
92
/*
 * Read attribute @attr from the devconf_dflt block hanging off @net.
 * @net is parenthesised so that any pointer-valued expression (not just a
 * plain identifier) can be passed without precedence surprises.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
96static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97	[IFA_LOCAL]     	= { .type = NLA_U32 },
98	[IFA_ADDRESS]   	= { .type = NLA_U32 },
99	[IFA_BROADCAST] 	= { .type = NLA_U32 },
100	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
102	[IFA_FLAGS]		= { .type = NLA_U32 },
103};
104
105#define IN4_ADDR_HSIZE_SHIFT	8
106#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
107
108static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110static u32 inet_addr_hash(const struct net *net, __be32 addr)
111{
112	u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115}
116
117static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118{
119	u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121	ASSERT_RTNL();
122	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123}
124
125static void inet_hash_remove(struct in_ifaddr *ifa)
126{
127	ASSERT_RTNL();
128	hlist_del_init_rcu(&ifa->hash);
129}
130
131/**
132 * __ip_dev_find - find the first device with a given source address.
133 * @net: the net namespace
134 * @addr: the source address
135 * @devref: if true, take a reference on the found device
136 *
137 * If a caller uses devref=false, it should be protected by RCU, or RTNL
138 */
139struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140{
141	u32 hash = inet_addr_hash(net, addr);
142	struct net_device *result = NULL;
143	struct in_ifaddr *ifa;
144
145	rcu_read_lock();
146	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147		if (ifa->ifa_local == addr) {
148			struct net_device *dev = ifa->ifa_dev->dev;
149
150			if (!net_eq(dev_net(dev), net))
151				continue;
152			result = dev;
153			break;
154		}
155	}
156	if (!result) {
157		struct flowi4 fl4 = { .daddr = addr };
158		struct fib_result res = { 0 };
159		struct fib_table *local;
160
161		/* Fallback to FIB local table so that communication
162		 * over loopback subnets work.
163		 */
164		local = fib_get_table(net, RT_TABLE_LOCAL);
165		if (local &&
166		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167		    res.type == RTN_LOCAL)
168			result = FIB_RES_DEV(res);
169	}
170	if (result && devref)
171		dev_hold(result);
172	rcu_read_unlock();
173	return result;
174}
175EXPORT_SYMBOL(__ip_dev_find);
176
177static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178
179static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181			 int destroy);
182#ifdef CONFIG_SYSCTL
183static int devinet_sysctl_register(struct in_device *idev);
184static void devinet_sysctl_unregister(struct in_device *idev);
185#else
186static int devinet_sysctl_register(struct in_device *idev)
187{
188	return 0;
189}
190static void devinet_sysctl_unregister(struct in_device *idev)
191{
192}
193#endif
194
195/* Locks all the inet devices. */
196
197static struct in_ifaddr *inet_alloc_ifa(void)
198{
199	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200}
201
202static void inet_rcu_free_ifa(struct rcu_head *head)
203{
204	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205	if (ifa->ifa_dev)
206		in_dev_put(ifa->ifa_dev);
207	kfree(ifa);
208}
209
210static void inet_free_ifa(struct in_ifaddr *ifa)
211{
212	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213}
214
215void in_dev_finish_destroy(struct in_device *idev)
216{
217	struct net_device *dev = idev->dev;
218
219	WARN_ON(idev->ifa_list);
220	WARN_ON(idev->mc_list);
221	kfree(rcu_dereference_protected(idev->mc_hash, 1));
222#ifdef NET_REFCNT_DEBUG
223	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224#endif
225	dev_put(dev);
226	if (!idev->dead)
227		pr_err("Freeing alive in_device %p\n", idev);
228	else
229		kfree(idev);
230}
231EXPORT_SYMBOL(in_dev_finish_destroy);
232
233static struct in_device *inetdev_init(struct net_device *dev)
234{
235	struct in_device *in_dev;
236	int err = -ENOMEM;
237
238	ASSERT_RTNL();
239
240	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241	if (!in_dev)
242		goto out;
243	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244			sizeof(in_dev->cnf));
245	in_dev->cnf.sysctl = NULL;
246	in_dev->dev = dev;
247	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248	if (!in_dev->arp_parms)
249		goto out_kfree;
250	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251		dev_disable_lro(dev);
252	/* Reference in_dev->dev */
253	dev_hold(dev);
254	/* Account for reference dev->ip_ptr (below) */
255	in_dev_hold(in_dev);
256
257	err = devinet_sysctl_register(in_dev);
258	if (err) {
259		in_dev->dead = 1;
260		in_dev_put(in_dev);
261		in_dev = NULL;
262		goto out;
263	}
264	ip_mc_init_dev(in_dev);
265	if (dev->flags & IFF_UP)
266		ip_mc_up(in_dev);
267
268	/* we can receive as soon as ip_ptr is set -- do this last */
269	rcu_assign_pointer(dev->ip_ptr, in_dev);
270out:
271	return in_dev ?: ERR_PTR(err);
272out_kfree:
273	kfree(in_dev);
274	in_dev = NULL;
275	goto out;
276}
277
278static void in_dev_rcu_put(struct rcu_head *head)
279{
280	struct in_device *idev = container_of(head, struct in_device, rcu_head);
281	in_dev_put(idev);
282}
283
284static void inetdev_destroy(struct in_device *in_dev)
285{
286	struct in_ifaddr *ifa;
287	struct net_device *dev;
288
289	ASSERT_RTNL();
290
291	dev = in_dev->dev;
292
293	in_dev->dead = 1;
294
295	ip_mc_destroy_dev(in_dev);
296
297	while ((ifa = in_dev->ifa_list) != NULL) {
298		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299		inet_free_ifa(ifa);
300	}
301
302	RCU_INIT_POINTER(dev->ip_ptr, NULL);
303
304	devinet_sysctl_unregister(in_dev);
305	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306	arp_ifdown(dev);
307
308	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309}
310
311int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312{
313	rcu_read_lock();
314	for_primary_ifa(in_dev) {
315		if (inet_ifa_match(a, ifa)) {
316			if (!b || inet_ifa_match(b, ifa)) {
317				rcu_read_unlock();
318				return 1;
319			}
320		}
321	} endfor_ifa(in_dev);
322	rcu_read_unlock();
323	return 0;
324}
325
326static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327			 int destroy, struct nlmsghdr *nlh, u32 portid)
328{
329	struct in_ifaddr *promote = NULL;
330	struct in_ifaddr *ifa, *ifa1 = *ifap;
331	struct in_ifaddr *last_prim = in_dev->ifa_list;
332	struct in_ifaddr *prev_prom = NULL;
333	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334
335	ASSERT_RTNL();
336
337	/* 1. Deleting primary ifaddr forces deletion all secondaries
338	 * unless alias promotion is set
339	 **/
340
341	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
342		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
343
344		while ((ifa = *ifap1) != NULL) {
345			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
346			    ifa1->ifa_scope <= ifa->ifa_scope)
347				last_prim = ifa;
348
349			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
350			    ifa1->ifa_mask != ifa->ifa_mask ||
351			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
352				ifap1 = &ifa->ifa_next;
353				prev_prom = ifa;
354				continue;
355			}
356
357			if (!do_promote) {
358				inet_hash_remove(ifa);
359				*ifap1 = ifa->ifa_next;
360
361				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
362				blocking_notifier_call_chain(&inetaddr_chain,
363						NETDEV_DOWN, ifa);
364				inet_free_ifa(ifa);
365			} else {
366				promote = ifa;
367				break;
368			}
369		}
370	}
371
372	/* On promotion all secondaries from subnet are changing
373	 * the primary IP, we must remove all their routes silently
374	 * and later to add them back with new prefsrc. Do this
375	 * while all addresses are on the device list.
376	 */
377	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
378		if (ifa1->ifa_mask == ifa->ifa_mask &&
379		    inet_ifa_match(ifa1->ifa_address, ifa))
380			fib_del_ifaddr(ifa, ifa1);
381	}
382
383	/* 2. Unlink it */
384
385	*ifap = ifa1->ifa_next;
386	inet_hash_remove(ifa1);
387
388	/* 3. Announce address deletion */
389
390	/* Send message first, then call notifier.
391	   At first sight, FIB update triggered by notifier
392	   will refer to already deleted ifaddr, that could confuse
393	   netlink listeners. It is not true: look, gated sees
394	   that route deleted and if it still thinks that ifaddr
395	   is valid, it will try to restore deleted routes... Grr.
396	   So that, this order is correct.
397	 */
398	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
399	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
400
401	if (promote) {
402		struct in_ifaddr *next_sec = promote->ifa_next;
403
404		if (prev_prom) {
405			prev_prom->ifa_next = promote->ifa_next;
406			promote->ifa_next = last_prim->ifa_next;
407			last_prim->ifa_next = promote;
408		}
409
410		promote->ifa_flags &= ~IFA_F_SECONDARY;
411		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
412		blocking_notifier_call_chain(&inetaddr_chain,
413				NETDEV_UP, promote);
414		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
415			if (ifa1->ifa_mask != ifa->ifa_mask ||
416			    !inet_ifa_match(ifa1->ifa_address, ifa))
417					continue;
418			fib_add_ifaddr(ifa);
419		}
420
421	}
422	if (destroy)
423		inet_free_ifa(ifa1);
424}
425
426static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
427			 int destroy)
428{
429	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
430}
431
/* Address lifetime checker; runs as the delayed work item declared below. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
435
436static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
437			     u32 portid)
438{
439	struct in_device *in_dev = ifa->ifa_dev;
440	struct in_ifaddr *ifa1, **ifap, **last_primary;
441
442	ASSERT_RTNL();
443
444	if (!ifa->ifa_local) {
445		inet_free_ifa(ifa);
446		return 0;
447	}
448
449	ifa->ifa_flags &= ~IFA_F_SECONDARY;
450	last_primary = &in_dev->ifa_list;
451
452	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
453	     ifap = &ifa1->ifa_next) {
454		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
455		    ifa->ifa_scope <= ifa1->ifa_scope)
456			last_primary = &ifa1->ifa_next;
457		if (ifa1->ifa_mask == ifa->ifa_mask &&
458		    inet_ifa_match(ifa1->ifa_address, ifa)) {
459			if (ifa1->ifa_local == ifa->ifa_local) {
460				inet_free_ifa(ifa);
461				return -EEXIST;
462			}
463			if (ifa1->ifa_scope != ifa->ifa_scope) {
464				inet_free_ifa(ifa);
465				return -EINVAL;
466			}
467			ifa->ifa_flags |= IFA_F_SECONDARY;
468		}
469	}
470
471	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
472		prandom_seed((__force u32) ifa->ifa_local);
473		ifap = last_primary;
474	}
475
476	ifa->ifa_next = *ifap;
477	*ifap = ifa;
478
479	inet_hash_insert(dev_net(in_dev->dev), ifa);
480
481	cancel_delayed_work(&check_lifetime_work);
482	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
483
484	/* Send message first, then call notifier.
485	   Notifier will trigger FIB update, so that
486	   listeners of netlink will know about new ifaddr */
487	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
488	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
489
490	return 0;
491}
492
493static int inet_insert_ifa(struct in_ifaddr *ifa)
494{
495	return __inet_insert_ifa(ifa, NULL, 0);
496}
497
498static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
499{
500	struct in_device *in_dev = __in_dev_get_rtnl(dev);
501
502	ASSERT_RTNL();
503
504	if (!in_dev) {
505		inet_free_ifa(ifa);
506		return -ENOBUFS;
507	}
508	ipv4_devconf_setall(in_dev);
509	neigh_parms_data_state_setall(in_dev->arp_parms);
510	if (ifa->ifa_dev != in_dev) {
511		WARN_ON(ifa->ifa_dev);
512		in_dev_hold(in_dev);
513		ifa->ifa_dev = in_dev;
514	}
515	if (ipv4_is_loopback(ifa->ifa_local))
516		ifa->ifa_scope = RT_SCOPE_HOST;
517	return inet_insert_ifa(ifa);
518}
519
520/* Caller must hold RCU or RTNL :
521 * We dont take a reference on found in_device
522 */
523struct in_device *inetdev_by_index(struct net *net, int ifindex)
524{
525	struct net_device *dev;
526	struct in_device *in_dev = NULL;
527
528	rcu_read_lock();
529	dev = dev_get_by_index_rcu(net, ifindex);
530	if (dev)
531		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
532	rcu_read_unlock();
533	return in_dev;
534}
535EXPORT_SYMBOL(inetdev_by_index);
536
537/* Called only from RTNL semaphored context. No locks. */
538
539struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
540				    __be32 mask)
541{
542	ASSERT_RTNL();
543
544	for_primary_ifa(in_dev) {
545		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
546			return ifa;
547	} endfor_ifa(in_dev);
548	return NULL;
549}
550
551static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
552{
553	struct ip_mreqn mreq = {
554		.imr_multiaddr.s_addr = ifa->ifa_address,
555		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
556	};
557	int ret;
558
559	ASSERT_RTNL();
560
561	lock_sock(sk);
562	if (join)
563		ret = ip_mc_join_group(sk, &mreq);
564	else
565		ret = ip_mc_leave_group(sk, &mreq);
566	release_sock(sk);
567
568	return ret;
569}
570
571static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
572{
573	struct net *net = sock_net(skb->sk);
574	struct nlattr *tb[IFA_MAX+1];
575	struct in_device *in_dev;
576	struct ifaddrmsg *ifm;
577	struct in_ifaddr *ifa, **ifap;
578	int err = -EINVAL;
579
580	ASSERT_RTNL();
581
582	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
583	if (err < 0)
584		goto errout;
585
586	ifm = nlmsg_data(nlh);
587	in_dev = inetdev_by_index(net, ifm->ifa_index);
588	if (!in_dev) {
589		err = -ENODEV;
590		goto errout;
591	}
592
593	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
594	     ifap = &ifa->ifa_next) {
595		if (tb[IFA_LOCAL] &&
596		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
597			continue;
598
599		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
600			continue;
601
602		if (tb[IFA_ADDRESS] &&
603		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
604		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
605			continue;
606
607		if (ipv4_is_multicast(ifa->ifa_address))
608			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
609		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
610		return 0;
611	}
612
613	err = -EADDRNOTAVAIL;
614errout:
615	return err;
616}
617
618#define INFINITY_LIFE_TIME	0xFFFFFFFF
619
620static void check_lifetime(struct work_struct *work)
621{
622	unsigned long now, next, next_sec, next_sched;
623	struct in_ifaddr *ifa;
624	struct hlist_node *n;
625	int i;
626
627	now = jiffies;
628	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
629
630	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
631		bool change_needed = false;
632
633		rcu_read_lock();
634		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
635			unsigned long age;
636
637			if (ifa->ifa_flags & IFA_F_PERMANENT)
638				continue;
639
640			/* We try to batch several events at once. */
641			age = (now - ifa->ifa_tstamp +
642			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
643
644			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
645			    age >= ifa->ifa_valid_lft) {
646				change_needed = true;
647			} else if (ifa->ifa_preferred_lft ==
648				   INFINITY_LIFE_TIME) {
649				continue;
650			} else if (age >= ifa->ifa_preferred_lft) {
651				if (time_before(ifa->ifa_tstamp +
652						ifa->ifa_valid_lft * HZ, next))
653					next = ifa->ifa_tstamp +
654					       ifa->ifa_valid_lft * HZ;
655
656				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
657					change_needed = true;
658			} else if (time_before(ifa->ifa_tstamp +
659					       ifa->ifa_preferred_lft * HZ,
660					       next)) {
661				next = ifa->ifa_tstamp +
662				       ifa->ifa_preferred_lft * HZ;
663			}
664		}
665		rcu_read_unlock();
666		if (!change_needed)
667			continue;
668		rtnl_lock();
669		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
670			unsigned long age;
671
672			if (ifa->ifa_flags & IFA_F_PERMANENT)
673				continue;
674
675			/* We try to batch several events at once. */
676			age = (now - ifa->ifa_tstamp +
677			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
678
679			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
680			    age >= ifa->ifa_valid_lft) {
681				struct in_ifaddr **ifap;
682
683				for (ifap = &ifa->ifa_dev->ifa_list;
684				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
685					if (*ifap == ifa) {
686						inet_del_ifa(ifa->ifa_dev,
687							     ifap, 1);
688						break;
689					}
690				}
691			} else if (ifa->ifa_preferred_lft !=
692				   INFINITY_LIFE_TIME &&
693				   age >= ifa->ifa_preferred_lft &&
694				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
695				ifa->ifa_flags |= IFA_F_DEPRECATED;
696				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
697			}
698		}
699		rtnl_unlock();
700	}
701
702	next_sec = round_jiffies_up(next);
703	next_sched = next;
704
705	/* If rounded timeout is accurate enough, accept it. */
706	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
707		next_sched = next_sec;
708
709	now = jiffies;
710	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
711	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
712		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
713
714	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
715			next_sched - now);
716}
717
718static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
719			     __u32 prefered_lft)
720{
721	unsigned long timeout;
722
723	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
724
725	timeout = addrconf_timeout_fixup(valid_lft, HZ);
726	if (addrconf_finite_timeout(timeout))
727		ifa->ifa_valid_lft = timeout;
728	else
729		ifa->ifa_flags |= IFA_F_PERMANENT;
730
731	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
732	if (addrconf_finite_timeout(timeout)) {
733		if (timeout == 0)
734			ifa->ifa_flags |= IFA_F_DEPRECATED;
735		ifa->ifa_preferred_lft = timeout;
736	}
737	ifa->ifa_tstamp = jiffies;
738	if (!ifa->ifa_cstamp)
739		ifa->ifa_cstamp = ifa->ifa_tstamp;
740}
741
742static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
743				       __u32 *pvalid_lft, __u32 *pprefered_lft)
744{
745	struct nlattr *tb[IFA_MAX+1];
746	struct in_ifaddr *ifa;
747	struct ifaddrmsg *ifm;
748	struct net_device *dev;
749	struct in_device *in_dev;
750	int err;
751
752	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
753	if (err < 0)
754		goto errout;
755
756	ifm = nlmsg_data(nlh);
757	err = -EINVAL;
758	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
759		goto errout;
760
761	dev = __dev_get_by_index(net, ifm->ifa_index);
762	err = -ENODEV;
763	if (!dev)
764		goto errout;
765
766	in_dev = __in_dev_get_rtnl(dev);
767	err = -ENOBUFS;
768	if (!in_dev)
769		goto errout;
770
771	ifa = inet_alloc_ifa();
772	if (!ifa)
773		/*
774		 * A potential indev allocation can be left alive, it stays
775		 * assigned to its device and is destroy with it.
776		 */
777		goto errout;
778
779	ipv4_devconf_setall(in_dev);
780	neigh_parms_data_state_setall(in_dev->arp_parms);
781	in_dev_hold(in_dev);
782
783	if (!tb[IFA_ADDRESS])
784		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
785
786	INIT_HLIST_NODE(&ifa->hash);
787	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
788	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
789	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
790					 ifm->ifa_flags;
791	ifa->ifa_scope = ifm->ifa_scope;
792	ifa->ifa_dev = in_dev;
793
794	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
795	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
796
797	if (tb[IFA_BROADCAST])
798		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
799
800	if (tb[IFA_LABEL])
801		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
802	else
803		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
804
805	if (tb[IFA_CACHEINFO]) {
806		struct ifa_cacheinfo *ci;
807
808		ci = nla_data(tb[IFA_CACHEINFO]);
809		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
810			err = -EINVAL;
811			goto errout_free;
812		}
813		*pvalid_lft = ci->ifa_valid;
814		*pprefered_lft = ci->ifa_prefered;
815	}
816
817	return ifa;
818
819errout_free:
820	inet_free_ifa(ifa);
821errout:
822	return ERR_PTR(err);
823}
824
825static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
826{
827	struct in_device *in_dev = ifa->ifa_dev;
828	struct in_ifaddr *ifa1, **ifap;
829
830	if (!ifa->ifa_local)
831		return NULL;
832
833	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
834	     ifap = &ifa1->ifa_next) {
835		if (ifa1->ifa_mask == ifa->ifa_mask &&
836		    inet_ifa_match(ifa1->ifa_address, ifa) &&
837		    ifa1->ifa_local == ifa->ifa_local)
838			return ifa1;
839	}
840	return NULL;
841}
842
843static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
844{
845	struct net *net = sock_net(skb->sk);
846	struct in_ifaddr *ifa;
847	struct in_ifaddr *ifa_existing;
848	__u32 valid_lft = INFINITY_LIFE_TIME;
849	__u32 prefered_lft = INFINITY_LIFE_TIME;
850
851	ASSERT_RTNL();
852
853	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
854	if (IS_ERR(ifa))
855		return PTR_ERR(ifa);
856
857	ifa_existing = find_matching_ifa(ifa);
858	if (!ifa_existing) {
859		/* It would be best to check for !NLM_F_CREATE here but
860		 * userspace already relies on not having to provide this.
861		 */
862		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
863		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
864			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
865					       true, ifa);
866
867			if (ret < 0) {
868				inet_free_ifa(ifa);
869				return ret;
870			}
871		}
872		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
873	} else {
874		inet_free_ifa(ifa);
875
876		if (nlh->nlmsg_flags & NLM_F_EXCL ||
877		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
878			return -EEXIST;
879		ifa = ifa_existing;
880		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
881		cancel_delayed_work(&check_lifetime_work);
882		queue_delayed_work(system_power_efficient_wq,
883				&check_lifetime_work, 0);
884		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
885		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
886	}
887	return 0;
888}
889
890/*
891 *	Determine a default network mask, based on the IP address.
892 */
893
894static int inet_abc_len(__be32 addr)
895{
896	int rc = -1;	/* Something else, probably a multicast. */
897
898	if (ipv4_is_zeronet(addr))
899		rc = 0;
900	else {
901		__u32 haddr = ntohl(addr);
902
903		if (IN_CLASSA(haddr))
904			rc = 8;
905		else if (IN_CLASSB(haddr))
906			rc = 16;
907		else if (IN_CLASSC(haddr))
908			rc = 24;
909	}
910
911	return rc;
912}
913
914
915int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
916{
917	struct ifreq ifr;
918	struct sockaddr_in sin_orig;
919	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
920	struct in_device *in_dev;
921	struct in_ifaddr **ifap = NULL;
922	struct in_ifaddr *ifa = NULL;
923	struct net_device *dev;
924	char *colon;
925	int ret = -EFAULT;
926	int tryaddrmatch = 0;
927
928	/*
929	 *	Fetch the caller's info block into kernel space
930	 */
931
932	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
933		goto out;
934	ifr.ifr_name[IFNAMSIZ - 1] = 0;
935
936	/* save original address for comparison */
937	memcpy(&sin_orig, sin, sizeof(*sin));
938
939	colon = strchr(ifr.ifr_name, ':');
940	if (colon)
941		*colon = 0;
942
943	dev_load(net, ifr.ifr_name);
944
945	switch (cmd) {
946	case SIOCGIFADDR:	/* Get interface address */
947	case SIOCGIFBRDADDR:	/* Get the broadcast address */
948	case SIOCGIFDSTADDR:	/* Get the destination address */
949	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
950		/* Note that these ioctls will not sleep,
951		   so that we do not impose a lock.
952		   One day we will be forced to put shlock here (I mean SMP)
953		 */
954		tryaddrmatch = (sin_orig.sin_family == AF_INET);
955		memset(sin, 0, sizeof(*sin));
956		sin->sin_family = AF_INET;
957		break;
958
959	case SIOCSIFFLAGS:
960		ret = -EPERM;
961		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
962			goto out;
963		break;
964	case SIOCSIFADDR:	/* Set interface address (and family) */
965	case SIOCSIFBRDADDR:	/* Set the broadcast address */
966	case SIOCSIFDSTADDR:	/* Set the destination address */
967	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
968		ret = -EPERM;
969		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
970			goto out;
971		ret = -EINVAL;
972		if (sin->sin_family != AF_INET)
973			goto out;
974		break;
975	default:
976		ret = -EINVAL;
977		goto out;
978	}
979
980	rtnl_lock();
981
982	ret = -ENODEV;
983	dev = __dev_get_by_name(net, ifr.ifr_name);
984	if (!dev)
985		goto done;
986
987	if (colon)
988		*colon = ':';
989
990	in_dev = __in_dev_get_rtnl(dev);
991	if (in_dev) {
992		if (tryaddrmatch) {
993			/* Matthias Andree */
994			/* compare label and address (4.4BSD style) */
995			/* note: we only do this for a limited set of ioctls
996			   and only if the original address family was AF_INET.
997			   This is checked above. */
998			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
999			     ifap = &ifa->ifa_next) {
1000				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1001				    sin_orig.sin_addr.s_addr ==
1002							ifa->ifa_local) {
1003					break; /* found */
1004				}
1005			}
1006		}
1007		/* we didn't get a match, maybe the application is
1008		   4.3BSD-style and passed in junk so we fall back to
1009		   comparing just the label */
1010		if (!ifa) {
1011			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1012			     ifap = &ifa->ifa_next)
1013				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1014					break;
1015		}
1016	}
1017
1018	ret = -EADDRNOTAVAIL;
1019	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1020		goto done;
1021
1022	switch (cmd) {
1023	case SIOCGIFADDR:	/* Get interface address */
1024		sin->sin_addr.s_addr = ifa->ifa_local;
1025		goto rarok;
1026
1027	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1028		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1029		goto rarok;
1030
1031	case SIOCGIFDSTADDR:	/* Get the destination address */
1032		sin->sin_addr.s_addr = ifa->ifa_address;
1033		goto rarok;
1034
1035	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1036		sin->sin_addr.s_addr = ifa->ifa_mask;
1037		goto rarok;
1038
1039	case SIOCSIFFLAGS:
1040		if (colon) {
1041			ret = -EADDRNOTAVAIL;
1042			if (!ifa)
1043				break;
1044			ret = 0;
1045			if (!(ifr.ifr_flags & IFF_UP))
1046				inet_del_ifa(in_dev, ifap, 1);
1047			break;
1048		}
1049		ret = dev_change_flags(dev, ifr.ifr_flags);
1050		break;
1051
1052	case SIOCSIFADDR:	/* Set interface address (and family) */
1053		ret = -EINVAL;
1054		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1055			break;
1056
1057		if (!ifa) {
1058			ret = -ENOBUFS;
1059			ifa = inet_alloc_ifa();
1060			if (!ifa)
1061				break;
1062			INIT_HLIST_NODE(&ifa->hash);
1063			if (colon)
1064				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1065			else
1066				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1067		} else {
1068			ret = 0;
1069			if (ifa->ifa_local == sin->sin_addr.s_addr)
1070				break;
1071			inet_del_ifa(in_dev, ifap, 0);
1072			ifa->ifa_broadcast = 0;
1073			ifa->ifa_scope = 0;
1074		}
1075
1076		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1077
1078		if (!(dev->flags & IFF_POINTOPOINT)) {
1079			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1080			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1081			if ((dev->flags & IFF_BROADCAST) &&
1082			    ifa->ifa_prefixlen < 31)
1083				ifa->ifa_broadcast = ifa->ifa_address |
1084						     ~ifa->ifa_mask;
1085		} else {
1086			ifa->ifa_prefixlen = 32;
1087			ifa->ifa_mask = inet_make_mask(32);
1088		}
1089		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1090		ret = inet_set_ifa(dev, ifa);
1091		break;
1092
1093	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1094		ret = 0;
1095		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1096			inet_del_ifa(in_dev, ifap, 0);
1097			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1098			inet_insert_ifa(ifa);
1099		}
1100		break;
1101
1102	case SIOCSIFDSTADDR:	/* Set the destination address */
1103		ret = 0;
1104		if (ifa->ifa_address == sin->sin_addr.s_addr)
1105			break;
1106		ret = -EINVAL;
1107		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1108			break;
1109		ret = 0;
1110		inet_del_ifa(in_dev, ifap, 0);
1111		ifa->ifa_address = sin->sin_addr.s_addr;
1112		inet_insert_ifa(ifa);
1113		break;
1114
1115	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1116
1117		/*
1118		 *	The mask we set must be legal.
1119		 */
1120		ret = -EINVAL;
1121		if (bad_mask(sin->sin_addr.s_addr, 0))
1122			break;
1123		ret = 0;
1124		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1125			__be32 old_mask = ifa->ifa_mask;
1126			inet_del_ifa(in_dev, ifap, 0);
1127			ifa->ifa_mask = sin->sin_addr.s_addr;
1128			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1129
1130			/* See if current broadcast address matches
1131			 * with current netmask, then recalculate
1132			 * the broadcast address. Otherwise it's a
1133			 * funny address, so don't touch it since
1134			 * the user seems to know what (s)he's doing...
1135			 */
1136			if ((dev->flags & IFF_BROADCAST) &&
1137			    (ifa->ifa_prefixlen < 31) &&
1138			    (ifa->ifa_broadcast ==
1139			     (ifa->ifa_local|~old_mask))) {
1140				ifa->ifa_broadcast = (ifa->ifa_local |
1141						      ~sin->sin_addr.s_addr);
1142			}
1143			inet_insert_ifa(ifa);
1144		}
1145		break;
1146	}
1147done:
1148	rtnl_unlock();
1149out:
1150	return ret;
1151rarok:
1152	rtnl_unlock();
1153	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1154	goto out;
1155}
1156
1157static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1158{
1159	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1160	struct in_ifaddr *ifa;
1161	struct ifreq ifr;
1162	int done = 0;
1163
1164	if (!in_dev)
1165		goto out;
1166
1167	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1168		if (!buf) {
1169			done += sizeof(ifr);
1170			continue;
1171		}
1172		if (len < (int) sizeof(ifr))
1173			break;
1174		memset(&ifr, 0, sizeof(struct ifreq));
1175		strcpy(ifr.ifr_name, ifa->ifa_label);
1176
1177		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1178		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1179								ifa->ifa_local;
1180
1181		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1182			done = -EFAULT;
1183			break;
1184		}
1185		buf  += sizeof(struct ifreq);
1186		len  -= sizeof(struct ifreq);
1187		done += sizeof(struct ifreq);
1188	}
1189out:
1190	return done;
1191}
1192
1193__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1194{
1195	__be32 addr = 0;
1196	struct in_device *in_dev;
1197	struct net *net = dev_net(dev);
1198
1199	rcu_read_lock();
1200	in_dev = __in_dev_get_rcu(dev);
1201	if (!in_dev)
1202		goto no_in_dev;
1203
1204	for_primary_ifa(in_dev) {
1205		if (ifa->ifa_scope > scope)
1206			continue;
1207		if (!dst || inet_ifa_match(dst, ifa)) {
1208			addr = ifa->ifa_local;
1209			break;
1210		}
1211		if (!addr)
1212			addr = ifa->ifa_local;
1213	} endfor_ifa(in_dev);
1214
1215	if (addr)
1216		goto out_unlock;
1217no_in_dev:
1218
1219	/* Not loopback addresses on loopback should be preferred
1220	   in this case. It is important that lo is the first interface
1221	   in dev_base list.
1222	 */
1223	for_each_netdev_rcu(net, dev) {
1224		in_dev = __in_dev_get_rcu(dev);
1225		if (!in_dev)
1226			continue;
1227
1228		for_primary_ifa(in_dev) {
1229			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1230			    ifa->ifa_scope <= scope) {
1231				addr = ifa->ifa_local;
1232				goto out_unlock;
1233			}
1234		} endfor_ifa(in_dev);
1235	}
1236out_unlock:
1237	rcu_read_unlock();
1238	return addr;
1239}
1240EXPORT_SYMBOL(inet_select_addr);
1241
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two things:
 *   - addr: the first address whose ifa_local equals @local (any address
 *           when @local is 0) and whose scope value does not exceed @scope;
 *   - same: set once an entry matches both @local and @dst according to
 *           inet_ifa_match() (a value of 0 skips the respective test).
 *
 * Returns the candidate address if a matching entry was found, 0 otherwise.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Remember the first acceptable local address. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* The match was already confirmed by an earlier entry. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/*
				 * With an explicit @local, or without a @dst
				 * constraint, the candidate is final.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1278
1279/*
1280 * Confirm that local IP address exists using wildcards:
1281 * - net: netns to check, cannot be NULL
1282 * - in_dev: only on this interface, NULL=any interface
1283 * - dst: only in the same subnet as dst, 0=any dst
1284 * - local: address, 0=autoselect the local address
1285 * - scope: maximum allowed scope value for the local address
1286 */
1287__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1288			 __be32 dst, __be32 local, int scope)
1289{
1290	__be32 addr = 0;
1291	struct net_device *dev;
1292
1293	if (in_dev)
1294		return confirm_addr_indev(in_dev, dst, local, scope);
1295
1296	rcu_read_lock();
1297	for_each_netdev_rcu(net, dev) {
1298		in_dev = __in_dev_get_rcu(dev);
1299		if (in_dev) {
1300			addr = confirm_addr_indev(in_dev, dst, local, scope);
1301			if (addr)
1302				break;
1303		}
1304	}
1305	rcu_read_unlock();
1306
1307	return addr;
1308}
1309EXPORT_SYMBOL(inet_confirm_addr);
1310
1311/*
1312 *	Device notifier
1313 */
1314
1315int register_inetaddr_notifier(struct notifier_block *nb)
1316{
1317	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1318}
1319EXPORT_SYMBOL(register_inetaddr_notifier);
1320
1321int unregister_inetaddr_notifier(struct notifier_block *nb)
1322{
1323	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1324}
1325EXPORT_SYMBOL(unregister_inetaddr_notifier);
1326
1327/* Rename ifa_labels for a device name change. Make some effort to preserve
1328 * existing alias numbering and to create unique labels if possible.
1329*/
1330static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1331{
1332	struct in_ifaddr *ifa;
1333	int named = 0;
1334
1335	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1336		char old[IFNAMSIZ], *dot;
1337
1338		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1339		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1340		if (named++ == 0)
1341			goto skip;
1342		dot = strchr(old, ':');
1343		if (!dot) {
1344			sprintf(old, ":%d", named);
1345			dot = old;
1346		}
1347		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1348			strcat(ifa->ifa_label, dot);
1349		else
1350			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1351skip:
1352		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1353	}
1354}
1355
/*
 * Tell whether @mtu is large enough for IPv4 to be run on a device.
 * The threshold is 68 bytes (presumably the RFC 791 minimum datagram
 * size every link must carry); inetdev_event() uses this to decide
 * whether IPv4 state is kept for a device.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_mtu = 68;

	return !(mtu < min_mtu);
}
1360
1361static void inetdev_send_gratuitous_arp(struct net_device *dev,
1362					struct in_device *in_dev)
1363
1364{
1365	struct in_ifaddr *ifa;
1366
1367	for (ifa = in_dev->ifa_list; ifa;
1368	     ifa = ifa->ifa_next) {
1369		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1370			 ifa->ifa_local, dev,
1371			 ifa->ifa_local, NULL,
1372			 dev->dev_addr, NULL);
1373	}
1374}
1375
/*
 * Netdevice notifier callback that keeps the IPv4 per-device state in
 * step with device life-cycle events.
 *
 * Called only under RTNL semaphore (checked by ASSERT_RTNL()).
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER, in which case the error is reported through
 * notifier_from_errno().
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only two events can create it. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* Registration with IPv4 state already attached is unexpected. */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* The loopback device gets 127.0.0.1/8 configured on the way up. */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR announce themselves only if arp_notify is set. */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register so the sysctl directory follows the new name. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1470
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1474
1475static size_t inet_nlmsg_size(void)
1476{
1477	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1478	       + nla_total_size(4) /* IFA_ADDRESS */
1479	       + nla_total_size(4) /* IFA_LOCAL */
1480	       + nla_total_size(4) /* IFA_BROADCAST */
1481	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1482	       + nla_total_size(4)  /* IFA_FLAGS */
1483	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1484}
1485
1486static inline u32 cstamp_delta(unsigned long cstamp)
1487{
1488	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1489}
1490
1491static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1492			 unsigned long tstamp, u32 preferred, u32 valid)
1493{
1494	struct ifa_cacheinfo ci;
1495
1496	ci.cstamp = cstamp_delta(cstamp);
1497	ci.tstamp = cstamp_delta(tstamp);
1498	ci.ifa_prefered = preferred;
1499	ci.ifa_valid = valid;
1500
1501	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1502}
1503
1504static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1505			    u32 portid, u32 seq, int event, unsigned int flags)
1506{
1507	struct ifaddrmsg *ifm;
1508	struct nlmsghdr  *nlh;
1509	u32 preferred, valid;
1510
1511	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1512	if (!nlh)
1513		return -EMSGSIZE;
1514
1515	ifm = nlmsg_data(nlh);
1516	ifm->ifa_family = AF_INET;
1517	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1518	ifm->ifa_flags = ifa->ifa_flags;
1519	ifm->ifa_scope = ifa->ifa_scope;
1520	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1521
1522	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1523		preferred = ifa->ifa_preferred_lft;
1524		valid = ifa->ifa_valid_lft;
1525		if (preferred != INFINITY_LIFE_TIME) {
1526			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1527
1528			if (preferred > tval)
1529				preferred -= tval;
1530			else
1531				preferred = 0;
1532			if (valid != INFINITY_LIFE_TIME) {
1533				if (valid > tval)
1534					valid -= tval;
1535				else
1536					valid = 0;
1537			}
1538		}
1539	} else {
1540		preferred = INFINITY_LIFE_TIME;
1541		valid = INFINITY_LIFE_TIME;
1542	}
1543	if ((ifa->ifa_address &&
1544	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1545	    (ifa->ifa_local &&
1546	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1547	    (ifa->ifa_broadcast &&
1548	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1549	    (ifa->ifa_label[0] &&
1550	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1551	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1552	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1553			  preferred, valid))
1554		goto nla_put_failure;
1555
1556	nlmsg_end(skb, nlh);
1557	return 0;
1558
1559nla_put_failure:
1560	nlmsg_cancel(skb, nlh);
1561	return -EMSGSIZE;
1562}
1563
/*
 * Netlink dump callback: write one RTM_NEWADDR message per IPv4 address
 * of every device in the namespace of the requesting socket.
 *
 * The dump is resumable.  The position is kept in @cb->args:
 *   args[0] - device-index hash bucket
 *   args[1] - device position within that bucket
 *   args[2] - address position within that device
 * When @skb fills up the current position is saved and the walk picks up
 * there on the next call.
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	/* Restore the position saved by the previous invocation. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Sequence value used by nl_dump_check_consistent() below. */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Past the resume point: start at the first address. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
					/* skb full: stop and remember where. */
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the position for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1620
1621static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1622		      u32 portid)
1623{
1624	struct sk_buff *skb;
1625	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1626	int err = -ENOBUFS;
1627	struct net *net;
1628
1629	net = dev_net(ifa->ifa_dev->dev);
1630	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1631	if (!skb)
1632		goto errout;
1633
1634	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1635	if (err < 0) {
1636		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1637		WARN_ON(err == -EMSGSIZE);
1638		kfree_skb(skb);
1639		goto errout;
1640	}
1641	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1642	return;
1643errout:
1644	if (err < 0)
1645		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1646}
1647
1648static size_t inet_get_link_af_size(const struct net_device *dev)
1649{
1650	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1651
1652	if (!in_dev)
1653		return 0;
1654
1655	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1656}
1657
1658static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1659{
1660	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1661	struct nlattr *nla;
1662	int i;
1663
1664	if (!in_dev)
1665		return -ENODATA;
1666
1667	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1668	if (!nla)
1669		return -EMSGSIZE;
1670
1671	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1672		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1673
1674	return 0;
1675}
1676
/*
 * Netlink policy for the AF_INET link attributes: IFLA_INET_CONF is a
 * nested attribute.  Used by inet_validate_link_af().
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1680
1681static int inet_validate_link_af(const struct net_device *dev,
1682				 const struct nlattr *nla)
1683{
1684	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1685	int err, rem;
1686
1687	if (dev && !__in_dev_get_rtnl(dev))
1688		return -EAFNOSUPPORT;
1689
1690	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1691	if (err < 0)
1692		return err;
1693
1694	if (tb[IFLA_INET_CONF]) {
1695		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1696			int cfgid = nla_type(a);
1697
1698			if (nla_len(a) < 4)
1699				return -EINVAL;
1700
1701			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1702				return -EINVAL;
1703		}
1704	}
1705
1706	return 0;
1707}
1708
1709static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1710{
1711	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1712	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1713	int rem;
1714
1715	if (!in_dev)
1716		return -EAFNOSUPPORT;
1717
1718	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1719		BUG();
1720
1721	if (tb[IFLA_INET_CONF]) {
1722		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1723			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1724	}
1725
1726	return 0;
1727}
1728
1729static int inet_netconf_msgsize_devconf(int type)
1730{
1731	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1732		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1733
1734	/* type -1 is used for ALL */
1735	if (type == -1 || type == NETCONFA_FORWARDING)
1736		size += nla_total_size(4);
1737	if (type == -1 || type == NETCONFA_RP_FILTER)
1738		size += nla_total_size(4);
1739	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1740		size += nla_total_size(4);
1741	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1742		size += nla_total_size(4);
1743
1744	return size;
1745}
1746
1747static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1748				     struct ipv4_devconf *devconf, u32 portid,
1749				     u32 seq, int event, unsigned int flags,
1750				     int type)
1751{
1752	struct nlmsghdr  *nlh;
1753	struct netconfmsg *ncm;
1754
1755	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1756			flags);
1757	if (!nlh)
1758		return -EMSGSIZE;
1759
1760	ncm = nlmsg_data(nlh);
1761	ncm->ncm_family = AF_INET;
1762
1763	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1764		goto nla_put_failure;
1765
1766	/* type -1 is used for ALL */
1767	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1768	    nla_put_s32(skb, NETCONFA_FORWARDING,
1769			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1770		goto nla_put_failure;
1771	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1772	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1773			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1774		goto nla_put_failure;
1775	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1776	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1777			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1778		goto nla_put_failure;
1779	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1780	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1781			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1782		goto nla_put_failure;
1783
1784	nlmsg_end(skb, nlh);
1785	return 0;
1786
1787nla_put_failure:
1788	nlmsg_cancel(skb, nlh);
1789	return -EMSGSIZE;
1790}
1791
1792void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1793				 struct ipv4_devconf *devconf)
1794{
1795	struct sk_buff *skb;
1796	int err = -ENOBUFS;
1797
1798	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1799	if (!skb)
1800		goto errout;
1801
1802	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1803					RTM_NEWNETCONF, 0, type);
1804	if (err < 0) {
1805		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1806		WARN_ON(err == -EMSGSIZE);
1807		kfree_skb(skb);
1808		goto errout;
1809	}
1810	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1811	return;
1812errout:
1813	if (err < 0)
1814		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1815}
1816
/*
 * Netlink policy used when parsing netconf requests in
 * inet_netconf_get_devconf().  Each listed attribute is given a length
 * of sizeof(int) (presumably a minimum payload length - see struct
 * nla_policy).  NETCONFA_MC_FORWARDING has no entry here.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
};
1823
1824static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1825				    struct nlmsghdr *nlh)
1826{
1827	struct net *net = sock_net(in_skb->sk);
1828	struct nlattr *tb[NETCONFA_MAX+1];
1829	struct netconfmsg *ncm;
1830	struct sk_buff *skb;
1831	struct ipv4_devconf *devconf;
1832	struct in_device *in_dev;
1833	struct net_device *dev;
1834	int ifindex;
1835	int err;
1836
1837	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1838			  devconf_ipv4_policy);
1839	if (err < 0)
1840		goto errout;
1841
1842	err = -EINVAL;
1843	if (!tb[NETCONFA_IFINDEX])
1844		goto errout;
1845
1846	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1847	switch (ifindex) {
1848	case NETCONFA_IFINDEX_ALL:
1849		devconf = net->ipv4.devconf_all;
1850		break;
1851	case NETCONFA_IFINDEX_DEFAULT:
1852		devconf = net->ipv4.devconf_dflt;
1853		break;
1854	default:
1855		dev = __dev_get_by_index(net, ifindex);
1856		if (!dev)
1857			goto errout;
1858		in_dev = __in_dev_get_rtnl(dev);
1859		if (!in_dev)
1860			goto errout;
1861		devconf = &in_dev->cnf;
1862		break;
1863	}
1864
1865	err = -ENOBUFS;
1866	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1867	if (!skb)
1868		goto errout;
1869
1870	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1871					NETLINK_CB(in_skb).portid,
1872					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1873					-1);
1874	if (err < 0) {
1875		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1876		WARN_ON(err == -EMSGSIZE);
1877		kfree_skb(skb);
1878		goto errout;
1879	}
1880	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1881errout:
1882	return err;
1883}
1884
/*
 * Netlink dump callback: write one RTM_NEWNETCONF message for every
 * device with IPv4 state, followed by one for the "all" and one for the
 * "default" configuration block.
 *
 * The dump is resumable.  @cb->args[0] holds the device-index hash
 * bucket and @cb->args[1] the device position within it.  Bucket values
 * NETDEV_HASHENTRIES and NETDEV_HASHENTRIES + 1 stand for the "all" and
 * "default" entries respectively.
 *
 * Returns skb->len.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	/* Restore the position saved by the previous invocation. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Sequence value used by nl_dump_check_consistent() below. */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      -1) < 0) {
				/* skb full: stop and remember where. */
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* All devices done: append the "all" block ... */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      -1) < 0)
			goto done;
		else
			h++;
	}
	/* ... and finally the "default" block. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      -1) < 0)
			goto done;
		else
			h++;
	}
done:
	/* Save the position for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
1955
1956#ifdef CONFIG_SYSCTL
1957
1958static void devinet_copy_dflt_conf(struct net *net, int i)
1959{
1960	struct net_device *dev;
1961
1962	rcu_read_lock();
1963	for_each_netdev_rcu(net, dev) {
1964		struct in_device *in_dev;
1965
1966		in_dev = __in_dev_get_rcu(dev);
1967		if (in_dev && !test_bit(i, in_dev->cnf.state))
1968			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1969	}
1970	rcu_read_unlock();
1971}
1972
1973/* called with RTNL locked */
1974static void inet_forward_change(struct net *net)
1975{
1976	struct net_device *dev;
1977	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1978
1979	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1980	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1981	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1982				    NETCONFA_IFINDEX_ALL,
1983				    net->ipv4.devconf_all);
1984	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1985				    NETCONFA_IFINDEX_DEFAULT,
1986				    net->ipv4.devconf_dflt);
1987
1988	for_each_netdev(net, dev) {
1989		struct in_device *in_dev;
1990		if (on)
1991			dev_disable_lro(dev);
1992		rcu_read_lock();
1993		in_dev = __in_dev_get_rcu(dev);
1994		if (in_dev) {
1995			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1996			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1997						    dev->ifindex, &in_dev->cnf);
1998		}
1999		rcu_read_unlock();
2000	}
2001}
2002
2003static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2004{
2005	if (cnf == net->ipv4.devconf_dflt)
2006		return NETCONFA_IFINDEX_DEFAULT;
2007	else if (cnf == net->ipv4.devconf_all)
2008		return NETCONFA_IFINDEX_ALL;
2009	else {
2010		struct in_device *idev
2011			= container_of(cnf, struct in_device, cnf);
2012		return idev->dev->ifindex;
2013	}
2014}
2015
2016static int devinet_conf_proc(struct ctl_table *ctl, int write,
2017			     void __user *buffer,
2018			     size_t *lenp, loff_t *ppos)
2019{
2020	int old_value = *(int *)ctl->data;
2021	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2022	int new_value = *(int *)ctl->data;
2023
2024	if (write) {
2025		struct ipv4_devconf *cnf = ctl->extra1;
2026		struct net *net = ctl->extra2;
2027		int i = (int *)ctl->data - cnf->data;
2028		int ifindex;
2029
2030		set_bit(i, cnf->state);
2031
2032		if (cnf == net->ipv4.devconf_dflt)
2033			devinet_copy_dflt_conf(net, i);
2034		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2035		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2036			if ((new_value == 0) && (old_value != 0))
2037				rt_cache_flush(net);
2038
2039		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2040		    new_value != old_value) {
2041			ifindex = devinet_conf_ifindex(net, cnf);
2042			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2043						    ifindex, cnf);
2044		}
2045		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2046		    new_value != old_value) {
2047			ifindex = devinet_conf_ifindex(net, cnf);
2048			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2049						    ifindex, cnf);
2050		}
2051	}
2052
2053	return ret;
2054}
2055
/*
 * Sysctl handler for the "forwarding" / "ip_forward" entries.
 *
 * The value is read or written with proc_dointvec().  When a write
 * changes it:
 *   - for the "default" block only a netconf notification is sent;
 *   - for the "all" block inet_forward_change() propagates the setting;
 *   - for a single device LRO is disabled if forwarding was turned on
 *     and a netconf notification is sent.
 * The latter two run under RTNL and are followed by a route cache flush.
 * If RTNL cannot be taken immediately, the value and file position are
 * rolled back and the syscall is restarted.
 *
 * Returns the result of proc_dointvec(), or that of restart_syscall().
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else {
				/* Per-device entry: cnf is embedded in the in_device. */
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2098
2099static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2100				void __user *buffer,
2101				size_t *lenp, loff_t *ppos)
2102{
2103	int *valp = ctl->data;
2104	int val = *valp;
2105	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2106	struct net *net = ctl->extra2;
2107
2108	if (write && *valp != val)
2109		rt_cache_flush(net);
2110
2111	return ret;
2112}
2113
/*
 * Build one ctl_table initializer for the devconf slot IPV4_DEVCONF_<attr>.
 * .data and .extra1 point into the template ipv4_devconf;
 * __devinet_sysctl_register() rebases them onto the real configuration
 * block.  @name is the file name, @mval its mode and @proc its handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2136
/*
 * Template for the net/ipv4/conf/<name>/ sysctl directory.
 *
 * __devinet_sysctl_register() duplicates this structure for every
 * configuration block and stores the resulting registration handle in
 * sysctl_header.  The entries below reference the template ipv4_devconf
 * until they are rebased there.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),

		/* Settings whose change flushes the route cache. */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
	},
};
2181
2182static int __devinet_sysctl_register(struct net *net, char *dev_name,
2183					struct ipv4_devconf *p)
2184{
2185	int i;
2186	struct devinet_sysctl_table *t;
2187	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2188
2189	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2190	if (!t)
2191		goto out;
2192
2193	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2194		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2195		t->devinet_vars[i].extra1 = p;
2196		t->devinet_vars[i].extra2 = net;
2197	}
2198
2199	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2200
2201	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2202	if (!t->sysctl_header)
2203		goto free;
2204
2205	p->sysctl = t;
2206	return 0;
2207
2208free:
2209	kfree(t);
2210out:
2211	return -ENOBUFS;
2212}
2213
2214static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2215{
2216	struct devinet_sysctl_table *t = cnf->sysctl;
2217
2218	if (!t)
2219		return;
2220
2221	cnf->sysctl = NULL;
2222	unregister_net_sysctl_table(t->sysctl_header);
2223	kfree(t);
2224}
2225
2226static int devinet_sysctl_register(struct in_device *idev)
2227{
2228	int err;
2229
2230	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2231		return -EINVAL;
2232
2233	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2234	if (err)
2235		return err;
2236	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2237					&idev->cnf);
2238	if (err)
2239		neigh_sysctl_unregister(idev->arp_parms);
2240	return err;
2241}
2242
2243static void devinet_sysctl_unregister(struct in_device *idev)
2244{
2245	__devinet_sysctl_unregister(&idev->cnf);
2246	neigh_sysctl_unregister(idev->arp_parms);
2247}
2248
2249static struct ctl_table ctl_forward_entry[] = {
2250	{
2251		.procname	= "ip_forward",
2252		.data		= &ipv4_devconf.data[
2253					IPV4_DEVCONF_FORWARDING - 1],
2254		.maxlen		= sizeof(int),
2255		.mode		= 0644,
2256		.proc_handler	= devinet_sysctl_forward,
2257		.extra1		= &ipv4_devconf,
2258		.extra2		= &init_net,
2259	},
2260	{ },
2261};
2262#endif
2263
2264static __net_init int devinet_init_net(struct net *net)
2265{
2266	int err;
2267	struct ipv4_devconf *all, *dflt;
2268#ifdef CONFIG_SYSCTL
2269	struct ctl_table *tbl = ctl_forward_entry;
2270	struct ctl_table_header *forw_hdr;
2271#endif
2272
2273	err = -ENOMEM;
2274	all = &ipv4_devconf;
2275	dflt = &ipv4_devconf_dflt;
2276
2277	if (!net_eq(net, &init_net)) {
2278		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2279		if (!all)
2280			goto err_alloc_all;
2281
2282		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2283		if (!dflt)
2284			goto err_alloc_dflt;
2285
2286#ifdef CONFIG_SYSCTL
2287		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2288		if (!tbl)
2289			goto err_alloc_ctl;
2290
2291		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2292		tbl[0].extra1 = all;
2293		tbl[0].extra2 = net;
2294#endif
2295	}
2296
2297#ifdef CONFIG_SYSCTL
2298	err = __devinet_sysctl_register(net, "all", all);
2299	if (err < 0)
2300		goto err_reg_all;
2301
2302	err = __devinet_sysctl_register(net, "default", dflt);
2303	if (err < 0)
2304		goto err_reg_dflt;
2305
2306	err = -ENOMEM;
2307	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2308	if (!forw_hdr)
2309		goto err_reg_ctl;
2310	net->ipv4.forw_hdr = forw_hdr;
2311#endif
2312
2313	net->ipv4.devconf_all = all;
2314	net->ipv4.devconf_dflt = dflt;
2315	return 0;
2316
2317#ifdef CONFIG_SYSCTL
2318err_reg_ctl:
2319	__devinet_sysctl_unregister(dflt);
2320err_reg_dflt:
2321	__devinet_sysctl_unregister(all);
2322err_reg_all:
2323	if (tbl != ctl_forward_entry)
2324		kfree(tbl);
2325err_alloc_ctl:
2326#endif
2327	if (dflt != &ipv4_devconf_dflt)
2328		kfree(dflt);
2329err_alloc_dflt:
2330	if (all != &ipv4_devconf)
2331		kfree(all);
2332err_alloc_all:
2333	return err;
2334}
2335
2336static __net_exit void devinet_exit_net(struct net *net)
2337{
2338#ifdef CONFIG_SYSCTL
2339	struct ctl_table *tbl;
2340
2341	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2342	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2343	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2344	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2345	kfree(tbl);
2346#endif
2347	kfree(net->ipv4.devconf_dflt);
2348	kfree(net->ipv4.devconf_all);
2349}
2350
2351static __net_initdata struct pernet_operations devinet_ops = {
2352	.init = devinet_init_net,
2353	.exit = devinet_exit_net,
2354};
2355
2356static struct rtnl_af_ops inet_af_ops __read_mostly = {
2357	.family		  = AF_INET,
2358	.fill_link_af	  = inet_fill_link_af,
2359	.get_link_af_size = inet_get_link_af_size,
2360	.validate_link_af = inet_validate_link_af,
2361	.set_link_af	  = inet_set_link_af,
2362};
2363
2364void __init devinet_init(void)
2365{
2366	int i;
2367
2368	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2369		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2370
2371	register_pernet_subsys(&devinet_ops);
2372
2373	register_gifconf(PF_INET, inet_gifconf);
2374	register_netdevice_notifier(&ip_netdev_notifier);
2375
2376	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2377
2378	rtnl_af_register(&inet_af_ops);
2379
2380	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2381	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2382	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2383	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2384		      inet_netconf_dump_devconf, NULL);
2385}
2386
2387