1/*
2 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
3 *
4 * Begun April 1, 1996, Mike Shaver.
5 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
6 */
7
8#include <linux/mm.h>
9#include <linux/module.h>
10#include <linux/sysctl.h>
11#include <linux/igmp.h>
12#include <linux/inetdevice.h>
13#include <linux/seqlock.h>
14#include <linux/init.h>
15#include <linux/slab.h>
16#include <linux/nsproxy.h>
17#include <linux/swap.h>
18#include <net/snmp.h>
19#include <net/icmp.h>
20#include <net/ip.h>
21#include <net/route.h>
22#include <net/tcp.h>
23#include <net/udp.h>
24#include <net/cipso_ipv4.h>
25#include <net/inet_frag.h>
26#include <net/ping.h>
27#include <net/tcp_memcontrol.h>
28
/*
 * Limits handed to the *_minmax proc handlers through .extra1 (lower bound)
 * and .extra2 (upper bound) of the tables and temporary tables below.
 */

/* Generic small constants shared by several table entries. */
static int zero;
static int one = 1;
static int four = 4;
static int thousand = 1000;
/* Upper bound for tcp_min_tso_segs. */
static int gso_max_segs = GSO_MAX_SEGS;
/* Upper bound for tcp_retries1. */
static int tcp_retr1_max = 255;
/* Per-element bounds for the two values of ip_local_port_range. */
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
/* Bounds for tcp_adv_win_scale. */
static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
/* Bounds for ip_default_ttl. */
static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
/* Bounds for tcp_syn_retries. */
static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
/* Per-element bounds for the two values of ping_group_range. */
static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
45
46/* Update system visible IP port range */
47static void set_local_port_range(struct net *net, int range[2])
48{
49	bool same_parity = !((range[0] ^ range[1]) & 1);
50
51	write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
52	if (same_parity && !net->ipv4.ip_local_ports.warned) {
53		net->ipv4.ip_local_ports.warned = true;
54		pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
55	}
56	net->ipv4.ip_local_ports.range[0] = range[0];
57	net->ipv4.ip_local_ports.range[1] = range[1];
58	write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
59}
60
61/* Validate changes from /proc interface. */
62static int ipv4_local_port_range(struct ctl_table *table, int write,
63				 void __user *buffer,
64				 size_t *lenp, loff_t *ppos)
65{
66	struct net *net =
67		container_of(table->data, struct net, ipv4.ip_local_ports.range);
68	int ret;
69	int range[2];
70	struct ctl_table tmp = {
71		.data = &range,
72		.maxlen = sizeof(range),
73		.mode = table->mode,
74		.extra1 = &ip_local_port_range_min,
75		.extra2 = &ip_local_port_range_max,
76	};
77
78	inet_get_local_port_range(net, &range[0], &range[1]);
79
80	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
81
82	if (write && ret == 0) {
83		if (range[1] < range[0])
84			ret = -EINVAL;
85		else
86			set_local_port_range(net, range);
87	}
88
89	return ret;
90}
91
92
93static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
94{
95	kgid_t *data = table->data;
96	struct net *net =
97		container_of(table->data, struct net, ipv4.ping_group_range.range);
98	unsigned int seq;
99	do {
100		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
101
102		*low = data[0];
103		*high = data[1];
104	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
105}
106
107/* Update system visible IP port range */
108static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
109{
110	kgid_t *data = table->data;
111	struct net *net =
112		container_of(table->data, struct net, ipv4.ping_group_range.range);
113	write_seqlock(&net->ipv4.ip_local_ports.lock);
114	data[0] = low;
115	data[1] = high;
116	write_sequnlock(&net->ipv4.ip_local_ports.lock);
117}
118
119/* Validate changes from /proc interface. */
120static int ipv4_ping_group_range(struct ctl_table *table, int write,
121				 void __user *buffer,
122				 size_t *lenp, loff_t *ppos)
123{
124	struct user_namespace *user_ns = current_user_ns();
125	int ret;
126	gid_t urange[2];
127	kgid_t low, high;
128	struct ctl_table tmp = {
129		.data = &urange,
130		.maxlen = sizeof(urange),
131		.mode = table->mode,
132		.extra1 = &ip_ping_group_range_min,
133		.extra2 = &ip_ping_group_range_max,
134	};
135
136	inet_get_ping_group_range_table(table, &low, &high);
137	urange[0] = from_kgid_munged(user_ns, low);
138	urange[1] = from_kgid_munged(user_ns, high);
139	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
140
141	if (write && ret == 0) {
142		low = make_kgid(user_ns, urange[0]);
143		high = make_kgid(user_ns, urange[1]);
144		if (!gid_valid(low) || !gid_valid(high) ||
145		    (urange[1] < urange[0]) || gid_lt(high, low)) {
146			low = make_kgid(&init_user_ns, 1);
147			high = make_kgid(&init_user_ns, 0);
148		}
149		set_ping_group_range(table, low, high);
150	}
151
152	return ret;
153}
154
155static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
156				       void __user *buffer, size_t *lenp, loff_t *ppos)
157{
158	char val[TCP_CA_NAME_MAX];
159	struct ctl_table tbl = {
160		.data = val,
161		.maxlen = TCP_CA_NAME_MAX,
162	};
163	int ret;
164
165	tcp_get_default_congestion_control(val);
166
167	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
168	if (write && ret == 0)
169		ret = tcp_set_default_congestion_control(val);
170	return ret;
171}
172
173static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
174						 int write,
175						 void __user *buffer, size_t *lenp,
176						 loff_t *ppos)
177{
178	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
179	int ret;
180
181	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
182	if (!tbl.data)
183		return -ENOMEM;
184	tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
185	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
186	kfree(tbl.data);
187	return ret;
188}
189
190static int proc_allowed_congestion_control(struct ctl_table *ctl,
191					   int write,
192					   void __user *buffer, size_t *lenp,
193					   loff_t *ppos)
194{
195	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
196	int ret;
197
198	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
199	if (!tbl.data)
200		return -ENOMEM;
201
202	tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
203	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
204	if (write && ret == 0)
205		ret = tcp_set_allowed_congestion_control(tbl.data);
206	kfree(tbl.data);
207	return ret;
208}
209
210static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
211				 void __user *buffer, size_t *lenp,
212				 loff_t *ppos)
213{
214	struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
215	struct tcp_fastopen_context *ctxt;
216	int ret;
217	u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
218
219	tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
220	if (!tbl.data)
221		return -ENOMEM;
222
223	rcu_read_lock();
224	ctxt = rcu_dereference(tcp_fastopen_ctx);
225	if (ctxt)
226		memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
227	else
228		memset(user_key, 0, sizeof(user_key));
229	rcu_read_unlock();
230
231	snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
232		user_key[0], user_key[1], user_key[2], user_key[3]);
233	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
234
235	if (write && ret == 0) {
236		if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
237			   user_key + 2, user_key + 3) != 4) {
238			ret = -EINVAL;
239			goto bad_key;
240		}
241		/* Generate a dummy secret but don't publish it. This
242		 * is needed so we don't regenerate a new key on the
243		 * first invocation of tcp_fastopen_cookie_gen
244		 */
245		tcp_fastopen_init_key_once(false);
246		tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
247	}
248
249bad_key:
250	pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
251	       user_key[0], user_key[1], user_key[2], user_key[3],
252	       (char *)tbl.data, ret);
253	kfree(tbl.data);
254	return ret;
255}
256
/*
 * Entries registered once, for init_net, under "net/ipv4" by
 * sysctl_ipv4_init().  Every .data pointer here refers to a global object
 * rather than to a member of a struct net.  Entries that leave .data unset
 * use one of the custom handlers above, which build their own temporary
 * table around a scratch buffer.  The array ends with an empty entry.
 */
static struct ctl_table ipv4_table[] = {
	{
		.procname	= "tcp_timestamps",
		.data		= &sysctl_tcp_timestamps,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_window_scaling",
		.data		= &sysctl_tcp_window_scaling,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_sack",
		.data		= &sysctl_tcp_sack,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_retrans_collapse",
		.data		= &sysctl_tcp_retrans_collapse,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "ip_default_ttl",
		.data		= &sysctl_ip_default_ttl,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &ip_ttl_min,
		.extra2		= &ip_ttl_max,
	},
	{
		.procname	= "tcp_syn_retries",
		.data		= &sysctl_tcp_syn_retries,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &tcp_syn_retries_min,
		.extra2		= &tcp_syn_retries_max
	},
	{
		.procname	= "tcp_synack_retries",
		.data		= &sysctl_tcp_synack_retries,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_max_orphans",
		.data		= &sysctl_tcp_max_orphans,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_max_tw_buckets",
		.data		= &tcp_death_row.sysctl_max_tw_buckets,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "ip_early_demux",
		.data		= &sysctl_ip_early_demux,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "ip_dynaddr",
		.data		= &sysctl_ip_dynaddr,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_keepalive_time",
		.data		= &sysctl_tcp_keepalive_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "tcp_keepalive_probes",
		.data		= &sysctl_tcp_keepalive_probes,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_keepalive_intvl",
		.data		= &sysctl_tcp_keepalive_intvl,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* Only an upper bound is supplied for this entry. */
		.procname	= "tcp_retries1",
		.data		= &sysctl_tcp_retries1,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra2		= &tcp_retr1_max
	},
	{
		.procname	= "tcp_retries2",
		.data		= &sysctl_tcp_retries2,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_fin_timeout",
		.data		= &sysctl_tcp_fin_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
#ifdef CONFIG_SYN_COOKIES
	{
		.procname	= "tcp_syncookies",
		.data		= &sysctl_tcp_syncookies,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
	{
		.procname	= "tcp_fastopen",
		.data		= &sysctl_tcp_fastopen,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* No .data: the handler formats the key into its own buffer.
		 * Mode 0600 keeps the key unreadable to group and others.
		 */
		.procname	= "tcp_fastopen_key",
		.mode		= 0600,
		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
		.proc_handler	= proc_tcp_fastopen_key,
	},
	{
		.procname	= "tcp_tw_recycle",
		.data		= &tcp_death_row.sysctl_tw_recycle,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_abort_on_overflow",
		.data		= &sysctl_tcp_abort_on_overflow,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_stdurg",
		.data		= &sysctl_tcp_stdurg,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_rfc1337",
		.data		= &sysctl_tcp_rfc1337,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_max_syn_backlog",
		.data		= &sysctl_max_syn_backlog,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "igmp_max_memberships",
		.data		= &sysctl_igmp_max_memberships,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "igmp_max_msf",
		.data		= &sysctl_igmp_max_msf,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#ifdef CONFIG_IP_MULTICAST
	{
		/* Only a lower bound (1) is supplied for this entry. */
		.procname	= "igmp_qrv",
		.data		= &sysctl_igmp_qrv,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one
	},
#endif
	{
		.procname	= "inet_peer_threshold",
		.data		= &inet_peer_threshold,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "inet_peer_minttl",
		.data		= &inet_peer_minttl,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "inet_peer_maxttl",
		.data		= &inet_peer_maxttl,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "tcp_orphan_retries",
		.data		= &sysctl_tcp_orphan_retries,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_fack",
		.data		= &sysctl_tcp_fack,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_recovery",
		.data		= &sysctl_tcp_recovery,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_reordering",
		.data		= &sysctl_tcp_reordering,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_max_reordering",
		.data		= &sysctl_tcp_max_reordering,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_dsack",
		.data		= &sysctl_tcp_dsack,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		/* Handled as a vector of unsigned long values. */
		.procname	= "tcp_mem",
		.maxlen		= sizeof(sysctl_tcp_mem),
		.data		= &sysctl_tcp_mem,
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "tcp_wmem",
		.data		= &sysctl_tcp_wmem,
		.maxlen		= sizeof(sysctl_tcp_wmem),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
	},
	{
		.procname	= "tcp_notsent_lowat",
		.data		= &sysctl_tcp_notsent_lowat,
		.maxlen		= sizeof(sysctl_tcp_notsent_lowat),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_rmem",
		.data		= &sysctl_tcp_rmem,
		.maxlen		= sizeof(sysctl_tcp_rmem),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
	},
	{
		.procname	= "tcp_app_win",
		.data		= &sysctl_tcp_app_win,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_adv_win_scale",
		.data		= &sysctl_tcp_adv_win_scale,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &tcp_adv_win_scale_min,
		.extra2		= &tcp_adv_win_scale_max,
	},
	{
		.procname	= "tcp_tw_reuse",
		.data		= &sysctl_tcp_tw_reuse,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_frto",
		.data		= &sysctl_tcp_frto,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_min_rtt_wlen",
		.data		= &sysctl_tcp_min_rtt_wlen,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_low_latency",
		.data		= &sysctl_tcp_low_latency,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_no_metrics_save",
		.data		= &sysctl_tcp_nometrics_save,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_moderate_rcvbuf",
		.data		= &sysctl_tcp_moderate_rcvbuf,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_tso_win_divisor",
		.data		= &sysctl_tcp_tso_win_divisor,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* No .data: the handler works on its own stack buffer. */
		.procname	= "tcp_congestion_control",
		.mode		= 0644,
		.maxlen		= TCP_CA_NAME_MAX,
		.proc_handler	= proc_tcp_congestion_control,
	},
	{
		.procname	= "tcp_workaround_signed_windows",
		.data		= &sysctl_tcp_workaround_signed_windows,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_limit_output_bytes",
		.data		= &sysctl_tcp_limit_output_bytes,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_challenge_ack_limit",
		.data		= &sysctl_tcp_challenge_ack_limit,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_slow_start_after_idle",
		.data		= &sysctl_tcp_slow_start_after_idle,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#ifdef CONFIG_NETLABEL
	{
		.procname	= "cipso_cache_enable",
		.data		= &cipso_v4_cache_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "cipso_cache_bucket_size",
		.data		= &cipso_v4_cache_bucketsize,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "cipso_rbm_optfmt",
		.data		= &cipso_v4_rbm_optfmt,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "cipso_rbm_strictvalid",
		.data		= &cipso_v4_rbm_strictvalid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif /* CONFIG_NETLABEL */
	{
		/* Mode 0444 (no write bits); no .data, the handler allocates
		 * its own buffer.
		 */
		.procname	= "tcp_available_congestion_control",
		.maxlen		= TCP_CA_BUF_MAX,
		.mode		= 0444,
		.proc_handler   = proc_tcp_available_congestion_control,
	},
	{
		/* No .data: the handler allocates its own buffer. */
		.procname	= "tcp_allowed_congestion_control",
		.maxlen		= TCP_CA_BUF_MAX,
		.mode		= 0644,
		.proc_handler   = proc_allowed_congestion_control,
	},
	{
		.procname       = "tcp_thin_linear_timeouts",
		.data           = &sysctl_tcp_thin_linear_timeouts,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec
	},
	{
		.procname       = "tcp_thin_dupack",
		.data           = &sysctl_tcp_thin_dupack,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec
	},
	{
		.procname	= "tcp_early_retrans",
		.data		= &sysctl_tcp_early_retrans,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &four,
	},
	{
		.procname	= "tcp_min_tso_segs",
		.data		= &sysctl_tcp_min_tso_segs,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
		.extra2		= &gso_max_segs,
	},
	{
		.procname	= "tcp_pacing_ss_ratio",
		.data		= &sysctl_tcp_pacing_ss_ratio,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &thousand,
	},
	{
		.procname	= "tcp_pacing_ca_ratio",
		.data		= &sysctl_tcp_pacing_ca_ratio,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &thousand,
	},
	{
		.procname	= "tcp_autocorking",
		.data		= &sysctl_tcp_autocorking,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
	{
		.procname	= "tcp_invalid_ratelimit",
		.data		= &sysctl_tcp_invalid_ratelimit,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "icmp_msgs_per_sec",
		.data		= &sysctl_icmp_msgs_per_sec,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
	},
	{
		.procname	= "icmp_msgs_burst",
		.data		= &sysctl_icmp_msgs_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
	},
	{
		/* Handled as a vector of unsigned long values. */
		.procname	= "udp_mem",
		.data		= &sysctl_udp_mem,
		.maxlen		= sizeof(sysctl_udp_mem),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "udp_rmem_min",
		.data		= &sysctl_udp_rmem_min,
		.maxlen		= sizeof(sysctl_udp_rmem_min),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one
	},
	{
		.procname	= "udp_wmem_min",
		.data		= &sysctl_udp_wmem_min,
		.maxlen		= sizeof(sysctl_udp_wmem_min),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one
	},
	{ }
};
804
/*
 * Per-namespace entries registered under "net/ipv4" by
 * ipv4_sysctl_init_net().  The .data pointers are written against init_net;
 * for any other namespace the table is duplicated and the pointers are
 * shifted by the distance between that namespace and init_net.  The array
 * ends with an empty entry, which the duplication loop skips.
 */
static struct ctl_table ipv4_net_table[] = {
	{
		.procname	= "icmp_echo_ignore_all",
		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_all,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "icmp_echo_ignore_broadcasts",
		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "icmp_ignore_bogus_error_responses",
		.data		= &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "icmp_errors_use_inbound_ifaddr",
		.data		= &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "icmp_ratelimit",
		.data		= &init_net.ipv4.sysctl_icmp_ratelimit,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "icmp_ratemask",
		.data		= &init_net.ipv4.sysctl_icmp_ratemask,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		/* The handler recovers the owning struct net from .data. */
		.procname	= "ping_group_range",
		.data		= &init_net.ipv4.ping_group_range.range,
		.maxlen		= sizeof(gid_t)*2,
		.mode		= 0644,
		.proc_handler	= ipv4_ping_group_range,
	},
	{
		.procname	= "tcp_ecn",
		.data		= &init_net.ipv4.sysctl_tcp_ecn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_ecn_fallback",
		.data		= &init_net.ipv4.sysctl_tcp_ecn_fallback,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		/* The handler recovers the owning struct net from .data. */
		.procname	= "ip_local_port_range",
		.maxlen		= sizeof(init_net.ipv4.ip_local_ports.range),
		.data		= &init_net.ipv4.ip_local_ports.range,
		.mode		= 0644,
		.proc_handler	= ipv4_local_port_range,
	},
	{
		/* .data is the address of the per-namespace bitmap pointer;
		 * the bitmap itself (65536 bits) is allocated in
		 * ipv4_sysctl_init_net().
		 */
		.procname	= "ip_local_reserved_ports",
		.data		= &init_net.ipv4.sysctl_local_reserved_ports,
		.maxlen		= 65536,
		.mode		= 0644,
		.proc_handler	= proc_do_large_bitmap,
	},
	{
		.procname	= "ip_no_pmtu_disc",
		.data		= &init_net.ipv4.sysctl_ip_no_pmtu_disc,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "ip_forward_use_pmtu",
		.data		= &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "ip_nonlocal_bind",
		.data		= &init_net.ipv4.sysctl_ip_nonlocal_bind,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "fwmark_reflect",
		.data		= &init_net.ipv4.sysctl_fwmark_reflect,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_fwmark_accept",
		.data		= &init_net.ipv4.sysctl_tcp_fwmark_accept,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_mtu_probing",
		.data		= &init_net.ipv4.sysctl_tcp_mtu_probing,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_base_mss",
		.data		= &init_net.ipv4.sysctl_tcp_base_mss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_probe_threshold",
		.data		= &init_net.ipv4.sysctl_tcp_probe_threshold,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_probe_interval",
		.data		= &init_net.ipv4.sysctl_tcp_probe_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* NOTE(review): unlike every other entry in this table, .data
		 * here is the global sysctl_igmp_llm_reports and not a member
		 * of init_net, so it must not be shifted by an
		 * init_net-relative offset when the table is copied for
		 * another namespace.
		 */
		.procname	= "igmp_link_local_mcast_reports",
		.data		= &sysctl_igmp_llm_reports,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{ }
};
955
956static __net_init int ipv4_sysctl_init_net(struct net *net)
957{
958	struct ctl_table *table;
959
960	table = ipv4_net_table;
961	if (!net_eq(net, &init_net)) {
962		int i;
963
964		table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
965		if (!table)
966			goto err_alloc;
967
968		/* Update the variables to point into the current struct net */
969		for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++)
970			table[i].data += (void *)net - (void *)&init_net;
971	}
972
973	net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
974	if (!net->ipv4.ipv4_hdr)
975		goto err_reg;
976
977	net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
978	if (!net->ipv4.sysctl_local_reserved_ports)
979		goto err_ports;
980
981	return 0;
982
983err_ports:
984	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
985err_reg:
986	if (!net_eq(net, &init_net))
987		kfree(table);
988err_alloc:
989	return -ENOMEM;
990}
991
992static __net_exit void ipv4_sysctl_exit_net(struct net *net)
993{
994	struct ctl_table *table;
995
996	kfree(net->ipv4.sysctl_local_reserved_ports);
997	table = net->ipv4.ipv4_hdr->ctl_table_arg;
998	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
999	kfree(table);
1000}
1001
/*
 * Per-namespace hooks passed to register_pernet_subsys() by
 * sysctl_ipv4_init(): .init sets up and .exit tears down the
 * ipv4_net_table registration of a namespace.
 */
static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
	.init = ipv4_sysctl_init_net,
	.exit = ipv4_sysctl_exit_net,
};
1006
1007static __init int sysctl_ipv4_init(void)
1008{
1009	struct ctl_table_header *hdr;
1010
1011	hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
1012	if (!hdr)
1013		return -ENOMEM;
1014
1015	if (register_pernet_subsys(&ipv4_sysctl_ops)) {
1016		unregister_net_sysctl_table(hdr);
1017		return -ENOMEM;
1018	}
1019
1020	return 0;
1021}
1022
1023__initcall(sysctl_ipv4_init);
1024