1 /*
2  * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * Development of IPv6 NAT funded by Astaro.
9  */
10 #include <linux/types.h>
11 #include <linux/module.h>
12 #include <linux/skbuff.h>
13 #include <linux/ipv6.h>
14 #include <linux/netfilter.h>
15 #include <linux/netfilter_ipv6.h>
16 #include <net/secure_seq.h>
17 #include <net/checksum.h>
18 #include <net/ip6_checksum.h>
19 #include <net/ip6_route.h>
20 #include <net/ipv6.h>
21 
22 #include <net/netfilter/nf_conntrack_core.h>
23 #include <net/netfilter/nf_conntrack.h>
24 #include <net/netfilter/nf_nat_core.h>
25 #include <net/netfilter/nf_nat_l3proto.h>
26 #include <net/netfilter/nf_nat_l4proto.h>
27 
28 static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
29 
30 #ifdef CONFIG_XFRM
nf_nat_ipv6_decode_session(struct sk_buff * skb,const struct nf_conn * ct,enum ip_conntrack_dir dir,unsigned long statusbit,struct flowi * fl)31 static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
32 				       const struct nf_conn *ct,
33 				       enum ip_conntrack_dir dir,
34 				       unsigned long statusbit,
35 				       struct flowi *fl)
36 {
37 	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
38 	struct flowi6 *fl6 = &fl->u.ip6;
39 
40 	if (ct->status & statusbit) {
41 		fl6->daddr = t->dst.u3.in6;
42 		if (t->dst.protonum == IPPROTO_TCP ||
43 		    t->dst.protonum == IPPROTO_UDP ||
44 		    t->dst.protonum == IPPROTO_UDPLITE ||
45 		    t->dst.protonum == IPPROTO_DCCP ||
46 		    t->dst.protonum == IPPROTO_SCTP)
47 			fl6->fl6_dport = t->dst.u.all;
48 	}
49 
50 	statusbit ^= IPS_NAT_MASK;
51 
52 	if (ct->status & statusbit) {
53 		fl6->saddr = t->src.u3.in6;
54 		if (t->dst.protonum == IPPROTO_TCP ||
55 		    t->dst.protonum == IPPROTO_UDP ||
56 		    t->dst.protonum == IPPROTO_UDPLITE ||
57 		    t->dst.protonum == IPPROTO_DCCP ||
58 		    t->dst.protonum == IPPROTO_SCTP)
59 			fl6->fl6_sport = t->src.u.all;
60 	}
61 }
62 #endif
63 
nf_nat_ipv6_in_range(const struct nf_conntrack_tuple * t,const struct nf_nat_range * range)64 static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
65 				 const struct nf_nat_range *range)
66 {
67 	return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
68 	       ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
69 }
70 
nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple * t,__be16 dport)71 static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
72 				   __be16 dport)
73 {
74 	return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
75 }
76 
nf_nat_ipv6_manip_pkt(struct sk_buff * skb,unsigned int iphdroff,const struct nf_nat_l4proto * l4proto,const struct nf_conntrack_tuple * target,enum nf_nat_manip_type maniptype)77 static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
78 				  unsigned int iphdroff,
79 				  const struct nf_nat_l4proto *l4proto,
80 				  const struct nf_conntrack_tuple *target,
81 				  enum nf_nat_manip_type maniptype)
82 {
83 	struct ipv6hdr *ipv6h;
84 	__be16 frag_off;
85 	int hdroff;
86 	u8 nexthdr;
87 
88 	if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
89 		return false;
90 
91 	ipv6h = (void *)skb->data + iphdroff;
92 	nexthdr = ipv6h->nexthdr;
93 	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
94 				  &nexthdr, &frag_off);
95 	if (hdroff < 0)
96 		goto manip_addr;
97 
98 	if ((frag_off & htons(~0x7)) == 0 &&
99 	    !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
100 				target, maniptype))
101 		return false;
102 manip_addr:
103 	if (maniptype == NF_NAT_MANIP_SRC)
104 		ipv6h->saddr = target->src.u3.in6;
105 	else
106 		ipv6h->daddr = target->dst.u3.in6;
107 
108 	return true;
109 }
110 
/*
 * Adjust the transport checksum at @check for an address rewrite.
 *
 * The address currently in the IPv6 header at @iphdroff (source or
 * destination, selected by @maniptype) is treated as the old value and the
 * matching address of @t as the new one.
 */
static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
				    unsigned int iphdroff, __sum16 *check,
				    const struct nf_conntrack_tuple *t,
				    enum nf_nat_manip_type maniptype)
{
	const struct ipv6hdr *hdr = (struct ipv6hdr *)(skb->data + iphdroff);
	const bool is_src = maniptype == NF_NAT_MANIP_SRC;
	const struct in6_addr *from = is_src ? &hdr->saddr : &hdr->daddr;
	const struct in6_addr *to = is_src ? &t->src.u3.in6 : &t->dst.u3.in6;

	inet_proto_csum_replace16(check, skb, from->s6_addr32,
				  to->s6_addr32, true);
}
129 
/*
 * Bring the transport checksum at @check up to date after the payload
 * starting at @data changed length from @oldlen to @datalen.
 *
 * Three cases:
 *  - the skb is already CHECKSUM_PARTIAL: only the length change is folded
 *    into the existing checksum;
 *  - the route is not local and the device (if any) advertises
 *    NETIF_F_V6_CSUM: the skb is switched to CHECKSUM_PARTIAL and only the
 *    pseudo-header sum is stored;
 *  - otherwise the full checksum is computed here.
 *
 * NOTE(review): the route attached to the skb is dereferenced without a
 * NULL check - presumably callers only run where a dst is set; confirm.
 */
static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
				    u8 proto, void *data, __sum16 *check,
				    int datalen, int oldlen)
{
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	__wsum payload_sum;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace2(check, skb,
					 htons(oldlen), htons(datalen), true);
		return;
	}

	if ((rt->rt6i_flags & RTF_LOCAL) ||
	    (skb->dev && !(skb->dev->features & NETIF_F_V6_CSUM))) {
		/* The checksum field is part of @data: clear it first. */
		*check = 0;
		payload_sum = csum_partial(data, datalen, 0);
		*check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					 datalen, proto, payload_sum);
		if (proto == IPPROTO_UDP && !*check)
			*check = CSUM_MANGLED_0;
		return;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum_start = skb_headroom(skb) +
			  skb_network_offset(skb) +
			  (data - (void *)skb->data);
	skb->csum_offset = (void *)check - data;
	*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
				  datalen, proto, 0);
}
160 
161 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
nf_nat_ipv6_nlattr_to_range(struct nlattr * tb[],struct nf_nat_range * range)162 static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
163 				       struct nf_nat_range *range)
164 {
165 	if (tb[CTA_NAT_V6_MINIP]) {
166 		nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
167 			   sizeof(struct in6_addr));
168 		range->flags |= NF_NAT_RANGE_MAP_IPS;
169 	}
170 
171 	if (tb[CTA_NAT_V6_MAXIP])
172 		nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
173 			   sizeof(struct in6_addr));
174 	else
175 		range->max_addr = range->min_addr;
176 
177 	return 0;
178 }
179 #endif
180 
181 static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
182 	.l3proto		= NFPROTO_IPV6,
183 	.secure_port		= nf_nat_ipv6_secure_port,
184 	.in_range		= nf_nat_ipv6_in_range,
185 	.manip_pkt		= nf_nat_ipv6_manip_pkt,
186 	.csum_update		= nf_nat_ipv6_csum_update,
187 	.csum_recalc		= nf_nat_ipv6_csum_recalc,
188 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
189 	.nlattr_to_range	= nf_nat_ipv6_nlattr_to_range,
190 #endif
191 #ifdef CONFIG_XFRM
192 	.decode_session	= nf_nat_ipv6_decode_session,
193 #endif
194 };
195 
nf_nat_icmpv6_reply_translation(struct sk_buff * skb,struct nf_conn * ct,enum ip_conntrack_info ctinfo,unsigned int hooknum,unsigned int hdrlen)196 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
197 				    struct nf_conn *ct,
198 				    enum ip_conntrack_info ctinfo,
199 				    unsigned int hooknum,
200 				    unsigned int hdrlen)
201 {
202 	struct {
203 		struct icmp6hdr	icmp6;
204 		struct ipv6hdr	ip6;
205 	} *inside;
206 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
207 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
208 	const struct nf_nat_l4proto *l4proto;
209 	struct nf_conntrack_tuple target;
210 	unsigned long statusbit;
211 
212 	NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
213 
214 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
215 		return 0;
216 	if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
217 		return 0;
218 
219 	inside = (void *)skb->data + hdrlen;
220 	if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
221 		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
222 			return 0;
223 		if (ct->status & IPS_NAT_MASK)
224 			return 0;
225 	}
226 
227 	if (manip == NF_NAT_MANIP_SRC)
228 		statusbit = IPS_SRC_NAT;
229 	else
230 		statusbit = IPS_DST_NAT;
231 
232 	/* Invert if this is reply direction */
233 	if (dir == IP_CT_DIR_REPLY)
234 		statusbit ^= IPS_NAT_MASK;
235 
236 	if (!(ct->status & statusbit))
237 		return 1;
238 
239 	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
240 	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
241 				   l4proto, &ct->tuplehash[!dir].tuple, !manip))
242 		return 0;
243 
244 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
245 		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
246 		inside = (void *)skb->data + hdrlen;
247 		inside->icmp6.icmp6_cksum = 0;
248 		inside->icmp6.icmp6_cksum =
249 			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
250 					skb->len - hdrlen, IPPROTO_ICMPV6,
251 					csum_partial(&inside->icmp6,
252 						     skb->len - hdrlen, 0));
253 	}
254 
255 	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
256 	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
257 	if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
258 		return 0;
259 
260 	return 1;
261 }
262 EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
263 
264 unsigned int
nf_nat_ipv6_fn(void * priv,struct sk_buff * skb,const struct nf_hook_state * state,unsigned int (* do_chain)(void * priv,struct sk_buff * skb,const struct nf_hook_state * state,struct nf_conn * ct))265 nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
266 	       const struct nf_hook_state *state,
267 	       unsigned int (*do_chain)(void *priv,
268 					struct sk_buff *skb,
269 					const struct nf_hook_state *state,
270 					struct nf_conn *ct))
271 {
272 	struct nf_conn *ct;
273 	enum ip_conntrack_info ctinfo;
274 	struct nf_conn_nat *nat;
275 	enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
276 	__be16 frag_off;
277 	int hdrlen;
278 	u8 nexthdr;
279 
280 	ct = nf_ct_get(skb, &ctinfo);
281 	/* Can't track?  It's not due to stress, or conntrack would
282 	 * have dropped it.  Hence it's the user's responsibilty to
283 	 * packet filter it out, or implement conntrack/NAT for that
284 	 * protocol. 8) --RR
285 	 */
286 	if (!ct)
287 		return NF_ACCEPT;
288 
289 	/* Don't try to NAT if this packet is not conntracked */
290 	if (nf_ct_is_untracked(ct))
291 		return NF_ACCEPT;
292 
293 	nat = nf_ct_nat_ext_add(ct);
294 	if (nat == NULL)
295 		return NF_ACCEPT;
296 
297 	switch (ctinfo) {
298 	case IP_CT_RELATED:
299 	case IP_CT_RELATED_REPLY:
300 		nexthdr = ipv6_hdr(skb)->nexthdr;
301 		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
302 					  &nexthdr, &frag_off);
303 
304 		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
305 			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
306 							     state->hook,
307 							     hdrlen))
308 				return NF_DROP;
309 			else
310 				return NF_ACCEPT;
311 		}
312 		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
313 	case IP_CT_NEW:
314 		/* Seen it before?  This can happen for loopback, retrans,
315 		 * or local packets.
316 		 */
317 		if (!nf_nat_initialized(ct, maniptype)) {
318 			unsigned int ret;
319 
320 			ret = do_chain(priv, skb, state, ct);
321 			if (ret != NF_ACCEPT)
322 				return ret;
323 
324 			if (nf_nat_initialized(ct, HOOK2MANIP(state->hook)))
325 				break;
326 
327 			ret = nf_nat_alloc_null_binding(ct, state->hook);
328 			if (ret != NF_ACCEPT)
329 				return ret;
330 		} else {
331 			pr_debug("Already setup manip %s for ct %p\n",
332 				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
333 				 ct);
334 			if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
335 				goto oif_changed;
336 		}
337 		break;
338 
339 	default:
340 		/* ESTABLISHED */
341 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
342 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
343 		if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
344 			goto oif_changed;
345 	}
346 
347 	return nf_nat_packet(ct, ctinfo, state->hook, skb);
348 
349 oif_changed:
350 	nf_ct_kill_acct(ct, ctinfo, skb);
351 	return NF_DROP;
352 }
353 EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn);
354 
355 unsigned int
nf_nat_ipv6_in(void * priv,struct sk_buff * skb,const struct nf_hook_state * state,unsigned int (* do_chain)(void * priv,struct sk_buff * skb,const struct nf_hook_state * state,struct nf_conn * ct))356 nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
357 	       const struct nf_hook_state *state,
358 	       unsigned int (*do_chain)(void *priv,
359 					struct sk_buff *skb,
360 					const struct nf_hook_state *state,
361 					struct nf_conn *ct))
362 {
363 	unsigned int ret;
364 	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
365 
366 	ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
367 	if (ret != NF_DROP && ret != NF_STOLEN &&
368 	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
369 		skb_dst_drop(skb);
370 
371 	return ret;
372 }
373 EXPORT_SYMBOL_GPL(nf_nat_ipv6_in);
374 
375 unsigned int
nf_nat_ipv6_out(void * priv,struct sk_buff * skb,const struct nf_hook_state * state,unsigned int (* do_chain)(void * priv,struct sk_buff * skb,const struct nf_hook_state * state,struct nf_conn * ct))376 nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
377 		const struct nf_hook_state *state,
378 		unsigned int (*do_chain)(void *priv,
379 					 struct sk_buff *skb,
380 					 const struct nf_hook_state *state,
381 					 struct nf_conn *ct))
382 {
383 #ifdef CONFIG_XFRM
384 	const struct nf_conn *ct;
385 	enum ip_conntrack_info ctinfo;
386 	int err;
387 #endif
388 	unsigned int ret;
389 
390 	/* root is playing with raw sockets. */
391 	if (skb->len < sizeof(struct ipv6hdr))
392 		return NF_ACCEPT;
393 
394 	ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
395 #ifdef CONFIG_XFRM
396 	if (ret != NF_DROP && ret != NF_STOLEN &&
397 	    !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
398 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
399 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
400 
401 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
402 				      &ct->tuplehash[!dir].tuple.dst.u3) ||
403 		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
404 		     ct->tuplehash[dir].tuple.src.u.all !=
405 		     ct->tuplehash[!dir].tuple.dst.u.all)) {
406 			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
407 			if (err < 0)
408 				ret = NF_DROP_ERR(err);
409 		}
410 	}
411 #endif
412 	return ret;
413 }
414 EXPORT_SYMBOL_GPL(nf_nat_ipv6_out);
415 
416 unsigned int
nf_nat_ipv6_local_fn(void * priv,struct sk_buff * skb,const struct nf_hook_state * state,unsigned int (* do_chain)(void * priv,struct sk_buff * skb,const struct nf_hook_state * state,struct nf_conn * ct))417 nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
418 		     const struct nf_hook_state *state,
419 		     unsigned int (*do_chain)(void *priv,
420 					      struct sk_buff *skb,
421 					      const struct nf_hook_state *state,
422 					      struct nf_conn *ct))
423 {
424 	const struct nf_conn *ct;
425 	enum ip_conntrack_info ctinfo;
426 	unsigned int ret;
427 	int err;
428 
429 	/* root is playing with raw sockets. */
430 	if (skb->len < sizeof(struct ipv6hdr))
431 		return NF_ACCEPT;
432 
433 	ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
434 	if (ret != NF_DROP && ret != NF_STOLEN &&
435 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
436 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
437 
438 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
439 				      &ct->tuplehash[!dir].tuple.src.u3)) {
440 			err = ip6_route_me_harder(state->net, skb);
441 			if (err < 0)
442 				ret = NF_DROP_ERR(err);
443 		}
444 #ifdef CONFIG_XFRM
445 		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
446 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
447 			 ct->tuplehash[dir].tuple.dst.u.all !=
448 			 ct->tuplehash[!dir].tuple.src.u.all) {
449 			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
450 			if (err < 0)
451 				ret = NF_DROP_ERR(err);
452 		}
453 #endif
454 	}
455 	return ret;
456 }
457 EXPORT_SYMBOL_GPL(nf_nat_ipv6_local_fn);
458 
nf_nat_l3proto_ipv6_init(void)459 static int __init nf_nat_l3proto_ipv6_init(void)
460 {
461 	int err;
462 
463 	err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
464 	if (err < 0)
465 		goto err1;
466 	err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
467 	if (err < 0)
468 		goto err2;
469 	return err;
470 
471 err2:
472 	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
473 err1:
474 	return err;
475 }
476 
nf_nat_l3proto_ipv6_exit(void)477 static void __exit nf_nat_l3proto_ipv6_exit(void)
478 {
479 	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
480 	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
481 }
482 
483 MODULE_LICENSE("GPL");
484 MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
485 
486 module_init(nf_nat_l3proto_ipv6_init);
487 module_exit(nf_nat_l3proto_ipv6_exit);
488