1 /*
2  *	IPv6 output functions
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on linux/net/ipv4/ip_output.c
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *	Changes:
16  *	A.N.Kuznetsov	:	arithmetic in fragmentation.
17  *				extension headers are implemented.
18  *				route changes now work.
19  *				ip6_forward does not confuse sniffers.
20  *				etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *	Imran Patel	:	frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *			:       add ip6_append_data and related functions
26  *				for datagram xmit
27  */
28 
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41 
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44 
45 #include <net/sock.h>
46 #include <net/snmp.h>
47 
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58 #include <net/l3mdev.h>
59 
60 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
61 {
62 	struct dst_entry *dst = skb_dst(skb);
63 	struct net_device *dev = dst->dev;
64 	struct neighbour *neigh;
65 	struct in6_addr *nexthop;
66 	int ret;
67 
68 	skb->protocol = htons(ETH_P_IPV6);
69 	skb->dev = dev;
70 
71 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
72 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
73 
74 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
75 		    ((mroute6_socket(net, skb) &&
76 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
77 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
78 					 &ipv6_hdr(skb)->saddr))) {
79 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
80 
81 			/* Do not check for IFF_ALLMULTI; multicast routing
82 			   is not supported in any case.
83 			 */
84 			if (newskb)
85 				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
86 					net, sk, newskb, NULL, newskb->dev,
87 					dev_loopback_xmit);
88 
89 			if (ipv6_hdr(skb)->hop_limit == 0) {
90 				IP6_INC_STATS(net, idev,
91 					      IPSTATS_MIB_OUTDISCARDS);
92 				kfree_skb(skb);
93 				return 0;
94 			}
95 		}
96 
97 		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
98 
99 		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100 		    IPV6_ADDR_SCOPE_NODELOCAL &&
101 		    !(dev->flags & IFF_LOOPBACK)) {
102 			kfree_skb(skb);
103 			return 0;
104 		}
105 	}
106 
107 	rcu_read_lock_bh();
108 	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
109 	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110 	if (unlikely(!neigh))
111 		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112 	if (!IS_ERR(neigh)) {
113 		ret = dst_neigh_output(dst, neigh, skb);
114 		rcu_read_unlock_bh();
115 		return ret;
116 	}
117 	rcu_read_unlock_bh();
118 
119 	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
120 	kfree_skb(skb);
121 	return -EINVAL;
122 }
123 
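/* Fragment before transmission when the packet exceeds the path MTU and
 * is not GSO, when the route requires fragmenting every packet
 * (dst_allfrag), or when conntrack defragmentation recorded a smaller
 * incoming fragment size (frag_max_size) that this packet exceeds.
 */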
124 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
125 {
126 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
127 	    dst_allfrag(skb_dst(skb)) ||
128 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
129 		return ip6_fragment(net, sk, skb, ip6_finish_output2);
130 	else
131 		return ip6_finish_output2(net, sk, skb);
132 }
133 
134 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
135 {
136 	struct net_device *dev = skb_dst(skb)->dev;
137 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
138 
139 	if (unlikely(idev->cnf.disable_ipv6)) {
140 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
141 		kfree_skb(skb);
142 		return 0;
143 	}
144 
145 	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
146 			    net, sk, skb, NULL, dev,
147 			    ip6_finish_output,
148 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
149 }
150 
151 /*
152  * xmit an sk_buff (used by TCP, SCTP and DCCP)
153  * Note : the socket lock is not held for SYNACK packets, but the socket
154  * itself might be modified by calls to skb_set_owner_w() and ipv6_local_error(),
155  * which use proper atomic operations or spinlocks.
156  */
157 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
158 	     struct ipv6_txoptions *opt, int tclass)
159 {
160 	struct net *net = sock_net(sk);
161 	const struct ipv6_pinfo *np = inet6_sk(sk);
162 	struct in6_addr *first_hop = &fl6->daddr;
163 	struct dst_entry *dst = skb_dst(skb);
164 	struct ipv6hdr *hdr;
165 	u8  proto = fl6->flowi6_proto;
166 	int seg_len = skb->len;
167 	int hlimit = -1;
168 	u32 mtu;
169 
170 	if (opt) {
171 		unsigned int head_room;
172 
173 		/* First: exthdrs may take lots of space (~8K for now);
174 		   MAX_HEADER is not enough.
175 		 */
176 		head_room = opt->opt_nflen + opt->opt_flen;
177 		seg_len += head_room;
178 		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
179 
180 		if (skb_headroom(skb) < head_room) {
181 			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
182 			if (!skb2) {
183 				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
184 					      IPSTATS_MIB_OUTDISCARDS);
185 				kfree_skb(skb);
186 				return -ENOBUFS;
187 			}
188 			consume_skb(skb);
189 			skb = skb2;
190 			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
191 			 * so it is safe to call in our context (socket lock not held)
192 			 */
193 			skb_set_owner_w(skb, (struct sock *)sk);
194 		}
195 		if (opt->opt_flen)
196 			ipv6_push_frag_opts(skb, opt, &proto);
197 		if (opt->opt_nflen)
198 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
199 	}
200 
201 	skb_push(skb, sizeof(struct ipv6hdr));
202 	skb_reset_network_header(skb);
203 	hdr = ipv6_hdr(skb);
204 
205 	/*
206 	 *	Fill in the IPv6 header
207 	 */
208 	if (np)
209 		hlimit = np->hop_limit;
210 	if (hlimit < 0)
211 		hlimit = ip6_dst_hoplimit(dst);
212 
213 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
214 						     np->autoflowlabel, fl6));
215 
216 	hdr->payload_len = htons(seg_len);
217 	hdr->nexthdr = proto;
218 	hdr->hop_limit = hlimit;
219 
220 	hdr->saddr = fl6->saddr;
221 	hdr->daddr = *first_hop;
222 
223 	skb->protocol = htons(ETH_P_IPV6);
224 	skb->priority = sk->sk_priority;
225 	skb->mark = sk->sk_mark;
226 
227 	mtu = dst_mtu(dst);
228 	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
229 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
230 			      IPSTATS_MIB_OUT, skb->len);
231 		/* Hooks should never assume the socket lock is held,
232 		 * so we promote our socket to non-const.
233 		 */
234 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
235 			       net, (struct sock *)sk, skb, NULL, dst->dev,
236 			       dst_output);
237 	}
238 
239 	skb->dev = dst->dev;
240 	/* ipv6_local_error() does not require the socket lock,
241 	 * so we promote our socket to non-const.
242 	 */
243 	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
244 
245 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
246 	kfree_skb(skb);
247 	return -EMSGSIZE;
248 }
249 EXPORT_SYMBOL(ip6_xmit);
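
/* Example (illustrative sketch, not part of this file): a transport
 * protocol transmits a fully built segment much like TCP does.  The
 * skb is assumed to already carry its route, and 'dst', 'fl6', 'opt'
 * and 'np' are caller-side assumptions:
 *
 *	skb_dst_set_noref(skb, dst);
 *	err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
 */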
250 
251 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
252 {
253 	struct ip6_ra_chain *ra;
254 	struct sock *last = NULL;
255 
256 	read_lock(&ip6_ra_lock);
257 	for (ra = ip6_ra_chain; ra; ra = ra->next) {
258 		struct sock *sk = ra->sk;
259 		if (sk && ra->sel == sel &&
260 		    (!sk->sk_bound_dev_if ||
261 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
262 			if (last) {
263 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
264 				if (skb2)
265 					rawv6_rcv(last, skb2);
266 			}
267 			last = sk;
268 		}
269 	}
270 
271 	if (last) {
272 		rawv6_rcv(last, skb);
273 		read_unlock(&ip6_ra_lock);
274 		return 1;
275 	}
276 	read_unlock(&ip6_ra_lock);
277 	return 0;
278 }
279 
280 static int ip6_forward_proxy_check(struct sk_buff *skb)
281 {
282 	struct ipv6hdr *hdr = ipv6_hdr(skb);
283 	u8 nexthdr = hdr->nexthdr;
284 	__be16 frag_off;
285 	int offset;
286 
287 	if (ipv6_ext_hdr(nexthdr)) {
288 		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
289 		if (offset < 0)
290 			return 0;
291 	} else
292 		offset = sizeof(struct ipv6hdr);
293 
294 	if (nexthdr == IPPROTO_ICMPV6) {
295 		struct icmp6hdr *icmp6;
296 
297 		if (!pskb_may_pull(skb, (skb_network_header(skb) +
298 					 offset + 1 - skb->data)))
299 			return 0;
300 
301 		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
302 
303 		switch (icmp6->icmp6_type) {
304 		case NDISC_ROUTER_SOLICITATION:
305 		case NDISC_ROUTER_ADVERTISEMENT:
306 		case NDISC_NEIGHBOUR_SOLICITATION:
307 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
308 		case NDISC_REDIRECT:
309 			/* Unicast neighbour discovery messages destined
310 			 * to the proxied address are passed to the
311 			 * input function.
312 			 */
313 			return 1;
314 		default:
315 			break;
316 		}
317 	}
318 
319 	/*
320 	 * The proxying router can't forward traffic sent to a link-local
321 	 * address, so signal the sender and discard the packet. This
322 	 * behavior is clarified by the MIPv6 specification.
323 	 */
324 	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
325 		dst_link_failure(skb);
326 		return -1;
327 	}
328 
329 	return 0;
330 }
331 
332 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
333 				     struct sk_buff *skb)
334 {
335 	skb_sender_cpu_clear(skb);
336 	return dst_output(net, sk, skb);
337 }
338 
339 static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
340 {
341 	unsigned int mtu;
342 	struct inet6_dev *idev;
343 
344 	if (dst_metric_locked(dst, RTAX_MTU)) {
345 		mtu = dst_metric_raw(dst, RTAX_MTU);
346 		if (mtu)
347 			return mtu;
348 	}
349 
350 	mtu = IPV6_MIN_MTU;
351 	rcu_read_lock();
352 	idev = __in6_dev_get(dst->dev);
353 	if (idev)
354 		mtu = idev->cnf.mtu6;
355 	rcu_read_unlock();
356 
357 	return mtu;
358 }
359 
360 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
361 {
362 	if (skb->len <= mtu)
363 		return false;
364 
365 	/* ipv6 conntrack defrag sets frag_max_size + ignore_df */
366 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
367 		return true;
368 
369 	if (skb->ignore_df)
370 		return false;
371 
372 	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
373 		return false;
374 
375 	return true;
376 }
377 
378 int ip6_forward(struct sk_buff *skb)
379 {
380 	struct dst_entry *dst = skb_dst(skb);
381 	struct ipv6hdr *hdr = ipv6_hdr(skb);
382 	struct inet6_skb_parm *opt = IP6CB(skb);
383 	struct net *net = dev_net(dst->dev);
384 	u32 mtu;
385 
386 	if (net->ipv6.devconf_all->forwarding == 0)
387 		goto error;
388 
389 	if (skb->pkt_type != PACKET_HOST)
390 		goto drop;
391 
392 	if (unlikely(skb->sk))
393 		goto drop;
394 
395 	if (skb_warn_if_lro(skb))
396 		goto drop;
397 
398 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
399 		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
400 				 IPSTATS_MIB_INDISCARDS);
401 		goto drop;
402 	}
403 
404 	skb_forward_csum(skb);
405 
406 	/*
407 	 *	We DO NOT do any processing on
408 	 *	RA packets, pushing them to user level AS IS
409 	 *	without any warranty that the application will be able
410 	 *	to interpret them. The reason is that we
411 	 *	cannot do anything clever here.
412 	 *
413 	 *	We are not an end node, so if the packet contains
414 	 *	AH/ESP we cannot do anything.
415 	 *	Defragmentation would also be a mistake; RA packets
416 	 *	cannot be fragmented, because there is no guarantee
417 	 *	that different fragments will go along one path. --ANK
418 	 */
419 	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
420 		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
421 			return 0;
422 	}
423 
424 	/*
425 	 *	check and decrement ttl
426 	 */
427 	if (hdr->hop_limit <= 1) {
428 		/* Force OUTPUT device used as source address */
429 		skb->dev = dst->dev;
430 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
431 		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
432 				 IPSTATS_MIB_INHDRERRORS);
433 
434 		kfree_skb(skb);
435 		return -ETIMEDOUT;
436 	}
437 
438 	/* XXX: idev->cnf.proxy_ndp? */
439 	if (net->ipv6.devconf_all->proxy_ndp &&
440 	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
441 		int proxied = ip6_forward_proxy_check(skb);
442 		if (proxied > 0)
443 			return ip6_input(skb);
444 		else if (proxied < 0) {
445 			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
446 					 IPSTATS_MIB_INDISCARDS);
447 			goto drop;
448 		}
449 	}
450 
451 	if (!xfrm6_route_forward(skb)) {
452 		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
453 				 IPSTATS_MIB_INDISCARDS);
454 		goto drop;
455 	}
456 	dst = skb_dst(skb);
457 
458 	/* IPv6 specs say nothing about it, but it is clear that we cannot
459 	   send redirects to source routed frames.
460 	   We don't send redirects to frames decapsulated from IPsec.
461 	 */
462 	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
463 		struct in6_addr *target = NULL;
464 		struct inet_peer *peer;
465 		struct rt6_info *rt;
466 
467 		/*
468 		 *	incoming and outgoing devices are the same
469 		 *	send a redirect.
470 		 */
471 
472 		rt = (struct rt6_info *) dst;
473 		if (rt->rt6i_flags & RTF_GATEWAY)
474 			target = &rt->rt6i_gateway;
475 		else
476 			target = &hdr->daddr;
477 
478 		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
479 
480 		/* Limit redirects both by destination (here)
481 		   and by source (inside ndisc_send_redirect)
482 		 */
483 		if (inet_peer_xrlim_allow(peer, 1*HZ))
484 			ndisc_send_redirect(skb, target);
485 		if (peer)
486 			inet_putpeer(peer);
487 	} else {
488 		int addrtype = ipv6_addr_type(&hdr->saddr);
489 
490 		/* This check is security critical. */
491 		if (addrtype == IPV6_ADDR_ANY ||
492 		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
493 			goto error;
494 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
495 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
496 				    ICMPV6_NOT_NEIGHBOUR, 0);
497 			goto error;
498 		}
499 	}
500 
501 	mtu = ip6_dst_mtu_forward(dst);
502 	if (mtu < IPV6_MIN_MTU)
503 		mtu = IPV6_MIN_MTU;
504 
505 	if (ip6_pkt_too_big(skb, mtu)) {
506 		/* Again, force OUTPUT device used as source address */
507 		skb->dev = dst->dev;
508 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
509 		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
510 				 IPSTATS_MIB_INTOOBIGERRORS);
511 		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
512 				 IPSTATS_MIB_FRAGFAILS);
513 		kfree_skb(skb);
514 		return -EMSGSIZE;
515 	}
516 
517 	if (skb_cow(skb, dst->dev->hard_header_len)) {
518 		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
519 				 IPSTATS_MIB_OUTDISCARDS);
520 		goto drop;
521 	}
522 
523 	hdr = ipv6_hdr(skb);
524 
525 	/* Mangling hops number delayed to point after skb COW */
526 
527 	hdr->hop_limit--;
528 
529 	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
530 	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
531 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
532 		       net, NULL, skb, skb->dev, dst->dev,
533 		       ip6_forward_finish);
534 
535 error:
536 	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
537 drop:
538 	kfree_skb(skb);
539 	return -EINVAL;
540 }
541 
542 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
543 {
544 	to->pkt_type = from->pkt_type;
545 	to->priority = from->priority;
546 	to->protocol = from->protocol;
547 	skb_dst_drop(to);
548 	skb_dst_set(to, dst_clone(skb_dst(from)));
549 	to->dev = from->dev;
550 	to->mark = from->mark;
551 
552 #ifdef CONFIG_NET_SCHED
553 	to->tc_index = from->tc_index;
554 #endif
555 	nf_copy(to, from);
556 	skb_copy_secmark(to, from);
557 }
558 
559 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
560 		 int (*output)(struct net *, struct sock *, struct sk_buff *))
561 {
562 	struct sk_buff *frag;
563 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
564 	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
565 				inet6_sk(skb->sk) : NULL;
566 	struct ipv6hdr *tmp_hdr;
567 	struct frag_hdr *fh;
568 	unsigned int mtu, hlen, left, len;
569 	int hroom, troom;
570 	__be32 frag_id;
571 	int ptr, offset = 0, err = 0;
572 	u8 *prevhdr, nexthdr = 0;
573 
574 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
575 	nexthdr = *prevhdr;
576 
577 	mtu = ip6_skb_dst_mtu(skb);
578 
579 	/* We must not fragment if the socket is set to force MTU discovery
580 	 * or if the skb is not generated by a local socket.
581 	 */
582 	if (unlikely(!skb->ignore_df && skb->len > mtu))
583 		goto fail_toobig;
584 
585 	if (IP6CB(skb)->frag_max_size) {
586 		if (IP6CB(skb)->frag_max_size > mtu)
587 			goto fail_toobig;
588 
589 		/* don't send fragments larger than what we received */
590 		mtu = IP6CB(skb)->frag_max_size;
591 		if (mtu < IPV6_MIN_MTU)
592 			mtu = IPV6_MIN_MTU;
593 	}
594 
595 	if (np && np->frag_size < mtu) {
596 		if (np->frag_size)
597 			mtu = np->frag_size;
598 	}
599 	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
600 		goto fail_toobig;
601 	mtu -= hlen + sizeof(struct frag_hdr);
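	/* Worked example (illustrative): with an mtu of 1500 and a bare
	 * IPv6 header (hlen 40), each fragment can now carry at most
	 * 1500 - 40 - 8 = 1452 octets of payload; non-final fragments
	 * are additionally rounded down to a multiple of 8 below.
	 */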
602 
603 	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
604 				    &ipv6_hdr(skb)->saddr);
605 
606 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
607 	    (err = skb_checksum_help(skb)))
608 		goto fail;
609 
610 	hroom = LL_RESERVED_SPACE(rt->dst.dev);
611 	if (skb_has_frag_list(skb)) {
612 		int first_len = skb_pagelen(skb);
613 		struct sk_buff *frag2;
614 
615 		if (first_len - hlen > mtu ||
616 		    ((first_len - hlen) & 7) ||
617 		    skb_cloned(skb) ||
618 		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
619 			goto slow_path;
620 
621 		skb_walk_frags(skb, frag) {
622 			/* Correct geometry. */
623 			if (frag->len > mtu ||
624 			    ((frag->len & 7) && frag->next) ||
625 			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
626 				goto slow_path_clean;
627 
628 			/* Partially cloned skb? */
629 			if (skb_shared(frag))
630 				goto slow_path_clean;
631 
632 			BUG_ON(frag->sk);
633 			if (skb->sk) {
634 				frag->sk = skb->sk;
635 				frag->destructor = sock_wfree;
636 			}
637 			skb->truesize -= frag->truesize;
638 		}
639 
640 		err = 0;
641 		offset = 0;
642 		/* BUILD HEADER */
643 
644 		*prevhdr = NEXTHDR_FRAGMENT;
645 		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
646 		if (!tmp_hdr) {
647 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
648 				      IPSTATS_MIB_FRAGFAILS);
649 			err = -ENOMEM;
650 			goto fail;
651 		}
652 		frag = skb_shinfo(skb)->frag_list;
653 		skb_frag_list_init(skb);
654 
655 		__skb_pull(skb, hlen);
656 		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
657 		__skb_push(skb, hlen);
658 		skb_reset_network_header(skb);
659 		memcpy(skb_network_header(skb), tmp_hdr, hlen);
660 
661 		fh->nexthdr = nexthdr;
662 		fh->reserved = 0;
663 		fh->frag_off = htons(IP6_MF);
664 		fh->identification = frag_id;
665 
666 		first_len = skb_pagelen(skb);
667 		skb->data_len = first_len - skb_headlen(skb);
668 		skb->len = first_len;
669 		ipv6_hdr(skb)->payload_len = htons(first_len -
670 						   sizeof(struct ipv6hdr));
671 
672 		dst_hold(&rt->dst);
673 
674 		for (;;) {
675 			/* Prepare the header of the next frame,
676 			 * before the previous one goes down. */
677 			if (frag) {
678 				frag->ip_summed = CHECKSUM_NONE;
679 				skb_reset_transport_header(frag);
680 				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
681 				__skb_push(frag, hlen);
682 				skb_reset_network_header(frag);
683 				memcpy(skb_network_header(frag), tmp_hdr,
684 				       hlen);
685 				offset += skb->len - hlen - sizeof(struct frag_hdr);
686 				fh->nexthdr = nexthdr;
687 				fh->reserved = 0;
688 				fh->frag_off = htons(offset);
689 				if (frag->next)
690 					fh->frag_off |= htons(IP6_MF);
691 				fh->identification = frag_id;
692 				ipv6_hdr(frag)->payload_len =
693 						htons(frag->len -
694 						      sizeof(struct ipv6hdr));
695 				ip6_copy_metadata(frag, skb);
696 			}
697 
698 			err = output(net, sk, skb);
699 			if (!err)
700 				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
701 					      IPSTATS_MIB_FRAGCREATES);
702 
703 			if (err || !frag)
704 				break;
705 
706 			skb = frag;
707 			frag = skb->next;
708 			skb->next = NULL;
709 		}
710 
711 		kfree(tmp_hdr);
712 
713 		if (err == 0) {
714 			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
715 				      IPSTATS_MIB_FRAGOKS);
716 			ip6_rt_put(rt);
717 			return 0;
718 		}
719 
720 		kfree_skb_list(frag);
721 
722 		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
723 			      IPSTATS_MIB_FRAGFAILS);
724 		ip6_rt_put(rt);
725 		return err;
726 
727 slow_path_clean:
728 		skb_walk_frags(skb, frag2) {
729 			if (frag2 == frag)
730 				break;
731 			frag2->sk = NULL;
732 			frag2->destructor = NULL;
733 			skb->truesize += frag2->truesize;
734 		}
735 	}
736 
737 slow_path:
738 	left = skb->len - hlen;		/* Space per frame */
739 	ptr = hlen;			/* Where to start from */
740 
741 	/*
742 	 *	Fragment the datagram.
743 	 */
744 
745 	*prevhdr = NEXTHDR_FRAGMENT;
746 	troom = rt->dst.dev->needed_tailroom;
747 
748 	/*
749 	 *	Keep copying data until we run out.
750 	 */
751 	while (left > 0)	{
752 		len = left;
753 		/* IF: it doesn't fit, use 'mtu' - the data space left */
754 		if (len > mtu)
755 			len = mtu;
756 		/* IF: we are not sending up to and including the packet end
757 		   then align the next start on an eight byte boundary */
758 		if (len < left)	{
759 			len &= ~7;
760 		}
761 
762 		/* Allocate buffer */
763 		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
764 				 hroom + troom, GFP_ATOMIC);
765 		if (!frag) {
766 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
767 				      IPSTATS_MIB_FRAGFAILS);
768 			err = -ENOMEM;
769 			goto fail;
770 		}
771 
772 		/*
773 		 *	Set up data on packet
774 		 */
775 
776 		ip6_copy_metadata(frag, skb);
777 		skb_reserve(frag, hroom);
778 		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
779 		skb_reset_network_header(frag);
780 		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
781 		frag->transport_header = (frag->network_header + hlen +
782 					  sizeof(struct frag_hdr));
783 
784 		/*
785 		 *	Charge the memory for the fragment to any owner
786 		 *	it might possess
787 		 */
788 		if (skb->sk)
789 			skb_set_owner_w(frag, skb->sk);
790 
791 		/*
792 		 *	Copy the packet header into the new buffer.
793 		 */
794 		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
795 
796 		/*
797 		 *	Build fragment header.
798 		 */
799 		fh->nexthdr = nexthdr;
800 		fh->reserved = 0;
801 		fh->identification = frag_id;
802 
803 		/*
804 		 *	Copy a block of the IP datagram.
805 		 */
806 		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
807 				     len));
808 		left -= len;
809 
810 		fh->frag_off = htons(offset);
811 		if (left > 0)
812 			fh->frag_off |= htons(IP6_MF);
813 		ipv6_hdr(frag)->payload_len = htons(frag->len -
814 						    sizeof(struct ipv6hdr));
815 
816 		ptr += len;
817 		offset += len;
818 
819 		/*
820 		 *	Put this fragment into the sending queue.
821 		 */
822 		err = output(net, sk, frag);
823 		if (err)
824 			goto fail;
825 
826 		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
827 			      IPSTATS_MIB_FRAGCREATES);
828 	}
829 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
830 		      IPSTATS_MIB_FRAGOKS);
831 	consume_skb(skb);
832 	return err;
833 
834 fail_toobig:
835 	if (skb->sk && dst_allfrag(skb_dst(skb)))
836 		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
837 
838 	skb->dev = skb_dst(skb)->dev;
839 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
840 	err = -EMSGSIZE;
841 
842 fail:
843 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
844 		      IPSTATS_MIB_FRAGFAILS);
845 	kfree_skb(skb);
846 	return err;
847 }
848 
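/* Returns non-zero when the cached route can no longer be trusted for
 * this flow: it is not a host route whose address matches fl_addr, and
 * the socket's cached destination (addr_cache) does not match either.
 */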
849 static inline int ip6_rt_check(const struct rt6key *rt_key,
850 			       const struct in6_addr *fl_addr,
851 			       const struct in6_addr *addr_cache)
852 {
853 	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
854 		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
855 }
856 
857 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
858 					  struct dst_entry *dst,
859 					  const struct flowi6 *fl6)
860 {
861 	struct ipv6_pinfo *np = inet6_sk(sk);
862 	struct rt6_info *rt;
863 
864 	if (!dst)
865 		goto out;
866 
867 	if (dst->ops->family != AF_INET6) {
868 		dst_release(dst);
869 		return NULL;
870 	}
871 
872 	rt = (struct rt6_info *)dst;
873 	/* Yes, checking route validity in the not-connected
874 	 * case is not very simple. Take into account
875 	 * that we do not support routing by source, TOS,
876 	 * and MSG_DONTROUTE		--ANK (980726)
877 	 *
878 	 * 1. ip6_rt_check(): If route was host route,
879 	 *    check that cached destination is current.
880 	 *    If it is network route, we still may
881 	 *    check its validity using saved pointer
882 	 *    to the last used address: daddr_cache.
883 	 *    We do not want to save whole address now,
884 	 *    (because the main consumer of this service
885 	 *    is tcp, which does not have this problem),
886 	 *    so that the last trick works only on connected
887 	 *    sockets.
888 	 * 2. oif also should be the same.
889 	 */
890 	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
891 #ifdef CONFIG_IPV6_SUBTREES
892 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
893 #endif
894 	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
895 	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
896 		dst_release(dst);
897 		dst = NULL;
898 	}
899 
900 out:
901 	return dst;
902 }
903 
904 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
905 			       struct dst_entry **dst, struct flowi6 *fl6)
906 {
907 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
908 	struct neighbour *n;
909 	struct rt6_info *rt;
910 #endif
911 	int err;
912 	int flags = 0;
913 
914 	/* The correct way to handle this would be to do
915 	 * ip6_route_get_saddr, and then ip6_route_output; however,
916 	 * the route-specific preferred source forces the
917 	 * ip6_route_output call _before_ ip6_route_get_saddr.
918 	 *
919 	 * In source specific routing (no src=any default route),
920 	 * ip6_route_output will fail given src=any saddr, though, so
921 	 * that's why we try it again later.
922 	 */
923 	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
924 		struct rt6_info *rt;
925 		bool had_dst = *dst != NULL;
926 
927 		if (!had_dst)
928 			*dst = ip6_route_output(net, sk, fl6);
929 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
930 		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
931 					  sk ? inet6_sk(sk)->srcprefs : 0,
932 					  &fl6->saddr);
933 		if (err)
934 			goto out_err_release;
935 
936 		/* If we had an erroneous initial result, pretend it
937 		 * never existed and let the SA-enabled version take
938 		 * over.
939 		 */
940 		if (!had_dst && (*dst)->error) {
941 			dst_release(*dst);
942 			*dst = NULL;
943 		}
944 
945 		if (fl6->flowi6_oif)
946 			flags |= RT6_LOOKUP_F_IFACE;
947 	}
948 
949 	if (!*dst)
950 		*dst = ip6_route_output_flags(net, sk, fl6, flags);
951 
952 	err = (*dst)->error;
953 	if (err)
954 		goto out_err_release;
955 
956 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
957 	/*
958 	 * If the dst entry we've looked up
959 	 * has a neighbour entry that is in the INCOMPLETE
960 	 * state and the src address from the flow is
961 	 * marked as OPTIMISTIC, we release the found
962 	 * dst entry and replace it with the
963 	 * dst entry of the nexthop router instead.
964 	 */
965 	rt = (struct rt6_info *) *dst;
966 	rcu_read_lock_bh();
967 	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
968 				      rt6_nexthop(rt, &fl6->daddr));
969 	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
970 	rcu_read_unlock_bh();
971 
972 	if (err) {
973 		struct inet6_ifaddr *ifp;
974 		struct flowi6 fl_gw6;
975 		int redirect;
976 
977 		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
978 				      (*dst)->dev, 1);
979 
980 		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
981 		if (ifp)
982 			in6_ifa_put(ifp);
983 
984 		if (redirect) {
985 			/*
986 			 * We need to get the dst entry for the
987 			 * default router instead
988 			 */
989 			dst_release(*dst);
990 			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
991 			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
992 			*dst = ip6_route_output(net, sk, &fl_gw6);
993 			err = (*dst)->error;
994 			if (err)
995 				goto out_err_release;
996 		}
997 	}
998 #endif
999 
1000 	return 0;
1001 
1002 out_err_release:
1003 	if (err == -ENETUNREACH)
1004 		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1005 	dst_release(*dst);
1006 	*dst = NULL;
1007 	return err;
1008 }
1009 
1010 /**
1011  *	ip6_dst_lookup - perform route lookup on flow
1012  *	@net: network namespace in which to perform the lookup
 *	@sk: socket which provides route info
1013  *	@dst: pointer to dst_entry * for result
1014  *	@fl6: flow to lookup
1015  *
1016  *	This function performs a route lookup on the given flow.
1017  *
1018  *	It returns zero on success, or a standard errno code on error.
1019  */
1020 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1021 		   struct flowi6 *fl6)
1022 {
1023 	*dst = NULL;
1024 	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1025 }
1026 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
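
/* Example (illustrative sketch): resolving a route for a datagram flow.
 * 'net', 'sk' and 'remote' are assumed to be set up by the caller; on
 * failure *dst has already been released and set to NULL:
 *
 *	struct flowi6 fl6 = { .flowi6_proto = IPPROTO_UDP };
 *	struct dst_entry *dst;
 *	int err;
 *
 *	fl6.daddr = remote;
 *	err = ip6_dst_lookup(net, sk, &dst, &fl6);
 *	if (err)
 *		return err;
 */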
1027 
1028 /**
1029  *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1030  *	@sk: socket which provides route info
1031  *	@fl6: flow to lookup
1032  *	@final_dst: final destination address for ipsec lookup
1033  *
1034  *	This function performs a route lookup on the given flow.
1035  *
1036  *	It returns a valid dst pointer on success, or a pointer-encoded
1037  *	error code.
1038  */
1039 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1040 				      const struct in6_addr *final_dst)
1041 {
1042 	struct dst_entry *dst = NULL;
1043 	int err;
1044 
1045 	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1046 	if (err)
1047 		return ERR_PTR(err);
1048 	if (final_dst)
1049 		fl6->daddr = *final_dst;
1050 	if (!fl6->flowi6_oif)
1051 		fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
1052 
1053 	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1054 }
1055 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
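
/* Example (illustrative): unlike ip6_dst_lookup(), errors come back
 * pointer-encoded rather than as a separate return value, so callers
 * test the result with IS_ERR().  'final_p' is a caller-side local:
 *
 *	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 */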
1056 
1057 /**
1058  *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1059  *	@sk: socket which provides the dst cache and route info
1060  *	@fl6: flow to lookup
1061  *	@final_dst: final destination address for ipsec lookup
1062  *
1063  *	This function performs a route lookup on the given flow with the
1064  *	possibility of using the cached route in the socket if it is valid.
1065  *	It will take the socket dst lock when operating on the dst cache.
1066  *	As a result, this function can only be used in process context.
1067  *
1068  *	It returns a valid dst pointer on success, or a pointer-encoded
1069  *	error code.
1070  */
1071 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1072 					 const struct in6_addr *final_dst)
1073 {
1074 	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1075 
1076 	dst = ip6_sk_dst_check(sk, dst, fl6);
1077 	if (!dst)
1078 		dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1079 
1080 	return dst;
1081 }
1082 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1083 
1084 static inline int ip6_ufo_append_data(struct sock *sk,
1085 			struct sk_buff_head *queue,
1086 			int getfrag(void *from, char *to, int offset, int len,
1087 			int odd, struct sk_buff *skb),
1088 			void *from, int length, int hh_len, int fragheaderlen,
1089 			int exthdrlen, int transhdrlen, int mtu,
1090 			unsigned int flags, const struct flowi6 *fl6)
1091 
1092 {
1093 	struct sk_buff *skb;
1094 	int err;
1095 
1096 	/* The network device supports UDP large send offload, so
1097 	 * create one single skb containing the complete
1098 	 * udp datagram
1099 	 */
1100 	skb = skb_peek_tail(queue);
1101 	if (!skb) {
1102 		skb = sock_alloc_send_skb(sk,
1103 			hh_len + fragheaderlen + transhdrlen + 20,
1104 			(flags & MSG_DONTWAIT), &err);
1105 		if (!skb)
1106 			return err;
1107 
1108 		/* reserve space for Hardware header */
1109 		skb_reserve(skb, hh_len);
1110 
1111 		/* create space for UDP/IP header */
1112 		skb_put(skb, fragheaderlen + transhdrlen);
1113 
1114 		/* initialize network header pointer */
1115 		skb_set_network_header(skb, exthdrlen);
1116 
1117 		/* initialize protocol header pointer */
1118 		skb->transport_header = skb->network_header + fragheaderlen;
1119 
1120 		skb->protocol = htons(ETH_P_IPV6);
1121 		skb->csum = 0;
1122 
1123 		__skb_queue_tail(queue, skb);
1124 	} else if (skb_is_gso(skb)) {
1125 		goto append;
1126 	}
1127 
1128 	skb->ip_summed = CHECKSUM_PARTIAL;
1129 	/* Specify the length of each IPv6 datagram fragment.
1130 	 * It has to be a multiple of 8.
1131 	 */
1132 	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1133 				     sizeof(struct frag_hdr)) & ~7;
1134 	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1135 	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
1136 							 &fl6->daddr,
1137 							 &fl6->saddr);
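	/* Worked example (illustrative): for an mtu of 1500 and a plain
	 * IPv6 header (fragheaderlen 40), gso_size becomes
	 * (1500 - 40 - 8) & ~7 = 1448, so every fragment payload stays
	 * a multiple of 8 octets.
	 */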
1138 
1139 append:
1140 	return skb_append_datato_frags(sk, skb, getfrag, from,
1141 				       (length - transhdrlen));
1142 }
1143 
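/* IPv6 extension headers encode their length in 8-octet units, not
 * counting the first 8 octets, so a full copy is (hdrlen + 1) * 8 bytes.
 */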
1144 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1145 					       gfp_t gfp)
1146 {
1147 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1148 }
1149 
1150 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1151 						gfp_t gfp)
1152 {
1153 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1154 }
1155 
1156 static void ip6_append_data_mtu(unsigned int *mtu,
1157 				int *maxfraglen,
1158 				unsigned int fragheaderlen,
1159 				struct sk_buff *skb,
1160 				struct rt6_info *rt,
1161 				unsigned int orig_mtu)
1162 {
1163 	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1164 		if (!skb) {
1165 			/* first fragment, reserve header_len */
1166 			*mtu = orig_mtu - rt->dst.header_len;
1167 
1168 		} else {
1169 			/*
1170 			 * this fragment is not the first; the header
1171 			 * space is regarded as data space.
1172 			 */
1173 			*mtu = orig_mtu;
1174 		}
1175 		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1176 			      + fragheaderlen - sizeof(struct frag_hdr);
1177 	}
1178 }
1179 
1180 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1181 			  struct inet6_cork *v6_cork,
1182 			  int hlimit, int tclass, struct ipv6_txoptions *opt,
1183 			  struct rt6_info *rt, struct flowi6 *fl6)
1184 {
1185 	struct ipv6_pinfo *np = inet6_sk(sk);
1186 	unsigned int mtu;
1187 
1188 	/*
1189 	 * setup for corking
1190 	 */
1191 	if (opt) {
1192 		if (WARN_ON(v6_cork->opt))
1193 			return -EINVAL;
1194 
1195 		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
1196 		if (unlikely(!v6_cork->opt))
1197 			return -ENOBUFS;
1198 
1199 		v6_cork->opt->tot_len = opt->tot_len;
1200 		v6_cork->opt->opt_flen = opt->opt_flen;
1201 		v6_cork->opt->opt_nflen = opt->opt_nflen;
1202 
1203 		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1204 						    sk->sk_allocation);
1205 		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1206 			return -ENOBUFS;
1207 
1208 		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1209 						    sk->sk_allocation);
1210 		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1211 			return -ENOBUFS;
1212 
1213 		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1214 						   sk->sk_allocation);
1215 		if (opt->hopopt && !v6_cork->opt->hopopt)
1216 			return -ENOBUFS;
1217 
1218 		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1219 						    sk->sk_allocation);
1220 		if (opt->srcrt && !v6_cork->opt->srcrt)
1221 			return -ENOBUFS;
1222 
1223 		/* need source address above --miyazawa */
1224 	}
1225 	dst_hold(&rt->dst);
1226 	cork->base.dst = &rt->dst;
1227 	cork->fl.u.ip6 = *fl6;
1228 	v6_cork->hop_limit = hlimit;
1229 	v6_cork->tclass = tclass;
1230 	if (rt->dst.flags & DST_XFRM_TUNNEL)
1231 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1232 		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
1233 	else
1234 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1235 		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1236 	if (np->frag_size < mtu) {
1237 		if (np->frag_size)
1238 			mtu = np->frag_size;
1239 	}
1240 	cork->base.fragsize = mtu;
1241 	if (dst_allfrag(rt->dst.path))
1242 		cork->base.flags |= IPCORK_ALLFRAG;
1243 	cork->base.length = 0;
1244 
1245 	return 0;
1246 }
1247 
1248 static int __ip6_append_data(struct sock *sk,
1249 			     struct flowi6 *fl6,
1250 			     struct sk_buff_head *queue,
1251 			     struct inet_cork *cork,
1252 			     struct inet6_cork *v6_cork,
1253 			     struct page_frag *pfrag,
1254 			     int getfrag(void *from, char *to, int offset,
1255 					 int len, int odd, struct sk_buff *skb),
1256 			     void *from, int length, int transhdrlen,
1257 			     unsigned int flags, int dontfrag)
1258 {
1259 	struct sk_buff *skb, *skb_prev = NULL;
1260 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
1261 	int exthdrlen = 0;
1262 	int dst_exthdrlen = 0;
1263 	int hh_len;
1264 	int copy;
1265 	int err;
1266 	int offset = 0;
1267 	__u8 tx_flags = 0;
1268 	u32 tskey = 0;
1269 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
1270 	struct ipv6_txoptions *opt = v6_cork->opt;
1271 	int csummode = CHECKSUM_NONE;
1272 	unsigned int maxnonfragsize, headersize;
1273 
1274 	skb = skb_peek_tail(queue);
1275 	if (!skb) {
1276 		exthdrlen = opt ? opt->opt_flen : 0;
1277 		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1278 	}
1279 
1280 	mtu = cork->fragsize;
1281 	orig_mtu = mtu;
1282 
1283 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1284 
1285 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1286 			(opt ? opt->opt_nflen : 0);
1287 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1288 		     sizeof(struct frag_hdr);
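	/* Worked example (illustrative): with mtu 1500 and fragheaderlen 40,
	 * maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1488, the largest
	 * fragment that still leaves room for the fragment header itself.
	 */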
1289 
1290 	headersize = sizeof(struct ipv6hdr) +
1291 		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1292 		     (dst_allfrag(&rt->dst) ?
1293 		      sizeof(struct frag_hdr) : 0) +
1294 		     rt->rt6i_nfheader_len;
1295 
1296 	if (cork->length + length > mtu - headersize && dontfrag &&
1297 	    (sk->sk_protocol == IPPROTO_UDP ||
1298 	     sk->sk_protocol == IPPROTO_RAW)) {
1299 		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1300 				sizeof(struct ipv6hdr));
1301 		goto emsgsize;
1302 	}
1303 
1304 	if (ip6_sk_ignore_df(sk))
1305 		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1306 	else
1307 		maxnonfragsize = mtu;
1308 
1309 	if (cork->length + length > maxnonfragsize - headersize) {
1310 emsgsize:
1311 		ipv6_local_error(sk, EMSGSIZE, fl6,
1312 				 mtu - headersize +
1313 				 sizeof(struct ipv6hdr));
1314 		return -EMSGSIZE;
1315 	}
1316 
1317 	/* CHECKSUM_PARTIAL only with no extension headers and when
1318 	 * we are not going to fragment
1319 	 */
1320 	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1321 	    headersize == sizeof(struct ipv6hdr) &&
1322 	    length < mtu - headersize &&
1323 	    !(flags & MSG_MORE) &&
1324 	    rt->dst.dev->features & NETIF_F_V6_CSUM)
1325 		csummode = CHECKSUM_PARTIAL;
1326 
1327 	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1328 		sock_tx_timestamp(sk, &tx_flags);
1329 		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1330 		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1331 			tskey = sk->sk_tskey++;
1332 	}
1333 
1334 	/*
1335 	 * Let's try using as much space as possible.
1336 	 * Use MTU if total length of the message fits into the MTU.
1337 	 * Otherwise, we need to reserve fragment header and
1338 	 * fragment alignment (= 8-15 octets, in total).
1339 	 *
1340 	 * Note that we may need to "move" the data from the tail
1341 	 * of the buffer to the new fragment when we split
1342 	 * the message.
1343 	 *
1344 	 * FIXME: It may be fragmented into multiple chunks
1345 	 *        at once if non-fragmentable extension headers
1346 	 *        are too large.
1347 	 * --yoshfuji
1348 	 */
1349 
1350 	cork->length += length;
1351 	if (((length > mtu) ||
1352 	     (skb && skb_is_gso(skb))) &&
1353 	    (sk->sk_protocol == IPPROTO_UDP) &&
1354 	    (rt->dst.dev->features & NETIF_F_UFO) &&
1355 	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
1356 		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
1357 					  hh_len, fragheaderlen, exthdrlen,
1358 					  transhdrlen, mtu, flags, fl6);
1359 		if (err)
1360 			goto error;
1361 		return 0;
1362 	}
1363 
1364 	if (!skb)
1365 		goto alloc_new_skb;
1366 
1367 	while (length > 0) {
1368 		/* Check if the remaining data fits into current packet. */
1369 		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1370 		if (copy < length)
1371 			copy = maxfraglen - skb->len;
1372 
1373 		if (copy <= 0) {
1374 			char *data;
1375 			unsigned int datalen;
1376 			unsigned int fraglen;
1377 			unsigned int fraggap;
1378 			unsigned int alloclen;
1379 alloc_new_skb:
1380 			/* There's no room in the current skb */
1381 			if (skb)
1382 				fraggap = skb->len - maxfraglen;
1383 			else
1384 				fraggap = 0;
1385 			/* update mtu and maxfraglen if necessary */
1386 			if (!skb || !skb_prev)
1387 				ip6_append_data_mtu(&mtu, &maxfraglen,
1388 						    fragheaderlen, skb, rt,
1389 						    orig_mtu);
1390 
1391 			skb_prev = skb;
1392 
1393 			/*
1394 			 * If remaining data exceeds the mtu,
1395 			 * we know we need more fragment(s).
1396 			 */
1397 			datalen = length + fraggap;
1398 
1399 			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1400 				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1401 			if ((flags & MSG_MORE) &&
1402 			    !(rt->dst.dev->features&NETIF_F_SG))
1403 				alloclen = mtu;
1404 			else
1405 				alloclen = datalen + fragheaderlen;
1406 
1407 			alloclen += dst_exthdrlen;
1408 
1409 			if (datalen != length + fraggap) {
1410 				/*
1411 				 * this is not the last fragment; the trailer
1412 				 * space is regarded as data space.
1413 				 */
1414 				datalen += rt->dst.trailer_len;
1415 			}
1416 
1417 			alloclen += rt->dst.trailer_len;
1418 			fraglen = datalen + fragheaderlen;
1419 
1420 			/*
1421 			 * We just reserve space for the fragment header.
1422 			 * Note: this may be an overallocation if the message
1423 			 * (without MSG_MORE) fits into the MTU.
1424 			 */
1425 			alloclen += sizeof(struct frag_hdr);
1426 
1427 			if (transhdrlen) {
1428 				skb = sock_alloc_send_skb(sk,
1429 						alloclen + hh_len,
1430 						(flags & MSG_DONTWAIT), &err);
1431 			} else {
1432 				skb = NULL;
1433 				if (atomic_read(&sk->sk_wmem_alloc) <=
1434 				    2 * sk->sk_sndbuf)
1435 					skb = sock_wmalloc(sk,
1436 							   alloclen + hh_len, 1,
1437 							   sk->sk_allocation);
1438 				if (unlikely(!skb))
1439 					err = -ENOBUFS;
1440 			}
1441 			if (!skb)
1442 				goto error;
1443 			/*
1444 			 *	Fill in the control structures
1445 			 */
1446 			skb->protocol = htons(ETH_P_IPV6);
1447 			skb->ip_summed = csummode;
1448 			skb->csum = 0;
1449 			/* reserve for fragmentation and ipsec header */
1450 			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1451 				    dst_exthdrlen);
1452 
1453 			/* Only the initial fragment is time stamped */
1454 			skb_shinfo(skb)->tx_flags = tx_flags;
1455 			tx_flags = 0;
1456 			skb_shinfo(skb)->tskey = tskey;
1457 			tskey = 0;
1458 
1459 			/*
1460 			 *	Find where to start putting bytes
1461 			 */
1462 			data = skb_put(skb, fraglen);
1463 			skb_set_network_header(skb, exthdrlen);
1464 			data += fragheaderlen;
1465 			skb->transport_header = (skb->network_header +
1466 						 fragheaderlen);
1467 			if (fraggap) {
1468 				skb->csum = skb_copy_and_csum_bits(
1469 					skb_prev, maxfraglen,
1470 					data + transhdrlen, fraggap, 0);
1471 				skb_prev->csum = csum_sub(skb_prev->csum,
1472 							  skb->csum);
1473 				data += fraggap;
1474 				pskb_trim_unique(skb_prev, maxfraglen);
1475 			}
1476 			copy = datalen - transhdrlen - fraggap;
1477 
1478 			if (copy < 0) {
1479 				err = -EINVAL;
1480 				kfree_skb(skb);
1481 				goto error;
1482 			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1483 				err = -EFAULT;
1484 				kfree_skb(skb);
1485 				goto error;
1486 			}
1487 
1488 			offset += copy;
1489 			length -= datalen - fraggap;
1490 			transhdrlen = 0;
1491 			exthdrlen = 0;
1492 			dst_exthdrlen = 0;
1493 
1494 			/*
1495 			 * Put the packet on the pending queue
1496 			 */
1497 			__skb_queue_tail(queue, skb);
1498 			continue;
1499 		}
1500 
1501 		if (copy > length)
1502 			copy = length;
1503 
1504 		if (!(rt->dst.dev->features&NETIF_F_SG)) {
1505 			unsigned int off;
1506 
1507 			off = skb->len;
1508 			if (getfrag(from, skb_put(skb, copy),
1509 						offset, copy, off, skb) < 0) {
1510 				__skb_trim(skb, off);
1511 				err = -EFAULT;
1512 				goto error;
1513 			}
1514 		} else {
1515 			int i = skb_shinfo(skb)->nr_frags;
1516 
1517 			err = -ENOMEM;
1518 			if (!sk_page_frag_refill(sk, pfrag))
1519 				goto error;
1520 
1521 			if (!skb_can_coalesce(skb, i, pfrag->page,
1522 					      pfrag->offset)) {
1523 				err = -EMSGSIZE;
1524 				if (i == MAX_SKB_FRAGS)
1525 					goto error;
1526 
1527 				__skb_fill_page_desc(skb, i, pfrag->page,
1528 						     pfrag->offset, 0);
1529 				skb_shinfo(skb)->nr_frags = ++i;
1530 				get_page(pfrag->page);
1531 			}
1532 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
1533 			if (getfrag(from,
1534 				    page_address(pfrag->page) + pfrag->offset,
1535 				    offset, copy, skb->len, skb) < 0)
1536 				goto error_efault;
1537 
1538 			pfrag->offset += copy;
1539 			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1540 			skb->len += copy;
1541 			skb->data_len += copy;
1542 			skb->truesize += copy;
1543 			atomic_add(copy, &sk->sk_wmem_alloc);
1544 		}
1545 		offset += copy;
1546 		length -= copy;
1547 	}
1548 
1549 	return 0;
1550 
1551 error_efault:
1552 	err = -EFAULT;
1553 error:
1554 	cork->length -= length;
1555 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1556 	return err;
1557 }
1558 
1559 int ip6_append_data(struct sock *sk,
1560 		    int getfrag(void *from, char *to, int offset, int len,
1561 				int odd, struct sk_buff *skb),
1562 		    void *from, int length, int transhdrlen, int hlimit,
1563 		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1564 		    struct rt6_info *rt, unsigned int flags, int dontfrag)
1565 {
1566 	struct inet_sock *inet = inet_sk(sk);
1567 	struct ipv6_pinfo *np = inet6_sk(sk);
1568 	int exthdrlen;
1569 	int err;
1570 
1571 	if (flags&MSG_PROBE)
1572 		return 0;
1573 	if (skb_queue_empty(&sk->sk_write_queue)) {
1574 		/*
1575 		 * setup for corking
1576 		 */
1577 		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
1578 				     tclass, opt, rt, fl6);
1579 		if (err)
1580 			return err;
1581 
1582 		exthdrlen = (opt ? opt->opt_flen : 0);
1583 		length += exthdrlen;
1584 		transhdrlen += exthdrlen;
1585 	} else {
1586 		fl6 = &inet->cork.fl.u.ip6;
1587 		transhdrlen = 0;
1588 	}
1589 
1590 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1591 				 &np->cork, sk_page_frag(sk), getfrag,
1592 				 from, length, transhdrlen, flags, dontfrag);
1593 }
1594 EXPORT_SYMBOL_GPL(ip6_append_data);
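
/* Example (illustrative sketch): the usual corked-send pattern in a
 * datagram sendmsg path; error handling is trimmed, and the getfrag
 * callback (here ip_generic_getfrag) and surrounding locals are
 * caller-side assumptions, not part of this file:
 *
 *	lock_sock(sk);
 *	err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen,
 *			      sizeof(struct udphdr), hlimit, tclass,
 *			      opt, &fl6, rt, msg->msg_flags, dontfrag);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 *	release_sock(sk);
 */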
1595 
1596 static void ip6_cork_release(struct inet_cork_full *cork,
1597 			     struct inet6_cork *v6_cork)
1598 {
1599 	if (v6_cork->opt) {
1600 		kfree(v6_cork->opt->dst0opt);
1601 		kfree(v6_cork->opt->dst1opt);
1602 		kfree(v6_cork->opt->hopopt);
1603 		kfree(v6_cork->opt->srcrt);
1604 		kfree(v6_cork->opt);
1605 		v6_cork->opt = NULL;
1606 	}
1607 
1608 	if (cork->base.dst) {
1609 		dst_release(cork->base.dst);
1610 		cork->base.dst = NULL;
1611 		cork->base.flags &= ~IPCORK_ALLFRAG;
1612 	}
1613 	memset(&cork->fl, 0, sizeof(cork->fl));
1614 }
1615 
1616 struct sk_buff *__ip6_make_skb(struct sock *sk,
1617 			       struct sk_buff_head *queue,
1618 			       struct inet_cork_full *cork,
1619 			       struct inet6_cork *v6_cork)
1620 {
1621 	struct sk_buff *skb, *tmp_skb;
1622 	struct sk_buff **tail_skb;
1623 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1624 	struct ipv6_pinfo *np = inet6_sk(sk);
1625 	struct net *net = sock_net(sk);
1626 	struct ipv6hdr *hdr;
1627 	struct ipv6_txoptions *opt = v6_cork->opt;
1628 	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1629 	struct flowi6 *fl6 = &cork->fl.u.ip6;
1630 	unsigned char proto = fl6->flowi6_proto;
1631 
1632 	skb = __skb_dequeue(queue);
1633 	if (!skb)
1634 		goto out;
1635 	tail_skb = &(skb_shinfo(skb)->frag_list);
1636 
1637 	/* move skb->data to ip header from ext header */
1638 	if (skb->data < skb_network_header(skb))
1639 		__skb_pull(skb, skb_network_offset(skb));
1640 	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1641 		__skb_pull(tmp_skb, skb_network_header_len(skb));
1642 		*tail_skb = tmp_skb;
1643 		tail_skb = &(tmp_skb->next);
1644 		skb->len += tmp_skb->len;
1645 		skb->data_len += tmp_skb->len;
1646 		skb->truesize += tmp_skb->truesize;
1647 		tmp_skb->destructor = NULL;
1648 		tmp_skb->sk = NULL;
1649 	}
1650 
1651 	/* Allow local fragmentation. */
1652 	skb->ignore_df = ip6_sk_ignore_df(sk);
1653 
1654 	*final_dst = fl6->daddr;
1655 	__skb_pull(skb, skb_network_header_len(skb));
1656 	if (opt && opt->opt_flen)
1657 		ipv6_push_frag_opts(skb, opt, &proto);
1658 	if (opt && opt->opt_nflen)
1659 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1660 
1661 	skb_push(skb, sizeof(struct ipv6hdr));
1662 	skb_reset_network_header(skb);
1663 	hdr = ipv6_hdr(skb);
1664 
1665 	ip6_flow_hdr(hdr, v6_cork->tclass,
1666 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1667 					np->autoflowlabel, fl6));
1668 	hdr->hop_limit = v6_cork->hop_limit;
1669 	hdr->nexthdr = proto;
1670 	hdr->saddr = fl6->saddr;
1671 	hdr->daddr = *final_dst;
1672 
1673 	skb->priority = sk->sk_priority;
1674 	skb->mark = sk->sk_mark;
1675 
1676 	skb_dst_set(skb, dst_clone(&rt->dst));
1677 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1678 	if (proto == IPPROTO_ICMPV6) {
1679 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1680 
1681 		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1682 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1683 	}
1684 
1685 	ip6_cork_release(cork, v6_cork);
1686 out:
1687 	return skb;
1688 }
1689 
1690 int ip6_send_skb(struct sk_buff *skb)
1691 {
1692 	struct net *net = sock_net(skb->sk);
1693 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1694 	int err;
1695 
1696 	err = ip6_local_out(net, skb->sk, skb);
1697 	if (err) {
1698 		if (err > 0)
1699 			err = net_xmit_errno(err);
1700 		if (err)
1701 			IP6_INC_STATS(net, rt->rt6i_idev,
1702 				      IPSTATS_MIB_OUTDISCARDS);
1703 	}
1704 
1705 	return err;
1706 }
1707 
1708 int ip6_push_pending_frames(struct sock *sk)
1709 {
1710 	struct sk_buff *skb;
1711 
1712 	skb = ip6_finish_skb(sk);
1713 	if (!skb)
1714 		return 0;
1715 
1716 	return ip6_send_skb(skb);
1717 }
1718 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1719 
1720 static void __ip6_flush_pending_frames(struct sock *sk,
1721 				       struct sk_buff_head *queue,
1722 				       struct inet_cork_full *cork,
1723 				       struct inet6_cork *v6_cork)
1724 {
1725 	struct sk_buff *skb;
1726 
1727 	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1728 		if (skb_dst(skb))
1729 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1730 				      IPSTATS_MIB_OUTDISCARDS);
1731 		kfree_skb(skb);
1732 	}
1733 
1734 	ip6_cork_release(cork, v6_cork);
1735 }
1736 
1737 void ip6_flush_pending_frames(struct sock *sk)
1738 {
1739 	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1740 				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1741 }
1742 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1743 
1744 struct sk_buff *ip6_make_skb(struct sock *sk,
1745 			     int getfrag(void *from, char *to, int offset,
1746 					 int len, int odd, struct sk_buff *skb),
1747 			     void *from, int length, int transhdrlen,
1748 			     int hlimit, int tclass,
1749 			     struct ipv6_txoptions *opt, struct flowi6 *fl6,
1750 			     struct rt6_info *rt, unsigned int flags,
1751 			     int dontfrag)
1752 {
1753 	struct inet_cork_full cork;
1754 	struct inet6_cork v6_cork;
1755 	struct sk_buff_head queue;
1756 	int exthdrlen = (opt ? opt->opt_flen : 0);
1757 	int err;
1758 
1759 	if (flags & MSG_PROBE)
1760 		return NULL;
1761 
1762 	__skb_queue_head_init(&queue);
1763 
1764 	cork.base.flags = 0;
1765 	cork.base.addr = 0;
1766 	cork.base.opt = NULL;
1767 	v6_cork.opt = NULL;
1768 	err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1769 	if (err)
1770 		return ERR_PTR(err);
1771 
1772 	if (dontfrag < 0)
1773 		dontfrag = inet6_sk(sk)->dontfrag;
1774 
1775 	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1776 				&current->task_frag, getfrag, from,
1777 				length + exthdrlen, transhdrlen + exthdrlen,
1778 				flags, dontfrag);
1779 	if (err) {
1780 		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1781 		return ERR_PTR(err);
1782 	}
1783 
1784 	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1785 }
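
/* Example (illustrative): the single-shot variant used by paths that
 * avoid corking state on the socket.  ip6_make_skb() returns NULL for
 * MSG_PROBE and a pointer-encoded error on failure:
 *
 *	skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr),
 *			   hlimit, tclass, opt, &fl6, rt, msg->msg_flags,
 *			   dontfrag);
 *	if (!IS_ERR_OR_NULL(skb))
 *		err = ip6_send_skb(skb);
 */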
1786