/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 * 	Kazunori MIYAZAWA @USAGI
 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 * 		IPv6 support
 * 	Kazunori MIYAZAWA @USAGI
 * 	YOSHIFUJI Hideaki
 * 		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/xfrm.h>
#include <net/ip.h>
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif

#include "xfrm_hash.h"

#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN	100

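/* Bundle lookup context handed to the flow cache resolver: the original
 * route to wrap and the XFRM_LOOKUP_* flags that modify bundle creation.
 */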
struct xfrm_flo {
	struct dst_entry *dst_orig;
	u8 flags;
};

static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
						__read_mostly;

static struct kmem_cache *xfrm_dst_cache __read_mostly;

static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(unsigned long arg);

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);

static inline bool
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi4 *fl4 = &fl->u.ip4;

	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
}

static inline bool
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi6 *fl6 = &fl->u.ip6;

	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
}

bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
			 unsigned short family)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
	return false;
}

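/* Look up the per-family policy afinfo. On success the RCU read lock
 * is left held and must be dropped via xfrm_policy_put_afinfo().
 */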
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;

	if (unlikely(family >= NPROTO))
		return NULL;
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}

static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
						  int tos, int oif,
						  const xfrm_address_t *saddr,
						  const xfrm_address_t *daddr,
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

	dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);

	xfrm_policy_put_afinfo(afinfo);

	return dst;
}

static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
						int tos, int oif,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	struct net *net = xs_net(x);
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);

	if (!IS_ERR(dst)) {
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
	}

	return dst;
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

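/* Per-policy timer: enforce the soft/hard add/use lifetimes, notifying
 * key managers on soft expiry and deleting the policy on hard expiry.
 */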
static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (unlikely(xp->walk.dead))
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}

static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);

	if (unlikely(pol->walk.dead))
		flo = NULL;
	else
		xfrm_pol_hold(pol);

	return flo;
}

static int xfrm_policy_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);

	return !pol->walk.dead;
}

static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
{
	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
}

static const struct flow_cache_ops xfrm_policy_fc_ops = {
	.get = xfrm_policy_flo_get,
	.check = xfrm_policy_flo_check,
	.delete = xfrm_policy_flo_delete,
};

/* Allocate xfrm_policy. Not used here; it is supposed to be used by
 * pfkeyv2 SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		write_pnet(&policy->xp_net, net);
		INIT_LIST_HEAD(&policy->walk.all);
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		atomic_set(&policy->refcnt, 1);
		skb_queue_head_init(&policy->polq.hold_queue);
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
			    (unsigned long)policy);
		policy->flo.ops = &xfrm_policy_fc_ops;
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

static void xfrm_policy_destroy_rcu(struct rcu_head *head)
{
	struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu);

	security_xfrm_policy_free(policy->security);
	kfree(policy);
}

/* Destroy xfrm_policy: descendant resources must already be released by this point. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	BUG_ON(!policy->walk.dead);

	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
		BUG();

	call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
}
EXPORT_SYMBOL(xfrm_policy_destroy);

/* Rule must be locked. Release descendant resources, announce
 * the entry dead. The rule must already be unlinked from all lists.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	policy->walk.dead = 1;

	atomic_inc(&policy->genid);

	if (del_timer(&policy->polq.hold_timer))
		xfrm_pol_put(policy);
	skb_queue_purge(&policy->polq.hold_queue);

	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	xfrm_pol_put(policy);
}

static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

static inline unsigned int idx_hash(struct net *net, u32 index)
{
	return __idx_hash(index, net->xfrm.policy_idx_hmask);
}

/* calculate policy hash thresholds */
static void __get_hash_thresh(struct net *net,
			      unsigned short family, int dir,
			      u8 *dbits, u8 *sbits)
{
	switch (family) {
	case AF_INET:
		*dbits = net->xfrm.policy_bydst[dir].dbits4;
		*sbits = net->xfrm.policy_bydst[dir].sbits4;
		break;

	case AF_INET6:
		*dbits = net->xfrm.policy_bydst[dir].dbits6;
		*sbits = net->xfrm.policy_bydst[dir].sbits6;
		break;

	default:
		*dbits = 0;
		*sbits = 0;
	}
}

static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __sel_hash(sel, family, hmask, dbits, sbits);

	return (hash == hmask + 1 ?
		&net->xfrm.policy_inexact[dir] :
		net->xfrm.policy_bydst[dir].table + hash);
}

static struct hlist_head *policy_hash_direct(struct net *net,
					     const xfrm_address_t *daddr,
					     const xfrm_address_t *saddr,
					     unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);

	return net->xfrm.policy_bydst[dir].table + hash;
}

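/* Rehash one bydst chain into the new table, keeping policies that
 * hash to the same new bucket in their original relative order.
 */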
static void xfrm_dst_hash_transfer(struct net *net,
				   struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask,
				   int dir)
{
	struct hlist_node *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;
	u8 dbits;
	u8 sbits;

redo:
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
		unsigned int h;

		__get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask, dbits, sbits);
		if (!entry0) {
			hlist_del(&pol->bydst);
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
			hlist_del(&pol->bydst);
			hlist_add_behind(&pol->bydst, entry0);
		}
		entry0 = &pol->bydst;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return ((old_hmask + 1) << 1) - 1;
}

static void xfrm_bydst_resize(struct net *net, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);

	net->xfrm.policy_bydst[dir].table = ndst;
	net->xfrm.policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}

static void xfrm_byidx_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = net->xfrm.policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}

static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
{
	unsigned int cnt = net->xfrm.policy_count[dir];
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

static inline int xfrm_byidx_should_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
{
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *work)
{
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
	}
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);

	mutex_unlock(&hash_resize_mutex);
}

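/* Rebuild the bydst hash tables after the selector prefixlen thresholds
 * have changed, re-inserting all policies in their order of creation.
 */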
static void xfrm_hash_rebuild(struct work_struct *work)
{
	struct net *net = container_of(work, struct net,
				       xfrm.policy_hthresh.work);
	unsigned int hmask;
	struct xfrm_policy *pol;
	struct xfrm_policy *policy;
	struct hlist_head *chain;
	struct hlist_head *odst;
	struct hlist_node *newpos;
	int i;
	int dir;
	unsigned seq;
	u8 lbits4, rbits4, lbits6, rbits6;

	mutex_lock(&hash_resize_mutex);

	/* read selector prefixlen thresholds */
	do {
		seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);

		lbits4 = net->xfrm.policy_hthresh.lbits4;
		rbits4 = net->xfrm.policy_hthresh.rbits4;
		lbits6 = net->xfrm.policy_hthresh.lbits6;
		rbits6 = net->xfrm.policy_hthresh.rbits6;
	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	/* reset the bydst and inexact table in all directions */
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
		hmask = net->xfrm.policy_bydst[dir].hmask;
		odst = net->xfrm.policy_bydst[dir].table;
		for (i = hmask; i >= 0; i--)
			INIT_HLIST_HEAD(odst + i);
		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
			/* dir out => dst = remote, src = local */
			net->xfrm.policy_bydst[dir].dbits4 = rbits4;
			net->xfrm.policy_bydst[dir].sbits4 = lbits4;
			net->xfrm.policy_bydst[dir].dbits6 = rbits6;
			net->xfrm.policy_bydst[dir].sbits6 = lbits6;
		} else {
			/* dir in/fwd => dst = local, src = remote */
			net->xfrm.policy_bydst[dir].dbits4 = lbits4;
			net->xfrm.policy_bydst[dir].sbits4 = rbits4;
			net->xfrm.policy_bydst[dir].dbits6 = lbits6;
			net->xfrm.policy_bydst[dir].sbits6 = rbits6;
		}
	}

	/* re-insert all policies by order of creation */
	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
		newpos = NULL;
		chain = policy_hash_bysel(net, &policy->selector,
					  policy->family,
					  xfrm_policy_id2dir(policy->index));
		hlist_for_each_entry(pol, chain, bydst) {
			if (policy->priority >= pol->priority)
				newpos = &pol->bydst;
			else
				break;
		}
		if (newpos)
			hlist_add_behind(&policy->bydst, newpos);
		else
			hlist_add_head(&policy->bydst, chain);
	}

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	mutex_unlock(&hash_resize_mutex);
}

void xfrm_policy_hash_rebuild(struct net *net)
{
	schedule_work(&net->xfrm.policy_hthresh.work);
}
EXPORT_SYMBOL(xfrm_policy_hash_rebuild);

/* Generate a new index... KAME seems to generate them ordered by cost
 * of an absolute unpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		if (!index) {
			idx = (idx_generator | dir);
			idx_generator += 8;
		} else {
			idx = index;
			index = 0;
		}

		if (idx == 0)
			idx = 8;
		list = net->xfrm.policy_byidx + idx_hash(net, idx);
		found = 0;
		hlist_for_each_entry(p, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		if (!found)
			return idx;
	}
}

static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

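/* Migrate packets queued on the old policy's hold queue to the new
 * policy and rearm the new policy's hold timer to fire immediately.
 */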
static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

	if (skb_queue_empty(&pq->hold_queue))
		return;

	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
	if (del_timer(&pq->hold_timer))
		xfrm_pol_put(old);
	spin_unlock_bh(&pq->hold_queue.lock);

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
	if (!mod_timer(&pq->hold_timer, jiffies))
		xfrm_pol_hold(new);
	spin_unlock_bh(&pq->hold_queue.lock);
}

static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
				   struct xfrm_policy *pol)
{
	u32 mark = policy->mark.v & policy->mark.m;

	if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
		return true;

	if ((mark & pol->mark.m) == pol->mark.v &&
	    policy->priority == pol->priority)
		return true;

	return false;
}

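/* Insert a policy into the bydst hash in priority order, replacing any
 * existing entry with a matching selector/mark/security context unless
 * excl is set, in which case -EEXIST is returned.
 */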
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct net *net = xp_net(policy);
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *newpos;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_policy_mark_match(policy, pol) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_behind(&policy->bydst, newpos);
	else
		hlist_add_head(&policy->bydst, chain);
	__xfrm_policy_link(policy, dir);
	atomic_inc(&net->xfrm.flow_cache_genid);

	/* After previous checking, family can either be AF_INET or AF_INET6 */
	if (policy->family == AF_INET)
		rt_genid_bump_ipv4(net);
	else
		rt_genid_bump_ipv6(net);

	if (delpol) {
		xfrm_policy_requeue(delpol, policy);
		__xfrm_policy_unlink(delpol, dir);
	}
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
					  int dir, struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == type &&
		    (mark & pol->mark.m) == pol->mark.v &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
				     int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = net->xfrm.policy_byidx + idx_hash(net, id);
	ret = NULL;
	hlist_for_each_entry(pol, chain, byidx) {
		if (pol->type == type && pol->index == id &&
		    (mark & pol->mark.m) == pol->mark.v) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);

#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol->security);
			if (err) {
				xfrm_audit_policy_delete(pol, 0, task_valid);
				return err;
			}
		}
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(
								pol->security);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
								 task_valid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
	return 0;
}
#endif

int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
	int dir, err = 0, cnt = 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

	again1:
		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			__xfrm_policy_unlink(pol, dir);
			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
			cnt++;

			xfrm_audit_policy_delete(pol, 1, task_valid);

			xfrm_policy_kill(pol);

			write_lock_bh(&net->xfrm.xfrm_policy_lock);
			goto again1;
		}

		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				__xfrm_policy_unlink(pol, dir);
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				cnt++;

				xfrm_audit_policy_delete(pol, 1, task_valid);
				xfrm_policy_kill(pol);

				write_lock_bh(&net->xfrm.xfrm_policy_lock);
				goto again2;
			}
		}

	}
	if (!cnt)
		err = -ESRCH;
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;

	if (list_empty(&walk->walk.all) && walk->seq != 0)
		return 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	if (list_empty(&walk->walk.all))
		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
	else
		x = list_first_entry(&walk->walk.all,
				     struct xfrm_policy_walk_entry, all);

	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
		if (x->dead)
			continue;
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
		}
		walk->seq++;
	}
	if (walk->seq == 0) {
		error = -ENOENT;
		goto out;
	}
	list_del_init(&walk->walk.all);
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
	INIT_LIST_HEAD(&walk->walk.all);
	walk->walk.dead = 1;
	walk->type = type;
	walk->seq = 0;
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
{
	if (list_empty(&walk->walk.all))
		return;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	list_del(&walk->walk.all);
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_walk_done);

/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
static int xfrm_policy_match(const struct xfrm_policy *pol,
			     const struct flowi *fl,
			     u8 type, u16 family, int dir)
{
	const struct xfrm_selector *sel = &pol->selector;
	int ret = -ESRCH;
	bool match;

	if (pol->family != family ||
	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
	    pol->type != type)
		return ret;

	match = xfrm_selector_match(sel, fl, family);
	if (match)
		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
						  dir);

	return ret;
}

static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
						     const struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	const xfrm_address_t *daddr, *saddr;
	struct hlist_head *chain;
	u32 priority = ~0U;

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_direct(net, daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, chain, bydst) {
		if ((pol->priority >= priority) && ret)
			break;

		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			break;
		}
	}

	xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return ret;
}

static struct xfrm_policy *
__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_policy *pol;

	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (pol != NULL)
		return pol;
#endif
	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}

static int flow_to_policy_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;

	switch (dir) {
	default:
	case FLOW_DIR_IN:
		return XFRM_POLICY_IN;
	case FLOW_DIR_OUT:
		return XFRM_POLICY_OUT;
	case FLOW_DIR_FWD:
		return XFRM_POLICY_FWD;
	}
}

static struct flow_cache_object *
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
{
	struct xfrm_policy *pol;

	if (old_obj)
		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));

	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
	if (IS_ERR_OR_NULL(pol))
		return ERR_CAST(pol);

	/* Resolver returns two references:
	 * one for cache and one for caller of flow_cache_lookup() */
	xfrm_pol_hold(pol);

	return &pol->flo;
}

static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}

static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
						 const struct flowi *fl)
{
	struct xfrm_policy *pol;
	struct net *net = sock_net(sk);

	rcu_read_lock();
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	pol = rcu_dereference(sk->sk_policy[dir]);
	if (pol != NULL) {
		bool match = xfrm_selector_match(&pol->selector, fl,
						 sk->sk_family);
		int err = 0;

		if (match) {
			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
				pol = NULL;
				goto out;
			}
			err = security_xfrm_policy_lookup(pol->security,
						      fl->flowi_secid,
						      policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
out:
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
	rcu_read_unlock();
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct net *net = xp_net(pol);

	list_add(&pol->walk.all, &net->xfrm.policy_all);
	net->xfrm.policy_count[dir]++;
	xfrm_pol_hold(pol);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	struct net *net = xp_net(pol);

	if (list_empty(&pol->walk.all))
		return NULL;

	/* Socket policies are not hashed. */
	if (!hlist_unhashed(&pol->bydst)) {
		hlist_del(&pol->bydst);
		hlist_del(&pol->byidx);
	}

	list_del_init(&pol->walk.all);
	net->xfrm.policy_count[dir]--;

	return pol;
}

static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
{
	__xfrm_policy_link(pol, XFRM_POLICY_MAX + dir);
}

static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
{
	__xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir);
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
	struct net *net = xp_net(pol);

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	pol = __xfrm_policy_unlink(pol, dir);
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	if (pol) {
		xfrm_policy_kill(pol);
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct net *net = xp_net(pol);
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	old_pol = rcu_dereference_protected(sk->sk_policy[dir],
				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
	if (pol) {
		pol->curlft.add_time = get_seconds();
		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
		xfrm_sk_policy_link(pol, dir);
	}
	rcu_assign_pointer(sk->sk_policy[dir], pol);
	if (old_pol) {
		if (pol)
			xfrm_policy_requeue(old_pol, pol);

		/* Unlinking always succeeds. This is the only function
		 * allowed to delete or replace a socket policy.
		 */
		xfrm_sk_policy_unlink(old_pol, dir);
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
	struct net *net = xp_net(old);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->mark = old->mark;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&net->xfrm.xfrm_policy_lock);
		xfrm_sk_policy_link(newp, dir);
		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
{
	const struct xfrm_policy *p;
	struct xfrm_policy *np;
	int i, ret = 0;

	rcu_read_lock();
	for (i = 0; i < 2; i++) {
		p = rcu_dereference(osk->sk_policy[i]);
		if (p) {
			np = clone_policy(p, i);
			if (unlikely(!np)) {
				ret = -ENOMEM;
				break;
			}
			rcu_assign_pointer(sk->sk_policy[i], np);
		}
	}
	rcu_read_unlock();
	return ret;
}

static int
xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
	       xfrm_address_t *remote, unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->get_saddr(net, oif, local, remote);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

/* Resolve list of templates for the flow, given policy. */

static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
		      struct xfrm_state **xfrm, unsigned short family)
{
	struct net *net = xp_net(policy);
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			if (xfrm_addr_any(local, tmpl->encap_family)) {
				error = xfrm_get_saddr(net, fl->flowi_oif,
						       &tmp, remote,
						       tmpl->encap_family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		} else if (error == -ESRCH) {
			error = -EAGAIN;
		}

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx >= 0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}

static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
		  struct xfrm_state **xfrm, unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

 fail:
	for (cnx--; cnx >= 0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static inline int xfrm_get_tos(const struct flowi *fl, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;

	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}

static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (xdst->route == NULL) {
		/* Dummy bundle - if it has xfrms, we were not able
		 * to build the bundle because template resolution
		 * failed. We need to retry resolving. */
		if (xdst->num_xfrms > 0)
			return NULL;
	} else if (dst->flags & DST_XFRM_QUEUE) {
		return NULL;
	} else {
		/* Real bundle */
		if (stale_bundle(dst))
			return NULL;
	}

	dst_hold(dst);
	return flo;
}

static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (!xdst->route)
		return 0;
	if (stale_bundle(dst))
		return 0;

	return 1;
}

static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	dst_free(dst);
}

static const struct flow_cache_ops xfrm_bundle_fc_ops = {
	.get = xfrm_bundle_flo_get,
	.check = xfrm_bundle_flo_check,
	.delete = xfrm_bundle_flo_delete,
};

static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_ops *dst_ops;
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

	switch (family) {
	case AF_INET:
		dst_ops = &net->xfrm.xfrm4_dst_ops;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		dst_ops = &net->xfrm.xfrm6_dst_ops;
		break;
#endif
	default:
		BUG();
	}
	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);

	if (likely(xdst)) {
		struct dst_entry *dst = &xdst->u.dst;

		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
		xdst->flo.ops = &xfrm_bundle_fc_ops;
	} else
		xdst = ERR_PTR(-ENOBUFS);

	xfrm_policy_put_afinfo(afinfo);

	return xdst;
}

static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(dst->ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->init_path(path, dst, nfheader_len);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
				const struct flowi *fl)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->fill_dst(xdst, dev, fl);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}


/* Allocate a chain of dst_entry's, attach the known xfrms and calculate
 * all the metrics... In short, bundle a bundle.
 */

static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
					    const struct flowi *fl,
					    struct dst_entry *dst)
{
	struct net *net = xp_net(policy);
	unsigned long now = jiffies;
	struct net_device *dev;
	struct xfrm_mode *inner_mode;
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
	int nfheader_len = 0;
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;
	xfrm_address_t saddr, daddr;

	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);

	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	for (; i < nx; i++) {
		struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

		if (xfrm[i]->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(xfrm[i],
							xfrm_af2proto(family));
			if (!inner_mode) {
				err = -EAFNOSUPPORT;
				dst_release(dst);
				goto put_states;
			}
		} else
			inner_mode = xfrm[i]->inner_mode;

		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
		dst_copy_metrics(dst1, dst);

		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
			dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
					      &saddr, &daddr, family);
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
		xdst->xfrm_genid = xfrm[i]->genid;

		dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
		dst1->output = inner_mode->afinfo->output;

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
		trailer_len += xfrm[i]->props.trailer_len;
	}

	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
	xfrm_init_pmtu(dst_prev);

	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

		err = xfrm_fill_dst(xdst, dev, fl);
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}

#ifdef CONFIG_XFRM_SUB_POLICY
static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
{
	if (!*target) {
		*target = kmalloc(size, GFP_ATOMIC);
		if (!*target)
			return -ENOMEM;
	}

	memcpy(*target, src, size);
	return 0;
}
#endif

static int xfrm_dst_update_parent(struct dst_entry *dst,
				  const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}

static int xfrm_dst_update_origin(struct dst_entry *dst,
				  const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}

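/* Count the templates of the policies in pols[], pulling in the main
 * policy as well when a sub policy was found first. A negative
 * *num_xfrms signals that one of the policies blocks the flow.
 */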
static int xfrm_expand_policies(const struct flowi *fl, u16 family,
				struct xfrm_policy **pols,
				int *num_pols, int *num_xfrms)
{
	int i;

	if (*num_pols == 0 || !pols[0]) {
		*num_pols = 0;
		*num_xfrms = 0;
		return 0;
	}
	if (IS_ERR(pols[0]))
		return PTR_ERR(pols[0]);

	*num_xfrms = pols[0]->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
						    XFRM_POLICY_TYPE_MAIN,
						    fl, family,
						    XFRM_POLICY_OUT);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				xfrm_pols_put(pols, *num_pols);
				return PTR_ERR(pols[1]);
			}
			(*num_pols)++;
			(*num_xfrms) += pols[1]->xfrm_nr;
		}
	}
#endif
	for (i = 0; i < *num_pols; i++) {
		if (pols[i]->action != XFRM_POLICY_ALLOW) {
			*num_xfrms = -1;
			break;
		}
	}

	return 0;

}

static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
			       const struct flowi *fl, u16 family,
			       struct dst_entry *dst_orig)
{
	struct net *net = xp_net(pols[0]);
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst;
	struct xfrm_dst *xdst;
	int err;

	/* Try to instantiate a bundle */
	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
	if (err <= 0) {
		if (err != 0 && err != -EAGAIN)
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
		return ERR_PTR(err);
	}

	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
	if (IS_ERR(dst)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
		return ERR_CAST(dst);
	}

	xdst = (struct xfrm_dst *)dst;
	xdst->num_xfrms = err;
	if (num_pols > 1)
		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
	else
		err = xfrm_dst_update_origin(dst, fl);
	if (unlikely(err)) {
		dst_free(dst);
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
		return ERR_PTR(err);
	}

	xdst->num_pols = num_pols;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
	xdst->policy_genid = atomic_read(&pols[0]->genid);

	return xdst;
}

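/* Hold timer handler: retry the bundle lookup for queued packets and
 * either transmit them, rearm the timer with a doubled timeout, or
 * purge the queue once XFRM_QUEUE_TMO_MAX is exceeded.
 */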
static void xfrm_policy_queue_process(unsigned long arg)
{
	struct sk_buff *skb;
	struct sock *sk;
	struct dst_entry *dst;
	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
	struct net *net = xp_net(pol);
	struct xfrm_policy_queue *pq = &pol->polq;
	struct flowi fl;
	struct sk_buff_head list;

	spin_lock(&pq->hold_queue.lock);
	skb = skb_peek(&pq->hold_queue);
	if (!skb) {
		spin_unlock(&pq->hold_queue.lock);
		goto out;
	}
	dst = skb_dst(skb);
	sk = skb->sk;
	xfrm_decode_session(skb, &fl, dst->ops->family);
	spin_unlock(&pq->hold_queue.lock);

	dst_hold(dst->path);
	dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
	if (IS_ERR(dst))
		goto purge_queue;

	if (dst->flags & DST_XFRM_QUEUE) {
		dst_release(dst);

		if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
			goto purge_queue;

		pq->timeout = pq->timeout << 1;
		if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
			xfrm_pol_hold(pol);
		goto out;
	}

	dst_release(dst);

	__skb_queue_head_init(&list);

	spin_lock(&pq->hold_queue.lock);
	pq->timeout = 0;
	skb_queue_splice_init(&pq->hold_queue, &list);
	spin_unlock(&pq->hold_queue.lock);

	while (!skb_queue_empty(&list)) {
		skb = __skb_dequeue(&list);

		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
		dst_hold(skb_dst(skb)->path);
		dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
		if (IS_ERR(dst)) {
			kfree_skb(skb);
			continue;
		}

		nf_reset(skb);
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);

		dst_output(net, skb->sk, skb);
	}

out:
	xfrm_pol_put(pol);
	return;

purge_queue:
	pq->timeout = 0;
	skb_queue_purge(&pq->hold_queue);
	xfrm_pol_put(pol);
}

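/* Output handler for dummy bundles: park the skb on the policy's hold
 * queue until the states resolve, dropping it when the queue is full.
 */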
static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned long sched_next;
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
	struct xfrm_policy *pol = xdst->pols[0];
	struct xfrm_policy_queue *pq = &pol->polq;

	if (unlikely(skb_fclone_busy(sk, skb))) {
		kfree_skb(skb);
		return 0;
	}

	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
		kfree_skb(skb);
		return -EAGAIN;
	}

	skb_dst_force(skb);

	spin_lock_bh(&pq->hold_queue.lock);

	if (!pq->timeout)
		pq->timeout = XFRM_QUEUE_TMO_MIN;

	sched_next = jiffies + pq->timeout;

	if (del_timer(&pq->hold_timer)) {
		if (time_before(pq->hold_timer.expires, sched_next))
			sched_next = pq->hold_timer.expires;
		xfrm_pol_put(pol);
	}

	__skb_queue_tail(&pq->hold_queue, skb);
	if (!mod_timer(&pq->hold_timer, sched_next))
		xfrm_pol_hold(pol);

	spin_unlock_bh(&pq->hold_queue.lock);

	return 0;
}

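/* Build a dummy bundle that queues packets while the needed states are
 * still being resolved; without XFRM_LOOKUP_QUEUE (or with larval drop
 * enabled) a bare xfrm_dst is returned instead.
 */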
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
						 struct xfrm_flo *xflo,
						 const struct flowi *fl,
						 int num_xfrms,
						 u16 family)
{
	int err;
	struct net_device *dev;
	struct dst_entry *dst;
	struct dst_entry *dst1;
	struct xfrm_dst *xdst;

	xdst = xfrm_alloc_dst(net, family);
	if (IS_ERR(xdst))
		return xdst;

	if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
	    net->xfrm.sysctl_larval_drop ||
	    num_xfrms <= 0)
		return xdst;

	dst = xflo->dst_orig;
	dst1 = &xdst->u.dst;
	dst_hold(dst);
	xdst->route = dst;

	dst_copy_metrics(dst1, dst);

	dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
	dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
	dst1->lastuse = jiffies;

	dst1->input = dst_discard;
	dst1->output = xdst_queue_output;

	dst_hold(dst);
	dst1->child = dst;
	dst1->path = dst;

	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	err = xfrm_fill_dst(xdst, dev, fl);
	if (err)
		goto free_dst;

out:
	return xdst;

free_dst:
	dst_release(dst1);
	xdst = ERR_PTR(err);
	goto out;
}

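/* Flow cache resolver for bundles: reuse the old bundle if its
 * policies are still alive, otherwise look the policies up again and
 * create a real or dummy bundle for them.
 */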
static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
		   struct flow_cache_object *oldflo, void *ctx)
{
	struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct xfrm_dst *xdst, *new_xdst;
	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;

	/* Check if the policies from old bundle are usable */
	xdst = NULL;
	if (oldflo) {
		xdst = container_of(oldflo, struct xfrm_dst, flo);
		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		pol_dead = 0;
		for (i = 0; i < num_pols; i++) {
			pols[i] = xdst->pols[i];
			pol_dead |= pols[i]->walk.dead;
		}
		if (pol_dead) {
			dst_free(&xdst->u.dst);
			xdst = NULL;
			num_pols = 0;
			num_xfrms = 0;
			oldflo = NULL;
		}
	}

	/* Resolve policies to use if we couldn't get them from
	 * previous cache entry */
	if (xdst == NULL) {
		num_pols = 1;
		pols[0] = __xfrm_policy_lookup(net, fl, family,
					       flow_to_policy_dir(dir));
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto inc_error;
		if (num_pols == 0)
			return NULL;
		if (num_xfrms <= 0)
			goto make_dummy_bundle;
	}

	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
						  xflo->dst_orig);
	if (IS_ERR(new_xdst)) {
		err = PTR_ERR(new_xdst);
		if (err != -EAGAIN)
			goto error;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		dst_hold(&xdst->u.dst);
		return oldflo;
	} else if (new_xdst == NULL) {
		num_xfrms = 0;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		xdst->num_xfrms = 0;
		dst_hold(&xdst->u.dst);
		return oldflo;
	}

	/* Kill the previous bundle */
	if (xdst) {
		/* The policies were stolen for newly generated bundle */
		xdst->num_pols = 0;
		dst_free(&xdst->u.dst);
	}

	/* The flow cache does not hold a reference; it dst_free()'s the
	 * entry. We still need to return one reference for the original
	 * caller. */
	dst_hold(&new_xdst->u.dst);
	return &new_xdst->flo;

make_dummy_bundle:
	/* We found policies, but there are no bundles to instantiate:
	 * either the policy blocks, it has no transformations, or
	 * we could not build a template (no xfrm_states). */
	xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
	if (IS_ERR(xdst)) {
		xfrm_pols_put(pols, num_pols);
		return ERR_CAST(xdst);
	}
	xdst->num_pols = num_pols;
	xdst->num_xfrms = num_xfrms;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);

	dst_hold(&xdst->u.dst);
	return &xdst->flo;

inc_error:
	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
	if (xdst != NULL)
		dst_free(&xdst->u.dst);
	else
		xfrm_pols_put(pols, num_pols);
	return ERR_PTR(err);
}
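
/*
 * Illustrative sketch of the resolver contract assumed above (the
 * cache machinery itself lives in net/core/flow.c): flow_cache_lookup()
 * hands the resolver the previously cached object, and the resolver
 * either revalidates it, replaces it, or fails:
 *
 *	flo = flow_cache_lookup(net, fl, family, dir,
 *				xfrm_bundle_lookup, &xflo);
 *	if (!flo)		// no policy applies to this flow
 *		...
 *	else if (IS_ERR(flo))	// resolution failed
 *		...
 *	else
 *		xdst = container_of(flo, struct xfrm_dst, flo);
 */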

static struct dst_entry *make_blackhole(struct net *net, u16 family,
					struct dst_entry *dst_orig)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_entry *ret;

	if (!afinfo) {
		dst_release(dst_orig);
		return ERR_PTR(-EINVAL);
	} else {
		ret = afinfo->blackhole_route(net, dst_orig);
	}
	xfrm_policy_put_afinfo(afinfo);

	return ret;
}

/* Main function: finds/creates a bundle for a given flow.
 *
 * At the moment we eat a raw IP route, mostly to speed up lookups
 * on interfaces with IPsec disabled.
 */
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl,
			      const struct sock *sk, int flags)
{
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct flow_cache_object *flo;
	struct xfrm_dst *xdst;
	struct dst_entry *dst, *route;
	u16 family = dst_orig->ops->family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;

	dst = NULL;
	xdst = NULL;
	route = NULL;

	sk = sk_const_to_full_sk(sk);
	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
		num_pols = 1;
		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto dropdst;

		if (num_pols) {
			if (num_xfrms <= 0) {
				drop_pols = num_pols;
				goto no_transform;
			}

			xdst = xfrm_resolve_and_create_bundle(
					pols, num_pols, fl,
					family, dst_orig);
			if (IS_ERR(xdst)) {
				xfrm_pols_put(pols, num_pols);
				err = PTR_ERR(xdst);
				goto dropdst;
			} else if (xdst == NULL) {
				num_xfrms = 0;
				drop_pols = num_pols;
				goto no_transform;
			}

			dst_hold(&xdst->u.dst);
			xdst->u.dst.flags |= DST_NOCACHE;
			route = xdst->route;
		}
	}

	if (xdst == NULL) {
		struct xfrm_flo xflo;

		xflo.dst_orig = dst_orig;
		xflo.flags = flags;

		/* To accelerate a bit...  */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
			goto nopol;

		flo = flow_cache_lookup(net, fl, family, dir,
					xfrm_bundle_lookup, &xflo);
		if (flo == NULL)
			goto nopol;
		if (IS_ERR(flo)) {
			err = PTR_ERR(flo);
			goto dropdst;
		}
		xdst = container_of(flo, struct xfrm_dst, flo);

		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
		route = xdst->route;
	}

	dst = &xdst->u.dst;
	if (route == NULL && num_xfrms > 0) {
		/* The only case when xfrm_bundle_lookup() returns a
		 * bundle with a null route is when the template could
		 * not be resolved. It means the policies are there, but
		 * the bundle could not be created, since we don't yet
		 * have the xfrm_states. We need to wait for the KM to
		 * negotiate new SAs or bail out with an error. */
		if (net->xfrm.sysctl_larval_drop) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
			err = -EREMOTE;
			goto error;
		}

		err = -EAGAIN;

		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
		goto error;
	}

no_transform:
	if (num_pols == 0)
		goto nopol;

	if ((flags & XFRM_LOOKUP_ICMP) &&
	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
		err = -ENOENT;
		goto error;
	}

	for (i = 0; i < num_pols; i++)
		pols[i]->curlft.use_time = get_seconds();

	if (num_xfrms < 0) {
		/* Prohibit the flow */
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
		err = -EPERM;
		goto error;
	} else if (num_xfrms > 0) {
		/* Flow transformed */
		dst_release(dst_orig);
	} else {
		/* Flow passes untransformed */
		dst_release(dst);
		dst = dst_orig;
	}
ok:
	xfrm_pols_put(pols, drop_pols);
	if (dst && dst->xfrm &&
	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
		dst->flags |= DST_XFRM_TUNNEL;
	return dst;

nopol:
	if (!(flags & XFRM_LOOKUP_ICMP)) {
		dst = dst_orig;
		goto ok;
	}
	err = -ENOENT;
error:
	dst_release(dst);
dropdst:
	if (!(flags & XFRM_LOOKUP_KEEP_DST_REF))
		dst_release(dst_orig);
	xfrm_pols_put(pols, drop_pols);
	return ERR_PTR(err);
}
EXPORT_SYMBOL(xfrm_lookup);
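
/*
 * Minimal illustrative caller of xfrm_lookup() (hypothetical, not part
 * of this file), assuming fl has already been filled in:
 *
 *	dst = xfrm_lookup(net, &rt->dst, &fl, sk, 0);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);	// rt was released on the error path
 *	skb_dst_set(skb, dst);
 *
 * Without XFRM_LOOKUP_KEEP_DST_REF the original route's reference is
 * consumed on failure, as the dropdst label above shows.
 */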

/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
 * Otherwise we may send out blackholed packets.
 */
struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
				    const struct flowi *fl,
				    const struct sock *sk, int flags)
{
	struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
					    flags | XFRM_LOOKUP_QUEUE |
					    XFRM_LOOKUP_KEEP_DST_REF);

	if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
		return make_blackhole(net, dst_orig->ops->family, dst_orig);

	return dst;
}
EXPORT_SYMBOL(xfrm_lookup_route);
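
/*
 * Illustrative caller honouring the rule above (hypothetical): a
 * blackhole route returned by make_blackhole() only discards packets
 * when they are actually pushed through dst_output():
 *
 *	dst = xfrm_lookup_route(net, &rt->dst, &fl, sk, 0);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 *	skb_dst_set(skb, dst);
 *	return dst_output(net, sk, skb);	// must not be skipped
 */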

static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
	return x->type->reject(x, skb, fl);
}
/* When the skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have the policy cached at them.
 */

static inline int
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
}

/*
 * 0 or more is returned when validation succeeds (either a bypass
 * because of an optional transport-mode template, or the next index
 * of the secpath state matched against the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
static inline int
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2-idx;
			break;
		}
	}
	return start;
}
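
/*
 * Worked example of the convention above (illustrative): with a
 * non-optional template and a secpath whose xvec[0] matches it,
 * xfrm_policy_ok(tmpl, sp, 0, family) returns 1, the next index to
 * try. If instead xvec[0] is a non-matching tunnel-mode state, the
 * scan stops and -2 - 0 == -2 is returned, which the caller below
 * decodes back to xerr_idx = 0 for xfrm_secpath_reject().
 */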

int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int err;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	afinfo->decode_session(skb, fl, reverse);
	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(__xfrm_decode_session);

static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
{
	for (; k < sp->len; k++) {
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
			*idxp = k;
			return 1;
		}
	}

	return 0;
}

int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	int reverse;
	struct flowi fl;
	u8 fl_dir;
	int xerr_idx = -1;

	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	nf_nat_decode_session(skb, &fl, family);
	/* First, check the used SAs against their selectors. */
	if (skb->sp) {
		int i;

		for (i = skb->sp->len-1; i >= 0; i--) {
			struct xfrm_state *x = skb->sp->xvec[i];
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
				return 0;
			}
		}
	}

	pol = NULL;
	sk = sk_to_full_sk(sk);
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
		if (IS_ERR(pol)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
			return 0;
		}
	}

	if (!pol) {
		struct flow_cache_object *flo;

		flo = flow_cache_lookup(net, &fl, family, fl_dir,
					xfrm_policy_lookup, NULL);
		if (IS_ERR_OR_NULL(flo))
			pol = ERR_CAST(flo);
		else
			pol = container_of(flo, struct xfrm_policy, flo);
	}

	if (IS_ERR(pol)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
		return 0;
	}

	if (!pol) {
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}
		return 1;
	}

	pol->curlft.use_time = get_seconds();

	pols[0] = pol;
	npols++;
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
				return 0;
			}
			pols[1]->curlft.use_time = get_seconds();
			npols++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
				goto reject;
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
				goto reject_error;
			}
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find the corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
				goto reject;
			}
		}

		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
			goto reject;
		}

		xfrm_pols_put(pols, npols);
		return 1;
	}
	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
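
/*
 * Most callers reach this through the xfrm_policy_check() family of
 * inlines in <net/xfrm.h>, which short-circuit when no policies are
 * installed, e.g. (illustrative):
 *
 *	if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, AF_INET))
 *		goto drop;
 */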

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct flowi fl;
	struct dst_entry *dst;
	int res = 1;

	if (xfrm_decode_session(skb, &fl, family) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
		return 0;
	}

	skb_dst_force(skb);

	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
	if (IS_ERR(dst)) {
		res = 0;
		dst = NULL;
	}
	skb_dst_set(skb, dst);
	return res;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
	 * get validated by dst_ops->check on every use.  We do this
	 * because when a normal route referenced by an XFRM dst is
	 * obsoleted we do not go looking around for all parent
	 * referencing XFRM dsts so that we can invalidate them.  It
	 * is just too much work.  Instead we make the checks here on
	 * every use.  For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
	 * dst which causes its ->obsolete field to be set to
	 * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
	 * this, we want to force a new route lookup.
	 */
	if (dst->obsolete < 0 && !stale_bundle(dst))
		return dst;

	return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
	return !xfrm_bundle_ok((struct xfrm_dst *)dst);
}

void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
		dst->dev = dev_net(dev)->loopback_dev;
		dev_hold(dst->dev);
		dev_put(dev);
	}
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such a dst must be popped before it reaches the
	 * point of failure.
	 */
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (dst) {
		if (dst->obsolete) {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);

static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred(net);
}

static void xfrm_init_pmtu(struct dst_entry *dst)
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

		dst_metric_set(dst, RTAX_MTU, pmtu);
	} while ((dst = dst->next));
}
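
/*
 * Example of the clamping above (illustrative numbers only): with a
 * child route MTU of 1500 and xfrm_state_mtu() charging roughly 56
 * bytes of ESP overhead, pmtu becomes about 1444; if the route towards
 * the tunnel endpoint reports 1400, the bundle's RTAX_MTU is set to
 * 1400, the smaller of the two values.
 */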

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static int xfrm_bundle_ok(struct xfrm_dst *first)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

	if (dst->flags & DST_XFRM_QUEUE)
		return 1;

	last = NULL;

	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
		if (xdst->xfrm_genid != dst->xfrm->genid)
			return 0;
		if (xdst->num_pols > 0 &&
		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
			return 0;

		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst_metric_set(dst, RTAX_MTU, mtu);

		if (last == first)
			break;

		last = (struct xfrm_dst *)last->u.dst.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}
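
/*
 * Note (editorial summary of the loop above): the second pass only
 * runs when some cached MTU changed ("last" was set); it then
 * re-clamps the metric on every bundle entry from the deepest changed
 * xfrm_dst back up to "first", so e.g. a PMTU drop from 1500 to 1400
 * on the underlying route propagates through the whole stacked bundle.
 */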

static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
	return dst_metric_advmss(dst->path);
}

static unsigned int xfrm_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst_mtu(dst->path);
}

static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	return dst->path->ops->neigh_lookup(dst, skb, daddr);
}

int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -EEXIST;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->default_advmss == NULL))
			dst_ops->default_advmss = xfrm_default_advmss;
		if (likely(dst_ops->mtu == NULL))
			dst_ops->mtu = xfrm_mtu;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(dst_ops->neigh_lookup == NULL))
			dst_ops->neigh_lookup = xfrm_neigh_lookup;
		if (likely(afinfo->garbage_collect == NULL))
			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);

	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
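
/*
 * Illustrative registration sketch, modelled on the per-family glue
 * such as net/ipv4/xfrm4_policy.c (field subset only, not verbatim):
 *
 *	static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 *		.family		 = AF_INET,
 *		.dst_ops	 = &xfrm4_dst_ops,
 *		.blackhole_route = ipv4_blackhole_route,
 *	};
 *
 *	err = xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 *
 * The NULL checks above then fill in generic defaults for any dst_ops
 * hook the family did not provide itself.
 */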

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock(&xfrm_policy_afinfo_lock);
	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
					 NULL);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);
	if (!err) {
		struct dst_ops *dst_ops = afinfo->dst_ops;

		synchronize_rcu();

		dst_ops->kmem_cachep = NULL;
		dst_ops->check = NULL;
		dst_ops->negative_advice = NULL;
		dst_ops->link_failure = NULL;
		afinfo->garbage_collect = NULL;
	}
	return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	switch (event) {
	case NETDEV_DOWN:
		xfrm_garbage_collect(dev_net(dev));
	}
	return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
	.notifier_call	= xfrm_dev_event,
};

#ifdef CONFIG_XFRM_STATISTICS
static int __net_init xfrm_statistics_init(struct net *net)
{
	int rv;
	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
	if (!net->mib.xfrm_statistics)
		return -ENOMEM;
	rv = xfrm_proc_init(net);
	if (rv < 0)
		free_percpu(net->mib.xfrm_statistics);
	return rv;
}

static void xfrm_statistics_fini(struct net *net)
{
	xfrm_proc_fini(net);
	free_percpu(net->mib.xfrm_statistics);
}
#else
static int __net_init xfrm_statistics_init(struct net *net)
{
	return 0;
}

static void xfrm_statistics_fini(struct net *net)
{
}
#endif

static int __net_init xfrm_policy_init(struct net *net)
{
	unsigned int hmask, sz;
	int dir;

	if (net_eq(net, &init_net))
		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	hmask = 8 - 1;
	sz = (hmask+1) * sizeof(struct hlist_head);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (!net->xfrm.policy_byidx)
		goto out_byidx;
	net->xfrm.policy_idx_hmask = hmask;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy_hash *htab;

		net->xfrm.policy_count[dir] = 0;
		net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);

		htab = &net->xfrm.policy_bydst[dir];
		htab->table = xfrm_hash_alloc(sz);
		if (!htab->table)
			goto out_bydst;
		htab->hmask = hmask;
		htab->dbits4 = 32;
		htab->sbits4 = 32;
		htab->dbits6 = 128;
		htab->sbits6 = 128;
	}
	net->xfrm.policy_hthresh.lbits4 = 32;
	net->xfrm.policy_hthresh.rbits4 = 32;
	net->xfrm.policy_hthresh.lbits6 = 128;
	net->xfrm.policy_hthresh.rbits6 = 128;

	seqlock_init(&net->xfrm.policy_hthresh.lock);

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
	INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
	if (net_eq(net, &init_net))
		register_netdevice_notifier(&xfrm_dev_notifier);
	return 0;

out_bydst:
	for (dir--; dir >= 0; dir--) {
		struct xfrm_policy_hash *htab;

		htab = &net->xfrm.policy_bydst[dir];
		xfrm_hash_free(htab->table, sz);
	}
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
out_byidx:
	return -ENOMEM;
}

static void xfrm_policy_fini(struct net *net)
{
	unsigned int sz;
	int dir;

	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
#endif
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);

	WARN_ON(!list_empty(&net->xfrm.policy_all));

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy_hash *htab;

		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));

		htab = &net->xfrm.policy_bydst[dir];
		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
		WARN_ON(!hlist_empty(htab->table));
		xfrm_hash_free(htab->table, sz);
	}

	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
}

static int __net_init xfrm_net_init(struct net *net)
{
	int rv;

	rv = xfrm_statistics_init(net);
	if (rv < 0)
		goto out_statistics;
	rv = xfrm_state_init(net);
	if (rv < 0)
		goto out_state;
	rv = xfrm_policy_init(net);
	if (rv < 0)
		goto out_policy;
	rv = xfrm_sysctl_init(net);
	if (rv < 0)
		goto out_sysctl;
	rv = flow_cache_init(net);
	if (rv < 0)
		goto out;

	/* Initialize the per-net locks here */
	spin_lock_init(&net->xfrm.xfrm_state_lock);
	rwlock_init(&net->xfrm.xfrm_policy_lock);
	mutex_init(&net->xfrm.xfrm_cfg_mutex);

	return 0;

out:
	xfrm_sysctl_fini(net);
out_sysctl:
	xfrm_policy_fini(net);
out_policy:
	xfrm_state_fini(net);
out_state:
	xfrm_statistics_fini(net);
out_statistics:
	return rv;
}

static void __net_exit xfrm_net_exit(struct net *net)
{
	flow_cache_fini(net);
	xfrm_sysctl_fini(net);
	xfrm_policy_fini(net);
	xfrm_state_fini(net);
	xfrm_statistics_fini(net);
}

static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};

void __init xfrm_init(void)
{
	register_pernet_subsys(&xfrm_net_ops);
	xfrm_input_init();
}

#ifdef CONFIG_AUDITSYSCALL
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
{
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);

	switch (sel->family) {
	case AF_INET:
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	case AF_INET6:
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	}
}

void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-add");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
			      bool task_valid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-delete");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif

#ifdef CONFIG_XFRM_MIGRATE
static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
					const struct xfrm_selector *sel_tgt)
{
	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
		if (sel_tgt->family == sel_cmp->family &&
		    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
				    sel_cmp->family) &&
		    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
				    sel_cmp->family) &&
		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
			return true;
		}
	} else {
		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
			return true;
		}
	}
	return false;
}

static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type, struct net *net)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_head *chain;
	u32 priority = ~0U;

	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
	chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(pol, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, chain, bydst) {
		if ((pol->priority >= priority) && ret)
			break;

		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			break;
		}
	}

	xfrm_pol_hold(ret);

	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return ret;
}

static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
{
	int match = 0;

	if (t->mode == m->mode && t->id.proto == m->proto &&
	    (m->reqid == 0 || t->reqid == m->reqid)) {
		switch (t->mode) {
		case XFRM_MODE_TUNNEL:
		case XFRM_MODE_BEET:
			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
					    m->old_family) &&
			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
					    m->old_family)) {
				match = 1;
			}
			break;
		case XFRM_MODE_TRANSPORT:
			/* In transport mode the template does not store
			   any IP addresses, so we just compare mode and
			   protocol. */
			match = 1;
			break;
		default:
			break;
		}
	}
	return match;
}

/* update the endpoint address(es) of the template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
	if (unlikely(pol->walk.dead)) {
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
			atomic_inc(&pol->genid);
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}

static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
		return -EINVAL;

	for (i = 0; i < num_migrate; i++) {
		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
				    m[i].old_family) &&
		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
				    m[i].old_family))
			return -EINVAL;
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
			return -EINVAL;

		/* check for duplicated entries */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family)
				return -EINVAL;
		}
	}

	return 0;
}

int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate,
		 struct xfrm_kmaddress *k, struct net *net)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
		goto out;

	/* Stage 1 - find policy */
	if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		if ((x = xfrm_migrate_state_find(mp, net))) {
			x_cur[nx_cur] = x;
			nx_cur++;
			if ((xc = xfrm_state_migrate(x, mp))) {
				x_new[nx_new] = xc;
				nx_new++;
			} else {
				err = -ENODATA;
				goto restore_state;
			}
		}
	}

	/* Stage 3 - update policy */
	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate, k);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
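
/*
 * Illustrative MIGRATE walkthrough (hypothetical addresses): to move a
 * tunnel-mode SA pair from 192.0.2.1 and 192.0.2.2 to 198.51.100.1 and
 * 198.51.100.2, a key manager passes a single struct xfrm_migrate with
 * the old_ and new_ endpoints filled in. Stages 1-5 above then find
 * the policy, clone the states to the new addresses, rewrite the
 * policy templates, delete the old states, and finally announce the
 * move to the key managers via km_migrate().
 */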
#endif
