/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "flow.h"
#include "datapath.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
#include <net/geneve.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/mpls.h>

#include "flow_netlink.h"
#include "vport-vxlan.h"

struct ovs_len_tbl {
	int len;
	const struct ovs_len_tbl *next;
};

#define OVS_ATTR_NESTED -1

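/* Grow the match's key (or mask) range so that it covers
 * [offset, offset + size), rounded out to sizeof(long) boundaries,
 * e.g. offset 6 and size 2 become [0, 8) on a 64-bit build.  The
 * long alignment lets masked key comparisons work a long at a time.
 */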
static void update_range(struct sw_flow_match *match,
			 size_t offset, size_t size, bool is_mask)
{
	struct sw_flow_key_range *range;
	size_t start = rounddown(offset, sizeof(long));
	size_t end = roundup(offset + size, sizeof(long));

	if (!is_mask)
		range = &match->range;
	else
		range = &match->mask->range;

	if (range->start == range->end) {
		range->start = start;
		range->end = end;
		return;
	}

	if (range->start > start)
		range->start = start;

	if (range->end < end)
		range->end = end;
}

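/* Write 'value' into the key or mask half of 'match' and record the
 * touched byte range, e.g.:
 *
 *	SW_FLOW_KEY_PUT(match, ip.proto, ipv4_key->ipv4_proto, is_mask);
 */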
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
	do { \
		update_range(match, offsetof(struct sw_flow_key, field),    \
			     sizeof((match)->key->field), is_mask);	    \
		if (is_mask)						    \
			(match)->mask->key.field = value;		    \
		else							    \
			(match)->key->field = value;			    \
	} while (0)

#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)	    \
	do {								    \
		update_range(match, offset, len, is_mask);		    \
		if (is_mask)						    \
			memcpy((u8 *)&(match)->mask->key + offset, value_p, \
			       len);					    \
		else							    \
			memcpy((u8 *)(match)->key + offset, value_p, len);  \
	} while (0)

#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)		      \
	SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
				  value_p, len, is_mask)

#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)		    \
	do {								    \
		update_range(match, offsetof(struct sw_flow_key, field),    \
			     sizeof((match)->key->field), is_mask);	    \
		if (is_mask)						    \
			memset((u8 *)&(match)->mask->key.field, value,      \
			       sizeof((match)->mask->key.field));	    \
		else							    \
			memset((u8 *)&(match)->key->field, value,           \
			       sizeof((match)->key->field));                \
	} while (0)

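/* Sanity-check the attribute bitmaps against the parsed flow:
 * 'key_expected' accumulates the attributes the key must supply given
 * its EtherType and IP protocol, while 'mask_allowed' accumulates the
 * attributes a mask may set without being rejected.
 */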
static bool match_validate(const struct sw_flow_match *match,
			   u64 key_attrs, u64 mask_attrs, bool log)
{
	u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
	u64 mask_allowed = key_attrs;  /* At most allow all key attributes */

	/* The following mask attributes are allowed only if they
	 * pass the validation tests.
	 */
	mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
			| (1 << OVS_KEY_ATTR_IPV6)
			| (1 << OVS_KEY_ATTR_TCP)
			| (1 << OVS_KEY_ATTR_TCP_FLAGS)
			| (1 << OVS_KEY_ATTR_UDP)
			| (1 << OVS_KEY_ATTR_SCTP)
			| (1 << OVS_KEY_ATTR_ICMP)
			| (1 << OVS_KEY_ATTR_ICMPV6)
			| (1 << OVS_KEY_ATTR_ARP)
			| (1 << OVS_KEY_ATTR_ND)
			| (1 << OVS_KEY_ATTR_MPLS));

	/* Always allowed mask fields. */
	mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
		       | (1 << OVS_KEY_ATTR_IN_PORT)
		       | (1 << OVS_KEY_ATTR_ETHERTYPE));

	/* Check key attributes. */
	if (match->key->eth.type == htons(ETH_P_ARP)
			|| match->key->eth.type == htons(ETH_P_RARP)) {
		key_expected |= 1 << OVS_KEY_ATTR_ARP;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
	}

	if (eth_p_mpls(match->key->eth.type)) {
		key_expected |= 1 << OVS_KEY_ATTR_MPLS;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
	}

	if (match->key->eth.type == htons(ETH_P_IP)) {
		key_expected |= 1 << OVS_KEY_ATTR_IPV4;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;

		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
			if (match->key->ip.proto == IPPROTO_UDP) {
				key_expected |= 1 << OVS_KEY_ATTR_UDP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
			}

			if (match->key->ip.proto == IPPROTO_SCTP) {
				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
			}

			if (match->key->ip.proto == IPPROTO_TCP) {
				key_expected |= 1 << OVS_KEY_ATTR_TCP;
				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				}
			}

			if (match->key->ip.proto == IPPROTO_ICMP) {
				key_expected |= 1 << OVS_KEY_ATTR_ICMP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
			}
		}
	}

	if (match->key->eth.type == htons(ETH_P_IPV6)) {
		key_expected |= 1 << OVS_KEY_ATTR_IPV6;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;

		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
			if (match->key->ip.proto == IPPROTO_UDP) {
				key_expected |= 1 << OVS_KEY_ATTR_UDP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
			}

			if (match->key->ip.proto == IPPROTO_SCTP) {
				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
			}

			if (match->key->ip.proto == IPPROTO_TCP) {
				key_expected |= 1 << OVS_KEY_ATTR_TCP;
				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				}
			}

			if (match->key->ip.proto == IPPROTO_ICMPV6) {
				key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;

				if (match->key->tp.src ==
						htons(NDISC_NEIGHBOUR_SOLICITATION) ||
				    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
					key_expected |= 1 << OVS_KEY_ATTR_ND;
					if (match->mask && (match->mask->key.tp.src == htons(0xff)))
						mask_allowed |= 1 << OVS_KEY_ATTR_ND;
				}
			}
		}
	}

	if ((key_attrs & key_expected) != key_expected) {
		/* Key attributes check failed. */
		OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
			  (unsigned long long)key_attrs,
			  (unsigned long long)key_expected);
		return false;
	}

	if ((mask_attrs & mask_allowed) != mask_attrs) {
		/* Mask attributes check failed. */
		OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
			  (unsigned long long)mask_attrs,
			  (unsigned long long)mask_allowed);
		return false;
	}

	return true;
}

size_t ovs_tun_key_attr_size(void)
{
	/* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
	 * updating this function.
	 */
	return    nla_total_size(8)    /* OVS_TUNNEL_KEY_ATTR_ID */
		+ nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
		+ nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
		+ nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
		/* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
		 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
		 */
		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
}

size_t ovs_key_attr_size(void)
{
	/* Whenever adding new OVS_KEY_ FIELDS, we should consider
	 * updating this function.
	 */
	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);

	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
		  + ovs_tun_key_attr_size()
		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
}

static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
	[OVS_TUNNEL_KEY_ATTR_ID]	    = { .len = sizeof(u64) },
	[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]	    = { .len = sizeof(u32) },
	[OVS_TUNNEL_KEY_ATTR_IPV4_DST]	    = { .len = sizeof(u32) },
	[OVS_TUNNEL_KEY_ATTR_TOS]	    = { .len = 1 },
	[OVS_TUNNEL_KEY_ATTR_TTL]	    = { .len = 1 },
	[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_CSUM]	    = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_TP_SRC]	    = { .len = sizeof(u16) },
	[OVS_TUNNEL_KEY_ATTR_TP_DST]	    = { .len = sizeof(u16) },
	[OVS_TUNNEL_KEY_ATTR_OAM]	    = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_NESTED },
	[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED },
};

/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
	[OVS_KEY_ATTR_ENCAP]	 = { .len = OVS_ATTR_NESTED },
	[OVS_KEY_ATTR_PRIORITY]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_IN_PORT]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_SKB_MARK]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_ETHERNET]	 = { .len = sizeof(struct ovs_key_ethernet) },
	[OVS_KEY_ATTR_VLAN]	 = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_IPV4]	 = { .len = sizeof(struct ovs_key_ipv4) },
	[OVS_KEY_ATTR_IPV6]	 = { .len = sizeof(struct ovs_key_ipv6) },
	[OVS_KEY_ATTR_TCP]	 = { .len = sizeof(struct ovs_key_tcp) },
	[OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_UDP]	 = { .len = sizeof(struct ovs_key_udp) },
	[OVS_KEY_ATTR_SCTP]	 = { .len = sizeof(struct ovs_key_sctp) },
	[OVS_KEY_ATTR_ICMP]	 = { .len = sizeof(struct ovs_key_icmp) },
	[OVS_KEY_ATTR_ICMPV6]	 = { .len = sizeof(struct ovs_key_icmpv6) },
	[OVS_KEY_ATTR_ARP]	 = { .len = sizeof(struct ovs_key_arp) },
	[OVS_KEY_ATTR_ND]	 = { .len = sizeof(struct ovs_key_nd) },
	[OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_DP_HASH]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,
				     .next = ovs_tunnel_key_lens, },
	[OVS_KEY_ATTR_MPLS]	 = { .len = sizeof(struct ovs_key_mpls) },
};

static bool is_all_zero(const u8 *fp, size_t size)
{
	int i;

	if (!fp)
		return false;

	for (i = 0; i < size; i++)
		if (fp[i])
			return false;

	return true;
}

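/* Walk the nested attributes in 'attr', length-checking each against
 * ovs_key_lens[] and recording it in 'a[]' and the '*attrsp' bitmap.
 * With 'nz' set (mask parsing), attributes whose payload is all zeroes
 * are skipped entirely.
 */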
static int __parse_flow_nlattrs(const struct nlattr *attr,
				const struct nlattr *a[],
				u64 *attrsp, bool log, bool nz)
{
	const struct nlattr *nla;
	u64 attrs;
	int rem;

	attrs = *attrsp;
	nla_for_each_nested(nla, attr, rem) {
		u16 type = nla_type(nla);
		int expected_len;

		if (type > OVS_KEY_ATTR_MAX) {
			OVS_NLERR(log, "Key type %d is out of range max %d",
				  type, OVS_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (attrs & (1 << type)) {
			OVS_NLERR(log, "Duplicate key (type %d).", type);
			return -EINVAL;
		}

		expected_len = ovs_key_lens[type].len;
		if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) {
			OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
				  type, nla_len(nla), expected_len);
			return -EINVAL;
		}

		if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
			attrs |= 1 << type;
			a[type] = nla;
		}
	}
	if (rem) {
		OVS_NLERR(log, "Message has %d unknown bytes.", rem);
		return -EINVAL;
	}

	*attrsp = attrs;
	return 0;
}

static int parse_flow_mask_nlattrs(const struct nlattr *attr,
				   const struct nlattr *a[], u64 *attrsp,
				   bool log)
{
	return __parse_flow_nlattrs(attr, a, attrsp, log, true);
}

static int parse_flow_nlattrs(const struct nlattr *attr,
			      const struct nlattr *a[], u64 *attrsp,
			      bool log)
{
	return __parse_flow_nlattrs(attr, a, attrsp, log, false);
}

static int genev_tun_opt_from_nlattr(const struct nlattr *a,
				     struct sw_flow_match *match, bool is_mask,
				     bool log)
{
	unsigned long opt_key_offset;

	if (nla_len(a) > sizeof(match->key->tun_opts)) {
		OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
			  nla_len(a), sizeof(match->key->tun_opts));
		return -EINVAL;
	}

	if (nla_len(a) % 4 != 0) {
		OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
			  nla_len(a));
		return -EINVAL;
	}

	/* We need to record the length of the options passed
	 * down, otherwise packets with the same format but
	 * additional options will be silently matched.
	 */
	if (!is_mask) {
		SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
				false);
	} else {
		/* This is somewhat unusual because it looks at
		 * both the key and mask while parsing the
		 * attributes (and by extension assumes the key
		 * is parsed first). Normally, we would verify
		 * that each is the correct length and that the
		 * attributes line up in the validate function.
		 * However, that is difficult because this is
		 * variable length and we won't have the
		 * information later.
		 */
		if (match->key->tun_opts_len != nla_len(a)) {
			OVS_NLERR(log, "Geneve option len %d != mask len %d",
				  match->key->tun_opts_len, nla_len(a));
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
	}

	opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
				  nla_len(a), is_mask);
	return 0;
}

static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = {
	[OVS_VXLAN_EXT_GBP]	= { .type = NLA_U32 },
};

static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
				     struct sw_flow_match *match, bool is_mask,
				     bool log)
{
	struct nlattr *tb[OVS_VXLAN_EXT_MAX+1];
	unsigned long opt_key_offset;
	struct ovs_vxlan_opts opts;
	int err;

	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));

	err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy);
	if (err < 0)
		return err;

	memset(&opts, 0, sizeof(opts));

	if (tb[OVS_VXLAN_EXT_GBP])
		opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]);

	if (!is_mask)
		SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
	else
		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);

	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
				  is_mask);
	return 0;
}

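/* Parse a nested %OVS_KEY_ATTR_TUNNEL attribute into 'match'.  Returns
 * a negative errno on failure; on success, returns the attribute type
 * of the option block (Geneve or VXLAN) that was present, or 0 if none.
 */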
static int ipv4_tun_from_nlattr(const struct nlattr *attr,
				struct sw_flow_match *match, bool is_mask,
				bool log)
{
	struct nlattr *a;
	int rem;
	bool ttl = false;
	__be16 tun_flags = 0;
	int opts_type = 0;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		int err;

		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
			OVS_NLERR(log, "Tunnel attr %d out of range max %d",
				  type, OVS_TUNNEL_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (ovs_tunnel_key_lens[type].len != nla_len(a) &&
		    ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) {
			OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
				  type, nla_len(a), ovs_tunnel_key_lens[type].len);
			return -EINVAL;
		}

		switch (type) {
		case OVS_TUNNEL_KEY_ATTR_ID:
			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
					nla_get_be64(a), is_mask);
			tun_flags |= TUNNEL_KEY;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
					nla_get_in_addr(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
					nla_get_in_addr(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TOS:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
					nla_get_u8(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TTL:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
					nla_get_u8(a), is_mask);
			ttl = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
			tun_flags |= TUNNEL_DONT_FRAGMENT;
			break;
		case OVS_TUNNEL_KEY_ATTR_CSUM:
			tun_flags |= TUNNEL_CSUM;
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.tp_src,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_DST:
			SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_OAM:
			tun_flags |= TUNNEL_OAM;
			break;
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			tun_flags |= TUNNEL_GENEVE_OPT;
			opts_type = type;
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			tun_flags |= TUNNEL_VXLAN_OPT;
			opts_type = type;
			break;
		default:
			OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
				  type);
			return -EINVAL;
		}
	}

	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);

	if (rem > 0) {
		OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.",
			  rem);
		return -EINVAL;
	}

	if (!is_mask) {
		if (!match->key->tun_key.ipv4_dst) {
			OVS_NLERR(log, "IPv4 tunnel dst address is zero");
			return -EINVAL;
		}

		if (!ttl) {
			OVS_NLERR(log, "IPv4 tunnel TTL not specified.");
			return -EINVAL;
		}
	}

	return opts_type;
}

static int vxlan_opt_to_nlattr(struct sk_buff *skb,
			       const void *tun_opts, int swkey_tun_opts_len)
{
	const struct ovs_vxlan_opts *opts = tun_opts;
	struct nlattr *nla;

	nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
	if (!nla)
		return -EMSGSIZE;

	if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
		return -EMSGSIZE;

	nla_nest_end(skb, nla);
	return 0;
}

static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
				const struct ovs_key_ipv4_tunnel *output,
				const void *tun_opts, int swkey_tun_opts_len)
{
	if (output->tun_flags & TUNNEL_KEY &&
	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
		return -EMSGSIZE;
	if (output->ipv4_src &&
	    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
			    output->ipv4_src))
		return -EMSGSIZE;
	if (output->ipv4_dst &&
	    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
			    output->ipv4_dst))
		return -EMSGSIZE;
	if (output->ipv4_tos &&
	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
		return -EMSGSIZE;
	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_CSUM) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
		return -EMSGSIZE;
	if (output->tp_src &&
	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
		return -EMSGSIZE;
	if (output->tp_dst &&
	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_OAM) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
		return -EMSGSIZE;
	if (tun_opts) {
		if (output->tun_flags & TUNNEL_GENEVE_OPT &&
		    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
			    swkey_tun_opts_len, tun_opts))
			return -EMSGSIZE;
		else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
			 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
			return -EMSGSIZE;
	}

	return 0;
}

static int ipv4_tun_to_nlattr(struct sk_buff *skb,
			      const struct ovs_key_ipv4_tunnel *output,
			      const void *tun_opts, int swkey_tun_opts_len)
{
	struct nlattr *nla;
	int err;

	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
	if (!nla)
		return -EMSGSIZE;

	err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
	if (err)
		return err;

	nla_nest_end(skb, nla);
	return 0;
}

int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
				  const struct ovs_tunnel_info *egress_tun_info)
{
	return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel,
				    egress_tun_info->options,
				    egress_tun_info->options_len);
}

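/* Extract the metadata fields (dp_hash, recirc_id, priority, in_port,
 * skb_mark and the tunnel key) from 'a' into 'match', clearing each
 * handled bit in '*attrs' so callers can detect leftover attributes.
 */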
static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
				 const struct nlattr **a, bool is_mask,
				 bool log)
{
	if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
		u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);

		SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
		u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);

		SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
		SW_FLOW_KEY_PUT(match, phy.priority,
			  nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);

		if (is_mask) {
			in_port = 0xffffffff; /* Always exact match in_port. */
		} else if (in_port >= DP_MAX_PORTS) {
			OVS_NLERR(log, "Port %d exceeds max allowable %d",
				  in_port, DP_MAX_PORTS);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
	} else if (!is_mask) {
		SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);

		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
		if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
					 is_mask, log) < 0)
			return -EINVAL;
		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
	}
	return 0;
}

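/* Fill in the key (or, with 'is_mask', the mask) half of 'match' from
 * the parsed attribute array 'a'.  Any bit still set in 'attrs' after
 * every known attribute has been consumed is an error.
 */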
static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
				const struct nlattr **a, bool is_mask,
				bool log)
{
	int err;

	err = metadata_from_nlattrs(match, &attrs, a, is_mask, log);
	if (err)
		return err;

	if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
		const struct ovs_key_ethernet *eth_key;

		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
		SW_FLOW_KEY_MEMCPY(match, eth.src,
				eth_key->eth_src, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, eth.dst,
				eth_key->eth_dst, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
	}

	if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
		__be16 tci;

		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		if (!(tci & htons(VLAN_TAG_PRESENT))) {
			if (is_mask)
				OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.");
			else
				OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set.");

			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
		__be16 eth_type;

		eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
		if (is_mask) {
			/* Always exact match EtherType. */
			eth_type = htons(0xffff);
		} else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
			OVS_NLERR(log, "EtherType %x is less than min %x",
				  ntohs(eth_type), ETH_P_802_3_MIN);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
	} else if (!is_mask) {
		SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
		const struct ovs_key_ipv4 *ipv4_key;

		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
				  ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv4_key->ipv4_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv4_key->ipv4_tos, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv4_key->ipv4_ttl, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv4_key->ipv4_frag, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				ipv4_key->ipv4_src, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				ipv4_key->ipv4_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
		const struct ovs_key_ipv6 *ipv6_key;

		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
				  ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}

		if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
			OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n",
				  ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ipv6.label,
				ipv6_key->ipv6_label, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv6_key->ipv6_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv6_key->ipv6_tclass, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv6_key->ipv6_hlimit, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv6_key->ipv6_frag, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
				ipv6_key->ipv6_src,
				sizeof(match->key->ipv6.addr.src),
				is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
				ipv6_key->ipv6_dst,
				sizeof(match->key->ipv6.addr.dst),
				is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
		const struct ovs_key_arp *arp_key;

		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
		if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
			OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
				  arp_key->arp_op);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				arp_key->arp_sip, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
			arp_key->arp_tip, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ntohs(arp_key->arp_op), is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
				arp_key->arp_sha, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
				arp_key->arp_tha, ETH_ALEN, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
		const struct ovs_key_mpls *mpls_key;

		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
		SW_FLOW_KEY_PUT(match, mpls.top_lse,
				mpls_key->mpls_lse, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
		const struct ovs_key_tcp *tcp_key;

		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
		SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_TCP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
		SW_FLOW_KEY_PUT(match, tp.flags,
				nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
				is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
	}

	if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
		const struct ovs_key_udp *udp_key;

		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
		SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_UDP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
		const struct ovs_key_sctp *sctp_key;

		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
		SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
		const struct ovs_key_icmp *icmp_key;

		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmp_key->icmp_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmp_key->icmp_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
		const struct ovs_key_icmpv6 *icmpv6_key;

		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmpv6_key->icmpv6_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmpv6_key->icmpv6_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ND)) {
		const struct ovs_key_nd *nd_key;

		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
			nd_key->nd_target,
			sizeof(match->key->ipv6.nd.target),
			is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
			nd_key->nd_sll, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
				nd_key->nd_tll, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ND);
	}

	if (attrs != 0) {
		OVS_NLERR(log, "Unknown key attributes %llx",
			  (unsigned long long)attrs);
		return -EINVAL;
	}

	return 0;
}

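/* Overwrite the payload of each attribute in an already validated
 * nested stream with 'val', recursing into nested attributes per 'tbl'.
 * mask_set_nlattr() uses this with 0xff to turn a duplicated key
 * attribute stream into an exact-match mask.
 */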
static void nlattr_set(struct nlattr *attr, u8 val,
		       const struct ovs_len_tbl *tbl)
{
	struct nlattr *nla;
	int rem;

	/* The nlattr stream should already have been validated */
	nla_for_each_nested(nla, attr, rem) {
		if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
			nlattr_set(nla, val, tbl[nla_type(nla)].next);
		else
			memset(nla_data(nla), val, nla_len(nla));
	}
}

static void mask_set_nlattr(struct nlattr *attr, u8 val)
{
	nlattr_set(attr, val, ovs_key_lens);
}

/**
 * ovs_nla_get_match - parses Netlink attributes into a flow key and
 * mask. In case the 'nla_mask' is NULL, the flow is treated as exact
 * match flow. Otherwise, it is treated as a wildcarded flow, except
 * when the mask does not include any don't-care bits.
 * @match: receives the extracted flow match information.
 * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
 * attribute sequence. The fields should match those of the packet that
 * triggered the creation of this flow.
 * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_*
 * Netlink attribute that specifies the mask field of the wildcarded flow.
 * @log: Boolean to allow kernel error logging.  Normally true, but when
 * probing for feature compatibility this should be passed in as false to
 * suppress unnecessary error logging.
 */
int ovs_nla_get_match(struct sw_flow_match *match,
		      const struct nlattr *nla_key,
		      const struct nlattr *nla_mask,
		      bool log)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	const struct nlattr *encap;
	struct nlattr *newmask = NULL;
	u64 key_attrs = 0;
	u64 mask_attrs = 0;
	bool encap_valid = false;
	int err;

	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
	if (err)
		return err;

	if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
	    (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
	    (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
		__be16 tci;

		if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
		      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
			OVS_NLERR(log, "Invalid VLAN frame.");
			return -EINVAL;
		}

		key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		encap = a[OVS_KEY_ATTR_ENCAP];
		key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
		encap_valid = true;

		if (tci & htons(VLAN_TAG_PRESENT)) {
			err = parse_flow_nlattrs(encap, a, &key_attrs, log);
			if (err)
				return err;
		} else if (!tci) {
			/* Corner case for truncated 802.1Q header. */
			if (nla_len(encap)) {
				OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute.");
				return -EINVAL;
			}
		} else {
			OVS_NLERR(log, "Encap attr is set for non-VLAN frame");
			return -EINVAL;
		}
	}

	err = ovs_key_from_nlattrs(match, key_attrs, a, false, log);
	if (err)
		return err;

	if (match->mask) {
		if (!nla_mask) {
			/* Create an exact match mask. We need to set to 0xff
			 * all the 'match->mask' fields that have been touched
			 * in 'match->key'. We cannot simply memset
			 * 'match->mask', because padding bytes and fields not
			 * specified in 'match->key' should be left to 0.
			 * Instead, we use a stream of netlink attributes,
			 * copied from 'key' and set to 0xff.
			 * ovs_key_from_nlattrs() will take care of filling
			 * 'match->mask' appropriately.
			 */
			newmask = kmemdup(nla_key,
					  nla_total_size(nla_len(nla_key)),
					  GFP_KERNEL);
			if (!newmask)
				return -ENOMEM;

			mask_set_nlattr(newmask, 0xff);

			/* The userspace does not send tunnel attributes that
			 * are 0, but we should not wildcard them nonetheless.
			 */
			if (match->key->tun_key.ipv4_dst)
				SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
							 0xff, true);

			nla_mask = newmask;
		}

		err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
		if (err)
			goto free_newmask;

		/* Always match on tci. */
		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);

		if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
			__be16 eth_type = 0;
			__be16 tci = 0;

			if (!encap_valid) {
				OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame.");
				err = -EINVAL;
				goto free_newmask;
			}

			mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
			if (a[OVS_KEY_ATTR_ETHERTYPE])
				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

			if (eth_type == htons(0xffff)) {
				mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
				encap = a[OVS_KEY_ATTR_ENCAP];
				err = parse_flow_mask_nlattrs(encap, a,
							      &mask_attrs, log);
				if (err)
					goto free_newmask;
			} else {
				OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).",
					  ntohs(eth_type));
				err = -EINVAL;
				goto free_newmask;
			}

			if (a[OVS_KEY_ATTR_VLAN])
				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

			if (!(tci & htons(VLAN_TAG_PRESENT))) {
				OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).",
					  ntohs(tci));
				err = -EINVAL;
				goto free_newmask;
			}
		}

		err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log);
		if (err)
			goto free_newmask;
	}

	if (!match_validate(match, key_attrs, mask_attrs, log))
		err = -EINVAL;

free_newmask:
	kfree(newmask);
	return err;
}

static size_t get_ufid_len(const struct nlattr *attr, bool log)
{
	size_t len;

	if (!attr)
		return 0;

	len = nla_len(attr);
	if (len < 1 || len > MAX_UFID_LENGTH) {
		OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
			  nla_len(attr), MAX_UFID_LENGTH);
		return 0;
	}

	return len;
}

/* Initializes 'sfid' from 'attr', returning true if 'attr' contains a valid
 * UFID, or false otherwise.
 */
bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
		      bool log)
{
	sfid->ufid_len = get_ufid_len(attr, log);
	if (sfid->ufid_len)
		memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);

	return sfid->ufid_len;
}

int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
			   const struct sw_flow_key *key, bool log)
{
	struct sw_flow_key *new_key;

	if (ovs_nla_get_ufid(sfid, ufid, log))
		return 0;

	/* If UFID was not provided, use unmasked key. */
	new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
	if (!new_key)
		return -ENOMEM;
	memcpy(new_key, key, sizeof(*key));
	sfid->unmasked_key = new_key;

	return 0;
}

u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
{
	return attr ? nla_get_u32(attr) : 0;
}

/**
 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence.
 * @key: Receives extracted in_port, priority, tun_key and skb_mark.
 * @log: Boolean to allow kernel error logging.  Normally true, but when
 * probing for feature compatibility this should be passed in as false to
 * suppress unnecessary error logging.
 *
 * This parses a series of Netlink attributes that form a flow key, which must
 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
 * get the metadata, that is, the parts of the flow key that cannot be
 * extracted from the packet itself.
 */
int ovs_nla_get_flow_metadata(const struct nlattr *attr,
			      struct sw_flow_key *key,
			      bool log)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	struct sw_flow_match match;
	u64 attrs = 0;
	int err;

	err = parse_flow_nlattrs(attr, a, &attrs, log);
	if (err)
		return -EINVAL;

	memset(&match, 0, sizeof(match));
	match.key = key;

	key->phy.in_port = DP_MAX_PORTS;

	return metadata_from_nlattrs(&match, &attrs, a, false, log);
}

static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
			     const struct sw_flow_key *output, bool is_mask,
			     struct sk_buff *skb)
{
	struct ovs_key_ethernet *eth_key;
	struct nlattr *nla, *encap;

	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
		goto nla_put_failure;

	if (swkey->tun_key.ipv4_dst || is_mask) {
		const void *opts = NULL;

		if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
			opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);

		if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
				       swkey->tun_opts_len))
			goto nla_put_failure;
	}

	if (swkey->phy.in_port == DP_MAX_PORTS) {
		if (is_mask && (output->phy.in_port == 0xffff))
			if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
				goto nla_put_failure;
	} else {
		u16 upper_u16;
		upper_u16 = !is_mask ? 0 : 0xffff;

		if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
				(upper_u16 << 16) | output->phy.in_port))
			goto nla_put_failure;
	}

	if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
		goto nla_put_failure;

	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
	if (!nla)
		goto nla_put_failure;

	eth_key = nla_data(nla);
	ether_addr_copy(eth_key->eth_src, output->eth.src);
	ether_addr_copy(eth_key->eth_dst, output->eth.dst);

	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
		__be16 eth_type;
		eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
			goto nla_put_failure;
		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
		if (!swkey->eth.tci)
			goto unencap;
	} else
		encap = NULL;

	if (swkey->eth.type == htons(ETH_P_802_2)) {
		/*
		 * Ethertype 802.2 is represented in the netlink with omitted
		 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
		 * 0xffff in the mask attribute.  Ethertype can also
		 * be wildcarded.
		 */
		if (is_mask && output->eth.type)
			if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
						output->eth.type))
				goto nla_put_failure;
		goto unencap;
	}

	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
		goto nla_put_failure;

	if (swkey->eth.type == htons(ETH_P_IP)) {
		struct ovs_key_ipv4 *ipv4_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
		if (!nla)
			goto nla_put_failure;
		ipv4_key = nla_data(nla);
		ipv4_key->ipv4_src = output->ipv4.addr.src;
		ipv4_key->ipv4_dst = output->ipv4.addr.dst;
		ipv4_key->ipv4_proto = output->ip.proto;
		ipv4_key->ipv4_tos = output->ip.tos;
		ipv4_key->ipv4_ttl = output->ip.ttl;
		ipv4_key->ipv4_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
		struct ovs_key_ipv6 *ipv6_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
		if (!nla)
			goto nla_put_failure;
		ipv6_key = nla_data(nla);
		memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
				sizeof(ipv6_key->ipv6_src));
		memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
				sizeof(ipv6_key->ipv6_dst));
		ipv6_key->ipv6_label = output->ipv6.label;
		ipv6_key->ipv6_proto = output->ip.proto;
		ipv6_key->ipv6_tclass = output->ip.tos;
		ipv6_key->ipv6_hlimit = output->ip.ttl;
		ipv6_key->ipv6_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
		   swkey->eth.type == htons(ETH_P_RARP)) {
		struct ovs_key_arp *arp_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
		if (!nla)
			goto nla_put_failure;
		arp_key = nla_data(nla);
		memset(arp_key, 0, sizeof(struct ovs_key_arp));
		arp_key->arp_sip = output->ipv4.addr.src;
		arp_key->arp_tip = output->ipv4.addr.dst;
		arp_key->arp_op = htons(output->ip.proto);
		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
	} else if (eth_p_mpls(swkey->eth.type)) {
		struct ovs_key_mpls *mpls_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
		if (!nla)
			goto nla_put_failure;
		mpls_key = nla_data(nla);
		mpls_key->mpls_lse = output->mpls.top_lse;
	}

	if ((swkey->eth.type == htons(ETH_P_IP) ||
	     swkey->eth.type == htons(ETH_P_IPV6)) &&
	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {

		if (swkey->ip.proto == IPPROTO_TCP) {
			struct ovs_key_tcp *tcp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
			if (!nla)
				goto nla_put_failure;
			tcp_key = nla_data(nla);
			tcp_key->tcp_src = output->tp.src;
			tcp_key->tcp_dst = output->tp.dst;
			if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
					 output->tp.flags))
				goto nla_put_failure;
		} else if (swkey->ip.proto == IPPROTO_UDP) {
			struct ovs_key_udp *udp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
			if (!nla)
				goto nla_put_failure;
			udp_key = nla_data(nla);
			udp_key->udp_src = output->tp.src;
			udp_key->udp_dst = output->tp.dst;
		} else if (swkey->ip.proto == IPPROTO_SCTP) {
			struct ovs_key_sctp *sctp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
			if (!nla)
				goto nla_put_failure;
			sctp_key = nla_data(nla);
			sctp_key->sctp_src = output->tp.src;
			sctp_key->sctp_dst = output->tp.dst;
		} else if (swkey->eth.type == htons(ETH_P_IP) &&
			   swkey->ip.proto == IPPROTO_ICMP) {
			struct ovs_key_icmp *icmp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
			if (!nla)
				goto nla_put_failure;
			icmp_key = nla_data(nla);
			icmp_key->icmp_type = ntohs(output->tp.src);
			icmp_key->icmp_code = ntohs(output->tp.dst);
		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
			   swkey->ip.proto == IPPROTO_ICMPV6) {
			struct ovs_key_icmpv6 *icmpv6_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
						sizeof(*icmpv6_key));
			if (!nla)
				goto nla_put_failure;
			icmpv6_key = nla_data(nla);
			icmpv6_key->icmpv6_type = ntohs(output->tp.src);
			icmpv6_key->icmpv6_code = ntohs(output->tp.dst);

			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
				struct ovs_key_nd *nd_key;

				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
				if (!nla)
					goto nla_put_failure;
				nd_key = nla_data(nla);
				memcpy(nd_key->nd_target, &output->ipv6.nd.target,
							sizeof(nd_key->nd_target));
				ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
				ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
			}
		}
	}

unencap:
	if (encap)
		nla_nest_end(skb, encap);

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

int ovs_nla_put_key(const struct sw_flow_key *swkey,
		    const struct sw_flow_key *output, int attr, bool is_mask,
		    struct sk_buff *skb)
{
	int err;
	struct nlattr *nla;

	nla = nla_nest_start(skb, attr);
	if (!nla)
		return -EMSGSIZE;
	err = __ovs_nla_put_key(swkey, output, is_mask, skb);
	if (err)
		return err;
	nla_nest_end(skb, nla);

	return 0;
}

/* Called with ovs_mutex or RCU read lock. */
int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
{
	if (ovs_identifier_is_ufid(&flow->id))
		return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
			       flow->id.ufid);

	return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
			       OVS_FLOW_ATTR_KEY, false, skb);
}

/* Called with ovs_mutex or RCU read lock. */
int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
{
	return ovs_nla_put_key(&flow->key, &flow->key,
				OVS_FLOW_ATTR_KEY, false, skb);
}

/* Called with ovs_mutex or RCU read lock. */
int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
{
	return ovs_nla_put_key(&flow->key, &flow->mask->key,
				OVS_FLOW_ATTR_MASK, true, skb);
}

#define MAX_ACTIONS_BUFSIZE	(32 * 1024)

static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
{
	struct sw_flow_actions *sfa;

	if (size > MAX_ACTIONS_BUFSIZE) {
		OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
		return ERR_PTR(-EINVAL);
	}

	sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
	if (!sfa)
		return ERR_PTR(-ENOMEM);

	sfa->actions_len = 0;
	return sfa;
}

/* Schedules 'sf_acts' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible.
 */
void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
{
	kfree_rcu(sf_acts, rcu);
}

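/* Ensure room for 'attr_len' more bytes of actions, doubling the
 * allocation (as reported by ksize()) as needed up to
 * MAX_ACTIONS_BUFSIZE, and return a pointer to the reserved space.
 */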
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
				       int attr_len, bool log)
{
	struct sw_flow_actions *acts;
	int new_acts_size;
	int req_size = NLA_ALIGN(attr_len);
	int next_offset = offsetof(struct sw_flow_actions, actions) +
					(*sfa)->actions_len;

	if (req_size <= (ksize(*sfa) - next_offset))
		goto out;

	new_acts_size = ksize(*sfa) * 2;

	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
			return ERR_PTR(-EMSGSIZE);
		new_acts_size = MAX_ACTIONS_BUFSIZE;
	}

	acts = nla_alloc_flow_actions(new_acts_size, log);
	if (IS_ERR(acts))
		return (void *)acts;

	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
	acts->actions_len = (*sfa)->actions_len;
	kfree(*sfa);
	*sfa = acts;

out:
	(*sfa)->actions_len += req_size;
	return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}

static struct nlattr *__add_action(struct sw_flow_actions **sfa,
				   int attrtype, void *data, int len, bool log)
{
	struct nlattr *a;

	a = reserve_sfa_size(sfa, nla_attr_size(len), log);
	if (IS_ERR(a))
		return a;

	a->nla_type = attrtype;
	a->nla_len = nla_attr_size(len);

	if (data)
		memcpy(nla_data(a), data, len);
	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));

	return a;
}

static int add_action(struct sw_flow_actions **sfa, int attrtype,
		      void *data, int len, bool log)
{
	struct nlattr *a;

	a = __add_action(sfa, attrtype, data, len, log);

	return PTR_ERR_OR_ZERO(a);
}

static inline int add_nested_action_start(struct sw_flow_actions **sfa,
					  int attrtype, bool log)
{
	int used = (*sfa)->actions_len;
	int err;

	err = add_action(sfa, attrtype, NULL, 0, log);
	if (err)
		return err;

	return used;
}

static inline void add_nested_action_end(struct sw_flow_actions *sfa,
					 int st_offset)
{
	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
							       st_offset);

	a->nla_len = sfa->actions_len - st_offset;
}

static int __ovs_nla_copy_actions(const struct nlattr *attr,
				  const struct sw_flow_key *key,
				  int depth, struct sw_flow_actions **sfa,
				  __be16 eth_type, __be16 vlan_tci, bool log);

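/* Validate a sample() action's probability and nested action list,
 * then copy it into 'sfa' as OVS_ACTION_ATTR_SAMPLE, recursing into
 * the nested actions via __ovs_nla_copy_actions().
 */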
static int validate_and_copy_sample(const struct nlattr *attr,
				    const struct sw_flow_key *key, int depth,
				    struct sw_flow_actions **sfa,
				    __be16 eth_type, __be16 vlan_tci, bool log)
{
	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
	const struct nlattr *probability, *actions;
	const struct nlattr *a;
	int rem, start, err, st_acts;

	memset(attrs, 0, sizeof(attrs));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
			return -EINVAL;
		attrs[type] = a;
	}
	if (rem)
		return -EINVAL;

	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
	if (!probability || nla_len(probability) != sizeof(u32))
		return -EINVAL;

	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
		return -EINVAL;

	/* validation done, copy sample action. */
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
	if (start < 0)
		return start;
	err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
			 nla_data(probability), sizeof(u32), log);
	if (err)
		return err;
	st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
	if (st_acts < 0)
		return st_acts;

	err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
				     eth_type, vlan_tci, log);
	if (err)
		return err;

	add_nested_action_end(*sfa, st_acts);
	add_nested_action_end(*sfa, start);

	return 0;
}

void ovs_match_init(struct sw_flow_match *match,
		    struct sw_flow_key *key,
		    struct sw_flow_mask *mask)
{
	memset(match, 0, sizeof(*match));
	match->key = key;
	match->mask = mask;

	memset(key, 0, sizeof(*key));

	if (mask) {
		memset(&mask->key, 0, sizeof(mask->key));
		mask->range.start = mask->range.end = 0;
	}
}

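/* Walk the Geneve options in the flow key as a sequence of TLVs,
 * checking each fits within tun_opts_len, and set TUNNEL_CRIT_OPT if
 * any option has its critical bit set.
 */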
static int validate_geneve_opts(struct sw_flow_key *key)
{
	struct geneve_opt *option;
	int opts_len = key->tun_opts_len;
	bool crit_opt = false;

	option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
	while (opts_len > 0) {
		int len;

		if (opts_len < sizeof(*option))
			return -EINVAL;

		len = sizeof(*option) + option->length * 4;
		if (len > opts_len)
			return -EINVAL;

		crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);

		option = (struct geneve_opt *)((u8 *)option + len);
		opts_len -= len;
	}

	key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;

	return 0;
}

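/* Convert a set(tunnel(...)) action into an OVS_KEY_ATTR_TUNNEL_INFO
 * action, copying any tunnel options into the action itself since the
 * original netlink attributes go away after flow setup.
 */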
static int validate_and_copy_set_tun(const struct nlattr *attr,
				     struct sw_flow_actions **sfa, bool log)
{
	struct sw_flow_match match;
	struct sw_flow_key key;
	struct ovs_tunnel_info *tun_info;
	struct nlattr *a;
	int err = 0, start, opts_type;

	ovs_match_init(&match, &key, NULL);
	opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
	if (opts_type < 0)
		return opts_type;

	if (key.tun_opts_len) {
		switch (opts_type) {
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			err = validate_geneve_opts(&key);
			if (err < 0)
				return err;
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			break;
		}
	}

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
	if (start < 0)
		return start;

	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
			 sizeof(*tun_info) + key.tun_opts_len, log);
	if (IS_ERR(a))
		return PTR_ERR(a);

	tun_info = nla_data(a);
	tun_info->tunnel = key.tun_key;
	tun_info->options_len = key.tun_opts_len;

	if (tun_info->options_len) {
		/* We need to store the options in the action itself since
		 * everything else will go away after flow setup. We can append
		 * it to tun_info and then point there.
		 */
		memcpy((tun_info + 1),
		       TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len);
		tun_info->options = (tun_info + 1);
	} else {
		tun_info->options = NULL;
	}

	add_nested_action_end(*sfa, start);

	return err;
}

/* Return false if there are any non-masked bits set.
 * Mask follows data immediately, before any netlink padding.
 */
static bool validate_masked(u8 *data, int len)
{
	u8 *mask = data + len;

	while (len--)
		if (*data++ & ~*mask++)
			return false;

	return true;
}

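/* Validate a single set() or masked set() action.  For a masked set the
 * netlink payload is the key immediately followed by a mask of equal
 * length.  Non-masked, non-tunnel set actions are converted below into
 * OVS_ACTION_ATTR_SET_TO_MASKED with an all-ones mask so that the
 * execution path only has to handle the masked form.
 */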
static int validate_set(const struct nlattr *a,
			const struct sw_flow_key *flow_key,
			struct sw_flow_actions **sfa,
			bool *skip_copy, __be16 eth_type, bool masked, bool log)
{
	const struct nlattr *ovs_key = nla_data(a);
	int key_type = nla_type(ovs_key);
	size_t key_len;

	/* There can be only one key in an action */
	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
		return -EINVAL;

	key_len = nla_len(ovs_key);
	if (masked)
		key_len /= 2;

	if (key_type > OVS_KEY_ATTR_MAX ||
	    (ovs_key_lens[key_type].len != key_len &&
	     ovs_key_lens[key_type].len != OVS_ATTR_NESTED))
		return -EINVAL;

	if (masked && !validate_masked(nla_data(ovs_key), key_len))
		return -EINVAL;

	switch (key_type) {
	const struct ovs_key_ipv4 *ipv4_key;
	const struct ovs_key_ipv6 *ipv6_key;
	int err;

	case OVS_KEY_ATTR_PRIORITY:
	case OVS_KEY_ATTR_SKB_MARK:
	case OVS_KEY_ATTR_ETHERNET:
		break;

	case OVS_KEY_ATTR_TUNNEL:
		if (eth_p_mpls(eth_type))
			return -EINVAL;

		if (masked)
			return -EINVAL; /* Masked tunnel set not supported. */

		*skip_copy = true;
		err = validate_and_copy_set_tun(a, sfa, log);
		if (err)
			return err;
		break;

	case OVS_KEY_ATTR_IPV4:
		if (eth_type != htons(ETH_P_IP))
			return -EINVAL;

		ipv4_key = nla_data(ovs_key);

		if (masked) {
			const struct ovs_key_ipv4 *mask = ipv4_key + 1;

			/* Non-writeable fields. */
			if (mask->ipv4_proto || mask->ipv4_frag)
				return -EINVAL;
		} else {
			if (ipv4_key->ipv4_proto != flow_key->ip.proto)
				return -EINVAL;

			if (ipv4_key->ipv4_frag != flow_key->ip.frag)
				return -EINVAL;
		}
		break;

	case OVS_KEY_ATTR_IPV6:
		if (eth_type != htons(ETH_P_IPV6))
			return -EINVAL;

		ipv6_key = nla_data(ovs_key);

		if (masked) {
			const struct ovs_key_ipv6 *mask = ipv6_key + 1;

			/* Non-writeable fields. */
			if (mask->ipv6_proto || mask->ipv6_frag)
				return -EINVAL;

			/* Invalid bits in the flow label mask? */
			if (ntohl(mask->ipv6_label) & 0xFFF00000)
				return -EINVAL;
		} else {
			if (ipv6_key->ipv6_proto != flow_key->ip.proto)
				return -EINVAL;

			if (ipv6_key->ipv6_frag != flow_key->ip.frag)
				return -EINVAL;
		}
		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_TCP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_TCP)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_UDP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_UDP)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_MPLS:
		if (!eth_p_mpls(eth_type))
			return -EINVAL;
		break;

	case OVS_KEY_ATTR_SCTP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_SCTP)
			return -EINVAL;

		break;

	default:
		return -EINVAL;
	}

	/* Convert non-masked non-tunnel set actions to masked set actions. */
	if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
		int start, len = key_len * 2;
		struct nlattr *at;

		*skip_copy = true;

		start = add_nested_action_start(sfa,
						OVS_ACTION_ATTR_SET_TO_MASKED,
						log);
		if (start < 0)
			return start;

		at = __add_action(sfa, key_type, NULL, len, log);
		if (IS_ERR(at))
			return PTR_ERR(at);

		memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
		memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
		/* Clear non-writeable bits from otherwise writeable fields. */
		if (key_type == OVS_KEY_ATTR_IPV6) {
			struct ovs_key_ipv6 *mask = nla_data(at) + key_len;

			mask->ipv6_label &= htonl(0x000FFFFF);
		}
		add_nested_action_end(*sfa, start);
	}

	return 0;
}

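/* An OVS_ACTION_ATTR_USERSPACE action must carry a non-zero Netlink PID in
 * OVS_USERSPACE_ATTR_PID; the remaining attributes are only checked
 * against userspace_policy.
 */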
static int validate_userspace(const struct nlattr *attr)
{
	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
		[OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
	};
	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
	int error;

	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
				 attr, userspace_policy);
	if (error)
		return error;

	if (!a[OVS_USERSPACE_ATTR_PID] ||
	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
		return -EINVAL;

	return 0;
}

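/* Append a verbatim copy of action 'from' to the action list in '*sfa',
 * reserving space first; the copy includes the netlink alignment padding.
 */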
static int copy_action(const struct nlattr *from,
		       struct sw_flow_actions **sfa, bool log)
{
	int totlen = NLA_ALIGN(from->nla_len);
	struct nlattr *to;

	to = reserve_sfa_size(sfa, from->nla_len, log);
	if (IS_ERR(to))
		return PTR_ERR(to);

	memcpy(to, from, totlen);
	return 0;
}

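/* Validate and copy a list of actions, tracking the effective 'eth_type'
 * and 'vlan_tci' as VLAN and MPLS push/pop actions are encountered so that
 * later actions are checked against the headers the packet will actually
 * have at that point.  Recursion through sample actions is bounded by
 * SAMPLE_ACTION_DEPTH.
 */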
static int __ovs_nla_copy_actions(const struct nlattr *attr,
				  const struct sw_flow_key *key,
				  int depth, struct sw_flow_actions **sfa,
				  __be16 eth_type, __be16 vlan_tci, bool log)
{
	const struct nlattr *a;
	int rem, err;

	if (depth >= SAMPLE_ACTION_DEPTH)
		return -EOVERFLOW;

	nla_for_each_nested(a, attr, rem) {
		/* Expected argument lengths, (u32)-1 for variable length. */
		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
			[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
			[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
			[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
			[OVS_ACTION_ATTR_POP_VLAN] = 0,
			[OVS_ACTION_ATTR_SET] = (u32)-1,
			[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
		};
		const struct ovs_action_push_vlan *vlan;
		int type = nla_type(a);
		bool skip_copy;

		if (type > OVS_ACTION_ATTR_MAX ||
		    (action_lens[type] != nla_len(a) &&
		     action_lens[type] != (u32)-1))
			return -EINVAL;

		skip_copy = false;
		switch (type) {
		case OVS_ACTION_ATTR_UNSPEC:
			return -EINVAL;

		case OVS_ACTION_ATTR_USERSPACE:
			err = validate_userspace(a);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_OUTPUT:
			if (nla_get_u32(a) >= DP_MAX_PORTS)
				return -EINVAL;
			break;

		case OVS_ACTION_ATTR_HASH: {
			const struct ovs_action_hash *act_hash = nla_data(a);

			switch (act_hash->hash_alg) {
			case OVS_HASH_ALG_L4:
				break;
			default:
				return -EINVAL;
			}

			break;
		}

		case OVS_ACTION_ATTR_POP_VLAN:
			vlan_tci = htons(0);
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			vlan = nla_data(a);
			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
				return -EINVAL;
			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
				return -EINVAL;
			vlan_tci = vlan->vlan_tci;
			break;

		case OVS_ACTION_ATTR_RECIRC:
			break;

		case OVS_ACTION_ATTR_PUSH_MPLS: {
			const struct ovs_action_push_mpls *mpls = nla_data(a);

			if (!eth_p_mpls(mpls->mpls_ethertype))
				return -EINVAL;
			/* Prohibit pushing MPLS onto anything but a white
			 * list of ethertypes for which the tag order is
			 * known.
			 */
			if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
			    (eth_type != htons(ETH_P_IP) &&
			     eth_type != htons(ETH_P_IPV6) &&
			     eth_type != htons(ETH_P_ARP) &&
			     eth_type != htons(ETH_P_RARP) &&
			     !eth_p_mpls(eth_type)))
				return -EINVAL;
			eth_type = mpls->mpls_ethertype;
			break;
		}

		case OVS_ACTION_ATTR_POP_MPLS:
			if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
			    !eth_p_mpls(eth_type))
				return -EINVAL;

			/* Disallow subsequent L2.5+ set and mpls_pop actions
			 * as there is no check here to ensure that the new
			 * eth_type is valid and thus set actions could
			 * write off the end of the packet or otherwise
			 * corrupt it.
			 *
			 * Support for these actions is planned using packet
			 * recirculation.
			 */
			eth_type = htons(0);
			break;

		case OVS_ACTION_ATTR_SET:
			err = validate_set(a, key, sfa,
					   &skip_copy, eth_type, false, log);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SET_MASKED:
			err = validate_set(a, key, sfa,
					   &skip_copy, eth_type, true, log);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SAMPLE:
			err = validate_and_copy_sample(a, key, depth, sfa,
						       eth_type, vlan_tci, log);
			if (err)
				return err;
			skip_copy = true;
			break;

		default:
			OVS_NLERR(log, "Unknown action type %d", type);
			return -EINVAL;
		}
		if (!skip_copy) {
			err = copy_action(a, sfa, log);
			if (err)
				return err;
		}
	}

	if (rem > 0)
		return -EINVAL;

	return 0;
}

/* 'key' must be the masked key. */
int ovs_nla_copy_actions(const struct nlattr *attr,
			 const struct sw_flow_key *key,
			 struct sw_flow_actions **sfa, bool log)
{
	int err;

	*sfa = nla_alloc_flow_actions(nla_len(attr), log);
	if (IS_ERR(*sfa))
		return PTR_ERR(*sfa);

	err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
				     key->eth.tci, log);
	if (err)
		kfree(*sfa);

	return err;
}

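/* Translate a previously copied sample action back to netlink format,
 * re-nesting the probability and the (recursively converted) nested action
 * list under OVS_ACTION_ATTR_SAMPLE.
 */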
static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
{
	const struct nlattr *a;
	struct nlattr *start;
	int err = 0, rem;

	start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
	if (!start)
		return -EMSGSIZE;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		struct nlattr *st_sample;

		switch (type) {
		case OVS_SAMPLE_ATTR_PROBABILITY:
			if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
				    sizeof(u32), nla_data(a)))
				return -EMSGSIZE;
			break;
		case OVS_SAMPLE_ATTR_ACTIONS:
			st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
			if (!st_sample)
				return -EMSGSIZE;
			err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
			if (err)
				return err;
			nla_nest_end(skb, st_sample);
			break;
		}
	}

	nla_nest_end(skb, start);
	return err;
}

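/* Convert a set action back to netlink format.  Kernel-internal
 * OVS_KEY_ATTR_TUNNEL_INFO attributes are expanded back into the tunnel
 * key representation userspace expects; everything else is emitted
 * unchanged.
 */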
static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
{
	const struct nlattr *ovs_key = nla_data(a);
	int key_type = nla_type(ovs_key);
	struct nlattr *start;
	int err;

	switch (key_type) {
	case OVS_KEY_ATTR_TUNNEL_INFO: {
		struct ovs_tunnel_info *tun_info = nla_data(ovs_key);

		start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
		if (!start)
			return -EMSGSIZE;

		err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
					 tun_info->options_len ?
						tun_info->options : NULL,
					 tun_info->options_len);
		if (err)
			return err;
		nla_nest_end(skb, start);
		break;
	}
	default:
		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
			return -EMSGSIZE;
		break;
	}

	return 0;
}

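/* Emit only the key half of the kernel-internal key/mask pair, restoring
 * the OVS_ACTION_ATTR_SET attribute that userspace originally passed in.
 */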
static int masked_set_action_to_set_action_attr(const struct nlattr *a,
						struct sk_buff *skb)
{
	const struct nlattr *ovs_key = nla_data(a);
	struct nlattr *nla;
	size_t key_len = nla_len(ovs_key) / 2;

	/* Revert the conversion we did from a non-masked set action to
	 * masked set action.
	 */
	nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
	if (!nla)
		return -EMSGSIZE;

	if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
		return -EMSGSIZE;

	nla_nest_end(skb, nla);
	return 0;
}

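/* Convert the kernel-internal action list 'attr' of 'len' bytes back into
 * the netlink representation expected by userspace.
 */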
int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
{
	const struct nlattr *a;
	int rem, err;

	nla_for_each_attr(a, attr, len, rem) {
		int type = nla_type(a);

		switch (type) {
		case OVS_ACTION_ATTR_SET:
			err = set_action_to_attr(a, skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SET_TO_MASKED:
			err = masked_set_action_to_set_action_attr(a, skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SAMPLE:
			err = sample_action_to_attr(a, skb);
			if (err)
				return err;
			break;
		default:
			if (nla_put(skb, type, nla_len(a), nla_data(a)))
				return -EMSGSIZE;
			break;
		}
	}

	return 0;
}
