1/* 2 * Copyright (c) 2007-2014 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21#include "flow.h" 22#include "datapath.h" 23#include <linux/uaccess.h> 24#include <linux/netdevice.h> 25#include <linux/etherdevice.h> 26#include <linux/if_ether.h> 27#include <linux/if_vlan.h> 28#include <net/llc_pdu.h> 29#include <linux/kernel.h> 30#include <linux/jhash.h> 31#include <linux/jiffies.h> 32#include <linux/llc.h> 33#include <linux/module.h> 34#include <linux/in.h> 35#include <linux/rcupdate.h> 36#include <linux/if_arp.h> 37#include <linux/ip.h> 38#include <linux/ipv6.h> 39#include <linux/sctp.h> 40#include <linux/tcp.h> 41#include <linux/udp.h> 42#include <linux/icmp.h> 43#include <linux/icmpv6.h> 44#include <linux/rculist.h> 45#include <net/geneve.h> 46#include <net/ip.h> 47#include <net/ipv6.h> 48#include <net/ndisc.h> 49#include <net/mpls.h> 50 51#include "flow_netlink.h" 52#include "vport-vxlan.h" 53 54struct ovs_len_tbl { 55 int len; 56 const struct ovs_len_tbl *next; 57}; 58 59#define OVS_ATTR_NESTED -1 60 61static void update_range(struct sw_flow_match *match, 62 size_t offset, size_t size, bool is_mask) 63{ 64 struct sw_flow_key_range *range; 65 size_t start = rounddown(offset, sizeof(long)); 66 size_t end = roundup(offset + size, sizeof(long)); 67 68 if (!is_mask) 69 range = &match->range; 70 else 71 range = &match->mask->range; 72 73 if (range->start == range->end) { 74 range->start = start; 75 range->end = end; 76 return; 77 } 78 79 if (range->start > start) 80 range->start = start; 81 82 if (range->end < end) 83 range->end = end; 84} 85 86#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 87 do { \ 88 update_range(match, offsetof(struct sw_flow_key, field), \ 89 sizeof((match)->key->field), is_mask); \ 90 if (is_mask) \ 91 (match)->mask->key.field = value; \ 92 else \ 93 (match)->key->field = value; \ 94 } while (0) 95 96#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ 97 do { \ 98 update_range(match, offset, len, is_mask); \ 99 if (is_mask) \ 100 memcpy((u8 *)&(match)->mask->key + offset, value_p, \ 101 len); \ 102 else \ 103 memcpy((u8 *)(match)->key + offset, value_p, len); \ 104 } while (0) 105 106#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 107 SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ 108 value_p, len, is_mask) 109 110#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \ 111 do { \ 112 update_range(match, offsetof(struct sw_flow_key, field), \ 113 sizeof((match)->key->field), is_mask); \ 114 if (is_mask) \ 115 memset((u8 *)&(match)->mask->key.field, value, \ 116 sizeof((match)->mask->key.field)); \ 117 else \ 118 memset((u8 *)&(match)->key->field, value, \ 119 sizeof((match)->key->field)); \ 120 } while (0) 121 122static bool match_validate(const struct sw_flow_match *match, 123 u64 key_attrs, u64 mask_attrs, bool log) 124{ 125 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; 126 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 127 128 /* The following mask attributes allowed only if they 129 * pass the validation tests. */ 130 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) 131 | (1 << OVS_KEY_ATTR_IPV6) 132 | (1 << OVS_KEY_ATTR_TCP) 133 | (1 << OVS_KEY_ATTR_TCP_FLAGS) 134 | (1 << OVS_KEY_ATTR_UDP) 135 | (1 << OVS_KEY_ATTR_SCTP) 136 | (1 << OVS_KEY_ATTR_ICMP) 137 | (1 << OVS_KEY_ATTR_ICMPV6) 138 | (1 << OVS_KEY_ATTR_ARP) 139 | (1 << OVS_KEY_ATTR_ND) 140 | (1 << OVS_KEY_ATTR_MPLS)); 141 142 /* Always allowed mask fields. */ 143 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 144 | (1 << OVS_KEY_ATTR_IN_PORT) 145 | (1 << OVS_KEY_ATTR_ETHERTYPE)); 146 147 /* Check key attributes. */ 148 if (match->key->eth.type == htons(ETH_P_ARP) 149 || match->key->eth.type == htons(ETH_P_RARP)) { 150 key_expected |= 1 << OVS_KEY_ATTR_ARP; 151 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 152 mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 153 } 154 155 if (eth_p_mpls(match->key->eth.type)) { 156 key_expected |= 1 << OVS_KEY_ATTR_MPLS; 157 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 158 mask_allowed |= 1 << OVS_KEY_ATTR_MPLS; 159 } 160 161 if (match->key->eth.type == htons(ETH_P_IP)) { 162 key_expected |= 1 << OVS_KEY_ATTR_IPV4; 163 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 164 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; 165 166 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 167 if (match->key->ip.proto == IPPROTO_UDP) { 168 key_expected |= 1 << OVS_KEY_ATTR_UDP; 169 if (match->mask && (match->mask->key.ip.proto == 0xff)) 170 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 171 } 172 173 if (match->key->ip.proto == IPPROTO_SCTP) { 174 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 175 if (match->mask && (match->mask->key.ip.proto == 0xff)) 176 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 177 } 178 179 if (match->key->ip.proto == IPPROTO_TCP) { 180 key_expected |= 1 << OVS_KEY_ATTR_TCP; 181 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 182 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 183 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 184 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 185 } 186 } 187 188 if (match->key->ip.proto == IPPROTO_ICMP) { 189 key_expected |= 1 << OVS_KEY_ATTR_ICMP; 190 if (match->mask && (match->mask->key.ip.proto == 0xff)) 191 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; 192 } 193 } 194 } 195 196 if (match->key->eth.type == htons(ETH_P_IPV6)) { 197 key_expected |= 1 << OVS_KEY_ATTR_IPV6; 198 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 199 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; 200 201 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 202 if (match->key->ip.proto == IPPROTO_UDP) { 203 key_expected |= 1 << OVS_KEY_ATTR_UDP; 204 if (match->mask && (match->mask->key.ip.proto == 0xff)) 205 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 206 } 207 208 if (match->key->ip.proto == IPPROTO_SCTP) { 209 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 210 if (match->mask && (match->mask->key.ip.proto == 0xff)) 211 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 212 } 213 214 if (match->key->ip.proto == IPPROTO_TCP) { 215 key_expected |= 1 << OVS_KEY_ATTR_TCP; 216 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 217 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 218 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 219 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 220 } 221 } 222 223 if (match->key->ip.proto == IPPROTO_ICMPV6) { 224 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; 225 if (match->mask && (match->mask->key.ip.proto == 0xff)) 226 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; 227 228 if (match->key->tp.src == 229 htons(NDISC_NEIGHBOUR_SOLICITATION) || 230 match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 231 key_expected |= 1 << OVS_KEY_ATTR_ND; 232 if (match->mask && (match->mask->key.tp.src == htons(0xff))) 233 mask_allowed |= 1 << OVS_KEY_ATTR_ND; 234 } 235 } 236 } 237 } 238 239 if ((key_attrs & key_expected) != key_expected) { 240 /* Key attributes check failed. */ 241 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", 242 (unsigned long long)key_attrs, 243 (unsigned long long)key_expected); 244 return false; 245 } 246 247 if ((mask_attrs & mask_allowed) != mask_attrs) { 248 /* Mask attributes check failed. */ 249 OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)", 250 (unsigned long long)mask_attrs, 251 (unsigned long long)mask_allowed); 252 return false; 253 } 254 255 return true; 256} 257 258size_t ovs_tun_key_attr_size(void) 259{ 260 /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider 261 * updating this function. 262 */ 263 return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ 264 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ 265 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ 266 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ 267 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ 268 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ 269 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ 270 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ 271 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ 272 /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with 273 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 274 */ 275 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ 276 + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ 277} 278 279size_t ovs_key_attr_size(void) 280{ 281 /* Whenever adding new OVS_KEY_ FIELDS, we should consider 282 * updating this function. 283 */ 284 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22); 285 286 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 287 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ 288 + ovs_tun_key_attr_size() 289 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ 290 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ 291 + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ 292 + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ 293 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 294 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 295 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ 296 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ 297 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 298 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ 299 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ 300 + nla_total_size(28); /* OVS_KEY_ATTR_ND */ 301} 302 303static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 304 [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, 305 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, 306 [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, 307 [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, 308 [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, 309 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, 310 [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, 311 [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, 312 [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, 313 [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, 314 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, 315 [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, 316}; 317 318/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 319static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 320 [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, 321 [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, 322 [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, 323 [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, 324 [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, 325 [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, 326 [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, 327 [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, 328 [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, 329 [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, 330 [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, 331 [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, 332 [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, 333 [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, 334 [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, 335 [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, 336 [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, 337 [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, 338 [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, 339 [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, 340 .next = ovs_tunnel_key_lens, }, 341 [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, 342}; 343 344static bool is_all_zero(const u8 *fp, size_t size) 345{ 346 int i; 347 348 if (!fp) 349 return false; 350 351 for (i = 0; i < size; i++) 352 if (fp[i]) 353 return false; 354 355 return true; 356} 357 358static int __parse_flow_nlattrs(const struct nlattr *attr, 359 const struct nlattr *a[], 360 u64 *attrsp, bool log, bool nz) 361{ 362 const struct nlattr *nla; 363 u64 attrs; 364 int rem; 365 366 attrs = *attrsp; 367 nla_for_each_nested(nla, attr, rem) { 368 u16 type = nla_type(nla); 369 int expected_len; 370 371 if (type > OVS_KEY_ATTR_MAX) { 372 OVS_NLERR(log, "Key type %d is out of range max %d", 373 type, OVS_KEY_ATTR_MAX); 374 return -EINVAL; 375 } 376 377 if (attrs & (1 << type)) { 378 OVS_NLERR(log, "Duplicate key (type %d).", type); 379 return -EINVAL; 380 } 381 382 expected_len = ovs_key_lens[type].len; 383 if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { 384 OVS_NLERR(log, "Key %d has unexpected len %d expected %d", 385 type, nla_len(nla), expected_len); 386 return -EINVAL; 387 } 388 389 if (!nz || !is_all_zero(nla_data(nla), expected_len)) { 390 attrs |= 1 << type; 391 a[type] = nla; 392 } 393 } 394 if (rem) { 395 OVS_NLERR(log, "Message has %d unknown bytes.", rem); 396 return -EINVAL; 397 } 398 399 *attrsp = attrs; 400 return 0; 401} 402 403static int parse_flow_mask_nlattrs(const struct nlattr *attr, 404 const struct nlattr *a[], u64 *attrsp, 405 bool log) 406{ 407 return __parse_flow_nlattrs(attr, a, attrsp, log, true); 408} 409 410static int parse_flow_nlattrs(const struct nlattr *attr, 411 const struct nlattr *a[], u64 *attrsp, 412 bool log) 413{ 414 return __parse_flow_nlattrs(attr, a, attrsp, log, false); 415} 416 417static int genev_tun_opt_from_nlattr(const struct nlattr *a, 418 struct sw_flow_match *match, bool is_mask, 419 bool log) 420{ 421 unsigned long opt_key_offset; 422 423 if (nla_len(a) > sizeof(match->key->tun_opts)) { 424 OVS_NLERR(log, "Geneve option length err (len %d, max %zu).", 425 nla_len(a), sizeof(match->key->tun_opts)); 426 return -EINVAL; 427 } 428 429 if (nla_len(a) % 4 != 0) { 430 OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.", 431 nla_len(a)); 432 return -EINVAL; 433 } 434 435 /* We need to record the length of the options passed 436 * down, otherwise packets with the same format but 437 * additional options will be silently matched. 438 */ 439 if (!is_mask) { 440 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), 441 false); 442 } else { 443 /* This is somewhat unusual because it looks at 444 * both the key and mask while parsing the 445 * attributes (and by extension assumes the key 446 * is parsed first). Normally, we would verify 447 * that each is the correct length and that the 448 * attributes line up in the validate function. 449 * However, that is difficult because this is 450 * variable length and we won't have the 451 * information later. 452 */ 453 if (match->key->tun_opts_len != nla_len(a)) { 454 OVS_NLERR(log, "Geneve option len %d != mask len %d", 455 match->key->tun_opts_len, nla_len(a)); 456 return -EINVAL; 457 } 458 459 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); 460 } 461 462 opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); 463 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), 464 nla_len(a), is_mask); 465 return 0; 466} 467 468static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { 469 [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, 470}; 471 472static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, 473 struct sw_flow_match *match, bool is_mask, 474 bool log) 475{ 476 struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; 477 unsigned long opt_key_offset; 478 struct ovs_vxlan_opts opts; 479 int err; 480 481 BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); 482 483 err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); 484 if (err < 0) 485 return err; 486 487 memset(&opts, 0, sizeof(opts)); 488 489 if (tb[OVS_VXLAN_EXT_GBP]) 490 opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); 491 492 if (!is_mask) 493 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); 494 else 495 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); 496 497 opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); 498 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), 499 is_mask); 500 return 0; 501} 502 503static int ipv4_tun_from_nlattr(const struct nlattr *attr, 504 struct sw_flow_match *match, bool is_mask, 505 bool log) 506{ 507 struct nlattr *a; 508 int rem; 509 bool ttl = false; 510 __be16 tun_flags = 0; 511 int opts_type = 0; 512 513 nla_for_each_nested(a, attr, rem) { 514 int type = nla_type(a); 515 int err; 516 517 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 518 OVS_NLERR(log, "Tunnel attr %d out of range max %d", 519 type, OVS_TUNNEL_KEY_ATTR_MAX); 520 return -EINVAL; 521 } 522 523 if (ovs_tunnel_key_lens[type].len != nla_len(a) && 524 ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { 525 OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", 526 type, nla_len(a), ovs_tunnel_key_lens[type].len); 527 return -EINVAL; 528 } 529 530 switch (type) { 531 case OVS_TUNNEL_KEY_ATTR_ID: 532 SW_FLOW_KEY_PUT(match, tun_key.tun_id, 533 nla_get_be64(a), is_mask); 534 tun_flags |= TUNNEL_KEY; 535 break; 536 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 537 SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, 538 nla_get_in_addr(a), is_mask); 539 break; 540 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 541 SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, 542 nla_get_in_addr(a), is_mask); 543 break; 544 case OVS_TUNNEL_KEY_ATTR_TOS: 545 SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, 546 nla_get_u8(a), is_mask); 547 break; 548 case OVS_TUNNEL_KEY_ATTR_TTL: 549 SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, 550 nla_get_u8(a), is_mask); 551 ttl = true; 552 break; 553 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 554 tun_flags |= TUNNEL_DONT_FRAGMENT; 555 break; 556 case OVS_TUNNEL_KEY_ATTR_CSUM: 557 tun_flags |= TUNNEL_CSUM; 558 break; 559 case OVS_TUNNEL_KEY_ATTR_TP_SRC: 560 SW_FLOW_KEY_PUT(match, tun_key.tp_src, 561 nla_get_be16(a), is_mask); 562 break; 563 case OVS_TUNNEL_KEY_ATTR_TP_DST: 564 SW_FLOW_KEY_PUT(match, tun_key.tp_dst, 565 nla_get_be16(a), is_mask); 566 break; 567 case OVS_TUNNEL_KEY_ATTR_OAM: 568 tun_flags |= TUNNEL_OAM; 569 break; 570 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 571 if (opts_type) { 572 OVS_NLERR(log, "Multiple metadata blocks provided"); 573 return -EINVAL; 574 } 575 576 err = genev_tun_opt_from_nlattr(a, match, is_mask, log); 577 if (err) 578 return err; 579 580 tun_flags |= TUNNEL_GENEVE_OPT; 581 opts_type = type; 582 break; 583 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 584 if (opts_type) { 585 OVS_NLERR(log, "Multiple metadata blocks provided"); 586 return -EINVAL; 587 } 588 589 err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); 590 if (err) 591 return err; 592 593 tun_flags |= TUNNEL_VXLAN_OPT; 594 opts_type = type; 595 break; 596 default: 597 OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", 598 type); 599 return -EINVAL; 600 } 601 } 602 603 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 604 605 if (rem > 0) { 606 OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.", 607 rem); 608 return -EINVAL; 609 } 610 611 if (!is_mask) { 612 if (!match->key->tun_key.ipv4_dst) { 613 OVS_NLERR(log, "IPv4 tunnel dst address is zero"); 614 return -EINVAL; 615 } 616 617 if (!ttl) { 618 OVS_NLERR(log, "IPv4 tunnel TTL not specified."); 619 return -EINVAL; 620 } 621 } 622 623 return opts_type; 624} 625 626static int vxlan_opt_to_nlattr(struct sk_buff *skb, 627 const void *tun_opts, int swkey_tun_opts_len) 628{ 629 const struct ovs_vxlan_opts *opts = tun_opts; 630 struct nlattr *nla; 631 632 nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); 633 if (!nla) 634 return -EMSGSIZE; 635 636 if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) 637 return -EMSGSIZE; 638 639 nla_nest_end(skb, nla); 640 return 0; 641} 642 643static int __ipv4_tun_to_nlattr(struct sk_buff *skb, 644 const struct ovs_key_ipv4_tunnel *output, 645 const void *tun_opts, int swkey_tun_opts_len) 646{ 647 if (output->tun_flags & TUNNEL_KEY && 648 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) 649 return -EMSGSIZE; 650 if (output->ipv4_src && 651 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, 652 output->ipv4_src)) 653 return -EMSGSIZE; 654 if (output->ipv4_dst && 655 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, 656 output->ipv4_dst)) 657 return -EMSGSIZE; 658 if (output->ipv4_tos && 659 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) 660 return -EMSGSIZE; 661 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) 662 return -EMSGSIZE; 663 if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && 664 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 665 return -EMSGSIZE; 666 if ((output->tun_flags & TUNNEL_CSUM) && 667 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 668 return -EMSGSIZE; 669 if (output->tp_src && 670 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src)) 671 return -EMSGSIZE; 672 if (output->tp_dst && 673 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) 674 return -EMSGSIZE; 675 if ((output->tun_flags & TUNNEL_OAM) && 676 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) 677 return -EMSGSIZE; 678 if (tun_opts) { 679 if (output->tun_flags & TUNNEL_GENEVE_OPT && 680 nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, 681 swkey_tun_opts_len, tun_opts)) 682 return -EMSGSIZE; 683 else if (output->tun_flags & TUNNEL_VXLAN_OPT && 684 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) 685 return -EMSGSIZE; 686 } 687 688 return 0; 689} 690 691static int ipv4_tun_to_nlattr(struct sk_buff *skb, 692 const struct ovs_key_ipv4_tunnel *output, 693 const void *tun_opts, int swkey_tun_opts_len) 694{ 695 struct nlattr *nla; 696 int err; 697 698 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); 699 if (!nla) 700 return -EMSGSIZE; 701 702 err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len); 703 if (err) 704 return err; 705 706 nla_nest_end(skb, nla); 707 return 0; 708} 709 710int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, 711 const struct ovs_tunnel_info *egress_tun_info) 712{ 713 return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel, 714 egress_tun_info->options, 715 egress_tun_info->options_len); 716} 717 718static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, 719 const struct nlattr **a, bool is_mask, 720 bool log) 721{ 722 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { 723 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); 724 725 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask); 726 *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH); 727 } 728 729 if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) { 730 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]); 731 732 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask); 733 *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID); 734 } 735 736 if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 737 SW_FLOW_KEY_PUT(match, phy.priority, 738 nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); 739 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 740 } 741 742 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 743 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 744 745 if (is_mask) { 746 in_port = 0xffffffff; /* Always exact match in_port. */ 747 } else if (in_port >= DP_MAX_PORTS) { 748 OVS_NLERR(log, "Port %d exceeds max allowable %d", 749 in_port, DP_MAX_PORTS); 750 return -EINVAL; 751 } 752 753 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); 754 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 755 } else if (!is_mask) { 756 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); 757 } 758 759 if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 760 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 761 762 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); 763 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 764 } 765 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 766 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 767 is_mask, log) < 0) 768 return -EINVAL; 769 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 770 } 771 return 0; 772} 773 774static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, 775 const struct nlattr **a, bool is_mask, 776 bool log) 777{ 778 int err; 779 780 err = metadata_from_nlattrs(match, &attrs, a, is_mask, log); 781 if (err) 782 return err; 783 784 if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { 785 const struct ovs_key_ethernet *eth_key; 786 787 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 788 SW_FLOW_KEY_MEMCPY(match, eth.src, 789 eth_key->eth_src, ETH_ALEN, is_mask); 790 SW_FLOW_KEY_MEMCPY(match, eth.dst, 791 eth_key->eth_dst, ETH_ALEN, is_mask); 792 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 793 } 794 795 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 796 __be16 tci; 797 798 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 799 if (!(tci & htons(VLAN_TAG_PRESENT))) { 800 if (is_mask) 801 OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); 802 else 803 OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); 804 805 return -EINVAL; 806 } 807 808 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); 809 attrs &= ~(1 << OVS_KEY_ATTR_VLAN); 810 } 811 812 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 813 __be16 eth_type; 814 815 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 816 if (is_mask) { 817 /* Always exact match EtherType. */ 818 eth_type = htons(0xffff); 819 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { 820 OVS_NLERR(log, "EtherType %x is less than min %x", 821 ntohs(eth_type), ETH_P_802_3_MIN); 822 return -EINVAL; 823 } 824 825 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 826 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 827 } else if (!is_mask) { 828 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 829 } 830 831 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 832 const struct ovs_key_ipv4 *ipv4_key; 833 834 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 835 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { 836 OVS_NLERR(log, "IPv4 frag type %d is out of range max %d", 837 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); 838 return -EINVAL; 839 } 840 SW_FLOW_KEY_PUT(match, ip.proto, 841 ipv4_key->ipv4_proto, is_mask); 842 SW_FLOW_KEY_PUT(match, ip.tos, 843 ipv4_key->ipv4_tos, is_mask); 844 SW_FLOW_KEY_PUT(match, ip.ttl, 845 ipv4_key->ipv4_ttl, is_mask); 846 SW_FLOW_KEY_PUT(match, ip.frag, 847 ipv4_key->ipv4_frag, is_mask); 848 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 849 ipv4_key->ipv4_src, is_mask); 850 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 851 ipv4_key->ipv4_dst, is_mask); 852 attrs &= ~(1 << OVS_KEY_ATTR_IPV4); 853 } 854 855 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { 856 const struct ovs_key_ipv6 *ipv6_key; 857 858 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 859 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { 860 OVS_NLERR(log, "IPv6 frag type %d is out of range max %d", 861 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); 862 return -EINVAL; 863 } 864 865 if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) { 866 OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n", 867 ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); 868 return -EINVAL; 869 } 870 871 SW_FLOW_KEY_PUT(match, ipv6.label, 872 ipv6_key->ipv6_label, is_mask); 873 SW_FLOW_KEY_PUT(match, ip.proto, 874 ipv6_key->ipv6_proto, is_mask); 875 SW_FLOW_KEY_PUT(match, ip.tos, 876 ipv6_key->ipv6_tclass, is_mask); 877 SW_FLOW_KEY_PUT(match, ip.ttl, 878 ipv6_key->ipv6_hlimit, is_mask); 879 SW_FLOW_KEY_PUT(match, ip.frag, 880 ipv6_key->ipv6_frag, is_mask); 881 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, 882 ipv6_key->ipv6_src, 883 sizeof(match->key->ipv6.addr.src), 884 is_mask); 885 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, 886 ipv6_key->ipv6_dst, 887 sizeof(match->key->ipv6.addr.dst), 888 is_mask); 889 890 attrs &= ~(1 << OVS_KEY_ATTR_IPV6); 891 } 892 893 if (attrs & (1 << OVS_KEY_ATTR_ARP)) { 894 const struct ovs_key_arp *arp_key; 895 896 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 897 if (!is_mask && (arp_key->arp_op & htons(0xff00))) { 898 OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).", 899 arp_key->arp_op); 900 return -EINVAL; 901 } 902 903 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 904 arp_key->arp_sip, is_mask); 905 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 906 arp_key->arp_tip, is_mask); 907 SW_FLOW_KEY_PUT(match, ip.proto, 908 ntohs(arp_key->arp_op), is_mask); 909 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, 910 arp_key->arp_sha, ETH_ALEN, is_mask); 911 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, 912 arp_key->arp_tha, ETH_ALEN, is_mask); 913 914 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 915 } 916 917 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { 918 const struct ovs_key_mpls *mpls_key; 919 920 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); 921 SW_FLOW_KEY_PUT(match, mpls.top_lse, 922 mpls_key->mpls_lse, is_mask); 923 924 attrs &= ~(1 << OVS_KEY_ATTR_MPLS); 925 } 926 927 if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 928 const struct ovs_key_tcp *tcp_key; 929 930 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 931 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); 932 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); 933 attrs &= ~(1 << OVS_KEY_ATTR_TCP); 934 } 935 936 if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { 937 SW_FLOW_KEY_PUT(match, tp.flags, 938 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), 939 is_mask); 940 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); 941 } 942 943 if (attrs & (1 << OVS_KEY_ATTR_UDP)) { 944 const struct ovs_key_udp *udp_key; 945 946 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); 947 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); 948 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); 949 attrs &= ~(1 << OVS_KEY_ATTR_UDP); 950 } 951 952 if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { 953 const struct ovs_key_sctp *sctp_key; 954 955 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); 956 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); 957 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); 958 attrs &= ~(1 << OVS_KEY_ATTR_SCTP); 959 } 960 961 if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { 962 const struct ovs_key_icmp *icmp_key; 963 964 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); 965 SW_FLOW_KEY_PUT(match, tp.src, 966 htons(icmp_key->icmp_type), is_mask); 967 SW_FLOW_KEY_PUT(match, tp.dst, 968 htons(icmp_key->icmp_code), is_mask); 969 attrs &= ~(1 << OVS_KEY_ATTR_ICMP); 970 } 971 972 if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { 973 const struct ovs_key_icmpv6 *icmpv6_key; 974 975 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); 976 SW_FLOW_KEY_PUT(match, tp.src, 977 htons(icmpv6_key->icmpv6_type), is_mask); 978 SW_FLOW_KEY_PUT(match, tp.dst, 979 htons(icmpv6_key->icmpv6_code), is_mask); 980 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); 981 } 982 983 if (attrs & (1 << OVS_KEY_ATTR_ND)) { 984 const struct ovs_key_nd *nd_key; 985 986 nd_key = nla_data(a[OVS_KEY_ATTR_ND]); 987 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, 988 nd_key->nd_target, 989 sizeof(match->key->ipv6.nd.target), 990 is_mask); 991 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, 992 nd_key->nd_sll, ETH_ALEN, is_mask); 993 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, 994 nd_key->nd_tll, ETH_ALEN, is_mask); 995 attrs &= ~(1 << OVS_KEY_ATTR_ND); 996 } 997 998 if (attrs != 0) { 999 OVS_NLERR(log, "Unknown key attributes %llx", 1000 (unsigned long long)attrs); 1001 return -EINVAL; 1002 } 1003 1004 return 0; 1005} 1006 1007static void nlattr_set(struct nlattr *attr, u8 val, 1008 const struct ovs_len_tbl *tbl) 1009{ 1010 struct nlattr *nla; 1011 int rem; 1012 1013 /* The nlattr stream should already have been validated */ 1014 nla_for_each_nested(nla, attr, rem) { 1015 if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED) 1016 nlattr_set(nla, val, tbl[nla_type(nla)].next); 1017 else 1018 memset(nla_data(nla), val, nla_len(nla)); 1019 } 1020} 1021 1022static void mask_set_nlattr(struct nlattr *attr, u8 val) 1023{ 1024 nlattr_set(attr, val, ovs_key_lens); 1025} 1026 1027/** 1028 * ovs_nla_get_match - parses Netlink attributes into a flow key and 1029 * mask. In case the 'mask' is NULL, the flow is treated as exact match 1030 * flow. Otherwise, it is treated as a wildcarded flow, except the mask 1031 * does not include any don't care bit. 1032 * @match: receives the extracted flow match information. 1033 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1034 * sequence. The fields should of the packet that triggered the creation 1035 * of this flow. 1036 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink 1037 * attribute specifies the mask field of the wildcarded flow. 1038 * @log: Boolean to allow kernel error logging. Normally true, but when 1039 * probing for feature compatibility this should be passed in as false to 1040 * suppress unnecessary error logging. 1041 */ 1042int ovs_nla_get_match(struct sw_flow_match *match, 1043 const struct nlattr *nla_key, 1044 const struct nlattr *nla_mask, 1045 bool log) 1046{ 1047 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1048 const struct nlattr *encap; 1049 struct nlattr *newmask = NULL; 1050 u64 key_attrs = 0; 1051 u64 mask_attrs = 0; 1052 bool encap_valid = false; 1053 int err; 1054 1055 err = parse_flow_nlattrs(nla_key, a, &key_attrs, log); 1056 if (err) 1057 return err; 1058 1059 if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && 1060 (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && 1061 (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { 1062 __be16 tci; 1063 1064 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && 1065 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { 1066 OVS_NLERR(log, "Invalid Vlan frame."); 1067 return -EINVAL; 1068 } 1069 1070 key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1071 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1072 encap = a[OVS_KEY_ATTR_ENCAP]; 1073 key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 1074 encap_valid = true; 1075 1076 if (tci & htons(VLAN_TAG_PRESENT)) { 1077 err = parse_flow_nlattrs(encap, a, &key_attrs, log); 1078 if (err) 1079 return err; 1080 } else if (!tci) { 1081 /* Corner case for truncated 802.1Q header. */ 1082 if (nla_len(encap)) { 1083 OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute."); 1084 return -EINVAL; 1085 } 1086 } else { 1087 OVS_NLERR(log, "Encap attr is set for non-VLAN frame"); 1088 return -EINVAL; 1089 } 1090 } 1091 1092 err = ovs_key_from_nlattrs(match, key_attrs, a, false, log); 1093 if (err) 1094 return err; 1095 1096 if (match->mask) { 1097 if (!nla_mask) { 1098 /* Create an exact match mask. We need to set to 0xff 1099 * all the 'match->mask' fields that have been touched 1100 * in 'match->key'. We cannot simply memset 1101 * 'match->mask', because padding bytes and fields not 1102 * specified in 'match->key' should be left to 0. 1103 * Instead, we use a stream of netlink attributes, 1104 * copied from 'key' and set to 0xff. 1105 * ovs_key_from_nlattrs() will take care of filling 1106 * 'match->mask' appropriately. 1107 */ 1108 newmask = kmemdup(nla_key, 1109 nla_total_size(nla_len(nla_key)), 1110 GFP_KERNEL); 1111 if (!newmask) 1112 return -ENOMEM; 1113 1114 mask_set_nlattr(newmask, 0xff); 1115 1116 /* The userspace does not send tunnel attributes that 1117 * are 0, but we should not wildcard them nonetheless. 1118 */ 1119 if (match->key->tun_key.ipv4_dst) 1120 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 1121 0xff, true); 1122 1123 nla_mask = newmask; 1124 } 1125 1126 err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log); 1127 if (err) 1128 goto free_newmask; 1129 1130 /* Always match on tci. */ 1131 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); 1132 1133 if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { 1134 __be16 eth_type = 0; 1135 __be16 tci = 0; 1136 1137 if (!encap_valid) { 1138 OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame."); 1139 err = -EINVAL; 1140 goto free_newmask; 1141 } 1142 1143 mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 1144 if (a[OVS_KEY_ATTR_ETHERTYPE]) 1145 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 1146 1147 if (eth_type == htons(0xffff)) { 1148 mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1149 encap = a[OVS_KEY_ATTR_ENCAP]; 1150 err = parse_flow_mask_nlattrs(encap, a, 1151 &mask_attrs, log); 1152 if (err) 1153 goto free_newmask; 1154 } else { 1155 OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).", 1156 ntohs(eth_type)); 1157 err = -EINVAL; 1158 goto free_newmask; 1159 } 1160 1161 if (a[OVS_KEY_ATTR_VLAN]) 1162 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1163 1164 if (!(tci & htons(VLAN_TAG_PRESENT))) { 1165 OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).", 1166 ntohs(tci)); 1167 err = -EINVAL; 1168 goto free_newmask; 1169 } 1170 } 1171 1172 err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log); 1173 if (err) 1174 goto free_newmask; 1175 } 1176 1177 if (!match_validate(match, key_attrs, mask_attrs, log)) 1178 err = -EINVAL; 1179 1180free_newmask: 1181 kfree(newmask); 1182 return err; 1183} 1184 1185static size_t get_ufid_len(const struct nlattr *attr, bool log) 1186{ 1187 size_t len; 1188 1189 if (!attr) 1190 return 0; 1191 1192 len = nla_len(attr); 1193 if (len < 1 || len > MAX_UFID_LENGTH) { 1194 OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)", 1195 nla_len(attr), MAX_UFID_LENGTH); 1196 return 0; 1197 } 1198 1199 return len; 1200} 1201 1202/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID, 1203 * or false otherwise. 1204 */ 1205bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr, 1206 bool log) 1207{ 1208 sfid->ufid_len = get_ufid_len(attr, log); 1209 if (sfid->ufid_len) 1210 memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len); 1211 1212 return sfid->ufid_len; 1213} 1214 1215int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, 1216 const struct sw_flow_key *key, bool log) 1217{ 1218 struct sw_flow_key *new_key; 1219 1220 if (ovs_nla_get_ufid(sfid, ufid, log)) 1221 return 0; 1222 1223 /* If UFID was not provided, use unmasked key. */ 1224 new_key = kmalloc(sizeof(*new_key), GFP_KERNEL); 1225 if (!new_key) 1226 return -ENOMEM; 1227 memcpy(new_key, key, sizeof(*key)); 1228 sfid->unmasked_key = new_key; 1229 1230 return 0; 1231} 1232 1233u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) 1234{ 1235 return attr ? nla_get_u32(attr) : 0; 1236} 1237 1238/** 1239 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. 1240 * @key: Receives extracted in_port, priority, tun_key and skb_mark. 1241 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1242 * sequence. 1243 * @log: Boolean to allow kernel error logging. Normally true, but when 1244 * probing for feature compatibility this should be passed in as false to 1245 * suppress unnecessary error logging. 1246 * 1247 * This parses a series of Netlink attributes that form a flow key, which must 1248 * take the same form accepted by flow_from_nlattrs(), but only enough of it to 1249 * get the metadata, that is, the parts of the flow key that cannot be 1250 * extracted from the packet itself. 1251 */ 1252 1253int ovs_nla_get_flow_metadata(const struct nlattr *attr, 1254 struct sw_flow_key *key, 1255 bool log) 1256{ 1257 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1258 struct sw_flow_match match; 1259 u64 attrs = 0; 1260 int err; 1261 1262 err = parse_flow_nlattrs(attr, a, &attrs, log); 1263 if (err) 1264 return -EINVAL; 1265 1266 memset(&match, 0, sizeof(match)); 1267 match.key = key; 1268 1269 key->phy.in_port = DP_MAX_PORTS; 1270 1271 return metadata_from_nlattrs(&match, &attrs, a, false, log); 1272} 1273 1274static int __ovs_nla_put_key(const struct sw_flow_key *swkey, 1275 const struct sw_flow_key *output, bool is_mask, 1276 struct sk_buff *skb) 1277{ 1278 struct ovs_key_ethernet *eth_key; 1279 struct nlattr *nla, *encap; 1280 1281 if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) 1282 goto nla_put_failure; 1283 1284 if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash)) 1285 goto nla_put_failure; 1286 1287 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 1288 goto nla_put_failure; 1289 1290 if ((swkey->tun_key.ipv4_dst || is_mask)) { 1291 const void *opts = NULL; 1292 1293 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) 1294 opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); 1295 1296 if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, 1297 swkey->tun_opts_len)) 1298 goto nla_put_failure; 1299 } 1300 1301 if (swkey->phy.in_port == DP_MAX_PORTS) { 1302 if (is_mask && (output->phy.in_port == 0xffff)) 1303 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) 1304 goto nla_put_failure; 1305 } else { 1306 u16 upper_u16; 1307 upper_u16 = !is_mask ? 0 : 0xffff; 1308 1309 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 1310 (upper_u16 << 16) | output->phy.in_port)) 1311 goto nla_put_failure; 1312 } 1313 1314 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 1315 goto nla_put_failure; 1316 1317 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1318 if (!nla) 1319 goto nla_put_failure; 1320 1321 eth_key = nla_data(nla); 1322 ether_addr_copy(eth_key->eth_src, output->eth.src); 1323 ether_addr_copy(eth_key->eth_dst, output->eth.dst); 1324 1325 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 1326 __be16 eth_type; 1327 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); 1328 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || 1329 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) 1330 goto nla_put_failure; 1331 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1332 if (!swkey->eth.tci) 1333 goto unencap; 1334 } else 1335 encap = NULL; 1336 1337 if (swkey->eth.type == htons(ETH_P_802_2)) { 1338 /* 1339 * Ethertype 802.2 is represented in the netlink with omitted 1340 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 1341 * 0xffff in the mask attribute. Ethertype can also 1342 * be wildcarded. 1343 */ 1344 if (is_mask && output->eth.type) 1345 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 1346 output->eth.type)) 1347 goto nla_put_failure; 1348 goto unencap; 1349 } 1350 1351 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 1352 goto nla_put_failure; 1353 1354 if (swkey->eth.type == htons(ETH_P_IP)) { 1355 struct ovs_key_ipv4 *ipv4_key; 1356 1357 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 1358 if (!nla) 1359 goto nla_put_failure; 1360 ipv4_key = nla_data(nla); 1361 ipv4_key->ipv4_src = output->ipv4.addr.src; 1362 ipv4_key->ipv4_dst = output->ipv4.addr.dst; 1363 ipv4_key->ipv4_proto = output->ip.proto; 1364 ipv4_key->ipv4_tos = output->ip.tos; 1365 ipv4_key->ipv4_ttl = output->ip.ttl; 1366 ipv4_key->ipv4_frag = output->ip.frag; 1367 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1368 struct ovs_key_ipv6 *ipv6_key; 1369 1370 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 1371 if (!nla) 1372 goto nla_put_failure; 1373 ipv6_key = nla_data(nla); 1374 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, 1375 sizeof(ipv6_key->ipv6_src)); 1376 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, 1377 sizeof(ipv6_key->ipv6_dst)); 1378 ipv6_key->ipv6_label = output->ipv6.label; 1379 ipv6_key->ipv6_proto = output->ip.proto; 1380 ipv6_key->ipv6_tclass = output->ip.tos; 1381 ipv6_key->ipv6_hlimit = output->ip.ttl; 1382 ipv6_key->ipv6_frag = output->ip.frag; 1383 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1384 swkey->eth.type == htons(ETH_P_RARP)) { 1385 struct ovs_key_arp *arp_key; 1386 1387 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1388 if (!nla) 1389 goto nla_put_failure; 1390 arp_key = nla_data(nla); 1391 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1392 arp_key->arp_sip = output->ipv4.addr.src; 1393 arp_key->arp_tip = output->ipv4.addr.dst; 1394 arp_key->arp_op = htons(output->ip.proto); 1395 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); 1396 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); 1397 } else if (eth_p_mpls(swkey->eth.type)) { 1398 struct ovs_key_mpls *mpls_key; 1399 1400 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); 1401 if (!nla) 1402 goto nla_put_failure; 1403 mpls_key = nla_data(nla); 1404 mpls_key->mpls_lse = output->mpls.top_lse; 1405 } 1406 1407 if ((swkey->eth.type == htons(ETH_P_IP) || 1408 swkey->eth.type == htons(ETH_P_IPV6)) && 1409 swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 1410 1411 if (swkey->ip.proto == IPPROTO_TCP) { 1412 struct ovs_key_tcp *tcp_key; 1413 1414 nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 1415 if (!nla) 1416 goto nla_put_failure; 1417 tcp_key = nla_data(nla); 1418 tcp_key->tcp_src = output->tp.src; 1419 tcp_key->tcp_dst = output->tp.dst; 1420 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, 1421 output->tp.flags)) 1422 goto nla_put_failure; 1423 } else if (swkey->ip.proto == IPPROTO_UDP) { 1424 struct ovs_key_udp *udp_key; 1425 1426 nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1427 if (!nla) 1428 goto nla_put_failure; 1429 udp_key = nla_data(nla); 1430 udp_key->udp_src = output->tp.src; 1431 udp_key->udp_dst = output->tp.dst; 1432 } else if (swkey->ip.proto == IPPROTO_SCTP) { 1433 struct ovs_key_sctp *sctp_key; 1434 1435 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); 1436 if (!nla) 1437 goto nla_put_failure; 1438 sctp_key = nla_data(nla); 1439 sctp_key->sctp_src = output->tp.src; 1440 sctp_key->sctp_dst = output->tp.dst; 1441 } else if (swkey->eth.type == htons(ETH_P_IP) && 1442 swkey->ip.proto == IPPROTO_ICMP) { 1443 struct ovs_key_icmp *icmp_key; 1444 1445 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1446 if (!nla) 1447 goto nla_put_failure; 1448 icmp_key = nla_data(nla); 1449 icmp_key->icmp_type = ntohs(output->tp.src); 1450 icmp_key->icmp_code = ntohs(output->tp.dst); 1451 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1452 swkey->ip.proto == IPPROTO_ICMPV6) { 1453 struct ovs_key_icmpv6 *icmpv6_key; 1454 1455 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1456 sizeof(*icmpv6_key)); 1457 if (!nla) 1458 goto nla_put_failure; 1459 icmpv6_key = nla_data(nla); 1460 icmpv6_key->icmpv6_type = ntohs(output->tp.src); 1461 icmpv6_key->icmpv6_code = ntohs(output->tp.dst); 1462 1463 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1464 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1465 struct ovs_key_nd *nd_key; 1466 1467 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1468 if (!nla) 1469 goto nla_put_failure; 1470 nd_key = nla_data(nla); 1471 memcpy(nd_key->nd_target, &output->ipv6.nd.target, 1472 sizeof(nd_key->nd_target)); 1473 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll); 1474 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll); 1475 } 1476 } 1477 } 1478 1479unencap: 1480 if (encap) 1481 nla_nest_end(skb, encap); 1482 1483 return 0; 1484 1485nla_put_failure: 1486 return -EMSGSIZE; 1487} 1488 1489int ovs_nla_put_key(const struct sw_flow_key *swkey, 1490 const struct sw_flow_key *output, int attr, bool is_mask, 1491 struct sk_buff *skb) 1492{ 1493 int err; 1494 struct nlattr *nla; 1495 1496 nla = nla_nest_start(skb, attr); 1497 if (!nla) 1498 return -EMSGSIZE; 1499 err = __ovs_nla_put_key(swkey, output, is_mask, skb); 1500 if (err) 1501 return err; 1502 nla_nest_end(skb, nla); 1503 1504 return 0; 1505} 1506 1507/* Called with ovs_mutex or RCU read lock. */ 1508int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) 1509{ 1510 if (ovs_identifier_is_ufid(&flow->id)) 1511 return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, 1512 flow->id.ufid); 1513 1514 return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, 1515 OVS_FLOW_ATTR_KEY, false, skb); 1516} 1517 1518/* Called with ovs_mutex or RCU read lock. */ 1519int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) 1520{ 1521 return ovs_nla_put_key(&flow->key, &flow->key, 1522 OVS_FLOW_ATTR_KEY, false, skb); 1523} 1524 1525/* Called with ovs_mutex or RCU read lock. */ 1526int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) 1527{ 1528 return ovs_nla_put_key(&flow->key, &flow->mask->key, 1529 OVS_FLOW_ATTR_MASK, true, skb); 1530} 1531 1532#define MAX_ACTIONS_BUFSIZE (32 * 1024) 1533 1534static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) 1535{ 1536 struct sw_flow_actions *sfa; 1537 1538 if (size > MAX_ACTIONS_BUFSIZE) { 1539 OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); 1540 return ERR_PTR(-EINVAL); 1541 } 1542 1543 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 1544 if (!sfa) 1545 return ERR_PTR(-ENOMEM); 1546 1547 sfa->actions_len = 0; 1548 return sfa; 1549} 1550 1551/* Schedules 'sf_acts' to be freed after the next RCU grace period. 1552 * The caller must hold rcu_read_lock for this to be sensible. */ 1553void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) 1554{ 1555 kfree_rcu(sf_acts, rcu); 1556} 1557 1558static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, 1559 int attr_len, bool log) 1560{ 1561 1562 struct sw_flow_actions *acts; 1563 int new_acts_size; 1564 int req_size = NLA_ALIGN(attr_len); 1565 int next_offset = offsetof(struct sw_flow_actions, actions) + 1566 (*sfa)->actions_len; 1567 1568 if (req_size <= (ksize(*sfa) - next_offset)) 1569 goto out; 1570 1571 new_acts_size = ksize(*sfa) * 2; 1572 1573 if (new_acts_size > MAX_ACTIONS_BUFSIZE) { 1574 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) 1575 return ERR_PTR(-EMSGSIZE); 1576 new_acts_size = MAX_ACTIONS_BUFSIZE; 1577 } 1578 1579 acts = nla_alloc_flow_actions(new_acts_size, log); 1580 if (IS_ERR(acts)) 1581 return (void *)acts; 1582 1583 memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); 1584 acts->actions_len = (*sfa)->actions_len; 1585 kfree(*sfa); 1586 *sfa = acts; 1587 1588out: 1589 (*sfa)->actions_len += req_size; 1590 return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); 1591} 1592 1593static struct nlattr *__add_action(struct sw_flow_actions **sfa, 1594 int attrtype, void *data, int len, bool log) 1595{ 1596 struct nlattr *a; 1597 1598 a = reserve_sfa_size(sfa, nla_attr_size(len), log); 1599 if (IS_ERR(a)) 1600 return a; 1601 1602 a->nla_type = attrtype; 1603 a->nla_len = nla_attr_size(len); 1604 1605 if (data) 1606 memcpy(nla_data(a), data, len); 1607 memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); 1608 1609 return a; 1610} 1611 1612static int add_action(struct sw_flow_actions **sfa, int attrtype, 1613 void *data, int len, bool log) 1614{ 1615 struct nlattr *a; 1616 1617 a = __add_action(sfa, attrtype, data, len, log); 1618 1619 return PTR_ERR_OR_ZERO(a); 1620} 1621 1622static inline int add_nested_action_start(struct sw_flow_actions **sfa, 1623 int attrtype, bool log) 1624{ 1625 int used = (*sfa)->actions_len; 1626 int err; 1627 1628 err = add_action(sfa, attrtype, NULL, 0, log); 1629 if (err) 1630 return err; 1631 1632 return used; 1633} 1634 1635static inline void add_nested_action_end(struct sw_flow_actions *sfa, 1636 int st_offset) 1637{ 1638 struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + 1639 st_offset); 1640 1641 a->nla_len = sfa->actions_len - st_offset; 1642} 1643 1644static int __ovs_nla_copy_actions(const struct nlattr *attr, 1645 const struct sw_flow_key *key, 1646 int depth, struct sw_flow_actions **sfa, 1647 __be16 eth_type, __be16 vlan_tci, bool log); 1648 1649static int validate_and_copy_sample(const struct nlattr *attr, 1650 const struct sw_flow_key *key, int depth, 1651 struct sw_flow_actions **sfa, 1652 __be16 eth_type, __be16 vlan_tci, bool log) 1653{ 1654 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 1655 const struct nlattr *probability, *actions; 1656 const struct nlattr *a; 1657 int rem, start, err, st_acts; 1658 1659 memset(attrs, 0, sizeof(attrs)); 1660 nla_for_each_nested(a, attr, rem) { 1661 int type = nla_type(a); 1662 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) 1663 return -EINVAL; 1664 attrs[type] = a; 1665 } 1666 if (rem) 1667 return -EINVAL; 1668 1669 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; 1670 if (!probability || nla_len(probability) != sizeof(u32)) 1671 return -EINVAL; 1672 1673 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; 1674 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) 1675 return -EINVAL; 1676 1677 /* validation done, copy sample action. */ 1678 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); 1679 if (start < 0) 1680 return start; 1681 err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, 1682 nla_data(probability), sizeof(u32), log); 1683 if (err) 1684 return err; 1685 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); 1686 if (st_acts < 0) 1687 return st_acts; 1688 1689 err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa, 1690 eth_type, vlan_tci, log); 1691 if (err) 1692 return err; 1693 1694 add_nested_action_end(*sfa, st_acts); 1695 add_nested_action_end(*sfa, start); 1696 1697 return 0; 1698} 1699 1700void ovs_match_init(struct sw_flow_match *match, 1701 struct sw_flow_key *key, 1702 struct sw_flow_mask *mask) 1703{ 1704 memset(match, 0, sizeof(*match)); 1705 match->key = key; 1706 match->mask = mask; 1707 1708 memset(key, 0, sizeof(*key)); 1709 1710 if (mask) { 1711 memset(&mask->key, 0, sizeof(mask->key)); 1712 mask->range.start = mask->range.end = 0; 1713 } 1714} 1715 1716static int validate_geneve_opts(struct sw_flow_key *key) 1717{ 1718 struct geneve_opt *option; 1719 int opts_len = key->tun_opts_len; 1720 bool crit_opt = false; 1721 1722 option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); 1723 while (opts_len > 0) { 1724 int len; 1725 1726 if (opts_len < sizeof(*option)) 1727 return -EINVAL; 1728 1729 len = sizeof(*option) + option->length * 4; 1730 if (len > opts_len) 1731 return -EINVAL; 1732 1733 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); 1734 1735 option = (struct geneve_opt *)((u8 *)option + len); 1736 opts_len -= len; 1737 }; 1738 1739 key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; 1740 1741 return 0; 1742} 1743 1744static int validate_and_copy_set_tun(const struct nlattr *attr, 1745 struct sw_flow_actions **sfa, bool log) 1746{ 1747 struct sw_flow_match match; 1748 struct sw_flow_key key; 1749 struct ovs_tunnel_info *tun_info; 1750 struct nlattr *a; 1751 int err = 0, start, opts_type; 1752 1753 ovs_match_init(&match, &key, NULL); 1754 opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); 1755 if (opts_type < 0) 1756 return opts_type; 1757 1758 if (key.tun_opts_len) { 1759 switch (opts_type) { 1760 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 1761 err = validate_geneve_opts(&key); 1762 if (err < 0) 1763 return err; 1764 break; 1765 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 1766 break; 1767 } 1768 }; 1769 1770 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); 1771 if (start < 0) 1772 return start; 1773 1774 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, 1775 sizeof(*tun_info) + key.tun_opts_len, log); 1776 if (IS_ERR(a)) 1777 return PTR_ERR(a); 1778 1779 tun_info = nla_data(a); 1780 tun_info->tunnel = key.tun_key; 1781 tun_info->options_len = key.tun_opts_len; 1782 1783 if (tun_info->options_len) { 1784 /* We need to store the options in the action itself since 1785 * everything else will go away after flow setup. We can append 1786 * it to tun_info and then point there. 1787 */ 1788 memcpy((tun_info + 1), 1789 TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len); 1790 tun_info->options = (tun_info + 1); 1791 } else { 1792 tun_info->options = NULL; 1793 } 1794 1795 add_nested_action_end(*sfa, start); 1796 1797 return err; 1798} 1799 1800/* Return false if there are any non-masked bits set. 1801 * Mask follows data immediately, before any netlink padding. 1802 */ 1803static bool validate_masked(u8 *data, int len) 1804{ 1805 u8 *mask = data + len; 1806 1807 while (len--) 1808 if (*data++ & ~*mask++) 1809 return false; 1810 1811 return true; 1812} 1813 1814static int validate_set(const struct nlattr *a, 1815 const struct sw_flow_key *flow_key, 1816 struct sw_flow_actions **sfa, 1817 bool *skip_copy, __be16 eth_type, bool masked, bool log) 1818{ 1819 const struct nlattr *ovs_key = nla_data(a); 1820 int key_type = nla_type(ovs_key); 1821 size_t key_len; 1822 1823 /* There can be only one key in a action */ 1824 if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) 1825 return -EINVAL; 1826 1827 key_len = nla_len(ovs_key); 1828 if (masked) 1829 key_len /= 2; 1830 1831 if (key_type > OVS_KEY_ATTR_MAX || 1832 (ovs_key_lens[key_type].len != key_len && 1833 ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) 1834 return -EINVAL; 1835 1836 if (masked && !validate_masked(nla_data(ovs_key), key_len)) 1837 return -EINVAL; 1838 1839 switch (key_type) { 1840 const struct ovs_key_ipv4 *ipv4_key; 1841 const struct ovs_key_ipv6 *ipv6_key; 1842 int err; 1843 1844 case OVS_KEY_ATTR_PRIORITY: 1845 case OVS_KEY_ATTR_SKB_MARK: 1846 case OVS_KEY_ATTR_ETHERNET: 1847 break; 1848 1849 case OVS_KEY_ATTR_TUNNEL: 1850 if (eth_p_mpls(eth_type)) 1851 return -EINVAL; 1852 1853 if (masked) 1854 return -EINVAL; /* Masked tunnel set not supported. */ 1855 1856 *skip_copy = true; 1857 err = validate_and_copy_set_tun(a, sfa, log); 1858 if (err) 1859 return err; 1860 break; 1861 1862 case OVS_KEY_ATTR_IPV4: 1863 if (eth_type != htons(ETH_P_IP)) 1864 return -EINVAL; 1865 1866 ipv4_key = nla_data(ovs_key); 1867 1868 if (masked) { 1869 const struct ovs_key_ipv4 *mask = ipv4_key + 1; 1870 1871 /* Non-writeable fields. */ 1872 if (mask->ipv4_proto || mask->ipv4_frag) 1873 return -EINVAL; 1874 } else { 1875 if (ipv4_key->ipv4_proto != flow_key->ip.proto) 1876 return -EINVAL; 1877 1878 if (ipv4_key->ipv4_frag != flow_key->ip.frag) 1879 return -EINVAL; 1880 } 1881 break; 1882 1883 case OVS_KEY_ATTR_IPV6: 1884 if (eth_type != htons(ETH_P_IPV6)) 1885 return -EINVAL; 1886 1887 ipv6_key = nla_data(ovs_key); 1888 1889 if (masked) { 1890 const struct ovs_key_ipv6 *mask = ipv6_key + 1; 1891 1892 /* Non-writeable fields. */ 1893 if (mask->ipv6_proto || mask->ipv6_frag) 1894 return -EINVAL; 1895 1896 /* Invalid bits in the flow label mask? */ 1897 if (ntohl(mask->ipv6_label) & 0xFFF00000) 1898 return -EINVAL; 1899 } else { 1900 if (ipv6_key->ipv6_proto != flow_key->ip.proto) 1901 return -EINVAL; 1902 1903 if (ipv6_key->ipv6_frag != flow_key->ip.frag) 1904 return -EINVAL; 1905 } 1906 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 1907 return -EINVAL; 1908 1909 break; 1910 1911 case OVS_KEY_ATTR_TCP: 1912 if ((eth_type != htons(ETH_P_IP) && 1913 eth_type != htons(ETH_P_IPV6)) || 1914 flow_key->ip.proto != IPPROTO_TCP) 1915 return -EINVAL; 1916 1917 break; 1918 1919 case OVS_KEY_ATTR_UDP: 1920 if ((eth_type != htons(ETH_P_IP) && 1921 eth_type != htons(ETH_P_IPV6)) || 1922 flow_key->ip.proto != IPPROTO_UDP) 1923 return -EINVAL; 1924 1925 break; 1926 1927 case OVS_KEY_ATTR_MPLS: 1928 if (!eth_p_mpls(eth_type)) 1929 return -EINVAL; 1930 break; 1931 1932 case OVS_KEY_ATTR_SCTP: 1933 if ((eth_type != htons(ETH_P_IP) && 1934 eth_type != htons(ETH_P_IPV6)) || 1935 flow_key->ip.proto != IPPROTO_SCTP) 1936 return -EINVAL; 1937 1938 break; 1939 1940 default: 1941 return -EINVAL; 1942 } 1943 1944 /* Convert non-masked non-tunnel set actions to masked set actions. */ 1945 if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { 1946 int start, len = key_len * 2; 1947 struct nlattr *at; 1948 1949 *skip_copy = true; 1950 1951 start = add_nested_action_start(sfa, 1952 OVS_ACTION_ATTR_SET_TO_MASKED, 1953 log); 1954 if (start < 0) 1955 return start; 1956 1957 at = __add_action(sfa, key_type, NULL, len, log); 1958 if (IS_ERR(at)) 1959 return PTR_ERR(at); 1960 1961 memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ 1962 memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ 1963 /* Clear non-writeable bits from otherwise writeable fields. */ 1964 if (key_type == OVS_KEY_ATTR_IPV6) { 1965 struct ovs_key_ipv6 *mask = nla_data(at) + key_len; 1966 1967 mask->ipv6_label &= htonl(0x000FFFFF); 1968 } 1969 add_nested_action_end(*sfa, start); 1970 } 1971 1972 return 0; 1973} 1974 1975static int validate_userspace(const struct nlattr *attr) 1976{ 1977 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 1978 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 1979 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, 1980 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 }, 1981 }; 1982 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 1983 int error; 1984 1985 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, 1986 attr, userspace_policy); 1987 if (error) 1988 return error; 1989 1990 if (!a[OVS_USERSPACE_ATTR_PID] || 1991 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) 1992 return -EINVAL; 1993 1994 return 0; 1995} 1996 1997static int copy_action(const struct nlattr *from, 1998 struct sw_flow_actions **sfa, bool log) 1999{ 2000 int totlen = NLA_ALIGN(from->nla_len); 2001 struct nlattr *to; 2002 2003 to = reserve_sfa_size(sfa, from->nla_len, log); 2004 if (IS_ERR(to)) 2005 return PTR_ERR(to); 2006 2007 memcpy(to, from, totlen); 2008 return 0; 2009} 2010 2011static int __ovs_nla_copy_actions(const struct nlattr *attr, 2012 const struct sw_flow_key *key, 2013 int depth, struct sw_flow_actions **sfa, 2014 __be16 eth_type, __be16 vlan_tci, bool log) 2015{ 2016 const struct nlattr *a; 2017 int rem, err; 2018 2019 if (depth >= SAMPLE_ACTION_DEPTH) 2020 return -EOVERFLOW; 2021 2022 nla_for_each_nested(a, attr, rem) { 2023 /* Expected argument lengths, (u32)-1 for variable length. */ 2024 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { 2025 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 2026 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), 2027 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 2028 [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls), 2029 [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16), 2030 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 2031 [OVS_ACTION_ATTR_POP_VLAN] = 0, 2032 [OVS_ACTION_ATTR_SET] = (u32)-1, 2033 [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, 2034 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, 2035 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) 2036 }; 2037 const struct ovs_action_push_vlan *vlan; 2038 int type = nla_type(a); 2039 bool skip_copy; 2040 2041 if (type > OVS_ACTION_ATTR_MAX || 2042 (action_lens[type] != nla_len(a) && 2043 action_lens[type] != (u32)-1)) 2044 return -EINVAL; 2045 2046 skip_copy = false; 2047 switch (type) { 2048 case OVS_ACTION_ATTR_UNSPEC: 2049 return -EINVAL; 2050 2051 case OVS_ACTION_ATTR_USERSPACE: 2052 err = validate_userspace(a); 2053 if (err) 2054 return err; 2055 break; 2056 2057 case OVS_ACTION_ATTR_OUTPUT: 2058 if (nla_get_u32(a) >= DP_MAX_PORTS) 2059 return -EINVAL; 2060 break; 2061 2062 case OVS_ACTION_ATTR_HASH: { 2063 const struct ovs_action_hash *act_hash = nla_data(a); 2064 2065 switch (act_hash->hash_alg) { 2066 case OVS_HASH_ALG_L4: 2067 break; 2068 default: 2069 return -EINVAL; 2070 } 2071 2072 break; 2073 } 2074 2075 case OVS_ACTION_ATTR_POP_VLAN: 2076 vlan_tci = htons(0); 2077 break; 2078 2079 case OVS_ACTION_ATTR_PUSH_VLAN: 2080 vlan = nla_data(a); 2081 if (vlan->vlan_tpid != htons(ETH_P_8021Q)) 2082 return -EINVAL; 2083 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 2084 return -EINVAL; 2085 vlan_tci = vlan->vlan_tci; 2086 break; 2087 2088 case OVS_ACTION_ATTR_RECIRC: 2089 break; 2090 2091 case OVS_ACTION_ATTR_PUSH_MPLS: { 2092 const struct ovs_action_push_mpls *mpls = nla_data(a); 2093 2094 if (!eth_p_mpls(mpls->mpls_ethertype)) 2095 return -EINVAL; 2096 /* Prohibit push MPLS other than to a white list 2097 * for packets that have a known tag order. 2098 */ 2099 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 2100 (eth_type != htons(ETH_P_IP) && 2101 eth_type != htons(ETH_P_IPV6) && 2102 eth_type != htons(ETH_P_ARP) && 2103 eth_type != htons(ETH_P_RARP) && 2104 !eth_p_mpls(eth_type))) 2105 return -EINVAL; 2106 eth_type = mpls->mpls_ethertype; 2107 break; 2108 } 2109 2110 case OVS_ACTION_ATTR_POP_MPLS: 2111 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 2112 !eth_p_mpls(eth_type)) 2113 return -EINVAL; 2114 2115 /* Disallow subsequent L2.5+ set and mpls_pop actions 2116 * as there is no check here to ensure that the new 2117 * eth_type is valid and thus set actions could 2118 * write off the end of the packet or otherwise 2119 * corrupt it. 2120 * 2121 * Support for these actions is planned using packet 2122 * recirculation. 2123 */ 2124 eth_type = htons(0); 2125 break; 2126 2127 case OVS_ACTION_ATTR_SET: 2128 err = validate_set(a, key, sfa, 2129 &skip_copy, eth_type, false, log); 2130 if (err) 2131 return err; 2132 break; 2133 2134 case OVS_ACTION_ATTR_SET_MASKED: 2135 err = validate_set(a, key, sfa, 2136 &skip_copy, eth_type, true, log); 2137 if (err) 2138 return err; 2139 break; 2140 2141 case OVS_ACTION_ATTR_SAMPLE: 2142 err = validate_and_copy_sample(a, key, depth, sfa, 2143 eth_type, vlan_tci, log); 2144 if (err) 2145 return err; 2146 skip_copy = true; 2147 break; 2148 2149 default: 2150 OVS_NLERR(log, "Unknown Action type %d", type); 2151 return -EINVAL; 2152 } 2153 if (!skip_copy) { 2154 err = copy_action(a, sfa, log); 2155 if (err) 2156 return err; 2157 } 2158 } 2159 2160 if (rem > 0) 2161 return -EINVAL; 2162 2163 return 0; 2164} 2165 2166/* 'key' must be the masked key. */ 2167int ovs_nla_copy_actions(const struct nlattr *attr, 2168 const struct sw_flow_key *key, 2169 struct sw_flow_actions **sfa, bool log) 2170{ 2171 int err; 2172 2173 *sfa = nla_alloc_flow_actions(nla_len(attr), log); 2174 if (IS_ERR(*sfa)) 2175 return PTR_ERR(*sfa); 2176 2177 err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, 2178 key->eth.tci, log); 2179 if (err) 2180 kfree(*sfa); 2181 2182 return err; 2183} 2184 2185static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) 2186{ 2187 const struct nlattr *a; 2188 struct nlattr *start; 2189 int err = 0, rem; 2190 2191 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); 2192 if (!start) 2193 return -EMSGSIZE; 2194 2195 nla_for_each_nested(a, attr, rem) { 2196 int type = nla_type(a); 2197 struct nlattr *st_sample; 2198 2199 switch (type) { 2200 case OVS_SAMPLE_ATTR_PROBABILITY: 2201 if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, 2202 sizeof(u32), nla_data(a))) 2203 return -EMSGSIZE; 2204 break; 2205 case OVS_SAMPLE_ATTR_ACTIONS: 2206 st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); 2207 if (!st_sample) 2208 return -EMSGSIZE; 2209 err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); 2210 if (err) 2211 return err; 2212 nla_nest_end(skb, st_sample); 2213 break; 2214 } 2215 } 2216 2217 nla_nest_end(skb, start); 2218 return err; 2219} 2220 2221static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) 2222{ 2223 const struct nlattr *ovs_key = nla_data(a); 2224 int key_type = nla_type(ovs_key); 2225 struct nlattr *start; 2226 int err; 2227 2228 switch (key_type) { 2229 case OVS_KEY_ATTR_TUNNEL_INFO: { 2230 struct ovs_tunnel_info *tun_info = nla_data(ovs_key); 2231 2232 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 2233 if (!start) 2234 return -EMSGSIZE; 2235 2236 err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel, 2237 tun_info->options_len ? 2238 tun_info->options : NULL, 2239 tun_info->options_len); 2240 if (err) 2241 return err; 2242 nla_nest_end(skb, start); 2243 break; 2244 } 2245 default: 2246 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) 2247 return -EMSGSIZE; 2248 break; 2249 } 2250 2251 return 0; 2252} 2253 2254static int masked_set_action_to_set_action_attr(const struct nlattr *a, 2255 struct sk_buff *skb) 2256{ 2257 const struct nlattr *ovs_key = nla_data(a); 2258 struct nlattr *nla; 2259 size_t key_len = nla_len(ovs_key) / 2; 2260 2261 /* Revert the conversion we did from a non-masked set action to 2262 * masked set action. 2263 */ 2264 nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 2265 if (!nla) 2266 return -EMSGSIZE; 2267 2268 if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key))) 2269 return -EMSGSIZE; 2270 2271 nla_nest_end(skb, nla); 2272 return 0; 2273} 2274 2275int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) 2276{ 2277 const struct nlattr *a; 2278 int rem, err; 2279 2280 nla_for_each_attr(a, attr, len, rem) { 2281 int type = nla_type(a); 2282 2283 switch (type) { 2284 case OVS_ACTION_ATTR_SET: 2285 err = set_action_to_attr(a, skb); 2286 if (err) 2287 return err; 2288 break; 2289 2290 case OVS_ACTION_ATTR_SET_TO_MASKED: 2291 err = masked_set_action_to_set_action_attr(a, skb); 2292 if (err) 2293 return err; 2294 break; 2295 2296 case OVS_ACTION_ATTR_SAMPLE: 2297 err = sample_action_to_attr(a, skb); 2298 if (err) 2299 return err; 2300 break; 2301 default: 2302 if (nla_put(skb, type, nla_len(a), nla_data(a))) 2303 return -EMSGSIZE; 2304 break; 2305 } 2306 } 2307 2308 return 0; 2309} 2310