root/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mlx5e_get_tc_tun
  2. get_route_and_out_devs
  3. mlx5e_route_lookup_ipv4
  4. mlx5e_netdev_kind
  5. mlx5e_route_lookup_ipv6
  6. mlx5e_gen_ip_tunnel_header
  7. gen_eth_tnl_hdr
  8. mlx5e_tc_tun_create_header_ipv4
  9. mlx5e_tc_tun_create_header_ipv6
  10. mlx5e_tc_tun_device_to_offload
  11. mlx5e_tc_tun_init_encap_attr
  12. mlx5e_tc_tun_parse
  13. mlx5e_tc_tun_parse_udp_ports

   1 /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
   2 /* Copyright (c) 2018 Mellanox Technologies. */
   3 
   4 #include <net/vxlan.h>
   5 #include <net/gre.h>
   6 #include <net/geneve.h>
   7 #include "en/tc_tun.h"
   8 #include "en_tc.h"
   9 
  10 struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
  11 {
  12         if (netif_is_vxlan(tunnel_dev))
  13                 return &vxlan_tunnel;
  14         else if (netif_is_geneve(tunnel_dev))
  15                 return &geneve_tunnel;
  16         else if (netif_is_gretap(tunnel_dev) ||
  17                  netif_is_ip6gretap(tunnel_dev))
  18                 return &gre_tunnel;
  19         else
  20                 return NULL;
  21 }
  22 
  23 static int get_route_and_out_devs(struct mlx5e_priv *priv,
  24                                   struct net_device *dev,
  25                                   struct net_device **route_dev,
  26                                   struct net_device **out_dev)
  27 {
  28         struct net_device *uplink_dev, *uplink_upper, *real_dev;
  29         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
  30         bool dst_is_lag_dev;
  31 
  32         real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev;
  33         uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
  34 
  35         rcu_read_lock();
  36         uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
  37         /* mlx5_lag_is_sriov() is a blocking function which can't be called
  38          * while holding rcu read lock. Take the net_device for correctness
  39          * sake.
  40          */
  41         if (uplink_upper)
  42                 dev_hold(uplink_upper);
  43         rcu_read_unlock();
  44 
  45         dst_is_lag_dev = (uplink_upper &&
  46                           netif_is_lag_master(uplink_upper) &&
  47                           real_dev == uplink_upper &&
  48                           mlx5_lag_is_sriov(priv->mdev));
  49         if (uplink_upper)
  50                 dev_put(uplink_upper);
  51 
  52         /* if the egress device isn't on the same HW e-switch or
  53          * it's a LAG device, use the uplink
  54          */
  55         *route_dev = dev;
  56         if (!netdev_port_same_parent_id(priv->netdev, real_dev) ||
  57             dst_is_lag_dev || is_vlan_dev(*route_dev))
  58                 *out_dev = uplink_dev;
  59         else if (mlx5e_eswitch_rep(dev) &&
  60                  mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
  61                 *out_dev = *route_dev;
  62         else
  63                 return -EOPNOTSUPP;
  64 
  65         if (!(mlx5e_eswitch_rep(*out_dev) &&
  66               mlx5e_is_uplink_rep(netdev_priv(*out_dev))))
  67                 return -EOPNOTSUPP;
  68 
  69         return 0;
  70 }
  71 
  72 static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
  73                                    struct net_device *mirred_dev,
  74                                    struct net_device **out_dev,
  75                                    struct net_device **route_dev,
  76                                    struct flowi4 *fl4,
  77                                    struct neighbour **out_n,
  78                                    u8 *out_ttl)
  79 {
  80         struct rtable *rt;
  81         struct neighbour *n = NULL;
  82 
  83 #if IS_ENABLED(CONFIG_INET)
  84         struct mlx5_core_dev *mdev = priv->mdev;
  85         struct net_device *uplink_dev;
  86         int ret;
  87 
  88         if (mlx5_lag_is_multipath(mdev)) {
  89                 struct mlx5_eswitch *esw = mdev->priv.eswitch;
  90 
  91                 uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
  92                 fl4->flowi4_oif = uplink_dev->ifindex;
  93         }
  94 
  95         rt = ip_route_output_key(dev_net(mirred_dev), fl4);
  96         ret = PTR_ERR_OR_ZERO(rt);
  97         if (ret)
  98                 return ret;
  99 
 100         if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
 101                 ip_rt_put(rt);
 102                 return -ENETUNREACH;
 103         }
 104 #else
 105         return -EOPNOTSUPP;
 106 #endif
 107 
 108         ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
 109         if (ret < 0) {
 110                 ip_rt_put(rt);
 111                 return ret;
 112         }
 113 
 114         if (!(*out_ttl))
 115                 *out_ttl = ip4_dst_hoplimit(&rt->dst);
 116         n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
 117         ip_rt_put(rt);
 118         if (!n)
 119                 return -ENOMEM;
 120 
 121         *out_n = n;
 122         return 0;
 123 }
 124 
 125 static const char *mlx5e_netdev_kind(struct net_device *dev)
 126 {
 127         if (dev->rtnl_link_ops)
 128                 return dev->rtnl_link_ops->kind;
 129         else
 130                 return "unknown";
 131 }
 132 
 133 static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
 134                                    struct net_device *mirred_dev,
 135                                    struct net_device **out_dev,
 136                                    struct net_device **route_dev,
 137                                    struct flowi6 *fl6,
 138                                    struct neighbour **out_n,
 139                                    u8 *out_ttl)
 140 {
 141         struct neighbour *n = NULL;
 142         struct dst_entry *dst;
 143 
 144 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
 145         int ret;
 146 
 147         dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6,
 148                                               NULL);
 149         if (IS_ERR(dst))
 150                 return PTR_ERR(dst);
 151 
 152         if (!(*out_ttl))
 153                 *out_ttl = ip6_dst_hoplimit(dst);
 154 
 155         ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
 156         if (ret < 0) {
 157                 dst_release(dst);
 158                 return ret;
 159         }
 160 #else
 161         return -EOPNOTSUPP;
 162 #endif
 163 
 164         n = dst_neigh_lookup(dst, &fl6->daddr);
 165         dst_release(dst);
 166         if (!n)
 167                 return -ENOMEM;
 168 
 169         *out_n = n;
 170         return 0;
 171 }
 172 
 173 static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
 174                                       struct mlx5e_encap_entry *e)
 175 {
 176         if (!e->tunnel) {
 177                 pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n");
 178                 return -EOPNOTSUPP;
 179         }
 180 
 181         return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e);
 182 }
 183 
 184 static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
 185                              struct mlx5e_encap_entry *e,
 186                              u16 proto)
 187 {
 188         struct ethhdr *eth = (struct ethhdr *)buf;
 189         char *ip;
 190 
 191         ether_addr_copy(eth->h_dest, e->h_dest);
 192         ether_addr_copy(eth->h_source, dev->dev_addr);
 193         if (is_vlan_dev(dev)) {
 194                 struct vlan_hdr *vlan = (struct vlan_hdr *)
 195                                         ((char *)eth + ETH_HLEN);
 196                 ip = (char *)vlan + VLAN_HLEN;
 197                 eth->h_proto = vlan_dev_vlan_proto(dev);
 198                 vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
 199                 vlan->h_vlan_encapsulated_proto = htons(proto);
 200         } else {
 201                 eth->h_proto = htons(proto);
 202                 ip = (char *)eth + ETH_HLEN;
 203         }
 204 
 205         return ip;
 206 }
 207 
 208 int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
 209                                     struct net_device *mirred_dev,
 210                                     struct mlx5e_encap_entry *e)
 211 {
 212         int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
 213         const struct ip_tunnel_key *tun_key = &e->tun_info->key;
 214         struct net_device *out_dev, *route_dev;
 215         struct neighbour *n = NULL;
 216         struct flowi4 fl4 = {};
 217         int ipv4_encap_size;
 218         char *encap_header;
 219         u8 nud_state, ttl;
 220         struct iphdr *ip;
 221         int err;
 222 
 223         /* add the IP fields */
 224         fl4.flowi4_tos = tun_key->tos;
 225         fl4.daddr = tun_key->u.ipv4.dst;
 226         fl4.saddr = tun_key->u.ipv4.src;
 227         ttl = tun_key->ttl;
 228 
 229         err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev,
 230                                       &fl4, &n, &ttl);
 231         if (err)
 232                 return err;
 233 
 234         ipv4_encap_size =
 235                 (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
 236                 sizeof(struct iphdr) +
 237                 e->tunnel->calc_hlen(e);
 238 
 239         if (max_encap_size < ipv4_encap_size) {
 240                 mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
 241                                ipv4_encap_size, max_encap_size);
 242                 err = -EOPNOTSUPP;
 243                 goto out;
 244         }
 245 
 246         encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
 247         if (!encap_header) {
 248                 err = -ENOMEM;
 249                 goto out;
 250         }
 251 
 252         /* used by mlx5e_detach_encap to lookup a neigh hash table
 253          * entry in the neigh hash table when a user deletes a rule
 254          */
 255         e->m_neigh.dev = n->dev;
 256         e->m_neigh.family = n->ops->family;
 257         memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
 258         e->out_dev = out_dev;
 259         e->route_dev = route_dev;
 260 
 261         /* It's important to add the neigh to the hash table before checking
 262          * the neigh validity state. So if we'll get a notification, in case the
 263          * neigh changes it's validity state, we would find the relevant neigh
 264          * in the hash.
 265          */
 266         err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
 267         if (err)
 268                 goto free_encap;
 269 
 270         read_lock_bh(&n->lock);
 271         nud_state = n->nud_state;
 272         ether_addr_copy(e->h_dest, n->ha);
 273         read_unlock_bh(&n->lock);
 274 
 275         /* add ethernet header */
 276         ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
 277                                              ETH_P_IP);
 278 
 279         /* add ip header */
 280         ip->tos = tun_key->tos;
 281         ip->version = 0x4;
 282         ip->ihl = 0x5;
 283         ip->ttl = ttl;
 284         ip->daddr = fl4.daddr;
 285         ip->saddr = fl4.saddr;
 286 
 287         /* add tunneling protocol header */
 288         err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
 289                                          &ip->protocol, e);
 290         if (err)
 291                 goto destroy_neigh_entry;
 292 
 293         e->encap_size = ipv4_encap_size;
 294         e->encap_header = encap_header;
 295 
 296         if (!(nud_state & NUD_VALID)) {
 297                 neigh_event_send(n, NULL);
 298                 /* the encap entry will be made valid on neigh update event
 299                  * and not used before that.
 300                  */
 301                 goto out;
 302         }
 303         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
 304                                                      e->reformat_type,
 305                                                      ipv4_encap_size, encap_header,
 306                                                      MLX5_FLOW_NAMESPACE_FDB);
 307         if (IS_ERR(e->pkt_reformat)) {
 308                 err = PTR_ERR(e->pkt_reformat);
 309                 goto destroy_neigh_entry;
 310         }
 311 
 312         e->flags |= MLX5_ENCAP_ENTRY_VALID;
 313         mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
 314         neigh_release(n);
 315         return err;
 316 
 317 destroy_neigh_entry:
 318         mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 319 free_encap:
 320         kfree(encap_header);
 321 out:
 322         if (n)
 323                 neigh_release(n);
 324         return err;
 325 }
 326 
 327 int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
 328                                     struct net_device *mirred_dev,
 329                                     struct mlx5e_encap_entry *e)
 330 {
 331         int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
 332         const struct ip_tunnel_key *tun_key = &e->tun_info->key;
 333         struct net_device *out_dev, *route_dev;
 334         struct neighbour *n = NULL;
 335         struct flowi6 fl6 = {};
 336         struct ipv6hdr *ip6h;
 337         int ipv6_encap_size;
 338         char *encap_header;
 339         u8 nud_state, ttl;
 340         int err;
 341 
 342         ttl = tun_key->ttl;
 343 
 344         fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
 345         fl6.daddr = tun_key->u.ipv6.dst;
 346         fl6.saddr = tun_key->u.ipv6.src;
 347 
 348         err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev,
 349                                       &fl6, &n, &ttl);
 350         if (err)
 351                 return err;
 352 
 353         ipv6_encap_size =
 354                 (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
 355                 sizeof(struct ipv6hdr) +
 356                 e->tunnel->calc_hlen(e);
 357 
 358         if (max_encap_size < ipv6_encap_size) {
 359                 mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
 360                                ipv6_encap_size, max_encap_size);
 361                 err = -EOPNOTSUPP;
 362                 goto out;
 363         }
 364 
 365         encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
 366         if (!encap_header) {
 367                 err = -ENOMEM;
 368                 goto out;
 369         }
 370 
 371         /* used by mlx5e_detach_encap to lookup a neigh hash table
 372          * entry in the neigh hash table when a user deletes a rule
 373          */
 374         e->m_neigh.dev = n->dev;
 375         e->m_neigh.family = n->ops->family;
 376         memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
 377         e->out_dev = out_dev;
 378         e->route_dev = route_dev;
 379 
 380         /* It's importent to add the neigh to the hash table before checking
 381          * the neigh validity state. So if we'll get a notification, in case the
 382          * neigh changes it's validity state, we would find the relevant neigh
 383          * in the hash.
 384          */
 385         err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
 386         if (err)
 387                 goto free_encap;
 388 
 389         read_lock_bh(&n->lock);
 390         nud_state = n->nud_state;
 391         ether_addr_copy(e->h_dest, n->ha);
 392         read_unlock_bh(&n->lock);
 393 
 394         /* add ethernet header */
 395         ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
 396                                                  ETH_P_IPV6);
 397 
 398         /* add ip header */
 399         ip6_flow_hdr(ip6h, tun_key->tos, 0);
 400         /* the HW fills up ipv6 payload len */
 401         ip6h->hop_limit   = ttl;
 402         ip6h->daddr       = fl6.daddr;
 403         ip6h->saddr       = fl6.saddr;
 404 
 405         /* add tunneling protocol header */
 406         err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
 407                                          &ip6h->nexthdr, e);
 408         if (err)
 409                 goto destroy_neigh_entry;
 410 
 411         e->encap_size = ipv6_encap_size;
 412         e->encap_header = encap_header;
 413 
 414         if (!(nud_state & NUD_VALID)) {
 415                 neigh_event_send(n, NULL);
 416                 /* the encap entry will be made valid on neigh update event
 417                  * and not used before that.
 418                  */
 419                 goto out;
 420         }
 421 
 422         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
 423                                                      e->reformat_type,
 424                                                      ipv6_encap_size, encap_header,
 425                                                      MLX5_FLOW_NAMESPACE_FDB);
 426         if (IS_ERR(e->pkt_reformat)) {
 427                 err = PTR_ERR(e->pkt_reformat);
 428                 goto destroy_neigh_entry;
 429         }
 430 
 431         e->flags |= MLX5_ENCAP_ENTRY_VALID;
 432         mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
 433         neigh_release(n);
 434         return err;
 435 
 436 destroy_neigh_entry:
 437         mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 438 free_encap:
 439         kfree(encap_header);
 440 out:
 441         if (n)
 442                 neigh_release(n);
 443         return err;
 444 }
 445 
 446 bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
 447                                     struct net_device *netdev)
 448 {
 449         struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev);
 450 
 451         if (tunnel && tunnel->can_offload(priv))
 452                 return true;
 453         else
 454                 return false;
 455 }
 456 
 457 int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
 458                                  struct mlx5e_priv *priv,
 459                                  struct mlx5e_encap_entry *e,
 460                                  struct netlink_ext_ack *extack)
 461 {
 462         struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev);
 463 
 464         if (!tunnel) {
 465                 e->reformat_type = -1;
 466                 return -EOPNOTSUPP;
 467         }
 468 
 469         return tunnel->init_encap_attr(tunnel_dev, priv, e, extack);
 470 }
 471 
 472 int mlx5e_tc_tun_parse(struct net_device *filter_dev,
 473                        struct mlx5e_priv *priv,
 474                        struct mlx5_flow_spec *spec,
 475                        struct flow_cls_offload *f,
 476                        void *headers_c,
 477                        void *headers_v, u8 *match_level)
 478 {
 479         struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
 480         int err = 0;
 481 
 482         if (!tunnel) {
 483                 netdev_warn(priv->netdev,
 484                             "decapsulation offload is not supported for %s net device\n",
 485                             mlx5e_netdev_kind(filter_dev));
 486                 err = -EOPNOTSUPP;
 487                 goto out;
 488         }
 489 
 490         *match_level = tunnel->match_level;
 491 
 492         if (tunnel->parse_udp_ports) {
 493                 err = tunnel->parse_udp_ports(priv, spec, f,
 494                                               headers_c, headers_v);
 495                 if (err)
 496                         goto out;
 497         }
 498 
 499         if (tunnel->parse_tunnel) {
 500                 err = tunnel->parse_tunnel(priv, spec, f,
 501                                            headers_c, headers_v);
 502                 if (err)
 503                         goto out;
 504         }
 505 
 506 out:
 507         return err;
 508 }
 509 
 510 int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
 511                                  struct mlx5_flow_spec *spec,
 512                                  struct flow_cls_offload *f,
 513                                  void *headers_c,
 514                                  void *headers_v)
 515 {
 516         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
 517         struct netlink_ext_ack *extack = f->common.extack;
 518         struct flow_match_ports enc_ports;
 519 
 520         /* Full udp dst port must be given */
 521 
 522         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
 523                 NL_SET_ERR_MSG_MOD(extack,
 524                                    "UDP tunnel decap filter must include enc_dst_port condition");
 525                 netdev_warn(priv->netdev,
 526                             "UDP tunnel decap filter must include enc_dst_port condition\n");
 527                 return -EOPNOTSUPP;
 528         }
 529 
 530         flow_rule_match_enc_ports(rule, &enc_ports);
 531 
 532         if (memchr_inv(&enc_ports.mask->dst, 0xff,
 533                        sizeof(enc_ports.mask->dst))) {
 534                 NL_SET_ERR_MSG_MOD(extack,
 535                                    "UDP tunnel decap filter must match enc_dst_port fully");
 536                 netdev_warn(priv->netdev,
 537                             "UDP tunnel decap filter must match enc_dst_port fully\n");
 538                 return -EOPNOTSUPP;
 539         }
 540 
 541         /* match on UDP protocol and dst port number */
 542 
 543         MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
 544         MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
 545 
 546         MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
 547                  ntohs(enc_ports.mask->dst));
 548         MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
 549                  ntohs(enc_ports.key->dst));
 550 
 551         /* UDP src port on outer header is generated by HW,
 552          * so it is probably a bad idea to request matching it.
 553          * Nonetheless, it is allowed.
 554          */
 555 
 556         MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
 557                  ntohs(enc_ports.mask->src));
 558         MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
 559                  ntohs(enc_ports.key->src));
 560 
 561         return 0;
 562 }

/* [<][>][^][v][top][bottom][index][help] */