root/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c


DEFINITIONS

This source file includes the following definitions.
  1. mlx5_eswitch_get_rep
  2. mlx5_eswitch_prios_supported
  3. mlx5_eswitch_get_chain_range
  4. mlx5_eswitch_get_prio_range
  5. mlx5_eswitch_set_rule_source_port
  6. mlx5_eswitch_add_offloaded_rule
  7. mlx5_eswitch_add_fwd_rule
  8. __mlx5_eswitch_del_rule
  9. mlx5_eswitch_del_offloaded_rule
  10. mlx5_eswitch_del_fwd_rule
  11. esw_set_global_vlan_pop
  12. esw_vlan_action_get_vport
  13. esw_add_vlan_action_check
  14. mlx5_eswitch_add_vlan_action
  15. mlx5_eswitch_del_vlan_action
  16. mlx5_eswitch_add_send_to_vport_rule
  17. mlx5_eswitch_del_send_to_vport_rule
  18. esw_set_passing_vport_metadata
  19. peer_miss_rules_setup
  20. esw_set_peer_miss_rule_source_port
  21. esw_add_fdb_peer_miss_rules
  22. esw_del_fdb_peer_miss_rules
  23. esw_add_fdb_miss_rule
  24. get_sz_from_pool
  25. put_sz_to_pool
  26. create_next_size_table
  27. esw_get_prio_table
  28. esw_put_prio_table
  29. esw_destroy_offloads_fast_fdb_tables
  30. esw_set_flow_group_source_port
  31. esw_create_offloads_fdb_tables
  32. esw_destroy_offloads_fdb_tables
  33. esw_create_offloads_table
  34. esw_destroy_offloads_table
  35. esw_create_vport_rx_group
  36. esw_destroy_vport_rx_group
  37. mlx5_eswitch_create_vport_rx_rule
  38. esw_offloads_start
  39. esw_offloads_cleanup_reps
  40. esw_offloads_init_reps
  41. __esw_offloads_unload_rep
  42. __unload_reps_special_vport
  43. __unload_reps_vf_vport
  44. esw_offloads_unload_vf_reps
  45. __unload_reps_all_vport
  46. esw_offloads_unload_all_reps
  47. __esw_offloads_load_rep
  48. __load_reps_special_vport
  49. __load_reps_vf_vport
  50. __load_reps_all_vport
  51. esw_offloads_load_vf_reps
  52. esw_offloads_load_all_reps
  53. mlx5_esw_offloads_pair
  54. mlx5_esw_offloads_unpair
  55. mlx5_esw_offloads_set_ns_peer
  56. mlx5_esw_offloads_devcom_event
  57. esw_offloads_devcom_init
  58. esw_offloads_devcom_cleanup
  59. esw_vport_ingress_prio_tag_config
  60. esw_vport_add_ingress_acl_modify_metadata
  61. esw_vport_del_ingress_acl_modify_metadata
  62. esw_vport_egress_prio_tag_config
  63. esw_vport_ingress_common_config
  64. esw_check_vport_match_metadata_supported
  65. esw_create_offloads_acl_tables
  66. esw_destroy_offloads_acl_tables
  67. esw_offloads_steering_init
  68. esw_offloads_steering_cleanup
  69. esw_vfs_changed_event_handler
  70. esw_functions_changed_event_handler
  71. mlx5_esw_funcs_changed_handler
  72. esw_offloads_enable
  73. esw_offloads_stop
  74. esw_offloads_disable
  75. esw_mode_from_devlink
  76. esw_mode_to_devlink
  77. esw_inline_mode_from_devlink
  78. esw_inline_mode_to_devlink
  79. mlx5_devlink_eswitch_check
  80. mlx5_devlink_eswitch_mode_set
  81. mlx5_devlink_eswitch_mode_get
  82. mlx5_devlink_eswitch_inline_mode_set
  83. mlx5_devlink_eswitch_inline_mode_get
  84. mlx5_eswitch_inline_mode_get
  85. mlx5_devlink_eswitch_encap_mode_set
  86. mlx5_devlink_eswitch_encap_mode_get
  87. mlx5_eswitch_register_vport_reps
  88. mlx5_eswitch_unregister_vport_reps
  89. mlx5_eswitch_get_uplink_priv
  90. mlx5_eswitch_get_proto_dev
  91. mlx5_eswitch_uplink_get_proto_dev
  92. mlx5_eswitch_vport_rep
  93. mlx5_eswitch_is_vf_vport
  94. mlx5_eswitch_vport_match_metadata_enabled
  95. mlx5_eswitch_get_vport_metadata_for_match

   1 /*
   2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
   3  *
   4  * This software is available to you under a choice of one of two
   5  * licenses.  You may choose to be licensed under the terms of the GNU
   6  * General Public License (GPL) Version 2, available from the file
   7  * COPYING in the main directory of this source tree, or the
   8  * OpenIB.org BSD license below:
   9  *
  10  *     Redistribution and use in source and binary forms, with or
  11  *     without modification, are permitted provided that the following
  12  *     conditions are met:
  13  *
  14  *      - Redistributions of source code must retain the above
  15  *        copyright notice, this list of conditions and the following
  16  *        disclaimer.
  17  *
  18  *      - Redistributions in binary form must reproduce the above
  19  *        copyright notice, this list of conditions and the following
  20  *        disclaimer in the documentation and/or other materials
  21  *        provided with the distribution.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30  * SOFTWARE.
  31  */
  32 
  33 #include <linux/etherdevice.h>
  34 #include <linux/mlx5/driver.h>
  35 #include <linux/mlx5/mlx5_ifc.h>
  36 #include <linux/mlx5/vport.h>
  37 #include <linux/mlx5/fs.h>
  38 #include "mlx5_core.h"
  39 #include "eswitch.h"
  40 #include "rdma.h"
  41 #include "en.h"
  42 #include "fs_core.h"
  43 #include "lib/devcom.h"
  44 #include "lib/eq.h"
  45 
  46 /* There are two match-all miss flows, one for unicast dst mac and
  47  * one for multicast.
  48  */
  49 #define MLX5_ESW_MISS_FLOWS (2)
  50 
  51 #define fdb_prio_table(esw, chain, prio, level) \
  52         (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)]
  53 
  54 #define UPLINK_REP_INDEX 0
  55 
  56 static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
  57                                                      u16 vport_num)
  58 {
  59         int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
  60 
  61         WARN_ON(idx > esw->total_vports - 1);
  62         return &esw->offloads.vport_reps[idx];
  63 }
  64 
  65 static struct mlx5_flow_table *
  66 esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
  67 static void
  68 esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
  69 
  70 bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw)
  71 {
  72         return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED));
  73 }
  74 
  75 u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw)
  76 {
  77         if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
  78                 return FDB_MAX_CHAIN;
  79 
  80         return 0;
  81 }
  82 
  83 u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw)
  84 {
  85         if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
  86                 return FDB_MAX_PRIO;
  87 
  88         return 1;
  89 }
  90 
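/* Illustrative usage sketch, not part of this file: a classifier offload
 * caller would typically validate the requested chain and prio against the
 * ranges reported above before building its mlx5_esw_flow_attr. The names
 * "chain" and "prio" below are hypothetical caller variables.
 *
 *	if (chain > mlx5_eswitch_get_chain_range(esw) ||
 *	    prio > mlx5_eswitch_get_prio_range(esw))
 *		return -EOPNOTSUPP;
 */
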
  91 static void
  92 mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
  93                                   struct mlx5_flow_spec *spec,
  94                                   struct mlx5_esw_flow_attr *attr)
  95 {
  96         void *misc2;
  97         void *misc;
  98 
  99         /* Use metadata matching because a vport is not represented by a single
 100          * VHCA in dual-port RoCE mode, and matching on the source vport may fail.
 101          */
 102         if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
 103                 misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
 104                 MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
 105                          mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch,
 106                                                                    attr->in_rep->vport));
 107 
 108                 misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
 109                 MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0);
 110 
 111                 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
 112                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
 113                 if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc)))
 114                         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
 115         } else {
 116                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
 117                 MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
 118 
 119                 if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
 120                         MLX5_SET(fte_match_set_misc, misc,
 121                                  source_eswitch_owner_vhca_id,
 122                                  MLX5_CAP_GEN(attr->in_mdev, vhca_id));
 123 
 124                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
 125                 MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
 126                 if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
 127                         MLX5_SET_TO_ONES(fte_match_set_misc, misc,
 128                                          source_eswitch_owner_vhca_id);
 129 
 130                 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
 131         }
 132 
 133         if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) &&
 134             attr->in_rep->vport == MLX5_VPORT_UPLINK)
 135                 spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
 136 }
 137 
 138 struct mlx5_flow_handle *
 139 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 140                                 struct mlx5_flow_spec *spec,
 141                                 struct mlx5_esw_flow_attr *attr)
 142 {
 143         struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
 144         struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
 145         bool split = !!(attr->split_count);
 146         struct mlx5_flow_handle *rule;
 147         struct mlx5_flow_table *fdb;
 148         int j, i = 0;
 149 
 150         if (esw->mode != MLX5_ESWITCH_OFFLOADS)
 151                 return ERR_PTR(-EOPNOTSUPP);
 152 
 153         flow_act.action = attr->action;
 154         /* if per flow vlan pop/push is emulated, don't set that into the firmware */
 155         if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
 156                 flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
 157                                      MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 158         else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
 159                 flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto[0]);
 160                 flow_act.vlan[0].vid = attr->vlan_vid[0];
 161                 flow_act.vlan[0].prio = attr->vlan_prio[0];
 162                 if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
 163                         flow_act.vlan[1].ethtype = ntohs(attr->vlan_proto[1]);
 164                         flow_act.vlan[1].vid = attr->vlan_vid[1];
 165                         flow_act.vlan[1].prio = attr->vlan_prio[1];
 166                 }
 167         }
 168 
 169         if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
 170                 if (attr->dest_chain) {
 171                         struct mlx5_flow_table *ft;
 172 
 173                         ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0);
 174                         if (IS_ERR(ft)) {
 175                                 rule = ERR_CAST(ft);
 176                                 goto err_create_goto_table;
 177                         }
 178 
 179                         dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 180                         dest[i].ft = ft;
 181                         i++;
 182                 } else {
 183                         for (j = attr->split_count; j < attr->out_count; j++) {
 184                                 dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 185                                 dest[i].vport.num = attr->dests[j].rep->vport;
 186                                 dest[i].vport.vhca_id =
 187                                         MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id);
 188                                 if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
 189                                         dest[i].vport.flags |=
 190                                                 MLX5_FLOW_DEST_VPORT_VHCA_ID;
 191                                 if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
 192                                         flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
 193                                         flow_act.pkt_reformat = attr->dests[j].pkt_reformat;
 194                                         dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
 195                                         dest[i].vport.pkt_reformat =
 196                                                 attr->dests[j].pkt_reformat;
 197                                 }
 198                                 i++;
 199                         }
 200                 }
 201         }
 202         if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 203                 dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
 204                 dest[i].counter_id = mlx5_fc_id(attr->counter);
 205                 i++;
 206         }
 207 
 208         mlx5_eswitch_set_rule_source_port(esw, spec, attr);
 209 
 210         if (attr->outer_match_level != MLX5_MATCH_NONE)
 211                 spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
 212         if (attr->inner_match_level != MLX5_MATCH_NONE)
 213                 spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
 214 
 215         if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 216                 flow_act.modify_hdr = attr->modify_hdr;
 217 
 218         fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split);
 219         if (IS_ERR(fdb)) {
 220                 rule = ERR_CAST(fdb);
 221                 goto err_esw_get;
 222         }
 223 
 224         if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec))
 225                 rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr,
 226                                                      &flow_act, dest, i);
 227         else
 228                 rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
 229         if (IS_ERR(rule))
 230                 goto err_add_rule;
 231         else
 232                 atomic64_inc(&esw->offloads.num_flows);
 233 
 234         return rule;
 235 
 236 err_add_rule:
 237         esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
 238 err_esw_get:
 239         if (attr->dest_chain)
 240                 esw_put_prio_table(esw, attr->dest_chain, 1, 0);
 241 err_create_goto_table:
 242         return rule;
 243 }
 244 
 245 struct mlx5_flow_handle *
 246 mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
 247                           struct mlx5_flow_spec *spec,
 248                           struct mlx5_esw_flow_attr *attr)
 249 {
 250         struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
 251         struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
 252         struct mlx5_flow_table *fast_fdb;
 253         struct mlx5_flow_table *fwd_fdb;
 254         struct mlx5_flow_handle *rule;
 255         int i;
 256 
 257         fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0);
 258         if (IS_ERR(fast_fdb)) {
 259                 rule = ERR_CAST(fast_fdb);
 260                 goto err_get_fast;
 261         }
 262 
 263         fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1);
 264         if (IS_ERR(fwd_fdb)) {
 265                 rule = ERR_CAST(fwd_fdb);
 266                 goto err_get_fwd;
 267         }
 268 
 269         flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 270         for (i = 0; i < attr->split_count; i++) {
 271                 dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 272                 dest[i].vport.num = attr->dests[i].rep->vport;
 273                 dest[i].vport.vhca_id =
 274                         MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id);
 275                 if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
 276                         dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 277                 if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
 278                         dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
 279                         dest[i].vport.pkt_reformat = attr->dests[i].pkt_reformat;
 280                 }
 281         }
 282         dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 283         dest[i].ft = fwd_fdb;
 284         i++;
 285 
 286         mlx5_eswitch_set_rule_source_port(esw, spec, attr);
 287 
 288         if (attr->outer_match_level != MLX5_MATCH_NONE)
 289                 spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
 290 
 291         rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
 292 
 293         if (IS_ERR(rule))
 294                 goto add_err;
 295 
 296         atomic64_inc(&esw->offloads.num_flows);
 297 
 298         return rule;
 299 add_err:
 300         esw_put_prio_table(esw, attr->chain, attr->prio, 1);
 301 err_get_fwd:
 302         esw_put_prio_table(esw, attr->chain, attr->prio, 0);
 303 err_get_fast:
 304         return rule;
 305 }
 306 
 307 static void
 308 __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
 309                         struct mlx5_flow_handle *rule,
 310                         struct mlx5_esw_flow_attr *attr,
 311                         bool fwd_rule)
 312 {
 313         bool split = (attr->split_count > 0);
 314         int i;
 315 
 316         mlx5_del_flow_rules(rule);
 317 
 318         /* unref the term table */
 319         for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
 320                 if (attr->dests[i].termtbl)
 321                         mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl);
 322         }
 323 
 324         atomic64_dec(&esw->offloads.num_flows);
 325 
 326         if (fwd_rule)  {
 327                 esw_put_prio_table(esw, attr->chain, attr->prio, 1);
 328                 esw_put_prio_table(esw, attr->chain, attr->prio, 0);
 329         } else {
 330                 esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
 331                 if (attr->dest_chain)
 332                         esw_put_prio_table(esw, attr->dest_chain, 1, 0);
 333         }
 334 }
 335 
 336 void
 337 mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
 338                                 struct mlx5_flow_handle *rule,
 339                                 struct mlx5_esw_flow_attr *attr)
 340 {
 341         __mlx5_eswitch_del_rule(esw, rule, attr, false);
 342 }
 343 
 344 void
 345 mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
 346                           struct mlx5_flow_handle *rule,
 347                           struct mlx5_esw_flow_attr *attr)
 348 {
 349         __mlx5_eswitch_del_rule(esw, rule, attr, true);
 350 }
 351 
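/* Illustrative pairing sketch, not part of this file: a handle returned by
 * mlx5_eswitch_add_offloaded_rule() is released with
 * mlx5_eswitch_del_offloaded_rule() (and mlx5_eswitch_add_fwd_rule() with
 * mlx5_eswitch_del_fwd_rule()), passing the same attr so the prio table
 * references taken at add time are dropped. "flow" is a hypothetical
 * caller-side structure.
 *
 *	flow->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 *	if (IS_ERR(flow->rule))
 *		return PTR_ERR(flow->rule);
 *	...
 *	mlx5_eswitch_del_offloaded_rule(esw, flow->rule, attr);
 */
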
 352 static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
 353 {
 354         struct mlx5_eswitch_rep *rep;
 355         int i, err = 0;
 356 
 357         esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
 358         mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) {
 359                 if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
 360                         continue;
 361 
 362                 err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
 363                 if (err)
 364                         goto out;
 365         }
 366 
 367 out:
 368         return err;
 369 }
 370 
 371 static struct mlx5_eswitch_rep *
 372 esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop)
 373 {
 374         struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
 375 
 376         in_rep  = attr->in_rep;
 377         out_rep = attr->dests[0].rep;
 378 
 379         if (push)
 380                 vport = in_rep;
 381         else if (pop)
 382                 vport = out_rep;
 383         else
 384                 vport = in_rep;
 385 
 386         return vport;
 387 }
 388 
 389 static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
 390                                      bool push, bool pop, bool fwd)
 391 {
 392         struct mlx5_eswitch_rep *in_rep, *out_rep;
 393 
 394         if ((push || pop) && !fwd)
 395                 goto out_notsupp;
 396 
 397         in_rep  = attr->in_rep;
 398         out_rep = attr->dests[0].rep;
 399 
 400         if (push && in_rep->vport == MLX5_VPORT_UPLINK)
 401                 goto out_notsupp;
 402 
 403         if (pop && out_rep->vport == MLX5_VPORT_UPLINK)
 404                 goto out_notsupp;
 405 
 406         /* vport has vlan push configured, can't offload VF --> wire rules without it */
 407         if (!push && !pop && fwd)
 408                 if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK)
 409                         goto out_notsupp;
 410 
 411         /* protects against (1) setting rules with different vlans to push and
 412          * (2) setting rules without vlans (attr->vlan = 0) and with vlans to push (!= 0)
 413          */
 414         if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid[0]))
 415                 goto out_notsupp;
 416 
 417         return 0;
 418 
 419 out_notsupp:
 420         return -EOPNOTSUPP;
 421 }
 422 
 423 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
 424                                  struct mlx5_esw_flow_attr *attr)
 425 {
 426         struct offloads_fdb *offloads = &esw->fdb_table.offloads;
 427         struct mlx5_eswitch_rep *vport = NULL;
 428         bool push, pop, fwd;
 429         int err = 0;
 430 
 431         /* no-op if we're in vlan push/pop non-emulation mode */
 432         if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
 433                 return 0;
 434 
 435         push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
 436         pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 437         fwd  = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
 438                    !attr->dest_chain);
 439 
 440         mutex_lock(&esw->state_lock);
 441 
 442         err = esw_add_vlan_action_check(attr, push, pop, fwd);
 443         if (err)
 444                 goto unlock;
 445 
 446         attr->vlan_handled = false;
 447 
 448         vport = esw_vlan_action_get_vport(attr, push, pop);
 449 
 450         if (!push && !pop && fwd) {
 451                 /* tracks VF --> wire rules without vlan push action */
 452                 if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
 453                         vport->vlan_refcount++;
 454                         attr->vlan_handled = true;
 455                 }
 456 
 457                 goto unlock;
 458         }
 459 
 460         if (!push && !pop)
 461                 goto unlock;
 462 
 463         if (!(offloads->vlan_push_pop_refcount)) {
 464                 /* it's the 1st vlan rule, apply global vlan pop policy */
 465                 err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP);
 466                 if (err)
 467                         goto out;
 468         }
 469         offloads->vlan_push_pop_refcount++;
 470 
 471         if (push) {
 472                 if (vport->vlan_refcount)
 473                         goto skip_set_push;
 474 
 475                 err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid[0], 0,
 476                                                     SET_VLAN_INSERT | SET_VLAN_STRIP);
 477                 if (err)
 478                         goto out;
 479                 vport->vlan = attr->vlan_vid[0];
 480 skip_set_push:
 481                 vport->vlan_refcount++;
 482         }
 483 out:
 484         if (!err)
 485                 attr->vlan_handled = true;
 486 unlock:
 487         mutex_unlock(&esw->state_lock);
 488         return err;
 489 }
 490 
 491 int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
 492                                  struct mlx5_esw_flow_attr *attr)
 493 {
 494         struct offloads_fdb *offloads = &esw->fdb_table.offloads;
 495         struct mlx5_eswitch_rep *vport = NULL;
 496         bool push, pop, fwd;
 497         int err = 0;
 498 
 499         /* no-op if we're in vlan push/pop non-emulation mode */
 500         if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
 501                 return 0;
 502 
 503         if (!attr->vlan_handled)
 504                 return 0;
 505 
 506         push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
 507         pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 508         fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
 509 
 510         mutex_lock(&esw->state_lock);
 511 
 512         vport = esw_vlan_action_get_vport(attr, push, pop);
 513 
 514         if (!push && !pop && fwd) {
 515                 /* tracks VF --> wire rules without vlan push action */
 516                 if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
 517                         vport->vlan_refcount--;
 518 
 519                 goto out;
 520         }
 521 
 522         if (push) {
 523                 vport->vlan_refcount--;
 524                 if (vport->vlan_refcount)
 525                         goto skip_unset_push;
 526 
 527                 vport->vlan = 0;
 528                 err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport,
 529                                                     0, 0, SET_VLAN_STRIP);
 530                 if (err)
 531                         goto out;
 532         }
 533 
 534 skip_unset_push:
 535         offloads->vlan_push_pop_refcount--;
 536         if (offloads->vlan_push_pop_refcount)
 537                 goto out;
 538 
 539         /* no more vlan rules, stop global vlan pop policy */
 540         err = esw_set_global_vlan_pop(esw, 0);
 541 
 542 out:
 543         mutex_unlock(&esw->state_lock);
 544         return err;
 545 }
 546 
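/* Illustrative pairing sketch, not part of this file: when vlan push/pop is
 * emulated, callers bracket rule offload with the two helpers above; the del
 * helper is a no-op unless the add helper marked attr->vlan_handled.
 *
 *	err = mlx5_eswitch_add_vlan_action(esw, attr);
 *	if (err)
 *		return err;
 *	...
 *	mlx5_eswitch_del_vlan_action(esw, attr);
 */
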
 547 struct mlx5_flow_handle *
 548 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
 549                                     u32 sqn)
 550 {
 551         struct mlx5_flow_act flow_act = {0};
 552         struct mlx5_flow_destination dest = {};
 553         struct mlx5_flow_handle *flow_rule;
 554         struct mlx5_flow_spec *spec;
 555         void *misc;
 556 
 557         spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 558         if (!spec) {
 559                 flow_rule = ERR_PTR(-ENOMEM);
 560                 goto out;
 561         }
 562 
 563         misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
 564         MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
 565         /* source vport is the esw manager */
 566         MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport);
 567 
 568         misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
 569         MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
 570         MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
 571 
 572         spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
 573         dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 574         dest.vport.num = vport;
 575         flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 576 
 577         flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
 578                                         &flow_act, &dest, 1);
 579         if (IS_ERR(flow_rule))
 580                 esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
 581 out:
 582         kvfree(spec);
 583         return flow_rule;
 584 }
 585 EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
 586 
 587 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
 588 {
 589         mlx5_del_flow_rules(rule);
 590 }
 591 
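/* Illustrative usage sketch, not part of this file: a representor driver
 * typically installs one send-to-vport rule per send queue it owns and
 * removes them again on teardown. "sqns" and "num_sqs" are hypothetical.
 *
 *	for (i = 0; i < num_sqs; i++) {
 *		rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep->vport,
 *							   sqns[i]);
 *		if (IS_ERR(rule))
 *			goto err_out;
 *		...
 *	}
 *	...
 *	mlx5_eswitch_del_send_to_vport_rule(rule);
 */
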
 592 static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
 593 {
 594         u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
 595         u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
 596         u8 fdb_to_vport_reg_c_id;
 597         int err;
 598 
 599         if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
 600                 return 0;
 601 
 602         err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
 603                                                    out, sizeof(out));
 604         if (err)
 605                 return err;
 606 
 607         fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
 608                                          esw_vport_context.fdb_to_vport_reg_c_id);
 609 
 610         if (enable)
 611                 fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
 612         else
 613                 fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
 614 
 615         MLX5_SET(modify_esw_vport_context_in, in,
 616                  esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
 617 
 618         MLX5_SET(modify_esw_vport_context_in, in,
 619                  field_select.fdb_to_vport_reg_c_id, 1);
 620 
 621         return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
 622                                                      in, sizeof(in));
 623 }
 624 
 625 static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
 626                                   struct mlx5_core_dev *peer_dev,
 627                                   struct mlx5_flow_spec *spec,
 628                                   struct mlx5_flow_destination *dest)
 629 {
 630         void *misc;
 631 
 632         if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
 633                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 634                                     misc_parameters_2);
 635                 MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
 636 
 637                 spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
 638         } else {
 639                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 640                                     misc_parameters);
 641 
 642                 MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
 643                          MLX5_CAP_GEN(peer_dev, vhca_id));
 644 
 645                 spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
 646 
 647                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 648                                     misc_parameters);
 649                 MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
 650                 MLX5_SET_TO_ONES(fte_match_set_misc, misc,
 651                                  source_eswitch_owner_vhca_id);
 652         }
 653 
 654         dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 655         dest->vport.num = peer_dev->priv.eswitch->manager_vport;
 656         dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
 657         dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 658 }
 659 
 660 static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
 661                                                struct mlx5_eswitch *peer_esw,
 662                                                struct mlx5_flow_spec *spec,
 663                                                u16 vport)
 664 {
 665         void *misc;
 666 
 667         if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
 668                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 669                                     misc_parameters_2);
 670                 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
 671                          mlx5_eswitch_get_vport_metadata_for_match(peer_esw,
 672                                                                    vport));
 673         } else {
 674                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 675                                     misc_parameters);
 676                 MLX5_SET(fte_match_set_misc, misc, source_port, vport);
 677         }
 678 }
 679 
 680 static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
 681                                        struct mlx5_core_dev *peer_dev)
 682 {
 683         struct mlx5_flow_destination dest = {};
 684         struct mlx5_flow_act flow_act = {0};
 685         struct mlx5_flow_handle **flows;
 686         struct mlx5_flow_handle *flow;
 687         struct mlx5_flow_spec *spec;
 688         /* total vports is the same for both e-switches */
 689         int nvports = esw->total_vports;
 690         void *misc;
 691         int err, i;
 692 
 693         spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 694         if (!spec)
 695                 return -ENOMEM;
 696 
 697         peer_miss_rules_setup(esw, peer_dev, spec, &dest);
 698 
 699         flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
 700         if (!flows) {
 701                 err = -ENOMEM;
 702                 goto alloc_flows_err;
 703         }
 704 
 705         flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 706         misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 707                             misc_parameters);
 708 
 709         if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
 710                 esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
 711                                                    spec, MLX5_VPORT_PF);
 712 
 713                 flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
 714                                            spec, &flow_act, &dest, 1);
 715                 if (IS_ERR(flow)) {
 716                         err = PTR_ERR(flow);
 717                         goto add_pf_flow_err;
 718                 }
 719                 flows[MLX5_VPORT_PF] = flow;
 720         }
 721 
 722         if (mlx5_ecpf_vport_exists(esw->dev)) {
 723                 MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
 724                 flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
 725                                            spec, &flow_act, &dest, 1);
 726                 if (IS_ERR(flow)) {
 727                         err = PTR_ERR(flow);
 728                         goto add_ecpf_flow_err;
 729                 }
 730                 flows[mlx5_eswitch_ecpf_idx(esw)] = flow;
 731         }
 732 
 733         mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
 734                 esw_set_peer_miss_rule_source_port(esw,
 735                                                    peer_dev->priv.eswitch,
 736                                                    spec, i);
 737 
 738                 flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
 739                                            spec, &flow_act, &dest, 1);
 740                 if (IS_ERR(flow)) {
 741                         err = PTR_ERR(flow);
 742                         goto add_vf_flow_err;
 743                 }
 744                 flows[i] = flow;
 745         }
 746 
 747         esw->fdb_table.offloads.peer_miss_rules = flows;
 748 
 749         kvfree(spec);
 750         return 0;
 751 
 752 add_vf_flow_err:
 753         nvports = --i;
 754         mlx5_esw_for_each_vf_vport_num_reverse(esw, i, nvports)
 755                 mlx5_del_flow_rules(flows[i]);
 756 
 757         if (mlx5_ecpf_vport_exists(esw->dev))
 758                 mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
 759 add_ecpf_flow_err:
 760         if (mlx5_core_is_ecpf_esw_manager(esw->dev))
 761                 mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
 762 add_pf_flow_err:
 763         esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
 764         kvfree(flows);
 765 alloc_flows_err:
 766         kvfree(spec);
 767         return err;
 768 }
 769 
 770 static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
 771 {
 772         struct mlx5_flow_handle **flows;
 773         int i;
 774 
 775         flows = esw->fdb_table.offloads.peer_miss_rules;
 776 
 777         mlx5_esw_for_each_vf_vport_num_reverse(esw, i,
 778                                                mlx5_core_max_vfs(esw->dev))
 779                 mlx5_del_flow_rules(flows[i]);
 780 
 781         if (mlx5_ecpf_vport_exists(esw->dev))
 782                 mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
 783 
 784         if (mlx5_core_is_ecpf_esw_manager(esw->dev))
 785                 mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
 786 
 787         kvfree(flows);
 788 }
 789 
 790 static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 791 {
 792         struct mlx5_flow_act flow_act = {0};
 793         struct mlx5_flow_destination dest = {};
 794         struct mlx5_flow_handle *flow_rule = NULL;
 795         struct mlx5_flow_spec *spec;
 796         void *headers_c;
 797         void *headers_v;
 798         int err = 0;
 799         u8 *dmac_c;
 800         u8 *dmac_v;
 801 
 802         spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 803         if (!spec) {
 804                 err = -ENOMEM;
 805                 goto out;
 806         }
 807 
 808         spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
 809         headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 810                                  outer_headers);
 811         dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
 812                               outer_headers.dmac_47_16);
 813         dmac_c[0] = 0x01;
 814 
 815         dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 816         dest.vport.num = esw->manager_vport;
 817         flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 818 
 819         flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
 820                                         &flow_act, &dest, 1);
 821         if (IS_ERR(flow_rule)) {
 822                 err = PTR_ERR(flow_rule);
 823                 esw_warn(esw->dev,  "FDB: Failed to add unicast miss flow rule err %d\n", err);
 824                 goto out;
 825         }
 826 
 827         esw->fdb_table.offloads.miss_rule_uni = flow_rule;
 828 
 829         headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 830                                  outer_headers);
 831         dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
 832                               outer_headers.dmac_47_16);
 833         dmac_v[0] = 0x01;
 834         flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
 835                                         &flow_act, &dest, 1);
 836         if (IS_ERR(flow_rule)) {
 837                 err = PTR_ERR(flow_rule);
 838                 esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
 839                 mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
 840                 goto out;
 841         }
 842 
 843         esw->fdb_table.offloads.miss_rule_multi = flow_rule;
 844 
 845 out:
 846         kvfree(spec);
 847         return err;
 848 }
 849 
 850 #define ESW_OFFLOADS_NUM_GROUPS  4
 851 
 852 /* Firmware currently supports 4 pools of 4 sizes (ESW_POOLS) and a virtual
 853  * memory region of 16M (ESW_SIZE); this region is duplicated for each flow
 854  * table pool. We can allocate up to 16M from each pool, and we keep track of
 855  * how much has been used via get_sz_from_pool/put_sz_to_pool.
 856  * Firmware doesn't report any of this for now.
 857  * ESW_POOLS is expected to be sorted from largest to smallest.
 858  */
 859 #define ESW_SIZE (16 * 1024 * 1024)
 860 const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024,
 861                                     64 * 1024, 128 };
 862 
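/* For example, with ESW_SIZE of 16M the per-pool budgets initialized in
 * esw_create_offloads_fdb_tables() work out to 16M/4M = 4, 16M/1M = 16,
 * 16M/64K = 256 and 16M/128 = 131072 tables respectively, while any pool
 * whose table size exceeds the device's 2^log_max_ft_size limit is left
 * at zero.
 */
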
 863 static int
 864 get_sz_from_pool(struct mlx5_eswitch *esw)
 865 {
 866         int sz = 0, i;
 867 
 868         for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
 869                 if (esw->fdb_table.offloads.fdb_left[i]) {
 870                         --esw->fdb_table.offloads.fdb_left[i];
 871                         sz = ESW_POOLS[i];
 872                         break;
 873                 }
 874         }
 875 
 876         return sz;
 877 }
 878 
 879 static void
 880 put_sz_to_pool(struct mlx5_eswitch *esw, int sz)
 881 {
 882         int i;
 883 
 884         for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
 885                 if (sz >= ESW_POOLS[i]) {
 886                         ++esw->fdb_table.offloads.fdb_left[i];
 887                         break;
 888                 }
 889         }
 890 }
 891 
 892 static struct mlx5_flow_table *
 893 create_next_size_table(struct mlx5_eswitch *esw,
 894                        struct mlx5_flow_namespace *ns,
 895                        u16 table_prio,
 896                        int level,
 897                        u32 flags)
 898 {
 899         struct mlx5_flow_table *fdb;
 900         int sz;
 901 
 902         sz = get_sz_from_pool(esw);
 903         if (!sz)
 904                 return ERR_PTR(-ENOSPC);
 905 
 906         fdb = mlx5_create_auto_grouped_flow_table(ns,
 907                                                   table_prio,
 908                                                   sz,
 909                                                   ESW_OFFLOADS_NUM_GROUPS,
 910                                                   level,
 911                                                   flags);
 912         if (IS_ERR(fdb)) {
 913                 esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n",
 914                          (int)PTR_ERR(fdb), table_prio, level, sz);
 915                 put_sz_to_pool(esw, sz);
 916         }
 917 
 918         return fdb;
 919 }
 920 
 921 static struct mlx5_flow_table *
 922 esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
 923 {
 924         struct mlx5_core_dev *dev = esw->dev;
 925         struct mlx5_flow_table *fdb = NULL;
 926         struct mlx5_flow_namespace *ns;
 927         int table_prio, l = 0;
 928         u32 flags = 0;
 929 
 930         if (chain == FDB_SLOW_PATH_CHAIN)
 931                 return esw->fdb_table.offloads.slow_fdb;
 932 
 933         mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
 934 
 935         fdb = fdb_prio_table(esw, chain, prio, level).fdb;
 936         if (fdb) {
 937                 /* take ref on earlier levels as well */
 938                 while (level >= 0)
 939                         fdb_prio_table(esw, chain, prio, level--).num_rules++;
 940                 mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
 941                 return fdb;
 942         }
 943 
 944         ns = mlx5_get_fdb_sub_ns(dev, chain);
 945         if (!ns) {
 946                 esw_warn(dev, "Failed to get FDB sub namespace\n");
 947                 mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
 948                 return ERR_PTR(-EOPNOTSUPP);
 949         }
 950 
 951         if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
 952                 flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
 953                           MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
 954 
 955         table_prio = (chain * FDB_MAX_PRIO) + prio - 1;
 956 
 957         /* create earlier levels for correct fs_core lookup when
 958          * connecting tables
 959          */
 960         for (l = 0; l <= level; l++) {
 961                 if (fdb_prio_table(esw, chain, prio, l).fdb) {
 962                         fdb_prio_table(esw, chain, prio, l).num_rules++;
 963                         continue;
 964                 }
 965 
 966                 fdb = create_next_size_table(esw, ns, table_prio, l, flags);
 967                 if (IS_ERR(fdb)) {
 968                         l--;
 969                         goto err_create_fdb;
 970                 }
 971 
 972                 fdb_prio_table(esw, chain, prio, l).fdb = fdb;
 973                 fdb_prio_table(esw, chain, prio, l).num_rules = 1;
 974         }
 975 
 976         mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
 977         return fdb;
 978 
 979 err_create_fdb:
 980         mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
 981         if (l >= 0)
 982                 esw_put_prio_table(esw, chain, prio, l);
 983 
 984         return fdb;
 985 }
 986 
 987 static void
 988 esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
 989 {
 990         int l;
 991 
 992         if (chain == FDB_SLOW_PATH_CHAIN)
 993                 return;
 994 
 995         mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
 996 
 997         for (l = level; l >= 0; l--) {
 998                 if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0)
 999                         continue;
1000 
1001                 put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte);
1002                 mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb);
1003                 fdb_prio_table(esw, chain, prio, l).fdb = NULL;
1004         }
1005 
1006         mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
1007 }
1008 
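/* Illustrative pairing sketch, not part of this file: esw_get_prio_table()
 * takes a reference on the requested level and on every lower level of the
 * same chain/prio, so each successful get is balanced by one
 * esw_put_prio_table() call with the same chain, prio and level, as the rule
 * add/del paths above do.
 *
 *	fdb = esw_get_prio_table(esw, chain, prio, 0);
 *	if (IS_ERR(fdb))
 *		return PTR_ERR(fdb);
 *	...
 *	esw_put_prio_table(esw, chain, prio, 0);
 */
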
1009 static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw)
1010 {
1011         /* If lazy creation isn't supported, deref the fast path tables */
1012         if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) {
1013                 esw_put_prio_table(esw, 0, 1, 1);
1014                 esw_put_prio_table(esw, 0, 1, 0);
1015         }
1016 }
1017 
1018 #define MAX_PF_SQ 256
1019 #define MAX_SQ_NVPORTS 32
1020 
1021 static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
1022                                            u32 *flow_group_in)
1023 {
1024         void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
1025                                             flow_group_in,
1026                                             match_criteria);
1027 
1028         if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1029                 MLX5_SET(create_flow_group_in, flow_group_in,
1030                          match_criteria_enable,
1031                          MLX5_MATCH_MISC_PARAMETERS_2);
1032 
1033                 MLX5_SET_TO_ONES(fte_match_param, match_criteria,
1034                                  misc_parameters_2.metadata_reg_c_0);
1035         } else {
1036                 MLX5_SET(create_flow_group_in, flow_group_in,
1037                          match_criteria_enable,
1038                          MLX5_MATCH_MISC_PARAMETERS);
1039 
1040                 MLX5_SET_TO_ONES(fte_match_param, match_criteria,
1041                                  misc_parameters.source_port);
1042         }
1043 }
1044 
1045 static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
1046 {
1047         int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1048         struct mlx5_flow_table_attr ft_attr = {};
1049         struct mlx5_core_dev *dev = esw->dev;
1050         u32 *flow_group_in, max_flow_counter;
1051         struct mlx5_flow_namespace *root_ns;
1052         struct mlx5_flow_table *fdb = NULL;
1053         int table_size, ix, err = 0, i;
1054         struct mlx5_flow_group *g;
1055         u32 flags = 0, fdb_max;
1056         void *match_criteria;
1057         u8 *dmac;
1058 
1059         esw_debug(esw->dev, "Create offloads FDB Tables\n");
1060         flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1061         if (!flow_group_in)
1062                 return -ENOMEM;
1063 
1064         root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
1065         if (!root_ns) {
1066                 esw_warn(dev, "Failed to get FDB flow namespace\n");
1067                 err = -EOPNOTSUPP;
1068                 goto ns_err;
1069         }
1070         esw->fdb_table.offloads.ns = root_ns;
1071         err = mlx5_flow_namespace_set_mode(root_ns,
1072                                            esw->dev->priv.steering->mode);
1073         if (err) {
1074                 esw_warn(dev, "Failed to set FDB namespace steering mode\n");
1075                 goto ns_err;
1076         }
1077 
1078         max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
1079                             MLX5_CAP_GEN(dev, max_flow_counter_15_0);
1080         fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
1081 
1082         esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(%d))\n",
1083                   MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
1084                   max_flow_counter, ESW_OFFLOADS_NUM_GROUPS,
1085                   fdb_max);
1086 
1087         for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++)
1088                 esw->fdb_table.offloads.fdb_left[i] =
1089                         ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
1090 
1091         table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ +
1092                 MLX5_ESW_MISS_FLOWS + esw->total_vports;
1093 
1094         /* create the slow path fdb with encap set, so further table instances
1095          * can be created at run time while VFs are probed if the FW allows that.
1096          */
1097         if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
1098                 flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
1099                           MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
1100 
1101         ft_attr.flags = flags;
1102         ft_attr.max_fte = table_size;
1103         ft_attr.prio = FDB_SLOW_PATH;
1104 
1105         fdb = mlx5_create_flow_table(root_ns, &ft_attr);
1106         if (IS_ERR(fdb)) {
1107                 err = PTR_ERR(fdb);
1108                 esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
1109                 goto slow_fdb_err;
1110         }
1111         esw->fdb_table.offloads.slow_fdb = fdb;
1112 
1113         /* If lazy creation isn't supported, open the fast path tables now */
1114         if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) &&
1115             esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
1116                 esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
1117                 esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n");
1118                 esw_get_prio_table(esw, 0, 1, 0);
1119                 esw_get_prio_table(esw, 0, 1, 1);
1120         } else {
1121                 esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n");
1122                 esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
1123         }
1124 
1125         /* create send-to-vport group */
1126         MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1127                  MLX5_MATCH_MISC_PARAMETERS);
1128 
1129         match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
1130 
1131         MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
1132         MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
1133 
1134         ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
1135         MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1136         MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
1137 
1138         g = mlx5_create_flow_group(fdb, flow_group_in);
1139         if (IS_ERR(g)) {
1140                 err = PTR_ERR(g);
1141                 esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err);
1142                 goto send_vport_err;
1143         }
1144         esw->fdb_table.offloads.send_to_vport_grp = g;
1145 
1146         /* create peer esw miss group */
1147         memset(flow_group_in, 0, inlen);
1148 
1149         esw_set_flow_group_source_port(esw, flow_group_in);
1150 
1151         if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1152                 match_criteria = MLX5_ADDR_OF(create_flow_group_in,
1153                                               flow_group_in,
1154                                               match_criteria);
1155 
1156                 MLX5_SET_TO_ONES(fte_match_param, match_criteria,
1157                                  misc_parameters.source_eswitch_owner_vhca_id);
1158 
1159                 MLX5_SET(create_flow_group_in, flow_group_in,
1160                          source_eswitch_owner_vhca_id_valid, 1);
1161         }
1162 
1163         MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
1164         MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
1165                  ix + esw->total_vports - 1);
1166         ix += esw->total_vports;
1167 
1168         g = mlx5_create_flow_group(fdb, flow_group_in);
1169         if (IS_ERR(g)) {
1170                 err = PTR_ERR(g);
1171                 esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
1172                 goto peer_miss_err;
1173         }
1174         esw->fdb_table.offloads.peer_miss_grp = g;
1175 
1176         /* create miss group */
1177         memset(flow_group_in, 0, inlen);
1178         MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1179                  MLX5_MATCH_OUTER_HEADERS);
1180         match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1181                                       match_criteria);
1182         dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
1183                             outer_headers.dmac_47_16);
1184         dmac[0] = 0x01;
1185 
1186         MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
1187         MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
1188                  ix + MLX5_ESW_MISS_FLOWS);
1189 
1190         g = mlx5_create_flow_group(fdb, flow_group_in);
1191         if (IS_ERR(g)) {
1192                 err = PTR_ERR(g);
1193                 esw_warn(dev, "Failed to create miss flow group err(%d)\n", err);
1194                 goto miss_err;
1195         }
1196         esw->fdb_table.offloads.miss_grp = g;
1197 
1198         err = esw_add_fdb_miss_rule(esw);
1199         if (err)
1200                 goto miss_rule_err;
1201 
1202         esw->nvports = nvports;
1203         kvfree(flow_group_in);
1204         return 0;
1205 
1206 miss_rule_err:
1207         mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
1208 miss_err:
1209         mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
1210 peer_miss_err:
1211         mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
1212 send_vport_err:
1213         esw_destroy_offloads_fast_fdb_tables(esw);
1214         mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
1215 slow_fdb_err:
1216         /* Holds true only as long as DMFS is the default */
1217         mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS);
1218 ns_err:
1219         kvfree(flow_group_in);
1220         return err;
1221 }
1222 
1223 static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
1224 {
1225         if (!esw->fdb_table.offloads.slow_fdb)
1226                 return;
1227 
1228         esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
1229         mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
1230         mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
1231         mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
1232         mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
1233         mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
1234 
1235         mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
1236         esw_destroy_offloads_fast_fdb_tables(esw);
1237         /* Holds true only as long as DMFS is the default */
1238         mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
1239                                      MLX5_FLOW_STEERING_MODE_DMFS);
1240 }
1241 
1242 static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports)
1243 {
1244         struct mlx5_flow_table_attr ft_attr = {};
1245         struct mlx5_core_dev *dev = esw->dev;
1246         struct mlx5_flow_table *ft_offloads;
1247         struct mlx5_flow_namespace *ns;
1248         int err = 0;
1249 
1250         ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
1251         if (!ns) {
1252                 esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
1253                 return -EOPNOTSUPP;
1254         }
1255 
1256         ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS;
1257 
1258         ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
1259         if (IS_ERR(ft_offloads)) {
1260                 err = PTR_ERR(ft_offloads);
1261                 esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
1262                 return err;
1263         }
1264 
1265         esw->offloads.ft_offloads = ft_offloads;
1266         return 0;
1267 }
1268 
1269 static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
1270 {
1271         struct mlx5_esw_offload *offloads = &esw->offloads;
1272 
1273         mlx5_destroy_flow_table(offloads->ft_offloads);
1274 }
1275 
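     /* Create a single flow group spanning the whole offloads table,
      * matching on the source vport (or its metadata), as set up by
      * esw_set_flow_group_source_port().
      */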
1276 static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
1277 {
1278         int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1279         struct mlx5_flow_group *g;
1280         u32 *flow_group_in;
1281         int err = 0;
1282 
1283         nvports = nvports + MLX5_ESW_MISS_FLOWS;
1284         flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1285         if (!flow_group_in)
1286                 return -ENOMEM;
1287 
1288         /* create vport rx group */
1289         esw_set_flow_group_source_port(esw, flow_group_in);
1290 
1291         MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1292         MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
1293 
1294         g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
1295 
1296         if (IS_ERR(g)) {
1297                 err = PTR_ERR(g);
1298                 mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err);
1299                 goto out;
1300         }
1301 
1302         esw->offloads.vport_rx_group = g;
1303 out:
1304         kvfree(flow_group_in);
1305         return err;
1306 }
1307 
1308 static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
1309 {
1310         mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
1311 }
1312 
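     /* Add a rule to the offloads table that forwards traffic originating
      * from @vport to @dest. When vport metadata matching is enabled the
      * match is on metadata reg_c_0, otherwise on the misc source_port.
      */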
1313 struct mlx5_flow_handle *
1314 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
1315                                   struct mlx5_flow_destination *dest)
1316 {
1317         struct mlx5_flow_act flow_act = {0};
1318         struct mlx5_flow_handle *flow_rule;
1319         struct mlx5_flow_spec *spec;
1320         void *misc;
1321 
1322         spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1323         if (!spec) {
1324                 flow_rule = ERR_PTR(-ENOMEM);
1325                 goto out;
1326         }
1327 
1328         if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1329                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
1330                 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1331                          mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
1332 
1333                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
1334                 MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
1335 
1336                 spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
1337         } else {
1338                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
1339                 MLX5_SET(fte_match_set_misc, misc, source_port, vport);
1340 
1341                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
1342                 MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
1343 
1344                 spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
1345         }
1346 
1347         flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1348         flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
1349                                         &flow_act, dest, 1);
1350         if (IS_ERR(flow_rule)) {
1351                 esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
1352                 goto out;
1353         }
1354 
1355 out:
1356         kvfree(spec);
1357         return flow_rule;
1358 }
1359 
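     /* devlink helper: move the eswitch from legacy to offloads mode by
      * disabling and re-enabling it. On failure, try to fall back to
      * legacy mode.
      */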
1360 static int esw_offloads_start(struct mlx5_eswitch *esw,
1361                               struct netlink_ext_ack *extack)
1362 {
1363         int err, err1;
1364 
1365         if (esw->mode != MLX5_ESWITCH_LEGACY &&
1366             !mlx5_core_is_ecpf_esw_manager(esw->dev)) {
1367                 NL_SET_ERR_MSG_MOD(extack,
1368                                    "Can't set offloads mode, SRIOV legacy not enabled");
1369                 return -EINVAL;
1370         }
1371 
1372         mlx5_eswitch_disable(esw);
1373         mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs);
1374         err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
1375         if (err) {
1376                 NL_SET_ERR_MSG_MOD(extack,
1377                                    "Failed setting eswitch to offloads");
1378                 err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
1379                 if (err1) {
1380                         NL_SET_ERR_MSG_MOD(extack,
1381                                            "Failed setting eswitch back to legacy");
1382                 }
1383         }
1384         if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
1385                 if (mlx5_eswitch_inline_mode_get(esw,
1386                                                  &esw->offloads.inline_mode)) {
1387                         esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
1388                         NL_SET_ERR_MSG_MOD(extack,
1389                                            "Inline mode is different between vports");
1390                 }
1391         }
1392         return err;
1393 }
1394 
1395 void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
1396 {
1397         kfree(esw->offloads.vport_reps);
1398 }
1399 
1400 int esw_offloads_init_reps(struct mlx5_eswitch *esw)
1401 {
1402         int total_vports = esw->total_vports;
1403         struct mlx5_eswitch_rep *rep;
1404         int vport_index;
1405         u8 rep_type;
1406 
1407         esw->offloads.vport_reps = kcalloc(total_vports,
1408                                            sizeof(struct mlx5_eswitch_rep),
1409                                            GFP_KERNEL);
1410         if (!esw->offloads.vport_reps)
1411                 return -ENOMEM;
1412 
1413         mlx5_esw_for_all_reps(esw, vport_index, rep) {
1414                 rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
1415                 rep->vport_index = vport_index;
1416 
1417                 for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
1418                         atomic_set(&rep->rep_data[rep_type].state,
1419                                    REP_UNREGISTERED);
1420         }
1421 
1422         return 0;
1423 }
1424 
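     /* Rep load/unload transitions are done with atomic_cmpxchg() so each
      * rep type is loaded and unloaded at most once:
      * REP_REGISTERED <-> REP_LOADED.
      */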
1425 static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
1426                                       struct mlx5_eswitch_rep *rep, u8 rep_type)
1427 {
1428         if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
1429                            REP_LOADED, REP_REGISTERED) == REP_LOADED)
1430                 esw->offloads.rep_ops[rep_type]->unload(rep);
1431 }
1432 
1433 static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
1434 {
1435         struct mlx5_eswitch_rep *rep;
1436 
1437         if (mlx5_ecpf_vport_exists(esw->dev)) {
1438                 rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
1439                 __esw_offloads_unload_rep(esw, rep, rep_type);
1440         }
1441 
1442         if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
1443                 rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
1444                 __esw_offloads_unload_rep(esw, rep, rep_type);
1445         }
1446 
1447         rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
1448         __esw_offloads_unload_rep(esw, rep, rep_type);
1449 }
1450 
1451 static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
1452                                    u8 rep_type)
1453 {
1454         struct mlx5_eswitch_rep *rep;
1455         int i;
1456 
1457         mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports)
1458                 __esw_offloads_unload_rep(esw, rep, rep_type);
1459 }
1460 
1461 static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports)
1462 {
1463         u8 rep_type = NUM_REP_TYPES;
1464 
1465         while (rep_type-- > 0)
1466                 __unload_reps_vf_vport(esw, nvports, rep_type);
1467 }
1468 
1469 static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
1470 {
1471         __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
1472 
1473         /* Special vports must be the last to unload. */
1474         __unload_reps_special_vport(esw, rep_type);
1475 }
1476 
1477 static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw)
1478 {
1479         u8 rep_type = NUM_REP_TYPES;
1480 
1481         while (rep_type-- > 0)
1482                 __unload_reps_all_vport(esw, rep_type);
1483 }
1484 
1485 static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
1486                                    struct mlx5_eswitch_rep *rep, u8 rep_type)
1487 {
1488         int err = 0;
1489 
1490         if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
1491                            REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
1492                 err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
1493                 if (err)
1494                         atomic_set(&rep->rep_data[rep_type].state,
1495                                    REP_REGISTERED);
1496         }
1497 
1498         return err;
1499 }
1500 
1501 static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
1502 {
1503         struct mlx5_eswitch_rep *rep;
1504         int err;
1505 
1506         rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
1507         err = __esw_offloads_load_rep(esw, rep, rep_type);
1508         if (err)
1509                 return err;
1510 
1511         if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
1512                 rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
1513                 err = __esw_offloads_load_rep(esw, rep, rep_type);
1514                 if (err)
1515                         goto err_pf;
1516         }
1517 
1518         if (mlx5_ecpf_vport_exists(esw->dev)) {
1519                 rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
1520                 err = __esw_offloads_load_rep(esw, rep, rep_type);
1521                 if (err)
1522                         goto err_ecpf;
1523         }
1524 
1525         return 0;
1526 
1527 err_ecpf:
1528         if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
1529                 rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
1530                 __esw_offloads_unload_rep(esw, rep, rep_type);
1531         }
1532 
1533 err_pf:
1534         rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
1535         __esw_offloads_unload_rep(esw, rep, rep_type);
1536         return err;
1537 }
1538 
1539 static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
1540                                 u8 rep_type)
1541 {
1542         struct mlx5_eswitch_rep *rep;
1543         int err, i;
1544 
1545         mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) {
1546                 err = __esw_offloads_load_rep(esw, rep, rep_type);
1547                 if (err)
1548                         goto err_vf;
1549         }
1550 
1551         return 0;
1552 
1553 err_vf:
1554         __unload_reps_vf_vport(esw, --i, rep_type);
1555         return err;
1556 }
1557 
1558 static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
1559 {
1560         int err;
1561 
1562         /* Special vports must be loaded first, as the uplink rep creates mdev resources. */
1563         err = __load_reps_special_vport(esw, rep_type);
1564         if (err)
1565                 return err;
1566 
1567         err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
1568         if (err)
1569                 goto err_vfs;
1570 
1571         return 0;
1572 
1573 err_vfs:
1574         __unload_reps_special_vport(esw, rep_type);
1575         return err;
1576 }
1577 
1578 static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports)
1579 {
1580         u8 rep_type = 0;
1581         int err;
1582 
1583         for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
1584                 err = __load_reps_vf_vport(esw, nvports, rep_type);
1585                 if (err)
1586                         goto err_reps;
1587         }
1588 
1589         return err;
1590 
1591 err_reps:
1592         while (rep_type-- > 0)
1593                 __unload_reps_vf_vport(esw, nvports, rep_type);
1594         return err;
1595 }
1596 
1597 static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw)
1598 {
1599         u8 rep_type = 0;
1600         int err;
1601 
1602         for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
1603                 err = __load_reps_all_vport(esw, rep_type);
1604                 if (err)
1605                         goto err_reps;
1606         }
1607 
1608         return err;
1609 
1610 err_reps:
1611         while (rep_type-- > 0)
1612                 __unload_reps_all_vport(esw, rep_type);
1613         return err;
1614 }
1615 
1616 #define ESW_OFFLOADS_DEVCOM_PAIR        (0)
1617 #define ESW_OFFLOADS_DEVCOM_UNPAIR      (1)
1618 
1619 static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
1620                                   struct mlx5_eswitch *peer_esw)
1621 {
1622         int err;
1623 
1624         err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
1625         if (err)
1626                 return err;
1627 
1628         return 0;
1629 }
1630 
1631 static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
1632 {
1633         mlx5e_tc_clean_fdb_peer_flows(esw);
1634         esw_del_fdb_peer_miss_rules(esw);
1635 }
1636 
1637 static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
1638                                          struct mlx5_eswitch *peer_esw,
1639                                          bool pair)
1640 {
1641         struct mlx5_flow_root_namespace *peer_ns;
1642         struct mlx5_flow_root_namespace *ns;
1643         int err;
1644 
1645         peer_ns = peer_esw->dev->priv.steering->fdb_root_ns;
1646         ns = esw->dev->priv.steering->fdb_root_ns;
1647 
1648         if (pair) {
1649                 err = mlx5_flow_namespace_set_peer(ns, peer_ns);
1650                 if (err)
1651                         return err;
1652 
1653                 err = mlx5_flow_namespace_set_peer(peer_ns, ns);
1654                 if (err) {
1655                         mlx5_flow_namespace_set_peer(ns, NULL);
1656                         return err;
1657                 }
1658         } else {
1659                 mlx5_flow_namespace_set_peer(ns, NULL);
1660                 mlx5_flow_namespace_set_peer(peer_ns, NULL);
1661         }
1662 
1663         return 0;
1664 }
1665 
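     /* devcom event handler, invoked when the peer device eswitch enters or
      * leaves offloads mode. On PAIR, set the FDB namespaces as peers and
      * install the peer miss rules in both directions; on UNPAIR, remove
      * them and clear the pairing.
      */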
1666 static int mlx5_esw_offloads_devcom_event(int event,
1667                                           void *my_data,
1668                                           void *event_data)
1669 {
1670         struct mlx5_eswitch *esw = my_data;
1671         struct mlx5_devcom *devcom = esw->dev->priv.devcom;
1672         struct mlx5_eswitch *peer_esw = event_data;
1673         int err;
1674 
1675         switch (event) {
1676         case ESW_OFFLOADS_DEVCOM_PAIR:
1677                 if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
1678                     mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
1679                         break;
1680 
1681                 err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true);
1682                 if (err)
1683                         goto err_out;
1684                 err = mlx5_esw_offloads_pair(esw, peer_esw);
1685                 if (err)
1686                         goto err_peer;
1687 
1688                 err = mlx5_esw_offloads_pair(peer_esw, esw);
1689                 if (err)
1690                         goto err_pair;
1691 
1692                 mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
1693                 break;
1694 
1695         case ESW_OFFLOADS_DEVCOM_UNPAIR:
1696                 if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
1697                         break;
1698 
1699                 mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
1700                 mlx5_esw_offloads_unpair(peer_esw);
1701                 mlx5_esw_offloads_unpair(esw);
1702                 mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
1703                 break;
1704         }
1705 
1706         return 0;
1707 
1708 err_pair:
1709         mlx5_esw_offloads_unpair(esw);
1710 err_peer:
1711         mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
1712 err_out:
1713         mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d",
1714                       event, err);
1715         return err;
1716 }
1717 
1718 static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
1719 {
1720         struct mlx5_devcom *devcom = esw->dev->priv.devcom;
1721 
1722         INIT_LIST_HEAD(&esw->offloads.peer_flows);
1723         mutex_init(&esw->offloads.peer_mutex);
1724 
1725         if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
1726                 return;
1727 
1728         mlx5_devcom_register_component(devcom,
1729                                        MLX5_DEVCOM_ESW_OFFLOADS,
1730                                        mlx5_esw_offloads_devcom_event,
1731                                        esw);
1732 
1733         mlx5_devcom_send_event(devcom,
1734                                MLX5_DEVCOM_ESW_OFFLOADS,
1735                                ESW_OFFLOADS_DEVCOM_PAIR, esw);
1736 }
1737 
1738 static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
1739 {
1740         struct mlx5_devcom *devcom = esw->dev->priv.devcom;
1741 
1742         if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
1743                 return;
1744 
1745         mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
1746                                ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
1747 
1748         mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1749 }
1750 
1751 static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
1752                                              struct mlx5_vport *vport)
1753 {
1754         struct mlx5_flow_act flow_act = {0};
1755         struct mlx5_flow_spec *spec;
1756         int err = 0;
1757 
1758         /* In prio tag mode there is only one FTE:
1759          * 1) Untagged packets - push the prio tag VLAN and modify
1760          * metadata if required, then allow.
1761          * Unmatched traffic is allowed by default.
1762          */
1763 
1764         spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1765         if (!spec) {
1766                 err = -ENOMEM;
1767                 goto out_no_mem;
1768         }
1769 
1770         /* Untagged packets - push prio tag VLAN, allow */
1771         MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
1772         MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0);
1773         spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1774         flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
1775                           MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1776         flow_act.vlan[0].ethtype = ETH_P_8021Q;
1777         flow_act.vlan[0].vid = 0;
1778         flow_act.vlan[0].prio = 0;
1779 
1780         if (vport->ingress.modify_metadata_rule) {
1781                 flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1782                 flow_act.modify_hdr = vport->ingress.modify_metadata;
1783         }
1784 
1785         vport->ingress.allow_rule =
1786                 mlx5_add_flow_rules(vport->ingress.acl, spec,
1787                                     &flow_act, NULL, 0);
1788         if (IS_ERR(vport->ingress.allow_rule)) {
1789                 err = PTR_ERR(vport->ingress.allow_rule);
1790                 esw_warn(esw->dev,
1791                          "vport[%d] configure ingress untagged allow rule, err(%d)\n",
1792                          vport->vport, err);
1793                 vport->ingress.allow_rule = NULL;
1794                 goto out;
1795         }
1796 
1797 out:
1798         kvfree(spec);
1799 out_no_mem:
1800         if (err)
1801                 esw_vport_cleanup_ingress_rules(esw, vport);
1802         return err;
1803 }
1804 
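     /* Allocate a modify-header action that writes this vport's metadata
      * into reg_c_0 and install an ingress ACL rule applying it to all
      * packets coming from the vport.
      */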
1805 static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
1806                                                      struct mlx5_vport *vport)
1807 {
1808         u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
1809         static const struct mlx5_flow_spec spec = {};
1810         struct mlx5_flow_act flow_act = {};
1811         int err = 0;
1812 
1813         MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
1814         MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
1815         MLX5_SET(set_action_in, action, data,
1816                  mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
1817 
1818         vport->ingress.modify_metadata =
1819                 mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
1820                                          1, action);
1821         if (IS_ERR(vport->ingress.modify_metadata)) {
1822                 err = PTR_ERR(vport->ingress.modify_metadata);
1823                 esw_warn(esw->dev,
1824                          "failed to alloc modify header for vport %d ingress acl (%d)\n",
1825                          vport->vport, err);
1826                 return err;
1827         }
1828 
1829         flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1830         flow_act.modify_hdr = vport->ingress.modify_metadata;
1831         vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl,
1832                                                                   &spec, &flow_act, NULL, 0);
1833         if (IS_ERR(vport->ingress.modify_metadata_rule)) {
1834                 err = PTR_ERR(vport->ingress.modify_metadata_rule);
1835                 esw_warn(esw->dev,
1836                          "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
1837                          vport->vport, err);
1838                 vport->ingress.modify_metadata_rule = NULL;
1839                 goto out;
1840         }
1841 
1842 out:
1843         if (err)
1844                 mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata);
1845         return err;
1846 }
1847 
1848 void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
1849                                                struct mlx5_vport *vport)
1850 {
1851         if (vport->ingress.modify_metadata_rule) {
1852                 mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
1853                 mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata);
1854 
1855                 vport->ingress.modify_metadata_rule = NULL;
1856         }
1857 }
1858 
1859 static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
1860                                             struct mlx5_vport *vport)
1861 {
1862         struct mlx5_flow_act flow_act = {0};
1863         struct mlx5_flow_spec *spec;
1864         int err = 0;
1865 
1866         if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
1867                 return 0;
1868 
1869         /* In prio tag mode there is only one FTE:
1870          * 1) prio tagged packets - pop the prio tag VLAN, then allow.
1871          * Unmatched traffic is allowed by default.
1872          */
1873 
1874         esw_vport_cleanup_egress_rules(esw, vport);
1875 
1876         err = esw_vport_enable_egress_acl(esw, vport);
1877         if (err) {
1878                 mlx5_core_warn(esw->dev,
1879                                "failed to enable egress acl (%d) on vport[%d]\n",
1880                                err, vport->vport);
1881                 return err;
1882         }
1883 
1884         esw_debug(esw->dev,
1885                   "vport[%d] configure prio tag egress rules\n", vport->vport);
1886 
1887         spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1888         if (!spec) {
1889                 err = -ENOMEM;
1890                 goto out_no_mem;
1891         }
1892 
1893         /* prio tag vlan rule - pop it so VF receives untagged packets */
1894         MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
1895         MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
1896         MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
1897         MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, 0);
1898 
1899         spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1900         flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
1901                           MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1902         vport->egress.allowed_vlan =
1903                 mlx5_add_flow_rules(vport->egress.acl, spec,
1904                                     &flow_act, NULL, 0);
1905         if (IS_ERR(vport->egress.allowed_vlan)) {
1906                 err = PTR_ERR(vport->egress.allowed_vlan);
1907                 esw_warn(esw->dev,
1908                          "vport[%d] configure egress pop prio tag vlan rule failed, err(%d)\n",
1909                          vport->vport, err);
1910                 vport->egress.allowed_vlan = NULL;
1911                 goto out;
1912         }
1913 
1914 out:
1915         kvfree(spec);
1916 out_no_mem:
1917         if (err)
1918                 esw_vport_cleanup_egress_rules(esw, vport);
1919         return err;
1920 }
1921 
1922 static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
1923                                            struct mlx5_vport *vport)
1924 {
1925         int err;
1926 
1927         if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
1928             !MLX5_CAP_GEN(esw->dev, prio_tag_required))
1929                 return 0;
1930 
1931         esw_vport_cleanup_ingress_rules(esw, vport);
1932 
1933         err = esw_vport_enable_ingress_acl(esw, vport);
1934         if (err) {
1935                 esw_warn(esw->dev,
1936                          "failed to enable ingress acl (%d) on vport[%d]\n",
1937                          err, vport->vport);
1938                 return err;
1939         }
1940 
1941         esw_debug(esw->dev,
1942                   "vport[%d] configure ingress rules\n", vport->vport);
1943 
1944         if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1945                 err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
1946                 if (err)
1947                         goto out;
1948         }
1949 
1950         if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
1951             mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
1952                 err = esw_vport_ingress_prio_tag_config(esw, vport);
1953                 if (err)
1954                         goto out;
1955         }
1956 
1957 out:
1958         if (err)
1959                 esw_vport_disable_ingress_acl(esw, vport);
1960         return err;
1961 }
1962 
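     /* Matching on vport metadata requires an uplink ingress ACL, reg_c_0
      * propagation from the FDB and the flow_source capability; it is
      * currently not used when an ECPF is present.
      */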
1963 static bool
1964 esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
1965 {
1966         if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
1967                 return false;
1968 
1969         if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
1970               MLX5_FDB_TO_VPORT_REG_C_0))
1971                 return false;
1972 
1973         if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
1974                 return false;
1975 
1976         if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
1977             mlx5_ecpf_vport_exists(esw->dev))
1978                 return false;
1979 
1980         return true;
1981 }
1982 
1983 static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
1984 {
1985         struct mlx5_vport *vport;
1986         int i, j;
1987         int err;
1988 
1989         if (esw_check_vport_match_metadata_supported(esw))
1990                 esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
1991 
1992         mlx5_esw_for_all_vports(esw, i, vport) {
1993                 err = esw_vport_ingress_common_config(esw, vport);
1994                 if (err)
1995                         goto err_ingress;
1996 
1997                 if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
1998                         err = esw_vport_egress_prio_tag_config(esw, vport);
1999                         if (err)
2000                                 goto err_egress;
2001                 }
2002         }
2003 
2004         if (mlx5_eswitch_vport_match_metadata_enabled(esw))
2005                 esw_info(esw->dev, "Use metadata reg_c as source vport to match\n");
2006 
2007         return 0;
2008 
2009 err_egress:
2010         esw_vport_disable_ingress_acl(esw, vport);
2011 err_ingress:
2012         for (j = MLX5_VPORT_PF; j < i; j++) {
2013                 vport = &esw->vports[j];
2014                 esw_vport_disable_egress_acl(esw, vport);
2015                 esw_vport_disable_ingress_acl(esw, vport);
2016         }
2017 
2018         return err;
2019 }
2020 
2021 static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
2022 {
2023         struct mlx5_vport *vport;
2024         int i;
2025 
2026         mlx5_esw_for_all_vports(esw, i, vport) {
2027                 esw_vport_disable_egress_acl(esw, vport);
2028                 esw_vport_disable_ingress_acl(esw, vport);
2029         }
2030 
2031         esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
2032 }
2033 
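     /* Create all offloads steering objects: the per-vport ACL tables, the
      * FDB tables and the offloads RX table with its vport flow group.
      */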
2034 static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
2035 {
2036         int num_vfs = esw->esw_funcs.num_vfs;
2037         int total_vports;
2038         int err;
2039 
2040         if (mlx5_core_is_ecpf_esw_manager(esw->dev))
2041                 total_vports = esw->total_vports;
2042         else
2043                 total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev);
2044 
2045         memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
2046         mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
2047 
2048         err = esw_create_offloads_acl_tables(esw);
2049         if (err)
2050                 return err;
2051 
2052         err = esw_create_offloads_fdb_tables(esw, total_vports);
2053         if (err)
2054                 goto create_fdb_err;
2055 
2056         err = esw_create_offloads_table(esw, total_vports);
2057         if (err)
2058                 goto create_ft_err;
2059 
2060         err = esw_create_vport_rx_group(esw, total_vports);
2061         if (err)
2062                 goto create_fg_err;
2063 
2064         return 0;
2065 
2066 create_fg_err:
2067         esw_destroy_offloads_table(esw);
2068 
2069 create_ft_err:
2070         esw_destroy_offloads_fdb_tables(esw);
2071 
2072 create_fdb_err:
2073         esw_destroy_offloads_acl_tables(esw);
2074 
2075         return err;
2076 }
2077 
2078 static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
2079 {
2080         esw_destroy_vport_rx_group(esw);
2081         esw_destroy_offloads_table(esw);
2082         esw_destroy_offloads_fdb_tables(esw);
2083         esw_destroy_offloads_acl_tables(esw);
2084 }
2085 
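     /* Handle a change in the number of host VFs reported by
      * QUERY_ESW_FUNCTIONS by loading or unloading the VF representors.
      */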
2086 static void
2087 esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
2088 {
2089         bool host_pf_disabled;
2090         u16 new_num_vfs;
2091 
2092         new_num_vfs = MLX5_GET(query_esw_functions_out, out,
2093                                host_params_context.host_num_of_vfs);
2094         host_pf_disabled = MLX5_GET(query_esw_functions_out, out,
2095                                     host_params_context.host_pf_disabled);
2096 
2097         if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled)
2098                 return;
2099 
2100         /* Number of VFs can only change from "0 to x" or "x to 0". */
2101         if (esw->esw_funcs.num_vfs > 0) {
2102                 esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs);
2103         } else {
2104                 int err;
2105 
2106                 err = esw_offloads_load_vf_reps(esw, new_num_vfs);
2107                 if (err)
2108                         return;
2109         }
2110         esw->esw_funcs.num_vfs = new_num_vfs;
2111 }
2112 
2113 static void esw_functions_changed_event_handler(struct work_struct *work)
2114 {
2115         struct mlx5_host_work *host_work;
2116         struct mlx5_eswitch *esw;
2117         const u32 *out;
2118 
2119         host_work = container_of(work, struct mlx5_host_work, work);
2120         esw = host_work->esw;
2121 
2122         out = mlx5_esw_query_functions(esw->dev);
2123         if (IS_ERR(out))
2124                 goto out;
2125 
2126         esw_vfs_changed_event_handler(esw, out);
2127         kvfree(out);
2128 out:
2129         kfree(host_work);
2130 }
2131 
2132 int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data)
2133 {
2134         struct mlx5_esw_functions *esw_funcs;
2135         struct mlx5_host_work *host_work;
2136         struct mlx5_eswitch *esw;
2137 
2138         host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC);
2139         if (!host_work)
2140                 return NOTIFY_DONE;
2141 
2142         esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb);
2143         esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs);
2144 
2145         host_work->esw = esw;
2146 
2147         INIT_WORK(&host_work->work, esw_functions_changed_event_handler);
2148         queue_work(esw->work_queue, &host_work->work);
2149 
2150         return NOTIFY_OK;
2151 }
2152 
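     /* Enable offloads mode: set up steering, start passing vport metadata,
      * enable the PF/VF vports and load all representors.
      */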
2153 int esw_offloads_enable(struct mlx5_eswitch *esw)
2154 {
2155         int err;
2156 
2157         if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
2158             MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap))
2159                 esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
2160         else
2161                 esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
2162 
2163         mlx5_rdma_enable_roce(esw->dev);
2164         err = esw_offloads_steering_init(esw);
2165         if (err)
2166                 goto err_steering_init;
2167 
2168         err = esw_set_passing_vport_metadata(esw, true);
2169         if (err)
2170                 goto err_vport_metadata;
2171 
2172         mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE);
2173 
2174         err = esw_offloads_load_all_reps(esw);
2175         if (err)
2176                 goto err_reps;
2177 
2178         esw_offloads_devcom_init(esw);
2179         mutex_init(&esw->offloads.termtbl_mutex);
2180 
2181         return 0;
2182 
2183 err_reps:
2184         mlx5_eswitch_disable_pf_vf_vports(esw);
2185         esw_set_passing_vport_metadata(esw, false);
2186 err_vport_metadata:
2187         esw_offloads_steering_cleanup(esw);
2188 err_steering_init:
2189         mlx5_rdma_disable_roce(esw->dev);
2190         return err;
2191 }
2192 
2193 static int esw_offloads_stop(struct mlx5_eswitch *esw,
2194                              struct netlink_ext_ack *extack)
2195 {
2196         int err, err1;
2197 
2198         mlx5_eswitch_disable(esw);
2199         err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
2200         if (err) {
2201                 NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
2202                 err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
2203                 if (err1) {
2204                         NL_SET_ERR_MSG_MOD(extack,
2205                                            "Failed setting eswitch back to offloads");
2206                 }
2207         }
2208 
2209         return err;
2210 }
2211 
2212 void esw_offloads_disable(struct mlx5_eswitch *esw)
2213 {
2214         esw_offloads_devcom_cleanup(esw);
2215         esw_offloads_unload_all_reps(esw);
2216         mlx5_eswitch_disable_pf_vf_vports(esw);
2217         esw_set_passing_vport_metadata(esw, false);
2218         esw_offloads_steering_cleanup(esw);
2219         mlx5_rdma_disable_roce(esw->dev);
2220         esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
2221 }
2222 
2223 static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
2224 {
2225         switch (mode) {
2226         case DEVLINK_ESWITCH_MODE_LEGACY:
2227                 *mlx5_mode = MLX5_ESWITCH_LEGACY;
2228                 break;
2229         case DEVLINK_ESWITCH_MODE_SWITCHDEV:
2230                 *mlx5_mode = MLX5_ESWITCH_OFFLOADS;
2231                 break;
2232         default:
2233                 return -EINVAL;
2234         }
2235 
2236         return 0;
2237 }
2238 
2239 static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
2240 {
2241         switch (mlx5_mode) {
2242         case MLX5_ESWITCH_LEGACY:
2243                 *mode = DEVLINK_ESWITCH_MODE_LEGACY;
2244                 break;
2245         case MLX5_ESWITCH_OFFLOADS:
2246                 *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
2247                 break;
2248         default:
2249                 return -EINVAL;
2250         }
2251 
2252         return 0;
2253 }
2254 
2255 static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode)
2256 {
2257         switch (mode) {
2258         case DEVLINK_ESWITCH_INLINE_MODE_NONE:
2259                 *mlx5_mode = MLX5_INLINE_MODE_NONE;
2260                 break;
2261         case DEVLINK_ESWITCH_INLINE_MODE_LINK:
2262                 *mlx5_mode = MLX5_INLINE_MODE_L2;
2263                 break;
2264         case DEVLINK_ESWITCH_INLINE_MODE_NETWORK:
2265                 *mlx5_mode = MLX5_INLINE_MODE_IP;
2266                 break;
2267         case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT:
2268                 *mlx5_mode = MLX5_INLINE_MODE_TCP_UDP;
2269                 break;
2270         default:
2271                 return -EINVAL;
2272         }
2273 
2274         return 0;
2275 }
2276 
2277 static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
2278 {
2279         switch (mlx5_mode) {
2280         case MLX5_INLINE_MODE_NONE:
2281                 *mode = DEVLINK_ESWITCH_INLINE_MODE_NONE;
2282                 break;
2283         case MLX5_INLINE_MODE_L2:
2284                 *mode = DEVLINK_ESWITCH_INLINE_MODE_LINK;
2285                 break;
2286         case MLX5_INLINE_MODE_IP:
2287                 *mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK;
2288                 break;
2289         case MLX5_INLINE_MODE_TCP_UDP:
2290                 *mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT;
2291                 break;
2292         default:
2293                 return -EINVAL;
2294         }
2295 
2296         return 0;
2297 }
2298 
2299 static int mlx5_devlink_eswitch_check(struct devlink *devlink)
2300 {
2301         struct mlx5_core_dev *dev = devlink_priv(devlink);
2302 
2303         if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
2304                 return -EOPNOTSUPP;
2305 
2306         if (!MLX5_ESWITCH_MANAGER(dev))
2307                 return -EPERM;
2308 
2309         if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
2310             !mlx5_core_is_ecpf_esw_manager(dev))
2311                 return -EOPNOTSUPP;
2312 
2313         return 0;
2314 }
2315 
2316 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
2317                                   struct netlink_ext_ack *extack)
2318 {
2319         struct mlx5_core_dev *dev = devlink_priv(devlink);
2320         u16 cur_mlx5_mode, mlx5_mode = 0;
2321         int err;
2322 
2323         err = mlx5_devlink_eswitch_check(devlink);
2324         if (err)
2325                 return err;
2326 
2327         cur_mlx5_mode = dev->priv.eswitch->mode;
2328 
2329         if (esw_mode_from_devlink(mode, &mlx5_mode))
2330                 return -EINVAL;
2331 
2332         if (cur_mlx5_mode == mlx5_mode)
2333                 return 0;
2334 
2335         if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
2336                 return esw_offloads_start(dev->priv.eswitch, extack);
2337         else if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
2338                 return esw_offloads_stop(dev->priv.eswitch, extack);
2339         else
2340                 return -EINVAL;
2341 }
2342 
2343 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
2344 {
2345         struct mlx5_core_dev *dev = devlink_priv(devlink);
2346         int err;
2347 
2348         err = mlx5_devlink_eswitch_check(devlink);
2349         if (err)
2350                 return err;
2351 
2352         return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
2353 }
2354 
2355 int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
2356                                          struct netlink_ext_ack *extack)
2357 {
2358         struct mlx5_core_dev *dev = devlink_priv(devlink);
2359         struct mlx5_eswitch *esw = dev->priv.eswitch;
2360         int err, vport, num_vport;
2361         u8 mlx5_mode;
2362 
2363         err = mlx5_devlink_eswitch_check(devlink);
2364         if (err)
2365                 return err;
2366 
2367         switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
2368         case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
2369                 if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
2370                         return 0;
2371                 /* fall through */
2372         case MLX5_CAP_INLINE_MODE_L2:
2373                 NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
2374                 return -EOPNOTSUPP;
2375         case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
2376                 break;
2377         }
2378 
2379         if (atomic64_read(&esw->offloads.num_flows) > 0) {
2380                 NL_SET_ERR_MSG_MOD(extack,
2381                                    "Can't set inline mode when flows are configured");
2382                 return -EOPNOTSUPP;
2383         }
2384 
2385         err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
2386         if (err)
2387                 goto out;
2388 
2389         mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
2390                 err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
2391                 if (err) {
2392                         NL_SET_ERR_MSG_MOD(extack,
2393                                            "Failed to set min inline on vport");
2394                         goto revert_inline_mode;
2395                 }
2396         }
2397 
2398         esw->offloads.inline_mode = mlx5_mode;
2399         return 0;
2400 
2401 revert_inline_mode:
2402         num_vport = --vport;
2403         mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport)
2404                 mlx5_modify_nic_vport_min_inline(dev,
2405                                                  vport,
2406                                                  esw->offloads.inline_mode);
2407 out:
2408         return err;
2409 }
2410 
2411 int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
2412 {
2413         struct mlx5_core_dev *dev = devlink_priv(devlink);
2414         struct mlx5_eswitch *esw = dev->priv.eswitch;
2415         int err;
2416 
2417         err = mlx5_devlink_eswitch_check(devlink);
2418         if (err)
2419                 return err;
2420 
2421         return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
2422 }
2423 
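     /* Return the effective min-inline mode. With per-vport inline mode, all
      * host function vports must report the same value, otherwise -EINVAL.
      */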
2424 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
2425 {
2426         u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
2427         struct mlx5_core_dev *dev = esw->dev;
2428         int vport;
2429 
2430         if (!MLX5_CAP_GEN(dev, vport_group_manager))
2431                 return -EOPNOTSUPP;
2432 
2433         if (esw->mode == MLX5_ESWITCH_NONE)
2434                 return -EOPNOTSUPP;
2435 
2436         switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
2437         case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
2438                 mlx5_mode = MLX5_INLINE_MODE_NONE;
2439                 goto out;
2440         case MLX5_CAP_INLINE_MODE_L2:
2441                 mlx5_mode = MLX5_INLINE_MODE_L2;
2442                 goto out;
2443         case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
2444                 goto query_vports;
2445         }
2446 
2447 query_vports:
2448         mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
2449         mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
2450                 mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
2451                 if (prev_mlx5_mode != mlx5_mode)
2452                         return -EINVAL;
2453                 prev_mlx5_mode = mlx5_mode;
2454         }
2455 
2456 out:
2457         *mode = mlx5_mode;
2458         return 0;
2459 }
2460 
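     /* Changing the encap mode requires recreating the fast path FDB tables,
      * so it is only allowed while no offloaded flows are installed.
      */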
2461 int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
2462                                         enum devlink_eswitch_encap_mode encap,
2463                                         struct netlink_ext_ack *extack)
2464 {
2465         struct mlx5_core_dev *dev = devlink_priv(devlink);
2466         struct mlx5_eswitch *esw = dev->priv.eswitch;
2467         int err;
2468 
2469         err = mlx5_devlink_eswitch_check(devlink);
2470         if (err)
2471                 return err;
2472 
2473         if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
2474             (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
2475              !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)))
2476                 return -EOPNOTSUPP;
2477 
2478         if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC)
2479                 return -EOPNOTSUPP;
2480 
2481         if (esw->mode == MLX5_ESWITCH_LEGACY) {
2482                 esw->offloads.encap = encap;
2483                 return 0;
2484         }
2485 
2486         if (esw->offloads.encap == encap)
2487                 return 0;
2488 
2489         if (atomic64_read(&esw->offloads.num_flows) > 0) {
2490                 NL_SET_ERR_MSG_MOD(extack,
2491                                    "Can't set encapsulation when flows are configured");
2492                 return -EOPNOTSUPP;
2493         }
2494 
2495         esw_destroy_offloads_fdb_tables(esw);
2496 
2497         esw->offloads.encap = encap;
2498 
2499         err = esw_create_offloads_fdb_tables(esw, esw->nvports);
2500 
2501         if (err) {
2502                 NL_SET_ERR_MSG_MOD(extack,
2503                                    "Failed re-creating fast FDB table");
2504                 esw->offloads.encap = !encap;
2505                 (void)esw_create_offloads_fdb_tables(esw, esw->nvports);
2506         }
2507 
2508         return err;
2509 }
2510 
2511 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
2512                                         enum devlink_eswitch_encap_mode *encap)
2513 {
2514         struct mlx5_core_dev *dev = devlink_priv(devlink);
2515         struct mlx5_eswitch *esw = dev->priv.eswitch;
2516         int err;
2517 
2518         err = mlx5_devlink_eswitch_check(devlink);
2519         if (err)
2520                 return err;
2521 
2522         *encap = esw->offloads.encap;
2523         return 0;
2524 }
2525 
2526 void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
2527                                       const struct mlx5_eswitch_rep_ops *ops,
2528                                       u8 rep_type)
2529 {
2530         struct mlx5_eswitch_rep_data *rep_data;
2531         struct mlx5_eswitch_rep *rep;
2532         int i;
2533 
2534         esw->offloads.rep_ops[rep_type] = ops;
2535         mlx5_esw_for_all_reps(esw, i, rep) {
2536                 rep_data = &rep->rep_data[rep_type];
2537                 atomic_set(&rep_data->state, REP_REGISTERED);
2538         }
2539 }
2540 EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
2541 
2542 void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
2543 {
2544         struct mlx5_eswitch_rep *rep;
2545         int i;
2546 
2547         if (esw->mode == MLX5_ESWITCH_OFFLOADS)
2548                 __unload_reps_all_vport(esw, rep_type);
2549 
2550         mlx5_esw_for_all_reps(esw, i, rep)
2551                 atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
2552 }
2553 EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
2554 
2555 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
2556 {
2557         struct mlx5_eswitch_rep *rep;
2558 
2559         rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
2560         return rep->rep_data[rep_type].priv;
2561 }
2562 
2563 void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
2564                                  u16 vport,
2565                                  u8 rep_type)
2566 {
2567         struct mlx5_eswitch_rep *rep;
2568 
2569         rep = mlx5_eswitch_get_rep(esw, vport);
2570 
2571         if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
2572             esw->offloads.rep_ops[rep_type]->get_proto_dev)
2573                 return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep);
2574         return NULL;
2575 }
2576 EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
2577 
2578 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
2579 {
2580         return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type);
2581 }
2582 EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
2583 
2584 struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
2585                                                 u16 vport)
2586 {
2587         return mlx5_eswitch_get_rep(esw, vport);
2588 }
2589 EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
2590 
2591 bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
2592 {
2593         return vport_num >= MLX5_VPORT_FIRST_VF &&
2594                vport_num <= esw->dev->priv.sriov.max_vfs;
2595 }
2596 
2597 bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
2598 {
2599         return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
2600 }
2601 EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
2602 
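     /* Metadata value identifying the source vport in reg_c_0: the low 16
      * bits of the VHCA id in the upper half, the vport number in the
      * lower half.
      */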
2603 u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
2604                                               u16 vport_num)
2605 {
2606         return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num;
2607 }
2608 EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
