drivers/infiniband/hw/mlx4/mcg.c


DEFINITIONS

This source file includes the following definitions:
  1. get_state_string
  2. mcast_find
  3. mcast_insert
  4. send_mad_to_wire
  5. send_mad_to_slave
  6. send_join_to_wire
  7. send_leave_to_wire
  8. send_reply_to_slave
  9. check_selector
  10. cmp_rec
  11. release_group
  12. adjust_membership
  13. get_leave_state
  14. join_group
  15. leave_group
  16. check_leave
  17. mlx4_ib_mcg_timeout_handler
  18. handle_leave_req
  19. handle_join_req
  20. mlx4_ib_mcg_work_handler
  21. search_relocate_mgid0_group
  22. acquire_group
  23. queue_req
  24. mlx4_ib_mcg_demux_handler
  25. mlx4_ib_mcg_multiplex_handler
  26. sysfs_show_group
  27. mlx4_ib_mcg_port_init
  28. force_clean_group
  29. _mlx4_ib_mcg_port_cleanup
  30. mcg_clean_task
  31. mlx4_ib_mcg_port_cleanup
  32. build_leave_mad
  33. clear_pending_reqs
  34. push_deleteing_req
  35. clean_vf_mcast
  36. mlx4_ib_mcg_init
  37. mlx4_ib_mcg_destroy

   1 /*
   2  * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
   3  *
   4  * This software is available to you under a choice of one of two
   5  * licenses.  You may choose to be licensed under the terms of the GNU
   6  * General Public License (GPL) Version 2, available from the file
   7  * COPYING in the main directory of this source tree, or the
   8  * OpenIB.org BSD license below:
   9  *
  10  *     Redistribution and use in source and binary forms, with or
  11  *     without modification, are permitted provided that the following
  12  *     conditions are met:
  13  *
  14  *      - Redistributions of source code must retain the above
  15  *        copyright notice, this list of conditions and the following
  16  *        disclaimer.
  17  *
  18  *      - Redistributions in binary form must reproduce the above
  19  *        copyright notice, this list of conditions and the following
  20  *        disclaimer in the documentation and/or other materials
  21  *        provided with the distribution.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30  * SOFTWARE.
  31  */
  32 
  33 #include <rdma/ib_mad.h>
  34 #include <rdma/ib_smi.h>
  35 #include <rdma/ib_cache.h>
  36 #include <rdma/ib_sa.h>
  37 
  38 #include <linux/mlx4/cmd.h>
  39 #include <linux/rbtree.h>
  40 #include <linux/delay.h>
  41 
  42 #include "mlx4_ib.h"
  43 
  44 #define MAX_VFS         80
  45 #define MAX_PEND_REQS_PER_FUNC 4
  46 #define MAD_TIMEOUT_MS  2000
  47 
  48 #define mcg_warn(fmt, arg...)   pr_warn("MCG WARNING: " fmt, ##arg)
  49 #define mcg_error(fmt, arg...)  pr_err(fmt, ##arg)
  50 #define mcg_warn_group(group, format, arg...) \
  51         pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
  52         (group)->name, (group)->demux->port, ## arg)
  53 
  54 #define mcg_debug_group(group, format, arg...) \
  55         pr_debug("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
  56                  (group)->name, (group)->demux->port, ## arg)
  57 
  58 #define mcg_error_group(group, format, arg...) \
  59         pr_err("  %16s: " format, (group)->name, ## arg)
  60 
  61 
  62 static union ib_gid mgid0;
  63 
  64 static struct workqueue_struct *clean_wq;
  65 
  66 enum mcast_state {
  67         MCAST_NOT_MEMBER = 0,
  68         MCAST_MEMBER,
  69 };
  70 
  71 enum mcast_group_state {
  72         MCAST_IDLE,
  73         MCAST_JOIN_SENT,
  74         MCAST_LEAVE_SENT,
  75         MCAST_RESP_READY
  76 };
  77 
  78 struct mcast_member {
  79         enum mcast_state state;
  80         uint8_t                 join_state;
  81         int                     num_pend_reqs;
  82         struct list_head        pending;
  83 };
  84 
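     /*
      * Wire-format SA MCMemberRecord as carried in the MAD data area.
      * The struct is packed so it can be overlaid directly on
      * ib_sa_mad.data when parsing requests and building responses.
      */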
  85 struct ib_sa_mcmember_data {
  86         union ib_gid    mgid;
  87         union ib_gid    port_gid;
  88         __be32          qkey;
  89         __be16          mlid;
  90         u8              mtusel_mtu;
  91         u8              tclass;
  92         __be16          pkey;
  93         u8              ratesel_rate;
  94         u8              lifetmsel_lifetm;
  95         __be32          sl_flowlabel_hoplimit;
  96         u8              scope_join_state;
  97         u8              proxy_join;
  98         u8              reserved[2];
  99 } __packed __aligned(4);
 100 
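     /*
      * Per-MGID state kept by the PF while proxying SA membership for its
      * VFs: the rb-tree node (keyed by MGID), the cached MCMemberRecord,
      * per-VF membership in func[], the queue of pending VF requests and
      * the join/leave state machine driven by work and timeout_work.
      */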
 101 struct mcast_group {
 102         struct ib_sa_mcmember_data rec;
 103         struct rb_node          node;
 104         struct list_head        mgid0_list;
 105         struct mlx4_ib_demux_ctx *demux;
 106         struct mcast_member     func[MAX_VFS];
 107         struct mutex            lock;
 108         struct work_struct      work;
 109         struct list_head        pending_list;
 110         int                     members[3];
 111         enum mcast_group_state  state;
 112         enum mcast_group_state  prev_state;
 113         struct ib_sa_mad        response_sa_mad;
 114         __be64                  last_req_tid;
 115 
 116         char                    name[33]; /* MGID string */
 117         struct device_attribute dentry;
 118 
 119         /* refcount is the reference count for the following:
 120            1. Each queued request
 121            2. Each invocation of the worker thread
 122            3. Membership of the port at the SA
 123         */
 124         atomic_t                refcount;
 125 
 126         /* delayed work to clean pending SM request */
 127         struct delayed_work     timeout_work;
 128         struct list_head        cleanup_list;
 129 };
 130 
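     /*
      * One queued join/leave request from a VF.  Each request is linked
      * twice: on the group's pending_list via group_list and on the
      * requesting VF's per-function list via func_list.  'clean' marks a
      * synthetic leave generated while cleaning up a VF.
      */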
 131 struct mcast_req {
 132         int                     func;
 133         struct ib_sa_mad        sa_mad;
 134         struct list_head        group_list;
 135         struct list_head        func_list;
 136         struct mcast_group      *group;
 137         int                     clean;
 138 };
 139 
 140 
 141 #define safe_atomic_dec(ref) \
 142         do {\
 143                 if (atomic_dec_and_test(ref)) \
 144                         mcg_warn_group(group, "did not expect to reach zero\n"); \
 145         } while (0)
 146 
 147 static const char *get_state_string(enum mcast_group_state state)
 148 {
 149         switch (state) {
 150         case MCAST_IDLE:
 151                 return "MCAST_IDLE";
 152         case MCAST_JOIN_SENT:
 153                 return "MCAST_JOIN_SENT";
 154         case MCAST_LEAVE_SENT:
 155                 return "MCAST_LEAVE_SENT";
 156         case MCAST_RESP_READY:
 157                 return "MCAST_RESP_READY";
 158         }
 159         return "Invalid State";
 160 }
 161 
 162 static struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx,
 163                                       union ib_gid *mgid)
 164 {
 165         struct rb_node *node = ctx->mcg_table.rb_node;
 166         struct mcast_group *group;
 167         int ret;
 168 
 169         while (node) {
 170                 group = rb_entry(node, struct mcast_group, node);
 171                 ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
 172                 if (!ret)
 173                         return group;
 174 
 175                 if (ret < 0)
 176                         node = node->rb_left;
 177                 else
 178                         node = node->rb_right;
 179         }
 180         return NULL;
 181 }
 182 
 183 static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
 184                                         struct mcast_group *group)
 185 {
 186         struct rb_node **link = &ctx->mcg_table.rb_node;
 187         struct rb_node *parent = NULL;
 188         struct mcast_group *cur_group;
 189         int ret;
 190 
 191         while (*link) {
 192                 parent = *link;
 193                 cur_group = rb_entry(parent, struct mcast_group, node);
 194 
 195                 ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
 196                              sizeof group->rec.mgid);
 197                 if (ret < 0)
 198                         link = &(*link)->rb_left;
 199                 else if (ret > 0)
 200                         link = &(*link)->rb_right;
 201                 else
 202                         return cur_group;
 203         }
 204         rb_link_node(&group->node, parent, link);
 205         rb_insert_color(&group->node, &ctx->mcg_table);
 206         return NULL;
 207 }
 208 
 209 static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
 210 {
 211         struct mlx4_ib_dev *dev = ctx->dev;
 212         struct rdma_ah_attr     ah_attr;
 213         unsigned long flags;
 214 
 215         spin_lock_irqsave(&dev->sm_lock, flags);
 216         if (!dev->sm_ah[ctx->port - 1]) {
 217                 /* port is not yet Active, sm_ah not ready */
 218                 spin_unlock_irqrestore(&dev->sm_lock, flags);
 219                 return -EAGAIN;
 220         }
 221         mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
 222         spin_unlock_irqrestore(&dev->sm_lock, flags);
 223         return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
 224                                     ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
 225                                     &ah_attr, NULL, 0xffff, mad);
 226 }
 227 
 228 static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
 229                              struct ib_mad *mad)
 230 {
 231         struct mlx4_ib_dev *dev = ctx->dev;
 232         struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
 233         struct ib_wc wc;
 234         struct rdma_ah_attr ah_attr;
 235 
 236         /* Our agent might not yet be registered when mads start to arrive */
 237         if (!agent)
 238                 return -EAGAIN;
 239 
 240         rdma_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
 241 
 242         if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index))
 243                 return -EINVAL;
 244         wc.sl = 0;
 245         wc.dlid_path_bits = 0;
 246         wc.port_num = ctx->port;
 247         wc.slid = rdma_ah_get_dlid(&ah_attr);  /* opensm lid */
 248         wc.src_qp = 1;
 249         return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
 250 }
 251 
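     /*
      * Forward a VF's join request to the SA: reuse the VF's MAD, but
      * rewrite the port GID to the PF's own GID (slave 0) and assign a
      * PF-owned TID, then arm the MAD timeout in case the SM never answers.
      */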
 252 static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad)
 253 {
 254         struct ib_sa_mad mad;
 255         struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data;
 256         int ret;
 257 
  258         /* we rely on the MAD request as it arrived from the VF */
 259         memcpy(&mad, sa_mad, sizeof mad);
 260 
 261         /* fix port GID to be the real one (slave 0) */
 262         sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0];
 263 
 264         /* assign our own TID */
 265         mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
 266         group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
 267 
 268         ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
 269         /* set timeout handler */
 270         if (!ret) {
 271                 /* calls mlx4_ib_mcg_timeout_handler */
 272                 queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
 273                                 msecs_to_jiffies(MAD_TIMEOUT_MS));
 274         }
 275 
 276         return ret;
 277 }
 278 
 279 static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
 280 {
 281         struct ib_sa_mad mad;
 282         struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
 283         int ret;
 284 
 285         memset(&mad, 0, sizeof mad);
 286         mad.mad_hdr.base_version = 1;
 287         mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
 288         mad.mad_hdr.class_version = 2;
 289         mad.mad_hdr.method = IB_SA_METHOD_DELETE;
 290         mad.mad_hdr.status = cpu_to_be16(0);
 291         mad.mad_hdr.class_specific = cpu_to_be16(0);
 292         mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
 293         group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
 294         mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
 295         mad.mad_hdr.attr_mod = cpu_to_be32(0);
 296         mad.sa_hdr.sm_key = 0x0;
 297         mad.sa_hdr.attr_offset = cpu_to_be16(7);
 298         mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID |
 299                 IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE;
 300 
 301         *sa_data = group->rec;
 302         sa_data->scope_join_state = join_state;
 303 
 304         ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
 305         if (ret)
 306                 group->state = MCAST_IDLE;
 307 
 308         /* set timeout handler */
 309         if (!ret) {
 310                 /* calls mlx4_ib_mcg_timeout_handler */
 311                 queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
 312                                 msecs_to_jiffies(MAD_TIMEOUT_MS));
 313         }
 314 
 315         return ret;
 316 }
 317 
 318 static int send_reply_to_slave(int slave, struct mcast_group *group,
 319                 struct ib_sa_mad *req_sa_mad, u16 status)
 320 {
 321         struct ib_sa_mad mad;
 322         struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
 323         struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data;
 324         int ret;
 325 
 326         memset(&mad, 0, sizeof mad);
 327         mad.mad_hdr.base_version = 1;
 328         mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
 329         mad.mad_hdr.class_version = 2;
 330         mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
 331         mad.mad_hdr.status = cpu_to_be16(status);
 332         mad.mad_hdr.class_specific = cpu_to_be16(0);
 333         mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid;
 334         *(u8 *)&mad.mad_hdr.tid = 0; /* resetting tid to 0 */
 335         mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
 336         mad.mad_hdr.attr_mod = cpu_to_be32(0);
 337         mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key;
 338         mad.sa_hdr.attr_offset = cpu_to_be16(7);
 339         mad.sa_hdr.comp_mask = 0; /* ignored on responses, see IBTA spec */
 340 
 341         *sa_data = group->rec;
 342 
 343         /* reconstruct VF's requested join_state and port_gid */
 344         sa_data->scope_join_state &= 0xf0;
 345         sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f);
 346         memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid);
 347 
 348         ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad);
 349         return ret;
 350 }
 351 
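     /*
      * Evaluate one selector/value pair from the component mask.  The top
      * two bits of the request's value byte choose the comparison
      * (greater-than, less-than or exact); returns nonzero if the group's
      * current value (src) does not satisfy the requested constraint (dst).
      * Ignored unless both the selector and the value bits are set.
      */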
 352 static int check_selector(ib_sa_comp_mask comp_mask,
 353                           ib_sa_comp_mask selector_mask,
 354                           ib_sa_comp_mask value_mask,
 355                           u8 src_value, u8 dst_value)
 356 {
 357         int err;
 358         u8 selector = dst_value >> 6;
 359         dst_value &= 0x3f;
 360         src_value &= 0x3f;
 361 
 362         if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
 363                 return 0;
 364 
 365         switch (selector) {
 366         case IB_SA_GT:
 367                 err = (src_value <= dst_value);
 368                 break;
 369         case IB_SA_LT:
 370                 err = (src_value >= dst_value);
 371                 break;
 372         case IB_SA_EQ:
 373                 err = (src_value != dst_value);
 374                 break;
 375         default:
 376                 err = 0;
 377                 break;
 378         }
 379 
 380         return err;
 381 }
 382 
 383 static u16 cmp_rec(struct ib_sa_mcmember_data *src,
 384                    struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask)
 385 {
 386         /* src is group record, dst is request record */
 387         /* MGID must already match */
 388         /* Port_GID we always replace to our Port_GID, so it is a match */
 389 
 390 #define MAD_STATUS_REQ_INVALID 0x0200
 391         if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
 392                 return MAD_STATUS_REQ_INVALID;
 393         if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
 394                 return MAD_STATUS_REQ_INVALID;
 395         if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
 396                                  IB_SA_MCMEMBER_REC_MTU,
 397                                  src->mtusel_mtu, dst->mtusel_mtu))
 398                 return MAD_STATUS_REQ_INVALID;
 399         if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
 400             src->tclass != dst->tclass)
 401                 return MAD_STATUS_REQ_INVALID;
 402         if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
 403                 return MAD_STATUS_REQ_INVALID;
 404         if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
 405                                  IB_SA_MCMEMBER_REC_RATE,
 406                                  src->ratesel_rate, dst->ratesel_rate))
 407                 return MAD_STATUS_REQ_INVALID;
 408         if (check_selector(comp_mask,
 409                                  IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
 410                                  IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
 411                                  src->lifetmsel_lifetm, dst->lifetmsel_lifetm))
 412                 return MAD_STATUS_REQ_INVALID;
 413         if (comp_mask & IB_SA_MCMEMBER_REC_SL &&
 414                         (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) !=
 415                         (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000))
 416                 return MAD_STATUS_REQ_INVALID;
 417         if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
 418                         (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) !=
 419                         (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00))
 420                 return MAD_STATUS_REQ_INVALID;
 421         if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
 422                         (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) !=
 423                         (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff))
 424                 return MAD_STATUS_REQ_INVALID;
 425         if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE &&
 426                         (src->scope_join_state & 0xf0) !=
 427                         (dst->scope_join_state & 0xf0))
 428                 return MAD_STATUS_REQ_INVALID;
 429 
 430         /* join_state checked separately, proxy_join ignored */
 431 
 432         return 0;
 433 }
 434 
  435 /* release group, return 1 if this was the last release and the group is destroyed
  436  * timeout work is canceled synchronously */
 437 static int release_group(struct mcast_group *group, int from_timeout_handler)
 438 {
 439         struct mlx4_ib_demux_ctx *ctx = group->demux;
 440         int nzgroup;
 441 
 442         mutex_lock(&ctx->mcg_table_lock);
 443         mutex_lock(&group->lock);
 444         if (atomic_dec_and_test(&group->refcount)) {
 445                 if (!from_timeout_handler) {
 446                         if (group->state != MCAST_IDLE &&
 447                             !cancel_delayed_work(&group->timeout_work)) {
 448                                 atomic_inc(&group->refcount);
 449                                 mutex_unlock(&group->lock);
 450                                 mutex_unlock(&ctx->mcg_table_lock);
 451                                 return 0;
 452                         }
 453                 }
 454 
 455                 nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0);
 456                 if (nzgroup)
 457                         del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
 458                 if (!list_empty(&group->pending_list))
 459                         mcg_warn_group(group, "releasing a group with non empty pending list\n");
 460                 if (nzgroup)
 461                         rb_erase(&group->node, &ctx->mcg_table);
 462                 list_del_init(&group->mgid0_list);
 463                 mutex_unlock(&group->lock);
 464                 mutex_unlock(&ctx->mcg_table_lock);
 465                 kfree(group);
 466                 return 1;
 467         } else {
 468                 mutex_unlock(&group->lock);
 469                 mutex_unlock(&ctx->mcg_table_lock);
 470         }
 471         return 0;
 472 }
 473 
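     /*
      * members[i] counts how many VFs currently hold JoinState bit i
      * (FullMember, NonMember, SendOnlyNonMember).  get_leave_state()
      * returns the bits the port itself can now leave at the SA, i.e.
      * join states no VF needs but the group record still carries.
      */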
 474 static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
 475 {
 476         int i;
 477 
 478         for (i = 0; i < 3; i++, join_state >>= 1)
 479                 if (join_state & 0x1)
 480                         group->members[i] += inc;
 481 }
 482 
 483 static u8 get_leave_state(struct mcast_group *group)
 484 {
 485         u8 leave_state = 0;
 486         int i;
 487 
 488         for (i = 0; i < 3; i++)
 489                 if (!group->members[i])
 490                         leave_state |= (1 << i);
 491 
 492         return leave_state & (group->rec.scope_join_state & 0xf);
 493 }
 494 
 495 static int join_group(struct mcast_group *group, int slave, u8 join_mask)
 496 {
 497         int ret = 0;
 498         u8 join_state;
 499 
 500         /* remove bits that slave is already member of, and adjust */
 501         join_state = join_mask & (~group->func[slave].join_state);
 502         adjust_membership(group, join_state, 1);
 503         group->func[slave].join_state |= join_state;
 504         if (group->func[slave].state != MCAST_MEMBER && join_state) {
 505                 group->func[slave].state = MCAST_MEMBER;
 506                 ret = 1;
 507         }
 508         return ret;
 509 }
 510 
 511 static int leave_group(struct mcast_group *group, int slave, u8 leave_state)
 512 {
 513         int ret = 0;
 514 
 515         adjust_membership(group, leave_state, -1);
 516         group->func[slave].join_state &= ~leave_state;
 517         if (!group->func[slave].join_state) {
 518                 group->func[slave].state = MCAST_NOT_MEMBER;
 519                 ret = 1;
 520         }
 521         return ret;
 522 }
 523 
 524 static int check_leave(struct mcast_group *group, int slave, u8 leave_mask)
 525 {
 526         if (group->func[slave].state != MCAST_MEMBER)
 527                 return MAD_STATUS_REQ_INVALID;
 528 
 529         /* make sure we're not deleting unset bits */
 530         if (~group->func[slave].join_state & leave_mask)
 531                 return MAD_STATUS_REQ_INVALID;
 532 
 533         if (!leave_mask)
 534                 return MAD_STATUS_REQ_INVALID;
 535 
 536         return 0;
 537 }
 538 
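     /*
      * Runs when the SM has not answered a join or leave within
      * MAD_TIMEOUT_MS: drop the oldest pending request (join case) or
      * clear the sent join state (leave case), return the group to
      * MCAST_IDLE and reschedule the work handler for whatever is left.
      */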
 539 static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
 540 {
 541         struct delayed_work *delay = to_delayed_work(work);
 542         struct mcast_group *group;
 543         struct mcast_req *req = NULL;
 544 
 545         group = container_of(delay, typeof(*group), timeout_work);
 546 
 547         mutex_lock(&group->lock);
 548         if (group->state == MCAST_JOIN_SENT) {
 549                 if (!list_empty(&group->pending_list)) {
 550                         req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
 551                         list_del(&req->group_list);
 552                         list_del(&req->func_list);
 553                         --group->func[req->func].num_pend_reqs;
 554                         mutex_unlock(&group->lock);
 555                         kfree(req);
 556                         if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) {
 557                                 if (release_group(group, 1))
 558                                         return;
 559                         } else {
 560                                 kfree(group);
 561                                 return;
 562                         }
 563                         mutex_lock(&group->lock);
 564                 } else
 565                         mcg_warn_group(group, "DRIVER BUG\n");
 566         } else if (group->state == MCAST_LEAVE_SENT) {
 567                 if (group->rec.scope_join_state & 0xf)
 568                         group->rec.scope_join_state &= 0xf0;
 569                 group->state = MCAST_IDLE;
 570                 mutex_unlock(&group->lock);
 571                 if (release_group(group, 1))
 572                         return;
 573                 mutex_lock(&group->lock);
 574         } else
 575                 mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state));
 576         group->state = MCAST_IDLE;
 577         atomic_inc(&group->refcount);
 578         if (!queue_work(group->demux->mcg_wq, &group->work))
 579                 safe_atomic_dec(&group->refcount);
 580 
 581         mutex_unlock(&group->lock);
 582 }
 583 
 584 static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
 585                             struct mcast_req *req)
 586 {
 587         u16 status;
 588 
 589         if (req->clean)
 590                 leave_mask = group->func[req->func].join_state;
 591 
 592         status = check_leave(group, req->func, leave_mask);
 593         if (!status)
 594                 leave_group(group, req->func, leave_mask);
 595 
 596         if (!req->clean)
 597                 send_reply_to_slave(req->func, group, &req->sa_mad, status);
 598         --group->func[req->func].num_pend_reqs;
 599         list_del(&req->group_list);
 600         list_del(&req->func_list);
 601         kfree(req);
 602         return 1;
 603 }
 604 
 605 static int handle_join_req(struct mcast_group *group, u8 join_mask,
 606                            struct mcast_req *req)
 607 {
 608         u8 group_join_state = group->rec.scope_join_state & 0xf;
 609         int ref = 0;
 610         u16 status;
 611         struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
 612 
 613         if (join_mask == (group_join_state & join_mask)) {
 614                 /* port's membership need not change */
 615                 status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask);
 616                 if (!status)
 617                         join_group(group, req->func, join_mask);
 618 
 619                 --group->func[req->func].num_pend_reqs;
 620                 send_reply_to_slave(req->func, group, &req->sa_mad, status);
 621                 list_del(&req->group_list);
 622                 list_del(&req->func_list);
 623                 kfree(req);
 624                 ++ref;
 625         } else {
 626                 /* port's membership needs to be updated */
 627                 group->prev_state = group->state;
 628                 if (send_join_to_wire(group, &req->sa_mad)) {
 629                         --group->func[req->func].num_pend_reqs;
 630                         list_del(&req->group_list);
 631                         list_del(&req->func_list);
 632                         kfree(req);
 633                         ref = 1;
 634                         group->state = group->prev_state;
 635                 } else
 636                         group->state = MCAST_JOIN_SENT;
 637         }
 638 
 639         return ref;
 640 }
 641 
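     /*
      * Per-group state machine, run with group->lock held.  First consume
      * a pending SM response (MCAST_RESP_READY), then service queued VF
      * join/leave requests while the group is idle, and finally send an
      * aggregated leave to the SM for join states no VF needs anymore.
      * 'rc' accumulates the group references to drop before returning.
      */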
 642 static void mlx4_ib_mcg_work_handler(struct work_struct *work)
 643 {
 644         struct mcast_group *group;
 645         struct mcast_req *req = NULL;
 646         struct ib_sa_mcmember_data *sa_data;
 647         u8 req_join_state;
 648         int rc = 1; /* release_count - this is for the scheduled work */
 649         u16 status;
 650         u8 method;
 651 
 652         group = container_of(work, typeof(*group), work);
 653 
 654         mutex_lock(&group->lock);
 655 
 656         /* First, let's see if a response from SM is waiting regarding this group.
 657          * If so, we need to update the group's REC. If this is a bad response, we
 658          * may need to send a bad response to a VF waiting for it. If VF is waiting
 659          * and this is a good response, the VF will be answered later in this func. */
 660         if (group->state == MCAST_RESP_READY) {
 661                 /* cancels mlx4_ib_mcg_timeout_handler */
 662                 cancel_delayed_work(&group->timeout_work);
 663                 status = be16_to_cpu(group->response_sa_mad.mad_hdr.status);
 664                 method = group->response_sa_mad.mad_hdr.method;
 665                 if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
 666                         mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
 667                                 be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
 668                                 be64_to_cpu(group->last_req_tid));
 669                         group->state = group->prev_state;
 670                         goto process_requests;
 671                 }
 672                 if (status) {
 673                         if (!list_empty(&group->pending_list))
 674                                 req = list_first_entry(&group->pending_list,
 675                                                 struct mcast_req, group_list);
  676                         if (method == IB_MGMT_METHOD_GET_RESP) {
  677                                 if (req) {
  678                                         send_reply_to_slave(req->func, group, &req->sa_mad, status);
  679                                         --group->func[req->func].num_pend_reqs;
  680                                         list_del(&req->group_list);
  681                                         list_del(&req->func_list);
  682                                         kfree(req);
  683                                         ++rc;
  684                                 } else
  685                                         mcg_warn_group(group, "no request for failed join\n");
  686                         } else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing)
  687                                 ++rc;
 688                 } else {
 689                         u8 resp_join_state;
 690                         u8 cur_join_state;
 691 
 692                         resp_join_state = ((struct ib_sa_mcmember_data *)
 693                                                 group->response_sa_mad.data)->scope_join_state & 0xf;
 694                         cur_join_state = group->rec.scope_join_state & 0xf;
 695 
 696                         if (method == IB_MGMT_METHOD_GET_RESP) {
  697                                 /* successful join */
 698                                 if (!cur_join_state && resp_join_state)
 699                                         --rc;
 700                         } else if (!resp_join_state)
  701                                 ++rc;
 702                         memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec);
 703                 }
 704                 group->state = MCAST_IDLE;
 705         }
 706 
 707 process_requests:
 708         /* We should now go over pending join/leave requests, as long as we are idle. */
 709         while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) {
 710                 req = list_first_entry(&group->pending_list, struct mcast_req,
 711                                        group_list);
 712                 sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
 713                 req_join_state = sa_data->scope_join_state & 0xf;
 714 
 715                 /* For a leave request, we will immediately answer the VF, and
 716                  * update our internal counters. The actual leave will be sent
 717                  * to SM later, if at all needed. We dequeue the request now. */
 718                 if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE)
 719                         rc += handle_leave_req(group, req_join_state, req);
 720                 else
 721                         rc += handle_join_req(group, req_join_state, req);
 722         }
 723 
 724         /* Handle leaves */
 725         if (group->state == MCAST_IDLE) {
 726                 req_join_state = get_leave_state(group);
 727                 if (req_join_state) {
 728                         group->rec.scope_join_state &= ~req_join_state;
 729                         group->prev_state = group->state;
 730                         if (send_leave_to_wire(group, req_join_state)) {
 731                                 group->state = group->prev_state;
 732                                 ++rc;
 733                         } else
 734                                 group->state = MCAST_LEAVE_SENT;
 735                 }
 736         }
 737 
 738         if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE)
 739                 goto process_requests;
 740         mutex_unlock(&group->lock);
 741 
 742         while (rc--)
 743                 release_group(group, 0);
 744 }
 745 
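     /*
      * A join may be sent with MGID 0, asking the SM to allocate the MGID.
      * Match the SM's response by TID: on success move the group from the
      * mgid0 list into the rb-tree under the newly assigned MGID, otherwise
      * (or on a TID/MGID race) discard the temporary group.
      */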
 746 static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx,
 747                                                        __be64 tid,
 748                                                        union ib_gid *new_mgid)
 749 {
 750         struct mcast_group *group = NULL, *cur_group, *n;
 751         struct mcast_req *req;
 752 
 753         mutex_lock(&ctx->mcg_table_lock);
 754         list_for_each_entry_safe(group, n, &ctx->mcg_mgid0_list, mgid0_list) {
 755                 mutex_lock(&group->lock);
 756                 if (group->last_req_tid == tid) {
 757                         if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
 758                                 group->rec.mgid = *new_mgid;
 759                                 sprintf(group->name, "%016llx%016llx",
 760                                                 be64_to_cpu(group->rec.mgid.global.subnet_prefix),
 761                                                 be64_to_cpu(group->rec.mgid.global.interface_id));
 762                                 list_del_init(&group->mgid0_list);
 763                                 cur_group = mcast_insert(ctx, group);
 764                                 if (cur_group) {
 765                                         /* A race between our code and SM. Silently cleaning the new one */
 766                                         req = list_first_entry(&group->pending_list,
 767                                                                struct mcast_req, group_list);
 768                                         --group->func[req->func].num_pend_reqs;
 769                                         list_del(&req->group_list);
 770                                         list_del(&req->func_list);
 771                                         kfree(req);
 772                                         mutex_unlock(&group->lock);
 773                                         mutex_unlock(&ctx->mcg_table_lock);
 774                                         release_group(group, 0);
 775                                         return NULL;
 776                                 }
 777 
 778                                 atomic_inc(&group->refcount);
 779                                 add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
 780                                 mutex_unlock(&group->lock);
 781                                 mutex_unlock(&ctx->mcg_table_lock);
 782                                 return group;
 783                         } else {
 784                                 struct mcast_req *tmp1, *tmp2;
 785 
 786                                 list_del(&group->mgid0_list);
 787                                 if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE)
 788                                         cancel_delayed_work_sync(&group->timeout_work);
 789 
 790                                 list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) {
 791                                         list_del(&tmp1->group_list);
 792                                         kfree(tmp1);
 793                                 }
 794                                 mutex_unlock(&group->lock);
 795                                 mutex_unlock(&ctx->mcg_table_lock);
 796                                 kfree(group);
 797                                 return NULL;
 798                         }
 799                 }
 800                 mutex_unlock(&group->lock);
 801         }
 802         mutex_unlock(&ctx->mcg_table_lock);
 803 
 804         return NULL;
 805 }
 806 
 807 static ssize_t sysfs_show_group(struct device *dev,
 808                 struct device_attribute *attr, char *buf);
 809 
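     /*
      * Find the group for 'mgid' in the port's rb-tree, or create it when
      * 'create' is set.  Groups joined with MGID 0 are parked on the
      * mcg_mgid0_list until the SM assigns a real MGID.  Returns with an
      * additional reference held; callers pair it with release_group().
      */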
 810 static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
 811                                          union ib_gid *mgid, int create)
 812 {
 813         struct mcast_group *group, *cur_group;
 814         int is_mgid0;
 815         int i;
 816 
 817         is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
 818         if (!is_mgid0) {
 819                 group = mcast_find(ctx, mgid);
 820                 if (group)
 821                         goto found;
 822         }
 823 
 824         if (!create)
 825                 return ERR_PTR(-ENOENT);
 826 
 827         group = kzalloc(sizeof(*group), GFP_KERNEL);
 828         if (!group)
 829                 return ERR_PTR(-ENOMEM);
 830 
 831         group->demux = ctx;
 832         group->rec.mgid = *mgid;
 833         INIT_LIST_HEAD(&group->pending_list);
 834         INIT_LIST_HEAD(&group->mgid0_list);
 835         for (i = 0; i < MAX_VFS; ++i)
 836                 INIT_LIST_HEAD(&group->func[i].pending);
 837         INIT_WORK(&group->work, mlx4_ib_mcg_work_handler);
 838         INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler);
 839         mutex_init(&group->lock);
 840         sprintf(group->name, "%016llx%016llx",
 841                         be64_to_cpu(group->rec.mgid.global.subnet_prefix),
 842                         be64_to_cpu(group->rec.mgid.global.interface_id));
 843         sysfs_attr_init(&group->dentry.attr);
 844         group->dentry.show = sysfs_show_group;
 845         group->dentry.store = NULL;
 846         group->dentry.attr.name = group->name;
 847         group->dentry.attr.mode = 0400;
 848         group->state = MCAST_IDLE;
 849 
 850         if (is_mgid0) {
 851                 list_add(&group->mgid0_list, &ctx->mcg_mgid0_list);
 852                 goto found;
 853         }
 854 
 855         cur_group = mcast_insert(ctx, group);
 856         if (cur_group) {
 857                 mcg_warn("group just showed up %s - confused\n", cur_group->name);
 858                 kfree(group);
 859                 return ERR_PTR(-EINVAL);
 860         }
 861 
 862         add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
 863 
 864 found:
 865         atomic_inc(&group->refcount);
 866         return group;
 867 }
 868 
 869 static void queue_req(struct mcast_req *req)
 870 {
 871         struct mcast_group *group = req->group;
 872 
 873         atomic_inc(&group->refcount); /* for the request */
 874         atomic_inc(&group->refcount); /* for scheduling the work */
 875         list_add_tail(&req->group_list, &group->pending_list);
 876         list_add_tail(&req->func_list, &group->func[req->func].pending);
 877         /* calls mlx4_ib_mcg_work_handler */
 878         if (!queue_work(group->demux->mcg_wq, &group->work))
 879                 safe_atomic_dec(&group->refcount);
 880 }
 881 
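     /*
      * Called on the PF for MCMemberRecord MADs arriving from the wire.
      * Join/leave responses for groups the PF is proxying are consumed
      * here (the response is stashed in the group and the work handler is
      * scheduled); other methods are passed through to the guest.
      */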
 882 int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
 883                               struct ib_sa_mad *mad)
 884 {
 885         struct mlx4_ib_dev *dev = to_mdev(ibdev);
 886         struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data;
 887         struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
 888         struct mcast_group *group;
 889 
 890         switch (mad->mad_hdr.method) {
 891         case IB_MGMT_METHOD_GET_RESP:
 892         case IB_SA_METHOD_DELETE_RESP:
 893                 mutex_lock(&ctx->mcg_table_lock);
 894                 group = acquire_group(ctx, &rec->mgid, 0);
 895                 mutex_unlock(&ctx->mcg_table_lock);
 896                 if (IS_ERR(group)) {
 897                         if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
 898                                 __be64 tid = mad->mad_hdr.tid;
  899                 *(u8 *)(&tid) = (u8)slave; /* the group keeps the modified TID */
 900                                 group = search_relocate_mgid0_group(ctx, tid, &rec->mgid);
 901                         } else
 902                                 group = NULL;
 903                 }
 904 
 905                 if (!group)
 906                         return 1;
 907 
 908                 mutex_lock(&group->lock);
 909                 group->response_sa_mad = *mad;
 910                 group->prev_state = group->state;
 911                 group->state = MCAST_RESP_READY;
 912                 /* calls mlx4_ib_mcg_work_handler */
 913                 atomic_inc(&group->refcount);
 914                 if (!queue_work(ctx->mcg_wq, &group->work))
 915                         safe_atomic_dec(&group->refcount);
 916                 mutex_unlock(&group->lock);
 917                 release_group(group, 0);
 918                 return 1; /* consumed */
 919         case IB_MGMT_METHOD_SET:
 920         case IB_SA_METHOD_GET_TABLE:
 921         case IB_SA_METHOD_GET_TABLE_RESP:
 922         case IB_SA_METHOD_DELETE:
 923                 return 0; /* not consumed, pass-through to guest over tunnel */
 924         default:
 925                 mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n",
 926                         port, mad->mad_hdr.method);
 927                 return 1; /* consumed */
 928         }
 929 }
 930 
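     /*
      * Called for MCMemberRecord MADs sent by a VF.  SET (join) and
      * DELETE (leave) are queued on the group as mcast_req entries,
      * subject to the MAX_PEND_REQS_PER_FUNC limit per VF; everything
      * else is passed through unchanged.
      */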
 931 int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
 932                                   int slave, struct ib_sa_mad *sa_mad)
 933 {
 934         struct mlx4_ib_dev *dev = to_mdev(ibdev);
 935         struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data;
 936         struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
 937         struct mcast_group *group;
 938         struct mcast_req *req;
 939         int may_create = 0;
 940 
 941         if (ctx->flushing)
 942                 return -EAGAIN;
 943 
 944         switch (sa_mad->mad_hdr.method) {
 945         case IB_MGMT_METHOD_SET:
 946                 may_create = 1;
 947                 /* fall through */
 948         case IB_SA_METHOD_DELETE:
 949                 req = kzalloc(sizeof *req, GFP_KERNEL);
 950                 if (!req)
 951                         return -ENOMEM;
 952 
 953                 req->func = slave;
 954                 req->sa_mad = *sa_mad;
 955 
 956                 mutex_lock(&ctx->mcg_table_lock);
 957                 group = acquire_group(ctx, &rec->mgid, may_create);
 958                 mutex_unlock(&ctx->mcg_table_lock);
 959                 if (IS_ERR(group)) {
 960                         kfree(req);
 961                         return PTR_ERR(group);
 962                 }
 963                 mutex_lock(&group->lock);
 964                 if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) {
 965                         mutex_unlock(&group->lock);
 966                         mcg_debug_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
 967                                         port, slave, MAX_PEND_REQS_PER_FUNC);
 968                         release_group(group, 0);
 969                         kfree(req);
 970                         return -ENOMEM;
 971                 }
 972                 ++group->func[slave].num_pend_reqs;
 973                 req->group = group;
 974                 queue_req(req);
 975                 mutex_unlock(&group->lock);
 976                 release_group(group, 0);
 977                 return 1; /* consumed */
 978         case IB_SA_METHOD_GET_TABLE:
 979         case IB_MGMT_METHOD_GET_RESP:
 980         case IB_SA_METHOD_GET_TABLE_RESP:
 981         case IB_SA_METHOD_DELETE_RESP:
 982                 return 0; /* not consumed, pass-through */
 983         default:
 984                 mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n",
 985                         port, slave, sa_mad->mad_hdr.method);
 986                 return 1; /* consumed */
 987         }
 988 }
 989 
 990 static ssize_t sysfs_show_group(struct device *dev,
 991                 struct device_attribute *attr, char *buf)
 992 {
 993         struct mcast_group *group =
 994                 container_of(attr, struct mcast_group, dentry);
 995         struct mcast_req *req = NULL;
 996         char pending_str[40];
 997         char state_str[40];
 998         ssize_t len = 0;
 999         int f;
1000 
1001         if (group->state == MCAST_IDLE)
1002                 sprintf(state_str, "%s", get_state_string(group->state));
1003         else
1004                 sprintf(state_str, "%s(TID=0x%llx)",
1005                                 get_state_string(group->state),
1006                                 be64_to_cpu(group->last_req_tid));
1007         if (list_empty(&group->pending_list)) {
1008                 sprintf(pending_str, "No");
1009         } else {
1010                 req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
1011                 sprintf(pending_str, "Yes(TID=0x%llx)",
1012                                 be64_to_cpu(req->sa_mad.mad_hdr.tid));
1013         }
1014         len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s     ",
1015                         group->rec.scope_join_state & 0xf,
1016                         group->members[2], group->members[1], group->members[0],
1017                         atomic_read(&group->refcount),
1018                         pending_str,
1019                         state_str);
1020         for (f = 0; f < MAX_VFS; ++f)
1021                 if (group->func[f].state == MCAST_MEMBER)
1022                         len += sprintf(buf + len, "%d[%1x] ",
1023                                         f, group->func[f].join_state);
1024 
1025         len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x "
1026                 "%4x %4x %2x %2x)\n",
1027                 be16_to_cpu(group->rec.pkey),
1028                 be32_to_cpu(group->rec.qkey),
1029                 (group->rec.mtusel_mtu & 0xc0) >> 6,
1030                 group->rec.mtusel_mtu & 0x3f,
1031                 group->rec.tclass,
1032                 (group->rec.ratesel_rate & 0xc0) >> 6,
1033                 group->rec.ratesel_rate & 0x3f,
1034                 (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28,
1035                 (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8,
1036                 be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff,
1037                 group->rec.proxy_join);
1038 
1039         return len;
1040 }
1041 
1042 int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx)
1043 {
1044         char name[20];
1045 
1046         atomic_set(&ctx->tid, 0);
1047         sprintf(name, "mlx4_ib_mcg%d", ctx->port);
1048         ctx->mcg_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
1049         if (!ctx->mcg_wq)
1050                 return -ENOMEM;
1051 
1052         mutex_init(&ctx->mcg_table_lock);
1053         ctx->mcg_table = RB_ROOT;
1054         INIT_LIST_HEAD(&ctx->mcg_mgid0_list);
1055         ctx->flushing = 0;
1056 
1057         return 0;
1058 }
1059 
1060 static void force_clean_group(struct mcast_group *group)
1061 {
1062         struct mcast_req *req, *tmp;
1063 
1064         list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) {
1065                 list_del(&req->group_list);
1066                 kfree(req);
1067         }
1068         del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr);
1069         rb_erase(&group->node, &group->demux->mcg_table);
1070         kfree(group);
1071 }
1072 
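     /*
      * Port teardown: push a leave for every VF's memberships, give the SM
      * up to MAD_TIMEOUT_MS plus a few seconds to drain the groups, flush
      * the work queue and then force-clean anything that is still around.
      */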
1073 static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
1074 {
1075         int i;
1076         struct rb_node *p;
1077         struct mcast_group *group;
1078         unsigned long end;
1079         int count;
1080 
1081         for (i = 0; i < MAX_VFS; ++i)
1082                 clean_vf_mcast(ctx, i);
1083 
1084         end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
1085         do {
1086                 count = 0;
1087                 mutex_lock(&ctx->mcg_table_lock);
1088                 for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p))
1089                         ++count;
1090                 mutex_unlock(&ctx->mcg_table_lock);
1091                 if (!count)
1092                         break;
1093 
1094                 usleep_range(1000, 2000);
1095         } while (time_after(end, jiffies));
1096 
1097         flush_workqueue(ctx->mcg_wq);
1098         if (destroy_wq)
1099                 destroy_workqueue(ctx->mcg_wq);
1100 
1101         mutex_lock(&ctx->mcg_table_lock);
1102         while ((p = rb_first(&ctx->mcg_table)) != NULL) {
1103                 group = rb_entry(p, struct mcast_group, node);
1104                 if (atomic_read(&group->refcount))
1105                         mcg_debug_group(group, "group refcount %d!!! (pointer %p)\n",
1106                                         atomic_read(&group->refcount), group);
1107 
1108                 force_clean_group(group);
1109         }
1110         mutex_unlock(&ctx->mcg_table_lock);
1111 }
1112 
1113 struct clean_work {
1114         struct work_struct work;
1115         struct mlx4_ib_demux_ctx *ctx;
1116         int destroy_wq;
1117 };
1118 
1119 static void mcg_clean_task(struct work_struct *work)
1120 {
1121         struct clean_work *cw = container_of(work, struct clean_work, work);
1122 
1123         _mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq);
1124         cw->ctx->flushing = 0;
1125         kfree(cw);
1126 }
1127 
1128 void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
1129 {
1130         struct clean_work *work;
1131 
1132         if (ctx->flushing)
1133                 return;
1134 
1135         ctx->flushing = 1;
1136 
1137         if (destroy_wq) {
1138                 _mlx4_ib_mcg_port_cleanup(ctx, destroy_wq);
1139                 ctx->flushing = 0;
1140                 return;
1141         }
1142 
1143         work = kmalloc(sizeof *work, GFP_KERNEL);
1144         if (!work) {
1145                 ctx->flushing = 0;
1146                 return;
1147         }
1148 
1149         work->ctx = ctx;
1150         work->destroy_wq = destroy_wq;
1151         INIT_WORK(&work->work, mcg_clean_task);
1152         queue_work(clean_wq, &work->work);
1153 }
1154 
1155 static void build_leave_mad(struct mcast_req *req)
1156 {
1157         struct ib_sa_mad *mad = &req->sa_mad;
1158 
1159         mad->mad_hdr.method = IB_SA_METHOD_DELETE;
1160 }
1161 
1162 
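     /*
      * Drop every request the given VF still has queued on this group.
      * If the VF's request is at the head of the pending list with a
      * join/leave in flight, try to cancel the MAD timeout; if the timeout
      * already fired, leave that request for the timeout handler.
      */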
1163 static void clear_pending_reqs(struct mcast_group *group, int vf)
1164 {
1165         struct mcast_req *req, *tmp, *group_first = NULL;
1166         int clear;
1167         int pend = 0;
1168 
1169         if (!list_empty(&group->pending_list))
1170                 group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list);
1171 
1172         list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) {
1173                 clear = 1;
1174                 if (group_first == req &&
1175                     (group->state == MCAST_JOIN_SENT ||
1176                      group->state == MCAST_LEAVE_SENT)) {
1177                         clear = cancel_delayed_work(&group->timeout_work);
1178                         pend = !clear;
1179                         group->state = MCAST_IDLE;
1180                 }
1181                 if (clear) {
1182                         --group->func[vf].num_pend_reqs;
1183                         list_del(&req->group_list);
1184                         list_del(&req->func_list);
1185                         kfree(req);
1186                         atomic_dec(&group->refcount);
1187                 }
1188         }
1189 
1190         if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) {
1191                 mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n",
1192                                list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs);
1193         }
1194 }
1195 
1196 static int push_deleteing_req(struct mcast_group *group, int slave)
1197 {
1198         struct mcast_req *req;
1199         struct mcast_req *pend_req;
1200 
1201         if (!group->func[slave].join_state)
1202                 return 0;
1203 
1204         req = kzalloc(sizeof *req, GFP_KERNEL);
1205         if (!req)
1206                 return -ENOMEM;
1207 
1208         if (!list_empty(&group->func[slave].pending)) {
1209                 pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, func_list);
1210                 if (pend_req->clean) {
1211                         kfree(req);
1212                         return 0;
1213                 }
1214         }
1215 
1216         req->clean = 1;
1217         req->func = slave;
1218         req->group = group;
1219         ++group->func[slave].num_pend_reqs;
1220         build_leave_mad(req);
1221         queue_req(req);
1222         return 0;
1223 }
1224 
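     /*
      * Called when a VF (slave) goes away: for every group, discard the
      * VF's pending requests and queue a synthetic 'clean' leave so the
      * aggregated membership the PF reports to the SM is updated.
      */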
1225 void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave)
1226 {
1227         struct mcast_group *group;
1228         struct rb_node *p;
1229 
1230         mutex_lock(&ctx->mcg_table_lock);
1231         for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) {
1232                 group = rb_entry(p, struct mcast_group, node);
1233                 mutex_lock(&group->lock);
1234                 if (atomic_read(&group->refcount)) {
1235                         /* clear pending requests of this VF */
1236                         clear_pending_reqs(group, slave);
1237                         push_deleteing_req(group, slave);
1238                 }
1239                 mutex_unlock(&group->lock);
1240         }
1241         mutex_unlock(&ctx->mcg_table_lock);
1242 }
1243 
1244 
1245 int mlx4_ib_mcg_init(void)
1246 {
1247         clean_wq = alloc_ordered_workqueue("mlx4_ib_mcg", WQ_MEM_RECLAIM);
1248         if (!clean_wq)
1249                 return -ENOMEM;
1250 
1251         return 0;
1252 }
1253 
1254 void mlx4_ib_mcg_destroy(void)
1255 {
1256         destroy_workqueue(clean_wq);
1257 }
