root/drivers/infiniband/core/cache.c


DEFINITIONS

This source file includes the following definitions.
  1. dispatch_gid_change_event
  2. ib_cache_gid_type_str
  3. rdma_is_zero_gid
  4. is_gid_index_default
  5. ib_cache_gid_parse_type_str
  6. rdma_gid_table
  7. is_gid_entry_free
  8. is_gid_entry_valid
  9. schedule_free_gid
  10. put_gid_ndev
  11. free_gid_entry_locked
  12. free_gid_entry
  13. free_gid_work
  14. alloc_gid_entry
  15. store_gid_entry
  16. get_gid_entry
  17. put_gid_entry
  18. put_gid_entry_locked
  19. add_roce_gid
  20. del_gid
  21. add_modify_gid
  22. find_gid
  23. make_default_gid
  24. __ib_cache_gid_add
  25. ib_cache_gid_add
  26. _ib_cache_gid_del
  27. ib_cache_gid_del
  28. ib_cache_gid_del_all_netdev_gids
  29. rdma_find_gid_by_port
  30. rdma_find_gid_by_filter
  31. alloc_gid_table
  32. release_gid_table
  33. cleanup_gid_table_port
  34. ib_cache_gid_set_default_gid
  35. gid_table_reserve_default
  36. gid_table_release_one
  37. _gid_table_setup_one
  38. gid_table_cleanup_one
  39. gid_table_setup_one
  40. rdma_query_gid
  41. rdma_find_gid
  42. ib_get_cached_pkey
  43. ib_get_cached_subnet_prefix
  44. ib_find_cached_pkey
  45. ib_find_exact_cached_pkey
  46. ib_get_cached_lmc
  47. ib_get_cached_port_state
  48. rdma_get_gid_attr
  49. rdma_put_gid_attr
  50. rdma_hold_gid_attr
  51. rdma_read_gid_attr_ndev_rcu
  52. get_lower_dev_vlan
  53. rdma_read_gid_l2_fields
  54. config_non_roce_gid_cache
  55. ib_cache_update
  56. ib_cache_event_task
  57. ib_generic_event_task
  58. is_cache_update_event
  59. ib_dispatch_event
  60. ib_cache_setup_one
  61. ib_cache_release_one
  62. ib_cache_cleanup_one

   1 /*
   2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
   3  * Copyright (c) 2005 Intel Corporation. All rights reserved.
   4  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   5  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
   6  *
   7  * This software is available to you under a choice of one of two
   8  * licenses.  You may choose to be licensed under the terms of the GNU
   9  * General Public License (GPL) Version 2, available from the file
  10  * COPYING in the main directory of this source tree, or the
  11  * OpenIB.org BSD license below:
  12  *
  13  *     Redistribution and use in source and binary forms, with or
  14  *     without modification, are permitted provided that the following
  15  *     conditions are met:
  16  *
  17  *      - Redistributions of source code must retain the above
  18  *        copyright notice, this list of conditions and the following
  19  *        disclaimer.
  20  *
  21  *      - Redistributions in binary form must reproduce the above
  22  *        copyright notice, this list of conditions and the following
  23  *        disclaimer in the documentation and/or other materials
  24  *        provided with the distribution.
  25  *
  26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33  * SOFTWARE.
  34  */
  35 
  36 #include <linux/module.h>
  37 #include <linux/errno.h>
  38 #include <linux/slab.h>
  39 #include <linux/workqueue.h>
  40 #include <linux/netdevice.h>
  41 #include <net/addrconf.h>
  42 
  43 #include <rdma/ib_cache.h>
  44 
  45 #include "core_priv.h"
  46 
  47 struct ib_pkey_cache {
  48         int             table_len;
  49         u16             table[0];
  50 };
  51 
  52 struct ib_update_work {
  53         struct work_struct work;
  54         struct ib_event event;
  55         bool enforce_security;
  56 };
  57 
  58 union ib_gid zgid;
  59 EXPORT_SYMBOL(zgid);
  60 
  61 enum gid_attr_find_mask {
  62         GID_ATTR_FIND_MASK_GID          = 1UL << 0,
  63         GID_ATTR_FIND_MASK_NETDEV       = 1UL << 1,
  64         GID_ATTR_FIND_MASK_DEFAULT      = 1UL << 2,
  65         GID_ATTR_FIND_MASK_GID_TYPE     = 1UL << 3,
  66 };
  67 
  68 enum gid_table_entry_state {
  69         GID_TABLE_ENTRY_INVALID         = 1,
  70         GID_TABLE_ENTRY_VALID           = 2,
  71         /*
  72          * Indicates that the entry is pending removal; there may
  73          * still be active users of this GID entry.
  74          * When the last user of the GID entry releases its reference,
  75          * the GID entry is detached from the table.
  76          */
  77         GID_TABLE_ENTRY_PENDING_DEL     = 3,
  78 };
  79 
  80 struct roce_gid_ndev_storage {
  81         struct rcu_head rcu_head;
  82         struct net_device *ndev;
  83 };
  84 
  85 struct ib_gid_table_entry {
  86         struct kref                     kref;
  87         struct work_struct              del_work;
  88         struct ib_gid_attr              attr;
  89         void                            *context;
   90         /* Store the ndev pointer so its reference can be released later
   91          * from call_rcu context, because by that time gid_table_entry
   92          * and attr might already be freed. So keep a copy of it.
  93          * ndev_storage is freed by rcu callback.
  94          */
  95         struct roce_gid_ndev_storage    *ndev_storage;
  96         enum gid_table_entry_state      state;
  97 };
  98 
  99 struct ib_gid_table {
 100         int                             sz;
 101         /* In RoCE, adding a GID to the table requires:
  102          * (a) Find if this GID already exists.
 103          * (b) Find a free space.
 104          * (c) Write the new GID
 105          *
  106          * Delete requires a different set of operations:
 107          * (a) Find the GID
 108          * (b) Delete it.
 109          *
 110          **/
 111         /* Any writer to data_vec must hold this lock and the write side of
 112          * rwlock. Readers must hold only rwlock. All writers must be in a
 113          * sleepable context.
 114          */
 115         struct mutex                    lock;
 116         /* rwlock protects data_vec[ix]->state and entry pointer.
 117          */
 118         rwlock_t                        rwlock;
 119         struct ib_gid_table_entry       **data_vec;
  120         /* bit field, each set bit marks an index reserved for a default GID */
 121         u32                             default_gid_indices;
 122 };
 123 
 124 static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
 125 {
 126         struct ib_event event;
 127 
 128         event.device            = ib_dev;
 129         event.element.port_num  = port;
 130         event.event             = IB_EVENT_GID_CHANGE;
 131 
 132         ib_dispatch_event_clients(&event);
 133 }
 134 
 135 static const char * const gid_type_str[] = {
 136         [IB_GID_TYPE_IB]        = "IB/RoCE v1",
 137         [IB_GID_TYPE_ROCE_UDP_ENCAP]    = "RoCE v2",
 138 };
 139 
 140 const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
 141 {
 142         if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
 143                 return gid_type_str[gid_type];
 144 
 145         return "Invalid GID type";
 146 }
 147 EXPORT_SYMBOL(ib_cache_gid_type_str);
 148 
 149 /** rdma_is_zero_gid - Check if given GID is zero or not.
 150  * @gid:        GID to check
 151  * Returns true if given GID is zero, returns false otherwise.
 152  */
 153 bool rdma_is_zero_gid(const union ib_gid *gid)
 154 {
 155         return !memcmp(gid, &zgid, sizeof(*gid));
 156 }
 157 EXPORT_SYMBOL(rdma_is_zero_gid);
 158 
 159 /** is_gid_index_default - Check if a given index belongs to
 160  * reserved default GIDs or not.
 161  * @table:      GID table pointer
 162  * @index:      Index to check in GID table
 163  * Returns true if index is one of the reserved default GID index otherwise
 164  * returns false.
 165  */
 166 static bool is_gid_index_default(const struct ib_gid_table *table,
 167                                  unsigned int index)
 168 {
 169         return index < 32 && (BIT(index) & table->default_gid_indices);
 170 }
 171 
 172 int ib_cache_gid_parse_type_str(const char *buf)
 173 {
 174         unsigned int i;
 175         size_t len;
 176         int err = -EINVAL;
 177 
 178         len = strlen(buf);
 179         if (len == 0)
 180                 return -EINVAL;
 181 
 182         if (buf[len - 1] == '\n')
 183                 len--;
 184 
 185         for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
 186                 if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
 187                     len == strlen(gid_type_str[i])) {
 188                         err = i;
 189                         break;
 190                 }
 191 
 192         return err;
 193 }
 194 EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
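
/*
 * Example (hypothetical caller, not part of this file): mapping a
 * user-supplied GID type string back to its numeric GID type value.
 */
static int example_parse_gid_type(void)
{
	int gid_type;

	/* A trailing newline, as user interfaces often pass, is accepted */
	gid_type = ib_cache_gid_parse_type_str("RoCE v2\n");

	/* gid_type is IB_GID_TYPE_ROCE_UDP_ENCAP here; unknown strings yield -EINVAL */
	return gid_type;
}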
 195 
 196 static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
 197 {
 198         return device->port_data[port].cache.gid;
 199 }
 200 
 201 static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
 202 {
 203         return !entry;
 204 }
 205 
 206 static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry)
 207 {
 208         return entry && entry->state == GID_TABLE_ENTRY_VALID;
 209 }
 210 
 211 static void schedule_free_gid(struct kref *kref)
 212 {
 213         struct ib_gid_table_entry *entry =
 214                         container_of(kref, struct ib_gid_table_entry, kref);
 215 
 216         queue_work(ib_wq, &entry->del_work);
 217 }
 218 
 219 static void put_gid_ndev(struct rcu_head *head)
 220 {
 221         struct roce_gid_ndev_storage *storage =
 222                 container_of(head, struct roce_gid_ndev_storage, rcu_head);
 223 
 224         WARN_ON(!storage->ndev);
  225         /* At this point it's safe to release the netdev reference,
 226          * as all callers working on gid_attr->ndev are done
 227          * using this netdev.
 228          */
 229         dev_put(storage->ndev);
 230         kfree(storage);
 231 }
 232 
 233 static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
 234 {
 235         struct ib_device *device = entry->attr.device;
 236         u8 port_num = entry->attr.port_num;
 237         struct ib_gid_table *table = rdma_gid_table(device, port_num);
 238 
 239         dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
 240                 port_num, entry->attr.index, entry->attr.gid.raw);
 241 
 242         write_lock_irq(&table->rwlock);
 243 
 244         /*
  245          * The only way to avoid writing NULL over a newer entry is to
  246          * check that this is still the same entry in the table.
  247          * If a new entry was added at this index by the time we free here,
 248          * don't overwrite the table entry.
 249          */
 250         if (entry == table->data_vec[entry->attr.index])
 251                 table->data_vec[entry->attr.index] = NULL;
 252         /* Now this index is ready to be allocated */
 253         write_unlock_irq(&table->rwlock);
 254 
 255         if (entry->ndev_storage)
 256                 call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev);
 257         kfree(entry);
 258 }
 259 
 260 static void free_gid_entry(struct kref *kref)
 261 {
 262         struct ib_gid_table_entry *entry =
 263                         container_of(kref, struct ib_gid_table_entry, kref);
 264 
 265         free_gid_entry_locked(entry);
 266 }
 267 
 268 /**
 269  * free_gid_work - Release reference to the GID entry
 270  * @work: Work structure to refer to GID entry which needs to be
 271  * deleted.
 272  *
 273  * free_gid_work() frees the entry from the HCA's hardware table
 274  * if provider supports it. It releases reference to netdevice.
 275  */
 276 static void free_gid_work(struct work_struct *work)
 277 {
 278         struct ib_gid_table_entry *entry =
 279                 container_of(work, struct ib_gid_table_entry, del_work);
 280         struct ib_device *device = entry->attr.device;
 281         u8 port_num = entry->attr.port_num;
 282         struct ib_gid_table *table = rdma_gid_table(device, port_num);
 283 
 284         mutex_lock(&table->lock);
 285         free_gid_entry_locked(entry);
 286         mutex_unlock(&table->lock);
 287 }
 288 
 289 static struct ib_gid_table_entry *
 290 alloc_gid_entry(const struct ib_gid_attr *attr)
 291 {
 292         struct ib_gid_table_entry *entry;
 293         struct net_device *ndev;
 294 
 295         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 296         if (!entry)
 297                 return NULL;
 298 
 299         ndev = rcu_dereference_protected(attr->ndev, 1);
 300         if (ndev) {
 301                 entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage),
 302                                               GFP_KERNEL);
 303                 if (!entry->ndev_storage) {
 304                         kfree(entry);
 305                         return NULL;
 306                 }
 307                 dev_hold(ndev);
 308                 entry->ndev_storage->ndev = ndev;
 309         }
 310         kref_init(&entry->kref);
 311         memcpy(&entry->attr, attr, sizeof(*attr));
 312         INIT_WORK(&entry->del_work, free_gid_work);
 313         entry->state = GID_TABLE_ENTRY_INVALID;
 314         return entry;
 315 }
 316 
 317 static void store_gid_entry(struct ib_gid_table *table,
 318                             struct ib_gid_table_entry *entry)
 319 {
 320         entry->state = GID_TABLE_ENTRY_VALID;
 321 
 322         dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
 323                 __func__, entry->attr.port_num, entry->attr.index,
 324                 entry->attr.gid.raw);
 325 
 326         lockdep_assert_held(&table->lock);
 327         write_lock_irq(&table->rwlock);
 328         table->data_vec[entry->attr.index] = entry;
 329         write_unlock_irq(&table->rwlock);
 330 }
 331 
 332 static void get_gid_entry(struct ib_gid_table_entry *entry)
 333 {
 334         kref_get(&entry->kref);
 335 }
 336 
 337 static void put_gid_entry(struct ib_gid_table_entry *entry)
 338 {
 339         kref_put(&entry->kref, schedule_free_gid);
 340 }
 341 
 342 static void put_gid_entry_locked(struct ib_gid_table_entry *entry)
 343 {
 344         kref_put(&entry->kref, free_gid_entry);
 345 }
 346 
 347 static int add_roce_gid(struct ib_gid_table_entry *entry)
 348 {
 349         const struct ib_gid_attr *attr = &entry->attr;
 350         int ret;
 351 
 352         if (!attr->ndev) {
 353                 dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
 354                         __func__, attr->port_num, attr->index);
 355                 return -EINVAL;
 356         }
 357         if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
 358                 ret = attr->device->ops.add_gid(attr, &entry->context);
 359                 if (ret) {
 360                         dev_err(&attr->device->dev,
 361                                 "%s GID add failed port=%d index=%d\n",
 362                                 __func__, attr->port_num, attr->index);
 363                         return ret;
 364                 }
 365         }
 366         return 0;
 367 }
 368 
 369 /**
 370  * del_gid - Delete GID table entry
 371  *
 372  * @ib_dev:     IB device whose GID entry to be deleted
 373  * @port:       Port number of the IB device
 374  * @table:      GID table of the IB device for a port
 375  * @ix:         GID entry index to delete
 376  *
 377  */
 378 static void del_gid(struct ib_device *ib_dev, u8 port,
 379                     struct ib_gid_table *table, int ix)
 380 {
 381         struct roce_gid_ndev_storage *ndev_storage;
 382         struct ib_gid_table_entry *entry;
 383 
 384         lockdep_assert_held(&table->lock);
 385 
 386         dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
 387                 ix, table->data_vec[ix]->attr.gid.raw);
 388 
 389         write_lock_irq(&table->rwlock);
 390         entry = table->data_vec[ix];
 391         entry->state = GID_TABLE_ENTRY_PENDING_DEL;
 392         /*
  393          * For a non-RoCE protocol, the GID entry slot is immediately ready for reuse.
 394          */
 395         if (!rdma_protocol_roce(ib_dev, port))
 396                 table->data_vec[ix] = NULL;
 397         write_unlock_irq(&table->rwlock);
 398 
 399         ndev_storage = entry->ndev_storage;
 400         if (ndev_storage) {
 401                 entry->ndev_storage = NULL;
 402                 rcu_assign_pointer(entry->attr.ndev, NULL);
 403                 call_rcu(&ndev_storage->rcu_head, put_gid_ndev);
 404         }
 405 
 406         if (rdma_cap_roce_gid_table(ib_dev, port))
 407                 ib_dev->ops.del_gid(&entry->attr, &entry->context);
 408 
 409         put_gid_entry_locked(entry);
 410 }
 411 
 412 /**
 413  * add_modify_gid - Add or modify GID table entry
 414  *
 415  * @table:      GID table in which GID to be added or modified
 416  * @attr:       Attributes of the GID
 417  *
  418  * Returns 0 on success or an appropriate error code. It accepts a zero
  419  * GID for non-RoCE ports from HCAs that report it as a valid
  420  * GID. However, such zero GIDs are not added to the cache.
 421  */
 422 static int add_modify_gid(struct ib_gid_table *table,
 423                           const struct ib_gid_attr *attr)
 424 {
 425         struct ib_gid_table_entry *entry;
 426         int ret = 0;
 427 
 428         /*
 429          * Invalidate any old entry in the table to make it safe to write to
 430          * this index.
 431          */
 432         if (is_gid_entry_valid(table->data_vec[attr->index]))
 433                 del_gid(attr->device, attr->port_num, table, attr->index);
 434 
 435         /*
 436          * Some HCA's report multiple GID entries with only one valid GID, and
 437          * leave other unused entries as the zero GID. Convert zero GIDs to
 438          * empty table entries instead of storing them.
 439          */
 440         if (rdma_is_zero_gid(&attr->gid))
 441                 return 0;
 442 
 443         entry = alloc_gid_entry(attr);
 444         if (!entry)
 445                 return -ENOMEM;
 446 
 447         if (rdma_protocol_roce(attr->device, attr->port_num)) {
 448                 ret = add_roce_gid(entry);
 449                 if (ret)
 450                         goto done;
 451         }
 452 
 453         store_gid_entry(table, entry);
 454         return 0;
 455 
 456 done:
 457         put_gid_entry(entry);
 458         return ret;
 459 }
 460 
 461 /* rwlock should be read locked, or lock should be held */
 462 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
 463                     const struct ib_gid_attr *val, bool default_gid,
 464                     unsigned long mask, int *pempty)
 465 {
 466         int i = 0;
 467         int found = -1;
 468         int empty = pempty ? -1 : 0;
 469 
 470         while (i < table->sz && (found < 0 || empty < 0)) {
 471                 struct ib_gid_table_entry *data = table->data_vec[i];
 472                 struct ib_gid_attr *attr;
 473                 int curr_index = i;
 474 
 475                 i++;
 476 
  477                 /* find_gid() is used during GID addition, where it is
  478                  * expected to return a free entry slot that is not a duplicate.
  479                  * A free entry slot is requested and returned only if pempty is
  480                  * set, so look for a free slot only when requested.
 481                  */
 482                 if (pempty && empty < 0) {
 483                         if (is_gid_entry_free(data) &&
 484                             default_gid ==
 485                                 is_gid_index_default(table, curr_index)) {
 486                                 /*
 487                                  * Found an invalid (free) entry; allocate it.
 488                                  * If default GID is requested, then our
 489                                  * found slot must be one of the DEFAULT
 490                                  * reserved slots or we fail.
 491                                  * This ensures that only DEFAULT reserved
 492                                  * slots are used for default property GIDs.
 493                                  */
 494                                 empty = curr_index;
 495                         }
 496                 }
 497 
 498                 /*
 499                  * Additionally find_gid() is used to find valid entry during
 500                  * lookup operation; so ignore the entries which are marked as
 501                  * pending for removal and the entries which are marked as
 502                  * invalid.
 503                  */
 504                 if (!is_gid_entry_valid(data))
 505                         continue;
 506 
 507                 if (found >= 0)
 508                         continue;
 509 
 510                 attr = &data->attr;
 511                 if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
 512                     attr->gid_type != val->gid_type)
 513                         continue;
 514 
 515                 if (mask & GID_ATTR_FIND_MASK_GID &&
 516                     memcmp(gid, &data->attr.gid, sizeof(*gid)))
 517                         continue;
 518 
 519                 if (mask & GID_ATTR_FIND_MASK_NETDEV &&
 520                     attr->ndev != val->ndev)
 521                         continue;
 522 
 523                 if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
 524                     is_gid_index_default(table, curr_index) != default_gid)
 525                         continue;
 526 
 527                 found = curr_index;
 528         }
 529 
 530         if (pempty)
 531                 *pempty = empty;
 532 
 533         return found;
 534 }
 535 
 536 static void make_default_gid(struct  net_device *dev, union ib_gid *gid)
 537 {
 538         gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
 539         addrconf_ifid_eui48(&gid->raw[8], dev);
 540 }
 541 
 542 static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
 543                               union ib_gid *gid, struct ib_gid_attr *attr,
 544                               unsigned long mask, bool default_gid)
 545 {
 546         struct ib_gid_table *table;
 547         int ret = 0;
 548         int empty;
 549         int ix;
 550 
 551         /* Do not allow adding zero GID in support of
 552          * IB spec version 1.3 section 4.1.1 point (6) and
 553          * section 12.7.10 and section 12.7.20
 554          */
 555         if (rdma_is_zero_gid(gid))
 556                 return -EINVAL;
 557 
 558         table = rdma_gid_table(ib_dev, port);
 559 
 560         mutex_lock(&table->lock);
 561 
 562         ix = find_gid(table, gid, attr, default_gid, mask, &empty);
 563         if (ix >= 0)
 564                 goto out_unlock;
 565 
 566         if (empty < 0) {
 567                 ret = -ENOSPC;
 568                 goto out_unlock;
 569         }
 570         attr->device = ib_dev;
 571         attr->index = empty;
 572         attr->port_num = port;
 573         attr->gid = *gid;
 574         ret = add_modify_gid(table, attr);
 575         if (!ret)
 576                 dispatch_gid_change_event(ib_dev, port);
 577 
 578 out_unlock:
 579         mutex_unlock(&table->lock);
 580         if (ret)
 581                 pr_warn("%s: unable to add gid %pI6 error=%d\n",
 582                         __func__, gid->raw, ret);
 583         return ret;
 584 }
 585 
 586 int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
 587                      union ib_gid *gid, struct ib_gid_attr *attr)
 588 {
 589         unsigned long mask = GID_ATTR_FIND_MASK_GID |
 590                              GID_ATTR_FIND_MASK_GID_TYPE |
 591                              GID_ATTR_FIND_MASK_NETDEV;
 592 
 593         return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
 594 }
 595 
 596 static int
 597 _ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
 598                   union ib_gid *gid, struct ib_gid_attr *attr,
 599                   unsigned long mask, bool default_gid)
 600 {
 601         struct ib_gid_table *table;
 602         int ret = 0;
 603         int ix;
 604 
 605         table = rdma_gid_table(ib_dev, port);
 606 
 607         mutex_lock(&table->lock);
 608 
 609         ix = find_gid(table, gid, attr, default_gid, mask, NULL);
 610         if (ix < 0) {
 611                 ret = -EINVAL;
 612                 goto out_unlock;
 613         }
 614 
 615         del_gid(ib_dev, port, table, ix);
 616         dispatch_gid_change_event(ib_dev, port);
 617 
 618 out_unlock:
 619         mutex_unlock(&table->lock);
 620         if (ret)
 621                 pr_debug("%s: can't delete gid %pI6 error=%d\n",
 622                          __func__, gid->raw, ret);
 623         return ret;
 624 }
 625 
 626 int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
 627                      union ib_gid *gid, struct ib_gid_attr *attr)
 628 {
 629         unsigned long mask = GID_ATTR_FIND_MASK_GID       |
 630                              GID_ATTR_FIND_MASK_GID_TYPE |
 631                              GID_ATTR_FIND_MASK_DEFAULT  |
 632                              GID_ATTR_FIND_MASK_NETDEV;
 633 
 634         return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
 635 }
 636 
 637 int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 638                                      struct net_device *ndev)
 639 {
 640         struct ib_gid_table *table;
 641         int ix;
 642         bool deleted = false;
 643 
 644         table = rdma_gid_table(ib_dev, port);
 645 
 646         mutex_lock(&table->lock);
 647 
 648         for (ix = 0; ix < table->sz; ix++) {
 649                 if (is_gid_entry_valid(table->data_vec[ix]) &&
 650                     table->data_vec[ix]->attr.ndev == ndev) {
 651                         del_gid(ib_dev, port, table, ix);
 652                         deleted = true;
 653                 }
 654         }
 655 
 656         mutex_unlock(&table->lock);
 657 
 658         if (deleted)
 659                 dispatch_gid_change_event(ib_dev, port);
 660 
 661         return 0;
 662 }
 663 
 664 /**
 665  * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
 666  * a valid GID entry for given search parameters. It searches for the specified
 667  * GID value in the local software cache.
  668  * @ib_dev: The device to query.
 669  * @gid: The GID value to search for.
 670  * @gid_type: The GID type to search for.
  671  * @port: The port number of the device where the GID value should be
 672  *   searched.
 673  * @ndev: In RoCE, the net device of the device. NULL means ignore.
 674  *
 675  * Returns sgid attributes if the GID is found with valid reference or
 676  * returns ERR_PTR for the error.
 677  * The caller must invoke rdma_put_gid_attr() to release the reference.
 678  */
 679 const struct ib_gid_attr *
 680 rdma_find_gid_by_port(struct ib_device *ib_dev,
 681                       const union ib_gid *gid,
 682                       enum ib_gid_type gid_type,
 683                       u8 port, struct net_device *ndev)
 684 {
 685         int local_index;
 686         struct ib_gid_table *table;
 687         unsigned long mask = GID_ATTR_FIND_MASK_GID |
 688                              GID_ATTR_FIND_MASK_GID_TYPE;
 689         struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
 690         const struct ib_gid_attr *attr;
 691         unsigned long flags;
 692 
 693         if (!rdma_is_port_valid(ib_dev, port))
 694                 return ERR_PTR(-ENOENT);
 695 
 696         table = rdma_gid_table(ib_dev, port);
 697 
 698         if (ndev)
 699                 mask |= GID_ATTR_FIND_MASK_NETDEV;
 700 
 701         read_lock_irqsave(&table->rwlock, flags);
 702         local_index = find_gid(table, gid, &val, false, mask, NULL);
 703         if (local_index >= 0) {
 704                 get_gid_entry(table->data_vec[local_index]);
 705                 attr = &table->data_vec[local_index]->attr;
 706                 read_unlock_irqrestore(&table->rwlock, flags);
 707                 return attr;
 708         }
 709 
 710         read_unlock_irqrestore(&table->rwlock, flags);
 711         return ERR_PTR(-ENOENT);
 712 }
 713 EXPORT_SYMBOL(rdma_find_gid_by_port);
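
/*
 * Example (hypothetical caller, not part of this file): looking up a RoCE v2
 * GID on a known port and releasing the reference once done with it.
 */
static int example_lookup_gid_on_port(struct ib_device *ib_dev, u8 port,
				      const union ib_gid *gid)
{
	const struct ib_gid_attr *attr;

	attr = rdma_find_gid_by_port(ib_dev, gid, IB_GID_TYPE_ROCE_UDP_ENCAP,
				     port, NULL);
	if (IS_ERR(attr))
		return PTR_ERR(attr);

	/* attr (including attr->index) stays valid while the reference is held */
	rdma_put_gid_attr(attr);
	return 0;
}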
 714 
 715 /**
 716  * rdma_find_gid_by_filter - Returns the GID table attribute where a
 717  * specified GID value occurs
  718  * @ib_dev: The device to query.
 719  * @gid: The GID value to search for.
 720  * @port: The port number of the device where the GID value could be
 721  *   searched.
 722  * @filter: The filter function is executed on any matching GID in the table.
 723  *   If the filter function returns true, the corresponding index is returned,
 724  *   otherwise, we continue searching the GID table. It's guaranteed that
 725  *   while filter is executed, ndev field is valid and the structure won't
 726  *   change. filter is executed in an atomic context. filter must not be NULL.
 727  *
 728  * rdma_find_gid_by_filter() searches for the specified GID value
 729  * of which the filter function returns true in the port's GID table.
 730  *
 731  */
 732 const struct ib_gid_attr *rdma_find_gid_by_filter(
 733         struct ib_device *ib_dev, const union ib_gid *gid, u8 port,
 734         bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
 735                        void *),
 736         void *context)
 737 {
 738         const struct ib_gid_attr *res = ERR_PTR(-ENOENT);
 739         struct ib_gid_table *table;
 740         unsigned long flags;
 741         unsigned int i;
 742 
 743         if (!rdma_is_port_valid(ib_dev, port))
 744                 return ERR_PTR(-EINVAL);
 745 
 746         table = rdma_gid_table(ib_dev, port);
 747 
 748         read_lock_irqsave(&table->rwlock, flags);
 749         for (i = 0; i < table->sz; i++) {
 750                 struct ib_gid_table_entry *entry = table->data_vec[i];
 751 
 752                 if (!is_gid_entry_valid(entry))
 753                         continue;
 754 
 755                 if (memcmp(gid, &entry->attr.gid, sizeof(*gid)))
 756                         continue;
 757 
 758                 if (filter(gid, &entry->attr, context)) {
 759                         get_gid_entry(entry);
 760                         res = &entry->attr;
 761                         break;
 762                 }
 763         }
 764         read_unlock_irqrestore(&table->rwlock, flags);
 765         return res;
 766 }
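
/*
 * Example (hypothetical caller, not part of this file): using a filter to
 * pick only a RoCE v2 entry for a given GID value. The filter runs under the
 * table's read lock, so it must not sleep.
 */
static bool example_is_roce_v2(const union ib_gid *gid,
			       const struct ib_gid_attr *attr, void *context)
{
	return attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
}

static const struct ib_gid_attr *
example_find_roce_v2_gid(struct ib_device *ib_dev, const union ib_gid *gid,
			 u8 port)
{
	/* On success the returned attribute holds a reference which the
	 * caller must drop with rdma_put_gid_attr().
	 */
	return rdma_find_gid_by_filter(ib_dev, gid, port, example_is_roce_v2,
				       NULL);
}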
 767 
 768 static struct ib_gid_table *alloc_gid_table(int sz)
 769 {
 770         struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL);
 771 
 772         if (!table)
 773                 return NULL;
 774 
 775         table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
 776         if (!table->data_vec)
 777                 goto err_free_table;
 778 
 779         mutex_init(&table->lock);
 780 
 781         table->sz = sz;
 782         rwlock_init(&table->rwlock);
 783         return table;
 784 
 785 err_free_table:
 786         kfree(table);
 787         return NULL;
 788 }
 789 
 790 static void release_gid_table(struct ib_device *device,
 791                               struct ib_gid_table *table)
 792 {
 793         bool leak = false;
 794         int i;
 795 
 796         if (!table)
 797                 return;
 798 
 799         for (i = 0; i < table->sz; i++) {
 800                 if (is_gid_entry_free(table->data_vec[i]))
 801                         continue;
 802                 if (kref_read(&table->data_vec[i]->kref) > 1) {
 803                         dev_err(&device->dev,
 804                                 "GID entry ref leak for index %d ref=%d\n", i,
 805                                 kref_read(&table->data_vec[i]->kref));
 806                         leak = true;
 807                 }
 808         }
 809         if (leak)
 810                 return;
 811 
 812         mutex_destroy(&table->lock);
 813         kfree(table->data_vec);
 814         kfree(table);
 815 }
 816 
 817 static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
 818                                    struct ib_gid_table *table)
 819 {
 820         int i;
 821         bool deleted = false;
 822 
 823         if (!table)
 824                 return;
 825 
 826         mutex_lock(&table->lock);
 827         for (i = 0; i < table->sz; ++i) {
 828                 if (is_gid_entry_valid(table->data_vec[i])) {
 829                         del_gid(ib_dev, port, table, i);
 830                         deleted = true;
 831                 }
 832         }
 833         mutex_unlock(&table->lock);
 834 
 835         if (deleted)
 836                 dispatch_gid_change_event(ib_dev, port);
 837 }
 838 
 839 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
 840                                   struct net_device *ndev,
 841                                   unsigned long gid_type_mask,
 842                                   enum ib_cache_gid_default_mode mode)
 843 {
 844         union ib_gid gid = { };
 845         struct ib_gid_attr gid_attr;
 846         unsigned int gid_type;
 847         unsigned long mask;
 848 
 849         mask = GID_ATTR_FIND_MASK_GID_TYPE |
 850                GID_ATTR_FIND_MASK_DEFAULT |
 851                GID_ATTR_FIND_MASK_NETDEV;
 852         memset(&gid_attr, 0, sizeof(gid_attr));
 853         gid_attr.ndev = ndev;
 854 
 855         for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
 856                 if (1UL << gid_type & ~gid_type_mask)
 857                         continue;
 858 
 859                 gid_attr.gid_type = gid_type;
 860 
 861                 if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
 862                         make_default_gid(ndev, &gid);
 863                         __ib_cache_gid_add(ib_dev, port, &gid,
 864                                            &gid_attr, mask, true);
 865                 } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
 866                         _ib_cache_gid_del(ib_dev, port, &gid,
 867                                           &gid_attr, mask, true);
 868                 }
 869         }
 870 }
 871 
 872 static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
 873                                       struct ib_gid_table *table)
 874 {
 875         unsigned int i;
 876         unsigned long roce_gid_type_mask;
 877         unsigned int num_default_gids;
 878 
 879         roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
 880         num_default_gids = hweight_long(roce_gid_type_mask);
 881         /* Reserve starting indices for default GIDs */
 882         for (i = 0; i < num_default_gids && i < table->sz; i++)
 883                 table->default_gid_indices |= BIT(i);
 884 }
 885 
 886 
 887 static void gid_table_release_one(struct ib_device *ib_dev)
 888 {
 889         unsigned int p;
 890 
 891         rdma_for_each_port (ib_dev, p) {
 892                 release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
 893                 ib_dev->port_data[p].cache.gid = NULL;
 894         }
 895 }
 896 
 897 static int _gid_table_setup_one(struct ib_device *ib_dev)
 898 {
 899         struct ib_gid_table *table;
 900         unsigned int rdma_port;
 901 
 902         rdma_for_each_port (ib_dev, rdma_port) {
 903                 table = alloc_gid_table(
 904                         ib_dev->port_data[rdma_port].immutable.gid_tbl_len);
 905                 if (!table)
 906                         goto rollback_table_setup;
 907 
 908                 gid_table_reserve_default(ib_dev, rdma_port, table);
 909                 ib_dev->port_data[rdma_port].cache.gid = table;
 910         }
 911         return 0;
 912 
 913 rollback_table_setup:
 914         gid_table_release_one(ib_dev);
 915         return -ENOMEM;
 916 }
 917 
 918 static void gid_table_cleanup_one(struct ib_device *ib_dev)
 919 {
 920         unsigned int p;
 921 
 922         rdma_for_each_port (ib_dev, p)
 923                 cleanup_gid_table_port(ib_dev, p,
 924                                        ib_dev->port_data[p].cache.gid);
 925 }
 926 
 927 static int gid_table_setup_one(struct ib_device *ib_dev)
 928 {
 929         int err;
 930 
 931         err = _gid_table_setup_one(ib_dev);
 932 
 933         if (err)
 934                 return err;
 935 
 936         rdma_roce_rescan_device(ib_dev);
 937 
 938         return err;
 939 }
 940 
 941 /**
 942  * rdma_query_gid - Read the GID content from the GID software cache
 943  * @device:             Device to query the GID
 944  * @port_num:           Port number of the device
 945  * @index:              Index of the GID table entry to read
 946  * @gid:                Pointer to GID where to store the entry's GID
 947  *
 948  * rdma_query_gid() only reads the GID entry content for requested device,
  949  * port and index. It reads for IB, RoCE and iWARP link layers.  It doesn't
 950  * hold any reference to the GID table entry in the HCA or software cache.
 951  *
 952  * Returns 0 on success or appropriate error code.
 953  *
 954  */
 955 int rdma_query_gid(struct ib_device *device, u8 port_num,
 956                    int index, union ib_gid *gid)
 957 {
 958         struct ib_gid_table *table;
 959         unsigned long flags;
 960         int res = -EINVAL;
 961 
 962         if (!rdma_is_port_valid(device, port_num))
 963                 return -EINVAL;
 964 
 965         table = rdma_gid_table(device, port_num);
 966         read_lock_irqsave(&table->rwlock, flags);
 967 
 968         if (index < 0 || index >= table->sz ||
 969             !is_gid_entry_valid(table->data_vec[index]))
 970                 goto done;
 971 
 972         memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
 973         res = 0;
 974 
 975 done:
 976         read_unlock_irqrestore(&table->rwlock, flags);
 977         return res;
 978 }
 979 EXPORT_SYMBOL(rdma_query_gid);
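
/*
 * Example (hypothetical caller, not part of this file): reading the GID at
 * index 0 of a port from the software cache. No reference to the entry is
 * taken, so the copied value may go stale if the table changes afterwards.
 */
static int example_read_first_gid(struct ib_device *device, u8 port_num,
				  union ib_gid *gid)
{
	return rdma_query_gid(device, port_num, 0, gid);
}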
 980 
 981 /**
 982  * rdma_find_gid - Returns SGID attributes if the matching GID is found.
 983  * @device: The device to query.
 984  * @gid: The GID value to search for.
 985  * @gid_type: The GID type to search for.
 986  * @ndev: In RoCE, the net device of the device. NULL means ignore.
 987  *
 988  * rdma_find_gid() searches for the specified GID value in the software cache.
 989  *
 990  * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
 991  * error. The caller must invoke rdma_put_gid_attr() to release the reference.
 992  *
 993  */
 994 const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
 995                                         const union ib_gid *gid,
 996                                         enum ib_gid_type gid_type,
 997                                         struct net_device *ndev)
 998 {
 999         unsigned long mask = GID_ATTR_FIND_MASK_GID |
1000                              GID_ATTR_FIND_MASK_GID_TYPE;
1001         struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
1002         unsigned int p;
1003 
1004         if (ndev)
1005                 mask |= GID_ATTR_FIND_MASK_NETDEV;
1006 
1007         rdma_for_each_port(device, p) {
1008                 struct ib_gid_table *table;
1009                 unsigned long flags;
1010                 int index;
1011 
1012                 table = device->port_data[p].cache.gid;
1013                 read_lock_irqsave(&table->rwlock, flags);
1014                 index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
1015                 if (index >= 0) {
1016                         const struct ib_gid_attr *attr;
1017 
1018                         get_gid_entry(table->data_vec[index]);
1019                         attr = &table->data_vec[index]->attr;
1020                         read_unlock_irqrestore(&table->rwlock, flags);
1021                         return attr;
1022                 }
1023                 read_unlock_irqrestore(&table->rwlock, flags);
1024         }
1025 
1026         return ERR_PTR(-ENOENT);
1027 }
1028 EXPORT_SYMBOL(rdma_find_gid);
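
/*
 * Example (hypothetical caller, not part of this file): resolving a GID value
 * to the port it lives on. The returned attribute carries a reference that
 * must be released with rdma_put_gid_attr().
 */
static int example_gid_to_port(struct ib_device *device,
			       const union ib_gid *gid, u8 *port_num)
{
	const struct ib_gid_attr *attr;

	attr = rdma_find_gid(device, gid, IB_GID_TYPE_IB, NULL);
	if (IS_ERR(attr))
		return PTR_ERR(attr);

	*port_num = attr->port_num;
	rdma_put_gid_attr(attr);
	return 0;
}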
1029 
1030 int ib_get_cached_pkey(struct ib_device *device,
1031                        u8                port_num,
1032                        int               index,
1033                        u16              *pkey)
1034 {
1035         struct ib_pkey_cache *cache;
1036         unsigned long flags;
1037         int ret = 0;
1038 
1039         if (!rdma_is_port_valid(device, port_num))
1040                 return -EINVAL;
1041 
1042         read_lock_irqsave(&device->cache.lock, flags);
1043 
1044         cache = device->port_data[port_num].cache.pkey;
1045 
1046         if (index < 0 || index >= cache->table_len)
1047                 ret = -EINVAL;
1048         else
1049                 *pkey = cache->table[index];
1050 
1051         read_unlock_irqrestore(&device->cache.lock, flags);
1052 
1053         return ret;
1054 }
1055 EXPORT_SYMBOL(ib_get_cached_pkey);
1056 
1057 int ib_get_cached_subnet_prefix(struct ib_device *device,
1058                                 u8                port_num,
1059                                 u64              *sn_pfx)
1060 {
1061         unsigned long flags;
1062 
1063         if (!rdma_is_port_valid(device, port_num))
1064                 return -EINVAL;
1065 
1066         read_lock_irqsave(&device->cache.lock, flags);
1067         *sn_pfx = device->port_data[port_num].cache.subnet_prefix;
1068         read_unlock_irqrestore(&device->cache.lock, flags);
1069 
1070         return 0;
1071 }
1072 EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
1073 
1074 int ib_find_cached_pkey(struct ib_device *device,
1075                         u8                port_num,
1076                         u16               pkey,
1077                         u16              *index)
1078 {
1079         struct ib_pkey_cache *cache;
1080         unsigned long flags;
1081         int i;
1082         int ret = -ENOENT;
1083         int partial_ix = -1;
1084 
1085         if (!rdma_is_port_valid(device, port_num))
1086                 return -EINVAL;
1087 
1088         read_lock_irqsave(&device->cache.lock, flags);
1089 
1090         cache = device->port_data[port_num].cache.pkey;
1091 
1092         *index = -1;
1093 
1094         for (i = 0; i < cache->table_len; ++i)
1095                 if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
1096                         if (cache->table[i] & 0x8000) {
1097                                 *index = i;
1098                                 ret = 0;
1099                                 break;
1100                         } else
1101                                 partial_ix = i;
1102                 }
1103 
1104         if (ret && partial_ix >= 0) {
1105                 *index = partial_ix;
1106                 ret = 0;
1107         }
1108 
1109         read_unlock_irqrestore(&device->cache.lock, flags);
1110 
1111         return ret;
1112 }
1113 EXPORT_SYMBOL(ib_find_cached_pkey);
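
/*
 * Example (hypothetical caller, not part of this file): translating the
 * default P_Key 0xffff into its cached table index and reading the value
 * back. ib_find_cached_pkey() prefers a full-membership match and falls back
 * to a limited-membership one.
 */
static int example_default_pkey_index(struct ib_device *device, u8 port_num)
{
	u16 pkey_index;
	u16 pkey;
	int ret;

	ret = ib_find_cached_pkey(device, port_num, 0xffff, &pkey_index);
	if (ret)
		return ret;

	ret = ib_get_cached_pkey(device, port_num, pkey_index, &pkey);
	return ret ? ret : pkey_index;
}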
1114 
1115 int ib_find_exact_cached_pkey(struct ib_device *device,
1116                               u8                port_num,
1117                               u16               pkey,
1118                               u16              *index)
1119 {
1120         struct ib_pkey_cache *cache;
1121         unsigned long flags;
1122         int i;
1123         int ret = -ENOENT;
1124 
1125         if (!rdma_is_port_valid(device, port_num))
1126                 return -EINVAL;
1127 
1128         read_lock_irqsave(&device->cache.lock, flags);
1129 
1130         cache = device->port_data[port_num].cache.pkey;
1131 
1132         *index = -1;
1133 
1134         for (i = 0; i < cache->table_len; ++i)
1135                 if (cache->table[i] == pkey) {
1136                         *index = i;
1137                         ret = 0;
1138                         break;
1139                 }
1140 
1141         read_unlock_irqrestore(&device->cache.lock, flags);
1142 
1143         return ret;
1144 }
1145 EXPORT_SYMBOL(ib_find_exact_cached_pkey);
1146 
1147 int ib_get_cached_lmc(struct ib_device *device,
1148                       u8                port_num,
1149                       u8                *lmc)
1150 {
1151         unsigned long flags;
1152         int ret = 0;
1153 
1154         if (!rdma_is_port_valid(device, port_num))
1155                 return -EINVAL;
1156 
1157         read_lock_irqsave(&device->cache.lock, flags);
1158         *lmc = device->port_data[port_num].cache.lmc;
1159         read_unlock_irqrestore(&device->cache.lock, flags);
1160 
1161         return ret;
1162 }
1163 EXPORT_SYMBOL(ib_get_cached_lmc);
1164 
1165 int ib_get_cached_port_state(struct ib_device   *device,
1166                              u8                  port_num,
1167                              enum ib_port_state *port_state)
1168 {
1169         unsigned long flags;
1170         int ret = 0;
1171 
1172         if (!rdma_is_port_valid(device, port_num))
1173                 return -EINVAL;
1174 
1175         read_lock_irqsave(&device->cache.lock, flags);
1176         *port_state = device->port_data[port_num].cache.port_state;
1177         read_unlock_irqrestore(&device->cache.lock, flags);
1178 
1179         return ret;
1180 }
1181 EXPORT_SYMBOL(ib_get_cached_port_state);
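
/*
 * Example (hypothetical caller, not part of this file): checking whether a
 * port is ACTIVE from the cache instead of issuing a full ib_query_port().
 */
static bool example_port_is_active(struct ib_device *device, u8 port_num)
{
	enum ib_port_state state;

	if (ib_get_cached_port_state(device, port_num, &state))
		return false;

	return state == IB_PORT_ACTIVE;
}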
1182 
1183 /**
1184  * rdma_get_gid_attr - Returns GID attributes for a port of a device
1185  * at a requested gid_index, if a valid GID entry exists.
1186  * @device:             The device to query.
1187  * @port_num:           The port number on the device where the GID value
1188  *                      is to be queried.
1189  * @index:              Index of the GID table entry whose attributes are to
1190  *                      be queried.
1191  *
 1192  * rdma_get_gid_attr() acquires a reference on the gid attribute entry in the
 1193  * cached GID table. The caller must invoke rdma_put_gid_attr() to release
 1194  * the reference to the gid attribute, regardless of link layer.
1195  *
1196  * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
1197  * code.
1198  */
1199 const struct ib_gid_attr *
1200 rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
1201 {
1202         const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
1203         struct ib_gid_table *table;
1204         unsigned long flags;
1205 
1206         if (!rdma_is_port_valid(device, port_num))
1207                 return ERR_PTR(-EINVAL);
1208 
1209         table = rdma_gid_table(device, port_num);
1210         if (index < 0 || index >= table->sz)
1211                 return ERR_PTR(-EINVAL);
1212 
1213         read_lock_irqsave(&table->rwlock, flags);
1214         if (!is_gid_entry_valid(table->data_vec[index]))
1215                 goto done;
1216 
1217         get_gid_entry(table->data_vec[index]);
1218         attr = &table->data_vec[index]->attr;
1219 done:
1220         read_unlock_irqrestore(&table->rwlock, flags);
1221         return attr;
1222 }
1223 EXPORT_SYMBOL(rdma_get_gid_attr);
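
/*
 * Example (hypothetical caller, not part of this file): inspecting the GID
 * type of a specific table index. The attribute is reference counted, so it
 * must be released with rdma_put_gid_attr() after every successful get.
 */
static int example_gid_type_at_index(struct ib_device *device, u8 port_num,
				     int index, enum ib_gid_type *gid_type)
{
	const struct ib_gid_attr *attr;

	attr = rdma_get_gid_attr(device, port_num, index);
	if (IS_ERR(attr))
		return PTR_ERR(attr);

	*gid_type = attr->gid_type;
	rdma_put_gid_attr(attr);
	return 0;
}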
1224 
1225 /**
1226  * rdma_put_gid_attr - Release reference to the GID attribute
1227  * @attr:               Pointer to the GID attribute whose reference
1228  *                      needs to be released.
1229  *
 1230  * rdma_put_gid_attr() must be used to release a reference that was
 1231  * acquired using rdma_get_gid_attr() or any API
 1232  * which returns a pointer to the ib_gid_attr, regardless of link layer
 1233  * (IB or RoCE).
1234  *
1235  */
1236 void rdma_put_gid_attr(const struct ib_gid_attr *attr)
1237 {
1238         struct ib_gid_table_entry *entry =
1239                 container_of(attr, struct ib_gid_table_entry, attr);
1240 
1241         put_gid_entry(entry);
1242 }
1243 EXPORT_SYMBOL(rdma_put_gid_attr);
1244 
1245 /**
1246  * rdma_hold_gid_attr - Get reference to existing GID attribute
1247  *
1248  * @attr:               Pointer to the GID attribute whose reference
1249  *                      needs to be taken.
1250  *
1251  * Increase the reference count to a GID attribute to keep it from being
1252  * freed. Callers are required to already be holding a reference to attribute.
1253  *
1254  */
1255 void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
1256 {
1257         struct ib_gid_table_entry *entry =
1258                 container_of(attr, struct ib_gid_table_entry, attr);
1259 
1260         get_gid_entry(entry);
1261 }
1262 EXPORT_SYMBOL(rdma_hold_gid_attr);
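
/*
 * Example (hypothetical caller, not part of this file): taking an extra
 * reference before stashing a gid attribute pointer whose lifetime must
 * outlast the caller's original reference. struct example_ctx is purely
 * illustrative.
 */
struct example_ctx {
	const struct ib_gid_attr *sgid_attr;
};

static void example_stash_gid_attr(struct example_ctx *ctx,
				   const struct ib_gid_attr *attr)
{
	rdma_hold_gid_attr(attr);
	ctx->sgid_attr = attr;	/* paired with rdma_put_gid_attr() later */
}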
1263 
1264 /**
1265  * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
1266  * which must be in UP state.
1267  *
1268  * @attr:Pointer to the GID attribute
1269  *
1270  * Returns pointer to netdevice if the netdevice was attached to GID and
1271  * netdevice is in UP state. Caller must hold RCU lock as this API
1272  * reads the netdev flags which can change while netdevice migrates to
1273  * different net namespace. Returns ERR_PTR with error code otherwise.
1274  *
1275  */
1276 struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
1277 {
1278         struct ib_gid_table_entry *entry =
1279                         container_of(attr, struct ib_gid_table_entry, attr);
1280         struct ib_device *device = entry->attr.device;
1281         struct net_device *ndev = ERR_PTR(-ENODEV);
1282         u8 port_num = entry->attr.port_num;
1283         struct ib_gid_table *table;
1284         unsigned long flags;
1285         bool valid;
1286 
1287         table = rdma_gid_table(device, port_num);
1288 
1289         read_lock_irqsave(&table->rwlock, flags);
1290         valid = is_gid_entry_valid(table->data_vec[attr->index]);
1291         if (valid) {
1292                 ndev = rcu_dereference(attr->ndev);
1293                 if (!ndev ||
1294                     (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
1295                         ndev = ERR_PTR(-ENODEV);
1296         }
1297         read_unlock_irqrestore(&table->rwlock, flags);
1298         return ndev;
1299 }
1300 EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);
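
/*
 * Example (hypothetical caller, not part of this file): reading the netdev
 * behind a GID attribute under RCU and copying its name while it is
 * guaranteed not to be freed.
 */
static int example_gid_ndev_name(const struct ib_gid_attr *attr,
				 char *name, size_t len)
{
	struct net_device *ndev;
	int ret = 0;

	rcu_read_lock();
	ndev = rdma_read_gid_attr_ndev_rcu(attr);
	if (IS_ERR(ndev))
		ret = PTR_ERR(ndev);
	else
		strscpy(name, ndev->name, len);
	rcu_read_unlock();
	return ret;
}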
1301 
1302 static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
1303 {
1304         u16 *vlan_id = data;
1305 
1306         if (is_vlan_dev(lower_dev))
1307                 *vlan_id = vlan_dev_vlan_id(lower_dev);
1308 
 1309         /* We are interested only in the first-level vlan device, so
 1310          * always return 1 to stop iterating over lower-level devices.
1311          */
1312         return 1;
1313 }
1314 
1315 /**
1316  * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address
1317  *                           of a GID entry.
1318  *
1319  * @attr:       GID attribute pointer whose L2 fields to be read
1320  * @vlan_id:    Pointer to vlan id to fill up if the GID entry has
1321  *              vlan id. It is optional.
1322  * @smac:       Pointer to smac to fill up for a GID entry. It is optional.
1323  *
1324  * rdma_read_gid_l2_fields() returns 0 on success and returns vlan id
1325  * (if gid entry has vlan) and source MAC, or returns error.
1326  */
1327 int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
1328                             u16 *vlan_id, u8 *smac)
1329 {
1330         struct net_device *ndev;
1331 
1332         rcu_read_lock();
1333         ndev = rcu_dereference(attr->ndev);
1334         if (!ndev) {
1335                 rcu_read_unlock();
1336                 return -ENODEV;
1337         }
1338         if (smac)
1339                 ether_addr_copy(smac, ndev->dev_addr);
1340         if (vlan_id) {
1341                 *vlan_id = 0xffff;
1342                 if (is_vlan_dev(ndev)) {
1343                         *vlan_id = vlan_dev_vlan_id(ndev);
1344                 } else {
 1345                         /* If the netdev is an upper device and its lower
 1346                          * device is a vlan device, consider the vlan id of
 1347                          * the lower vlan device for this gid entry.
1348                          */
1349                         netdev_walk_all_lower_dev_rcu(attr->ndev,
1350                                         get_lower_dev_vlan, vlan_id);
1351                 }
1352         }
1353         rcu_read_unlock();
1354         return 0;
1355 }
1356 EXPORT_SYMBOL(rdma_read_gid_l2_fields);
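
/*
 * Example (hypothetical caller, not part of this file): fetching the vlan id
 * and source MAC for a RoCE GID entry; vlan_id is 0xffff when the entry has
 * no vlan.
 */
static int example_gid_l2_info(const struct ib_gid_attr *attr)
{
	u8 smac[ETH_ALEN];
	u16 vlan_id;
	int ret;

	ret = rdma_read_gid_l2_fields(attr, &vlan_id, smac);
	if (ret)
		return ret;

	pr_debug("gid index %d: vlan %u smac %pM\n", attr->index, vlan_id,
		 smac);
	return 0;
}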
1357 
1358 static int config_non_roce_gid_cache(struct ib_device *device,
1359                                      u8 port, int gid_tbl_len)
1360 {
1361         struct ib_gid_attr gid_attr = {};
1362         struct ib_gid_table *table;
1363         int ret = 0;
1364         int i;
1365 
1366         gid_attr.device = device;
1367         gid_attr.port_num = port;
1368         table = rdma_gid_table(device, port);
1369 
1370         mutex_lock(&table->lock);
1371         for (i = 0; i < gid_tbl_len; ++i) {
1372                 if (!device->ops.query_gid)
1373                         continue;
1374                 ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
1375                 if (ret) {
1376                         dev_warn(&device->dev,
1377                                  "query_gid failed (%d) for index %d\n", ret,
1378                                  i);
1379                         goto err;
1380                 }
1381                 gid_attr.index = i;
1382                 add_modify_gid(table, &gid_attr);
1383         }
1384 err:
1385         mutex_unlock(&table->lock);
1386         return ret;
1387 }
1388 
1389 static int
1390 ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
1391 {
1392         struct ib_port_attr       *tprops = NULL;
1393         struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
1394         int                        i;
1395         int                        ret;
1396 
1397         if (!rdma_is_port_valid(device, port))
1398                 return -EINVAL;
1399 
1400         tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
1401         if (!tprops)
1402                 return -ENOMEM;
1403 
1404         ret = ib_query_port(device, port, tprops);
1405         if (ret) {
1406                 dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
1407                 goto err;
1408         }
1409 
1410         if (!rdma_protocol_roce(device, port)) {
1411                 ret = config_non_roce_gid_cache(device, port,
1412                                                 tprops->gid_tbl_len);
1413                 if (ret)
1414                         goto err;
1415         }
1416 
1417         pkey_cache = kmalloc(struct_size(pkey_cache, table,
1418                                          tprops->pkey_tbl_len),
1419                              GFP_KERNEL);
1420         if (!pkey_cache) {
1421                 ret = -ENOMEM;
1422                 goto err;
1423         }
1424 
1425         pkey_cache->table_len = tprops->pkey_tbl_len;
1426 
1427         for (i = 0; i < pkey_cache->table_len; ++i) {
1428                 ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
1429                 if (ret) {
1430                         dev_warn(&device->dev,
1431                                  "ib_query_pkey failed (%d) for index %d\n",
1432                                  ret, i);
1433                         goto err;
1434                 }
1435         }
1436 
1437         write_lock_irq(&device->cache.lock);
1438 
1439         old_pkey_cache = device->port_data[port].cache.pkey;
1440 
1441         device->port_data[port].cache.pkey = pkey_cache;
1442         device->port_data[port].cache.lmc = tprops->lmc;
1443         device->port_data[port].cache.port_state = tprops->state;
1444 
1445         device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
1446         write_unlock_irq(&device->cache.lock);
1447 
1448         if (enforce_security)
1449                 ib_security_cache_change(device,
1450                                          port,
1451                                          tprops->subnet_prefix);
1452 
1453         kfree(old_pkey_cache);
1454         kfree(tprops);
1455         return 0;
1456 
1457 err:
1458         kfree(pkey_cache);
1459         kfree(tprops);
1460         return ret;
1461 }
1462 
1463 static void ib_cache_event_task(struct work_struct *_work)
1464 {
1465         struct ib_update_work *work =
1466                 container_of(_work, struct ib_update_work, work);
1467         int ret;
1468 
1469         /* Before distributing the cache update event, first sync
1470          * the cache.
1471          */
1472         ret = ib_cache_update(work->event.device, work->event.element.port_num,
1473                               work->enforce_security);
1474 
 1475         /* The GID event is already notified for individual GID entries by
 1476          * dispatch_gid_change_event(). Hence, notify only for the rest of
 1477          * the events.
1478          */
1479         if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
1480                 ib_dispatch_event_clients(&work->event);
1481 
1482         kfree(work);
1483 }
1484 
1485 static void ib_generic_event_task(struct work_struct *_work)
1486 {
1487         struct ib_update_work *work =
1488                 container_of(_work, struct ib_update_work, work);
1489 
1490         ib_dispatch_event_clients(&work->event);
1491         kfree(work);
1492 }
1493 
1494 static bool is_cache_update_event(const struct ib_event *event)
1495 {
1496         return (event->event == IB_EVENT_PORT_ERR    ||
1497                 event->event == IB_EVENT_PORT_ACTIVE ||
1498                 event->event == IB_EVENT_LID_CHANGE  ||
1499                 event->event == IB_EVENT_PKEY_CHANGE ||
1500                 event->event == IB_EVENT_CLIENT_REREGISTER ||
1501                 event->event == IB_EVENT_GID_CHANGE);
1502 }
1503 
1504 /**
1505  * ib_dispatch_event - Dispatch an asynchronous event
1506  * @event:Event to dispatch
1507  *
1508  * Low-level drivers must call ib_dispatch_event() to dispatch the
1509  * event to all registered event handlers when an asynchronous event
1510  * occurs.
1511  */
1512 void ib_dispatch_event(const struct ib_event *event)
1513 {
1514         struct ib_update_work *work;
1515 
1516         work = kzalloc(sizeof(*work), GFP_ATOMIC);
1517         if (!work)
1518                 return;
1519 
1520         if (is_cache_update_event(event))
1521                 INIT_WORK(&work->work, ib_cache_event_task);
1522         else
1523                 INIT_WORK(&work->work, ib_generic_event_task);
1524 
1525         work->event = *event;
1526         if (event->event == IB_EVENT_PKEY_CHANGE ||
1527             event->event == IB_EVENT_GID_CHANGE)
1528                 work->enforce_security = true;
1529 
1530         queue_work(ib_wq, &work->work);
1531 }
1532 EXPORT_SYMBOL(ib_dispatch_event);
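
/*
 * Example (hypothetical driver code, not part of this file): how a low-level
 * driver reports a port becoming active. Cache-affecting events such as this
 * one are routed through ib_cache_event_task() before reaching clients.
 */
static void example_report_port_active(struct ib_device *ibdev, u8 port_num)
{
	struct ib_event event = {};

	event.device = ibdev;
	event.element.port_num = port_num;
	event.event = IB_EVENT_PORT_ACTIVE;

	ib_dispatch_event(&event);
}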
1533 
1534 int ib_cache_setup_one(struct ib_device *device)
1535 {
1536         unsigned int p;
1537         int err;
1538 
1539         rwlock_init(&device->cache.lock);
1540 
1541         err = gid_table_setup_one(device);
1542         if (err)
1543                 return err;
1544 
1545         rdma_for_each_port (device, p) {
1546                 err = ib_cache_update(device, p, true);
1547                 if (err)
1548                         return err;
1549         }
1550 
1551         return 0;
1552 }
1553 
1554 void ib_cache_release_one(struct ib_device *device)
1555 {
1556         unsigned int p;
1557 
1558         /*
1559          * The release function frees all the cache elements.
1560          * This function should be called as part of freeing
1561          * all the device's resources when the cache could no
1562          * longer be accessed.
1563          */
1564         rdma_for_each_port (device, p)
1565                 kfree(device->port_data[p].cache.pkey);
1566 
1567         gid_table_release_one(device);
1568 }
1569 
1570 void ib_cache_cleanup_one(struct ib_device *device)
1571 {
1572         /* The cleanup function waits for all in-progress workqueue
1573          * elements and cleans up the GID cache. This function should be
1574          * called after the device was removed from the devices list and
1575          * all clients were removed, so the cache exists but is
1576          * non-functional and shouldn't be updated anymore.
1577          */
1578         flush_workqueue(ib_wq);
1579         gid_table_cleanup_one(device);
1580 
1581         /*
1582          * Flush the wq second time for any pending GID delete work.
1583          */
1584         flush_workqueue(ib_wq);
1585 }
