root/net/smc/smc_core.c


DEFINITIONS

This source file includes the following definitions.
  1. smc_lgr_schedule_free_work
  2. smc_lgr_schedule_free_work_fast
  3. smc_lgr_add_alert_token
  4. smc_lgr_register_conn
  5. __smc_lgr_unregister_conn
  6. smc_lgr_unregister_conn
  7. smc_link_send_delete
  8. smc_lgr_free_work
  9. smc_lgr_create
  10. smc_buf_unuse
  11. smc_conn_free
  12. smc_link_clear
  13. smcr_buf_free
  14. smcd_buf_free
  15. smc_buf_free
  16. __smc_lgr_free_bufs
  17. smc_lgr_free_bufs
  18. smc_lgr_free
  19. smc_lgr_forget
  20. __smc_lgr_terminate
  21. smc_lgr_terminate
  22. smc_port_terminate
  23. smc_smcd_terminate
  24. smc_vlan_by_tcpsk
  25. smcr_lgr_match
  26. smcd_lgr_match
  27. smc_conn_create
  28. smc_compress_bufsize
  29. smc_uncompress_bufsize
  30. smc_buf_get_slot
  31. smc_rmb_wnd_update_limit
  32. smcr_new_buf_create
  33. smcd_new_buf_create
  34. __smc_buf_create
  35. smc_sndbuf_sync_sg_for_cpu
  36. smc_sndbuf_sync_sg_for_device
  37. smc_rmb_sync_sg_for_cpu
  38. smc_rmb_sync_sg_for_device
  39. smc_buf_create
  40. smc_rmb_reserve_rtoken_idx
  41. smc_rtoken_add
  42. smc_rtoken_delete
  43. smc_rmb_rtoken_handling
  44. smc_core_exit

// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting Infiniband API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR                256
#define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST         (8 * HZ)

static struct smc_lgr_list smc_lgr_list = {     /* established link groups */
        .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
        .list = LIST_HEAD_INIT(smc_lgr_list.list),
        .num = 0,
};

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc);

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
        /* client link group creation always follows the server link group
         * creation. For client use a somewhat higher removal delay time,
         * otherwise there is a risk of out-of-sync link groups.
         */
        mod_delayed_work(system_wq, &lgr->free_work,
                         (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
                         SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
}

void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
        mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn        connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
        struct rb_node **link, *parent = NULL;
        u32 token = conn->alert_token_local;

        link = &conn->lgr->conns_all.rb_node;
        while (*link) {
                struct smc_connection *cur = rb_entry(*link,
                                        struct smc_connection, alert_node);

                parent = *link;
                if (cur->alert_token_local > token)
                        link = &parent->rb_left;
                else
                        link = &parent->rb_right;
        }
        /* Put the new node there */
        rb_link_node(&conn->alert_node, parent, link);
        rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static void smc_lgr_register_conn(struct smc_connection *conn)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        static atomic_t nexttoken = ATOMIC_INIT(0);

        /* find a new alert_token_local value not yet used by some connection
         * in this link group
         */
        sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
        while (!conn->alert_token_local) {
                conn->alert_token_local = atomic_inc_return(&nexttoken);
                if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
                        conn->alert_token_local = 0;
        }
        smc_lgr_add_alert_token(conn);
        conn->lgr->conns_num++;
}

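/* Note on the token assignment above: @nexttoken is a single global
 * counter shared by all link groups, while uniqueness is only checked
 * per link group via smc_lgr_find_conn(); the loop simply retries on a
 * clash, including the reserved value 0 after a 32-bit wrap.
 */
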
/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        struct smc_link_group *lgr = conn->lgr;

        rb_erase(&conn->alert_node, &lgr->conns_all);
        lgr->conns_num--;
        conn->alert_token_local = 0;
        sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!lgr)
                return;
        write_lock_bh(&lgr->conns_lock);
        if (conn->alert_token_local)
                __smc_lgr_unregister_conn(conn);
        write_unlock_bh(&lgr->conns_lock);
}

/* Send delete link, either as client to request the initiation
 * of the DELETE LINK sequence from server; or as server to
 * initiate the delete processing. See smc_llc_rx_delete_link().
 */
static int smc_link_send_delete(struct smc_link *lnk)
{
        if (lnk->state == SMC_LNK_ACTIVE &&
            !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) {
                smc_llc_link_deleting(lnk);
                return 0;
        }
        return -ENOTCONN;
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
        struct smc_link_group *lgr = container_of(to_delayed_work(work),
                                                  struct smc_link_group,
                                                  free_work);
        bool conns;

        spin_lock_bh(&smc_lgr_list.lock);
        read_lock_bh(&lgr->conns_lock);
        conns = RB_EMPTY_ROOT(&lgr->conns_all);
        read_unlock_bh(&lgr->conns_lock);
        if (!conns) { /* number of lgr connections is no longer zero */
                spin_unlock_bh(&smc_lgr_list.lock);
                return;
        }
        if (!list_empty(&lgr->list))
                list_del_init(&lgr->list); /* remove from smc_lgr_list */
        spin_unlock_bh(&smc_lgr_list.lock);

        if (!lgr->is_smcd && !lgr->terminating) {
                struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

                /* try to send del link msg, on error free lgr immediately */
                if (lnk->state == SMC_LNK_ACTIVE &&
                    !smc_link_send_delete(lnk)) {
                        /* reschedule in case we never receive a response */
                        smc_lgr_schedule_free_work(lgr);
                        return;
                }
        }

        if (!delayed_work_pending(&lgr->free_work)) {
                struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

                if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
                        smc_llc_link_inactive(lnk);
                if (lgr->is_smcd)
                        smc_ism_signal_shutdown(lgr);
                smc_lgr_free(lgr);
        }
}

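/* Flow of smc_lgr_free_work() above: bail out if the lgr gained
 * connections again; otherwise unhook it from smc_lgr_list. For SMC-R
 * a DELETE LINK is attempted first and the work is rescheduled to wait
 * for the peer's response. The lgr is freed only when no further run
 * of the work is pending, i.e. nobody rescheduled it in the meantime.
 */
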
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
        struct smc_link_group *lgr;
        struct smc_link *lnk;
        u8 rndvec[3];
        int rc = 0;
        int i;

        if (ini->is_smcd && ini->vlan_id) {
                if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
                        rc = SMC_CLC_DECL_ISMVLANERR;
                        goto out;
                }
        }

        lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
        if (!lgr) {
                rc = SMC_CLC_DECL_MEM;
                goto ism_put_vlan;
        }
        lgr->is_smcd = ini->is_smcd;
        lgr->sync_err = 0;
        lgr->vlan_id = ini->vlan_id;
        rwlock_init(&lgr->sndbufs_lock);
        rwlock_init(&lgr->rmbs_lock);
        rwlock_init(&lgr->conns_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                INIT_LIST_HEAD(&lgr->sndbufs[i]);
                INIT_LIST_HEAD(&lgr->rmbs[i]);
        }
        smc_lgr_list.num += SMC_LGR_NUM_INCR;
        memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
        INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
        lgr->conns_all = RB_ROOT;
        if (ini->is_smcd) {
                /* SMC-D specific settings */
                get_device(&ini->ism_dev->dev);
                lgr->peer_gid = ini->ism_gid;
                lgr->smcd = ini->ism_dev;
        } else {
                /* SMC-R specific settings */
                get_device(&ini->ib_dev->ibdev->dev);
                lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
                memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
                       SMC_SYSTEMID_LEN);

                lnk = &lgr->lnk[SMC_SINGLE_LINK];
                /* initialize link */
                lnk->state = SMC_LNK_ACTIVATING;
                lnk->link_id = SMC_SINGLE_LINK;
                lnk->smcibdev = ini->ib_dev;
                lnk->ibport = ini->ib_port;
                lnk->path_mtu =
                        ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
                if (!ini->ib_dev->initialized)
                        smc_ib_setup_per_ibdev(ini->ib_dev);
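                /* seed the initial packet sequence number; IB PSNs are
                 * 24 bits wide, hence the three random bytes
                 */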
                get_random_bytes(rndvec, sizeof(rndvec));
                lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
                        (rndvec[2] << 16);
                rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
                                          ini->vlan_id, lnk->gid,
                                          &lnk->sgid_index);
                if (rc)
                        goto free_lgr;
                rc = smc_llc_link_init(lnk);
                if (rc)
                        goto free_lgr;
                rc = smc_wr_alloc_link_mem(lnk);
                if (rc)
                        goto clear_llc_lnk;
                rc = smc_ib_create_protection_domain(lnk);
                if (rc)
                        goto free_link_mem;
                rc = smc_ib_create_queue_pair(lnk);
                if (rc)
                        goto dealloc_pd;
                rc = smc_wr_create_link(lnk);
                if (rc)
                        goto destroy_qp;
        }
        smc->conn.lgr = lgr;
        spin_lock_bh(&smc_lgr_list.lock);
        list_add(&lgr->list, &smc_lgr_list.list);
        spin_unlock_bh(&smc_lgr_list.lock);
        return 0;

destroy_qp:
        smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
        smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
        smc_wr_free_link_mem(lnk);
clear_llc_lnk:
        smc_llc_link_clear(lnk);
free_lgr:
        kfree(lgr);
ism_put_vlan:
        if (ini->is_smcd && ini->vlan_id)
                smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
        if (rc < 0) {
                if (rc == -ENOMEM)
                        rc = SMC_CLC_DECL_MEM;
                else
                        rc = SMC_CLC_DECL_INTERR;
        }
        return rc;
}

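/* SMC-R link bring-up order in smc_lgr_create() above: determine the
 * GID, initialize the LLC link, allocate work-request memory, create
 * the protection domain, create the queue pair, then finish the
 * work-request setup in smc_wr_create_link(); the error labels unwind
 * these steps in exactly the reverse order.
 */
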
static void smc_buf_unuse(struct smc_connection *conn,
                          struct smc_link_group *lgr)
{
        if (conn->sndbuf_desc)
                conn->sndbuf_desc->used = 0;
        if (conn->rmb_desc) {
                if (!conn->rmb_desc->regerr) {
                        if (!lgr->is_smcd) {
                                /* unregister rmb with peer */
                                smc_llc_do_delete_rkey(
                                                &lgr->lnk[SMC_SINGLE_LINK],
                                                conn->rmb_desc);
                        }
                        conn->rmb_desc->used = 0;
                } else {
                        /* buf registration failed, reuse not possible */
                        write_lock_bh(&lgr->rmbs_lock);
                        list_del(&conn->rmb_desc->list);
                        write_unlock_bh(&lgr->rmbs_lock);

                        smc_buf_free(lgr, true, conn->rmb_desc);
                }
        }
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!lgr)
                return;
        if (lgr->is_smcd) {
                smc_ism_unset_conn(conn);
                tasklet_kill(&conn->rx_tsklet);
        } else {
                smc_cdc_tx_dismiss_slots(conn);
        }
        smc_lgr_unregister_conn(conn);
        smc_buf_unuse(conn, lgr);               /* allow buffer reuse */
        conn->lgr = NULL;

        if (!lgr->conns_num)
                smc_lgr_schedule_free_work(lgr);
}

static void smc_link_clear(struct smc_link *lnk)
{
        lnk->peer_qpn = 0;
        smc_llc_link_clear(lnk);
        smc_ib_modify_qp_reset(lnk);
        smc_wr_free_link(lnk);
        smc_ib_destroy_queue_pair(lnk);
        smc_ib_dealloc_protection_domain(lnk);
        smc_wr_free_link_mem(lnk);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
                          struct smc_buf_desc *buf_desc)
{
        struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

        if (is_rmb) {
                if (buf_desc->mr_rx[SMC_SINGLE_LINK])
                        smc_ib_put_memory_region(
                                        buf_desc->mr_rx[SMC_SINGLE_LINK]);
                smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
                                    DMA_FROM_DEVICE);
        } else {
                smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
                                    DMA_TO_DEVICE);
        }
        sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
        if (buf_desc->pages)
                __free_pages(buf_desc->pages, buf_desc->order);
        kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
                          struct smc_buf_desc *buf_desc)
{
        if (is_dmb) {
                /* restore original buf len */
                buf_desc->len += sizeof(struct smcd_cdc_msg);
                smc_ism_unregister_dmb(lgr->smcd, buf_desc);
        } else {
                kfree(buf_desc->cpu_addr);
        }
        kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc)
{
        if (lgr->is_smcd)
                smcd_buf_free(lgr, is_rmb, buf_desc);
        else
                smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
        struct smc_buf_desc *buf_desc, *bf_desc;
        struct list_head *buf_list;
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                if (is_rmb)
                        buf_list = &lgr->rmbs[i];
                else
                        buf_list = &lgr->sndbufs[i];
                list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
                                         list) {
                        list_del(&buf_desc->list);
                        smc_buf_free(lgr, is_rmb, buf_desc);
                }
        }
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
        /* free send buffers */
        __smc_lgr_free_bufs(lgr, false);
        /* free rmbs */
        __smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
        smc_lgr_free_bufs(lgr);
        if (lgr->is_smcd) {
                smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
                put_device(&lgr->smcd->dev);
        } else {
                smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
                put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
        }
        kfree(lgr);
}

void smc_lgr_forget(struct smc_link_group *lgr)
{
        spin_lock_bh(&smc_lgr_list.lock);
        /* do not use this link group for new connections */
        if (!list_empty(&lgr->list))
                list_del_init(&lgr->list);
        spin_unlock_bh(&smc_lgr_list.lock);
}

/* terminate link group abnormally */
static void __smc_lgr_terminate(struct smc_link_group *lgr)
{
        struct smc_connection *conn;
        struct smc_sock *smc;
        struct rb_node *node;

        if (lgr->terminating)
                return; /* lgr already terminating */
        lgr->terminating = 1;
        if (!list_empty(&lgr->list)) /* forget lgr */
                list_del_init(&lgr->list);
        if (!lgr->is_smcd)
                smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);

        write_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
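        /* conns_lock is dropped around schedule_work() in each loop pass;
         * rb_first() is re-read afterwards because
         * __smc_lgr_unregister_conn() removed the current node
         */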
        while (node) {
                conn = rb_entry(node, struct smc_connection, alert_node);
                smc = container_of(conn, struct smc_sock, conn);
                sock_hold(&smc->sk); /* sock_put in close work */
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
                __smc_lgr_unregister_conn(conn);
                conn->lgr = NULL;
                write_unlock_bh(&lgr->conns_lock);
                if (!schedule_work(&conn->close_work))
                        sock_put(&smc->sk);
                write_lock_bh(&lgr->conns_lock);
                node = rb_first(&lgr->conns_all);
        }
        write_unlock_bh(&lgr->conns_lock);
        if (!lgr->is_smcd)
                wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
        smc_lgr_schedule_free_work(lgr);
}

void smc_lgr_terminate(struct smc_link_group *lgr)
{
        spin_lock_bh(&smc_lgr_list.lock);
        __smc_lgr_terminate(lgr);
        spin_unlock_bh(&smc_lgr_list.lock);
}

/* Called when IB port is terminated */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
        struct smc_link_group *lgr, *l;

        spin_lock_bh(&smc_lgr_list.lock);
        list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
                if (!lgr->is_smcd &&
                    lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
                    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
                        __smc_lgr_terminate(lgr);
        }
        spin_unlock_bh(&smc_lgr_list.lock);
}

/* Called when SMC-D device is terminated or peer is lost */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
        struct smc_link_group *lgr, *l;
        LIST_HEAD(lgr_free_list);

        /* run common cleanup function and build free list */
        spin_lock_bh(&smc_lgr_list.lock);
        list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
                if (lgr->is_smcd && lgr->smcd == dev &&
                    (!peer_gid || lgr->peer_gid == peer_gid) &&
                    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
                        __smc_lgr_terminate(lgr);
                        list_move(&lgr->list, &lgr_free_list);
                }
        }
        spin_unlock_bh(&smc_lgr_list.lock);

        /* cancel the regular free workers and actually free lgrs */
        list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
                list_del_init(&lgr->list);
                cancel_delayed_work_sync(&lgr->free_work);
                if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
                        smc_ism_signal_shutdown(lgr);
                smc_lgr_free(lgr);
        }
}

/* Determine the VLAN id of the internal TCP socket and store it in
 * ini->vlan_id; 0 means no VLAN was found.
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
        struct dst_entry *dst = sk_dst_get(clcsock->sk);
        struct net_device *ndev;
        int i, nest_lvl, rc = 0;

        ini->vlan_id = 0;
        if (!dst) {
                rc = -ENOTCONN;
                goto out;
        }
        if (!dst->dev) {
                rc = -ENODEV;
                goto out_rel;
        }

        ndev = dst->dev;
        if (is_vlan_dev(ndev)) {
                ini->vlan_id = vlan_dev_vlan_id(ndev);
                goto out_rel;
        }

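        /* the dst device is not a VLAN device itself: walk down its
         * chain of lower devices and use the first VLAN device found,
         * if any (covers setups where another virtual device, e.g. a
         * macvlan, is stacked on top of the VLAN device)
         */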
        rtnl_lock();
        nest_lvl = ndev->lower_level;
        for (i = 0; i < nest_lvl; i++) {
                struct list_head *lower = &ndev->adj_list.lower;

                if (list_empty(lower))
                        break;
                lower = lower->next;
                ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
                if (is_vlan_dev(ndev)) {
                        ini->vlan_id = vlan_dev_vlan_id(ndev);
                        break;
                }
        }
        rtnl_unlock();

out_rel:
        dst_release(dst);
out:
        return rc;
}

static bool smcr_lgr_match(struct smc_link_group *lgr,
                           struct smc_clc_msg_local *lcl,
                           enum smc_lgr_role role, u32 clcqpn)
{
        return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
                       SMC_SYSTEMID_LEN) &&
                !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
                        SMC_GID_SIZE) &&
                !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
                        sizeof(lcl->mac)) &&
                lgr->role == role &&
                (lgr->role == SMC_SERV ||
                 lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
                           struct smcd_dev *smcismdev, u64 peer_gid)
{
        return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr;
        enum smc_lgr_role role;
        int rc = 0;

        ini->cln_first_contact = SMC_FIRST_CONTACT;
        role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        if (role == SMC_CLNT && ini->srv_first_contact)
                /* create new link group as well */
                goto create;

        /* determine if an existing link group can be reused */
        spin_lock_bh(&smc_lgr_list.lock);
        list_for_each_entry(lgr, &smc_lgr_list.list, list) {
                write_lock_bh(&lgr->conns_lock);
                if ((ini->is_smcd ?
                     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
                     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
                    !lgr->sync_err &&
                    lgr->vlan_id == ini->vlan_id &&
                    (role == SMC_CLNT ||
                     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
                        /* link group found */
                        ini->cln_first_contact = SMC_REUSE_CONTACT;
                        conn->lgr = lgr;
                        smc_lgr_register_conn(conn); /* add smc conn to lgr */
                        if (delayed_work_pending(&lgr->free_work))
                                cancel_delayed_work(&lgr->free_work);
                        write_unlock_bh(&lgr->conns_lock);
                        break;
                }
                write_unlock_bh(&lgr->conns_lock);
        }
        spin_unlock_bh(&smc_lgr_list.lock);

        if (role == SMC_CLNT && !ini->srv_first_contact &&
            ini->cln_first_contact == SMC_FIRST_CONTACT) {
                /* server reuses a link group, but client wants to start
                 * a new one; send an out_of_sync decline,
                 * reason: synchronization error
                 */
                return SMC_CLC_DECL_SYNCERR;
        }

create:
        if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
                rc = smc_lgr_create(smc, ini);
                if (rc)
                        goto out;
                lgr = conn->lgr;
                write_lock_bh(&lgr->conns_lock);
                smc_lgr_register_conn(conn); /* add smc conn to lgr */
                write_unlock_bh(&lgr->conns_lock);
        }
        conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
        conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
        conn->urg_state = SMC_URG_READ;
        if (ini->is_smcd) {
                conn->rx_off = sizeof(struct smcd_cdc_msg);
                smcd_cdc_rx_init(conn); /* init tasklet for this conn */
        }
#ifndef KERNEL_HAS_ATOMIC64
        spin_lock_init(&conn->acurs_lock);
#endif

out:
        return rc;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
        u8 compressed;

        if (size <= SMC_BUF_MIN_SIZE)
                return 0;

        size = (size - 1) >> 14;
        compressed = ilog2(size) + 1;
        if (compressed >= SMC_RMBE_SIZES)
                compressed = SMC_RMBE_SIZES - 1;
        return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
        u32 size;

        size = 0x00000001 << (((int)compressed) + 14);
        return (int)size;
}

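/* Worked examples for the compressed size notation (index n stands for
 * a buffer of 2^(n + 14) bytes):
 *   smc_compress_bufsize(16384) = 0 -> smc_uncompress_bufsize(0) = 16384
 *   smc_compress_bufsize(16385) = 1 -> smc_uncompress_bufsize(1) = 32768
 *   smc_compress_bufsize(65536) = 2 -> smc_uncompress_bufsize(2) = 65536
 *   smc_compress_bufsize(70000) = 3 -> smc_uncompress_bufsize(3) = 131072
 * i.e. uncompress(compress(n)) >= n: a request is never rounded down.
 */
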
/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
                                             rwlock_t *lock,
                                             struct list_head *buf_list)
{
        struct smc_buf_desc *buf_slot;

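        /* the read lock is sufficient here: concurrent claimers race on
         * the cmpxchg() of ->used, not on modifications of the list
         */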
        read_lock_bh(lock);
        list_for_each_entry(buf_slot, buf_list, list) {
                if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
                        read_unlock_bh(lock);
                        return buf_slot;
                }
        }
        read_unlock_bh(lock);
        return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
        return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

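/* e.g. a 16KB RMB yields an update limit of 1638 bytes (the 10% term);
 * for larger RMBs the SOCK_MIN_SNDBUF / 2 cap takes over, so window
 * announcements are not withheld overly long on big buffers
 */
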
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
                                                bool is_rmb, int bufsize)
{
        struct smc_buf_desc *buf_desc;
        struct smc_link *lnk;
        int rc;

        /* try to alloc a new buffer */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
                return ERR_PTR(-ENOMEM);

        buf_desc->order = get_order(bufsize);
        buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
                                      __GFP_NOMEMALLOC | __GFP_COMP |
                                      __GFP_NORETRY | __GFP_ZERO,
                                      buf_desc->order);
        if (!buf_desc->pages) {
                kfree(buf_desc);
                return ERR_PTR(-EAGAIN);
        }
        buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);

        /* build the sg table from the pages */
        lnk = &lgr->lnk[SMC_SINGLE_LINK];
        rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
                            GFP_KERNEL);
        if (rc) {
                smc_buf_free(lgr, is_rmb, buf_desc);
                return ERR_PTR(rc);
        }
        sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
                   buf_desc->cpu_addr, bufsize);

        /* map sg table to DMA address */
        rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
                               is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
        /* SMC protocol depends on mapping to one DMA address only */
        if (rc != 1)  {
                smc_buf_free(lgr, is_rmb, buf_desc);
                return ERR_PTR(-EAGAIN);
        }

        /* create a new memory region for the RMB */
        if (is_rmb) {
                rc = smc_ib_get_memory_region(lnk->roce_pd,
                                              IB_ACCESS_REMOTE_WRITE |
                                              IB_ACCESS_LOCAL_WRITE,
                                              buf_desc);
                if (rc) {
                        smc_buf_free(lgr, is_rmb, buf_desc);
                        return ERR_PTR(rc);
                }
        }

        buf_desc->len = bufsize;
        return buf_desc;
}

#define SMCD_DMBE_SIZES         7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
                                                bool is_dmb, int bufsize)
{
        struct smc_buf_desc *buf_desc;
        int rc;

        if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
                return ERR_PTR(-EAGAIN);

        /* try to alloc a new DMB */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
                return ERR_PTR(-ENOMEM);
        if (is_dmb) {
                rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
                if (rc) {
                        kfree(buf_desc);
                        return ERR_PTR(-EAGAIN);
                }
                buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
                /* CDC header stored in buf. So, pretend it was smaller */
                buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
        } else {
                buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
                                             __GFP_NOWARN | __GFP_NORETRY |
                                             __GFP_NOMEMALLOC);
                if (!buf_desc->cpu_addr) {
                        kfree(buf_desc);
                        return ERR_PTR(-EAGAIN);
                }
                buf_desc->len = bufsize;
        }
        return buf_desc;
}

static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
        struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
        struct list_head *buf_list;
        int bufsize, bufsize_short;
        int sk_buf_size;
        rwlock_t *lock;

        if (is_rmb)
                /* use socket recv buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_rcvbuf / 2;
        else
                /* use socket send buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_sndbuf / 2;

        for (bufsize_short = smc_compress_bufsize(sk_buf_size);
             bufsize_short >= 0; bufsize_short--) {

                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
                        buf_list = &lgr->rmbs[bufsize_short];
                } else {
                        lock = &lgr->sndbufs_lock;
                        buf_list = &lgr->sndbufs[bufsize_short];
                }
                bufsize = smc_uncompress_bufsize(bufsize_short);
                if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
                        continue;

                /* check for reusable slot in the link group */
                buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
                if (buf_desc) {
                        memset(buf_desc->cpu_addr, 0, bufsize);
                        break; /* found reusable slot */
                }

                if (is_smcd)
                        buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
                else
                        buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

                if (PTR_ERR(buf_desc) == -ENOMEM)
                        break;
                if (IS_ERR(buf_desc))
                        continue;

                buf_desc->used = 1;
                write_lock_bh(lock);
                list_add(&buf_desc->list, buf_list);
                write_unlock_bh(lock);
                break; /* found */
        }

        if (IS_ERR(buf_desc))
                return -ENOMEM;

        if (is_rmb) {
                conn->rmb_desc = buf_desc;
                conn->rmbe_size_short = bufsize_short;
                smc->sk.sk_rcvbuf = bufsize * 2;
                atomic_set(&conn->bytes_to_rcv, 0);
                conn->rmbe_update_limit =
                        smc_rmb_wnd_update_limit(buf_desc->len);
                if (is_smcd)
                        smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
        } else {
                conn->sndbuf_desc = buf_desc;
                smc->sk.sk_sndbuf = bufsize * 2;
                atomic_set(&conn->sndbuf_space, bufsize);
        }
        return 0;
}

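/* Example of the descending size search in __smc_buf_create() above:
 * with sk_rcvbuf == 262144 the start value is 262144 / 2 = 131072,
 * i.e. compressed index 3; allocation is then attempted for 128KB,
 * 64KB, 32KB and finally 16KB, stopping at the first reusable slot or
 * successful allocation (-ENOMEM aborts the search immediately).
 */
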
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!conn->lgr || conn->lgr->is_smcd)
                return;
        smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                               conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!conn->lgr || conn->lgr->is_smcd)
                return;
        smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                                  conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!conn->lgr || conn->lgr->is_smcd)
                return;
        smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                               conn->rmb_desc, DMA_FROM_DEVICE);
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!conn->lgr || conn->lgr->is_smcd)
                return;
        smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                                  conn->rmb_desc, DMA_FROM_DEVICE);
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
        int rc;

        /* create send buffer */
        rc = __smc_buf_create(smc, is_smcd, false);
        if (rc)
                return rc;
        /* create rmb */
        rc = __smc_buf_create(smc, is_smcd, true);
        if (rc)
                smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
        return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
        int i;

        for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
                if (!test_and_set_bit(i, lgr->rtokens_used_mask))
                        return i;
        }
        return -ENOSPC;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
        u64 dma_addr = be64_to_cpu(nw_vaddr);
        u32 rkey = ntohl(nw_rkey);
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
                    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        /* already in list */
                        return i;
                }
        }
        i = smc_rmb_reserve_rtoken_idx(lgr);
        if (i < 0)
                return i;
        lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
        lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
        return i;
}

/* delete an rtoken */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
{
        u32 rkey = ntohl(nw_rkey);
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
                        lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;

                        clear_bit(i, lgr->rtokens_used_mask);
                        return 0;
                }
        }
        return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_clc_msg_accept_confirm *clc)
{
        conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
                                          clc->rmb_rkey);
        if (conn->rtoken_idx < 0)
                return conn->rtoken_idx;
        return 0;
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
        struct smc_link_group *lgr, *lg;
        LIST_HEAD(lgr_freeing_list);

        spin_lock_bh(&smc_lgr_list.lock);
        if (!list_empty(&smc_lgr_list.list))
                list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
        spin_unlock_bh(&smc_lgr_list.lock);
        list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
                list_del_init(&lgr->list);
                if (!lgr->is_smcd) {
                        struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

                        if (lnk->state == SMC_LNK_ACTIVE)
                                smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
                                                         false);
                        smc_llc_link_inactive(lnk);
                }
                cancel_delayed_work_sync(&lgr->free_work);
                if (lgr->is_smcd)
                        smc_ism_signal_shutdown(lgr);
                smc_lgr_free(lgr); /* free link group */
        }
}
